![]() |
|
|||
File indexing completed on 2025-09-13 08:53:54
0001 //------------------------------- -*- C++ -*- -------------------------------// 0002 // Copyright Celeritas contributors: see top-level COPYRIGHT file for details 0003 // SPDX-License-Identifier: (Apache-2.0 OR MIT) 0004 //---------------------------------------------------------------------------// 0005 //! \file corecel/random/engine/XorwowRngEngine.hh 0006 //---------------------------------------------------------------------------// 0007 #pragma once 0008 0009 #include "corecel/Assert.hh" 0010 #include "corecel/OpaqueId.hh" 0011 #include "corecel/Types.hh" 0012 #include "corecel/random/data/XorwowRngData.hh" 0013 #include "corecel/random/distribution/GenerateCanonical.hh" 0014 #include "corecel/random/distribution/detail/GenerateCanonical32.hh" 0015 #include "corecel/sys/ThreadId.hh" 0016 0017 namespace celeritas 0018 { 0019 //---------------------------------------------------------------------------// 0020 /*! 0021 * Generate random data using the XORWOW algorithm. 0022 * 0023 * The XorwowRngEngine uses a C++11-like interface to generate random data. The 0024 * sampling of uniform floating point data is done with specializations to the 0025 * GenerateCanonical class. 0026 * 0027 * The \c resize function for \c XorwowRngStateData will fully randomize the 0028 * state at initialization. Alternatively, the state can be initialized with a 0029 * seed, subsequence, and offset. 0030 * 0031 * See Marsaglia (2003) for the theory underlying the algorithm and the the 0032 * "example" \c xorwow that combines an \em xorshift output with a Weyl 0033 * sequence (https://www.jstatsoft.org/index.php/jss/article/view/v008i14/916). 0034 * 0035 * For a description of the jump ahead method using the polynomial 0036 * representation of the recurrence, see 0037 * \citet{haramato-jump-2008, 0038 * https://pubsonline.informs.org/doi/10.1287/ijoc.1070.0251}. 0039 * 0040 * The jump polynomials were precomputed using [a python script]( 0041 * https://github.com/celeritas-project/utils/blob/main/prng/xorwow-jump.py). 0042 * For a more detailed description of how to calculate the jump polynomials 0043 * using Knuth's square-and-multiply algorithm in \f$ O(k^2 log d) \f$ time 0044 * (where \em k is the number of bits in the state and \em d is the number of 0045 * steps to skip ahead), see \citet{collins-rng-2008, 0046 * http://www.dtic.mil/docs/citations/ADA486379}. 0047 */ 0048 class XorwowRngEngine 0049 { 0050 public: 0051 //!@{ 0052 //! \name Type aliases 0053 using uint_t = XorwowUInt; 0054 using result_type = uint_t; 0055 using Initializer_t = XorwowRngInitializer; 0056 using ParamsRef = NativeCRef<XorwowRngParamsData>; 0057 using StateRef = NativeRef<XorwowRngStateData>; 0058 //!@} 0059 0060 public: 0061 //! Lowest value potentially generated 0062 static CELER_CONSTEXPR_FUNCTION result_type min() { return 0u; } 0063 //! Highest value potentially generated 0064 static CELER_CONSTEXPR_FUNCTION result_type max() { return 0xffffffffu; } 0065 0066 // Construct from state and persistent data 0067 inline CELER_FUNCTION XorwowRngEngine(ParamsRef const& params, 0068 StateRef const& state, 0069 TrackSlotId tid); 0070 0071 // Initialize state 0072 inline CELER_FUNCTION XorwowRngEngine& operator=(Initializer_t const&); 0073 0074 // Generate a 32-bit pseudorandom number 0075 inline CELER_FUNCTION result_type operator()(); 0076 0077 // Advance the state \c count times 0078 inline CELER_FUNCTION void discard(ull_int count); 0079 0080 private: 0081 /// TYPES /// 0082 0083 using JumpPoly = Array<uint_t, 5>; 0084 using ArrayJumpPoly = Array<JumpPoly, 32>; 0085 0086 /// DATA /// 0087 0088 ParamsRef const& params_; 0089 XorwowState* state_; 0090 0091 //// HELPER FUNCTIONS //// 0092 0093 inline CELER_FUNCTION void discard_subsequence(ull_int); 0094 inline CELER_FUNCTION void next(); 0095 inline CELER_FUNCTION void jump(ull_int, ArrayJumpPoly const&); 0096 inline CELER_FUNCTION void jump(JumpPoly const&); 0097 0098 // Helper RNG for initializing the state 0099 struct SplitMix64 0100 { 0101 std::uint64_t state; 0102 inline CELER_FUNCTION std::uint64_t operator()(); 0103 }; 0104 }; 0105 0106 //---------------------------------------------------------------------------// 0107 /*! 0108 * Specialization of GenerateCanonical for XorwowRngEngine. 0109 */ 0110 template<class RealType> 0111 struct GenerateCanonical<XorwowRngEngine, RealType> 0112 { 0113 //!@{ 0114 //! \name Type aliases 0115 using real_type = RealType; 0116 using result_type = RealType; 0117 //!@} 0118 0119 //! Declare that we use the 32-bit canonical generator 0120 static constexpr auto policy = GenerateCanonicalPolicy::builtin32; 0121 0122 //! Sample a random number on [0, 1) 0123 CELER_FORCEINLINE_FUNCTION result_type operator()(XorwowRngEngine& rng) 0124 { 0125 return detail::GenerateCanonical32<RealType>()(rng); 0126 } 0127 }; 0128 0129 //---------------------------------------------------------------------------// 0130 // INLINE DEFINITIONS 0131 //---------------------------------------------------------------------------// 0132 /*! 0133 * Construct from state and persistent data. 0134 */ 0135 CELER_FUNCTION 0136 XorwowRngEngine::XorwowRngEngine(ParamsRef const& params, 0137 StateRef const& state, 0138 TrackSlotId tid) 0139 : params_(params) 0140 { 0141 CELER_EXPECT(tid < state.state.size()); 0142 state_ = &state.state[tid]; 0143 } 0144 0145 //---------------------------------------------------------------------------// 0146 /*! 0147 * Initialize the RNG engine. 0148 * 0149 * This moves the state ahead to the given subsequence (a subsequence has size 0150 * 2^67) and skips \c offset random numbers. 0151 * 0152 * It is recommended to initialize the state using a very different generator 0153 * from the one being initialized to avoid correlations. Here the 64-bit 0154 * SplitMix64 generator is used for initialization (See Matsumoto, Wada, 0155 * Kuramoto, and Ashihara, "Common defects in initialization of pseudorandom 0156 * number generators". https://dl.acm.org/doi/10.1145/1276927.1276928.) 0157 */ 0158 CELER_FUNCTION XorwowRngEngine& 0159 XorwowRngEngine::operator=(Initializer_t const& init) 0160 { 0161 auto& s = state_->xorstate; 0162 0163 // Initialize the state from the seed 0164 SplitMix64 rng{init.seed[0]}; 0165 std::uint64_t seed = rng(); 0166 s[0] = static_cast<uint_t>(seed); 0167 s[1] = static_cast<uint_t>(seed >> 32); 0168 seed = rng(); 0169 s[2] = static_cast<uint_t>(seed); 0170 s[3] = static_cast<uint_t>(seed >> 32); 0171 seed = rng(); 0172 s[4] = static_cast<uint_t>(seed); 0173 state_->weylstate = static_cast<uint_t>(seed >> 32); 0174 0175 // Skip ahead 0176 this->discard_subsequence(init.subsequence); 0177 this->discard(init.offset); 0178 0179 return *this; 0180 } 0181 0182 //---------------------------------------------------------------------------// 0183 /*! 0184 * Generate a 32-bit pseudorandom number using the 'xorwow' engine. 0185 */ 0186 CELER_FUNCTION auto XorwowRngEngine::operator()() -> result_type 0187 { 0188 this->next(); 0189 state_->weylstate += 362437u; 0190 return state_->weylstate + state_->xorstate[4]; 0191 } 0192 0193 //---------------------------------------------------------------------------// 0194 /*! 0195 * Advance the state \c count times. 0196 */ 0197 CELER_FUNCTION void XorwowRngEngine::discard(ull_int count) 0198 { 0199 this->jump(count, params_.jump); 0200 state_->weylstate += static_cast<unsigned int>(count) * 362437u; 0201 } 0202 0203 //---------------------------------------------------------------------------// 0204 /*! 0205 * Advance the state \c count subsequences (\c count * 2^67 times). 0206 * 0207 * Note that the Weyl sequence value remains the same since it has period 2^32 0208 * which divides evenly into 2^67. 0209 */ 0210 CELER_FUNCTION void XorwowRngEngine::discard_subsequence(ull_int count) 0211 { 0212 this->jump(count, params_.jump_subsequence); 0213 } 0214 0215 //---------------------------------------------------------------------------// 0216 /*! 0217 * Apply the transformation to the state. 0218 * 0219 * This does not update the Weyl sequence value. 0220 */ 0221 CELER_FUNCTION void XorwowRngEngine::next() 0222 { 0223 auto& s = state_->xorstate; 0224 auto const t = (s[0] ^ (s[0] >> 2u)); 0225 0226 s[0] = s[1]; 0227 s[1] = s[2]; 0228 s[2] = s[3]; 0229 s[3] = s[4]; 0230 s[4] = (s[4] ^ (s[4] << 4u)) ^ (t ^ (t << 1u)); 0231 } 0232 0233 //---------------------------------------------------------------------------// 0234 /*! 0235 * Jump ahead \c count steps or subsequences. 0236 * 0237 * This applies the jump polynomials until the given number of steps or 0238 * subsequences have been skipped. 0239 */ 0240 CELER_FUNCTION void 0241 XorwowRngEngine::jump(ull_int count, ArrayJumpPoly const& jump_poly_arr) 0242 { 0243 // Maximum number of times to apply any jump polynomial. Since the jump 0244 // sizes are 4^i for i = [0, 32), the max is 3. 0245 constexpr size_type max_num_jump = 3; 0246 0247 // Start with the smallest jump (either one step or one subsequence) 0248 size_type jump_idx = 0; 0249 while (count > 0) 0250 { 0251 // Number of times to apply this jump polynomial 0252 uint_t num_jump = static_cast<uint_t>(count) & max_num_jump; 0253 for (size_type i = 0; i < num_jump; ++i) 0254 { 0255 CELER_ASSERT(jump_idx < jump_poly_arr.size()); 0256 this->jump(jump_poly_arr[jump_idx]); 0257 } 0258 ++jump_idx; 0259 count >>= 2; 0260 } 0261 } 0262 0263 //---------------------------------------------------------------------------// 0264 /*! 0265 * Jump ahead using the given jump polynomial. 0266 * 0267 * This uses the polynomial representation to apply the recurrence to the 0268 * state. The theory is described in 0269 * https://pubsonline.informs.org/doi/10.1287/ijoc.1070.0251. Let 0270 * \f[ 0271 g(z) = z^d \mod p(z) = a_1 z^{k-1} + ... + a_{k-1} z + a_k, 0272 * \f] 0273 * where \f$ p(z) = det(zI + T) \f$ is the characteristic polynomial and \f$ T 0274 * \f$ is the transformation matrix. Observing that \f$ g(z) = z^d q(z)p(z) \f$ 0275 * for some polynomial \f$ q(z) \f$ and that \f$ p(T) = 0 \f$ (a fundamental 0276 * property of the characteristic polynomial), it follows that 0277 * \f[ 0278 g(T) = T^d = a_1 A^{k-1} + ... + a_{k-1} A + a_k I. 0279 * \f] 0280 * Therefore, using the precalculated coefficients of the jump polynomial \f$ 0281 * g(z) \f$ and Horner's method for polynomial evaluation, the state after \f$ 0282 * d \f$ steps can be computed as 0283 * \f[ 0284 T^d x = T(...T(T(T a_1 x + a_2 x) + a_3 x) + ... + a_{k-1} x) + a_k x. 0285 * \f] 0286 * Note that applying \f$ T \f$ to \f$ x \f$ is equivalent to calling \c 0287 * next(), and that in \f$ F_2 \f$, the finite field with two elements, 0288 * addition is the same as subtraction and equivalent to bitwise exclusive or, 0289 * and multiplication is bitwise and. 0290 */ 0291 CELER_FUNCTION void XorwowRngEngine::jump(JumpPoly const& jump_poly) 0292 { 0293 Array<uint_t, 5> s = {0}; 0294 for (size_type i : range(params_.num_words())) 0295 { 0296 for (size_type j : range(params_.num_bits())) 0297 { 0298 if (jump_poly[i] & (1 << j)) 0299 { 0300 for (size_type k : range(params_.num_words())) 0301 { 0302 s[k] ^= state_->xorstate[k]; 0303 } 0304 } 0305 this->next(); 0306 } 0307 } 0308 state_->xorstate = s; 0309 } 0310 0311 //---------------------------------------------------------------------------// 0312 /*! 0313 * Generate a 64-bit pseudorandom number using the SplitMix64 engine. 0314 * 0315 * This is used to initialize the XORWOW state. See https://prng.di.unimi.it. 0316 */ 0317 CELER_FUNCTION std::uint64_t XorwowRngEngine::SplitMix64::operator()() 0318 { 0319 std::uint64_t z = (state += 0x9e3779b97f4a7c15ull); 0320 z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9ull; 0321 z = (z ^ (z >> 27)) * 0x94d049bb133111ebull; 0322 return z ^ (z >> 31); 0323 } 0324 0325 //---------------------------------------------------------------------------// 0326 } // namespace celeritas
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
![]() ![]() |