![]() |
|
|||
File indexing completed on 2025-02-22 10:31:31
0001 //----------------------------------*-C++-*----------------------------------// 0002 // Copyright 2022-2024 UT-Battelle, LLC, and other Celeritas developers. 0003 // See the top-level COPYRIGHT file for details. 0004 // SPDX-License-Identifier: (Apache-2.0 OR MIT) 0005 //---------------------------------------------------------------------------// 0006 //! \file celeritas/random/XorwowRngEngine.hh 0007 //---------------------------------------------------------------------------// 0008 #pragma once 0009 0010 #include "corecel/Assert.hh" 0011 #include "corecel/OpaqueId.hh" 0012 #include "corecel/Types.hh" 0013 #include "corecel/sys/ThreadId.hh" 0014 0015 #include "XorwowRngData.hh" 0016 #include "distribution/GenerateCanonical.hh" 0017 0018 #include "detail/GenerateCanonical32.hh" 0019 0020 namespace celeritas 0021 { 0022 //---------------------------------------------------------------------------// 0023 /*! 0024 * Generate random data using the XORWOW algorithm. 0025 * 0026 * The XorwowRngEngine uses a C++11-like interface to generate random data. The 0027 * sampling of uniform floating point data is done with specializations to the 0028 * GenerateCanonical class. 0029 * 0030 * The \c resize function for \c XorwowRngStateData will fully randomize the 0031 * state at initialization. Alternatively, the state can be initialized with a 0032 * seed, subsequence, and offset. 0033 * 0034 * See Marsaglia (2003) for the theory underlying the algorithm and the the 0035 * "example" \c xorwow that combines an \em xorshift output with a Weyl 0036 * sequence (https://www.jstatsoft.org/index.php/jss/article/view/v008i14/916). 0037 * 0038 * For a description of the jump ahead method using the polynomial 0039 * representation of the recurrence, see: Haramoto, H., Matsumoto, M., 0040 * Nishimura, T., Panneton, F., L’Ecuyer, P. 2008. "Efficient jump ahead for 0041 * F2-linear random number generators". INFORMS Journal on Computing. 0042 * https://pubsonline.informs.org/doi/10.1287/ijoc.1070.0251. 0043 * 0044 * The jump polynomials were precomputed using 0045 * https://github.com/celeritas-project/utils/blob/main/prng/xorwow-jump.py. 0046 * For a more detailed description of how to calculate the jump polynomials 0047 * using Knuth's square-and-multiply algorithm in \f$ O(k^2 log d) \f$ time 0048 * (where \em k is the number of bits in the state and \em d is the number of 0049 * steps to skip ahead), 0050 * see: Collins, J. 2008. "Testing, Selection, and Implementation of Random 0051 * Number Generators". ARL-TR-4498. 0052 * https://apps.dtic.mil/sti/pdfs/ADA486637.pdf. 0053 */ 0054 class XorwowRngEngine 0055 { 0056 public: 0057 //!@{ 0058 //! \name Type aliases 0059 using uint_t = XorwowUInt; 0060 using result_type = uint_t; 0061 using Initializer_t = XorwowRngInitializer; 0062 using ParamsRef = NativeCRef<XorwowRngParamsData>; 0063 using StateRef = NativeRef<XorwowRngStateData>; 0064 //!@} 0065 0066 public: 0067 //! Lowest value potentially generated 0068 static CELER_CONSTEXPR_FUNCTION result_type min() { return 0u; } 0069 //! Highest value potentially generated 0070 static CELER_CONSTEXPR_FUNCTION result_type max() { return 0xffffffffu; } 0071 0072 // Construct from state and persistent data 0073 inline CELER_FUNCTION XorwowRngEngine(ParamsRef const& params, 0074 StateRef const& state, 0075 TrackSlotId tid); 0076 0077 // Initialize state 0078 inline CELER_FUNCTION XorwowRngEngine& operator=(Initializer_t const&); 0079 0080 // Generate a 32-bit pseudorandom number 0081 inline CELER_FUNCTION result_type operator()(); 0082 0083 // Advance the state \c count times 0084 inline CELER_FUNCTION void discard(ull_int count); 0085 0086 private: 0087 /// TYPES /// 0088 0089 using JumpPoly = Array<uint_t, 5>; 0090 using ArrayJumpPoly = Array<JumpPoly, 32>; 0091 0092 /// DATA /// 0093 0094 ParamsRef const& params_; 0095 XorwowState* state_; 0096 0097 //// HELPER FUNCTIONS //// 0098 0099 inline CELER_FUNCTION void discard_subsequence(ull_int); 0100 inline CELER_FUNCTION void next(); 0101 inline CELER_FUNCTION void jump(ull_int, ArrayJumpPoly const&); 0102 inline CELER_FUNCTION void jump(JumpPoly const&); 0103 0104 // Helper RNG for initializing the state 0105 struct SplitMix64 0106 { 0107 std::uint64_t state; 0108 inline CELER_FUNCTION std::uint64_t operator()(); 0109 }; 0110 }; 0111 0112 //---------------------------------------------------------------------------// 0113 /*! 0114 * Specialization of GenerateCanonical for XorwowRngEngine. 0115 */ 0116 template<class RealType> 0117 class GenerateCanonical<XorwowRngEngine, RealType> 0118 { 0119 public: 0120 //!@{ 0121 //! \name Type aliases 0122 using real_type = RealType; 0123 using result_type = RealType; 0124 //!@} 0125 0126 public: 0127 //! Sample a random number on [0, 1) 0128 CELER_FORCEINLINE_FUNCTION result_type operator()(XorwowRngEngine& rng) 0129 { 0130 return detail::GenerateCanonical32<RealType>()(rng); 0131 } 0132 }; 0133 0134 //---------------------------------------------------------------------------// 0135 // INLINE DEFINITIONS 0136 //---------------------------------------------------------------------------// 0137 /*! 0138 * Construct from state and persistent data. 0139 */ 0140 CELER_FUNCTION 0141 XorwowRngEngine::XorwowRngEngine(ParamsRef const& params, 0142 StateRef const& state, 0143 TrackSlotId tid) 0144 : params_(params) 0145 { 0146 CELER_EXPECT(tid < state.state.size()); 0147 state_ = &state.state[tid]; 0148 } 0149 0150 //---------------------------------------------------------------------------// 0151 /*! 0152 * Initialize the RNG engine. 0153 * 0154 * This moves the state ahead to the given subsequence (a subsequence has size 0155 * 2^67) and skips \c offset random numbers. 0156 * 0157 * It is recommended to initialize the state using a very different generator 0158 * from the one being initialized to avoid correlations. Here the 64-bit 0159 * SplitMix64 generator is used for initialization (See Matsumoto, Wada, 0160 * Kuramoto, and Ashihara, "Common defects in initialization of pseudorandom 0161 * number generators". https://dl.acm.org/doi/10.1145/1276927.1276928.) 0162 */ 0163 CELER_FUNCTION XorwowRngEngine& 0164 XorwowRngEngine::operator=(Initializer_t const& init) 0165 { 0166 auto& s = state_->xorstate; 0167 0168 // Initialize the state from the seed 0169 SplitMix64 rng{init.seed[0]}; 0170 std::uint64_t seed = rng(); 0171 s[0] = static_cast<uint_t>(seed); 0172 s[1] = static_cast<uint_t>(seed >> 32); 0173 seed = rng(); 0174 s[2] = static_cast<uint_t>(seed); 0175 s[3] = static_cast<uint_t>(seed >> 32); 0176 seed = rng(); 0177 s[4] = static_cast<uint_t>(seed); 0178 state_->weylstate = static_cast<uint_t>(seed >> 32); 0179 0180 // Skip ahead 0181 this->discard_subsequence(init.subsequence); 0182 this->discard(init.offset); 0183 0184 return *this; 0185 } 0186 0187 //---------------------------------------------------------------------------// 0188 /*! 0189 * Generate a 32-bit pseudorandom number using the 'xorwow' engine. 0190 */ 0191 CELER_FUNCTION auto XorwowRngEngine::operator()() -> result_type 0192 { 0193 this->next(); 0194 state_->weylstate += 362437u; 0195 return state_->weylstate + state_->xorstate[4]; 0196 } 0197 0198 //---------------------------------------------------------------------------// 0199 /*! 0200 * Advance the state \c count times. 0201 */ 0202 CELER_FUNCTION void XorwowRngEngine::discard(ull_int count) 0203 { 0204 this->jump(count, params_.jump); 0205 state_->weylstate += static_cast<unsigned int>(count) * 362437u; 0206 } 0207 0208 //---------------------------------------------------------------------------// 0209 /*! 0210 * Advance the state \c count subsequences (\c count * 2^67 times). 0211 * 0212 * Note that the Weyl sequence value remains the same since it has period 2^32 0213 * which divides evenly into 2^67. 0214 */ 0215 CELER_FUNCTION void XorwowRngEngine::discard_subsequence(ull_int count) 0216 { 0217 this->jump(count, params_.jump_subsequence); 0218 } 0219 0220 //---------------------------------------------------------------------------// 0221 /*! 0222 * Apply the transformation to the state. 0223 * 0224 * This does not update the Weyl sequence value. 0225 */ 0226 CELER_FUNCTION void XorwowRngEngine::next() 0227 { 0228 auto& s = state_->xorstate; 0229 auto const t = (s[0] ^ (s[0] >> 2u)); 0230 0231 s[0] = s[1]; 0232 s[1] = s[2]; 0233 s[2] = s[3]; 0234 s[3] = s[4]; 0235 s[4] = (s[4] ^ (s[4] << 4u)) ^ (t ^ (t << 1u)); 0236 } 0237 0238 //---------------------------------------------------------------------------// 0239 /*! 0240 * Jump ahead \c count steps or subsequences. 0241 * 0242 * This applies the jump polynomials until the given number of steps or 0243 * subsequences have been skipped. 0244 */ 0245 CELER_FUNCTION void 0246 XorwowRngEngine::jump(ull_int count, ArrayJumpPoly const& jump_poly_arr) 0247 { 0248 // Maximum number of times to apply any jump polynomial. Since the jump 0249 // sizes are 4^i for i = [0, 32), the max is 3. 0250 constexpr size_type max_num_jump = 3; 0251 0252 // Start with the smallest jump (either one step or one subsequence) 0253 size_type jump_idx = 0; 0254 while (count > 0) 0255 { 0256 // Number of times to apply this jump polynomial 0257 uint_t num_jump = static_cast<uint_t>(count) & max_num_jump; 0258 for (size_type i = 0; i < num_jump; ++i) 0259 { 0260 CELER_ASSERT(jump_idx < jump_poly_arr.size()); 0261 this->jump(jump_poly_arr[jump_idx]); 0262 } 0263 ++jump_idx; 0264 count >>= 2; 0265 } 0266 } 0267 0268 //---------------------------------------------------------------------------// 0269 /*! 0270 * Jump ahead using the given jump polynomial. 0271 * 0272 * This uses the polynomial representation to apply the recurrence to the 0273 * state. The theory is described in 0274 * https://pubsonline.informs.org/doi/10.1287/ijoc.1070.0251. Let 0275 * \f[ 0276 g(z) = z^d \mod p(z) = a_1 z^{k-1} + ... + a_{k-1} z + a_k, 0277 * \f] 0278 * where \f$ p(z) = det(zI + T) \f$ is the characteristic polynomial and \f$ T 0279 * \f$ is the transformation matrix. Observing that \f$ g(z) = z^d q(z)p(z) \f$ 0280 * for some polynomial \f$ q(z) \f$ and that \f$ p(T) = 0 \f$ (a fundamental 0281 * property of the characteristic polynomial), it follows that 0282 * \f[ 0283 g(T) = T^d = a_1 A^{k-1} + ... + a_{k-1} A + a_k I. 0284 * \f] 0285 * Therefore, using the precalculated coefficients of the jump polynomial \f$ 0286 * g(z) \f$ and Horner's method for polynomial evaluation, the state after \f$ 0287 * d \f$ steps can be computed as 0288 * \f[ 0289 T^d x = T(...T(T(T a_1 x + a_2 x) + a_3 x) + ... + a_{k-1} x) + a_k x. 0290 * \f] 0291 * Note that applying \f$ T \f$ to \f$ x \f$ is equivalent to calling \c 0292 * next(), and that in \f$ F_2 \f$, the finite field with two elements, 0293 * addition is the same as subtraction and equivalent to bitwise exclusive or, 0294 * and multiplication is bitwise and. 0295 */ 0296 CELER_FUNCTION void XorwowRngEngine::jump(JumpPoly const& jump_poly) 0297 { 0298 Array<uint_t, 5> s = {0}; 0299 for (size_type i : range(params_.num_words())) 0300 { 0301 for (size_type j : range(params_.num_bits())) 0302 { 0303 if (jump_poly[i] & (1 << j)) 0304 { 0305 for (size_type k : range(params_.num_words())) 0306 { 0307 s[k] ^= state_->xorstate[k]; 0308 } 0309 } 0310 this->next(); 0311 } 0312 } 0313 state_->xorstate = s; 0314 } 0315 0316 //---------------------------------------------------------------------------// 0317 /*! 0318 * Generate a 64-bit pseudorandom number using the SplitMix64 engine. 0319 * 0320 * This is used to initialize the XORWOW state. See https://prng.di.unimi.it. 0321 */ 0322 CELER_FUNCTION std::uint64_t XorwowRngEngine::SplitMix64::operator()() 0323 { 0324 std::uint64_t z = (state += 0x9e3779b97f4a7c15ull); 0325 z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9ull; 0326 z = (z ^ (z >> 27)) * 0x94d049bb133111ebull; 0327 return z ^ (z >> 31); 0328 } 0329 0330 //---------------------------------------------------------------------------// 0331 } // namespace celeritas
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
![]() ![]() |