Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-09-13 08:53:54

0001 //------------------------------- -*- C++ -*- -------------------------------//
0002 // Copyright Celeritas contributors: see top-level COPYRIGHT file for details
0003 // SPDX-License-Identifier: (Apache-2.0 OR MIT)
0004 //---------------------------------------------------------------------------//
0005 //! \file corecel/random/engine/XorwowRngEngine.hh
0006 //---------------------------------------------------------------------------//
0007 #pragma once
0008 
0009 #include "corecel/Assert.hh"
0010 #include "corecel/OpaqueId.hh"
0011 #include "corecel/Types.hh"
0012 #include "corecel/random/data/XorwowRngData.hh"
0013 #include "corecel/random/distribution/GenerateCanonical.hh"
0014 #include "corecel/random/distribution/detail/GenerateCanonical32.hh"
0015 #include "corecel/sys/ThreadId.hh"
0016 
0017 namespace celeritas
0018 {
0019 //---------------------------------------------------------------------------//
0020 /*!
0021  * Generate random data using the XORWOW algorithm.
0022  *
0023  * The XorwowRngEngine uses a C++11-like interface to generate random data. The
0024  * sampling of uniform floating point data is done with specializations to the
0025  * GenerateCanonical class.
0026  *
0027  * The \c resize function for \c XorwowRngStateData will fully randomize the
0028  * state at initialization. Alternatively, the state can be initialized with a
0029  * seed, subsequence, and offset.
0030  *
0031  * See Marsaglia (2003) for the theory underlying the algorithm and the the
0032  * "example" \c xorwow that combines an \em xorshift output with a Weyl
0033  * sequence (https://www.jstatsoft.org/index.php/jss/article/view/v008i14/916).
0034  *
0035  * For a description of the jump ahead method using the polynomial
0036  * representation of the recurrence, see
0037  * \citet{haramato-jump-2008,
0038  * https://pubsonline.informs.org/doi/10.1287/ijoc.1070.0251}.
0039  *
0040  * The jump polynomials were precomputed using [a python script](
0041  * https://github.com/celeritas-project/utils/blob/main/prng/xorwow-jump.py).
0042  * For a more detailed description of how to calculate the jump polynomials
0043  * using Knuth's square-and-multiply algorithm in \f$ O(k^2 log d) \f$ time
0044  * (where \em k is the number of bits in the state and \em d is the number of
0045  * steps to skip ahead), see \citet{collins-rng-2008,
0046  * http://www.dtic.mil/docs/citations/ADA486379}.
0047  */
0048 class XorwowRngEngine
0049 {
0050   public:
0051     //!@{
0052     //! \name Type aliases
0053     using uint_t = XorwowUInt;
0054     using result_type = uint_t;
0055     using Initializer_t = XorwowRngInitializer;
0056     using ParamsRef = NativeCRef<XorwowRngParamsData>;
0057     using StateRef = NativeRef<XorwowRngStateData>;
0058     //!@}
0059 
0060   public:
0061     //! Lowest value potentially generated
0062     static CELER_CONSTEXPR_FUNCTION result_type min() { return 0u; }
0063     //! Highest value potentially generated
0064     static CELER_CONSTEXPR_FUNCTION result_type max() { return 0xffffffffu; }
0065 
0066     // Construct from state and persistent data
0067     inline CELER_FUNCTION XorwowRngEngine(ParamsRef const& params,
0068                                           StateRef const& state,
0069                                           TrackSlotId tid);
0070 
0071     // Initialize state
0072     inline CELER_FUNCTION XorwowRngEngine& operator=(Initializer_t const&);
0073 
0074     // Generate a 32-bit pseudorandom number
0075     inline CELER_FUNCTION result_type operator()();
0076 
0077     // Advance the state \c count times
0078     inline CELER_FUNCTION void discard(ull_int count);
0079 
0080   private:
0081     /// TYPES ///
0082 
0083     using JumpPoly = Array<uint_t, 5>;
0084     using ArrayJumpPoly = Array<JumpPoly, 32>;
0085 
0086     /// DATA ///
0087 
0088     ParamsRef const& params_;
0089     XorwowState* state_;
0090 
0091     //// HELPER FUNCTIONS ////
0092 
0093     inline CELER_FUNCTION void discard_subsequence(ull_int);
0094     inline CELER_FUNCTION void next();
0095     inline CELER_FUNCTION void jump(ull_int, ArrayJumpPoly const&);
0096     inline CELER_FUNCTION void jump(JumpPoly const&);
0097 
0098     // Helper RNG for initializing the state
0099     struct SplitMix64
0100     {
0101         std::uint64_t state;
0102         inline CELER_FUNCTION std::uint64_t operator()();
0103     };
0104 };
0105 
0106 //---------------------------------------------------------------------------//
0107 /*!
0108  * Specialization of GenerateCanonical for XorwowRngEngine.
0109  */
0110 template<class RealType>
0111 struct GenerateCanonical<XorwowRngEngine, RealType>
0112 {
0113     //!@{
0114     //! \name Type aliases
0115     using real_type = RealType;
0116     using result_type = RealType;
0117     //!@}
0118 
0119     //! Declare that we use the 32-bit canonical generator
0120     static constexpr auto policy = GenerateCanonicalPolicy::builtin32;
0121 
0122     //! Sample a random number on [0, 1)
0123     CELER_FORCEINLINE_FUNCTION result_type operator()(XorwowRngEngine& rng)
0124     {
0125         return detail::GenerateCanonical32<RealType>()(rng);
0126     }
0127 };
0128 
0129 //---------------------------------------------------------------------------//
0130 // INLINE DEFINITIONS
0131 //---------------------------------------------------------------------------//
0132 /*!
0133  * Construct from state and persistent data.
0134  */
0135 CELER_FUNCTION
0136 XorwowRngEngine::XorwowRngEngine(ParamsRef const& params,
0137                                  StateRef const& state,
0138                                  TrackSlotId tid)
0139     : params_(params)
0140 {
0141     CELER_EXPECT(tid < state.state.size());
0142     state_ = &state.state[tid];
0143 }
0144 
0145 //---------------------------------------------------------------------------//
0146 /*!
0147  * Initialize the RNG engine.
0148  *
0149  * This moves the state ahead to the given subsequence (a subsequence has size
0150  * 2^67) and skips \c offset random numbers.
0151  *
0152  * It is recommended to initialize the state using a very different generator
0153  * from the one being initialized to avoid correlations. Here the 64-bit
0154  * SplitMix64 generator is used for initialization (See Matsumoto, Wada,
0155  * Kuramoto, and Ashihara, "Common defects in initialization of pseudorandom
0156  * number generators". https://dl.acm.org/doi/10.1145/1276927.1276928.)
0157  */
0158 CELER_FUNCTION XorwowRngEngine&
0159 XorwowRngEngine::operator=(Initializer_t const& init)
0160 {
0161     auto& s = state_->xorstate;
0162 
0163     // Initialize the state from the seed
0164     SplitMix64 rng{init.seed[0]};
0165     std::uint64_t seed = rng();
0166     s[0] = static_cast<uint_t>(seed);
0167     s[1] = static_cast<uint_t>(seed >> 32);
0168     seed = rng();
0169     s[2] = static_cast<uint_t>(seed);
0170     s[3] = static_cast<uint_t>(seed >> 32);
0171     seed = rng();
0172     s[4] = static_cast<uint_t>(seed);
0173     state_->weylstate = static_cast<uint_t>(seed >> 32);
0174 
0175     // Skip ahead
0176     this->discard_subsequence(init.subsequence);
0177     this->discard(init.offset);
0178 
0179     return *this;
0180 }
0181 
0182 //---------------------------------------------------------------------------//
0183 /*!
0184  * Generate a 32-bit pseudorandom number using the 'xorwow' engine.
0185  */
0186 CELER_FUNCTION auto XorwowRngEngine::operator()() -> result_type
0187 {
0188     this->next();
0189     state_->weylstate += 362437u;
0190     return state_->weylstate + state_->xorstate[4];
0191 }
0192 
0193 //---------------------------------------------------------------------------//
0194 /*!
0195  * Advance the state \c count times.
0196  */
0197 CELER_FUNCTION void XorwowRngEngine::discard(ull_int count)
0198 {
0199     this->jump(count, params_.jump);
0200     state_->weylstate += static_cast<unsigned int>(count) * 362437u;
0201 }
0202 
0203 //---------------------------------------------------------------------------//
0204 /*!
0205  * Advance the state \c count subsequences (\c count * 2^67 times).
0206  *
0207  * Note that the Weyl sequence value remains the same since it has period 2^32
0208  * which divides evenly into 2^67.
0209  */
0210 CELER_FUNCTION void XorwowRngEngine::discard_subsequence(ull_int count)
0211 {
0212     this->jump(count, params_.jump_subsequence);
0213 }
0214 
0215 //---------------------------------------------------------------------------//
0216 /*!
0217  * Apply the transformation to the state.
0218  *
0219  * This does not update the Weyl sequence value.
0220  */
0221 CELER_FUNCTION void XorwowRngEngine::next()
0222 {
0223     auto& s = state_->xorstate;
0224     auto const t = (s[0] ^ (s[0] >> 2u));
0225 
0226     s[0] = s[1];
0227     s[1] = s[2];
0228     s[2] = s[3];
0229     s[3] = s[4];
0230     s[4] = (s[4] ^ (s[4] << 4u)) ^ (t ^ (t << 1u));
0231 }
0232 
0233 //---------------------------------------------------------------------------//
0234 /*!
0235  * Jump ahead \c count steps or subsequences.
0236  *
0237  * This applies the jump polynomials until the given number of steps or
0238  * subsequences have been skipped.
0239  */
0240 CELER_FUNCTION void
0241 XorwowRngEngine::jump(ull_int count, ArrayJumpPoly const& jump_poly_arr)
0242 {
0243     // Maximum number of times to apply any jump polynomial. Since the jump
0244     // sizes are 4^i for i = [0, 32), the max is 3.
0245     constexpr size_type max_num_jump = 3;
0246 
0247     // Start with the smallest jump (either one step or one subsequence)
0248     size_type jump_idx = 0;
0249     while (count > 0)
0250     {
0251         // Number of times to apply this jump polynomial
0252         uint_t num_jump = static_cast<uint_t>(count) & max_num_jump;
0253         for (size_type i = 0; i < num_jump; ++i)
0254         {
0255             CELER_ASSERT(jump_idx < jump_poly_arr.size());
0256             this->jump(jump_poly_arr[jump_idx]);
0257         }
0258         ++jump_idx;
0259         count >>= 2;
0260     }
0261 }
0262 
0263 //---------------------------------------------------------------------------//
0264 /*!
0265  * Jump ahead using the given jump polynomial.
0266  *
0267  * This uses the polynomial representation to apply the recurrence to the
0268  * state. The theory is described in
0269  * https://pubsonline.informs.org/doi/10.1287/ijoc.1070.0251. Let
0270  * \f[
0271    g(z) = z^d \mod p(z) = a_1 z^{k-1} + ... + a_{k-1} z + a_k,
0272  * \f]
0273  * where \f$ p(z) = det(zI + T) \f$ is the characteristic polynomial and \f$ T
0274  * \f$ is the transformation matrix. Observing that \f$ g(z) = z^d q(z)p(z) \f$
0275  * for some polynomial \f$ q(z) \f$ and that \f$ p(T) = 0 \f$ (a fundamental
0276  * property of the characteristic polynomial), it follows that
0277  * \f[
0278    g(T) = T^d = a_1 A^{k-1} + ... + a_{k-1} A + a_k I.
0279  * \f]
0280  * Therefore, using the precalculated coefficients of the jump polynomial \f$
0281  * g(z) \f$ and Horner's method for polynomial evaluation, the state after \f$
0282  * d \f$ steps can be computed as
0283  * \f[
0284    T^d x = T(...T(T(T a_1 x + a_2 x) + a_3 x) + ... + a_{k-1} x) + a_k x.
0285  * \f]
0286  * Note that applying \f$ T \f$ to \f$ x \f$ is equivalent to calling \c
0287  * next(), and that in \f$ F_2 \f$, the finite field with two elements,
0288  * addition is the same as subtraction and equivalent to bitwise exclusive or,
0289  * and multiplication is bitwise and.
0290  */
0291 CELER_FUNCTION void XorwowRngEngine::jump(JumpPoly const& jump_poly)
0292 {
0293     Array<uint_t, 5> s = {0};
0294     for (size_type i : range(params_.num_words()))
0295     {
0296         for (size_type j : range(params_.num_bits()))
0297         {
0298             if (jump_poly[i] & (1 << j))
0299             {
0300                 for (size_type k : range(params_.num_words()))
0301                 {
0302                     s[k] ^= state_->xorstate[k];
0303                 }
0304             }
0305             this->next();
0306         }
0307     }
0308     state_->xorstate = s;
0309 }
0310 
0311 //---------------------------------------------------------------------------//
0312 /*!
0313  * Generate a 64-bit pseudorandom number using the SplitMix64 engine.
0314  *
0315  * This is used to initialize the XORWOW state. See https://prng.di.unimi.it.
0316  */
0317 CELER_FUNCTION std::uint64_t XorwowRngEngine::SplitMix64::operator()()
0318 {
0319     std::uint64_t z = (state += 0x9e3779b97f4a7c15ull);
0320     z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9ull;
0321     z = (z ^ (z >> 27)) * 0x94d049bb133111ebull;
0322     return z ^ (z >> 31);
0323 }
0324 
0325 //---------------------------------------------------------------------------//
0326 }  // namespace celeritas