include/sipm/SiPMRandom.h

0001 /** @class sipm::SiPMRandom SimSiPM/src/components/SiPMRandom.h SiPMRandom.h
0002  *
0003  * @brief Class for random number generation.
0004  *
0005  * Class used for random number generation. The simulation needs very fast
0006  * pseudo-random number generation, Xorhift256+ algorithm is used as it is one
0007  * of the fastest considering modern x86-64 architectures.
0008  *
0009  *  @author Edoardo Proserpio
0010  *  @date 2020
0011  */
0012
0013 #ifndef SIPM_RANDOM_H
0014 #define SIPM_RANDOM_H
0015
0016 #include <array>
0017 #include <cstddef>
0018 #include <cstdlib>
0019 #ifdef __AVX512F__
0020 #include <immintrin.h>
0021 #endif
0022
0023 #include <cmath>
0024 #include <cstdint>
0025 #include <cstring>
0026 #include <vector>
0027
0028 #include "SiPMTypes.h"
0029
0030 // Musl implementation of lcg64
0031 // Keeping this function limited to this translation unit
0032 static constexpr uint64_t lcg64(const uint64_t x) { return (x * 10419395304814325825ULL + 1) % -1ULL; }
0033
0034 namespace sipm {
0035 namespace SiPMRng {
0036 /// @brief Implementation of xoshiro256+ 1.0 PRNG algorithm
0037 /** This is xoshiro256+ 1.0, our best and fastest generator for floating-point
0038  * numbers. We suggest to use its upper bits for floating-point
0039  * generation, as it is slightly faster than xoshiro256++/xoshiro256**. It
0040  * passes all tests we are aware of except for the lowest three bits,
0041  * which might fail linearity tests (and just those), so if low linear
0042  * complexity is not considered an issue (as it is usually the case) it
0043  * can be used to generate 64-bit outputs, too.
0044  *
0045  * We suggest to use a sign test to extract a random Boolean value, and
0046  * right shifts to extract subsets of bits.
0047  *
0048  * The state must be seeded so that it is not everywhere zero. If you have
0049  * a 64-bit seed, we suggest to seed a splitmix64 generator and use its
0050  * output to fill s. */
0051 class Xorshift256plus {
0052 public:
0053   /// @brief Default contructor for Xorshift256plus
0054   /// It cretates an instance of Xorhift256plus and sets the seed using
0055   /// a 64 bit LCG and a random value from system randomd device
0056   Xorshift256plus() { seed(); }
0057
0058   /// @brief Constructor with seed for Xorhift256plus
0059   /// It cretates an instance of Xorshift256plus and sets the seed
0060   /// using a 64 bit LCG and the seed value provided
0061   /// @param sd uint64_t User provided seed. Must not be 0!
0062   explicit Xorshift256plus(const uint64_t sd) { seed(sd); }
0063
0064   /// @brief Sets a random seed generated using system random device.
0065   void seed();
0066
0067   /// @brief Manually set a seed
0068   void seed(const uint64_t);
0069
0070 private:
0071   alignas(64) uint64_t s[4];
0072   static constexpr uint32_t N = 1 << 16;
0073   alignas(64) uint64_t buffer[N];
0074   uint32_t index = N;
0075 #ifdef __AVX512F__
0076   __m512i __s[4];
0077 #endif
0078
0079 public:
0080   /// @brief Returns a pseud-random 64-bits integer
0081   inline uint64_t operator()() noexcept {
0082     if (index == N) {
0083       getRand(buffer, N);
0084       index = 0;
0085     }
0086     return buffer[index++];
0087   }
0088
0089 #ifdef __AVX512F__
0090   // Overload for uint64_t
0091   inline void getRand(uint64_t* array, const uint32_t n) noexcept {
0092     size_t i = 0;
0093     // Generate 8 uint64_t values per iteration
0094     for (; i + 8 <= n; i += 8) {
0095       const __m512i __result = _mm512_add_epi64(__s[0], __s[3]);
0096       _mm512_store_si512(array + i, __result);
0097
0098       const __m512i __t = _mm512_slli_epi64(__s[1], 17);
0099
0100       __s[2] = _mm512_xor_epi64(__s[2], __s[0]);
0101       __s[3] = _mm512_xor_epi64(__s[3], __s[1]);
0102       __s[1] = _mm512_xor_epi64(__s[1], __s[2]);
0103       __s[0] = _mm512_xor_epi64(__s[0], __s[3]);
0104
0105       __s[2] = _mm512_xor_si512(__s[2], __t);
0106
0107       __s[3] = _mm512_rol_epi64(__s[3], 45);
0108     }
0109     // Handle leftover if n is not a multiple of 8
0110     if (i < n) {
0111       alignas(64) uint64_t buffer[8];
0112       const __m512i __result = _mm512_add_epi64(__s[0], __s[3]);
0113       _mm512_store_si512(buffer, __result);
0114
0115       const __m512i __t = _mm512_slli_epi64(__s[1], 17);
0116
0117       __s[2] = _mm512_xor_si512(__s[2], __s[0]);
0118       __s[3] = _mm512_xor_si512(__s[3], __s[1]);
0119       __s[1] = _mm512_xor_si512(__s[1], __s[2]);
0120       __s[0] = _mm512_xor_si512(__s[0], __s[3]);
0121
0122       __s[2] = _mm512_xor_si512(__s[2], __t);
0123
0124       __s[3] = _mm512_rol_epi64(__s[3], 45);
0125       for (size_t j = 0; j < n - i; ++j) {
0126         array[i + j] = buffer[j];
0127       }
0128     }
0129   }
0130
0131   // Overload for uint32_t
0132   inline void getRand(uint32_t* array, const uint32_t n) noexcept {
0133     size_t i = 0;
0134
0135     // Generate 8 uint64_t values per iteration and split them into 16 uint32_t values
0136     for (; i + 16 <= n; i += 16) {
0137       const __m512i __result = _mm512_add_epi64(__s[0], __s[3]);
0138
0139       _mm512_store_si512(array + i, __result);
0140
0141       const __m512i __t = _mm512_slli_epi64(__s[1], 17);
0142
0143       __s[2] = _mm512_xor_si512(__s[2], __s[0]);
0144       __s[3] = _mm512_xor_si512(__s[3], __s[1]);
0145       __s[1] = _mm512_xor_si512(__s[1], __s[2]);
0146       __s[0] = _mm512_xor_si512(__s[0], __s[3]);
0147
0148       __s[2] = _mm512_xor_si512(__s[2], __t);
0149
0150       __s[3] = _mm512_rol_epi64(__s[3], 45);
0151     }
0152
0153     // Handle leftover if n is not a multiple of 16
0154     if (i < n) {
0155       alignas(64) uint32_t buffer[16];
0156       const __m512i __result = _mm512_add_epi64(__s[0], __s[3]);
0157       _mm512_store_si512(buffer, __result);
0158
0159       const __m512i __t = _mm512_slli_epi64(__s[1], 17);
0160
0161       __s[2] = _mm512_xor_si512(__s[2], __s[0]);
0162       __s[3] = _mm512_xor_si512(__s[3], __s[1]);
0163       __s[1] = _mm512_xor_si512(__s[1], __s[2]);
0164       __s[0] = _mm512_xor_si512(__s[0], __s[3]);
0165
0166       __s[2] = _mm512_xor_si512(__s[2], __t);
0167
0168       __s[3] = _mm512_rol_epi64(__s[3], 45);
0169
0170       // Fill the remaining uint32_t values
0171       for (size_t j = 0; j < n - i; ++j) {
0172         array[i + j] = buffer[j];
0173       }
0174     }
0175   }
0176 #else
0177
0178   // Overload for uint64_t (most of the times this one is used)
0179   inline void getRand(uint64_t* array, const uint32_t n) {
0180     for (uint32_t i = 0; i < n; ++i) {
0181       const uint64_t randVal = s[0] + s[3];
0182       const uint64_t t = s[1] << 17;
0183       s[2] ^= s[0];
0184       s[3] ^= s[1];
0185       s[1] ^= s[2];
0186       s[0] ^= s[3];
0187
0188       s[2] ^= t;
0189       s[3] = (s[3] << 45) | (s[3] >> (64 - 45));
0190
0191       array[i] = randVal;
0192     }
0193   }
0194
0195   // Overload for uint32_t
0196   inline void getRand(uint32_t* array, const uint32_t n) {
0197     for (uint32_t i = 0; i < n; i += 2) {
0198       const uint64_t randVal = s[0] + s[3];
0199       const uint64_t t = s[1] << 17;
0200       s[2] ^= s[0];
0201       s[3] ^= s[1];
0202       s[1] ^= s[2];
0203       s[0] ^= s[3];
0204
0205       s[2] ^= t;
0206       s[3] = (s[3] << 45) | (s[3] >> (64 - 45));
0207
0208       array[i] = static_cast<uint32_t>(randVal); // Lower 32 bits
0209       if (i + 1 < n) {
0210         array[i + 1] = static_cast<uint32_t>(randVal >> 32); // Upper 32 bits
0211       }
0212     }
0213   }
0214 #endif
0215 };
0216 } // namespace SiPMRng
0217
0218 class SiPMRandom {
0219 public:
0220   SiPMRandom() = default;
0221
0222   /// @brief Get a reference to the underlying PRNG */
0223   /// Use this method to seed the PRNG and to get the status
0224   SiPMRng::Xorshift256plus& rng() { return m_rng; }
0225
0226   // Seed underlying rng
0227   void seed(const uint64_t x) { m_rng.seed(x); }
0228
0229   /// @brief Gives an uniformly distributed random
0230   template <typename T = double>
0231   inline T Rand() noexcept;
0232   pair<float, float> RandF2() noexcept;
0233   /// @brief Gives an uniform random integer
0234   uint32_t randInteger(const uint32_t) noexcept;
0235   pair<uint32_t> randInteger2(const uint32_t) noexcept;
0236
0237   /// @brief Gives random value with gaussian distribution
0238   double randGaussian(const double, const double) noexcept;
0239   /// @brief Gives random float with gaussian distribution
0240   float randGaussianF(const float, const float) noexcept;
0241   /// @brief Gives random double with exponential distribution
0242   double randExponential(const double) noexcept;
0243   /// @brief Gives random float with exponential distribution
0244   float randExponentialF(const float) noexcept;
0245   /// @brief Gives random value with poisson distribution
0246   uint32_t randPoisson(const double mu) noexcept;
0247
0248   /// @brief Vector version of @ref Rand()
0249   std::vector<double> Rand(const uint32_t);
0250   /// @brief Vector version of @ref RandF()
0251   std::vector<float> RandF(const uint32_t);
0252   /// @brief Vector version of @ref randGaussian()
0253   std::vector<double> randGaussian(const double, const double, const uint32_t);
0254   /// @brief Vector version of @ref randGaussianF()
0255   std::vector<float> randGaussianF(const float, const float, const uint32_t);
0256   /// @brief Vector version of @ref randInteger()
0257   std::vector<uint32_t> randInteger(const uint32_t max, const uint32_t n);
0258   /// @brief Vector version of @ref randExponential()
0259   std::vector<double> randExponential(const double, const uint32_t);
0260   /// @brief Vector version of @ref randExponentialF()
0261   std::vector<float> randExponentialF(const float, const uint32_t);
0262
0263 private:
0264   SiPMRng::Xorshift256plus m_rng;
0265 };
0266
0267 /**
0268  * This method uses the internal PRNG to get a random uint64
0269  * then sets its sign bit to 0 and the exponent bits are set to
0270  * 0x3fff. By aliasing the uint to a double and subtracting 1
0271  * the result is a random double in range (0-1].
0272  */
0273 template <>
0274 inline double SiPMRandom::Rand<double>() noexcept {
0275   return (m_rng() >> 11) * 0x1p-53;
0276 }
0277
0278 /**
0279  * This method uses the internal PRNG to get a random uint64
0280  * then sets its sign bit to 0 and the exponent bits are set to
0281  * 0x3f8. By aliasing the uint to a double and subtracting 1
0282  * the result is a random float in range (0-1].
0283  */
0284 template <>
0285 inline float SiPMRandom::Rand<float>() noexcept {
0286   return (m_rng() >> 40) * 0x1p-24f;
0287 }
0288 } // namespace sipm
0289 #endif /* SIPM_RANDOM_H */