eic-opticks/sysrap/smath.h

0001 #pragma once
0002
0003
0004 #if defined(__CUDACC__) || defined(__CUDABE__)
0005    #define SMATH_METHOD __device__
0006 #else
0007    #define SMATH_METHOD
0008 #endif
0009
0010
0011 struct smath
0012 {
0013     static constexpr float hc_eVnm = 1239.8418754200f ; // G4: h_Planck*c_light/(eV*nm)
0014
0015     SMATH_METHOD static void rotateUz(float3& d, const float3& u );
0016     SMATH_METHOD static int count_nibbles( unsigned long long );
0017     SMATH_METHOD static float erfcinvf(float u2);
0018 };
0019
0020
0021 /**
0022 smath::rotateUz
0023 -----------------
0024
0025 This rotates the reference frame of a vector such that the original Z-axis will lie in the
0026 direction of *u*. Many rotations would accomplish this; the one selected
0027 uses *u* as its third column and is given by the below matrix.
0028
0029 The below CUDA implementation follows the CLHEP implementation used by Geant4::
0030
0031      // geant4.10.00.p01/source/externals/clhep/src/ThreeVector.cc
0032      72 Hep3Vector & Hep3Vector::rotateUz(const Hep3Vector& NewUzVector) {
0033      73   // NewUzVector must be normalized !
0034      74
0035      75   double u1 = NewUzVector.x();
0036      76   double u2 = NewUzVector.y();
0037      77   double u3 = NewUzVector.z();
0038      78   double up = u1*u1 + u2*u2;
0039      79
0040      80   if (up>0) {
0041      81       up = std::sqrt(up);
0042      82       double px = dx,  py = dy,  pz = dz;
0043      83       dx = (u1*u3*px - u2*py)/up + u1*pz;
0044      84       dy = (u2*u3*px + u1*py)/up + u2*pz;
0045      85       dz =    -up*px +             u3*pz;
0046      86     }
0047      87   else if (u3 < 0.) { dx = -dx; dz = -dz; }      // phi=0  teta=pi
0048      88   else {};
0049      89   return *this;
0050      90 }
0051
0052 This implements rotation of (px,py,pz) vector into (dx,dy,dz)
0053 using the below rotation matrix, the columns of which must be
0054 orthogonal unit vectors.::
0055
0056                 |  u.x * u.z / up   -u.y / up    u.x  |
0057         d  =    |  u.y * u.z / up   +u.x / up    u.y  |      p
0058                 |   -up               0.         u.z  |
0059
0060 Taking dot products between and within columns shows that to
0061 be the case for normalized u. See oxrap/rotateUz.h for the algebra.
0062
0063 Special cases:
0064
0065 u = [0,0,1] (up=0., !u.z<0.)
0066    does nothing, effectively identity matrix
0067
0068 u = [0,0,-1] (up=0., u.z<0. )
0069    flip x, and z which is a rotation of pi/2 about y
0070
0071                |   -1    0     0   |
0072       d =      |    0    1     0   |   p
0073                |    0    0    -1   |
0074
0075 **/
0076
0077 inline SMATH_METHOD void smath::rotateUz(float3& d, const float3& u )
0078 {
0079     float up = u.x*u.x + u.y*u.y ;
0080     if (up>0.f)
0081     {
0082         up = sqrt(up);
0083         float px = d.x ;
0084         float py = d.y ;
0085         float pz = d.z ;
0086         d.x = (u.x*u.z*px - u.y*py)/up + u.x*pz;
0087         d.y = (u.y*u.z*px + u.x*py)/up + u.y*pz;
0088         d.z =    -up*px +                u.z*pz;
0089     }
0090     else if (u.z < 0.f )
0091     {
0092         d.x = -d.x;
0093         d.z = -d.z;
0094     }
0095 }
0096
0097 /**
0098 smath::count_nibbles
0099 ---------------------
0100
0101 Refer to SBit::count_nibbles for explanation.
0102
0103 **/
0104 inline SMATH_METHOD int smath::count_nibbles(unsigned long long x)
0105 {
0106     x |= x >> 1 ;
0107     x |= x >> 2 ;
0108     x &= 0x1111111111111111ull ;
0109     x = (x + (x >> 4)) & 0xF0F0F0F0F0F0F0Full ;
0110     unsigned long long count = (x * 0x101010101010101ull) >> 56 ;
0111     return count ;
0112 }
0113
0114
0115 #ifdef MOCK_CUDA
0116 // this defines global erfcinvf function as standin for
0117 #include "s_mock_erfcinvf.h"
0118 #endif
0119
0120 /**
0121 smath::erfcinvf
0122 ----------------
0123
0124 Actually little need for this as CUDA already provides an erfcinvf function,
0125 and for MOCK_CUDA a corresponding global is also defined based on
0126 using njuffa_erfcinvf.h.
0127 However as globals tend to be difficult to find its convenient to
0128 include in smath.h for elucidatory+discovery purposes.
0129
0130 +-------------+--------------+
0131 | domain      | erfcinvf(x)  |
0132 +=============+==============+
0133 |      x < 0  |  nan         |
0134 +-------------+--------------+
0135 |      x = 0  |   inf        |
0136 +-------------+--------------+
0137 |      x = 1  |    0         |
0138 +-------------+--------------+
0139 |      x = 2  |  -inf        |
0140 +-------------+--------------+
0141
0142 CAUTION: for matching with Geant4 the erfcinvf result is scaled
0143 by -sqrtf(2.f) with domain folded in half from 0->2 to 0->1
0144 See Geant4/CLHEP classes::
0145
0146     g4-cls RandGaussQ
0147     g4-cls G4MTRandGaussQ
0148     g4-cls G4OpBoundaryProcess
0149
0150 Geant4 sigma_alpha ground surface smears normal using angle from::
0151
0152     alpha = G4RandGauss::shoot(0.0,sigma_alpha);  // (mean, stdDev)
0153
0154 Tests while trying to do this on GPU::
0155
0156     sysrap/tests/S4MTRandGaussQTest.sh
0157     sysrap/tests/erfcinvf_Test.sh
0158     sysrap/tests/njuffa_erfcinvf_test.sh
0159     sysrap/tests/smath_test.sh
0160
0161 **/
0162
0163 inline SMATH_METHOD float smath::erfcinvf(float u2)
0164 {
0165 #if defined(__CUDACC__) || defined(__CUDABE__) || defined(MOCK_CUDA)
0166     return ::erfcinvf(u2) ;
0167 #else
0168     return 0.f ;
0169 #endif
0170 }
0171
0172