eic-opticks/qudarap/qsim.h

0001 /**
0002 qsim.h : GPU side struct prepared CPU side by QSim.hh
0003 ========================================================
0004
0005 qsim.h replaces the OptiX 6 context in a CUDA-centric way.
0006 Canonical use is from CSGOptiX/CSGOptiX7.cu:simulate
0007
0008 * qsim.h instance is uploaded once only at CSGOptiX instanciation
0009   as this encompasses the physics not the event-by-event info.
0010
0011 * qsim encompasses global info relevant to all photons, meaning that any changes
0012   made to the qsim instance from single photon threads must be into thread-owned "idx"
0013   slots into arrays to avoid interference
0014
0015 * temporary working state local to each photon is held in *sctx*
0016   and passed around using reference arguments
0017
0018 TODO:
0019
0020 1. get more of the below to work on CPU with mocked curand (and in future mocked tex2D and cudaTextureObject_t )
0021    NB must move decl and implementation to do this
0022
0023 **/
0024
0025 #if defined(__CUDACC__) || defined(__CUDABE__)
0026    #define QSIM_METHOD __device__
0027 #else
0028    #define QSIM_METHOD
0029 #endif
0030
0031 #include "OpticksGenstep.h"
0032 #include "OpticksPhoton.h"
0033
0034 #include "sflow.h"
0035 #include "sqat4.h"
0036 #include "sc4u.h"
0037 #include "sxyz.h"
0038 #include "sphoton.h"
0039
0040 #include "storch.h"
0041 #include "scarrier.h"
0042 #include "sevent.h"
0043 #include "sstate.h"
0044 #include "smatsur.h"
0045
0046
0047 #ifndef PRODUCTION
0048 #include "srec.h"
0049 #include "sseq.h"
0050 #include "stag.h"
0051 #ifdef DEBUG_LOGF
0052 #define KLUDGE_FASTMATH_LOGF(u) (u < 0.998f ? __logf(u) : __logf(u) - 0.46735790f*1e-7f )
0053 #endif
0054 #endif
0055
0056 #include "sctx.h"
0057
0058 #include "qrng.h"
0059 #include "qbase.h"
0060 #include "qprop.h"
0061 #include "qmultifilm.h"
0062 #include "qbnd.h"
0063 #include "qscint.h"
0064 #include "qcerenkov.h"
0065 #include "qpmt.h"
0066 #include "tcomplex.h"
0067
0068
0069 struct qcerenkov ;
0070
0071 struct qsim
0072 {
0073     qbase*              base ;
0074     sevent*             evt ;
0075     qrng<RNG>*          rng ;
0076     qbnd*               bnd ;
0077     qmultifilm*         multifilm;
0078     qcerenkov*          cerenkov ;
0079     qscint*             scint ;
0080     qpmt<float>*        pmt ;
0081
0082 #if defined(__CUDACC__) || defined(__CUDABE__)
0083 #else
0084     qsim(); // instanciated on CPU (see QSim::init_sim) and copied to device so no ctor in device code
0085 #endif
0086
0087     QSIM_METHOD void    generate_photon_dummy( sphoton& p, RNG& rng, const quad6& gs, unsigned long long photon_id, unsigned genstep_id ) const ;
0088     QSIM_METHOD static float3 uniform_sphere(const float u0, const float u1);
0089     QSIM_METHOD static float RandGaussQ_shoot( RNG& rng, float mean, float stdDev );
0090     QSIM_METHOD static void SmearNormal_SigmaAlpha( RNG& rng, float3* smeared_normal, const float3* direction, const float3* normal, float sigma_alpha, const sctx& ctx );
0091     QSIM_METHOD static void SmearNormal_Polish(     RNG& rng, float3* smeared_normal, const float3* direction, const float3* normal, float polish     , const sctx& ctx );
0092
0093 #if defined(__CUDACC__) || defined(__CUDABE__) || defined( MOCK_CURAND ) || defined(MOCK_CUDA)
0094     QSIM_METHOD static float3 uniform_sphere(RNG& rng);
0095 #endif
0096
0097 #if defined(__CUDACC__) || defined(__CUDABE__)
0098     QSIM_METHOD float4  multifilm_lookup(unsigned pmtType, float nm, float aoi);
0099 #endif
0100
0101 #if defined(__CUDACC__) || defined(__CUDABE__) || defined( MOCK_CURAND )  || defined(MOCK_CUDA)
0102     QSIM_METHOD static void lambertian_direction(float3* dir, const float3* normal, float orient, RNG& rng, sctx& ctx );
0103     QSIM_METHOD static void random_direction_marsaglia(float3* dir, RNG& rng, sctx& ctx );
0104     QSIM_METHOD void rayleigh_scatter(RNG& rng, sctx& ctx );
0105     QSIM_METHOD int     propagate_to_boundary( unsigned& flag, RNG& rng, sctx& ctx );
0106 #endif
0107
0108 #if defined(__CUDACC__) || defined(__CUDABE__) || defined( MOCK_CURAND ) || defined(MOCK_CUDA)
0109     QSIM_METHOD int     propagate_at_boundary(        unsigned& flag, RNG& rng, sctx& ctx, float theTransmittance=-1.f ) const ;
0110     QSIM_METHOD int     propagate_at_boundary_with_T( unsigned& flag, RNG& rng, sctx& ctx, float theTransmittance ) const ;
0111 #endif
0112
0113 #if defined(__CUDACC__) || defined(__CUDABE__)
0114     QSIM_METHOD int     propagate_at_surface_MultiFilm(unsigned& flag, RNG& rng, sctx& ctx );
0115 #endif
0116
0117 #if defined(__CUDACC__) || defined(__CUDABE__) || defined( MOCK_CURAND ) || defined(MOCK_CUDA)
0118     QSIM_METHOD int     propagate_at_surface(           unsigned& flag, RNG& rng, sctx& ctx );
0119     QSIM_METHOD int     propagate_at_surface_Detect(    unsigned& flag, RNG& rng, sctx& ctx ) const ;
0120 #if defined( WITH_CUSTOM4 )
0121     QSIM_METHOD int     propagate_at_surface_CustomART( unsigned& flag, RNG& rng, sctx& ctx ) const ;
0122 #endif
0123 #endif
0124
0125 #if defined(__CUDACC__) || defined(__CUDABE__) || defined( MOCK_CURAND ) || defined(MOCK_CUDA)
0126     QSIM_METHOD void    reflect_diffuse(                       RNG& rng, sctx& ctx );
0127     QSIM_METHOD void    reflect_specular(                      RNG& rng, sctx& ctx );
0128
0129     QSIM_METHOD void    fake_propagate( sphoton& p, const quad2* mock_prd, RNG& rng, unsigned long long idx );
0130     QSIM_METHOD int     propagate(const int bounce, RNG& rng, sctx& ctx );
0131
0132     QSIM_METHOD void    hemisphere_polarized( unsigned polz, bool inwards, RNG& rng, sctx& ctx );
0133     QSIM_METHOD void    generate_photon_simtrace(         quad4&   p, RNG& rng, const quad6& gs, unsigned long long photon_id, unsigned genstep_id ) const ;
0134     QSIM_METHOD void    generate_photon_simtrace_frame(   quad4&   p, RNG& rng, const quad6& gs, unsigned long long photon_id, unsigned genstep_id ) const ;
0135     QSIM_METHOD void    generate_photon(                  sphoton& p, RNG& rng, const quad6& gs, unsigned long long photon_id, unsigned genstep_id ) const ;
0136 #endif
0137 };
0138
0139 // CTOR
0140 #if defined(__CUDACC__) || defined(__CUDABE__)
0141 #else
0142 inline qsim::qsim()    // instanciated on CPU (see QSim::init_sim) and copied to device so no ctor in device code
0143         :
0144         base(nullptr),
0145         evt(nullptr),
0146         rng(nullptr),
0147         bnd(nullptr),
0148         multifilm(nullptr),
0149         cerenkov(nullptr),
0150         scint(nullptr),
0151         pmt(nullptr)
0152     {
0153     }
0154 #endif
0155
0156 inline QSIM_METHOD void qsim::generate_photon_dummy(sphoton& p_, RNG& rng, const quad6& gs, unsigned long long photon_id, unsigned genstep_id ) const
0157 {
0158     quad4& p = (quad4&)p_ ;
0159 #ifndef PRODUCTION
0160     printf("//qsim::generate_photon_dummy  photon_id %3lld genstep_id %3d  gs.q0.i ( gencode:%3d %3d %3d %3d ) \n",
0161        photon_id,
0162        genstep_id,
0163        gs.q0.i.x,
0164        gs.q0.i.y,
0165        gs.q0.i.z,
0166        gs.q0.i.w
0167       );
0168 #endif
0169     p.q0.i.x = 1 ; p.q0.i.y = 2 ; p.q0.i.z = 3 ; p.q0.i.w = 4 ;
0170     p.q1.i.x = 1 ; p.q1.i.y = 2 ; p.q1.i.z = 3 ; p.q1.i.w = 4 ;
0171     p.q2.i.x = 1 ; p.q2.i.y = 2 ; p.q2.i.z = 3 ; p.q2.i.w = 4 ;
0172     p.q3.i.x = 1 ; p.q3.i.y = 2 ; p.q3.i.z = 3 ; p.q3.i.w = 4 ;
0173
0174     p.set_flag(TORCH);
0175 }
0176
0177 inline QSIM_METHOD float3 qsim::uniform_sphere(const float u0, const float u1)
0178 {
0179     float phi = u0*2.f*M_PIf;
0180     float cosTheta = 2.f*u1 - 1.f ; // -1.f -> 1.f
0181     float sinTheta = sqrtf(1.f-cosTheta*cosTheta);
0182     return make_float3(cosf(phi)*sinTheta, sinf(phi)*sinTheta, cosTheta);
0183 }
0184
0185
0186 #if defined(__CUDACC__) || defined(__CUDABE__) || defined( MOCK_CURAND ) || defined(MOCK_CUDA)
0187 /**
0188 qsim::uniform_sphere
0189 ---------------------
0190
0191 **/
0192 inline QSIM_METHOD float3 qsim::uniform_sphere(RNG& rng)
0193 {
0194     float phi = curand_uniform(&rng)*2.f*M_PIf;
0195     float cosTheta = 2.f*curand_uniform(&rng) - 1.f ; // -1.f -> 1.f
0196     float sinTheta = sqrtf(1.f-cosTheta*cosTheta);
0197     return make_float3(cosf(phi)*sinTheta, sinf(phi)*sinTheta, cosTheta);
0198 }
0199
0200 /**
0201 qsim::RandGaussQ_shoot
0202 ------------------------
0203
0204 See::
0205
0206     sysrap/tests/erfcinvf_Test.sh
0207     sysrap/tests/S4MTRandGaussQTest.sh
0208
0209     g4-cls RandGaussQ
0210     g4-cls G4MTRandGaussQ
0211
0212 **/
0213 inline QSIM_METHOD float qsim::RandGaussQ_shoot( RNG& rng, float mean, float stdDev )
0214 {
0215     float u2 = 2.f*curand_uniform(&rng) ;
0216     float v = -M_SQRT2f*erfcinvf(u2)*stdDev + mean ;
0217     //printf("//qsim.RandGaussQ_shoot mean %10.5f stdDev %10.5f u2 %10.5f v %10.5f \n", mean, stdDev, u2, v  ) ;
0218     return v ;
0219 }
0220
0221
0222 /**
0223 qsim::SmearNormal_SigmaAlpha
0224 ------------------------------
0225
0226 CAUTION : THIS CURRENTLY NOT USED BY ANYTHING OTHER THAN TESTS
0227
0228 The reason is that a Geant4 simulation with some sigma_alpha/polish
0229 surfaces will not actually use G4OpBoundaryProcess::GetFacetNormal unless
0230 a bunch of conditions are satisfied such as having non-zero values of some
0231 of prob_sl/prob_ss/prob_bs that induces G4OpBoundaryProcess::ChooseReflection
0232 to return something other that LambertianReflection.
0233
0234 **THIS MAKES ME SUSPECT ACTUAL USE OF NORMAL SMEARING IS VERY RARE**
0235
0236 +----------------------+-------------------------------+------------------------------+----------------------+
0237 | G4OpBoundaryProcess  | G4MaterialPropertiesIndex.hh  | G4MaterialPropertiesTable.cc | ChooseReflection     |
0238 +======================+===============================+==============================+======================+
0239 | prob_sl              | kSPECULARLOBECONSTANT         | SPECULARLOBECONSTANT         | LobeReflection       |
0240 +----------------------+-------------------------------+------------------------------+----------------------+
0241 | prob_ss              | kSPECULARSPIKECONSTANT        | SPECULARSPIKECONSTANT        | SpikeReflection      |
0242 +----------------------+-------------------------------+------------------------------+----------------------+
0243 | prob_bs              | kBACKSCATTERCONSTANT          | BACKSCATTERCONSTANT          | BackScattering       |
0244 +----------------------+-------------------------------+------------------------------+----------------------+
0245 | all zero =>          |                               |                              | LambertianReflection |
0246 +----------------------+-------------------------------+------------------------------+----------------------+
0247
0248 TODO: full simulation run with breakpoint "BP=C4OpBoundaryProcess::GetFacetNormal"
0249
0250 * C4 (not G4) as Custom4 is in use for the boundary process
0251
0252 **/
0253
0254 inline QSIM_METHOD void qsim::SmearNormal_SigmaAlpha(
0255     RNG& rng,
0256     float3* smeared_normal,
0257     const float3* direction,
0258     const float3* normal,
0259     float sigma_alpha,
0260     const sctx& ctx
0261    )
0262 {
0263 #if !defined(PRODUCTION) && defined(MOCK_CUDA_DEBUG)
0264     bool dump = ctx.pidx == -1 ;
0265 #endif
0266
0267     if(sigma_alpha == 0.f)
0268     {
0269         *smeared_normal = *normal ;
0270         return ;
0271     }
0272     float f_max = fminf(1.f,4.f*sigma_alpha);
0273
0274 #if !defined(PRODUCTION) && defined(MOCK_CUDA_DEBUG)
0275     if(dump) printf("//qsim::SmearNormal_SigmaAlpha.MOCK_CUDA_DEBUG sigma_alpha %10.5f f_max %10.5f  \n", sigma_alpha, f_max );
0276 #endif
0277
0278     float alpha, sin_alpha, phi, u0, u1, u2 ;
0279     bool reject_alpha ;
0280     bool reject_dir ;
0281
0282     do {
0283         do {
0284             //alpha = RandGaussQ_shoot(rng, 0.f, sigma_alpha );  // mean:0.f stdDev:sigma_alpha
0285             u0 = curand_uniform(&rng) ;
0286             alpha = -M_SQRT2f*erfcinvf(2.f*u0)*sigma_alpha ;
0287
0288             sin_alpha = sinf(alpha);
0289             u1 = curand_uniform(&rng) ;
0290             reject_alpha = alpha >= M_PIf/2.f || (u1*f_max > sin_alpha) ;
0291
0292 #if !defined(PRODUCTION) && defined(MOCK_CUDA_DEBUG)
0293             if(dump) printf("//qsim::SmearNormal_SigmaAlpha.MOCK_CUDA_DEBUG u0 %10.5f alpha %10.5f sin_alpha %10.5f u1 %10.5f u1*f_max %10.5f  (u1*f_max > sin_alpha) %d reject_alpha %d  \n",
0294                u0, alpha, sin_alpha, u1, u1*f_max, (u1*f_max > sin_alpha), reject_alpha );
0295             // theres lots of alpha rejected : eg all -ve sin_alpha
0296 #endif
0297
0298         } while( reject_alpha ) ;
0299
0300         u2 = curand_uniform(&rng) ;
0301         phi = u2*M_PIf*2.f ;
0302
0303         smeared_normal->x = sin_alpha * cosf(phi) ;
0304         smeared_normal->y = sin_alpha * sinf(phi) ;
0305         smeared_normal->z = cosf(alpha) ;
0306
0307         smath::rotateUz(*smeared_normal, *normal);
0308         reject_dir = dot(*smeared_normal, *direction ) >= 0.f ;
0309         // reject smears that move the normal into same hemi as direction
0310
0311 #if !defined(PRODUCTION) && defined(MOCK_CUDA_DEBUG)
0312         if(dump) printf("//qsim::SmearNormal_SigmaAlpha.MOCK_CUDA_DEBUG u2 %10.5f phi %10.5f smeared_normal ( %10.5f, %10.5f, %10.5f)  reject_dir %d  \n",
0313                u2, phi, smeared_normal->x, smeared_normal->y, smeared_normal->z, reject_dir );
0314 #endif
0315
0316
0317     } while( reject_dir ) ;
0318 }
0319
0320 /**
0321 qsim::SmearNormal_Polish
0322 ------------------------------
0323
0324 CAUTION : THIS CURRENTLY NOT USED BY ANYTHING OTHER THAN TESTS : SEE DETAILS ABOVE
0325
0326 **/
0327
0328 inline QSIM_METHOD void qsim::SmearNormal_Polish(
0329     RNG& rng,
0330     float3* smeared_normal,
0331     const float3* direction,
0332     const float3* normal,
0333     float polish,
0334     const sctx& ctx
0335     )
0336 {
0337 #if !defined(PRODUCTION) && defined(MOCK_CUDA_DEBUG)
0338     bool dump = ctx.pidx == -1 ;
0339 #endif
0340
0341     if(polish == 1.f)
0342     {
0343         *smeared_normal = *normal ;
0344         return ;
0345     }
0346
0347     float u0, u1, u2 ;
0348     float3 smear ;
0349     bool reject_mag ;
0350     bool reject_dir ;
0351
0352     do {
0353         do {
0354             u0 = curand_uniform(&rng);
0355             u1 = curand_uniform(&rng) ;
0356             u2 = curand_uniform(&rng) ;
0357             smear.x = 2.f*u0 - 1.f ;
0358             smear.y = 2.f*u1 - 1.f ;
0359             smear.z = 2.f*u2 - 1.f ;
0360             reject_mag = length(smear) > 1.f  ;   // HMM: could this use just dot(smear, smear) ?
0361        }
0362        while( reject_mag );
0363
0364        *smeared_normal = *normal + (1.f-polish)*smear;
0365        reject_dir = dot(*smeared_normal, *direction) >= 0.f ;
0366     }
0367     while( reject_dir );
0368     *smeared_normal = normalize(*smeared_normal);
0369 }
0370
0371
0372
0373
0374
0375 #endif
0376
0377 #if defined(__CUDACC__) || defined(__CUDABE__)
0378 /*
0379 qsim::multifilm_lookup
0380 -----------------------
0381
0382 */
0383
0384 inline QSIM_METHOD float4 qsim::multifilm_lookup(unsigned pmtType, float nm, float aoi)
0385 {
0386     float4 value = multifilm->lookup(pmtType, nm, aoi);
0387     return value;
0388 }
0389
0390 #endif
0391
0392 #if defined(__CUDACC__) || defined(__CUDABE__) || defined(MOCK_CURAND) || defined(MOCK_CUDA)
0393
0394 /**
0395 qsim::lambertian_direction following G4LambertianRand
0396 --------------------------------------------------------
0397
0398 g4-cls G4RandomTools::
0399
0400      59 inline G4ThreeVector G4LambertianRand(const G4ThreeVector& normal)
0401      60 {
0402      61   G4ThreeVector vect;
0403      62   G4double ndotv;
0404      63   G4int count=0;
0405      64   const G4int max_trials = 1024;
0406      65
0407      66   do
0408      67   {
0409      68     ++count;
0410      69     vect = G4RandomDirection();
0411      70     ndotv = normal * vect;
0412      71
0413      72     if (ndotv < 0.0)
0414      73     {
0415      74       vect = -vect;
0416      75       ndotv = -ndotv;
0417      76     }
0418      77
0419      78   } while (!(G4UniformRand() < ndotv) && (count < max_trials));
0420      79
0421      80   return vect;
0422      81 }
0423
0424
0425 NB: potentially bad for performance for dir pointer to be into global mem
0426 as opposed to local stack float3 : as this keeps changing the dir before
0427 arriving at the final one
0428
0429 **/
0430 inline  QSIM_METHOD void qsim::lambertian_direction(float3* dir, const float3* normal, float orient, RNG& rng, sctx& ctx )
0431 {
0432 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
0433     unsigned long long PIDX = 0xffffffffff ;
0434     if(ctx.pidx == PIDX )
0435     {
0436         printf("//qsim.lambertian_direction.head pidx %7lld : normal = np.array([%10.5f,%10.5f,%10.5f]) ; orient = %10.5f  \n",
0437             ctx.pidx, normal->x, normal->y, normal->z, orient  );
0438     }
0439 #endif
0440
0441     float ndotv ;
0442     int count = 0 ;
0443     float u ;
0444     do
0445     {
0446         count++ ;
0447         random_direction_marsaglia(dir, rng, ctx); // sets dir to random point on unit sphere
0448         ndotv = dot( *dir, *normal )*orient ;
0449         if( ndotv < 0.f )
0450         {
0451             *dir = -1.f*(*dir) ;
0452             ndotv = -1.f*ndotv ;
0453         }
0454         // when random dir is in opposite hemisphere to oriented normal
0455         // flip the dir into same hemi and ndotv
0456
0457         u = curand_uniform(&rng) ;
0458
0459 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
0460
0461         if(ctx.pidx == PIDX)
0462         {
0463             printf("//qsim.lambertian_direction.loop pidx %7lld : dir = np.array([%10.5f,%10.5f,%10.5f]) ; count = %d ; ndotv = %10.5f ; u = %10.5f \n",
0464                 ctx.pidx, dir->x, dir->y, dir->z, count, ndotv, u   );
0465
0466         }
0467 #endif
0468     }
0469     while (!(u < ndotv) && (count < 1024)) ;
0470     // distribution looks pretty similar without the while loop
0471
0472
0473 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
0474     if(ctx.pidx == PIDX)
0475     {
0476         printf("//qsim.lambertian_direction.tail pidx %7lld : dir = np.array([%10.5f,%10.5f,%10.5f]) ; count = %d ; ndotv = %10.5f \n",
0477             ctx.pidx, dir->x, dir->y, dir->z, count, ndotv  );
0478
0479     }
0480 #endif
0481
0482
0483 }
0484
0485 /**
0486 qsim::random_direction_marsaglia following G4RandomDirection
0487 -------------------------------------------------------------
0488
0489 * https://mathworld.wolfram.com/SpherePointPicking.html
0490
0491 ::
0492
0493                           v
0494                           |
0495               +---------.-|- -----------+
0496               |      .    |     .       |
0497               |   .       |          .  |
0498               |           |             |
0499               | .         |            .|
0500               |           |             |
0501           ----+-----------0-------------+---- u
0502               |.          |             |
0503               |           |            .|
0504               | .         |             |
0505               |           |          .  |
0506               |    .      |      .      |
0507               +--------.--|--.----------+
0508                           |
0509
0510 Marsaglia (1972) derived an elegant method that consists of picking u and v from independent
0511 uniform distributions on (-1,1) and rejecting points for which uu+vv >=1.
0512 So are picking points from within the (u,v) disc.
0513
0514 For those remaining random points on the 2D (u,v) disc the below (u,v) to (x,y,z)
0515 mapping is used to give a 3D position on the unit sphere,
0516
0517     x=2*u*sqrt(1-(uu+vv))
0518     y=2*v*sqrt(1-(uu+vv))
0519     z=1-2(uu+vv)
0520
0521 Checking normalization, it reduces to 1::
0522
0523    xx + yy + zz =
0524          4uu (1-(uu+vv))
0525          4vv (1-(uu+vv)) +
0526         1 -4(uu+vv) + 4(uu+vv)(uu+vv)
0527                 = 1
0528
0529 So that means the random 3D (x,y,z) points are on the unit sphere.
0530
0531 ::
0532
0533      g4-cls G4RandomDirection
0534
0535      58 // G.Marsaglia (1972) method
0536      59 inline G4ThreeVector G4RandomDirection()
0537      60 {
0538      61   G4double u, v, b;
0539      62   do {
0540      63     u = 2.*G4UniformRand() - 1.;
0541      64     v = 2.*G4UniformRand() - 1.;
0542      65     b = u*u + v*v;
0543      66   } while (b > 1.);
0544      67   G4double a = 2.*std::sqrt(1. - b);
0545      68   return G4ThreeVector(a*u, a*v, 2.*b - 1.);
0546      69 }
0547
0548 **/
0549
0550
0551 inline QSIM_METHOD void qsim::random_direction_marsaglia(float3* dir,  RNG& rng, sctx& ctx  )
0552 {
0553     // NB: no use of ctx.tagr so this has not been random aligned
0554     float u0, u1 ;
0555     float u, v, b, a  ;
0556     do
0557     {
0558         u0 = curand_uniform(&rng);
0559         u1 = curand_uniform(&rng);
0560         //if( idx == 0u ) printf("//qsim.random_direction_marsaglia pidx %7lld u0 %10.4f u1 %10.4f \n", ctx.pidx, u0, u1 );
0561         u = 2.f*u0 - 1.f ;
0562         v = 2.f*u1 - 1.f ;
0563         b = u*u + v*v ;
0564     }
0565     while( b > 1.f ) ;
0566
0567     a = 2.f*sqrtf( 1.f - b );
0568
0569     dir->x = a*u ;
0570     dir->y = a*v ;
0571     dir->z = 2.f*b - 1.f ;
0572 }
0573
0574
0575
0576 /**
0577 qsim::rayleigh_scatter
0578 ------------------------------
0579
0580 Following G4OpRayleigh::PostStepDoIt
0581
0582 * https://bugzilla-geant4.kek.jp/show_bug.cgi?id=207 Xin Qian patch
0583
0584
0585 Transverse wave nature means::
0586
0587    dot(p_direction, p_polarization)  = 0
0588    dot(direction,   polarization)  = 0
0589
0590 *constant* and normalized direction retains transversality thru the candidate scatter::
0591
0592     pol = p_pol + constant*dir
0593
0594     dot(pol, dir) = dot(p_pol, dir) + constant* dot(dir, dir)
0595                   = dot(p_pol, dir) + constant* 1.
0596                   = dot(p_pol, dir) - dot(p_pol, dir)
0597                   = 0.
0598
0599 **/
0600
0601 inline QSIM_METHOD void qsim::rayleigh_scatter(RNG& rng, sctx& ctx )
0602 {
0603     sphoton& p = ctx.p ;
0604     float3 direction ;
0605     float3 polarization ;
0606
0607     bool looping(true) ;
0608     do
0609     {
0610         float u0 = curand_uniform(&rng) ;
0611         float u1 = curand_uniform(&rng) ;
0612         float u2 = curand_uniform(&rng) ;
0613         float u3 = curand_uniform(&rng) ;
0614         float u4 = curand_uniform(&rng) ;
0615
0616 #if !defined(PRODUCTION) && defined(DEBUG_TAG)
0617         stagr& tagr = ctx.tagr ;  // UNTESTED
0618         tagr.add(stag_sc, u0);
0619         tagr.add(stag_sc, u1);
0620         tagr.add(stag_sc, u2);
0621         tagr.add(stag_sc, u3);
0622         tagr.add(stag_sc, u4);
0623 #endif
0624         float cosTheta = u0 ;
0625         float sinTheta = sqrtf(1.0f-u0*u0);
0626         if(u1 < 0.5f ) cosTheta = -cosTheta ;
0627         // could use uniform_sphere here : but not doing so to follow G4OpRayleigh more closely
0628
0629         float sinPhi ;
0630         float cosPhi ;
0631
0632 #if defined(MOCK_CURAND ) || defined(MOCK_CUDA)
0633         //__sincosf(2.f*M_PIf*u2,&sinPhi,&cosPhi);   // apple extension
0634         float phi = 2.f*M_PIf*u2 ;
0635         sinPhi = sinf(phi);
0636         cosPhi = cosf(phi);
0637 #else
0638         sincosf(2.f*M_PIf*u2,&sinPhi,&cosPhi);
0639 #endif
0640
0641         direction.x = sinTheta * cosPhi;
0642         direction.y = sinTheta * sinPhi;
0643         direction.z = cosTheta ;
0644
0645         smath::rotateUz(direction, p.mom );
0646
0647         float constant = -dot(direction, p.pol );
0648
0649         polarization.x = p.pol.x + constant*direction.x ;
0650         polarization.y = p.pol.y + constant*direction.y ;
0651         polarization.z = p.pol.z + constant*direction.z ;
0652
0653         if(dot(polarization, polarization) == 0.f )
0654         {
0655
0656 #if defined( MOCK_CURAND ) || defined(MOCK_CUDA)
0657             //__sincosf(2.f*M_PIf*u3,&sinPhi,&cosPhi);
0658             phi = 2.f*M_PIf*u3 ;
0659             sinPhi = sinf(phi);
0660             cosPhi = cosf(phi);
0661 #else
0662             sincosf(2.f*M_PIf*u3,&sinPhi,&cosPhi);
0663 #endif
0664
0665             polarization.x = cosPhi ;
0666             polarization.y = sinPhi ;
0667             polarization.z = 0.f ;
0668
0669             smath::rotateUz(polarization, direction);
0670         }
0671         else
0672         {
0673             // There are two directions which are perpendicular
0674             // to the new momentum direction
0675             if(u3 < 0.5f) polarization = -polarization ;
0676         }
0677         polarization = normalize(polarization);
0678
0679         // simulate according to the distribution cos^2(theta)
0680         // where theta is the angle between old and new polarizations
0681         float doCosTheta = dot(polarization, p.pol ) ;
0682         float doCosTheta2 = doCosTheta*doCosTheta ;
0683         looping = doCosTheta2 < u4 ;
0684
0685     } while ( looping ) ;
0686
0687     p.mom = direction ;
0688     p.pol = polarization ;
0689 }
0690
0691
0692 /**
0693 qsim::propagate_to_boundary
0694 ------------------------------
0695
0696 +---------------------+------------------+---------------------------------------------------------+-------------------------------------------------------+
0697 | flag                |   command        |  changed                                                |  note                                                 |
0698 +=====================+==================+=========================================================+=======================================================+
0699 |   BULK_REEMIT       |   CONTINUE       |  time, position, direction, polarization, wavelength    | advance to reemit position with everything changed    |
0700 +---------------------+------------------+---------------------------------------------------------+-------------------------------------------------------+
0701 |   BULK_SCATTER      |   CONTINUE       |  time, position, direction, polarization                | advance to scatter position, new dir+pol              |
0702 +---------------------+------------------+---------------------------------------------------------+-------------------------------------------------------+
0703 |   BULK_ABSORB       |   BREAK          |  time, position                                         | advance to absorption position, dir+pol unchanged     |
0704 +---------------------+------------------+---------------------------------------------------------+-------------------------------------------------------+
0705 |   not set "SAIL"    |   BOUNDARY       |  time, position                                         | advanced to border position, dir+pol unchanged        |
0706 +---------------------+------------------+---------------------------------------------------------+-------------------------------------------------------+
0707
0708
0709 TODO:
0710    whilst in measurement iteration try changing the four "return"
0711    in the below into a single return: by setting command variable
0712    and returning that
0713
0714 **/
0715
0716
0717
0718 inline QSIM_METHOD int qsim::propagate_to_boundary(unsigned& flag, RNG& rng, sctx& ctx)
0719 {
0720     sphoton& p = ctx.p ;
0721     const sstate& s = ctx.s ;
0722
0723     const float& absorption_length = s.material1.y ;
0724     const float& scattering_length = s.material1.z ;
0725     const float& reemission_prob = s.material1.w ;
0726     const float& group_velocity = s.m1group2.x ;
0727     const float& distance_to_boundary = ctx.prd->q0.f.w ;
0728
0729
0730 #if !defined(PRODUCTION) && defined(DEBUG_TAG)
0731     float u_to_sci = curand_uniform(&rng) ;  // purely for alignment with G4
0732     float u_to_bnd = curand_uniform(&rng) ;  // purely for alignment with G4
0733 #endif
0734     float u_scattering = curand_uniform(&rng) ;
0735     float u_absorption = curand_uniform(&rng) ;
0736
0737 #if !defined(PRODUCTION) && defined(DEBUG_TAG)
0738     stagr& tagr = ctx.tagr ;
0739     tagr.add( stag_to_sci, u_to_sci);
0740     tagr.add( stag_to_bnd, u_to_bnd);
0741     tagr.add( stag_to_sca, u_scattering);
0742     tagr.add( stag_to_abs, u_absorption);
0743 #endif
0744
0745
0746 #if !defined(PRODUCTION) && defined(DEBUG_LOGF)
0747     // see notes/issues/U4LogTest_maybe_replacing_G4Log_G4UniformRand_in_Absorption_and_Scattering_with_float_version_will_avoid_deviations.rst
0748     float scattering_distance = -scattering_length*KLUDGE_FASTMATH_LOGF(u_scattering);
0749     float absorption_distance = -absorption_length*KLUDGE_FASTMATH_LOGF(u_absorption);
0750 #else
0751     float scattering_distance = -scattering_length*logf(u_scattering);
0752     float absorption_distance = -absorption_length*logf(u_absorption);
0753 #endif
0754
0755 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
0756
0757     if(ctx.pidx == base->pidx)
0758     {
0759     printf("//qsim.propagate_to_boundary.head pidx %7lld : u_absorption %10.8f logf(u_absorption) %10.8f absorption_length %10.4f absorption_distance %10.6f \n",
0760         ctx.pidx, u_absorption, logf(u_absorption), absorption_length, absorption_distance );
0761
0762     printf("//qsim.propagate_to_boundary.head pidx %7lld : post = np.array([%10.5f,%10.5f,%10.5f,%10.5f]) \n",
0763         ctx.pidx, p.pos.x, p.pos.y, p.pos.z, p.time );
0764
0765     printf("//qsim.propagate_to_boundary.head pidx %7lld : distance_to_boundary %10.4f absorption_distance %10.4f scattering_distance %10.4f \n",
0766              ctx.pidx, distance_to_boundary, absorption_distance, scattering_distance );
0767
0768     printf("//qsim.propagate_to_boundary.head pidx %7lld : u_scattering %10.4f u_absorption %10.4f \n",
0769              ctx.pidx, u_scattering, u_absorption  );
0770
0771     }
0772 #endif
0773
0774
0775
0776
0777
0778     if (absorption_distance <= scattering_distance)
0779     {
0780         if (absorption_distance <= distance_to_boundary)
0781         {
0782             p.time += absorption_distance/group_velocity ;
0783             p.pos  += absorption_distance*(p.mom) ;
0784
0785
0786 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
0787             float absorb_time_delta = absorption_distance/group_velocity ;
0788             if( ctx.pidx == base->pidx )
0789             {
0790             printf("//qsim.propagate_to_boundary.body.BULK_ABSORB pidx %7lld : post = np.array([%10.5f,%10.5f,%10.5f,%10.5f]) ; absorb_time_delta = %10.8f   \n",
0791                     ctx.pidx, p.pos.x, p.pos.y, p.pos.z, p.time, absorb_time_delta  );
0792
0793             }
0794 #endif
0795
0796             float u_reemit = reemission_prob == 0.f ? 2.f : curand_uniform(&rng);  // avoid consumption at absorption when not scintillator
0797
0798
0799 #if !defined(PRODUCTION) && defined(DEBUG_TAG)
0800             if( u_reemit != 2.f ) tagr.add( stag_to_ree, u_reemit) ;
0801 #endif
0802
0803
0804             if (u_reemit < reemission_prob)
0805             {
0806                 float u_re_wavelength = curand_uniform(&rng);
0807                 float u_re_mom_ph = curand_uniform(&rng);
0808                 float u_re_mom_ct = curand_uniform(&rng);
0809                 float u_re_pol_ph = curand_uniform(&rng);
0810                 float u_re_pol_ct = curand_uniform(&rng);
0811
0812                 p.wavelength = scint->wavelength(u_re_wavelength);
0813                 p.mom = uniform_sphere(u_re_mom_ph, u_re_mom_ct);
0814                 p.pol = normalize(cross(uniform_sphere(u_re_pol_ph, u_re_pol_ct), p.mom));
0815
0816 #if !defined(PRODUCTION) && defined(DEBUG_TAG)
0817                 tagr.add( stag_re_wl, u_re_wavelength);
0818                 tagr.add( stag_re_mom_ph, u_re_mom_ph);
0819                 tagr.add( stag_re_mom_ct, u_re_mom_ct);
0820                 tagr.add( stag_re_pol_ph, u_re_pol_ph);
0821                 tagr.add( stag_re_pol_ct, u_re_pol_ct);
0822 #endif
0823
0824                 flag = BULK_REEMIT ;
0825                 return CONTINUE;
0826             }
0827             else
0828             {
0829                 flag = BULK_ABSORB ;
0830                 return BREAK;
0831             }
0832         }
0833         //  otherwise sail to boundary
0834     }
0835     else
0836     {
0837         if (scattering_distance <= distance_to_boundary)
0838         {
0839             p.time += scattering_distance/group_velocity ;
0840             p.pos  += scattering_distance*(p.mom) ;
0841
0842             rayleigh_scatter(rng, ctx);  // changes dir and pol, consumes 5u at each turn of rejection sampling loop
0843
0844             flag = BULK_SCATTER;
0845
0846             return CONTINUE;
0847         }
0848           //  otherwise sail to boundary
0849     }     // if scattering_distance < absorption_distance
0850
0851
0852
0853     p.pos  += distance_to_boundary*(p.mom) ;
0854     p.time += distance_to_boundary/group_velocity   ;
0855
0856 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
0857     float sail_time_delta = distance_to_boundary/group_velocity ;
0858     if( ctx.pidx == base->pidx ) printf("//qsim.propagate_to_boundary.tail.SAIL pidx %7lld : post = np.array([%10.5f,%10.5f,%10.5f,%10.5f]) ;  sail_time_delta = %10.5f   \n",
0859           ctx.pidx, p.pos.x, p.pos.y, p.pos.z, p.time, sail_time_delta  );
0860 #endif
0861
0862     return BOUNDARY ;
0863 }
0864 #endif
0865 #if defined(__CUDACC__) || defined(__CUDABE__) || defined( MOCK_CURAND ) || defined(MOCK_CUDA)
0866 /**
0867 qsim::propagate_at_boundary
0868 ------------------------------------------
0869
0870 This was brought over from oxrap/cu/propagate.h:propagate_at_boundary_geant4_style
0871 See env-/g4op-/G4OpBoundaryProcess.cc annotations to follow this
0872 and compare the Opticks and Geant4 implementations.
0873
0874 Input:
0875
0876 * p.direction
0877 * p.polarization
0878 * s.material1.x    : refractive index
0879 * s.material2.x    : refractive index
0880 * prd.normal
0881
0882 Consumes one random deciding between BOUNDARY_REFLECT and BOUNDARY_TRANSMIT
0883
0884 +---------------------+------------------+-------------------------------+----------+
0885 | output flag         |   command        |  changed                      |  note    |
0886 +=====================+==================+===============================+==========+
0887 |   BOUNDARY_REFLECT  |    -             |  direction, polarization      |          |
0888 +---------------------+------------------+-------------------------------+----------+
0889 |   BOUNDARY_TRANSMIT |    -             |  direction, polarization      |          |
0890 +---------------------+------------------+-------------------------------+----------+
0891
0892 Notes:
0893
0894 * when geometry and refractive indices dictates TIR there is no dependence on u_reflect and always get reflection
0895
0896
0897 ::
0898
0899                     s1
0900                   +----+
0901                    \   .   /      ^
0902               c1   i\  .  / r    /|\
0903                      \ . /        |
0904         material1     \./         | n
0905         ---------------+----------+----------
0906         material2      .\
0907                        . \
0908                   c2   .  \ t
0909                        .   \
0910                        +----+
0911                          s2
0912
0913 Snells law::
0914
0915      s1    n2
0916     --- = ---         s1 n1 = s2 n2         eta = n1/n2     s1 eta = s2
0917      s2    n1
0918
0919      s1.s1 = 1 - c1.c1   # trig identity
0920
0921      s2.s2 = 1 - c2.c2
0922
0923     s1 eta = s2          # snell
0924
0925     s1s1 eta eta = s2s2
0926
0927     ( 1.f - c1c1 ) eta eta = 1.f - c2c2
0928
0929      c2c2 = 1.f - eta eta ( 1.f - c1c1 )    # snell and trig identity
0930
0931 Because the electromagnetic wave is transverse, the field incident onto the
0932 interface can be decomposed into two polarization components, one P-polarized,
0933 i.e., with the electric field vector inside the plane of incidence, and the
0934 other one S-polarized, i.e., orthogonal to that plane.
0935
0936
0937 inconsistent normal definitions, c1 is expected to be +ve and normal needs to be oriented against initial direction
0938 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
0939
0940 This is apparent from reflected direction vector::
0941
0942       *direction + 2.0f*c1*surface_normal
0943
0944 The standard normal vector at an intersection position on the surface of a shape
0945 is defined to be rigidly oriented outwards away from the shape.
0946 This definition is used by *fill_state* in order to determine properties
0947 of this material m1 and the next material m2 on the other side of the boundary.
0948
0949 The below math assumes that the photon direction is always against the normal
0950 such that the sign of c1 is +ve. Having -ve c1 leads to non-sensical -ve TranCoeff
0951 which results in always relecting.
0952
0953 So what about photons going in the other direction ?
0954 Surface normal is used in several places in the below so presumably must
0955 arrange to have an oriented normal that is flipped appropriately OR perhaps can change the math ?
0956
0957 In summary this is a case of inconsistent definitions of the normal,
0958 that will need to be oriented ~half the time.
0959
0960 TODO: try avoiding "float3 oriented_normal" instead just use "bool orient"
0961       and multiply prd.normal by 1.f or -1.f depending on orient at every use
0962
0963
0964 random aligned matching with examples/Geant/BoundaryStandalone
0965 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
0966
0967 * S/P/"X"-polarized + TIR + normal_incidence all now matching
0968 * noted a 1-in-a-million deviant from TransCoeff cut edge float/double for S and P
0969
0970 * initially had two more deviants at very close to normal incidence that were aligned by changing
0971   the criteria to match Geant4 "sint1 == 0." better::
0972
0973     //const bool normal_incidence = fabs(c1) > 0.999999f ;
0974     const bool normal_incidence = fabs(c1) == 1.f ;
0975
0976 * see notes/issues/QSimTest_propagate_at_boundary_vs_BoundaryStandalone_G4OpBoundaryProcessTest.rst
0977
0978
0979
0980 **Normal Incidence Special Case**
0981
0982 Judging normal_incidence based on absolete dot product being exactly unity "c1 == 1.f" is problematic
0983 as when very near to normal incidence there are vectors for which the absolute dot product
0984 is not quite 1.f but the cross product does give an exactly zero vector which gives
0985 A_trans (nan, nan, nan) from the normalize doing : (zero,zero,zero)/zero.
0986
0987 Solution is to judge normal incidence based on trans_length as that is what the
0988 calculation actually needs to be non-zero in order to be able to normalize trans to give A_trans.
0989
0990 However using "bool normal_incidence = trans_length == 0.f" also problematic
0991 as it means would be using very small trans vectors to define A_trans and this
0992 would cause a difference with double precision Geant4 and float precision Opticks.
0993 So try using a cutoff "trans_length < 1e-6f" below which to special case normal
0994 incidence.
0995
0996 **/
0997
0998 inline QSIM_METHOD int qsim::propagate_at_boundary(unsigned& flag, RNG& rng, sctx& ctx, float theTransmittance ) const
0999 {
1000 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
1001     if(ctx.pidx == base->pidx)
1002     printf("//propagate_at_boundary.DEBUG_PIDX ctx.pidx %7lld base %p base.pidx %7lld \n", ctx.pidx, base, base->pidx  );
1003 #endif
1004
1005 #if !defined(PRODUCTION) && defined(DEBUG_TAG)
1006     if(ctx.pidx == base->pidx)
1007     printf("//propagate_at_boundary.DEBUG_TAG ctx.pidx %7lld base %p base.pidx %7lld \n", ctx.pidx, base, base->pidx  );
1008 #endif
1009     // stray "return 0;" left here 2024-12-14 caused : ~/j/issues/jok-tds-missing-BR-BT-on-A-side.rst
1010
1011     sphoton& p = ctx.p ;
1012     const sstate& s = ctx.s ;
1013
1014     const float& n1 = s.material1.x ;
1015     const float& n2 = s.material2.x ;
1016     const float eta = n1/n2 ;
1017
1018     const float3* normal = (float3*)&ctx.prd->q0.f.x ;     // geometrical outwards normal
1019
1020     const float _c1 = -dot(p.mom, *normal );                // _c1 : cos(angle_of_incidence) not yet oriented
1021     const float3 oriented_normal = _c1 < 0.f ? -(*normal) : (*normal) ; // oriented against incident p.mom
1022     const float3 trans = cross(p.mom, oriented_normal) ;   // perpendicular to plane of incidence, S-pol direction
1023     const float trans_length = length(trans) ;             // same as sin(theta), as p.mom and oriented_normal are unit vectors
1024     const bool normal_incidence = trans_length < 1e-6f  ;  // p.mom parallel/anti-parallel to oriented_normal
1025     const float3 A_trans = normal_incidence ? p.pol : trans/trans_length ; // normalized unit vector : perpendicular to plane of incidence
1026     const float E1_perp = dot(p.pol, A_trans);     // amplitude of polarization in direction perpendicular to plane of incidence, ie S polarization
1027
1028     const float c1 = fabs(_c1) ;
1029
1030 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
1031     if(ctx.pidx == base->pidx)
1032     {
1033     printf("//qsim.propagate_at_boundary.head pidx %7lld : theTransmittance = %10.8f \n", ctx.pidx, theTransmittance  );
1034     printf("//qsim.propagate_at_boundary.head pidx %7lld : nrm = np.array([%10.8f,%10.8f,%10.8f]) ; lnrm = %10.8f  \n",
1035          ctx.pidx, oriented_normal.x, oriented_normal.y, oriented_normal.z, length(oriented_normal) );
1036     printf("//qsim.propagate_at_boundary.head pidx %7lld : pos = np.array([%10.5f,%10.5f,%10.5f]) ; lpos = %10.8f \n",
1037          ctx.pidx, p.pos.x, p.pos.y, p.pos.z, length(p.pos) );
1038     printf("//qsim.propagate_at_boundary.head pidx %7lld : mom0 = np.array([%10.8f,%10.8f,%10.8f]) ; lmom0 = %10.8f \n",
1039          ctx.pidx, p.mom.x, p.mom.y, p.mom.z, length(p.mom)  );
1040     printf("//qsim.propagate_at_boundary.head pidx %7lld : pol0 = np.array([%10.8f,%10.8f,%10.8f]) ; lpol0 = %10.8f \n",
1041           ctx.pidx, p.pol.x, p.pol.y, p.pol.z, length(p.pol)  );
1042     printf("//qsim.propagate_at_boundary.head pidx %7lld : n1,n2,eta = (%10.8f,%10.8f,%10.8f) \n", ctx.pidx, n1, n2, eta );
1043     printf("//qsim.propagate_at_boundary.head pidx %7lld : c1 = %10.8f ; normal_incidence = %d \n", ctx.pidx, c1, normal_incidence );
1044     }
1045 #endif
1046
1047     const float c2c2 = 1.f - eta*eta*(1.f - c1 * c1 ) ;   // Snells law and trig identity
1048     bool tir = c2c2 < 0.f ;
1049     const float EdotN = dot(p.pol, oriented_normal ) ;  // used for TIR polarization
1050     const float c2 = tir ? 0.f : sqrtf(c2c2) ;   // c2 chosen +ve, set to 0.f for TIR => reflection_coefficient = 1.0f : so will always reflect
1051     const float n1c1 = n1*c1 ;
1052     const float n2c2 = n2*c2 ;
1053     const float n2c1 = n2*c1 ;
1054     const float n1c2 = n1*c2 ;
1055
1056     const float2 E1   = normal_incidence ? make_float2( 0.f, 1.f) : make_float2( E1_perp , length( p.pol - (E1_perp*A_trans) ) );
1057     const float2 E2_t = make_float2(  2.f*n1c1*E1.x/(n1c1+n2c2), 2.f*n1c1*E1.y/(n2c1+n1c2) ) ;  // ( S:perp, P:parl )
1058     const float2 E2_r = make_float2( E2_t.x - E1.x             , (n2*E2_t.y/n1) - E1.y     ) ;  // ( S:perp, P:parl )
1059     const float2 RR = normalize(E2_r) ;
1060     const float2 TT = normalize(E2_t) ;
1061     const float TransCoeff = theTransmittance >= 0.f ?
1062                                                            theTransmittance
1063                                                      :
1064                                                            ( tir || n1c1 == 0.f ? 0.f : n2c2*dot(E2_t,E2_t)/n1c1 )
1065                                                      ;
1066
1067     /*
1068     E1, E2_t, E2_t: incident, transmitted and reflected amplitudes in S and P directions
1069     RR, TT: normalized
1070     */
1071
1072 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
1073     if(ctx.pidx == base->pidx)
1074     {
1075     printf("//qsim.propagate_at_boundary.body pidx %7lld : TransCoeff = %10.8f ; n1c1 = %10.8f ; n2c2 = %10.8f \n",
1076             ctx.pidx, TransCoeff, n1c1, n2c2 );
1077
1078     printf("//qsim.propagate_at_boundary.body pidx %7lld : E2_t = np.array([%10.8f,%10.8f]) ; lE2_t = %10.8f \n",
1079             ctx.pidx,  E2_t.x, E2_t.y, length(E2_t) );
1080
1081     printf("//qsim.propagate_at_boundary.body pidx %7lld : A_trans = np.array([%10.8f,%10.8f,%10.8f]) ; lA_trans = %10.8f \n",
1082             ctx.pidx,  A_trans.x, A_trans.y, A_trans.z, length(A_trans) );
1083
1084     }
1085 #endif
1086
1087
1088 #if !defined(PRODUCTION) && defined(DEBUG_TAG)
1089     const float u_boundary_burn = curand_uniform(&rng) ;  // needed for random consumption alignment with Geant4 G4OpBoundaryProcess::PostStepDoIt
1090 #endif
1091     const float u_reflect = curand_uniform(&rng) ;
1092     bool reflect = u_reflect > TransCoeff  ;
1093
1094 #if !defined(PRODUCTION) && defined(DEBUG_TAG)
1095     stagr& tagr = ctx.tagr ;
1096     tagr.add( stag_at_burn_sf_sd, u_boundary_burn);
1097     tagr.add( stag_at_ref,  u_reflect);
1098 #endif
1099
1100 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
1101     if(ctx.pidx == base->pidx)
1102     {
1103     printf("//qsim.propagate_at_boundary.body pidx %7lld : u_reflect %10.4f TransCoeff %10.4f reflect %d \n",
1104               ctx.pidx,  u_reflect, TransCoeff, reflect   );
1105
1106     printf("//qsim.propagate_at_boundary.body pidx %7lld : mom0 = np.array([%10.8f,%10.8f,%10.8f]) ; lmom0 = %10.8f \n",
1107                ctx.pidx, p.mom.x, p.mom.y, p.mom.z, length(p.mom) ) ;
1108
1109     printf("//qsim.propagate_at_boundary.body pidx %7lld : pos = np.array([%10.5f,%10.5f,%10.5f]) ; lpos = %10.8f \n",
1110                ctx.pidx, p.pos.x, p.pos.y, p.pos.z, length(p.pos)  );
1111
1112     printf("//qsim.propagate_at_boundary.body pidx %7lld : nrm = np.array([%10.8f,%10.8f,%10.8f]) ; lnrm = %10.8f \n",
1113                ctx.pidx, oriented_normal.x, oriented_normal.y, oriented_normal.z, length(oriented_normal) ) ;
1114
1115     printf("//qsim.propagate_at_boundary.body pidx %7lld : n1 = %10.8f ; n2 = %10.8f ; eta = %10.8f  \n",
1116                ctx.pidx, n1, n2, eta );
1117
1118     printf("//qsim.propagate_at_boundary.body pidx %7lld : c1 = %10.8f ; eta_c1 = %10.8f ; c2 = %10.8f ; eta_c1__c2 = %10.8f \n",
1119                ctx.pidx, c1, eta*c1, c2, (eta*c1 - c2) );
1120
1121     }
1122 #endif
1123
1124     p.mom = reflect
1125                     ?
1126                        p.mom + 2.0f*c1*oriented_normal
1127                     :
1128                        eta*(p.mom) + (eta*c1 - c2)*oriented_normal
1129                     ;
1130
1131
1132     // Q: Does the new p.mom need to be normalized ?
1133     // A: NO, it is inherently normalized as derived in the comment below
1134
1135
1136     const float3 A_paral = normalize(cross(p.mom, A_trans));   // new P-pol direction
1137
1138     p.pol =  normal_incidence ?
1139                                          ( reflect ?  p.pol*(n2>n1? -1.f:1.f) : p.pol )
1140                                       :
1141                                          ( reflect ?
1142                                                    ( tir ?  -p.pol + 2.f*EdotN*oriented_normal : RR.x*A_trans + RR.y*A_paral )
1143
1144                                                    :
1145                                                        TT.x*A_trans + TT.y*A_paral
1146
1147                                                    )
1148                                       ;
1149
1150
1151
1152      // Q: Above expression kinda implies A_trans and A_paral are same for reflect and transmit ?
1153      // A: NO IT DOESNT,
1154      //    A_trans is the same (except for normal incidence) as there is only one perpendicular
1155      //    to the plane of incidence which is the same for i,r,t.
1156      //
1157      //    A_paral depends on the new p.mom (is has to be orthogonal to p.mom and A_trans)
1158      //    and p.mom of course is different for r and t
1159      //    (the reflect bool is used in multiple places, not just here)
1160
1161
1162
1163 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
1164     if(ctx.pidx == base->pidx)
1165     {
1166     printf("//qsim.propagate_at_boundary.tail pidx %7lld : reflect %d tir %d TransCoeff %10.4f u_reflect %10.4f \n", ctx.pidx, reflect, tir, TransCoeff, u_reflect );
1167     printf("//qsim.propagate_at_boundary.tail pidx %7lld : mom1 = np.array([%10.8f,%10.8f,%10.8f]) ; lmom1 = %10.8f  \n",
1168         ctx.pidx, p.mom.x, p.mom.y, p.mom.z, length(p.mom) );
1169     printf("//qsim.propagate_at_boundary.tail pidx %7lld : pol1 = np.array([%10.8f,%10.8f,%10.8f]) ; lpol1 = %10.8f \n",
1170         ctx.pidx, p.pol.x, p.pol.y, p.pol.z, length(p.pol) );
1171     }
1172
1173     /*
1174     if(ctx.pidx == 251959)
1175     {
1176         printf("//qsim.propagate_at_boundary RR.x %10.4f A_trans (%10.4f %10.4f %10.4f )  RR.y %10.4f  A_paral (%10.4f %10.4f %10.4f ) \n",
1177               RR.x, A_trans.x, A_trans.y, A_trans.z,
1178               RR.y, A_paral.x, A_paral.y, A_paral.z );
1179
1180         printf("//qsim.propagate_at_boundary reflect %d  tir %d polarization (%10.4f, %10.4f, %10.4f) \n", reflect, tir, p.pol.x, p.pol.y, p.pol.z );
1181     }
1182     */
1183 #endif
1184
1185     flag = reflect ? BOUNDARY_REFLECT : BOUNDARY_TRANSMIT ;
1186
1187
1188 #if !defined(PRODUCTION) && defined(DEBUG_TAG)
1189     if( flag ==  BOUNDARY_REFLECT )
1190     {
1191         const float u_br_align_0 = curand_uniform(&rng) ;
1192         const float u_br_align_1 = curand_uniform(&rng) ;
1193         const float u_br_align_2 = curand_uniform(&rng) ;
1194         const float u_br_align_3 = curand_uniform(&rng) ;
1195
1196         // switched below standard tags from stag_br_align_0/1/2/3 to simplify A:tag to B:stack mapping
1197         tagr.add( stag_to_sci, u_br_align_0 );
1198         tagr.add( stag_to_bnd, u_br_align_1 );
1199         tagr.add( stag_to_sca, u_br_align_2 );
1200         tagr.add( stag_to_abs, u_br_align_3 );
1201     }
1202 #endif
1203
1204     return CONTINUE ;
1205 }
1206 /**
1207 qsim::propagate_at_boundary_with_T
1208 ------------------------------------
1209
1210 Variant of qsim::propagate_at_boundary where a value of theTransmittance
1211 is passed in as an argument (eg from CustomART calculation) rather then
1212 calculated here.
1213
1214 HMM: was hoping that passing in theTransmittance would afford some
1215 simplifications, but it seems almost no simplification is possible
1216 as need to calculate everything anyhow for the polarization.
1217
1218 Given that this is almost exactly the same as qsim::propagate_at_boundary
1219 and no simplification is afforded, might as well just keep using that
1220 and leave this just for testing.
1221
1222 **/
1223
1224 inline QSIM_METHOD int qsim::propagate_at_boundary_with_T(unsigned& flag, RNG& rng, sctx& ctx, float theTransmittance ) const
1225 {
1226     sphoton& p = ctx.p ;
1227     const sstate& s = ctx.s ;
1228
1229     const float& n1 = s.material1.x ;
1230     const float& n2 = s.material2.x ;
1231     const float eta = n1/n2 ;
1232
1233     const float3* normal = (float3*)&ctx.prd->q0.f.x ;     // geometrical outwards normal
1234
1235     const float _c1 = -dot(p.mom, *normal );                // _c1 : cos(angle_of_incidence) not yet oriented
1236     const float3 oriented_normal = _c1 < 0.f ? -(*normal) : (*normal) ; // oriented against incident p.mom
1237     const float3 trans = cross(p.mom, oriented_normal) ;   // perpendicular to plane of incidence, S-pol direction
1238     const float trans_length = length(trans) ;             // same as sin(theta), as p.mom and oriented_normal are unit vectors
1239     const bool normal_incidence = trans_length < 1e-6f  ;  // p.mom parallel/anti-parallel to oriented_normal
1240     const float3 A_trans = normal_incidence ? p.pol : trans/trans_length ; // normalized unit vector : perpendicular to plane of incidence
1241     const float E1_perp = dot(p.pol, A_trans);     // amplitude of polarization in direction perpendicular to plane of incidence, ie S polarization
1242
1243     const float c1 = fabs(_c1) ;
1244
1245     const float c2c2 = 1.f - eta*eta*(1.f - c1 * c1 ) ;   // Snells law and trig identity
1246     bool tir = c2c2 < 0.f ;
1247     const float EdotN = dot(p.pol, oriented_normal ) ;  // used for TIR polarization
1248     const float c2 = tir ? 0.f : sqrtf(c2c2) ;   // c2 chosen +ve, set to 0.f for TIR => reflection_coefficient = 1.0f : so will always reflect
1249     const float n1c1 = n1*c1 ;
1250     const float n2c2 = n2*c2 ;
1251     const float n2c1 = n2*c1 ;
1252     const float n1c2 = n1*c2 ;
1253
1254     const float2 E1   = normal_incidence ? make_float2( 0.f, 1.f) : make_float2( E1_perp , length( p.pol - (E1_perp*A_trans) ) );
1255     const float2 E2_t = make_float2(  2.f*n1c1*E1.x/(n1c1+n2c2), 2.f*n1c1*E1.y/(n2c1+n1c2) ) ;  // ( S:perp, P:parl )
1256     const float2 E2_r = make_float2( E2_t.x - E1.x             , (n2*E2_t.y/n1) - E1.y     ) ;  // ( S:perp, P:parl )
1257     const float2 RR = normalize(E2_r) ;
1258     const float2 TT = normalize(E2_t) ;
1259
1260
1261 /*
1262     const float TransCoeff = theTransmittance >= 0.f ?
1263                                                            theTransmittance
1264                                                      :
1265                                                            ( tir || n1c1 == 0.f ? 0.f : n2c2*dot(E2_t,E2_t)/n1c1 )
1266                                                      ;
1267 */
1268
1269     const float& TransCoeff = theTransmittance ;
1270
1271     const float u_reflect = curand_uniform(&rng) ;
1272     bool reflect = u_reflect > TransCoeff  ;
1273
1274     p.mom = reflect
1275                     ?
1276                        p.mom + 2.0f*c1*oriented_normal
1277                     :
1278                        eta*(p.mom) + (eta*c1 - c2)*oriented_normal
1279                     ;
1280
1281
1282     const float3 A_paral = normalize(cross(p.mom, A_trans));   // new P-pol direction
1283
1284     p.pol =  normal_incidence ?
1285                                          ( reflect ?  p.pol*(n2>n1? -1.f:1.f) : p.pol )
1286                                       :
1287                                          ( reflect ?
1288                                                    ( tir ?  -p.pol + 2.f*EdotN*oriented_normal : RR.x*A_trans + RR.y*A_paral )
1289
1290                                                    :
1291                                                        TT.x*A_trans + TT.y*A_paral
1292
1293                                                    )
1294                                       ;
1295
1296     flag = reflect ? BOUNDARY_REFLECT : BOUNDARY_TRANSMIT ;
1297
1298
1299     return CONTINUE ;
1300 }
1301
1302 #endif
1303
1304
1305
1306
1307
1308
1309
1310
1311 /**
1312 Reflected momentum vector
1313 ---------------------------
1314
1315 ::
1316
1317      p.mom(new) = p.mom(old) + 2.f*c1*oriented_normal
1318
1319
1320      PdotN = dot(p.mom(old), oriented_normal) = -c1
1321
1322      "c1" is +ve, above dot product -ve as incident vector is against the normal
1323
1324      p.mom(new) = p.mom(old) -2.f dot(p.mom(old), oriented_normal) * oriented_normal
1325
1326      NewMomentum = OldMomentum - 2*PdotN*oriented_normal
1327
1328
1329
1330      OA = oriented_normal
1331
1332      IO = OC = p.mom (old)
1333           OR = p.mom (new)
1334
1335           OR = OC + CD + DR  = OC + 2.f*DR
1336           CD = DR = c1*oriented_normal
1337
1338
1339                   s1   s1
1340                 I----A----R
1341                 .\   .   /.
1342             c1  . \  .  / .  c1
1343                 .  \ . /  .
1344                 .   \./   .
1345        ---------+----O----D------
1346                 :    :.   :
1347                 :    : .  :  c1
1348            c1   :    :  . :
1349                 :    :   .:
1350                 +....B....C
1351                  s1    s1
1352
1353
1354 * NB that is inherently normalized, as demonstrated in the below reference
1355
1356
1357 Transmitted momentum vector
1358 -------------------------------
1359
1360 * https://graphics.stanford.edu/courses/cs148-10-summer/docs/2006--degreve--reflection_refraction.pdf
1361 * ~/opticks_refs/bram_de_greve_reflection_refraction_in_ray_tracing.pdf
1362
1363
1364 ::
1365
1366      eta = n1/n2
1367
1368      p.mom(new) =  eta*p.mom(old) + (eta*c1 - c2)*oriented_normal
1369
1370
1371 Consider the perp and para components of the incident and transmitted mom vectors::
1372
1373     i = i_perp + i_para
1374     t = t_perp + t_para
1375
1376     i_perp = ( i.n ) n      (component of i in normal direction)
1377     t_perp = ( t.n ) n      (component of t in normal direction )
1378
1379     i_para = i - (i.n) n
1380     t_para = t - (t.n) n
1381
1382     # HMM caution with backwards oriented normal direction convention
1383     #     and dot product signs
1384
1385
1386 Snell::
1387
1388     n1 s1 = n2 s2
1389
1390     eta s1 = s2      (eta = n1/n2)
1391
1392     s2 = eta s1
1393
1394     |i_para| = s1
1395     |t_para| = s2
1396
1397     t_para  = eta i_para     (as they point in same direction)
1398
1399     t_para  = eta ( i - (i.n) n )
1400             = eta ( i + c1 n )
1401
1402     t_perp  = - sqrt( 1-|t_para|^2 ) n     (-ve as opposite direction to oriented normal)
1403             = - c2 n
1404
1405     t   = eta i + ( eta c1 - c2 ) n
1406
1407
1408 Now is that inherently normalized ? YES
1409
1410    t.t =  eta*eta*i.i  + (eta c1 - c2 )*(eta c1 - c2 )*n.n + 2 eta (eta c1 - c2) i.n
1411
1412          (i.i = 1, n.n = 1, i.n = -c1 )
1413
1414        = eta*eta + (eta c1 - c2)*(eta c1 - c2) + 2 eta(eta c1 - c2) (-c1 )
1415
1416        = eta eta  +  eta eta c1 c1 + c2 c2 - 2 eta c1 c2  - 2 eta eta c1 c1 + 2 eta c2 c1
1417                                            --------------                    --------------
1418        =   eta eta  ( 1 - c1 c1 ) + c2 c2
1419
1420        = 1.f      ( form above   c2c2 = 1.f - eta eta ( 1.f - c1c1 ) )    # snell and trig identity
1421
1422
1423
1424 Compare transmitted vector with G4OpBoundaryProcess::DielectricDielectric
1425 ----------------------------------------------------------------------------
1426
1427 ::
1428
1429     1226                 theStatus = FresnelRefraction;
1430     1227
1431     1228                 if (sint1 > 0.0) {      // incident ray oblique
1432     1229
1433     1230                    alpha = cost1 - cost2*(Rindex2/Rindex1);
1434     1231                    NewMomentum = OldMomentum + alpha*theFacetNormal;
1435     1232                    NewMomentum = NewMomentum.unit();
1436
1437
1438     t = eta i  + (eta c1 - c2 ) n      eta = n1/n2
1439     t/eta = i + (c1 - c2/eta ) n
1440
1441     Because Geant4 normalizes NewMomentum it gets away with
1442     playing fast and loose with factors of 1/eta
1443
1444
1445 **/
1446
1447
1448
1449
1450
1451
1452
1453 /*
1454 G4OpBoundaryProcess::DielectricDielectric
1455
1456
1457 1152               if (sint1 > 0.0) {
1458 1153                  A_trans = OldMomentum.cross(theFacetNormal);
1459 1154                  A_trans = A_trans.unit();
1460 1155                  E1_perp = OldPolarization * A_trans;
1461 1156                  E1pp    = E1_perp * A_trans;
1462 1157                  E1pl    = OldPolarization - E1pp;
1463 1158                  E1_parl = E1pl.mag();
1464 1159               }
1465 1160               else {
1466 1161                  A_trans  = OldPolarization;
1467 1162                  // Here we Follow Jackson's conventions and we set the
1468 1163                  // parallel component = 1 in case of a ray perpendicular
1469 1164                  // to the surface
1470 1165                  E1_perp  = 0.0;
1471 1166                  E1_parl  = 1.0;
1472 1167               }
1473 1168
1474 1169               s1 = Rindex1*cost1;
1475 1170               E2_perp = 2.*s1*E1_perp/(Rindex1*cost1+Rindex2*cost2);
1476 1171               E2_parl = 2.*s1*E1_parl/(Rindex2*cost1+Rindex1*cost2);
1477 1172               E2_total = E2_perp*E2_perp + E2_parl*E2_parl;
1478 1173               s2 = Rindex2*cost2*E2_total;
1479 1174
1480 1175               if (theTransmittance > 0) TransCoeff = theTransmittance;
1481 1176               else if (cost1 != 0.0) TransCoeff = s2/s1;
1482 1177               else TransCoeff = 0.0;
1483
1484
1485 reflect
1486
1487 1217
1488 1218                        E2_parl   = Rindex2*E2_parl/Rindex1 - E1_parl;
1489 1219                        E2_perp   = E2_perp - E1_perp;
1490 1220                        E2_total  = E2_perp*E2_perp + E2_parl*E2_parl;
1491 1221                        A_paral   = NewMomentum.cross(A_trans);
1492 1222                        A_paral   = A_paral.unit();
1493 1223                        E2_abs    = std::sqrt(E2_total);
1494 1224                        C_parl    = E2_parl/E2_abs;
1495 1225                        C_perp    = E2_perp/E2_abs;
1496 1226
1497 1227                        NewPolarization = C_parl*A_paral + C_perp*A_trans;
1498 1228
1499
1500 transmit
1501
1502 1253                    alpha = cost1 - cost2*(Rindex2/Rindex1);
1503 1254                    NewMomentum = OldMomentum + alpha*theFacetNormal;
1504 1255                    NewMomentum = NewMomentum.unit();
1505 1256 //                   PdotN = -cost2;
1506 1257                    A_paral = NewMomentum.cross(A_trans);
1507 1258                    A_paral = A_paral.unit();
1508
1509 1259                    E2_abs     = std::sqrt(E2_total);
1510 1260                    C_parl     = E2_parl/E2_abs;
1511 1261                    C_perp     = E2_perp/E2_abs;
1512 1262
1513 1263                    NewPolarization = C_parl*A_paral + C_perp*A_trans;
1514
1515 */
1516
1517
1518 /*
1519 qsim::propagate_at_surface_MultiFilm
1520 -------------------------------
1521
1522 based on https://juno.ihep.ac.cn/trac/browser/offline/trunk/Simulation/DetSimV2/PMTSim/src/junoPMTOpticalModel.cc
1523 which follows a flawed approach to polarization
1524
1525 Rs: s-component reflect probability
1526 Ts: s-component transmit probability
1527 Rp: p-component reflect probability
1528 Tp: p-component reflect probability
1529
1530 TODO:
1531    access the qe
1532    access the multifilm catagory
1533    optical photon polarization
1534
1535 */
1536
1537 #if defined(__CUDACC__) || defined(__CUDABE__)
1538 inline QSIM_METHOD int qsim::propagate_at_surface_MultiFilm(unsigned& flag, RNG& rng, sctx& ctx )
1539 {
1540
1541     const float one = 1.0f;
1542     const sphoton& p = ctx.p ;
1543     const float3* normal = (float3*)&ctx.prd->q0.f.x ;
1544     int   lpmtid = ctx.prd->identity() - 1 ;  // identity comes from optixInstance.instanceId where 0 means not-a-sensor
1545
1546     float minus_cos_theta = dot(p.mom, *normal);
1547     int pmtcat = pmt->get_lpmtcat_from_lpmtid(lpmtid);
1548     float wv_nm = p.wavelength;
1549
1550     float4 RsTsRpTp = multifilm->lookup(pmtcat, wv_nm, minus_cos_theta);
1551
1552
1553     const float c1 = fabs(minus_cos_theta) ;
1554     const float s1 = sqrtf(one -c1*c1);
1555
1556     float EsEs = s1 > 0.f ? dot(p.pol, cross( p.mom, *normal))/s1 : 0.f ;
1557     EsEs *= EsEs;   //   orienting normal doesnt matter as squared : this is S_vs_P power fraction
1558
1559
1560     float3 ART ;
1561     ART.z = RsTsRpTp.y*EsEs + RsTsRpTp.w*(one - EsEs);
1562     ART.y = RsTsRpTp.x*EsEs + RsTsRpTp.z*(one - EsEs);
1563     ART.x = one - (ART.y+ART.z);
1564
1565     const float& A = ART.x ;
1566     const float& T = ART.z ;
1567
1568
1569     float4 RsTsRpTpNormal = multifilm->lookup(pmtcat, wv_nm, -one );
1570     // Normal means the photon incident from glass to vacuum, AOI = 0 deg  cos_theta = -1.f
1571
1572     float3 ART_normal;
1573     ART_normal.z = 0.5f*(RsTsRpTpNormal.y + RsTsRpTpNormal.w); // T:0.5f*(Ts+Tp)
1574     ART_normal.y = 0.5f*(RsTsRpTpNormal.x + RsTsRpTpNormal.z); // R:0.5f*(Rs+Rp)
1575     ART_normal.x = one -(ART_normal.y + ART_normal.z) ;        // 1.f - (R+T)
1576
1577     const float& An = ART_normal.x ;
1578     const float energy_eV = qpmt<float>::hc_eVnm/wv_nm ;
1579     const float qe_scale = pmt->get_qescale_from_lpmtid(lpmtid);
1580     const float qe_shape = pmt->get_lpmtcat_qe(pmtcat, energy_eV);
1581
1582     const float _qe = minus_cos_theta > 0.f ? 0.f : qe_scale * qe_shape;
1583
1584     const float& theAbsorption = A;
1585     const float& theTransmittance = T/(one-A);
1586     const float& theEfficiency = _qe/An;
1587
1588     float u_theAbsorption = curand_uniform(&rng);
1589     int action = u_theAbsorption < theAbsorption  ? BREAK : CONTINUE ;
1590
1591     if( action == BREAK )
1592     {
1593         float u_theEfficiency = curand_uniform(&rng) ;
1594         flag = u_theEfficiency < theEfficiency ? SURFACE_DETECT : SURFACE_ABSORB ;
1595     }
1596     else
1597     {
1598         propagate_at_boundary( flag, rng, ctx, theTransmittance  );
1599     }
1600
1601     //printf("//qsim.propagate_at_surface_MultiFilm pidx %7lld lpmtid %d ART ( %7.3f %7.3f %7.3f ) u_theAbsorption  %7.3f action %d \n",
1602     //ctx.pidx, lpmtid, ART.x, ART.y, ART.z, u_theAbsorption, action);
1603
1604     return action ;
1605
1606 }
1607
1608 #endif
1609
1610
1611 #if defined(__CUDACC__) || defined(__CUDABE__) || defined( MOCK_CURAND ) || defined(MOCK_CUDA)
1612
1613 /**
1614 qsim::propagate_at_surface   (HMM: perhaps propagate_at_simplified_surface )
1615 -------------------------------------------------------------------------------
1616
1617 +---------------------+------------------+---------------------------------------------------------+--------------+
1618 | output flag         |   command        |  changed                                                |  note        |
1619 +=====================+==================+=========================================================+==============+
1620 |   SURFACE_ABSORB    |    BREAK         |                                                         |              |
1621 +---------------------+------------------+---------------------------------------------------------+--------------+
1622 |   SURFACE_DETECT    |    BREAK         |                                                         |              |
1623 +---------------------+------------------+---------------------------------------------------------+--------------+
1624 |   SURFACE_DREFLECT  |    CONTINUE      |   direction, polarization                               |              |
1625 +---------------------+------------------+---------------------------------------------------------+--------------+
1626 |   SURFACE_SREFLECT  |    CONTINUE      |   direction, polarization                               |              |
1627 +---------------------+------------------+---------------------------------------------------------+--------------+
1628
1629 Action depens on s.surface float4 params::
1630
1631 +---------------+---------------------+----------------------------------+
1632 | s.surface.x   | detect fraction     |                                  |
1633 +---------------+---------------------+----------------------------------+
1634 | s.surface.y   | absorb fraction     |                                  |
1635 +---------------+---------------------+----------------------------------+
1636 | s.surface.z   | reflect specular    | NOT USED AS ALL FOUR SUM TO 1.   |
1637 +---------------+---------------------+----------------------------------+
1638 | s.surface.w   | reflect diffuse     |                                  |
1639 +---------------+---------------------+----------------------------------+
1640
1641 Excluding technical alignment burns a single u_surface random is throw
1642 to decide on what happens at the surface according to the float4 probabilities
1643 that are assumed to sum to unity::
1644
1645    +-------------------+-------------------+---------------------+------------------------+
1646    |                   |                   |                     |                        |
1647    |  absorb           |  detect           |  reflect_diffuse_   |  "reflect_specular_"   |
1648    0--SURFACE_ABSORB---|--SURFACE_DETECT---|--SURFACE_DREFLECT---|--SURFACE_SREFLECT------1
1649    |  s.surface.y      |  s.surface.x      |  s.surface.w        |  s.surface.z           |
1650    |                   |                   |                     |                        |
1651    +-------------------+-------------------+---------------------+------------------------+
1652
1653 Refs::
1654
1655     GSurfaceLib::propertyName
1656     GSurfaceLib::createStandardSurface
1657
1658 Different types of surface are modelled by changing the four values,
1659 in such a way that they always sum to unity::
1660
1661     In [20]: t.oldsur.shape
1662     Out[20]: (46, 2, 761, 4)
1663
1664     In [21]: t.oldsur[:,0,:].shape  ## pick payload group 0
1665     Out[21]: (46, 761, 4)
1666
1667     assert np.all( np.sum(t.oldsur[:,0,:], axis=2) == 1. )
1668
1669 The s.surface float4 is filled by qbnd::fill_state via::
1670
1671     const int su_line = cosTheta > 0.f ? line + ISUR : line + OSUR ;
1672     s.surface = boundary_lookup(wavelength,su_line,0) ;
1673     s.optical = optical[su_line].u ;
1674
1675 **/
1676
1677 inline QSIM_METHOD int qsim::propagate_at_surface(unsigned& flag, RNG& rng, sctx& ctx)
1678 {
1679     const sstate& s = ctx.s ;
1680     const float& detect = s.surface.x ;
1681     const float& absorb = s.surface.y ;
1682     //const float& reflect_specular_ = s.surface.z ;
1683     const float& reflect_diffuse_  = s.surface.w ;
1684
1685     float u_surface = curand_uniform(&rng);
1686
1687 #if !defined(PRODUCTION) && defined(DEBUG_TAG)
1688     stagr& tagr = ctx.tagr ;
1689     float u_surface_burn = curand_uniform(&rng);
1690     tagr.add( stag_at_burn_sf_sd, u_surface);
1691     tagr.add( stag_sf_burn,       u_surface_burn);
1692 #endif
1693
1694
1695     int action = u_surface < absorb + detect ? BREAK : CONTINUE  ;
1696
1697     if( action == BREAK )
1698     {
1699 #if defined(WITH_CUSTOM4)
1700         int pmtid = ctx.prd->identity() - 1 ;     // identity comes from optixInstance.instanceId where 0 means not-a-sensor
1701         float qe = 1.f ;
1702         float u_qe = curand_uniform(&rng);
1703         if( s_pmt::is_spmtid(pmtid) )
1704         {
1705             const float energy_eV = qpmt<float>::hc_eVnm/ctx.p.wavelength ;
1706             float qe_shape = pmt->s_qeshape_prop->interpolate( 0, energy_eV );
1707             float qe_scale = pmt->get_s_qescale_from_spmtid(pmtid);
1708             qe = qe_shape*qe_scale ;
1709 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
1710             if(ctx.pidx == base->pidx)
1711             printf("//qsim.propagate_at_surface.BREAK.is_spmtid pidx %7lld : pmtid %d energy_eV %7.3f qe_shape %7.3f qe_scale %7.3f qe %7.3f detect %7.3f absorb %7.3f reflect_specular %7.3f reflect_diffuse %7.3f \n" ,
1712                 ctx.pidx, pmtid, energy_eV, qe_shape, qe_scale, qe, detect, absorb, s.surface.z, reflect_diffuse_ );
1713 #endif
1714         }
1715         flag = u_surface < absorb ?
1716                                       SURFACE_ABSORB
1717                                   :
1718                                       ( u_qe < qe  ? EFFICIENCY_COLLECT : EFFICIENCY_CULL  )
1719                                   ;
1720 #else
1721         flag = u_surface < absorb ?
1722                                       SURFACE_ABSORB
1723                                   :
1724                                       SURFACE_DETECT
1725                                   ;
1726 #endif
1727
1728 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
1729         if(ctx.pidx == base->pidx)
1730         printf("//qsim.propagate_at_surface.SA/SD.BREAK pidx %7lld : flag %d \n" , ctx.pidx, flag );
1731 #endif
1732     }
1733     else
1734     {
1735         flag = u_surface < absorb + detect + reflect_diffuse_ ?  SURFACE_DREFLECT : SURFACE_SREFLECT ;
1736         switch(flag)
1737         {
1738             case SURFACE_DREFLECT: reflect_diffuse( rng, ctx)  ; break ;
1739             case SURFACE_SREFLECT: reflect_specular(rng, ctx)  ; break ;
1740         }
1741 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
1742         if(ctx.pidx == base->pidx)
1743         printf("//qsim.propagate_at_surface.DR/SR.CONTINUE pidx %7lld : flag %d \n" , ctx.pidx, flag );
1744 #endif
1745     }
1746     return action ;
1747 }
1748
1749
1750 inline QSIM_METHOD int qsim::propagate_at_surface_Detect(unsigned& flag, RNG& rng, sctx& ctx) const
1751 {
1752     float u_surface_burn = curand_uniform(&rng);  // for random alignment
1753     flag = SURFACE_DETECT ;
1754     return BREAK ;
1755 }
1756
1757
1758 #if defined(WITH_CUSTOM4)
1759
1760 /**
1761 qsim::propagate_at_surface_CustomART
1762 -------------------------------------
1763
1764 lpmtid:-1
1765    indicates "not-a-sensor", that occurring at this juncture would
1766    indicate a sensor_identity issue as CustomART special surfaces
1767    are expected to always have sensor identities
1768
1769
1770 Where ctx.prd->identity() comes from ? Where is the "+ 1" done ?
1771 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1772
1773 1. SBT::collectInstances sets OptixInstance::instanceId from sqat4::get_IAS_OptixInstance_instanceId
1774    aka "sensor_identifier"
1775
1776 2. CSGFoundry::addInstance for firstcall:true does the "+1" as done by CSGFoundry::addInstanceVector
1777
1778 3. original access to the copyno from Geant4 in U4SensorIdentifierDefault::getInstanceIdentity
1779    which is used from U4Tree::identifySensitiveInstances to populate the stree.h snode::sensor_id
1780
1781 **/
1782
1783 inline QSIM_METHOD int qsim::propagate_at_surface_CustomART(unsigned& flag, RNG& rng, sctx& ctx) const
1784 {
1785
1786     sphoton& p = ctx.p ;
1787     const float3* normal = (float3*)&ctx.prd->q0.f.x ;  // geometrical outwards normal
1788     int lpmtid = ctx.prd->identity() - 1 ;  // identity comes from optixInstance.instanceId where 0 means not-a-sensor
1789     const float lposcost = ctx.prd->lposcost() ;  // local frame intersect position cosine theta
1790
1791
1792     float minus_cos_theta = dot(p.mom, *normal);
1793     float dot_pol_cross_mom_nrm = dot(p.pol,cross(p.mom,*normal)) ;
1794
1795 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
1796     if( ctx.pidx_debug )
1797     {
1798     float3 cross_mom_nrm = cross(p.mom, *normal) ;
1799     printf("//qsim::propagate_at_surface_CustomART pidx %7lld : mom = np.array([%10.8f,%10.8f,%10.8f]) ; lmom = %10.8f \n",
1800        ctx.pidx, p.mom.x, p.mom.y, p.mom.z, length(p.mom) );
1801     printf("//qsim::propagate_at_surface_CustomART pidx %7lld : pol = np.array([%10.8f,%10.8f,%10.8f]) ; lpol = %10.8f \n",
1802        ctx.pidx, p.pol.x, p.pol.y, p.pol.z, length(p.pol) );
1803     printf("//qsim::propagate_at_surface_CustomART pidx %7lld : nrm = np.array([%10.8f,%10.8f,%10.8f]) ; lnrm = %10.8f \n",
1804        ctx.pidx, normal->x, normal->y, normal->z, length(*normal) );
1805     printf("//qsim::propagate_at_surface_CustomART pidx %7lld : cross_mom_nrm = np.array([%10.8f,%10.8f,%10.8f]) ; lcross_mom_nrm = %10.8f  \n",
1806            ctx.pidx, cross_mom_nrm.x, cross_mom_nrm.y, cross_mom_nrm.z, length(cross_mom_nrm)  );
1807     printf("//qsim::propagate_at_surface_CustomART pidx %7lld : dot_pol_cross_mom_nrm = %10.8f \n", ctx.pidx, dot_pol_cross_mom_nrm );
1808     printf("//qsim::propagate_at_surface_CustomART pidx %7lld : minus_cos_theta = %10.8f \n", ctx.pidx, minus_cos_theta );
1809     printf("//qsim::propagate_at_surface_CustomART pidx %7lld : lposcost = %10.8f (expect 0->1)\n", ctx.pidx, lposcost );
1810     }
1811 #endif
1812
1813     // formerly excluded Custom4 hits onto WP PMTs see ~/j/issues/jok-tds-mu-running-NOT-A-SENSOR-warnings.rst
1814     //if(lpmtid < s_pmt::OFFSET_CD_LPMT || lpmtid >= s_pmt::OFFSET_WP_PMT_END )
1815     //if(lpmtid < s_pmt::OFFSET_CD_LPMT || lpmtid >= s_pmt::OFFSET_WP_ATM_LPMT_END )
1816     if(lpmtid < s_pmt::OFFSET_CD_LPMT || lpmtid >=   s_pmt::OFFSET_WP_WAL_PMT_END )
1817     {
1818         flag = NAN_ABORT ;
1819 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
1820         printf("//qsim::propagate_at_surface_CustomART pidx %7lld lpmtid %d : ERROR UNEXPECTED LPMTID : NAN_ABORT \n", ctx.pidx, lpmtid );
1821 #endif
1822         return BREAK ;
1823     }
1824
1825 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
1826     if( ctx.pidx_debug )
1827     printf("//qsim::propagate_at_surface_CustomART pidx %7lld lpmtid %d wl %8.4f mct %8.4f dpcmn %8.4f pmt %p pre-ATQC \n",
1828            ctx.pidx, lpmtid, p.wavelength, minus_cos_theta, dot_pol_cross_mom_nrm, pmt );
1829 #endif
1830
1831     float ATQC[4] = {} ;
1832
1833 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
1834     if(lpmtid > -1 && pmt != nullptr) pmt->get_lpmtid_ATQC(ATQC, lpmtid, p.wavelength, minus_cos_theta, dot_pol_cross_mom_nrm, lposcost, ctx.pidx, ctx.pidx_debug );
1835 #else
1836     if(lpmtid > -1 && pmt != nullptr) pmt->get_lpmtid_ATQC(ATQC, lpmtid, p.wavelength, minus_cos_theta, dot_pol_cross_mom_nrm, lposcost );
1837 #endif
1838
1839
1840
1841 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
1842     if( ctx.pidx_debug )
1843     printf("//qsim::propagate_at_surface_CustomART pidx %7lld lpmtid %d wl %8.4f mct %8.4f dpcmn %8.4f lpc %8.4f ATQC ( %8.4f %8.4f %8.4f %8.4f  ) \n",
1844            ctx.pidx, lpmtid, p.wavelength, minus_cos_theta, dot_pol_cross_mom_nrm, lposcost, ATQC[0], ATQC[1], ATQC[2], ATQC[3] );
1845 #endif
1846
1847
1848     const float& theAbsorption        = ATQC[0];
1849     const float& theTransmittance     = ATQC[1];
1850     const float& theEfficiency        = ATQC[2];
1851     const float& collectionEfficiency = ATQC[3];
1852
1853     float u_theAbsorption = curand_uniform(&rng);
1854     int action = u_theAbsorption < theAbsorption  ? BREAK : CONTINUE ;
1855
1856
1857 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
1858     if( ctx.pidx_debug )
1859         printf("//qsim.propagate_at_surface_CustomART pidx %7lld lpmtid %d ATQC ( %8.4f %8.4f %8.4f %8.4f ) u_theAbsorption  %7.3f action %d \n",
1860         ctx.pidx, lpmtid, ATQC[0], ATQC[1], ATQC[2], ATQC[3], u_theAbsorption, action  );
1861 #endif
1862
1863     if( action == BREAK )
1864     {
1865         float u_theEfficiency = curand_uniform(&rng) ;
1866         float u_collectionEfficiency = curand_uniform(&rng);
1867
1868         flag = u_theEfficiency < theEfficiency ?
1869                                                     (  u_collectionEfficiency < collectionEfficiency ? EFFICIENCY_COLLECT : EFFICIENCY_CULL )
1870                                                :
1871                                                     SURFACE_ABSORB
1872                                                ;
1873
1874         // former SD:SURFACE_DETECT, now becomes EC:EFFICIENCY_COLLECT or EX:EFFICIENCY_CULL depending on collectionEfficiency and random throw
1875
1876 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
1877         if( ctx.pidx_debug )
1878             printf("//qsim.propagate_at_surface_CustomART.BREAK.SD/SA EC/EX pidx %7lld lpmtid %d ATQC ( %8.4f %8.4f %8.4f %8.4f ) u_theEfficiency  %8.4f theEfficiency %8.4f flag %d \n",
1879                                                                     ctx.pidx, lpmtid, ATQC[0],ATQC[1], ATQC[2],ATQC[3],  u_theEfficiency,  theEfficiency, flag );
1880 #endif
1881
1882     }
1883     else
1884     {
1885
1886 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
1887         if( ctx.pidx_debug )
1888             printf("//qsim.propagate_at_surface_CustomART.CONTINUE pidx %7lld lpmtid %d ATQC ( %7.3f %7.3f %7.3f %7.3f ) theTransmittance %7.3f  \n",
1889             ctx.pidx, lpmtid, ATQC[0], ATQC[1], ATQC[2], ATQC[3], theTransmittance  );
1890 #endif
1891
1892         propagate_at_boundary( flag, rng, ctx, theTransmittance  );
1893     }
1894     return action ;
1895 }
1896 #endif
1897
1898 #endif
1899
1900
1901 #if defined(__CUDACC__) || defined(__CUDABE__)  || defined(MOCK_CURAND) || defined(MOCK_CUDA)
1902
1903 /**
1904 qsim::reflect_diffuse cf G4OpBoundaryProcess::DoReflection
1905 -----------------------------------------------------------
1906
1907 * LobeReflection is not yet implemnented in qsim.h
1908
1909 ::
1910
1911     355 inline
1912     356 void G4OpBoundaryProcess_MOCK::DoReflection()
1913     357 {
1914     358         if ( theStatus == LambertianReflection ) {
1915     359
1916     360           NewMomentum = G4LambertianRand(theGlobalNormal);
1917     361           theFacetNormal = (NewMomentum - OldMomentum).unit();
1918     362
1919     363         }
1920     364         else if ( theFinish == ground ) {
1921     365
1922     366           theStatus = LobeReflection;
1923     367           if ( PropertyPointer1 && PropertyPointer2 ){
1924     368           } else {
1925     369              theFacetNormal =
1926     370                  GetFacetNormal(OldMomentum,theGlobalNormal);
1927     371           }
1928     372           G4double PdotN = OldMomentum * theFacetNormal;
1929     373           NewMomentum = OldMomentum - (2.*PdotN)*theFacetNormal;
1930     374
1931     375         }
1932     376         else {
1933     377
1934     378           theStatus = SpikeReflection;
1935     379           theFacetNormal = theGlobalNormal;
1936     380           G4double PdotN = OldMomentum * theFacetNormal;
1937     381           NewMomentum = OldMomentum - (2.*PdotN)*theFacetNormal;
1938     382
1939     383         }
1940     384         G4double EdotN = OldPolarization * theFacetNormal;
1941     385         NewPolarization = -OldPolarization + (2.*EdotN)*theFacetNormal;
1942     386 }
1943
1944
1945
1946
1947 Orient the normal depending on incident momentum to handle inside/outside
1948 --------------------------------------------------------------------------
1949
1950 HMM: Geant4 is very flippy with normals whereas Opticks uses fixed
1951 geometrical normals that may then need to be oriented for a specfic task,
1952 such as diffuse reflection from the shape.
1953
1954
1955 ::
1956
1957         geometrical normal
1958          :
1959        \ : /  diffuse reflection from outside in same hemi as geometrical normal : orient 1.f
1960         \:/   incident momentum is against the geometrical normal   dot( old_mom, *normal ) < 0.f
1961        --*------+
1962       /         |
1963      /          |
1964      \          /
1965       \ \   /  /   diffuse reflection from inside needs to use a flipped geometrical normal : orient -1.f
1966        \ \ /  /    incident moment is with the geometrical normal   dot( old_mom, *normal ) > 0.f
1967         \_*__/
1968           :
1969           :
1970           geometrical normal
1971
1972
1973 orient is used to flip the reflection normal back against the incident direction
1974
1975 **/
1976
1977 inline QSIM_METHOD void qsim::reflect_diffuse( RNG& rng, sctx& ctx )
1978 {
1979     sphoton& p = ctx.p ;
1980
1981     float3 old_mom = p.mom ;
1982
1983     const float3* normal = ctx.prd->normal() ;    // geometrical outwards normal
1984
1985     //const float orient = -1.f ; // BUG : FIXED ORIENT FLIP CANNOT BE CORRECT
1986     const float orient = dot( old_mom, *normal ) > 0.f ? -1.f : 1.f ;
1987
1988     lambertian_direction( &p.mom, normal, orient, rng, ctx );
1989
1990
1991     float3 facet_normal = normalize( p.mom - old_mom );
1992     const float EdotN = dot( p.pol, facet_normal );
1993     p.pol = -1.f*(p.pol) + 2.f*EdotN*facet_normal ;
1994
1995 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
1996     if(ctx.pidx == base->pidx)
1997     {
1998     printf("//qsim.reflect_diffuse pidx %7lld : old_mom = np.array([%10.5f,%10.5f,%10.5f]) \n",
1999         ctx.pidx,  old_mom.x, old_mom.y, old_mom.z ) ;
2000
2001     printf("//qsim.reflect_diffuse pidx %7lld : normal0 = np.array([%10.5f,%10.5f,%10.5f]) \n",
2002         ctx.pidx,  normal->x, normal->y, normal->z ) ;
2003
2004     printf("//qsim.reflect_diffuse pidx %7lld : p.mom = np.array([%10.5f,%10.5f,%10.5f]) \n",
2005         ctx.pidx,  p.mom.x, p.mom.y, p.mom.z ) ;
2006
2007     printf("//qsim.reflect_diffuse pidx %7lld : facet_normal = np.array([%10.5f,%10.5f,%10.5f]) \n",
2008         ctx.pidx,  facet_normal.x, facet_normal.y, facet_normal.z ) ;
2009     }
2010 #endif
2011
2012 }
2013
2014 /**
2015 qsim::reflect_specular
2016 -----------------------
2017
2018 TODO: check the fixed orient flip with specular reflections
2019 from inside and outside
2020
2021 Its not so obvious here because the oriented normal is part of the
2022 mom calc so it would be easy to double flip.
2023
2024 G4OpBoundaryProcess::PostStepDoIt does an early flip of theGlobalNormal
2025 but the G4Navigator does flips before that too... so Geant4 is too flippy
2026 to be helpful.
2027
2028 **/
2029
2030 inline QSIM_METHOD void qsim::reflect_specular( RNG& rng, sctx& ctx )
2031 {
2032     sphoton& p = ctx.p ;
2033     const float3* normal = ctx.prd->normal() ;
2034
2035 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
2036     if(ctx.pidx == base->pidx)
2037     {
2038     printf("//qsim.reflect_specular.head pidx %7lld : normal0 = np.array([%10.5f,%10.5f,%10.5f]) \n",
2039         ctx.pidx,  normal->x, normal->y, normal->z ) ;
2040
2041     printf("//qsim.reflect_specular.head pidx %7lld : mom0 = np.array([%10.5f,%10.5f,%10.5f]) \n",
2042         ctx.pidx,  p.mom.x, p.mom.y, p.mom.z ) ;
2043
2044     printf("//qsim.reflect_specular.head pidx %7lld : pol0 = np.array([%10.5f,%10.5f,%10.5f]) \n",
2045         ctx.pidx,  p.pol.x, p.pol.y, p.pol.z ) ;
2046     }
2047 #endif
2048
2049 #ifdef WITH_ORIENT
2050     //const float orient = -1.f ;
2051     const float orient = 1.f ;
2052     // because orient appears twice in the below p.mom p.pol calcs
2053     // it being +1.f or -1.f makes no difference
2054
2055     const float PdotN = dot( p.mom, *normal )*orient ;
2056     p.mom = p.mom - 2.f*PdotN*(*normal)*orient ;
2057
2058     const float EdotN = dot( p.pol, *normal )*orient ;
2059     p.pol = -1.f*(p.pol) + 2.f*EdotN*(*normal)*orient  ;
2060 #else
2061     // removed orient as does not effect calc, hence confusing and pointless
2062     const float PdotN = dot( p.mom, *normal ) ;
2063     p.mom = p.mom - 2.f*PdotN*(*normal) ;
2064
2065     const float EdotN = dot( p.pol, *normal ) ;
2066     p.pol = -1.f*(p.pol) + 2.f*EdotN*(*normal)  ;
2067 #endif
2068
2069 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
2070     if(ctx.pidx == base->pidx)
2071     {
2072     printf("//qsim.reflect_specular.tail pidx %7lld : mom1 = np.array([%10.5f,%10.5f,%10.5f]) ; PdotN = %10.5f ; EdotN = %10.5f \n",
2073         ctx.pidx,  p.mom.x, p.mom.y, p.mom.z, PdotN, EdotN  ) ;
2074
2075     printf("//qsim.reflect_specular.tail pidx %7lld : pol1 = np.array([%10.5f,%10.5f,%10.5f]) \n",
2076         ctx.pidx,  p.pol.x, p.pol.y, p.pol.z ) ;
2077
2078     }
2079 #endif
2080
2081
2082 }
2083
2084 /**
2085 qsim::fake_propagate
2086 -----------------------
2087
2088 This uses mock input prd (quad2) to provide a CUDA only (no OptiX, no geometry)
2089 test of qsim propagation machinery.
2090
2091 * NB: qsim::mock_propagate is intended to be very similar to CSGOptiX/CSGOptiX7.cu::simulate
2092
2093 TODO
2094 ~~~~~
2095
2096 * compare with cx/CSGOptiX7.cu::simulate and find users of this to see if it could be made more similar to cx::simulate
2097 * can sstate be slimmed : seems not very easily
2098 * simplify sstate persisting (quad6?quad5?)
2099
2100 Stages within bounce loop
2101 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2102
2103 1. mock call to OptiX trace : doing "geometry lookup"
2104    photon position + direction -> surface normal + distance and identity, boundary
2105
2106    * cosTheta sign gives boundary orientation
2107
2108 2. lookup material/surface properties using boundary and orientation (cosTheta) from geometry lookup
2109
2110 3. mutate photon and set flag using material properties
2111
2112   * note that photons that SAIL to boundary are mutated twice within the while loop (by propagate_to_boundary and propagate_at_boundary/surface)
2113
2114 Thoughts
2115 ~~~~~~~~~~~
2116
2117 NB: the different collection streams (photon, record, rec, seq) must stay independent
2118 so can switch them off easily in production running
2119
2120
2121 **/
2122
2123 inline QSIM_METHOD void qsim::fake_propagate( sphoton& p, const quad2* mock_prd, RNG& rng, unsigned long long idx )
2124 {
2125     p.set_flag(TORCH);  // setting initial flag : in reality this should be done by generation
2126
2127     qsim* sim = this ;
2128
2129     sctx ctx = {} ;
2130     ctx.p = p ;     // Q: Why is this different from CSGOptiX7.cu:simulate ? A: Presumably due to input photon.
2131     ctx.evt = evt ;
2132     ctx.pidx = idx ;
2133
2134     int command = START ;
2135     int bounce = 0 ;
2136 #ifndef PRODUCTION
2137     ctx.point(bounce);
2138 #endif
2139
2140     while( bounce < evt->max_bounce )
2141     {
2142         ctx.prd = mock_prd + (evt->max_bounce*idx+bounce) ;
2143         if( ctx.prd->boundary() == 0xffffu ) break ;   // SHOULD NEVER HAPPEN : propagate can do nothing meaningful without a boundary
2144 #ifndef PRODUCTION
2145         ctx.trace(bounce);
2146 #endif
2147
2148 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
2149         if(idx == base->pidx)
2150         printf("//qsim.fake_propagate pidx %7lld bounce %d evt.max_bounce %d prd.q0.f.xyzw (%10.4f %10.4f %10.4f %10.4f) \n",
2151              idx, bounce, evt->max_bounce, ctx.prd->q0.f.x, ctx.prd->q0.f.y, ctx.prd->q0.f.z, ctx.prd->q0.f.w );
2152 #endif
2153         command = sim->propagate(bounce, rng, ctx );
2154         bounce++;
2155 #ifndef PRODUCTION
2156         ctx.point(bounce);
2157 #endif
2158         if(command == BREAK) break ;
2159     }
2160 #ifndef PRODUCTION
2161     ctx.end();
2162 #endif
2163     evt->photon[idx] = ctx.p ;
2164 }
2165
2166 /**
2167 qsim::propagate : one "bounce" propagate_to_boundary/propagate_at_boundary
2168 -----------------------------------------------------------------------------
2169
2170 This is canonically invoked from within the bounce loop of CSGOptiX/OptiX7Test.cu:simulate
2171 after tracing (or mock tracing) has populated ctx.prd
2172
2173 MISSING intersects
2174 ~~~~~~~~~~~~~~~~~~~
2175
2176 Formerly thought "MISSING needs to return BREAK" but MISSING
2177 intersects only happen with "broken" test geometry so no need to be
2178 concerned with that.
2179
2180 z-plus special sensor surfaces
2181 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2182
2183 The two kinds of z-plus sensor surfaces need to fallback to ordinary
2184 surface handling for z-minus, when lposcost < 0.f at lower z-hemi.
2185 Initially thought would need to use optical.z to refer to
2186 the fallback and call qbnd::fill_state again.
2187 But of course that is wrong, the osur/isur surface references
2188 already provide the z-minus fallback.
2189 Hence can implement the fallback using standard surface branch
2190 with no change to qbnd::fill_state.
2191 For z-plus with lposcost > 0.f upper z-hemi simply ignore
2192 the s.surface
2193
2194 traditional POM special surface : smatsur_Surface_zplus_sensor_A
2195 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2196
2197 HMM: could use standard surface handling for this but with perfect
2198 detector surface swapped in.
2199
2200
2201 TMM multilayer POM special surface : smatsur_Surface_zplus_sensor_CustomART
2202 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2203
2204 Enabling special surface handling is controlled by ctx.optical.y "ems" enum
2205 which wheels in the qpmt.h stack calc following C4CustomART::doIt
2206 for CustomART special surfaces.
2207
2208 Prior to supporting special surfaces, within the command == BOUNDARY used::
2209
2210     command = ctx.s.optical.x == 0 ?
2211                                       propagate_at_boundary( flag, rng, ctx )
2212                                   :
2213                                       propagate_at_surface( flag, rng, ctx )
2214                                   ;
2215
2216 **/
2217
2218 inline QSIM_METHOD int qsim::propagate(const int bounce, RNG& rng, sctx& ctx )  // ::simulate
2219 {
2220     const unsigned boundary = ctx.prd->boundary() ;
2221     const unsigned identity = ctx.prd->identity() ; // sensor_identifier+1, 0:not-a-sensor
2222     const unsigned iindex = ctx.prd->iindex() ;
2223     const float lposcost = ctx.prd->lposcost() ;  // local frame intersect position cosine theta
2224
2225     const float3* normal = ctx.prd->normal();
2226     float cosTheta = dot(ctx.p.mom, *normal ) ;
2227
2228 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
2229     if( ctx.pidx == base->pidx )
2230     {
2231     printf("\n//qsim.propagate.head pidx %7lld : ctx.evt.index %d evt.index %d \n", ctx.pidx, ctx.evt->index, evt->index );
2232
2233     printf("\n//qsim.propagate.head pidx %7lld : bnc %d boundary %d cosTheta %10.8f \n", ctx.pidx, bounce, boundary, cosTheta );
2234
2235     printf("//qsim.propagate.head pidx %7lld : mom = np.array([%10.8f,%10.8f,%10.8f]) ; lmom = %10.8f  \n",
2236                  ctx.pidx, ctx.p.mom.x, ctx.p.mom.y, ctx.p.mom.z, length(ctx.p.mom) ) ;
2237
2238     printf("//qsim.propagate.head pidx %7lld : pos = np.array([%10.5f,%10.5f,%10.5f]) ; lpos = %10.8f \n",
2239                  ctx.pidx, ctx.p.pos.x, ctx.p.pos.y, ctx.p.pos.z, length(ctx.p.pos) ) ;
2240
2241     printf("//qsim.propagate.head pidx %7lld : nrm = np.array([(%10.8f,%10.8f,%10.8f]) ; lnrm = %10.8f  \n",
2242                  ctx.pidx, normal->x, normal->y, normal->z, length(*normal) );
2243
2244     }
2245 #endif
2246
2247     // copy geometry info into the sphoton struct
2248     ctx.p.set_prd(boundary, identity, cosTheta, iindex );  // HMM: lposcost not passed along
2249
2250     bnd->fill_state(ctx.s, boundary, ctx.p.wavelength, cosTheta, ctx.pidx, base->pidx );
2251
2252     unsigned flag = 0 ;
2253
2254     int command = propagate_to_boundary( flag, rng, ctx );
2255     /**
2256     command possibilities:
2257
2258     1. CONTINUE (eg scatter)
2259     2. BREAK (eg absorb)
2260     3. BOUNDARY
2261
2262     **/
2263
2264 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
2265     if( ctx.pidx == base->pidx )
2266     printf("//qsim.propagate.body pidx %7lld bounce %d command %d flag %d s.optical.x %d s.optical.y %d \n",
2267           ctx.pidx, bounce, command, flag, ctx.s.optical.x, ctx.s.optical.y );
2268 #endif
2269
2270     if( command == BOUNDARY )
2271     {
2272         const int& ems = ctx.s.optical.y ;
2273
2274 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
2275         if( ctx.pidx == base->pidx )
2276         {
2277 #if defined(WITH_CUSTOM4)
2278             printf("//qsim.propagate.body.WITH_CUSTOM4 pidx %7lld  BOUNDARY ems %d lposcost %7.3f \n", ctx.pidx, ems, lposcost );
2279 #else
2280             printf("//qsim.propagate.body.NOT:WITH_CUSTOM4 pidx %7lld BOUNDARY ems %d lposcost %7.3f \n", ctx.pidx, ems, lposcost);
2281 #endif
2282         }
2283 #endif
2284
2285         if( ems == smatsur_NoSurface )
2286         {
2287             command = propagate_at_boundary( flag, rng, ctx ) ;
2288         }
2289         else if( ems == smatsur_Surface )
2290         {
2291             command = propagate_at_surface( flag, rng, ctx ) ;
2292         }
2293         else if( lposcost < 0.f )  // could combine with prior, but handy for debug to keep separate
2294         {
2295 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
2296             if( ctx.pidx == base->pidx )
2297                 printf("//qsim.propagate.body (lposcost < 0.f) pidx %7lld bounce %d command %d flag %d ems %d \n",
2298                 ctx.pidx, bounce, command, flag, ems  );
2299 #endif
2300             command = propagate_at_surface( flag, rng, ctx ) ;
2301         }
2302         else if( ems == smatsur_Surface_zplus_sensor_A )
2303         {
2304             command = propagate_at_surface_Detect( flag, rng, ctx ) ;
2305         }
2306         else if( ems == smatsur_Surface_zplus_sensor_CustomART )
2307         {
2308 #if defined(WITH_CUSTOM4)
2309             command = propagate_at_surface_CustomART( flag, rng, ctx ) ;
2310             //command = base->custom_lut == 0u ? propagate_at_surface_CustomART( flag, rng, ctx ) : propagate_at_surface_MultiFilm(flag, rng, ctx );
2311
2312 #endif
2313         }
2314     }
2315     ctx.p.set_flag(flag);
2316     // Q: Does flag need to be single bit at this point OR can multiple "flags" be OR-ed together here ?
2317     // A: Decided to keep the flag as single bitted, and directly set EFFICENCY_COLLECT/CULL into ctx.p.flagmask
2318
2319
2320 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
2321     if( ctx.pidx == base->pidx )
2322     printf("//qsim.propagate.tail pidx %7lld bounce %d command %d flag %d ctx.s.optical.y(ems) %d \n",
2323              ctx.pidx, bounce, command, flag, ctx.s.optical.y  );
2324 #endif
2325
2326     return command ;
2327 }
2328 /**
2329 Q: Where does ctx.s.optical come from ?
2330 A: Populated in qbnd::fill_state based on boundary and cosTheta sign to get the
2331    su_line index of the optical buffer which distinguishes surface from boundary,
2332    as acted upon above. See also GBndLib::createOpticalBuffer
2333
2334 Q: Can the big "if" above be changed into a switch ?
2335 A: YES, but non-trivially and probably confusingly. This is because
2336    the special surface handling condition needs to be after the possibiliy
2337    of fallback to ordinary surface handling for "lposcost < 0.f" and also
2338    ordinary NoSurface must be possible no matter the lposcost value.
2339
2340 **/
2341
2342
2343
2344 /**
2345 qsim::hemisphere_polarized
2346 ------------------------------
2347
2348
2349           direction      | surface_normal
2350                \         |
2351                 \        |
2352               .  \       |
2353           .       \      |
2354       .            \     |
2355      within         \    |
2356                      \   |
2357                       \  |
2358                        \ |
2359                         \|
2360            --------------+------------
2361
2362
2363 *plane of incidence*
2364     plane containing *surface_normal* *direction* and *within* vectors
2365
2366 *transverse*
2367     vector transverse to the plane of incidence (S polarized)
2368
2369 *within*
2370     vector within the plane of incidence and perpendicular to *direction* (P polarized)
2371
2372
2373 A +ve Z upper hemisphere of *direction* is generated and then rotateUz oriented
2374 to adopt the *surface_normal* vector as its Z direction.
2375
2376 For inwards=true the normal direction is flipped to orient all the directions
2377 inwards.
2378
2379 **/
2380
2381 inline QSIM_METHOD void qsim::hemisphere_polarized( unsigned polz, bool inwards, RNG& rng, sctx& ctx )
2382 {
2383     sphoton& p = ctx.p ;
2384     const float3* normal = ctx.prd->normal() ;
2385
2386     //printf("//qsim.hemisphere_polarized polz %d normal (%10.4f, %10.4f, %10.4f) \n", polz, normal->x, normal->y, normal->z );
2387
2388     float u_hemipol_phi = curand_uniform(&rng) ;
2389     float phi = u_hemipol_phi*2.f*M_PIf;  // 0->2pi
2390     float cosTheta = curand_uniform(&rng) ;      // 0->1
2391
2392 #if !defined(PRODUCTION) && defined(DEBUG_TAG)
2393     stagr& tagr = ctx.tagr ;
2394     tagr.add( stag_hp_ph, u_hemipol_phi );
2395     tagr.add( stag_hp_ph, cosTheta );    // trying to reduce stag::BITS from 5 to 4, so change stag_hp_ct to stag_hp_ph
2396 #endif
2397
2398     float sinTheta = sqrtf(1.f-cosTheta*cosTheta);
2399
2400     p.mom.x = cosf(phi)*sinTheta ;
2401     p.mom.y = sinf(phi)*sinTheta ;
2402     p.mom.z = cosTheta ;
2403
2404     smath::rotateUz( p.mom, (*normal) * ( inwards ? -1.f : 1.f ));
2405
2406     //printf("//qsim.hemisphere_polarized polz %d p.mom (%10.4f, %10.4f, %10.4f) \n", polz, p.mom.x, p.mom.y, p.mom.z );
2407
2408     // what about normal incidence ?
2409     const float3 transverse = normalize(cross(p.mom, (*normal) * ( inwards ? -1.f : 1.f )  )) ; // perpendicular to plane of incidence
2410     const float3 within = normalize( cross(p.mom, transverse) );  //   within plane of incidence and perpendicular to direction
2411
2412     switch(polz)
2413     {
2414         case 0: p.pol = transverse ; break ;   // S-polarizatiom
2415         case 1: p.pol = within     ; break ;   // P-polarization
2416         case 2: p.pol = normalize( 0.5f*transverse + (1.f-0.5f)*within )  ; break ;  // equal admixture
2417     }
2418 }
2419
2420
2421
2422
2423 /**
2424 qsim::generate_photon_simtrace
2425 --------------------------------
2426
2427 Canonical invokation from CSGOptiX7.cu:simtrace
2428
2429 * NB simtrace cxs center-extent-genstep are very different to standard Cerenkov/Scintillation gensteps
2430
2431 These gensteps are for example created in SEvent::MakeCenterExtentGensteps the below generation
2432 should be comparable to the CPU implementation::
2433
2434     SFrameGenstep::GenerateCenterExtentGenstepPhotons
2435     SFrameGenstep::GenerateSimtracePhotons
2436
2437 HMM: note that the photon_id is global to the launch making it potentially a very large number
2438 but for identication purposes having a local to the photons of the genstep index would
2439 be more useful and would allow storage within much less bits.
2440
2441 TODO: implement local index by including photon_id offset with the gensteps
2442
2443 * NB the sxyz.h enum order is different to the python one  eg XYZ=0
2444
2445 SEE : sevent::add_simtrace
2446
2447 **/
2448
2449 inline QSIM_METHOD void qsim::generate_photon_simtrace(quad4& p, RNG& rng, const quad6& gs, unsigned long long photon_id, unsigned genstep_id ) const
2450 {
2451     const int& gencode = gs.q0.i.x ;
2452     switch(gencode)
2453     {
2454         case OpticksGenstep_FRAME:                   generate_photon_simtrace_frame(p, rng, gs, photon_id, genstep_id ); break ;
2455         case OpticksGenstep_INPUT_PHOTON_SIMTRACE:   { p = (quad4&)evt->simtrace[photon_id] ; }                          ; break ;
2456     }
2457 }
2458
2459 inline QSIM_METHOD void qsim::generate_photon_simtrace_frame(quad4& p, RNG& rng, const quad6& gs, unsigned long long photon_id, unsigned genstep_id ) const
2460 {
2461     C4U gsid ;
2462
2463     //int gencode          = gs.q0.i.x ;
2464     int gridaxes           = gs.q0.i.y ;  // { XYZ, YZ, XZ, XY }
2465     gsid.u                 = gs.q0.i.z ;
2466     //unsigned num_photons = gs.q0.u.w ;
2467
2468     p.q0.f.x = gs.q1.f.x ;   // start with genstep local frame position, typically origin  (0,0,0)
2469     p.q0.f.y = gs.q1.f.y ;
2470     p.q0.f.z = gs.q1.f.z ;
2471     p.q0.f.w = 1.f ;
2472
2473     //printf("//qsim.generate_photon_simtrace_frame gridaxes %d gs.q1 (%10.4f %10.4f %10.4f %10.4f) \n", gridaxes, gs.q1.f.x, gs.q1.f.y, gs.q1.f.z, gs.q1.f.w );
2474
2475     float u0 = curand_uniform(&rng);
2476     float sinPhi, cosPhi;
2477 #if defined(MOCK_CURAND) || defined(MOCK_CUDA)
2478     __sincosf(2.f*M_PIf*u0,&sinPhi,&cosPhi);
2479 #else
2480     sincosf(2.f*M_PIf*u0,&sinPhi,&cosPhi);
2481 #endif
2482
2483     float u1 = curand_uniform(&rng);
2484     float cosTheta = 2.f*u1 - 1.f ;
2485     float sinTheta = sqrtf(1.f-cosTheta*cosTheta) ;
2486
2487     //printf("//qsim.generate_photon_simtrace_frame u0 %10.4f sinPhi   %10.4f cosPhi   %10.4f \n", u0, sinPhi, cosPhi );
2488     //printf("//qsim.generate_photon_simtrace_frame u1 %10.4f sinTheta %10.4f cosTheta %10.4f \n", u1, sinTheta, cosTheta );
2489     //printf("//qsim.generate_photon_simtrace_frame  u0 %10.4f sinPhi   %10.4f cosPhi   %10.4f u1 %10.4f sinTheta %10.4f cosTheta %10.4f \n",  u0, sinPhi, cosPhi, u1, sinTheta, cosTheta );
2490
2491     switch( gridaxes )
2492     {
2493         case YZ:  { p.q1.f.x = 0.f    ;  p.q1.f.y = cosPhi ;  p.q1.f.z = sinPhi ;  p.q1.f.w = 0.f ; } ; break ;
2494         case XZ:  { p.q1.f.x = cosPhi ;  p.q1.f.y = 0.f    ;  p.q1.f.z = sinPhi ;  p.q1.f.w = 0.f ; } ; break ;
2495         case XY:  { p.q1.f.x = cosPhi ;  p.q1.f.y = sinPhi ;  p.q1.f.z = 0.f    ;  p.q1.f.w = 0.f ; } ; break ;
2496         case XYZ: { p.q1.f.x = sinTheta*cosPhi ;
2497                     p.q1.f.y = sinTheta*sinPhi ;
2498                     p.q1.f.z = cosTheta        ;
2499                     p.q1.f.w = 0.f ; } ; break ;   // previously used XZ
2500     }
2501
2502
2503     qat4 qt(gs) ; // copy 4x4 transform from last 4 quads of genstep
2504     qt.right_multiply_inplace( p.q0.f, 1.f );   // position
2505     qt.right_multiply_inplace( p.q1.f, 0.f );   // direction
2506
2507
2508     unsigned char ucj = (photon_id < 255 ? photon_id : 255 ) ;
2509     gsid.c4.w = ucj ;
2510     p.q3.u.w = gsid.u ;   // WARNING : THIS GSID LOOKS TO BE STOMPED ON BY sevent::add_simtrace
2511 }
2512
2513 /**
2514 qsim::generate_photon
2515 ----------------------
2516
2517 Moved non-standard center-extent (aka frame) gensteps to use qsim::generate_photon_simtrace not this
2518
2519 **/
2520
2521 inline QSIM_METHOD void qsim::generate_photon(sphoton& p, RNG& rng, const quad6& gs, unsigned long long photon_id, unsigned genstep_id ) const
2522 {
2523     const int& gencode = gs.q0.i.x ;
2524     switch(gencode)
2525     {
2526         case OpticksGenstep_CARRIER:         scarrier::generate(     p, rng, gs, photon_id, genstep_id)  ; break ;
2527         case OpticksGenstep_TORCH:           storch::generate(       p, rng, gs, photon_id, genstep_id ) ; break ;
2528
2529         case OpticksGenstep_G4Cerenkov_modified:
2530         case OpticksGenstep_CERENKOV:
2531                                               cerenkov->generate(    p, rng, gs, photon_id, genstep_id ) ; break ;
2532
2533         case OpticksGenstep_DsG4Scintillation_r4695:
2534         case OpticksGenstep_SCINTILLATION:
2535                                               scint->generate(        p, rng, gs, photon_id, genstep_id ) ; break ;
2536
2537         case OpticksGenstep_INPUT_PHOTON:    { p = evt->photon[photon_id] ; p.set_flag(TORCH) ; }        ; break ;
2538         default:                             generate_photon_dummy(  p, rng, gs, photon_id, genstep_id)  ; break ;
2539     }
2540     p.set_index(photon_id) ;
2541 }
2542 #endif
2543