Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-04-09 07:49:11

0001 /**
0002 qsim.h : GPU side struct prepared CPU side by QSim.hh
0003 ========================================================
0004 
0005 qsim.h replaces the OptiX 6 context in a CUDA-centric way.
0006 Canonical use is from CSGOptiX/CSGOptiX7.cu:simulate
0007 
0008 * qsim.h instance is uploaded once only at CSGOptiX instanciation
0009   as this encompasses the physics not the event-by-event info.
0010 
0011 * qsim encompasses global info relevant to all photons, meaning that any changes
0012   made to the qsim instance from single photon threads must be into thread-owned "idx"
0013   slots into arrays to avoid interference
0014 
0015 * temporary working state local to each photon is held in *sctx*
0016   and passed around using reference arguments
0017 
0018 TODO:
0019 
0020 1. get more of the below to work on CPU with mocked curand (and in future mocked tex2D and cudaTextureObject_t )
0021    NB must move decl and implementation to do this
0022 
0023 **/
0024 
0025 #if defined(__CUDACC__) || defined(__CUDABE__)
0026    #define QSIM_METHOD __device__
0027 #else
0028    #define QSIM_METHOD
0029 #endif
0030 
0031 #include "OpticksGenstep.h"
0032 #include "OpticksPhoton.h"
0033 
0034 #include "sflow.h"
0035 #include "sqat4.h"
0036 #include "sc4u.h"
0037 #include "sxyz.h"
0038 #include "sphoton.h"
0039 
0040 #include "storch.h"
0041 #include "scarrier.h"
0042 #include "sevent.h"
0043 #include "sstate.h"
0044 #include "smatsur.h"
0045 
0046 
0047 #ifndef PRODUCTION
0048 #include "srec.h"
0049 #include "sseq.h"
0050 #include "stag.h"
0051 #ifdef DEBUG_LOGF
0052 #define KLUDGE_FASTMATH_LOGF(u) (u < 0.998f ? __logf(u) : __logf(u) - 0.46735790f*1e-7f )
0053 #endif
0054 #endif
0055 
0056 #include "sctx.h"
0057 
0058 #include "qrng.h"
0059 #include "qbase.h"
0060 #include "qprop.h"
0061 #include "qmultifilm.h"
0062 #include "qbnd.h"
0063 #include "qscint.h"
0064 #include "qcerenkov.h"
0065 #include "qpmt.h"
0066 #include "tcomplex.h"
0067 
0068 
0069 struct qcerenkov ;
0070 
0071 struct qsim
0072 {
0073     qbase*              base ;
0074     sevent*             evt ;
0075     qrng<RNG>*          rng ;
0076     qbnd*               bnd ;
0077     qmultifilm*         multifilm;
0078     qcerenkov*          cerenkov ;
0079     qscint*             scint ;
0080     qpmt<float>*        pmt ;
0081 
0082 #if defined(__CUDACC__) || defined(__CUDABE__)
0083 #else
0084     qsim(); // instanciated on CPU (see QSim::init_sim) and copied to device so no ctor in device code
0085 #endif
0086 
0087     QSIM_METHOD void    generate_photon_dummy( sphoton& p, RNG& rng, const quad6& gs, unsigned long long photon_id, unsigned genstep_id ) const ;
0088     QSIM_METHOD static float3 uniform_sphere(const float u0, const float u1);
0089     QSIM_METHOD static float RandGaussQ_shoot( RNG& rng, float mean, float stdDev );
0090     QSIM_METHOD static void SmearNormal_SigmaAlpha( RNG& rng, float3* smeared_normal, const float3* direction, const float3* normal, float sigma_alpha, const sctx& ctx );
0091     QSIM_METHOD static void SmearNormal_Polish(     RNG& rng, float3* smeared_normal, const float3* direction, const float3* normal, float polish     , const sctx& ctx );
0092 
0093 #if defined(__CUDACC__) || defined(__CUDABE__) || defined( MOCK_CURAND ) || defined(MOCK_CUDA)
0094     QSIM_METHOD static float3 uniform_sphere(RNG& rng);
0095 #endif
0096 
0097 #if defined(__CUDACC__) || defined(__CUDABE__)
0098     QSIM_METHOD float4  multifilm_lookup(unsigned pmtType, float nm, float aoi);
0099 #endif
0100 
0101 #if defined(__CUDACC__) || defined(__CUDABE__) || defined( MOCK_CURAND )  || defined(MOCK_CUDA)
0102     QSIM_METHOD static void lambertian_direction(float3* dir, const float3* normal, float orient, RNG& rng, sctx& ctx );
0103     QSIM_METHOD static void random_direction_marsaglia(float3* dir, RNG& rng, sctx& ctx );
0104     QSIM_METHOD void rayleigh_scatter(RNG& rng, sctx& ctx );
0105     QSIM_METHOD int     propagate_to_boundary( unsigned& flag, RNG& rng, sctx& ctx );
0106 #endif
0107 
0108 #if defined(__CUDACC__) || defined(__CUDABE__) || defined( MOCK_CURAND ) || defined(MOCK_CUDA)
0109     QSIM_METHOD int     propagate_at_boundary(        unsigned& flag, RNG& rng, sctx& ctx, float theTransmittance=-1.f ) const ;
0110     QSIM_METHOD int     propagate_at_boundary_with_T( unsigned& flag, RNG& rng, sctx& ctx, float theTransmittance ) const ;
0111 #endif
0112 
0113 #if defined(__CUDACC__) || defined(__CUDABE__)
0114     QSIM_METHOD int     propagate_at_surface_MultiFilm(unsigned& flag, RNG& rng, sctx& ctx );
0115 #endif
0116 
0117 #if defined(__CUDACC__) || defined(__CUDABE__) || defined( MOCK_CURAND ) || defined(MOCK_CUDA)
0118     QSIM_METHOD int     propagate_at_surface(           unsigned& flag, RNG& rng, sctx& ctx );
0119     QSIM_METHOD int     propagate_at_surface_Detect(    unsigned& flag, RNG& rng, sctx& ctx ) const ;
0120 #if defined( WITH_CUSTOM4 )
0121     QSIM_METHOD int     propagate_at_surface_CustomART( unsigned& flag, RNG& rng, sctx& ctx ) const ;
0122 #endif
0123 #endif
0124 
0125 #if defined(__CUDACC__) || defined(__CUDABE__) || defined( MOCK_CURAND ) || defined(MOCK_CUDA)
0126     QSIM_METHOD void    reflect_diffuse(                       RNG& rng, sctx& ctx );
0127     QSIM_METHOD void    reflect_specular(                      RNG& rng, sctx& ctx );
0128 
0129     QSIM_METHOD void    fake_propagate( sphoton& p, const quad2* mock_prd, RNG& rng, unsigned long long idx );
0130     QSIM_METHOD int     propagate(const int bounce, RNG& rng, sctx& ctx );
0131 
0132     QSIM_METHOD void    hemisphere_polarized( unsigned polz, bool inwards, RNG& rng, sctx& ctx );
0133     QSIM_METHOD void    generate_photon_simtrace(         quad4&   p, RNG& rng, const quad6& gs, unsigned long long photon_id, unsigned genstep_id ) const ;
0134     QSIM_METHOD void    generate_photon_simtrace_frame(   quad4&   p, RNG& rng, const quad6& gs, unsigned long long photon_id, unsigned genstep_id ) const ;
0135     QSIM_METHOD void    generate_photon(                  sphoton& p, RNG& rng, const quad6& gs, unsigned long long photon_id, unsigned genstep_id ) const ;
0136 #endif
0137 };
0138 
0139 // CTOR
0140 #if defined(__CUDACC__) || defined(__CUDABE__)
0141 #else
0142 inline qsim::qsim()    // instanciated on CPU (see QSim::init_sim) and copied to device so no ctor in device code
0143         :
0144         base(nullptr),
0145         evt(nullptr),
0146         rng(nullptr),
0147         bnd(nullptr),
0148         multifilm(nullptr),
0149         cerenkov(nullptr),
0150         scint(nullptr),
0151         pmt(nullptr)
0152     {
0153     }
0154 #endif
0155 
0156 inline QSIM_METHOD void qsim::generate_photon_dummy(sphoton& p_, RNG& rng, const quad6& gs, unsigned long long photon_id, unsigned genstep_id ) const
0157 {
0158     quad4& p = (quad4&)p_ ;
0159 #ifndef PRODUCTION
0160     printf("//qsim::generate_photon_dummy  photon_id %3lld genstep_id %3d  gs.q0.i ( gencode:%3d %3d %3d %3d ) \n",
0161        photon_id,
0162        genstep_id,
0163        gs.q0.i.x,
0164        gs.q0.i.y,
0165        gs.q0.i.z,
0166        gs.q0.i.w
0167       );
0168 #endif
0169     p.q0.i.x = 1 ; p.q0.i.y = 2 ; p.q0.i.z = 3 ; p.q0.i.w = 4 ;
0170     p.q1.i.x = 1 ; p.q1.i.y = 2 ; p.q1.i.z = 3 ; p.q1.i.w = 4 ;
0171     p.q2.i.x = 1 ; p.q2.i.y = 2 ; p.q2.i.z = 3 ; p.q2.i.w = 4 ;
0172     p.q3.i.x = 1 ; p.q3.i.y = 2 ; p.q3.i.z = 3 ; p.q3.i.w = 4 ;
0173 
0174     p.set_flag(TORCH);
0175 }
0176 
0177 inline QSIM_METHOD float3 qsim::uniform_sphere(const float u0, const float u1)
0178 {
0179     float phi = u0*2.f*M_PIf;
0180     float cosTheta = 2.f*u1 - 1.f ; // -1.f -> 1.f
0181     float sinTheta = sqrtf(1.f-cosTheta*cosTheta);
0182     return make_float3(cosf(phi)*sinTheta, sinf(phi)*sinTheta, cosTheta);
0183 }
0184 
0185 
0186 #if defined(__CUDACC__) || defined(__CUDABE__) || defined( MOCK_CURAND ) || defined(MOCK_CUDA)
0187 /**
0188 qsim::uniform_sphere
0189 ---------------------
0190 
0191 **/
0192 inline QSIM_METHOD float3 qsim::uniform_sphere(RNG& rng)
0193 {
0194     float phi = curand_uniform(&rng)*2.f*M_PIf;
0195     float cosTheta = 2.f*curand_uniform(&rng) - 1.f ; // -1.f -> 1.f
0196     float sinTheta = sqrtf(1.f-cosTheta*cosTheta);
0197     return make_float3(cosf(phi)*sinTheta, sinf(phi)*sinTheta, cosTheta);
0198 }
0199 
0200 /**
0201 qsim::RandGaussQ_shoot
0202 ------------------------
0203 
0204 See::
0205 
0206     sysrap/tests/erfcinvf_Test.sh
0207     sysrap/tests/S4MTRandGaussQTest.sh
0208 
0209     g4-cls RandGaussQ
0210     g4-cls G4MTRandGaussQ
0211 
0212 **/
0213 inline QSIM_METHOD float qsim::RandGaussQ_shoot( RNG& rng, float mean, float stdDev )
0214 {
0215     float u2 = 2.f*curand_uniform(&rng) ;
0216     float v = -M_SQRT2f*erfcinvf(u2)*stdDev + mean ;
0217     //printf("//qsim.RandGaussQ_shoot mean %10.5f stdDev %10.5f u2 %10.5f v %10.5f \n", mean, stdDev, u2, v  ) ;
0218     return v ;
0219 }
0220 
0221 
0222 /**
0223 qsim::SmearNormal_SigmaAlpha
0224 ------------------------------
0225 
0226 CAUTION : THIS CURRENTLY NOT USED BY ANYTHING OTHER THAN TESTS
0227 
0228 The reason is that a Geant4 simulation with some sigma_alpha/polish
0229 surfaces will not actually use G4OpBoundaryProcess::GetFacetNormal unless
0230 a bunch of conditions are satisfied such as having non-zero values of some
0231 of prob_sl/prob_ss/prob_bs that induces G4OpBoundaryProcess::ChooseReflection
0232 to return something other that LambertianReflection.
0233 
0234 **THIS MAKES ME SUSPECT ACTUAL USE OF NORMAL SMEARING IS VERY RARE**
0235 
0236 +----------------------+-------------------------------+------------------------------+----------------------+
0237 | G4OpBoundaryProcess  | G4MaterialPropertiesIndex.hh  | G4MaterialPropertiesTable.cc | ChooseReflection     |
0238 +======================+===============================+==============================+======================+
0239 | prob_sl              | kSPECULARLOBECONSTANT         | SPECULARLOBECONSTANT         | LobeReflection       |
0240 +----------------------+-------------------------------+------------------------------+----------------------+
0241 | prob_ss              | kSPECULARSPIKECONSTANT        | SPECULARSPIKECONSTANT        | SpikeReflection      |
0242 +----------------------+-------------------------------+------------------------------+----------------------+
0243 | prob_bs              | kBACKSCATTERCONSTANT          | BACKSCATTERCONSTANT          | BackScattering       |
0244 +----------------------+-------------------------------+------------------------------+----------------------+
0245 | all zero =>          |                               |                              | LambertianReflection |
0246 +----------------------+-------------------------------+------------------------------+----------------------+
0247 
0248 TODO: full simulation run with breakpoint "BP=C4OpBoundaryProcess::GetFacetNormal"
0249 
0250 * C4 (not G4) as Custom4 is in use for the boundary process
0251 
0252 **/
0253 
0254 inline QSIM_METHOD void qsim::SmearNormal_SigmaAlpha(
0255     RNG& rng,
0256     float3* smeared_normal,
0257     const float3* direction,
0258     const float3* normal,
0259     float sigma_alpha,
0260     const sctx& ctx
0261    )
0262 {
0263 #if !defined(PRODUCTION) && defined(MOCK_CUDA_DEBUG)
0264     bool dump = ctx.pidx == -1 ;
0265 #endif
0266 
0267     if(sigma_alpha == 0.f)
0268     {
0269         *smeared_normal = *normal ;
0270         return ;
0271     }
0272     float f_max = fminf(1.f,4.f*sigma_alpha);
0273 
0274 #if !defined(PRODUCTION) && defined(MOCK_CUDA_DEBUG)
0275     if(dump) printf("//qsim::SmearNormal_SigmaAlpha.MOCK_CUDA_DEBUG sigma_alpha %10.5f f_max %10.5f  \n", sigma_alpha, f_max );
0276 #endif
0277 
0278     float alpha, sin_alpha, phi, u0, u1, u2 ;
0279     bool reject_alpha ;
0280     bool reject_dir ;
0281 
0282     do {
0283         do {
0284             //alpha = RandGaussQ_shoot(rng, 0.f, sigma_alpha );  // mean:0.f stdDev:sigma_alpha
0285             u0 = curand_uniform(&rng) ;
0286             alpha = -M_SQRT2f*erfcinvf(2.f*u0)*sigma_alpha ;
0287 
0288             sin_alpha = sinf(alpha);
0289             u1 = curand_uniform(&rng) ;
0290             reject_alpha = alpha >= M_PIf/2.f || (u1*f_max > sin_alpha) ;
0291 
0292 #if !defined(PRODUCTION) && defined(MOCK_CUDA_DEBUG)
0293             if(dump) printf("//qsim::SmearNormal_SigmaAlpha.MOCK_CUDA_DEBUG u0 %10.5f alpha %10.5f sin_alpha %10.5f u1 %10.5f u1*f_max %10.5f  (u1*f_max > sin_alpha) %d reject_alpha %d  \n",
0294                u0, alpha, sin_alpha, u1, u1*f_max, (u1*f_max > sin_alpha), reject_alpha );
0295             // theres lots of alpha rejected : eg all -ve sin_alpha
0296 #endif
0297 
0298         } while( reject_alpha ) ;
0299 
0300         u2 = curand_uniform(&rng) ;
0301         phi = u2*M_PIf*2.f ;
0302 
0303         smeared_normal->x = sin_alpha * cosf(phi) ;
0304         smeared_normal->y = sin_alpha * sinf(phi) ;
0305         smeared_normal->z = cosf(alpha) ;
0306 
0307         smath::rotateUz(*smeared_normal, *normal);
0308         reject_dir = dot(*smeared_normal, *direction ) >= 0.f ;
0309         // reject smears that move the normal into same hemi as direction
0310 
0311 #if !defined(PRODUCTION) && defined(MOCK_CUDA_DEBUG)
0312         if(dump) printf("//qsim::SmearNormal_SigmaAlpha.MOCK_CUDA_DEBUG u2 %10.5f phi %10.5f smeared_normal ( %10.5f, %10.5f, %10.5f)  reject_dir %d  \n",
0313                u2, phi, smeared_normal->x, smeared_normal->y, smeared_normal->z, reject_dir );
0314 #endif
0315 
0316 
0317     } while( reject_dir ) ;
0318 }
0319 
0320 /**
0321 qsim::SmearNormal_Polish
0322 ------------------------------
0323 
0324 CAUTION : THIS CURRENTLY NOT USED BY ANYTHING OTHER THAN TESTS : SEE DETAILS ABOVE
0325 
0326 **/
0327 
0328 inline QSIM_METHOD void qsim::SmearNormal_Polish(
0329     RNG& rng,
0330     float3* smeared_normal,
0331     const float3* direction,
0332     const float3* normal,
0333     float polish,
0334     const sctx& ctx
0335     )
0336 {
0337 #if !defined(PRODUCTION) && defined(MOCK_CUDA_DEBUG)
0338     bool dump = ctx.pidx == -1 ;
0339 #endif
0340 
0341     if(polish == 1.f)
0342     {
0343         *smeared_normal = *normal ;
0344         return ;
0345     }
0346 
0347     float u0, u1, u2 ;
0348     float3 smear ;
0349     bool reject_mag ;
0350     bool reject_dir ;
0351 
0352     do {
0353         do {
0354             u0 = curand_uniform(&rng);
0355             u1 = curand_uniform(&rng) ;
0356             u2 = curand_uniform(&rng) ;
0357             smear.x = 2.f*u0 - 1.f ;
0358             smear.y = 2.f*u1 - 1.f ;
0359             smear.z = 2.f*u2 - 1.f ;
0360             reject_mag = length(smear) > 1.f  ;   // HMM: could this use just dot(smear, smear) ?
0361        }
0362        while( reject_mag );
0363 
0364        *smeared_normal = *normal + (1.f-polish)*smear;
0365        reject_dir = dot(*smeared_normal, *direction) >= 0.f ;
0366     }
0367     while( reject_dir );
0368     *smeared_normal = normalize(*smeared_normal);
0369 }
0370 
0371 
0372 
0373 
0374 
0375 #endif
0376 
0377 #if defined(__CUDACC__) || defined(__CUDABE__)
0378 /*
0379 qsim::multifilm_lookup
0380 -----------------------
0381 
0382 */
0383 
0384 inline QSIM_METHOD float4 qsim::multifilm_lookup(unsigned pmtType, float nm, float aoi)
0385 {
0386     float4 value = multifilm->lookup(pmtType, nm, aoi);
0387     return value;
0388 }
0389 
0390 #endif
0391 
0392 #if defined(__CUDACC__) || defined(__CUDABE__) || defined(MOCK_CURAND) || defined(MOCK_CUDA)
0393 
0394 /**
0395 qsim::lambertian_direction following G4LambertianRand
0396 --------------------------------------------------------
0397 
0398 g4-cls G4RandomTools::
0399 
0400      59 inline G4ThreeVector G4LambertianRand(const G4ThreeVector& normal)
0401      60 {
0402      61   G4ThreeVector vect;
0403      62   G4double ndotv;
0404      63   G4int count=0;
0405      64   const G4int max_trials = 1024;
0406      65
0407      66   do
0408      67   {
0409      68     ++count;
0410      69     vect = G4RandomDirection();
0411      70     ndotv = normal * vect;
0412      71
0413      72     if (ndotv < 0.0)
0414      73     {
0415      74       vect = -vect;
0416      75       ndotv = -ndotv;
0417      76     }
0418      77
0419      78   } while (!(G4UniformRand() < ndotv) && (count < max_trials));
0420      79
0421      80   return vect;
0422      81 }
0423 
0424 
0425 NB: potentially bad for performance for dir pointer to be into global mem
0426 as opposed to local stack float3 : as this keeps changing the dir before
0427 arriving at the final one
0428 
0429 **/
0430 inline  QSIM_METHOD void qsim::lambertian_direction(float3* dir, const float3* normal, float orient, RNG& rng, sctx& ctx )
0431 {
0432 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
0433     unsigned long long PIDX = 0xffffffffff ;
0434     if(ctx.pidx == PIDX )
0435     {
0436         printf("//qsim.lambertian_direction.head pidx %7lld : normal = np.array([%10.5f,%10.5f,%10.5f]) ; orient = %10.5f  \n",
0437             ctx.pidx, normal->x, normal->y, normal->z, orient  );
0438     }
0439 #endif
0440 
0441     float ndotv ;
0442     int count = 0 ;
0443     float u ;
0444     do
0445     {
0446         count++ ;
0447         random_direction_marsaglia(dir, rng, ctx); // sets dir to random point on unit sphere
0448         ndotv = dot( *dir, *normal )*orient ;
0449         if( ndotv < 0.f )
0450         {
0451             *dir = -1.f*(*dir) ;
0452             ndotv = -1.f*ndotv ;
0453         }
0454         // when random dir is in opposite hemisphere to oriented normal
0455         // flip the dir into same hemi and ndotv
0456 
0457         u = curand_uniform(&rng) ;
0458 
0459 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
0460 
0461         if(ctx.pidx == PIDX)
0462         {
0463             printf("//qsim.lambertian_direction.loop pidx %7lld : dir = np.array([%10.5f,%10.5f,%10.5f]) ; count = %d ; ndotv = %10.5f ; u = %10.5f \n",
0464                 ctx.pidx, dir->x, dir->y, dir->z, count, ndotv, u   );
0465 
0466         }
0467 #endif
0468     }
0469     while (!(u < ndotv) && (count < 1024)) ;
0470     // distribution looks pretty similar without the while loop
0471 
0472 
0473 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
0474     if(ctx.pidx == PIDX)
0475     {
0476         printf("//qsim.lambertian_direction.tail pidx %7lld : dir = np.array([%10.5f,%10.5f,%10.5f]) ; count = %d ; ndotv = %10.5f \n",
0477             ctx.pidx, dir->x, dir->y, dir->z, count, ndotv  );
0478 
0479     }
0480 #endif
0481 
0482 
0483 }
0484 
0485 /**
0486 qsim::random_direction_marsaglia following G4RandomDirection
0487 -------------------------------------------------------------
0488 
0489 * https://mathworld.wolfram.com/SpherePointPicking.html
0490 
0491 ::
0492 
0493                           v
0494                           |
0495               +---------.-|- -----------+
0496               |      .    |     .       |
0497               |   .       |          .  |
0498               |           |             |
0499               | .         |            .|
0500               |           |             |
0501           ----+-----------0-------------+---- u
0502               |.          |             |
0503               |           |            .|
0504               | .         |             |
0505               |           |          .  |
0506               |    .      |      .      |
0507               +--------.--|--.----------+
0508                           |
0509 
0510 Marsaglia (1972) derived an elegant method that consists of picking u and v from independent
0511 uniform distributions on (-1,1) and rejecting points for which uu+vv >=1.
0512 So are picking points from within the (u,v) disc.
0513 
0514 For those remaining random points on the 2D (u,v) disc the below (u,v) to (x,y,z)
0515 mapping is used to give a 3D position on the unit sphere,
0516 
0517     x=2*u*sqrt(1-(uu+vv))
0518     y=2*v*sqrt(1-(uu+vv))
0519     z=1-2(uu+vv)
0520 
0521 Checking normalization, it reduces to 1::
0522 
0523    xx + yy + zz =
0524          4uu (1-(uu+vv))
0525          4vv (1-(uu+vv)) +
0526         1 -4(uu+vv) + 4(uu+vv)(uu+vv)
0527                 = 1
0528 
0529 So that means the random 3D (x,y,z) points are on the unit sphere.
0530 
0531 ::
0532 
0533      g4-cls G4RandomDirection
0534 
0535      58 // G.Marsaglia (1972) method
0536      59 inline G4ThreeVector G4RandomDirection()
0537      60 {
0538      61   G4double u, v, b;
0539      62   do {
0540      63     u = 2.*G4UniformRand() - 1.;
0541      64     v = 2.*G4UniformRand() - 1.;
0542      65     b = u*u + v*v;
0543      66   } while (b > 1.);
0544      67   G4double a = 2.*std::sqrt(1. - b);
0545      68   return G4ThreeVector(a*u, a*v, 2.*b - 1.);
0546      69 }
0547 
0548 **/
0549 
0550 
0551 inline QSIM_METHOD void qsim::random_direction_marsaglia(float3* dir,  RNG& rng, sctx& ctx  )
0552 {
0553     // NB: no use of ctx.tagr so this has not been random aligned
0554     float u0, u1 ;
0555     float u, v, b, a  ;
0556     do
0557     {
0558         u0 = curand_uniform(&rng);
0559         u1 = curand_uniform(&rng);
0560         //if( idx == 0u ) printf("//qsim.random_direction_marsaglia pidx %7lld u0 %10.4f u1 %10.4f \n", ctx.pidx, u0, u1 );
0561         u = 2.f*u0 - 1.f ;
0562         v = 2.f*u1 - 1.f ;
0563         b = u*u + v*v ;
0564     }
0565     while( b > 1.f ) ;
0566 
0567     a = 2.f*sqrtf( 1.f - b );
0568 
0569     dir->x = a*u ;
0570     dir->y = a*v ;
0571     dir->z = 2.f*b - 1.f ;
0572 }
0573 
0574 
0575 
0576 /**
0577 qsim::rayleigh_scatter
0578 ------------------------------
0579 
0580 Following G4OpRayleigh::PostStepDoIt
0581 
0582 * https://bugzilla-geant4.kek.jp/show_bug.cgi?id=207 Xin Qian patch
0583 
0584 
0585 Transverse wave nature means::
0586 
0587    dot(p_direction, p_polarization)  = 0
0588    dot(direction,   polarization)  = 0
0589 
0590 *constant* and normalized direction retains transversality thru the candidate scatter::
0591 
0592     pol = p_pol + constant*dir
0593 
0594     dot(pol, dir) = dot(p_pol, dir) + constant* dot(dir, dir)
0595                   = dot(p_pol, dir) + constant* 1.
0596                   = dot(p_pol, dir) - dot(p_pol, dir)
0597                   = 0.
0598 
0599 **/
0600 
0601 inline QSIM_METHOD void qsim::rayleigh_scatter(RNG& rng, sctx& ctx )
0602 {
0603     sphoton& p = ctx.p ;
0604     float3 direction ;
0605     float3 polarization ;
0606 
0607     bool looping(true) ;
0608     do
0609     {
0610         float u0 = curand_uniform(&rng) ;
0611         float u1 = curand_uniform(&rng) ;
0612         float u2 = curand_uniform(&rng) ;
0613         float u3 = curand_uniform(&rng) ;
0614         float u4 = curand_uniform(&rng) ;
0615 
0616 #if !defined(PRODUCTION) && defined(DEBUG_TAG)
0617         stagr& tagr = ctx.tagr ;  // UNTESTED
0618         tagr.add(stag_sc, u0);
0619         tagr.add(stag_sc, u1);
0620         tagr.add(stag_sc, u2);
0621         tagr.add(stag_sc, u3);
0622         tagr.add(stag_sc, u4);
0623 #endif
0624         float cosTheta = u0 ;
0625         float sinTheta = sqrtf(1.0f-u0*u0);
0626         if(u1 < 0.5f ) cosTheta = -cosTheta ;
0627         // could use uniform_sphere here : but not doing so to follow G4OpRayleigh more closely
0628 
0629         float sinPhi ;
0630         float cosPhi ;
0631 
0632 #if defined(MOCK_CURAND ) || defined(MOCK_CUDA)
0633         //__sincosf(2.f*M_PIf*u2,&sinPhi,&cosPhi);   // apple extension
0634         float phi = 2.f*M_PIf*u2 ;
0635         sinPhi = sinf(phi);
0636         cosPhi = cosf(phi);
0637 #else
0638         sincosf(2.f*M_PIf*u2,&sinPhi,&cosPhi);
0639 #endif
0640 
0641         direction.x = sinTheta * cosPhi;
0642         direction.y = sinTheta * sinPhi;
0643         direction.z = cosTheta ;
0644 
0645         smath::rotateUz(direction, p.mom );
0646 
0647         float constant = -dot(direction, p.pol );
0648 
0649         polarization.x = p.pol.x + constant*direction.x ;
0650         polarization.y = p.pol.y + constant*direction.y ;
0651         polarization.z = p.pol.z + constant*direction.z ;
0652 
0653         if(dot(polarization, polarization) == 0.f )
0654         {
0655 
0656 #if defined( MOCK_CURAND ) || defined(MOCK_CUDA)
0657             //__sincosf(2.f*M_PIf*u3,&sinPhi,&cosPhi);
0658             phi = 2.f*M_PIf*u3 ;
0659             sinPhi = sinf(phi);
0660             cosPhi = cosf(phi);
0661 #else
0662             sincosf(2.f*M_PIf*u3,&sinPhi,&cosPhi);
0663 #endif
0664 
0665             polarization.x = cosPhi ;
0666             polarization.y = sinPhi ;
0667             polarization.z = 0.f ;
0668 
0669             smath::rotateUz(polarization, direction);
0670         }
0671         else
0672         {
0673             // There are two directions which are perpendicular
0674             // to the new momentum direction
0675             if(u3 < 0.5f) polarization = -polarization ;
0676         }
0677         polarization = normalize(polarization);
0678 
0679         // simulate according to the distribution cos^2(theta)
0680         // where theta is the angle between old and new polarizations
0681         float doCosTheta = dot(polarization, p.pol ) ;
0682         float doCosTheta2 = doCosTheta*doCosTheta ;
0683         looping = doCosTheta2 < u4 ;
0684 
0685     } while ( looping ) ;
0686 
0687     p.mom = direction ;
0688     p.pol = polarization ;
0689 }
0690 
0691 
0692 /**
0693 qsim::propagate_to_boundary
0694 ------------------------------
0695 
0696 +---------------------+------------------+---------------------------------------------------------+-------------------------------------------------------+
0697 | flag                |   command        |  changed                                                |  note                                                 |
0698 +=====================+==================+=========================================================+=======================================================+
0699 |   BULK_REEMIT       |   CONTINUE       |  time, position, direction, polarization, wavelength    | advance to reemit position with everything changed    |
0700 +---------------------+------------------+---------------------------------------------------------+-------------------------------------------------------+
0701 |   BULK_SCATTER      |   CONTINUE       |  time, position, direction, polarization                | advance to scatter position, new dir+pol              |
0702 +---------------------+------------------+---------------------------------------------------------+-------------------------------------------------------+
0703 |   BULK_ABSORB       |   BREAK          |  time, position                                         | advance to absorption position, dir+pol unchanged     |
0704 +---------------------+------------------+---------------------------------------------------------+-------------------------------------------------------+
0705 |   not set "SAIL"    |   BOUNDARY       |  time, position                                         | advanced to border position, dir+pol unchanged        |
0706 +---------------------+------------------+---------------------------------------------------------+-------------------------------------------------------+
0707 
0708 
0709 TODO:
0710    whilst in measurement iteration try changing the four "return"
0711    in the below into a single return: by setting command variable
0712    and returning that
0713 
0714 **/
0715 
0716 
0717 
0718 inline QSIM_METHOD int qsim::propagate_to_boundary(unsigned& flag, RNG& rng, sctx& ctx)
0719 {
0720     sphoton& p = ctx.p ;
0721     const sstate& s = ctx.s ;
0722 
0723     const float& absorption_length = s.material1.y ;
0724     const float& scattering_length = s.material1.z ;
0725     const float& reemission_prob = s.material1.w ;
0726     const float& group_velocity = s.m1group2.x ;
0727     const float& distance_to_boundary = ctx.prd->q0.f.w ;
0728 
0729 
0730 #if !defined(PRODUCTION) && defined(DEBUG_TAG)
0731     float u_to_sci = curand_uniform(&rng) ;  // purely for alignment with G4
0732     float u_to_bnd = curand_uniform(&rng) ;  // purely for alignment with G4
0733 #endif
0734     float u_scattering = curand_uniform(&rng) ;
0735     float u_absorption = curand_uniform(&rng) ;
0736 
0737 #if !defined(PRODUCTION) && defined(DEBUG_TAG)
0738     stagr& tagr = ctx.tagr ;
0739     tagr.add( stag_to_sci, u_to_sci);
0740     tagr.add( stag_to_bnd, u_to_bnd);
0741     tagr.add( stag_to_sca, u_scattering);
0742     tagr.add( stag_to_abs, u_absorption);
0743 #endif
0744 
0745 
0746 #if !defined(PRODUCTION) && defined(DEBUG_LOGF)
0747     // see notes/issues/U4LogTest_maybe_replacing_G4Log_G4UniformRand_in_Absorption_and_Scattering_with_float_version_will_avoid_deviations.rst
0748     float scattering_distance = -scattering_length*KLUDGE_FASTMATH_LOGF(u_scattering);
0749     float absorption_distance = -absorption_length*KLUDGE_FASTMATH_LOGF(u_absorption);
0750 #else
0751     float scattering_distance = -scattering_length*logf(u_scattering);
0752     float absorption_distance = -absorption_length*logf(u_absorption);
0753 #endif
0754 
0755 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
0756 
0757     if(ctx.pidx == base->pidx)
0758     {
0759     printf("//qsim.propagate_to_boundary.head pidx %7lld : u_absorption %10.8f logf(u_absorption) %10.8f absorption_length %10.4f absorption_distance %10.6f \n",
0760         ctx.pidx, u_absorption, logf(u_absorption), absorption_length, absorption_distance );
0761 
0762     printf("//qsim.propagate_to_boundary.head pidx %7lld : post = np.array([%10.5f,%10.5f,%10.5f,%10.5f]) \n",
0763         ctx.pidx, p.pos.x, p.pos.y, p.pos.z, p.time );
0764 
0765     printf("//qsim.propagate_to_boundary.head pidx %7lld : distance_to_boundary %10.4f absorption_distance %10.4f scattering_distance %10.4f \n",
0766              ctx.pidx, distance_to_boundary, absorption_distance, scattering_distance );
0767 
0768     printf("//qsim.propagate_to_boundary.head pidx %7lld : u_scattering %10.4f u_absorption %10.4f \n",
0769              ctx.pidx, u_scattering, u_absorption  );
0770 
0771     }
0772 #endif
0773 
0774 
0775 
0776 
0777 
0778     if (absorption_distance <= scattering_distance)
0779     {
0780         if (absorption_distance <= distance_to_boundary)
0781         {
0782             p.time += absorption_distance/group_velocity ;
0783             p.pos  += absorption_distance*(p.mom) ;
0784 
0785 
0786 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
0787             float absorb_time_delta = absorption_distance/group_velocity ;
0788             if( ctx.pidx == base->pidx )
0789             {
0790             printf("//qsim.propagate_to_boundary.body.BULK_ABSORB pidx %7lld : post = np.array([%10.5f,%10.5f,%10.5f,%10.5f]) ; absorb_time_delta = %10.8f   \n",
0791                     ctx.pidx, p.pos.x, p.pos.y, p.pos.z, p.time, absorb_time_delta  );
0792 
0793             }
0794 #endif
0795 
0796             float u_reemit = reemission_prob == 0.f ? 2.f : curand_uniform(&rng);  // avoid consumption at absorption when not scintillator
0797 
0798 
0799 #if !defined(PRODUCTION) && defined(DEBUG_TAG)
0800             if( u_reemit != 2.f ) tagr.add( stag_to_ree, u_reemit) ;
0801 #endif
0802 
0803 
0804             if (u_reemit < reemission_prob)
0805             {
0806                 float u_re_wavelength = curand_uniform(&rng);
0807                 float u_re_mom_ph = curand_uniform(&rng);
0808                 float u_re_mom_ct = curand_uniform(&rng);
0809                 float u_re_pol_ph = curand_uniform(&rng);
0810                 float u_re_pol_ct = curand_uniform(&rng);
0811 
0812                 p.wavelength = scint->wavelength(u_re_wavelength);
0813                 p.mom = uniform_sphere(u_re_mom_ph, u_re_mom_ct);
0814                 p.pol = normalize(cross(uniform_sphere(u_re_pol_ph, u_re_pol_ct), p.mom));
0815 
0816 #if !defined(PRODUCTION) && defined(DEBUG_TAG)
0817                 tagr.add( stag_re_wl, u_re_wavelength);
0818                 tagr.add( stag_re_mom_ph, u_re_mom_ph);
0819                 tagr.add( stag_re_mom_ct, u_re_mom_ct);
0820                 tagr.add( stag_re_pol_ph, u_re_pol_ph);
0821                 tagr.add( stag_re_pol_ct, u_re_pol_ct);
0822 #endif
0823 
0824                 flag = BULK_REEMIT ;
0825                 return CONTINUE;
0826             }
0827             else
0828             {
0829                 flag = BULK_ABSORB ;
0830                 return BREAK;
0831             }
0832         }
0833         //  otherwise sail to boundary
0834     }
0835     else
0836     {
0837         if (scattering_distance <= distance_to_boundary)
0838         {
0839             p.time += scattering_distance/group_velocity ;
0840             p.pos  += scattering_distance*(p.mom) ;
0841 
0842             rayleigh_scatter(rng, ctx);  // changes dir and pol, consumes 5u at each turn of rejection sampling loop
0843 
0844             flag = BULK_SCATTER;
0845 
0846             return CONTINUE;
0847         }
0848           //  otherwise sail to boundary
0849     }     // if scattering_distance < absorption_distance
0850 
0851 
0852 
0853     p.pos  += distance_to_boundary*(p.mom) ;
0854     p.time += distance_to_boundary/group_velocity   ;
0855 
0856 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
0857     float sail_time_delta = distance_to_boundary/group_velocity ;
0858     if( ctx.pidx == base->pidx ) printf("//qsim.propagate_to_boundary.tail.SAIL pidx %7lld : post = np.array([%10.5f,%10.5f,%10.5f,%10.5f]) ;  sail_time_delta = %10.5f   \n",
0859           ctx.pidx, p.pos.x, p.pos.y, p.pos.z, p.time, sail_time_delta  );
0860 #endif
0861 
0862     return BOUNDARY ;
0863 }
0864 #endif
0865 #if defined(__CUDACC__) || defined(__CUDABE__) || defined( MOCK_CURAND ) || defined(MOCK_CUDA)
0866 /**
0867 qsim::propagate_at_boundary
0868 ------------------------------------------
0869 
0870 This was brought over from oxrap/cu/propagate.h:propagate_at_boundary_geant4_style
0871 See env-/g4op-/G4OpBoundaryProcess.cc annotations to follow this
0872 and compare the Opticks and Geant4 implementations.
0873 
0874 Input:
0875 
0876 * p.direction
0877 * p.polarization
0878 * s.material1.x    : refractive index
0879 * s.material2.x    : refractive index
0880 * prd.normal
0881 
0882 Consumes one random deciding between BOUNDARY_REFLECT and BOUNDARY_TRANSMIT
0883 
0884 +---------------------+------------------+-------------------------------+----------+
0885 | output flag         |   command        |  changed                      |  note    |
0886 +=====================+==================+===============================+==========+
0887 |   BOUNDARY_REFLECT  |    -             |  direction, polarization      |          |
0888 +---------------------+------------------+-------------------------------+----------+
0889 |   BOUNDARY_TRANSMIT |    -             |  direction, polarization      |          |
0890 +---------------------+------------------+-------------------------------+----------+
0891 
0892 Notes:
0893 
0894 * when geometry and refractive indices dictates TIR there is no dependence on u_reflect and always get reflection
0895 
0896 
0897 ::
0898 
0899                     s1
0900                   +----+
0901                    \   .   /      ^
0902               c1   i\  .  / r    /|\
0903                      \ . /        |
0904         material1     \./         | n
0905         ---------------+----------+----------
0906         material2      .\
0907                        . \
0908                   c2   .  \ t
0909                        .   \
0910                        +----+
0911                          s2
0912 
0913 Snells law::
0914 
0915      s1    n2
0916     --- = ---         s1 n1 = s2 n2         eta = n1/n2     s1 eta = s2
0917      s2    n1
0918 
0919      s1.s1 = 1 - c1.c1   # trig identity
0920 
0921      s2.s2 = 1 - c2.c2
0922 
0923     s1 eta = s2          # snell
0924 
0925     s1s1 eta eta = s2s2
0926 
0927     ( 1.f - c1c1 ) eta eta = 1.f - c2c2
0928 
0929      c2c2 = 1.f - eta eta ( 1.f - c1c1 )    # snell and trig identity
0930 
0931 Because the electromagnetic wave is transverse, the field incident onto the
0932 interface can be decomposed into two polarization components, one P-polarized,
0933 i.e., with the electric field vector inside the plane of incidence, and the
0934 other one S-polarized, i.e., orthogonal to that plane.
0935 
0936 
0937 inconsistent normal definitions, c1 is expected to be +ve and normal needs to be oriented against initial direction
0938 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
0939 
0940 This is apparent from reflected direction vector::
0941 
0942       *direction + 2.0f*c1*surface_normal
0943 
0944 The standard normal vector at an intersection position on the surface of a shape
0945 is defined to be rigidly oriented outwards away from the shape.
0946 This definition is used by *fill_state* in order to determine properties
0947 of this material m1 and the next material m2 on the other side of the boundary.
0948 
0949 The below math assumes that the photon direction is always against the normal
0950 such that the sign of c1 is +ve. Having -ve c1 leads to non-sensical -ve TranCoeff
0951 which results in always relecting.
0952 
0953 So what about photons going in the other direction ?
0954 Surface normal is used in several places in the below so presumably must
0955 arrange to have an oriented normal that is flipped appropriately OR perhaps can change the math ?
0956 
0957 In summary this is a case of inconsistent definitions of the normal,
0958 that will need to be oriented ~half the time.
0959 
0960 TODO: try avoiding "float3 oriented_normal" instead just use "bool orient"
0961       and multiply prd.normal by 1.f or -1.f depending on orient at every use
0962 
0963 
0964 random aligned matching with examples/Geant/BoundaryStandalone
0965 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
0966 
0967 * S/P/"X"-polarized + TIR + normal_incidence all now matching
0968 * noted a 1-in-a-million deviant from TransCoeff cut edge float/double for S and P
0969 
0970 * initially had two more deviants at very close to normal incidence that were aligned by changing
0971   the criteria to match Geant4 "sint1 == 0." better::
0972 
0973     //const bool normal_incidence = fabs(c1) > 0.999999f ;
0974     const bool normal_incidence = fabs(c1) == 1.f ;
0975 
0976 * see notes/issues/QSimTest_propagate_at_boundary_vs_BoundaryStandalone_G4OpBoundaryProcessTest.rst
0977 
0978 
0979 
0980 **Normal Incidence Special Case**
0981 
0982 Judging normal_incidence based on absolete dot product being exactly unity "c1 == 1.f" is problematic
0983 as when very near to normal incidence there are vectors for which the absolute dot product
0984 is not quite 1.f but the cross product does give an exactly zero vector which gives
0985 A_trans (nan, nan, nan) from the normalize doing : (zero,zero,zero)/zero.
0986 
0987 Solution is to judge normal incidence based on trans_length as that is what the
0988 calculation actually needs to be non-zero in order to be able to normalize trans to give A_trans.
0989 
0990 However using "bool normal_incidence = trans_length == 0.f" also problematic
0991 as it means would be using very small trans vectors to define A_trans and this
0992 would cause a difference with double precision Geant4 and float precision Opticks.
0993 So try using a cutoff "trans_length < 1e-6f" below which to special case normal
0994 incidence.
0995 
0996 **/
0997 
0998 inline QSIM_METHOD int qsim::propagate_at_boundary(unsigned& flag, RNG& rng, sctx& ctx, float theTransmittance ) const
0999 {
1000 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
1001     if(ctx.pidx == base->pidx)
1002     printf("//propagate_at_boundary.DEBUG_PIDX ctx.pidx %7lld base %p base.pidx %7lld \n", ctx.pidx, base, base->pidx  );
1003 #endif
1004 
1005 #if !defined(PRODUCTION) && defined(DEBUG_TAG)
1006     if(ctx.pidx == base->pidx)
1007     printf("//propagate_at_boundary.DEBUG_TAG ctx.pidx %7lld base %p base.pidx %7lld \n", ctx.pidx, base, base->pidx  );
1008 #endif
1009     // stray "return 0;" left here 2024-12-14 caused : ~/j/issues/jok-tds-missing-BR-BT-on-A-side.rst
1010 
1011     sphoton& p = ctx.p ;
1012     const sstate& s = ctx.s ;
1013 
1014     const float& n1 = s.material1.x ;
1015     const float& n2 = s.material2.x ;
1016     const float eta = n1/n2 ;
1017 
1018     const float3* normal = (float3*)&ctx.prd->q0.f.x ;     // geometrical outwards normal
1019 
1020     const float _c1 = -dot(p.mom, *normal );                // _c1 : cos(angle_of_incidence) not yet oriented
1021     const float3 oriented_normal = _c1 < 0.f ? -(*normal) : (*normal) ; // oriented against incident p.mom
1022     const float3 trans = cross(p.mom, oriented_normal) ;   // perpendicular to plane of incidence, S-pol direction
1023     const float trans_length = length(trans) ;             // same as sin(theta), as p.mom and oriented_normal are unit vectors
1024     const bool normal_incidence = trans_length < 1e-6f  ;  // p.mom parallel/anti-parallel to oriented_normal
1025     const float3 A_trans = normal_incidence ? p.pol : trans/trans_length ; // normalized unit vector : perpendicular to plane of incidence
1026     const float E1_perp = dot(p.pol, A_trans);     // amplitude of polarization in direction perpendicular to plane of incidence, ie S polarization
1027 
1028     const float c1 = fabs(_c1) ;
1029 
1030 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
1031     if(ctx.pidx == base->pidx)
1032     {
1033     printf("//qsim.propagate_at_boundary.head pidx %7lld : theTransmittance = %10.8f \n", ctx.pidx, theTransmittance  );
1034     printf("//qsim.propagate_at_boundary.head pidx %7lld : nrm = np.array([%10.8f,%10.8f,%10.8f]) ; lnrm = %10.8f  \n",
1035          ctx.pidx, oriented_normal.x, oriented_normal.y, oriented_normal.z, length(oriented_normal) );
1036     printf("//qsim.propagate_at_boundary.head pidx %7lld : pos = np.array([%10.5f,%10.5f,%10.5f]) ; lpos = %10.8f \n",
1037          ctx.pidx, p.pos.x, p.pos.y, p.pos.z, length(p.pos) );
1038     printf("//qsim.propagate_at_boundary.head pidx %7lld : mom0 = np.array([%10.8f,%10.8f,%10.8f]) ; lmom0 = %10.8f \n",
1039          ctx.pidx, p.mom.x, p.mom.y, p.mom.z, length(p.mom)  );
1040     printf("//qsim.propagate_at_boundary.head pidx %7lld : pol0 = np.array([%10.8f,%10.8f,%10.8f]) ; lpol0 = %10.8f \n",
1041           ctx.pidx, p.pol.x, p.pol.y, p.pol.z, length(p.pol)  );
1042     printf("//qsim.propagate_at_boundary.head pidx %7lld : n1,n2,eta = (%10.8f,%10.8f,%10.8f) \n", ctx.pidx, n1, n2, eta );
1043     printf("//qsim.propagate_at_boundary.head pidx %7lld : c1 = %10.8f ; normal_incidence = %d \n", ctx.pidx, c1, normal_incidence );
1044     }
1045 #endif
1046 
1047     const float c2c2 = 1.f - eta*eta*(1.f - c1 * c1 ) ;   // Snells law and trig identity
1048     bool tir = c2c2 < 0.f ;
1049     const float EdotN = dot(p.pol, oriented_normal ) ;  // used for TIR polarization
1050     const float c2 = tir ? 0.f : sqrtf(c2c2) ;   // c2 chosen +ve, set to 0.f for TIR => reflection_coefficient = 1.0f : so will always reflect
1051     const float n1c1 = n1*c1 ;
1052     const float n2c2 = n2*c2 ;
1053     const float n2c1 = n2*c1 ;
1054     const float n1c2 = n1*c2 ;
1055 
1056     const float2 E1   = normal_incidence ? make_float2( 0.f, 1.f) : make_float2( E1_perp , length( p.pol - (E1_perp*A_trans) ) );
1057     const float2 E2_t = make_float2(  2.f*n1c1*E1.x/(n1c1+n2c2), 2.f*n1c1*E1.y/(n2c1+n1c2) ) ;  // ( S:perp, P:parl )
1058     const float2 E2_r = make_float2( E2_t.x - E1.x             , (n2*E2_t.y/n1) - E1.y     ) ;  // ( S:perp, P:parl )
1059     const float2 RR = normalize(E2_r) ;
1060     const float2 TT = normalize(E2_t) ;
1061     const float TransCoeff = theTransmittance >= 0.f ?
1062                                                            theTransmittance
1063                                                      :
1064                                                            ( tir || n1c1 == 0.f ? 0.f : n2c2*dot(E2_t,E2_t)/n1c1 )
1065                                                      ;
1066 
1067     /*
1068     E1, E2_t, E2_t: incident, transmitted and reflected amplitudes in S and P directions
1069     RR, TT: normalized
1070     */
1071 
1072 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
1073     if(ctx.pidx == base->pidx)
1074     {
1075     printf("//qsim.propagate_at_boundary.body pidx %7lld : TransCoeff = %10.8f ; n1c1 = %10.8f ; n2c2 = %10.8f \n",
1076             ctx.pidx, TransCoeff, n1c1, n2c2 );
1077 
1078     printf("//qsim.propagate_at_boundary.body pidx %7lld : E2_t = np.array([%10.8f,%10.8f]) ; lE2_t = %10.8f \n",
1079             ctx.pidx,  E2_t.x, E2_t.y, length(E2_t) );
1080 
1081     printf("//qsim.propagate_at_boundary.body pidx %7lld : A_trans = np.array([%10.8f,%10.8f,%10.8f]) ; lA_trans = %10.8f \n",
1082             ctx.pidx,  A_trans.x, A_trans.y, A_trans.z, length(A_trans) );
1083 
1084     }
1085 #endif
1086 
1087 
1088 #if !defined(PRODUCTION) && defined(DEBUG_TAG)
1089     const float u_boundary_burn = curand_uniform(&rng) ;  // needed for random consumption alignment with Geant4 G4OpBoundaryProcess::PostStepDoIt
1090 #endif
1091     const float u_reflect = curand_uniform(&rng) ;
1092     bool reflect = u_reflect > TransCoeff  ;
1093 
1094 #if !defined(PRODUCTION) && defined(DEBUG_TAG)
1095     stagr& tagr = ctx.tagr ;
1096     tagr.add( stag_at_burn_sf_sd, u_boundary_burn);
1097     tagr.add( stag_at_ref,  u_reflect);
1098 #endif
1099 
1100 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
1101     if(ctx.pidx == base->pidx)
1102     {
1103     printf("//qsim.propagate_at_boundary.body pidx %7lld : u_reflect %10.4f TransCoeff %10.4f reflect %d \n",
1104               ctx.pidx,  u_reflect, TransCoeff, reflect   );
1105 
1106     printf("//qsim.propagate_at_boundary.body pidx %7lld : mom0 = np.array([%10.8f,%10.8f,%10.8f]) ; lmom0 = %10.8f \n",
1107                ctx.pidx, p.mom.x, p.mom.y, p.mom.z, length(p.mom) ) ;
1108 
1109     printf("//qsim.propagate_at_boundary.body pidx %7lld : pos = np.array([%10.5f,%10.5f,%10.5f]) ; lpos = %10.8f \n",
1110                ctx.pidx, p.pos.x, p.pos.y, p.pos.z, length(p.pos)  );
1111 
1112     printf("//qsim.propagate_at_boundary.body pidx %7lld : nrm = np.array([%10.8f,%10.8f,%10.8f]) ; lnrm = %10.8f \n",
1113                ctx.pidx, oriented_normal.x, oriented_normal.y, oriented_normal.z, length(oriented_normal) ) ;
1114 
1115     printf("//qsim.propagate_at_boundary.body pidx %7lld : n1 = %10.8f ; n2 = %10.8f ; eta = %10.8f  \n",
1116                ctx.pidx, n1, n2, eta );
1117 
1118     printf("//qsim.propagate_at_boundary.body pidx %7lld : c1 = %10.8f ; eta_c1 = %10.8f ; c2 = %10.8f ; eta_c1__c2 = %10.8f \n",
1119                ctx.pidx, c1, eta*c1, c2, (eta*c1 - c2) );
1120 
1121     }
1122 #endif
1123 
1124     p.mom = reflect
1125                     ?
1126                        p.mom + 2.0f*c1*oriented_normal
1127                     :
1128                        eta*(p.mom) + (eta*c1 - c2)*oriented_normal
1129                     ;
1130 
1131 
1132     // Q: Does the new p.mom need to be normalized ?
1133     // A: NO, it is inherently normalized as derived in the comment below
1134 
1135 
1136     const float3 A_paral = normalize(cross(p.mom, A_trans));   // new P-pol direction
1137 
1138     p.pol =  normal_incidence ?
1139                                          ( reflect ?  p.pol*(n2>n1? -1.f:1.f) : p.pol )
1140                                       :
1141                                          ( reflect ?
1142                                                    ( tir ?  -p.pol + 2.f*EdotN*oriented_normal : RR.x*A_trans + RR.y*A_paral )
1143 
1144                                                    :
1145                                                        TT.x*A_trans + TT.y*A_paral
1146 
1147                                                    )
1148                                       ;
1149 
1150 
1151 
1152      // Q: Above expression kinda implies A_trans and A_paral are same for reflect and transmit ?
1153      // A: NO IT DOESNT,
1154      //    A_trans is the same (except for normal incidence) as there is only one perpendicular
1155      //    to the plane of incidence which is the same for i,r,t.
1156      //
1157      //    A_paral depends on the new p.mom (is has to be orthogonal to p.mom and A_trans)
1158      //    and p.mom of course is different for r and t
1159      //    (the reflect bool is used in multiple places, not just here)
1160 
1161 
1162 
1163 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
1164     if(ctx.pidx == base->pidx)
1165     {
1166     printf("//qsim.propagate_at_boundary.tail pidx %7lld : reflect %d tir %d TransCoeff %10.4f u_reflect %10.4f \n", ctx.pidx, reflect, tir, TransCoeff, u_reflect );
1167     printf("//qsim.propagate_at_boundary.tail pidx %7lld : mom1 = np.array([%10.8f,%10.8f,%10.8f]) ; lmom1 = %10.8f  \n",
1168         ctx.pidx, p.mom.x, p.mom.y, p.mom.z, length(p.mom) );
1169     printf("//qsim.propagate_at_boundary.tail pidx %7lld : pol1 = np.array([%10.8f,%10.8f,%10.8f]) ; lpol1 = %10.8f \n",
1170         ctx.pidx, p.pol.x, p.pol.y, p.pol.z, length(p.pol) );
1171     }
1172 
1173     /*
1174     if(ctx.pidx == 251959)
1175     {
1176         printf("//qsim.propagate_at_boundary RR.x %10.4f A_trans (%10.4f %10.4f %10.4f )  RR.y %10.4f  A_paral (%10.4f %10.4f %10.4f ) \n",
1177               RR.x, A_trans.x, A_trans.y, A_trans.z,
1178               RR.y, A_paral.x, A_paral.y, A_paral.z );
1179 
1180         printf("//qsim.propagate_at_boundary reflect %d  tir %d polarization (%10.4f, %10.4f, %10.4f) \n", reflect, tir, p.pol.x, p.pol.y, p.pol.z );
1181     }
1182     */
1183 #endif
1184 
1185     flag = reflect ? BOUNDARY_REFLECT : BOUNDARY_TRANSMIT ;
1186 
1187 
1188 #if !defined(PRODUCTION) && defined(DEBUG_TAG)
1189     if( flag ==  BOUNDARY_REFLECT )
1190     {
1191         const float u_br_align_0 = curand_uniform(&rng) ;
1192         const float u_br_align_1 = curand_uniform(&rng) ;
1193         const float u_br_align_2 = curand_uniform(&rng) ;
1194         const float u_br_align_3 = curand_uniform(&rng) ;
1195 
1196         // switched below standard tags from stag_br_align_0/1/2/3 to simplify A:tag to B:stack mapping
1197         tagr.add( stag_to_sci, u_br_align_0 );
1198         tagr.add( stag_to_bnd, u_br_align_1 );
1199         tagr.add( stag_to_sca, u_br_align_2 );
1200         tagr.add( stag_to_abs, u_br_align_3 );
1201     }
1202 #endif
1203 
1204     return CONTINUE ;
1205 }
1206 /**
1207 qsim::propagate_at_boundary_with_T
1208 ------------------------------------
1209 
1210 Variant of qsim::propagate_at_boundary where a value of theTransmittance
1211 is passed in as an argument (eg from CustomART calculation) rather then
1212 calculated here.
1213 
1214 HMM: was hoping that passing in theTransmittance would afford some
1215 simplifications, but it seems almost no simplification is possible
1216 as need to calculate everything anyhow for the polarization.
1217 
1218 Given that this is almost exactly the same as qsim::propagate_at_boundary
1219 and no simplification is afforded, might as well just keep using that
1220 and leave this just for testing.
1221 
1222 **/
1223 
1224 inline QSIM_METHOD int qsim::propagate_at_boundary_with_T(unsigned& flag, RNG& rng, sctx& ctx, float theTransmittance ) const
1225 {
1226     sphoton& p = ctx.p ;
1227     const sstate& s = ctx.s ;
1228 
1229     const float& n1 = s.material1.x ;
1230     const float& n2 = s.material2.x ;
1231     const float eta = n1/n2 ;
1232 
1233     const float3* normal = (float3*)&ctx.prd->q0.f.x ;     // geometrical outwards normal
1234 
1235     const float _c1 = -dot(p.mom, *normal );                // _c1 : cos(angle_of_incidence) not yet oriented
1236     const float3 oriented_normal = _c1 < 0.f ? -(*normal) : (*normal) ; // oriented against incident p.mom
1237     const float3 trans = cross(p.mom, oriented_normal) ;   // perpendicular to plane of incidence, S-pol direction
1238     const float trans_length = length(trans) ;             // same as sin(theta), as p.mom and oriented_normal are unit vectors
1239     const bool normal_incidence = trans_length < 1e-6f  ;  // p.mom parallel/anti-parallel to oriented_normal
1240     const float3 A_trans = normal_incidence ? p.pol : trans/trans_length ; // normalized unit vector : perpendicular to plane of incidence
1241     const float E1_perp = dot(p.pol, A_trans);     // amplitude of polarization in direction perpendicular to plane of incidence, ie S polarization
1242 
1243     const float c1 = fabs(_c1) ;
1244 
1245     const float c2c2 = 1.f - eta*eta*(1.f - c1 * c1 ) ;   // Snells law and trig identity
1246     bool tir = c2c2 < 0.f ;
1247     const float EdotN = dot(p.pol, oriented_normal ) ;  // used for TIR polarization
1248     const float c2 = tir ? 0.f : sqrtf(c2c2) ;   // c2 chosen +ve, set to 0.f for TIR => reflection_coefficient = 1.0f : so will always reflect
1249     const float n1c1 = n1*c1 ;
1250     const float n2c2 = n2*c2 ;
1251     const float n2c1 = n2*c1 ;
1252     const float n1c2 = n1*c2 ;
1253 
1254     const float2 E1   = normal_incidence ? make_float2( 0.f, 1.f) : make_float2( E1_perp , length( p.pol - (E1_perp*A_trans) ) );
1255     const float2 E2_t = make_float2(  2.f*n1c1*E1.x/(n1c1+n2c2), 2.f*n1c1*E1.y/(n2c1+n1c2) ) ;  // ( S:perp, P:parl )
1256     const float2 E2_r = make_float2( E2_t.x - E1.x             , (n2*E2_t.y/n1) - E1.y     ) ;  // ( S:perp, P:parl )
1257     const float2 RR = normalize(E2_r) ;
1258     const float2 TT = normalize(E2_t) ;
1259 
1260 
1261 /*
1262     const float TransCoeff = theTransmittance >= 0.f ?
1263                                                            theTransmittance
1264                                                      :
1265                                                            ( tir || n1c1 == 0.f ? 0.f : n2c2*dot(E2_t,E2_t)/n1c1 )
1266                                                      ;
1267 */
1268 
1269     const float& TransCoeff = theTransmittance ;
1270 
1271     const float u_reflect = curand_uniform(&rng) ;
1272     bool reflect = u_reflect > TransCoeff  ;
1273 
1274     p.mom = reflect
1275                     ?
1276                        p.mom + 2.0f*c1*oriented_normal
1277                     :
1278                        eta*(p.mom) + (eta*c1 - c2)*oriented_normal
1279                     ;
1280 
1281 
1282     const float3 A_paral = normalize(cross(p.mom, A_trans));   // new P-pol direction
1283 
1284     p.pol =  normal_incidence ?
1285                                          ( reflect ?  p.pol*(n2>n1? -1.f:1.f) : p.pol )
1286                                       :
1287                                          ( reflect ?
1288                                                    ( tir ?  -p.pol + 2.f*EdotN*oriented_normal : RR.x*A_trans + RR.y*A_paral )
1289 
1290                                                    :
1291                                                        TT.x*A_trans + TT.y*A_paral
1292 
1293                                                    )
1294                                       ;
1295 
1296     flag = reflect ? BOUNDARY_REFLECT : BOUNDARY_TRANSMIT ;
1297 
1298 
1299     return CONTINUE ;
1300 }
1301 
1302 #endif
1303 
1304 
1305 
1306 
1307 
1308 
1309 
1310 
1311 /**
1312 Reflected momentum vector
1313 ---------------------------
1314 
1315 ::
1316 
1317      p.mom(new) = p.mom(old) + 2.f*c1*oriented_normal
1318 
1319 
1320      PdotN = dot(p.mom(old), oriented_normal) = -c1
1321 
1322      "c1" is +ve, above dot product -ve as incident vector is against the normal
1323 
1324      p.mom(new) = p.mom(old) -2.f dot(p.mom(old), oriented_normal) * oriented_normal
1325 
1326      NewMomentum = OldMomentum - 2*PdotN*oriented_normal
1327 
1328 
1329 
1330      OA = oriented_normal
1331 
1332      IO = OC = p.mom (old)
1333           OR = p.mom (new)
1334 
1335           OR = OC + CD + DR  = OC + 2.f*DR
1336           CD = DR = c1*oriented_normal
1337 
1338 
1339                   s1   s1
1340                 I----A----R
1341                 .\   .   /.
1342             c1  . \  .  / .  c1
1343                 .  \ . /  .
1344                 .   \./   .
1345        ---------+----O----D------
1346                 :    :.   :
1347                 :    : .  :  c1
1348            c1   :    :  . :
1349                 :    :   .:
1350                 +....B....C
1351                  s1    s1
1352 
1353 
1354 * NB that is inherently normalized, as demonstrated in the below reference
1355 
1356 
1357 Transmitted momentum vector
1358 -------------------------------
1359 
1360 * https://graphics.stanford.edu/courses/cs148-10-summer/docs/2006--degreve--reflection_refraction.pdf
1361 * ~/opticks_refs/bram_de_greve_reflection_refraction_in_ray_tracing.pdf
1362 
1363 
1364 ::
1365 
1366      eta = n1/n2
1367 
1368      p.mom(new) =  eta*p.mom(old) + (eta*c1 - c2)*oriented_normal
1369 
1370 
1371 Consider the perp and para components of the incident and transmitted mom vectors::
1372 
1373     i = i_perp + i_para
1374     t = t_perp + t_para
1375 
1376     i_perp = ( i.n ) n      (component of i in normal direction)
1377     t_perp = ( t.n ) n      (component of t in normal direction )
1378 
1379     i_para = i - (i.n) n
1380     t_para = t - (t.n) n
1381 
1382     # HMM caution with backwards oriented normal direction convention
1383     #     and dot product signs
1384 
1385 
1386 Snell::
1387 
1388     n1 s1 = n2 s2
1389 
1390     eta s1 = s2      (eta = n1/n2)
1391 
1392     s2 = eta s1
1393 
1394     |i_para| = s1
1395     |t_para| = s2
1396 
1397     t_para  = eta i_para     (as they point in same direction)
1398 
1399     t_para  = eta ( i - (i.n) n )
1400             = eta ( i + c1 n )
1401 
1402     t_perp  = - sqrt( 1-|t_para|^2 ) n     (-ve as opposite direction to oriented normal)
1403             = - c2 n
1404 
1405     t   = eta i + ( eta c1 - c2 ) n
1406 
1407 
1408 Now is that inherently normalized ? YES
1409 
1410    t.t =  eta*eta*i.i  + (eta c1 - c2 )*(eta c1 - c2 )*n.n + 2 eta (eta c1 - c2) i.n
1411 
1412          (i.i = 1, n.n = 1, i.n = -c1 )
1413 
1414        = eta*eta + (eta c1 - c2)*(eta c1 - c2) + 2 eta(eta c1 - c2) (-c1 )
1415 
1416        = eta eta  +  eta eta c1 c1 + c2 c2 - 2 eta c1 c2  - 2 eta eta c1 c1 + 2 eta c2 c1
1417                                            --------------                    --------------
1418        =   eta eta  ( 1 - c1 c1 ) + c2 c2
1419 
1420        = 1.f      ( form above   c2c2 = 1.f - eta eta ( 1.f - c1c1 ) )    # snell and trig identity
1421 
1422 
1423 
1424 Compare transmitted vector with G4OpBoundaryProcess::DielectricDielectric
1425 ----------------------------------------------------------------------------
1426 
1427 ::
1428 
1429     1226                 theStatus = FresnelRefraction;
1430     1227
1431     1228                 if (sint1 > 0.0) {      // incident ray oblique
1432     1229
1433     1230                    alpha = cost1 - cost2*(Rindex2/Rindex1);
1434     1231                    NewMomentum = OldMomentum + alpha*theFacetNormal;
1435     1232                    NewMomentum = NewMomentum.unit();
1436 
1437 
1438     t = eta i  + (eta c1 - c2 ) n      eta = n1/n2
1439     t/eta = i + (c1 - c2/eta ) n
1440 
1441     Because Geant4 normalizes NewMomentum it gets away with
1442     playing fast and loose with factors of 1/eta
1443 
1444 
1445 **/
1446 
1447 
1448 
1449 
1450 
1451 
1452 
1453 /*
1454 G4OpBoundaryProcess::DielectricDielectric
1455 
1456 
1457 1152               if (sint1 > 0.0) {
1458 1153                  A_trans = OldMomentum.cross(theFacetNormal);
1459 1154                  A_trans = A_trans.unit();
1460 1155                  E1_perp = OldPolarization * A_trans;
1461 1156                  E1pp    = E1_perp * A_trans;
1462 1157                  E1pl    = OldPolarization - E1pp;
1463 1158                  E1_parl = E1pl.mag();
1464 1159               }
1465 1160               else {
1466 1161                  A_trans  = OldPolarization;
1467 1162                  // Here we Follow Jackson's conventions and we set the
1468 1163                  // parallel component = 1 in case of a ray perpendicular
1469 1164                  // to the surface
1470 1165                  E1_perp  = 0.0;
1471 1166                  E1_parl  = 1.0;
1472 1167               }
1473 1168
1474 1169               s1 = Rindex1*cost1;
1475 1170               E2_perp = 2.*s1*E1_perp/(Rindex1*cost1+Rindex2*cost2);
1476 1171               E2_parl = 2.*s1*E1_parl/(Rindex2*cost1+Rindex1*cost2);
1477 1172               E2_total = E2_perp*E2_perp + E2_parl*E2_parl;
1478 1173               s2 = Rindex2*cost2*E2_total;
1479 1174
1480 1175               if (theTransmittance > 0) TransCoeff = theTransmittance;
1481 1176               else if (cost1 != 0.0) TransCoeff = s2/s1;
1482 1177               else TransCoeff = 0.0;
1483 
1484 
1485 reflect
1486 
1487 1217
1488 1218                        E2_parl   = Rindex2*E2_parl/Rindex1 - E1_parl;
1489 1219                        E2_perp   = E2_perp - E1_perp;
1490 1220                        E2_total  = E2_perp*E2_perp + E2_parl*E2_parl;
1491 1221                        A_paral   = NewMomentum.cross(A_trans);
1492 1222                        A_paral   = A_paral.unit();
1493 1223                        E2_abs    = std::sqrt(E2_total);
1494 1224                        C_parl    = E2_parl/E2_abs;
1495 1225                        C_perp    = E2_perp/E2_abs;
1496 1226
1497 1227                        NewPolarization = C_parl*A_paral + C_perp*A_trans;
1498 1228
1499 
1500 transmit
1501 
1502 1253                    alpha = cost1 - cost2*(Rindex2/Rindex1);
1503 1254                    NewMomentum = OldMomentum + alpha*theFacetNormal;
1504 1255                    NewMomentum = NewMomentum.unit();
1505 1256 //                   PdotN = -cost2;
1506 1257                    A_paral = NewMomentum.cross(A_trans);
1507 1258                    A_paral = A_paral.unit();
1508 
1509 1259                    E2_abs     = std::sqrt(E2_total);
1510 1260                    C_parl     = E2_parl/E2_abs;
1511 1261                    C_perp     = E2_perp/E2_abs;
1512 1262
1513 1263                    NewPolarization = C_parl*A_paral + C_perp*A_trans;
1514 
1515 */
1516 
1517 
1518 /*
1519 qsim::propagate_at_surface_MultiFilm
1520 -------------------------------
1521 
1522 based on https://juno.ihep.ac.cn/trac/browser/offline/trunk/Simulation/DetSimV2/PMTSim/src/junoPMTOpticalModel.cc
1523 which follows a flawed approach to polarization
1524 
1525 Rs: s-component reflect probability
1526 Ts: s-component transmit probability
1527 Rp: p-component reflect probability
1528 Tp: p-component reflect probability
1529 
1530 TODO:
1531    access the qe
1532    access the multifilm catagory
1533    optical photon polarization
1534 
1535 */
1536 
1537 #if defined(__CUDACC__) || defined(__CUDABE__)
1538 inline QSIM_METHOD int qsim::propagate_at_surface_MultiFilm(unsigned& flag, RNG& rng, sctx& ctx )
1539 {
1540 
1541     const float one = 1.0f;
1542     const sphoton& p = ctx.p ;
1543     const float3* normal = (float3*)&ctx.prd->q0.f.x ;
1544     int   lpmtid = ctx.prd->identity() - 1 ;  // identity comes from optixInstance.instanceId where 0 means not-a-sensor
1545 
1546     float minus_cos_theta = dot(p.mom, *normal);
1547     int pmtcat = pmt->get_lpmtcat_from_lpmtid(lpmtid);
1548     float wv_nm = p.wavelength;
1549 
1550     float4 RsTsRpTp = multifilm->lookup(pmtcat, wv_nm, minus_cos_theta);
1551 
1552 
1553     const float c1 = fabs(minus_cos_theta) ;
1554     const float s1 = sqrtf(one -c1*c1);
1555 
1556     float EsEs = s1 > 0.f ? dot(p.pol, cross( p.mom, *normal))/s1 : 0.f ;
1557     EsEs *= EsEs;   //   orienting normal doesnt matter as squared : this is S_vs_P power fraction
1558 
1559 
1560     float3 ART ;
1561     ART.z = RsTsRpTp.y*EsEs + RsTsRpTp.w*(one - EsEs);
1562     ART.y = RsTsRpTp.x*EsEs + RsTsRpTp.z*(one - EsEs);
1563     ART.x = one - (ART.y+ART.z);
1564 
1565     const float& A = ART.x ;
1566     const float& T = ART.z ;
1567 
1568 
1569     float4 RsTsRpTpNormal = multifilm->lookup(pmtcat, wv_nm, -one );
1570     // Normal means the photon incident from glass to vacuum, AOI = 0 deg  cos_theta = -1.f
1571 
1572     float3 ART_normal;
1573     ART_normal.z = 0.5f*(RsTsRpTpNormal.y + RsTsRpTpNormal.w); // T:0.5f*(Ts+Tp)
1574     ART_normal.y = 0.5f*(RsTsRpTpNormal.x + RsTsRpTpNormal.z); // R:0.5f*(Rs+Rp)
1575     ART_normal.x = one -(ART_normal.y + ART_normal.z) ;        // 1.f - (R+T)
1576 
1577     const float& An = ART_normal.x ;
1578     const float energy_eV = qpmt<float>::hc_eVnm/wv_nm ;
1579     const float qe_scale = pmt->get_qescale_from_lpmtid(lpmtid);
1580     const float qe_shape = pmt->get_lpmtcat_qe(pmtcat, energy_eV);
1581 
1582     const float _qe = minus_cos_theta > 0.f ? 0.f : qe_scale * qe_shape;
1583 
1584     const float& theAbsorption = A;
1585     const float& theTransmittance = T/(one-A);
1586     const float& theEfficiency = _qe/An;
1587 
1588     float u_theAbsorption = curand_uniform(&rng);
1589     int action = u_theAbsorption < theAbsorption  ? BREAK : CONTINUE ;
1590 
1591     if( action == BREAK )
1592     {
1593         float u_theEfficiency = curand_uniform(&rng) ;
1594         flag = u_theEfficiency < theEfficiency ? SURFACE_DETECT : SURFACE_ABSORB ;
1595     }
1596     else
1597     {
1598         propagate_at_boundary( flag, rng, ctx, theTransmittance  );
1599     }
1600 
1601     //printf("//qsim.propagate_at_surface_MultiFilm pidx %7lld lpmtid %d ART ( %7.3f %7.3f %7.3f ) u_theAbsorption  %7.3f action %d \n",
1602     //ctx.pidx, lpmtid, ART.x, ART.y, ART.z, u_theAbsorption, action);
1603 
1604     return action ;
1605 
1606 }
1607 
1608 #endif
1609 
1610 
1611 #if defined(__CUDACC__) || defined(__CUDABE__) || defined( MOCK_CURAND ) || defined(MOCK_CUDA)
1612 
1613 /**
1614 qsim::propagate_at_surface   (HMM: perhaps propagate_at_simplified_surface )
1615 -------------------------------------------------------------------------------
1616 
1617 +---------------------+------------------+---------------------------------------------------------+--------------+
1618 | output flag         |   command        |  changed                                                |  note        |
1619 +=====================+==================+=========================================================+==============+
1620 |   SURFACE_ABSORB    |    BREAK         |                                                         |              |
1621 +---------------------+------------------+---------------------------------------------------------+--------------+
1622 |   SURFACE_DETECT    |    BREAK         |                                                         |              |
1623 +---------------------+------------------+---------------------------------------------------------+--------------+
1624 |   SURFACE_DREFLECT  |    CONTINUE      |   direction, polarization                               |              |
1625 +---------------------+------------------+---------------------------------------------------------+--------------+
1626 |   SURFACE_SREFLECT  |    CONTINUE      |   direction, polarization                               |              |
1627 +---------------------+------------------+---------------------------------------------------------+--------------+
1628 
1629 Action depens on s.surface float4 params::
1630 
1631 +---------------+---------------------+----------------------------------+
1632 | s.surface.x   | detect fraction     |                                  |
1633 +---------------+---------------------+----------------------------------+
1634 | s.surface.y   | absorb fraction     |                                  |
1635 +---------------+---------------------+----------------------------------+
1636 | s.surface.z   | reflect specular    | NOT USED AS ALL FOUR SUM TO 1.   |
1637 +---------------+---------------------+----------------------------------+
1638 | s.surface.w   | reflect diffuse     |                                  |
1639 +---------------+---------------------+----------------------------------+
1640 
1641 Excluding technical alignment burns a single u_surface random is throw
1642 to decide on what happens at the surface according to the float4 probabilities
1643 that are assumed to sum to unity::
1644 
1645    +-------------------+-------------------+---------------------+------------------------+
1646    |                   |                   |                     |                        |
1647    |  absorb           |  detect           |  reflect_diffuse_   |  "reflect_specular_"   |
1648    0--SURFACE_ABSORB---|--SURFACE_DETECT---|--SURFACE_DREFLECT---|--SURFACE_SREFLECT------1
1649    |  s.surface.y      |  s.surface.x      |  s.surface.w        |  s.surface.z           |
1650    |                   |                   |                     |                        |
1651    +-------------------+-------------------+---------------------+------------------------+
1652 
1653 Refs::
1654 
1655     GSurfaceLib::propertyName
1656     GSurfaceLib::createStandardSurface
1657 
1658 Different types of surface are modelled by changing the four values,
1659 in such a way that they always sum to unity::
1660 
1661     In [20]: t.oldsur.shape
1662     Out[20]: (46, 2, 761, 4)
1663 
1664     In [21]: t.oldsur[:,0,:].shape  ## pick payload group 0
1665     Out[21]: (46, 761, 4)
1666 
1667     assert np.all( np.sum(t.oldsur[:,0,:], axis=2) == 1. )
1668 
1669 The s.surface float4 is filled by qbnd::fill_state via::
1670 
1671     const int su_line = cosTheta > 0.f ? line + ISUR : line + OSUR ;
1672     s.surface = boundary_lookup(wavelength,su_line,0) ;
1673     s.optical = optical[su_line].u ;
1674 
1675 **/
1676 
1677 inline QSIM_METHOD int qsim::propagate_at_surface(unsigned& flag, RNG& rng, sctx& ctx)
1678 {
1679     const sstate& s = ctx.s ;
1680     const float& detect = s.surface.x ;
1681     const float& absorb = s.surface.y ;
1682     //const float& reflect_specular_ = s.surface.z ;
1683     const float& reflect_diffuse_  = s.surface.w ;
1684 
1685     float u_surface = curand_uniform(&rng);
1686 
1687 #if !defined(PRODUCTION) && defined(DEBUG_TAG)
1688     stagr& tagr = ctx.tagr ;
1689     float u_surface_burn = curand_uniform(&rng);
1690     tagr.add( stag_at_burn_sf_sd, u_surface);
1691     tagr.add( stag_sf_burn,       u_surface_burn);
1692 #endif
1693 
1694 
1695     int action = u_surface < absorb + detect ? BREAK : CONTINUE  ;
1696 
1697     if( action == BREAK )
1698     {
1699 #if defined(WITH_CUSTOM4)
1700         int pmtid = ctx.prd->identity() - 1 ;     // identity comes from optixInstance.instanceId where 0 means not-a-sensor
1701         float qe = 1.f ;
1702         float u_qe = curand_uniform(&rng);
1703         if( s_pmt::is_spmtid(pmtid) )
1704         {
1705             const float energy_eV = qpmt<float>::hc_eVnm/ctx.p.wavelength ;
1706             float qe_shape = pmt->s_qeshape_prop->interpolate( 0, energy_eV );
1707             float qe_scale = pmt->get_s_qescale_from_spmtid(pmtid);
1708             qe = qe_shape*qe_scale ;
1709 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
1710             if(ctx.pidx == base->pidx)
1711             printf("//qsim.propagate_at_surface.BREAK.is_spmtid pidx %7lld : pmtid %d energy_eV %7.3f qe_shape %7.3f qe_scale %7.3f qe %7.3f detect %7.3f absorb %7.3f reflect_specular %7.3f reflect_diffuse %7.3f \n" ,
1712                 ctx.pidx, pmtid, energy_eV, qe_shape, qe_scale, qe, detect, absorb, s.surface.z, reflect_diffuse_ );
1713 #endif
1714         }
1715         flag = u_surface < absorb ?
1716                                       SURFACE_ABSORB
1717                                   :
1718                                       ( u_qe < qe  ? EFFICIENCY_COLLECT : EFFICIENCY_CULL  )
1719                                   ;
1720 #else
1721         flag = u_surface < absorb ?
1722                                       SURFACE_ABSORB
1723                                   :
1724                                       SURFACE_DETECT
1725                                   ;
1726 #endif
1727 
1728 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
1729         if(ctx.pidx == base->pidx)
1730         printf("//qsim.propagate_at_surface.SA/SD.BREAK pidx %7lld : flag %d \n" , ctx.pidx, flag );
1731 #endif
1732     }
1733     else
1734     {
1735         flag = u_surface < absorb + detect + reflect_diffuse_ ?  SURFACE_DREFLECT : SURFACE_SREFLECT ;
1736         switch(flag)
1737         {
1738             case SURFACE_DREFLECT: reflect_diffuse( rng, ctx)  ; break ;
1739             case SURFACE_SREFLECT: reflect_specular(rng, ctx)  ; break ;
1740         }
1741 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
1742         if(ctx.pidx == base->pidx)
1743         printf("//qsim.propagate_at_surface.DR/SR.CONTINUE pidx %7lld : flag %d \n" , ctx.pidx, flag );
1744 #endif
1745     }
1746     return action ;
1747 }
1748 
1749 
1750 inline QSIM_METHOD int qsim::propagate_at_surface_Detect(unsigned& flag, RNG& rng, sctx& ctx) const
1751 {
1752     float u_surface_burn = curand_uniform(&rng);  // for random alignment
1753     flag = SURFACE_DETECT ;
1754     return BREAK ;
1755 }
1756 
1757 
1758 #if defined(WITH_CUSTOM4)
1759 
1760 /**
1761 qsim::propagate_at_surface_CustomART
1762 -------------------------------------
1763 
1764 lpmtid:-1
1765    indicates "not-a-sensor", that occurring at this juncture would
1766    indicate a sensor_identity issue as CustomART special surfaces
1767    are expected to always have sensor identities
1768 
1769 
1770 Where ctx.prd->identity() comes from ? Where is the "+ 1" done ?
1771 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1772 
1773 1. SBT::collectInstances sets OptixInstance::instanceId from sqat4::get_IAS_OptixInstance_instanceId
1774    aka "sensor_identifier"
1775 
1776 2. CSGFoundry::addInstance for firstcall:true does the "+1" as done by CSGFoundry::addInstanceVector
1777 
1778 3. original access to the copyno from Geant4 in U4SensorIdentifierDefault::getInstanceIdentity
1779    which is used from U4Tree::identifySensitiveInstances to populate the stree.h snode::sensor_id
1780 
1781 **/
1782 
1783 inline QSIM_METHOD int qsim::propagate_at_surface_CustomART(unsigned& flag, RNG& rng, sctx& ctx) const
1784 {
1785 
1786     sphoton& p = ctx.p ;
1787     const float3* normal = (float3*)&ctx.prd->q0.f.x ;  // geometrical outwards normal
1788     int lpmtid = ctx.prd->identity() - 1 ;  // identity comes from optixInstance.instanceId where 0 means not-a-sensor
1789     const float lposcost = ctx.prd->lposcost() ;  // local frame intersect position cosine theta
1790 
1791 
1792     float minus_cos_theta = dot(p.mom, *normal);
1793     float dot_pol_cross_mom_nrm = dot(p.pol,cross(p.mom,*normal)) ;
1794 
1795 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
1796     if( ctx.pidx_debug )
1797     {
1798     float3 cross_mom_nrm = cross(p.mom, *normal) ;
1799     printf("//qsim::propagate_at_surface_CustomART pidx %7lld : mom = np.array([%10.8f,%10.8f,%10.8f]) ; lmom = %10.8f \n",
1800        ctx.pidx, p.mom.x, p.mom.y, p.mom.z, length(p.mom) );
1801     printf("//qsim::propagate_at_surface_CustomART pidx %7lld : pol = np.array([%10.8f,%10.8f,%10.8f]) ; lpol = %10.8f \n",
1802        ctx.pidx, p.pol.x, p.pol.y, p.pol.z, length(p.pol) );
1803     printf("//qsim::propagate_at_surface_CustomART pidx %7lld : nrm = np.array([%10.8f,%10.8f,%10.8f]) ; lnrm = %10.8f \n",
1804        ctx.pidx, normal->x, normal->y, normal->z, length(*normal) );
1805     printf("//qsim::propagate_at_surface_CustomART pidx %7lld : cross_mom_nrm = np.array([%10.8f,%10.8f,%10.8f]) ; lcross_mom_nrm = %10.8f  \n",
1806            ctx.pidx, cross_mom_nrm.x, cross_mom_nrm.y, cross_mom_nrm.z, length(cross_mom_nrm)  );
1807     printf("//qsim::propagate_at_surface_CustomART pidx %7lld : dot_pol_cross_mom_nrm = %10.8f \n", ctx.pidx, dot_pol_cross_mom_nrm );
1808     printf("//qsim::propagate_at_surface_CustomART pidx %7lld : minus_cos_theta = %10.8f \n", ctx.pidx, minus_cos_theta );
1809     printf("//qsim::propagate_at_surface_CustomART pidx %7lld : lposcost = %10.8f (expect 0->1)\n", ctx.pidx, lposcost );
1810     }
1811 #endif
1812 
1813     // formerly excluded Custom4 hits onto WP PMTs see ~/j/issues/jok-tds-mu-running-NOT-A-SENSOR-warnings.rst
1814     //if(lpmtid < s_pmt::OFFSET_CD_LPMT || lpmtid >= s_pmt::OFFSET_WP_PMT_END )
1815     //if(lpmtid < s_pmt::OFFSET_CD_LPMT || lpmtid >= s_pmt::OFFSET_WP_ATM_LPMT_END )
1816     if(lpmtid < s_pmt::OFFSET_CD_LPMT || lpmtid >=   s_pmt::OFFSET_WP_WAL_PMT_END )
1817     {
1818         flag = NAN_ABORT ;
1819 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
1820         printf("//qsim::propagate_at_surface_CustomART pidx %7lld lpmtid %d : ERROR UNEXPECTED LPMTID : NAN_ABORT \n", ctx.pidx, lpmtid );
1821 #endif
1822         return BREAK ;
1823     }
1824 
1825 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
1826     if( ctx.pidx_debug )
1827     printf("//qsim::propagate_at_surface_CustomART pidx %7lld lpmtid %d wl %8.4f mct %8.4f dpcmn %8.4f pmt %p pre-ATQC \n",
1828            ctx.pidx, lpmtid, p.wavelength, minus_cos_theta, dot_pol_cross_mom_nrm, pmt );
1829 #endif
1830 
1831     float ATQC[4] = {} ;
1832 
1833 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
1834     if(lpmtid > -1 && pmt != nullptr) pmt->get_lpmtid_ATQC(ATQC, lpmtid, p.wavelength, minus_cos_theta, dot_pol_cross_mom_nrm, lposcost, ctx.pidx, ctx.pidx_debug );
1835 #else
1836     if(lpmtid > -1 && pmt != nullptr) pmt->get_lpmtid_ATQC(ATQC, lpmtid, p.wavelength, minus_cos_theta, dot_pol_cross_mom_nrm, lposcost );
1837 #endif
1838 
1839 
1840 
1841 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
1842     if( ctx.pidx_debug )
1843     printf("//qsim::propagate_at_surface_CustomART pidx %7lld lpmtid %d wl %8.4f mct %8.4f dpcmn %8.4f lpc %8.4f ATQC ( %8.4f %8.4f %8.4f %8.4f  ) \n",
1844            ctx.pidx, lpmtid, p.wavelength, minus_cos_theta, dot_pol_cross_mom_nrm, lposcost, ATQC[0], ATQC[1], ATQC[2], ATQC[3] );
1845 #endif
1846 
1847 
1848     const float& theAbsorption        = ATQC[0];
1849     const float& theTransmittance     = ATQC[1];
1850     const float& theEfficiency        = ATQC[2];
1851     const float& collectionEfficiency = ATQC[3];
1852 
1853     float u_theAbsorption = curand_uniform(&rng);
1854     int action = u_theAbsorption < theAbsorption  ? BREAK : CONTINUE ;
1855 
1856 
1857 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
1858     if( ctx.pidx_debug )
1859         printf("//qsim.propagate_at_surface_CustomART pidx %7lld lpmtid %d ATQC ( %8.4f %8.4f %8.4f %8.4f ) u_theAbsorption  %7.3f action %d \n",
1860         ctx.pidx, lpmtid, ATQC[0], ATQC[1], ATQC[2], ATQC[3], u_theAbsorption, action  );
1861 #endif
1862 
1863     if( action == BREAK )
1864     {
1865         float u_theEfficiency = curand_uniform(&rng) ;
1866         float u_collectionEfficiency = curand_uniform(&rng);
1867 
1868         flag = u_theEfficiency < theEfficiency ?
1869                                                     (  u_collectionEfficiency < collectionEfficiency ? EFFICIENCY_COLLECT : EFFICIENCY_CULL )
1870                                                :
1871                                                     SURFACE_ABSORB
1872                                                ;
1873 
1874         // former SD:SURFACE_DETECT, now becomes EC:EFFICIENCY_COLLECT or EX:EFFICIENCY_CULL depending on collectionEfficiency and random throw
1875 
1876 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
1877         if( ctx.pidx_debug )
1878             printf("//qsim.propagate_at_surface_CustomART.BREAK.SD/SA EC/EX pidx %7lld lpmtid %d ATQC ( %8.4f %8.4f %8.4f %8.4f ) u_theEfficiency  %8.4f theEfficiency %8.4f flag %d \n",
1879                                                                     ctx.pidx, lpmtid, ATQC[0],ATQC[1], ATQC[2],ATQC[3],  u_theEfficiency,  theEfficiency, flag );
1880 #endif
1881 
1882     }
1883     else
1884     {
1885 
1886 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
1887         if( ctx.pidx_debug )
1888             printf("//qsim.propagate_at_surface_CustomART.CONTINUE pidx %7lld lpmtid %d ATQC ( %7.3f %7.3f %7.3f %7.3f ) theTransmittance %7.3f  \n",
1889             ctx.pidx, lpmtid, ATQC[0], ATQC[1], ATQC[2], ATQC[3], theTransmittance  );
1890 #endif
1891 
1892         propagate_at_boundary( flag, rng, ctx, theTransmittance  );
1893     }
1894     return action ;
1895 }
1896 #endif
1897 
1898 #endif
1899 
1900 
1901 #if defined(__CUDACC__) || defined(__CUDABE__)  || defined(MOCK_CURAND) || defined(MOCK_CUDA)
1902 
1903 /**
1904 qsim::reflect_diffuse cf G4OpBoundaryProcess::DoReflection
1905 -----------------------------------------------------------
1906 
1907 * LobeReflection is not yet implemnented in qsim.h
1908 
1909 ::
1910 
1911     355 inline
1912     356 void G4OpBoundaryProcess_MOCK::DoReflection()
1913     357 {
1914     358         if ( theStatus == LambertianReflection ) {
1915     359
1916     360           NewMomentum = G4LambertianRand(theGlobalNormal);
1917     361           theFacetNormal = (NewMomentum - OldMomentum).unit();
1918     362
1919     363         }
1920     364         else if ( theFinish == ground ) {
1921     365
1922     366           theStatus = LobeReflection;
1923     367           if ( PropertyPointer1 && PropertyPointer2 ){
1924     368           } else {
1925     369              theFacetNormal =
1926     370                  GetFacetNormal(OldMomentum,theGlobalNormal);
1927     371           }
1928     372           G4double PdotN = OldMomentum * theFacetNormal;
1929     373           NewMomentum = OldMomentum - (2.*PdotN)*theFacetNormal;
1930     374
1931     375         }
1932     376         else {
1933     377
1934     378           theStatus = SpikeReflection;
1935     379           theFacetNormal = theGlobalNormal;
1936     380           G4double PdotN = OldMomentum * theFacetNormal;
1937     381           NewMomentum = OldMomentum - (2.*PdotN)*theFacetNormal;
1938     382
1939     383         }
1940     384         G4double EdotN = OldPolarization * theFacetNormal;
1941     385         NewPolarization = -OldPolarization + (2.*EdotN)*theFacetNormal;
1942     386 }
1943 
1944 
1945 
1946 
1947 Orient the normal depending on incident momentum to handle inside/outside
1948 --------------------------------------------------------------------------
1949 
1950 HMM: Geant4 is very flippy with normals whereas Opticks uses fixed
1951 geometrical normals that may then need to be oriented for a specfic task,
1952 such as diffuse reflection from the shape.
1953 
1954 
1955 ::
1956 
1957         geometrical normal
1958          :
1959        \ : /  diffuse reflection from outside in same hemi as geometrical normal : orient 1.f
1960         \:/   incident momentum is against the geometrical normal   dot( old_mom, *normal ) < 0.f
1961        --*------+
1962       /         |
1963      /          |
1964      \          /
1965       \ \   /  /   diffuse reflection from inside needs to use a flipped geometrical normal : orient -1.f
1966        \ \ /  /    incident moment is with the geometrical normal   dot( old_mom, *normal ) > 0.f
1967         \_*__/
1968           :
1969           :
1970           geometrical normal
1971 
1972 
1973 orient is used to flip the reflection normal back against the incident direction
1974 
1975 **/
1976 
1977 inline QSIM_METHOD void qsim::reflect_diffuse( RNG& rng, sctx& ctx )
1978 {
1979     sphoton& p = ctx.p ;
1980 
1981     float3 old_mom = p.mom ;
1982 
1983     const float3* normal = ctx.prd->normal() ;    // geometrical outwards normal
1984 
1985     //const float orient = -1.f ; // BUG : FIXED ORIENT FLIP CANNOT BE CORRECT
1986     const float orient = dot( old_mom, *normal ) > 0.f ? -1.f : 1.f ;
1987 
1988     lambertian_direction( &p.mom, normal, orient, rng, ctx );
1989 
1990 
1991     float3 facet_normal = normalize( p.mom - old_mom );
1992     const float EdotN = dot( p.pol, facet_normal );
1993     p.pol = -1.f*(p.pol) + 2.f*EdotN*facet_normal ;
1994 
1995 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
1996     if(ctx.pidx == base->pidx)
1997     {
1998     printf("//qsim.reflect_diffuse pidx %7lld : old_mom = np.array([%10.5f,%10.5f,%10.5f]) \n",
1999         ctx.pidx,  old_mom.x, old_mom.y, old_mom.z ) ;
2000 
2001     printf("//qsim.reflect_diffuse pidx %7lld : normal0 = np.array([%10.5f,%10.5f,%10.5f]) \n",
2002         ctx.pidx,  normal->x, normal->y, normal->z ) ;
2003 
2004     printf("//qsim.reflect_diffuse pidx %7lld : p.mom = np.array([%10.5f,%10.5f,%10.5f]) \n",
2005         ctx.pidx,  p.mom.x, p.mom.y, p.mom.z ) ;
2006 
2007     printf("//qsim.reflect_diffuse pidx %7lld : facet_normal = np.array([%10.5f,%10.5f,%10.5f]) \n",
2008         ctx.pidx,  facet_normal.x, facet_normal.y, facet_normal.z ) ;
2009     }
2010 #endif
2011 
2012 }
2013 
2014 /**
2015 qsim::reflect_specular
2016 -----------------------
2017 
2018 TODO: check the fixed orient flip with specular reflections
2019 from inside and outside
2020 
2021 Its not so obvious here because the oriented normal is part of the
2022 mom calc so it would be easy to double flip.
2023 
2024 G4OpBoundaryProcess::PostStepDoIt does an early flip of theGlobalNormal
2025 but the G4Navigator does flips before that too... so Geant4 is too flippy
2026 to be helpful.
2027 
2028 **/
2029 
2030 inline QSIM_METHOD void qsim::reflect_specular( RNG& rng, sctx& ctx )
2031 {
2032     sphoton& p = ctx.p ;
2033     const float3* normal = ctx.prd->normal() ;
2034 
2035 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
2036     if(ctx.pidx == base->pidx)
2037     {
2038     printf("//qsim.reflect_specular.head pidx %7lld : normal0 = np.array([%10.5f,%10.5f,%10.5f]) \n",
2039         ctx.pidx,  normal->x, normal->y, normal->z ) ;
2040 
2041     printf("//qsim.reflect_specular.head pidx %7lld : mom0 = np.array([%10.5f,%10.5f,%10.5f]) \n",
2042         ctx.pidx,  p.mom.x, p.mom.y, p.mom.z ) ;
2043 
2044     printf("//qsim.reflect_specular.head pidx %7lld : pol0 = np.array([%10.5f,%10.5f,%10.5f]) \n",
2045         ctx.pidx,  p.pol.x, p.pol.y, p.pol.z ) ;
2046     }
2047 #endif
2048 
2049 #ifdef WITH_ORIENT
2050     //const float orient = -1.f ;
2051     const float orient = 1.f ;
2052     // because orient appears twice in the below p.mom p.pol calcs
2053     // it being +1.f or -1.f makes no difference
2054 
2055     const float PdotN = dot( p.mom, *normal )*orient ;
2056     p.mom = p.mom - 2.f*PdotN*(*normal)*orient ;
2057 
2058     const float EdotN = dot( p.pol, *normal )*orient ;
2059     p.pol = -1.f*(p.pol) + 2.f*EdotN*(*normal)*orient  ;
2060 #else
2061     // removed orient as does not effect calc, hence confusing and pointless
2062     const float PdotN = dot( p.mom, *normal ) ;
2063     p.mom = p.mom - 2.f*PdotN*(*normal) ;
2064 
2065     const float EdotN = dot( p.pol, *normal ) ;
2066     p.pol = -1.f*(p.pol) + 2.f*EdotN*(*normal)  ;
2067 #endif
2068 
2069 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
2070     if(ctx.pidx == base->pidx)
2071     {
2072     printf("//qsim.reflect_specular.tail pidx %7lld : mom1 = np.array([%10.5f,%10.5f,%10.5f]) ; PdotN = %10.5f ; EdotN = %10.5f \n",
2073         ctx.pidx,  p.mom.x, p.mom.y, p.mom.z, PdotN, EdotN  ) ;
2074 
2075     printf("//qsim.reflect_specular.tail pidx %7lld : pol1 = np.array([%10.5f,%10.5f,%10.5f]) \n",
2076         ctx.pidx,  p.pol.x, p.pol.y, p.pol.z ) ;
2077 
2078     }
2079 #endif
2080 
2081 
2082 }
2083 
2084 /**
2085 qsim::fake_propagate
2086 -----------------------
2087 
2088 This uses mock input prd (quad2) to provide a CUDA only (no OptiX, no geometry)
2089 test of qsim propagation machinery.
2090 
2091 * NB: qsim::mock_propagate is intended to be very similar to CSGOptiX/CSGOptiX7.cu::simulate
2092 
2093 TODO
2094 ~~~~~
2095 
2096 * compare with cx/CSGOptiX7.cu::simulate and find users of this to see if it could be made more similar to cx::simulate
2097 * can sstate be slimmed : seems not very easily
2098 * simplify sstate persisting (quad6?quad5?)
2099 
2100 Stages within bounce loop
2101 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2102 
2103 1. mock call to OptiX trace : doing "geometry lookup"
2104    photon position + direction -> surface normal + distance and identity, boundary
2105 
2106    * cosTheta sign gives boundary orientation
2107 
2108 2. lookup material/surface properties using boundary and orientation (cosTheta) from geometry lookup
2109 
2110 3. mutate photon and set flag using material properties
2111 
2112   * note that photons that SAIL to boundary are mutated twice within the while loop (by propagate_to_boundary and propagate_at_boundary/surface)
2113 
2114 Thoughts
2115 ~~~~~~~~~~~
2116 
2117 NB: the different collection streams (photon, record, rec, seq) must stay independent
2118 so can switch them off easily in production running
2119 
2120 
2121 **/
2122 
2123 inline QSIM_METHOD void qsim::fake_propagate( sphoton& p, const quad2* mock_prd, RNG& rng, unsigned long long idx )
2124 {
2125     p.set_flag(TORCH);  // setting initial flag : in reality this should be done by generation
2126 
2127     qsim* sim = this ;
2128 
2129     sctx ctx = {} ;
2130     ctx.p = p ;     // Q: Why is this different from CSGOptiX7.cu:simulate ? A: Presumably due to input photon.
2131     ctx.evt = evt ;
2132     ctx.pidx = idx ;
2133 
2134     int command = START ;
2135     int bounce = 0 ;
2136 #ifndef PRODUCTION
2137     ctx.point(bounce);
2138 #endif
2139 
2140     while( bounce < evt->max_bounce )
2141     {
2142         ctx.prd = mock_prd + (evt->max_bounce*idx+bounce) ;
2143         if( ctx.prd->boundary() == 0xffffu ) break ;   // SHOULD NEVER HAPPEN : propagate can do nothing meaningful without a boundary
2144 #ifndef PRODUCTION
2145         ctx.trace(bounce);
2146 #endif
2147 
2148 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
2149         if(idx == base->pidx)
2150         printf("//qsim.fake_propagate pidx %7lld bounce %d evt.max_bounce %d prd.q0.f.xyzw (%10.4f %10.4f %10.4f %10.4f) \n",
2151              idx, bounce, evt->max_bounce, ctx.prd->q0.f.x, ctx.prd->q0.f.y, ctx.prd->q0.f.z, ctx.prd->q0.f.w );
2152 #endif
2153         command = sim->propagate(bounce, rng, ctx );
2154         bounce++;
2155 #ifndef PRODUCTION
2156         ctx.point(bounce);
2157 #endif
2158         if(command == BREAK) break ;
2159     }
2160 #ifndef PRODUCTION
2161     ctx.end();
2162 #endif
2163     evt->photon[idx] = ctx.p ;
2164 }
2165 
2166 /**
2167 qsim::propagate : one "bounce" propagate_to_boundary/propagate_at_boundary
2168 -----------------------------------------------------------------------------
2169 
2170 This is canonically invoked from within the bounce loop of CSGOptiX/OptiX7Test.cu:simulate
2171 after tracing (or mock tracing) has populated ctx.prd
2172 
2173 MISSING intersects
2174 ~~~~~~~~~~~~~~~~~~~
2175 
2176 Formerly thought "MISSING needs to return BREAK" but MISSING
2177 intersects only happen with "broken" test geometry so no need to be
2178 concerned with that.
2179 
2180 z-plus special sensor surfaces
2181 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2182 
2183 The two kinds of z-plus sensor surfaces need to fallback to ordinary
2184 surface handling for z-minus, when lposcost < 0.f at lower z-hemi.
2185 Initially thought would need to use optical.z to refer to
2186 the fallback and call qbnd::fill_state again.
2187 But of course that is wrong, the osur/isur surface references
2188 already provide the z-minus fallback.
2189 Hence can implement the fallback using standard surface branch
2190 with no change to qbnd::fill_state.
2191 For z-plus with lposcost > 0.f upper z-hemi simply ignore
2192 the s.surface
2193 
2194 traditional POM special surface : smatsur_Surface_zplus_sensor_A
2195 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2196 
2197 HMM: could use standard surface handling for this but with perfect
2198 detector surface swapped in.
2199 
2200 
2201 TMM multilayer POM special surface : smatsur_Surface_zplus_sensor_CustomART
2202 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2203 
2204 Enabling special surface handling is controlled by ctx.optical.y "ems" enum
2205 which wheels in the qpmt.h stack calc following C4CustomART::doIt
2206 for CustomART special surfaces.
2207 
2208 Prior to supporting special surfaces, within the command == BOUNDARY used::
2209 
2210     command = ctx.s.optical.x == 0 ?
2211                                       propagate_at_boundary( flag, rng, ctx )
2212                                   :
2213                                       propagate_at_surface( flag, rng, ctx )
2214                                   ;
2215 
2216 **/
2217 
2218 inline QSIM_METHOD int qsim::propagate(const int bounce, RNG& rng, sctx& ctx )  // ::simulate
2219 {
2220     const unsigned boundary = ctx.prd->boundary() ;
2221     const unsigned identity = ctx.prd->identity() ; // sensor_identifier+1, 0:not-a-sensor
2222     const unsigned iindex = ctx.prd->iindex() ;
2223     const float lposcost = ctx.prd->lposcost() ;  // local frame intersect position cosine theta
2224 
2225     const float3* normal = ctx.prd->normal();
2226     float cosTheta = dot(ctx.p.mom, *normal ) ;
2227 
2228 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
2229     if( ctx.pidx == base->pidx )
2230     {
2231     printf("\n//qsim.propagate.head pidx %7lld : ctx.evt.index %d evt.index %d \n", ctx.pidx, ctx.evt->index, evt->index );
2232 
2233     printf("\n//qsim.propagate.head pidx %7lld : bnc %d boundary %d cosTheta %10.8f \n", ctx.pidx, bounce, boundary, cosTheta );
2234 
2235     printf("//qsim.propagate.head pidx %7lld : mom = np.array([%10.8f,%10.8f,%10.8f]) ; lmom = %10.8f  \n",
2236                  ctx.pidx, ctx.p.mom.x, ctx.p.mom.y, ctx.p.mom.z, length(ctx.p.mom) ) ;
2237 
2238     printf("//qsim.propagate.head pidx %7lld : pos = np.array([%10.5f,%10.5f,%10.5f]) ; lpos = %10.8f \n",
2239                  ctx.pidx, ctx.p.pos.x, ctx.p.pos.y, ctx.p.pos.z, length(ctx.p.pos) ) ;
2240 
2241     printf("//qsim.propagate.head pidx %7lld : nrm = np.array([(%10.8f,%10.8f,%10.8f]) ; lnrm = %10.8f  \n",
2242                  ctx.pidx, normal->x, normal->y, normal->z, length(*normal) );
2243 
2244     }
2245 #endif
2246 
2247     // copy geometry info into the sphoton struct
2248     ctx.p.set_prd(boundary, identity, cosTheta, iindex );  // HMM: lposcost not passed along
2249 
2250     bnd->fill_state(ctx.s, boundary, ctx.p.wavelength, cosTheta, ctx.pidx, base->pidx );
2251 
2252     unsigned flag = 0 ;
2253 
2254     int command = propagate_to_boundary( flag, rng, ctx );
2255     /**
2256     command possibilities:
2257 
2258     1. CONTINUE (eg scatter)
2259     2. BREAK (eg absorb)
2260     3. BOUNDARY
2261 
2262     **/
2263 
2264 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
2265     if( ctx.pidx == base->pidx )
2266     printf("//qsim.propagate.body pidx %7lld bounce %d command %d flag %d s.optical.x %d s.optical.y %d \n",
2267           ctx.pidx, bounce, command, flag, ctx.s.optical.x, ctx.s.optical.y );
2268 #endif
2269 
2270     if( command == BOUNDARY )
2271     {
2272         const int& ems = ctx.s.optical.y ;
2273 
2274 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
2275         if( ctx.pidx == base->pidx )
2276         {
2277 #if defined(WITH_CUSTOM4)
2278             printf("//qsim.propagate.body.WITH_CUSTOM4 pidx %7lld  BOUNDARY ems %d lposcost %7.3f \n", ctx.pidx, ems, lposcost );
2279 #else
2280             printf("//qsim.propagate.body.NOT:WITH_CUSTOM4 pidx %7lld BOUNDARY ems %d lposcost %7.3f \n", ctx.pidx, ems, lposcost);
2281 #endif
2282         }
2283 #endif
2284 
2285         if( ems == smatsur_NoSurface )
2286         {
2287             command = propagate_at_boundary( flag, rng, ctx ) ;
2288         }
2289         else if( ems == smatsur_Surface )
2290         {
2291             command = propagate_at_surface( flag, rng, ctx ) ;
2292         }
2293         else if( lposcost < 0.f )  // could combine with prior, but handy for debug to keep separate
2294         {
2295 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
2296             if( ctx.pidx == base->pidx )
2297                 printf("//qsim.propagate.body (lposcost < 0.f) pidx %7lld bounce %d command %d flag %d ems %d \n",
2298                 ctx.pidx, bounce, command, flag, ems  );
2299 #endif
2300             command = propagate_at_surface( flag, rng, ctx ) ;
2301         }
2302         else if( ems == smatsur_Surface_zplus_sensor_A )
2303         {
2304             command = propagate_at_surface_Detect( flag, rng, ctx ) ;
2305         }
2306         else if( ems == smatsur_Surface_zplus_sensor_CustomART )
2307         {
2308 #if defined(WITH_CUSTOM4)
2309             command = propagate_at_surface_CustomART( flag, rng, ctx ) ;
2310             //command = base->custom_lut == 0u ? propagate_at_surface_CustomART( flag, rng, ctx ) : propagate_at_surface_MultiFilm(flag, rng, ctx );
2311 
2312 #endif
2313         }
2314     }
2315     ctx.p.set_flag(flag);
2316     // Q: Does flag need to be single bit at this point OR can multiple "flags" be OR-ed together here ?
2317     // A: Decided to keep the flag as single bitted, and directly set EFFICENCY_COLLECT/CULL into ctx.p.flagmask
2318 
2319 
2320 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
2321     if( ctx.pidx == base->pidx )
2322     printf("//qsim.propagate.tail pidx %7lld bounce %d command %d flag %d ctx.s.optical.y(ems) %d \n",
2323              ctx.pidx, bounce, command, flag, ctx.s.optical.y  );
2324 #endif
2325 
2326     return command ;
2327 }
2328 /**
2329 Q: Where does ctx.s.optical come from ?
2330 A: Populated in qbnd::fill_state based on boundary and cosTheta sign to get the
2331    su_line index of the optical buffer which distinguishes surface from boundary,
2332    as acted upon above. See also GBndLib::createOpticalBuffer
2333 
2334 Q: Can the big "if" above be changed into a switch ?
2335 A: YES, but non-trivially and probably confusingly. This is because
2336    the special surface handling condition needs to be after the possibiliy
2337    of fallback to ordinary surface handling for "lposcost < 0.f" and also
2338    ordinary NoSurface must be possible no matter the lposcost value.
2339 
2340 **/
2341 
2342 
2343 
2344 /**
2345 qsim::hemisphere_polarized
2346 ------------------------------
2347 
2348 
2349           direction      | surface_normal
2350                \         |
2351                 \        |
2352               .  \       |
2353           .       \      |
2354       .            \     |
2355      within         \    |
2356                      \   |
2357                       \  |
2358                        \ |
2359                         \|
2360            --------------+------------
2361 
2362 
2363 *plane of incidence*
2364     plane containing *surface_normal* *direction* and *within* vectors
2365 
2366 *transverse*
2367     vector transverse to the plane of incidence (S polarized)
2368 
2369 *within*
2370     vector within the plane of incidence and perpendicular to *direction* (P polarized)
2371 
2372 
2373 A +ve Z upper hemisphere of *direction* is generated and then rotateUz oriented
2374 to adopt the *surface_normal* vector as its Z direction.
2375 
2376 For inwards=true the normal direction is flipped to orient all the directions
2377 inwards.
2378 
2379 **/
2380 
2381 inline QSIM_METHOD void qsim::hemisphere_polarized( unsigned polz, bool inwards, RNG& rng, sctx& ctx )
2382 {
2383     sphoton& p = ctx.p ;
2384     const float3* normal = ctx.prd->normal() ;
2385 
2386     //printf("//qsim.hemisphere_polarized polz %d normal (%10.4f, %10.4f, %10.4f) \n", polz, normal->x, normal->y, normal->z );
2387 
2388     float u_hemipol_phi = curand_uniform(&rng) ;
2389     float phi = u_hemipol_phi*2.f*M_PIf;  // 0->2pi
2390     float cosTheta = curand_uniform(&rng) ;      // 0->1
2391 
2392 #if !defined(PRODUCTION) && defined(DEBUG_TAG)
2393     stagr& tagr = ctx.tagr ;
2394     tagr.add( stag_hp_ph, u_hemipol_phi );
2395     tagr.add( stag_hp_ph, cosTheta );    // trying to reduce stag::BITS from 5 to 4, so change stag_hp_ct to stag_hp_ph
2396 #endif
2397 
2398     float sinTheta = sqrtf(1.f-cosTheta*cosTheta);
2399 
2400     p.mom.x = cosf(phi)*sinTheta ;
2401     p.mom.y = sinf(phi)*sinTheta ;
2402     p.mom.z = cosTheta ;
2403 
2404     smath::rotateUz( p.mom, (*normal) * ( inwards ? -1.f : 1.f ));
2405 
2406     //printf("//qsim.hemisphere_polarized polz %d p.mom (%10.4f, %10.4f, %10.4f) \n", polz, p.mom.x, p.mom.y, p.mom.z );
2407 
2408     // what about normal incidence ?
2409     const float3 transverse = normalize(cross(p.mom, (*normal) * ( inwards ? -1.f : 1.f )  )) ; // perpendicular to plane of incidence
2410     const float3 within = normalize( cross(p.mom, transverse) );  //   within plane of incidence and perpendicular to direction
2411 
2412     switch(polz)
2413     {
2414         case 0: p.pol = transverse ; break ;   // S-polarizatiom
2415         case 1: p.pol = within     ; break ;   // P-polarization
2416         case 2: p.pol = normalize( 0.5f*transverse + (1.f-0.5f)*within )  ; break ;  // equal admixture
2417     }
2418 }
2419 
2420 
2421 
2422 
2423 /**
2424 qsim::generate_photon_simtrace
2425 --------------------------------
2426 
2427 Canonical invokation from CSGOptiX7.cu:simtrace
2428 
2429 * NB simtrace cxs center-extent-genstep are very different to standard Cerenkov/Scintillation gensteps
2430 
2431 These gensteps are for example created in SEvent::MakeCenterExtentGensteps the below generation
2432 should be comparable to the CPU implementation::
2433 
2434     SFrameGenstep::GenerateCenterExtentGenstepPhotons
2435     SFrameGenstep::GenerateSimtracePhotons
2436 
2437 HMM: note that the photon_id is global to the launch making it potentially a very large number
2438 but for identication purposes having a local to the photons of the genstep index would
2439 be more useful and would allow storage within much less bits.
2440 
2441 TODO: implement local index by including photon_id offset with the gensteps
2442 
2443 * NB the sxyz.h enum order is different to the python one  eg XYZ=0
2444 
2445 SEE : sevent::add_simtrace
2446 
2447 **/
2448 
2449 inline QSIM_METHOD void qsim::generate_photon_simtrace(quad4& p, RNG& rng, const quad6& gs, unsigned long long photon_id, unsigned genstep_id ) const
2450 {
2451     const int& gencode = gs.q0.i.x ;
2452     switch(gencode)
2453     {
2454         case OpticksGenstep_FRAME:                   generate_photon_simtrace_frame(p, rng, gs, photon_id, genstep_id ); break ;
2455         case OpticksGenstep_INPUT_PHOTON_SIMTRACE:   { p = (quad4&)evt->simtrace[photon_id] ; }                          ; break ;
2456     }
2457 }
2458 
2459 inline QSIM_METHOD void qsim::generate_photon_simtrace_frame(quad4& p, RNG& rng, const quad6& gs, unsigned long long photon_id, unsigned genstep_id ) const
2460 {
2461     C4U gsid ;
2462 
2463     //int gencode          = gs.q0.i.x ;
2464     int gridaxes           = gs.q0.i.y ;  // { XYZ, YZ, XZ, XY }
2465     gsid.u                 = gs.q0.i.z ;
2466     //unsigned num_photons = gs.q0.u.w ;
2467 
2468     p.q0.f.x = gs.q1.f.x ;   // start with genstep local frame position, typically origin  (0,0,0)
2469     p.q0.f.y = gs.q1.f.y ;
2470     p.q0.f.z = gs.q1.f.z ;
2471     p.q0.f.w = 1.f ;
2472 
2473     //printf("//qsim.generate_photon_simtrace_frame gridaxes %d gs.q1 (%10.4f %10.4f %10.4f %10.4f) \n", gridaxes, gs.q1.f.x, gs.q1.f.y, gs.q1.f.z, gs.q1.f.w );
2474 
2475     float u0 = curand_uniform(&rng);
2476     float sinPhi, cosPhi;
2477 #if defined(MOCK_CURAND) || defined(MOCK_CUDA)
2478     __sincosf(2.f*M_PIf*u0,&sinPhi,&cosPhi);
2479 #else
2480     sincosf(2.f*M_PIf*u0,&sinPhi,&cosPhi);
2481 #endif
2482 
2483     float u1 = curand_uniform(&rng);
2484     float cosTheta = 2.f*u1 - 1.f ;
2485     float sinTheta = sqrtf(1.f-cosTheta*cosTheta) ;
2486 
2487     //printf("//qsim.generate_photon_simtrace_frame u0 %10.4f sinPhi   %10.4f cosPhi   %10.4f \n", u0, sinPhi, cosPhi );
2488     //printf("//qsim.generate_photon_simtrace_frame u1 %10.4f sinTheta %10.4f cosTheta %10.4f \n", u1, sinTheta, cosTheta );
2489     //printf("//qsim.generate_photon_simtrace_frame  u0 %10.4f sinPhi   %10.4f cosPhi   %10.4f u1 %10.4f sinTheta %10.4f cosTheta %10.4f \n",  u0, sinPhi, cosPhi, u1, sinTheta, cosTheta );
2490 
2491     switch( gridaxes )
2492     {
2493         case YZ:  { p.q1.f.x = 0.f    ;  p.q1.f.y = cosPhi ;  p.q1.f.z = sinPhi ;  p.q1.f.w = 0.f ; } ; break ;
2494         case XZ:  { p.q1.f.x = cosPhi ;  p.q1.f.y = 0.f    ;  p.q1.f.z = sinPhi ;  p.q1.f.w = 0.f ; } ; break ;
2495         case XY:  { p.q1.f.x = cosPhi ;  p.q1.f.y = sinPhi ;  p.q1.f.z = 0.f    ;  p.q1.f.w = 0.f ; } ; break ;
2496         case XYZ: { p.q1.f.x = sinTheta*cosPhi ;
2497                     p.q1.f.y = sinTheta*sinPhi ;
2498                     p.q1.f.z = cosTheta        ;
2499                     p.q1.f.w = 0.f ; } ; break ;   // previously used XZ
2500     }
2501 
2502 
2503     qat4 qt(gs) ; // copy 4x4 transform from last 4 quads of genstep
2504     qt.right_multiply_inplace( p.q0.f, 1.f );   // position
2505     qt.right_multiply_inplace( p.q1.f, 0.f );   // direction
2506 
2507 
2508     unsigned char ucj = (photon_id < 255 ? photon_id : 255 ) ;
2509     gsid.c4.w = ucj ;
2510     p.q3.u.w = gsid.u ;   // WARNING : THIS GSID LOOKS TO BE STOMPED ON BY sevent::add_simtrace
2511 }
2512 
2513 /**
2514 qsim::generate_photon
2515 ----------------------
2516 
2517 Moved non-standard center-extent (aka frame) gensteps to use qsim::generate_photon_simtrace not this
2518 
2519 **/
2520 
2521 inline QSIM_METHOD void qsim::generate_photon(sphoton& p, RNG& rng, const quad6& gs, unsigned long long photon_id, unsigned genstep_id ) const
2522 {
2523     const int& gencode = gs.q0.i.x ;
2524     switch(gencode)
2525     {
2526         case OpticksGenstep_CARRIER:         scarrier::generate(     p, rng, gs, photon_id, genstep_id)  ; break ;
2527         case OpticksGenstep_TORCH:           storch::generate(       p, rng, gs, photon_id, genstep_id ) ; break ;
2528 
2529         case OpticksGenstep_G4Cerenkov_modified:
2530         case OpticksGenstep_CERENKOV:
2531                                               cerenkov->generate(    p, rng, gs, photon_id, genstep_id ) ; break ;
2532 
2533         case OpticksGenstep_DsG4Scintillation_r4695:
2534         case OpticksGenstep_SCINTILLATION:
2535                                               scint->generate(        p, rng, gs, photon_id, genstep_id ) ; break ;
2536 
2537         case OpticksGenstep_INPUT_PHOTON:    { p = evt->photon[photon_id] ; p.set_flag(TORCH) ; }        ; break ;
2538         default:                             generate_photon_dummy(  p, rng, gs, photon_id, genstep_id)  ; break ;
2539     }
2540     p.set_index(photon_id) ;
2541 }
2542 #endif
2543