Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-04-09 07:49:10

0001 /**
0002 QSim.cu : extern void CUDA launch functions testing qsim.h methods
0003 -------------------------------------------------------------------------------------
0004 
0005 The launch functions are all invoked from QSim.cc methods with corresponding names.
0006 
0007 
0008 TODO: split off debug functions from actually used functions
0009 
0010 **/
0011 
0012 
0013 #include "stdio.h"
0014 #include "qrng.h"
0015 
0016 #include "scuda.h"
0017 #include "squad.h"
0018 #include "scurand.h"
0019 #include "sphoton.h"
0020 #include "srec.h"
0021 #include "scerenkov.h"
0022 #include "sevent.h"
0023 #include "sstate.h"
0024 
0025 
0026 #include "qprop.h"
0027 #include "qbnd.h"
0028 #include "qsim.h"
0029 #include "qcerenkov.h"
0030 #include "qbase.h"
0031 #include "qdebug.h"
0032 
0033 #include "QSimLaunch.hh"
0034 
0035 
0036 /**
0037 _QSim_rng_sequence
0038 --------------------
0039 
0040 **/
0041 
0042 template <typename T>
0043 __global__ void _QSim_rng_sequence(qsim* sim, T* seq, unsigned ni, unsigned nv, unsigned id_offset )
0044 {
0045     unsigned id = blockIdx.x*blockDim.x + threadIdx.x;
0046     if (id >= ni) return;
0047 
0048     unsigned evt_index = sim->evt->index ;
0049     //if( id == 0 ) printf("//_QSim_rng_sequence id %d ni %d sim->evt->index %d \n", id, ni, sim->evt->index );
0050 
0051     RNG rng ;
0052     sim->rng->init(rng, evt_index, id+id_offset) ;
0053 
0054     unsigned ibase = id*nv ;
0055 
0056     for(unsigned v=0 ; v < nv ; v++)
0057     {
0058         T u = scurand<T>::uniform(&rng) ;
0059         seq[ibase+v] = u ;
0060     }
0061 }
0062 
0063 
0064 template <typename T>
0065 extern void QSim_rng_sequence(dim3 numBlocks, dim3 threadsPerBlock, qsim* sim, T*  seq, unsigned ni, unsigned nv, unsigned id_offset )
0066 {
0067     printf("//QSim_rng_sequence ni %d nv %d id_offset %d  \n", ni, nv, id_offset );
0068     _QSim_rng_sequence<T><<<numBlocks,threadsPerBlock>>>( sim, seq, ni, nv, id_offset );
0069 
0070 }
0071 
0072 template void QSim_rng_sequence(dim3, dim3, qsim*, float* , unsigned, unsigned, unsigned );
0073 template void QSim_rng_sequence(dim3, dim3, qsim*, double*, unsigned, unsigned, unsigned );
0074 
0075 
0076 
0077 
0078 
0079 __global__ void _QSim_scint_wavelength(qsim* sim, float* wavelength, unsigned num_wavelength )
0080 {
0081     unsigned id = blockIdx.x*blockDim.x + threadIdx.x;
0082     if (id >= num_wavelength) return;
0083 
0084     RNG rng ;
0085     sim->rng->init(rng, 0, id ) ;
0086 
0087 
0088     float u_wl = curand_uniform(&rng);
0089     float wl = sim->scint->wavelength(u_wl) ;
0090 
0091     if(id % 100000 == 0) printf("//_QSim_scint_wavelength id %d  wl %10.4f    \n", id, wl  );
0092 
0093     wavelength[id] = wl ;
0094 }
0095 
0096 extern void QSim_scint_wavelength(dim3 numBlocks, dim3 threadsPerBlock, qsim* sim, float* wavelength, unsigned num_wavelength )
0097 {
0098     printf("//QSim_scint_wavelength num_wavelength %d \n", num_wavelength );
0099     _QSim_scint_wavelength<<<numBlocks,threadsPerBlock>>>( sim, wavelength, num_wavelength );
0100 }
0101 
0102 
0103 
0104 
0105 
0106 __global__ void _QSim_RandGaussQ_shoot(qsim* sim, float* vv, unsigned num_v )
0107 {
0108     unsigned id = blockIdx.x*blockDim.x + threadIdx.x;
0109     if (id >= num_v) return;
0110 
0111 
0112     RNG rng ;
0113     sim->rng->init(rng, 0, id ) ;
0114 
0115 
0116     float mean = 5.f ;
0117     float stdDev = 0.1f ;
0118     float v = sim->RandGaussQ_shoot(rng, mean, stdDev ) ;
0119 
0120     //if(id % 100000 == 0)
0121     //printf("//_QSim_RandGaussQ_shoot id %d  v %10.4f    \n", id, v  );
0122 
0123     vv[id] = v ;
0124 }
0125 
0126 extern void QSim_RandGaussQ_shoot(  dim3 numBlocks, dim3 threadsPerBlock, qsim* sim, float* v, unsigned num_v )
0127 {
0128     printf("//QSim_RandGaussQ_shoot num_v %d \n", num_v );
0129     _QSim_RandGaussQ_shoot<<<numBlocks,threadsPerBlock>>>( sim, v, num_v );
0130 }
0131 
0132 
0133 /**
0134 _QSim_dbg_gs_generate
0135 ------------------------
0136 
0137 Generate photons using the debug cerenkov or scint genstep
0138 
0139 **/
0140 
0141 
0142 __global__ void _QSim_dbg_gs_generate(qsim* sim, qdebug* dbg, sphoton* photon, unsigned num_photon, unsigned type )
0143 {
0144     unsigned idx = blockIdx.x*blockDim.x + threadIdx.x;
0145     if (idx >= num_photon) return;
0146 
0147     RNG rng ;
0148     sim->rng->init(rng, 0, idx ) ;
0149 
0150 
0151     //printf("//_QSim_dbg_gs_generate sim.cerenkov %p sim.scint %p \n", sim->cerenkov, sim->scint );
0152 
0153     int gsid = -1 ;
0154     sphoton p ;
0155 
0156     if( type == CERENKOV_GENERATE ) // TODO: other flavors of ck gen
0157     {
0158         const quad6& gs = (const quad6&)dbg->cerenkov_gs ;
0159         sim->cerenkov->generate(p, rng, gs, idx, gsid );
0160     }
0161     else if( type == SCINT_GENERATE )
0162     {
0163         const quad6& gs = (const quad6&)dbg->scint_gs ;
0164         sim->scint->generate(p, rng, gs, idx, gsid );
0165     }
0166     photon[idx] = p ;
0167 }
0168 
0169 
0170 extern void QSim_dbg_gs_generate(dim3 numBlocks, dim3 threadsPerBlock, qsim* sim, qdebug* dbg, sphoton* photon, unsigned num_photon, unsigned type )
0171 {
0172     printf("//QSim_dbg_gs_generate sim %p dbg %p photon %p num_photon %d type %d name %s \n", sim, dbg, photon, num_photon, type, QSimLaunch::Name(type) );
0173 
0174     _QSim_dbg_gs_generate<<<numBlocks,threadsPerBlock>>>( sim, dbg, photon, num_photon, type );
0175 }
0176 
0177 
0178 
0179 
0180 
0181 
0182 
0183 __global__ void _QSim_generate_photon(qsim* sim)
0184 {
0185     unsigned idx = blockIdx.x*blockDim.x + threadIdx.x;
0186 
0187     sevent* evt = sim->evt ;
0188 
0189     if (idx >= evt->num_photon) return;
0190 
0191 
0192     RNG rng ;
0193     sim->rng->init(rng, 0, idx ) ;
0194 
0195 
0196     unsigned genstep_id = evt->seed[idx] ;
0197     const quad6& gs     = evt->genstep[genstep_id] ;
0198 
0199     //printf("//_QSim_generate_photon idx %4d evt->num_photon %4d genstep_id %4d  \n", idx, evt->num_photon, genstep_id );
0200 
0201     sphoton p ;
0202     sim->generate_photon(p, rng, gs, idx, genstep_id );
0203 
0204     evt->photon[idx] = p ;
0205 }
0206 
0207 extern void QSim_generate_photon(dim3 numBlocks, dim3 threadsPerBlock, qsim* sim )
0208 {
0209     printf("//QSim_generate_photon sim %p \n", sim );
0210     // NB trying to de-reference the sim and evt pointers here gives "Bus error"
0211     // because the code of this function is not running on device despite being compiled by nvcc
0212     _QSim_generate_photon<<<numBlocks,threadsPerBlock>>>( sim );
0213 }
0214 
0215 
0216 
0217 
0218 __global__ void _QSim_fill_state_0(qsim* sim, quad6* state,  unsigned num_state, qdebug* dbg )
0219 {
0220     unsigned state_id = blockIdx.x*blockDim.x + threadIdx.x;
0221     printf("//_QSim_fill_state_0 state_id %d \n", state_id );
0222 
0223     if (state_id >= num_state) return;
0224 
0225     sstate s ;
0226 
0227     float wavelength = dbg->wavelength ;
0228     float cosTheta = dbg->cosTheta ;
0229     int boundary = state_id + 1 ;
0230 
0231     printf("//_QSim_fill_state_0 state_id %d  boundary %d wavelength %10.4f cosTheta %10.4f   \n", state_id, boundary, wavelength, cosTheta );
0232 
0233     unsigned base_pidx = -1u ;
0234 
0235     sim->bnd->fill_state(s, boundary, wavelength, cosTheta, state_id, base_pidx );
0236 
0237     state[state_id].q0.f = s.material1 ;
0238     state[state_id].q1.f = s.m1group2 ;
0239     state[state_id].q2.f = s.material2 ;
0240     state[state_id].q3.f = s.surface ;
0241     state[state_id].q4.u = s.optical ;
0242     state[state_id].q5.u = s.index ;
0243 
0244     //printf("//_QSim_fill_state_0 s.material1 %10.4f %10.4f %10.4f %10.4f \n", s.material1.x, s.material1.y, s.material1.z, s.material1.w );
0245 }
0246 
0247 extern void QSim_fill_state_0(dim3 numBlocks, dim3 threadsPerBlock, qsim* sim, quad6* state, unsigned num_state, qdebug* dbg )
0248 {
0249     printf("//QSim_fill_state_0 sim %p state %p num_state %d dbg %p \n", sim, state, num_state, dbg );
0250     _QSim_fill_state_0<<<numBlocks,threadsPerBlock>>>( sim, state, num_state, dbg  );
0251 }
0252 
0253 
0254 
0255 
0256 __global__ void _QSim_fill_state_1( qsim* sim, sstate* state,  unsigned num_state, qdebug* dbg )
0257 {
0258     unsigned state_id = blockIdx.x*blockDim.x + threadIdx.x;
0259     printf("//_QSim_fill_state_1 blockIdx.x %d blockDim.x %d threadIdx.x %d state_id %d num_state %d \n", blockIdx.x, blockDim.x, threadIdx.x, state_id, num_state );
0260 
0261     if (state_id >= num_state) return;
0262 
0263 
0264     const float& wavelength = dbg->wavelength ;
0265     const float& cosTheta = dbg->cosTheta ;
0266     int boundary = state_id + 1 ; // boundary is 1-based  TOFIX: boundary now 0-based
0267 
0268     printf("//_QSim_fill_state_1 state_id %d  boundary %d wavelength %10.4f cosTheta %10.4f   \n", state_id, boundary, wavelength, cosTheta );
0269 
0270     unsigned base_pidx = -1u ;
0271 
0272     sstate s ;
0273     sim->bnd->fill_state(s, boundary, wavelength, cosTheta, state_id, base_pidx );
0274 
0275     state[state_id] = s ;
0276 
0277     //printf("//_QSim_fill_state_1 s.material1 %10.4f %10.4f %10.4f %10.4f \n", s.material1.x, s.material1.y, s.material1.z, s.material1.w );
0278 }
0279 
0280 extern void QSim_fill_state_1(dim3 numBlocks, dim3 threadsPerBlock, qsim* sim, sstate* state, unsigned num_state, qdebug* dbg )
0281 {
0282     printf("//QSim_fill_state_1 sim %p state %p num_state %d dbg %p \n", sim, state, num_state, dbg );
0283     _QSim_fill_state_1<<<numBlocks,threadsPerBlock>>>( sim, state, num_state, dbg  );
0284 }
0285 
0286 
0287 
0288 
0289 __global__ void _QSim_rayleigh_scatter_align( qsim* sim, sphoton* photon,  unsigned num_photon, qdebug* dbg )
0290 {
0291     unsigned idx = blockIdx.x*blockDim.x + threadIdx.x;
0292     //printf("//_QSim_rayleigh_scatter_align blockIdx.x %d blockDim.x %d threadIdx.x %d id %d num_photon %d \n", blockIdx.x, blockDim.x, threadIdx.x, id, num_photon );
0293 
0294     if (idx >= num_photon) return;
0295 
0296     RNG rng ;
0297     sim->rng->init(rng, 0, idx ) ;
0298 
0299 
0300     sctx ctx = {} ;
0301     ctx.idx = idx ;
0302     ctx.p = dbg->p ;    // need local copy of photon otherwise would have write interference between threads
0303 
0304     sim->rayleigh_scatter(rng, ctx);
0305 
0306     photon[idx] = ctx.p ;
0307 }
0308 
0309 __global__ void _QSim_propagate_to_boundary( qsim* sim, sphoton* photon, unsigned num_photon, qdebug* dbg )
0310 {
0311     unsigned idx = blockIdx.x*blockDim.x + threadIdx.x;
0312     //printf("//_QSim_propagate_to_boundary blockIdx.x %d blockDim.x %d threadIdx.x %d propagate_id %d \n", blockIdx.x, blockDim.x, threadIdx.x, propagate_id );
0313 
0314     if (idx >= num_photon) return;
0315 
0316     RNG rng ;
0317     sim->rng->init(rng, 0, idx ) ;
0318 
0319 
0320 
0321 
0322     sctx ctx = {} ;
0323     ctx.idx = idx ;
0324     ctx.prd = &dbg->prd ;  // no need for local copy when readonly
0325     ctx.s = dbg->s ;
0326     ctx.p = dbg->p ;      // need local copy of photon otherwise will have write interference between threads
0327 
0328     unsigned flag = 0u ;
0329     //sim->propagate_to_boundary( flag, p, prd, s, rng, idx, tagr );
0330     sim->propagate_to_boundary( flag, rng, ctx );
0331     ctx.p.set_flag(flag);
0332 
0333     sphoton& p = ctx.p ;
0334 
0335     photon[idx] = p ;
0336 
0337     //const float3* position = (float3*)&p.q0.f.x ;
0338     //const float& time = p.q0.f.w ;
0339 
0340     printf("//_QSim_propagate_to_boundary flag %d position %10.4f %10.4f %10.4f  time %10.4f  \n", flag, p.pos.x, p.pos.y, p.pos.z, p.time );
0341 
0342 }
0343 
0344 __global__ void _QSim_propagate_at_boundary_generate( qsim* sim, sphoton* photon, unsigned num_photon, qdebug* dbg )
0345 {
0346     unsigned idx = blockIdx.x*blockDim.x + threadIdx.x;
0347     //printf("//_QSim_propagate_at_boundary_generate blockIdx.x %d blockDim.x %d threadIdx.x %d propagate_id %d \n", blockIdx.x, blockDim.x, threadIdx.x, propagate_id );
0348 
0349     if (idx >= num_photon) return;
0350 
0351     RNG rng ;
0352     sim->rng->init(rng, 0, idx ) ;
0353 
0354 
0355     sctx ctx = {} ;
0356     ctx.idx = idx ;
0357     ctx.prd = &dbg->prd ;  // no need for local copy when readonly
0358     ctx.s = dbg->s ;
0359     ctx.p = dbg->p ;    // need local copy of photon otherwise will have write interference between threads
0360 
0361     quad4& q = (quad4&)ctx.p ;
0362     q.q0.f = q.q1.f ;   // non-standard record initial mom and pol into q0, q3
0363     q.q3.f = q.q2.f ;
0364 
0365     unsigned flag = 0 ;
0366     //sim->propagate_at_boundary( flag, p, prd, s, rng, idx, tagr );
0367     sim->propagate_at_boundary( flag, rng, ctx );
0368 
0369     q.q3.u.w = flag ;          // non-standard
0370 
0371     photon[idx] = ctx.p ;
0372 }
0373 
0374 
0375 
0376 /**
0377 _QSim_propagate_at_boundary_mutate
0378 ------------------------------------
0379 
0380 Observe pullback fails after running this when qsim::propagate_at_boundary
0381 uses tagr for DEBUG_TAG recording of random consumption.
0382 
0383 **/
0384 
0385 __global__ void _QSim_propagate_at_boundary_mutate( qsim* sim, sphoton* photon, unsigned num_photon, qdebug* dbg )
0386 {
0387     unsigned idx = blockIdx.x*blockDim.x + threadIdx.x;
0388     if (idx >= num_photon) return;
0389 
0390     //printf("//_QSim_propagate_at_boundary_mutate.head blockIdx.x %d blockDim.x %d threadIdx.x %d idx %d dbg %p \n", blockIdx.x, blockDim.x, threadIdx.x, idx, dbg );
0391 
0392 
0393 
0394     RNG rng ;
0395     sim->rng->init(rng, 0, idx ) ;
0396 
0397     //printf("//_QSim_propagate_at_boundary_mutate.after rnginit blockIdx.x %d blockDim.x %d threadIdx.x %d idx %d dbg %p \n", blockIdx.x, blockDim.x, threadIdx.x, idx, dbg );
0398 
0399 
0400     sctx ctx = {} ;
0401     ctx.idx = idx ;
0402     ctx.p = photon[idx] ;
0403     ctx.s = dbg->s ;
0404     ctx.prd = &dbg->prd ;
0405 
0406     quad4&  q  = (quad4&)ctx.p ;
0407     q.q0.f = q.q1.f ;   // non-standard record initial mom and pol into q0, q3
0408     q.q3.f = q.q2.f ;
0409 
0410     //if(idx%1000==0) printf("//_QSim_propagate_at_boundary_mutate bef callidx %d \n", idx );
0411 
0412 
0413 
0414     unsigned flag = 0 ;
0415 
0416     sim->propagate_at_boundary( flag, rng, ctx );
0417 
0418     q.q3.u.w = flag ;  // non-standard
0419 
0420     photon[idx] = ctx.p ;
0421 
0422 
0423 
0424     //if(idx%1000==0) printf("//_QSim_propagate_at_boundary_mutate idx %d \n", idx);
0425 }
0426 
0427 
0428 __global__ void _QSim_propagate_at_multifilm_mutate( qsim* sim, sphoton* photon, unsigned num_photon, qdebug* dbg ){
0429 
0430     unsigned idx = blockIdx.x*blockDim.x + threadIdx.x;
0431     //printf("//_QSim_propagate_at_multifilm_mutate : Thread index: idx = %d ", idx);
0432 
0433     if (idx >= num_photon) return;
0434 
0435     RNG rng ;
0436     sim->rng->init(rng, 0, idx ) ;
0437 
0438 
0439     sctx ctx = {} ;
0440     ctx.idx = idx ;
0441     ctx.p = photon[idx] ;
0442     ctx.s = dbg->s ;
0443     ctx.prd = &dbg->prd ;
0444 
0445     quad4&  q  = (quad4&)ctx.p ;
0446 
0447     unsigned long long jump = 1000;
0448     skipahead(jump, &rng);
0449     q.q0.f = q.q1.f ;   // non-standard record initial mom and pol into q0, q3
0450     q.q3.f = q.q2.f ;
0451 
0452     unsigned flag = 0u ;
0453     //sim->propagate_at_multifilm(flag, p, prd, s, rng, idx, tagr );
0454     sim->propagate_at_surface_MultiFilm(flag, rng, ctx );
0455     //printf("//_QSim_propagate_at_multifilm_mutate : Thread index: idx = %d  flag = %d", idx, flag );
0456 
0457     q.q3.u.w = flag ;  // non-standard
0458     photon[idx] = ctx.p ;
0459 }
0460 
0461 
0462 
0463 
0464 
0465 __global__ void _QSim_hemisphere_polarized( qsim* sim, sphoton* photon, unsigned num_photon, qdebug* dbg, unsigned polz )
0466 {
0467     unsigned idx = blockIdx.x*blockDim.x + threadIdx.x;
0468     if (idx >= num_photon) return;
0469 
0470     //printf("//_QSim_hemisphere_polarized idx %d num_photon %d polz %d \n", idx, num_photon, polz );
0471 
0472     RNG rng ;
0473     sim->rng->init(rng, 0, idx ) ;
0474 
0475 
0476     sctx ctx = {} ;
0477     ctx.idx = idx ;
0478     ctx.p = dbg->p ;
0479     ctx.prd = &dbg->prd ;
0480 
0481     bool inwards = true ;
0482 
0483     //sim->hemisphere_polarized( p, polz, inwards,  prd, rng, tagr );
0484     sim->hemisphere_polarized( polz, inwards, rng, ctx );
0485 
0486     photon[idx] = ctx.p ;
0487 }
0488 
0489 
0490 /**
0491 _QSim_reflect_generate
0492 -----------------------
0493 
0494 ::
0495 
0496     q0 : initial q1 (mom, ?)
0497     q1 : final q1   (mom. ?)
0498     q2 : final q2   (pol, wl)
0499     q3 : initial q2 (pol, wl)
0500 
0501 
0502 **/
0503 
0504 
0505 __global__ void _QSim_reflect_generate( qsim* sim, sphoton* photon, unsigned num_photon, qdebug* dbg, unsigned type )
0506 {
0507     unsigned idx = blockIdx.x*blockDim.x + threadIdx.x;
0508     if (idx >= num_photon) return;
0509 
0510 
0511     RNG rng ;
0512     sim->rng->init(rng, 0, idx ) ;
0513 
0514 
0515     sctx ctx = {} ;
0516 
0517     ctx.idx = idx ;
0518     ctx.prd = &dbg->prd ;
0519     ctx.p = dbg->p ;
0520 
0521     quad4& q = (quad4&)ctx.p ;
0522     q.q0.f = q.q1.f ;   // non-standard record initial q1 (mom) into q0 and initial q2 (pol,wl) into q3
0523     q.q3.f = q.q2.f ;
0524 
0525     float u_decision_burn = curand_uniform(&rng);   // aligns consumption
0526     //printf("//_QSim_reflect_generate id %d u_decision_burn %10.4f \n", id, u_decision_burn );
0527 
0528     switch(type)
0529     {
0530         case REFLECT_DIFFUSE:   sim->reflect_diffuse(  rng, ctx) ;  break ;
0531         case REFLECT_SPECULAR:  sim->reflect_specular( rng, ctx) ;  break ;
0532     }
0533     photon[idx] = ctx.p ;
0534 }
0535 
0536 
0537 
0538 __global__ void _QSim_quad_launch( qsim* sim, quad* q, unsigned num_quad, qdebug* dbg, unsigned type )
0539 {
0540     unsigned idx = blockIdx.x*blockDim.x + threadIdx.x;
0541     if (idx >= num_quad ) return;
0542 
0543 
0544     RNG rng ;
0545     sim->rng->init(rng, 0, idx ) ;
0546 
0547 
0548     sctx ctx = {} ;
0549     ctx.idx = idx ;
0550 
0551     float3* v3 = (float3*)&q[idx].f.x ;
0552 
0553 
0554     if( type == QGEN_LAMBERTIAN_DIRECTION )
0555     {
0556         sim->lambertian_direction( v3, &dbg->normal, dbg->orient, rng, ctx );
0557     }
0558     else if( type == QGEN_RANDOM_DIRECTION_MARSAGLIA )
0559     {
0560         sim->random_direction_marsaglia( v3, rng, ctx );
0561     }
0562     else if( type == QGEN_SMEAR_NORMAL_SIGMA_ALPHA )
0563     {
0564         sim->SmearNormal_SigmaAlpha( rng, v3, &dbg->direction, &dbg->normal, dbg->value , ctx );
0565     }
0566     else if( type == QGEN_SMEAR_NORMAL_POLISH )
0567     {
0568         sim->SmearNormal_Polish(     rng, v3, &dbg->direction, &dbg->normal, dbg->value , ctx );
0569     }
0570 
0571 
0572     q[idx].u.w = idx ;
0573 }
0574 
0575 
0576 
0577 
0578 
0579 
0580 
0581 
0582 
0583 extern void QSim_quad_launch(dim3 numBlocks, dim3 threadsPerBlock, qsim* sim, quad* q, unsigned num_quad, qdebug* dbg, unsigned type  )
0584 {
0585 
0586     const char* name = QSimLaunch::Name(type) ;
0587     printf("//QSim_quad_launch sim %p quad %p num_quad %d dbg %p type %d name %s \n", sim, q, num_quad, dbg, type, name );
0588 
0589     assert( type == QGEN_RANDOM_DIRECTION_MARSAGLIA ||
0590             type == QGEN_LAMBERTIAN_DIRECTION       ||
0591             type == QGEN_SMEAR_NORMAL_SIGMA_ALPHA   ||
0592             type == QGEN_SMEAR_NORMAL_POLISH   ) ;
0593 
0594     _QSim_quad_launch<<<numBlocks,threadsPerBlock>>>( sim, q, num_quad, dbg, type ) ;
0595 }
0596 
0597 /**
0598 QSim_photon_launch
0599 --------------------
0600 
0601 Invoked from QSim::photon_launch_mutate all pointer args are on device.
0602 
0603 **/
0604 
0605 extern void QSim_photon_launch(dim3 numBlocks, dim3 threadsPerBlock, qsim* sim, sphoton* photon, unsigned num_photon, qdebug* dbg, unsigned type  )
0606 {
0607     const char* name = QSimLaunch::Name(type) ;
0608     printf("//QSim_photon_launch sim %p photon %p num_photon %d dbg %p type %d name %s \n", sim, photon, num_photon, dbg, type, name );
0609     switch(type)
0610     {
0611         case PROPAGATE_TO_BOUNDARY:
0612                                      _QSim_propagate_to_boundary<<<numBlocks,threadsPerBlock>>>(  sim, photon, num_photon, dbg  )   ; break ;
0613 
0614         case RAYLEIGH_SCATTER_ALIGN:
0615                                      _QSim_rayleigh_scatter_align<<<numBlocks,threadsPerBlock>>>( sim, photon, num_photon, dbg  )   ; break ;
0616 
0617         case HEMISPHERE_S_POLARIZED:
0618         case HEMISPHERE_P_POLARIZED:
0619         case HEMISPHERE_X_POLARIZED:
0620                                      _QSim_hemisphere_polarized<<<numBlocks,threadsPerBlock>>>(   sim, photon, num_photon, dbg, type - HEMISPHERE_S_POLARIZED  ) ; break ;
0621 
0622         case PROPAGATE_AT_BOUNDARY_S_POLARIZED:
0623         case PROPAGATE_AT_BOUNDARY_P_POLARIZED:
0624         case PROPAGATE_AT_BOUNDARY_X_POLARIZED:
0625                              _QSim_propagate_at_boundary_mutate<<<numBlocks,threadsPerBlock>>>(    sim, photon, num_photon, dbg  ) ; break ;
0626 
0627         case PROPAGATE_AT_MULTIFILM_S_POLARIZED:
0628         case PROPAGATE_AT_MULTIFILM_P_POLARIZED:
0629         case PROPAGATE_AT_MULTIFILM_X_POLARIZED:
0630                              _QSim_propagate_at_multifilm_mutate<<<numBlocks,threadsPerBlock>>>(    sim, photon, num_photon, dbg  ) ; break ;
0631 
0632 
0633 
0634         case PROPAGATE_AT_BOUNDARY:
0635         case PROPAGATE_AT_BOUNDARY_NORMAL_INCIDENCE:
0636                              _QSim_propagate_at_boundary_generate<<<numBlocks,threadsPerBlock>>>(  sim, photon, num_photon, dbg  )   ; break ;
0637 
0638 
0639         case REFLECT_DIFFUSE:
0640         case REFLECT_SPECULAR:
0641                             _QSim_reflect_generate<<<numBlocks,threadsPerBlock>>>( sim, photon, num_photon, dbg, type ) ; break ;
0642 
0643 
0644     }
0645 
0646     cudaDeviceSynchronize();
0647     printf("//QSim_photon_launch post launch cudaDeviceSynchronize \n");
0648 }
0649 
0650 
0651 #if !defined(PRODUCTION)
0652 /**
0653 _QSim_fake_propagate
0654 -----------------------
0655 
0656 TODO: compare performance using reference or pointer into global mem here rather than local stack copy
0657 
0658 **/
0659 
0660 __global__ void _QSim_fake_propagate( qsim* sim, quad2* prd )
0661 {
0662     sevent* evt = sim->evt ;
0663     int idx = blockIdx.x*blockDim.x + threadIdx.x;
0664     if (idx >= evt->num_photon ) return;
0665 
0666 #ifdef DEBUG_PIDX
0667     qbase* base = sim->base ;
0668     if( idx == base->pidx )
0669     printf("//_QSim_fake_propagate idx %d evt.num_photon %ld evt.max_record %d  \n", idx, evt->num_photon, evt->max_record );
0670 #endif
0671 
0672 
0673     RNG rng ;
0674     sim->rng->init(rng, 0, idx ) ;
0675 
0676 
0677     sphoton p = evt->photon[idx] ;
0678     p.index = idx;
0679 
0680     sim->fake_propagate( p, prd, rng, idx );
0681 
0682 }
0683 
0684 
0685 extern void QSim_fake_propagate_launch(dim3 numBlocks, dim3 threadsPerBlock, qsim* sim, quad2* prd )
0686 {
0687     _QSim_fake_propagate<<<numBlocks,threadsPerBlock>>>( sim, prd );
0688 }
0689 
0690 #endif
0691 
0692 __global__ void _QSim_boundary_lookup_all(qsim* sim, quad* lookup, unsigned width, unsigned height )
0693 {
0694     unsigned ix = blockIdx.x * blockDim.x + threadIdx.x;
0695     unsigned iy = blockIdx.y * blockDim.y + threadIdx.y;
0696     unsigned index = iy * width + ix ;
0697     if (ix >= width | iy >= height ) return;
0698 
0699     quad q ;
0700     q.f = sim->bnd->boundary_lookup( ix, iy );
0701     lookup[index] = q ;
0702 }
0703 
0704 extern void QSim_boundary_lookup_all(dim3 numBlocks, dim3 threadsPerBlock, qsim* sim, quad* lookup, unsigned width, unsigned height )
0705 {
0706     printf("//QSim_boundary_lookup width %d  height %d \n", width, height );
0707     _QSim_boundary_lookup_all<<<numBlocks,threadsPerBlock>>>( sim, lookup, width, height );
0708 }
0709 
0710 
0711 
0712 __global__ void _QSim_boundary_lookup_line(qsim* sim, quad* lookup, float* domain, unsigned num_lookup, unsigned line, unsigned k )
0713 {
0714     unsigned id = blockIdx.x*blockDim.x + threadIdx.x;
0715     if (id >= num_lookup) return;
0716     float wavelength = domain[id] ;
0717     quad q ;
0718     q.f = sim->bnd->boundary_lookup( wavelength, line, k );
0719     lookup[id] = q ;
0720 }
0721 
0722 
0723 extern void QSim_boundary_lookup_line(dim3 numBlocks, dim3 threadsPerBlock, qsim* sim, quad* lookup, float* domain, unsigned num_lookup, unsigned line, unsigned k )
0724 {
0725     printf("//QSim_boundary_lookup_line num_lookup %d line %d k %d  \n", num_lookup, line, k );
0726     _QSim_boundary_lookup_line<<<numBlocks,threadsPerBlock>>>( sim, lookup, domain, num_lookup, line, k );
0727 }
0728 
0729 
0730 
0731 template <typename T>
0732 __global__ void _QSim_prop_lookup(qsim* sim, T* lookup, const T* domain, unsigned domain_width, unsigned* pids, unsigned num_pids )
0733 {
0734     unsigned ix = blockIdx.x * blockDim.x + threadIdx.x;
0735     unsigned iy = blockIdx.y * blockDim.y + threadIdx.y;
0736     if (ix >= domain_width || iy >= num_pids  ) return;
0737 
0738     T x = domain[ix] ;
0739     unsigned pid = pids[iy] ;
0740 
0741     T y = sim->cerenkov->prop->interpolate( pid, x );
0742     lookup[iy*domain_width + ix] = y ;
0743 }
0744 
0745 template <typename T>
0746 extern void QSim_prop_lookup( dim3 numBlocks, dim3 threadsPerBlock, qsim* sim, T* lookup, const T* domain, unsigned domain_width, unsigned* pids, unsigned num_pids )
0747 {
0748     printf("//QSim_prop_lookup domain_width %d num_pids %d  \n", domain_width, num_pids );
0749     _QSim_prop_lookup<<<numBlocks,threadsPerBlock>>>( sim, lookup, domain, domain_width, pids, num_pids );
0750 }
0751 
0752 
0753 template void QSim_prop_lookup(dim3, dim3, qsim*, double*, double const*, unsigned, unsigned*, unsigned) ;
0754 template void QSim_prop_lookup(dim3, dim3, qsim*,  float*,  float const*, unsigned, unsigned*, unsigned ) ;
0755 
0756 
0757 
0758 
0759 /**
0760 ipid
0761    output index for that pid, which will usually differ from the index of the pid
0762 **/
0763 
0764 template <typename T>
0765 __global__ void _QSim_prop_lookup_one(qsim* sim, T* lookup, const T* domain, unsigned domain_width, unsigned num_pids, unsigned pid, unsigned ipid )
0766 {
0767     unsigned ix = blockIdx.x * blockDim.x + threadIdx.x;
0768     if (ix >= domain_width || pid >= num_pids  ) return;
0769 
0770     T x = domain[ix] ;
0771     T y = sim->cerenkov->prop->interpolate( pid, x );
0772 
0773     lookup[ipid*domain_width + ix] = y ;
0774 }
0775 
0776 template <typename T>
0777 extern  void QSim_prop_lookup_one(dim3 numBlocks, dim3 threadsPerBlock, qsim* sim, T* lookup, const T* domain, unsigned domain_width, unsigned num_pids, unsigned pid, unsigned ipid )
0778 {
0779     printf("//QSim_prop_lookup_one domain_width %d num_pids %d pid %d ipid %d \n", domain_width, num_pids, pid, ipid );
0780     _QSim_prop_lookup_one<<<numBlocks,threadsPerBlock>>>( sim, lookup, domain, domain_width, num_pids, pid, ipid );
0781 }
0782 
0783 template void QSim_prop_lookup_one(dim3, dim3, qsim*, double*, const double*, unsigned, unsigned, unsigned, unsigned ) ;
0784 template void QSim_prop_lookup_one(dim3, dim3, qsim*, float*, const float*, unsigned, unsigned, unsigned, unsigned ) ;
0785 
0786 
0787 
0788 __global__ void _QSim_multifilm_lookup_all(qsim* sim, quad2* sample,  quad2* result,  unsigned width, unsigned height )
0789 {
0790     unsigned ix = blockIdx.x * blockDim.x + threadIdx.x;
0791     unsigned iy = blockIdx.y * blockDim.y + threadIdx.y;
0792     unsigned index = iy * width + ix ;
0793     if (ix >= width | iy >= height ) return;
0794 
0795     unsigned pmtType = sample[index].q0.u.x;
0796 
0797     float    wv      = sample[index].q0.f.y;
0798     float    aoi     = sample[index].q0.f.z;
0799 
0800     float4 res = sim->multifilm_lookup( pmtType , wv, aoi );
0801 
0802     result[index].q0.u.x = pmtType;
0803     result[index].q0.f.y = wv ;
0804     result[index].q0.f.z = aoi;
0805 
0806     result[index].q1.f.x = res.x;
0807     result[index].q1.f.y = res.y;
0808     result[index].q1.f.z = res.z;
0809     result[index].q1.f.w = res.w;
0810 
0811     if(index < 100)
0812     {printf( "//index %d res.x %10.4f res.y %10.4f res.z %10.4f res.w %10.4f sample.x %10.4f sample.y %10.4f sample.z %10.4f sample.w %10.4f\n ",index,  res.x, res.y, res.z, res.w,  sample[index].q1.f.x, sample[index].q1.f.y, sample[index].q1.f.z, sample[index].q1.f.w);
0813      }
0814 
0815 }
0816 
0817 
0818 extern void QSim_multifilm_lookup_all(dim3 numBlocks, dim3 threadsPerBlock, qsim* sim, quad2* sample, quad2* result,  unsigned width, unsigned height )
0819 {
0820     printf("//QSim_multifilm_lookup width %d  height %d \n", width, height );
0821     _QSim_multifilm_lookup_all<<<numBlocks,threadsPerBlock>>>( sim, sample,result , width, height );
0822 }
0823 
0824 
0825 
0826 
0827 
0828