Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-04-09 07:49:01

0001 /**
0002 CSGOptiX7.cu
0003 ===================
0004 
0005 NB: ONLY CODE THAT MUST BE HERE DUE TO OPTIX DEPENDENCY SHOULD BE HERE
0006 everything else should be located elsewhere : mostly in qudarap: sevent, qsim
0007 or the sysrap basis types sphoton quad4 quad2 etc.. where the code is reusable
0008 and more easily tested.
0009 
0010 Functions
0011 -----------
0012 
0013 trace
0014     populate quad2 prd by call to optixTrace
0015 
0016 make_color
0017     minimal normal "shader"
0018 
0019 render
0020     raygen function : calling trace and "shading" pixels
0021 
0022 simulate
0023     raygen function : qsim::generate_photon, bounce while loop, qsim::propagate
0024 
0025     * ifndef PRODUCTION sctx::trace sctx::point record the propagation point-by-point
0026 
0027 simtrace
0028     raygen function : qsim.h generate_photon_simtrace, trace, sevent::add_simtrace
0029 
0030 __raygen__rg
0031     calls one of the above raygen functions depending on params.raygenmode
0032 
0033 setPayload
0034     mechanics of communication when not using WITH_PRD
0035 
0036 __miss_ms
0037     default quad2 prd OR payload for rays that miss
0038 
0039 __closesthit__ch
0040     populate quad2 prd OR payload for rays that intersect
0041 
0042 __intersection__is
0043     converts OptiX HitGroupData into corresponding CSGNode and calls intersect_prim
0044     giving float4 isect: (normal_at_intersect, distance)
0045 
0046 **/
0047 
0048 #include <optix.h>
0049 
0050 #include "SRG.h"
0051 #include "scuda.h"
0052 #include "squad.h"
0053 #include "sqat4.h"
0054 #include "sphoton.h"
0055 #include "sphotonlite.h"
0056 #include "scerenkov.h"
0057 #include "sstate.h"
0058 
0059 #ifndef PRODUCTION
0060 #include "stag.h"
0061 #include "sseq.h"
0062 #include "srec.h"
0063 #endif
0064 
0065 #include "sevent.h"
0066 #include "sctx.h"
0067 
0068 // simulation
0069 #include <curand_kernel.h>
0070 
0071 #include "qrng.h"
0072 #include "qsim.h"
0073 
0074 #include "csg_intersect_leaf.h"
0075 #include "csg_intersect_node.h"
0076 #include "csg_intersect_tree.h"
0077 
0078 #include "Binding.h"
0079 #include "Params.h"
0080 
0081 #ifdef WITH_PRD
0082 #include "scuda_pointer.h"
0083 #include "SOPTIX_getPRD.h"
0084 #endif
0085 
0086 extern "C" { __constant__ Params params ;  }
0087 
0088 /**
0089 trace : pure function, with no use of params, everything via args
0090 -------------------------------------------------------------------
0091 
0092 refine:false
0093     does single optixTrace
0094 
0095 refine:true
0096     does a second optixTrace if 99 percent of the distance returned by the
0097     first optixTrace exceeds the refine_distance argument.
0098     This attempts to improve the precision of long distance intersects
0099     by doing a 2nd closer intersect.
0100 
0101 
0102 Outcome of trace is to populate *prd* by payload and attribute passing.
0103 When WITH_PRD macro is defined only 2 32-bit payload values are used to
0104 pass the 64-bit  pointer, otherwise more payload and attributes values
0105 are used to pass the contents IS->CH->RG.
0106 
0107 See __closesthit__ch to see where the payload p0-p7 comes from.
0108 **/
0109 
0110 template<bool refine>
0111 static __forceinline__ __device__ void trace(
0112         OptixTraversableHandle handle,
0113         float3                 ray_origin,
0114         float3                 ray_direction,
0115         float                  tmin,
0116         float                  tmax,
0117         quad2*                 prd,
0118         unsigned               visibilityMask,
0119         float                  refine_distance
0120         )
0121 {
0122     const float rayTime = 0.0f ;
0123     OptixRayFlags rayFlags = OPTIX_RAY_FLAG_DISABLE_ANYHIT ;   // OPTIX_RAY_FLAG_NONE
0124     const unsigned SBToffset = 0u ;
0125     const unsigned SBTstride = 1u ;
0126     const unsigned missSBTIndex = 0u ;
0127 #ifdef WITH_PRD
0128     uint32_t p0, p1 ;
0129     packPointer( prd, p0, p1 ); // scuda_pointer.h : pack prd addr from RG program into two uint32_t passed as payload
0130 
0131     if(!refine)
0132     {
0133         optixTrace(
0134             handle,
0135             ray_origin,
0136             ray_direction,
0137             tmin,
0138             tmax,
0139             rayTime,
0140             visibilityMask,
0141             rayFlags,
0142             SBToffset,
0143             SBTstride,
0144             missSBTIndex,
0145             p0, p1
0146             );
0147     }
0148     else
0149     {
0150         optixTrace(
0151             handle,
0152             ray_origin,
0153             ray_direction,
0154             tmin,
0155             tmax,
0156             rayTime,
0157             visibilityMask,
0158             rayFlags,
0159             SBToffset,
0160             SBTstride,
0161             missSBTIndex,
0162             p0, p1
0163             );
0164 
0165         float t_approx = 0.99f*prd->distance() ;
0166         // decrease to avoid imprecise first intersect from
0167         // leading to giving a miss or other intersect the 2nd time
0168         if( t_approx > refine_distance )
0169         {
0170             float3 closer_ray_origin = ray_origin + t_approx*ray_direction ;
0171             optixTrace(
0172                 handle,
0173                 closer_ray_origin,
0174                 ray_direction,
0175                 tmin,
0176                 tmax,
0177                 rayTime,
0178                 visibilityMask,
0179                 rayFlags,
0180                 SBToffset,
0181                 SBTstride,
0182                 missSBTIndex,
0183                 p0, p1
0184                 );
0185 
0186             prd->distance_add( t_approx );
0187         }
0188     }
0189 
0190 #else
0191     uint32_t p0, p1, p2, p3, p4, p5, p6, p7  ;
0192     optixTrace(
0193             handle,
0194             ray_origin,
0195             ray_direction,
0196             tmin,
0197             tmax,
0198             rayTime,
0199             visibilityMask,
0200             rayFlags,
0201             SBToffset,
0202             SBTstride,
0203             missSBTIndex,
0204             p0, p1, p2, p3, p4, p5, p6, p7
0205             );
0206     // unclear where the uint_as_float CUDA device function is defined, seems CUDA intrinsic without header ?
0207     prd->q0.f.x = __uint_as_float( p0 );
0208     prd->q0.f.y = __uint_as_float( p1 );
0209     prd->q0.f.z = __uint_as_float( p2 );
0210     prd->q0.f.w = __uint_as_float( p3 );
0211     prd->set_identity(p4) ;
0212     prd->set_globalPrimIdx_boundary_(p5) ;
0213     prd->set_lposcost(__uint_as_float(p6)) ;   // trace.not-WITH_PRD
0214     prd->set_iindex(p7) ;
0215 #endif
0216 }
0217 
0218 //#if !defined(PRODUCTION) && defined(WITH_RENDER)
0219 #if defined(WITH_RENDER)
0220 
0221 __forceinline__ __device__ uchar4 make_normal_pixel( const float3& normal, float depth )  // pure
0222 {
0223     return make_uchar4(
0224             static_cast<uint8_t>( clamp( normal.x, 0.0f, 1.0f ) *255.0f ),
0225             static_cast<uint8_t>( clamp( normal.y, 0.0f, 1.0f ) *255.0f ),
0226             static_cast<uint8_t>( clamp( normal.z, 0.0f, 1.0f ) *255.0f ),
0227             static_cast<uint8_t>( clamp( depth   , 0.0f, 1.0f ) *255.0f )
0228             );
0229 }
0230 
0231 __forceinline__ __device__ uchar4 make_zdepth_pixel( float depth )  // pure
0232 {
0233     return make_uchar4(
0234             static_cast<uint8_t>( clamp( depth   , 0.0f, 1.0f ) *255.0f ),
0235             static_cast<uint8_t>( clamp( depth   , 0.0f, 1.0f ) *255.0f ),
0236             static_cast<uint8_t>( clamp( depth   , 0.0f, 1.0f ) *255.0f ),
0237             static_cast<uint8_t>( clamp( depth   , 0.0f, 1.0f ) *255.0f )
0238             );
0239 }
0240 
0241 
0242 
0243 /**
0244 render : non-pure, uses params for viewpoint inputs and pixels output
0245 -----------------------------------------------------------------------
0246 
0247 Bugs with normal(0.f,0.f,0.f) via normalizing yields diddled_normal(nan,nan,nan)
0248 which make_color manages to clamp to (0,0,0) black.
0249 
0250 **/
0251 
0252 static __forceinline__ __device__ void render( const uint3& idx, const uint3& dim, quad2* prd )
0253 {
0254 
0255 #if defined(DEBUG_PIDX)
0256     //if(idx.x == 10 && idx.y == 10) printf("//CSGOptiX7.cu:render idx(%d,%d,%d) dim(%d,%d,%d) \n", idx.x, idx.y, idx.z, dim.x, dim.y, dim.z );
0257 #endif
0258 
0259     float2 d = 2.0f * make_float2(
0260             static_cast<float>(idx.x)/static_cast<float>(dim.x),
0261             static_cast<float>(idx.y)/static_cast<float>(dim.y)
0262             ) - 1.0f;
0263 
0264 
0265     const unsigned cameratype = params.cameratype ;
0266     const float3 dxyUV = d.x * params.U + params.V * ( params.traceyflip ? -d.y : d.y ) ;
0267     const float3 origin    = cameratype == 0u ? params.eye                     : params.eye + dxyUV    ;
0268     const float3 direction = cameratype == 0u ? normalize( dxyUV + params.W )  : normalize( params.W ) ;
0269     //                           cameratype 0u:perspective,                    1u:orthographic
0270 
0271     trace<false>(
0272         params.handle,
0273         origin,
0274         direction,
0275         params.tmin,
0276         params.tmax,
0277         prd,
0278         params.vizmask,
0279         params.PropagateRefineDistance
0280     );
0281 
0282 #if defined(DEBUG_PIDX)
0283     //if(idx.x == 10 && idx.y == 10) printf("//CSGOptiX7.cu:render prd.distance(%7.3f)  prd.lposcost(%7.3f)  \n", prd->distance(), prd->lposcost()  );
0284 #endif
0285 
0286 
0287 
0288 
0289 
0290     const float3* normal = prd->normal();
0291 
0292 #if defined(DEBUG_PIDX)
0293     //if(idx.x == 10 && idx.y == 10) printf("//CSGOptiX7.cu:render normal(%7.3f,%7.3f,%7.3f)  \n", normal->x, normal->y, normal->z );
0294 #endif
0295 
0296     float3 diddled_normal = normalize(*normal)*0.5f + 0.5f ; // diddling lightens the render, with mid-grey "pedestal"
0297 
0298     float eye_z = -prd->distance()*dot(params.WNORM, direction) ;
0299     const float& A = params.ZPROJ.z ;
0300     const float& B = params.ZPROJ.w ;
0301     float zdepth = cameratype == 0u ? -(A + B/eye_z) : A*eye_z + B  ;  // cf SGLM::zdepth1
0302 
0303     if( prd->is_boundary_miss() ) zdepth = 0.999f ;
0304     // setting miss zdepth to 1.f give black miss pixels, 0.999f gives expected mid-grey from normal of (0.f,0.f,0.f)
0305     // previously with zdepth of zero for miss pixels found that OpenGL record rendering did not
0306     // appear infront of the grey miss pixels : because they were behind them (zdepth > 0.f ) presumably
0307 
0308     unsigned index = idx.y * params.width + idx.x ;
0309 
0310     if(params.pixels)
0311     {
0312 #if defined(DEBUG_PIDX)
0313         //if(idx.x == 10 && idx.y == 10) printf("//CSGOptiX7.cu:render/params.pixels diddled_normal(%7.3f,%7.3f,%7.3f)  \n", diddled_normal.x, diddled_normal.y, diddled_normal.z );
0314 #endif
0315         params.pixels[index] = params.rendertype == 0 ? make_normal_pixel( diddled_normal, zdepth ) : make_zdepth_pixel( zdepth ) ;
0316     }
0317     if(params.isect)
0318     {
0319         float3 position = origin + direction*prd->distance() ;
0320         params.isect[index]  = make_float4( position.x, position.y, position.z, __uint_as_float(prd->identity())) ;
0321     }
0322 
0323 
0324 }
0325 #endif
0326 
0327 
0328 #if defined(WITH_SIMULATE)
0329 
0330 /**
0331 simulate : uses params for input: gensteps, seeds and output photons
0332 ----------------------------------------------------------------------
0333 
0334 Contrast with the monolithic old way with OptiXRap/cu/generate.cu:generate
0335 
0336 This method aims to get as much as possible of its functionality from
0337 separately implemented and tested headers.
0338 
0339 The big thing that CSGOptiX provides is geometry intersection, only that must be here.
0340 Everything else should be implemented and tested elsewhere, mostly in QUDARap headers.
0341 
0342 Hence this "simulate" needs to act as a coordinator.
0343 Params take central role in enabling this:
0344 
0345 
0346 Params
0347 ~~~~~~~
0348 
0349 * CPU side params including qsim.h sevent.h pointers instanciated in CSGOptiX::CSGOptiX
0350   and populated by CSGOptiX::init methods before being uploaded by CSGOptiX::prepareParam
0351 
0352 COMPARE WITH qsim::mock_propagate
0353 
0354 
0355 
0356 Is cycling of the launch_idx.x (unsigned)idx possible here ? NOT YET
0357 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
0358 
0359 * 0xffffffff/1e9 = 4.29 billion photons in one launch would need VRAM of 275 GB::
0360 
0361     In [102]: (0xffffffff*4*4*4)/1e9
0362     Out[102]: 274.87790688   # 275 GB of photons in one launch ?
0363 
0364 
0365 * suggests should limit max_slot to 0xffffffff (4.29 billion, on top of VRAM requirements)
0366 * current heuristic limit of 250M photons when limited by 32 GB VRAM
0367 
0368 * NOTE THE ABSOLUTE photon_idx IS LIKELY BEING CYCLED HOWEVER
0369 
0370 
0371 tmin/tmin0 avoiding light leakage "tunnelling"
0372 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
0373 
0374 The ray trace tmin is used to prevent rays with origin "on" a surface (ie very close to it)
0375 from intersecting with the surface when trying to escape from it.
0376 For example on reflecting from a surface multiple intersects can be avoided
0377 with a suitably chosed tmin. Using the same tmin for all steps of the photon is simple
0378 but can have undesirable consequences. For example when generation by scintillation/cerenkov/torch
0379 or scatter/reemission happens within tmin of a surface the photon might "tunnel"
0380 through the surface. Globally reducing tmin would reduce the probability of tunneling but
0381 would cause repeated intersection issues for rays starting "on" surfaces.
0382 
0383 To avoid this dilemma a second tmin0 and the photon step flags that cause
0384 tmin0 to be used for the next ray trace can be configured.
0385 As shown in the below table the default photon step flags that cause tmin0
0386 to be used for the next ray trace are TO,CK,SI,SC,RE which
0387 are all things (generation, scattering, reemission) that mostly happen
0388 in the bulk away from boundaries where there is less concern regarding
0389 self intersection. As self-intersection is not so much of a concern tmin0
0390 might be set to zero. However, there could then be a problem with self intersection
0391 in cases where these things happen within close to surfaces.
0392 
0393 +----------------------------------+-----------------------------------------+-------------------+
0394 |  envvar                          |  method                                 | default           |
0395 +==================================+=========================================+===================+
0396 | OPTICKS_PROPAGATE_EPSILON        |  SEventConfig::PropagateEpsilon()       |  0.05f            |
0397 +----------------------------------+-----------------------------------------+-------------------+
0398 | OPTICKS_PROPAGATE_EPSILON0       |  SEventConfig::PropagateEpsilon0()      |  0.05f            |
0399 +----------------------------------+-----------------------------------------+-------------------+
0400 | OPTICKS_PROPAGATE_EPSILON0_MASK  |  SEventConfig::PropagateEpsilon0Mask()  |  TO,CK,SI,SC,RE   |
0401 +----------------------------------+-----------------------------------------+-------------------+
0402 
0403 **/
0404 
0405 static __forceinline__ __device__ void simulate( const uint3& launch_idx, const uint3& dim, quad2* prd )
0406 {
0407     sevent* evt = params.evt ;
0408     if (launch_idx.x >= evt->num_seed) return;   // was evt->num_photon
0409 
0410     unsigned idx = launch_idx.x ;
0411     unsigned genstep_idx = evt->seed[idx] ;
0412     const quad6& gs = evt->genstep[genstep_idx] ;
0413     // genstep needs the raw index, from zero for each genstep slice sub-launch
0414 
0415     unsigned long long photon_idx = params.photon_slot_offset + idx ;
0416     // 2025/10/20 change from unsigned to avoid clocking photon_idx and duplicating
0417     //
0418     // rng_state access and array recording needs the absolute photon_idx
0419     // for multi-launch and single-launch simulation to match.
0420     // The offset hides the technicality of the multi-launch from output.
0421 
0422     qsim* sim = params.sim ;
0423 
0424 //#define OLD_WITHOUT_SKIPAHEAD 1
0425 #ifdef OLD_WITHOUT_SKIPAHEAD
0426     RNG rng = sim->rngstate[photon_idx] ;
0427 #else
0428     RNG rng ;
0429     sim->rng->init( rng, sim->evt->index, photon_idx );
0430 #endif
0431 
0432     sctx ctx = {} ;
0433     ctx.evt = evt ;   // sevent.h
0434     ctx.prd = prd ;   // squad.h quad2
0435 
0436     ctx.idx = idx ;
0437     ctx.pidx = photon_idx ;
0438 
0439 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
0440     ctx.pidx_debug = sim->base->pidx == photon_idx ;
0441 #endif
0442 
0443     sim->generate_photon(ctx.p, rng, gs, photon_idx, genstep_idx );
0444 
0445     int command = START ;
0446     int bounce = 0 ;
0447 #ifndef PRODUCTION
0448     ctx.point(bounce);
0449 #endif
0450     while( bounce < evt->max_bounce && ctx.p.time < params.max_time )
0451     {
0452         float tmin = ( ctx.p.orient_boundary_flag & params.PropagateEpsilon0Mask ) ? params.tmin0 : params.tmin ;
0453 
0454         // intersect query filling (quad2)prd
0455         switch(params.PropagateRefine)
0456         {
0457             case 0u: trace<false>( params.handle, ctx.p.pos, ctx.p.mom, tmin, params.tmax, prd, params.vizmask, params.PropagateRefineDistance );  break ;
0458             case 1u: trace<true>(  params.handle, ctx.p.pos, ctx.p.mom, tmin, params.tmax, prd, params.vizmask, params.PropagateRefineDistance );  break ;
0459         }
0460 
0461         if( prd->boundary() == 0xffffu ) break ; // SHOULD ONLY HAPPEN FOR PHOTONS STARTING OUTSIDE WORLD
0462         // propagate can do nothing meaningful without a boundary
0463 
0464         // HMM: normalize here or within CSG ? Actually only needed for
0465         // geometry with active scaling, such as ellipsoid.
0466         // TODO: move this so its only done when needed
0467         //     ~/o/notes/issues/CSGOptiX_simulate_avoid_normalizing_every_normal.rst
0468         //
0469 
0470         float3* normal = prd->normal();
0471         *normal = normalize(*normal);
0472 
0473 #ifndef PRODUCTION
0474         ctx.trace(bounce);
0475 #endif
0476         command = sim->propagate(bounce, rng, ctx);
0477         bounce++;
0478 #ifndef PRODUCTION
0479         ctx.point(bounce) ;
0480 #endif
0481         if(command == BREAK) break ;
0482     }
0483 #ifndef PRODUCTION
0484     ctx.end();  // write seq, tag, flat
0485 #endif
0486 
0487 
0488     if( evt->photon )
0489     {
0490         evt->photon[idx] = ctx.p ;  // *idx* (not *photon_idx*) as needs to go from zero for photons from a slice of genstep array
0491     }
0492 
0493     if( evt->photonlite )
0494     {
0495         sphotonlite l ;
0496         l.init( ctx.p.identity, ctx.p.time, ctx.p.flagmask );
0497         l.set_lpos(prd->lposcost(), prd->lposfphi() );
0498         evt->photonlite[idx] = l ;  // *idx* (not *photon_idx*) as needs to go from zero for photons from a slice of genstep array
0499     }
0500 
0501 
0502 
0503 }
0504 
0505 #endif
0506 
0507 
0508 //#if !defined(PRODUCTION) && defined(WITH_SIMTRACE)
0509 #if defined(WITH_SIMTRACE)
0510 
0511 /**
0512 simtrace
0513 ----------
0514 
0515 Used for making 2D cross section views of geometry intersects
0516 
0517 Note how seeding is still needed here despite the highly artificial
0518 nature of the center-extent grid of gensteps as the threads of the launch
0519 still needs to access different gensteps across the grid.
0520 
0521 TODO: Compose frames of pixels, isect and "fphoton" within the cegs window
0522 using the positions of the intersect "photons".
0523 Note that multiple threads may be writing to the same pixel
0524 that is apparently not a problem, just which does it is uncontrolled.
0525 
0526 unsigned index = iz * params.width + ix ;
0527 if( index > 0 )
0528 {
0529     params.pixels[index] = make_uchar4( 255u, 0u, 0u, 255u) ;
0530     params.isect[index] = make_float4( ipos.x, ipos.y, ipos.z, uint_as_float(identity)) ;
0531     params.fphoton[index] = p ;
0532 }
0533 **/
0534 
0535 
0536 static __forceinline__ __device__ void simtrace( const uint3& launch_idx, const uint3& dim, quad2* prd )
0537 {
0538     unsigned idx = launch_idx.x ;
0539     sevent* evt  = params.evt ;
0540     if (idx >= evt->num_simtrace) return;    // num_slot for multi launch simtrace ?
0541 
0542     unsigned genstep_idx = evt->seed[idx] ;
0543     unsigned photon_idx  = params.photon_slot_offset + idx ;
0544     // photon_idx same as idx for first launch, offset beyond first for multi-launch
0545 
0546 #if defined(DEBUG_PIDX)
0547     if(photon_idx == 0) printf("//CSGOptiX7.cu : simtrace idx %d photon_idx %d  genstep_idx %d evt->num_simtrace %ld \n", idx, photon_idx, genstep_idx, evt->num_simtrace );
0548 #endif
0549 
0550     const quad6& gs = evt->genstep[genstep_idx] ;
0551 
0552     qsim* sim = params.sim ;
0553     RNG rng ;
0554     sim->rng->init(rng, 0, photon_idx) ;
0555 
0556     quad4 p ;
0557     sim->generate_photon_simtrace(p, rng, gs, photon_idx, genstep_idx );
0558 
0559 
0560     // HUH: this is not the layout of sevent::add_simtrace
0561     const float3& pos = (const float3&)p.q0.f  ;
0562     const float3& mom = (const float3&)p.q1.f ;
0563 
0564 
0565 #if defined(DEBUG_PIDX)
0566     if(photon_idx == 0) printf("//CSGOptiX7.cu : simtrace idx %d pos.xyz %7.3f,%7.3f,%7.3f mom.xyz %7.3f,%7.3f,%7.3f  \n", idx, pos.x, pos.y, pos.z, mom.x, mom.y, mom.z );
0567 #endif
0568 
0569     switch(params.PropagateRefine)
0570     {
0571         case 0u: trace<false>( params.handle, pos, mom, params.tmin, params.tmax, prd, params.vizmask, params.PropagateRefineDistance );  break ;
0572         case 1u: trace<true>(  params.handle, pos, mom, params.tmin, params.tmax, prd, params.vizmask, params.PropagateRefineDistance );  break ;
0573     }
0574 
0575     evt->add_simtrace( idx, p, prd, params.tmin );  // sevent
0576     // not photon_idx, needs to go from zero for photons from a slice of genstep array
0577 }
0578 #endif
0579 
0580 /**
0581 for angular efficiency need intersection point in object frame to get the angles
0582 **/
0583 
0584 extern "C" __global__ void __raygen__rg_dummy()
0585 {
0586 }
0587 
0588 extern "C" __global__ void __raygen__rg()
0589 {
0590     const uint3 idx = optixGetLaunchIndex();
0591     const uint3 dim = optixGetLaunchDimensions();
0592 
0593     //bool midpix = idx.x == dim.x/2 && idx.y == dim.y/2 && idx.z == dim.z/2 ;
0594     //if(midpix) printf("//__raygen_rg.midpix params.raygenmode %d  \n", params.raygenmode);
0595 
0596     quad2 prd ;
0597     prd.zero();
0598 
0599 
0600     switch( params.raygenmode )
0601     {
0602 
0603 #ifdef WITH_SIMULATE
0604         case SRG_SIMULATE:  simulate( idx, dim, &prd ) ; break ;
0605 #endif
0606 #ifdef WITH_RENDER
0607         case SRG_RENDER:    render(   idx, dim, &prd ) ; break ;
0608 #endif
0609 #ifdef WITH_SIMTRACE
0610         case SRG_SIMTRACE:  simtrace( idx, dim, &prd ) ; break ;
0611 #endif
0612     }
0613 
0614 }
0615 
0616 
0617 #ifdef WITH_PRD
0618 #else
0619 /**
0620 *setPayload* is used from __closesthit__ and __miss__ providing communication to __raygen__ optixTrace call
0621 
0622 NB THESE QWN NEED NOT BE THE SAME AS THE ATTRIB USED TO COMMUNICATE BETWEEN __intersection__is and __closesthit__ch
0623 
0624 **/
0625 static __forceinline__ __device__ void setPayload(
0626      float    normal_x,        float normal_y,                  float normal_z, float distance,
0627      unsigned iindex_identity, unsigned globalPrimIdx_boundary, float lposcost, float lposfphi )
0628 {
0629     optixSetPayload_0( __float_as_uint( normal_x ) );
0630     optixSetPayload_1( __float_as_uint( normal_y ) );
0631     optixSetPayload_2( __float_as_uint( normal_z ) );
0632     optixSetPayload_3( __float_as_uint( distance ) );
0633     optixSetPayload_4( iindex_identity );
0634     optixSetPayload_5( globalPrimIdx_boundary );
0635     optixSetPayload_6( lposcost );
0636     optixSetPayload_7( lposfphi );
0637 
0638     // num_payload_values PIP::PIP must match the payload slots used up to maximum of 8
0639     // NB : payload is distinct from attributes
0640 }
0641 #endif
0642 
0643 /**
0644 __miss__ms
0645 -------------
0646 
0647 * missing "normal" is somewhat render specific and this is used for
0648   all raygenmode but Miss should never happen with real simulations
0649 * Miss can happen with simple geometry testing however when shoot
0650   rays from outside the "world"
0651 
0652 **/
0653 
0654 
0655 extern "C" __global__ void __miss__ms()
0656 {
0657     MissData* ms  = reinterpret_cast<MissData*>( optixGetSbtDataPointer() );
0658     const unsigned ii_id = 0xffffffffu ;
0659     const unsigned gp_bd = 0xffffffffu ;
0660     const float lposcost = 0.f ;
0661     const float lposfphi = 0.f ;
0662 
0663 #ifdef WITH_PRD
0664     quad2* prd = SOPTIX_getPRD<quad2>();
0665 
0666     prd->q0.f.x = ms->r ;
0667     prd->q0.f.y = ms->g ;
0668     prd->q0.f.z = ms->b ;
0669     prd->q0.f.w = 1.f ;     // attempt to allow rendering infront of miss pixels, but dont work
0670 
0671     prd->q1.u.x = 0u ;
0672     prd->q1.u.y = 0u ;
0673     prd->q1.u.z = 0u ;
0674     prd->q1.u.w = 0u ;
0675 
0676     prd->set_iindex_identity_(ii_id);
0677     prd->set_globalPrimIdx_boundary_(gp_bd);
0678     prd->set_lpos(lposcost, lposfphi);   // __miss__ms.WITH_PRD
0679 #else
0680     setPayload( ms->r, ms->g, ms->b, 0.f, ii_id, gp_bd, lposcost, lposfphi );  // communicate from ms->rg
0681 #endif
0682 }
0683 
0684 /**
0685 __closesthit__ch : pass attributes from __intersection__ into setPayload
0686 ============================================================================
0687 
0688 optixGetInstanceIndex (aka iindex)
0689     0-based index within IAS
0690 
0691 optixGetInstanceId (aka identity)
0692     user supplied instanceId,
0693     see IAS_Builder::Build, sysrap/sqat4.h sqat4::get_IAS_OptixInstance_instanceId
0694     from July 2023: carries sensor_identifier+1 as needed for QPMT
0695 
0696 optixGetPrimitiveIndex (aka prim_idx)
0697     (not currently propagated)
0698     local index of AABB within the GAS,
0699     see GAS_Builder::MakeCustomPrimitivesBI_11N  (1+index-of-CSGPrim within CSGSolid/GAS).
0700     Note that instanced solids adds little to the number of AABB,
0701     most come from unfortunate repeated usage of prims in the non-instanced global
0702     GAS with repeatIdx 0 (JUNO up to ~4000)
0703 
0704 optixGetRayTmax
0705     In intersection and CH returns the current smallest reported hitT or the tmax passed into rtTrace
0706     if no hit has been reported
0707 
0708 
0709 optixGetPrimitiveType
0710     OPTIX_PRIMITIVE_TYPE_CUSTOM   = 0x2500    ## 9472 : GET THIS
0711     OPTIX_PRIMITIVE_TYPE_TRIANGLE = 0x2531    ## 9521 : HUH:GETTING ZERO WHEN EXPECT THIS ?
0712 
0713 
0714 ana:CH
0715    intersect IS program populates most of the prd per-ray-data struct
0716    including the trace distance and local normal at intersect
0717    this CH program just adds instance info and transforms the normal
0718    from object to world space
0719 
0720 tri:CH
0721    builtin triangles have no user defined intersect program, so this tri:CH
0722    program must populate everything that the ana:IS and ana:CH does
0723 
0724 tri:boundary
0725 
0726 
0727 ana:normals
0728    Calculation done by each shape implementation, no other choice ?
0729 
0730 tri:normals
0731    Possibilities:
0732 
0733    1. normal from cross product of vertex positions,
0734       GPU float precision calc
0735 
0736    2. normal from barycentric weighted vertex normals
0737       (probably best for sphere/torus or similar with small triangles)
0738 
0739    3. pick normal of one of the vertices,
0740       profits from double precision vertex normal calculated
0741       ahead of time
0742       (probably best for cube or similar with large triangles)
0743 
0744    Which is best depends on the shape and how the input
0745    vertex normals are calculated.
0746 
0747 **/
0748 
0749 extern "C" __global__ void __closesthit__ch()
0750 {
0751     unsigned iindex = optixGetInstanceIndex() ;
0752     unsigned identity = optixGetInstanceId() ;
0753     unsigned iindex_identity = (( iindex & 0xffffu ) << 16 ) | ( identity & 0xffffu ) ;
0754 
0755     OptixPrimitiveType type = optixGetPrimitiveType(); // HUH: getting type 0, when expect OPTIX_PRIMITIVE_TYPE_TRIANGLE
0756     const HitGroupData* hg = reinterpret_cast<HitGroupData*>( optixGetSbtDataPointer() );
0757 
0758 #if defined(DEBUG_PIDX)
0759     //const uint3 idx = optixGetLaunchIndex();
0760     //if(idx.x == 10 && idx.y == 10) printf("//__closesthit__ch idx(%u,%u,%u) type %d \n", idx.x, idx.y, idx.z, type);
0761     //if(identity == 52264 || identity == 52265 || identity == 52266) printf("//__closesthit__ch iindex %u type %d identity %d \n", iindex, type, identity );
0762 #endif
0763 
0764     if(type == OPTIX_PRIMITIVE_TYPE_TRIANGLE || type == 0)  // WHY GETTING ZERO HERE ?
0765     {
0766         const TriMesh& mesh = hg->mesh ;
0767         unsigned globalPrimIdx_boundary = (( mesh.globalPrimIdx & 0xffffu ) << 16 ) | ( mesh.boundary & 0xffffu ) ;
0768         const unsigned prim_idx = optixGetPrimitiveIndex();
0769         const float2   barys    = optixGetTriangleBarycentrics();
0770 
0771         uint3 tri = mesh.indice[ prim_idx ];
0772         const float3 P0 = mesh.vertex[ tri.x ];
0773         const float3 P1 = mesh.vertex[ tri.y ];
0774         const float3 P2 = mesh.vertex[ tri.z ];
0775         const float3 P = ( 1.0f-barys.x-barys.y)*P0 + barys.x*P1 + barys.y*P2;
0776 
0777         //const float3 N0 = mesh.normal[ tri.x ];
0778         //const float3 N1 = mesh.normal[ tri.y ];
0779         //const float3 N2 = mesh.normal[ tri.z ];
0780         //const float3 Ng = ( 1.0f-barys.x-barys.y)*N0 + barys.x*N1 + barys.y*N2; // guesss
0781         // local normal from  bary-weighted vertex normals
0782 
0783         const float3 Ng = cross( P1-P0, P2-P0 );
0784         // local normal from cross product of vectors between vertices : HMM is winding order correct : TODO: check sense of normal
0785 
0786         const float3 N = normalize( optixTransformNormalFromObjectToWorldSpace( Ng ) );
0787 
0788         float t = optixGetRayTmax() ;
0789 
0790         // cannot get Object frame ray_origin/direction in CH (only IS,AH)
0791         //const float3 ray_origin = optixGetObjectRayOrigin();
0792         //const float3 ray_direction = optixGetObjectRayDirection();
0793         //const float3 lpos = ray_origin + t*ray_direction  ;
0794         // HMM: could use P to give the local position ?
0795 
0796         float lposcost = normalize_cost(P); // scuda.h  "cosTheta" z/len of local frame position
0797         float lposfphi = normalize_fphi(P);
0798 
0799 
0800 #ifdef WITH_PRD
0801         quad2* prd = SOPTIX_getPRD<quad2>();
0802 
0803         prd->q0.f.x = N.x ;
0804         prd->q0.f.y = N.y ;
0805         prd->q0.f.z = N.z ;
0806         prd->q0.f.w = t ;
0807 
0808         prd->set_iindex_identity_( iindex_identity ) ;
0809         prd->set_globalPrimIdx_boundary_(  globalPrimIdx_boundary ) ;
0810         prd->set_lpos(lposcost, lposfphi);   // __closesthit__ch.WITH_PRD.TRIANGLE
0811 
0812 #else
0813         setPayload( N.x, N.y, N.z, t, iindex_identity, globalPrimIdx_boundary, lposcost, lposfphi );  // communicate from ch->rg
0814 #endif
0815     }
0816     else if(type == OPTIX_PRIMITIVE_TYPE_CUSTOM)
0817     {
0818         //const CustomPrim& cpr = hg->prim ;
0819 #ifdef WITH_PRD
0820         quad2* prd = SOPTIX_getPRD<quad2>();
0821 
0822         prd->set_iindex_identity_( iindex_identity ) ;
0823 
0824         float3* normal = prd->normal();
0825         *normal = optixTransformNormalFromObjectToWorldSpace( *normal ) ;
0826 #else
0827 
0828         // NB SEE
0829         const float3 local_normal =    // geometry object frame normal at intersection point
0830             make_float3(
0831                     __uint_as_float( optixGetAttribute_0() ),
0832                     __uint_as_float( optixGetAttribute_1() ),
0833                     __uint_as_float( optixGetAttribute_2() )
0834                     );
0835 
0836         const float distance = __uint_as_float(  optixGetAttribute_3() ) ;
0837         unsigned globalPrimIdx_boundary = optixGetAttribute_4() ;
0838         const float lposcost = __uint_as_float( optixGetAttribute_5() ) ;
0839         const float lposfphi = 0.f ; // NOT IMPL WHEN NOT:WITH_PRD
0840 
0841         float3 normal = optixTransformNormalFromObjectToWorldSpace( local_normal ) ;
0842 
0843         setPayload( normal.x, normal.y, normal.z, distance, iindex_identity, globalPrimIdx_boundary, lposcost, lposfphi );  // communicate from ch->rg
0844                 //   p0       p1        p2        p3        p4               p5                      p6        p7
0845 #endif
0846     }
0847 }
0848 
0849 /**
0850 __intersection__is
0851 ----------------------
0852 
0853 HitGroupData provides the numNode and nodeOffset of the intersected CSGPrim.
0854 Which Prim gets intersected relies on the CSGPrim::setSbtIndexOffset
0855 
0856 Note that optixReportIntersection returns a bool, but that is
0857 only relevant when using anyHit as it provides a way to ignore hits.
0858 But Opticks does not use anyHit so the returned bool should
0859 always be true.
0860 
0861 The attributes passed into optixReportIntersection are
0862 available within the CH (and AH) programs.
0863 
0864 HMM: notice that HitGroupData::numNode is not used here, must be looking that up ?
0865 COULD: reduce HitGroupData to just the nodeOffset
0866 
0867 **/
0868 
0869 extern "C" __global__ void __intersection__is()
0870 {
0871 
0872 #if defined(DEBUG_PIDXYZ)
0873     const uint3 idx = optixGetLaunchIndex();
0874     const uint3 dim = optixGetLaunchDimensions();
0875     bool dumpxyz = idx.x == params.pidxyz.x && idx.y == params.pidxyz.y && idx.z == params.pidxyz.z ;
0876     //if(dumpxyz) printf("//__intersection__is  idx(%u,%u,%u) dim(%u,%u,%u) dumpxyz:%d \n", idx.x, idx.y, idx.z, dim.x, dim.y, dim.z, dumpxyz);
0877 #else
0878     bool dumpxyz = false ;
0879 #endif
0880 
0881 
0882     HitGroupData* hg  = (HitGroupData*)optixGetSbtDataPointer();
0883     int nodeOffset = hg->prim.nodeOffset ;
0884     int globalPrimIdx = hg->prim.globalPrimIdx ;
0885 
0886     const CSGNode* node = params.node + nodeOffset ;  // root of tree
0887     const float4* plan = params.plan ;
0888     const qat4*   itra = params.itra ;
0889 
0890     const float  t_min = optixGetRayTmin() ;
0891     const float3 ray_origin = optixGetObjectRayOrigin();
0892     const float3 ray_direction = optixGetObjectRayDirection();
0893 
0894     float4 isect ; // .xyz normal .w distance
0895     isect.x = 0.f ;
0896     isect.y = 0.f ;
0897     isect.z = 0.f ;
0898     isect.w = 0.f ;
0899 
0900     bool valid_isect = intersect_prim(isect, node, plan, itra, t_min , ray_origin, ray_direction, dumpxyz );
0901     if(valid_isect)
0902     {
0903         const float3 lpos = ray_origin + isect.w*ray_direction ;
0904         const float lposcost = normalize_cost(lpos);  // scuda.h cosTheta z/len of local(aka Object) frame position
0905         const float lposfphi = normalize_fphi(lpos);
0906 
0907         const unsigned hitKind = 0u ;     // only up to 127:0x7f : could use to customize how attributes interpreted
0908         const unsigned boundary = node->boundary() ;  // all CSGNode in the tree for one CSGPrim tree have same boundary
0909         const unsigned globalPrimIdx_boundary = (( globalPrimIdx & 0xffffu ) << 16 ) | ( boundary & 0xffffu ) ;
0910 
0911 #ifdef WITH_PRD
0912         if(optixReportIntersection( isect.w, hitKind))
0913         {
0914             quad2* prd = SOPTIX_getPRD<quad2>(); // access prd addr from RG program
0915             prd->q0.f = isect ;  // .w:distance and .xyz:normal which starts as the local frame one
0916             prd->set_globalPrimIdx_boundary_(globalPrimIdx_boundary) ;
0917             prd->set_lpos(lposcost, lposfphi);    // __intersection__is.WITH_PRD.CUSTOM
0918         }
0919 #else
0920        // TODO: REMOVE NOT:WITH_PRD
0921         unsigned a0, a1, a2, a3, a4, a5  ; // MUST CORRESPOND TO num_attribute_values in PIP::PIP
0922         a0 = __float_as_uint( isect.x );     // isect.xyz is object frame normal of geometry at intersection point
0923         a1 = __float_as_uint( isect.y );
0924         a2 = __float_as_uint( isect.z );
0925         a3 = __float_as_uint( isect.w ) ;
0926         a4 = globalPrimIdx_boundary ;
0927         a5 = __float_as_uint( lposcost );
0928         optixReportIntersection( isect.w, hitKind, a0, a1, a2, a3, a4, a5 );
0929 
0930         // IS:optixReportIntersection writes the attributes that can be read in CH and AH programs
0931         // max 8 attribute registers, see PIP::PIP, communicate to __closesthit__ch
0932 #endif
0933 
0934    }
0935 
0936 #if defined(DEBUG_PIDXYZ)
0937     //if(dumpxyz) printf("//__intersection__is  idx(%u,%u,%u) dim(%u,%u,%u) dumpxyz:%d valid_isect:%d\n", idx.x, idx.y, idx.z, dim.x, dim.y, dim.z, dumpxyz, valid_isect);
0938 #endif
0939 
0940 }
0941 // story begins with intersection