eic-opticks/sysrap/sctx.h

0001 #pragma once
0002 /**
0003 sctx.h : holding "thread local" state
0004 =========================================
0005
0006 Canonical usage from GPU and CPU:
0007
0008 1. CSGOptiX/CSGOptiX7.cu:simulate
0009 2. sysrap/SEvt::pointPhoton
0010
0011
0012 Q: why not keep such state in sevent.h *evt* ?
0013 A: this state must be "thread" local, whereas the evt instance
0014    is shared by all threads and always saves into (idx, bounce)
0015    slotted locations
0016
0017 This avoids non-production instrumentation costing anything
0018 in production by simply removing it from the context via the
0019 PRODUCTION macro.
0020
0021
0022 sctx::aux usually accessed via SEvt::current_aux
0023 -------------------------------------------------
0024
0025 TODO : document the current_aux content here
0026
0027 +----+
0028 |    |
0029 +====+
0030 | q0 |
0031 +----+
0032 | q1 |
0033 +----+
0034 | q2 |
0035 +----+
0036 | q3 |
0037 +----+
0038
0039 q2.i.z : formerly fakemask, now uc4packed from the spho label
0040 q2.i.w : st
0041
0042
0043 **/
0044
// SCTX_METHOD : qualifier applied to every sctx method.
// Plain "inline" unless one of the CUDA compilation macros is defined,
// in which case the methods are force-inlined device functions.
#if !defined(__CUDACC__) && !defined(__CUDABE__)
#    define SCTX_METHOD inline
#else
#    define SCTX_METHOD __device__ __forceinline__
#endif


// WITH_SUP : switches on the quad6 "sup" member of sctx and its copy-out in sctx::end
#define WITH_SUP 1

// Types used by sctx in all builds.
// NB several of these are held by value within sctx, so these forward
// declarations alone are insufficient to instantiate sctx.
struct sevent ;
struct quad2 ;
struct sphoton ;
struct sstate ;

// Types used only by the debug instrumentation, absent from PRODUCTION builds.
#ifndef PRODUCTION
struct srec ;
struct sseq ;
struct stagr ;
struct quad4 ;
struct quad6 ;
#endif
0066
0067 struct sctx
0068 {
0069     sevent* evt ;
0070     const quad2* prd ;
0071     unsigned idx ;    // local launch index : always zero based
0072     unsigned long long pidx ;   // absolute photon index : offset from zero for launches other than first
0073
0074 #if !defined(PRODUCTION) && defined(DEBUG_PIDX)
0075     bool pidx_debug ;
0076 #endif
0077
0078     sphoton p ;
0079     sstate  s ;
0080
0081 #ifndef PRODUCTION
0082     srec rec ;
0083     sseq seq ;
0084     stagr tagr ;
0085     quad4 aux ;
0086 #ifdef WITH_SUP
0087     quad6 sup ;
0088 #endif
0089     // NB these are heavy : important to test with and without PRODUCTION
0090     // as these are expected to be rather expensive
0091 #endif
0092
0093 #if defined(__CUDACC__) || defined(__CUDABE__)
0094 #else
0095     SCTX_METHOD void zero();
0096 #endif
0097
0098 #ifndef PRODUCTION
0099     SCTX_METHOD void point(int bounce);
0100     SCTX_METHOD void trace(int bounce);
0101     SCTX_METHOD void end();
0102 #endif
0103 };
0104
0105
0106 #if defined(__CUDACC__) || defined(__CUDABE__)
0107 #else
0108 SCTX_METHOD void sctx::zero(){ *this = {} ; }
0109 #endif
0110
0111
0112 #ifndef PRODUCTION
0113 /**
0114 sctx::point : copy current sphoton p into (idx,bounce) entries of evt->record/rec/seq/aux
0115 -------------------------------------------------------------------------------------------
0116
0117 Notice this is NOT writing into evt->photon, that is done at SEvt::finalPhoton
0118 The reason for this split is that photon arrays are always needed
0119 but the others record/rec/seq/aux are only used for debugging purposes.
0120
0121 IDEA/TODO: When wishing to examine simulation records in a very small region of geometry
0122 (to check clearance between surfaces for example) it is currently necessary to
0123 run with huge statistics, then transfer around all that data and then promptly
0124 not look at most of it in analysis.  This suggests implementing an optional
0125 recording bbox (presumably within the target frame) into which photon record
0126 points must be in order to be recorded. This way can run full simulation but
0127 only record the region of current interest.
0128
0129 HMM: to do that need to access target frame GPU side ? Or could transform the
0130 input bbox within target frame into a global frame bbox and use that ?
0131
0132 **/
0133
0134 SCTX_METHOD void sctx::point(int bounce)
0135 {
0136     if(evt->record && bounce < evt->max_record) evt->record[evt->max_record*idx+bounce] = p ;
0137     if(evt->rec    && bounce < evt->max_rec)    evt->add_rec( rec, idx, bounce, p );    // this copies into evt->rec array
0138     if(evt->seq)                                seq.add_nibble( bounce, p.flag(), p.boundary() );
0139     if(evt->aux    && bounce < evt->max_aux)    evt->aux[evt->max_aux*idx+bounce] = aux ;
0140 }
0141
0142
0143 /**
0144 sctx::trace : copy current prd into (idx,bounce) entry of evt->prd
0145 ---------------------------------------------------------------------
0146
0147 As *prd* is updated by *trace* rather than *propagate* it is handled separately to sctx:point.
0148 The *prd* corresponds to the arrows (and trace) that gets between the points, eg::
0149
0150    TO->BT->BT->SC->AB
0151
0152 **/
0153
0154 SCTX_METHOD void sctx::trace(int bounce)
0155 {
0156     if(evt->prd) evt->prd[evt->max_prd*idx+bounce] = *prd ;
0157 }
0158
0159 /**
0160 sctx::end : copy current seq into idx entry of evt->seq
0161 -----------------------------------------------------------
0162
0163 Q: did I forget rec ?
0164 A: No. rec+record are added bounce-by-bounce into evt->rec/record in sctx::point
0165
0166    * seq is different because it is added nibble by nibble into the big integer
0167
0168
0169 Q: why not copy p into evt->photon[idx] here ?
0170 A: unsure, currently thats done in SEvt::finalPhoton
0171
0172 **/
0173
0174 SCTX_METHOD void sctx::end()
0175 {
0176     if(evt->seq)  evt->seq[idx] = seq ;
0177 #ifdef WITH_SUP
0178     if(evt->sup)  evt->sup[idx] = sup ;
0179 #endif
0180 #ifdef DEBUG_TAG
0181     if(evt->tag)  evt->tag[idx]  = tagr.tag ;
0182     if(evt->flat) evt->flat[idx] = tagr.flat ;
0183 #endif
0184 }
0185
0186 #endif
0187