Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-04-10 07:49:40

0001 #include <cuda_runtime.h>
0002 #include <sstream>
0003 #include <csignal>
0004 
0005 
0006 
0007 #include "SEvt.hh"
0008 
0009 #include "scuda.h"
0010 #include "squad.h"
0011 
0012 #include "sphoton.h"
0013 #include "sphotonlite.h"
0014 
0015 #include "sslice.h"
0016 
0017 #ifndef PRODUCTION
0018 #include "srec.h"
0019 #include "sseq.h"
0020 #include "stag.h"
0021 #endif
0022 
0023 #include "sevent.h"
0024 #include "salloc.h"
0025 #include "sstamp.h"
0026 #include "ssys.h"
0027 
0028 #include "sqat4.h"
0029 #include "stran.h"
0030 
0031 #include "SU.hh"
0032 #include "SPM.hh"
0033 
0034 #include "SComp.h"
0035 #include "SGenstep.h"
0036 #include "SEvent.hh"
0037 #include "SEvt.hh"
0038 #include "SEventConfig.hh"
0039 #include "NP.hh"
0040 #include "SLOG.hh"
0041 
0042 #include "OpticksGenstep.h"
0043 
0044 #include "QEvt.hh"
0045 #include "QBuf.hh"
0046 #include "QBuf.hh"
0047 #include "QU.hh"
0048 
0049 
0050 template struct QBuf<quad6> ;
0051 
0052 bool QEvt::LIFECYCLE = ssys::getenvbool(QEvt__LIFECYCLE) ;
0053 
0054 const plog::Severity QEvt::LEVEL = SLOG::EnvLevel("QEvt", "DEBUG");
0055 QEvt* QEvt::INSTANCE = nullptr ;
0056 QEvt* QEvt::Get(){ return INSTANCE ; }
0057 
0058 const bool QEvt::SEvt_NPFold_VERBOSE  = ssys::getenvbool("QEvt__SEvt_NPFold_VERBOSE") ;
0059 
0060 std::string QEvt::Desc() // static
0061 {
0062     std::stringstream ss ;
0063     ss << "QEvt::Desc" << std::endl
0064        << " QEvt__SEvt_NPFold_VERBOSE     : " << ( SEvt_NPFold_VERBOSE     ? "YES" : "NO " ) << std::endl
0065        ;
0066 
0067     std::string str = ss.str();
0068     return str ;
0069 }
0070 
0071 
0072 
0073 sevent* QEvt::getDevicePtr() const
0074 {
0075     return d_evt ;
0076 }
0077 
0078 /**
0079 QEvt::QEvt
0080 ----------------
0081 
0082 Canonical QEvt instance resides within QSim and is instanciated by QSim::QSim.
0083 Instanciation allocates device buffers with sizes configured by SEventConfig
0084 
0085 
0086 Holds:
0087 
0088 * SEvt.hh:sev
0089 * sevent.h:evt
0090 * sevent.h:d_evt
0091 * NP.hh:gs
0092 * NP.hh:input_photon
0093 
0094 
0095 Q: Where is the SEvt::EGPU instanciated ?
0096 
0097 **/
0098 
0099 QEvt::QEvt()
0100     :
0101     sev(SEvt::Get_EGPU()),
0102     photon_selector(sev ? sev->photon_selector : nullptr),
0103     photonlite_selector(sev ? sev->photonlite_selector : nullptr),
0104     evt(sev ? sev->evt : nullptr),
0105     d_evt(QU::device_alloc<sevent>(1,"QEvt::QEvt/sevent")),
0106     gs(nullptr),
0107     gss(nullptr),
0108     input_photon(nullptr),
0109     upload_count(0)
0110 {
0111     LOG(LEVEL);
0112     LOG_IF(info, LIFECYCLE) ;
0113     INSTANCE = this ;
0114     init();
0115 }
0116 
0117 /**
0118 QEvt::init
0119 --------------
0120 
0121 Only configures limits, no allocation yet. Allocation happens in QEvt::setGenstep QEvt::setNumPhoton
0122 
0123 HMM: hostside sevent.h instance could reside in SEvt together with photon_selector then hostside setup
0124 can be common between the branches
0125 
0126 **/
0127 
0128 void QEvt::init()
0129 {
0130     LOG_IF(fatal, !sev) << "QEvt instanciated before SEvt instanciated : this is not going to fly " ;
0131 
0132     assert(sev);
0133     assert(evt);
0134     assert(photon_selector);
0135     assert(photonlite_selector);
0136 
0137     LOG(LEVEL) << " QEvt::init calling SEvt/setCompProvider " ;
0138     sev->setCompProvider(this);
0139 
0140     init_SEvt();
0141 }
0142 
0143 void QEvt::init_SEvt()
0144 {
0145     if(SEvt_NPFold_VERBOSE)
0146     {
0147         LOG(info) << " QEvt__SEvt_NPFold_VERBOSE : setting SEvt:setFoldVerbose " ;
0148         sev->setFoldVerbose(true);
0149     }
0150 }
0151 
0152 
0153 std::string QEvt::desc() const
0154 {
0155     std::stringstream ss ;
0156     ss << evt->desc() << std::endl ;
0157     std::string s = ss.str();
0158     return s ;
0159 }
0160 
0161 std::string QEvt::desc_alloc() const
0162 {
0163     salloc* alloc = QU::alloc ;
0164     std::stringstream ss ;
0165     ss << "[QEvt::desc_alloc " << std::endl ;
0166     ss << ( alloc ? "salloc::desc" : "NO-salloc" ) << std::endl ;
0167     ss << ( alloc ? alloc->desc() : "" ) << std::endl ;
0168     ss << "]QEvt::desc_alloc " << std::endl ;
0169     std::string s = ss.str();
0170     return s ;
0171 }
0172 
0173 
0174 
0175 /**
0176 QEvt::setGenstepUpload_NP
0177 ------------------------------
0178 
0179 Canonically invoked from QSim::simulate and QSim::simtrace just prior to cx->launch
0180 
0181 **/
0182 int QEvt::setGenstepUpload_NP(const NP* gs_ )
0183 {
0184     LOG_IF(info, SEvt::LIFECYCLE) << "[" ;
0185     int rc = setGenstepUpload_NP(gs_, nullptr );
0186     LOG_IF(info, SEvt::LIFECYCLE) << "]" ;
0187     return rc ;
0188 }
0189 
0190 /**
0191 QEvt::setGenstepUpload_NP
0192 -----------------------------
0193 
0194 Uploads all OR a slice of the gensteps
0195 
0196 **/
0197 
0198 
0199 int QEvt::setGenstepUpload_NP(const NP* gs_, const sslice* gss_ )
0200 {
0201     LOG_IF( fatal, gs_ == nullptr ) << " gs_ null " ;
0202     assert( gs_ );
0203 
0204     gs = gs_ ;
0205     gss = gss_ ? new sslice(*gss_) : nullptr ;
0206 
0207     SGenstep::Check(gs);
0208 
0209     LOG(LEVEL)
0210         << " gs " << ( gs ? gs->sstr() : "-" )
0211         << SGenstep::Desc(gs, 10)
0212         ;
0213 
0214     int64_t num_gs = gs ? gs->shape[0] : 0 ;
0215 
0216     int64_t gs_start = gss ? gss->gs_start : 0 ;
0217     int64_t gs_stop  = gss ? gss->gs_stop  : num_gs ;
0218 
0219     assert( gs_start >= 0 && gs_start <  num_gs );
0220     assert( gs_stop  >= 1 && gs_stop  <= num_gs );
0221 
0222     const char* data = gs ? gs->bytes() : nullptr ;
0223     const quad6* qq = (const quad6*)data ;
0224 
0225     int rc = setGenstepUpload(qq, gs_start, gs_stop );
0226 
0227     if(gss == nullptr) return rc ;
0228 
0229 
0230     bool gss_consistent = gss->ph_count == evt->num_photon ;
0231     LOG_IF(fatal, !gss_consistent )
0232         << " gss.desc " << gss->desc() << "\n"
0233         << " gss->ph_count " << gss->ph_count << "\n"
0234         << " evt->num_photon " << evt->num_photon << "\n"
0235         << " gss_consistent " << ( gss_consistent ? "YES" : "NO " ) << "\n"
0236         ;
0237 
0238     size_t last_rng_state_idx = gss->ph_offset + gss->ph_count ;
0239     bool in_range = last_rng_state_idx <= evt->max_curand ;
0240 
0241     LOG_IF(fatal, !in_range)
0242         << " gss.desc " << gss->desc() << "\n"
0243         << " gss->ph_offset " << gss->ph_offset << "\n"
0244         << " gss->ph_count " << gss->ph_count << "\n"
0245         << " gss->ph_offset + gss->ph_count " << last_rng_state_idx << "(last_rng_state_idx) must be <= max_curand for valid rng_state access\n"
0246         << " evt->max_curand " << evt->max_curand << "\n"
0247         << " evt->num_curand " << evt->num_curand << "\n"
0248         << " evt->max_slot " << evt->max_slot << "\n"
0249         ;
0250 
0251     assert( gss_consistent );
0252     assert( in_range );
0253 
0254     return rc ;
0255 }
0256 
0257 
0258 unsigned long long QEvt::get_photon_slot_offset() const
0259 {
0260     typedef unsigned long long ULL ;
0261     return gss ? ULL(gss->ph_offset) : 0ull ;   // (sslice)gss::ph_offset is int64_t
0262 }
0263 
0264 
0265 /**
0266 QEvt::clear
0267 --------------
0268 
0269 This is called from QSim::reset
0270 The former omission of gs deletion was reported by Ilker Parmaksiz.
0271 
0272 **/
0273 
0274 void QEvt::clear()
0275 {
0276     delete gs ;
0277     gs = nullptr ;
0278 }
0279 
0280 
0281 
0282 
0283 
0284 /**
0285 QEvt::setGenstepUpload
0286 ---------------------------
0287 
0288 Switch to quad6* arg to allow direct from vector upload,
0289 
0290 Recall that even with input photon running, still have gensteps.
0291 If the number of gensteps is zero there are no photons and no launch.
0292 
0293 
0294 1. if not already allocated QEvt::device_alloc_genstep_and_seed
0295    using configured sevent::max_genstep sevent::max_photon values
0296 
0297 2. QU::copy_host_to_device the sevent::num_genstep
0298    and setting pointer sevent::genstep
0299 
0300 3. QU::device_memset zeroing the seed buffer : this is needed
0301    for each launch, doing at initialization only is not sufficient.
0302    **This is a documented limitation of sysrap/iexpand.h**
0303 
0304 4. QEvt::count_genstep_photons_and_fill_seed_buffer
0305 
0306    * calculates the total number of seeds (and photons) on device
0307      by adding the photons from each genstep and setting evt->num_seed
0308 
0309    * populates seed buffer using num photons per genstep from genstep buffer,
0310      which is the way each photon thread refers back to its genstep
0311 
0312 5. setNumSimtrace/setInputPhoton/setNumPhoton which may allocate records
0313 
0314 
0315 **/
0316 
0317 int QEvt::setGenstepUpload(const quad6* qq0, int num_gs )
0318 {
0319     return setGenstepUpload(qq0, 0, num_gs );
0320 }
0321 
0322 /**
0323 QEvt::setGenstepUpload
0324 -------------------------
0325 
0326 HMM: evt->num_seed comes from summing the genstep photon counts
0327 
0328 
0329 **/
0330 
0331 
0332 int QEvt::setGenstepUpload(const quad6* qq0, int gs_start, int gs_stop )
0333 {
0334     const quad6* qq = qq0 + gs_start ;
0335 
0336 
0337     LOG_IF(info, SEvt::LIFECYCLE) << "[" ;
0338 #ifndef PRODUCTION
0339     sev->t_setGenstep_3 = sstamp::Now();
0340 #endif
0341 
0342     int num_genstep = gs_stop - gs_start ;
0343     bool zero_genstep = num_genstep == 0 ;
0344 
0345     evt->num_genstep = num_genstep ;
0346     bool not_allocated = evt->genstep == nullptr && evt->seed == nullptr ;
0347 
0348     LOG_IF(info, LIFECYCLE) << " not_allocated " << ( not_allocated ? "YES" : "NO" ) ;
0349 
0350     LOG(LEVEL)
0351         << " gs_start " << gs_start
0352         << " gs_stop " << gs_stop
0353         << " evt.num_genstep " << evt->num_genstep
0354         << " not_allocated " << ( not_allocated ? "YES" : "NO" )
0355         << " zero_genstep " << ( zero_genstep ? "YES" : "NO " )
0356         ;
0357 
0358     if(not_allocated)
0359     {
0360         LOG(LEVEL) << "[ device_alloc_genstep_and_seed " ;
0361         device_alloc_genstep_and_seed() ;
0362         LOG(LEVEL) << "] device_alloc_genstep_and_seed " ;
0363     }
0364 
0365 
0366     bool num_gs_allowed = evt->num_genstep <= evt->max_genstep ;
0367     LOG_IF(fatal, !num_gs_allowed) << " evt.num_genstep " << evt->num_genstep << " evt.max_genstep " << evt->max_genstep ;
0368     assert( num_gs_allowed );
0369 
0370 #ifndef PRODUCTION
0371     sev->t_setGenstep_4 = sstamp::Now();
0372 #endif
0373 
0374     if( qq != nullptr )
0375     {
0376         LOG(LEVEL) << "[ QU::copy_host_to_device " ;
0377         QU::copy_host_to_device<quad6>( evt->genstep, (quad6*)qq, evt->num_genstep );
0378         LOG(LEVEL) << "] QU::copy_host_to_device " ;
0379     }
0380 
0381 #ifndef PRODUCTION
0382     sev->t_setGenstep_5 = sstamp::Now();
0383 #endif
0384 
0385     LOG(LEVEL) << "[ QU::device_memset " ;
0386     QU::device_memset<int>(   evt->seed,    0, evt->max_slot );  // was max_photon but max_slot makes more sense
0387     LOG(LEVEL) << "] QU::device_memset " ;
0388 
0389 #ifndef PRODUCTION
0390     sev->t_setGenstep_6 = sstamp::Now();
0391 #endif
0392 
0393     if(num_genstep > 0)
0394     {
0395         //count_genstep_photons();   // sets evt->num_seed
0396         //fill_seed_buffer() ;       // populates seed buffer
0397         LOG(LEVEL) << "[ count_genstep_photons_and_fill_seed_buffer " ;
0398         count_genstep_photons_and_fill_seed_buffer();   // combi-function doing what both the above do
0399         LOG(LEVEL) << "] count_genstep_photons_and_fill_seed_buffer " ;
0400     }
0401     else
0402     {
0403         LOG(error) << " num_genstep ZERO : proceed anyhow eg for low level QSimTest tests" ;
0404     }
0405 
0406 #ifndef PRODUCTION
0407     sev->t_setGenstep_7 = sstamp::Now();
0408 #endif
0409 
0410     int gencode0 = SGenstep::GetGencode(qq, 0) ; // gencode of first genstep or OpticksGenstep_INVALID for qq nullptr
0411 
0412     if(OpticksGenstep_::IsFrame(gencode0))   // OpticksGenstep_FRAME  (HMM: Obtuse, maybe change to SIMTRACE ?)
0413     {
0414         setNumSimtrace( evt->num_seed );
0415     }
0416     else if(OpticksGenstep_::IsInputPhoton(gencode0)) // OpticksGenstep_INPUT_PHOTON  (NOT: _TORCH)
0417     {
0418         setInputPhotonAndUpload();
0419     }
0420     else if(OpticksGenstep_::IsInputPhotonSimtrace(gencode0)) // OpticksGenstep_INPUT_PHOTON_SIMTRACE
0421     {
0422         setInputPhotonSimtraceAndUpload();
0423     }
0424     else
0425     {
0426         setNumPhoton( evt->num_seed );  // *HEAVY* : photon, rec, record may be allocated here depending on SEventConfig
0427     }
0428     upload_count += 1 ;
0429 
0430 #ifndef PRODUCTION
0431     sev->t_setGenstep_8 = sstamp::Now();
0432 #endif
0433     LOG_IF(info, SEvt::LIFECYCLE) << "]" ;
0434 
0435 
0436     int rc = zero_genstep ? 1 : 0 ;
0437     LOG_IF(error, rc != 0 ) << "No gensteps in SEvt::EGPU : ONLY OK WITH VERY LOW LEVEL TESTING eg QSimTest  " ;
0438 
0439     return rc ;
0440 }
0441 
0442 
0443 
0444 
0445 
0446 
0447 /**
0448 QEvt::device_alloc_genstep_and_seed
0449 -------------------------------------------
0450 
0451 Allocates memory for genstep and seed, keeping device pointers within
0452 the hostside sevent.h "evt->genstep" "evt->seed"
0453 
0454 **/
0455 
0456 void QEvt::device_alloc_genstep_and_seed()
0457 {
0458     LOG_IF(info, LIFECYCLE) ;
0459     LOG(LEVEL)
0460         << " device_alloc genstep and seed "
0461         << " evt.max_genstep " << evt->max_genstep
0462         << " evt.max_slot " << evt->max_slot
0463         << " evt.max_photon " << evt->max_photon
0464         ;
0465     evt->genstep = QU::device_alloc<quad6>( evt->max_genstep, "QEvt::setGenstep/device_alloc_genstep_and_seed:quad6/max_genstep" ) ;
0466     evt->seed    = QU::device_alloc<int>(   evt->max_slot   , "QEvt::setGenstep/device_alloc_genstep_and_seed:int/max_slot" )  ;
0467                                      //     ^^^^^^^^^^^^^^^ was max_photon but max_slot now makes more sense
0468 
0469 }
0470 
0471 
0472 
0473 /**
0474 QEvt::setInputPhotonAndUpload
0475 ------------------------------------
0476 
0477 This is a private method invoked only from QEvt::setGenstepUpload
0478 
0479 1. SEvt::gatherInputPhoton narrows or copies the input
0480    photons (which may be frame transformed) providing
0481    a narrowed f4 array.
0482 
0483    NB gatherInputPhoton always provides a fresh
0484    unencumbered array that a subsequent SEvt::clear
0485    cannot delete. So that means it just LEAKs,
0486    but that currently not much of a problem
0487    as input photons are used for debugging purposes
0488    currently
0489 
0490    TODO: WHEN DOING LEAK CHECKING TRY TO FIND THIS
0491    LEAK AND AVOID IT BY DELETING THE ARRAY HERE
0492    IMMEDIATELY AFTER UPLOAD
0493 
0494    Input photons are awkward because they do not
0495    follow the pattern of other arrays. They:
0496 
0497    * originate on the CPU (like gensteps)
0498    * have no dedicated device buffer for them (unlike gensteps)
0499    * get copied into the photons buffer instead of
0500      being generated on device
0501    * are not downloaded from device
0502 
0503    Effectively input photons are a cheat to avoid
0504    on device generation that is convenient for
0505    debugging, and especially useful to provide
0506    common inputs for random aligned bi-simulation.
0507 
0508 
0509 2. QEvt::checkInputPhoton expectation asserts
0510 
0511 3. QU::copy_host_to_device upload the input photon array
0512    into the photon buffer
0513 
0514 **/
0515 
0516 void QEvt::setInputPhotonAndUpload()
0517 {
0518     LOG_IF(info, LIFECYCLE) ;
0519     LOG(LEVEL);
0520     input_photon = sev->gatherInputPhoton();
0521     checkInputPhoton();
0522 
0523     int numph = input_photon->shape[0] ;
0524     setNumPhoton( numph );
0525     QU::copy_host_to_device<sphoton>( evt->photon, (sphoton*)input_photon->bytes(), numph );
0526 }
0527 
0528 
0529 void QEvt::setInputPhotonSimtraceAndUpload()
0530 {
0531     LOG_IF(info, LIFECYCLE) ;
0532     LOG(LEVEL);
0533     input_photon = sev->gatherInputPhoton();
0534     checkInputPhoton();
0535 
0536     int numph = input_photon->shape[0] ;
0537     setNumSimtrace( numph );
0538     QU::copy_host_to_device<quad4>( evt->simtrace, (quad4*)input_photon->bytes(), numph );
0539 }
0540 
0541 
0542 
0543 void QEvt::checkInputPhoton() const
0544 {
0545     LOG_IF(fatal, input_photon == nullptr)
0546         << " INCONSISTENT : OpticksGenstep_INPUT_PHOTON by no input photon array "
0547         ;
0548 
0549     assert(input_photon);
0550 
0551     bool expected_shape = input_photon->has_shape( -1, 4, 4) ;
0552     bool expected_ebyte = input_photon->ebyte == 4 ;
0553 
0554     size_t numph = input_photon->shape[0] ;
0555     bool expected_numph = evt->num_seed == numph ;
0556 
0557     LOG_IF(fatal, !expected_shape) << " !expected_shape " << input_photon->sstr() ;
0558     LOG_IF(fatal, !expected_ebyte) << " !expected_ebyte " << input_photon->ebyte ;
0559     LOG_IF(fatal, !expected_numph) << " !expected_numph " << numph << " evt.num_seed " << ( evt ? evt->num_seed : -1 )  ;
0560 
0561     assert(expected_shape);
0562     assert(expected_ebyte);
0563     assert(expected_numph);
0564 }
0565 
0566 
0567 
0568 
0569 // TODO: how to avoid duplication between QEvt and SEvt ?
0570 
0571 bool QEvt::hasGenstep() const { return evt->genstep != nullptr ; }
0572 bool QEvt::hasSeed() const {    return evt->seed != nullptr ; }
0573 bool QEvt::hasPhoton() const {  return evt->photon != nullptr ; }
0574 bool QEvt::hasPhotonLite() const {  return evt->photonlite != nullptr ; }
0575 bool QEvt::hasRecord() const { return evt->record != nullptr ; }
0576 bool QEvt::hasRec() const    { return evt->rec != nullptr ; }
0577 bool QEvt::hasSeq() const    { return evt->seq != nullptr ; }
0578 bool QEvt::hasPrd() const    { return evt->prd != nullptr ; }
0579 bool QEvt::hasTag() const    { return evt->tag != nullptr ; }
0580 bool QEvt::hasFlat() const   { return evt->flat != nullptr ; }
0581 bool QEvt::hasHit() const    { return evt->hit != nullptr ; }
0582 bool QEvt::hasHitLite() const    { return evt->hitlite != nullptr ; }
0583 bool QEvt::hasSimtrace() const  { return evt->simtrace != nullptr ; }
0584 
0585 
0586 
0587 
0588 /**
0589 QEvt::count_genstep_photons
0590 ------------------------------
0591 
0592 thrust::reduce using strided iterator summing over GPU side gensteps
0593 
0594 **/
0595 
0596 extern "C" unsigned QEvt_count_genstep_photons(sevent* evt) ;
0597 unsigned QEvt::count_genstep_photons()
0598 {
0599    return QEvt_count_genstep_photons( evt );
0600 }
0601 
0602 /**
0603 QEvt::fill_seed_buffer
0604 ---------------------------
0605 
0606 Populates seed buffer using the number of photons from each genstep
0607 
0608 The photon seed buffer is a device buffer containing integer indices referencing
0609 into the genstep buffer. The seeds provide the association between the photon
0610 and the genstep required to generate it.
0611 
0612 **/
0613 
0614 extern "C" void QEvt_fill_seed_buffer(sevent* evt );
0615 void QEvt::fill_seed_buffer()
0616 {
0617     LOG_IF(info, LIFECYCLE) ;
0618     QEvt_fill_seed_buffer( evt );
0619 }
0620 
0621 extern "C" void QEvt_count_genstep_photons_and_fill_seed_buffer(sevent* evt );
0622 void QEvt::count_genstep_photons_and_fill_seed_buffer()
0623 {
0624     LOG_IF(info, LIFECYCLE) ;
0625     QEvt_count_genstep_photons_and_fill_seed_buffer( evt );
0626 }
0627 
0628 
0629 
0630 
0631 NP* QEvt::getGenstep() const
0632 {
0633     NP* _gs = const_cast<NP*>(gs) ; // const_cast so can use QEvt::gatherComponent_
0634     LOG(LEVEL) << " _gs " << ( _gs ? _gs->sstr() : "-" ) ;
0635     return _gs ;
0636 }
0637 NP* QEvt::getInputPhoton() const
0638 {
0639     return input_photon ;
0640 }
0641 
0642 
0643 
0644 
0645 
0646 
0647 
0648 /**
0649 QEvt::gatherPhoton(NP* p) :  mutating API
0650 -------------------------------------------
0651 
0652 * QU::copy_device_to_host using (sevent)evt->photon/num_photon
0653 
0654   * sevent.h needs changing for each sub-launch
0655 
0656 
0657 **/
0658 
0659 void QEvt::gatherPhoton(NP* p) const
0660 {
0661 
0662     bool expected_shape =  p->has_shape(evt->num_photon, 4, 4) ;
0663     LOG(expected_shape ? LEVEL : fatal) << "[ evt.num_photon " << evt->num_photon << " p.sstr " << p->sstr() << " evt.photon " << evt->photon ;
0664     LOG(info) << "[ evt.num_photon " << evt->num_photon << " p.sstr " << p->sstr() << " evt.photon " << evt->photon ;
0665     assert(expected_shape );
0666 
0667     int rc = QU::copy_device_to_host<sphoton>( (sphoton*)p->bytes(), evt->photon, evt->num_photon );
0668 
0669     LOG_IF(fatal, rc != 0)
0670          << " QU::copy_device_to_host photon FAILED "
0671          << " evt->photon " << ( evt->photon ? "Y" : "N" )
0672          << " evt->num_photon " <<  evt->num_photon
0673          ;
0674 
0675     if(rc != 0) std::raise(SIGINT) ;
0676 
0677     LOG(LEVEL) << "] evt.num_photon " << evt->num_photon  ;
0678 }
0679 
0680 NP* QEvt::gatherPhoton() const
0681 {
0682     NP* p = sev->makePhoton();
0683     gatherPhoton(p);
0684     return p ;
0685 }
0686 
0687 
0688 
0689 
0690 
0691 
0692 
0693 void QEvt::gatherPhotonLite(NP* l) const
0694 {
0695     bool expected_arr =  sphotonlite::expected(l);
0696     LOG(expected_arr ? LEVEL : fatal) << "[ evt.num_photon " << evt->num_photon << " l.sstr " << l->sstr() << " evt.photon " << evt->photon ;
0697     LOG(info) << "[ evt.num_photon " << evt->num_photon << " l.sstr " << l->sstr() << " evt.photon " << evt->photon ;
0698     assert(expected_arr );
0699 
0700     int rc = QU::copy_device_to_host<sphotonlite>( (sphotonlite*)l->bytes(), evt->photonlite, evt->num_photon );
0701 
0702     LOG_IF(fatal, rc != 0)
0703          << " QU::copy_device_to_host photonlite FAILED "
0704          << " evt->photonlite " << ( evt->photonlite ? "Y" : "N" )
0705          << " evt->num_photon " <<  evt->num_photon
0706          ;
0707 
0708     if(rc != 0) std::raise(SIGINT) ;
0709 
0710     LOG(LEVEL) << "] evt.num_photon " << evt->num_photon  ;
0711 }
0712 
0713 
0714 NP* QEvt::gatherPhotonLite() const
0715 {
0716     NP* l = sev->makePhotonLite();
0717     gatherPhotonLite(l);
0718     return l ;
0719 }
0720 
0721 
0722 
0723 
0724 
0725 
0726 #ifndef PRODUCTION
0727 
0728 NP* QEvt::gatherSeed() const
0729 {
0730     bool has_seed = hasSeed() ;
0731     LOG_IF(fatal, !has_seed) << " gatherSeed called when there is no such array, use SEventConfig::SetCompMask to avoid " ;
0732     if(!has_seed) return nullptr ;
0733     NP* s = NP::Make<int>( evt->num_seed );   // TODO: use SEvt::makeSeed
0734     QU::copy_device_to_host<int>( (int*)s->bytes(), evt->seed, evt->num_seed );
0735     return s ;
0736 }
0737 
0738 NP* QEvt::gatherDomain() const { return sev ? sev->gatherDomain() : nullptr ; }
0739 
0740 
0741 /**
0742 QEvt::gatherGenstepFromDevice
0743 ---------------------------------
0744 
0745 Gensteps originate on host and are uploaded to device, so downloading
0746 them from device is not usually done. It is for debugging only.
0747 
0748 **/
0749 
0750 NP* QEvt::gatherGenstepFromDevice() const
0751 {
0752     NP* a = NP::Make<float>( evt->num_genstep, 6, 4 );
0753     QU::copy_device_to_host<quad6>( (quad6*)a->bytes(), evt->genstep, evt->num_genstep );
0754     return a ;
0755 }
0756 
0757 
0758 void QEvt::gatherSimtrace(NP* t) const
0759 {
0760     LOG(LEVEL) << "[ evt.num_simtrace " << evt->num_simtrace << " t.sstr " << t->sstr() << " evt.simtrace " << evt->simtrace ;
0761     assert( t->has_shape(evt->num_simtrace, 4, 4) );
0762     QU::copy_device_to_host<quad4>( (quad4*)t->bytes(), evt->simtrace, evt->num_simtrace );
0763     LOG(LEVEL) << "] evt.num_simtrace " << evt->num_simtrace  ;
0764 }
0765 NP* QEvt::gatherSimtrace() const
0766 {
0767     bool has_simtrace = hasSimtrace();
0768     LOG_IF(LEVEL, !has_simtrace) << " getSimtrace called when there is no such array, use SEventConfig::SetCompMask to avoid " ;
0769     if(!has_simtrace) return nullptr ;
0770     NP* t = NP::Make<float>( evt->num_simtrace, 4, 4);   // TODO: use SEvt::makeSimtrace ?
0771     gatherSimtrace(t);
0772     return t ;
0773 }
0774 
0775 void QEvt::gatherSeq(NP* seq) const
0776 {
0777     bool has_seq = hasSeq();
0778     if(!has_seq) return ;
0779     LOG(LEVEL) << "[ evt.num_seq " << evt->num_seq << " seq.sstr " << seq->sstr() << " evt.seq " << evt->seq ;
0780     assert( seq->has_shape(evt->num_seq, 2) );
0781     QU::copy_device_to_host<sseq>( (sseq*)seq->bytes(), evt->seq, evt->num_seq );
0782     LOG(LEVEL) << "] evt.num_seq " << evt->num_seq  ;
0783 }
0784 NP* QEvt::gatherSeq() const
0785 {
0786     bool has_seq = hasSeq();
0787     LOG_IF(LEVEL, !has_seq) << " gatherSeq called when there is no such array, use SEventConfig::SetCompMask to avoid " ;
0788     if(!has_seq) return nullptr ;
0789 
0790     NP* seq = sev->makeSeq();
0791 
0792     gatherSeq(seq);
0793     return seq ;
0794 }
0795 
0796 
0797 
0798 NP* QEvt::gatherPrd() const
0799 {
0800     bool has_prd = hasPrd();
0801     LOG_IF(LEVEL, !has_prd) << " gatherPrd called when there is no such array, use SEventConfig::SetCompMask to avoid " ;
0802     if(!has_prd) return nullptr ;
0803 
0804     NP* prd = sev->makePrd();
0805     LOG(LEVEL) << " evt.num_prd " << evt->num_prd ;
0806     QU::copy_device_to_host<quad2>( (quad2*)prd->bytes(), evt->prd, evt->num_prd );
0807     return prd ;
0808 }
0809 
0810 NP* QEvt::gatherTag() const
0811 {
0812     bool has_tag = hasTag() ;
0813     LOG_IF(LEVEL, !has_tag) << " gatherTag called when there is no such array, use SEventConfig::SetCompMask to avoid " ;
0814     if(!has_tag) return nullptr ;
0815 
0816     NP* tag = sev->makeTag();
0817     LOG(LEVEL) << " evt.num_tag " << evt->num_tag << " tag.desc " << tag->desc() ;
0818     QU::copy_device_to_host<stag>( (stag*)tag->bytes(), evt->tag, evt->num_tag );
0819     return tag ;
0820 }
0821 
0822 NP* QEvt::gatherFlat() const
0823 {
0824     bool has_flat = hasFlat();
0825     LOG_IF(LEVEL, !has_flat) << " gatherFlat called when there is no such array, use SEventConfig::SetCompMask to avoid " ;
0826     if(!has_flat) return nullptr ;
0827 
0828     NP* flat = sev->makeFlat();
0829     LOG(LEVEL) << " evt.num_flat " << evt->num_flat << " flat.desc " << flat->desc() ;
0830     QU::copy_device_to_host<sflat>( (sflat*)flat->bytes(), evt->flat, evt->num_flat );
0831     return flat ;
0832 }
0833 
0834 
0835 NP* QEvt::gatherRecord() const
0836 {
0837     bool has_record = hasRecord() ;
0838     LOG_IF(LEVEL, !has_record) << " gatherRecord called when there is no such array, use SEventConfig::SetCompMask to avoid " ;
0839     if(!has_record) return nullptr ;
0840 
0841     NP* r = sev->makeRecord();
0842 
0843     LOG(LEVEL) << " evt.num_record " << evt->num_record ;
0844     QU::copy_device_to_host<sphoton>( (sphoton*)r->bytes(), evt->record, evt->num_record );
0845     return r ;
0846 }
0847 
0848 NP* QEvt::gatherRec() const
0849 {
0850     NP* r = nullptr ;
0851     bool has_rec = hasRec();
0852     LOG_IF(LEVEL, !has_rec ) << " gatherRec called when there is no such array, use SEventConfig::SetCompMask to avoid " ;
0853     if(!has_rec) return nullptr ;
0854 
0855     r = sev->makeRec();
0856 
0857     LOG(LEVEL)
0858         << " evt.num_photon " << evt->num_photon
0859         << " evt.max_rec " << evt->max_rec
0860         << " evt.num_rec " << evt->num_rec
0861         << " evt.num_photon*evt.max_rec " << evt->num_photon*evt->max_rec
0862         ;
0863 
0864     assert( evt->num_photon*evt->max_rec == evt->num_rec );
0865 
0866     QU::copy_device_to_host<srec>( (srec*)r->bytes(), evt->rec, evt->num_rec );
0867     return r ;
0868 }
0869 #endif
0870 
0871 /**
0872 QEvt::getNumHit  TODO:rejig
0873 -----------------------------------
0874 
0875 HMM: applies photon_selector to the GPU photon array, thats surprising
0876 for a "get" method... TODO: maybe rearrange to do that once only
0877 at the gatherHit stage and subsequently just get the count from
0878 SEvt::fold
0879 
0880 **/
0881 
0882 
0883 size_t QEvt::getNumHit() const
0884 {
0885     assert( evt->photon );
0886     assert( evt->num_photon );
0887     LOG_IF(info, LIFECYCLE) ;
0888 
0889     evt->num_hit = SU::count_if_sphoton( evt->photon, evt->num_photon, *photon_selector );
0890 
0891     LOG(LEVEL) << " evt.photon " << evt->photon << " evt.num_photon " << evt->num_photon << " evt.num_hit " << evt->num_hit ;
0892     return evt->num_hit ;
0893 }
0894 
0895 
0896 
0897 size_t QEvt::getNumHitLite() const
0898 {
0899     assert( evt->photonlite );
0900     assert( evt->num_photonlite );
0901     assert( 0 && "WHO CALLS THIS : BETTER TO GET FROM ALREADY GATHERED ?");
0902 
0903     LOG_IF(info, LIFECYCLE) ;
0904 
0905     evt->num_hitlite = SU::count_if_sphotonlite( evt->photonlite, evt->num_photonlite, *photonlite_selector );
0906 
0907     LOG(LEVEL) << " evt.photonlite " << evt->photonlite << " evt.num_photonlite " << evt->num_photonlite << " evt.num_hitlite " << evt->num_hitlite ;
0908     return evt->num_hitlite ;
0909 }
0910 
0911 /**
0912 QEvt::gatherHit
0913 ------------------
0914 
0915 1. on device count *evt.num_hit* passing the photon *photon_selector*
0916 
0917 7. return NP hits array to caller, who becomes owner of the array
0918 
0919 Note that the device hits array is allocated and freed for each launch.
0920 This is due to the expectation that the number of hits will vary greatly from launch to launch
0921 unlike the number of photons which is expected to be rather similar for most launches other than
0922 remainder last launches.
0923 
0924 The alternative to this dynamic "busy" handling of hits would be to reuse a fixed hits buffer
0925 sized to max_photons : that however seems unpalatable due it always doubling up GPU memory for
0926 photons and hits.
0927 
0928 hitmask metadata was formerly placed on the hit array,
0929 subsequently moved to domain_meta as domain should
0930 always be present, unlike hits.
0931 
0932 **/
0933 
0934 NP* QEvt::gatherHit() const
0935 {
0936     // hasHit (more correctly "hasHitArray") at this juncture is misleadingly always false,
0937     // because the hits array is derived (selecting from the photons) by *gatherHit_*
0938 
0939     bool has_photon = hasPhoton();
0940 
0941     LOG_IF(LEVEL, !has_photon) << " gatherHit called when there is no photon array " ;
0942     if(!has_photon) return nullptr ;
0943 
0944     assert( evt->photon );
0945 
0946     LOG_IF(fatal, evt->num_photon == 0 ) << " evt->num_photon ZERO " ;
0947     assert( evt->num_photon );
0948 
0949     evt->num_hit = SU::count_if_sphoton( evt->photon, evt->num_photon, *photon_selector );
0950     NP* hit = evt->num_hit > 0 ? gatherHit_() : nullptr ;
0951 
0952     LOG(LEVEL)
0953         << " evt.photon " << evt->photon
0954         << " evt.num_photon " << evt->num_photon
0955         << " evt.num_hit " << evt->num_hit
0956         << " hit " << ( hit ? hit->sstr() : "-" )
0957         << " photon_selector.hitmask " << photon_selector->hitmask
0958         << " SEventConfig::HitMask " << SEventConfig::HitMask()
0959         << " SEventConfig::HitMaskLabel " << SEventConfig::HitMaskLabel()
0960         << " SEventConfig::ModeLite " << SEventConfig::ModeLite()
0961         << " SEventConfig::ModeMerge " << SEventConfig::ModeMerge()
0962         ;
0963 
0964     return hit ;
0965 }
0966 
0967 
0968 
0969 
0970 
0971 
0972 
0973 
0974 
0975 
0976 
0977 
0978 
0979 
0980 
0981 
0982 
0983 
0984 
0985 NP* QEvt::gatherHitLite() const
0986 {
0987     // hasHitLite at this juncture is misleadingly always false,
0988     // because the hitlite array is derived by *gatherHitLite_* which  selects from the photonlite
0989 
0990     bool has_photonlite = hasPhotonLite();
0991 
0992     LOG_IF(LEVEL, !has_photonlite) << " gatherHitLite called when there is no photonlite array " ;
0993     if(!has_photonlite) return nullptr ;
0994 
0995     assert( evt->photonlite );
0996 
0997     LOG_IF(fatal, evt->num_photonlite == 0 ) << " evt->num_photonlite ZERO " ;
0998     assert( evt->num_photonlite );
0999 
1000     evt->num_hitlite = SU::count_if_sphotonlite( evt->photonlite, evt->num_photonlite, *photonlite_selector );
1001     NP* hitlite = evt->num_hitlite > 0 ? gatherHitLite_() : nullptr ;
1002 
1003     LOG(LEVEL)
1004         << " evt.photonlite " << evt->photonlite
1005         << " evt.num_photonlite " << evt->num_photonlite
1006         << " evt.num_hitlite " << evt->num_hitlite
1007         << " hitlite " << ( hitlite ? hitlite->sstr() : "-" )
1008         << " photonlite_selector.hitmask " << photonlite_selector->hitmask
1009         << " SEventConfig::HitMask " << SEventConfig::HitMask()
1010         << " SEventConfig::HitMaskLabel " << SEventConfig::HitMaskLabel()
1011         << " SEventConfig::ModeLite " << SEventConfig::ModeLite()
1012         << " SEventConfig::ModeMerge " << SEventConfig::ModeMerge()
1013         ;
1014 
1015     return hitlite ;
1016 }
1017 
1018 
1019 
1020 /**
1021 QEvt::gatherHitLiteMerged
1022 ---------------------------
1023 
1024 This selects from photonlite and then merges that
1025 both on GPU before gathering back the merged hits to CPU
1026 
1027 NB with multi-launch a further final merge is required,
1028 that is invoked from QSim::simulate
1029 
1030 
1031 
1032 **/
1033 
1034 NP* QEvt::gatherHitLiteMerged() const
1035 {
1036     bool has_photonlite = hasPhotonLite();
1037     LOG_IF(LEVEL, !has_photonlite) << " gatherHitLiteMerged called when there is no photonlite array " ;
1038     if(!has_photonlite) return nullptr ;
1039 
1040     cudaStream_t stream = 0 ;
1041     NP* hitlitemerged = PerLaunchMerge<sphotonlite>(evt, stream);
1042 
1043     LOG(LEVEL)
1044         << " evt.photonlite " << evt->photonlite
1045         << " evt.num_photonlite " << evt->num_photonlite
1046         << " evt.num_hitlitemerged " << evt->num_hitlitemerged
1047         << " hitlitemerged " << ( hitlitemerged ? hitlitemerged->sstr() : "-" )
1048         << " photonlite_selector.hitmask " << photonlite_selector->hitmask
1049         << " SEventConfig::HitMask " << SEventConfig::HitMask()
1050         << " SEventConfig::HitMaskLabel " << SEventConfig::HitMaskLabel()
1051         << " SEventConfig::ModeLite " << SEventConfig::ModeLite()
1052         << " SEventConfig::ModeMerge " << SEventConfig::ModeMerge()
1053         << " SEventConfig::MergeWindow " << SEventConfig::MergeWindow()
1054         ;
1055 
1056     return hitlitemerged ;
1057 }
1058 
1059 NP* QEvt::gatherHitMerged() const
1060 {
1061     bool has_photon = hasPhoton();
1062     LOG_IF(LEVEL, !has_photon) << " gatherHitMerged called when there is no photon array " ;
1063     if(!has_photon) return nullptr ;
1064 
1065     assert( evt->photon );
1066     LOG_IF(fatal, evt->num_photon == 0 ) << " evt->num_photon ZERO " ;
1067     assert( evt->num_photon );
1068 
1069     cudaStream_t stream = 0 ;
1070     NP* hitmerged = PerLaunchMerge<sphoton>(evt, stream);
1071 
1072     LOG(LEVEL)
1073         << " evt.photon " << evt->photon
1074         << " evt.num_photon " << evt->num_photon
1075         << " evt.num_hitmerged " << evt->num_hitmerged
1076         << " hitmerged " << ( hitmerged ? hitmerged->sstr() : "-" )
1077         << " photon_selector.hitmask " << photon_selector->hitmask
1078         << " SEventConfig::HitMask " << SEventConfig::HitMask()
1079         << " SEventConfig::HitMaskLabel " << SEventConfig::HitMaskLabel()
1080         << " SEventConfig::ModeLite " << SEventConfig::ModeLite()
1081         << " SEventConfig::ModeMerge " << SEventConfig::ModeMerge()
1082         << " SEventConfig::MergeWindow " << SEventConfig::MergeWindow()
1083         ;
1084 
1085     return hitmerged ;
1086 }
1087 
1088 
1089 
1090 
1091 
1092 
1093 
1094 
1095 
1096 
1097 
1098 /**
1099 QEvt::gatherHit_
1100 --------------------
1101 
1102 1. allocate *evt.hit* GPU buffer using *evt.num_hit*
1103 2. SU::copy_if_device_to_device_presized_sphoton from *evt.photon* to *evt.hit* using the *photon_selector*
1104 3. host allocate the NP hits array using *evt.num_hit*
1105 4. copy hits from device to the host NP hits array
1106 5. free *evt.hit* on device
1107 
1108 
1109 **/
1110 
1111 
1112 
1113 NP* QEvt::gatherHit_() const
1114 {
1115     LOG_IF(info, LIFECYCLE) ;
1116 
1117     evt->hit = QU::device_alloc<sphoton>( evt->num_hit, "QEvt::gatherHit_:sphoton" );
1118 
1119     SU::copy_if_device_to_device_presized_sphoton( evt->hit, evt->photon, evt->num_photon,  *photon_selector );
1120 
1121     NP* hit = sphoton::zeros( evt->num_hit );
1122 
1123     QU::copy_device_to_host<sphoton>( (sphoton*)hit->bytes(), evt->hit, evt->num_hit );
1124 
1125     QU::device_free<sphoton>( evt->hit );
1126 
1127     evt->hit = nullptr ;
1128 
1129     LOG(LEVEL) << " hit.sstr " << hit->sstr() ;
1130 
1131     return hit ;
1132 }
1133 
1134 
1135 NP* QEvt::gatherHitLite_() const
1136 {
1137     LOG_IF(info, LIFECYCLE) ;
1138 
1139     evt->hitlite = QU::device_alloc<sphotonlite>( evt->num_hitlite, "QEvt::gatherHitLite_:sphotonlite" );
1140 
1141     SU::copy_if_device_to_device_presized_sphotonlite( evt->hitlite, evt->photonlite, evt->num_photonlite,  *photonlite_selector );
1142 
1143     NP* hitlite = sphotonlite::zeros( evt->num_hitlite );
1144 
1145     QU::copy_device_to_host<sphotonlite>( (sphotonlite*)hitlite->bytes(), evt->hitlite, evt->num_hitlite );
1146 
1147     QU::device_free<sphotonlite>( evt->hitlite );
1148 
1149     evt->hitlite = nullptr ;
1150 
1151     LOG(LEVEL) << " hitlite.sstr " << hitlite->sstr() ;
1152 
1153     return hitlite ;
1154 }
1155 
1156 
1157 
1158 /**
1159 QEvt::PerLaunchMerge
1160 ----------------------
1161 
1162 Canonical usage from::
1163 
1164     QEvt::gatherHitMerged
1165     QEvt::gatherHitLiteMerged
1166 
1167 Where those get invoked from SEvt::gather called from QSim::simulate
1168 
1169 **/
1170 
1171 template<typename T>
1172 NP* QEvt::PerLaunchMerge(sevent* evt, cudaStream_t stream ) // static
1173 {
1174     // below four calls return whats appropriate depending on template type of sphoton OR sphotonlite
1175     // dealing with either  photonlite/hitlitemerged OR photon/hitmerged
1176 
1177     T* d_in = evt->get_photon_ptr<T>();
1178     size_t num_in = evt->get_photon_num<T>();
1179 
1180     T** d_out_ref = evt->get_hitmerged_ptr_ref<T>();
1181     size_t* num_out_ref = evt->get_hitmerged_num_ref<T>();
1182 
1183     SPM::merge_partial_select(
1184          d_in,
1185          num_in,
1186          d_out_ref,
1187          num_out_ref,
1188          SEventConfig::HitMask(),
1189          SEventConfig::MergeWindow(),
1190          stream);
1191 
1192     cudaStreamSynchronize(stream); // blocks until all preceeding operations in stream complete
1193 
1194     NP* out = T::zeros( *num_out_ref ); // hitmerged OR hitlitemerged
1195 
1196     SPM::copy_device_to_host_async<T>( (T*)out->bytes(), *d_out_ref, *num_out_ref, stream );
1197 
1198     cudaFreeAsync(*d_out_ref, stream);
1199     *d_out_ref = nullptr ;
1200 
1201     cudaStreamSynchronize(stream); // blocks until all preceeding operations in stream complete
1202 
1203     LOG(LEVEL) << " out.sstr " << ( out ? out->sstr() : "-" )  ;
1204 
1205     return out ;
1206 }
1207 
1208 template NP* QEvt::PerLaunchMerge<sphoton>(    sevent* evt, cudaStream_t stream);
1209 template NP* QEvt::PerLaunchMerge<sphotonlite>(sevent* evt, cudaStream_t stream);
1210 
1211 
1212 
1213 /**
1214 QEvt::FinalMerge
1215 ----------------
1216 
1217 The canonical argument array this is used with is concat_hitlitemerged
1218 invoked from QSim::simulate_final_merge
1219 
1220 Conceptually the FinalMerge and PerLaunchMerge use the same processing
1221 with both flagmask selection and hit merging using (identity,timebucket) key
1222 with typical OPTICKS_MERGE_WINDOW of 1 (ns).  However the two cases differ
1223 in their inputs:
1224 
1225 +----------------+---------------------------------------+----------------------------------+
1226 | Method         |  Input                                |   Output                         |
1227 +================+=======================================+==================================+
1228 | PerLaunchMerge |  photonlite device array              |  hitlitemerged NP array on host  |
1229 +----------------+---------------------------------------+----------------------------------+
1230 | FinalMerge     |  concat hitlitemerge NP array on host |  ditto                           |
1231 +----------------+---------------------------------------+----------------------------------+
1232 
1233 **/
1234 
1235 template<typename T>
1236 NP* QEvt::FinalMerge(const NP* all, cudaStream_t stream ) // static
1237 {
1238     NP_future merge_result = FinalMerge_async<T>(all, stream );
1239 
1240     cudaStream_t consumer ;
1241     cudaStreamCreate(&consumer);
1242 
1243     merge_result.wait(consumer);
1244 
1245     NP* out = merge_result.arr ;
1246 
1247     return out ;
1248 }
1249 
1250 template NP* QEvt::FinalMerge<sphoton>(    const NP* all, cudaStream_t stream);
1251 template NP* QEvt::FinalMerge<sphotonlite>(const NP* all, cudaStream_t stream);
1252 
1253 
1254 
1255 
1256 
1257 
1258 /**
1259 QEvt::FinalMerge_async
1260 -----------------------
1261 
1262 **/
1263 
1264 
1265 template<typename T>
1266 NP_future QEvt::FinalMerge_async(const NP* all, cudaStream_t stream ) // static
1267 {
1268     size_t num_all = all->num_items();
1269 
1270     // 1. alloc and upload concatenation of the per-launch merged hits
1271 
1272     T* d_all = nullptr;
1273     if(num_all > 0)
1274     {
1275         cudaMallocAsync(&d_all, num_all * sizeof(T), stream);
1276         cudaMemcpyAsync(d_all, (T*)all->bytes(), num_all * sizeof(T), cudaMemcpyHostToDevice, stream);
1277     }
1278 
1279     // 2. invoke final merge
1280 
1281     SPM_future<T> merge_result = SPM::merge_partial_select_async<T>(
1282         d_all ? d_all : nullptr,
1283         num_all,
1284         SPM::ALREADY_HITMASK_SELECTED,
1285         SEventConfig::MergeWindow(),
1286         stream);
1287 
1288     if(d_all) cudaFreeAsync(d_all, stream);
1289 
1290 
1291     //printf("QEvt::FinalMerge_async after wait on merge_result.count %ld \n", merge_result.count);
1292 
1293     // 4. use merge result to host alloc and download
1294 
1295     static cudaStream_t dl_stream = []{ cudaStream_t s; cudaStreamCreate(&s); return s; }();
1296 
1297     NP_future result;
1298     result.arr = T::zeros( merge_result.count );
1299     cudaEventCreateWithFlags(&result.ready, cudaEventDisableTiming);
1300 
1301     if( merge_result.count > 0 && merge_result.ptr )
1302     {
1303         // normal path : work to do
1304         cudaStreamWaitEvent(dl_stream, merge_result.ready, 0);
1305 
1306         SPM::copy_device_to_host_async<T>( (T*)result.arr->bytes(), merge_result.ptr, merge_result.count, dl_stream );
1307 
1308         cudaFreeAsync(merge_result.ptr, dl_stream);
1309 
1310         cudaEventRecord(result.ready, dl_stream);
1311     }
1312     else
1313     {
1314         cudaEventRecord(result.ready, stream);
1315     }
1316 
1317     if (merge_result.ready) cudaEventDestroy(merge_result.ready);
1318 
1319     return result ;
1320 }
1321 // sed -n '/^NP_future QEvt::FinalMerge_async/,/^}/p' ~/o/qudarap/QEvt.cc | pbcopy
1322 
1323 
1324 
1325 
1326 /**
1327 QEvt::getMeta
1328 -----------------
1329 
1330 SCompProvider method, canonically used from SEvt::endOfEvent/SEvt::gather_metadata
1331 
1332 **/
1333 
1334 std::string QEvt::getMeta() const
1335 {
1336     return sev->meta ;
1337 }
1338 
1339 const char* QEvt::getTypeName() const
1340 {
1341     return TYPENAME ;
1342 }
1343 
1344 /**
1345 QEvt::gatherComponent
1346 ------------------------
1347 
1348 Invoked for example by SEvt::gather_components via the SCompProvider protocol
1349 
1350 **/
1351 
1352 NP* QEvt::gatherComponent(unsigned cmp) const
1353 {
1354     LOG(LEVEL) << "[ cmp " << cmp ;
1355     unsigned gather_mask = SEventConfig::GatherComp();
1356     bool proceed = (gather_mask & cmp) != 0 ;
1357     NP* a = proceed ? gatherComponent_(cmp) : nullptr ;
1358     LOG(LEVEL) << "[ cmp " << cmp << " proceed " << proceed << " a " <<  a ;
1359     return a ;
1360 }
1361 
1362 /**
1363 QEvt::gatherComponent_
1364 -------------------------
1365 
1366 Gather downloads from device, get accesses from host
1367 
1368 **/
1369 
1370 NP* QEvt::gatherComponent_(unsigned cmp) const
1371 {
1372     NP* a = nullptr ;
1373     switch(cmp)
1374     {
1375         case SCOMP_INPHOTON:      a = getInputPhoton()          ; break ;
1376         case SCOMP_PHOTON:        a = gatherPhoton()            ; break ;
1377         case SCOMP_PHOTONLITE:    a = gatherPhotonLite()        ; break ;
1378         case SCOMP_HIT:           a = gatherHit()               ; break ;
1379         case SCOMP_HITLITE:       a = gatherHitLite()           ; break ;
1380         case SCOMP_HITLITEMERGED: a = gatherHitLiteMerged()     ; break ;
1381         case SCOMP_HITMERGED:     a = gatherHitMerged()         ; break ;
1382 #ifndef PRODUCTION
1383         case SCOMP_DOMAIN:    a = gatherDomain()      ; break ;
1384         case SCOMP_RECORD:    a = gatherRecord()   ; break ;
1385         case SCOMP_REC:       a = gatherRec()      ; break ;
1386         case SCOMP_SEQ:       a = gatherSeq()      ; break ;
1387         case SCOMP_PRD:       a = gatherPrd()      ; break ;
1388         case SCOMP_SEED:      a = gatherSeed()     ; break ;
1389         case SCOMP_SIMTRACE:  a = gatherSimtrace() ; break ;
1390         case SCOMP_TAG:       a = gatherTag()      ; break ;
1391         case SCOMP_FLAT:      a = gatherFlat()     ; break ;
1392         case SCOMP_GENSTEP:   a = gatherGenstepFromDevice() ; break ;
1393 #else
1394         case SCOMP_GENSTEP:   a = getGenstep()     ; break ;
1395 #endif
1396     }
1397     return a ;
1398 }
1399 
1400 
1401 
1402 /**
1403 QEvt::setNumPhoton
1404 ---------------------
1405 
1406 At the first call when evt.photon is nullptr allocation on device is done.
1407 
1408 Canonically invoked internally from QEvt::setGenstep but may be invoked
1409 directly from "friendly" photon only tests without use of gensteps.
1410 
1411 1. Sets evt->num_photon which asserts that is within allowed *evt->max_photon*
1412 2. allocates buffers for all configured arrays (how heavy depends on configured array sizes)
1413 3. calls *uploadEvt* (lightweight, just counts and pointers)
1414 
1415 This assumes that the number of photons for subsequent launches does not increase
1416 when collecting records : that is ok as running with records is regarded as debugging.
1417 
1418 **/
1419 
1420 void QEvt::setNumPhoton(size_t num_photon )
1421 {
1422     LOG_IF(info, LIFECYCLE) << " num_photon " << num_photon ;
1423     LOG(LEVEL);
1424 
1425     sev->setNumPhoton(num_photon);
1426 
1427     bool noalloc = evt->no_photon_or_photonlite_alloc();
1428     if(noalloc) device_alloc_photon();
1429 
1430     uploadEvt();
1431 }
1432 
1433 
1434 void QEvt::setNumSimtrace(size_t num_simtrace)
1435 {
1436     sev->setNumSimtrace(num_simtrace);
1437     if( evt->simtrace == nullptr ) device_alloc_simtrace();
1438     uploadEvt();
1439 }
1440 
1441 
1442 
1443 
1444 /**
1445 QEvt::device_alloc_photon
1446 ----------------------------
1447 
1448 Buffers are allocated on device and the device pointers are collected
1449 into hostside sevent.h "evt"
1450 
1451 Q: With multi-launch within an event running, is this called for every launch ?
1452 A: NO IT SHOULD NOT (WOULD BE A LEAK IF IT DID)
1453    Although QEvt::setNumPhoton is necessarily called for every launch by QEvt::setGenstepUpload
1454    the noalloc check means that this only gets called the first time which allocs with
1455    the size of arrays based on max_slot (the maximum possible number of photons in one launch)
1456    allowing reuse of the same buffers.
1457 
1458 ::
1459 
1460     BP=QEvt::device_alloc_photon cxs_min.sh
1461 
1462     (gdb) bt
1463     #0  QEvt::device_alloc_photon (this=0x186569c0) at /home/blyth/opticks/qudarap/QEvt.cc:1370
1464     #1  0x00007ffff5ec1a37 in QEvt::setNumPhoton (this=0x186569c0, num_photon=257899584) at /home/blyth/opticks/qudarap/QEvt.cc:1337
1465     #2  0x00007ffff5ebb428 in QEvt::setGenstepUpload (this=0x186569c0, qq0=0x1dba8700, gs_start=0, gs_stop=16) at /home/blyth/opticks/qudarap/QEvt.cc:426
1466     #3  0x00007ffff5eb9efa in QEvt::setGenstepUpload_NP (this=0x186569c0, gs_=0x1e6a8780, gss_=0x1e6262e0) at /home/blyth/opticks/qudarap/QEvt.cc:225
1467     #4  0x00007ffff5e716c9 in QSim::simulate (this=0x18656a20, eventID=0, reset_=true) at /home/blyth/opticks/qudarap/QSim.cc:481
1468     #5  0x00007ffff7e35b54 in CSGOptiX::simulate (this=0x1866b3e0, eventID=0, reset=true) at /home/blyth/opticks/CSGOptiX/CSGOptiX.cc:777
1469     #6  0x00007ffff7e3255c in CSGOptiX::SimulateMain () at /home/blyth/opticks/CSGOptiX/CSGOptiX.cc:177
1470     #7  0x0000000000404a95 in main (argc=1, argv=0x7fffffffaf28) at /home/blyth/opticks/CSGOptiX/tests/CSGOptiXSMTest.cc:13
1471     (gdb)
1472 
1473 
1474 Q: Are these buffers ever dealloc ?
1475 A: NO, dont think so : same buffers reused for all launches of all events so
1476    they get "cleaned up" when the CUDA context is reclaimed
1477 
1478 **/
1479 
1480 void QEvt::device_alloc_photon()
1481 {
1482     LOG_IF(info, LIFECYCLE) ;
1483     SetAllocMeta( QU::alloc, evt );   // do this first as memory errors likely to happen in following lines
1484 
1485     bool with_photon     = evt->with_photon();
1486     bool with_photonlite = evt->with_photonlite();
1487     bool noalloc = evt->no_photon_or_photonlite_alloc();
1488 
1489 
1490     LOG(LEVEL)
1491         << " evt.max_slot   " << evt->max_slot
1492         << " evt.max_record " << evt->max_record
1493         << " evt.max_photon " << evt->max_photon
1494         << " evt.num_photon " << evt->num_photon
1495 #ifndef PRODUCTION
1496         << " evt.num_record " << evt->num_record
1497         << " evt.num_rec    " << evt->num_rec
1498         << " evt.num_seq    " << evt->num_seq
1499         << " evt.num_prd    " << evt->num_prd
1500         << " evt.num_tag    " << evt->num_tag
1501         << " evt.num_flat   " << evt->num_flat
1502         << " evt.with_photon " << evt->with_photon()
1503         << " evt.with_photonlite " << evt->with_photonlite()
1504         << " evt.noalloc " << noalloc
1505 #endif
1506         ;
1507 
1508     assert( noalloc );
1509 
1510     evt->photon     = with_photon     ? QU::device_alloc_zero<sphoton>(     evt->max_slot, "QEvt::device_alloc_photon/max_slot*sizeof(sphoton)"     ) : nullptr ;
1511     evt->photonlite = with_photonlite ? QU::device_alloc_zero<sphotonlite>( evt->max_slot, "QEvt::device_alloc_photon/max_slot*sizeof(sphotonlite)" ) : nullptr ;
1512 
1513 #ifndef PRODUCTION
1514     evt->record  = evt->max_record > 0 ? QU::device_alloc_zero<sphoton>( evt->max_slot * evt->max_record, "max_slot*max_record*sizeof(sphoton)" ) : nullptr ;
1515     evt->rec     = evt->max_rec    > 0 ? QU::device_alloc_zero<srec>(    evt->max_slot * evt->max_rec   , "max_slot*max_rec*sizeof(srec)"    ) : nullptr ;
1516     evt->prd     = evt->max_prd    > 0 ? QU::device_alloc_zero<quad2>(   evt->max_slot * evt->max_prd   , "max_slot*max_prd*sizeof(quad2)"    ) : nullptr ;
1517     evt->seq     = evt->max_seq   == 1 ? QU::device_alloc_zero<sseq>(    evt->max_slot                  , "max_slot*sizeof(sseq)"    ) : nullptr ;
1518     evt->tag     = evt->max_tag   == 1 ? QU::device_alloc_zero<stag>(    evt->max_slot                  , "max_slot*sizeof(stag)"    ) : nullptr ;
1519     evt->flat    = evt->max_flat  == 1 ? QU::device_alloc_zero<sflat>(   evt->max_slot                  , "max_slot*sizeof(sflat)"   ) : nullptr ;
1520 #endif
1521 
1522     LOG(LEVEL) << desc() ;
1523     LOG(LEVEL) << desc_alloc() ;
1524 }
1525 
1526 
1527 
1528 /**
1529 QEvt::SetAllocMeta
1530 ---------------------
1531 
1532 Collect metadata from sevent.h into salloc.h
1533 
1534 **/
1535 
1536 
1537 void QEvt::SetAllocMeta(salloc* alloc, const sevent* evt)  // static
1538 {
1539     if(!alloc) return ;
1540     if(!evt) return ;
1541     evt->get_meta(alloc->meta);
1542 }
1543 
1544 
1545 void QEvt::device_alloc_simtrace()
1546 {
1547     LOG_IF(info, LIFECYCLE) ;
1548     evt->simtrace = QU::device_alloc<quad4>( evt->max_slot, "QEvt::device_alloc_simtrace/max_slot" ) ;
1549     LOG(LEVEL)
1550         << " evt.num_simtrace " << evt->num_simtrace
1551         << " evt.max_simtrace " << evt->max_simtrace
1552         ;
1553 }
1554 
1555 
1556 /**
1557 QEvt::uploadEvt
1558 --------------------
1559 
1560 Uploads lightweight sevent.h instance with counters and pointers for the array.
1561 
1562 Copies host side sevent.h *evt* instance (with updated num_genstep and num_photon) to device side  *d_evt*.
1563 Note that the evt->genstep and evt->photon pointers are not updated, so the same buffers are reused for each launch.
1564 
1565 **/
1566 
1567 void QEvt::uploadEvt()
1568 {
1569     LOG_IF(info, LIFECYCLE) ;
1570     LOG(LEVEL) << std::endl << evt->desc() ;
1571     QU::copy_host_to_device<sevent>(d_evt, evt, 1 );
1572 }
1573 
1574 size_t QEvt::getNumPhoton() const
1575 {
1576     return evt->num_photon ;
1577 }
1578 size_t QEvt::getNumSimtrace() const
1579 {
1580     return evt->num_simtrace ;
1581 }
1582 
1583 
1584 
1585 extern "C" void QEvt_checkEvt(dim3 numBlocks, dim3 threadsPerBlock, sevent* evt, unsigned width, unsigned height ) ;
1586 
1587 void QEvt::checkEvt()
1588 {
1589     size_t width = getNumPhoton() ;
1590     size_t height = 1 ;
1591     LOG(info) << " width " << width << " height " << height ;
1592 
1593     dim3 numBlocks ;
1594     dim3 threadsPerBlock ;
1595     QU::ConfigureLaunch( numBlocks, threadsPerBlock, width, height );
1596 
1597     assert( d_evt );
1598     QEvt_checkEvt(numBlocks, threadsPerBlock, d_evt, width, height );
1599 }
1600 
1601