Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-04-09 07:49:09

0001 /**
0002 QPMT.cc
0003 ==========
0004 
0005 QPMT::init
0006 QPMT::init_thickness
0007 QPMT::init_lcqs
0008     prep hostside qpmt.h instance and upload to device at d_pmt
0009 
0010 QPMT::pmtcat_check
0011     check domain and lookup shape consistency
0012 
0013 QPMT::pmtcat_scan
0014    interface in .cc to kernel launcher QPMT_pmtcat_scan in .cu
0015 
0016 QPMT::mct_lpmtid_
0017    interface in .cc to kernel launcher QPMT_mct_lpmtid in .cu
0018 
0019 
0020 **/
0021 
0022 #include "SLOG.hh"
0023 
0024 #if defined(MOCK_CURAND) || defined(MOCK_CUDA)
0025 #else
0026 #include <cuda_runtime.h>
0027 #endif
0028 
0029 #include <csignal>
0030 #include <vector_types.h>
0031 #include "QPMT.hh"
0032 
0033 
0034 #if defined(MOCK_CURAND) || defined(MOCK_CUDA)
0035 #include "QPMT_MOCK.h"
0036 
0037 template<typename T>
0038 const plog::Severity QPMT<T>::LEVEL = plog::info ;
0039 #else
0040 template<typename T>
0041 const plog::Severity QPMT<T>::LEVEL = SLOG::EnvLevel("QPMT", "DEBUG");
0042 #endif
0043 
0044 template<typename T>
0045 const QPMT<T>* QPMT<T>::INSTANCE = nullptr ;
0046 
0047 template<typename T>
0048 const QPMT<T>* QPMT<T>::Get(){ return INSTANCE ;  }
0049 
0050 
0051 
0052 template<typename T>
0053 inline std::string QPMT<T>::Desc() // static
0054 {
0055     std::stringstream ss ;
0056     ss << "QPMT<" << ( sizeof(T) == 4 ? "float" : "double" ) << "> " ;
0057 #ifdef WITH_CUSTOM4
0058     ss << "WITH_CUSTOM4 " ;
0059 #else
0060     ss << "NOT:WITH_CUSTOM4 " ;
0061 #endif
0062     ss << " INSTANCE:" << ( INSTANCE ? "YES" : "NO " ) << " " ;
0063     if(INSTANCE) ss << INSTANCE->desc() ;
0064     std::string str = ss.str();
0065     return str ;
0066 }
0067 
0068 /**
0069 QPMT::init
0070 ------------
0071 
0072 1. populate hostside qpmt.h instance with device side pointers
0073 2. upload the hostside qpmt.h instance to GPU
0074 3. retain d_pmt pointer to use in launches
0075 
0076 **/
0077 
0078 template<typename T>
0079 inline void QPMT<T>::init()
0080 {
0081     LOG(LEVEL) << "[" ;
0082 
0083     INSTANCE = this ;
0084 
0085     const int ni = s_pmt::NUM_CAT ;   // 3:NNVT/HAMA/NNVT_HiQE
0086     const int nj = s_pmt::NUM_LAYR ;  // 4:Pyrex/ARC/PHC/Vacuum
0087     const int nk = s_pmt::NUM_PROP ;  // 2:RINDEX/KINDEX
0088                                     // -----------------
0089                                     // 3*4*2 = 24
0090 
0091     bool src_rindex_expect = src_rindex->has_shape(ni, nj, nk, -1, 2 ) ;
0092     bool src_thickness_expect = src_thickness->has_shape(ni, nj, 1 ) ;
0093 
0094     assert( src_rindex_expect);
0095     assert( src_thickness_expect );
0096 
0097     if(!src_rindex_expect) std::raise(SIGINT);
0098     if(!src_thickness_expect) std::raise(SIGINT);
0099 
0100     // NB everything here valid across all photons : nothing needs to be per photon idx
0101     init_prop();
0102     init_thickness();
0103     init_lcqs();
0104     init_s_qescale();
0105 
0106 #if defined(MOCK_CURAND) || defined(MOCK_CUDA)
0107     d_pmt = pmt ;
0108 #else
0109     d_pmt = QU::UploadArray<qpmt<T>>( (const qpmt<T>*)pmt, 1u, "QPMT::init/d_pmt" ) ;
0110     // getting above line to link required template instanciation at tail of qpmt.h
0111 #endif
0112     LOG(LEVEL) << "]" ;
0113 }
0114 
0115 
0116 
0117 template<typename T>
0118 inline void QPMT<T>::init_prop()
0119 {
0120     pmt->rindex_prop = rindex_prop->getDevicePtr() ;
0121     pmt->qeshape_prop = qeshape_prop->getDevicePtr() ;
0122     pmt->cetheta_prop = cetheta_prop->getDevicePtr() ;
0123     pmt->cecosth_prop = cecosth_prop->getDevicePtr() ;
0124 
0125     pmt->s_qeshape_prop = s_qeshape_prop->getDevicePtr() ;
0126 }
0127 
0128 
0129 template<typename T>
0130 inline void QPMT<T>::init_thickness()
0131 {
0132     const char* label = "QPMT::init_thickness/d_thickness" ;
0133 
0134 #if defined(MOCK_CURAND) || defined(MOCK_CUDA)
0135     T* d_thickness = const_cast<T*>(thickness->cvalues<T>()) ;
0136 #else
0137     T* d_thickness = QU::UploadArray<T>(thickness->cvalues<T>(), thickness->num_values(), label ); ;
0138 #endif
0139 
0140     pmt->thickness = d_thickness ;
0141 }
0142 
0143 template<typename T>
0144 inline void QPMT<T>::init_lcqs()
0145 {
0146     LOG(LEVEL)
0147        << " src_lcqs " << ( src_lcqs ? src_lcqs->sstr() : "-" )
0148        << " lcqs " << ( lcqs ? lcqs->sstr() : "-" )
0149        ;
0150 
0151     const char* label = "QPMT::init_lcqs/d_lcqs" ;
0152 
0153 #if defined(MOCK_CURAND) || defined(MOCK_CUDA)
0154     T* d_lcqs = lcqs ? const_cast<T*>(lcqs->cvalues<T>()) : nullptr ;
0155 #else
0156     T* d_lcqs = lcqs ? QU::UploadArray<T>(lcqs->cvalues<T>(), lcqs->num_values(), label) : nullptr ;
0157 #endif
0158 
0159     pmt->lcqs = d_lcqs ;
0160     pmt->i_lcqs = (int*)d_lcqs ;   // HMM: would cause issues with T=double
0161 }
0162 
0163 
0164 template<typename T>
0165 inline void QPMT<T>::init_s_qescale()
0166 {
0167     LOG(LEVEL)
0168        << " src_s_qescale " << ( src_s_qescale ? src_s_qescale->sstr() : "-" )
0169        << " s_qescale " << ( s_qescale ? s_qescale->sstr() : "-" )
0170        ;
0171 
0172     const char* label = "QPMT::init_s_qescale/d_s_qescale" ;
0173 
0174 #if defined(MOCK_CURAND) || defined(MOCK_CUDA)
0175     T* d_s_qescale = s_qescale ? const_cast<T*>(s_qescale->cvalues<T>()) : nullptr ;
0176 #else
0177     T* d_s_qescale = s_qescale ? QU::UploadArray<T>(s_qescale->cvalues<T>(), s_qescale->num_values(), label) : nullptr ;
0178 #endif
0179 
0180     pmt->s_qescale = d_s_qescale ;
0181 }
0182 
0183 
0184 
0185 
0186 
0187 
0188 
0189 /**
0190 NB these decls cannot be extern "C" as need C++ name mangling for template types
0191 **/
0192 
0193 #if defined(MOCK_CURAND) || defined(MOCK_CUDA)
0194 
0195 template <typename F>
0196 extern void QPMT_pmtcat_MOCK(
0197     qpmt<F>* pmt,
0198     int etype,
0199     F* lookup,
0200     const F* domain,
0201     unsigned domain_width
0202 );
0203 
0204 template <typename T>
0205 extern void QPMT_mct_lpmtid_MOCK(
0206     qpmt<T>* pmt,
0207     int etype,
0208     T* lookup,
0209     const T* domain,
0210     unsigned domain_width,
0211     const int* lpmtid,
0212     unsigned num_lpmtid
0213 );
0214 
0215 
0216 #else
0217 template <typename T>
0218 extern void QPMT_pmtcat_scan(
0219     dim3 numBlocks,
0220     dim3 threadsPerBlock,
0221     qpmt<T>* pmt,
0222     int etype,
0223     T* lookup,
0224     const T* domain,
0225     unsigned domain_width
0226 );
0227 
0228 template <typename T>
0229 extern void QPMT_mct_lpmtid_scan(
0230     dim3 numBlocks,
0231     dim3 threadsPerBlock,
0232     qpmt<T>* pmt,
0233     int etype,
0234     T* lookup,
0235     const T* domain,
0236     unsigned domain_width,
0237     const int* lpmtid,
0238     unsigned num_lpmtid
0239 );
0240 
0241 template <typename T>
0242 extern void QPMT_spmtid_scan(
0243     dim3 numBlocks,
0244     dim3 threadsPerBlock,
0245     qpmt<T>* pmt,
0246     int etype,
0247     T* lookup,
0248     const int* spmtid,
0249     unsigned num_spmtid
0250 );
0251 
0252 #endif
0253 
0254 
0255 
0256 
0257 template<typename T>
0258 void QPMT<T>::pmtcat_check_domain_lookup_shape( int etype, const NP* domain, const NP* lookup) const
0259 {
0260     const char* elabel = qpmt_enum::Label(etype) ;
0261     bool domain_expect = domain->shape.size() == 1 && domain->shape[0] > 0 ;
0262 
0263     LOG_IF(fatal, !domain_expect)
0264         << " etype " << etype
0265         << " elabel " << elabel
0266         << " FATAL UNEXPECTED DOMAIN SHAPE "
0267         << " domain.sstr " << ( domain ? domain->sstr() : "-" )
0268         ;
0269 
0270     assert( domain_expect );
0271     if(!domain_expect) std::raise(SIGINT);
0272 
0273     unsigned num_domain = domain->shape[0] ;
0274     unsigned num_domain_1 = 0 ;
0275 
0276     switch(etype)
0277     {
0278         case qpmt_RINDEX : num_domain_1 = lookup->shape[lookup->shape.size()-1] ; break ;
0279         case qpmt_QESHAPE: num_domain_1 = lookup->shape[lookup->shape.size()-1] ; break ;
0280         case qpmt_S_QESHAPE: num_domain_1 = lookup->shape[lookup->shape.size()-1] ; break ;
0281         case qpmt_CETHETA: num_domain_1 = lookup->shape[lookup->shape.size()-1] ; break ;
0282         case qpmt_CECOSTH: num_domain_1 = lookup->shape[lookup->shape.size()-1] ; break ;
0283         case qpmt_CATSPEC: num_domain_1 = lookup->shape[lookup->shape.size()-3] ; break ; // (4,4) payload
0284     }
0285 
0286     bool num_domain_expect = num_domain == num_domain_1 ;
0287 
0288     LOG_IF(fatal, !num_domain_expect)
0289         << " etype " << etype
0290         << " elabel " << elabel
0291         << " FATAL DOMAIN MISMATCH "
0292         << " num_domain " << num_domain
0293         << " num_domain_1 " << num_domain_1
0294         ;
0295 
0296     assert( num_domain_expect );
0297     if(!num_domain_expect) std::raise(SIGINT) ;
0298 
0299 }
0300 
0301 template<typename T>
0302 const T* QPMT<T>::Upload(const NP* arr, const char* label)
0303 {
0304     unsigned num_arr = arr->shape[0] ;
0305 #if defined(MOCK_CURAND) || defined(MOCK_CUDA)
0306     const T* d_arr = arr->cvalues<T>() ;
0307 #else
0308     const T* d_arr = QU::UploadArray<T>( arr->cvalues<T>(), num_arr, label ) ;
0309 #endif
0310     return d_arr ;
0311 }
0312 
0313 
0314 template<typename T>
0315 T* QPMT<T>::Alloc(NP* out, const char* label)
0316 {
0317     unsigned num_values = out->num_values() ;
0318 #if defined(MOCK_CURAND) || defined(MOCK_CUDA)
0319     T* d_arr = out->values<T> ;
0320 #else
0321     T* d_arr = QU::device_alloc<T>(num_values,label) ;
0322 #endif
0323     return d_arr ;
0324 }
0325 
0326 
0327 
0328 /**
0329 QPMT::pmtcat_scan
0330 -------------------
0331 
0332 Canonical usage from QPMTTest.h make_qscan
0333 
0334 1. create hostside lookup array for the output
0335 2. upload domain array to d_domain
0336 3. allocate lookup array at d_lookup
0337 4. invoke QPMT_lpmtcat launch using d_pmt pointer argument
0338 5. copy d_lookup to h_lookup
0339 
0340 The shape of the lookup output depends on the etype.
0341 For some etype the lookup contains energy_eV scans for all pmt cat (3),
0342 layers (4) and props (2) (RINDEX, KINDEX) resulting in
0343 lookup shape of (3,4,2, domain_width ).
0344 
0345 The lookup are populated with nested loops (eg 3*4*2=24 props) in the kernel
0346 with the energy domain passed in as input. Parallelism is over the energy.
0347 
0348 **/
0349 
0350 template<typename T>
0351 NP* QPMT<T>::pmtcat_scan(int etype, const NP* domain ) const
0352 {
0353     const char* elabel = qpmt_enum::Label(etype) ;
0354 
0355     unsigned num_domain = domain->shape[0] ;
0356     NP* lookup = MakeArray_pmtcat(etype, num_domain );
0357     pmtcat_check_domain_lookup_shape(etype, domain, lookup) ;
0358     unsigned lookup_num_values = lookup->num_values() ;
0359 
0360     const T* d_domain = Upload(domain, "QPMT::pmtcat_scan/d_domain" );
0361 
0362     LOG(LEVEL)
0363         << " etype " << etype
0364         << " elabel " << elabel
0365         << " domain " << domain->sstr()
0366         << " num_domain " << num_domain
0367         << " lookup " << lookup->sstr()
0368         << " lookup_num_values " << lookup_num_values
0369         ;
0370 
0371     T* h_lookup = lookup->values<T>() ;
0372 
0373     T* d_lookup = Alloc(lookup, "QPMT<T>::pmtcat_scan/d_lookup" );
0374 
0375 
0376 #if defined(MOCK_CURAND) || defined(MOCK_CUDA)
0377     QPMT_pmtcat_MOCK( d_pmt, etype, d_lookup, d_domain, num_domain );
0378 #else
0379     dim3 numBlocks ;
0380     dim3 threadsPerBlock ;
0381     QU::ConfigureLaunch1D( numBlocks, threadsPerBlock, num_domain, 512u );
0382 
0383     QPMT_pmtcat_scan(numBlocks, threadsPerBlock, d_pmt, etype, d_lookup, d_domain, num_domain );
0384 
0385     QU::copy_device_to_host_and_free<T>( h_lookup, d_lookup, lookup_num_values, "QPMT::pmtcat_scan/cd2haf" );
0386     cudaDeviceSynchronize();
0387 #endif
0388 
0389     return lookup ;
0390 }
0391 
0392 
0393 
0394 
0395 
0396 
0397 /**
0398 QPMT::mct_lpmtid_scan
0399 ----------------------
0400 
0401 Canonical usage from QPMTTest.h make_qscan
0402 
0403 
0404 mct means minus_cos_theta for an AOI scan
0405 
0406 1. create lookup output array with shape depending on etype
0407 2. allocate d_lookup on device
0408 3. upload domain to d_domain
0409 4. upload lpmtid to d_lpmtid
0410 5. invoke launch QPMT_mct_lpmtid
0411 6. download d_lookup to h_lookup
0412 
0413 
0414 Q: Can dependency on CUSTOM4 be avoided ?
0415 A: NO, as using multilayer stack calc so need the
0416    header with the TMM calc from CUSTOM4.
0417    Dont want to duplicate that header.
0418 
0419 **/
0420 
0421 template<typename T>
0422 NP* QPMT<T>::mct_lpmtid_scan(int etype, const NP* domain, const NP* lpmtid ) const
0423 {
0424     const char* elabel = qpmt_enum::Label(etype) ;
0425     unsigned num_domain = domain->shape[0] ;
0426     unsigned num_lpmtid = lpmtid->shape[0] ;
0427 
0428     NP* lookup = MakeArray_pmtid(etype, num_domain, num_lpmtid );
0429     LOG_IF(fatal, lookup == nullptr)
0430           << " etype " << etype
0431           << " elabel " << elabel
0432           << " FATAL no lookup "
0433           ;
0434 
0435     assert(lookup);
0436     if(!lookup) std::raise(SIGINT);
0437 
0438     unsigned num_lookup = lookup->num_values() ;
0439 
0440 
0441     if( etype == qpmt_ART )
0442     {
0443         std::vector<std::pair<std::string, std::string>> kvs =
0444         {
0445             { "title", "QPMT.title" },
0446             { "brief", "QPMT.brief" },
0447             { "name",  "QPMT.name"  },
0448             { "label", "QPMT.label" },
0449             { "ExecutableName", ExecutableName }
0450         };
0451         lookup->set_meta_kv<std::string>(kvs);
0452     }
0453 
0454 
0455     LOG(LEVEL)
0456         << " etype " << etype
0457         << " elabel " << elabel
0458         << " domain " << domain->sstr()
0459         << " lpmtid " << lpmtid->sstr()
0460         << " num_domain " << num_domain
0461         << " num_lpmtid " << num_lpmtid
0462         << " lookup " << lookup->sstr()
0463         << " num_lookup " << num_lookup
0464         ;
0465 
0466 
0467 
0468 #ifdef WITH_CUSTOM4
0469     T* h_lookup = lookup->values<T>() ;
0470 
0471 #if defined(MOCK_CURAND) || defined(MOCK_CUDA)
0472     T* d_lookup = h_lookup ;
0473 #else
0474     T* d_lookup = QU::device_alloc<T>(num_lookup,"QPMT::mct_lpmtid_scan/d_lookup") ;
0475 #endif
0476 
0477     assert( lpmtid->uifc == 'i' && lpmtid->ebyte == 4 );
0478 
0479 #if defined(MOCK_CURAND) || defined(MOCK_CUDA)
0480     const T*   d_domain = domain->cvalues<T>() ;
0481     const int* d_lpmtid = lpmtid->cvalues<int>() ;
0482 #else
0483     const char* label_0 = "QPMT::mct_lpmtid_scan/d_domain" ;
0484     const char* label_1 = "QPMT::mct_lpmtid_scan/d_lpmtid" ;
0485 
0486     const T*   d_domain = QU::UploadArray<T>(   domain->cvalues<T>(),   num_domain, label_0) ;
0487     const int* d_lpmtid = QU::UploadArray<int>( lpmtid->cvalues<int>(), num_lpmtid, label_1) ;
0488 #endif
0489 
0490 
0491 
0492 #if defined(MOCK_CURAND) || defined(MOCK_CUDA)
0493     QPMT_mct_lpmtid_MOCK( d_pmt, etype, d_lookup, d_domain, num_domain, d_lpmtid, num_lpmtid );
0494 #else
0495 
0496     dim3 numBlocks ;
0497     dim3 threadsPerBlock ;
0498     QU::ConfigureLaunch1D( numBlocks, threadsPerBlock, num_domain, 512u );
0499 
0500     QPMT_mct_lpmtid_scan(
0501         numBlocks,
0502         threadsPerBlock,
0503         d_pmt,
0504         etype,
0505         d_lookup,
0506         d_domain,
0507         num_domain,
0508         d_lpmtid,
0509         num_lpmtid );
0510 
0511     cudaDeviceSynchronize();
0512 
0513     const char* label = "QPMT::mct_lpmtid_scan" ;
0514     QU::copy_device_to_host_and_free<T>( h_lookup, d_lookup, num_lookup, label );
0515     cudaDeviceSynchronize();
0516 #endif
0517 
0518 #else
0519     LOG(fatal) << " QPMT::mct_lpmtid_scan requires compilation WITH_CUSTOM4 " ;
0520     assert(0) ;
0521 #endif
0522 
0523     return lookup ;
0524 }
0525 
0526 
0527 
0528 /**
0529 QPMT::spmtid_scan
0530 ----------------------
0531 
0532 Canonical usage from QPMTTest.h make_qscan
0533 
0534 **/
0535 
0536 
0537 template<typename T>
0538 NP* QPMT<T>::spmtid_scan(int etype, const NP* spmtid ) const
0539 {
0540     const char* elabel = qpmt_enum::Label(etype) ;
0541     unsigned num_spmtid = spmtid->shape[0] ;
0542     const int* d_spmtid = QU::UploadArray<int>( spmtid->cvalues<int>(), num_spmtid, "QPMT::spmtid_scan/d_spmtid") ;
0543 
0544     unsigned num_domain = -1 ; // not used
0545     NP* lookup = MakeArray_pmtid(etype, num_domain, num_spmtid );
0546     LOG_IF(fatal, lookup == nullptr)
0547           << " etype " << etype
0548           << " elabel " << elabel
0549           << " FATAL no lookup "
0550           ;
0551 
0552     assert(lookup);
0553     if(!lookup) std::raise(SIGINT);
0554 
0555     unsigned num_lookup = lookup->num_values() ;
0556     T* h_lookup = lookup->values<T>() ;
0557     T* d_lookup = QU::device_alloc<T>(num_lookup,"QPMT::spmtid_scan/d_lookup") ;
0558 
0559     dim3 numBlocks ;
0560     dim3 threadsPerBlock ;
0561     QU::ConfigureLaunch1D( numBlocks, threadsPerBlock, num_spmtid, 512u );
0562 
0563     QPMT_spmtid_scan(
0564         numBlocks,
0565         threadsPerBlock,
0566         d_pmt,
0567         etype,
0568         d_lookup,
0569         d_spmtid,
0570         num_spmtid );
0571 
0572     cudaDeviceSynchronize();
0573 
0574     QU::copy_device_to_host_and_free<T>( h_lookup, d_lookup, num_lookup, "QPMT::spmtid_scan/cdthaf" );
0575     cudaDeviceSynchronize();
0576 
0577     return lookup ;
0578 }
0579 
0580 
0581 
0582 
0583 
0584 // found the below can live in header, when headeronly
0585 //#pragma GCC diagnostic push
0586 //#pragma GCC diagnostic ignored "-Wattributes"
0587 // quell warning: type attributes ignored after type is already defined [-Wattributes]
0588 template struct QUDARAP_API QPMT<float>;
0589 //template struct QUDARAP_API QPMT<double>;
0590 //#pragma GCC diagnostic pop
0591