File indexing completed on 2026-04-09 07:49:09
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022 #include "SLOG.hh"
0023
0024 #if defined(MOCK_CURAND) || defined(MOCK_CUDA)
0025 #else
0026 #include <cuda_runtime.h>
0027 #endif
0028
0029 #include <csignal>
0030 #include <vector_types.h>
0031 #include "QPMT.hh"
0032
0033
0034 #if defined(MOCK_CURAND) || defined(MOCK_CUDA)
0035 #include "QPMT_MOCK.h"
0036
0037 template<typename T>
0038 const plog::Severity QPMT<T>::LEVEL = plog::info ;
0039 #else
0040 template<typename T>
0041 const plog::Severity QPMT<T>::LEVEL = SLOG::EnvLevel("QPMT", "DEBUG");
0042 #endif
0043
0044 template<typename T>
0045 const QPMT<T>* QPMT<T>::INSTANCE = nullptr ;
0046
0047 template<typename T>
0048 const QPMT<T>* QPMT<T>::Get(){ return INSTANCE ; }
0049
0050
0051
0052 template<typename T>
0053 inline std::string QPMT<T>::Desc()
0054 {
0055 std::stringstream ss ;
0056 ss << "QPMT<" << ( sizeof(T) == 4 ? "float" : "double" ) << "> " ;
0057 #ifdef WITH_CUSTOM4
0058 ss << "WITH_CUSTOM4 " ;
0059 #else
0060 ss << "NOT:WITH_CUSTOM4 " ;
0061 #endif
0062 ss << " INSTANCE:" << ( INSTANCE ? "YES" : "NO " ) << " " ;
0063 if(INSTANCE) ss << INSTANCE->desc() ;
0064 std::string str = ss.str();
0065 return str ;
0066 }
0067
0068
0069
0070
0071
0072
0073
0074
0075
0076
0077
0078 template<typename T>
0079 inline void QPMT<T>::init()
0080 {
0081 LOG(LEVEL) << "[" ;
0082
0083 INSTANCE = this ;
0084
0085 const int ni = s_pmt::NUM_CAT ;
0086 const int nj = s_pmt::NUM_LAYR ;
0087 const int nk = s_pmt::NUM_PROP ;
0088
0089
0090
0091 bool src_rindex_expect = src_rindex->has_shape(ni, nj, nk, -1, 2 ) ;
0092 bool src_thickness_expect = src_thickness->has_shape(ni, nj, 1 ) ;
0093
0094 assert( src_rindex_expect);
0095 assert( src_thickness_expect );
0096
0097 if(!src_rindex_expect) std::raise(SIGINT);
0098 if(!src_thickness_expect) std::raise(SIGINT);
0099
0100
0101 init_prop();
0102 init_thickness();
0103 init_lcqs();
0104 init_s_qescale();
0105
0106 #if defined(MOCK_CURAND) || defined(MOCK_CUDA)
0107 d_pmt = pmt ;
0108 #else
0109 d_pmt = QU::UploadArray<qpmt<T>>( (const qpmt<T>*)pmt, 1u, "QPMT::init/d_pmt" ) ;
0110
0111 #endif
0112 LOG(LEVEL) << "]" ;
0113 }
0114
0115
0116
0117 template<typename T>
0118 inline void QPMT<T>::init_prop()
0119 {
0120 pmt->rindex_prop = rindex_prop->getDevicePtr() ;
0121 pmt->qeshape_prop = qeshape_prop->getDevicePtr() ;
0122 pmt->cetheta_prop = cetheta_prop->getDevicePtr() ;
0123 pmt->cecosth_prop = cecosth_prop->getDevicePtr() ;
0124
0125 pmt->s_qeshape_prop = s_qeshape_prop->getDevicePtr() ;
0126 }
0127
0128
0129 template<typename T>
0130 inline void QPMT<T>::init_thickness()
0131 {
0132 const char* label = "QPMT::init_thickness/d_thickness" ;
0133
0134 #if defined(MOCK_CURAND) || defined(MOCK_CUDA)
0135 T* d_thickness = const_cast<T*>(thickness->cvalues<T>()) ;
0136 #else
0137 T* d_thickness = QU::UploadArray<T>(thickness->cvalues<T>(), thickness->num_values(), label ); ;
0138 #endif
0139
0140 pmt->thickness = d_thickness ;
0141 }
0142
0143 template<typename T>
0144 inline void QPMT<T>::init_lcqs()
0145 {
0146 LOG(LEVEL)
0147 << " src_lcqs " << ( src_lcqs ? src_lcqs->sstr() : "-" )
0148 << " lcqs " << ( lcqs ? lcqs->sstr() : "-" )
0149 ;
0150
0151 const char* label = "QPMT::init_lcqs/d_lcqs" ;
0152
0153 #if defined(MOCK_CURAND) || defined(MOCK_CUDA)
0154 T* d_lcqs = lcqs ? const_cast<T*>(lcqs->cvalues<T>()) : nullptr ;
0155 #else
0156 T* d_lcqs = lcqs ? QU::UploadArray<T>(lcqs->cvalues<T>(), lcqs->num_values(), label) : nullptr ;
0157 #endif
0158
0159 pmt->lcqs = d_lcqs ;
0160 pmt->i_lcqs = (int*)d_lcqs ;
0161 }
0162
0163
0164 template<typename T>
0165 inline void QPMT<T>::init_s_qescale()
0166 {
0167 LOG(LEVEL)
0168 << " src_s_qescale " << ( src_s_qescale ? src_s_qescale->sstr() : "-" )
0169 << " s_qescale " << ( s_qescale ? s_qescale->sstr() : "-" )
0170 ;
0171
0172 const char* label = "QPMT::init_s_qescale/d_s_qescale" ;
0173
0174 #if defined(MOCK_CURAND) || defined(MOCK_CUDA)
0175 T* d_s_qescale = s_qescale ? const_cast<T*>(s_qescale->cvalues<T>()) : nullptr ;
0176 #else
0177 T* d_s_qescale = s_qescale ? QU::UploadArray<T>(s_qescale->cvalues<T>(), s_qescale->num_values(), label) : nullptr ;
0178 #endif
0179
0180 pmt->s_qescale = d_s_qescale ;
0181 }
0182
0183
0184
0185
0186
0187
0188
0189
0190
0191
0192
0193 #if defined(MOCK_CURAND) || defined(MOCK_CUDA)
0194
0195 template <typename F>
0196 extern void QPMT_pmtcat_MOCK(
0197 qpmt<F>* pmt,
0198 int etype,
0199 F* lookup,
0200 const F* domain,
0201 unsigned domain_width
0202 );
0203
0204 template <typename T>
0205 extern void QPMT_mct_lpmtid_MOCK(
0206 qpmt<T>* pmt,
0207 int etype,
0208 T* lookup,
0209 const T* domain,
0210 unsigned domain_width,
0211 const int* lpmtid,
0212 unsigned num_lpmtid
0213 );
0214
0215
0216 #else
0217 template <typename T>
0218 extern void QPMT_pmtcat_scan(
0219 dim3 numBlocks,
0220 dim3 threadsPerBlock,
0221 qpmt<T>* pmt,
0222 int etype,
0223 T* lookup,
0224 const T* domain,
0225 unsigned domain_width
0226 );
0227
0228 template <typename T>
0229 extern void QPMT_mct_lpmtid_scan(
0230 dim3 numBlocks,
0231 dim3 threadsPerBlock,
0232 qpmt<T>* pmt,
0233 int etype,
0234 T* lookup,
0235 const T* domain,
0236 unsigned domain_width,
0237 const int* lpmtid,
0238 unsigned num_lpmtid
0239 );
0240
0241 template <typename T>
0242 extern void QPMT_spmtid_scan(
0243 dim3 numBlocks,
0244 dim3 threadsPerBlock,
0245 qpmt<T>* pmt,
0246 int etype,
0247 T* lookup,
0248 const int* spmtid,
0249 unsigned num_spmtid
0250 );
0251
0252 #endif
0253
0254
0255
0256
0257 template<typename T>
0258 void QPMT<T>::pmtcat_check_domain_lookup_shape( int etype, const NP* domain, const NP* lookup) const
0259 {
0260 const char* elabel = qpmt_enum::Label(etype) ;
0261 bool domain_expect = domain->shape.size() == 1 && domain->shape[0] > 0 ;
0262
0263 LOG_IF(fatal, !domain_expect)
0264 << " etype " << etype
0265 << " elabel " << elabel
0266 << " FATAL UNEXPECTED DOMAIN SHAPE "
0267 << " domain.sstr " << ( domain ? domain->sstr() : "-" )
0268 ;
0269
0270 assert( domain_expect );
0271 if(!domain_expect) std::raise(SIGINT);
0272
0273 unsigned num_domain = domain->shape[0] ;
0274 unsigned num_domain_1 = 0 ;
0275
0276 switch(etype)
0277 {
0278 case qpmt_RINDEX : num_domain_1 = lookup->shape[lookup->shape.size()-1] ; break ;
0279 case qpmt_QESHAPE: num_domain_1 = lookup->shape[lookup->shape.size()-1] ; break ;
0280 case qpmt_S_QESHAPE: num_domain_1 = lookup->shape[lookup->shape.size()-1] ; break ;
0281 case qpmt_CETHETA: num_domain_1 = lookup->shape[lookup->shape.size()-1] ; break ;
0282 case qpmt_CECOSTH: num_domain_1 = lookup->shape[lookup->shape.size()-1] ; break ;
0283 case qpmt_CATSPEC: num_domain_1 = lookup->shape[lookup->shape.size()-3] ; break ;
0284 }
0285
0286 bool num_domain_expect = num_domain == num_domain_1 ;
0287
0288 LOG_IF(fatal, !num_domain_expect)
0289 << " etype " << etype
0290 << " elabel " << elabel
0291 << " FATAL DOMAIN MISMATCH "
0292 << " num_domain " << num_domain
0293 << " num_domain_1 " << num_domain_1
0294 ;
0295
0296 assert( num_domain_expect );
0297 if(!num_domain_expect) std::raise(SIGINT) ;
0298
0299 }
0300
0301 template<typename T>
0302 const T* QPMT<T>::Upload(const NP* arr, const char* label)
0303 {
0304 unsigned num_arr = arr->shape[0] ;
0305 #if defined(MOCK_CURAND) || defined(MOCK_CUDA)
0306 const T* d_arr = arr->cvalues<T>() ;
0307 #else
0308 const T* d_arr = QU::UploadArray<T>( arr->cvalues<T>(), num_arr, label ) ;
0309 #endif
0310 return d_arr ;
0311 }
0312
0313
0314 template<typename T>
0315 T* QPMT<T>::Alloc(NP* out, const char* label)
0316 {
0317 unsigned num_values = out->num_values() ;
0318 #if defined(MOCK_CURAND) || defined(MOCK_CUDA)
0319 T* d_arr = out->values<T> ;
0320 #else
0321 T* d_arr = QU::device_alloc<T>(num_values,label) ;
0322 #endif
0323 return d_arr ;
0324 }
0325
0326
0327
0328
0329
0330
0331
0332
0333
0334
0335
0336
0337
0338
0339
0340
0341
0342
0343
0344
0345
0346
0347
0348
0349
0350 template<typename T>
0351 NP* QPMT<T>::pmtcat_scan(int etype, const NP* domain ) const
0352 {
0353 const char* elabel = qpmt_enum::Label(etype) ;
0354
0355 unsigned num_domain = domain->shape[0] ;
0356 NP* lookup = MakeArray_pmtcat(etype, num_domain );
0357 pmtcat_check_domain_lookup_shape(etype, domain, lookup) ;
0358 unsigned lookup_num_values = lookup->num_values() ;
0359
0360 const T* d_domain = Upload(domain, "QPMT::pmtcat_scan/d_domain" );
0361
0362 LOG(LEVEL)
0363 << " etype " << etype
0364 << " elabel " << elabel
0365 << " domain " << domain->sstr()
0366 << " num_domain " << num_domain
0367 << " lookup " << lookup->sstr()
0368 << " lookup_num_values " << lookup_num_values
0369 ;
0370
0371 T* h_lookup = lookup->values<T>() ;
0372
0373 T* d_lookup = Alloc(lookup, "QPMT<T>::pmtcat_scan/d_lookup" );
0374
0375
0376 #if defined(MOCK_CURAND) || defined(MOCK_CUDA)
0377 QPMT_pmtcat_MOCK( d_pmt, etype, d_lookup, d_domain, num_domain );
0378 #else
0379 dim3 numBlocks ;
0380 dim3 threadsPerBlock ;
0381 QU::ConfigureLaunch1D( numBlocks, threadsPerBlock, num_domain, 512u );
0382
0383 QPMT_pmtcat_scan(numBlocks, threadsPerBlock, d_pmt, etype, d_lookup, d_domain, num_domain );
0384
0385 QU::copy_device_to_host_and_free<T>( h_lookup, d_lookup, lookup_num_values, "QPMT::pmtcat_scan/cd2haf" );
0386 cudaDeviceSynchronize();
0387 #endif
0388
0389 return lookup ;
0390 }
0391
0392
0393
0394
0395
0396
0397
0398
0399
0400
0401
0402
0403
0404
0405
0406
0407
0408
0409
0410
0411
0412
0413
0414
0415
0416
0417
0418
0419
0420
0421 template<typename T>
0422 NP* QPMT<T>::mct_lpmtid_scan(int etype, const NP* domain, const NP* lpmtid ) const
0423 {
0424 const char* elabel = qpmt_enum::Label(etype) ;
0425 unsigned num_domain = domain->shape[0] ;
0426 unsigned num_lpmtid = lpmtid->shape[0] ;
0427
0428 NP* lookup = MakeArray_pmtid(etype, num_domain, num_lpmtid );
0429 LOG_IF(fatal, lookup == nullptr)
0430 << " etype " << etype
0431 << " elabel " << elabel
0432 << " FATAL no lookup "
0433 ;
0434
0435 assert(lookup);
0436 if(!lookup) std::raise(SIGINT);
0437
0438 unsigned num_lookup = lookup->num_values() ;
0439
0440
0441 if( etype == qpmt_ART )
0442 {
0443 std::vector<std::pair<std::string, std::string>> kvs =
0444 {
0445 { "title", "QPMT.title" },
0446 { "brief", "QPMT.brief" },
0447 { "name", "QPMT.name" },
0448 { "label", "QPMT.label" },
0449 { "ExecutableName", ExecutableName }
0450 };
0451 lookup->set_meta_kv<std::string>(kvs);
0452 }
0453
0454
0455 LOG(LEVEL)
0456 << " etype " << etype
0457 << " elabel " << elabel
0458 << " domain " << domain->sstr()
0459 << " lpmtid " << lpmtid->sstr()
0460 << " num_domain " << num_domain
0461 << " num_lpmtid " << num_lpmtid
0462 << " lookup " << lookup->sstr()
0463 << " num_lookup " << num_lookup
0464 ;
0465
0466
0467
0468 #ifdef WITH_CUSTOM4
0469 T* h_lookup = lookup->values<T>() ;
0470
0471 #if defined(MOCK_CURAND) || defined(MOCK_CUDA)
0472 T* d_lookup = h_lookup ;
0473 #else
0474 T* d_lookup = QU::device_alloc<T>(num_lookup,"QPMT::mct_lpmtid_scan/d_lookup") ;
0475 #endif
0476
0477 assert( lpmtid->uifc == 'i' && lpmtid->ebyte == 4 );
0478
0479 #if defined(MOCK_CURAND) || defined(MOCK_CUDA)
0480 const T* d_domain = domain->cvalues<T>() ;
0481 const int* d_lpmtid = lpmtid->cvalues<int>() ;
0482 #else
0483 const char* label_0 = "QPMT::mct_lpmtid_scan/d_domain" ;
0484 const char* label_1 = "QPMT::mct_lpmtid_scan/d_lpmtid" ;
0485
0486 const T* d_domain = QU::UploadArray<T>( domain->cvalues<T>(), num_domain, label_0) ;
0487 const int* d_lpmtid = QU::UploadArray<int>( lpmtid->cvalues<int>(), num_lpmtid, label_1) ;
0488 #endif
0489
0490
0491
0492 #if defined(MOCK_CURAND) || defined(MOCK_CUDA)
0493 QPMT_mct_lpmtid_MOCK( d_pmt, etype, d_lookup, d_domain, num_domain, d_lpmtid, num_lpmtid );
0494 #else
0495
0496 dim3 numBlocks ;
0497 dim3 threadsPerBlock ;
0498 QU::ConfigureLaunch1D( numBlocks, threadsPerBlock, num_domain, 512u );
0499
0500 QPMT_mct_lpmtid_scan(
0501 numBlocks,
0502 threadsPerBlock,
0503 d_pmt,
0504 etype,
0505 d_lookup,
0506 d_domain,
0507 num_domain,
0508 d_lpmtid,
0509 num_lpmtid );
0510
0511 cudaDeviceSynchronize();
0512
0513 const char* label = "QPMT::mct_lpmtid_scan" ;
0514 QU::copy_device_to_host_and_free<T>( h_lookup, d_lookup, num_lookup, label );
0515 cudaDeviceSynchronize();
0516 #endif
0517
0518 #else
0519 LOG(fatal) << " QPMT::mct_lpmtid_scan requires compilation WITH_CUSTOM4 " ;
0520 assert(0) ;
0521 #endif
0522
0523 return lookup ;
0524 }
0525
0526
0527
0528
0529
0530
0531
0532
0533
0534
0535
0536
0537 template<typename T>
0538 NP* QPMT<T>::spmtid_scan(int etype, const NP* spmtid ) const
0539 {
0540 const char* elabel = qpmt_enum::Label(etype) ;
0541 unsigned num_spmtid = spmtid->shape[0] ;
0542 const int* d_spmtid = QU::UploadArray<int>( spmtid->cvalues<int>(), num_spmtid, "QPMT::spmtid_scan/d_spmtid") ;
0543
0544 unsigned num_domain = -1 ;
0545 NP* lookup = MakeArray_pmtid(etype, num_domain, num_spmtid );
0546 LOG_IF(fatal, lookup == nullptr)
0547 << " etype " << etype
0548 << " elabel " << elabel
0549 << " FATAL no lookup "
0550 ;
0551
0552 assert(lookup);
0553 if(!lookup) std::raise(SIGINT);
0554
0555 unsigned num_lookup = lookup->num_values() ;
0556 T* h_lookup = lookup->values<T>() ;
0557 T* d_lookup = QU::device_alloc<T>(num_lookup,"QPMT::spmtid_scan/d_lookup") ;
0558
0559 dim3 numBlocks ;
0560 dim3 threadsPerBlock ;
0561 QU::ConfigureLaunch1D( numBlocks, threadsPerBlock, num_spmtid, 512u );
0562
0563 QPMT_spmtid_scan(
0564 numBlocks,
0565 threadsPerBlock,
0566 d_pmt,
0567 etype,
0568 d_lookup,
0569 d_spmtid,
0570 num_spmtid );
0571
0572 cudaDeviceSynchronize();
0573
0574 QU::copy_device_to_host_and_free<T>( h_lookup, d_lookup, num_lookup, "QPMT::spmtid_scan/cdthaf" );
0575 cudaDeviceSynchronize();
0576
0577 return lookup ;
0578 }
0579
0580
0581
0582
0583
0584
0585
0586
0587
0588 template struct QUDARAP_API QPMT<float>;
0589
0590
0591