Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-04-09 07:49:09

0001 #include "SLOG.hh"
0002 #include "SSys.hh"
0003 #include "scuda.h"
0004 #include "squad.h"
0005 
0006 #include <sstream>
0007 
0008 #include "NP.hh"
0009 
0010 #include "QUDA_CHECK.h"
0011 #include "QRng.hh"
0012 #include "QU.hh"
0013 #include "QMultiFilm.hh"
0014 #include "QTex.hh"
0015 #include "qmultifilm.h"
0016 
0017 const plog::Severity QMultiFilm::LEVEL = SLOG::EnvLevel("QMultiFilm", "DEBUG"); 
0018 const QMultiFilm* QMultiFilm::INSTANCE = nullptr ; 
0019 const QMultiFilm* QMultiFilm::Get(){ return INSTANCE ;  }
0020 
0021 
0022 QMultiFilm::QMultiFilm(const NP* lut )
0023     :
0024     dsrc(lut->ebyte == 8 ? lut : nullptr),
0025     src( lut->ebyte == 4 ? lut : NP::MakeNarrow(dsrc)),
0026     multifilm(new qmultifilm),
0027     d_multifilm(nullptr) 
0028 {
0029 
0030     makeMultiFilmAllTex();
0031     INSTANCE = this ; 
0032     init();
0033 }
0034 
0035 qmultifilm* QMultiFilm::getDevicePtr() const
0036 {   
0037     return d_multifilm ;
0038 }
0039 
0040 void QMultiFilm::init(){
0041 
0042     uploadMultifilmlut();
0043 
0044 }
0045 
0046 void QMultiFilm::uploadMultifilmlut()
0047 {
0048     int num = 2 ;
0049     for(int i = 0 ; i < num ; i++)
0050     {
0051         multifilm->nnvt_normal_tex[i] = tex_nnvt_normal[i]->texObj ;
0052         multifilm->nnvt_highqe_tex[i] = tex_nnvt_highqe[i]->texObj ;
0053         multifilm->hama_tex[i]        = tex_hama[i]       ->texObj ;
0054 
0055         multifilm->nnvt_normal_meta[i]= tex_nnvt_normal[i]->d_meta ;
0056         multifilm->nnvt_highqe_meta[i]= tex_nnvt_highqe[i]->d_meta ;
0057         multifilm->hama_meta[i]       = tex_hama[i]       ->d_meta ; 
0058     }
0059     d_multifilm = QU::UploadArray<qmultifilm>(multifilm, 1,"QMultiFilm::uploadMultifilmlut" );
0060 }
0061 
0062 
0063 void QMultiFilm::makeMultiFilmAllTex(){
0064 
0065     assert( src->has_shape(3,2,128,256,4));
0066     std::vector<std::string> pmtTypeList;
0067     src -> get_names( pmtTypeList );
0068     assert( pmtTypeList.size() == 3);
0069     for(unsigned i = 0 ; i < pmtTypeList.size() ; i++){
0070 
0071         std::string pmtName = pmtTypeList[i];
0072         //NP* pmt_src = src -> spawn_item(i);
0073         QTex<float4>  ** tex_arr = nullptr;
0074         if(pmtName == "kPMT_NNVT"){
0075             tex_arr = tex_nnvt_normal;
0076         }
0077         else if( pmtName == "kPMT_NNVT_HighQE"){
0078             tex_arr = tex_nnvt_highqe; 
0079         }
0080         else if( pmtName == "kPMT_Hamamatsu"){
0081             tex_arr = tex_hama;
0082         }
0083         else{ 
0084             assert(0);
0085         }
0086         makeMultiFilmOnePMTTex( i , tex_arr );          
0087 
0088     }
0089 }
0090 
0091 void QMultiFilm::makeMultiFilmOnePMTTex(  int pmtcatIdx , QTex<float4> ** tex_pmt  ){
0092 
0093     //   int bndDimIdx = src->get_meta<int>("boundary");
0094     //   int resDimIdx = src->get_meta<int>("resolution");
0095 
0096     int resolution_dim = src->shape[1];
0097 
0098     //assert(bnd_dim == 2) ;
0099     assert(resolution_dim == 2);
0100 
0101     for(int i = 0; i < resolution_dim ; i++){
0102         int offset = i;
0103         tex_pmt[offset] = makeMultiFilmOneTex( pmtcatIdx , i );               
0104     }
0105 }
0106 
0107 QTex<float4>* QMultiFilm::makeMultiFilmOneTex( int pmtcatIdx , int resIdx ){
0108 
0109 
0110     assert( src->uifc == 'f' ); 
0111     assert( src->ebyte == 4 );    // expecting float src array, possible narrowed from double dsrc array  
0112 
0113     /*
0114        int bndDimIdx = src->get_meta<int>("boundary");
0115        int resDimIdx = src->get_meta<int>("resolution");
0116        int wvDimIdx = src->get_meta<int>("wavelength");
0117        int aoiDimIdx = src->get_meta<int>("aoi");
0118        int payDimIdx = src->get_meta<int>("payload");
0119        */
0120     int resolution_dim = src->shape[1];
0121     assert(resolution_dim == 2);
0122 
0123     unsigned ni = src->shape[2]; 
0124     unsigned nj = src->shape[3]; 
0125     unsigned nk = src->shape[4]; 
0126 
0127     assert( ni == 128); 
0128     assert( nj == 256); 
0129     assert( nk == 4 ); 
0130 
0131     unsigned ny = ni ; // height  
0132     unsigned nx = nj ; // width 
0133 
0134     int offset = pmtcatIdx*resolution_dim*ni*nj*nk + resIdx * ni*nj*nk;    
0135 
0136     bool qmultifilmlut_disable_interpolation = SSys::getenvbool("QMULTIFILMLUT_DISABLE_INTERP"); 
0137     char filterMode = qmultifilmlut_disable_interpolation ? 'P' : 'L' ; 
0138 
0139     LOG_IF(fatal, qmultifilmlut_disable_interpolation) << "QMULTIFILMLUT_DISABLE_INTERP active using filterMode " << filterMode ; 
0140 
0141 
0142     QTex<float4>* tx = new QTex<float4>(nx, ny, src->cvalues<float>()+offset , filterMode , 1, src ) ; 
0143 
0144     //tx->setHDFactor(hd_factor); 
0145     float wv_low  = src->get_meta<float>("wv_low");
0146     float wv_high = src->get_meta<float>("wv_high");
0147 
0148     float aoi_low = src->get_meta<float>("aoi_low");
0149     float aoi_high = src->get_meta<float>("aoi_high");
0150     float aoi_sublow = src->get_meta<float>("aoi_sublow");
0151     float aoi_subhigh = src->get_meta<float>("aoi_subhigh");
0152 
0153     quad domainX;
0154     domainX.f.x = aoi_low;
0155     domainX.f.y = aoi_high;
0156     domainX.f.z = aoi_sublow ;
0157     domainX.f.w = aoi_subhigh;
0158     tx->setMetaDomainX(&domainX);
0159 
0160     quad domainY;
0161     domainY.f.x = wv_low;
0162     domainY.f.y = wv_high;
0163     tx->setMetaDomainY(&domainY);
0164 
0165     tx->uploadMeta(); 
0166 
0167     LOG(LEVEL)
0168         << " src " << src->desc()
0169         << " nx (width) " << nx
0170         << " ny (height) " << ny
0171         //<< " tx.HDFactor " << tx->getHDFactor() 
0172         << " tx.filterMode " << tx->getFilterMode()
0173         << " LOG(LEVEL) = INFO "
0174         ;
0175 
0176     return tx ; 
0177 
0178 }
0179 
0180 std::string QMultiFilm::desc() const
0181 {
0182     std::stringstream ss ; 
0183     ss << "QMultiFilm"
0184         << " dsrc " << ( dsrc ? dsrc->desc() : "-" )
0185         << " src " << ( src ? src->desc() : "-" )
0186         ; 
0187     int num = 2;
0188     for(int i = 0 ; i < num ;i++){
0189         ss<<" tex_hama["<<i<<"]" << ( tex_hama[i] ? tex_hama[i] ->desc(): "-") << std::endl;
0190     }
0191 
0192     for(int i = 0 ; i < num ;i++){
0193         ss<<" tex_nnvt_normal["<<i<<"]" << ( tex_nnvt_normal[i] ? tex_nnvt_normal[i] ->desc(): "-")<<std::endl;
0194     }
0195 
0196     for(int i = 0 ; i < num ;i++){
0197         ss<<" tex_nnvt_highqe["<<i<<"]" << ( tex_nnvt_highqe[i] ? tex_nnvt_highqe[i] ->desc(): "-")<<std::endl;
0198     }
0199 
0200     std::string s = ss.str(); 
0201     return s ; 
0202 }
0203 
0204 
0205 
0206 extern "C" void QMultiFilm_check(dim3 numBlocks, dim3 threadsPerBlock, unsigned width, unsigned height  ); 
0207 extern "C" void QMultiFilm_lookup(dim3 numBlocks, dim3 threadsPerBlock, cudaTextureObject_t texObj, quad4* meta, float4* lookup, unsigned num_lookup, unsigned width, unsigned height); 
0208 
0209 void QMultiFilm::configureLaunch( dim3& numBlocks, dim3& threadsPerBlock, unsigned width, unsigned height )
0210 {
0211     threadsPerBlock.x = 32 ; 
0212     threadsPerBlock.y = 32 ; 
0213     threadsPerBlock.z = 1 ; 
0214 
0215     numBlocks.x = (width + threadsPerBlock.x - 1) / threadsPerBlock.x ; 
0216     numBlocks.y = (height + threadsPerBlock.y - 1) / threadsPerBlock.y ;
0217     numBlocks.z = 1 ; 
0218 
0219     LOG(LEVEL) 
0220         << " width " << std::setw(7) << width 
0221         << " height " << std::setw(7) << height 
0222         << " width*height " << std::setw(7) << width*height 
0223         << " threadsPerBlock"
0224         << "(" 
0225         << std::setw(3) << threadsPerBlock.x << " " 
0226         << std::setw(3) << threadsPerBlock.y << " " 
0227         << std::setw(3) << threadsPerBlock.z << " "
0228         << ")" 
0229         << " numBlocks "
0230         << "(" 
0231         << std::setw(3) << numBlocks.x << " " 
0232         << std::setw(3) << numBlocks.y << " " 
0233         << std::setw(3) << numBlocks.z << " "
0234         << ")" 
0235         ;
0236 }
0237 
0238 void QMultiFilm::check(){
0239 
0240     check( tex_hama[0] );
0241 
0242 }
0243 
0244 void QMultiFilm::check( QTex<float4> *tex )
0245 {
0246     unsigned width = tex->width ; 
0247     unsigned height = tex->height ; 
0248 
0249     LOG(LEVEL)
0250         << " width " << width
0251         << " height " << height
0252         ;
0253 
0254     dim3 numBlocks ; 
0255     dim3 threadsPerBlock ; 
0256     configureLaunch( numBlocks, threadsPerBlock, width, height ); 
0257     QMultiFilm_check(numBlocks, threadsPerBlock, width, height );  
0258 
0259     cudaDeviceSynchronize();
0260 }
0261 
0262 NP* QMultiFilm::lookup(int pmtcatIdx , int resIdx ){
0263 
0264     QTex<float4> **tex = choose_tex(pmtcatIdx);
0265     int offset = resIdx;
0266     NP* out =  lookup(tex[offset]);
0267     return out;
0268 }
0269 
0270 QTex<float4> ** QMultiFilm::choose_tex(int pmtcatIdx){
0271 
0272     QTex<float4> **tex = nullptr;
0273     switch(pmtcatIdx){
0274         case 0: tex = tex_nnvt_normal ; break ; 
0275         case 1: tex = tex_hama        ; break ;
0276         case 2: tex = tex_nnvt_highqe ; break ;
0277     }      
0278 
0279     return tex;
0280 }
0281 
0282 
0283 NP* QMultiFilm::lookup( QTex<float4> *tex  )
0284 {
0285     unsigned width = tex->width ; 
0286     unsigned height = tex->height; 
0287     unsigned num_lookup = width*height ; 
0288     //   unsigned payload = 4 ;
0289 
0290     LOG(LEVEL)
0291         << " width " << width
0292         << " height " << height
0293         << " lookup " << num_lookup
0294         ;
0295 
0296 
0297     NP* out = NP::Make<float>(height, width, 4 ); 
0298     float4* out_v = out->values<float4>(); 
0299     lookup( tex,out_v , num_lookup, width, height); 
0300 
0301     return out ; 
0302 }
0303 
0304 void QMultiFilm::lookup( QTex<float4> *tex, float4* lookup, unsigned num_lookup, unsigned width, unsigned height)
0305 {
0306     LOG(LEVEL) << "[" ; 
0307     dim3 numBlocks ; 
0308     dim3 threadsPerBlock ; 
0309     configureLaunch( numBlocks, threadsPerBlock, width, height ); 
0310 
0311     size_t size = width * height * sizeof(float4) ; 
0312 
0313     LOG(LEVEL) 
0314         << " num_lookup " << num_lookup
0315         << " width " << width 
0316         << " height " << height
0317 
0318         << " size " << size 
0319         << " tex->texObj " << tex->texObj
0320         << " tex->meta " << tex->meta
0321         << " tex->d_meta " << tex->d_meta
0322         ; 
0323 
0324     float4* d_lookup = nullptr ;  
0325     QUDA_CHECK( cudaMalloc(reinterpret_cast<void**>( &d_lookup ), size )); 
0326 
0327     LOG(LEVEL)
0328         <<" QMultiFilm_lookup (";
0329     QMultiFilm_lookup(numBlocks, threadsPerBlock, tex->texObj, tex->d_meta, d_lookup, num_lookup, width, height);  
0330 
0331     LOG(LEVEL)
0332         <<" QMultiFilm_lookup )";
0333     QUDA_CHECK( cudaMemcpy(reinterpret_cast<void*>( lookup ), d_lookup, size, cudaMemcpyDeviceToHost )); 
0334     QUDA_CHECK( cudaFree(d_lookup) ); 
0335 
0336     cudaDeviceSynchronize();
0337 
0338     dump(lookup , num_lookup);
0339 
0340     LOG(LEVEL) << "]" ; 
0341 }
0342 
0343 
0344 void QMultiFilm::dump( float4* lookup, unsigned num_lookup, unsigned edgeitems  )
0345 {
0346     LOG(LEVEL); 
0347     for(unsigned i=0 ; i < num_lookup ; i++)
0348     {
0349         if( i < edgeitems || i > num_lookup - edgeitems )
0350             std::cout 
0351                 << std::setw(6) << i 
0352                 << std::setw(10) << std::fixed << std::setprecision(3) << lookup[i].x
0353                 << std::setw(10) << std::fixed << std::setprecision(3) << lookup[i].y
0354                 << std::setw(10) << std::fixed << std::setprecision(3) << lookup[i].z
0355                 << std::setw(10) << std::fixed << std::setprecision(3) << lookup[i].w 
0356                 << std::endl 
0357                 ; 
0358     }
0359 }
0360 
0361 /* */
0362 extern "C" void QMultiFilm_mock_lookup(dim3 numBlocks, dim3 threadsPerBlock, qmultifilm* d_multifilm, quad2* d_input, float4* d_out, unsigned num_lookup, unsigned width, unsigned height); 
0363 
0364 NP* QMultiFilm::mock_lookup( NP * input_arr )
0365 {
0366     assert(input_arr->has_shape(128,256,2,4));
0367 
0368     unsigned height = input_arr->shape[0]; 
0369     unsigned width = input_arr->shape[1] ; 
0370     unsigned num_lookup = width*height ; 
0371 
0372     LOG(LEVEL)
0373         << " width " << width
0374         << " height " << height
0375         << " lookup " << num_lookup
0376         ;
0377 
0378     //upload input_array
0379     quad2* qd2 = (quad2*)input_arr->values<float>();
0380     quad2* d_input = QU::UploadArray<quad2>(qd2, num_lookup,"multifilm_mock_lookup");
0381 
0382     //malloc for output array 
0383     NP* out = NP::Make<float>(height, width, 4 ); 
0384     float4* h_out = out->values<float4>();
0385     float4* d_out = nullptr; 
0386 
0387     size_t size = num_lookup*sizeof(float4);
0388     QUDA_CHECK( cudaMalloc(reinterpret_cast<void**>( &d_out ), size )); 
0389 
0390     mock_lookup( d_input, d_out , num_lookup , width, height); 
0391 
0392     QUDA_CHECK( cudaMemcpy(reinterpret_cast<void*>(h_out), d_out, size, cudaMemcpyDeviceToHost )); 
0393     QUDA_CHECK( cudaFree(d_out) ); 
0394     QUDA_CHECK( cudaFree(d_input) ); 
0395     cudaDeviceSynchronize();
0396 
0397     dump(h_out, num_lookup);
0398     return out ; 
0399 }
0400 
0401 
0402 void QMultiFilm::mock_lookup( quad2* d_input, float4* d_out, unsigned num_lookup, unsigned width, unsigned height ){
0403 
0404     dim3 numBlocks ; 
0405     dim3 threadsPerBlock ; 
0406     configureLaunch( numBlocks, threadsPerBlock, width, height ); 
0407 
0408     QMultiFilm_mock_lookup(numBlocks, threadsPerBlock, d_multifilm, d_input, d_out, num_lookup, width, height);  
0409 
0410 }