File indexing completed on 2026-04-09 07:49:09
0001 #include "SLOG.hh"
0002 #include "SSys.hh"
0003 #include "scuda.h"
0004 #include "squad.h"
0005
0006 #include <sstream>
0007
0008 #include "NP.hh"
0009
0010 #include "QUDA_CHECK.h"
0011 #include "QRng.hh"
0012 #include "QU.hh"
0013 #include "QMultiFilm.hh"
0014 #include "QTex.hh"
0015 #include "qmultifilm.h"
0016
0017 const plog::Severity QMultiFilm::LEVEL = SLOG::EnvLevel("QMultiFilm", "DEBUG");
0018 const QMultiFilm* QMultiFilm::INSTANCE = nullptr ;
0019 const QMultiFilm* QMultiFilm::Get(){ return INSTANCE ; }
0020
0021
0022 QMultiFilm::QMultiFilm(const NP* lut )
0023 :
0024 dsrc(lut->ebyte == 8 ? lut : nullptr),
0025 src( lut->ebyte == 4 ? lut : NP::MakeNarrow(dsrc)),
0026 multifilm(new qmultifilm),
0027 d_multifilm(nullptr)
0028 {
0029
0030 makeMultiFilmAllTex();
0031 INSTANCE = this ;
0032 init();
0033 }
0034
0035 qmultifilm* QMultiFilm::getDevicePtr() const
0036 {
0037 return d_multifilm ;
0038 }
0039
0040 void QMultiFilm::init(){
0041
0042 uploadMultifilmlut();
0043
0044 }
0045
0046 void QMultiFilm::uploadMultifilmlut()
0047 {
0048 int num = 2 ;
0049 for(int i = 0 ; i < num ; i++)
0050 {
0051 multifilm->nnvt_normal_tex[i] = tex_nnvt_normal[i]->texObj ;
0052 multifilm->nnvt_highqe_tex[i] = tex_nnvt_highqe[i]->texObj ;
0053 multifilm->hama_tex[i] = tex_hama[i] ->texObj ;
0054
0055 multifilm->nnvt_normal_meta[i]= tex_nnvt_normal[i]->d_meta ;
0056 multifilm->nnvt_highqe_meta[i]= tex_nnvt_highqe[i]->d_meta ;
0057 multifilm->hama_meta[i] = tex_hama[i] ->d_meta ;
0058 }
0059 d_multifilm = QU::UploadArray<qmultifilm>(multifilm, 1,"QMultiFilm::uploadMultifilmlut" );
0060 }
0061
0062
0063 void QMultiFilm::makeMultiFilmAllTex(){
0064
0065 assert( src->has_shape(3,2,128,256,4));
0066 std::vector<std::string> pmtTypeList;
0067 src -> get_names( pmtTypeList );
0068 assert( pmtTypeList.size() == 3);
0069 for(unsigned i = 0 ; i < pmtTypeList.size() ; i++){
0070
0071 std::string pmtName = pmtTypeList[i];
0072
0073 QTex<float4> ** tex_arr = nullptr;
0074 if(pmtName == "kPMT_NNVT"){
0075 tex_arr = tex_nnvt_normal;
0076 }
0077 else if( pmtName == "kPMT_NNVT_HighQE"){
0078 tex_arr = tex_nnvt_highqe;
0079 }
0080 else if( pmtName == "kPMT_Hamamatsu"){
0081 tex_arr = tex_hama;
0082 }
0083 else{
0084 assert(0);
0085 }
0086 makeMultiFilmOnePMTTex( i , tex_arr );
0087
0088 }
0089 }
0090
0091 void QMultiFilm::makeMultiFilmOnePMTTex( int pmtcatIdx , QTex<float4> ** tex_pmt ){
0092
0093
0094
0095
0096 int resolution_dim = src->shape[1];
0097
0098
0099 assert(resolution_dim == 2);
0100
0101 for(int i = 0; i < resolution_dim ; i++){
0102 int offset = i;
0103 tex_pmt[offset] = makeMultiFilmOneTex( pmtcatIdx , i );
0104 }
0105 }
0106
0107 QTex<float4>* QMultiFilm::makeMultiFilmOneTex( int pmtcatIdx , int resIdx ){
0108
0109
0110 assert( src->uifc == 'f' );
0111 assert( src->ebyte == 4 );
0112
0113
0114
0115
0116
0117
0118
0119
0120 int resolution_dim = src->shape[1];
0121 assert(resolution_dim == 2);
0122
0123 unsigned ni = src->shape[2];
0124 unsigned nj = src->shape[3];
0125 unsigned nk = src->shape[4];
0126
0127 assert( ni == 128);
0128 assert( nj == 256);
0129 assert( nk == 4 );
0130
0131 unsigned ny = ni ;
0132 unsigned nx = nj ;
0133
0134 int offset = pmtcatIdx*resolution_dim*ni*nj*nk + resIdx * ni*nj*nk;
0135
0136 bool qmultifilmlut_disable_interpolation = SSys::getenvbool("QMULTIFILMLUT_DISABLE_INTERP");
0137 char filterMode = qmultifilmlut_disable_interpolation ? 'P' : 'L' ;
0138
0139 LOG_IF(fatal, qmultifilmlut_disable_interpolation) << "QMULTIFILMLUT_DISABLE_INTERP active using filterMode " << filterMode ;
0140
0141
0142 QTex<float4>* tx = new QTex<float4>(nx, ny, src->cvalues<float>()+offset , filterMode , 1, src ) ;
0143
0144
0145 float wv_low = src->get_meta<float>("wv_low");
0146 float wv_high = src->get_meta<float>("wv_high");
0147
0148 float aoi_low = src->get_meta<float>("aoi_low");
0149 float aoi_high = src->get_meta<float>("aoi_high");
0150 float aoi_sublow = src->get_meta<float>("aoi_sublow");
0151 float aoi_subhigh = src->get_meta<float>("aoi_subhigh");
0152
0153 quad domainX;
0154 domainX.f.x = aoi_low;
0155 domainX.f.y = aoi_high;
0156 domainX.f.z = aoi_sublow ;
0157 domainX.f.w = aoi_subhigh;
0158 tx->setMetaDomainX(&domainX);
0159
0160 quad domainY;
0161 domainY.f.x = wv_low;
0162 domainY.f.y = wv_high;
0163 tx->setMetaDomainY(&domainY);
0164
0165 tx->uploadMeta();
0166
0167 LOG(LEVEL)
0168 << " src " << src->desc()
0169 << " nx (width) " << nx
0170 << " ny (height) " << ny
0171
0172 << " tx.filterMode " << tx->getFilterMode()
0173 << " LOG(LEVEL) = INFO "
0174 ;
0175
0176 return tx ;
0177
0178 }
0179
0180 std::string QMultiFilm::desc() const
0181 {
0182 std::stringstream ss ;
0183 ss << "QMultiFilm"
0184 << " dsrc " << ( dsrc ? dsrc->desc() : "-" )
0185 << " src " << ( src ? src->desc() : "-" )
0186 ;
0187 int num = 2;
0188 for(int i = 0 ; i < num ;i++){
0189 ss<<" tex_hama["<<i<<"]" << ( tex_hama[i] ? tex_hama[i] ->desc(): "-") << std::endl;
0190 }
0191
0192 for(int i = 0 ; i < num ;i++){
0193 ss<<" tex_nnvt_normal["<<i<<"]" << ( tex_nnvt_normal[i] ? tex_nnvt_normal[i] ->desc(): "-")<<std::endl;
0194 }
0195
0196 for(int i = 0 ; i < num ;i++){
0197 ss<<" tex_nnvt_highqe["<<i<<"]" << ( tex_nnvt_highqe[i] ? tex_nnvt_highqe[i] ->desc(): "-")<<std::endl;
0198 }
0199
0200 std::string s = ss.str();
0201 return s ;
0202 }
0203
0204
0205
0206 extern "C" void QMultiFilm_check(dim3 numBlocks, dim3 threadsPerBlock, unsigned width, unsigned height );
0207 extern "C" void QMultiFilm_lookup(dim3 numBlocks, dim3 threadsPerBlock, cudaTextureObject_t texObj, quad4* meta, float4* lookup, unsigned num_lookup, unsigned width, unsigned height);
0208
0209 void QMultiFilm::configureLaunch( dim3& numBlocks, dim3& threadsPerBlock, unsigned width, unsigned height )
0210 {
0211 threadsPerBlock.x = 32 ;
0212 threadsPerBlock.y = 32 ;
0213 threadsPerBlock.z = 1 ;
0214
0215 numBlocks.x = (width + threadsPerBlock.x - 1) / threadsPerBlock.x ;
0216 numBlocks.y = (height + threadsPerBlock.y - 1) / threadsPerBlock.y ;
0217 numBlocks.z = 1 ;
0218
0219 LOG(LEVEL)
0220 << " width " << std::setw(7) << width
0221 << " height " << std::setw(7) << height
0222 << " width*height " << std::setw(7) << width*height
0223 << " threadsPerBlock"
0224 << "("
0225 << std::setw(3) << threadsPerBlock.x << " "
0226 << std::setw(3) << threadsPerBlock.y << " "
0227 << std::setw(3) << threadsPerBlock.z << " "
0228 << ")"
0229 << " numBlocks "
0230 << "("
0231 << std::setw(3) << numBlocks.x << " "
0232 << std::setw(3) << numBlocks.y << " "
0233 << std::setw(3) << numBlocks.z << " "
0234 << ")"
0235 ;
0236 }
0237
0238 void QMultiFilm::check(){
0239
0240 check( tex_hama[0] );
0241
0242 }
0243
0244 void QMultiFilm::check( QTex<float4> *tex )
0245 {
0246 unsigned width = tex->width ;
0247 unsigned height = tex->height ;
0248
0249 LOG(LEVEL)
0250 << " width " << width
0251 << " height " << height
0252 ;
0253
0254 dim3 numBlocks ;
0255 dim3 threadsPerBlock ;
0256 configureLaunch( numBlocks, threadsPerBlock, width, height );
0257 QMultiFilm_check(numBlocks, threadsPerBlock, width, height );
0258
0259 cudaDeviceSynchronize();
0260 }
0261
0262 NP* QMultiFilm::lookup(int pmtcatIdx , int resIdx ){
0263
0264 QTex<float4> **tex = choose_tex(pmtcatIdx);
0265 int offset = resIdx;
0266 NP* out = lookup(tex[offset]);
0267 return out;
0268 }
0269
0270 QTex<float4> ** QMultiFilm::choose_tex(int pmtcatIdx){
0271
0272 QTex<float4> **tex = nullptr;
0273 switch(pmtcatIdx){
0274 case 0: tex = tex_nnvt_normal ; break ;
0275 case 1: tex = tex_hama ; break ;
0276 case 2: tex = tex_nnvt_highqe ; break ;
0277 }
0278
0279 return tex;
0280 }
0281
0282
0283 NP* QMultiFilm::lookup( QTex<float4> *tex )
0284 {
0285 unsigned width = tex->width ;
0286 unsigned height = tex->height;
0287 unsigned num_lookup = width*height ;
0288
0289
0290 LOG(LEVEL)
0291 << " width " << width
0292 << " height " << height
0293 << " lookup " << num_lookup
0294 ;
0295
0296
0297 NP* out = NP::Make<float>(height, width, 4 );
0298 float4* out_v = out->values<float4>();
0299 lookup( tex,out_v , num_lookup, width, height);
0300
0301 return out ;
0302 }
0303
0304 void QMultiFilm::lookup( QTex<float4> *tex, float4* lookup, unsigned num_lookup, unsigned width, unsigned height)
0305 {
0306 LOG(LEVEL) << "[" ;
0307 dim3 numBlocks ;
0308 dim3 threadsPerBlock ;
0309 configureLaunch( numBlocks, threadsPerBlock, width, height );
0310
0311 size_t size = width * height * sizeof(float4) ;
0312
0313 LOG(LEVEL)
0314 << " num_lookup " << num_lookup
0315 << " width " << width
0316 << " height " << height
0317
0318 << " size " << size
0319 << " tex->texObj " << tex->texObj
0320 << " tex->meta " << tex->meta
0321 << " tex->d_meta " << tex->d_meta
0322 ;
0323
0324 float4* d_lookup = nullptr ;
0325 QUDA_CHECK( cudaMalloc(reinterpret_cast<void**>( &d_lookup ), size ));
0326
0327 LOG(LEVEL)
0328 <<" QMultiFilm_lookup (";
0329 QMultiFilm_lookup(numBlocks, threadsPerBlock, tex->texObj, tex->d_meta, d_lookup, num_lookup, width, height);
0330
0331 LOG(LEVEL)
0332 <<" QMultiFilm_lookup )";
0333 QUDA_CHECK( cudaMemcpy(reinterpret_cast<void*>( lookup ), d_lookup, size, cudaMemcpyDeviceToHost ));
0334 QUDA_CHECK( cudaFree(d_lookup) );
0335
0336 cudaDeviceSynchronize();
0337
0338 dump(lookup , num_lookup);
0339
0340 LOG(LEVEL) << "]" ;
0341 }
0342
0343
0344 void QMultiFilm::dump( float4* lookup, unsigned num_lookup, unsigned edgeitems )
0345 {
0346 LOG(LEVEL);
0347 for(unsigned i=0 ; i < num_lookup ; i++)
0348 {
0349 if( i < edgeitems || i > num_lookup - edgeitems )
0350 std::cout
0351 << std::setw(6) << i
0352 << std::setw(10) << std::fixed << std::setprecision(3) << lookup[i].x
0353 << std::setw(10) << std::fixed << std::setprecision(3) << lookup[i].y
0354 << std::setw(10) << std::fixed << std::setprecision(3) << lookup[i].z
0355 << std::setw(10) << std::fixed << std::setprecision(3) << lookup[i].w
0356 << std::endl
0357 ;
0358 }
0359 }
0360
0361
0362 extern "C" void QMultiFilm_mock_lookup(dim3 numBlocks, dim3 threadsPerBlock, qmultifilm* d_multifilm, quad2* d_input, float4* d_out, unsigned num_lookup, unsigned width, unsigned height);
0363
0364 NP* QMultiFilm::mock_lookup( NP * input_arr )
0365 {
0366 assert(input_arr->has_shape(128,256,2,4));
0367
0368 unsigned height = input_arr->shape[0];
0369 unsigned width = input_arr->shape[1] ;
0370 unsigned num_lookup = width*height ;
0371
0372 LOG(LEVEL)
0373 << " width " << width
0374 << " height " << height
0375 << " lookup " << num_lookup
0376 ;
0377
0378
0379 quad2* qd2 = (quad2*)input_arr->values<float>();
0380 quad2* d_input = QU::UploadArray<quad2>(qd2, num_lookup,"multifilm_mock_lookup");
0381
0382
0383 NP* out = NP::Make<float>(height, width, 4 );
0384 float4* h_out = out->values<float4>();
0385 float4* d_out = nullptr;
0386
0387 size_t size = num_lookup*sizeof(float4);
0388 QUDA_CHECK( cudaMalloc(reinterpret_cast<void**>( &d_out ), size ));
0389
0390 mock_lookup( d_input, d_out , num_lookup , width, height);
0391
0392 QUDA_CHECK( cudaMemcpy(reinterpret_cast<void*>(h_out), d_out, size, cudaMemcpyDeviceToHost ));
0393 QUDA_CHECK( cudaFree(d_out) );
0394 QUDA_CHECK( cudaFree(d_input) );
0395 cudaDeviceSynchronize();
0396
0397 dump(h_out, num_lookup);
0398 return out ;
0399 }
0400
0401
0402 void QMultiFilm::mock_lookup( quad2* d_input, float4* d_out, unsigned num_lookup, unsigned width, unsigned height ){
0403
0404 dim3 numBlocks ;
0405 dim3 threadsPerBlock ;
0406 configureLaunch( numBlocks, threadsPerBlock, width, height );
0407
0408 QMultiFilm_mock_lookup(numBlocks, threadsPerBlock, d_multifilm, d_input, d_out, num_lookup, width, height);
0409
0410 }