File indexing completed on 2026-04-09 07:49:10
0001 #include <sstream>
0002 #include <csignal>
0003
0004
0005 #include "qrng.h"
0006
0007 #include "SLOG.hh"
0008 #include "ssys.h"
0009 #include "scuda.h"
0010 #include "squad.h"
0011 #include "sphoton.h"
0012 #include "sscint.h"
0013
0014
0015 #include "NP.hh"
0016 #include "QUDA_CHECK.h"
0017 #include "QRng.hh"
0018 #include "QScint.hh"
0019 #include "QTex.hh"
0020 #include "QU.hh"
0021
0022 #include "qscint.h"
0023
0024
0025 const plog::Severity QScint::LEVEL = SLOG::EnvLevel("QScint", "DEBUG");
0026
0027 const QScint* QScint::INSTANCE = nullptr ;
0028 const QScint* QScint::Get(){ return INSTANCE ; }
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039 QScint::QScint(const NP* icdf, unsigned hd_factor )
0040 :
0041 dsrc(icdf->ebyte == 8 ? icdf : nullptr),
0042 src( icdf->ebyte == 4 ? icdf : NP::MakeNarrow(dsrc) ),
0043 tex(MakeScintTex(src, hd_factor)),
0044 scint(MakeInstance(tex)),
0045 d_scint(QU::UploadArray<qscint>(scint, 1, "QScint::QScint/d_scint"))
0046 {
0047 INSTANCE = this ;
0048 }
0049
0050
0051 qscint* QScint::MakeInstance(const QTex<float>* tex)
0052 {
0053 qscint* scint = new qscint ;
0054 scint->scint_tex = tex->texObj ;
0055 scint->scint_meta = tex->d_meta ;
0056 bool qscint_disable_hd = ssys::getenvbool("QSCINT_DISABLE_HD");
0057 scint->hd_factor = qscint_disable_hd ? 0u : tex->getHDFactor() ;
0058 return scint ;
0059 }
0060
0061
0062 std::string QScint::desc() const
0063 {
0064 std::stringstream ss ;
0065 ss << "QScint"
0066 << " dsrc " << ( dsrc ? dsrc->desc() : "-" )
0067 << " src " << ( src ? src->desc() : "-" )
0068 << " tex " << ( tex ? tex->desc() : "-" )
0069 << " tex " << tex
0070 ;
0071
0072 std::string s = ss.str();
0073 return s ;
0074 }
0075
0076
0077
0078
0079
0080
0081
0082
0083
0084 QTex<float>* QScint::MakeScintTex(const NP* src, unsigned hd_factor )
0085 {
0086 bool expected_shape = src->has_shape(1,4096,1) || src->has_shape(3,4096,1) ;
0087 LOG_IF(fatal, !expected_shape) << " unexpected shape of src " << ( src ? src->sstr() : "-" ) ;
0088 assert( expected_shape );
0089
0090 unsigned ni = src->shape[0];
0091 unsigned nj = src->shape[1];
0092 unsigned nk = src->shape[2];
0093
0094 bool src_expect = src->uifc == 'f' && src->ebyte == 4 && ( ni == 1 || ni == 3 ) && nj == 4096 && nk == 1 ;
0095 assert( src_expect );
0096 if(!src_expect) std::raise(SIGINT);
0097
0098
0099 unsigned ny = ni ;
0100 unsigned nx = nj ;
0101
0102
0103 bool qscint_disable_interpolation = ssys::getenvbool("QSCINT_DISABLE_INTERPOLATION");
0104 char filterMode = qscint_disable_interpolation ? 'P' : 'L' ;
0105
0106 LOG_IF(fatal, qscint_disable_interpolation) << "QSCINT_DISABLE_INTERPOLATION active using filterMode " << filterMode ;
0107
0108 bool normalizedCoords = true ;
0109 QTex<float>* tx = new QTex<float>(nx, ny, src->cvalues<float>(), filterMode, normalizedCoords, src ) ;
0110
0111 tx->setHDFactor(hd_factor);
0112 tx->uploadMeta();
0113
0114 LOG(LEVEL)
0115 << " src " << src->desc()
0116 << " nx (width) " << nx
0117 << " ny (height) " << ny
0118 << " tx.HDFactor " << tx->getHDFactor()
0119 << " tx.filterMode " << tx->getFilterMode()
0120 << " tx.normalizedCoords " << tx->getNormalizedCoords()
0121 ;
0122
0123 return tx ;
0124 }
0125
0126 extern "C" void QScint_check(dim3 numBlocks, dim3 threadsPerBlock, unsigned width, unsigned height );
0127 extern "C" void QScint_lookup(dim3 numBlocks, dim3 threadsPerBlock, cudaTextureObject_t texObj, quad4* meta, float* lookup, unsigned num_lookup, unsigned width, unsigned height );
0128
0129 void QScint::configureLaunch( dim3& numBlocks, dim3& threadsPerBlock, unsigned width, unsigned height )
0130 {
0131 threadsPerBlock.x = 512 ;
0132 threadsPerBlock.y = 1 ;
0133 threadsPerBlock.z = 1 ;
0134
0135 numBlocks.x = (width + threadsPerBlock.x - 1) / threadsPerBlock.x ;
0136 numBlocks.y = (height + threadsPerBlock.y - 1) / threadsPerBlock.y ;
0137 numBlocks.z = 1 ;
0138
0139 LOG(LEVEL)
0140 << " width " << std::setw(7) << width
0141 << " height " << std::setw(7) << height
0142 << " width*height " << std::setw(7) << width*height
0143 << " threadsPerBlock"
0144 << "("
0145 << std::setw(3) << threadsPerBlock.x << " "
0146 << std::setw(3) << threadsPerBlock.y << " "
0147 << std::setw(3) << threadsPerBlock.z << " "
0148 << ")"
0149 << " numBlocks "
0150 << "("
0151 << std::setw(3) << numBlocks.x << " "
0152 << std::setw(3) << numBlocks.y << " "
0153 << std::setw(3) << numBlocks.z << " "
0154 << ")"
0155 ;
0156 }
0157
0158
0159
0160 void QScint::check()
0161 {
0162 unsigned width = tex->width ;
0163 unsigned height = tex->height ;
0164
0165 LOG(LEVEL)
0166 << " width " << width
0167 << " height " << height
0168 ;
0169
0170 dim3 numBlocks ;
0171 dim3 threadsPerBlock ;
0172 configureLaunch( numBlocks, threadsPerBlock, width, height );
0173 QScint_check(numBlocks, threadsPerBlock, width, height );
0174
0175 cudaDeviceSynchronize();
0176 }
0177
0178
0179 NP* QScint::lookup()
0180 {
0181 unsigned width = tex->width ;
0182 unsigned height = tex->height ;
0183 unsigned num_lookup = width*height ;
0184
0185 LOG(LEVEL)
0186 << " width " << width
0187 << " height " << height
0188 << " lookup " << num_lookup
0189 ;
0190
0191 NP* out = NP::Make<float>(height, width );
0192 float* out_v = out->values<float>();
0193 lookup( out_v , num_lookup, width, height );
0194
0195 return out ;
0196 }
0197
0198 void QScint::lookup( float* lookup, unsigned num_lookup, unsigned width, unsigned height )
0199 {
0200 LOG(LEVEL) << "[" ;
0201 dim3 numBlocks ;
0202 dim3 threadsPerBlock ;
0203 configureLaunch( numBlocks, threadsPerBlock, width, height );
0204
0205 size_t size = width*height*sizeof(float) ;
0206
0207 LOG(LEVEL)
0208 << " num_lookup " << num_lookup
0209 << " width " << width
0210 << " height " << height
0211 << " size " << size
0212 << " tex->texObj " << tex->texObj
0213 << " tex->meta " << tex->meta
0214 << " tex->d_meta " << tex->d_meta
0215 ;
0216
0217 float* d_lookup = nullptr ;
0218 QUDA_CHECK( cudaMalloc(reinterpret_cast<void**>( &d_lookup ), size ));
0219
0220 QScint_lookup(numBlocks, threadsPerBlock, tex->texObj, tex->d_meta, d_lookup, num_lookup, width, height );
0221
0222 QUDA_CHECK( cudaMemcpy(reinterpret_cast<void*>( lookup ), d_lookup, size, cudaMemcpyDeviceToHost ));
0223 QUDA_CHECK( cudaFree(d_lookup) );
0224
0225 cudaDeviceSynchronize();
0226
0227 LOG(LEVEL) << "]" ;
0228 }
0229
0230 void QScint::dump( float* lookup, unsigned num_lookup, unsigned edgeitems )
0231 {
0232 LOG(LEVEL);
0233 for(unsigned i=0 ; i < num_lookup ; i++)
0234 {
0235 if( i < edgeitems || i > num_lookup - edgeitems )
0236 std::cout
0237 << std::setw(6) << i
0238 << std::setw(10) << std::fixed << std::setprecision(3) << lookup[i]
0239 << std::endl
0240 ;
0241 }
0242 }
0243