Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-04-09 07:49:07

0001 
0002 #include <cuda_runtime.h>
0003 #include <sstream>
0004 #include <map>
0005 #include <csignal>
0006 
0007 #include "SBnd.h"
0008 #include "NP.hh"
0009 #include "NPFold.h"
0010 
0011 #include "scuda.h"
0012 #include "squad.h"
0013 #include "sstate.h"
0014 
0015 #if defined(MOCK_TEXTURE) || defined(MOCK_CUDA)
0016 #else
0017 #include "QUDA_CHECK.h"
0018 #include "QU.hh"
0019 #include "SLOG.hh"
0020 #endif
0021 
0022 #include "QTex.hh"
0023 #include "QOptical.hh"
0024 #include "QBnd.hh"
0025 
0026 #include "qbnd.h"
0027 
0028 
0029 
0030 #if defined(MOCK_TEXTURE) || defined(MOCK_CUDA)
0031 #else
0032 const plog::Severity QBnd::LEVEL = SLOG::EnvLevel("QBnd", "DEBUG"); 
0033 #endif
0034 
0035 const QBnd* QBnd::INSTANCE = nullptr ; 
0036 const QBnd* QBnd::Get(){ return INSTANCE ; }
0037 
0038 /**
0039 QBnd::MakeInstance
0040 ---------------------
0041 
0042 static method used from QBnd::QBnd using the bnd array spec names
0043 
0044 **/
0045 
0046 qbnd* QBnd::MakeInstance(const QTex<float4>* tex, const std::vector<std::string>& names )
0047 {
0048     qbnd* qb = new qbnd ; 
0049 
0050     qb->boundary_tex = tex->texObj ; 
0051     qb->boundary_meta = tex->d_meta ; 
0052     qb->boundary_tex_MaterialLine_Water = SBnd::GetMaterialLine("Water", names) ; 
0053     qb->boundary_tex_MaterialLine_LS    = SBnd::GetMaterialLine("LS", names) ; 
0054 
0055     const QOptical* optical = QOptical::Get() ; 
0056     //assert( optical ); 
0057 
0058 #if defined(MOCK_TEXTURE) || defined(MOCK_CUDA)
0059 #else
0060     LOG(LEVEL) << " optical " << ( optical ? optical->desc() : "MISSING" ) ; 
0061 #endif
0062 
0063     qb->optical = optical ? optical->d_optical : nullptr ; 
0064 
0065     assert( qb->optical != nullptr ); 
0066     assert( qb->boundary_meta != nullptr ); 
0067     return qb ; 
0068 }
0069 
0070 
0071 /**
0072 QBnd::QBnd
0073 ------------
0074 
0075 Narrows the NP array if wide and creates GPU texture 
0076 
0077 **/
0078 
0079 QBnd::QBnd(const NP* buf)
0080     :
0081     dsrc(buf->ebyte == 8 ? buf : nullptr),
0082     src(NP::MakeNarrowIfWide(buf)),
0083     sbn(new SBnd(src)),
0084     tex(MakeBoundaryTex(src)),
0085     qb(MakeInstance(tex, buf->names)),
0086     d_qb(nullptr)
0087 {
0088     init(); 
0089 } 
0090 
0091 void QBnd::init()
0092 {
0093     INSTANCE = this ; 
0094 #if defined(MOCK_TEXTURE) || defined(MOCK_CUDA)
0095     d_qb = qb ;  
0096 #else
0097     d_qb = QU::UploadArray<qbnd>(qb,1,"QBnd::QBnd/d_qb") ; 
0098 #endif
0099 }
0100 
0101 
0102 /**
0103 QBnd::MakeBoundaryTex
0104 ------------------------
0105 
0106 Creates GPU texture with material and surface properties as a function of wavelenth.
0107 Example of mapping from 5D array of floats into 2D texture of float4::
0108 
0109     .     ni nj nk  nl nm
0110     blib  36, 4, 2,761, 4
0111 
0112           ni : boundaries
0113           nj : 0:omat/1:osur/2:isur/3:imat  
0114           nk : 0 or 1 property group
0115           nl :  
0116 
0117 
0118 
0119           ni*nk*nk         -> ny  36*4*2 = 288
0120                    nl      -> nx           761 (fine domain, 39 when using coarse domain)
0121                       nm   -> float4 elem    4    
0122 
0123          nx*ny = 11232
0124 
0125 
0126 TODO: need to get boundary domain range metadata into buffer json sidecar and get it uploaded with the tex
0127 
0128 **/
0129 
0130 QTex<float4>* QBnd::MakeBoundaryTex(const NP* buf )   // static 
0131 {
0132     assert( buf->uifc == 'f' && buf->ebyte == 4 );  
0133 
0134     unsigned ni = buf->shape[0];  // (~123) number of boundaries 
0135     unsigned nj = buf->shape[1];  // (4)    number of species : omat/osur/isur/imat 
0136     unsigned nk = buf->shape[2];  // (2)    number of float4 property groups per species 
0137     unsigned nl = buf->shape[3];  // (39 or 761)   number of wavelength samples of the property
0138     unsigned nm = buf->shape[4];  // (4)    number of prop within the float4
0139 
0140 
0141 
0142 #if defined(MOCK_TEXTURE) || defined(MOCK_CUDA)
0143 #else
0144     LOG(LEVEL) << " buf " << ( buf ? buf->desc() : "-" ) ;  
0145 #endif
0146     bool nm_expect = nm == 4 ;
0147     assert( nm_expect ); 
0148     if(!nm_expect) std::raise(SIGINT); 
0149 
0150     unsigned nx = nl ;           // wavelength samples
0151     unsigned ny = ni*nj*nk ;     
0152     // ny : total number of properties from all (two) float4 property 
0153     // groups of all (4) species in all (~123) boundaries 
0154 
0155     const float* values = buf->cvalues<float>(); 
0156 
0157     char filterMode = 'L' ; 
0158     //bool normalizedCoords = false ; 
0159     bool normalizedCoords = true ; 
0160 
0161     QTex<float4>* btex = new QTex<float4>(nx, ny, values, filterMode, normalizedCoords, buf ) ; 
0162 
0163     bool buf_has_meta = buf->has_meta() ;
0164 
0165 #if defined(MOCK_TEXTURE) || defined(MOCK_CUDA)
0166 #else
0167     LOG_IF(fatal, !buf_has_meta) << " buf_has_meta FAIL : domain metadata is required to create texture  buf.desc " << buf->desc() ;  
0168 #endif
0169     assert( buf_has_meta ); 
0170 
0171     quad domainX ; 
0172     domainX.f.x = buf->get_meta<float>("domain_low",   0.f ); 
0173     domainX.f.y = buf->get_meta<float>("domain_high",  0.f ); 
0174     domainX.f.z = buf->get_meta<float>("domain_step",  0.f ); 
0175     domainX.f.w = buf->get_meta<float>("domain_range", 0.f ); 
0176 
0177 #if defined(MOCK_TEXTURE) || defined(MOCK_CUDA)
0178 #else
0179     LOG(LEVEL)
0180         << " domain_low " << std::fixed << std::setw(10) << std::setprecision(3) << domainX.f.x  
0181         << " domain_high " << std::fixed << std::setw(10) << std::setprecision(3) << domainX.f.y  
0182         << " domain_step " << std::fixed << std::setw(10) << std::setprecision(3) << domainX.f.z 
0183         << " domain_range " << std::fixed << std::setw(10) << std::setprecision(3) << domainX.f.w  
0184         ;
0185 #endif
0186 
0187     assert( domainX.f.y > domainX.f.x ); 
0188     assert( domainX.f.z > 0.f ); 
0189     assert( domainX.f.w == domainX.f.y - domainX.f.x ); 
0190 
0191     btex->setMetaDomainX(&domainX); 
0192     btex->uploadMeta(); 
0193 
0194     return btex ; 
0195 }
0196 
0197 std::string QBnd::desc() const
0198 {
0199     std::stringstream ss ; 
0200     ss << "QBnd"
0201        << " src " << ( src ? src->desc() : "-" )
0202        << " tex " << ( tex ? tex->desc() : "-" )
0203        << " tex " << tex 
0204        ; 
0205     std::string str = ss.str(); 
0206     return str ; 
0207 }
0208 
0209 std::string QBnd::DescLaunch( const dim3& numBlocks, const dim3& threadsPerBlock, int width, int height ) // static
0210 {
0211     std::stringstream ss ; 
0212     ss
0213         << " width " << std::setw(7) << width 
0214         << " height " << std::setw(7) << height 
0215         << " width*height " << std::setw(7) << width*height 
0216         << " threadsPerBlock"
0217         << "(" 
0218         << std::setw(3) << threadsPerBlock.x << " " 
0219         << std::setw(3) << threadsPerBlock.y << " " 
0220         << std::setw(3) << threadsPerBlock.z << " "
0221         << ")" 
0222         << " numBlocks "
0223         << "(" 
0224         << std::setw(3) << numBlocks.x << " " 
0225         << std::setw(3) << numBlocks.y << " " 
0226         << std::setw(3) << numBlocks.z << " "
0227         << ")" 
0228         ;
0229 
0230     std::string str = ss.str(); 
0231     return str ; 
0232 }
0233 
0234 
0235 void QBnd::ConfigureLaunch( dim3& numBlocks, dim3& threadsPerBlock, int width, int height ) // static 
0236 {
0237     threadsPerBlock.x = 16 ; 
0238     threadsPerBlock.y = 16 ; 
0239     threadsPerBlock.z = 1 ; 
0240  
0241     numBlocks.x = (width + threadsPerBlock.x - 1) / threadsPerBlock.x ; 
0242     numBlocks.y = (height + threadsPerBlock.y - 1) / threadsPerBlock.y ;
0243     numBlocks.z = 1 ; 
0244 }
0245 
0246 NP* QBnd::lookup() const 
0247 {
0248     unsigned width = tex->width ; 
0249     unsigned height = tex->height ; 
0250     unsigned num_lookup = width*height ; 
0251 
0252     NP* out = NP::Make<float>(height, width, 4 ); 
0253 
0254     quad* out_ = (quad*)out->values<float>(); 
0255     lookup( out_ , num_lookup, width, height ); 
0256 
0257     out->reshape(src->shape); 
0258 
0259     return out ; 
0260 }
0261 
0262 NPFold* QBnd::serialize() const 
0263 {
0264     NPFold* f = new NPFold ; 
0265     f->add("src", src ); 
0266     f->add("dst", lookup() ); 
0267     return f ; 
0268 }
0269 
0270 void QBnd::save(const char* dir) const 
0271 {
0272     NPFold* f = serialize(); 
0273     f->save(dir); 
0274 }
0275 
0276 #if defined(MOCK_TEXTURE) || defined(MOCK_CUDA)
0277 
0278 extern "C" void QBnd_lookup_0_MOCK(
0279     cudaTextureObject_t texObj, 
0280     quad4* meta, 
0281     quad* lookup, 
0282     int num_lookup, 
0283     int width, 
0284     int height 
0285     ); 
0286 
0287 #include "QBnd_MOCK.h"
0288 
0289 #else
0290 
0291 // from QBnd.cu
0292 extern "C" void QBnd_lookup_0(
0293     dim3 numBlocks, 
0294     dim3 threadsPerBlock, 
0295     cudaTextureObject_t texObj, 
0296     quad4* meta, 
0297     quad* lookup, 
0298     int num_lookup, 
0299     int width, 
0300     int height 
0301     ); 
0302 
0303 #endif
0304 
0305 
0306 void QBnd::lookup( quad* lookup, int num_lookup, int width, int height ) const 
0307 {
0308     if( tex->d_meta == nullptr )
0309     {
0310         tex->uploadMeta();    // TODO: not a good place to do this, needs to be more standard
0311     }
0312     assert( tex->d_meta != nullptr && "must QTex::uploadMeta() before lookups" );
0313 
0314 
0315 #if defined(MOCK_TEXTURE) || defined(MOCK_CUDA)
0316 
0317     std::cout << "QBnd::lookup MISSING MOCK IMPL " << std::endl ; 
0318     quad* d_lookup  = lookup ; 
0319 
0320     QBnd_lookup_0_MOCK(tex->texObj, tex->d_meta, d_lookup, num_lookup, width, height );  
0321 
0322 #else
0323 
0324     // TODO: update the below to use more contemporary approach, starting with using QU 
0325 
0326     dim3 numBlocks ; 
0327     dim3 threadsPerBlock ; 
0328     ConfigureLaunch( numBlocks, threadsPerBlock, width, height ); 
0329     std::cout << DescLaunch( numBlocks, threadsPerBlock, width, height ) << std::endl ; 
0330     size_t size = num_lookup*sizeof(quad) ;  
0331 
0332 
0333     quad* d_lookup  ;  
0334     QUDA_CHECK( cudaMalloc(reinterpret_cast<void**>( &d_lookup ), size )); 
0335 
0336     QBnd_lookup_0(numBlocks, threadsPerBlock, tex->texObj, tex->d_meta, d_lookup, num_lookup, width, height );  
0337 
0338     QUDA_CHECK( cudaMemcpy(reinterpret_cast<void*>(lookup), d_lookup, size, cudaMemcpyDeviceToHost )); 
0339     QUDA_CHECK( cudaFree(d_lookup) ); 
0340 
0341 #endif
0342 
0343 }
0344 
0345 std::string QBnd::Dump( quad* lookup, int num_lookup, int edgeitems ) // static 
0346 {
0347     std::stringstream ss ; 
0348 
0349     for(int i=0 ; i < num_lookup ; i++)
0350     {
0351         if( i < edgeitems || i > num_lookup - edgeitems)
0352         {
0353             quad& props = lookup[i] ;  
0354             ss
0355                 << std::setw(10) << i 
0356                 << std::setw(10) << std::fixed << std::setprecision(3) << props.f.x 
0357                 << std::setw(10) << std::fixed << std::setprecision(3) << props.f.y
0358                 << std::setw(10) << std::fixed << std::setprecision(3) << props.f.z 
0359                 << std::setw(10) << std::fixed << std::setprecision(3) << props.f.w 
0360                 << std::endl 
0361                 ; 
0362         }
0363     }
0364     std::string str = ss.str(); 
0365     return str ; 
0366 }
0367