Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-04-09 07:49:11

0001 #include <sstream>
0002 #include <cuda_runtime.h>
0003 
0004 #include "SLOG.hh"
0005 #include "SSys.hh"
0006 #include "scuda.h"
0007 #include "NP.hh"
0008 
0009 #include "QUDA_CHECK.h"
0010 #include "QTex.hh"
0011 #include "QTexLookup.hh"
0012 
0013 template<typename T>
0014 const plog::Severity QTexLookup<T>::LEVEL = SLOG::EnvLevel("QTexLookup", "DEBUG") ; 
0015  
0016 
0017 template<typename T>
0018 NP* QTexLookup<T>::Look( const QTex<T>* tex_ )  // static 
0019 {
0020     QTexLookup<T> look(tex_) ; 
0021     return look.lookup(); 
0022 }
0023 
0024 
0025 template<typename T>
0026 QTexLookup<T>::QTexLookup( const QTex<T>* tex_ )
0027     :
0028     tex(tex_)
0029 {
0030 }
0031 
0032 /**
0033 QTexLookup::lookup
0034 --------------------
0035 
0036 First tried using float4 and float template specialization for this but gave linker errors. 
0037 Instead kludged *is_float4* from the size of the template type. 
0038 
0039 This needs a revisit if wish to get this working with uchar "image" textures.
0040 
0041 **/
0042 
0043 template<typename T> NP* QTexLookup<T>::lookup()
0044 {
0045     unsigned width = tex->width ; 
0046     unsigned height = tex->height ; 
0047     unsigned num_lookup = width*height ; 
0048 
0049     bool is_float4 = sizeof(T) == 4*sizeof(float); 
0050 
0051     NP* out = NP::Make<float>(height, width, is_float4 ? 4 : 1 ) ; 
0052     float* out_v = out->values<float>(); 
0053 
0054     lookup_( (T*)out_v , num_lookup, width, height ); 
0055 
0056     return out ; 
0057 }
0058 
0059 
0060 
0061 template <typename T>
0062 extern void QTexLookup_lookup(dim3 numBlocks, dim3 threadsPerBlock, cudaTextureObject_t texObj, quad4* meta, T* lookup, unsigned num_lookup, unsigned width, unsigned height  ); 
0063 
0064 
0065 template<typename T>
0066 void QTexLookup<T>::lookup_( T* lookup, unsigned num_lookup, unsigned width, unsigned height  )
0067 {
0068     LOG(LEVEL) << "[" ; 
0069     size_t size = width*height*sizeof(T) ; 
0070 
0071     LOG(LEVEL) 
0072         << " num_lookup " << num_lookup
0073         << " width " << width 
0074         << " height " << height
0075         << " size " << size 
0076         << " tex->texObj " << tex->texObj
0077         << " tex->meta " << tex->meta
0078         << " tex->d_meta " << tex->d_meta
0079         ; 
0080 
0081     dim3 numBlocks ; 
0082     dim3 threadsPerBlock ; 
0083     configureLaunch( numBlocks, threadsPerBlock, width, height ); 
0084   
0085     T* d_lookup = nullptr ;  
0086     QUDA_CHECK( cudaMalloc(reinterpret_cast<void**>( &d_lookup ), size )); 
0087 
0088     QTexLookup_lookup<T>(numBlocks, threadsPerBlock, tex->texObj, tex->d_meta, (T*)d_lookup, num_lookup, width, height );  
0089 
0090     QUDA_CHECK( cudaMemcpy(reinterpret_cast<void*>( lookup ), d_lookup, size, cudaMemcpyDeviceToHost )); 
0091     QUDA_CHECK( cudaFree(d_lookup) ); 
0092 
0093     cudaDeviceSynchronize();
0094 
0095     LOG(LEVEL) << "]" ; 
0096 }
0097 
0098 
0099 
0100 template<typename T>
0101 void QTexLookup<T>::configureLaunch( dim3& numBlocks, dim3& threadsPerBlock, unsigned width, unsigned height )
0102 {
0103     threadsPerBlock.x = 16 ; 
0104     threadsPerBlock.y = 16 ; 
0105     threadsPerBlock.z = 1 ; 
0106  
0107     numBlocks.x = (width + threadsPerBlock.x - 1) / threadsPerBlock.x ; 
0108     numBlocks.y = (height + threadsPerBlock.y - 1) / threadsPerBlock.y ;
0109     numBlocks.z = 1 ; 
0110 
0111     LOG(LEVEL) 
0112         << " width " << std::setw(7) << width 
0113         << " height " << std::setw(7) << height 
0114         << " width*height " << std::setw(7) << width*height 
0115         << " threadsPerBlock"
0116         << "(" 
0117         << std::setw(3) << threadsPerBlock.x << " " 
0118         << std::setw(3) << threadsPerBlock.y << " " 
0119         << std::setw(3) << threadsPerBlock.z << " "
0120         << ")" 
0121         << " numBlocks "
0122         << "(" 
0123         << std::setw(3) << numBlocks.x << " " 
0124         << std::setw(3) << numBlocks.y << " " 
0125         << std::setw(3) << numBlocks.z << " "
0126         << ")" 
0127         ;
0128 }
0129 
0130 
0131 template struct QUDARAP_API QTexLookup<float4> ; 
0132 template struct QUDARAP_API QTexLookup<float> ; 
0133 
0134 
0135