File indexing completed on 2026-04-09 07:49:11
0001 #include <sstream>
0002 #include <cuda_runtime.h>
0003
0004 #include "SLOG.hh"
0005 #include "SSys.hh"
0006 #include "scuda.h"
0007 #include "NP.hh"
0008
0009 #include "QUDA_CHECK.h"
0010 #include "QTex.hh"
0011 #include "QTexLookup.hh"
0012
0013 template<typename T>
0014 const plog::Severity QTexLookup<T>::LEVEL = SLOG::EnvLevel("QTexLookup", "DEBUG") ;
0015
0016
0017 template<typename T>
0018 NP* QTexLookup<T>::Look( const QTex<T>* tex_ )
0019 {
0020 QTexLookup<T> look(tex_) ;
0021 return look.lookup();
0022 }
0023
0024
0025 template<typename T>
0026 QTexLookup<T>::QTexLookup( const QTex<T>* tex_ )
0027 :
0028 tex(tex_)
0029 {
0030 }
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043 template<typename T> NP* QTexLookup<T>::lookup()
0044 {
0045 unsigned width = tex->width ;
0046 unsigned height = tex->height ;
0047 unsigned num_lookup = width*height ;
0048
0049 bool is_float4 = sizeof(T) == 4*sizeof(float);
0050
0051 NP* out = NP::Make<float>(height, width, is_float4 ? 4 : 1 ) ;
0052 float* out_v = out->values<float>();
0053
0054 lookup_( (T*)out_v , num_lookup, width, height );
0055
0056 return out ;
0057 }
0058
0059
0060
0061 template <typename T>
0062 extern void QTexLookup_lookup(dim3 numBlocks, dim3 threadsPerBlock, cudaTextureObject_t texObj, quad4* meta, T* lookup, unsigned num_lookup, unsigned width, unsigned height );
0063
0064
0065 template<typename T>
0066 void QTexLookup<T>::lookup_( T* lookup, unsigned num_lookup, unsigned width, unsigned height )
0067 {
0068 LOG(LEVEL) << "[" ;
0069 size_t size = width*height*sizeof(T) ;
0070
0071 LOG(LEVEL)
0072 << " num_lookup " << num_lookup
0073 << " width " << width
0074 << " height " << height
0075 << " size " << size
0076 << " tex->texObj " << tex->texObj
0077 << " tex->meta " << tex->meta
0078 << " tex->d_meta " << tex->d_meta
0079 ;
0080
0081 dim3 numBlocks ;
0082 dim3 threadsPerBlock ;
0083 configureLaunch( numBlocks, threadsPerBlock, width, height );
0084
0085 T* d_lookup = nullptr ;
0086 QUDA_CHECK( cudaMalloc(reinterpret_cast<void**>( &d_lookup ), size ));
0087
0088 QTexLookup_lookup<T>(numBlocks, threadsPerBlock, tex->texObj, tex->d_meta, (T*)d_lookup, num_lookup, width, height );
0089
0090 QUDA_CHECK( cudaMemcpy(reinterpret_cast<void*>( lookup ), d_lookup, size, cudaMemcpyDeviceToHost ));
0091 QUDA_CHECK( cudaFree(d_lookup) );
0092
0093 cudaDeviceSynchronize();
0094
0095 LOG(LEVEL) << "]" ;
0096 }
0097
0098
0099
0100 template<typename T>
0101 void QTexLookup<T>::configureLaunch( dim3& numBlocks, dim3& threadsPerBlock, unsigned width, unsigned height )
0102 {
0103 threadsPerBlock.x = 16 ;
0104 threadsPerBlock.y = 16 ;
0105 threadsPerBlock.z = 1 ;
0106
0107 numBlocks.x = (width + threadsPerBlock.x - 1) / threadsPerBlock.x ;
0108 numBlocks.y = (height + threadsPerBlock.y - 1) / threadsPerBlock.y ;
0109 numBlocks.z = 1 ;
0110
0111 LOG(LEVEL)
0112 << " width " << std::setw(7) << width
0113 << " height " << std::setw(7) << height
0114 << " width*height " << std::setw(7) << width*height
0115 << " threadsPerBlock"
0116 << "("
0117 << std::setw(3) << threadsPerBlock.x << " "
0118 << std::setw(3) << threadsPerBlock.y << " "
0119 << std::setw(3) << threadsPerBlock.z << " "
0120 << ")"
0121 << " numBlocks "
0122 << "("
0123 << std::setw(3) << numBlocks.x << " "
0124 << std::setw(3) << numBlocks.y << " "
0125 << std::setw(3) << numBlocks.z << " "
0126 << ")"
0127 ;
0128 }
0129
0130
0131 template struct QUDARAP_API QTexLookup<float4> ;
0132 template struct QUDARAP_API QTexLookup<float> ;
0133
0134
0135