Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-04-09 07:49:11

0001 
0002 #include <sstream>
0003 #include <cstring>
0004 #include <cassert>
0005 #include <iostream>
0006 
0007 #include "scuda.h"
0008 #include "squad.h"
0009 #include "NP.hh"
0010 
0011 #if defined(MOCK_TEXTURE) || defined(MOCK_CUDA)
0012 #include "stexture.h"
0013 #else
0014 #include <cuda_runtime.h>
0015 #include "cudaCheckErrors.h"
0016 #include "QU.hh"
0017 #endif
0018 
0019 #include "QTex.hh"
0020 
0021 
0022 template<typename T>
0023 QTex<T>::QTex(size_t width_, size_t height_ , const void* src_, char filterMode_, bool normalizedCoords_, const NP* a_  )
0024     :   
0025     width(width_),
0026     height(height_),
0027     src(src_),
0028     filterMode(filterMode_),
0029     normalizedCoords(normalizedCoords_), 
0030     origin(nullptr),
0031     a(a_),
0032 #if defined(MOCK_TEXTURE) || defined(MOCK_CUDA)
0033 #else
0034     cuArray(nullptr),
0035     channelDesc(cudaCreateChannelDesc<T>()),
0036 #endif
0037     texObj(0),
0038     meta(new quad4),
0039     d_meta(nullptr)
0040 {
0041     init(); 
0042 }
0043 
0044 template<typename T>
0045 void QTex<T>::setOrigin(const void* origin_) 
0046 {
0047     origin = origin_  ; 
0048 }
0049 template<typename T>
0050 const void* QTex<T>::getOrigin() const  
0051 {
0052     return origin ; 
0053 }
0054 
0055 template<typename T>
0056 void QTex<T>::setHDFactor(unsigned hd_factor) 
0057 {
0058     meta->q0.u.w = hd_factor ; 
0059 }
0060 
0061 template<typename T>
0062 unsigned QTex<T>::getHDFactor() const 
0063 {
0064     return meta->q0.u.w ; 
0065 }
0066 
0067 template<typename T>
0068 char QTex<T>::getFilterMode() const 
0069 {
0070     return filterMode ; 
0071 }
0072 
0073 template<typename T>
0074 bool QTex<T>::getNormalizedCoords() const 
0075 {
0076     return normalizedCoords ; 
0077 }
0078 
0079 
0080 template<typename T>
0081 QTex<T>::~QTex()
0082 {
0083 #if defined(MOCK_TEXTURE) || defined(MOCK_CUDA)
0084 #else
0085     cudaDestroyTextureObject(texObj);
0086     cudaFreeArray(cuArray);
0087 #endif
0088 }
0089 
0090 template<typename T>
0091 void QTex<T>::init()
0092 {
0093 #if defined(MOCK_TEXTURE) || defined(MOCK_CUDA)
0094     assert(a); 
0095     MockTextureManager::Add(a) ; 
0096 #else
0097     createArray();   // cudaMallocArray using channelDesc for T 
0098     uploadToArray();
0099     createTextureObject();
0100 #endif
0101 
0102     meta->q0.u.x = width ; 
0103     meta->q0.u.y = height ; 
0104     meta->q0.u.z = 0 ; 
0105     meta->q0.u.w = 0 ; 
0106 }
0107 
0108 
0109 template<typename T>
0110 void QTex<T>::setMetaDomainX( const quad* domx )
0111 {
0112     meta->q1.f.x = domx->f.x ; 
0113     meta->q1.f.y = domx->f.y ; 
0114     meta->q1.f.z = domx->f.z ; 
0115     meta->q1.f.w = domx->f.w ; 
0116 }
0117 
0118 template<typename T>
0119 void QTex<T>::setMetaDomainY( const quad* domy )
0120 {
0121     meta->q2.f.x = domy->f.x ; 
0122     meta->q2.f.y = domy->f.y ; 
0123     meta->q2.f.z = domy->f.z ; 
0124     meta->q2.f.w = domy->f.w ; 
0125 }
0126 
0127 
0128 /**
0129 QTex:uploadMeta
0130 ------------------
0131 
0132 Not doing this automatically as will need to add some more meta 
0133 
0134 **/
0135 
0136 template<typename T>
0137 void QTex<T>::uploadMeta()
0138 {
0139 #if defined(MOCK_TEXTURE) || defined(MOCK_CUDA)
0140     d_meta = meta ; 
0141 #else
0142     d_meta = QU::UploadArray<quad4>(meta, 1, "QTex::uploadMeta" );  
0143 #endif
0144 }
0145 
0146 
0147 
0148 
0149 
0150 template<typename T>
0151 std::string QTex<T>::desc() const
0152 {
0153     std::stringstream ss ; 
0154 
0155     ss << "QTex"
0156        << " width " << width 
0157        << " height " << height 
0158        << " texObj " << texObj
0159        << " meta " << meta
0160        << " d_meta " << d_meta
0161        ;
0162 
0163     std::string s = ss.str(); 
0164     return s ; 
0165 }
0166 
0167 
0168 #if defined(MOCK_TEXTURE) || defined(MOCK_CUDA)
0169 #else
0170 
0171 template<typename T>
0172 void QTex<T>::createArray()
0173 {
0174     cudaMallocArray(&cuArray, &channelDesc, width, height );
0175     cudaCheckErrors("cudaMallocArray");
0176 }
0177 
0178 /**
0179 QTex::uploadToArray
0180 ----------------------
0181 
0182 ::
0183 
0184     cudaError_t 
0185     cudaMemcpy2DToArray(
0186        struct cudaArray* dst, 
0187        size_t wOffset, 
0188        size_t hOffset, 
0189        const void* src, 
0190        size_t spitch, 
0191        size_t width, 
0192        size_t height, 
0193        enum cudaMemcpyKind kind) 
0194 
0195 Copies a matrix (height rows of width bytes each) from the memory area pointed to by src 
0196 to the CUDA array dst starting at the upper left corner (wOffset, hOffset) where kind is one of 
0197 cudaMemcpyHostToHost, cudaMemcpyHostToDevice, cudaMemcpyDeviceToHost, or cudaMemcpyDeviceToDevice,
0198 and specifies the direction of the copy. 
0199 spitch is the width in memory in bytes of the 2D array pointed to by src, 
0200 including any padding added to the end of each row. 
0201 wOffset + width must not exceed the width of the CUDA array dst. 
0202 width must not exceed spitch. 
0203 
0204 cudaMemcpy2DToArray() returns an error if spitch exceeds the maximum allowed.
0205 
0206 dst - Destination memory address 
0207 wOffset - Destination starting X offset
0208 hOffset - Destination starting Y offset
0209 src - Source memory address
0210 spitch - Pitch of source memory
0211 width - Width of matrix transfer (columns in bytes) 
0212 height - Height of matrix transfer (rows)
0213 kind - Type of transfer
0214 
0215 
0216 * https://forums.developer.nvidia.com/t/cudamemcpytoarray-is-deprecated/71385/10
0217 
0218 **/
0219 
0220 template<typename T>
0221 void QTex<T>::uploadToArray()
0222 {
0223     cudaArray_t dst = cuArray ;
0224     size_t wOffset = 0 ;
0225     size_t hOffset = 0 ;
0226     cudaMemcpyKind kind = cudaMemcpyHostToDevice ;
0227 
0228     size_t spitch = width*sizeof(T);  
0229     size_t width_bytes = width*sizeof(T); 
0230     size_t height_rows = height ; 
0231 
0232     cudaMemcpy2DToArray(dst, wOffset, hOffset, src, spitch, width_bytes, height_rows, kind );
0233 
0234     cudaCheckErrors("cudaMemcpy2DToArray");
0235 }
0236 
0237 
0238 
0239 /**
0240 
0241 normalized:false
0242    means texel coordinate addressing 
0243 
0244 normalized:true
0245    eg reemission generation need normalized
0246 
0247 **/
0248 
0249 template<typename T>
0250 void QTex<T>::createTextureObject()
0251 {
0252     struct cudaResourceDesc resDesc;
0253     memset(&resDesc, 0, sizeof(resDesc));
0254     resDesc.resType = cudaResourceTypeArray;
0255     resDesc.res.array.array = cuArray;
0256 
0257     // https://docs.nvidia.com/cuda/cuda-runtime-api/structcudaTextureDesc.html
0258     struct cudaTextureDesc texDesc;
0259     memset(&texDesc, 0, sizeof(texDesc));
0260     texDesc.addressMode[0] = cudaAddressModeWrap;
0261     texDesc.addressMode[1] = cudaAddressModeWrap;
0262 
0263     assert( filterMode == 'P' || filterMode == 'L' ); 
0264     switch(filterMode)
0265     {
0266         case 'L': texDesc.filterMode = cudaFilterModeLinear ; break ; 
0267         case 'P': texDesc.filterMode = cudaFilterModePoint  ; break ;  
0268         // cudaFilterModePoint: switches off interpolation, necessary with char texture  
0269     }
0270 
0271     texDesc.readMode = cudaReadModeElementType;  // return data of the type of the underlying buffer
0272     texDesc.normalizedCoords = normalizedCoords ;   // addressing into the texture with floats in range 0:1 when true
0273 
0274     // Create texture object
0275     cudaCreateTextureObject(&texObj, &resDesc, &texDesc, NULL);
0276 }
0277 
0278 #endif
0279 
0280 
0281 
0282 
0283 
0284 
0285 /**
0286 https://developer.nvidia.com/blog/cuda-refresher-cuda-programming-model/
0287 
0288 A group of threads is called a CUDA block. 
0289 Each CUDA block is executed by one streaming multiprocessor.
0290 CUDA architecture limits the numbers of threads per block (1024 threads per block limit).
0291 
0292 CUDA blocks are grouped into a grid. 
0293 A kernel is executed as a grid of blocks of threads::
0294 
0295     unsigned x = blockIdx.x * blockDim.x + threadIdx.x;
0296     unsigned y = blockIdx.y * blockDim.y + threadIdx.y;
0297 
0298 The below *numBlocks* divides by the *threadsPerBlock* to give sufficient threads to cover the workspace, 
0299 potentially with some spare threads at edge when workspace is not an exact multiple of threadsPerBlock size.
0300 
0301 **/
0302 
0303 // API export is essential on this template struct, otherwise get all symbols missing 
0304 template struct QUDARAP_API QTex<uchar4>;
0305 
0306 #pragma GCC diagnostic push
0307 #pragma GCC diagnostic ignored "-Wattributes"
0308 // quell warning: type attributes ignored after type is already defined [-Wattributes]
0309 template struct QUDARAP_API QTex<float>;
0310 template struct QUDARAP_API QTex<float4>;
0311 #pragma GCC diagnostic pop
0312 
0313 
0314 //////////////////////////////////////////////////////////////////////////////////////////////
0315 //////////////////////////////////////////////////////////////////////////////////////////////
0316 //////////////////////////////////////////////////////////////////////////////////////////////
0317 
0318