eic-opticks/qudarap/QTex.cc

0001
0002 #include <sstream>
0003 #include <cstring>
0004 #include <cassert>
0005 #include <iostream>
0006
0007 #include "scuda.h"
0008 #include "squad.h"
0009 #include "NP.hh"
0010
0011 #if defined(MOCK_TEXTURE) || defined(MOCK_CUDA)
0012 #include "stexture.h"
0013 #else
0014 #include <cuda_runtime.h>
0015 #include "cudaCheckErrors.h"
0016 #include "QU.hh"
0017 #endif
0018
0019 #include "QTex.hh"
0020
0021
0022 template<typename T>
0023 QTex<T>::QTex(size_t width_, size_t height_ , const void* src_, char filterMode_, bool normalizedCoords_, const NP* a_  )
0024     :
0025     width(width_),
0026     height(height_),
0027     src(src_),
0028     filterMode(filterMode_),
0029     normalizedCoords(normalizedCoords_),
0030     origin(nullptr),
0031     a(a_),
0032 #if defined(MOCK_TEXTURE) || defined(MOCK_CUDA)
0033 #else
0034     cuArray(nullptr),
0035     channelDesc(cudaCreateChannelDesc<T>()),
0036 #endif
0037     texObj(0),
0038     meta(new quad4),
0039     d_meta(nullptr)
0040 {
0041     init();
0042 }
0043
0044 template<typename T>
0045 void QTex<T>::setOrigin(const void* origin_)
0046 {
0047     origin = origin_  ;
0048 }
0049 template<typename T>
0050 const void* QTex<T>::getOrigin() const
0051 {
0052     return origin ;
0053 }
0054
0055 template<typename T>
0056 void QTex<T>::setHDFactor(unsigned hd_factor)
0057 {
0058     meta->q0.u.w = hd_factor ;
0059 }
0060
0061 template<typename T>
0062 unsigned QTex<T>::getHDFactor() const
0063 {
0064     return meta->q0.u.w ;
0065 }
0066
0067 template<typename T>
0068 char QTex<T>::getFilterMode() const
0069 {
0070     return filterMode ;
0071 }
0072
0073 template<typename T>
0074 bool QTex<T>::getNormalizedCoords() const
0075 {
0076     return normalizedCoords ;
0077 }
0078
0079
0080 template<typename T>
0081 QTex<T>::~QTex()
0082 {
0083 #if defined(MOCK_TEXTURE) || defined(MOCK_CUDA)
0084 #else
0085     cudaDestroyTextureObject(texObj);
0086     cudaFreeArray(cuArray);
0087 #endif
0088 }
0089
0090 template<typename T>
0091 void QTex<T>::init()
0092 {
0093 #if defined(MOCK_TEXTURE) || defined(MOCK_CUDA)
0094     assert(a);
0095     MockTextureManager::Add(a) ;
0096 #else
0097     createArray();   // cudaMallocArray using channelDesc for T
0098     uploadToArray();
0099     createTextureObject();
0100 #endif
0101
0102     meta->q0.u.x = width ;
0103     meta->q0.u.y = height ;
0104     meta->q0.u.z = 0 ;
0105     meta->q0.u.w = 0 ;
0106 }
0107
0108
0109 template<typename T>
0110 void QTex<T>::setMetaDomainX( const quad* domx )
0111 {
0112     meta->q1.f.x = domx->f.x ;
0113     meta->q1.f.y = domx->f.y ;
0114     meta->q1.f.z = domx->f.z ;
0115     meta->q1.f.w = domx->f.w ;
0116 }
0117
0118 template<typename T>
0119 void QTex<T>::setMetaDomainY( const quad* domy )
0120 {
0121     meta->q2.f.x = domy->f.x ;
0122     meta->q2.f.y = domy->f.y ;
0123     meta->q2.f.z = domy->f.z ;
0124     meta->q2.f.w = domy->f.w ;
0125 }
0126
0127
0128 /**
0129 QTex:uploadMeta
0130 ------------------
0131
0132 Not doing this automatically as will need to add some more meta
0133
0134 **/
0135
0136 template<typename T>
0137 void QTex<T>::uploadMeta()
0138 {
0139 #if defined(MOCK_TEXTURE) || defined(MOCK_CUDA)
0140     d_meta = meta ;
0141 #else
0142     d_meta = QU::UploadArray<quad4>(meta, 1, "QTex::uploadMeta" );
0143 #endif
0144 }
0145
0146
0147
0148
0149
0150 template<typename T>
0151 std::string QTex<T>::desc() const
0152 {
0153     std::stringstream ss ;
0154
0155     ss << "QTex"
0156        << " width " << width
0157        << " height " << height
0158        << " texObj " << texObj
0159        << " meta " << meta
0160        << " d_meta " << d_meta
0161        ;
0162
0163     std::string s = ss.str();
0164     return s ;
0165 }
0166
0167
0168 #if defined(MOCK_TEXTURE) || defined(MOCK_CUDA)
0169 #else
0170
0171 template<typename T>
0172 void QTex<T>::createArray()
0173 {
0174     cudaMallocArray(&cuArray, &channelDesc, width, height );
0175     cudaCheckErrors("cudaMallocArray");
0176 }
0177
0178 /**
0179 QTex::uploadToArray
0180 ----------------------
0181
0182 ::
0183
0184     cudaError_t
0185     cudaMemcpy2DToArray(
0186        struct cudaArray* dst,
0187        size_t wOffset,
0188        size_t hOffset,
0189        const void* src,
0190        size_t spitch,
0191        size_t width,
0192        size_t height,
0193        enum cudaMemcpyKind kind)
0194
0195 Copies a matrix (height rows of width bytes each) from the memory area pointed to by src
0196 to the CUDA array dst starting at the upper left corner (wOffset, hOffset) where kind is one of
0197 cudaMemcpyHostToHost, cudaMemcpyHostToDevice, cudaMemcpyDeviceToHost, or cudaMemcpyDeviceToDevice,
0198 and specifies the direction of the copy.
0199 spitch is the width in memory in bytes of the 2D array pointed to by src,
0200 including any padding added to the end of each row.
0201 wOffset + width must not exceed the width of the CUDA array dst.
0202 width must not exceed spitch.
0203
0204 cudaMemcpy2DToArray() returns an error if spitch exceeds the maximum allowed.
0205
0206 dst - Destination memory address
0207 wOffset - Destination starting X offset
0208 hOffset - Destination starting Y offset
0209 src - Source memory address
0210 spitch - Pitch of source memory
0211 width - Width of matrix transfer (columns in bytes)
0212 height - Height of matrix transfer (rows)
0213 kind - Type of transfer
0214
0215
0216 * https://forums.developer.nvidia.com/t/cudamemcpytoarray-is-deprecated/71385/10
0217
0218 **/
0219
0220 template<typename T>
0221 void QTex<T>::uploadToArray()
0222 {
0223     cudaArray_t dst = cuArray ;
0224     size_t wOffset = 0 ;
0225     size_t hOffset = 0 ;
0226     cudaMemcpyKind kind = cudaMemcpyHostToDevice ;
0227
0228     size_t spitch = width*sizeof(T);
0229     size_t width_bytes = width*sizeof(T);
0230     size_t height_rows = height ;
0231
0232     cudaMemcpy2DToArray(dst, wOffset, hOffset, src, spitch, width_bytes, height_rows, kind );
0233
0234     cudaCheckErrors("cudaMemcpy2DToArray");
0235 }
0236
0237
0238
0239 /**
0240
0241 normalized:false
0242    means texel coordinate addressing
0243
0244 normalized:true
0245    eg reemission generation need normalized
0246
0247 **/
0248
0249 template<typename T>
0250 void QTex<T>::createTextureObject()
0251 {
0252     struct cudaResourceDesc resDesc;
0253     memset(&resDesc, 0, sizeof(resDesc));
0254     resDesc.resType = cudaResourceTypeArray;
0255     resDesc.res.array.array = cuArray;
0256
0257     // https://docs.nvidia.com/cuda/cuda-runtime-api/structcudaTextureDesc.html
0258     struct cudaTextureDesc texDesc;
0259     memset(&texDesc, 0, sizeof(texDesc));
0260     texDesc.addressMode[0] = cudaAddressModeWrap;
0261     texDesc.addressMode[1] = cudaAddressModeWrap;
0262
0263     assert( filterMode == 'P' || filterMode == 'L' );
0264     switch(filterMode)
0265     {
0266         case 'L': texDesc.filterMode = cudaFilterModeLinear ; break ;
0267         case 'P': texDesc.filterMode = cudaFilterModePoint  ; break ;
0268         // cudaFilterModePoint: switches off interpolation, necessary with char texture
0269     }
0270
0271     texDesc.readMode = cudaReadModeElementType;  // return data of the type of the underlying buffer
0272     texDesc.normalizedCoords = normalizedCoords ;   // addressing into the texture with floats in range 0:1 when true
0273
0274     // Create texture object
0275     cudaCreateTextureObject(&texObj, &resDesc, &texDesc, NULL);
0276 }
0277
0278 #endif
0279
0280
0281
0282
0283
0284
0285 /**
0286 https://developer.nvidia.com/blog/cuda-refresher-cuda-programming-model/
0287
0288 A group of threads is called a CUDA block.
0289 Each CUDA block is executed by one streaming multiprocessor.
0290 CUDA architecture limits the numbers of threads per block (1024 threads per block limit).
0291
0292 CUDA blocks are grouped into a grid.
0293 A kernel is executed as a grid of blocks of threads::
0294
0295     unsigned x = blockIdx.x * blockDim.x + threadIdx.x;
0296     unsigned y = blockIdx.y * blockDim.y + threadIdx.y;
0297
0298 The below *numBlocks* divides by the *threadsPerBlock* to give sufficient threads to cover the workspace,
0299 potentially with some spare threads at edge when workspace is not an exact multiple of threadsPerBlock size.
0300
0301 **/
0302
0303 // API export is essential on this template struct, otherwise get all symbols missing
0304 template struct QUDARAP_API QTex<uchar4>;
0305
0306 #pragma GCC diagnostic push
0307 #pragma GCC diagnostic ignored "-Wattributes"
0308 // quell warning: type attributes ignored after type is already defined [-Wattributes]
0309 template struct QUDARAP_API QTex<float>;
0310 template struct QUDARAP_API QTex<float4>;
0311 #pragma GCC diagnostic pop
0312
0313
0314 //////////////////////////////////////////////////////////////////////////////////////////////
0315 //////////////////////////////////////////////////////////////////////////////////////////////
0316 //////////////////////////////////////////////////////////////////////////////////////////////
0317
0318