Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-04-09 07:49:18

0001 // ~/o/sysrap/tests/SIMGStandaloneTest.sh
0002 
0003 #include <vector_types.h>
0004 #include <vector_functions.h>
0005 #include <cuda_runtime.h>
0006 
0007 #include <iostream>
0008 #define SIMG_IMPLEMENTATION 1 
0009 #include "SIMG.h"
0010 
0011 // https://stackoverflow.com/questions/14901491/cudamemcpytoarray/14929827#14929827
0012 
0013 #include <stdio.h>
// cudaCheckErrors(msg)
//
// Fetches (and thereby clears) the most recent CUDA runtime error with
// cudaGetLastError(). When that is anything other than cudaSuccess, prints
// `msg`, the CUDA error string and the source location of the macro use to
// stderr, then terminates the process with exit status 1.
// Intended to be placed directly after the CUDA call it is checking.
#define cudaCheckErrors(msg) \
    do { \
        const cudaError_t status_ = cudaGetLastError(); \
        if (status_ == cudaSuccess) break; \
        fprintf(stderr, "Fatal error: %s (%s at %s:%d)\n", \
                msg, cudaGetErrorString(status_), \
                __FILE__, __LINE__); \
        fprintf(stderr, "*** FAILED - ABORTING\n"); \
        exit(1); \
    } while (0)
0025 
0026 
0027 
/**
 * colorKernel
 *
 * Debug kernel: writes opaque red (255, 0, 0, 255) into every pixel of a
 * width x height image, one thread per pixel.
 *
 * Launch layout: 2D grid of 2D blocks; the thread's global (x, y) index is the
 * pixel it writes. The grid may be larger than the image (e.g. rounded up to
 * whole blocks); threads that fall outside the image do nothing.
 *
 * @param output  device buffer of at least width*height uchar4, indexed as y*width + x
 * @param texObj  unused, present so the signature matches transformKernel
 * @param width   image width in pixels
 * @param height  image height in pixels
 * @param theta   unused, present so the signature matches transformKernel
 */
__global__ void colorKernel(uchar4* output, cudaTextureObject_t texObj, int width, int height, float theta) {
    const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Without this guard, edge threads of a rounded-up grid write past the end
    // of `output` (or into the wrong row) whenever the image dimensions are not
    // exact multiples of the block dimensions.
    if (width <= 0 || height <= 0) return;
    if (x >= (unsigned int) width || y >= (unsigned int) height) return;

    // size_t arithmetic so that y * width cannot wrap for large images
    const size_t idx = (size_t) y * (size_t) width + x;
    output[idx] = make_uchar4( 255u, 0u, 0u, 255u );
}
0037 
0038 
0039 
/**
 * transformKernel
 *
 * Writes a rotated copy of a texture into `output`, one thread per output pixel.
 * Each pixel's (x, y) is mapped to fractional image coordinates (x/width,
 * y/height), rotated by `theta` about the image center (0.5, 0.5), and the
 * texture is read at the rotated position. The alpha component of the result
 * is forced to 255.
 *
 * Launch layout: 2D grid of 2D blocks; the thread's global (x, y) index is the
 * pixel it writes. The grid may be larger than the image (e.g. rounded up to
 * whole blocks); threads that fall outside the image do nothing.
 *
 * @param output  device buffer of at least width*height uchar4, indexed as y*width + x
 * @param texObj  texture object read with tex2D<uchar4>; the lookup coordinates are
 *                fractions of the image size, so the texture is expected to be
 *                created with normalized coordinates. Rotated coordinates can fall
 *                outside 0..1, what is returned there depends on the texture's
 *                address mode.
 * @param width   output width in pixels
 * @param height  output height in pixels
 * @param theta   rotation angle in radians
 */
__global__ void transformKernel(uchar4* output, cudaTextureObject_t texObj, int width, int height, float theta) {
    const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Without this guard, edge threads of a rounded-up grid write past the end
    // of `output` (or into the wrong row) whenever the image dimensions are not
    // exact multiples of the block dimensions.
    if (width <= 0 || height <= 0) return;
    if (x >= (unsigned int) width || y >= (unsigned int) height) return;

    float u = x / (float) width;   // 0. -> 1.
    float v = y / (float) height;

    // shift origin to center of image
    u -= 0.5f;                     // -0.5 -> 0.5
    v -= 0.5f;

    // rotate around the center; evaluate the trig functions once each
    const float cos_t = cosf(theta);
    const float sin_t = sinf(theta);
    const float tu = u * cos_t - v * sin_t ;
    const float tv = v * cos_t + u * sin_t ;

    // read from the texture, shifting the origin back to the image corner
    uchar4 c = tex2D<uchar4>(texObj, tu+0.5f, tv+0.5f);

    // force the output to be fully opaque
    c.w = 255u ;

    // size_t arithmetic so that y * width cannot wrap for large images
    const size_t idx = (size_t) y * (size_t) width + x;
    output[idx] = c ;
}
0064 
/**
 * main
 *
 * Loads an RGBA image, uploads it into a CUDA array wrapped by a texture
 * object, runs transformKernel to write a rotated copy into device memory,
 * copies the result back and writes it out as a PNG.
 *
 * Usage: <exe> [input.png [output.png]]
 *   input  defaults to /tmp/i.png
 *   output defaults to /tmp/o.png
 *
 * Returns 0 on success and 1 when the input image is unusable. Any CUDA
 * runtime failure is fatal: cudaCheckErrors reports it and exits with status 1.
 */
int main(int argc, char** argv)
{
    const char* ipath = argc > 1 ? argv[1] : "/tmp/i.png" ;
    const char* opath = argc > 2 ? argv[2] : "/tmp/o.png" ;

    SIMG img(ipath);
    std::cout << img.desc() << std::endl ;

    // Explicit runtime checks rather than assert(): the uchar4 upload below is
    // only valid for 4-channel data, and an assert disappears under NDEBUG.
    if( !img.data || img.width <= 0 || img.height <= 0 )
    {
        fprintf(stderr, "failed to load a non-empty image from %s\n", ipath );
        return 1 ;
    }
    if( img.channels != 4 )
    {
        fprintf(stderr, "expecting a 4 channel image but %s has %d channels\n", ipath, (int)img.channels );
        return 1 ;
    }

    // Do all size arithmetic in size_t so large images cannot overflow int.
    const size_t width      = img.width ;
    const size_t height     = img.height ;
    const size_t row_bytes  = width * sizeof(uchar4) ;
    const size_t num_pixels = width * height ;
    const size_t num_bytes  = num_pixels * sizeof(uchar4) ;

    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<uchar4>();

    cudaArray *cuArray = NULL ;
    cudaMallocArray(&cuArray, &channelDesc, width, height );
    cudaCheckErrors("cudaMallocArray");

    // cudaMemcpyToArray is deprecated; the 2D variant takes the source pitch and
    // the row width in bytes, both width*sizeof(uchar4) for tightly packed rows.
    cudaMemcpy2DToArray(cuArray, 0, 0, img.data, row_bytes, row_bytes, height, cudaMemcpyHostToDevice);
    cudaCheckErrors("cudaMemcpy2DToArray");

    struct cudaResourceDesc resDesc;
    memset(&resDesc, 0, sizeof(resDesc));
    resDesc.resType = cudaResourceTypeArray;
    resDesc.res.array.array = cuArray;

    // https://docs.nvidia.com/cuda/cuda-runtime-api/structcudaTextureDesc.html
    struct cudaTextureDesc texDesc;
    memset(&texDesc, 0, sizeof(texDesc));
    texDesc.addressMode[0] = cudaAddressModeWrap;
    texDesc.addressMode[1] = cudaAddressModeWrap;

    // Point sampling: switching to cudaFilterModeLinear with this non-float
    // texture fails with "linear filtering not supported for non-float type".
    texDesc.filterMode = cudaFilterModePoint;

    texDesc.readMode = cudaReadModeElementType;  // return data of the type of the underlying buffer
    texDesc.normalizedCoords = 1 ;               // addressing into the texture with floats in range 0:1

    // Create texture object
    cudaTextureObject_t texObj = 0;
    cudaCreateTextureObject(&texObj, &resDesc, &texDesc, NULL);
    cudaCheckErrors("cudaCreateTextureObject");

    // Allocate result of transformation in device memory
    uchar4* d_output = NULL ;
    cudaMalloc(&d_output, num_bytes);
    cudaCheckErrors("cudaMalloc");

    // One thread per pixel, grid rounded up to whole 16x16 blocks
    dim3 dimBlock(16, 16);
    dim3 dimGrid((img.width + dimBlock.x - 1) / dimBlock.x, (img.height + dimBlock.y - 1) / dimBlock.y);

    float theta = 1.f ;   // rotation angle in radians

    // For a plain fill instead of the rotation, launch colorKernel with the same arguments.
    transformKernel<<<dimGrid, dimBlock>>>(d_output, texObj, img.width, img.height, theta );
    cudaCheckErrors("transformKernel launch");   // bad launch configuration shows up here
    cudaDeviceSynchronize();
    cudaCheckErrors("cudaDeviceSynchronize");    // errors raised while the kernel ran show up here

    uchar4* output = new uchar4[num_pixels] ;
    cudaMemcpy(output, d_output, num_bytes, cudaMemcpyDeviceToHost);
    cudaCheckErrors("cudaMemcpy");

    std::cout << "writing to " << opath << std::endl ;

    SIMG img2(img.width, img.height, img.channels, (unsigned char*)output );
    img2.writePNG(opath);

    // Release device resources, then the host copy
    cudaDestroyTextureObject(texObj);
    cudaCheckErrors("cudaDestroyTextureObject");
    cudaFreeArray(cuArray);
    cudaCheckErrors("cudaFreeArray");
    cudaFree(d_output);
    cudaCheckErrors("cudaFree");

    delete[] output ;

    return 0;
}
0138