File indexing completed on 2026-04-09 07:49:18
0001
0002
0003 #include <vector_types.h>
0004 #include <vector_functions.h>
0005 #include <cuda_runtime.h>
0006
0007 #include <iostream>
0008 #define SIMG_IMPLEMENTATION 1
0009 #include "SIMG.h"
0010
0011
0012
0013 #include <stdio.h>
// cudaCheckErrors(msg): abort the program if the CUDA runtime has a pending error.
//
// Reads (and clears) the runtime's last-error state via cudaGetLastError(), so
// it must be placed directly after the API call or kernel launch it is meant
// to check. On failure it prints `msg`, the CUDA error string and the source
// location to stderr, then terminates the process with exit status 1.
//
// msg : C string naming the operation that was just attempted.
//
// The local is named without a leading double underscore because such
// identifiers are reserved for the implementation.
#define cudaCheckErrors(msg) \
    do { \
        cudaError_t cudaCheckErrors_err_ = cudaGetLastError(); \
        if (cudaCheckErrors_err_ != cudaSuccess) { \
            fprintf(stderr, "Fatal error: %s (%s at %s:%d)\n", \
                    (msg), cudaGetErrorString(cudaCheckErrors_err_), \
                    __FILE__, __LINE__); \
            fprintf(stderr, "*** FAILED - ABORTING\n"); \
            exit(1); \
        } \
    } while (0)
0025
0026
0027
// colorKernel: fill a width x height pixel buffer with one constant colour.
//
// Launch layout: 2D grid of 2D blocks, one thread per pixel. The grid may
// overshoot the image (ceil-div sizing); surplus threads exit without writing.
//
// output : device buffer of at least width * height uchar4 elements, row-major.
// texObj : unused; kept so the signature matches transformKernel.
// width  : image width in pixels.
// height : image height in pixels.
// theta  : unused; kept so the signature matches transformKernel.
__global__ void colorKernel(uchar4* output, cudaTextureObject_t texObj, int width, int height, float theta) {
    const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Tail blocks extend past the image when the dimensions are not multiples
    // of the block size; writing from those threads would run off the buffer.
    if (width <= 0 || height <= 0) return;
    if (x >= static_cast<unsigned int>(width) || y >= static_cast<unsigned int>(height)) return;

    // Index in size_t so y * width cannot wrap for very large images.
    const size_t idx = static_cast<size_t>(y) * static_cast<size_t>(width) + x;

    // Components are (x, y, z, w) = (255, 0, 0, 255): opaque red when the
    // buffer is interpreted as RGBA.
    output[idx] = make_uchar4( 255u, 0u, 0u, 255u );
}
0037
0038
0039
// transformKernel: resample a texture through a rotation about the image centre.
//
// For each output pixel the normalized coordinate (x / width, y / height) is
// shifted so the centre is the origin, rotated by theta, shifted back and used
// to fetch a texel. The fetched alpha is overwritten with 255.
//
// Launch layout: 2D grid of 2D blocks, one thread per pixel. The grid may
// overshoot the image (ceil-div sizing); surplus threads exit without writing.
//
// output : device buffer of at least width * height uchar4 elements, row-major.
// texObj : texture object returning uchar4; coordinates are passed in
//          normalized form, and rotated coordinates can fall outside [0, 1),
//          so the result there depends on the texture's address mode.
// width  : image width in pixels.
// height : image height in pixels.
// theta  : rotation angle in radians.
__global__ void transformKernel(uchar4* output, cudaTextureObject_t texObj, int width, int height, float theta) {
    const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Tail blocks extend past the image when the dimensions are not multiples
    // of the block size; writing from those threads would run off the buffer.
    if (width <= 0 || height <= 0) return;
    if (x >= static_cast<unsigned int>(width) || y >= static_cast<unsigned int>(height)) return;

    // Normalized coordinates, re-centred so the rotation pivots on the middle
    // of the image rather than on its corner.
    const float u = x / (float) width  - 0.5f;
    const float v = y / (float) height - 0.5f;

    // Evaluate the trig terms once; both rotated components need them.
    const float c = cosf(theta);
    const float s = sinf(theta);

    const float tu = u * c - v * s;
    const float tv = v * c + u * s;

    uchar4 texel = tex2D<uchar4>(texObj, tu + 0.5f, tv + 0.5f);

    // Force the output fully opaque regardless of the source alpha.
    texel.w = 255u;

    // Index in size_t so y * width cannot wrap for very large images.
    output[static_cast<size_t>(y) * static_cast<size_t>(width) + x] = texel;
}
0064
// Entry point: load an image, rotate it on the GPU through a texture fetch,
// and write the result as a PNG.
//
// argv[1] : input image path  (default "/tmp/i.png")
// argv[2] : output image path (default "/tmp/o.png")
//
// Returns 0 on success and 1 if the input is not a non-empty 4-channel image.
// Any CUDA failure terminates the process through cudaCheckErrors.
int main(int argc, char** argv)
{
    const char* ipath = argc > 1 ? argv[1] : "/tmp/i.png" ;
    const char* opath = argc > 2 ? argv[2] : "/tmp/o.png" ;

    SIMG img(ipath);
    std::cout << img.desc() << std::endl ;

    // The upload below copies exactly 4 bytes per pixel, so anything other
    // than a 4-channel image would read past the end of img.data. This is
    // checked at runtime because an assert would vanish under NDEBUG.
    if (img.channels != 4 || img.width <= 0 || img.height <= 0) {
        std::cerr << "Fatal error: expected a non-empty 4-channel image, got "
                  << img.width << "x" << img.height
                  << " with " << img.channels << " channels" << std::endl ;
        return 1;
    }

    // Do all size arithmetic in size_t so large images cannot overflow int.
    const size_t width     = static_cast<size_t>(img.width);
    const size_t height    = static_cast<size_t>(img.height);
    const size_t rowBytes  = width * sizeof(uchar4);
    const size_t numPixels = width * height;

    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<uchar4>();

    cudaArray *cuArray;
    cudaMallocArray(&cuArray, &channelDesc, width, height );
    cudaCheckErrors("cudaMallocArray");

    // cudaMemcpyToArray is deprecated; the 2D variant takes the row pitch and
    // row width in bytes. The host rows are treated as tightly packed, which
    // matches the single contiguous copy this replaces.
    cudaMemcpy2DToArray(cuArray, 0, 0, img.data, rowBytes, rowBytes, height, cudaMemcpyHostToDevice);
    cudaCheckErrors("cudaMemcpy2DToArray");

    struct cudaResourceDesc resDesc;
    memset(&resDesc, 0, sizeof(resDesc));
    resDesc.resType = cudaResourceTypeArray;
    resDesc.res.array.array = cuArray;

    struct cudaTextureDesc texDesc;
    memset(&texDesc, 0, sizeof(texDesc));
    // Wrap in both dimensions: rotated coordinates that leave [0, 1) tile the
    // image instead of clamping to the border.
    texDesc.addressMode[0] = cudaAddressModeWrap;
    texDesc.addressMode[1] = cudaAddressModeWrap;
    texDesc.filterMode = cudaFilterModePoint;
    texDesc.readMode = cudaReadModeElementType;
    texDesc.normalizedCoords = 1 ;

    cudaTextureObject_t texObj = 0;
    cudaCreateTextureObject(&texObj, &resDesc, &texDesc, NULL);
    cudaCheckErrors("cudaCreateTextureObject");

    uchar4* d_output;
    cudaMalloc(&d_output, numPixels * sizeof(uchar4));
    cudaCheckErrors("cudaMalloc");

    // Ceil-divide so the grid covers the whole image; the kernel discards the
    // threads that land outside it.
    dim3 dimBlock(16, 16);
    dim3 dimGrid((static_cast<unsigned int>(img.width)  + dimBlock.x - 1) / dimBlock.x,
                 (static_cast<unsigned int>(img.height) + dimBlock.y - 1) / dimBlock.y);

    float theta = 1.f ;   // rotation angle in radians

    transformKernel<<<dimGrid, dimBlock>>>(d_output, texObj, img.width, img.height, theta );
    cudaCheckErrors("transformKernel launch");   // bad launch configuration
    cudaDeviceSynchronize();
    cudaCheckErrors("cudaDeviceSynchronize");    // faults raised while the kernel ran

    uchar4* output = new uchar4[numPixels] ;
    // Blocking copy: the host buffer is complete once this returns.
    cudaMemcpy(output, d_output, numPixels * sizeof(uchar4), cudaMemcpyDeviceToHost);
    cudaCheckErrors("cudaMemcpy");

    std::cout << "writing to " << opath << std::endl ;

    SIMG img2(img.width, img.height, img.channels, (unsigned char*)output );
    img2.writePNG(opath);

    cudaDestroyTextureObject(texObj);
    cudaFreeArray(cuArray);

    delete[] output ;
    cudaFree(d_output);
    cudaCheckErrors("cleanup");

    return 0;
}
0138