eic-opticks/CSG/CU.cc

0001 #include <iostream>
0002
0003 #include "scuda.h"
0004 #include "squad.h"
0005 #include "sqat4.h"
0006
0007 #include "cuda_runtime.h"
0008 #include "CUDA_CHECK.h"
0009
0010 #ifdef WITH_SLOG
0011 #include "SLOG.hh"
0012 #endif
0013
0014 #include "CSGSolid.h"
0015 #include "CSGPrim.h"
0016 #include "CSGNode.h"
0017 #include "CSGParams.h"
0018
0019
0020 #include "CU.h"
0021
#ifdef WITH_SLOG
// Severity used by the LOG(LEVEL) statements in this file,
// obtained from SLOG::EnvLevel with arguments "CU" and "DEBUG".
const plog::Severity CU::LEVEL = SLOG::EnvLevel( "CU", "DEBUG" ) ;
#endif
0025
0026
0027
0028
0029 template <typename T>
0030 T* CU::AllocArray(unsigned num_items ) // static
0031 {
0032 #ifdef WITH_SLOG
0033     LOG(LEVEL) << " num_items " << num_items  ;
0034 #endif
0035     T* d_array = nullptr ;
0036     CUDA_CHECK( cudaMalloc(reinterpret_cast<void**>( &d_array ), num_items*sizeof(T) ));
0037     return d_array ;
0038 }
0039
0040
0041
0042 /**
0043 CU::UploadArray
0044 ----------------
0045
0046 Allocate on device and copy from host to device
0047
0048 **/
0049 template <typename T>
0050 T* CU::UploadArray(const T* array, unsigned num_items ) // static
0051 {
0052 #ifdef WITH_SLOG
0053     LOG(LEVEL) << " num_items " << num_items  ;
0054 #endif
0055     T* d_array = nullptr ;
0056     CUDA_CHECK( cudaMalloc(reinterpret_cast<void**>( &d_array ), num_items*sizeof(T) ));
0057     CUDA_CHECK( cudaMemcpy(reinterpret_cast<void*>( d_array ), array, sizeof(T)*num_items, cudaMemcpyHostToDevice ));
0058     return d_array ;
0059 }
0060
0061
0062 /**
0063 CU::UploadArray
0064 ----------------
0065
0066 Allocate on host and copy from device to host
0067
0068 **/
0069
0070 template <typename T>
0071 T* CU::DownloadArray(const T* d_array, unsigned num_items ) // static
0072 {
0073 #ifdef WITH_SLOG
0074     LOG(LEVEL) << " num_items " << num_items  ;
0075 #endif
0076     T* array = new T[num_items] ;
0077     CUDA_CHECK( cudaMemcpy( array, d_array, sizeof(T)*num_items, cudaMemcpyDeviceToHost ));
0078     return array ;
0079 }
0080
0081
0082 template CSG_API float* CU::UploadArray<float>(const float* array, unsigned num_items) ;
0083 template CSG_API float* CU::DownloadArray<float>(const float* d_array, unsigned num_items) ;
0084
0085 template CSG_API unsigned* CU::UploadArray<unsigned>(const unsigned* array, unsigned num_items) ;
0086 template CSG_API unsigned* CU::DownloadArray<unsigned>(const unsigned* d_array, unsigned num_items) ;
0087
0088 template CSG_API float4* CU::UploadArray<float4>(const float4* array, unsigned num_items) ;
0089 template CSG_API float4* CU::DownloadArray<float4>(const float4* d_array, unsigned num_items) ;
0090
0091 template CSG_API CSGParams* CU::UploadArray<CSGParams>(const CSGParams* d_array, unsigned num_items) ;
0092 template CSG_API CSGNode* CU::UploadArray<CSGNode>(const CSGNode* d_array, unsigned num_items) ;
0093 template CSG_API CSGNode* CU::DownloadArray<CSGNode>(const CSGNode* d_array, unsigned num_items) ;
0094
0095 template CSG_API quad4* CU::AllocArray<quad4>(unsigned num_items) ;
0096 template CSG_API quad4* CU::UploadArray<quad4>(const quad4* d_array, unsigned num_items) ;
0097 template CSG_API quad4* CU::DownloadArray<quad4>(const quad4* d_array, unsigned num_items) ;
0098
0099 template CSG_API qat4* CU::UploadArray<qat4>(const qat4* d_array, unsigned num_items) ;
0100 template CSG_API qat4* CU::DownloadArray<qat4>(const qat4* d_array, unsigned num_items) ;
0101
0102 template CSG_API CSGPrim* CU::UploadArray<CSGPrim>(const CSGPrim* d_array, unsigned num_items) ;
0103 template CSG_API CSGPrim* CU::DownloadArray<CSGPrim>(const CSGPrim* d_array, unsigned num_items) ;
0104
0105 template CSG_API CSGSolid* CU::UploadArray<CSGSolid>(const CSGSolid* d_array, unsigned num_items) ;
0106 template CSG_API CSGSolid* CU::DownloadArray<CSGSolid>(const CSGSolid* d_array, unsigned num_items) ;
0107
0108
0109
0110
0111
0112 template <typename T>
0113 T* CU::UploadVec(const std::vector<T>& vec)
0114 {
0115     unsigned num_items = vec.size() ;
0116     unsigned num_bytes = num_items*sizeof(T) ;
0117 #ifdef WITH_SLOG
0118     LOG(LEVEL) << " num_items " << num_items  ;
0119 #endif
0120     T* d_array = nullptr ;
0121     CUDA_CHECK( cudaMalloc(reinterpret_cast<void**>( &d_array ), num_bytes ));
0122     CUDA_CHECK( cudaMemcpy(reinterpret_cast<void*>( d_array ), vec.data(), num_bytes, cudaMemcpyHostToDevice ));
0123     return d_array ;
0124 }
0125
0126 template CSG_API CSGPrim* CU::UploadVec<CSGPrim>(const std::vector<CSGPrim>& vec ) ;
0127 template CSG_API float*     CU::UploadVec<float>(const std::vector<float>& vec ) ;
0128 template CSG_API unsigned*  CU::UploadVec<unsigned>(const std::vector<unsigned>& vec ) ;
0129
0130
0131 template <typename T>
0132 void CU::DownloadVec(std::vector<T>& vec, const T* d_array, unsigned num_items)  // static
0133 {
0134 #ifdef WITH_SLOG
0135     LOG(LEVEL) << " num_items " << num_items ;
0136 #endif
0137     unsigned num_bytes = num_items*sizeof(T) ;
0138     vec.clear();
0139     vec.resize(num_items);
0140     CUDA_CHECK( cudaMemcpy( vec.data(), d_array, num_bytes, cudaMemcpyDeviceToHost ));
0141 }
0142
0143 template CSG_API void CU::DownloadVec<CSGPrim>(std::vector<CSGPrim>& vec,  const CSGPrim* d_array, unsigned num_items) ;
0144 template CSG_API void CU::DownloadVec<float>(std::vector<float>& vec,  const float* d_array, unsigned num_items) ;
0145 template CSG_API void CU::DownloadVec<unsigned>(std::vector<unsigned>& vec,  const unsigned* d_array, unsigned num_items) ;
0146
0147
0148
0149
0150 void CU::ConfigureLaunch1D( dim3& numBlocks, dim3& threadsPerBlock, unsigned num, unsigned threads_per_block ) // static
0151 {
0152     threadsPerBlock.x = threads_per_block ;
0153     threadsPerBlock.y = 1 ;
0154     threadsPerBlock.z = 1 ;
0155
0156     numBlocks.x = (num + threadsPerBlock.x - 1) / threadsPerBlock.x ;
0157     numBlocks.y = 1 ;
0158     numBlocks.z = 1 ;
0159 }
0160
0161
0162