Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-04-09 07:49:32

0001 #pragma once
0002 /**
0003 SCU.h
0004 =======
0005 
0006 TODO: merge in SCU_.h 
0007 
0008 
0009 **/
0010 
0011 #include <cstddef>
0012 #include <cstdint>
0013 #include <vector_types.h>
0014 #include <ostream>
0015 #include <iomanip>
0016 
0017 #include <cuda.h>
0018 #include <cuda_runtime.h>
0019 
0020 #include "CUDA_CHECK.h"
0021 
0022 
0023 template <typename T>
0024 struct SCU_Buf
0025 {
0026    T*          ptr ; 
0027    size_t      num_item ;
0028    std::string name ; 
0029 
0030    CUdeviceptr pointer() const ; 
0031    void free() ; 
0032 
0033    std::string desc() const ; 
0034 };
0035 
0036 
0037 template <typename T>
0038 inline CUdeviceptr SCU_Buf<T>::pointer() const
0039 {
0040    return (CUdeviceptr)(uintptr_t) ptr ; 
0041 }
0042 
0043 template <typename T>
0044 inline void SCU_Buf<T>::free()
0045 {
0046     CUDA_CHECK( cudaFree( reinterpret_cast<void*>( ptr) ) );
0047     ptr = nullptr ; 
0048     num_item = 0 ; 
0049 }
0050 
0051 
0052 template <typename T>
0053 inline std::string SCU_Buf<T>::desc() const
0054 {
0055     std::stringstream ss ; 
0056     ss << "SCU_Buf"
0057        << " (uintptr_t)ptr  0x" 
0058        << std::setw(9) << std::hex << (uintptr_t)ptr << std::dec
0059        << " sizeof(T) " << std::setw(5) << sizeof(T)
0060        << " num_item "  << std::setw(7) << num_item 
0061        << " name " << name 
0062        ;
0063     std::string str = ss.str(); 
0064     return str ; 
0065 }
0066 
0067 
0068 
0069 
0070 
0071 struct SCU
0072 {
0073     template<typename T>
0074     static CUdeviceptr DevicePointerCast( const T* d_ptr ); 
0075 
0076     void _cudaMalloc( void** p2p, size_t size, const char* label ); 
0077 
0078     template<typename T>
0079     T* device_alloc( unsigned num_items, const char* label ); 
0080 
0081 
0082 
0083     template <typename T>
0084     static T* UploadArray(const T* array, size_t num_item ); 
0085 
0086     template <typename T>
0087     static SCU_Buf<T> UploadBuf(const T* array, size_t num_item, const char* name ); 
0088 
0089 
0090 
0091     template <typename T>
0092     static T* DownloadArray(const T* d_array, size_t num_item ); 
0093 
0094     template <typename T>
0095     static T* DownloadBuf(const SCU_Buf<T>& buf ); 
0096 
0097     template <typename T>
0098     static void DownloadVec(std::vector<T>& vec, const T* d_array, unsigned num_items); 
0099 
0100 
0101 
0102     template <typename T>
0103     static void FreeArray(T* d_array ); 
0104 
0105     template <typename T>
0106     static void FreeBuf(SCU_Buf<T>& buf ); 
0107 
0108 
0109 
0110     static void ConfigureLaunch2D( dim3& numBlocks, dim3& threadsPerBlock, int32_t width, int32_t height ); 
0111 };
0112 
0113 
0114 template<typename T>
0115 CUdeviceptr SCU::DevicePointerCast( const T* d_ptr ) // static
0116 {
0117     return (CUdeviceptr) (uintptr_t) d_ptr ; 
0118 }
0119 
0120 
0121 inline void SCU::_cudaMalloc( void** p2p, size_t size, const char* label )
0122 {
0123     cudaError_t err = cudaMalloc(p2p, size ) ;   
0124     if( err != cudaSuccess ) 
0125     {    
0126         std::stringstream ss; 
0127         ss << "CUDA call (" << label << " ) failed with error: '"    
0128            << cudaGetErrorString( err )    
0129            << "' (" __FILE__ << ":" << __LINE__ << ")\n"
0130            ;  
0131 
0132         const char* msg = ss.str().c_str() ;
0133         throw CUDA_Exception(msg);    
0134     }    
0135 }
0136 
0137 
0138 template<typename T>
0139 inline T* SCU::device_alloc( unsigned num_items, const char* label )
0140 {
0141     size_t size = num_items*sizeof(T) ; 
0142 
0143     T* d ;   
0144     _cudaMalloc( reinterpret_cast<void**>( &d ), size, label );  
0145 
0146     return d ; 
0147 }
0148 
0149 
0150 
0151 
0152 /**
0153 SCU::UploadArray
0154 -----------------
0155 
0156 Allocate on device and copy from host to device
0157 
0158 **/
0159 template <typename T>
0160 inline T* SCU::UploadArray(const T* array, size_t num_item ) // static
0161 {
0162     T* d_array = nullptr ; 
0163     CUDA_CHECK( cudaMalloc(reinterpret_cast<void**>( &d_array ), num_item*sizeof(T) )); 
0164     CUDA_CHECK( cudaMemcpy(reinterpret_cast<void*>( d_array ), array, sizeof(T)*num_item, cudaMemcpyHostToDevice )); 
0165     return d_array ; 
0166 }
0167 
0168 template <typename T>
0169 inline SCU_Buf<T> SCU::UploadBuf(const T* array, size_t num_item, const char* name )
0170 {
0171     T* d_array = UploadArray<T>(array, num_item ) ;   
0172     return { d_array, num_item, name } ; 
0173 }
0174 
0175 
0176 /**
0177 SCU::DownloadArray  
0178 -------------------
0179 
0180 Allocate on host and copy from device to host 
0181 
0182 **/
0183 
0184 template <typename T>
0185 inline T* SCU::DownloadArray(const T* d_array, size_t num_items ) // static
0186 {
0187     T* array = new T[num_items] ;
0188     CUDA_CHECK( cudaMemcpy( array, d_array, sizeof(T)*num_items, cudaMemcpyDeviceToHost ));
0189     return array ;
0190 }
0191 
0192 template <typename T>
0193 inline T* SCU::DownloadBuf(const SCU_Buf<T>& buf )
0194 {
0195     return DownloadArray<T>( buf.ptr, buf.num_item );     
0196 }
0197 
0198 
0199 /**
0200 SCU::DownloadVec
0201 -----------------
0202 
0203 After CU::DownloadVec
0204 
0205 **/
0206 
0207 template <typename T>
0208 inline void SCU::DownloadVec(std::vector<T>& vec, const T* d_array, unsigned num_items)  // static
0209 {
0210     unsigned num_bytes = num_items*sizeof(T) ; 
0211     vec.clear(); 
0212     vec.resize(num_items); 
0213     CUDA_CHECK( cudaMemcpy( vec.data(), d_array, num_bytes, cudaMemcpyDeviceToHost )); 
0214 } 
0215 
0216 template void SCU::DownloadVec<float>(   std::vector<float>&    vec,  const float* d_array,    unsigned num_items) ;
0217 template void SCU::DownloadVec<unsigned>(std::vector<unsigned>& vec,  const unsigned* d_array, unsigned num_items) ;
0218 
0219 
0220 
0221 
0222 
0223 
0224 template <typename T>
0225 inline void SCU::FreeArray(T* d_array ) // static
0226 {
0227     CUDA_CHECK( cudaFree( reinterpret_cast<void*>( d_array ) ) );
0228 } 
0229 
0230 template <typename T>
0231 inline void SCU::FreeBuf(SCU_Buf<T>& buf ) // static
0232 { 
0233     buf.free() ;  
0234 }
0235 
0236 
0237 
0238 
0239 
0240 
0241 
0242 
0243 
0244 
0245 
0246 inline void SCU::ConfigureLaunch2D( dim3& numBlocks, dim3& threadsPerBlock, int32_t width, int32_t height ) // static
0247 {
0248     threadsPerBlock.x = 16 ; 
0249     threadsPerBlock.y = 16 ; 
0250     threadsPerBlock.z = 1 ; 
0251  
0252     numBlocks.x = (width + threadsPerBlock.x - 1) / threadsPerBlock.x ; 
0253     numBlocks.y = (height + threadsPerBlock.y - 1) / threadsPerBlock.y ;
0254     numBlocks.z = 1 ; 
0255 }
0256 
0257 inline std::ostream& operator<<(std::ostream& os, const dim3& v)
0258 {
0259     int w = 6 ;
0260     os
0261        << "("
0262        << std::setw(w) << v.x
0263        << ","
0264        << std::setw(w) << v.y
0265        << ","
0266        << std::setw(w) << v.z
0267        << ") "
0268        ;
0269     return os;
0270 }
0271