Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-04-09 07:49:32

0001 #pragma once
0002 /**
0003 SCU_BufferView.h
0004 =================
0005 
0006 Able to *upload* data from multiple NP arrays into a 
0007 contiguous GPU side buffer and record item counts from each 
0008 input array into the item vector allowing access to the 
0009 GPU side device pointers for the separate input array data. 
0010 
0011 TODO: investigate alignment performance impact for different T: float/float3/float4
0012 with and without padding 
0013 
0014 **/
0015 
0016 #include "NP.hh"
0017 #include "CUDA_CHECK.h"
0018 
0019 template <typename T>
0020 struct SCU_BufferView
0021 {
0022     T* data = nullptr ; 
0023     std::vector<size_t> item ;   // HMM "value" more appropriate 
0024 
0025     void upload(   const std::vector<const NP*>& aa ); 
0026     void hostcopy( const std::vector<const NP*>& aa ); 
0027 
0028     std::string hostdump(size_t part) const ; 
0029     std::string hostdump() const ; 
0030 
0031     std::string devdump(size_t part) const ; 
0032     std::string devdump() const ; 
0033 
0034     size_t num_part() const ;
0035     size_t item_total() const ; 
0036     size_t item_offset(size_t part) const ; 
0037     size_t item_num(   size_t part) const ; 
0038 
0039     T* _pointer(size_t part) const ; 
0040     CUdeviceptr pointer(size_t part) const ; 
0041     void free() ; 
0042 
0043     std::string desc() const ; 
0044     std::string descItem() const ; 
0045 };
0046 
0047 
0048 /**
0049 SCU_BufferView::hostcopy
0050 --------------------------
0051 
0052 Hostside malloc and copy from arrays into BufferView.
0053 Mainly for testing before using for copies to device. 
0054 
0055 **/
0056 
0057 template <typename T>
0058 inline void SCU_BufferView<T>::hostcopy( const std::vector<const NP*>& aa )
0059 {
0060     assert( item.size() == 0 );    
0061 
0062     int num_a = aa.size() ; 
0063     for(int i=0 ; i < num_a ; i++) item.push_back( aa[i]->num_items() );  
0064     size_t tot_bytes = item_total()*sizeof(T) ; 
0065 
0066     data = (T*)malloc( tot_bytes );
0067  
0068     for(int i=0 ; i < num_a ; i++) memcpy( _pointer(i), aa[i]->cvalues<T>(), aa[i]->arr_bytes() );  
0069 }
0070 
0071 /**
0072 SCU_BufferView::upload
0073 ------------------------
0074 
0075 **/
0076 
0077 template <typename T>
0078 inline void SCU_BufferView<T>::upload( const std::vector<const NP*>& aa )
0079 {
0080     assert( item.size() == 0 );    
0081 
0082     int num_a = aa.size() ; 
0083     for(int i=0 ; i < num_a ; i++) item.push_back( aa[i]->num_values() );  
0084     size_t tot_bytes = item_total()*sizeof(T) ; 
0085 
0086     CUDA_CHECK( cudaMalloc(reinterpret_cast<void**>( &data ), tot_bytes )); 
0087 
0088     size_t tot_arr_bytes = 0 ; 
0089 
0090     for(int i=0 ; i < num_a ; i++) 
0091     {
0092         const NP* a = aa[i] ;
0093         CUdeviceptr d = pointer(i); 
0094 
0095         size_t arr_bytes = a->arr_bytes() ; 
0096         tot_arr_bytes += arr_bytes ;
0097         assert( tot_arr_bytes <= tot_bytes ); 
0098  
0099         CUDA_CHECK( cudaMemcpy(reinterpret_cast<void*>( d ), a->cvalues<T>(), arr_bytes, cudaMemcpyHostToDevice )); 
0100     }
0101 }
0102 
0103 
0104 /**
0105 SCU_BufferView::hostdump
0106 --------------------------
0107 
0108 *data* assumed to be a valid host pointer, eg after using *hostcopy* 
0109 
0110 **/
0111 
0112 
0113 template <typename T>
0114 inline std::string SCU_BufferView<T>::hostdump(size_t part) const 
0115 {
0116     size_t num = item_num(part); 
0117     const T* p = _pointer(part); 
0118     std::stringstream ss ;
0119     ss 
0120        << "[SCU_BufferView::hostdump"
0121        << " part " << part 
0122        << " num " << num
0123        << "\n"  
0124        ;
0125 
0126     for(size_t i=0 ; i < num ; i++) ss << p[i] << "\n" ; 
0127  
0128     ss << "]SCU_BufferView::hostdump" ; 
0129     std::string str = ss.str(); 
0130     return str ; 
0131 }
0132 
0133 template <typename T>
0134 inline std::string SCU_BufferView<T>::hostdump() const 
0135 {
0136     std::stringstream ss ;
0137     for(size_t i=0 ; i < item.size() ; i++) ss << hostdump(i) ; 
0138     std::string str = ss.str(); 
0139     return str ; 
0140 }
0141 
0142 
0143 
0144 
0145 
0146 
0147 /**
0148 SCU_BufferView::devdump
0149 -------------------------
0150 
0151 *data* assumed to be a valid dev pointer, eg after using *upload* 
0152 
0153 **/
0154 
0155 template <typename T>
0156 inline std::string SCU_BufferView<T>::devdump(size_t part) const 
0157 {
0158     size_t num = item_num(part); 
0159     CUdeviceptr ptr = pointer(part); 
0160 
0161     std::vector<T> tmp(num) ; 
0162     T* tt = tmp.data() ;
0163    
0164     CUDA_CHECK( cudaMemcpy( tt, reinterpret_cast<void*>(ptr), sizeof(T)*num, cudaMemcpyDeviceToHost )); 
0165 
0166     std::stringstream ss ;
0167     ss 
0168        << "[SCU_BufferView::devdump"
0169        << " part " << part 
0170        << " num " << num
0171        << "\n"  
0172        ;
0173 
0174     for(size_t i=0 ; i < num ; i++) ss << tmp[i] << "\n" ; 
0175  
0176     ss << "]SCU_BufferView::devdump \n" ; 
0177     std::string str = ss.str(); 
0178     return str ; 
0179 }
0180 
0181 template <typename T>
0182 inline std::string SCU_BufferView<T>::devdump() const 
0183 {
0184     std::stringstream ss ;
0185     for(size_t i=0 ; i < item.size() ; i++) ss << devdump(i) ; 
0186     std::string str = ss.str(); 
0187     return str ; 
0188 }
0189 
0190 template <typename T>
0191 inline size_t SCU_BufferView<T>::num_part() const 
0192 {
0193     return item.size(); 
0194 }
0195 
0196 template <typename T>
0197 inline size_t SCU_BufferView<T>::item_total() const
0198 {
0199     size_t tot = 0 ; 
0200     for(size_t i=0 ; i < item.size() ; i++) tot += item[i] ; 
0201     return tot ; 
0202 }
0203 
0204 template <typename T>
0205 inline size_t SCU_BufferView<T>::item_offset(size_t part) const
0206 {
0207     assert( part < item.size() ); 
0208     size_t off = 0 ; 
0209     for(size_t i=0 ; i < part ; i++) off += item[i] ; 
0210     return off ; 
0211 }
0212 
0213 template <typename T>
0214 inline size_t SCU_BufferView<T>::item_num(size_t part) const
0215 {
0216     assert( part < item.size() ); 
0217     return item[part] ; 
0218 }
0219 
0220 template <typename T>
0221 inline T* SCU_BufferView<T>::_pointer(size_t part) const
0222 {
0223     assert( part < item.size() ); 
0224     size_t off = item_offset(part) ; 
0225     return  ( data + off ) ; 
0226 }
0227 
0228 template <typename T>
0229 inline CUdeviceptr SCU_BufferView<T>::pointer(size_t part) const
0230 {
0231     return (CUdeviceptr)(uintptr_t) _pointer(part) ;  
0232 }
0233 
0234 /**
0235 SCU_BufferView::free
0236 ---------------------
0237 
0238 *dat* assumed to be a device pointer, eg after *upload*
0239 
0240 **/
0241 
0242 template <typename T>
0243 inline void SCU_BufferView<T>::free()
0244 {
0245     CUDA_CHECK( cudaFree( reinterpret_cast<void*>( data) ) );
0246     data = nullptr ; 
0247     item.clear(); 
0248 }
0249 
0250 template <typename T>
0251 inline std::string SCU_BufferView<T>::desc() const
0252 {
0253     std::stringstream ss ; 
0254     ss << "SCU_BufferView"
0255        << " (uintptr_t)data  0x" 
0256        << std::setw(9) << std::hex << (uintptr_t)data << std::dec
0257        << " sizeof(T) " << std::setw(5) << sizeof(T)
0258        << " item_total "  << std::setw(7) << item_total()
0259        << " num_part "    << std::setw(7) << item.size() 
0260        << " " << descItem() << "\n" ; 
0261        ;
0262     std::string str = ss.str(); 
0263     return str ; 
0264 }
0265 
0266 template <typename T>
0267 inline std::string SCU_BufferView<T>::descItem() const
0268 {
0269     std::stringstream ss ; 
0270     ss << "{" ; 
0271     for(int i=0 ; i < int(item.size()) ; i++) ss << item[i] << " " ; 
0272     ss << "}" ; 
0273     std::string str = ss.str(); 
0274     return str ; 
0275 }
0276 
0277