File indexing completed on 2026-04-09 07:49:32
0001 #pragma once
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016 #include "NP.hh"
0017 #include "CUDA_CHECK.h"
0018
0019 template <typename T>
0020 struct SCU_BufferView
0021 {
0022 T* data = nullptr ;
0023 std::vector<size_t> item ;
0024
0025 void upload( const std::vector<const NP*>& aa );
0026 void hostcopy( const std::vector<const NP*>& aa );
0027
0028 std::string hostdump(size_t part) const ;
0029 std::string hostdump() const ;
0030
0031 std::string devdump(size_t part) const ;
0032 std::string devdump() const ;
0033
0034 size_t num_part() const ;
0035 size_t item_total() const ;
0036 size_t item_offset(size_t part) const ;
0037 size_t item_num( size_t part) const ;
0038
0039 T* _pointer(size_t part) const ;
0040 CUdeviceptr pointer(size_t part) const ;
0041 void free() ;
0042
0043 std::string desc() const ;
0044 std::string descItem() const ;
0045 };
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055
0056
0057 template <typename T>
0058 inline void SCU_BufferView<T>::hostcopy( const std::vector<const NP*>& aa )
0059 {
0060 assert( item.size() == 0 );
0061
0062 int num_a = aa.size() ;
0063 for(int i=0 ; i < num_a ; i++) item.push_back( aa[i]->num_items() );
0064 size_t tot_bytes = item_total()*sizeof(T) ;
0065
0066 data = (T*)malloc( tot_bytes );
0067
0068 for(int i=0 ; i < num_a ; i++) memcpy( _pointer(i), aa[i]->cvalues<T>(), aa[i]->arr_bytes() );
0069 }
0070
0071
0072
0073
0074
0075
0076
0077 template <typename T>
0078 inline void SCU_BufferView<T>::upload( const std::vector<const NP*>& aa )
0079 {
0080 assert( item.size() == 0 );
0081
0082 int num_a = aa.size() ;
0083 for(int i=0 ; i < num_a ; i++) item.push_back( aa[i]->num_values() );
0084 size_t tot_bytes = item_total()*sizeof(T) ;
0085
0086 CUDA_CHECK( cudaMalloc(reinterpret_cast<void**>( &data ), tot_bytes ));
0087
0088 size_t tot_arr_bytes = 0 ;
0089
0090 for(int i=0 ; i < num_a ; i++)
0091 {
0092 const NP* a = aa[i] ;
0093 CUdeviceptr d = pointer(i);
0094
0095 size_t arr_bytes = a->arr_bytes() ;
0096 tot_arr_bytes += arr_bytes ;
0097 assert( tot_arr_bytes <= tot_bytes );
0098
0099 CUDA_CHECK( cudaMemcpy(reinterpret_cast<void*>( d ), a->cvalues<T>(), arr_bytes, cudaMemcpyHostToDevice ));
0100 }
0101 }
0102
0103
0104
0105
0106
0107
0108
0109
0110
0111
0112
0113 template <typename T>
0114 inline std::string SCU_BufferView<T>::hostdump(size_t part) const
0115 {
0116 size_t num = item_num(part);
0117 const T* p = _pointer(part);
0118 std::stringstream ss ;
0119 ss
0120 << "[SCU_BufferView::hostdump"
0121 << " part " << part
0122 << " num " << num
0123 << "\n"
0124 ;
0125
0126 for(size_t i=0 ; i < num ; i++) ss << p[i] << "\n" ;
0127
0128 ss << "]SCU_BufferView::hostdump" ;
0129 std::string str = ss.str();
0130 return str ;
0131 }
0132
0133 template <typename T>
0134 inline std::string SCU_BufferView<T>::hostdump() const
0135 {
0136 std::stringstream ss ;
0137 for(size_t i=0 ; i < item.size() ; i++) ss << hostdump(i) ;
0138 std::string str = ss.str();
0139 return str ;
0140 }
0141
0142
0143
0144
0145
0146
0147
0148
0149
0150
0151
0152
0153
0154
0155 template <typename T>
0156 inline std::string SCU_BufferView<T>::devdump(size_t part) const
0157 {
0158 size_t num = item_num(part);
0159 CUdeviceptr ptr = pointer(part);
0160
0161 std::vector<T> tmp(num) ;
0162 T* tt = tmp.data() ;
0163
0164 CUDA_CHECK( cudaMemcpy( tt, reinterpret_cast<void*>(ptr), sizeof(T)*num, cudaMemcpyDeviceToHost ));
0165
0166 std::stringstream ss ;
0167 ss
0168 << "[SCU_BufferView::devdump"
0169 << " part " << part
0170 << " num " << num
0171 << "\n"
0172 ;
0173
0174 for(size_t i=0 ; i < num ; i++) ss << tmp[i] << "\n" ;
0175
0176 ss << "]SCU_BufferView::devdump \n" ;
0177 std::string str = ss.str();
0178 return str ;
0179 }
0180
0181 template <typename T>
0182 inline std::string SCU_BufferView<T>::devdump() const
0183 {
0184 std::stringstream ss ;
0185 for(size_t i=0 ; i < item.size() ; i++) ss << devdump(i) ;
0186 std::string str = ss.str();
0187 return str ;
0188 }
0189
0190 template <typename T>
0191 inline size_t SCU_BufferView<T>::num_part() const
0192 {
0193 return item.size();
0194 }
0195
0196 template <typename T>
0197 inline size_t SCU_BufferView<T>::item_total() const
0198 {
0199 size_t tot = 0 ;
0200 for(size_t i=0 ; i < item.size() ; i++) tot += item[i] ;
0201 return tot ;
0202 }
0203
0204 template <typename T>
0205 inline size_t SCU_BufferView<T>::item_offset(size_t part) const
0206 {
0207 assert( part < item.size() );
0208 size_t off = 0 ;
0209 for(size_t i=0 ; i < part ; i++) off += item[i] ;
0210 return off ;
0211 }
0212
0213 template <typename T>
0214 inline size_t SCU_BufferView<T>::item_num(size_t part) const
0215 {
0216 assert( part < item.size() );
0217 return item[part] ;
0218 }
0219
0220 template <typename T>
0221 inline T* SCU_BufferView<T>::_pointer(size_t part) const
0222 {
0223 assert( part < item.size() );
0224 size_t off = item_offset(part) ;
0225 return ( data + off ) ;
0226 }
0227
0228 template <typename T>
0229 inline CUdeviceptr SCU_BufferView<T>::pointer(size_t part) const
0230 {
0231 return (CUdeviceptr)(uintptr_t) _pointer(part) ;
0232 }
0233
0234
0235
0236
0237
0238
0239
0240
0241
0242 template <typename T>
0243 inline void SCU_BufferView<T>::free()
0244 {
0245 CUDA_CHECK( cudaFree( reinterpret_cast<void*>( data) ) );
0246 data = nullptr ;
0247 item.clear();
0248 }
0249
0250 template <typename T>
0251 inline std::string SCU_BufferView<T>::desc() const
0252 {
0253 std::stringstream ss ;
0254 ss << "SCU_BufferView"
0255 << " (uintptr_t)data 0x"
0256 << std::setw(9) << std::hex << (uintptr_t)data << std::dec
0257 << " sizeof(T) " << std::setw(5) << sizeof(T)
0258 << " item_total " << std::setw(7) << item_total()
0259 << " num_part " << std::setw(7) << item.size()
0260 << " " << descItem() << "\n" ;
0261 ;
0262 std::string str = ss.str();
0263 return str ;
0264 }
0265
0266 template <typename T>
0267 inline std::string SCU_BufferView<T>::descItem() const
0268 {
0269 std::stringstream ss ;
0270 ss << "{" ;
0271 for(int i=0 ; i < int(item.size()) ; i++) ss << item[i] << " " ;
0272 ss << "}" ;
0273 std::string str = ss.str();
0274 return str ;
0275 }
0276
0277