Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-04-10 07:49:39

0001 #pragma once
0002 /**
0003 QBuf.hh : header only CUDA device buffer
0004 ------------------------------------------
0005 
0006 Hmm: with greater use of QU, am unsure regarding the role of QBuf ?
0007 
0008 
0009 **/
0010 
0011 #include <string>
0012 #include <iostream>
0013 #include <sstream>
0014 #include <cassert>
0015 #include <vector>
0016 
0017 #include "QUDA_CHECK.h"
0018 #include "QUDARAP_API_EXPORT.hh"
0019 #include "NP.hh"
0020 
0021 
0022 template <typename T>
0023 struct QUDARAP_API QBuf 
0024 {
0025     void* a ; 
0026     T* h ; 
0027     T* d ; 
0028     unsigned num_items ; 
0029     unsigned max_items ; 
0030 
0031     QBuf()
0032         :
0033         a(nullptr),   // will often be an NP array 
0034         h(nullptr),
0035         d(nullptr),
0036         num_items(0),
0037         max_items(0)
0038     {
0039     }
0040 
0041 
0042     void device_alloc(unsigned num_items_)
0043     {   
0044         num_items = num_items_ ; 
0045         QUDA_CHECK(cudaMalloc(reinterpret_cast<void**>( &d ), num_items*sizeof(T) ));  
0046     }   
0047     void device_set(int value=0)  // Value to set for each byte of specified memory
0048     {   
0049         QUDA_CHECK(cudaMemset(reinterpret_cast<void*>( d ), value, num_items*sizeof(T)  ));  
0050     }   
0051     void device_free()
0052     {   
0053         QUDA_CHECK(cudaFree(d)) ; 
0054         d = nullptr ; 
0055         num_items = 0 ; 
0056     }   
0057     std::string desc() const 
0058     {   
0059         std::stringstream ss ; 
0060         ss <<  "QBuf d " << ( d ? d : 0 ) << " num_items " << num_items ; 
0061         return ss.str();   
0062     }   
0063     void upload( const T* data, unsigned num_items_ )
0064     {   
0065         if( num_items > 0 ) assert( num_items_ == num_items );  
0066         QUDA_CHECK(cudaMemcpy(reinterpret_cast<void*>( d ), data, sizeof(T)*num_items_, cudaMemcpyHostToDevice )) ; 
0067     }   
0068 
0069     void download( std::vector<T>& vec )
0070     {
0071         vec.resize(num_items);  
0072         QUDA_CHECK(cudaMemcpy(reinterpret_cast<void*>( vec.data() ), d , sizeof(T)*num_items, cudaMemcpyDeviceToHost ));
0073     }
0074 
0075     void download_dump(const char* msg, unsigned edgeitems);
0076  
0077     static QBuf<T>* Upload( const T* data, unsigned num_items )
0078     {   
0079         QBuf<T>* buf = new QBuf<T>() ; 
0080         buf->device_alloc(num_items);   // sets (ptr, num_items)
0081         buf->upload( data, num_items );  
0082         return buf ; 
0083     }   
0084 
0085     // caution : this is allocating every time
0086     // for event by event handling better to allocate one and resize ?
0087     static QBuf<T>* Upload( const NP* a  )
0088     {   
0089         return Upload( a->cvalues<T>(), a->num_values() );  
0090     }   
0091 
0092     static QBuf<T>* Upload( const std::vector<T>& vec  )
0093     {   
0094         return Upload( vec.data(), vec.size() );  
0095     }   
0096 
0097     /**
0098     method tickles CUDA/cxx17/devtoolset-8 bug causing compilation to fail with 
0099     error: cannot call member function without object
0100 
0101     See notes/issues/cxx17_issues.rst
0102 
0103     https://forums.developer.nvidia.com/t/cuda-10-1-nvidia-youre-now-fixing-gcc-bugs-that-gcc-doesnt-even-have/71063
0104 
0105     **/
0106 
0107     static QBuf<T>* Alloc( unsigned num_items  )
0108     {   
0109         QBuf<T>* buf = new QBuf<T> ; 
0110         (*buf).device_alloc(num_items); 
0111         (*buf).device_set(0); 
0112         return buf ; 
0113     }   
0114 };
0115 
0116 
0117 
0118 template<typename T> 
0119 inline void QBuf<T>::download_dump(const char* msg, unsigned )
0120 {
0121     std::cout << "QBuf::download_dump " << msg << " PLACEHOLDER " << std::endl ; 
0122 }
0123 
0124 template<> 
0125 inline void QBuf<int>::download_dump(const char* msg, unsigned edgeitems)
0126 {
0127     std::vector<int> chk  ;   
0128     download(chk); 
0129     std::cout << "QBuf::download_dump " << msg << std::endl ; 
0130     for(unsigned i=0 ; i < chk.size() ; i++ ) 
0131     {
0132         if( i < edgeitems || i > chk.size() - edgeitems ) std::cout << chk[i] << " "  ;   
0133         else if ( i == edgeitems ) std::cout << "... " ; 
0134     }
0135     std::cout << std::endl ; 
0136 }
0137 
0138 
0139