eic-opticks/qudarap/QProp.cc

0001 #include "NP.hh"
0002
0003 #if defined(MOCK_CURAND)
0004 #include "plog/Severity.h"
0005 #else
0006 #include "SLOG.hh"
0007 #include <cuda_runtime.h>
0008 #include "QUDA_CHECK.h"
0009 #include "QU.hh"
0010 #endif
0011
0012 #include "QProp.hh"
0013 #include "qprop.h"
0014
0015
0016 #if defined(MOCK_CURAND)
0017 template<typename T>
0018 const plog::Severity QProp<T>::LEVEL = plog::info ;
0019 #else
0020 template<typename T>
0021 const plog::Severity QProp<T>::LEVEL = SLOG::EnvLevel("QProp", "DEBUG");
0022 #endif
0023
0024
0025 template<typename T>
0026 const QProp<T>* QProp<T>::INSTANCE = nullptr ;
0027
0028 template<typename T>
0029 const QProp<T>* QProp<T>::Get(){ return INSTANCE ; }
0030
0031 template<typename T>
0032 qprop<T>* QProp<T>::getDevicePtr() const
0033 {
0034     return d_prop ;
0035 }
0036
0037
0038 /**
0039 QProp::Make3D  : NOW REMOVED : SHOULD ADJUST SHAPE BEFORE USING CTOR
0040 -----------------------------------------------------------------------
0041
0042 * QProp requires 1+2D (num_prop, num_energy, 2 )
0043 * BUT: real world arrays such as those from JPMT.h often have more dimensions 3+2D::
0044
0045       (num_pmtcat, num_layer, num_prop, num_energy, 2)
0046
0047 * to avoid code duplication or complicated template handling
0048   of different shapes, this takes the approach of using NP::change_shape
0049   to scrunch up the higher dimensions yielding::
0050
0051       (num_pmtcat*num_layer*num_prop, num_energy, 2 )
0052
0053 * as there will usually be other dimensional needs in future, its
0054   more sustainable to standardize to keep things simple at the
0055   expense of requiring a simple calc to get access the
0056   scrunched "iprop" eg::
0057
0058     int iprop = pmtcat*NUM_LAYER*NUM_PROP + layer*NUM_PROP + prop_index ;
0059
0060 HMM can do equivalent of NP::combined_interp_5
0061
0062 template<typename T>
0063 QProp<T>* QProp<T>::Make3D(const NP* a)
0064 {
0065     NP* b = a->copy() ;
0066     b->change_shape_to_3D();
0067     return new QProp<T>(b) ;
0068 }
0069 **/
0070
0071
0072 /**
0073 QProp<T>::QProp
0074 -----------------
0075
0076 Instanciation:
0077
0078 1. examines input combined array dimensions
0079 2. creates host qprop<T> instance, and populates it
0080    with device pointers and metadata such as dimensions
0081 3. uploads the host qprop<T> instance to the device,
0082    retaining device pointer d_prop
0083
0084 **/
0085
0086 template<typename T>
0087 QProp<T>::QProp(const NP* a_)
0088     :
0089     a(a_ ? a_->copy() : nullptr),
0090     pp(a ? a->cvalues<T>() : nullptr),
0091     nv(a ? a->num_values() : 0),
0092     ni(a ? a->shape[0] : 0 ),
0093     nj(a ? a->shape[1] : 0 ),
0094     nk(a ? a->shape[2] : 0 ),
0095     prop(new qprop<T>),
0096     d_prop(nullptr)
0097 {
0098     init();
0099 }
0100
0101 template<typename T>
0102 void QProp<T>::init()
0103 {
0104     INSTANCE = this ;
0105     assert( a->uifc == 'f' );
0106     assert( a->shape.size() == 3 );
0107     assert( nv == ni*nj*nk ) ;
0108
0109     bool type_consistent = a->ebyte == sizeof(T) ;
0110
0111 #ifdef MOCK_CURAND
0112 #else
0113     LOG_IF(fatal, !type_consistent)
0114         << " type_consistent FAIL "
0115         << " sizeof(T) " << sizeof(T)
0116         << " a.ebyte " << a->ebyte
0117         ;
0118 #endif
0119     assert( type_consistent );
0120
0121     //dump();
0122     upload();
0123 }
0124
0125 /**
0126 QProp::upload
0127 --------------
0128
0129 1. allocate device array for *nv* T values
0130 2. populate *prop* on host with device pointers and (height, width) values
0131
0132    * height is the number of props
0133    * width is max_num_energy_of_all_prop_plus_one * 2
0134      (+1 for integer num_energy last column annotation, as done by NP::combine)
0135
0136 3. copy *pp* array values to device *prop->pp*
0137 4. copy *prop* to device and retain device-side pointer *d_prop*
0138
0139 **/
0140
0141 template<typename T>
0142 void QProp<T>::upload()
0143 {
0144     prop->height = ni ;
0145     prop->width  = nj*nk ;
0146
0147 #if defined(MOCK_CURAND)
0148     prop->pp = const_cast<T*>(pp) ;
0149     d_prop = prop ;
0150 #else
0151     prop->pp = QU::device_alloc<T>(nv,"QProp::upload/pp") ;
0152     QU::copy_host_to_device<T>( prop->pp, pp, nv );
0153     d_prop = QU::UploadArray<qprop<T>>(prop, 1, "QProp::upload/d_prop");
0154 #endif
0155
0156 }
0157
0158 template<typename T>
0159 void QProp<T>::cleanup()
0160 {
0161 #if defined(MOCK_CURAND)
0162 #else
0163     QUDA_CHECK(cudaFree(prop->pp));
0164     QUDA_CHECK(cudaFree(d_prop));
0165 #endif
0166 }
0167
0168 template<typename T>
0169 QProp<T>::~QProp()
0170 {
0171 }
0172
0173 template<typename T>
0174 std::string QProp<T>::desc() const
0175 {
0176     std::stringstream ss ;
0177     ss << "QProp::desc"
0178        << " a " << ( a ? a->desc() : "-" )
0179        << " nv " << nv
0180        << " ni " << ni
0181        << " nj " << nj
0182        << " nk " << nk
0183        ;
0184     return ss.str();
0185 }
0186
0187
0188
0189 template<typename T>
0190 void QProp<T>::dump() const
0191 {
0192 #ifdef MOCK_CURAND
0193     std::cout << desc() << std::endl ;
0194 #else
0195     LOG(info) << desc() ;
0196 #endif
0197     for(unsigned i=0 ; i < ni ; i++)
0198     {
0199         for(unsigned j=0 ; j < nj ; j++)
0200         {
0201             for(unsigned k=0 ; k < nk ; k++)
0202             {
0203                 std::cout
0204                     << std::setw(10) << std::fixed << std::setprecision(5) << pp[nk*nj*i+j*nk+k] << " "
0205                     ;
0206             }
0207
0208             T f = pp[nk*nj*i+j*nk+nk-1] ;
0209             unsigned prop_ni  = sview::uint_from<T>(f);
0210             std::cout
0211                 << " prop_ni :" << std::setw(5) << prop_ni
0212                 << std::endl
0213                 ;
0214
0215             assert( prop_ni < nj ) ;
0216         }
0217     }
0218 }
0219
0220
0221
0222
0223 #if defined(MOCK_CURAND)
0224 #else
0225 // NB this cannot be extern "C" as need C++ name mangling for template types
0226
0227 template <typename T>
0228 extern void QProp_lookup(
0229     dim3 numBlocks,
0230     dim3 threadsPerBlock,
0231     qprop<T>* prop,
0232     T* lookup,
0233     const T* domain,
0234     unsigned iprop,
0235     unsigned domain_width
0236 );
0237
0238 #endif
0239
0240
0241
0242
0243 /**
0244 QProp::lookup
0245 --------------
0246
0247 Note that this is doing separate CUDA launches for each property
0248
0249 **/
0250
0251 template<typename T>
0252 void QProp<T>::lookup( T* lookup, const T* domain,  unsigned num_prop, unsigned domain_width ) const
0253 {
0254     int ni = num_prop ;
0255     int nj = domain_width ;
0256     int nv = ni*nj ;
0257
0258 #if defined(MOCK_CURAND)
0259
0260     std::cout << "MOCK_CURAND QProp::lookup needs impl " << std::endl ;
0261
0262     for(int i=0 ; i < ni ; i++)
0263     for(int j=0 ; j < nj ; j++)
0264     {
0265         int idx = i*nj + j ;
0266         lookup[idx] = prop->interpolate( i, domain[j] ) ;
0267     }
0268
0269 #else
0270     LOG(LEVEL)
0271         << "["
0272         << " num_prop(ni) " << num_prop
0273         << " domain_width(nj) " << domain_width
0274         << " num_lookup(nv=ni*nj) " << nv
0275         ;
0276
0277
0278     T* d_domain = QU::device_alloc<T>(domain_width, "QProp<T>::lookup:domain_width") ;
0279     QU::copy_host_to_device<T>( d_domain, domain, domain_width  );
0280
0281     const char* label = "QProp<T>::lookup:num_lookup" ;
0282
0283     T* d_lookup = QU::device_alloc<T>(nv,label) ;
0284
0285     dim3 numBlocks ;
0286     dim3 threadsPerBlock ;
0287     //configureLaunch( numBlocks, threadsPerBlock, domain_width, 1 );
0288
0289     unsigned threads_per_block = 512u ;
0290     QU::ConfigureLaunch1D( numBlocks, threadsPerBlock, domain_width, threads_per_block );
0291
0292     for(int i=0 ; i < ni ; i++)
0293     {
0294         QProp_lookup(numBlocks, threadsPerBlock, d_prop, d_lookup, d_domain, i, domain_width );
0295     }
0296
0297     QU::copy_device_to_host_and_free<T>( lookup, d_lookup, nv, label );
0298
0299     LOG(LEVEL) << "]" ;
0300 #endif
0301
0302
0303 }
0304
0305
0306
0307
0308 /**
0309 lookup_scan
0310 -------------
0311
0312 nx lookups in x0->x1 inclusive for each property yielding nx*qp.ni values.
0313
0314 1. create *x* domain array of shape (nx,) with values in range x0 to x1
0315 2. create *y* lookup array of shape (qp.ni, nx )
0316 3. invoke QProp::lookup collecting *y* lookup values from kernel call
0317 4. save prop, domain and lookup into fold/reldir
0318
0319 **/
0320
0321 template<typename T>
0322 void QProp<T>::lookup_scan(T x0, T x1, unsigned nx, const char* fold, const char* reldir ) const
0323 {
0324     NP* x = NP::Linspace<T>( x0, x1, nx );
0325     NP* y = NP::Make<T>(ni, nx );
0326
0327     lookup(y->values<T>(), x->cvalues<T>(), ni, nx );
0328
0329     a->save(fold, reldir, "prop.npy");
0330     x->save(fold, reldir, "domain.npy");
0331     y->save(fold, reldir, "lookup.npy");
0332
0333 #ifdef MOCK_CURAND
0334     std::cout
0335         << "QProp::lookup_scan"
0336         << " save to " << fold << "/" << reldir
0337         << std::endl
0338         ;
0339 #else
0340     LOG(info) << "save to " << fold << "/" << reldir  ;
0341 #endif
0342 }
0343
0344
0345
0346 #pragma GCC diagnostic push
0347 #pragma GCC diagnostic ignored "-Wattributes"
0348 // quell warning: type attributes ignored after type is already defined [-Wattributes]
0349 template struct QUDARAP_API QProp<float>;
0350 template struct QUDARAP_API QProp<double>;
0351 #pragma GCC diagnostic pop
0352
0353