Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-04-09 07:49:09

0001 #include <sstream>
0002 #include <cstring>
0003 #include "SLOG.hh"
0004 
0005 #include "QRng.hh"
0006 #include "SCurandSpec.h"
0007 
0008 #ifdef OLD_MONOLITHIC_CURANDSTATE
0009 #include "SCurandStateMonolithic.hh"
0010 #else
0011 #include "SEventConfig.hh"
0012 #include "SCurandState.h"
0013 #endif
0014 
0015 #include "sdirectory.h"
0016 #include "ssys.h"
0017 
0018 #include "qrng.h"
0019 #include "srng.h"
0020 #include "QU.hh"
0021 
0022 #include "QUDA_CHECK.h"
0023 
0024 const plog::Severity QRng::LEVEL = SLOG::EnvLevel("QRng", "DEBUG"); 
0025 const QRng* QRng::INSTANCE = nullptr ; 
0026 const QRng* QRng::Get(){ return INSTANCE ;  }
0027 
0028 std::string QRng::Desc() // static
0029 {
0030     std::stringstream ss ; 
0031     ss << "QRng::Desc"
0032        << " IMPL:" << IMPL 
0033        ;
0034     std::string str = ss.str() ;
0035     return str ;  
0036 }
0037 
0038 
0039 /**
0040 QRng::QRng
0041 ------------
0042 
0043 QRng instantiation is invoked from QSim::UploadComponents
0044 The initializer list reads the QRng__SEED_OFFSET envvar, creates the host side qrng<RNG> instance *qr* and the body then calls init().
0045 **/
0046 
0047 QRng::QRng(unsigned skipahead_event_offset_)
0048     :   // NOTE(review): members are initialized in declaration order (header not visible here), so seed and offset need to be declared before parse_rc and qr - confirm
0049     RNGNAME(srng<RNG>::NAME),
0050     UPLOAD_RNG_STATES(srng<RNG>::UPLOAD_RNG_STATES),
0051     skipahead_event_offset(skipahead_event_offset_),
0052     seed(0ull),
0053     offset(0ull),
0054     SEED_OFFSET(ssys::getenvvar("QRng__SEED_OFFSET")),   // raw envvar value, handed to the parser on the next line
0055     parse_rc(SCurandSpec::ParseSeedOffset(seed, offset, SEED_OFFSET )),   // presumably fills seed and offset - TODO confirm ; init() asserts parse_rc == 0
0056     qr(new qrng<RNG>(seed, offset, skipahead_event_offset)), 
0057     d_qr(nullptr),   // set by initMeta when *qr* is uploaded
0058 #ifdef OLD_MONOLITHIC_CURANDSTATE
0059     rngmax(0)   // monolithic impl : QRng::Load sets rngmax from the file size
0060 #else
0061     rngmax(SEventConfig::MaxCurand()),   // chunked impl : rngmax is an input, zero means load all available states
0062     cs(nullptr)
0063 #endif
0064 {
0065     init(); 
0066 }
0067 
0068 
0069 
0070 template<> void QRng::initStates<Philox>()
0071 { 
0072     LOG(info)
0073         << "initStates<Philox> DO NOTHING : No LoadAndUpload needed " 
0074         << " rngmax " << rngmax 
0075         << " SEventConfig::MaxCurand " << SEventConfig::MaxCurand()
0076         ; 
0077 }
0078 
0079 template<> void QRng::initStates<XORWOW>()
0080 { 
0081     bool is_XORWOW = strcmp( srng<XORWOW>::NAME, "XORWOW") == 0 ; 
0082     assert( is_XORWOW ); 
0083 
0084     LOG(info) << "initStates<XORWOW> LoadAndUpload and set_uploaded_states " ; 
0085 #ifdef OLD_MONOLITHIC_CURANDSTATE
0086     XORWOW* d_uploaded_states = LoadAndUpload(rngmax, SCurandStateMonolithic::Path()) ;  
0087 #else
0088     XORWOW* d_uploaded_states = LoadAndUpload(rngmax, cs); 
0089 #endif
0090     qr->set_uploaded_states( d_uploaded_states ); 
0091 }
0092 
0093 
0094 
0095 void QRng::init()
0096 {
0097     INSTANCE = this ; 
0098     assert(parse_rc == 0 ); 
0099 
0100     initStates<RNG>(); 
0101     initMeta(); 
0102 
0103     bool VERBOSE = ssys::getenvbool(init_VERBOSE); 
0104     LOG_IF(info, VERBOSE)
0105          << "[" << init_VERBOSE << "] " << ( VERBOSE ? "YES" : "NO " )
0106          << "\n"
0107          << desc()
0108          ;  
0109 }
0110 
0111 
0112 
0113 
0114 
0115 /**
0116 QRng::initMeta
0117 ------------------
0118 
0119 1. record device pointer qr->rng_startes
0120 
0121 2. upload qrng.h *qr* instance within single element array, setting d_qr
0122 
0123 **/
0124 
0125 void QRng::initMeta()
0126 {
0127     const char* label_1 = "QRng::initMeta/d_qr" ; 
0128     d_qr = QU::UploadArray<qrng<RNG>>(qr, 1, label_1 ); 
0129 
0130     bool uploaded = d_qr != nullptr ; 
0131     LOG_IF(fatal, !uploaded) << " FAILED to upload RNG and/or metadata " ;  
0132     assert(uploaded); 
0133 }
0134 
0135 
0136 
0137 QRng::~QRng()
0138 {
0139 }
0140 
0141 
0142 
0143 #ifdef OLD_MONOLITHIC_CURANDSTATE
0144 // Help text for the monolithic impl : logged at error level by QRng::Load when the state file cannot be opened.
0145 const char* QRng::Load_FAIL_NOTES = R"(
0146 QRng::Load_FAIL_NOTES
0147 =================================
0148 
0149 QRng::Load failed to load the RNG files. 
0150 These files should have been created during the *opticks-full* installation 
0151 by the bash function *opticks-prepare-installation* 
0152 which runs *qudarap-prepare-installation*. 
0153 
0154 Investigate by looking at the contents of the RNG directory, 
0155 as shown below::
0156 
0157     epsilon:~ blyth$ ls -l  ~/.opticks/rngcache/RNG/
0158     total 892336
0159     -rw-r--r--  1 blyth  staff   44000000 Oct  6 19:43 QCurandState_1000000_0_0.bin
0160     -rw-r--r--  1 blyth  staff  132000000 Oct  6 19:53 QCurandState_3000000_0_0.bin
0161     epsilon:~ blyth$ 
0162 
0163 
0164 )" ;
0165 
0166 #else
0167 const char* QRng::Load_FAIL_NOTES = R"(
0168 QRng::Load_FAIL_NOTES
0169 ===============================
0170 
0171 TODO : for new chunked impl
0172 
0173 )" ;   // placeholder text for the chunked impl : not referenced by the chunked code visible in this file
0174 
0175 #endif
0176 
0177 
0178 
0179 
0180 #ifdef OLD_MONOLITHIC_CURANDSTATE
0181 
0182 /**
0183 QRng::LoadAndUpload
0184 --------------------
0185 
0186 In the old monolithic impl rngmax is an output argument obtained from file_size/item_size 
0187 and at the same time kinda an input to specify which file to load. 
0188 
0189 In the new chunked impl with partial chunk loading the rngmax is an input value
0190 that can be set to anything. 
0191 
0192 **/
0193 
0194 XORWOW* QRng::LoadAndUpload(ULL& rngmax, const char* path)  // static 
0195 {
0196     XORWOW* h_states = Load(rngmax, path); 
0197     XORWOW* d_states = UploadAndFree(h_states, rngmax ); 
0198     return d_states ; 
0199 }
0200 
0201 XORWOW* QRng::Load(ULL& rngmax, const char* path)  // static 
0202 {
0203     bool null_path = path == nullptr ; 
0204     LOG_IF(fatal, null_path ) << " QRng::Load null path " ; 
0205     assert( !null_path );  
0206 
0207     FILE *fp = fopen(path,"rb");
0208     bool failed = fp == nullptr ; 
0209     LOG_IF(fatal, failed ) << " unabled to open file [" << path << "]" ; 
0210     LOG_IF(error, failed ) << Load_FAIL_NOTES  ; 
0211     assert(!failed); 
0212 
0213 
0214     fseek(fp, 0L, SEEK_END);
0215     long file_size = ftell(fp);
0216     rewind(fp);
0217 
0218     long type_size = sizeof(RNG) ;  
0219     long item_size = 44 ; 
0220 
0221     rngmax = file_size/item_size ; 
0222 
0223 
0224     LOG(LEVEL) 
0225         << " path " << path 
0226         << " file_size " << file_size 
0227         << " item_size " << item_size 
0228         << " type_size " << type_size 
0229         << " rngmax " << rngmax
0230         ; 
0231 
0232     assert( file_size % item_size == 0 );  
0233 
0234     XORWOW* rng_states = (XORWOW*)malloc(sizeof(XORWOW)*rngmax);
0235 
0236     for(ULL i = 0 ; i < rngmax ; ++i )
0237     {   
0238         XORWOW& rng = rng_states[i] ;
0239         fread(&rng.d,                     sizeof(unsigned int),1,fp);   //  1
0240         fread(&rng.v,                     sizeof(unsigned int),5,fp);   //  5 
0241         fread(&rng.boxmuller_flag,        sizeof(int)         ,1,fp);   //  1 
0242         fread(&rng.boxmuller_flag_double, sizeof(int)         ,1,fp);   //  1
0243         fread(&rng.boxmuller_extra,       sizeof(float)       ,1,fp);   //  1
0244         fread(&rng.boxmuller_extra_double,sizeof(double)      ,1,fp);   //  2    11*4 = 44 
0245     }   
0246     fclose(fp);
0247 
0248     return rng_states ; 
0249 }
0250 
0251 XORWOW* QRng::UploadAndFree(XORWOW* h_states, ULL num_states )  // static 
0252 {
0253     const char* label_0 = "QRng::UploadAndFree/rng_states" ; 
0254     XORWOW* d_states = QU::UploadArray<XORWOW>(h_states, num_states, label_0 ) ;   
0255     free(h_states); 
0256     return d_states ;  
0257 }
0258 
0259 #else
0260 
0261 /**
0262 QRng::LoadAndUpload
0263 ----------------------
0264 
0265 TODO : replace this, using SCurandState::loadAndUpload
0266 
0267 
0268 rngmax
0269     input argument that determines how many chunks of RNG to load and upload
0270 
0271 (SCurandState)cs
0272     vector of SCurandChunk metadata on the chunk files 
0273 
0274 
0275 For example with chunks of 10M each and rngmax of 25M::
0276 
0277      10M     10M      10M
0278    +------+--------+-------+
0279    
0280 
0281 Read full chunks until doing so would go over rngmax, then read only the remaining states from the next chunk (a partial read) and stop.
0282 
0283 
0284 
0285 RNG load bytes digest 
0286 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
0287 
0288 ::
0289 
0290     QRng::LoadAndUpload complete YES rngmax/M 3 rngmax 3000000 digest c5a80f522e9393efe0302b916affda06
0291 
0292 
0293 If rngmax lands on a border between files/chunks then the RNG load digest
0294 should match the output from md5sum on the corresponding state files. 
0295 For chunks it is necessary to concat the files first::
0296 
0297     P[blyth@localhost RNG]$ md5sum QCurandStateMonolithic_3000000_0_0.bin
0298     c5a80f522e9393efe0302b916affda06  QCurandStateMonolithic_3000000_0_0.bin
0299 
0300     P[blyth@localhost RNG]$ cat SCurandChunk_0000_0000M_0001M_0_0.bin SCurandChunk_0001_0001M_0001M_0_0.bin SCurandChunk_0002_0002M_0001M_0_0.bin > /tmp/3M.bin
0301     P[blyth@localhost RNG]$ md5sum /tmp/3M.bin
0302     c5a80f522e9393efe0302b916affda06  /tmp/3M.bin
0303 
0304     ## cat SCurandChunk_000[0-2]_00*M_0001M_0_0.bin > /tmp/3M.bin  ## wildcard way 
0305 
0306 
0307 Note that sizeof(RNG) is slightly larger than the itemsize in the file, 
0308 indicating that RNG in memory has some padding. Due to this digests of 
0309 the RNG in memory do not match those of the files or the loaded bytes.    
0310 
0311 
0312 rethink auto rngmax:0
0313 ~~~~~~~~~~~~~~~~~~~~~~~
0314 
0315 While implementing multiple launch running it was realized that 
0316 reproducibility requires RNG "ix" slot offsetting 
0317 for launches beyond the first. This should allow results from 
0318 multiple launches to exactly match unsplit launches.   
0319 
0320 Initially thought that would entail re-uploading the 
0321 states. But it would be simpler to upload all the available 
0322 states at initialization and just offset for each launch.  
0323 Note this is "vertical" picking the slot, not "horizontal" 
0324 offsetting for the skipahead done from event to event.  
0325 
0326 While this means VRAM is needed for the states, it looks likely 
0327 that will soon jump to Philox counter based RNG, which will 
0328 remove the need for loading states.  Offsetting of counters
0329 appropriately will still be needed. 
0330 
0331 rngmax:0
0332    load all available states, 
0333 rngmax>0 
0334    load specified number of states
0335 
0336 
0337 
0338 **/
0339 
0340 XORWOW* QRng::LoadAndUpload(ULL _rngmax, const SCurandState& cs)  // static 
0341 {
0342     LOG(LEVEL) << cs.desc() ; 
0343 
0344     ULL tot_available_states = cs.all.num ; 
0345     ULL rngmax = _rngmax > 0 ? _rngmax : tot_available_states ; 
0346 
0347     LOG_IF(error, _rngmax == 0 ) 
0348         << "\n" 
0349         << " WARNING : _rngmax is ZERO : will load+upload all SCurandChunk files "
0350         << " consuming significant VRAM and enabling very large launches "
0351         << " set [" << SEventConfig::kMaxCurand << "] non-zero eg M3 to control "
0352         << " tot_available_states/M " << tot_available_states/M 
0353         << " rngmax/M " << rngmax/M
0354         ;
0355 
0356     XORWOW* d0 = QU::device_alloc<XORWOW>( rngmax, "QRng::LoadAndUpload/rngmax" ); 
0357     XORWOW* d = d0 ; 
0358 
0359     ULL available_chunk = cs.chunk.size(); 
0360     ULL count = 0 ; 
0361 
0362     LOG(LEVEL)
0363         << " rngmax " << rngmax
0364         << " rngmax/M " << rngmax/M
0365         << " available_chunk " << available_chunk 
0366         << " cs.all.num/M " << cs.all.num/M 
0367         << " tot_available_states/M " << tot_available_states/M 
0368         << " rngmax/M " << rngmax/M
0369         << " d0 " << d0 
0370         ;
0371 
0372 
0373     sdigest dig ; 
0374 
0375     for(ULL i=0 ; i < available_chunk ; i++)
0376     {
0377         ULL remaining = rngmax - count ;  
0378 
0379         const SCurandChunk& chunk = cs.chunk[i]; 
0380  
0381         bool partial_read = remaining < chunk.ref.num ;  
0382 
0383         ULL num = partial_read ? remaining : chunk.ref.num ;
0384 
0385         LOG(LEVEL)
0386             << " i " << std::setw(3) << i 
0387             << " chunk.ref.num/M " << std::setw(4) << chunk.ref.num/M
0388             << " count/M " << std::setw(4) << count/M
0389             << " remaining/M " << std::setw(4) << remaining/M
0390             << " partial_read " << ( partial_read ? "YES" : "NO " )
0391             << " num/M " << std::setw(4) << num/M
0392             << " d " << d 
0393             ;
0394 
0395         scurandref<XORWOW> cr = chunk.load(num, cs.dir, &dig ) ;
0396   
0397         assert( cr.states != nullptr); 
0398 
0399         bool num_match = cr.num == num ; 
0400 
0401         LOG_IF(fatal, !num_match)
0402             << "QRng::LoadAndUpload"
0403             << " num_match " << ( num_match ? "YES" : "NO " )
0404             << " cr.num/M " << cr.num/M
0405             << " num/M " << num/M
0406             ;
0407 
0408         assert(num_match); 
0409 
0410         QU::copy_host_to_device<XORWOW>( d , cr.states , num ); 
0411 
0412         free(cr.states); 
0413 
0414         d += num ;  
0415         count += num ;  
0416 
0417         if(count > rngmax) assert(0); 
0418         if(count == rngmax) break ;
0419     }
0420 
0421     bool complete = count == rngmax ; 
0422     assert( complete );
0423     std::string digest = dig.finalize(); 
0424 
0425     std::cout 
0426         << "QRng::LoadAndUpload"
0427         << " complete " << ( complete ? "YES" : "NO ")
0428         << " rngmax/M " << rngmax/M 
0429         << " rngmax " << rngmax
0430         << " digest " << digest 
0431         << "\n"
0432         ;
0433 
0434     return complete ? d0 : nullptr ; 
0435 }
0436 
0437 #endif
0438 
0439 
0440 /**
0441 QRng::Save
0442 ------------
0443 
0444 Used from the old QCurandState::save
0445 
0446 TODO: eliminate, functionality duplicates in SCurandChunk::Save
0447 
0448 **/
0449 void QRng::Save( XORWOW* states, unsigned num_states, const char* path ) // static
0450 {
0451     sdirectory::MakeDirsForFile(path);
0452     FILE *fp = fopen(path,"wb");
0453     LOG_IF(fatal, fp == nullptr) << " error opening file " << path ; 
0454     assert(fp); 
0455 
0456     for(unsigned i = 0 ; i < num_states ; ++i )
0457     {   
0458         XORWOW& rng = states[i] ;
0459         fwrite(&rng.d,                     sizeof(unsigned int),1,fp);
0460         fwrite(&rng.v,                     sizeof(unsigned int),5,fp);
0461         fwrite(&rng.boxmuller_flag,        sizeof(int)         ,1,fp);
0462         fwrite(&rng.boxmuller_flag_double, sizeof(int)         ,1,fp);
0463         fwrite(&rng.boxmuller_extra,       sizeof(float)       ,1,fp);
0464         fwrite(&rng.boxmuller_extra_double,sizeof(double)      ,1,fp);
0465     }   
0466     fclose(fp);
0467     return ; 
0468 }
0469 
0470 
0471 
0472 
0473 
0474 std::string QRng::desc() const
0475 {
0476     std::stringstream ss ; 
0477     ss << "QRng::desc\n"
0478        << std::setw(30) << " IMPL " << IMPL << "\n" 
0479        << std::setw(30) << " RNGNAME " << ( RNGNAME ? RNGNAME : "-" ) << "\n" 
0480        << std::setw(30) << " UPLOAD_RNG_STATES " << ( UPLOAD_RNG_STATES ? "YES" : "NO " ) << "\n"
0481        << std::setw(30) << " seed " << seed << "\n"
0482        << std::setw(30) << " offset " << offset << "\n"
0483        << std::setw(30) << " rngmax " << rngmax << "\n"
0484        << std::setw(30) << " rngmax/M " << rngmax/M << "\n"
0485        << std::setw(30) << " qr " << qr << "\n"
0486        << std::setw(30) << " qr.skipahead_event_offset " << qr->skipahead_event_offset << "\n"
0487        << std::setw(30) << " d_qr " << d_qr << "\n"
0488        ;
0489 
0490     std::string str = ss.str(); 
0491     return str ; 
0492 }
0493 
0494 
0495 
0496 
0497 
0498 template <typename T>
0499 extern void QRng_generate(
0500     dim3, 
0501     dim3, 
0502     qrng<RNG>*, 
0503     unsigned, 
0504     T*, 
0505     unsigned, 
0506     unsigned );
0507 
0508 
0509 /**
0510 QRng::generate
0511 -----------------
0512 
0513 Launch ni threads to generate ni*nv values, via [0:nv] loop in the kernel 
0514 with some light touch encapsulation using event_idx to automate skipahead. 
0515 
0516 **/
0517 
0518 
0519 template<typename T>
0520 void QRng::generate( T* uu, unsigned ni, unsigned nv, unsigned evid )
0521 {
0522     const char* label = "QRng::generate:ni*nv" ; 
0523 
0524     T* d_uu = QU::device_alloc<T>(ni*nv, label );
0525 
0526     QU::ConfigureLaunch(numBlocks, threadsPerBlock, ni, 1 );  
0527 
0528     QRng_generate<T>(numBlocks, threadsPerBlock, d_qr, evid, d_uu, ni, nv ); 
0529 
0530     QU::copy_device_to_host_and_free<T>( uu, d_uu, ni*nv, label );
0531 }
0532 
0533 
0534 template void QRng::generate<float>( float*,   unsigned, unsigned, unsigned ); 
0535 template void QRng::generate<double>( double*, unsigned, unsigned, unsigned ); 
0536 
0537