File indexing completed on 2026-04-09 07:49:09
0001 #include <sstream>
0002 #include <cstring>
0003 #include "SLOG.hh"
0004
0005 #include "QRng.hh"
0006 #include "SCurandSpec.h"
0007
0008 #ifdef OLD_MONOLITHIC_CURANDSTATE
0009 #include "SCurandStateMonolithic.hh"
0010 #else
0011 #include "SEventConfig.hh"
0012 #include "SCurandState.h"
0013 #endif
0014
0015 #include "sdirectory.h"
0016 #include "ssys.h"
0017
0018 #include "qrng.h"
0019 #include "srng.h"
0020 #include "QU.hh"
0021
0022 #include "QUDA_CHECK.h"
0023
0024 const plog::Severity QRng::LEVEL = SLOG::EnvLevel("QRng", "DEBUG");
0025 const QRng* QRng::INSTANCE = nullptr ;
0026 const QRng* QRng::Get(){ return INSTANCE ; }
0027
0028 std::string QRng::Desc()
0029 {
0030 std::stringstream ss ;
0031 ss << "QRng::Desc"
0032 << " IMPL:" << IMPL
0033 ;
0034 std::string str = ss.str() ;
0035 return str ;
0036 }
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047 QRng::QRng(unsigned skipahead_event_offset_)
0048 :
0049 RNGNAME(srng<RNG>::NAME),
0050 UPLOAD_RNG_STATES(srng<RNG>::UPLOAD_RNG_STATES),
0051 skipahead_event_offset(skipahead_event_offset_),
0052 seed(0ull),
0053 offset(0ull),
0054 SEED_OFFSET(ssys::getenvvar("QRng__SEED_OFFSET")),
0055 parse_rc(SCurandSpec::ParseSeedOffset(seed, offset, SEED_OFFSET )),
0056 qr(new qrng<RNG>(seed, offset, skipahead_event_offset)),
0057 d_qr(nullptr),
0058 #ifdef OLD_MONOLITHIC_CURANDSTATE
0059 rngmax(0)
0060 #else
0061 rngmax(SEventConfig::MaxCurand()),
0062 cs(nullptr)
0063 #endif
0064 {
0065 init();
0066 }
0067
0068
0069
0070 template<> void QRng::initStates<Philox>()
0071 {
0072 LOG(info)
0073 << "initStates<Philox> DO NOTHING : No LoadAndUpload needed "
0074 << " rngmax " << rngmax
0075 << " SEventConfig::MaxCurand " << SEventConfig::MaxCurand()
0076 ;
0077 }
0078
0079 template<> void QRng::initStates<XORWOW>()
0080 {
0081 bool is_XORWOW = strcmp( srng<XORWOW>::NAME, "XORWOW") == 0 ;
0082 assert( is_XORWOW );
0083
0084 LOG(info) << "initStates<XORWOW> LoadAndUpload and set_uploaded_states " ;
0085 #ifdef OLD_MONOLITHIC_CURANDSTATE
0086 XORWOW* d_uploaded_states = LoadAndUpload(rngmax, SCurandStateMonolithic::Path()) ;
0087 #else
0088 XORWOW* d_uploaded_states = LoadAndUpload(rngmax, cs);
0089 #endif
0090 qr->set_uploaded_states( d_uploaded_states );
0091 }
0092
0093
0094
0095 void QRng::init()
0096 {
0097 INSTANCE = this ;
0098 assert(parse_rc == 0 );
0099
0100 initStates<RNG>();
0101 initMeta();
0102
0103 bool VERBOSE = ssys::getenvbool(init_VERBOSE);
0104 LOG_IF(info, VERBOSE)
0105 << "[" << init_VERBOSE << "] " << ( VERBOSE ? "YES" : "NO " )
0106 << "\n"
0107 << desc()
0108 ;
0109 }
0110
0111
0112
0113
0114
0115
0116
0117
0118
0119
0120
0121
0122
0123
0124
0125 void QRng::initMeta()
0126 {
0127 const char* label_1 = "QRng::initMeta/d_qr" ;
0128 d_qr = QU::UploadArray<qrng<RNG>>(qr, 1, label_1 );
0129
0130 bool uploaded = d_qr != nullptr ;
0131 LOG_IF(fatal, !uploaded) << " FAILED to upload RNG and/or metadata " ;
0132 assert(uploaded);
0133 }
0134
0135
0136
0137 QRng::~QRng()
0138 {
0139 }
0140
0141
0142
0143 #ifdef OLD_MONOLITHIC_CURANDSTATE
0144
0145 const char* QRng::Load_FAIL_NOTES = R"(
0146 QRng::Load_FAIL_NOTES
0147 =================================
0148
0149 QRng::Load failed to load the RNG files.
0150 These files should have been created during the *opticks-full* installation
0151 by the bash function *opticks-prepare-installation*
0152 which runs *qudarap-prepare-installation*.
0153
0154 Investigate by looking at the contents of the RNG directory,
0155 as shown below::
0156
0157 epsilon:~ blyth$ ls -l ~/.opticks/rngcache/RNG/
0158 total 892336
0159 -rw-r--r-- 1 blyth staff 44000000 Oct 6 19:43 QCurandState_1000000_0_0.bin
0160 -rw-r--r-- 1 blyth staff 132000000 Oct 6 19:53 QCurandState_3000000_0_0.bin
0161 epsilon:~ blyth$
0162
0163
0164 )" ;
0165
0166 #else
0167 const char* QRng::Load_FAIL_NOTES = R"(
0168 QRng::Load_FAIL_NOTES
0169 ===============================
0170
0171 TODO : for new chunked impl
0172
0173 )" ;
0174
0175 #endif
0176
0177
0178
0179
0180 #ifdef OLD_MONOLITHIC_CURANDSTATE
0181
0182
0183
0184
0185
0186
0187
0188
0189
0190
0191
0192
0193
0194 XORWOW* QRng::LoadAndUpload(ULL& rngmax, const char* path)
0195 {
0196 XORWOW* h_states = Load(rngmax, path);
0197 XORWOW* d_states = UploadAndFree(h_states, rngmax );
0198 return d_states ;
0199 }
0200
0201 XORWOW* QRng::Load(ULL& rngmax, const char* path)
0202 {
0203 bool null_path = path == nullptr ;
0204 LOG_IF(fatal, null_path ) << " QRng::Load null path " ;
0205 assert( !null_path );
0206
0207 FILE *fp = fopen(path,"rb");
0208 bool failed = fp == nullptr ;
0209 LOG_IF(fatal, failed ) << " unabled to open file [" << path << "]" ;
0210 LOG_IF(error, failed ) << Load_FAIL_NOTES ;
0211 assert(!failed);
0212
0213
0214 fseek(fp, 0L, SEEK_END);
0215 long file_size = ftell(fp);
0216 rewind(fp);
0217
0218 long type_size = sizeof(RNG) ;
0219 long item_size = 44 ;
0220
0221 rngmax = file_size/item_size ;
0222
0223
0224 LOG(LEVEL)
0225 << " path " << path
0226 << " file_size " << file_size
0227 << " item_size " << item_size
0228 << " type_size " << type_size
0229 << " rngmax " << rngmax
0230 ;
0231
0232 assert( file_size % item_size == 0 );
0233
0234 XORWOW* rng_states = (XORWOW*)malloc(sizeof(XORWOW)*rngmax);
0235
0236 for(ULL i = 0 ; i < rngmax ; ++i )
0237 {
0238 XORWOW& rng = rng_states[i] ;
0239 fread(&rng.d, sizeof(unsigned int),1,fp);
0240 fread(&rng.v, sizeof(unsigned int),5,fp);
0241 fread(&rng.boxmuller_flag, sizeof(int) ,1,fp);
0242 fread(&rng.boxmuller_flag_double, sizeof(int) ,1,fp);
0243 fread(&rng.boxmuller_extra, sizeof(float) ,1,fp);
0244 fread(&rng.boxmuller_extra_double,sizeof(double) ,1,fp);
0245 }
0246 fclose(fp);
0247
0248 return rng_states ;
0249 }
0250
0251 XORWOW* QRng::UploadAndFree(XORWOW* h_states, ULL num_states )
0252 {
0253 const char* label_0 = "QRng::UploadAndFree/rng_states" ;
0254 XORWOW* d_states = QU::UploadArray<XORWOW>(h_states, num_states, label_0 ) ;
0255 free(h_states);
0256 return d_states ;
0257 }
0258
0259 #else
0260
0261
0262
0263
0264
0265
0266
0267
0268
0269
0270
0271
0272
0273
0274
0275
0276
0277
0278
0279
0280
0281
0282
0283
0284
0285
0286
0287
0288
0289
0290
0291
0292
0293
0294
0295
0296
0297
0298
0299
0300
0301
0302
0303
0304
0305
0306
0307
0308
0309
0310
0311
0312
0313
0314
0315
0316
0317
0318
0319
0320
0321
0322
0323
0324
0325
0326
0327
0328
0329
0330
0331
0332
0333
0334
0335
0336
0337
0338
0339
0340 XORWOW* QRng::LoadAndUpload(ULL _rngmax, const SCurandState& cs)
0341 {
0342 LOG(LEVEL) << cs.desc() ;
0343
0344 ULL tot_available_states = cs.all.num ;
0345 ULL rngmax = _rngmax > 0 ? _rngmax : tot_available_states ;
0346
0347 LOG_IF(error, _rngmax == 0 )
0348 << "\n"
0349 << " WARNING : _rngmax is ZERO : will load+upload all SCurandChunk files "
0350 << " consuming significant VRAM and enabling very large launches "
0351 << " set [" << SEventConfig::kMaxCurand << "] non-zero eg M3 to control "
0352 << " tot_available_states/M " << tot_available_states/M
0353 << " rngmax/M " << rngmax/M
0354 ;
0355
0356 XORWOW* d0 = QU::device_alloc<XORWOW>( rngmax, "QRng::LoadAndUpload/rngmax" );
0357 XORWOW* d = d0 ;
0358
0359 ULL available_chunk = cs.chunk.size();
0360 ULL count = 0 ;
0361
0362 LOG(LEVEL)
0363 << " rngmax " << rngmax
0364 << " rngmax/M " << rngmax/M
0365 << " available_chunk " << available_chunk
0366 << " cs.all.num/M " << cs.all.num/M
0367 << " tot_available_states/M " << tot_available_states/M
0368 << " rngmax/M " << rngmax/M
0369 << " d0 " << d0
0370 ;
0371
0372
0373 sdigest dig ;
0374
0375 for(ULL i=0 ; i < available_chunk ; i++)
0376 {
0377 ULL remaining = rngmax - count ;
0378
0379 const SCurandChunk& chunk = cs.chunk[i];
0380
0381 bool partial_read = remaining < chunk.ref.num ;
0382
0383 ULL num = partial_read ? remaining : chunk.ref.num ;
0384
0385 LOG(LEVEL)
0386 << " i " << std::setw(3) << i
0387 << " chunk.ref.num/M " << std::setw(4) << chunk.ref.num/M
0388 << " count/M " << std::setw(4) << count/M
0389 << " remaining/M " << std::setw(4) << remaining/M
0390 << " partial_read " << ( partial_read ? "YES" : "NO " )
0391 << " num/M " << std::setw(4) << num/M
0392 << " d " << d
0393 ;
0394
0395 scurandref<XORWOW> cr = chunk.load(num, cs.dir, &dig ) ;
0396
0397 assert( cr.states != nullptr);
0398
0399 bool num_match = cr.num == num ;
0400
0401 LOG_IF(fatal, !num_match)
0402 << "QRng::LoadAndUpload"
0403 << " num_match " << ( num_match ? "YES" : "NO " )
0404 << " cr.num/M " << cr.num/M
0405 << " num/M " << num/M
0406 ;
0407
0408 assert(num_match);
0409
0410 QU::copy_host_to_device<XORWOW>( d , cr.states , num );
0411
0412 free(cr.states);
0413
0414 d += num ;
0415 count += num ;
0416
0417 if(count > rngmax) assert(0);
0418 if(count == rngmax) break ;
0419 }
0420
0421 bool complete = count == rngmax ;
0422 assert( complete );
0423 std::string digest = dig.finalize();
0424
0425 std::cout
0426 << "QRng::LoadAndUpload"
0427 << " complete " << ( complete ? "YES" : "NO ")
0428 << " rngmax/M " << rngmax/M
0429 << " rngmax " << rngmax
0430 << " digest " << digest
0431 << "\n"
0432 ;
0433
0434 return complete ? d0 : nullptr ;
0435 }
0436
0437 #endif
0438
0439
0440
0441
0442
0443
0444
0445
0446
0447
0448
0449 void QRng::Save( XORWOW* states, unsigned num_states, const char* path )
0450 {
0451 sdirectory::MakeDirsForFile(path);
0452 FILE *fp = fopen(path,"wb");
0453 LOG_IF(fatal, fp == nullptr) << " error opening file " << path ;
0454 assert(fp);
0455
0456 for(unsigned i = 0 ; i < num_states ; ++i )
0457 {
0458 XORWOW& rng = states[i] ;
0459 fwrite(&rng.d, sizeof(unsigned int),1,fp);
0460 fwrite(&rng.v, sizeof(unsigned int),5,fp);
0461 fwrite(&rng.boxmuller_flag, sizeof(int) ,1,fp);
0462 fwrite(&rng.boxmuller_flag_double, sizeof(int) ,1,fp);
0463 fwrite(&rng.boxmuller_extra, sizeof(float) ,1,fp);
0464 fwrite(&rng.boxmuller_extra_double,sizeof(double) ,1,fp);
0465 }
0466 fclose(fp);
0467 return ;
0468 }
0469
0470
0471
0472
0473
0474 std::string QRng::desc() const
0475 {
0476 std::stringstream ss ;
0477 ss << "QRng::desc\n"
0478 << std::setw(30) << " IMPL " << IMPL << "\n"
0479 << std::setw(30) << " RNGNAME " << ( RNGNAME ? RNGNAME : "-" ) << "\n"
0480 << std::setw(30) << " UPLOAD_RNG_STATES " << ( UPLOAD_RNG_STATES ? "YES" : "NO " ) << "\n"
0481 << std::setw(30) << " seed " << seed << "\n"
0482 << std::setw(30) << " offset " << offset << "\n"
0483 << std::setw(30) << " rngmax " << rngmax << "\n"
0484 << std::setw(30) << " rngmax/M " << rngmax/M << "\n"
0485 << std::setw(30) << " qr " << qr << "\n"
0486 << std::setw(30) << " qr.skipahead_event_offset " << qr->skipahead_event_offset << "\n"
0487 << std::setw(30) << " d_qr " << d_qr << "\n"
0488 ;
0489
0490 std::string str = ss.str();
0491 return str ;
0492 }
0493
0494
0495
0496
0497
0498 template <typename T>
0499 extern void QRng_generate(
0500 dim3,
0501 dim3,
0502 qrng<RNG>*,
0503 unsigned,
0504 T*,
0505 unsigned,
0506 unsigned );
0507
0508
0509
0510
0511
0512
0513
0514
0515
0516
0517
0518
0519 template<typename T>
0520 void QRng::generate( T* uu, unsigned ni, unsigned nv, unsigned evid )
0521 {
0522 const char* label = "QRng::generate:ni*nv" ;
0523
0524 T* d_uu = QU::device_alloc<T>(ni*nv, label );
0525
0526 QU::ConfigureLaunch(numBlocks, threadsPerBlock, ni, 1 );
0527
0528 QRng_generate<T>(numBlocks, threadsPerBlock, d_qr, evid, d_uu, ni, nv );
0529
0530 QU::copy_device_to_host_and_free<T>( uu, d_uu, ni*nv, label );
0531 }
0532
0533
0534 template void QRng::generate<float>( float*, unsigned, unsigned, unsigned );
0535 template void QRng::generate<double>( double*, unsigned, unsigned, unsigned );
0536
0537