Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-04-09 07:49:08

0001 #pragma once
0002 /**
0003 QCurandStateMonolithic.hh : allocate + create + download + save
0004 =================================================================
0005 
0006 * creates states using curand_init with CUDA launches configured by SLaunchSequence.h
0007 * loading/saving from/to file is handled separately by QRng
0008 
0009 The RNG states originate on the device as a result of 
0010 calling curand_init and they need to be downloaded and stored
0011 into files named informatively with seeds, counts, offsets etc.
0012 
0013 A difficulty is that curand_init is a very heavy kernel, 
0014 so currently each of the large files listed below is created via multiple launches 
0015 that all write into the one file.  
0016 The old cuRANDWrapper and new QCurandStateMonolithic have exactly the same contents. 
0017 
0018 +-----------+---------------+----------------+--------------------------------------------------------------------+
0019 |  num      | bytes (ls)    | filesize(du -h)|  path                                                              |
0020 +===========+===============+================+====================================================================+
0021 |   200M    |  8800000000   |   8.2G         | /home/blyth/.opticks/rngcache/RNG/cuRANDWrapper_200000000_0_0.bin  |
0022 +-----------+---------------+----------------+--------------------------------------------------------------------+
0023 |   100M    |  4400000000   |   4.1G         | /home/blyth/.opticks/rngcache/RNG/cuRANDWrapper_100000000_0_0.bin  |
0024 +-----------+---------------+----------------+--------------------------------------------------------------------+
0025 |    10M    |   440000000   |   420M         | /home/blyth/.opticks/rngcache/RNG/cuRANDWrapper_10000000_0_0.bin   | 
0026 +-----------+---------------+----------------+--------------------------------------------------------------------+
0027 |     3M    |   132000000   |   126M         | /home/blyth/.opticks/rngcache/RNG/cuRANDWrapper_3000000_0_0.bin    |
0028 +-----------+---------------+----------------+--------------------------------------------------------------------+
0029 |     2M    |    88000000   |    84M         | /home/blyth/.opticks/rngcache/RNG/cuRANDWrapper_2000000_0_0.bin    |
0030 +-----------+---------------+----------------+--------------------------------------------------------------------+
0031 |     1M    |    44000000   |    42M         | /home/blyth/.opticks/rngcache/RNG/cuRANDWrapper_1000000_0_0.bin    |
0032 +-----------+---------------+----------------+--------------------------------------------------------------------+
0033 
0034 +-----------+---------------+----------------+-------------------------------------------------------------------------------+
0035 |  num      | bytes (ls)    | filesize(du -h)|  path                                                                         |
0036 +===========+===============+================+===============================================================================+
0037 |    10M    |   440000000   |   420M         | /home/blyth/.opticks/rngcache/RNG/QCurandStateMonolithic_10000000_0_0.bin     |   
0038 +-----------+---------------+----------------+-------------------------------------------------------------------------------+
0039 |     3M    |   132000000   |   126M         | /home/blyth/.opticks/rngcache/RNG/QCurandStateMonolithic_3000000_0_0.bin      |
0040 +-----------+---------------+----------------+-------------------------------------------------------------------------------+
0041 |     1M    |    44000000   |    42M         | /home/blyth/.opticks/rngcache/RNG/QCurandStateMonolithic_1000000_0_0.bin      | 
0042 +-----------+---------------+----------------+-------------------------------------------------------------------------------+
0043 
0044 
0045 With GPU VRAM of 48G the limit coming from combination of photons and RNGs is about 400M
0046 
0047 * curand StateXORWOW item size in the files is 44 bytes which get padded to 48 bytes in curand StateXORWOW type
0048 * dealing with 16.4GB files for 400M states is uncomfortable, so will need to rearrange into multiple files
0049 * chunking 400M states into files of 10M states each would give 40 files, each of 440000000 bytes (420M) 
0050 * with 40-100 files of 10M states each, could push to a one billion photon launch given a GPU with 100G VRAM 
0051 * also could arrange for just the needed states (in 10M chunks) to be loaded+uploaded 
0052   depending on configured max photon, which depends on available VRAM 
0053 
0054 
0055 Decide on max size of photon launches by scaling from 48G for 400M, eg with 8G VRAM::
0056 
0057     In [2]: 8.*400./48.
0058     Out[2]: 66.66666666666667    ## so you might aim for 60M photons max with 8G VRAM
0059 
0060 
0061 HMM: 61M proves to be over optimistic for small VRAM, see ~/opticks/notes/max_photon_launch_size_with_8GB_VRAM.rst
0062 
0063 
0064 
0065 
0066 WIP:chunked creation, chunk naming, chunked save/load 
0067 
0068 See SCurandState.h SCurandChunk.h 
0069 
0070 **/
0071 
0072 #include <string>
0073 #include <cstdint>
0074 #include "QUDARAP_API_EXPORT.hh"
0075 #include "plog/Severity.h"
0076 #include "SCurandStateMonolithic.hh"
0077 
0078 #include "qcurandwrap.h"
0079 struct SLaunchSequence ; 
0080 
0081 struct QUDARAP_API QCurandStateMonolithic   // allocate + create + download + save of curand states kept in a single (monolithic) file, per the notes at the top of this header
0082 {
0083     static const plog::Severity LEVEL ;   // plog severity for this struct; only declared here, defined elsewhere
0084     static constexpr const char* EKEY = "QCurandStateMonolithic_SPEC" ;   // NOTE(review): presumably the name of an envvar supplying the spec string - confirm in the implementation
0085     static QCurandStateMonolithic* Create();   // factory without arguments; NOTE(review): presumably obtains the spec via EKEY - confirm
0086     static QCurandStateMonolithic* Create(const char* spec);   // factory from an explicit spec string; returns a raw pointer, ownership is not visible from this header
0087 
0088     const SCurandStateMonolithic scs ;   // by-value const member; presumably initialized from the constructor argument of the same name
0089     qcurandwrap<XORWOW>* h_cs ;   // NOTE(review): h_ prefix suggests a host-side instance - confirm
0090     qcurandwrap<XORWOW>* cs ;   // NOTE(review): relationship to h_cs and d_cs is not visible here - confirm in the implementation
0091     qcurandwrap<XORWOW>* d_cs ;   // NOTE(review): d_ prefix suggests a device-side instance - confirm
0092     SLaunchSequence* lseq ;   // SLaunchSequence is only forward declared above; the header notes say it configures the curand_init CUDA launches
0093 
0094     QCurandStateMonolithic(const SCurandStateMonolithic& scs);   // not marked explicit, so implicit conversion from SCurandStateMonolithic is permitted
0095     void init();   // NOTE(review): presumably invoked from the constructor and sequencing the steps below - confirm
0096     void alloc();   // the "allocate" step named in the header title; what gets allocated is not visible here
0097     void create();   // the "create" step; the header notes say states are created with curand_init via multiple launches
0098     void download();   // the "download" step; the header notes say states originate on the device and need downloading
0099     void save() const ;   // the "save" step; const, so it does not modify members of this struct
0100 
0101     std::string desc() const ;   // returns a std::string description; const member function
0102 };