|
|
|||
File indexing completed on 2026-04-09 07:49:08
#pragma once
/**
QCurandStateMonolithic.hh : allocate + create + download + save
=================================================================

* creates states using curand_init with CUDA launches configured by SLaunchSequence.h
* loading/saving from/to file is handled separately by QRng

The RNG states originate on the device as a result of
calling curand_init and they need to be downloaded and stored
into files named informatively with seeds, counts, offsets etc.

A difficulty is that calling curand_init is a very heavy kernel,
so currently the below large files are created via multiple launches all
writing into the single files shown below.
The old cuRANDWrapper and new QCurandStateMonolithic have exactly the same contents.

+-----------+---------------+----------------+--------------------------------------------------------------------+
| num       | bytes (ls)    | filesize(du -h)| path                                                               |
+===========+===============+================+====================================================================+
| 200M      | 8800000000    | 8.2G           | /home/blyth/.opticks/rngcache/RNG/cuRANDWrapper_200000000_0_0.bin  |
+-----------+---------------+----------------+--------------------------------------------------------------------+
| 100M      | 4400000000    | 4.1G           | /home/blyth/.opticks/rngcache/RNG/cuRANDWrapper_100000000_0_0.bin  |
+-----------+---------------+----------------+--------------------------------------------------------------------+
| 10M       | 440000000     | 420M           | /home/blyth/.opticks/rngcache/RNG/cuRANDWrapper_10000000_0_0.bin   |
+-----------+---------------+----------------+--------------------------------------------------------------------+
| 3M        | 132000000     | 126M           | /home/blyth/.opticks/rngcache/RNG/cuRANDWrapper_3000000_0_0.bin    |
+-----------+---------------+----------------+--------------------------------------------------------------------+
| 2M        | 88000000      | 84M            | /home/blyth/.opticks/rngcache/RNG/cuRANDWrapper_2000000_0_0.bin    |
+-----------+---------------+----------------+--------------------------------------------------------------------+
| 1M        | 44000000      | 42M            | /home/blyth/.opticks/rngcache/RNG/cuRANDWrapper_1000000_0_0.bin    |
+-----------+---------------+----------------+--------------------------------------------------------------------+

+-----------+---------------+----------------+-------------------------------------------------------------------------------+
| num       | bytes (ls)    | filesize(du -h)| path                                                                          |
+===========+===============+================+===============================================================================+
| 10M       | 440000000     | 420M           | /home/blyth/.opticks/rngcache/RNG/QCurandStateMonolithic_10000000_0_0.bin     |
+-----------+---------------+----------------+-------------------------------------------------------------------------------+
| 3M        | 132000000     | 126M           | /home/blyth/.opticks/rngcache/RNG/QCurandStateMonolithic_3000000_0_0.bin      |
+-----------+---------------+----------------+-------------------------------------------------------------------------------+
| 1M        | 44000000      | 42M            | /home/blyth/.opticks/rngcache/RNG/QCurandStateMonolithic_1000000_0_0.bin      |
+-----------+---------------+----------------+-------------------------------------------------------------------------------+


With GPU VRAM of 48G the limit coming from combination of photons and RNGs is about 400M

* curandStateXORWOW item size in the files is 44 bytes which get padded to 48 bytes in curandStateXORWOW type
* dealing with 16.4GB files for 400M states is uncomfortable, so will need to rearrange into multiple files
* chunking into files of 10M states each would correspond to 40 files of 10M states each (420M bytes)
* with 40-100 files of 10M states each could push to one billion photon launch if had GPU with 100G VRAM
* also could arrange for just the needed states (in 10M chunks) to be loaded+uploaded
  depending on configured max photon, which depends on available VRAM


Decide on max size of photon launches by scaling from 48G for 400M, eg with 8G VRAM::

    In [2]: 8.*400./48.
    Out[2]: 66.66666666666667    ## so you might aim for 60M photons max with 8G VRAM


HMM: 61M proves to be over-optimistic for small VRAM, see ~/opticks/notes/max_photon_launch_size_with_8GB_VRAM.rst




WIP:chunked creation, chunk naming, chunked save/load

See SCurandState.h SCurandChunk.h

**/

#include <string>
#include <cstdint>
#include "QUDARAP_API_EXPORT.hh"
#include "plog/Severity.h"
#include "SCurandStateMonolithic.hh"

#include "qcurandwrap.h"
struct SLaunchSequence ;

// Declares the allocate / create / download / save steps for one monolithic
// set of curand states, as summarised in the file comment above.
// Only declarations live here; all definitions are out of line.
struct QUDARAP_API QCurandStateMonolithic
{
    // Logging severity for this struct; declared here, defined elsewhere.
    static const plog::Severity LEVEL ;

    // NOTE(review): presumably the name of an environment variable holding
    // the state spec consumed by Create() -- confirm in the implementation.
    static constexpr const char* EKEY = "QCurandStateMonolithic_SPEC" ;

    // Factory functions returning a raw pointer.
    // NOTE(review): ownership of the returned object is not stated in this
    // header; presumably the caller owns it -- confirm.
    static QCurandStateMonolithic* Create();
    static QCurandStateMonolithic* Create(const char* spec);

    // Copy of the spec object passed to the constructor; const, so instances
    // of this struct are not copy-assignable.
    const SCurandStateMonolithic scs ;

    // Three handles onto the XORWOW state wrapper.
    // NOTE(review): the h_ / d_ prefixes suggest host-side and device-side
    // instances with cs as a working copy -- confirm in the implementation.
    qcurandwrap<XORWOW>* h_cs ;
    qcurandwrap<XORWOW>* cs ;
    qcurandwrap<XORWOW>* d_cs ;

    // Launch sequence used to configure the CUDA launches that create the
    // states (see file comment); only forward-declared in this header.
    SLaunchSequence*     lseq ;

    // Single-argument constructor is not marked explicit, so implicit
    // conversion from SCurandStateMonolithic is permitted.
    QCurandStateMonolithic(const SCurandStateMonolithic& scs);

    // Lifecycle steps named in the file title: allocate + create + download + save.
    // NOTE(review): the call order and whether init() drives the others is
    // not visible from this header -- confirm in the implementation.
    void init();
    void alloc();
    void create();
    void download();
    void save() const ;

    // Returns a string description of this object.
    std::string desc() const ;
};
| [ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
|
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
|