Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-04-09 07:49:31

0001 #pragma once
0002 /**
0003 scontext.h : holds sdevice.h structs for all and visible GPUs
0004 ==============================================================
0005 
0006 Canonical instance is SEventConfig::CONTEXT instantiated by
0007 SEventConfig::Initialize with SEventConfig::Initialize_Meta.
0008 This Initialize happens on instantiation of the first SEvt.
0009 
0010 ::
0011 
0012     A[blyth@localhost opticks]$ opticks-fl scontext.h
0013     ./sysrap/CMakeLists.txt
0014     ./sysrap/SEventConfig.cc
0015     ./sysrap/sdevice.h
0016     ./sysrap/tests/scontext_test.cc
0017     ./sysrap/scontext.h
0018 
0019 ::
0020 
0021    ~/o/sysrap/tests/scontext_test.sh
0022 
0023 
0024 **/
0025 
0026 #include <cstdlib>
0027 #include <csignal>
0028 #include "sdevice.h"
0029 #include "ssys.h"
0030 #include "SEventConfig.hh"
0031 
0032 struct scontext
0033 {
0034     static constexpr const char* _level = "scontext__level" ;
0035     static int level ;
0036 
0037     scontext();
0038     void init();
0039     void initPersist();
0040     void initConfig();
0041     void initConfig_SetDevice(int idev);
0042 
0043     std::vector<sdevice> visible_devices ;
0044     std::vector<sdevice> all_devices ;
0045 
0046     std::string desc() const ;
0047     std::string brief() const ;
0048     std::string vram() const ;
0049 
0050     // query visible_devices[idx]
0051     std::string device_name(int idx) const ;
0052     size_t totalGlobalMem_bytes(int idx) const ;
0053     size_t totalGlobalMem_GB(int idx) const ;
0054 
0055     std::string main(int arg, char** argv) const ;
0056 };
0057 
0058 
0059 inline int scontext::level = ssys::getenvint(_level, 0 );
0060 
0061 
0062 
0063 
0064 inline scontext::scontext()
0065 {
0066     init();
0067 }
0068 inline void scontext::init()
0069 {
0070     initPersist();
0071     initConfig();
0072 }
0073 
0074 
0075 /**
0076 scontext::initPersist
0077 -----------------------
0078 
0079 HMM: in workstation context it makes sense to persist
0080 info on all GPUs into $HOME/.opticks/scontext as that
0081 does not change much.
0082 
0083 BUT in batch submission context on a GPU cluster
0084 the number and identity of GPUs can depend on the
0085 job submission so using a fixed place makes no
0086 sense.  In that situation a more appropriate
0087 location is the invoking directory.
0088 
0089 Original motivation for persisting GPU info for all GPUs
0090 (ie all those detected by CUDA API when CUDA_VISIBLE_DEVICES is not defined)
0091 was for making sense of which GPU is in use in a changing environment
0092 of CUDA_VISIBLE_DEVICES values and hence indices.
0093 
0094 Using the record for all GPUs enabled associating an absolute ordinal
0095 (identity based on uuid and name of the GPU) to GPUs even when
0096 CUDA_VISIBLE_DEVICES means that not all GPUs are visible.
0097 
0098 **/
0099 
0100 
0101 inline void scontext::initPersist()
0102 {
0103     if(level > 0) std::cout << "[scontext::initPersist" << std::endl ;
0104 
0105     sdevice::Visible(visible_devices);
0106     sdevice::Load(   all_devices );   // seems all_devices not used much from here
0107 
0108     if(level > 0) std::cout << "]scontext::initPersist" << std::endl ;
0109 }
0110 
0111 inline void scontext::initConfig()
0112 {
0113     int numdev = visible_devices.size();
0114     int idev = -1 ;
0115 
0116     if(numdev == 0)
0117     {
0118         std::cerr << "scontext::initConfig : ZERO VISIBLE DEVICES - CHECK CUDA_VISIBLE_DEVICES envvar \n" ;
0119     }
0120     else if(numdev == 1)
0121     {
0122         idev = 0 ;
0123     }
0124     else if(numdev > 1)
0125     {
0126         idev = 0 ;
0127         std::cerr
0128             << "scontext::initConfig : WARNING - MORE THAN ONE VISIBLE DEVICES - DEFAULTING TO USE idev:[" << idev << "]\n"
0129             << "scontext::initConfig : QUELL THIS WARNING BY SETTING/CHANGING CUDA_VISIBLE_DEVICES envvar TO SELECT ONE DEVICE\n"
0130             ;
0131     }
0132 
0133     initConfig_SetDevice(idev);
0134 }
0135 
0136 inline void scontext::initConfig_SetDevice(int idev)
0137 {
0138     int numdev = visible_devices.size();
0139     bool idev_valid = idev >=0 && idev < numdev ;
0140 
0141     if( !idev_valid || level > 0 ) std::cerr
0142         << "scontext::initConfig_SetDevice "
0143         << " numdev " << numdev
0144         << " idev " << idev
0145         << " level " << level
0146         << " idev_valid " << ( idev_valid ? "YES" : "NO " )
0147         << "\n"
0148         ;
0149 
0150     if(!idev_valid) return ;
0151 
0152     std::string name = device_name(idev);
0153     size_t vram = totalGlobalMem_bytes(idev);
0154     SEventConfig::SetDevice(vram, name);
0155 }
0156 
0157 
0158 inline std::string scontext::desc() const
0159 {
0160     char* cvd = getenv("CUDA_VISIBLE_DEVICES") ;
0161     std::stringstream ss ;
0162     ss << "scontext::desc [" << brief() << "]" << std::endl ;
0163     ss << "CUDA_VISIBLE_DEVICES : [" << ( cvd ? cvd : "-" ) << "]" << std::endl;
0164     ss << "all_devices" << std::endl ;
0165     ss << sdevice::Desc(all_devices) ;
0166     ss << "visible_devices" << std::endl ;
0167     ss << sdevice::Desc(visible_devices) ;
0168     std::string str = ss.str();
0169     return str ;
0170 }
0171 
0172 inline std::string scontext::brief() const
0173 {
0174     return sdevice::Brief(visible_devices) ;
0175 }
0176 
0177 inline std::string scontext::vram() const
0178 {
0179     return sdevice::VRAM(visible_devices) ;
0180 }
0181 
0182 inline std::string scontext::main(int argc, char** argv) const
0183 {
0184     std::stringstream ss ;
0185     if(argc == 1) ss << brief() ;
0186 
0187     for(int i=1 ; i < argc ; i++)
0188     {
0189         char* arg = argv[i] ;
0190         if(strcmp(arg, "--brief")==0) ss << brief() << "\n" ;
0191         if(strcmp(arg, "--desc")==0)  ss << desc() << "\n" ;
0192         if(strcmp(arg, "--vram")==0)  ss << vram() << "\n" ;
0193         if(strcmp(arg, "--name0")==0)  ss << device_name(0) << "\n" ;
0194         if(strcmp(arg, "--name1")==0)  ss << device_name(1) << "\n" ;
0195         if(strcmp(arg, "--vram0")==0)  ss << totalGlobalMem_bytes(0) << "\n" ;
0196         if(strcmp(arg, "--vram1")==0)  ss << totalGlobalMem_bytes(1) << "\n" ;
0197         if(strcmp(arg, "--vram0g")==0)  ss << totalGlobalMem_GB(0) << "\n" ;
0198         if(strcmp(arg, "--vram1g")==0)  ss << totalGlobalMem_GB(1) << "\n" ;
0199 
0200     }
0201     std::string str = ss.str();
0202     return str ;
0203 }
0204 
0205 inline std::string scontext::device_name(int idx) const
0206 {
0207     return idx < int(visible_devices.size()) ? visible_devices[idx].name : "" ;
0208 }
0209 inline size_t scontext::totalGlobalMem_bytes(int idx) const
0210 {
0211     return idx < int(visible_devices.size()) ? visible_devices[idx].totalGlobalMem_bytes() : 0 ;
0212 }
0213 inline size_t scontext::totalGlobalMem_GB(int idx) const
0214 {
0215     return idx < int(visible_devices.size()) ? visible_devices[idx].totalGlobalMem_GB() : 0 ;
0216 }
0217 
0218 
0219