Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-04-09 07:49:33

0001 #pragma once
0002 
0003 /**
0004 sdevice.h
0005 ============
0006 
0007 Simplified version of the former cudarap/CDevice.cu
0008 
0009 To select the GPU need to use CUDA_VISIBLE_DEVICES
0010 and metadata recording is handled with sdevice.h scontext.h
0011 
0012 * scontext.h needs updating to handle updated sdevice.h and
0013   metadata from scontext needs to be included into the SEvt run metadata
0014 
* running sysrap/tests/sdevice_test.sh without CUDA_VISIBLE_DEVICES
  defined and with sdevice__PERSIST=1 persists info on all GPUs into
  $HOME/.opticks/sdevice/sdevice.bin (the directory can be overridden
  with the sdevice__DIRPATH envvar)
0017 
0018 **/
0019 
0020 #include <cstddef>
0021 #include <vector>
0022 #include <iostream>
0023 #include <iomanip>
0024 #include <string>
0025 #include <cstring>
0026 #include <cassert>
0027 #include <csignal>
0028 #include <sstream>
0029 #include <cstdlib>
0030 #include <fstream>
0031 #include <cuda_runtime_api.h>
0032 
0033 #include "sdirectory.h"
0034 #include "spath.h"
0035 #include "ssys.h"
0036 
0037 
/**
sdevice
---------

Record describing one CUDA device together with static helpers that
collect such records with the CUDA runtime API and persist them into
a binary file. The persisted list of all devices is what allows the
ordinal of a device to be recovered when CUDA_VISIBLE_DEVICES hides
some of the GPUs.

**/
struct sdevice
{
    // names of the controlling envvars, and defaults
    static constexpr const char* CVD             = "CUDA_VISIBLE_DEVICES" ;
    static constexpr const char* _DIRPATH        = "sdevice__DIRPATH" ;
    static constexpr const char* _PERSIST        = "sdevice__PERSIST" ;
    static constexpr const char* DIRPATH_DEFAULT = "$HOME/.opticks/sdevice" ;  // formerly scontext
    static constexpr const char* FILENAME        = "sdevice.bin" ;
    static constexpr const char* _level          = "sdevice__level" ;

    static int level ;      // values above zero enable verbose output
    static int PERSIST ;    // Visible only saves the device list when this is 1

    // The record : write/read serialize these fields one by one, in this order

    int ordinal ;               // index among all GPUs, or -1 placeholder until matched
    int index ;                 // device index used with cudaGetDeviceProperties

    char name[256] ;
    char uuid[16] ;
    int major  ;
    int minor  ;
    int compute_capability ;    // major*10 + minor
    int multiProcessorCount ;
    size_t totalGlobalMem ;     // bytes


    // single device

    const char* brief() const ;
    const char* desc() const ;
    bool matches(const sdevice& other) const ;
    size_t totalGlobalMem_bytes() const ;
    float totalGlobalMem_GB() const ;
    void write( std::ostream& out ) const ;
    void read( std::istream& in );

    // querying the CUDA runtime

    static int DeviceCount();
    static void Collect(std::vector<sdevice>& devices, bool ordinal_from_index=false );
    static void Visible(std::vector<sdevice>& visible );
    static int FindIndexOfMatchingDevice( const sdevice& d, const std::vector<sdevice>& all );

    // persisting

    static int Size();
    static std::string Path(const char* dirpath);
    static const char* ResolveDirPath();
    static const char* CreateDirPath();
    static void Save(const std::vector<sdevice>& devices );
    static void Load(      std::vector<sdevice>& devices );

    // summaries of a list of devices

    static std::string Brief(const std::vector<sdevice>& devices );
    static std::string Desc( const std::vector<sdevice>& devices );
    static std::string VRAM( const std::vector<sdevice>& devices );
};
0087 
0088 
0089 /**
0090 With c++11::
0091 
0092     warning: inline variables are only available with ‘-std=c++17’ or ‘-std=gnu++17’
0093 
0094 **/
0095 
0096 inline int sdevice::level = ssys::getenvint(_level, 0 );
0097 inline int sdevice::PERSIST = ssys::getenvint(_PERSIST, 0);
0098 
0099 
0100 inline const char* sdevice::brief() const
0101 {
0102     std::stringstream ss ;
0103     ss << "idx/ord/mpc/cc:"
0104        << index
0105        << "/"
0106        << ordinal
0107        << "/"
0108        << multiProcessorCount
0109        << "/"
0110        << compute_capability
0111        << std::setw(8) << std::fixed << std::setprecision(3) <<  totalGlobalMem_GB() << " GB "
0112        ;
0113     std::string s = ss.str();
0114     return strdup(s.c_str());
0115 }
0116 
0117 inline const char* sdevice::desc() const
0118 {
0119     std::stringstream ss ;
0120     ss
0121         << std::setw(30) << brief()
0122         << " "
0123         << name
0124         ;
0125     std::string s = ss.str();
0126     return strdup(s.c_str());
0127 }
0128 
0129 /**
0130 sdevice::matches
0131 ------------------
0132 
0133 Returns true when both uuid and name matches.
0134 
0135 **/
0136 
0137 inline bool sdevice::matches(const sdevice& other) const
0138 {
0139    return strncmp(other.uuid, uuid, sizeof(uuid)) == 0 && strncmp(other.name, name, sizeof(name)) == 0;
0140 }
0141 
0142 inline size_t sdevice::totalGlobalMem_bytes() const
0143 {
0144     return totalGlobalMem ;
0145 }
0146 inline float sdevice::totalGlobalMem_GB() const
0147 {
0148     return float(totalGlobalMem)/float(1024*1024*1024)  ;
0149 }
0150 
0151 inline int sdevice::DeviceCount() // static
0152 {
0153     int devCount(0) ;  // TWAS A BUG TO NOT INITIALIZE THIS
0154     cudaGetDeviceCount(&devCount);
0155     return devCount ;
0156 }
0157 
0158 /**
0159 sdevice::Collect
0160 --------------------
0161 
0162 Use CUDA API to collect a summary of the cudaDeviceProp properties
0163 regarding all attached devices into the vector of sdevice argument.
0164 
0165 ordinal_from_index:true
0166     sdevice.ordinal value is taken from the index corresponding to the ordering
0167     of devices returned by cudaGetDeviceProperties(&p, i) : this
0168     is used by sdevice::Visible when when no CUDA_VISIBLE_DEVICES envvar
0169     is defined
0170 
0171 ordinal_from_index:false
0172     sdevice.ordinal is set to initial placeholder -1 : sdevice::Visible
0173     however when CUDA_VISIBLE_DEVICES envvar is defined sets the ordinal
0174     by matching device properties with the persisted list of all of them
0175 
0176 **/
0177 
0178 inline void sdevice::Collect(std::vector<sdevice>& devices, bool ordinal_from_index)
0179 {
0180     int devCount = DeviceCount() ;
0181     if(level > 0) std::cout << "sdevice::Collect cudaGetDeviceCount : " << devCount << std::endl ;
0182 
0183     for (int i = 0; i < devCount; ++i)
0184     {
0185         cudaDeviceProp p;
0186         cudaGetDeviceProperties(&p, i);
0187 
0188         sdevice d ;
0189 
0190         assert( sizeof(p.name) == sizeof(char)*256 ) ;
0191         assert( sizeof(d.name) == sizeof(char)*256 ) ;
0192         strncpy( d.name, p.name, sizeof(d.name) );
0193 
0194 #ifndef CUDART_VERSION
0195 #error CUDART_VERSION Undefined!
0196 #elif (CUDART_VERSION >= 10000)
0197         assert( sizeof(p.uuid) == sizeof(uuid) );
0198         strncpy( d.uuid, p.uuid.bytes, sizeof(p.uuid) );
0199 #elif (CUDART_VERSION >= 9000)
0200 #endif
0201 
0202         d.index = i ;
0203         d.ordinal = ordinal_from_index ? i : -1 ;
0204         d.major = p.major ;
0205         d.minor = p.minor ;
0206         d.compute_capability = p.major*10 + p.minor ;
0207 
0208         d.multiProcessorCount = p.multiProcessorCount ;
0209         d.totalGlobalMem = p.totalGlobalMem ;
0210 
0211         devices.push_back(d);
0212     }
0213 }
0214 
0215 inline int sdevice::Size()
0216 {
0217     return
0218         sizeof(int) +       // ordinal
0219         sizeof(int) +       // index
0220         sizeof(char)*256 +  // name
0221         sizeof(char)*16 +   // uuid
0222         sizeof(int) +       // major
0223         sizeof(int) +       // minor
0224         sizeof(int) +       // compute_capability
0225         sizeof(int) +       // multiProcessorCount
0226         sizeof(size_t) ;    // totalGlobalMem
0227 }
0228 inline void sdevice::write( std::ostream& out ) const
0229 {
0230     int size = Size();
0231     char* buffer = new char[size];
0232     char* p = buffer ;
0233 
0234     memcpy( p, &ordinal,             sizeof(ordinal) )             ; p += sizeof(ordinal) ;
0235     memcpy( p, &index,               sizeof(index) )               ; p += sizeof(index) ;
0236     memcpy( p, name,                 sizeof(name) )                ; p += sizeof(name) ;
0237     memcpy( p, uuid,                 sizeof(uuid) )                ; p += sizeof(uuid) ;
0238     memcpy( p, &major,               sizeof(major) )               ; p += sizeof(major) ;
0239     memcpy( p, &minor,               sizeof(minor) )               ; p += sizeof(minor) ;
0240     memcpy( p, &compute_capability,  sizeof(compute_capability) )  ; p += sizeof(compute_capability) ;
0241     memcpy( p, &multiProcessorCount, sizeof(multiProcessorCount) ) ; p += sizeof(multiProcessorCount) ;
0242     memcpy( p, &totalGlobalMem,      sizeof(totalGlobalMem) )      ; p += sizeof(totalGlobalMem) ;
0243 
0244     out.write(buffer, size);
0245     assert( p - buffer == size );
0246     delete [] buffer ;
0247 }
0248 
0249 inline void sdevice::read( std::istream& in )
0250 {
0251     int size = Size();
0252     char* buffer = new char[size];
0253     in.read(buffer, size);
0254     char* p = buffer ;
0255 
0256     memcpy( &ordinal,  p,           sizeof(ordinal) )             ; p += sizeof(ordinal) ;
0257     memcpy( &index,    p,           sizeof(index) )               ; p += sizeof(index) ;
0258     memcpy( name,      p,           sizeof(name) )                ; p += sizeof(name) ;
0259     memcpy( uuid,      p,           sizeof(uuid) )                ; p += sizeof(uuid) ;
0260     memcpy( &major,    p,           sizeof(major) )               ; p += sizeof(major) ;
0261     memcpy( &minor,    p,           sizeof(minor) )               ; p += sizeof(minor) ;
0262     memcpy( &compute_capability, p, sizeof(compute_capability) )  ; p += sizeof(compute_capability) ;
0263     memcpy( &multiProcessorCount,p, sizeof(multiProcessorCount) ) ; p += sizeof(multiProcessorCount) ;
0264     memcpy( &totalGlobalMem,     p, sizeof(totalGlobalMem) )      ; p += sizeof(totalGlobalMem) ;
0265 
0266     delete [] buffer ;
0267 }
0268 
0269 
0270 
0271 /**
0272 sdevice::Visible
0273 ------------------
0274 
0275 This assumes that the ordinal is the index when all GPUs are visible
0276 and it finds this by arranging to persist the query when
0277 CUDA_VISIBLE_DEVICES is not defined and use that to provide something
0278 to match against when the envvar is defined.
0279 
0280 Initially tried to do this in one go by changing envvar
0281 and repeating the query. But that doesnt work,
0282 presumably as the CUDA_VISIBLE_DEVICES value only has
0283 any effect when cuda is initialized.
0284 
0285 Of course the disadvantage of this approach
0286 is that need to arrange to do the persisting of all devices
0287 at some initialization time and need to find an
0288 appropriate place for the file.
0289 
0290 The purpose is for reference running, especially performance
0291 scanning : so its acceptable to require running a metadata
0292 capturing executable prior to scanning.
0293 
0294 Possibly NVML can provide a better solution, see nvml-
0295 Actually maybe not : the NVML enumeration order follows nvidia-smi
0296 not CUDA.
0297 
0298 
1. check for the existence of the CUDA_VISIBLE_DEVICES envvar, setting *no_cvd*
   when it is not defined, which indicates that all GPUs are visible
0301 
0302 2. invoke Collect setting *ordinal_from_index* according to *no_cvd*
0303 
0304 
0305 **/
0306 
0307 inline void sdevice::Visible(std::vector<sdevice>& visible )
0308 {
0309     if(level > 0) std::cout << "[sdevice::Visible" << std::endl ;
0310 
0311     char* cvd = getenv(CVD);
0312     bool no_cvd = cvd == NULL ;
0313     std::vector<sdevice> all ;
0314 
0315     bool ordinal_from_index = no_cvd  ;
0316     Collect(visible, ordinal_from_index);
0317 
0318     int VISIBLE_COUNT = visible.size() ;
0319     assert( sdevice::DeviceCount() == VISIBLE_COUNT );
0320 
0321     if(level > 0) std::cerr << "sdevice::Visible no_cvd:" << no_cvd << std::endl ;
0322 
0323 
0324     if( no_cvd )
0325     {
0326         if(VISIBLE_COUNT > 0 && PERSIST == 1)
0327         {
0328             if(level > 0) std::cerr
0329                 << "sdevice::Visible no_cvd save"
0330                 << " VISIBLE_COUNT " << VISIBLE_COUNT
0331                 << " PERSIST " << PERSIST
0332                 << " level " << level
0333                 << "\n"
0334                 ;
0335             Save( visible );
0336         }
0337     }
0338     else
0339     {
0340         if(level > 0) std::cerr << "sdevice::Visible with cvd " << cvd << std::endl ;
0341         Load(all);
0342 
0343         for(unsigned i=0 ; i < visible.size() ; i++)
0344         {
0345             sdevice& v = visible[i] ;
0346             v.ordinal = FindIndexOfMatchingDevice( v, all );
0347         }
0348     }
0349     if(level > 0) std::cout << "]sdevice::Visible" << std::endl ;
0350 }
0351 
0352 
0353 const char* sdevice::ResolveDirPath()
0354 {
0355     const char* DIRPATH = ssys::getenvvar(_DIRPATH, DIRPATH_DEFAULT)  ;
0356     const char* dirpath = spath::Resolve(DIRPATH) ;
0357     return dirpath ;
0358 }
0359 
0360 
0361 const char* sdevice::CreateDirPath()
0362 {
0363     const char* dirpath = ResolveDirPath();
0364     bool exists = spath::Exists(dirpath);
0365 
0366     if(level > 0) std::cout
0367         << "[sdevice::CreateDirPath"
0368         << " level " << level << "\n"
0369         << " dirpath [" << ( dirpath ? dirpath : "-" )  << "]\n"
0370         << " _DIRPATH [" << ( _DIRPATH ? _DIRPATH : "-" ) << "]\n"
0371         << " DIRPATH_DEFAULT [" << ( DIRPATH_DEFAULT ? DIRPATH_DEFAULT : "-" ) << "]\n"
0372         << " exists " << ( exists ? "YES" : "NO " ) << "\n"
0373         << "\n"
0374         ;
0375 
0376     if( !exists )
0377     {
0378         int rc = sdirectory::MakeDirs(dirpath, 0);
0379         if(rc!=0) std::cerr
0380             << "sdevice::CreateDirPath "
0381             << " FAILED to create dir \n"
0382             << " ABOUT TO RAISE SIGINT \n"
0383             << " level " << level << "\n"
0384             << " dirpath [" << ( dirpath ? dirpath : "-" )  << "]\n"
0385             << " _DIRPATH [" << ( _DIRPATH ? _DIRPATH : "-" ) << "]\n"
0386             << " DIRPATH_DEFAULT [" << ( DIRPATH_DEFAULT ? DIRPATH_DEFAULT : "-" ) << "]\n"
0387             ;
0388 
0389         if(rc!=0) std::raise(SIGINT);
0390         assert(rc == 0);
0391     }
0392     return dirpath ;
0393 }
0394 
0395 
0396 
0397 /**
0398 sdevice::FindIndexOfMatchingDevice
0399 ------------------------------------
0400 
0401 **/
0402 
0403 inline int sdevice::FindIndexOfMatchingDevice( const sdevice& d, const std::vector<sdevice>& all )
0404 {
0405     int index = -1 ;
0406     if(level > 0) std::cout
0407          << "sdevice::FindIndexOfMatchingDevice"
0408          << " d " << d.desc()
0409          << " all.size " << all.size()
0410          << std::endl
0411          ;
0412 
0413     for(unsigned i=0 ; i < all.size() ; i++)
0414     {
0415         const sdevice& a = all[i] ;
0416         bool m = a.matches(d) ;
0417         if(level > 0) std::cout
0418             << "sdevice::FindIndexOfMatchingDevice"
0419             << " a " << a.desc()
0420             << " m " << m
0421             << std::endl
0422             ;
0423 
0424         if(m)
0425         {
0426            index = a.index ;
0427            break ;
0428         }
0429     }
0430     if(level > 0) std::cout << "sdevice::FindIndexOfMatchingDevice  index : " << index << std::endl ;
0431     return index ;
0432 }
0433 
0434 
0435 
0436 
0437 inline std::string sdevice::Path(const char* dirpath)
0438 {
0439     std::stringstream ss ;
0440     if( dirpath ) ss << dirpath << "/" ;
0441     ss << FILENAME ;
0442     return ss.str();
0443 }
0444 
0445 /**
0446 sdevice::Save
0447 --------------
0448 
0449 All sdevice struct from the vector are written into a single file
0450 
0451 **/
0452 
0453 inline void sdevice::Save( const std::vector<sdevice>& devices )
0454 {
0455     const char* dirpath = CreateDirPath();
0456     std::string _path = Path(dirpath);
0457     const char* path = _path.c_str();
0458 
0459     if(level > 0) std::cout
0460         << "sdevice::Save "
0461         << " dirpath [" << ( dirpath ? dirpath : "-" ) << "]"
0462         << " path [" << ( path ? path : "-" ) << "]"
0463         << std::endl
0464         ;
0465 
0466     std::ofstream out(path, std::ofstream::binary);
0467     if(out.fail())
0468     {
0469         std::cerr << "sdevice::Save failed open for [" << ( path ? path : "-" ) << "]\n" ;
0470         return ;
0471     }
0472 
0473     for(unsigned i = 0 ; i < devices.size() ; ++i )
0474     {
0475         const sdevice& d = devices[i] ;
0476         d.write(out);
0477     }
0478 }
0479 
0480 /**
0481 sdevice::Load
0482 ---------------
0483 
0484 The sdevice struct vector is populated by reading
0485 from the single file until reaching EOF.
0486 
0487 **/
0488 
0489 
0490 inline void sdevice::Load( std::vector<sdevice>& devices )
0491 {
0492     const char* dirpath = ResolveDirPath();
0493     std::string _path = Path(dirpath);
0494     const char* path = _path.c_str();
0495 
0496     if(level > 0) std::cout
0497         << "sdevice::Load"
0498         << " dirpath [" << ( dirpath ? dirpath : "-" ) << "]"
0499         << " path [" << ( path ? path : "-" ) << "]"
0500         << std::endl
0501         ;
0502     std::ifstream in(path, std::ofstream::binary);
0503 
0504     sdevice d ;
0505 
0506     while(true)
0507     {
0508         d.read(in);
0509         if(in.eof()) return ;
0510         if(in.fail())
0511         {
0512             if( level > 0 || PERSIST == 1 ) std::cerr
0513                 << "sdevice::Load"
0514                 << " failed read from "
0515                 << " dirpath [" << ( dirpath ? dirpath : "-" ) << "]"
0516                 << " path [" << ( path ? path : "-" ) << "]"
0517                 << " PERSIST " << PERSIST
0518                 << std::endl
0519                 ;
0520             return ;
0521         }
0522         devices.push_back(d);
0523     }
0524 }
0525 
0526 inline std::string sdevice::Brief( const std::vector<sdevice>& devices )
0527 {
0528     std::stringstream ss ;
0529     for(unsigned i=0 ; i < devices.size() ; i++)
0530     {
0531         const sdevice& d = devices[i] ;
0532         ss << d.ordinal << ':' ;
0533         for(unsigned j=0 ; j < strlen(d.name) ; j++)
0534         {
0535             char c = *(d.name+j) ;
0536             ss << ( c == ' ' ? '_' : c ) ;
0537         }
0538         if( i < devices.size() - 1 ) ss << ' ' ;
0539     }
0540     return ss.str();
0541 }
0542 
0543 inline std::string sdevice::Desc( const std::vector<sdevice>& devices )
0544 {
0545     std::stringstream ss ;
0546     ss << "[" << Brief(devices) << "]" << std::endl  ;
0547     for(unsigned i=0 ; i < devices.size() ; i++)
0548     {
0549         const sdevice& d = devices[i] ;
0550         ss << d.desc() << std::endl ;
0551     }
0552     std::string str = ss.str();
0553     return str ;
0554 }
0555 
0556 inline std::string sdevice::VRAM( const std::vector<sdevice>& devices )
0557 {
0558     std::stringstream ss ;
0559     for(unsigned i=0 ; i < devices.size() ; i++)
0560     {
0561         const sdevice& d = devices[i] ;
0562         ss << d.totalGlobalMem_bytes() << "\n" ;
0563     }
0564     std::string str = ss.str();
0565     return str ;
0566 }
0567