Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-04-09 07:49:16

#!/bin/bash
# usage: print the notes/documentation heredoc below to stdout.
# The delimiter is quoted ('EOU') so the text is emitted literally:
# with an unquoted delimiter, example lines such as
# "echo $CUDA_VISIBLE_DEVICES" would be expanded against the caller's
# environment at print time, corrupting the displayed notes.
usage(){ cat << 'EOU'
scontext_test.sh
=================

This script must currently use the CMake built scontext_test
executable not the executable that this script was formerly
able to build.

TODO : get this standalone script to compile again by better
       dependency control with SEventConfig.cc (and dependencies)


::

    ~/o/sysrap/tests/scontext_test.sh run


* note that the nvidia-smi ordering does not necessarily match the CUDA ordering
* also I think that nvidia-smi ordering may change after reboots
* also the CUDA ordering can be changed with eg CUDA_VISIBLE_DEVICES=1,0

::

    N[blyth@localhost tests]$ ~/o/sysrap/tests/scontext_test.sh run
    0:TITAN_V 1:TITAN_RTX
    N[blyth@localhost tests]$ CUDA_VISIBLE_DEVICES=1 ~/o/sysrap/tests/scontext_test.sh
    1:TITAN_RTX
    N[blyth@localhost tests]$ CUDA_VISIBLE_DEVICES=0 ~/o/sysrap/tests/scontext_test.sh
    0:TITAN_V
    N[blyth@localhost tests]$ CUDA_VISIBLE_DEVICES=1 ~/o/sysrap/tests/scontext_test.sh
    1:TITAN_RTX
    N[blyth@localhost tests]$ CUDA_VISIBLE_DEVICES=0,1 ~/o/sysrap/tests/scontext_test.sh
    0:TITAN_V 1:TITAN_RTX
    N[blyth@localhost tests]$ CUDA_VISIBLE_DEVICES=1,0 ~/o/sysrap/tests/scontext_test.sh
    1:TITAN_RTX 0:TITAN_V
    N[blyth@localhost tests]$



    N[blyth@localhost tests]$ VERBOSE=1 ~/o/sysrap/tests/scontext_test.sh
    scontext::desc
    all_devices
    [0:TITAN_V 1:TITAN_RTX]
    idx/ord/mpc/cc:0/0/80/70  11.784 GB  TITAN V
    idx/ord/mpc/cc:1/1/72/75  23.652 GB  TITAN RTX
    visible_devices
    [0:TITAN_V 1:TITAN_RTX]
    idx/ord/mpc/cc:0/0/80/70  11.784 GB  TITAN V
    idx/ord/mpc/cc:1/1/72/75  23.652 GB  TITAN RTX


    N[blyth@localhost tests]$ VERBOSE=1 CUDA_VISIBLE_DEVICES=0 ~/o/sysrap/tests/scontext_test.sh
    scontext::desc
    all_devices
    [0:TITAN_V 1:TITAN_RTX]
    idx/ord/mpc/cc:0/0/80/70  11.784 GB  TITAN V
    idx/ord/mpc/cc:1/1/72/75  23.652 GB  TITAN RTX
    visible_devices
    [0:TITAN_V]
    idx/ord/mpc/cc:0/0/80/70  11.784 GB  TITAN V

    N[blyth@localhost tests]$ VERBOSE=1 CUDA_VISIBLE_DEVICES=1 ~/o/sysrap/tests/scontext_test.sh
    scontext::desc
    all_devices
    [0:TITAN_V 1:TITAN_RTX]
    idx/ord/mpc/cc:0/0/80/70  11.784 GB  TITAN V
    idx/ord/mpc/cc:1/1/72/75  23.652 GB  TITAN RTX
    visible_devices
    [1:TITAN_RTX]
    idx/ord/mpc/cc:0/1/72/75  23.652 GB  TITAN RTX

    N[blyth@localhost tests]$ VERBOSE=1 CUDA_VISIBLE_DEVICES=1,0 ~/o/sysrap/tests/scontext_test.sh
    scontext::desc
    all_devices
    [0:TITAN_V 1:TITAN_RTX]
    idx/ord/mpc/cc:0/0/80/70  11.784 GB  TITAN V
    idx/ord/mpc/cc:1/1/72/75  23.652 GB  TITAN RTX
    visible_devices
    [1:TITAN_RTX 0:TITAN_V]
    idx/ord/mpc/cc:0/1/72/75  23.652 GB  TITAN RTX
    idx/ord/mpc/cc:1/0/80/70  11.784 GB  TITAN V

    N[blyth@localhost tests]$ VERBOSE=1 CUDA_VISIBLE_DEVICES=0,1 ~/o/sysrap/tests/scontext_test.sh
    scontext::desc
    all_devices
    [0:TITAN_V 1:TITAN_RTX]
    idx/ord/mpc/cc:0/0/80/70  11.784 GB  TITAN V
    idx/ord/mpc/cc:1/1/72/75  23.652 GB  TITAN RTX
    visible_devices
    [0:TITAN_V 1:TITAN_RTX]
    idx/ord/mpc/cc:0/0/80/70  11.784 GB  TITAN V
    idx/ord/mpc/cc:1/1/72/75  23.652 GB  TITAN RTX



    N[blyth@localhost tests]$ nvidia-smi
    Mon Jun  5 19:51:47 2023
    +-----------------------------------------------------------------------------+
    | NVIDIA-SMI 435.21       Driver Version: 435.21       CUDA Version: 10.1     |
    |-------------------------------+----------------------+----------------------+
    | GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
    | Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
    |===============================+======================+======================|
    |   0  TITAN RTX           Off  | 00000000:73:00.0 Off |                  N/A |
    | 39%   55C    P0    71W / 280W |      0MiB / 24219MiB |      0%      Default |
    +-------------------------------+----------------------+----------------------+
    |   1  TITAN V             Off  | 00000000:A6:00.0 Off |                  N/A |
    | 46%   54C    P8    N/A /  N/A |      0MiB / 12066MiB |      0%      Default |
    +-------------------------------+----------------------+----------------------+




Try deleting ~/.opticks/scontext and see how to recreate
-----------------------------------------------------------

* THESE ARE OLD NOTES PRIOR TO MOVE TO USE ~/.opticks/sdevice/sdevice.bin


Initially running works::

    A[blyth@localhost ~]$ opticks/sysrap/tests/scontext_test.sh run
    0:NVIDIA_RTX_5000_Ada_GenerationA[blyth@localhost ~]$ l ~/.opticks/scontext/
    total 4
    4 -rw-r--r--. 1 blyth blyth 304 Aug 30 16:07 sdevice.bin
    0 drwxr-xr-x. 7 blyth blyth  86 Aug 30 09:51 ..
    0 drwxr-xr-x. 2 blyth blyth  25 Aug 29 22:06 .

Remove the scontext directory causes the expected error::

    A[blyth@localhost ~]$ rm -rf  ~/.opticks/scontext
    A[blyth@localhost ~]$ opticks/sysrap/tests/scontext_test.sh run
    sdevice::Load failed read from  dirpath_ /home/blyth/.opticks/scontext dirpath /home/blyth/.opticks/scontext path /home/blyth/.opticks/scontext/sdevice.bin
    sdevice::Load failed read from  dirpath_ /home/blyth/.opticks/scontext dirpath /home/blyth/.opticks/scontext path /home/blyth/.opticks/scontext/sdevice.bin
    -1:NVIDIA_RTX_5000_Ada_GenerationA[blyth@localhost ~]$

And the running did not create the directory and sdevice.bin file::

    A[blyth@localhost ~]$ l ~/.opticks/scontext
    total 0
    0 drwxr-xr-x. 2 blyth blyth  6 Feb 14 15:26 .
    0 drwxr-xr-x. 7 blyth blyth 86 Feb 14 15:26 ..

Observe that CUDA_VISIBLE_DEVICES is defined::

    A[blyth@localhost ~]$ echo $CUDA_VISIBLE_DEVICES
    0

Only with CUDA_VISIBLE_DEVICES unset does the file get persisted::

    A[blyth@localhost ~]$ unset CUDA_VISIBLE_DEVICES
    A[blyth@localhost ~]$ opticks/sysrap/tests/scontext_test.sh run
    0:NVIDIA_RTX_5000_Ada_GenerationA[blyth@localhost ~]$
    A[blyth@localhost ~]$
    A[blyth@localhost ~]$ l ~/.opticks/scontext/
    total 4
    0 drwxr-xr-x. 2 blyth blyth  25 Feb 14 15:30 .
    4 -rw-r--r--. 1 blyth blyth 304 Feb 14 15:30 sdevice.bin
    0 drwxr-xr-x. 7 blyth blyth  86 Feb 14 15:26 ..
    A[blyth@localhost ~]$ opticks/sysrap/tests/scontext_test.sh run
    0:NVIDIA_RTX_5000_Ada_GenerationA[blyth@localhost ~]$
    A[blyth@localhost ~]$

How about setting it blank rather than unset ?::

    A[blyth@localhost ~]$ rm -rf  ~/.opticks/scontext
    A[blyth@localhost ~]$ CUDA_VISIBLE_DEVICES="" opticks/sysrap/tests/scontext_test.sh run
    sdevice::Load failed read from  dirpath_ /home/blyth/.opticks/scontext dirpath /home/blyth/.opticks/scontext path /home/blyth/.opticks/scontext/sdevice.bin
    sdevice::Load failed read from  dirpath_ /home/blyth/.opticks/scontext dirpath /home/blyth/.opticks/scontext path /home/blyth/.opticks/scontext/sdevice.bin
    scontext::initConfig : ZERO VISIBLE DEVICES - CHECK CUDA_VISIBLE_DEVICES envvar
    A[blyth@localhost ~]$

    A[blyth@localhost ~]$ rm -rf  ~/.opticks/scontext
    A[blyth@localhost ~]$ CUDA_VISIBLE_DEVICES= opticks/sysrap/tests/scontext_test.sh run
    sdevice::Load failed read from  dirpath_ /home/blyth/.opticks/scontext dirpath /home/blyth/.opticks/scontext path /home/blyth/.opticks/scontext/sdevice.bin
    sdevice::Load failed read from  dirpath_ /home/blyth/.opticks/scontext dirpath /home/blyth/.opticks/scontext path /home/blyth/.opticks/scontext/sdevice.bin
    scontext::initConfig : ZERO VISIBLE DEVICES - CHECK CUDA_VISIBLE_DEVICES envvar
    A[blyth@localhost ~]$

Nope it does need to be unset::

    A[blyth@localhost ~]$ rm -rf  ~/.opticks/scontext
    A[blyth@localhost ~]$ unset CUDA_VISIBLE_DEVICES ; opticks/sysrap/tests/scontext_test.sh run
    0:NVIDIA_RTX_5000_Ada_GenerationA[blyth@localhost ~]$
    A[blyth@localhost ~]$
    A[blyth@localhost ~]$ l ~/.opticks/scontext/
    total 4
    0 drwxr-xr-x. 2 blyth blyth  25 Feb 14 15:42 .
    4 -rw-r--r--. 1 blyth blyth 304 Feb 14 15:42 sdevice.bin
    0 drwxr-xr-x. 7 blyth blyth  86 Feb 14 15:42 ..
    A[blyth@localhost ~]$




EOU
}
# Run from the directory containing this script, so that relative
# paths (dbg__.sh, the locally built binary) resolve. Quoted nested
# command substitutions keep this safe for paths containing spaces,
# and a failed cd aborts rather than running from the wrong place.
cd "$(dirname "$(realpath "$BASH_SOURCE")")" || exit 1

name=scontext_test
bin=$name              # must be the CMake-built scontext_test executable on PATH

defarg="info_run"      # default actions when no argument given
arg=${1:-$defarg}

# Variables dumped by the "info" action; "0" yields $0 via indirect expansion.
vars="BASH_SOURCE 0 PWD name TMP bin defarg arg CUDA_VISIBLE_DEVICES"

# info : print the value of each variable named in $vars
if [ "${arg/info}" != "$arg" ]; then
   for var in $vars ; do printf "%20s : %s\n" "$var" "${!var}" ; done
fi

# run : execute the binary; propagate failure as exit 2
if [ "${arg/run}" != "$arg" ]; then
   "$bin"
   [ $? -ne 0 ] && echo "$BASH_SOURCE : run error" >&2 && exit 2
fi

# dbg : execute the binary under the debugger wrapper from dbg__.sh
if [ "${arg/dbg}" != "$arg" ]; then
   source dbg__.sh
   dbg__ "$bin"
   [ $? -ne 0 ] && echo "$BASH_SOURCE : dbg error" >&2 && exit 2
fi

exit 0
0227 
0228