# File indexing completed on 2026-04-09 07:49:16
0001
## usage : emit this script's reStructuredText documentation to stdout.
## The heredoc delimiter is quoted ('EOU') so that example shell fragments
## such as "echo $CUDA_VISIBLE_DEVICES" are printed literally rather than
## being parameter-expanded at runtime (the former unquoted delimiter
## silently blanked such examples).
usage(){ cat << 'EOU'
scontext_test.sh
=================

This script must currently use the CMake built scontext_test
executable not the executable that this script was formerly
able to build.

TODO : get this standalone script to compile again by better
dependency control with SEventConfig.cc (and dependencies)


::

   ~/o/sysrap/tests/scontext_test.sh run


* note that the nvidia-smi ordering does not necessarily match the CUDA ordering
* also I think that nvidia-smi ordering may change after reboots
* also the CUDA ordering can be changed with eg CUDA_VISIBLE_DEVICES=1,0

::

    N[blyth@localhost tests]$ ~/o/sysrap/tests/scontext_test.sh run
    0:TITAN_V 1:TITAN_RTX
    N[blyth@localhost tests]$ CUDA_VISIBLE_DEVICES=1 ~/o/sysrap/tests/scontext_test.sh
    1:TITAN_RTX
    N[blyth@localhost tests]$ CUDA_VISIBLE_DEVICES=0 ~/o/sysrap/tests/scontext_test.sh
    0:TITAN_V
    N[blyth@localhost tests]$ CUDA_VISIBLE_DEVICES=1 ~/o/sysrap/tests/scontext_test.sh
    1:TITAN_RTX
    N[blyth@localhost tests]$ CUDA_VISIBLE_DEVICES=0,1 ~/o/sysrap/tests/scontext_test.sh
    0:TITAN_V 1:TITAN_RTX
    N[blyth@localhost tests]$ CUDA_VISIBLE_DEVICES=1,0 ~/o/sysrap/tests/scontext_test.sh
    1:TITAN_RTX 0:TITAN_V
    N[blyth@localhost tests]$



    N[blyth@localhost tests]$ VERBOSE=1 ~/o/sysrap/tests/scontext_test.sh
    scontext::desc
    all_devices
    [0:TITAN_V 1:TITAN_RTX]
   idx/ord/mpc/cc:0/0/80/70  11.784 GB  TITAN V
   idx/ord/mpc/cc:1/1/72/75  23.652 GB  TITAN RTX
    visible_devices
    [0:TITAN_V 1:TITAN_RTX]
   idx/ord/mpc/cc:0/0/80/70  11.784 GB  TITAN V
   idx/ord/mpc/cc:1/1/72/75  23.652 GB  TITAN RTX


    N[blyth@localhost tests]$ VERBOSE=1 CUDA_VISIBLE_DEVICES=0 ~/o/sysrap/tests/scontext_test.sh
    scontext::desc
    all_devices
    [0:TITAN_V 1:TITAN_RTX]
   idx/ord/mpc/cc:0/0/80/70  11.784 GB  TITAN V
   idx/ord/mpc/cc:1/1/72/75  23.652 GB  TITAN RTX
    visible_devices
    [0:TITAN_V]
   idx/ord/mpc/cc:0/0/80/70  11.784 GB  TITAN V

    N[blyth@localhost tests]$ VERBOSE=1 CUDA_VISIBLE_DEVICES=1 ~/o/sysrap/tests/scontext_test.sh
    scontext::desc
    all_devices
    [0:TITAN_V 1:TITAN_RTX]
   idx/ord/mpc/cc:0/0/80/70  11.784 GB  TITAN V
   idx/ord/mpc/cc:1/1/72/75  23.652 GB  TITAN RTX
    visible_devices
    [1:TITAN_RTX]
   idx/ord/mpc/cc:0/1/72/75  23.652 GB  TITAN RTX

    N[blyth@localhost tests]$ VERBOSE=1 CUDA_VISIBLE_DEVICES=1,0 ~/o/sysrap/tests/scontext_test.sh
    scontext::desc
    all_devices
    [0:TITAN_V 1:TITAN_RTX]
   idx/ord/mpc/cc:0/0/80/70  11.784 GB  TITAN V
   idx/ord/mpc/cc:1/1/72/75  23.652 GB  TITAN RTX
    visible_devices
    [1:TITAN_RTX 0:TITAN_V]
   idx/ord/mpc/cc:0/1/72/75  23.652 GB  TITAN RTX
   idx/ord/mpc/cc:1/0/80/70  11.784 GB  TITAN V

    N[blyth@localhost tests]$ VERBOSE=1 CUDA_VISIBLE_DEVICES=0,1 ~/o/sysrap/tests/scontext_test.sh
    scontext::desc
    all_devices
    [0:TITAN_V 1:TITAN_RTX]
   idx/ord/mpc/cc:0/0/80/70  11.784 GB  TITAN V
   idx/ord/mpc/cc:1/1/72/75  23.652 GB  TITAN RTX
    visible_devices
    [0:TITAN_V 1:TITAN_RTX]
   idx/ord/mpc/cc:0/0/80/70  11.784 GB  TITAN V
   idx/ord/mpc/cc:1/1/72/75  23.652 GB  TITAN RTX



    N[blyth@localhost tests]$ nvidia-smi
    Mon Jun  5 19:51:47 2023
    +-----------------------------------------------------------------------------+
    | NVIDIA-SMI 435.21       Driver Version: 435.21       CUDA Version: 10.1     |
    |-------------------------------+----------------------+----------------------+
    | GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
    | Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
    |===============================+======================+======================|
    |   0  TITAN RTX           Off  | 00000000:73:00.0 Off |                  N/A |
    | 39%   55C    P0    71W / 280W |      0MiB / 24219MiB |      0%      Default |
    +-------------------------------+----------------------+----------------------+
    |   1  TITAN V             Off  | 00000000:A6:00.0 Off |                  N/A |
    | 46%   54C    P8    N/A /  N/A |      0MiB / 12066MiB |      0%      Default |
    +-------------------------------+----------------------+----------------------+




Try deleting ~/.opticks/scontext and see how to recreate
-----------------------------------------------------------

* THESE ARE OLD NOTES PRIOR TO MOVE TO USE ~/.opticks/sdevice/sdevice.bin


Initially running works::

    A[blyth@localhost ~]$ opticks/sysrap/tests/scontext_test.sh run
    0:NVIDIA_RTX_5000_Ada_GenerationA[blyth@localhost ~]$ l ~/.opticks/scontext/
    total 4
    4 -rw-r--r--. 1 blyth blyth 304 Aug 30 16:07 sdevice.bin
    0 drwxr-xr-x. 7 blyth blyth  86 Aug 30 09:51 ..
    0 drwxr-xr-x. 2 blyth blyth  25 Aug 29 22:06 .

Remove the scontext directory causes the expected error::

    A[blyth@localhost ~]$ rm -rf ~/.opticks/scontext
    A[blyth@localhost ~]$ opticks/sysrap/tests/scontext_test.sh run
    sdevice::Load failed read from dirpath_ /home/blyth/.opticks/scontext dirpath /home/blyth/.opticks/scontext path /home/blyth/.opticks/scontext/sdevice.bin
    sdevice::Load failed read from dirpath_ /home/blyth/.opticks/scontext dirpath /home/blyth/.opticks/scontext path /home/blyth/.opticks/scontext/sdevice.bin
    -1:NVIDIA_RTX_5000_Ada_GenerationA[blyth@localhost ~]$

And the running did not create the directory and sdevice.bin file::

    A[blyth@localhost ~]$ l ~/.opticks/scontext
    total 0
    0 drwxr-xr-x. 2 blyth blyth  6 Feb 14 15:26 .
    0 drwxr-xr-x. 7 blyth blyth 86 Feb 14 15:26 ..

Observe that CUDA_VISIBLE_DEVICES is defined::

    A[blyth@localhost ~]$ echo $CUDA_VISIBLE_DEVICES
    0

Only with CUDA_VISIBLE_DEVICES unset does the file get persisted::

    A[blyth@localhost ~]$ unset CUDA_VISIBLE_DEVICES
    A[blyth@localhost ~]$ opticks/sysrap/tests/scontext_test.sh run
    0:NVIDIA_RTX_5000_Ada_GenerationA[blyth@localhost ~]$
    A[blyth@localhost ~]$
    A[blyth@localhost ~]$ l ~/.opticks/scontext/
    total 4
    0 drwxr-xr-x. 2 blyth blyth  25 Feb 14 15:30 .
    4 -rw-r--r--. 1 blyth blyth 304 Feb 14 15:30 sdevice.bin
    0 drwxr-xr-x. 7 blyth blyth  86 Feb 14 15:26 ..
    A[blyth@localhost ~]$ opticks/sysrap/tests/scontext_test.sh run
    0:NVIDIA_RTX_5000_Ada_GenerationA[blyth@localhost ~]$
    A[blyth@localhost ~]$

How about setting it blank rather than unset ?::

    A[blyth@localhost ~]$ rm -rf ~/.opticks/scontext
    A[blyth@localhost ~]$ CUDA_VISIBLE_DEVICES="" opticks/sysrap/tests/scontext_test.sh run
    sdevice::Load failed read from dirpath_ /home/blyth/.opticks/scontext dirpath /home/blyth/.opticks/scontext path /home/blyth/.opticks/scontext/sdevice.bin
    sdevice::Load failed read from dirpath_ /home/blyth/.opticks/scontext dirpath /home/blyth/.opticks/scontext path /home/blyth/.opticks/scontext/sdevice.bin
    scontext::initConfig : ZERO VISIBLE DEVICES - CHECK CUDA_VISIBLE_DEVICES envvar
    A[blyth@localhost ~]$

    A[blyth@localhost ~]$ rm -rf ~/.opticks/scontext
    A[blyth@localhost ~]$ CUDA_VISIBLE_DEVICES= opticks/sysrap/tests/scontext_test.sh run
    sdevice::Load failed read from dirpath_ /home/blyth/.opticks/scontext dirpath /home/blyth/.opticks/scontext path /home/blyth/.opticks/scontext/sdevice.bin
    sdevice::Load failed read from dirpath_ /home/blyth/.opticks/scontext dirpath /home/blyth/.opticks/scontext path /home/blyth/.opticks/scontext/sdevice.bin
    scontext::initConfig : ZERO VISIBLE DEVICES - CHECK CUDA_VISIBLE_DEVICES envvar
    A[blyth@localhost ~]$

Nope it does need to be unset::

    A[blyth@localhost ~]$ rm -rf ~/.opticks/scontext
    A[blyth@localhost ~]$ unset CUDA_VISIBLE_DEVICES ; opticks/sysrap/tests/scontext_test.sh run
    0:NVIDIA_RTX_5000_Ada_GenerationA[blyth@localhost ~]$
    A[blyth@localhost ~]$
    A[blyth@localhost ~]$ l ~/.opticks/scontext/
    total 4
    0 drwxr-xr-x. 2 blyth blyth  25 Feb 14 15:42 .
    4 -rw-r--r--. 1 blyth blyth 304 Feb 14 15:42 sdevice.bin
    0 drwxr-xr-x. 7 blyth blyth  86 Feb 14 15:42 ..
    A[blyth@localhost ~]$




EOU
}
## Run from the directory holding this script so relative paths (eg
## dbg__.sh sourced by the dbg section) resolve. Quote every expansion
## so a path containing spaces works, and bail out if the cd fails
## rather than running the binary from the wrong directory.
cd "$(dirname "$(realpath "$BASH_SOURCE")")" || exit 1

name=scontext_test
bin=$name            # the CMake-built scontext_test executable, expected on PATH

defarg="info_run"    # default modes : dump config vars, then run the binary
arg=${1:-$defarg}

## variable names reported by the info section via indirect expansion ${!var}
vars="BASH_SOURCE 0 PWD name TMP bin defarg arg CUDA_VISIBLE_DEVICES"
0208
## Mode dispatch : "arg" may hold several tokens joined together (eg the
## default "info_run"), so each section performs an independent substring
## match and any combination of sections can fire in order.

## info : report the configuration variables, one per line
case $arg in
    *info*)
        for var in $vars ; do printf "%20s : %s\n" "$var" "${!var}" ;  done
        ;;
esac

## run : execute the CMake-built binary, propagating any failure as exit 2
case $arg in
    *run*)
        $bin
        [ $? -ne 0 ] && echo $BASH_SOURCE : run error && exit 2
        ;;
esac

## dbg : run the binary under the debugger wrapper provided by dbg__.sh
case $arg in
    *dbg*)
        source dbg__.sh
        dbg__ $bin
        [ $? -ne 0 ] && echo $BASH_SOURCE : run error && exit 2
        ;;
esac

exit 0
0227
0228