File indexing completed on 2026-04-09 07:49:40
0001
0002 """
0003 smonitor.py
0004 ============
0005
0006 ~/o/sysrap/smonitor.sh ana
0007
0008 """
0009
0010 import os, numpy as np
0011 import matplotlib.pyplot as mp
SIZE = np.array([1280, 720])   # divided by 100. below to give the matplotlib figsize


def format_linefit_label(pfit):
    """
    Return the title/legend text describing a degree-1 polynomial fit.

    :param pfit: coefficients as returned by ``np.polyfit(x, y, 1)``,
                 highest power first, ie ``(slope, intercept)``
    :return: formatted label string

    The coefficients are taken from *pfit* rather than from
    ``np.poly1d(pfit).coef`` because poly1d strips leading zero
    coefficients : a fit with slope of exactly zero (flat memory usage)
    leaves a single coefficient, so indexing ``coef[1]`` raises IndexError.
    """
    return "line fit: slope %10.3f [GB/s] intercept %10.3f " % (pfit[0], pfit[1])


if __name__ == '__main__':
    # Samples are read from the current directory, one row per sample.
    mon = np.load("smonitor.npy").astype(np.int64)

    # Column meanings follow the variable names used here,
    # the layout is fixed by whatever wrote smonitor.npy.
    stamp = mon[:,0]
    device = mon[:,1]
    free = mon[:,2]
    total = mon[:,3]
    used = mon[:,4]
    pid = mon[:,5]
    usedGpuMemory = mon[:,6]
    proc_count = mon[:,7]

    # Time relative to the first sample.
    # NOTE(review): presumably stamp is in microseconds and memory in bytes,
    # consistent with the [GB/s] slope label - confirm against the writer.
    t = (stamp - stamp[0])/1e6
    usedGpuMemory_GB = usedGpuMemory/1e9
    total_GB = total/1e9
    free_GB = free/1e9
    used_GB = used/1e9

    u_device = np.unique(device)
    u_total = np.unique(total)
    u_pid = np.unique(pid)
    u_proc_count = np.unique(proc_count)

    # The analysis requires a single device, a single total and a single pid.
    assert len(u_device) == 1
    assert len(u_total) == 1
    assert len(u_pid) == 1

    _device = u_device[0]
    _total = u_total[0]/1e9
    _pid = u_pid[0]
    _proc_count = u_proc_count[0]

    # NOTE(review): only the smallest unique proc_count is checked, other
    # values in the column pass unnoticed - confirm whether that is intended.
    assert _proc_count == 1

    # Indices i where usedGpuMemory_GB[i+1] exceeds usedGpuMemory_GB[i] by more than 0.001
    delta_usedGpuMemory_GB = np.diff( usedGpuMemory_GB )
    w_delta_usedGpuMemory_GB = np.where( delta_usedGpuMemory_GB > 0.001 )[0]

    # Default start of the fitted range is the third such increase, when there
    # are that many (presumably to skip startup allocations - TODO confirm).
    # A positive START envvar overrides it.
    auto_start = w_delta_usedGpuMemory_GB[2] if len(w_delta_usedGpuMemory_GB) > 2 else 0
    env_start = int(os.environ.get("START", "0"))
    start = env_start if env_start > 0 else auto_start
    message = "auto_start:%(auto_start)2d START:%(env_start)2d start:%(start)2d " % locals()

    idx = np.arange( len(usedGpuMemory_GB) )
    sel = slice(start, None)

    # The eval calls below only see constant strings defined right here.
    # The idiom keeps the printed expression and the evaluated one identical.
    expr = "np.c_[idx[sel], t[sel], usedGpuMemory_GB[sel], usedGpuMemory[sel] ]"
    print(expr)
    print(eval(expr))

    # Change between the first and last selected samples, in GB ...
    _dgb = "(usedGpuMemory_GB[sel][-1]-usedGpuMemory_GB[sel][0])"
    dgb = eval(_dgb)
    print("dgb %10.3f %s " % ( dgb, _dgb ))

    # ... and in the raw units of the usedGpuMemory column
    _db = "(usedGpuMemory[sel][-1]-usedGpuMemory[sel][0])"
    db = eval(_db)
    print("db %10.3f %s " % ( db, _db ))

    # Elapsed time over the selection and the end-to-end rates
    _dt = "(t[sel][-1]-t[sel][0])"
    dt = eval(_dt)
    print("dt %10.3f %s " % ( dt, _dt ))
    print("dgb/dt %10.3f " % (dgb/dt))
    print("db/dt %10.3f " % (db/dt))

    # Straight line fit of the selected samples
    deg = 1
    pfit = np.polyfit(t[sel], usedGpuMemory_GB[sel], deg)
    linefit = np.poly1d(pfit)
    linefit_label = format_linefit_label(pfit)

    headline = "smonitor.sh device %(_device)s total_GB %(_total)4.1f pid %(_pid)s " % locals()
    publine = "%s : %s " % (message, os.environ.get("PUB", "no-PUB" ))
    title = "\n".join([headline, linefit_label, publine])
    print(title)

    fig, ax = mp.subplots(figsize=SIZE/100.)
    fig.suptitle(title)

    ax.set_yscale('log')

    ax.plot( t, total_GB , label="total_GB" )
    ax.plot( t, free_GB , label="free_GB" )
    ax.plot( t, used_GB , label="used_GB" )

    # All samples are scattered, the fitted line is drawn over the selection only
    ax.scatter( t, usedGpuMemory_GB, label="proc.usedGpuMemory_GB" )
    ax.plot( t[sel], linefit(t[sel]), label=linefit_label )

    ax.legend()
    # NOTE(review): presumably run from an interactive session (eg ipython -i)
    # so the window stays open after fig.show() - confirm.
    fig.show()
0107
0108