File indexing completed on 2026-04-09 07:48:50
0001
0002 """
0003 qcf.py
0004 ======
0005
0006
0007 """
0008 import os, logging, numpy as np
0009 from opticks.ana.nbase import chi2, chi2_pvalue
0010 log = logging.getLogger(__name__)
0011
0012 class QU(object):
0013 def __init__(self, q, symbol="q"):
0014 """
0015 QU : Unique value table
0016
0017 q is a char array of dtype "|S96" (32*3=96 each point taking 3 chars "TO " "BT ")
0018
0019 u : unique values within q
0020 x : first index within q of the unique value
0021 n : count of occurrence of the unique values in q
0022 """
0023 assert q.dtype == "|S96"
0024
0025 expr = "np.c_[n,x,u][o][lim]"
0026 label = "uniques in descending count order with first index x"
0027
0028 u, x, n = np.unique(q, return_index=True, return_counts=True)
0029 o = np.argsort(n)[::-1]
0030
0031
0032 self.symbol = symbol
0033 self.expr = expr
0034 self.label = label
0035
0036 self.q = q
0037 self.uxno = u,x,n,o
0038 self.u = u
0039 self.x = x
0040 self.n = n
0041 self.o = o
0042 self.lim = slice(0,10)
0043
0044 def _get_tab(self):
0045 u,x,n,o = self.uxno
0046 expr = self.expr
0047 lim = self.lim
0048 return eval(expr)
0049
0050 tab = property(_get_tab)
0051
0052 def __repr__(self):
0053 lines = []
0054 lines.append("%s : %s : %s" % (self.symbol, self.expr, self.label))
0055 lines.append(str(self.tab))
0056 return "\n".join(lines)
0057
0058
0059 class QCF(object):
0060 """
0061 Used for example from sevt.py:SAB
0062 """
0063 def __init__(self, _aq, _bq, symbol="qcf"):
0064 """
0065 :param _aq: photon history strings for event A
0066 :param _bq: ditto for B
0067 """
0068 _same_shape = _aq.shape == _bq.shape
0069 if not _same_shape:
0070 mn = min(_aq.shape[0], _bq.shape[0])
0071 msg = "a, b not same shape _aq %s _bq %s WILL LIMIT TO mn %d " % ( str(_aq.shape), str(_bq.shape), mn )
0072 lim = slice(0, mn)
0073 else:
0074 lim = slice(None)
0075 msg = ""
0076 pass
0077 aq = _aq[lim]
0078 bq = _bq[lim]
0079 same_shape = aq.shape == bq.shape
0080 assert same_shape
0081
0082 asym = "%s.aqu" % symbol
0083
0084 log.info("QCF.__init__ %s " % asym)
0085 aqu = QU(aq, symbol=asym )
0086 bsym = "%s.bqu" % symbol
0087 log.info("QCF.__init__ %s " % bsym)
0088 bqu = QU(bq, symbol=bsym )
0089
0090 qu = np.unique(np.concatenate([aqu.u,bqu.u]))
0091 ab = np.zeros( (len(qu),3,2), dtype=np.int64 )
0092
0093 log.info("QCF.__init__ [ qu loop")
0094 for i, q in enumerate(qu):
0095
0096
0097 ai_ = np.where(aqu.u == q )[0]
0098 bi_ = np.where(bqu.u == q )[0]
0099 ai = ai_[0] if len(ai_) == 1 else -1
0100 bi = bi_[0] if len(bi_) == 1 else -1
0101
0102
0103
0104
0105
0106
0107
0108 if i % 1000 == 0:
0109 log.info("QCF.__init__ . qu loop %d " % i )
0110 pass
0111
0112 ab[i,0,0] = ai
0113 ab[i,1,0] = aqu.x[ai] if ai > -1 else -1
0114 ab[i,2,0] = aqu.n[ai] if ai > -1 else 0
0115
0116 ab[i,0,1] = bi
0117 ab[i,1,1] = bqu.x[bi] if bi > -1 else -1
0118 ab[i,2,1] = bqu.n[bi] if bi > -1 else 0
0119 pass
0120 log.info("QCF.__init__ ] qu loop")
0121
0122
0123
0124 abx = np.max(ab[:,2,:], axis=1 )
0125 abxo = np.argsort(abx)[::-1]
0126 abo = ab[abxo]
0127 quo = qu[abxo]
0128 iq = np.arange(len(qu))
0129
0130
0131 bzero = np.where( np.logical_and( abo[:,2,0] > 10, abo[:,2,1] == 0 ) )[0]
0132 azero = np.where( np.logical_and( abo[:,2,1] > 10, abo[:,2,0] == 0 ) )[0]
0133
0134 c2cut = int(os.environ.get("C2CUT","30"))
0135 c2,c2n,c2c = chi2( abo[:,2,0], abo[:,2,1], cut=c2cut )
0136 c2sum = c2.sum()
0137 c2per = c2sum/c2n
0138
0139 c2pv = chi2_pvalue( c2sum, int(c2n) )
0140 c2pvm = "> 0.05 : null-hyp " if c2pv > 0.05 else "< 0.05 : NOT:null-hyp "
0141 c2pvd = "pv[%4.3f,%s] " % (c2pv, c2pvm)
0142
0143
0144
0145
0146 c2desc = "c2sum/c2n:c2per(C2CUT) %5.2f/%d:%5.3f (%2d) %s" % ( c2sum, int(c2n), c2per, c2cut, c2pvd )
0147 c2label = "c2sum : %10.4f c2n : %10.4f c2per: %10.4f C2CUT: %4d " % ( c2sum, c2n, c2per, c2cut )
0148
0149
0150
0151 self._aq = _aq
0152 self._bq = _bq
0153 self.lim = lim
0154 self.msg = msg
0155 self.aq = aq
0156 self.bq = bq
0157 self.symbol = symbol
0158 self.aqu = aqu
0159 self.bqu = bqu
0160 self.qu = qu
0161 self.ab = ab
0162 self.abx = abx
0163 self.abxo = abxo
0164 self.abo = abo
0165 self.quo = quo
0166 self.iq = iq
0167 self.bzero = bzero
0168 self.azero = azero
0169 self.c2cut = c2cut
0170 self.c2 = c2
0171 self.c2n = c2n
0172 self.c2c = c2c
0173 self.c2sum = c2sum
0174 self.c2per = c2per
0175 self.c2desc = c2desc
0176 self.c2label = c2label
0177 self.sli = slice(None)
0178
0179 def __getitem__(self, sli):
0180 """
0181
0182 Change slice with::
0183
0184 ab.qcf[:100]
0185
0186 """
0187 self.sli = sli
0188 print("sli: %s " % str(sli))
0189 return self
0190
0191 def __repr__(self):
0192 lines = []
0193 lines.append("QCF %s : %s " % (self.symbol, self.msg))
0194 lines.append("a.q %d b.q %d lim %s " % (self._aq.shape[0], self._bq.shape[0], str(self.lim)) )
0195 lines.append(self.c2label)
0196 lines.append(self.c2desc)
0197
0198 siq = list(map(lambda _:"%2d" % _ , self.iq ))
0199 sc2 = list(map(lambda _:"%7.4f" % _, self.c2 ))
0200
0201 sabo2 = list(map(lambda _:"%6d %6d" % tuple(_), self.abo[:,2,:]))
0202 sabo1 = list(map(lambda _:"%6d %6d" % tuple(_), self.abo[:,1,:]))
0203
0204 pstr_ = lambda _:_.strip().decode("utf-8")
0205 _quo = list(map(pstr_, self.quo))
0206 mxl = max(list(map(len, _quo)))
0207 fmt = "%-" + str(mxl) + "s"
0208 _quo = list(map(lambda _:fmt % _, _quo ))
0209 _quo = np.array( _quo )
0210
0211 sli = self.sli
0212
0213 start = sli.start if not sli.start is None else 0
0214 stop = sli.stop if not sli.stop is None else 25
0215 step = sli.step if not sli.step is None else None
0216
0217
0218 abexpr = "np.c_[siq,_quo,siq,sabo2,sc2,sabo1]"
0219 subs = "[%d:%d]" % ( start, stop )
0220 subs += " [bzero] [azero]"
0221 subs = subs.split()
0222 descs = ["A-B history frequency chi2 comparison", "in A but not B", "in B but not A" ]
0223
0224 bzero = self.bzero
0225 azero = self.azero
0226
0227 for i in range(len(subs)):
0228 expr = "%s%s" % (abexpr, subs[i])
0229 lines.append("\n%s ## %s " % (expr, descs[i]) )
0230 lines.append(str(eval(expr)))
0231 pass
0232 return "\n".join(lines)
0233
0234 class QCFZero(object):
0235 """
0236 Presenting dropout histories in comparison : smoking gun for bugs
0237 """
0238 def __init__(self, qcf, symbol="qcf0"):
0239 self.qcf = qcf
0240 self.bzero_viz = "u4t ; N=0 APID=%d AOPT=idx ./U4SimtraceTest.sh ana"
0241 self.azero_viz = "u4t ; N=1 BPID=%d BOPT=idx ./U4SimtraceTest.sh ana"
0242
0243 def __repr__(self):
0244 qcf = self.qcf
0245 quo = qcf.quo
0246 aq = qcf.aq
0247 bq = qcf.bq
0248 bzero = qcf.bzero
0249 azero = qcf.azero
0250
0251 pstr_ = lambda _:_.strip().decode("utf-8")
0252
0253 lines = []
0254 lim = slice(0,2)
0255 lines.append("\nbzero : %s : A HIST NOT IN B" % (str(bzero)))
0256 for _ in bzero:
0257 idxs = np.where( quo[_] == aq[:,0] )[0]
0258 lines.append("bzero quo[_]:%s len(idxs):%d idxs[lim]:%s " % ( pstr_(quo[_]), len(idxs), str(idxs[lim])) )
0259 for idx in idxs[lim]:
0260 lines.append(self.bzero_viz % idx)
0261 pass
0262 if len(idxs) > 0: lines.append("")
0263 pass
0264
0265 lines.append("\nazero : %s : B HIST NOT IN A" % (str(azero)))
0266 for _ in azero:
0267 idxs = np.where( quo[_] == bq[:,0] )[0]
0268 lines.append("azero quo[_]:%s len(idxs):%d idxs[lim]:%s " % ( pstr_(quo[_]), len(idxs), str(idxs[lim])) )
0269 for idx in idxs[lim]:
0270 lines.append(self.azero_viz % idx)
0271 pass
0272 if len(idxs) > 0: lines.append("")
0273 pass
0274 return "\n".join(lines)
0275
0276
0277 def test_QU():
0278 aq = np.array( ["red", "green", "blue", "blue" ], dtype="|S10" )
0279 aqu = QU(aq, symbol="aqu")
0280 print(aqu.tab)
0281 print(aqu)
0282
0283 def test_QCF():
0284 aq = np.array( ["red", "green", "blue", "blue" ], dtype="|S10" )
0285 bq = np.array( ["red", "red", "green", "blue" ], dtype="|S10" )
0286
0287 qcf = QCF(aq, bq, symbol="qcf")
0288 print(qcf.ab)
0289 print(qcf)
0290
0291
0292
0293
0294
0295 if __name__ == '__main__':
0296 logging.basicConfig(level=logging.INFO)
0297
0298 aq = np.array( ["red", "green", "blue", "cyan" ], dtype="|S10" )
0299 bq = np.array( ["red", "red", "green", "blue" ], dtype="|S10" )
0300
0301 qcf = QCF(aq, bq, symbol="qcf")
0302 print(qcf.ab)
0303 print(qcf)
0304
0305 qcf0 = QCFZero(qcf, symbol="qcf0")
0306 print(qcf0)
0307
0308
0309
0310
0311
0312