Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-04-09 07:48:50

0001 #!/usr/bin/env python
0002 """
0003 qcf.py
0004 ======
0005 
0006 
0007 """
0008 import os, logging, numpy as np
0009 from opticks.ana.nbase import chi2, chi2_pvalue
0010 log = logging.getLogger(__name__)
0011 
0012 class QU(object):
0013     def __init__(self, q, symbol="q"):
0014         """
0015         QU : Unique value table 
0016        
0017         q is a char array of dtype "|S96" (32*3=96 each point taking 3 chars  "TO " "BT ")
0018 
0019         u : unique values within q
0020         x : first index within q of the unique value
0021         n : count of occurrence of the unique values in q 
0022         """
0023         assert q.dtype == "|S96"   
0024   
0025         expr = "np.c_[n,x,u][o][lim]"
0026         label = "uniques in descending count order with first index x"
0027 
0028         u, x, n = np.unique(q, return_index=True, return_counts=True)
0029         o = np.argsort(n)[::-1]  
0030         # HMM:notice that the ordering is not applied here 
0031 
0032         self.symbol = symbol
0033         self.expr = expr 
0034         self.label = label
0035 
0036         self.q = q 
0037         self.uxno = u,x,n,o
0038         self.u = u 
0039         self.x = x 
0040         self.n = n 
0041         self.o = o 
0042         self.lim = slice(0,10)
0043 
0044     def _get_tab(self):
0045         u,x,n,o = self.uxno
0046         expr = self.expr
0047         lim = self.lim 
0048         return eval(expr)
0049 
0050     tab = property(_get_tab)
0051 
0052     def __repr__(self):
0053         lines = []
0054         lines.append("%s : %s : %s" % (self.symbol, self.expr, self.label))
0055         lines.append(str(self.tab))
0056         return "\n".join(lines)
0057 
0058 
0059 class QCF(object):
0060     """
0061     Used for example from sevt.py:SAB
0062     """
0063     def __init__(self, _aq, _bq, symbol="qcf"):
0064         """
0065         :param _aq: photon history strings for event A
0066         :param _bq: ditto for B 
0067         """
0068         _same_shape = _aq.shape == _bq.shape
0069         if not _same_shape:
0070             mn = min(_aq.shape[0], _bq.shape[0])
0071             msg = "a, b not same shape _aq %s _bq %s WILL LIMIT TO mn %d " % ( str(_aq.shape), str(_bq.shape), mn ) 
0072             lim = slice(0, mn)
0073         else:
0074             lim = slice(None)
0075             msg = ""
0076         pass
0077         aq = _aq[lim]
0078         bq = _bq[lim]
0079         same_shape = aq.shape == bq.shape
0080         assert same_shape
0081 
0082         asym = "%s.aqu" % symbol
0083 
0084         log.info("QCF.__init__ %s " % asym)  
0085         aqu = QU(aq, symbol=asym )
0086         bsym = "%s.bqu" % symbol
0087         log.info("QCF.__init__ %s " % bsym)  
0088         bqu = QU(bq, symbol=bsym ) 
0089 
0090         qu = np.unique(np.concatenate([aqu.u,bqu.u]))       ## unique histories of both A and B in uncontrolled order
0091         ab = np.zeros( (len(qu),3,2), dtype=np.int64 )
0092 
0093         log.info("QCF.__init__ [ qu loop")
0094         for i, q in enumerate(qu):
0095 
0096             # here are finding all indices when just want the first  
0097             ai_ = np.where(aqu.u == q )[0]           # find indices in the a and b unique lists 
0098             bi_ = np.where(bqu.u == q )[0]
0099             ai = ai_[0] if len(ai_) == 1 else -1
0100             bi = bi_[0] if len(bi_) == 1 else -1
0101 
0102             # NB the ai and bi are internal indices into the separate A and B lists
0103             # so they are necessary but not ordinarily surfaced 
0104             # as not very human digestible 
0105             #
0106             # effectively ai and bi are pointers into the two unique lists
0107            
0108             if i % 1000 == 0:
0109                 log.info("QCF.__init__ . qu loop %d " % i )
0110             pass
0111 
0112             ab[i,0,0] = ai                            ## internal index into the A unique list 
0113             ab[i,1,0] = aqu.x[ai] if ai > -1 else -1  ## index of first occurrence in original A seq list
0114             ab[i,2,0] = aqu.n[ai] if ai > -1 else 0   ## count in A or 0 when not present 
0115 
0116             ab[i,0,1] = bi                            ## internal index into the B unique list 
0117             ab[i,1,1] = bqu.x[bi] if bi > -1 else -1  ## index of first occurrence in original B seq list
0118             ab[i,2,1] = bqu.n[bi] if bi > -1 else 0   ## count in B or 0 when not present 
0119         pass
0120         log.info("QCF.__init__ ] qu loop")
0121 
0122         # last dimension 0,1 corresponding to A,B 
0123 
0124         abx = np.max(ab[:,2,:], axis=1 )   # max of aqn, bqn counts 
0125         abxo = np.argsort(abx)[::-1]       # descending count order indices
0126         abo = ab[abxo]                     # ab ordered  
0127         quo = qu[abxo]                     # qu ordered 
0128         iq = np.arange(len(qu))
0129 
0130         # more than 10 counts in one, but zero in the other : history dropouts are smoking guns for bugs 
0131         bzero = np.where( np.logical_and( abo[:,2,0] > 10, abo[:,2,1] == 0 ) )[0]
0132         azero = np.where( np.logical_and( abo[:,2,1] > 10, abo[:,2,0] == 0 ) )[0]
0133 
0134         c2cut = int(os.environ.get("C2CUT","30"))
0135         c2,c2n,c2c = chi2( abo[:,2,0], abo[:,2,1], cut=c2cut )
0136         c2sum = c2.sum()
0137         c2per = c2sum/c2n
0138 
0139         c2pv = chi2_pvalue( c2sum, int(c2n) )
0140         c2pvm = "> 0.05 : null-hyp " if c2pv > 0.05 else "< 0.05 : NOT:null-hyp "  
0141         c2pvd = "pv[%4.3f,%s] " % (c2pv, c2pvm)
0142         # null-hyp consistent means there is no significant difference between 
0143         # the frequency counts in the A and B samples at a certain confidence
0144         # level (normally 5%) 
0145         
0146         c2desc = "c2sum/c2n:c2per(C2CUT)  %5.2f/%d:%5.3f (%2d) %s" % ( c2sum, int(c2n), c2per, c2cut, c2pvd )
0147         c2label = "c2sum : %10.4f c2n : %10.4f c2per: %10.4f  C2CUT: %4d " % ( c2sum, c2n, c2per, c2cut )
0148 
0149 
0150 
0151         self._aq = _aq
0152         self._bq = _bq
0153         self.lim = lim 
0154         self.msg = msg 
0155         self.aq = aq
0156         self.bq = bq
0157         self.symbol = symbol
0158         self.aqu = aqu
0159         self.bqu = bqu
0160         self.qu = qu
0161         self.ab = ab
0162         self.abx = abx
0163         self.abxo = abxo
0164         self.abo = abo
0165         self.quo = quo
0166         self.iq = iq
0167         self.bzero = bzero
0168         self.azero = azero
0169         self.c2cut = c2cut
0170         self.c2 = c2
0171         self.c2n = c2n
0172         self.c2c = c2c
0173         self.c2sum = c2sum
0174         self.c2per = c2per
0175         self.c2desc = c2desc
0176         self.c2label = c2label
0177         self.sli = slice(None)
0178 
0179     def __getitem__(self, sli):
0180         """
0181 
0182         Change slice with::
0183 
0184            ab.qcf[:100]  
0185 
0186         """
0187         self.sli = sli
0188         print("sli: %s " % str(sli))
0189         return self
0190  
0191     def __repr__(self):
0192         lines = []
0193         lines.append("QCF %s : %s " % (self.symbol, self.msg))
0194         lines.append("a.q %d b.q %d lim %s " % (self._aq.shape[0], self._bq.shape[0], str(self.lim)) )
0195         lines.append(self.c2label)
0196         lines.append(self.c2desc)
0197 
0198         siq = list(map(lambda _:"%2d" % _ , self.iq ))  # row index 
0199         sc2 = list(map(lambda _:"%7.4f" % _, self.c2 ))
0200 
0201         sabo2 = list(map(lambda _:"%6d %6d" % tuple(_), self.abo[:,2,:]))
0202         sabo1 = list(map(lambda _:"%6d %6d" % tuple(_), self.abo[:,1,:]))
0203 
0204         pstr_ = lambda _:_.strip().decode("utf-8")
0205         _quo = list(map(pstr_, self.quo))
0206         mxl = max(list(map(len, _quo)))
0207         fmt = "%-" + str(mxl) + "s"
0208         _quo = list(map(lambda _:fmt % _, _quo ))
0209         _quo = np.array( _quo )
0210 
0211         sli = self.sli
0212 
0213         start = sli.start if not sli.start is None else 0 
0214         stop = sli.stop if not sli.stop is None else 25
0215         step = sli.step if not sli.step is None else None  # not used
0216 
0217         #abexpr = "np.c_[quo,abo[:,2,:],abo[:,1,:]]"
0218         abexpr = "np.c_[siq,_quo,siq,sabo2,sc2,sabo1]"
0219         subs = "[%d:%d]" % ( start, stop ) 
0220         subs += " [bzero] [azero]"
0221         subs = subs.split()
0222         descs = ["A-B history frequency chi2 comparison", "in A but not B", "in B but not A" ]
0223 
0224         bzero = self.bzero
0225         azero = self.azero
0226 
0227         for i in range(len(subs)):
0228             expr = "%s%s" % (abexpr, subs[i])
0229             lines.append("\n%s  ## %s " % (expr, descs[i]) )
0230             lines.append(str(eval(expr)))
0231         pass
0232         return "\n".join(lines)
0233 
0234 class QCFZero(object):
0235     """
0236     Presenting dropout histories in comparison : smoking gun for bugs 
0237     """
0238     def __init__(self, qcf, symbol="qcf0"):
0239         self.qcf = qcf
0240         self.bzero_viz = "u4t ; N=0 APID=%d AOPT=idx ./U4SimtraceTest.sh ana"
0241         self.azero_viz = "u4t ; N=1 BPID=%d BOPT=idx ./U4SimtraceTest.sh ana"
0242 
0243     def __repr__(self):
0244         qcf = self.qcf
0245         quo = qcf.quo
0246         aq = qcf.aq
0247         bq = qcf.bq  
0248         bzero = qcf.bzero
0249         azero = qcf.azero
0250 
0251         pstr_ = lambda _:_.strip().decode("utf-8")
0252 
0253         lines = []
0254         lim = slice(0,2)
0255         lines.append("\nbzero : %s : A HIST NOT IN B" % (str(bzero)))
0256         for _ in bzero:
0257             idxs = np.where( quo[_] == aq[:,0] )[0]
0258             lines.append("bzero quo[_]:%s len(idxs):%d idxs[lim]:%s " % ( pstr_(quo[_]), len(idxs), str(idxs[lim])) )
0259             for idx in idxs[lim]:
0260                 lines.append(self.bzero_viz % idx)
0261             pass
0262             if len(idxs) > 0: lines.append("")
0263         pass
0264 
0265         lines.append("\nazero : %s : B HIST NOT IN A" % (str(azero)))
0266         for _ in azero:
0267             idxs = np.where( quo[_] == bq[:,0] )[0]
0268             lines.append("azero quo[_]:%s len(idxs):%d idxs[lim]:%s " % ( pstr_(quo[_]), len(idxs), str(idxs[lim])) )
0269             for idx in idxs[lim]:
0270                 lines.append(self.azero_viz % idx)
0271             pass
0272             if len(idxs) > 0: lines.append("")
0273         pass
0274         return "\n".join(lines)
0275 
0276 
0277 def test_QU():
0278     aq = np.array( ["red", "green", "blue", "blue" ], dtype="|S10" )
0279     aqu = QU(aq, symbol="aqu")
0280     print(aqu.tab)
0281     print(aqu)
0282 
0283 def test_QCF():
0284     aq = np.array( ["red", "green", "blue", "blue" ], dtype="|S10" )
0285     bq = np.array( ["red", "red", "green", "blue"  ], dtype="|S10" )
0286 
0287     qcf = QCF(aq, bq, symbol="qcf")
0288     print(qcf.ab)
0289     print(qcf)
0290 
0291 
0292 
0293 
0294 
0295 if __name__ == '__main__':
0296     logging.basicConfig(level=logging.INFO)
0297 
0298     aq = np.array( ["red", "green", "blue", "cyan" ], dtype="|S10" )
0299     bq = np.array( ["red", "red", "green", "blue"  ], dtype="|S10" )
0300 
0301     qcf = QCF(aq, bq, symbol="qcf")
0302     print(qcf.ab)
0303     print(qcf)
0304 
0305     qcf0 = QCFZero(qcf, symbol="qcf0")
0306     print(qcf0)
0307 
0308 
0309 
0310 
0311 
0312