eic-opticks/ana/seq.py

0001 #!/usr/bin/env python
0002 #
0003 # Copyright (c) 2019 Opticks Team. All Rights Reserved.
0004 #
0005 # This file is part of Opticks
0006 # (see https://bitbucket.org/simoncblyth/opticks).
0007 #
0008 # Licensed under the Apache License, Version 2.0 (the "License");
0009 # you may not use this file except in compliance with the License.
0010 # You may obtain a copy of the License at
0011 #
0012 #   http://www.apache.org/licenses/LICENSE-2.0
0013 #
0014 # Unless required by applicable law or agreed to in writing, software
0015 # distributed under the License is distributed on an "AS IS" BASIS,
0016 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0017 # See the License for the specific language governing permissions and
0018 # limitations under the License.
0019 #
0020
0021 import os, datetime, logging, re, signal, sys
0022 log = logging.getLogger(__name__)
0023 import numpy as np
0024 lfilter = lambda *args:list(filter(*args))
0025
0026 try:
0027     reduce
0028 except NameError:
0029     from functools import reduce
0030 pass
0031
0032
0033 """
0034 About using IPython debugger
0035 ------------------------------
0036
0037 Plant an ipython debugger breakpoint inside some python module by
0038 duplicating the below code block into the head and entering
0039 the below at the critical code point::
0040
0041    ipdb.set_trace()
0042
0043 1. previously thought this was not working in py3, but it seems OK now
0044 2. attempting to import code doing the below doesnt work, have to include the code
0045 3. breakpoint actually lands on the statement immediately after "ipdb.set_trace()"
0046 4. when stopped at the breakpoint an >ipdb prompt should appear
0047
0048 Commands available from the ipdb prompt::
0049
0050 1. "bt" : show the stack backtrace
0051 2. "c"  : continue from the breakpoint
0052
0053 Observed that even when using python this can magically
0054 jump into ipython when a breakpoint is reached. However
0055 see notes/issues/ipython-ipdb-issue.rst which made
0056 me add the check for an ipython invokation to prevent
0057 such fragile magic.
0058 """
0059 #if sys.argv[0].find("ipython") > -1:
0060 if True:
0061     try:
0062         from IPython.core.debugger import Pdb as MyPdb
0063     except ImportError:
0064         class MyPdb(object):
0065             def set_trace(self):
0066                 log.error("IPython is required for ipdb.set_trace() " )
0067             pass
0068         pass
0069     pass
0070     ipdb = MyPdb()
0071 else:
0072     ipdb = None
0073 pass
0074
0075
0076 from opticks.ana.base import ihex_
0077 from opticks.ana.nbase import chi2, chi2_pvalue, ratio, count_unique_sorted
0078 from opticks.ana.nload import A
0079
0080 nibble_ = lambda n:0xf << (4*n)
0081 firstlast_ = lambda name:name[0] + name[-1]
0082
0083 class BaseType(object):
0084     hexstr = re.compile("^[0-9a-f]+$")
0085     def __init__(self, flags, abbrev, delim=" "):
0086         """
0087         When no abbreviation available, use first and last letter of name eg::
0088
0089            MACHINERY -> MY
0090            FABRICATED -> FD
0091            G4GUN -> GN
0092
0093         """
0094
0095         log.debug("flags.names %s " % repr(flags.names) )
0096         log.debug("abbrev.name2abbr %s " % abbrev.name2abbr )
0097         log.debug("abbrev %s " % repr(map(lambda _:abbrev.name2abbr.get(_,None),flags.names)))
0098
0099         abbrs = list(map(lambda name:abbrev.name2abbr.get(name,firstlast_(name)), flags.names ))
0100
0101         abbr2code = dict(zip(abbrs, flags.codes))
0102         code2abbr = dict(zip(flags.codes, abbrs))
0103         aflags = np.array( ['   '] + list(map(lambda _:"%s " % _  ,abbrs )), dtype="|S3" )
0104
0105         self.abbr2code = abbr2code
0106         self.code2abbr = code2abbr
0107         self.flags = flags
0108         self.abbrev = abbrev
0109         self.delim = delim
0110         self.aflags = aflags
0111
0112     def __call__(self, args):
0113         for a in args:
0114             return self.code(a)    # code from subtype
0115
0116     def check(self, s):
0117         f = self.abbr2code
0118         bad = 0
0119         for n in s.strip().split(self.delim):
0120             if f.get(n,0) == 0:
0121                #log.warn("code bad abbr [%s] s [%s] " % (n, s) )
0122                bad += 1
0123
0124         #if bad>0:
0125         #   log.warn("code sees %s bad abbr in [%s] " % (bad, s ))
0126         return bad
0127
0128
0129 def seq2msk_procedural(isq):
0130     ifl = 0
0131     for n in range(16):
0132         msk = 0xf << (4*n)
0133         nib = ( isq & msk ) >> (4*n)
0134         if nib == 0:continue   ## cannot vectorize with such procedural approach
0135         flg = 1 << (nib - 1)
0136         ifl |= flg
0137     pass
0138     return ifl
0139
0140 def seq2msk(isq):
0141     """
0142     Convert seqhis into mskhis
0143
0144     OpticksPhoton.h uses a mask but seq use the index for bit-bevity::
0145
0146           3 enum
0147           4 {
0148           5     CERENKOV          = 0x1 <<  0,
0149           6     SCINTILLATION     = 0x1 <<  1,
0150           7     MISS              = 0x1 <<  2,
0151           8     BULK_ABSORB       = 0x1 <<  3,
0152           9     BULK_REEMIT       = 0x1 <<  4,
0153
0154
0155     """
0156     ifl = np.zeros_like(isq)
0157     for n in range(16):
0158         msk = 0xf << (4*n)               ## nibble mask
0159         nib = ( isq & msk ) >> (4*n)     ## pick the nibble and shift to pole position
0160         flg = 1 << ( nib[nib>0] - 1 )    ## convert flag bit index into flag mask
0161         ifl[nib>0] |= flg
0162     pass
0163     return ifl
0164
0165
0166 class MaskType(BaseType):
0167     def __init__(self, flags, abbrev):
0168          BaseType.__init__(self, flags, abbrev, delim="|")
0169          log.debug("abbr2code %s " % repr(self.abbr2code))
0170          log.debug("code2abbr %s " % repr(self.code2abbr))
0171          log.debug("flags.codes %s " % repr(self.flags.codes))
0172
0173     def code(self, s):
0174         """
0175         :param s: abbreviation string eg "TO|BT|SD"  or hexstring 8ccccd (without 0x prefix)
0176         :return: integer bitmask
0177         """
0178
0179         #log.debug(" s [%s] " % s)
0180         if self.hexstr.match(s):
0181             c = int(s,16)
0182             cs = "%x" % c
0183             log.debug("converted hexstr %s to hexint %x and back %s " % (s,c,cs))
0184             assert s == cs
0185         else:
0186             f = self.abbr2code
0187             bad = self.check(s)
0188             c = reduce(lambda a,b:a|b,list(map(lambda n:f.get(n,0), s.split(self.delim))))
0189         pass
0190         return c
0191
0192
0193     def label(self, arg):
0194         """
0195         :param i: integer bitmask
0196         :return: abbreviation mask string
0197         """
0198         if type(arg) is list or issubclass( arg.__class__, np.ndarray ):
0199             return list(map(lambda _:self.label(_), arg))
0200         pass
0201
0202         if type(arg) is int:
0203             i = arg
0204         elif type(arg) in (np.uint64,np.int64,np.uint32,np.int32):
0205             i = arg
0206         elif type(arg) is str:
0207             if self.hexstr.match(arg):
0208                 i = int(arg, 16)
0209             else:
0210                 return arg
0211             pass
0212         else:
0213             log.fatal("unexpected argtype %s %s " % (arg, repr(type(arg))))
0214             assert 0
0215         pass
0216
0217         #log.debug(" i : %s %s " % (repr(i), type(i)))
0218         codes = filter(lambda c:int(i) & c, self.flags.codes)
0219         codes = sorted(codes,reverse=True)
0220         d = self.code2abbr
0221         return self.delim.join(map(lambda _:d.get(_,'?%s?' % _ ), codes ))
0222
0223
0224 class SeqType(BaseType):
0225     def __init__(self, flags, abbrev):
0226
0227          BaseType.__init__(self, flags, abbrev, delim=" ")
0228
0229     def code(self, s):
0230         """
0231         :param s: abbreviation sequence string eg "TO BT BR BR BR BT SA"
0232         :return: integer code eg 0x8cbbbcd
0233         """
0234         if type(s) is list or issubclass( s.__class__, np.ndarray ):
0235             return list(map(lambda _:self.code(_), s))
0236         pass
0237
0238
0239         if self.hexstr.match(s):
0240             c = int(s,16)
0241             cs = "%x" % c
0242             log.debug("converted hexstr %s to hexint %x and back %s " % (s,c,cs))
0243             assert s == cs
0244         else:
0245             f = self.abbr2code
0246             bad = self.check(s)
0247
0248             if bad>0:
0249                #assert 0
0250                log.warn("SeqType.code check [%s] bad %d " % (s, bad))
0251
0252             c = reduce(lambda a,b:a|b,map(lambda ib:ib[1] << 4*ib[0],enumerate(map(lambda n:f.get(n,0), s.split(self.delim)))))
0253         pass
0254         return c
0255
0256
0257     def label(self, arg):
0258         """
0259         :param i: integer code
0260         :return: abbreviation sequence string
0261
0262         ::
0263
0264             In [6]: from opticks.ana.histype import HisType
0265             In [7]: af = HisType()
0266
0267             In [4]: af.label(0xccd)        # hexint
0268             Out[4]: 'TO BT BT'
0269
0270             In [5]: af.label("TO BT BT")   # already a label
0271             Out[5]: 'TO BT BT'
0272
0273             In [6]: af.label("ccd")        # hexstring  (NB without 0x)
0274             Out[6]: 'TO BT BT'
0275
0276             In [7]: af.label(".ccd")       # hexstring with wildcard continuation char "."
0277             Out[7]: 'TO BT BT ..'
0278
0279         """
0280         if type(arg) is list or issubclass( arg.__class__, np.ndarray ):
0281             return list(map(lambda _:self.label(_), arg))
0282         pass
0283
0284         i = None
0285         wildcard = type(arg) == str and arg[0] == "."
0286         if wildcard:
0287             arg = arg[1:]
0288
0289         if type(arg) is int:
0290             i = arg
0291         elif type(arg) is np.uint64:
0292             i = arg
0293         elif type(arg) is str:
0294             if self.hexstr.match(arg):
0295                 i = int(arg, 16)
0296             else:
0297                 return arg
0298             pass
0299         else:
0300             log.fatal("unexpected argtype %s %s " % (arg, repr(type(arg))))
0301             assert 0
0302         pass
0303
0304         xs = ihex_(i)[::-1]  # top and tailed hex string in reverse order
0305         seq = list(map(lambda _:int(_,16), xs ))
0306         #log.debug("label xs %s seq %s " % (xs, repr(seq)) )
0307         d = self.code2abbr
0308         elem = list(map(lambda _:d.get(_,'?%s?' % _ ), seq ))
0309         if wildcard:
0310             elem += [".."]
0311
0312         return self.delim.join(elem)
0313
0314
0315
0316 class SeqList(object):
0317     def __init__(self, ls, af, sli ):
0318         """
0319         :param ls: seqhis or seqmat array of integers
0320         :param af: histype or mattype able to decode integers into labels
0321         """
0322         self.ls = ls
0323         self.afl = af.label
0324         self.sli = sli
0325
0326     def __repr__(self):
0327         return "\n".join(map(lambda _:self.afl(_), self.ls[self.sli]))
0328
0329     def __getitem__(self, sli):
0330          self.sli = sli
0331          return self
0332
0333
0334
0335
0336 class SeqTable(object):
0337     """
0338     Based on count_unique_sorted applied to a photon length array of sequence history codes
0339
0340     """
0341     ptn_ = r"^(?P<idx>\d{4})\s*(?P<code>[0-9a-f]+)\s*(?P<a>\d+)\s*(?P<b>\d+)\s*(?P<cf>\S*).*$"
0342     ptn = re.compile(ptn_)
0343
0344     @classmethod
0345     def FromTxt(cls, txt, af, **kwa):
0346         """
0347         Hmm this assumes a comparison table with cu(count-unique) array of shape (n,3)
0348         """
0349         dd = []
0350         for line in txt.split("\n"):
0351             m = cls.ptn.match(line)
0352             if not m: continue
0353             dd.append(m.groupdict())
0354         pass
0355         cu = np.zeros( (len(dd),3), dtype=np.uint64 )
0356         for i,d in enumerate(dd):
0357             cu[i] = ( int("0x%s"%d["code"],16), int(d["a"]), int(d["b"]) )
0358         pass
0359         return cls(cu, af, **kwa)
0360
0361
0362     def __init__(self, cu, af, cnames=[], dbgseq=0, dbgmsk=0, dbgzero=False, cmx=0, c2cut=30, shortname="noshortname"):
0363         """
0364         :param cu: count unique array, typically shaped (n, 2) or (n,3) for comparisons
0365         :param af: instance of SeqType subclass such as HisType
0366         :param cnames: column names
0367
0368         """
0369         log.debug("cnames %s " % repr(cnames))
0370
0371         assert len(cu.shape) == 2 and cu.shape[1] >= 2
0372         ncol = cu.shape[1] - 1    # excluding column 0 which is the seq code
0373
0374         log.debug("SeqTable.__init__ dbgseq %x" % dbgseq)
0375         log.debug("shortname %s cu.shape %s ncol: %s" % (shortname,repr(cu.shape), ncol))
0376         assert shortname != "noshortname"
0377
0378         if sys.version_info.major in (2,3):
0379             pass
0380             #ipdb.set_trace()  # plant an ipython debugger breakpoint
0381         pass
0382
0383
0384         # self.smry = smry     ## more convenient as method argument, not ctor argument
0385         self.dirty = False
0386         self.cu = cu
0387
0388         self.ncol = ncol
0389         self.dbgseq = dbgseq
0390         self.dbgmsk = dbgmsk
0391         self.dbgzero = dbgzero
0392         self.cmx = cmx
0393         self.shortname = shortname
0394
0395         seqs = cu[:,0]
0396         msks = seq2msk(seqs)
0397
0398         tots = [cu[:,n].sum() for n in range(1,ncol+1)]
0399
0400         if ncol == 2:
0401             a = cu[:,1].astype(np.float64)
0402             b = cu[:,2].astype(np.float64)
0403
0404             ia = cu[:,1].astype(np.int64)
0405             ib = cu[:,2].astype(np.int64)
0406             idif = ia-ib
0407
0408             c2, c2n, c2c = chi2(a, b, cut=c2cut)
0409
0410             #c2s = c2/c2n
0411             #c2s_tot = c2s.sum()  # same as c2p
0412
0413             ndf = c2n - 1   ## totals are constrained to match, so one less degree of freedom ?
0414
0415             c2sum = c2.sum()
0416             c2p = c2sum/max(1,ndf)
0417
0418             c2_pval = chi2_pvalue( c2sum , ndf )
0419
0420
0421             log.debug(" c2sum %10.4f ndf %d c2p %10.4f c2_pval %10.4f " % (c2sum,ndf,c2p, c2_pval ))
0422
0423             #cnames += ["c2"]
0424             #tots += ["%10.2f" % c2sum ]
0425
0426             stats = "%.2f/%d = %5.2f   pvalue:P[C2>]:%0.3f  1-pvalue:P[C2<]:%0.3f " % (c2sum,ndf,c2p,c2_pval,1-c2_pval)
0427
0428             cfcount = cu[:,1:]
0429
0430             ab, ba = ratio(a, b)
0431             #cnames += ["ab"]
0432             #cnames += ["ba"]
0433
0434         else:
0435             c2 = None
0436             #c2s = None
0437             c2p = None
0438             cfcount = None
0439             ab = None
0440             ba = None
0441             ia = None
0442             ib = None
0443             idif = None
0444             stats = None
0445             c2sum = None
0446         pass
0447         self.ia = ia
0448         self.ib = ib
0449         self.idif = idif
0450
0451
0452         if len(tots) == 1:
0453             total = tots[0]
0454             tots += ["%10.2f" % 1.0 ]
0455         else:
0456             total = None
0457         pass
0458
0459         self.total = total
0460         self.c2 = c2
0461         #self.c2s = c2s
0462         self.c2p = c2p
0463         self.ab = ab
0464         self.ba = ba
0465
0466         self.stats = stats
0467         self.seqs = seqs
0468         self.msks = msks
0469
0470         codes = cu[:,0]
0471         counts = cu[:,1]
0472
0473         #log.debug("codes  : %s " % repr(codes))
0474         #log.debug("counts : %s " % repr(counts))
0475
0476         labels = list(map(lambda i:af.label(i), codes ))
0477         nstep = list(map(lambda l:len(l.split(af.delim)),labels))
0478
0479         self.label2nstep = dict(zip(labels, nstep))
0480         self.labels = labels
0481         self.codes = codes
0482         self.counts = counts
0483
0484         k_line = self.line(0, key=True, smry=False) if len(cu) > 0 else ""
0485         k_sine = self.line(-1, key=True, smry=True) if len(cu) > 0 else ""
0486
0487         lines = list(filter(None, list(map(lambda n:self.line(n,smry=False), range(len(cu))))))
0488         sines = list(filter(None, list(map(lambda n:self.line(n,smry=True ), range(len(cu))))))
0489
0490         self.lines = [k_line] + lines + [k_line]
0491         self.sines = [k_sine] + sines + [k_sine]
0492
0493         self.label2count = dict(zip(labels, counts))
0494         self.label2line = dict(zip(labels, lines))
0495         self.label2sine = dict(zip(labels, sines))
0496         self.label2code = dict(zip(labels, seqs))
0497
0498         if cfcount is not None:
0499             self.label2cfcount = dict(zip(labels, cfcount))
0500         pass
0501
0502         self.cnames = cnames
0503         self.tots = tots
0504         self.c2sum = c2sum
0505         log.debug(" tots %s " % repr(tots))
0506
0507         self.af = af
0508         self.sli = slice(None)
0509
0510         ## hmm applying numpy psel style selection on the table eg c2>10
0511         ## would be much more flexible than simple python slicing
0512
0513     def cfo_line(self, n):
0514         if self.ncol == 2:
0515             cfo_key = getattr(self, 'cfordering_key', [] )
0516             if len(cfo_key) > 0:
0517                 cfo_debug = " %10.5f " % self.cfordering_key[n]
0518             else:
0519                 #log.debug("no cfo_key ")
0520                 cfo_debug = " no cfo_key"
0521             pass
0522         else:
0523             cfo_debug = ""
0524         pass
0525         return cfo_debug
0526
0527
0528     k_xn_dot = ".%4s"
0529     k_xn_fmt = "%4s "
0530     xn_fmt = "%0.4d "
0531
0532     k_xs_fmt = "%16s "
0533     k_xv_fmt = " %7s "
0534
0535     idif_fmt = " %4d "
0536     k_idif_fmt = " %4s "
0537
0538     sc2_fmt = "  %10.2f  "
0539     k_sc2_fmt = "%14s"
0540     div1 = "   "
0541     div2 = "   "
0542     div3 = "   "
0543
0544     k_label_fmt = "%s"
0545
0546
0547     def line(self, n, key=False, smry=False):
0548         """
0549         :param n: 0-based line number
0550         :param key: when True returns column labels, ignoring n
0551         :param smry: when True presents less columns
0552         :return str: formatted line of sequence table
0553         """
0554         iseq = int(self.cu[n,0])
0555         imsk = int(self.msks[n])
0556
0557         if self.dbgseq > 0 and ( self.dbgseq & iseq ) != self.dbgseq:
0558             return None
0559         pass
0560
0561         if self.dbgmsk > 0:
0562             pick = (self.dbgmsk & imsk) == self.dbgmsk   #
0563             if not pick:
0564                 return None
0565             pass
0566         pass
0567
0568         if smry == False:
0569             xn = self.xn_fmt % (n)
0570             k_xn = self.k_xn_fmt  % ("n")
0571         else:
0572             xn = self.xn_fmt % (n)
0573             k_xn = self.k_xn_fmt  % ("n")
0574         pass
0575
0576         if smry == False:
0577             xs = self.k_xs_fmt % (ihex_(iseq))
0578             k_xs = self.k_xs_fmt % ("iseq")
0579         else:
0580             xs = self.k_xs_fmt % (ihex_(iseq))
0581             k_xs = self.k_xs_fmt % ("iseq")
0582         pass
0583
0584         #cfo_debug = self.cfo_line(n)
0585         cfo_debug = ""
0586         k_cfo_debug = ""
0587
0588         vals = list(map(lambda _:self.k_xv_fmt  % _, self.cu[n,1:] ))
0589         keys = "abcdefghijklmnopqrstuvwxyz"
0590         k_vals = list(map(lambda _:self.k_xv_fmt % keys[_], range(len(self.cu[n,1:])) ))
0591
0592         idif = self.idif[n] if len(vals) == 2 else None
0593         idif = self.idif_fmt % idif if idif is not None else " "
0594         k_idif = self.k_idif_fmt % "a-b" if idif is not None else " "
0595
0596         label = self.k_label_fmt % self.labels[n]
0597         k_label = self.k_label_fmt % "label"
0598
0599         if smry == False:
0600             nstep = "[%-2d]" % self.label2nstep[label]
0601             k_nstep = "[ns]"
0602         else:
0603             nstep = ""
0604             k_nstep = ""
0605         pass
0606
0607         # show only lines with chi2 contrib greater than cmx
0608         if self.c2 is not None:
0609             if self.cmx > 0 and self.c2[n] < self.cmx:
0610                 return None
0611             pass
0612         pass
0613
0614         # show only lines with zero counts
0615         if self.c2 is not None:
0616             if self.dbgzero and self.cu[n,1] > 0 and self.cu[n,2] > 0:
0617                 return None
0618             pass
0619         pass
0620
0621         if self.c2 is not None:
0622             sc2 = self.sc2_fmt % (self.c2[n])
0623             k_sc2 = self.k_sc2_fmt % "(a-b)^2/(a+b)"
0624         else:
0625             sc2 = ""
0626             k_sc2 = ""
0627         pass
0628
0629         if self.ab is not None and smry == False:
0630             sab = " %10.3f +- %5.3f " % ( self.ab[n,0], self.ab[n,1] )
0631             k_sab = " %10s    %5s " % ( "a/b", "" )
0632         else:
0633             sab = ""
0634             k_sab = ""
0635         pass
0636
0637         if self.ba is not None and smry == False:
0638             sba = " %10.3f +- %5.3f " % ( self.ba[n,0], self.ba[n,1] )
0639             k_sba = " %10s    %5s " % ( "b/a", "" )
0640         else:
0641             sba = ""
0642             k_sba = ""
0643         pass
0644
0645         if self.total is not None:
0646              frac = float(self.cu[n,1])/float(self.total)
0647              frac = " %10.3f   " % frac
0648              k_frac = " %10s   " % "frac"
0649         else:
0650              frac = ""
0651              k_frac = ""
0652         pass
0653
0654         cols =   [  cfo_debug,   xn,  xs,   frac] + vals +   [self.div1,  idif  , self.div2,   sc2,   sab,   sba,   nstep, self.div3,   label]
0655         k_cols = [k_cfo_debug, k_xn,k_xs, k_frac] + k_vals + [self.div1, k_idif , self.div2, k_sc2, k_sab, k_sba, k_nstep, self.div3, k_label]
0656
0657         u_cols = k_cols if key==True else cols
0658         return "".join(filter(lambda _:_ != "", u_cols))
0659
0660     def present(self, smry=False):
0661         """
0662         title is externally set from evt.present_table
0663         """
0664         spacer_ = lambda _:"%1s%3s %22s " % (".","",_)
0665         space = spacer_("")
0666
0667         lhs_fmt = self.k_xn_dot + self.k_xs_fmt
0668         lhs_key = lhs_fmt % ("","TOTALS:")
0669
0670         rhs_fmt = self.div1 + self.k_idif_fmt + self.div2 + self.k_sc2_fmt + self.div3 + self.k_label_fmt
0671
0672         if self.c2sum is None or self.stats is None:
0673             rhs_key = ""
0674         else:
0675             rhs_key = rhs_fmt % ( "", self.sc2_fmt % self.c2sum, self.stats )
0676         pass
0677
0678         title = spacer_(getattr(self,'title',"")+"  cfo:"+getattr(self,'cfordering',"-"))
0679         body_ = lambda _:" %7s " % _
0680
0681         head = title + " ".join(map(body_, self.cnames ))
0682         #head = title
0683
0684         tots = lhs_key + "".join(map(body_, self.tots)) + rhs_key
0685
0686         if smry:
0687             return "\n".join([tots] + lfilter(None,self.sines[self.sli]) + [tots])
0688         else:
0689             return "\n".join([self.shortname,head,tots]+ lfilter(None,self.lines[self.sli]) + [tots])
0690         pass
0691
0692     def __repr__(self):
0693         return self.present(smry=False)
0694
0695     def __str__(self):
0696         return self.present(smry=True)
0697
0698     def __call__(self, labels):
0699         ll = sorted(list(labels), key=lambda _:self.label2count.get(_, None))
0700         return "\n".join(map(lambda _:self.label2line.get(_,None), ll ))
0701
0702     def _get_ll(self):
0703         return np.array(self.lines[1:-1])  # skip the label lines
0704     ll = property(_get_ll)
0705
0706     def _get_ss(self):
0707         return np.array(self.sines[1:-1])  # skip the label lines
0708     ss = property(_get_ss)
0709
0710     def __getitem__(self, sli):
0711          self.sli = sli
0712          return self
0713
0714     def compare(self, other, ordering="self", shortname="noshortname?"):
0715         """
0716         :param other: SeqTable instance
0717         :param ordering: string "self", "other", "max"  control descending count row ordering
0718         """
0719         log.debug("SeqTable.compare START")
0720         l = set(self.labels)
0721         o = set(other.labels)
0722         lo = list(l | o)
0723         # union of labels in self or other
0724
0725         if ordering == "max":
0726             ordering_ = lambda _:max(self.label2count.get(_,0),other.label2count.get(_,0))
0727         elif ordering == "sum":
0728             ordering_ = lambda _:self.label2count.get(_,0)+other.label2count.get(_,0)
0729         elif ordering == "sum_code":
0730             ordering_ = lambda _:1e9*float(self.label2count.get(_,0)+other.label2count.get(_,0))+float(self.label2code.get(_,0)+other.label2code.get(_,0))
0731         elif ordering == "code":
0732             ordering_ = lambda _:self.label2code.get(_,0) + other.label2code.get(_,0)
0733         elif ordering == "self":
0734             ordering_ = lambda _:self.label2count.get(_,0)
0735         elif ordering == "other":
0736             ordering_ = lambda _:other.label2count.get(_,0)
0737         else:
0738             assert 0, "ordering_ must be one of max/sum/self/other "
0739         pass
0740
0741         u = sorted( lo, key=ordering_, reverse=True)
0742         # order the labels union by descending maximum count in self or other
0743
0744         cf = np.zeros( (len(u),3), dtype=np.uint64 )
0745         cf[:,0] = list(map(lambda _:self.af.code(_), u ))
0746         cf[:,1] = list(map(lambda _:self.label2count.get(_,0), u ))
0747         cf[:,2] = list(map(lambda _:other.label2count.get(_,0), u ))
0748         # form comparison table
0749
0750         cnames = self.cnames + other.cnames
0751
0752         log.debug("compare dbgseq %x dbgmsk %x " % (self.dbgseq, self.dbgmsk))
0753
0754         cftab = SeqTable(cf, self.af, cnames=cnames, dbgseq=self.dbgseq, dbgmsk=self.dbgmsk, dbgzero=self.dbgzero, cmx=self.cmx, shortname=shortname)
0755         cftab.cfordering = ordering
0756
0757         cfordering_key = list(map(ordering_, u))
0758         log.debug("cfordering_key for %s" % shortname)
0759         #log.debug(cfordering_key)
0760
0761         cftab.cfordering_key = cfordering_key
0762         log.debug("SeqTable.compare DONE")
0763         return cftab
0764
0765
0766 class SeqAna(object):
0767     """
0768     Canonical usage is from evt with::
0769
0770         self.seqhis_ana = SeqAna(self.seqhis, self.histype)
0771         self.seqmat_ana = SeqAna(self.seqmat, self.mattype)
0772
0773     In addition to holding the SeqTable instance SeqAna provides
0774     methods to make boolean array selections using the aseq and
0775     form labels.
0776
0777     SeqAna and its contained SeqTable exist within a particular selection,
0778     ie changing selection entails recreation of SeqAna and its contained SeqTable
0779
0780     Hmm: when searching for nibbles (eg RE) it would be convenient to view seqhis as an
0781     np.int4/np.uint4 dtype, but there is no such thing.
0782     """
0783     @classmethod
0784     def for_evt(cls, af, tag="1", src="torch", det="dayabay", pfx="source", offset=0):
0785         ph = A.load_("ph",src,tag,det, pfx=pfx)
0786         aseq = ph[:,0,offset]
0787         return cls(aseq, af, cnames=[tag])
0788
0789     def __init__(self, aseq, af, cnames=["noname"], dbgseq=0, dbgmsk=0, dbgzero=False, cmx=0, table_shortname="no_table_shortname"):
0790         """
0791         :param aseq: photon length sequence array, eg a.seqhis or a.seqmat
0792         :param af: instance of SeqType subclass, which knows what the nibble codes mean
0793
0794         ::
0795
0796             In [10]: sa.aseq
0797             A([  9227469,   9227469, 147639405, ...,   9227469,   9227469,     19661], dtype=uint64)
0798
0799             In [11]: sa.aseq.shape
0800             Out[11]: (1000000,)
0801
0802         """
0803         cu = count_unique_sorted(aseq)
0804         self.af = af
0805         self.dbgseq = dbgseq
0806         self.dbgmsk = dbgmsk
0807         self.dbgzero = dbgzero
0808         self.cmx = cmx
0809
0810         self.table = SeqTable(cu, af, cnames=cnames, dbgseq=self.dbgseq, dbgmsk=self.dbgmsk, dbgzero=self.dbgzero, cmx=self.cmx, shortname=table_shortname)
0811
0812         self.aseq = aseq
0813         self.cu = cu
0814
0815     def labels(self, prefix=None):
0816         """
0817         :param prefix: string sequence label eg "TO BT BT SC"
0818         :return labels: list of string labels that start with the prefix
0819         """
0820         codes = self.cu[:,0]
0821         if not prefix is None:
0822             pfx = self.af.code(prefix)
0823             codes = codes[np.where( codes & pfx == pfx )]
0824         pass
0825         labels =  map( lambda _:self.af.label(_), codes )
0826         return labels
0827
0828     def seq_or(self, sseq):
0829         """
0830         :param sseq: list of sequence labels including source, eg "TO BR SA" "TO BR AB"
0831         :return psel: selection boolean array of photon length
0832
0833         Selection of photons with any of the sequence arguments
0834         """
0835         #af = self.table.af
0836         af = self.af
0837         bseq = list(map(lambda _:self.aseq == af.code(_), sseq))  # full length boolean array
0838         psel = np.logical_or.reduce(bseq)
0839         return psel
0840
0841     def seq_or_count(self, sseq):
0842         psel = self.seq_or(sseq)
0843         return np.count_nonzero(psel)
0844
0845     def seq_startswith(self, prefix):
0846         """
0847         :param prefix: sequence string stub eg "TO BT BT SC"
0848         :return psel: selection boolean array of photon length
0849
0850         Selection of all photons starting with prefix sequence
0851         """
0852         af = self.table.af
0853         pfx = af.code(prefix)
0854         psel = self.aseq & pfx == pfx
0855         return psel
0856
0857     def seq_startswith_count(self, prefix):
0858         psel = self.seq_startswith(prefix)
0859         return np.count_nonzero(psel)
0860
0861     def seq_any_(self, co="RE"):
0862         code = self.af.code(co)
0863         aseq = self.aseq
0864         wk = np.zeros( (len(aseq), 16), dtype=np.bool )
0865         for n in range(16): wk[:, n] = ( aseq & ( 0xf << (n*4) ) == ( code << (n*4) ))
0866         return wk
0867
0868     def seq_any(self, co="RE" ):
0869         """
0870         :param co: string label for seqhis nibble
0871         :return psel: selection boolean array of photon length
0872
0873         Selects photons with the co nibble in any of the 16 slots
0874         """
0875         wk = self.seq_any_(co)
0876         psel = np.any(wk, axis=1)
0877         return psel
0878
0879     def seq_any_count(self, co="RE" ):
0880         """
0881         :param co: string label for seqhis nibble
0882         :return count: count_nonzero of psel result of seq_any
0883
0884         The count is of photons with the co in any slot (this is not the count of nibbles)
0885         """
0886         psel = self.seq_any(co)
0887         return np.count_nonzero(psel)
0888
0889     def seq_any_count_nibble(self, co="RE"):
0890         """
0891         Like seq_any_count but instead of counting photons count nibbles
0892         """
0893         wk = self.seq_any_(co)
0894         return np.count_nonzero(wk.ravel())
0895
0896
0897 def test_simple_table():
0898     log.info("test_simple_table")
0899     from opticks.ana.histype import HisType
0900     af = HisType()
0901     cu = np.zeros( (5,2), dtype=np.uint64 )  # mock up a count unique array
0902     cu[0] = (af.code("TO BT AB"), 100)
0903     cu[1] = (af.code("TO BT AB SD"),200)
0904     cu[2] = (af.code("TO BT BR BT AB"),300)
0905     cu[3] = (af.code("TO BT BT BT AB SD"),400)
0906     cu[4] = (af.code("TO BT AB MI"),500)
0907
0908     table = SeqTable(cu, af)
0909     print(table)
0910
0911 def test_comparison_table():
0912     log.info("test_comparison_table")
0913     from opticks.ana.histype import HisType
0914     af = HisType()
0915     cu = np.zeros( (8,3), dtype=np.uint64 )  # mock up a count unique array
0916
0917     a = 0
0918     b = 0
0919
0920     cu[0] = (af.code("TO"), a,b)
0921     cu[1] = (af.code("TO BT"), a,b)
0922     cu[2] = (af.code("TO BT BT"),a,b)
0923     cu[3] = (af.code("TO BT BT BT"),a,b)
0924     cu[4] = (af.code("TO BT BT BT BT"),a,b)
0925     cu[5] = (af.code("TO BT BT BT BT BT"),a,b)
0926     cu[6] = (af.code("TO BT BT BT BT BT BT"),a,b)
0927     cu[7] = (af.code("TO BT BT BT BT BT BT BT"),a,b)
0928
0929     table = SeqTable(cu, af)
0930     print(table)
0931
0932
0933 def test_comparison_table_2():
0934     txt = r"""
0935 ab.ahis
0936 .            all_seqhis_ana  1:tboolean-box:tboolean-box   -1:tboolean-box:tboolean-box        c2        ab        ba
0937 .                              10000     10000      2285.00/5 = 457.00  (pval:1.000 prob:0.000)
0938 0000             8ccd      8805      8807     -2             0.00        1.000 +- 0.011        1.000 +- 0.011  [4 ] TO BT BT SA
0939 0001              3bd       580         0    580           580.00        0.000 +- 0.000        0.000 +- 0.000  [3 ] TO BR MI
0940 0002            3cbcd       563         0    563           563.00        0.000 +- 0.000        0.000 +- 0.000  [5 ] TO BT BR BT MI
0941 0003           8cbbcd        29        29      0             0.00        1.000 +- 0.186        1.000 +- 0.186  [6 ] TO BT BR BR BT SA
0942 0004          3cbbbcd         6         0      6             0.00        0.000 +- 0.000        0.000 +- 0.000  [7 ] TO BT BR BR BR BT MI
0943 0005              36d         5         0      5             0.00        0.000 +- 0.000        0.000 +- 0.000  [3 ] TO SC MI
0944 0006               4d         3         0      3             0.00        0.000 +- 0.000        0.000 +- 0.000  [2 ] TO AB
0945 0007            86ccd         2         2      0             0.00        1.000 +- 0.707        1.000 +- 0.707  [5 ] TO BT BT SC SA
0946 0008            8c6cd         1         1      0             0.00        1.000 +- 1.000        1.000 +- 1.000  [5 ] TO BT SC BT SA
0947 0009            3b6bd         1         0      1             0.00        0.000 +- 0.000        0.000 +- 0.000  [5 ] TO BR SC BR MI
0948 0010           8b6ccd         1         0      1             0.00        0.000 +- 0.000        0.000 +- 0.000  [6 ] TO BT BT SC BR SA
0949 0011            3cc6d         1         0      1             0.00        0.000 +- 0.000        0.000 +- 0.000  [5 ] TO SC BT BT MI
0950 0012          3cc6ccd         1         0      1             0.00        0.000 +- 0.000        0.000 +- 0.000  [7 ] TO BT BT SC BT BT MI
0951 0013             4ccd         1         1      0             0.00        1.000 +- 1.000        1.000 +- 1.000  [4 ] TO BT BT AB
0952 0014            3c6cd         1         0      1             0.00        0.000 +- 0.000        0.000 +- 0.000  [5 ] TO BT SC BT MI
0953 0015              8bd         0       580   -580           580.00        0.000 +- 0.000        0.000 +- 0.000  [3 ] TO BR SA
0954 0016           8cbc6d         0         1     -1             0.00        0.000 +- 0.000        0.000 +- 0.000  [6 ] TO SC BT BR BT SA
0955 0017              86d         0         5     -5             0.00        0.000 +- 0.000        0.000 +- 0.000  [3 ] TO SC SA
0956 0018        8cbbc6ccd         0         1     -1             0.00        0.000 +- 0.000        0.000 +- 0.000  [9 ] TO BT BT SC BT BR BR BT SA
0957 0019        8cbbbb6cd         0         1     -1             0.00        0.000 +- 0.000        0.000 +- 0.000  [9 ] TO BT SC BR BR BR BR BT SA
0958 .                              10000     10000      2285.00/5 = 457.00  (pval:1.000 prob:0.000)
0959     """
0960     log.info("test_comparison_table_2")
0961     from opticks.ana.histype import HisType
0962     af = HisType()
0963     table = SeqTable.FromTxt(txt, af)
0964     print(table)
0965
0966
0967 if __name__ == '__main__':
0968     pass
0969     ## see histype.py and mattype.py for other testing of this
0970     logging.basicConfig(level=logging.INFO)
0971     #test_simple_table()
0972     #test_comparison_table()
0973     test_comparison_table_2()
0974
0975
0976