eic-opticks/ana/qdv.py

0001 #!/usr/bin/env python
0002 #
0003 # Copyright (c) 2019 Opticks Team. All Rights Reserved.
0004 #
0005 # This file is part of Opticks
0006 # (see https://bitbucket.org/simoncblyth/opticks).
0007 #
0008 # Licensed under the Apache License, Version 2.0 (the "License");
0009 # you may not use this file except in compliance with the License.
0010 # You may obtain a copy of the License at
0011 #
0012 #   http://www.apache.org/licenses/LICENSE-2.0
0013 #
0014 # Unless required by applicable law or agreed to in writing, software
0015 # distributed under the License is distributed on an "AS IS" BASIS,
0016 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0017 # See the License for the specific language governing permissions and
0018 # limitations under the License.
0019 #
0020
0021 """
0022 qdv.py : photon deviation reporting
0023 ========================================
0024
0025 This replaces the slow dv.py with a much faster implementation
0026
0027 * https://bitbucket.org/simoncblyth/opticks/commits/d31f4e271ee34a7fe1bf68af3e07f90ceb54565e
0028
0029 fixed slow deviation checking for large numbers of photons by loop inversion
0030 from select-then-deviate to deviate-then-select so only do the expensive
0031 deviation check once : replaces ana/dv.py with ana/qdv.py see
0032 notes/issues/py-analysis-too-slow-for-big-events.rst
0033
0034 """
0035 import os, sys, logging, numpy as np
0036 #from opticks.ana.log import fatal_, error_, warning_, info_, debug_
0037 from opticks.ana.log import underline_, blink_
0038
0039 from opticks.ana.level import Level
0040
0041 log = logging.getLogger(__name__)
0042
0043
0044
0045 class QDV(object):
0046
0047    FMT  =       "  %9d %9d : %5d %5d %5d : %6.4f %6.4f %6.4f : %9.4f %9.4f %9.4f  "
0048    CFMT =       "  %9s %9s : %5s %5s %5s : %6s %6s %6s : %9s %9s %9s    "
0049    CFMT_CUTS =  "  %9s %9s : %5s %5s %5s : %6.4f %6.4f %6.4f : %9s %9s %9s    "
0050    CFMT_COLS =  "nitem nelem nwar nerr nfat fwar ferr ffat mx mn avg".split()
0051
0052    LMT  = " %0.4d %10s : %30s : %7d  %7d "   # labels seqhis line
0053    CLMT = " %4s %10s : %30s : %7s  %7s "
0054
0055    clabel = CLMT % ( "idx", "msg", "sel", "lcu1", "lcu2" )
0056    cblank = CLMT % ( "", "", "", "", "" )
0057
0058
0059    def __init__(self, tab, idx, sel, dv, ndv, lcu, dvmax, msg=""):
0060        """
0061        :param tab: QDVTab instance
0062        :param idx: unskipped orignal seqhis line index
0063        :param sel: single line selection eg 'TO BT BT SA'
0064        :param dv: photon max deviations within seqhis code selection
0065        :param ndv: number of elements aggregated in the max
0066        :param lcu: list of length 3 with (seqhis-bigint, a-count, b-count)
0067        :param dvmax: triplet of floats for warn/error/fatal deviation levels
0068
0069        Access an Dv instance in ipython::
0070
0071             In [12]: ab.ox_dv.dvs[2]
0072             Out[12]:  0002            :                 TO BT BR BT SA :     561      527  :          27       324/       14: 0.043  mx/mn/av   0.00238/        0/3.823e-05  eps:0.0002
0073
0074        Get at the values::
0075
0076            In [16]: av,bv = ab.ox_dv.dvs[2].av, ab.ox_dv.dvs[2].bv
0077
0078
0079        Change in meaning of fractions
0080        --------------------------------
0081
0082        The transition from dv.py to qdv.py changes the meaning of the
0083        warning fractions : it used to be fraction of compared elements with deviation exceeding cut
0084        it is now fraction of photons with aggregated max deviation exceeding cut
0085
0086        This tends to reduce the fractions, but its more meaningful as single elements dont go wrong
0087        on there own : usually its the photon that goes wrong with many elements deviating together.
0088        So a fraction of deviant photons is more pertinent.
0089        """
0090        label = self.LMT % ( idx, msg, sel, lcu[1], lcu[2] )
0091        assert len(dvmax) == 3
0092
0093        nitem = len(dv)
0094        npoi = len(sel.split())
0095        nelem = ndv*npoi*nitem
0096
0097        log.debug(" nitem:%d npoi:%d nelem:%d " % (nitem,npoi,nelem))
0098
0099        if nelem>0:
0100            mx = dv.max()
0101            mn = dv.min()
0102            avg = dv.sum()/float(nelem)
0103            disc=[ dv[dv>dvmax[0]], dv[dv>dvmax[1]], dv[dv>dvmax[2]] ]
0104            ndisc = list(map(len, disc))     # elements, not items
0105            fdisc =  list(map(lambda _:float(_)/float(nelem), ndisc ))
0106        else:
0107            mx = None
0108            mn = None
0109            avg = None
0110            ndisc = None
0111            fdisc = None
0112        pass
0113
0114        self.tab = tab
0115        self.label = label
0116        self.nitem = nitem
0117        self.nelem = nelem
0118
0119        self.mx = mx
0120        self.mn = mn
0121        self.avg = avg
0122        self.ndisc = ndisc
0123        self.fdisc = fdisc
0124        self.ismax = False # set from DvTab
0125
0126        self.dv = dv
0127        self.ndv = ndv
0128        self.lcu  = lcu
0129        self.dvmax = dvmax
0130        self.msg = msg
0131
0132        log.debug("mx %s " % self.mx)
0133        if self.mx is None:
0134            lev = Level.FromName("FATAL")
0135            lmsg = "  mx None "
0136        elif self.mx > self.dvmax[2]:
0137            lev = Level.FromName("FATAL")
0138            lmsg = "  > dvmax[2] %.4f " % self.dvmax[2]
0139        elif self.mx > self.dvmax[1]:
0140            lev = Level.FromName("ERROR")
0141            lmsg = "  > dvmax[1] %.4f " % self.dvmax[1]
0142        elif self.mx > self.dvmax[0]:
0143            lev = Level.FromName("WARNING")
0144            lmsg = "  > dvmax[0] %.4f " % self.dvmax[0]
0145        else:
0146            lev = Level.FromName("INFO")
0147            lmsg = ""
0148        pass
0149        self.fn_ = lev.fn_
0150        self.lev = lev
0151        self.lmsg = lmsg
0152
0153
0154    @classmethod
0155    def columns(cls):
0156        cdesc = cls.CFMT % tuple(cls.CFMT_COLS)
0157        clabel = cls.clabel
0158        return "%s : %s  " % (clabel, cdesc )
0159
0160    def columns2(self, tdisc):
0161        cdesc2 = self.CFMT_CUTS % ("","",tdisc[0],tdisc[1],tdisc[2], self.dvmax[0], self.dvmax[1], self.dvmax[2], "", "", "")
0162        return "%s : %s  " % (self.cblank, cdesc2 )
0163
0164
0165    def __repr__(self):
0166        if self.nelem>0:
0167            desc =  self.FMT % ( self.nitem, self.nelem, self.ndisc[0], self.ndisc[1], self.ndisc[2], self.fdisc[0], self.fdisc[1], self.fdisc[2], self.mx, self.mn, self.avg )
0168        else:
0169            desc = ""
0170        pass
0171
0172        if self.ismax:
0173            #pdesc = self.fn_(desc)
0174            pdesc = underline_(self.fn_(desc))
0175        else:
0176            pdesc = self.fn_(desc)
0177        pass
0178        return "%s : %s : %s : %s " % (self.label, pdesc, self.fn_("%20s"%self.lev.name), self.fn_(self.lmsg)  )
0179
0180
0181
0182
0183 class QDVTab(object):
0184     """
0185     A much quicker way to analyse deviations, avoiding selection jumping
0186     by inverting the loop ordering : a single deviation calc first
0187     and then aggregate into categories.
0188
0189     Skips eg "SC AB RE" only relevant for non-aligned "accidental" comparisons
0190
0191     """
0192     def __init__(self, name, ab, skips="", use_utaildebug=False ):
0193         """
0194         :param name: ox_dv, rpost_dv, rpol_dv
0195         :param ab:
0196         :param skips:
0197
0198         use_utaildebug
0199              suspect it caused some a.rposta with dimensions 1 off b.rposta
0200
0201         """
0202         log.debug("[ sel %s  " % (name) )
0203         self.name = name
0204         self.ab = ab
0205         self.seqtab = ab.ahis   # SeqTable
0206         self.a = ab.a
0207         self.b = ab.b
0208         self.dirty = False
0209         self.skips = skips.split()
0210         #self.sli = slice(None)
0211         self.sli = slice(0,10)
0212
0213         labels = self.seqtab.labels       # eg list of length 17 : ['TO BT BT SA', 'TO BR SA', ... ]
0214
0215         cu = self.seqtab.cu               # eg with shape (17,3)  the 3 columns being (seqhis, a-count, b-count )
0216
0217         log.debug("labels:%s" % labels)
0218
0219         assert len(labels) == len(cu)
0220         nsel = len(labels)
0221
0222         self.labels = labels
0223         self.cu = cu
0224         self.nsel = nsel
0225         self._ndisc = None
0226
0227         if self.ab.cfm.utaildebug == 1 and use_utaildebug:
0228             aligned = np.logical_and( self.a.utail == self.b.utail, self.a.seqhis == self.b.seqhis )
0229         else:
0230             aligned = self.a.seqhis == self.b.seqhis
0231         pass
0232
0233         self.aligned = aligned
0234         self.init_qdv()
0235         self.dvs = self.make_selection_dvs()
0236         self.findmax()
0237         log.debug("] %s " % name )
0238
0239     def init_qdv(self):
0240         """
0241         Full array deviation comparisons and max aggregation, with no selection
0242         """
0243         a = self.a
0244         b = self.b
0245
0246         if self.name == "rpost_dv":
0247             qdv = np.abs(a.rposta - b.rposta).max(axis=(1,2))   # maximal photon deviates
0248             ndv = a.rposta.shape[a.rposta.ndim-1]               # 4, items in last dimension
0249         elif self.name == "rpol_dv":
0250             qdv = np.abs(a.rpola - b.rpola).max(axis=(1,2))
0251             ndv = 3
0252         elif self.name == "ox_dv":
0253             aox = a.ox[:,:3,:]
0254             box = b.ox[:,:3,:]
0255             sox = aox.shape
0256             assert len(sox) == 3
0257             qdv = np.abs(aox - box).max(axis=(1,2))
0258             ndv = sox[-2]*sox[-1]                      # 3*4 items in last two dimension
0259         else:
0260             assert self.name
0261         pass
0262         self.qdv = qdv
0263         self.ndv = ndv
0264
0265     def make_selection_dvs(self):
0266         """
0267         Slice and dice the full per photon aggregated max deviation into
0268         the single line selections
0269         """
0270         dvmax = self.dvmax
0271         dvs = []
0272         for i in range(self.nsel)[self.sli]:
0273             sel = self.labels[i]
0274             if self.is_skip(sel):
0275                 continue
0276             pass
0277             lcu = self.cu[i]
0278             code = lcu[0]
0279             cqdv = self.cqdv(code)     # array of photon max deviations within selection
0280
0281             dv = QDV(self, i, sel, cqdv, self.ndv, lcu, dvmax )
0282             if len(dv.dv) == 0:
0283                 log.debug("dv.dv empty for i:%d sel:%s lcu:%r " % ( i, sel, lcu))
0284             else:
0285                 dvs.append(dv)
0286             pass
0287         pass
0288         return dvs
0289
0290
0291     def csel(self, code):
0292         """
0293         :param seqhis code:
0294         :return boolean selection array:
0295         """
0296         return np.logical_and( self.aligned, self.a.seqhis == code )
0297
0298     def cqdv(self, code):
0299         """
0300         :return array of deviations within selection:
0301         """
0302         csel = self.csel( code )
0303         return self.qdv[np.where(csel)]
0304
0305
0306     def _get_float(self, att):
0307         return list(map(lambda dv:float(getattr(dv, att)), list(filter(None,self.dvs))))
0308
0309     maxdv = property(lambda self:self._get_float("mx"))
0310     def _get_maxdvmax(self):
0311         maxdv_ = self.maxdv
0312         return max(maxdv_) if len(maxdv_) > 0 else -1
0313     maxdvmax   = property(_get_maxdvmax)
0314
0315     def _get_ndvp(self):
0316         """total number of deviant (ERROR or FATAL) photons"""
0317         return self.ndisc[1]
0318     ndvp = property(_get_ndvp)
0319
0320
0321     def findmax(self):
0322         maxdv = list(map(lambda _:float(_.mx), self.dvs))
0323         mmaxdv = max(maxdv) if len(maxdv) > 0 else -1
0324         for dv in self.dvs:
0325             if dv.mx == mmaxdv and dv.lev.level > Level.INFO:
0326                 dv.ismax = True
0327             pass
0328         pass
0329
0330     level = property(lambda self:self.maxlevel.name if not self.maxlevel is None else None)
0331
0332     def _get_maxlevel(self):
0333         """
0334         Overall level of the table : INFO, WARNING, ERROR or FATAL
0335         based on the maximum level of the lines
0336         """
0337         levs = list(map(lambda dv:dv.lev.level, self.dvs))
0338         mxl = max(levs) if len(levs) > 0 else None
0339         return Level.FromLevel(mxl) if mxl is not None else None
0340     maxlevel = property(_get_maxlevel)
0341
0342     def _get_ndisc(self):
0343         if self._ndisc is None:
0344             ndisc = np.zeros(3, dtype=np.int)
0345             for dv in self.dvs:
0346                 ndisc += dv.ndisc
0347             pass
0348             self._ndisc = ndisc
0349         pass
0350         return self._ndisc
0351     ndisc = property(_get_ndisc)
0352
0353
0354     def _get_RC(self):
0355         maxlevel = self.maxlevel
0356         return 1 if maxlevel is not None and  maxlevel.level > Level.WARNING else 0
0357     RC = property(_get_RC)
0358
0359     fdiscreps = property(lambda self:self._get_float("fdiscrep"))
0360     def _get_fdiscmax(self):
0361         fdiscreps_ = self.fdiscreps
0362         return max(fdiscreps_) if len(fdiscreps_) > 0 else -1
0363     fdiscmax   = property(_get_fdiscmax)
0364
0365
0366     def _get_smry(self):
0367         return "%s fdiscmax:%s fdiscreps:%r maxdvmax:%s " % ( self.name, self.fdiscmax, self.fdiscreps, self.maxdvmax  )
0368     smry = property(_get_smry)
0369
0370     def _get_brief(self):
0371         skips = " ".join(self.skips)
0372         gfmt_ = lambda _:"%.4f" % float(_)
0373
0374         if self.maxlevel is None:
0375             return "maxlevel None"
0376         else:
0377             return "maxdvmax:%s  ndvp:%4d  level:%s  RC:%d       skip:%s" % ( gfmt_(self.maxdvmax), self.ndvp, self.maxlevel.fn_(self.maxlevel.name), self.RC,  skips )
0378         pass
0379
0380
0381     brief = property(_get_brief)
0382
0383     def __repr__(self):
0384         if len(self.dvs) == 0:
0385             return "\n".join(["ab.%s" % self.name, "no dvs" ])
0386         else:
0387             return "\n".join( ["ab.%s" % self.name, self.brief, self.dvs[0].columns2(self.ndisc), QDV.columns()] + list(map(repr, list(filter(None,self.dvs[self.sli])) )) + ["."] )
0388         pass
0389
0390     def __getitem__(self, sli):
0391          self.sli = sli
0392          return self
0393
0394     def is_skip(self, sel):
0395         """
0396         SC AB RE
0397           see notes/issues/sc_ab_re_alignment.rst
0398
0399         When comparing non-aligned events it is necessary to
0400         avoid accidental history alignment causing deviation fails
0401         by skipping any selection that includes SC AB or RE, assuming
0402         reflect cheat is used.
0403         """
0404         sqs = sel.split()
0405         skip = False
0406         for skp in self.skips:
0407              with_sk = skp in sqs
0408              if with_sk:
0409                  skip = True
0410              pass
0411         pass
0412         return skip
0413
0414     def _get_dvmax(self):
0415         """
0416         :return dvmax: list of three values corresponding to warn/error/fatal deviation cuts
0417
0418         rpost
0419             extent*2/65535 is the size of shortnorm compression bins
0420             so its the closest can expect values to get.
0421
0422             TODO: currently the time cuts are not correct, a for "t"
0423             should be using different domain
0424
0425         rpol
0426              is extremely compressed, so bins are big : 1*2/255
0427         ox
0428              not compressed stored as floats : other things will limit deviations
0429
0430         """
0431         ab = self.ab
0432         if self.name == "rpost_dv":
0433             eps = ab.fdom[0,0,3]*2.0/((0x1 << 16) - 1)
0434             dvmax = [eps*1.1, 1.6*eps, 2.1*eps]
0435         elif self.name == "rpol_dv":
0436             eps = 1.0*2.0/((0x1 << 8) - 1)
0437             dvmax = [eps, 1.5*eps, 2.0*eps]
0438         elif self.name == "ox_dv":
0439             dvmax = ab.ok.pdvmax    ## adhoc input guess at appropriate cuts
0440         else:
0441             assert self.name
0442         pass
0443         return dvmax
0444     dvmax = property(_get_dvmax)
0445
0446
0447     def _get_dvmaxt(self):
0448         """
0449         ta 19::
0450
0451             In [1]: ab.rpost_dv.dvmax
0452             Out[1]: [0.022827496757457846, 0.03424124513618677, 0.04565499351491569]
0453
0454             In [2]: ab.rpost_dv.dvmaxt
0455             Out[2]: [0.0003043666242088853, 0.00045654993631332797, 0.0006087332484177706]
0456
0457         Depends on the rule-of-thumb Opticks::setupTimeDomains but time cuts should be much
0458         more stringent than position ones::
0459
0460             In [3]: ab.fdom[0,0,3]
0461             Out[3]: 748.0
0462
0463             In [4]: ab.fdom[1,0,1]
0464             Out[4]: 9.973333
0465
0466             In [5]: ab.fdom[0,0,3]/ab.fdom[1,0,1]
0467             Out[5]: 75.0
0468
0469         """
0470         ab = self.ab
0471         if self.name == "rpost_dv":
0472             eps_t = ab.fdom[1,0,1]*2.0/((0x1 << 16) - 1)
0473             dvmaxt = [eps_t, 1.5*eps_t, 2.0*eps_t]
0474         else:
0475             assert self.name
0476         pass
0477         return dvmaxt
0478     dvmaxt = property(_get_dvmaxt)
0479
0480
0481
0482 if __name__ == '__main__':
0483     from opticks.ana.main import opticks_main
0484     from opticks.ana.ab import AB
0485     ok = opticks_main()
0486     ab = AB(ok)
0487     #ab.dump()
0488
0489     print(ab.ahis)
0490
0491     a = ab.a
0492     b = ab.b
0493
0494     qdv = QDVTab("rpost_dv", ab)
0495     print(qdv)
0496
0497
0498