Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-04-09 07:49:47

0001 #pragma once
0002 /**
0003 sseq_index.h
0004 ===============
0005 
0006 This reimplements some python/NumPy that is slow with large seq.npy
0007 
0008 Basis struct from sseq.h and NPX.h
0009 -------------------------------------
0010 
0011 sseq
0012    sequence flag and boundary history "seqhis" "seqbnd" of a single photon,
0013    up to 32 step points
0014 
0015 NPX.h
0016     NPX::VecFromArray<sseq> reads vector<sseq> from NP seq array
0017 
0018 
0019 Following structs are defined
0020 -------------------------------
0021 
0022 sseq_index_count
0023    struct holding index and count
0024 
0025 sseq_index_count_ab
0026    struct holding a and b sseq_index_count to facilitate comparisons
0027 
0028 sseq_unique
0029    struct holding q:sseq and ic:sseq_index_count
0030    a single struct that holds counts for
0031    all occurrences of the q:sseq within the source array
0032 
0033 
0034 sseq_qab
0035    struct holding q:sseq a:sseq_index_count b:sseq_index_count
0036 
0037 sseq_index
0038    reimplementation of ~/opticks/ana/qcf.py:QU
0039 
0040    * q:vector of sseq (populated from typically large input array within ctor)
0041    * m:map of unique sseq with counts and first indices (relies on sseq.h hash specialization)
0042    * u:descending count ordered vector of sseq_unique
0043 
0044    Holds representation of the photon history of the input array in typically
0045    much smaller form with just unique sseq and counts
0046 
0047 
0048 Q: reimplementation of ~/opticks/ana/qcf.py:QCF ?
0049 
0050 **/
0051 
0052 #include "ssys.h"
0053 #include "sseq.h"
0054 #include "NPX.h"
0055 
0056 
0057 struct sseq_index_count
0058 {
0059     int index ;     // index of first occurrence within the source array
0060     int count ;     // count of occurrence of the same history within the source array
0061     std::string desc() const ;
0062 };
0063 inline std::string sseq_index_count::desc() const
0064 {
0065     std::stringstream ss ;
0066     ss << std::setw(7) << count << " : " << std::setw(7) << index  ;
0067     std::string str = ss.str();
0068     return str ;
0069 }
0070 
0071 struct sseq_index_count_ab
0072 {
0073     sseq_index_count a ;
0074     sseq_index_count b ;
0075 };
0076 
0077 
0078 struct sseq_unique
0079 {
0080     sseq             q  ;
0081     sseq_index_count ic ;
0082 
0083     bool operator< (const sseq_unique& other) const { return ic.count < other.ic.count ; }
0084     std::string desc() const ;
0085 };
0086 
0087 inline std::string sseq_unique::desc() const
0088 {
0089     std::stringstream ss ;
0090     ss << q.seqhis_() << " : " << ic.desc() ;
0091     std::string str = ss.str();
0092     return str ;
0093 }
0094 
0095 
0096 
0097 struct sseq_qab
0098 {
0099     sseq             q ;  // "high level" photon history eg "TO BT BT AB", that typically many photons will share
0100     sseq_index_count a ;
0101     sseq_index_count b ;
0102 
0103     double c2(bool& included, int absum_min) const ;
0104 
0105     int maxcount() const { return std::max(a.count, b.count) ; }
0106     bool operator< (const sseq_qab& other) const { return maxcount() < other.maxcount() ; }
0107 
0108     std::string desc(int absum_min) const ;
0109 };
0110 
0111 inline double sseq_qab::c2(bool& included, int absum_min) const
0112 {
0113     double _a = a.count ;
0114     double _b = b.count ;
0115     double absum = _a + _b ;
0116     double abdif = _a - _b ;
0117     included = _a > 0 && _b > 0 && absum > absum_min ;
0118     return included ? abdif*abdif/absum : 0 ;
0119 }
0120 
0121 /**
0122 sseq_qab::desc
0123 ----------------
0124 
0125 For chi2 inclusion the sum of a and b counts must exceed absum_min
0126 
0127 **/
0128 
0129 inline std::string sseq_qab::desc(int absum_min) const
0130 {
0131     bool included(false);
0132 
0133     std::stringstream ss ;
0134     ss << q.seqhis_()
0135        << " : "
0136        << std::setw(7) << a.count
0137        << std::setw(7) << b.count
0138        << " : "
0139        << std::fixed << std::setw(10) << std::setprecision(4) << c2(included, absum_min)
0140        << " : "
0141        << ( included ? "Y" : "N" )
0142        << " : "
0143        << std::setw(7) << a.index
0144        << std::setw(7) << b.index
0145        ;
0146     std::string str = ss.str();
0147     return str ;
0148 }
0149 
0150 
0151 
0152 struct sseq_index
0153 {
0154     std::vector<sseq> q ;                   // typically large input array
0155 
0156     std::map<sseq, sseq_index_count> m ;    // map of unique sseq with counts and first indices
0157 
0158     std::vector<sseq_unique> u ;            // unique sseq with counts in descending count ordered vector of sseq_unique
0159 
0160     sseq_index( const NP* seq);
0161 
0162     void load_seq( const NP* seq );
0163     void count_unique();
0164     void order_seq();
0165     std::string desc(int min_count=0) const;
0166 };
0167 
0168 
0169 /**
0170 sseq_index::sseq_index
0171 ------------------------
0172 
0173 1. load array into q vector of sseq
0174 2. populate std::map<sseq, sseq_index_count>
0175 3.
0176 
0177 **/
0178 
0179 
0180 inline sseq_index::sseq_index( const NP* seq)
0181 {
0182     load_seq(seq);
0183     count_unique();
0184     order_seq();
0185 }
0186 
0187 
0188 inline void sseq_index::load_seq(const NP* seq)
0189 {
0190     NPX::VecFromArray<sseq>(q, seq );
0191 }
0192 
0193 /**
0194 sseq_index::count_unique fill the sseq keyed map of occurence counts
0195 ----------------------------------------------------------------------
0196 
0197 Iterate over the source vector populating the
0198 map with the index of first occurrence and
0199 count of the frequency of occurrence.
0200 
0201 Relies on sseq hash specialization based on seqhis values
0202 to establish the different identify of different sseq values.
0203 
0204 **/
0205 
0206 inline void sseq_index::count_unique()
0207 {
0208     for (int i = 0; i < int(q.size()); i++)
0209     {
0210         const sseq& seq = q[i];
0211 
0212         std::map<sseq, sseq_index_count>::iterator it  = m.find(seq);
0213 
0214         if(it == m.end())
0215         {
0216             int q_index_of_first_occurrence = i ;
0217             m[seq] = {q_index_of_first_occurrence, 1} ;
0218         }
0219         else
0220         {
0221             it->second.count++ ;
0222         }
0223     }
0224 }
0225 
0226 /**
0227 sseq_index::order_seq : sorting unique sseq in descending count order
0228 ----------------------------------------------------------------------
0229 
0230 1. copy from map m into vector u
0231 2. sort the u vector into descending count order
0232 
0233 **/
0234 
0235 
0236 inline void sseq_index::order_seq()
0237 {
0238     for(auto it=m.begin() ; it != m.end() ; it++) u.push_back( { it->first, {it->second.index, it->second.count} } );
0239 
0240     auto descending_order = [](const sseq_unique& a, const sseq_unique& b) { return a.ic.count > b.ic.count ; } ;
0241 
0242     std::sort( u.begin(), u.end(), descending_order  );
0243 }
0244 
0245 inline std::string sseq_index::desc(int min_count) const
0246 {
0247     std::stringstream ss ;
0248     int num = u.size();
0249     ss << "[sseq_index::desc num " << num << std::endl ;
0250     for(int i=0 ; i < num ; i++)
0251     {
0252         if( u[i].ic.count < min_count ) break ;
0253         ss << u[i].desc() << std::endl ;
0254     }
0255     ss << "]sseq_index::desc" << std::endl ;
0256     std::string str = ss.str() ;
0257     return str ;
0258 }
0259 
0260 
0261 struct sseq_index_ab_chi2
0262 {
0263     static constexpr const char* NAME = "sseq_index_ab_chi2.npy" ;
0264     static const int DEFAULT_ABSUM_MIN = 30 ;
0265     static constexpr const char* EKEY = "sseq_index_ab_chi2_ABSUM_MIN" ;  // equiv to python C2CUT
0266 
0267     double sum ;
0268     double ndf ;
0269     double absum_min ;
0270     double spare ;
0271 
0272     NP* serialize() const ;
0273     void save(const char* dir) const ;
0274 
0275     void init();
0276     std::string desc() const ;
0277     std::string desc_rst() const ;
0278     std::string desc_html() const ;
0279 };
0280 
0281 
0282 inline NP* sseq_index_ab_chi2::serialize() const
0283 {
0284     NP* a = NP::Make<double>(4) ;
0285     double* aa = a->values<double>();
0286     aa[0] = sum ;
0287     aa[1] = ndf ;
0288     aa[2] = absum_min ;
0289     aa[3] = spare ;
0290     return a ;
0291 }
0292 inline void sseq_index_ab_chi2::save(const char* dir) const
0293 {
0294     NP* a = serialize();
0295     if(dir == nullptr) a->save(NAME);
0296     else a->save(dir, NAME);
0297 }
0298 
0299 
0300 inline void sseq_index_ab_chi2::init()
0301 {
0302     sum = 0 ;
0303     ndf = 0 ;
0304     absum_min = ssys::getenvint(EKEY,DEFAULT_ABSUM_MIN) ;
0305     spare = 0 ;
0306 }
0307 
0308 std::string sseq_index_ab_chi2::desc() const
0309 {
0310     return desc_rst();
0311 }
0312 std::string sseq_index_ab_chi2::desc_rst() const
0313 {
0314     std::stringstream ss ;
0315     ss << "sseq_index_ab_chi2::desc"
0316        << " sum " << std::fixed << std::setw(10) << std::setprecision(4) << sum
0317        << " ndf " << std::setw(7) << ndf
0318        << " sum/ndf " << std::fixed << std::setw(10) << std::setprecision(4) << sum/ndf
0319        << " " << EKEY
0320        << ":"
0321        << absum_min
0322        ;
0323     std::string str = ss.str() ;
0324     return str ;
0325 }
0326 
0327 std::string sseq_index_ab_chi2::desc_html() const
0328 {
0329     std::stringstream ss ;
0330     ss
0331        << "[CHI2"
0332        << " <span class=\"r\"> "
0333        << " sum " << std::fixed << std::setw(10) << std::setprecision(4) << sum
0334        << " ndf " << std::setw(7) << ndf
0335        << " sum/ndf " << std::fixed << std::setw(10) << std::setprecision(4) << sum/ndf
0336        << " </span> "
0337        << "]"
0338        ;
0339     std::string str = ss.str() ;
0340     return str ;
0341 }
0342 
0343 
0344 
0345 
0346 
0347 /**
0348 sseq_index_ab
0349 
0350 
0351 **/
0352 
0353 struct sseq_index_ab
0354 {
0355     static constexpr const char* NAME = "sseq_index_ab.npy" ;
0356     static constexpr const char* _sseq_index_ab__desc_NUM_MAX = "sseq_index_ab__desc_NUM_MAX" ;
0357     static constexpr const char* _sseq_index_ab__desc_HISWID  = "sseq_index_ab__desc_HISWID" ;
0358     static constexpr const char* _sseq_index_ab__desc_STYLE   = "sseq_index_ab__desc_STYLE" ;
0359 
0360     enum { STYLE_NONE, STYLE_RST, STYLE_HTML } ;
0361     static int STYLE();
0362     bool isRST()  const { return style == STYLE_RST ; }
0363     bool isHTML() const { return style == STYLE_HTML ; }
0364 
0365     const sseq_index& a ;
0366     const sseq_index& b ;
0367     int style ;
0368 
0369     std::map<sseq, sseq_index_count_ab> m ;
0370     std::vector<sseq_qab> u ;
0371     sseq_index_ab_chi2    chi2 ;
0372 
0373     sseq_index_ab( const sseq_index& a_, const sseq_index& b_ );
0374 
0375     void init();
0376     void collect_seq();
0377     void order_seq();
0378     void calc_chi2();
0379     NP* serialize() const ;
0380     void save(const char* dir) const ;
0381 
0382     std::string desc(const char* opt) const ;
0383     std::string desc() const ;
0384 
0385 };
0386 
0387 int sseq_index_ab::STYLE() // static
0388 {
0389     const char* _STYLE = ssys::getenvvar(_sseq_index_ab__desc_STYLE, "rst") ;
0390     int _style = STYLE_NONE ;
0391     if(0==strcmp(_STYLE, "rst" ))  _style = STYLE_RST ;
0392     if(0==strcmp(_STYLE, "html" )) _style= STYLE_HTML ;
0393     assert( _style == STYLE_RST || _style == STYLE_HTML );
0394     return _style ;
0395 }
0396 
0397 
0398 inline sseq_index_ab::sseq_index_ab( const sseq_index& a_, const sseq_index& b_ )
0399     :
0400     a(a_),
0401     b(b_),
0402     style(STYLE())
0403 {
0404     init();
0405 }
0406 
0407 inline void sseq_index_ab::init()
0408 {
0409     collect_seq();
0410     order_seq();
0411     calc_chi2();
0412 }
0413 
0414 /**
0415 sseq_index_ab::collect_seq
0416 ---------------------------
0417 
0418 sseq_index_count_ab
0419      4 integers with index and count from A and B
0420 
0421 
0422 **/
0423 
0424 
0425 inline void sseq_index_ab::collect_seq()
0426 {
0427     // collect from A into the map
0428     for (int i = 0; i < int(a.u.size()); i++)
0429     {
0430         const sseq_unique& q_ic = a.u[i];
0431         const sseq& q = q_ic.q ;
0432         std::map<sseq, sseq_index_count_ab>::iterator it  = m.find(q);
0433         bool first_q = it == m.end() ;               // first find of sseq history q within m
0434         if(first_q) m[q] = { q_ic.ic , {-1,-1} } ;   // fill in the two A slots setting -1,-1 for B
0435     }
0436     // should always be first_q as m starts empty and are grabbing from uniques already
0437 
0438 
0439     // collect from B into the map
0440     for (int i = 0; i < int(b.u.size()); i++)
0441     {
0442         const sseq_unique& q_ic = b.u[i];
0443         const sseq& q = q_ic.q ;
0444         std::map<sseq, sseq_index_count_ab>::iterator it  = m.find(q);
0445 
0446         bool first_q = it == m.end() ; // first find of sseq history q within m
0447 
0448         if(first_q) m[q] = { {-1,-1}, q_ic.ic } ;   // fill in the two B slots setting -1,-1 for A
0449         else it->second.b = q_ic.ic ;
0450         // already found (from A) so just fill in the B slot
0451 
0452     }
0453 }
0454 
0455 inline void sseq_index_ab::order_seq()
0456 {
0457     // copy from map to vector
0458     for(auto it=m.begin() ; it != m.end() ; it++)
0459     {
0460         const sseq& q = it->first ;
0461         const sseq_index_count_ab& ab = it->second ;
0462         u.push_back( {q, ab.a, ab.b } );
0463     }
0464 
0465     // order the vector in descending max count order
0466     auto descending_order = [](const sseq_qab& x, const sseq_qab& y) { return x.maxcount() > y.maxcount() ; } ;
0467     std::sort( u.begin(), u.end(), descending_order );
0468 }
0469 
0470 
0471 inline void sseq_index_ab::calc_chi2()
0472 {
0473     int num = u.size();
0474     chi2.init();
0475     for(int i=0 ; i < num ; i++)
0476     {
0477         const sseq_qab& qab = u[i] ;
0478         bool included = false ;
0479         double c2 = qab.c2(included, chi2.absum_min) ;
0480         if(included)
0481         {
0482             chi2.sum += c2 ;
0483             chi2.ndf +=  1 ;
0484         }
0485     }
0486 }
0487 
0488 
0489 inline NP* sseq_index_ab::serialize() const
0490 {
0491     int num = u.size();
0492     NP* a = NP::Make<int>(num, 4) ;
0493     int* aa = a->values<int>();
0494 
0495     for(int i=0 ; i < num ; i++)
0496     {
0497         const sseq_qab& ui = u[i] ;
0498         const sseq& q = ui.q ;
0499 
0500         std::string name = q.seqhis_();  // not trimmed
0501         a->names.push_back(name);
0502 
0503         const sseq_index_count& a = ui.a ;
0504         const sseq_index_count& b = ui.b ;
0505 
0506         aa[i*4+0] = a.count ;
0507         aa[i*4+1] = b.count ;
0508         aa[i*4+2] = a.index ;
0509         aa[i*4+3] = b.index ;
0510     }
0511     return a ;
0512 }
0513 
0514 inline void sseq_index_ab::save(const char* dir) const
0515 {
0516     NP* a = serialize();
0517     if(dir == nullptr) a->save(NAME);
0518     else a->save(dir, NAME);
0519 }
0520 
0521 
0522 
0523 
0524 
0525 /**x
0526 sseq_index_ab::desc
0527 ---------------------
0528 
0529 HMM: inclusion in the chi2 is based on sum of a and b counts
0530 
0531 **/
0532 
0533 inline std::string sseq_index_ab::desc(const char* opt) const
0534 {
0535     enum { ALL, AZERO, BZERO, C2INC, C2EXC, DEVIANT, BRIEF } ;
0536     int mode = BRIEF ;
0537     if(      strstr(opt,"ALL")  )   mode = ALL ;
0538     else if( strstr(opt,"AZERO") )  mode = AZERO ;
0539     else if( strstr(opt,"BZERO") )  mode = BZERO ;
0540     else if( strstr(opt,"C2INC") )  mode = C2INC ;
0541     else if( strstr(opt,"C2EXC") )  mode = C2EXC ;
0542     else if( strstr(opt,"DEVIANT")) mode = DEVIANT ;
0543     else if( strstr(opt,"BRIEF"))   mode = BRIEF ;
0544 
0545     int u_size = u.size();
0546     int NUM_MAX = ssys::getenvint(_sseq_index_ab__desc_NUM_MAX, 40) ;
0547     int HISWID = ssys::getenvint( _sseq_index_ab__desc_HISWID, 0) ;
0548 
0549 
0550     int num = 0 ;
0551     if( mode == AZERO || mode == BZERO || mode == DEVIANT)
0552     {
0553         // dont obey NUM_MAX for these problem finding modes as would miss issues
0554         num = u_size ;
0555     }
0556     else
0557     {
0558         num = NUM_MAX > 0 ? std::min( u_size, NUM_MAX ) : u_size ;
0559     }
0560 
0561 
0562     const char* itab = "    " ;
0563 
0564     std::stringstream ss ;
0565 
0566     if(isRST())
0567     {
0568         ss
0569         << "[sseq_index_ab::desc"
0570         << " u_size " << u_size
0571         << " NUM_MAX " << NUM_MAX
0572         << " num " << num
0573         << " opt " << opt
0574         << " mode " << mode
0575         << ( mode == BRIEF ? chi2.desc_rst()  : "" )
0576         << "\n"
0577         ;
0578     }
0579     else if(isHTML())
0580     {
0581         ss
0582         << ".. raw:: html\n"
0583         << "\n"
0584         << itab << "<pre class=\"mypretiny\">\n"
0585         << itab << "[sseq_index_ab::desc"
0586         << " " << ( mode == BRIEF ? "" : opt )
0587         << " " << ( mode == BRIEF ? chi2.desc_html()  : "" )
0588         << "\n"
0589         ;
0590     }
0591 
0592 
0593     for(int i=0 ; i < num ; i++)
0594     {
0595         const sseq_qab& qab = u[i] ;
0596 
0597         bool c2_inc(false);
0598         double c2 = qab.c2(c2_inc, chi2.absum_min);
0599 
0600         bool a_zero = qab.a.count <= 0 && qab.b.count > 10 ;
0601         bool b_zero = qab.b.count <= 0 && qab.a.count > 10 ;
0602         bool deviant = c2 > 10. ;
0603 
0604         bool selected = true ;
0605         switch(mode)
0606         {
0607             case ALL:     selected = true    ; break ;
0608             case BRIEF:   selected = i < 60  ; break ;
0609             case AZERO:   selected = a_zero  ; break ;
0610             case BZERO:   selected = b_zero  ; break ;
0611             case C2INC:   selected = c2_inc  ; break ;
0612             case C2EXC:   selected = !c2_inc ; break ;
0613             case DEVIANT: selected = deviant ; break ;
0614         }
0615 
0616         const char* head ;
0617         const char* tail ;
0618         const char* pdev ;
0619 
0620         if(isRST())
0621         {
0622             head = deviant ? ":r:`" : "    " ;
0623             tail = deviant ? "`" : " " ;
0624             pdev = "DEVIANT " ;
0625         }
0626         else if(isHTML())
0627         {
0628             head = "    " ;
0629             tail = " " ;
0630             pdev = "<span class=\"r\">DEVIANT</span> " ;
0631         }
0632 
0633         std::string seqhis = qab.q.seqhis_();
0634         std::string u_seqhis = HISWID == 0 ? seqhis : seqhis.substr(0,HISWID) ;
0635 
0636 
0637         if(selected) ss
0638            << head
0639            << u_seqhis
0640            << " : "
0641            << std::setw(7) << qab.a.count
0642            << std::setw(7) << qab.b.count
0643            << " : "
0644            << std::fixed << std::setw(10) << std::setprecision(4) << c2
0645            << " : "
0646            << ( c2_inc ? "Y" : "N" )
0647            << " : "
0648            << std::setw(7) << qab.a.index
0649            << std::setw(7) << qab.b.index
0650            << " : "
0651            << ( a_zero ? "AZERO " : "" )
0652            << ( b_zero ? "BZERO " : "" )
0653            << ( deviant ? pdev : "" )
0654            << ( c2_inc ? " " : "C2EXC " )   // SUPPRESS C2INC AS ITS NORMAL
0655            << tail
0656            << "\n"
0657            ;
0658     }
0659     ss
0660         << ( isHTML() ? itab : "" )
0661         << "]sseq_index_ab::desc\n"
0662         << ( isHTML() ? itab : "" )
0663         << ( isHTML() ? "</pre>\n" : "" )
0664         ;
0665 
0666 
0667     std::string str = ss.str();
0668     return str ;
0669 }
0670 
0671 
0672 inline std::string sseq_index_ab::desc() const
0673 {
0674     std::stringstream ss ;
0675     bool html = isHTML();
0676     ss
0677         << ( html ? "" : "AB" )
0678         << "\n"
0679         << desc("BRIEF")
0680         << "\n"
0681         << ( html ? "" : "AB" )
0682         << "\n"
0683         << desc("AZERO")
0684         << "\n"
0685         << ( html ? "" : "AB" )
0686         << "\n"
0687         << desc("BZERO")
0688         << "\n"
0689         << ( html ? "" : "AB" )
0690         << "\n"
0691         << desc("DEVIANT")
0692         << "\n"
0693         ;
0694 
0695     std::string str = ss.str();
0696     return str ;
0697 }
0698 
0699 
0700