Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-04-19 09:13:37

0001 // -*- C++ -*-
0002 //
0003 // This file is part of YODA -- Yet more Objects for Data Analysis
0004 // Copyright (C) 2008-2024 The YODA collaboration (see AUTHORS for details)
0005 //
0006 #ifndef YODA_READERUTILS_H
0007 #define YODA_READERUTILS_H
0008 
0009 #include "YODA/AnalysisObject.h"
0010 #include "YODA/Counter.h"
0011 #include "YODA/Estimate0D.h"
0012 #include "YODA/Scatter.h"
0013 #include "YODA/Histo.h"
0014 #include "YODA/Profile.h"
0015 #include "YODA/BinnedEstimate.h"
0016 
0017 #include <iostream>
0018 #include <locale>
0019 #include <cstring>
0020 #include <regex>
0021 #include <cmath>
0022 
0023 #ifdef WITH_OSX
0024 #include <xlocale.h>
0025 #endif
0026 
0027 using std::string;
0028 using std::vector;
0029 
0030 namespace YODA {
0031 
0032   /// Anonymous namespace to limit visibility
0033   namespace {
0034 
0035     static const std::regex regex_string_pat("([\"\'])(?:(?=(\\\\?))\\2.)*?\\1");
0036 
0037   }
0038 
0039 
0040   class AOReaderBase {
0041 
0042     /// Fast ASCII tokenizer, extended from FastIStringStream by Gavin Salam.
0043     class aistringstream {
0044     public:
0045       // Constructor from char*
0046       aistringstream(const char* line=0) {
0047         reset(line);
0048         _set_locale();
0049       }
0050       // Constructor from std::string
0051       aistringstream(const string& line) {
0052         reset(line);
0053         _set_locale();
0054       }
0055       ~aistringstream() {
0056         _reset_locale();
0057       }
0058 
0059       bool peek(const std::string& s) const {
0060         return s == std::string(_next, s.size());
0061       }
0062 
0063       // Re-init to new line as char*
0064       void reset(const char* line=0) {
0065         _next = const_cast<char*>(line);
0066         _new_next = _next;
0067         _error = false;
0068       }
0069       // Re-init to new line as std::string
0070       void reset(const string& line) { reset(line.c_str()); }
0071 
0072       // Tokenizing stream operator (forwards to specialisations)
0073       template<class T>
0074       aistringstream& operator >> (T& value) {
0075         _get(value);
0076         if (_new_next == _next) _error = true; // handy error condition behaviour!
0077         _next = _new_next;
0078         return *this;
0079       }
0080 
0081       // Allow use of operator>> in a while loop
0082       operator bool() const { return !_error; }
0083 
0084 
0085     private:
0086 
0087       // Changes the thread-local locale to interpret numbers in the "C" locale
0088       void _set_locale() {
0089         _locale_set = newlocale(LC_NUMERIC_MASK, "C", NULL);
0090         _locale_prev = uselocale(_locale_set);
0091         if (!_locale_prev) {
0092           throw ReadError(std::string("Error setting locale: ") + strerror(errno));
0093         }
0094       }
0095       void _reset_locale() {
0096         if (!uselocale(_locale_prev)) {
0097           throw ReadError(std::string("Error setting locale: ") + strerror(errno));
0098         }
0099         freelocale(_locale_set);
0100       }
0101 
0102       void _get(double& x) { x = std::strtod(_next, &_new_next); }
0103       void _get(float& x) { x = std::strtof(_next, &_new_next); }
0104       void _get(int& i) { i = std::strtol(_next, &_new_next, 10); } // force base 10!
0105       void _get(long& i) { i = std::strtol(_next, &_new_next, 10); } // force base 10!
0106       void _get(unsigned int& i) { i = std::strtoul(_next, &_new_next, 10); } // force base 10!
0107       void _get(long unsigned int& i) { i = std::strtoul(_next, &_new_next, 10); } // force base 10!
0108       void _get(string& x) {
0109         while (std::isspace(*_next) && _next[0] != '\0') _next += 1;
0110         _new_next = _next;
0111         while (!std::isspace(*_new_next) && _new_next[0] != '\0') _new_next += 1;
0112         x = string(_next, _new_next-_next);
0113       }
0114 
0115       locale_t _locale_set, _locale_prev;
0116       char *_next, *_new_next;
0117       bool _error;
0118     };
0119 
0120 
0121   public:
0122 
0123     /// Default constructor
0124     AOReaderBase() { }
0125 
0126     /// Default destructor
0127     virtual ~AOReaderBase() { }
0128 
0129     virtual void parse(const string& line) = 0;
0130 
0131     virtual AnalysisObject* assemble(const string& path = "") = 0;
0132 
0133     template<typename T>
0134     void extractVector(const std::string& line, std::vector<T>& vec) {
0135       if constexpr (std::is_same<T, std::string>::value) {
0136         string::const_iterator initpos( line.cbegin() );
0137         const string::const_iterator finpos( line.cend() );
0138         std::smatch m;
0139         while ( std::regex_search(initpos, finpos, m, regex_string_pat) ) {
0140           string label;
0141           std::stringstream ss(m[0].str());
0142           ss >> std::quoted(label); // removes outer quotes and de-escapes inner quotes
0143           vec.push_back(label);
0144           initpos = m.suffix().first;
0145         }
0146       }
0147       else {
0148          std::string content = line.substr(line.find(": [")+3);
0149          content.pop_back(); // remove the "]" at the end
0150          for (const std::string& item : Utils::split(content, ",")) {
0151            aiss.reset(item);
0152            T tmp;
0153            aiss >> tmp;
0154            vec.push_back(std::move(tmp));
0155          }
0156       }
0157     }
0158 
0159   protected:
0160 
0161     aistringstream aiss;
0162 
0163   };
0164 
0165 
0166 
0167 
0168   template<class T>
0169   class AOReader;
0170 
0171   template<>
0172   class AOReader<Counter> : public AOReaderBase {
0173 
0174     Dbn0D dbn;
0175 
0176     public:
0177 
0178     void parse(const string& line) {
0179       aiss.reset(line);
0180       double sumw(0), sumw2(0), n(0);
0181       aiss >> sumw >> sumw2 >> n;
0182       dbn = Dbn0D(n, sumw, sumw2);
0183     }
0184 
0185     AnalysisObject* assemble(const string& path = "") {
0186       auto* ao = new Counter(path);
0187       ao->setDbn(dbn);
0188       dbn = Dbn0D();
0189       return ao;
0190     }
0191   };
0192 
0193 
0194   template<>
0195   class AOReader<Estimate0D> : public AOReaderBase {
0196 
0197     Estimate0D est;
0198     vector<string> sources;
0199 
0200     void readErrors(std::map<string,std::pair<double,double>>& errors) {
0201       string eDn, eUp;
0202       for (size_t i = 0; i < sources.size(); ++i) {
0203         aiss >> eDn >> eUp;
0204         if (eDn != "---" && eUp != "---") {
0205           errors[sources[i]] = { Utils::toDbl(eDn), Utils::toDbl(eUp) };
0206         }
0207       }
0208     }
0209 
0210     public:
0211 
0212     void parse(const string& line) {
0213       if (!line.rfind("ErrorLabels: ", 0)) { // parse error labels
0214         extractVector<std::string>(line, sources);
0215         return;
0216       }
0217       // parse content
0218       aiss.reset(line);
0219       double val(0);
0220       aiss >> val;
0221       std::map<string,std::pair<double,double>> errors;
0222       readErrors(errors);
0223       est = Estimate0D(val, errors);
0224     }
0225 
0226     AnalysisObject* assemble(const string& path = "") {
0227 
0228       auto* ao = new Estimate0D(est, path);
0229       est = Estimate0D();
0230       sources.clear();
0231       return ao;
0232     }
0233 
0234   };
0235 
0236 
0237   template <size_t N>
0238   class AOReader<ScatterND<N>> : public AOReaderBase {
0239 
0240     vector<PointND<N>> points;
0241 
0242     template<size_t I>
0243     void readCoords(vector<double>& vals, vector<double>& errm, vector<double>& errp) {
0244       if constexpr(I < N) {
0245         double v(0), em(0), ep(0);
0246         aiss >> v >> em >> ep;
0247         vals[I] = v;
0248         errm[I] = fabs(em);
0249         errp[I] = fabs(ep);
0250         readCoords<I+1>(vals, errm, errp);
0251       }
0252     }
0253 
0254     public:
0255 
0256     void parse(const string& line) {
0257       aiss.reset(line);
0258       vector<double> vals(N), errm(N), errp(N);
0259       readCoords<0>(vals, errm, errp);
0260       points.push_back(PointND<N>(vals, errm, errp));
0261     }
0262 
0263     AnalysisObject* assemble(const string& path = "") {
0264       auto* ao = new ScatterND<N>();
0265       ao->setPath(path);
0266       ao->addPoints(points);
0267       points.clear();
0268       return ao;
0269     }
0270   };
0271 
0272 
0273   template <size_t DbnN, typename... AxisT>
0274   class AOReader<BinnedDbn<DbnN, AxisT...>> : public AOReaderBase {
0275 
0276     using BaseT = BinnedDbn<DbnN, AxisT...>;
0277 
0278     template <size_t I>
0279     using is_CAxis = typename std::is_floating_point<typename std::tuple_element_t<I, std::tuple<AxisT...>>>;
0280 
0281     std::tuple<vector<AxisT> ...> edges;
0282     Dbn<DbnN> yoda1Overflow;
0283     vector<Dbn<DbnN>> dbns;
0284     vector<size_t> maskedBins;
0285     std::array<double,DbnN*(DbnN-1)/2> crossTerms;
0286     bool isYODA1 = false;
0287     size_t axisCheck = 0;
0288 
0289 
0290     template<size_t I>
0291     void readEdges() { // YODA1 version for backwards compatibility
0292       if constexpr(I < sizeof...(AxisT)) {
0293         using EdgeT = std::tuple_element_t<I, std::tuple<AxisT...>>;
0294         if constexpr (is_CAxis<I>::value) { // continuous case
0295           EdgeT lo, hi;
0296           aiss >> lo >> hi;
0297           auto& curr_edges = std::get<I>(edges);
0298           if (!std::isinf(lo)) {
0299             if (curr_edges.empty())  curr_edges.push_back(lo);
0300           }
0301           if (!std::isinf(hi)) {
0302             if (curr_edges.size() && curr_edges[ curr_edges.size() - 1 ] != hi) {
0303               curr_edges.push_back(hi);
0304             }
0305           }
0306         }
0307         else { // discrete case
0308           throw BinningError("Discrete axes are not supported in this YODA1-style legacy format.");
0309         }
0310         readEdges<I+1>();
0311       }
0312     }
0313 
0314     template<size_t I>
0315     void readEdges(const std::string& line) { // YODA2 version
0316       if constexpr(I < sizeof...(AxisT)) {
0317         if (I == axisCheck) {
0318           using EdgeT = std::tuple_element_t<I, std::tuple<AxisT...>>;
0319           auto& curr_edges = std::get<I>(edges);
0320           extractVector<EdgeT>(line, curr_edges);
0321         }
0322         readEdges<I+1>(line);
0323       }
0324     }
0325 
0326     template<size_t I>
0327     void readDbn(std::array<double,DbnN+1>& sumW, std::array<double,DbnN+1>& sumW2) {
0328       if constexpr(I <= DbnN) {
0329         double w(0), w2(0);
0330         aiss >> w >> w2;
0331         sumW[I] = w;
0332         sumW2[I] = w2;
0333         readDbn<I+1>(sumW, sumW2);
0334       }
0335     }
0336 
0337     template <class tupleT, size_t... Is>
0338     BaseT* make_from_tuple(tupleT&& tuple, std::index_sequence<Is...> ) {
0339       BaseT* rtn = new BaseT{std::get<Is>(std::forward<tupleT>(tuple))...};
0340       rtn->maskBins(maskedBins);
0341       return rtn;
0342     }
0343 
0344     template <class tupleT>
0345     BaseT* make_from_tuple(tupleT&& tuple) {
0346       return make_from_tuple(std::forward<tupleT>(tuple),
0347                              std::make_index_sequence<sizeof...(AxisT)+1>{});
0348     }
0349 
0350     template<size_t I>
0351     void clearEdges() {
0352       if constexpr(I < sizeof...(AxisT)) {
0353         std::get<I>(edges).clear();
0354         clearEdges<I+1>();
0355       }
0356     }
0357 
0358     public:
0359 
0360     void parse(const string& line) {
0361       if (line.find("Total") != string::npos) {
0362         isYODA1 = true;
0363         return; // YODA1 backwards compatibility
0364       }
0365       if (!line.rfind("Edges(A", 0)) { // parse binning
0366         readEdges<0>(line);
0367         ++axisCheck;
0368         return;
0369       }
0370       if (!line.rfind("MaskedBins: ", 0)) { // parse indices of masked bins
0371         extractVector<size_t>(line, maskedBins);
0372         return;
0373       }
0374       aiss.reset(line);
0375       if (line.find("Underflow") != string::npos || line.find("Overflow") != string::npos) {
0376         // This must be the YODA1-style format ...
0377         if constexpr (sizeof...(AxisT) == 1) {
0378           string tmp1, tmp2;
0379           aiss >> tmp1 >> tmp2; // not needed
0380         }
0381       }
0382       else if (isYODA1)  readEdges<0>();
0383       std::array<double,DbnN+1> sumW, sumW2;
0384       readDbn<0>(sumW, sumW2);
0385       for (size_t i = 0; i < crossTerms.size(); ++i) {
0386         double tmp(0.);
0387         aiss >> tmp;
0388         crossTerms.at(i) = tmp;
0389       }
0390       double numEntries(0);
0391       aiss >> numEntries;
0392       if (line.find("Overflow") != string::npos) {
0393         if constexpr (sizeof...(AxisT) == 1) {
0394           if constexpr (DbnN < 2)
0395             yoda1Overflow = Dbn<DbnN>(numEntries, sumW, sumW2);
0396           else
0397             yoda1Overflow = Dbn<DbnN>(numEntries, sumW, sumW2, crossTerms);
0398         }
0399       }
0400       else {
0401         if constexpr (DbnN < 2) {
0402           dbns.emplace_back(numEntries, sumW, sumW2);
0403         }
0404         else {
0405           dbns.emplace_back(numEntries, sumW, sumW2, crossTerms);
0406         }
0407       }
0408     }
0409 
0410     AnalysisObject* assemble(const string& path = "") {
0411 
0412       auto args = std::tuple_cat(edges, std::make_tuple(path));
0413       BaseT* ao = make_from_tuple(std::move(args));
0414 
0415       size_t global_index = 0;
0416       if constexpr (sizeof...(AxisT) == 2) {
0417         if (isYODA1) { // 2D objects had no under-/overflows in Y1
0418           for (size_t ix = 1; ix < ao->numBinsAt(0)+1; ++ix) { //< visible bins only
0419             for (size_t iy = 1; iy < ao->numBinsAt(1)+1; ++iy) { //< visible bins only
0420               ao->bin(ix,iy).set(std::move(dbns[global_index++]));
0421             }
0422           }
0423         }
0424       }
0425       if ( !(isYODA1 && sizeof...(AxisT) == 2) ) { //< still works for Y1-style 1D
0426         for (auto&& d : dbns) {
0427           ao->bin(global_index++).set(std::move(d));
0428         }
0429       }
0430 
0431       if constexpr (sizeof...(AxisT) == 1) { // YODA1-style overflows
0432         if (isYODA1)  ao->bin(global_index).set(yoda1Overflow);
0433         yoda1Overflow = Dbn<DbnN>();
0434       }
0435 
0436       crossTerms.fill(0);
0437       maskedBins.clear();
0438       isYODA1 = false;
0439       clearEdges<0>();
0440       dbns.clear();
0441       axisCheck = 0;
0442       return ao;
0443     }
0444   };
0445 
0446 
0447   template <typename... AxisT>
0448   class AOReader<BinnedEstimate<AxisT...>> : public AOReaderBase {
0449 
0450     using BaseT = BinnedEstimate<AxisT...>;
0451 
0452     std::tuple<vector<AxisT> ...> edges;
0453     vector<Estimate> estimates;
0454     vector<size_t> maskedBins;
0455     vector<string> sources;
0456     size_t axisCheck = 0;
0457 
0458 
0459     template<size_t I>
0460     void readEdges(const std::string& line) {
0461       if constexpr(I < sizeof...(AxisT)) {
0462         if (I == axisCheck) {
0463           using EdgeT = std::tuple_element_t<I, std::tuple<AxisT...>>;
0464           auto& curr_edges = std::get<I>(edges);
0465           extractVector<EdgeT>(line, curr_edges);
0466         }
0467         readEdges<I+1>(line);
0468       }
0469     }
0470 
0471     void readErrors(std::map<string,std::pair<double,double>>& errors) {
0472       string eDn, eUp;
0473       for (const std::string& src : sources) {
0474         aiss >> eDn >> eUp;
0475         if (eDn != "---" && eUp != "---") {
0476           errors[src] = { Utils::toDbl(eDn), Utils::toDbl(eUp) };
0477         }
0478       }
0479     }
0480 
0481     template <class tupleT, size_t... Is>
0482     BaseT* make_from_tuple(tupleT&& tuple, std::index_sequence<Is...> ) {
0483       BaseT* rtn = new BaseT{std::get<Is>(std::forward<tupleT>(tuple))...};
0484       rtn->maskBins(maskedBins);
0485       return rtn;
0486     }
0487 
0488     template <class tupleT>
0489     BaseT* make_from_tuple(tupleT&& tuple) {
0490       return make_from_tuple(std::forward<tupleT>(tuple),
0491                              std::make_index_sequence<sizeof...(AxisT)+1>{});
0492     }
0493 
0494     template<size_t I>
0495     void clearEdges() {
0496       if constexpr(I < sizeof...(AxisT)) {
0497         std::get<I>(edges).clear();
0498         clearEdges<I+1>();
0499       }
0500     }
0501 
0502     public:
0503 
0504     void parse(const string& line) {
0505       if (!line.rfind("Edges(A", 0)) { // parse binning
0506         readEdges<0>(line);
0507         ++axisCheck;
0508         return;
0509       }
0510       if (!line.rfind("MaskedBins: ", 0)) { // parse indices of masked bins
0511         extractVector<size_t>(line, maskedBins);
0512         return;
0513       }
0514       if (!line.rfind("ErrorLabels: ", 0)) { // parse error labels
0515         extractVector<std::string>(line, sources);
0516         return;
0517       }
0518       // parse bin content
0519       aiss.reset(line);
0520       double val(0);
0521       aiss >> val;
0522       std::map<string,std::pair<double,double>> errors;
0523       readErrors(errors);
0524       estimates.emplace_back(val, errors);
0525     }
0526 
0527     AnalysisObject* assemble(const string& path = "") {
0528 
0529       auto args = std::tuple_cat(edges, std::make_tuple(path));
0530       BaseT* ao = make_from_tuple(std::move(args));
0531 
0532       size_t global_index = 0;
0533       for (auto&& e : estimates) {
0534         ao->bin(global_index++) = std::move(e);
0535       }
0536 
0537       clearEdges<0>();
0538       sources.clear();
0539       estimates.clear();
0540       maskedBins.clear();
0541       axisCheck = 0;
0542       return ao;
0543     }
0544   };
0545 
0546 
0547 }
0548 
0549 #endif