File indexing completed on 2025-12-16 10:35:00
0001
0002
0003
0004
0005
0006 #ifndef YODA_READERUTILS_H
0007 #define YODA_READERUTILS_H
0008
0009 #include "YODA/AnalysisObject.h"
0010 #include "YODA/Counter.h"
0011 #include "YODA/Estimate0D.h"
0012 #include "YODA/Scatter.h"
0013 #include "YODA/Histo.h"
0014 #include "YODA/Profile.h"
0015 #include "YODA/BinnedEstimate.h"
0016
0017 #ifdef HAVE_HDF5
0018 #include <YODA/Config/BuildConfig.h>
0019 #include "YODA/Utils/H5Utils.h"
0020 #ifdef WITH_HIGHFIVE
0021 #include <YODA/highfive/H5File.hpp>
0022 #else
0023 #include <highfive/H5File.hpp>
0024 #define YODA_H5 HighFive
0025 #endif
0026 #endif
0027
0028 #include <iostream>
0029 #include <locale>
0030 #include <cstring>
0031 #include <regex>
0032 #include <cmath>
0033
0034 #ifdef WITH_OSX
0035 #include <xlocale.h>
0036 #endif
0037
0038 using std::map;
0039 using std::string;
0040 using std::vector;
0041
0042 namespace YODA {
0043
0044
0045 namespace {
0046
0047 static const std::regex regex_string_pat("([\"\'])(?:(?=(\\\\?))\\2.)*?\\1");
0048
0049 }
0050
0051
0052 class AOReaderBase {
0053
0054
0055 class aistringstream {
0056 public:
0057
0058 aistringstream(const char* line=0) {
0059 reset(line);
0060 _set_locale();
0061 }
0062
0063 aistringstream(const string& line) {
0064 reset(line);
0065 _set_locale();
0066 }
0067 ~aistringstream() {
0068 _reset_locale();
0069 }
0070
0071 bool peek(const std::string& s) const {
0072 return s == std::string(_next, s.size());
0073 }
0074
0075
0076 void reset(const char* line=0) {
0077 _next = const_cast<char*>(line);
0078 _new_next = _next;
0079 _error = false;
0080 }
0081
0082 void reset(const string& line) { reset(line.c_str()); }
0083
0084
0085 template<class T>
0086 aistringstream& operator >> (T& value) {
0087 _get(value);
0088 if (_new_next == _next) _error = true;
0089 _next = _new_next;
0090 return *this;
0091 }
0092
0093
0094 operator bool() const { return !_error; }
0095
0096
0097 private:
0098
0099
0100 void _set_locale() {
0101 _locale_set = newlocale(LC_NUMERIC_MASK, "C", NULL);
0102 _locale_prev = uselocale(_locale_set);
0103 if (!_locale_prev) {
0104 throw ReadError(std::string("Error setting locale: ") + strerror(errno));
0105 }
0106 }
0107 void _reset_locale() {
0108 if (!uselocale(_locale_prev)) {
0109 throw ReadError(std::string("Error setting locale: ") + strerror(errno));
0110 }
0111 freelocale(_locale_set);
0112 }
0113
0114 void _get(double& x) { x = std::strtod(_next, &_new_next); }
0115 void _get(float& x) { x = std::strtof(_next, &_new_next); }
0116 void _get(int& i) { i = std::strtol(_next, &_new_next, 10); }
0117 void _get(long& i) { i = std::strtol(_next, &_new_next, 10); }
0118 void _get(unsigned int& i) { i = std::strtoul(_next, &_new_next, 10); }
0119 void _get(long unsigned int& i) { i = std::strtoul(_next, &_new_next, 10); }
0120 void _get(string& x) {
0121 while (std::isspace(*_next) && _next[0] != '\0') _next += 1;
0122 _new_next = _next;
0123 while (!std::isspace(*_new_next) && _new_next[0] != '\0') _new_next += 1;
0124 x = string(_next, _new_next-_next);
0125 }
0126
0127 locale_t _locale_set, _locale_prev;
0128 char *_next, *_new_next;
0129 bool _error;
0130 };
0131
0132
0133 public:
0134
0135
0136 AOReaderBase() { }
0137
0138
0139 virtual ~AOReaderBase() { }
0140
0141 virtual void parse(const string& line) = 0;
0142
0143 virtual AnalysisObject* assemble(const string& path = "") = 0;
0144
0145 #ifdef HAVE_HDF5
0146
0147 virtual AnalysisObject* mkFromH5(H5FileManager&) = 0;
0148
0149 virtual void skip(H5FileManager& h5file) {
0150 h5file.skipCommon();
0151 }
0152
0153 #endif
0154
0155 template<typename T>
0156 void extractVector(const std::string& line, std::vector<T>& vec) {
0157 if constexpr (std::is_same<T, std::string>::value) {
0158 string::const_iterator initpos( line.cbegin() );
0159 const string::const_iterator finpos( line.cend() );
0160 std::smatch m;
0161 while ( std::regex_search(initpos, finpos, m, regex_string_pat) ) {
0162 string label;
0163 std::stringstream ss(m[0].str());
0164 ss >> std::quoted(label);
0165 vec.push_back(label);
0166 initpos = m.suffix().first;
0167 }
0168 }
0169 else {
0170 std::string content = line.substr(line.find(": [")+3);
0171 content.pop_back();
0172 for (const std::string& item : Utils::split(content, ",")) {
0173 aiss.reset(item);
0174 T tmp;
0175 aiss >> tmp;
0176 vec.push_back(std::move(tmp));
0177 }
0178 }
0179 }
0180
0181 protected:
0182
0183 aistringstream aiss;
0184
0185 };
0186
0187
0188
0189
0190 template<class T>
0191 class AOReader;
0192
0193 template<>
0194 class AOReader<Counter> : public AOReaderBase {
0195
0196 Dbn0D dbn;
0197
0198 public:
0199
0200 void parse(const string& line) {
0201 aiss.reset(line);
0202 double sumw(0), sumw2(0), n(0);
0203 aiss >> sumw >> sumw2 >> n;
0204 dbn = Dbn0D(n, sumw, sumw2);
0205 }
0206
0207 AnalysisObject* assemble(const string& path = "") {
0208 auto* ao = new Counter(path);
0209 ao->setDbn(dbn);
0210 dbn = Dbn0D();
0211 return ao;
0212 }
0213
0214 #ifdef HAVE_HDF5
0215
0216 AnalysisObject* mkFromH5(H5FileManager& h5file) {
0217 vector<string> annos = h5file.loadAnnotations();
0218 auto* ao = new Counter(h5file.path(), annos.back());
0219 annos.pop_back();
0220 ao->deserializeMeta(annos);
0221 ao->deserializeContent(h5file.loadContent());
0222 return ao;
0223 };
0224
0225 #endif
0226 };
0227
0228
0229 template<>
0230 class AOReader<Estimate0D> : public AOReaderBase {
0231
0232 Estimate0D est;
0233 vector<string> sources;
0234
0235 void readErrors(std::map<string,std::pair<double,double>>& errors) {
0236 string eDn, eUp;
0237 for (size_t i = 0; i < sources.size(); ++i) {
0238 aiss >> eDn >> eUp;
0239 if (eDn != "---" && eUp != "---") {
0240 errors[sources[i]] = { Utils::toDbl(eDn), Utils::toDbl(eUp) };
0241 }
0242 }
0243 }
0244
0245 public:
0246
0247 void parse(const string& line) {
0248 if (!line.rfind("ErrorLabels: ", 0)) {
0249 extractVector<std::string>(line, sources);
0250 return;
0251 }
0252
0253 aiss.reset(line);
0254 double val(0);
0255 aiss >> val;
0256 std::map<string,std::pair<double,double>> errors;
0257 readErrors(errors);
0258 est = Estimate0D(val, errors);
0259 }
0260
0261 AnalysisObject* assemble(const string& path = "") {
0262
0263 auto* ao = new Estimate0D(est, path);
0264 est = Estimate0D();
0265 sources.clear();
0266 return ao;
0267 }
0268
0269 #ifdef HAVE_HDF5
0270 AnalysisObject* mkFromH5(H5FileManager& h5file) {
0271 vector<string> annos = h5file.loadAnnotations();
0272 auto* ao = new Estimate0D(h5file.path(), annos.back());
0273 annos.pop_back();
0274 ao->deserializeMeta(annos);
0275 ao->deserializeContent(h5file.loadContent());
0276 ao->deserializeSources(h5file.loadSources());
0277 return ao;
0278 }
0279
0280 void skip(H5FileManager& h5file) {
0281 h5file.skipCommon();
0282 h5file.skipSources();
0283 }
0284
0285 #endif
0286 };
0287
0288
0289 template <size_t N>
0290 class AOReader<ScatterND<N>> : public AOReaderBase {
0291
0292 vector<PointND<N>> points;
0293
0294 template<size_t I>
0295 void readCoords(vector<double>& vals, vector<double>& errm, vector<double>& errp) {
0296 if constexpr(I < N) {
0297 double v(0), em(0), ep(0);
0298 aiss >> v >> em >> ep;
0299 vals[I] = v;
0300 errm[I] = fabs(em);
0301 errp[I] = fabs(ep);
0302 readCoords<I+1>(vals, errm, errp);
0303 }
0304 }
0305
0306 public:
0307
0308 void parse(const string& line) {
0309 aiss.reset(line);
0310 vector<double> vals(N), errm(N), errp(N);
0311 readCoords<0>(vals, errm, errp);
0312 points.push_back(PointND<N>(vals, errm, errp));
0313 }
0314
0315 AnalysisObject* assemble(const string& path = "") {
0316 auto* ao = new ScatterND<N>();
0317 ao->setPath(path);
0318 ao->addPoints(points);
0319 points.clear();
0320 return ao;
0321 }
0322
0323 #ifdef HAVE_HDF5
0324
0325 AnalysisObject* mkFromH5(H5FileManager& h5file) {
0326 vector<string> annos = h5file.loadAnnotations();
0327 auto* ao = new ScatterND<N>(h5file.path(), annos.back());
0328 annos.pop_back();
0329 ao->deserializeMeta(annos);
0330 ao->deserializeContent(h5file.loadContent());
0331 return ao;
0332 }
0333
0334 #endif
0335 };
0336
0337
0338 template <size_t DbnN, typename... AxisT>
0339 class AOReader<BinnedDbn<DbnN, AxisT...>> : public AOReaderBase {
0340
0341 using BaseT = BinnedDbn<DbnN, AxisT...>;
0342
0343 template <size_t I>
0344 using is_CAxis = typename std::is_floating_point<typename std::tuple_element_t<I, std::tuple<AxisT...>>>;
0345
0346 std::tuple<vector<AxisT> ...> edges;
0347 Dbn<DbnN> yoda1Overflow;
0348 vector<Dbn<DbnN>> dbns;
0349 vector<size_t> maskedBins;
0350 std::array<double,DbnN*(DbnN-1)/2> crossTerms;
0351 bool isYODA1 = false;
0352 size_t axisCheck = 0;
0353
0354
0355 template<size_t I>
0356 void readEdges() {
0357 if constexpr(I < sizeof...(AxisT)) {
0358 using EdgeT = std::tuple_element_t<I, std::tuple<AxisT...>>;
0359 if constexpr (is_CAxis<I>::value) {
0360 EdgeT lo, hi;
0361 aiss >> lo >> hi;
0362 auto& curr_edges = std::get<I>(edges);
0363 if (!std::isinf(lo)) {
0364 if (curr_edges.empty()) curr_edges.push_back(lo);
0365 }
0366 if (!std::isinf(hi)) {
0367 if (curr_edges.size() && curr_edges[ curr_edges.size() - 1 ] != hi) {
0368 curr_edges.push_back(hi);
0369 }
0370 }
0371 }
0372 else {
0373 throw BinningError("Discrete axes are not supported in this YODA1-style legacy format.");
0374 }
0375 readEdges<I+1>();
0376 }
0377 }
0378
0379 template<size_t I>
0380 void readEdges(const std::string& line) {
0381 if constexpr(I < sizeof...(AxisT)) {
0382 if (I == axisCheck) {
0383 using EdgeT = std::tuple_element_t<I, std::tuple<AxisT...>>;
0384 auto& curr_edges = std::get<I>(edges);
0385 extractVector<EdgeT>(line, curr_edges);
0386 }
0387 readEdges<I+1>(line);
0388 }
0389 }
0390
0391 #ifdef HAVE_HDF5
0392
0393 template <size_t I>
0394 void loadEdges(H5FileManager& h5file) {
0395 if constexpr(I < sizeof...(AxisT)) {
0396 using EdgeT = std::tuple_element_t<I, std::tuple<AxisT...>>;
0397 auto& curr_edges = std::get<I>(edges);
0398 curr_edges = h5file.loadEdges<EdgeT>();
0399 loadEdges<I+1>(h5file);
0400 }
0401 }
0402
0403 template <size_t I>
0404 void skipEdges(H5FileManager& h5file) {
0405 if constexpr(I < sizeof...(AxisT)) {
0406 using EdgeT = std::tuple_element_t<I, std::tuple<AxisT...>>;
0407 h5file.skipEdges<EdgeT>();
0408 skipEdges<I+1>(h5file);
0409 }
0410 }
0411
0412 #endif
0413
0414 template<size_t I>
0415 void readDbn(std::array<double,DbnN+1>& sumW, std::array<double,DbnN+1>& sumW2) {
0416 if constexpr(I <= DbnN) {
0417 double w(0), w2(0);
0418 aiss >> w >> w2;
0419 sumW[I] = w;
0420 sumW2[I] = w2;
0421 readDbn<I+1>(sumW, sumW2);
0422 }
0423 }
0424
0425 template <class tupleT, size_t... Is>
0426 BaseT* make_from_tuple(tupleT&& tuple, std::index_sequence<Is...> ) {
0427 BaseT* rtn = new BaseT{std::get<Is>(std::forward<tupleT>(tuple))...};
0428 rtn->maskBins(maskedBins);
0429 return rtn;
0430 }
0431
0432 template <class tupleT>
0433 BaseT* make_from_tuple(tupleT&& tuple) {
0434 return make_from_tuple(std::forward<tupleT>(tuple),
0435 std::make_index_sequence<sizeof...(AxisT)+1>{});
0436 }
0437
0438 template<size_t I>
0439 void clearEdges() {
0440 if constexpr(I < sizeof...(AxisT)) {
0441 std::get<I>(edges).clear();
0442 clearEdges<I+1>();
0443 }
0444 }
0445
0446 public:
0447
0448 void parse(const string& line) {
0449 if (line.find("Total") != string::npos) {
0450 isYODA1 = true;
0451 return;
0452 }
0453 if (!line.rfind("Edges(A", 0)) {
0454 readEdges<0>(line);
0455 ++axisCheck;
0456 return;
0457 }
0458 if (!line.rfind("MaskedBins: ", 0)) {
0459 extractVector<size_t>(line, maskedBins);
0460 return;
0461 }
0462 aiss.reset(line);
0463 if (line.find("Underflow") != string::npos || line.find("Overflow") != string::npos) {
0464
0465 if constexpr (sizeof...(AxisT) == 1) {
0466 string tmp1, tmp2;
0467 aiss >> tmp1 >> tmp2;
0468 }
0469 }
0470 else if (isYODA1) readEdges<0>();
0471 std::array<double,DbnN+1> sumW, sumW2;
0472 readDbn<0>(sumW, sumW2);
0473 for (size_t i = 0; i < crossTerms.size(); ++i) {
0474 double tmp(0.);
0475 aiss >> tmp;
0476 crossTerms.at(i) = tmp;
0477 }
0478 double numEntries(0);
0479 aiss >> numEntries;
0480 if (line.find("Overflow") != string::npos) {
0481 if constexpr (sizeof...(AxisT) == 1) {
0482 if constexpr (DbnN < 2)
0483 yoda1Overflow = Dbn<DbnN>(numEntries, sumW, sumW2);
0484 else
0485 yoda1Overflow = Dbn<DbnN>(numEntries, sumW, sumW2, crossTerms);
0486 }
0487 }
0488 else {
0489 if constexpr (DbnN < 2) {
0490 dbns.emplace_back(numEntries, sumW, sumW2);
0491 }
0492 else {
0493 dbns.emplace_back(numEntries, sumW, sumW2, crossTerms);
0494 }
0495 }
0496 }
0497
0498 AnalysisObject* assemble(const string& path = "") {
0499
0500 auto args = std::tuple_cat(edges, std::make_tuple(path));
0501 BaseT* ao = make_from_tuple(std::move(args));
0502
0503 size_t global_index = 0;
0504 if constexpr (sizeof...(AxisT) == 2) {
0505 if (isYODA1) {
0506 for (size_t ix = 1; ix < ao->numBinsAt(0)+1; ++ix) {
0507 for (size_t iy = 1; iy < ao->numBinsAt(1)+1; ++iy) {
0508 ao->bin(ix,iy).set(std::move(dbns[global_index++]));
0509 }
0510 }
0511 }
0512 }
0513 if ( !(isYODA1 && sizeof...(AxisT) == 2) ) {
0514 for (auto&& d : dbns) {
0515 ao->bin(global_index++).set(std::move(d));
0516 }
0517 }
0518
0519 if constexpr (sizeof...(AxisT) == 1) {
0520 if (isYODA1) ao->bin(global_index).set(yoda1Overflow);
0521 yoda1Overflow = Dbn<DbnN>();
0522 }
0523
0524 crossTerms.fill(0);
0525 maskedBins.clear();
0526 isYODA1 = false;
0527 clearEdges<0>();
0528 dbns.clear();
0529 axisCheck = 0;
0530 return ao;
0531 }
0532
0533 #ifdef HAVE_HDF5
0534
0535 AnalysisObject* mkFromH5(H5FileManager& h5file) {
0536 loadEdges<0>(h5file);
0537 maskedBins = h5file.loadMasks();
0538 vector<string> annos = h5file.loadAnnotations();
0539 auto args = std::tuple_cat(edges, std::make_tuple(h5file.path(), annos.back()));
0540 BaseT* ao = make_from_tuple(std::move(args));
0541 annos.pop_back();
0542 ao->deserializeMeta(annos);
0543 ao->deserializeContent(h5file.loadContent());
0544 maskedBins.clear();
0545 clearEdges<0>();
0546 return ao;
0547 }
0548
0549 void skip(H5FileManager& h5file) {
0550 skipEdges<0>(h5file);
0551 h5file.skipMasks();
0552 h5file.skipCommon();
0553 }
0554
0555 #endif
0556
0557 };
0558
0559
0560 template <typename... AxisT>
0561 class AOReader<BinnedEstimate<AxisT...>> : public AOReaderBase {
0562
0563 using BaseT = BinnedEstimate<AxisT...>;
0564
0565 std::tuple<vector<AxisT> ...> edges;
0566 vector<Estimate> estimates;
0567 vector<size_t> maskedBins;
0568 vector<string> sources;
0569 size_t axisCheck = 0;
0570
0571
0572 template<size_t I>
0573 void readEdges(const std::string& line) {
0574 if constexpr(I < sizeof...(AxisT)) {
0575 if (I == axisCheck) {
0576 using EdgeT = std::tuple_element_t<I, std::tuple<AxisT...>>;
0577 auto& curr_edges = std::get<I>(edges);
0578 extractVector<EdgeT>(line, curr_edges);
0579 }
0580 readEdges<I+1>(line);
0581 }
0582 }
0583
0584 void readErrors(std::map<string,std::pair<double,double>>& errors) {
0585 string eDn, eUp;
0586 for (const std::string& src : sources) {
0587 aiss >> eDn >> eUp;
0588 if (eDn != "---" && eUp != "---") {
0589 errors[src] = { Utils::toDbl(eDn), Utils::toDbl(eUp) };
0590 }
0591 }
0592 }
0593
0594 #ifdef HAVE_HDF5
0595
0596 template <size_t I>
0597 void loadEdges(H5FileManager& h5file) {
0598 if constexpr(I < sizeof...(AxisT)) {
0599 using EdgeT = std::tuple_element_t<I, std::tuple<AxisT...>>;
0600 auto& curr_edges = std::get<I>(edges);
0601 curr_edges = h5file.loadEdges<EdgeT>();
0602 loadEdges<I+1>(h5file);
0603 }
0604 }
0605
0606 template <size_t I>
0607 void skipEdges(H5FileManager& h5file) {
0608 if constexpr(I < sizeof...(AxisT)) {
0609 using EdgeT = std::tuple_element_t<I, std::tuple<AxisT...>>;
0610 h5file.skipEdges<EdgeT>();
0611 skipEdges<I+1>(h5file);
0612 }
0613 }
0614
0615 #endif
0616
0617 template <class tupleT, size_t... Is>
0618 BaseT* make_from_tuple(tupleT&& tuple, std::index_sequence<Is...> ) {
0619 BaseT* rtn = new BaseT{std::get<Is>(std::forward<tupleT>(tuple))...};
0620 rtn->maskBins(maskedBins);
0621 return rtn;
0622 }
0623
0624 template <class tupleT>
0625 BaseT* make_from_tuple(tupleT&& tuple) {
0626 return make_from_tuple(std::forward<tupleT>(tuple),
0627 std::make_index_sequence<sizeof...(AxisT)+1>{});
0628 }
0629
0630 template<size_t I>
0631 void clearEdges() {
0632 if constexpr(I < sizeof...(AxisT)) {
0633 std::get<I>(edges).clear();
0634 clearEdges<I+1>();
0635 }
0636 }
0637
0638 public:
0639
0640 void parse(const string& line) {
0641 if (!line.rfind("Edges(A", 0)) {
0642 readEdges<0>(line);
0643 ++axisCheck;
0644 return;
0645 }
0646 if (!line.rfind("MaskedBins: ", 0)) {
0647 extractVector<size_t>(line, maskedBins);
0648 return;
0649 }
0650 if (!line.rfind("ErrorLabels: ", 0)) {
0651 extractVector<std::string>(line, sources);
0652 return;
0653 }
0654
0655 aiss.reset(line);
0656 double val(0);
0657 aiss >> val;
0658 std::map<string,std::pair<double,double>> errors;
0659 readErrors(errors);
0660 estimates.emplace_back(val, errors);
0661 }
0662
0663 AnalysisObject* assemble(const string& path = "") {
0664
0665 auto args = std::tuple_cat(edges, std::make_tuple(path));
0666 BaseT* ao = make_from_tuple(std::move(args));
0667
0668 size_t global_index = 0;
0669 for (auto&& e : estimates) {
0670 ao->bin(global_index++) = std::move(e);
0671 }
0672
0673 clearEdges<0>();
0674 sources.clear();
0675 estimates.clear();
0676 maskedBins.clear();
0677 axisCheck = 0;
0678 return ao;
0679 }
0680
0681 #ifdef HAVE_HDF5
0682
0683 AnalysisObject* mkFromH5(H5FileManager& h5file) {
0684 loadEdges<0>(h5file);
0685 maskedBins = h5file.loadMasks();
0686
0687 vector<string> annos = h5file.loadAnnotations();
0688 auto args = std::tuple_cat(edges, std::make_tuple(h5file.path(), annos.back()));
0689 BaseT* ao = make_from_tuple(std::move(args));
0690 annos.pop_back();
0691 ao->deserializeMeta(annos);
0692 ao->deserializeContent(h5file.loadContent());
0693
0694 for (auto& b : ao->bins(true, true)) {
0695 b.deserializeSources(h5file.loadSources());
0696 }
0697
0698 maskedBins.clear();
0699 clearEdges<0>();
0700 return ao;
0701 }
0702
0703 void skip(H5FileManager& h5file) {
0704 skipEdges<0>(h5file);
0705 h5file.skipMasks();
0706 h5file.skipSources();
0707 h5file.skipCommon();
0708 }
0709
0710 #endif
0711
0712 };
0713
0714
0715 }
0716
0717 #endif