File indexing completed on 2025-01-18 10:11:04
0001 #ifndef TMVA_CHUNKLOADER
0002 #define TMVA_CHUNKLOADER
0003
0004 #include <iostream>
0005 #include <vector>
0006
0007 #include "TMVA/RTensor.hxx"
0008 #include "ROOT/RDataFrame.hxx"
0009 #include "ROOT/RVec.hxx"
0010
0011 #include "ROOT/RLogger.hxx"
0012
0013 namespace TMVA {
0014 namespace Experimental {
0015 namespace Internal {
0016
0017
0018 template <typename First, typename... Rest>
0019 class RChunkLoaderFunctor {
0020
0021 private:
0022 std::size_t fOffset = 0;
0023 std::size_t fVecSizeIdx = 0;
0024 std::vector<std::size_t> fMaxVecSizes;
0025
0026 float fVecPadding;
0027
0028 TMVA::Experimental::RTensor<float> &fChunkTensor;
0029
0030
0031
0032
0033 template <typename First_T>
0034 void AssignToTensor(First_T first)
0035 {
0036 fChunkTensor.GetData()[fOffset++] = first;
0037 }
0038
0039
0040
0041
0042 template <typename VecType>
0043 void AssignToTensor(const ROOT::RVec<VecType> &first)
0044 {
0045 AssignVector(first);
0046 }
0047
0048
0049
0050
0051
0052
0053 template <typename First_T, typename... Rest_T>
0054 void AssignToTensor(First_T first, Rest_T... rest)
0055 {
0056 fChunkTensor.GetData()[fOffset++] = first;
0057
0058 AssignToTensor(std::forward<Rest_T>(rest)...);
0059 }
0060
0061
0062
0063
0064
0065
0066 template <typename VecType, typename... Rest_T>
0067 void AssignToTensor(const ROOT::RVec<VecType> &first, Rest_T... rest)
0068 {
0069 AssignVector(first);
0070
0071 AssignToTensor(std::forward<Rest_T>(rest)...);
0072 }
0073
0074
0075
0076
0077
0078
0079
0080 template <typename VecType>
0081 void AssignVector(const ROOT::RVec<VecType> &vec)
0082 {
0083 std::size_t max_vec_size = fMaxVecSizes[fVecSizeIdx++];
0084 std::size_t vec_size = vec.size();
0085
0086 for (std::size_t i = 0; i < max_vec_size; i++) {
0087 if (i < vec_size) {
0088 fChunkTensor.GetData()[fOffset++] = vec[i];
0089 } else {
0090 fChunkTensor.GetData()[fOffset++] = fVecPadding;
0091 }
0092 }
0093 }
0094
0095 public:
0096 RChunkLoaderFunctor(TMVA::Experimental::RTensor<float> &chunkTensor,
0097 const std::vector<std::size_t> &maxVecSizes = std::vector<std::size_t>(),
0098 const float vecPadding = 0.0)
0099 : fChunkTensor(chunkTensor), fMaxVecSizes(maxVecSizes), fVecPadding(vecPadding)
0100 {
0101 }
0102
0103
0104
0105
0106 void operator()(First first, Rest... rest)
0107 {
0108 fVecSizeIdx = 0;
0109 AssignToTensor(std::forward<First>(first), std::forward<Rest>(rest)...);
0110 }
0111 };
0112
0113 template <typename... Args>
0114 class RChunkLoader {
0115
0116 private:
0117 std::string fTreeName;
0118 std::string fFileName;
0119 std::size_t fChunkSize;
0120 std::size_t fNumColumns;
0121
0122 std::vector<std::string> fCols;
0123 std::string fFilters;
0124
0125 std::vector<std::size_t> fVecSizes;
0126 std::size_t fVecPadding;
0127
0128 public:
0129
0130
0131
0132
0133
0134
0135
0136
0137 RChunkLoader(const std::string &treeName, const std::string &fileName, const std::size_t chunkSize,
0138 const std::vector<std::string> &cols, const std::string &filters = "",
0139 const std::vector<std::size_t> &vecSizes = {}, const float vecPadding = 0.0)
0140 : fTreeName(treeName),
0141 fFileName(fileName),
0142 fChunkSize(chunkSize),
0143 fCols(cols),
0144 fFilters(filters),
0145 fVecSizes(vecSizes),
0146 fVecPadding(vecPadding),
0147 fNumColumns(cols.size())
0148 {
0149 }
0150
0151
0152
0153
0154
0155 std::pair<std::size_t, std::size_t>
0156 LoadChunk(TMVA::Experimental::RTensor<float> &chunkTensor, const std::size_t currentRow)
0157 {
0158 RChunkLoaderFunctor<Args...> func(chunkTensor, fVecSizes, fVecPadding);
0159
0160
0161
0162 long long start_l = currentRow;
0163 ROOT::RDF::Experimental::RDatasetSpec x_spec =
0164 ROOT::RDF::Experimental::RDatasetSpec()
0165 .AddSample({"", fTreeName, fFileName})
0166 .WithGlobalRange({start_l, std::numeric_limits<Long64_t>::max()});
0167
0168 ROOT::RDataFrame x_rdf(x_spec);
0169
0170
0171 if (fFilters.size() > 0) {
0172 return loadFiltered(x_rdf, func);
0173 }
0174
0175
0176 return loadNonFiltered(x_rdf, func);
0177 }
0178
0179 private:
0180
0181
0182
0183
0184 std::pair<std::size_t, std::size_t> loadFiltered(ROOT::RDataFrame &x_rdf, RChunkLoaderFunctor<Args...> &func)
0185 {
0186
0187 auto x_filter = x_rdf.Filter(fFilters, "RBatchGenerator_Filter");
0188
0189
0190 auto x_ranged = x_filter.Range(fChunkSize);
0191 auto myReport = x_ranged.Report();
0192
0193
0194 x_ranged.Foreach(func, fCols);
0195
0196
0197
0198
0199 std::size_t processed_events = myReport.begin()->GetAll();
0200 std::size_t passed_events = (myReport.end() - 1)->GetPass();
0201
0202 return std::make_pair(processed_events, passed_events);
0203 }
0204
0205
0206
0207
0208
0209
0210 std::pair<std::size_t, std::size_t> loadNonFiltered(ROOT::RDataFrame &x_rdf, RChunkLoaderFunctor<Args...> &func)
0211 {
0212
0213 auto x_ranged = x_rdf.Range(fChunkSize);
0214
0215 auto myCount = x_ranged.Count();
0216
0217
0218 x_ranged.Foreach(func, fCols);
0219
0220
0221 std::size_t processed_events = myCount.GetValue();
0222 std::size_t passed_events = myCount.GetValue();
0223 return std::make_pair(processed_events, passed_events);
0224 }
0225 };
0226
0227 }
0228 }
0229 }
0230 #endif