File indexing completed on 2025-09-17 09:14:38
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014 #ifndef TMVA_RCHUNKLOADER
0015 #define TMVA_RCHUNKLOADER
0016
0017 #include <vector>
0018
0019 #include "TMVA/RTensor.hxx"
0020 #include "ROOT/RDataFrame.hxx"
0021 #include "ROOT/RDF/Utils.hxx"
0022 #include "ROOT/RVec.hxx"
0023
0024 #include "ROOT/RLogger.hxx"
0025
0026 namespace TMVA {
0027 namespace Experimental {
0028 namespace Internal {
0029
0030
0031 template <typename... ColTypes>
0032 class RChunkLoaderFunctor {
0033 std::size_t fOffset{};
0034 std::size_t fVecSizeIdx{};
0035 float fVecPadding{};
0036 std::vector<std::size_t> fMaxVecSizes{};
0037
0038 TMVA::Experimental::RTensor<float> &fChunkTensor;
0039
0040 template <typename T, std::enable_if_t<ROOT::Internal::RDF::IsDataContainer<T>::value, int> = 0>
0041 void AssignToTensor(const T &vec)
0042 {
0043 const auto &max_vec_size = fMaxVecSizes[fVecSizeIdx++];
0044 const auto &vec_size = vec.size();
0045 if (vec_size < max_vec_size)
0046 {
0047 std::copy(vec.cbegin(), vec.cend(), &fChunkTensor.GetData()[fOffset]);
0048 std::fill(&fChunkTensor.GetData()[fOffset + vec_size], &fChunkTensor.GetData()[fOffset + max_vec_size],
0049 fVecPadding);
0050 } else
0051 {
0052 std::copy(vec.cbegin(), vec.cbegin() + max_vec_size, &fChunkTensor.GetData()[fOffset]);
0053 }
0054 fOffset += max_vec_size;
0055 }
0056
0057 template <typename T, std::enable_if_t<!ROOT::Internal::RDF::IsDataContainer<T>::value, int> = 0>
0058 void AssignToTensor(const T &val)
0059 {
0060 fChunkTensor.GetData()[fOffset++] = val;
0061 }
0062
0063 public:
0064 RChunkLoaderFunctor(TMVA::Experimental::RTensor<float> &chunkTensor, const std::vector<std::size_t> &maxVecSizes,
0065 float vecPadding)
0066 : fChunkTensor(chunkTensor), fMaxVecSizes(maxVecSizes), fVecPadding(vecPadding)
0067 {
0068 }
0069
0070 void operator()(const ColTypes &...cols)
0071 {
0072 fVecSizeIdx = 0;
0073 (AssignToTensor(cols), ...);
0074 }
0075 };
0076
0077 template <typename... ColTypes>
0078 class RChunkLoaderFunctorFilters {
0079
0080 private:
0081 std::size_t fOffset{};
0082 std::size_t fVecSizeIdx{};
0083 std::size_t fEntries{};
0084 std::size_t fChunkSize{};
0085 float fVecPadding{};
0086 std::vector<std::size_t> fMaxVecSizes{};
0087
0088 TMVA::Experimental::RTensor<float> &fChunkTensor;
0089 TMVA::Experimental::RTensor<float> &fRemainderTensor;
0090
0091 template <typename T, std::enable_if_t<ROOT::Internal::RDF::IsDataContainer<T>::value, int> = 0>
0092 void AssignToTensor(const T &vec)
0093 {
0094 std::size_t max_vec_size = fMaxVecSizes[fVecSizeIdx++];
0095 std::size_t vec_size = vec.size();
0096 if (vec_size < max_vec_size)
0097 {
0098 std::copy(vec.begin(), vec.end(), &fChunkTensor.GetData()[fOffset]);
0099 std::fill(&fChunkTensor.GetData()[fOffset + vec_size], &fChunkTensor.GetData()[fOffset + max_vec_size],
0100 fVecPadding);
0101 } else
0102 {
0103 std::copy(vec.begin(), vec.begin() + max_vec_size, &fChunkTensor.GetData()[fOffset]);
0104 }
0105 fOffset += max_vec_size;
0106 fEntries++;
0107 }
0108
0109 template <typename T, std::enable_if_t<!ROOT::Internal::RDF::IsDataContainer<T>::value, int> = 0>
0110 void AssignToTensor(const T &val)
0111 {
0112 fChunkTensor.GetData()[fOffset++] = val;
0113 fEntries++;
0114 }
0115
0116 public:
0117 RChunkLoaderFunctorFilters(TMVA::Experimental::RTensor<float> &chunkTensor,
0118 TMVA::Experimental::RTensor<float> &remainderTensor, std::size_t entries,
0119 std::size_t chunkSize, std::size_t &&offset,
0120 const std::vector<std::size_t> &maxVecSizes = std::vector<std::size_t>(),
0121 const float vecPadding = 0.0)
0122 : fChunkTensor(chunkTensor),
0123 fRemainderTensor(remainderTensor),
0124 fEntries(entries),
0125 fChunkSize(chunkSize),
0126 fOffset(offset),
0127 fMaxVecSizes(maxVecSizes),
0128 fVecPadding(vecPadding)
0129 {
0130 }
0131
0132 void operator()(const ColTypes &...cols)
0133 {
0134 fVecSizeIdx = 0;
0135 if (fEntries == fChunkSize) {
0136 fChunkTensor = fRemainderTensor;
0137 fOffset = 0;
0138 }
0139 (AssignToTensor(cols), ...);
0140 }
0141
0142 std::size_t &SetEntries() { return fEntries; }
0143 std::size_t &SetOffset() { return fOffset; }
0144 };
0145
0146 template <typename... Args>
0147 class RChunkLoader {
0148
0149 private:
0150 std::size_t fChunkSize;
0151
0152 std::vector<std::string> fCols;
0153
0154 std::vector<std::size_t> fVecSizes;
0155 std::size_t fVecPadding;
0156
0157 ROOT::RDF::RNode &f_rdf;
0158 TMVA::Experimental::RTensor<float> &fChunkTensor;
0159
0160 public:
0161
0162
0163
0164
0165
0166
0167 RChunkLoader(ROOT::RDF::RNode &rdf, TMVA::Experimental::RTensor<float> &chunkTensor, const std::size_t chunkSize,
0168 const std::vector<std::string> &cols, const std::vector<std::size_t> &vecSizes = {},
0169 const float vecPadding = 0.0)
0170 : f_rdf(rdf),
0171 fChunkTensor(chunkTensor),
0172 fChunkSize(chunkSize),
0173 fCols(cols),
0174 fVecSizes(vecSizes),
0175 fVecPadding(vecPadding)
0176 {
0177 }
0178
0179
0180
0181
0182
0183 std::size_t LoadChunk(const std::size_t currentRow)
0184 {
0185 RChunkLoaderFunctor<Args...> func(fChunkTensor, fVecSizes, fVecPadding);
0186
0187 ROOT::Internal::RDF::ChangeBeginAndEndEntries(f_rdf, currentRow, currentRow + fChunkSize);
0188 auto myCount = f_rdf.Count();
0189
0190
0191 f_rdf.Foreach(func, fCols);
0192
0193
0194 return myCount.GetValue();
0195 }
0196 };
0197
0198 template <typename... Args>
0199 class RChunkLoaderFilters {
0200
0201 private:
0202 ROOT::RDF::RNode &f_rdf;
0203 TMVA::Experimental::RTensor<float> &fChunkTensor;
0204
0205 std::size_t fChunkSize;
0206 std::vector<std::string> fCols;
0207 const std::size_t fNumEntries;
0208 std::size_t fNumAllEntries;
0209 std::vector<std::size_t> fVecSizes;
0210 std::size_t fVecPadding;
0211 std::size_t fNumColumns;
0212
0213 const std::size_t fPartOfChunkSize;
0214 TMVA::Experimental::RTensor<float> fRemainderChunkTensor;
0215 std::size_t fRemainderChunkTensorRow = 0;
0216
0217 public:
0218
0219
0220
0221
0222
0223
0224
0225 RChunkLoaderFilters(ROOT::RDF::RNode &rdf, TMVA::Experimental::RTensor<float> &chunkTensor,
0226 const std::size_t chunkSize, const std::vector<std::string> &cols, std::size_t numEntries,
0227 std::size_t numAllEntries, const std::vector<std::size_t> &vecSizes = {},
0228 const float vecPadding = 0.0)
0229 : f_rdf(rdf),
0230 fChunkTensor(chunkTensor),
0231 fChunkSize(chunkSize),
0232 fCols(cols),
0233 fNumEntries(numEntries),
0234 fNumAllEntries(numAllEntries),
0235 fVecSizes(vecSizes),
0236 fVecPadding(vecPadding),
0237 fNumColumns(cols.size()),
0238 fPartOfChunkSize(chunkSize / 5),
0239 fRemainderChunkTensor(std::vector<std::size_t>{fPartOfChunkSize, fNumColumns})
0240 {
0241 }
0242
0243
0244
0245
0246
0247 std::pair<std::size_t, std::size_t> LoadChunk(std::size_t currentRow)
0248 {
0249 for (std::size_t i = 0; i < fRemainderChunkTensorRow; i++) {
0250 std::copy(fRemainderChunkTensor.GetData() + (i * fNumColumns),
0251 fRemainderChunkTensor.GetData() + ((i + 1) * fNumColumns),
0252 fChunkTensor.GetData() + (i * fNumColumns));
0253 }
0254
0255 RChunkLoaderFunctorFilters<Args...> func(fChunkTensor, fRemainderChunkTensor, fRemainderChunkTensorRow,
0256 fChunkSize, fRemainderChunkTensorRow * fNumColumns, fVecSizes,
0257 fVecPadding);
0258
0259 std::size_t passedEvents = 0;
0260 std::size_t processedEvents = 0;
0261
0262 while ((passedEvents < fChunkSize && passedEvents < fNumEntries) && currentRow < fNumAllEntries) {
0263 ROOT::Internal::RDF::ChangeBeginAndEndEntries(f_rdf, currentRow, currentRow + fPartOfChunkSize);
0264 auto report = f_rdf.Report();
0265
0266 f_rdf.Foreach(func, fCols);
0267
0268 processedEvents += report.begin()->GetAll();
0269 passedEvents += (report.end() - 1)->GetPass();
0270
0271 currentRow += fPartOfChunkSize;
0272 func.SetEntries() = passedEvents;
0273 func.SetOffset() = passedEvents * fNumColumns;
0274 }
0275
0276 fRemainderChunkTensorRow = passedEvents > fChunkSize ? passedEvents - fChunkSize : 0;
0277
0278 return std::make_pair(processedEvents, passedEvents);
0279 }
0280
0281 std::size_t LastChunk()
0282 {
0283 for (std::size_t i = 0; i < fRemainderChunkTensorRow; i++) {
0284 std::copy(fRemainderChunkTensor.GetData() + (i * fNumColumns),
0285 fRemainderChunkTensor.GetData() + ((i + 1) * fNumColumns),
0286 fChunkTensor.GetData() + (i * fNumColumns));
0287 }
0288
0289 return fRemainderChunkTensorRow;
0290 }
0291 };
0292 }
0293 }
0294 }
0295 #endif