File indexing completed on 2025-12-15 10:28:51
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015 #ifndef TMVA_RBATCHLOADER
0016 #define TMVA_RBATCHLOADER
0017
0018 #include <vector>
0019 #include <memory>
0020 #include <numeric>
0021
0022
0023 #include <queue>
0024 #include <mutex>
0025 #include <condition_variable>
0026
0027 #include "TMVA/RTensor.hxx"
0028 #include "TMVA/Tools.h"
0029
0030 namespace TMVA::Experimental::Internal {
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041 class RBatchLoader {
0042 private:
0043 std::size_t fBatchSize;
0044 std::size_t fNumColumns;
0045
0046 bool fIsActive = false;
0047
0048 std::mutex fBatchLock;
0049 std::condition_variable fBatchCondition;
0050
0051
0052 std::queue<std::unique_ptr<TMVA::Experimental::RTensor<float>>> fTrainingBatchQueue;
0053 std::queue<std::unique_ptr<TMVA::Experimental::RTensor<float>>> fValidationBatchQueue;
0054
0055
0056 std::size_t fNumTrainingBatchQueue;
0057 std::size_t fNumValidationBatchQueue;
0058
0059
0060 std::unique_ptr<TMVA::Experimental::RTensor<float>> fCurrentBatch;
0061
0062
0063 std::unique_ptr<TMVA::Experimental::RTensor<float>> fPrimaryLeftoverTrainingBatch;
0064 std::unique_ptr<TMVA::Experimental::RTensor<float>> fSecondaryLeftoverTrainingBatch;
0065
0066 std::unique_ptr<TMVA::Experimental::RTensor<float>> fPrimaryLeftoverValidationBatch;
0067 std::unique_ptr<TMVA::Experimental::RTensor<float>> fSecondaryLeftoverValidationBatch;
0068
0069 public:
0070 RBatchLoader(std::size_t batchSize, std::size_t numColumns) : fBatchSize(batchSize), fNumColumns(numColumns)
0071 {
0072
0073 fPrimaryLeftoverTrainingBatch =
0074 std::make_unique<TMVA::Experimental::RTensor<float>>(std::vector<std::size_t>{0, fNumColumns});
0075 fSecondaryLeftoverTrainingBatch =
0076 std::make_unique<TMVA::Experimental::RTensor<float>>(std::vector<std::size_t>{0, fNumColumns});
0077
0078 fPrimaryLeftoverValidationBatch =
0079 std::make_unique<TMVA::Experimental::RTensor<float>>(std::vector<std::size_t>{0, fNumColumns});
0080 fSecondaryLeftoverValidationBatch =
0081 std::make_unique<TMVA::Experimental::RTensor<float>>(std::vector<std::size_t>{0, fNumColumns});
0082
0083 fNumTrainingBatchQueue = fTrainingBatchQueue.size();
0084 fNumValidationBatchQueue = fValidationBatchQueue.size();
0085 }
0086
0087 public:
0088 void Activate()
0089 {
0090 {
0091 std::lock_guard<std::mutex> lock(fBatchLock);
0092 fIsActive = true;
0093 }
0094 fBatchCondition.notify_all();
0095 }
0096
0097
0098
0099 void DeActivate()
0100 {
0101 {
0102 std::lock_guard<std::mutex> lock(fBatchLock);
0103 fIsActive = false;
0104 }
0105 fBatchCondition.notify_all();
0106 }
0107
0108
0109
0110
0111
0112
0113 std::unique_ptr<TMVA::Experimental::RTensor<float>>
0114 CreateBatch(TMVA::Experimental::RTensor<float> &chunkTensor, std::size_t idxs)
0115 {
0116 auto batch =
0117 std::make_unique<TMVA::Experimental::RTensor<float>>(std::vector<std::size_t>({fBatchSize, fNumColumns}));
0118 std::copy(chunkTensor.GetData() + (idxs * fBatchSize * fNumColumns),
0119 chunkTensor.GetData() + ((idxs + 1) * fBatchSize * fNumColumns), batch->GetData());
0120
0121 return batch;
0122 }
0123
0124
0125
0126 TMVA::Experimental::RTensor<float> GetTrainBatch()
0127 {
0128
0129 if (fTrainingBatchQueue.empty()) {
0130 fCurrentBatch = std::make_unique<TMVA::Experimental::RTensor<float>>(std::vector<std::size_t>({0}));
0131 return *fCurrentBatch;
0132 }
0133
0134 fCurrentBatch = std::move(fTrainingBatchQueue.front());
0135 fTrainingBatchQueue.pop();
0136
0137 return *fCurrentBatch;
0138 }
0139
0140
0141
0142 TMVA::Experimental::RTensor<float> GetValidationBatch()
0143 {
0144
0145 if (fValidationBatchQueue.empty()) {
0146 fCurrentBatch = std::make_unique<TMVA::Experimental::RTensor<float>>(std::vector<std::size_t>({0}));
0147 return *fCurrentBatch;
0148 }
0149
0150 fCurrentBatch = std::move(fValidationBatchQueue.front());
0151 fValidationBatchQueue.pop();
0152
0153 return *fCurrentBatch;
0154 }
0155
0156
0157
0158
0159
0160
0161 void CreateTrainingBatches(TMVA::Experimental::RTensor<float> &chunkTensor, int lastbatch,
0162 std::size_t leftoverBatchSize, bool dropRemainder)
0163 {
0164 std::size_t ChunkSize = chunkTensor.GetShape()[0];
0165 std::size_t Batches = ChunkSize / fBatchSize;
0166 std::size_t LeftoverBatchSize = ChunkSize % fBatchSize;
0167
0168
0169 std::vector<std::unique_ptr<TMVA::Experimental::RTensor<float>>> batches;
0170
0171
0172 for (std::size_t i = 0; i < Batches; i++) {
0173
0174 batches.emplace_back(CreateBatch(chunkTensor, i));
0175 }
0176
0177
0178 TMVA::Experimental::RTensor<float> LeftoverBatch({LeftoverBatchSize, fNumColumns});
0179 std::copy(chunkTensor.GetData() + (Batches * fBatchSize * fNumColumns),
0180 chunkTensor.GetData() + (Batches * fBatchSize * fNumColumns + LeftoverBatchSize * fNumColumns),
0181 LeftoverBatch.GetData());
0182
0183
0184 std::size_t PrimaryLeftoverSize = (*fPrimaryLeftoverTrainingBatch).GetShape()[0];
0185 std::size_t emptySlots = fBatchSize - PrimaryLeftoverSize;
0186
0187
0188 if (emptySlots >= LeftoverBatchSize) {
0189 (*fPrimaryLeftoverTrainingBatch) =
0190 (*fPrimaryLeftoverTrainingBatch).Resize({PrimaryLeftoverSize + LeftoverBatchSize, fNumColumns});
0191 std::copy(LeftoverBatch.GetData(), LeftoverBatch.GetData() + (LeftoverBatchSize * fNumColumns),
0192 fPrimaryLeftoverTrainingBatch->GetData() + (PrimaryLeftoverSize * fNumColumns));
0193
0194
0195 if (emptySlots == LeftoverBatchSize) {
0196 auto copy =
0197 std::make_unique<TMVA::Experimental::RTensor<float>>(std::vector<std::size_t>{fBatchSize, fNumColumns});
0198 std::copy(fPrimaryLeftoverTrainingBatch->GetData(),
0199 fPrimaryLeftoverTrainingBatch->GetData() + (fBatchSize * fNumColumns), copy->GetData());
0200 batches.emplace_back(std::move(copy));
0201
0202
0203 *fPrimaryLeftoverTrainingBatch = *fSecondaryLeftoverTrainingBatch;
0204 fSecondaryLeftoverValidationBatch =
0205 std::make_unique<TMVA::Experimental::RTensor<float>>(std::vector<std::size_t>{0, fNumColumns});
0206 }
0207 }
0208
0209
0210 else if (emptySlots < LeftoverBatchSize) {
0211
0212 (*fPrimaryLeftoverTrainingBatch) = (*fPrimaryLeftoverTrainingBatch).Resize({fBatchSize, fNumColumns});
0213 std::copy(LeftoverBatch.GetData(), LeftoverBatch.GetData() + (emptySlots * fNumColumns),
0214 fPrimaryLeftoverTrainingBatch->GetData() + (PrimaryLeftoverSize * fNumColumns));
0215
0216
0217 (*fSecondaryLeftoverTrainingBatch) =
0218 (*fSecondaryLeftoverTrainingBatch).Resize({LeftoverBatchSize - emptySlots, fNumColumns});
0219 std::copy(LeftoverBatch.GetData() + (emptySlots * fNumColumns),
0220 LeftoverBatch.GetData() + (LeftoverBatchSize * fNumColumns),
0221 fSecondaryLeftoverTrainingBatch->GetData());
0222
0223
0224 auto copy =
0225 std::make_unique<TMVA::Experimental::RTensor<float>>(std::vector<std::size_t>{fBatchSize, fNumColumns});
0226 std::copy(fPrimaryLeftoverTrainingBatch->GetData(),
0227 fPrimaryLeftoverTrainingBatch->GetData() + (fBatchSize * fNumColumns), copy->GetData());
0228 batches.emplace_back(std::move(copy));
0229
0230
0231 *fPrimaryLeftoverTrainingBatch = *fSecondaryLeftoverTrainingBatch;
0232
0233
0234 fSecondaryLeftoverValidationBatch =
0235 std::make_unique<TMVA::Experimental::RTensor<float>>(std::vector<std::size_t>{0, fNumColumns});
0236 }
0237
0238
0239 if (lastbatch == 1) {
0240
0241 if (dropRemainder == false && leftoverBatchSize > 0) {
0242 auto copy = std::make_unique<TMVA::Experimental::RTensor<float>>(
0243 std::vector<std::size_t>{leftoverBatchSize, fNumColumns});
0244 std::copy((*fPrimaryLeftoverTrainingBatch).GetData(),
0245 (*fPrimaryLeftoverTrainingBatch).GetData() + (leftoverBatchSize * fNumColumns), copy->GetData());
0246 batches.emplace_back(std::move(copy));
0247 }
0248
0249 fPrimaryLeftoverTrainingBatch =
0250 std::make_unique<TMVA::Experimental::RTensor<float>>(std::vector<std::size_t>{0, fNumColumns});
0251 fSecondaryLeftoverTrainingBatch =
0252 std::make_unique<TMVA::Experimental::RTensor<float>>(std::vector<std::size_t>{0, fNumColumns});
0253 }
0254
0255
0256 for (std::size_t i = 0; i < batches.size(); i++) {
0257 fTrainingBatchQueue.push(std::move(batches[i]));
0258 }
0259 }
0260
0261
0262
0263
0264
0265
0266 void CreateValidationBatches(TMVA::Experimental::RTensor<float> &chunkTensor, std::size_t lastbatch,
0267 std::size_t leftoverBatchSize, bool dropRemainder)
0268 {
0269 std::size_t ChunkSize = chunkTensor.GetShape()[0];
0270 std::size_t NumCols = chunkTensor.GetShape()[1];
0271 std::size_t Batches = ChunkSize / fBatchSize;
0272 std::size_t LeftoverBatchSize = ChunkSize % fBatchSize;
0273
0274 std::vector<std::unique_ptr<TMVA::Experimental::RTensor<float>>> batches;
0275
0276 for (std::size_t i = 0; i < Batches; i++) {
0277
0278 batches.emplace_back(CreateBatch(chunkTensor, i));
0279 }
0280
0281 TMVA::Experimental::RTensor<float> LeftoverBatch({LeftoverBatchSize, NumCols});
0282 std::copy(chunkTensor.GetData() + (Batches * fBatchSize * NumCols),
0283 chunkTensor.GetData() + (Batches * fBatchSize * NumCols + LeftoverBatchSize * NumCols),
0284 LeftoverBatch.GetData());
0285
0286 std::size_t PrimaryLeftoverSize = (*fPrimaryLeftoverValidationBatch).GetShape()[0];
0287 std::size_t emptySlots = fBatchSize - PrimaryLeftoverSize;
0288
0289 if (emptySlots >= LeftoverBatchSize) {
0290 (*fPrimaryLeftoverValidationBatch) =
0291 (*fPrimaryLeftoverValidationBatch).Resize({PrimaryLeftoverSize + LeftoverBatchSize, NumCols});
0292 std::copy(LeftoverBatch.GetData(), LeftoverBatch.GetData() + (LeftoverBatchSize * NumCols),
0293 fPrimaryLeftoverValidationBatch->GetData() + (PrimaryLeftoverSize * NumCols));
0294
0295 if (emptySlots == LeftoverBatchSize) {
0296 auto copy =
0297 std::make_unique<TMVA::Experimental::RTensor<float>>(std::vector<std::size_t>{fBatchSize, fNumColumns});
0298 std::copy(fPrimaryLeftoverValidationBatch->GetData(),
0299 fPrimaryLeftoverValidationBatch->GetData() + (fBatchSize * fNumColumns), copy->GetData());
0300 batches.emplace_back(std::move(copy));
0301 *fPrimaryLeftoverValidationBatch = *fSecondaryLeftoverValidationBatch;
0302 fSecondaryLeftoverValidationBatch =
0303 std::make_unique<TMVA::Experimental::RTensor<float>>(std::vector<std::size_t>{0, fNumColumns});
0304 }
0305 }
0306
0307 else if (emptySlots < LeftoverBatchSize) {
0308 (*fPrimaryLeftoverValidationBatch) = (*fPrimaryLeftoverValidationBatch).Resize({fBatchSize, NumCols});
0309 std::copy(LeftoverBatch.GetData(), LeftoverBatch.GetData() + (emptySlots * NumCols),
0310 fPrimaryLeftoverValidationBatch->GetData() + (PrimaryLeftoverSize * NumCols));
0311 (*fSecondaryLeftoverValidationBatch) =
0312 (*fSecondaryLeftoverValidationBatch).Resize({LeftoverBatchSize - emptySlots, NumCols});
0313 std::copy(LeftoverBatch.GetData() + (emptySlots * NumCols),
0314 LeftoverBatch.GetData() + (LeftoverBatchSize * NumCols),
0315 fSecondaryLeftoverValidationBatch->GetData());
0316 auto copy =
0317 std::make_unique<TMVA::Experimental::RTensor<float>>(std::vector<std::size_t>{fBatchSize, fNumColumns});
0318 std::copy(fPrimaryLeftoverValidationBatch->GetData(),
0319 fPrimaryLeftoverValidationBatch->GetData() + (fBatchSize * fNumColumns), copy->GetData());
0320 batches.emplace_back(std::move(copy));
0321 *fPrimaryLeftoverValidationBatch = *fSecondaryLeftoverValidationBatch;
0322 fSecondaryLeftoverValidationBatch =
0323 std::make_unique<TMVA::Experimental::RTensor<float>>(std::vector<std::size_t>{0, fNumColumns});
0324 }
0325
0326 if (lastbatch == 1) {
0327
0328 if (dropRemainder == false && leftoverBatchSize > 0) {
0329 auto copy = std::make_unique<TMVA::Experimental::RTensor<float>>(
0330 std::vector<std::size_t>{leftoverBatchSize, fNumColumns});
0331 std::copy((*fPrimaryLeftoverValidationBatch).GetData(),
0332 (*fPrimaryLeftoverValidationBatch).GetData() + (leftoverBatchSize * fNumColumns),
0333 copy->GetData());
0334 batches.emplace_back(std::move(copy));
0335 }
0336 fPrimaryLeftoverValidationBatch =
0337 std::make_unique<TMVA::Experimental::RTensor<float>>(std::vector<std::size_t>{0, fNumColumns});
0338 fSecondaryLeftoverValidationBatch =
0339 std::make_unique<TMVA::Experimental::RTensor<float>>(std::vector<std::size_t>{0, fNumColumns});
0340 }
0341
0342 for (std::size_t i = 0; i < batches.size(); i++) {
0343 fValidationBatchQueue.push(std::move(batches[i]));
0344 }
0345 }
0346 std::size_t GetNumTrainingBatchQueue() { return fTrainingBatchQueue.size(); }
0347 std::size_t GetNumValidationBatchQueue() { return fValidationBatchQueue.size(); }
0348 };
0349
0350 }
0351
0352 #endif