File indexing completed on 2025-01-18 10:10:55
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 #ifndef TMVA_DNN_DATALOADER
0019 #define TMVA_DNN_DATALOADER
0020
#include "TMatrix.h"
#include "TMVA/Event.h"

#include <algorithm>
#include <cstddef>
#include <random>
#include <tuple>
#include <utility>
#include <vector>
0028
0029 namespace TMVA {
0030
0031 class DataSetInfo;
0032
0033 namespace DNN {
0034
0035
0036
0037
0038 using MatrixInput_t = std::tuple<const TMatrixT<Double_t> &, const TMatrixT<Double_t> &, const TMatrixT<Double_t> &>;
0039 using TMVAInput_t =
0040 std::tuple<const std::vector<Event *> &, const DataSetInfo &>;
0041
0042 using IndexIterator_t = typename std::vector<size_t>::iterator;
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052
/** Bundle of the three matrices that make up one batch.
 *
 *  A TBatch holds an input, an output and a weight matrix of type
 *  AArchitecture::Matrix_t and exposes each of them by mutable reference.
 *  Whether copying a Matrix_t duplicates the underlying data or merely shares
 *  it is decided by the architecture and cannot be told from this class.
 */
template <typename AArchitecture>
class TBatch {
private:
   using Matrix_t = typename AArchitecture::Matrix_t;

public:
   /** Create a batch from the given matrices. The arguments are taken by
    *  non-const lvalue reference, so temporaries cannot be passed. */
   TBatch(Matrix_t &inputMatrix, Matrix_t &outputMatrix, Matrix_t &weightMatrix);

   TBatch(const TBatch &) = default;
   TBatch(TBatch &&) = default;
   TBatch &operator=(const TBatch &) = default;
   TBatch &operator=(TBatch &&) = default;

   /** Matrix holding the input data of the batch. */
   Matrix_t &GetInput() { return fInputMatrix; }

   /** Matrix holding the output data of the batch. */
   Matrix_t &GetOutput() { return fOutputMatrix; }

   /** Matrix holding the weights of the batch. */
   Matrix_t &GetWeights() { return fWeightMatrix; }

private:
   // Declaration order is kept as input, output, weights: it fixes the
   // initialization order used by the constructor.
   Matrix_t fInputMatrix;
   Matrix_t fOutputMatrix;
   Matrix_t fWeightMatrix;
};
0078
0079 template<typename Data_t, typename AArchitecture> class TDataLoader;
0080
0081
0082
0083
0084
0085
0086
0087
0088
0089 template<typename Data_t, typename AArchitecture>
0090 class TBatchIterator
0091 {
0092 private:
0093
0094 TDataLoader<Data_t, AArchitecture> & fDataLoader;
0095 size_t fBatchIndex;
0096
0097 public:
0098
0099 TBatchIterator(TDataLoader<Data_t, AArchitecture> & dataLoader, size_t index = 0)
0100 : fDataLoader(dataLoader), fBatchIndex(index)
0101 {
0102
0103 }
0104
0105 TBatch<AArchitecture> operator*() {return fDataLoader.GetBatch();}
0106 TBatchIterator operator++() {fBatchIndex++; return *this;}
0107 bool operator!=(const TBatchIterator & other) {
0108 return fBatchIndex != other.fBatchIndex;
0109 }
0110 };
0111
0112
0113
0114
0115
0116
0117
0118
0119
0120
0121
0122
0123
0124
0125
0126
0127 template<typename Data_t, typename AArchitecture>
0128 class TDataLoader
0129 {
0130 private:
0131
0132 using HostBuffer_t = typename AArchitecture::HostBuffer_t;
0133 using DeviceBuffer_t = typename AArchitecture::DeviceBuffer_t;
0134 using Matrix_t = typename AArchitecture::Matrix_t;
0135 using BatchIterator_t = TBatchIterator<Data_t, AArchitecture>;
0136
0137 const Data_t &fData;
0138
0139 size_t fNSamples;
0140 size_t fBatchSize;
0141 size_t fNInputFeatures;
0142 size_t fNOutputFeatures;
0143 size_t fBatchIndex;
0144
0145 size_t fNStreams;
0146 std::vector<DeviceBuffer_t> fDeviceBuffers;
0147 std::vector<HostBuffer_t> fHostBuffers;
0148
0149 std::vector<size_t> fSampleIndices;
0150
0151 public:
0152
0153 TDataLoader(const Data_t & data, size_t nSamples, size_t batchSize,
0154 size_t nInputFeatures, size_t nOutputFeatures, size_t nStreams = 1);
0155 TDataLoader(const TDataLoader &) = default;
0156 TDataLoader( TDataLoader &&) = default;
0157 TDataLoader & operator=(const TDataLoader &) = default;
0158 TDataLoader & operator=( TDataLoader &&) = default;
0159
0160
0161
0162 void CopyInput(HostBuffer_t &buffer, IndexIterator_t begin, size_t batchSize);
0163
0164
0165 void CopyOutput(HostBuffer_t &buffer, IndexIterator_t begin, size_t batchSize);
0166
0167
0168 void CopyWeights(HostBuffer_t &buffer, IndexIterator_t begin, size_t batchSize);
0169
0170 BatchIterator_t begin() {return TBatchIterator<Data_t, AArchitecture>(*this);}
0171 BatchIterator_t end()
0172 {
0173 return TBatchIterator<Data_t, AArchitecture>(*this, fNSamples / fBatchSize);
0174 }
0175
0176
0177
0178
0179 void Shuffle();
0180
0181
0182
0183
0184 TBatch<AArchitecture> GetBatch();
0185
0186 };
0187
0188
0189
0190
0191 template <typename AArchitecture>
0192 TBatch<AArchitecture>::TBatch(Matrix_t &inputMatrix, Matrix_t &outputMatrix, Matrix_t &weightMatrix)
0193 : fInputMatrix(inputMatrix), fOutputMatrix(outputMatrix), fWeightMatrix(weightMatrix)
0194 {
0195
0196 }
0197
0198
0199
0200
0201 template<typename Data_t, typename AArchitecture>
0202 TDataLoader<Data_t, AArchitecture>::TDataLoader(
0203 const Data_t & data, size_t nSamples, size_t batchSize,
0204 size_t nInputFeatures, size_t nOutputFeatures, size_t nStreams)
0205 : fData(data), fNSamples(nSamples), fBatchSize(batchSize),
0206 fNInputFeatures(nInputFeatures), fNOutputFeatures(nOutputFeatures),
0207 fBatchIndex(0), fNStreams(nStreams), fDeviceBuffers(), fHostBuffers(),
0208 fSampleIndices()
0209 {
0210 size_t inputMatrixSize = fBatchSize * fNInputFeatures;
0211 size_t outputMatrixSize = fBatchSize * fNOutputFeatures;
0212 size_t weightMatrixSize = fBatchSize;
0213
0214 for (size_t i = 0; i < fNStreams; i++)
0215 {
0216 fHostBuffers.push_back(HostBuffer_t(inputMatrixSize + outputMatrixSize + weightMatrixSize));
0217 fDeviceBuffers.push_back(DeviceBuffer_t(inputMatrixSize + outputMatrixSize + weightMatrixSize));
0218 }
0219
0220 fSampleIndices.reserve(fNSamples);
0221 for (size_t i = 0; i < fNSamples; i++) {
0222 fSampleIndices.push_back(i);
0223 }
0224 }
0225
0226
0227 template<typename Data_t, typename AArchitecture>
0228 TBatch<AArchitecture> TDataLoader<Data_t, AArchitecture>::GetBatch()
0229 {
0230 fBatchIndex %= (fNSamples / fBatchSize);
0231
0232
0233 size_t inputMatrixSize = fBatchSize * fNInputFeatures;
0234 size_t outputMatrixSize = fBatchSize * fNOutputFeatures;
0235 size_t weightMatrixSize = fBatchSize;
0236
0237 size_t streamIndex = fBatchIndex % fNStreams;
0238 HostBuffer_t & hostBuffer = fHostBuffers[streamIndex];
0239 DeviceBuffer_t & deviceBuffer = fDeviceBuffers[streamIndex];
0240
0241 HostBuffer_t inputHostBuffer = hostBuffer.GetSubBuffer(0, inputMatrixSize);
0242 HostBuffer_t outputHostBuffer = hostBuffer.GetSubBuffer(inputMatrixSize,
0243 outputMatrixSize);
0244 HostBuffer_t weightHostBuffer = hostBuffer.GetSubBuffer(inputMatrixSize + outputMatrixSize, weightMatrixSize);
0245
0246 DeviceBuffer_t inputDeviceBuffer = deviceBuffer.GetSubBuffer(0, inputMatrixSize);
0247 DeviceBuffer_t outputDeviceBuffer = deviceBuffer.GetSubBuffer(inputMatrixSize,
0248 outputMatrixSize);
0249 DeviceBuffer_t weightDeviceBuffer = deviceBuffer.GetSubBuffer(inputMatrixSize + outputMatrixSize, weightMatrixSize);
0250
0251 size_t sampleIndex = fBatchIndex * fBatchSize;
0252 IndexIterator_t sampleIndexIterator = fSampleIndices.begin() + sampleIndex;
0253
0254 CopyInput(inputHostBuffer, sampleIndexIterator, fBatchSize);
0255 CopyOutput(outputHostBuffer, sampleIndexIterator, fBatchSize);
0256 CopyWeights(weightHostBuffer, sampleIndexIterator, fBatchSize);
0257
0258 deviceBuffer.CopyFrom(hostBuffer);
0259 Matrix_t inputMatrix(inputDeviceBuffer, fBatchSize, fNInputFeatures);
0260 Matrix_t outputMatrix(outputDeviceBuffer, fBatchSize, fNOutputFeatures);
0261 Matrix_t weightMatrix(weightDeviceBuffer, fBatchSize, fNOutputFeatures);
0262
0263 fBatchIndex++;
0264 return TBatch<AArchitecture>(inputMatrix, outputMatrix, weightMatrix);
0265 }
0266
0267
0268 template<typename Data_t, typename AArchitecture>
0269 void TDataLoader<Data_t, AArchitecture>::Shuffle()
0270 {
0271 std::shuffle(fSampleIndices.begin(), fSampleIndices.end(), std::default_random_engine{});
0272 }
0273
0274 }
0275 }
0276
0277 #endif