// @(#)root/tmva/tmva/dnn:$Id$
// Author: Simon Pfreundschuh 08/08/16

/*************************************************************************
 * Copyright (C) 2016, Simon Pfreundschuh                                *
 * All rights reserved.                                                  *
 *                                                                       *
 * For the licensing terms see $ROOTSYS/LICENSE.                         *
 * For the list of contributors see $ROOTSYS/README/CREDITS.             *
 *************************************************************************/

/////////////////////////////////////////////////////////////////////
// Generic data loader for neural network input data. Provides a   //
// high level abstraction for the transfer of training data to the //
// device.                                                         //
/////////////////////////////////////////////////////////////////////

#ifndef TMVA_DNN_DATALOADER
#define TMVA_DNN_DATALOADER

#include "TMatrix.h"
#include "TMVA/Event.h"

#include <algorithm>
#include <random>
#include <tuple>
#include <utility>
#include <vector>

namespace TMVA {

class DataSetInfo;

namespace DNN {

//
// Input Data Types
//______________________________________________________________________________
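// MatrixInput_t bundles references to the input matrix, the output (target) matrix
// and the event-weight matrix of a data set; TMVAInput_t bundles the TMVA event
// vector with its DataSetInfo. The tuple elements are consumed by the
// architecture-specific CopyInput/CopyOutput/CopyWeights specializations.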
using MatrixInput_t = std::tuple<const TMatrixT<Double_t> &, const TMatrixT<Double_t> &, const TMatrixT<Double_t> &>;
using TMVAInput_t =
    std::tuple<const std::vector<Event *> &, const DataSetInfo &>;

using IndexIterator_t = typename std::vector<size_t>::iterator;

/** TBatch
 *
 * Class representing training batches consisting of a matrix of input data,
 * a matrix of output data and a matrix of event weights. The three matrices
 * can be accessed through the GetInput(), GetOutput() and GetWeights() member
 * functions.
 *
 * \tparam AArchitecture The underlying architecture.
 */
//______________________________________________________________________________
template <typename AArchitecture>
class TBatch
{
private:

   using Matrix_t       = typename AArchitecture::Matrix_t;

   Matrix_t fInputMatrix;
   Matrix_t fOutputMatrix;
   Matrix_t fWeightMatrix;

public:
   TBatch(Matrix_t &, Matrix_t &, Matrix_t &);
   TBatch(const TBatch  &) = default;
   TBatch(      TBatch &&) = default;
   TBatch & operator=(const TBatch  &) = default;
   TBatch & operator=(      TBatch &&) = default;

   /** Return the matrix representing the input data. */
   Matrix_t &GetInput() { return fInputMatrix; }
   /** Return the matrix representing the output data. */
   Matrix_t &GetOutput() { return fOutputMatrix; }
   /** Return the matrix holding the event weights. */
   Matrix_t &GetWeights() { return fWeightMatrix; }
};
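
// Usage sketch (illustrative only; `Architecture_t` stands for any backend that
// provides a Matrix_t type, and `loader` for a TDataLoader instance as declared
// further below):
//
//    TBatch<Architecture_t> batch = loader.GetBatch();
//    auto &input   = batch.GetInput();    // batch size x number of input features
//    auto &output  = batch.GetOutput();   // batch size x number of output features
//    auto &weights = batch.GetWeights();  // per-event weights for this batch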

template<typename Data_t, typename AArchitecture> class TDataLoader;

/** TBatchIterator
 *
 * Simple iterator class for iterating over the training batches of a data set
 * represented by a TDataLoader object.
 *
 * \tparam Data_t        The input data type.
 * \tparam AArchitecture The underlying architecture type.
 */
template<typename Data_t, typename AArchitecture>
class TBatchIterator
{
private:

   TDataLoader<Data_t, AArchitecture> & fDataLoader;
   size_t fBatchIndex;

public:

   TBatchIterator(TDataLoader<Data_t, AArchitecture> & dataLoader, size_t index = 0)
      : fDataLoader(dataLoader), fBatchIndex(index)
   {
      // Nothing to do here.
   }

   TBatch<AArchitecture> operator*() {return fDataLoader.GetBatch();}
   TBatchIterator operator++() {fBatchIndex++; return *this;}
   bool operator!=(const TBatchIterator & other) {
      return fBatchIndex != other.fBatchIndex;
   }
};
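
// Note on the iteration protocol: operator*() delegates to TDataLoader::GetBatch(),
// which advances the loader's internal batch counter, while operator++() and
// operator!=() only advance and compare the iterator's batch index. A range-based
// for loop over a TDataLoader therefore visits every batch of one epoch exactly
// once (see the usage sketch after the TDataLoader declaration below).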

/** TDataLoader
 *
 * Service class managing the streaming of the training data from the input data
 * type to the accelerator device or the CPU. A TDataLoader object manages a number
 * of host and device buffer pairs that are used in a round-robin manner for the
 * transfer of batches to the device.
 *
 * Each TDataLoader object has an associated batch size and a total number of
 * samples in the data set. One epoch corresponds to one complete pass over the
 * training set, i.e. fNSamples / fBatchSize batches. The begin() and end() member
 * functions allow the user to iterate over the batches of one epoch.
 *
 * \tparam Data_t        The input data type.
 * \tparam AArchitecture The architecture class of the underlying architecture.
 */
template<typename Data_t, typename AArchitecture>
class TDataLoader
{
private:

   using HostBuffer_t    = typename AArchitecture::HostBuffer_t;
   using DeviceBuffer_t  = typename AArchitecture::DeviceBuffer_t;
   using Matrix_t        = typename AArchitecture::Matrix_t;
   using BatchIterator_t = TBatchIterator<Data_t, AArchitecture>;

   const Data_t &fData;

   size_t fNSamples;
   size_t fBatchSize;
   size_t fNInputFeatures;
   size_t fNOutputFeatures;
   size_t fBatchIndex;

   size_t fNStreams;                            ///< Number of buffer pairs.
   std::vector<DeviceBuffer_t> fDeviceBuffers;
   std::vector<HostBuffer_t>   fHostBuffers;

   std::vector<size_t> fSampleIndices; ///< Ordering of the samples in the epoch.

public:

   TDataLoader(const Data_t & data, size_t nSamples, size_t batchSize,
               size_t nInputFeatures, size_t nOutputFeatures, size_t nStreams = 1);
   TDataLoader(const TDataLoader  &) = default;
   TDataLoader(      TDataLoader &&) = default;
   TDataLoader & operator=(const TDataLoader  &) = default;
   TDataLoader & operator=(      TDataLoader &&) = default;

   /** Copy input matrix into the given host buffer. Function to be specialized by
    *  the architecture-specific backend. */
   void  CopyInput(HostBuffer_t &buffer, IndexIterator_t begin, size_t batchSize);
   /** Copy output matrix into the given host buffer. Function to be specialized
    *  by the architecture-specific backend. */
   void CopyOutput(HostBuffer_t &buffer, IndexIterator_t begin, size_t batchSize);
   /** Copy weight matrix into the given host buffer. Function to be specialized
    *  by the architecture-specific backend. */
   void CopyWeights(HostBuffer_t &buffer, IndexIterator_t begin, size_t batchSize);

   BatchIterator_t begin() {return TBatchIterator<Data_t, AArchitecture>(*this);}
   BatchIterator_t end()
   {
      return TBatchIterator<Data_t, AArchitecture>(*this, fNSamples / fBatchSize);
   }

   /** Shuffle the order of the samples in the training set. The shuffling is
    *  indirect, i.e. only the sample indices are shuffled. No input data is
    *  moved by this routine. */
   void Shuffle();

   /** Return the next batch from the training set. The TDataLoader object
    *  keeps an internal counter that cycles over the batches in the training
    *  set. */
   TBatch<AArchitecture> GetBatch();

};
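
// Usage sketch (illustrative only; `Architecture_t` stands for any backend type,
// and nSamples, batchSize, nFeatures, nOutputs, nEpochs are placeholder values):
//
//    TMatrixT<Double_t> X(nSamples, nFeatures), Y(nSamples, nOutputs), W(nSamples, 1);
//    MatrixInput_t input(X, Y, W);
//    TDataLoader<MatrixInput_t, Architecture_t> loader(input, nSamples, batchSize,
//                                                      nFeatures, nOutputs);
//    for (size_t epoch = 0; epoch < nEpochs; epoch++) {
//       loader.Shuffle();                  // permute the sample indices
//       for (auto batch : loader) {        // fNSamples / fBatchSize batches per epoch
//          // train on batch.GetInput(), batch.GetOutput(), batch.GetWeights()
//       }
//    }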

//
// TBatch Class.
//______________________________________________________________________________
template <typename AArchitecture>
TBatch<AArchitecture>::TBatch(Matrix_t &inputMatrix, Matrix_t &outputMatrix, Matrix_t &weightMatrix)
   : fInputMatrix(inputMatrix), fOutputMatrix(outputMatrix), fWeightMatrix(weightMatrix)
{
   // Nothing to do here.
}

//
// TDataLoader Class.
//______________________________________________________________________________
template<typename Data_t, typename AArchitecture>
TDataLoader<Data_t, AArchitecture>::TDataLoader(
    const Data_t & data, size_t nSamples, size_t batchSize,
    size_t nInputFeatures, size_t nOutputFeatures, size_t nStreams)
    : fData(data), fNSamples(nSamples), fBatchSize(batchSize),
      fNInputFeatures(nInputFeatures), fNOutputFeatures(nOutputFeatures),
      fBatchIndex(0), fNStreams(nStreams), fDeviceBuffers(), fHostBuffers(),
      fSampleIndices()
{
   size_t inputMatrixSize  = fBatchSize * fNInputFeatures;
   size_t outputMatrixSize = fBatchSize * fNOutputFeatures;
   size_t weightMatrixSize = fBatchSize;

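   // Allocate one host/device buffer pair per stream. Each pair is large enough
   // to hold one full batch, with the input, output and weight blocks stored
   // contiguously; GetBatch() reuses the pairs in round-robin order.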
   for (size_t i = 0; i < fNStreams; i++)
   {
      fHostBuffers.push_back(HostBuffer_t(inputMatrixSize + outputMatrixSize + weightMatrixSize));
      fDeviceBuffers.push_back(DeviceBuffer_t(inputMatrixSize + outputMatrixSize + weightMatrixSize));
   }

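   // Start from the identity ordering 0, 1, ..., fNSamples - 1; Shuffle() permutes
   // these indices in place between epochs.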
   fSampleIndices.reserve(fNSamples);
   for (size_t i = 0; i < fNSamples; i++) {
      fSampleIndices.push_back(i);
   }
}

//______________________________________________________________________________
template<typename Data_t, typename AArchitecture>
TBatch<AArchitecture> TDataLoader<Data_t, AArchitecture>::GetBatch()
{
   fBatchIndex %= (fNSamples / fBatchSize); // Cycle through samples.

   size_t inputMatrixSize  = fBatchSize * fNInputFeatures;
   size_t outputMatrixSize = fBatchSize * fNOutputFeatures;
   size_t weightMatrixSize = fBatchSize;

   size_t streamIndex = fBatchIndex % fNStreams;
   HostBuffer_t   & hostBuffer   = fHostBuffers[streamIndex];
   DeviceBuffer_t & deviceBuffer = fDeviceBuffers[streamIndex];

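   // Partition the flat per-stream buffer into its [ input | output | weights ]
   // regions; the same layout is used for the host and the device buffer.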
   HostBuffer_t inputHostBuffer  = hostBuffer.GetSubBuffer(0, inputMatrixSize);
   HostBuffer_t outputHostBuffer = hostBuffer.GetSubBuffer(inputMatrixSize,
                                                           outputMatrixSize);
   HostBuffer_t weightHostBuffer = hostBuffer.GetSubBuffer(inputMatrixSize + outputMatrixSize, weightMatrixSize);

   DeviceBuffer_t inputDeviceBuffer  = deviceBuffer.GetSubBuffer(0, inputMatrixSize);
   DeviceBuffer_t outputDeviceBuffer = deviceBuffer.GetSubBuffer(inputMatrixSize,
                                                                 outputMatrixSize);
   DeviceBuffer_t weightDeviceBuffer = deviceBuffer.GetSubBuffer(inputMatrixSize + outputMatrixSize, weightMatrixSize);

   size_t sampleIndex = fBatchIndex * fBatchSize;
   IndexIterator_t sampleIndexIterator = fSampleIndices.begin() + sampleIndex;

   CopyInput(inputHostBuffer,   sampleIndexIterator, fBatchSize);
   CopyOutput(outputHostBuffer, sampleIndexIterator, fBatchSize);
   CopyWeights(weightHostBuffer, sampleIndexIterator, fBatchSize);

   deviceBuffer.CopyFrom(hostBuffer);
   Matrix_t  inputMatrix(inputDeviceBuffer,  fBatchSize, fNInputFeatures);
   Matrix_t outputMatrix(outputDeviceBuffer, fBatchSize, fNOutputFeatures);
   Matrix_t weightMatrix(weightDeviceBuffer, fBatchSize, 1); // one weight per event; the weight buffer holds fBatchSize elements

   fBatchIndex++;
   return TBatch<AArchitecture>(inputMatrix, outputMatrix, weightMatrix);
}

//______________________________________________________________________________
template<typename Data_t, typename AArchitecture>
void TDataLoader<Data_t, AArchitecture>::Shuffle()
{
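   // Note: the engine is default-constructed (and therefore default-seeded) on
   // every call, so the sequence of shuffles is deterministic across runs.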
   std::shuffle(fSampleIndices.begin(), fSampleIndices.end(), std::default_random_engine{});
}

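// Sketch of how a backend may specialize the copy routines declared above
// (illustrative only; `Architecture_t`, element access via operator[] on
// HostBuffer_t and the column-major buffer layout are assumptions of this sketch):
//
//    template <>
//    void TDataLoader<MatrixInput_t, Architecture_t>::CopyInput(
//        HostBuffer_t &buffer, IndexIterator_t sampleIterator, size_t batchSize)
//    {
//       const TMatrixT<Double_t> &input = std::get<0>(fData);
//       size_t n = input.GetNcols();
//       for (size_t i = 0; i < batchSize; i++) {
//          size_t sampleIndex = *sampleIterator++;
//          for (size_t j = 0; j < n; j++) {
//             buffer[j * batchSize + i] = input(sampleIndex, j);
//          }
//       }
//    }
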
} // namespace DNN
} // namespace TMVA

#endif