Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 10:10:57

0001 // @(#)root/tmva/tmva/dnn:$Id$
0002 // Author: Vladimir Ilievski
0003 
0004 /**********************************************************************************
0005  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
0006  * Package: TMVA                                                                  *
0007  * Class  : TTensorDataLoader                                                     *
0008  *                                                                                *
0009  *                                                                                *
0010  * Description:                                                                   *
0011  *      Tensor Data Loader Class                                                  *
0012  *                                                                                *
0013  * Authors (alphabetical):                                                        *
0014  *      Vladimir Ilievski      <ilievski.vladimir@live.com>  - CERN, Switzerland  *
0015  *                                                                                *
0016  * Copyright (c) 2005-2015:                                                       *
0017  *      CERN, Switzerland                                                         *
0018  *      U. of Victoria, Canada                                                    *
0019  *      MPI-K Heidelberg, Germany                                                 *
0020  *      U. of Bonn, Germany                                                       *
0021  *                                                                                *
0022  * Redistribution and use in source and binary forms, with or without             *
0023  * modification, are permitted according to the terms listed in LICENSE           *
0024  * (see tmva/doc/LICENSE)                                                         *
0025  **********************************************************************************/
0026 
0027 #ifndef TMVA_DNN_TENSORDATALOADER
0028 #define TMVA_DNN_TENSORDATALOADER
0029 
#include "TMatrix.h"
#include "TMVA/Event.h"

#include <algorithm>
#include <cassert>
#include <numeric>
#include <tuple>
#include <utility>
#include <vector>
0035 
0036 namespace TMVA {
0037    class DataSetInfo;
0038 namespace DNN {
0039 
0040 //
0041 // Input Data Types
0042 //______________________________________________________________________________
0043 using TensorInput =
0044    std::tuple<const std::vector<TMatrixT<Double_t>> &, const TMatrixT<Double_t> &, const TMatrixT<Double_t> &>;
0045 
0046 using TMVAInput_t =  std::tuple<const std::vector<Event *> &, const DataSetInfo &>;
0047 using IndexIterator_t = typename std::vector<size_t>::iterator;
0048 
0049 /** TTensorBatch
0050  *
0051  * Class representing training batches consisting of a vector of matrices as input data
0052  * and a matrix of output data. The input and output data can be accessed using
0053  * the GetInput() and GetOutput() member functions.
0054  *
0055  * \tparam Architecture_t The underlying architecture.
0056  */
0057 
0058 template <typename Architecture_t>
0059 class TTensorBatch {
0060 public:
0061    using Matrix_t = typename Architecture_t::Matrix_t;
0062    using Tensor_t = typename Architecture_t::Tensor_t;
0063 
0064 private:
0065    Tensor_t  fInputTensor;         ///< The input tensor batch, one matrix one input.
0066    Matrix_t fOutputMatrix;         ///< The output matrix representing the ground truth.
0067    Matrix_t fWeightMatrix;         ///< The event/example weights
0068 
0069 public:
0070    TTensorBatch(Tensor_t &, Matrix_t &, Matrix_t &);
0071    TTensorBatch(const TTensorBatch &) = default;
0072    TTensorBatch(TTensorBatch &&) = default;
0073    TTensorBatch &operator=(const TTensorBatch &) = default;
0074    TTensorBatch &operator=(TTensorBatch &&) = default;
0075 
0076    /** Return the tensor representing the input data */
0077    Tensor_t &GetInput() { return fInputTensor; }
0078    /** Return the matrix representing the output data. */
0079    Matrix_t &GetOutput() { return fOutputMatrix; }
0080    /** Return the matrix holding the event weights. */
0081    Matrix_t &GetWeights() { return fWeightMatrix; }
0082 };
0083 
0084 template <typename Data_t, typename Architecture_t>
0085 class TTensorDataLoader;
0086 
0087 /** TTensorBatchIterator
0088  *
0089  * Simple iterator class for the iterations over the training batches in
0090  * a given data set represented by a TTensorDataLoader object.
0091  *
0092  * \tparam Data_t         The input data type.
0093  * \tparam Architecture_t The underlying architecture type.
0094  */
0095 template <typename Data_t, typename Architecture_t>
0096 class TTensorBatchIterator {
0097 private:
0098    TTensorDataLoader<Data_t, Architecture_t> &fTensorDataLoader;
0099    size_t fBatchIndex;
0100 
0101 public:
0102    TTensorBatchIterator(TTensorDataLoader<Data_t, Architecture_t> &tensorDataLoader, size_t index = 0)
0103       : fTensorDataLoader(tensorDataLoader), fBatchIndex(index)
0104    {
0105       // Nothing to do here.
0106    }
0107 
0108    TTensorBatch<Architecture_t> operator*() { return fTensorDataLoader.GetTensorBatch(); }
0109    TTensorBatchIterator operator++()
0110    {
0111       fBatchIndex++;
0112       return *this;
0113    }
0114    bool operator!=(const TTensorBatchIterator &other) { return fBatchIndex != other.fBatchIndex; }
0115 };
0116 
0117 /** TTensorDataLoader
0118  *
0119  * Service class managing the streaming of the training data from the input data
0120  * type to the accelerator device or the CPU. A TTensorDataLoader object manages
0121  * a number of host and device buffer pairs that are used in a round-robin manner
0122  * for the transfer of batches to the device.
0123  *
0124  * Each TTensorDataLoader object has an associated batch size and a number of total
0125  * samples in the dataset. One epoch is the number of buffers required to transfer
0126  * the complete training set. Using the begin() and end() member functions allows
0127  * the user to iterate over the batches in one epoch.
0128  *
0129  * \tparam Data_t The input data type.
0130  * \tparam Architecture_t The architecture class of the underlying architecture.
0131  */
0132 template <typename Data_t, typename Architecture_t>
0133 class TTensorDataLoader {
0134 private:
0135    using HostBuffer_t = typename Architecture_t::HostBuffer_t;
0136    using DeviceBuffer_t = typename Architecture_t::DeviceBuffer_t;
0137    using Matrix_t = typename Architecture_t::Matrix_t;
0138    using Tensor_t = typename Architecture_t::Tensor_t;
0139    using Shape_t = typename Architecture_t::Tensor_t::Shape_t;
0140    using BatchIterator_t = TTensorBatchIterator<Data_t, Architecture_t>;
0141 
0142    const Data_t &fData;     ///< The data that should be loaded in the batches.
0143    size_t fNSamples;        ///< The total number of samples in the dataset.
0144    size_t fBatchSize;       ///< The size of a batch.
0145    Shape_t    fInputLayout; ///< The input data layout  (does not include batch size)
0146    size_t fBatchDepth;      ///< The number of matrices in the tensor.
0147    size_t fBatchHeight;     ///< The number od rows in each matrix.
0148    size_t fBatchWidth;      ///< The number of columns in each matrix.
0149    size_t fNOutputFeatures; ///< The number of outputs from the classifier/regressor.
0150    size_t fBatchIndex;      ///< The index of the batch when there are multiple batches in parallel
0151 
0152 
0153    size_t fNStreams;                           ///< Number of buffer pairs.
0154    std::vector<DeviceBuffer_t> fDeviceBuffers; ///< The device buffers used to keep the input, output and weight data.
0155    std::vector<HostBuffer_t> fHostBuffers;     ///< The host buffers used to load the input, output and weight data.
0156 
0157    std::vector<size_t> fSampleIndices; ///< Ordering of the samples in the epoch.
0158 
0159 public:
0160    /*! Constructor. */
0161    TTensorDataLoader(const Data_t &data, size_t nSamples, size_t batchSize, const Shape_t & inputLayout,
0162        const Shape_t & batchLayout, size_t nOutputFeatures, size_t nStreams = 1);
0163 
0164    TTensorDataLoader(const TTensorDataLoader &) = default;
0165    TTensorDataLoader(TTensorDataLoader &&) = default;
0166    TTensorDataLoader &operator=(const TTensorDataLoader &) = default;
0167    TTensorDataLoader &operator=(TTensorDataLoader &&) = default;
0168 
0169    /** Copy input tensor into the given host buffer. Function to be specialized by
0170     *  the architecture-specific backend. */
0171    void CopyTensorInput(HostBuffer_t &buffer, IndexIterator_t begin);
0172    /** Copy output matrix into the given host buffer. Function to be specialized
0173     * by the architecture-specific backend. */
0174    void CopyTensorOutput(HostBuffer_t &buffer, IndexIterator_t begin);
0175    /** Copy weight matrix into the given host buffer. Function to be specialized
0176     * by the architecture-specific backend. */
0177    void CopyTensorWeights(HostBuffer_t &buffer, IndexIterator_t begin);
0178 
0179    BatchIterator_t begin() { return TTensorBatchIterator<Data_t, Architecture_t>(*this); }
0180    BatchIterator_t end() { return TTensorBatchIterator<Data_t, Architecture_t>(*this, fNSamples / fBatchSize); }
0181 
0182    /** Shuffle the order of the samples in the batch. The shuffling is indirect,
0183     *  i.e. only the indices are shuffled. No input data is moved by this
0184     * routine. */
0185    template<typename RNG>
0186    void Shuffle(RNG & rng);
0187 
0188    /** Return the next batch from the training set. The TTensorDataLoader object
0189     *  keeps an internal counter that cycles over the batches in the training
0190     *  set. */
0191    TTensorBatch<Architecture_t> GetTensorBatch();
0192 };
0193 
0194 //
0195 // TTensorBatch Class.
0196 //______________________________________________________________________________
0197 template <typename Architecture_t>
0198 TTensorBatch<Architecture_t>::TTensorBatch(Tensor_t &inputTensor, Matrix_t &outputMatrix,
0199                                            Matrix_t &weightMatrix)
0200    : fInputTensor(inputTensor), fOutputMatrix(outputMatrix), fWeightMatrix(weightMatrix)
0201 {
0202    // Nothing to do here.
0203 }
0204 
0205 //
0206 // TTensorDataLoader Class.
0207 //______________________________________________________________________________
0208 template <typename Data_t, typename Architecture_t>
0209 TTensorDataLoader<Data_t, Architecture_t>::TTensorDataLoader(const Data_t &data, size_t nSamples, size_t batchSize,
0210                                                              const Shape_t & inputLayout,  const Shape_t & batchLayout,
0211                                                              size_t nOutputFeatures, size_t nStreams)
0212    : fData(data), fNSamples(nSamples), fBatchSize(batchSize), fInputLayout(inputLayout), fBatchDepth(batchLayout[0]), fBatchHeight(batchLayout[1]),
0213      fBatchWidth(batchLayout[2]), fNOutputFeatures(nOutputFeatures), fBatchIndex(0), fNStreams(nStreams), fDeviceBuffers(),
0214      fHostBuffers(), fSampleIndices()
0215 {
0216    size_t inputTensorSize = fBatchDepth * fBatchHeight * fBatchWidth;
0217    size_t outputMatrixSize = fBatchSize * fNOutputFeatures;
0218    size_t weightMatrixSize = fBatchSize;
0219 
0220    for (size_t i = 0; i < fNStreams; i++) {
0221       fHostBuffers.push_back(HostBuffer_t(inputTensorSize + outputMatrixSize + weightMatrixSize));
0222       fDeviceBuffers.push_back(DeviceBuffer_t(inputTensorSize + outputMatrixSize + weightMatrixSize));
0223    }
0224 
0225    fSampleIndices.reserve(fNSamples);
0226    for (size_t i = 0; i < fNSamples; i++) {
0227       fSampleIndices.push_back(i);
0228    }
0229 }
0230 
0231 //______________________________________________________________________________
0232 template <typename Data_t, typename Architecture_t>
0233 TTensorBatch<Architecture_t> TTensorDataLoader<Data_t, Architecture_t>::GetTensorBatch()
0234 {
0235    fBatchIndex %= (fNSamples / fBatchSize); // Cycle through samples.
0236 
0237    size_t inputTensorSize =  fBatchDepth * fBatchHeight * fBatchWidth;
0238    size_t outputMatrixSize = fBatchSize * fNOutputFeatures;
0239    size_t weightMatrixSize = fBatchSize;
0240 
0241    size_t streamIndex = fBatchIndex % fNStreams;
0242    HostBuffer_t &hostBuffer = fHostBuffers[streamIndex];
0243    DeviceBuffer_t &deviceBuffer = fDeviceBuffers[streamIndex];
0244 
0245    HostBuffer_t inputHostBuffer = hostBuffer.GetSubBuffer(0, inputTensorSize);
0246    HostBuffer_t outputHostBuffer = hostBuffer.GetSubBuffer(inputTensorSize, outputMatrixSize);
0247    HostBuffer_t weightHostBuffer = hostBuffer.GetSubBuffer(inputTensorSize + outputMatrixSize, weightMatrixSize);
0248 
0249    DeviceBuffer_t inputDeviceBuffer = deviceBuffer.GetSubBuffer(0, inputTensorSize);
0250    DeviceBuffer_t outputDeviceBuffer = deviceBuffer.GetSubBuffer(inputTensorSize, outputMatrixSize);
0251    DeviceBuffer_t weightDeviceBuffer = deviceBuffer.GetSubBuffer(inputTensorSize + outputMatrixSize, weightMatrixSize);
0252 
0253    // here sample index has batch size as offset , while in
0254    // copy tensor input has batch depth.
0255    // We support then now two cases: batchdepth = 1  batchHeight = batch size
0256    //   or batch depth = batch
0257    size_t sampleIndex = fBatchIndex * fBatchSize;
0258    IndexIterator_t sampleIndexIterator = fSampleIndices.begin() + sampleIndex;
0259 
0260    CopyTensorInput(inputHostBuffer, sampleIndexIterator);
0261    CopyTensorOutput(outputHostBuffer, sampleIndexIterator);
0262    CopyTensorWeights(weightHostBuffer, sampleIndexIterator);
0263 
0264    deviceBuffer.CopyFrom(hostBuffer);
0265 
0266    assert(fInputLayout.size() == 3);
0267    Tensor_t inputTensor = Architecture_t::CreateTensor( inputDeviceBuffer, fBatchSize, fInputLayout[0], fInputLayout[1], fInputLayout[2] );
0268    // in case of dense layers
0269    if (fBatchDepth == 1 && fBatchHeight == fBatchSize && fInputLayout[0] == 1 && fInputLayout[1] == 1){
0270       inputTensor = Tensor_t( inputDeviceBuffer, {fBatchSize, fInputLayout.back() }, Tensor_t::MemoryLayout::ColumnMajor );
0271    }
0272 
0273    Matrix_t outputMatrix(outputDeviceBuffer, fBatchSize, fNOutputFeatures);
0274    Matrix_t weightMatrix(weightDeviceBuffer, fBatchSize, 1);
0275 
0276    fBatchIndex++;
0277 
0278 
0279    return TTensorBatch<Architecture_t>(inputTensor, outputMatrix, weightMatrix);
0280 }
0281 
0282 //______________________________________________________________________________
0283 template <typename Data_t, typename Architecture_t>
0284 template <typename RNG>
0285 void TTensorDataLoader<Data_t, Architecture_t>::Shuffle(RNG & rng)
0286 {
0287    std::shuffle(fSampleIndices.begin(), fSampleIndices.end(), rng);
0288 }
0289 
0290 } // namespace DNN
0291 } // namespace TMVA
0292 
0293 #endif