// Author: Vladimir Ilievski

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
 * Package: TMVA                                                                  *
 * Class  : TDenseLayer                                                           *
 *                                                                                *
 * Description:                                                                   *
 *      Dense Layer Class                                                         *
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Vladimir Ilievski      <ilievski.vladimir@live.com>  - CERN, Switzerland  *
 *                                                                                *
 * Copyright (c) 2005-2015:                                                       *
 *      CERN, Switzerland                                                         *
 *      U. of Victoria, Canada                                                    *
 *      MPI-K Heidelberg, Germany                                                 *
 *      U. of Bonn, Germany                                                       *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without             *
 * modification, are permitted according to the terms listed in LICENSE           *
 * (see tmva/doc/LICENSE)                                                         *
 **********************************************************************************/

#ifndef TMVA_DNN_DENSELAYER
#define TMVA_DNN_DENSELAYER

#include "TMatrix.h"

#include "TMVA/DNN/GeneralLayer.h"
#include "TMVA/DNN/Functions.h"
#include "TMVA/DNN/CNN/ContextHandles.h"

#include <iostream>
#include <iomanip>
#include <vector>
#include <string>

namespace TMVA {
namespace DNN {
/** \class TDenseLayer

Generic dense layer class.

This generic layer class represents a dense (fully connected) layer of a
neural network with a given width n and activation function f. The activation
of each layer is computed as
\f$\mathbf{u} = f(\mathbf{W}\mathbf{x} + \boldsymbol{\theta})\f$,
where the activation function \f$f\f$ is applied elementwise.

In addition to the weight and bias matrices, each layer allocates memory
for its activations and for the corresponding input tensor before evaluation
of the activation function, as well as for the gradients of the weights and
biases.

The layer provides member functions for the forward and backward propagation
of activations through the layer. An illustrative sketch of the underlying
computation follows this comment.
*/
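/* Stand-alone sketch (plain C++, illustration only, not part of TMVA) of the
   computation this layer performs for a single event: u = f(W x + theta),
   with the activation f applied elementwise (ReLU here). The function name
   `denseForward` and all variable names are hypothetical; requires <vector>
   and <algorithm>.

      std::vector<double> denseForward(const std::vector<std::vector<double>> &W,
                                       const std::vector<double> &theta,
                                       const std::vector<double> &x)
      {
         std::vector<double> u(W.size());
         for (std::size_t i = 0; i < W.size(); ++i) {
            double s = theta[i];                  // bias term theta_i
            for (std::size_t j = 0; j < x.size(); ++j)
               s += W[i][j] * x[j];               // matrix-vector product (W x)_i
            u[i] = std::max(0.0, s);              // elementwise activation f (ReLU)
         }
         return u;
      }
*/
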
template <typename Architecture_t>
class TDenseLayer : public VGeneralLayer<Architecture_t> {
public:

   using Scalar_t = typename Architecture_t::Scalar_t;
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Tensor_t = typename Architecture_t::Tensor_t;

private:

   Tensor_t fInputActivation; ///< output of GEMM and input to activation function
   Tensor_t fDerivatives;     ///< activation function gradient

   Scalar_t fDropoutProbability; ///< Probability that an input is active.

   EActivationFunction fF; ///< Activation function of the layer.
   ERegularization fReg;   ///< The regularization method.
   Scalar_t fWeightDecay;  ///< The weight decay.

   typename Architecture_t::ActivationDescriptor_t fActivationDesc; ///< The descriptor for the activation function.

public:
   /*! Constructor */
   TDenseLayer(size_t BatchSize, size_t InputWidth, size_t Width, EInitialization init, Scalar_t DropoutProbability,
               EActivationFunction f, ERegularization reg, Scalar_t weightDecay);

   /*! Copy the dense layer provided as a pointer */
   TDenseLayer(TDenseLayer<Architecture_t> *layer);

   /*! Copy Constructor */
   TDenseLayer(const TDenseLayer &);

   /*! Destructor */
   ~TDenseLayer();

   /*! Compute activation of the layer for the given input. The input
    * must be in 3D tensor form with the different matrices corresponding to
    * different events in the batch. Computes activations as well as
    * the first partial derivative of the activation function at those
    * activations. */
   void Forward(Tensor_t &input, bool applyDropout = false);

   /*! Compute weight, bias and activation gradients. Uses the precomputed
    *  first partial derivatives of the activation function computed during
    *  forward propagation and modifies them. Must only be called directly
    *  after the corresponding call to Forward(...). */
   void Backward(Tensor_t &gradients_backward, const Tensor_t &activations_backward);

   /*! Printing the layer info. */
   void Print() const;

   /*! Writes the information and the weights about the layer in an XML node. */
   virtual void AddWeightsXMLTo(void *parent);

   /*! Read the information and the weights about the layer from an XML node. */
   virtual void ReadWeightsFromXML(void *parent);

   /*! Set the dropout probability */
   virtual void SetDropoutProbability(Scalar_t dropoutProbability) { fDropoutProbability = dropoutProbability; }

   /*! Getters */
   Scalar_t GetDropoutProbability() const { return fDropoutProbability; }

   /*! Return the output of the GEMM before the activation function is applied */
   const Tensor_t &GetInputActivation() const { return fInputActivation; }
   Tensor_t &GetInputActivation() { return fInputActivation; }

   EActivationFunction GetActivationFunction() const { return fF; }
   ERegularization GetRegularization() const { return fReg; }
   Scalar_t GetWeightDecay() const { return fWeightDecay; }
};

//______________________________________________________________________________
//
//  The Dense Layer Class - Implementation
//______________________________________________________________________________
template <typename Architecture_t>
TDenseLayer<Architecture_t>::TDenseLayer(size_t batchSize, size_t inputWidth, size_t width, EInitialization init,
                                         Scalar_t dropoutProbability, EActivationFunction f, ERegularization reg,
                                         Scalar_t weightDecay)
   // The positional arguments to VGeneralLayer are, in order: the batch size, the
   // input geometry (depth 1, height 1, width inputWidth), the output geometry
   // (1, 1, width), the weight shape (1 slice of width x inputWidth), the bias
   // shape (1 slice of width x 1) and the output shape (1 slice of batchSize x width).
   : VGeneralLayer<Architecture_t>(batchSize, 1, 1, inputWidth, 1, 1, width, 1, width, inputWidth, 1, width, 1, 1,
                                   batchSize, width, init),
     fInputActivation(), fDropoutProbability(dropoutProbability), fF(f), fReg(reg), fWeightDecay(weightDecay)
{
   // The shape should be {1, batchSize, width}; take it from the output tensor.
   fInputActivation = Tensor_t(this->GetOutput().GetShape());
   fDerivatives = Tensor_t(this->GetOutput().GetShape());

   Architecture_t::InitializeActivationDescriptor(fActivationDesc, fF);
}

//______________________________________________________________________________
template <typename Architecture_t>
TDenseLayer<Architecture_t>::TDenseLayer(TDenseLayer<Architecture_t> *layer)
   : VGeneralLayer<Architecture_t>(layer),
     fInputActivation(layer->GetInputActivation().GetShape()),
     fDropoutProbability(layer->GetDropoutProbability()),
     fF(layer->GetActivationFunction()), fReg(layer->GetRegularization()), fWeightDecay(layer->GetWeightDecay())
{
   fDerivatives = Tensor_t(this->GetOutput().GetShape());
   Architecture_t::InitializeActivationDescriptor(fActivationDesc, fF);
}

//______________________________________________________________________________
template <typename Architecture_t>
TDenseLayer<Architecture_t>::TDenseLayer(const TDenseLayer &layer)
   : VGeneralLayer<Architecture_t>(layer),
     fInputActivation(layer.GetInputActivation().GetShape()),
     fDropoutProbability(layer.fDropoutProbability),
     fF(layer.fF), fReg(layer.fReg), fWeightDecay(layer.fWeightDecay)
{
   fDerivatives = Tensor_t(this->GetOutput().GetShape());
   Architecture_t::InitializeActivationDescriptor(fActivationDesc, fF);
}

//______________________________________________________________________________
template <typename Architecture_t>
TDenseLayer<Architecture_t>::~TDenseLayer()
{
   // Release the activation descriptor.
   Architecture_t::ReleaseDescriptor(fActivationDesc);
}

//______________________________________________________________________________
template <typename Architecture_t>
auto TDenseLayer<Architecture_t>::Forward(Tensor_t &input, bool applyDropout) -> void
{
   // fDropoutProbability is the probability that an input is kept, so a value
   // of 1.0 means dropout is disabled.
   if (applyDropout && (this->GetDropoutProbability() != 1.0)) {
      Architecture_t::DropoutForward(input, static_cast<TDescriptors *>(nullptr),
                                     static_cast<TWorkspace *>(nullptr),
                                     this->GetDropoutProbability());
   }
   // Linear part: output = input * W^T, then add the biases row-wise.
   Architecture_t::MultiplyTranspose(this->GetOutput(), input, this->GetWeightsAt(0));
   Architecture_t::AddRowWise(this->GetOutput(), this->GetBiasesAt(0));

   // Keep a copy of the pre-activation values; Backward needs them.
   Architecture_t::Copy(this->GetInputActivation(), this->GetOutput());

   Architecture_t::ActivationFunctionForward(this->GetOutput(), this->GetActivationFunction(), fActivationDesc);
}

//______________________________________________________________________________
template <typename Architecture_t>
auto TDenseLayer<Architecture_t>::Backward(Tensor_t &gradients_backward, const Tensor_t &activations_backward) -> void
{
   if (this->GetDropoutProbability() != 1.0) {
      Architecture_t::DropoutBackward(this->GetActivationGradients(),
                                      static_cast<TDescriptors *>(nullptr),
                                      static_cast<TWorkspace *>(nullptr));
   }

   // Multiply the incoming activation gradients by the first derivative of the
   // activation function, evaluated at the stored pre-activation values.
   Architecture_t::ActivationFunctionBackward(fDerivatives, this->GetOutput(),
                                              this->GetActivationGradients(), this->GetInputActivation(),
                                              this->GetActivationFunction(), fActivationDesc);

   // Propagate the gradients to the weights, the biases and the previous layer.
   Architecture_t::Backward(gradients_backward, this->GetWeightGradientsAt(0), this->GetBiasGradientsAt(0),
                            fDerivatives, this->GetActivationGradients(), this->GetWeightsAt(0),
                            activations_backward);

   addRegularizationGradients<Architecture_t>(this->GetWeightGradientsAt(0), this->GetWeightsAt(0),
                                              this->GetWeightDecay(), this->GetRegularization());
}
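
/* Stand-alone sketch (plain C++, illustration only, single event) of the
   standard dense-layer backpropagation performed above; it is not a literal
   transcript of Architecture_t::Backward. Here `g` is the gradient of the loss
   with respect to the layer output, `u` the stored pre-activation, `x` the
   layer input, and f = ReLU; the names `DenseGradients` and `denseBackward`
   are hypothetical. Requires <vector> and <cstddef>.

      struct DenseGradients {
         std::vector<std::vector<double>> dW; // dL/dW
         std::vector<double> dTheta;          // dL/dtheta
         std::vector<double> dx;              // dL/dx, passed to the previous layer
      };

      DenseGradients denseBackward(const std::vector<std::vector<double>> &W,
                                   const std::vector<double> &x,
                                   const std::vector<double> &u,
                                   const std::vector<double> &g)
      {
         DenseGradients grad;
         grad.dW.assign(W.size(), std::vector<double>(x.size(), 0.0));
         grad.dTheta.assign(W.size(), 0.0);
         grad.dx.assign(x.size(), 0.0);
         for (std::size_t i = 0; i < W.size(); ++i) {
            double delta = g[i] * (u[i] > 0.0 ? 1.0 : 0.0); // g * f'(u), f = ReLU
            grad.dTheta[i] = delta;                         // bias gradient
            for (std::size_t j = 0; j < x.size(); ++j) {
               grad.dW[i][j] = delta * x[j];                // weight gradient
               grad.dx[j] += W[i][j] * delta;               // gradient w.r.t. the input
            }
         }
         return grad;
      }
*/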

//______________________________________________________________________________
template <typename Architecture_t>
void TDenseLayer<Architecture_t>::Print() const
{
   std::cout << " DENSE Layer: \t";
   std::cout << " ( Input =" << std::setw(6) << this->GetWeightsAt(0).GetNcols();                 // input size
   std::cout << " , Width =" << std::setw(6) << this->GetWeightsAt(0).GetNrows() << " ) ";        // layer width

   std::cout << "\tOutput = ( " << std::setw(2) << this->GetOutput().GetFirstSize() << " ," << std::setw(6)
             << this->GetOutput().GetShape()[0] << " ," << std::setw(6) << this->GetOutput().GetShape()[1] << " ) ";

   std::vector<std::string> activationNames = {"Identity", "Relu", "Sigmoid", "Tanh", "SymmRelu", "SoftSign", "Gauss"};
   std::cout << "\t Activation Function = ";
   std::cout << activationNames[static_cast<int>(fF)];
   if (fDropoutProbability != 1.) std::cout << "\t Dropout prob. = " << fDropoutProbability;
   std::cout << std::endl;
}

//______________________________________________________________________________
template <typename Architecture_t>
void TDenseLayer<Architecture_t>::AddWeightsXMLTo(void *parent)
{
   // Write the layer width and the activation function, followed by the weight
   // and bias matrices.
   auto layerxml = gTools().xmlengine().NewChild(parent, nullptr, "DenseLayer");

   gTools().xmlengine().NewAttr(layerxml, nullptr, "Width", gTools().StringFromInt(this->GetWidth()));

   int activationFunction = static_cast<int>(this->GetActivationFunction());
   gTools().xmlengine().NewAttr(layerxml, nullptr, "ActivationFunction",
                                TString::Itoa(activationFunction, 10));
   // Write the weight and bias matrices.
   this->WriteMatrixToXML(layerxml, "Weights", this->GetWeightsAt(0));
   this->WriteMatrixToXML(layerxml, "Biases", this->GetBiasesAt(0));
}
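
/* The resulting node has, schematically, the following layout (illustrative
   only; the attribute values are examples and the exact matrix encoding is
   defined by WriteMatrixToXML):

      <DenseLayer Width="16" ActivationFunction="1">
         <Weights ... />
         <Biases ... />
      </DenseLayer>
*/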

//______________________________________________________________________________
template <typename Architecture_t>
void TDenseLayer<Architecture_t>::ReadWeightsFromXML(void *parent)
{
   // Read the layer weights and biases from XML.
   this->ReadMatrixXML(parent, "Weights", this->GetWeightsAt(0));
   this->ReadMatrixXML(parent, "Biases", this->GetBiasesAt(0));
}

} // namespace DNN
} // namespace TMVA

#endif // TMVA_DNN_DENSELAYER