// Author: Vladimir Ilievski

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
 * Package: TMVA                                                                  *
 * Class  : TDenseLayer                                                           *
 *                                                                                *
 * Description:                                                                   *
 *      Dense Layer Class                                                         *
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Vladimir Ilievski      <ilievski.vladimir@live.com>  - CERN, Switzerland  *
 *                                                                                *
 * Copyright (c) 2005-2015:                                                       *
 *      CERN, Switzerland                                                         *
 *      U. of Victoria, Canada                                                    *
 *      MPI-K Heidelberg, Germany                                                 *
 *      U. of Bonn, Germany                                                       *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without             *
 * modification, are permitted according to the terms listed in LICENSE           *
 * (see tmva/doc/LICENSE)                                                         *
 **********************************************************************************/

#ifndef TMVA_DNN_DENSELAYER
#define TMVA_DNN_DENSELAYER

#include "TMatrix.h"

#include "TMVA/DNN/GeneralLayer.h"
#include "TMVA/DNN/Functions.h"
#include "TMVA/DNN/CNN/ContextHandles.h"

#include <iostream>
#include <iomanip>
#include <vector>
#include <string>

namespace TMVA {
namespace DNN {
/** \class TDenseLayer

Generic dense layer class.

This generic layer class represents a dense (fully connected) layer of a
neural network with a given width n and activation function f. The activation
of each layer is computed as
\f$\mathbf{u} = f(\mathbf{W}\mathbf{x} + \boldsymbol{\theta})\f$,
where the activation function \f$f\f$ is applied elementwise.

In addition to the weight and bias matrices, each layer allocates memory
for its activations and for the corresponding input tensor before evaluation
of the activation function, as well as for the gradients of the weights and
biases.

The layer provides member functions for the forward and backward propagation
of activations through the layer. An illustrative sketch of the underlying
computation follows this comment.
*/
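/* Stand-alone sketch (plain C++, illustration only, not part of TMVA) of the
   computation this layer performs for a single event: u = f(W x + theta),
   with the activation f applied elementwise (ReLU here). The function name
   `denseForward` and all variable names are hypothetical; requires <vector>
   and <algorithm>.

      std::vector<double> denseForward(const std::vector<std::vector<double>> &W,
                                       const std::vector<double> &theta,
                                       const std::vector<double> &x)
      {
         std::vector<double> u(W.size());
         for (std::size_t i = 0; i < W.size(); ++i) {
            double s = theta[i];                  // bias term theta_i
            for (std::size_t j = 0; j < x.size(); ++j)
               s += W[i][j] * x[j];               // matrix-vector product (W x)_i
            u[i] = std::max(0.0, s);              // elementwise activation f (ReLU)
         }
         return u;
      }
*/
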
template <typename Architecture_t>
class TDenseLayer : public VGeneralLayer<Architecture_t> {
public:

   using Scalar_t = typename Architecture_t::Scalar_t;
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Tensor_t = typename Architecture_t::Tensor_t;

private:

   Tensor_t fInputActivation; ///< output of GEMM and input to activation function
   Tensor_t fDerivatives;     ///< activation function gradient

   Scalar_t fDropoutProbability; ///< Probability that an input is active.

   EActivationFunction fF; ///< Activation function of the layer.
   ERegularization fReg;   ///< The regularization method.
   Scalar_t fWeightDecay;  ///< The weight decay.

   typename Architecture_t::ActivationDescriptor_t fActivationDesc; ///< The descriptor for the activation function.

public:
   /*! Constructor */
   TDenseLayer(size_t BatchSize, size_t InputWidth, size_t Width, EInitialization init, Scalar_t DropoutProbability,
               EActivationFunction f, ERegularization reg, Scalar_t weightDecay);

   /*! Copy the dense layer provided as a pointer */
   TDenseLayer(TDenseLayer<Architecture_t> *layer);

   /*! Copy Constructor */
   TDenseLayer(const TDenseLayer &);

   /*! Destructor */
   ~TDenseLayer();

   /*! Compute activation of the layer for the given input. The input
    * must be in 3D tensor form with the different matrices corresponding to
    * different events in the batch. Computes activations as well as
    * the first partial derivative of the activation function at those
    * activations. */
   void Forward(Tensor_t &input, bool applyDropout = false);

   /*! Compute weight, bias and activation gradients. Uses the precomputed
    *  first partial derivatives of the activation function computed during
    *  forward propagation and modifies them. Must only be called directly
    *  after the corresponding call to Forward(...). */
   void Backward(Tensor_t &gradients_backward, const Tensor_t &activations_backward);

   /*! Printing the layer info. */
   void Print() const;

   /*! Writes the information and the weights about the layer in an XML node. */
   virtual void AddWeightsXMLTo(void *parent);

   /*! Read the information and the weights about the layer from an XML node. */
   virtual void ReadWeightsFromXML(void *parent);

   /*! Set the dropout probability */
   virtual void SetDropoutProbability(Scalar_t dropoutProbability) { fDropoutProbability = dropoutProbability; }

   /*! Getters */
   Scalar_t GetDropoutProbability() const { return fDropoutProbability; }

   /*! Return the output of the GEMM before the activation function is applied */
   const Tensor_t &GetInputActivation() const { return fInputActivation; }
   Tensor_t &GetInputActivation() { return fInputActivation; }

   EActivationFunction GetActivationFunction() const { return fF; }
   ERegularization GetRegularization() const { return fReg; }
   Scalar_t GetWeightDecay() const { return fWeightDecay; }
};

//______________________________________________________________________________
//
//  The Dense Layer Class - Implementation
//______________________________________________________________________________
template <typename Architecture_t>
TDenseLayer<Architecture_t>::TDenseLayer(size_t batchSize, size_t inputWidth, size_t width, EInitialization init,
                                         Scalar_t dropoutProbability, EActivationFunction f, ERegularization reg,
                                         Scalar_t weightDecay)
   // The positional arguments to VGeneralLayer are, in order: the batch size, the
   // input geometry (depth 1, height 1, width inputWidth), the output geometry
   // (1, 1, width), the weight shape (1 slice of width x inputWidth), the bias
   // shape (1 slice of width x 1) and the output shape (1 slice of batchSize x width).
   : VGeneralLayer<Architecture_t>(batchSize, 1, 1, inputWidth, 1, 1, width, 1, width, inputWidth, 1, width, 1, 1,
                                   batchSize, width, init),
     fInputActivation(), fDropoutProbability(dropoutProbability), fF(f), fReg(reg), fWeightDecay(weightDecay)
{
   // The shape should be {1, batchSize, width}; take it from the output tensor.
   fInputActivation = Tensor_t(this->GetOutput().GetShape());
   fDerivatives = Tensor_t(this->GetOutput().GetShape());

   Architecture_t::InitializeActivationDescriptor(fActivationDesc, fF);
}

//______________________________________________________________________________
template <typename Architecture_t>
TDenseLayer<Architecture_t>::TDenseLayer(TDenseLayer<Architecture_t> *layer)
   : VGeneralLayer<Architecture_t>(layer),
     fInputActivation(layer->GetInputActivation().GetShape()),
     fDropoutProbability(layer->GetDropoutProbability()),
     fF(layer->GetActivationFunction()), fReg(layer->GetRegularization()), fWeightDecay(layer->GetWeightDecay())
{
   fDerivatives = Tensor_t(this->GetOutput().GetShape());
   Architecture_t::InitializeActivationDescriptor(fActivationDesc, fF);
}

//______________________________________________________________________________
template <typename Architecture_t>
TDenseLayer<Architecture_t>::TDenseLayer(const TDenseLayer &layer)
   : VGeneralLayer<Architecture_t>(layer),
     fInputActivation(layer.GetInputActivation().GetShape()),
     fDropoutProbability(layer.fDropoutProbability),
     fF(layer.fF), fReg(layer.fReg), fWeightDecay(layer.fWeightDecay)
{
   fDerivatives = Tensor_t(this->GetOutput().GetShape());
   Architecture_t::InitializeActivationDescriptor(fActivationDesc, fF);
}

//______________________________________________________________________________
template <typename Architecture_t>
TDenseLayer<Architecture_t>::~TDenseLayer()
{
   // Release the activation descriptor.
   Architecture_t::ReleaseDescriptor(fActivationDesc);
}

//______________________________________________________________________________
template <typename Architecture_t>
auto TDenseLayer<Architecture_t>::Forward(Tensor_t &input, bool applyDropout) -> void
{
   // fDropoutProbability is the probability that an input is kept, so a value
   // of 1.0 means dropout is disabled.
   if (applyDropout && (this->GetDropoutProbability() != 1.0)) {
      Architecture_t::DropoutForward(input, static_cast<TDescriptors *>(nullptr),
                                     static_cast<TWorkspace *>(nullptr),
                                     this->GetDropoutProbability());
   }
   // Linear part: output = input * W^T, then add the biases row-wise.
   Architecture_t::MultiplyTranspose(this->GetOutput(), input, this->GetWeightsAt(0));
   Architecture_t::AddRowWise(this->GetOutput(), this->GetBiasesAt(0));

   // Keep a copy of the pre-activation values; Backward needs them.
   Architecture_t::Copy(this->GetInputActivation(), this->GetOutput());

   Architecture_t::ActivationFunctionForward(this->GetOutput(), this->GetActivationFunction(), fActivationDesc);
}

//______________________________________________________________________________
template <typename Architecture_t>
auto TDenseLayer<Architecture_t>::Backward(Tensor_t &gradients_backward, const Tensor_t &activations_backward) -> void
{
   if (this->GetDropoutProbability() != 1.0) {
      Architecture_t::DropoutBackward(this->GetActivationGradients(),
                                      static_cast<TDescriptors *>(nullptr),
                                      static_cast<TWorkspace *>(nullptr));
   }

   // Multiply the incoming activation gradients by the first derivative of the
   // activation function, evaluated at the stored pre-activation values.
   Architecture_t::ActivationFunctionBackward(fDerivatives, this->GetOutput(),
                                              this->GetActivationGradients(), this->GetInputActivation(),
                                              this->GetActivationFunction(), fActivationDesc);

   // Propagate the gradients to the weights, the biases and the previous layer.
   Architecture_t::Backward(gradients_backward, this->GetWeightGradientsAt(0), this->GetBiasGradientsAt(0),
                            fDerivatives, this->GetActivationGradients(), this->GetWeightsAt(0),
                            activations_backward);

   addRegularizationGradients<Architecture_t>(this->GetWeightGradientsAt(0), this->GetWeightsAt(0),
                                              this->GetWeightDecay(), this->GetRegularization());
}
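
/* Stand-alone sketch (plain C++, illustration only, single event) of the
   standard dense-layer backpropagation performed above; it is not a literal
   transcript of Architecture_t::Backward. Here `g` is the gradient of the loss
   with respect to the layer output, `u` the stored pre-activation, `x` the
   layer input, and f = ReLU; the names `DenseGradients` and `denseBackward`
   are hypothetical. Requires <vector> and <cstddef>.

      struct DenseGradients {
         std::vector<std::vector<double>> dW; // dL/dW
         std::vector<double> dTheta;          // dL/dtheta
         std::vector<double> dx;              // dL/dx, passed to the previous layer
      };

      DenseGradients denseBackward(const std::vector<std::vector<double>> &W,
                                   const std::vector<double> &x,
                                   const std::vector<double> &u,
                                   const std::vector<double> &g)
      {
         DenseGradients grad;
         grad.dW.assign(W.size(), std::vector<double>(x.size(), 0.0));
         grad.dTheta.assign(W.size(), 0.0);
         grad.dx.assign(x.size(), 0.0);
         for (std::size_t i = 0; i < W.size(); ++i) {
            double delta = g[i] * (u[i] > 0.0 ? 1.0 : 0.0); // g * f'(u), f = ReLU
            grad.dTheta[i] = delta;                         // bias gradient
            for (std::size_t j = 0; j < x.size(); ++j) {
               grad.dW[i][j] = delta * x[j];                // weight gradient
               grad.dx[j] += W[i][j] * delta;               // gradient w.r.t. the input
            }
         }
         return grad;
      }
*/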

//______________________________________________________________________________
template <typename Architecture_t>
void TDenseLayer<Architecture_t>::Print() const
{
   std::cout << " DENSE Layer: \t";
   std::cout << " ( Input =" << std::setw(6) << this->GetWeightsAt(0).GetNcols();                 // input size
   std::cout << " , Width =" << std::setw(6) << this->GetWeightsAt(0).GetNrows() << " ) ";        // layer width

   std::cout << "\tOutput = ( " << std::setw(2) << this->GetOutput().GetFirstSize() << " ," << std::setw(6)
             << this->GetOutput().GetShape()[0] << " ," << std::setw(6) << this->GetOutput().GetShape()[1] << " ) ";

   std::vector<std::string> activationNames = {"Identity", "Relu", "Sigmoid", "Tanh", "SymmRelu", "SoftSign", "Gauss"};
   std::cout << "\t Activation Function = ";
   std::cout << activationNames[static_cast<int>(fF)];
   if (fDropoutProbability != 1.) std::cout << "\t Dropout prob. = " << fDropoutProbability;
   std::cout << std::endl;
}

//______________________________________________________________________________
template <typename Architecture_t>
void TDenseLayer<Architecture_t>::AddWeightsXMLTo(void *parent)
{
   // Write the layer width and the activation function, followed by the weight
   // and bias matrices.
   auto layerxml = gTools().xmlengine().NewChild(parent, nullptr, "DenseLayer");

   gTools().xmlengine().NewAttr(layerxml, nullptr, "Width", gTools().StringFromInt(this->GetWidth()));

   int activationFunction = static_cast<int>(this->GetActivationFunction());
   gTools().xmlengine().NewAttr(layerxml, nullptr, "ActivationFunction",
                                TString::Itoa(activationFunction, 10));
   // Write the weight and bias matrices.
   this->WriteMatrixToXML(layerxml, "Weights", this->GetWeightsAt(0));
   this->WriteMatrixToXML(layerxml, "Biases", this->GetBiasesAt(0));
}
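
/* The resulting node has, schematically, the following layout (illustrative
   only; the attribute values are examples and the exact matrix encoding is
   defined by WriteMatrixToXML):

      <DenseLayer Width="16" ActivationFunction="1">
         <Weights ... />
         <Biases ... />
      </DenseLayer>
*/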

//______________________________________________________________________________
template <typename Architecture_t>
void TDenseLayer<Architecture_t>::ReadWeightsFromXML(void *parent)
{
   // Read the layer weights and biases from XML.
   this->ReadMatrixXML(parent, "Weights", this->GetWeightsAt(0));
   this->ReadMatrixXML(parent, "Biases", this->GetBiasesAt(0));
}

} // namespace DNN
} // namespace TMVA

#endif // TMVA_DNN_DENSELAYER