// @(#)root/tmva/tmva/dnn:$Id$
// Author: Simon Pfreundschuh 20/06/16

/*************************************************************************
 * Copyright (C) 2016, Simon Pfreundschuh                                *
 * All rights reserved.                                                  *
 *                                                                       *
 * For the licensing terms see $ROOTSYS/LICENSE.                         *
 * For the list of contributors see $ROOTSYS/README/CREDITS.             *
 *************************************************************************/

/////////////////////////////////////////////////////////////////////
// Contains function enums for activation and output functions, as //
// well as generic evaluation functions that delegate the call to  //
// the corresponding evaluation kernel.                            //
/////////////////////////////////////////////////////////////////////

#ifndef TMVA_DNN_FUNCTIONS
#define TMVA_DNN_FUNCTIONS

namespace TMVA
{
namespace DNN
{
//______________________________________________________________________________
//
//  Enum Definitions
//______________________________________________________________________________

/*! Enum that represents layer activation functions. */
enum class EActivationFunction
{
   kIdentity = 0,
   kRelu     = 1,
   kSigmoid  = 2,
   kTanh     = 3,
   kSymmRelu = 4,
   kSoftSign = 5,
   kGauss    = 6,
   kFastTanh = 7
};

/*! Enum that represents output functions. */
enum class EOutputFunction
{
   kIdentity = 'I',
   kSigmoid  = 'S',
   kSoftmax  = 'M'
};

/*! Enum that represents objective functions for the net, i.e. functions
 *  that take the output from the last layer in the net together with the
 *  truth and return the value of the objective function that is to be
 *  minimized in the training process. */
enum class ELossFunction
{
    kCrossEntropy        = 'C',
    kMeanSquaredError    = 'R',
    kSoftmaxCrossEntropy = 'S'
};

/*! Enum representing the regularization type applied for a given layer. */
enum class ERegularization
{
    kNone = '0',
    kL1   = '1',
    kL2   = '2'
};

/*! Enum representing the initialization method used for this layer. */
enum class EInitialization {
    kGauss         = 'G',
    kUniform       = 'U',
    kIdentity      = 'I',
    kZero          = 'Z',
    kGlorotNormal  = 'X',
    kGlorotUniform = 'F'
};

/// Enum representing the optimizer used for training.
enum class EOptimizer {
   kSGD      = 0,
   kAdam     = 1,
   kAdagrad  = 2,
   kRMSProp  = 3,
   kAdadelta = 4
};

//______________________________________________________________________________
//
//  Activation Functions
//______________________________________________________________________________

/*! Apply the given activation function to each value in the given
 *  tensor A. */
template<typename Architecture_t>
inline void evaluate(typename Architecture_t::Tensor_t &A,
                     EActivationFunction f)
{
    switch(f)
    {
    case EActivationFunction::kIdentity : break;
    case EActivationFunction::kRelu     : Architecture_t::Relu(A);
        break;
    case EActivationFunction::kSigmoid  : Architecture_t::Sigmoid(A);
        break;
    case EActivationFunction::kTanh     : Architecture_t::Tanh(A);
        break;
    case EActivationFunction::kSymmRelu : Architecture_t::SymmetricRelu(A);
        break;
    case EActivationFunction::kSoftSign : Architecture_t::SoftSign(A);
        break;
    case EActivationFunction::kGauss    : Architecture_t::Gauss(A);
        break;
    case EActivationFunction::kFastTanh : Architecture_t::FastTanh(A);
        break;
    }
}
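
/* Usage sketch (illustrative only; the TCpu<float> backend is an assumption
 * and A is assumed to be an already-filled tensor of that backend):
 *
 *    using Arch = TMVA::DNN::TCpu<float>;
 *    Arch::Tensor_t A = ...;                        // pre-filled activations
 *    evaluate<Arch>(A, EActivationFunction::kRelu); // in place: a <- max(0, a)
 */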

//______________________________________________________________________________
/*! Compute the first partial derivative of the activation function for
 *  the values given in tensor A and write the results into B. */
template<typename Architecture_t>
inline void evaluateDerivative(typename Architecture_t::Tensor_t & B,
                               EActivationFunction f,
                               const typename Architecture_t::Tensor_t & A)
{
    switch(f)
    {
    case EActivationFunction::kIdentity : Architecture_t::IdentityDerivative(B, A);
        break;
    case EActivationFunction::kRelu     : Architecture_t::ReluDerivative(B, A);
        break;
    case EActivationFunction::kSigmoid  : Architecture_t::SigmoidDerivative(B, A);
        break;
    case EActivationFunction::kTanh     : Architecture_t::TanhDerivative(B, A);
        break;
    case EActivationFunction::kSymmRelu : Architecture_t::SymmetricReluDerivative(B, A);
        break;
    case EActivationFunction::kSoftSign : Architecture_t::SoftSignDerivative(B, A);
        break;
    case EActivationFunction::kGauss    : Architecture_t::GaussDerivative(B, A);
        break;
    case EActivationFunction::kFastTanh : Architecture_t::FastTanhDerivative(B, A);
        break;
    }
}
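
/* Usage sketch (illustrative; Arch and the tensors are assumptions -- B must
 * have the same shape as A and receives f'(A) element-wise, as needed when
 * backpropagating through an activation layer):
 *
 *    Arch::Tensor_t B = ...; // same shape as A
 *    evaluateDerivative<Arch>(B, EActivationFunction::kTanh, A);
 */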

// Matrix versions of the functions above (kept for backward compatibility).
template<typename Architecture_t>
inline void evaluateMatrix( typename Architecture_t::Matrix_t &A,
                            EActivationFunction f)
{
    typename Architecture_t::Tensor_t t(A);
    evaluate<Architecture_t>(t, f);
}

template<typename Architecture_t>
inline void evaluateDerivativeMatrix( typename Architecture_t::Matrix_t &B,
                                      EActivationFunction f,
                                      const typename Architecture_t::Matrix_t & A)
{
    typename Architecture_t::Tensor_t t(B);
    evaluateDerivative<Architecture_t>(t, f, typename Architecture_t::Tensor_t(A));
}
//______________________________________________________________________________
//
//  Output Functions
//______________________________________________________________________________

/*! Apply the given output function to the values in the given matrix X
 *  and write the results into the matrix A. */
template<typename Architecture_t>
inline void evaluate(typename Architecture_t::Matrix_t &A,
                     EOutputFunction f,
                     const typename Architecture_t::Matrix_t &X)
{
    switch(f)
    {
    case EOutputFunction::kIdentity : Architecture_t::Copy(A, X);
                                      break;
    case EOutputFunction::kSigmoid  : Architecture_t::Sigmoid(A, X);
                                      break;
    case EOutputFunction::kSoftmax  : Architecture_t::Softmax(A, X);
                                      break;
    }
}
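
/* Usage sketch (illustrative; X is assumed to hold the raw output-layer
 * activations with one row per event, and A to be a matrix of the same
 * shape that receives the transformed values):
 *
 *    evaluate<Arch>(A, EOutputFunction::kSoftmax, X); // row-wise softmax of X into A
 */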

//______________________________________________________________________________
//
//  Loss Functions
//______________________________________________________________________________

/*! Compute the value of the objective function f for the given activations
 *  of the output layer and the truth Y. */
template <typename Architecture_t>
inline auto evaluate(ELossFunction f, const typename Architecture_t::Matrix_t &Y,
                     const typename Architecture_t::Matrix_t &output, const typename Architecture_t::Matrix_t &weights)
   -> decltype(Architecture_t::CrossEntropy(Y, output, weights))
{
    switch(f)
    {
    case ELossFunction::kCrossEntropy: return Architecture_t::CrossEntropy(Y, output, weights);
    case ELossFunction::kMeanSquaredError: return Architecture_t::MeanSquaredError(Y, output, weights);
    case ELossFunction::kSoftmaxCrossEntropy: return Architecture_t::SoftmaxCrossEntropy(Y, output, weights);
    }
    return 0.0;
}
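
/* Usage sketch (illustrative; Y, output and weights are assumed to be
 * matrices with one row per event, with weights holding per-event weights):
 *
 *    auto loss = evaluate<Arch>(ELossFunction::kCrossEntropy, Y, output, weights);
 */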

//______________________________________________________________________________
/*! Compute the gradient of the given loss function f for the activations
 *  output of the output layer and the truth Y, and write the results into dY. */
template <typename Architecture_t>
inline void evaluateGradients(typename Architecture_t::Matrix_t &dY, ELossFunction f,
                              const typename Architecture_t::Matrix_t &Y,
                              const typename Architecture_t::Matrix_t &output,
                              const typename Architecture_t::Matrix_t &weights)
{
    switch(f)
    {
    case ELossFunction::kCrossEntropy: Architecture_t::CrossEntropyGradients(dY, Y, output, weights); break;
    case ELossFunction::kMeanSquaredError: Architecture_t::MeanSquaredErrorGradients(dY, Y, output, weights); break;
    case ELossFunction::kSoftmaxCrossEntropy :
       Architecture_t::SoftmaxCrossEntropyGradients(dY, Y, output, weights);
       break;
    }
}
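
/* Usage sketch (illustrative): a backward pass typically starts by filling
 * dY with dL/d(output) and then propagating it back through the layers; dY
 * is assumed to have the same shape as output.
 *
 *    evaluateGradients<Arch>(dY, ELossFunction::kMeanSquaredError, Y, output, weights);
 */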

//______________________________________________________________________________
//
//  Regularization
//______________________________________________________________________________

/*! Evaluate the regularization functional for a given weight matrix. */
template<typename Architecture_t>
inline auto regularization(const typename Architecture_t::Matrix_t &A,
                           ERegularization R)
    -> decltype(Architecture_t::L1Regularization(A))
{
    switch(R)
    {
    case ERegularization::kNone :
        return 0.0;
    case ERegularization::kL1 :
        return Architecture_t::L1Regularization(A);
    case ERegularization::kL2 :
        return Architecture_t::L2Regularization(A);
    }
    return 0.0;
}
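
/* Usage sketch (illustrative; scaling the penalty by weightDecay in the
 * caller is an assumption about the surrounding training code):
 *
 *    auto penalty = weightDecay * regularization<Arch>(W, ERegularization::kL2);
 *    loss += penalty;
 */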

//______________________________________________________________________________
/*! Add the regularization gradient corresponding to the weight matrix W
 *  to the matrix A. */
template<typename Architecture_t>
inline void addRegularizationGradients(typename Architecture_t::Matrix_t &A,
                                       const typename Architecture_t::Matrix_t &W,
                                       typename Architecture_t::Scalar_t weightDecay,
                                       ERegularization R)
{
    switch(R)
    {
    case ERegularization::kNone :
        break;
    case ERegularization::kL1 :
        Architecture_t::AddL1RegularizationGradients(A, W, weightDecay);
        break;
    case ERegularization::kL2 :
        Architecture_t::AddL2RegularizationGradients(A, W, weightDecay);
        break;
    }
}
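
/* Usage sketch (illustrative): add the regularization term's contribution to
 * an existing weight-gradient matrix dW, scaled by weightDecay.
 *
 *    addRegularizationGradients<Arch>(dW, W, weightDecay, ERegularization::kL2);
 */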

//______________________________________________________________________________
//
//  Initialization
//______________________________________________________________________________

template<typename Architecture_t>
inline void initialize(typename Architecture_t::Matrix_t & A,
                       EInitialization m)
{
   switch(m) {
   case EInitialization::kGauss         : Architecture_t::InitializeGauss(A);
       break;
   case EInitialization::kUniform       : Architecture_t::InitializeUniform(A);
       break;
   case EInitialization::kIdentity      : Architecture_t::InitializeIdentity(A);
       break;
   case EInitialization::kZero          : Architecture_t::InitializeZero(A);
       break;
   case EInitialization::kGlorotNormal  : Architecture_t::InitializeGlorotNormal(A);
       break;
   case EInitialization::kGlorotUniform : Architecture_t::InitializeGlorotUniform(A);
       break;
   }
}
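
/* Usage sketch (illustrative): initialize a freshly allocated weight matrix
 * before training starts.
 *
 *    initialize<Arch>(W, EInitialization::kGlorotUniform);
 */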

} // namespace DNN
} // namespace TMVA

#endif