#ifndef TMVA_DNN_DENSELAYER
#define TMVA_DNN_DENSELAYER

#include "TMatrix.h"

#include "TMVA/DNN/GeneralLayer.h"
#include "TMVA/DNN/Functions.h"
#include "TMVA/DNN/CNN/ContextHandles.h"

#include <iostream>
#include <iomanip>
#include <vector>
#include <string>

namespace TMVA {
namespace DNN {

/** \class TDenseLayer

Generic dense layer class.

This class represents a densely connected layer of a neural network with a
given width and activation function. The pre-activation output of the layer
is given by \f$\mathbf{u} = \mathbf{W}\mathbf{x} + \boldsymbol{\theta}\f$,
to which the activation function f is then applied element-wise.

In addition to the weight and bias matrices, the layer allocates memory for
its activations, for the pre-activation output (needed again in the backward
pass) and for the gradients of the activation function.

The layer provides member functions for the forward and backward propagation
of activations through the layer.
*/
template <typename Architecture_t>
class TDenseLayer : public VGeneralLayer<Architecture_t> {
public:

   using Scalar_t = typename Architecture_t::Scalar_t;
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Tensor_t = typename Architecture_t::Tensor_t;

private:

   Tensor_t fInputActivation; ///< output of the GEMM before the activation function is applied
   Tensor_t fDerivatives;     ///< gradients of the activation function

   Scalar_t fDropoutProbability; ///< probability that an input is kept active

   EActivationFunction fF; ///< activation function of the layer
   ERegularization fReg;   ///< the regularization method
   Scalar_t fWeightDecay;  ///< the weight decay

   typename Architecture_t::ActivationDescriptor_t fActivationDesc; ///< descriptor for the activation function

public:
   /*! Constructor */
   TDenseLayer(size_t BatchSize, size_t InputWidth, size_t Width, EInitialization init, Scalar_t DropoutProbability,
               EActivationFunction f, ERegularization reg, Scalar_t weightDecay);

   /*! Copy the dense layer provided as a pointer */
   TDenseLayer(TDenseLayer<Architecture_t> *layer);

   /*! Copy constructor */
   TDenseLayer(const TDenseLayer &);

   /*! Destructor */
   ~TDenseLayer();

   /*! Compute the activation of the layer for the given input. The input
    *  must be in 3D tensor form with the different matrices corresponding to
    *  different events in the batch. Also stores the pre-activation output,
    *  which the backward pass needs to evaluate the activation gradients. */
   void Forward(Tensor_t &input, bool applyDropout = false);

   /*! Compute the weight, bias and activation gradients. Uses the
    *  pre-activation output cached during forward propagation, so it must
    *  only be called after the corresponding call to Forward(...). */
   void Backward(Tensor_t &gradients_backward, const Tensor_t &activations_backward);

   /*! Print the layer info. */
   void Print() const;

   /*! Write the information and the weights of the layer into an XML node. */
   virtual void AddWeightsXMLTo(void *parent);

   /*! Read the information and the weights of the layer from an XML node. */
   virtual void ReadWeightsFromXML(void *parent);

   /*! Set the dropout probability. */
   virtual void SetDropoutProbability(Scalar_t dropoutProbability) { fDropoutProbability = dropoutProbability; }

   Scalar_t GetDropoutProbability() const { return fDropoutProbability; }

   /*! Return the output of the GEMM before the activation function is applied. */
   const Tensor_t &GetInputActivation() const { return fInputActivation; }
   Tensor_t &GetInputActivation() { return fInputActivation; }

   EActivationFunction GetActivationFunction() const { return fF; }
   ERegularization GetRegularization() const { return fReg; }
   Scalar_t GetWeightDecay() const { return fWeightDecay; }
};
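
// A minimal usage sketch (illustrative only, not part of the interface). The
// concrete backend is an assumption here, e.g. the CPU architecture
// TCpu<Double_t> from "TMVA/DNN/Architectures/Cpu.h":
//
//    TDenseLayer<TCpu<Double_t>> layer(/*BatchSize=*/32, /*InputWidth=*/16, /*Width=*/8,
//                                      EInitialization::kGauss, /*DropoutProbability=*/1.0,
//                                      EActivationFunction::kRelu, ERegularization::kL2,
//                                      /*weightDecay=*/1e-4);
//    // A dropout probability of 1.0 keeps all inputs active.
//    // Tensor_t input = ...;   // 32 x 16 batch, packed as a 3D tensor
//    // layer.Forward(input);
//    // layer.Print();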

//
//
//  The Dense Layer Class - Implementation
//______________________________________________________________________________
template <typename Architecture_t>
TDenseLayer<Architecture_t>::TDenseLayer(size_t batchSize, size_t inputWidth, size_t width, EInitialization init,
                                         Scalar_t dropoutProbability, EActivationFunction f, ERegularization reg,
                                         Scalar_t weightDecay)
   // The base class is initialized with depth = height = 1: the weights are a
   // width x inputWidth matrix, the biases a width x 1 matrix and the output a
   // batchSize x width matrix.
   : VGeneralLayer<Architecture_t>(batchSize, 1, 1, inputWidth, 1, 1, width, 1, width, inputWidth, 1, width, 1, 1,
                                   batchSize, width, init),
     fInputActivation(), fDropoutProbability(dropoutProbability), fF(f), fReg(reg), fWeightDecay(weightDecay)
{
   // The pre-activation output and the activation gradients have the same
   // shape as the layer output.
   fInputActivation = Tensor_t(this->GetOutput().GetShape());
   fDerivatives = Tensor_t(this->GetOutput().GetShape());

   Architecture_t::InitializeActivationDescriptor(fActivationDesc, fF);
}

template <typename Architecture_t>
TDenseLayer<Architecture_t>::TDenseLayer(TDenseLayer<Architecture_t> *layer) :
   VGeneralLayer<Architecture_t>(layer),
   fInputActivation(layer->GetInputActivation().GetShape()),
   fDropoutProbability(layer->GetDropoutProbability()),
   fF(layer->GetActivationFunction()), fReg(layer->GetRegularization()), fWeightDecay(layer->GetWeightDecay())
{
   fDerivatives = Tensor_t(this->GetOutput().GetShape());
   Architecture_t::InitializeActivationDescriptor(fActivationDesc, fF);
}

template <typename Architecture_t>
TDenseLayer<Architecture_t>::TDenseLayer(const TDenseLayer &layer) :
   VGeneralLayer<Architecture_t>(layer),
   // Note: `layer` is a reference here, not a pointer; the original
   // `layer->GetInputActivation()` would not compile. Allocate a tensor of
   // matching shape, consistent with the pointer-copy constructor above.
   fInputActivation(layer.GetInputActivation().GetShape()),
   fDropoutProbability(layer.fDropoutProbability),
   fF(layer.fF), fReg(layer.fReg), fWeightDecay(layer.fWeightDecay)
{
   fDerivatives = Tensor_t(this->GetOutput().GetShape());
   Architecture_t::InitializeActivationDescriptor(fActivationDesc, fF);
}

template <typename Architecture_t>
TDenseLayer<Architecture_t>::~TDenseLayer()
{
   // Release the activation function descriptor allocated in the constructors.
   Architecture_t::ReleaseDescriptor(fActivationDesc);
}

template <typename Architecture_t>
auto TDenseLayer<Architecture_t>::Forward(Tensor_t &input, bool applyDropout) -> void
{
   // Apply dropout to the input if requested; a dropout probability of 1
   // keeps every input, so there is nothing to do in that case.
   if (applyDropout && (this->GetDropoutProbability() != 1.0)) {
      Architecture_t::DropoutForward(input, static_cast<TDescriptors *>(nullptr),
                                     static_cast<TWorkspace *>(nullptr),
                                     this->GetDropoutProbability());
   }
   // Linear transformation: output = input * W^T, then add the bias row-wise.
   Architecture_t::MultiplyTranspose(this->GetOutput(), input, this->GetWeightsAt(0));
   Architecture_t::AddRowWise(this->GetOutput(), this->GetBiasesAt(0));

   // Cache the pre-activation output: the backward pass needs it to evaluate
   // the derivative of the activation function.
   Architecture_t::Copy(this->GetInputActivation(), this->GetOutput());

   Architecture_t::ActivationFunctionForward(this->GetOutput(), this->GetActivationFunction(), fActivationDesc);
}
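
// In matrix form, the forward pass therefore computes, for a batch X of row
// vectors, U = X W^T + theta (bias broadcast row-wise) and Y = f(U), with U
// cached in fInputActivation so that Backward(...) can evaluate f'(U).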

template <typename Architecture_t>
auto TDenseLayer<Architecture_t>::Backward(Tensor_t &gradients_backward, const Tensor_t &activations_backward) -> void
{
   // Propagate the incoming activation gradients through the dropout mask
   // used during the forward pass.
   if (this->GetDropoutProbability() != 1.0) {
      Architecture_t::DropoutBackward(this->GetActivationGradients(),
                                      static_cast<TDescriptors *>(nullptr),
                                      static_cast<TWorkspace *>(nullptr));
   }

   // Multiply the activation gradients by the derivative of the activation
   // function, evaluated at the cached pre-activation output.
   Architecture_t::ActivationFunctionBackward(fDerivatives, this->GetOutput(),
                                              this->GetActivationGradients(), this->GetInputActivation(),
                                              this->GetActivationFunction(), fActivationDesc);

   // Compute the weight, bias and backward-flowing input gradients.
   Architecture_t::Backward(gradients_backward, this->GetWeightGradientsAt(0), this->GetBiasGradientsAt(0),
                            fDerivatives, this->GetActivationGradients(), this->GetWeightsAt(0),
                            activations_backward);

   // Add the contribution of the regularization term to the weight gradients.
   addRegularizationGradients<Architecture_t>(this->GetWeightGradientsAt(0), this->GetWeightsAt(0),
                                              this->GetWeightDecay(), this->GetRegularization());
}
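
// Schematically, with dY the incoming activation gradients, U the cached
// pre-activation output and X the previous layer's activations
// (activations_backward):
//    dU = dY (.) f'(U)                                      (element-wise)
//    dW = dU^T X,   dtheta = column sums of dU,   dX = dU W
// with the regularization term then added to dW.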

template <typename Architecture_t>
void TDenseLayer<Architecture_t>::Print() const
{
   std::cout << " DENSE Layer: \t";
   std::cout << " ( Input =" << std::setw(6) << this->GetWeightsAt(0).GetNcols();
   std::cout << " , Width =" << std::setw(6) << this->GetWeightsAt(0).GetNrows() << " ) ";

   std::cout << "\tOutput = ( " << std::setw(2) << this->GetOutput().GetFirstSize() << " ," << std::setw(6)
             << this->GetOutput().GetShape()[0] << " ," << std::setw(6) << this->GetOutput().GetShape()[1] << " ) ";

   std::vector<std::string> activationNames = {"Identity", "Relu", "Sigmoid", "Tanh", "SymmRelu", "SoftSign", "Gauss"};
   std::cout << "\t Activation Function = ";
   std::cout << activationNames[static_cast<int>(fF)];
   if (fDropoutProbability != 1.) std::cout << "\t Dropout prob. = " << fDropoutProbability;
   std::cout << std::endl;
}

template <typename Architecture_t>
void TDenseLayer<Architecture_t>::AddWeightsXMLTo(void *parent)
{
   // Create the XML node describing this layer and attach it to the parent.
   auto layerxml = gTools().xmlengine().NewChild(parent, nullptr, "DenseLayer");

   gTools().xmlengine().NewAttr(layerxml, nullptr, "Width", gTools().StringFromInt(this->GetWidth()));

   int activationFunction = static_cast<int>(this->GetActivationFunction());
   gTools().xmlengine().NewAttr(layerxml, nullptr, "ActivationFunction",
                                TString::Itoa(activationFunction, 10));

   // Write the weight and bias matrices.
   this->WriteMatrixToXML(layerxml, "Weights", this->GetWeightsAt(0));
   this->WriteMatrixToXML(layerxml, "Biases", this->GetBiasesAt(0));
}
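
// The resulting XML node has, schematically, the layout below; the exact
// matrix encoding is delegated to the base class WriteMatrixToXML helper:
//
//    <DenseLayer Width="..." ActivationFunction="...">
//       <Weights ...> ... </Weights>
//       <Biases ...> ... </Biases>
//    </DenseLayer>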

template <typename Architecture_t>
void TDenseLayer<Architecture_t>::ReadWeightsFromXML(void *parent)
{
   // Read back the weight and bias matrices written by AddWeightsXMLTo.
   this->ReadMatrixXML(parent, "Weights", this->GetWeightsAt(0));
   this->ReadMatrixXML(parent, "Biases", this->GetBiasesAt(0));
}

} // namespace DNN
} // namespace TMVA

#endif