File indexing completed on 2025-01-18 10:10:56
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017 #ifndef TMVA_DNN_LAYER
0018 #define TMVA_DNN_LAYER
0019
0020 #include <iostream>
0021
0022 #include "TMatrix.h"
0023 #include "Functions.h"
0024
0025 namespace TMVA
0026 {
0027 namespace DNN
0028 {
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051 template<typename Architecture_t>
0052 class TLayer
0053 {
0054
0055 public:
0056 using Scalar_t = typename Architecture_t::Scalar_t;
0057 using Matrix_t = typename Architecture_t::Matrix_t;
0058 using Tensor_t = typename Architecture_t::Tensor_t;
0059
0060
0061 private:
0062
0063 size_t fBatchSize;
0064 size_t fInputWidth;
0065 size_t fWidth;
0066
0067 Scalar_t fDropoutProbability;
0068
0069 Matrix_t fWeights;
0070 Matrix_t fBiases;
0071 Matrix_t fOutput;
0072 Matrix_t fDerivatives;
0073 Matrix_t fWeightGradients;
0074 Matrix_t fBiasGradients;
0075 Matrix_t fActivationGradients;
0076
0077 EActivationFunction fF;
0078
0079 public:
0080
0081 TLayer(size_t BatchSize,
0082 size_t InputWidth,
0083 size_t Width,
0084 EActivationFunction f,
0085 Scalar_t dropoutProbability);
0086 TLayer(const TLayer &);
0087
0088
0089
0090 void Initialize(EInitialization m);
0091
0092
0093
0094
0095
0096 void inline Forward(Matrix_t & input, bool applyDropout = false);
0097
0098
0099
0100
0101 void inline Backward(Matrix_t & gradients_backward,
0102 const Matrix_t & activations_backward,
0103 ERegularization r,
0104 Scalar_t weightDecay);
0105
0106 void Print() const;
0107
0108 size_t GetBatchSize() const {return fBatchSize;}
0109 size_t GetInputWidth() const {return fInputWidth;}
0110 size_t GetWidth() const {return fWidth;}
0111 size_t GetDropoutProbability() const {return fDropoutProbability;}
0112
0113 void SetDropoutProbability(Scalar_t p) {fDropoutProbability = p;}
0114
0115 EActivationFunction GetActivationFunction() const {return fF;}
0116
0117 Matrix_t & GetOutput() {return fOutput;}
0118 const Matrix_t & GetOutput() const {return fOutput;}
0119 Matrix_t & GetWeights() {return fWeights;}
0120 const Matrix_t & GetWeights() const {return fWeights;}
0121 Matrix_t & GetBiases() {return fBiases;}
0122 const Matrix_t & GetBiases() const {return fBiases;}
0123 Matrix_t & GetActivationGradients() {return fActivationGradients;}
0124 const Matrix_t & GetActivationGradients() const {return fActivationGradients;}
0125 Matrix_t & GetBiasGradients() {return fBiasGradients;}
0126 const Matrix_t & GetBiasGradients() const {return fBiasGradients;}
0127 Matrix_t & GetWeightGradients() {return fWeightGradients;}
0128 const Matrix_t & GetWeightGradients() const {return fWeightGradients;}
0129
0130 };
0131
0132
0133
0134
0135
0136
0137
0138
0139
0140
0141
0142
0143
0144
0145
0146 template<typename Architecture_t>
0147 class TSharedLayer
0148 {
0149
0150 public:
0151
0152 using Scalar_t = typename Architecture_t::Scalar_t;
0153 using Matrix_t = typename Architecture_t::Matrix_t;
0154 using Tensor_t = typename Architecture_t::Tensor_t;
0155
0156
0157 private:
0158
0159 size_t fBatchSize;
0160 size_t fInputWidth;
0161 size_t fWidth;
0162
0163 Scalar_t fDropoutProbability;
0164
0165 Matrix_t & fWeights;
0166 Matrix_t & fBiases;
0167 Matrix_t fOutput;
0168 Matrix_t fDerivatives;
0169 Matrix_t fWeightGradients;
0170 Matrix_t fBiasGradients;
0171 Matrix_t fActivationGradients;
0172
0173 EActivationFunction fF;
0174
0175 public:
0176
0177 TSharedLayer(size_t fBatchSize,
0178 TLayer<Architecture_t> & layer);
0179 TSharedLayer(const TSharedLayer & layer);
0180
0181
0182
0183
0184
0185
0186 void inline Forward(Matrix_t & input, bool applyDropout = false);
0187
0188
0189
0190
0191 void inline Backward(Matrix_t & gradients_backward,
0192 const Matrix_t & activations_backward,
0193 ERegularization r,
0194 Scalar_t weightDecay);
0195
0196 void Print() const;
0197
0198 size_t GetBatchSize() const {return fBatchSize;}
0199 size_t GetInputWidth() const {return fInputWidth;}
0200 size_t GetWidth() const {return fWidth;}
0201 size_t GetDropoutProbability() const {return fDropoutProbability;}
0202
0203 void SetDropoutProbability(Scalar_t p) {fDropoutProbability = p;}
0204
0205 EActivationFunction GetActivationFunction() const {return fF;}
0206
0207 Matrix_t & GetOutput() {return fOutput;}
0208 const Matrix_t & GetOutput() const {return fOutput;}
0209 Matrix_t & GetWeights() const {return fWeights;}
0210 Matrix_t & GetBiases() {return fBiases;}
0211 const Matrix_t & GetBiases() const {return fBiases;}
0212 Matrix_t & GetActivationGradients() {return fActivationGradients;}
0213 const Matrix_t & GetActivationGradients() const {return fActivationGradients;}
0214 Matrix_t & GetBiasGradients() {return fBiasGradients;}
0215 const Matrix_t & GetBiasGradients() const {return fBiasGradients;}
0216 Matrix_t & GetWeightGradients() {return fWeightGradients;}
0217 const Matrix_t & GetWeightGradients() const {return fWeightGradients;}
0218
0219 };
0220
0221
0222
0223
0224
0225
//______________________________________________________________________________
/// Construct a layer of the given width. All matrices (weights, biases,
/// output, derivative and gradient buffers) are allocated here with shapes
/// derived from (batchSize, inputWidth, width); the weights and biases are
/// NOT initialized until Initialize() is called.
template<typename Architecture_t>
TLayer<Architecture_t>::TLayer(size_t batchSize,
                               size_t inputWidth,
                               size_t width,
                               EActivationFunction f,
                               Scalar_t dropoutProbability)
   : fBatchSize(batchSize), fInputWidth(inputWidth), fWidth(width),
     fDropoutProbability(dropoutProbability), fWeights(width, fInputWidth),
     fBiases(width, 1), fOutput(fBatchSize, width), fDerivatives(fBatchSize, width),
     fWeightGradients(width, fInputWidth), fBiasGradients(width, 1),
     fActivationGradients(fBatchSize, width), fF(f)
{
   // Nothing to do: all members are set up in the initializer list.
}
0240
0241
//______________________________________________________________________________
/// Copy constructor. Allocates fresh buffers with the same shapes as in
/// `layer`, then copies only the weights and biases via the architecture
/// backend; output and gradient buffers are allocated but their contents
/// are not copied.
template<typename Architecture_t>
TLayer<Architecture_t>::TLayer(const TLayer &layer)
   : fBatchSize(layer.fBatchSize), fInputWidth(layer.fInputWidth),
     fWidth(layer.fWidth), fDropoutProbability(layer.fDropoutProbability),
     fWeights(layer.fWidth, layer.fInputWidth), fBiases(layer.fWidth, 1),
     fOutput(layer.fBatchSize, layer.fWidth),
     fDerivatives(layer.fBatchSize, layer.fWidth),
     fWeightGradients(layer.fWidth, layer.fInputWidth),
     fBiasGradients(layer.fWidth, 1),
     fActivationGradients(layer.fBatchSize, layer.fWidth),
     fF(layer.fF)
{
   Architecture_t::Copy(fWeights, layer.GetWeights());
   Architecture_t::Copy(fBiases, layer.GetBiases());
}
0257
0258
0259 template<typename Architecture_t>
0260 auto TLayer<Architecture_t>::Initialize(EInitialization m)
0261 -> void
0262 {
0263 initialize<Architecture_t>(fWeights, m);
0264 initialize<Architecture_t>(fBiases, EInitialization::kZero);
0265 }
0266
0267
0268 template<typename Architecture_t>
0269 auto inline TLayer<Architecture_t>::Forward(Matrix_t & input,
0270 bool applyDropout)
0271 -> void
0272 {
0273 if (applyDropout && (fDropoutProbability != 1.0)) {
0274 Architecture_t::DropoutForward(input, fDropoutProbability);
0275 }
0276 Architecture_t::MultiplyTranspose(fOutput, input, fWeights);
0277 Architecture_t::AddRowWise(fOutput, fBiases);
0278 Tensor_t tOutput(fOutput);
0279 Tensor_t tDerivatives(fDerivatives);
0280 evaluateDerivative<Architecture_t>(tDerivatives, fF, tOutput);
0281
0282 evaluate<Architecture_t>(tOutput, fF);
0283 }
0284
0285
0286 template<typename Architecture_t>
0287 auto TLayer<Architecture_t>::Backward(Matrix_t & gradients_backward,
0288 const Matrix_t & activations_backward,
0289 ERegularization r,
0290 Scalar_t weightDecay)
0291 -> void
0292 {
0293
0294 Tensor_t tGradBw(gradients_backward);
0295 Tensor_t tActBw(activations_backward);
0296 Tensor_t tActGrad(fActivationGradients);
0297 Tensor_t tDeriv(fDerivatives);
0298
0299 Architecture_t::Hadamard( tDeriv, tActGrad);
0300 Architecture_t::Backward( tGradBw,
0301 fWeightGradients,
0302 fBiasGradients,
0303 tDeriv,
0304 tActGrad,
0305 fWeights,
0306 tActBw);
0307 addRegularizationGradients<Architecture_t>(fWeightGradients,
0308 fWeights,
0309 weightDecay, r);
0310 }
0311
0312
0313 template<typename Architecture_t>
0314 void TLayer<Architecture_t>::Print() const
0315 {
0316 std::cout << "Width = " << fWeights.GetNrows();
0317 std::cout << ", Activation Function = ";
0318 std::cout << static_cast<int>(fF) << std::endl;
0319 }
0320
0321
0322
0323
0324
0325
0326
//______________________________________________________________________________
/// Construct a shared layer on top of `layer`: the weight and bias members
/// bind as references to the matrices of `layer` (no copy), while the output,
/// derivative and gradient buffers are freshly allocated for the possibly
/// different batch size `BatchSize`.
template<typename Architecture_t>
TSharedLayer<Architecture_t>::TSharedLayer(size_t BatchSize,
                                           TLayer<Architecture_t> &layer)
: fBatchSize(BatchSize),
  fInputWidth(layer.GetInputWidth()), fWidth(layer.GetWidth()),
  fDropoutProbability(layer.GetDropoutProbability()),
  fWeights(layer.GetWeights()), fBiases(layer.GetBiases()),
  fOutput(fBatchSize, fWidth), fDerivatives(fBatchSize, fWidth),
  fWeightGradients(fWidth, fInputWidth), fBiasGradients(fWidth, 1),
  fActivationGradients(fBatchSize, fWidth), fF(layer.GetActivationFunction())
{
   // Nothing to do: all members are set up in the initializer list.
}
0340
0341
//______________________________________________________________________________
/// Copy constructor. The new layer shares the SAME underlying weight and bias
/// matrices as `layer` (reference members rebind to the same objects); only
/// the output and gradient buffers are allocated anew, with their contents
/// left uninitialized.
template<typename Architecture_t>
TSharedLayer<Architecture_t>::TSharedLayer(const TSharedLayer &layer)
: fBatchSize(layer.fBatchSize),
  fInputWidth(layer.GetInputWidth()), fWidth(layer.GetWidth()),
  fDropoutProbability(layer.fDropoutProbability), fWeights(layer.fWeights),
  fBiases(layer.fBiases), fOutput(layer.fBatchSize, fWidth),
  fDerivatives(layer.fBatchSize, fWidth), fWeightGradients(fWidth, fInputWidth),
  fBiasGradients(fWidth, 1), fActivationGradients(layer.fBatchSize, fWidth),
  fF(layer.fF)
{
}
0353
0354
0355 template<typename Architecture_t>
0356 auto inline TSharedLayer<Architecture_t>::Forward(Matrix_t & input,
0357 bool applyDropout)
0358 -> void
0359 {
0360 if (applyDropout && (fDropoutProbability != 1.0)) {
0361 Architecture_t::DropoutForward(input, fDropoutProbability);
0362 }
0363 Architecture_t::MultiplyTranspose(fOutput, input, fWeights);
0364 Architecture_t::AddRowWise(fOutput, fBiases);
0365 Tensor_t tOutput(fOutput);
0366 Tensor_t tDerivatives(fDerivatives);
0367 evaluateDerivative<Architecture_t>(tDerivatives, fF, tOutput);
0368 evaluate<Architecture_t>(tOutput, fF);
0369 }
0370
0371
0372 template<typename Architecture_t>
0373 auto inline TSharedLayer<Architecture_t>::Backward(Matrix_t & gradients_backward,
0374 const Matrix_t & activations_backward,
0375 ERegularization r,
0376 Scalar_t weightDecay)
0377 -> void
0378 {
0379 Architecture_t::Backward(gradients_backward,
0380 fWeightGradients,
0381 fBiasGradients,
0382 fDerivatives,
0383 fActivationGradients,
0384 fWeights,
0385 activations_backward);
0386 addRegularizationGradients<Architecture_t>(fWeightGradients,
0387 fWeights,
0388 weightDecay, r);
0389 }
0390
0391
0392 template<typename Architecture_t>
0393 void TSharedLayer<Architecture_t>::Print() const
0394 {
0395 std::cout << "Width = " << fWeights.GetNrows();
0396 std::cout << ", Activation Function = ";
0397 std::cout << static_cast<int>(fF) << std::endl;
0398 }
0399
0400 }
0401 }
0402
0403 #endif