#ifndef TMVA_DNN_DEEPNET
#define TMVA_DNN_DEEPNET

#include "TMVA/DNN/Functions.h"
#include "TMVA/DNN/TensorDataLoader.h"

#include "TMVA/DNN/GeneralLayer.h"
#include "TMVA/DNN/DenseLayer.h"
#include "TMVA/DNN/ReshapeLayer.h"
#include "TMVA/DNN/BatchNormLayer.h"

#include "TMVA/DNN/CNN/ConvLayer.h"
#include "TMVA/DNN/CNN/MaxPoolLayer.h"

#include "TMVA/DNN/RNN/RNNLayer.h"
#include "TMVA/DNN/RNN/LSTMLayer.h"
#include "TMVA/DNN/RNN/GRULayer.h"

#ifdef HAVE_DAE
#include "TMVA/DNN/DAE/CompressionLayer.h"
#include "TMVA/DNN/DAE/CorruptionLayer.h"
#include "TMVA/DNN/DAE/ReconstructionLayer.h"
#include "TMVA/DNN/DAE/LogisticRegressionLayer.h"
#endif

#include <vector>
#include <cmath>

namespace TMVA {
namespace DNN {

using namespace CNN;
using namespace RNN;

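/*! \class TDeepNet
    Generic deep neural network class: a container of layers (owned by the net) with
    methods to build the network, run the forward and backward passes, evaluate the
    loss and produce predictions.

    A minimal usage sketch (assuming the CPU backend TMVA::DNN::TCpu<Double_t> from
    TMVA/DNN/Architectures/Cpu.h; the batch layout chosen below for a dense network
    follows the usual TMVA convention and is an assumption, not part of this header):

       using namespace TMVA::DNN;
       using Architecture_t = TCpu<Double_t>;
       // batch size 32, input of 20 variables, batch tensor of shape 1 x 32 x 20
       TDeepNet<Architecture_t> net(32, 1, 1, 20, 1, 32, 20,
                                    ELossFunction::kCrossEntropy, EInitialization::kGauss);
       net.AddDenseLayer(64, EActivationFunction::kRelu);
       net.AddDenseLayer(1, EActivationFunction::kIdentity);
       net.Initialize();
*/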
template <typename Architecture_t, typename Layer_t = VGeneralLayer<Architecture_t>>
class TDeepNet {
public:
   using Tensor_t = typename Architecture_t::Tensor_t;
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Scalar_t = typename Architecture_t::Scalar_t;

private:
   bool inline isInteger(Scalar_t x) const { return x == floor(x); }
   size_t calculateDimension(int imgDim, int fltDim, int padding, int stride);

private:
   std::vector<Layer_t *> fLayers;

   size_t fBatchSize;
   size_t fInputDepth;
   size_t fInputHeight;
   size_t fInputWidth;

   size_t fBatchDepth;
   size_t fBatchHeight;
   size_t fBatchWidth;

   bool fIsTraining;

   ELossFunction fJ;
   EInitialization fI;
   ERegularization fR;
   Scalar_t fWeightDecay;

public:
   TDeepNet();

   TDeepNet(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t BatchDepth,
            size_t BatchHeight, size_t BatchWidth, ELossFunction fJ, EInitialization fI = EInitialization::kZero,
            ERegularization fR = ERegularization::kNone, Scalar_t fWeightDecay = 0.0, bool isTraining = false);

   TDeepNet(const TDeepNet &);

   ~TDeepNet();

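   /// Add a convolutional layer with `depth` filters of size filterHeight x filterWidth,
   /// the given strides and zero padding, followed by the activation function f.
   /// The returned layer is owned by the net.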
   TConvLayer<Architecture_t> *AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows,
                                            size_t strideCols, size_t paddingHeight, size_t paddingWidth,
                                            EActivationFunction f, Scalar_t dropoutProbability = 1.0);

   void AddConvLayer(TConvLayer<Architecture_t> *convLayer);

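   /// Add a max-pooling layer with the given pooling-frame size and strides.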
   TMaxPoolLayer<Architecture_t> *AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows,
                                                  size_t strideCols, Scalar_t dropoutProbability = 1.0);

   void AddMaxPoolLayer(CNN::TMaxPoolLayer<Architecture_t> *maxPoolLayer);

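   /// Add a vanilla recurrent layer with stateSize hidden units, unrolled over timeSteps
   /// time steps; inputSize has to match the width of the previous layer (or of the net input).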
   TBasicRNNLayer<Architecture_t> *AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps,
                                                    bool rememberState = false, bool returnSequence = false,
                                                    EActivationFunction f = EActivationFunction::kTanh);

   void AddBasicRNNLayer(TBasicRNNLayer<Architecture_t> *basicRNNLayer);

   TBasicLSTMLayer<Architecture_t> *AddBasicLSTMLayer(size_t stateSize, size_t inputSize, size_t timeSteps,
                                                      bool rememberState = false, bool returnSequence = false);

   void AddBasicLSTMLayer(TBasicLSTMLayer<Architecture_t> *basicLSTMLayer);

   TBasicGRULayer<Architecture_t> *AddBasicGRULayer(size_t stateSize, size_t inputSize, size_t timeSteps,
                                                    bool rememberState = false, bool returnSequence = false,
                                                    bool resetGateAfter = false);

   void AddBasicGRULayer(TBasicGRULayer<Architecture_t> *basicGRULayer);

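   /// Add a fully connected layer of the given width and activation function; the input
   /// width is taken from the previous layer (or from the net input for the first layer).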
   TDenseLayer<Architecture_t> *AddDenseLayer(size_t width, EActivationFunction f, Scalar_t dropoutProbability = 1.0);

   void AddDenseLayer(TDenseLayer<Architecture_t> *denseLayer);

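   /// Add a reshape layer; with flattening = true the input is flattened to a single row per event.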
   TReshapeLayer<Architecture_t> *AddReshapeLayer(size_t depth, size_t height, size_t width, bool flattening);

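   /// Add a batch-normalization layer; the normalization axis is chosen automatically
   /// (axis 1 after convolutional or pooling layers, the last axis otherwise).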
   TBatchNormLayer<Architecture_t> *AddBatchNormLayer(Scalar_t momentum = -1, Scalar_t epsilon = 0.0001);

   void AddReshapeLayer(TReshapeLayer<Architecture_t> *reshapeLayer);

#ifdef HAVE_DAE
   TCorruptionLayer<Architecture_t> *AddCorruptionLayer(size_t visibleUnits, size_t hiddenUnits,
                                                        Scalar_t dropoutProbability, Scalar_t corruptionLevel);

   void AddCorruptionLayer(TCorruptionLayer<Architecture_t> *corruptionLayer);

   TCompressionLayer<Architecture_t> *AddCompressionLayer(size_t visibleUnits, size_t hiddenUnits,
                                                          Scalar_t dropoutProbability, EActivationFunction f,
                                                          std::vector<Matrix_t> weights, std::vector<Matrix_t> biases);

   void AddCompressionLayer(TCompressionLayer<Architecture_t> *compressionLayer);

   TReconstructionLayer<Architecture_t> *AddReconstructionLayer(size_t visibleUnits, size_t hiddenUnits,
                                                                Scalar_t learningRate, EActivationFunction f,
                                                                std::vector<Matrix_t> weights,
                                                                std::vector<Matrix_t> biases, Scalar_t corruptionLevel,
                                                                Scalar_t dropoutProbability);

   void AddReconstructionLayer(TReconstructionLayer<Architecture_t> *reconstructionLayer);

   TLogisticRegressionLayer<Architecture_t> *AddLogisticRegressionLayer(size_t inputUnits, size_t outputUnits,
                                                                        size_t testDataBatchSize,
                                                                        Scalar_t learningRate);

   void AddLogisticRegressionLayer(TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer);

   void PreTrain(std::vector<Matrix_t> &input, std::vector<size_t> numHiddenUnitsPerLayer, Scalar_t learningRate,
                 Scalar_t corruptionLevel, Scalar_t dropoutProbability, size_t epochs, EActivationFunction f,
                 bool applyDropout = false);

   void FineTune(std::vector<Matrix_t> &input, std::vector<Matrix_t> &testInput, std::vector<Matrix_t> &outputLabel,
                 size_t outputUnits, size_t testDataBatchSize, Scalar_t learningRate, size_t epochs);
#endif

   void Initialize();

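   /// Propagate the input tensor through all layers in order; each layer consumes the
   /// output of the previous one.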
   void Forward(Tensor_t &input, bool applyDropout = false);

   void ResetTraining();

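   /// Evaluate the loss gradients at the output and back-propagate them through all
   /// layers, accumulating the weight and bias gradients of each layer.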
   void Backward(const Tensor_t &input, const Matrix_t &groundTruth, const Matrix_t &weights);

#ifdef USE_PARALLEL_DEEPNET
   void ParallelForward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                        std::vector<TTensorBatch<Architecture_t>> &batches, bool applyDropout = false);

   void ParallelBackward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                         std::vector<TTensorBatch<Architecture_t>> &batches, Scalar_t learningRate);

   void ParallelBackwardMomentum(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                 std::vector<TTensorBatch<Architecture_t>> &batches, Scalar_t learningRate,
                                 Scalar_t momentum);

   void ParallelBackwardNestorov(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                 std::vector<TTensorBatch<Architecture_t>> &batches, Scalar_t learningRate,
                                 Scalar_t momentum);
#endif

   void Update(Scalar_t learningRate);

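   /// Compute the loss of the current network output with respect to groundTruth,
   /// optionally adding the regularization term.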
   Scalar_t Loss(const Matrix_t &groundTruth, const Matrix_t &weights, bool includeRegularization = true) const;

   Scalar_t Loss(Tensor_t &input, const Matrix_t &groundTruth, const Matrix_t &weights,
                 bool inTraining = false, bool includeRegularization = true);

   Scalar_t RegularizationTerm() const;

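   /// Apply the output function f to the network output to obtain the predictions.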
   void Prediction(Matrix_t &predictions, EOutputFunction f) const;

   void Prediction(Matrix_t &predictions, Tensor_t &input, EOutputFunction f);

   void Print() const;

   inline Layer_t *GetLayerAt(size_t i) { return fLayers[i]; }
   inline const Layer_t *GetLayerAt(size_t i) const { return fLayers[i]; }

   inline size_t GetDepth() const { return fLayers.size(); }
   inline size_t GetOutputWidth() const { return fLayers.back()->GetWidth(); }

   inline std::vector<Layer_t *> &GetLayers() { return fLayers; }
   inline const std::vector<Layer_t *> &GetLayers() const { return fLayers; }

   inline void Clear() { fLayers.clear(); }

   inline size_t GetBatchSize() const { return fBatchSize; }
   inline size_t GetInputDepth() const { return fInputDepth; }
   inline size_t GetInputHeight() const { return fInputHeight; }
   inline size_t GetInputWidth() const { return fInputWidth; }

   inline size_t GetBatchDepth() const { return fBatchDepth; }
   inline size_t GetBatchHeight() const { return fBatchHeight; }
   inline size_t GetBatchWidth() const { return fBatchWidth; }

   inline bool IsTraining() const { return fIsTraining; }

   inline ELossFunction GetLossFunction() const { return fJ; }
   inline EInitialization GetInitialization() const { return fI; }
   inline ERegularization GetRegularization() const { return fR; }
   inline Scalar_t GetWeightDecay() const { return fWeightDecay; }

   inline void SetBatchSize(size_t batchSize) { fBatchSize = batchSize; }
   inline void SetInputDepth(size_t inputDepth) { fInputDepth = inputDepth; }
   inline void SetInputHeight(size_t inputHeight) { fInputHeight = inputHeight; }
   inline void SetInputWidth(size_t inputWidth) { fInputWidth = inputWidth; }
   inline void SetBatchDepth(size_t batchDepth) { fBatchDepth = batchDepth; }
   inline void SetBatchHeight(size_t batchHeight) { fBatchHeight = batchHeight; }
   inline void SetBatchWidth(size_t batchWidth) { fBatchWidth = batchWidth; }
   inline void SetLossFunction(ELossFunction J) { fJ = J; }
   inline void SetInitialization(EInitialization I) { fI = I; }
   inline void SetRegularization(ERegularization R) { fR = R; }
   inline void SetWeightDecay(Scalar_t weightDecay) { fWeightDecay = weightDecay; }

   void SetDropoutProbabilities(const std::vector<Double_t> &probabilities);
};

template <typename Architecture_t, typename Layer_t>
TDeepNet<Architecture_t, Layer_t>::TDeepNet()
   : fLayers(), fBatchSize(0), fInputDepth(0), fInputHeight(0), fInputWidth(0), fBatchDepth(0), fBatchHeight(0),
     fBatchWidth(0), fIsTraining(true), fJ(ELossFunction::kMeanSquaredError), fI(EInitialization::kZero),
     fR(ERegularization::kNone), fWeightDecay(0.0)
{
}

template <typename Architecture_t, typename Layer_t>
TDeepNet<Architecture_t, Layer_t>::TDeepNet(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth,
                                            size_t batchDepth, size_t batchHeight, size_t batchWidth, ELossFunction J,
                                            EInitialization I, ERegularization R, Scalar_t weightDecay, bool isTraining)
   : fLayers(), fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fInputWidth(inputWidth),
     fBatchDepth(batchDepth), fBatchHeight(batchHeight), fBatchWidth(batchWidth), fIsTraining(isTraining), fJ(J), fI(I),
     fR(R), fWeightDecay(weightDecay)
{
}

template <typename Architecture_t, typename Layer_t>
TDeepNet<Architecture_t, Layer_t>::TDeepNet(const TDeepNet &deepNet)
   : fLayers(), fBatchSize(deepNet.fBatchSize), fInputDepth(deepNet.fInputDepth), fInputHeight(deepNet.fInputHeight),
     fInputWidth(deepNet.fInputWidth), fBatchDepth(deepNet.fBatchDepth), fBatchHeight(deepNet.fBatchHeight),
     fBatchWidth(deepNet.fBatchWidth), fIsTraining(deepNet.fIsTraining), fJ(deepNet.fJ), fI(deepNet.fI), fR(deepNet.fR),
     fWeightDecay(deepNet.fWeightDecay)
{
}

template <typename Architecture_t, typename Layer_t>
TDeepNet<Architecture_t, Layer_t>::~TDeepNet()
{
   // The net owns its layers.
   for (auto layer : fLayers)
      delete layer;
   fLayers.clear();
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::calculateDimension(int imgDim, int fltDim, int padding, int stride) -> size_t
{
   // Use floating-point division so that non-integer results are caught by the check below.
   Scalar_t dimension = ((Scalar_t)(imgDim - fltDim + 2 * padding)) / stride + 1;
   if (!isInteger(dimension) || dimension <= 0) {
      this->Print();
      int iLayer = fLayers.size();
      Fatal("calculateDimension", "Incompatible hyperparameters for layer %d - (imageDim, filterDim, padding, stride) %d , %d , %d , %d",
            iLayer, imgDim, fltDim, padding, stride);
   }

   return (size_t)dimension;
}

template <typename Architecture_t, typename Layer_t>
TConvLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddConvLayer(size_t depth, size_t filterHeight,
                                                                            size_t filterWidth, size_t strideRows,
                                                                            size_t strideCols, size_t paddingHeight,
                                                                            size_t paddingWidth, EActivationFunction f,
                                                                            Scalar_t dropoutProbability)
{
   size_t batchSize = this->GetBatchSize();
   size_t inputDepth;
   size_t inputHeight;
   size_t inputWidth;
   EInitialization init = this->GetInitialization();
   ERegularization reg = this->GetRegularization();
   Scalar_t decay = this->GetWeightDecay();

   if (fLayers.size() == 0) {
      inputDepth = this->GetInputDepth();
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputDepth = lastLayer->GetDepth();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
   }

   TConvLayer<Architecture_t> *convLayer = new TConvLayer<Architecture_t>(
      batchSize, inputDepth, inputHeight, inputWidth, depth, init, filterHeight, filterWidth, strideRows,
      strideCols, paddingHeight, paddingWidth, dropoutProbability, f, reg, decay);

   fLayers.push_back(convLayer);
   return convLayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddConvLayer(TConvLayer<Architecture_t> *convLayer)
{
   fLayers.push_back(convLayer);
}

template <typename Architecture_t, typename Layer_t>
TMaxPoolLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddMaxPoolLayer(size_t frameHeight, size_t frameWidth,
                                                                                  size_t strideRows, size_t strideCols,
                                                                                  Scalar_t dropoutProbability)
{
   size_t batchSize = this->GetBatchSize();
   size_t inputDepth;
   size_t inputHeight;
   size_t inputWidth;

   if (fLayers.size() == 0) {
      inputDepth = this->GetInputDepth();
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputDepth = lastLayer->GetDepth();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
   }

   TMaxPoolLayer<Architecture_t> *maxPoolLayer = new TMaxPoolLayer<Architecture_t>(
      batchSize, inputDepth, inputHeight, inputWidth, frameHeight, frameWidth,
      strideRows, strideCols, dropoutProbability);

   fLayers.push_back(maxPoolLayer);

   return maxPoolLayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddMaxPoolLayer(TMaxPoolLayer<Architecture_t> *maxPoolLayer)
{
   fLayers.push_back(maxPoolLayer);
}

template <typename Architecture_t, typename Layer_t>
TBasicRNNLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddBasicRNNLayer(size_t stateSize, size_t inputSize,
                                                                                    size_t timeSteps,
                                                                                    bool rememberState, bool returnSequence,
                                                                                    EActivationFunction f)
{
   size_t inputHeight, inputWidth, inputDepth;
   if (fLayers.size() == 0) {
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
      inputDepth = this->GetInputDepth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
      inputDepth = lastLayer->GetDepth();
   }
   if (inputSize != inputWidth) {
      Error("AddBasicRNNLayer", "Inconsistent input size with input layout - it should be %zu instead of %zu", inputSize, inputWidth);
   }
   if (timeSteps != inputHeight && timeSteps != inputDepth) {
      Error("AddBasicRNNLayer", "Inconsistent time steps with input layout - it should be %zu instead of %zu or %zu", timeSteps, inputHeight, inputDepth);
   }

   TBasicRNNLayer<Architecture_t> *basicRNNLayer =
      new TBasicRNNLayer<Architecture_t>(this->GetBatchSize(), stateSize, inputSize, timeSteps, rememberState, returnSequence,
                                         f, fIsTraining, this->GetInitialization());
   fLayers.push_back(basicRNNLayer);
   return basicRNNLayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddBasicRNNLayer(TBasicRNNLayer<Architecture_t> *basicRNNLayer)
{
   fLayers.push_back(basicRNNLayer);
}

template <typename Architecture_t, typename Layer_t>
TBasicLSTMLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddBasicLSTMLayer(size_t stateSize, size_t inputSize,
                                                                                      size_t timeSteps, bool rememberState, bool returnSequence)
{
   size_t inputHeight, inputWidth, inputDepth;
   if (fLayers.size() == 0) {
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
      inputDepth = this->GetInputDepth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
      inputDepth = lastLayer->GetDepth();
   }
   if (inputSize != inputWidth) {
      Error("AddBasicLSTMLayer", "Inconsistent input size with input layout - it should be %zu instead of %zu", inputSize, inputWidth);
   }
   if (timeSteps != inputHeight && timeSteps != inputDepth) {
      Error("AddBasicLSTMLayer", "Inconsistent time steps with input layout - it should be %zu instead of %zu", timeSteps, inputHeight);
   }

   TBasicLSTMLayer<Architecture_t> *basicLSTMLayer =
      new TBasicLSTMLayer<Architecture_t>(this->GetBatchSize(), stateSize, inputSize, timeSteps, rememberState, returnSequence,
                                          DNN::EActivationFunction::kSigmoid,
                                          DNN::EActivationFunction::kTanh,
                                          fIsTraining, this->GetInitialization());
   fLayers.push_back(basicLSTMLayer);
   return basicLSTMLayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddBasicLSTMLayer(TBasicLSTMLayer<Architecture_t> *basicLSTMLayer)
{
   fLayers.push_back(basicLSTMLayer);
}

template <typename Architecture_t, typename Layer_t>
TBasicGRULayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddBasicGRULayer(size_t stateSize, size_t inputSize,
                                                                                    size_t timeSteps, bool rememberState, bool returnSequence, bool resetGateAfter)
{
   size_t inputHeight, inputWidth, inputDepth;
   if (fLayers.size() == 0) {
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
      inputDepth = this->GetInputDepth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
      inputDepth = lastLayer->GetDepth();
   }
   if (inputSize != inputWidth) {
      Error("AddBasicGRULayer", "Inconsistent input size with input layout - it should be %zu instead of %zu", inputSize, inputWidth);
   }
   if (timeSteps != inputHeight && timeSteps != inputDepth) {
      Error("AddBasicGRULayer", "Inconsistent time steps with input layout - it should be %zu instead of %zu", timeSteps, inputHeight);
   }

   TBasicGRULayer<Architecture_t> *basicGRULayer =
      new TBasicGRULayer<Architecture_t>(this->GetBatchSize(), stateSize, inputSize, timeSteps, rememberState, returnSequence, resetGateAfter,
                                         DNN::EActivationFunction::kSigmoid,
                                         DNN::EActivationFunction::kTanh,
                                         fIsTraining, this->GetInitialization());
   fLayers.push_back(basicGRULayer);
   return basicGRULayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddBasicGRULayer(TBasicGRULayer<Architecture_t> *basicGRULayer)
{
   fLayers.push_back(basicGRULayer);
}

#ifdef HAVE_DAE

template <typename Architecture_t, typename Layer_t>
TCorruptionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddCorruptionLayer(size_t visibleUnits,
                                                                                        size_t hiddenUnits,
                                                                                        Scalar_t dropoutProbability,
                                                                                        Scalar_t corruptionLevel)
{
   size_t batchSize = this->GetBatchSize();

   TCorruptionLayer<Architecture_t> *corruptionLayer =
      new TCorruptionLayer<Architecture_t>(batchSize, visibleUnits, hiddenUnits, dropoutProbability, corruptionLevel);
   fLayers.push_back(corruptionLayer);
   return corruptionLayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddCorruptionLayer(TCorruptionLayer<Architecture_t> *corruptionLayer)
{
   fLayers.push_back(corruptionLayer);
}

template <typename Architecture_t, typename Layer_t>
TCompressionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddCompressionLayer(
   size_t visibleUnits, size_t hiddenUnits, Scalar_t dropoutProbability, EActivationFunction f,
   std::vector<Matrix_t> weights, std::vector<Matrix_t> biases)
{
   size_t batchSize = this->GetBatchSize();

   TCompressionLayer<Architecture_t> *compressionLayer = new TCompressionLayer<Architecture_t>(
      batchSize, visibleUnits, hiddenUnits, dropoutProbability, f, weights, biases);
   fLayers.push_back(compressionLayer);
   return compressionLayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddCompressionLayer(TCompressionLayer<Architecture_t> *compressionLayer)
{
   fLayers.push_back(compressionLayer);
}

template <typename Architecture_t, typename Layer_t>
TReconstructionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddReconstructionLayer(
   size_t visibleUnits, size_t hiddenUnits, Scalar_t learningRate, EActivationFunction f, std::vector<Matrix_t> weights,
   std::vector<Matrix_t> biases, Scalar_t corruptionLevel, Scalar_t dropoutProbability)
{
   size_t batchSize = this->GetBatchSize();

   TReconstructionLayer<Architecture_t> *reconstructionLayer = new TReconstructionLayer<Architecture_t>(
      batchSize, visibleUnits, hiddenUnits, learningRate, f, weights, biases, corruptionLevel, dropoutProbability);
   fLayers.push_back(reconstructionLayer);
   return reconstructionLayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddReconstructionLayer(
   TReconstructionLayer<Architecture_t> *reconstructionLayer)
{
   fLayers.push_back(reconstructionLayer);
}

template <typename Architecture_t, typename Layer_t>
TLogisticRegressionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddLogisticRegressionLayer(
   size_t inputUnits, size_t outputUnits, size_t testDataBatchSize, Scalar_t learningRate)
{
   size_t batchSize = this->GetBatchSize();

   TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer =
      new TLogisticRegressionLayer<Architecture_t>(batchSize, inputUnits, outputUnits, testDataBatchSize, learningRate);
   fLayers.push_back(logisticRegressionLayer);
   return logisticRegressionLayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddLogisticRegressionLayer(
   TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer)
{
   fLayers.push_back(logisticRegressionLayer);
}
#endif

template <typename Architecture_t, typename Layer_t>
TDenseLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddDenseLayer(size_t width, EActivationFunction f,
                                                                              Scalar_t dropoutProbability)
{
   size_t batchSize = this->GetBatchSize();
   size_t inputWidth;
   EInitialization init = this->GetInitialization();
   ERegularization reg = this->GetRegularization();
   Scalar_t decay = this->GetWeightDecay();

   if (fLayers.size() == 0) {
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputWidth = lastLayer->GetWidth();
   }

   TDenseLayer<Architecture_t> *denseLayer =
      new TDenseLayer<Architecture_t>(batchSize, inputWidth, width, init, dropoutProbability, f, reg, decay);

   fLayers.push_back(denseLayer);

   return denseLayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddDenseLayer(TDenseLayer<Architecture_t> *denseLayer)
{
   fLayers.push_back(denseLayer);
}

template <typename Architecture_t, typename Layer_t>
TReshapeLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddReshapeLayer(size_t depth, size_t height,
                                                                                  size_t width, bool flattening)
{
   size_t batchSize = this->GetBatchSize();
   size_t inputDepth;
   size_t inputHeight;
   size_t inputWidth;
   size_t outputNSlices;
   size_t outputNRows;
   size_t outputNCols;

   if (fLayers.size() == 0) {
      inputDepth = this->GetInputDepth();
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputDepth = lastLayer->GetDepth();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
   }

   if (flattening) {
      outputNSlices = 1;
      outputNRows = this->GetBatchSize();
      outputNCols = depth * height * width;
      size_t inputNCols = inputDepth * inputHeight * inputWidth;
      if (outputNCols != 0 && outputNCols != inputNCols) {
         Info("AddReshapeLayer", "Incompatible dimensions - the product of the input dimensions %zu x %zu x %zu should equal the requested output %zu x %zu x %zu - forcing the flattened output size to %zu",
              inputDepth, inputHeight, inputWidth, depth, height, width, inputNCols);
      }
      outputNCols = inputNCols;
      depth = 1;
      height = 1;
      width = outputNCols;
   } else {
      outputNSlices = this->GetBatchSize();
      outputNRows = depth;
      outputNCols = height * width;
   }

   TReshapeLayer<Architecture_t> *reshapeLayer =
      new TReshapeLayer<Architecture_t>(batchSize, inputDepth, inputHeight, inputWidth, depth, height, width,
                                        outputNSlices, outputNRows, outputNCols, flattening);

   fLayers.push_back(reshapeLayer);

   return reshapeLayer;
}

template <typename Architecture_t, typename Layer_t>
TBatchNormLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddBatchNormLayer(Scalar_t momentum, Scalar_t epsilon)
{
   int axis = -1;
   size_t batchSize = this->GetBatchSize();
   size_t inputDepth = 0;
   size_t inputHeight = 0;
   size_t inputWidth = 0;

   std::vector<size_t> shape = {1, 1, 1};
   if (fLayers.size() == 0) {
      inputDepth = this->GetInputDepth();
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();

      shape[0] = batchSize;
      shape[1] = inputWidth;
      shape[2] = 1;
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputDepth = lastLayer->GetDepth();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
      shape = lastLayer->GetOutput().GetShape();
      if (dynamic_cast<TConvLayer<Architecture_t> *>(lastLayer) != nullptr ||
          dynamic_cast<TMaxPoolLayer<Architecture_t> *>(lastLayer) != nullptr)
         axis = 1;
      if (shape.size() > 3) {
         for (size_t i = 3; i < shape.size(); ++i)
            shape[2] *= shape[i];
      }
   }

   auto bnormLayer =
      new TBatchNormLayer<Architecture_t>(batchSize, inputDepth, inputHeight, inputWidth, shape, axis, momentum, epsilon);

   fLayers.push_back(bnormLayer);

   return bnormLayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddReshapeLayer(TReshapeLayer<Architecture_t> *reshapeLayer)
{
   fLayers.push_back(reshapeLayer);
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Initialize() -> void
{
   for (size_t i = 0; i < fLayers.size(); i++) {
      fLayers[i]->Initialize();
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ResetTraining() -> void
{
   for (size_t i = 0; i < fLayers.size(); i++) {
      fLayers[i]->ResetTraining();
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Forward(Tensor_t &input, bool applyDropout) -> void
{
   fLayers.front()->Forward(input, applyDropout);

   for (size_t i = 1; i < fLayers.size(); i++) {
      fLayers[i]->Forward(fLayers[i - 1]->GetOutput(), applyDropout);
   }
}

#ifdef HAVE_DAE

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::PreTrain(std::vector<Matrix_t> &input,
                                                 std::vector<size_t> numHiddenUnitsPerLayer, Scalar_t learningRate,
                                                 Scalar_t corruptionLevel, Scalar_t dropoutProbability, size_t epochs,
                                                 EActivationFunction f, bool applyDropout) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   // numHiddenUnitsPerLayer is a std::vector, so its size() gives the number of hidden layers.
   size_t numOfHiddenLayers = numHiddenUnitsPerLayer.size();

   size_t visibleUnits = (size_t)input[0].GetNrows();

   AddCorruptionLayer(visibleUnits, numHiddenUnitsPerLayer[0], dropoutProbability, corruptionLevel);
   fLayers.back()->Initialize();
   fLayers.back()->Forward(input, applyDropout);

   AddCompressionLayer(visibleUnits, numHiddenUnitsPerLayer[0], dropoutProbability, f, fLayers.back()->GetWeights(),
                       fLayers.back()->GetBiases());
   fLayers.back()->Initialize();
   fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);

   AddReconstructionLayer(visibleUnits, numHiddenUnitsPerLayer[0], learningRate, f, fLayers.back()->GetWeights(),
                          fLayers.back()->GetBiases(), corruptionLevel, dropoutProbability);
   fLayers.back()->Initialize();
   fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
   fLayers.back()->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1, fLayers[fLayers.size() - 3]->GetOutput(),
                            input);

   size_t weightsSize = fLayers.back()->GetWeights().size();
   size_t biasesSize = fLayers.back()->GetBiases().size();
   for (size_t epoch = 0; epoch < epochs - 1; epoch++) {
      for (size_t j = 0; j < weightsSize; j++) {
         Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetWeightsAt(j), fLayers.back()->GetWeightsAt(j));
      }
      for (size_t j = 0; j < biasesSize; j++) {
         Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetBiasesAt(j), fLayers.back()->GetBiasesAt(j));
      }
      fLayers[fLayers.size() - 2]->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);
      fLayers[fLayers.size() - 1]->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
      fLayers[fLayers.size() - 1]->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1,
                                            fLayers[fLayers.size() - 3]->GetOutput(), input);
   }
   fLayers.back()->Print();

   for (size_t i = 1; i < numOfHiddenLayers; i++) {
      AddCorruptionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], dropoutProbability, corruptionLevel);
      fLayers.back()->Initialize();
      fLayers.back()->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);

      AddCompressionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], dropoutProbability, f,
                          fLayers.back()->GetWeights(), fLayers.back()->GetBiases());
      fLayers.back()->Initialize();
      fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);

      AddReconstructionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], learningRate, f,
                             fLayers.back()->GetWeights(), fLayers.back()->GetBiases(), corruptionLevel,
                             dropoutProbability);
      fLayers.back()->Initialize();
      fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
      fLayers.back()->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1, fLayers[fLayers.size() - 3]->GetOutput(),
                               fLayers[fLayers.size() - 5]->GetOutput());

      size_t _weightsSize = fLayers.back()->GetWeights().size();
      size_t _biasesSize = fLayers.back()->GetBiases().size();
      for (size_t epoch = 0; epoch < epochs - 1; epoch++) {
         for (size_t j = 0; j < _weightsSize; j++) {
            Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetWeightsAt(j), fLayers.back()->GetWeightsAt(j));
         }
         for (size_t j = 0; j < _biasesSize; j++) {
            Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetBiasesAt(j), fLayers.back()->GetBiasesAt(j));
         }
         fLayers[fLayers.size() - 2]->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);
         fLayers[fLayers.size() - 1]->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
         fLayers[fLayers.size() - 1]->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1,
                                               fLayers[fLayers.size() - 3]->GetOutput(),
                                               fLayers[fLayers.size() - 5]->GetOutput());
      }
      fLayers.back()->Print();
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::FineTune(std::vector<Matrix_t> &input, std::vector<Matrix_t> &testInput,
                                                 std::vector<Matrix_t> &inputLabel, size_t outputUnits,
                                                 size_t testDataBatchSize, Scalar_t learningRate, size_t epochs) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   if (fLayers.size() == 0) {
      size_t inputUnits = input[0].GetNrows();

      AddLogisticRegressionLayer(inputUnits, outputUnits, testDataBatchSize, learningRate);
      fLayers.back()->Initialize();
      for (size_t i = 0; i < epochs; i++) {
         fLayers.back()->Backward(inputLabel, inp1, input, inp2);
      }
      fLayers.back()->Forward(input, false);
      fLayers.back()->Print();
   } else {
      size_t inputUnits = fLayers.back()->GetOutputAt(0).GetNrows();
      AddLogisticRegressionLayer(inputUnits, outputUnits, testDataBatchSize, learningRate);
      fLayers.back()->Initialize();
      for (size_t i = 0; i < epochs; i++) {
         fLayers.back()->Backward(inputLabel, inp1, fLayers[fLayers.size() - 2]->GetOutput(), inp2);
      }
      fLayers.back()->Forward(testInput, false);
      fLayers.back()->Print();
   }
}
#endif

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Backward(const Tensor_t &input, const Matrix_t &groundTruth,
                                                 const Matrix_t &weights) -> void
{
   Matrix_t last_actgrad = fLayers.back()->GetActivationGradientsAt(0);
   Matrix_t last_output = fLayers.back()->GetOutputAt(0);
   evaluateGradients<Architecture_t>(last_actgrad, this->GetLossFunction(), groundTruth,
                                     last_output, weights);

   for (size_t i = fLayers.size() - 1; i > 0; i--) {
      auto &activation_gradient_backward = fLayers[i - 1]->GetActivationGradients();
      auto &activations_backward = fLayers[i - 1]->GetOutput();
      fLayers[i]->Backward(activation_gradient_backward, activations_backward);
   }

   Tensor_t dummy;
   fLayers[0]->Backward(dummy, input);
}

#ifdef USE_PARALLEL_DEEPNET

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ParallelForward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                                        std::vector<TTensorBatch<Architecture_t>> &batches,
                                                        bool applyDropout) -> void
{
   size_t depth = this->GetDepth();

   for (size_t i = 0; i < nets.size(); i++) {
      nets[i].GetLayerAt(0)->Forward(batches[i].GetInput(), applyDropout);
   }

   for (size_t i = 1; i < depth; i++) {
      for (size_t j = 0; j < nets.size(); j++) {
         nets[j].GetLayerAt(i)->Forward(nets[j].GetLayerAt(i - 1)->GetOutput(), applyDropout);
      }
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ParallelBackward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                                         std::vector<TTensorBatch<Architecture_t>> &batches,
                                                         Scalar_t learningRate) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   size_t depth = this->GetDepth();

   for (size_t i = 0; i < nets.size(); i++) {
      evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
                                        nets[i].GetLossFunction(), batches[i].GetOutput(),
                                        nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
   }

   for (size_t i = depth - 1; i > 0; i--) {
      for (size_t j = 0; j < nets.size(); j++) {
         nets[j].GetLayerAt(i)->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(),
                                         nets[j].GetLayerAt(i - 1)->GetOutput(), inp1, inp2);
      }
   }

   std::vector<Matrix_t> dummy;

   for (size_t i = 0; i < nets.size(); i++) {
      nets[i].GetLayerAt(0)->Backward(dummy, batches[i].GetInput(), inp1, inp2);
   }

   for (size_t i = 0; i < nets.size(); i++) {
      for (size_t j = 0; j < depth; j++) {
         Layer_t *masterLayer = this->GetLayerAt(j);
         Layer_t *layer = nets[i].GetLayerAt(j);

         masterLayer->UpdateWeights(layer->GetWeightGradients(), learningRate);
         layer->CopyWeights(masterLayer->GetWeights());

         masterLayer->UpdateBiases(layer->GetBiasGradients(), learningRate);
         layer->CopyBiases(masterLayer->GetBiases());
      }
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ParallelBackwardMomentum(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                                                 std::vector<TTensorBatch<Architecture_t>> &batches,
                                                                 Scalar_t learningRate, Scalar_t momentum) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   size_t depth = this->GetDepth();

   for (size_t i = 0; i < nets.size(); i++) {
      evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
                                        nets[i].GetLossFunction(), batches[i].GetOutput(),
                                        nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
   }

   for (size_t i = depth - 1; i > 0; i--) {
      Layer_t *masterLayer = this->GetLayerAt(i);

      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         layer->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(), nets[j].GetLayerAt(i - 1)->GetOutput(),
                         inp1, inp2);
         masterLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
         masterLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
      }

      masterLayer->UpdateWeightGradients(masterLayer->GetWeightGradients(), 1.0 - momentum);
      masterLayer->UpdateBiasGradients(masterLayer->GetBiasGradients(), 1.0 - momentum);
   }

   std::vector<Matrix_t> dummy;

   Layer_t *masterFirstLayer = this->GetLayerAt(0);
   for (size_t i = 0; i < nets.size(); i++) {
      Layer_t *layer = nets[i].GetLayerAt(0);

      layer->Backward(dummy, batches[i].GetInput(), inp1, inp2);

      masterFirstLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
      masterFirstLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
   }

   masterFirstLayer->UpdateWeightGradients(masterFirstLayer->GetWeightGradients(), 1.0 - momentum);
   masterFirstLayer->UpdateBiasGradients(masterFirstLayer->GetBiasGradients(), 1.0 - momentum);

   for (size_t i = 0; i < depth; i++) {
      Layer_t *masterLayer = this->GetLayerAt(i);
      masterLayer->Update(1.0);

      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         layer->CopyWeights(masterLayer->GetWeights());
         layer->CopyBiases(masterLayer->GetBiases());
      }
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ParallelBackwardNestorov(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                                                 std::vector<TTensorBatch<Architecture_t>> &batches,
                                                                 Scalar_t learningRate, Scalar_t momentum) -> void
{
   std::cout << "Parallel Backward Nestorov" << std::endl;
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   size_t depth = this->GetDepth();

   for (size_t i = 0; i < nets.size(); i++) {
      evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
                                        nets[i].GetLossFunction(), batches[i].GetOutput(),
                                        nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
   }

   for (size_t i = depth - 1; i > 0; i--) {
      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         layer->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(), nets[j].GetLayerAt(i - 1)->GetOutput(),
                         inp1, inp2);
      }
   }

   std::vector<Matrix_t> dummy;

   for (size_t i = 0; i < nets.size(); i++) {
      Layer_t *layer = nets[i].GetLayerAt(0);
      layer->Backward(dummy, batches[i].GetInput(), inp1, inp2);
   }

   for (size_t i = 0; i < depth; i++) {
      Layer_t *masterLayer = this->GetLayerAt(i);
      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         layer->CopyWeights(masterLayer->GetWeights());
         layer->CopyBiases(masterLayer->GetBiases());

         layer->UpdateWeights(masterLayer->GetWeightGradients(), 1.0);
         layer->UpdateBiases(masterLayer->GetBiasGradients(), 1.0);
      }

      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         masterLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
         masterLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
      }

      masterLayer->UpdateWeightGradients(masterLayer->GetWeightGradients(), 1.0 - momentum);
      masterLayer->UpdateBiasGradients(masterLayer->GetBiasGradients(), 1.0 - momentum);

      masterLayer->Update(1.0);
   }
}
#endif

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Update(Scalar_t learningRate) -> void
{
   for (size_t i = 0; i < fLayers.size(); i++) {
      fLayers[i]->Update(learningRate);
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Loss(const Matrix_t &groundTruth, const Matrix_t &weights,
                                             bool includeRegularization) const -> Scalar_t
{
   auto loss = evaluate<Architecture_t>(this->GetLossFunction(), groundTruth, fLayers.back()->GetOutputAt(0), weights);

   includeRegularization &= (this->GetRegularization() != ERegularization::kNone);
   if (includeRegularization) {
      loss += RegularizationTerm();
   }

   return loss;
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Loss(Tensor_t &input, const Matrix_t &groundTruth,
                                             const Matrix_t &weights, bool inTraining, bool includeRegularization)
   -> Scalar_t
{
   Forward(input, inTraining);
   return Loss(groundTruth, weights, includeRegularization);
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::RegularizationTerm() const -> Scalar_t
{
   Scalar_t reg = 0.0;
   for (size_t i = 0; i < fLayers.size(); i++) {
      for (size_t j = 0; j < (fLayers[i]->GetWeights()).size(); j++) {
         reg += regularization<Architecture_t>(fLayers[i]->GetWeightsAt(j), this->GetRegularization());
      }
   }
   return this->GetWeightDecay() * reg;
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Prediction(Matrix_t &predictions, EOutputFunction f) const -> void
{
   evaluate<Architecture_t>(predictions, f, fLayers.back()->GetOutputAt(0));
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Prediction(Matrix_t &predictions, Tensor_t &input,
                                                   EOutputFunction f) -> void
{
   Forward(input, false);

   evaluate<Architecture_t>(predictions, f, fLayers.back()->GetOutputAt(0));
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Print() const -> void
{
   std::cout << "DEEP NEURAL NETWORK: Depth = " << this->GetDepth();
   std::cout << " Input = ( " << this->GetInputDepth();
   std::cout << ", " << this->GetInputHeight();
   std::cout << ", " << this->GetInputWidth() << " )";
   std::cout << " Batch size = " << this->GetBatchSize();
   std::cout << " Loss function = " << static_cast<char>(this->GetLossFunction()) << std::endl;

   for (size_t i = 0; i < fLayers.size(); i++) {
      std::cout << "\tLayer " << i << "\t";
      fLayers[i]->Print();
   }
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::SetDropoutProbabilities(
   const std::vector<Double_t> &probabilities)
{
   for (size_t i = 0; i < fLayers.size(); i++) {
      if (i < probabilities.size()) {
         fLayers[i]->SetDropoutProbability(probabilities[i]);
      } else {
         fLayers[i]->SetDropoutProbability(1.0);
      }
   }
}

}
}

#endif