Warning: file /include/root/TMVA/DNN/Adam.h was not indexed,
or was modified since the last indexation — in that case cross-reference links may be missing or inaccurate.
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027 #ifndef TMVA_DNN_ADAM
0028 #define TMVA_DNN_ADAM
0029
#include "TMatrix.h"
#include "TMVA/DNN/Optimizer.h"
#include "TMVA/DNN/Functions.h"

#include <cmath>
#include <vector>
0034
0035 namespace TMVA {
0036 namespace DNN {
0037
0038
0039
0040
0041
0042
/** \class TAdam
 *  Adam (adaptive moment estimation) optimizer.
 *
 *  Extends VOptimizer with per-layer first- and second-moment estimates of
 *  the weight and bias gradients. UpdateWeights()/UpdateBiases() combine the
 *  moments into a bias-corrected adaptive step (see the `alpha` computation
 *  there and the Architecture_t::AdamUpdate* kernels).
 */
template <typename Architecture_t, typename Layer_t = VGeneralLayer<Architecture_t>,
          typename DeepNet_t = TDeepNet<Architecture_t, Layer_t>>
class TAdam : public VOptimizer<Architecture_t, Layer_t, DeepNet_t> {
public:
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Scalar_t = typename Architecture_t::Scalar_t;

protected:
   Scalar_t fBeta1;   ///< Exponential decay rate for the first moment estimates.
   Scalar_t fBeta2;   ///< Exponential decay rate for the second moment estimates.
   Scalar_t fEpsilon; ///< Small constant added to the update denominator for numerical stability.

   ///< First moment (running mean) of the weight gradients, indexed [layer][tensor].
   std::vector<std::vector<Matrix_t>> fFirstMomentWeights;

   ///< First moment (running mean) of the bias gradients, indexed [layer][tensor].
   std::vector<std::vector<Matrix_t>> fFirstMomentBiases;

   ///< Second moment (running mean of squares) of the weight gradients, indexed [layer][tensor].
   std::vector<std::vector<Matrix_t>> fSecondMomentWeights;

   ///< Second moment (running mean of squares) of the bias gradients, indexed [layer][tensor].
   std::vector<std::vector<Matrix_t>> fSecondMomentBiases;

   /// Update the weights of the layer at \p layerIndex, using \p weightGradients
   /// and the stored moment tensors for that layer.
   void UpdateWeights(size_t layerIndex, std::vector<Matrix_t> &weights, const std::vector<Matrix_t> &weightGradients);

   /// Update the biases of the layer at \p layerIndex, using \p biasGradients
   /// and the stored moment tensors for that layer.
   void UpdateBiases(size_t layerIndex, std::vector<Matrix_t> &biases, const std::vector<Matrix_t> &biasGradients);

public:
   /// Constructor. Allocates one set of zero-initialized moment tensors per
   /// layer of \p deepNet, matching the shapes of that layer's weights/biases.
   TAdam(DeepNet_t &deepNet, Scalar_t learningRate = 0.001, Scalar_t beta1 = 0.9, Scalar_t beta2 = 0.999,
         Scalar_t epsilon = 1e-7);

   /// Destructor.
   ~TAdam() = default;

   /*! Getters */
   Scalar_t GetBeta1() const { return fBeta1; }
   Scalar_t GetBeta2() const { return fBeta2; }
   Scalar_t GetEpsilon() const { return fEpsilon; }

   std::vector<std::vector<Matrix_t>> &GetFirstMomentWeights() { return fFirstMomentWeights; }
   std::vector<Matrix_t> &GetFirstMomentWeightsAt(size_t i) { return fFirstMomentWeights[i]; }

   std::vector<std::vector<Matrix_t>> &GetFirstMomentBiases() { return fFirstMomentBiases; }
   std::vector<Matrix_t> &GetFirstMomentBiasesAt(size_t i) { return fFirstMomentBiases[i]; }

   std::vector<std::vector<Matrix_t>> &GetSecondMomentWeights() { return fSecondMomentWeights; }
   std::vector<Matrix_t> &GetSecondMomentWeightsAt(size_t i) { return fSecondMomentWeights[i]; }

   std::vector<std::vector<Matrix_t>> &GetSecondMomentBiases() { return fSecondMomentBiases; }
   std::vector<Matrix_t> &GetSecondMomentBiasesAt(size_t i) { return fSecondMomentBiases[i]; }
};
0096
0097
0098
0099
0100
0101 template <typename Architecture_t, typename Layer_t, typename DeepNet_t>
0102 TAdam<Architecture_t, Layer_t, DeepNet_t>::TAdam(DeepNet_t &deepNet, Scalar_t learningRate, Scalar_t beta1,
0103 Scalar_t beta2, Scalar_t epsilon)
0104 : VOptimizer<Architecture_t, Layer_t, DeepNet_t>(learningRate, deepNet), fBeta1(beta1), fBeta2(beta2),
0105 fEpsilon(epsilon)
0106 {
0107 std::vector<Layer_t *> &layers = deepNet.GetLayers();
0108 const size_t layersNSlices = layers.size();
0109 fFirstMomentWeights.resize(layersNSlices);
0110 fFirstMomentBiases.resize(layersNSlices);
0111 fSecondMomentWeights.resize(layersNSlices);
0112 fSecondMomentBiases.resize(layersNSlices);
0113
0114
0115 for (size_t i = 0; i < layersNSlices; i++) {
0116
0117 Architecture_t::CreateWeightTensors( fFirstMomentWeights[i], layers[i]->GetWeights());
0118 Architecture_t::CreateWeightTensors( fSecondMomentWeights[i], layers[i]->GetWeights());
0119
0120 const size_t weightsNSlices = (layers[i]->GetWeights()).size();
0121
0122 for (size_t j = 0; j < weightsNSlices; j++) {
0123 initialize<Architecture_t>(fFirstMomentWeights[i][j], EInitialization::kZero);
0124 initialize<Architecture_t>(fSecondMomentWeights[i][j], EInitialization::kZero);
0125 }
0126
0127 const size_t biasesNSlices = (layers[i]->GetBiases()).size();
0128
0129 Architecture_t::CreateWeightTensors( fFirstMomentBiases[i], layers[i]->GetBiases());
0130 Architecture_t::CreateWeightTensors( fSecondMomentBiases[i], layers[i]->GetBiases());
0131
0132 for (size_t j = 0; j < biasesNSlices; j++) {
0133 initialize<Architecture_t>(fFirstMomentBiases[i][j], EInitialization::kZero);
0134 initialize<Architecture_t>(fSecondMomentBiases[i][j], EInitialization::kZero);
0135 }
0136 }
0137 }
0138
0139
0140 template <typename Architecture_t, typename Layer_t, typename DeepNet_t>
0141 auto TAdam<Architecture_t, Layer_t, DeepNet_t>::UpdateWeights(size_t layerIndex, std::vector<Matrix_t> &weights,
0142 const std::vector<Matrix_t> &weightGradients) -> void
0143 {
0144
0145
0146
0147
0148 std::vector<Matrix_t> ¤tLayerFirstMomentWeights = this->GetFirstMomentWeightsAt(layerIndex);
0149 std::vector<Matrix_t> ¤tLayerSecondMomentWeights = this->GetSecondMomentWeightsAt(layerIndex);
0150
0151
0152 Scalar_t alpha = (this->GetLearningRate()) * (sqrt(1 - pow(this->GetBeta2(), this->GetGlobalStep()))) /
0153 (1 - pow(this->GetBeta1(), this->GetGlobalStep()));
0154
0155
0156 for (size_t i = 0; i < weights.size(); i++) {
0157
0158 Architecture_t::AdamUpdateFirstMom(currentLayerFirstMomentWeights[i], weightGradients[i], this->GetBeta1() );
0159
0160 Architecture_t::AdamUpdateSecondMom(currentLayerSecondMomentWeights[i], weightGradients[i], this->GetBeta2() );
0161
0162 Architecture_t::AdamUpdate(weights[i], currentLayerFirstMomentWeights[i], currentLayerSecondMomentWeights[i],
0163 alpha, this->GetEpsilon() );
0164 }
0165 }
0166
0167
0168 template <typename Architecture_t, typename Layer_t, typename DeepNet_t>
0169 auto TAdam<Architecture_t, Layer_t, DeepNet_t>::UpdateBiases(size_t layerIndex, std::vector<Matrix_t> &biases,
0170 const std::vector<Matrix_t> &biasGradients) -> void
0171 {
0172 std::vector<Matrix_t> ¤tLayerFirstMomentBiases = this->GetFirstMomentBiasesAt(layerIndex);
0173 std::vector<Matrix_t> ¤tLayerSecondMomentBiases = this->GetSecondMomentBiasesAt(layerIndex);
0174
0175
0176 Scalar_t alpha = (this->GetLearningRate()) * (sqrt(1 - pow(this->GetBeta2(), this->GetGlobalStep()))) /
0177 (1 - pow(this->GetBeta1(), this->GetGlobalStep()));
0178
0179
0180 for (size_t i = 0; i < biases.size(); i++) {
0181
0182 Architecture_t::AdamUpdateFirstMom(currentLayerFirstMomentBiases[i], biasGradients[i], this->GetBeta1() );
0183
0184 Architecture_t::AdamUpdateSecondMom(currentLayerSecondMomentBiases[i], biasGradients[i], this->GetBeta2() );
0185
0186 Architecture_t::AdamUpdate(biases[i], currentLayerFirstMomentBiases[i], currentLayerSecondMomentBiases[i],
0187 alpha, this->GetEpsilon() );
0188 }
0189 }
0190
0191 }
0192 }
0193
0194 #endif