File indexing completed on 2025-01-18 10:10:55
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027 #ifndef TMVA_DNN_ADAGRAD
0028 #define TMVA_DNN_ADAGRAD
0029
#include "TMatrix.h"
#include "TMVA/DNN/Optimizer.h"
#include "TMVA/DNN/Functions.h"

#include <cassert>
#include <vector>
0034
0035 namespace TMVA {
0036 namespace DNN {
0037
0038
0039
0040
0041
0042
/** \class TAdagrad
 *  Adagrad optimizer.
 *
 *  Maintains, per layer and per weight/bias slice, the running sum of
 *  squared gradients; each update divides the global learning rate by
 *  the square root of that accumulator (plus \p fEpsilon), giving a
 *  per-parameter adaptive step size.
 */
template <typename Architecture_t, typename Layer_t = VGeneralLayer<Architecture_t>,
          typename DeepNet_t = TDeepNet<Architecture_t, Layer_t>>
class TAdagrad : public VOptimizer<Architecture_t, Layer_t, DeepNet_t> {
public:
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Scalar_t = typename Architecture_t::Scalar_t;

protected:
   Scalar_t fEpsilon; ///< Numerical-stability constant added under the square root.

   std::vector<std::vector<Matrix_t>>
      fPastSquaredWeightGradients; ///< Accumulated sum of squared past weight gradients, per layer / per slice.
   std::vector<std::vector<Matrix_t>>
      fPastSquaredBiasGradients; ///< Accumulated sum of squared past bias gradients, per layer / per slice.
   std::vector<std::vector<Matrix_t>>
      fWorkWeightTensor; ///< Scratch matrices reused by UpdateWeights (avoids per-step allocation).
   std::vector<std::vector<Matrix_t>>
      fWorkBiasTensor; ///< Scratch matrices reused by UpdateBiases (avoids per-step allocation).

   /// Apply one Adagrad step to the weights of layer \p layerIndex.
   void UpdateWeights(size_t layerIndex, std::vector<Matrix_t> &weights, const std::vector<Matrix_t> &weightGradients);

   /// Apply one Adagrad step to the biases of layer \p layerIndex.
   void UpdateBiases(size_t layerIndex, std::vector<Matrix_t> &biases, const std::vector<Matrix_t> &biasGradients);

public:
   /// Construct the optimizer for \p deepNet; allocates and zero-initializes
   /// the per-layer accumulator and scratch tensors.
   TAdagrad(DeepNet_t &deepNet, Scalar_t learningRate = 0.01, Scalar_t epsilon = 1e-8);

   /// Destructor (no owned raw resources — defaults suffice).
   ~TAdagrad() = default;

   /// Numerical-stability constant used in the denominator of the update.
   Scalar_t GetEpsilon() const { return fEpsilon; }

   std::vector<std::vector<Matrix_t>> &GetPastSquaredWeightGradients() { return fPastSquaredWeightGradients; }
   std::vector<Matrix_t> &GetPastSquaredWeightGradientsAt(size_t i) { return fPastSquaredWeightGradients[i]; }

   std::vector<std::vector<Matrix_t>> &GetPastSquaredBiasGradients() { return fPastSquaredBiasGradients; }
   std::vector<Matrix_t> &GetPastSquaredBiasGradientsAt(size_t i) { return fPastSquaredBiasGradients[i]; }
};
0084
0085
0086
0087
0088
0089 template <typename Architecture_t, typename Layer_t, typename DeepNet_t>
0090 TAdagrad<Architecture_t, Layer_t, DeepNet_t>::TAdagrad(DeepNet_t &deepNet, Scalar_t learningRate, Scalar_t epsilon)
0091 : VOptimizer<Architecture_t, Layer_t, DeepNet_t>(learningRate, deepNet), fEpsilon(epsilon)
0092 {
0093 std::vector<Layer_t *> &layers = deepNet.GetLayers();
0094 const size_t layersNSlices = layers.size();
0095 fPastSquaredWeightGradients.resize(layersNSlices);
0096 fPastSquaredBiasGradients.resize(layersNSlices);
0097 fWorkWeightTensor.resize(layersNSlices);
0098 fWorkBiasTensor.resize(layersNSlices);
0099
0100 for (size_t i = 0; i < layersNSlices; i++) {
0101 const size_t weightsNSlices = (layers[i]->GetWeights()).size();
0102
0103
0104 Architecture_t::CreateWeightTensors( fPastSquaredWeightGradients[i], layers[i]->GetWeights());
0105
0106 for (size_t j = 0; j < weightsNSlices; j++) {
0107 initialize<Architecture_t>(fPastSquaredWeightGradients[i][j], EInitialization::kZero);
0108 }
0109
0110 const size_t biasesNSlices = (layers[i]->GetBiases()).size();
0111
0112 Architecture_t::CreateWeightTensors( fPastSquaredBiasGradients[i], layers[i]->GetBiases());
0113
0114 for (size_t j = 0; j < biasesNSlices; j++) {
0115 initialize<Architecture_t>(fPastSquaredBiasGradients[i][j], EInitialization::kZero);
0116 }
0117
0118 Architecture_t::CreateWeightTensors(fWorkWeightTensor[i], layers[i]->GetWeights());
0119 Architecture_t::CreateWeightTensors(fWorkBiasTensor[i], layers[i]->GetBiases());
0120
0121 }
0122 }
0123
0124
0125 template <typename Architecture_t, typename Layer_t, typename DeepNet_t>
0126 auto TAdagrad<Architecture_t, Layer_t, DeepNet_t>::UpdateWeights(size_t layerIndex, std::vector<Matrix_t> &weights,
0127 const std::vector<Matrix_t> &weightGradients) -> void
0128 {
0129 auto ¤tLayerPastSquaredWeightGradients = this->GetPastSquaredWeightGradientsAt(layerIndex);
0130
0131
0132 const size_t weightsNSlices = weights.size();
0133 assert(currentLayerPastSquaredWeightGradients.size() == weightsNSlices);
0134
0135 for (size_t i = 0; i < weightsNSlices; i++) {
0136
0137 auto ¤tSquaredWeightGradients = fWorkWeightTensor[layerIndex][i];
0138
0139 Architecture_t::Copy(currentSquaredWeightGradients, weightGradients[i]);
0140 Architecture_t::SquareElementWise(currentSquaredWeightGradients);
0141 Architecture_t::ScaleAdd(currentLayerPastSquaredWeightGradients[i], currentSquaredWeightGradients, 1.0);
0142
0143
0144
0145
0146 auto ¤tWeightUpdates = fWorkWeightTensor[layerIndex][i];
0147 Architecture_t::Copy(currentWeightUpdates, currentLayerPastSquaredWeightGradients[i]);
0148 Architecture_t::ConstAdd(currentWeightUpdates, this->GetEpsilon());
0149 Architecture_t::SqrtElementWise(currentWeightUpdates);
0150 Architecture_t::ReciprocalElementWise(currentWeightUpdates);
0151 Architecture_t::Hadamard(currentWeightUpdates, weightGradients[i]);
0152 Architecture_t::ScaleAdd(weights[i], currentWeightUpdates, -this->GetLearningRate());
0153 }
0154 }
0155
0156
0157 template <typename Architecture_t, typename Layer_t, typename DeepNet_t>
0158 auto TAdagrad<Architecture_t, Layer_t, DeepNet_t>::UpdateBiases(size_t layerIndex, std::vector<Matrix_t> &biases,
0159 const std::vector<Matrix_t> &biasGradients) -> void
0160 {
0161 std::vector<Matrix_t> ¤tLayerPastSquaredBiasGradients = this->GetPastSquaredBiasGradientsAt(layerIndex);
0162
0163 const size_t biasesNSlices = biases.size();
0164 assert(currentLayerPastSquaredBiasGradients.size() == biasesNSlices);
0165 for (size_t i = 0; i < biasesNSlices; i++) {
0166
0167
0168 auto ¤tSquaredBiasGradients = fWorkBiasTensor[layerIndex][i];
0169 Architecture_t::Copy(currentSquaredBiasGradients, biasGradients[i]);
0170 Architecture_t::SquareElementWise(currentSquaredBiasGradients);
0171 Architecture_t::ScaleAdd(currentLayerPastSquaredBiasGradients[i], currentSquaredBiasGradients, 1.0);
0172
0173
0174
0175
0176 auto ¤tBiasUpdates = fWorkBiasTensor[layerIndex][i];
0177 Architecture_t::Copy(currentBiasUpdates, currentLayerPastSquaredBiasGradients[i]);
0178 Architecture_t::ConstAdd(currentBiasUpdates, this->GetEpsilon());
0179 Architecture_t::SqrtElementWise(currentBiasUpdates);
0180 Architecture_t::ReciprocalElementWise(currentBiasUpdates);
0181 Architecture_t::Hadamard(currentBiasUpdates, biasGradients[i]);
0182 Architecture_t::ScaleAdd(biases[i], currentBiasUpdates, -this->GetLearningRate());
0183 }
0184 }
0185
0186 }
0187 }
0188
0189 #endif