File indexing completed on 2025-01-18 10:10:56
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027 #ifndef TMVA_DNN_DLMINIMIZERS
0028 #define TMVA_DNN_DLMINIMIZERS
0029
0030 #include "TMVA/DNN/TensorDataLoader.h"
0031 #include "TMVA/DNN/Functions.h"
0032 #include "TMVA/DNN/DeepNet.h"
0033
0034 #include <limits>
0035 #include <vector>
0036
0037 namespace TMVA {
0038 namespace DNN {
0039
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055
0056
0057
0058
0059
0060
0061
0062
0063
0064 template <typename Architecture_t>
0065 class TDLGradientDescent {
0066 public:
0067 using DeepNet_t = TDeepNet<Architecture_t>;
0068 using Scalar_t = typename Architecture_t::Scalar_t;
0069 using Matrix_t = typename Architecture_t::Matrix_t;
0070
0071 private:
0072 size_t fBatchSize;
0073 size_t fStepCount;
0074 size_t fConvergenceSteps;
0075
0076 size_t fConvergenceCount;
0077
0078 size_t fTestInterval;
0079 Scalar_t fTrainingError;
0080 Scalar_t fTestError;
0081 Scalar_t fLearningRate;
0082 Scalar_t fMinimumError;
0083
0084 public:
0085 TDLGradientDescent();
0086 TDLGradientDescent(Scalar_t learningRate, size_t convergenceSteps, size_t testInterval);
0087
0088
0089 void Reset()
0090 {
0091 fMinimumError = std::numeric_limits<Scalar_t>::infinity();
0092 fConvergenceCount = 0;
0093 fStepCount = 0;
0094 };
0095
0096
0097
0098
0099
0100
0101 void Step(DeepNet_t &deepNet, std::vector<Matrix_t> &input, const Matrix_t &output, const Matrix_t &weights);
0102
0103
0104
0105
0106 void StepReducedWeights(DeepNet_t &deepNet, std::vector<Matrix_t> &input, const Matrix_t &output,
0107 const Matrix_t &weights);
0108
0109
0110
0111 Scalar_t StepLoss(DeepNet_t &deepNet, std::vector<Matrix_t> &input, const Matrix_t &output, const Matrix_t &weights);
0112
0113
0114
0115 Scalar_t StepReducedWeightsLoss(DeepNet_t &deepNet, std::vector<Matrix_t> &input, const Matrix_t &output,
0116 const Matrix_t &weights);
0117
0118
0119
0120
0121
0122
0123
0124 void Step(DeepNet_t &master, std::vector<DeepNet_t> &nets, std::vector<TTensorBatch<Architecture_t>> &batches);
0125
0126
0127 void StepMomentum(DeepNet_t &master, std::vector<DeepNet_t> &nets,
0128 std::vector<TTensorBatch<Architecture_t>> &batches, Scalar_t momentum);
0129
0130
0131
0132 void StepNesterov(DeepNet_t &master, std::vector<DeepNet_t> &nets,
0133 std::vector<TTensorBatch<Architecture_t>> &batches, Scalar_t momentum);
0134
0135
0136
0137
0138 bool HasConverged();
0139
0140
0141
0142
0143 bool HasConverged(Scalar_t testError);
0144
0145
0146 size_t GetConvergenceCount() const { return fConvergenceCount; }
0147 size_t GetConvergenceSteps() const { return fConvergenceSteps; }
0148 Scalar_t GetTrainingError() const { return fTrainingError; }
0149 Scalar_t GetTestError() const { return fTestError; }
0150 size_t GetTestInterval() const { return fTestInterval; }
0151
0152
0153 void SetConvergenceSteps(size_t steps) { fConvergenceSteps = steps; }
0154 void SetTestInterval(size_t interval) { fTestInterval = interval; }
0155 void SetLearningRate(Scalar_t rate) { fLearningRate = rate; }
0156 void SetBatchSize(Scalar_t rate) { fBatchSize = rate; }
0157 };
0158
0159
0160
0161
0162 template <typename Architecture_t>
0163 TDLGradientDescent<Architecture_t>::TDLGradientDescent()
0164 : fBatchSize(0), fStepCount(0), fConvergenceSteps(0), fConvergenceCount(0), fTestInterval(0), fLearningRate(0),
0165 fMinimumError(std::numeric_limits<Scalar_t>::infinity())
0166 {
0167
0168 }
0169
0170
0171 template <typename Architecture_t>
0172 TDLGradientDescent<Architecture_t>::TDLGradientDescent(Scalar_t learningRate, size_t convergenceSteps,
0173 size_t testInterval)
0174 : fBatchSize(0), fStepCount(0), fConvergenceSteps(convergenceSteps), fConvergenceCount(0),
0175 fTestInterval(testInterval), fLearningRate(learningRate), fMinimumError(std::numeric_limits<Scalar_t>::infinity())
0176 {
0177
0178 }
0179
0180
////////////////////////////////////////////////////////////////////////////////
/// Perform a single optimization step on the given batch: forward pass,
/// backward pass, then a plain gradient-descent update of the whole net with
/// learning rate fLearningRate.
/// \param deepNet  Network to train (updated in place).
/// \param input    Input batch (one matrix per input depth slice).
/// \param output   Ground-truth / target matrix for the batch.
/// \param weights  Per-event weights forwarded to the backward pass.
template <typename Architecture_t>
void TDLGradientDescent<Architecture_t>::Step(DeepNet_t &deepNet, std::vector<Matrix_t> &input, const Matrix_t &output,
                                              const Matrix_t &weights)
{
   // Second argument flags a training-mode forward pass (presumably enables
   // dropout — confirm against TDeepNet::Forward).
   deepNet.Forward(input, true);
   deepNet.Backward(input, output, weights);
   deepNet.Update(fLearningRate);
}
0190
0191
////////////////////////////////////////////////////////////////////////////////
/// Optimization step in which biases are updated for the first layer only:
/// full forward/backward pass, then a manual per-layer update that applies the
/// weight gradients everywhere but the bias gradients only at layer 0.
/// \param deepNet  Network to train (updated in place).
/// \param input    Input batch.
/// \param output   Ground-truth / target matrix.
/// \param weights  Per-event weights forwarded to the backward pass.
template <typename Architecture_t>
void TDLGradientDescent<Architecture_t>::StepReducedWeights(DeepNet_t &deepNet, std::vector<Matrix_t> &input,
                                                            const Matrix_t &output, const Matrix_t &weights)
{
   // Training-mode forward pass (flag presumably enables dropout).
   deepNet.Forward(input, true);
   deepNet.Backward(input, output, weights);

   for (size_t i = 0; i < deepNet.GetDepth(); i++) {
      auto *layer = deepNet.GetLayerAt(i);

      layer->UpdateWeights(layer->GetWeightGradients(), fLearningRate);
      // NOTE(review): only the first layer's biases are updated; deeper
      // layers keep their biases. This looks deliberate for the "reduced
      // weights" scheme — confirm against callers before changing.
      if (i == 0) {
         layer->UpdateBiases(layer->GetBiasGradients(), fLearningRate);
      }
   }
}
0209
0210
////////////////////////////////////////////////////////////////////////////////
/// Perform a single optimization step and return the loss of the net on the
/// given batch (evaluated before the update is applied).
/// NOTE(review): the loss is evaluated without the event weights that are
/// passed to Backward(...) — confirm that an unweighted loss is intended.
/// \return The batch loss as computed by DeepNet_t::Loss.
template <typename Architecture_t>
auto TDLGradientDescent<Architecture_t>::StepLoss(DeepNet_t &deepNet, std::vector<Matrix_t> &input,
                                                  const Matrix_t &output, const Matrix_t &weights) -> Scalar_t
{
   Scalar_t loss = deepNet.Loss(input, output);
   deepNet.Backward(input, output, weights);
   deepNet.Update(fLearningRate);

   return loss;
}
0221
0222
////////////////////////////////////////////////////////////////////////////////
/// Combination of StepReducedWeights and StepLoss: evaluate the batch loss
/// (cached in fTrainingError), run the backward pass, then update weights for
/// all layers but biases for the first layer only.
/// NOTE(review): as in StepLoss, the loss is evaluated without the event
/// weights passed to Backward(...) — confirm this is intended.
/// \return The batch loss as computed by DeepNet_t::Loss.
template <typename Architecture_t>
auto TDLGradientDescent<Architecture_t>::StepReducedWeightsLoss(DeepNet_t &deepNet, std::vector<Matrix_t> &input,
                                                                const Matrix_t &output, const Matrix_t &weights)
   -> Scalar_t
{
   Scalar_t loss = deepNet.Loss(input, output);
   // Cache the batch loss so GetTrainingError() reflects this step.
   fTrainingError = loss;
   deepNet.Backward(input, output, weights);

   for (size_t i = 0; i < deepNet.GetDepth(); i++) {
      auto *layer = deepNet.GetLayerAt(i);

      layer->UpdateWeights(layer->GetWeightGradients(), fLearningRate);
      // Biases are updated for the first layer only (see StepReducedWeights).
      if (i == 0) {
         layer->UpdateBiases(layer->GetBiasGradients(), fLearningRate);
      }
   }

   return loss;
}
0243
0244
////////////////////////////////////////////////////////////////////////////////
/// Data-parallel optimization step: forward the replica nets on their
/// respective batches, then run the parallel backward pass which applies the
/// accumulated updates to the master net with learning rate fLearningRate.
/// \param master  Net holding the reference weights (updated in place).
/// \param nets    Replica nets, one per batch.
/// \param batches One tensor batch per replica.
template <typename Architecture_t>
void TDLGradientDescent<Architecture_t>::Step(DeepNet_t &master, std::vector<DeepNet_t> &nets,
                                              std::vector<TTensorBatch<Architecture_t>> &batches)
{
   master.ParallelForward(nets, batches);
   master.ParallelBackward(nets, batches, fLearningRate);
}
0253
0254
////////////////////////////////////////////////////////////////////////////////
/// Data-parallel optimization step using momentum-based updates: same as the
/// parallel Step(...), but the backward pass folds the gradients into a
/// momentum term with the given coefficient.
/// \param momentum Momentum coefficient forwarded to ParallelBackwardMomentum.
template <typename Architecture_t>
void TDLGradientDescent<Architecture_t>::StepMomentum(DeepNet_t &master, std::vector<DeepNet_t> &nets,
                                                      std::vector<TTensorBatch<Architecture_t>> &batches,
                                                      Scalar_t momentum)
{
   master.ParallelForward(nets, batches);
   master.ParallelBackwardMomentum(nets, batches, fLearningRate, momentum);
}
0263
0264
////////////////////////////////////////////////////////////////////////////////
/// Data-parallel optimization step using Nesterov momentum updates.
/// \param momentum Momentum coefficient forwarded to the backward pass.
template <typename Architecture_t>
void TDLGradientDescent<Architecture_t>::StepNesterov(DeepNet_t &master, std::vector<DeepNet_t> &nets,
                                                      std::vector<TTensorBatch<Architecture_t>> &batches,
                                                      Scalar_t momentum)
{
   master.ParallelForward(nets, batches);
   // "Nestorov" is the spelling of the TDeepNet API method; do not "fix" it
   // here without renaming the method there as well.
   master.ParallelBackwardNestorov(nets, batches, fLearningRate, momentum);
}
0273
0274
0275 template <typename Architecture_t>
0276 bool TDLGradientDescent<Architecture_t>::HasConverged()
0277 {
0278 if (fTestError < fMinimumError * 0.999) {
0279 fConvergenceCount = 0;
0280 fMinimumError = fTestError;
0281 } else {
0282 fConvergenceCount++;
0283 }
0284
0285 return (fConvergenceCount >= fConvergenceSteps);
0286 }
0287
0288
0289 template <typename Architecture_t>
0290 bool TDLGradientDescent<Architecture_t>::HasConverged(Scalar_t testError)
0291 {
0292 fTestError = testError;
0293 if (fTestError < fMinimumError * 0.999) {
0294 fConvergenceCount = 0;
0295 fMinimumError = fTestError;
0296 } else {
0297 fConvergenceCount += fTestInterval;
0298 }
0299 return (fConvergenceCount >= fConvergenceSteps);
0300 }
0301
0302 }
0303 }
0304
0305 #endif