#ifndef TMVA_NEURAL_NET_I
#define TMVA_NEURAL_NET_I

#ifndef TMVA_NEURAL_NET
#error "Do not use NeuralNet.icc directly. #include \"NeuralNet.h\" instead."
#endif

#pragma once
#ifndef _MSC_VER
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif

#include "Math/Util.h"

#include "TMVA/Pattern.h"
#include "TMVA/MethodBase.h"

#include <tuple>
#include <future>
#include <random>

namespace TMVA
{
namespace DNN
{

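/*! \brief draw a random value uniformly distributed between "from" and "to"
 */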
template <typename T>
T uniformFromTo (T from, T to)
{
    return from + (rand () * (to - from) / RAND_MAX);
}

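/*! \brief fill the container with uniformly distributed random values in [-maxValue, maxValue]
 */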
template <typename Container, typename T>
void uniformDouble (Container& container, T maxValue)
{
    for (auto it = begin (container), itEnd = end (container); it != itEnd; ++it)
    {
        (*it) = TMVA::DNN::uniformFromTo (-1.0*maxValue, 1.0*maxValue);
    }
}

// the activation functions and their "Inv" counterparts, which are used
// to compute the gradients from the activated values

extern std::shared_ptr<std::function<double(double)>> ZeroFnc;

extern std::shared_ptr<std::function<double(double)>> Sigmoid;
extern std::shared_ptr<std::function<double(double)>> InvSigmoid;

extern std::shared_ptr<std::function<double(double)>> Tanh;
extern std::shared_ptr<std::function<double(double)>> InvTanh;

extern std::shared_ptr<std::function<double(double)>> Linear;
extern std::shared_ptr<std::function<double(double)>> InvLinear;

extern std::shared_ptr<std::function<double(double)>> SymmReLU;
extern std::shared_ptr<std::function<double(double)>> InvSymmReLU;

extern std::shared_ptr<std::function<double(double)>> ReLU;
extern std::shared_ptr<std::function<double(double)>> InvReLU;

extern std::shared_ptr<std::function<double(double)>> SoftPlus;
extern std::shared_ptr<std::function<double(double)>> InvSoftPlus;

extern std::shared_ptr<std::function<double(double)>> TanhShift;
extern std::shared_ptr<std::function<double(double)>> InvTanhShift;

extern std::shared_ptr<std::function<double(double)>> SoftSign;
extern std::shared_ptr<std::function<double(double)>> InvSoftSign;

extern std::shared_ptr<std::function<double(double)>> Gauss;
extern std::shared_ptr<std::function<double(double)>> InvGauss;

extern std::shared_ptr<std::function<double(double)>> GaussComplement;
extern std::shared_ptr<std::function<double(double)>> InvGaussComplement;

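/*! \brief apply weights to the source values and accumulate into the target nodes
 *
 * if HasDropOut is true, itDrop provides one flag per source node masking the
 * dropped nodes; for no drop-out, pass a pointer to a bool holding "true"
 */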
template <bool HasDropOut, typename ItSource, typename ItWeight, typename ItTarget, typename ItDrop>
void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd,
                   ItWeight itWeight,
                   ItTarget itTargetBegin, ItTarget itTargetEnd,
                   ItDrop itDrop)
{
    for (auto itSource = itSourceBegin; itSource != itSourceEnd; ++itSource)
    {
        for (auto itTarget = itTargetBegin; itTarget != itTargetEnd; ++itTarget)
        {
            if (!HasDropOut || *itDrop)
                (*itTarget) += (*itSource) * (*itWeight);
            ++itWeight;
        }
        if (HasDropOut) ++itDrop;
    }
}

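/*! \brief backward application of the weights: accumulate the deltas of the
 * current layer into the previous layer, honoring the drop-out mask of the
 * previous layer
 */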
template <bool HasDropOut, typename ItSource, typename ItWeight, typename ItPrev, typename ItDrop>
void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd,
                            ItWeight itWeight,
                            ItPrev itPrevBegin, ItPrev itPrevEnd,
                            ItDrop itDrop)
{
    for (auto itPrev = itPrevBegin; itPrev != itPrevEnd; ++itPrev)
    {
        for (auto itCurr = itCurrBegin; itCurr != itCurrEnd; ++itCurr)
        {
            if (!HasDropOut || *itDrop)
                (*itPrev) += (*itCurr) * (*itWeight);
            ++itWeight;
        }
        if (HasDropOut) ++itDrop;
    }
}

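/*! \brief apply the activation function to each value in the range
 */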
template <typename ItValue, typename Fnc>
void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc)
{
    while (itValue != itValueEnd)
    {
        auto& value = (*itValue);
        value = (*fnc.get ()) (value);

        ++itValue;
    }
}

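/*! \brief apply the activation function to each value and store the derivative,
 * computed via the "inverse" function from the activated value, in the gradient range
 */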
template <typename ItValue, typename Fnc, typename InvFnc, typename ItGradient>
void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc, InvFnc invFnc, ItGradient itGradient)
{
    while (itValue != itValueEnd)
    {
        auto& value = (*itValue);
        value = (*fnc.get ()) (value);
        (*itGradient) = (*invFnc.get ()) (value);

        ++itValue; ++itGradient;
    }
}

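/*! \brief update the gradients: accumulate -(delta * source * targetGradient)
 * for each (source, target) pair
 */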
template <typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient>
void update (ItSource itSource, ItSource itSourceEnd,
             ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
             ItTargetGradient itTargetGradientBegin,
             ItGradient itGradient)
{
    while (itSource != itSourceEnd)
    {
        auto itTargetDelta = itTargetDeltaBegin;
        auto itTargetGradient = itTargetGradientBegin;
        while (itTargetDelta != itTargetDeltaEnd)
        {
            (*itGradient) -= (*itTargetDelta) * (*itSource) * (*itTargetGradient);
            ++itTargetDelta; ++itTargetGradient; ++itGradient;
        }
        ++itSource;
    }
}

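/*! \brief compute the regularization contribution to the gradient
 *
 * generic case: no regularization; specializations for L1 and L2 follow
 */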
template <EnumRegularization Regularization>
inline double computeRegularization (double weight, const double& factorWeightDecay)
{
    MATH_UNUSED(weight);
    MATH_UNUSED(factorWeightDecay);

    return 0;
}

// L1 regularization
template <>
inline double computeRegularization<EnumRegularization::L1> (double weight, const double& factorWeightDecay)
{
    return weight == 0.0 ? 0.0 : std::copysign (factorWeightDecay, weight);
}

// L2 regularization
template <>
inline double computeRegularization<EnumRegularization::L2> (double weight, const double& factorWeightDecay)
{
    return factorWeightDecay * weight;
}

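/*! \brief update the gradients, including the regularization term
 */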
template <EnumRegularization Regularization, typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient, typename ItWeight>
void update (ItSource itSource, ItSource itSourceEnd,
             ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
             ItTargetGradient itTargetGradientBegin,
             ItGradient itGradient,
             ItWeight itWeight, double weightDecay)
{
    while (itSource != itSourceEnd)
    {
        auto itTargetDelta = itTargetDeltaBegin;
        auto itTargetGradient = itTargetGradientBegin;
        while (itTargetDelta != itTargetDeltaEnd)
        {
            (*itGradient) -= (*itTargetDelta) * (*itSource) * (*itTargetGradient) + computeRegularization<Regularization>(*itWeight, weightDecay);
            ++itTargetDelta; ++itTargetGradient; ++itGradient; ++itWeight;
        }
        ++itSource;
    }
}


#define USELOCALWEIGHTS 1

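/*! \brief implementation of the steepest-gradient minimizer with momentum
 *
 * evaluates the fitness function up to m_repetitions times; m_beta decays the
 * previous gradients (momentum term), m_alpha sets the (randomized) learning rate
 */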
template <typename Function, typename Weights, typename PassThrough>
double Steepest::operator() (Function& fitnessFunction, Weights& weights, PassThrough& passThrough)
{
    size_t numWeights = weights.size ();

    m_localGradients.assign (numWeights, 0.0);
    m_localWeights.assign (begin (weights), end (weights));

    double E = 1e10;
    if (m_prevGradients.size () != numWeights)
    {
        m_prevGradients.clear ();
        m_prevGradients.assign (weights.size (), 0);
    }

    bool success = true;
    size_t currentRepetition = 0;
    while (success)
    {
        if (currentRepetition >= m_repetitions)
            break;

        m_localGradients.assign (numWeights, 0.0);

        // momentum: decay the previous gradients and move the local weights
        // along them before re-evaluating the fitness function
        auto itPrevG = begin (m_prevGradients);
        auto itPrevGEnd = end (m_prevGradients);
        auto itLocWeight = begin (m_localWeights);
        for (; itPrevG != itPrevGEnd; ++itPrevG, ++itLocWeight)
        {
            (*itPrevG) *= m_beta;
            (*itLocWeight) += (*itPrevG);
        }

        E = fitnessFunction (passThrough, m_localWeights, m_localGradients);

        // random learning rate drawn around m_alpha
        double alpha = gaussDouble (m_alpha, m_alpha/2.0);

        auto itG = begin (m_localGradients);
        auto itGEnd = end (m_localGradients);
        itPrevG = begin (m_prevGradients);
        double maxGrad = 0.0;
        for (; itG != itGEnd; ++itG, ++itPrevG)
        {
            double currGrad = (*itG);
            double prevGrad = (*itPrevG);
            currGrad *= alpha;

            currGrad += prevGrad;
            (*itG) = currGrad;
            (*itPrevG) = currGrad;

            if (std::fabs (currGrad) > maxGrad)
                maxGrad = std::fabs (currGrad);
        }

        if (maxGrad > 1)
        {
            // gradients are blowing up: reduce the learning rate, rescale
            // the weights and restart the momentum
            m_alpha /= 2;
            std::cout << "\nlearning rate reduced to " << m_alpha << std::endl;
            std::for_each (weights.begin (), weights.end (), [maxGrad](double& w)
            {
                w /= maxGrad;
            });
            m_prevGradients.clear ();
        }
        else
        {
            // apply the gradients to the weights
            auto itW = std::begin (weights);
            std::for_each (std::begin (m_localGradients), std::end (m_localGradients), [&itW](double& g)
            {
                *itW += g;
                ++itW;
            });
        }

        ++currentRepetition;
    }
    return E;
}

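/*! \brief sum-of-squares error function; fills the deltas as well if a
 * non-empty delta range is given
 */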
template <typename ItOutput, typename ItTruth, typename ItDelta, typename InvFnc>
double sumOfSquares (ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, InvFnc invFnc, double patternWeight)
{
    double errorSum = 0.0;

    // accumulate the squared error and compute the deltas
    ItTruth itTruth = itTruthBegin;
    bool hasDeltas = (itDelta != itDeltaEnd);
    for (ItOutput itOutput = itOutputBegin; itOutput != itOutputEnd; ++itOutput, ++itTruth)
    {
        double output = (*itOutput);
        double error = output - (*itTruth);
        if (hasDeltas)
        {
            (*itDelta) = (*invFnc.get ()) (output) * error * patternWeight;
            ++itDelta;
        }
        errorSum += error*error * patternWeight;
    }

    return 0.5*errorSum;
}

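/*! \brief cross-entropy error function (for non-mutually-exclusive classification)
 */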
template <typename ItProbability, typename ItTruth, typename ItDelta, typename ItInvActFnc>
double crossEntropy (ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc /*itInvActFnc*/, double patternWeight)
{
    bool hasDeltas = (itDelta != itDeltaEnd);

    double errorSum = 0.0;
    for (ItProbability itProbability = itProbabilityBegin; itProbability != itProbabilityEnd; ++itProbability, ++itTruthBegin)
    {
        double probability = *itProbability;
        double truth = *itTruthBegin;

        // soften the target values to avoid saturating the output
        truth = truth < 0.5 ? 0.1 : 0.9;
        if (hasDeltas)
        {
            double delta = probability - truth;
            (*itDelta) = delta*patternWeight;
            ++itDelta;
        }
        double error (0);
        if (probability == 0) // protection against log (0)
        {
            if (truth >= 0.5)
                error += 1.0;
        }
        else if (probability == 1)
        {
            if (truth < 0.5)
                error += 1.0;
        }
        else
            error += - (truth * log (probability) + (1.0-truth) * log (1.0-probability));
        errorSum += error * patternWeight;
    }
    return errorSum;
}

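/*! \brief soft-max cross-entropy error function (for mutually exclusive classification)
 */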
template <typename ItOutput, typename ItTruth, typename ItDelta, typename ItInvActFnc>
double softMaxCrossEntropy (ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc /*itInvActFnc*/, double patternWeight)
{
    double errorSum = 0.0;

    bool hasDeltas = (itDelta != itDeltaEnd);

    ItTruth itTruth = itTruthBegin;
    for (auto itProbability = itProbabilityBegin; itProbability != itProbabilityEnd; ++itProbability, ++itTruth)
    {
        double probability = (*itProbability);
        double truth = (*itTruth);
        if (hasDeltas)
        {
            (*itDelta) = probability - truth;
            ++itDelta;
        }
        double error (0);

        error += truth * log (probability); // the soft-max probability is assumed to be > 0 here
        errorSum += error;
    }

    return -errorSum * patternWeight;
}

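/*! \brief add the regularization term (L1 or L2 weight decay) to the error
 */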
template <typename ItWeight>
double weightDecay (double error, ItWeight itWeight, ItWeight itWeightEnd, double factorWeightDecay, EnumRegularization eRegularization)
{
    if (eRegularization == EnumRegularization::L1)
    {
        double w = 0;
        size_t n = 0;
        for (; itWeight != itWeightEnd; ++itWeight, ++n)
        {
            double weight = (*itWeight);
            w += std::fabs (weight);
        }
        return error + 0.5 * w * factorWeightDecay / n;
    }
    else if (eRegularization == EnumRegularization::L2)
    {
        double w = 0;
        size_t n = 0;
        for (; itWeight != itWeightEnd; ++itWeight, ++n)
        {
            double weight = (*itWeight);
            w += weight*weight;
        }
        return error + 0.5 * w * factorWeightDecay / n;
    }
    else
        return error;
}

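/*! \brief forward propagation from the previous layer to the current layer
 */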
template <typename LAYERDATA>
void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData)
{
    if (prevLayerData.hasDropOut ())
    {
        applyWeights<true> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                            currLayerData.weightsBegin (),
                            currLayerData.valuesBegin (), currLayerData.valuesEnd (),
                            prevLayerData.dropOut ());
    }
    else
    {
        // dummy drop-out iterator; it is never dereferenced when HasDropOut == false
        bool dummy = true;
        applyWeights<false> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                             currLayerData.weightsBegin (),
                             currLayerData.valuesBegin (), currLayerData.valuesEnd (),
                             &dummy);
    }
}

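/*! \brief backward propagation of the deltas from the current layer to the previous layer
 */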
template <typename LAYERDATA>
void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData)
{
    if (prevLayerData.hasDropOut ())
    {
        applyWeightsBackwards<true> (currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                                     currLayerData.weightsBegin (),
                                     prevLayerData.deltasBegin (), prevLayerData.deltasEnd (),
                                     prevLayerData.dropOut ());
    }
    else
    {
        bool dummy = true;
        applyWeightsBackwards<false> (currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                                      currLayerData.weightsBegin (),
                                      prevLayerData.deltasBegin (), prevLayerData.deltasEnd (),
                                      &dummy);
    }
}

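/*! \brief update the gradients of the current layer, adding the weight-decay
 * (regularization) term if requested
 */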
template <typename LAYERDATA>
void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData, double factorWeightDecay, EnumRegularization regularization)
{
    // in case of L1 or L2 regularization, the regularization term enters the gradient
    if (factorWeightDecay != 0.0)
    {
        if (regularization == EnumRegularization::L1)
        {
            update<EnumRegularization::L1> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                                            currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                                            currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin (),
                                            currLayerData.weightsBegin (), factorWeightDecay);
        }
        else if (regularization == EnumRegularization::L2)
        {
            update<EnumRegularization::L2> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                                            currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                                            currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin (),
                                            currLayerData.weightsBegin (), factorWeightDecay);
        }
        else
        {
            update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                    currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                    currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin ());
        }
    }
    else
    {
        update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin ());
    }
}

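/*! \brief compensate the weights for the drop-out probabilities
 *
 * scales each layer's weights with the probability that the adjacent nodes
 * are kept (or with its inverse, when switching back to training with drop-out)
 */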
template <typename WeightsType, typename DropProbabilities>
void Net::dropOutWeightFactor (WeightsType& weights,
                               const DropProbabilities& drops,
                               bool inverse)
{
    if (drops.empty () || weights.empty ())
        return;

    auto itWeight = std::begin (weights);
    auto itWeightEnd = std::end (weights);
    auto itDrop = std::begin (drops);
    auto itDropEnd = std::end (drops);
    size_t numNodesPrev = inputSize ();
    double dropFractionPrev = *itDrop;
    ++itDrop;

    for (auto& layer : layers ())
    {
        if (itDrop == itDropEnd)
            break;

        size_t _numNodes = layer.numNodes ();

        double dropFraction = *itDrop;
        double pPrev = 1.0 - dropFractionPrev;
        double p = 1.0 - dropFraction;
        p *= pPrev;

        if (inverse)
        {
            p = 1.0/p;
        }
        size_t _numWeights = layer.numWeights (numNodesPrev);
        for (size_t iWeight = 0; iWeight < _numWeights; ++iWeight)
        {
            if (itWeight == itWeightEnd)
                break;

            *itWeight *= p;
            ++itWeight;
        }
        numNodesPrev = _numNodes;
        dropFractionPrev = dropFraction;
        ++itDrop;
    }
}

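/*! \brief start the training
 *
 * alternates between training cycles on the train patterns and error
 * evaluation on the test patterns until convergence is reached
 */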
template <typename Minimizer>
double Net::train (std::vector<double>& weights,
                   std::vector<Pattern>& trainPattern,
                   const std::vector<Pattern>& testPattern,
                   Minimizer& minimizer,
                   Settings& settings)
{
    settings.startTrainCycle ();

    // JsMVA progress bar maximum (100%)
    if (fIPyMaxIter) *fIPyMaxIter = 100;

    settings.pads (4);
    settings.create ("trainErrors", 100, 0, 100, 100, 0, 1);
    settings.create ("testErrors", 100, 0, 100, 100, 0, 1);

    size_t cycleCount = 0;
    size_t testCycleCount = 0;
    double testError = 1e20;
    double trainError = 1e20;
    size_t dropOutChangeCount = 0;

    DropContainer dropContainer;
    DropContainer dropContainerTest;
    const std::vector<double>& dropFractions = settings.dropFractions ();
    bool isWeightsForDrop = false;

    // until convergence
    do
    {
        ++cycleCount;

        // if drop-out is requested, refresh the drop-out mask every
        // settings.dropRepetitions () cycles
        size_t dropIndex = 0;
        if (!dropFractions.empty () && dropOutChangeCount % settings.dropRepetitions () == 0)
        {
            // fill the drop-out container with randomly selected nodes
            dropContainer.clear ();
            size_t _numNodes = inputSize ();
            double dropFraction = 0.0;
            dropFraction = dropFractions.at (dropIndex);
            ++dropIndex;
            fillDropContainer (dropContainer, dropFraction, _numNodes);
            for (auto itLayer = begin (m_layers), itLayerEnd = end (m_layers); itLayer != itLayerEnd; ++itLayer, ++dropIndex)
            {
                auto& layer = *itLayer;
                _numNodes = layer.numNodes ();

                // how many nodes have to be dropped in this layer
                dropFraction = 0.0;
                if (dropFractions.size () > dropIndex)
                    dropFraction = dropFractions.at (dropIndex);

                fillDropContainer (dropContainer, dropFraction, _numNodes);
            }
            isWeightsForDrop = true;
        }

        // execute one training cycle
        trainError = trainCycle (minimizer, weights, begin (trainPattern), end (trainPattern), settings, dropContainer);

        // check if we have to execute a test
        bool hasConverged = false;
        if (testCycleCount % settings.testRepetitions () == 0)
        {
            if (isWeightsForDrop)
            {
                dropOutWeightFactor (weights, dropFractions);
                isWeightsForDrop = false;
            }

            testError = 0;
            settings.startTestCycle ();
            if (settings.useMultithreading ())
            {
                // split the test patterns into batches, one for each thread
                size_t numThreads = std::thread::hardware_concurrency ();
                size_t patternPerThread = testPattern.size () / numThreads;
                std::vector<Batch> batches;
                auto itPat = testPattern.begin ();

                for (size_t idxThread = 0; idxThread < numThreads-1; ++idxThread)
                {
                    batches.push_back (Batch (itPat, itPat + patternPerThread));
                    itPat += patternPerThread;
                }
                if (itPat != testPattern.end ())
                    batches.push_back (Batch (itPat, testPattern.end ()));

                // one future per batch, each returning the batch error and
                // the output of the last layer
                std::vector<std::future<std::tuple<double,std::vector<double>>>> futures;
                for (auto& batch : batches)
                {
                    futures.push_back (
                        std::async (std::launch::async, [&]()
                        {
                            std::vector<double> localOutput;
                            pass_through_type passThrough (settings, batch, dropContainerTest);
                            double testBatchError = (*this) (passThrough, weights, ModeOutput::FETCH, localOutput);
                            return std::make_tuple (testBatchError, localOutput);
                        })
                    );
                }

                auto itBatch = batches.begin ();
                for (auto& f : futures)
                {
                    std::tuple<double,std::vector<double>> result = f.get ();
                    testError += std::get<0>(result) / batches.size ();
                    std::vector<double> output = std::get<1>(result);
                    // one output value per pattern for each output node except the first
                    if (output.size () == (outputSize () - 1) * itBatch->size ())
                    {
                        auto output_iterator = output.begin ();
                        for (auto pattern_it = itBatch->begin (); pattern_it != itBatch->end (); ++pattern_it)
                        {
                            for (size_t output_index = 1; output_index < outputSize (); ++output_index)
                            {
                                settings.testSample (0, *output_iterator, (*pattern_it).output ().at (0),
                                                     (*pattern_it).weight ());
                                ++output_iterator;
                            }
                        }
                    }
                    ++itBatch;
                }
            }
            else
            {
                std::vector<double> output;
                {
                    // process the test data in one single batch
                    Batch batch (begin (testPattern), end (testPattern));
                    output.clear ();
                    pass_through_type passThrough (settings, batch, dropContainerTest);
                    double testPatternError = (*this) (passThrough, weights, ModeOutput::FETCH, output);
                    if (output.size () == (outputSize () - 1) * batch.size ())
                    {
                        auto output_iterator = output.begin ();
                        for (auto pattern_it = batch.begin (); pattern_it != batch.end (); ++pattern_it)
                        {
                            for (size_t output_index = 1; output_index < outputSize (); ++output_index)
                            {
                                settings.testSample (0, *output_iterator, (*pattern_it).output ().at (0),
                                                     (*pattern_it).weight ());
                                ++output_iterator;
                            }
                        }
                    }
                    testError += testPatternError;
                }
            }
            settings.endTestCycle ();

            settings.computeResult (*this, weights);

            hasConverged = settings.hasConverged (testError);
            if (!hasConverged && !isWeightsForDrop)
            {
                dropOutWeightFactor (weights, dropFractions, true); // inverse
                isWeightsForDrop = true;
            }
        }
        ++testCycleCount;
        ++dropOutChangeCount;

        // plot the progress of the train and test errors
        settings.addPoint ("trainErrors", cycleCount, trainError);
        settings.addPoint ("testErrors", cycleCount, testError);
        settings.plot ("trainErrors", "C", 1, kBlue);
        settings.plot ("testErrors", "C", 1, kMagenta);

        // update the JsMVA interactive display, if present
        if (fInteractive)
        {
            fInteractive->AddPoint (cycleCount, trainError, testError);
            if (*fExitFromTraining) break;
            *fIPyCurrentIter = 100*(double)settings.maxConvergenceCount () /(double)settings.convergenceSteps ();
        }

        if (hasConverged)
            break;

        if ((int)cycleCount % 10 == 0)
        {
            TString convText = TString::Format ("(train/test/epo/conv/maxco): %.3g/%.3g/%d/%d/%d",
                                                trainError,
                                                testError,
                                                (int)cycleCount,
                                                (int)settings.convergenceCount (),
                                                (int)settings.maxConvergenceCount ());
            double progress = 100*(double)settings.maxConvergenceCount () /(double)settings.convergenceSteps ();
            settings.cycle (progress, convText);
        }
    }
    while (true);
    settings.endTrainCycle (trainError);

    TString convText = TString::Format ("(train/test/epoch): %.4g/%.4g/%d", trainError, testError, (int)cycleCount);
    double progress = 100*(double)settings.maxConvergenceCount () /(double)settings.convergenceSteps ();
    settings.cycle (progress, convText);

    return testError;
}

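/*! \brief execute a single training cycle
 *
 * shuffles the patterns, splits them into batches and runs the minimizer
 * on each batch (in parallel if multithreading is enabled)
 */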
template <typename Iterator, typename Minimizer>
inline double Net::trainCycle (Minimizer& minimizer, std::vector<double>& weights,
                               Iterator itPatternBegin, Iterator itPatternEnd, Settings& settings, DropContainer& dropContainer)
{
    double error = 0.0;
    size_t numPattern = std::distance (itPatternBegin, itPatternEnd);
    size_t numBatches = numPattern/settings.batchSize ();
    size_t numBatches_stored = numBatches;

    // note: a default-constructed engine uses a fixed seed, so the shuffle
    // order is the same in every cycle
    std::shuffle (itPatternBegin, itPatternEnd, std::default_random_engine{});
    Iterator itPatternBatchBegin = itPatternBegin;
    Iterator itPatternBatchEnd = itPatternBatchBegin;

    // create batches
    std::vector<Batch> batches;
    while (numBatches > 0)
    {
        std::advance (itPatternBatchEnd, settings.batchSize ());
        batches.push_back (Batch (itPatternBatchBegin, itPatternBatchEnd));
        itPatternBatchBegin = itPatternBatchEnd;
        --numBatches;
    }

    // add the remaining patterns to the last batch
    if (itPatternBatchEnd != itPatternEnd)
        batches.push_back (Batch (itPatternBatchEnd, itPatternEnd));

    if (settings.useMultithreading ())
    {
        // distribute the batches into bunches, one bunch for each thread
        size_t numThreads = std::thread::hardware_concurrency ();
        size_t batchesPerThread = batches.size () / numThreads;
        typedef std::vector<Batch>::iterator batch_iterator;
        std::vector<std::pair<batch_iterator,batch_iterator>> batchVec;
        batch_iterator itBatchBegin = std::begin (batches);
        batch_iterator itBatchCurrEnd = std::begin (batches);
        batch_iterator itBatchEnd = std::end (batches);
        for (size_t iT = 0; iT < numThreads; ++iT)
        {
            if (iT == numThreads-1)
                itBatchCurrEnd = itBatchEnd;
            else
                std::advance (itBatchCurrEnd, batchesPerThread);
            batchVec.push_back (std::make_pair (itBatchBegin, itBatchCurrEnd));
            itBatchBegin = itBatchCurrEnd;
        }

        // one future per bunch; each thread minimizes on a clone of the minimizer
        std::vector<std::future<double>> futures;
        for (auto& batchRange : batchVec)
        {
            futures.push_back (
                std::async (std::launch::async, [&]()
                {
                    double localError = 0.0;
                    for (auto it = batchRange.first, itEnd = batchRange.second; it != itEnd; ++it)
                    {
                        Batch& batch = *it;
                        pass_through_type settingsAndBatch (settings, batch, dropContainer);
                        Minimizer minimizerClone (minimizer);
                        localError += minimizerClone ((*this), weights, settingsAndBatch);
                    }
                    return localError;
                })
            );
        }

        for (auto& f : futures)
            error += f.get ();
    }
    else
    {
        for (auto& batch : batches)
        {
            std::tuple<Settings&, Batch&, DropContainer&> settingsAndBatch (settings, batch, dropContainer);
            error += minimizer ((*this), weights, settingsAndBatch);
        }
    }

    numBatches_stored = std::max (numBatches_stored, size_t(1));
    error /= numBatches_stored;
    settings.testIteration ();

    return error;
}

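/*! \brief compute the neural net output for the given input
 */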
template <typename Weights>
std::vector<double> Net::compute (const std::vector<double>& input, const Weights& weights) const
{
    std::vector<LayerData> layerData;
    layerData.reserve (m_layers.size ()+1);
    auto itWeight = begin (weights);
    auto itInputBegin = begin (input);
    auto itInputEnd = end (input);
    layerData.push_back (LayerData (itInputBegin, itInputEnd));
    size_t numNodesPrev = input.size ();

    // prepare one LayerData per layer, pointing into the weight vector
    for (auto& layer: m_layers)
    {
        layerData.push_back (LayerData (layer.numNodes (), itWeight,
                                        layer.activationFunction (),
                                        layer.modeOutputValues ()));
        size_t _numWeights = layer.numWeights (numNodesPrev);
        itWeight += _numWeights;
        numNodesPrev = layer.numNodes ();
    }

    // forward propagation
    forwardPattern (m_layers, layerData);

    // fetch the output of the last layer
    std::vector<double> output;
    fetchOutput (layerData.back (), output);
    return output;
}

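/*! \brief execute the network on a batch; variants with and without gradient
 * computation and with and without fetching the output
 */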
template <typename Weights, typename PassThrough>
double Net::operator() (PassThrough& settingsAndBatch, const Weights& weights) const
{
    std::vector<double> nothing; // empty gradients: no training, forward pass only
    assert (numWeights () == weights.size ());
    double error = forward_backward (m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (nothing), std::end (nothing), 10000 /* no layer is trained */, nothing, false);
    return error;
}

template <typename Weights, typename PassThrough, typename OutContainer>
double Net::operator() (PassThrough& settingsAndBatch, const Weights& weights, ModeOutput /*eFetch*/, OutContainer& outputContainer) const
{
    std::vector<double> nothing; // empty gradients: no training, forward pass only
    assert (numWeights () == weights.size ());
    double error = forward_backward (m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (nothing), std::end (nothing), 10000 /* no layer is trained */, outputContainer, true);
    return error;
}

template <typename Weights, typename Gradients, typename PassThrough>
double Net::operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients) const
{
    std::vector<double> nothing;
    assert (numWeights () == weights.size ());
    assert (weights.size () == gradients.size ());
    double error = forward_backward (m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (gradients), std::end (gradients), 0, nothing, false);
    return error;
}

template <typename Weights, typename Gradients, typename PassThrough, typename OutContainer>
double Net::operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients, ModeOutput eFetch, OutContainer& outputContainer) const
{
    MATH_UNUSED(eFetch);
    assert (numWeights () == weights.size ());
    assert (weights.size () == gradients.size ());
    double error = forward_backward (m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (gradients), std::end (gradients), 0, outputContainer, true);
    return error;
}

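/*! \brief prepare the layer data for forward and backward propagation
 *
 * creates one LayerData per (layer, pattern), wiring up the weights, the
 * gradients and the drop-out mask
 */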
template <typename LayerContainer, typename DropContainer, typename ItWeight, typename ItGradient>
std::vector<std::vector<LayerData>> Net::prepareLayerData (LayerContainer& _layers,
                                                           Batch& batch,
                                                           const DropContainer& dropContainer,
                                                           ItWeight itWeightBegin,
                                                           ItWeight /*itWeightEnd*/,
                                                           ItGradient itGradientBegin,
                                                           ItGradient itGradientEnd,
                                                           size_t& totalNumWeights) const
{
    LayerData::const_dropout_iterator itDropOut;
    bool usesDropOut = !dropContainer.empty ();
    if (usesDropOut)
        itDropOut = std::begin (dropContainer);

    if (_layers.empty ())
        throw std::string ("no layers in this net");

    totalNumWeights = 0;
    std::vector<std::vector<LayerData>> layerPatternData;
    layerPatternData.reserve (_layers.size ()+1);
    ItWeight itWeight = itWeightBegin;
    ItGradient itGradient = itGradientBegin;
    size_t numNodesPrev = inputSize ();
    typename Pattern::const_iterator itInputBegin;
    typename Pattern::const_iterator itInputEnd;

    // input layer: wire the patterns into the first LayerData entries
    layerPatternData.push_back (std::vector<LayerData>());
    for (const Pattern& _pattern : batch)
    {
        std::vector<LayerData>& layerData = layerPatternData.back ();
        layerData.push_back (LayerData (numNodesPrev));

        itInputBegin = _pattern.beginInput ();
        itInputEnd = _pattern.endInput ();
        layerData.back ().setInput (itInputBegin, itInputEnd);

        if (usesDropOut)
            layerData.back ().setDropOut (itDropOut);
    }

    if (usesDropOut)
        itDropOut += _layers.back ().numNodes ();

    // the remaining layers
    for (auto itLayer = begin (_layers), itLayerEnd = end (_layers); itLayer != itLayerEnd; ++itLayer)
    {
        auto& layer = *itLayer;
        layerPatternData.push_back (std::vector<LayerData>());

        for (const Pattern& _pattern : batch)
        {
            std::vector<LayerData>& layerData = layerPatternData.back ();

            // if no gradients are requested, prepare the layer data for the
            // forward pass only
            if (itGradientBegin == itGradientEnd)
            {
                layerData.push_back (LayerData (layer.numNodes (), itWeight,
                                                layer.activationFunction (),
                                                layer.modeOutputValues ()));
            }
            else
            {
                layerData.push_back (LayerData (layer.numNodes (), itWeight, itGradient,
                                                layer.activationFunction (),
                                                layer.inverseActivationFunction (),
                                                layer.modeOutputValues ()));
            }

            if (usesDropOut)
            {
                layerData.back ().setDropOut (itDropOut);
            }
        }

        if (usesDropOut)
        {
            itDropOut += layer.numNodes ();
        }
        size_t _numWeights = layer.numWeights (numNodesPrev);
        totalNumWeights += _numWeights;
        itWeight += _numWeights;
        itGradient += _numWeights;
        numNodesPrev = layer.numNodes ();
    }
    assert (totalNumWeights > 0);
    return layerPatternData;
}

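/*! \brief forward propagation of a single pattern through all layers
 */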
template <typename LayerContainer>
void Net::forwardPattern (const LayerContainer& _layers,
                          std::vector<LayerData>& layerData) const
{
    size_t idxLayer = 0, idxLayerEnd = _layers.size ();
    for (; idxLayer < idxLayerEnd; ++idxLayer)
    {
        LayerData& prevLayerData = layerData.at (idxLayer);
        LayerData& currLayerData = layerData.at (idxLayer+1);

        forward (prevLayerData, currLayerData);

        applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction ());
    }
}

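/*! \brief forward propagation of a whole batch
 *
 * also fills the activation-function gradients for the layers which are
 * going to be trained
 */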
template <typename LayerContainer, typename LayerPatternContainer>
void Net::forwardBatch (const LayerContainer& _layers,
                        LayerPatternContainer& layerPatternData,
                        std::vector<double>& valuesMean,
                        std::vector<double>& valuesStdDev,
                        size_t trainFromLayer) const
{
    valuesMean.clear ();
    valuesStdDev.clear ();

    // forward propagate layer by layer
    for (size_t idxLayer = 0, idxLayerEnd = layerPatternData.size (); idxLayer < idxLayerEnd-1; ++idxLayer)
    {
        bool doTraining = idxLayer >= trainFromLayer;

        // get the layer data of the current layer and the previous one
        std::vector<LayerData>& prevLayerPatternData = layerPatternData.at (idxLayer);
        std::vector<LayerData>& currLayerPatternData = layerPatternData.at (idxLayer+1);

        size_t numPattern = prevLayerPatternData.size ();

        // propagate the weights for each pattern
        for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern)
        {
            const LayerData& prevLayerData = prevLayerPatternData.at (idxPattern);
            LayerData& currLayerData = currLayerPatternData.at (idxPattern);

            forward (prevLayerData, currLayerData);
        }

        // apply the activation functions (and fill the gradients for the
        // layers which are to be trained)
        for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern)
        {
            LayerData& currLayerData = currLayerPatternData.at (idxPattern);

            if (doTraining)
                applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction (),
                                currLayerData.inverseActivationFunction (), currLayerData.valueGradientsBegin ());
            else
                applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction ());
        }
    }
}

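/*! \brief fetch the output of the last layer, either the raw values or the
 * sigmoid/soft-max probabilities
 */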
template <typename OutputContainer>
void Net::fetchOutput (const LayerData& lastLayerData, OutputContainer& outputContainer) const
{
    ModeOutputValues eModeOutput = lastLayerData.outputMode ();
    if (isFlagSet (ModeOutputValues::DIRECT, eModeOutput))
    {
        outputContainer.insert (outputContainer.end (), lastLayerData.valuesBegin (), lastLayerData.valuesEnd ());
    }
    else if (isFlagSet (ModeOutputValues::SIGMOID, eModeOutput) ||
             isFlagSet (ModeOutputValues::SOFTMAX, eModeOutput))
    {
        const auto& prob = lastLayerData.probabilities ();
        outputContainer.insert (outputContainer.end (), prob.begin (), prob.end ());
    }
    else
        assert (false);
}

template <typename OutputContainer>
void Net::fetchOutput (const std::vector<LayerData>& lastLayerPatternData, OutputContainer& outputContainer) const
{
    for (const LayerData& lastLayerData : lastLayerPatternData)
        fetchOutput (lastLayerData, outputContainer);
}

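/*! \brief compute the error of the last layer for each pattern of the batch
 */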
template <typename ItWeight>
std::tuple<double,double> Net::computeError (const Settings& settings,
                                             std::vector<LayerData>& lastLayerData,
                                             Batch& batch,
                                             ItWeight itWeightBegin,
                                             ItWeight itWeightEnd) const
{
    typename std::vector<LayerData>::iterator itLayerData = lastLayerData.begin ();

    typename std::vector<Pattern>::const_iterator itPattern = batch.begin ();
    typename std::vector<Pattern>::const_iterator itPatternEnd = batch.end ();

    double sumWeights (0.0);
    double sumError (0.0);

    // accumulate the error (and the pattern weights) over the batch
    for ( ; itPattern != itPatternEnd; ++itPattern, ++itLayerData)
    {
        LayerData& layerData = (*itLayerData);
        const Pattern& _pattern = (*itPattern);
        double error = errorFunction (layerData, _pattern.output (),
                                      itWeightBegin, itWeightEnd,
                                      _pattern.weight (), settings.factorWeightDecay (),
                                      settings.regularization ());
        sumWeights += fabs (_pattern.weight ());
        sumError += error;
    }
    return std::make_tuple (sumError, sumWeights);
}

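/*! \brief backpropagate the deltas and update the gradients, layer by layer
 * in reverse order
 */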
template <typename Settings>
void Net::backPropagate (std::vector<std::vector<LayerData>>& layerPatternData,
                         const Settings& settings,
                         size_t trainFromLayer,
                         size_t totalNumWeights) const
{
    bool doTraining = layerPatternData.size () > trainFromLayer;
    if (!doTraining)
        return;

    // walk backwards through the layers
    size_t idxLayer = layerPatternData.size ();
    for (auto itLayerPatternData = layerPatternData.rbegin (), itLayerPatternDataBegin = layerPatternData.rend ();
         itLayerPatternData != itLayerPatternDataBegin; ++itLayerPatternData)
    {
        --idxLayer;
        if (idxLayer <= trainFromLayer)
            break;

        std::vector<LayerData>& currLayerDataColl = *(itLayerPatternData);
        std::vector<LayerData>& prevLayerDataColl = *(itLayerPatternData+1);

        // backpropagate the deltas and update the gradients for each pattern
        for (typename std::vector<LayerData>::iterator itCurrLayerData = begin (currLayerDataColl), itCurrLayerDataEnd = end (currLayerDataColl),
                 itPrevLayerData = begin (prevLayerDataColl);
             itCurrLayerData != itCurrLayerDataEnd; ++itCurrLayerData, ++itPrevLayerData)
        {
            LayerData& currLayerData = (*itCurrLayerData);
            LayerData& prevLayerData = *(itPrevLayerData);

            backward (prevLayerData, currLayerData);

            // the weight-decay factor is divided by the total number of weights here
            update (prevLayerData, currLayerData, settings.factorWeightDecay ()/totalNumWeights, settings.regularization ());
        }
    }
}

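/*! \brief main entry point for the computation: forward propagation, error
 * computation and (optionally) backpropagation of a batch
 */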
template <typename LayerContainer, typename PassThrough, typename ItWeight, typename ItGradient, typename OutContainer>
double Net::forward_backward (LayerContainer& _layers, PassThrough& settingsAndBatch,
                              ItWeight itWeightBegin, ItWeight itWeightEnd,
                              ItGradient itGradientBegin, ItGradient itGradientEnd,
                              size_t trainFromLayer,
                              OutContainer& outputContainer, bool doFetchOutput) const
{
    Settings& settings = std::get<0>(settingsAndBatch);
    Batch& batch = std::get<1>(settingsAndBatch);
    DropContainer& dropContainer = std::get<2>(settingsAndBatch);

    double sumError = 0.0;
    double sumWeights = 0.0;

    // prepare the layer data
    size_t totalNumWeights (0);
    std::vector<std::vector<LayerData>> layerPatternData = prepareLayerData (_layers,
                                                                             batch,
                                                                             dropContainer,
                                                                             itWeightBegin,
                                                                             itWeightEnd,
                                                                             itGradientBegin,
                                                                             itGradientEnd,
                                                                             totalNumWeights);

    // forward propagation
    std::vector<double> valuesMean;
    std::vector<double> valuesStdDev;
    forwardBatch (_layers, layerPatternData, valuesMean, valuesStdDev, trainFromLayer);

    // fetch the output of the last layer if requested
    if (doFetchOutput)
    {
        fetchOutput (layerPatternData.back (), outputContainer);
    }

    // compute the error
    std::tie (sumError, sumWeights) = computeError (settings, layerPatternData.back (), batch, itWeightBegin, itWeightBegin + totalNumWeights);

    // backpropagation
    backPropagate (layerPatternData, settings, trainFromLayer, totalNumWeights);

    // normalize the gradients by the batch size
    double batchSize = std::distance (std::begin (batch), std::end (batch));
    for (auto it = itGradientBegin; it != itGradientEnd; ++it)
        (*it) /= batchSize;

    // normalize the error by the sum of the pattern weights
    sumError /= sumWeights;
    return sumError;
}

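/*! \brief initialize the weights according to the chosen strategy
 */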
template <typename OutIterator>
void Net::initializeWeights (WeightInitializationStrategy eInitStrategy, OutIterator itWeight)
{
    if (eInitStrategy == WeightInitializationStrategy::XAVIER)
    {
        // gaussian weights with sigma = sqrt(2/nIn)
        int numInput = inputSize ();
        for (auto& layer: layers ())
        {
            double nIn = numInput;
            double stdDev = sqrt (2.0/nIn);
            for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
            {
                (*itWeight) = DNN::gaussDouble (0.0, stdDev);
                ++itWeight;
            }
            numInput = layer.numNodes ();
        }
        return;
    }

    if (eInitStrategy == WeightInitializationStrategy::XAVIERUNIFORM)
    {
        // uniform weights in [-sqrt(2/nIn), sqrt(2/nIn)]
        int numInput = inputSize ();
        for (auto& layer: layers ())
        {
            double nIn = numInput;
            double minVal = -sqrt (2.0/nIn);
            double maxVal = sqrt (2.0/nIn);
            for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
            {
                (*itWeight) = DNN::uniformDouble (minVal, maxVal);
                ++itWeight;
            }
            numInput = layer.numNodes ();
        }
        return;
    }

    if (eInitStrategy == WeightInitializationStrategy::TEST)
    {
        // narrow gaussian weights (for tests)
        int numInput = inputSize ();
        for (auto& layer: layers ())
        {
            for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
            {
                (*itWeight) = DNN::gaussDouble (0.0, 0.1);
                ++itWeight;
            }
            numInput = layer.numNodes ();
        }
        return;
    }

    if (eInitStrategy == WeightInitializationStrategy::LAYERSIZE)
    {
        // gaussian weights scaled with the number of weights of the layer
        int numInput = inputSize ();
        for (auto& layer: layers ())
        {
            double nIn = numInput;
            for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
            {
                (*itWeight) = DNN::gaussDouble (0.0, sqrt (layer.numWeights (nIn)));
                ++itWeight;
            }
            numInput = layer.numNodes ();
        }
        return;
    }
}

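/*! \brief compute the error of one pattern, dispatching on the configured
 * error function and adding the weight decay if requested
 */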
template <typename Container, typename ItWeight>
double Net::errorFunction (LayerData& layerData,
                           Container truth,
                           ItWeight itWeight,
                           ItWeight itWeightEnd,
                           double patternWeight,
                           double factorWeightDecay,
                           EnumRegularization eRegularization) const
{
    double error (0);
    switch (m_eErrorFunction)
    {
    case ModeErrorFunction::SUMOFSQUARES:
    {
        error = sumOfSquares (layerData.valuesBegin (), layerData.valuesEnd (), begin (truth), end (truth),
                              layerData.deltasBegin (), layerData.deltasEnd (),
                              layerData.inverseActivationFunction (),
                              patternWeight);
        break;
    }
    case ModeErrorFunction::CROSSENTROPY:
    {
        assert (!TMVA::DNN::isFlagSet (ModeOutputValues::DIRECT, layerData.outputMode ()));
        std::vector<double> probabilities = layerData.probabilities ();
        error = crossEntropy (begin (probabilities), end (probabilities),
                              begin (truth), end (truth),
                              layerData.deltasBegin (), layerData.deltasEnd (),
                              layerData.inverseActivationFunction (),
                              patternWeight);
        break;
    }
    case ModeErrorFunction::CROSSENTROPY_MUTUALEXCLUSIVE:
    {
        assert (!TMVA::DNN::isFlagSet (ModeOutputValues::DIRECT, layerData.outputMode ()));
        std::vector<double> probabilities = layerData.probabilities ();
        error = softMaxCrossEntropy (begin (probabilities), end (probabilities),
                                     begin (truth), end (truth),
                                     layerData.deltasBegin (), layerData.deltasEnd (),
                                     layerData.inverseActivationFunction (),
                                     patternWeight);
        break;
    }
    }
    if (factorWeightDecay != 0 && eRegularization != EnumRegularization::NONE)
    {
        error = weightDecay (error, itWeight, itWeightEnd, factorWeightDecay, eRegularization);
    }
    return error;
}

} // namespace DNN
} // namespace TMVA

#endif // TMVA_NEURAL_NET_I