// @(#)root/tmva $Id$
// Author: Krzysztof Danielowski, Andreas Hoecker, Matt Jachowski, Kamil Kraszewski, Maciej Kruk, Peter Speckmayer, Joerg Stelzer, Eckhard von Toerne, Jan Therhaag, Jiahang Zhong

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
 * Package: TMVA                                                                  *
 * Class  : MethodMLP                                                             *
 *                                                                                *
 * Description:                                                                   *
 *      ANN Multilayer Perceptron class for the discrimination of signal          *
 *      from background. BFGS implementation based on TMultiLayerPerceptron       *
 *      class from ROOT (http://root.cern.ch).                                    *
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Krzysztof Danielowski <danielow@cern.ch>       - IFJ & AGH, Poland        *
 *      Andreas Hoecker       <Andreas.Hocker@cern.ch> - CERN, Switzerland        *
 *      Matt Jachowski        <jachowski@stanford.edu> - Stanford University, USA *
 *      Kamil Kraszewski      <kalq@cern.ch>           - IFJ & UJ, Poland         *
 *      Maciej Kruk           <mkruk@cern.ch>          - IFJ & AGH, Poland        *
 *      Peter Speckmayer      <peter.speckmayer@cern.ch> - CERN, Switzerland      *
 *      Joerg Stelzer         <stelzer@cern.ch>        - DESY, Germany            *
 *      Jan Therhaag          <Jan.Therhaag@cern.ch>   - U of Bonn, Germany       *
 *      Eckhard v. Toerne     <evt@uni-bonn.de>        - U of Bonn, Germany       *
 *      Jiahang Zhong         <Jiahang.Zhong@cern.ch>  - Academia Sinica, Taipei  *
 *                                                                                *
 * Copyright (c) 2005-2011:                                                       *
 *      CERN, Switzerland                                                         *
 *      U. of Victoria, Canada                                                    *
 *      MPI-K Heidelberg, Germany                                                 *
 *      U. of Bonn, Germany                                                       *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without             *
 * modification, are permitted according to the terms listed in LICENSE           *
 * (see tmva/doc/LICENSE)                                                         *
 **********************************************************************************/

#ifndef ROOT_TMVA_MethodMLP
#define ROOT_TMVA_MethodMLP

//////////////////////////////////////////////////////////////////////////
//                                                                      //
// MethodMLP                                                            //
//                                                                      //
// Multilayer Perceptron built on top of MethodANNBase                  //
//                                                                      //
//////////////////////////////////////////////////////////////////////////
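
// A minimal booking sketch (an illustration added here, not part of the
// original header). It assumes the pre-DataLoader TMVA::Factory interface,
// and the option values are examples rather than defaults:
//
//    TMVA::Factory factory( "TMVAClassification", outputFile );
//    factory.BookMethod( TMVA::Types::kMLP, "MLP",
//                        "HiddenLayers=N+5:TrainingMethod=BFGS:TestRate=5" );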

#include <vector>
#include <utility>
#include "TString.h"
#include "TTree.h"
#include "TRandom3.h"
#include "TH1F.h"
#include "TMatrixDfwd.h"

#include "TMVA/IFitterTarget.h"
#include "TMVA/MethodBase.h"
#include "TMVA/MethodANNBase.h"
#include "TMVA/TNeuron.h"
#include "TMVA/TActivation.h"
#include "TMVA/ConvergenceTest.h"

// defined and immediately undefined: the Minuit-based minimization code
// below is compiled out by default; remove the #undef to enable it
#define MethodMLP_UseMinuit__
#undef  MethodMLP_UseMinuit__

namespace TMVA {

   class MethodMLP : public MethodANNBase, public IFitterTarget, public ConvergenceTest {

   public:

      // standard constructors
      MethodMLP( const TString& jobName,
                 const TString& methodTitle,
                 DataSetInfo& theData,
                 const TString& theOption );

      MethodMLP( DataSetInfo& theData,
                 const TString& theWeightFile );

      virtual ~MethodMLP();

      virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets );

      void Train();
      // for GA
      Double_t ComputeEstimator ( std::vector<Double_t>& parameters );
      Double_t EstimatorFunction( std::vector<Double_t>& parameters );

      enum ETrainingMethod { kBP=0, kBFGS, kGA };
      enum EBPTrainingMode { kSequential=0, kBatch };

      bool     HasInverseHessian() { return fCalculateErrors; }
      Double_t GetMvaValue( Double_t* err = nullptr, Double_t* errUpper = nullptr );
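
      // Evaluation sketch (an illustration added here, not original code):
      // when fCalculateErrors is set, the approximate inverse Hessian also
      // yields an error estimate alongside the MVA value,
      //
      //    Double_t err = 0, errUpper = 0;
      //    Double_t mva = mlp->GetMvaValue( &err, &errUpper );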

   protected:

      // make ROOT-independent C++ class for classifier response (classifier-specific implementation)
      void MakeClassSpecific( std::ostream&, const TString& ) const;

      // get help message text
      void GetHelpMessage() const;

   private:

      // the option handling methods
      void DeclareOptions();
      void ProcessOptions();

      // general helper functions
      void     Train( Int_t nEpochs );
      void     Init();
      void     InitializeLearningRates(); // although this is only needed by backprop

      // used as a measure of success in all minimization techniques
      Double_t CalculateEstimator( Types::ETreeType treeType = Types::kTraining, Int_t iEpoch = -1 );

      // BFGS functions
      void     BFGSMinimize( Int_t nEpochs );
      void     SetGammaDelta( TMatrixD &Gamma, TMatrixD &Delta, std::vector<Double_t> &Buffer );
      void     SteepestDir( TMatrixD &Dir );
      Bool_t   GetHessian( TMatrixD &Hessian, TMatrixD &Gamma, TMatrixD &Delta );
      void     SetDir( TMatrixD &Hessian, TMatrixD &Dir );
      Double_t DerivDir( TMatrixD &Dir );
      Bool_t   LineSearch( TMatrixD &Dir, std::vector<Double_t> &Buffer, Double_t* dError=nullptr ); ///< zjh
      void     ComputeDEDw();
      void     SimulateEvent( const Event* ev );
      void     SetDirWeights( std::vector<Double_t> &Origin, TMatrixD &Dir, Double_t alpha );
      Double_t GetError();
      Double_t GetMSEErr( const Event* ev, UInt_t index = 0 );  ///< zjh
      Double_t GetCEErr( const Event* ev, UInt_t index = 0 );   ///< zjh
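
      // Reading aid (added, not original code): the methods above implement a
      // quasi-Newton scheme; the standard BFGS inverse-Hessian update, with
      // Gamma = g_{k+1} - g_k (gradient difference) and Delta = w_{k+1} - w_k
      // (weight step) as filled by SetGammaDelta, reads
      //
      //    H_{k+1} = H_k + (1 + Gamma^T H_k Gamma / (Delta^T Gamma))
      //                     * Delta Delta^T / (Delta^T Gamma)
      //                  - (Delta Gamma^T H_k + H_k Gamma Delta^T) / (Delta^T Gamma)
      //
      // The search direction is then Dir = -H_{k+1} g_{k+1} (SetDir), followed
      // by a LineSearch along Dir.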

      // backpropagation functions
      void     BackPropagationMinimize( Int_t nEpochs );
      void     TrainOneEpoch();
      void     Shuffle( Int_t* index, Int_t n );
      void     DecaySynapseWeights( Bool_t lateEpoch );
      void     TrainOneEvent( Int_t ievt );
      Double_t GetDesiredOutput( const Event* ev );
      void     UpdateNetwork( Double_t desired, Double_t eventWeight=1.0 );
      void     UpdateNetwork( const std::vector<Float_t>& desired, Double_t eventWeight=1.0 );
      void     CalculateNeuronDeltas();
      void     UpdateSynapses();
      void     AdjustSynapseWeights();
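
      // Reading aid (added, not original code): in sequential mode each event
      // triggers a plain gradient-descent step on every synapse weight w,
      //
      //    w  <-  w - fLearnRate * dE/dw ,
      //
      // where E is the event-weighted error of the current event; fDecayRate
      // reduces the learning rate as training progresses (cf. DecaySynapseWeights).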

      // faster backpropagation
      void     TrainOneEventFast( Int_t ievt, Float_t*& branchVar, Int_t& type );

      // genetic algorithm functions
      void GeneticMinimize();

#ifdef MethodMLP_UseMinuit__
      // Minuit functions -- compiled out by default because they rely on a static this-pointer
      void MinuitMinimize();
      static MethodMLP* GetThisPtr();
      static void IFCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t ifl );
      void FCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t ifl );
#endif

      // general
      bool               fUseRegulator;         ///< zjh
      bool               fCalculateErrors;      ///< compute inverse hessian matrix at the end of the training
      Double_t           fPrior;                ///< zjh
      std::vector<Double_t> fPriorDev;          ///< zjh
      void               GetApproxInvHessian( TMatrixD& InvHessian, bool regulate=true );   ///< rank-1 approximation, neglects 2nd derivatives //zjh
      void               UpdateRegulators();    ///< zjh
      void               UpdatePriors();        ///< zjh
      Int_t              fUpdateLimit;          ///< zjh

      ETrainingMethod fTrainingMethod; ///< method of training: BP, BFGS or GA
      TString         fTrainMethodS;   ///< training method option parameter

      Float_t         fSamplingFraction;  ///< fraction of events which is sampled for training
      Float_t         fSamplingEpoch;     ///< fraction of epochs where sampling is used
      Float_t         fSamplingWeight;    ///< changing factor for event weights when sampling is turned on
      Bool_t          fSamplingTraining;  ///< the training sample is sampled
      Bool_t          fSamplingTesting;   ///< the testing sample is sampled

      // BFGS variables
      Double_t        fLastAlpha;      ///< line search variable
      Double_t        fTau;            ///< line search variable
      Int_t           fResetStep;      ///< reset time (how often the Hessian matrix is cleared)

      // backpropagation variables
      Double_t        fLearnRate;      ///< learning rate for synapse weight adjustments
      Double_t        fDecayRate;      ///< decay rate for the above learning rate
      EBPTrainingMode fBPMode;         ///< backprop learning mode (sequential or batch)
      TString         fBpModeS;        ///< backprop learning mode option string (sequential or batch)
      Int_t           fBatchSize;      ///< batch size, only matters in batch learning mode
      Int_t           fTestRate;       ///< overtraining test performed every fTestRate-th epoch
      Bool_t          fEpochMon;       ///< create and fill epoch-wise monitoring histograms (makes the output file big!)

      // genetic algorithm variables
      Int_t           fGA_nsteps;      ///< GA settings: number of steps
      Int_t           fGA_preCalc;     ///< GA settings: number of pre-calc steps
      Int_t           fGA_SC_steps;    ///< GA settings: SC_steps
      Int_t           fGA_SC_rate;     ///< GA settings: SC_rate
      Double_t        fGA_SC_factor;   ///< GA settings: SC_factor

      // regression, storage of deviations
      std::vector<std::pair<Float_t,Float_t> >* fDeviationsFromTargets; ///< pairs of (deviation from target, event weight)

      Float_t         fWeightRange;    ///< suppress outliers for the estimator calculation

#ifdef MethodMLP_UseMinuit__
      // Minuit variables -- compiled out by default because they rely on a static this-pointer
      Int_t          fNumberOfWeights; ///< Minuit: number of weights
      static MethodMLP* fgThis;        ///< Minuit: this pointer
#endif

      // debugging flags
      static const Int_t  fgPRINT_ESTIMATOR_INC = 10;     ///< debug flag
      static const Bool_t fgPRINT_SEQ           = kFALSE; ///< debug flag
      static const Bool_t fgPRINT_BATCH         = kFALSE; ///< debug flag

      ClassDef(MethodMLP,0); // Multi-layer perceptron implemented specifically for TMVA
   };

} // namespace TMVA

#endif