#ifndef ROOT_TMVA_MethodBDT
#define ROOT_TMVA_MethodBDT

//////////////////////////////////////////////////////////////////////////
//                                                                      //
// MethodBDT                                                            //
//                                                                      //
// Analysis of Boosted Decision Trees                                   //
//                                                                      //
//////////////////////////////////////////////////////////////////////////

#include <vector>
#include <memory>
#include <map>

#include "TH2.h"
#include "TTree.h"
#include "TMVA/MethodBase.h"
#include "TMVA/DecisionTree.h"
#include "TMVA/Event.h"
#include "TMVA/LossFunction.h"

#ifdef R__USE_IMT
#include <ROOT/TThreadExecutor.hxx>
#include "TSystem.h"
#endif

namespace TMVA {

   class SeparationBase;

   class MethodBDT : public MethodBase {

   public:

      // constructor for training and reading
      MethodBDT( const TString& jobName,
                 const TString& methodTitle,
                 DataSetInfo& theData,
                 const TString& theOption = "");

      // constructor for calculating the BDT response from previously generated decision trees
      MethodBDT( DataSetInfo& theData,
                 const TString& theWeightFile);

      virtual ~MethodBDT( void );

      // check whether the given analysis type (classification/regression/multiclass) is supported
      virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets );

      // write all events from the training tree into a vector of TMVA::Event
      // that is more easily manipulated during the boosting
      void InitEventSample();

      // optimize the tuning parameters (figure of merit and fitter are configurable)
      virtual std::map<TString,Double_t> OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="FitGA");
      virtual void SetTuneParameters(std::map<TString,Double_t> tuneParameters);

      // training method
      void Train( void );

      // revoke training
      void Reset( void );

      using MethodBase::ReadWeightsFromStream;

      // write weights to XML
      void AddWeightsXMLTo( void* parent ) const;

      // read weights from stream or XML
      void ReadWeightsFromStream( std::istream& istr );
      void ReadWeightsFromXML(void* parent);

      // write method-specific monitoring histograms to the target file
      void WriteMonitoringHistosToFile( void ) const;

      // calculate the MVA value
      Double_t GetMvaValue( Double_t* err = nullptr, Double_t* errUpper = nullptr);

      // get the actual forest size (may be smaller than the requested fNTrees if boosting stopped early)
      UInt_t GetNTrees() const {return fForest.size();}
   private:

      Double_t GetMvaValue( Double_t* err, Double_t* errUpper, UInt_t useNTrees );
      Double_t PrivateGetMvaValue( const TMVA::Event *ev, Double_t* err=nullptr, Double_t* errUpper=nullptr, UInt_t useNTrees=0 );
      void BoostMonitor(Int_t iTree);

   public:
      // multiclass response
      const std::vector<Float_t>& GetMulticlassValues();

      // regression response
      const std::vector<Float_t>& GetRegressionValues();

      // apply the boost algorithm to a tree in the collection
      Double_t Boost( std::vector<const TMVA::Event*>&, DecisionTree *dt, UInt_t cls = 0);

      // ranking of input variables
      const Ranking* CreateRanking();

      // the option handling methods
      void DeclareOptions();
      void ProcessOptions();
      void SetMaxDepth(Int_t d){fMaxDepth = d;}
      void SetMinNodeSize(Double_t sizeInPercent);
      void SetMinNodeSize(TString sizeInPercent);

      void SetNTrees(Int_t d){fNTrees = d;}
      void SetAdaBoostBeta(Double_t b){fAdaBoostBeta = b;}
      void SetNodePurityLimit(Double_t l){fNodePurityLimit = l;}
      void SetShrinkage(Double_t s){fShrinkage = s;}
      void SetUseNvars(Int_t n){fUseNvars = n;}
      void SetBaggedSampleFraction(Double_t f){fBaggedSampleFraction = f;}

      // get the forest of decision trees
      inline const std::vector<TMVA::DecisionTree*> & GetForest() const;

      // get the training events
      inline const std::vector<const TMVA::Event*> & GetTrainingEvents() const;

      // get the boost weights of the individual trees
      inline const std::vector<double> & GetBoostWeights() const;

      // return the individual relative variable importance
      std::vector<Double_t> GetVariableImportance();
      Double_t GetVariableImportance(UInt_t ivar);

      // test the tree quality in terms of misclassification
      Double_t TestTreeQuality( DecisionTree *dt );

      // make ROOT-independent C++ class for classifier response (classifier-specific implementation)
      void MakeClassSpecific( std::ostream&, const TString& ) const;

      // header and auxiliary classes
      void MakeClassSpecificHeader( std::ostream&, const TString& ) const;

      void MakeClassInstantiateNode( DecisionTreeNode *n, std::ostream& fout,
                                     const TString& className ) const;

      // get help message text
      void GetHelpMessage() const;

   protected:
      void DeclareCompatibilityOptions();

   private:

      // Init used in the various constructors
      void Init( void );

      void PreProcessNegativeEventWeights();

      // boosting algorithm (adaptive boosting)
      Double_t AdaBoost( std::vector<const TMVA::Event*>&, DecisionTree *dt );

      // boosting algorithm (adaptive boosting with cost matrix)
      Double_t AdaCost( std::vector<const TMVA::Event*>&, DecisionTree *dt );

      // boosting as a random re-weighting
      Double_t Bagging( );

      // boosting special for regression
      Double_t RegBoost( std::vector<const TMVA::Event*>&, DecisionTree *dt );

      // AdaBoost adapted to regression
      Double_t AdaBoostR2( std::vector<const TMVA::Event*>&, DecisionTree *dt );

      // gradient boosting for classification
      // (see Friedman: "Greedy Function Approximation: a Gradient Boosting Machine")
      Double_t GradBoost( std::vector<const TMVA::Event*>&, DecisionTree *dt, UInt_t cls = 0);
      Double_t GradBoostRegression(std::vector<const TMVA::Event*>&, DecisionTree *dt );
      void InitGradBoost( std::vector<const TMVA::Event*>&);
      void UpdateTargets( std::vector<const TMVA::Event*>&, UInt_t cls = 0);
      void UpdateTargetsRegression( std::vector<const TMVA::Event*>&, Bool_t first=kFALSE);
      Double_t GetGradBoostMVA(const TMVA::Event *e, UInt_t nTrees);
      void GetBaggedSubSample(std::vector<const TMVA::Event*>&);

      std::vector<const TMVA::Event*>  fEventSample;      // the training events
      std::vector<const TMVA::Event*>  fValidationSample; // the validation events
      std::vector<const TMVA::Event*>  fSubSample;        // subsample for bagged grad boost
      std::vector<const TMVA::Event*> *fTrainSample;      // pointer to the sample actually used in training (fEventSample or fSubSample)

      Int_t                      fNTrees;           // number of decision trees requested
      std::vector<DecisionTree*> fForest;           // the collection of decision trees
      std::vector<double>        fBoostWeights;     // the weights applied in the individual boosts
      Double_t                   fSigToBkgFraction; // signal-to-background fraction assumed during training
      TString                    fBoostType;        // string specifying the boost type
      Double_t                   fAdaBoostBeta;     // beta parameter for the AdaBoost algorithm
      TString                    fAdaBoostR2Loss;   // loss type used in AdaBoostR2 (Linear, Quadratic or Exponential)

      Double_t                   fShrinkage;        // learning rate for gradient boost
      Bool_t                     fBaggedBoost;      // turn bagging in combination with boost on/off
      Bool_t                     fBaggedGradBoost;  // turn bagging in combination with grad boost on/off

      std::map< const TMVA::Event*, LossFunctionEventInfo> fLossFunctionEventInfo; // map event to true value, predicted value, and weight,
                                                                                   // used by the loss functions for BDT regression
      std::map< const TMVA::Event*, std::vector<double> >  fResiduals;             // individual event residuals for gradient boost

      // options for the decision trees
      SeparationBase *fSepType;       // the separation criterion used in node splitting
      TString         fSepTypeS;      // the separation criterion (option string) used in node splitting
      Int_t           fMinNodeEvents; // min number of events in a node
      Float_t         fMinNodeSize;   // min percentage of training events in a node
      TString         fMinNodeSizeS;  // string containing min percentage of training events in a node

      Int_t           fNCuts;               // grid used for the cut scan in node splitting
      Bool_t          fUseFisherCuts;       // use multivariate splits using the Fisher criterion
      Double_t        fMinLinCorrForFisher; // minimum linear correlation between two variables demanded to use the Fisher criterion in node splitting
      Bool_t          fUseExclusiveVars;    // variables already used in the Fisher criterion are not analysed individually for node splitting
      Bool_t          fUseYesNoLeaf;        // use sig/bkg classification in leaf nodes instead of the purity
      Double_t        fNodePurityLimit;     // purity limit for sig/bkg nodes
      UInt_t          fNNodesMax;           // max number of nodes
      UInt_t          fMaxDepth;            // max depth of a tree

      DecisionTree::EPruneMethod fPruneMethod;       // method used for pruning
      TString                    fPruneMethodS;      // prune method option string
      Double_t                   fPruneStrength;     // parameter setting the "amount" of pruning
      Double_t                   fFValidationEvents; // fraction of events used for pruning
      Bool_t                     fAutomatic;         // use user-given prune strength or one determined automatically from a validation sample
      Bool_t                     fRandomisedTrees;   // choose a random subset of possible cut variables at each node during training
      UInt_t                     fUseNvars;          // number of variables used in the randomised tree splitting
      Bool_t                     fUsePoissonNvars;   // use fUseNvars not as a fixed number but as the mean of a Poisson distribution in each split
      UInt_t                     fUseNTrainEvents;   // number of randomly picked training events used in randomised (and bagged) trees

      Double_t fBaggedSampleFraction;   // relative size of the bagged event sample with respect to the original sample size
      TString  fNegWeightTreatment;     // option string specifying how to treat negative event weights in training
      Bool_t   fNoNegWeightsInTraining; // ignore negative event weights in the training
      Bool_t   fInverseBoostNegWeights; // boost events with negative weights with 1/boostweight rather than boostweight
      Bool_t   fPairNegWeightsGlobal;   // pair events with negative and positive weights in the training sample and "annihilate" them
      Bool_t   fTrainWithNegWeights;    // negative event weights are present and kept in the training
      Bool_t   fDoBoostMonitor;         // create control plot with ROC integral vs tree number

      // monitoring ntuple
      TTree*   fMonitorNtuple;  // monitoring ntuple
      Int_t    fITree;          // ntuple var: i-th tree
      Double_t fBoostWeight;    // ntuple var: boost weight
      Double_t fErrorFraction;  // ntuple var: misclassification error fraction

      // cost matrix for AdaCost boosting
      Double_t fCss;    // cost of correctly classified signal
      Double_t fCts_sb; // cost of true signal classified as background
      Double_t fCtb_ss; // cost of true background classified as signal
      Double_t fCbb;    // cost of correctly classified background

      Bool_t fDoPreselection; // do or do not perform automatic pre-selection of 100% efficient cuts

      Bool_t fSkipNormalization; // true to skip normalization at initialization

      std::vector<Double_t> fVariableImportance; // the relative importance of the input variables

      // determine and apply automatic pre-selection cuts that are 100% efficient for one class
      void DeterminePreselectionCuts(const std::vector<const TMVA::Event*>& eventSample);
      Double_t ApplyPreselectionCuts(const Event* ev);

      std::vector<Double_t> fLowSigCut;
      std::vector<Double_t> fLowBkgCut;
      std::vector<Double_t> fHighSigCut;
      std::vector<Double_t> fHighBkgCut;

      std::vector<Bool_t> fIsLowSigCut;
      std::vector<Bool_t> fIsLowBkgCut;
      std::vector<Bool_t> fIsHighSigCut;
      std::vector<Bool_t> fIsHighBkgCut;

      Bool_t fHistoricBool; // historic option, kept for backward compatibility

      TString  fRegressionLossFunctionBDTGS; // option string determining the loss function for BDT regression
      Double_t fHuberQuantile;               // quantile defining the "core" of the Huber loss function in BDT regression

      LossFunctionBDT* fRegressionLossFunctionBDTG; // the loss function used for BDTG regression


      // debugging flag
      static const Int_t fgDebugLevel; // debug level determining some printout/control plots etc.

      ClassDef(MethodBDT,0); // Analysis of Boosted Decision Trees
   };

} // namespace TMVA

const std::vector<TMVA::DecisionTree*>& TMVA::MethodBDT::GetForest()         const { return fForest; }
const std::vector<const TMVA::Event*>&  TMVA::MethodBDT::GetTrainingEvents() const { return fEventSample; }
const std::vector<double>&              TMVA::MethodBDT::GetBoostWeights()   const { return fBoostWeights; }

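// A minimal usage sketch (illustrative only, not part of this header): MethodBDT is
// normally booked through the TMVA::Factory rather than constructed directly. The
// output file, the "dataloader" pointer and the option values below are assumptions
// chosen for the example, not defaults mandated by this class.
//
//    TMVA::Factory factory("TMVAClassification", outputFile, "AnalysisType=Classification");
//    factory.BookMethod(dataloader, TMVA::Types::kBDT, "BDT",
//                       "NTrees=800:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:"
//                       "SeparationType=GiniIndex:nCuts=20");
//    factory.TrainAllMethods();
//
// The option keys map onto the data members above (fNTrees, fMaxDepth, fBoostType,
// fAdaBoostBeta, fSepTypeS, fNCuts).
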
#endif