Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-30 10:22:53

0001 // @(#)root/tmva $Id$
0002 // Author: Andreas Hoecker, Matt Jachowski, Peter Speckmayer, Helge Voss, Kai Voss
0003 
0004 /**********************************************************************************
0005  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
0006  * Package: TMVA                                                                  *
0007  * Class  : MethodCuts                                                            *
0008  *                                                                                *
0009  *                                                                                *
0010  * Description:                                                                   *
0011  *      Multivariate optimisation of signal efficiency for given background       *
0012  *      efficiency, using rectangular minimum and maximum requirements on         *
0013  *      input variables                                                           *
0014  *                                                                                *
0015  * Authors (alphabetical):                                                        *
0016  *      Andreas Hoecker  <Andreas.Hocker@cern.ch> - CERN, Switzerland             *
0017  *      Matt Jachowski   <jachowski@stanford.edu> - Stanford University, USA      *
0018  *      Peter Speckmayer <speckmay@mail.cern.ch>  - CERN, Switzerland             *
0019  *      Helge Voss       <Helge.Voss@cern.ch>     - MPI-K Heidelberg, Germany     *
0020  *      Kai Voss         <Kai.Voss@cern.ch>       - U. of Victoria, Canada        *
0021  *                                                                                *
0022  * Copyright (c) 2005:                                                            *
0023  *      CERN, Switzerland                                                         *
0024  *      U. of Victoria, Canada                                                    *
0025  *      MPI-K Heidelberg, Germany                                                 *
0026  *      LAPP, Annecy, France                                                      *
0027  *                                                                                *
0028  * Redistribution and use in source and binary forms, with or without             *
0029  * modification, are permitted according to the terms listed in LICENSE           *
0030  * (see tmva/doc/LICENSE)                                                         *
0031  **********************************************************************************/
0032 
0033 #ifndef ROOT_TMVA_MethodCuts
0034 #define ROOT_TMVA_MethodCuts
0035 
0036 //////////////////////////////////////////////////////////////////////////
0037 //                                                                      //
0038 // MethodCuts                                                           //
0039 //                                                                      //
0040 // Multivariate optimisation of signal efficiency for given background  //
0041 // efficiency, using rectangular minimum and maximum requirements on    //
0042 // input variables                                                      //
0043 //                                                                      //
0044 //////////////////////////////////////////////////////////////////////////
0045 
0046 #include <vector>
0047 
0048 
0049 #include "TMVA/MethodBase.h"
0050 #include "TMVA/BinarySearchTree.h"
0051 #include "TMVA/PDF.h"
0052 #include "TMatrixDfwd.h"
0053 #include "IFitterTarget.h"
0054 
0055 class TRandom;
0056 
0057 namespace TMVA {
0058 
0059    class Interval;
0060 
0061    class MethodCuts : public MethodBase, public IFitterTarget {
0062       // rectangular-cut classifier: declares the training, weight I/O, efficiency and cut-retrieval interfaces
0063    public:
0064       // constructor taking job name, method title, data set description and an option string (which has a default)
0065       MethodCuts( const TString& jobName,
0066                   const TString& methodTitle,
0067                   DataSetInfo& theData,
0068                   const TString& theOption = "MC:150:10000:");
0069       // constructor taking the data set description and a weight file
0070       MethodCuts( DataSetInfo& theData,
0071                   const TString& theWeightFile);
0072 
0073       // this is a workaround which is necessary since CINT is not capable of handling dynamic casts;
0074       static MethodCuts* DynamicCast( IMethod* method ) { return dynamic_cast<MethodCuts*>(method); } // yields nullptr if method is not a MethodCuts
0075 
0076       virtual ~MethodCuts( void );
0077 
0078       virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets );
0079 
0080       // training method
0081       void Train( void );
0082       // keep the MethodBase overloads of ReadWeightsFromStream visible next to the one declared below
0083       using MethodBase::ReadWeightsFromStream;
0084       // weight I/O: XML writer, stream reader and XML reader
0085       void AddWeightsXMLTo      ( void* parent ) const;
0086 
0087       void ReadWeightsFromStream( std::istream & i );
0088       void ReadWeightsFromXML   ( void* wghtnode );
0089 
0090       // calculate the MVA value (for cuts this is just a dummy); both error pointers are optional
0091       Double_t GetMvaValue( Double_t* err = nullptr, Double_t* errUpper = nullptr );
0092 
0093       // write method specific histos to target file
0094       void WriteMonitoringHistosToFile( void ) const;
0095 
0096       // test the method
0097       void TestClassification();
0098 
0099       // also overwritten --> not computed for cuts; the inline versions below return the constant -1
0100       Double_t GetSeparation  ( TH1*, TH1* ) const { return -1; }
0101       Double_t GetSeparation  ( PDF* = nullptr, PDF* = nullptr ) const { return -1; }
0102       Double_t GetSignificance( void )       const { return -1; }
0103       Double_t GetmuTransform ( TTree *)           { return -1; }
0104       Double_t GetEfficiency  ( const TString&, Types::ETreeType, Double_t& ); // declared only; defined outside this header
0105       Double_t GetTrainingEfficiency(const TString& );                         // declared only; defined outside this header
0106 
0107       // rarity distributions (signal or background (default) is uniform in [0,1]); this inline version returns 0
0108       Double_t GetRarity( Double_t, Types::ESBType ) const { return 0; }
0109 
0110       // accessors for Minuit
0111       Double_t ComputeEstimator( std::vector<Double_t> & );
0112       // estimator evaluation, either from a parameter vector or from a pair of event indices
0113       Double_t EstimatorFunction( std::vector<Double_t> & );
0114       Double_t EstimatorFunction( Int_t ievt1, Int_t ievt2 );
0115       // store the signal efficiency in fTestSignalEff
0116       void     SetTestSignalEfficiency( Double_t effS ) { fTestSignalEff = effS; }
0117 
0118       // retrieve cut values for given signal efficiency (vector and raw-array variants)
0119       void     PrintCuts( Double_t effS ) const;
0120       Double_t GetCuts  ( Double_t effS, std::vector<Double_t>& cutMin, std::vector<Double_t>& cutMax ) const;
0121       Double_t GetCuts  ( Double_t effS, Double_t* cutMin, Double_t* cutMax ) const;
0122 
0123       // ranking of input variables (not available for cuts): always returns nullptr
0124       const Ranking* CreateRanking() { return nullptr; }
0125       // option handling
0126       void DeclareOptions();
0127       void ProcessOptions();
0128 
0129       // maximum |cut| value (static constant; its value is defined outside this header)
0130       static const Double_t fgMaxAbsCutVal;
0131 
0132       // no check of options at this place (intentionally empty)
0133       void CheckSetup() {}
0134 
0135    protected:
0136 
0137       // make ROOT-independent C++ class for classifier response (classifier-specific implementation)
0138       void MakeClassSpecific( std::ostream&, const TString& ) const;
0139 
0140       // get help message text
0141       void GetHelpMessage() const;
0142 
0143    private:
0144 
0145       // optimisation method (enumerators are numbered consecutively starting at 0)
0146       enum EFitMethodType { kUseMonteCarlo = 0,
0147                             kUseGeneticAlgorithm,
0148                             kUseSimulatedAnnealing,
0149                             kUseMinuit,
0150                             kUseEventScan,
0151                             kUseMonteCarloEvents };
0152 
0153       // efficiency calculation method
0154       // - kUseEventSelection: computes efficiencies from given data sample
0155       // - kUsePDFs          : creates smoothed PDFs from data samples, and
0156       //                       uses this to compute efficiencies
0157       enum EEffMethod     { kUseEventSelection = 0,
0158                             kUsePDFs };
0159 
0160       // improve the Monte Carlo by providing some additional information
0161       enum EFitParameters { kNotEnforced = 0,
0162                             kForceMin,
0163                             kForceMax,
0164                             kForceSmart };
0165 
0166       // general
0167       TString                 fFitMethodS;         ///< chosen fit method (string)
0168       EFitMethodType          fFitMethod;          ///< chosen fit method
0169       TString                 fEffMethodS;         ///< chosen efficiency calculation method (string)
0170       EEffMethod              fEffMethod;          ///< chosen efficiency calculation method
0171       std::vector<EFitParameters>* fFitParams;     ///< vector for series of fit methods
0172       Double_t                fTestSignalEff;      ///< used to test optimized signal efficiency (set via SetTestSignalEfficiency)
0173       Double_t                fEffSMin;            ///< NOTE(review): original text duplicated fTestSignalEff; presumably a lower signal-efficiency bound -- confirm
0174       Double_t                fEffSMax;            ///< NOTE(review): original text duplicated fTestSignalEff; presumably an upper signal-efficiency bound -- confirm
0175       Double_t*               fCutRangeMin;        ///< minimum of allowed cut range
0176       Double_t*               fCutRangeMax;        ///< maximum of allowed cut range
0177       std::vector<Interval*>  fCutRange;           ///< allowed ranges for cut optimisation
0178 
0179       // for the use of the binary tree method
0180       BinarySearchTree*       fBinaryTreeS;        ///< binary search tree (presumably signal, following the S/B naming of the members below)
0181       BinarySearchTree*       fBinaryTreeB;        ///< binary search tree (presumably background, following the S/B naming of the members below)
0182 
0183       // MC method
0184       Double_t**              fCutMin;             ///< minimum requirement
0185       Double_t**              fCutMax;             ///< maximum requirement
0186       Double_t*               fTmpCutMin;          ///< temporary minimum requirement
0187       Double_t*               fTmpCutMax;          ///< temporary maximum requirement
0188       TString*                fAllVarsI;           ///< what to do with variables
0189 
0190       // relevant for all methods
0191       Int_t                   fNpar;               ///< number of parameters in fit (default: 2*Nvar)
0192       Double_t                fEffRef;             ///< reference efficiency
0193       std::vector<Int_t>*     fRangeSign;          ///< used to match cuts to fit parameters (and vice versa)
0194       TRandom*                fRandom;             ///< random generator for MC optimisation method
0195 
0196       // basic statistics
0197       std::vector<Double_t>*  fMeanS;              ///< means of variables (signal)
0198       std::vector<Double_t>*  fMeanB;              ///< means of variables (background)
0199       std::vector<Double_t>*  fRmsS;               ///< RMSs of variables (signal)
0200       std::vector<Double_t>*  fRmsB;               ///< RMSs of variables (background)
0201 
0202       TH1*                    fEffBvsSLocal;       ///< intermediate histogram of background efficiency versus signal efficiency
0203 
0204       // PDF section
0205       std::vector<TH1*>*      fVarHistS;           ///< reference histograms (signal)
0206       std::vector<TH1*>*      fVarHistB;           ///< reference histograms (background)
0207       std::vector<TH1*>*      fVarHistS_smooth;    ///< smoothed reference histograms (signal)
0208       std::vector<TH1*>*      fVarHistB_smooth;    ///< smoothed reference histograms (background)
0209       std::vector<PDF*>*      fVarPdfS;            ///< reference PDFs (signal)
0210       std::vector<PDF*>*      fVarPdfB;            ///< reference PDFs (background)
0211 
0212       // negative efficiencies
0213       Bool_t                  fNegEffWarning;      ///< flag raised in case of negative efficiency warning
0214 
0215 
0216       // the definition of fit parameters can be different from the actual
0217       // cut requirements; these functions provide the matching
0218       void     MatchParsToCuts( const std::vector<Double_t>&, Double_t*, Double_t* );
0219       void     MatchParsToCuts( Double_t*, Double_t*, Double_t* );
0220 
0221       void     MatchCutsToPars( std::vector<Double_t>&, Double_t*, Double_t* );
0222       void     MatchCutsToPars( std::vector<Double_t>&, Double_t**, Double_t**, Int_t ibin );
0223 
0224       // creates PDFs in case these are used to compute efficiencies
0225       // (corresponds to: EffMethod == kUsePDFs)
0226       void     CreateVariablePDFs( void );
0227 
0228       // returns signal and background efficiencies for given cuts - using event counting;
0229       void     GetEffsfromSelection( Double_t* cutMin, Double_t* cutMax,
0230                                      Double_t& effS, Double_t& effB ); // effS and effB are output references
0231       // returns signal and background efficiencies for given cuts - using PDFs;
0232       void     GetEffsfromPDFs( Double_t* cutMin, Double_t* cutMax,
0233                                 Double_t& effS, Double_t& effB ); // effS and effB are output references
0234 
0235       // default initialisation method called by all constructors
0236       void     Init( void );
0237 
0238       ClassDef(MethodCuts,0);  // Multivariate optimisation of signal efficiency
0239    };
0240 
0241 } // namespace TMVA
0242 
0243 #endif