Back to home page

EIC code displayed by LXR

 
 

    


Warning, file /include/root/TMVA/Tools.h was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).

0001 // @(#)root/tmva $Id$
0002 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss
0003 
0004 /**********************************************************************************
0005  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
0006  * Package: TMVA                                                                  *
0007  * Class  : Tools                                                                 *
0008  *                                             *
0009  *                                                                                *
0010  * Description:                                                                   *
0011  *      Global auxiliary applications and data treatment routines                 *
0012  *                                                                                *
0013  * Authors (alphabetical):                                                        *
0014  *      Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland              *
0015  *      Peter Speckmayer <peter.speckmayer@cern.ch> - CERN, Switzerland           *
0016  *      Helge Voss      <Helge.Voss@cern.ch>     - MPI-K Heidelberg, Germany      *
0017  *      Kai Voss        <Kai.Voss@cern.ch>       - U. of Victoria, Canada         *
0018  *                                                                                *
0019  * Copyright (c) 2005:                                                            *
0020  *      CERN, Switzerland                                                         *
0021  *      U. of Victoria, Canada                                                    *
0022  *      MPI-K Heidelberg, Germany                                                 *
0023  *                                                                                *
0024  * Redistribution and use in source and binary forms, with or without             *
0025  * modification, are permitted according to the terms listed in LICENSE           *
0026  * (see tmva/doc/LICENSE)                                          *
0027  **********************************************************************************/
0028 
0029 #ifndef ROOT_TMVA_Tools
0030 #define ROOT_TMVA_Tools
0031 
0032 //////////////////////////////////////////////////////////////////////////
0033 //                                                                      //
0034 // Tools (namespace)                                                    //
0035 //                                                                      //
0036 // Global auxiliary applications and data treatment routines            //
0037 //                                                                      //
0038 //////////////////////////////////////////////////////////////////////////
0039 
0040 #include <vector>
0041 #include <string>
0042 #include <sstream>
0043 #include <iostream>
0044 #include <atomic>
0045 
0046 #include "TXMLEngine.h"
0047 
0048 #include "TMatrixDSymfwd.h"
0049 
0050 #include "TMatrixDfwd.h"
0051 
0052 #include "TVectorDfwd.h"
0053 
0054 #include "TMVA/Types.h"
0055 
0056 #include "TMVA/VariableTransformBase.h"
0057 
0058 #include "TString.h"
0059 
0060 #include "TMVA/MsgLogger.h"
0061 
0062 class TList;
0063 class TTree;
0064 class TH1;
0065 class TH2;
0066 class TH2F;
0067 class TSpline;
0068 class TXMLEngine;
0069 
0070 namespace TMVA {
0071 
0072    class Event;
0073    class PDF;
0074    class MsgLogger;
0075 
0076    class Tools {
0077       // Collection of auxiliary routines reached through one shared object (see Instance()).
0078    private:
0079       // the constructor is private
0080       Tools();
0081 
0082    public:
0083 
0084       // destructor
0085       ~Tools();
0086 
0087       // accessor to single instance
0088       static Tools& Instance();
0089       static void   DestroyInstance();
0090 
0091       // mean and RMS of n array elements a, or of [first,last); w presumably holds the weights (optional for arrays)
0092       template <typename T> Double_t Mean(Long64_t n, const T *a, const Double_t *w=0);
0093       template <typename Iterator, typename WeightIterator> Double_t Mean ( Iterator first, Iterator last, WeightIterator w);
0094 
0095       template <typename T> Double_t RMS(Long64_t n, const T *a, const Double_t *w=0);
0096       template <typename Iterator, typename WeightIterator> Double_t RMS(Iterator first, Iterator last, WeightIterator w);
0097 
0098 
0099       // simple statistics operations on tree entries
0100       void  ComputeStat( const std::vector<TMVA::Event*>&,
0101                          std::vector<Float_t>*,
0102                          Double_t&, Double_t&, Double_t&,
0103                          Double_t&, Double_t&, Double_t&, Int_t signalClass,
0104                          Bool_t norm = kFALSE );
0105 
0106       // compute variance from sums
0107       inline Double_t ComputeVariance( Double_t sumx2, Double_t sumx, Int_t nx );
0108 
0109       // creates histograms normalized to one
0110       TH1* projNormTH1F( TTree* theTree, const TString& theVarName,
0111                          const TString& name, Int_t nbins,
0112                          Double_t xmin, Double_t xmax, const TString& cut );
0113 
0114       // normalize histogram by its integral
0115       Double_t NormHist( TH1* theHist, Double_t norm = 1.0 );
0116 
0117       // parser for TString phrase with items separated by a character
0118       TList* ParseFormatLine( TString theString, const char * sep = ":" );
0119 
0120       // parse option string for ANN methods
0121       std::vector<Int_t>* ParseANNOptionString( TString theOptions, Int_t nvar,
0122                                                 std::vector<Int_t>* nodes );
0123 
0124       // returns the square-root of a symmetric matrix: symMat = sqrtMat*sqrtMat
0125       TMatrixD* GetSQRootMatrix( TMatrixDSym* symMat );
0126 
0127       // returns the covariance matrix of the different classes (and the sum)
0128       // given the event sample
0129       std::vector<TMatrixDSym*>* CalcCovarianceMatrices( const std::vector<Event*>& events, Int_t maxCls, VariableTransformBase* transformBase=nullptr );
0130       std::vector<TMatrixDSym*>* CalcCovarianceMatrices( const std::vector<const Event*>& events, Int_t maxCls, VariableTransformBase* transformBase=nullptr );
0131 
0132 
0133       // turns covariance into correlation matrix
0134       const TMatrixD* GetCorrelationMatrix( const TMatrixD* covMat );
0135 
0136       // check spline quality by comparison with initial histogram
0137       Bool_t CheckSplines( const TH1*, const TSpline* );
0138 
0139       // normalization of variable output
0140       Double_t NormVariable( Double_t x, Double_t xmin, Double_t xmax );
0141 
0142       // return separation of two histograms
0143       Double_t GetSeparation( TH1* S, TH1* B ) const;
0144       Double_t GetSeparation( const PDF& pdfS, const PDF& pdfB ) const;
0145 
0146       // vector rescaling
0147       std::vector<Double_t> MVADiff( std::vector<Double_t>&, std::vector<Double_t>& );
0148       void Scale( std::vector<Double_t>&, Double_t );
0149       void Scale( std::vector<Float_t>&,  Float_t  );
0150 
0151       // re-arrange a vector of arrays (vectors) in a way such that the first array
0152       // is ordered, and the other arrays reshuffled accordingly
0153       void UsefulSortDescending( std::vector< std::vector<Double_t> >&, std::vector<TString>* vs = nullptr );
0154       void UsefulSortAscending ( std::vector< std::vector<Double_t> >&, std::vector<TString>* vs = nullptr );
0155 
0156       void UsefulSortDescending( std::vector<Double_t>& );
0157       void UsefulSortAscending ( std::vector<Double_t>& );
0158 
0159       Int_t GetIndexMaxElement ( std::vector<Double_t>& );
0160       Int_t GetIndexMinElement ( std::vector<Double_t>& );
0161 
0162       // check if input string contains regular expression
0163       Bool_t  ContainsRegularExpression( const TString& s );
0164       TString ReplaceRegularExpressions( const TString& s, const TString& replace = "+" );
0165 
0166       // routines for formatted output -----------------
0167       void FormattedOutput( const std::vector<Double_t>&, const std::vector<TString>&,
0168                             const TString titleVars, const TString titleValues, MsgLogger& logger,
0169                             TString format = "%+1.3f" );
0170       void FormattedOutput( const TMatrixD&, const std::vector<TString>&, MsgLogger& logger );
0171       void FormattedOutput( const TMatrixD&, const std::vector<TString>& vert, const std::vector<TString>& horiz,
0172                             MsgLogger& logger );
0173 
0174       void WriteFloatArbitraryPrecision( Float_t  val, std::ostream& os );
0175       void ReadFloatArbitraryPrecision ( Float_t& val, std::istream& is );
0176 
0177       // for histogramming
0178       TString GetXTitleWithUnit( const TString& title, const TString& unit );
0179       TString GetYTitleWithUnit( const TH1& h, const TString& unit, Bool_t normalised );
0180 
0181       // Mutual Information method for non-linear correlations estimates in 2D histogram
0182       // Author: Moritz Backes, Geneva (2009)
0183       Double_t GetMutualInformation( const TH2F& );
0184 
0185       // Correlation Ratio method for non-linear correlations estimates in 2D histogram
0186       // Author: Moritz Backes, Geneva (2009)
0187       Double_t GetCorrelationRatio( const TH2F& );
0188       TH2F*    TransposeHist      ( const TH2F& );
0189 
0190       // check if "silent" or "verbose" option in configuration string
0191       Bool_t CheckForSilentOption ( const TString& ) const;
0192       Bool_t CheckForVerboseOption( const TString& ) const;
0193 
0194       // color information
0195       const TString& Color( const TString& );
0196 
0197       // print welcome message (to be called from, eg, .TMVAlogon)
0198       enum EWelcomeMessage { kStandardWelcomeMsg = 1,
0199                              kIsometricWelcomeMsg,
0200                              kBlockWelcomeMsg,
0201                              kLeanWelcomeMsg,
0202                              kLogoWelcomeMsg,
0203                              kSmall1WelcomeMsg,
0204                              kSmall2WelcomeMsg,
0205                              kOriginalWelcomeMsgColor,
0206                              kOriginalWelcomeMsgBW };
0207 
0208       // print TMVA citation (to be called from, eg, .TMVAlogon)
0209       enum ECitation { kPlainText = 1,
0210                        kBibTeX,
0211                        kLaTeX,
0212                        kHtmlLink };
0213 
0214       void TMVAWelcomeMessage();
0215       void TMVAWelcomeMessage( MsgLogger& logger, EWelcomeMessage m = kStandardWelcomeMsg );
0216       void TMVAVersionMessage( MsgLogger& logger );
0217       void ROOTVersionMessage( MsgLogger& logger );
0218 
0219       void TMVACitation( MsgLogger& logger, ECitation citType = kPlainText );
0220 
0221       // string tools
0222 
0223       std::vector<TString> SplitString( const TString& theOpt, const char separator ) const;
0224 
0225       // variables
0226       const TString fRegexp;
0227       mutable MsgLogger*    fLogger; //!
0228       MsgLogger& Log() const { return *fLogger; } // dereferences fLogger without a null check
0229       static std::atomic<Tools*> fgTools;
0230 
0231       // xml tools
0232 
0233       TString     StringFromInt      ( Long_t i   );
0234       TString     StringFromDouble   ( Double_t d );
0235       void        WriteTMatrixDToXML ( void* node, const char* name, TMatrixD* mat );
0236       void        WriteTVectorDToXML ( void* node, const char* name, TVectorD* vec );
0237       void        ReadTMatrixDFromXML( void* node, const char* name, TMatrixD* mat );
0238       void        ReadTVectorDFromXML( void* node, const char* name, TVectorD* vec );
0239       Bool_t      HistoHasEquidistantBins(const TH1& h);
0240       // attribute helpers: ReadAttr has a generic template plus overloads for TString, float, int and short
0241       Bool_t      HasAttr     ( void* node, const char* attrname );
0242       template<typename T>
0243          inline void ReadAttr    ( void* node, const char* , T& value );
0244       void        ReadAttr    ( void* node, const char* attrname, TString& value );
0245       void ReadAttr(void *node, const char *, float &value);
0246       void ReadAttr(void *node, const char *, int &value);
0247       void ReadAttr(void *node, const char *, short &value);
0248       // AddAttr: the template takes a stream precision (default 16); a const char* overload exists as well
0249       template<typename T>
0250          void        AddAttr     ( void* node, const char* , const T& value, Int_t precision = 16 );
0251       void        AddAttr     ( void* node, const char* attrname, const char* value );
0252       void*       AddChild    ( void* parent, const char* childname, const char* content = nullptr, bool isRootNode = false );
0253       Bool_t      AddRawLine  ( void* node, const char * raw );
0254       Bool_t      AddComment  ( void* node, const char* comment );
0255 
0256       void*       GetParent( void* child);
0257       void*       GetChild    ( void* parent, const char* childname=nullptr );
0258       void*       GetNextChild( void* prevchild, const char* childname=nullptr );
0259       const char* GetContent  ( void* node );
0260       const char* GetName     ( void* node );
0261       // direct access to the XML engine object (dereferences fXMLEngine) and to the XML buffer size setting
0262       TXMLEngine& xmlengine() { return *fXMLEngine; }
0263       int xmlenginebuffersize() { return fXMLBufferSize;}
0264       void SetXMLEngineBufferSize(int buffer) { fXMLBufferSize = buffer; }
0265       TXMLEngine* fXMLEngine;
0266 
0267       TH1*       GetCumulativeDist( TH1* h);
0268 
0269    private:
0270 
0271       int fXMLBufferSize = 10000000; // read by xmlenginebuffersize(), changed by SetXMLEngineBufferSize()
0272       // utilities for correlation ratio
0273       Double_t GetYMean_binX( const TH2& , Int_t bin_x );
0274 
0275    }; // Common tools
0276 
0277    Tools& gTools(); // global accessor returning a Tools reference; only declared in this header
0278 
0279    //
0280    // Adapts a TRandom random number generator to the interface of the ones in the
0281    // standard library (STL) so that TRandom derived generators can be used with
0282    // STL algorithms such as `std::shuffle`.
0283    //
0284    // Example:
0285    // ```
0286    // std::vector<double> v {0, 1, 2, 3, 4, 5};
0287    // TRandom3StdEngine rng(seed);
0288    // std::shuffle(v.begin(), v.end(), rng);
0289    // ```
0290    //
0291    // Or at a lower level:
0292    // ```
0293    // std::vector<double> v {0, 1, 2, 3, 4, 5};
0294    // RandomGenerator<TRandom3> rng(seed);
0295    // std::shuffle(v.begin(), v.end(), rng);
0296    // ```
0297    //
0298    template <typename TRandomLike, typename UIntType = UInt_t, UIntType max_val = kMaxUInt>
0299    class RandomGenerator {
0300    public:
0301       using result_type = UIntType; // type of the values handed out by operator()
0302 
0303       RandomGenerator(UIntType s = 0) { seed(s); } // seeds the wrapped generator with s
0304 
0305       static constexpr UIntType min() { return UIntType{0}; } // lower bound reported to STL algorithms
0306       static constexpr UIntType max() { return max_val; }     // upper bound reported to STL algorithms
0307 
0308       void seed(UIntType s = 0) { fRandom.SetSeed(s); } // forwards s to TRandomLike::SetSeed
0309 
0310       UIntType operator()() { return fRandom.Integer(max_val); } // one draw via TRandomLike::Integer
0311 
0312       // advances the wrapped generator by z calls to Rndm(), ignoring the drawn values
0313       void discard(unsigned long long z)
0314       {
0315          for (unsigned long long left = z; left != 0; --left) {
0316             (void) fRandom.Rndm();
0317          }
0318       }
0319 
0320    private:
0321       TRandomLike fRandom; // wrapped generator; must offer SetSeed, Integer and Rndm
0322    };
0323 
0324 } // namespace TMVA
0325 
0326 ////////////////////////////////////////////////////////////////////////////////
0327 /// Read attribute `attrname` of xml node `node` and parse its text into `value` with operator>>.
0328 /// A null result from GetAttr is reported at kFATAL level; `value` is then left untouched.
0329 template<typename T> void TMVA::Tools::ReadAttr( void* node, const char* attrname, T& value )
0330 {
0331    const char *val = xmlengine().GetAttr(node, attrname);
0332    if (!val) {
0333       const char *nodename = xmlengine().GetNodeName(node);
0334       Log() << kFATAL << "Trying to read non-existing attribute '" << attrname << "' from xml node '" << nodename << "'"
0335             << Endl;
0336       return; // never build the stream from a null pointer, even if the fatal message returns control
0337    }
0338    std::stringstream s(val);
0339    // coverity[tainted_data_argument]
0340    s >> value;
0341 }
0342 
0343 ////////////////////////////////////////////////////////////////////////////////
0344 /// Add attribute `attrname` to xml node `node`. `value` is written to a string stream set up for
0345 /// scientific notation and the requested `precision`; the text is passed on as a const char*.
0346 template<typename T> void TMVA::Tools::AddAttr( void* node, const char* attrname, const T& value, Int_t precision )
0347 {
0348    std::stringstream formatted;
0349    formatted.setf( std::ios_base::scientific, std::ios_base::floatfield ); // effect of std::scientific
0350    formatted.precision( precision );
0351    formatted << value;
0352    AddAttr( node, attrname, formatted.str().c_str() );
0353 }
0354 
0355 ////////////////////////////////////////////////////////////////////////////////
0356 /// Variance from the given sums: (sumx2 - sumx*sumx/nx) / (nx - 1).
0357 /// Yields 0 when nx is smaller than 2, so the division by (nx - 1) is never done with nx < 2.
0358 inline Double_t TMVA::Tools::ComputeVariance( Double_t sumx2, Double_t sumx, Int_t nx )
0359 {
0360    const Double_t entries = static_cast<Double_t>(nx);
0361    return (nx < 2) ? 0. : (sumx2 - ((sumx*sumx)/entries))/static_cast<Double_t>(nx-1);
0362 }
0363 
0364 #endif