Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-30 10:23:03

0001 // @(#)root/tmva $Id$
0002 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss
0003 
0004 /**********************************************************************************
0005  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
0006  * Package: TMVA                                                                  *
0007  * Class  : SeparationBase                                                        *
0008  *                                                                                *
0009  *                                                                                *
0010  * Description: An interface to different separation criteria used in various     *
0011  *              training algorithms, as there are:                                *
0012  *              Gini-Index, Cross Entropy, Misclassification Error, etc.          *
0013  *                                                                                *
0014  *          There are two things: the Separation Index, and the Separation Gain   *
0015  *          Separation Index:                                                     *
0016  *          Measure of the "purity" of a sample. If all elements (events) in the  *
0017  *          sample belong to the same class (e.g. signal or backgr), then the     *
0018  *          separation index is 0 (meaning 100% purity, or 0% purity, as it is    *
0019  *          symmetric). The index becomes maximal for perfectly mixed samples,    *
0020  *          e.g. purity=50%, N_signal = N_bkg                                     *
0021  *                                                                                *
0022  *          Separation Gain:                                                      *
0023  *          the measure of how the quality of separation of the sample increases  *
0024  *          by splitting the sample e.g. into a "left-node" and a "right-node"    *
0025  *          (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right)  *
0026  *          this is then the quality criterion which is optimized for when trying *
0027  *          to increase the information in the system (making the best selection) *
0028  *                                                                                *
0029  *                                                                                *
0030  * Authors (alphabetical):                                                        *
0031  *      Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland              *
0032  *      Helge Voss      <Helge.Voss@cern.ch>     - MPI-K Heidelberg, Germany      *
0033  *      Kai Voss        <Kai.Voss@cern.ch>       - U. of Victoria, Canada         *
0034  *                                                                                *
0035  * Copyright (c) 2005:                                                            *
0036  *      CERN, Switzerland                                                         *
0037  *      U. of Victoria, Canada                                                    *
0038  *      Heidelberg U., Germany                                                    *
0039  *                                                                                *
0040  * Redistribution and use in source and binary forms, with or without             *
0041  * modification, are permitted according to the terms listed in LICENSE           *
0042  * (see tmva/doc/LICENSE)                                                         *
0043  **********************************************************************************/
0044 
0045 #ifndef ROOT_TMVA_SeparationBase
0046 #define ROOT_TMVA_SeparationBase
0047 
0048 //////////////////////////////////////////////////////////////////////////
0049 //                                                                      //
0050 // SeparationBase                                                       //
0051 //                                                                      //
0052 // An interface to calculate the "SeparationGain" for different         //
0053 // separation criteria used in various training algorithms              //
0054 //                                                                      //
0055 // There are two things: the Separation Index, and the Separation Gain  //
0056 // Separation Index:                                                    //
0057 // Measure of the "purity" of a sample. If all elements (events) in the //
0058 // sample belong to the same class (e.g. signal or background), then the//
0059 // separation index is 0 (meaning 100% purity, or 0% purity, as it is   //
0060 // symmetric). The index becomes maximal for perfectly mixed samples,   //
0061 // e.g. purity=50%, N_signal = N_bkg                                    //
0062 //                                                                      //
0063 // Separation Gain:                                                     //
0064 // the measure of how the quality of separation of the sample increases //
0065 // by splitting the sample e.g. into a "left-node" and a "right-node"   //
0066 // (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right) //
0067 // this is then the quality criterion which is optimized for when trying//
0068 // to increase the information in the system (making the best selection)//
0069 //                                                                      //
0070 //////////////////////////////////////////////////////////////////////////
0071 
0072 #include "Rtypes.h"
0073 
0074 #include "TString.h"
0075 
0076 #include "TMath.h"
0077 
0078 #include <limits>
0079 
0080 namespace TMVA {
0081 
0082    class SeparationBase {
0083       // Abstract interface for separation criteria; concrete criteria implement GetSeparationIndex()
0084    public:
0085 
0086       // default constructor
0087       SeparationBase();
0088 
0089       // copy constructor
0090       SeparationBase( const SeparationBase& s );
0091 
0092       // destructor (virtual, empty body)
0093       virtual ~SeparationBase(){}
0094 
0095       // Return the gain in separation if the original sample is split in two sub-samples:
0096       // (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right)
0097       virtual Double_t GetSeparationGain( const Double_t nSelS, const Double_t nSelB,   // presumably selected signal/background - confirm
0098                                           const Double_t nTotS, const Double_t nTotB ); // presumably total signal/background - confirm
0099 
0100       // Return the separation index (a measure for the "purity" of the sample); pure virtual
0101       virtual Double_t GetSeparationIndex( const Double_t s, const Double_t b ) = 0;
0102 
0103       // Return the name of the concrete Index implementation (const: callable on const objects)
0104       const TString& GetName() const { return fName; }
0105 
0106    protected:
0107 
0108       TString fName;  // name of the concrete Separation Index implementation
0109 
0110       Double_t fPrecisionCut; // NOTE(review): presumably a numerical precision threshold - confirm in the implementation file
0111 
0112       ClassDef(SeparationBase,0); // Interface to different separation criteria used in training algorithms
0113    };
0114 
0115 
0116 } // namespace TMVA
0117 
0118 #endif