root/TMVA/SeparationBase.h

0001 // @(#)root/tmva $Id$
0002 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss
0003
0004 /**********************************************************************************
0005  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
0006  * Package: TMVA                                                                  *
0007  * Class  : SeparationBase                                                        *
0008  *                                             *
0009  *                                                                                *
0010  * Description: An interface to different separation criteria used in various     *
0011  *              training algorithms, as there are:                                *
0012  *              Gini-Index, Cross Entropy, Misclassification Error, etc.          *
0013  *                                                                                *
0014  *          There are two things: the Separation Index, and the Separation Gain   *
0015  *          Separation Index:                                                     *
0016  *          Measure of the "purity" of a sample. If all elements (events) in the  *
0017  *          sample belong to the same class (e.g. signal or backgr), then the     *
0018  *          separation index is 0 (meaning 100% purity, or 0% purity as it is     *
0019  *          symmetric). The index becomes maximal for perfectly mixed samples     *
0020  *          e.g. purity=50%, N_signal = N_bkg                                     *
0021  *                                                                                *
0022  *          Separation Gain:                                                      *
0023  *          the measure of how the quality of separation of the sample increases  *
0024  *          by splitting the sample e.g. into a "left-node" and a "right-node"    *
0025  *          (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right)  *
0026  *          this is then the quality criterion which is optimized for when trying *
0027  *          to increase the information in the system (making the best selection) *
0028  *                                                                                *
0029  *                                                                                *
0030  * Authors (alphabetical):                                                        *
0031  *      Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland              *
0032  *      Helge Voss      <Helge.Voss@cern.ch>     - MPI-K Heidelberg, Germany      *
0033  *      Kai Voss        <Kai.Voss@cern.ch>       - U. of Victoria, Canada         *
0034  *                                                                                *
0035  * Copyright (c) 2005:                                                            *
0036  *      CERN, Switzerland                                                         *
0037  *      U. of Victoria, Canada                                                    *
0038  *      Heidelberg U., Germany                                                    *
0039  *                                                                                *
0040  * Redistribution and use in source and binary forms, with or without             *
0041  * modification, are permitted according to the terms listed in LICENSE           *
0042  * (see tmva/doc/LICENSE)                                          *
0043  **********************************************************************************/
0044
0045 #ifndef ROOT_TMVA_SeparationBase
0046 #define ROOT_TMVA_SeparationBase
0047
0048 //////////////////////////////////////////////////////////////////////////
0049 //                                                                      //
0050 // SeparationBase                                                       //
0051 //                                                                      //
0052 // An interface to calculate the "SeparationGain" for different         //
0053 // separation criteria used in various training algorithms              //
0054 //                                                                      //
0055 // There are two things: the Separation Index, and the Separation Gain  //
0056 // Separation Index:                                                    //
0057 // Measure of the "purity" of a sample. If all elements (events) in the //
0058 // sample belong to the same class (e.g. signal or background), then the//
0059 // separation index is 0 (meaning 100% purity, or 0% purity as it is    //
0060 // symmetric). The index becomes maximal for perfectly mixed samples    //
0061 // e.g. purity=50%, N_signal = N_bkg                                    //
0062 //                                                                      //
0063 // Separation Gain:                                                     //
0064 // the measure of how the quality of separation of the sample increases //
0065 // by splitting the sample e.g. into a "left-node" and a "right-node"   //
0066 // (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right) //
0067 // this is then the quality criterion which is optimized for when trying//
0068 // to increase the information in the system (making the best selection)//
0069 //                                                                      //
0070 //////////////////////////////////////////////////////////////////////////
0071
0072 #include "Rtypes.h"
0073
0074 #include "TString.h"
0075
0076 #include "TMath.h"
0077
0078 #include <limits>
0079
0080 namespace TMVA {
0081
0082    class SeparationBase {
0083       // Abstract interface for separation criteria: concrete criteria implement GetSeparationIndex()
0084    public:
0085
0086       // default constructor
0087       SeparationBase();
0088
0089       // copy constructor
0090       SeparationBase( const SeparationBase& s );
0091
0092       // destructor (virtual: the class is meant to be used through base-class pointers/references)
0093       virtual ~SeparationBase(){}
0094       // NOTE(review): presumably nSel* = signal/background events in one sub-sample, nTot* = in the parent sample - confirm in the implementation
0095       // Return the gain in separation obtained if the original sample is split in two sub-samples:
0096       // (N * Index_parent) - (N_left * Index_left) - (N_right * Index_right)
0097       virtual Double_t GetSeparationGain( const Double_t nSelS, const Double_t nSelB,
0098                                           const Double_t nTotS, const Double_t nTotB );
0099
0100       // Return the separation index (a measure for the "purity" of the sample) for s signal and b background events
0101       virtual Double_t GetSeparationIndex( const Double_t s, const Double_t b ) = 0;
0102
0103       // Return the name of the concrete Index implementation (const: usable on const objects)
0104       const TString& GetName() const { return fName; }
0105
0106    protected:
0107
0108       TString fName;  // name of the concrete Separation Index implementation
0109
0110       Double_t fPrecisionCut;  // numerical precision threshold; NOTE(review): value and use are set in the implementation file - confirm
0111
0112       ClassDef(SeparationBase,0); // Interface to different separation criteria used in training algorithms
0113    };
0114
0115
0116 } // namespace TMVA
0117
0118 #endif