Back to home page

EIC code displayed by LXR

 
 

    


Warning, file /include/root/TKDE.h was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).

0001 // @(#)root/hist:$Id$
0002 // Authors: Bartolomeu Rabacal    07/2010
0003 /**********************************************************************
0004  *                                                                    *
0005  * Copyright (c) 2006 , LCG ROOT MathLib Team                         *
0006  *                                                                    *
0007  *                                                                    *
0008  **********************************************************************/
0009 // Header file for TKDE
0010 
0011 #ifndef ROOT_TKDE
0012 #define ROOT_TKDE
0013 
0014 #include "Math/WrappedFunction.h"
0015 
0016 #include "TNamed.h"
0017 
0018 #include "Math/Math.h"
0019 
0020 #include <string>
0021 #include <vector>
0022 #include <memory>
0023 
0024 class TGraphErrors;
0025 class TF1;
0026 
0027 /*
0028    Kernel Density Estimation class.
0029    The three main references are
0030 
0031    (1) "Scott DW, Multivariate Density Estimation.Theory, Practice and Visualization. New York: Wiley",
0032    (2) "Jann Ben - ETH Zurich, Switzerland -, Univariate kernel density estimation document for KDENS: Stata module for univariate kernel density estimation."
0033    (3) "Hardle W, Muller M, Sperlich S, Werwatz A, Nonparametric and Semiparametric Models. Springer."The algorithm is briefly described in
0034        "Cranmer KS, Kernel Estimation in High-Energy Physics. Computer Physics Communications 136:198-207,2001" - e-Print Archive: hep ex/0011057.
0035        A binned version is also implemented to address the performance issue due to its data size dependence.
0036 */
0037 class TKDE : public TNamed  {
0038 public:
0039 
0040    /// Types of Kernel functions
0041    /// They can be set using the function SetKernelType() or as a string in the constructor
0042    enum EKernelType {
0043       kGaussian,
0044       kEpanechnikov,
0045       kBiweight,
0046       kCosineArch,
0047       kUserDefined, ///< Internal use only for the class's template constructor
0048       kTotalKernels ///< Internal use only for member initialization
0049    };
0050 
0051    /// Iteration types. They can be set using SetIteration()
0052    enum EIteration {
0053       kAdaptive,
0054       kFixed
0055    };
0056 
0057    /// Data "mirroring" option to address the probability "spill out" boundary effect
0058    /// They can be set using SetMirror()
0059    enum EMirror {
0060       kNoMirror,
0061       kMirrorLeft,
0062       kMirrorRight,
0063       kMirrorBoth,
0064       kMirrorAsymLeft,
0065       kMirrorRightAsymLeft,
0066       kMirrorAsymRight,
0067       kMirrorLeftAsymRight,
0068       kMirrorAsymBoth
0069    };
0070 
0071    /// Data binning option.
0072    /// They can be set using SetBinning()
0073    enum EBinning{
0074       kUnbinned,
0075       kRelaxedBinning, ///< The algorithm is allowed to use binning if the data is large enough
0076       kForcedBinning
0077    };
0078 
0079    ///  default constructor used only by I/O
0080    TKDE();
0081 
0082    /// Constructor for unweighted data
0083    /// Varius option for TKDE can be passed in the option string as below.
0084    /// Note that min and max will define the plotting range but will not restrict the data in the unbinned case
0085    /// Instead when use binning, only the data in the range will be considered.
0086    /// Note also, that when some data exists outside the range, one should not use the mirror option with unbinned.
0087    /// Adaptive will be soon very slow especially for Nevents > 10000.
0088    /// For this reason, by default for Nevents >=10000, the data are automatically binned  in
0089    /// nbins=Min(10000,Nevents/10)
0090    /// In case of ForceBinning option the default number of bins is 1000
0091    TKDE(UInt_t events, const Double_t* data, Double_t xMin = 0.0, Double_t xMax = 0.0, const Option_t* option =
0092                  "KernelType:Gaussian;Iteration:Adaptive;Mirror:noMirror;Binning:RelaxedBinning", Double_t rho = 1.0) {
0093       Instantiate( nullptr,  events, data, nullptr, xMin, xMax, option, rho);
0094    }
0095 
0096    /// Constructor for weighted data
0097    TKDE(UInt_t events, const Double_t* data, const Double_t* dataWeight, Double_t xMin = 0.0, Double_t xMax = 0.0, const Option_t* option =
0098         "KernelType:Gaussian;Iteration:Adaptive;Mirror:noMirror;Binning:RelaxedBinning", Double_t rho = 1.0) {
0099       Instantiate( nullptr,  events, data, dataWeight, xMin, xMax, option, rho);
0100    }
0101 
0102    /// Constructor for unweighted data and a user defined kernel function
0103    template<class KernelFunction>
0104    TKDE(const Char_t* /*name*/, const KernelFunction& kernfunc, UInt_t events, const Double_t* data, Double_t xMin = 0.0, Double_t xMax = 0.0, const Option_t* option = "KernelType:UserDefined;Iteration:Adaptive;Mirror:noMirror;Binning:RelaxedBinning", Double_t rho = 1.0)  {
0105       Instantiate(new ROOT::Math::WrappedFunction<const KernelFunction&>(kernfunc), events, data, nullptr, xMin, xMax, option, rho);
0106    }
0107 
0108    /// Constructor for weighted data and a user defined kernel function
0109    template<class KernelFunction>
0110    TKDE(const Char_t* /*name*/, const KernelFunction& kernfunc, UInt_t events, const Double_t* data, const Double_t * dataWeight, Double_t xMin = 0.0, Double_t xMax = 0.0, const Option_t* option = "KernelType:UserDefined;Iteration:Adaptive;Mirror:noMirror;Binning:RelaxedBinning", Double_t rho = 1.0)  {
0111       Instantiate(new ROOT::Math::WrappedFunction<const KernelFunction&>(kernfunc), events, data, dataWeight, xMin, xMax, option, rho);
0112    }
0113 
0114    ~TKDE() override;
0115 
0116    void Fill(Double_t data);
0117    void Fill(Double_t data, Double_t weight);
0118    void SetKernelType(EKernelType kern);
0119    void SetIteration(EIteration iter);
0120    void SetMirror(EMirror mir);
0121    void SetBinning(EBinning);
0122    void SetNBins(UInt_t nbins);
0123    void SetUseBinsNEvents(UInt_t nEvents);
0124    void SetTuneFactor(Double_t rho);
0125    void SetRange(Double_t xMin, Double_t xMax); ///< By default computed from the data
0126 
0127    void Draw(const Option_t* option = "") override;
0128 
0129    Double_t operator()(Double_t x) const;
0130    Double_t operator()(const Double_t* x, const Double_t* p = nullptr) const;  // Needed for creating TF1
0131 
0132    Double_t GetValue(Double_t x) const { return (*this)(x); }
0133    Double_t GetError(Double_t x) const;
0134 
0135    Double_t GetBias(Double_t x) const;
0136    Double_t GetMean() const;
0137    Double_t GetSigma() const;
0138    Double_t GetRAMISE() const;
0139 
0140    Double_t GetFixedWeight() const;
0141 
0142    TF1* GetFunction(UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
0143    TF1* GetUpperFunction(Double_t confidenceLevel = 0.95, UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
0144    TF1* GetLowerFunction(Double_t confidenceLevel = 0.95, UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
0145    TF1* GetApproximateBias(UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
0146    TGraphErrors * GetGraphWithErrors(UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
0147 
0148    /// @name Drawn objects getters
0149    /// Allow to change settings
0150    /// These objects are managed by TKDE and should not be deleted by the user
0151    ///@{
0152    TF1 * GetDrawnFunction() { return fPDF;}
0153    TF1 * GetDrawnUpperFunction() { return fUpperPDF;}
0154    TF1 * GetDrawnLowerFunction() { return fLowerPDF;}
0155    TGraphErrors * GetDrawnGraph() { return fGraph;}
0156    ///@}
0157 
0158    const Double_t * GetAdaptiveWeights() const;
0159 
0160 
0161 public:
0162 
0163    class TKernel {
0164       TKDE *fKDE;
0165       UInt_t fNWeights;               ///< Number of kernel weights (bandwidth as vectorized for binning)
0166       std::vector<Double_t> fWeights; ///< Kernel weights (bandwidth)
0167    public:
0168       TKernel(Double_t weight, TKDE *kde);
0169       void ComputeAdaptiveWeights();
0170       Double_t operator()(Double_t x) const;
0171       Double_t GetWeight(Double_t x) const;
0172       Double_t GetFixedWeight() const;
0173       const std::vector<Double_t> &GetAdaptiveWeights() const;
0174    };
0175 
0176    friend class TKernel;
0177 
0178 private:
0179 
0180    TKDE(TKDE& kde);           // Disallowed copy constructor
0181    TKDE operator=(TKDE& kde); // Disallowed assign operator
0182 
0183    // Kernel function pointer. It is managed by class for internal kernels or externally for user defined kernels
0184    typedef ROOT::Math::IBaseFunctionOneDim* KernelFunction_Ptr;
0185    KernelFunction_Ptr fKernelFunction;  ///<! pointer to kernel function
0186 
0187    std::unique_ptr<TKernel> fKernel;             ///<! internal kernel class. Transient because it is recreated after reading from a file
0188 
0189    std::vector<Double_t> fData;         ///< Data events
0190    std::vector<Double_t> fEvents;       ///< Original data storage
0191    std::vector<Double_t> fEventWeights; ///< Original data weights
0192 
0193    TF1* fPDF;             //! Output Kernel Density Estimation PDF function
0194    TF1* fUpperPDF;        //! Output Kernel Density Estimation upper confidence interval PDF function
0195    TF1* fLowerPDF;        //! Output Kernel Density Estimation lower confidence interval PDF function
0196    TF1* fApproximateBias; //! Output Kernel Density Estimation approximate bias
0197    TGraphErrors* fGraph;  //! Graph with the errors
0198 
0199    EKernelType fKernelType;
0200    EIteration fIteration;
0201    EMirror fMirror;
0202    EBinning fBinning;
0203 
0204 
0205    Bool_t fUseMirroring, fMirrorLeft, fMirrorRight, fAsymLeft, fAsymRight;
0206    Bool_t fUseBins;
0207    Bool_t fNewData;                    ///< Flag to control when new data are given
0208    Bool_t fUseMinMaxFromData;          ///< Flag top control if min and max must be used from data
0209 
0210    UInt_t fNBins;                      ///< Number of bins for binned data option
0211    UInt_t fNEvents;                    ///< Data's number of events
0212    Double_t fSumOfCounts;              ///< Data sum of weights
0213    UInt_t fUseBinsNEvents;             ///< If the algorithm is allowed to use automatic (relaxed) binning this is the minimum number of events to do so
0214 
0215    Double_t fMean;                     ///< Data mean
0216    Double_t fSigma;                    ///< Data std deviation
0217    Double_t fSigmaRob;                 ///< Data std deviation (robust estimation)
0218    Double_t fXMin;                     ///< Data minimum value
0219    Double_t fXMax;                     ///< Data maximum value
0220    Double_t fRho;                      ///< Adjustment factor for sigma
0221    Double_t fAdaptiveBandwidthFactor;  ///< Geometric mean of the kernel density estimation from the data for adaptive iteration
0222 
0223    Double_t fWeightSize;               ///< Caches the weight size
0224 
0225    std::vector<Double_t> fCanonicalBandwidths;
0226    std::vector<Double_t> fKernelSigmas2;
0227 
0228    std::vector<Double_t> fBinCount;    ///< Number of events per bin for binned data option
0229 
0230    std::vector<Bool_t> fSettedOptions; ///< User input options flag
0231 
0232    struct KernelIntegrand;
0233    friend struct KernelIntegrand;
0234 
0235    void Instantiate(KernelFunction_Ptr kernfunc, UInt_t events, const Double_t* data, const Double_t* weight,
0236                     Double_t xMin, Double_t xMax, const Option_t* option, Double_t rho);
0237 
0238    /// Returns the kernel evaluation at x
0239    inline Double_t GaussianKernel(Double_t x) const {
0240       Double_t k2_PI_ROOT_INV = 0.398942280401432703; // (2 * M_PI)**-0.5
0241       return (x > -9. && x < 9.) ? k2_PI_ROOT_INV * std::exp(-.5 * x * x) : 0.0;
0242    }
0243 
0244    inline Double_t EpanechnikovKernel(Double_t x) const {
0245       return (x > -1. &&  x < 1.) ? 3. / 4. * (1. - x * x) : 0.0;
0246    }
0247 
0248    /// Returns the kernel evaluation at x
0249    inline Double_t BiweightKernel(Double_t x) const {
0250       return (x > -1. &&  x < 1.) ? 15. / 16. * (1. - x * x) * (1. - x * x) : 0.0;
0251    }
0252 
0253    /// Returns the kernel evaluation at x
0254    inline Double_t CosineArchKernel(Double_t x) const {
0255       return (x > -1. &&  x < 1.) ? M_PI_4 * std::cos(M_PI_2 * x) : 0.0;
0256    }
0257    Double_t UpperConfidenceInterval(const Double_t* x, const Double_t* p) const; ///< Valid if the bandwidth is small compared to nEvents**1/5
0258    Double_t LowerConfidenceInterval(const Double_t* x, const Double_t* p) const; ///< Valid if the bandwidth is small compared to nEvents**1/5
0259    Double_t ApproximateBias(const Double_t* x, const Double_t* ) const { return GetBias(*x); }
0260    Double_t ComputeKernelL2Norm() const;
0261    Double_t ComputeKernelSigma2() const;
0262    Double_t ComputeKernelMu() const;
0263    Double_t ComputeKernelIntegral() const;
0264    Double_t ComputeMidspread() ;
0265    void ComputeDataStats() ;
0266 
0267    UInt_t Index(Double_t x) const;
0268 
0269    void SetBinCentreData(Double_t xmin, Double_t xmax);
0270    void SetBinCountData();
0271    void CheckKernelValidity();
0272    void SetUserCanonicalBandwidth();
0273    void SetUserKernelSigma2();
0274    void SetCanonicalBandwidths();
0275    void SetKernelSigmas2();
0276    void SetHistogram();
0277    void SetUseBins();
0278    void SetMirror();
0279    void SetMean();
0280    void SetSigma(Double_t R);
0281    void SetKernel();
0282    void SetKernelFunction(KernelFunction_Ptr kernfunc = nullptr);
0283    void SetOptions(const Option_t* option, Double_t rho);
0284    void CheckOptions(Bool_t isUserDefinedKernel = kFALSE);
0285    void GetOptions(std::string optionType, std::string option);
0286    void AssureOptions();
0287    void SetData(const Double_t* data, const Double_t * weights);
0288    void ReInit();
0289    void InitFromNewData();
0290    void SetMirroredEvents();
0291    void SetDrawOptions(const Option_t* option, TString& plotOpt, TString& drawOpt);
0292    void DrawErrors(TString& drawOpt);
0293    void DrawConfidenceInterval(TString& drawOpt, double cl=0.95);
0294 
0295    TF1* GetKDEFunction(UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
0296    TF1* GetKDEApproximateBias(UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
0297    // The density to estimate should be at least twice differentiable.
0298    TF1* GetPDFUpperConfidenceInterval(Double_t confidenceLevel = 0.95, UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
0299    TF1* GetPDFLowerConfidenceInterval(Double_t confidenceLevel = 0.95, UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
0300 
0301    ClassDefOverride(TKDE, 3) // One dimensional semi-parametric Kernel Density Estimation
0302 
0303 };
0304 
0305 #endif