include/root/TLinearFitter.h

0001 // @(#)root/minuit:$Id$
0002 // Author: Anna Kreshuk 04/03/2005
0003
0004 /*************************************************************************
0005  * Copyright (C) 1995-2005, Rene Brun and Fons Rademakers.               *
0006  * All rights reserved.                                                  *
0007  *                                                                       *
0008  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0009  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0010  *************************************************************************/
0011
0012 #ifndef ROOT_TLinearFitter
0013 #define ROOT_TLinearFitter
0014
0015 //////////////////////////////////////////////////////////////////////////
0016 //
0017 // The Linear Fitter - fitting functions that are LINEAR IN PARAMETERS
0018 //
0019 // Linear fitter is used to fit a set of data points with a linear
0020 // combination of specified functions. Note, that "linear" in the name
0021 // stands only for the model dependency on parameters, the specified
0022 // functions can be nonlinear.
0023 // The general form of this kind of model is
0024 //
0025 //          y(x) = a[0] + a[1]*f[1](x)+...a[n]*f[n](x)
0026 //
0027 // Functions f are fixed functions of x. For example, fitting with a
0028 // polynomial is linear fitting in this sense.
0029 //
0030 //                         The fitting method
0031 //
0032 // The fit is performed using the Normal Equations method with Cholesky
0033 // decomposition.
0034 //
0035 //                         Why should it be used?
0036 //
0037 // The linear fitter is considerably faster than general non-linear
0038 // fitters and doesn't require to set the initial values of parameters.
0039 //
0040 //                          Using the fitter:
0041 //
0042 // 1.Adding the data points:
0043 //  1.1 To store or not to store the input data?
0044 //      - There are 2 options in the constructor - to store or not
0045 //        store the input data. The advantages of storing the data
0046 //        are that you'll be able to reset the fitting model without
0047 //        adding all the points again, and that for very large sets
0048 //        of points the chisquare is calculated more precisely.
0049 //        The obvious disadvantage is the amount of memory used to
0050 //        keep all the points.
0051 //      - Before you start adding the points, you can change the
0052 //        store/not store option by StoreData() method.
0053 //  1.2 The data can be added:
0054 //      - simply point by point - AddPoint() method
0055 //      - an array of points at once:
0056 //        If the data is already stored in some arrays, this data
0057 //        can be assigned to the linear fitter without physically
0058 //        coping bytes, thanks to the Use() method of
0059 //        TVector and TMatrix classes - AssignData() method
0060 //
0061 // 2.Setting the formula
0062 //  2.1 The linear formula syntax:
0063 //      -Additive parts are separated by 2 plus signs "++"
0064 //       --for example "1 ++ x" - for fitting a straight line
0065 //      -All standard functions, undrestood by TFormula, can be used
0066 //       as additive parts
0067 //       --TMath functions can be used too
0068 //      -Functions, used as additive parts, shouldn't have any parameters,
0069 //       even if those parameters are set.
0070 //       --for example, if normalizing a sum of a gaus(0, 1) and a
0071 //         gaus(0, 2), don't use the built-in "gaus" of TFormula,
0072 //         because it has parameters, take TMath::Gaus(x, 0, 1) instead.
0073 //      -Polynomials can be used like "pol3", .."polN"
0074 //      -If fitting a more than 3-dimensional formula, variables should
0075 //       be numbered as follows:
0076 //       -- x0, x1, x2... For example, to fit  "1 ++ x0 ++ x1 ++ x2 ++ x3*x3"
0077 //  2.2 Setting the formula:
0078 //    2.2.1 If fitting a 1-2-3-dimensional formula, one can create a
0079 //          TF123 based on a linear expression and pass this function
0080 //          to the fitter:
0081 //          --Example:
0082 //            TLinearFitter *lf = new TLinearFitter();
0083 //            TF2 *f2 = new TF2("f2", "x ++ y ++ x*x*y*y", -2, 2, -2, 2);
0084 //            lf->SetFormula(f2);
0085 //          --The results of the fit are then stored in the function,
0086 //            just like when the TH1::Fit or TGraph::Fit is used
0087 //          --A linear function of this kind is by no means different
0088 //            from any other function, it can be drawn, evaluated, etc.
0089 //    2.2.2 There is no need to create the function if you don't want to,
0090 //          the formula can be set by expression:
0091 //          --Example:
0092 //            // 2 is the number of dimensions
0093 //            TLinearFitter *lf = new TLinearFitter(2);
0094 //            lf->SetFormula("x ++ y ++ x*x*y*y");
0095 //          --That's the only way to go, if you want to fit in more
0096 //            than 3 dimensions
0097 //    2.2.3 The fastest functions to compute are polynomials and hyperplanes.
0098 //          --Polynomials are set the usual way: "pol1", "pol2",...
0099 //          --Hyperplanes are set by expression "hyp3", "hyp4", ...
0100 //          ---The "hypN" expressions only work when the linear fitter
0101 //             is used directly, not through TH1::Fit or TGraph::Fit.
0102 //             To fit a graph or a histogram with a hyperplane, define
0103 //             the function as "1++x++y".
0104 //          ---A constant term is assumed for a hyperplane, when using
0105 //             the "hypN" expression, so "hyp3" is in fact fitting with
0106 //             "1++x++y++z" function.
0107 //          --Fitting hyperplanes is much faster than fitting other
0108 //            expressions so if performance is vital, calculate the
0109 //            function values beforehand and give them to the fitter
0110 //            as variables
0111 //          --Example:
0112 //            You want to fit "sin(x)|cos(2*x)" very fast. Calculate
0113 //            sin(x) and cos(2*x) beforehand and store them in array *data.
0114 //            Then:
0115 //            TLinearFitter *lf=new TLinearFitter(2, "hyp2");
0116 //            lf->AssignData(npoint, 2, data, y);
0117 //
0118 //  2.3 Resetting the formula
0119 //    2.3.1 If the input data is stored (or added via AssignData() function),
0120 //          the fitting formula can be reset without re-adding all the points.
0121 //          --Example:
0122 //            TLinearFitter *lf=new TLinearFitter("1++x++x*x");
0123 //            lf->AssignData(n, 1, x, y, e);
0124 //            lf->Eval()
0125 //            //looking at the parameter significance, you see,
0126 //            // that maybe the fit will improve, if you take out
0127 //            // the constant term
0128 //            lf->SetFormula("x++x*x");
0129 //            lf->Eval();
0130 //            ...
0131 //    2.3.2 If the input data is not stored, the fitter will have to be
0132 //          cleared and the data will have to be added again to try a
0133 //          different formula.
0134 //
0135 // 3.Accessing the fit results
0136 //  3.1 There are methods in the fitter to access all relevant information:
0137 //      --GetParameters, GetCovarianceMatrix, etc
0138 //      --the t-values of parameters and their significance can be reached by
0139 //        GetParTValue() and GetParSignificance() methods
0140 //  3.2 If fitting with a pre-defined TF123, the fit results are also
0141 //      written into this function.
0142 //
0143 //////////////////////////////////////////////////////////////////////////
0144
0145 #include "TVectorD.h"
0146 #include "TMatrixD.h"
0147 #include "TObjArray.h"
0148 #include "TFormula.h"
0149 #include "TVirtualFitter.h"
0150
0151 #include <map>
0152
0153 class TLinearFitter: public TVirtualFitter {
0154
0155 private:
0156    TVectorD     fParams;         //vector of parameters
0157    TMatrixDSym  fParCovar;       //matrix of parameters' covariances
0158    TVectorD     fTValues;        //T-Values of parameters
0159    TVectorD     fParSign;        //significance levels of parameters
0160    TMatrixDSym  fDesign;         //matrix AtA
0161    TMatrixDSym  fDesignTemp;     //! temporary matrix, used for num.stability
0162    TMatrixDSym  fDesignTemp2;    //!
0163    TMatrixDSym  fDesignTemp3;    //!
0164
0165    TVectorD     fAtb;            //vector Atb
0166    TVectorD     fAtbTemp;        //! temporary vector, used for num.stability
0167    TVectorD     fAtbTemp2;       //!
0168    TVectorD     fAtbTemp3;       //!
0169
0170    static std::map<TString,TFormula*>   fgFormulaMap;  //! map of basis functions and formula
0171    TObjArray    fFunctions;      //array of basis functions
0172    TVectorD     fY;              //the values being fit
0173    Double_t     fY2;             //sum of square of y, used for chisquare
0174    Double_t     fY2Temp;         //! temporary variable used for num.stability
0175    TMatrixD     fX;              //values of x
0176    TVectorD     fE;              //the errors if they are known
0177    TFormula     *fInputFunction; //the function being fit
0178    Double_t     fVal[1000];      //! temporary
0179
0180    Int_t        fNpoints;        //number of points
0181    Int_t        fNfunctions;     //number of basis functions
0182    Int_t        fFormulaSize;    //length of the formula
0183    Int_t        fNdim;           //number of dimensions in the formula
0184    Int_t        fNfixed;         //number of fixed parameters
0185    Int_t        fSpecial;        //=100+n if fitting a polynomial of deg.n
0186                                  //=200+n if fitting an n-dimensional hyperplane
0187    char         *fFormula;       //the formula
0188    Bool_t       fIsSet;          //Has the formula been set?
0189    Bool_t       fStoreData;      //Is the data stored?
0190    Double_t     fChisquare;      //Chisquare of the fit
0191
0192    Int_t        fH;              //number of good points in robust fit
0193    Bool_t       fRobust;         //true when performing a robust fit
0194    TBits        fFitsample;      //indices of points, used in the robust fit
0195
0196    Bool_t       *fFixedParams;   //[fNfixed] array of fixed/released params
0197
0198
0199    void  AddToDesign(Double_t *x, Double_t y, Double_t e);
0200    void  ComputeTValues();
0201    Int_t GraphLinearFitter(Double_t h);
0202    Int_t Graph2DLinearFitter(Double_t h);
0203    Int_t HistLinearFitter();
0204    Int_t MultiGraphLinearFitter(Double_t h);
0205
0206    //robust fitting functions:
0207    Int_t     Partition(Int_t nmini, Int_t *indsubdat);
0208    void      RDraw(Int_t *subdat, Int_t *indsubdat);
0209    void      CreateSubset(Int_t ntotal, Int_t h, Int_t *index);
0210    Double_t  CStep(Int_t step, Int_t h, Double_t *residuals, Int_t *index, Int_t *subdat, Int_t start, Int_t end);
0211    Bool_t    Linf();
0212
0213    TLinearFitter(const TLinearFitter &) = delete;
0214    TLinearFitter& operator=(const TLinearFitter &) = delete;
0215
0216 public:
0217    TLinearFitter();
0218    TLinearFitter(Int_t ndim, const char *formula, Option_t *opt="D");
0219    TLinearFitter(Int_t ndim);
0220    TLinearFitter(TFormula *function, Option_t *opt="D");
0221    ~TLinearFitter() override;
0222
0223    virtual void       Add(TLinearFitter *tlf);
0224    virtual void       AddPoint(Double_t *x, Double_t y, Double_t e=1);
0225    virtual void       AddTempMatrices();
0226    virtual void       AssignData(Int_t npoints, Int_t xncols, Double_t *x, Double_t *y, Double_t *e=nullptr);
0227
0228    void       Clear(Option_t *option="") override;
0229    virtual void       ClearPoints();
0230    virtual void       Chisquare();
0231    virtual Int_t      Eval();
0232    virtual Int_t      EvalRobust(Double_t h=-1);
0233    Int_t      ExecuteCommand(const char *command, Double_t *args, Int_t nargs) override;
0234    void       FixParameter(Int_t ipar) override;
0235    virtual void       FixParameter(Int_t ipar, Double_t parvalue);
0236    virtual void       GetAtbVector(TVectorD &v);
0237    virtual Double_t   GetChisquare();
0238    void       GetConfidenceIntervals(Int_t n, Int_t ndim, const Double_t *x, Double_t *ci, Double_t cl=0.95) override;
0239    void       GetConfidenceIntervals(TObject *obj, Double_t cl=0.95) override;
0240    Double_t*  GetCovarianceMatrix() const override;
0241    virtual void       GetCovarianceMatrix(TMatrixD &matr);
0242    Double_t   GetCovarianceMatrixElement(Int_t i, Int_t j) const override {return fParCovar(i, j);}
0243    virtual void       GetDesignMatrix(TMatrixD &matr);
0244    virtual void       GetErrors(TVectorD &vpar);
0245    Int_t      GetNumberTotalParameters() const override {return fNfunctions;}
0246    Int_t      GetNumberFreeParameters() const override {return fNfunctions-fNfixed;}
0247    virtual Int_t      GetNpoints() { return fNpoints; }
0248    virtual void       GetParameters(TVectorD &vpar);
0249    Double_t   GetParameter(Int_t ipar) const override {return fParams(ipar);}
0250    Int_t      GetParameter(Int_t ipar,char* name,Double_t& value,Double_t& /*verr*/,Double_t& /*vlow*/, Double_t& /*vhigh*/) const override;
0251    const char *GetParName(Int_t ipar) const override;
0252    Double_t   GetParError(Int_t ipar) const override;
0253    virtual Double_t   GetParTValue(Int_t ipar);
0254    virtual Double_t   GetParSignificance(Int_t ipar);
0255    virtual void       GetFitSample(TBits& bits);
0256    virtual Double_t   GetY2() const {return fY2;}
0257    Bool_t     IsFixed(Int_t ipar) const override {return fFixedParams[ipar];}
0258    virtual Int_t      Merge(TCollection *list);
0259    void       PrintResults(Int_t level, Double_t amin=0) const override;
0260    void       ReleaseParameter(Int_t ipar) override;
0261    virtual void       SetBasisFunctions(TObjArray * functions);
0262    virtual void       SetDim(Int_t n);
0263    virtual void       SetFormula(const char* formula);
0264    virtual void       SetFormula(TFormula *function);
0265    virtual void       StoreData(Bool_t store) {fStoreData=store;}
0266
0267    virtual Bool_t     UpdateMatrix();
0268
0269    //dummy functions for TVirtualFitter:
0270    Double_t  Chisquare(Int_t /*npar*/, Double_t * /*params*/) const override {return 0;}
0271    Int_t     GetErrors(Int_t /*ipar*/,Double_t & /*eplus*/, Double_t & /*eminus*/, Double_t & /*eparab*/, Double_t & /*globcc*/) const override {return 0;}
0272
0273    Int_t     GetStats(Double_t& /*amin*/, Double_t& /*edm*/, Double_t& /*errdef*/, Int_t& /*nvpar*/, Int_t& /*nparx*/) const override {return 0;}
0274    Double_t  GetSumLog(Int_t /*i*/) override {return 0;}
0275    void      SetFitMethod(const char * /*name*/) override {}
0276    Int_t     SetParameter(Int_t /*ipar*/,const char * /*parname*/,Double_t /*value*/,Double_t /*verr*/,Double_t /*vlow*/, Double_t /*vhigh*/) override {return 0;}
0277
0278    ClassDefOverride(TLinearFitter, 2) //fit a set of data points with a linear combination of functions
0279 };
0280
0281 #endif