Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 10:12:23

0001 // @(#)root/physics:$Id$
0002 // Author: Anna Kreshuk  08/10/2004
0003 
0004 
0005 //////////////////////////////////////////////////////////////////////////////
0006 //
0007 //  TRobustEstimator
0008 //
0009 // Minimum Covariance Determinant Estimator - a Fast Algorithm
0010 // invented by Peter J.Rousseeuw and Katrien Van Dreissen
0011 // "A Fast Algorithm for the Minimum covariance Determinant Estimator"
0012 // Technometrics, August 1999, Vol.41, NO.3
0013 //
0014 //////////////////////////////////////////////////////////////////////////////
0015 
0016 #ifndef ROOT_TRobustEstimator
0017 #define ROOT_TRobustEstimator
0018 
0019 #include "TArrayI.h"
0020 #include "TMatrixDSym.h"
0021 #include "TVectorD.h"
0022 
0023 class TRobustEstimator : public TObject {
0024 
0025 protected:
0026 
0027    Int_t        fNvar;          //number of variables
0028    Int_t        fH;             //algorithm parameter, determining the subsample size
0029    Int_t        fN;             //number of observations
0030 
0031    Int_t        fVarTemp;       //number of variables already added to the data matrix
0032    Int_t        fVecTemp;       //number of observations already added to the data matrix
0033 
0034    Int_t        fExact;         //if there was an exact fit, stores the number of points on a hyperplane
0035 
0036    TVectorD     fMean;          //location estimate (mean values)
0037    TMatrixDSym  fCovariance;    //covariance matrix estimate
0038    TMatrixDSym  fInvcovariance; //inverse of the covariance matrix
0039    TMatrixDSym  fCorrelation;   //correlation matrix
0040    TVectorD     fRd;            //array of robust distances, size n
0041    TVectorD     fSd;            //array of standard deviations
0042    TArrayI      fOut;           //array of indexes of ouliers, size <0.5*n
0043    TVectorD     fHyperplane;    //in case more than fH observations lie on a hyperplane
0044                                //the equation of this hyperplane is stored here
0045 
0046    TMatrixD fData;              //the original data
0047 
0048    //functions needed for evaluation
0049 
0050    void     AddToSscp(TMatrixD &sscp, TVectorD &vec);
0051    void     ClearSscp(TMatrixD &sscp);
0052 
0053    void     Classic();
0054    void     Covar(TMatrixD &sscp, TVectorD &m, TMatrixDSym &cov, TVectorD &sd, Int_t nvec);
0055    void     Correl();
0056 
0057    void     CreateSubset(Int_t ntotal, Int_t htotal, Int_t p, Int_t *index, TMatrixD &data,
0058                     TMatrixD &sscp, Double_t *ndist);
0059    void     CreateOrtSubset(TMatrixD &dat, Int_t *index, Int_t hmerged, Int_t nmerged, TMatrixD &sscp, Double_t *ndist);
0060 
0061    Double_t CStep(Int_t ntotal, Int_t htotal, Int_t *index, TMatrixD &data, TMatrixD &sscp, Double_t *ndist);
0062 
0063    Int_t    Exact(Double_t *ndist);
0064    Int_t    Exact2(TMatrixD &mstockbig, TMatrixD &cstockbig, TMatrixD &hyperplane,
0065                Double_t *deti, Int_t nbest,Int_t kgroup,
0066                TMatrixD &sscp, Double_t *ndist);
0067 
0068    Int_t    Partition(Int_t nmini, Int_t *indsubdat);
0069    Int_t    RDist(TMatrixD &sscp);
0070    void     RDraw(Int_t *subdat, Int_t ngroup, Int_t *indsubdat);
0071 
0072    Double_t KOrdStat(Int_t ntotal, Double_t *arr, Int_t k, Int_t *work);
0073 
0074 public:
0075 
0076    TRobustEstimator();
0077    TRobustEstimator(Int_t nvectors, Int_t nvariables, Int_t hh=0);
0078    ~TRobustEstimator() override {}
0079 
0080    void    AddColumn(Double_t *col);         //adds a column to the data matrix
0081    void    AddRow(Double_t *row);            //adds a row to the data matrix
0082 
0083    void    Evaluate();
0084    void    EvaluateUni(Int_t nvectors, Double_t *data, Double_t &mean, Double_t &sigma, Int_t hh=0);
0085 
0086    Int_t   GetBDPoint();                     //returns the breakdown point of the algorithm
0087 
0088    /// returns a reference to the data matrix
0089    const TMatrixD & GetData() { return fData; }
0090 
0091    void    GetCovariance(TMatrixDSym &matr); //returns robust covariance matrix estimate
0092    const   TMatrixDSym* GetCovariance() const{return &fCovariance;}
0093    void    GetCorrelation(TMatrixDSym &matr); //returns robust correlation matrix estimate
0094    const   TMatrixDSym* GetCorrelation() const{return &fCorrelation;}
0095    void    GetHyperplane(TVectorD &vec);      //if the data lies on a hyperplane, returns this hyperplane
0096    const   TVectorD* GetHyperplane() const;   //if the data lies on a hyperplane, returns this hyperplane
0097    Int_t   GetNHyp() {return fExact;}         //returns the number of points on a hyperplane
0098    void    GetMean(TVectorD &means);                        //returns robust mean vector estimate
0099    const   TVectorD* GetMean() const {return &fMean;}       //returns robust mean vector estimate
0100    void    GetRDistances(TVectorD &rdist);                  //returns robust distances of all observations
0101    const   TVectorD* GetRDistances() const {return &fRd;}   //returns robust distances of all observations
0102    Int_t   GetNumberObservations() const {return fN;}
0103    Int_t   GetNvar() const {return fNvar;}
0104    const   TArrayI* GetOuliers() const{return &fOut;}       //returns an array of outlier indexes
0105    Int_t   GetNOut(); //returns the number of points outside the tolerance ellipsoid.
0106                       //ONLY those with robust distances significantly larger than the
0107                       //cutoff value, should be considered outliers!
0108    Double_t GetChiQuant(Int_t i) const;
0109 
0110    ClassDefOverride(TRobustEstimator,1)  //Minimum Covariance Determinant Estimator
0111 
0112 };
0113 
0114 
0115 #endif
0116