root/Fit/BinData.h

0001 // @(#)root/mathcore:$Id$
0002 // Author: L. Moneta Wed Aug 30 11:15:23 2006
0003
0004 /**********************************************************************
0005  *                                                                    *
0006  * Copyright (c) 2006  LCG ROOT Math Team, CERN/PH-SFT                *
0007  *                                                                    *
0008  *                                                                    *
0009  **********************************************************************/
0010
0011 // Header file for class BinData
0012
0013 #ifndef ROOT_Fit_BinData
0014 #define ROOT_Fit_BinData
0015
0016 #include "Fit/FitData.h"
0017 #include "Math/Error.h"
0018 #include <cmath>
0019 #include <vector>
0020
0021 namespace ROOT {
0022
0023    namespace Fit {
0024
0025
0026
0027 //___________________________________________________________________________________
0028 /**
0029    Class describing the binned data sets :
0030    vectors of  x coordinates, y values and optionally error on y values and error on coordinates
0031    The dimension of the coordinate is free
0032    There are 4 different options:
0033    - only coordinates and values  (for binned likelihood fits)  : kNoError
0034    - coordinate, values and error on  values (for normal least square fits)  : kValueError
0035    - coordinate, values, error on values and coordinates (for effective least square fits) : kCoordError
0036    - coordinate, values, error on coordinates and asymmetric error on values : kAsymError
0037
0038    In addition there is the option to construct Bindata copying the data in (using the DataVector class)
0039    or using pointer to external data (DataWrapper) class.
0040    In general is found to be more efficient to copy the data.
0041    In case of really large data sets for limiting memory consumption then the other option can be used
0042    Specialized constructor exists for data up to 3 dimensions.
0043
0044    When the data are copying in the number of points can be set later (or re-set) using Initialize and
0045    the data are inserted one by one using the Add method.
0046    It is mandatory to set the size before using the Add method.
0047
0048    @ingroup  FitData
0049 */
0050
0051
0052 class BinData  : public FitData  {
0053
0054 public :
0055
0056    enum ErrorType { kNoError, kValueError, kCoordError, kAsymError };
0057
0058    /**
0059       constructor from dimension of point  and max number of points (to pre-allocate vector)
0060       Give a zero value and then use Initialize later one if the size is not known
0061    */
0062
0063    explicit BinData(unsigned int maxpoints = 0, unsigned int dim = 1,
0064                     ErrorType err = kValueError);
0065
0066
0067    /**
0068       constructor from option and default range
0069    */
0070    explicit BinData (const DataOptions & opt, unsigned int maxpoints = 0,
0071                      unsigned int dim = 1, ErrorType err = kValueError);
0072
0073    /**
0074       constructor from options and range
0075       default is 1D and value errors
0076    */
0077    BinData (const DataOptions & opt, const DataRange & range,
0078             unsigned int maxpoints = 0, unsigned int dim = 1, ErrorType err = kValueError );
0079
0080    /** constructors using external data */
0081
0082    /**
0083       constructor from external data for 1D with errors on  coordinate and value
0084    */
0085    BinData(unsigned int n, const double * dataX, const double * val,
0086            const double * ex , const double * eval );
0087
0088    /**
0089       constructor from external data for 2D with errors on  coordinate and value
0090    */
0091    BinData(unsigned int n, const double * dataX, const double * dataY,
0092            const double * val, const double * ex , const double * ey,
0093            const double * eval  );
0094
0095    /**
0096       constructor from external data for 3D with errors on  coordinate and value
0097    */
0098    BinData(unsigned int n, const double * dataX, const double * dataY,
0099            const double * dataZ, const double * val, const double * ex ,
0100            const double * ey , const double * ez , const double * eval   );
0101
0102    /**
0103       destructor
0104    */
0105    ~BinData() override;
0106
0107    /**
0108       copy constructors
0109    */
0110    BinData(const BinData & rhs);
0111
0112    /// assignment operator
0113    BinData & operator= ( const BinData & rhs );
0114
0115
0116    /**
0117       Preallocate a data set with given size, dimension and error type.
0118       If the data set already exists, `newPoints` are appended to the existing data set.
0119       (i.e., if the data exists Initialize() is equivalent to a `resize( NPoints() + maxpoints)`).
0120       Initialize() and Append() are equivalent.
0121    */
0122    void Initialize( unsigned int newPoints, unsigned int dim = 1, ErrorType err = kValueError ){
0123       Append(newPoints,dim,err);
0124    }
0125
0126    /// Equivalent to Initialize()
0127    void Append( unsigned int newPoints, unsigned int dim = 1, ErrorType err = kValueError );
0128
0129
0130
0131    /**
0132       flag to control if data provides error on the coordinates
0133    */
0134    bool HaveCoordErrors() const {
0135       assert (  fErrorType == kNoError ||
0136                 fErrorType == kValueError ||
0137                 fErrorType == kCoordError ||
0138                 fErrorType == kAsymError );
0139
0140       return fErrorType == kCoordError;
0141    }
0142
0143    /**
0144       flag to control if data provides asymmetric errors on the value
0145    */
0146    bool HaveAsymErrors() const {
0147       assert (  fErrorType == kNoError ||
0148                 fErrorType == kValueError ||
0149                 fErrorType == kCoordError ||
0150                 fErrorType == kAsymError );
0151
0152       return fErrorType == kAsymError;
0153    }
0154
0155
0156    /**
0157       apply a Log transformation of the data values
0158       can be used for example when fitting an exponential or gaussian
0159       Transform the data in place need to copy if want to preserve original data
0160       The data sets must not contain negative values. IN case it does,
0161       an empty data set is returned
0162    */
0163    BinData & LogTransform();
0164
0165
0166    /**
0167       add one dim data with only coordinate and values
0168    */
0169    void Add( double x, double y );
0170
0171    /**
0172       add one dim data with no error in the coordinate (x)
0173       in this case store the inverse of the error in the value (y)
0174    */
0175    void Add( double x, double y, double ey );
0176
0177    /**
0178       add one dim data with  error in the coordinate (x)
0179       in this case store the value (y)  error and not the inverse
0180    */
0181    void Add( double x, double y, double ex, double ey );
0182
0183    /**
0184       add one dim data with  error in the coordinate (x) and asymmetric errors in the value (y)
0185       in this case store the y errors and not the inverse
0186    */
0187    void Add( double x, double y, double ex, double eyl, double eyh );
0188
0189    /**
0190       add multi-dim coordinate data with only value
0191    */
0192    void Add( const double* x, double val );
0193
0194    /**
0195       add multi-dim coordinate data with only error in value
0196    */
0197    void Add( const double* x, double val, double eval );
0198
0199    /**
0200       add multi-dim coordinate data with both error in coordinates and value
0201    */
0202    void Add( const double* x, double val, const double* ex, double eval );
0203
0204    /**
0205       add multi-dim coordinate data with both error in coordinates and value
0206    */
0207    void Add( const double* x, double val, const double* ex, double elval, double ehval );
0208
0209    /**
0210       add the bin width data, a pointer to an array with the bin upper edge information.
0211       This is needed when fitting with integral options
0212       The information is added for the previously inserted point.
0213       BinData::Add  must be called before
0214    */
0215    void AddBinUpEdge( const double* xup );
0216
0217    /**
0218       return the value for the given fit point
0219    */
0220    double Value( unsigned int ipoint ) const
0221    {
0222       assert( ipoint < fMaxPoints );
0223       assert( fDataPtr );
0224       assert( fData.empty() || &fData.front() == fDataPtr );
0225
0226       return fDataPtr[ipoint];
0227    }
0228
0229    /**
0230       return a pointer to the value for the given fit point
0231    */
0232    const double *ValuePtr( unsigned int ipoint ) const
0233    {
0234       return &fDataPtr[ipoint];
0235    }
0236
0237    /**
0238       Return a pointer to the error (or the inverse error) on the value for a given point
0239       depending on the type of data.
0240       - If the data contains only value error (e.g. from histograms) returns a pointer to
0241         the inverse of the errors.
0242       - If the data contains errors in coordinates and value (e.g from TGraphErrors) returns a
0243         pointer to the corresponding value error (NOT the inverse).
0244       - If the data contains asymmetric errors return a pointer to the average error (NOT the inverse):
0245         0.5(eu + el).
0246       - If the data does not contain errors return a nullptr.
0247    */
0248
0249    const double * ErrorPtr(unsigned int ipoint) const{
0250       assert( ipoint < fMaxPoints );
0251       assert( kValueError == fErrorType || kCoordError == fErrorType ||
0252               kAsymError == fErrorType || kNoError == fErrorType );
0253
0254       if ( fErrorType == kNoError )
0255          return nullptr;
0256       return &fDataErrorPtr[ ipoint ];
0257    }
0258
0259    /// Return the error on the given point.
0260    /// Safer method returning in any case the error and not the inverse as in the
0261    /// function above.
0262    double Error( unsigned int ipoint ) const
0263    {
0264       assert( ipoint < fMaxPoints );
0265       assert( kValueError == fErrorType || kCoordError == fErrorType ||
0266               kAsymError == fErrorType || kNoError == fErrorType );
0267
0268       if ( fErrorType == kNoError )
0269       {
0270          assert( !fDataErrorPtr && !fDataErrorHighPtr && !fDataErrorLowPtr );
0271          assert( fDataError.empty() && fDataErrorHigh.empty() && fDataErrorLow.empty() );
0272          return 1.0;
0273       }
0274
0275       if ( fErrorType == kValueError ) // need to invert (inverror is stored)
0276       {
0277          assert( fDataErrorPtr && !fDataErrorHighPtr && !fDataErrorLowPtr );
0278          assert( fDataErrorHigh.empty() && fDataErrorLow.empty() );
0279          assert( fDataError.empty() || &fDataError.front() == fDataErrorPtr );
0280
0281          double eval = fDataErrorPtr[ ipoint ];
0282
0283          if (fWrapped)
0284             return eval;
0285          else
0286             return (eval != 0.0) ? 1.0/eval : 0.0;
0287       }
0288
0289       if ( fErrorType == kAsymError )
0290       {  // return 1/2(el + eh)
0291          assert( !fDataErrorPtr && fDataErrorHighPtr && fDataErrorLowPtr );
0292          assert( fDataError.empty() );
0293          assert( fDataErrorHigh.empty() || &fDataErrorHigh.front() == fDataErrorHighPtr );
0294          assert( fDataErrorLow.empty() || &fDataErrorLow.front() == fDataErrorLowPtr );
0295          assert( fDataErrorLow.empty() == fDataErrorHigh.empty() );
0296
0297          double eh = fDataErrorHighPtr[ ipoint ];
0298          double el = fDataErrorLowPtr[ ipoint ];
0299
0300          return (el+eh) / 2.0;
0301       }
0302
0303       assert( fErrorType == kCoordError );
0304       return fDataErrorPtr[ ipoint ];
0305    }
0306
0307    void GetAsymError( unsigned int ipoint, double& lowError, double& highError ) const
0308    {
0309       assert( fErrorType == kAsymError );
0310       assert( !fDataErrorPtr && fDataErrorHighPtr && fDataErrorLowPtr );
0311       assert( fDataError.empty() );
0312       assert( fDataErrorHigh.empty() || &fDataErrorHigh.front() == fDataErrorHighPtr );
0313       assert( fDataErrorLow.empty() || &fDataErrorLow.front() == fDataErrorLowPtr );
0314       assert( fDataErrorLow.empty() == fDataErrorHigh.empty() );
0315
0316       lowError = fDataErrorLowPtr[ ipoint ];
0317       highError = fDataErrorHighPtr[ ipoint ];
0318    }
0319
0320    /**
0321       Return the inverse of error on the value for the given fit point
0322       useful when error in the coordinates are not stored and then this is used directly this as the weight in
0323       the least square function
0324    */
0325    double InvError( unsigned int ipoint ) const
0326    {
0327       assert( ipoint < fMaxPoints );
0328       assert( kValueError == fErrorType || kCoordError == fErrorType ||
0329               kAsymError == fErrorType || kNoError == fErrorType );
0330
0331       if ( fErrorType == kNoError )
0332       {
0333          assert( !fDataErrorPtr && !fDataErrorHighPtr && !fDataErrorLowPtr );
0334          assert( fDataError.empty() && fDataErrorHigh.empty() && fDataErrorLow.empty() );
0335          return 1.0;
0336       }
0337
0338       if ( fErrorType == kValueError ) // need to invert (inverror is stored)
0339       {
0340          assert( fDataErrorPtr && !fDataErrorHighPtr && !fDataErrorLowPtr );
0341          assert( fDataErrorHigh.empty() && fDataErrorLow.empty() );
0342          assert( fDataError.empty() || &fDataError.front() == fDataErrorPtr );
0343
0344          double eval = fDataErrorPtr[ ipoint ];
0345
0346          // in case of wrapped data the pointer stores the error and
0347          // not the inverse
0348          if (fWrapped)
0349             return 1.0 / eval;
0350          else
0351             return (eval != 0.0) ? eval : 0.0;
0352       }
0353
0354       if ( fErrorType == kAsymError ) {
0355          // return inverse of 1/2(el + eh)
0356          assert( !fDataErrorPtr && fDataErrorHighPtr && fDataErrorLowPtr );
0357          assert( fDataError.empty() );
0358          assert( fDataErrorHigh.empty() || &fDataErrorHigh.front() == fDataErrorHighPtr );
0359          assert( fDataErrorLow.empty() || &fDataErrorLow.front() == fDataErrorLowPtr );
0360          assert( fDataErrorLow.empty() == fDataErrorHigh.empty() );
0361
0362          double eh = fDataErrorHighPtr[ ipoint ];
0363          double el = fDataErrorLowPtr[ ipoint ];
0364
0365          return 2.0 / (el+eh);
0366       }
0367
0368       assert( fErrorType == kCoordError );
0369       // for coordinate error we store the error and not the inverse
0370       return 1.0 / fDataErrorPtr[ ipoint ];
0371    }
0372
0373
0374    /**
0375       retrieve at the same time a  pointer to the coordinate data and the fit value
0376       More efficient than calling Coords(i) and Value(i)
0377    */
0378    // not threadsafe, to be replaced with never constructs!
0379    // for example: just return std::array or std::vector, there's
0380    // is going to be only minor overhead in c++11.
0381    const double * GetPoint( unsigned int ipoint, double & value ) const
0382    {
0383       assert( ipoint < fMaxPoints );
0384       value = Value( ipoint );
0385
0386       return Coords( ipoint );
0387    }
0388
0389    /**
0390       returns a single coordinate error component of a point.
0391       This function is threadsafe in contrast to Coords(...)
0392       and can easily get vectorized by the compiler in loops
0393       running over the ipoint-index.
0394    */
0395    double GetCoordErrorComponent( unsigned int ipoint, unsigned int icoord ) const
0396    {
0397       assert( ipoint < fMaxPoints );
0398       assert( icoord < fDim );
0399       assert( fCoordErrorsPtr.size() == fDim );
0400       assert( fCoordErrorsPtr[icoord] );
0401       assert( fCoordErrors.empty() || &fCoordErrors[icoord].front() == fCoordErrorsPtr[icoord] );
0402
0403       return fCoordErrorsPtr[icoord][ipoint];
0404    }
0405
0406    /**
0407       Return a pointer to the errors in the coordinates for the given fit point
0408    */
0409    // not threadsafe, to be replaced with never constructs!
0410    // for example: just return std::array or std::vector, there's
0411    // is going to be only minor overhead in c++11.
0412    const double* CoordErrors( unsigned int ipoint ) const
0413    {
0414       assert( ipoint < fMaxPoints );
0415       assert( fpTmpCoordErrorVector );
0416       assert( fErrorType == kCoordError || fErrorType == kAsymError );
0417
0418       for ( unsigned int i=0; i < fDim; i++ )
0419       {
0420          assert( fCoordErrorsPtr[i] );
0421          assert( fCoordErrors.empty() || &fCoordErrors[i].front() == fCoordErrorsPtr[i] );
0422
0423          fpTmpCoordErrorVector[i] = fCoordErrorsPtr[i][ipoint];
0424       }
0425
0426       return fpTmpCoordErrorVector;
0427    }
0428
0429
0430    /**
0431       retrieve in a single call a pointer to the coordinate data, value and inverse error for
0432       the given fit point.
0433       To be used only when type is kValueError or kNoError. In the last case the value 1 is returned
0434       for the error.
0435    */
0436    // not threadsafe, to be replaced with never constructs!
0437    // for example: just return std::array or std::vector, there's
0438    // is going to be only minor overhead in c++11.
0439    const double* GetPoint( unsigned int ipoint, double & value, double & invError ) const
0440    {
0441       assert( ipoint < fMaxPoints );
0442       assert( fErrorType == kNoError || fErrorType == kValueError );
0443
0444       double e = Error( ipoint );
0445
0446       if (fWrapped)
0447          invError = e;
0448       else
0449          invError = ( e != 0.0 ) ? 1.0/e : 1.0;
0450
0451       return GetPoint( ipoint, value );
0452    }
0453
0454    /**
0455       Retrieve the errors on the point (coordinate and value) for the given fit point
0456       It must be called only when the coordinate errors are stored otherwise it will produce an
0457       assert.
0458    */
0459    // not threadsafe, to be replaced with never constructs!
0460    // for example: just return std::array or std::vector, there's
0461    // is going to be only minor overhead in c++11.
0462    const double* GetPointError(unsigned int ipoint, double & errvalue) const
0463    {
0464       assert( ipoint < fMaxPoints );
0465       assert( fErrorType == kCoordError || fErrorType == kAsymError );
0466
0467       errvalue = Error( ipoint );
0468       return CoordErrors( ipoint );
0469    }
0470
0471    /**
0472       Get errors on the point (coordinate errors and asymmetric value errors) for the
0473       given fit point.
0474       It must be called only when the coordinate errors and asymmetric errors are stored
0475       otherwise it will produce an assert.
0476    */
0477    // not threadsafe, to be replaced with never constructs!
0478    // for example: just return std::array or std::vector, there's
0479    // is going to be only minor overhead in c++11.
0480    const double* GetPointError(unsigned int ipoint, double & errlow, double & errhigh) const
0481    {
0482       assert( ipoint < fMaxPoints );
0483       assert( fErrorType == kAsymError );
0484       assert( !fDataErrorPtr && fDataErrorHighPtr && fDataErrorLowPtr );
0485       assert( fDataError.empty() );
0486       assert( fDataErrorHigh.empty() || &fDataErrorHigh.front() == fDataErrorHighPtr );
0487       assert( fDataErrorLow.empty() || &fDataErrorLow.front() == fDataErrorLowPtr );
0488       assert( fDataErrorLow.empty() == fDataErrorHigh.empty() );
0489
0490       errhigh = fDataErrorHighPtr[ ipoint ];
0491       errlow = fDataErrorLowPtr[ ipoint ];
0492
0493       return CoordErrors( ipoint );
0494    }
0495
0496    /**
0497       returns a single coordinate error component of a point.
0498       This function is threadsafe in contrast to Coords(...)
0499       and can easily get vectorized by the compiler in loops
0500       running over the ipoint-index.
0501    */
0502    double GetBinUpEdgeComponent( unsigned int ipoint, unsigned int icoord ) const
0503    {
0504       assert( icoord < fDim );
0505       assert( !fBinEdge.empty() );
0506       assert( ipoint < fBinEdge.front().size() );
0507
0508       return fBinEdge[icoord][ipoint];
0509    }
0510
0511    /**
0512       return an array containing the upper edge of the bin for coordinate i
0513       In case of empty bin they could be merged in a single larger bin
0514       Return a NULL pointer  if the bin width  is not stored
0515    */
0516    // not threadsafe, to be replaced with never constructs!
0517    // for example: just return std::array or std::vector, there's
0518    // is going to be only minor overhead in c++11.
0519    const double* BinUpEdge( unsigned int ipoint ) const
0520    {
0521       if ( fBinEdge.empty() || ipoint > fBinEdge.front().size() )
0522          return nullptr;
0523
0524       GetBinUpEdgeCoordinates(ipoint, fpTmpBinEdgeVector);
0525
0526       return fpTmpBinEdgeVector;
0527    }
0528
0529    /**
0530     * Thread save version of function retrieving the bin up-edge in case of multidimensions
0531    */
0532    void GetBinUpEdgeCoordinates(unsigned int ipoint, double * x) const
0533    {
0534       if (fBinEdge.empty() || ipoint > fBinEdge.front().size()) return;
0535       assert(!fBinEdge.empty());
0536       assert(ipoint < fMaxPoints);
0537       for (unsigned int i = 0; i < fDim; i++) {
0538          x[i] = fBinEdge[i][ipoint];
0539       }
0540    }
0541
0542    /**
0543       query if the data store the bin edges instead of the center
0544    */
0545    bool HasBinEdges() const {
0546       return fBinEdge.size() == fDim && !fBinEdge[0].empty();
0547    }
0548
0549    /**
0550       retrieve the reference volume used to normalize the data when the option bin volume is set
0551    */
0552    double RefVolume() const { return fRefVolume; }
0553
0554    /**
0555       set the reference volume used to normalize the data when the option bin volume is set
0556    */
0557    void SetRefVolume(double value) { fRefVolume = value; }
0558
0559    /**
0560       retrieve the errortype
0561    */
0562    ErrorType GetErrorType( ) const
0563    {
0564       return fErrorType;
0565    }
0566
0567    /**
0568       compute the total sum of the data content
0569       (sum of weights in case of weighted data set)
0570    */
0571    double SumOfContent() const { return fSumContent; }
0572
0573    /**
0574       compute the total sum of the error square
0575       (sum of weight square in case of a weighted data set)
0576    */
0577    double SumOfError2() const { return fSumError2;}
0578
0579    /**
0580       return true if the data set is weighted
0581       We cannot compute ourselves because sometimes errors are filled with 1
0582       instead of zero (as in ROOT::Fit::FillData )
0583     */
0584    bool IsWeighted() const {
0585       return fIsWeighted;
0586    }
0587
0588 protected:
0589    void InitDataVector ();
0590
0591    void InitializeErrors();
0592
0593    void InitBinEdge();
0594
0595    void UnWrap( );
0596
0597    // compute sum of content and error squares
0598    void ComputeSums();
0599
0600 private:
0601
0602    ErrorType fErrorType;
0603    bool fIsWeighted = false; ///< flag to indicate weighted data
0604    double fRefVolume;        ///< reference bin volume - used to normalize the bins in case of variable bins data
0605    double fSumContent = 0;   ///< total sum of the bin data content
0606    double fSumError2 = 0;    ///< total sum square of the errors
0607
0608    /**
0609     * Stores the data values the same way as the coordinates.
0610     *
0611     */
0612    std::vector< double > fData;
0613    const double* fDataPtr;
0614
0615    std::vector< std::vector< double > > fCoordErrors;
0616    std::vector< const double* > fCoordErrorsPtr;
0617    // This vector contains the coordinate errors
0618    // in the same way as fCoords.
0619
0620    std::vector< double > fDataError;
0621    std::vector< double > fDataErrorHigh;
0622    std::vector< double > fDataErrorLow;
0623    const double*  fDataErrorPtr;
0624    const double*  fDataErrorHighPtr;
0625    const double*  fDataErrorLowPtr;
0626    // This vector contains the data error.
0627    // Either only fDataError or fDataErrorHigh and fDataErrorLow are used.
0628
0629    double* fpTmpCoordErrorVector; ///< not threadsafe stuff!
0630
0631    std::vector< std::vector< double > > fBinEdge;
0632    // vector containing the bin upper edge (coordinate will contain low edge)
0633
0634    double* fpTmpBinEdgeVector; ///< not threadsafe stuff!
0635 };
0636
0637
0638    } // end namespace Fit
0639
0640 } // end namespace ROOT
0641
0642
0643
0644 #endif /* ROOT_Fit_BinData */