Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 10:10:09

0001 // @(#)root/mathcore:$Id$
0002 // Author: L. Moneta Tue Nov 28 10:52:47 2006
0003 
0004 /**********************************************************************
0005  *                                                                    *
0006  * Copyright (c) 2006  LCG ROOT Math Team, CERN/PH-SFT                *
0007  *                                                                    *
0008  *                                                                    *
0009  **********************************************************************/
0010 
0011 // Header file for class FitUtil
0012 
0013 #ifndef ROOT_Fit_FitUtil
0014 #define ROOT_Fit_FitUtil
0015 
0016 #include "Math/IParamFunctionfwd.h"
0017 #include "Math/IParamFunction.h"
0018 
0019 #ifdef R__USE_IMT
0020 #include "ROOT/TThreadExecutor.hxx"
0021 #endif
0022 #include "ROOT/TSequentialExecutor.hxx"
0023 
0024 #include "Fit/BinData.h"
0025 #include "Fit/UnBinData.h"
0026 #include "ROOT/EExecutionPolicy.hxx"
0027 
0028 #include "Math/Integrator.h"
0029 #include "Math/IntegratorMultiDim.h"
0030 
0031 #include "TError.h"
0032 #include <vector>
0033 
0034 // using parameter cache is not thread safe but needed for normalizing the functions
0035 #define USE_PARAMCACHE
0036 
0037 //#define DEBUG_FITUTIL
0038 
0039 #ifdef R__HAS_VECCORE
0040 namespace vecCore {
0041 template <class T>
0042 vecCore::Mask<T> Int2Mask(unsigned i)
0043 {
0044    T x;
0045    for (unsigned j = 0; j < vecCore::VectorSize<T>(); j++)
0046       vecCore::Set<T>(x, j, j);
0047    return vecCore::Mask<T>(x < T(i));
0048 }
0049 }
0050 #endif
0051 
0052 namespace ROOT {
0053 
0054    namespace Fit {
0055 
0056 /**
0057    namespace defining utility free functions using in Fit for evaluating the various fit method
0058    functions (chi2, likelihood, etc..)  given the data and the model function
0059 
0060    @ingroup FitMain
0061 */
0062 namespace FitUtil {
0063 
0064   typedef  ROOT::Math::IParamMultiFunction IModelFunction;
0065   typedef  ROOT::Math::IParamMultiGradFunction IGradModelFunction;
0066 
0067   template <class T>
0068   using IGradModelFunctionTempl = ROOT::Math::IParamMultiGradFunctionTempl<T>;
0069 
0070   template <class T>
0071   using IModelFunctionTempl = ROOT::Math::IParamMultiFunctionTempl<T>;
0072 
0073   // internal class defining
0074   template <class T>
0075   class LikelihoodAux {
0076   public:
0077      LikelihoodAux(T logv = {}, T w = {}, T w2 = {}) : logvalue(logv), weight(w), weight2(w2) {}
0078 
0079      LikelihoodAux operator+(const LikelihoodAux &l) const
0080      {
0081         return LikelihoodAux<T>(logvalue + l.logvalue, weight + l.weight, weight2 + l.weight2);
0082      }
0083 
0084      LikelihoodAux &operator+=(const LikelihoodAux &l)
0085      {
0086         logvalue += l.logvalue;
0087         weight += l.weight;
0088         weight2 += l.weight2;
0089         return *this;
0090      }
0091 
0092      T logvalue;
0093      T weight;
0094      T weight2;
0095   };
0096 
0097   template <>
0098   class LikelihoodAux<double> {
0099   public:
0100      LikelihoodAux(double logv = 0.0, double w = 0.0, double w2 = 0.0) : logvalue(logv), weight(w), weight2(w2){};
0101 
0102      LikelihoodAux operator+(const LikelihoodAux &l) const
0103      {
0104         return LikelihoodAux<double>(logvalue + l.logvalue, weight + l.weight, weight2 + l.weight2);
0105      }
0106 
0107      LikelihoodAux &operator+=(const LikelihoodAux &l)
0108      {
0109         logvalue += l.logvalue;
0110         weight += l.weight;
0111         weight2 += l.weight2;
0112         return *this;
0113      }
0114 
0115      double logvalue;
0116      double weight;
0117      double weight2;
0118   };
0119 
0120   // internal class to evaluate the function or the integral
0121   // and cached internal integration details
0122   // if useIntegral is false no allocation is done
0123   // and this is a dummy class
0124   // class is templated on any parametric functor implementing operator()(x,p) and NDim()
0125   // contains a constant pointer to the function
0126 
0127   template <class ParamFunc = ROOT::Math::IParamMultiFunctionTempl<double>>
0128   class IntegralEvaluator {
0129 
0130   public:
0131      IntegralEvaluator(const ParamFunc &func, const double *p, bool useIntegral = true,
0132                        ROOT::Math::IntegrationOneDim::Type igType = ROOT::Math::IntegrationOneDim::kDEFAULT)
0133         : fDim(0), fParams(nullptr), fFunc(nullptr), fIg1Dim(nullptr), fIgNDim(nullptr), fFunc1Dim(nullptr), fFuncNDim(nullptr)
0134      {
0135         if (useIntegral) {
0136            SetFunction(func, p, igType);
0137         }
0138      }
0139 
0140      void SetFunction(const ParamFunc &func, const double *p = nullptr,
0141                       ROOT::Math::IntegrationOneDim::Type igType = ROOT::Math::IntegrationOneDim::kDEFAULT)
0142      {
0143         // set the integrand function and create required wrapper
0144         // to perform integral in (x) of a generic  f(x,p)
0145         fParams = p;
0146         fDim = func.NDim();
0147         // copy the function object to be able to modify the parameters
0148         // fFunc = dynamic_cast<ROOT::Math::IParamMultiFunction *>( func.Clone() );
0149         fFunc = &func;
0150         assert(fFunc != nullptr);
0151         // set parameters in function
0152         // fFunc->SetParameters(p);
0153         if (fDim == 1) {
0154            fFunc1Dim =
0155               new ROOT::Math::WrappedMemFunction<IntegralEvaluator, double (IntegralEvaluator::*)(double) const>(
0156                  *this, &IntegralEvaluator::F1);
0157            fIg1Dim = new ROOT::Math::IntegratorOneDim(igType);
0158            // fIg1Dim->SetFunction( static_cast<const ROOT::Math::IMultiGenFunction & >(*fFunc),false);
0159            fIg1Dim->SetFunction(static_cast<const ROOT::Math::IGenFunction &>(*fFunc1Dim));
0160         } else if (fDim > 1) {
0161            fFuncNDim =
0162               new ROOT::Math::WrappedMemMultiFunction<IntegralEvaluator, double (IntegralEvaluator::*)(const double *)
0163                                                                             const>(*this, &IntegralEvaluator::FN, fDim);
0164            fIgNDim = new ROOT::Math::IntegratorMultiDim();
0165            fIgNDim->SetFunction(*fFuncNDim);
0166         } else
0167            assert(fDim > 0);
0168      }
0169 
0170      void SetParameters(const double *p)
0171      {
0172         // copy just the pointer
0173         fParams = p;
0174      }
0175 
0176      ~IntegralEvaluator()
0177      {
0178         if (fIg1Dim)
0179            delete fIg1Dim;
0180         if (fIgNDim)
0181            delete fIgNDim;
0182         if (fFunc1Dim)
0183            delete fFunc1Dim;
0184         if (fFuncNDim)
0185            delete fFuncNDim;
0186         // if (fFunc) delete fFunc;
0187      }
0188 
0189      // evaluation of integrand function (one-dim)
0190      double F1(double x) const
0191      {
0192         double xx = x;
0193         return ExecFunc(fFunc, &xx, fParams);
0194      }
0195      // evaluation of integrand function (multi-dim)
0196      double FN(const double *x) const { return ExecFunc(fFunc, x, fParams); }
0197 
0198      double Integral(const double *x1, const double *x2)
0199      {
0200         // return unnormalized integral
0201         return (fIg1Dim) ? fIg1Dim->Integral(*x1, *x2) : fIgNDim->Integral(x1, x2);
0202      }
0203 
0204      double operator()(const double *x1, const double *x2)
0205      {
0206         // return normalized integral, divided by bin volume (dx1*dx...*dxn)
0207         if (fIg1Dim) {
0208            double dV = *x2 - *x1;
0209            return fIg1Dim->Integral(*x1, *x2) / dV;
0210         } else if (fIgNDim) {
0211            double dV = 1;
0212            for (unsigned int i = 0; i < fDim; ++i)
0213               dV *= (x2[i] - x1[i]);
0214            return fIgNDim->Integral(x1, x2) / dV;
0215            //                   std::cout << " do integral btw x " << x1[0] << "  " << x2[0] << " y " << x1[1] << "  "
0216            //                   << x2[1] << " dV = " << dV << " result = " << result << std::endl; return result;
0217         } else
0218            assert(1.); // should never be here
0219         return 0;
0220      }
0221 
0222   private:
0223      template <class T>
0224      inline double ExecFunc(T *f, const double *x, const double *p) const
0225      {
0226         return (*f)(x, p);
0227      }
0228 
0229 #ifdef R__HAS_VECCORE
0230      inline double ExecFunc(const IModelFunctionTempl<ROOT::Double_v> *f, const double *x, const double *p) const
0231      {
0232         // Figure out the size of the SIMD vectors.
0233         constexpr static int vecSize = sizeof(ROOT::Double_v) / sizeof(double);
0234         double xBuffer[vecSize];
0235         ROOT::Double_v xx[fDim];
0236         for (unsigned int i = 0; i < fDim; ++i) {
0237            // The Load() function reads multiple values from the pointed-to
0238            // memory into xx. This is why we have to copy the input values from
0239            // the x array into a zero-padded buffer to read from. Otherwise,
0240            // Load() would access the x array out of bounds.
0241            *xBuffer = x[i];
0242            for(int j = 1; j < vecSize; ++j) {
0243               xBuffer[j] = 0.0;
0244            }
0245            vecCore::Load<ROOT::Double_v>(xx[i], xBuffer);
0246         }
0247         auto res = (*f)(xx, p);
0248         return vecCore::Get<ROOT::Double_v>(res, 0);
0249      }
0250 #endif
0251 
0252      // objects of this class are not meant to be copied / assigned
0253      IntegralEvaluator(const IntegralEvaluator &rhs) = delete;
0254      IntegralEvaluator &operator=(const IntegralEvaluator &rhs) = delete;
0255 
0256      unsigned int fDim;
0257      const double *fParams;
0258      // ROOT::Math::IParamMultiFunction * fFunc;  // copy of function in order to be able to change parameters
0259      // const ParamFunc * fFunc;       //  reference to a generic parametric function
0260      const ParamFunc *fFunc;
0261      ROOT::Math::IntegratorOneDim *fIg1Dim;
0262      ROOT::Math::IntegratorMultiDim *fIgNDim;
0263      ROOT::Math::IGenFunction *fFunc1Dim;
0264      ROOT::Math::IMultiGenFunction *fFuncNDim;
0265   };
0266 
0267   /** Chi2 Functions */
0268 
0269   /**
0270       evaluate the Chi2 given a model function and the data at the point x.
0271       return also nPoints as the effective number of used points in the Chi2 evaluation
0272   */
0273   double EvaluateChi2(const IModelFunction &func, const BinData &data, const double *p, unsigned int &nPoints,
0274                       ::ROOT::EExecutionPolicy executionPolicy, unsigned nChunks = 0);
0275 
0276   /**
0277       evaluate the effective Chi2 given a model function and the data at the point x.
0278       The effective chi2 uses the errors on the coordinates : W = 1/(sigma_y**2 + ( sigma_x_i * df/dx_i )**2 )
0279       return also nPoints as the effective number of used points in the Chi2 evaluation
0280   */
0281   double EvaluateChi2Effective(const IModelFunction &func, const BinData &data, const double *x, unsigned int &nPoints);
0282 
0283   /**
0284       evaluate the Chi2 gradient given a model function and the data at the point p.
0285       return also nPoints as the effective number of used points in the Chi2 evaluation
0286   */
0287   void EvaluateChi2Gradient(const IModelFunction &func, const BinData &data, const double *p, double *grad,
0288                             unsigned int &nPoints,
0289                             ::ROOT::EExecutionPolicy executionPolicy = ::ROOT::EExecutionPolicy::kSequential,
0290                             unsigned nChunks = 0);
0291 
0292   /**
0293       evaluate the LogL given a model function and the data at the point x.
0294       return also nPoints as the effective number of used points in the LogL evaluation
0295   */
0296   double EvaluateLogL(const IModelFunction &func, const UnBinData &data, const double *p, int iWeight, bool extended,
0297                       unsigned int &nPoints, ::ROOT::EExecutionPolicy executionPolicy, unsigned nChunks = 0);
0298 
0299   /**
0300       evaluate the LogL gradient given a model function and the data at the point p.
0301       return also nPoints as the effective number of used points in the LogL evaluation
0302   */
0303   void EvaluateLogLGradient(const IModelFunction &func, const UnBinData &data, const double *p, double *grad,
0304                             unsigned int &nPoints,
0305                             ::ROOT::EExecutionPolicy executionPolicy = ::ROOT::EExecutionPolicy::kSequential,
0306                             unsigned nChunks = 0);
0307 
0308   // #ifdef R__HAS_VECCORE
0309   //    template <class NotCompileIfScalarBackend = std::enable_if<!(std::is_same<double, ROOT::Double_v>::value)>>
0310   //    void EvaluateLogLGradient(const IModelFunctionTempl<ROOT::Double_v> &, const UnBinData &, const double *, double
0311   //    *, unsigned int & ) {}
0312   // #endif
0313 
0314   /**
0315       evaluate the Poisson LogL given a model function and the data at the point p.
0316       return also nPoints as the effective number of used points in the LogL evaluation
0317       By default is extended, pass extend to false if want to be not extended (MultiNomial)
0318   */
0319   double EvaluatePoissonLogL(const IModelFunction &func, const BinData &data, const double *p, int iWeight,
0320                              bool extended, unsigned int &nPoints, ::ROOT::EExecutionPolicy executionPolicy,
0321                              unsigned nChunks = 0);
0322 
0323   /**
0324       evaluate the Poisson LogL given a model function and the data at the point p.
0325       return also nPoints as the effective number of used points in the LogL evaluation
0326   */
0327   void EvaluatePoissonLogLGradient(const IModelFunction &func, const BinData &data, const double *p, double *grad,
0328                                    unsigned int &nPoints,
0329                                    ::ROOT::EExecutionPolicy executionPolicy = ::ROOT::EExecutionPolicy::kSequential,
0330                                    unsigned nChunks = 0);
0331 
0332   // methods required by dedicate minimizer like Fumili
0333 
0334   /**
0335       evaluate the residual contribution to the Chi2 given a model function and the BinPoint data
0336       and if the pointer g is not null evaluate also the gradient of the residual.
0337       If the function provides parameter derivatives they are used otherwise a simple derivative calculation
0338       is used
0339   */
0340   double EvaluateChi2Residual(const IModelFunction &func, const BinData &data, const double *p, unsigned int ipoint,
0341                               double *g = nullptr, double * h = nullptr, bool hasGrad = false, bool fullHessian = false);
0342 
0343   /**
0344       evaluate the pdf contribution to the LogL given a model function and the BinPoint data.
0345       If the pointer g is not null evaluate also the gradient of the pdf.
0346       If the function provides parameter derivatives they are used otherwise a simple derivative calculation
0347       is used
0348   */
0349   double
0350   EvaluatePdf(const IModelFunction &func, const UnBinData &data, const double *p, unsigned int ipoint, double *g = nullptr, double * h = nullptr, bool hasGrad = false, bool fullHessian = false);
0351 
0352 
0353    /**
0354        evaluate the pdf contribution to the Poisson LogL given a model function and the BinPoint data.
0355        If the pointer g is not null evaluate also the gradient of the Poisson pdf.
0356        If the function provides parameter derivatives they are used otherwise a simple derivative calculation
0357        is used
0358    */
0359    double EvaluatePoissonBinPdf(const IModelFunction & func, const BinData & data, const double * x, unsigned int ipoint, double * g = nullptr, double * h = nullptr, bool hasGrad = false, bool fullHessian = false);
0360 
0361    unsigned setAutomaticChunking(unsigned nEvents);
0362 
0363    template<class T>
0364    struct Evaluate {
0365 #ifdef R__HAS_VECCORE
0366       static double EvalChi2(const IModelFunctionTempl<T> &func, const BinData &data, const double *p,
0367                              unsigned int &nPoints, ::ROOT::EExecutionPolicy executionPolicy, unsigned nChunks = 0)
0368       {
0369          // evaluate the chi2 given a  vectorized function reference  , the data and returns the value and also in nPoints
0370          // the actual number of used points
0371          // normal chi2 using only error on values (from fitting histogram)
0372          // optionally the integral of function in the bin is used
0373 
0374          //Info("EvalChi2","Using vectorized implementation %d",(int) data.Opt().fIntegral);
0375 
0376          unsigned int n = data.Size();
0377          nPoints = data.Size();  // npoints
0378 
0379          // set parameters of the function to cache integral value
0380 #ifdef USE_PARAMCACHE
0381          (const_cast<IModelFunctionTempl<T> &>(func)).SetParameters(p);
0382 #endif
0383          // do not cache parameter values (it is not thread safe)
0384          //func.SetParameters(p);
0385 
0386 
0387          // get fit option and check case if using integral of bins
0388          const DataOptions &fitOpt = data.Opt();
0389          if (fitOpt.fBinVolume || fitOpt.fIntegral || fitOpt.fExpErrors)
0390             Error("FitUtil::EvaluateChi2", "The vectorized implementation doesn't support Integrals, BinVolume or ExpErrors\n. Aborting operation.");
0391 
0392          (const_cast<IModelFunctionTempl<T> &>(func)).SetParameters(p);
0393 
0394          double maxResValue = std::numeric_limits<double>::max() / n;
0395          std::vector<double> ones{1., 1., 1., 1.};
0396          auto vecSize = vecCore::VectorSize<T>();
0397 
0398          auto mapFunction = [&](unsigned int i) {
0399             // in case of no error in y invError=1 is returned
0400             T x1, y, invErrorVec;
0401             vecCore::Load<T>(x1, data.GetCoordComponent(i * vecSize, 0));
0402             vecCore::Load<T>(y, data.ValuePtr(i * vecSize));
0403             const auto invError = data.ErrorPtr(i * vecSize);
0404             auto invErrorptr = (invError != nullptr) ? invError : &ones.front();
0405             vecCore::Load<T>(invErrorVec, invErrorptr);
0406 
0407             const T *x;
0408             std::vector<T> xc;
0409             if(data.NDim() > 1) {
0410                xc.resize(data.NDim());
0411                xc[0] = x1;
0412                for (unsigned int j = 1; j < data.NDim(); ++j)
0413                   vecCore::Load<T>(xc[j], data.GetCoordComponent(i * vecSize, j));
0414                x = xc.data();
0415             } else {
0416                x = &x1;
0417             }
0418 
0419             T fval{};
0420 
0421 #ifdef USE_PARAMCACHE
0422             fval = func(x);
0423 #else
0424             fval = func(x, p);
0425 #endif
0426 
0427             T tmp = (y - fval) * invErrorVec;
0428             T chi2 = tmp * tmp;
0429 
0430 
0431             // avoid infinity or nan in chi2 values due to wrong function values
0432             auto m = vecCore::Mask_v<T>(chi2 > maxResValue);
0433 
0434             vecCore::MaskedAssign<T>(chi2, m, maxResValue);
0435 
0436             return chi2;
0437          };
0438 
0439          auto redFunction = [](const std::vector<T> &objs) {
0440             return std::accumulate(objs.begin(), objs.end(), T{});
0441          };
0442 
0443 #ifndef R__USE_IMT
0444          (void)nChunks;
0445 
0446          // If IMT is disabled, force the execution policy to the serial case
0447          if (executionPolicy == ::ROOT::EExecutionPolicy::kMultiThread) {
0448             Warning("FitUtil::EvaluateChi2", "Multithread execution policy requires IMT, which is disabled. Changing "
0449                                              "to ::ROOT::EExecutionPolicy::kSequential.");
0450             executionPolicy = ::ROOT::EExecutionPolicy::kSequential;
0451          }
0452 #endif
0453 
0454          T res{};
0455          if (executionPolicy == ::ROOT::EExecutionPolicy::kSequential) {
0456             ROOT::TSequentialExecutor pool;
0457             res = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, data.Size()/vecSize), redFunction);
0458 #ifdef R__USE_IMT
0459          } else if (executionPolicy == ::ROOT::EExecutionPolicy::kMultiThread) {
0460             ROOT::TThreadExecutor pool;
0461             auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(data.Size() / vecSize);
0462             res = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, data.Size() / vecSize), redFunction, chunks);
0463 #endif
0464          } else {
0465             Error("FitUtil::EvaluateChi2", "Execution policy unknown. Available choices:\n ::ROOT::EExecutionPolicy::kSequential (default)\n ::ROOT::EExecutionPolicy::kMultiThread (requires IMT)\n");
0466          }
0467 
0468          // Last SIMD vector of elements (if padding needed)
0469          if (data.Size() % vecSize != 0)
0470             vecCore::MaskedAssign(res, vecCore::Int2Mask<T>(data.Size() % vecSize),
0471                                   res + mapFunction(data.Size() / vecSize));
0472 
0473          return vecCore::ReduceAdd(res);
0474       }
0475 
0476       static double EvalLogL(const IModelFunctionTempl<T> &func, const UnBinData &data, const double *const p,
0477                              int iWeight, bool extended, unsigned int &nPoints,
0478                              ::ROOT::EExecutionPolicy executionPolicy, unsigned nChunks = 0)
0479       {
0480          // evaluate the LogLikelihood
0481          unsigned int n = data.Size();
0482          nPoints = data.Size();  // npoints
0483 
0484          //unsigned int nRejected = 0;
0485          bool normalizeFunc = false;
0486 
0487          // set parameters of the function to cache integral value
0488 #ifdef USE_PARAMCACHE
0489          (const_cast<IModelFunctionTempl<T> &>(func)).SetParameters(p);
0490 #endif
0491 
0492 #ifdef R__USE_IMT
0493          // in case parameter needs to be propagated to user function use trick to set parameters by calling one time the function
0494          // this will be done in sequential mode and parameters can be set in a thread safe manner
0495          if (!normalizeFunc) {
0496             if (data.NDim() == 1) {
0497                T x;
0498                vecCore::Load<T>(x, data.GetCoordComponent(0, 0));
0499                func( &x, p);
0500             }
0501             else {
0502                std::vector<T> x(data.NDim());
0503                for (unsigned int j = 0; j < data.NDim(); ++j)
0504                   vecCore::Load<T>(x[j], data.GetCoordComponent(0, j));
0505                func( x.data(), p);
0506             }
0507          }
0508 #endif
0509 
0510          // this is needed if function must be normalized
0511          double norm = 1.0;
0512          if (normalizeFunc) {
0513             // compute integral of the function
0514             std::vector<double> xmin(data.NDim());
0515             std::vector<double> xmax(data.NDim());
0516             IntegralEvaluator<IModelFunctionTempl<T>> igEval(func, p, true);
0517             // compute integral in the ranges where is defined
0518             if (data.Range().Size() > 0) {
0519                norm = 0;
0520                for (unsigned int ir = 0; ir < data.Range().Size(); ++ir) {
0521                   data.Range().GetRange(&xmin[0], &xmax[0], ir);
0522                   norm += igEval.Integral(xmin.data(), xmax.data());
0523                }
0524             } else {
0525                // use (-inf +inf)
0526                data.Range().GetRange(&xmin[0], &xmax[0]);
0527                // check if function is zero at +- inf
0528                T xmin_v, xmax_v;
0529                vecCore::Load<T>(xmin_v, xmin.data());
0530                vecCore::Load<T>(xmax_v, xmax.data());
0531                if (vecCore::ReduceAdd(func(&xmin_v, p)) != 0 || vecCore::ReduceAdd(func(&xmax_v, p)) != 0) {
0532                   MATH_ERROR_MSG("FitUtil::EvaluateLogLikelihood", "A range has not been set and the function is not zero at +/- inf");
0533                   return 0;
0534                }
0535                norm = igEval.Integral(&xmin[0], &xmax[0]);
0536             }
0537          }
0538 
0539          // needed to compute effective global weight in case of extended likelihood
0540 
0541          auto vecSize = vecCore::VectorSize<T>();
0542          unsigned int numVectors = n / vecSize;
0543 
0544          auto mapFunction = [ &, p](const unsigned i) {
0545             T W{};
0546             T W2{};
0547             T fval{};
0548 
0549             (void)p; /* avoid unused lambda capture warning if PARAMCACHE is disabled */
0550 
0551             T x1;
0552             vecCore::Load<T>(x1, data.GetCoordComponent(i * vecSize, 0));
0553             const T *x = nullptr;
0554             unsigned int ndim = data.NDim();
0555             std::vector<T> xc;
0556             if (ndim > 1) {
0557                xc.resize(ndim);
0558                xc[0] = x1;
0559                for (unsigned int j = 1; j < ndim; ++j)
0560                   vecCore::Load<T>(xc[j], data.GetCoordComponent(i * vecSize, j));
0561                x = xc.data();
0562             } else {
0563                x = &x1;
0564             }
0565 
0566 #ifdef USE_PARAMCACHE
0567             fval = func(x);
0568 #else
0569             fval = func(x, p);
0570 #endif
0571 
0572 #ifdef DEBUG_FITUTIL
0573             if (i < 5 || (i > numVectors-5) ) {
0574                if (ndim == 1) std::cout << i << "  x " << x[0]  << " fval = " << fval;
0575                else std::cout << i << "  x " << x[0] << " y " << x[1] << " fval = " << fval;
0576             }
0577 #endif
0578 
0579             if (normalizeFunc) fval = fval * (1 / norm);
0580 
0581             // function EvalLog protects against negative or too small values of fval
0582             auto logval =  ROOT::Math::Util::EvalLog(fval);
0583             if (iWeight > 0) {
0584                T weight{};
0585                if (data.WeightsPtr(i) == nullptr)
0586                   weight = 1;
0587                else
0588                   vecCore::Load<T>(weight, data.WeightsPtr(i*vecSize));
0589                logval *= weight;
0590                if (iWeight == 2) {
0591                   logval *= weight; // use square of weights in likelihood
0592                   if (!extended) {
0593                      // needed sum of weights and sum of weight square if likelkihood is extended
0594                      W  = weight;
0595                      W2 = weight * weight;
0596                   }
0597                }
0598             }
0599 #ifdef DEBUG_FITUTIL
0600             if (i < 5 || (i > numVectors-5))  {
0601                std::cout << "   " << fval << "  logfval " << logval << std::endl;
0602             }
0603 #endif
0604 
0605             return LikelihoodAux<T>(logval, W, W2);
0606          };
0607 
0608          auto redFunction = [](const std::vector<LikelihoodAux<T>> &objs) {
0609             return std::accumulate(objs.begin(), objs.end(), LikelihoodAux<T>(),
0610             [](const LikelihoodAux<T> &l1, const LikelihoodAux<T> &l2) {
0611                return l1 + l2;
0612             });
0613          };
0614 
0615 #ifndef R__USE_IMT
0616          (void)nChunks;
0617 
0618          // If IMT is disabled, force the execution policy to the serial case
0619          if (executionPolicy == ::ROOT::EExecutionPolicy::kMultiThread) {
0620             Warning("FitUtil::EvaluateLogL", "Multithread execution policy requires IMT, which is disabled. Changing "
0621                                              "to ::ROOT::EExecutionPolicy::kSequential.");
0622             executionPolicy = ::ROOT::EExecutionPolicy::kSequential;
0623          }
0624 #endif
0625 
0626          T logl_v{};
0627          T sumW_v{};
0628          T sumW2_v{};
0629          ROOT::Fit::FitUtil::LikelihoodAux<T> resArray;
0630          if (executionPolicy == ::ROOT::EExecutionPolicy::kSequential) {
0631             ROOT::TSequentialExecutor pool;
0632             resArray = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, data.Size() / vecSize), redFunction);
0633 #ifdef R__USE_IMT
0634          } else if (executionPolicy == ::ROOT::EExecutionPolicy::kMultiThread) {
0635             ROOT::TThreadExecutor pool;
0636             auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking( numVectors);
0637             resArray = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, data.Size() / vecSize), redFunction, chunks);
0638 #endif
0639          } else {
0640             Error("FitUtil::EvaluateLogL", "Execution policy unknown. Available choices:\n ::ROOT::EExecutionPolicy::kSequential (default)\n ::ROOT::EExecutionPolicy::kMultiThread (requires IMT)\n");
0641          }
0642 
0643          logl_v = resArray.logvalue;
0644          sumW_v = resArray.weight;
0645          sumW2_v = resArray.weight2;
0646 
0647          // Compute the contribution from the remaining points ( Last padded SIMD vector of elements )
0648          unsigned int remainingPoints = n % vecSize;
0649          if (remainingPoints > 0) {
0650             auto remainingPointsContribution = mapFunction(numVectors);
0651             // Add the contribution from the valid remaining points and store the result in the output variable
0652             auto remainingMask = vecCore::Int2Mask<T>(remainingPoints);
0653             vecCore::MaskedAssign(logl_v, remainingMask, logl_v + remainingPointsContribution.logvalue);
0654             vecCore::MaskedAssign(sumW_v, remainingMask, sumW_v + remainingPointsContribution.weight);
0655             vecCore::MaskedAssign(sumW2_v, remainingMask, sumW2_v + remainingPointsContribution.weight2);
0656          }
0657 
0658 
0659          //reduce vector type to double.
0660          double logl  = vecCore::ReduceAdd(logl_v);
0661          double sumW  = vecCore::ReduceAdd(sumW_v);
0662          double sumW2 = vecCore::ReduceAdd(sumW2_v);
0663 
0664          if (extended) {
0665             // add Poisson extended term
0666             double extendedTerm = 0; // extended term in likelihood
0667             double nuTot = 0;
0668             // nuTot is integral of function in the range
0669             // if function has been normalized integral has been already computed
0670             if (!normalizeFunc) {
0671                IntegralEvaluator<IModelFunctionTempl<T>> igEval(func, p, true);
0672                std::vector<double> xmin(data.NDim());
0673                std::vector<double> xmax(data.NDim());
0674 
0675                // compute integral in the ranges where is defined
0676                if (data.Range().Size() > 0) {
0677                   nuTot = 0;
0678                   for (unsigned int ir = 0; ir < data.Range().Size(); ++ir) {
0679                      data.Range().GetRange(&xmin[0], &xmax[0], ir);
0680                      nuTot += igEval.Integral(xmin.data(), xmax.data());
0681                   }
0682                } else {
0683                   // use (-inf +inf)
0684                   data.Range().GetRange(&xmin[0], &xmax[0]);
0685                   // check if function is zero at +- inf
0686                   T xmin_v, xmax_v;
0687                   vecCore::Load<T>(xmin_v, xmin.data());
0688                   vecCore::Load<T>(xmax_v, xmax.data());
0689                   if (vecCore::ReduceAdd(func(&xmin_v, p)) != 0 || vecCore::ReduceAdd(func(&xmax_v, p)) != 0) {
0690                      MATH_ERROR_MSG("FitUtil::EvaluateLogLikelihood", "A range has not been set and the function is not zero at +/- inf");
0691                      return 0;
0692                   }
0693                   nuTot = igEval.Integral(&xmin[0], &xmax[0]);
0694                }
0695 
0696                // force to be last parameter value
0697                //nutot = p[func.NDim()-1];
0698                if (iWeight != 2)
0699                   extendedTerm = - nuTot;  // no need to add in this case n log(nu) since is already computed before
0700                else {
0701                   // case use weight square in likelihood : compute total effective weight = sw2/sw
0702                   // ignore for the moment case when sumW is zero
0703                   extendedTerm = - (sumW2 / sumW) * nuTot;
0704                }
0705 
0706             } else {
0707                nuTot = norm;
0708                extendedTerm = - nuTot + double(n) *  ROOT::Math::Util::EvalLog(nuTot);
0709                // in case of weights need to use here sum of weights (to be done)
0710             }
0711 
0712             logl += extendedTerm;
0713          }
0714 
0715 #ifdef DEBUG_FITUTIL
0716          std::cout << "Evaluated log L for parameters (";
0717          for (unsigned int ip = 0; ip < func.NPar(); ++ip)
0718             std::cout << " " << p[ip];
0719          std::cout << ")  nll = " << -logl << std::endl;
0720 #endif
0721 
0722          return -logl;
0723 
0724       }
0725 
0726       static double EvalPoissonLogL(const IModelFunctionTempl<T> &func, const BinData &data, const double *p,
0727                                     int iWeight, bool extended, unsigned int,
0728                                     ::ROOT::EExecutionPolicy executionPolicy, unsigned nChunks = 0)
0729       {
0730          // evaluate the Poisson Log Likelihood
0731          // for binned likelihood fits
0732          // this is Sum ( f(x_i)  -  y_i * log( f (x_i) ) )
0733          // add as well constant term for saturated model to make it like a Chi2/2
0734          // by default is extended. If extended is false the fit is not extended and
0735          // the global poisson term is removed (i.e is a binomial fit)
0736          // (remember that in this case one needs to have a function with a fixed normalization
0737          // like in a non extended binned fit)
0738          //
0739          // if use Weight use a weighted dataset
0740          // iWeight = 1 ==> logL = Sum( w f(x_i) )
0741          // case of iWeight==1 is actually identical to weight==0
0742          // iWeight = 2 ==> logL = Sum( w*w * f(x_i) )
0743          //
0744 
0745 #ifdef USE_PARAMCACHE
0746          (const_cast<IModelFunctionTempl<T> &>(func)).SetParameters(p);
0747 #endif
0748          auto vecSize = vecCore::VectorSize<T>();
0749          // get fit option and check case of using integral of bins
0750          const DataOptions &fitOpt = data.Opt();
0751          if (fitOpt.fExpErrors || fitOpt.fIntegral)
0752             Error("FitUtil::EvaluateChi2",
0753                   "The vectorized implementation doesn't support Integrals or BinVolume\n. Aborting operation.");
0754          bool useW2 = (iWeight == 2);
0755 
0756          auto mapFunction = [&](unsigned int i) {
0757             T y;
0758             vecCore::Load<T>(y, data.ValuePtr(i * vecSize));
0759             T fval{};
0760 
0761             if (data.NDim() > 1) {
0762                std::vector<T> x(data.NDim());
0763                for (unsigned int j = 0; j < data.NDim(); ++j)
0764                   vecCore::Load<T>(x[j], data.GetCoordComponent(i * vecSize, j));
0765 #ifdef USE_PARAMCACHE
0766                fval = func(x.data());
0767 #else
0768                fval = func(x.data(), p);
0769 #endif
0770                // one -dim case
0771             } else {
0772                T x;
0773                vecCore::Load<T>(x, data.GetCoordComponent(i * vecSize, 0));
0774 #ifdef USE_PARAMCACHE
0775                fval = func(&x);
0776 #else
0777                fval = func(&x, p);
0778 #endif
0779             }
0780 
0781             // EvalLog protects against 0 values of fval but don't want to add in the -log sum
0782             // negative values of fval
0783             vecCore::MaskedAssign<T>(fval, fval < 0.0, 0.0);
0784 
0785             T nloglike{}; // negative loglikelihood
0786 
0787             if (useW2) {
0788                // apply weight correction . Effective weight is error^2/ y
0789                // and expected events in bins is fval/weight
0790                // can apply correction only when y is not zero otherwise weight is undefined
0791                // (in case of weighted likelihood I don't care about the constant term due to
0792                // the saturated model)
0793                assert (data.GetErrorType() != ROOT::Fit::BinData::ErrorType::kNoError);
0794                T error = 0.0;
0795                vecCore::Load<T>(error, data.ErrorPtr(i * vecSize));
0796                // for empty bin use the average weight  computed from the total data weight
0797                auto m = vecCore::Mask_v<T>(y != 0.0);
0798                auto weight = vecCore::Blend(m,(error * error) / y, T(data.SumOfError2()/ data.SumOfContent()) );
0799                if (extended) {
0800                   nloglike =  weight * ( fval - y);
0801                }
0802                vecCore::MaskedAssign<T>(nloglike, y != 0, nloglike + weight * y *( ROOT::Math::Util::EvalLog(y) -  ROOT::Math::Util::EvalLog(fval)) );
0803 
0804             } else {
0805                // standard case no weights or iWeight=1
0806                // this is needed for Poisson likelihood (which are extended and not for multinomial)
0807                // the formula below  include constant term due to likelihood of saturated model (f(x) = y)
0808                // (same formula as in Baker-Cousins paper, page 439 except a factor of 2
0809                if (extended) nloglike = fval - y;
0810 
0811                vecCore::MaskedAssign<T>(
0812                   nloglike, y > 0, nloglike + y * (ROOT::Math::Util::EvalLog(y) - ROOT::Math::Util::EvalLog(fval)));
0813             }
0814 
0815             return nloglike;
0816          };
0817 
0818 #ifdef R__USE_IMT
0819          auto redFunction = [](const std::vector<T> &objs) { return std::accumulate(objs.begin(), objs.end(), T{}); };
0820 #else
0821          (void)nChunks;
0822 
0823          // If IMT is disabled, force the execution policy to the serial case
0824          if (executionPolicy == ::ROOT::EExecutionPolicy::kMultiThread) {
0825             Warning("FitUtil::Evaluate<T>::EvalPoissonLogL",
0826                     "Multithread execution policy requires IMT, which is disabled. Changing "
0827                     "to ::ROOT::EExecutionPolicy::kSequential.");
0828             executionPolicy = ::ROOT::EExecutionPolicy::kSequential;
0829          }
0830 #endif
0831 
0832          T res{};
0833          if (executionPolicy == ::ROOT::EExecutionPolicy::kSequential) {
0834             for (unsigned int i = 0; i < (data.Size() / vecSize); i++) {
0835                res += mapFunction(i);
0836             }
0837 #ifdef R__USE_IMT
0838          } else if (executionPolicy == ::ROOT::EExecutionPolicy::kMultiThread) {
0839             ROOT::TThreadExecutor pool;
0840             auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(data.Size() / vecSize);
0841             res = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, data.Size() / vecSize), redFunction, chunks);
0842 #endif
0843          } else {
0844             Error(
0845                "FitUtil::Evaluate<T>::EvalPoissonLogL",
0846                "Execution policy unknown. Available choices:\n ::ROOT::EExecutionPolicy::kSequential (default)\n ::ROOT::EExecutionPolicy::kMultiThread (requires IMT)\n");
0847          }
0848 
0849          // Last padded SIMD vector of elements
0850          if (data.Size() % vecSize != 0)
0851             vecCore::MaskedAssign(res, vecCore::Int2Mask<T>(data.Size() % vecSize),
0852                                   res + mapFunction(data.Size() / vecSize));
0853 
0854          return vecCore::ReduceAdd(res);
0855       }
0856 
0857       static double EvalChi2Effective(const IModelFunctionTempl<T> &, const BinData &, const double *, unsigned int &)
0858       {
0859          Error("FitUtil::Evaluate<T>::EvalChi2Effective", "The vectorized evaluation of the Chi2 with coordinate errors is still not supported");
0860          return -1.;
0861       }
0862 
0863       // Compute a mask to filter out infinite numbers and NaN values.
0864       // The argument rval is updated so infinite numbers and NaN values are replaced by
0865       // maximum finite values (preserving the original sign).
0866       static vecCore::Mask<T> CheckInfNaNValues(T &rval)
0867       {
0868          auto mask = rval > -vecCore::NumericLimits<T>::Max() && rval < vecCore::NumericLimits<T>::Max();
0869 
0870          // Case +inf or nan
0871          vecCore::MaskedAssign(rval, !mask, +vecCore::NumericLimits<T>::Max());
0872 
0873          // Case -inf
0874          vecCore::MaskedAssign(rval, !mask && rval < 0, -vecCore::NumericLimits<T>::Max());
0875 
0876          return mask;
0877       }
0878 
0879       static void EvalChi2Gradient(const IModelFunctionTempl<T> &f, const BinData &data, const double *p, double *grad,
0880                                    unsigned int &nPoints,
0881                                    ::ROOT::EExecutionPolicy executionPolicy = ::ROOT::EExecutionPolicy::kSequential,
0882                                    unsigned nChunks = 0)
0883       {
0884          // evaluate the gradient of the chi2 function
0885          // this function is used when the model function knows how to calculate the derivative and we can
0886          // avoid that the minimizer re-computes them
0887          //
0888          // case of chi2 effective (errors on coordinate) is not supported
0889 
0890          if (data.HaveCoordErrors()) {
0891             MATH_ERROR_MSG("FitUtil::EvaluateChi2Gradient",
0892                            "Error on the coordinates are not used in calculating Chi2 gradient");
0893             return; // it will assert otherwise later in GetPoint
0894          }
0895 
0896          const IGradModelFunctionTempl<T> *fg = dynamic_cast<const IGradModelFunctionTempl<T> *>(&f);
0897          assert(fg != nullptr); // must be called by a gradient function
0898 
0899          const IGradModelFunctionTempl<T> &func = *fg;
0900 
0901          const DataOptions &fitOpt = data.Opt();
0902          if (fitOpt.fBinVolume || fitOpt.fIntegral || fitOpt.fExpErrors)
0903             Error("FitUtil::EvaluateChi2Gradient", "The vectorized implementation doesn't support Integrals,"
0904                                                    "BinVolume or ExpErrors\n. Aborting operation.");
0905 
0906          unsigned int npar = func.NPar();
0907          auto vecSize = vecCore::VectorSize<T>();
0908          unsigned initialNPoints = data.Size();
0909          unsigned numVectors = initialNPoints / vecSize;
0910 
0911          // numVectors + 1 because of the padded data (call to mapFunction with i = numVectors after the main loop)
0912          std::vector<vecCore::Mask<T>> validPointsMasks(numVectors + 1);
0913 
0914          auto mapFunction = [&](const unsigned int i) {
0915             // set all vector values to zero
0916             std::vector<T> gradFunc(npar);
0917             std::vector<T> pointContributionVec(npar);
0918 
0919             T x1, y, invError;
0920 
0921             vecCore::Load<T>(x1, data.GetCoordComponent(i * vecSize, 0));
0922             vecCore::Load<T>(y, data.ValuePtr(i * vecSize));
0923             const auto invErrorPtr = data.ErrorPtr(i * vecSize);
0924 
0925             if (invErrorPtr == nullptr)
0926                invError = 1;
0927             else
0928                vecCore::Load<T>(invError, invErrorPtr);
0929 
0930             // TODO: Check error options and invert if needed
0931 
0932             T fval = 0;
0933 
0934             const T *x = nullptr;
0935 
0936             unsigned int ndim = data.NDim();
0937             // need to declare vector outside if statement
0938             // otherwise pointer will be invalid
0939             std::vector<T> xc;
0940             if (ndim > 1) {
0941                xc.resize(ndim);
0942                xc[0] = x1;
0943                for (unsigned int j = 1; j < ndim; ++j)
0944                   vecCore::Load<T>(xc[j], data.GetCoordComponent(i * vecSize, j));
0945                x = xc.data();
0946             } else {
0947                x = &x1;
0948             }
0949 
0950             fval = func(x, p);
0951             func.ParameterGradient(x, p, &gradFunc[0]);
0952 
0953             validPointsMasks[i] = CheckInfNaNValues(fval);
0954             if (vecCore::MaskEmpty(validPointsMasks[i])) {
0955                // Return a zero contribution to all partial derivatives on behalf of the current points
0956                return pointContributionVec;
0957             }
0958 
0959             // loop on the parameters
0960             for (unsigned int ipar = 0; ipar < npar; ++ipar) {
0961                // avoid singularity in the function (infinity and nan ) in the chi2 sum
0962                // eventually add possibility of excluding some points (like singularity)
0963                validPointsMasks[i] = CheckInfNaNValues(gradFunc[ipar]);
0964 
0965                if (vecCore::MaskEmpty(validPointsMasks[i])) {
0966                   break; // exit loop on parameters
0967                }
0968 
0969                // calculate derivative point contribution (only for valid points)
0970                vecCore::MaskedAssign(pointContributionVec[ipar], validPointsMasks[i],
0971                                      -2.0 * (y - fval) * invError * invError * gradFunc[ipar]);
0972             }
0973 
0974             return pointContributionVec;
0975          };
0976 
0977          // Reduce the set of vectors by summing its equally-indexed components
0978          auto redFunction = [&](const std::vector<std::vector<T>> &partialResults) {
0979             std::vector<T> result(npar);
0980 
0981             for (auto const &pointContributionVec : partialResults) {
0982                for (unsigned int parameterIndex = 0; parameterIndex < npar; parameterIndex++)
0983                   result[parameterIndex] += pointContributionVec[parameterIndex];
0984             }
0985 
0986             return result;
0987          };
0988 
0989          std::vector<T> gVec(npar);
0990          std::vector<double> g(npar);
0991 
0992 #ifndef R__USE_IMT
0993          // to fix compiler warning
0994          (void)nChunks;
0995 
0996          // If IMT is disabled, force the execution policy to the serial case
0997          if (executionPolicy == ::ROOT::EExecutionPolicy::kMultiThread) {
0998             Warning("FitUtil::EvaluateChi2Gradient",
0999                     "Multithread execution policy requires IMT, which is disabled. Changing "
1000                     "to ::ROOT::EExecutionPolicy::kSequential.");
1001             executionPolicy = ::ROOT::EExecutionPolicy::kSequential;
1002          }
1003 #endif
1004 
1005          if (executionPolicy == ::ROOT::EExecutionPolicy::kSequential) {
1006             ROOT::TSequentialExecutor pool;
1007             gVec = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, numVectors), redFunction);
1008          }
1009 #ifdef R__USE_IMT
1010          else if (executionPolicy == ::ROOT::EExecutionPolicy::kMultiThread) {
1011             ROOT::TThreadExecutor pool;
1012             auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(numVectors);
1013             gVec = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, numVectors), redFunction, chunks);
1014          }
1015 #endif
1016          else {
1017             Error(
1018                "FitUtil::EvaluateChi2Gradient",
1019                "Execution policy unknown. Available choices:\n 0: Serial (default)\n 1: MultiThread (requires IMT)\n");
1020          }
1021 
1022          // Compute the contribution from the remaining points
1023          unsigned int remainingPoints = initialNPoints % vecSize;
1024          if (remainingPoints > 0) {
1025             auto remainingPointsContribution = mapFunction(numVectors);
1026             // Add the contribution from the valid remaining points and store the result in the output variable
1027             auto remainingMask = vecCore::Int2Mask<T>(remainingPoints);
1028             for (unsigned int param = 0; param < npar; param++) {
1029                vecCore::MaskedAssign(gVec[param], remainingMask, gVec[param] + remainingPointsContribution[param]);
1030             }
1031          }
1032          // reduce final gradient result from T to double
1033          for (unsigned int param = 0; param < npar; param++) {
1034             grad[param] = vecCore::ReduceAdd(gVec[param]);
1035          }
1036 
1037          // correct the number of points
1038          nPoints = initialNPoints;
1039 
1040          if (std::any_of(validPointsMasks.begin(), validPointsMasks.end(),
1041                          [](vecCore::Mask<T> validPoints) { return !vecCore::MaskFull(validPoints); })) {
1042             unsigned nRejected = 0;
1043 
1044             for (const auto &mask : validPointsMasks) {
1045                for (unsigned int i = 0; i < vecSize; i++) {
1046                   nRejected += !vecCore::Get(mask, i);
1047                }
1048             }
1049 
1050             assert(nRejected <= initialNPoints);
1051             nPoints = initialNPoints - nRejected;
1052 
1053             if (nPoints < npar) {
1054                MATH_ERROR_MSG("FitUtil::EvaluateChi2Gradient",
1055                               "Too many points rejected for overflow in gradient calculation");
1056            }
1057          }
1058       }
1059 
1060       static double EvalChi2Residual(const IModelFunctionTempl<T> &, const BinData &, const double *, unsigned int, double *, double *, bool, bool)
1061       {
1062          Error("FitUtil::Evaluate<T>::EvalChi2Residual", "The vectorized evaluation of the Chi2 with the ith residual is still not supported");
1063          return -1.;
1064       }
1065 
1066       /// evaluate the pdf (Poisson) contribution to the logl (return actually log of pdf)
1067       /// and its gradient
1068       static double EvalPoissonBinPdf(const IModelFunctionTempl<T> &, const BinData &, const double *, unsigned int , double * , double * , bool, bool) {
1069          Error("FitUtil::Evaluate<T>::EvaluatePoissonBinPdf", "The vectorized evaluation of the BinnedLikelihood fit evaluated point by point is still not supported");
1070          return -1.;
1071       }
1072 
1073       static double EvalPdf(const IModelFunctionTempl<T> &, const UnBinData &, const double *, unsigned int , double * , double * , bool, bool) {
1074          Error("FitUtil::Evaluate<T>::EvalPdf", "The vectorized evaluation of the LogLikelihood fit evaluated point by point is still not supported");
1075          return -1.;
1076       }
1077 
1078       //template <class NotCompileIfScalarBackend = std::enable_if<!(std::is_same<double, ROOT::Double_v>::value)>>
1079       // static double EvalPdf(const IModelFunctionTempl<ROOT::Double_v> &func, const UnBinData &data, const double *p, unsigned int i, double *, double *, bool, bool) {
1080       // // evaluate the pdf contribution to the generic logl function in case of bin data
1081       // // return actually the log of the pdf and its derivatives
1082       // // func.SetParameters(p);
1083       // const auto x = vecCore::FromPtr<ROOT::Double_v>(data.GetCoordComponent(i, 0));
1084       // auto fval = func(&x, p);
1085       // auto logPdf = ROOT::Math::Util::EvalLog(fval);
1086       // return vecCore::Get<ROOT::Double_v>(logPdf, 0);
1087 
1088       static void
1089       EvalPoissonLogLGradient(const IModelFunctionTempl<T> &f, const BinData &data, const double *p, double *grad,
1090                               unsigned int &,
1091                               ::ROOT::EExecutionPolicy executionPolicy = ::ROOT::EExecutionPolicy::kSequential,
1092                               unsigned nChunks = 0)
1093       {
1094          // evaluate the gradient of the Poisson log likelihood function
1095 
1096          const IGradModelFunctionTempl<T> *fg = dynamic_cast<const IGradModelFunctionTempl<T> *>(&f);
1097          assert(fg != nullptr); // must be called by a grad function
1098 
1099          const IGradModelFunctionTempl<T> &func = *fg;
1100 
1101          (const_cast<IGradModelFunctionTempl<T> &>(func)).SetParameters(p);
1102 
1103 
1104          const DataOptions &fitOpt = data.Opt();
1105          if (fitOpt.fBinVolume || fitOpt.fIntegral || fitOpt.fExpErrors)
1106             Error("FitUtil::EvaluatePoissonLogLGradient", "The vectorized implementation doesn't support Integrals,"
1107                                                           "BinVolume or ExpErrors\n. Aborting operation.");
1108 
1109          unsigned int npar = func.NPar();
1110          auto vecSize = vecCore::VectorSize<T>();
1111          unsigned initialNPoints = data.Size();
1112          unsigned numVectors = initialNPoints / vecSize;
1113 
1114          auto mapFunction = [&](const unsigned int i) {
1115             // set all vector values to zero
1116             std::vector<T> gradFunc(npar);
1117             std::vector<T> pointContributionVec(npar);
1118 
1119             T x1, y;
1120 
1121             vecCore::Load<T>(x1, data.GetCoordComponent(i * vecSize, 0));
1122             vecCore::Load<T>(y, data.ValuePtr(i * vecSize));
1123 
1124             T fval = 0;
1125 
1126             const T *x = nullptr;
1127 
1128             unsigned ndim = data.NDim();
1129             std::vector<T> xc;
1130             if (ndim > 1) {
1131                xc.resize(ndim);
1132                xc[0] = x1;
1133                for (unsigned int j = 1; j < ndim; ++j)
1134                   vecCore::Load<T>(xc[j], data.GetCoordComponent(i * vecSize, j));
1135                x = xc.data();
1136             } else {
1137                x = &x1;
1138             }
1139 
1140             fval = func(x, p);
1141             func.ParameterGradient(x, p, &gradFunc[0]);
1142 
1143             // correct the gradient
1144             for (unsigned int ipar = 0; ipar < npar; ++ipar) {
1145                vecCore::Mask<T> positiveValuesMask = fval > 0;
1146 
1147                // df/dp * (1.  - y/f )
1148                vecCore::MaskedAssign(pointContributionVec[ipar], positiveValuesMask, gradFunc[ipar] * (1. - y / fval));
1149 
1150                vecCore::Mask<T> validNegativeValuesMask = !positiveValuesMask && gradFunc[ipar] != 0;
1151 
1152                if (!vecCore::MaskEmpty(validNegativeValuesMask)) {
1153                   const T kdmax1 = vecCore::math::Sqrt(vecCore::NumericLimits<T>::Max());
1154                   const T kdmax2 = vecCore::NumericLimits<T>::Max() / (4 * initialNPoints);
1155                   T gg = kdmax1 * gradFunc[ipar];
1156                   pointContributionVec[ipar] = -vecCore::Blend(gg > 0, vecCore::math::Min(gg, kdmax2), vecCore::math::Max(gg, -kdmax2));
1157                }
1158             }
1159 
1160 #ifdef DEBUG_FITUTIL
1161       {
1162          R__LOCKGUARD(gROOTMutex);
1163          if (i < 5 || (i > data.Size()-5) ) {
1164             if (data.NDim() > 1) std::cout << i << "  x " << x[0] << " y " << x[1];
1165             else std::cout << i << "  x " << x[0];
1166             std::cout << " func " << fval  << " gradient ";
1167             for (unsigned int ii = 0; ii < npar; ++ii) std::cout << "  " << pointContributionVec[ii];
1168             std::cout << "\n";
1169          }
1170       }
1171 #endif
1172 
1173             return pointContributionVec;
1174          };
1175 
1176          // Vertically reduce the set of vectors by summing its equally-indexed components
1177          auto redFunction = [&](const std::vector<std::vector<T>> &partialResults) {
1178             std::vector<T> result(npar);
1179 
1180             for (auto const &pointContributionVec : partialResults) {
1181                for (unsigned int parameterIndex = 0; parameterIndex < npar; parameterIndex++)
1182                   result[parameterIndex] += pointContributionVec[parameterIndex];
1183             }
1184 
1185             return result;
1186          };
1187 
1188          std::vector<T> gVec(npar);
1189 
1190 #ifndef R__USE_IMT
1191          // to fix compiler warning
1192          (void)nChunks;
1193 
1194          // If IMT is disabled, force the execution policy to the serial case
1195          if (executionPolicy == ::ROOT::EExecutionPolicy::kMultiThread) {
1196             Warning("FitUtil::EvaluatePoissonLogLGradient",
1197                     "Multithread execution policy requires IMT, which is disabled. Changing "
1198                     "to ::ROOT::EExecutionPolicy::kSequential.");
1199             executionPolicy = ::ROOT::EExecutionPolicy::kSequential;
1200          }
1201 #endif
1202 
1203          if (executionPolicy == ::ROOT::EExecutionPolicy::kSequential) {
1204             ROOT::TSequentialExecutor pool;
1205             gVec = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, numVectors), redFunction);
1206          }
1207 #ifdef R__USE_IMT
1208          else if (executionPolicy == ::ROOT::EExecutionPolicy::kMultiThread) {
1209             ROOT::TThreadExecutor pool;
1210             auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(numVectors);
1211             gVec = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, numVectors), redFunction, chunks);
1212          }
1213 #endif
1214          else {
1215             Error("FitUtil::EvaluatePoissonLogLGradient", "Execution policy unknown. Available choices:\n "
1216                                                           "::ROOT::EExecutionPolicy::kSequential (default)\n "
1217                                                           "::ROOT::EExecutionPolicy::kMultiThread (requires IMT)\n");
1218          }
1219 
1220 
1221          // Compute the contribution from the remaining points
1222          unsigned int remainingPoints = initialNPoints % vecSize;
1223          if (remainingPoints > 0) {
1224             auto remainingPointsContribution = mapFunction(numVectors);
1225             // Add the contribution from the valid remaining points and store the result in the output variable
1226             auto remainingMask = vecCore::Int2Mask<T>(remainingPoints);
1227             for (unsigned int param = 0; param < npar; param++) {
1228                vecCore::MaskedAssign(gVec[param], remainingMask, gVec[param] + remainingPointsContribution[param]);
1229             }
1230          }
1231          // reduce final gradient result from T to double
1232          for (unsigned int param = 0; param < npar; param++) {
1233             grad[param] = vecCore::ReduceAdd(gVec[param]);
1234          }
1235 
1236 #ifdef DEBUG_FITUTIL
1237          std::cout << "***** Final gradient : ";
1238          for (unsigned int ii = 0; ii< npar; ++ii) std::cout << grad[ii] << "   ";
1239          std::cout << "\n";
1240 #endif
1241 
1242       }
1243 
1244       static void EvalLogLGradient(const IModelFunctionTempl<T> &f, const UnBinData &data, const double *p,
1245                                    double *grad, unsigned int &,
1246                                    ::ROOT::EExecutionPolicy executionPolicy = ::ROOT::EExecutionPolicy::kSequential,
1247                                    unsigned nChunks = 0)
1248       {
1249          // evaluate the gradient of the log likelihood function
1250 
1251          const IGradModelFunctionTempl<T> *fg = dynamic_cast<const IGradModelFunctionTempl<T> *>(&f);
1252          assert(fg != nullptr); // must be called by a grad function
1253 
1254          const IGradModelFunctionTempl<T> &func = *fg;
1255 
1256 
1257          unsigned int npar = func.NPar();
1258          auto vecSize = vecCore::VectorSize<T>();
1259          unsigned initialNPoints = data.Size();
1260          unsigned numVectors = initialNPoints / vecSize;
1261 
1262 #ifdef DEBUG_FITUTIL
1263          std::cout << "\n===> Evaluate Gradient for parameters ";
1264          for (unsigned int ip = 0; ip < npar; ++ip)
1265             std::cout << "  " << p[ip];
1266          std::cout << "\n";
1267 #endif
1268 
1269          (const_cast<IGradModelFunctionTempl<T> &>(func)).SetParameters(p);
1270 
1271          const T kdmax1 = vecCore::math::Sqrt(vecCore::NumericLimits<T>::Max());
1272          const T kdmax2 = vecCore::NumericLimits<T>::Max() / (4 * initialNPoints);
1273 
1274          auto mapFunction = [&](const unsigned int i) {
1275             std::vector<T> gradFunc(npar);
1276             std::vector<T> pointContributionVec(npar);
1277 
1278             T x1;
1279             vecCore::Load<T>(x1, data.GetCoordComponent(i * vecSize, 0));
1280 
1281             const T *x = nullptr;
1282 
1283             unsigned int ndim = data.NDim();
1284             std::vector<T> xc(ndim);
1285             if (ndim > 1) {
1286                xc.resize(ndim);
1287                xc[0] = x1;
1288                for (unsigned int j = 1; j < ndim; ++j)
1289                   vecCore::Load<T>(xc[j], data.GetCoordComponent(i * vecSize, j));
1290                x = xc.data();
1291             } else {
1292                x = &x1;
1293             }
1294 
1295 
1296             T fval = func(x, p);
1297             func.ParameterGradient(x, p, &gradFunc[0]);
1298 
1299 #ifdef DEBUG_FITUTIL
1300             if (i < 5 || (i > numVectors-5) ) {
1301                if (ndim > 1) std::cout << i << "  x " << x[0] << " y " << x[1] << " gradient " << gradFunc[0] << "  " << gradFunc[1] << "  " << gradFunc[3] << std::endl;
1302                else std::cout << i << "  x " << x[0] << " gradient " << gradFunc[0] << "  " << gradFunc[1] << "  " << gradFunc[3] << std::endl;
1303             }
1304 #endif
1305 
1306             vecCore::Mask<T> positiveValues = fval > 0;
1307 
1308             for (unsigned int kpar = 0; kpar < npar; ++kpar) {
1309                if (!vecCore::MaskEmpty(positiveValues))
1310                   vecCore::MaskedAssign<T>(pointContributionVec[kpar], positiveValues, -1. / fval * gradFunc[kpar]);
1311 
1312                vecCore::Mask<T> nonZeroGradientValues = !positiveValues && gradFunc[kpar] != 0;
1313                if (!vecCore::MaskEmpty(nonZeroGradientValues)) {
1314                   T gg = kdmax1 * gradFunc[kpar];
1315                   pointContributionVec[kpar] =
1316                      vecCore::Blend(nonZeroGradientValues && gg > 0, -vecCore::math::Min(gg, kdmax2),
1317                                     -vecCore::math::Max(gg, -kdmax2));
1318                }
1319                // if func derivative is zero term is also zero so do not add in g[kpar]
1320             }
1321 
1322             return pointContributionVec;
1323          };
1324 
1325          // Vertically reduce the set of vectors by summing its equally-indexed components
1326          auto redFunction = [&](const std::vector<std::vector<T>> &pointContributions) {
1327             std::vector<T> result(npar);
1328 
1329             for (auto const &pointContributionVec : pointContributions) {
1330                for (unsigned int parameterIndex = 0; parameterIndex < npar; parameterIndex++)
1331                   result[parameterIndex] += pointContributionVec[parameterIndex];
1332             }
1333 
1334             return result;
1335          };
1336 
1337          std::vector<T> gVec(npar);
1338          std::vector<double> g(npar);
1339 
1340 #ifndef R__USE_IMT
1341          // to fix compiler warning
1342          (void)nChunks;
1343 
1344          // If IMT is disabled, force the execution policy to the serial case
1345          if (executionPolicy == ::ROOT::EExecutionPolicy::kMultiThread) {
1346             Warning("FitUtil::EvaluateLogLGradient",
1347                     "Multithread execution policy requires IMT, which is disabled. Changing "
1348                     "to ::ROOT::EExecutionPolicy::kSequential.");
1349             executionPolicy = ::ROOT::EExecutionPolicy::kSequential;
1350          }
1351 #endif
1352 
1353          if (executionPolicy == ::ROOT::EExecutionPolicy::kSequential) {
1354             ROOT::TSequentialExecutor pool;
1355             gVec = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, numVectors), redFunction);
1356          }
1357 #ifdef R__USE_IMT
1358          else if (executionPolicy == ::ROOT::EExecutionPolicy::kMultiThread) {
1359             ROOT::TThreadExecutor pool;
1360             auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(numVectors);
1361             gVec = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, numVectors), redFunction, chunks);
1362          }
1363 #endif
1364          else {
1365             Error("FitUtil::EvaluateLogLGradient", "Execution policy unknown. Available choices:\n "
1366                                                    "::ROOT::EExecutionPolicy::kSequential (default)\n "
1367                                                    "::ROOT::EExecutionPolicy::kMultiThread (requires IMT)\n");
1368          }
1369 
1370          // Compute the contribution from the remaining points
1371          unsigned int remainingPoints = initialNPoints % vecSize;
1372          if (remainingPoints > 0) {
1373             auto remainingPointsContribution = mapFunction(numVectors);
1374             // Add the contribution from the valid remaining points and store the result in the output variable
1375             auto remainingMask = vecCore::Int2Mask<T>(initialNPoints % vecSize);
1376             for (unsigned int param = 0; param < npar; param++) {
1377                vecCore::MaskedAssign(gVec[param], remainingMask, gVec[param] + remainingPointsContribution[param]);
1378             }
1379          }
1380          // reduce final gradient result from T to double
1381          for (unsigned int param = 0; param < npar; param++) {
1382             grad[param] = vecCore::ReduceAdd(gVec[param]);
1383          }
1384 
1385 #ifdef DEBUG_FITUTIL
1386          std::cout << "Final gradient ";
1387          for (unsigned int param = 0; param < npar; param++) {
1388             std::cout << "  " << grad[param];
1389          }
1390          std::cout << "\n";
1391 #endif
1392       }
1393    };
1394 
1395    template<>
1396    struct Evaluate<double>{
1397 #endif
1398 
1399       static double EvalChi2(const IModelFunction &func, const BinData &data, const double *p, unsigned int &nPoints,
1400                              ::ROOT::EExecutionPolicy executionPolicy, unsigned nChunks = 0)
1401       {
1402          // evaluate the chi2 given a  function reference, the data and returns the value and also in nPoints
1403          // the actual number of used points
1404          // normal chi2 using only error on values (from fitting histogram)
1405          // optionally the integral of function in the bin is used
1406 
1407 
1408          //Info("EvalChi2","Using non-vectorized implementation %d",(int) data.Opt().fIntegral);
1409 
1410          return FitUtil::EvaluateChi2(func, data, p, nPoints, executionPolicy, nChunks);
1411       }
1412 
1413       static double EvalLogL(const IModelFunctionTempl<double> &func, const UnBinData &data, const double *p,
1414                              int iWeight, bool extended, unsigned int &nPoints,
1415                              ::ROOT::EExecutionPolicy executionPolicy, unsigned nChunks = 0)
1416       {
1417          return FitUtil::EvaluateLogL(func, data, p, iWeight, extended, nPoints, executionPolicy, nChunks);
1418       }
1419 
1420       static double EvalPoissonLogL(const IModelFunctionTempl<double> &func, const BinData &data, const double *p,
1421                                     int iWeight, bool extended, unsigned int &nPoints,
1422                                     ::ROOT::EExecutionPolicy executionPolicy, unsigned nChunks = 0)
1423       {
1424          return FitUtil::EvaluatePoissonLogL(func, data, p, iWeight, extended, nPoints, executionPolicy, nChunks);
1425       }
1426 
1427       static double EvalChi2Effective(const IModelFunctionTempl<double> &func, const BinData & data, const double * p, unsigned int &nPoints)
1428       {
1429          return FitUtil::EvaluateChi2Effective(func, data, p, nPoints);
1430       }
1431       static void EvalChi2Gradient(const IModelFunctionTempl<double> &func, const BinData &data, const double *p,
1432                                    double *g, unsigned int &nPoints,
1433                                    ::ROOT::EExecutionPolicy executionPolicy = ::ROOT::EExecutionPolicy::kSequential,
1434                                    unsigned nChunks = 0)
1435       {
1436          FitUtil::EvaluateChi2Gradient(func, data, p, g, nPoints, executionPolicy, nChunks);
1437       }
1438 
1439       static double EvalChi2Residual(const IModelFunctionTempl<double> &func, const BinData & data, const double * p, unsigned int i, double *g, double * h,
1440                                     bool hasGrad, bool fullHessian)
1441       {
1442          return FitUtil::EvaluateChi2Residual(func, data, p, i, g, h, hasGrad, fullHessian);
1443       }
1444 
1445       /// evaluate the pdf (Poisson) contribution to the logl (return actually log of pdf)
1446       /// and its gradient
1447       static double EvalPoissonBinPdf(const IModelFunctionTempl<double> &func, const BinData & data, const double *p, unsigned int i, double *g, double * h, bool hasGrad, bool fullHessian) {
1448          return FitUtil::EvaluatePoissonBinPdf(func, data, p, i, g, h, hasGrad, fullHessian);
1449       }
1450 
1451       static double EvalPdf(const IModelFunctionTempl<double> &func, const UnBinData & data, const double *p, unsigned int i, double *g, double * h, bool hasGrad, bool fullHessian) {
1452          return FitUtil::EvaluatePdf(func, data, p, i, g, h, hasGrad, fullHessian);
1453       }
1454 
1455       static void
1456       EvalPoissonLogLGradient(const IModelFunctionTempl<double> &func, const BinData &data, const double *p, double *g,
1457                               unsigned int &nPoints,
1458                               ::ROOT::EExecutionPolicy executionPolicy = ::ROOT::EExecutionPolicy::kSequential,
1459                               unsigned nChunks = 0)
1460       {
1461          FitUtil::EvaluatePoissonLogLGradient(func, data, p, g, nPoints, executionPolicy, nChunks);
1462       }
1463 
1464       static void EvalLogLGradient(const IModelFunctionTempl<double> &func, const UnBinData &data, const double *p,
1465                                    double *g, unsigned int &nPoints,
1466                                    ::ROOT::EExecutionPolicy executionPolicy = ::ROOT::EExecutionPolicy::kSequential,
1467                                    unsigned nChunks = 0)
1468       {
1469          FitUtil::EvaluateLogLGradient(func, data, p, g, nPoints, executionPolicy, nChunks);
1470       }
1471    };
1472 
1473 } // end namespace FitUtil
1474 
1475    } // end namespace Fit
1476 
1477 } // end namespace ROOT
1478 
1479 #if defined (R__HAS_VECCORE) && defined(R__HAS_VC)
1480 //Fixes alignment for structures of SIMD structures
1481 Vc_DECLARE_ALLOCATOR(ROOT::Fit::FitUtil::LikelihoodAux<ROOT::Double_v>);
1482 #endif
1483 
1484 #endif /* ROOT_Fit_FitUtil */