EIC code displayed by LXR

0001 // @(#)root/mathcore:$Id$
0002 // Author: L. Moneta Tue Nov 28 10:52:47 2006
0003 
0004 /**********************************************************************
0005  *                                                                    *
0006  * Copyright (c) 2006  LCG ROOT Math Team, CERN/PH-SFT                *
0007  *                                                                    *
0008  *                                                                    *
0009  **********************************************************************/
0010 
0011 // Header file for class FitUtil
0012 
0013 #ifndef ROOT_Fit_FitUtil
0014 #define ROOT_Fit_FitUtil
0015 
0016 #include "Math/IParamFunctionfwd.h"
0017 #include "Math/IParamFunction.h"
0018 
0019 #ifdef R__USE_IMT
0020 #include "ROOT/TThreadExecutor.hxx"
0021 #endif
0022 #include "ROOT/TSequentialExecutor.hxx"
0023 
0024 #include "Fit/BinData.h"
0025 #include "Fit/UnBinData.h"
0026 #include "ROOT/EExecutionPolicy.hxx"
0027 
0028 #include "Math/Integrator.h"
0029 #include "Math/IntegratorMultiDim.h"
0030 
0031 #include "TError.h"
0032 #include <vector>
0033 
0034 // using the parameter cache is not thread safe but it is needed for normalizing the functions
0035 #define USE_PARAMCACHE
0036 
0037 //#define DEBUG_FITUTIL
0038 
0039 #ifdef R__HAS_VECCORE
0040 namespace vecCore {
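// Helper used below: builds a SIMD mask whose first i lanes are true, so that only the
// valid entries of the last, padded vector contribute to the reductions.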
0041 template <class T>
0042 vecCore::Mask<T> Int2Mask(unsigned i)
0043 {
0044    T x;
0045    for (unsigned j = 0; j < vecCore::VectorSize<T>(); j++)
0046       vecCore::Set<T>(x, j, j);
0047    return vecCore::Mask<T>(x < T(i));
0048 }
0049 }
0050 #endif
0051 
0052 namespace ROOT {
0053 
0054    namespace Fit {
0055 
0056 /**
0057    namespace defining utility free functions used in Fit for evaluating the various fit method
0058    functions (chi2, likelihood, etc..)  given the data and the model function
0059 
0060    @ingroup FitMain
0061 */
0062 namespace FitUtil {
0063 
0064   typedef  ROOT::Math::IParamMultiFunction IModelFunction;
0065   typedef  ROOT::Math::IParamMultiGradFunction IGradModelFunction;
0066 
0067   template <class T>
0068   using IGradModelFunctionTempl = ROOT::Math::IParamMultiGradFunctionTempl<T>;
0069 
0070   template <class T>
0071   using IModelFunctionTempl = ROOT::Math::IParamMultiFunctionTempl<T>;
0072 
0073   // internal class defining the auxiliary sums used in the likelihood evaluation (log value, sum of weights, sum of squared weights)
0074   template <class T>
0075   class LikelihoodAux {
0076   public:
0077      LikelihoodAux(T logv = {}, T w = {}, T w2 = {}) : logvalue(logv), weight(w), weight2(w2) {}
0078 
0079      LikelihoodAux operator+(const LikelihoodAux &l) const
0080      {
0081         return LikelihoodAux<T>(logvalue + l.logvalue, weight + l.weight, weight2 + l.weight2);
0082      }
0083 
0084      LikelihoodAux &operator+=(const LikelihoodAux &l)
0085      {
0086         logvalue += l.logvalue;
0087         weight += l.weight;
0088         weight2 += l.weight2;
0089         return *this;
0090      }
0091 
0092      T logvalue;
0093      T weight;
0094      T weight2;
0095   };
0096 
0097   template <>
0098   class LikelihoodAux<double> {
0099   public:
0100      LikelihoodAux(double logv = 0.0, double w = 0.0, double w2 = 0.0) : logvalue(logv), weight(w), weight2(w2){};
0101 
0102      LikelihoodAux operator+(const LikelihoodAux &l) const
0103      {
0104         return LikelihoodAux<double>(logvalue + l.logvalue, weight + l.weight, weight2 + l.weight2);
0105      }
0106 
0107      LikelihoodAux &operator+=(const LikelihoodAux &l)
0108      {
0109         logvalue += l.logvalue;
0110         weight += l.weight;
0111         weight2 += l.weight2;
0112         return *this;
0113      }
0114 
0115      double logvalue;
0116      double weight;
0117      double weight2;
0118   };
0119 
0120   // internal class to evaluate the function or its integral
0121   // and to cache the internal integration details
0122   // if useIntegral is false no allocation is done
0123   // and this is a dummy class
0124   // the class is templated on any parametric functor implementing operator()(x,p) and NDim()
0125   // it contains a constant pointer to the function
0126 
0127   template <class ParamFunc = ROOT::Math::IParamMultiFunctionTempl<double>>
0128   class IntegralEvaluator {
0129 
0130   public:
0131      IntegralEvaluator(const ParamFunc &func, const double *p, bool useIntegral = true,
0132                        ROOT::Math::IntegrationOneDim::Type igType = ROOT::Math::IntegrationOneDim::kDEFAULT)
0133         : fDim(0), fParams(nullptr), fFunc(nullptr), fIg1Dim(nullptr), fIgNDim(nullptr), fFunc1Dim(nullptr), fFuncNDim(nullptr)
0134      {
0135         if (useIntegral) {
0136            SetFunction(func, p, igType);
0137         }
0138      }
0139 
0140      void SetFunction(const ParamFunc &func, const double *p = nullptr,
0141                       ROOT::Math::IntegrationOneDim::Type igType = ROOT::Math::IntegrationOneDim::kDEFAULT)
0142      {
0143         // set the integrand function and create required wrapper
0144         // to perform integral in (x) of a generic  f(x,p)
0145         fParams = p;
0146         fDim = func.NDim();
0147         // copy the function object to be able to modify the parameters
0148         // fFunc = dynamic_cast<ROOT::Math::IParamMultiFunction *>( func.Clone() );
0149         fFunc = &func;
0150         assert(fFunc != nullptr);
0151         // set parameters in function
0152         // fFunc->SetParameters(p);
0153         if (fDim == 1) {
0154            fFunc1Dim =
0155               new ROOT::Math::WrappedMemFunction<IntegralEvaluator, double (IntegralEvaluator::*)(double) const>(
0156                  *this, &IntegralEvaluator::F1);
0157            fIg1Dim = new ROOT::Math::IntegratorOneDim(igType);
0158            // fIg1Dim->SetFunction( static_cast<const ROOT::Math::IMultiGenFunction & >(*fFunc),false);
0159            fIg1Dim->SetFunction(static_cast<const ROOT::Math::IGenFunction &>(*fFunc1Dim));
0160         } else if (fDim > 1) {
0161            fFuncNDim =
0162               new ROOT::Math::WrappedMemMultiFunction<IntegralEvaluator, double (IntegralEvaluator::*)(const double *)
0163                                                                             const>(*this, &IntegralEvaluator::FN, fDim);
0164            fIgNDim = new ROOT::Math::IntegratorMultiDim();
0165            fIgNDim->SetFunction(*fFuncNDim);
0166         } else
0167            assert(fDim > 0);
0168      }
0169 
0170      void SetParameters(const double *p)
0171      {
0172         // copy just the pointer
0173         fParams = p;
0174      }
0175 
0176      ~IntegralEvaluator()
0177      {
0178         if (fIg1Dim)
0179            delete fIg1Dim;
0180         if (fIgNDim)
0181            delete fIgNDim;
0182         if (fFunc1Dim)
0183            delete fFunc1Dim;
0184         if (fFuncNDim)
0185            delete fFuncNDim;
0186         // if (fFunc) delete fFunc;
0187      }
0188 
0189      // evaluation of integrand function (one-dim)
0190      double F1(double x) const
0191      {
0192         double xx = x;
0193         return ExecFunc(fFunc, &xx, fParams);
0194      }
0195      // evaluation of integrand function (multi-dim)
0196      double FN(const double *x) const { return ExecFunc(fFunc, x, fParams); }
0197 
0198      double Integral(const double *x1, const double *x2)
0199      {
0200         // return unnormalized integral
0201         return (fIg1Dim) ? fIg1Dim->Integral(*x1, *x2) : fIgNDim->Integral(x1, x2);
0202      }
0203 
0204      double operator()(const double *x1, const double *x2)
0205      {
0206         // return normalized integral, divided by bin volume (dx1*dx2*...*dxn)
0207         if (fIg1Dim) {
0208            double dV = *x2 - *x1;
0209            return fIg1Dim->Integral(*x1, *x2) / dV;
0210         } else if (fIgNDim) {
0211            double dV = 1;
0212            for (unsigned int i = 0; i < fDim; ++i)
0213               dV *= (x2[i] - x1[i]);
0214            return fIgNDim->Integral(x1, x2) / dV;
0215            //                   std::cout << " do integral btw x " << x1[0] << "  " << x2[0] << " y " << x1[1] << "  "
0216            //                   << x2[1] << " dV = " << dV << " result = " << result << std::endl; return result;
0217         } else
0218            assert(false); // should never be here
0219         return 0;
0220      }
0221 
0222   private:
0223      template <class T>
0224      inline double ExecFunc(T *f, const double *x, const double *p) const
0225      {
0226         return (*f)(x, p);
0227      }
0228 
0229 #ifdef R__HAS_VECCORE
0230 
0231 #if __clang_major__ > 16
0232 #pragma clang diagnostic push
0233 #pragma clang diagnostic ignored "-Wvla-cxx-extension"
0234 #endif
0235 
0236      inline double ExecFunc(const IModelFunctionTempl<ROOT::Double_v> *f, const double *x, const double *p) const
0237      {
0238         // Figure out the size of the SIMD vectors.
0239         constexpr static int vecSize = sizeof(ROOT::Double_v) / sizeof(double);
0240         double xBuffer[vecSize];
0241         ROOT::Double_v xx[fDim];
0242         for (unsigned int i = 0; i < fDim; ++i) {
0243            // The Load() function reads multiple values from the pointed-to
0244            // memory into xx. This is why we have to copy the input values from
0245            // the x array into a zero-padded buffer to read from. Otherwise,
0246            // Load() would access the x array out of bounds.
0247            *xBuffer = x[i];
0248            for(int j = 1; j < vecSize; ++j) {
0249               xBuffer[j] = 0.0;
0250            }
0251            vecCore::Load<ROOT::Double_v>(xx[i], xBuffer);
0252         }
0253         auto res = (*f)(xx, p);
0254         return vecCore::Get<ROOT::Double_v>(res, 0);
0255      }
0256 
0257 #if __clang_major__ > 16
0258 #pragma clang diagnostic pop
0259 #endif
0260 
0261 #endif
0262 
0263      // objects of this class are not meant to be copied / assigned
0264      IntegralEvaluator(const IntegralEvaluator &rhs) = delete;
0265      IntegralEvaluator &operator=(const IntegralEvaluator &rhs) = delete;
0266 
0267      unsigned int fDim;
0268      const double *fParams;
0269      // ROOT::Math::IParamMultiFunction * fFunc;  // copy of function in order to be able to change parameters
0270      // const ParamFunc * fFunc;       //  reference to a generic parametric function
0271      const ParamFunc *fFunc;
0272      ROOT::Math::IntegratorOneDim *fIg1Dim;
0273      ROOT::Math::IntegratorMultiDim *fIgNDim;
0274      ROOT::Math::IGenFunction *fFunc1Dim;
0275      ROOT::Math::IMultiGenFunction *fFuncNDim;
0276   };
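  // A minimal sketch of how the evaluator is used by the fit methods below
  // (`model`, `params`, `xlow` and `xup` are hypothetical variables owned by the caller):
  //
  //   IntegralEvaluator<> igEval(model, params, true);
  //   double binAverage = igEval(xlow, xup);  // integral over the bin divided by the bin volume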
0277 
0278   /** Chi2 Functions */
0279 
0280   /**
0281       evaluate the Chi2 given a model function and the data at the point p.
0282       return also nPoints as the effective number of used points in the Chi2 evaluation
0283   */
0284   double EvaluateChi2(const IModelFunction &func, const BinData &data, const double *p, unsigned int &nPoints,
0285                       ::ROOT::EExecutionPolicy executionPolicy, unsigned nChunks = 0);
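  // A minimal usage sketch (illustrative only; `model`, `binData` and `params` are
  // hypothetical objects owned by the caller):
  //
  //   unsigned int nPoints = 0;
  //   double chi2 = ROOT::Fit::FitUtil::EvaluateChi2(model, binData, params, nPoints,
  //                                                  ::ROOT::EExecutionPolicy::kSequential);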
0286 
0287   /**
0288       evaluate the effective Chi2 given a model function and the data at the point x.
0289       The effective chi2 uses the errors on the coordinates : W = 1/(sigma_y**2 + ( sigma_x_i * df/dx_i )**2 )
0290       return also nPoints as the effective number of used points in the Chi2 evaluation
0291   */
0292   double EvaluateChi2Effective(const IModelFunction &func, const BinData &data, const double *x, unsigned int &nPoints);
0293 
0294   /**
0295       evaluate the Chi2 gradient given a model function and the data at the point p.
0296       return also nPoints as the effective number of used points in the Chi2 evaluation
0297   */
0298   void EvaluateChi2Gradient(const IModelFunction &func, const BinData &data, const double *p, double *grad,
0299                             unsigned int &nPoints,
0300                             ::ROOT::EExecutionPolicy executionPolicy = ::ROOT::EExecutionPolicy::kSequential,
0301                             unsigned nChunks = 0);
0302 
0303   /**
0304       evaluate the LogL given a model function and the data at the point p.
0305       return also nPoints as the effective number of used points in the LogL evaluation
0306   */
0307   double EvaluateLogL(const IModelFunction &func, const UnBinData &data, const double *p, int iWeight, bool extended,
0308                       unsigned int &nPoints, ::ROOT::EExecutionPolicy executionPolicy, unsigned nChunks = 0);
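  // iWeight selects how the event weights enter the sum (see Evaluate<T>::EvalLogL below):
  // 0 = no weights, 1 = log-terms multiplied by the weight, 2 = multiplied by the squared weight;
  // `extended` adds the Poisson normalization term to the likelihood.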
0309 
0310   /**
0311       evaluate the LogL gradient given a model function and the data at the point p.
0312       return also nPoints as the effective number of used points in the LogL evaluation
0313   */
0314   void EvaluateLogLGradient(const IModelFunction &func, const UnBinData &data, const double *p, double *grad,
0315                             unsigned int &nPoints,
0316                             ::ROOT::EExecutionPolicy executionPolicy = ::ROOT::EExecutionPolicy::kSequential,
0317                             unsigned nChunks = 0);
0318 
0319   // #ifdef R__HAS_VECCORE
0320   //    template <class NotCompileIfScalarBackend = std::enable_if<!(std::is_same<double, ROOT::Double_v>::value)>>
0321   //    void EvaluateLogLGradient(const IModelFunctionTempl<ROOT::Double_v> &, const UnBinData &, const double *, double
0322   //    *, unsigned int & ) {}
0323   // #endif
0324 
0325   /**
0326       evaluate the Poisson LogL given a model function and the data at the point p.
0327       return also nPoints as the effective number of used points in the LogL evaluation
0328       By default the likelihood is extended; pass extended = false for a non-extended (multinomial) likelihood
0329   */
0330   double EvaluatePoissonLogL(const IModelFunction &func, const BinData &data, const double *p, int iWeight,
0331                              bool extended, unsigned int &nPoints, ::ROOT::EExecutionPolicy executionPolicy,
0332                              unsigned nChunks = 0);
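  // Note: as detailed in Evaluate<T>::EvalPoissonLogL below, the value returned is the
  // negative Poisson log-likelihood including the saturated-model constant, i.e.
  //   Sum_i [ f(x_i) - y_i + y_i * log( y_i / f(x_i) ) ]
  // so that twice its value behaves asymptotically like a Chi2 (Baker-Cousins).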
0333 
0334   /**
0335       evaluate the Poisson LogL given a model function and the data at the point p.
0336       return also nPoints as the effective number of used points in the LogL evaluation
0337   */
0338   void EvaluatePoissonLogLGradient(const IModelFunction &func, const BinData &data, const double *p, double *grad,
0339                                    unsigned int &nPoints,
0340                                    ::ROOT::EExecutionPolicy executionPolicy = ::ROOT::EExecutionPolicy::kSequential,
0341                                    unsigned nChunks = 0);
0342 
0343   // methods required by dedicated minimizers like Fumili
0344 
0345   /**
0346       evaluate the residual contribution to the Chi2 given a model function and the BinPoint data
0347       and if the pointer g is not null evaluate also the gradient of the residual.
0348       If the function provides parameter derivatives they are used otherwise a simple derivative calculation
0349       is used
0350   */
0351   double EvaluateChi2Residual(const IModelFunction &func, const BinData &data, const double *p, unsigned int ipoint,
0352                               double *g = nullptr, double * h = nullptr, bool hasGrad = false, bool fullHessian = false);
0353 
0354   /**
0355       evaluate the pdf contribution to the LogL given a model function and the BinPoint data.
0356       If the pointer g is not null evaluate also the gradient of the pdf.
0357       If the function provides parameter derivatives they are used otherwise a simple derivative calculation
0358       is used
0359   */
0360   double
0361   EvaluatePdf(const IModelFunction &func, const UnBinData &data, const double *p, unsigned int ipoint, double *g = nullptr, double * h = nullptr, bool hasGrad = false, bool fullHessian = false);
0362 
0363 
0364    /**
0365        evaluate the pdf contribution to the Poisson LogL given a model function and the BinPoint data.
0366        If the pointer g is not null evaluate also the gradient of the Poisson pdf.
0367        If the function provides parameter derivatives they are used otherwise a simple derivative calculation
0368        is used
0369    */
0370    double EvaluatePoissonBinPdf(const IModelFunction & func, const BinData & data, const double * x, unsigned int ipoint, double * g = nullptr, double * h = nullptr, bool hasGrad = false, bool fullHessian = false);
0371 
0372    unsigned setAutomaticChunking(unsigned nEvents);
0373 
0374    template<class T>
0375    struct Evaluate {
0376 #ifdef R__HAS_VECCORE
0377       static double EvalChi2(const IModelFunctionTempl<T> &func, const BinData &data, const double *p,
0378                              unsigned int &nPoints, ::ROOT::EExecutionPolicy executionPolicy, unsigned nChunks = 0)
0379       {
0380          // evaluate the chi2 given a vectorized function reference, the data, and return the value; nPoints
0381          // is set to the actual number of used points
0382          // normal chi2 using only the errors on the values (as from fitting a histogram)
0383          // optionally the integral of the function in the bin is used
0384 
0385          //Info("EvalChi2","Using vectorized implementation %d",(int) data.Opt().fIntegral);
0386 
0387          unsigned int n = data.Size();
0388          nPoints = data.Size();  // npoints
0389 
0390          // set parameters of the function to cache integral value
0391 #ifdef USE_PARAMCACHE
0392          (const_cast<IModelFunctionTempl<T> &>(func)).SetParameters(p);
0393 #endif
0394          // do not cache parameter values (it is not thread safe)
0395          //func.SetParameters(p);
0396 
0397 
0398          // get fit option and check case if using integral of bins
0399          const DataOptions &fitOpt = data.Opt();
0400          if (fitOpt.fBinVolume || fitOpt.fIntegral || fitOpt.fExpErrors)
0401             Error("FitUtil::EvaluateChi2", "The vectorized implementation doesn't support Integrals, BinVolume or ExpErrors\n. Aborting operation.");
0402 
0403          (const_cast<IModelFunctionTempl<T> &>(func)).SetParameters(p);
0404 
0405          double maxResValue = std::numeric_limits<double>::max() / n;
0406          std::vector<double> ones{1., 1., 1., 1.};
0407          auto vecSize = vecCore::VectorSize<T>();
0408 
0409          auto mapFunction = [&](unsigned int i) {
0410             // in case of no error in y invError=1 is returned
0411             T x1, y, invErrorVec;
0412             vecCore::Load<T>(x1, data.GetCoordComponent(i * vecSize, 0));
0413             vecCore::Load<T>(y, data.ValuePtr(i * vecSize));
0414             const auto invError = data.ErrorPtr(i * vecSize);
0415             auto invErrorptr = (invError != nullptr) ? invError : &ones.front();
0416             vecCore::Load<T>(invErrorVec, invErrorptr);
0417 
0418             const T *x;
0419             std::vector<T> xc;
0420             if(data.NDim() > 1) {
0421                xc.resize(data.NDim());
0422                xc[0] = x1;
0423                for (unsigned int j = 1; j < data.NDim(); ++j)
0424                   vecCore::Load<T>(xc[j], data.GetCoordComponent(i * vecSize, j));
0425                x = xc.data();
0426             } else {
0427                x = &x1;
0428             }
0429 
0430             T fval{};
0431 
0432 #ifdef USE_PARAMCACHE
0433             fval = func(x);
0434 #else
0435             fval = func(x, p);
0436 #endif
0437 
0438             T tmp = (y - fval) * invErrorVec;
0439             T chi2 = tmp * tmp;
0440 
0441 
0442             // avoid infinity or nan in chi2 values due to wrong function values
0443             auto m = vecCore::Mask_v<T>(chi2 > maxResValue);
0444 
0445             vecCore::MaskedAssign<T>(chi2, m, maxResValue);
0446 
0447             return chi2;
0448          };
0449 
0450          auto redFunction = [](const std::vector<T> &objs) {
0451             return std::accumulate(objs.begin(), objs.end(), T{});
0452          };
0453 
0454 #ifndef R__USE_IMT
0455          (void)nChunks;
0456 
0457          // If IMT is disabled, force the execution policy to the serial case
0458          if (executionPolicy == ::ROOT::EExecutionPolicy::kMultiThread) {
0459             Warning("FitUtil::EvaluateChi2", "Multithread execution policy requires IMT, which is disabled. Changing "
0460                                              "to ::ROOT::EExecutionPolicy::kSequential.");
0461             executionPolicy = ::ROOT::EExecutionPolicy::kSequential;
0462          }
0463 #endif
0464 
0465          T res{};
0466          if (executionPolicy == ::ROOT::EExecutionPolicy::kSequential) {
0467             ROOT::TSequentialExecutor pool;
0468             res = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, data.Size()/vecSize), redFunction);
0469 #ifdef R__USE_IMT
0470          } else if (executionPolicy == ::ROOT::EExecutionPolicy::kMultiThread) {
0471             ROOT::TThreadExecutor pool;
0472             auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(data.Size() / vecSize);
0473             res = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, data.Size() / vecSize), redFunction, chunks);
0474 #endif
0475          } else {
0476             Error("FitUtil::EvaluateChi2", "Execution policy unknown. Available choices:\n ::ROOT::EExecutionPolicy::kSequential (default)\n ::ROOT::EExecutionPolicy::kMultiThread (requires IMT)\n");
0477          }
0478 
0479          // Last SIMD vector of elements (if padding needed)
0480          if (data.Size() % vecSize != 0)
0481             vecCore::MaskedAssign(res, vecCore::Int2Mask<T>(data.Size() % vecSize),
0482                                   res + mapFunction(data.Size() / vecSize));
0483 
0484          return vecCore::ReduceAdd(res);
0485       }
0486 
0487       static double EvalLogL(const IModelFunctionTempl<T> &func, const UnBinData &data, const double *const p,
0488                              int iWeight, bool extended, unsigned int &nPoints,
0489                              ::ROOT::EExecutionPolicy executionPolicy, unsigned nChunks = 0)
0490       {
0491          // evaluate the LogLikelihood
0492          unsigned int n = data.Size();
0493          nPoints = data.Size();  // npoints
0494 
0495          //unsigned int nRejected = 0;
0496          bool normalizeFunc = false;
0497 
0498          // set parameters of the function to cache integral value
0499 #ifdef USE_PARAMCACHE
0500          (const_cast<IModelFunctionTempl<T> &>(func)).SetParameters(p);
0501 #endif
0502 
0503 #ifdef R__USE_IMT
0504          // in case parameters need to be propagated to the user function, use the trick of calling the function once here;
0505          // this is done in sequential mode so the parameters can be set in a thread safe manner
0506          if (!normalizeFunc) {
0507             if (data.NDim() == 1) {
0508                T x;
0509                vecCore::Load<T>(x, data.GetCoordComponent(0, 0));
0510                func( &x, p);
0511             }
0512             else {
0513                std::vector<T> x(data.NDim());
0514                for (unsigned int j = 0; j < data.NDim(); ++j)
0515                   vecCore::Load<T>(x[j], data.GetCoordComponent(0, j));
0516                func( x.data(), p);
0517             }
0518          }
0519 #endif
0520 
0521          // this is needed if function must be normalized
0522          double norm = 1.0;
0523          if (normalizeFunc) {
0524             // compute integral of the function
0525             std::vector<double> xmin(data.NDim());
0526             std::vector<double> xmax(data.NDim());
0527             IntegralEvaluator<IModelFunctionTempl<T>> igEval(func, p, true);
0528            // compute the integral in the ranges where it is defined
0529             if (data.Range().Size() > 0) {
0530                norm = 0;
0531                for (unsigned int ir = 0; ir < data.Range().Size(); ++ir) {
0532                   data.Range().GetRange(&xmin[0], &xmax[0], ir);
0533                   norm += igEval.Integral(xmin.data(), xmax.data());
0534                }
0535             } else {
0536                // use (-inf +inf)
0537                data.Range().GetRange(&xmin[0], &xmax[0]);
0538                // check if function is zero at +- inf
0539                T xmin_v, xmax_v;
0540                vecCore::Load<T>(xmin_v, xmin.data());
0541                vecCore::Load<T>(xmax_v, xmax.data());
0542                if (vecCore::ReduceAdd(func(&xmin_v, p)) != 0 || vecCore::ReduceAdd(func(&xmax_v, p)) != 0) {
0543                   MATH_ERROR_MSG("FitUtil::EvaluateLogLikelihood", "A range has not been set and the function is not zero at +/- inf");
0544                   return 0;
0545                }
0546                norm = igEval.Integral(&xmin[0], &xmax[0]);
0547             }
0548          }
0549 
0550          // needed to compute effective global weight in case of extended likelihood
0551 
0552          auto vecSize = vecCore::VectorSize<T>();
0553          unsigned int numVectors = n / vecSize;
0554 
0555          auto mapFunction = [ &, p](const unsigned i) {
0556             T W{};
0557             T W2{};
0558             T fval{};
0559 
0560             (void)p; /* avoid unused lambda capture warning if PARAMCACHE is disabled */
0561 
0562             T x1;
0563             vecCore::Load<T>(x1, data.GetCoordComponent(i * vecSize, 0));
0564             const T *x = nullptr;
0565             unsigned int ndim = data.NDim();
0566             std::vector<T> xc;
0567             if (ndim > 1) {
0568                xc.resize(ndim);
0569                xc[0] = x1;
0570                for (unsigned int j = 1; j < ndim; ++j)
0571                   vecCore::Load<T>(xc[j], data.GetCoordComponent(i * vecSize, j));
0572                x = xc.data();
0573             } else {
0574                x = &x1;
0575             }
0576 
0577 #ifdef USE_PARAMCACHE
0578             fval = func(x);
0579 #else
0580             fval = func(x, p);
0581 #endif
0582 
0583 #ifdef DEBUG_FITUTIL
0584             if (i < 5 || (i > numVectors-5) ) {
0585                if (ndim == 1) std::cout << i << "  x " << x[0]  << " fval = " << fval;
0586                else std::cout << i << "  x " << x[0] << " y " << x[1] << " fval = " << fval;
0587             }
0588 #endif
0589 
0590             if (normalizeFunc) fval = fval * (1 / norm);
0591 
0592             // function EvalLog protects against negative or too small values of fval
0593             auto logval =  ROOT::Math::Util::EvalLog(fval);
0594             if (iWeight > 0) {
0595                T weight{};
0596                if (data.WeightsPtr(i) == nullptr)
0597                   weight = 1;
0598                else
0599                   vecCore::Load<T>(weight, data.WeightsPtr(i*vecSize));
0600                logval *= weight;
0601                if (iWeight == 2) {
0602                   logval *= weight; // use square of weights in likelihood
0603                   if (!extended) {
0604                      // need the sum of weights and the sum of squared weights if the likelihood is extended
0605                      W  = weight;
0606                      W2 = weight * weight;
0607                   }
0608                }
0609             }
0610 #ifdef DEBUG_FITUTIL
0611             if (i < 5 || (i > numVectors-5))  {
0612                std::cout << "   " << fval << "  logfval " << logval << std::endl;
0613             }
0614 #endif
0615 
0616             return LikelihoodAux<T>(logval, W, W2);
0617          };
0618 
0619          auto redFunction = [](const std::vector<LikelihoodAux<T>> &objs) {
0620             return std::accumulate(objs.begin(), objs.end(), LikelihoodAux<T>(),
0621             [](const LikelihoodAux<T> &l1, const LikelihoodAux<T> &l2) {
0622                return l1 + l2;
0623             });
0624          };
0625 
0626 #ifndef R__USE_IMT
0627          (void)nChunks;
0628 
0629          // If IMT is disabled, force the execution policy to the serial case
0630          if (executionPolicy == ::ROOT::EExecutionPolicy::kMultiThread) {
0631             Warning("FitUtil::EvaluateLogL", "Multithread execution policy requires IMT, which is disabled. Changing "
0632                                              "to ::ROOT::EExecutionPolicy::kSequential.");
0633             executionPolicy = ::ROOT::EExecutionPolicy::kSequential;
0634          }
0635 #endif
0636 
0637          T logl_v{};
0638          T sumW_v{};
0639          T sumW2_v{};
0640          ROOT::Fit::FitUtil::LikelihoodAux<T> resArray;
0641          if (executionPolicy == ::ROOT::EExecutionPolicy::kSequential) {
0642             ROOT::TSequentialExecutor pool;
0643             resArray = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, data.Size() / vecSize), redFunction);
0644 #ifdef R__USE_IMT
0645          } else if (executionPolicy == ::ROOT::EExecutionPolicy::kMultiThread) {
0646             ROOT::TThreadExecutor pool;
0647             auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking( numVectors);
0648             resArray = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, data.Size() / vecSize), redFunction, chunks);
0649 #endif
0650          } else {
0651             Error("FitUtil::EvaluateLogL", "Execution policy unknown. Available choices:\n ::ROOT::EExecutionPolicy::kSequential (default)\n ::ROOT::EExecutionPolicy::kMultiThread (requires IMT)\n");
0652          }
0653 
0654          logl_v = resArray.logvalue;
0655          sumW_v = resArray.weight;
0656          sumW2_v = resArray.weight2;
0657 
0658          // Compute the contribution from the remaining points ( Last padded SIMD vector of elements )
0659          unsigned int remainingPoints = n % vecSize;
0660          if (remainingPoints > 0) {
0661             auto remainingPointsContribution = mapFunction(numVectors);
0662             // Add the contribution from the valid remaining points and store the result in the output variable
0663             auto remainingMask = vecCore::Int2Mask<T>(remainingPoints);
0664             vecCore::MaskedAssign(logl_v, remainingMask, logl_v + remainingPointsContribution.logvalue);
0665             vecCore::MaskedAssign(sumW_v, remainingMask, sumW_v + remainingPointsContribution.weight);
0666             vecCore::MaskedAssign(sumW2_v, remainingMask, sumW2_v + remainingPointsContribution.weight2);
0667          }
0668 
0669 
0670          //reduce vector type to double.
0671          double logl  = vecCore::ReduceAdd(logl_v);
0672          double sumW  = vecCore::ReduceAdd(sumW_v);
0673          double sumW2 = vecCore::ReduceAdd(sumW2_v);
0674 
0675          if (extended) {
0676             // add Poisson extended term
0677             double extendedTerm = 0; // extended term in likelihood
0678             double nuTot = 0;
0679             // nuTot is the integral of the function in the range
0680             // if the function has been normalized the integral has already been computed
0681             if (!normalizeFunc) {
0682                IntegralEvaluator<IModelFunctionTempl<T>> igEval(func, p, true);
0683                std::vector<double> xmin(data.NDim());
0684                std::vector<double> xmax(data.NDim());
0685 
0686                // compute the integral in the ranges where it is defined
0687                if (data.Range().Size() > 0) {
0688                   nuTot = 0;
0689                   for (unsigned int ir = 0; ir < data.Range().Size(); ++ir) {
0690                      data.Range().GetRange(&xmin[0], &xmax[0], ir);
0691                      nuTot += igEval.Integral(xmin.data(), xmax.data());
0692                   }
0693                } else {
0694                   // use (-inf +inf)
0695                   data.Range().GetRange(&xmin[0], &xmax[0]);
0696                   // check if function is zero at +- inf
0697                   T xmin_v, xmax_v;
0698                   vecCore::Load<T>(xmin_v, xmin.data());
0699                   vecCore::Load<T>(xmax_v, xmax.data());
0700                   if (vecCore::ReduceAdd(func(&xmin_v, p)) != 0 || vecCore::ReduceAdd(func(&xmax_v, p)) != 0) {
0701                      MATH_ERROR_MSG("FitUtil::EvaluateLogLikelihood", "A range has not been set and the function is not zero at +/- inf");
0702                      return 0;
0703                   }
0704                   nuTot = igEval.Integral(&xmin[0], &xmax[0]);
0705                }
0706 
0707                // force to be last parameter value
0708                //nutot = p[func.NDim()-1];
0709                if (iWeight != 2)
0710                   extendedTerm = - nuTot;  // no need to add in this case n log(nu) since it is already computed before
0711                else {
0712                   // case of using squared weights in the likelihood: compute the total effective weight = sw2/sw
0713                   // ignore for the moment the case when sumW is zero
0714                   extendedTerm = - (sumW2 / sumW) * nuTot;
0715                }
0716 
0717             } else {
0718                nuTot = norm;
0719                extendedTerm = - nuTot + double(n) *  ROOT::Math::Util::EvalLog(nuTot);
0720                // in case of weights need to use here sum of weights (to be done)
0721             }
0722 
0723             logl += extendedTerm;
0724          }
0725 
0726 #ifdef DEBUG_FITUTIL
0727          std::cout << "Evaluated log L for parameters (";
0728          for (unsigned int ip = 0; ip < func.NPar(); ++ip)
0729             std::cout << " " << p[ip];
0730          std::cout << ")  nll = " << -logl << std::endl;
0731 #endif
0732 
0733          return -logl;
0734 
0735       }
0736 
0737       static double EvalPoissonLogL(const IModelFunctionTempl<T> &func, const BinData &data, const double *p,
0738                                     int iWeight, bool extended, unsigned int,
0739                                     ::ROOT::EExecutionPolicy executionPolicy, unsigned nChunks = 0)
0740       {
0741          // evaluate the Poisson Log Likelihood
0742          // for binned likelihood fits
0743          // this is Sum ( f(x_i)  -  y_i * log( f (x_i) ) )
0744          // add as well the constant term for the saturated model to make it behave like a Chi2/2
0745          // by default it is extended. If extended is false the fit is not extended and
0746          // the global Poisson term is removed (i.e. it is a binomial fit)
0747          // (remember that in this case one needs a function with a fixed normalization,
0748          // as in a non-extended binned fit)
0749          //
0750          // if weights are used, use a weighted dataset
0751          // iWeight = 1 ==> logL = Sum( w f(x_i) )
0752          // case of iWeight==1 is actually identical to weight==0
0753          // iWeight = 2 ==> logL = Sum( w*w * f(x_i) )
0754          //
0755 
0756 #ifdef USE_PARAMCACHE
0757          (const_cast<IModelFunctionTempl<T> &>(func)).SetParameters(p);
0758 #endif
0759          auto vecSize = vecCore::VectorSize<T>();
0760          // get fit option and check case of using integral of bins
0761          const DataOptions &fitOpt = data.Opt();
0762          if (fitOpt.fExpErrors || fitOpt.fIntegral)
0763             Error("FitUtil::Evaluate<T>::EvalPoissonLogL",
0764                   "The vectorized implementation doesn't support Integrals or ExpErrors. Aborting operation.");
0765          bool useW2 = (iWeight == 2);
0766 
0767          auto mapFunction = [&](unsigned int i) {
0768             T y;
0769             vecCore::Load<T>(y, data.ValuePtr(i * vecSize));
0770             T fval{};
0771 
0772             if (data.NDim() > 1) {
0773                std::vector<T> x(data.NDim());
0774                for (unsigned int j = 0; j < data.NDim(); ++j)
0775                   vecCore::Load<T>(x[j], data.GetCoordComponent(i * vecSize, j));
0776 #ifdef USE_PARAMCACHE
0777                fval = func(x.data());
0778 #else
0779                fval = func(x.data(), p);
0780 #endif
0781                // one-dim case
0782             } else {
0783                T x;
0784                vecCore::Load<T>(x, data.GetCoordComponent(i * vecSize, 0));
0785 #ifdef USE_PARAMCACHE
0786                fval = func(&x);
0787 #else
0788                fval = func(&x, p);
0789 #endif
0790             }
0791 
0792             // EvalLog protects against 0 values of fval, but we do not want to add
0793             // negative values of fval in the -log sum
0794             vecCore::MaskedAssign<T>(fval, fval < 0.0, 0.0);
0795 
0796             T nloglike{}; // negative loglikelihood
0797 
0798             if (useW2) {
0799                // apply the weight correction: the effective weight is error^2 / y
0800                // and the expected number of events in the bin is fval / weight
0801                // the correction can be applied only when y is not zero, otherwise the weight is undefined
0802                // (in the case of a weighted likelihood the constant term due to
0803                // the saturated model does not matter)
0804                assert (data.GetErrorType() != ROOT::Fit::BinData::ErrorType::kNoError);
0805                T error = 0.0;
0806                vecCore::Load<T>(error, data.ErrorPtr(i * vecSize));
0807                // for empty bin use the average weight  computed from the total data weight
0808                auto m = vecCore::Mask_v<T>(y != 0.0);
0809                auto weight = vecCore::Blend(m,(error * error) / y, T(data.SumOfError2()/ data.SumOfContent()) );
0810                if (extended) {
0811                   nloglike =  weight * ( fval - y);
0812                }
0813                vecCore::MaskedAssign<T>(nloglike, y != 0, nloglike + weight * y *( ROOT::Math::Util::EvalLog(y) -  ROOT::Math::Util::EvalLog(fval)) );
0814 
0815             } else {
0816                // standard case: no weights or iWeight=1
0817                // this is needed for the Poisson likelihood (which is extended), not for the multinomial one
0818                // the formula below includes the constant term due to the likelihood of the saturated model (f(x) = y)
0819                // (same formula as in the Baker-Cousins paper, page 439, except for a factor of 2)
0820                if (extended) nloglike = fval - y;
0821 
0822                vecCore::MaskedAssign<T>(
0823                   nloglike, y > 0, nloglike + y * (ROOT::Math::Util::EvalLog(y) - ROOT::Math::Util::EvalLog(fval)));
0824             }
0825 
0826             return nloglike;
0827          };
0828 
0829 #ifdef R__USE_IMT
0830          auto redFunction = [](const std::vector<T> &objs) { return std::accumulate(objs.begin(), objs.end(), T{}); };
0831 #else
0832          (void)nChunks;
0833 
0834          // If IMT is disabled, force the execution policy to the serial case
0835          if (executionPolicy == ::ROOT::EExecutionPolicy::kMultiThread) {
0836             Warning("FitUtil::Evaluate<T>::EvalPoissonLogL",
0837                     "Multithread execution policy requires IMT, which is disabled. Changing "
0838                     "to ::ROOT::EExecutionPolicy::kSequential.");
0839             executionPolicy = ::ROOT::EExecutionPolicy::kSequential;
0840          }
0841 #endif
0842 
0843          T res{};
0844          if (executionPolicy == ::ROOT::EExecutionPolicy::kSequential) {
0845             for (unsigned int i = 0; i < (data.Size() / vecSize); i++) {
0846                res += mapFunction(i);
0847             }
0848 #ifdef R__USE_IMT
0849          } else if (executionPolicy == ::ROOT::EExecutionPolicy::kMultiThread) {
0850             ROOT::TThreadExecutor pool;
0851             auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(data.Size() / vecSize);
0852             res = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, data.Size() / vecSize), redFunction, chunks);
0853 #endif
0854          } else {
0855             Error(
0856                "FitUtil::Evaluate<T>::EvalPoissonLogL",
0857                "Execution policy unknown. Available choices:\n ::ROOT::EExecutionPolicy::kSequential (default)\n ::ROOT::EExecutionPolicy::kMultiThread (requires IMT)\n");
0858          }
0859 
0860          // Last padded SIMD vector of elements
0861          if (data.Size() % vecSize != 0)
0862             vecCore::MaskedAssign(res, vecCore::Int2Mask<T>(data.Size() % vecSize),
0863                                   res + mapFunction(data.Size() / vecSize));
0864 
0865          return vecCore::ReduceAdd(res);
0866       }
0867 
0868       static double EvalChi2Effective(const IModelFunctionTempl<T> &, const BinData &, const double *, unsigned int &)
0869       {
0870          Error("FitUtil::Evaluate<T>::EvalChi2Effective", "The vectorized evaluation of the Chi2 with coordinate errors is still not supported");
0871          return -1.;
0872       }
0873 
0874       // Compute a mask to filter out infinite numbers and NaN values.
0875       // The argument rval is updated so infinite numbers and NaN values are replaced by
0876       // maximum finite values (preserving the original sign).
0877       static vecCore::Mask<T> CheckInfNaNValues(T &rval)
0878       {
0879          auto mask = rval > -vecCore::NumericLimits<T>::Max() && rval < vecCore::NumericLimits<T>::Max();
0880 
0881          // Case +inf or nan
0882          vecCore::MaskedAssign(rval, !mask, +vecCore::NumericLimits<T>::Max());
0883 
0884          // Case -inf
0885          vecCore::MaskedAssign(rval, !mask && rval < 0, -vecCore::NumericLimits<T>::Max());
0886 
0887          return mask;
0888       }
0889 
0890       static void EvalChi2Gradient(const IModelFunctionTempl<T> &f, const BinData &data, const double *p, double *grad,
0891                                    unsigned int &nPoints,
0892                                    ::ROOT::EExecutionPolicy executionPolicy = ::ROOT::EExecutionPolicy::kSequential,
0893                                    unsigned nChunks = 0)
0894       {
0895          // evaluate the gradient of the chi2 function
0896          // this function is used when the model function knows how to calculate its derivatives and we can
0897          // avoid having the minimizer re-compute them
0898          //
0899          // the case of the effective chi2 (errors on the coordinates) is not supported
0900 
0901          if (data.HaveCoordErrors()) {
0902             MATH_ERROR_MSG("FitUtil::EvaluateChi2Gradient",
0903                            "Errors on the coordinates are not used in calculating the Chi2 gradient");
0904             return; // it will assert otherwise later in GetPoint
0905          }
0906 
0907          const IGradModelFunctionTempl<T> *fg = dynamic_cast<const IGradModelFunctionTempl<T> *>(&f);
0908          assert(fg != nullptr); // must be called by a gradient function
0909 
0910          const IGradModelFunctionTempl<T> &func = *fg;
0911 
0912          const DataOptions &fitOpt = data.Opt();
0913          if (fitOpt.fBinVolume || fitOpt.fIntegral || fitOpt.fExpErrors)
0914             Error("FitUtil::EvaluateChi2Gradient", "The vectorized implementation doesn't support Integrals,"
0915                                                    "BinVolume or ExpErrors\n. Aborting operation.");
0916 
0917          unsigned int npar = func.NPar();
0918          auto vecSize = vecCore::VectorSize<T>();
0919          unsigned initialNPoints = data.Size();
0920          unsigned numVectors = initialNPoints / vecSize;
0921 
0922          // numVectors + 1 because of the padded data (call to mapFunction with i = numVectors after the main loop)
0923          std::vector<vecCore::Mask<T>> validPointsMasks(numVectors + 1);
0924 
0925          auto mapFunction = [&](const unsigned int i) {
0926             // set all vector values to zero
0927             std::vector<T> gradFunc(npar);
0928             std::vector<T> pointContributionVec(npar);
0929 
0930             T x1, y, invError;
0931 
0932             vecCore::Load<T>(x1, data.GetCoordComponent(i * vecSize, 0));
0933             vecCore::Load<T>(y, data.ValuePtr(i * vecSize));
0934             const auto invErrorPtr = data.ErrorPtr(i * vecSize);
0935 
0936             if (invErrorPtr == nullptr)
0937                invError = 1;
0938             else
0939                vecCore::Load<T>(invError, invErrorPtr);
0940 
0941             // TODO: Check error options and invert if needed
0942 
0943             T fval = 0;
0944 
0945             const T *x = nullptr;
0946 
0947             unsigned int ndim = data.NDim();
0948             // need to declare vector outside if statement
0949             // otherwise pointer will be invalid
0950             std::vector<T> xc;
0951             if (ndim > 1) {
0952                xc.resize(ndim);
0953                xc[0] = x1;
0954                for (unsigned int j = 1; j < ndim; ++j)
0955                   vecCore::Load<T>(xc[j], data.GetCoordComponent(i * vecSize, j));
0956                x = xc.data();
0957             } else {
0958                x = &x1;
0959             }
0960 
0961             fval = func(x, p);
0962             func.ParameterGradient(x, p, &gradFunc[0]);
0963 
0964             validPointsMasks[i] = CheckInfNaNValues(fval);
0965             if (vecCore::MaskEmpty(validPointsMasks[i])) {
0966                // Return a zero contribution to all partial derivatives on behalf of the current points
0967                return pointContributionVec;
0968             }
0969 
0970             // loop on the parameters
0971             for (unsigned int ipar = 0; ipar < npar; ++ipar) {
0972                // avoid singularities in the function (infinity and nan) in the chi2 sum
0973                // possibly add in the future the option of excluding some points (e.g. singularities)
0974                validPointsMasks[i] = CheckInfNaNValues(gradFunc[ipar]);
0975 
0976                if (vecCore::MaskEmpty(validPointsMasks[i])) {
0977                   break; // exit loop on parameters
0978                }
0979 
0980                // calculate derivative point contribution (only for valid points)
0981                vecCore::MaskedAssign(pointContributionVec[ipar], validPointsMasks[i],
0982                                      -2.0 * (y - fval) * invError * invError * gradFunc[ipar]);
0983             }
0984 
0985             return pointContributionVec;
0986          };
0987 
0988          // Reduce the set of vectors by summing their equally-indexed components
0989          auto redFunction = [&](const std::vector<std::vector<T>> &partialResults) {
0990             std::vector<T> result(npar);
0991 
0992             for (auto const &pointContributionVec : partialResults) {
0993                for (unsigned int parameterIndex = 0; parameterIndex < npar; parameterIndex++)
0994                   result[parameterIndex] += pointContributionVec[parameterIndex];
0995             }
0996 
0997             return result;
0998          };
0999 
1000          std::vector<T> gVec(npar);
1001          std::vector<double> g(npar);
1002 
1003 #ifndef R__USE_IMT
1004          // to fix compiler warning
1005          (void)nChunks;
1006 
1007          // If IMT is disabled, force the execution policy to the serial case
1008          if (executionPolicy == ::ROOT::EExecutionPolicy::kMultiThread) {
1009             Warning("FitUtil::EvaluateChi2Gradient",
1010                     "Multithread execution policy requires IMT, which is disabled. Changing "
1011                     "to ::ROOT::EExecutionPolicy::kSequential.");
1012             executionPolicy = ::ROOT::EExecutionPolicy::kSequential;
1013          }
1014 #endif
1015 
1016          if (executionPolicy == ::ROOT::EExecutionPolicy::kSequential) {
1017             ROOT::TSequentialExecutor pool;
1018             gVec = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, numVectors), redFunction);
1019          }
1020 #ifdef R__USE_IMT
1021          else if (executionPolicy == ::ROOT::EExecutionPolicy::kMultiThread) {
1022             ROOT::TThreadExecutor pool;
1023             auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(numVectors);
1024             gVec = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, numVectors), redFunction, chunks);
1025          }
1026 #endif
1027          else {
1028             Error(
1029                "FitUtil::EvaluateChi2Gradient",
1030                "Execution policy unknown. Available choices:\n ::ROOT::EExecutionPolicy::kSequential (default)\n ::ROOT::EExecutionPolicy::kMultiThread (requires IMT)\n");
1031          }
1032 
1033          // Compute the contribution from the remaining points
1034          unsigned int remainingPoints = initialNPoints % vecSize;
1035          if (remainingPoints > 0) {
1036             auto remainingPointsContribution = mapFunction(numVectors);
1037             // Add the contribution from the valid remaining points and store the result in the output variable
1038             auto remainingMask = vecCore::Int2Mask<T>(remainingPoints);
1039             for (unsigned int param = 0; param < npar; param++) {
1040                vecCore::MaskedAssign(gVec[param], remainingMask, gVec[param] + remainingPointsContribution[param]);
1041             }
1042          }
1043          // reduce final gradient result from T to double
1044          for (unsigned int param = 0; param < npar; param++) {
1045             grad[param] = vecCore::ReduceAdd(gVec[param]);
1046          }
1047 
1048          // correct the number of points
1049          nPoints = initialNPoints;
1050 
1051          if (std::any_of(validPointsMasks.begin(), validPointsMasks.end(),
1052                          [](vecCore::Mask<T> validPoints) { return !vecCore::MaskFull(validPoints); })) {
1053             unsigned nRejected = 0;
1054 
1055             for (const auto &mask : validPointsMasks) {
1056                for (unsigned int i = 0; i < vecSize; i++) {
1057                   nRejected += !vecCore::Get(mask, i);
1058                }
1059             }
1060 
1061             assert(nRejected <= initialNPoints);
1062             nPoints = initialNPoints - nRejected;
1063 
1064             if (nPoints < npar) {
1065                MATH_ERROR_MSG("FitUtil::EvaluateChi2Gradient",
1066                               "Too many points rejected for overflow in gradient calculation");
1067            }
1068          }
1069       }
1070 
1071       static double EvalChi2Residual(const IModelFunctionTempl<T> &, const BinData &, const double *, unsigned int, double *, double *, bool, bool)
1072       {
1073          Error("FitUtil::Evaluate<T>::EvalChi2Residual", "The vectorized evaluation of the Chi2 with the ith residual is still not supported");
1074          return -1.;
1075       }
1076 
1077       /// evaluate the pdf (Poisson) contribution to the logl (return actually log of pdf)
1078       /// and its gradient
1079       static double EvalPoissonBinPdf(const IModelFunctionTempl<T> &, const BinData &, const double *, unsigned int , double * , double * , bool, bool) {
1080          Error("FitUtil::Evaluate<T>::EvaluatePoissonBinPdf", "The vectorized evaluation of the BinnedLikelihood fit evaluated point by point is still not supported");
1081          return -1.;
1082       }
1083 
1084       static double EvalPdf(const IModelFunctionTempl<T> &, const UnBinData &, const double *, unsigned int , double * , double * , bool, bool) {
1085          Error("FitUtil::Evaluate<T>::EvalPdf", "The vectorized evaluation of the LogLikelihood fit evaluated point by point is still not supported");
1086          return -1.;
1087       }
1088 
1089       //template <class NotCompileIfScalarBackend = std::enable_if<!(std::is_same<double, ROOT::Double_v>::value)>>
1090       // static double EvalPdf(const IModelFunctionTempl<ROOT::Double_v> &func, const UnBinData &data, const double *p, unsigned int i, double *, double *, bool, bool) {
1091       // // evaluate the pdf contribution to the generic logl function in case of bin data
1092       // // return actually the log of the pdf and its derivatives
1093       // // func.SetParameters(p);
1094       // const auto x = vecCore::FromPtr<ROOT::Double_v>(data.GetCoordComponent(i, 0));
1095       // auto fval = func(&x, p);
1096       // auto logPdf = ROOT::Math::Util::EvalLog(fval);
1097       // return vecCore::Get<ROOT::Double_v>(logPdf, 0);
1098 
1099       static void
1100       EvalPoissonLogLGradient(const IModelFunctionTempl<T> &f, const BinData &data, const double *p, double *grad,
1101                               unsigned int &,
1102                               ::ROOT::EExecutionPolicy executionPolicy = ::ROOT::EExecutionPolicy::kSequential,
1103                               unsigned nChunks = 0)
1104       {
1105          // evaluate the gradient of the Poisson log likelihood function
1106 
1107          const IGradModelFunctionTempl<T> *fg = dynamic_cast<const IGradModelFunctionTempl<T> *>(&f);
1108          assert(fg != nullptr); // must be called by a grad function
1109 
1110          const IGradModelFunctionTempl<T> &func = *fg;
1111 
1112          (const_cast<IGradModelFunctionTempl<T> &>(func)).SetParameters(p);
1113 
1114 
1115          const DataOptions &fitOpt = data.Opt();
1116          if (fitOpt.fBinVolume || fitOpt.fIntegral || fitOpt.fExpErrors)
1117             Error("FitUtil::EvaluatePoissonLogLGradient", "The vectorized implementation doesn't support Integrals,"
1118                                                           "BinVolume or ExpErrors\n. Aborting operation.");
1119 
1120          unsigned int npar = func.NPar();
1121          auto vecSize = vecCore::VectorSize<T>();
1122          unsigned initialNPoints = data.Size();
1123          unsigned numVectors = initialNPoints / vecSize;
1124 
1125          auto mapFunction = [&](const unsigned int i) {
1126             // set all vector values to zero
1127             std::vector<T> gradFunc(npar);
1128             std::vector<T> pointContributionVec(npar);
1129 
1130             T x1, y;
1131 
1132             vecCore::Load<T>(x1, data.GetCoordComponent(i * vecSize, 0));
1133             vecCore::Load<T>(y, data.ValuePtr(i * vecSize));
1134 
1135             T fval = 0;
1136 
1137             const T *x = nullptr;
1138 
1139             unsigned ndim = data.NDim();
1140             std::vector<T> xc;
1141             if (ndim > 1) {
1142                xc.resize(ndim);
1143                xc[0] = x1;
1144                for (unsigned int j = 1; j < ndim; ++j)
1145                   vecCore::Load<T>(xc[j], data.GetCoordComponent(i * vecSize, j));
1146                x = xc.data();
1147             } else {
1148                x = &x1;
1149             }
1150 
1151             fval = func(x, p);
1152             func.ParameterGradient(x, p, &gradFunc[0]);
1153 
1154             // correct the gradient
1155             for (unsigned int ipar = 0; ipar < npar; ++ipar) {
1156                vecCore::Mask<T> positiveValuesMask = fval > 0;
1157 
1158                // df/dp * (1.  - y/f )
1159                vecCore::MaskedAssign(pointContributionVec[ipar], positiveValuesMask, gradFunc[ipar] * (1. - y / fval));
1160 
1161                vecCore::Mask<T> validNegativeValuesMask = !positiveValuesMask && gradFunc[ipar] != 0;
1162 
1163                if (!vecCore::MaskEmpty(validNegativeValuesMask)) {
1164                   const T kdmax1 = vecCore::math::Sqrt(vecCore::NumericLimits<T>::Max());
1165                   const T kdmax2 = vecCore::NumericLimits<T>::Max() / (4 * initialNPoints);
1166                   T gg = kdmax1 * gradFunc[ipar];
1167                   pointContributionVec[ipar] = -vecCore::Blend(gg > 0, vecCore::math::Min(gg, kdmax2), vecCore::math::Max(gg, -kdmax2));
1168                }
1169             }
1170 
1171 #ifdef DEBUG_FITUTIL
1172       {
1173          R__LOCKGUARD(gROOTMutex);
1174          if (i < 5 || (i > data.Size()-5) ) {
1175             if (data.NDim() > 1) std::cout << i << "  x " << x[0] << " y " << x[1];
1176             else std::cout << i << "  x " << x[0];
1177             std::cout << " func " << fval  << " gradient ";
1178             for (unsigned int ii = 0; ii < npar; ++ii) std::cout << "  " << pointContributionVec[ii];
1179             std::cout << "\n";
1180          }
1181       }
1182 #endif
1183 
1184             return pointContributionVec;
1185          };
1186 
1187          // Vertically reduce the set of vectors by summing their equally-indexed components
1188          auto redFunction = [&](const std::vector<std::vector<T>> &partialResults) {
1189             std::vector<T> result(npar);
1190 
1191             for (auto const &pointContributionVec : partialResults) {
1192                for (unsigned int parameterIndex = 0; parameterIndex < npar; parameterIndex++)
1193                   result[parameterIndex] += pointContributionVec[parameterIndex];
1194             }
1195 
1196             return result;
1197          };
1198 
1199          std::vector<T> gVec(npar);
1200 
1201 #ifndef R__USE_IMT
1202          // to fix compiler warning
1203          (void)nChunks;
1204 
1205          // If IMT is disabled, force the execution policy to the serial case
1206          if (executionPolicy == ::ROOT::EExecutionPolicy::kMultiThread) {
1207             Warning("FitUtil::EvaluatePoissonLogLGradient",
1208                     "Multithread execution policy requires IMT, which is disabled. Changing "
1209                     "to ::ROOT::EExecutionPolicy::kSequential.");
1210             executionPolicy = ::ROOT::EExecutionPolicy::kSequential;
1211          }
1212 #endif
1213 
1214          if (executionPolicy == ::ROOT::EExecutionPolicy::kSequential) {
1215             ROOT::TSequentialExecutor pool;
1216             gVec = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, numVectors), redFunction);
1217          }
1218 #ifdef R__USE_IMT
1219          else if (executionPolicy == ::ROOT::EExecutionPolicy::kMultiThread) {
1220             ROOT::TThreadExecutor pool;
1221             auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(numVectors);
1222             gVec = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, numVectors), redFunction, chunks);
1223          }
1224 #endif
1225          else {
1226             Error("FitUtil::EvaluatePoissonLogLGradient", "Execution policy unknown. Available choices:\n "
1227                                                           "::ROOT::EExecutionPolicy::kSequential (default)\n "
1228                                                           "::ROOT::EExecutionPolicy::kMultiThread (requires IMT)\n");
1229          }
1230 
1231 
1232          // Compute the contribution from the remaining points
1233          unsigned int remainingPoints = initialNPoints % vecSize;
1234          if (remainingPoints > 0) {
1235             auto remainingPointsContribution = mapFunction(numVectors);
1236             // Add the contribution from the valid remaining points and store the result in the output variable
1237             auto remainingMask = vecCore::Int2Mask<T>(remainingPoints);
1238             for (unsigned int param = 0; param < npar; param++) {
1239                vecCore::MaskedAssign(gVec[param], remainingMask, gVec[param] + remainingPointsContribution[param]);
1240             }
1241          }
1242          // reduce final gradient result from T to double
1243          for (unsigned int param = 0; param < npar; param++) {
1244             grad[param] = vecCore::ReduceAdd(gVec[param]);
1245          }
1246 
1247 #ifdef DEBUG_FITUTIL
1248          std::cout << "***** Final gradient : ";
1249          for (unsigned int ii = 0; ii< npar; ++ii) std::cout << grad[ii] << "   ";
1250          std::cout << "\n";
1251 #endif
1252 
1253       }
1254 
1255       static void EvalLogLGradient(const IModelFunctionTempl<T> &f, const UnBinData &data, const double *p,
1256                                    double *grad, unsigned int &,
1257                                    ::ROOT::EExecutionPolicy executionPolicy = ::ROOT::EExecutionPolicy::kSequential,
1258                                    unsigned nChunks = 0)
1259       {
1260          // evaluate the gradient of the log likelihood function
1261 
1262          const IGradModelFunctionTempl<T> *fg = dynamic_cast<const IGradModelFunctionTempl<T> *>(&f);
1263          assert(fg != nullptr); // must be called with a function that provides parameter gradients
1264 
1265          const IGradModelFunctionTempl<T> &func = *fg;
1266 
1267 
1268          unsigned int npar = func.NPar();
1269          auto vecSize = vecCore::VectorSize<T>();
1270          unsigned initialNPoints = data.Size();
1271          unsigned numVectors = initialNPoints / vecSize;
1272 
1273 #ifdef DEBUG_FITUTIL
1274          std::cout << "\n===> Evaluate Gradient for parameters ";
1275          for (unsigned int ip = 0; ip < npar; ++ip)
1276             std::cout << "  " << p[ip];
1277          std::cout << "\n";
1278 #endif
1279 
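         // cache the current parameter values in the function object before the per-point loop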
1280          (const_cast<IGradModelFunctionTempl<T> &>(func)).SetParameters(p);
1281 
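         // clamping constants for lanes where the model value is not positive:
         // they keep the gradient contribution large but finite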
1282          const T kdmax1 = vecCore::math::Sqrt(vecCore::NumericLimits<T>::Max());
1283          const T kdmax2 = vecCore::NumericLimits<T>::Max() / (4 * initialNPoints);
1284 
1285          auto mapFunction = [&](const unsigned int i) {
1286             std::vector<T> gradFunc(npar);
1287             std::vector<T> pointContributionVec(npar);
1288 
1289             T x1;
1290             vecCore::Load<T>(x1, data.GetCoordComponent(i * vecSize, 0));
1291 
1292             const T *x = nullptr;
1293 
1294             unsigned int ndim = data.NDim();
1295             std::vector<T> xc;
1296             if (ndim > 1) {
1297                xc.resize(ndim);
1298                xc[0] = x1;
1299                for (unsigned int j = 1; j < ndim; ++j)
1300                   vecCore::Load<T>(xc[j], data.GetCoordComponent(i * vecSize, j));
1301                x = xc.data();
1302             } else {
1303                x = &x1;
1304             }
1305 
1306 
1307             T fval = func(x, p);
1308             func.ParameterGradient(x, p, &gradFunc[0]);
1309 
1310 #ifdef DEBUG_FITUTIL
1311             if (i < 5 || (i > numVectors-5) ) {
1312                if (ndim > 1) std::cout << i << "  x " << x[0] << " y " << x[1] << " gradient " << gradFunc[0] << "  " << gradFunc[1] << "  " << gradFunc[2] << std::endl;
1313                else std::cout << i << "  x " << x[0] << " gradient " << gradFunc[0] << "  " << gradFunc[1] << "  " << gradFunc[2] << std::endl;
1314             }
1315 #endif
1316 
1317             vecCore::Mask<T> positiveValues = fval > 0;
1318 
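            // where f > 0 the contribution of this point to the gradient of -log L is
            // -(1/f) * df/dp; non-positive lanes are handled by the clamped branch below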
1319             for (unsigned int kpar = 0; kpar < npar; ++kpar) {
1320                if (!vecCore::MaskEmpty(positiveValues))
1321                   vecCore::MaskedAssign<T>(pointContributionVec[kpar], positiveValues, -1. / fval * gradFunc[kpar]);
1322 
1323                vecCore::Mask<T> nonZeroGradientValues = !positiveValues && gradFunc[kpar] != 0;
1324                if (!vecCore::MaskEmpty(nonZeroGradientValues)) {
1325                   T gg = kdmax1 * gradFunc[kpar];
1326                   pointContributionVec[kpar] =
1327                      vecCore::Blend(nonZeroGradientValues && gg > 0, -vecCore::math::Min(gg, kdmax2),
1328                                     -vecCore::math::Max(gg, -kdmax2));
1329                }
1330                // if the function derivative is zero, the term is also zero, so do not add it to g[kpar]
1331             }
1332 
1333             return pointContributionVec;
1334          };
1335 
1336          // Vertically reduce the set of vectors by summing their equally-indexed components
1337          auto redFunction = [&](const std::vector<std::vector<T>> &pointContributions) {
1338             std::vector<T> result(npar);
1339 
1340             for (auto const &pointContributionVec : pointContributions) {
1341                for (unsigned int parameterIndex = 0; parameterIndex < npar; parameterIndex++)
1342                   result[parameterIndex] += pointContributionVec[parameterIndex];
1343             }
1344 
1345             return result;
1346          };
1347 
1348          std::vector<T> gVec(npar);
1349          std::vector<double> g(npar);
1350 
1351 #ifndef R__USE_IMT
1352          // avoid an unused-parameter warning for nChunks when IMT is disabled
1353          (void)nChunks;
1354 
1355          // If IMT is disabled, force the execution policy to the serial case
1356          if (executionPolicy == ::ROOT::EExecutionPolicy::kMultiThread) {
1357             Warning("FitUtil::EvaluateLogLGradient",
1358                     "Multithread execution policy requires IMT, which is disabled. Changing "
1359                     "to ::ROOT::EExecutionPolicy::kSequential.");
1360             executionPolicy = ::ROOT::EExecutionPolicy::kSequential;
1361          }
1362 #endif
1363 
1364          if (executionPolicy == ::ROOT::EExecutionPolicy::kSequential) {
1365             ROOT::TSequentialExecutor pool;
1366             gVec = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, numVectors), redFunction);
1367          }
1368 #ifdef R__USE_IMT
1369          else if (executionPolicy == ::ROOT::EExecutionPolicy::kMultiThread) {
1370             ROOT::TThreadExecutor pool;
1371             auto chunks = nChunks != 0 ? nChunks : setAutomaticChunking(numVectors);
1372             gVec = pool.MapReduce(mapFunction, ROOT::TSeq<unsigned>(0, numVectors), redFunction, chunks);
1373          }
1374 #endif
1375          else {
1376             Error("FitUtil::EvaluateLogLGradient", "Execution policy unknown. Available choices:\n "
1377                                                    "::ROOT::EExecutionPolicy::kSequential (default)\n "
1378                                                    "::ROOT::EExecutionPolicy::kMultiThread (requires IMT)\n");
1379          }
1380 
1381          // Compute the contribution from the remaining points
1382          unsigned int remainingPoints = initialNPoints % vecSize;
1383          if (remainingPoints > 0) {
1384             auto remainingPointsContribution = mapFunction(numVectors);
1385             // Add the contribution from the valid remaining points and store the result in the output variable
1386             auto remainingMask = vecCore::Int2Mask<T>(remainingPoints);
1387             for (unsigned int param = 0; param < npar; param++) {
1388                vecCore::MaskedAssign(gVec[param], remainingMask, gVec[param] + remainingPointsContribution[param]);
1389             }
1390          }
1391          // reduce final gradient result from T to double
1392          for (unsigned int param = 0; param < npar; param++) {
1393             grad[param] = vecCore::ReduceAdd(gVec[param]);
1394          }
1395 
1396 #ifdef DEBUG_FITUTIL
1397          std::cout << "Final gradient ";
1398          for (unsigned int param = 0; param < npar; param++) {
1399             std::cout << "  " << grad[param];
1400          }
1401          std::cout << "\n";
1402 #endif
1403       }
1404    };
1405 
1406    template<>
1407    struct Evaluate<double>{
1408 #endif
1409 
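      // scalar (double) case: each method forwards to the corresponding
      // non-vectorized FitUtil free function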
1410       static double EvalChi2(const IModelFunction &func, const BinData &data, const double *p, unsigned int &nPoints,
1411                              ::ROOT::EExecutionPolicy executionPolicy, unsigned nChunks = 0)
1412       {
1413          // evaluate the chi2 given a function reference and the data; returns the value and,
1414          // in nPoints, the actual number of points used
1415          // normal chi2 using only the error on the values (as obtained when fitting a histogram);
1416          // optionally the integral of the function in the bin is used
1417 
1418 
1419          //Info("EvalChi2","Using non-vectorized implementation %d",(int) data.Opt().fIntegral);
1420 
1421          return FitUtil::EvaluateChi2(func, data, p, nPoints, executionPolicy, nChunks);
1422       }
1423 
1424       static double EvalLogL(const IModelFunctionTempl<double> &func, const UnBinData &data, const double *p,
1425                              int iWeight, bool extended, unsigned int &nPoints,
1426                              ::ROOT::EExecutionPolicy executionPolicy, unsigned nChunks = 0)
1427       {
1428          return FitUtil::EvaluateLogL(func, data, p, iWeight, extended, nPoints, executionPolicy, nChunks);
1429       }
1430 
1431       static double EvalPoissonLogL(const IModelFunctionTempl<double> &func, const BinData &data, const double *p,
1432                                     int iWeight, bool extended, unsigned int &nPoints,
1433                                     ::ROOT::EExecutionPolicy executionPolicy, unsigned nChunks = 0)
1434       {
1435          return FitUtil::EvaluatePoissonLogL(func, data, p, iWeight, extended, nPoints, executionPolicy, nChunks);
1436       }
1437 
1438       static double EvalChi2Effective(const IModelFunctionTempl<double> &func, const BinData & data, const double * p, unsigned int &nPoints)
1439       {
1440          return FitUtil::EvaluateChi2Effective(func, data, p, nPoints);
1441       }
1442       static void EvalChi2Gradient(const IModelFunctionTempl<double> &func, const BinData &data, const double *p,
1443                                    double *g, unsigned int &nPoints,
1444                                    ::ROOT::EExecutionPolicy executionPolicy = ::ROOT::EExecutionPolicy::kSequential,
1445                                    unsigned nChunks = 0)
1446       {
1447          FitUtil::EvaluateChi2Gradient(func, data, p, g, nPoints, executionPolicy, nChunks);
1448       }
1449 
1450       static double EvalChi2Residual(const IModelFunctionTempl<double> &func, const BinData & data, const double * p, unsigned int i, double *g, double * h,
1451                                     bool hasGrad, bool fullHessian)
1452       {
1453          return FitUtil::EvaluateChi2Residual(func, data, p, i, g, h, hasGrad, fullHessian);
1454       }
1455 
1456       /// evaluate the Poisson pdf contribution to the log-likelihood (the returned value is actually the log of the pdf)
1457       /// and its gradient
1458       static double EvalPoissonBinPdf(const IModelFunctionTempl<double> &func, const BinData & data, const double *p, unsigned int i, double *g, double * h, bool hasGrad, bool fullHessian) {
1459          return FitUtil::EvaluatePoissonBinPdf(func, data, p, i, g, h, hasGrad, fullHessian);
1460       }
1461 
1462       static double EvalPdf(const IModelFunctionTempl<double> &func, const UnBinData & data, const double *p, unsigned int i, double *g, double * h, bool hasGrad, bool fullHessian) {
1463          return FitUtil::EvaluatePdf(func, data, p, i, g, h, hasGrad, fullHessian);
1464       }
1465 
1466       static void
1467       EvalPoissonLogLGradient(const IModelFunctionTempl<double> &func, const BinData &data, const double *p, double *g,
1468                               unsigned int &nPoints,
1469                               ::ROOT::EExecutionPolicy executionPolicy = ::ROOT::EExecutionPolicy::kSequential,
1470                               unsigned nChunks = 0)
1471       {
1472          FitUtil::EvaluatePoissonLogLGradient(func, data, p, g, nPoints, executionPolicy, nChunks);
1473       }
1474 
1475       static void EvalLogLGradient(const IModelFunctionTempl<double> &func, const UnBinData &data, const double *p,
1476                                    double *g, unsigned int &nPoints,
1477                                    ::ROOT::EExecutionPolicy executionPolicy = ::ROOT::EExecutionPolicy::kSequential,
1478                                    unsigned nChunks = 0)
1479       {
1480          FitUtil::EvaluateLogLGradient(func, data, p, g, nPoints, executionPolicy, nChunks);
1481       }
1482    };
1483 
1484 } // end namespace FitUtil
1485 
1486    } // end namespace Fit
1487 
1488 } // end namespace ROOT
1489 
1490 #if defined (R__HAS_VECCORE) && defined(R__HAS_VC)
1491 //Fixes alignment for structures of SIMD structures
1492 Vc_DECLARE_ALLOCATOR(ROOT::Fit::FitUtil::LikelihoodAux<ROOT::Double_v>);
1493 #endif
1494 
1495 #endif /* ROOT_Fit_FitUtil */