YODA/Utils/MathUtils.h

0001 // -*- C++ -*-
0002 //
0003 // This file is part of YODA -- Yet more Objects for Data Analysis
0004 // Copyright (C) 2008-2025 The YODA collaboration (see AUTHORS for details)
0005 //
0006 #ifndef YODA_MathUtils_H
0007 #define YODA_MathUtils_H
0008
0009 /// @todo Add SFINAE math type stuff (see Rivet) and add inrange() and inrange_closed_closed() etc. aliases cf. MCUtils
0010
0011 #include "YODA/Exceptions.h"
0012 #include "YODA/Config/BuildConfig.h"
0013
0014 #include <algorithm>
0015 #include <cassert>
0016 #include <cfloat>
0017 #include <climits>
0018 #include <cmath>
0019 #include <functional>
0020 #include <iostream>
0021 #include <limits>
0022 #include <map>
0023 #include <numeric>
0024 #include <numeric>
0025 #include <ostream>
0026 #include <sstream>
0027 #include <stdexcept>
0028 #include <string>
0029 #include <utility>
0030 #include <vector>
0031
0032 namespace YODA {
0033
0034
0035   /// Pre-defined numeric type limits
0036   /// @deprecated Prefer the standard DBL/INT_MAX
0037   const static double MAXDOUBLE = DBL_MAX; // was std::numeric_limits<double>::max(); -- warns in GCC5
0038   const static double MAXINT = INT_MAX; // was std::numeric_limits<int>::max(); -- warns in GCC5
0039
0040   /// A pre-defined value of \f$ \pi \f$.
0041   static const double PI = M_PI;
0042
0043   /// A pre-defined value of \f$ 2\pi \f$.
0044   static const double TWOPI = 2*M_PI;
0045
0046   /// A pre-defined value of \f$ \pi/2 \f$.
0047   static const double HALFPI = M_PI_2;
0048
0049   /// Enum for signs of numbers.
0050   enum Sign { MINUS = -1, ZERO = 0, PLUS = 1 };
0051
0052
0053   /// @name Comparison functions for safe (floating point) equality tests
0054   /// @{
0055
0056   /// @brief Compare a number to zero
0057   ///
0058   /// This version for floating point types has a degree of fuzziness expressed
0059   /// by the absolute @a tolerance parameter, for floating point safety.
0060   template <typename NUM>
0061   inline typename std::enable_if_t<std::is_floating_point_v<NUM>, bool>
0062   isZero(NUM val, double tolerance=1e-5) {
0063     return fabs(val) < tolerance;
0064   }
0065
0066   /// @brief Compare a number to zero
0067   ///
0068   /// SFINAE template specialisation for integers, since there is no FP
0069   /// precision issue.
0070   template <typename NUM>
0071   inline typename std::enable_if_t<std::is_integral_v<NUM>, bool>
0072   isZero(NUM val, double = 1e-5) {
0073     return val==0;
0074   }
0075
0076   /// @brief Check if a number is NaN
0077   template <typename NUM>
0078   inline typename std::enable_if_t<std::is_floating_point_v<NUM>, bool>
0079   isNaN(NUM val) { return std::isnan(val); }
0080
0081   /// @brief Check if a number is non-NaN
0082   template <typename NUM>
0083   inline typename std::enable_if_t<std::is_floating_point_v<NUM>, bool>
0084   notNaN(NUM val) { return !std::isnan(val); }
0085
0086   /// @brief Compare two numbers for equality with a degree of fuzziness
0087   ///
0088   /// This version for floating point types (if any argument is FP) has a degree
0089   /// of fuzziness expressed by the fractional @a tolerance parameter, for
0090   /// floating point safety.
0091   template <typename N1, typename N2>
0092   inline typename std::enable_if_t<
0093     std::is_arithmetic_v<N1> && std::is_arithmetic_v<N2> &&
0094    (std::is_floating_point_v<N1> || std::is_floating_point_v<N2>), bool>
0095   fuzzyEquals(N1 a, N2 b, double tolerance=1e-5) {
0096     const double absavg = (std::abs(a) + std::abs(b))/2.0;
0097     const double absdiff = std::abs(a - b);
0098     const bool rtn = (isZero(a) && isZero(b)) || absdiff < tolerance*absavg;
0099     return rtn;
0100   }
0101
0102   /// @brief Compare two numbers for equality with a degree of fuzziness
0103   ///
0104   /// Simpler SFINAE template specialisation for integers, since there is no FP
0105   /// precision issue.
0106   template <typename N1, typename N2>
0107   inline typename std::enable_if_t<
0108     std::is_integral_v<N1> && std::is_integral_v<N2>, bool>
0109     fuzzyEquals(N1 a, N2 b, double) { //< NB. unused tolerance parameter for ints, still needs a default value!
0110     return a == b;
0111   }
0112
0113   /// @brief Comparator wrapper to use with STL algorithms, e.g. std::equal etc.
0114   static std::function<bool(const double, const double)> fuzzyEqComp =
0115     [](const double& lhs, const double& rhs) { return fuzzyEquals(lhs, rhs); };
0116
0117
0118   /// @brief Compare two numbers for >= with a degree of fuzziness
0119   ///
0120   /// The @a tolerance parameter on the equality test is as for @c fuzzyEquals.
0121   template <typename N1, typename N2>
0122   inline typename std::enable_if_t<
0123     std::is_arithmetic_v<N1> && std::is_arithmetic_v<N2>, bool>
0124   fuzzyGtrEquals(N1 a, N2 b, double tolerance=1e-5) {
0125     return a > b || fuzzyEquals(a, b, tolerance);
0126   }
0127
0128   /// @brief Compare two floating point numbers for <= with a degree of fuzziness
0129   ///
0130   /// The @a tolerance parameter on the equality test is as for @c fuzzyEquals.
0131   template <typename N1, typename N2>
0132   inline typename std::enable_if_t<
0133     std::is_arithmetic_v<N1> && std::is_arithmetic_v<N2>, bool>
0134   fuzzyLessEquals(N1 a, N2 b, double tolerance=1e-5) {
0135     return a < b || fuzzyEquals(a, b, tolerance);
0136   }
0137
0138   /// Returns a number floored at the nth decimal place.
0139   inline double approx(double a, int n = 5) {
0140     double roundTo = pow(10.0,n);
0141     a *= roundTo;
0142     a = floor(a);
0143     return a/roundTo;
0144   }
0145   /// @}
0146
0147
0148   /// @name Ranges and intervals
0149   /// @{
0150
0151   /// Represents whether an interval is open (non-inclusive) or closed (inclusive).
0152   ///
0153   /// For example, the interval \f$ [0, \pi) \f$ is closed (an inclusive
0154   /// boundary) at 0, and open (a non-inclusive boundary) at \f$ \pi \f$.
0155   enum RangeBoundary { OPEN=0, SOFT=0, CLOSED=1, HARD=1 };
0156
0157
0158   /// @brief Determine if @a value is in the range @a low to @a high, for floating point numbers
0159   ///
0160   /// Interval boundary types are defined by @a lowbound and @a highbound.
0161   /// @todo Optimise to one-line at compile time?
0162   template<typename NUM>
0163   inline bool inRange(NUM value, NUM low, NUM high,
0164                       RangeBoundary lowbound=CLOSED, RangeBoundary highbound=OPEN) {
0165     if (lowbound == OPEN && highbound == OPEN) {
0166       return (value > low && value < high);
0167     } else if (lowbound == OPEN && highbound == CLOSED) {
0168       return (value > low && value <= high);
0169     } else if (lowbound == CLOSED && highbound == OPEN) {
0170       return (value >= low && value < high);
0171     } else { // if (lowbound == CLOSED && highbound == CLOSED)
0172       return (value >= low && value <= high);
0173     }
0174   }
0175
0176   /// Alternative version of inRange for doubles, which accepts a pair for the range arguments.
0177   template<typename NUM>
0178   inline bool inRange(NUM value, std::pair<NUM, NUM> lowhigh,
0179                       RangeBoundary lowbound=CLOSED, RangeBoundary highbound=OPEN) {
0180     return inRange(value, lowhigh.first, lowhigh.second, lowbound, highbound);
0181   }
0182
0183
0184   /// @brief Determine if @a value is in the range @a low to @a high, for integer types
0185   ///
0186   /// Interval boundary types are defined by @a lowbound and @a highbound.
0187   /// @todo Optimise to one-line at compile time?
0188   inline bool inRange(int value, int low, int high,
0189                       RangeBoundary lowbound=CLOSED, RangeBoundary highbound=CLOSED) {
0190     if (lowbound == OPEN && highbound == OPEN) {
0191       return (value > low && value < high);
0192     } else if (lowbound == OPEN && highbound == CLOSED) {
0193       return (value > low && value <= high);
0194     } else if (lowbound == CLOSED && highbound == OPEN) {
0195       return (value >= low && value < high);
0196     } else { // if (lowbound == CLOSED && highbound == CLOSED)
0197       return (value >= low && value <= high);
0198     }
0199   }
0200
0201   /// Alternative version of @c inRange for ints, which accepts a pair for the range arguments.
0202   inline bool inRange(int value, std::pair<int, int> lowhigh,
0203                       RangeBoundary lowbound=CLOSED, RangeBoundary highbound=OPEN) {
0204     return inRange(value, lowhigh.first, lowhigh.second, lowbound, highbound);
0205   }
0206
0207   /// @}
0208
0209
0210   /// @name Miscellaneous numerical helpers
0211   /// @{
0212
0213   /// Named number-type squaring operation.
0214   template <typename NUM>
0215   inline NUM sqr(NUM a) {
0216     return a*a;
0217   }
0218
0219   /// Named number-type addition in quadrature operation.
0220   template <typename... Num>
0221   inline std::enable_if_t<std::conjunction_v<std::is_arithmetic<Num>...>, double>
0222   add_quad(Num ... vals) {
0223     return sqrt((0.0 + ... + sqr(vals)));
0224   }
0225
0226   /// Find the sign of a number.
0227   inline int sign(double val) {
0228     if (isZero(val)) return ZERO;
0229     const int valsign = (val > 0) ? PLUS : MINUS;
0230     return valsign;
0231   }
0232
0233   /// Find the sign of a number.
0234   inline int sign(int val) {
0235     if (val == 0) return ZERO;
0236     return (val > 0) ? PLUS : MINUS;
0237   }
0238
0239   /// Find the sign of a number.
0240   inline int sign(long val) {
0241     if (val == 0) return ZERO;
0242     return (val > 0) ? PLUS : MINUS;
0243   }
0244
0245   /// Subtract two numbers with FP fuzziness.
0246   inline double subtract(double a, double b, double tolerance = 1e-5) {
0247     if (fuzzyEquals(a,b,tolerance))  return 0.;
0248     return a - b;
0249   }
0250
0251   /// Add two numbers with FP fuzziness.
0252   inline double add(double a, double b, double tolerance = 1e-5) {
0253     return subtract(a,-b,tolerance);
0254   }
0255
0256   /// @}
0257
0258
0259   /// @name Binning helper functions
0260   /// @{
0261
0262   /// @brief Make a list of @a nbins + 1 values uniformly spaced between @a xmin and @a xmax inclusive.
0263   ///
0264   /// @note The arg ordering and the meaning of the nbins variable is "histogram-like",
0265   /// as opposed to the Numpy/Matlab version.
0266   inline std::vector<double> linspace(size_t nbins, double xmin, double xmax, bool include_end=true) {
0267     if (xmax < xmin)  throw RangeError("xmax should not be smaller than xmin!");
0268     if (nbins == 0)   throw RangeError("Requested number of bins is 0!");
0269     std::vector<double> rtn;
0270     const double interval = (xmax-xmin)/static_cast<double>(nbins);
0271     for (size_t i = 0; i < nbins; ++i) {
0272       rtn.push_back(xmin + i*interval);
0273     }
0274     assert(rtn.size() == nbins);
0275     if (include_end) rtn.push_back(xmax); // exact xmax, not result of n * interval
0276     return rtn;
0277   }
0278
0279
0280   /// @brief Make a list of @a nbins + 1 values uniformly spaced in log(x) between @a xmin and @a xmax inclusive.
0281   ///
0282   /// @note The arg ordering and the meaning of the nbins variable is "histogram-like",
0283   /// as opposed to the Numpy/Matlab version, and the xmin and xmax arguments are expressed
0284   /// in "normal" space, rather than as the logarithms of the xmin/xmax values as in Numpy/Matlab.
0285   inline std::vector<double> logspace(size_t nbins, double xmin, double xmax, bool include_end=true) {
0286     if (xmax < xmin)  throw RangeError("xmax should not be smaller than xmin!");
0287     if (xmin < 0)     throw RangeError("xmin should not be negative!");
0288     if (nbins == 0)   throw RangeError("Requested number of bins is 0!");
0289     const double logxmin = std::log(xmin);
0290     const double logxmax = std::log(xmax);
0291     const std::vector<double> logvals = linspace(nbins, logxmin, logxmax);
0292     assert(logvals.size() == nbins+1);
0293     std::vector<double> rtn; rtn.reserve(logvals.size());
0294     rtn.push_back(xmin);
0295     for (size_t i = 1; i < logvals.size()-1; ++i) {
0296       rtn.push_back(std::exp(logvals[i]));
0297     }
0298     assert(rtn.size() == nbins);
0299     if (include_end) rtn.push_back(xmax);
0300     return rtn;
0301   }
0302
0303
0304   /// @todo fspace() for uniform sampling from f(x); requires ability to invert fn... how, in general?
0305   //inline std::vector<double> fspace(size_t nbins, double xmin, double xmax, std::function<double(double)>& fn)
0306
0307
0308   /// @brief Make a list of @a nbins + 1 values spaced with *density* ~ f(x) between @a xmin and @a end inclusive.
0309   ///
0310   /// The density function @a fn will be evaluated at @a nsample uniformly
0311   /// distributed points between @a xmin and @a xmax, its integral approximated
0312   /// via the Trapezium Rule and used to normalize the distribution, and @a
0313   /// nbins + 1 edges then selected to (approximately) divide into bins each
0314   /// containing fraction 1/@a nbins of the integral.
0315   ///
0316   /// @note The function @a fn does not need to be a normalized pdf, but it should be non-negative.
0317   /// Any negative values returned by @a fn(x) will be truncated to zero and contribute nothing to
0318   /// the estimated integral and binning density.
0319   ///
0320   /// @note The arg ordering and the meaning of the nbins variable is "histogram-like",
0321   /// as opposed to the Numpy/Matlab version, and the xmin and xmax arguments are expressed
0322   /// in "normal" space, rather than in the function-mapped space as with Numpy/Matlab.
0323   ///
0324   /// @note The naming of this function differs from the other, Matlab-inspired ones: the bin spacing is
0325   /// uniform in the CDF of the density function given, rather than in the function itself. For
0326   /// most use-cases this is more intuitive.
0327   inline std::vector<double> pdfspace(size_t nbins, double xmin, double xmax, std::function<double(double)>& fn, size_t nsample=10000) {
0328     const double dx = (xmax-xmin)/(double)nsample;
0329     const std::vector<double> xs = linspace(nsample, xmin, xmax);
0330     std::vector<double> ys(0, nsample);
0331     auto posfn = [&](double x){return std::max(fn(x), 0.0);};
0332     std::transform(xs.begin(), xs.end(), ys.begin(), posfn);
0333     std::vector<double> areas; areas.reserve(nsample);
0334     double areasum = 0;
0335     for (size_t i = 0; i < ys.size()-1; ++i) {
0336       const double area = (ys[i] + ys[i+1])*dx/2.0;
0337       areas[i] = area;
0338       areasum += area;
0339     }
0340     const double df = areasum/(double)nbins;
0341     std::vector<double> xedges{xmin}; xedges.reserve(nbins+1);
0342     double fsum = 0;
0343     for (size_t i = 0; i < nsample-1; ++i) {
0344       fsum += areas[i];
0345       if (fsum > df) {
0346         fsum = 0;
0347         xedges.push_back(xs[i+1]);
0348       }
0349     }
0350     xedges.push_back(xmax);
0351     assert(xedges.size() == nbins+1);
0352     return xedges;
0353   }
0354
0355
0356   /// @brief Return the bin index of the given value, @a val, given a vector of bin edges.
0357   ///
0358   /// NB. The @a binedges vector must be sorted
0359   template <typename NUM>
0360   inline int index_between(const NUM& val, const std::vector<NUM>& binedges) {
0361     if (!inRange(val, binedges.front(), binedges.back())) return -1; //< Out of histo range
0362     int index = -1;
0363     for (size_t i = 1; i < binedges.size(); ++i) {
0364       if (val < binedges[i]) {
0365         index = i-1;
0366         break;
0367       }
0368     }
0369     assert(inRange(index, -1, binedges.size()-1));
0370     return index;
0371   }
0372
0373   /// @}
0374
0375 }
0376
0377 #endif