root/Math/Util.h

0001 // @(#)root/mathcore:$Id$
0002 // Author: L. Moneta Tue Nov 14 15:44:38 2006
0003
0004 /**********************************************************************
0005  *                                                                    *
0006  * Copyright (c) 2006  LCG ROOT Math Team, CERN/PH-SFT                *
0007  *                                                                    *
0008  *                                                                    *
0009  **********************************************************************/
0010
0011 // Utility functions for all ROOT Math classes
0012
0013 #ifndef ROOT_Math_Util
0014 #define ROOT_Math_Util
0015
0016 #include <algorithm>
0017 #include <chrono>
0018 #include <cmath>
0019 #include <functional>
0020 #include <limits>
0021 #include <numeric>
0022 #include <sstream>
0023 #include <string>
0024
0025
0026 // This can be protected against by defining ROOT_Math_VecTypes
0027 // This is only used for the R__HAS_VECCORE define
0028 // and a single VecCore function in EvalLog
0029 #ifndef ROOT_Math_VecTypes
0030 #include "Types.h"
0031 #endif
0032
0033
0034 // for defining unused variables in the interfaces
0035 //  and have still them in the documentation
0036 #define MATH_UNUSED(var)   (void)var
0037
0038
0039 namespace ROOT {
0040
0041 namespace Math {
0042
0043 /**
0044    namespace defining Utility functions needed by mathcore
0045 */
0046 namespace Util {
0047
0048 class TimingScope {
0049
0050 public:
0051    TimingScope(std::function<void(std::string const&)> printer, std::string const &message);
0052
0053    ~TimingScope();
0054
0055 private:
0056    std::chrono::steady_clock::time_point fBegin;
0057    std::function<void(std::string const&)> fPrinter;
0058    const std::string fMessage;
0059 };
0060
0061    /**
0062       Utility function for conversion to strings
0063    */
0064    template <class T>
0065    std::string ToString(const T &val)
0066    {
0067       std::ostringstream buf;
0068       buf << val;
0069
0070       std::string ret = buf.str();
0071       return ret;
0072    }
0073
0074    /// safe evaluation of log(x) with a protections against negative or zero argument to the log
0075    /// smooth linear extrapolation below function values smaller than  epsilon
0076    /// (better than a simple cut-off)
0077
0078    template<class T>
0079    inline T EvalLog(T x) {
0080       static const T epsilon = T(2.0 * std::numeric_limits<double>::min());
0081 #ifdef R__HAS_VECCORE
0082       T logval = vecCore::Blend<T>(x <= epsilon, x / epsilon + std::log(epsilon) - T(1.0), std::log(x));
0083 #else
0084       T logval = x <= epsilon ? x / epsilon + std::log(epsilon) - T(1.0) : std::log(x);
0085 #endif
0086       return logval;
0087    }
0088
0089    } // end namespace Util
0090
0091    /// \class KahanSum
0092    /// The Kahan summation is a compensated summation algorithm, which significantly reduces numerical errors
0093    /// when adding a sequence of finite-precision floating point numbers.
0094    /// This is done by keeping a separate running compensation (a variable to accumulate small errors).
0095    ///
0096    /// ### Auto-vectorisable accumulation
0097    /// This class can internally use multiple accumulators (template parameter `N`).
0098    /// When filled from a collection that supports index access from a *contiguous* block of memory,
0099    /// compilers such as gcc, clang and icc can auto-vectorise the accumulation. This happens by cycling
0100    /// through the internal accumulators based on the value of "`index % N`", so `N` accumulators can be filled from a block
0101    /// of `N` numbers in a single instruction.
0102    ///
0103    /// The usage of multiple accumulators might slightly increase the precision in comparison to the single-accumulator version
0104    /// with `N = 1`.
0105    /// This depends on the order and magnitude of the numbers being accumulated. Therefore, in rare cases, the accumulation
0106    /// result can change *in dependence of N*, even when the data are identical.
0107    /// The magnitude of such differences is well below the precision of the floating point type, and will therefore mostly show
0108    /// in the compensation sum(see Carry()). Increasing the number of accumulators therefore only makes sense to
0109    /// speed up the accumulation, but not to increase precision.
0110    ///
0111    /// \param T The type of the values to be accumulated.
0112    /// \param N Number of accumulators. Defaults to 1. Ideal values are the widths of a vector register on the relevant architecture.
0113    /// Depending on the instruction set, good values are:
0114    /// - AVX2-float: 8
0115    /// - AVX2-double: 4
0116    /// - AVX512-float: 16
0117    /// - AVX512-double: 8
0118    ///
0119    /// ### Examples
0120    ///
0121    /// ~~~{.cpp}
0122    /// std::vector<double> numbers(1000);
0123    /// for (std::size_t i=0; i<1000; ++i) {
0124    ///    numbers[i] = rand();
0125    /// }
0126    ///
0127    /// ROOT::Math::KahanSum<double, 4> k;
0128    /// k.Add(numbers.begin(), numbers.end());
0129    /// // or
0130    /// k.Add(numbers);
0131    /// ~~~
0132    /// ~~~{.cpp}
0133    /// double offset = 10.;
0134    /// auto result = ROOT::Math::KahanSum<double, 4>::Accumulate(numbers.begin(), numbers.end(), offset);
0135    /// ~~~
0136    template<typename T = double, unsigned int N = 1>
0137    class KahanSum {
0138      public:
0139        /// Initialise the sum.
0140        /// \param[in] initialValue Initialise with this value. Defaults to 0.
0141        explicit KahanSum(T initialValue = T{}) {
0142          fSum[0] = initialValue;
0143          std::fill(std::begin(fSum)+1, std::end(fSum), 0.);
0144          std::fill(std::begin(fCarry), std::end(fCarry), 0.);
0145        }
0146
0147        /// Initialise with a sum value and a carry value.
0148        /// \param[in] initialSumValue Initialise the sum with this value.
0149        /// \param[in] initialCarryValue Initialise the carry with this value.
0150        KahanSum(T initialSumValue, T initialCarryValue) {
0151           fSum[0] = initialSumValue;
0152           fCarry[0] = initialCarryValue;
0153           std::fill(std::begin(fSum)+1, std::end(fSum), 0.);
0154           std::fill(std::begin(fCarry)+1, std::end(fCarry), 0.);
0155        }
0156
0157        /// Initialise the sum with a pre-existing state.
0158        /// \param[in] sumBegin Begin of iterator range with values to initialise the sum with.
0159        /// \param[in] sumEnd End of iterator range with values to initialise the sum with.
0160        /// \param[in] carryBegin Begin of iterator range with values to initialise the carry with.
0161        /// \param[in] carryEnd End of iterator range with values to initialise the carry with.
0162        template<class Iterator>
0163        KahanSum(Iterator sumBegin, Iterator sumEnd, Iterator carryBegin, Iterator carryEnd) {
0164           assert(std::distance(sumBegin, sumEnd) == N);
0165           assert(std::distance(carryBegin, carryEnd) == N);
0166           std::copy(sumBegin, sumEnd, std::begin(fSum));
0167           std::copy(carryBegin, carryEnd, std::begin(fCarry));
0168        }
0169
0170        /// Constructor to create a KahanSum from another KahanSum with a different number of accumulators
0171        template <unsigned int M>
0172        KahanSum(KahanSum<T,M> const& other) {
0173          fSum[0] = other.Sum();
0174          fCarry[0] = other.Carry();
0175          std::fill(std::begin(fSum)+1, std::end(fSum), 0.);
0176          std::fill(std::begin(fCarry)+1, std::end(fCarry), 0.);
0177        }
0178
0179        /// Single-element accumulation. Will not vectorise.
0180        void Add(T x) {
0181           auto y = x - fCarry[0];
0182           auto t = fSum[0] + y;
0183           fCarry[0] = (t - fSum[0]) - y;
0184           fSum[0] = t;
0185        }
0186
0187
0188        /// Accumulate from a range denoted by iterators.
0189        ///
0190        /// This function will auto-vectorise with random-access iterators.
0191        /// \param[in] begin Beginning of a range. Needs to be a random access iterator for automatic
0192        /// vectorisation, because a contiguous block of memory needs to be read.
0193        /// \param[in] end   End of the range.
0194        template <class Iterator>
0195        void Add(Iterator begin, Iterator end) {
0196            static_assert(std::is_floating_point<
0197                typename std::remove_reference<decltype(*begin)>::type>::value,
0198                "Iterator needs to point to floating-point values.");
0199            const std::size_t n = std::distance(begin, end);
0200
0201            for (std::size_t i=0; i<n; ++i) {
0202              AddIndexed(*(begin++), i);
0203            }
0204        }
0205
0206
0207        /// Fill from a container that supports index access.
0208        /// \param[in] inputs Container with index access such as std::vector or array.
0209        template<class Container_t>
0210        void Add(const Container_t& inputs) {
0211          static_assert(std::is_floating_point<typename Container_t::value_type>::value,
0212              "Container does not hold floating-point values.");
0213          for (std::size_t i=0; i < inputs.size(); ++i) {
0214            AddIndexed(inputs[i], i);
0215          }
0216        }
0217
0218
0219        /// Iterate over a range and return an instance of a KahanSum.
0220        ///
0221        /// See Add(Iterator,Iterator) for details.
0222        /// \param[in] begin Beginning of a range.
0223        /// \param[in] end   End of the range.
0224        /// \param[in] initialValue Optional initial value.
0225        template <class Iterator>
0226        static KahanSum<T, N> Accumulate(Iterator begin, Iterator end,
0227            T initialValue = T{}) {
0228            KahanSum<T, N> theSum(initialValue);
0229            theSum.Add(begin, end);
0230
0231            return theSum;
0232        }
0233
0234
0235        /// Add `input` to the sum.
0236        ///
0237        /// Particularly helpful when filling from a for loop.
0238        /// This function can be inlined and auto-vectorised if
0239        /// the index parameter is used to enumerate *consecutive* fills.
0240        /// Use Add() or Accumulate() when no index is available.
0241        /// \param[in] input Value to accumulate.
0242        /// \param[in] index Index of the value. Determines internal accumulator that this
0243        /// value is added to. Make sure that consecutive fills have consecutive indices
0244        /// to make a loop auto-vectorisable. The actual value of the index does not matter,
0245        /// as long as it is consecutive.
0246        void AddIndexed(T input, std::size_t index) {
0247          const unsigned int i = index % N;
0248          const T y = input - fCarry[i];
0249          const T t = fSum[i] + y;
0250          fCarry[i] = (t - fSum[i]) - y;
0251          fSum[i] = t;
0252        }
0253
0254        /// \return Compensated sum.
0255        T Sum() const {
0256          return std::accumulate(std::begin(fSum), std::end(fSum), 0.);
0257        }
0258
0259        /// \return Compensated sum.
0260        T Result() const {
0261          return Sum();
0262        }
0263
0264        /// \return The sum used for compensation.
0265        T Carry() const {
0266          return std::accumulate(std::begin(fCarry), std::end(fCarry), 0.);
0267        }
0268
0269        /// Add `arg` into accumulator. Does not vectorise.
0270        KahanSum<T, N>& operator+=(T arg) {
0271          Add(arg);
0272          return *this;
0273        }
0274
0275        /// Add other KahanSum into accumulator. Does not vectorise.
0276        ///
0277        /// Based on KahanIncrement from:
0278        /// Y. Tian, S. Tatikonda and B. Reinwald, "Scalable and Numerically Stable Descriptive Statistics in SystemML," 2012 IEEE 28th International Conference on Data Engineering, 2012, pp. 1351-1359, doi: 10.1109/ICDE.2012.12.
0279        /// Note that while Tian et al. add the carry in the first step, we subtract
0280        /// the carry, in accordance with the Add(Indexed) implementation(s) above.
0281        /// This is purely an implementation choice that has no impact on performance.
0282        ///
0283        /// \note Take care when using += (and -=) to add other KahanSums into a zero-initialized
0284        ///       KahanSum. The operator behaves correctly in this case, but the result may be slightly
0285        ///       off if you expect 0 + x to yield exactly x (where 0 is the zero-initialized KahanSum
0286        ///       and x another KahanSum). In particular, x's carry term may get lost. This doesn't
0287        ///       just happen with zero-initialized KahanSums; see the SubtractWithABitTooSmallCarry
0288        ///       test case in the testKahan unittest for other examples. This behavior is internally
0289        ///       consistent: the carry also gets lost if you switch the operands and it also happens with
0290        ///       other KahanSum operators.
0291        template<typename U, unsigned int M>
0292        KahanSum<T, N>& operator+=(const KahanSum<U, M>& other) {
0293          U corrected_arg_sum = other.Sum() - (fCarry[0] + other.Carry());
0294          U sum = fSum[0] + corrected_arg_sum;
0295          U correction = (sum - fSum[0]) - corrected_arg_sum;
0296          fSum[0] = sum;
0297          fCarry[0] = correction;
0298          return *this;
0299        }
0300
0301        /// Subtract other KahanSum. Does not vectorise.
0302        ///
0303        /// Based on KahanIncrement from: Tian et al., 2012 (see operator+= documentation).
0304        template<typename U, unsigned int M>
0305        KahanSum<T, N>& operator-=(KahanSum<U, M> const& other) {
0306          U corrected_arg_sum = -other.Sum() - (fCarry[0] - other.Carry());
0307          U sum = fSum[0] + corrected_arg_sum;
0308          U correction = (sum - fSum[0]) - corrected_arg_sum;
0309          fSum[0] = sum;
0310          fCarry[0] = correction;
0311          return *this;
0312        }
0313
0314        KahanSum<T, N> operator-()
0315        {
0316           return {-this->fSum[0], -this->fCarry[0]};
0317        }
0318
0319        template<typename U, unsigned int M>
0320        bool operator ==(KahanSum<U, M> const& other) const {
0321           return (this->Sum() == other.Sum()) && (this->Carry() == other.Carry());
0322        }
0323
0324        template<typename U, unsigned int M>
0325        bool operator !=(KahanSum<U, M> const& other) const {
0326           return !(*this == other);
0327        }
0328
0329      private:
0330        T fSum[N];
0331        T fCarry[N];
0332    };
0333
0334    /// Add two non-vectorized KahanSums.
0335    template<typename T, unsigned int N, typename U, unsigned int M>
0336    KahanSum<T, N> operator+(const KahanSum<T, N>& left, const KahanSum<U, M>& right) {
0337       KahanSum<T, N> sum(left);
0338       sum += right;
0339       return sum;
0340    }
0341
0342    /// Subtract two non-vectorized KahanSums.
0343    template<typename T, unsigned int N, typename U, unsigned int M>
0344    KahanSum<T, N> operator-(const KahanSum<T, N>& left, const KahanSum<U, M>& right) {
0345       KahanSum<T, N> sum(left);
0346       sum -= right;
0347       return sum;
0348    }
0349
0350    } // end namespace Math
0351
0352 } // end namespace ROOT
0353
0354
0355 #endif /* ROOT_Math_Util */