![]() |
|
|||
File indexing completed on 2025-09-16 09:08:14
0001 // @(#)root/mathcore:$Id$ 0002 // Author: L. Moneta Tue Nov 14 15:44:38 2006 0003 0004 /********************************************************************** 0005 * * 0006 * Copyright (c) 2006 LCG ROOT Math Team, CERN/PH-SFT * 0007 * * 0008 * * 0009 **********************************************************************/ 0010 0011 // Utility functions for all ROOT Math classes 0012 0013 #ifndef ROOT_Math_Util 0014 #define ROOT_Math_Util 0015 0016 #include <chrono> 0017 #include <cmath> 0018 #include <functional> 0019 #include <limits> 0020 #include <numeric> 0021 #include <sstream> 0022 #include <string> 0023 0024 0025 // This can be protected against by defining ROOT_Math_VecTypes 0026 // This is only used for the R__HAS_VECCORE define 0027 // and a single VecCore function in EvalLog 0028 #ifndef ROOT_Math_VecTypes 0029 #include "Types.h" 0030 #endif 0031 0032 0033 // for defining unused variables in the interfaces 0034 // and have still them in the documentation 0035 #define MATH_UNUSED(var) (void)var 0036 0037 0038 namespace ROOT { 0039 0040 namespace Math { 0041 0042 /** 0043 namespace defining Utility functions needed by mathcore 0044 */ 0045 namespace Util { 0046 0047 class TimingScope { 0048 0049 public: 0050 TimingScope(std::function<void(std::string const&)> printer, std::string const &message); 0051 0052 ~TimingScope(); 0053 0054 private: 0055 std::chrono::steady_clock::time_point fBegin; 0056 std::function<void(std::string const&)> fPrinter; 0057 const std::string fMessage; 0058 }; 0059 0060 /** 0061 Utility function for conversion to strings 0062 */ 0063 template <class T> 0064 std::string ToString(const T &val) 0065 { 0066 std::ostringstream buf; 0067 buf << val; 0068 0069 std::string ret = buf.str(); 0070 return ret; 0071 } 0072 0073 /// safe evaluation of log(x) with a protections against negative or zero argument to the log 0074 /// smooth linear extrapolation below function values smaller than epsilon 0075 /// (better than a simple cut-off) 0076 0077 template<class T> 0078 inline T EvalLog(T x) { 0079 static const T epsilon = T(2.0 * std::numeric_limits<double>::min()); 0080 #ifdef R__HAS_VECCORE 0081 T logval = vecCore::Blend<T>(x <= epsilon, x / epsilon + std::log(epsilon) - T(1.0), std::log(x)); 0082 #else 0083 T logval = x <= epsilon ? x / epsilon + std::log(epsilon) - T(1.0) : std::log(x); 0084 #endif 0085 return logval; 0086 } 0087 0088 } // end namespace Util 0089 0090 /// \class KahanSum 0091 /// The Kahan summation is a compensated summation algorithm, which significantly reduces numerical errors 0092 /// when adding a sequence of finite-precision floating point numbers. 0093 /// This is done by keeping a separate running compensation (a variable to accumulate small errors). 0094 /// 0095 /// ### Auto-vectorisable accumulation 0096 /// This class can internally use multiple accumulators (template parameter `N`). 0097 /// When filled from a collection that supports index access from a *contiguous* block of memory, 0098 /// compilers such as gcc, clang and icc can auto-vectorise the accumulation. This happens by cycling 0099 /// through the internal accumulators based on the value of "`index % N`", so `N` accumulators can be filled from a block 0100 /// of `N` numbers in a single instruction. 0101 /// 0102 /// The usage of multiple accumulators might slightly increase the precision in comparison to the single-accumulator version 0103 /// with `N = 1`. 0104 /// This depends on the order and magnitude of the numbers being accumulated. Therefore, in rare cases, the accumulation 0105 /// result can change *in dependence of N*, even when the data are identical. 0106 /// The magnitude of such differences is well below the precision of the floating point type, and will therefore mostly show 0107 /// in the compensation sum(see Carry()). Increasing the number of accumulators therefore only makes sense to 0108 /// speed up the accumulation, but not to increase precision. 0109 /// 0110 /// \param T The type of the values to be accumulated. 0111 /// \param N Number of accumulators. Defaults to 1. Ideal values are the widths of a vector register on the relevant architecture. 0112 /// Depending on the instruction set, good values are: 0113 /// - AVX2-float: 8 0114 /// - AVX2-double: 4 0115 /// - AVX512-float: 16 0116 /// - AVX512-double: 8 0117 /// 0118 /// ### Examples 0119 /// 0120 /// ~~~{.cpp} 0121 /// std::vector<double> numbers(1000); 0122 /// for (std::size_t i=0; i<1000; ++i) { 0123 /// numbers[i] = rand(); 0124 /// } 0125 /// 0126 /// ROOT::Math::KahanSum<double, 4> k; 0127 /// k.Add(numbers.begin(), numbers.end()); 0128 /// // or 0129 /// k.Add(numbers); 0130 /// ~~~ 0131 /// ~~~{.cpp} 0132 /// double offset = 10.; 0133 /// auto result = ROOT::Math::KahanSum<double, 4>::Accumulate(numbers.begin(), numbers.end(), offset); 0134 /// ~~~ 0135 template<typename T = double, unsigned int N = 1> 0136 class KahanSum { 0137 public: 0138 /// Initialise the sum. 0139 /// \param[in] initialValue Initialise with this value. Defaults to 0. 0140 explicit KahanSum(T initialValue = T{}) { 0141 fSum[0] = initialValue; 0142 std::fill(std::begin(fSum)+1, std::end(fSum), 0.); 0143 std::fill(std::begin(fCarry), std::end(fCarry), 0.); 0144 } 0145 0146 /// Initialise with a sum value and a carry value. 0147 /// \param[in] initialSumValue Initialise the sum with this value. 0148 /// \param[in] initialCarryValue Initialise the carry with this value. 0149 KahanSum(T initialSumValue, T initialCarryValue) { 0150 fSum[0] = initialSumValue; 0151 fCarry[0] = initialCarryValue; 0152 std::fill(std::begin(fSum)+1, std::end(fSum), 0.); 0153 std::fill(std::begin(fCarry)+1, std::end(fCarry), 0.); 0154 } 0155 0156 /// Initialise the sum with a pre-existing state. 0157 /// \param[in] sumBegin Begin of iterator range with values to initialise the sum with. 0158 /// \param[in] sumEnd End of iterator range with values to initialise the sum with. 0159 /// \param[in] carryBegin Begin of iterator range with values to initialise the carry with. 0160 /// \param[in] carryEnd End of iterator range with values to initialise the carry with. 0161 template<class Iterator> 0162 KahanSum(Iterator sumBegin, Iterator sumEnd, Iterator carryBegin, Iterator carryEnd) { 0163 assert(std::distance(sumBegin, sumEnd) == N); 0164 assert(std::distance(carryBegin, carryEnd) == N); 0165 std::copy(sumBegin, sumEnd, std::begin(fSum)); 0166 std::copy(carryBegin, carryEnd, std::begin(fCarry)); 0167 } 0168 0169 /// Constructor to create a KahanSum from another KahanSum with a different number of accumulators 0170 template <unsigned int M> 0171 KahanSum(KahanSum<T,M> const& other) { 0172 fSum[0] = other.Sum(); 0173 fCarry[0] = other.Carry(); 0174 std::fill(std::begin(fSum)+1, std::end(fSum), 0.); 0175 std::fill(std::begin(fCarry)+1, std::end(fCarry), 0.); 0176 } 0177 0178 /// Single-element accumulation. Will not vectorise. 0179 void Add(T x) { 0180 auto y = x - fCarry[0]; 0181 auto t = fSum[0] + y; 0182 fCarry[0] = (t - fSum[0]) - y; 0183 fSum[0] = t; 0184 } 0185 0186 0187 /// Accumulate from a range denoted by iterators. 0188 /// 0189 /// This function will auto-vectorise with random-access iterators. 0190 /// \param[in] begin Beginning of a range. Needs to be a random access iterator for automatic 0191 /// vectorisation, because a contiguous block of memory needs to be read. 0192 /// \param[in] end End of the range. 0193 template <class Iterator> 0194 void Add(Iterator begin, Iterator end) { 0195 static_assert(std::is_floating_point< 0196 typename std::remove_reference<decltype(*begin)>::type>::value, 0197 "Iterator needs to point to floating-point values."); 0198 const std::size_t n = std::distance(begin, end); 0199 0200 for (std::size_t i=0; i<n; ++i) { 0201 AddIndexed(*(begin++), i); 0202 } 0203 } 0204 0205 0206 /// Fill from a container that supports index access. 0207 /// \param[in] inputs Container with index access such as std::vector or array. 0208 template<class Container_t> 0209 void Add(const Container_t& inputs) { 0210 static_assert(std::is_floating_point<typename Container_t::value_type>::value, 0211 "Container does not hold floating-point values."); 0212 for (std::size_t i=0; i < inputs.size(); ++i) { 0213 AddIndexed(inputs[i], i); 0214 } 0215 } 0216 0217 0218 /// Iterate over a range and return an instance of a KahanSum. 0219 /// 0220 /// See Add(Iterator,Iterator) for details. 0221 /// \param[in] begin Beginning of a range. 0222 /// \param[in] end End of the range. 0223 /// \param[in] initialValue Optional initial value. 0224 template <class Iterator> 0225 static KahanSum<T, N> Accumulate(Iterator begin, Iterator end, 0226 T initialValue = T{}) { 0227 KahanSum<T, N> theSum(initialValue); 0228 theSum.Add(begin, end); 0229 0230 return theSum; 0231 } 0232 0233 0234 /// Add `input` to the sum. 0235 /// 0236 /// Particularly helpful when filling from a for loop. 0237 /// This function can be inlined and auto-vectorised if 0238 /// the index parameter is used to enumerate *consecutive* fills. 0239 /// Use Add() or Accumulate() when no index is available. 0240 /// \param[in] input Value to accumulate. 0241 /// \param[in] index Index of the value. Determines internal accumulator that this 0242 /// value is added to. Make sure that consecutive fills have consecutive indices 0243 /// to make a loop auto-vectorisable. The actual value of the index does not matter, 0244 /// as long as it is consecutive. 0245 void AddIndexed(T input, std::size_t index) { 0246 const unsigned int i = index % N; 0247 const T y = input - fCarry[i]; 0248 const T t = fSum[i] + y; 0249 fCarry[i] = (t - fSum[i]) - y; 0250 fSum[i] = t; 0251 } 0252 0253 /// \return Compensated sum. 0254 T Sum() const { 0255 return std::accumulate(std::begin(fSum), std::end(fSum), 0.); 0256 } 0257 0258 /// \return Compensated sum. 0259 T Result() const { 0260 return Sum(); 0261 } 0262 0263 /// \return The sum used for compensation. 0264 T Carry() const { 0265 return std::accumulate(std::begin(fCarry), std::end(fCarry), 0.); 0266 } 0267 0268 /// Add `arg` into accumulator. Does not vectorise. 0269 KahanSum<T, N>& operator+=(T arg) { 0270 Add(arg); 0271 return *this; 0272 } 0273 0274 /// Add other KahanSum into accumulator. Does not vectorise. 0275 /// 0276 /// Based on KahanIncrement from: 0277 /// Y. Tian, S. Tatikonda and B. Reinwald, "Scalable and Numerically Stable Descriptive Statistics in SystemML," 2012 IEEE 28th International Conference on Data Engineering, 2012, pp. 1351-1359, doi: 10.1109/ICDE.2012.12. 0278 /// Note that while Tian et al. add the carry in the first step, we subtract 0279 /// the carry, in accordance with the Add(Indexed) implementation(s) above. 0280 /// This is purely an implementation choice that has no impact on performance. 0281 /// 0282 /// \note Take care when using += (and -=) to add other KahanSums into a zero-initialized 0283 /// KahanSum. The operator behaves correctly in this case, but the result may be slightly 0284 /// off if you expect 0 + x to yield exactly x (where 0 is the zero-initialized KahanSum 0285 /// and x another KahanSum). In particular, x's carry term may get lost. This doesn't 0286 /// just happen with zero-initialized KahanSums; see the SubtractWithABitTooSmallCarry 0287 /// test case in the testKahan unittest for other examples. This behavior is internally 0288 /// consistent: the carry also gets lost if you switch the operands and it also happens with 0289 /// other KahanSum operators. 0290 template<typename U, unsigned int M> 0291 KahanSum<T, N>& operator+=(const KahanSum<U, M>& other) { 0292 U corrected_arg_sum = other.Sum() - (fCarry[0] + other.Carry()); 0293 U sum = fSum[0] + corrected_arg_sum; 0294 U correction = (sum - fSum[0]) - corrected_arg_sum; 0295 fSum[0] = sum; 0296 fCarry[0] = correction; 0297 return *this; 0298 } 0299 0300 /// Subtract other KahanSum. Does not vectorise. 0301 /// 0302 /// Based on KahanIncrement from: Tian et al., 2012 (see operator+= documentation). 0303 template<typename U, unsigned int M> 0304 KahanSum<T, N>& operator-=(KahanSum<U, M> const& other) { 0305 U corrected_arg_sum = -other.Sum() - (fCarry[0] - other.Carry()); 0306 U sum = fSum[0] + corrected_arg_sum; 0307 U correction = (sum - fSum[0]) - corrected_arg_sum; 0308 fSum[0] = sum; 0309 fCarry[0] = correction; 0310 return *this; 0311 } 0312 0313 KahanSum<T, N> operator-() 0314 { 0315 return {-this->fSum[0], -this->fCarry[0]}; 0316 } 0317 0318 template<typename U, unsigned int M> 0319 bool operator ==(KahanSum<U, M> const& other) const { 0320 return (this->Sum() == other.Sum()) && (this->Carry() == other.Carry()); 0321 } 0322 0323 template<typename U, unsigned int M> 0324 bool operator !=(KahanSum<U, M> const& other) const { 0325 return !(*this == other); 0326 } 0327 0328 private: 0329 T fSum[N]; 0330 T fCarry[N]; 0331 }; 0332 0333 /// Add two non-vectorized KahanSums. 0334 template<typename T, unsigned int N, typename U, unsigned int M> 0335 KahanSum<T, N> operator+(const KahanSum<T, N>& left, const KahanSum<U, M>& right) { 0336 KahanSum<T, N> sum(left); 0337 sum += right; 0338 return sum; 0339 } 0340 0341 /// Subtract two non-vectorized KahanSums. 0342 template<typename T, unsigned int N, typename U, unsigned int M> 0343 KahanSum<T, N> operator-(const KahanSum<T, N>& left, const KahanSum<U, M>& right) { 0344 KahanSum<T, N> sum(left); 0345 sum -= right; 0346 return sum; 0347 } 0348 0349 } // end namespace Math 0350 0351 } // end namespace ROOT 0352 0353 0354 #endif /* ROOT_Math_Util */
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
![]() ![]() |