|
|
|||
File indexing completed on 2025-12-07 10:35:01
0001 // @(#)root/mathcore:$Id$ 0002 // Author: L. Moneta Tue Nov 14 15:44:38 2006 0003 0004 /********************************************************************** 0005 * * 0006 * Copyright (c) 2006 LCG ROOT Math Team, CERN/PH-SFT * 0007 * * 0008 * * 0009 **********************************************************************/ 0010 0011 // Utility functions for all ROOT Math classes 0012 0013 #ifndef ROOT_Math_Util 0014 #define ROOT_Math_Util 0015 0016 #include <algorithm> 0017 #include <chrono> 0018 #include <cmath> 0019 #include <functional> 0020 #include <limits> 0021 #include <numeric> 0022 #include <sstream> 0023 #include <string> 0024 0025 0026 // This can be protected against by defining ROOT_Math_VecTypes 0027 // This is only used for the R__HAS_VECCORE define 0028 // and a single VecCore function in EvalLog 0029 #ifndef ROOT_Math_VecTypes 0030 #include "Types.h" 0031 #endif 0032 0033 0034 // for defining unused variables in the interfaces 0035 // and have still them in the documentation 0036 #define MATH_UNUSED(var) (void)var 0037 0038 0039 namespace ROOT { 0040 0041 namespace Math { 0042 0043 /** 0044 namespace defining Utility functions needed by mathcore 0045 */ 0046 namespace Util { 0047 0048 class TimingScope { 0049 0050 public: 0051 TimingScope(std::function<void(std::string const&)> printer, std::string const &message); 0052 0053 ~TimingScope(); 0054 0055 private: 0056 std::chrono::steady_clock::time_point fBegin; 0057 std::function<void(std::string const&)> fPrinter; 0058 const std::string fMessage; 0059 }; 0060 0061 /** 0062 Utility function for conversion to strings 0063 */ 0064 template <class T> 0065 std::string ToString(const T &val) 0066 { 0067 std::ostringstream buf; 0068 buf << val; 0069 0070 std::string ret = buf.str(); 0071 return ret; 0072 } 0073 0074 /// safe evaluation of log(x) with a protections against negative or zero argument to the log 0075 /// smooth linear extrapolation below function values smaller than epsilon 0076 /// (better than a simple cut-off) 0077 0078 template<class T> 0079 inline T EvalLog(T x) { 0080 static const T epsilon = T(2.0 * std::numeric_limits<double>::min()); 0081 #ifdef R__HAS_VECCORE 0082 T logval = vecCore::Blend<T>(x <= epsilon, x / epsilon + std::log(epsilon) - T(1.0), std::log(x)); 0083 #else 0084 T logval = x <= epsilon ? x / epsilon + std::log(epsilon) - T(1.0) : std::log(x); 0085 #endif 0086 return logval; 0087 } 0088 0089 } // end namespace Util 0090 0091 /// \class KahanSum 0092 /// The Kahan summation is a compensated summation algorithm, which significantly reduces numerical errors 0093 /// when adding a sequence of finite-precision floating point numbers. 0094 /// This is done by keeping a separate running compensation (a variable to accumulate small errors). 0095 /// 0096 /// ### Auto-vectorisable accumulation 0097 /// This class can internally use multiple accumulators (template parameter `N`). 0098 /// When filled from a collection that supports index access from a *contiguous* block of memory, 0099 /// compilers such as gcc, clang and icc can auto-vectorise the accumulation. This happens by cycling 0100 /// through the internal accumulators based on the value of "`index % N`", so `N` accumulators can be filled from a block 0101 /// of `N` numbers in a single instruction. 0102 /// 0103 /// The usage of multiple accumulators might slightly increase the precision in comparison to the single-accumulator version 0104 /// with `N = 1`. 0105 /// This depends on the order and magnitude of the numbers being accumulated. Therefore, in rare cases, the accumulation 0106 /// result can change *in dependence of N*, even when the data are identical. 0107 /// The magnitude of such differences is well below the precision of the floating point type, and will therefore mostly show 0108 /// in the compensation sum(see Carry()). Increasing the number of accumulators therefore only makes sense to 0109 /// speed up the accumulation, but not to increase precision. 0110 /// 0111 /// \param T The type of the values to be accumulated. 0112 /// \param N Number of accumulators. Defaults to 1. Ideal values are the widths of a vector register on the relevant architecture. 0113 /// Depending on the instruction set, good values are: 0114 /// - AVX2-float: 8 0115 /// - AVX2-double: 4 0116 /// - AVX512-float: 16 0117 /// - AVX512-double: 8 0118 /// 0119 /// ### Examples 0120 /// 0121 /// ~~~{.cpp} 0122 /// std::vector<double> numbers(1000); 0123 /// for (std::size_t i=0; i<1000; ++i) { 0124 /// numbers[i] = rand(); 0125 /// } 0126 /// 0127 /// ROOT::Math::KahanSum<double, 4> k; 0128 /// k.Add(numbers.begin(), numbers.end()); 0129 /// // or 0130 /// k.Add(numbers); 0131 /// ~~~ 0132 /// ~~~{.cpp} 0133 /// double offset = 10.; 0134 /// auto result = ROOT::Math::KahanSum<double, 4>::Accumulate(numbers.begin(), numbers.end(), offset); 0135 /// ~~~ 0136 template<typename T = double, unsigned int N = 1> 0137 class KahanSum { 0138 public: 0139 /// Initialise the sum. 0140 /// \param[in] initialValue Initialise with this value. Defaults to 0. 0141 explicit KahanSum(T initialValue = T{}) { 0142 fSum[0] = initialValue; 0143 std::fill(std::begin(fSum)+1, std::end(fSum), 0.); 0144 std::fill(std::begin(fCarry), std::end(fCarry), 0.); 0145 } 0146 0147 /// Initialise with a sum value and a carry value. 0148 /// \param[in] initialSumValue Initialise the sum with this value. 0149 /// \param[in] initialCarryValue Initialise the carry with this value. 0150 KahanSum(T initialSumValue, T initialCarryValue) { 0151 fSum[0] = initialSumValue; 0152 fCarry[0] = initialCarryValue; 0153 std::fill(std::begin(fSum)+1, std::end(fSum), 0.); 0154 std::fill(std::begin(fCarry)+1, std::end(fCarry), 0.); 0155 } 0156 0157 /// Initialise the sum with a pre-existing state. 0158 /// \param[in] sumBegin Begin of iterator range with values to initialise the sum with. 0159 /// \param[in] sumEnd End of iterator range with values to initialise the sum with. 0160 /// \param[in] carryBegin Begin of iterator range with values to initialise the carry with. 0161 /// \param[in] carryEnd End of iterator range with values to initialise the carry with. 0162 template<class Iterator> 0163 KahanSum(Iterator sumBegin, Iterator sumEnd, Iterator carryBegin, Iterator carryEnd) { 0164 assert(std::distance(sumBegin, sumEnd) == N); 0165 assert(std::distance(carryBegin, carryEnd) == N); 0166 std::copy(sumBegin, sumEnd, std::begin(fSum)); 0167 std::copy(carryBegin, carryEnd, std::begin(fCarry)); 0168 } 0169 0170 /// Constructor to create a KahanSum from another KahanSum with a different number of accumulators 0171 template <unsigned int M> 0172 KahanSum(KahanSum<T,M> const& other) { 0173 fSum[0] = other.Sum(); 0174 fCarry[0] = other.Carry(); 0175 std::fill(std::begin(fSum)+1, std::end(fSum), 0.); 0176 std::fill(std::begin(fCarry)+1, std::end(fCarry), 0.); 0177 } 0178 0179 /// Single-element accumulation. Will not vectorise. 0180 void Add(T x) { 0181 auto y = x - fCarry[0]; 0182 auto t = fSum[0] + y; 0183 fCarry[0] = (t - fSum[0]) - y; 0184 fSum[0] = t; 0185 } 0186 0187 0188 /// Accumulate from a range denoted by iterators. 0189 /// 0190 /// This function will auto-vectorise with random-access iterators. 0191 /// \param[in] begin Beginning of a range. Needs to be a random access iterator for automatic 0192 /// vectorisation, because a contiguous block of memory needs to be read. 0193 /// \param[in] end End of the range. 0194 template <class Iterator> 0195 void Add(Iterator begin, Iterator end) { 0196 static_assert(std::is_floating_point< 0197 typename std::remove_reference<decltype(*begin)>::type>::value, 0198 "Iterator needs to point to floating-point values."); 0199 const std::size_t n = std::distance(begin, end); 0200 0201 for (std::size_t i=0; i<n; ++i) { 0202 AddIndexed(*(begin++), i); 0203 } 0204 } 0205 0206 0207 /// Fill from a container that supports index access. 0208 /// \param[in] inputs Container with index access such as std::vector or array. 0209 template<class Container_t> 0210 void Add(const Container_t& inputs) { 0211 static_assert(std::is_floating_point<typename Container_t::value_type>::value, 0212 "Container does not hold floating-point values."); 0213 for (std::size_t i=0; i < inputs.size(); ++i) { 0214 AddIndexed(inputs[i], i); 0215 } 0216 } 0217 0218 0219 /// Iterate over a range and return an instance of a KahanSum. 0220 /// 0221 /// See Add(Iterator,Iterator) for details. 0222 /// \param[in] begin Beginning of a range. 0223 /// \param[in] end End of the range. 0224 /// \param[in] initialValue Optional initial value. 0225 template <class Iterator> 0226 static KahanSum<T, N> Accumulate(Iterator begin, Iterator end, 0227 T initialValue = T{}) { 0228 KahanSum<T, N> theSum(initialValue); 0229 theSum.Add(begin, end); 0230 0231 return theSum; 0232 } 0233 0234 0235 /// Add `input` to the sum. 0236 /// 0237 /// Particularly helpful when filling from a for loop. 0238 /// This function can be inlined and auto-vectorised if 0239 /// the index parameter is used to enumerate *consecutive* fills. 0240 /// Use Add() or Accumulate() when no index is available. 0241 /// \param[in] input Value to accumulate. 0242 /// \param[in] index Index of the value. Determines internal accumulator that this 0243 /// value is added to. Make sure that consecutive fills have consecutive indices 0244 /// to make a loop auto-vectorisable. The actual value of the index does not matter, 0245 /// as long as it is consecutive. 0246 void AddIndexed(T input, std::size_t index) { 0247 const unsigned int i = index % N; 0248 const T y = input - fCarry[i]; 0249 const T t = fSum[i] + y; 0250 fCarry[i] = (t - fSum[i]) - y; 0251 fSum[i] = t; 0252 } 0253 0254 /// \return Compensated sum. 0255 T Sum() const { 0256 return std::accumulate(std::begin(fSum), std::end(fSum), 0.); 0257 } 0258 0259 /// \return Compensated sum. 0260 T Result() const { 0261 return Sum(); 0262 } 0263 0264 /// \return The sum used for compensation. 0265 T Carry() const { 0266 return std::accumulate(std::begin(fCarry), std::end(fCarry), 0.); 0267 } 0268 0269 /// Add `arg` into accumulator. Does not vectorise. 0270 KahanSum<T, N>& operator+=(T arg) { 0271 Add(arg); 0272 return *this; 0273 } 0274 0275 /// Add other KahanSum into accumulator. Does not vectorise. 0276 /// 0277 /// Based on KahanIncrement from: 0278 /// Y. Tian, S. Tatikonda and B. Reinwald, "Scalable and Numerically Stable Descriptive Statistics in SystemML," 2012 IEEE 28th International Conference on Data Engineering, 2012, pp. 1351-1359, doi: 10.1109/ICDE.2012.12. 0279 /// Note that while Tian et al. add the carry in the first step, we subtract 0280 /// the carry, in accordance with the Add(Indexed) implementation(s) above. 0281 /// This is purely an implementation choice that has no impact on performance. 0282 /// 0283 /// \note Take care when using += (and -=) to add other KahanSums into a zero-initialized 0284 /// KahanSum. The operator behaves correctly in this case, but the result may be slightly 0285 /// off if you expect 0 + x to yield exactly x (where 0 is the zero-initialized KahanSum 0286 /// and x another KahanSum). In particular, x's carry term may get lost. This doesn't 0287 /// just happen with zero-initialized KahanSums; see the SubtractWithABitTooSmallCarry 0288 /// test case in the testKahan unittest for other examples. This behavior is internally 0289 /// consistent: the carry also gets lost if you switch the operands and it also happens with 0290 /// other KahanSum operators. 0291 template<typename U, unsigned int M> 0292 KahanSum<T, N>& operator+=(const KahanSum<U, M>& other) { 0293 U corrected_arg_sum = other.Sum() - (fCarry[0] + other.Carry()); 0294 U sum = fSum[0] + corrected_arg_sum; 0295 U correction = (sum - fSum[0]) - corrected_arg_sum; 0296 fSum[0] = sum; 0297 fCarry[0] = correction; 0298 return *this; 0299 } 0300 0301 /// Subtract other KahanSum. Does not vectorise. 0302 /// 0303 /// Based on KahanIncrement from: Tian et al., 2012 (see operator+= documentation). 0304 template<typename U, unsigned int M> 0305 KahanSum<T, N>& operator-=(KahanSum<U, M> const& other) { 0306 U corrected_arg_sum = -other.Sum() - (fCarry[0] - other.Carry()); 0307 U sum = fSum[0] + corrected_arg_sum; 0308 U correction = (sum - fSum[0]) - corrected_arg_sum; 0309 fSum[0] = sum; 0310 fCarry[0] = correction; 0311 return *this; 0312 } 0313 0314 KahanSum<T, N> operator-() 0315 { 0316 return {-this->fSum[0], -this->fCarry[0]}; 0317 } 0318 0319 template<typename U, unsigned int M> 0320 bool operator ==(KahanSum<U, M> const& other) const { 0321 return (this->Sum() == other.Sum()) && (this->Carry() == other.Carry()); 0322 } 0323 0324 template<typename U, unsigned int M> 0325 bool operator !=(KahanSum<U, M> const& other) const { 0326 return !(*this == other); 0327 } 0328 0329 private: 0330 T fSum[N]; 0331 T fCarry[N]; 0332 }; 0333 0334 /// Add two non-vectorized KahanSums. 0335 template<typename T, unsigned int N, typename U, unsigned int M> 0336 KahanSum<T, N> operator+(const KahanSum<T, N>& left, const KahanSum<U, M>& right) { 0337 KahanSum<T, N> sum(left); 0338 sum += right; 0339 return sum; 0340 } 0341 0342 /// Subtract two non-vectorized KahanSums. 0343 template<typename T, unsigned int N, typename U, unsigned int M> 0344 KahanSum<T, N> operator-(const KahanSum<T, N>& left, const KahanSum<U, M>& right) { 0345 KahanSum<T, N> sum(left); 0346 sum -= right; 0347 return sum; 0348 } 0349 0350 } // end namespace Math 0351 0352 } // end namespace ROOT 0353 0354 0355 #endif /* ROOT_Math_Util */
| [ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
|
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
|