File indexing completed on 2025-01-18 09:40:43
0001
0002
0003
0004
0005
0006 #ifndef BOOST_MATH_TOOLS_UNIVARIATE_STATISTICS_HPP
0007 #define BOOST_MATH_TOOLS_UNIVARIATE_STATISTICS_HPP
0008
0009 #include <algorithm>
0010 #include <iterator>
0011 #include <tuple>
0012 #include <boost/math/tools/assert.hpp>
0013 #include <boost/math/tools/header_deprecated.hpp>
0014
0015 #include <boost/math/tools/is_standalone.hpp>
0016 #ifndef BOOST_MATH_STANDALONE
0017 #include <boost/config.hpp>
0018 #ifdef BOOST_NO_CXX17_IF_CONSTEXPR
0019 #error "The header <boost/math/norms.hpp> can only be used in C++17 and later."
0020 #endif
0021 #endif
0022
0023 BOOST_MATH_HEADER_DEPRECATED("<boost/math/statistics/univariate_statistics.hpp>");
0024
0025 namespace boost::math::tools {
0026
0027 template<class ForwardIterator>
0028 auto mean(ForwardIterator first, ForwardIterator last)
0029 {
0030 using Real = typename std::iterator_traits<ForwardIterator>::value_type;
0031 BOOST_MATH_ASSERT_MSG(first != last, "At least one sample is required to compute the mean.");
0032 if constexpr (std::is_integral<Real>::value)
0033 {
0034 double mu = 0;
0035 double i = 1;
0036 for(auto it = first; it != last; ++it) {
0037 mu = mu + (*it - mu)/i;
0038 i += 1;
0039 }
0040 return mu;
0041 }
0042 else if constexpr (std::is_same_v<typename std::iterator_traits<ForwardIterator>::iterator_category, std::random_access_iterator_tag>)
0043 {
0044 size_t elements = std::distance(first, last);
0045 Real mu0 = 0;
0046 Real mu1 = 0;
0047 Real mu2 = 0;
0048 Real mu3 = 0;
0049 Real i = 1;
0050 auto end = last - (elements % 4);
0051 for(auto it = first; it != end; it += 4) {
0052 Real inv = Real(1)/i;
0053 Real tmp0 = (*it - mu0);
0054 Real tmp1 = (*(it+1) - mu1);
0055 Real tmp2 = (*(it+2) - mu2);
0056 Real tmp3 = (*(it+3) - mu3);
0057
0058 mu0 += tmp0*inv;
0059 mu1 += tmp1*inv;
0060 mu2 += tmp2*inv;
0061 mu3 += tmp3*inv;
0062 i += 1;
0063 }
0064 Real num1 = Real(elements - (elements %4))/Real(4);
0065 Real num2 = num1 + Real(elements % 4);
0066
0067 for (auto it = end; it != last; ++it)
0068 {
0069 mu3 += (*it-mu3)/i;
0070 i += 1;
0071 }
0072
0073 return (num1*(mu0+mu1+mu2) + num2*mu3)/Real(elements);
0074 }
0075 else
0076 {
0077 auto it = first;
0078 Real mu = *it;
0079 Real i = 2;
0080 while(++it != last)
0081 {
0082 mu += (*it - mu)/i;
0083 i += 1;
0084 }
0085 return mu;
0086 }
0087 }
0088
0089 template<class Container>
0090 inline auto mean(Container const & v)
0091 {
0092 return mean(v.cbegin(), v.cend());
0093 }
0094
0095 template<class ForwardIterator>
0096 auto variance(ForwardIterator first, ForwardIterator last)
0097 {
0098 using Real = typename std::iterator_traits<ForwardIterator>::value_type;
0099 BOOST_MATH_ASSERT_MSG(first != last, "At least one sample is required to compute mean and variance.");
0100
0101 if constexpr (std::is_integral<Real>::value)
0102 {
0103 double M = *first;
0104 double Q = 0;
0105 double k = 2;
0106 for (auto it = std::next(first); it != last; ++it)
0107 {
0108 double tmp = *it - M;
0109 Q = Q + ((k-1)*tmp*tmp)/k;
0110 M = M + tmp/k;
0111 k += 1;
0112 }
0113 return Q/(k-1);
0114 }
0115 else
0116 {
0117 Real M = *first;
0118 Real Q = 0;
0119 Real k = 2;
0120 for (auto it = std::next(first); it != last; ++it)
0121 {
0122 Real tmp = (*it - M)/k;
0123 Q += k*(k-1)*tmp*tmp;
0124 M += tmp;
0125 k += 1;
0126 }
0127 return Q/(k-1);
0128 }
0129 }
0130
0131 template<class Container>
0132 inline auto variance(Container const & v)
0133 {
0134 return variance(v.cbegin(), v.cend());
0135 }
0136
0137 template<class ForwardIterator>
0138 auto sample_variance(ForwardIterator first, ForwardIterator last)
0139 {
0140 size_t n = std::distance(first, last);
0141 BOOST_MATH_ASSERT_MSG(n > 1, "At least two samples are required to compute the sample variance.");
0142 return n*variance(first, last)/(n-1);
0143 }
0144
0145 template<class Container>
0146 inline auto sample_variance(Container const & v)
0147 {
0148 return sample_variance(v.cbegin(), v.cend());
0149 }
0150
0151
0152
0153
0154 template<class ForwardIterator>
0155 auto skewness(ForwardIterator first, ForwardIterator last)
0156 {
0157 using Real = typename std::iterator_traits<ForwardIterator>::value_type;
0158 BOOST_MATH_ASSERT_MSG(first != last, "At least one sample is required to compute skewness.");
0159 if constexpr (std::is_integral<Real>::value)
0160 {
0161 double M1 = *first;
0162 double M2 = 0;
0163 double M3 = 0;
0164 double n = 2;
0165 for (auto it = std::next(first); it != last; ++it)
0166 {
0167 double delta21 = *it - M1;
0168 double tmp = delta21/n;
0169 M3 = M3 + tmp*((n-1)*(n-2)*delta21*tmp - 3*M2);
0170 M2 = M2 + tmp*(n-1)*delta21;
0171 M1 = M1 + tmp;
0172 n += 1;
0173 }
0174
0175 double var = M2/(n-1);
0176 if (var == 0)
0177 {
0178
0179
0180 return static_cast<double>(0);
0181 }
0182 double skew = M3/(M2*sqrt(var));
0183 return skew;
0184 }
0185 else
0186 {
0187 Real M1 = *first;
0188 Real M2 = 0;
0189 Real M3 = 0;
0190 Real n = 2;
0191 for (auto it = std::next(first); it != last; ++it)
0192 {
0193 Real delta21 = *it - M1;
0194 Real tmp = delta21/n;
0195 M3 += tmp*((n-1)*(n-2)*delta21*tmp - 3*M2);
0196 M2 += tmp*(n-1)*delta21;
0197 M1 += tmp;
0198 n += 1;
0199 }
0200
0201 Real var = M2/(n-1);
0202 if (var == 0)
0203 {
0204
0205
0206 return Real(0);
0207 }
0208 Real skew = M3/(M2*sqrt(var));
0209 return skew;
0210 }
0211 }
0212
0213 template<class Container>
0214 inline auto skewness(Container const & v)
0215 {
0216 return skewness(v.cbegin(), v.cend());
0217 }
0218
0219
0220
0221 template<class ForwardIterator>
0222 auto first_four_moments(ForwardIterator first, ForwardIterator last)
0223 {
0224 using Real = typename std::iterator_traits<ForwardIterator>::value_type;
0225 BOOST_MATH_ASSERT_MSG(first != last, "At least one sample is required to compute the first four moments.");
0226 if constexpr (std::is_integral<Real>::value)
0227 {
0228 double M1 = *first;
0229 double M2 = 0;
0230 double M3 = 0;
0231 double M4 = 0;
0232 double n = 2;
0233 for (auto it = std::next(first); it != last; ++it)
0234 {
0235 double delta21 = *it - M1;
0236 double tmp = delta21/n;
0237 M4 = M4 + tmp*(tmp*tmp*delta21*((n-1)*(n*n-3*n+3)) + 6*tmp*M2 - 4*M3);
0238 M3 = M3 + tmp*((n-1)*(n-2)*delta21*tmp - 3*M2);
0239 M2 = M2 + tmp*(n-1)*delta21;
0240 M1 = M1 + tmp;
0241 n += 1;
0242 }
0243
0244 return std::make_tuple(M1, M2/(n-1), M3/(n-1), M4/(n-1));
0245 }
0246 else
0247 {
0248 Real M1 = *first;
0249 Real M2 = 0;
0250 Real M3 = 0;
0251 Real M4 = 0;
0252 Real n = 2;
0253 for (auto it = std::next(first); it != last; ++it)
0254 {
0255 Real delta21 = *it - M1;
0256 Real tmp = delta21/n;
0257 M4 = M4 + tmp*(tmp*tmp*delta21*((n-1)*(n*n-3*n+3)) + 6*tmp*M2 - 4*M3);
0258 M3 = M3 + tmp*((n-1)*(n-2)*delta21*tmp - 3*M2);
0259 M2 = M2 + tmp*(n-1)*delta21;
0260 M1 = M1 + tmp;
0261 n += 1;
0262 }
0263
0264 return std::make_tuple(M1, M2/(n-1), M3/(n-1), M4/(n-1));
0265 }
0266 }
0267
0268 template<class Container>
0269 inline auto first_four_moments(Container const & v)
0270 {
0271 return first_four_moments(v.cbegin(), v.cend());
0272 }
0273
0274
0275
0276
0277 template<class ForwardIterator>
0278 auto kurtosis(ForwardIterator first, ForwardIterator last)
0279 {
0280 auto [M1, M2, M3, M4] = first_four_moments(first, last);
0281 if (M2 == 0)
0282 {
0283 return M2;
0284 }
0285 return M4/(M2*M2);
0286 }
0287
0288 template<class Container>
0289 inline auto kurtosis(Container const & v)
0290 {
0291 return kurtosis(v.cbegin(), v.cend());
0292 }
0293
0294 template<class ForwardIterator>
0295 auto excess_kurtosis(ForwardIterator first, ForwardIterator last)
0296 {
0297 return kurtosis(first, last) - 3;
0298 }
0299
0300 template<class Container>
0301 inline auto excess_kurtosis(Container const & v)
0302 {
0303 return excess_kurtosis(v.cbegin(), v.cend());
0304 }
0305
0306
0307 template<class RandomAccessIterator>
0308 auto median(RandomAccessIterator first, RandomAccessIterator last)
0309 {
0310 size_t num_elems = std::distance(first, last);
0311 BOOST_MATH_ASSERT_MSG(num_elems > 0, "The median of a zero length vector is undefined.");
0312 if (num_elems & 1)
0313 {
0314 auto middle = first + (num_elems - 1)/2;
0315 std::nth_element(first, middle, last);
0316 return *middle;
0317 }
0318 else
0319 {
0320 auto middle = first + num_elems/2 - 1;
0321 std::nth_element(first, middle, last);
0322 std::nth_element(middle, middle+1, last);
0323 return (*middle + *(middle+1))/2;
0324 }
0325 }
0326
0327
0328 template<class RandomAccessContainer>
0329 inline auto median(RandomAccessContainer & v)
0330 {
0331 return median(v.begin(), v.end());
0332 }
0333
0334 template<class RandomAccessIterator>
0335 auto gini_coefficient(RandomAccessIterator first, RandomAccessIterator last)
0336 {
0337 using Real = typename std::iterator_traits<RandomAccessIterator>::value_type;
0338 BOOST_MATH_ASSERT_MSG(first != last && std::next(first) != last, "Computation of the Gini coefficient requires at least two samples.");
0339
0340 std::sort(first, last);
0341 if constexpr (std::is_integral<Real>::value)
0342 {
0343 double i = 1;
0344 double num = 0;
0345 double denom = 0;
0346 for (auto it = first; it != last; ++it)
0347 {
0348 num += *it*i;
0349 denom += *it;
0350 ++i;
0351 }
0352
0353
0354 if (denom == 0)
0355 {
0356 return static_cast<double>(0);
0357 }
0358
0359 return ((2*num)/denom - i)/(i-1);
0360 }
0361 else
0362 {
0363 Real i = 1;
0364 Real num = 0;
0365 Real denom = 0;
0366 for (auto it = first; it != last; ++it)
0367 {
0368 num += *it*i;
0369 denom += *it;
0370 ++i;
0371 }
0372
0373
0374 if (denom == 0)
0375 {
0376 return Real(0);
0377 }
0378
0379 return ((2*num)/denom - i)/(i-1);
0380 }
0381 }
0382
0383 template<class RandomAccessContainer>
0384 inline auto gini_coefficient(RandomAccessContainer & v)
0385 {
0386 return gini_coefficient(v.begin(), v.end());
0387 }
0388
0389 template<class RandomAccessIterator>
0390 inline auto sample_gini_coefficient(RandomAccessIterator first, RandomAccessIterator last)
0391 {
0392 size_t n = std::distance(first, last);
0393 return n*gini_coefficient(first, last)/(n-1);
0394 }
0395
0396 template<class RandomAccessContainer>
0397 inline auto sample_gini_coefficient(RandomAccessContainer & v)
0398 {
0399 return sample_gini_coefficient(v.begin(), v.end());
0400 }
0401
0402 template<class RandomAccessIterator>
0403 auto median_absolute_deviation(RandomAccessIterator first, RandomAccessIterator last, typename std::iterator_traits<RandomAccessIterator>::value_type center=std::numeric_limits<typename std::iterator_traits<RandomAccessIterator>::value_type>::quiet_NaN())
0404 {
0405 using std::abs;
0406 using Real = typename std::iterator_traits<RandomAccessIterator>::value_type;
0407 using std::isnan;
0408 if (isnan(center))
0409 {
0410 center = boost::math::tools::median(first, last);
0411 }
0412 size_t num_elems = std::distance(first, last);
0413 BOOST_MATH_ASSERT_MSG(num_elems > 0, "The median of a zero-length vector is undefined.");
0414 auto comparator = [¢er](Real a, Real b) { return abs(a-center) < abs(b-center);};
0415 if (num_elems & 1)
0416 {
0417 auto middle = first + (num_elems - 1)/2;
0418 std::nth_element(first, middle, last, comparator);
0419 return abs(*middle);
0420 }
0421 else
0422 {
0423 auto middle = first + num_elems/2 - 1;
0424 std::nth_element(first, middle, last, comparator);
0425 std::nth_element(middle, middle+1, last, comparator);
0426 return (abs(*middle) + abs(*(middle+1)))/abs(static_cast<Real>(2));
0427 }
0428 }
0429
0430 template<class RandomAccessContainer>
0431 inline auto median_absolute_deviation(RandomAccessContainer & v, typename RandomAccessContainer::value_type center=std::numeric_limits<typename RandomAccessContainer::value_type>::quiet_NaN())
0432 {
0433 return median_absolute_deviation(v.begin(), v.end(), center);
0434 }
0435
0436 }
0437 #endif