File indexing completed on 2025-12-16 09:55:37
0001
0002
0003
0004
0005
0006
0007
0008
0009 #ifndef BOOST_MATH_STATISTICS_LINEAR_REGRESSION_HPP
0010 #define BOOST_MATH_STATISTICS_LINEAR_REGRESSION_HPP
0011
0012 #include <cmath>
0013 #include <algorithm>
0014 #include <utility>
0015 #include <tuple>
0016 #include <stdexcept>
0017 #include <type_traits>
0018 #include <boost/math/statistics/univariate_statistics.hpp>
0019 #include <boost/math/statistics/bivariate_statistics.hpp>
0020
0021 namespace boost { namespace math { namespace statistics { namespace detail {
0022
0023
0024 template<class ReturnType, class RandomAccessContainer>
0025 ReturnType simple_ordinary_least_squares_impl(RandomAccessContainer const & x,
0026 RandomAccessContainer const & y)
0027 {
0028 using Real = typename std::tuple_element<0, ReturnType>::type;
0029 if (x.size() <= 1)
0030 {
0031 throw std::domain_error("At least 2 samples are required to perform a linear regression.");
0032 }
0033
0034 if (x.size() != y.size())
0035 {
0036 throw std::domain_error("The same number of samples must be in the independent and dependent variable.");
0037 }
0038 std::tuple<Real, Real, Real> temp = boost::math::statistics::means_and_covariance(x, y);
0039 Real mu_x = std::get<0>(temp);
0040 Real mu_y = std::get<1>(temp);
0041 Real cov_xy = std::get<2>(temp);
0042
0043 Real var_x = boost::math::statistics::variance(x);
0044
0045 if (var_x <= 0) {
0046 throw std::domain_error("Independent variable has no variance; this breaks linear regression.");
0047 }
0048
0049
0050 Real c1 = cov_xy/var_x;
0051 Real c0 = mu_y - c1*mu_x;
0052
0053 return std::make_pair(c0, c1);
0054 }
0055
0056 template<class ReturnType, class RandomAccessContainer>
0057 ReturnType simple_ordinary_least_squares_with_R_squared_impl(RandomAccessContainer const & x,
0058 RandomAccessContainer const & y)
0059 {
0060 using Real = typename std::tuple_element<0, ReturnType>::type;
0061 if (x.size() <= 1)
0062 {
0063 throw std::domain_error("At least 2 samples are required to perform a linear regression.");
0064 }
0065
0066 if (x.size() != y.size())
0067 {
0068 throw std::domain_error("The same number of samples must be in the independent and dependent variable.");
0069 }
0070 std::tuple<Real, Real, Real> temp = boost::math::statistics::means_and_covariance(x, y);
0071 Real mu_x = std::get<0>(temp);
0072 Real mu_y = std::get<1>(temp);
0073 Real cov_xy = std::get<2>(temp);
0074
0075 Real var_x = boost::math::statistics::variance(x);
0076
0077 if (var_x <= 0) {
0078 throw std::domain_error("Independent variable has no variance; this breaks linear regression.");
0079 }
0080
0081
0082 Real c1 = cov_xy/var_x;
0083 Real c0 = mu_y - c1*mu_x;
0084
0085 Real squared_residuals = 0;
0086 Real squared_mean_deviation = 0;
0087 for(decltype(y.size()) i = 0; i < y.size(); ++i) {
0088 squared_mean_deviation += (y[i] - mu_y)*(y[i]-mu_y);
0089 Real ei = (c0 + c1*x[i]) - y[i];
0090 squared_residuals += ei*ei;
0091 }
0092
0093 Real Rsquared;
0094 if (squared_mean_deviation == 0) {
0095
0096 Rsquared = 1;
0097 } else {
0098 Rsquared = 1 - squared_residuals/squared_mean_deviation;
0099 }
0100
0101 return std::make_tuple(c0, c1, Rsquared);
0102 }
0103 }
0104
0105 template<typename RandomAccessContainer, typename Real = typename RandomAccessContainer::value_type,
0106 typename std::enable_if<std::is_integral<Real>::value, bool>::type = true>
0107 inline auto simple_ordinary_least_squares(RandomAccessContainer const & x, RandomAccessContainer const & y) -> std::pair<double, double>
0108 {
0109 return detail::simple_ordinary_least_squares_impl<std::pair<double, double>>(x, y);
0110 }
0111
0112 template<typename RandomAccessContainer, typename Real = typename RandomAccessContainer::value_type,
0113 typename std::enable_if<!std::is_integral<Real>::value, bool>::type = true>
0114 inline auto simple_ordinary_least_squares(RandomAccessContainer const & x, RandomAccessContainer const & y) -> std::pair<Real, Real>
0115 {
0116 return detail::simple_ordinary_least_squares_impl<std::pair<Real, Real>>(x, y);
0117 }
0118
0119 template<typename RandomAccessContainer, typename Real = typename RandomAccessContainer::value_type,
0120 typename std::enable_if<std::is_integral<Real>::value, bool>::type = true>
0121 inline auto simple_ordinary_least_squares_with_R_squared(RandomAccessContainer const & x, RandomAccessContainer const & y) -> std::tuple<double, double, double>
0122 {
0123 return detail::simple_ordinary_least_squares_with_R_squared_impl<std::tuple<double, double, double>>(x, y);
0124 }
0125
0126 template<typename RandomAccessContainer, typename Real = typename RandomAccessContainer::value_type,
0127 typename std::enable_if<!std::is_integral<Real>::value, bool>::type = true>
0128 inline auto simple_ordinary_least_squares_with_R_squared(RandomAccessContainer const & x, RandomAccessContainer const & y) -> std::tuple<Real, Real, Real>
0129 {
0130 return detail::simple_ordinary_least_squares_with_R_squared_impl<std::tuple<Real, Real, Real>>(x, y);
0131 }
0132 }}}
0133 #endif