Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-12-16 09:55:37

0001 /*
0002  * Copyright Nick Thompson, 2019
0003  * Copyright Matt Borland, 2021
0004  * Use, modification and distribution are subject to the
0005  * Boost Software License, Version 1.0. (See accompanying file
0006  * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
0007  */
0008 
0009 #ifndef BOOST_MATH_STATISTICS_LINEAR_REGRESSION_HPP
0010 #define BOOST_MATH_STATISTICS_LINEAR_REGRESSION_HPP
0011 
0012 #include <cmath>
0013 #include <algorithm>
0014 #include <utility>
0015 #include <tuple>
0016 #include <stdexcept>
0017 #include <type_traits>
0018 #include <boost/math/statistics/univariate_statistics.hpp>
0019 #include <boost/math/statistics/bivariate_statistics.hpp>
0020 
0021 namespace boost { namespace math { namespace statistics { namespace detail {
0022 
0023 
0024 template<class ReturnType, class RandomAccessContainer>
0025 ReturnType simple_ordinary_least_squares_impl(RandomAccessContainer const & x,
0026                                               RandomAccessContainer const & y)
0027 {
0028     using Real = typename std::tuple_element<0, ReturnType>::type;
0029     if (x.size() <= 1)
0030     {
0031         throw std::domain_error("At least 2 samples are required to perform a linear regression.");
0032     }
0033 
0034     if (x.size() != y.size())
0035     {
0036         throw std::domain_error("The same number of samples must be in the independent and dependent variable.");
0037     }
0038     std::tuple<Real, Real, Real> temp = boost::math::statistics::means_and_covariance(x, y);
0039     Real mu_x = std::get<0>(temp);
0040     Real mu_y = std::get<1>(temp);
0041     Real cov_xy = std::get<2>(temp);
0042 
0043     Real var_x = boost::math::statistics::variance(x);
0044 
0045     if (var_x <= 0) {
0046         throw std::domain_error("Independent variable has no variance; this breaks linear regression.");
0047     }
0048 
0049 
0050     Real c1 = cov_xy/var_x;
0051     Real c0 = mu_y - c1*mu_x;
0052 
0053     return std::make_pair(c0, c1);
0054 }
0055 
0056 template<class ReturnType, class RandomAccessContainer>
0057 ReturnType simple_ordinary_least_squares_with_R_squared_impl(RandomAccessContainer const & x,
0058                                                              RandomAccessContainer const & y)
0059 {
0060     using Real = typename std::tuple_element<0, ReturnType>::type;
0061     if (x.size() <= 1)
0062     {
0063         throw std::domain_error("At least 2 samples are required to perform a linear regression.");
0064     }
0065 
0066     if (x.size() != y.size())
0067     {
0068         throw std::domain_error("The same number of samples must be in the independent and dependent variable.");
0069     }
0070     std::tuple<Real, Real, Real> temp = boost::math::statistics::means_and_covariance(x, y);
0071     Real mu_x = std::get<0>(temp);
0072     Real mu_y = std::get<1>(temp);
0073     Real cov_xy = std::get<2>(temp);
0074 
0075     Real var_x = boost::math::statistics::variance(x);
0076 
0077     if (var_x <= 0) {
0078         throw std::domain_error("Independent variable has no variance; this breaks linear regression.");
0079     }
0080 
0081 
0082     Real c1 = cov_xy/var_x;
0083     Real c0 = mu_y - c1*mu_x;
0084 
0085     Real squared_residuals = 0;
0086     Real squared_mean_deviation = 0;
0087     for(decltype(y.size()) i = 0; i < y.size(); ++i) {
0088         squared_mean_deviation += (y[i] - mu_y)*(y[i]-mu_y);
0089         Real ei = (c0 + c1*x[i]) - y[i];
0090         squared_residuals += ei*ei;
0091     }
0092 
0093     Real Rsquared;
0094     if (squared_mean_deviation == 0) {
0095         // Then y = constant, so the linear regression is perfect.
0096         Rsquared = 1;
0097     } else {
0098         Rsquared = 1 - squared_residuals/squared_mean_deviation;
0099     }
0100 
0101     return std::make_tuple(c0, c1, Rsquared);
0102 }
0103 } // namespace detail
0104 
0105 template<typename RandomAccessContainer, typename Real = typename RandomAccessContainer::value_type, 
0106          typename std::enable_if<std::is_integral<Real>::value, bool>::type = true>
0107 inline auto simple_ordinary_least_squares(RandomAccessContainer const & x, RandomAccessContainer const & y) -> std::pair<double, double>
0108 {
0109     return detail::simple_ordinary_least_squares_impl<std::pair<double, double>>(x, y);
0110 }
0111 
0112 template<typename RandomAccessContainer, typename Real = typename RandomAccessContainer::value_type, 
0113          typename std::enable_if<!std::is_integral<Real>::value, bool>::type = true>
0114 inline auto simple_ordinary_least_squares(RandomAccessContainer const & x, RandomAccessContainer const & y) -> std::pair<Real, Real>
0115 {
0116     return detail::simple_ordinary_least_squares_impl<std::pair<Real, Real>>(x, y);
0117 }
0118 
0119 template<typename RandomAccessContainer, typename Real = typename RandomAccessContainer::value_type, 
0120          typename std::enable_if<std::is_integral<Real>::value, bool>::type = true>
0121 inline auto simple_ordinary_least_squares_with_R_squared(RandomAccessContainer const & x, RandomAccessContainer const & y) -> std::tuple<double, double, double>
0122 {
0123     return detail::simple_ordinary_least_squares_with_R_squared_impl<std::tuple<double, double, double>>(x, y);
0124 }
0125 
0126 template<typename RandomAccessContainer, typename Real = typename RandomAccessContainer::value_type, 
0127          typename std::enable_if<!std::is_integral<Real>::value, bool>::type = true>
0128 inline auto simple_ordinary_least_squares_with_R_squared(RandomAccessContainer const & x, RandomAccessContainer const & y) -> std::tuple<Real, Real, Real>
0129 {
0130     return detail::simple_ordinary_least_squares_with_R_squared_impl<std::tuple<Real, Real, Real>>(x, y);
0131 }
0132 }}} // namespace boost::math::statistics
0133 #endif