Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:40:39

0001 //  (C) Copyright Nick Thompson 2018.
0002 //  Use, modification and distribution are subject to the
0003 //  Boost Software License, Version 1.0. (See accompanying file
0004 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
0005 
0006 #ifndef BOOST_MATH_TOOLS_BIVARIATE_STATISTICS_HPP
0007 #define BOOST_MATH_TOOLS_BIVARIATE_STATISTICS_HPP
0008 
0009 #include <iterator>
0010 #include <tuple>
0011 #include <limits>
0012 #include <boost/math/tools/assert.hpp>
0013 #include <boost/math/tools/header_deprecated.hpp>
0014 
0015 BOOST_MATH_HEADER_DEPRECATED("<boost/math/statistics/bivariate_statistics.hpp>");
0016 
0017 namespace boost{ namespace math{ namespace tools {
0018 
0019 template<class Container>
0020 auto means_and_covariance(Container const & u, Container const & v)
0021 {
0022     using Real = typename Container::value_type;
0023     using std::size;
0024     BOOST_MATH_ASSERT_MSG(size(u) == size(v), "The size of each vector must be the same to compute covariance.");
0025     BOOST_MATH_ASSERT_MSG(size(u) > 0, "Computing covariance requires at least one sample.");
0026 
0027     // See Equation III.9 of "Numerically Stable, Single-Pass, Parallel Statistics Algorithms", Bennet et al.
0028     Real cov = 0;
0029     Real mu_u = u[0];
0030     Real mu_v = v[0];
0031 
0032     for(size_t i = 1; i < size(u); ++i)
0033     {
0034         Real u_tmp = (u[i] - mu_u)/(i+1);
0035         Real v_tmp = v[i] - mu_v;
0036         cov += i*u_tmp*v_tmp;
0037         mu_u = mu_u + u_tmp;
0038         mu_v = mu_v + v_tmp/(i+1);
0039     }
0040 
0041     return std::make_tuple(mu_u, mu_v, cov/size(u));
0042 }
0043 
0044 template<class Container>
0045 auto covariance(Container const & u, Container const & v)
0046 {
0047     auto [mu_u, mu_v, cov] = boost::math::tools::means_and_covariance(u, v);
0048     return cov;
0049 }
0050 
0051 template<class Container>
0052 auto correlation_coefficient(Container const & u, Container const & v)
0053 {
0054     using Real = typename Container::value_type;
0055     using std::size;
0056     BOOST_MATH_ASSERT_MSG(size(u) == size(v), "The size of each vector must be the same to compute covariance.");
0057     BOOST_MATH_ASSERT_MSG(size(u) > 0, "Computing covariance requires at least two samples.");
0058 
0059     Real cov = 0;
0060     Real mu_u = u[0];
0061     Real mu_v = v[0];
0062     Real Qu = 0;
0063     Real Qv = 0;
0064 
0065     for(size_t i = 1; i < size(u); ++i)
0066     {
0067         Real u_tmp = u[i] - mu_u;
0068         Real v_tmp = v[i] - mu_v;
0069         Qu = Qu + (i*u_tmp*u_tmp)/(i+1);
0070         Qv = Qv + (i*v_tmp*v_tmp)/(i+1);
0071         cov += i*u_tmp*v_tmp/(i+1);
0072         mu_u = mu_u + u_tmp/(i+1);
0073         mu_v = mu_v + v_tmp/(i+1);
0074     }
0075 
0076     // If one dataset is constant, then they have no correlation:
0077     // See https://stats.stackexchange.com/questions/23676/normalized-correlation-with-a-constant-vector
0078     // Thanks to zbjornson for pointing this out.
0079     if (Qu == 0 || Qv == 0)
0080     {
0081         return std::numeric_limits<Real>::quiet_NaN();
0082     }
0083 
0084     // Make sure rho in [-1, 1], even in the presence of numerical noise.
0085     Real rho = cov/sqrt(Qu*Qv);
0086     if (rho > 1) {
0087         rho = 1;
0088     }
0089     if (rho < -1) {
0090         rho = -1;
0091     }
0092     return rho;
0093 }
0094 
0095 }}}
0096 #endif