Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:28:22

0001 ///////////////////////////////////////////////////////////////////////////////
0002 // weighted_median.hpp
0003 //
0004 //  Copyright 2006 Eric Niebler, Olivier Gygi. Distributed under the Boost
0005 //  Software License, Version 1.0. (See accompanying file
0006 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
0007 
0008 #ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_MEDIAN_HPP_EAN_28_10_2005
0009 #define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_MEDIAN_HPP_EAN_28_10_2005
0010 
0011 #include <boost/mpl/placeholders.hpp>
0012 #include <boost/range/iterator_range.hpp>
0013 #include <boost/accumulators/framework/accumulator_base.hpp>
0014 #include <boost/accumulators/framework/extractor.hpp>
0015 #include <boost/accumulators/numeric/functional.hpp>
0016 #include <boost/accumulators/framework/parameters/sample.hpp>
0017 #include <boost/accumulators/framework/depends_on.hpp>
0018 #include <boost/accumulators/statistics_fwd.hpp>
0019 #include <boost/accumulators/statistics/count.hpp>
0020 #include <boost/accumulators/statistics/median.hpp>
0021 #include <boost/accumulators/statistics/weighted_p_square_quantile.hpp>
0022 #include <boost/accumulators/statistics/weighted_density.hpp>
0023 #include <boost/accumulators/statistics/weighted_p_square_cumul_dist.hpp>
0024 
0025 namespace boost { namespace accumulators
0026 {
0027 
0028 namespace impl
0029 {
0030     ///////////////////////////////////////////////////////////////////////////////
0031     // weighted_median_impl
0032     //
0033     /**
0034         @brief Median estimation for weighted samples based on the \f$P^2\f$ quantile estimator
0035 
0036         The \f$P^2\f$ algorithm for weighted samples is invoked with a quantile probability of 0.5.
0037     */
0038     template<typename Sample>
0039     struct weighted_median_impl
0040       : accumulator_base
0041     {
0042         // for boost::result_of
0043         typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type result_type;
0044 
0045         weighted_median_impl(dont_care) {}
0046 
0047         template<typename Args>
0048         result_type result(Args const &args) const
0049         {
0050             return weighted_p_square_quantile_for_median(args);
0051         }
0052     };
0053 
0054     ///////////////////////////////////////////////////////////////////////////////
0055     // with_density_weighted_median_impl
0056     //
0057     /**
0058         @brief Median estimation for weighted samples based on the density estimator
0059 
0060         The algorithm determines the bin in which the \f$0.5*cnt\f$-th sample lies, \f$cnt\f$ being
0061         the total number of samples. It returns the approximate horizontal position of this sample,
0062         based on a linear interpolation inside the bin.
0063     */
0064     template<typename Sample>
0065     struct with_density_weighted_median_impl
0066       : accumulator_base
0067     {
0068         typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type;
0069         typedef std::vector<std::pair<float_type, float_type> > histogram_type;
0070         typedef iterator_range<typename histogram_type::iterator> range_type;
0071         // for boost::result_of
0072         typedef float_type result_type;
0073 
0074         template<typename Args>
0075         with_density_weighted_median_impl(Args const &args)
0076           : sum(numeric::fdiv(args[sample | Sample()], (std::size_t)1))
0077           , is_dirty(true)
0078         {
0079         }
0080 
0081         void operator ()(dont_care)
0082         {
0083             this->is_dirty = true;
0084         }
0085 
0086         template<typename Args>
0087         result_type result(Args const &args) const
0088         {
0089             if (this->is_dirty)
0090             {
0091                 this->is_dirty = false;
0092 
0093                 std::size_t cnt = count(args);
0094                 range_type histogram = weighted_density(args);
0095                 typename range_type::iterator it = histogram.begin();
0096                 while (this->sum < 0.5 * cnt)
0097                 {
0098                     this->sum += it->second * cnt;
0099                     ++it;
0100                 }
0101                 --it;
0102                 float_type over = numeric::fdiv(this->sum - 0.5 * cnt, it->second * cnt);
0103                 this->median = it->first * over + (it + 1)->first * ( 1. - over );
0104             }
0105 
0106             return this->median;
0107         }
0108 
0109         // make this accumulator serializeable
0110         template<class Archive>
0111         void serialize(Archive & ar, const unsigned int file_version)
0112         { 
0113             ar & sum;
0114             ar & is_dirty;
0115             ar & median;
0116         }
0117 
0118     private:
0119         mutable float_type sum;
0120         mutable bool is_dirty;
0121         mutable float_type median;
0122     };
0123 
0124     ///////////////////////////////////////////////////////////////////////////////
0125     // with_p_square_cumulative_distribution_weighted_median_impl
0126     //
0127     /**
0128         @brief Median estimation for weighted samples based on the \f$P^2\f$ cumulative distribution estimator
0129 
0130         The algorithm determines the first (leftmost) bin with a height exceeding 0.5. It
0131         returns the approximate horizontal position of where the cumulative distribution
0132         equals 0.5, based on a linear interpolation inside the bin.
0133     */
0134     template<typename Sample, typename Weight>
0135     struct with_p_square_cumulative_distribution_weighted_median_impl
0136       : accumulator_base
0137     {
0138         typedef typename numeric::functional::multiplies<Sample, Weight>::result_type weighted_sample;
0139         typedef typename numeric::functional::fdiv<weighted_sample, std::size_t>::result_type float_type;
0140         typedef std::vector<std::pair<float_type, float_type> > histogram_type;
0141         typedef iterator_range<typename histogram_type::iterator> range_type;
0142         // for boost::result_of
0143         typedef float_type result_type;
0144 
0145         with_p_square_cumulative_distribution_weighted_median_impl(dont_care)
0146           : is_dirty(true)
0147         {
0148         }
0149 
0150         void operator ()(dont_care)
0151         {
0152             this->is_dirty = true;
0153         }
0154 
0155         template<typename Args>
0156         result_type result(Args const &args) const
0157         {
0158             if (this->is_dirty)
0159             {
0160                 this->is_dirty = false;
0161 
0162                 range_type histogram = weighted_p_square_cumulative_distribution(args);
0163                 typename range_type::iterator it = histogram.begin();
0164                 while (it->second < 0.5)
0165                 {
0166                     ++it;
0167                 }
0168                 float_type over = numeric::fdiv(it->second - 0.5, it->second - (it - 1)->second);
0169                 this->median = it->first * over + (it + 1)->first * ( 1. - over );
0170             }
0171 
0172             return this->median;
0173         }
0174         
0175         // make this accumulator serializeable
0176         template<class Archive>
0177         void serialize(Archive & ar, const unsigned int file_version)
0178         { 
0179             ar & is_dirty;
0180             ar & median;
0181         }
0182 
0183     private:
0184         mutable bool is_dirty;
0185         mutable float_type median;
0186     };
0187 
0188 } // namespace impl
0189 
0190 ///////////////////////////////////////////////////////////////////////////////
0191 // tag::weighted_median
0192 // tag::with_density_weighted_median
0193 // tag::with_p_square_cumulative_distribution_weighted_median
0194 //
0195 namespace tag
0196 {
0197     struct weighted_median
0198       : depends_on<weighted_p_square_quantile_for_median>
0199     {
0200         /// INTERNAL ONLY
0201         ///
0202         typedef accumulators::impl::weighted_median_impl<mpl::_1> impl;
0203     };
0204     struct with_density_weighted_median
0205       : depends_on<count, weighted_density>
0206     {
0207         /// INTERNAL ONLY
0208         ///
0209         typedef accumulators::impl::with_density_weighted_median_impl<mpl::_1> impl;
0210     };
0211     struct with_p_square_cumulative_distribution_weighted_median
0212       : depends_on<weighted_p_square_cumulative_distribution>
0213     {
0214         /// INTERNAL ONLY
0215         ///
0216         typedef accumulators::impl::with_p_square_cumulative_distribution_weighted_median_impl<mpl::_1, mpl::_2> impl;
0217     };
0218 
0219 }
0220 
0221 ///////////////////////////////////////////////////////////////////////////////
0222 // extract::weighted_median
0223 //
0224 namespace extract
0225 {
0226     extractor<tag::median> const weighted_median = {};
0227 
0228     BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_median)
0229 }
0230 
0231 using extract::weighted_median;
0232 // weighted_median(with_p_square_quantile) -> weighted_median
0233 template<>
0234 struct as_feature<tag::weighted_median(with_p_square_quantile)>
0235 {
0236     typedef tag::weighted_median type;
0237 };
0238 
0239 // weighted_median(with_density) -> with_density_weighted_median
0240 template<>
0241 struct as_feature<tag::weighted_median(with_density)>
0242 {
0243     typedef tag::with_density_weighted_median type;
0244 };
0245 
0246 // weighted_median(with_p_square_cumulative_distribution) -> with_p_square_cumulative_distribution_weighted_median
0247 template<>
0248 struct as_feature<tag::weighted_median(with_p_square_cumulative_distribution)>
0249 {
0250     typedef tag::with_p_square_cumulative_distribution_weighted_median type;
0251 };
0252 
0253 }} // namespace boost::accumulators
0254 
0255 #endif