Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:28:22

0001 ///////////////////////////////////////////////////////////////////////////////
0002 // weighted_p_square_quantile.hpp
0003 //
0004 //  Copyright 2005 Daniel Egloff. Distributed under the Boost
0005 //  Software License, Version 1.0. (See accompanying file
0006 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
0007 
0008 #ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_P_SQUARE_QUANTILE_HPP_DE_01_01_2006
0009 #define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_P_SQUARE_QUANTILE_HPP_DE_01_01_2006
0010 
0011 #include <cmath>
0012 #include <functional>
0013 #include <boost/array.hpp>
0014 #include <boost/parameter/keyword.hpp>
0015 #include <boost/mpl/placeholders.hpp>
0016 #include <boost/type_traits/is_same.hpp>
0017 #include <boost/accumulators/framework/accumulator_base.hpp>
0018 #include <boost/accumulators/framework/extractor.hpp>
0019 #include <boost/accumulators/numeric/functional.hpp>
0020 #include <boost/accumulators/framework/parameters/sample.hpp>
0021 #include <boost/accumulators/statistics_fwd.hpp>
0022 #include <boost/accumulators/statistics/count.hpp>
0023 #include <boost/accumulators/statistics/sum.hpp>
0024 #include <boost/accumulators/statistics/parameters/quantile_probability.hpp>
0025 
0026 namespace boost { namespace accumulators
0027 {
0028 
0029 namespace impl {
0030     ///////////////////////////////////////////////////////////////////////////////
0031     // weighted_p_square_quantile_impl
0032     //  single quantile estimation with weighted samples
0033     /**
0034         @brief Single quantile estimation with the \f$P^2\f$ algorithm for weighted samples
0035 
0036         This version of the \f$P^2\f$ algorithm extends the \f$P^2\f$ algorithm to support weighted samples.
0037         The \f$P^2\f$ algorithm estimates a quantile dynamically without storing samples. Instead of
0038         storing the whole sample cumulative distribution, only five points (markers) are stored. The heights
0039         of these markers are the minimum and the maximum of the samples and the current estimates of the
0040         \f$(p/2)\f$-, \f$p\f$ - and \f$(1+p)/2\f$ -quantiles. Their positions are equal to the number
0041         of samples that are smaller or equal to the markers. Each time a new sample is added, the
0042         positions of the markers are updated and if necessary their heights are adjusted using a piecewise-
0043         parabolic formula.
0044 
0045         For further details, see
0046 
0047         R. Jain and I. Chlamtac, The P^2 algorithm for dynamic calculation of quantiles and
0048         histograms without storing observations, Communications of the ACM,
0049         Volume 28 (October), Number 10, 1985, p. 1076-1085.
0050 
0051         @param quantile_probability
0052     */
0053     template<typename Sample, typename Weight, typename Impl>
0054     struct weighted_p_square_quantile_impl
0055       : accumulator_base
0056     {
0057         typedef typename numeric::functional::multiplies<Sample, Weight>::result_type weighted_sample;
0058         typedef typename numeric::functional::fdiv<weighted_sample, std::size_t>::result_type float_type;
0059         typedef array<float_type, 5> array_type;
0060         // for boost::result_of
0061         typedef float_type result_type;
0062 
0063         template<typename Args>
0064         weighted_p_square_quantile_impl(Args const &args)
0065           : p(is_same<Impl, for_median>::value ? 0.5 : args[quantile_probability | 0.5])
0066           , heights()
0067           , actual_positions()
0068           , desired_positions()
0069         {
0070         }
0071 
0072         template<typename Args>
0073         void operator ()(Args const &args)
0074         {
0075             std::size_t cnt = count(args);
0076 
0077             // accumulate 5 first samples
0078             if (cnt <= 5)
0079             {
0080                 this->heights[cnt - 1] = args[sample];
0081 
0082                 // In this initialization phase, actual_positions stores the weights of the
0083                 // initial samples that are needed at the end of the initialization phase to
0084                 // compute the correct initial positions of the markers.
0085                 this->actual_positions[cnt - 1] = args[weight];
0086 
0087                 // complete the initialization of heights and actual_positions by sorting
0088                 if (cnt == 5)
0089                 {
0090                     // TODO: we need to sort the initial samples (in heights) in ascending order and
0091                     // sort their weights (in actual_positions) the same way. The following lines do
0092                     // it, but there must be a better and more efficient way of doing this.
0093                     typename array_type::iterator it_begin, it_end, it_min;
0094 
0095                     it_begin = this->heights.begin();
0096                     it_end   = this->heights.end();
0097 
0098                     std::size_t pos = 0;
0099 
0100                     while (it_begin != it_end)
0101                     {
0102                         it_min = std::min_element(it_begin, it_end);
0103                         std::size_t d = std::distance(it_begin, it_min);
0104                         std::swap(*it_begin, *it_min);
0105                         std::swap(this->actual_positions[pos], this->actual_positions[pos + d]);
0106                         ++it_begin;
0107                         ++pos;
0108                     }
0109 
0110                     // calculate correct initial actual positions
0111                     for (std::size_t i = 1; i < 5; ++i)
0112                     {
0113                         this->actual_positions[i] += this->actual_positions[i - 1];
0114                     }
0115                 }
0116             }
0117             else
0118             {
0119                 std::size_t sample_cell = 1; // k
0120 
0121                 // find cell k such that heights[k-1] <= args[sample] < heights[k] and adjust extreme values
0122                 if (args[sample] < this->heights[0])
0123                 {
0124                     this->heights[0] = args[sample];
0125                     this->actual_positions[0] = args[weight];
0126                     sample_cell = 1;
0127                 }
0128                 else if (this->heights[4] <= args[sample])
0129                 {
0130                     this->heights[4] = args[sample];
0131                     sample_cell = 4;
0132                 }
0133                 else
0134                 {
0135                     typedef typename array_type::iterator iterator;
0136                     iterator it = std::upper_bound(
0137                         this->heights.begin()
0138                       , this->heights.end()
0139                       , args[sample]
0140                     );
0141 
0142                     sample_cell = std::distance(this->heights.begin(), it);
0143                 }
0144 
0145                 // increment positions of markers above sample_cell
0146                 for (std::size_t i = sample_cell; i < 5; ++i)
0147                 {
0148                     this->actual_positions[i] += args[weight];
0149                 }
0150 
0151                 // update desired positions for all markers
0152                 this->desired_positions[0] = this->actual_positions[0];
0153                 this->desired_positions[1] = (sum_of_weights(args) - this->actual_positions[0])
0154                                            * this->p/2. + this->actual_positions[0];
0155                 this->desired_positions[2] = (sum_of_weights(args) - this->actual_positions[0])
0156                                            * this->p + this->actual_positions[0];
0157                 this->desired_positions[3] = (sum_of_weights(args) - this->actual_positions[0])
0158                                            * (1. + this->p)/2. + this->actual_positions[0];
0159                 this->desired_positions[4] = sum_of_weights(args);
0160 
0161                 // adjust height and actual positions of markers 1 to 3 if necessary
0162                 for (std::size_t i = 1; i <= 3; ++i)
0163                 {
0164                     // offset to desired positions
0165                     float_type d = this->desired_positions[i] - this->actual_positions[i];
0166 
0167                     // offset to next position
0168                     float_type dp = this->actual_positions[i + 1] - this->actual_positions[i];
0169 
0170                     // offset to previous position
0171                     float_type dm = this->actual_positions[i - 1] - this->actual_positions[i];
0172 
0173                     // height ds
0174                     float_type hp = (this->heights[i + 1] - this->heights[i]) / dp;
0175                     float_type hm = (this->heights[i - 1] - this->heights[i]) / dm;
0176 
0177                     if ( ( d >= 1. && dp > 1. ) || ( d <= -1. && dm < -1. ) )
0178                     {
0179                         short sign_d = static_cast<short>(d / std::abs(d));
0180 
0181                         // try adjusting heights[i] using p-squared formula
0182                         float_type h = this->heights[i] + sign_d / (dp - dm) * ( (sign_d - dm) * hp + (dp - sign_d) * hm );
0183 
0184                         if ( this->heights[i - 1] < h && h < this->heights[i + 1] )
0185                         {
0186                             this->heights[i] = h;
0187                         }
0188                         else
0189                         {
0190                             // use linear formula
0191                             if (d>0)
0192                             {
0193                                 this->heights[i] += hp;
0194                             }
0195                             if (d<0)
0196                             {
0197                                 this->heights[i] -= hm;
0198                             }
0199                         }
0200                         this->actual_positions[i] += sign_d;
0201                     }
0202                 }
0203             }
0204         }
0205 
0206         result_type result(dont_care) const
0207         {
0208             return this->heights[2];
0209         }
0210 
0211         // make this accumulator serializeable
0212         // TODO split to save/load and check on parameters provided in ctor
0213         template<class Archive>
0214         void serialize(Archive & ar, const unsigned int file_version)
0215         {
0216             ar & p;
0217             ar & heights;
0218             ar & actual_positions;
0219             ar & desired_positions;
0220         }
0221 
0222     private:
0223         float_type p;                    // the quantile probability p
0224         array_type heights;              // q_i
0225         array_type actual_positions;     // n_i
0226         array_type desired_positions;    // n'_i
0227     };
0228 
0229 } // namespace impl
0230 
0231 ///////////////////////////////////////////////////////////////////////////////
0232 // tag::weighted_p_square_quantile
0233 //
0234 namespace tag
0235 {
0236     struct weighted_p_square_quantile
0237       : depends_on<count, sum_of_weights>
0238     {
0239         typedef accumulators::impl::weighted_p_square_quantile_impl<mpl::_1, mpl::_2, regular> impl;
0240     };
0241     struct weighted_p_square_quantile_for_median
0242       : depends_on<count, sum_of_weights>
0243     {
0244         typedef accumulators::impl::weighted_p_square_quantile_impl<mpl::_1, mpl::_2, for_median> impl;
0245     };
0246 }
0247 
0248 ///////////////////////////////////////////////////////////////////////////////
0249 // extract::weighted_p_square_quantile
0250 // extract::weighted_p_square_quantile_for_median
0251 //
0252 namespace extract
0253 {
0254     extractor<tag::weighted_p_square_quantile> const weighted_p_square_quantile = {};
0255     extractor<tag::weighted_p_square_quantile_for_median> const weighted_p_square_quantile_for_median = {};
0256 
0257     BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_p_square_quantile)
0258     BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_p_square_quantile_for_median)
0259 }
0260 
0261 using extract::weighted_p_square_quantile;
0262 using extract::weighted_p_square_quantile_for_median;
0263 
0264 }} // namespace boost::accumulators
0265 
0266 #endif