Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:28:21

0001 ///////////////////////////////////////////////////////////////////////////////
0002 // weighted_extended_p_square.hpp
0003 //
0004 //  Copyright 2005 Daniel Egloff. Distributed under the Boost
0005 //  Software License, Version 1.0. (See accompanying file
0006 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
0007 
0008 #ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_EXTENDED_P_SQUARE_HPP_DE_01_01_2006
0009 #define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_EXTENDED_P_SQUARE_HPP_DE_01_01_2006
0010 
0011 #include <vector>
0012 #include <functional>
0013 #include <boost/range/begin.hpp>
0014 #include <boost/range/end.hpp>
0015 #include <boost/range/iterator_range.hpp>
0016 #include <boost/iterator/transform_iterator.hpp>
0017 #include <boost/iterator/counting_iterator.hpp>
0018 #include <boost/iterator/permutation_iterator.hpp>
0019 #include <boost/parameter/keyword.hpp>
0020 #include <boost/mpl/placeholders.hpp>
0021 #include <boost/accumulators/framework/accumulator_base.hpp>
0022 #include <boost/accumulators/framework/extractor.hpp>
0023 #include <boost/accumulators/numeric/functional.hpp>
0024 #include <boost/accumulators/framework/parameters/sample.hpp>
0025 #include <boost/accumulators/framework/depends_on.hpp>
0026 #include <boost/accumulators/statistics_fwd.hpp>
0027 #include <boost/accumulators/statistics/count.hpp>
0028 #include <boost/accumulators/statistics/sum.hpp>
0029 #include <boost/accumulators/statistics/times2_iterator.hpp>
0030 #include <boost/accumulators/statistics/extended_p_square.hpp>
0031 #include <boost/serialization/vector.hpp>
0032 
0033 namespace boost { namespace accumulators
0034 {
0035 
0036 namespace impl
0037 {
0038     ///////////////////////////////////////////////////////////////////////////////
0039     // weighted_extended_p_square_impl
0040     //  multiple quantile estimation with weighted samples
0041     /**
0042         @brief Multiple quantile estimation with the extended \f$P^2\f$ algorithm for weighted samples
0043 
0044         This version of the extended \f$P^2\f$ algorithm extends the extended \f$P^2\f$ algorithm to
0045         support weighted samples. The extended \f$P^2\f$ algorithm dynamically estimates several
0046         quantiles without storing samples. Assume that \f$m\f$ quantiles
0047         \f$\xi_{p_1}, \ldots, \xi_{p_m}\f$ are to be estimated. Instead of storing the whole sample
0048         cumulative distribution, the algorithm maintains only \f$m+2\f$ principal markers and
0049         \f$m+1\f$ middle markers, whose positions are updated with each sample and whose heights
0050         are adjusted (if necessary) using a piecewise-parablic formula. The heights of the principal
0051         markers are the current estimates of the quantiles and are returned as an iterator range.
0052 
0053         For further details, see
0054 
0055         K. E. E. Raatikainen, Simultaneous estimation of several quantiles, Simulation, Volume 49,
0056         Number 4 (October), 1986, p. 159-164.
0057 
0058         The extended \f$ P^2 \f$ algorithm generalizes the \f$ P^2 \f$ algorithm of
0059 
0060         R. Jain and I. Chlamtac, The P^2 algorithm for dynamic calculation of quantiles and
0061         histograms without storing observations, Communications of the ACM,
0062         Volume 28 (October), Number 10, 1985, p. 1076-1085.
0063 
0064         @param extended_p_square_probabilities A vector of quantile probabilities.
0065     */
0066     template<typename Sample, typename Weight>
0067     struct weighted_extended_p_square_impl
0068       : accumulator_base
0069     {
0070         typedef typename numeric::functional::multiplies<Sample, Weight>::result_type weighted_sample;
0071         typedef typename numeric::functional::fdiv<weighted_sample, std::size_t>::result_type float_type;
0072         typedef std::vector<float_type> array_type;
0073         // for boost::result_of
0074         typedef iterator_range<
0075             detail::lvalue_index_iterator<
0076                 permutation_iterator<
0077                     typename array_type::const_iterator
0078                   , detail::times2_iterator
0079                 >
0080             >
0081         > result_type;
0082 
0083         template<typename Args>
0084         weighted_extended_p_square_impl(Args const &args)
0085           : probabilities(
0086                 boost::begin(args[extended_p_square_probabilities])
0087               , boost::end(args[extended_p_square_probabilities])
0088             )
0089           , heights(2 * probabilities.size() + 3)
0090           , actual_positions(heights.size())
0091           , desired_positions(heights.size())
0092         {
0093         }
0094 
0095         template<typename Args>
0096         void operator ()(Args const &args)
0097         {
0098             std::size_t cnt = count(args);
0099             std::size_t sample_cell = 1; // k
0100             std::size_t num_quantiles = this->probabilities.size();
0101 
0102             // m+2 principal markers and m+1 middle markers
0103             std::size_t num_markers = 2 * num_quantiles + 3;
0104 
0105             // first accumulate num_markers samples
0106             if(cnt <= num_markers)
0107             {
0108                 this->heights[cnt - 1] = args[sample];
0109                 this->actual_positions[cnt - 1] = args[weight];
0110 
0111                 // complete the initialization of heights (and actual_positions) by sorting
0112                 if(cnt == num_markers)
0113                 {
0114                     // TODO: we need to sort the initial samples (in heights) in ascending order and
0115                     // sort their weights (in actual_positions) the same way. The following lines do
0116                     // it, but there must be a better and more efficient way of doing this.
0117                     typename array_type::iterator it_begin, it_end, it_min;
0118 
0119                     it_begin = this->heights.begin();
0120                     it_end   = this->heights.end();
0121 
0122                     std::size_t pos = 0;
0123 
0124                     while (it_begin != it_end)
0125                     {
0126                         it_min = std::min_element(it_begin, it_end);
0127                         std::size_t d = std::distance(it_begin, it_min);
0128                         std::swap(*it_begin, *it_min);
0129                         std::swap(this->actual_positions[pos], this->actual_positions[pos + d]);
0130                         ++it_begin;
0131                         ++pos;
0132                     }
0133 
0134                     // calculate correct initial actual positions
0135                     for (std::size_t i = 1; i < num_markers; ++i)
0136                     {
0137                         actual_positions[i] += actual_positions[i - 1];
0138                     }
0139                 }
0140             }
0141             else
0142             {
0143                 if(args[sample] < this->heights[0])
0144                 {
0145                     this->heights[0] = args[sample];
0146                     this->actual_positions[0] = args[weight];
0147                     sample_cell = 1;
0148                 }
0149                 else if(args[sample] >= this->heights[num_markers - 1])
0150                 {
0151                     this->heights[num_markers - 1] = args[sample];
0152                     sample_cell = num_markers - 1;
0153                 }
0154                 else
0155                 {
0156                     // find cell k = sample_cell such that heights[k-1] <= sample < heights[k]
0157 
0158                     typedef typename array_type::iterator iterator;
0159                     iterator it = std::upper_bound(
0160                         this->heights.begin()
0161                       , this->heights.end()
0162                       , args[sample]
0163                     );
0164 
0165                     sample_cell = std::distance(this->heights.begin(), it);
0166                 }
0167 
0168                 // update actual position of all markers above sample_cell
0169                 for(std::size_t i = sample_cell; i < num_markers; ++i)
0170                 {
0171                     this->actual_positions[i] += args[weight];
0172                 }
0173 
0174                 // compute desired positions
0175                 {
0176                     this->desired_positions[0] = this->actual_positions[0];
0177                     this->desired_positions[num_markers - 1] = sum_of_weights(args);
0178                     this->desired_positions[1] = (sum_of_weights(args) - this->actual_positions[0]) * probabilities[0]
0179                                               / 2. + this->actual_positions[0];
0180                     this->desired_positions[num_markers - 2] = (sum_of_weights(args) - this->actual_positions[0])
0181                                                             * (probabilities[num_quantiles - 1] + 1.)
0182                                                             / 2. + this->actual_positions[0];
0183 
0184                     for (std::size_t i = 0; i < num_quantiles; ++i)
0185                     {
0186                         this->desired_positions[2 * i + 2] = (sum_of_weights(args) - this->actual_positions[0])
0187                                                           * probabilities[i] + this->actual_positions[0];
0188                     }
0189 
0190                     for (std::size_t i = 1; i < num_quantiles; ++i)
0191                     {
0192                         this->desired_positions[2 * i + 1] = (sum_of_weights(args) - this->actual_positions[0])
0193                                                       * (probabilities[i - 1] + probabilities[i])
0194                                                       / 2. + this->actual_positions[0];
0195                     }
0196                 }
0197 
0198                 // adjust heights and actual_positions of markers 1 to num_markers - 2 if necessary
0199                 for (std::size_t i = 1; i <= num_markers - 2; ++i)
0200                 {
0201                     // offset to desired position
0202                     float_type d = this->desired_positions[i] - this->actual_positions[i];
0203 
0204                     // offset to next position
0205                     float_type dp = this->actual_positions[i + 1] - this->actual_positions[i];
0206 
0207                     // offset to previous position
0208                     float_type dm = this->actual_positions[i - 1] - this->actual_positions[i];
0209 
0210                     // height ds
0211                     float_type hp = (this->heights[i + 1] - this->heights[i]) / dp;
0212                     float_type hm = (this->heights[i - 1] - this->heights[i]) / dm;
0213 
0214                     if((d >= 1 && dp > 1) || (d <= -1 && dm < -1))
0215                     {
0216                         short sign_d = static_cast<short>(d / std::abs(d));
0217 
0218                         float_type h = this->heights[i] + sign_d / (dp - dm) * ((sign_d - dm)*hp + (dp - sign_d) * hm);
0219 
0220                         // try adjusting heights[i] using p-squared formula
0221                         if(this->heights[i - 1] < h && h < this->heights[i + 1])
0222                         {
0223                             this->heights[i] = h;
0224                         }
0225                         else
0226                         {
0227                             // use linear formula
0228                             if(d > 0)
0229                             {
0230                                 this->heights[i] += hp;
0231                             }
0232                             if(d < 0)
0233                             {
0234                                 this->heights[i] -= hm;
0235                             }
0236                         }
0237                         this->actual_positions[i] += sign_d;
0238                     }
0239                 }
0240             }
0241         }
0242 
0243         result_type result(dont_care) const
0244         {
0245             // for i in [1,probabilities.size()], return heights[i * 2]
0246             detail::times2_iterator idx_begin = detail::make_times2_iterator(1);
0247             detail::times2_iterator idx_end = detail::make_times2_iterator(this->probabilities.size() + 1);
0248 
0249             return result_type(
0250                 make_permutation_iterator(this->heights.begin(), idx_begin)
0251               , make_permutation_iterator(this->heights.begin(), idx_end)
0252             );
0253         }
0254 
0255         // make this accumulator serializeable
0256         // TODO: do we need to split to load/save and verify that the parameters did not change?
0257         template<class Archive>
0258         void serialize(Archive & ar, const unsigned int file_version)
0259         { 
0260             ar & probabilities;
0261             ar & heights;
0262             ar & actual_positions;
0263             ar & desired_positions;
0264         }
0265 
0266     private:
0267         array_type probabilities;         // the quantile probabilities
0268         array_type heights;               // q_i
0269         array_type actual_positions;      // n_i
0270         array_type desired_positions;     // d_i
0271     };
0272 
0273 } // namespace impl
0274 
0275 ///////////////////////////////////////////////////////////////////////////////
0276 // tag::weighted_extended_p_square
0277 //
0278 namespace tag
0279 {
0280     struct weighted_extended_p_square
0281       : depends_on<count, sum_of_weights>
0282       , extended_p_square_probabilities
0283     {
0284         typedef accumulators::impl::weighted_extended_p_square_impl<mpl::_1, mpl::_2> impl;
0285     };
0286 }
0287 
0288 ///////////////////////////////////////////////////////////////////////////////
0289 // extract::weighted_extended_p_square
0290 //
0291 namespace extract
0292 {
0293     extractor<tag::weighted_extended_p_square> const weighted_extended_p_square = {};
0294 
0295     BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_extended_p_square)
0296 }
0297 
0298 using extract::weighted_extended_p_square;
0299 
0300 }} // namespace boost::accumulators
0301 
0302 #endif