accumulators/statistics/extended_p_square.hpp

0001 ///////////////////////////////////////////////////////////////////////////////
0002 // extended_p_square.hpp
0003 //
0004 //  Copyright 2005 Daniel Egloff. Distributed under the Boost
0005 //  Software License, Version 1.0. (See accompanying file
0006 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
0007
0008 #ifndef BOOST_ACCUMULATORS_STATISTICS_EXTENDED_SINGLE_HPP_DE_01_01_2006
0009 #define BOOST_ACCUMULATORS_STATISTICS_EXTENDED_SINGLE_HPP_DE_01_01_2006
0010
0011 #include <vector>
0012 #include <functional>
0013 #include <boost/range/begin.hpp>
0014 #include <boost/range/end.hpp>
0015 #include <boost/range/iterator_range.hpp>
0016 #include <boost/iterator/transform_iterator.hpp>
0017 #include <boost/iterator/counting_iterator.hpp>
0018 #include <boost/iterator/permutation_iterator.hpp>
0019 #include <boost/parameter/keyword.hpp>
0020 #include <boost/mpl/placeholders.hpp>
0021 #include <boost/accumulators/accumulators_fwd.hpp>
0022 #include <boost/accumulators/framework/extractor.hpp>
0023 #include <boost/accumulators/numeric/functional.hpp>
0024 #include <boost/accumulators/framework/parameters/sample.hpp>
0025 #include <boost/accumulators/framework/depends_on.hpp>
0026 #include <boost/accumulators/statistics_fwd.hpp>
0027 #include <boost/accumulators/statistics/count.hpp>
0028 #include <boost/accumulators/statistics/times2_iterator.hpp>
0029 #include <boost/serialization/vector.hpp>
0030
0031 namespace boost { namespace accumulators
0032 {
0033 ///////////////////////////////////////////////////////////////////////////////
0034 // probabilities named parameter
0035 //
0036 BOOST_PARAMETER_NESTED_KEYWORD(tag, extended_p_square_probabilities, probabilities)
0037
0038 BOOST_ACCUMULATORS_IGNORE_GLOBAL(extended_p_square_probabilities)
0039
0040 namespace impl
0041 {
0042     ///////////////////////////////////////////////////////////////////////////////
0043     // extended_p_square_impl
0044     //  multiple quantile estimation
0045     /**
0046         @brief Multiple quantile estimation with the extended \f$P^2\f$ algorithm
0047
0048         Extended \f$P^2\f$ algorithm for estimation of several quantiles without storing samples.
0049         Assume that \f$m\f$ quantiles \f$\xi_{p_1}, \ldots, \xi_{p_m}\f$ are to be estimated.
0050         Instead of storing the whole sample cumulative distribution, the algorithm maintains only
0051         \f$m+2\f$ principal markers and \f$m+1\f$ middle markers, whose positions are updated
0052         with each sample and whose heights are adjusted (if necessary) using a piecewise-parablic
0053         formula. The heights of these central markers are the current estimates of the quantiles
0054         and returned as an iterator range.
0055
0056         For further details, see
0057
0058         K. E. E. Raatikainen, Simultaneous estimation of several quantiles, Simulation, Volume 49,
0059         Number 4 (October), 1986, p. 159-164.
0060
0061         The extended \f$ P^2 \f$ algorithm generalizes the \f$ P^2 \f$ algorithm of
0062
0063         R. Jain and I. Chlamtac, The P^2 algorithm for dynamic calculation of quantiles and
0064         histograms without storing observations, Communications of the ACM,
0065         Volume 28 (October), Number 10, 1985, p. 1076-1085.
0066
0067         @param extended_p_square_probabilities A vector of quantile probabilities.
0068     */
0069     template<typename Sample>
0070     struct extended_p_square_impl
0071       : accumulator_base
0072     {
0073         typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type;
0074         typedef std::vector<float_type> array_type;
0075         // for boost::result_of
0076         typedef iterator_range<
0077             detail::lvalue_index_iterator<
0078                 permutation_iterator<
0079                     typename array_type::const_iterator
0080                   , detail::times2_iterator
0081                 >
0082             >
0083         > result_type;
0084
0085         template<typename Args>
0086         extended_p_square_impl(Args const &args)
0087           : probabilities(
0088                 boost::begin(args[extended_p_square_probabilities])
0089               , boost::end(args[extended_p_square_probabilities])
0090             )
0091           , heights(2 * probabilities.size() + 3)
0092           , actual_positions(heights.size())
0093           , desired_positions(heights.size())
0094           , positions_increments(heights.size())
0095         {
0096             std::size_t num_quantiles = this->probabilities.size();
0097             std::size_t num_markers = this->heights.size();
0098
0099             for(std::size_t i = 0; i < num_markers; ++i)
0100             {
0101                 this->actual_positions[i] = i + 1;
0102             }
0103
0104             this->positions_increments[0] = 0.;
0105             this->positions_increments[num_markers - 1] = 1.;
0106
0107             for(std::size_t i = 0; i < num_quantiles; ++i)
0108             {
0109                 this->positions_increments[2 * i + 2] = probabilities[i];
0110             }
0111
0112             for(std::size_t i = 0; i <= num_quantiles; ++i)
0113             {
0114                 this->positions_increments[2 * i + 1] =
0115                     0.5 * (this->positions_increments[2 * i] + this->positions_increments[2 * i + 2]);
0116             }
0117
0118             for(std::size_t i = 0; i < num_markers; ++i)
0119             {
0120                 this->desired_positions[i] = 1. + 2. * (num_quantiles + 1.) * this->positions_increments[i];
0121             }
0122         }
0123
0124         template<typename Args>
0125         void operator ()(Args const &args)
0126         {
0127             std::size_t cnt = count(args);
0128
0129             // m+2 principal markers and m+1 middle markers
0130             std::size_t num_markers = 2 * this->probabilities.size() + 3;
0131
0132             // first accumulate num_markers samples
0133             if(cnt <= num_markers)
0134             {
0135                 this->heights[cnt - 1] = args[sample];
0136
0137                 // complete the initialization of heights by sorting
0138                 if(cnt == num_markers)
0139                 {
0140                     std::sort(this->heights.begin(), this->heights.end());
0141                 }
0142             }
0143             else
0144             {
0145                 std::size_t sample_cell = 1;
0146
0147                 // find cell k = sample_cell such that heights[k-1] <= sample < heights[k]
0148                 if(args[sample] < this->heights[0])
0149                 {
0150                     this->heights[0] = args[sample];
0151                     sample_cell = 1;
0152                 }
0153                 else if(args[sample] >= this->heights[num_markers - 1])
0154                 {
0155                     this->heights[num_markers - 1] = args[sample];
0156                     sample_cell = num_markers - 1;
0157                 }
0158                 else
0159                 {
0160                     typedef typename array_type::iterator iterator;
0161                     iterator it = std::upper_bound(
0162                         this->heights.begin()
0163                       , this->heights.end()
0164                       , args[sample]
0165                     );
0166
0167                     sample_cell = std::distance(this->heights.begin(), it);
0168                 }
0169
0170                 // update actual positions of all markers above sample_cell index
0171                 for(std::size_t i = sample_cell; i < num_markers; ++i)
0172                 {
0173                     ++this->actual_positions[i];
0174                 }
0175
0176                 // update desired positions of all markers
0177                 for(std::size_t i = 0; i < num_markers; ++i)
0178                 {
0179                     this->desired_positions[i] += this->positions_increments[i];
0180                 }
0181
0182                 // adjust heights and actual positions of markers 1 to num_markers-2 if necessary
0183                 for(std::size_t i = 1; i <= num_markers - 2; ++i)
0184                 {
0185                     // offset to desired position
0186                     float_type d = this->desired_positions[i] - this->actual_positions[i];
0187
0188                     // offset to next position
0189                     float_type dp = this->actual_positions[i+1] - this->actual_positions[i];
0190
0191                     // offset to previous position
0192                     float_type dm = this->actual_positions[i-1] - this->actual_positions[i];
0193
0194                     // height ds
0195                     float_type hp = (this->heights[i+1] - this->heights[i]) / dp;
0196                     float_type hm = (this->heights[i-1] - this->heights[i]) / dm;
0197
0198                     if((d >= 1 && dp > 1) || (d <= -1 && dm < -1))
0199                     {
0200                         short sign_d = static_cast<short>(d / std::abs(d));
0201
0202                         float_type h = this->heights[i] + sign_d / (dp - dm) * ((sign_d - dm)*hp
0203                                      + (dp - sign_d) * hm);
0204
0205                         // try adjusting heights[i] using p-squared formula
0206                         if(this->heights[i - 1] < h && h < this->heights[i + 1])
0207                         {
0208                             this->heights[i] = h;
0209                         }
0210                         else
0211                         {
0212                             // use linear formula
0213                             if(d > 0)
0214                             {
0215                                 this->heights[i] += hp;
0216                             }
0217                             if(d < 0)
0218                             {
0219                                 this->heights[i] -= hm;
0220                             }
0221                         }
0222                         this->actual_positions[i] += sign_d;
0223                     }
0224                 }
0225             }
0226         }
0227
0228         result_type result(dont_care) const
0229         {
0230             // for i in [1,probabilities.size()], return heights[i * 2]
0231             detail::times2_iterator idx_begin = detail::make_times2_iterator(1);
0232             detail::times2_iterator idx_end = detail::make_times2_iterator(this->probabilities.size() + 1);
0233
0234             return result_type(
0235                 make_permutation_iterator(this->heights.begin(), idx_begin)
0236               , make_permutation_iterator(this->heights.begin(), idx_end)
0237             );
0238         }
0239
0240     public:
0241         // make this accumulator serializeable
0242         // TODO: do we need to split to load/save and verify that the parameters did not change?
0243         template<class Archive>
0244         void serialize(Archive & ar, const unsigned int file_version)
0245         {
0246             ar & probabilities;
0247             ar & heights;
0248             ar & actual_positions;
0249             ar & desired_positions;
0250             ar & positions_increments;
0251         }
0252
0253     private:
0254         array_type probabilities;         // the quantile probabilities
0255         array_type heights;               // q_i
0256         array_type actual_positions;      // n_i
0257         array_type desired_positions;     // d_i
0258         array_type positions_increments;  // f_i
0259     };
0260
0261 } // namespace impl
0262
0263 ///////////////////////////////////////////////////////////////////////////////
0264 // tag::extended_p_square
0265 //
0266 namespace tag
0267 {
0268     struct extended_p_square
0269       : depends_on<count>
0270       , extended_p_square_probabilities
0271     {
0272         typedef accumulators::impl::extended_p_square_impl<mpl::_1> impl;
0273
0274         #ifdef BOOST_ACCUMULATORS_DOXYGEN_INVOKED
0275         /// tag::extended_p_square::probabilities named parameter
0276         static boost::parameter::keyword<tag::probabilities> const probabilities;
0277         #endif
0278     };
0279 }
0280
0281 ///////////////////////////////////////////////////////////////////////////////
0282 // extract::extended_p_square
0283 //
0284 namespace extract
0285 {
0286     extractor<tag::extended_p_square> const extended_p_square = {};
0287
0288     BOOST_ACCUMULATORS_IGNORE_GLOBAL(extended_p_square)
0289 }
0290
0291 using extract::extended_p_square;
0292
0293 // So that extended_p_square can be automatically substituted with
0294 // weighted_extended_p_square when the weight parameter is non-void
0295 template<>
0296 struct as_weighted_feature<tag::extended_p_square>
0297 {
0298     typedef tag::weighted_extended_p_square type;
0299 };
0300
0301 template<>
0302 struct feature_of<tag::weighted_extended_p_square>
0303   : feature_of<tag::extended_p_square>
0304 {
0305 };
0306
0307 }} // namespace boost::accumulators
0308
0309 #endif