Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:28:20

0001 ///////////////////////////////////////////////////////////////////////////////
0002 // p_square_quantile.hpp
0003 //
0004 //  Copyright 2005 Daniel Egloff. Distributed under the Boost
0005 //  Software License, Version 1.0. (See accompanying file
0006 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
0007 
0008 #ifndef BOOST_ACCUMULATORS_STATISTICS_P_SQUARE_QUANTILE_HPP_DE_01_01_2006
0009 #define BOOST_ACCUMULATORS_STATISTICS_P_SQUARE_QUANTILE_HPP_DE_01_01_2006
0010 
0011 #include <cmath>
0012 #include <functional>
0013 #include <boost/array.hpp>
0014 #include <boost/mpl/placeholders.hpp>
0015 #include <boost/type_traits/is_same.hpp>
0016 #include <boost/parameter/keyword.hpp>
0017 #include <boost/accumulators/framework/accumulator_base.hpp>
0018 #include <boost/accumulators/framework/extractor.hpp>
0019 #include <boost/accumulators/numeric/functional.hpp>
0020 #include <boost/accumulators/framework/parameters/sample.hpp>
0021 #include <boost/accumulators/framework/depends_on.hpp>
0022 #include <boost/accumulators/statistics_fwd.hpp>
0023 #include <boost/accumulators/statistics/count.hpp>
0024 #include <boost/accumulators/statistics/parameters/quantile_probability.hpp>
0025 #include <boost/serialization/boost_array.hpp>
0026 
0027 namespace boost { namespace accumulators
0028 {
0029 
0030 namespace impl
0031 {
0032     ///////////////////////////////////////////////////////////////////////////////
0033     // p_square_quantile_impl
0034     //  single quantile estimation
0035     /**
0036         @brief Single quantile estimation with the \f$P^2\f$ algorithm
0037 
0038         The \f$P^2\f$ algorithm estimates a quantile dynamically without storing samples. Instead of
0039         storing the whole sample cumulative distribution, only five points (markers) are stored. The heights
0040         of these markers are the minimum and the maximum of the samples and the current estimates of the
0041         \f$(p/2)\f$-, \f$p\f$- and \f$(1+p)/2\f$-quantiles. Their positions are equal to the number
0042         of samples that are smaller or equal to the markers. Each time a new samples is recorded, the
0043         positions of the markers are updated and if necessary their heights are adjusted using a piecewise-
0044         parabolic formula.
0045 
0046         For further details, see
0047 
0048         R. Jain and I. Chlamtac, The P^2 algorithm for dynamic calculation of quantiles and
0049         histograms without storing observations, Communications of the ACM,
0050         Volume 28 (October), Number 10, 1985, p. 1076-1085.
0051 
0052         @param quantile_probability
0053     */
0054     template<typename Sample, typename Impl>
0055     struct p_square_quantile_impl
0056       : accumulator_base
0057     {
0058         typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type;
0059         typedef array<float_type, 5> array_type;
0060         // for boost::result_of
0061         typedef float_type result_type;
0062 
0063         template<typename Args>
0064         p_square_quantile_impl(Args const &args)
0065           : p(is_same<Impl, for_median>::value ? float_type(0.5) : args[quantile_probability | float_type(0.5)])
0066           , heights()
0067           , actual_positions()
0068           , desired_positions()
0069           , positions_increments()
0070         {
0071             for(std::size_t i = 0; i < 5; ++i)
0072             {
0073                 this->actual_positions[i] = i + float_type(1.);
0074             }
0075 
0076             this->desired_positions[0] = float_type(1.);
0077             this->desired_positions[1] = float_type(1.) + float_type(2.) * this->p;
0078             this->desired_positions[2] = float_type(1.) + float_type(4.) * this->p;
0079             this->desired_positions[3] = float_type(3.) + float_type(2.) * this->p;
0080             this->desired_positions[4] = float_type(5.);
0081 
0082 
0083             this->positions_increments[0] = float_type(0.);
0084             this->positions_increments[1] = this->p / float_type(2.);
0085             this->positions_increments[2] = this->p;
0086             this->positions_increments[3] = (float_type(1.) + this->p) / float_type(2.);
0087             this->positions_increments[4] = float_type(1.);
0088         }
0089 
0090         template<typename Args>
0091         void operator ()(Args const &args)
0092         {
0093             std::size_t cnt = count(args);
0094 
0095             // accumulate 5 first samples
0096             if(cnt <= 5)
0097             {
0098                 this->heights[cnt - 1] = args[sample];
0099 
0100                 // complete the initialization of heights by sorting
0101                 if(cnt == 5)
0102                 {
0103                     std::sort(this->heights.begin(), this->heights.end());
0104                 }
0105             }
0106             else
0107             {
0108                 std::size_t sample_cell = 1; // k
0109 
0110                 // find cell k such that heights[k-1] <= args[sample] < heights[k] and adjust extreme values
0111                 if (args[sample] < this->heights[0])
0112                 {
0113                     this->heights[0] = args[sample];
0114                     sample_cell = 1;
0115                 }
0116                 else if (this->heights[4] <= args[sample])
0117                 {
0118                     this->heights[4] = args[sample];
0119                     sample_cell = 4;
0120                 }
0121                 else
0122                 {
0123                     typedef typename array_type::iterator iterator;
0124                     iterator it = std::upper_bound(
0125                         this->heights.begin()
0126                       , this->heights.end()
0127                       , args[sample]
0128                     );
0129 
0130                     sample_cell = std::distance(this->heights.begin(), it);
0131                 }
0132 
0133                 // update positions of markers above sample_cell
0134                 for(std::size_t i = sample_cell; i < 5; ++i)
0135                 {
0136                     ++this->actual_positions[i];
0137                 }
0138 
0139                 // update desired positions of all markers
0140                 for(std::size_t i = 0; i < 5; ++i)
0141                 {
0142                     this->desired_positions[i] += this->positions_increments[i];
0143                 }
0144 
0145                 // adjust heights and actual positions of markers 1 to 3 if necessary
0146                 for(std::size_t i = 1; i <= 3; ++i)
0147                 {
0148                     // offset to desired positions
0149                     float_type d = this->desired_positions[i] - this->actual_positions[i];
0150 
0151                     // offset to next position
0152                     float_type dp = this->actual_positions[i + 1] - this->actual_positions[i];
0153 
0154                     // offset to previous position
0155                     float_type dm = this->actual_positions[i - 1] - this->actual_positions[i];
0156 
0157                     // height ds
0158                     float_type hp = (this->heights[i + 1] - this->heights[i]) / dp;
0159                     float_type hm = (this->heights[i - 1] - this->heights[i]) / dm;
0160 
0161                     if((d >= float_type(1.) && dp > float_type(1.)) || (d <= float_type(-1.) && dm < float_type(-1.)))
0162                     {
0163                         short sign_d = static_cast<short>(d / std::abs(d));
0164 
0165                         // try adjusting heights[i] using p-squared formula
0166                         float_type h = this->heights[i] + sign_d / (dp - dm) * ((sign_d - dm) * hp
0167                                      + (dp - sign_d) * hm);
0168 
0169                         if(this->heights[i - 1] < h && h < this->heights[i + 1])
0170                         {
0171                             this->heights[i] = h;
0172                         }
0173                         else
0174                         {
0175                             // use linear formula
0176                             if(d > float_type(0))
0177                             {
0178                                 this->heights[i] += hp;
0179                             }
0180                             if(d < float_type(0))
0181                             {
0182                                 this->heights[i] -= hm;
0183                             }
0184                         }
0185                         this->actual_positions[i] += sign_d;
0186                     }
0187                 }
0188             }
0189         }
0190 
0191         result_type result(dont_care) const
0192         {
0193             return this->heights[2];
0194         }
0195 
0196         // make this accumulator serializeable
0197         // TODO: do we need to split to load/save and verify that P did not change?
0198         template<class Archive>
0199         void serialize(Archive & ar, const unsigned int file_version)
0200         { 
0201             ar & p;
0202             ar & heights;
0203             ar & actual_positions;
0204             ar & desired_positions;
0205             ar & positions_increments;
0206         }
0207 
0208     private:
0209         float_type p;                    // the quantile probability p
0210         array_type heights;              // q_i
0211         array_type actual_positions;     // n_i
0212         array_type desired_positions;    // n'_i
0213         array_type positions_increments; // dn'_i
0214     };
0215 
0216 } // namespace detail
0217 
0218 ///////////////////////////////////////////////////////////////////////////////
0219 // tag::p_square_quantile
0220 //
0221 namespace tag
0222 {
0223     struct p_square_quantile
0224       : depends_on<count>
0225     {
0226         /// INTERNAL ONLY
0227         ///
0228         typedef accumulators::impl::p_square_quantile_impl<mpl::_1, regular> impl;
0229     };
0230     struct p_square_quantile_for_median
0231       : depends_on<count>
0232     {
0233         /// INTERNAL ONLY
0234         ///
0235         typedef accumulators::impl::p_square_quantile_impl<mpl::_1, for_median> impl;
0236     };
0237 }
0238 
0239 ///////////////////////////////////////////////////////////////////////////////
0240 // extract::p_square_quantile
0241 // extract::p_square_quantile_for_median
0242 //
0243 namespace extract
0244 {
0245     extractor<tag::p_square_quantile> const p_square_quantile = {};
0246     extractor<tag::p_square_quantile_for_median> const p_square_quantile_for_median = {};
0247 
0248     BOOST_ACCUMULATORS_IGNORE_GLOBAL(p_square_quantile)
0249     BOOST_ACCUMULATORS_IGNORE_GLOBAL(p_square_quantile_for_median)
0250 }
0251 
0252 using extract::p_square_quantile;
0253 using extract::p_square_quantile_for_median;
0254 
0255 // So that p_square_quantile can be automatically substituted with
0256 // weighted_p_square_quantile when the weight parameter is non-void
0257 template<>
0258 struct as_weighted_feature<tag::p_square_quantile>
0259 {
0260     typedef tag::weighted_p_square_quantile type;
0261 };
0262 
0263 template<>
0264 struct feature_of<tag::weighted_p_square_quantile>
0265   : feature_of<tag::p_square_quantile>
0266 {
0267 };
0268 
0269 }} // namespace boost::accumulators
0270 
0271 #endif