Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:28:22

0001 ///////////////////////////////////////////////////////////////////////////////
0002 // weighted_p_square_cumul_dist.hpp
0003 //
0004 //  Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost
0005 //  Software License, Version 1.0. (See accompanying file
0006 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
0007 
0008 #ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_P_SQUARE_CUMUL_DIST_HPP_DE_01_01_2006
0009 #define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_P_SQUARE_CUMUL_DIST_HPP_DE_01_01_2006
0010 
0011 #include <vector>
0012 #include <functional>
0013 #include <boost/parameter/keyword.hpp>
0014 #include <boost/mpl/placeholders.hpp>
0015 #include <boost/range.hpp>
0016 #include <boost/accumulators/framework/accumulator_base.hpp>
0017 #include <boost/accumulators/framework/extractor.hpp>
0018 #include <boost/accumulators/numeric/functional.hpp>
0019 #include <boost/accumulators/framework/parameters/sample.hpp>
0020 #include <boost/accumulators/statistics_fwd.hpp>
0021 #include <boost/accumulators/statistics/count.hpp>
0022 #include <boost/accumulators/statistics/sum.hpp>
0023 #include <boost/accumulators/statistics/p_square_cumul_dist.hpp> // for named parameter p_square_cumulative_distribution_num_cells
0024 
0025 namespace boost { namespace accumulators
0026 {
0027 
0028 namespace impl
0029 {
0030     ///////////////////////////////////////////////////////////////////////////////
0031     // weighted_p_square_cumulative_distribution_impl
0032     //  cumulative distribution calculation (as histogram)
0033     /**
0034         @brief Histogram calculation of the cumulative distribution with the \f$P^2\f$ algorithm for weighted samples
0035 
0036         A histogram of the sample cumulative distribution is computed dynamically without storing samples
0037         based on the \f$ P^2 \f$ algorithm for weighted samples. The returned histogram has a specifiable
0038         amount (num_cells) equiprobable (and not equal-sized) cells.
0039 
0040         Note that applying importance sampling results in regions to be more and other regions to be less
0041         accurately estimated than without importance sampling, i.e., with unweighted samples.
0042 
0043         For further details, see
0044 
0045         R. Jain and I. Chlamtac, The P^2 algorithm for dynamic calculation of quantiles and
0046         histograms without storing observations, Communications of the ACM,
0047         Volume 28 (October), Number 10, 1985, p. 1076-1085.
0048 
0049         @param p_square_cumulative_distribution_num_cells
0050     */
0051     template<typename Sample, typename Weight>
0052     struct weighted_p_square_cumulative_distribution_impl
0053       : accumulator_base
0054     {
0055         typedef typename numeric::functional::multiplies<Sample, Weight>::result_type weighted_sample;
0056         typedef typename numeric::functional::fdiv<weighted_sample, std::size_t>::result_type float_type;
0057         typedef std::vector<std::pair<float_type, float_type> > histogram_type;
0058         typedef std::vector<float_type> array_type;
0059         // for boost::result_of
0060         typedef iterator_range<typename histogram_type::iterator> result_type;
0061 
0062         template<typename Args>
0063         weighted_p_square_cumulative_distribution_impl(Args const &args)
0064           : num_cells(args[p_square_cumulative_distribution_num_cells])
0065           , heights(num_cells + 1)
0066           , actual_positions(num_cells + 1)
0067           , desired_positions(num_cells + 1)
0068           , histogram(num_cells + 1)
0069           , is_dirty(true)
0070         {
0071         }
0072 
0073         template<typename Args>
0074         void operator ()(Args const &args)
0075         {
0076             this->is_dirty = true;
0077 
0078             std::size_t cnt = count(args);
0079             std::size_t sample_cell = 1; // k
0080             std::size_t b = this->num_cells;
0081 
0082             // accumulate num_cells + 1 first samples
0083             if (cnt <= b + 1)
0084             {
0085                 this->heights[cnt - 1] = args[sample];
0086                 this->actual_positions[cnt - 1] = args[weight];
0087 
0088                 // complete the initialization of heights by sorting
0089                 if (cnt == b + 1)
0090                 {
0091                     //std::sort(this->heights.begin(), this->heights.end());
0092 
0093                     // TODO: we need to sort the initial samples (in heights) in ascending order and
0094                     // sort their weights (in actual_positions) the same way. The following lines do
0095                     // it, but there must be a better and more efficient way of doing this.
0096                     typename array_type::iterator it_begin, it_end, it_min;
0097 
0098                     it_begin = this->heights.begin();
0099                     it_end   = this->heights.end();
0100 
0101                     std::size_t pos = 0;
0102 
0103                     while (it_begin != it_end)
0104                     {
0105                         it_min = std::min_element(it_begin, it_end);
0106                         std::size_t d = std::distance(it_begin, it_min);
0107                         std::swap(*it_begin, *it_min);
0108                         std::swap(this->actual_positions[pos], this->actual_positions[pos + d]);
0109                         ++it_begin;
0110                         ++pos;
0111                     }
0112 
0113                     // calculate correct initial actual positions
0114                     for (std::size_t i = 1; i < b; ++i)
0115                     {
0116                         this->actual_positions[i] += this->actual_positions[i - 1];
0117                     }
0118                 }
0119             }
0120             else
0121             {
0122                 // find cell k such that heights[k-1] <= args[sample] < heights[k] and adjust extreme values
0123                 if (args[sample] < this->heights[0])
0124                 {
0125                     this->heights[0] = args[sample];
0126                     this->actual_positions[0] = args[weight];
0127                     sample_cell = 1;
0128                 }
0129                 else if (this->heights[b] <= args[sample])
0130                 {
0131                     this->heights[b] = args[sample];
0132                     sample_cell = b;
0133                 }
0134                 else
0135                 {
0136                     typename array_type::iterator it;
0137                     it = std::upper_bound(
0138                         this->heights.begin()
0139                       , this->heights.end()
0140                       , args[sample]
0141                     );
0142 
0143                     sample_cell = std::distance(this->heights.begin(), it);
0144                 }
0145 
0146                 // increment positions of markers above sample_cell
0147                 for (std::size_t i = sample_cell; i < b + 1; ++i)
0148                 {
0149                     this->actual_positions[i] += args[weight];
0150                 }
0151 
0152                 // determine desired marker positions
0153                 for (std::size_t i = 1; i < b + 1; ++i)
0154                 {
0155                     this->desired_positions[i] = this->actual_positions[0]
0156                                                + numeric::fdiv((i-1) * (sum_of_weights(args) - this->actual_positions[0]), b);
0157                 }
0158 
0159                 // adjust heights of markers 2 to num_cells if necessary
0160                 for (std::size_t i = 1; i < b; ++i)
0161                 {
0162                     // offset to desire position
0163                     float_type d = this->desired_positions[i] - this->actual_positions[i];
0164 
0165                     // offset to next position
0166                     float_type dp = this->actual_positions[i + 1] - this->actual_positions[i];
0167 
0168                     // offset to previous position
0169                     float_type dm = this->actual_positions[i - 1] - this->actual_positions[i];
0170 
0171                     // height ds
0172                     float_type hp = (this->heights[i + 1] - this->heights[i]) / dp;
0173                     float_type hm = (this->heights[i - 1] - this->heights[i]) / dm;
0174 
0175                     if ( ( d >= 1. && dp > 1. ) || ( d <= -1. && dm < -1. ) )
0176                     {
0177                         short sign_d = static_cast<short>(d / std::abs(d));
0178 
0179                         // try adjusting heights[i] using p-squared formula
0180                         float_type h = this->heights[i] + sign_d / (dp - dm) * ( (sign_d - dm) * hp + (dp - sign_d) * hm );
0181 
0182                         if ( this->heights[i - 1] < h && h < this->heights[i + 1] )
0183                         {
0184                             this->heights[i] = h;
0185                         }
0186                         else
0187                         {
0188                             // use linear formula
0189                             if (d>0)
0190                             {
0191                                 this->heights[i] += hp;
0192                             }
0193                             if (d<0)
0194                             {
0195                                 this->heights[i] -= hm;
0196                             }
0197                         }
0198                         this->actual_positions[i] += sign_d;
0199                     }
0200                 }
0201             }
0202         }
0203 
0204         template<typename Args>
0205         result_type result(Args const &args) const
0206         {
0207             if (this->is_dirty)
0208             {
0209                 this->is_dirty = false;
0210 
0211                 // creates a vector of std::pair where each pair i holds
0212                 // the values heights[i] (x-axis of histogram) and
0213                 // actual_positions[i] / sum_of_weights (y-axis of histogram)
0214 
0215                 for (std::size_t i = 0; i < this->histogram.size(); ++i)
0216                 {
0217                     this->histogram[i] = std::make_pair(this->heights[i], numeric::fdiv(this->actual_positions[i], sum_of_weights(args)));
0218                 }
0219             }
0220 
0221             return make_iterator_range(this->histogram);
0222         }
0223 
0224         // make this accumulator serializeable
0225         // TODO split to save/load and check on parameters provided in ctor
0226         template<class Archive>
0227         void serialize(Archive & ar, const unsigned int file_version)
0228         {
0229             ar & num_cells;
0230             ar & heights;
0231             ar & actual_positions;
0232             ar & desired_positions;
0233             ar & histogram;
0234             ar & is_dirty; 
0235         }
0236 
0237     private:
0238         std::size_t num_cells;            // number of cells b
0239         array_type  heights;              // q_i
0240         array_type  actual_positions;     // n_i
0241         array_type  desired_positions;    // n'_i
0242         mutable histogram_type histogram; // histogram
0243         mutable bool is_dirty;
0244     };
0245 
0246 } // namespace detail
0247 
0248 ///////////////////////////////////////////////////////////////////////////////
0249 // tag::weighted_p_square_cumulative_distribution
0250 //
0251 namespace tag
0252 {
0253     struct weighted_p_square_cumulative_distribution
0254       : depends_on<count, sum_of_weights>
0255       , p_square_cumulative_distribution_num_cells
0256     {
0257         typedef accumulators::impl::weighted_p_square_cumulative_distribution_impl<mpl::_1, mpl::_2> impl;
0258     };
0259 }
0260 
0261 ///////////////////////////////////////////////////////////////////////////////
0262 // extract::weighted_p_square_cumulative_distribution
0263 //
0264 namespace extract
0265 {
0266     extractor<tag::weighted_p_square_cumulative_distribution> const weighted_p_square_cumulative_distribution = {};
0267 
0268     BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_p_square_cumulative_distribution)
0269 }
0270 
0271 using extract::weighted_p_square_cumulative_distribution;
0272 
0273 }} // namespace boost::accumulators
0274 
0275 #endif