Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:28:21

0001 ///////////////////////////////////////////////////////////////////////////////
0002 // weighted_density.hpp
0003 //
0004 //  Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost
0005 //  Software License, Version 1.0. (See accompanying file
0006 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
0007 
0008 #ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_DENSITY_HPP_DE_01_01_2006
0009 #define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_DENSITY_HPP_DE_01_01_2006
0010 
0011 #include <vector>
0012 #include <limits>
0013 #include <functional>
0014 #include <boost/range.hpp>
0015 #include <boost/parameter/keyword.hpp>
0016 #include <boost/mpl/placeholders.hpp>
0017 #include <boost/accumulators/framework/accumulator_base.hpp>
0018 #include <boost/accumulators/framework/extractor.hpp>
0019 #include <boost/accumulators/numeric/functional.hpp>
0020 #include <boost/accumulators/framework/parameters/sample.hpp>
0021 #include <boost/accumulators/statistics_fwd.hpp>
0022 #include <boost/accumulators/statistics/sum.hpp>
0023 #include <boost/accumulators/statistics/max.hpp>
0024 #include <boost/accumulators/statistics/min.hpp>
0025 #include <boost/accumulators/statistics/density.hpp> // for named parameters density_cache_size and density_num_bins
0026 #include <boost/serialization/vector.hpp>
0027 #include <boost/serialization/utility.hpp>
0028 
0029 namespace boost { namespace accumulators
0030 {
0031 
0032 namespace impl
0033 {
0034     ///////////////////////////////////////////////////////////////////////////////
0035     // weighted_density_impl
0036     //  density histogram for weighted samples
0037     /**
0038         @brief Histogram density estimator for weighted samples
0039 
0040         The histogram density estimator returns a histogram of the sample distribution. The positions and sizes of the bins
0041         are determined using a specifiable number of cached samples (cache_size). The range between the minimum and the
0042         maximum of the cached samples is subdivided into a specifiable number of bins (num_bins) of same size. Additionally,
0043         an under- and an overflow bin is added to capture future under- and overflow samples. Once the bins are determined,
0044         the cached samples and all subsequent samples are added to the correct bins. At the end, a range of std::pair is
0045         returned, where each pair contains the position of the bin (lower bound) and the sum of the weights (normalized with the
0046         sum of all weights).
0047 
0048         @param density_cache_size Number of first samples used to determine min and max.
0049         @param density_num_bins Number of bins (two additional bins collect under- and overflow samples).
0050     */
0051     template<typename Sample, typename Weight>
0052     struct weighted_density_impl
0053       : accumulator_base
0054     {
0055         typedef typename numeric::functional::fdiv<Weight, std::size_t>::result_type float_type;
0056         typedef std::vector<std::pair<float_type, float_type> > histogram_type;
0057         typedef std::vector<float_type> array_type;
0058         // for boost::result_of
0059         typedef iterator_range<typename histogram_type::iterator> result_type;
0060 
0061         template<typename Args>
0062         weighted_density_impl(Args const &args)
0063             : cache_size(args[density_cache_size])
0064             , cache(cache_size)
0065             , num_bins(args[density_num_bins])
0066             , samples_in_bin(num_bins + 2, 0.)
0067             , bin_positions(num_bins + 2)
0068             , histogram(
0069                 num_bins + 2
0070               , std::make_pair(
0071                     numeric::fdiv(args[sample | Sample()],(std::size_t)1)
0072                   , numeric::fdiv(args[sample | Sample()],(std::size_t)1)
0073                 )
0074               )
0075             , is_dirty(true)
0076         {
0077         }
0078 
0079         template<typename Args>
0080         void operator ()(Args const &args)
0081         {
0082             this->is_dirty = true;
0083 
0084             std::size_t cnt = count(args);
0085 
0086             // Fill up cache with cache_size first samples
0087             if (cnt <= this->cache_size)
0088             {
0089                 this->cache[cnt - 1] = std::make_pair(args[sample], args[weight]);
0090             }
0091 
0092             // Once cache_size samples have been accumulated, create num_bins bins of same size between
0093             // the minimum and maximum of the cached samples as well as an under- and an overflow bin.
0094             // Store their lower bounds (bin_positions) and fill the bins with the cached samples (samples_in_bin).
0095             if (cnt == this->cache_size)
0096             {
0097                 float_type minimum = numeric::fdiv((min)(args),(std::size_t)1);
0098                 float_type maximum = numeric::fdiv((max)(args),(std::size_t)1);
0099                 float_type bin_size = numeric::fdiv(maximum - minimum, this->num_bins);
0100 
0101                 // determine bin positions (their lower bounds)
0102                 for (std::size_t i = 0; i < this->num_bins + 2; ++i)
0103                 {
0104                     this->bin_positions[i] = minimum + (i - 1.) * bin_size;
0105                 }
0106 
0107                 for (typename histogram_type::const_iterator iter = this->cache.begin(); iter != this->cache.end(); ++iter)
0108                 {
0109                     if (iter->first < this->bin_positions[1])
0110                     {
0111                         this->samples_in_bin[0] += iter->second;
0112                     }
0113                     else if (iter->first >= this->bin_positions[this->num_bins + 1])
0114                     {
0115                         this->samples_in_bin[this->num_bins + 1] += iter->second;
0116                     }
0117                     else
0118                     {
0119                         typename array_type::iterator it = std::upper_bound(
0120                             this->bin_positions.begin()
0121                           , this->bin_positions.end()
0122                           , iter->first
0123                         );
0124 
0125                         std::size_t d = std::distance(this->bin_positions.begin(), it);
0126                         this->samples_in_bin[d - 1] += iter->second;
0127                     }
0128                 }
0129             }
0130             // Add each subsequent sample to the correct bin
0131             else if (cnt > this->cache_size)
0132             {
0133                 if (args[sample] < this->bin_positions[1])
0134                 {
0135                     this->samples_in_bin[0] += args[weight];
0136                 }
0137                 else if (args[sample] >= this->bin_positions[this->num_bins + 1])
0138                 {
0139                     this->samples_in_bin[this->num_bins + 1] += args[weight];
0140                 }
0141                 else
0142                 {
0143                     typename array_type::iterator it = std::upper_bound(
0144                         this->bin_positions.begin()
0145                       , this->bin_positions.end()
0146                       , args[sample]
0147                     );
0148 
0149                     std::size_t d = std::distance(this->bin_positions.begin(), it);
0150                     this->samples_in_bin[d - 1] += args[weight];
0151                 }
0152             }
0153         }
0154 
0155         template<typename Args>
0156         result_type result(Args const &args) const
0157         {
0158             if (this->is_dirty)
0159             {
0160                 this->is_dirty = false;
0161 
0162                 // creates a vector of std::pair where each pair i holds
0163                 // the values bin_positions[i] (x-axis of histogram) and
0164                 // samples_in_bin[i] / cnt (y-axis of histogram).
0165 
0166                 for (std::size_t i = 0; i < this->num_bins + 2; ++i)
0167                 {
0168                     this->histogram[i] = std::make_pair(this->bin_positions[i], numeric::fdiv(this->samples_in_bin[i], sum_of_weights(args)));
0169                 }
0170             }
0171 
0172             // returns a range of pairs
0173             return make_iterator_range(this->histogram);
0174         }
0175 
0176         // make this accumulator serializeable
0177         // TODO split to save/load and check on parameters provided in ctor
0178         template<class Archive>
0179         void serialize(Archive & ar, const unsigned int file_version)
0180         {
0181             ar & cache_size;
0182             ar & cache;
0183             ar & num_bins;
0184             ar & samples_in_bin;
0185             ar & bin_positions;
0186             ar & histogram;
0187             ar & is_dirty; 
0188         }
0189 
0190     private:
0191         std::size_t            cache_size;      // number of cached samples
0192         histogram_type         cache;           // cache to store the first cache_size samples with their weights as std::pair
0193         std::size_t            num_bins;        // number of bins
0194         array_type             samples_in_bin;  // number of samples in each bin
0195         array_type             bin_positions;   // lower bounds of bins
0196         mutable histogram_type histogram;       // histogram
0197         mutable bool is_dirty;
0198     };
0199 
0200 } // namespace impl
0201 
0202 ///////////////////////////////////////////////////////////////////////////////
0203 // tag::weighted_density
0204 //
0205 namespace tag
0206 {
0207     struct weighted_density
0208       : depends_on<count, sum_of_weights, min, max>
0209       , density_cache_size
0210       , density_num_bins
0211     {
0212         /// INTERNAL ONLY
0213         ///
0214         typedef accumulators::impl::weighted_density_impl<mpl::_1, mpl::_2> impl;
0215 
0216         #ifdef BOOST_ACCUMULATORS_DOXYGEN_INVOKED
0217         static boost::parameter::keyword<density_cache_size> const cache_size;
0218         static boost::parameter::keyword<density_num_bins> const num_bins;
0219         #endif
0220     };
0221 }
0222 
0223 ///////////////////////////////////////////////////////////////////////////////
0224 // extract::weighted_density
0225 //
0226 namespace extract
0227 {
0228     extractor<tag::density> const weighted_density = {};
0229 
0230     BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_density)
0231 }
0232 
0233 using extract::weighted_density;
0234 
0235 }} // namespace boost::accumulators
0236 
0237 #endif