boost/random/discrete_distribution.hpp

0001 /* boost random/discrete_distribution.hpp header file
0002  *
0003  * Copyright Steven Watanabe 2009-2011
0004  * Distributed under the Boost Software License, Version 1.0. (See
0005  * accompanying file LICENSE_1_0.txt or copy at
0006  * http://www.boost.org/LICENSE_1_0.txt)
0007  *
0008  * See http://www.boost.org for most recent version including documentation.
0009  *
0010  * $Id$
0011  */
0012
0013 #ifndef BOOST_RANDOM_DISCRETE_DISTRIBUTION_HPP_INCLUDED
0014 #define BOOST_RANDOM_DISCRETE_DISTRIBUTION_HPP_INCLUDED
0015
0016 #include <vector>
0017 #include <limits>
0018 #include <numeric>
0019 #include <utility>
0020 #include <iterator>
0021 #include <boost/assert.hpp>
0022 #include <boost/random/uniform_01.hpp>
0023 #include <boost/random/uniform_int_distribution.hpp>
0024 #include <boost/random/detail/config.hpp>
0025 #include <boost/random/detail/operators.hpp>
0026 #include <boost/random/detail/vector_io.hpp>
0027
0028 #ifndef BOOST_NO_CXX11_HDR_INITIALIZER_LIST
0029 #include <initializer_list>
0030 #endif
0031
0032 #include <boost/random/detail/disable_warnings.hpp>
0033
0034 namespace boost {
0035 namespace random {
0036 namespace detail {
0037
0038 template<class IntType, class WeightType>
0039 struct integer_alias_table {
0040     WeightType get_weight(IntType bin) const {
0041         WeightType result = _average;
0042         if(bin < _excess) ++result;
0043         return result;
0044     }
0045     template<class Iter>
0046     WeightType init_average(Iter begin, Iter end) {
0047         WeightType weight_average = 0;
0048         IntType excess = 0;
0049         IntType n = 0;
0050         // weight_average * n + excess == current partial sum
0051         // This is a bit messy, but it's guaranteed not to overflow
0052         for(Iter iter = begin; iter != end; ++iter) {
0053             ++n;
0054             if(*iter < weight_average) {
0055                 WeightType diff = weight_average - *iter;
0056                 weight_average -= diff / n;
0057                 if(diff % n > excess) {
0058                     --weight_average;
0059                     excess += n - diff % n;
0060                 } else {
0061                     excess -= diff % n;
0062                 }
0063             } else {
0064                 WeightType diff = *iter - weight_average;
0065                 weight_average += diff / n;
0066                 if(diff % n < n - excess) {
0067                     excess += diff % n;
0068                 } else {
0069                     ++weight_average;
0070                     excess -= n - diff % n;
0071                 }
0072             }
0073         }
0074         _alias_table.resize(static_cast<std::size_t>(n));
0075         _average = weight_average;
0076         _excess = excess;
0077         return weight_average;
0078     }
0079     void init_empty()
0080     {
0081         _alias_table.clear();
0082         _alias_table.push_back(std::make_pair(static_cast<WeightType>(1),
0083                                               static_cast<IntType>(0)));
0084         _average = static_cast<WeightType>(1);
0085         _excess = static_cast<IntType>(0);
0086     }
0087     bool operator==(const integer_alias_table& other) const
0088     {
0089         return _alias_table == other._alias_table &&
0090             _average == other._average && _excess == other._excess;
0091     }
0092     static WeightType normalize(WeightType val, WeightType /* average */)
0093     {
0094         return val;
0095     }
0096     static void normalize(std::vector<WeightType>&) {}
0097     template<class URNG>
0098     WeightType test(URNG &urng) const
0099     {
0100         return uniform_int_distribution<WeightType>(0, _average)(urng);
0101     }
0102     bool accept(IntType result, WeightType val) const
0103     {
0104         return result < _excess || val < _average;
0105     }
0106     static WeightType try_get_sum(const std::vector<WeightType>& weights)
0107     {
0108         WeightType result = static_cast<WeightType>(0);
0109         for(typename std::vector<WeightType>::const_iterator
0110                 iter = weights.begin(), end = weights.end();
0111             iter != end; ++iter)
0112         {
0113             if((std::numeric_limits<WeightType>::max)() - result > *iter) {
0114                 return static_cast<WeightType>(0);
0115             }
0116             result += *iter;
0117         }
0118         return result;
0119     }
0120     template<class URNG>
0121     static WeightType generate_in_range(URNG &urng, WeightType max)
0122     {
0123         return uniform_int_distribution<WeightType>(
0124             static_cast<WeightType>(0), max-1)(urng);
0125     }
0126     typedef std::vector<std::pair<WeightType, IntType> > alias_table_t;
0127     alias_table_t _alias_table;
0128     WeightType _average;
0129     IntType _excess;
0130 };
0131
0132 template<class IntType, class WeightType>
0133 struct real_alias_table {
0134     WeightType get_weight(IntType) const
0135     {
0136         return WeightType(1.0);
0137     }
0138     template<class Iter>
0139     WeightType init_average(Iter first, Iter last)
0140     {
0141         std::size_t size = std::distance(first, last);
0142         WeightType weight_sum =
0143             std::accumulate(first, last, static_cast<WeightType>(0));
0144         _alias_table.resize(size);
0145         return weight_sum / size;
0146     }
0147     void init_empty()
0148     {
0149         _alias_table.clear();
0150         _alias_table.push_back(std::make_pair(static_cast<WeightType>(1),
0151                                               static_cast<IntType>(0)));
0152     }
0153     bool operator==(const real_alias_table& other) const
0154     {
0155         return _alias_table == other._alias_table;
0156     }
0157     static WeightType normalize(WeightType val, WeightType average)
0158     {
0159         return val / average;
0160     }
0161     static void normalize(std::vector<WeightType>& weights)
0162     {
0163         WeightType sum =
0164             std::accumulate(weights.begin(), weights.end(),
0165                             static_cast<WeightType>(0));
0166         for(typename std::vector<WeightType>::iterator
0167                 iter = weights.begin(),
0168                 end = weights.end();
0169             iter != end; ++iter)
0170         {
0171             *iter /= sum;
0172         }
0173     }
0174     template<class URNG>
0175     WeightType test(URNG &urng) const
0176     {
0177         return uniform_01<WeightType>()(urng);
0178     }
0179     bool accept(IntType, WeightType) const
0180     {
0181         return true;
0182     }
0183     static WeightType try_get_sum(const std::vector<WeightType>& /* weights */)
0184     {
0185         return static_cast<WeightType>(1);
0186     }
0187     template<class URNG>
0188     static WeightType generate_in_range(URNG &urng, WeightType)
0189     {
0190         return uniform_01<WeightType>()(urng);
0191     }
0192     typedef std::vector<std::pair<WeightType, IntType> > alias_table_t;
0193     alias_table_t _alias_table;
0194 };
0195
0196 template<bool IsIntegral>
0197 struct select_alias_table;
0198
0199 template<>
0200 struct select_alias_table<true> {
0201     template<class IntType, class WeightType>
0202     struct apply {
0203         typedef integer_alias_table<IntType, WeightType> type;
0204     };
0205 };
0206
0207 template<>
0208 struct select_alias_table<false> {
0209     template<class IntType, class WeightType>
0210     struct apply {
0211         typedef real_alias_table<IntType, WeightType> type;
0212     };
0213 };
0214
0215 }
0216
0217 /**
0218  * The class @c discrete_distribution models a \random_distribution.
0219  * It produces integers in the range [0, n) with the probability
0220  * of producing each value is specified by the parameters of the
0221  * distribution.
0222  */
0223 template<class IntType = int, class WeightType = double>
0224 class discrete_distribution {
0225 public:
0226     typedef WeightType input_type;
0227     typedef IntType result_type;
0228
0229     class param_type {
0230     public:
0231
0232         typedef discrete_distribution distribution_type;
0233
0234         /**
0235          * Constructs a @c param_type object, representing a distribution
0236          * with \f$p(0) = 1\f$ and \f$p(k|k>0) = 0\f$.
0237          */
0238         param_type() : _probabilities(1, static_cast<WeightType>(1)) {}
0239         /**
0240          * If @c first == @c last, equivalent to the default constructor.
0241          * Otherwise, the values of the range represent weights for the
0242          * possible values of the distribution.
0243          */
0244         template<class Iter>
0245         param_type(Iter first, Iter last) : _probabilities(first, last)
0246         {
0247             normalize();
0248         }
0249 #ifndef BOOST_NO_CXX11_HDR_INITIALIZER_LIST
0250         /**
0251          * If wl.size() == 0, equivalent to the default constructor.
0252          * Otherwise, the values of the @c initializer_list represent
0253          * weights for the possible values of the distribution.
0254          */
0255         param_type(const std::initializer_list<WeightType>& wl)
0256           : _probabilities(wl)
0257         {
0258             normalize();
0259         }
0260 #endif
0261         /**
0262          * If the range is empty, equivalent to the default constructor.
0263          * Otherwise, the elements of the range represent
0264          * weights for the possible values of the distribution.
0265          */
0266         template<class Range>
0267         explicit param_type(const Range& range)
0268           : _probabilities(std::begin(range), std::end(range))
0269         {
0270             normalize();
0271         }
0272
0273         /**
0274          * If nw is zero, equivalent to the default constructor.
0275          * Otherwise, the range of the distribution is [0, nw),
0276          * and the weights are found by  calling fw with values
0277          * evenly distributed between \f$\mbox{xmin} + \delta/2\f$ and
0278          * \f$\mbox{xmax} - \delta/2\f$, where
0279          * \f$\delta = (\mbox{xmax} - \mbox{xmin})/\mbox{nw}\f$.
0280          */
0281         template<class Func>
0282         param_type(std::size_t nw, double xmin, double xmax, Func fw)
0283         {
0284             std::size_t n = (nw == 0) ? 1 : nw;
0285             double delta = (xmax - xmin) / n;
0286             BOOST_ASSERT(delta > 0);
0287             for(std::size_t k = 0; k < n; ++k) {
0288                 _probabilities.push_back(fw(xmin + k*delta + delta/2));
0289             }
0290             normalize();
0291         }
0292
0293         /**
0294          * Returns a vector containing the probabilities of each possible
0295          * value of the distribution.
0296          */
0297         std::vector<WeightType> probabilities() const
0298         {
0299             return _probabilities;
0300         }
0301
0302         /** Writes the parameters to a @c std::ostream. */
0303         BOOST_RANDOM_DETAIL_OSTREAM_OPERATOR(os, param_type, parm)
0304         {
0305             detail::print_vector(os, parm._probabilities);
0306             return os;
0307         }
0308
0309         /** Reads the parameters from a @c std::istream. */
0310         BOOST_RANDOM_DETAIL_ISTREAM_OPERATOR(is, param_type, parm)
0311         {
0312             std::vector<WeightType> temp;
0313             detail::read_vector(is, temp);
0314             if(is) {
0315                 parm._probabilities.swap(temp);
0316             }
0317             return is;
0318         }
0319
0320         /** Returns true if the two sets of parameters are the same. */
0321         BOOST_RANDOM_DETAIL_EQUALITY_OPERATOR(param_type, lhs, rhs)
0322         {
0323             return lhs._probabilities == rhs._probabilities;
0324         }
0325         /** Returns true if the two sets of parameters are different. */
0326         BOOST_RANDOM_DETAIL_INEQUALITY_OPERATOR(param_type)
0327     private:
0328         /// @cond show_private
0329         friend class discrete_distribution;
0330         explicit param_type(const discrete_distribution& dist)
0331           : _probabilities(dist.probabilities())
0332         {}
0333         void normalize()
0334         {
0335             impl_type::normalize(_probabilities);
0336         }
0337         std::vector<WeightType> _probabilities;
0338         /// @endcond
0339     };
0340
0341     /**
0342      * Creates a new @c discrete_distribution object that has
0343      * \f$p(0) = 1\f$ and \f$p(i|i>0) = 0\f$.
0344      */
0345     discrete_distribution()
0346     {
0347         _impl.init_empty();
0348     }
0349     /**
0350      * Constructs a discrete_distribution from an iterator range.
0351      * If @c first == @c last, equivalent to the default constructor.
0352      * Otherwise, the values of the range represent weights for the
0353      * possible values of the distribution.
0354      */
0355     template<class Iter>
0356     discrete_distribution(Iter first, Iter last)
0357     {
0358         init(first, last);
0359     }
0360 #ifndef BOOST_NO_CXX11_HDR_INITIALIZER_LIST
0361     /**
0362      * Constructs a @c discrete_distribution from a @c std::initializer_list.
0363      * If the @c initializer_list is empty, equivalent to the default
0364      * constructor.  Otherwise, the values of the @c initializer_list
0365      * represent weights for the possible values of the distribution.
0366      * For example, given the distribution
0367      *
0368      * @code
0369      * discrete_distribution<> dist{1, 4, 5};
0370      * @endcode
0371      *
0372      * The probability of a 0 is 1/10, the probability of a 1 is 2/5,
0373      * the probability of a 2 is 1/2, and no other values are possible.
0374      */
0375     discrete_distribution(std::initializer_list<WeightType> wl)
0376     {
0377         init(wl.begin(), wl.end());
0378     }
0379 #endif
0380     /**
0381      * Constructs a discrete_distribution from a Boost.Range range.
0382      * If the range is empty, equivalent to the default constructor.
0383      * Otherwise, the values of the range represent weights for the
0384      * possible values of the distribution.
0385      */
0386     template<class Range>
0387     explicit discrete_distribution(const Range& range)
0388     {
0389         init(std::begin(range), std::end(range));
0390     }
0391     /**
0392      * Constructs a discrete_distribution that approximates a function.
0393      * If nw is zero, equivalent to the default constructor.
0394      * Otherwise, the range of the distribution is [0, nw),
0395      * and the weights are found by  calling fw with values
0396      * evenly distributed between \f$\mbox{xmin} + \delta/2\f$ and
0397      * \f$\mbox{xmax} - \delta/2\f$, where
0398      * \f$\delta = (\mbox{xmax} - \mbox{xmin})/\mbox{nw}\f$.
0399      */
0400     template<class Func>
0401     discrete_distribution(std::size_t nw, double xmin, double xmax, Func fw)
0402     {
0403         std::size_t n = (nw == 0) ? 1 : nw;
0404         double delta = (xmax - xmin) / n;
0405         BOOST_ASSERT(delta > 0);
0406         std::vector<WeightType> weights;
0407         for(std::size_t k = 0; k < n; ++k) {
0408             weights.push_back(fw(xmin + k*delta + delta/2));
0409         }
0410         init(weights.begin(), weights.end());
0411     }
0412     /**
0413      * Constructs a discrete_distribution from its parameters.
0414      */
0415     explicit discrete_distribution(const param_type& parm)
0416     {
0417         param(parm);
0418     }
0419
0420     /**
0421      * Returns a value distributed according to the parameters of the
0422      * discrete_distribution.
0423      */
0424     template<class URNG>
0425     IntType operator()(URNG& urng) const
0426     {
0427         BOOST_ASSERT(!_impl._alias_table.empty());
0428         IntType result;
0429         WeightType test;
0430         do {
0431             result = uniform_int_distribution<IntType>((min)(), (max)())(urng);
0432             test = _impl.test(urng);
0433         } while(!_impl.accept(result, test));
0434         if(test < _impl._alias_table[static_cast<std::size_t>(result)].first) {
0435             return result;
0436         } else {
0437             return(_impl._alias_table[static_cast<std::size_t>(result)].second);
0438         }
0439     }
0440
0441     /**
0442      * Returns a value distributed according to the parameters
0443      * specified by param.
0444      */
0445     template<class URNG>
0446     IntType operator()(URNG& urng, const param_type& parm) const
0447     {
0448         if(WeightType limit = impl_type::try_get_sum(parm._probabilities)) {
0449             WeightType val = impl_type::generate_in_range(urng, limit);
0450             WeightType sum = 0;
0451             std::size_t result = 0;
0452             for(typename std::vector<WeightType>::const_iterator
0453                     iter = parm._probabilities.begin(),
0454                     end = parm._probabilities.end();
0455                 iter != end; ++iter, ++result)
0456             {
0457                 sum += *iter;
0458                 if(sum > val) {
0459                     return result;
0460                 }
0461             }
0462             // This shouldn't be reachable, but round-off error
0463             // can prevent any match from being found when val is
0464             // very close to 1.
0465             return static_cast<IntType>(parm._probabilities.size() - 1);
0466         } else {
0467             // WeightType is integral and sum(parm._probabilities)
0468             // would overflow.  Just use the easy solution.
0469             return discrete_distribution(parm)(urng);
0470         }
0471     }
0472
0473     /** Returns the smallest value that the distribution can produce. */
0474     result_type min BOOST_PREVENT_MACRO_SUBSTITUTION () const { return 0; }
0475     /** Returns the largest value that the distribution can produce. */
0476     result_type max BOOST_PREVENT_MACRO_SUBSTITUTION () const
0477     { return static_cast<result_type>(_impl._alias_table.size() - 1); }
0478
0479     /**
0480      * Returns a vector containing the probabilities of each
0481      * value of the distribution.  For example, given
0482      *
0483      * @code
0484      * discrete_distribution<> dist = { 1, 4, 5 };
0485      * std::vector<double> p = dist.param();
0486      * @endcode
0487      *
0488      * the vector, p will contain {0.1, 0.4, 0.5}.
0489      *
0490      * If @c WeightType is integral, then the weights
0491      * will be returned unchanged.
0492      */
0493     std::vector<WeightType> probabilities() const
0494     {
0495         std::vector<WeightType> result(_impl._alias_table.size(), static_cast<WeightType>(0));
0496         std::size_t i = 0;
0497         for(typename impl_type::alias_table_t::const_iterator
0498                 iter = _impl._alias_table.begin(),
0499                 end = _impl._alias_table.end();
0500                 iter != end; ++iter, ++i)
0501         {
0502             WeightType val = iter->first;
0503             result[i] += val;
0504             result[static_cast<std::size_t>(iter->second)] += _impl.get_weight(i) - val;
0505         }
0506         impl_type::normalize(result);
0507         return(result);
0508     }
0509
0510     /** Returns the parameters of the distribution. */
0511     param_type param() const
0512     {
0513         return param_type(*this);
0514     }
0515     /** Sets the parameters of the distribution. */
0516     void param(const param_type& parm)
0517     {
0518         init(parm._probabilities.begin(), parm._probabilities.end());
0519     }
0520
0521     /**
0522      * Effects: Subsequent uses of the distribution do not depend
0523      * on values produced by any engine prior to invoking reset.
0524      */
0525     void reset() {}
0526
0527     /** Writes a distribution to a @c std::ostream. */
0528     BOOST_RANDOM_DETAIL_OSTREAM_OPERATOR(os, discrete_distribution, dd)
0529     {
0530         os << dd.param();
0531         return os;
0532     }
0533
0534     /** Reads a distribution from a @c std::istream */
0535     BOOST_RANDOM_DETAIL_ISTREAM_OPERATOR(is, discrete_distribution, dd)
0536     {
0537         param_type parm;
0538         if(is >> parm) {
0539             dd.param(parm);
0540         }
0541         return is;
0542     }
0543
0544     /**
0545      * Returns true if the two distributions will return the
0546      * same sequence of values, when passed equal generators.
0547      */
0548     BOOST_RANDOM_DETAIL_EQUALITY_OPERATOR(discrete_distribution, lhs, rhs)
0549     {
0550         return lhs._impl == rhs._impl;
0551     }
0552     /**
0553      * Returns true if the two distributions may return different
0554      * sequences of values, when passed equal generators.
0555      */
0556     BOOST_RANDOM_DETAIL_INEQUALITY_OPERATOR(discrete_distribution)
0557
0558 private:
0559
0560     /// @cond show_private
0561
0562     template<class Iter>
0563     void init(Iter first, Iter last, std::input_iterator_tag)
0564     {
0565         std::vector<WeightType> temp(first, last);
0566         init(temp.begin(), temp.end());
0567     }
0568     template<class Iter>
0569     void init(Iter first, Iter last, std::forward_iterator_tag)
0570     {
0571         size_t input_size = std::distance(first, last);
0572         std::vector<std::pair<WeightType, IntType> > below_average;
0573         std::vector<std::pair<WeightType, IntType> > above_average;
0574         below_average.reserve(input_size);
0575         above_average.reserve(input_size);
0576
0577         WeightType weight_average = _impl.init_average(first, last);
0578         WeightType normalized_average = _impl.get_weight(0);
0579         std::size_t i = 0;
0580         for(; first != last; ++first, ++i) {
0581             WeightType val = impl_type::normalize(*first, weight_average);
0582             std::pair<WeightType, IntType> elem(val, static_cast<IntType>(i));
0583             if(val < normalized_average) {
0584                 below_average.push_back(elem);
0585             } else {
0586                 above_average.push_back(elem);
0587             }
0588         }
0589
0590         typename impl_type::alias_table_t::iterator
0591             b_iter = below_average.begin(),
0592             b_end = below_average.end(),
0593             a_iter = above_average.begin(),
0594             a_end = above_average.end()
0595             ;
0596         while(b_iter != b_end && a_iter != a_end) {
0597             _impl._alias_table[static_cast<std::size_t>(b_iter->second)] =
0598                 std::make_pair(b_iter->first, a_iter->second);
0599             a_iter->first -= (_impl.get_weight(b_iter->second) - b_iter->first);
0600             if(a_iter->first < normalized_average) {
0601                 *b_iter = *a_iter++;
0602             } else {
0603                 ++b_iter;
0604             }
0605         }
0606         for(; b_iter != b_end; ++b_iter) {
0607             _impl._alias_table[static_cast<std::size_t>(b_iter->second)].first =
0608                 _impl.get_weight(b_iter->second);
0609         }
0610         for(; a_iter != a_end; ++a_iter) {
0611             _impl._alias_table[static_cast<std::size_t>(a_iter->second)].first =
0612                 _impl.get_weight(a_iter->second);
0613         }
0614     }
0615     template<class Iter>
0616     void init(Iter first, Iter last)
0617     {
0618         if(first == last) {
0619             _impl.init_empty();
0620         } else {
0621             typename std::iterator_traits<Iter>::iterator_category category;
0622             init(first, last, category);
0623         }
0624     }
0625     typedef typename detail::select_alias_table<
0626         (::boost::is_integral<WeightType>::value)
0627     >::template apply<IntType, WeightType>::type impl_type;
0628     impl_type _impl;
0629     /// @endcond
0630 };
0631
0632 }
0633 }
0634
0635 #include <boost/random/detail/enable_warnings.hpp>
0636
0637 #endif