math/quadrature/naive_monte_carlo.hpp

0001 /*
0002  * Copyright Nick Thompson, 2018
0003  * Use, modification and distribution are subject to the
0004  * Boost Software License, Version 1.0. (See accompanying file
0005  * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
0006  */
0007 #ifndef BOOST_MATH_QUADRATURE_NAIVE_MONTE_CARLO_HPP
0008 #define BOOST_MATH_QUADRATURE_NAIVE_MONTE_CARLO_HPP
0009 #include <sstream>
0010 #include <algorithm>
0011 #include <vector>
0012 #include <atomic>
0013 #include <memory>
0014 #include <functional>
0015 #include <future>
0016 #include <thread>
0017 #include <initializer_list>
0018 #include <utility>
0019 #include <random>
0020 #include <chrono>
0021 #include <map>
0022 #include <type_traits>
0023 #include <boost/math/policies/error_handling.hpp>
0024 #include <boost/math/special_functions/fpclassify.hpp>
0025
0026 #ifdef BOOST_NAIVE_MONTE_CARLO_DEBUG_FAILURES
0027 #  include <iostream>
0028 #endif
0029
0030 namespace boost { namespace math { namespace quadrature {
0031
0032 namespace detail {
0033   enum class limit_classification {FINITE,
0034                                    LOWER_BOUND_INFINITE,
0035                                    UPPER_BOUND_INFINITE,
0036                                    DOUBLE_INFINITE};
0037 }
0038
0039 template<class Real, class F, class RandomNumberGenerator = std::mt19937_64, class Policy = boost::math::policies::policy<>,
0040          typename std::enable_if<std::is_trivially_copyable<Real>::value, bool>::type = true>
0041 class naive_monte_carlo
0042 {
0043 public:
0044     naive_monte_carlo(const F& integrand,
0045                       std::vector<std::pair<Real, Real>> const & bounds,
0046                       Real error_goal,
0047                       bool singular = true,
0048                       uint64_t threads = std::thread::hardware_concurrency(),
0049                       uint64_t seed = 0) noexcept : m_num_threads{threads}, m_seed{seed}, m_volume(1)
0050     {
0051         using std::numeric_limits;
0052         using std::sqrt;
0053         using boost::math::isinf;
0054
0055         uint64_t n = bounds.size();
0056         m_lbs.resize(n);
0057         m_dxs.resize(n);
0058         m_limit_types.resize(n);
0059
0060         static const char* function = "boost::math::quadrature::naive_monte_carlo<%1%>";
0061         for (uint64_t i = 0; i < n; ++i)
0062         {
0063             if (bounds[i].second <= bounds[i].first)
0064             {
0065                 boost::math::policies::raise_domain_error(function, "The upper bound is <= the lower bound.\n", bounds[i].second, Policy());
0066                 return;
0067             }
0068             if (isinf(bounds[i].first))
0069             {
0070                 if (isinf(bounds[i].second))
0071                 {
0072                     m_limit_types[i] = detail::limit_classification::DOUBLE_INFINITE;
0073                 }
0074                 else
0075                 {
0076                     m_limit_types[i] = detail::limit_classification::LOWER_BOUND_INFINITE;
0077                     // Ok ok this is bad to use the second bound as the lower limit and then reflect.
0078                     m_lbs[i] = bounds[i].second;
0079                     m_dxs[i] = numeric_limits<Real>::quiet_NaN();
0080                 }
0081             }
0082             else if (isinf(bounds[i].second))
0083             {
0084                 m_limit_types[i] = detail::limit_classification::UPPER_BOUND_INFINITE;
0085                 if (singular)
0086                 {
0087                     // I've found that it's easier to sample on a closed set and perturb the boundary
0088                     // than to try to sample very close to the boundary.
0089                     m_lbs[i] = std::nextafter(bounds[i].first, (std::numeric_limits<Real>::max)());
0090                 }
0091                 else
0092                 {
0093                     m_lbs[i] = bounds[i].first;
0094                 }
0095                 m_dxs[i] = numeric_limits<Real>::quiet_NaN();
0096             }
0097             else
0098             {
0099                 m_limit_types[i] = detail::limit_classification::FINITE;
0100                 if (singular)
0101                 {
0102                     if (bounds[i].first == 0)
0103                     {
0104                         m_lbs[i] = std::numeric_limits<Real>::epsilon();
0105                     }
0106                     else
0107                     {
0108                         m_lbs[i] = std::nextafter(bounds[i].first, (std::numeric_limits<Real>::max)());
0109                     }
0110
0111                     m_dxs[i] = std::nextafter(bounds[i].second, std::numeric_limits<Real>::lowest()) - m_lbs[i];
0112                 }
0113                 else
0114                 {
0115                     m_lbs[i] = bounds[i].first;
0116                     m_dxs[i] = bounds[i].second - bounds[i].first;
0117                 }
0118                 m_volume *= m_dxs[i];
0119             }
0120         }
0121
0122         m_integrand = [this, &integrand](std::vector<Real> & x)->Real
0123         {
0124             Real coeff = m_volume;
0125             for (uint64_t i = 0; i < x.size(); ++i)
0126             {
0127                 // Variable transformation are listed at:
0128                 // https://en.wikipedia.org/wiki/Numerical_integration
0129                 // However, we've made some changes to these so that we can evaluate on a compact domain.
0130                 if (m_limit_types[i] == detail::limit_classification::FINITE)
0131                 {
0132                     x[i] = m_lbs[i] + x[i]*m_dxs[i];
0133                 }
0134                 else if (m_limit_types[i] == detail::limit_classification::UPPER_BOUND_INFINITE)
0135                 {
0136                     Real t = x[i];
0137                     Real z = 1/(1 + numeric_limits<Real>::epsilon() - t);
0138                     coeff *= (z*z)*(1 + numeric_limits<Real>::epsilon());
0139                     x[i] = m_lbs[i] + t*z;
0140                 }
0141                 else if (m_limit_types[i] == detail::limit_classification::LOWER_BOUND_INFINITE)
0142                 {
0143                     Real t = x[i];
0144                     Real z = 1/(t+sqrt((numeric_limits<Real>::min)()));
0145                     coeff *= (z*z);
0146                     x[i] = m_lbs[i] + (t-1)*z;
0147                 }
0148                 else
0149                 {
0150                     Real t1 = 1/(1+numeric_limits<Real>::epsilon() - x[i]);
0151                     Real t2 = 1/(x[i]+numeric_limits<Real>::epsilon());
0152                     x[i] = (2*x[i]-1)*t1*t2/4;
0153                     coeff *= (t1*t1+t2*t2)/4;
0154                 }
0155             }
0156             return coeff*integrand(x);
0157         };
0158
0159         // If we don't do a single function call in the constructor,
0160         // we can't do a restart.
0161         std::vector<Real> x(m_lbs.size());
0162
0163         // If the seed is zero, that tells us to choose a random seed for the user:
0164         if (seed == 0)
0165         {
0166             std::random_device rd;
0167             seed = rd();
0168         }
0169
0170         RandomNumberGenerator gen(seed);
0171         Real inv_denom = 1/static_cast<Real>(((gen.max)()-(gen.min)()));
0172
0173         m_num_threads = (std::max)(m_num_threads, static_cast<uint64_t>(1));
0174         m_thread_calls.reset(new std::atomic<uint64_t>[threads]);
0175         m_thread_Ss.reset(new std::atomic<Real>[threads]);
0176         m_thread_averages.reset(new std::atomic<Real>[threads]);
0177
0178         Real avg = 0;
0179         for (uint64_t i = 0; i < m_num_threads; ++i)
0180         {
0181             for (uint64_t j = 0; j < m_lbs.size(); ++j)
0182             {
0183                 x[j] = (gen()-(gen.min)())*inv_denom;
0184             }
0185             Real y = m_integrand(x);
0186             m_thread_averages[i] = y; // relaxed store
0187             m_thread_calls[i] = 1;
0188             m_thread_Ss[i] = 0;
0189             avg += y;
0190         }
0191         avg /= m_num_threads;
0192         m_avg = avg; // relaxed store
0193
0194         m_error_goal = error_goal; // relaxed store
0195         m_start = std::chrono::system_clock::now();
0196         m_done = false; // relaxed store
0197         m_total_calls = m_num_threads;  // relaxed store
0198         m_variance = (numeric_limits<Real>::max)();
0199     }
0200
0201     std::future<Real> integrate()
0202     {
0203         // Set done to false in case we wish to restart:
0204         m_done.store(false); // relaxed store, no worker threads yet
0205         m_start = std::chrono::system_clock::now();
0206         return std::async(std::launch::async,
0207                           &naive_monte_carlo::m_integrate, this);
0208     }
0209
0210     void cancel()
0211     {
0212         // If seed = 0 (meaning have the routine pick the seed), this leaves the seed the same.
0213         // If seed != 0, then the seed is changed, so a restart doesn't do the exact same thing.
0214         m_seed = m_seed*m_seed;
0215         m_done = true; // relaxed store, worker threads will get the message eventually
0216         // Make sure the error goal is infinite, because otherwise we'll loop when we do the final error goal check:
0217         m_error_goal = (std::numeric_limits<Real>::max)();
0218     }
0219
0220     Real variance() const
0221     {
0222         return m_variance.load();
0223     }
0224
0225     Real current_error_estimate() const
0226     {
0227         using std::sqrt;
0228         //
0229         // There is a bug here: m_variance and m_total_calls get updated asynchronously
0230         // and may be out of synch when we compute the error estimate, not sure if it matters though...
0231         //
0232         return sqrt(m_variance.load()/m_total_calls.load());
0233     }
0234
0235     std::chrono::duration<Real> estimated_time_to_completion() const
0236     {
0237         auto now = std::chrono::system_clock::now();
0238         std::chrono::duration<Real> elapsed_seconds = now - m_start;
0239         Real r = this->current_error_estimate()/m_error_goal.load(); // relaxed load
0240         if (r*r <= 1) {
0241             return 0*elapsed_seconds;
0242         }
0243         return (r*r - 1)*elapsed_seconds;
0244     }
0245
0246     void update_target_error(Real new_target_error)
0247     {
0248         m_error_goal = new_target_error;  // relaxed store
0249     }
0250
0251     Real progress() const
0252     {
0253         Real r = m_error_goal.load()/this->current_error_estimate();  // relaxed load
0254         if (r*r >= 1)
0255         {
0256             return 1;
0257         }
0258         return r*r;
0259     }
0260
0261     Real current_estimate() const
0262     {
0263         return m_avg.load();
0264     }
0265
0266     uint64_t calls() const
0267     {
0268         return m_total_calls.load();  // relaxed load
0269     }
0270
0271 private:
0272
0273    Real m_integrate()
0274    {
0275       uint64_t seed;
0276       // If the user tells us to pick a seed, pick a seed:
0277       if (m_seed == 0)
0278       {
0279          std::random_device rd;
0280          seed = rd();
0281       }
0282       else // use the seed we are given:
0283       {
0284          seed = m_seed;
0285       }
0286       RandomNumberGenerator gen(seed);
0287       int max_repeat_tries = 5;
0288       do{
0289
0290          if (max_repeat_tries < 5)
0291          {
0292             m_done = false;
0293
0294 #ifdef BOOST_NAIVE_MONTE_CARLO_DEBUG_FAILURES
0295             std::cerr << "Failed to achieve required tolerance first time through..\n";
0296             std::cerr << "  variance =    " << m_variance << std::endl;
0297             std::cerr << "  average =     " << m_avg << std::endl;
0298             std::cerr << "  total calls = " << m_total_calls << std::endl;
0299
0300             for (std::size_t i = 0; i < m_num_threads; ++i)
0301                std::cerr << "  thread_calls[" << i << "] = " << m_thread_calls[i] << std::endl;
0302             for (std::size_t i = 0; i < m_num_threads; ++i)
0303                std::cerr << "  thread_averages[" << i << "] = " << m_thread_averages[i] << std::endl;
0304             for (std::size_t i = 0; i < m_num_threads; ++i)
0305                std::cerr << "  thread_Ss[" << i << "] = " << m_thread_Ss[i] << std::endl;
0306 #endif
0307          }
0308
0309          std::vector<std::thread> threads(m_num_threads);
0310          for (uint64_t i = 0; i < threads.size(); ++i)
0311          {
0312             threads[i] = std::thread(&naive_monte_carlo::m_thread_monte, this, i, gen());
0313          }
0314          do {
0315             std::this_thread::sleep_for(std::chrono::milliseconds(100));
0316             uint64_t total_calls = 0;
0317             for (uint64_t i = 0; i < m_num_threads; ++i)
0318             {
0319                uint64_t t_calls = m_thread_calls[i].load(std::memory_order_consume);
0320                total_calls += t_calls;
0321             }
0322             Real variance = 0;
0323             Real avg = 0;
0324             for (uint64_t i = 0; i < m_num_threads; ++i)
0325             {
0326                uint64_t t_calls = m_thread_calls[i].load(std::memory_order_consume);
0327                // Will this overflow? Not hard to remove . . .
0328                avg += m_thread_averages[i].load(std::memory_order_relaxed)*(static_cast<Real>(t_calls) / static_cast<Real>(total_calls));
0329                variance += m_thread_Ss[i].load(std::memory_order_relaxed);
0330             }
0331             m_avg.store(avg, std::memory_order_release);
0332             m_variance.store(variance / (total_calls - 1), std::memory_order_release);
0333             m_total_calls = total_calls; // relaxed store, it's just for user feedback
0334             // Allow cancellation:
0335             if (m_done) // relaxed load
0336             {
0337                break;
0338             }
0339          } while (m_total_calls < 2048 || this->current_error_estimate() > m_error_goal.load(std::memory_order_consume));
0340          // Error bound met; signal the threads:
0341          m_done = true; // relaxed store, threads will get the message in the end
0342          std::for_each(threads.begin(), threads.end(),
0343             std::mem_fn(&std::thread::join));
0344          if (m_exception)
0345          {
0346             std::rethrow_exception(m_exception);
0347          }
0348          // Incorporate their work into the final estimate:
0349          uint64_t total_calls = 0;
0350          for (uint64_t i = 0; i < m_num_threads; ++i)
0351          {
0352             uint64_t t_calls = m_thread_calls[i].load(std::memory_order_consume);
0353             total_calls += t_calls;
0354          }
0355          Real variance = 0;
0356          Real avg = 0;
0357
0358          for (uint64_t i = 0; i < m_num_threads; ++i)
0359          {
0360             uint64_t t_calls = m_thread_calls[i].load(std::memory_order_consume);
0361             // Averages weighted by the number of calls the thread made:
0362             avg += m_thread_averages[i].load(std::memory_order_relaxed)*(static_cast<Real>(t_calls) / static_cast<Real>(total_calls));
0363             variance += m_thread_Ss[i].load(std::memory_order_relaxed);
0364          }
0365          m_avg.store(avg, std::memory_order_release);
0366          m_variance.store(variance / (total_calls - 1), std::memory_order_release);
0367          m_total_calls = total_calls; // relaxed store, this is just user feedback
0368
0369          // Sometimes, the master will observe the variance at a very "good" (or bad?) moment,
0370          // Then the threads proceed to find the variance is much greater by the time they hear the message to stop.
0371          // This *WOULD* make sure that the final error estimate is within the error bounds.
0372       }
0373       while ((--max_repeat_tries >= 0) && (this->current_error_estimate() > m_error_goal));
0374
0375       return m_avg.load(std::memory_order_consume);
0376     }
0377
0378     void m_thread_monte(uint64_t thread_index, uint64_t seed)
0379     {
0380         using std::numeric_limits;
0381         try
0382         {
0383             std::vector<Real> x(m_lbs.size());
0384             RandomNumberGenerator gen(seed);
0385             Real inv_denom = static_cast<Real>(1) / static_cast<Real>(( (gen.max)() - (gen.min)() ));
0386             Real M1 = m_thread_averages[thread_index].load(std::memory_order_consume);
0387             Real S = m_thread_Ss[thread_index].load(std::memory_order_consume);
0388             // Kahan summation is required or the value of the integrand will go on a random walk during long computations.
0389             // See the implementation discussion.
0390             // The idea is that the unstabilized additions have error sigma(f)/sqrt(N) + epsilon*N, which diverges faster than it converges!
0391             // Kahan summation turns this to sigma(f)/sqrt(N) + epsilon^2*N, and the random walk occurs on a timescale of 10^14 years (on current hardware)
0392             Real compensator = 0;
0393             uint64_t k = m_thread_calls[thread_index].load(std::memory_order_consume);
0394             while (!m_done) // relaxed load
0395             {
0396                 int j = 0;
0397                 // If we don't have a certain number of calls before an update, we can easily terminate prematurely
0398                 // because the variance estimate is way too low. This magic number is a reasonable compromise, as 1/sqrt(2048) = 0.02,
0399                 // so it should recover 2 digits if the integrand isn't poorly behaved, and if it is, it should discover that before premature termination.
0400                 // Of course if the user has 64 threads, then this number is probably excessive.
0401                 int magic_calls_before_update = 2048;
0402                 while (j++ < magic_calls_before_update)
0403                 {
0404                     for (uint64_t i = 0; i < m_lbs.size(); ++i)
0405                     {
0406                         x[i] = (gen() - (gen.min)())*inv_denom;
0407                     }
0408                     Real f = m_integrand(x);
0409                     using std::isfinite;
0410                     if (!isfinite(f))
0411                     {
0412                         // The call to m_integrand transform x, so this error message states the correct node.
0413                         std::stringstream os;
0414                         os << "Your integrand was evaluated at {";
0415                         for (uint64_t i = 0; i < x.size() -1; ++i)
0416                         {
0417                              os << x[i] << ", ";
0418                         }
0419                         os << x[x.size() -1] << "}, and returned " << f << std::endl;
0420                         static const char* function = "boost::math::quadrature::naive_monte_carlo<%1%>";
0421                         boost::math::policies::raise_domain_error(function, os.str().c_str(), /*this is a dummy arg to make it compile*/ 7.2, Policy());
0422                     }
0423                     ++k;
0424                     Real term = (f - M1)/k;
0425                     Real y1 = term - compensator;
0426                     Real M2 = M1 + y1;
0427                     compensator = (M2 - M1) - y1;
0428                     S += (f - M1)*(f - M2);
0429                     M1 = M2;
0430                 }
0431                 m_thread_averages[thread_index].store(M1, std::memory_order_release);
0432                 m_thread_Ss[thread_index].store(S, std::memory_order_release);
0433                 m_thread_calls[thread_index].store(k, std::memory_order_release);
0434             }
0435         }
0436         catch (...)
0437         {
0438             // Signal the other threads that the computation is ruined:
0439             m_done = true; // relaxed store
0440             std::lock_guard<std::mutex> lock(m_exception_mutex); // Scoped lock to prevent race writing to m_exception
0441             m_exception = std::current_exception();
0442         }
0443     }
0444
0445     std::function<Real(std::vector<Real> &)> m_integrand;
0446     uint64_t m_num_threads;
0447     std::atomic<uint64_t> m_seed;
0448     std::atomic<Real> m_error_goal;
0449     std::atomic<bool> m_done{};
0450     std::vector<Real> m_lbs;
0451     std::vector<Real> m_dxs;
0452     std::vector<detail::limit_classification> m_limit_types;
0453     Real m_volume;
0454     std::atomic<uint64_t> m_total_calls{};
0455     // I wanted these to be vectors rather than maps,
0456     // but you can't resize a vector of atomics.
0457     std::unique_ptr<std::atomic<uint64_t>[]> m_thread_calls;
0458     std::atomic<Real> m_variance;
0459     std::unique_ptr<std::atomic<Real>[]> m_thread_Ss;
0460     std::atomic<Real> m_avg;
0461     std::unique_ptr<std::atomic<Real>[]> m_thread_averages;
0462     std::chrono::time_point<std::chrono::system_clock> m_start;
0463     std::exception_ptr m_exception;
0464     std::mutex m_exception_mutex;
0465 };
0466
0467 }}}
0468 #endif