math/optimization/jso.hpp

0001 /*
0002  * Copyright Nick Thompson, 2024
0003  * Use, modification and distribution are subject to the
0004  * Boost Software License, Version 1.0. (See accompanying file
0005  * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
0006  */
0007 #ifndef BOOST_MATH_OPTIMIZATION_JSO_HPP
0008 #define BOOST_MATH_OPTIMIZATION_JSO_HPP
0009 #include <atomic>
0010 #include <boost/math/optimization/detail/common.hpp>
0011 #include <cmath>
0012 #include <iostream>
0013 #include <limits>
0014 #include <random>
0015 #include <sstream>
0016 #include <stdexcept>
0017 #include <thread>
0018 #include <type_traits>
0019 #include <utility>
0020 #include <vector>
0021
0022 namespace boost::math::optimization {
0023
0024 #ifndef BOOST_MATH_DEBUG_JSO
0025 #define BOOST_MATH_DEBUG_JSO 0
0026 #endif
0027 // Follows: Brest, Janez, Mirjam Sepesy Maucec, and Borko Boskovic. "Single
0028 // objective real-parameter optimization: Algorithm jSO." 2017 IEEE congress on
0029 // evolutionary computation (CEC). IEEE, 2017. In the sequel, this wil be
0030 // referred to as "the reference". Note that the reference is rather difficult
0031 // to understand without also reading: Zhang, J., & Sanderson, A. C. (2009).
0032 // JADE: adaptive differential evolution with optional external archive.
0033 // IEEE Transactions on evolutionary computation, 13(5), 945-958."
0034 template <typename ArgumentContainer> struct jso_parameters {
0035   using Real = typename ArgumentContainer::value_type;
0036   using DimensionlessReal = decltype(Real()/Real());
0037   ArgumentContainer lower_bounds;
0038   ArgumentContainer upper_bounds;
0039   // Population in the first generation.
0040   // This defaults to 0, which indicates "use the default specified in the
0041   // referenced paper". To wit, initial population size
0042   // =ceil(25log(D+1)sqrt(D)), where D is the dimension of the problem.
0043   size_t initial_population_size = 0;
0044   // Maximum number of function evaluations.
0045   // The default of 0 indicates "use max_function_evaluations = 10,000D", where
0046   // D is the dimension of the problem.
0047   size_t max_function_evaluations = 0;
0048   size_t threads = std::thread::hardware_concurrency();
0049   ArgumentContainer const *initial_guess = nullptr;
0050 };
0051
0052 template <typename ArgumentContainer>
0053 void validate_jso_parameters(jso_parameters<ArgumentContainer> &jso_params) {
0054   using std::isfinite;
0055   using std::isnan;
0056   std::ostringstream oss;
0057   if (jso_params.threads == 0) {
0058     oss << __FILE__ << ":" << __LINE__ << ":" << __func__;
0059     oss << ": Requested zero threads to perform the calculation, but at least "
0060            "1 is required.";
0061     throw std::invalid_argument(oss.str());
0062   }
0063   detail::validate_bounds(jso_params.lower_bounds, jso_params.upper_bounds);
0064   auto const dimension = jso_params.lower_bounds.size();
0065   if (jso_params.initial_population_size == 0) {
0066     // Ever so slightly different than the reference-the dimension can be 1,
0067     // but if we followed the reference, the population size would then be zero.
0068     jso_params.initial_population_size = static_cast<size_t>(
0069         std::ceil(25 * std::log(dimension + 1.0) * sqrt(dimension)));
0070   }
0071   if (jso_params.max_function_evaluations == 0) {
0072     // Recommended value from the reference:
0073     jso_params.max_function_evaluations = 10000 * dimension;
0074   }
0075   if (jso_params.initial_population_size < 4) {
0076     oss << __FILE__ << ":" << __LINE__ << ":" << __func__;
0077     oss << ": The population size must be at least 4, but requested population "
0078            "size of "
0079         << jso_params.initial_population_size << ".";
0080     throw std::invalid_argument(oss.str());
0081   }
0082   if (jso_params.threads > jso_params.initial_population_size) {
0083     oss << __FILE__ << ":" << __LINE__ << ":" << __func__;
0084     oss << ": There must be more individuals in the population than threads.";
0085     throw std::invalid_argument(oss.str());
0086   }
0087   if (jso_params.initial_guess) {
0088     detail::validate_initial_guess(*jso_params.initial_guess,
0089                                    jso_params.lower_bounds,
0090                                    jso_params.upper_bounds);
0091   }
0092 }
0093
0094 template <typename ArgumentContainer, class Func, class URBG>
0095 ArgumentContainer
0096 jso(const Func cost_function, jso_parameters<ArgumentContainer> &jso_params,
0097     URBG &gen,
0098     std::invoke_result_t<Func, ArgumentContainer> target_value =
0099         std::numeric_limits<
0100             std::invoke_result_t<Func, ArgumentContainer>>::quiet_NaN(),
0101     std::atomic<bool> *cancellation = nullptr,
0102     std::atomic<std::invoke_result_t<Func, ArgumentContainer>>
0103         *current_minimum_cost = nullptr,
0104     std::vector<std::pair<ArgumentContainer,
0105                           std::invoke_result_t<Func, ArgumentContainer>>>
0106         *queries = nullptr) {
0107   using Real = typename ArgumentContainer::value_type;
0108   using DimensionlessReal = decltype(Real()/Real());
0109   validate_jso_parameters(jso_params);
0110
0111   using ResultType = std::invoke_result_t<Func, ArgumentContainer>;
0112   using std::abs;
0113   using std::cauchy_distribution;
0114   using std::clamp;
0115   using std::isnan;
0116   using std::max;
0117   using std::round;
0118   using std::isfinite;
0119   using std::uniform_real_distribution;
0120
0121   // Refer to the referenced paper, pseudocode on page 1313:
0122   // Algorithm 1, jSO, Line 1:
0123   std::vector<ArgumentContainer> archive;
0124
0125   // Algorithm 1, jSO, Line 2
0126   // "Initialize population P_g = (x_0,g, ..., x_{np-1},g) randomly.
0127   auto population = detail::random_initial_population(
0128       jso_params.lower_bounds, jso_params.upper_bounds,
0129       jso_params.initial_population_size, gen);
0130   if (jso_params.initial_guess) {
0131     population[0] = *jso_params.initial_guess;
0132   }
0133   size_t dimension = jso_params.lower_bounds.size();
0134   // Don't force the user to initialize to sane value:
0135   if (current_minimum_cost) {
0136     *current_minimum_cost = std::numeric_limits<ResultType>::infinity();
0137   }
0138   std::atomic<bool> target_attained = false;
0139   std::vector<ResultType> cost(jso_params.initial_population_size,
0140                                std::numeric_limits<ResultType>::quiet_NaN());
0141   for (size_t i = 0; i < cost.size(); ++i) {
0142     cost[i] = cost_function(population[i]);
0143     if (!isnan(target_value) && cost[i] <= target_value) {
0144       target_attained = true;
0145     }
0146     if (current_minimum_cost && cost[i] < *current_minimum_cost) {
0147       *current_minimum_cost = cost[i];
0148     }
0149     if (queries) {
0150       queries->push_back(std::make_pair(population[i], cost[i]));
0151     }
0152   }
0153   std::vector<size_t> indices = detail::best_indices(cost);
0154   std::atomic<size_t> function_evaluations = population.size();
0155 #if BOOST_MATH_DEBUG_JSO
0156   {
0157     auto const &min_cost = cost[indices[0]];
0158     auto const &location = population[indices[0]];
0159     std::cout << __FILE__ << ":" << __LINE__ << ":" << __func__;
0160     std::cout << "\n\tMinimum cost after evaluation of initial random "
0161                  "population of size "
0162               << population.size() << " is " << min_cost << "."
0163               << "\n\tLocation {";
0164     for (size_t i = 0; i < location.size() - 1; ++i) {
0165       std::cout << location[i] << ", ";
0166     }
0167     std::cout << location.back() << "}.\n";
0168   }
0169 #endif
0170   // Algorithm 1: jSO algorithm, Line 3:
0171   // "Set all values in M_F to 0.5":
0172   // I believe this is a typo: Compare with "Section B. Experimental Results",
0173   // last bullet, which claims this should be set to 0.3. The reference
0174   // implementation also does 0.3:
0175   size_t H = 5;
0176   std::vector<DimensionlessReal> M_F(H, static_cast<DimensionlessReal>(0.3));
0177   // Algorithm 1: jSO algorithm, Line 4:
0178   // "Set all values in M_CR to 0.8":
0179   std::vector<DimensionlessReal> M_CR(H, static_cast<DimensionlessReal>(0.8));
0180
0181   std::uniform_real_distribution<DimensionlessReal> unif01(DimensionlessReal(0), DimensionlessReal(1));
0182   bool keep_going = !target_attained;
0183   if (cancellation && *cancellation) {
0184     keep_going = false;
0185   }
0186   // k from:
0187   // http://metahack.org/CEC2014-Tanabe-Fukunaga.pdf, Algorithm 1:
0188   // Determines where Lehmer means are stored in the memory:
0189   size_t k = 0;
0190   size_t minimum_population_size = (max)(size_t(4), size_t(jso_params.threads));
0191   while (keep_going) {
0192     // Algorithm 1, jSO, Line 7:
0193     std::vector<DimensionlessReal> S_CR;
0194     std::vector<DimensionlessReal> S_F;
0195     // Equation 9 of L-SHADE:
0196     std::vector<ResultType> delta_f;
0197     for (size_t i = 0; i < population.size(); ++i) {
0198       // Algorithm 1, jSO, Line 9:
0199       auto ri = gen() % H;
0200       // Algorithm 1, jSO, Line 10-13:
0201       // Again, what is written in the pseudocode is not quite right.
0202       // What they mean is mu_F = 0.9-the historical memory is not used.
0203       // I confess I find it weird to store the historical memory if we're just
0204       // gonna ignore it, but that's what the paper and the reference
0205       // implementation says!
0206       DimensionlessReal mu_F = static_cast<DimensionlessReal>(0.9);
0207       DimensionlessReal mu_CR = static_cast<DimensionlessReal>(0.9);
0208       if (ri != H - 1) {
0209         mu_F = M_F[ri];
0210         mu_CR = M_CR[ri];
0211       }
0212       // Algorithm 1, jSO, Line 14-18:
0213       DimensionlessReal crossover_probability = static_cast<DimensionlessReal>(0);
0214       if (mu_CR >= 0) {
0215         using std::normal_distribution;
0216         normal_distribution<DimensionlessReal> normal(mu_CR, static_cast<DimensionlessReal>(0.1));
0217         crossover_probability = normal(gen);
0218         // Clamp comes from L-SHADE description:
0219         crossover_probability = clamp(crossover_probability, DimensionlessReal(0), DimensionlessReal(1));
0220       }
0221       // Algorithm 1, jSO, Line 19-23:
0222       // Note that the pseudocode uses a "max_generations parameter",
0223       // but the reference implementation does not.
0224       // Since we already require specification of max_function_evaluations,
0225       // the pseudocode adds an unnecessary parameter.
0226       if (4 * function_evaluations < jso_params.max_function_evaluations) {
0227         crossover_probability = (max)(crossover_probability, DimensionlessReal(0.7));
0228       } else if (2 * function_evaluations <
0229                  jso_params.max_function_evaluations) {
0230         crossover_probability = (max)(crossover_probability, DimensionlessReal(0.6));
0231       }
0232
0233       // Algorithm 1, jSO, Line 24-27:
0234       // Note the adjustments to the pseudocode given in the reference
0235       // implementation.
0236       cauchy_distribution<DimensionlessReal> cauchy(mu_F, static_cast<DimensionlessReal>(0.1));
0237       DimensionlessReal F;
0238       do {
0239         F = cauchy(gen);
0240         if (F > 1) {
0241           F = 1;
0242         }
0243       } while (F <= 0);
0244       DimensionlessReal threshold = static_cast<DimensionlessReal>(7) / static_cast<DimensionlessReal>(10);
0245       if ((10 * function_evaluations <
0246            6 * jso_params.max_function_evaluations) &&
0247           (F > threshold)) {
0248         F = threshold;
0249       }
0250       // > p value for mutation strategy linearly decreases from pmax to pmin
0251       // during the evolutionary process, > where pmax = 0.25 in jSO and pmin =
0252       // pmax/2.
0253       DimensionlessReal p = DimensionlessReal(0.25) * (1 - static_cast<DimensionlessReal>(function_evaluations) /
0254                                      (2 * jso_params.max_function_evaluations));
0255       // Equation (4) of the reference:
0256       DimensionlessReal Fw = static_cast<DimensionlessReal>(1.2) * F;
0257       if (10 * function_evaluations < 4 * jso_params.max_function_evaluations) {
0258         if (10 * function_evaluations <
0259             2 * jso_params.max_function_evaluations) {
0260           Fw = static_cast<DimensionlessReal>(0.7) * F;
0261         } else {
0262           Fw = static_cast<DimensionlessReal>(0.8) * F;
0263         }
0264       }
0265       // Algorithm 1, jSO, Line 28:
0266       // "ui,g := current-to-pBest-w/1/bin using Eq. (3)"
0267       // This is not explained in the reference, but "current-to-pBest"
0268       // strategy means "select randomly among the best values available."
0269       // See:
0270       // Zhang, J., & Sanderson, A. C. (2009).
0271       // JADE: adaptive differential evolution with optional external archive.
0272       // IEEE Transactions on evolutionary computation, 13(5), 945-958.
0273       // > As a generalization of DE/current-to- best,
0274       // > DE/current-to-pbest utilizes not only the best solution information
0275       // > but also the information of other good solutions.
0276       // > To be specific, any of the top 100p%, p in (0, 1],
0277       // > solutions can be randomly chosen in DE/current-to-pbest to play the
0278       // role > designed exclusively for the single best solution in
0279       // DE/current-to-best."
0280       size_t max_p_index = static_cast<size_t>(std::ceil(p * indices.size()));
0281       size_t p_best_idx = gen() % max_p_index;
0282       // We now need r1, r2 so that r1 != r2 != i:
0283       size_t r1;
0284       do {
0285         r1 = gen() % population.size();
0286       } while (r1 == i);
0287       size_t r2;
0288       do {
0289         r2 = gen() % (population.size() + archive.size());
0290       } while (r2 == r1 || r2 == i);
0291
0292       ArgumentContainer trial_vector;
0293       if constexpr (detail::has_resize_v<ArgumentContainer>) {
0294         trial_vector.resize(dimension);
0295       }
0296       auto const &xi = population[i];
0297       auto const &xr1 = population[r1];
0298       ArgumentContainer xr2;
0299       if (r2 < population.size()) {
0300         xr2 = population[r2];
0301       } else {
0302         xr2 = archive[r2 - population.size()];
0303       }
0304       auto const &x_p_best = population[p_best_idx];
0305       for (size_t j = 0; j < dimension; ++j) {
0306         auto guaranteed_changed_idx = gen() % dimension;
0307         if (unif01(gen) < crossover_probability ||
0308             j == guaranteed_changed_idx) {
0309           auto tmp = xi[j] + Fw * (x_p_best[j] - xi[j]) + F * (xr1[j] - xr2[j]);
0310           auto const &lb = jso_params.lower_bounds[j];
0311           auto const &ub = jso_params.upper_bounds[j];
0312           // Some others recommend regenerating the indices rather than
0313           // clamping; I dunno seems like it could get stuck regenerating . . .
0314           // Another suggestion is provided in:
0315           // "JADE: Adaptive Differential Evolution with Optional External
0316           // Archive" page 947. Perhaps we should implement it!
0317           trial_vector[j] = clamp(tmp, lb, ub);
0318         } else {
0319           trial_vector[j] = population[i][j];
0320         }
0321       }
0322       auto trial_cost = cost_function(trial_vector);
0323       function_evaluations++;
0324       if (isnan(trial_cost)) {
0325         continue;
0326       }
0327       if (queries) {
0328         queries->push_back(std::make_pair(trial_vector, trial_cost));
0329       }
0330
0331       // Successful trial:
0332       if (trial_cost < cost[i] || isnan(cost[i])) {
0333         if (!isnan(target_value) && trial_cost <= target_value) {
0334           target_attained = true;
0335         }
0336         if (current_minimum_cost && trial_cost < *current_minimum_cost) {
0337           *current_minimum_cost = trial_cost;
0338         }
0339         // Can't decide on improvement if the previous evaluation was a NaN:
0340         if (!isnan(cost[i])) {
0341           if (crossover_probability > 1 || crossover_probability < 0) {
0342             throw std::domain_error("Crossover probability is weird.");
0343           }
0344           if (F > 1 || F < 0) {
0345             throw std::domain_error("Scale factor (F) is weird.");
0346           }
0347           S_CR.push_back(crossover_probability);
0348           S_F.push_back(F);
0349           delta_f.push_back(abs(cost[i] - trial_cost));
0350         }
0351         // Build the historical archive:
0352         if (archive.size() < cost.size()) {
0353           archive.push_back(trial_vector);
0354         } else {
0355           // If it's already built, then put the successful trial in a random index:
0356           archive.resize(cost.size());
0357           auto idx = gen() % archive.size();
0358           archive[idx] = trial_vector;
0359         }
0360         cost[i] = trial_cost;
0361         population[i] = trial_vector;
0362       }
0363     }
0364
0365     indices = detail::best_indices(cost);
0366
0367     // If there are no successful updates this generation, we do not update the
0368     // historical memory:
0369     if (S_CR.size() > 0) {
0370       std::vector<DimensionlessReal> weights(S_CR.size(),
0371                                 std::numeric_limits<DimensionlessReal>::quiet_NaN());
0372       ResultType delta_sum = static_cast<ResultType>(0);
0373       for (auto const &delta : delta_f) {
0374         delta_sum += delta;
0375       }
0376       if (delta_sum <= 0 || !isfinite(delta_sum)) {
0377         std::ostringstream oss;
0378         oss << __FILE__ << ":" << __LINE__ << ":" << __func__;
0379         oss << "\n\tYou've hit a bug: The sum of improvements must be strictly "
0380                "positive, but got "
0381             << delta_sum << " improvement from " << S_CR.size()
0382             << " successful updates.\n";
0383         oss << "\tImprovements: {" << std::hexfloat;
0384         for (size_t l = 0; l < delta_f.size() -1; ++l) {
0385           oss << delta_f[l] << ", ";
0386         }
0387         oss << delta_f.back() << "}.\n";
0388         throw std::logic_error(oss.str());
0389       }
0390       for (size_t i = 0; i < weights.size(); ++i) {
0391         weights[i] = delta_f[i] / delta_sum;
0392       }
0393
0394       M_CR[k] = detail::weighted_lehmer_mean(S_CR, weights);
0395       M_F[k] = detail::weighted_lehmer_mean(S_F, weights);
0396     }
0397     k++;
0398     if (k == M_F.size()) {
0399       k = 0;
0400     }
0401     if (target_attained) {
0402       break;
0403     }
0404     if (cancellation && *cancellation) {
0405       break;
0406     }
0407     if (function_evaluations >= jso_params.max_function_evaluations) {
0408       break;
0409     }
0410     // Linear population size reduction:
0411     size_t new_population_size = static_cast<size_t>(round(
0412         -double(jso_params.initial_population_size - minimum_population_size) *
0413             function_evaluations /
0414             static_cast<double>(jso_params.max_function_evaluations) +
0415         jso_params.initial_population_size));
0416     size_t num_erased = population.size() - new_population_size;
0417     std::vector<size_t> indices_to_erase(num_erased);
0418     size_t j = 0;
0419     for (size_t i = population.size() - 1; i >= new_population_size; --i) {
0420       indices_to_erase[j++] = indices[i];
0421     }
0422     std::sort(indices_to_erase.rbegin(), indices_to_erase.rend());
0423     for (auto const &idx : indices_to_erase) {
0424       population.erase(population.begin() + idx);
0425       cost.erase(cost.begin() + idx);
0426     }
0427     indices.resize(new_population_size);
0428   }
0429
0430 #if BOOST_MATH_DEBUG_JSO
0431   {
0432     auto const &min_cost = cost[indices[0]];
0433     auto const &location = population[indices[0]];
0434     std::cout << __FILE__ << ":" << __LINE__ << ":" << __func__;
0435     std::cout << "\n\tMinimum cost after completion is " << min_cost
0436               << ".\n\tLocation: {";
0437     for (size_t i = 0; i < location.size() - 1; ++i) {
0438       std::cout << location[i] << ", ";
0439     }
0440     std::cout << location.back() << "}.\n";
0441     std::cout << "\tRequired " << function_evaluations
0442               << " function calls out of "
0443               << jso_params.max_function_evaluations << " allowed.\n";
0444     if (target_attained) {
0445       std::cout << "\tReason for return: Target value attained.\n";
0446     }
0447     std::cout << "\tHistorical crossover probabilities (M_CR): {";
0448     for (size_t i = 0; i < M_CR.size() - 1; ++i) {
0449       std::cout << M_CR[i] << ", ";
0450     }
0451     std::cout << M_CR.back() << "}.\n";
0452     std::cout << "\tHistorical scale factors (M_F): {";
0453     for (size_t i = 0; i < M_F.size() - 1; ++i) {
0454       std::cout << M_F[i] << ", ";
0455     }
0456     std::cout << M_F.back() << "}.\n";
0457     std::cout << "\tFinal population size: " << population.size() << ".\n";
0458   }
0459 #endif
0460   return population[indices[0]];
0461 }
0462
0463 } // namespace boost::math::optimization
0464 #endif