File indexing completed on 2025-04-26 08:38:01
0001
0002
0003
0004
0005
0006
0007 #ifndef BOOST_MATH_OPTIMIZATION_CMA_ES_HPP
0008 #define BOOST_MATH_OPTIMIZATION_CMA_ES_HPP
0009 #include <atomic>
0010 #include <cmath>
0011 #include <iostream>
0012 #include <limits>
0013 #include <random>
0014 #include <sstream>
0015 #include <stdexcept>
0016 #include <utility>
0017 #include <vector>
0018 #include <boost/math/optimization/detail/common.hpp>
0019 #include <boost/math/tools/assert.hpp>
0020 #if __has_include(<Eigen/Dense>)
0021 #include <Eigen/Dense>
0022 #else
0023 #error "CMA-ES requires Eigen."
0024 #endif
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035 #ifndef BOOST_MATH_DEBUG_CMA_ES
0036 #define BOOST_MATH_DEBUG_CMA_ES 0
0037 #endif
0038
0039 namespace boost::math::optimization {
0040
0041 template <typename ArgumentContainer> struct cma_es_parameters {
0042 using Real = typename ArgumentContainer::value_type;
0043 using DimensionlessReal = decltype(Real()/Real());
0044 ArgumentContainer lower_bounds;
0045 ArgumentContainer upper_bounds;
0046 size_t max_generations = 1000;
0047 ArgumentContainer const *initial_guess = nullptr;
0048
0049
0050
0051 size_t population_size = 0;
0052
0053 DimensionlessReal learning_rate = 1;
0054 };
0055
0056 template <typename ArgumentContainer>
0057 void validate_cma_es_parameters(cma_es_parameters<ArgumentContainer> ¶ms) {
0058 using Real = typename ArgumentContainer::value_type;
0059 using DimensionlessReal = decltype(Real()/Real());
0060 using std::isfinite;
0061 using std::isnan;
0062 using std::log;
0063 using std::ceil;
0064 using std::floor;
0065
0066 std::ostringstream oss;
0067 detail::validate_bounds(params.lower_bounds, params.upper_bounds);
0068 if (params.initial_guess) {
0069 detail::validate_initial_guess(*params.initial_guess, params.lower_bounds, params.upper_bounds);
0070 }
0071 const size_t n = params.upper_bounds.size();
0072
0073 if (params.population_size == 0) {
0074
0075
0076
0077
0078 params.population_size = static_cast<size_t>(4 + floor(3*log(n)));
0079 }
0080 if (params.learning_rate <= DimensionlessReal(0) || !isfinite(params.learning_rate)) {
0081 oss << __FILE__ << ":" << __LINE__ << ":" << __func__;
0082 oss << ": The learning rate must be > 0, but got " << params.learning_rate << ".";
0083 throw std::invalid_argument(oss.str());
0084 }
0085 }
0086
0087 template <typename ArgumentContainer, class Func, class URBG>
0088 ArgumentContainer cma_es(
0089 const Func cost_function,
0090 cma_es_parameters<ArgumentContainer> ¶ms,
0091 URBG &gen,
0092 std::invoke_result_t<Func, ArgumentContainer> target_value = std::numeric_limits<std::invoke_result_t<Func, ArgumentContainer>>::quiet_NaN(),
0093 std::atomic<bool> *cancellation = nullptr,
0094 std::atomic<std::invoke_result_t<Func, ArgumentContainer>> *current_minimum_cost = nullptr,
0095 std::vector<std::pair<ArgumentContainer, std::invoke_result_t<Func, ArgumentContainer>>> *queries = nullptr)
0096 {
0097 using Real = typename ArgumentContainer::value_type;
0098 using DimensionlessReal = decltype(Real()/Real());
0099 using ResultType = std::invoke_result_t<Func, ArgumentContainer>;
0100 using std::abs;
0101 using std::log;
0102 using std::exp;
0103 using std::pow;
0104 using std::min;
0105 using std::max;
0106 using std::sqrt;
0107 using std::isnan;
0108 using std::isfinite;
0109 using std::uniform_real_distribution;
0110 using std::normal_distribution;
0111 validate_cma_es_parameters(params);
0112
0113 const size_t n = params.lower_bounds.size();
0114 std::atomic<bool> target_attained = false;
0115 std::atomic<ResultType> lowest_cost = std::numeric_limits<ResultType>::infinity();
0116 ArgumentContainer best_vector;
0117
0118 Eigen::Vector<DimensionlessReal, Eigen::Dynamic> p_c(n);
0119
0120 Eigen::Vector<DimensionlessReal, Eigen::Dynamic> p_sigma(n);
0121 if constexpr (detail::has_resize_v<ArgumentContainer>) {
0122 best_vector.resize(n, std::numeric_limits<Real>::quiet_NaN());
0123 }
0124 for (size_t i = 0; i < n; ++i) {
0125 p_c[i] = DimensionlessReal(0);
0126 p_sigma[i] = DimensionlessReal(0);
0127 }
0128
0129 size_t mu = params.population_size/2;
0130 std::vector<DimensionlessReal> w_prime(params.population_size, std::numeric_limits<DimensionlessReal>::quiet_NaN());
0131 for (size_t i = 0; i < params.population_size; ++i) {
0132
0133 w_prime[i] = log(static_cast<DimensionlessReal>(params.population_size + 1)/(2*(i+1)));
0134 }
0135
0136 DimensionlessReal positive_weight_sum = 0;
0137 DimensionlessReal sq_weight_sum = 0;
0138 for (size_t i = 0; i < mu; ++i) {
0139 BOOST_MATH_ASSERT(w_prime[i] > 0);
0140 positive_weight_sum += w_prime[i];
0141 sq_weight_sum += w_prime[i]*w_prime[i];
0142 }
0143 DimensionlessReal mu_eff = positive_weight_sum*positive_weight_sum/sq_weight_sum;
0144 BOOST_MATH_ASSERT(1 <= mu_eff);
0145 BOOST_MATH_ASSERT(mu_eff <= mu);
0146 DimensionlessReal negative_weight_sum = 0;
0147 sq_weight_sum = 0;
0148 for (size_t i = mu; i < params.population_size; ++i) {
0149 BOOST_MATH_ASSERT(w_prime[i] <= 0);
0150 negative_weight_sum += w_prime[i];
0151 sq_weight_sum += w_prime[i]*w_prime[i];
0152 }
0153 DimensionlessReal mu_eff_m = negative_weight_sum*negative_weight_sum/sq_weight_sum;
0154
0155 DimensionlessReal c_m = params.learning_rate;
0156
0157 DimensionlessReal c_sigma = (mu_eff + 2)/(n + mu_eff + 5);
0158 BOOST_MATH_ASSERT(c_sigma < 1);
0159 DimensionlessReal d_sigma = 1 + 2*(max)(DimensionlessReal(0), sqrt(DimensionlessReal((mu_eff - 1)/(n + 1))) - DimensionlessReal(1)) + c_sigma;
0160
0161 DimensionlessReal c_c = (4 + mu_eff/n)/(n + 4 + 2*mu_eff/n);
0162 BOOST_MATH_ASSERT(c_c <= 1);
0163
0164 DimensionlessReal c_1 = DimensionlessReal(2)/(pow(n + 1.3, 2) + mu_eff);
0165
0166 DimensionlessReal c_mu = (min)(1 - c_1, 2*(DimensionlessReal(0.25) + mu_eff + 1/mu_eff - 2)/((n+2)*(n+2) + mu_eff));
0167 BOOST_MATH_ASSERT(c_1 + c_mu <= DimensionlessReal(1));
0168
0169 DimensionlessReal alpha_mu_m = 1 + c_1/c_mu;
0170
0171 DimensionlessReal alpha_mu_eff_m = 1 + 2*mu_eff_m/(mu_eff + 2);
0172
0173 DimensionlessReal alpha_m_pos_def = (1- c_1 - c_mu)/(n*c_mu);
0174
0175 std::vector<DimensionlessReal> weights(params.population_size, std::numeric_limits<DimensionlessReal>::quiet_NaN());
0176 for (size_t i = 0; i < mu; ++i) {
0177 weights[i] = w_prime[i]/positive_weight_sum;
0178 }
0179 DimensionlessReal min_alpha = (min)(alpha_mu_m, (min)(alpha_mu_eff_m, alpha_m_pos_def));
0180 for (size_t i = mu; i < params.population_size; ++i) {
0181 weights[i] = min_alpha*w_prime[i]/abs(negative_weight_sum);
0182 }
0183
0184 Eigen::Matrix<DimensionlessReal, Eigen::Dynamic, Eigen::Dynamic> C = Eigen::Matrix<DimensionlessReal, Eigen::Dynamic, Eigen::Dynamic>::Identity(n, n);
0185 ArgumentContainer mean_vector;
0186
0187
0188 Real sigma = DimensionlessReal(0.3)*(params.upper_bounds[0] - params.lower_bounds[0]);;
0189 if (params.initial_guess) {
0190 mean_vector = *params.initial_guess;
0191 }
0192 else {
0193 mean_vector = detail::random_initial_population(params.lower_bounds, params.upper_bounds, 1, gen)[0];
0194 }
0195 auto initial_cost = cost_function(mean_vector);
0196 if (!isnan(initial_cost)) {
0197 best_vector = mean_vector;
0198 lowest_cost = initial_cost;
0199 if (current_minimum_cost) {
0200 *current_minimum_cost = initial_cost;
0201 }
0202 }
0203 #if BOOST_MATH_DEBUG_CMA_ES
0204 {
0205 std::cout << __FILE__ << ":" << __LINE__ << ":" << __func__ << "\n";
0206 std::cout << "\tRunning a (" << params.population_size/2 << "/" << params.population_size/2 << "_W, " << params.population_size << ")-aCMA Evolutionary Strategy on " << params.threads << " threads.\n";
0207 std::cout << "\tInitial mean vector: {";
0208 for (size_t i = 0; i < n - 1; ++i) {
0209 std::cout << mean_vector[i] << ", ";
0210 }
0211 std::cout << mean_vector[n - 1] << "}.\n";
0212 std::cout << "\tCost: " << lowest_cost << ".\n";
0213 std::cout << "\tInitial step length: " << sigma << ".\n";
0214 std::cout << "\tVariance effective selection mass: " << mu_eff << ".\n";
0215 std::cout << "\tLearning rate for rank-one update of covariance matrix: " << c_1 << ".\n";
0216 std::cout << "\tLearning rate for rank-mu update of covariance matrix: " << c_mu << ".\n";
0217 std::cout << "\tDecay rate for cumulation path for step-size control: " << c_sigma << ".\n";
0218 std::cout << "\tLearning rate for the mean: " << c_m << ".\n";
0219 std::cout << "\tDamping parameter for step-size update: " << d_sigma << ".\n";
0220 }
0221 #endif
0222 size_t generation = 0;
0223
0224 std::vector<Eigen::Vector<DimensionlessReal, Eigen::Dynamic>> ys(params.population_size);
0225 std::vector<ArgumentContainer> xs(params.population_size);
0226 std::vector<ResultType> costs(params.population_size, std::numeric_limits<ResultType>::quiet_NaN());
0227 Eigen::Vector<DimensionlessReal, Eigen::Dynamic> weighted_avg_y(n);
0228 Eigen::Vector<DimensionlessReal, Eigen::Dynamic> z(n);
0229 if constexpr (detail::has_resize_v<ArgumentContainer>) {
0230 for (auto & x : xs) {
0231 x.resize(n, std::numeric_limits<Real>::quiet_NaN());
0232 }
0233 }
0234 for (auto & y : ys) {
0235 y.resize(n);
0236 }
0237 normal_distribution<DimensionlessReal> dis(DimensionlessReal(0), DimensionlessReal(1));
0238 do {
0239 if (cancellation && *cancellation) {
0240 break;
0241 }
0242
0243
0244
0245
0246
0247
0248 Eigen::SelfAdjointEigenSolver<Eigen::Matrix<DimensionlessReal, Eigen::Dynamic, Eigen::Dynamic>> eigensolver(C);
0249 if (eigensolver.info() != Eigen::Success) {
0250 std::ostringstream oss;
0251 oss << __FILE__ << ":" << __LINE__ << ":" << __func__;
0252 oss << ": Could not decompose the covariance matrix as BDB^{T}.";
0253 throw std::logic_error(oss.str());
0254 }
0255 Eigen::Matrix<DimensionlessReal, Eigen::Dynamic, Eigen::Dynamic> B = eigensolver.eigenvectors();
0256
0257 auto D = eigensolver.eigenvalues();
0258
0259 for (auto & d : D) {
0260 if (d <= 0 || isnan(d)) {
0261 std::ostringstream oss;
0262 oss << __FILE__ << ":" << __LINE__ << ":" << __func__;
0263 oss << ": The covariance matrix is not positive definite. This breaks the evolution path computation downstream.\n";
0264 oss << "C=\n" << C << "\n";
0265 oss << "Eigenvalues: " << D;
0266 throw std::domain_error(oss.str());
0267 }
0268 d = sqrt(d);
0269 }
0270
0271 for (size_t k = 0; k < params.population_size; ++k) {
0272 auto & y = ys[k];
0273 auto & x = xs[k];
0274 BOOST_MATH_ASSERT(static_cast<size_t>(x.size()) == n);
0275 BOOST_MATH_ASSERT(static_cast<size_t>(y.size()) == n);
0276 size_t resample_counter = 0;
0277 do {
0278
0279
0280 for (size_t i = 0; i < n; ++i) {
0281 z[i] = dis(gen);
0282 }
0283 Eigen::Vector<DimensionlessReal, Eigen::Dynamic> Dz(n);
0284 for (size_t i = 0; i < n; ++i) {
0285 Dz[i] = D[i]*z[i];
0286 }
0287 y = B*Dz;
0288 for (size_t i = 0; i < n; ++i) {
0289 BOOST_MATH_ASSERT(!isnan(mean_vector[i]));
0290 BOOST_MATH_ASSERT(!isnan(y[i]));
0291 x[i] = mean_vector[i] + sigma*y[i];
0292 }
0293 costs[k] = cost_function(x);
0294 if (resample_counter++ == 50) {
0295 std::ostringstream oss;
0296 oss << __FILE__ << ":" << __LINE__ << ":" << __func__;
0297 oss << ": 50 resamples was not sufficient to find an argument to the cost function which did not return NaN.";
0298 oss << " Giving up.";
0299 throw std::domain_error(oss.str());
0300 }
0301 } while (isnan(costs[k]));
0302
0303 if (queries) {
0304 queries->emplace_back(std::make_pair(x, costs[k]));
0305 }
0306 if (costs[k] < lowest_cost) {
0307 lowest_cost = costs[k];
0308 best_vector = x;
0309 if (current_minimum_cost && costs[k] < *current_minimum_cost) {
0310 *current_minimum_cost = costs[k];
0311 }
0312 if (lowest_cost < target_value) {
0313 target_attained = true;
0314 break;
0315 }
0316 }
0317 }
0318 if (target_attained) {
0319 break;
0320 }
0321 if (cancellation && *cancellation) {
0322 break;
0323 }
0324 auto indices = detail::best_indices(costs);
0325
0326 for (size_t j = 0; j < n; ++j) {
0327 weighted_avg_y[j] = 0;
0328 for (size_t i = 0; i < mu; ++i) {
0329 BOOST_MATH_ASSERT(!isnan(weights[i]));
0330 BOOST_MATH_ASSERT(!isnan(ys[indices[i]][j]));
0331 weighted_avg_y[j] += weights[i]*ys[indices[i]][j];
0332 }
0333 }
0334
0335 for (size_t j = 0; j < n; ++j) {
0336 mean_vector[j] = mean_vector[j] + c_m*sigma*weighted_avg_y[j];
0337 }
0338
0339 Eigen::Vector<DimensionlessReal, Eigen::Dynamic> inv_D_B_transpose_y = B.transpose()*weighted_avg_y;
0340 for (long j = 0; j < inv_D_B_transpose_y.size(); ++j) {
0341 inv_D_B_transpose_y[j] /= D[j];
0342 }
0343 Eigen::Vector<DimensionlessReal, Eigen::Dynamic> C_inv_sqrt_y_avg = B*inv_D_B_transpose_y;
0344
0345 DimensionlessReal p_sigma_norm = 0;
0346 for (size_t j = 0; j < n; ++j) {
0347 p_sigma[j] = (1-c_sigma)*p_sigma[j] + sqrt(c_sigma*(2-c_sigma)*mu_eff)*C_inv_sqrt_y_avg[j];
0348 p_sigma_norm += p_sigma[j]*p_sigma[j];
0349 }
0350 p_sigma_norm = sqrt(p_sigma_norm);
0351
0352 const DimensionlessReal expectation_norm_0I = sqrt(static_cast<DimensionlessReal>(n))*(DimensionlessReal(1) - DimensionlessReal(1)/(4*n) + DimensionlessReal(1)/(21*n*n));
0353
0354 sigma = sigma*exp(c_sigma*(p_sigma_norm/expectation_norm_0I -1)/d_sigma);
0355
0356 DimensionlessReal h_sigma = 0;
0357 DimensionlessReal rhs = (DimensionlessReal(1.4) + DimensionlessReal(2)/(n+1))*expectation_norm_0I*sqrt(1 - pow(1-c_sigma, 2*(generation+1)));
0358 if (p_sigma_norm < rhs) {
0359 h_sigma = 1;
0360 }
0361
0362 p_c = (1-c_c)*p_c + h_sigma*sqrt(c_c*(2-c_c)*mu_eff)*weighted_avg_y;
0363 DimensionlessReal delta_h_sigma = (1-h_sigma)*c_c*(2-c_c);
0364 DimensionlessReal weight_sum = 0;
0365 for (auto & w : weights) {
0366 weight_sum += w;
0367 }
0368
0369 DimensionlessReal K = (1 + c_1*delta_h_sigma - c_1 - c_mu*weight_sum);
0370
0371
0372 C = K*C + c_1*p_c*p_c.transpose();
0373
0374 for (size_t i = 0; i < params.population_size/2; ++i) {
0375 C += c_mu*weights[i]*ys[indices[i]]*ys[indices[i]].transpose();
0376 }
0377 for (size_t i = params.population_size/2; i < params.population_size; ++i) {
0378 Eigen::Vector<DimensionlessReal, Eigen::Dynamic> D_inv_BTy = B.transpose()*ys[indices[i]];
0379 for (size_t j = 0; j < n; ++j) {
0380 D_inv_BTy[j] /= D[j];
0381 }
0382 DimensionlessReal squared_norm = D_inv_BTy.squaredNorm();
0383 DimensionlessReal K2 = c_mu*weights[i]/squared_norm;
0384 C += K2*ys[indices[i]]*ys[indices[i]].transpose();
0385 }
0386 } while (generation++ < params.max_generations);
0387
0388 return best_vector;
0389 }
0390
0391 }
0392 #endif