File indexing completed on 2025-01-18 09:51:47
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013 #ifndef __BOOST_SORT_PARALLEL_DETAIL_SAMPLE_SORT_HPP
0014 #define __BOOST_SORT_PARALLEL_DETAIL_SAMPLE_SORT_HPP
0015
0016 #include <ciso646>
0017 #include <functional>
0018 #include <future>
0019 #include <iterator>
0020 #include <memory>
0021 #include <type_traits>
0022 #include <vector>
0023
0024 #include <algorithm>
0025 #include <boost/sort/spinsort/spinsort.hpp>
0026 #include <boost/sort/common/indirect.hpp>
0027 #include <boost/sort/common/util/atomic.hpp>
0028 #include <boost/sort/common/merge_four.hpp>
0029 #include <boost/sort/common/merge_vector.hpp>
0030 #include <boost/sort/common/range.hpp>
0031
0032 namespace boost
0033 {
0034 namespace sort
0035 {
0036 namespace sample_detail
0037 {
0038
0039
0040
0041 namespace bsc = boost::sort::common;
0042 namespace bss = boost::sort::spin_detail;
0043 namespace bscu = boost::sort::common::util;
0044 using bsc::range;
0045 using bscu::atomic_add;
0046 using bsc::merge_vector4;
0047 using bsc::uninit_merge_level4;
0048 using bsc::less_ptr_no_null;
0049
0050
0051
0052
0053
0054
0055
0056
0057
0058 template<class Iter_t, class Compare>
0059 struct sample_sort
0060 {
0061
0062
0063
0064 typedef value_iter<Iter_t> value_t;
0065 typedef range<Iter_t> range_it;
0066 typedef range<value_t *> range_buf;
0067 typedef sample_sort<Iter_t, Compare> this_t;
0068
0069
0070
0071
0072
0073 static const uint32_t thread_min = (1 << 16);
0074
0075
0076
0077 uint32_t nthread, ninterval;
0078
0079
0080
0081
0082 bool construct = false, owner = false;
0083
0084
0085 Compare comp;
0086
0087
0088 range_it global_range;
0089
0090
0091 range_buf global_buf;
0092
0093
0094 std::vector<std::future<void>> vfuture;
0095
0096
0097
0098 std::vector<std::vector<range_it>> vv_range_it;
0099
0100
0101
0102 std::vector<std::vector<range_buf>> vv_range_buf;
0103
0104
0105 std::vector<range_it> vrange_it_ini;
0106
0107
0108 std::vector<range_buf> vrange_buf_ini;
0109
0110
0111
0112 std::atomic<uint32_t> njob;
0113
0114
0115 bool error;
0116
0117
0118
0119
0120 void initial_configuration(void);
0121
0122 sample_sort (Iter_t first, Iter_t last, Compare cmp, uint32_t num_thread,
0123 value_t *paux, size_t naux);
0124
0125 sample_sort(Iter_t first, Iter_t last)
0126 : sample_sort (first, last, Compare(), std::thread::hardware_concurrency(),
0127 nullptr, 0) { };
0128
0129 sample_sort(Iter_t first, Iter_t last, Compare cmp)
0130 : sample_sort(first, last, cmp, std::thread::hardware_concurrency(),
0131 nullptr, 0) { };
0132
0133 sample_sort(Iter_t first, Iter_t last, uint32_t num_thread)
0134 : sample_sort(first, last, Compare(), num_thread, nullptr, 0) { };
0135
0136 sample_sort(Iter_t first, Iter_t last, Compare cmp, uint32_t num_thread)
0137 : sample_sort(first, last, cmp, num_thread, nullptr, 0) { };
0138
0139 sample_sort(Iter_t first, Iter_t last, Compare cmp, uint32_t num_thread,
0140 range_buf range_buf_initial)
0141 : sample_sort(first, last, cmp, num_thread,
0142 range_buf_initial.first, range_buf_initial.size()) { };
0143
0144 void destroy_all(void);
0145
0146
0147
0148
0149
0150
0151 ~sample_sort(void) { destroy_all(); };
0152
0153
0154
0155
0156
0157 void execute_first(void)
0158 {
0159 uint32_t job = 0;
0160 while ((job = atomic_add(njob, 1)) < ninterval)
0161 {
0162 uninit_merge_level4(vrange_buf_ini[job], vv_range_it[job],
0163 vv_range_buf[job], comp);
0164 };
0165 };
0166
0167
0168
0169
0170
0171 void execute(void)
0172 {
0173 uint32_t job = 0;
0174 while ((job = atomic_add(njob, 1)) < ninterval)
0175 {
0176 merge_vector4(vrange_buf_ini[job], vrange_it_ini[job],
0177 vv_range_buf[job], vv_range_it[job], comp);
0178 };
0179 };
0180
0181
0182
0183
0184
0185 void first_merge(void)
0186 {
0187 njob = 0;
0188
0189 for (uint32_t i = 0; i < nthread; ++i)
0190 {
0191 vfuture[i] = std::async(std::launch::async, &this_t::execute_first,
0192 this);
0193 };
0194 for (uint32_t i = 0; i < nthread; ++i)
0195 vfuture[i].get();
0196 };
0197
0198
0199
0200
0201
0202 void final_merge(void)
0203 {
0204 njob = 0;
0205
0206 for (uint32_t i = 0; i < nthread; ++i)
0207 {
0208 vfuture[i] = std::async(std::launch::async, &this_t::execute, this);
0209 };
0210 for (uint32_t i = 0; i < nthread; ++i)
0211 vfuture[i].get();
0212 };
0213
0214 };
0215
0216
0217
0218
0219
0220
0221
0222
0223
0224
0225
0226
0227
0228
0229
0230
0231
0232
0233
0234
0235
0236
0237
0238 template<class Iter_t, typename Compare>
0239 sample_sort<Iter_t, Compare>
0240 ::sample_sort (Iter_t first, Iter_t last, Compare cmp, uint32_t num_thread,
0241 value_t *paux, size_t naux)
0242 : nthread(num_thread), owner(false), comp(cmp), global_range(first, last),
0243 global_buf(nullptr, nullptr), error(false)
0244 {
0245 assert((last - first) >= 0);
0246 size_t nelem = size_t(last - first);
0247 construct = false;
0248 njob = 0;
0249 vfuture.resize(nthread);
0250
0251
0252 while (nelem > thread_min and (nthread * nthread) > (nelem >> 3))
0253 {
0254 nthread /= 2;
0255 };
0256 ninterval = (nthread << 3);
0257
0258 if (nthread < 2 or nelem <= (thread_min))
0259 {
0260 bss::spinsort<Iter_t, Compare>(first, last, comp);
0261 return;
0262 };
0263
0264
0265 bool sw = true;
0266 for (Iter_t it1 = first, it2 = first + 1;
0267 it2 != last and (sw = not comp(*it2, *it1)); it1 = it2++);
0268 if (sw) return;
0269
0270
0271 sw = true;
0272 for (Iter_t it1 = first, it2 = first + 1;
0273 it2 != last and (sw = comp(*it2, *it1)); it1 = it2++);
0274 if (sw)
0275 {
0276 using std::swap;
0277 size_t nelem2 = nelem >> 1;
0278 Iter_t it1 = first, it2 = last - 1;
0279 for (size_t i = 0; i < nelem2; ++i)
0280 swap(*(it1++), *(it2--));
0281 return;
0282 };
0283
0284 if (paux != nullptr)
0285 {
0286 assert(naux != 0);
0287 global_buf.first = paux;
0288 global_buf.last = paux + naux;
0289 owner = false;
0290 }
0291 else
0292 {
0293 value_t * ptr = reinterpret_cast <value_t*>
0294 (std::malloc (nelem * sizeof(value_t)));
0295
0296 if (ptr == nullptr) throw std::bad_alloc();
0297 owner = true;
0298 global_buf = range_buf(ptr, ptr + nelem);
0299 };
0300
0301
0302
0303 try
0304 {
0305 initial_configuration();
0306 } catch (std::bad_alloc &)
0307 {
0308 error = true;
0309 };
0310 if (not error)
0311 {
0312 first_merge();
0313 construct = true;
0314 final_merge();
0315 };
0316 if (error)
0317 {
0318 destroy_all();
0319 throw std::bad_alloc();
0320 };
0321 }
0322 ;
0323
0324
0325
0326
0327
0328
0329 template<class Iter_t, typename Compare>
0330 void sample_sort<Iter_t, Compare>::destroy_all(void)
0331 {
0332 if (construct)
0333 {
0334 destroy(global_buf);
0335 construct = false;
0336 }
0337 if (global_buf.first != nullptr and owner)
0338 std::free(global_buf.first);
0339 }
0340
0341
0342
0343
0344
0345
0346 template<class Iter_t, typename Compare>
0347 void sample_sort<Iter_t, Compare>::initial_configuration(void)
0348 {
0349 std::vector<range_it> vmem_thread;
0350 std::vector<range_buf> vbuf_thread;
0351 size_t nelem = global_range.size();
0352
0353
0354 size_t cupo = nelem / nthread;
0355 Iter_t it_first = global_range.first;
0356 value_t *buf_first = global_buf.first;
0357 vmem_thread.reserve(nthread + 1);
0358 vbuf_thread.reserve(nthread + 1);
0359
0360 for (uint32_t i = 0; i < (nthread - 1); ++i, it_first += cupo, buf_first +=
0361 cupo)
0362 {
0363 vmem_thread.emplace_back(it_first, it_first + cupo);
0364 vbuf_thread.emplace_back(buf_first, buf_first + cupo);
0365 };
0366
0367 vmem_thread.emplace_back(it_first, global_range.last);
0368 vbuf_thread.emplace_back(buf_first, global_buf.last);
0369
0370
0371
0372
0373 std::vector<std::future<void>> vfuture(nthread);
0374
0375 for (uint32_t i = 0; i < nthread; ++i)
0376 {
0377 auto func = [this, &vmem_thread, i, &vbuf_thread]()
0378 {
0379 bss::spinsort<Iter_t, Compare> (vmem_thread[i].first,
0380 vmem_thread[i].last, this->comp,
0381 vbuf_thread[i]);
0382 };
0383 vfuture[i] = std::async(std::launch::async, func);
0384 };
0385
0386 for (uint32_t i = 0; i < nthread; ++i)
0387 vfuture[i].get();
0388
0389
0390
0391
0392 std::vector<Iter_t> vsample;
0393 vsample.reserve(nthread * (ninterval - 1));
0394
0395 for (uint32_t i = 0; i < nthread; ++i)
0396 {
0397 size_t distance = vmem_thread[i].size() / ninterval;
0398 for (size_t j = 1, pos = distance; j < ninterval; ++j, pos += distance)
0399 {
0400 vsample.push_back(vmem_thread[i].first + pos);
0401 };
0402 };
0403 typedef less_ptr_no_null<Iter_t, Compare> compare_ptr;
0404 typedef typename std::vector<Iter_t>::iterator it_to_it;
0405
0406 bss::spinsort<it_to_it, compare_ptr>(vsample.begin(), vsample.end(),
0407 compare_ptr(comp));
0408
0409
0410
0411
0412 std::vector<Iter_t> vmilestone;
0413 vmilestone.reserve(ninterval);
0414
0415 for (uint32_t pos = nthread >> 1; pos < vsample.size(); pos += nthread)
0416 {
0417 vmilestone.push_back(vsample[pos]);
0418 };
0419
0420
0421
0422
0423 std::vector<std::vector<range<Iter_t>>>vv_range_first (nthread);
0424
0425 for (uint32_t i = 0; i < nthread; ++i)
0426 {
0427 Iter_t itaux = vmem_thread[i].first;
0428
0429 for (uint32_t k = 0; k < (ninterval - 1); ++k)
0430 {
0431 Iter_t it2 = std::upper_bound(itaux, vmem_thread[i].last,
0432 *vmilestone[k], comp);
0433
0434 vv_range_first[i].emplace_back(itaux, it2);
0435 itaux = it2;
0436 };
0437 vv_range_first[i].emplace_back(itaux, vmem_thread[i].last);
0438 };
0439
0440
0441
0442
0443 vv_range_it.resize(ninterval);
0444 vv_range_buf.resize(ninterval);
0445 vrange_it_ini.reserve(ninterval);
0446 vrange_buf_ini.reserve(ninterval);
0447
0448 for (uint32_t i = 0; i < ninterval; ++i)
0449 {
0450 vv_range_it[i].reserve(nthread);
0451 vv_range_buf[i].reserve(nthread);
0452 };
0453
0454 Iter_t it = global_range.first;
0455 value_t *it_buf = global_buf.first;
0456
0457 for (uint32_t k = 0; k < ninterval; ++k)
0458 {
0459 size_t nelem_interval = 0;
0460
0461 for (uint32_t i = 0; i < nthread; ++i)
0462 {
0463 size_t nelem_range = vv_range_first[i][k].size();
0464 if (nelem_range != 0)
0465 {
0466 vv_range_it[k].push_back(vv_range_first[i][k]);
0467 };
0468 nelem_interval += nelem_range;
0469 };
0470
0471 vrange_it_ini.emplace_back(it, it + nelem_interval);
0472 vrange_buf_ini.emplace_back(it_buf, it_buf + nelem_interval);
0473
0474 it += nelem_interval;
0475 it_buf += nelem_interval;
0476 };
0477 }
0478 ;
0479
0480
0481 }
0482 ;
0483
0484
0485
0486 namespace bscu = boost::sort::common::util;
0487
0488
0489
0490
0491
0492
0493
0494
0495
0496
0497
0498
0499
0500
0501
0502
0503 template<class Iter_t>
0504 void sample_sort(Iter_t first, Iter_t last)
0505 {
0506 typedef compare_iter<Iter_t> Compare;
0507 sample_detail::sample_sort<Iter_t, Compare>(first, last);
0508 };
0509
0510
0511
0512
0513
0514
0515
0516
0517
0518
0519 template<class Iter_t>
0520 void sample_sort(Iter_t first, Iter_t last, uint32_t nthread)
0521 {
0522 typedef compare_iter<Iter_t> Compare;
0523 sample_detail::sample_sort<Iter_t, Compare>(first, last, nthread);
0524 };
0525
0526
0527
0528
0529
0530
0531
0532
0533
0534
0535 template<class Iter_t, class Compare, bscu::enable_if_not_integral<Compare> * =
0536 nullptr>
0537 void sample_sort(Iter_t first, Iter_t last, Compare comp)
0538 {
0539 sample_detail::sample_sort<Iter_t, Compare>(first, last, comp);
0540 };
0541
0542
0543
0544
0545
0546
0547
0548
0549
0550
0551
0552
0553 template<class Iter_t, class Compare>
0554 void sample_sort(Iter_t first, Iter_t last, Compare comp, uint32_t nthread)
0555 {
0556 sample_detail::sample_sort<Iter_t, Compare>(first, last, comp, nthread);
0557 };
0558
0559
0560 };
0561 };
0562
0563
0564 #endif