File indexing completed on 2026-05-03 08:13:58
0001
0002
0003
0004
0005
0006
0007
0008
0009 #ifndef _LIBCPP___PSTL_BACKENDS_LIBDISPATCH_H
0010 #define _LIBCPP___PSTL_BACKENDS_LIBDISPATCH_H
0011
0012 #include <__algorithm/inplace_merge.h>
0013 #include <__algorithm/lower_bound.h>
0014 #include <__algorithm/max.h>
0015 #include <__algorithm/merge.h>
0016 #include <__algorithm/upper_bound.h>
0017 #include <__atomic/atomic.h>
0018 #include <__config>
0019 #include <__cstddef/ptrdiff_t.h>
0020 #include <__exception/terminate.h>
0021 #include <__iterator/iterator_traits.h>
0022 #include <__iterator/move_iterator.h>
0023 #include <__memory/allocator.h>
0024 #include <__memory/construct_at.h>
0025 #include <__memory/unique_ptr.h>
0026 #include <__new/exceptions.h>
0027 #include <__numeric/reduce.h>
0028 #include <__pstl/backend_fwd.h>
0029 #include <__pstl/cpu_algos/any_of.h>
0030 #include <__pstl/cpu_algos/cpu_traits.h>
0031 #include <__pstl/cpu_algos/fill.h>
0032 #include <__pstl/cpu_algos/find_if.h>
0033 #include <__pstl/cpu_algos/for_each.h>
0034 #include <__pstl/cpu_algos/merge.h>
0035 #include <__pstl/cpu_algos/stable_sort.h>
0036 #include <__pstl/cpu_algos/transform.h>
0037 #include <__pstl/cpu_algos/transform_reduce.h>
0038 #include <__utility/empty.h>
0039 #include <__utility/exception_guard.h>
0040 #include <__utility/move.h>
0041 #include <__utility/pair.h>
0042 #include <optional>
0043
0044 _LIBCPP_PUSH_MACROS
0045 #include <__undef_macros>
0046
0047 #if _LIBCPP_STD_VER >= 17
0048
0049 _LIBCPP_BEGIN_NAMESPACE_STD
0050 namespace __pstl {
0051
0052 namespace __libdispatch {
0053
0054
0055
0056 _LIBCPP_EXPORTED_FROM_ABI void
0057 __dispatch_apply(size_t __chunk_count, void* __context, void (*__func)(void* __context, size_t __chunk)) noexcept;
0058
0059 template <class _Func>
0060 _LIBCPP_HIDE_FROM_ABI void __dispatch_apply(size_t __chunk_count, _Func __func) noexcept {
0061 __libdispatch::__dispatch_apply(__chunk_count, &__func, [](void* __context, size_t __chunk) {
0062 (*static_cast<_Func*>(__context))(__chunk);
0063 });
0064 }
0065
0066 struct __chunk_partitions {
0067 ptrdiff_t __chunk_count_;
0068 ptrdiff_t __chunk_size_;
0069 ptrdiff_t __first_chunk_size_;
0070 };
0071
0072 [[__gnu__::__const__]] _LIBCPP_EXPORTED_FROM_ABI __chunk_partitions __partition_chunks(ptrdiff_t __size) noexcept;
0073
0074 template <class _RandomAccessIterator, class _Functor>
0075 _LIBCPP_HIDE_FROM_ABI optional<__empty>
0076 __dispatch_parallel_for(__chunk_partitions __partitions, _RandomAccessIterator __first, _Functor __func) {
0077
0078 __libdispatch::__dispatch_apply(__partitions.__chunk_count_, [&](size_t __chunk) {
0079 auto __this_chunk_size = __chunk == 0 ? __partitions.__first_chunk_size_ : __partitions.__chunk_size_;
0080 auto __index =
0081 __chunk == 0
0082 ? 0
0083 : (__chunk * __partitions.__chunk_size_) + (__partitions.__first_chunk_size_ - __partitions.__chunk_size_);
0084 __func(__first + __index, __first + __index + __this_chunk_size);
0085 });
0086
0087 return __empty{};
0088 }
0089 }
0090
0091 template <>
0092 struct __cpu_traits<__libdispatch_backend_tag> {
0093 template <class _RandomAccessIterator, class _Functor>
0094 _LIBCPP_HIDE_FROM_ABI static optional<__empty>
0095 __for_each(_RandomAccessIterator __first, _RandomAccessIterator __last, _Functor __func) {
0096 return __libdispatch::__dispatch_parallel_for(
0097 __libdispatch::__partition_chunks(__last - __first), std::move(__first), std::move(__func));
0098 }
0099
// Bundle of three iterators -- a position in the first input, a position in the second
// input and a position in the output -- used by __merge to mark a chunk boundary.
template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _RandomAccessIteratorOut>
struct __merge_range {
  _RandomAccessIterator1 __mid1_;
  _RandomAccessIterator2 __mid2_;
  _RandomAccessIteratorOut __result_;

  __merge_range(_RandomAccessIterator1 __mid1, _RandomAccessIterator2 __mid2, _RandomAccessIteratorOut __result)
      : __mid1_(__mid1), __mid2_(__mid2), __result_(__result) {}
};
0109
0110 template <typename _RandomAccessIterator1,
0111 typename _RandomAccessIterator2,
0112 typename _RandomAccessIterator3,
0113 typename _Compare,
0114 typename _LeafMerge>
0115 _LIBCPP_HIDE_FROM_ABI static optional<__empty>
0116 __merge(_RandomAccessIterator1 __first1,
0117 _RandomAccessIterator1 __last1,
0118 _RandomAccessIterator2 __first2,
0119 _RandomAccessIterator2 __last2,
0120 _RandomAccessIterator3 __result,
0121 _Compare __comp,
0122 _LeafMerge __leaf_merge) noexcept {
0123 __libdispatch::__chunk_partitions __partitions =
0124 __libdispatch::__partition_chunks(std::max<ptrdiff_t>(__last1 - __first1, __last2 - __first2));
0125
0126 if (__partitions.__chunk_count_ == 0)
0127 return __empty{};
0128
0129 if (__partitions.__chunk_count_ == 1) {
0130 __leaf_merge(__first1, __last1, __first2, __last2, __result, __comp);
0131 return __empty{};
0132 }
0133
0134 using __merge_range_t = __merge_range<_RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3>;
0135 auto const __n_ranges = __partitions.__chunk_count_ + 1;
0136
0137
0138 auto __destroy = [=](__merge_range_t* __ptr) {
0139 std::destroy_n(__ptr, __n_ranges);
0140 std::allocator<__merge_range_t>().deallocate(__ptr, __n_ranges);
0141 };
0142
0143 unique_ptr<__merge_range_t[], decltype(__destroy)> __ranges(
0144 [&]() -> __merge_range_t* {
0145 # if _LIBCPP_HAS_EXCEPTIONS
0146 try {
0147 # endif
0148 return std::allocator<__merge_range_t>().allocate(__n_ranges);
0149 # if _LIBCPP_HAS_EXCEPTIONS
0150 } catch (const std::bad_alloc&) {
0151 return nullptr;
0152 }
0153 # endif
0154 }(),
0155 __destroy);
0156
0157 if (!__ranges)
0158 return nullopt;
0159
0160
0161 __merge_range_t* __r = __ranges.get();
0162 std::__construct_at(__r++, __first1, __first2, __result);
0163
0164 bool __iterate_first_range = __last1 - __first1 > __last2 - __first2;
0165
0166 auto __compute_chunk = [&](size_t __chunk_size) -> __merge_range_t {
0167 auto [__mid1, __mid2] = [&] {
0168 if (__iterate_first_range) {
0169 auto __m1 = __first1 + __chunk_size;
0170 auto __m2 = std::lower_bound(__first2, __last2, __m1[-1], __comp);
0171 return std::make_pair(__m1, __m2);
0172 } else {
0173 auto __m2 = __first2 + __chunk_size;
0174 auto __m1 = std::lower_bound(__first1, __last1, __m2[-1], __comp);
0175 return std::make_pair(__m1, __m2);
0176 }
0177 }();
0178
0179 __result += (__mid1 - __first1) + (__mid2 - __first2);
0180 __first1 = __mid1;
0181 __first2 = __mid2;
0182 return {std::move(__mid1), std::move(__mid2), __result};
0183 };
0184
0185
0186 std::__construct_at(__r++, __compute_chunk(__partitions.__first_chunk_size_));
0187
0188
0189 for (ptrdiff_t __i = 0; __i != __partitions.__chunk_count_ - 2; ++__i)
0190 std::__construct_at(__r++, __compute_chunk(__partitions.__chunk_size_));
0191
0192
0193 std::__construct_at(__r, __last1, __last2, __result);
0194
0195 __libdispatch::__dispatch_apply(__partitions.__chunk_count_, [&](size_t __index) {
0196 auto __first_iters = __ranges[__index];
0197 auto __last_iters = __ranges[__index + 1];
0198 __leaf_merge(
0199 __first_iters.__mid1_,
0200 __last_iters.__mid1_,
0201 __first_iters.__mid2_,
0202 __last_iters.__mid2_,
0203 __first_iters.__result_,
0204 __comp);
0205 });
0206
0207 return __empty{};
0208 }
0209
0210 template <class _RandomAccessIterator, class _Transform, class _Value, class _Combiner, class _Reduction>
0211 _LIBCPP_HIDE_FROM_ABI static optional<_Value> __transform_reduce(
0212 _RandomAccessIterator __first,
0213 _RandomAccessIterator __last,
0214 _Transform __transform,
0215 _Value __init,
0216 _Combiner __combiner,
0217 _Reduction __reduction) {
0218 if (__first == __last)
0219 return __init;
0220
0221 auto __partitions = __libdispatch::__partition_chunks(__last - __first);
0222
0223 auto __destroy = [__count = __partitions.__chunk_count_](_Value* __ptr) {
0224 std::destroy_n(__ptr, __count);
0225 std::allocator<_Value>().deallocate(__ptr, __count);
0226 };
0227
0228
0229
0230 unique_ptr<_Value[], decltype(__destroy)> __values(
0231 std::allocator<_Value>().allocate(__partitions.__chunk_count_), __destroy);
0232
0233
0234 __libdispatch::__dispatch_apply(__partitions.__chunk_count_, [&](size_t __chunk) {
0235 auto __this_chunk_size = __chunk == 0 ? __partitions.__first_chunk_size_ : __partitions.__chunk_size_;
0236 auto __index = __chunk == 0 ? 0
0237 : (__chunk * __partitions.__chunk_size_) +
0238 (__partitions.__first_chunk_size_ - __partitions.__chunk_size_);
0239 if (__this_chunk_size != 1) {
0240 std::__construct_at(
0241 __values.get() + __chunk,
0242 __reduction(__first + __index + 2,
0243 __first + __index + __this_chunk_size,
0244 __combiner(__transform(__first + __index), __transform(__first + __index + 1))));
0245 } else {
0246 std::__construct_at(__values.get() + __chunk, __transform(__first + __index));
0247 }
0248 });
0249
0250 return std::reduce(
0251 std::make_move_iterator(__values.get()),
0252 std::make_move_iterator(__values.get() + __partitions.__chunk_count_),
0253 std::move(__init),
0254 __combiner);
0255 }
0256
0257 template <class _RandomAccessIterator, class _Comp, class _LeafSort>
0258 _LIBCPP_HIDE_FROM_ABI static optional<__empty>
0259 __stable_sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Comp __comp, _LeafSort __leaf_sort) {
0260 const auto __size = __last - __first;
0261 auto __partitions = __libdispatch::__partition_chunks(__size);
0262
0263 if (__partitions.__chunk_count_ == 0)
0264 return __empty{};
0265
0266 if (__partitions.__chunk_count_ == 1) {
0267 __leaf_sort(__first, __last, __comp);
0268 return __empty{};
0269 }
0270
0271 using _Value = __iter_value_type<_RandomAccessIterator>;
0272
0273 auto __destroy = [__size](_Value* __ptr) {
0274 std::destroy_n(__ptr, __size);
0275 std::allocator<_Value>().deallocate(__ptr, __size);
0276 };
0277
0278
0279 unique_ptr<_Value[], decltype(__destroy)> __values(std::allocator<_Value>().allocate(__size), __destroy);
0280
0281
0282
0283 std::__construct_at(__values.get(), std::move(*__first));
0284 for (__iter_diff_t<_RandomAccessIterator> __i = 1; __i != __size; ++__i) {
0285 std::__construct_at(__values.get() + __i, std::move(__values.get()[__i - 1]));
0286 }
0287 *__first = std::move(__values.get()[__size - 1]);
0288
0289 __libdispatch::__dispatch_parallel_for(
0290 __partitions,
0291 __first,
0292 [&__leaf_sort, &__comp](_RandomAccessIterator __chunk_first, _RandomAccessIterator __chunk_last) {
0293 __leaf_sort(std::move(__chunk_first), std::move(__chunk_last), __comp);
0294 });
0295
0296 bool __objects_are_in_buffer = false;
0297 do {
0298 const auto __old_chunk_size = __partitions.__chunk_size_;
0299 if (__partitions.__chunk_count_ % 2 == 1) {
0300 auto __inplace_merge_chunks = [&__comp, &__partitions](auto __first_chunk_begin) {
0301 std::inplace_merge(
0302 __first_chunk_begin,
0303 __first_chunk_begin + __partitions.__first_chunk_size_,
0304 __first_chunk_begin + __partitions.__first_chunk_size_ + __partitions.__chunk_size_,
0305 __comp);
0306 };
0307 if (__objects_are_in_buffer)
0308 __inplace_merge_chunks(__values.get());
0309 else
0310 __inplace_merge_chunks(__first);
0311 __partitions.__first_chunk_size_ += 2 * __partitions.__chunk_size_;
0312 } else {
0313 __partitions.__first_chunk_size_ += __partitions.__chunk_size_;
0314 }
0315
0316 __partitions.__chunk_size_ *= 2;
0317 __partitions.__chunk_count_ /= 2;
0318
0319 auto __merge_chunks = [__partitions, __old_chunk_size, &__comp](auto __from_first, auto __to_first) {
0320 __libdispatch::__dispatch_parallel_for(
0321 __partitions,
0322 __from_first,
0323 [__old_chunk_size, &__from_first, &__to_first, &__comp](auto __chunk_first, auto __chunk_last) {
0324 std::merge(std::make_move_iterator(__chunk_first),
0325 std::make_move_iterator(__chunk_last - __old_chunk_size),
0326 std::make_move_iterator(__chunk_last - __old_chunk_size),
0327 std::make_move_iterator(__chunk_last),
0328 __to_first + (__chunk_first - __from_first),
0329 __comp);
0330 });
0331 };
0332
0333 if (__objects_are_in_buffer)
0334 __merge_chunks(__values.get(), __first);
0335 else
0336 __merge_chunks(__first, __values.get());
0337 __objects_are_in_buffer = !__objects_are_in_buffer;
0338 } while (__partitions.__chunk_count_ > 1);
0339
0340 if (__objects_are_in_buffer) {
0341 std::move(__values.get(), __values.get() + __size, __first);
0342 }
0343
0344 return __empty{};
0345 }
0346
0347 _LIBCPP_HIDE_FROM_ABI static void __cancel_execution() {}
0348
0349 static constexpr size_t __lane_size = 64;
0350 };
0351
0352
0353 template <class _ExecutionPolicy>
0354 struct __find_if<__libdispatch_backend_tag, _ExecutionPolicy>
0355 : __cpu_parallel_find_if<__libdispatch_backend_tag, _ExecutionPolicy> {};
0356
0357 template <class _ExecutionPolicy>
0358 struct __for_each<__libdispatch_backend_tag, _ExecutionPolicy>
0359 : __cpu_parallel_for_each<__libdispatch_backend_tag, _ExecutionPolicy> {};
0360
0361 template <class _ExecutionPolicy>
0362 struct __merge<__libdispatch_backend_tag, _ExecutionPolicy>
0363 : __cpu_parallel_merge<__libdispatch_backend_tag, _ExecutionPolicy> {};
0364
0365 template <class _ExecutionPolicy>
0366 struct __stable_sort<__libdispatch_backend_tag, _ExecutionPolicy>
0367 : __cpu_parallel_stable_sort<__libdispatch_backend_tag, _ExecutionPolicy> {};
0368
0369 template <class _ExecutionPolicy>
0370 struct __transform<__libdispatch_backend_tag, _ExecutionPolicy>
0371 : __cpu_parallel_transform<__libdispatch_backend_tag, _ExecutionPolicy> {};
0372
0373 template <class _ExecutionPolicy>
0374 struct __transform_binary<__libdispatch_backend_tag, _ExecutionPolicy>
0375 : __cpu_parallel_transform_binary<__libdispatch_backend_tag, _ExecutionPolicy> {};
0376
0377 template <class _ExecutionPolicy>
0378 struct __transform_reduce<__libdispatch_backend_tag, _ExecutionPolicy>
0379 : __cpu_parallel_transform_reduce<__libdispatch_backend_tag, _ExecutionPolicy> {};
0380
0381 template <class _ExecutionPolicy>
0382 struct __transform_reduce_binary<__libdispatch_backend_tag, _ExecutionPolicy>
0383 : __cpu_parallel_transform_reduce_binary<__libdispatch_backend_tag, _ExecutionPolicy> {};
0384
0385
0386 template <class _ExecutionPolicy>
0387 struct __any_of<__libdispatch_backend_tag, _ExecutionPolicy>
0388 : __cpu_parallel_any_of<__libdispatch_backend_tag, _ExecutionPolicy> {};
0389
0390 template <class _ExecutionPolicy>
0391 struct __fill<__libdispatch_backend_tag, _ExecutionPolicy>
0392 : __cpu_parallel_fill<__libdispatch_backend_tag, _ExecutionPolicy> {};
0393
0394 }
0395 _LIBCPP_END_NAMESPACE_STD
0396
0397 #endif
0398
0399 _LIBCPP_POP_MACROS
0400
0401 #endif