Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-05-03 08:13:37

0001 //===----------------------------------------------------------------------===//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 
0009 #ifndef _LIBCPP___CXX03___PSTL_BACKENDS_LIBDISPATCH_H
0010 #define _LIBCPP___CXX03___PSTL_BACKENDS_LIBDISPATCH_H
0011 
0012 #include <__cxx03/__algorithm/inplace_merge.h>
0013 #include <__cxx03/__algorithm/lower_bound.h>
0014 #include <__cxx03/__algorithm/max.h>
0015 #include <__cxx03/__algorithm/merge.h>
0016 #include <__cxx03/__algorithm/upper_bound.h>
0017 #include <__cxx03/__atomic/atomic.h>
0018 #include <__cxx03/__config>
0019 #include <__cxx03/__exception/terminate.h>
0020 #include <__cxx03/__iterator/iterator_traits.h>
0021 #include <__cxx03/__iterator/move_iterator.h>
0022 #include <__cxx03/__memory/allocator.h>
0023 #include <__cxx03/__memory/construct_at.h>
0024 #include <__cxx03/__memory/unique_ptr.h>
0025 #include <__cxx03/__numeric/reduce.h>
0026 #include <__cxx03/__pstl/backend_fwd.h>
0027 #include <__cxx03/__pstl/cpu_algos/any_of.h>
0028 #include <__cxx03/__pstl/cpu_algos/cpu_traits.h>
0029 #include <__cxx03/__pstl/cpu_algos/fill.h>
0030 #include <__cxx03/__pstl/cpu_algos/find_if.h>
0031 #include <__cxx03/__pstl/cpu_algos/for_each.h>
0032 #include <__cxx03/__pstl/cpu_algos/merge.h>
0033 #include <__cxx03/__pstl/cpu_algos/stable_sort.h>
0034 #include <__cxx03/__pstl/cpu_algos/transform.h>
0035 #include <__cxx03/__pstl/cpu_algos/transform_reduce.h>
0036 #include <__cxx03/__utility/empty.h>
0037 #include <__cxx03/__utility/exception_guard.h>
0038 #include <__cxx03/__utility/move.h>
0039 #include <__cxx03/__utility/pair.h>
0040 #include <__cxx03/cstddef>
0041 #include <__cxx03/new>
0042 #include <__cxx03/optional>
0043 
0044 _LIBCPP_PUSH_MACROS
0045 #include <__cxx03/__undef_macros>
0046 
0047 _LIBCPP_BEGIN_NAMESPACE_STD
0048 namespace __pstl {
0049 
0050 namespace __libdispatch {
0051 // ::dispatch_apply is marked as __attribute__((nothrow)) because it doesn't let exceptions propagate, and neither do
0052 // we.
0053 // TODO: Do we want to add [[_Clang::__callback__(__func, __context, __)]]?
0054 _LIBCPP_EXPORTED_FROM_ABI void
0055 __dispatch_apply(size_t __chunk_count, void* __context, void (*__func)(void* __context, size_t __chunk)) noexcept;
0056 
0057 template <class _Func>
0058 _LIBCPP_HIDE_FROM_ABI void __dispatch_apply(size_t __chunk_count, _Func __func) noexcept {
0059   __libdispatch::__dispatch_apply(__chunk_count, &__func, [](void* __context, size_t __chunk) {
0060     (*static_cast<_Func*>(__context))(__chunk);
0061   });
0062 }
0063 
// Description of how a range of a given size is split into chunks for
// parallel execution: a (possibly larger) first chunk followed by
// (__chunk_count_ - 1) chunks of __chunk_size_ elements each.
struct __chunk_partitions {
  ptrdiff_t __chunk_count_; // includes the first chunk
  ptrdiff_t __chunk_size_;  // size of every chunk after the first
  ptrdiff_t __first_chunk_size_;
};

// Computes the chunk partitioning for a range of __size elements.
// [[__gnu__::__const__]]: the result depends only on __size, so calls may be CSE'd.
[[__gnu__::__const__]] _LIBCPP_EXPORTED_FROM_ABI __chunk_partitions __partition_chunks(ptrdiff_t __size) noexcept;
0071 
0072 template <class _RandomAccessIterator, class _Functor>
0073 _LIBCPP_HIDE_FROM_ABI optional<__empty>
0074 __dispatch_parallel_for(__chunk_partitions __partitions, _RandomAccessIterator __first, _Functor __func) {
0075   // Perform the chunked execution.
0076   __libdispatch::__dispatch_apply(__partitions.__chunk_count_, [&](size_t __chunk) {
0077     auto __this_chunk_size = __chunk == 0 ? __partitions.__first_chunk_size_ : __partitions.__chunk_size_;
0078     auto __index =
0079         __chunk == 0
0080             ? 0
0081             : (__chunk * __partitions.__chunk_size_) + (__partitions.__first_chunk_size_ - __partitions.__chunk_size_);
0082     __func(__first + __index, __first + __index + __this_chunk_size);
0083   });
0084 
0085   return __empty{};
0086 }
0087 } // namespace __libdispatch
0088 
// CPU-backend traits for the libdispatch (Grand Central Dispatch) backend.
// Provides the parallel building blocks (__for_each, __merge,
// __transform_reduce, __stable_sort) that the generic __cpu_parallel_*
// algorithm implementations are built on top of.
template <>
struct __cpu_traits<__libdispatch_backend_tag> {
  // Applies __func to sub-ranges of [__first, __last), one dispatch task per chunk.
  template <class _RandomAccessIterator, class _Functor>
  _LIBCPP_HIDE_FROM_ABI static optional<__empty>
  __for_each(_RandomAccessIterator __first, _RandomAccessIterator __last, _Functor __func) {
    return __libdispatch::__dispatch_parallel_for(
        __libdispatch::__partition_chunks(__last - __first), std::move(__first), std::move(__func));
  }

  // One boundary of the sub-merges performed by __merge below: a position in
  // each input range and the corresponding position in the output. Two
  // consecutive __merge_ranges delimit one independent sub-merge.
  template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _RandomAccessIteratorOut>
  struct __merge_range {
    __merge_range(_RandomAccessIterator1 __mid1, _RandomAccessIterator2 __mid2, _RandomAccessIteratorOut __result)
        : __mid1_(__mid1), __mid2_(__mid2), __result_(__result) {}

    _RandomAccessIterator1 __mid1_;
    _RandomAccessIterator2 __mid2_;
    _RandomAccessIteratorOut __result_;
  };

  // Parallel merge of two sorted ranges into __result. The larger input range
  // is cut into chunks; for each cut point, the matching split point in the
  // other range is found with std::lower_bound, so the resulting sub-merges
  // are independent and can run concurrently via __leaf_merge.
  // Returns nullopt if the scratch allocation for the boundary records fails.
  template <typename _RandomAccessIterator1,
            typename _RandomAccessIterator2,
            typename _RandomAccessIterator3,
            typename _Compare,
            typename _LeafMerge>
  _LIBCPP_HIDE_FROM_ABI static optional<__empty>
  __merge(_RandomAccessIterator1 __first1,
          _RandomAccessIterator1 __last1,
          _RandomAccessIterator2 __first2,
          _RandomAccessIterator2 __last2,
          _RandomAccessIterator3 __result,
          _Compare __comp,
          _LeafMerge __leaf_merge) noexcept {
    // Partition based on the larger of the two inputs; the smaller one is
    // split wherever lower_bound lands.
    __libdispatch::__chunk_partitions __partitions =
        __libdispatch::__partition_chunks(std::max<ptrdiff_t>(__last1 - __first1, __last2 - __first2));

    if (__partitions.__chunk_count_ == 0)
      return __empty{};

    // A single chunk: no parallelism to exploit, merge serially.
    if (__partitions.__chunk_count_ == 1) {
      __leaf_merge(__first1, __last1, __first2, __last2, __result, __comp);
      return __empty{};
    }

    using __merge_range_t = __merge_range<_RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3>;
    // N chunks need N + 1 boundary records (fence-post).
    auto const __n_ranges = __partitions.__chunk_count_ + 1;

    // TODO: use __uninitialized_buffer
    auto __destroy = [=](__merge_range_t* __ptr) {
      std::destroy_n(__ptr, __n_ranges);
      std::allocator<__merge_range_t>().deallocate(__ptr, __n_ranges);
    };

    // Allocate the boundary records; translate allocation failure into a null
    // pointer so this noexcept function can report it as nullopt below.
    unique_ptr<__merge_range_t[], decltype(__destroy)> __ranges(
        [&]() -> __merge_range_t* {
#ifndef _LIBCPP_HAS_NO_EXCEPTIONS
          try {
#endif
            return std::allocator<__merge_range_t>().allocate(__n_ranges);
#ifndef _LIBCPP_HAS_NO_EXCEPTIONS
          } catch (const std::bad_alloc&) {
            return nullptr;
          }
#endif
        }(),
        __destroy);

    if (!__ranges)
      return nullopt;

    // TODO: Improve the case where the smaller range is merged into just a few (or even one) chunks of the larger case
    __merge_range_t* __r = __ranges.get();
    // First boundary: the very beginning of both inputs and of the output.
    std::__construct_at(__r++, __first1, __first2, __result);

    // Walk chunk-by-chunk through whichever input is longer.
    bool __iterate_first_range = __last1 - __first1 > __last2 - __first2;

    // Advances __first1/__first2/__result past one chunk and returns the new
    // boundary. The cut in the iterated range is at a fixed offset; the cut in
    // the other range is the lower_bound of the last element before the cut
    // (__m[-1]), which keeps equal elements on the correct side for stability.
    auto __compute_chunk = [&](size_t __chunk_size) -> __merge_range_t {
      auto [__mid1, __mid2] = [&] {
        if (__iterate_first_range) {
          auto __m1 = __first1 + __chunk_size;
          auto __m2 = std::lower_bound(__first2, __last2, __m1[-1], __comp);
          return std::make_pair(__m1, __m2);
        } else {
          auto __m2 = __first2 + __chunk_size;
          auto __m1 = std::lower_bound(__first1, __last1, __m2[-1], __comp);
          return std::make_pair(__m1, __m2);
        }
      }();

      // The output advances by the total number of elements consumed from
      // both inputs.
      __result += (__mid1 - __first1) + (__mid2 - __first2);
      __first1 = __mid1;
      __first2 = __mid2;
      return {std::move(__mid1), std::move(__mid2), __result};
    };

    // handle first chunk
    std::__construct_at(__r++, __compute_chunk(__partitions.__first_chunk_size_));

    // handle 2 -> N - 1 chunks
    for (ptrdiff_t __i = 0; __i != __partitions.__chunk_count_ - 2; ++__i)
      std::__construct_at(__r++, __compute_chunk(__partitions.__chunk_size_));

    // handle last chunk: closes at the exact ends of both inputs.
    std::__construct_at(__r, __last1, __last2, __result);

    // Each task merges the sub-ranges between two consecutive boundaries.
    __libdispatch::__dispatch_apply(__partitions.__chunk_count_, [&](size_t __index) {
      auto __first_iters = __ranges[__index];
      auto __last_iters  = __ranges[__index + 1];
      __leaf_merge(
          __first_iters.__mid1_,
          __last_iters.__mid1_,
          __first_iters.__mid2_,
          __last_iters.__mid2_,
          __first_iters.__result_,
          __comp);
    });

    return __empty{};
  }

  // Parallel transform-reduce: each chunk computes a partial value into a
  // scratch buffer, then the partials are combined serially with std::reduce.
  // Requires __combiner to be associative for the chunked evaluation to match
  // the serial one.
  template <class _RandomAccessIterator, class _Transform, class _Value, class _Combiner, class _Reduction>
  _LIBCPP_HIDE_FROM_ABI static optional<_Value> __transform_reduce(
      _RandomAccessIterator __first,
      _RandomAccessIterator __last,
      _Transform __transform,
      _Value __init,
      _Combiner __combiner,
      _Reduction __reduction) {
    if (__first == __last)
      return __init;

    auto __partitions = __libdispatch::__partition_chunks(__last - __first);

    auto __destroy = [__count = __partitions.__chunk_count_](_Value* __ptr) {
      std::destroy_n(__ptr, __count);
      std::allocator<_Value>().deallocate(__ptr, __count);
    };

    // TODO: use __uninitialized_buffer
    // TODO: allocate one element per worker instead of one element per chunk
    // NOTE(review): unlike __merge above, an allocation failure here is not
    // caught and translated to nullopt — bad_alloc propagates; confirm intended.
    unique_ptr<_Value[], decltype(__destroy)> __values(
        std::allocator<_Value>().allocate(__partitions.__chunk_count_), __destroy);

    // __dispatch_apply is noexcept
    __libdispatch::__dispatch_apply(__partitions.__chunk_count_, [&](size_t __chunk) {
      auto __this_chunk_size = __chunk == 0 ? __partitions.__first_chunk_size_ : __partitions.__chunk_size_;
      auto __index           = __chunk == 0 ? 0
                                            : (__chunk * __partitions.__chunk_size_) +
                                        (__partitions.__first_chunk_size_ - __partitions.__chunk_size_);
      if (__this_chunk_size != 1) {
        // Seed the chunk's value by combining its first two transformed
        // elements, then let __reduction fold in the rest (from index + 2).
        // Assumes every chunk has at least one element — TODO confirm
        // __partition_chunks guarantees this.
        std::__construct_at(
            __values.get() + __chunk,
            __reduction(__first + __index + 2,
                        __first + __index + __this_chunk_size,
                        __combiner(__transform(__first + __index), __transform(__first + __index + 1))));
      } else {
        // Single-element chunk: its partial value is just the transform.
        std::__construct_at(__values.get() + __chunk, __transform(__first + __index));
      }
    });

    // Fold the per-chunk partials into __init; move-iterators avoid copying
    // the partial values out of the scratch buffer.
    return std::reduce(
        std::make_move_iterator(__values.get()),
        std::make_move_iterator(__values.get() + __partitions.__chunk_count_),
        std::move(__init),
        __combiner);
  }

  // Parallel stable sort: sort each chunk independently with __leaf_sort,
  // then iteratively merge pairs of adjacent chunks, ping-ponging between the
  // input range and a scratch buffer of equal size.
  template <class _RandomAccessIterator, class _Comp, class _LeafSort>
  _LIBCPP_HIDE_FROM_ABI static optional<__empty>
  __stable_sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Comp __comp, _LeafSort __leaf_sort) {
    const auto __size = __last - __first;
    auto __partitions = __libdispatch::__partition_chunks(__size);

    if (__partitions.__chunk_count_ == 0)
      return __empty{};

    // A single chunk: sort serially, no scratch buffer needed.
    if (__partitions.__chunk_count_ == 1) {
      __leaf_sort(__first, __last, __comp);
      return __empty{};
    }

    using _Value = __iter_value_type<_RandomAccessIterator>;

    auto __destroy = [__size](_Value* __ptr) {
      std::destroy_n(__ptr, __size);
      std::allocator<_Value>().deallocate(__ptr, __size);
    };

    // TODO: use __uninitialized_buffer
    unique_ptr<_Value[], decltype(__destroy)> __values(std::allocator<_Value>().allocate(__size), __destroy);

    // Initialize all elements to a moved-from state
    // TODO: Don't do this - this can be done in the first merge - see https://llvm.org/PR63928
    // Each buffer element is constructed by moving from its predecessor
    // (values[0] from *__first), leaving every buffer slot in a valid
    // moved-from state so it can later be assigned/destroyed; *__first is
    // restored from the end of the chain.
    std::__construct_at(__values.get(), std::move(*__first));
    for (__iter_diff_t<_RandomAccessIterator> __i = 1; __i != __size; ++__i) {
      std::__construct_at(__values.get() + __i, std::move(__values.get()[__i - 1]));
    }
    *__first = std::move(__values.get()[__size - 1]);

    // Phase 1: sort every chunk in place, in parallel.
    __libdispatch::__dispatch_parallel_for(
        __partitions,
        __first,
        [&__leaf_sort, &__comp](_RandomAccessIterator __chunk_first, _RandomAccessIterator __chunk_last) {
          __leaf_sort(std::move(__chunk_first), std::move(__chunk_last), __comp);
        });

    // Phase 2: repeatedly merge adjacent chunk pairs. Each pass halves the
    // chunk count and doubles the chunk size; results alternate between the
    // input range and the scratch buffer.
    bool __objects_are_in_buffer = false;
    do {
      const auto __old_chunk_size = __partitions.__chunk_size_;
      if (__partitions.__chunk_count_ % 2 == 1) {
        // Odd number of chunks: fold the leftover chunk into the first chunk
        // in place, so the parallel pass below only sees an even pairing.
        auto __inplace_merge_chunks = [&__comp, &__partitions](auto __first_chunk_begin) {
          std::inplace_merge(
              __first_chunk_begin,
              __first_chunk_begin + __partitions.__first_chunk_size_,
              __first_chunk_begin + __partitions.__first_chunk_size_ + __partitions.__chunk_size_,
              __comp);
        };
        if (__objects_are_in_buffer)
          __inplace_merge_chunks(__values.get());
        else
          __inplace_merge_chunks(__first);
        __partitions.__first_chunk_size_ += 2 * __partitions.__chunk_size_;
      } else {
        __partitions.__first_chunk_size_ += __partitions.__chunk_size_;
      }

      __partitions.__chunk_size_ *= 2;
      __partitions.__chunk_count_ /= 2;

      // Merge each pair of adjacent old chunks from one storage area into the
      // other, moving elements (make_move_iterator) rather than copying.
      auto __merge_chunks = [__partitions, __old_chunk_size, &__comp](auto __from_first, auto __to_first) {
        __libdispatch::__dispatch_parallel_for(
            __partitions,
            __from_first,
            [__old_chunk_size, &__from_first, &__to_first, &__comp](auto __chunk_first, auto __chunk_last) {
              std::merge(std::make_move_iterator(__chunk_first),
                         std::make_move_iterator(__chunk_last - __old_chunk_size),
                         std::make_move_iterator(__chunk_last - __old_chunk_size),
                         std::make_move_iterator(__chunk_last),
                         __to_first + (__chunk_first - __from_first),
                         __comp);
            });
      };

      if (__objects_are_in_buffer)
        __merge_chunks(__values.get(), __first);
      else
        __merge_chunks(__first, __values.get());
      __objects_are_in_buffer = !__objects_are_in_buffer;
    } while (__partitions.__chunk_count_ > 1);

    // If the last pass ended with the data in the scratch buffer, move it
    // back to the caller's range.
    if (__objects_are_in_buffer) {
      std::move(__values.get(), __values.get() + __size, __first);
    }

    return __empty{};
  }

  // Nothing to cancel: dispatch_apply does not let exceptions propagate, so
  // there is no in-flight work to tear down (see comment at the top of the file).
  _LIBCPP_HIDE_FROM_ABI static void __cancel_execution() {}

  // Granularity hint used by the generic CPU algorithms.
  static constexpr size_t __lane_size = 64;
};
0349 
// Mandatory implementations of the computational basis
// Each PSTL algorithm is wired to the generic CPU implementation,
// parameterized by this backend's tag so it picks up the __cpu_traits above.
template <class _ExecutionPolicy>
struct __find_if<__libdispatch_backend_tag, _ExecutionPolicy>
    : __cpu_parallel_find_if<__libdispatch_backend_tag, _ExecutionPolicy> {};

template <class _ExecutionPolicy>
struct __for_each<__libdispatch_backend_tag, _ExecutionPolicy>
    : __cpu_parallel_for_each<__libdispatch_backend_tag, _ExecutionPolicy> {};

template <class _ExecutionPolicy>
struct __merge<__libdispatch_backend_tag, _ExecutionPolicy>
    : __cpu_parallel_merge<__libdispatch_backend_tag, _ExecutionPolicy> {};

template <class _ExecutionPolicy>
struct __stable_sort<__libdispatch_backend_tag, _ExecutionPolicy>
    : __cpu_parallel_stable_sort<__libdispatch_backend_tag, _ExecutionPolicy> {};

template <class _ExecutionPolicy>
struct __transform<__libdispatch_backend_tag, _ExecutionPolicy>
    : __cpu_parallel_transform<__libdispatch_backend_tag, _ExecutionPolicy> {};

template <class _ExecutionPolicy>
struct __transform_binary<__libdispatch_backend_tag, _ExecutionPolicy>
    : __cpu_parallel_transform_binary<__libdispatch_backend_tag, _ExecutionPolicy> {};

template <class _ExecutionPolicy>
struct __transform_reduce<__libdispatch_backend_tag, _ExecutionPolicy>
    : __cpu_parallel_transform_reduce<__libdispatch_backend_tag, _ExecutionPolicy> {};

template <class _ExecutionPolicy>
struct __transform_reduce_binary<__libdispatch_backend_tag, _ExecutionPolicy>
    : __cpu_parallel_transform_reduce_binary<__libdispatch_backend_tag, _ExecutionPolicy> {};
0382 
// Not mandatory, but better optimized
// These could be derived from the basis above, but dedicated parallel
// implementations exist, so wire them up explicitly.
template <class _ExecutionPolicy>
struct __any_of<__libdispatch_backend_tag, _ExecutionPolicy>
    : __cpu_parallel_any_of<__libdispatch_backend_tag, _ExecutionPolicy> {};

template <class _ExecutionPolicy>
struct __fill<__libdispatch_backend_tag, _ExecutionPolicy>
    : __cpu_parallel_fill<__libdispatch_backend_tag, _ExecutionPolicy> {};
0391 
0392 } // namespace __pstl
0393 _LIBCPP_END_NAMESPACE_STD
0394 
0395 _LIBCPP_POP_MACROS
0396 
0397 #endif // _LIBCPP___CXX03___PSTL_BACKENDS_LIBDISPATCH_H