/*
    Copyright (c) 2005-2024 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#ifndef __TBB_parallel_for_each_H
#define __TBB_parallel_for_each_H

#include "detail/_config.h"
#include "detail/_namespace_injection.h"
#include "detail/_exception.h"
#include "detail/_task.h"
#include "detail/_aligned_space.h"
#include "detail/_small_object_pool.h"
#include "detail/_utils.h"

#include "parallel_for.h"
#include "task_group.h" // task_group_context

#include <iterator>
#include <type_traits>

namespace tbb {
namespace detail {
#if __TBB_CPP20_CONCEPTS_PRESENT
namespace d1 {
template <typename Item>
class feeder;

} // namespace d1
inline namespace d0 {

template <typename Body, typename ItemType, typename FeederItemType>
concept parallel_for_each_body = std::invocable<const std::remove_reference_t<Body>&, ItemType&&> ||
                                 std::invocable<const std::remove_reference_t<Body>&, ItemType&&, tbb::detail::d1::feeder<FeederItemType>&>;

} // namespace d0
#endif // __TBB_CPP20_CONCEPTS_PRESENT
namespace d2 {
template<typename Body, typename Item> class feeder_impl;
} // namespace d2

namespace d1 {
//! Class that the user-supplied algorithm body uses to add new work items
template<typename Item>
class feeder {
    feeder() {}
    feeder(const feeder&) = delete;
    void operator=( const feeder&) = delete;

    virtual ~feeder () {}
    virtual void internal_add_copy(const Item& item) = 0;
    virtual void internal_add_move(Item&& item) = 0;

    template<typename Body_, typename Item_> friend class d2::feeder_impl;
public:
    //! Add a work item to a running parallel_for_each.
    void add(const Item& item) {internal_add_copy(item);}
    void add(Item&& item) {internal_add_move(std::move(item));}
};
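
//! Illustrative sketch (not part of the library interface): a hypothetical body type
//! that processes one node and uses the feeder to add the node's children as new work.
//! The `tree_node` type, its `children` member, and the use of std::vector are
//! assumptions made only for this example.
/** \code
    struct tree_node {
        int value;
        std::vector<tree_node*> children;
    };

    struct process_subtree {
        void operator()(tree_node* n, tbb::feeder<tree_node*>& f) const {
            // ... process n->value ...
            for (tree_node* child : n->children)
                f.add(child);   // feed newly discovered work back into parallel_for_each
        }
    };
    \endcode **/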

} // namespace d1

namespace d2 {
using namespace tbb::detail::d1;
/** Selects one of the two possible forms of function call member operator.
    @ingroup algorithms **/
template<class Body>
struct parallel_for_each_operator_selector {
public:
    template<typename ItemArg, typename FeederArg>
    static auto call(const Body& body, ItemArg&& item, FeederArg*)
    -> decltype(tbb::detail::invoke(body, std::forward<ItemArg>(item)), void()) {
        #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
        // Suppression of Microsoft non-standard extension warnings
        #pragma warning (push)
        #pragma warning (disable: 4239)
        #endif

        tbb::detail::invoke(body, std::forward<ItemArg>(item));

        #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
        #pragma warning (pop)
        #endif
    }

    template<typename ItemArg, typename FeederArg>
    static auto call(const Body& body, ItemArg&& item, FeederArg* feeder)
    -> decltype(tbb::detail::invoke(body, std::forward<ItemArg>(item), *feeder), void()) {
        #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
        // Suppression of Microsoft non-standard extension warnings
        #pragma warning (push)
        #pragma warning (disable: 4239)
        #endif
        __TBB_ASSERT(feeder, "Feeder was not created but should be");

        tbb::detail::invoke(body, std::forward<ItemArg>(item), *feeder);

        #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
        #pragma warning (pop)
        #endif
    }
};
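
// Illustrative sketch (assumed example, not library code): the selector above uses SFINAE
// on the decltype in its trailing return type to pick whichever body signature is callable.
//
//     struct body_without_feeder {
//         void operator()(int& x) const;                      // first call() overload applies
//     };
//     struct body_with_feeder {
//         void operator()(int& x, tbb::feeder<int>& f) const; // second call() overload applies
//     };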

template<typename Body, typename Item>
struct feeder_item_task: public task {
    using feeder_type = feeder_impl<Body, Item>;

    template <typename ItemType>
    feeder_item_task(ItemType&& input_item, feeder_type& feeder, small_object_allocator& alloc, wait_tree_vertex_interface& wait_vertex) :
        item(std::forward<ItemType>(input_item)),
        my_feeder(feeder),
        my_allocator(alloc),
        m_wait_tree_vertex(r1::get_thread_reference_vertex(&wait_vertex))
    {
        m_wait_tree_vertex->reserve();
    }

    void finalize(const execution_data& ed) {
        m_wait_tree_vertex->release();
        my_allocator.delete_object(this, ed);
    }

    //! Hack to resolve the ambiguity between calls to the body with and without moving the stored copy
    //! Executing the body with the moved copy should have higher priority
    using first_priority = int;
    using second_priority = double;

    template <typename BodyType, typename ItemType, typename FeederType>
    static auto call(const BodyType& call_body, ItemType& call_item, FeederType& call_feeder, first_priority)
    -> decltype(parallel_for_each_operator_selector<Body>::call(call_body, std::move(call_item), &call_feeder), void())
    {
        parallel_for_each_operator_selector<Body>::call(call_body, std::move(call_item), &call_feeder);
    }

    template <typename BodyType, typename ItemType, typename FeederType>
    static void call(const BodyType& call_body, ItemType& call_item, FeederType& call_feeder, second_priority) {
        parallel_for_each_operator_selector<Body>::call(call_body, call_item, &call_feeder);
    }

    task* execute(execution_data& ed) override {
        call(my_feeder.my_body, item, my_feeder, first_priority{});
        finalize(ed);
        return nullptr;
    }

    task* cancel(execution_data& ed) override {
        finalize(ed);
        return nullptr;
    }

    Item item;
    feeder_type& my_feeder;
    small_object_allocator my_allocator;
    wait_tree_vertex_interface* m_wait_tree_vertex;
}; // class feeder_item_task

/** Implements new task adding procedure.
    @ingroup algorithms **/
template<typename Body, typename Item>
class feeder_impl : public feeder<Item> {
    // Avoid using the copy constructor in a virtual method if the type does not support it
    void internal_add_copy_impl(std::true_type, const Item& item) {
        using feeder_task = feeder_item_task<Body, Item>;
        small_object_allocator alloc;
        auto task = alloc.new_object<feeder_task>(item, *this, alloc, my_wait_context);

        spawn(*task, my_execution_context);
    }

    void internal_add_copy_impl(std::false_type, const Item&) {
        __TBB_ASSERT(false, "Overload resolution for rvalue reference did not work, or the object is neither movable nor copyable");
    }

    void internal_add_copy(const Item& item) override {
        internal_add_copy_impl(typename std::is_copy_constructible<Item>::type(), item);
    }

    void internal_add_move(Item&& item) override {
        using feeder_task = feeder_item_task<Body, Item>;
        small_object_allocator alloc{};
        auto task = alloc.new_object<feeder_task>(std::move(item), *this, alloc, my_wait_context);

        spawn(*task, my_execution_context);
    }
public:
    feeder_impl(const Body& body, wait_context_vertex& w_context, task_group_context &context)
      : my_body(body),
        my_wait_context(w_context)
      , my_execution_context(context)
    {}

    const Body& my_body;
    wait_context_vertex& my_wait_context;
    task_group_context& my_execution_context;
}; // class feeder_impl

/** Executes the computation on one element of the range
    @ingroup algorithms **/
template<typename Iterator, typename Body, typename Item>
struct for_each_iteration_task: public task {
    using feeder_type = feeder_impl<Body, Item>;

    for_each_iteration_task(Iterator input_item_ptr, const Body& body, feeder_impl<Body, Item>* feeder_ptr, wait_context& wait_context) :
        item_ptr(input_item_ptr), my_body(body), my_feeder_ptr(feeder_ptr), parent_wait_context(wait_context)
    {}

    void finalize() {
        parent_wait_context.release();
    }

    task* execute(execution_data&) override {
        parallel_for_each_operator_selector<Body>::call(my_body, *item_ptr, my_feeder_ptr);
        finalize();
        return nullptr;
    }

    task* cancel(execution_data&) override {
        finalize();
        return nullptr;
    }

    Iterator item_ptr;
    const Body& my_body;
    feeder_impl<Body, Item>* my_feeder_ptr;
    wait_context& parent_wait_context;
}; // class for_each_iteration_task

// Helper to get the type of the iterator over the internal sequence of copies.
// If the element can be passed to the body as an rvalue, this iterator should be a move_iterator.
template <typename Body, typename Item, typename = void>
struct input_iteration_task_iterator_helper {
    // For input iterators we pass a const lvalue reference to the body.
    // Taking a non-const lvalue reference is prohibited for input iterators.
    using type = const Item*;
};

template <typename Body, typename Item>
struct input_iteration_task_iterator_helper<Body, Item,
    tbb::detail::void_t<decltype(parallel_for_each_operator_selector<Body>::call(std::declval<const Body&>(),
                                                                                 std::declval<Item&&>(),
                                                                                 std::declval<feeder_impl<Body, Item>*>()))>>
{
    using type = std::move_iterator<Item*>;
};

/** Splits one block task into several (at most max_block_size) iteration tasks for input iterators
    @ingroup algorithms **/
template <typename Body, typename Item>
struct input_block_handling_task : public task {
    static constexpr size_t max_block_size = 4;

    using feeder_type = feeder_impl<Body, Item>;
    using iteration_task_iterator_type = typename input_iteration_task_iterator_helper<Body, Item>::type;
    using iteration_task = for_each_iteration_task<iteration_task_iterator_type, Body, Item>;

    input_block_handling_task(wait_context_vertex& root_wait_context, task_group_context& e_context,
                              const Body& body, feeder_impl<Body, Item>* feeder_ptr, small_object_allocator& alloc)
        :my_size(0), my_wait_context(0), my_root_wait_context(root_wait_context),
         my_execution_context(e_context), my_allocator(alloc)
    {
        auto item_it = block_iteration_space.begin();
        for (auto* it = task_pool.begin(); it != task_pool.end(); ++it) {
            new (it) iteration_task(iteration_task_iterator_type(item_it++), body, feeder_ptr, my_wait_context);
        }
    }

    void finalize(const execution_data& ed) {
        my_root_wait_context.release();
        my_allocator.delete_object(this, ed);
    }

    task* execute(execution_data& ed) override {
        __TBB_ASSERT( my_size > 0, "Negative size was passed to task");
        for (std::size_t counter = 1; counter < my_size; ++counter) {
            my_wait_context.reserve();
            spawn(*(task_pool.begin() + counter), my_execution_context);
        }
        my_wait_context.reserve();
        execute_and_wait(*task_pool.begin(), my_execution_context,
                         my_wait_context,    my_execution_context);

        // deallocate current task after children execution
        finalize(ed);
        return nullptr;
    }

    task* cancel(execution_data& ed) override {
        finalize(ed);
        return nullptr;
    }

    ~input_block_handling_task() {
        for(std::size_t counter = 0; counter < max_block_size; ++counter) {
            (task_pool.begin() + counter)->~iteration_task();
            if (counter < my_size) {
                (block_iteration_space.begin() + counter)->~Item();
            }
        }
    }

    aligned_space<Item, max_block_size> block_iteration_space;
    aligned_space<iteration_task, max_block_size> task_pool;
    std::size_t my_size;
    wait_context my_wait_context;
    wait_context_vertex& my_root_wait_context;
    task_group_context& my_execution_context;
    small_object_allocator my_allocator;
}; // class input_block_handling_task

/** Splits one block task into several (at most max_block_size) iteration tasks for forward iterators
    @ingroup algorithms **/
template <typename Iterator, typename Body, typename Item>
struct forward_block_handling_task : public task {
    static constexpr size_t max_block_size = 4;

    using iteration_task = for_each_iteration_task<Iterator, Body, Item>;

    forward_block_handling_task(Iterator first, std::size_t size,
                                wait_context_vertex& w_context, task_group_context& e_context,
                                const Body& body, feeder_impl<Body, Item>* feeder_ptr,
                                small_object_allocator& alloc)
        : my_size(size), my_wait_context(0), my_root_wait_context(w_context),
          my_execution_context(e_context), my_allocator(alloc)
    {
        auto* task_it = task_pool.begin();
        for (std::size_t i = 0; i < size; i++) {
            new (task_it++) iteration_task(first, body, feeder_ptr, my_wait_context);
            ++first;
        }
    }

    void finalize(const execution_data& ed) {
        my_root_wait_context.release();
        my_allocator.delete_object(this, ed);
    }

    task* execute(execution_data& ed) override {
        __TBB_ASSERT( my_size > 0, "Negative size was passed to task");
        for(std::size_t counter = 1; counter < my_size; ++counter) {
            my_wait_context.reserve();
            spawn(*(task_pool.begin() + counter), my_execution_context);
        }
        my_wait_context.reserve();
        execute_and_wait(*task_pool.begin(), my_execution_context,
                         my_wait_context,    my_execution_context);

        // deallocate current task after children execution
        finalize(ed);
        return nullptr;
    }

    task* cancel(execution_data& ed) override {
        finalize(ed);
        return nullptr;
    }

    ~forward_block_handling_task() {
        for(std::size_t counter = 0; counter < my_size; ++counter) {
            (task_pool.begin() + counter)->~iteration_task();
        }
    }

    aligned_space<iteration_task, max_block_size> task_pool;
    std::size_t my_size;
    wait_context my_wait_context;
    wait_context_vertex& my_root_wait_context;
    task_group_context& my_execution_context;
    small_object_allocator my_allocator;
}; // class forward_block_handling_task

/** Body for the parallel_for algorithm.
  * Redirects operations over a random access iterator range to the parallel_for algorithm.
    @ingroup algorithms **/
template <typename Iterator, typename Body, typename Item>
class parallel_for_body_wrapper {
    Iterator my_first;
    const Body& my_body;
    feeder_impl<Body, Item>* my_feeder_ptr;
public:
    parallel_for_body_wrapper(Iterator first, const Body& body, feeder_impl<Body, Item>* feeder_ptr)
        : my_first(first), my_body(body), my_feeder_ptr(feeder_ptr) {}

    void operator()(tbb::blocked_range<std::size_t> range) const {
#if __INTEL_COMPILER
#pragma ivdep
#endif
        for (std::size_t count = range.begin(); count != range.end(); count++) {
            parallel_for_each_operator_selector<Body>::call(my_body, *(my_first + count),
                                                            my_feeder_ptr);
        }
    }
}; // class parallel_for_body_wrapper


/** Helper for getting an iterator's category tag, including inherited custom tags
    @ingroup algorithms */
template<typename It>
using tag = typename std::iterator_traits<It>::iterator_category;

#if __TBB_CPP20_PRESENT
template <typename It>
struct move_iterator_dispatch_helper {
    using type = It;
};

// Until C++23, std::move_iterator::iterator_concept is always defined as
// std::input_iterator_tag, and hence the std::forward_iterator concept
// always evaluates to false, so std::move_iterator dispatch should be
// made according to the base iterator type.
template <typename It>
struct move_iterator_dispatch_helper<std::move_iterator<It>> {
    using type = It;
};

template <typename It>
using iterator_tag_dispatch_impl =
    std::conditional_t<std::random_access_iterator<It>,
                       std::random_access_iterator_tag,
                       std::conditional_t<std::forward_iterator<It>,
                                          std::forward_iterator_tag,
                                          std::input_iterator_tag>>;

template <typename It>
using iterator_tag_dispatch =
    iterator_tag_dispatch_impl<typename move_iterator_dispatch_helper<It>::type>;

#else
template<typename It>
using iterator_tag_dispatch = typename
    std::conditional<
        std::is_base_of<std::random_access_iterator_tag, tag<It>>::value,
        std::random_access_iterator_tag,
        typename std::conditional<
            std::is_base_of<std::forward_iterator_tag, tag<It>>::value,
            std::forward_iterator_tag,
            std::input_iterator_tag
        >::type
    >::type;
#endif // __TBB_CPP20_PRESENT
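
// Illustrative sketch (assumption for exposition, not library code): the dispatch above
// collapses any iterator category to one of the three tags handled by the root task
// specializations. The example iterator types would need <forward_list> and <iterator>.
//
//     static_assert(std::is_same<iterator_tag_dispatch<int*>,
//                                std::random_access_iterator_tag>::value, "random access");
//     static_assert(std::is_same<iterator_tag_dispatch<std::forward_list<int>::iterator>,
//                                std::forward_iterator_tag>::value, "forward");
//     static_assert(std::is_same<iterator_tag_dispatch<std::istream_iterator<int>>,
//                                std::input_iterator_tag>::value, "input");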

template <typename Body, typename Iterator, typename Item>
using feeder_is_required = tbb::detail::void_t<decltype(tbb::detail::invoke(std::declval<const Body>(),
                                                                            std::declval<typename std::iterator_traits<Iterator>::reference>(),
                                                                            std::declval<feeder<Item>&>()))>;

// Creates feeder object only if the body can accept it
template <typename Iterator, typename Body, typename Item, typename = void>
struct feeder_holder {
    feeder_holder( wait_context_vertex&, task_group_context&, const Body& ) {}

    feeder_impl<Body, Item>* feeder_ptr() { return nullptr; }
}; // class feeder_holder

template <typename Iterator, typename Body, typename Item>
class feeder_holder<Iterator, Body, Item, feeder_is_required<Body, Iterator, Item>> {
public:
    feeder_holder( wait_context_vertex& w_context, task_group_context& context, const Body& body )
        : my_feeder(body, w_context, context) {}

    feeder_impl<Body, Item>* feeder_ptr() { return &my_feeder; }
private:
    feeder_impl<Body, Item> my_feeder;
}; // class feeder_holder

template <typename Iterator, typename Body, typename Item>
class for_each_root_task_base : public task {
public:
    for_each_root_task_base(Iterator first, Iterator last, const Body& body, wait_context_vertex& w_context, task_group_context& e_context)
        : my_first(first), my_last(last), my_wait_context(w_context), my_execution_context(e_context),
          my_body(body), my_feeder_holder(my_wait_context, my_execution_context, my_body)
    {
        my_wait_context.reserve();
    }
private:
    task* cancel(execution_data&) override {
        this->my_wait_context.release();
        return nullptr;
    }
protected:
    Iterator my_first;
    Iterator my_last;
    wait_context_vertex& my_wait_context;
    task_group_context& my_execution_context;
    const Body& my_body;
    feeder_holder<Iterator, Body, Item> my_feeder_holder;
}; // class for_each_root_task_base

/** parallel_for_each algorithm root task - most generic version
  * Splits the input range into blocks
    @ingroup algorithms **/
template <typename Iterator, typename Body, typename Item, typename IteratorTag = iterator_tag_dispatch<Iterator>>
class for_each_root_task : public for_each_root_task_base<Iterator, Body, Item>
{
    using base_type = for_each_root_task_base<Iterator, Body, Item>;
public:
    using base_type::base_type;
private:
    task* execute(execution_data& ed) override {
        using block_handling_type = input_block_handling_task<Body, Item>;

        if (this->my_first == this->my_last) {
            this->my_wait_context.release();
            return nullptr;
        }

        this->my_wait_context.reserve();
        small_object_allocator alloc{};
        auto block_handling_task = alloc.new_object<block_handling_type>(ed, this->my_wait_context, this->my_execution_context,
                                                                         this->my_body, this->my_feeder_holder.feeder_ptr(),
                                                                         alloc);

        auto* block_iterator = block_handling_task->block_iteration_space.begin();
        for (; !(this->my_first == this->my_last) && block_handling_task->my_size < block_handling_type::max_block_size; ++this->my_first) {
            // Move semantics are automatically used when supported by the iterator
            new (block_iterator++) Item(*this->my_first);
            ++block_handling_task->my_size;
        }

        // Do not access this after spawn to avoid races
        spawn(*this, this->my_execution_context);
        return block_handling_task;
    }
}; // class for_each_root_task - most generic implementation

/** parallel_for_each algorithm root task - forward iterator based specialization
  * Splits the input range into blocks
    @ingroup algorithms **/
template <typename Iterator, typename Body, typename Item>
class for_each_root_task<Iterator, Body, Item, std::forward_iterator_tag>
    : public for_each_root_task_base<Iterator, Body, Item>
{
    using base_type = for_each_root_task_base<Iterator, Body, Item>;
public:
    using base_type::base_type;
private:
    task* execute(execution_data& ed) override {
        using block_handling_type = forward_block_handling_task<Iterator, Body, Item>;
        if (this->my_first == this->my_last) {
            this->my_wait_context.release();
            return nullptr;
        }

        std::size_t block_size{0};
        Iterator first_block_element = this->my_first;
        for (; !(this->my_first == this->my_last) && block_size < block_handling_type::max_block_size; ++this->my_first) {
            ++block_size;
        }

        this->my_wait_context.reserve();
        small_object_allocator alloc{};
        auto block_handling_task = alloc.new_object<block_handling_type>(ed, first_block_element, block_size,
                                                                         this->my_wait_context, this->my_execution_context,
                                                                         this->my_body, this->my_feeder_holder.feeder_ptr(), alloc);

        // Do not access this after spawn to avoid races
        spawn(*this, this->my_execution_context);
        return block_handling_task;
    }
}; // class for_each_root_task - forward iterator based specialization

/** parallel_for_each algorithm root task - random access iterator based specialization
  * Splits the input range into blocks
    @ingroup algorithms **/
template <typename Iterator, typename Body, typename Item>
class for_each_root_task<Iterator, Body, Item, std::random_access_iterator_tag>
    : public for_each_root_task_base<Iterator, Body, Item>
{
    using base_type = for_each_root_task_base<Iterator, Body, Item>;
public:
    using base_type::base_type;
private:
    task* execute(execution_data&) override {
        tbb::parallel_for(
            tbb::blocked_range<std::size_t>(0, std::distance(this->my_first, this->my_last)),
            parallel_for_body_wrapper<Iterator, Body, Item>(this->my_first, this->my_body, this->my_feeder_holder.feeder_ptr())
            , this->my_execution_context
        );

        this->my_wait_context.release();
        return nullptr;
    }
}; // class for_each_root_task - random access iterator based specialization

/** Helper for getting the item type. If the item type can be deduced from the feeder argument,
    it is taken from the feeder; otherwise it is taken from the range.
    @ingroup algorithms */
template<typename Body, typename Item, typename FeederArg>
auto feeder_argument_parser(void (Body::*)(Item, feeder<FeederArg>&) const) -> FeederArg;

template<typename Body, typename>
decltype(feeder_argument_parser<Body>(&Body::operator())) get_item_type_impl(int); // for (T, feeder<T>)
template<typename Body, typename Item> Item get_item_type_impl(...); // stub

template <typename Body, typename Item>
using get_item_type = decltype(get_item_type_impl<Body, Item>(0));
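
// Illustrative sketch (hypothetical types for exposition only): for a body whose operator()
// also takes a feeder, the item type is deduced from the feeder argument rather than from
// the range's value type; for a body without such an operator the fallback overload is used.
//
//     struct widget {};
//     struct feeding_body {
//         void operator()(widget, feeder<widget>&) const;
//     };
//     // get_item_type<feeding_body, int>    is widget (deduced from feeder<widget>)
//     // get_item_type<some_plain_body, int> is int    (the fallback)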

#if __TBB_CPP20_CONCEPTS_PRESENT
template <typename Body, typename ItemType>
using feeder_item_type = std::remove_cvref_t<get_item_type<Body, ItemType>>;

template <typename Body, typename Iterator>
concept parallel_for_each_iterator_body =
    parallel_for_each_body<Body, iterator_reference_type<Iterator>, feeder_item_type<Body, iterator_reference_type<Iterator>>>;

template <typename Body, typename Range>
concept parallel_for_each_range_body =
    parallel_for_each_body<Body, range_reference_type<Range>, feeder_item_type<Body, range_reference_type<Range>>>;
#endif

/** Implements parallel iteration over a range.
    @ingroup algorithms */
template<typename Iterator, typename Body>
void run_parallel_for_each( Iterator first, Iterator last, const Body& body, task_group_context& context)
{
    if (!(first == last)) {
        using ItemType = get_item_type<Body, typename std::iterator_traits<Iterator>::value_type>;
        wait_context_vertex w_context(0);

        for_each_root_task<Iterator, Body, ItemType> root_task(first, last, body, w_context, context);

        execute_and_wait(root_task, context, w_context.get_context(), context);
    }
}

/** \page parallel_for_each_body_req Requirements on parallel_for_each body
    Class \c Body implementing the concept of a parallel_for_each body must define:
    - \code
        B::operator()(
                cv_item_type item,
                feeder<item_type>& feeder
        ) const

        OR

        B::operator()( cv_item_type& item ) const
      \endcode                                               Process an item.
                                                             May be invoked concurrently for the same \c this but different \c item.

    - \code item_type( const item_type& ) \endcode
                                                             Copy a work item.
    - \code ~item_type() \endcode                            Destroy a work item.
**/
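
/** Illustrative sketch (an assumed user-defined body, not part of the library; would need
    <string> and <cstdio>): two body types satisfying the requirements above, one per call form.
    \code
    struct print_item {
        void operator()(const std::string& s) const {
            std::printf("%s\n", s.c_str());
        }
    };

    struct expand_item {
        void operator()(std::string s, tbb::feeder<std::string>& f) const {
            if (s.size() > 1)
                f.add(s.substr(1));   // feed a shorter work item back into the algorithm
        }
    };
    \endcode **/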

/** \name parallel_for_each
    See also requirements on \ref parallel_for_each_body_req "parallel_for_each Body". **/
//@{
//! Parallel iteration over a range, with optional addition of more work.
/** @ingroup algorithms */
template<typename Iterator, typename Body>
    __TBB_requires(std::input_iterator<Iterator> && parallel_for_each_iterator_body<Body, Iterator>)
void parallel_for_each(Iterator first, Iterator last, const Body& body) {
    task_group_context context(PARALLEL_FOR_EACH);
    run_parallel_for_each<Iterator, Body>(first, last, body, context);
}

template<typename Range, typename Body>
    __TBB_requires(container_based_sequence<Range, std::input_iterator_tag> && parallel_for_each_range_body<Body, Range>)
void parallel_for_each(Range& rng, const Body& body) {
    parallel_for_each(std::begin(rng), std::end(rng), body);
}

template<typename Range, typename Body>
    __TBB_requires(container_based_sequence<Range, std::input_iterator_tag> && parallel_for_each_range_body<Body, Range>)
void parallel_for_each(const Range& rng, const Body& body) {
    parallel_for_each(std::begin(rng), std::end(rng), body);
}

//! Parallel iteration over a range, with optional addition of more work and user-supplied context
/** @ingroup algorithms */
template<typename Iterator, typename Body>
    __TBB_requires(std::input_iterator<Iterator> && parallel_for_each_iterator_body<Body, Iterator>)
void parallel_for_each(Iterator first, Iterator last, const Body& body, task_group_context& context) {
    run_parallel_for_each<Iterator, Body>(first, last, body, context);
}

template<typename Range, typename Body>
    __TBB_requires(container_based_sequence<Range, std::input_iterator_tag> && parallel_for_each_range_body<Body, Range>)
void parallel_for_each(Range& rng, const Body& body, task_group_context& context) {
    parallel_for_each(std::begin(rng), std::end(rng), body, context);
}

template<typename Range, typename Body>
    __TBB_requires(container_based_sequence<Range, std::input_iterator_tag> && parallel_for_each_range_body<Body, Range>)
void parallel_for_each(const Range& rng, const Body& body, task_group_context& context) {
    parallel_for_each(std::begin(rng), std::end(rng), body, context);
}
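
/** Usage sketch (illustrative only; assumes <vector>, <atomic>, and a lambda body):
    \code
    std::vector<int> values{1, 2, 3, 4};
    std::atomic<int> sum{0};

    // Iterator form
    tbb::parallel_for_each(values.begin(), values.end(),
                           [&](int v) { sum += v; });

    // Container form with an explicit task_group_context
    tbb::task_group_context ctx;
    tbb::parallel_for_each(values, [&](int v) { sum += v; }, ctx);
    \endcode **/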

} // namespace d2
} // namespace detail
//! @endcond
//@}

inline namespace v1 {
using detail::d2::parallel_for_each;
using detail::d1::feeder;
} // namespace v1

} // namespace tbb

#endif /* __TBB_parallel_for_each_H */