Back to home page

EIC code displayed by LXR

 
 

    


Warning, file /include/oneapi/tbb/parallel_reduce.h was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).

0001 /*
0002     Copyright (c) 2005-2023 Intel Corporation
0003 
0004     Licensed under the Apache License, Version 2.0 (the "License");
0005     you may not use this file except in compliance with the License.
0006     You may obtain a copy of the License at
0007 
0008         http://www.apache.org/licenses/LICENSE-2.0
0009 
0010     Unless required by applicable law or agreed to in writing, software
0011     distributed under the License is distributed on an "AS IS" BASIS,
0012     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0013     See the License for the specific language governing permissions and
0014     limitations under the License.
0015 */
0016 
0017 #ifndef __TBB_parallel_reduce_H
0018 #define __TBB_parallel_reduce_H
0019 
0020 #include <new>
0021 #include "detail/_namespace_injection.h"
0022 #include "detail/_task.h"
0023 #include "detail/_aligned_space.h"
0024 #include "detail/_small_object_pool.h"
0025 #include "detail/_range_common.h"
0026 
0027 #include "task_group.h" // task_group_context
0028 #include "partitioner.h"
0029 #include "profiling.h"
0030 
0031 namespace tbb {
0032 namespace detail {
0033 #if __TBB_CPP20_CONCEPTS_PRESENT
0034 inline namespace d0 {
0035 
0036 template <typename Body, typename Range>
0037 concept parallel_reduce_body = splittable<Body> &&
0038                                requires( Body& body, const Range& range, Body& rhs ) {
0039                                    body(range);
0040                                    body.join(rhs);
0041                                };
0042 
0043 template <typename Function, typename Range, typename Value>
0044 concept parallel_reduce_function = std::invocable<const std::remove_reference_t<Function>&,
0045                                                   const Range&, const Value&> &&
0046                                    std::convertible_to<std::invoke_result_t<const std::remove_reference_t<Function>&,
0047                                                                             const Range&, const Value&>,
0048                                                         Value>;
0049 
0050 template <typename Combine, typename Value>
0051 concept parallel_reduce_combine = std::invocable<const std::remove_reference_t<Combine>&,
0052                                                  const Value&, const Value&> &&
0053                                   std::convertible_to<std::invoke_result_t<const std::remove_reference_t<Combine>&,
0054                                                                            const Value&, const Value&>,
0055                                                       Value>;
0056 
0057 } // namespace d0
0058 #endif // __TBB_CPP20_CONCEPTS_PRESENT
0059 namespace d1 {
0060 
0061 //! Tree node type for parallel_reduce.
0062 /** @ingroup algorithms */
0063 //TODO: consider folding tree via bypass execution(instead of manual folding)
0064 // for better cancellation and critical tasks handling (performance measurements required).
0065 template<typename Body>
0066 struct reduction_tree_node : public tree_node {
0067     tbb::detail::aligned_space<Body> zombie_space;
0068     Body& left_body;
0069     bool has_right_zombie{false};
0070 
0071     reduction_tree_node(node* parent, int ref_count, Body& input_left_body, small_object_allocator& alloc) :
0072         tree_node{parent, ref_count, alloc},
0073         left_body(input_left_body) /* gcc4.8 bug - braced-initialization doesn't work for class members of reference type */
0074     {}
0075 
0076     void join(task_group_context* context) {
0077         if (has_right_zombie && !context->is_group_execution_cancelled())
0078             left_body.join(*zombie_space.begin());
0079     }
0080 
0081     ~reduction_tree_node() {
0082         if( has_right_zombie ) zombie_space.begin()->~Body();
0083     }
0084 };
0085 
0086 //! Task type used to split the work of parallel_reduce.
0087 /** @ingroup algorithms */
0088 template<typename Range, typename Body, typename Partitioner>
0089 struct start_reduce : public task {
0090     Range my_range;
0091     Body* my_body;
0092     node* my_parent;
0093 
0094     typename Partitioner::task_partition_type my_partition;
0095     small_object_allocator my_allocator;
0096     bool is_right_child;
0097 
0098     task* execute(execution_data&) override;
0099     task* cancel(execution_data&) override;
0100     void finalize(const execution_data&);
0101 
0102     using tree_node_type = reduction_tree_node<Body>;
0103 
0104     //! Constructor reduce root task.
0105     start_reduce( const Range& range, Body& body, Partitioner& partitioner, small_object_allocator& alloc ) :
0106         my_range(range),
0107         my_body(&body),
0108         my_parent(nullptr),
0109         my_partition(partitioner),
0110         my_allocator(alloc),
0111         is_right_child(false) {}
0112     //! Splitting constructor used to generate children.
0113     /** parent_ becomes left child. Newly constructed object is right child. */
0114     start_reduce( start_reduce& parent_, typename Partitioner::split_type& split_obj, small_object_allocator& alloc ) :
0115         my_range(parent_.my_range, get_range_split_object<Range>(split_obj)),
0116         my_body(parent_.my_body),
0117         my_parent(nullptr),
0118         my_partition(parent_.my_partition, split_obj),
0119         my_allocator(alloc),
0120         is_right_child(true)
0121     {
0122         parent_.is_right_child = false;
0123     }
0124     //! Construct right child from the given range as response to the demand.
0125     /** parent_ remains left child. Newly constructed object is right child. */
0126     start_reduce( start_reduce& parent_, const Range& r, depth_t d, small_object_allocator& alloc ) :
0127         my_range(r),
0128         my_body(parent_.my_body),
0129         my_parent(nullptr),
0130         my_partition(parent_.my_partition, split()),
0131         my_allocator(alloc),
0132         is_right_child(true)
0133     {
0134         my_partition.align_depth( d );
0135         parent_.is_right_child = false;
0136     }
0137     static void run(const Range& range, Body& body, Partitioner& partitioner, task_group_context& context) {
0138         if ( !range.empty() ) {
0139             wait_node wn;
0140             small_object_allocator alloc{};
0141             auto reduce_task = alloc.new_object<start_reduce>(range, body, partitioner, alloc);
0142             reduce_task->my_parent = &wn;
0143             execute_and_wait(*reduce_task, context, wn.m_wait, context);
0144         }
0145     }
0146     static void run(const Range& range, Body& body, Partitioner& partitioner) {
0147         // Bound context prevents exceptions from body to affect nesting or sibling algorithms,
0148         // and allows users to handle exceptions safely by wrapping parallel_reduce in the try-block.
0149         task_group_context context(PARALLEL_REDUCE);
0150         run(range, body, partitioner, context);
0151     }
0152     //! Run body for range, serves as callback for partitioner
0153     void run_body( Range &r ) {
0154         tbb::detail::invoke(*my_body, r);
0155     }
0156 
0157     //! spawn right task, serves as callback for partitioner
0158     void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) {
0159         offer_work_impl(ed, *this, split_obj);
0160     }
0161     //! spawn right task, serves as callback for partitioner
0162     void offer_work(const Range& r, depth_t d, execution_data& ed) {
0163         offer_work_impl(ed, *this, r, d);
0164     }
0165 
0166 private:
0167     template <typename... Args>
0168     void offer_work_impl(execution_data& ed, Args&&... args) {
0169         small_object_allocator alloc{};
0170         // New right child
0171         auto right_child = alloc.new_object<start_reduce>(ed, std::forward<Args>(args)..., alloc);
0172 
0173         // New root node as a continuation and ref count. Left and right child attach to the new parent.
0174         right_child->my_parent = my_parent = alloc.new_object<tree_node_type>(ed, my_parent, 2, *my_body, alloc);
0175 
0176         // Spawn the right sibling
0177         right_child->spawn_self(ed);
0178     }
0179 
0180     void spawn_self(execution_data& ed) {
0181         my_partition.spawn_task(*this, *context(ed));
0182     }
0183 };
0184 
0185 //! fold the tree and deallocate the task
0186 template<typename Range, typename Body, typename Partitioner>
0187 void start_reduce<Range, Body, Partitioner>::finalize(const execution_data& ed) {
0188     // Get the current parent and wait object before an object destruction
0189     node* parent = my_parent;
0190     auto allocator = my_allocator;
0191     // Task execution finished - destroy it
0192     this->~start_reduce();
0193     // Unwind the tree decrementing the parent`s reference count
0194     fold_tree<tree_node_type>(parent, ed);
0195     allocator.deallocate(this, ed);
0196 }
0197 
0198 //! Execute parallel_reduce task
0199 template<typename Range, typename Body, typename Partitioner>
0200 task* start_reduce<Range,Body,Partitioner>::execute(execution_data& ed) {
0201     if (!is_same_affinity(ed)) {
0202         my_partition.note_affinity(execution_slot(ed));
0203     }
0204     my_partition.check_being_stolen(*this, ed);
0205 
0206     // The acquire barrier synchronizes the data pointed with my_body if the left
0207     // task has already finished.
0208     __TBB_ASSERT(my_parent, nullptr);
0209     if( is_right_child && my_parent->m_ref_count.load(std::memory_order_acquire) == 2 ) {
0210         tree_node_type* parent_ptr = static_cast<tree_node_type*>(my_parent);
0211         my_body = static_cast<Body*>(new( parent_ptr->zombie_space.begin() ) Body(*my_body, split()));
0212         parent_ptr->has_right_zombie = true;
0213     }
0214     __TBB_ASSERT(my_body != nullptr, "Incorrect body value");
0215 
0216     my_partition.execute(*this, my_range, ed);
0217 
0218     finalize(ed);
0219     return nullptr;
0220 }
0221 
0222 //! Cancel parallel_reduce task
0223 template<typename Range, typename Body, typename Partitioner>
0224 task* start_reduce<Range, Body, Partitioner>::cancel(execution_data& ed) {
0225     finalize(ed);
0226     return nullptr;
0227 }
0228 
0229 //! Tree node type for parallel_deterministic_reduce.
0230 /** @ingroup algorithms */
0231 template<typename Body>
0232 struct deterministic_reduction_tree_node : public tree_node {
0233     Body right_body;
0234     Body& left_body;
0235 
0236     deterministic_reduction_tree_node(node* parent, int ref_count, Body& input_left_body, small_object_allocator& alloc) :
0237         tree_node{parent, ref_count, alloc},
0238         right_body{input_left_body, detail::split()},
0239         left_body(input_left_body)
0240     {}
0241 
0242     void join(task_group_context* context) {
0243         if (!context->is_group_execution_cancelled())
0244             left_body.join(right_body);
0245     }
0246 };
0247 
0248 //! Task type used to split the work of parallel_deterministic_reduce.
0249 /** @ingroup algorithms */
0250 template<typename Range, typename Body, typename Partitioner>
0251 struct start_deterministic_reduce : public task {
0252     Range my_range;
0253     Body& my_body;
0254     node* my_parent;
0255 
0256     typename Partitioner::task_partition_type my_partition;
0257     small_object_allocator my_allocator;
0258 
0259     task* execute(execution_data&) override;
0260     task* cancel(execution_data&) override;
0261     void finalize(const execution_data&);
0262 
0263     using tree_node_type = deterministic_reduction_tree_node<Body>;
0264 
0265     //! Constructor deterministic_reduce root task.
0266     start_deterministic_reduce( const Range& range, Partitioner& partitioner, Body& body, small_object_allocator& alloc ) :
0267         my_range(range),
0268         my_body(body),
0269         my_parent(nullptr),
0270         my_partition(partitioner),
0271         my_allocator(alloc) {}
0272     //! Splitting constructor used to generate children.
0273     /** parent_ becomes left child.  Newly constructed object is right child. */
0274     start_deterministic_reduce( start_deterministic_reduce& parent_, typename Partitioner::split_type& split_obj, Body& body,
0275                                 small_object_allocator& alloc ) :
0276         my_range(parent_.my_range, get_range_split_object<Range>(split_obj)),
0277         my_body(body),
0278         my_parent(nullptr),
0279         my_partition(parent_.my_partition, split_obj),
0280         my_allocator(alloc) {}
0281     static void run(const Range& range, Body& body, Partitioner& partitioner, task_group_context& context) {
0282         if ( !range.empty() ) {
0283             wait_node wn;
0284             small_object_allocator alloc{};
0285             auto deterministic_reduce_task =
0286                 alloc.new_object<start_deterministic_reduce>(range, partitioner, body, alloc);
0287             deterministic_reduce_task->my_parent = &wn;
0288             execute_and_wait(*deterministic_reduce_task, context, wn.m_wait, context);
0289         }
0290     }
0291     static void run(const Range& range, Body& body, Partitioner& partitioner) {
0292         // Bound context prevents exceptions from body to affect nesting or sibling algorithms,
0293         // and allows users to handle exceptions safely by wrapping parallel_deterministic_reduce
0294         // in the try-block.
0295         task_group_context context(PARALLEL_REDUCE);
0296         run(range, body, partitioner, context);
0297     }
0298     //! Run body for range, serves as callback for partitioner
0299     void run_body( Range &r ) {
0300         tbb::detail::invoke(my_body, r);
0301     }
0302     //! Spawn right task, serves as callback for partitioner
0303     void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) {
0304         offer_work_impl(ed, *this, split_obj);
0305     }
0306 private:
0307     template <typename... Args>
0308     void offer_work_impl(execution_data& ed, Args&&... args) {
0309         small_object_allocator alloc{};
0310         // New root node as a continuation and ref count. Left and right child attach to the new parent. Split the body.
0311         auto new_tree_node = alloc.new_object<tree_node_type>(ed, my_parent, 2, my_body, alloc);
0312 
0313         // New right child
0314         auto right_child = alloc.new_object<start_deterministic_reduce>(ed, std::forward<Args>(args)..., new_tree_node->right_body, alloc);
0315 
0316         right_child->my_parent = my_parent = new_tree_node;
0317 
0318         // Spawn the right sibling
0319         right_child->spawn_self(ed);
0320     }
0321 
0322     void spawn_self(execution_data& ed) {
0323         my_partition.spawn_task(*this, *context(ed));
0324     }
0325 };
0326 
0327 //! Fold the tree and deallocate the task
0328 template<typename Range, typename Body, typename Partitioner>
0329 void start_deterministic_reduce<Range, Body, Partitioner>::finalize(const execution_data& ed) {
0330     // Get the current parent and wait object before an object destruction
0331     node* parent = my_parent;
0332 
0333     auto allocator = my_allocator;
0334     // Task execution finished - destroy it
0335     this->~start_deterministic_reduce();
0336     // Unwind the tree decrementing the parent`s reference count
0337     fold_tree<tree_node_type>(parent, ed);
0338     allocator.deallocate(this, ed);
0339 }
0340 
0341 //! Execute parallel_deterministic_reduce task
0342 template<typename Range, typename Body, typename Partitioner>
0343 task* start_deterministic_reduce<Range,Body,Partitioner>::execute(execution_data& ed) {
0344     if (!is_same_affinity(ed)) {
0345         my_partition.note_affinity(execution_slot(ed));
0346     }
0347     my_partition.check_being_stolen(*this, ed);
0348 
0349     my_partition.execute(*this, my_range, ed);
0350 
0351     finalize(ed);
0352     return nullptr;
0353 }
0354 
0355 //! Cancel parallel_deterministic_reduce task
0356 template<typename Range, typename Body, typename Partitioner>
0357 task* start_deterministic_reduce<Range, Body, Partitioner>::cancel(execution_data& ed) {
0358     finalize(ed);
0359     return nullptr;
0360 }
0361 
0362 
0363 //! Auxiliary class for parallel_reduce; for internal use only.
0364 /** The adaptor class that implements \ref parallel_reduce_body_req "parallel_reduce Body"
0365     using given \ref parallel_reduce_lambda_req "anonymous function objects".
0366  **/
0367 /** @ingroup algorithms */
0368 template<typename Range, typename Value, typename RealBody, typename Reduction>
0369 class lambda_reduce_body {
0370 //TODO: decide if my_real_body, my_reduction, and my_identity_element should be copied or referenced
0371 //       (might require some performance measurements)
0372 
0373     const Value&     my_identity_element;
0374     const RealBody&  my_real_body;
0375     const Reduction& my_reduction;
0376     Value            my_value;
0377     lambda_reduce_body& operator= ( const lambda_reduce_body& other );
0378 public:
0379     lambda_reduce_body( const Value& identity, const RealBody& body, const Reduction& reduction )
0380         : my_identity_element(identity)
0381         , my_real_body(body)
0382         , my_reduction(reduction)
0383         , my_value(identity)
0384     { }
0385     lambda_reduce_body( const lambda_reduce_body& other ) = default;
0386     lambda_reduce_body( lambda_reduce_body& other, tbb::split )
0387         : my_identity_element(other.my_identity_element)
0388         , my_real_body(other.my_real_body)
0389         , my_reduction(other.my_reduction)
0390         , my_value(other.my_identity_element)
0391     { }
0392     void operator()(Range& range) {
0393         my_value = tbb::detail::invoke(my_real_body, range, const_cast<const Value&>(my_value));
0394     }
0395     void join( lambda_reduce_body& rhs ) {
0396         my_value = tbb::detail::invoke(my_reduction, const_cast<const Value&>(my_value),
0397                                                      const_cast<const Value&>(rhs.my_value));
0398     }
0399     Value result() const {
0400         return my_value;
0401     }
0402 };
0403 
0404 
0405 // Requirements on Range concept are documented in blocked_range.h
0406 
0407 /** \page parallel_reduce_body_req Requirements on parallel_reduce body
0408     Class \c Body implementing the concept of parallel_reduce body must define:
0409     - \code Body::Body( Body&, split ); \endcode        Splitting constructor.
0410                                                         Must be able to run concurrently with operator() and method \c join
0411     - \code Body::~Body(); \endcode                     Destructor
0412     - \code void Body::operator()( Range& r ); \endcode Function call operator applying body to range \c r
0413                                                         and accumulating the result
0414     - \code void Body::join( Body& b ); \endcode        Join results.
0415                                                         The result in \c b should be merged into the result of \c this
0416 **/
0417 
0418 /** \page parallel_reduce_lambda_req Requirements on parallel_reduce anonymous function objects (lambda functions)
0419     TO BE DOCUMENTED
0420 **/
0421 
0422 /** \name parallel_reduce
0423     See also requirements on \ref range_req "Range" and \ref parallel_reduce_body_req "parallel_reduce Body". **/
0424 //@{
0425 
0426 //! Parallel iteration with reduction and default partitioner.
0427 /** @ingroup algorithms **/
0428 template<typename Range, typename Body>
0429     __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
0430 void parallel_reduce( const Range& range, Body& body ) {
0431     start_reduce<Range,Body, const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER() );
0432 }
0433 
0434 //! Parallel iteration with reduction and simple_partitioner
0435 /** @ingroup algorithms **/
0436 template<typename Range, typename Body>
0437     __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
0438 void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) {
0439     start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner );
0440 }
0441 
0442 //! Parallel iteration with reduction and auto_partitioner
0443 /** @ingroup algorithms **/
0444 template<typename Range, typename Body>
0445     __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
0446 void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner ) {
0447     start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner );
0448 }
0449 
0450 //! Parallel iteration with reduction and static_partitioner
0451 /** @ingroup algorithms **/
0452 template<typename Range, typename Body>
0453     __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
0454 void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) {
0455     start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner );
0456 }
0457 
0458 //! Parallel iteration with reduction and affinity_partitioner
0459 /** @ingroup algorithms **/
0460 template<typename Range, typename Body>
0461     __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
0462 void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner ) {
0463     start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner );
0464 }
0465 
0466 //! Parallel iteration with reduction, default partitioner and user-supplied context.
0467 /** @ingroup algorithms **/
0468 template<typename Range, typename Body>
0469     __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
0470 void parallel_reduce( const Range& range, Body& body, task_group_context& context ) {
0471     start_reduce<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER(), context );
0472 }
0473 
0474 //! Parallel iteration with reduction, simple partitioner and user-supplied context.
0475 /** @ingroup algorithms **/
0476 template<typename Range, typename Body>
0477     __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
0478 void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
0479     start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner, context );
0480 }
0481 
0482 //! Parallel iteration with reduction, auto_partitioner and user-supplied context
0483 /** @ingroup algorithms **/
0484 template<typename Range, typename Body>
0485     __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
0486 void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner, task_group_context& context ) {
0487     start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner, context );
0488 }
0489 
0490 //! Parallel iteration with reduction, static_partitioner and user-supplied context
0491 /** @ingroup algorithms **/
0492 template<typename Range, typename Body>
0493     __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
0494 void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) {
0495     start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner, context );
0496 }
0497 
0498 //! Parallel iteration with reduction, affinity_partitioner and user-supplied context
0499 /** @ingroup algorithms **/
0500 template<typename Range, typename Body>
0501     __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
0502 void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner, task_group_context& context ) {
0503     start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner, context );
0504 }
0505 /** parallel_reduce overloads that work with anonymous function objects
0506     (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/
0507 
0508 //! Parallel iteration with reduction and default partitioner.
0509 /** @ingroup algorithms **/
0510 template<typename Range, typename Value, typename RealBody, typename Reduction>
0511     __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
0512                    parallel_reduce_combine<Reduction, Value>)
0513 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
0514     lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
0515     start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER>
0516                           ::run(range, body, __TBB_DEFAULT_PARTITIONER() );
0517     return body.result();
0518 }
0519 
0520 //! Parallel iteration with reduction and simple_partitioner.
0521 /** @ingroup algorithms **/
0522 template<typename Range, typename Value, typename RealBody, typename Reduction>
0523     __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
0524                    parallel_reduce_combine<Reduction, Value>)
0525 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
0526                        const simple_partitioner& partitioner ) {
0527     lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
0528     start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
0529                           ::run(range, body, partitioner );
0530     return body.result();
0531 }
0532 
0533 //! Parallel iteration with reduction and auto_partitioner
0534 /** @ingroup algorithms **/
0535 template<typename Range, typename Value, typename RealBody, typename Reduction>
0536     __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
0537                    parallel_reduce_combine<Reduction, Value>)
0538 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
0539                        const auto_partitioner& partitioner ) {
0540     lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
0541     start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
0542                           ::run( range, body, partitioner );
0543     return body.result();
0544 }
0545 
0546 //! Parallel iteration with reduction and static_partitioner
0547 /** @ingroup algorithms **/
0548 template<typename Range, typename Value, typename RealBody, typename Reduction>
0549     __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
0550                    parallel_reduce_combine<Reduction, Value>)
0551 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
0552                        const static_partitioner& partitioner ) {
0553     lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
0554     start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner>
0555                                         ::run( range, body, partitioner );
0556     return body.result();
0557 }
0558 
0559 //! Parallel iteration with reduction and affinity_partitioner
0560 /** @ingroup algorithms **/
0561 template<typename Range, typename Value, typename RealBody, typename Reduction>
0562     __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
0563                    parallel_reduce_combine<Reduction, Value>)
0564 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
0565                        affinity_partitioner& partitioner ) {
0566     lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
0567     start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
0568                                         ::run( range, body, partitioner );
0569     return body.result();
0570 }
0571 
0572 //! Parallel iteration with reduction, default partitioner and user-supplied context.
0573 /** @ingroup algorithms **/
0574 template<typename Range, typename Value, typename RealBody, typename Reduction>
0575     __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
0576                    parallel_reduce_combine<Reduction, Value>)
0577 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
0578                        task_group_context& context ) {
0579     lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
0580     start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER>
0581                           ::run( range, body, __TBB_DEFAULT_PARTITIONER(), context );
0582     return body.result();
0583 }
0584 
0585 //! Parallel iteration with reduction, simple partitioner and user-supplied context.
0586 /** @ingroup algorithms **/
0587 template<typename Range, typename Value, typename RealBody, typename Reduction>
0588     __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
0589                    parallel_reduce_combine<Reduction, Value>)
0590 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
0591                        const simple_partitioner& partitioner, task_group_context& context ) {
0592     lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
0593     start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
0594                           ::run( range, body, partitioner, context );
0595     return body.result();
0596 }
0597 
0598 //! Parallel iteration with reduction, auto_partitioner and user-supplied context
0599 /** @ingroup algorithms **/
0600 template<typename Range, typename Value, typename RealBody, typename Reduction>
0601     __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
0602                    parallel_reduce_combine<Reduction, Value>)
0603 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
0604                        const auto_partitioner& partitioner, task_group_context& context ) {
0605     lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
0606     start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
0607                           ::run( range, body, partitioner, context );
0608     return body.result();
0609 }
0610 
0611 //! Function-object form of parallel reduction using static_partitioner, executed in the caller-supplied context
0612 /** @ingroup algorithms **/
0613 template<typename Range, typename Value, typename RealBody, typename Reduction>
0614     __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
0615                    parallel_reduce_combine<Reduction, Value>)
0616 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
0617                        const static_partitioner& partitioner, task_group_context& context ) {
0618     using body_type = lambda_reduce_body<Range,Value,RealBody,Reduction>; // adapter built from identity, real_body and reduction
0619     body_type reduce_body(identity, real_body, reduction);
0620     start_reduce<Range,body_type,const static_partitioner>::run( range, reduce_body, partitioner, context );
0621     return reduce_body.result(); // value held by the adapter once run() has returned
0622 }
0623 
0624 //! Function-object form of parallel reduction using affinity_partitioner, executed in the caller-supplied context
0625 /** @ingroup algorithms **/
0626 template<typename Range, typename Value, typename RealBody, typename Reduction>
0627     __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
0628                    parallel_reduce_combine<Reduction, Value>)
0629 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
0630                        affinity_partitioner& partitioner, task_group_context& context ) {
0631     using body_type = lambda_reduce_body<Range,Value,RealBody,Reduction>; // adapter built from identity, real_body and reduction
0632     body_type reduce_body(identity, real_body, reduction);
0633     start_reduce<Range,body_type,affinity_partitioner>::run( range, reduce_body, partitioner, context ); // partitioner is passed as a non-const reference here
0634     return reduce_body.result(); // value held by the adapter once run() has returned
0635 }
0636 
0637 //! Deterministic reduction of a range into body, using a default-constructed simple_partitioner.
0638 /** @ingroup algorithms **/
0639 template<typename Range, typename Body> __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
0640 void parallel_deterministic_reduce( const Range& range, Body& body ) {
0641     using reduce_task = start_deterministic_reduce<Range, Body, const simple_partitioner>;
0642     reduce_task::run(range, body, simple_partitioner());
0643 }
0644 
0645 //! Deterministic reduction of a range into body, using the supplied simple_partitioner.
0646 /** @ingroup algorithms **/
0647 template<typename Range, typename Body> __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
0648 void parallel_deterministic_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) {
0649     using reduce_task = start_deterministic_reduce<Range, Body, const simple_partitioner>;
0650     reduce_task::run(range, body, partitioner);
0651 }
0652 
0653 //! Deterministic reduction of a range into body, using the supplied static_partitioner.
0654 /** @ingroup algorithms **/
0655 template<typename Range, typename Body> __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
0656 void parallel_deterministic_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) {
0657     using reduce_task = start_deterministic_reduce<Range, Body, const static_partitioner>;
0658     reduce_task::run(range, body, partitioner);
0659 }
0660 
0661 //! Deterministic reduction of a range into body in the caller-supplied context, using a default-constructed simple_partitioner.
0662 /** @ingroup algorithms **/
0663 template<typename Range, typename Body> __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
0664 void parallel_deterministic_reduce( const Range& range, Body& body, task_group_context& context ) {
0665     using reduce_task = start_deterministic_reduce<Range, Body, const simple_partitioner>;
0666     reduce_task::run( range, body, simple_partitioner(), context );
0667 }
0668 
0669 //! Deterministic reduction of a range into body in the caller-supplied context, using the supplied simple_partitioner.
0670 /** @ingroup algorithms **/
0671 template<typename Range, typename Body> __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
0672 void parallel_deterministic_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
0673     using reduce_task = start_deterministic_reduce<Range, Body, const simple_partitioner>;
0674     reduce_task::run(range, body, partitioner, context);
0675 }
0676 
0677 //! Deterministic reduction of a range into body in the caller-supplied context, using the supplied static_partitioner.
0678 /** @ingroup algorithms **/
0679 template<typename Range, typename Body> __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
0680 void parallel_deterministic_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) {
0681     using reduce_task = start_deterministic_reduce<Range, Body, const static_partitioner>;
0682     reduce_task::run(range, body, partitioner, context);
0683 }
0684 
0685 /** parallel_deterministic_reduce overloads that work with anonymous function objects
0686     (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/
0687 
0688 //! Function-object form of deterministic reduction; forwards to the simple_partitioner overload with a default-constructed partitioner.
0689 // TODO: consider making static_partitioner the default
0690 /** @ingroup algorithms **/
0691 template<typename Range, typename Value, typename RealBody, typename Reduction>
0692     __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && parallel_reduce_combine<Reduction, Value>)
0693 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
0694     const simple_partitioner default_partitioner{}; // default-constructed partitioner handed to the partitioner-taking overload
0695     return parallel_deterministic_reduce(range, identity, real_body, reduction, default_partitioner);
0696 }
0697 
0698 //! Function-object form of deterministic reduction using the supplied simple_partitioner.
0699 /** @ingroup algorithms **/
0700 template<typename Range, typename Value, typename RealBody, typename Reduction>
0701     __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
0702                    parallel_reduce_combine<Reduction, Value>)
0703 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const simple_partitioner& partitioner ) {
0704     using body_type = lambda_reduce_body<Range,Value,RealBody,Reduction>; // adapter built from identity, real_body and reduction
0705     body_type reduce_body(identity, real_body, reduction);
0706     start_deterministic_reduce<Range,body_type,const simple_partitioner>::run(range, reduce_body, partitioner);
0707     return reduce_body.result(); // value held by the adapter once run() has returned
0708 }
0709 
0710 //! Function-object form of deterministic reduction using the supplied static_partitioner.
0711 /** @ingroup algorithms **/
0712 template<typename Range, typename Value, typename RealBody, typename Reduction>
0713     __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
0714                    parallel_reduce_combine<Reduction, Value>)
0715 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const static_partitioner& partitioner ) {
0716     using body_type = lambda_reduce_body<Range, Value, RealBody, Reduction>; // adapter built from identity, real_body and reduction
0717     body_type reduce_body(identity, real_body, reduction);
0718     start_deterministic_reduce<Range, body_type, const static_partitioner>::run(range, reduce_body, partitioner);
0719     return reduce_body.result(); // value held by the adapter once run() has returned
0720 }
0721 
0722 //! Function-object form of deterministic reduction in the caller-supplied context; forwards to the simple_partitioner overload.
0723 /** @ingroup algorithms **/
0724 template<typename Range, typename Value, typename RealBody, typename Reduction>
0725     __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> && parallel_reduce_combine<Reduction, Value>)
0726 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
0727     task_group_context& context ) {
0728     const simple_partitioner default_partitioner{}; // default-constructed partitioner handed to the partitioner-taking overload
0729     return parallel_deterministic_reduce(range, identity, real_body, reduction, default_partitioner, context);
0730 }
0731 
0732 //! Function-object form of deterministic reduction using the supplied simple_partitioner, executed in the caller-supplied context.
0733 /** @ingroup algorithms **/
0734 template<typename Range, typename Value, typename RealBody, typename Reduction>
0735     __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
0736                    parallel_reduce_combine<Reduction, Value>)
0737 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
0738     const simple_partitioner& partitioner, task_group_context& context ) {
0739     using body_type = lambda_reduce_body<Range, Value, RealBody, Reduction>; // adapter built from identity, real_body and reduction
0740     body_type reduce_body(identity, real_body, reduction);
0741     start_deterministic_reduce<Range, body_type, const simple_partitioner>::run(range, reduce_body, partitioner, context);
0742     return reduce_body.result(); // value held by the adapter once run() has returned
0743 }
0744 
0745 //! Function-object form of deterministic reduction using the supplied static_partitioner, executed in the caller-supplied context.
0746 /** @ingroup algorithms **/
0747 template<typename Range, typename Value, typename RealBody, typename Reduction>
0748     __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
0749                    parallel_reduce_combine<Reduction, Value>)
0750 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
0751     const static_partitioner& partitioner, task_group_context& context ) {
0752     using body_type = lambda_reduce_body<Range, Value, RealBody, Reduction>; // adapter built from identity, real_body and reduction
0753     body_type reduce_body(identity, real_body, reduction);
0754     start_deterministic_reduce<Range, body_type, const static_partitioner>::run(range, reduce_body, partitioner, context);
0755     return reduce_body.result(); // value held by the adapter once run() has returned
0756 }
0757 //@}
0758 
0759 } // namespace d1
0760 } // namespace detail
0761 
0762 inline namespace v1 { // re-exports the names below from tbb::detail into tbb::v1
0763 using detail::d1::parallel_reduce; // all parallel_reduce overloads declared in detail::d1
0764 using detail::d1::parallel_deterministic_reduce; // all parallel_deterministic_reduce overloads declared in detail::d1
0765 // Split types, re-exported from detail alongside the algorithms
0766 using detail::split;
0767 using detail::proportional_split;
0768 } // namespace v1
0769 
0770 } // namespace tbb
0771 #endif /* __TBB_parallel_reduce_H */