Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-12-18 10:24:26

0001 /*
0002     Copyright (c) 2005-2024 Intel Corporation
0003 
0004     Licensed under the Apache License, Version 2.0 (the "License");
0005     you may not use this file except in compliance with the License.
0006     You may obtain a copy of the License at
0007 
0008         http://www.apache.org/licenses/LICENSE-2.0
0009 
0010     Unless required by applicable law or agreed to in writing, software
0011     distributed under the License is distributed on an "AS IS" BASIS,
0012     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0013     See the License for the specific language governing permissions and
0014     limitations under the License.
0015 */
0016 
0017 #ifndef __TBB_parallel_reduce_H
0018 #define __TBB_parallel_reduce_H
0019 
0020 #include <new>
0021 #include "detail/_namespace_injection.h"
0022 #include "detail/_task.h"
0023 #include "detail/_aligned_space.h"
0024 #include "detail/_small_object_pool.h"
0025 #include "detail/_range_common.h"
0026 
0027 #include "task_group.h" // task_group_context
0028 #include "partitioner.h"
0029 #include "profiling.h"
0030 
0031 namespace tbb {
0032 namespace detail {
0033 #if __TBB_CPP20_CONCEPTS_PRESENT
0034 inline namespace d0 {
0035 
//! Requirements on the Body of the imperative form of parallel_reduce.
/** Satisfied when \c Body models \c splittable (declared outside this file) and, for lvalues
    \c body and \c rhs of type \c Body and a <tt>const Range&</tt> named \c range, both
    \c body(range) and \c body.join(rhs) are well-formed expressions. */
0036 template <typename Body, typename Range>
0037 concept parallel_reduce_body = splittable<Body> &&
0038                                requires( Body& body, const Range& range, Body& rhs ) {
0039                                    body(range);
0040                                    body.join(rhs);
0041                                };
0042 
//! Requirements on the range-processing function of the functional form of parallel_reduce.
/** Satisfied when a const lvalue of \c Function (with any reference stripped) can be invoked
    with <tt>(const Range&, Value&&)</tt> and the result of that invocation is convertible
    to \c Value. */
0043 template <typename Function, typename Range, typename Value>
0044 concept parallel_reduce_function = std::invocable<const std::remove_reference_t<Function>&,
0045                                                   const Range&, Value&&> &&
0046                                    std::convertible_to<std::invoke_result_t<const std::remove_reference_t<Function>&,
0047                                                                             const Range&, Value&&>,
0048                                                         Value>;
0049 
//! Requirements on the combining (reduction) function of the functional form of parallel_reduce.
/** Satisfied when a const lvalue of \c Combine (with any reference stripped) can be invoked
    with <tt>(Value&&, Value&&)</tt> and the result of that invocation is convertible
    to \c Value. */
0050 template <typename Combine, typename Value>
0051 concept parallel_reduce_combine = std::invocable<const std::remove_reference_t<Combine>&,
0052                                                  Value&&, Value&&> &&
0053                                   std::convertible_to<std::invoke_result_t<const std::remove_reference_t<Combine>&,
0054                                                                            Value&&, Value&&>,
0055                                                       Value>;
0056 
0057 } // namespace d0
0058 #endif // __TBB_CPP20_CONCEPTS_PRESENT
0059 namespace d1 {
0060 
0061 //! Tree node type for parallel_reduce.
0062 /** @ingroup algorithms */
0063 //TODO: consider folding tree via bypass execution(instead of manual folding)
0064 // for better cancellation and critical tasks handling (performance measurements required).
0065 template<typename Body>
0066 struct reduction_tree_node : public tree_node {
0067     tbb::detail::aligned_space<Body> zombie_space;
0068     Body& left_body;
0069     bool has_right_zombie{false};
0070 
0071     reduction_tree_node(node* parent, int ref_count, Body& input_left_body, small_object_allocator& alloc) :
0072         tree_node{parent, ref_count, alloc},
0073         left_body(input_left_body) /* gcc4.8 bug - braced-initialization doesn't work for class members of reference type */
0074     {}
0075 
0076     void join(task_group_context* context) {
0077         if (has_right_zombie && !context->is_group_execution_cancelled())
0078             left_body.join(*zombie_space.begin());
0079     }
0080 
0081     ~reduction_tree_node() {
0082         if( has_right_zombie ) zombie_space.begin()->~Body();
0083     }
0084 };
0085 
0086 //! Task type used to split the work of parallel_reduce.
0087 /** @ingroup algorithms */
0088 template<typename Range, typename Body, typename Partitioner>
0089 struct start_reduce : public task {
0090     Range my_range;
0091     Body* my_body;
0092     node* my_parent;
0093 
0094     typename Partitioner::task_partition_type my_partition;
0095     small_object_allocator my_allocator;
0096     bool is_right_child;
0097 
0098     task* execute(execution_data&) override;
0099     task* cancel(execution_data&) override;
0100     void finalize(const execution_data&);
0101 
0102     using tree_node_type = reduction_tree_node<Body>;
0103 
0104     //! Constructor reduce root task.
0105     start_reduce( const Range& range, Body& body, Partitioner& partitioner, small_object_allocator& alloc ) :
0106         my_range(range),
0107         my_body(&body),
0108         my_parent(nullptr),
0109         my_partition(partitioner),
0110         my_allocator(alloc),
0111         is_right_child(false) {}
0112     //! Splitting constructor used to generate children.
0113     /** parent_ becomes left child. Newly constructed object is right child. */
0114     start_reduce( start_reduce& parent_, typename Partitioner::split_type& split_obj, small_object_allocator& alloc ) :
0115         my_range(parent_.my_range, get_range_split_object<Range>(split_obj)),
0116         my_body(parent_.my_body),
0117         my_parent(nullptr),
0118         my_partition(parent_.my_partition, split_obj),
0119         my_allocator(alloc),
0120         is_right_child(true)
0121     {
0122         parent_.is_right_child = false;
0123     }
0124     //! Construct right child from the given range as response to the demand.
0125     /** parent_ remains left child. Newly constructed object is right child. */
0126     start_reduce( start_reduce& parent_, const Range& r, depth_t d, small_object_allocator& alloc ) :
0127         my_range(r),
0128         my_body(parent_.my_body),
0129         my_parent(nullptr),
0130         my_partition(parent_.my_partition, split()),
0131         my_allocator(alloc),
0132         is_right_child(true)
0133     {
0134         my_partition.align_depth( d );
0135         parent_.is_right_child = false;
0136     }
0137     static void run(const Range& range, Body& body, Partitioner& partitioner, task_group_context& context) {
0138         if ( !range.empty() ) {
0139             wait_node wn;
0140             small_object_allocator alloc{};
0141             auto reduce_task = alloc.new_object<start_reduce>(range, body, partitioner, alloc);
0142             reduce_task->my_parent = &wn;
0143             execute_and_wait(*reduce_task, context, wn.m_wait, context);
0144         }
0145     }
0146     static void run(const Range& range, Body& body, Partitioner& partitioner) {
0147         // Bound context prevents exceptions from body to affect nesting or sibling algorithms,
0148         // and allows users to handle exceptions safely by wrapping parallel_reduce in the try-block.
0149         task_group_context context(PARALLEL_REDUCE);
0150         run(range, body, partitioner, context);
0151     }
0152     //! Run body for range, serves as callback for partitioner
0153     void run_body( Range &r ) {
0154         tbb::detail::invoke(*my_body, r);
0155     }
0156 
0157     //! spawn right task, serves as callback for partitioner
0158     void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) {
0159         offer_work_impl(ed, *this, split_obj);
0160     }
0161     //! spawn right task, serves as callback for partitioner
0162     void offer_work(const Range& r, depth_t d, execution_data& ed) {
0163         offer_work_impl(ed, *this, r, d);
0164     }
0165 
0166 private:
0167     template <typename... Args>
0168     void offer_work_impl(execution_data& ed, Args&&... args) {
0169         small_object_allocator alloc{};
0170         // New right child
0171         auto right_child = alloc.new_object<start_reduce>(ed, std::forward<Args>(args)..., alloc);
0172 
0173         // New root node as a continuation and ref count. Left and right child attach to the new parent.
0174         right_child->my_parent = my_parent = alloc.new_object<tree_node_type>(ed, my_parent, 2, *my_body, alloc);
0175 
0176         // Spawn the right sibling
0177         right_child->spawn_self(ed);
0178     }
0179 
0180     void spawn_self(execution_data& ed) {
0181         my_partition.spawn_task(*this, *context(ed));
0182     }
0183 };
0184 
0185 //! fold the tree and deallocate the task
0186 template<typename Range, typename Body, typename Partitioner>
0187 void start_reduce<Range, Body, Partitioner>::finalize(const execution_data& ed) {
0188     // Get the current parent and wait object before an object destruction
0189     node* parent = my_parent;
0190     auto allocator = my_allocator;
0191     // Task execution finished - destroy it
0192     this->~start_reduce();
0193     // Unwind the tree decrementing the parent`s reference count
0194     fold_tree<tree_node_type>(parent, ed);
0195     allocator.deallocate(this, ed);
0196 }
0197 
0198 //! Execute parallel_reduce task
0199 template<typename Range, typename Body, typename Partitioner>
0200 task* start_reduce<Range,Body,Partitioner>::execute(execution_data& ed) {
0201     if (!is_same_affinity(ed)) {
0202         my_partition.note_affinity(execution_slot(ed));
0203     }
0204     my_partition.check_being_stolen(*this, ed);
0205 
0206     // The acquire barrier synchronizes the data pointed with my_body if the left
0207     // task has already finished.
0208     __TBB_ASSERT(my_parent, nullptr);
0209     if( is_right_child && my_parent->m_ref_count.load(std::memory_order_acquire) == 2 ) {
0210         tree_node_type* parent_ptr = static_cast<tree_node_type*>(my_parent);
0211         my_body = static_cast<Body*>(new( parent_ptr->zombie_space.begin() ) Body(*my_body, split()));
0212         parent_ptr->has_right_zombie = true;
0213     }
0214     __TBB_ASSERT(my_body != nullptr, "Incorrect body value");
0215 
0216     my_partition.execute(*this, my_range, ed);
0217 
0218     finalize(ed);
0219     return nullptr;
0220 }
0221 
0222 //! Cancel parallel_reduce task
0223 template<typename Range, typename Body, typename Partitioner>
0224 task* start_reduce<Range, Body, Partitioner>::cancel(execution_data& ed) {
0225     finalize(ed);
0226     return nullptr;
0227 }
0228 
0229 //! Tree node type for parallel_deterministic_reduce.
0230 /** @ingroup algorithms */
0231 template<typename Body>
0232 struct deterministic_reduction_tree_node : public tree_node {
0233     Body right_body;
0234     Body& left_body;
0235 
0236     deterministic_reduction_tree_node(node* parent, int ref_count, Body& input_left_body, small_object_allocator& alloc) :
0237         tree_node{parent, ref_count, alloc},
0238         right_body{input_left_body, detail::split()},
0239         left_body(input_left_body)
0240     {}
0241 
0242     void join(task_group_context* context) {
0243         if (!context->is_group_execution_cancelled())
0244             left_body.join(right_body);
0245     }
0246 };
0247 
0248 //! Task type used to split the work of parallel_deterministic_reduce.
0249 /** @ingroup algorithms */
0250 template<typename Range, typename Body, typename Partitioner>
0251 struct start_deterministic_reduce : public task {
0252     Range my_range;
0253     Body& my_body;
0254     node* my_parent;
0255 
0256     typename Partitioner::task_partition_type my_partition;
0257     small_object_allocator my_allocator;
0258 
0259     task* execute(execution_data&) override;
0260     task* cancel(execution_data&) override;
0261     void finalize(const execution_data&);
0262 
0263     using tree_node_type = deterministic_reduction_tree_node<Body>;
0264 
0265     //! Constructor deterministic_reduce root task.
0266     start_deterministic_reduce( const Range& range, Partitioner& partitioner, Body& body, small_object_allocator& alloc ) :
0267         my_range(range),
0268         my_body(body),
0269         my_parent(nullptr),
0270         my_partition(partitioner),
0271         my_allocator(alloc) {}
0272     //! Splitting constructor used to generate children.
0273     /** parent_ becomes left child.  Newly constructed object is right child. */
0274     start_deterministic_reduce( start_deterministic_reduce& parent_, typename Partitioner::split_type& split_obj, Body& body,
0275                                 small_object_allocator& alloc ) :
0276         my_range(parent_.my_range, get_range_split_object<Range>(split_obj)),
0277         my_body(body),
0278         my_parent(nullptr),
0279         my_partition(parent_.my_partition, split_obj),
0280         my_allocator(alloc) {}
0281     static void run(const Range& range, Body& body, Partitioner& partitioner, task_group_context& context) {
0282         if ( !range.empty() ) {
0283             wait_node wn;
0284             small_object_allocator alloc{};
0285             auto deterministic_reduce_task =
0286                 alloc.new_object<start_deterministic_reduce>(range, partitioner, body, alloc);
0287             deterministic_reduce_task->my_parent = &wn;
0288             execute_and_wait(*deterministic_reduce_task, context, wn.m_wait, context);
0289         }
0290     }
0291     static void run(const Range& range, Body& body, Partitioner& partitioner) {
0292         // Bound context prevents exceptions from body to affect nesting or sibling algorithms,
0293         // and allows users to handle exceptions safely by wrapping parallel_deterministic_reduce
0294         // in the try-block.
0295         task_group_context context(PARALLEL_REDUCE);
0296         run(range, body, partitioner, context);
0297     }
0298     //! Run body for range, serves as callback for partitioner
0299     void run_body( Range &r ) {
0300         tbb::detail::invoke(my_body, r);
0301     }
0302     //! Spawn right task, serves as callback for partitioner
0303     void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) {
0304         offer_work_impl(ed, *this, split_obj);
0305     }
0306 private:
0307     template <typename... Args>
0308     void offer_work_impl(execution_data& ed, Args&&... args) {
0309         small_object_allocator alloc{};
0310         // New root node as a continuation and ref count. Left and right child attach to the new parent. Split the body.
0311         auto new_tree_node = alloc.new_object<tree_node_type>(ed, my_parent, 2, my_body, alloc);
0312 
0313         // New right child
0314         auto right_child = alloc.new_object<start_deterministic_reduce>(ed, std::forward<Args>(args)..., new_tree_node->right_body, alloc);
0315 
0316         right_child->my_parent = my_parent = new_tree_node;
0317 
0318         // Spawn the right sibling
0319         right_child->spawn_self(ed);
0320     }
0321 
0322     void spawn_self(execution_data& ed) {
0323         my_partition.spawn_task(*this, *context(ed));
0324     }
0325 };
0326 
0327 //! Fold the tree and deallocate the task
0328 template<typename Range, typename Body, typename Partitioner>
0329 void start_deterministic_reduce<Range, Body, Partitioner>::finalize(const execution_data& ed) {
0330     // Get the current parent and wait object before an object destruction
0331     node* parent = my_parent;
0332 
0333     auto allocator = my_allocator;
0334     // Task execution finished - destroy it
0335     this->~start_deterministic_reduce();
0336     // Unwind the tree decrementing the parent`s reference count
0337     fold_tree<tree_node_type>(parent, ed);
0338     allocator.deallocate(this, ed);
0339 }
0340 
0341 //! Execute parallel_deterministic_reduce task
0342 template<typename Range, typename Body, typename Partitioner>
0343 task* start_deterministic_reduce<Range,Body,Partitioner>::execute(execution_data& ed) {
0344     if (!is_same_affinity(ed)) {
0345         my_partition.note_affinity(execution_slot(ed));
0346     }
0347     my_partition.check_being_stolen(*this, ed);
0348 
0349     my_partition.execute(*this, my_range, ed);
0350 
0351     finalize(ed);
0352     return nullptr;
0353 }
0354 
0355 //! Cancel parallel_deterministic_reduce task
0356 template<typename Range, typename Body, typename Partitioner>
0357 task* start_deterministic_reduce<Range, Body, Partitioner>::cancel(execution_data& ed) {
0358     finalize(ed);
0359     return nullptr;
0360 }
0361 
0362 
0363 //! Auxiliary class for parallel_reduce; for internal use only.
0364 /** The adaptor class that implements \ref parallel_reduce_body_req "parallel_reduce Body"
0365     using given \ref parallel_reduce_lambda_req "anonymous function objects".
0366  **/
0367 /** @ingroup algorithms */
0368 template<typename Range, typename Value, typename RealBody, typename Reduction>
0369 class lambda_reduce_body {
0370 //TODO: decide if my_real_body, my_reduction, and my_identity_element should be copied or referenced
0371 //       (might require some performance measurements)
0372 
0373     const Value&     my_identity_element;
0374     const RealBody&  my_real_body;
0375     const Reduction& my_reduction;
0376     Value            my_value;
0377     lambda_reduce_body& operator= ( const lambda_reduce_body& other );
0378 public:
0379     lambda_reduce_body( const Value& identity, const RealBody& body, const Reduction& reduction )
0380         : my_identity_element(identity)
0381         , my_real_body(body)
0382         , my_reduction(reduction)
0383         , my_value(identity)
0384     { }
0385     lambda_reduce_body( const lambda_reduce_body& other ) = default;
0386     lambda_reduce_body( lambda_reduce_body& other, tbb::split )
0387         : my_identity_element(other.my_identity_element)
0388         , my_real_body(other.my_real_body)
0389         , my_reduction(other.my_reduction)
0390         , my_value(other.my_identity_element)
0391     { }
0392     void operator()(Range& range) {
0393         my_value = tbb::detail::invoke(my_real_body, range, std::move(my_value));
0394     }
0395 
0396     void join( lambda_reduce_body& rhs ) {
0397         my_value = tbb::detail::invoke(my_reduction, std::move(my_value), std::move(rhs.my_value));
0398     }
0399 
0400     __TBB_nodiscard Value&& result() && noexcept {
0401         return std::move(my_value);
0402     }
0403 };
0404 
0405 
0406 // Requirements on Range concept are documented in blocked_range.h
0407 
0408 /** \page parallel_reduce_body_req Requirements on parallel_reduce body
0409     Class \c Body implementing the concept of parallel_reduce body must define:
0410     - \code Body::Body( Body&, split ); \endcode        Splitting constructor.
0411                                                         Must be able to run concurrently with operator() and method \c join
0412     - \code Body::~Body(); \endcode                     Destructor
0413     - \code void Body::operator()( Range& r ); \endcode Function call operator applying body to range \c r
0414                                                         and accumulating the result
0415     - \code void Body::join( Body& b ); \endcode        Join results.
0416                                                         The result in \c b should be merged into the result of \c this
0417 **/
0418 
0419 /** \page parallel_reduce_lambda_req Requirements on parallel_reduce anonymous function objects (lambda functions)
0420     TO BE DOCUMENTED
0421 **/
0422 
0423 /** \name parallel_reduce
0424     See also requirements on \ref range_req "Range" and \ref parallel_reduce_body_req "parallel_reduce Body". **/
0425 //@{
0426 
0427 //! Parallel iteration with reduction and default partitioner.
0428 /** @ingroup algorithms **/
0429 template<typename Range, typename Body>
0430     __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
0431 void parallel_reduce( const Range& range, Body& body ) {
0432     start_reduce<Range,Body, const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER() );
0433 }
0434 
0435 //! Parallel iteration with reduction and simple_partitioner
0436 /** @ingroup algorithms **/
0437 template<typename Range, typename Body>
0438     __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
0439 void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) {
0440     start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner );
0441 }
0442 
0443 //! Parallel iteration with reduction and auto_partitioner
0444 /** @ingroup algorithms **/
0445 template<typename Range, typename Body>
0446     __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
0447 void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner ) {
0448     start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner );
0449 }
0450 
0451 //! Parallel iteration with reduction and static_partitioner
0452 /** @ingroup algorithms **/
0453 template<typename Range, typename Body>
0454     __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
0455 void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) {
0456     start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner );
0457 }
0458 
0459 //! Parallel iteration with reduction and affinity_partitioner
0460 /** @ingroup algorithms **/
0461 template<typename Range, typename Body>
0462     __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
0463 void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner ) {
0464     start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner );
0465 }
0466 
0467 //! Parallel iteration with reduction, default partitioner and user-supplied context.
0468 /** @ingroup algorithms **/
0469 template<typename Range, typename Body>
0470     __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
0471 void parallel_reduce( const Range& range, Body& body, task_group_context& context ) {
0472     start_reduce<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER(), context );
0473 }
0474 
0475 //! Parallel iteration with reduction, simple partitioner and user-supplied context.
0476 /** @ingroup algorithms **/
0477 template<typename Range, typename Body>
0478     __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
0479 void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
0480     start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner, context );
0481 }
0482 
0483 //! Parallel iteration with reduction, auto_partitioner and user-supplied context
0484 /** @ingroup algorithms **/
0485 template<typename Range, typename Body>
0486     __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
0487 void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner, task_group_context& context ) {
0488     start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner, context );
0489 }
0490 
0491 //! Parallel iteration with reduction, static_partitioner and user-supplied context
0492 /** @ingroup algorithms **/
0493 template<typename Range, typename Body>
0494     __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
0495 void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) {
0496     start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner, context );
0497 }
0498 
0499 //! Parallel iteration with reduction, affinity_partitioner and user-supplied context
0500 /** @ingroup algorithms **/
0501 template<typename Range, typename Body>
0502     __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
0503 void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner, task_group_context& context ) {
0504     start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner, context );
0505 }
0506 /** parallel_reduce overloads that work with anonymous function objects
0507     (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/
0508 
0509 //! Parallel iteration with reduction and default partitioner.
0510 /** @ingroup algorithms **/
0511 template<typename Range, typename Value, typename RealBody, typename Reduction>
0512     __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
0513                    parallel_reduce_combine<Reduction, Value>)
0514 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
0515     lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
0516     start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER>
0517                           ::run(range, body, __TBB_DEFAULT_PARTITIONER() );
0518     return std::move(body).result();
0519 }
0520 
0521 //! Parallel iteration with reduction and simple_partitioner.
0522 /** @ingroup algorithms **/
0523 template<typename Range, typename Value, typename RealBody, typename Reduction>
0524     __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
0525                    parallel_reduce_combine<Reduction, Value>)
0526 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
0527                        const simple_partitioner& partitioner ) {
0528     lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
0529     start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
0530                           ::run(range, body, partitioner );
0531     return std::move(body).result();
0532 }
0533 
0534 //! Parallel iteration with reduction and auto_partitioner
0535 /** @ingroup algorithms **/
0536 template<typename Range, typename Value, typename RealBody, typename Reduction>
0537     __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
0538                    parallel_reduce_combine<Reduction, Value>)
0539 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
0540                        const auto_partitioner& partitioner ) {
0541     lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
0542     start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
0543                           ::run( range, body, partitioner );
0544     return std::move(body).result();
0545 }
0546 
0547 //! Parallel iteration with reduction and static_partitioner
0548 /** @ingroup algorithms **/
0549 template<typename Range, typename Value, typename RealBody, typename Reduction>
0550     __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
0551                    parallel_reduce_combine<Reduction, Value>)
0552 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
0553                        const static_partitioner& partitioner ) {
0554     lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
0555     start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner>
0556                                         ::run( range, body, partitioner );
0557     return std::move(body).result();
0558 }
0559 
0560 //! Parallel iteration with reduction and affinity_partitioner
0561 /** @ingroup algorithms **/
0562 template<typename Range, typename Value, typename RealBody, typename Reduction>
0563     __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
0564                    parallel_reduce_combine<Reduction, Value>)
0565 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
0566                        affinity_partitioner& partitioner ) {
0567     lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
0568     start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
0569                                         ::run( range, body, partitioner );
0570     return std::move(body).result();
0571 }
0572 
0573 //! Parallel iteration with reduction, default partitioner and user-supplied context.
0574 /** @ingroup algorithms **/
0575 template<typename Range, typename Value, typename RealBody, typename Reduction>
0576     __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
0577                    parallel_reduce_combine<Reduction, Value>)
0578 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
0579                        task_group_context& context ) {
0580     lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
0581     start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER>
0582                           ::run( range, body, __TBB_DEFAULT_PARTITIONER(), context );
0583     return std::move(body).result();
0584 }
0585 
0586 //! Parallel iteration with reduction, simple partitioner and user-supplied context.
0587 /** @ingroup algorithms **/
0588 template<typename Range, typename Value, typename RealBody, typename Reduction>
0589     __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
0590                    parallel_reduce_combine<Reduction, Value>)
0591 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
0592                        const simple_partitioner& partitioner, task_group_context& context ) {
0593     lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
0594     start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
0595                           ::run( range, body, partitioner, context );
0596     return std::move(body).result();
0597 }
0598 
0599 //! Parallel iteration with reduction, auto_partitioner and user-supplied context
0600 /** @ingroup algorithms **/
0601 template<typename Range, typename Value, typename RealBody, typename Reduction>
0602     __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
0603                    parallel_reduce_combine<Reduction, Value>)
0604 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
0605                        const auto_partitioner& partitioner, task_group_context& context ) {
0606     lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
0607     start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
0608                           ::run( range, body, partitioner, context );
0609     return std::move(body).result();
0610 }
0611 
0612 //! Parallel iteration with reduction, static_partitioner and user-supplied context
0613 /** @ingroup algorithms **/
0614 template<typename Range, typename Value, typename RealBody, typename Reduction>
0615     __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
0616                    parallel_reduce_combine<Reduction, Value>)
0617 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
0618                        const static_partitioner& partitioner, task_group_context& context ) {
0619     lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
0620     start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner>
0621                                         ::run( range, body, partitioner, context );
0622     return std::move(body).result();
0623 }
0624 
0625 //! Parallel iteration with reduction, affinity_partitioner and user-supplied context
0626 /** @ingroup algorithms **/
0627 template<typename Range, typename Value, typename RealBody, typename Reduction>
0628     __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
0629                    parallel_reduce_combine<Reduction, Value>)
0630 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
0631                        affinity_partitioner& partitioner, task_group_context& context ) {
0632     lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
0633     start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
0634                                         ::run( range, body, partitioner, context );
0635     return std::move(body).result();
0636 }
0637 
0638 //! Parallel iteration with deterministic reduction and default simple partitioner.
0639 /** @ingroup algorithms **/
0640 template<typename Range, typename Body>
0641     __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
0642 void parallel_deterministic_reduce( const Range& range, Body& body ) {
0643     start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, simple_partitioner());
0644 }
0645 
0646 //! Parallel iteration with deterministic reduction and simple partitioner.
0647 /** @ingroup algorithms **/
0648 template<typename Range, typename Body>
0649     __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
0650 void parallel_deterministic_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) {
0651     start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, partitioner);
0652 }
0653 
0654 //! Parallel iteration with deterministic reduction and static partitioner.
0655 /** @ingroup algorithms **/
0656 template<typename Range, typename Body>
0657     __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
0658 void parallel_deterministic_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) {
0659     start_deterministic_reduce<Range, Body, const static_partitioner>::run(range, body, partitioner);
0660 }
0661 
0662 //! Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context.
0663 /** @ingroup algorithms **/
0664 template<typename Range, typename Body>
0665     __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
0666 void parallel_deterministic_reduce( const Range& range, Body& body, task_group_context& context ) {
0667     start_deterministic_reduce<Range,Body, const simple_partitioner>::run( range, body, simple_partitioner(), context );
0668 }
0669 
0670 //! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context.
0671 /** @ingroup algorithms **/
0672 template<typename Range, typename Body>
0673     __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
0674 void parallel_deterministic_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
0675     start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, partitioner, context);
0676 }
0677 
0678 //! Parallel iteration with deterministic reduction, static partitioner and user-supplied context.
0679 /** @ingroup algorithms **/
0680 template<typename Range, typename Body>
0681     __TBB_requires(tbb_range<Range> && parallel_reduce_body<Body, Range>)
0682 void parallel_deterministic_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) {
0683     start_deterministic_reduce<Range, Body, const static_partitioner>::run(range, body, partitioner, context);
0684 }
0685 
0686 /** parallel_deterministic_reduce overloads that work with anonymous function objects
0687     (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/
0688 
0689 //! Parallel iteration with deterministic reduction and default simple partitioner.
0690 // TODO: consider making static_partitioner the default
0691 /** @ingroup algorithms **/
0692 template<typename Range, typename Value, typename RealBody, typename Reduction>
0693     __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
0694                    parallel_reduce_combine<Reduction, Value>)
0695 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
0696     return parallel_deterministic_reduce(range, identity, real_body, reduction, simple_partitioner());
0697 }
0698 
0699 //! Parallel iteration with deterministic reduction and simple partitioner.
0700 /** @ingroup algorithms **/
0701 template<typename Range, typename Value, typename RealBody, typename Reduction>
0702     __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
0703                    parallel_reduce_combine<Reduction, Value>)
0704 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const simple_partitioner& partitioner ) {
0705     lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
0706     start_deterministic_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>, const simple_partitioner>
0707                           ::run(range, body, partitioner);
0708     return std::move(body).result();
0709 }
0710 
0711 //! Parallel iteration with deterministic reduction and static partitioner.
0712 /** @ingroup algorithms **/
0713 template<typename Range, typename Value, typename RealBody, typename Reduction>
0714     __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
0715                    parallel_reduce_combine<Reduction, Value>)
0716 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const static_partitioner& partitioner ) {
0717     lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction);
0718     start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const static_partitioner>
0719         ::run(range, body, partitioner);
0720     return std::move(body).result();
0721 }
0722 
0723 //! Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context.
0724 /** @ingroup algorithms **/
0725 template<typename Range, typename Value, typename RealBody, typename Reduction>
0726     __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
0727                    parallel_reduce_combine<Reduction, Value>)
0728 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
0729     task_group_context& context ) {
0730     return parallel_deterministic_reduce(range, identity, real_body, reduction, simple_partitioner(), context);
0731 }
0732 
0733 //! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context.
0734 /** @ingroup algorithms **/
0735 template<typename Range, typename Value, typename RealBody, typename Reduction>
0736     __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
0737                    parallel_reduce_combine<Reduction, Value>)
0738 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
0739     const simple_partitioner& partitioner, task_group_context& context ) {
0740     lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction);
0741     start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const simple_partitioner>
0742         ::run(range, body, partitioner, context);
0743     return std::move(body).result();
0744 }
0745 
0746 //! Parallel iteration with deterministic reduction, static partitioner and user-supplied context.
0747 /** @ingroup algorithms **/
0748 template<typename Range, typename Value, typename RealBody, typename Reduction>
0749     __TBB_requires(tbb_range<Range> && parallel_reduce_function<RealBody, Range, Value> &&
0750                    parallel_reduce_combine<Reduction, Value>)
0751 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
0752     const static_partitioner& partitioner, task_group_context& context ) {
0753     lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction);
0754     start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const static_partitioner>
0755         ::run(range, body, partitioner, context);
0756     return std::move(body).result();
0757 }
0758 //@}
0759 
0760 } // namespace d1
0761 } // namespace detail
0762 
0763 inline namespace v1 {
0764 using detail::d1::parallel_reduce;
0765 using detail::d1::parallel_deterministic_reduce;
0766 // Split types
0767 using detail::split;
0768 using detail::proportional_split;
0769 } // namespace v1
0770 
0771 } // namespace tbb
0772 #endif /* __TBB_parallel_reduce_H */