#ifndef __TBB_partitioner_H
#define __TBB_partitioner_H

#ifndef __TBB_INITIAL_CHUNKS
// initial task divisions per thread
#define __TBB_INITIAL_CHUNKS 2
#endif
#ifndef __TBB_RANGE_POOL_CAPACITY
// maximum number of elements in range pool
#define __TBB_RANGE_POOL_CAPACITY 8
#endif
#ifndef __TBB_INIT_DEPTH
// initial value for depth of range pool
#define __TBB_INIT_DEPTH 5
#endif
#ifndef __TBB_DEMAND_DEPTH_ADD
// when imbalance is found range splits this value times more
#define __TBB_DEMAND_DEPTH_ADD 1
#endif

#include "detail/_config.h"
#include "detail/_namespace_injection.h"
#include "detail/_aligned_space.h"
#include "detail/_utils.h"
#include "detail/_template_helpers.h"
#include "detail/_range_common.h"
#include "detail/_task.h"
#include "detail/_small_object_pool.h"

#include "cache_aligned_allocator.h"
#include "task_group.h" // task_group_context
#include "task_arena.h"

#include <algorithm>
#include <atomic>
#include <type_traits>

#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
    // Workaround for overzealous compiler warnings
    #pragma warning (push)
    #pragma warning (disable: 4244)
#endif

namespace tbb {
namespace detail {

namespace d1 {
class auto_partitioner;
class simple_partitioner;
class static_partitioner;
class affinity_partitioner;
class affinity_partition_type;
class affinity_partitioner_base;

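// Requests four chunks per available worker thread as the initial division of the work.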
inline std::size_t get_initial_auto_partitioner_divisor() {
    const std::size_t factor = 4;
    return factor * static_cast<std::size_t>(max_concurrency());
}

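//! Defines entry point for affinity partitioner into oneTBB run-time library.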
class affinity_partitioner_base: no_copy {
    friend class affinity_partitioner;
    friend class affinity_partition_type;

    //! Array that remembers affinities of tree positions to affinity_id.
    /** nullptr if my_size==0. */
    slot_id* my_array;

    //! Number of elements in my_array.
    std::size_t my_size;

    affinity_partitioner_base() : my_array(nullptr), my_size(0) {}

    ~affinity_partitioner_base() { resize(0); }

    //! Resize my_array.
    /** A factor of zero simply deallocates the array. */
    void resize(unsigned factor) {
        unsigned max_threads_in_arena = static_cast<unsigned>(max_concurrency());
        std::size_t new_size = factor ? factor * max_threads_in_arena : 0;
        if (new_size != my_size) {
            if (my_array) {
                r1::cache_aligned_deallocate(my_array);
                // Following two assignments must be done here for sake of exception safety.
                my_array = nullptr;
                my_size = 0;
            }
            if (new_size) {
                my_array = static_cast<slot_id*>(r1::cache_aligned_allocate(new_size * sizeof(slot_id)));
                std::fill_n(my_array, new_size, no_slot);
                my_size = new_size;
            }
        }
    }
};

template<typename Range, typename Body, typename Partitioner> struct start_for;
template<typename Range, typename Body, typename Partitioner> struct start_scan;
template<typename Range, typename Body, typename Partitioner> struct start_reduce;
template<typename Range, typename Body, typename Partitioner> struct start_deterministic_reduce;

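//! Reference-counted node of the task tree built by recursive splitting.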
struct node {
    node* my_parent{};
    std::atomic<int> m_ref_count{};

    node() = default;
    node(node* parent, int ref_count) :
        my_parent{parent}, m_ref_count{ref_count} {
        __TBB_ASSERT(ref_count > 0, "The ref count must be positive");
    }
};

struct wait_node : node {
    wait_node() : node{ nullptr, 1 } {}
    wait_context m_wait{1};
};

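//! Interior node of the splitting tree; allocated from the small object pool
//! and tracks whether one of its child tasks was stolen.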
struct tree_node : public node {
    small_object_allocator m_allocator;
    std::atomic<bool> m_child_stolen{false};

    tree_node(node* parent, int ref_count, small_object_allocator& alloc)
        : node{parent, ref_count}
        , m_allocator{alloc} {}

    void join(task_group_context*) {}

    template <typename Task>
    static void mark_task_stolen(Task &t) {
        std::atomic<bool> &flag = static_cast<tree_node*>(t.my_parent)->m_child_stolen;
#if TBB_USE_PROFILING_TOOLS
        // Threading tools need the write to be a locked operation to avoid false-positive race reports.
        flag.exchange(true);
#else
        flag.store(true, std::memory_order_relaxed);
#endif
    }
    template <typename Task>
    static bool is_peer_stolen(Task &t) {
        return static_cast<tree_node*>(t.my_parent)->m_child_stolen.load(std::memory_order_relaxed);
    }
};

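//! Walks the tree bottom-up, releasing one reference per node; joins and frees each
//! node whose reference count reaches zero and finally releases the root's wait_context.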
template<typename TreeNodeType>
void fold_tree(node* n, const execution_data& ed) {
    for (;;) {
        __TBB_ASSERT(n, nullptr);
        __TBB_ASSERT(n->m_ref_count.load(std::memory_order_relaxed) > 0, "The refcount must be positive.");
        call_itt_task_notify(releasing, n);
        if (--n->m_ref_count > 0) {
            return;
        }
        node* parent = n->my_parent;
        if (!parent) {
            break;
        }
        call_itt_task_notify(acquired, n);
        TreeNodeType* self = static_cast<TreeNodeType*>(n);
        self->join(ed.context);
        self->m_allocator.delete_object(self, ed);
        n = parent;
    }
    // Only the root node has no parent.
    static_cast<wait_node*>(n)->m_wait.release();
}

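//! Depth is a relative depth of recursive division inside a range pool. Relative depth allows
//! infinite absolute depth of the recursion for heavily unbalanced workloads with range represented
//! as a tree.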
typedef unsigned char depth_t;

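//! Range pool stores ranges of type T in a circular buffer with MaxCapacity,
//! together with the relative depth of each stored range.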
template <typename T, depth_t MaxCapacity>
class range_vector {
    depth_t my_head;
    depth_t my_tail;
    depth_t my_size;
    depth_t my_depth[MaxCapacity];
    tbb::detail::aligned_space<T, MaxCapacity> my_pool;

public:
    //! Initializes the pool with a single range at depth 0.
    range_vector(const T& elem) : my_head(0), my_tail(0), my_size(1) {
        my_depth[0] = 0;
        new( static_cast<void *>(my_pool.begin()) ) T(elem);
    }
    ~range_vector() {
        while( !empty() ) pop_back();
    }
    bool empty() const { return my_size == 0; }
    depth_t size() const { return my_size; }
    //! Splits the back range until the pool is full or the range stops being
    //! divisible at the given depth limit.
    void split_to_fill(depth_t max_depth) {
        while( my_size < MaxCapacity && is_divisible(max_depth) ) {
            depth_t prev = my_head;
            my_head = (my_head + 1) % MaxCapacity;
            new(my_pool.begin()+my_head) T(my_pool.begin()[prev]); // copy the back range
            my_pool.begin()[prev].~T(); // instead of assignment
            new(my_pool.begin()+prev) T(my_pool.begin()[my_head], detail::split()); // do 'inverse' split
            my_depth[my_head] = ++my_depth[prev];
            my_size++;
        }
    }
    void pop_back() {
        __TBB_ASSERT(my_size > 0, "range_vector::pop_back() with empty size");
        my_pool.begin()[my_head].~T();
        my_size--;
        my_head = (my_head + MaxCapacity - 1) % MaxCapacity;
    }
    void pop_front() {
        __TBB_ASSERT(my_size > 0, "range_vector::pop_front() with empty size");
        my_pool.begin()[my_tail].~T();
        my_size--;
        my_tail = (my_tail + 1) % MaxCapacity;
    }
    T& back() {
        __TBB_ASSERT(my_size > 0, "range_vector::back() with empty size");
        return my_pool.begin()[my_head];
    }
    T& front() {
        __TBB_ASSERT(my_size > 0, "range_vector::front() with empty size");
        return my_pool.begin()[my_tail];
    }
    //! Similarly to front(), returns depth of the first range in the pool.
    depth_t front_depth() {
        __TBB_ASSERT(my_size > 0, "range_vector::front_depth() with empty size");
        return my_depth[my_tail];
    }
    depth_t back_depth() {
        __TBB_ASSERT(my_size > 0, "range_vector::back_depth() with empty size");
        return my_depth[my_head];
    }
    bool is_divisible(depth_t max_depth) {
        return back_depth() < max_depth && back().is_divisible();
    }
};

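//! Provides default methods for partition objects and common algorithm blocks.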
template <typename Partition>
struct partition_type_base {
    typedef detail::split split_type;
    // decision makers
    void note_affinity( slot_id ) {}
    template <typename Task>
    bool check_being_stolen(Task&, const execution_data&) { return false; }
    template <typename Range> split_type get_split() { return split(); }
    Partition& self() { return *static_cast<Partition*>(this); }

    template<typename StartType, typename Range>
    void work_balance(StartType &start, Range &range, const execution_data&) {
        start.run_body( range ); // simple partitioner always goes here
    }

    template<typename StartType, typename Range>
    void execute(StartType &start, Range &range, execution_data& ed) {
        // The algorithm in a few words ([]-denotes calls to decision methods of partitioner):
        // [If this task is stolen, adjust depth and divisions if necessary, set flag].
        // If range is divisible {
        //    Spread the work while [initial divisions left];
        //    Create trap task [if necessary];
        // }
        // If not divisible or [max depth is reached], execute, else do the range pool part
        if ( range.is_divisible() ) {
            if ( self().is_divisible() ) {
                do { // split until is divisible
                    typename Partition::split_type split_obj = self().template get_split<Range>();
                    start.offer_work( split_obj, ed );
                } while ( range.is_divisible() && self().is_divisible() );
            }
        }
        self().work_balance(start, range, ed);
    }
};

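//! Provides default splitting strategy for partition objects.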
template <typename Partition>
struct adaptive_mode : partition_type_base<Partition> {
    typedef Partition my_partition;
    std::size_t my_divisor;
    // For affinity_partitioner, my_divisor indicates the number of affinity array indices the task reserves.
    // A task which has only one index must produce the right split without reserved index in order to avoid
    // it being overwritten in note_affinity() of the created (right) task.
    // I.e. a task created deeper than the affinity array can remember must not save its affinity (LIFO order).
    static const unsigned factor = 1;
    adaptive_mode() : my_divisor(get_initial_auto_partitioner_divisor() / 4 * my_partition::factor) {}
    adaptive_mode(adaptive_mode &src, split) : my_divisor(do_split(src, split())) {}
    adaptive_mode(adaptive_mode&, const proportional_split&) : my_divisor(0)
    {
        // left blank as my_divisor gets overridden in the successors' constructors
    }
    /*! Override do_split methods in order to specify splitting strategy */
    std::size_t do_split(adaptive_mode &src, split) {
        return src.my_divisor /= 2u;
    }
};

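//! Provides proportional splitting strategy for partition objects.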
template <typename Partition>
struct proportional_mode : adaptive_mode<Partition> {
    typedef Partition my_partition;
    using partition_type_base<Partition>::self;

    proportional_mode() : adaptive_mode<Partition>() {}
    proportional_mode(proportional_mode &src, split) : adaptive_mode<Partition>(src, split()) {}
    proportional_mode(proportional_mode &src, const proportional_split& split_obj)
        : adaptive_mode<Partition>(src, split_obj)
    {
        self().my_divisor = do_split(src, split_obj);
    }
    std::size_t do_split(proportional_mode &src, const proportional_split& split_obj) {
        std::size_t portion = split_obj.right() * my_partition::factor;
        // round the portion to a multiple of factor
        portion = (portion + my_partition::factor/2) & (0ul - my_partition::factor);
        src.my_divisor -= portion;
        return portion;
    }
    bool is_divisible() { // part of old should_execute_range()
        return self().my_divisor > my_partition::factor;
    }
    template <typename Range>
    proportional_split get_split() {
        // divide the remaining chunks roughly evenly between the two halves
        std::size_t n = self().my_divisor / my_partition::factor;
        std::size_t right = n / 2;
        std::size_t left = n - right;
        return proportional_split(left, right);
    }
};

static std::size_t get_initial_partition_head() {
    int current_index = tbb::this_task_arena::current_thread_index();
    if (current_index == tbb::task_arena::not_initialized)
        current_index = 0;
    return size_t(current_index);
}

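//! Provides default linear indexing of partitioner's sequence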
template <typename Partition>
struct linear_affinity_mode : proportional_mode<Partition> {
    std::size_t my_head;
    std::size_t my_max_affinity;
    using proportional_mode<Partition>::self;
    linear_affinity_mode() : proportional_mode<Partition>(), my_head(get_initial_partition_head()),
                             my_max_affinity(self().my_divisor) {}
    linear_affinity_mode(linear_affinity_mode &src, split) : proportional_mode<Partition>(src, split())
        , my_head((src.my_head + src.my_divisor) % src.my_max_affinity), my_max_affinity(src.my_max_affinity) {}
    linear_affinity_mode(linear_affinity_mode &src, const proportional_split& split_obj) : proportional_mode<Partition>(src, split_obj)
        , my_head((src.my_head + src.my_divisor) % src.my_max_affinity), my_max_affinity(src.my_max_affinity) {}
    void spawn_task(task& t, task_group_context& ctx) {
        if (self().my_divisor) {
            spawn(t, ctx, slot_id(my_head));
        } else {
            spawn(t, ctx);
        }
    }
};

static bool is_stolen_task(const execution_data& ed) {
    return execution_slot(ed) != original_slot(ed);
}

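/*! Determine work-balance phase implementing splitting & stealing actions */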
template<class Mode>
struct dynamic_grainsize_mode : Mode {
    using Mode::self;
    enum {
        begin = 0,
        run,
        pass
    } my_delay;
    depth_t my_max_depth;
    static const unsigned range_pool_size = __TBB_RANGE_POOL_CAPACITY;
    dynamic_grainsize_mode(): Mode()
        , my_delay(begin)
        , my_max_depth(__TBB_INIT_DEPTH) {}
    dynamic_grainsize_mode(dynamic_grainsize_mode& p, split)
        : Mode(p, split())
        , my_delay(pass)
        , my_max_depth(p.my_max_depth) {}
    dynamic_grainsize_mode(dynamic_grainsize_mode& p, const proportional_split& split_obj)
        : Mode(p, split_obj)
        , my_delay(begin)
        , my_max_depth(p.my_max_depth) {}
    template <typename Task>
    bool check_being_stolen(Task &t, const execution_data& ed) { // part of old should_execute_range()
        if( !(self().my_divisor / Mode::my_partition::factor) ) { // if not from the top P tasks of binary tree
            self().my_divisor = 1; // TODO: replace by on-stack flag (partition_state's member)?
            if( is_stolen_task(ed) && t.my_parent->m_ref_count >= 2 ) { // runs concurrently with the left task
#if __TBB_USE_OPTIONAL_RTTI
                // RTTI is available; correctness of the cast relies on avoiding the root task, for which:
                // - initial value of my_divisor != 0 (protected by separate assertion);
                // - is_stolen_task() always returns false for the root task.
#endif
                tree_node::mark_task_stolen(t);
                if( !my_max_depth ) my_max_depth++;
                my_max_depth += __TBB_DEMAND_DEPTH_ADD;
                return true;
            }
        }
        return false;
    }
    depth_t max_depth() { return my_max_depth; }
    void align_depth(depth_t base) {
        __TBB_ASSERT(base <= my_max_depth, nullptr);
        my_max_depth -= base;
    }
    template<typename StartType, typename Range>
    void work_balance(StartType &start, Range &range, execution_data& ed) {
        if( !range.is_divisible() || !self().max_depth() ) {
            start.run_body( range );
        }
        else { // do range pool
            range_vector<Range, range_pool_size> range_pool(range);
            do {
                range_pool.split_to_fill(self().max_depth()); // fill range pool
                if( self().check_for_demand( start ) ) {
                    if( range_pool.size() > 1 ) {
                        start.offer_work( range_pool.front(), range_pool.front_depth(), ed );
                        range_pool.pop_front();
                        continue;
                    }
                    if( range_pool.is_divisible(self().max_depth()) ) // was not enough depth to fork a task
                        continue; // note: next split_to_fill() should split range at least once
                }
                start.run_body( range_pool.back() );
                range_pool.pop_back();
            } while( !range_pool.empty() && !ed.context->is_group_execution_cancelled() );
        }
    }
    template <typename Task>
    bool check_for_demand(Task& t) {
        if ( pass == my_delay ) {
            if ( self().my_divisor > 1 ) // produce affinitized tasks while they have slot in array
                return true; // do not do my_max_depth++ here, but be sure range_pool is splittable once more
            else if ( self().my_divisor && my_max_depth ) { // make balancing task
                self().my_divisor = 0; // once for each task; depth will be decreased in align_depth()
                return true;
            }
            else if ( tree_node::is_peer_stolen(t) ) {
                my_max_depth += __TBB_DEMAND_DEPTH_ADD;
                return true;
            }
        } else if( begin == my_delay ) {
            my_delay = pass;
        }
        return false;
    }
};

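//! Auto partition type: adaptive splitting combined with dynamic grain size adjustment.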
class auto_partition_type: public dynamic_grainsize_mode<adaptive_mode<auto_partition_type> > {
public:
    auto_partition_type( const auto_partitioner& ) {
        my_divisor *= __TBB_INITIAL_CHUNKS;
    }
    auto_partition_type( auto_partition_type& src, split)
        : dynamic_grainsize_mode<adaptive_mode<auto_partition_type> >(src, split()) {}
    bool is_divisible() { // part of old should_execute_range()
        if( my_divisor > 1 ) return true;
        if( my_divisor && my_max_depth ) { // can split the task
            // keep same fragmentation while splitting for the local task pool
            my_max_depth--;
            my_divisor = 0;
            return true;
        } else return false;
    }
    template <typename Task>
    bool check_for_demand(Task& t) {
        if (tree_node::is_peer_stolen(t)) {
            my_max_depth += __TBB_DEMAND_DEPTH_ADD;
            return true;
        } else return false;
    }
    void spawn_task(task& t, task_group_context& ctx) {
        spawn(t, ctx);
    }
};

class simple_partition_type: public partition_type_base<simple_partition_type> {
public:
    simple_partition_type( const simple_partitioner& ) {}
    simple_partition_type( const simple_partition_type&, split ) {}
    //! simplified algorithm
    template<typename StartType, typename Range>
    void execute(StartType &start, Range &range, execution_data& ed) {
        split_type split_obj = split(); // start.offer_work accepts split_type as reference
        while( range.is_divisible() )
            start.offer_work( split_obj, ed );
        start.run_body( range );
    }
    void spawn_task(task& t, task_group_context& ctx) {
        spawn(t, ctx);
    }
};

class static_partition_type : public linear_affinity_mode<static_partition_type> {
public:
    typedef detail::proportional_split split_type;
    static_partition_type( const static_partitioner& ) {}
    static_partition_type( static_partition_type& p, const proportional_split& split_obj )
        : linear_affinity_mode<static_partition_type>(p, split_obj) {}
};

class affinity_partition_type : public dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> > {
    static const unsigned factor_power = 4; // TODO: get a unified formula based on number of computing units
    slot_id* my_array;
public:
    static const unsigned factor = 1 << factor_power; // number of slots in affinity array per task
    typedef detail::proportional_split split_type;
    affinity_partition_type( affinity_partitioner_base& ap ) {
        __TBB_ASSERT( (factor&(factor-1))==0, "factor must be power of two" );
        ap.resize(factor);
        my_array = ap.my_array;
        my_max_depth = factor_power + 1;
        __TBB_ASSERT( my_max_depth < __TBB_RANGE_POOL_CAPACITY, nullptr );
    }
    affinity_partition_type(affinity_partition_type& p, split)
        : dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> >(p, split())
        , my_array(p.my_array) {}
    affinity_partition_type(affinity_partition_type& p, const proportional_split& split_obj)
        : dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> >(p, split_obj)
        , my_array(p.my_array) {}
    void note_affinity(slot_id id) {
        if( my_divisor )
            my_array[my_head] = id;
    }
    void spawn_task(task& t, task_group_context& ctx) {
        if (my_divisor) {
            if (!my_array[my_head]) {
                // no affinity recorded for this position yet; derive the slot from the head index
                spawn(t, ctx, slot_id(my_head / factor));
            } else {
                spawn(t, ctx, my_array[my_head]);
            }
        } else {
            spawn(t, ctx);
        }
    }
};

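//! A simple partitioner
/** Divides the range until the range is not divisible.
    @ingroup algorithms */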
class simple_partitioner {
public:
    simple_partitioner() {}
private:
    template<typename Range, typename Body, typename Partitioner> friend struct start_for;
    template<typename Range, typename Body, typename Partitioner> friend struct start_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_scan;
    // new implementation just extends existing interface
    typedef simple_partition_type task_partition_type;
    // TODO: consider making split_type public
    typedef simple_partition_type::split_type split_type;

    // for parallel_scan only
    class partition_type {
    public:
        bool should_execute_range(const execution_data& ) {return false;}
        partition_type( const simple_partitioner& ) {}
        partition_type( const partition_type&, split ) {}
    };
};

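//! An auto partitioner
/** The range is initially divided into several large chunks. Chunks are further
    subdivided into smaller pieces if demand is detected and they are divisible.
    @ingroup algorithms */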
class auto_partitioner {
public:
    auto_partitioner() {}

private:
    template<typename Range, typename Body, typename Partitioner> friend struct start_for;
    template<typename Range, typename Body, typename Partitioner> friend struct start_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_scan;
    // new implementation just extends existing interface
    typedef auto_partition_type task_partition_type;
    // TODO: consider making split_type public
    typedef auto_partition_type::split_type split_type;

    //! Backward-compatible partition for auto and affinity partition objects.
    class partition_type {
        size_t num_chunks;
        static const size_t VICTIM_CHUNKS = 4;
    public:
        bool should_execute_range(const execution_data& ed) {
            if( num_chunks<VICTIM_CHUNKS && is_stolen_task(ed) )
                num_chunks = VICTIM_CHUNKS;
            return num_chunks==1;
        }
        partition_type( const auto_partitioner& )
            : num_chunks(get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4) {}
        partition_type( partition_type& pt, split ) {
            num_chunks = pt.num_chunks = (pt.num_chunks+1u) / 2u;
        }
    };
};

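//! A static partitioner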
class static_partitioner {
public:
    static_partitioner() {}
private:
    template<typename Range, typename Body, typename Partitioner> friend struct start_for;
    template<typename Range, typename Body, typename Partitioner> friend struct start_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_scan;
    // new implementation just extends existing interface
    typedef static_partition_type task_partition_type;
    // TODO: consider making split_type public
    typedef static_partition_type::split_type split_type;
};

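//! An affinity partitioner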
class affinity_partitioner : affinity_partitioner_base {
public:
    affinity_partitioner() {}

private:
    template<typename Range, typename Body, typename Partitioner> friend struct start_for;
    template<typename Range, typename Body, typename Partitioner> friend struct start_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_scan;
    // new implementation just extends existing interface
    typedef affinity_partition_type task_partition_type;
    // TODO: consider making split_type public
    typedef affinity_partition_type::split_type split_type;
};

} // namespace d1
} // namespace detail

inline namespace v1 {
// Partitioners
using detail::d1::auto_partitioner;
using detail::d1::simple_partitioner;
using detail::d1::static_partitioner;
using detail::d1::affinity_partitioner;
// Split types
using detail::split;
using detail::proportional_split;
} // namespace v1

} // namespace tbb

#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
    #pragma warning (pop)
#endif // warning 4244 is back

#undef __TBB_INITIAL_CHUNKS
#undef __TBB_RANGE_POOL_CAPACITY
#undef __TBB_INIT_DEPTH

#endif // __TBB_partitioner_H