#ifndef __TBB_partitioner_H
#define __TBB_partitioner_H

#ifndef __TBB_INITIAL_CHUNKS
// initial task divisions per thread
#define __TBB_INITIAL_CHUNKS 2
#endif
#ifndef __TBB_RANGE_POOL_CAPACITY
// maximum number of elements in range pool
#define __TBB_RANGE_POOL_CAPACITY 8
#endif
#ifndef __TBB_INIT_DEPTH
// initial value for depth of range pool
#define __TBB_INIT_DEPTH 5
#endif
#ifndef __TBB_DEMAND_DEPTH_ADD
// when imbalance is found range splits this value times more
#define __TBB_DEMAND_DEPTH_ADD 1
#endif

#include "detail/_config.h"
#include "detail/_namespace_injection.h"
#include "detail/_aligned_space.h"
#include "detail/_utils.h"
#include "detail/_template_helpers.h"
#include "detail/_range_common.h"
#include "detail/_task.h"
#include "detail/_small_object_pool.h"

#include "cache_aligned_allocator.h"
#include "task_group.h" // task_group_context
#include "task_arena.h"

#include <algorithm>
#include <atomic>
#include <type_traits>

#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
    // Workaround for overzealous compiler warnings
    #pragma warning (push)
    #pragma warning (disable: 4244)
#endif

namespace tbb {
namespace detail {

namespace d1 {
class auto_partitioner;
class simple_partitioner;
class static_partitioner;
class affinity_partitioner;
class affinity_partition_type;
class affinity_partitioner_base;

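// Requests four chunks per available worker thread as the initial division of the work.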
inline std::size_t get_initial_auto_partitioner_divisor() {
    const std::size_t factor = 4;
    return factor * static_cast<std::size_t>(max_concurrency());
}

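//! Defines entry point for affinity partitioner into oneTBB run-time library.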
class affinity_partitioner_base: no_copy {
    friend class affinity_partitioner;
    friend class affinity_partition_type;

    //! Array that remembers affinities of tree positions to affinity_id.
    /** nullptr if my_size==0. */
    slot_id* my_array;

    //! Number of elements in my_array.
    std::size_t my_size;

    affinity_partitioner_base() : my_array(nullptr), my_size(0) {}

    ~affinity_partitioner_base() { resize(0); }

    //! Resize my_array.
    /** A factor of zero simply deallocates the array. */
    void resize(unsigned factor) {
        unsigned max_threads_in_arena = static_cast<unsigned>(max_concurrency());
        std::size_t new_size = factor ? factor * max_threads_in_arena : 0;
        if (new_size != my_size) {
            if (my_array) {
                r1::cache_aligned_deallocate(my_array);
                // Following two assignments must be done here for sake of exception safety.
                my_array = nullptr;
                my_size = 0;
            }
            if (new_size) {
                my_array = static_cast<slot_id*>(r1::cache_aligned_allocate(new_size * sizeof(slot_id)));
                std::fill_n(my_array, new_size, no_slot);
                my_size = new_size;
            }
        }
    }
};

template<typename Range, typename Body, typename Partitioner> struct start_for;
template<typename Range, typename Body, typename Partitioner> struct start_scan;
template<typename Range, typename Body, typename Partitioner> struct start_reduce;
template<typename Range, typename Body, typename Partitioner> struct start_deterministic_reduce;

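//! Reference-counted node of the task tree built by recursive splitting.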
struct node {
    node* my_parent{};
    std::atomic<int> m_ref_count{};

    node() = default;
    node(node* parent, int ref_count) :
        my_parent{parent}, m_ref_count{ref_count} {
        __TBB_ASSERT(ref_count > 0, "The ref count must be positive");
    }
};

struct wait_node : node {
    wait_node() : node{ nullptr, 1 } {}
    wait_context m_wait{1};
};

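//! Interior node of the splitting tree; allocated from the small object pool
//! and tracks whether one of its child tasks was stolen.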
struct tree_node : public node {
    small_object_allocator m_allocator;
    std::atomic<bool> m_child_stolen{false};

    tree_node(node* parent, int ref_count, small_object_allocator& alloc)
        : node{parent, ref_count}
        , m_allocator{alloc} {}

    void join(task_group_context*) {}

    template <typename Task>
    static void mark_task_stolen(Task &t) {
        std::atomic<bool> &flag = static_cast<tree_node*>(t.my_parent)->m_child_stolen;
#if TBB_USE_PROFILING_TOOLS
        // Threading tools need the write to be a locked operation to avoid false-positive race reports.
        flag.exchange(true);
#else
        flag.store(true, std::memory_order_relaxed);
#endif
    }
    template <typename Task>
    static bool is_peer_stolen(Task &t) {
        return static_cast<tree_node*>(t.my_parent)->m_child_stolen.load(std::memory_order_relaxed);
    }
};

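//! Walks the tree bottom-up, releasing one reference per node; joins and frees each
//! node whose reference count reaches zero and finally releases the root's wait_context.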
template<typename TreeNodeType>
void fold_tree(node* n, const execution_data& ed) {
    for (;;) {
        __TBB_ASSERT(n, nullptr);
        __TBB_ASSERT(n->m_ref_count.load(std::memory_order_relaxed) > 0, "The refcount must be positive.");
        call_itt_task_notify(releasing, n);
        if (--n->m_ref_count > 0) {
            return;
        }
        node* parent = n->my_parent;
        if (!parent) {
            break;
        }
        call_itt_task_notify(acquired, n);
        TreeNodeType* self = static_cast<TreeNodeType*>(n);
        self->join(ed.context);
        self->m_allocator.delete_object(self, ed);
        n = parent;
    }
    // Only the root node has no parent.
    static_cast<wait_node*>(n)->m_wait.release();
}

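//! Depth is a relative depth of recursive division inside a range pool. Relative depth allows
//! infinite absolute depth of the recursion for heavily unbalanced workloads with range represented
//! as a tree.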
typedef unsigned char depth_t;

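//! Range pool stores ranges of type T in a circular buffer with MaxCapacity,
//! together with the relative depth of each stored range.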
template <typename T, depth_t MaxCapacity>
class range_vector {
    depth_t my_head;
    depth_t my_tail;
    depth_t my_size;
    depth_t my_depth[MaxCapacity];
    tbb::detail::aligned_space<T, MaxCapacity> my_pool;

public:
    //! Initializes the pool with a single range at depth 0.
    range_vector(const T& elem) : my_head(0), my_tail(0), my_size(1) {
        my_depth[0] = 0;
        new( static_cast<void *>(my_pool.begin()) ) T(elem);
    }
    ~range_vector() {
        while( !empty() ) pop_back();
    }
    bool empty() const { return my_size == 0; }
    depth_t size() const { return my_size; }
    //! Splits the back range until the pool is full or the range stops being
    //! divisible at the given depth limit.
    void split_to_fill(depth_t max_depth) {
        while( my_size < MaxCapacity && is_divisible(max_depth) ) {
            depth_t prev = my_head;
            my_head = (my_head + 1) % MaxCapacity;
            new(my_pool.begin()+my_head) T(my_pool.begin()[prev]); // copy the back range
            my_pool.begin()[prev].~T(); // instead of assignment
            new(my_pool.begin()+prev) T(my_pool.begin()[my_head], detail::split()); // do 'inverse' split
            my_depth[my_head] = ++my_depth[prev];
            my_size++;
        }
    }
    void pop_back() {
        __TBB_ASSERT(my_size > 0, "range_vector::pop_back() with empty size");
        my_pool.begin()[my_head].~T();
        my_size--;
        my_head = (my_head + MaxCapacity - 1) % MaxCapacity;
    }
    void pop_front() {
        __TBB_ASSERT(my_size > 0, "range_vector::pop_front() with empty size");
        my_pool.begin()[my_tail].~T();
        my_size--;
        my_tail = (my_tail + 1) % MaxCapacity;
    }
    T& back() {
        __TBB_ASSERT(my_size > 0, "range_vector::back() with empty size");
        return my_pool.begin()[my_head];
    }
    T& front() {
        __TBB_ASSERT(my_size > 0, "range_vector::front() with empty size");
        return my_pool.begin()[my_tail];
    }
    //! Similarly to front(), returns depth of the first range in the pool.
    depth_t front_depth() {
        __TBB_ASSERT(my_size > 0, "range_vector::front_depth() with empty size");
        return my_depth[my_tail];
    }
    depth_t back_depth() {
        __TBB_ASSERT(my_size > 0, "range_vector::back_depth() with empty size");
        return my_depth[my_head];
    }
    bool is_divisible(depth_t max_depth) {
        return back_depth() < max_depth && back().is_divisible();
    }
};

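//! Provides default methods for partition objects and common algorithm blocks.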
template <typename Partition>
struct partition_type_base {
    typedef detail::split split_type;
    // decision makers
    void note_affinity( slot_id ) {}
    template <typename Task>
    bool check_being_stolen(Task&, const execution_data&) { return false; }
    template <typename Range> split_type get_split() { return split(); }
    Partition& self() { return *static_cast<Partition*>(this); }

    template<typename StartType, typename Range>
    void work_balance(StartType &start, Range &range, const execution_data&) {
        start.run_body( range ); // simple partitioner always goes here
    }

    template<typename StartType, typename Range>
    void execute(StartType &start, Range &range, execution_data& ed) {
        // The algorithm in a few words ([]-denotes calls to decision methods of partitioner):
        // [If this task is stolen, adjust depth and divisions if necessary, set flag].
        // If range is divisible {
        //    Spread the work while [initial divisions left];
        //    Create trap task [if necessary];
        // }
        // If not divisible or [max depth is reached], execute, else do the range pool part
        if ( range.is_divisible() ) {
            if ( self().is_divisible() ) {
                do { // split until is divisible
                    typename Partition::split_type split_obj = self().template get_split<Range>();
                    start.offer_work( split_obj, ed );
                } while ( range.is_divisible() && self().is_divisible() );
            }
        }
        self().work_balance(start, range, ed);
    }
};

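//! Provides default splitting strategy for partition objects.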
template <typename Partition>
struct adaptive_mode : partition_type_base<Partition> {
    typedef Partition my_partition;
    std::size_t my_divisor;
    // For affinity_partitioner, my_divisor indicates the number of affinity array indices the task reserves.
    // A task which has only one index must produce the right split without reserved index in order to avoid
    // it being overwritten in note_affinity() of the created (right) task.
    // I.e. a task created deeper than the affinity array can remember must not save its affinity (LIFO order).
    static const unsigned factor = 1;
    adaptive_mode() : my_divisor(get_initial_auto_partitioner_divisor() / 4 * my_partition::factor) {}
    adaptive_mode(adaptive_mode &src, split) : my_divisor(do_split(src, split())) {}
    adaptive_mode(adaptive_mode&, const proportional_split&) : my_divisor(0)
    {
        // left blank as my_divisor gets overridden in the successors' constructors
    }
    /*! Override do_split methods in order to specify splitting strategy */
    std::size_t do_split(adaptive_mode &src, split) {
        return src.my_divisor /= 2u;
    }
};

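//! Provides proportional splitting strategy for partition objects.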
template <typename Partition>
struct proportional_mode : adaptive_mode<Partition> {
    typedef Partition my_partition;
    using partition_type_base<Partition>::self;

    proportional_mode() : adaptive_mode<Partition>() {}
    proportional_mode(proportional_mode &src, split) : adaptive_mode<Partition>(src, split()) {}
    proportional_mode(proportional_mode &src, const proportional_split& split_obj)
        : adaptive_mode<Partition>(src, split_obj)
    {
        self().my_divisor = do_split(src, split_obj);
    }
    std::size_t do_split(proportional_mode &src, const proportional_split& split_obj) {
        std::size_t portion = split_obj.right() * my_partition::factor;
        // round the portion to a multiple of factor
        portion = (portion + my_partition::factor/2) & (0ul - my_partition::factor);
        src.my_divisor -= portion;
        return portion;
    }
    bool is_divisible() { // part of old should_execute_range()
        return self().my_divisor > my_partition::factor;
    }
    template <typename Range>
    proportional_split get_split() {
        // divide the remaining chunks roughly evenly between the two halves
        std::size_t n = self().my_divisor / my_partition::factor;
        std::size_t right = n / 2;
        std::size_t left = n - right;
        return proportional_split(left, right);
    }
};

static std::size_t get_initial_partition_head() {
    int current_index = tbb::this_task_arena::current_thread_index();
    if (current_index == tbb::task_arena::not_initialized)
        current_index = 0;
    return size_t(current_index);
}

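//! Provides default linear indexing of partitioner's sequence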
template <typename Partition>
struct linear_affinity_mode : proportional_mode<Partition> {
    std::size_t my_head;
    std::size_t my_max_affinity;
    using proportional_mode<Partition>::self;
    linear_affinity_mode() : proportional_mode<Partition>(), my_head(get_initial_partition_head()),
                             my_max_affinity(self().my_divisor) {}
    linear_affinity_mode(linear_affinity_mode &src, split) : proportional_mode<Partition>(src, split())
        , my_head((src.my_head + src.my_divisor) % src.my_max_affinity), my_max_affinity(src.my_max_affinity) {}
    linear_affinity_mode(linear_affinity_mode &src, const proportional_split& split_obj) : proportional_mode<Partition>(src, split_obj)
        , my_head((src.my_head + src.my_divisor) % src.my_max_affinity), my_max_affinity(src.my_max_affinity) {}
    void spawn_task(task& t, task_group_context& ctx) {
        if (self().my_divisor) {
            spawn(t, ctx, slot_id(my_head));
        } else {
            spawn(t, ctx);
        }
    }
};

static bool is_stolen_task(const execution_data& ed) {
    return execution_slot(ed) != original_slot(ed);
}

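/*! Determine work-balance phase implementing splitting & stealing actions */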
template<class Mode>
struct dynamic_grainsize_mode : Mode {
    using Mode::self;
    enum {
        begin = 0,
        run,
        pass
    } my_delay;
    depth_t my_max_depth;
    static const unsigned range_pool_size = __TBB_RANGE_POOL_CAPACITY;
    dynamic_grainsize_mode(): Mode()
        , my_delay(begin)
        , my_max_depth(__TBB_INIT_DEPTH) {}
    dynamic_grainsize_mode(dynamic_grainsize_mode& p, split)
        : Mode(p, split())
        , my_delay(pass)
        , my_max_depth(p.my_max_depth) {}
    dynamic_grainsize_mode(dynamic_grainsize_mode& p, const proportional_split& split_obj)
        : Mode(p, split_obj)
        , my_delay(begin)
        , my_max_depth(p.my_max_depth) {}
    template <typename Task>
    bool check_being_stolen(Task &t, const execution_data& ed) { // part of old should_execute_range()
        if( !(self().my_divisor / Mode::my_partition::factor) ) { // if not from the top P tasks of binary tree
            self().my_divisor = 1; // TODO: replace by on-stack flag (partition_state's member)?
            if( is_stolen_task(ed) && t.my_parent->m_ref_count >= 2 ) { // runs concurrently with the left task
#if __TBB_USE_OPTIONAL_RTTI
                // RTTI is available; correctness of the cast relies on avoiding the root task, for which:
                // - initial value of my_divisor != 0 (protected by separate assertion);
                // - is_stolen_task() always returns false for the root task.
#endif
                tree_node::mark_task_stolen(t);
                if( !my_max_depth ) my_max_depth++;
                my_max_depth += __TBB_DEMAND_DEPTH_ADD;
                return true;
            }
        }
        return false;
    }
    depth_t max_depth() { return my_max_depth; }
    void align_depth(depth_t base) {
        __TBB_ASSERT(base <= my_max_depth, nullptr);
        my_max_depth -= base;
    }
    template<typename StartType, typename Range>
    void work_balance(StartType &start, Range &range, execution_data& ed) {
        if( !range.is_divisible() || !self().max_depth() ) {
            start.run_body( range );
        }
        else { // do range pool
            range_vector<Range, range_pool_size> range_pool(range);
            do {
                range_pool.split_to_fill(self().max_depth()); // fill range pool
                if( self().check_for_demand( start ) ) {
                    if( range_pool.size() > 1 ) {
                        start.offer_work( range_pool.front(), range_pool.front_depth(), ed );
                        range_pool.pop_front();
                        continue;
                    }
                    if( range_pool.is_divisible(self().max_depth()) ) // was not enough depth to fork a task
                        continue; // note: next split_to_fill() should split range at least once
                }
                start.run_body( range_pool.back() );
                range_pool.pop_back();
            } while( !range_pool.empty() && !ed.context->is_group_execution_cancelled() );
        }
    }
    template <typename Task>
    bool check_for_demand(Task& t) {
        if ( pass == my_delay ) {
            if ( self().my_divisor > 1 ) // produce affinitized tasks while they have slot in array
                return true; // do not do my_max_depth++ here, but be sure range_pool is splittable once more
            else if ( self().my_divisor && my_max_depth ) { // make balancing task
                self().my_divisor = 0; // once for each task; depth will be decreased in align_depth()
                return true;
            }
            else if ( tree_node::is_peer_stolen(t) ) {
                my_max_depth += __TBB_DEMAND_DEPTH_ADD;
                return true;
            }
        } else if( begin == my_delay ) {
            my_delay = pass;
        }
        return false;
    }
};

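//! Auto partition type: adaptive splitting combined with dynamic grain size adjustment.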
class auto_partition_type: public dynamic_grainsize_mode<adaptive_mode<auto_partition_type> > {
public:
    auto_partition_type( const auto_partitioner& ) {
        my_divisor *= __TBB_INITIAL_CHUNKS;
    }
    auto_partition_type( auto_partition_type& src, split)
        : dynamic_grainsize_mode<adaptive_mode<auto_partition_type> >(src, split()) {}
    bool is_divisible() { // part of old should_execute_range()
        if( my_divisor > 1 ) return true;
        if( my_divisor && my_max_depth ) { // can split the task
            // keep same fragmentation while splitting for the local task pool
            my_max_depth--;
            my_divisor = 0;
            return true;
        } else return false;
    }
    template <typename Task>
    bool check_for_demand(Task& t) {
        if (tree_node::is_peer_stolen(t)) {
            my_max_depth += __TBB_DEMAND_DEPTH_ADD;
            return true;
        } else return false;
    }
    void spawn_task(task& t, task_group_context& ctx) {
        spawn(t, ctx);
    }
};

class simple_partition_type: public partition_type_base<simple_partition_type> {
public:
    simple_partition_type( const simple_partitioner& ) {}
    simple_partition_type( const simple_partition_type&, split ) {}
    //! simplified algorithm
    template<typename StartType, typename Range>
    void execute(StartType &start, Range &range, execution_data& ed) {
        split_type split_obj = split(); // start.offer_work accepts split_type as reference
        while( range.is_divisible() )
            start.offer_work( split_obj, ed );
        start.run_body( range );
    }
    void spawn_task(task& t, task_group_context& ctx) {
        spawn(t, ctx);
    }
};

class static_partition_type : public linear_affinity_mode<static_partition_type> {
public:
    typedef detail::proportional_split split_type;
    static_partition_type( const static_partitioner& ) {}
    static_partition_type( static_partition_type& p, const proportional_split& split_obj )
        : linear_affinity_mode<static_partition_type>(p, split_obj) {}
};

class affinity_partition_type : public dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> > {
    static const unsigned factor_power = 4; // TODO: get a unified formula based on number of computing units
    slot_id* my_array;
public:
    static const unsigned factor = 1 << factor_power; // number of slots in affinity array per task
    typedef detail::proportional_split split_type;
    affinity_partition_type( affinity_partitioner_base& ap ) {
        __TBB_ASSERT( (factor&(factor-1))==0, "factor must be power of two" );
        ap.resize(factor);
        my_array = ap.my_array;
        my_max_depth = factor_power + 1;
        __TBB_ASSERT( my_max_depth < __TBB_RANGE_POOL_CAPACITY, nullptr );
    }
    affinity_partition_type(affinity_partition_type& p, split)
        : dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> >(p, split())
        , my_array(p.my_array) {}
    affinity_partition_type(affinity_partition_type& p, const proportional_split& split_obj)
        : dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> >(p, split_obj)
        , my_array(p.my_array) {}
    void note_affinity(slot_id id) {
        if( my_divisor )
            my_array[my_head] = id;
    }
    void spawn_task(task& t, task_group_context& ctx) {
        if (my_divisor) {
            if (!my_array[my_head]) {
                // no affinity recorded for this position yet; derive the slot from the head index
                spawn(t, ctx, slot_id(my_head / factor));
            } else {
                spawn(t, ctx, my_array[my_head]);
            }
        } else {
            spawn(t, ctx);
        }
    }
};

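//! A simple partitioner
/** Divides the range until the range is not divisible.
    @ingroup algorithms */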
class simple_partitioner {
public:
    simple_partitioner() {}
private:
    template<typename Range, typename Body, typename Partitioner> friend struct start_for;
    template<typename Range, typename Body, typename Partitioner> friend struct start_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_scan;
    // new implementation just extends existing interface
    typedef simple_partition_type task_partition_type;
    // TODO: consider making split_type public
    typedef simple_partition_type::split_type split_type;

    // for parallel_scan only
    class partition_type {
    public:
        bool should_execute_range(const execution_data& ) {return false;}
        partition_type( const simple_partitioner& ) {}
        partition_type( const partition_type&, split ) {}
    };
};

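//! An auto partitioner
/** The range is initially divided into several large chunks. Chunks are further
    subdivided into smaller pieces if demand is detected and they are divisible.
    @ingroup algorithms */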
class auto_partitioner {
public:
    auto_partitioner() {}

private:
    template<typename Range, typename Body, typename Partitioner> friend struct start_for;
    template<typename Range, typename Body, typename Partitioner> friend struct start_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_scan;
    // new implementation just extends existing interface
    typedef auto_partition_type task_partition_type;
    // TODO: consider making split_type public
    typedef auto_partition_type::split_type split_type;

    //! Backward-compatible partition for auto and affinity partition objects.
    class partition_type {
        size_t num_chunks;
        static const size_t VICTIM_CHUNKS = 4;
    public:
        bool should_execute_range(const execution_data& ed) {
            if( num_chunks<VICTIM_CHUNKS && is_stolen_task(ed) )
                num_chunks = VICTIM_CHUNKS;
            return num_chunks==1;
        }
        partition_type( const auto_partitioner& )
            : num_chunks(get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4) {}
        partition_type( partition_type& pt, split ) {
            num_chunks = pt.num_chunks = (pt.num_chunks+1u) / 2u;
        }
    };
};

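//! A static partitioner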
class static_partitioner {
public:
    static_partitioner() {}
private:
    template<typename Range, typename Body, typename Partitioner> friend struct start_for;
    template<typename Range, typename Body, typename Partitioner> friend struct start_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_scan;
    // new implementation just extends existing interface
    typedef static_partition_type task_partition_type;
    // TODO: consider making split_type public
    typedef static_partition_type::split_type split_type;
};

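//! An affinity partitioner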
class affinity_partitioner : affinity_partitioner_base {
public:
    affinity_partitioner() {}

private:
    template<typename Range, typename Body, typename Partitioner> friend struct start_for;
    template<typename Range, typename Body, typename Partitioner> friend struct start_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_scan;
    // new implementation just extends existing interface
    typedef affinity_partition_type task_partition_type;
    // TODO: consider making split_type public
    typedef affinity_partition_type::split_type split_type;
};

} // namespace d1
} // namespace detail

inline namespace v1 {
// Partitioners
using detail::d1::auto_partitioner;
using detail::d1::simple_partitioner;
using detail::d1::static_partitioner;
using detail::d1::affinity_partitioner;
// Split types
using detail::split;
using detail::proportional_split;
} // namespace v1

} // namespace tbb

#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
    #pragma warning (pop)
#endif // warning 4244 is back

#undef __TBB_INITIAL_CHUNKS
#undef __TBB_RANGE_POOL_CAPACITY
#undef __TBB_INIT_DEPTH

#endif // __TBB_partitioner_H