Warning, file /include/oneapi/tbb/parallel_for.h was not indexed
or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017 #ifndef __TBB_parallel_for_H
0018 #define __TBB_parallel_for_H
0019
0020 #include "detail/_config.h"
0021 #include "detail/_namespace_injection.h"
0022 #include "detail/_exception.h"
0023 #include "detail/_task.h"
0024 #include "detail/_small_object_pool.h"
0025 #include "profiling.h"
0026
0027 #include "partitioner.h"
0028 #include "blocked_range.h"
0029 #include "task_group.h"
0030
0031 #include <cstddef>
0032 #include <new>
0033
0034 namespace tbb {
0035 namespace detail {
0036 #if __TBB_CPP20_CONCEPTS_PRESENT
0037 inline namespace d0 {
0038
0039 template <typename Body, typename Range>
0040 concept parallel_for_body = std::copy_constructible<Body> && std::invocable<const std::remove_reference_t<Body>&, Range&>;
0041
0042 template <typename Index>
0043 concept parallel_for_index = std::constructible_from<Index, int> &&
0044 std::copyable<Index> &&
0045 requires( const std::remove_reference_t<Index>& lhs, const std::remove_reference_t<Index>& rhs ) {
0046 { lhs < rhs } -> adaptive_same_as<bool>;
0047 { lhs - rhs } -> std::convertible_to<std::size_t>;
0048 { lhs + (rhs - lhs) } -> std::convertible_to<Index>;
0049 };
0050
0051 template <typename Function, typename Index>
0052 concept parallel_for_function = std::invocable<const std::remove_reference_t<Function>&, Index>;
0053
0054 }
0055 #endif
0056 namespace d1 {
0057
0058
0059
0060 template<typename Range, typename Body, typename Partitioner>
0061 struct start_for : public task {
0062 Range my_range;
0063 const Body my_body;
0064 node* my_parent;
0065
0066 typename Partitioner::task_partition_type my_partition;
0067 small_object_allocator my_allocator;
0068
0069 task* execute(execution_data&) override;
0070 task* cancel(execution_data&) override;
0071 void finalize(const execution_data&);
0072
0073
0074 start_for( const Range& range, const Body& body, Partitioner& partitioner, small_object_allocator& alloc ) :
0075 my_range(range),
0076 my_body(body),
0077 my_parent(nullptr),
0078 my_partition(partitioner),
0079 my_allocator(alloc) {}
0080
0081
0082 start_for( start_for& parent_, typename Partitioner::split_type& split_obj, small_object_allocator& alloc ) :
0083 my_range(parent_.my_range, get_range_split_object<Range>(split_obj)),
0084 my_body(parent_.my_body),
0085 my_parent(nullptr),
0086 my_partition(parent_.my_partition, split_obj),
0087 my_allocator(alloc) {}
0088
0089
0090 start_for( start_for& parent_, const Range& r, depth_t d, small_object_allocator& alloc ) :
0091 my_range(r),
0092 my_body(parent_.my_body),
0093 my_parent(nullptr),
0094 my_partition(parent_.my_partition, split()),
0095 my_allocator(alloc)
0096 {
0097 my_partition.align_depth( d );
0098 }
0099 static void run(const Range& range, const Body& body, Partitioner& partitioner) {
0100 task_group_context context(PARALLEL_FOR);
0101 run(range, body, partitioner, context);
0102 }
0103
0104 static void run(const Range& range, const Body& body, Partitioner& partitioner, task_group_context& context) {
0105 if ( !range.empty() ) {
0106 small_object_allocator alloc{};
0107 start_for& for_task = *alloc.new_object<start_for>(range, body, partitioner, alloc);
0108
0109
0110 wait_node wn;
0111 for_task.my_parent = &wn;
0112 execute_and_wait(for_task, context, wn.m_wait, context);
0113 }
0114 }
0115
0116 void run_body( Range &r ) {
0117 tbb::detail::invoke(my_body, r);
0118 }
0119
0120
0121 void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) {
0122 offer_work_impl(ed, *this, split_obj);
0123 }
0124
0125
0126 void offer_work(const Range& r, depth_t d, execution_data& ed) {
0127 offer_work_impl(ed, *this, r, d);
0128 }
0129
0130 private:
0131 template <typename... Args>
0132 void offer_work_impl(execution_data& ed, Args&&... constructor_args) {
0133
0134 small_object_allocator alloc{};
0135 start_for& right_child = *alloc.new_object<start_for>(ed, std::forward<Args>(constructor_args)..., alloc);
0136
0137
0138 right_child.my_parent = my_parent = alloc.new_object<tree_node>(ed, my_parent, 2, alloc);
0139
0140 right_child.spawn_self(ed);
0141 }
0142
0143 void spawn_self(execution_data& ed) {
0144 my_partition.spawn_task(*this, *context(ed));
0145 }
0146 };
0147
0148
0149 template<typename Range, typename Body, typename Partitioner>
0150 void start_for<Range, Body, Partitioner>::finalize(const execution_data& ed) {
0151
0152 node* parent = my_parent;
0153 auto allocator = my_allocator;
0154
0155 this->~start_for();
0156
0157
0158 fold_tree<tree_node>(parent, ed);
0159 allocator.deallocate(this, ed);
0160
0161 }
0162
0163
0164 template<typename Range, typename Body, typename Partitioner>
0165 task* start_for<Range, Body, Partitioner>::execute(execution_data& ed) {
0166 if (!is_same_affinity(ed)) {
0167 my_partition.note_affinity(execution_slot(ed));
0168 }
0169 my_partition.check_being_stolen(*this, ed);
0170 my_partition.execute(*this, my_range, ed);
0171 finalize(ed);
0172 return nullptr;
0173 }
0174
0175
0176 template<typename Range, typename Body, typename Partitioner>
0177 task* start_for<Range, Body, Partitioner>::cancel(execution_data& ed) {
0178 finalize(ed);
0179 return nullptr;
0180 }
0181
0182
0183 template<typename Function, typename Index>
0184 class parallel_for_body_wrapper : detail::no_assign {
0185 const Function &my_func;
0186 const Index my_begin;
0187 const Index my_step;
0188 public:
0189 parallel_for_body_wrapper( const Function& _func, Index& _begin, Index& _step )
0190 : my_func(_func), my_begin(_begin), my_step(_step) {}
0191
0192 void operator()( const blocked_range<Index>& r ) const {
0193
0194 Index b = r.begin();
0195 Index e = r.end();
0196 Index ms = my_step;
0197 Index k = my_begin + b*ms;
0198
0199 #if __INTEL_COMPILER
0200 #pragma ivdep
0201 #if __TBB_ASSERT_ON_VECTORIZATION_FAILURE
0202 #pragma vector always assert
0203 #endif
0204 #endif
0205 for ( Index i = b; i < e; ++i, k += ms ) {
0206 tbb::detail::invoke(my_func, k);
0207 }
0208 }
0209 };
0210
0211
0212
0213
0214
0215
0216
0217
0218
0219
0220
0221
0222
0223
0224
0225
0226 template<typename Range, typename Body>
0227 __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>)
0228 void parallel_for( const Range& range, const Body& body ) {
0229 start_for<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER());
0230 }
0231
0232
0233
0234 template<typename Range, typename Body>
0235 __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>)
0236 void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner ) {
0237 start_for<Range,Body,const simple_partitioner>::run(range,body,partitioner);
0238 }
0239
0240
0241
0242 template<typename Range, typename Body>
0243 __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>)
0244 void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner ) {
0245 start_for<Range,Body,const auto_partitioner>::run(range,body,partitioner);
0246 }
0247
0248
0249
0250 template<typename Range, typename Body>
0251 __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>)
0252 void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner ) {
0253 start_for<Range,Body,const static_partitioner>::run(range,body,partitioner);
0254 }
0255
0256
0257
0258 template<typename Range, typename Body>
0259 __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>)
0260 void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner ) {
0261 start_for<Range,Body,affinity_partitioner>::run(range,body,partitioner);
0262 }
0263
0264
0265
0266 template<typename Range, typename Body>
0267 __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>)
0268 void parallel_for( const Range& range, const Body& body, task_group_context& context ) {
0269 start_for<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run(range, body, __TBB_DEFAULT_PARTITIONER(), context);
0270 }
0271
0272
0273
0274 template<typename Range, typename Body>
0275 __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>)
0276 void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
0277 start_for<Range,Body,const simple_partitioner>::run(range, body, partitioner, context);
0278 }
0279
0280
0281
0282 template<typename Range, typename Body>
0283 __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>)
0284 void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner, task_group_context& context ) {
0285 start_for<Range,Body,const auto_partitioner>::run(range, body, partitioner, context);
0286 }
0287
0288
0289
0290 template<typename Range, typename Body>
0291 __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>)
0292 void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner, task_group_context& context ) {
0293 start_for<Range,Body,const static_partitioner>::run(range, body, partitioner, context);
0294 }
0295
0296
0297
0298 template<typename Range, typename Body>
0299 __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>)
0300 void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner, task_group_context& context ) {
0301 start_for<Range,Body,affinity_partitioner>::run(range,body,partitioner, context);
0302 }
0303
0304
0305 template <typename Index, typename Function, typename Partitioner>
0306 void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner) {
0307 if (step <= 0 )
0308 throw_exception(exception_id::nonpositive_step);
0309 else if (first < last) {
0310
0311 Index end = Index(last - first - 1ul) / step + Index(1);
0312 blocked_range<Index> range(static_cast<Index>(0), end);
0313 parallel_for_body_wrapper<Function, Index> body(f, first, step);
0314 parallel_for(range, body, partitioner);
0315 }
0316 }
0317
0318
0319 template <typename Index, typename Function>
0320 __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0321 void parallel_for(Index first, Index last, Index step, const Function& f) {
0322 parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner());
0323 }
0324
0325 template <typename Index, typename Function>
0326 __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0327 void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner) {
0328 parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, partitioner);
0329 }
0330
0331 template <typename Index, typename Function>
0332 __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0333 void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner) {
0334 parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, partitioner);
0335 }
0336
0337 template <typename Index, typename Function>
0338 __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0339 void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner) {
0340 parallel_for_impl<Index,Function,const static_partitioner>(first, last, step, f, partitioner);
0341 }
0342
0343 template <typename Index, typename Function>
0344 __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0345 void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner) {
0346 parallel_for_impl(first, last, step, f, partitioner);
0347 }
0348
0349
0350 template <typename Index, typename Function>
0351 __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0352 void parallel_for(Index first, Index last, const Function& f) {
0353 parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner());
0354 }
0355
0356 template <typename Index, typename Function>
0357 __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0358 void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner) {
0359 parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, partitioner);
0360 }
0361
0362 template <typename Index, typename Function>
0363 __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0364 void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner) {
0365 parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, partitioner);
0366 }
0367
0368 template <typename Index, typename Function>
0369 __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0370 void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner) {
0371 parallel_for_impl<Index,Function,const static_partitioner>(first, last, static_cast<Index>(1), f, partitioner);
0372 }
0373
0374 template <typename Index, typename Function>
0375 __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0376 void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner) {
0377 parallel_for_impl(first, last, static_cast<Index>(1), f, partitioner);
0378 }
0379
0380
0381 template <typename Index, typename Function, typename Partitioner>
0382 void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner, task_group_context &context) {
0383 if (step <= 0 )
0384 throw_exception(exception_id::nonpositive_step);
0385 else if (first < last) {
0386
0387 Index end = (last - first - Index(1)) / step + Index(1);
0388 blocked_range<Index> range(static_cast<Index>(0), end);
0389 parallel_for_body_wrapper<Function, Index> body(f, first, step);
0390 parallel_for(range, body, partitioner, context);
0391 }
0392 }
0393
0394
0395 template <typename Index, typename Function>
0396 __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0397 void parallel_for(Index first, Index last, Index step, const Function& f, task_group_context &context) {
0398 parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner(), context);
0399 }
0400
0401 template <typename Index, typename Function>
0402 __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0403 void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner, task_group_context &context) {
0404 parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, partitioner, context);
0405 }
0406
0407 template <typename Index, typename Function>
0408 __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0409 void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner, task_group_context &context) {
0410 parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, partitioner, context);
0411 }
0412
0413 template <typename Index, typename Function>
0414 __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0415 void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner, task_group_context &context) {
0416 parallel_for_impl<Index,Function,const static_partitioner>(first, last, step, f, partitioner, context);
0417 }
0418
0419 template <typename Index, typename Function>
0420 __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0421 void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner, task_group_context &context) {
0422 parallel_for_impl(first, last, step, f, partitioner, context);
0423 }
0424
0425
0426 template <typename Index, typename Function>
0427 __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0428 void parallel_for(Index first, Index last, const Function& f, task_group_context &context) {
0429 parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner(), context);
0430 }
0431
0432 template <typename Index, typename Function>
0433 __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0434 void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner, task_group_context &context) {
0435 parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context);
0436 }
0437
0438 template <typename Index, typename Function>
0439 __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0440 void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner, task_group_context &context) {
0441 parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context);
0442 }
0443
0444 template <typename Index, typename Function>
0445 __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0446 void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner, task_group_context &context) {
0447 parallel_for_impl<Index,Function,const static_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context);
0448 }
0449
0450 template <typename Index, typename Function>
0451 __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0452 void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner, task_group_context &context) {
0453 parallel_for_impl(first, last, static_cast<Index>(1), f, partitioner, context);
0454 }
0455
0456
0457 }
0458 }
0459
0460 inline namespace v1 {
0461 using detail::d1::parallel_for;
0462
0463 using detail::split;
0464 using detail::proportional_split;
0465 }
0466
0467 }
0468
0469 #endif