Back to home page

EIC code displayed by LXR

 
 

    


Warning, file /include/oneapi/tbb/parallel_for.h was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).

0001 /*
0002     Copyright (c) 2005-2023 Intel Corporation
0003 
0004     Licensed under the Apache License, Version 2.0 (the "License");
0005     you may not use this file except in compliance with the License.
0006     You may obtain a copy of the License at
0007 
0008         http://www.apache.org/licenses/LICENSE-2.0
0009 
0010     Unless required by applicable law or agreed to in writing, software
0011     distributed under the License is distributed on an "AS IS" BASIS,
0012     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0013     See the License for the specific language governing permissions and
0014     limitations under the License.
0015 */
0016 
0017 #ifndef __TBB_parallel_for_H
0018 #define __TBB_parallel_for_H
0019 
0020 #include "detail/_config.h"
0021 #include "detail/_namespace_injection.h"
0022 #include "detail/_exception.h"
0023 #include "detail/_task.h"
0024 #include "detail/_small_object_pool.h"
0025 #include "profiling.h"
0026 
0027 #include "partitioner.h"
0028 #include "blocked_range.h"
0029 #include "task_group.h"
0030 
0031 #include <cstddef>
0032 #include <new>
0033 
0034 namespace tbb {
0035 namespace detail {
0036 #if __TBB_CPP20_CONCEPTS_PRESENT
0037 inline namespace d0 {
0038 //! Body requirement for range-based parallel_for: copy-constructible and callable as a const lvalue with a Range& argument.
0039 template <typename Body, typename Range>
0040 concept parallel_for_body = std::copy_constructible<Body> && std::invocable<const std::remove_reference_t<Body>&, Range&>;
0041 //! Index requirement for the integer overloads of parallel_for: constructible from int, copyable, and supporting the <, - and + expressions below on const operands.
0042 template <typename Index>
0043 concept parallel_for_index = std::constructible_from<Index, int> &&
0044                              std::copyable<Index> &&
0045                              requires( const std::remove_reference_t<Index>& lhs, const std::remove_reference_t<Index>& rhs ) {
0046                                  { lhs < rhs } -> adaptive_same_as<bool>; // result type is checked by adaptive_same_as (not defined in this header)
0047                                  { lhs - rhs } -> std::convertible_to<std::size_t>; // a difference of two indices must convert to std::size_t
0048                                  { lhs + (rhs - lhs) } -> std::convertible_to<Index>; // index + difference must convert back to Index
0049                              };
0050 //! Function requirement for the integer overloads of parallel_for: callable as a const lvalue with an Index argument.
0051 template <typename Function, typename Index>
0052 concept parallel_for_function = std::invocable<const std::remove_reference_t<Function>&, Index>;
0053 
0054 } // namespace d0
0055 #endif // __TBB_CPP20_CONCEPTS_PRESENT
0056 namespace d1 {
0057 
0058 //! Task type used in parallel_for: holds one sub-range, a const copy of the body, a parent link and the partitioner's per-task state
0059 /** @ingroup algorithms */
0060 template<typename Range, typename Body, typename Partitioner>
0061 struct start_for : public task {
0062     Range my_range; // the range this task processes (handed to my_partition.execute)
0063     const Body my_body; // this task's own copy of the body; invoked through run_body()
0064     node* my_parent; // &wait_node for the root task (see run); a tree_node once offer_work_impl has split the task
0065 
0066     typename Partitioner::task_partition_type my_partition; // per-task partition state; drives execution and spawning
0067     small_object_allocator my_allocator; // copy of the allocator passed at construction; finalize() deallocates the task through it
0068 
0069     task* execute(execution_data&) override; // processes my_range via my_partition, then finalizes the task
0070     task* cancel(execution_data&) override; // finalizes the task without running the body
0071     void finalize(const execution_data&); // destroys the task, folds the tree and frees the memory
0072 
0073     //! Constructor for root task.
0074     start_for( const Range& range, const Body& body, Partitioner& partitioner, small_object_allocator& alloc ) :
0075         my_range(range),
0076         my_body(body),
0077         my_parent(nullptr), // assigned by run() once the wait node exists
0078         my_partition(partitioner),
0079         my_allocator(alloc) {}
0080     //! Splitting constructor used to generate children.
0081     /** parent_ becomes left child.  Newly constructed object is right child. */
0082     start_for( start_for& parent_, typename Partitioner::split_type& split_obj, small_object_allocator& alloc ) :
0083         my_range(parent_.my_range, get_range_split_object<Range>(split_obj)), // Range(Range&, split object) constructor applied to the parent's range
0084         my_body(parent_.my_body),
0085         my_parent(nullptr), // assigned by offer_work_impl right after construction
0086         my_partition(parent_.my_partition, split_obj),
0087         my_allocator(alloc) {}
0088     //! Construct right child from the given range as response to the demand.
0089     /** parent_ remains left child.  Newly constructed object is right child. */
0090     start_for( start_for& parent_, const Range& r, depth_t d, small_object_allocator& alloc ) :
0091         my_range(r), // the range is copied as given, the parent's range is not touched here
0092         my_body(parent_.my_body),
0093         my_parent(nullptr), // assigned by offer_work_impl right after construction
0094         my_partition(parent_.my_partition, split()),
0095         my_allocator(alloc)
0096     {
0097         my_partition.align_depth( d ); // apply the depth d supplied together with the range
0098     }
0099     static void run(const Range& range, const Body& body, Partitioner& partitioner) { // entry point without a user-supplied context
0100         task_group_context context(PARALLEL_FOR); // local context that lives only for the duration of this call
0101         run(range, body, partitioner, context);
0102     }
0103     //! Entry point with a caller-supplied context; for an empty range no task is created at all.
0104     static void run(const Range& range, const Body& body, Partitioner& partitioner, task_group_context& context) {
0105         if ( !range.empty() ) {
0106             small_object_allocator alloc{};
0107             start_for& for_task = *alloc.new_object<start_for>(range, body, partitioner, alloc); // root task, built with the root constructor
0108 
0109             // defer creation of the wait node until task allocation succeeds
0110             wait_node wn;
0111             for_task.my_parent = &wn;
0112             execute_and_wait(for_task, context, wn.m_wait, context); // the same context is passed for both context arguments
0113         }
0114     }
0115     //! Run body for range, serves as callback for partitioner
0116     void run_body( Range &r ) {
0117         tbb::detail::invoke(my_body, r);
0118     }
0119 
0120     //! Split off and spawn a right child using the partitioner's split object, serves as callback for partitioner
0121     void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) {
0122        offer_work_impl(ed, *this, split_obj);
0123     }
0124 
0125     //! Spawn a right child for the explicitly given range r at depth d, serves as callback for partitioner
0126     void offer_work(const Range& r, depth_t d, execution_data& ed) {
0127         offer_work_impl(ed, *this, r, d);
0128     }
0129 
0130 private:
0131     template <typename... Args>
0132     void offer_work_impl(execution_data& ed, Args&&... constructor_args) { // constructor_args select which child constructor of start_for is used
0133         // New right child
0134         small_object_allocator alloc{};
0135         start_for& right_child = *alloc.new_object<start_for>(ed, std::forward<Args>(constructor_args)..., alloc);
0136 
0137         // New tree_node, constructed with the old parent and the count 2, becomes the parent of both this task (left) and right_child.
0138         right_child.my_parent = my_parent = alloc.new_object<tree_node>(ed, my_parent, 2, alloc);
0139         // Spawn the right sibling
0140         right_child.spawn_self(ed);
0141     }
0142 
0143     void spawn_self(execution_data& ed) { // spawning is delegated to the partition state
0144         my_partition.spawn_task(*this, *context(ed));
0145     }
0146 };
0147 
0148 //! Destroy this task, fold the tree above it and deallocate the task's memory
0149 template<typename Range, typename Body, typename Partitioner>
0150 void start_for<Range, Body, Partitioner>::finalize(const execution_data& ed) {
0151     // Copy the parent pointer and the allocator into locals before the object is destroyed; members must not be read afterwards
0152     node* parent = my_parent;
0153     auto allocator = my_allocator;
0154     // Task execution finished - destroy it
0155     this->~start_for();
0156     // Unwind the tree, decrementing the parent's reference count
0157 
0158     fold_tree<tree_node>(parent, ed);
0159     allocator.deallocate(this, ed); // `this` is passed only as the address of the storage to release
0160 
0161 }
0162 
0163 //! Execute task for parallel_for: let the partition state process my_range, then finalize the task
0164 template<typename Range, typename Body, typename Partitioner>
0165 task* start_for<Range, Body, Partitioner>::execute(execution_data& ed) {
0166     if (!is_same_affinity(ed)) { // when the affinity check fails, report the current execution slot to the partition state
0167         my_partition.note_affinity(execution_slot(ed));
0168     }
0169     my_partition.check_being_stolen(*this, ed);
0170     my_partition.execute(*this, my_range, ed); // the partition state calls back into run_body / offer_work of this task
0171     finalize(ed); // destroys and deallocates *this - no member may be touched after this line
0172     return nullptr; // no task is handed back to the caller
0173 }
0174 
0175 //! Cancel task for parallel_for: the body is not run, the task is only finalized
0176 template<typename Range, typename Body, typename Partitioner>
0177 task* start_for<Range, Body, Partitioner>::cancel(execution_data& ed) {
0178     finalize(ed); // same cleanup path as after a normal execute(); *this is gone afterwards
0179     return nullptr;
0180 }
0181 
0182 //! Body adapter for the integer overloads: for every iteration number i of the given blocked_range it calls the function with my_begin + i*my_step
0183 template<typename Function, typename Index>
0184 class parallel_for_body_wrapper : detail::no_assign {
0185     const Function &my_func; // held by reference, not copied - the function object must outlive the wrapper
0186     const Index my_begin; // first value of the user's integer range
0187     const Index my_step; // distance between two consecutive values
0188 public:
0189     parallel_for_body_wrapper( const Function& _func, const Index& _begin, const Index& _step ) // _begin/_step are only copied, hence const references
0190         : my_func(_func), my_begin(_begin), my_step(_step) {}
0191 
0192     void operator()( const blocked_range<Index>& r ) const {
0193         // A set of local variables to help the compiler with vectorization of the following loop.
0194         Index b = r.begin();
0195         Index e = r.end();
0196         Index ms = my_step;
0197         Index k = my_begin + b*ms; // value handed to the function for iteration number b
0198 
0199 #if __INTEL_COMPILER
0200 #pragma ivdep
0201 #if __TBB_ASSERT_ON_VECTORIZATION_FAILURE
0202 #pragma vector always assert
0203 #endif
0204 #endif
0205         for ( Index i = b; i < e; ++i, k += ms ) { // i counts iteration numbers, k is advanced by the step alongside it
0206             tbb::detail::invoke(my_func, k);
0207         }
0208     }
0209 };
0210 
0211 // Requirements on Range concept are documented in blocked_range.h
0212 
0213 /** \page parallel_for_body_req Requirements on parallel_for body
0214     Class \c Body implementing the concept of parallel_for body must define:
0215     - \code Body::Body( const Body& ); \endcode                 Copy constructor
0216     - \code Body::~Body(); \endcode                             Destructor
0217     - \code void Body::operator()( Range& r ) const; \endcode   Function call operator applying the body to range \c r.
0218 **/
0219 
0220 /** \name parallel_for
0221     See also requirements on \ref range_req "Range" and \ref parallel_for_body_req "parallel_for Body". **/
0222 //@{
0223 
0224 //! Parallel iteration over range with default partitioner (a temporary __TBB_DEFAULT_PARTITIONER) and no user-supplied context.
0225 /** @ingroup algorithms **/
0226 template<typename Range, typename Body>
0227     __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>)
0228 void parallel_for( const Range& range, const Body& body ) {
0229     start_for<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER()); // the context-less run() creates its own task_group_context
0230 }
0231 
0232 //! Parallel iteration over range with simple partitioner and no user-supplied context.
0233 /** @ingroup algorithms **/
0234 template<typename Range, typename Body>
0235     __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>)
0236 void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner ) {
0237     start_for<Range,Body,const simple_partitioner>::run(range,body,partitioner); // the context-less run() creates its own task_group_context
0238 }
0239 
0240 //! Parallel iteration over range with auto_partitioner and no user-supplied context.
0241 /** @ingroup algorithms **/
0242 template<typename Range, typename Body>
0243     __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>)
0244 void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner ) {
0245     start_for<Range,Body,const auto_partitioner>::run(range,body,partitioner); // the context-less run() creates its own task_group_context
0246 }
0247 
0248 //! Parallel iteration over range with static_partitioner and no user-supplied context.
0249 /** @ingroup algorithms **/
0250 template<typename Range, typename Body>
0251     __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>)
0252 void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner ) {
0253     start_for<Range,Body,const static_partitioner>::run(range,body,partitioner); // the context-less run() creates its own task_group_context
0254 }
0255 
0256 //! Parallel iteration over range with affinity_partitioner (taken by non-const reference, unlike the other partitioners) and no user-supplied context.
0257 /** @ingroup algorithms **/
0258 template<typename Range, typename Body>
0259     __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>)
0260 void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner ) {
0261     start_for<Range,Body,affinity_partitioner>::run(range,body,partitioner); // Partitioner is the non-const affinity_partitioner here
0262 }
0263 
0264 //! Parallel iteration over range with default partitioner (a temporary __TBB_DEFAULT_PARTITIONER) and user-supplied context.
0265 /** @ingroup algorithms **/
0266 template<typename Range, typename Body>
0267     __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>)
0268 void parallel_for( const Range& range, const Body& body, task_group_context& context ) {
0269     start_for<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run(range, body, __TBB_DEFAULT_PARTITIONER(), context); // the caller's context is forwarded unchanged
0270 }
0271 
0272 //! Parallel iteration over range with simple partitioner and user-supplied context.
0273 /** @ingroup algorithms **/
0274 template<typename Range, typename Body>
0275     __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>)
0276 void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
0277     start_for<Range,Body,const simple_partitioner>::run(range, body, partitioner, context); // the caller's context is forwarded unchanged
0278 }
0279 
0280 //! Parallel iteration over range with auto_partitioner and user-supplied context.
0281 /** @ingroup algorithms **/
0282 template<typename Range, typename Body>
0283     __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>)
0284 void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner, task_group_context& context ) {
0285     start_for<Range,Body,const auto_partitioner>::run(range, body, partitioner, context); // the caller's context is forwarded unchanged
0286 }
0287 
0288 //! Parallel iteration over range with static_partitioner and user-supplied context.
0289 /** @ingroup algorithms **/
0290 template<typename Range, typename Body>
0291     __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>)
0292 void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner, task_group_context& context ) {
0293     start_for<Range,Body,const static_partitioner>::run(range, body, partitioner, context); // the caller's context is forwarded unchanged
0294 }
0295 
0296 //! Parallel iteration over range with affinity_partitioner (taken by non-const reference) and user-supplied context.
0297 /** @ingroup algorithms **/
0298 template<typename Range, typename Body>
0299     __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>)
0300 void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner, task_group_context& context ) {
0301     start_for<Range,Body,affinity_partitioner>::run(range,body,partitioner, context); // Partitioner is the non-const affinity_partitioner here
0302 }
0303 
0304 //! Implementation of parallel iteration over stepped range of integers with explicit step and partitioner: runs iteration numbers [0, end) through the range-based parallel_for; does nothing when first >= last
0305 template <typename Index, typename Function, typename Partitioner>
0306 void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner) {
0307     if (step <= 0 )
0308         throw_exception(exception_id::nonpositive_step); // throws std::invalid_argument
0309     else if (first < last) {
0310         // Above "else" avoids "potential divide by zero" warning on some platforms
0311         Index end = Index(last - first - 1ul) / step + Index(1); // iteration count, i.e. (last - first) / step rounded up. NOTE(review): the context-taking overload spells this (last - first - Index(1)) / step + Index(1) - confirm which form is intended
0312         blocked_range<Index> range(static_cast<Index>(0), end); // range of iteration numbers, not of user values
0313         parallel_for_body_wrapper<Function, Index> body(f, first, step); // maps iteration number i to first + i*step
0314         parallel_for(range, body, partitioner);
0315     }
0316 }
0317 
0318 //! Parallel iteration over a range of integers with a step provided and default partitioner (a temporary auto_partitioner is used)
0319 template <typename Index, typename Function>
0320     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0321 void parallel_for(Index first, Index last, Index step, const Function& f) {
0322     parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner()); // step validation and iteration counting happen in parallel_for_impl
0323 }
0324 //! Parallel iteration over a range of integers with a step provided and simple partitioner
0325 template <typename Index, typename Function>
0326     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0327 void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner) {
0328     parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, partitioner); // step validation and iteration counting happen in parallel_for_impl
0329 }
0330 //! Parallel iteration over a range of integers with a step provided and auto partitioner
0331 template <typename Index, typename Function>
0332     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0333 void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner) {
0334     parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, partitioner); // step validation and iteration counting happen in parallel_for_impl
0335 }
0336 //! Parallel iteration over a range of integers with a step provided and static partitioner
0337 template <typename Index, typename Function>
0338     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0339 void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner) {
0340     parallel_for_impl<Index,Function,const static_partitioner>(first, last, step, f, partitioner); // step validation and iteration counting happen in parallel_for_impl
0341 }
0342 //! Parallel iteration over a range of integers with a step provided and affinity partitioner (taken by non-const reference)
0343 template <typename Index, typename Function>
0344     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0345 void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner) {
0346     parallel_for_impl(first, last, step, f, partitioner); // template arguments are deduced, so Partitioner is the non-const affinity_partitioner
0347 }
0348 
0349 //! Parallel iteration over a range of integers with a default step value and default partitioner (a temporary auto_partitioner is used)
0350 template <typename Index, typename Function>
0351     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0352 void parallel_for(Index first, Index last, const Function& f) {
0353     parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner()); // the default step is Index 1
0354 }
0355 //! Parallel iteration over a range of integers with a default step value and simple partitioner
0356 template <typename Index, typename Function>
0357     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0358 void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner) {
0359     parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, partitioner); // the default step is Index 1
0360 }
0361 //! Parallel iteration over a range of integers with a default step value and auto partitioner
0362 template <typename Index, typename Function>
0363     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0364 void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner) {
0365     parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, partitioner); // the default step is Index 1
0366 }
0367 //! Parallel iteration over a range of integers with a default step value and static partitioner
0368 template <typename Index, typename Function>
0369     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0370 void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner) {
0371     parallel_for_impl<Index,Function,const static_partitioner>(first, last, static_cast<Index>(1), f, partitioner); // the default step is Index 1
0372 }
0373 //! Parallel iteration over a range of integers with a default step value and affinity partitioner (taken by non-const reference)
0374 template <typename Index, typename Function>
0375     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0376 void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner) {
0377     parallel_for_impl(first, last, static_cast<Index>(1), f, partitioner); // the default step is Index 1; template arguments are deduced
0378 }
0379 
0380 //! Implementation of parallel iteration over stepped range of integers with explicit step, task group context, and partitioner: runs iteration numbers [0, end) through the range-based parallel_for; does nothing when first >= last
0381 template <typename Index, typename Function, typename Partitioner>
0382 void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner, task_group_context &context) {
0383     if (step <= 0 )
0384         throw_exception(exception_id::nonpositive_step); // throws std::invalid_argument
0385     else if (first < last) {
0386         // Above "else" avoids "potential divide by zero" warning on some platforms
0387         Index end = (last - first - Index(1)) / step + Index(1); // iteration count, i.e. (last - first) / step rounded up. NOTE(review): the context-less overload spells this Index(last - first - 1ul) / step + Index(1) - confirm which form is intended
0388         blocked_range<Index> range(static_cast<Index>(0), end); // range of iteration numbers, not of user values
0389         parallel_for_body_wrapper<Function, Index> body(f, first, step); // maps iteration number i to first + i*step
0390         parallel_for(range, body, partitioner, context);
0391     }
0392 }
0393 
0394 //! Parallel iteration over a range of integers with explicit step, task group context, and default partitioner (a temporary auto_partitioner is used)
0395 template <typename Index, typename Function>
0396     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0397 void parallel_for(Index first, Index last, Index step, const Function& f, task_group_context &context) {
0398     parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner(), context); // step validation and iteration counting happen in parallel_for_impl
0399 }
0400 //! Parallel iteration over a range of integers with explicit step, task group context, and simple partitioner
0401 template <typename Index, typename Function>
0402     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0403 void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner, task_group_context &context) {
0404     parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, partitioner, context); // step validation and iteration counting happen in parallel_for_impl
0405 }
0406 //! Parallel iteration over a range of integers with explicit step, task group context, and auto partitioner
0407 template <typename Index, typename Function>
0408     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0409 void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner, task_group_context &context) {
0410     parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, partitioner, context); // step validation and iteration counting happen in parallel_for_impl
0411 }
0412 //! Parallel iteration over a range of integers with explicit step, task group context, and static partitioner
0413 template <typename Index, typename Function>
0414     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0415 void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner, task_group_context &context) {
0416     parallel_for_impl<Index,Function,const static_partitioner>(first, last, step, f, partitioner, context); // step validation and iteration counting happen in parallel_for_impl
0417 }
0418 //! Parallel iteration over a range of integers with explicit step, task group context, and affinity partitioner (taken by non-const reference)
0419 template <typename Index, typename Function>
0420     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0421 void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner, task_group_context &context) {
0422     parallel_for_impl(first, last, step, f, partitioner, context); // template arguments are deduced, so Partitioner is the non-const affinity_partitioner
0423 }
0424 
0425 //! Parallel iteration over a range of integers with a default step value, explicit task group context, and default partitioner (a temporary auto_partitioner is used)
0426 template <typename Index, typename Function>
0427     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0428 void parallel_for(Index first, Index last, const Function& f, task_group_context &context) {
0429     parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner(), context); // the default step is Index 1
0430 }
0431 //! Parallel iteration over a range of integers with a default step value, explicit task group context, and simple partitioner
0432 template <typename Index, typename Function>
0433     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0434 void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner, task_group_context &context) {
0435     parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context); // the default step is Index 1
0436 }
0437 //! Parallel iteration over a range of integers with a default step value, explicit task group context, and auto partitioner
0438 template <typename Index, typename Function>
0439     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0440 void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner, task_group_context &context) {
0441     parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context); // the default step is Index 1
0442 }
0443 //! Parallel iteration over a range of integers with a default step value, explicit task group context, and static partitioner
0444 template <typename Index, typename Function>
0445     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0446 void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner, task_group_context &context) {
0447     parallel_for_impl<Index,Function,const static_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context); // the default step is Index 1
0448 }
0449 //! Parallel iteration over a range of integers with a default step value, explicit task group context, and affinity_partitioner (taken by non-const reference)
0450 template <typename Index, typename Function>
0451     __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>)
0452 void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner, task_group_context &context) {
0453     parallel_for_impl(first, last, static_cast<Index>(1), f, partitioner, context); // the default step is Index 1; template arguments are deduced
0454 }
0455 //@}
0456 
0457 } // namespace d1
0458 } // namespace detail
0459 
0460 inline namespace v1 { // inline namespace: the names below are reachable directly as tbb::<name>
0461 using detail::d1::parallel_for; // brings in the parallel_for overloads declared in detail::d1
0462 // Split types
0463 using detail::split;
0464 using detail::proportional_split;
0465 } // namespace v1
0466 
0467 } // namespace tbb
0468 
0469 #endif /* __TBB_parallel_for_H */