oneapi/tbb/parallel_invoke.h

0001 /*
0002     Copyright (c) 2005-2023 Intel Corporation
0003
0004     Licensed under the Apache License, Version 2.0 (the "License");
0005     you may not use this file except in compliance with the License.
0006     You may obtain a copy of the License at
0007
0008         http://www.apache.org/licenses/LICENSE-2.0
0009
0010     Unless required by applicable law or agreed to in writing, software
0011     distributed under the License is distributed on an "AS IS" BASIS,
0012     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0013     See the License for the specific language governing permissions and
0014     limitations under the License.
0015 */
0016
0017 #ifndef __TBB_parallel_invoke_H
0018 #define __TBB_parallel_invoke_H
0019
0020 #include "detail/_config.h"
0021 #include "detail/_namespace_injection.h"
0022 #include "detail/_exception.h"
0023 #include "detail/_task.h"
0024 #include "detail/_template_helpers.h"
0025 #include "detail/_small_object_pool.h"
0026
0027 #include "task_group.h"
0028
0029 #include <tuple>
0030 #include <atomic>
0031 #include <utility>
0032
0033 namespace tbb {
0034 namespace detail {
0035 namespace d1 {
0036
0037 //! Simple task object, executing user method
0038 template<typename Function, typename WaitObject>
0039 struct function_invoker : public task {
0040     function_invoker(const Function& function, WaitObject& wait_ctx) :
0041         my_function(function),
0042         parent_wait_ctx(wait_ctx)
0043     {}
0044
0045     task* execute(execution_data& ed) override {
0046         my_function();
0047         parent_wait_ctx.release(ed);
0048         call_itt_task_notify(destroy, this);
0049         return nullptr;
0050     }
0051
0052     task* cancel(execution_data& ed) override {
0053         parent_wait_ctx.release(ed);
0054         return nullptr;
0055     }
0056
0057     const Function& my_function;
0058     WaitObject& parent_wait_ctx;
0059 }; // struct function_invoker
0060
0061 //! Task object for managing subroots in trinary task trees.
0062 // Endowed with additional synchronization logic (compatible with wait object interfaces) to support
0063 // continuation passing execution. This task spawns 2 function_invoker tasks with first and second functors
0064 // and then executes first functor by itself. But only the last executed functor must destruct and deallocate
0065 // the subroot task.
0066 template<typename F1, typename F2, typename F3>
0067 struct invoke_subroot_task : public task {
0068     wait_context& root_wait_ctx;
0069     std::atomic<unsigned> ref_count{0};
0070     bool child_spawned = false;
0071
0072     const F1& self_invoked_functor;
0073     function_invoker<F2, invoke_subroot_task<F1, F2, F3>> f2_invoker;
0074     function_invoker<F3, invoke_subroot_task<F1, F2, F3>> f3_invoker;
0075
0076     task_group_context& my_execution_context;
0077     small_object_allocator my_allocator;
0078
0079     invoke_subroot_task(const F1& f1, const F2& f2, const F3& f3, wait_context& wait_ctx, task_group_context& context,
0080                  small_object_allocator& alloc) :
0081         root_wait_ctx(wait_ctx),
0082         self_invoked_functor(f1),
0083         f2_invoker(f2, *this),
0084         f3_invoker(f3, *this),
0085         my_execution_context(context),
0086         my_allocator(alloc)
0087     {
0088         root_wait_ctx.reserve();
0089     }
0090
0091     void finalize(const execution_data& ed) {
0092         root_wait_ctx.release();
0093
0094         my_allocator.delete_object(this, ed);
0095     }
0096
0097     void release(const execution_data& ed) {
0098         __TBB_ASSERT(ref_count > 0, nullptr);
0099         call_itt_task_notify(releasing, this);
0100         if( --ref_count == 0 ) {
0101             call_itt_task_notify(acquired, this);
0102             finalize(ed);
0103         }
0104     }
0105
0106     task* execute(execution_data& ed) override {
0107         ref_count.fetch_add(3, std::memory_order_relaxed);
0108         spawn(f3_invoker, my_execution_context);
0109         spawn(f2_invoker, my_execution_context);
0110         self_invoked_functor();
0111
0112         release(ed);
0113         return nullptr;
0114     }
0115
0116     task* cancel(execution_data& ed) override {
0117         if( ref_count > 0 ) { // detect children spawn
0118             release(ed);
0119         } else {
0120             finalize(ed);
0121         }
0122         return nullptr;
0123     }
0124 }; // struct subroot_task
0125
0126 class invoke_root_task {
0127 public:
0128     invoke_root_task(wait_context& wc) : my_wait_context(wc) {}
0129     void release(const execution_data&) {
0130         my_wait_context.release();
0131     }
0132 private:
0133     wait_context& my_wait_context;
0134 };
0135
0136 template<typename F1>
0137 void invoke_recursive_separation(wait_context& root_wait_ctx, task_group_context& context, const F1& f1) {
0138     root_wait_ctx.reserve(1);
0139     invoke_root_task root(root_wait_ctx);
0140     function_invoker<F1, invoke_root_task> invoker1(f1, root);
0141
0142     execute_and_wait(invoker1, context, root_wait_ctx, context);
0143 }
0144
0145 template<typename F1, typename F2>
0146 void invoke_recursive_separation(wait_context& root_wait_ctx, task_group_context& context, const F1& f1, const F2& f2) {
0147     root_wait_ctx.reserve(2);
0148     invoke_root_task root(root_wait_ctx);
0149     function_invoker<F1, invoke_root_task> invoker1(f1, root);
0150     function_invoker<F2, invoke_root_task> invoker2(f2, root);
0151
0152     spawn(invoker1, context);
0153     execute_and_wait(invoker2, context, root_wait_ctx, context);
0154 }
0155
0156 template<typename F1, typename F2, typename F3>
0157 void invoke_recursive_separation(wait_context& root_wait_ctx, task_group_context& context, const F1& f1, const F2& f2, const F3& f3) {
0158     root_wait_ctx.reserve(3);
0159     invoke_root_task root(root_wait_ctx);
0160     function_invoker<F1, invoke_root_task> invoker1(f1, root);
0161     function_invoker<F2, invoke_root_task> invoker2(f2, root);
0162     function_invoker<F3, invoke_root_task> invoker3(f3, root);
0163
0164     //TODO: implement sub root for two tasks (measure performance)
0165     spawn(invoker1, context);
0166     spawn(invoker2, context);
0167     execute_and_wait(invoker3, context, root_wait_ctx, context);
0168 }
0169
0170 template<typename F1, typename F2, typename F3, typename... Fs>
0171 void invoke_recursive_separation(wait_context& root_wait_ctx, task_group_context& context,
0172                                  const F1& f1, const F2& f2, const F3& f3, const Fs&... fs) {
0173     small_object_allocator alloc{};
0174     auto sub_root = alloc.new_object<invoke_subroot_task<F1, F2, F3>>(f1, f2, f3, root_wait_ctx, context, alloc);
0175     spawn(*sub_root, context);
0176
0177     invoke_recursive_separation(root_wait_ctx, context, fs...);
0178 }
0179
0180 template<typename... Fs>
0181 void parallel_invoke_impl(task_group_context& context, const Fs&... fs) {
0182     static_assert(sizeof...(Fs) >= 2, "Parallel invoke may be called with at least two callable");
0183     wait_context root_wait_ctx{0};
0184
0185     invoke_recursive_separation(root_wait_ctx, context, fs...);
0186 }
0187
0188 template<typename F1, typename... Fs>
0189 void parallel_invoke_impl(const F1& f1, const Fs&... fs) {
0190     static_assert(sizeof...(Fs) >= 1, "Parallel invoke may be called with at least two callable");
0191     task_group_context context(PARALLEL_INVOKE);
0192     wait_context root_wait_ctx{0};
0193
0194     invoke_recursive_separation(root_wait_ctx, context, fs..., f1);
0195 }
0196
//! Passes the last argument of a variadic pack as the first one to parallel_invoke_impl.
// The last argument may be a user provided task_group_context; with it in front, overload
// resolution between the parallel_invoke_impl overloads decides how it is treated.
// The std::tuple parameter accumulates the argument types that precede the last one.
template <typename Tuple, typename... Fs>
struct invoke_helper;

// Recursive case: Head is appended to the accumulated types and the search for the last type
// continues in the base class. When Head is the only type left, the specialization below is
// the more specialized match and is chosen instead.
template <typename... Seen, typename Head, typename... Tail>
struct invoke_helper<std::tuple<Seen...>, Head, Tail...>
    : invoke_helper<std::tuple<Seen..., Head>, Tail...> {};

// Terminal case: Last is the final argument type, Leading... are all the types before it.
template <typename... Leading, typename Last/*task_group_context or callable*/>
struct invoke_helper<std::tuple<Leading...>, Last> {
    // Accepts the arguments in their original order and forwards them with the last one first.
    void operator()(Leading&&... args, Last&& t) {
        parallel_invoke_impl(std::forward<Last>(t), std::forward<Leading>(args)...);
    }
};
0210
0211 //! Parallel execution of several function objects
0212 // We need to pass parameter pack through forwarding reference,
0213 // since this pack may contain task_group_context that must be passed via lvalue non-const reference
0214 template<typename... Fs>
0215 void parallel_invoke(Fs&&... fs) {
0216     invoke_helper<std::tuple<>, Fs...>()(std::forward<Fs>(fs)...);
0217 }
0218
0219 } // namespace d1
0220 } // namespace detail
0221
0222 inline namespace v1 {
0223 using detail::d1::parallel_invoke;
0224 } // namespace v1
0225
0226 } // namespace tbb
0227 #endif /* __TBB_parallel_invoke_H */