|
|
|||
File indexing completed on 2026-05-08 08:37:10
0001 //------------------------------ -*- C++ -*- -------------------------------// 0002 // Copyright Celeritas contributors: see top-level COPYRIGHT file for details 0003 // SPDX-License-Identifier: (Apache-2.0 OR MIT) 0004 //---------------------------------------------------------------------------// 0005 //! \file corecel/sys/KernelLauncher.device.hh 0006 //---------------------------------------------------------------------------// 0007 #pragma once 0008 0009 #include <string_view> 0010 #include <type_traits> 0011 0012 #include "corecel/DeviceRuntimeApi.hh" 0013 0014 #include "corecel/Assert.hh" 0015 #include "corecel/Macros.hh" 0016 #include "corecel/Types.hh" 0017 #include "corecel/cont/Range.hh" 0018 0019 #include "Device.hh" 0020 #include "KernelParamCalculator.device.hh" 0021 #include "Stream.hh" 0022 #include "ThreadId.hh" 0023 0024 #include "detail/KernelLauncherImpl.device.hh" 0025 0026 namespace celeritas 0027 { 0028 //---------------------------------------------------------------------------// 0029 /*! 0030 * Profile and launch Celeritas kernels. 0031 * 0032 * The template argument \c F may define a member type named \c Applier. 0033 * \c F::Applier should have up to two static constexpr int variables named 0034 * \c max_block_size and/or \c min_warps_per_eu. 0035 * If present, the kernel will use appropriate \c __launch_bounds__. 0036 * If \c F::Applier::min_warps_per_eu exists then \c F::Applier::max_block_size 0037 * must also be present or we get a compile error. 0038 * 0039 * The semantics of the second \c __launch_bounds__ argument differs between 0040 * CUDA and HIP. \c KernelLauncher expects HIP semantics. If Celeritas is 0041 * built targeting CUDA, it will automatically convert that argument to match 0042 * CUDA semantics. 0043 * 0044 * The CUDA-specific 3rd argument \c maxBlocksPerCluster is not supported. 0045 * 0046 * Example: 0047 * \code 0048 void launch_kernel(DeviceParams const& params, size_type count) const 0049 { 0050 auto execute_thread = BlahExecutor{params}; 0051 static KernelLauncher<decltype(execute_thread)> const launch("blah"); 0052 launch_kernel(count, StreamId{}, execute_thread); 0053 } 0054 * \endcode 0055 */ 0056 template<class F> 0057 class KernelLauncher 0058 { 0059 static_assert( 0060 (std::is_trivially_copyable_v<F> || CELERITAS_USE_HIP 0061 || CELER_COMPILER == CELER_COMPILER_CLANG) 0062 && !std::is_pointer_v<F> && !std::is_reference_v<F>, 0063 R"(Launched action must be a trivially copyable function object)"); 0064 0065 public: 0066 // Create a launcher from a label 0067 explicit inline KernelLauncher(std::string_view name); 0068 0069 // Launch a kernel for a thread range 0070 inline void operator()(Range<ThreadId> threads, 0071 StreamId stream_id, 0072 F const& execute_thread) const; 0073 0074 // Launch a kernel with a custom number of threads 0075 inline void operator()(size_type num_threads, 0076 StreamId stream_id, 0077 F const& execute_thread) const; 0078 0079 private: 0080 KernelParamCalculator calc_launch_params_; 0081 }; 0082 0083 //---------------------------------------------------------------------------// 0084 // INLINE FUNCTIONS 0085 //---------------------------------------------------------------------------// 0086 /*! 0087 * Create a launcher from a label. 0088 */ 0089 template<class F> 0090 KernelLauncher<F>::KernelLauncher(std::string_view name) 0091 : calc_launch_params_{name, &detail::launch_action_impl<F>} 0092 { 0093 } 0094 0095 //---------------------------------------------------------------------------// 0096 /*! 0097 * Launch a kernel for a thread range. 0098 */ 0099 template<class F> 0100 void KernelLauncher<F>::operator()(Range<ThreadId> threads, 0101 StreamId stream_id, 0102 F const& execute_thread) const 0103 { 0104 if (!threads.empty()) 0105 { 0106 using StreamT = CELER_DEVICE_API_SYMBOL(Stream_t); 0107 StreamT stream = stream_id 0108 ? celeritas::device().stream(stream_id).get() 0109 : nullptr; 0110 auto config = calc_launch_params_(threads.size()); 0111 detail::launch_action_impl<F> 0112 <<<config.blocks_per_grid, config.threads_per_block, 0, stream>>>( 0113 threads, execute_thread); 0114 } 0115 } 0116 0117 //---------------------------------------------------------------------------// 0118 /*! 0119 * Launch a kernel with a custom number of threads. 0120 * 0121 * The launch arguments have the same ordering as CUDA/HIP kernel launch 0122 * arguments. 0123 * 0124 * \param num_threads Total number of active consecutive threads 0125 * \param stream_id Execute the kernel on this device stream 0126 * \param execute_thread Call the given functor with the thread ID 0127 */ 0128 template<class F> 0129 void KernelLauncher<F>::operator()(size_type num_threads, 0130 StreamId stream_id, 0131 F const& execute_thread) const 0132 { 0133 (*this)(range(ThreadId{num_threads}), stream_id, execute_thread); 0134 } 0135 0136 //---------------------------------------------------------------------------// 0137 } // namespace celeritas
| [ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
|
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
|