![]() |
|
|||
File indexing completed on 2025-09-17 08:54:09
0001 //---------------------------------*-CUDA-*----------------------------------// 0002 // Copyright 2023-2024 UT-Battelle, LLC, and other Celeritas developers. 0003 // See the top-level COPYRIGHT file for details. 0004 // SPDX-License-Identifier: (Apache-2.0 OR MIT) 0005 //---------------------------------------------------------------------------// 0006 //! \file corecel/sys/KernelLauncher.device.hh 0007 //---------------------------------------------------------------------------// 0008 #pragma once 0009 0010 #include <string_view> 0011 #include <type_traits> 0012 0013 #include "corecel/DeviceRuntimeApi.hh" 0014 0015 #include "corecel/Assert.hh" 0016 #include "corecel/Macros.hh" 0017 #include "corecel/Types.hh" 0018 #include "corecel/cont/Range.hh" 0019 0020 #include "Device.hh" 0021 #include "KernelParamCalculator.device.hh" 0022 #include "Stream.hh" 0023 #include "ThreadId.hh" 0024 0025 #include "detail/KernelLauncherImpl.device.hh" 0026 0027 namespace celeritas 0028 { 0029 //---------------------------------------------------------------------------// 0030 /*! 0031 * Profile and launch Celeritas kernels. 0032 * 0033 * The template argument \c F may define a member type named \c Applier. 0034 * \c F::Applier should have up to two static constexpr int variables named 0035 * \c max_block_size and/or \c min_warps_per_eu. 0036 * If present, the kernel will use appropriate \c __launch_bounds__. 0037 * If \c F::Applier::min_warps_per_eu exists then \c F::Applier::max_block_size 0038 * must also be present or we get a compile error. 0039 * 0040 * The semantics of the second \c __launch_bounds__ argument differs between 0041 * CUDA and HIP. \c KernelLauncher expects HIP semantics. If Celeritas is 0042 * built targeting CUDA, it will automatically convert that argument to match 0043 * CUDA semantics. 0044 * 0045 * The CUDA-specific 3rd argument \c maxBlocksPerCluster is not supported. 0046 * 0047 * Example: 0048 * \code 0049 void launch_kernel(DeviceParams const& params, size_type count) const 0050 { 0051 auto execute_thread = BlahExecutor{params}; 0052 static KernelLauncher<decltype(execute_thread)> const launch("blah"); 0053 launch_kernel(count, StreamId{}, execute_thread); 0054 } 0055 * \endcode 0056 */ 0057 template<class F> 0058 class KernelLauncher 0059 { 0060 static_assert( 0061 (std::is_trivially_copyable_v<F> || CELERITAS_USE_HIP 0062 || CELER_COMPILER == CELER_COMPILER_CLANG) 0063 && !std::is_pointer_v<F> && !std::is_reference_v<F>, 0064 R"(Launched action must be a trivially copyable function object)"); 0065 0066 public: 0067 // Create a launcher from a label 0068 explicit inline KernelLauncher(std::string_view name); 0069 0070 // Launch a kernel for a thread range 0071 inline void operator()(Range<ThreadId> threads, 0072 StreamId stream_id, 0073 F const& execute_thread) const; 0074 0075 // Launch a kernel with a custom number of threads 0076 inline void operator()(size_type num_threads, 0077 StreamId stream_id, 0078 F const& execute_thread) const; 0079 0080 private: 0081 KernelParamCalculator calc_launch_params_; 0082 }; 0083 0084 //---------------------------------------------------------------------------// 0085 // INLINE FUNCTIONS 0086 //---------------------------------------------------------------------------// 0087 /*! 0088 * Create a launcher from a label. 0089 */ 0090 template<class F> 0091 KernelLauncher<F>::KernelLauncher(std::string_view name) 0092 : calc_launch_params_{name, &detail::launch_action_impl<F>} 0093 { 0094 } 0095 0096 //---------------------------------------------------------------------------// 0097 /*! 0098 * Launch a kernel for a thread range. 0099 */ 0100 template<class F> 0101 void KernelLauncher<F>::operator()(Range<ThreadId> threads, 0102 StreamId stream_id, 0103 F const& execute_thread) const 0104 { 0105 if (!threads.empty()) 0106 { 0107 using StreamT = CELER_DEVICE_API_SYMBOL(Stream_t); 0108 StreamT stream = stream_id 0109 ? celeritas::device().stream(stream_id).get() 0110 : nullptr; 0111 auto config = calc_launch_params_(threads.size()); 0112 detail::launch_action_impl<F> 0113 <<<config.blocks_per_grid, config.threads_per_block, 0, stream>>>( 0114 threads, execute_thread); 0115 } 0116 } 0117 0118 //---------------------------------------------------------------------------// 0119 /*! 0120 * Launch a kernel with a custom number of threads. 0121 * 0122 * The launch arguments have the same ordering as CUDA/HIP kernel launch 0123 * arguments. 0124 * 0125 * \param num_threads Total number of active consecutive threads 0126 * \param stream_id Execute the kernel on this device stream 0127 * \param execute_thread Call the given functor with the thread ID 0128 */ 0129 template<class F> 0130 void KernelLauncher<F>::operator()(size_type num_threads, 0131 StreamId stream_id, 0132 F const& execute_thread) const 0133 { 0134 (*this)(range(ThreadId{num_threads}), stream_id, execute_thread); 0135 } 0136 0137 //---------------------------------------------------------------------------// 0138 } // namespace celeritas
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
![]() ![]() |