|
||||
File indexing completed on 2025-01-18 09:54:49
0001 //---------------------------------*-CUDA-*----------------------------------// 0002 // Copyright 2023-2024 UT-Battelle, LLC, and other Celeritas developers. 0003 // See the top-level COPYRIGHT file for details. 0004 // SPDX-License-Identifier: (Apache-2.0 OR MIT) 0005 //---------------------------------------------------------------------------// 0006 //! \file corecel/sys/KernelLauncher.device.hh 0007 //---------------------------------------------------------------------------// 0008 #pragma once 0009 0010 #include <string_view> 0011 #include <type_traits> 0012 0013 #include "corecel/DeviceRuntimeApi.hh" 0014 0015 #include "corecel/Assert.hh" 0016 #include "corecel/Macros.hh" 0017 #include "corecel/Types.hh" 0018 #include "corecel/cont/Range.hh" 0019 0020 #include "Device.hh" 0021 #include "KernelParamCalculator.device.hh" 0022 #include "Stream.hh" 0023 #include "ThreadId.hh" 0024 0025 #include "detail/KernelLauncherImpl.device.hh" 0026 0027 namespace celeritas 0028 { 0029 //---------------------------------------------------------------------------// 0030 /*! 0031 * Profile and launch Celeritas kernels. 0032 * 0033 * The template argument \c F may define a member type named \c Applier. 0034 * \c F::Applier should have up to two static constexpr int variables named 0035 * \c max_block_size and/or \c min_warps_per_eu. 0036 * If present, the kernel will use appropriate \c __launch_bounds__. 0037 * If \c F::Applier::min_warps_per_eu exists then \c F::Applier::max_block_size 0038 * must also be present or we get a compile error. 0039 * 0040 * The semantics of the second \c __launch_bounds__ argument differs between 0041 * CUDA and HIP. \c KernelLauncher expects HIP semantics. If Celeritas is 0042 * built targeting CUDA, it will automatically convert that argument to match 0043 * CUDA semantics. 0044 * 0045 * The CUDA-specific 3rd argument \c maxBlocksPerCluster is not supported. 0046 * 0047 * Example: 0048 * \code 0049 void FooAction::launch_kernel(size_type count) const 0050 { 0051 auto execute_thread = make_blah_executor(blah); 0052 static KernelLauncher<decltype(execute_thread)> const 0053 launch_kernel("blah"); 0054 launch_kernel(state, execute_thread); 0055 } 0056 * \endcode 0057 */ 0058 template<class F> 0059 class KernelLauncher 0060 { 0061 static_assert( 0062 (std::is_trivially_copyable_v<F> || CELERITAS_USE_HIP 0063 || CELER_COMPILER == CELER_COMPILER_CLANG) 0064 && !std::is_pointer_v<F> && !std::is_reference_v<F>, 0065 R"(Launched action must be a trivially copyable function object)"); 0066 0067 public: 0068 // Create a launcher from a label 0069 explicit inline KernelLauncher(std::string_view name); 0070 0071 // Launch a kernel for a thread range 0072 inline void operator()(Range<ThreadId> threads, 0073 StreamId stream_id, 0074 F const& call_thread) const; 0075 0076 // Launch a kernel with a custom number of threads 0077 inline void operator()(size_type num_threads, 0078 StreamId stream_id, 0079 F const& call_thread) const; 0080 0081 private: 0082 KernelParamCalculator calc_launch_params_; 0083 }; 0084 0085 //---------------------------------------------------------------------------// 0086 // INLINE FUNCTIONS 0087 //---------------------------------------------------------------------------// 0088 /*! 0089 * Create a launcher from a label. 0090 */ 0091 template<class F> 0092 KernelLauncher<F>::KernelLauncher(std::string_view name) 0093 : calc_launch_params_{name, &detail::launch_action_impl<F>} 0094 { 0095 } 0096 0097 //---------------------------------------------------------------------------// 0098 /*! 0099 * Launch a kernel for a thread range. 0100 */ 0101 template<class F> 0102 void KernelLauncher<F>::operator()(Range<ThreadId> threads, 0103 StreamId stream_id, 0104 F const& call_thread) const 0105 { 0106 if (!threads.empty()) 0107 { 0108 using StreamT = CELER_DEVICE_PREFIX(Stream_t); 0109 StreamT stream = celeritas::device().stream(stream_id).get(); 0110 auto config = calc_launch_params_(threads.size()); 0111 detail::launch_action_impl<F> 0112 <<<config.blocks_per_grid, config.threads_per_block, 0, stream>>>( 0113 threads, call_thread); 0114 } 0115 } 0116 0117 //---------------------------------------------------------------------------// 0118 /*! 0119 * Launch a kernel with a custom number of threads. 0120 * 0121 * The launch arguments have the same ordering as CUDA/HIP kernel launch 0122 * arguments. 0123 * 0124 * \param num_threads Total number of active consecutive threads 0125 * \param stream_id Execute the kernel on this device stream 0126 * \param call_thread Call the given functor with the thread ID 0127 */ 0128 template<class F> 0129 void KernelLauncher<F>::operator()(size_type num_threads, 0130 StreamId stream_id, 0131 F const& call_thread) const 0132 { 0133 CELER_EXPECT(num_threads > 0); 0134 CELER_EXPECT(stream_id); 0135 (*this)(range(ThreadId{num_threads}), stream_id, call_thread); 0136 } 0137 0138 //---------------------------------------------------------------------------// 0139 } // namespace celeritas
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |