Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-09-17 08:54:09

0001 //---------------------------------*-CUDA-*----------------------------------//
0002 // Copyright 2023-2024 UT-Battelle, LLC, and other Celeritas developers.
0003 // See the top-level COPYRIGHT file for details.
0004 // SPDX-License-Identifier: (Apache-2.0 OR MIT)
0005 //---------------------------------------------------------------------------//
0006 //! \file corecel/sys/KernelLauncher.device.hh
0007 //---------------------------------------------------------------------------//
0008 #pragma once
0009 
0010 #include <string_view>
0011 #include <type_traits>
0012 
0013 #include "corecel/DeviceRuntimeApi.hh"
0014 
0015 #include "corecel/Assert.hh"
0016 #include "corecel/Macros.hh"
0017 #include "corecel/Types.hh"
0018 #include "corecel/cont/Range.hh"
0019 
0020 #include "Device.hh"
0021 #include "KernelParamCalculator.device.hh"
0022 #include "Stream.hh"
0023 #include "ThreadId.hh"
0024 
0025 #include "detail/KernelLauncherImpl.device.hh"
0026 
0027 namespace celeritas
0028 {
0029 //---------------------------------------------------------------------------//
0030 /*!
0031  * Profile and launch Celeritas kernels.
0032  *
0033  * The template argument \c F may define a member type named \c Applier.
0034  * \c F::Applier should have up to two static constexpr int variables named
0035  * \c max_block_size and/or \c min_warps_per_eu.
0036  * If present, the kernel will use appropriate \c __launch_bounds__.
0037  * If \c F::Applier::min_warps_per_eu exists then \c F::Applier::max_block_size
0038  * must also be present or we get a compile error.
0039  *
0040  * The semantics of the second \c __launch_bounds__ argument differs between
0041  * CUDA and HIP.  \c KernelLauncher expects HIP semantics. If Celeritas is
0042  * built targeting CUDA, it will automatically convert that argument to match
0043  * CUDA semantics.
0044  *
0045  * The CUDA-specific 3rd argument \c maxBlocksPerCluster is not supported.
0046  *
0047  * Example:
0048  * \code
0049  void launch_kernel(DeviceParams const& params, size_type count) const
0050  {
0051     auto execute_thread = BlahExecutor{params};
0052     static KernelLauncher<decltype(execute_thread)> const launch("blah");
0053     launch_kernel(count, StreamId{}, execute_thread);
0054  }
0055  * \endcode
0056  */
0057 template<class F>
0058 class KernelLauncher
0059 {
0060     static_assert(
0061         (std::is_trivially_copyable_v<F> || CELERITAS_USE_HIP
0062          || CELER_COMPILER == CELER_COMPILER_CLANG)
0063             && !std::is_pointer_v<F> && !std::is_reference_v<F>,
0064         R"(Launched action must be a trivially copyable function object)");
0065 
0066   public:
0067     // Create a launcher from a label
0068     explicit inline KernelLauncher(std::string_view name);
0069 
0070     // Launch a kernel for a thread range
0071     inline void operator()(Range<ThreadId> threads,
0072                            StreamId stream_id,
0073                            F const& execute_thread) const;
0074 
0075     // Launch a kernel with a custom number of threads
0076     inline void operator()(size_type num_threads,
0077                            StreamId stream_id,
0078                            F const& execute_thread) const;
0079 
0080   private:
0081     KernelParamCalculator calc_launch_params_;
0082 };
0083 
0084 //---------------------------------------------------------------------------//
0085 // INLINE FUNCTIONS
0086 //---------------------------------------------------------------------------//
0087 /*!
0088  * Create a launcher from a label.
0089  */
0090 template<class F>
0091 KernelLauncher<F>::KernelLauncher(std::string_view name)
0092     : calc_launch_params_{name, &detail::launch_action_impl<F>}
0093 {
0094 }
0095 
0096 //---------------------------------------------------------------------------//
0097 /*!
0098  * Launch a kernel for a thread range.
0099  */
0100 template<class F>
0101 void KernelLauncher<F>::operator()(Range<ThreadId> threads,
0102                                    StreamId stream_id,
0103                                    F const& execute_thread) const
0104 {
0105     if (!threads.empty())
0106     {
0107         using StreamT = CELER_DEVICE_API_SYMBOL(Stream_t);
0108         StreamT stream = stream_id
0109                              ? celeritas::device().stream(stream_id).get()
0110                              : nullptr;
0111         auto config = calc_launch_params_(threads.size());
0112         detail::launch_action_impl<F>
0113             <<<config.blocks_per_grid, config.threads_per_block, 0, stream>>>(
0114                 threads, execute_thread);
0115     }
0116 }
0117 
0118 //---------------------------------------------------------------------------//
0119 /*!
0120  * Launch a kernel with a custom number of threads.
0121  *
0122  * The launch arguments have the same ordering as CUDA/HIP kernel launch
0123  * arguments.
0124  *
0125  * \param num_threads Total number of active consecutive threads
0126  * \param stream_id Execute the kernel on this device stream
0127  * \param execute_thread Call the given functor with the thread ID
0128  */
0129 template<class F>
0130 void KernelLauncher<F>::operator()(size_type num_threads,
0131                                    StreamId stream_id,
0132                                    F const& execute_thread) const
0133 {
0134     (*this)(range(ThreadId{num_threads}), stream_id, execute_thread);
0135 }
0136 
0137 //---------------------------------------------------------------------------//
0138 }  // namespace celeritas