Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:54:49

0001 //---------------------------------*-CUDA-*----------------------------------//
0002 // Copyright 2023-2024 UT-Battelle, LLC, and other Celeritas developers.
0003 // See the top-level COPYRIGHT file for details.
0004 // SPDX-License-Identifier: (Apache-2.0 OR MIT)
0005 //---------------------------------------------------------------------------//
0006 //! \file corecel/sys/detail/KernelLauncherImpl.device.hh
0007 //---------------------------------------------------------------------------//
0008 #pragma once
0009 
0010 #include "corecel/Config.hh"
0011 #include "corecel/DeviceRuntimeApi.hh"
0012 
0013 #include "corecel/Macros.hh"
0014 #include "corecel/cont/Range.hh"
0015 #include "corecel/sys/KernelParamCalculator.device.hh"
0016 #include "corecel/sys/KernelTraits.hh"
0017 #include "corecel/sys/ThreadId.hh"
0018 
0019 namespace celeritas
0020 {
0021 namespace detail
0022 {
0023 namespace
0024 {
0025 //---------------------------------------------------------------------------//
0026 /*!
0027  * Celeritas executor kernel implementation.
0028  */
0029 template<class F>
0030 __device__ CELER_FORCEINLINE void
0031 launch_kernel_impl(Range<ThreadId> const& thread_range, F& execute_thread)
0032 {
0033     auto tid = celeritas::KernelParamCalculator::thread_id();
0034     if (!(tid < thread_range.size()))
0035         return;
0036     execute_thread(*(thread_range.cbegin() + tid.get()));
0037 }
0038 
0039 //---------------------------------------------------------------------------//
0040 //!@{
0041 //! Launch the given executor using thread ids in the thread_range.
0042 
0043 // Instantiated if F doesn't define a member type F::Applier
0044 template<class F, std::enable_if_t<!has_applier_v<F>, bool> = true>
0045 __global__ void __launch_bounds__(CELERITAS_MAX_BLOCK_SIZE)
0046     launch_action_impl(Range<ThreadId> const thread_range, F execute_thread)
0047 {
0048     launch_kernel_impl(thread_range, execute_thread);
0049 }
0050 
0051 // Instantiated if F::Applier has no manual launch bounds
0052 template<class F,
0053          std::enable_if_t<kernel_no_bound<typename F::Applier>, bool> = true>
0054 __global__ void __launch_bounds__(CELERITAS_MAX_BLOCK_SIZE)
0055     launch_action_impl(Range<ThreadId> const thread_range, F execute_thread)
0056 {
0057     launch_kernel_impl(thread_range, execute_thread);
0058 }
0059 
0060 // Instantiated if F::Applier defines the first launch bounds argument
0061 template<class F,
0062          class A_ = typename F::Applier,
0063          std::enable_if_t<kernel_max_blocks<A_>, bool> = true>
0064 __global__ void __launch_bounds__(A_::max_block_size)
0065     launch_action_impl(Range<ThreadId> const thread_range, F execute_thread)
0066 {
0067     launch_kernel_impl(thread_range, execute_thread);
0068 }
0069 
0070 // Instantiated if F::Applier defines two arguments for launch bounds
0071 template<class F,
0072          class A_ = typename F::Applier,
0073          std::enable_if_t<kernel_max_blocks_min_warps<A_>, bool> = true>
0074 __global__ void
0075 #if CELERITAS_USE_CUDA
0076 __launch_bounds__(A_::max_block_size,
0077                   (A_::min_warps_per_eu * 32) / A_::max_block_size)
0078 #elif CELERITAS_USE_HIP
0079 __launch_bounds__(A_::max_block_size, A_::min_warps_per_eu)
0080 #endif
0081     launch_action_impl(Range<ThreadId> const thread_range, F execute_thread)
0082 {
0083     launch_kernel_impl(thread_range, execute_thread);
0084 }
0085 
0086 //---------------------------------------------------------------------------//
0087 }  // namespace
0088 }  // namespace detail
0089 }  // namespace celeritas
0090 // vim: set ft=cuda :