File indexing completed on 2025-09-17 08:54:09
0001
0002
0003
0004
0005
0006
0007
0008 #pragma once
0009
0010 #include "corecel/Config.hh"
0011 #include "corecel/DeviceRuntimeApi.hh"
0012
0013 #include "corecel/Macros.hh"
0014 #include "corecel/cont/Range.hh"
0015
0016 #include "../KernelParamCalculator.device.hh"
0017 #include "../KernelTraits.hh"
0018 #include "../ThreadId.hh"
0019
0020 namespace celeritas
0021 {
0022 namespace detail
0023 {
0024 namespace
0025 {
0026
0027
0028
0029
0030 template<class F>
0031 __device__ CELER_FORCEINLINE void
0032 launch_kernel_impl(Range<ThreadId> const& thread_range, F& execute_thread)
0033 {
0034 auto tid = celeritas::KernelParamCalculator::thread_id();
0035 if (!(tid < thread_range.size()))
0036 return;
0037 execute_thread(*(thread_range.cbegin() + tid.get()));
0038 }
0039
0040
0041
0042
0043
0044
0045 template<class F, std::enable_if_t<!has_applier_v<F>, bool> = true>
0046 __global__ void __launch_bounds__(CELERITAS_MAX_BLOCK_SIZE)
0047 launch_action_impl(Range<ThreadId> const thread_range, F execute_thread)
0048 {
0049 launch_kernel_impl(thread_range, execute_thread);
0050 }
0051
0052
0053 template<class F,
0054 std::enable_if_t<kernel_no_bound<typename F::Applier>, bool> = true>
0055 __global__ void __launch_bounds__(CELERITAS_MAX_BLOCK_SIZE)
0056 launch_action_impl(Range<ThreadId> const thread_range, F execute_thread)
0057 {
0058 launch_kernel_impl(thread_range, execute_thread);
0059 }
0060
0061
0062 template<class F,
0063 class A_ = typename F::Applier,
0064 std::enable_if_t<kernel_max_blocks<A_>, bool> = true>
0065 __global__ void __launch_bounds__(A_::max_block_size)
0066 launch_action_impl(Range<ThreadId> const thread_range, F execute_thread)
0067 {
0068 launch_kernel_impl(thread_range, execute_thread);
0069 }
0070
0071
0072 template<class F,
0073 class A_ = typename F::Applier,
0074 std::enable_if_t<kernel_max_blocks_min_warps<A_>, bool> = true>
0075 __global__ void
0076 #if CELERITAS_USE_CUDA
0077 __launch_bounds__(A_::max_block_size,
0078 (A_::min_warps_per_eu * 32) / A_::max_block_size)
0079 #elif CELERITAS_USE_HIP
0080 __launch_bounds__(A_::max_block_size, A_::min_warps_per_eu)
0081 #endif
0082 launch_action_impl(Range<ThreadId> const thread_range, F execute_thread)
0083 {
0084 launch_kernel_impl(thread_range, execute_thread);
0085 }
0086
0087
0088 }
0089 }
0090 }
0091