device/cuda/sf_finders_grid_cuda_kernel.cu

0001 // This file is part of the ACTS project.
0002 //
0003 // Copyright (C) 2016 CERN for the benefit of the ACTS project
0004 //
0005 // This Source Code Form is subject to the terms of the Mozilla Public
0006 // License, v. 2.0. If a copy of the MPL was not distributed with this
0007 // file, You can obtain one at https://mozilla.org/MPL/2.0/.
0008
0009 #include "detray/definitions/detail/cuda_definitions.hpp"
0010 #include "detray/utils/ranges.hpp"
0011
0012 // Detray test include(s)
0013 #include "sf_finders_grid_cuda_kernel.hpp"
0014
0015 // Vecmem include(s)
0016 #include <vecmem/containers/device_vector.hpp>
0017
0018 namespace detray {
0019
0020 //----------------------------------------------------
0021 //  test function for grid data with replace populator
0022 //----------------------------------------------------
0023
0024 /// cuda kernel for grid_replace_test
0025 __global__ void grid_replace_test_kernel(
0026     host_grid3_single::view_type grid_view) {
0027   // Let's try building the grid object
0028   device_grid3_single g3_device(grid_view);
0029
0030   // Get axes on the device-side
0031   const auto& axis_x = g3_device.template get_axis<axis::label::e_x>();
0032   const auto& axis_y = g3_device.template get_axis<axis::label::e_y>();
0033   const auto& axis_z = g3_device.template get_axis<axis::label::e_z>();
0034
0035   dindex gid = g3_device.serialize(
0036       detray::axis::multi_bin<3>{threadIdx.x, threadIdx.y, threadIdx.z});
0037
0038   point3 tp{axis_x.min() + gid * axis_x.bin_width(),
0039             axis_y.min() + gid * axis_y.bin_width(),
0040             axis_z.min() + gid * axis_z.bin_width()};
0041
0042   // replace the bin elements
0043   g3_device.template populate<replace<>>(gid, std::move(tp));
0044 }
0045
0046 /// grid_replace_test implementation
0047 void grid_replace_test(host_grid3_single::view_type grid_view,
0048                        std::size_t dim_x, std::size_t dim_y,
0049                        std::size_t dim_z) {
0050   int n_blocks = 1;
0051   dim3 n_threads(dim_x, dim_y, dim_z);
0052
0053   // run the kernel
0054   grid_replace_test_kernel<<<n_blocks, n_threads>>>(grid_view);
0055
0056   // cuda error check
0057   DETRAY_CUDA_ERROR_CHECK(cudaGetLastError());
0058   DETRAY_CUDA_ERROR_CHECK(cudaDeviceSynchronize());
0059 }
0060
0061 /// cuda kernel for grid_replace_ci_test
0062 __global__ void grid_replace_ci_test_kernel(
0063     host_grid2_single_ci::view_type grid_view) {
0064   // Let's try building the grid object
0065   device_grid2_single_ci g2_device(grid_view);
0066
0067   // Get axes on the device-side
0068   const auto& axis_r = g2_device.template get_axis<axis::label::e_r>();
0069   const auto& axis_phi = g2_device.template get_axis<axis::label::e_phi>();
0070
0071   auto gid = threadIdx.x + threadIdx.y * blockDim.x;
0072
0073   point3 tp{axis_r.min() + gid * axis_r.bin_width(threadIdx.x),
0074             axis_phi.min() + gid * axis_phi.bin_width(), 0.5f};
0075
0076   // replace the bin elements
0077   g2_device.template populate<replace<>>(gid, std::move(tp));
0078 }
0079
0080 // test function for replace populator with circular and irregular axis
0081 void grid_replace_ci_test(host_grid2_single_ci::view_type grid_view,
0082                           std::size_t dim_x, std::size_t dim_y) {
0083   int n_blocks = 1;
0084   dim3 n_threads(dim_x, dim_y);
0085
0086   // run the kernel
0087   grid_replace_ci_test_kernel<<<n_blocks, n_threads>>>(grid_view);
0088
0089   // cuda error check
0090   DETRAY_CUDA_ERROR_CHECK(cudaGetLastError());
0091   DETRAY_CUDA_ERROR_CHECK(cudaDeviceSynchronize());
0092 }
0093
0094 //----------------------------------------------------
0095 // test function for grid data with complete populator
0096 //----------------------------------------------------
0097
0098 // cuda kernel for grid_complete_test
0099 __global__ void grid_complete_kernel(host_grid2_array::view_type grid_view) {
0100   // Let's try building the grid object
0101   device_grid2_array g2_device(grid_view);
0102
0103   // Get axes on the device-side
0104   const auto& axis_r = g2_device.template get_axis<axis::label::e_r>();
0105   const auto& axis_phi = g2_device.template get_axis<axis::label::e_phi>();
0106
0107   auto gid = threadIdx.x + threadIdx.y * blockDim.x;
0108   auto tp = point3{axis_r.min() + gid * axis_r.bin_width(),
0109                    axis_phi.min() + gid * axis_phi.bin_width(), 0.5f};
0110
0111   g2_device.template populate<complete<>>(gid, std::move(tp));
0112 }
0113
0114 // grid_complete_test implementation
0115 void grid_complete_test(host_grid2_array::view_type grid_view,
0116                         std::size_t dim_x, std::size_t dim_y) {
0117   int block_dim = 1;
0118   dim3 thread_dim(dim_x, dim_y);
0119
0120   // run the kernel
0121   grid_complete_kernel<<<block_dim, thread_dim>>>(grid_view);
0122
0123   // cuda error check
0124   DETRAY_CUDA_ERROR_CHECK(cudaGetLastError());
0125   DETRAY_CUDA_ERROR_CHECK(cudaDeviceSynchronize());
0126 }
0127
0128 //--------------------------------------------------
0129 // test function for grid data with attach populator
0130 //--------------------------------------------------
0131
0132 // cuda kernel for grid_attach_test
0133 __global__ void grid_attach_kernel(host_grid2_array::view_type grid_view) {
0134   // Let's try building the grid object
0135   device_grid2_array g2_device(grid_view);
0136
0137   // Get axes on the device-side
0138   const auto& axis_r = g2_device.template get_axis<axis::label::e_r>();
0139   const auto& axis_phi = g2_device.template get_axis<axis::label::e_phi>();
0140
0141   auto width_r = axis_r.m_binning.bin_width();
0142   auto width_phi = axis_phi.m_binning.bin_width();
0143
0144   auto gid = threadIdx.x + threadIdx.y * blockDim.x;
0145   auto tp = point3{axis_r.min() + gid * width_r,
0146                    axis_phi.min() + gid * width_phi, 0.5f};
0147
0148   g2_device.template populate<attach<>>(gid, std::move(tp));
0149 }
0150
0151 // grid_attach_test implementation
0152 void grid_attach_test(host_grid2_array::view_type grid_view, std::size_t dim_x,
0153                       std::size_t dim_y) {
0154   int block_dim = 1;
0155   dim3 thread_dim(dim_x, dim_y);
0156
0157   // run the kernel
0158   grid_attach_kernel<<<block_dim, thread_dim>>>(grid_view);
0159
0160   // cuda error check
0161   DETRAY_CUDA_ERROR_CHECK(cudaGetLastError());
0162   DETRAY_CUDA_ERROR_CHECK(cudaDeviceSynchronize());
0163 }
0164
0165 // cuda kernel for grid_dynamic_attach_test
0166 __global__ void grid_dynamic_attach_kernel(
0167     host_grid2_dynamic_array::view_type grid_view) {
0168   // Let's try building the grid object
0169   device_grid2_dynamic_array g2_device(grid_view);
0170
0171   // Get axes on the device-side
0172   const auto& axis_r = g2_device.template get_axis<axis::label::e_r>();
0173   const auto& axis_phi = g2_device.template get_axis<axis::label::e_phi>();
0174
0175   auto width_r = axis_r.m_binning.bin_width();
0176   auto width_phi = axis_phi.m_binning.bin_width();
0177
0178   auto gid = threadIdx.x + threadIdx.y * blockDim.x;
0179   auto tp = point3{axis_r.min() + gid * width_r,
0180                    axis_phi.min() + gid * width_phi, 0.5f};
0181
0182   g2_device.template populate<attach<>>(gid, std::move(tp));
0183 }
0184
0185 // grid_dynamic_attach_test implementation
0186 void grid_dynamic_attach_test(host_grid2_dynamic_array::view_type grid_view,
0187                               std::size_t dim_x, std::size_t dim_y) {
0188   int block_dim = 1;
0189   dim3 thread_dim(dim_x, dim_y);
0190
0191   // run the kernel
0192   grid_dynamic_attach_kernel<<<block_dim, thread_dim>>>(grid_view);
0193
0194   // cuda error check
0195   DETRAY_CUDA_ERROR_CHECK(cudaGetLastError());
0196   DETRAY_CUDA_ERROR_CHECK(cudaDeviceSynchronize());
0197 }
0198
0199 //----------------------------------------------------
0200 // Device side grid reader for debugging
0201 //----------------------------------------------------
0202
0203 // cuda kernel for attach_read_test
0204 template <typename device_grid_t, typename view_t>
0205 __global__ void print_grid_kernel(view_t grid_view) {
0206   // Let's try building the grid object
0207   device_grid_t g2_device(grid_view);
0208
0209   axis::multi_bin<device_grid_t::dim> mbin;
0210   if constexpr (device_grid_t::dim == 2) {
0211     mbin = {threadIdx.x, threadIdx.y};
0212   } else {
0213     mbin = {threadIdx.x, threadIdx.y, threadIdx.z};
0214   }
0215
0216   for (auto& pt : g2_device.bin(mbin)) {
0217     printf("[%f %f %f]\n", pt[0], pt[1], pt[2]);
0218   }
0219 }
0220
0221 // grid_attach_read_test implementation
0222 template <typename device_grid_t, typename view_t, typename... I>
0223 void print_grid(view_t grid_view, I... dims) {
0224   int block_dim = 1;
0225   dim3 thread_dim(dims...);
0226
0227   // run the kernel
0228   print_grid_kernel<device_grid_t, view_t>
0229       <<<block_dim, thread_dim>>>(grid_view);
0230
0231   // cuda error check
0232   DETRAY_CUDA_ERROR_CHECK(cudaGetLastError());
0233   DETRAY_CUDA_ERROR_CHECK(cudaDeviceSynchronize());
0234 }
0235
0236 // Explicit instantioations
0237 template void print_grid<device_grid3_single>(host_grid3_single::view_type,
0238                                               dindex, dindex, dindex);
0239
0240 template void print_grid<device_grid2_single_ci>(
0241     host_grid2_single_ci::view_type, dindex, dindex);
0242
0243 template void print_grid<device_grid2_array>(host_grid2_array::view_type,
0244                                              dindex, dindex);
0245
0246 template void print_grid<device_grid2_dynamic_array>(
0247     device_grid2_dynamic_array::view_type, dindex, dindex);
0248
0249 //---------------------------------------
0250 //  test function for collection of grids
0251 //---------------------------------------
0252
0253 /// cuda kernel for grid_collection_test
0254 __global__ void grid_collection_test_kernel(
0255     grid_collection<n_own_host_grid3_array>::view_type grid_coll_view,
0256     vecmem::data::vector_view<dindex> n_bins_view,
0257     vecmem::data::vector_view<std::array<dindex, 3>> result_bins_view) {
0258   // Let's try building the grid object
0259   grid_collection<n_own_device_grid3_array> device_coll(grid_coll_view);
0260   vecmem::device_vector<dindex> n_bins(n_bins_view);
0261   vecmem::device_vector<std::array<dindex, 3>> result_bins(result_bins_view);
0262
0263   // test the grid axes of the second grid in the collection
0264   if (threadIdx.x == 0 && threadIdx.y == 0 && threadIdx.z == 0) {
0265     const auto& axis_r =
0266         device_coll[blockIdx.x].template get_axis<axis::label::e_r>();
0267     const auto& axis_phi =
0268         device_coll[blockIdx.x].template get_axis<axis::label::e_phi>();
0269     const auto& axis_z =
0270         device_coll[blockIdx.x].template get_axis<axis::label::e_z>();
0271
0272     n_bins[0 + blockIdx.x * 3] = axis_r.nbins();
0273     n_bins[1 + blockIdx.x * 3] = axis_phi.nbins();
0274     n_bins[2 + blockIdx.x * 3] = axis_z.nbins();
0275   }
0276
0277   // Read the entire grid content
0278   int gid = threadIdx.z * blockDim.y * blockDim.x + threadIdx.y * blockDim.x +
0279             threadIdx.x;
0280   if (gid < device_coll[blockIdx.x].nbins()) {
0281     for (const auto [i, bin_entry] :
0282          detray::views::enumerate(device_coll[blockIdx.x].bin(gid))) {
0283       result_bins[gid + device_coll.offsets()[blockIdx.x]][i] = bin_entry;
0284     }
0285   }
0286 }
0287
0288 /// grid_collection_test implementation
0289 void grid_collection_test(
0290     grid_collection<n_own_host_grid3_array>::view_type grid_coll_view,
0291     vecmem::data::vector_view<dindex> n_bins_view,
0292     vecmem::data::vector_view<std::array<dindex, 3>> result_bins_view,
0293     std::size_t n_grids, std::size_t dim_x, std::size_t dim_y,
0294     std::size_t dim_z) {
0295   int n_blocks = n_grids;
0296   dim3 n_threads(dim_x, dim_y, dim_z);
0297
0298   // run the kernel
0299   grid_collection_test_kernel<<<n_blocks, n_threads>>>(
0300       grid_coll_view, n_bins_view, result_bins_view);
0301
0302   // cuda error check
0303   DETRAY_CUDA_ERROR_CHECK(cudaGetLastError());
0304   DETRAY_CUDA_ERROR_CHECK(cudaDeviceSynchronize());
0305 }
0306 }  // namespace detray