device/cuda/execute_cuda_test.cuh

0001 // This file is part of the ACTS project.
0002 //
0003 // Copyright (C) 2016 CERN for the benefit of the ACTS project
0004 //
0005 // This Source Code Form is subject to the terms of the Mozilla Public
0006 // License, v. 2.0. If a copy of the MPL was not distributed with this
0007 // file, You can obtain one at https://mozilla.org/MPL/2.0/.
0008
0009 #pragma once
0010
// Project include(s).
#include "detray/definitions/detail/cuda_definitions.hpp"

// System include(s).
#include <algorithm>
#include <cstddef>
#include <utility>
0016
0017 namespace detray::test::cuda {
0018
/// Generic test kernel: every thread whose global index is below
/// @c array_sizes invokes a default-constructed @c functor_t once.
///
/// Only the @c x components of the block/thread indices are used, so the
/// kernel expects a one-dimensional launch.
///
/// @tparam functor_t default-constructible type whose call operator takes the
///         index (as @c std::size_t ) followed by @c args
/// @tparam Args types of the additional arguments, received by value
///
/// @param array_sizes number of indices to process; threads with an index at
///        or beyond this value return without calling the functor
/// @param args additional arguments handed on to the functor
template <class functor_t, typename... Args>
__global__ void cuda_test_kernel(std::size_t array_sizes, Args... args) {
  // Find the current index that we need to process. The built-in index
  // variables are 32-bit unsigned, so widen before multiplying: otherwise the
  // product wraps around in 32 bits before it is converted to std::size_t.
  const std::size_t i =
      static_cast<std::size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
  if (i >= array_sizes) {
    // Surplus thread in the last block: nothing to do.
    return;
  }

  // Execute the test functor for this index.
  functor_t()(i, std::forward<Args>(args)...);
}
0030
/// Execute a test functor on a device, on @c array_sizes threads
///
/// Launches @c cuda_test_kernel with a one-dimensional grid of at most 256
/// threads per block, then checks the launch status and waits for the device
/// to finish. Does nothing if @c array_sizes is zero.
///
/// @tparam functor_t functor type that the kernel instantiates and calls
/// @tparam Args types of the additional arguments, received by value
///
/// @param array_sizes number of indices (threads doing work) to run
/// @param args additional arguments handed on to the kernel
template <class functor_t, class... Args>
void execute_cuda_test(std::size_t array_sizes, Args... args) {
  // Nothing to run. Without this guard the block size below would be zero and
  // the block-count computation would divide by zero.
  if (array_sizes == 0u) {
    return;
  }

  // Number of threads per execution block. Less than 1024 to make debug tests
  // possible.
  constexpr std::size_t max_threads_per_block{256u};
  const std::size_t n_threads_per_block{
      std::min(max_threads_per_block, array_sizes)};

  // Ceiling division, done in std::size_t so that array_sizes is not narrowed
  // before the launch configuration is derived from it.
  const std::size_t n_blocks{(array_sizes + n_threads_per_block - 1u) /
                             n_threads_per_block};

  // Launch the test on the device.
  cuda_test_kernel<functor_t>
      <<<static_cast<unsigned int>(n_blocks),
         static_cast<unsigned int>(n_threads_per_block)>>>(
          array_sizes, std::forward<Args>(args)...);

  // Check whether it succeeded to run: first the launch itself, then the
  // execution (the synchronization reports errors raised while running).
  DETRAY_CUDA_ERROR_CHECK(cudaGetLastError());
  DETRAY_CUDA_ERROR_CHECK(cudaDeviceSynchronize());
}
0048
0049 }  // namespace detray::test::cuda