ExaTrkX/src/Tensor.cu

0001 // This file is part of the ACTS project.
0002 //
0003 // Copyright (C) 2016 CERN for the benefit of the ACTS project
0004 //
0005 // This Source Code Form is subject to the terms of the Mozilla Public
0006 // License, v. 2.0. If a copy of the MPL was not distributed with this
0007 // file, You can obtain one at https://mozilla.org/MPL/2.0/.
0008
0009 #include "Acts/Plugins/ExaTrkX/Tensor.hpp"
0010 #include "Acts/Plugins/ExaTrkX/detail/CudaUtils.hpp"
0011
0012 #include <thrust/copy.h>
0013 #include <thrust/count.h>
0014 #include <thrust/execution_policy.h>
0015
0016 namespace {
0017
0018 __global__ void sigmoidImpl(std::size_t size, float *array) {
0019   std::size_t i = blockIdx.x * blockDim.x + threadIdx.x;
0020   if (i >= size) {
0021     return;
0022   }
0023
0024   array[i] = 1.f / (1.f + __expf(-array[i]));
0025 }
0026
0027 __global__ void applyCut(std::size_t size, float cutoff, const float *array,
0028                          bool *mask) {
0029   std::size_t i = blockIdx.x * blockDim.x + threadIdx.x;
0030   if (i >= size) {
0031     return;
0032   }
0033
0034   mask[i] = array[i] > cutoff;
0035 }
0036
0037 }  // namespace
0038
0039 namespace Acts::detail {
0040
0041 void cudaSigmoid(Tensor<float> &tensor, cudaStream_t stream) {
0042   dim3 blockDim = 1024;
0043   dim3 gridDim = (tensor.size() + blockDim.x - 1) / blockDim.x;
0044   sigmoidImpl<<<gridDim, blockDim, 0, stream>>>(tensor.size(), tensor.data());
0045   ACTS_CUDA_CHECK(cudaGetLastError());
0046 }
0047
0048 std::pair<Tensor<float>, Tensor<std::int64_t>> cudaApplyScoreCut(
0049     const Tensor<float> &scores, const Tensor<std::int64_t> &edgeIndex,
0050     float cut, cudaStream_t stream) {
0051   dim3 blockDim = 1024;
0052   dim3 gridDim = (scores.size() + blockDim.x - 1) / blockDim.x;
0053   ExecutionContext execContext{scores.device(), stream};
0054
0055   bool *mask{};
0056   ACTS_CUDA_CHECK(cudaMallocAsync(&mask, scores.size() * sizeof(bool), stream));
0057
0058   applyCut<<<gridDim, blockDim, 0, stream>>>(scores.size(), cut, scores.data(),
0059                                              mask);
0060   ACTS_CUDA_CHECK(cudaGetLastError());
0061
0062   const std::size_t nEdgesAfter = thrust::count(thrust::device.on(stream), mask,
0063                                                 mask + scores.size(), true);
0064
0065   auto outputScores = Tensor<float>::Create({nEdgesAfter, 1}, execContext);
0066   auto outputEdgeIndex =
0067       Tensor<std::int64_t>::Create({2, nEdgesAfter}, execContext);
0068
0069   auto pred = [] __device__(bool x) { return x; };
0070   thrust::copy_if(thrust::device.on(stream), scores.data(),
0071                   scores.data() + scores.size(), mask, outputScores.data(),
0072                   pred);
0073
0074   const auto edgesBefore = edgeIndex.size() / 2;
0075   thrust::copy_if(thrust::device.on(stream), edgeIndex.data(),
0076                   edgeIndex.data() + edgesBefore, mask, outputEdgeIndex.data(),
0077                   pred);
0078   thrust::copy_if(thrust::device.on(stream), edgeIndex.data() + edgesBefore,
0079                   edgeIndex.data() + 2 * edgesBefore, mask,
0080                   outputEdgeIndex.data() + nEdgesAfter, pred);
0081
0082   ACTS_CUDA_CHECK(cudaFreeAsync(mask, *execContext.stream));
0083   return {std::move(outputScores), std::move(outputEdgeIndex)};
0084 }
0085
0086 }  // namespace Acts::detail