File indexing completed on 2025-07-02 07:51:55
0001
0002
0003
0004
0005
0006
0007
0008
0009 #include "Acts/Plugins/ExaTrkX/Tensor.hpp"
0010 #include "Acts/Plugins/ExaTrkX/detail/CudaUtils.hpp"
0011
0012 #include <thrust/copy.h>
0013 #include <thrust/count.h>
0014 #include <thrust/execution_policy.h>
0015
0016 namespace {
0017
0018 __global__ void sigmoidImpl(std::size_t size, float *array) {
0019 std::size_t i = blockIdx.x * blockDim.x + threadIdx.x;
0020 if (i >= size) {
0021 return;
0022 }
0023
0024 array[i] = 1.f / (1.f + __expf(-array[i]));
0025 }
0026
0027 __global__ void applyCut(std::size_t size, float cutoff, const float *array,
0028 bool *mask) {
0029 std::size_t i = blockIdx.x * blockDim.x + threadIdx.x;
0030 if (i >= size) {
0031 return;
0032 }
0033
0034 mask[i] = array[i] > cutoff;
0035 }
0036
0037 }
0038
0039 namespace Acts::detail {
0040
0041 void cudaSigmoid(Tensor<float> &tensor, cudaStream_t stream) {
0042 dim3 blockDim = 1024;
0043 dim3 gridDim = (tensor.size() + blockDim.x - 1) / blockDim.x;
0044 sigmoidImpl<<<gridDim, blockDim, 0, stream>>>(tensor.size(), tensor.data());
0045 ACTS_CUDA_CHECK(cudaGetLastError());
0046 }
0047
0048 std::pair<Tensor<float>, Tensor<std::int64_t>> cudaApplyScoreCut(
0049 const Tensor<float> &scores, const Tensor<std::int64_t> &edgeIndex,
0050 float cut, cudaStream_t stream) {
0051 dim3 blockDim = 1024;
0052 dim3 gridDim = (scores.size() + blockDim.x - 1) / blockDim.x;
0053 ExecutionContext execContext{scores.device(), stream};
0054
0055 bool *mask{};
0056 ACTS_CUDA_CHECK(cudaMallocAsync(&mask, scores.size() * sizeof(bool), stream));
0057
0058 applyCut<<<gridDim, blockDim, 0, stream>>>(scores.size(), cut, scores.data(),
0059 mask);
0060 ACTS_CUDA_CHECK(cudaGetLastError());
0061
0062 const std::size_t nEdgesAfter = thrust::count(thrust::device.on(stream), mask,
0063 mask + scores.size(), true);
0064
0065 auto outputScores = Tensor<float>::Create({nEdgesAfter, 1}, execContext);
0066 auto outputEdgeIndex =
0067 Tensor<std::int64_t>::Create({2, nEdgesAfter}, execContext);
0068
0069 auto pred = [] __device__(bool x) { return x; };
0070 thrust::copy_if(thrust::device.on(stream), scores.data(),
0071 scores.data() + scores.size(), mask, outputScores.data(),
0072 pred);
0073
0074 const auto edgesBefore = edgeIndex.size() / 2;
0075 thrust::copy_if(thrust::device.on(stream), edgeIndex.data(),
0076 edgeIndex.data() + edgesBefore, mask, outputEdgeIndex.data(),
0077 pred);
0078 thrust::copy_if(thrust::device.on(stream), edgeIndex.data() + edgesBefore,
0079 edgeIndex.data() + 2 * edgesBefore, mask,
0080 outputEdgeIndex.data() + nEdgesAfter, pred);
0081
0082 ACTS_CUDA_CHECK(cudaFreeAsync(mask, *execContext.stream));
0083 return {std::move(outputScores), std::move(outputEdgeIndex)};
0084 }
0085
0086 }