File indexing completed on 2026-06-25 07:49:03
0001
0002
0003
0004
0005
0006
0007
0008
0009 #include "ActsPlugins/Gnn/Stages.hpp"
0010 #include "ActsPlugins/Gnn/detail/CudaUtils.hpp"
0011
0012 #include <thrust/copy.h>
0013 #include <thrust/execution_policy.h>
0014 #include <thrust/sort.h>
0015 #include <thrust/unique.h>
0016
0017 namespace {
0018
0019
0020 __global__ void buildNodeMaskKernel(const std::int64_t *usedNodes,
0021 std::size_t nUsed, bool *mask) {
0022 const std::size_t i = blockIdx.x * blockDim.x + threadIdx.x;
0023 if (i < nUsed) {
0024 mask[usedNodes[i]] = true;
0025 }
0026 }
0027
0028
0029 __global__ void buildReverseMapKernel(const std::int64_t *usedNodes,
0030 std::size_t nUsed,
0031 std::int64_t *oldToNew) {
0032 const std::size_t newIdx = blockIdx.x * blockDim.x + threadIdx.x;
0033 if (newIdx < nUsed) {
0034 oldToNew[usedNodes[newIdx]] = static_cast<std::int64_t>(newIdx);
0035 }
0036 }
0037
0038
0039 __global__ void remapEdgesKernel(std::size_t nTotal,
0040 const std::int64_t *oldToNew,
0041 std::int64_t *edgeData) {
0042 const std::size_t i = blockIdx.x * blockDim.x + threadIdx.x;
0043 if (i < nTotal) {
0044 edgeData[i] = oldToNew[edgeData[i]];
0045 }
0046 }
0047
0048 }
0049
0050 namespace ActsPlugins::detail {
0051
0052 PipelineTensors cudaRemoveUnusedNodes(PipelineTensors &&tensors,
0053 std::vector<int> &spacePointIds,
0054 const ExecutionContext &execCtx) {
0055 const auto stream = execCtx.stream.value();
0056 const auto nNodes = tensors.nodeFeatures.shape()[0];
0057 const auto nEdges = tensors.edgeIndex.shape()[1];
0058
0059
0060
0061 auto tmp = Tensor<std::int64_t>::Create({1, 2 * nEdges}, execCtx);
0062 ACTS_CUDA_CHECK(cudaMemcpyAsync(tmp.data(), tensors.edgeIndex.data(),
0063 tmp.nbytes(), cudaMemcpyDeviceToDevice,
0064 stream));
0065
0066
0067 thrust::sort(thrust::device.on(stream), tmp.data(), tmp.data() + 2 * nEdges);
0068 auto *uniqEnd = thrust::unique(thrust::device.on(stream), tmp.data(),
0069 tmp.data() + 2 * nEdges);
0070
0071 ACTS_CUDA_CHECK(cudaStreamSynchronize(stream));
0072 const std::size_t nUsed = static_cast<std::size_t>(uniqEnd - tmp.data());
0073
0074
0075 auto mask = Tensor<bool>::Create({nNodes, 1}, execCtx);
0076 ACTS_CUDA_CHECK(cudaMemsetAsync(mask.data(), 0, mask.nbytes(), stream));
0077 const dim3 blockDim = 1024;
0078 const dim3 gridUsed = (nUsed + blockDim.x - 1) / blockDim.x;
0079 buildNodeMaskKernel<<<gridUsed, blockDim, 0, stream>>>(tmp.data(), nUsed,
0080 mask.data());
0081 ACTS_CUDA_CHECK(cudaGetLastError());
0082
0083
0084 auto oldToNew = Tensor<std::int64_t>::Create({nNodes, 1}, execCtx);
0085 buildReverseMapKernel<<<gridUsed, blockDim, 0, stream>>>(tmp.data(), nUsed,
0086 oldToNew.data());
0087 ACTS_CUDA_CHECK(cudaGetLastError());
0088
0089
0090 auto newNodeFeatures = selectRows(tensors.nodeFeatures, mask, execCtx);
0091
0092
0093 const dim3 gridEdges = (2 * nEdges + blockDim.x - 1) / blockDim.x;
0094 remapEdgesKernel<<<gridEdges, blockDim, 0, stream>>>(
0095 2 * nEdges, oldToNew.data(), tensors.edgeIndex.data());
0096 ACTS_CUDA_CHECK(cudaGetLastError());
0097
0098
0099 std::vector<std::int64_t> hostUsedNodes(nUsed);
0100 ACTS_CUDA_CHECK(cudaMemcpyAsync(hostUsedNodes.data(), tmp.data(),
0101 nUsed * sizeof(std::int64_t),
0102 cudaMemcpyDeviceToHost, stream));
0103 ACTS_CUDA_CHECK(cudaStreamSynchronize(stream));
0104
0105 std::vector<int> remapped;
0106 remapped.reserve(nUsed);
0107 for (const auto oldIdx : hostUsedNodes) {
0108 remapped.push_back(spacePointIds[static_cast<std::size_t>(oldIdx)]);
0109 }
0110 spacePointIds = std::move(remapped);
0111
0112 return {std::move(newNodeFeatures), std::move(tensors.edgeIndex),
0113 std::move(tensors.edgeFeatures), std::move(tensors.edgeScores)};
0114 }
0115
0116 }