#include "Acts/Plugins/Gnn/Tensor.hpp"

#ifdef ACTS_GNN_WITH_CUDA
#include "Acts/Plugins/Gnn/detail/CudaUtils.hpp"
#endif

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstring>
#include <numeric>
#include <span>
#include <stdexcept>
#include <vector>

namespace Acts {

namespace detail {

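/// Allocate an untyped block of tensor memory for the given execution
/// context: plain new[] on the host, or cudaMallocAsync on the context's
/// CUDA stream when the library is built with ACTS_GNN_WITH_CUDA. The
/// returned TensorPtr carries the matching deleter.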
TensorPtr createTensorMemory(std::size_t nbytes,
                             const ExecutionContext &execContext) {
  if (execContext.device.type == Acts::Device::Type::eCPU) {
    void *ptr = new std::byte[nbytes];
    if (ptr == nullptr) {
      throw std::bad_alloc{};
    }
    return TensorPtr(ptr,
                     [](void *p) { delete[] static_cast<std::byte *>(p); });
  } else {
#ifdef ACTS_GNN_WITH_CUDA
    assert(execContext.stream.has_value());
    auto stream = *execContext.stream;
    void *ptr{};
    ACTS_CUDA_CHECK(cudaMallocAsync(&ptr, nbytes, stream));
    return TensorPtr(
        ptr, [stream](void *p) { ACTS_CUDA_CHECK(cudaFreeAsync(p, stream)); });
#else
    throw std::runtime_error(
        "Cannot create CUDA tensor, library was not compiled with CUDA");
#endif
  }
}

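/// Copy an existing memory block of `nbytes` from `devFrom` into freshly
/// allocated memory on the target execution context, using std::memcpy for
/// host-to-host copies and cudaMemcpyAsync for any combination involving a
/// CUDA device.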
TensorPtr cloneTensorMemory(const TensorPtr &ptr, std::size_t nbytes,
                            Device devFrom, const ExecutionContext &to) {
  auto clone = createTensorMemory(nbytes, to);
  if (devFrom.isCpu() && to.device.isCpu()) {
    std::memcpy(clone.get(), ptr.get(), nbytes);
  } else {
#ifdef ACTS_GNN_WITH_CUDA
    assert(to.stream.has_value());
    if (devFrom.isCuda() && to.device.isCuda()) {
      ACTS_CUDA_CHECK(cudaMemcpyAsync(clone.get(), ptr.get(), nbytes,
                                      cudaMemcpyDeviceToDevice, *to.stream));
    } else if (devFrom.isCpu() && to.device.isCuda()) {
      ACTS_CUDA_CHECK(cudaMemcpyAsync(clone.get(), ptr.get(), nbytes,
                                      cudaMemcpyHostToDevice, *to.stream));
    } else if (devFrom.isCuda() && to.device.isCpu()) {
      ACTS_CUDA_CHECK(cudaMemcpyAsync(clone.get(), ptr.get(), nbytes,
                                      cudaMemcpyDeviceToHost, *to.stream));
    }
#else
    throw std::runtime_error(
        "Cannot clone CUDA tensor, library was not compiled with CUDA");
#endif
  }
  return clone;
}

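// Forward declarations of the CUDA implementations used below; they are only
// called when the library is compiled with ACTS_GNN_WITH_CUDA.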
void cudaSigmoid(Tensor<float> &tensor, cudaStream_t stream);

std::pair<Tensor<float>, Tensor<std::int64_t>> cudaApplyScoreCut(
    const Tensor<float> &scores, const Tensor<std::int64_t> &edgeIndex,
    float cut, cudaStream_t stream);

}  // namespace detail

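/// Apply the logistic sigmoid 1 / (1 + exp(-x)) element-wise, in place. CUDA
/// tensors are dispatched to detail::cudaSigmoid and require both a CUDA
/// build and a valid stream.
///
/// Usage sketch (assuming a CPU-resident Tensor<float> named `scores`):
///
///   Acts::sigmoid(scores, std::nullopt);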
void sigmoid(Tensor<float> &tensor, std::optional<cudaStream_t> stream) {
  if (tensor.device().type == Acts::Device::Type::eCUDA) {
#ifdef ACTS_GNN_WITH_CUDA
    return Acts::detail::cudaSigmoid(tensor, stream.value());
#else
    throw std::runtime_error(
        "Cannot apply sigmoid to CUDA tensor, library was not compiled with "
        "CUDA");
#endif
  }

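  // CPU fallback: apply the sigmoid element-wise over the tensor data.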
  for (auto it = tensor.data(); it != tensor.data() + tensor.size(); ++it) {
    *it = 1.f / (1.f + std::exp(-*it));
  }
}

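/// Keep only the edges whose score is at least `cut`, returning the compacted
/// score and edge-index tensors. Expects scores of shape (nEdges, 1) and an
/// edge index of shape (2, nEdges), both on the same device.
///
/// Usage sketch (assuming CPU-resident tensors `scores` and `edges` as
/// described above, and a hypothetical cut value of 0.5):
///
///   auto [keptScores, keptEdges] =
///       Acts::applyScoreCut(scores, edges, 0.5f, std::nullopt);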
std::pair<Tensor<float>, Tensor<std::int64_t>> applyScoreCut(
    const Tensor<float> &scores, const Tensor<std::int64_t> &edgeIndex,
    float cut, std::optional<cudaStream_t> stream) {
  assert(scores.shape()[1] == 1);
  assert(edgeIndex.shape()[0] == 2);
  assert(edgeIndex.shape()[1] == scores.shape()[0]);
  assert(scores.device() == edgeIndex.device());
  ExecutionContext execContext{scores.device(), stream};

  if (scores.device().type == Acts::Device::Type::eCUDA) {
#ifdef ACTS_GNN_WITH_CUDA
    return detail::cudaApplyScoreCut(scores, edgeIndex, cut, stream.value());
#else
    throw std::runtime_error(
        "Cannot apply score cut to CUDA tensor, library was not compiled with "
        "CUDA");
#endif
  }

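  // CPU fallback: collect the indices of all edges that pass the score cut.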
  std::vector<std::size_t> indices(scores.size());
  std::iota(indices.begin(), indices.end(), 0);
  indices.erase(
      std::remove_if(indices.begin(), indices.end(),
                     [&](std::size_t i) { return scores.data()[i] < cut; }),
      indices.end());
  auto n = indices.size();
  auto outputScores =
      Tensor<float>::Create({static_cast<std::size_t>(n), 1}, execContext);
  auto outputEdges = Tensor<std::int64_t>::Create(
      {2, static_cast<std::size_t>(n)}, execContext);

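  // Copy the surviving scores and both rows of the edge index into the
  // compacted output tensors.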
  auto scoreIt = outputScores.data();
  auto edgeIt1 = outputEdges.data();
  auto edgeIt2 = outputEdges.data() + n;
  for (auto i : indices) {
    *scoreIt = scores.data()[i];
    *edgeIt1 = edgeIndex.data()[i];
    *edgeIt2 = edgeIndex.data()[i + scores.size()];
    ++scoreIt;
    ++edgeIt1;
    ++edgeIt2;
  }

  return {std::move(outputScores), std::move(outputEdges)};
}

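/// Truncate the edge index (and, if present, the edge features) to at most
/// `maxEdges` edges. If the input already satisfies the limit, the tensors
/// are cloned unchanged; otherwise only the first `maxEdges` edges are kept.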
std::pair<Tensor<std::int64_t>, std::optional<Tensor<float>>> applyEdgeLimit(
    const Tensor<std::int64_t> &edgeIndex,
    const std::optional<Tensor<float>> &edgeFeatures, std::size_t maxEdges,
    std::optional<cudaStream_t> stream) {
  if (edgeFeatures.has_value() &&
      edgeIndex.device() != edgeFeatures->device()) {
    throw std::invalid_argument(
        "limitEdges: edgeIndex and edgeFeatures must be on the same device!");
  }
  if (edgeFeatures.has_value() &&
      edgeFeatures->shape().at(0) != edgeIndex.shape().at(1)) {
    throw std::invalid_argument("limitEdges: inconsistent number of edges");
  }

  const auto nEdgeFeatures =
      edgeFeatures.has_value() ? edgeFeatures->shape().at(1) : 0;
  const auto nEdgesOld = edgeIndex.shape().at(1);

  std::optional<Tensor<std::int64_t>> newEdgeIndexTensor;
  std::optional<Tensor<float>> newEdgeFeatureTensor;

  if (nEdgesOld <= maxEdges) {
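    // Already within the limit: clone the inputs unchanged.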
    newEdgeIndexTensor = edgeIndex.clone({edgeIndex.device(), stream});
    if (edgeFeatures.has_value()) {
      newEdgeFeatureTensor =
          edgeFeatures->clone({edgeFeatures->device(), stream});
    }
  } else if (edgeIndex.device().isCpu()) {
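    // CPU path: keep the first maxEdges entries of each edge-index row and
    // the first maxEdges feature rows.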
    ExecutionContext cpuCtx{Acts::Device::Cpu(), {}};

    std::span<const std::int64_t> edge0(edgeIndex.data(), maxEdges);
    std::span<const std::int64_t> edge1(edgeIndex.data() + nEdgesOld, maxEdges);

    newEdgeIndexTensor = Tensor<std::int64_t>::Create({2, maxEdges}, cpuCtx);
    std::copy(edge0.begin(), edge0.end(), newEdgeIndexTensor->data());
    std::copy(edge1.begin(), edge1.end(),
              newEdgeIndexTensor->data() + maxEdges);

    if (edgeFeatures.has_value()) {
      std::span<const float> edgeFeaturesResized(edgeFeatures->data(),
                                                 maxEdges * nEdgeFeatures);

      newEdgeFeatureTensor =
          Tensor<float>::Create({maxEdges, nEdgeFeatures}, cpuCtx);
      std::copy(edgeFeaturesResized.begin(), edgeFeaturesResized.end(),
                newEdgeFeatureTensor->data());
    }
  } else {
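    // CUDA path: same truncation, performed with asynchronous
    // device-to-device copies on the given stream.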
#ifdef ACTS_GNN_WITH_CUDA
    ExecutionContext gpuCtx{edgeIndex.device(), stream};

    newEdgeIndexTensor = Tensor<std::int64_t>::Create({2, maxEdges}, gpuCtx);
    ACTS_CUDA_CHECK(cudaMemcpyAsync(newEdgeIndexTensor->data(),
                                    edgeIndex.data(),
                                    maxEdges * sizeof(std::int64_t),
                                    cudaMemcpyDeviceToDevice, stream.value()));
    ACTS_CUDA_CHECK(cudaMemcpyAsync(newEdgeIndexTensor->data() + maxEdges,
                                    edgeIndex.data() + nEdgesOld,
                                    maxEdges * sizeof(std::int64_t),
                                    cudaMemcpyDeviceToDevice, stream.value()));

    if (edgeFeatures.has_value()) {
      newEdgeFeatureTensor =
          Tensor<float>::Create({maxEdges, nEdgeFeatures}, gpuCtx);

      ACTS_CUDA_CHECK(
          cudaMemcpyAsync(newEdgeFeatureTensor->data(), edgeFeatures->data(),
                          maxEdges * nEdgeFeatures * sizeof(float),
                          cudaMemcpyDeviceToDevice, stream.value()));
    }
#else
    throw std::runtime_error(
        "Cannot apply edge limit to CUDA tensors, library was not compiled "
        "with CUDA");
#endif
  }

  return {std::move(newEdgeIndexTensor.value()),
          std::move(newEdgeFeatureTensor)};
}

}  // namespace Acts