#include "Acts/Plugins/Gnn/Tensor.hpp"

#ifdef ACTS_GNN_WITH_CUDA
#include "Acts/Plugins/Gnn/detail/CudaUtils.hpp"
#endif

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstring>
#include <numeric>
#include <span>
#include <stdexcept>
#include <vector>

namespace Acts {

namespace detail {

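/// Allocate an untyped block of tensor memory for the given execution
/// context: plain new[] on the host, or cudaMallocAsync on the context's
/// CUDA stream when the library is built with ACTS_GNN_WITH_CUDA. The
/// returned TensorPtr carries the matching deleter.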
TensorPtr createTensorMemory(std::size_t nbytes,
                             const ExecutionContext &execContext) {
  if (execContext.device.type == Acts::Device::Type::eCPU) {
    void *ptr = new std::byte[nbytes];
    if (ptr == nullptr) {
      throw std::bad_alloc{};
    }
    return TensorPtr(ptr,
                     [](void *p) { delete[] static_cast<std::byte *>(p); });
  } else {
#ifdef ACTS_GNN_WITH_CUDA
    assert(execContext.stream.has_value());
    auto stream = *execContext.stream;
    void *ptr{};
    ACTS_CUDA_CHECK(cudaMallocAsync(&ptr, nbytes, stream));
    return TensorPtr(
        ptr, [stream](void *p) { ACTS_CUDA_CHECK(cudaFreeAsync(p, stream)); });
#else
    throw std::runtime_error(
        "Cannot create CUDA tensor, library was not compiled with CUDA");
#endif
  }
}

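/// Copy an existing memory block of `nbytes` from `devFrom` into freshly
/// allocated memory on the target execution context, using std::memcpy for
/// host-to-host copies and cudaMemcpyAsync for any combination involving a
/// CUDA device.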
TensorPtr cloneTensorMemory(const TensorPtr &ptr, std::size_t nbytes,
                            Device devFrom, const ExecutionContext &to) {
  auto clone = createTensorMemory(nbytes, to);
  if (devFrom.isCpu() && to.device.isCpu()) {
    std::memcpy(clone.get(), ptr.get(), nbytes);
  } else {
#ifdef ACTS_GNN_WITH_CUDA
    assert(to.stream.has_value());
    if (devFrom.isCuda() && to.device.isCuda()) {
      ACTS_CUDA_CHECK(cudaMemcpyAsync(clone.get(), ptr.get(), nbytes,
                                      cudaMemcpyDeviceToDevice, *to.stream));
    } else if (devFrom.isCpu() && to.device.isCuda()) {
      ACTS_CUDA_CHECK(cudaMemcpyAsync(clone.get(), ptr.get(), nbytes,
                                      cudaMemcpyHostToDevice, *to.stream));
    } else if (devFrom.isCuda() && to.device.isCpu()) {
      ACTS_CUDA_CHECK(cudaMemcpyAsync(clone.get(), ptr.get(), nbytes,
                                      cudaMemcpyDeviceToHost, *to.stream));
    }
#else
    throw std::runtime_error(
        "Cannot clone CUDA tensor, library was not compiled with CUDA");
#endif
  }
  return clone;
}

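// Forward declarations of the CUDA implementations used below; they are only
// called when the library is compiled with ACTS_GNN_WITH_CUDA.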
void cudaSigmoid(Tensor<float> &tensor, cudaStream_t stream);

std::pair<Tensor<float>, Tensor<std::int64_t>> cudaApplyScoreCut(
    const Tensor<float> &scores, const Tensor<std::int64_t> &edgeIndex,
    float cut, cudaStream_t stream);

}  // namespace detail

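/// Apply the logistic sigmoid 1 / (1 + exp(-x)) element-wise, in place. CUDA
/// tensors are dispatched to detail::cudaSigmoid and require both a CUDA
/// build and a valid stream.
///
/// Usage sketch (assuming a CPU-resident Tensor<float> named `scores`):
///
///   Acts::sigmoid(scores, std::nullopt);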
void sigmoid(Tensor<float> &tensor, std::optional<cudaStream_t> stream) {
  if (tensor.device().type == Acts::Device::Type::eCUDA) {
#ifdef ACTS_GNN_WITH_CUDA
    return Acts::detail::cudaSigmoid(tensor, stream.value());
#else
    throw std::runtime_error(
        "Cannot apply sigmoid to CUDA tensor, library was not compiled with "
        "CUDA");
#endif
  }

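  // CPU fallback: apply the sigmoid element-wise over the tensor data.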
  for (auto it = tensor.data(); it != tensor.data() + tensor.size(); ++it) {
    *it = 1.f / (1.f + std::exp(-*it));
  }
}

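/// Keep only the edges whose score is at least `cut`, returning the compacted
/// score and edge-index tensors. Expects scores of shape (nEdges, 1) and an
/// edge index of shape (2, nEdges), both on the same device.
///
/// Usage sketch (assuming CPU-resident tensors `scores` and `edges` as
/// described above, and a hypothetical cut value of 0.5):
///
///   auto [keptScores, keptEdges] =
///       Acts::applyScoreCut(scores, edges, 0.5f, std::nullopt);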
std::pair<Tensor<float>, Tensor<std::int64_t>> applyScoreCut(
    const Tensor<float> &scores, const Tensor<std::int64_t> &edgeIndex,
    float cut, std::optional<cudaStream_t> stream) {
  assert(scores.shape()[1] == 1);
  assert(edgeIndex.shape()[0] == 2);
  assert(edgeIndex.shape()[1] == scores.shape()[0]);
  assert(scores.device() == edgeIndex.device());
  ExecutionContext execContext{scores.device(), stream};

  if (scores.device().type == Acts::Device::Type::eCUDA) {
#ifdef ACTS_GNN_WITH_CUDA
    return detail::cudaApplyScoreCut(scores, edgeIndex, cut, stream.value());
#else
    throw std::runtime_error(
        "Cannot apply score cut to CUDA tensor, library was not compiled with "
        "CUDA");
#endif
  }

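  // CPU fallback: collect the indices of all edges that pass the score cut.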
  std::vector<std::size_t> indices(scores.size());
  std::iota(indices.begin(), indices.end(), 0);
  indices.erase(
      std::remove_if(indices.begin(), indices.end(),
                     [&](std::size_t i) { return scores.data()[i] < cut; }),
      indices.end());
  auto n = indices.size();
  auto outputScores =
      Tensor<float>::Create({static_cast<std::size_t>(n), 1}, execContext);
  auto outputEdges = Tensor<std::int64_t>::Create(
      {2, static_cast<std::size_t>(n)}, execContext);

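  // Copy the surviving scores and both rows of the edge index into the
  // compacted output tensors.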
  auto scoreIt = outputScores.data();
  auto edgeIt1 = outputEdges.data();
  auto edgeIt2 = outputEdges.data() + n;
  for (auto i : indices) {
    *scoreIt = scores.data()[i];
    *edgeIt1 = edgeIndex.data()[i];
    *edgeIt2 = edgeIndex.data()[i + scores.size()];
    ++scoreIt;
    ++edgeIt1;
    ++edgeIt2;
  }

  return {std::move(outputScores), std::move(outputEdges)};
}

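/// Truncate the edge index (and, if present, the edge features) to at most
/// `maxEdges` edges. If the input already satisfies the limit, the tensors
/// are cloned unchanged; otherwise only the first `maxEdges` edges are kept.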
std::pair<Tensor<std::int64_t>, std::optional<Tensor<float>>> applyEdgeLimit(
    const Tensor<std::int64_t> &edgeIndex,
    const std::optional<Tensor<float>> &edgeFeatures, std::size_t maxEdges,
    std::optional<cudaStream_t> stream) {
  if (edgeFeatures.has_value() &&
      edgeIndex.device() != edgeFeatures->device()) {
    throw std::invalid_argument(
        "limitEdges: edgeIndex and edgeFeatures must be on the same device!");
  }
  if (edgeFeatures.has_value() &&
      edgeFeatures->shape().at(0) != edgeIndex.shape().at(1)) {
    throw std::invalid_argument("limitEdges: inconsistent number of edges");
  }

  const auto nEdgeFeatures =
      edgeFeatures.has_value() ? edgeFeatures->shape().at(1) : 0;
  const auto nEdgesOld = edgeIndex.shape().at(1);

  std::optional<Tensor<std::int64_t>> newEdgeIndexTensor;
  std::optional<Tensor<float>> newEdgeFeatureTensor;

  if (nEdgesOld <= maxEdges) {
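    // Already within the limit: clone the inputs unchanged.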
    newEdgeIndexTensor = edgeIndex.clone({edgeIndex.device(), stream});
    if (edgeFeatures.has_value()) {
      newEdgeFeatureTensor =
          edgeFeatures->clone({edgeFeatures->device(), stream});
    }
  } else if (edgeIndex.device().isCpu()) {
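    // CPU path: keep the first maxEdges entries of each edge-index row and
    // the first maxEdges feature rows.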
    ExecutionContext cpuCtx{Acts::Device::Cpu(), {}};

    std::span<const std::int64_t> edge0(edgeIndex.data(), maxEdges);
    std::span<const std::int64_t> edge1(edgeIndex.data() + nEdgesOld, maxEdges);

    newEdgeIndexTensor = Tensor<std::int64_t>::Create({2, maxEdges}, cpuCtx);
    std::copy(edge0.begin(), edge0.end(), newEdgeIndexTensor->data());
    std::copy(edge1.begin(), edge1.end(),
              newEdgeIndexTensor->data() + maxEdges);

    if (edgeFeatures.has_value()) {
      std::span<const float> edgeFeaturesResized(edgeFeatures->data(),
                                                 maxEdges * nEdgeFeatures);

      newEdgeFeatureTensor =
          Tensor<float>::Create({maxEdges, nEdgeFeatures}, cpuCtx);
      std::copy(edgeFeaturesResized.begin(), edgeFeaturesResized.end(),
                newEdgeFeatureTensor->data());
    }
  } else {
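    // CUDA path: same truncation, performed with asynchronous
    // device-to-device copies on the given stream.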
#ifdef ACTS_GNN_WITH_CUDA
    ExecutionContext gpuCtx{edgeIndex.device(), stream};

    newEdgeIndexTensor = Tensor<std::int64_t>::Create({2, maxEdges}, gpuCtx);
    ACTS_CUDA_CHECK(cudaMemcpyAsync(newEdgeIndexTensor->data(),
                                    edgeIndex.data(),
                                    maxEdges * sizeof(std::int64_t),
                                    cudaMemcpyDeviceToDevice, stream.value()));
    ACTS_CUDA_CHECK(cudaMemcpyAsync(newEdgeIndexTensor->data() + maxEdges,
                                    edgeIndex.data() + nEdgesOld,
                                    maxEdges * sizeof(std::int64_t),
                                    cudaMemcpyDeviceToDevice, stream.value()));

    if (edgeFeatures.has_value()) {
      newEdgeFeatureTensor =
          Tensor<float>::Create({maxEdges, nEdgeFeatures}, gpuCtx);

      ACTS_CUDA_CHECK(
          cudaMemcpyAsync(newEdgeFeatureTensor->data(), edgeFeatures->data(),
                          maxEdges * nEdgeFeatures * sizeof(float),
                          cudaMemcpyDeviceToDevice, stream.value()));
    }
#else
    throw std::runtime_error(
        "Cannot apply edge limit to CUDA tensors, library was not compiled "
        "with CUDA");
#endif
  }

  return {std::move(newEdgeIndexTensor.value()),
          std::move(newEdgeFeatureTensor)};
}

}  // namespace Acts