File indexing completed on 2025-07-02 07:51:55
0001
0002
0003
0004
0005
0006
0007
0008
#include "Acts/Plugins/ExaTrkX/Tensor.hpp"

#ifdef ACTS_EXATRKX_WITH_CUDA
#include "Acts/Plugins/ExaTrkX/detail/CudaUtils.hpp"
#endif

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <numeric>
#include <optional>
#include <stdexcept>
#include <utility>
#include <vector>
0017
0018 namespace Acts {
0019
0020 namespace detail {
0021
0022 TensorPtr createTensorMemory(std::size_t nbytes,
0023 const ExecutionContext &execContext) {
0024 if (execContext.device.type == Acts::Device::Type::eCPU) {
0025 void *ptr = new std::byte[nbytes];
0026 if (ptr == nullptr) {
0027 throw std::bad_alloc{};
0028 }
0029 return TensorPtr(ptr,
0030 [](void *p) { delete[] static_cast<std::byte *>(p); });
0031 } else {
0032 #ifdef ACTS_EXATRKX_WITH_CUDA
0033 assert(execContext.stream.has_value());
0034 auto stream = *execContext.stream;
0035 void *ptr{};
0036 ACTS_CUDA_CHECK(cudaMallocAsync(&ptr, nbytes, stream));
0037 return TensorPtr(
0038 ptr, [stream](void *p) { ACTS_CUDA_CHECK(cudaFreeAsync(p, stream)); });
0039 #else
0040 throw std::runtime_error(
0041 "Cannot create CUDA tensor, library was not compiled with CUDA");
0042 #endif
0043 }
0044 }
0045
0046 TensorPtr cloneTensorMemory(const TensorPtr &ptr, std::size_t nbytes,
0047 Device devFrom, const ExecutionContext &to) {
0048 auto clone = createTensorMemory(nbytes, to);
0049 if (devFrom.isCpu() && to.device.isCpu()) {
0050 std::memcpy(clone.get(), ptr.get(), nbytes);
0051 } else {
0052 #ifdef ACTS_EXATRKX_WITH_CUDA
0053 assert(to.stream.has_value());
0054 if (devFrom.isCuda() && to.device.isCuda()) {
0055 ACTS_CUDA_CHECK(cudaMemcpyAsync(clone.get(), ptr.get(), nbytes,
0056 cudaMemcpyDeviceToDevice, *to.stream));
0057 } else if (devFrom.isCpu() && to.device.isCuda()) {
0058 ACTS_CUDA_CHECK(cudaMemcpyAsync(clone.get(), ptr.get(), nbytes,
0059 cudaMemcpyHostToDevice, *to.stream));
0060 } else if (devFrom.isCuda() && to.device.isCpu()) {
0061 ACTS_CUDA_CHECK(cudaMemcpyAsync(clone.get(), ptr.get(), nbytes,
0062 cudaMemcpyDeviceToHost, *to.stream));
0063 }
0064 #else
0065 throw std::runtime_error(
0066 "Cannot clone CUDA tensor, library was not compiled with CUDA");
0067 #endif
0068 }
0069 return clone;
0070 }
0071
// Forward declarations of the CUDA implementations; presumably defined in a
// separate CUDA translation unit linked only when ACTS_EXATRKX_WITH_CUDA is
// enabled (the wrappers below guard every call accordingly).

// In-place sigmoid over a float tensor, executed on the given CUDA stream.
void cudaSigmoid(Tensor<float> &tensor, cudaStream_t stream);

// Device-side counterpart of applyScoreCut: filters scores/edges by the cut
// value on the given CUDA stream and returns the compacted tensors.
std::pair<Tensor<float>, Tensor<std::int64_t>> cudaApplyScoreCut(
    const Tensor<float> &scores, const Tensor<std::int64_t> &edgeIndex,
    float cut, cudaStream_t stream);
0077
0078 }
0079
0080 void sigmoid(Tensor<float> &tensor, std::optional<cudaStream_t> stream) {
0081 if (tensor.device().type == Acts::Device::Type::eCUDA) {
0082 #ifdef ACTS_EXATRKX_WITH_CUDA
0083 return Acts::detail::cudaSigmoid(tensor, stream.value());
0084 #else
0085 throw std::runtime_error(
0086 "Cannot apply sigmoid to CUDA tensor, library was not compiled with "
0087 "CUDA");
0088 #endif
0089 }
0090
0091 for (auto it = tensor.data(); it != tensor.data() + tensor.size(); ++it) {
0092 *it = 1.f / (1.f + std::exp(-*it));
0093 }
0094 }
0095
0096 std::pair<Tensor<float>, Tensor<std::int64_t>> applyScoreCut(
0097 const Tensor<float> &scores, const Tensor<std::int64_t> &edgeIndex,
0098 float cut, std::optional<cudaStream_t> stream) {
0099 assert(scores.shape()[1] == 1);
0100 assert(edgeIndex.shape()[0] == 2);
0101 assert(edgeIndex.shape()[1] == scores.shape()[0]);
0102 assert(scores.device() == edgeIndex.device());
0103 ExecutionContext execContext{scores.device(), stream};
0104
0105 if (scores.device().type == Acts::Device::Type::eCUDA) {
0106 #ifdef ACTS_EXATRKX_WITH_CUDA
0107 return detail::cudaApplyScoreCut(scores, edgeIndex, cut, stream.value());
0108 #else
0109 throw std::runtime_error(
0110 "Cannot apply score cut to CUDA tensor, library was not compiled with "
0111 "CUDA");
0112 #endif
0113 }
0114
0115 std::vector<std::size_t> indices(scores.size());
0116 std::iota(indices.begin(), indices.end(), 0);
0117 indices.erase(
0118 std::remove_if(indices.begin(), indices.end(),
0119 [&](std::size_t i) { return scores.data()[i] < cut; }),
0120 indices.end());
0121 auto n = indices.size();
0122 auto outputScores =
0123 Tensor<float>::Create({static_cast<std::size_t>(n), 1}, execContext);
0124 auto outputEdges = Tensor<std::int64_t>::Create(
0125 {2, static_cast<std::size_t>(n)}, execContext);
0126
0127 auto scoreIt = outputScores.data();
0128 auto edgeIt1 = outputEdges.data();
0129 auto edgeIt2 = outputEdges.data() + n;
0130 for (auto i : indices) {
0131 *scoreIt = scores.data()[i];
0132 *edgeIt1 = edgeIndex.data()[i];
0133 *edgeIt2 = edgeIndex.data()[i + scores.size()];
0134 ++scoreIt;
0135 ++edgeIt1;
0136 ++edgeIt2;
0137 }
0138
0139 return {std::move(outputScores), std::move(outputEdges)};
0140 }
0141
0142 }