File indexing completed on 2025-07-02 07:51:55
0001
0002
0003
0004
0005
0006
0007
0008
#include "Acts/Plugins/ExaTrkX/Tensor.hpp"

#ifdef ACTS_EXATRKX_WITH_CUDA
#include "Acts/Plugins/ExaTrkX/detail/CudaUtils.hpp"
#endif

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <numeric>
#include <optional>
#include <stdexcept>
#include <utility>
#include <vector>
0017
0018 namespace Acts {
0019
0020 namespace detail {
0021
0022 TensorPtr createTensorMemory(std::size_t nbytes,
0023 const ExecutionContext &execContext) {
0024 if (execContext.device.type == Acts::Device::Type::eCPU) {
0025 void *ptr = new std::byte[nbytes];
0026 if (ptr == nullptr) {
0027 throw std::bad_alloc{};
0028 }
0029 return TensorPtr(ptr,
0030 [](void *p) { delete[] static_cast<std::byte *>(p); });
0031 } else {
0032 #ifdef ACTS_EXATRKX_WITH_CUDA
0033 assert(execContext.stream.has_value());
0034 auto stream = *execContext.stream;
0035 void *ptr{};
0036 ACTS_CUDA_CHECK(cudaMallocAsync(&ptr, nbytes, stream));
0037 return TensorPtr(
0038 ptr, [stream](void *p) { ACTS_CUDA_CHECK(cudaFreeAsync(p, stream)); });
0039 #else
0040 throw std::runtime_error(
0041 "Cannot create CUDA tensor, library was not compiled with CUDA");
0042 #endif
0043 }
0044 }
0045
0046 TensorPtr cloneTensorMemory(const TensorPtr &ptr, std::size_t nbytes,
0047 Device devFrom, const ExecutionContext &to) {
0048 auto clone = createTensorMemory(nbytes, to);
0049 if (devFrom.isCpu() && to.device.isCpu()) {
0050 std::memcpy(clone.get(), ptr.get(), nbytes);
0051 } else {
0052 #ifdef ACTS_EXATRKX_WITH_CUDA
0053 assert(to.stream.has_value());
0054 if (devFrom.isCuda() && to.device.isCuda()) {
0055 ACTS_CUDA_CHECK(cudaMemcpyAsync(clone.get(), ptr.get(), nbytes,
0056 cudaMemcpyDeviceToDevice, *to.stream));
0057 } else if (devFrom.isCpu() && to.device.isCuda()) {
0058 ACTS_CUDA_CHECK(cudaMemcpyAsync(clone.get(), ptr.get(), nbytes,
0059 cudaMemcpyHostToDevice, *to.stream));
0060 } else if (devFrom.isCuda() && to.device.isCpu()) {
0061 ACTS_CUDA_CHECK(cudaMemcpyAsync(clone.get(), ptr.get(), nbytes,
0062 cudaMemcpyDeviceToHost, *to.stream));
0063 }
0064 #else
0065 throw std::runtime_error(
0066 "Cannot clone CUDA tensor, library was not compiled with CUDA");
0067 #endif
0068 }
0069 return clone;
0070 }
0071
// Forward declarations of the CUDA implementations; presumably defined in a
// separate CUDA translation unit linked only when ACTS_EXATRKX_WITH_CUDA is
// enabled (the wrappers below guard every call accordingly).

// In-place sigmoid over a float tensor, executed on the given CUDA stream.
void cudaSigmoid(Tensor<float> &tensor, cudaStream_t stream);

// Device-side counterpart of applyScoreCut: filters scores/edges by the cut
// value on the given CUDA stream and returns the compacted tensors.
std::pair<Tensor<float>, Tensor<std::int64_t>> cudaApplyScoreCut(
    const Tensor<float> &scores, const Tensor<std::int64_t> &edgeIndex,
    float cut, cudaStream_t stream);
0077
0078 }
0079
0080 void sigmoid(Tensor<float> &tensor, std::optional<cudaStream_t> stream) {
0081 if (tensor.device().type == Acts::Device::Type::eCUDA) {
0082 #ifdef ACTS_EXATRKX_WITH_CUDA
0083 return Acts::detail::cudaSigmoid(tensor, stream.value());
0084 #else
0085 throw std::runtime_error(
0086 "Cannot apply sigmoid to CUDA tensor, library was not compiled with "
0087 "CUDA");
0088 #endif
0089 }
0090
0091 for (auto it = tensor.data(); it != tensor.data() + tensor.size(); ++it) {
0092 *it = 1.f / (1.f + std::exp(-*it));
0093 }
0094 }
0095
0096 std::pair<Tensor<float>, Tensor<std::int64_t>> applyScoreCut(
0097 const Tensor<float> &scores, const Tensor<std::int64_t> &edgeIndex,
0098 float cut, std::optional<cudaStream_t> stream) {
0099 assert(scores.shape()[1] == 1);
0100 assert(edgeIndex.shape()[0] == 2);
0101 assert(edgeIndex.shape()[1] == scores.shape()[0]);
0102 assert(scores.device() == edgeIndex.device());
0103 ExecutionContext execContext{scores.device(), stream};
0104
0105 if (scores.device().type == Acts::Device::Type::eCUDA) {
0106 #ifdef ACTS_EXATRKX_WITH_CUDA
0107 return detail::cudaApplyScoreCut(scores, edgeIndex, cut, stream.value());
0108 #else
0109 throw std::runtime_error(
0110 "Cannot apply score cut to CUDA tensor, library was not compiled with "
0111 "CUDA");
0112 #endif
0113 }
0114
0115 std::vector<std::size_t> indices(scores.size());
0116 std::iota(indices.begin(), indices.end(), 0);
0117 indices.erase(
0118 std::remove_if(indices.begin(), indices.end(),
0119 [&](std::size_t i) { return scores.data()[i] < cut; }),
0120 indices.end());
0121 auto n = indices.size();
0122 auto outputScores =
0123 Tensor<float>::Create({static_cast<std::size_t>(n), 1}, execContext);
0124 auto outputEdges = Tensor<std::int64_t>::Create(
0125 {2, static_cast<std::size_t>(n)}, execContext);
0126
0127 auto scoreIt = outputScores.data();
0128 auto edgeIt1 = outputEdges.data();
0129 auto edgeIt2 = outputEdges.data() + n;
0130 for (auto i : indices) {
0131 *scoreIt = scores.data()[i];
0132 *edgeIt1 = edgeIndex.data()[i];
0133 *edgeIt2 = edgeIndex.data()[i + scores.size()];
0134 ++scoreIt;
0135 ++edgeIt1;
0136 ++edgeIt2;
0137 }
0138
0139 return {std::move(outputScores), std::move(outputEdges)};
0140 }
0141
0142 }