#include "Acts/Plugins/Gnn/TensorRTEdgeClassifier.hpp"

#include "Acts/Plugins/Gnn/detail/CudaUtils.hpp"

#include <cassert>
#include <chrono>
#include <filesystem>
#include <fstream>
#include <memory>
#include <mutex>
#include <stdexcept>
#include <vector>

#include <NvInfer.h>
#include <NvInferPlugin.h>
#include <NvInferRuntimeBase.h>
#include <cuda_runtime.h>

#include "printCudaMemInfo.hpp"

namespace {

// Forwards TensorRT log messages to the Acts logging system at the
// corresponding severity level.
class TensorRTLogger : public nvinfer1::ILogger {
  std::unique_ptr<const Acts::Logger> m_logger;

 public:
  explicit TensorRTLogger(Acts::Logging::Level lvl)
      : m_logger(Acts::getDefaultLogger("TensorRT", lvl)) {}

  void log(Severity severity, const char *msg) noexcept override {
    const auto &logger = *m_logger;
    switch (severity) {
      case Severity::kVERBOSE:
        ACTS_DEBUG(msg);
        break;
      case Severity::kINFO:
        ACTS_INFO(msg);
        break;
      case Severity::kWARNING:
        ACTS_WARNING(msg);
        break;
      case Severity::kERROR:
        ACTS_ERROR(msg);
        break;
      case Severity::kINTERNAL_ERROR:
        ACTS_FATAL(msg);
        break;
    }
  }
};

}  // namespace

namespace Acts {

TensorRTEdgeClassifier::TensorRTEdgeClassifier(
    const Config &cfg, std::unique_ptr<const Logger> _logger)
    : m_logger(std::move(_logger)),
      m_cfg(cfg),
      m_trtLogger(std::make_unique<TensorRTLogger>(m_logger->level())) {
  auto status = initLibNvInferPlugins(m_trtLogger.get(), "");
  if (!status) {
    throw std::runtime_error("Failed to initialize TensorRT plugins");
  }

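  // Read the serialized engine ("plan") file into host memory so it can be
  // handed to the TensorRT runtime for deserialization.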
  std::size_t fsize =
      std::filesystem::file_size(std::filesystem::path(m_cfg.modelPath));
  std::vector<char> engineData(fsize);

  ACTS_DEBUG("Load '" << m_cfg.modelPath << "' with size " << fsize);

  std::ifstream engineFile(m_cfg.modelPath, std::ios::binary);
  if (!engineFile) {
    throw std::runtime_error("Failed to open engine file");
  } else if (!engineFile.read(engineData.data(), fsize)) {
    throw std::runtime_error("Failed to read engine file");
  }

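  // Create the TensorRT runtime and deserialize the engine from the
  // in-memory plan.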
  m_runtime.reset(nvinfer1::createInferRuntime(*m_trtLogger));
  if (!m_runtime) {
    throw std::runtime_error("Failed to create TensorRT runtime");
  }

  m_engine.reset(m_runtime->deserializeCudaEngine(engineData.data(), fsize));
  if (!m_engine) {
    throw std::runtime_error("Failed to deserialize CUDA engine");
  }

  ACTS_INFO("Device memory required by TRT context: "
            << m_engine->getDeviceMemorySizeV2() * 1e-9 << " GB");

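  // Create a pool of execution contexts so that several events can run
  // inference against the same engine at the same time.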
  for (auto i = 0ul; i < m_cfg.numExecutionContexts; ++i) {
    ACTS_DEBUG("Create execution context " << i);
    m_contexts.emplace_back(m_engine->createExecutionContext());
    if (!m_contexts.back()) {
      throw std::runtime_error("Failed to create execution context");
    }
  }

  std::size_t freeMem{}, totalMem{};
  ACTS_CUDA_CHECK(cudaMemGetInfo(&freeMem, &totalMem));
  ACTS_DEBUG("Used CUDA memory after TensorRT initialization: "
             << (totalMem - freeMem) * 1e-9 << " / " << totalMem * 1e-9
             << " GB");

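  // m_count acts as a counting semaphore over the context pool: operator()
  // blocks on it until one of the execution contexts becomes available.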
  m_count.emplace(m_contexts.size());

  if (m_engine->getNbOptimizationProfiles() > 1) {
    ACTS_WARNING("Cannot handle more than one optimization profile for now");
  }

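  // Query the maximum input sizes baked into optimization profile 0, so that
  // oversized events can be rejected or truncated at inference time.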
  m_maxNodes =
      m_engine->getProfileShape("x", 0, nvinfer1::OptProfileSelector::kMAX)
          .d[0];
  ACTS_INFO("Maximum number of nodes: " << m_maxNodes);

  auto maxEdgesA =
      m_engine
          ->getProfileShape("edge_index", 0, nvinfer1::OptProfileSelector::kMAX)
          .d[1];
  auto maxEdgesB =
      m_engine
          ->getProfileShape("edge_attr", 0, nvinfer1::OptProfileSelector::kMAX)
          .d[0];

  if (maxEdgesA != maxEdgesB) {
    throw std::invalid_argument(
        "Inconsistent max edges definition in engine for 'edge_index' and "
        "'edge_attr'");
  }

  m_maxEdges = maxEdgesA;
  ACTS_INFO("Maximum number of edges: " << m_maxEdges);
}

TensorRTEdgeClassifier::~TensorRTEdgeClassifier() {}

PipelineTensors TensorRTEdgeClassifier::operator()(
    PipelineTensors tensors, const ExecutionContext &execContext) {
  assert(execContext.device.type == Acts::Device::Type::eCUDA);

  decltype(std::chrono::high_resolution_clock::now()) t0, t1, t2, t3, t4;
  t0 = std::chrono::high_resolution_clock::now();

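  // The engine only accepts inputs up to the sizes baked into its
  // optimization profile: too many nodes is treated as an error, while
  // excess edges are truncated below.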
  if (auto nNodes = tensors.nodeFeatures.shape().at(0); nNodes > m_maxNodes) {
    ACTS_WARNING("Number of nodes ("
                 << nNodes
                 << ") exceeds the configured maximum, returning 0 edges");
    throw NoEdgesError{};
  }

  if (auto nEdges = tensors.edgeIndex.shape().at(1); nEdges > m_maxEdges) {
    ACTS_WARNING("Number of edges ("
                 << nEdges
                 << ") exceeds the configured maximum, shrinking edge tensor to "
                 << m_maxEdges);

    auto [newEdgeIndex, newEdgeFeatures] =
        applyEdgeLimit(tensors.edgeIndex, tensors.edgeFeatures, m_maxEdges,
                       execContext.stream);
    tensors.edgeIndex = std::move(newEdgeIndex);
    tensors.edgeFeatures = std::move(newEdgeFeatures);
  }

  t1 = std::chrono::high_resolution_clock::now();

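  // Acquire an execution context from the pool; the semaphore blocks until
  // one is free, and the mutex only guards the vector bookkeeping.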
  std::unique_ptr<nvinfer1::IExecutionContext> context;

  m_count->acquire();
  assert(!m_contexts.empty());

  {
    std::lock_guard<std::mutex> lock(m_contextMutex);
    context = std::move(m_contexts.back());
    m_contexts.pop_back();
  }

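  // Bind this event's inputs: set the dynamic input shapes and attach the
  // corresponding device pointers.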
  context->setInputShape(
      "x", nvinfer1::Dims2{static_cast<long>(tensors.nodeFeatures.shape()[0]),
                           static_cast<long>(tensors.nodeFeatures.shape()[1])});
  context->setTensorAddress("x", tensors.nodeFeatures.data());

  context->setInputShape(
      "edge_index",
      nvinfer1::Dims2{static_cast<long>(tensors.edgeIndex.shape()[0]),
                      static_cast<long>(tensors.edgeIndex.shape()[1])});
  context->setTensorAddress("edge_index", tensors.edgeIndex.data());

  if (tensors.edgeFeatures.has_value()) {
    context->setInputShape(
        "edge_attr",
        nvinfer1::Dims2{static_cast<long>(tensors.edgeFeatures->shape()[0]),
                        static_cast<long>(tensors.edgeFeatures->shape()[1])});
    context->setTensorAddress("edge_attr", tensors.edgeFeatures->data());
  }

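  // Allocate the output tensor (one score per edge) and bind it to the
  // engine's "output" tensor.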
  auto scores =
      Tensor<float>::Create({tensors.edgeIndex.shape()[1], 1ul}, execContext);
  context->setTensorAddress("output", scores.data());

  t2 = std::chrono::high_resolution_clock::now();

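  // Launch inference asynchronously on the event's CUDA stream and wait for
  // completion before the scores are used.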
  auto stream = execContext.stream.value();
  auto status = context->enqueueV3(stream);
  if (!status) {
    throw std::runtime_error("Failed to execute TensorRT model");
  }
  ACTS_CUDA_CHECK(cudaStreamSynchronize(stream));

  t3 = std::chrono::high_resolution_clock::now();

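  // Hand the execution context back to the pool and release the semaphore;
  // the remaining post-processing does not need it.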
  {
    std::lock_guard<std::mutex> lock(m_contextMutex);
    m_contexts.push_back(std::move(context));
  }
  m_count->release();

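  // Map the raw model output to [0, 1] scores with a sigmoid, then remove
  // edges that fall below the configured score cut.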
  sigmoid(scores, execContext.stream);

  ACTS_VERBOSE("Size after classifier: " << scores.shape()[0]);
  printCudaMemInfo(logger());

  auto [newScores, newEdgeIndex] =
      applyScoreCut(scores, tensors.edgeIndex, m_cfg.cut, execContext.stream);
  ACTS_VERBOSE("Size after score cut: " << newEdgeIndex.shape()[1]);
  printCudaMemInfo(logger());

  t4 = std::chrono::high_resolution_clock::now();

  auto milliseconds = [](const auto &a, const auto &b) {
    return std::chrono::duration<double, std::milli>(b - a).count();
  };
  ACTS_DEBUG("Time input validation:  " << milliseconds(t0, t1));
  ACTS_DEBUG("Time context & binding: " << milliseconds(t1, t2));
  ACTS_DEBUG("Time inference:         " << milliseconds(t2, t3));
  ACTS_DEBUG("Time sigmoid and cut:   " << milliseconds(t3, t4));

  return {std::move(tensors.nodeFeatures), std::move(newEdgeIndex),
          std::nullopt, std::move(newScores)};
}

}  // namespace Acts