#include "ActsPlugins/Gnn/TensorRTEdgeClassifier.hpp"

#include "ActsPlugins/Gnn/detail/CudaUtils.hpp"

#include <cassert>
#include <chrono>
#include <filesystem>
#include <fstream>
#include <stdexcept>
#include <vector>

#include <NvInfer.h>
#include <NvInferPlugin.h>
#include <NvInferRuntimeBase.h>
#include <cuda_runtime.h>

#include "printCudaMemInfo.hpp"

using namespace Acts;

namespace {
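// Adapter that forwards TensorRT's internal log messages to the ACTS
// logging framework, mapping TensorRT severities onto ACTS log levels.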
class TensorRTLogger : public nvinfer1::ILogger {
  std::unique_ptr<const Logger> m_logger;

 public:
  TensorRTLogger(Logging::Level lvl)
      : m_logger(getDefaultLogger("TensorRT", lvl)) {}

  void log(Severity severity, const char *msg) noexcept override {
    const auto &logger = *m_logger;
    switch (severity) {
      case Severity::kVERBOSE:
        ACTS_DEBUG(msg);
        break;
      case Severity::kINFO:
        ACTS_INFO(msg);
        break;
      case Severity::kWARNING:
        ACTS_WARNING(msg);
        break;
      case Severity::kERROR:
        ACTS_ERROR(msg);
        break;
      case Severity::kINTERNAL_ERROR:
        ACTS_FATAL(msg);
        break;
    }
  }
};

}  // namespace

namespace ActsPlugins {

TensorRTEdgeClassifier::TensorRTEdgeClassifier(
    const Config &cfg, std::unique_ptr<const Logger> _logger)
    : m_logger(std::move(_logger)),
      m_cfg(cfg),
      m_trtLogger(std::make_unique<TensorRTLogger>(m_logger->level())) {
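  // Register the standard TensorRT plugins; an engine that contains plugin
  // layers cannot be deserialized without this.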
  auto status = initLibNvInferPlugins(m_trtLogger.get(), "");
  if (!status) {
    throw std::runtime_error("Failed to initialize TensorRT plugins");
  }

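  // Read the serialized engine file from disk into a host buffer.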
  std::size_t fsize =
      std::filesystem::file_size(std::filesystem::path(m_cfg.modelPath));
  std::vector<char> engineData(fsize);

  ACTS_DEBUG("Load '" << m_cfg.modelPath << "' with size " << fsize);

  std::ifstream engineFile(m_cfg.modelPath, std::ios::binary);
  if (!engineFile) {
    throw std::runtime_error("Failed to open engine file");
  } else if (!engineFile.read(engineData.data(), fsize)) {
    throw std::runtime_error("Failed to read engine file");
  }

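  // Deserialize the engine with a runtime that reports through the ACTS
  // logger.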
  m_runtime.reset(nvinfer1::createInferRuntime(*m_trtLogger));
  if (!m_runtime) {
    throw std::runtime_error("Failed to create TensorRT runtime");
  }

  m_engine.reset(m_runtime->deserializeCudaEngine(engineData.data(), fsize));
  if (!m_engine) {
    throw std::runtime_error("Failed to deserialize CUDA engine");
  }

  ACTS_INFO("Device memory required by TRT context: "
            << m_engine->getDeviceMemorySizeV2() * 1e-9 << " GB");

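  // Create a pool of execution contexts so several events can run inference
  // on the same engine concurrently.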
  for (auto i = 0ul; i < m_cfg.numExecutionContexts; ++i) {
    ACTS_DEBUG("Create execution context " << i);
    m_contexts.emplace_back(m_engine->createExecutionContext());
    if (!m_contexts.back()) {
      throw std::runtime_error("Failed to create execution context");
    }
  }

  std::size_t freeMem{}, totalMem{};
  ACTS_CUDA_CHECK(cudaMemGetInfo(&freeMem, &totalMem));
  ACTS_DEBUG("Used CUDA memory after TensorRT initialization: "
             << (totalMem - freeMem) * 1e-9 << " / " << totalMem * 1e-9
             << " GB");

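  // m_count acts as a counting semaphore: callers block once all execution
  // contexts are in use.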
  m_count.emplace(m_contexts.size());

  if (m_engine->getNbOptimizationProfiles() > 1) {
    ACTS_WARNING("Cannot handle more than one optimization profile for now");
  }

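  // Query optimization profile 0 for the maximum input extents the engine
  // was built with; these bound the per-event graph size.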
  m_maxNodes =
      m_engine->getProfileShape("x", 0, nvinfer1::OptProfileSelector::kMAX)
          .d[0];
  ACTS_INFO("Maximum number of nodes: " << m_maxNodes);

  auto maxEdgesA =
      m_engine
          ->getProfileShape("edge_index", 0,
                            nvinfer1::OptProfileSelector::kMAX)
          .d[1];
  auto maxEdgesB =
      m_engine
          ->getProfileShape("edge_attr", 0, nvinfer1::OptProfileSelector::kMAX)
          .d[0];

  if (maxEdgesA != maxEdgesB) {
    throw std::invalid_argument(
        "Inconsistent max edges definition in engine for 'edge_index' and "
        "'edge_attr'");
  }

  m_maxEdges = maxEdgesA;
  ACTS_INFO("Maximum number of edges: " << m_maxEdges);
}

TensorRTEdgeClassifier::~TensorRTEdgeClassifier() {}

PipelineTensors TensorRTEdgeClassifier::operator()(
    PipelineTensors tensors, const ExecutionContext &execContext) {
  assert(execContext.device.type == Device::Type::eCUDA);

  decltype(std::chrono::high_resolution_clock::now()) t0, t1, t2, t3, t4;
  t0 = std::chrono::high_resolution_clock::now();

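  // The engine cannot process more nodes than it was built for, so give up
  // on this event and report no edges.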
  if (auto nNodes = tensors.nodeFeatures.shape().at(0); nNodes > m_maxNodes) {
    ACTS_WARNING("Number of nodes ("
                 << nNodes << ") exceeds configured maximum, return 0 edges");
    throw NoEdgesError{};
  }

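  // Edges, in contrast, can be truncated to the engine's maximum without
  // aborting the event.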
  if (auto nEdges = tensors.edgeIndex.shape().at(1); nEdges > m_maxEdges) {
    ACTS_WARNING("Number of edges ("
                 << nEdges << ") exceeds maximum, shrink edge tensor to "
                 << m_maxEdges);

    auto [newEdgeIndex, newEdgeFeatures] =
        applyEdgeLimit(tensors.edgeIndex, tensors.edgeFeatures, m_maxEdges,
                       execContext.stream);
    tensors.edgeIndex = std::move(newEdgeIndex);
    tensors.edgeFeatures = std::move(newEdgeFeatures);
  }

  t1 = std::chrono::high_resolution_clock::now();

  std::unique_ptr<nvinfer1::IExecutionContext> context;

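  // Block until an execution context is available, then take exclusive
  // ownership of one from the pool.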
  m_count->acquire();
  assert(!m_contexts.empty());

  {
    std::lock_guard<std::mutex> lock(m_contextMutex);
    context = std::move(m_contexts.back());
    m_contexts.pop_back();
  }

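  // Input shapes are dynamic, so the actual extents of the node and edge
  // tensors must be set for every event before binding the device pointers.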
  context->setInputShape(
      "x", nvinfer1::Dims2{static_cast<long>(tensors.nodeFeatures.shape()[0]),
                           static_cast<long>(tensors.nodeFeatures.shape()[1])});
  context->setTensorAddress("x", tensors.nodeFeatures.data());

  context->setInputShape(
      "edge_index",
      nvinfer1::Dims2{static_cast<long>(tensors.edgeIndex.shape()[0]),
                      static_cast<long>(tensors.edgeIndex.shape()[1])});
  context->setTensorAddress("edge_index", tensors.edgeIndex.data());

  if (tensors.edgeFeatures.has_value()) {
    context->setInputShape(
        "edge_attr",
        nvinfer1::Dims2{static_cast<long>(tensors.edgeFeatures->shape()[0]),
                        static_cast<long>(tensors.edgeFeatures->shape()[1])});
    context->setTensorAddress("edge_attr", tensors.edgeFeatures->data());
  }

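  // Allocate the output tensor (one raw logit per edge) on the device and
  // bind it to the engine's "output" tensor.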
  auto scores =
      Tensor<float>::Create({tensors.edgeIndex.shape()[1], 1ul}, execContext);
  context->setTensorAddress("output", scores.data());

  t2 = std::chrono::high_resolution_clock::now();

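  // Enqueue inference on the event's CUDA stream and wait for it to finish.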
  auto stream = execContext.stream.value();
  auto status = context->enqueueV3(stream);
  if (!status) {
    throw std::runtime_error("Failed to execute TensorRT model");
  }
  ACTS_CUDA_CHECK(cudaStreamSynchronize(stream));

  t3 = std::chrono::high_resolution_clock::now();

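  // Return the execution context to the pool and signal its availability.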
  {
    std::lock_guard<std::mutex> lock(m_contextMutex);
    m_contexts.push_back(std::move(context));
  }

  m_count->release();

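  // The network outputs raw logits; apply a sigmoid to map them to edge
  // scores in [0, 1].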
  sigmoid(scores, execContext.stream);

  ACTS_VERBOSE("Size after classifier: " << scores.shape()[0]);
  printCudaMemInfo(logger());

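  // Keep only the edges whose score passes the configured cut.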
  auto [newScores, newEdgeIndex] =
      applyScoreCut(scores, tensors.edgeIndex, m_cfg.cut, execContext.stream);
  ACTS_VERBOSE("Size after score cut: " << newEdgeIndex.shape()[1]);
  printCudaMemInfo(logger());

  t4 = std::chrono::high_resolution_clock::now();

  auto milliseconds = [](const auto &a, const auto &b) {
    return std::chrono::duration<double, std::milli>(b - a).count();
  };
  ACTS_DEBUG("Time preparation: " << milliseconds(t0, t1));
  ACTS_DEBUG("Time alloc, set shape: " << milliseconds(t1, t2));
  ACTS_DEBUG("Time inference: " << milliseconds(t2, t3));
  ACTS_DEBUG("Time sigmoid and cut: " << milliseconds(t3, t4));

  return {std::move(tensors.nodeFeatures), std::move(newEdgeIndex),
          std::nullopt, std::move(newScores)};
}

}  // namespace ActsPlugins