#include "Acts/Plugins/Gnn/TensorRTEdgeClassifier.hpp"

#include "Acts/Plugins/Gnn/detail/CudaUtils.hpp"

#include <cassert>
#include <chrono>
#include <filesystem>
#include <fstream>
#include <memory>
#include <mutex>
#include <stdexcept>
#include <vector>

#include <NvInfer.h>
#include <NvInferPlugin.h>
#include <NvInferRuntimeBase.h>
#include <cuda_runtime.h>

#include "printCudaMemInfo.hpp"

namespace {

// Forwards TensorRT log messages to the Acts logging system at the
// corresponding severity level.
class TensorRTLogger : public nvinfer1::ILogger {
  std::unique_ptr<const Acts::Logger> m_logger;

 public:
  explicit TensorRTLogger(Acts::Logging::Level lvl)
      : m_logger(Acts::getDefaultLogger("TensorRT", lvl)) {}

  void log(Severity severity, const char *msg) noexcept override {
    const auto &logger = *m_logger;
    switch (severity) {
      case Severity::kVERBOSE:
        ACTS_DEBUG(msg);
        break;
      case Severity::kINFO:
        ACTS_INFO(msg);
        break;
      case Severity::kWARNING:
        ACTS_WARNING(msg);
        break;
      case Severity::kERROR:
        ACTS_ERROR(msg);
        break;
      case Severity::kINTERNAL_ERROR:
        ACTS_FATAL(msg);
        break;
    }
  }
};

}  // namespace

namespace Acts {

TensorRTEdgeClassifier::TensorRTEdgeClassifier(
    const Config &cfg, std::unique_ptr<const Logger> _logger)
    : m_logger(std::move(_logger)),
      m_cfg(cfg),
      m_trtLogger(std::make_unique<TensorRTLogger>(m_logger->level())) {
  auto status = initLibNvInferPlugins(m_trtLogger.get(), "");
  if (!status) {
    throw std::runtime_error("Failed to initialize TensorRT plugins");
  }

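  // Read the serialized engine ("plan") file into host memory so it can be
  // handed to the TensorRT runtime for deserialization.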
  std::size_t fsize =
      std::filesystem::file_size(std::filesystem::path(m_cfg.modelPath));
  std::vector<char> engineData(fsize);

  ACTS_DEBUG("Load '" << m_cfg.modelPath << "' with size " << fsize);

  std::ifstream engineFile(m_cfg.modelPath, std::ios::binary);
  if (!engineFile) {
    throw std::runtime_error("Failed to open engine file");
  } else if (!engineFile.read(engineData.data(), fsize)) {
    throw std::runtime_error("Failed to read engine file");
  }

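  // Create the TensorRT runtime and deserialize the engine from the
  // in-memory plan.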
  m_runtime.reset(nvinfer1::createInferRuntime(*m_trtLogger));
  if (!m_runtime) {
    throw std::runtime_error("Failed to create TensorRT runtime");
  }

  m_engine.reset(m_runtime->deserializeCudaEngine(engineData.data(), fsize));
  if (!m_engine) {
    throw std::runtime_error("Failed to deserialize CUDA engine");
  }

  ACTS_INFO("Device memory required by TRT context: "
            << m_engine->getDeviceMemorySizeV2() * 1e-9 << " GB");

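  // Create a pool of execution contexts so that several events can run
  // inference against the same engine at the same time.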
  for (auto i = 0ul; i < m_cfg.numExecutionContexts; ++i) {
    ACTS_DEBUG("Create execution context " << i);
    m_contexts.emplace_back(m_engine->createExecutionContext());
    if (!m_contexts.back()) {
      throw std::runtime_error("Failed to create execution context");
    }
  }

  std::size_t freeMem{}, totalMem{};
  ACTS_CUDA_CHECK(cudaMemGetInfo(&freeMem, &totalMem));
  ACTS_DEBUG("Used CUDA memory after TensorRT initialization: "
             << (totalMem - freeMem) * 1e-9 << " / " << totalMem * 1e-9
             << " GB");

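  // m_count acts as a counting semaphore over the context pool: operator()
  // blocks on it until one of the execution contexts becomes available.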
  m_count.emplace(m_contexts.size());

  if (m_engine->getNbOptimizationProfiles() > 1) {
    ACTS_WARNING("Cannot handle more than one optimization profile for now");
  }

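  // Query the maximum input sizes baked into optimization profile 0, so that
  // oversized events can be rejected or truncated at inference time.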
  m_maxNodes =
      m_engine->getProfileShape("x", 0, nvinfer1::OptProfileSelector::kMAX)
          .d[0];
  ACTS_INFO("Maximum number of nodes: " << m_maxNodes);

  auto maxEdgesA =
      m_engine
          ->getProfileShape("edge_index", 0, nvinfer1::OptProfileSelector::kMAX)
          .d[1];
  auto maxEdgesB =
      m_engine
          ->getProfileShape("edge_attr", 0, nvinfer1::OptProfileSelector::kMAX)
          .d[0];

  if (maxEdgesA != maxEdgesB) {
    throw std::invalid_argument(
        "Inconsistent max edges definition in engine for 'edge_index' and "
        "'edge_attr'");
  }

  m_maxEdges = maxEdgesA;
  ACTS_INFO("Maximum number of edges: " << m_maxEdges);
}

TensorRTEdgeClassifier::~TensorRTEdgeClassifier() {}

PipelineTensors TensorRTEdgeClassifier::operator()(
    PipelineTensors tensors, const ExecutionContext &execContext) {
  assert(execContext.device.type == Acts::Device::Type::eCUDA);

  decltype(std::chrono::high_resolution_clock::now()) t0, t1, t2, t3, t4;
  t0 = std::chrono::high_resolution_clock::now();

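  // The engine only accepts inputs up to the sizes baked into its
  // optimization profile: too many nodes is treated as an error, while
  // excess edges are truncated below.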
  if (auto nNodes = tensors.nodeFeatures.shape().at(0); nNodes > m_maxNodes) {
    ACTS_WARNING("Number of nodes ("
                 << nNodes
                 << ") exceeds the configured maximum, returning 0 edges");
    throw NoEdgesError{};
  }

  if (auto nEdges = tensors.edgeIndex.shape().at(1); nEdges > m_maxEdges) {
    ACTS_WARNING("Number of edges ("
                 << nEdges
                 << ") exceeds the configured maximum, shrinking edge tensor to "
                 << m_maxEdges);

    auto [newEdgeIndex, newEdgeFeatures] =
        applyEdgeLimit(tensors.edgeIndex, tensors.edgeFeatures, m_maxEdges,
                       execContext.stream);
    tensors.edgeIndex = std::move(newEdgeIndex);
    tensors.edgeFeatures = std::move(newEdgeFeatures);
  }

  t1 = std::chrono::high_resolution_clock::now();

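  // Acquire an execution context from the pool; the semaphore blocks until
  // one is free, and the mutex only guards the vector bookkeeping.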
  std::unique_ptr<nvinfer1::IExecutionContext> context;

  m_count->acquire();
  assert(!m_contexts.empty());

  {
    std::lock_guard<std::mutex> lock(m_contextMutex);
    context = std::move(m_contexts.back());
    m_contexts.pop_back();
  }

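  // Bind this event's inputs: set the dynamic input shapes and attach the
  // corresponding device pointers.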
  context->setInputShape(
      "x", nvinfer1::Dims2{static_cast<long>(tensors.nodeFeatures.shape()[0]),
                           static_cast<long>(tensors.nodeFeatures.shape()[1])});
  context->setTensorAddress("x", tensors.nodeFeatures.data());

  context->setInputShape(
      "edge_index",
      nvinfer1::Dims2{static_cast<long>(tensors.edgeIndex.shape()[0]),
                      static_cast<long>(tensors.edgeIndex.shape()[1])});
  context->setTensorAddress("edge_index", tensors.edgeIndex.data());

  if (tensors.edgeFeatures.has_value()) {
    context->setInputShape(
        "edge_attr",
        nvinfer1::Dims2{static_cast<long>(tensors.edgeFeatures->shape()[0]),
                        static_cast<long>(tensors.edgeFeatures->shape()[1])});
    context->setTensorAddress("edge_attr", tensors.edgeFeatures->data());
  }

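  // Allocate the output tensor (one score per edge) and bind it to the
  // engine's "output" tensor.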
  auto scores =
      Tensor<float>::Create({tensors.edgeIndex.shape()[1], 1ul}, execContext);
  context->setTensorAddress("output", scores.data());

  t2 = std::chrono::high_resolution_clock::now();

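  // Launch inference asynchronously on the event's CUDA stream and wait for
  // completion before the scores are used.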
  auto stream = execContext.stream.value();
  auto status = context->enqueueV3(stream);
  if (!status) {
    throw std::runtime_error("Failed to execute TensorRT model");
  }
  ACTS_CUDA_CHECK(cudaStreamSynchronize(stream));

  t3 = std::chrono::high_resolution_clock::now();

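  // Hand the execution context back to the pool and release the semaphore;
  // the remaining post-processing does not need it.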
  {
    std::lock_guard<std::mutex> lock(m_contextMutex);
    m_contexts.push_back(std::move(context));
  }
  m_count->release();

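  // Map the raw model output to [0, 1] scores with a sigmoid, then remove
  // edges that fall below the configured score cut.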
  sigmoid(scores, execContext.stream);

  ACTS_VERBOSE("Size after classifier: " << scores.shape()[0]);
  printCudaMemInfo(logger());

  auto [newScores, newEdgeIndex] =
      applyScoreCut(scores, tensors.edgeIndex, m_cfg.cut, execContext.stream);
  ACTS_VERBOSE("Size after score cut: " << newEdgeIndex.shape()[1]);
  printCudaMemInfo(logger());

  t4 = std::chrono::high_resolution_clock::now();

  auto milliseconds = [](const auto &a, const auto &b) {
    return std::chrono::duration<double, std::milli>(b - a).count();
  };
  ACTS_DEBUG("Time input validation:  " << milliseconds(t0, t1));
  ACTS_DEBUG("Time context & binding: " << milliseconds(t1, t2));
  ACTS_DEBUG("Time inference:         " << milliseconds(t2, t3));
  ACTS_DEBUG("Time sigmoid and cut:   " << milliseconds(t3, t4));

  return {std::move(tensors.nodeFeatures), std::move(newEdgeIndex),
          std::nullopt, std::move(newScores)};
}

}  // namespace Acts