Back to home page

EIC code displayed by LXR

 
 

    


Warning, file /acts/Examples/Io/HepMC3/src/HepMC3Util.cpp was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).

0001 // This file is part of the ACTS project.
0002 //
0003 // Copyright (C) 2016 CERN for the benefit of the ACTS project
0004 //
0005 // This Source Code Form is subject to the terms of the Mozilla Public
0006 // License, v. 2.0. If a copy of the MPL was not distributed with this
0007 // file, You can obtain one at https://mozilla.org/MPL/2.0/.
0008 
0009 #include "ActsExamples/Io/HepMC3/HepMC3Util.hpp"
0010 
0011 #include "Acts/Utilities/ScopedTimer.hpp"
0012 
0013 #include <chrono>
0014 #include <fstream>
0015 #include <iomanip>
0016 #include <iostream>
0017 #include <memory>
0018 #include <sstream>
0019 #include <stdexcept>
0020 
0021 #include <HepMC3/GenEvent.h>
0022 #include <HepMC3/GenParticle.h>
0023 #include <HepMC3/GenVertex.h>
0024 #include <HepMC3/Reader.h>
0025 #include <HepMC3/Version.h>
0026 #include <HepMC3/Writer.h>
0027 #include <nlohmann/json.hpp>
0028 
0029 namespace ActsExamples {
0030 
0031 namespace {
0032 template <typename T>
0033 void mergeEventsImpl(HepMC3::GenEvent& event, std::span<T> genEvents,
0034                      const Acts::Logger& logger) {
0035   Acts::AveragingScopedTimer mergeTimer("Merging HepMC3 events", logger(),
0036                                         Acts::Logging::DEBUG);
0037 
0038   std::vector<std::shared_ptr<HepMC3::GenParticle>> particles;
0039 
0040   // Loop once to find the total size we'll need
0041   std::size_t nParticles = 0;
0042   std::size_t nVertices = 0;
0043   for (const auto& genEvent : genEvents) {
0044     nParticles += genEvent->particles().size();
0045     nVertices += genEvent->vertices().size();
0046   }
0047 
0048   event.reserve(nParticles, nVertices);
0049 
0050   for (const auto& genEvent : genEvents) {
0051     auto sample = mergeTimer.sample();
0052     particles.clear();
0053     particles.reserve(genEvent->particles().size());
0054 
0055     auto copyAttributes = [&](const auto& src, auto& dst) {
0056       for (const auto& attr : src.attribute_names()) {
0057         auto value = src.attribute_as_string(attr);
0058         dst.add_attribute(attr,
0059                           std::make_shared<HepMC3::StringAttribute>(value));
0060       }
0061     };
0062 
0063     copyAttributes(*genEvent, event);
0064 
0065     // Add to combined event
0066     for (const auto& srcParticle : genEvent->particles()) {
0067       if (srcParticle->id() - 1 != static_cast<int>(particles.size())) {
0068         throw std::runtime_error("Particle id is not consecutive");
0069       }
0070       auto particle = std::make_shared<HepMC3::GenParticle>();
0071       particle->set_momentum(srcParticle->momentum());
0072       particle->set_generated_mass(srcParticle->generated_mass());
0073       particle->set_pid(srcParticle->pid());
0074       particle->set_status(srcParticle->status());
0075 
0076       particles.push_back(particle);
0077       event.add_particle(particle);
0078 
0079       copyAttributes(*srcParticle, *particle);
0080     }
0081 
0082     for (const auto& srcVertex : genEvent->vertices()) {
0083       auto vertex = std::make_shared<HepMC3::GenVertex>(srcVertex->position());
0084       vertex->set_status(srcVertex->status());
0085 
0086       event.add_vertex(vertex);
0087 
0088       copyAttributes(*srcVertex, *vertex);
0089 
0090       for (const auto& srcParticle : srcVertex->particles_in()) {
0091         const auto& particle = particles.at(srcParticle->id() - 1);
0092         vertex->add_particle_in(particle);
0093       }
0094       for (const auto& srcParticle : srcVertex->particles_out()) {
0095         const auto& particle = particles.at(srcParticle->id() - 1);
0096         vertex->add_particle_out(particle);
0097       }
0098     }
0099   }
0100 }
0101 }  // namespace
0102 
0103 void HepMC3Util::mergeEvents(HepMC3::GenEvent& event,
0104                              std::span<const HepMC3::GenEvent*> genEvents,
0105                              const Acts::Logger& logger) {
0106   mergeEventsImpl(event, genEvents, logger);
0107 }
0108 
0109 void HepMC3Util::mergeEvents(
0110     HepMC3::GenEvent& event,
0111     std::span<std::shared_ptr<const HepMC3::GenEvent>> genEvents,
0112     const Acts::Logger& logger) {
0113   mergeEventsImpl(event, genEvents, logger);
0114 }
0115 
0116 std::string_view HepMC3Util::compressionExtension(Compression compression) {
0117   switch (compression) {
0118     using enum Compression;
0119     case none:
0120       return "";
0121     case zlib:
0122       return ".gz";
0123     case lzma:
0124       return ".xz";
0125     case bzip2:
0126       return ".bz2";
0127     case zstd:
0128       return ".zst";
0129     default:
0130       throw std::invalid_argument{"Unknown compression value"};
0131   }
0132 }
0133 
0134 HepMC3Util::Compression HepMC3Util::compressionFromFilename(
0135     const std::filesystem::path& filename) {
0136   using enum Compression;
0137 
0138   std::string filenameStr = filename.string();
0139 
0140   // Check for compression extensions in order
0141   if (filenameStr.ends_with(".gz")) {
0142     return zlib;
0143   }
0144   if (filenameStr.ends_with(".xz")) {
0145     return lzma;
0146   }
0147   if (filenameStr.ends_with(".bz2")) {
0148     return bzip2;
0149   }
0150   if (filenameStr.ends_with(".zst")) {
0151     return zstd;
0152   }
0153 
0154   // No compression extension found
0155   return none;
0156 }
0157 
0158 std::span<const HepMC3Util::Compression>
0159 HepMC3Util::availableCompressionModes() {
0160   using enum Compression;
0161   static const auto values = []() -> std::vector<HepMC3Util::Compression> {
0162     return {
0163         none,
0164 #ifdef HEPMC3_Z_SUPPORT
0165         zlib,
0166 #endif
0167 #ifdef HEPMC3_LZMA_SUPPORT
0168         lzma,
0169 #endif
0170 #ifdef HEPMC3_BZ2_SUPPORT
0171         bzip2,
0172 #endif
0173 #ifdef HEPMC3_ZSTD_SUPPORT
0174         zstd,
0175 #endif
0176     };
0177   }();
0178   return values;
0179 }
0180 
0181 std::ostream& HepMC3Util::operator<<(std::ostream& os,
0182                                      HepMC3Util::Compression compression) {
0183   switch (compression) {
0184     using enum HepMC3Util::Compression;
0185     case none:
0186       return os << "none";
0187     case zlib:
0188       return os << "zlib";
0189     case lzma:
0190       return os << "lzma";
0191     case bzip2:
0192       return os << "bzip2";
0193     case zstd:
0194       return os << "zstd";
0195     default:
0196       throw std::invalid_argument{"Unknown compression value"};
0197   }
0198 }
0199 
0200 std::ostream& HepMC3Util::operator<<(std::ostream& os,
0201                                      HepMC3Util::Format format) {
0202   switch (format) {
0203     using enum HepMC3Util::Format;
0204     case ascii:
0205       return os << "ascii";
0206     case root:
0207       return os << "root";
0208     default:
0209       throw std::invalid_argument{"Unknown format value"};
0210   }
0211 }
0212 
0213 std::span<const HepMC3Util::Format> HepMC3Util::availableFormats() {
0214   using enum Format;
0215   static const auto values = []() -> std::vector<HepMC3Util::Format> {
0216     return {
0217         ascii,
0218 #ifdef HEPMC3_ROOT_SUPPORT
0219         root,
0220 #endif
0221     };
0222   }();
0223   return values;
0224 }
0225 
0226 HepMC3Util::Format HepMC3Util::formatFromFilename(
0227     const std::filesystem::path& filename) {
0228   using enum Format;
0229 
0230   for (auto compression : availableCompressionModes()) {
0231     auto ext = compressionExtension(compression);
0232 
0233     if (filename.string().ends_with(".hepmc3" + std::string(ext)) ||
0234         filename.string().ends_with(".hepmc" + std::string(ext))) {
0235       return ascii;
0236     }
0237   }
0238   if (filename.string().ends_with(".root")) {
0239     return root;
0240   }
0241 
0242   throw std::invalid_argument{"Unknown format extension: " +
0243                               std::string{filename}};
0244 }
0245 
0246 namespace {
0247 
0248 // Helper function to format file sizes
0249 std::string formatSize(std::uintmax_t bytes) {
0250   const std::array<const char*, 5> units = {"B", "KB", "MB", "GB", "TB"};
0251   std::size_t unit = 0;
0252   double size = static_cast<double>(bytes);
0253 
0254   while (size >= 1024.0 && unit < units.size() - 1) {
0255     size /= 1024.0;
0256     unit++;
0257   }
0258 
0259   std::ostringstream oss;
0260   oss << std::fixed << std::setprecision(2) << size << " " << units[unit];
0261   return oss.str();
0262 }
0263 
0264 // Helper function to write metadata sidecar file
0265 void writeMetadata(const std::filesystem::path& dataFile,
0266                    std::size_t numEvents) {
0267   std::filesystem::path metadataFile = dataFile;
0268   metadataFile += ".json";
0269 
0270   nlohmann::json metadata;
0271   metadata["num_events"] = numEvents;
0272 
0273   std::ofstream metadataStream(metadataFile);
0274   metadataStream << metadata.dump(2);
0275 }
0276 
0277 // Helper function to generate output filename
0278 std::string generateOutputFilename(const std::string& prefix,
0279                                    std::size_t fileNum,
0280                                    HepMC3Util::Format format,
0281                                    HepMC3Util::Compression compression) {
0282   std::ostringstream filename;
0283   filename << prefix << "_" << std::setfill('0') << std::setw(6) << fileNum;
0284 
0285   if (format == HepMC3Util::Format::ascii) {
0286     filename << ".hepmc3";
0287   } else if (format == HepMC3Util::Format::root) {
0288     filename << ".root";
0289   }
0290 
0291   filename << HepMC3Util::compressionExtension(compression);
0292 
0293   return filename.str();
0294 }
0295 
0296 }  // namespace
0297 
0298 HepMC3Util::NormalizeResult HepMC3Util::normalizeFiles(
0299     const std::vector<std::filesystem::path>& inputFiles,
0300     std::optional<std::filesystem::path> singleOutputPath,
0301     const std::filesystem::path& outputDir, const std::string& outputPrefix,
0302     std::optional<std::size_t> eventsPerFile,
0303     std::optional<std::size_t> maxEvents, Format format,
0304     Compression compression, int compressionLevel, bool verbose) {
0305   // Validate configuration
0306   if (inputFiles.empty()) {
0307     throw std::invalid_argument("No input files specified");
0308   }
0309 
0310   if (!singleOutputPath && !eventsPerFile.has_value()) {
0311     throw std::invalid_argument(
0312         "events-per-file must be > 0 in multi-file mode");
0313   }
0314 
0315   if (compressionLevel < 0 || compressionLevel > 19) {
0316     throw std::invalid_argument("compression-level must be 0-19");
0317   }
0318 
0319   if (format == Format::root && compression != Compression::none) {
0320     throw std::invalid_argument(
0321         "ROOT format does not support compression parameter");
0322   }
0323 
0324   NormalizeResult result;
0325 
0326   // Determine output directory
0327   std::filesystem::path actualOutputDir;
0328   if (singleOutputPath) {
0329     if (singleOutputPath->has_parent_path()) {
0330       actualOutputDir = singleOutputPath->parent_path();
0331     } else {
0332       actualOutputDir = ".";
0333     }
0334   } else {
0335     actualOutputDir = outputDir;
0336   }
0337 
0338   // Create output directory
0339   std::filesystem::create_directories(actualOutputDir);
0340 
0341   if (verbose) {
0342     std::cerr << "Configuration:\n";
0343     std::cerr << "  Input files: " << inputFiles.size() << "\n";
0344     if (singleOutputPath) {
0345       std::cerr << "  Single output file: " << *singleOutputPath << "\n";
0346       std::cerr << "  Format: " << format << "\n";
0347       std::cerr << "  Compression: " << compression << " (level "
0348                 << compressionLevel << ")\n";
0349     } else {
0350       std::cerr << "  Output dir: " << actualOutputDir << "\n";
0351       std::cerr << "  Output prefix: " << outputPrefix << "\n";
0352       std::cerr << "  Events per file: "
0353                 << (eventsPerFile.has_value()
0354                         ? std::to_string(eventsPerFile.value())
0355                         : "unset")
0356                 << "\n";
0357       std::cerr << "  Format: " << format << "\n";
0358       std::cerr << "  Compression: " << compression << " (level "
0359                 << compressionLevel << ")\n";
0360     }
0361     if (maxEvents.has_value()) {
0362       std::cerr << "  Max events to read: " << maxEvents.value() << "\n";
0363     }
0364     std::cerr << "\n";
0365   }
0366 
0367   // Processing state
0368   std::size_t globalEventIndex = 0;
0369   std::size_t eventsInCurrentFile = 0;
0370   std::size_t currentFileIndex = 0;
0371   std::unique_ptr<HepMC3::Writer> currentWriter;
0372   std::filesystem::path currentOutputPath;
0373 
0374   // Process each input file
0375   for (const auto& inputFile : inputFiles) {
0376     if (verbose) {
0377       std::cerr << "Reading " << inputFile << "...\n";
0378     }
0379 
0380     if (!std::filesystem::exists(inputFile)) {
0381       std::cerr << "WARNING: File not found: " << inputFile << "\n";
0382       continue;
0383     }
0384 
0385     // Track input file size
0386     result.totalInputSize += std::filesystem::file_size(inputFile);
0387 
0388     auto reader = HepMC3Util::deduceReader(inputFile.string());
0389     if (!reader) {
0390       std::cerr << "ERROR: Failed to open " << inputFile << "\n";
0391       continue;
0392     }
0393 
0394     HepMC3::GenEvent event;
0395     std::size_t eventsReadFromFile = 0;
0396     std::size_t lastProgressUpdate = 0;
0397     constexpr std::size_t progressInterval = 100;
0398 
0399     while (!reader->failed()) {
0400       // Check if we've reached the maximum number of events
0401       if (maxEvents > 0 && globalEventIndex >= maxEvents) {
0402         break;
0403       }
0404 
0405       auto readStart = std::chrono::high_resolution_clock::now();
0406       reader->read_event(event);
0407       auto readEnd = std::chrono::high_resolution_clock::now();
0408       result.totalReadTime +=
0409           std::chrono::duration<double>(readEnd - readStart).count();
0410 
0411       if (reader->failed()) {
0412         break;
0413       }
0414 
0415       // Show progress
0416       if (verbose &&
0417           (eventsReadFromFile - lastProgressUpdate) >= progressInterval) {
0418         std::cerr << "    Progress: " << eventsReadFromFile
0419                   << " events read...\r" << std::flush;
0420         lastProgressUpdate = eventsReadFromFile;
0421       }
0422 
0423       // Create new output file if needed
0424       if (eventsInCurrentFile == 0) {
0425         // Close previous file
0426         if (currentWriter) {
0427           currentWriter->close();
0428 
0429           // Get file size and write metadata
0430           if (std::filesystem::exists(currentOutputPath)) {
0431             auto fileSize = std::filesystem::file_size(currentOutputPath);
0432             result.totalOutputSize += fileSize;
0433 
0434             std::size_t events =
0435                 singleOutputPath ? globalEventIndex : eventsPerFile.value();
0436             writeMetadata(currentOutputPath, events);
0437             result.outputFiles.push_back(currentOutputPath);
0438 
0439             if (verbose) {
0440               double sizePerEvent =
0441                   static_cast<double>(fileSize) / static_cast<double>(events);
0442 
0443               std::cerr << "  Wrote " << currentOutputPath << " (" << events
0444                         << " events, " << formatSize(fileSize) << ", "
0445                         << formatSize(static_cast<std::uintmax_t>(sizePerEvent))
0446                         << "/event)\n";
0447             }
0448           }
0449         }
0450 
0451         // Generate output path based on mode
0452         if (singleOutputPath) {
0453           currentOutputPath = *singleOutputPath;
0454         } else {
0455           std::string filename = generateOutputFilename(
0456               outputPrefix, currentFileIndex, format, compression);
0457           currentOutputPath = actualOutputDir / filename;
0458         }
0459         currentWriter =
0460             HepMC3Util::createWriter(currentOutputPath, format, compression);
0461       }
0462 
0463       // Set event number
0464       event.set_event_number(globalEventIndex);
0465 
0466       // Clear run info from events that are not the first in their output file
0467       if (eventsInCurrentFile > 0) {
0468         event.set_run_info(nullptr);
0469       }
0470 
0471       auto writeStart = std::chrono::high_resolution_clock::now();
0472       currentWriter->write_event(event);
0473       auto writeEnd = std::chrono::high_resolution_clock::now();
0474       result.totalWriteTime +=
0475           std::chrono::duration<double>(writeEnd - writeStart).count();
0476 
0477       globalEventIndex++;
0478       eventsInCurrentFile++;
0479       eventsReadFromFile++;
0480 
0481       // Close file if chunk is complete (only in multi-file mode)
0482       if (!singleOutputPath && eventsInCurrentFile >= eventsPerFile) {
0483         currentWriter->close();
0484 
0485         // Get file size
0486         if (std::filesystem::exists(currentOutputPath)) {
0487           auto fileSize = std::filesystem::file_size(currentOutputPath);
0488           result.totalOutputSize += fileSize;
0489 
0490           writeMetadata(currentOutputPath, eventsInCurrentFile);
0491           result.outputFiles.push_back(currentOutputPath);
0492 
0493           if (verbose) {
0494             double sizePerEvent = static_cast<double>(fileSize) /
0495                                   static_cast<double>(eventsInCurrentFile);
0496 
0497             std::cerr << "  Wrote " << currentOutputPath << " ("
0498                       << eventsInCurrentFile << " events, "
0499                       << formatSize(fileSize) << ", "
0500                       << formatSize(static_cast<std::uintmax_t>(sizePerEvent))
0501                       << "/event)\n";
0502           }
0503         }
0504 
0505         currentWriter.reset();
0506         eventsInCurrentFile = 0;
0507         currentFileIndex++;
0508       }
0509     }
0510 
0511     reader->close();
0512 
0513     if (verbose) {
0514       // Clear progress line and print final count
0515       std::cerr << "  Read " << eventsReadFromFile << " events from "
0516                 << inputFile << "                    \n";
0517     }
0518 
0519     // Check if we've reached the maximum number of events
0520     if (maxEvents.has_value() && globalEventIndex >= maxEvents.value()) {
0521       if (verbose) {
0522         std::cerr << "Reached maximum event limit (" << maxEvents.value()
0523                   << ")\n";
0524       }
0525       break;
0526     }
0527   }
0528 
0529   // Close final file
0530   if (currentWriter && eventsInCurrentFile > 0) {
0531     currentWriter->close();
0532 
0533     // Get file size
0534     if (std::filesystem::exists(currentOutputPath)) {
0535       auto fileSize = std::filesystem::file_size(currentOutputPath);
0536       result.totalOutputSize += fileSize;
0537 
0538       writeMetadata(currentOutputPath, eventsInCurrentFile);
0539       result.outputFiles.push_back(currentOutputPath);
0540 
0541       if (verbose) {
0542         double sizePerEvent = static_cast<double>(fileSize) /
0543                               static_cast<double>(eventsInCurrentFile);
0544 
0545         std::cerr << "  Wrote " << currentOutputPath << " ("
0546                   << eventsInCurrentFile << " events, " << formatSize(fileSize)
0547                   << ", "
0548                   << formatSize(static_cast<std::uintmax_t>(sizePerEvent))
0549                   << "/event)\n";
0550       }
0551     }
0552   }
0553 
0554   result.numEvents = globalEventIndex;
0555 
0556   // Print summary
0557   if (verbose) {
0558     std::cerr << "\nSummary:\n";
0559     if (singleOutputPath) {
0560       std::cerr << "  Processed " << globalEventIndex
0561                 << " events into single file\n";
0562     } else {
0563       std::size_t totalFiles = (globalEventIndex + eventsPerFile.value() - 1) /
0564                                eventsPerFile.value();
0565       std::cerr << "  Processed " << globalEventIndex << " events into "
0566                 << totalFiles << " file(s)\n";
0567     }
0568 
0569     if (globalEventIndex > 0) {
0570       double bytesPerEvent = static_cast<double>(result.totalOutputSize) /
0571                              static_cast<double>(globalEventIndex);
0572       std::cerr << "  Total input size:  " << formatSize(result.totalInputSize)
0573                 << "\n";
0574       std::cerr << "  Total output size: " << formatSize(result.totalOutputSize)
0575                 << " ("
0576                 << formatSize(static_cast<std::uintmax_t>(bytesPerEvent))
0577                 << "/event)\n";
0578 
0579       if (result.totalInputSize > 0) {
0580         double ratio = static_cast<double>(result.totalOutputSize) /
0581                        static_cast<double>(result.totalInputSize);
0582         std::cerr << "  Compression ratio: " << std::fixed
0583                   << std::setprecision(2) << (ratio * 100.0) << "%\n";
0584       }
0585     } else {
0586       std::cerr << "  Total input size:  " << formatSize(result.totalInputSize)
0587                 << "\n";
0588       std::cerr << "  Total output size: " << formatSize(result.totalOutputSize)
0589                 << "\n";
0590     }
0591 
0592     // Print timing information
0593     if (globalEventIndex > 0) {
0594       std::cerr << "\nTiming breakdown:\n";
0595       std::cerr << "  Reading events:  " << std::fixed << std::setprecision(3)
0596                 << result.totalReadTime << " s ("
0597                 << (result.totalReadTime / globalEventIndex * 1000.0)
0598                 << " ms/event)\n";
0599       std::cerr << "  Writing events:  " << std::fixed << std::setprecision(3)
0600                 << result.totalWriteTime << " s ("
0601                 << (result.totalWriteTime / globalEventIndex * 1000.0)
0602                 << " ms/event)\n";
0603 
0604       double totalProcessingTime = result.totalReadTime + result.totalWriteTime;
0605       std::cerr << "  Total processing: " << std::fixed << std::setprecision(3)
0606                 << totalProcessingTime << " s\n";
0607 
0608       // Show percentage breakdown
0609       if (totalProcessingTime > 0) {
0610         std::cerr << "\nTime distribution:\n";
0611         std::cerr << "  Reading: " << std::fixed << std::setprecision(1)
0612                   << (result.totalReadTime / totalProcessingTime * 100.0)
0613                   << "%\n";
0614         std::cerr << "  Writing: " << std::fixed << std::setprecision(1)
0615                   << (result.totalWriteTime / totalProcessingTime * 100.0)
0616                   << "%\n";
0617       }
0618     }
0619   }
0620 
0621   return result;
0622 }
0623 
0624 }  // namespace ActsExamples