Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-10-24 08:19:37

0001 // This file is part of the ACTS project.
0002 //
0003 // Copyright (C) 2016 CERN for the benefit of the ACTS project
0004 //
0005 // This Source Code Form is subject to the terms of the Mozilla Public
0006 // License, v. 2.0. If a copy of the MPL was not distributed with this
0007 // file, You can obtain one at https://mozilla.org/MPL/2.0/.
0008 
0009 #include "ActsExamples/Io/HepMC3/HepMC3Util.hpp"
0010 
0011 #include "Acts/Utilities/ScopedTimer.hpp"
0012 
0013 #include <chrono>
0014 #include <format>
0015 #include <fstream>
0016 #include <iomanip>
0017 #include <iostream>
0018 #include <memory>
0019 #include <sstream>
0020 #include <stdexcept>
0021 
0022 #include <HepMC3/GenEvent.h>
0023 #include <HepMC3/GenParticle.h>
0024 #include <HepMC3/GenVertex.h>
0025 #include <HepMC3/Reader.h>
0026 #include <HepMC3/Writer.h>
0027 #include <nlohmann/json.hpp>
0028 
0029 namespace ActsExamples {
0030 
0031 namespace {
0032 template <typename T>
0033 void mergeEventsImpl(HepMC3::GenEvent& event, std::span<T> genEvents,
0034                      const Acts::Logger& logger) {
0035   Acts::AveragingScopedTimer mergeTimer("Merging HepMC3 events", logger(),
0036                                         Acts::Logging::DEBUG);
0037 
0038   std::vector<std::shared_ptr<HepMC3::GenParticle>> particles;
0039 
0040   // Loop once to find the total size we'll need
0041   std::size_t nParticles = 0;
0042   std::size_t nVertices = 0;
0043   for (const auto& genEvent : genEvents) {
0044     nParticles += genEvent->particles().size();
0045     nVertices += genEvent->vertices().size();
0046   }
0047 
0048   event.reserve(nParticles, nVertices);
0049 
0050   for (const auto& genEvent : genEvents) {
0051     auto sample = mergeTimer.sample();
0052     particles.clear();
0053     particles.reserve(genEvent->particles().size());
0054 
0055     auto copyAttributes = [&](const auto& src, auto& dst) {
0056       for (const auto& attr : src.attribute_names()) {
0057         auto value = src.attribute_as_string(attr);
0058         dst.add_attribute(attr,
0059                           std::make_shared<HepMC3::StringAttribute>(value));
0060       }
0061     };
0062 
0063     copyAttributes(*genEvent, event);
0064 
0065     // Add to combined event
0066     for (const auto& srcParticle : genEvent->particles()) {
0067       if (srcParticle->id() - 1 != static_cast<int>(particles.size())) {
0068         throw std::runtime_error("Particle id is not consecutive");
0069       }
0070       auto particle = std::make_shared<HepMC3::GenParticle>();
0071       particle->set_momentum(srcParticle->momentum());
0072       particle->set_generated_mass(srcParticle->generated_mass());
0073       particle->set_pid(srcParticle->pid());
0074       particle->set_status(srcParticle->status());
0075 
0076       particles.push_back(particle);
0077       event.add_particle(particle);
0078 
0079       copyAttributes(*srcParticle, *particle);
0080     }
0081 
0082     for (const auto& srcVertex : genEvent->vertices()) {
0083       auto vertex = std::make_shared<HepMC3::GenVertex>(srcVertex->position());
0084       vertex->set_status(srcVertex->status());
0085 
0086       event.add_vertex(vertex);
0087 
0088       copyAttributes(*srcVertex, *vertex);
0089 
0090       for (const auto& srcParticle : srcVertex->particles_in()) {
0091         const auto& particle = particles.at(srcParticle->id() - 1);
0092         vertex->add_particle_in(particle);
0093       }
0094       for (const auto& srcParticle : srcVertex->particles_out()) {
0095         const auto& particle = particles.at(srcParticle->id() - 1);
0096         vertex->add_particle_out(particle);
0097       }
0098     }
0099   }
0100 }
0101 }  // namespace
0102 
0103 void HepMC3Util::mergeEvents(HepMC3::GenEvent& event,
0104                              std::span<const HepMC3::GenEvent*> genEvents,
0105                              const Acts::Logger& logger) {
0106   mergeEventsImpl(event, genEvents, logger);
0107 }
0108 
0109 void HepMC3Util::mergeEvents(
0110     HepMC3::GenEvent& event,
0111     std::span<std::shared_ptr<const HepMC3::GenEvent>> genEvents,
0112     const Acts::Logger& logger) {
0113   mergeEventsImpl(event, genEvents, logger);
0114 }
0115 
0116 std::string_view HepMC3Util::compressionExtension(Compression compression) {
0117   switch (compression) {
0118     using enum Compression;
0119     case none:
0120       return "";
0121     case zlib:
0122       return ".gz";
0123     case lzma:
0124       return ".xz";
0125     case bzip2:
0126       return ".bz2";
0127     case zstd:
0128       return ".zst";
0129     default:
0130       throw std::invalid_argument{"Unknown compression value"};
0131   }
0132 }
0133 
0134 std::span<const HepMC3Util::Compression>
0135 HepMC3Util::availableCompressionModes() {
0136   using enum Compression;
0137   static const auto values = []() -> std::vector<HepMC3Util::Compression> {
0138     return {
0139         none,
0140 #ifdef HEPMC3_Z_SUPPORT
0141         zlib,
0142 #endif
0143 #ifdef HEPMC3_LZMA_SUPPORT
0144         lzma,
0145 #endif
0146 #ifdef HEPMC3_BZ2_SUPPORT
0147         bzip2,
0148 #endif
0149 #ifdef HEPMC3_ZSTD_SUPPORT
0150         zstd,
0151 #endif
0152     };
0153   }();
0154   return values;
0155 }
0156 
0157 std::ostream& HepMC3Util::operator<<(std::ostream& os,
0158                                      HepMC3Util::Compression compression) {
0159   switch (compression) {
0160     using enum HepMC3Util::Compression;
0161     case none:
0162       return os << "none";
0163     case zlib:
0164       return os << "zlib";
0165     case lzma:
0166       return os << "lzma";
0167     case bzip2:
0168       return os << "bzip2";
0169     case zstd:
0170       return os << "zstd";
0171     default:
0172       throw std::invalid_argument{"Unknown compression value"};
0173   }
0174 }
0175 
0176 std::ostream& HepMC3Util::operator<<(std::ostream& os,
0177                                      HepMC3Util::Format format) {
0178   switch (format) {
0179     using enum HepMC3Util::Format;
0180     case ascii:
0181       return os << "ascii";
0182     case root:
0183       return os << "root";
0184     default:
0185       throw std::invalid_argument{"Unknown format value"};
0186   }
0187 }
0188 
0189 std::span<const HepMC3Util::Format> HepMC3Util::availableFormats() {
0190   using enum Format;
0191   static const auto values = []() -> std::vector<HepMC3Util::Format> {
0192     return {
0193         ascii,
0194 #ifdef HEPMC3_ROOT_SUPPORT
0195         root,
0196 #endif
0197     };
0198   }();
0199   return values;
0200 }
0201 
0202 HepMC3Util::Format HepMC3Util::formatFromFilename(std::string_view filename) {
0203   using enum Format;
0204 
0205   for (auto compression : availableCompressionModes()) {
0206     auto ext = compressionExtension(compression);
0207 
0208     if (filename.ends_with(".hepmc3" + std::string(ext)) ||
0209         filename.ends_with(".hepmc" + std::string(ext))) {
0210       return ascii;
0211     }
0212   }
0213   if (filename.ends_with(".root")) {
0214     return root;
0215   }
0216 
0217   throw std::invalid_argument{"Unknown format extension: " +
0218                               std::string{filename}};
0219 }
0220 
0221 namespace {
0222 
0223 // Helper function to format file sizes
0224 std::string formatSize(std::uintmax_t bytes) {
0225   const std::array<const char*, 5> units = {"B", "KB", "MB", "GB", "TB"};
0226   std::size_t unit = 0;
0227   double size = static_cast<double>(bytes);
0228 
0229   while (size >= 1024.0 && unit < units.size() - 1) {
0230     size /= 1024.0;
0231     unit++;
0232   }
0233 
0234   std::ostringstream oss;
0235   oss << std::fixed << std::setprecision(2) << size << " " << units[unit];
0236   return oss.str();
0237 }
0238 
0239 // Helper function to write metadata sidecar file
0240 void writeMetadata(const std::filesystem::path& dataFile,
0241                    std::size_t numEvents) {
0242   std::filesystem::path metadataFile = dataFile;
0243   metadataFile += ".json";
0244 
0245   nlohmann::json metadata;
0246   metadata["num_events"] = numEvents;
0247 
0248   std::ofstream metadataStream(metadataFile);
0249   metadataStream << metadata.dump(2);
0250 }
0251 
0252 // Helper function to generate output filename
0253 std::string generateOutputFilename(const std::string& prefix,
0254                                    std::size_t fileNum,
0255                                    HepMC3Util::Format format,
0256                                    HepMC3Util::Compression compression) {
0257   std::ostringstream filename;
0258   filename << prefix << "_" << std::setfill('0') << std::setw(6) << fileNum;
0259 
0260   if (format == HepMC3Util::Format::ascii) {
0261     filename << ".hepmc3";
0262   } else if (format == HepMC3Util::Format::root) {
0263     filename << ".root";
0264   }
0265 
0266   filename << HepMC3Util::compressionExtension(compression);
0267 
0268   return filename.str();
0269 }
0270 
0271 }  // namespace
0272 
0273 HepMC3Util::NormalizeResult HepMC3Util::normalizeFiles(
0274     const std::vector<std::filesystem::path>& inputFiles,
0275     std::optional<std::filesystem::path> singleOutputPath,
0276     const std::filesystem::path& outputDir, const std::string& outputPrefix,
0277     std::optional<std::size_t> eventsPerFile,
0278     std::optional<std::size_t> maxEvents, Format format,
0279     Compression compression, int compressionLevel, bool verbose) {
0280   // Validate configuration
0281   if (inputFiles.empty()) {
0282     throw std::invalid_argument("No input files specified");
0283   }
0284 
0285   if (!singleOutputPath && !eventsPerFile.has_value()) {
0286     throw std::invalid_argument(
0287         "events-per-file must be > 0 in multi-file mode");
0288   }
0289 
0290   if (compressionLevel < 0 || compressionLevel > 19) {
0291     throw std::invalid_argument("compression-level must be 0-19");
0292   }
0293 
0294   if (format == Format::root && compression != Compression::none) {
0295     throw std::invalid_argument(
0296         "ROOT format does not support compression parameter");
0297   }
0298 
0299   NormalizeResult result;
0300 
0301   // Determine output directory
0302   std::filesystem::path actualOutputDir;
0303   if (singleOutputPath) {
0304     if (singleOutputPath->has_parent_path()) {
0305       actualOutputDir = singleOutputPath->parent_path();
0306     } else {
0307       actualOutputDir = ".";
0308     }
0309   } else {
0310     actualOutputDir = outputDir;
0311   }
0312 
0313   // Create output directory
0314   std::filesystem::create_directories(actualOutputDir);
0315 
0316   if (verbose) {
0317     std::cerr << "Configuration:\n";
0318     std::cerr << "  Input files: " << inputFiles.size() << "\n";
0319     if (singleOutputPath) {
0320       std::cerr << "  Single output file: " << *singleOutputPath << "\n";
0321       std::cerr << "  Format: " << format << "\n";
0322       std::cerr << "  Compression: " << compression << " (level "
0323                 << compressionLevel << ")\n";
0324     } else {
0325       std::cerr << "  Output dir: " << actualOutputDir << "\n";
0326       std::cerr << "  Output prefix: " << outputPrefix << "\n";
0327       std::cerr << "  Events per file: "
0328                 << (eventsPerFile.has_value()
0329                         ? std::to_string(eventsPerFile.value())
0330                         : "unset")
0331                 << "\n";
0332       std::cerr << "  Format: " << format << "\n";
0333       std::cerr << "  Compression: " << compression << " (level "
0334                 << compressionLevel << ")\n";
0335     }
0336     if (maxEvents.has_value()) {
0337       std::cerr << "  Max events to read: " << maxEvents.value() << "\n";
0338     }
0339     std::cerr << "\n";
0340   }
0341 
0342   // Processing state
0343   std::size_t globalEventIndex = 0;
0344   std::size_t eventsInCurrentFile = 0;
0345   std::size_t currentFileIndex = 0;
0346   std::unique_ptr<HepMC3::Writer> currentWriter;
0347   std::filesystem::path currentOutputPath;
0348 
0349   // Process each input file
0350   for (const auto& inputFile : inputFiles) {
0351     if (verbose) {
0352       std::cerr << "Reading " << inputFile << "...\n";
0353     }
0354 
0355     if (!std::filesystem::exists(inputFile)) {
0356       std::cerr << "WARNING: File not found: " << inputFile << "\n";
0357       continue;
0358     }
0359 
0360     // Track input file size
0361     result.totalInputSize += std::filesystem::file_size(inputFile);
0362 
0363     auto reader = HepMC3Util::deduceReader(inputFile.string());
0364     if (!reader) {
0365       std::cerr << "ERROR: Failed to open " << inputFile << "\n";
0366       continue;
0367     }
0368 
0369     HepMC3::GenEvent event;
0370     std::size_t eventsReadFromFile = 0;
0371     std::size_t lastProgressUpdate = 0;
0372     constexpr std::size_t progressInterval = 100;
0373 
0374     while (!reader->failed()) {
0375       // Check if we've reached the maximum number of events
0376       if (maxEvents > 0 && globalEventIndex >= maxEvents) {
0377         break;
0378       }
0379 
0380       auto readStart = std::chrono::high_resolution_clock::now();
0381       reader->read_event(event);
0382       auto readEnd = std::chrono::high_resolution_clock::now();
0383       result.totalReadTime +=
0384           std::chrono::duration<double>(readEnd - readStart).count();
0385 
0386       if (reader->failed()) {
0387         break;
0388       }
0389 
0390       // Show progress
0391       if (verbose &&
0392           (eventsReadFromFile - lastProgressUpdate) >= progressInterval) {
0393         std::cerr << "    Progress: " << eventsReadFromFile
0394                   << " events read...\r" << std::flush;
0395         lastProgressUpdate = eventsReadFromFile;
0396       }
0397 
0398       // Create new output file if needed
0399       if (eventsInCurrentFile == 0) {
0400         // Close previous file
0401         if (currentWriter) {
0402           currentWriter->close();
0403 
0404           // Get file size and write metadata
0405           if (std::filesystem::exists(currentOutputPath)) {
0406             auto fileSize = std::filesystem::file_size(currentOutputPath);
0407             result.totalOutputSize += fileSize;
0408 
0409             std::size_t events =
0410                 singleOutputPath ? globalEventIndex : eventsPerFile.value();
0411             writeMetadata(currentOutputPath, events);
0412             result.outputFiles.push_back(currentOutputPath);
0413 
0414             if (verbose) {
0415               double sizePerEvent =
0416                   static_cast<double>(fileSize) / static_cast<double>(events);
0417 
0418               std::cerr << "  Wrote " << currentOutputPath << " (" << events
0419                         << " events, " << formatSize(fileSize) << ", "
0420                         << formatSize(static_cast<std::uintmax_t>(sizePerEvent))
0421                         << "/event)\n";
0422             }
0423           }
0424         }
0425 
0426         // Generate output path based on mode
0427         if (singleOutputPath) {
0428           currentOutputPath = *singleOutputPath;
0429         } else {
0430           std::string filename = generateOutputFilename(
0431               outputPrefix, currentFileIndex, format, compression);
0432           currentOutputPath = actualOutputDir / filename;
0433         }
0434         currentWriter =
0435             HepMC3Util::createWriter(currentOutputPath, format, compression);
0436       }
0437 
0438       // Set event number
0439       event.set_event_number(globalEventIndex);
0440 
0441       // Clear run info from events that are not the first in their output file
0442       if (eventsInCurrentFile > 0) {
0443         event.set_run_info(nullptr);
0444       }
0445 
0446       auto writeStart = std::chrono::high_resolution_clock::now();
0447       currentWriter->write_event(event);
0448       auto writeEnd = std::chrono::high_resolution_clock::now();
0449       result.totalWriteTime +=
0450           std::chrono::duration<double>(writeEnd - writeStart).count();
0451 
0452       globalEventIndex++;
0453       eventsInCurrentFile++;
0454       eventsReadFromFile++;
0455 
0456       // Close file if chunk is complete (only in multi-file mode)
0457       if (!singleOutputPath && eventsInCurrentFile >= eventsPerFile) {
0458         currentWriter->close();
0459 
0460         // Get file size
0461         if (std::filesystem::exists(currentOutputPath)) {
0462           auto fileSize = std::filesystem::file_size(currentOutputPath);
0463           result.totalOutputSize += fileSize;
0464 
0465           writeMetadata(currentOutputPath, eventsInCurrentFile);
0466           result.outputFiles.push_back(currentOutputPath);
0467 
0468           if (verbose) {
0469             double sizePerEvent = static_cast<double>(fileSize) /
0470                                   static_cast<double>(eventsInCurrentFile);
0471 
0472             std::cerr << "  Wrote " << currentOutputPath << " ("
0473                       << eventsInCurrentFile << " events, "
0474                       << formatSize(fileSize) << ", "
0475                       << formatSize(static_cast<std::uintmax_t>(sizePerEvent))
0476                       << "/event)\n";
0477           }
0478         }
0479 
0480         currentWriter.reset();
0481         eventsInCurrentFile = 0;
0482         currentFileIndex++;
0483       }
0484     }
0485 
0486     reader->close();
0487 
0488     if (verbose) {
0489       // Clear progress line and print final count
0490       std::cerr << "  Read " << eventsReadFromFile << " events from "
0491                 << inputFile << "                    \n";
0492     }
0493 
0494     // Check if we've reached the maximum number of events
0495     if (maxEvents.has_value() && globalEventIndex >= maxEvents.value()) {
0496       if (verbose) {
0497         std::cerr << "Reached maximum event limit (" << maxEvents.value()
0498                   << ")\n";
0499       }
0500       break;
0501     }
0502   }
0503 
0504   // Close final file
0505   if (currentWriter && eventsInCurrentFile > 0) {
0506     currentWriter->close();
0507 
0508     // Get file size
0509     if (std::filesystem::exists(currentOutputPath)) {
0510       auto fileSize = std::filesystem::file_size(currentOutputPath);
0511       result.totalOutputSize += fileSize;
0512 
0513       writeMetadata(currentOutputPath, eventsInCurrentFile);
0514       result.outputFiles.push_back(currentOutputPath);
0515 
0516       if (verbose) {
0517         double sizePerEvent = static_cast<double>(fileSize) /
0518                               static_cast<double>(eventsInCurrentFile);
0519 
0520         std::cerr << "  Wrote " << currentOutputPath << " ("
0521                   << eventsInCurrentFile << " events, " << formatSize(fileSize)
0522                   << ", "
0523                   << formatSize(static_cast<std::uintmax_t>(sizePerEvent))
0524                   << "/event)\n";
0525       }
0526     }
0527   }
0528 
0529   result.numEvents = globalEventIndex;
0530 
0531   // Print summary
0532   if (verbose) {
0533     std::cerr << "\nSummary:\n";
0534     if (singleOutputPath) {
0535       std::cerr << "  Processed " << globalEventIndex
0536                 << " events into single file\n";
0537     } else {
0538       std::size_t totalFiles = (globalEventIndex + eventsPerFile.value() - 1) /
0539                                eventsPerFile.value();
0540       std::cerr << "  Processed " << globalEventIndex << " events into "
0541                 << totalFiles << " file(s)\n";
0542     }
0543 
0544     if (globalEventIndex > 0) {
0545       double bytesPerEvent = static_cast<double>(result.totalOutputSize) /
0546                              static_cast<double>(globalEventIndex);
0547       std::cerr << "  Total input size:  " << formatSize(result.totalInputSize)
0548                 << "\n";
0549       std::cerr << "  Total output size: " << formatSize(result.totalOutputSize)
0550                 << " ("
0551                 << formatSize(static_cast<std::uintmax_t>(bytesPerEvent))
0552                 << "/event)\n";
0553 
0554       if (result.totalInputSize > 0) {
0555         double ratio = static_cast<double>(result.totalOutputSize) /
0556                        static_cast<double>(result.totalInputSize);
0557         std::cerr << "  Compression ratio: " << std::fixed
0558                   << std::setprecision(2) << (ratio * 100.0) << "%\n";
0559       }
0560     } else {
0561       std::cerr << "  Total input size:  " << formatSize(result.totalInputSize)
0562                 << "\n";
0563       std::cerr << "  Total output size: " << formatSize(result.totalOutputSize)
0564                 << "\n";
0565     }
0566 
0567     // Print timing information
0568     if (globalEventIndex > 0) {
0569       std::cerr << "\nTiming breakdown:\n";
0570       std::cerr << "  Reading events:  " << std::fixed << std::setprecision(3)
0571                 << result.totalReadTime << " s ("
0572                 << (result.totalReadTime / globalEventIndex * 1000.0)
0573                 << " ms/event)\n";
0574       std::cerr << "  Writing events:  " << std::fixed << std::setprecision(3)
0575                 << result.totalWriteTime << " s ("
0576                 << (result.totalWriteTime / globalEventIndex * 1000.0)
0577                 << " ms/event)\n";
0578 
0579       double totalProcessingTime = result.totalReadTime + result.totalWriteTime;
0580       std::cerr << "  Total processing: " << std::fixed << std::setprecision(3)
0581                 << totalProcessingTime << " s\n";
0582 
0583       // Show percentage breakdown
0584       if (totalProcessingTime > 0) {
0585         std::cerr << "\nTime distribution:\n";
0586         std::cerr << "  Reading: " << std::fixed << std::setprecision(1)
0587                   << (result.totalReadTime / totalProcessingTime * 100.0)
0588                   << "%\n";
0589         std::cerr << "  Writing: " << std::fixed << std::setprecision(1)
0590                   << (result.totalWriteTime / totalProcessingTime * 100.0)
0591                   << "%\n";
0592       }
0593     }
0594   }
0595 
0596   return result;
0597 }
0598 
0599 }  // namespace ActsExamples