File indexing completed on 2025-10-24 08:19:37
0001
0002
0003
0004
0005
0006
0007
0008
0009 #include "ActsExamples/Io/HepMC3/HepMC3Util.hpp"
0010
0011 #include "Acts/Utilities/ScopedTimer.hpp"
0012
0013 #include <chrono>
0014 #include <format>
0015 #include <fstream>
0016 #include <iomanip>
0017 #include <iostream>
0018 #include <memory>
0019 #include <sstream>
0020 #include <stdexcept>
0021
0022 #include <HepMC3/GenEvent.h>
0023 #include <HepMC3/GenParticle.h>
0024 #include <HepMC3/GenVertex.h>
0025 #include <HepMC3/Reader.h>
0026 #include <HepMC3/Writer.h>
0027 #include <nlohmann/json.hpp>
0028
0029 namespace ActsExamples {
0030
0031 namespace {
0032 template <typename T>
0033 void mergeEventsImpl(HepMC3::GenEvent& event, std::span<T> genEvents,
0034 const Acts::Logger& logger) {
0035 Acts::AveragingScopedTimer mergeTimer("Merging HepMC3 events", logger(),
0036 Acts::Logging::DEBUG);
0037
0038 std::vector<std::shared_ptr<HepMC3::GenParticle>> particles;
0039
0040
0041 std::size_t nParticles = 0;
0042 std::size_t nVertices = 0;
0043 for (const auto& genEvent : genEvents) {
0044 nParticles += genEvent->particles().size();
0045 nVertices += genEvent->vertices().size();
0046 }
0047
0048 event.reserve(nParticles, nVertices);
0049
0050 for (const auto& genEvent : genEvents) {
0051 auto sample = mergeTimer.sample();
0052 particles.clear();
0053 particles.reserve(genEvent->particles().size());
0054
0055 auto copyAttributes = [&](const auto& src, auto& dst) {
0056 for (const auto& attr : src.attribute_names()) {
0057 auto value = src.attribute_as_string(attr);
0058 dst.add_attribute(attr,
0059 std::make_shared<HepMC3::StringAttribute>(value));
0060 }
0061 };
0062
0063 copyAttributes(*genEvent, event);
0064
0065
0066 for (const auto& srcParticle : genEvent->particles()) {
0067 if (srcParticle->id() - 1 != static_cast<int>(particles.size())) {
0068 throw std::runtime_error("Particle id is not consecutive");
0069 }
0070 auto particle = std::make_shared<HepMC3::GenParticle>();
0071 particle->set_momentum(srcParticle->momentum());
0072 particle->set_generated_mass(srcParticle->generated_mass());
0073 particle->set_pid(srcParticle->pid());
0074 particle->set_status(srcParticle->status());
0075
0076 particles.push_back(particle);
0077 event.add_particle(particle);
0078
0079 copyAttributes(*srcParticle, *particle);
0080 }
0081
0082 for (const auto& srcVertex : genEvent->vertices()) {
0083 auto vertex = std::make_shared<HepMC3::GenVertex>(srcVertex->position());
0084 vertex->set_status(srcVertex->status());
0085
0086 event.add_vertex(vertex);
0087
0088 copyAttributes(*srcVertex, *vertex);
0089
0090 for (const auto& srcParticle : srcVertex->particles_in()) {
0091 const auto& particle = particles.at(srcParticle->id() - 1);
0092 vertex->add_particle_in(particle);
0093 }
0094 for (const auto& srcParticle : srcVertex->particles_out()) {
0095 const auto& particle = particles.at(srcParticle->id() - 1);
0096 vertex->add_particle_out(particle);
0097 }
0098 }
0099 }
0100 }
0101 }
0102
0103 void HepMC3Util::mergeEvents(HepMC3::GenEvent& event,
0104 std::span<const HepMC3::GenEvent*> genEvents,
0105 const Acts::Logger& logger) {
0106 mergeEventsImpl(event, genEvents, logger);
0107 }
0108
0109 void HepMC3Util::mergeEvents(
0110 HepMC3::GenEvent& event,
0111 std::span<std::shared_ptr<const HepMC3::GenEvent>> genEvents,
0112 const Acts::Logger& logger) {
0113 mergeEventsImpl(event, genEvents, logger);
0114 }
0115
0116 std::string_view HepMC3Util::compressionExtension(Compression compression) {
0117 switch (compression) {
0118 using enum Compression;
0119 case none:
0120 return "";
0121 case zlib:
0122 return ".gz";
0123 case lzma:
0124 return ".xz";
0125 case bzip2:
0126 return ".bz2";
0127 case zstd:
0128 return ".zst";
0129 default:
0130 throw std::invalid_argument{"Unknown compression value"};
0131 }
0132 }
0133
0134 std::span<const HepMC3Util::Compression>
0135 HepMC3Util::availableCompressionModes() {
0136 using enum Compression;
0137 static const auto values = []() -> std::vector<HepMC3Util::Compression> {
0138 return {
0139 none,
0140 #ifdef HEPMC3_Z_SUPPORT
0141 zlib,
0142 #endif
0143 #ifdef HEPMC3_LZMA_SUPPORT
0144 lzma,
0145 #endif
0146 #ifdef HEPMC3_BZ2_SUPPORT
0147 bzip2,
0148 #endif
0149 #ifdef HEPMC3_ZSTD_SUPPORT
0150 zstd,
0151 #endif
0152 };
0153 }();
0154 return values;
0155 }
0156
0157 std::ostream& HepMC3Util::operator<<(std::ostream& os,
0158 HepMC3Util::Compression compression) {
0159 switch (compression) {
0160 using enum HepMC3Util::Compression;
0161 case none:
0162 return os << "none";
0163 case zlib:
0164 return os << "zlib";
0165 case lzma:
0166 return os << "lzma";
0167 case bzip2:
0168 return os << "bzip2";
0169 case zstd:
0170 return os << "zstd";
0171 default:
0172 throw std::invalid_argument{"Unknown compression value"};
0173 }
0174 }
0175
0176 std::ostream& HepMC3Util::operator<<(std::ostream& os,
0177 HepMC3Util::Format format) {
0178 switch (format) {
0179 using enum HepMC3Util::Format;
0180 case ascii:
0181 return os << "ascii";
0182 case root:
0183 return os << "root";
0184 default:
0185 throw std::invalid_argument{"Unknown format value"};
0186 }
0187 }
0188
0189 std::span<const HepMC3Util::Format> HepMC3Util::availableFormats() {
0190 using enum Format;
0191 static const auto values = []() -> std::vector<HepMC3Util::Format> {
0192 return {
0193 ascii,
0194 #ifdef HEPMC3_ROOT_SUPPORT
0195 root,
0196 #endif
0197 };
0198 }();
0199 return values;
0200 }
0201
0202 HepMC3Util::Format HepMC3Util::formatFromFilename(std::string_view filename) {
0203 using enum Format;
0204
0205 for (auto compression : availableCompressionModes()) {
0206 auto ext = compressionExtension(compression);
0207
0208 if (filename.ends_with(".hepmc3" + std::string(ext)) ||
0209 filename.ends_with(".hepmc" + std::string(ext))) {
0210 return ascii;
0211 }
0212 }
0213 if (filename.ends_with(".root")) {
0214 return root;
0215 }
0216
0217 throw std::invalid_argument{"Unknown format extension: " +
0218 std::string{filename}};
0219 }
0220
0221 namespace {
0222
0223
0224 std::string formatSize(std::uintmax_t bytes) {
0225 const std::array<const char*, 5> units = {"B", "KB", "MB", "GB", "TB"};
0226 std::size_t unit = 0;
0227 double size = static_cast<double>(bytes);
0228
0229 while (size >= 1024.0 && unit < units.size() - 1) {
0230 size /= 1024.0;
0231 unit++;
0232 }
0233
0234 std::ostringstream oss;
0235 oss << std::fixed << std::setprecision(2) << size << " " << units[unit];
0236 return oss.str();
0237 }
0238
0239
0240 void writeMetadata(const std::filesystem::path& dataFile,
0241 std::size_t numEvents) {
0242 std::filesystem::path metadataFile = dataFile;
0243 metadataFile += ".json";
0244
0245 nlohmann::json metadata;
0246 metadata["num_events"] = numEvents;
0247
0248 std::ofstream metadataStream(metadataFile);
0249 metadataStream << metadata.dump(2);
0250 }
0251
0252
0253 std::string generateOutputFilename(const std::string& prefix,
0254 std::size_t fileNum,
0255 HepMC3Util::Format format,
0256 HepMC3Util::Compression compression) {
0257 std::ostringstream filename;
0258 filename << prefix << "_" << std::setfill('0') << std::setw(6) << fileNum;
0259
0260 if (format == HepMC3Util::Format::ascii) {
0261 filename << ".hepmc3";
0262 } else if (format == HepMC3Util::Format::root) {
0263 filename << ".root";
0264 }
0265
0266 filename << HepMC3Util::compressionExtension(compression);
0267
0268 return filename.str();
0269 }
0270
0271 }
0272
0273 HepMC3Util::NormalizeResult HepMC3Util::normalizeFiles(
0274 const std::vector<std::filesystem::path>& inputFiles,
0275 std::optional<std::filesystem::path> singleOutputPath,
0276 const std::filesystem::path& outputDir, const std::string& outputPrefix,
0277 std::optional<std::size_t> eventsPerFile,
0278 std::optional<std::size_t> maxEvents, Format format,
0279 Compression compression, int compressionLevel, bool verbose) {
0280
0281 if (inputFiles.empty()) {
0282 throw std::invalid_argument("No input files specified");
0283 }
0284
0285 if (!singleOutputPath && !eventsPerFile.has_value()) {
0286 throw std::invalid_argument(
0287 "events-per-file must be > 0 in multi-file mode");
0288 }
0289
0290 if (compressionLevel < 0 || compressionLevel > 19) {
0291 throw std::invalid_argument("compression-level must be 0-19");
0292 }
0293
0294 if (format == Format::root && compression != Compression::none) {
0295 throw std::invalid_argument(
0296 "ROOT format does not support compression parameter");
0297 }
0298
0299 NormalizeResult result;
0300
0301
0302 std::filesystem::path actualOutputDir;
0303 if (singleOutputPath) {
0304 if (singleOutputPath->has_parent_path()) {
0305 actualOutputDir = singleOutputPath->parent_path();
0306 } else {
0307 actualOutputDir = ".";
0308 }
0309 } else {
0310 actualOutputDir = outputDir;
0311 }
0312
0313
0314 std::filesystem::create_directories(actualOutputDir);
0315
0316 if (verbose) {
0317 std::cerr << "Configuration:\n";
0318 std::cerr << " Input files: " << inputFiles.size() << "\n";
0319 if (singleOutputPath) {
0320 std::cerr << " Single output file: " << *singleOutputPath << "\n";
0321 std::cerr << " Format: " << format << "\n";
0322 std::cerr << " Compression: " << compression << " (level "
0323 << compressionLevel << ")\n";
0324 } else {
0325 std::cerr << " Output dir: " << actualOutputDir << "\n";
0326 std::cerr << " Output prefix: " << outputPrefix << "\n";
0327 std::cerr << " Events per file: "
0328 << (eventsPerFile.has_value()
0329 ? std::to_string(eventsPerFile.value())
0330 : "unset")
0331 << "\n";
0332 std::cerr << " Format: " << format << "\n";
0333 std::cerr << " Compression: " << compression << " (level "
0334 << compressionLevel << ")\n";
0335 }
0336 if (maxEvents.has_value()) {
0337 std::cerr << " Max events to read: " << maxEvents.value() << "\n";
0338 }
0339 std::cerr << "\n";
0340 }
0341
0342
0343 std::size_t globalEventIndex = 0;
0344 std::size_t eventsInCurrentFile = 0;
0345 std::size_t currentFileIndex = 0;
0346 std::unique_ptr<HepMC3::Writer> currentWriter;
0347 std::filesystem::path currentOutputPath;
0348
0349
0350 for (const auto& inputFile : inputFiles) {
0351 if (verbose) {
0352 std::cerr << "Reading " << inputFile << "...\n";
0353 }
0354
0355 if (!std::filesystem::exists(inputFile)) {
0356 std::cerr << "WARNING: File not found: " << inputFile << "\n";
0357 continue;
0358 }
0359
0360
0361 result.totalInputSize += std::filesystem::file_size(inputFile);
0362
0363 auto reader = HepMC3Util::deduceReader(inputFile.string());
0364 if (!reader) {
0365 std::cerr << "ERROR: Failed to open " << inputFile << "\n";
0366 continue;
0367 }
0368
0369 HepMC3::GenEvent event;
0370 std::size_t eventsReadFromFile = 0;
0371 std::size_t lastProgressUpdate = 0;
0372 constexpr std::size_t progressInterval = 100;
0373
0374 while (!reader->failed()) {
0375
0376 if (maxEvents > 0 && globalEventIndex >= maxEvents) {
0377 break;
0378 }
0379
0380 auto readStart = std::chrono::high_resolution_clock::now();
0381 reader->read_event(event);
0382 auto readEnd = std::chrono::high_resolution_clock::now();
0383 result.totalReadTime +=
0384 std::chrono::duration<double>(readEnd - readStart).count();
0385
0386 if (reader->failed()) {
0387 break;
0388 }
0389
0390
0391 if (verbose &&
0392 (eventsReadFromFile - lastProgressUpdate) >= progressInterval) {
0393 std::cerr << " Progress: " << eventsReadFromFile
0394 << " events read...\r" << std::flush;
0395 lastProgressUpdate = eventsReadFromFile;
0396 }
0397
0398
0399 if (eventsInCurrentFile == 0) {
0400
0401 if (currentWriter) {
0402 currentWriter->close();
0403
0404
0405 if (std::filesystem::exists(currentOutputPath)) {
0406 auto fileSize = std::filesystem::file_size(currentOutputPath);
0407 result.totalOutputSize += fileSize;
0408
0409 std::size_t events =
0410 singleOutputPath ? globalEventIndex : eventsPerFile.value();
0411 writeMetadata(currentOutputPath, events);
0412 result.outputFiles.push_back(currentOutputPath);
0413
0414 if (verbose) {
0415 double sizePerEvent =
0416 static_cast<double>(fileSize) / static_cast<double>(events);
0417
0418 std::cerr << " Wrote " << currentOutputPath << " (" << events
0419 << " events, " << formatSize(fileSize) << ", "
0420 << formatSize(static_cast<std::uintmax_t>(sizePerEvent))
0421 << "/event)\n";
0422 }
0423 }
0424 }
0425
0426
0427 if (singleOutputPath) {
0428 currentOutputPath = *singleOutputPath;
0429 } else {
0430 std::string filename = generateOutputFilename(
0431 outputPrefix, currentFileIndex, format, compression);
0432 currentOutputPath = actualOutputDir / filename;
0433 }
0434 currentWriter =
0435 HepMC3Util::createWriter(currentOutputPath, format, compression);
0436 }
0437
0438
0439 event.set_event_number(globalEventIndex);
0440
0441
0442 if (eventsInCurrentFile > 0) {
0443 event.set_run_info(nullptr);
0444 }
0445
0446 auto writeStart = std::chrono::high_resolution_clock::now();
0447 currentWriter->write_event(event);
0448 auto writeEnd = std::chrono::high_resolution_clock::now();
0449 result.totalWriteTime +=
0450 std::chrono::duration<double>(writeEnd - writeStart).count();
0451
0452 globalEventIndex++;
0453 eventsInCurrentFile++;
0454 eventsReadFromFile++;
0455
0456
0457 if (!singleOutputPath && eventsInCurrentFile >= eventsPerFile) {
0458 currentWriter->close();
0459
0460
0461 if (std::filesystem::exists(currentOutputPath)) {
0462 auto fileSize = std::filesystem::file_size(currentOutputPath);
0463 result.totalOutputSize += fileSize;
0464
0465 writeMetadata(currentOutputPath, eventsInCurrentFile);
0466 result.outputFiles.push_back(currentOutputPath);
0467
0468 if (verbose) {
0469 double sizePerEvent = static_cast<double>(fileSize) /
0470 static_cast<double>(eventsInCurrentFile);
0471
0472 std::cerr << " Wrote " << currentOutputPath << " ("
0473 << eventsInCurrentFile << " events, "
0474 << formatSize(fileSize) << ", "
0475 << formatSize(static_cast<std::uintmax_t>(sizePerEvent))
0476 << "/event)\n";
0477 }
0478 }
0479
0480 currentWriter.reset();
0481 eventsInCurrentFile = 0;
0482 currentFileIndex++;
0483 }
0484 }
0485
0486 reader->close();
0487
0488 if (verbose) {
0489
0490 std::cerr << " Read " << eventsReadFromFile << " events from "
0491 << inputFile << " \n";
0492 }
0493
0494
0495 if (maxEvents.has_value() && globalEventIndex >= maxEvents.value()) {
0496 if (verbose) {
0497 std::cerr << "Reached maximum event limit (" << maxEvents.value()
0498 << ")\n";
0499 }
0500 break;
0501 }
0502 }
0503
0504
0505 if (currentWriter && eventsInCurrentFile > 0) {
0506 currentWriter->close();
0507
0508
0509 if (std::filesystem::exists(currentOutputPath)) {
0510 auto fileSize = std::filesystem::file_size(currentOutputPath);
0511 result.totalOutputSize += fileSize;
0512
0513 writeMetadata(currentOutputPath, eventsInCurrentFile);
0514 result.outputFiles.push_back(currentOutputPath);
0515
0516 if (verbose) {
0517 double sizePerEvent = static_cast<double>(fileSize) /
0518 static_cast<double>(eventsInCurrentFile);
0519
0520 std::cerr << " Wrote " << currentOutputPath << " ("
0521 << eventsInCurrentFile << " events, " << formatSize(fileSize)
0522 << ", "
0523 << formatSize(static_cast<std::uintmax_t>(sizePerEvent))
0524 << "/event)\n";
0525 }
0526 }
0527 }
0528
0529 result.numEvents = globalEventIndex;
0530
0531
0532 if (verbose) {
0533 std::cerr << "\nSummary:\n";
0534 if (singleOutputPath) {
0535 std::cerr << " Processed " << globalEventIndex
0536 << " events into single file\n";
0537 } else {
0538 std::size_t totalFiles = (globalEventIndex + eventsPerFile.value() - 1) /
0539 eventsPerFile.value();
0540 std::cerr << " Processed " << globalEventIndex << " events into "
0541 << totalFiles << " file(s)\n";
0542 }
0543
0544 if (globalEventIndex > 0) {
0545 double bytesPerEvent = static_cast<double>(result.totalOutputSize) /
0546 static_cast<double>(globalEventIndex);
0547 std::cerr << " Total input size: " << formatSize(result.totalInputSize)
0548 << "\n";
0549 std::cerr << " Total output size: " << formatSize(result.totalOutputSize)
0550 << " ("
0551 << formatSize(static_cast<std::uintmax_t>(bytesPerEvent))
0552 << "/event)\n";
0553
0554 if (result.totalInputSize > 0) {
0555 double ratio = static_cast<double>(result.totalOutputSize) /
0556 static_cast<double>(result.totalInputSize);
0557 std::cerr << " Compression ratio: " << std::fixed
0558 << std::setprecision(2) << (ratio * 100.0) << "%\n";
0559 }
0560 } else {
0561 std::cerr << " Total input size: " << formatSize(result.totalInputSize)
0562 << "\n";
0563 std::cerr << " Total output size: " << formatSize(result.totalOutputSize)
0564 << "\n";
0565 }
0566
0567
0568 if (globalEventIndex > 0) {
0569 std::cerr << "\nTiming breakdown:\n";
0570 std::cerr << " Reading events: " << std::fixed << std::setprecision(3)
0571 << result.totalReadTime << " s ("
0572 << (result.totalReadTime / globalEventIndex * 1000.0)
0573 << " ms/event)\n";
0574 std::cerr << " Writing events: " << std::fixed << std::setprecision(3)
0575 << result.totalWriteTime << " s ("
0576 << (result.totalWriteTime / globalEventIndex * 1000.0)
0577 << " ms/event)\n";
0578
0579 double totalProcessingTime = result.totalReadTime + result.totalWriteTime;
0580 std::cerr << " Total processing: " << std::fixed << std::setprecision(3)
0581 << totalProcessingTime << " s\n";
0582
0583
0584 if (totalProcessingTime > 0) {
0585 std::cerr << "\nTime distribution:\n";
0586 std::cerr << " Reading: " << std::fixed << std::setprecision(1)
0587 << (result.totalReadTime / totalProcessingTime * 100.0)
0588 << "%\n";
0589 std::cerr << " Writing: " << std::fixed << std::setprecision(1)
0590 << (result.totalWriteTime / totalProcessingTime * 100.0)
0591 << "%\n";
0592 }
0593 }
0594 }
0595
0596 return result;
0597 }
0598
0599 }