Warning, file /acts/Examples/Io/HepMC3/src/HepMC3Util.cpp was not indexed
or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).
0001
0002
0003
0004
0005
0006
0007
0008
0009 #include "ActsExamples/Io/HepMC3/HepMC3Util.hpp"
0010
0011 #include "Acts/Utilities/ScopedTimer.hpp"
0012
0013 #include <chrono>
0014 #include <fstream>
0015 #include <iomanip>
0016 #include <iostream>
0017 #include <memory>
0018 #include <sstream>
0019 #include <stdexcept>
0020
0021 #include <HepMC3/GenEvent.h>
0022 #include <HepMC3/GenParticle.h>
0023 #include <HepMC3/GenVertex.h>
0024 #include <HepMC3/Reader.h>
0025 #include <HepMC3/Version.h>
0026 #include <HepMC3/Writer.h>
0027 #include <nlohmann/json.hpp>
0028
0029 namespace ActsExamples {
0030
0031 namespace {
0032 template <typename T>
0033 void mergeEventsImpl(HepMC3::GenEvent& event, std::span<T> genEvents,
0034 const Acts::Logger& logger) {
0035 Acts::AveragingScopedTimer mergeTimer("Merging HepMC3 events", logger(),
0036 Acts::Logging::DEBUG);
0037
0038 std::vector<std::shared_ptr<HepMC3::GenParticle>> particles;
0039
0040
0041 std::size_t nParticles = 0;
0042 std::size_t nVertices = 0;
0043 for (const auto& genEvent : genEvents) {
0044 nParticles += genEvent->particles().size();
0045 nVertices += genEvent->vertices().size();
0046 }
0047
0048 event.reserve(nParticles, nVertices);
0049
0050 for (const auto& genEvent : genEvents) {
0051 auto sample = mergeTimer.sample();
0052 particles.clear();
0053 particles.reserve(genEvent->particles().size());
0054
0055 auto copyAttributes = [&](const auto& src, auto& dst) {
0056 for (const auto& attr : src.attribute_names()) {
0057 auto value = src.attribute_as_string(attr);
0058 dst.add_attribute(attr,
0059 std::make_shared<HepMC3::StringAttribute>(value));
0060 }
0061 };
0062
0063 copyAttributes(*genEvent, event);
0064
0065
0066 for (const auto& srcParticle : genEvent->particles()) {
0067 if (srcParticle->id() - 1 != static_cast<int>(particles.size())) {
0068 throw std::runtime_error("Particle id is not consecutive");
0069 }
0070 auto particle = std::make_shared<HepMC3::GenParticle>();
0071 particle->set_momentum(srcParticle->momentum());
0072 particle->set_generated_mass(srcParticle->generated_mass());
0073 particle->set_pid(srcParticle->pid());
0074 particle->set_status(srcParticle->status());
0075
0076 particles.push_back(particle);
0077 event.add_particle(particle);
0078
0079 copyAttributes(*srcParticle, *particle);
0080 }
0081
0082 for (const auto& srcVertex : genEvent->vertices()) {
0083 auto vertex = std::make_shared<HepMC3::GenVertex>(srcVertex->position());
0084 vertex->set_status(srcVertex->status());
0085
0086 event.add_vertex(vertex);
0087
0088 copyAttributes(*srcVertex, *vertex);
0089
0090 for (const auto& srcParticle : srcVertex->particles_in()) {
0091 const auto& particle = particles.at(srcParticle->id() - 1);
0092 vertex->add_particle_in(particle);
0093 }
0094 for (const auto& srcParticle : srcVertex->particles_out()) {
0095 const auto& particle = particles.at(srcParticle->id() - 1);
0096 vertex->add_particle_out(particle);
0097 }
0098 }
0099 }
0100 }
0101 }
0102
0103 void HepMC3Util::mergeEvents(HepMC3::GenEvent& event,
0104 std::span<const HepMC3::GenEvent*> genEvents,
0105 const Acts::Logger& logger) {
0106 mergeEventsImpl(event, genEvents, logger);
0107 }
0108
0109 void HepMC3Util::mergeEvents(
0110 HepMC3::GenEvent& event,
0111 std::span<std::shared_ptr<const HepMC3::GenEvent>> genEvents,
0112 const Acts::Logger& logger) {
0113 mergeEventsImpl(event, genEvents, logger);
0114 }
0115
0116 std::string_view HepMC3Util::compressionExtension(Compression compression) {
0117 switch (compression) {
0118 using enum Compression;
0119 case none:
0120 return "";
0121 case zlib:
0122 return ".gz";
0123 case lzma:
0124 return ".xz";
0125 case bzip2:
0126 return ".bz2";
0127 case zstd:
0128 return ".zst";
0129 default:
0130 throw std::invalid_argument{"Unknown compression value"};
0131 }
0132 }
0133
0134 HepMC3Util::Compression HepMC3Util::compressionFromFilename(
0135 const std::filesystem::path& filename) {
0136 using enum Compression;
0137
0138 std::string filenameStr = filename.string();
0139
0140
0141 if (filenameStr.ends_with(".gz")) {
0142 return zlib;
0143 }
0144 if (filenameStr.ends_with(".xz")) {
0145 return lzma;
0146 }
0147 if (filenameStr.ends_with(".bz2")) {
0148 return bzip2;
0149 }
0150 if (filenameStr.ends_with(".zst")) {
0151 return zstd;
0152 }
0153
0154
0155 return none;
0156 }
0157
0158 std::span<const HepMC3Util::Compression>
0159 HepMC3Util::availableCompressionModes() {
0160 using enum Compression;
0161 static const auto values = []() -> std::vector<HepMC3Util::Compression> {
0162 return {
0163 none,
0164 #ifdef HEPMC3_Z_SUPPORT
0165 zlib,
0166 #endif
0167 #ifdef HEPMC3_LZMA_SUPPORT
0168 lzma,
0169 #endif
0170 #ifdef HEPMC3_BZ2_SUPPORT
0171 bzip2,
0172 #endif
0173 #ifdef HEPMC3_ZSTD_SUPPORT
0174 zstd,
0175 #endif
0176 };
0177 }();
0178 return values;
0179 }
0180
0181 std::ostream& HepMC3Util::operator<<(std::ostream& os,
0182 HepMC3Util::Compression compression) {
0183 switch (compression) {
0184 using enum HepMC3Util::Compression;
0185 case none:
0186 return os << "none";
0187 case zlib:
0188 return os << "zlib";
0189 case lzma:
0190 return os << "lzma";
0191 case bzip2:
0192 return os << "bzip2";
0193 case zstd:
0194 return os << "zstd";
0195 default:
0196 throw std::invalid_argument{"Unknown compression value"};
0197 }
0198 }
0199
0200 std::ostream& HepMC3Util::operator<<(std::ostream& os,
0201 HepMC3Util::Format format) {
0202 switch (format) {
0203 using enum HepMC3Util::Format;
0204 case ascii:
0205 return os << "ascii";
0206 case root:
0207 return os << "root";
0208 default:
0209 throw std::invalid_argument{"Unknown format value"};
0210 }
0211 }
0212
0213 std::span<const HepMC3Util::Format> HepMC3Util::availableFormats() {
0214 using enum Format;
0215 static const auto values = []() -> std::vector<HepMC3Util::Format> {
0216 return {
0217 ascii,
0218 #ifdef HEPMC3_ROOT_SUPPORT
0219 root,
0220 #endif
0221 };
0222 }();
0223 return values;
0224 }
0225
0226 HepMC3Util::Format HepMC3Util::formatFromFilename(
0227 const std::filesystem::path& filename) {
0228 using enum Format;
0229
0230 for (auto compression : availableCompressionModes()) {
0231 auto ext = compressionExtension(compression);
0232
0233 if (filename.string().ends_with(".hepmc3" + std::string(ext)) ||
0234 filename.string().ends_with(".hepmc" + std::string(ext))) {
0235 return ascii;
0236 }
0237 }
0238 if (filename.string().ends_with(".root")) {
0239 return root;
0240 }
0241
0242 throw std::invalid_argument{"Unknown format extension: " +
0243 std::string{filename}};
0244 }
0245
0246 namespace {
0247
0248
0249 std::string formatSize(std::uintmax_t bytes) {
0250 const std::array<const char*, 5> units = {"B", "KB", "MB", "GB", "TB"};
0251 std::size_t unit = 0;
0252 double size = static_cast<double>(bytes);
0253
0254 while (size >= 1024.0 && unit < units.size() - 1) {
0255 size /= 1024.0;
0256 unit++;
0257 }
0258
0259 std::ostringstream oss;
0260 oss << std::fixed << std::setprecision(2) << size << " " << units[unit];
0261 return oss.str();
0262 }
0263
0264
0265 void writeMetadata(const std::filesystem::path& dataFile,
0266 std::size_t numEvents) {
0267 std::filesystem::path metadataFile = dataFile;
0268 metadataFile += ".json";
0269
0270 nlohmann::json metadata;
0271 metadata["num_events"] = numEvents;
0272
0273 std::ofstream metadataStream(metadataFile);
0274 metadataStream << metadata.dump(2);
0275 }
0276
0277
0278 std::string generateOutputFilename(const std::string& prefix,
0279 std::size_t fileNum,
0280 HepMC3Util::Format format,
0281 HepMC3Util::Compression compression) {
0282 std::ostringstream filename;
0283 filename << prefix << "_" << std::setfill('0') << std::setw(6) << fileNum;
0284
0285 if (format == HepMC3Util::Format::ascii) {
0286 filename << ".hepmc3";
0287 } else if (format == HepMC3Util::Format::root) {
0288 filename << ".root";
0289 }
0290
0291 filename << HepMC3Util::compressionExtension(compression);
0292
0293 return filename.str();
0294 }
0295
0296 }
0297
0298 HepMC3Util::NormalizeResult HepMC3Util::normalizeFiles(
0299 const std::vector<std::filesystem::path>& inputFiles,
0300 std::optional<std::filesystem::path> singleOutputPath,
0301 const std::filesystem::path& outputDir, const std::string& outputPrefix,
0302 std::optional<std::size_t> eventsPerFile,
0303 std::optional<std::size_t> maxEvents, Format format,
0304 Compression compression, int compressionLevel, bool verbose) {
0305
0306 if (inputFiles.empty()) {
0307 throw std::invalid_argument("No input files specified");
0308 }
0309
0310 if (!singleOutputPath && !eventsPerFile.has_value()) {
0311 throw std::invalid_argument(
0312 "events-per-file must be > 0 in multi-file mode");
0313 }
0314
0315 if (compressionLevel < 0 || compressionLevel > 19) {
0316 throw std::invalid_argument("compression-level must be 0-19");
0317 }
0318
0319 if (format == Format::root && compression != Compression::none) {
0320 throw std::invalid_argument(
0321 "ROOT format does not support compression parameter");
0322 }
0323
0324 NormalizeResult result;
0325
0326
0327 std::filesystem::path actualOutputDir;
0328 if (singleOutputPath) {
0329 if (singleOutputPath->has_parent_path()) {
0330 actualOutputDir = singleOutputPath->parent_path();
0331 } else {
0332 actualOutputDir = ".";
0333 }
0334 } else {
0335 actualOutputDir = outputDir;
0336 }
0337
0338
0339 std::filesystem::create_directories(actualOutputDir);
0340
0341 if (verbose) {
0342 std::cerr << "Configuration:\n";
0343 std::cerr << " Input files: " << inputFiles.size() << "\n";
0344 if (singleOutputPath) {
0345 std::cerr << " Single output file: " << *singleOutputPath << "\n";
0346 std::cerr << " Format: " << format << "\n";
0347 std::cerr << " Compression: " << compression << " (level "
0348 << compressionLevel << ")\n";
0349 } else {
0350 std::cerr << " Output dir: " << actualOutputDir << "\n";
0351 std::cerr << " Output prefix: " << outputPrefix << "\n";
0352 std::cerr << " Events per file: "
0353 << (eventsPerFile.has_value()
0354 ? std::to_string(eventsPerFile.value())
0355 : "unset")
0356 << "\n";
0357 std::cerr << " Format: " << format << "\n";
0358 std::cerr << " Compression: " << compression << " (level "
0359 << compressionLevel << ")\n";
0360 }
0361 if (maxEvents.has_value()) {
0362 std::cerr << " Max events to read: " << maxEvents.value() << "\n";
0363 }
0364 std::cerr << "\n";
0365 }
0366
0367
0368 std::size_t globalEventIndex = 0;
0369 std::size_t eventsInCurrentFile = 0;
0370 std::size_t currentFileIndex = 0;
0371 std::unique_ptr<HepMC3::Writer> currentWriter;
0372 std::filesystem::path currentOutputPath;
0373
0374
0375 for (const auto& inputFile : inputFiles) {
0376 if (verbose) {
0377 std::cerr << "Reading " << inputFile << "...\n";
0378 }
0379
0380 if (!std::filesystem::exists(inputFile)) {
0381 std::cerr << "WARNING: File not found: " << inputFile << "\n";
0382 continue;
0383 }
0384
0385
0386 result.totalInputSize += std::filesystem::file_size(inputFile);
0387
0388 auto reader = HepMC3Util::deduceReader(inputFile.string());
0389 if (!reader) {
0390 std::cerr << "ERROR: Failed to open " << inputFile << "\n";
0391 continue;
0392 }
0393
0394 HepMC3::GenEvent event;
0395 std::size_t eventsReadFromFile = 0;
0396 std::size_t lastProgressUpdate = 0;
0397 constexpr std::size_t progressInterval = 100;
0398
0399 while (!reader->failed()) {
0400
0401 if (maxEvents > 0 && globalEventIndex >= maxEvents) {
0402 break;
0403 }
0404
0405 auto readStart = std::chrono::high_resolution_clock::now();
0406 reader->read_event(event);
0407 auto readEnd = std::chrono::high_resolution_clock::now();
0408 result.totalReadTime +=
0409 std::chrono::duration<double>(readEnd - readStart).count();
0410
0411 if (reader->failed()) {
0412 break;
0413 }
0414
0415
0416 if (verbose &&
0417 (eventsReadFromFile - lastProgressUpdate) >= progressInterval) {
0418 std::cerr << " Progress: " << eventsReadFromFile
0419 << " events read...\r" << std::flush;
0420 lastProgressUpdate = eventsReadFromFile;
0421 }
0422
0423
0424 if (eventsInCurrentFile == 0) {
0425
0426 if (currentWriter) {
0427 currentWriter->close();
0428
0429
0430 if (std::filesystem::exists(currentOutputPath)) {
0431 auto fileSize = std::filesystem::file_size(currentOutputPath);
0432 result.totalOutputSize += fileSize;
0433
0434 std::size_t events =
0435 singleOutputPath ? globalEventIndex : eventsPerFile.value();
0436 writeMetadata(currentOutputPath, events);
0437 result.outputFiles.push_back(currentOutputPath);
0438
0439 if (verbose) {
0440 double sizePerEvent =
0441 static_cast<double>(fileSize) / static_cast<double>(events);
0442
0443 std::cerr << " Wrote " << currentOutputPath << " (" << events
0444 << " events, " << formatSize(fileSize) << ", "
0445 << formatSize(static_cast<std::uintmax_t>(sizePerEvent))
0446 << "/event)\n";
0447 }
0448 }
0449 }
0450
0451
0452 if (singleOutputPath) {
0453 currentOutputPath = *singleOutputPath;
0454 } else {
0455 std::string filename = generateOutputFilename(
0456 outputPrefix, currentFileIndex, format, compression);
0457 currentOutputPath = actualOutputDir / filename;
0458 }
0459 currentWriter =
0460 HepMC3Util::createWriter(currentOutputPath, format, compression);
0461 }
0462
0463
0464 event.set_event_number(globalEventIndex);
0465
0466
0467 if (eventsInCurrentFile > 0) {
0468 event.set_run_info(nullptr);
0469 }
0470
0471 auto writeStart = std::chrono::high_resolution_clock::now();
0472 currentWriter->write_event(event);
0473 auto writeEnd = std::chrono::high_resolution_clock::now();
0474 result.totalWriteTime +=
0475 std::chrono::duration<double>(writeEnd - writeStart).count();
0476
0477 globalEventIndex++;
0478 eventsInCurrentFile++;
0479 eventsReadFromFile++;
0480
0481
0482 if (!singleOutputPath && eventsInCurrentFile >= eventsPerFile) {
0483 currentWriter->close();
0484
0485
0486 if (std::filesystem::exists(currentOutputPath)) {
0487 auto fileSize = std::filesystem::file_size(currentOutputPath);
0488 result.totalOutputSize += fileSize;
0489
0490 writeMetadata(currentOutputPath, eventsInCurrentFile);
0491 result.outputFiles.push_back(currentOutputPath);
0492
0493 if (verbose) {
0494 double sizePerEvent = static_cast<double>(fileSize) /
0495 static_cast<double>(eventsInCurrentFile);
0496
0497 std::cerr << " Wrote " << currentOutputPath << " ("
0498 << eventsInCurrentFile << " events, "
0499 << formatSize(fileSize) << ", "
0500 << formatSize(static_cast<std::uintmax_t>(sizePerEvent))
0501 << "/event)\n";
0502 }
0503 }
0504
0505 currentWriter.reset();
0506 eventsInCurrentFile = 0;
0507 currentFileIndex++;
0508 }
0509 }
0510
0511 reader->close();
0512
0513 if (verbose) {
0514
0515 std::cerr << " Read " << eventsReadFromFile << " events from "
0516 << inputFile << " \n";
0517 }
0518
0519
0520 if (maxEvents.has_value() && globalEventIndex >= maxEvents.value()) {
0521 if (verbose) {
0522 std::cerr << "Reached maximum event limit (" << maxEvents.value()
0523 << ")\n";
0524 }
0525 break;
0526 }
0527 }
0528
0529
0530 if (currentWriter && eventsInCurrentFile > 0) {
0531 currentWriter->close();
0532
0533
0534 if (std::filesystem::exists(currentOutputPath)) {
0535 auto fileSize = std::filesystem::file_size(currentOutputPath);
0536 result.totalOutputSize += fileSize;
0537
0538 writeMetadata(currentOutputPath, eventsInCurrentFile);
0539 result.outputFiles.push_back(currentOutputPath);
0540
0541 if (verbose) {
0542 double sizePerEvent = static_cast<double>(fileSize) /
0543 static_cast<double>(eventsInCurrentFile);
0544
0545 std::cerr << " Wrote " << currentOutputPath << " ("
0546 << eventsInCurrentFile << " events, " << formatSize(fileSize)
0547 << ", "
0548 << formatSize(static_cast<std::uintmax_t>(sizePerEvent))
0549 << "/event)\n";
0550 }
0551 }
0552 }
0553
0554 result.numEvents = globalEventIndex;
0555
0556
0557 if (verbose) {
0558 std::cerr << "\nSummary:\n";
0559 if (singleOutputPath) {
0560 std::cerr << " Processed " << globalEventIndex
0561 << " events into single file\n";
0562 } else {
0563 std::size_t totalFiles = (globalEventIndex + eventsPerFile.value() - 1) /
0564 eventsPerFile.value();
0565 std::cerr << " Processed " << globalEventIndex << " events into "
0566 << totalFiles << " file(s)\n";
0567 }
0568
0569 if (globalEventIndex > 0) {
0570 double bytesPerEvent = static_cast<double>(result.totalOutputSize) /
0571 static_cast<double>(globalEventIndex);
0572 std::cerr << " Total input size: " << formatSize(result.totalInputSize)
0573 << "\n";
0574 std::cerr << " Total output size: " << formatSize(result.totalOutputSize)
0575 << " ("
0576 << formatSize(static_cast<std::uintmax_t>(bytesPerEvent))
0577 << "/event)\n";
0578
0579 if (result.totalInputSize > 0) {
0580 double ratio = static_cast<double>(result.totalOutputSize) /
0581 static_cast<double>(result.totalInputSize);
0582 std::cerr << " Compression ratio: " << std::fixed
0583 << std::setprecision(2) << (ratio * 100.0) << "%\n";
0584 }
0585 } else {
0586 std::cerr << " Total input size: " << formatSize(result.totalInputSize)
0587 << "\n";
0588 std::cerr << " Total output size: " << formatSize(result.totalOutputSize)
0589 << "\n";
0590 }
0591
0592
0593 if (globalEventIndex > 0) {
0594 std::cerr << "\nTiming breakdown:\n";
0595 std::cerr << " Reading events: " << std::fixed << std::setprecision(3)
0596 << result.totalReadTime << " s ("
0597 << (result.totalReadTime / globalEventIndex * 1000.0)
0598 << " ms/event)\n";
0599 std::cerr << " Writing events: " << std::fixed << std::setprecision(3)
0600 << result.totalWriteTime << " s ("
0601 << (result.totalWriteTime / globalEventIndex * 1000.0)
0602 << " ms/event)\n";
0603
0604 double totalProcessingTime = result.totalReadTime + result.totalWriteTime;
0605 std::cerr << " Total processing: " << std::fixed << std::setprecision(3)
0606 << totalProcessingTime << " s\n";
0607
0608
0609 if (totalProcessingTime > 0) {
0610 std::cerr << "\nTime distribution:\n";
0611 std::cerr << " Reading: " << std::fixed << std::setprecision(1)
0612 << (result.totalReadTime / totalProcessingTime * 100.0)
0613 << "%\n";
0614 std::cerr << " Writing: " << std::fixed << std::setprecision(1)
0615 << (result.totalWriteTime / totalProcessingTime * 100.0)
0616 << "%\n";
0617 }
0618 }
0619 }
0620
0621 return result;
0622 }
0623
0624 }