File indexing completed on 2025-01-17 09:12:57
0001 #ifdef __MACH__
0002 #include <mach/mach.h>
0003 #endif
0004
0005 #include <iostream>
0006 #include <fstream>
0007 #include <string>
0008 #include <random>
0009 #include <ctime>
0010 #include <cstdlib>
0011 #include <vector>
0012 #include <algorithm>
0013 #include <numeric>
0014 #include <stdexcept>
0015 #include <chrono>
0016 #include <cmath>
0017 #include <random>
0018 #include <tuple>
0019 #include <sys/resource.h>
0020
0021 #include <HepMC3/ReaderFactory.h>
0022 #include <HepMC3/WriterAscii.h>
0023 #include "HepMC3/WriterRootTree.h"
0024 #include "HepMC3/GenRunInfo.h"
0025 #include "HepMC3/GenEvent.h"
0026 #include "HepMC3/Print.h"
0027
0028 #include "argparse/argparse.hpp"
0029
0030 using std::cout;
0031 using std::cerr;
0032 using std::endl;
0033 using std::string;
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046 class SignalBackgroundMerger {
0047
0048 private:
0049
0050 std::shared_ptr<HepMC3::Reader> sigAdapter;
0051 double sigFreq = 0;
0052 std::map<std::string, std::shared_ptr<HepMC3::Reader>> freqAdapters;
0053 std::map<std::string, double> freqs;
0054
0055 std::map<std::string,
0056 std::tuple<std::vector<HepMC3::GenEvent>,
0057 std::piecewise_constant_distribution<>,
0058 double>
0059 > weightDict;
0060
0061
0062 typedef struct{
0063 long eventCount;
0064 long particleCount;
0065 } stats;
0066 std::map<std::string, stats > infoDict;
0067
0068 public:
0069
0070 SignalBackgroundMerger(int argc, char* argv[]) {
0071 auto t0 = std::chrono::high_resolution_clock::now();
0072
0073
0074 digestArgs(argc, argv);
0075 rng.seed( rngSeed );
0076 banner();
0077 if (outputFile != "" ) {
0078 outputFileName = outputFile;
0079 } else {
0080 outputFileName = nameGen();
0081 }
0082 cout << "Writing to " << outputFileName << endl;
0083
0084 PrepData ( signalFile, signalFreq, signalSkip, true );
0085 PrepData ( bg1File, bg1Freq, bg1Skip );
0086 PrepData ( bg2File, bg2Freq, bg2Skip);
0087 PrepData ( bg3File, bg3Freq, bg3Skip);
0088
0089 auto t1 = std::chrono::high_resolution_clock::now();
0090 std::cout << "Initiation time: " << std::round(std::chrono::duration<double, std::chrono::seconds::period>(t1 - t0).count()) << " sec" << std::endl;
0091 std::cout << "\n==================================================================\n" << std::endl;
0092
0093 }
0094
0095 void merge(){
0096 auto t1 = std::chrono::high_resolution_clock::now();
0097
0098
0099 std::shared_ptr<HepMC3::Writer> f;
0100 if (rootFormat)
0101 f = std::make_shared<HepMC3::WriterRootTree>(outputFileName);
0102 else
0103 f = std::make_shared<HepMC3::WriterAscii>(outputFileName);
0104
0105
0106 int i = 0;
0107 for (i = 0; i<nSlices; ++i ) {
0108 if (i % 100 == 0 || verbose ) squawk(i);
0109 auto hepSlice = mergeSlice(i);
0110 if (!hepSlice) {
0111 std::cout << "Exhausted signal source." << std::endl;
0112 break;
0113 }
0114 hepSlice->set_event_number(i);
0115 f->write_event(*hepSlice);
0116 }
0117 std::cout << "Finished all requested slices." << std::endl;
0118
0119 int slicesDone = i;
0120 auto t2 = std::chrono::high_resolution_clock::now();
0121
0122 std::cout << "Slice loop time: " << std::round(std::chrono::duration<double, std::chrono::minutes::period>(t2 - t1).count()) << " min" << std::endl;
0123 std::cout << " -- " << std::round(std::chrono::duration<double, std::chrono::microseconds::period>(t2 - t1).count() / i) << " us / slice" << std::endl;
0124
0125 for (auto info : infoDict) {
0126 std::cout << "From " << info.first << std::endl;
0127 std::cout << " placed " << info.second.eventCount << " events" << std::endl;
0128 std::cout << " --> on average " << info.second.eventCount / nSlices << std::endl;
0129 std::cout << " placed " << info.second.particleCount << " final state particles" << std::endl;
0130 std::cout << " --> on average " << info.second.particleCount / nSlices << std::endl;
0131
0132 }
0133
0134 struct rusage r_usage;
0135 getrusage(RUSAGE_SELF, &r_usage);
0136
0137
0138
0139
0140 #ifdef __MACH__
0141 float mbsize = 1024 * 1024;
0142 #else
0143 float mbsize = 1024;
0144 #endif
0145
0146
0147 std::cout << endl << "Maximum Resident Memory " << r_usage.ru_maxrss / mbsize << " MB" << std::endl;
0148
0149 sigAdapter->close();
0150 for (auto& it : freqAdapters) {
0151 it.second->close();
0152 }
0153 f->close();
0154
0155 }
0156
0157
0158 void digestArgs(int argc, char* argv[]) {
0159
0160
0161
0162
0163
0164 argparse::ArgumentParser args ("Merge signal events with up to three background sources.");
0165
0166 args.add_argument("-i", "--signalFile")
0167 .default_value(std::string("root://dtn-eic.jlab.org//work/eic2/EPIC/EVGEN/SIDIS/pythia6-eic/1.0.0/10x100/q2_0to1/pythia_ep_noradcor_10x100_q2_0.000000001_1.0_run1.ab.hepmc3.tree.root"))
0168 .help("Name of the HEPMC file with the signal events");
0169
0170 args.add_argument("-sf", "--signalFreq")
0171 .default_value(0.0)
0172 .scan<'g', double>()
0173 .help("Signal frequency in kHz. Default is 0 to have exactly one signal event per slice. Set to the estimated DIS rate to randomize.");
0174
0175 args.add_argument("-S", "--signalSkip")
0176 .default_value(0)
0177 .scan<'i', int>()
0178 .help("Number of signals events to skip. Default is 0.");
0179
0180 args.add_argument("-bg1", "--bg1File")
0181 .default_value(std::string("root://dtn-eic.jlab.org//work/eic2/EPIC/EVGEN/BACKGROUNDS/BEAMGAS/proton/pythia8.306-1.0/100GeV/pythia8.306-1.0_ProtonBeamGas_100GeV_run082.hepmc3.tree.root"))
0182 .help("Name of the first HEPMC file with background events");
0183
0184 args.add_argument("-bf1", "--bg1Freq")
0185 .default_value( 31.9 )
0186 .scan<'g', double>()
0187 .help("First background frequency in kHz. Default is the estimated hadron gas rate at 10x100. Set to 0 to use the weights in the corresponding input file.");
0188
0189 args.add_argument("-bg1S", "--bg1Skip")
0190 .default_value(0)
0191 .scan<'i', int>()
0192 .help("Number of first background events to skip. Default is 0.");
0193
0194 args.add_argument("-bg2", "--bg2File")
0195 .default_value(std::string("root://dtn-eic.jlab.org//work/eic2/EPIC/EVGEN/BACKGROUNDS/BEAMGAS/electron/beam_gas_ep_10GeV_foam_emin10keV_30Mevt.hepmc3.tree.root"))
0196 .help("Name of the second HEPMC file with background events");
0197
0198 args.add_argument("-bf2", "--bg2Freq")
0199 .default_value(3177.25)
0200 .scan<'g', double>()
0201 .help("Second background frequency in kHz. Default is the estimated electron gas rate at 10x100. Set to 0 to use the weights in the corresponding input file.");
0202
0203 args.add_argument("-bg2S", "--bg2Skip")
0204 .default_value(0)
0205 .scan<'i', int>()
0206 .help("Number of second background events to skip. Default is 0.");\
0207
0208 args.add_argument("-bg3", "--bg3File")
0209 .default_value(std::string(""))
0210 .help("Name of the third HEPMC file with background events");
0211
0212 args.add_argument("-bf3", "--bg3Freq")
0213 .default_value(0.0)
0214 .scan<'g', double>()
0215 .help("Third background frequency in kHz. Default is 0 to use the weights in the corresponding input file. Set to a value >0 to specify a frequency instead.");
0216
0217 args.add_argument("-bg3S", "--bg3Skip")
0218 .default_value(0)
0219 .scan<'i', int>()
0220 .help("Number of third background events to skip. Default is 0");
0221
0222 args.add_argument("-o", "--outputFile")
0223 .default_value(std::string("bgmerged.hepmc3.tree.root"))
0224 .help("Specify the output file name. By default bgmerged.hepmc3.tree.root is used");
0225
0226 args.add_argument("-r", "--rootFormat")
0227 .default_value(true)
0228 .implicit_value(true)
0229 .help("Use hepmc.root output format, default is true.");
0230
0231 args.add_argument("-w", "--intWindow")
0232 .default_value(2000.0)
0233 .scan<'g', double>()
0234 .help("Length of the integration window in nanoseconds. Default is 2000.");
0235
0236 args.add_argument("-N", "--nSlices")
0237 .default_value(10000)
0238 .scan<'i', int>()
0239 .help("Number of sampled time slices ('events'). Default is 10000. If set to -1, all events in the signal file will be used and background files cycled as needed.");
0240
0241 args.add_argument("--squashTime")
0242 .default_value(false)
0243 .implicit_value(true)
0244 .help("Integration is performed but no time information is associated to vertices.");
0245
0246 args.add_argument("--rngSeed")
0247 .default_value(0)
0248 .action([](const std::string& value) { return std::stoi(value); })
0249 .help("Random seed, default is None");
0250
0251 args.add_argument("-v", "--verbose")
0252 .default_value(false)
0253 .implicit_value(true)
0254 .help("Display details for every slice.");
0255
0256 try {
0257 args.parse_args(argc, argv);
0258 }
0259 catch (const std::runtime_error& err) {
0260 std::cout << err.what() << std::endl;
0261 std::cout << args;
0262 exit(0);
0263 }
0264
0265 signalFile = args.get<std::string>("--signalFile");
0266 signalFreq = args.get<double>("--signalFreq");
0267 signalSkip = args.get<int>("--signalSkip");
0268
0269 bg1File = args.get<std::string>("--bg1File");
0270 bg1Freq = args.get<double>("--bg1Freq");
0271 bg1Skip = args.get<int>("--bg1Skip");
0272
0273 bg2File = args.get<std::string>("--bg2File");
0274 bg2Freq = args.get<double>("--bg2Freq");
0275 bg2Skip = args.get<int>("--bg2Skip");
0276
0277 bg3File = args.get<std::string>("--bg3File");
0278 bg3Freq = args.get<double>("--bg3Freq");
0279 bg3Skip = args.get<int>("--bg3Skip");
0280
0281 outputFile = args.get<std::string>("--outputFile");
0282 rootFormat = args.get<bool>("--rootFormat");
0283 intWindow = args.get<double>("--intWindow");
0284 nSlices = args.get<int>("--nSlices");
0285 squashTime = args.get<bool>("--squashTime");
0286 rngSeed = args.get<int>("--rngSeed");
0287 verbose = args.get<bool>("--verbose");
0288
0289 }
0290
0291
0292 void banner() {
0293
0294 std::cout << "==================================================================" << std::endl;
0295 std::cout << "=== EPIC HEPMC MERGER ===" << std::endl;
0296 std::cout << "authors: Benjamen Sterwerf* (bsterwerf@berkeley.edu), Kolja Kauder** (kkauder@bnl.gov), Reynier Cruz-Torres***" << std::endl;
0297 std::cout << "* University of California, Berkeley" << std::endl;
0298 std::cout << "** Brookhaven National Laboratory" << std::endl;
0299 std::cout << "*** formerly Lawrence Berkeley National Laboratory" << std::endl;
0300 std::cout << "\nFor more information, run \n./signal_background_merger --help" << std::endl;
0301
0302 std::cout << "Number of Slices:" << nSlices << endl;
0303 std::string freqTerm = signalFreq > 0 ? std::to_string(signalFreq) + " kHz" : "(one event per time slice)";
0304 std::cout << "Signal events file and frequency:\n";
0305 std::cout << "\t- " << signalFile << "\t" << freqTerm << "\n";
0306
0307 std::cout << "\nBackground files and their respective frequencies:\n";
0308 if (!bg1File.empty()) {
0309 freqTerm = bg1Freq > 0 ? std::to_string(bg1Freq) + " kHz" : "(from weights)";
0310 std::cout << "\t- " << bg1File << "\t" << freqTerm << "\n";
0311 }
0312 if (!bg2File.empty()) {
0313 freqTerm = bg2Freq > 0 ? std::to_string(bg2Freq) + " kHz" : "(from weights)";
0314 std::cout << "\t- " << bg2File << "\t" << freqTerm << "\n";
0315 }
0316 if (!bg3File.empty()) {
0317 freqTerm = bg3Freq > 0 ? std::to_string(bg3Freq) + " kHz" : "(from weights)";
0318 std::cout << "\t- " << bg3File << "\t" << freqTerm << "\n";
0319 }
0320 }
0321
0322
0323 void PrepData(const std::string& fileName, double freq, int skip=0, bool signal=false) {
0324 if (fileName.empty()) return;
0325
0326 cout << "Prepping " << fileName << endl;
0327 std::shared_ptr<HepMC3::Reader> adapter;
0328 try {
0329 adapter = HepMC3::deduce_reader(fileName);
0330 if (!adapter) {
0331 throw std::runtime_error("Failed to open file");
0332 }
0333 } catch (const std::runtime_error& e) {
0334 std::cerr << "Opening " << fileName << " failed: " << e.what() << std::endl;
0335 exit(1);
0336 }
0337
0338 infoDict[fileName] = {0,0};
0339
0340 if (signal) {
0341 sigAdapter = adapter;
0342 sigFreq = freq;
0343 sigAdapter->skip(skip);
0344 return;
0345 }
0346
0347
0348 if (freq <= 0) {
0349 std::cout << "Reading in all events from " << fileName << std::endl;
0350 std::vector<HepMC3::GenEvent> events;
0351 std::vector<double> weights;
0352
0353 while(!adapter->failed()) {
0354 HepMC3::GenEvent evt(HepMC3::Units::GEV,HepMC3::Units::MM);
0355 adapter->read_event(evt);
0356
0357
0358 if (double w=evt.weight() > 0){
0359 events.push_back(evt);
0360 weights.push_back(evt.weight());
0361 }
0362 }
0363 adapter->close();
0364
0365 double avgRate = 0.0;
0366 for ( auto w : weights ){ avgRate += w;}
0367 avgRate /= weights.size();
0368 avgRate *= 1e-9;
0369 std::cout << "Average rate is " << avgRate << " GHz" << std::endl;
0370
0371 std::vector<int> indices (weights.size());
0372 std::iota (std::begin(indices), std::end(indices), 0);
0373
0374
0375
0376
0377
0378
0379
0380
0381
0382
0383 std::piecewise_constant_distribution<> weightedDist(std::begin(indices),std::end(indices),
0384 std::begin(weights));
0385 weightDict[fileName] = { std::make_tuple(events, weightedDist, avgRate) };
0386
0387 return;
0388 }
0389
0390
0391 adapter->skip(skip);
0392 freqAdapters[fileName] = adapter;
0393 freqs[fileName] = freq;
0394 }
0395
0396
0397 bool hasEnding (std::string const &fullString, std::string const &ending) {
0398 if (fullString.length() >= ending.length()) {
0399 return (0 == fullString.compare (fullString.length() - ending.length(), ending.length(), ending));
0400 } else {
0401 return false;
0402 }
0403 }
0404
0405
0406 std::string nameGen() {
0407
0408
0409 std::string name = signalFile;
0410 if (nSlices > 0) {
0411 size_t pos = name.find(".hepmc");
0412 if (pos != std::string::npos) {
0413 name.replace(pos, 6, "_n_" + std::to_string(nSlices) + ".hepmc");
0414 }
0415 }
0416 if ( rootFormat && !hasEnding(name,".root")){
0417 name.append(".root");
0418 }
0419 name = "bgmerged_" + name;
0420 return name;
0421 }
0422
0423
0424 void squawk(int i) {
0425
0426
0427 #ifdef __MACH__
0428 task_basic_info_data_t info;
0429 mach_msg_type_number_t size = sizeof(info);
0430 kern_return_t kerr = task_info(mach_task_self(),
0431 TASK_BASIC_INFO,
0432 (task_info_t)&info,
0433 &size);
0434
0435 long memory_usage = -1;
0436 if (kerr == KERN_SUCCESS) {
0437 memory_usage = info.resident_size / 1024 / 1024;
0438 }
0439 #else
0440 std::ifstream statm("/proc/self/statm");
0441 long size, resident, share, text, lib, data, dt;
0442 statm >> size >> resident >> share >> text >> lib >> data >> dt;
0443 statm.close();
0444
0445 long page_size = sysconf(_SC_PAGESIZE);
0446 long memory_usage = resident * page_size / 1024 / 1024 ;
0447 #endif
0448
0449
0450 std::cout << "Working on slice " << i + 1 << std::endl;
0451 std::cout << "Current memory usage: " << memory_usage << " MB" << std::endl;
0452
0453 }
0454
0455
0456 std::unique_ptr<HepMC3::GenEvent> mergeSlice(int i) {
0457 auto hepSlice = std::make_unique<HepMC3::GenEvent>(HepMC3::Units::GEV, HepMC3::Units::MM);
0458
0459 addFreqEvents(signalFile, sigAdapter, sigFreq, hepSlice, true);
0460
0461 for (const auto& freqBgs : freqAdapters) {
0462 auto fileName=freqBgs.first;
0463 addFreqEvents(fileName, freqAdapters[fileName], freqs[fileName], hepSlice);
0464 }
0465
0466 for (const auto& fileName : weightDict) {
0467 addWeightedEvents(fileName.first, hepSlice);
0468 }
0469
0470 return hepSlice;
0471 };
0472
0473
0474
0475 void addFreqEvents(std::string fileName, std::shared_ptr<HepMC3::Reader>& adapter, const double freq,
0476 std::unique_ptr<HepMC3::GenEvent>& hepSlice, bool signal = false) {
0477
0478
0479
0480 std::vector<double> timeline;
0481
0482 std::uniform_real_distribution<> uni(0, intWindow);
0483 if (freq == 0){
0484 if (!signal) {
0485 std::cerr << "frequency can't be 0 for background files" << std::endl;
0486 exit(1);
0487 }
0488
0489 timeline.push_back(uni(rng));
0490 } else {
0491
0492 timeline = poissonTimes(freq, intWindow);
0493 }
0494
0495 if ( verbose) std::cout << "Placing " << timeline.size() << " events from " << fileName << std::endl;
0496
0497 if (timeline.empty()) return;
0498 long particleCount = 0;
0499
0500
0501 for (double time : timeline) {
0502 if(adapter->failed()) {
0503 try{
0504 if (signal) {
0505 throw std::ifstream::failure("EOF");
0506 } else {
0507 std::cout << "Cycling back to the start of " << fileName << std::endl;
0508 adapter->close();
0509 adapter = HepMC3::deduce_reader(fileName);
0510 }
0511 } catch (std::ifstream::failure& e) {
0512 continue;
0513 }
0514 }
0515
0516 HepMC3::GenEvent inevt;
0517 adapter->read_event(inevt);
0518
0519 if (squashTime) time = 0;
0520 particleCount += insertHepmcEvent( inevt, hepSlice, time);
0521 }
0522
0523 infoDict[fileName].eventCount += timeline.size();
0524 infoDict[fileName].particleCount += particleCount;
0525
0526
0527 return;
0528 }
0529
0530
0531
0532 void addWeightedEvents(std::string fileName, std::unique_ptr<HepMC3::GenEvent>& hepSlice, bool signal = false) {
0533 auto& [events, weightedDist, avgRate ] = weightDict[fileName];
0534
0535
0536 int nEvents;
0537 std::poisson_distribution<> d( intWindow * avgRate );
0538
0539
0540
0541 while (true) {
0542 nEvents = d(rng);
0543 if (nEvents > events.size()) {
0544 std::cout << "WARNING: Trying to place " << nEvents << " events from " << fileName
0545 << " but the file doesn't have enough. Rerolling, but this is not physical." << std::endl;
0546 continue;
0547 }
0548 break;
0549 }
0550
0551 if (verbose) std::cout << "Placing " << nEvents << " events from " << fileName << std::endl;
0552
0553
0554
0555 std::vector<HepMC3::GenEvent> toPlace(nEvents);
0556 for ( auto& e : toPlace ){
0557 auto i = static_cast<int> (weightedDist(rng));
0558 e = events.at(i);
0559 }
0560
0561
0562 std::vector<double> timeline;
0563 std::uniform_real_distribution<> uni(0, intWindow);
0564 long particleCount = 0;
0565 if (!squashTime) {
0566 for ( auto& e : toPlace ){
0567 double time = squashTime ? 0 : uni(rng);
0568 particleCount += insertHepmcEvent( e, hepSlice, time);
0569 }
0570 }
0571
0572 infoDict[fileName].eventCount += nEvents;
0573 infoDict[fileName].particleCount += particleCount;
0574
0575 return;
0576 }
0577
0578
0579 long insertHepmcEvent( const HepMC3::GenEvent& inevt,
0580 std::unique_ptr<HepMC3::GenEvent>& hepSlice, double time=0) {
0581
0582 double timeHepmc = c_light * time;
0583
0584 std::vector<HepMC3::GenParticlePtr> particles;
0585 std::vector<HepMC3::GenVertexPtr> vertices;
0586
0587
0588
0589 for (auto& vertex : inevt.vertices()) {
0590 HepMC3::FourVector position = vertex->position();
0591 position.set_t(position.t() + timeHepmc);
0592 auto v1 = std::make_shared<HepMC3::GenVertex>(position);
0593 vertices.push_back(v1);
0594 }
0595
0596
0597 long finalParticleCount = 0;
0598 for (auto& particle : inevt.particles()) {
0599 HepMC3::FourVector momentum = particle->momentum();
0600 int status = particle->status();
0601 if (status == 1 ) finalParticleCount++;
0602 int pid = particle->pid();
0603 auto p1 = std::make_shared<HepMC3::GenParticle> (momentum, pid, status);
0604 p1->set_generated_mass(particle->generated_mass());
0605 particles.push_back(p1);
0606
0607 if (particle->production_vertex()->id() < 0) {
0608 int production_vertex = particle->production_vertex()->id();
0609 vertices[abs(production_vertex) - 1]->add_particle_out(p1);
0610 hepSlice->add_particle(p1);
0611 }
0612
0613
0614 if (particle->end_vertex()) {
0615 int end_vertex = particle->end_vertex()->id();
0616 vertices.at(abs(end_vertex) - 1)->add_particle_in(p1);
0617 }
0618 }
0619
0620
0621 for (auto& vertex : vertices) {
0622 hepSlice->add_vertex(vertex);
0623 }
0624
0625 return finalParticleCount;
0626 }
0627
0628
0629
0630 std::vector<double> poissonTimes(double mu, double endTime) {
0631 std::exponential_distribution<> exp(mu);
0632
0633 double t = 0;
0634 std::vector<double> ret;
0635 while (true) {
0636 double delt = exp(rng)*1e6;
0637
0638 t += delt;
0639 if (t >= endTime) {
0640 break;
0641 }
0642 ret.push_back(t);
0643 }
0644 return ret;
0645 }
0646
0647
0648
0649
0650 std::mt19937 rng;
0651 string signalFile, bg1File, bg2File, bg3File;
0652 double signalFreq, bg1Freq, bg2Freq, bg3Freq;
0653 int signalSkip, bg1Skip, bg2Skip, bg3Skip;
0654 string outputFile;
0655 string outputFileName;
0656 bool rootFormat;
0657 double intWindow;
0658 int nSlices;
0659 bool squashTime;
0660 int rngSeed;
0661 bool verbose;
0662
0663 const double c_light = 299.792458;
0664 };
0665
0666
0667 int main(int argc, char* argv[]) {
0668
0669 auto t0 = std::chrono::high_resolution_clock::now();
0670
0671 SignalBackgroundMerger sbm (argc, argv);
0672
0673
0674 sbm.merge();
0675
0676 std::cout << "\n==================================================================\n";
0677 std::cout << "Overall running time: " << std::round(std::chrono::duration<double, std::chrono::minutes::period>(std::chrono::high_resolution_clock::now() - t0).count()) << " min" << std::endl;
0678
0679 return 0;
0680 }