File indexing completed on 2024-05-18 07:44:02
0001 #ifdef __MACH__
0002 #include <mach/mach.h>
0003 #endif
0004
0005 #include <iostream>
0006 #include <fstream>
0007 #include <string>
0008 #include <random>
0009 #include <ctime>
0010 #include <cstdlib>
0011 #include <vector>
0012 #include <algorithm>
0013 #include <numeric>
0014 #include <stdexcept>
0015 #include <chrono>
0016 #include <cmath>
0017 #include <random>
0018 #include <tuple>
0019 #include <sys/resource.h>
0020
0021 #include <HepMC3/ReaderFactory.h>
0022 #include <HepMC3/WriterAscii.h>
0023 #include "HepMC3/WriterRootTree.h"
0024 #include "HepMC3/GenRunInfo.h"
0025 #include "HepMC3/GenEvent.h"
0026 #include "HepMC3/Print.h"
0027
0028 #include "argparse/argparse.hpp"
0029
0030
0031 #include <TH1D.h>
0032 #include <TH1I.h>
0033 #include <TF1.h>
0034 #include <TFile.h>
0035
0036 using std::cout;
0037 using std::cerr;
0038 using std::endl;
0039 using std::string;
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052 class SignalBackgroundMerger {
0053
0054 private:
0055
0056 std::shared_ptr<HepMC3::Reader> sigAdapter;
0057 double sigFreq = 0;
0058 std::map<std::string, std::shared_ptr<HepMC3::Reader>> freqAdapters;
0059 std::map<std::string, double> freqs;
0060
0061 std::map<std::string,
0062 std::tuple<std::vector<HepMC3::GenEvent>,
0063 std::piecewise_constant_distribution<>,
0064 double>
0065 > weightDict;
0066
0067
0068 std::map<std::string, long > infoDict;
0069
0070 public:
0071
0072 SignalBackgroundMerger(int argc, char* argv[]) {
0073 auto t0 = std::chrono::high_resolution_clock::now();
0074
0075
0076 digestArgs(argc, argv);
0077 rng.seed( rngSeed );
0078 banner();
0079 if (outputFile != "" ) {
0080 outputFileName = outputFile;
0081 } else {
0082 outputFileName = nameGen();
0083 }
0084 cout << "Writing to " << outputFileName << endl;
0085
0086 PrepData ( signalFile, signalFreq, signalSkip, true );
0087 PrepData ( bg1File, bg1Freq, bg1Skip );
0088 PrepData ( bg2File, bg2Freq, bg2Skip);
0089 PrepData ( bg3File, bg3Freq, bg3Skip);
0090
0091
0092
0093
0094
0095
0096
0097 auto t1 = std::chrono::high_resolution_clock::now();
0098 std::cout << "Initiation time: " << std::round(std::chrono::duration<double, std::chrono::seconds::period>(t1 - t0).count()) << " sec" << std::endl;
0099 std::cout << "\n==================================================================\n" << std::endl;
0100
0101 }
0102
0103 void merge(){
0104 auto t1 = std::chrono::high_resolution_clock::now();
0105
0106
0107 std::shared_ptr<HepMC3::Writer> f;
0108 if (rootFormat)
0109 f = std::make_shared<HepMC3::WriterRootTree>(outputFileName);
0110 else
0111 f = std::make_shared<HepMC3::WriterAscii>(outputFileName);
0112
0113
0114 int i = 0;
0115 for (i = 0; i<nSlices; ++i ) {
0116 if (i % 100 == 0 || verbose ) squawk(i);
0117 auto hepSlice = mergeSlice(i);
0118 if (!hepSlice) {
0119 std::cout << "Exhausted signal source." << std::endl;
0120 break;
0121 }
0122 hepSlice->set_event_number(i);
0123 f->write_event(*hepSlice);
0124 }
0125 std::cout << "Finished all requested slices." << std::endl;
0126
0127 int slicesDone = i;
0128 auto t2 = std::chrono::high_resolution_clock::now();
0129
0130 std::cout << "Slice loop time: " << std::round(std::chrono::duration<double, std::chrono::minutes::period>(t2 - t1).count()) << " min" << std::endl;
0131 std::cout << " -- " << std::round(std::chrono::duration<double>(t2 - t1).count() / i) << " sec / slice" << std::endl;
0132
0133 for (auto info : infoDict) {
0134 std::cout << "In total, placed " << info.second << " events from " << info.first << std::endl;
0135 std::cout << " --> on average " << info.second / nSlices << std::endl;
0136
0137 }
0138
0139 struct rusage r_usage;
0140 getrusage(RUSAGE_SELF, &r_usage);
0141
0142
0143
0144
0145 #ifdef __MACH__
0146 float mbsize = 1024 * 1024;
0147 #else
0148 float mbsize = 1024;
0149 #endif
0150
0151
0152 std::cout << endl << "Maximum Resident Memory " << r_usage.ru_maxrss / mbsize << " MB" << std::endl;
0153
0154 sigAdapter->close();
0155 for (auto& it : freqAdapters) {
0156 it.second->close();
0157 }
0158 f->close();
0159
0160 }
0161
0162
0163 void digestArgs(int argc, char* argv[]) {
0164
0165
0166
0167
0168
0169 argparse::ArgumentParser args ("Merge signal events with up to three background sources.");
0170
0171 args.add_argument("-i", "--signalFile")
0172 .default_value(std::string("small_ep_noradcor.10x100_q2_10_100_run001.hepmc"))
0173 .help("Name of the HEPMC file with the signal events");
0174
0175 args.add_argument("-sf", "--signalFreq")
0176 .default_value(0.0)
0177 .scan<'g', double>()
0178 .help("Signal frequency in kHz. Default is 0 to have exactly one signal event per slice. Set to the estimated DIS rate to randomize.");
0179
0180 args.add_argument("-S", "--signalSkip")
0181 .default_value(0)
0182 .scan<'i', int>()
0183 .help("Number of signals events to skip. Default is 0.");
0184
0185 args.add_argument("-bg1", "--bg1File")
0186 .default_value(std::string("small_hgas_100GeV_HiAc_25mrad.Asciiv3.hepmc"))
0187 .help("Name of the first HEPMC file with background events");
0188
0189 args.add_argument("-bf1", "--bg1Freq")
0190 .default_value(342.8)
0191 .scan<'g', double>()
0192 .help("First background frequency in kHz. Default is the estimated hadron gas rate at 10x100. Set to 0 to use the weights in the corresponding input file.");
0193
0194 args.add_argument("-bg1S", "--bg1Skip")
0195 .default_value(0)
0196 .scan<'i', int>()
0197 .help("Number of first background events to skip. Default is 0.");
0198
0199 args.add_argument("-bg2", "--bg2File")
0200 .default_value(std::string("small_beam_gas_ep_10GeV_foam_emin10keV_vtx.hepmc"))
0201 .help("Name of the second HEPMC file with background events");
0202
0203 args.add_argument("-bf2", "--bg2Freq")
0204 .default_value(3177.25)
0205 .scan<'g', double>()
0206 .help("Second background frequency in kHz. Default is the estimated electron gas rate at 10x100. Set to 0 to use the weights in the corresponding input file.");
0207
0208 args.add_argument("-bg2S", "--bg2Skip")
0209 .default_value(0)
0210 .scan<'i', int>()
0211 .help("Number of second background events to skip. Default is 0.");\
0212
0213 args.add_argument("-bg3", "--bg3File")
0214 .default_value(std::string("small_SR_single_2.5A_10GeV.hepmc"))
0215 .help("Name of the third HEPMC file with background events");
0216
0217 args.add_argument("-bf3", "--bg3Freq")
0218 .default_value(0.0)
0219 .scan<'g', double>()
0220 .help("Third background frequency in kHz. Default is 0 to use the weights in the corresponding input file. Set to a value >0 to specify a frequency instead.");
0221
0222 args.add_argument("-bg3S", "--bg3Skip")
0223 .default_value(0)
0224 .scan<'i', int>()
0225 .help("Number of third background events to skip. Default is 0");
0226
0227 args.add_argument("-o", "--outputFile")
0228 .default_value(std::string(""))
0229 .help("Specify the output file name. By default it will be auto-generated.");
0230
0231 args.add_argument("-r", "--rootFormat")
0232 .default_value(false)
0233 .implicit_value(true)
0234 .help("Use hepmc.root output format, default is false.");
0235
0236 args.add_argument("-w", "--intWindow")
0237 .default_value(2000.0)
0238 .scan<'g', double>()
0239 .help("Length of the integration window in nanoseconds. Default is 2000.");
0240
0241 args.add_argument("-N", "--nSlices")
0242 .default_value(-1)
0243 .scan<'i', int>()
0244 .help("Number of sampled time slices ('events'). Default is -1. If set to -1, all events in the signal file will be used and background files cycled as needed.");
0245
0246 args.add_argument("--squashTime")
0247 .default_value(false)
0248 .implicit_value(true)
0249 .help("Integration is performed but no time information is associated to vertices.");
0250
0251 args.add_argument("--rngSeed")
0252 .default_value(0)
0253 .action([](const std::string& value) { return std::stoi(value); })
0254 .help("Random seed, default is None");
0255
0256 args.add_argument("-v", "--verbose")
0257 .default_value(false)
0258 .implicit_value(true)
0259 .help("Display details for every slice.");
0260
0261 try {
0262 args.parse_args(argc, argv);
0263 }
0264 catch (const std::runtime_error& err) {
0265 std::cout << err.what() << std::endl;
0266 std::cout << args;
0267 exit(0);
0268 }
0269
0270 signalFile = args.get<std::string>("--signalFile");
0271 signalFreq = args.get<double>("--signalFreq");
0272 signalSkip = args.get<int>("--signalSkip");
0273
0274 bg1File = args.get<std::string>("--bg1File");
0275 bg1Freq = args.get<double>("--bg1Freq");
0276 bg1Skip = args.get<int>("--bg1Skip");
0277
0278 bg2File = args.get<std::string>("--bg2File");
0279 bg2Freq = args.get<double>("--bg2Freq");
0280 bg2Skip = args.get<int>("--bg2Skip");
0281
0282 bg3File = args.get<std::string>("--bg3File");
0283 bg3Freq = args.get<double>("--bg3Freq");
0284 bg3Skip = args.get<int>("--bg3Skip");
0285
0286 outputFile = args.get<std::string>("--outputFile");
0287 rootFormat = args.get<bool>("--rootFormat");
0288 intWindow = args.get<double>("--intWindow");
0289 nSlices = args.get<int>("--nSlices");
0290 squashTime = args.get<bool>("--squashTime");
0291 rngSeed = args.get<int>("--rngSeed");
0292 verbose = args.get<bool>("--verbose");
0293
0294 }
0295
0296
0297 void banner() {
0298
0299 std::cout << "==================================================================" << std::endl;
0300 std::cout << "=== EPIC HEPMC MERGER ===" << std::endl;
0301 std::cout << "authors: Benjamen Sterwerf* (bsterwerf@berkeley.edu), Kolja Kauder** (kkauder@bnl.gov), Reynier Cruz-Torres***" << std::endl;
0302 std::cout << "* University of California, Berkeley" << std::endl;
0303 std::cout << "** Brookhaven National Laboratory" << std::endl;
0304 std::cout << "*** formerly Lawrence Berkeley National Laboratory" << std::endl;
0305 std::cout << "\nFor more information, run \n./signal_background_merger --help" << std::endl;
0306
0307 std::cout << "Number of Slices:" << nSlices << endl;
0308 std::string freqTerm = signalFreq > 0 ? std::to_string(signalFreq) + " kHz" : "(one event per time slice)";
0309 std::cout << "Signal events file and frequency:\n";
0310 std::cout << "\t- " << signalFile << "\t" << freqTerm << "\n";
0311
0312 std::cout << "\nBackground files and their respective frequencies:\n";
0313 if (!bg1File.empty()) {
0314 freqTerm = bg1Freq > 0 ? std::to_string(bg1Freq) + " kHz" : "(from weights)";
0315 std::cout << "\t- " << bg1File << "\t" << freqTerm << "\n";
0316 }
0317 if (!bg2File.empty()) {
0318 freqTerm = bg2Freq > 0 ? std::to_string(bg2Freq) + " kHz" : "(from weights)";
0319 std::cout << "\t- " << bg2File << "\t" << freqTerm << "\n";
0320 }
0321 if (!bg3File.empty()) {
0322 freqTerm = bg3Freq > 0 ? std::to_string(bg3Freq) + " kHz" : "(from weights)";
0323 std::cout << "\t- " << bg3File << "\t" << freqTerm << "\n";
0324 }
0325 }
0326
0327
0328 void PrepData(const std::string& fileName, double freq, int skip=0, bool signal=false) {
0329 if (fileName.empty()) return;
0330
0331 cout << "Prepping " << fileName << endl;
0332 std::shared_ptr<HepMC3::Reader> adapter;
0333 try {
0334 adapter = HepMC3::deduce_reader(fileName);
0335 if (!adapter) {
0336 throw std::runtime_error("Failed to open file");
0337 }
0338 } catch (const std::runtime_error& e) {
0339 std::cerr << "Opening " << fileName << " failed: " << e.what() << std::endl;
0340 exit(1);
0341 }
0342
0343 infoDict[fileName] = 0;
0344
0345 if (signal) {
0346 sigAdapter = adapter;
0347 sigFreq = freq;
0348 sigAdapter->skip(skip);
0349 return;
0350 }
0351
0352
0353 if (freq <= 0) {
0354 std::cout << "Reading in all events from " << fileName << std::endl;
0355 std::vector<HepMC3::GenEvent> events;
0356 std::vector<double> weights;
0357
0358 while(!adapter->failed()) {
0359 HepMC3::GenEvent evt(HepMC3::Units::GEV,HepMC3::Units::MM);
0360 adapter->read_event(evt);
0361
0362
0363
0364
0365 if (double w=evt.weight() > 0){
0366 events.push_back(evt);
0367 weights.push_back(evt.weight());
0368 }
0369 }
0370 adapter->close();
0371
0372 double avgRate = 0.0;
0373 for ( auto w : weights ){ avgRate += w;}
0374 avgRate /= weights.size();
0375 avgRate *= 1e-9;
0376 std::cout << "Average rate is " << avgRate << " GHz" << std::endl;
0377
0378 std::vector<int> indices (weights.size());
0379 std::iota (std::begin(indices), std::end(indices), 0);
0380
0381
0382
0383
0384
0385
0386
0387
0388
0389
0390 std::piecewise_constant_distribution<> weightedDist(std::begin(indices),std::end(indices),
0391 std::begin(weights));
0392 weightDict[fileName] = { std::make_tuple(events, weightedDist, avgRate) };
0393
0394 return;
0395 }
0396
0397
0398 adapter->skip(skip);
0399 freqAdapters[fileName] = adapter;
0400 freqs[fileName] = freq;
0401 }
0402
0403
0404
0405 bool hasEnding (std::string const &fullString, std::string const &ending) {
0406 if (fullString.length() >= ending.length()) {
0407 return (0 == fullString.compare (fullString.length() - ending.length(), ending.length(), ending));
0408 } else {
0409 return false;
0410 }
0411 }
0412
0413
0414 std::string nameGen() {
0415 std::string name = signalFile;
0416 if (nSlices > 0) {
0417 size_t pos = name.find(".hepmc");
0418 if (pos != std::string::npos) {
0419 name.replace(pos, 6, "_n_" + std::to_string(nSlices) + ".hepmc");
0420 }
0421 }
0422 if ( rootFormat && !hasEnding(name,".root")){
0423 name.append(".root");
0424 }
0425 name = "bgmerged_" + name;
0426 return name;
0427 }
0428
0429
0430 void squawk(int i) {
0431
0432
0433 #ifdef __MACH__
0434 task_basic_info_data_t info;
0435 mach_msg_type_number_t size = sizeof(info);
0436 kern_return_t kerr = task_info(mach_task_self(),
0437 TASK_BASIC_INFO,
0438 (task_info_t)&info,
0439 &size);
0440
0441 long memory_usage = -1;
0442 if (kerr == KERN_SUCCESS) {
0443 memory_usage = info.resident_size / 1024 / 1024;
0444 }
0445 #else
0446 std::ifstream statm("/proc/self/statm");
0447 long size, resident, share, text, lib, data, dt;
0448 statm >> size >> resident >> share >> text >> lib >> data >> dt;
0449 statm.close();
0450
0451 long page_size = sysconf(_SC_PAGESIZE);
0452 long memory_usage = resident * page_size / 1024 / 1024 ;
0453 #endif
0454
0455
0456 std::cout << "Working on slice " << i + 1 << std::endl;
0457 std::cout << "Current memory usage: " << memory_usage << " MB" << std::endl;
0458
0459 }
0460
0461
0462 std::unique_ptr<HepMC3::GenEvent> mergeSlice(int i) {
0463 auto hepSlice = std::make_unique<HepMC3::GenEvent>(HepMC3::Units::GEV, HepMC3::Units::MM);
0464
0465 addFreqEvents(signalFile, sigAdapter, sigFreq, hepSlice, true);
0466
0467 for (const auto& freqBgs : freqAdapters) {
0468 auto fileName=freqBgs.first;
0469 addFreqEvents(fileName, freqAdapters[fileName], freqs[fileName], hepSlice);
0470 }
0471
0472 for (const auto& fileName : weightDict) {
0473 addWeightedEvents(fileName.first, hepSlice);
0474 }
0475
0476 return hepSlice;
0477 };
0478
0479
0480
0481 void addFreqEvents(std::string fileName, std::shared_ptr<HepMC3::Reader>& adapter, const double freq,
0482 std::unique_ptr<HepMC3::GenEvent>& hepSlice, bool signal = false) {
0483
0484
0485
0486 std::vector<double> timeline;
0487
0488 std::uniform_real_distribution<> uni(0, intWindow);
0489 if (freq == 0){
0490 if (!signal) {
0491 std::cerr << "frequency can't be 0 for background files" << std::endl;
0492 exit(1);
0493 }
0494
0495 timeline.push_back(uni(rng));
0496 } else {
0497
0498 timeline = poissonTimes(freq, intWindow);
0499 }
0500
0501 if ( verbose) std::cout << "Placing " << timeline.size() << " events from " << fileName << std::endl;
0502 infoDict[fileName] += timeline.size();
0503
0504
0505
0506
0507
0508
0509
0510
0511 if (timeline.empty()) return;
0512
0513
0514 for (double time : timeline) {
0515 if(adapter->failed()) {
0516 try{
0517 if (signal) {
0518 throw std::ifstream::failure("EOF");
0519 } else {
0520 std::cout << "Cycling back to the start of " << fileName << std::endl;
0521 adapter->close();
0522 adapter = HepMC3::deduce_reader(fileName);
0523 }
0524 } catch (std::ifstream::failure& e) {
0525 continue;
0526 }
0527 }
0528
0529 HepMC3::GenEvent inevt;
0530 adapter->read_event(inevt);
0531
0532 if (squashTime) time = 0;
0533 insertHepmcEvent( inevt, hepSlice, time);
0534 }
0535
0536 return;
0537 }
0538
0539
0540
0541 void addWeightedEvents(std::string fileName, std::unique_ptr<HepMC3::GenEvent>& hepSlice, bool signal = false) {
0542 auto& [events, weightedDist, avgRate ] = weightDict[fileName];
0543
0544
0545 int nEvents;
0546 std::poisson_distribution<> d( intWindow * avgRate );
0547
0548
0549
0550 while (true) {
0551 nEvents = d(rng);
0552 if (nEvents > events.size()) {
0553 std::cout << "WARNING: Trying to place " << nEvents << " events from " << fileName
0554 << " but the file doesn't have enough. Rerolling, but this is not physical." << std::endl;
0555 continue;
0556 }
0557 break;
0558 }
0559
0560 if (verbose) std::cout << "Placing " << nEvents << " events from " << fileName << std::endl;
0561 infoDict[fileName] += nEvents;
0562
0563
0564
0565 std::vector<HepMC3::GenEvent> toPlace(nEvents);
0566 for ( auto& e : toPlace ){
0567 auto i = static_cast<int> (weightedDist(rng));
0568 e = events.at(i);
0569 }
0570
0571
0572 std::vector<double> timeline;
0573 std::uniform_real_distribution<> uni(0, intWindow);
0574 if (!squashTime) {
0575 for ( auto& e : toPlace ){
0576 double time = squashTime ? 0 : uni(rng);
0577 insertHepmcEvent( e, hepSlice, time);
0578 }
0579 }
0580
0581 return;
0582 }
0583
0584
0585 void insertHepmcEvent( const HepMC3::GenEvent& inevt,
0586 std::unique_ptr<HepMC3::GenEvent>& hepSlice, double time=0) {
0587
0588 double timeHepmc = c_light * time;
0589
0590 std::vector<HepMC3::GenParticlePtr> particles;
0591 std::vector<HepMC3::GenVertexPtr> vertices;
0592
0593
0594
0595 for (auto& vertex : inevt.vertices()) {
0596 HepMC3::FourVector position = vertex->position();
0597 position.set_t(position.t() + timeHepmc);
0598 auto v1 = std::make_shared<HepMC3::GenVertex>(position);
0599 vertices.push_back(v1);
0600 }
0601
0602
0603 for (auto& particle : inevt.particles()) {
0604 HepMC3::FourVector momentum = particle->momentum();
0605 int status = particle->status();
0606 int pid = particle->pid();
0607 auto p1 = std::make_shared<HepMC3::GenParticle> (momentum, pid, status);
0608 p1->set_generated_mass(particle->generated_mass());
0609 particles.push_back(p1);
0610
0611 if (particle->production_vertex()->id() < 0) {
0612 int production_vertex = particle->production_vertex()->id();
0613 vertices[abs(production_vertex) - 1]->add_particle_out(p1);
0614 hepSlice->add_particle(p1);
0615 }
0616
0617
0618 if (particle->end_vertex()) {
0619 int end_vertex = particle->end_vertex()->id();
0620 vertices.at(abs(end_vertex) - 1)->add_particle_in(p1);
0621 }
0622 }
0623
0624
0625 for (auto& vertex : vertices) {
0626 hepSlice->add_vertex(vertex);
0627 }
0628
0629 }
0630
0631
0632
0633 std::vector<double> poissonTimes(double mu, double endTime) {
0634 std::exponential_distribution<> exp(mu);
0635
0636 double t = 0;
0637 std::vector<double> ret;
0638 while (true) {
0639 double delt = exp(rng)*1e6;
0640
0641 t += delt;
0642 if (t >= endTime) {
0643 break;
0644 }
0645 ret.push_back(t);
0646 }
0647 return ret;
0648 }
0649
0650
0651
0652
0653 std::mt19937 rng;
0654 string signalFile, bg1File, bg2File, bg3File;
0655 double signalFreq, bg1Freq, bg2Freq, bg3Freq;
0656 int signalSkip, bg1Skip, bg2Skip, bg3Skip;
0657 string outputFile;
0658 string outputFileName;
0659 bool rootFormat;
0660 double intWindow;
0661 int nSlices;
0662 bool squashTime;
0663 int rngSeed;
0664 bool verbose;
0665
0666 const double c_light = 299.792458;
0667
0668
0669
0670
0671
0672
0673
0674 };
0675
0676
0677 int main(int argc, char* argv[]) {
0678
0679 auto t0 = std::chrono::high_resolution_clock::now();
0680
0681 SignalBackgroundMerger sbm (argc, argv);
0682
0683
0684 sbm.merge();
0685
0686
0687
0688
0689
0690
0691
0692
0693
0694
0695
0696
0697
0698
0699
0700
0701
0702
0703
0704
0705
0706
0707 std::cout << "\n==================================================================\n";
0708 std::cout << "Overall running time: " << std::round(std::chrono::duration<double, std::chrono::minutes::period>(std::chrono::high_resolution_clock::now() - t0).count()) << " min" << std::endl;
0709
0710
0711 return 0;
0712 }