File indexing completed on 2025-01-18 09:27:05
0001 import pyhepmc
0002 from pyhepmc.io import WriterAscii
0003 import argparse
0004
0005 import numpy as np
0006 import pandas as pd
0007
0008 from datetime import datetime
0009 from operator import itemgetter
0010 import time
0011 import sys
0012
0013 import psutil
0014
0015
0016 class signal_background_merger:
0017 """
0018 Combine signal and up to three background HEPMC files.
0019
0020 Typical usage:
0021 python3 signal_background_merger --signalFile dis.hepmc --signalFreq 0 \
0022 --bg1File hgas.hepmc --bg1Freq 1852 \
0023 --bg2File egas.hepmc --bg2Freq 1852 \
0024 --bg3File sr.hepmc
0025 """
0026
0027
0028 def __init__(self):
0029 """ Parse arguments, print banner, open files, initialize rng"""
0030
0031 self.digestArgs()
0032 self.rng=np.random.default_rng(seed=self.args.rngSeed)
0033 self.banner()
0034
0035
0036 def digestArgs(self):
0037 """Handle the command line tedium"""
0038 parser = argparse.ArgumentParser(description='Merge signal events with up to three background sources.')
0039
0040 parser.add_argument('-i','--signalFile', default='small_ep_noradcor.10x100_q2_10_100_run001.hepmc',
0041 help='Name of the HEPMC file with the signal events')
0042 parser.add_argument('-sf','--signalFreq', type=float, default=0.0,
0043 help='Poisson-mu of the signal frequency in ns. Default is 0 to have exactly one signal event per slice. Set to the estimated DIS rate to randomize.')
0044
0045
0046 parser.add_argument('-bg1','--bg1File', default='small_hgas_100GeV_HiAc_25mrad.Asciiv3.hepmc',
0047 help='Name of the first HEPMC file with background events')
0048 parser.add_argument('-bf1','--bg1Freq', type=float, default=31347.0,
0049 help='Poisson-mu of the first background frequency in ns. Default is the estimated hadron gas rate at 10x100. Set to 0 to use the weights in the corresponding input file.')
0050
0051 parser.add_argument('-bg2','--bg2File', default='small_beam_gas_ep_10GeV_foam_emin10keV_vtx.hepmc',
0052 help='Name of the second HEPMC file with background events')
0053 parser.add_argument('-bf2','--bg2Freq', type=float, default=333.0,
0054 help='Poisson-mu of the second background frequency in ns. Default is the estimated electron gas rate at 10x100. Set to 0 to use the weights in the corresponding input file.')
0055
0056 parser.add_argument('-bg3','--bg3File', default='small_SR_single_2.5A_10GeV.hepmc',
0057 help='Name of the third HEPMC file with background events')
0058 parser.add_argument('-bf3','--bg3Freq', type=float, default=0,
0059 help='Poisson-mu of the third background frequency in ns. Default is 0 to use the weights in the corresponding input file. Set to a value >0 to specify a poisson mu instead.')
0060
0061
0062 parser.add_argument('-o','--outputFile', default='',
0063 help='Specify the output file name. By default it will be auto-generated.')
0064
0065 parser.add_argument('-w','--intWindow', type=float, default=2000.0,
0066 help='Length of the integration window in nanoseconds. Default is 2000.')
0067 parser.add_argument('-N','--nSlices', type=int, default=-1,
0068 help='Number of sampled time slices ("events"). Default is 10. If set to -1, all events in the signal file will be used and background files cycled as needed.')
0069 parser.add_argument('--squashTime', action='store_true',
0070 help='Integration is performed but no time information is associated to vertices.')
0071
0072 parser.add_argument('--rngSeed', action='store',type=int, default=None,
0073 help='Random seed, default is None')
0074
0075 parser.add_argument('-v','--verbose', action='store_true',
0076 help='Display details for every slice.')
0077 self.args = parser.parse_args()
0078
0079
0080 def banner(self):
0081 """Print a banner."""
0082 print('==================================================================')
0083 print('=== EPIC HEPMC MERGER ===')
0084 print('authors: Benjamen Sterwerf* (bsterwerf@berkeley.edu), Kolja Kauder** (kkauder@bnl.gov), Reynier Cruz-Torres***')
0085 print('* University of California, Berkeley')
0086 print('** Brookhaven National Laboratory')
0087 print('*** formerly Lawrence Berkeley National Laboratory')
0088
0089 print('\nFor more information, run \npython signal_background_merger.py --help')
0090 print('==================================================================\n')
0091
0092 freqTerm = str(self.args.signalFreq) + " ns" if self.args.signalFreq> 0 else "(one event per time slice)"
0093 print('Signal events file and frequency:')
0094 print('\t- {} \t {}'.format( self.args.signalFile, freqTerm ))
0095
0096 print('\nBackground files and their respective frequencies:')
0097 if self.args.bg1File != "" :
0098 freqTerm = str(self.args.bg1Freq) + " ns" if self.args.bg1Freq> 0 else "(from weights)"
0099 print('\t- {} \t {}'.format( self.args.bg1File, freqTerm ))
0100 if self.args.bg2File != "" :
0101 freqTerm = str(self.args.bg2Freq) + " ns" if self.args.bg2Freq> 0 else "(from weights)"
0102 print('\t- {} \t {}'.format( self.args.bg2File, freqTerm ))
0103 if self.args.bg3File != "" :
0104 freqTerm = str(self.args.bg3Freq) + " ns" if self.args.bg3Freq> 0 else "(from weights)"
0105 print('\t- {} \t {}'.format( self.args.bg3File, freqTerm ))
0106
0107
0108 print()
0109 print('Integration Window:',self.args.intWindow, ' ns')
0110 print('Number of time slices:', self.args.nSlices if self.args.nSlices>0 else ' (as many as needed to exhaust the signal file)')
0111 if self.args.squashTime:
0112 print('No timing information will be attached to vertices')
0113 print('The RNG is seeded to', self.args.rngSeed)
0114 print('==================================================================\n')
0115
0116
0117 def merge( self ):
0118 """Main method to steer the merging."""
0119
0120 t0 = time.time()
0121
0122 try :
0123
0124 sigFile=pyhepmc.io.open(self.args.signalFile)
0125 except IOError as e:
0126 print ('Opening files failed: %s' % e.strerror)
0127 sys.exit()
0128
0129
0130
0131 self.sigDict=dict()
0132 self.freqDict=dict()
0133 self.weightDict=dict()
0134 self.makeDicts ( self.args.signalFile, self.args.signalFreq, signal=True )
0135 self.makeDicts ( self.args.bg1File, self.args.bg1Freq )
0136 self.makeDicts ( self.args.bg2File, self.args.bg2Freq )
0137 self.makeDicts ( self.args.bg3File, self.args.bg3Freq )
0138
0139
0140 if self.args.outputFile != "" :
0141 outputFileName = self.args.outputFile
0142 else :
0143 outputFileName = self.nameGen()
0144
0145 t1 = time.time()
0146 print('Initiation time:',np.round((t1-t0)/60.,2),'min')
0147
0148
0149 nSlices=self.args.nSlices
0150
0151 print()
0152 print('==================================================================')
0153 print()
0154 i=0
0155 with WriterAscii(outputFileName) as f:
0156 while True :
0157 if (i==nSlices) :
0158 print ( "Finished all requested slices." )
0159 break
0160
0161 if i % 100 == 0 or self.args.verbose : self.squawk(i)
0162 hepSlice = self.mergeSlice( i )
0163
0164 try :
0165 if hepSlice==None :
0166 print ( "Exhausted signal source." )
0167 break
0168 except TypeError as e:
0169 pass
0170
0171 hepSlice.event_number=i
0172 f.write_event(hepSlice)
0173 i=i+1
0174
0175 t2 = time.time()
0176 self.slicesDone=i
0177 print('Slice loop time:',np.round((t2-t1)/60.,2),'min')
0178 print(' -- ',np.round((t2-t1) / self.slicesDone ,3),'sec / slice')
0179
0180
0181
0182 File, Freq = self.sigDict[self.args.signalFile]
0183
0184 for fileName in self.freqDict :
0185 File, Freq = self.freqDict[fileName]
0186 File.close()
0187
0188
0189
0190 def makeDicts( self, fileName, freq, signal=False ):
0191 """Create background timeline chunks, open input file, sort into frequency or weight type"""
0192 if fileName == "" : return
0193
0194 try :
0195
0196 File=pyhepmc.open(fileName)
0197 except IOError as e:
0198 print ('Opening {} failed: {}', fileName, e.strerror)
0199 sys.exit()
0200
0201
0202 if signal :
0203 self.sigDict[fileName] = [ File, freq ]
0204 return
0205
0206 if freq<=0 :
0207
0208
0209
0210 print ( "Reading in all events from", fileName )
0211
0212 container = [[event, event.weight()] for event in File if event.weight() > 0]
0213
0214 container = sorted ( container, key=itemgetter(1) )
0215 File.close()
0216 events = np.array([ item[0] for item in container ])
0217 weights = np.array([ item[1] for item in container ])
0218 avgRate = np.average (weights)
0219 avgRate *= 1e-9
0220 print( "Average rate is", avgRate, "GHz")
0221
0222 probs = weights / weights.sum()
0223 self.weightDict[fileName]=[ events, probs, avgRate ]
0224 return
0225
0226 self.freqDict[fileName] = [ File, freq ]
0227
0228 return
0229
0230
0231 def mergeSlice(self, i):
0232 """Arrange the composition of an individual time slice"""
0233
0234 hepSlice = pyhepmc.GenEvent(pyhepmc.Units.GEV, pyhepmc.Units.MM)
0235
0236
0237
0238 try :
0239 hepSlice = self.addFreqEvents( self.args.signalFile, hepSlice, True )
0240 except EOFError :
0241 return
0242
0243
0244 for fileName in self.freqDict :
0245 hepSlice = self.addFreqEvents( fileName, hepSlice )
0246
0247 for fileName in self.weightDict :
0248 hepSlice = self.addWeightedEvents( fileName, hepSlice )
0249
0250 return hepSlice
0251
0252
0253 def addFreqEvents( self, fileName, hepSlice, signal=False ):
0254 """Handles the signal as well as frequency-style backgrounds"""
0255
0256 if signal :
0257 File, Freq = self.sigDict[fileName]
0258 else :
0259 File, Freq = self.freqDict[fileName]
0260
0261
0262 c_light = 299.792458
0263 squash = self.args.squashTime
0264
0265
0266 intTime=self.args.intWindow
0267
0268
0269
0270 if Freq==0:
0271 if not signal :
0272 print( "frequency can't be 0 for background files" )
0273 sys.exit()
0274
0275 slice = np.array([ self.rng.uniform(low=0, high=intTime) ])
0276 else:
0277
0278 slice = self.poissonTimes( Freq, intTime )
0279
0280 if self.args.verbose : print ( "Placing",slice.size,"events from", fileName )
0281 if slice.size == 0 : return hepSlice
0282
0283
0284 for Time in slice :
0285
0286 inevt = File.read()
0287 try :
0288 if inevt==None :
0289 if signal :
0290
0291 raise EOFError
0292 else :
0293
0294 print("Cycling back to the start of ", fileName )
0295 File.close()
0296
0297 File=pyhepmc.io.open(fileName)
0298
0299 self.freqDict[fileName] = [File, Freq]
0300 inevt = File.read()
0301 except TypeError as e:
0302 pass
0303
0304
0305 TimeHepmc = c_light*Time
0306
0307 particles, vertices = [], []
0308
0309 for vertex in inevt.vertices:
0310 position=vertex.position
0311 if not squash:
0312 position=position+pyhepmc.FourVector(x=0,y=0,z=0,t=TimeHepmc)
0313 v1=pyhepmc.GenVertex(position)
0314 vertices.append(v1)
0315
0316
0317 for particle in inevt.particles:
0318
0319 momentum, status, pid = particle.momentum, particle.status, particle.pid
0320 p1 = pyhepmc.GenParticle(momentum=momentum, pid=pid, status=status)
0321 p1.generated_mass = particle.generated_mass
0322 particles.append(p1)
0323
0324
0325 if particle.production_vertex.id < 0:
0326 production_vertex=particle.production_vertex.id
0327 vertices[abs(production_vertex)-1].add_particle_out(p1)
0328 hepSlice.add_particle(p1)
0329
0330
0331 if particle.end_vertex:
0332 end_vertex = particle.end_vertex.id
0333 vertices[abs(end_vertex)-1].add_particle_in(p1)
0334
0335
0336 for vertex in vertices:
0337 hepSlice.add_vertex(vertex)
0338
0339 return hepSlice
0340
0341
0342 def addWeightedEvents( self, fileName, hepSlice, signal=False ):
0343 """Handles weighted backgrounds"""
0344
0345 events, probs, avgRate = self.weightDict[fileName]
0346
0347 c_light = 299.792458
0348 squash = self.args.squashTime
0349 intTime = self.args.intWindow
0350 squash = self.args.squashTime
0351
0352
0353 while True:
0354 nEvents = int( self.rng.exponential( intTime * avgRate ) )
0355 if nEvents > len(events) :
0356 print("WARNING: Trying to place",nEvents,"events from", fileName,
0357 "but the file doesn't have enough. Rerolling, but this is not physical.")
0358 continue
0359 break
0360 if self.args.verbose : print ( "Placing",nEvents,"events from", fileName )
0361
0362
0363 toPlace = self.rng.choice( a=events, size=nEvents, p=probs, replace=False )
0364
0365
0366 if not squash :
0367 times = list(self.rng.uniform(low=0, high=intTime, size=nEvents))
0368
0369 for inevt in toPlace :
0370 Time = 0
0371 if not squash :
0372 Time = times.pop()
0373
0374
0375
0376 particles, vertices = [], []
0377
0378 for vertex in inevt.vertices:
0379 position=vertex.position
0380 if not squash :
0381
0382 TimeHepmc = c_light*Time
0383 position=position+pyhepmc.FourVector(x=0,y=0,z=0,t=TimeHepmc)
0384 v1=pyhepmc.GenVertex(position)
0385 vertices.append(v1)
0386
0387
0388 for particle in inevt.particles:
0389
0390 momentum, status, pid = particle.momentum, particle.status, particle.pid
0391 p1 = pyhepmc.GenParticle(momentum=momentum, pid=pid, status=status)
0392 p1.generated_mass = particle.generated_mass
0393 particles.append(p1)
0394
0395
0396 if particle.production_vertex.id < 0:
0397 production_vertex=particle.production_vertex.id
0398 vertices[abs(production_vertex)-1].add_particle_out(p1)
0399 hepSlice.add_particle(p1)
0400
0401
0402 if particle.end_vertex:
0403 end_vertex = particle.end_vertex.id
0404 vertices[abs(end_vertex)-1].add_particle_in(p1)
0405
0406
0407 for vertex in vertices:
0408 hepSlice.add_vertex(vertex)
0409
0410 return hepSlice
0411
0412
0413 def poissonTimes( self, mu, endTime ):
0414 """Return an np.array of poisson-distributed times."""
0415
0416
0417 t = 0
0418 ret=np.array([])
0419 while True :
0420 delt = self.rng.exponential( mu )
0421 t = t + delt
0422 if t >= endTime :
0423 break
0424 ret = np.append(ret,t)
0425 return ret
0426
0427
0428 def nameGen(self):
0429
0430
0431
0432
0433 name = self.args.signalFile
0434 if self.args.nSlices > 0 :
0435 name = name.replace(".hepmc","_n_{}.hepmc".format(self.args.nSlices))
0436
0437 name = "bgmerged_"+name
0438 return name
0439
0440 def squawk(self,i) :
0441 print('Working on slice {}'.format( i+1 ))
0442 print('Resident Memory',psutil.Process().memory_info().rss / 1024 / 1024,"MB")
0443
0444
0445 def fileWriter(combo_cont):
0446 numEvents=len(combo_cont)
0447 name=nameGen(numEvents)
0448 Event_ind=0
0449 with WriterAscii(name) as f:
0450 for i in combo_cont:
0451 i.event_number=Event_ind
0452 Event_ind=Event_ind+1
0453 f.write_event(i)
0454 f.close()
0455
0456
0457
0458
0459 if __name__ == '__main__':
0460
0461 t0 = time.time()
0462 sbm = signal_background_merger()
0463 sbm.merge()
0464 print()
0465 print('==================================================================')
0466 print('Overall running time:',np.round((time.time()-t0)/60.,2),'min')
0467
0468
0469