Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:27:05

0001 import pyhepmc
0002 from pyhepmc.io import WriterAscii
0003 import argparse
0004 
0005 import numpy as np
0006 import pandas as pd
0007 
0008 from datetime import datetime
0009 from operator import itemgetter
0010 import time
0011 import sys
0012 
0013 import psutil
0014 
0015 # =============================================================
0016 class signal_background_merger:
0017     """
0018     Combine signal and up to three background HEPMC files.
0019     
0020     Typical usage:
0021     python3 signal_background_merger --signalFile dis.hepmc --signalFreq 0 \
0022             --bg1File hgas.hepmc --bg1Freq 1852 \
0023             --bg2File egas.hepmc --bg2Freq 1852 \
0024             --bg3File sr.hepmc
0025     """
0026 
0027     # ============================================================================================
0028     def __init__(self):
0029         """ Parse arguments, print banner, open files, initialize rng"""
0030         
0031         self.digestArgs()
0032         self.rng=np.random.default_rng(seed=self.args.rngSeed)
0033         self.banner()
0034 
0035     # ============================================================================================
0036     def digestArgs(self):
0037         """Handle the command line tedium"""
0038         parser = argparse.ArgumentParser(description='Merge signal events with up to three background sources.')
0039         
0040         parser.add_argument('-i','--signalFile', default='small_ep_noradcor.10x100_q2_10_100_run001.hepmc',
0041                             help='Name of the HEPMC file with the signal events')
0042         parser.add_argument('-sf','--signalFreq', type=float, default=0.0,
0043                             help='Poisson-mu of the signal frequency in ns. Default is 0 to have exactly one signal event per slice. Set to the estimated DIS rate to randomize.')
0044 
0045     
0046         parser.add_argument('-bg1','--bg1File', default='small_hgas_100GeV_HiAc_25mrad.Asciiv3.hepmc',
0047                             help='Name of the first HEPMC file with background events')
0048         parser.add_argument('-bf1','--bg1Freq', type=float, default=31347.0,
0049                             help='Poisson-mu of the first background frequency in ns. Default is the estimated hadron gas rate at 10x100. Set to 0 to use the weights in the corresponding input file.')
0050 
0051         parser.add_argument('-bg2','--bg2File', default='small_beam_gas_ep_10GeV_foam_emin10keV_vtx.hepmc',
0052                             help='Name of the second HEPMC file with background events')
0053         parser.add_argument('-bf2','--bg2Freq', type=float, default=333.0,
0054                             help='Poisson-mu of the second background frequency in ns. Default is the estimated electron  gas rate at 10x100. Set to 0 to use the weights in the corresponding input file.')
0055 
0056         parser.add_argument('-bg3','--bg3File', default='small_SR_single_2.5A_10GeV.hepmc',
0057                             help='Name of the third HEPMC file with background events')
0058         parser.add_argument('-bf3','--bg3Freq', type=float, default=0,
0059                             help='Poisson-mu of the third background frequency in ns. Default is 0 to use the weights in the corresponding input file. Set to a value >0 to specify a poisson mu instead.')
0060 
0061 
0062         parser.add_argument('-o','--outputFile', default='',
0063                             help='Specify the output file name. By default it will be auto-generated.')
0064 
0065         parser.add_argument('-w','--intWindow', type=float, default=2000.0,
0066                             help='Length of the integration window in nanoseconds. Default is 2000.')
0067         parser.add_argument('-N','--nSlices', type=int, default=-1,
0068                             help='Number of sampled time slices ("events"). Default is 10. If set to -1, all events in the signal file will be used and background files cycled as needed.')
0069         parser.add_argument('--squashTime', action='store_true',
0070                             help='Integration is performed but no time information is associated to vertices.')
0071 
0072         parser.add_argument('--rngSeed', action='store',type=int, default=None,
0073                             help='Random seed, default is None')
0074 
0075         parser.add_argument('-v','--verbose', action='store_true',
0076                             help='Display details for every slice.')
0077         self.args = parser.parse_args()
0078 
0079     # ============================================================================================
0080     def banner(self):
0081         """Print a banner."""
0082         print('==================================================================')
0083         print('=== EPIC HEPMC MERGER ===')
0084         print('authors: Benjamen Sterwerf* (bsterwerf@berkeley.edu), Kolja Kauder** (kkauder@bnl.gov), Reynier Cruz-Torres***')
0085         print('* University of California, Berkeley')
0086         print('** Brookhaven National Laboratory')
0087         print('*** formerly Lawrence Berkeley National Laboratory')
0088 
0089         print('\nFor more information, run \npython signal_background_merger.py --help')
0090         print('==================================================================\n')
0091 
0092         freqTerm = str(self.args.signalFreq) + " ns" if self.args.signalFreq> 0 else "(one event per time slice)"
0093         print('Signal events file and frequency:')
0094         print('\t- {} \t {}'.format( self.args.signalFile, freqTerm ))
0095         
0096         print('\nBackground files and their respective frequencies:')
0097         if self.args.bg1File != "" :
0098             freqTerm = str(self.args.bg1Freq) + " ns" if self.args.bg1Freq> 0 else "(from weights)"
0099             print('\t- {} \t {}'.format( self.args.bg1File, freqTerm ))
0100         if self.args.bg2File != "" :
0101             freqTerm = str(self.args.bg2Freq) + " ns" if self.args.bg2Freq> 0 else "(from weights)"
0102             print('\t- {} \t {}'.format( self.args.bg2File, freqTerm ))
0103         if self.args.bg3File != "" :
0104             freqTerm = str(self.args.bg3Freq) + " ns" if self.args.bg3Freq> 0 else "(from weights)"
0105             print('\t- {} \t {}'.format( self.args.bg3File, freqTerm ))
0106 
0107 
0108         print()
0109         print('Integration Window:',self.args.intWindow, ' ns')
0110         print('Number of time slices:', self.args.nSlices if self.args.nSlices>0 else ' (as many as needed to exhaust the signal file)')
0111         if self.args.squashTime:
0112             print('No timing information will be attached to vertices')
0113         print('The RNG is seeded to', self.args.rngSeed)
0114         print('==================================================================\n')
0115 
0116     # ============================================================================================
0117     def merge( self ):
0118         """Main method to steer the merging."""
0119 
0120         t0 = time.time()
0121         # Open signal file
0122         try :
0123             # sigFile=pyhepmc.io.ReaderAscii(self.args.signalFile)
0124             sigFile=pyhepmc.io.open(self.args.signalFile)
0125         except IOError as e:
0126             print ('Opening files failed: %s' % e.strerror)
0127             sys.exit()
0128 
0129         # Open input file readers, group them with the associated frequency,
0130         # then sort them into signal, frequency bg, or weight type bg
0131         self.sigDict=dict()
0132         self.freqDict=dict()
0133         self.weightDict=dict()
0134         self.makeDicts ( self.args.signalFile, self.args.signalFreq, signal=True )
0135         self.makeDicts ( self.args.bg1File, self.args.bg1Freq )
0136         self.makeDicts ( self.args.bg2File, self.args.bg2Freq )
0137         self.makeDicts ( self.args.bg3File, self.args.bg3Freq )
0138 
0139         # Open the output file
0140         if self.args.outputFile != "" :
0141             outputFileName = self.args.outputFile
0142         else :
0143             outputFileName = self.nameGen()
0144 
0145         t1 = time.time()        
0146         print('Initiation time:',np.round((t1-t0)/60.,2),'min')
0147                     
0148         # Process and write
0149         nSlices=self.args.nSlices
0150 
0151         print()
0152         print('==================================================================')
0153         print()
0154         i=0
0155         with WriterAscii(outputFileName) as f:
0156             while True :
0157                 if (i==nSlices) :
0158                     print ( "Finished all requested slices." )
0159                     break
0160                 # if  i % 10000 == 0 : print('Working on slice {}'.format( i+1 ))
0161                 if  i % 100 == 0 or self.args.verbose : self.squawk(i)
0162                 hepSlice = self.mergeSlice( i )
0163                 ### Arrgh, GenEvent==None throws an exception
0164                 try : 
0165                     if hepSlice==None : 
0166                         print ( "Exhausted signal source." )
0167                         break
0168                 except TypeError as e:
0169                     pass # just need to suppress the error
0170 
0171                 hepSlice.event_number=i
0172                 f.write_event(hepSlice)
0173                 i=i+1
0174 
0175         t2 = time.time()
0176         self.slicesDone=i
0177         print('Slice loop time:',np.round((t2-t1)/60.,2),'min')
0178         print(' -- ',np.round((t2-t1) / self.slicesDone ,3),'sec / slice')
0179         
0180 
0181         # Clean up, close all input files
0182         File, Freq = self.sigDict[self.args.signalFile]
0183         # File.close()
0184         for fileName in self.freqDict :
0185             File, Freq = self.freqDict[fileName]
0186             File.close()
0187 
0188     
0189     # ============================================================================================
0190     def makeDicts( self, fileName, freq, signal=False ):
0191         """Create background timeline chunks, open input file, sort into frequency or weight type"""
0192         if fileName == "" : return
0193 
0194         try :
0195             # Should use File=pyhepmc.open(fileName) but that doesn't currently work
0196             File=pyhepmc.open(fileName)
0197         except IOError as e:
0198             print ('Opening {} failed: {}', fileName, e.strerror)
0199             sys.exit()
0200             
0201         # For the signal only, we keep frequency even if it's 0
0202         if signal :
0203             self.sigDict[fileName] = [ File, freq ]
0204             return
0205             
0206         if freq<=0 :
0207             # file has its own weights
0208             # In this case, we will need to read in all events
0209             # because we need the distribution to draw from them
0210             print ( "Reading in all events from", fileName )
0211             # remove events with 0 weight - note that this does change avgRate = <weight> (by a little)
0212             container = [[event, event.weight()] for event in File if event.weight() > 0]
0213             # sorting may help for lookup, sampling
0214             container = sorted ( container, key=itemgetter(1) )            
0215             File.close()
0216             events  = np.array([ item[0] for item in container ])
0217             weights = np.array([ item[1] for item in container ])
0218             avgRate = np.average (weights) # I _think_ this is the total flux
0219             avgRate *= 1e-9 # convert to 1/ns == GHz
0220             print( "Average rate is", avgRate, "GHz")
0221             # np.Generator.choice expects normalized probabilities
0222             probs = weights / weights.sum()
0223             self.weightDict[fileName]=[ events, probs, avgRate ]
0224             return
0225 
0226         self.freqDict[fileName] = [ File, freq ]
0227 
0228         return
0229 
0230     # ============================================================================================
0231     def mergeSlice(self, i):
0232         """Arrange the composition of an individual time slice"""
0233         
0234         hepSlice = pyhepmc.GenEvent(pyhepmc.Units.GEV, pyhepmc.Units.MM)
0235         
0236         # Signal and frequency background are handled very similarly,
0237         # handle them in one method and just use a flag for what little is special
0238         try :
0239             hepSlice = self.addFreqEvents( self.args.signalFile, hepSlice, True )
0240         except EOFError :
0241             return
0242         
0243         # Treat frequency backgrounds very similarly 
0244         for fileName in self.freqDict :
0245             hepSlice = self.addFreqEvents( fileName, hepSlice )
0246 
0247         for fileName in self.weightDict :
0248             hepSlice = self.addWeightedEvents( fileName, hepSlice )
0249 
0250         return hepSlice
0251 
0252     # ============================================================================================
0253     def addFreqEvents( self, fileName, hepSlice, signal=False ):
0254         """Handles the signal as well as frequency-style backgrounds"""
0255 
0256         if signal :
0257             File, Freq = self.sigDict[fileName]
0258         else :
0259             File, Freq = self.freqDict[fileName]
0260 
0261 
0262         c_light  = 299.792458 # speed of light = 299.792458 mm/ns to get mm
0263         squash = self.args.squashTime
0264 
0265         # First, create a timeline
0266         intTime=self.args.intWindow
0267         # this could be offset by iSlice * intTime for continuous time throughout the file
0268 
0269         # Signals can be different
0270         if Freq==0:
0271             if not signal :
0272                 print( "frequency can't be 0 for background files" )
0273                 sys.exit()
0274             # exactly one signal event, at an arbigtrary point
0275             slice = np.array([ self.rng.uniform(low=0, high=intTime) ])
0276         else:
0277             # Generate poisson-distributed times to place events
0278             slice = self.poissonTimes( Freq, intTime )
0279 
0280         if self.args.verbose : print ( "Placing",slice.size,"events from", fileName )
0281         if slice.size == 0 : return hepSlice
0282 
0283         # Insert events at all specified locations
0284         for Time in slice :
0285             ### Arrgh, GenEvent==None throws an exception
0286             inevt = File.read()
0287             try : 
0288                 if inevt==None :
0289                     if signal :
0290                         # Exhausted signal events
0291                         raise EOFError
0292                     else :
0293                         # background file reached its end, reset to the start
0294                         print("Cycling back to the start of ", fileName )
0295                         File.close()
0296                         # File=pyhepmc.io.ReaderAscii(fileName)
0297                         File=pyhepmc.io.open(fileName)
0298                         # also update the dictionary
0299                         self.freqDict[fileName] = [File, Freq]
0300                         inevt = File.read()
0301             except TypeError as e:
0302                 pass # just need to suppress the error
0303 
0304             # Unit conversion
0305             TimeHepmc = c_light*Time
0306 
0307             particles, vertices = [], []
0308             # Stores the vertices of the event inside a vertex container. These vertices are in increasing order so we can index them with [abs(vertex_id)-1]
0309             for vertex in inevt.vertices:
0310                 position=vertex.position
0311                 if not squash:
0312                     position=position+pyhepmc.FourVector(x=0,y=0,z=0,t=TimeHepmc)
0313                 v1=pyhepmc.GenVertex(position)
0314                 vertices.append(v1)
0315             
0316             # copies the particles and attaches them to their corresponding vertices
0317             for particle in inevt.particles:
0318                 # no copy/clone operator...
0319                 momentum, status, pid = particle.momentum, particle.status, particle.pid                
0320                 p1 = pyhepmc.GenParticle(momentum=momentum, pid=pid, status=status)
0321                 p1.generated_mass = particle.generated_mass
0322                 particles.append(p1)
0323                 
0324                 # since the beam particles do not have a production vertex they cannot be attached to a production vertex
0325                 if particle.production_vertex.id < 0:
0326                     production_vertex=particle.production_vertex.id
0327                     vertices[abs(production_vertex)-1].add_particle_out(p1)
0328                     hepSlice.add_particle(p1)
0329                     
0330                 # Adds particles with an end vertex to their end vertices
0331                 if particle.end_vertex:
0332                     end_vertex = particle.end_vertex.id
0333                     vertices[abs(end_vertex)-1].add_particle_in(p1)
0334 
0335             # Adds the vertices with the attached particles to the event
0336             for vertex in vertices:
0337                 hepSlice.add_vertex(vertex)
0338 
0339         return hepSlice
0340         
0341     # ============================================================================================
0342     def addWeightedEvents( self, fileName, hepSlice, signal=False ):
0343         """Handles weighted backgrounds"""
0344 
0345         events, probs, avgRate = self.weightDict[fileName]
0346 
0347         c_light  = 299.792458 # speed of light = 299.792458 mm/ns to get mm
0348         squash  = self.args.squashTime
0349         intTime = self.args.intWindow
0350         squash = self.args.squashTime
0351 
0352         # How many events? Assume Poisson distribution
0353         while True:
0354             nEvents = int( self.rng.exponential( intTime * avgRate ) )
0355             if nEvents > len(events) :
0356                 print("WARNING: Trying to place",nEvents,"events from", fileName,
0357                       "but the file doesn't have enough. Rerolling, but this is not physical.")
0358                 continue
0359             break
0360         if self.args.verbose : print ( "Placing",nEvents,"events from", fileName )
0361 
0362         # Get events 
0363         toPlace = self.rng.choice( a=events, size=nEvents, p=probs, replace=False )
0364 
0365         # Place at random times
0366         if not squash :
0367             times = list(self.rng.uniform(low=0, high=intTime, size=nEvents))        
0368 
0369         for inevt in toPlace :
0370             Time = 0
0371             if not squash :
0372                 Time = times.pop()
0373 
0374             # print ( " -- Placing at", time)
0375             
0376             particles, vertices = [], []
0377             # Stores the vertices of the event inside a vertex container. These vertices are in increasing order so we can index them with [abs(vertex_id)-1]
0378             for vertex in inevt.vertices:
0379                 position=vertex.position
0380                 if not squash :
0381                     # Unit conversion
0382                     TimeHepmc = c_light*Time
0383                     position=position+pyhepmc.FourVector(x=0,y=0,z=0,t=TimeHepmc)
0384                 v1=pyhepmc.GenVertex(position)
0385                 vertices.append(v1)
0386             
0387             # copies the particles and attaches them to their corresponding vertices
0388             for particle in inevt.particles:
0389                 # no copy/clone operator...
0390                 momentum, status, pid = particle.momentum, particle.status, particle.pid                
0391                 p1 = pyhepmc.GenParticle(momentum=momentum, pid=pid, status=status)
0392                 p1.generated_mass = particle.generated_mass
0393                 particles.append(p1)
0394                 
0395                 # since the beam particles do not have a production vertex they cannot be attached to a production vertex
0396                 if particle.production_vertex.id < 0:
0397                     production_vertex=particle.production_vertex.id
0398                     vertices[abs(production_vertex)-1].add_particle_out(p1)
0399                     hepSlice.add_particle(p1)
0400                     
0401                 # Adds particles with an end vertex to their end vertices
0402                 if particle.end_vertex:
0403                     end_vertex = particle.end_vertex.id
0404                     vertices[abs(end_vertex)-1].add_particle_in(p1)
0405 
0406             # Adds the vertices with the attached particles to the event
0407             for vertex in vertices:
0408                 hepSlice.add_vertex(vertex)
0409 
0410         return hepSlice
0411 
0412     # ============================================================================================
0413     def poissonTimes( self, mu, endTime ):
0414         """Return an np.array of poisson-distributed times."""
0415         #Exponential distribution describes the time between poisson. We could start with an array of expected length
0416         #and then cut or fill as needed. Not very readable for very little gain.
0417         t = 0
0418         ret=np.array([])
0419         while True :
0420             delt = self.rng.exponential( mu )
0421             t = t + delt
0422             if t >= endTime :
0423                 break
0424             ret = np.append(ret,t)
0425         return ret
0426     
0427     # ============================================================================================
0428     def nameGen(self):
0429         # # datetime object containing current date and time
0430         # now = datetime.now()
0431         # dt_string = now.strftime("%Y_%m_%d_%H_%M_%S") # YY/mm/dd H:M:S        
0432         # return 'Sig_Back_Combo_{}_{}_event.hepmc'.format(dt_string,numEvents)
0433         name = self.args.signalFile
0434         if self.args.nSlices > 0 : 
0435             name = name.replace(".hepmc","_n_{}.hepmc".format(self.args.nSlices))
0436             
0437         name = "bgmerged_"+name
0438         return name        
0439 
0440     def squawk(self,i) :
0441         print('Working on slice {}'.format( i+1 ))
0442         print('Resident Memory',psutil.Process().memory_info().rss / 1024 / 1024,"MB")
0443     
0444     # ============================================================================================
0445     def fileWriter(combo_cont):
0446         numEvents=len(combo_cont)
0447         name=nameGen(numEvents)
0448         Event_ind=0
0449         with WriterAscii(name) as f:
0450             for i in combo_cont:
0451                 i.event_number=Event_ind
0452                 Event_ind=Event_ind+1
0453                 f.write_event(i)
0454         f.close()
0455 
0456     
0457 # ============================================================================================
0458 # Main function
0459 if __name__ == '__main__':
0460 
0461     t0 = time.time()
0462     sbm = signal_background_merger()
0463     sbm.merge()
0464     print()
0465     print('==================================================================')
0466     print('Overall running time:',np.round((time.time()-t0)/60.,2),'min')
0467 
0468 
0469