EIC code displayed by LXR

import sys
import pandas as pd
import numpy as np
import uproot
import scipy.stats
import math
import glob
from uproot_methods import *
from concurrent.futures import ThreadPoolExecutor


# Define useful variables

## Units

global u_fb,u_mb,u_pb

u_fb = 1
u_mb = 1e12*u_fb
u_pb = 1e3*u_fb
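
# Illustrative sanity check of the unit conventions above (comments only, not
# part of the analysis): with u_fb = 1, a cross section quoted in mb converts
# to the fb-based unit system by multiplying with u_mb, e.g.
#   2.408e-08 * u_mb  == 2.408e+04   (fb)
#   1.0       * u_pb  == 1.0e+03     (fb)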

global n_gen
n_gen = 10e6


def UprootLoad(files=[], treename="", branches=[]):
    global n_gen
    print(f"::UprootLoad({files})")
    executor = ThreadPoolExecutor(8)  # currently unused; left over from the lazy-array approach commented out below

    #df = uproot.lazyarrays(files,
    #                       treename,
    #                       branches=branches,
    #                       entrysteps=1000)

    filenames = glob.glob(files[0])

    data = pd.DataFrame(columns=branches)

    for filename in filenames:
        tree = uproot.open(filename)[treename]
        data = pd.concat([data, tree.arrays(branches, namedecode='utf-8', outputtype=pd.DataFrame)])

    n_gen = len(data) #uproot.numentries(files, treename, total=True)

    print(f"Originally generated number of collisions: {n_gen}")

    print("Exploding the DataFrame...")
    #data = data.set_index(['bjorken_x', 'jb_x', 'jb_Q2']).apply(pd.Series.explode).reset_index()
    #data = data.apply(pd.Series.explode)
    print("Completed exploding the DataFrame")

    return data

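# Example usage (an illustrative sketch only; the file glob, tree name and
# branch list below are hypothetical and must match your own analysis files):
#
#   df = UprootLoad(files=["results/CC_DIS_e10_p275_*.root"],
#                   treename="tree",
#                   branches=["jet_pt", "jet_flavor", "jet_sip3dtag"])
#
# Note that only files[0] is passed to glob.glob(); any further entries in the
# list are ignored by the current implementation.
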
def TotalXSection(process='CC_DIS_e18_p275'):
    global u_mb
    if process == 'CC_DIS_e18_p275':
        return 2.408e-08*u_mb
    elif process == 'CC_DIS_e10_p100_CT18NNLO':
        return 5.44044e-09*u_mb
    elif process == 'CC_DIS_e10_p275_CT18NNLO':
        return 1.47637e-08*u_mb
    elif process == 'CC_DIS_e10_p275_CT1820Rs2':
        return 1.47637e-08*u_mb
    elif process == 'CC_DIS_e10_p100_CT1820Rs2':
        return 5.44842e-09*u_mb
    elif process == 'CC_DIS_e10_p275_CT1821Rs2':
        return 1.45884e-08*u_mb
    elif process == 'CC_DIS_e10_p100_CT1821Rs2':
        return 5.36006e-09*u_mb
    elif process == 'NC_DIS_e18_p275':
        return 3.671e-06*u_mb

    print(f"EICAnalysisTools::TotalXSection == Unable to find requested process, {process}.")
    # Don't continue. This is bad. Nonsense will result. The user should fix this.
    sys.exit(-1)
    return 0.0

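# Example (illustrative): expected event yield for a given integrated
# luminosity, assuming the returned cross section is in fb (u_fb = 1) and the
# luminosity is quoted in fb^-1:
#
#   target_lumi = 100  # fb^-1
#   n_expected = TotalXSection('CC_DIS_e10_p100_CT18NNLO') * target_lumi
#
# Unknown process strings terminate the job via sys.exit(-1).
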

# Compute auxiliary information for a DataFrame
def DISBjorkenX(row):
    """
    pProton      = branchParticle.At(0).P4(); #these numbers 0 , 3, 5 are hardcoded in Pythia8
    pleptonIn    = branchParticle.At(3).P4();
    pleptonOut   = branchParticle.At(5).P4();
    pPhoton      = pleptonIn - pleptonOut;

    #Q2, W2, Bjorken x, y, nu.
    Q2 = -pPhoton.M2()
    W2 = (pProton + pPhoton).M2()
    x = Q2 / (2. * pProton.Dot(pPhoton))
    y = (pProton.Dot(pPhoton)) / (pProton.Dot(pleptonIn))
    """

    particles_px = row['Particle.Px']
    particles_py = row['Particle.Py']
    particles_pz = row['Particle.Pz']
    particles_E = row['Particle.E']

    # Beam Particle 4-vectors
    iproton = 0
    ilepton1 = 3
    ilepton2 = 5

    pProton = TLorentzVector(particles_px[iproton], particles_py[iproton], particles_pz[iproton], particles_E[iproton])
    pleptonIn = TLorentzVector(particles_px[ilepton1], particles_py[ilepton1], particles_pz[ilepton1], particles_E[ilepton1])
    pleptonOut = TLorentzVector(particles_px[ilepton2], particles_py[ilepton2], particles_pz[ilepton2], particles_E[ilepton2])
    pPhoton = pleptonIn - pleptonOut

    Q2 = -pPhoton.mag2
    W2 = (pProton + pPhoton).mag2
    x = Q2 / (2. * pProton.dot(pPhoton))

    # Write this value of x for each jet, since we need to do jet-level plots
    x_array = np.ones(len(row['jet_pt'])) * x

    return x_array

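# Example usage (illustrative; assumes the DataFrame was loaded with the
# 'Particle.Px/Py/Pz/E' and 'jet_pt' branches referenced above):
#
#   df['bjorken_x'] = df.apply(DISBjorkenX, axis=1)
#
# Each row then holds an array of x values, one per jet in that event, so that
# jet-level quantities can be plotted against the event-level Bjorken x.
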



###################################################################
# Differential Computation Functions
###################################################################

# Flavour tagging study code
def DifferentialTaggingYield(df, x='jet_pt', xrange=[10,50], xbins=[10,12.5,15,20,25,30,40,50], which='all', process='CC_DIS_e18_p275', target_lumi = 100, taggers="jet_sip3dtag"):
    # NOTE: the 'taggers' argument is currently unused; the 'jet_sip3dtag' branch is hardcoded below.
    global u_fb, n_gen
    print(f"n_gen = {n_gen}")

    jet_flavor = np.concatenate(df['jet_flavor'].to_numpy()).ravel()
    jet_tag = np.concatenate(df['jet_sip3dtag'].to_numpy()).ravel()

    jet_x = np.concatenate(df[x].to_numpy()).ravel()

    #jet_flavor = df['jet_flavor']
    #jet_tag = df['jet_sip3dtag']

    #jet_x = df[x]

    if which == 'charm':
        jet_x = jet_x[ (jet_tag == 1) & (jet_flavor == 4) ]
    elif which == 'light':
        jet_x = jet_x[ (jet_tag == 1) & ((jet_flavor < 4) | (jet_flavor == 21)) ]


    (counts, bins) = np.histogram(jet_x, range=xrange,
                                  bins=xbins)
                                  #bins=40)

    bin_widths = np.diff(bins)
    bin_centers = bins[:-1] + bin_widths/2

    errors = np.sqrt(counts)
    rel_errors = errors/counts


    # convert counts to the expected yield at the target luminosity:
    # counts * sigma_tot * L_target / n_gen (divide by bin_widths for a true dsigma/dx)
    dsigma_dx = counts * TotalXSection(process)  * target_lumi*u_fb**(-1) / n_gen #/ bin_widths
    dsigma_dx_errors = rel_errors * dsigma_dx


    print(f"========= Validation Information from DifferentialTaggingYield ========= \n \
    \n \
    Process considered:              {process} \n \
    Theory Cross-Section:            {TotalXSection(process)} \n \
    Target Integrated Luminosity:    {target_lumi} \n \
    Total Predicted Yield of Events: {np.sum(dsigma_dx)} \n \
    \n \
    ========= Validation Information from DifferentialTaggingYield =========")


    return (bin_centers, bin_widths, dsigma_dx, dsigma_dx_errors)

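# Example usage (illustrative; df is assumed to be a DataFrame loaded via
# UprootLoad containing the per-event arrays 'jet_pt', 'jet_flavor' and
# 'jet_sip3dtag'):
#
#   bin_centers, bin_widths, yields, yield_errs = \
#       DifferentialTaggingYield(df, x='jet_pt', which='charm',
#                                process='CC_DIS_e10_p275_CT18NNLO',
#                                target_lumi=100)
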


def DifferentialTaggingEfficiency(df, x='jet_pt', xrange=[10,50], xbins=[10,12.5,15,20,25,30,40,50], which='all', taggers="jet_sip3dtag"):
    # NOTE: the 'taggers' argument is currently unused; the 'jet_sip3dtag' branch is hardcoded below.

    jet_flavor = np.concatenate(df['jet_flavor'].to_numpy()).ravel()
    jet_tag = np.concatenate(df['jet_sip3dtag'].to_numpy()).ravel()

    jet_x = np.concatenate(df[x].to_numpy()).ravel()

    # too slow
    #if len(jet_x.flatten()) != len(jet_flavor.flatten()):
    #    print("Hi!")
    #    print(len(jet_x))
    #    new_jet_x = np.array([])
    #    for i in np.arange(len(jet_x)):
    #        if len(jet_flavor[i]) > 1:
    #            print(len(jet_flavor[i]), " ", np.ones(len(jet_flavor[i])), " ", np.ones(len(jet_flavor[i]))*jet_x[i])
    #        new_jet_x = np.append(new_jet_x, np.ones(len(jet_flavor[i]))*jet_x[i])
    #print(new_jet_x[:15])

    if which == 'charm':
        jet_x = jet_x[ jet_flavor == 4 ]
        jet_tag = jet_tag[ jet_flavor == 4 ]
    elif which == 'light':
        jet_x = jet_x[ ((jet_flavor < 4) | (jet_flavor == 21)) ]
        jet_tag = jet_tag[ ((jet_flavor < 4) | (jet_flavor == 21)) ]

    (tru_counts, bins) = np.histogram(jet_x, range=xrange,
                                      bins=xbins)

    (tag_counts, bins) = np.histogram(jet_x[ jet_tag == 1 ], range=xrange,
                                      bins=xbins)


    eff = tag_counts/tru_counts
    n_err = scipy.stats.binom.interval(1.0-math.exp(-1), tru_counts, eff)

    eff_err=[np.fabs(n_err[0]/tru_counts-eff), np.fabs(n_err[1]/tru_counts-eff)]


    bin_widths = np.diff(bins)
    bin_centers = bins[:-1] + bin_widths/2

    print(f"========= Validation Information from DifferentialTaggingEfficiency =========")
    print(eff)
    print(eff_err)
    print(f"Tagging Efficiency above X threshold for {which}:")
    for ibin in np.arange(len(bins)-1):
        #mean_eff = np.nanmean(eff[ibin:])
        #avg_errors = (eff_err[0]+eff_err[1])/2.0
        #mean_err = np.sqrt(np.sum((avg_errors[ibin:]/eff[ibin:])**2))*mean_eff
        n_tag = np.sum(tag_counts[ibin:])
        n_tru = np.sum(tru_counts[ibin:])
        eff_above_x = n_tag/n_tru
        err_above_x = np.nanmean([np.fabs(np.sum(n_err[0][ibin:])/n_tru-eff_above_x), np.fabs(np.sum(n_err[1][ibin:])/n_tru-eff_above_x)])
        #print(f"Minimum X: {bins[ibin]:.1f}   Tag. Efficiency: ({mean_eff*100:.3f} +/- {mean_err*100:.3f})%")
        print(f"Minimum X: {bins[ibin]:.1f}   Tag. Efficiency: ({eff_above_x*100:.3f} +/- {err_above_x*100:.3f})%")


    print(f"========= Validation Information from DifferentialTaggingEfficiency =========")

    return (bin_centers, bin_widths, eff, eff_err)

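# Example usage (illustrative; same hypothetical DataFrame as above):
#
#   bin_centers, bin_widths, eff, eff_err = \
#       DifferentialTaggingEfficiency(df, x='jet_pt', which='light')
#
# eff_err is a [lower, upper] pair of arrays, suitable for an asymmetric
# matplotlib error bar, e.g. plt.errorbar(bin_centers, eff, yerr=eff_err).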