File indexing completed on 2024-09-28 07:03:08
0001 import sys
0002 import pandas as pd
0003 import numpy as np
0004 import uproot
0005 import scipy
0006 import math
0007 import glob
0008 from uproot_methods import *
0009 from concurrent.futures import ThreadPoolExecutor
0010
0011
0012
0013
0014
0015
0016 global u_fb,u_mb,u_pb
0017
0018 u_fb = 1
0019 u_mb = 1e12*u_fb
0020 u_pb = 1e3*u_fb
0021
0022 global n_gen
0023 n_gen = 10e6
0024
0025
0026
0027 def UprootLoad(files=[], treename="", branches=[]):
0028 global n_gen
0029 print(f"::UprootLoad({files}")
0030 executor = ThreadPoolExecutor(8)
0031
0032
0033
0034
0035
0036
0037 filenames = glob.glob(files[0])
0038
0039 data = pd.DataFrame(columns=branches)
0040
0041 for filename in filenames:
0042 tree = uproot.open(filename)[treename]
0043 data = pd.concat([data, tree.arrays(branches, namedecode='utf-8', outputtype=pd.DataFrame)])
0044
0045 n_gen = len(data)
0046
0047 print(f"Originally generated number of collisions: {n_gen}")
0048
0049 print("Exploding the DataFrame...")
0050
0051
0052 print("Completed exploding the DataFrame")
0053
0054 return data
0055
0056 def TotalXSection(process='CC_DIS_e18_p275'):
0057 global u_mb
0058 if process == 'CC_DIS_e18_p275':
0059 return 2.408e-08*u_mb
0060 elif process == 'CC_DIS_e10_p100_CT18NNLO':
0061 return 5.44044e-09*u_mb
0062 elif process == 'CC_DIS_e10_p275_CT18NNLO':
0063 return 1.47637e-08*u_mb
0064 elif process == 'CC_DIS_e10_p275_CT1820Rs2':
0065 return 1.47637e-08*u_mb
0066 elif process == 'CC_DIS_e10_p100_CT1820Rs2':
0067 return 5.44842e-09*u_mb
0068 elif process == 'CC_DIS_e10_p275_CT1821Rs2':
0069 return 1.45884e-08*u_mb
0070 elif process == 'CC_DIS_e10_p100_CT1821Rs2':
0071 return 5.36006e-09*u_mb
0072 elif process == 'NC_DIS_e18_p275':
0073 return 3.671e-06*u_mb
0074
0075 print(f"EICAnalysisTools::TotalXSection == Unable to find requested process, {process}.")
0076
0077 sys.exit(-1)
0078 return 0.0
0079
0080
0081
0082 def DISBjorkenX(row):
0083 """
0084 pProton = branchParticle.At(0).P4(); #these numbers 0 , 3, 5 are hardcoded in Pythia8
0085 pleptonIn = branchParticle.At(3).P4();
0086 pleptonOut = branchParticle.At(5).P4();
0087 pPhoton = pleptonIn - pleptonOut;
0088
0089 #Q2, W2, Bjorken x, y, nu.
0090 Q2 = -pPhoton.M2()
0091 W2 = (pProton + pPhoton).M2()
0092 x = Q2 / (2. * pProton.Dot(pPhoton))
0093 y = (pProton.Dot(pPhoton)) / (pProton.Dot(pleptonIn))
0094 """
0095
0096 particles_px = row['Particle.Px']
0097 particles_py = row['Particle.Py']
0098 particles_pz = row['Particle.Pz']
0099 particles_E = row['Particle.E']
0100
0101
0102 iproton = 0
0103 ilepton1 = 3
0104 ilepton2 = 5
0105
0106 pProton = TLorentzVector(particles_px[iproton], particles_py[iproton], particles_pz[iproton], particles_E[iproton])
0107 pleptonIn = TLorentzVector(particles_px[ilepton1], particles_py[ilepton1], particles_pz[ilepton1], particles_E[ilepton1])
0108 pleptonOut = TLorentzVector(particles_px[ilepton2], particles_py[ilepton2], particles_pz[ilepton2], particles_E[ilepton2])
0109 pPhoton = pleptonIn - pleptonOut
0110
0111 Q2 = -pPhoton.mag2
0112 W2 = (pProton + pPhoton).mag2
0113 x = Q2 / (2. * pProton.dot(pPhoton))
0114
0115
0116 x_array = np.ones(len(row['jet_pt'])) * x
0117
0118 return x_array
0119
0120
0121
0122
0123
0124
0125
0126
0127
0128 def DifferentialTaggingYield(df, x='jet_pt', xrange=[10,50], xbins=[10,12.5,15,20,25,30,40,50], which='all', process='CC_DIS_e18_p275', target_lumi = 100, taggers="jet_sip3dtag"):
0129 global u_fb, n_gen
0130 print(f"n_gen = {n_gen}")
0131
0132 jet_flavor = np.concatenate(df['jet_flavor'].to_numpy()).ravel()
0133 jet_tag = np.concatenate(df['jet_sip3dtag'].to_numpy()).ravel()
0134
0135 jet_x = np.concatenate(df[x].to_numpy()).ravel()
0136
0137
0138
0139
0140
0141
0142 if which == 'charm':
0143 jet_x = jet_x[ (jet_tag == 1) & (jet_flavor == 4) ]
0144 elif which == 'light':
0145 jet_x = jet_x[ (jet_tag == 1) & ((jet_flavor < 4) | (jet_flavor == 21)) ]
0146
0147
0148 (counts, bins) = np.histogram(jet_x, range=xrange,
0149 bins=xbins)
0150
0151
0152 bin_widths = np.diff(bins)
0153 bin_centers = bins[:-1] + bin_widths/2
0154
0155 errors = np.sqrt(counts)
0156 rel_errors = errors/counts
0157
0158
0159
0160 dsigma_dx = counts * TotalXSection(process) * target_lumi*u_fb**(-1) / n_gen
0161 dsigma_dx_errors = rel_errors * dsigma_dx
0162
0163
0164 print(f"========= Validation Information from DifferentialTaggingYield ========= \n \
0165 \n \
0166 Process considered: {process} \n \
0167 Theory Cross-Section: {TotalXSection(process)} \n \
0168 Target Integrated Luminosity: {target_lumi} \n \
0169 Total Predicted Yield of Events: {np.sum(dsigma_dx)} \ \
0170 \n \
0171 ========= Validation Information from DifferentialTaggingYield =========")
0172
0173
0174 return (bin_centers, bin_widths, dsigma_dx, dsigma_dx_errors)
0175
0176
0177
0178 def DifferentialTaggingEfficiency(df, x='jet_pt', xrange=[10,50], xbins=[10,12.5,15,20,25,30,40,50], which='all', taggers="jet_sip3dtag"):
0179
0180 jet_flavor = np.concatenate(df['jet_flavor'].to_numpy()).ravel()
0181 jet_tag = np.concatenate(df['jet_sip3dtag'].to_numpy()).ravel()
0182
0183 jet_x = np.concatenate(df[x].to_numpy()).ravel()
0184
0185
0186
0187
0188
0189
0190
0191
0192
0193
0194
0195
0196 if which == 'charm':
0197 jet_x = jet_x[ jet_flavor == 4 ]
0198 jet_tag = jet_tag[ jet_flavor == 4 ]
0199 elif which == 'light':
0200 jet_x = jet_x[ ((jet_flavor < 4) | (jet_flavor == 21)) ]
0201 jet_tag = jet_tag[ ((jet_flavor < 4) | (jet_flavor == 21)) ]
0202
0203 (tru_counts, bins) = np.histogram(jet_x, range=xrange,
0204 bins=xbins)
0205
0206 (tag_counts, bins) = np.histogram(jet_x[ jet_tag == 1 ], range=xrange,
0207 bins=xbins)
0208
0209
0210 eff = tag_counts/tru_counts
0211 n_err = scipy.stats.binom.interval(1.0-math.exp(-1), tru_counts, eff)
0212
0213 eff_err=[np.fabs(n_err[0]/tru_counts-eff), np.fabs(n_err[1]/tru_counts-eff)]
0214
0215
0216 bin_widths = np.diff(bins)
0217 bin_centers = bins[:-1] + bin_widths/2
0218
0219 print(f"========= Validation Information from DifferentialTaggingEfficiency =========")
0220 print(eff)
0221 print(eff_err)
0222 print(f"Tagging Efficiency above X threshold for {which}:")
0223 for ibin in np.arange(len(bins)-1):
0224
0225
0226
0227 n_tag = np.sum(tag_counts[ibin:])
0228 n_tru = np.sum(tru_counts[ibin:])
0229 eff_above_x = n_tag/n_tru
0230 err_above_x = np.nanmean([np.fabs(np.sum(n_err[0][ibin:])/n_tru-eff_above_x), np.fabs(np.sum(n_err[1][ibin:])/n_tru-eff_above_x)])
0231
0232 print(f"Minimum X: {bins[ibin]:.1f} Tag. Efficiency: ({eff_above_x*100:.3f} +/- {err_above_x*100:.3f})%")
0233
0234
0235 print(f"========= Validation Information from DifferentialTaggingEfficiency =========")
0236
0237 return (bin_centers, bin_widths, eff, eff_err)
0238
0239