File indexing completed on 2024-09-28 07:03:08
0001 import sys
0002 import pandas as pd
0003 import numpy as np
0004 import uproot
0005 import scipy
0006 import math
0007 from uproot_methods import *
0008 from concurrent.futures import ThreadPoolExecutor
0009
0010
0011
0012
0013
0014
0015 global u_fb,u_mb,u_pb
0016
0017 u_fb = 1
0018 u_mb = 1e12*u_fb
0019 u_pb = 1e3*u_fb
0020
0021
0022
0023 def UprootLoad(files=[], treename="", branches=[]):
0024 print(f"::UprootLoad({files}")
0025 executor = ThreadPoolExecutor(8)
0026
0027 df_list = []
0028 data = uproot.pandas.iterate(files,
0029 treename,
0030 branches=branches,
0031 flatten=False,
0032 executor=executor)
0033 for dataframe in data:
0034 df_list.append(dataframe)
0035
0036 df = pd.concat(df_list)
0037
0038 return df
0039
0040 def TotalXSection(process='CC_DIS_e18_p275'):
0041 global u_mb
0042 if process == 'CC_DIS_e18_p275':
0043 return 2.408e-08*u_mb
0044 elif process == 'CC_DIS_e10_p100_CT18NNLO':
0045 return 5.44044e-09*u_mb
0046 elif process == 'CC_DIS_e10_p275_CT18NNLO':
0047 return 1.47637e-08*u_mb
0048 elif process == 'CC_DIS_e10_p275_CT1820Rs2':
0049 return 1.47637e-08*u_mb
0050 elif process == 'CC_DIS_e10_p100_CT1820Rs2':
0051 return 5.44842e-09*u_mb
0052 elif process == 'CC_DIS_e10_p275_CT1821Rs2':
0053 return 1.45884e-08*u_mb
0054 elif process == 'CC_DIS_e10_p100_CT1821Rs2':
0055 return 5.36006e-09*u_mb
0056 elif process == 'NC_DIS_e18_p275':
0057 return 3.671e-06*u_mb
0058
0059 print(f"EICAnalysisTools::TotalXSection == Unable to find requested process, {process}.")
0060
0061 sys.exit(-1)
0062 return 0.0
0063
0064
0065
0066 def DISBjorkenX(row):
0067 """
0068 pProton = branchParticle.At(0).P4(); #these numbers 0 , 3, 5 are hardcoded in Pythia8
0069 pleptonIn = branchParticle.At(3).P4();
0070 pleptonOut = branchParticle.At(5).P4();
0071 pPhoton = pleptonIn - pleptonOut;
0072
0073 #Q2, W2, Bjorken x, y, nu.
0074 Q2 = -pPhoton.M2()
0075 W2 = (pProton + pPhoton).M2()
0076 x = Q2 / (2. * pProton.Dot(pPhoton))
0077 y = (pProton.Dot(pPhoton)) / (pProton.Dot(pleptonIn))
0078 """
0079
0080 particles_px = row['Particle.Px']
0081 particles_py = row['Particle.Py']
0082 particles_pz = row['Particle.Pz']
0083 particles_E = row['Particle.E']
0084
0085
0086 iproton = 0
0087 ilepton1 = 3
0088 ilepton2 = 5
0089
0090 pProton = TLorentzVector(particles_px[iproton], particles_py[iproton], particles_pz[iproton], particles_E[iproton])
0091 pleptonIn = TLorentzVector(particles_px[ilepton1], particles_py[ilepton1], particles_pz[ilepton1], particles_E[ilepton1])
0092 pleptonOut = TLorentzVector(particles_px[ilepton2], particles_py[ilepton2], particles_pz[ilepton2], particles_E[ilepton2])
0093 pPhoton = pleptonIn - pleptonOut
0094
0095 Q2 = -pPhoton.mag2
0096 W2 = (pProton + pPhoton).mag2
0097 x = Q2 / (2. * pProton.dot(pPhoton))
0098
0099
0100 x_array = np.ones(len(row['Jet.PT'])) * x
0101
0102 return x_array
0103
0104
0105
0106
0107
0108
0109
0110
0111
0112 def DifferentialTaggingYield(df, x='Jet.PT', xrange=[10,50], xbins=[10,12.5,15,20,25,30,40,50], which='all', process='CC_DIS_e18_p275', target_lumi = 100):
0113 global u_fb
0114 n_gen = len(df)
0115 print(f"n_gen = {n_gen}")
0116 jet_pt = np.concatenate(df['Jet.PT'].to_numpy()).ravel()
0117 jet_eta = np.concatenate(df['Jet.Eta'].to_numpy()).ravel()
0118 jet_flavor = np.concatenate(df['Jet.Flavor'].to_numpy()).ravel()
0119 jet_tag = np.concatenate(df['Jet.BTag'].to_numpy()).ravel()
0120
0121 jet_x = np.concatenate(df[x].to_numpy()).ravel()
0122
0123
0124
0125 jet_basics = (jet_tag == 1)
0126
0127 all_flavor = ( jet_flavor > -999.0 ) & (jet_basics)
0128 light_flavor = (( jet_flavor < 4 ) | ( jet_flavor == 21 )) & (jet_basics)
0129 charm_flavor = ( jet_flavor == 4 ) & (jet_basics)
0130
0131 selection = all_flavor
0132 if which == 'charm':
0133 selection = charm_flavor
0134 elif which == 'light':
0135 selection = light_flavor
0136
0137 (counts, bins) = np.histogram(jet_x[ selection ], range=xrange,
0138 bins=xbins)
0139
0140
0141 bin_widths = np.diff(bins)
0142 bin_centers = bins[:-1] + bin_widths/2
0143
0144 errors = np.sqrt(counts)
0145 rel_errors = errors/counts
0146
0147
0148
0149 dsigma_dx = counts * TotalXSection(process) * target_lumi*u_fb**(-1) / n_gen
0150 dsigma_dx_errors = rel_errors * dsigma_dx
0151
0152
0153 print(f"========= Validation Information from DifferentialTaggingYield ========= \n \
0154 \n \
0155 Process considered: {process} \n \
0156 Theory Cross-Section: {TotalXSection(process)} \n \
0157 Target Integrated Luminosity: {target_lumi} \n \
0158 Total Predicted Yield of Events: {np.sum(dsigma_dx)} \ \
0159 \n \
0160 ========= Validation Information from DifferentialTaggingYield =========")
0161
0162
0163 return (bin_centers, bin_widths, dsigma_dx, dsigma_dx_errors)
0164
0165
0166
0167 def DifferentialTaggingEfficiency(df, x='Jet.PT', xrange=[10,50], xbins=[10,12.5,15,20,25,30,40,50], which='all'):
0168 jet_pt = np.concatenate(df['Jet.PT'].to_numpy()).ravel()
0169 jet_eta = np.concatenate(df['Jet.Eta'].to_numpy()).ravel()
0170 jet_flavor = np.concatenate(df['Jet.Flavor'].to_numpy()).ravel()
0171 jet_tag = np.concatenate(df['Jet.BTag'].to_numpy()).ravel()
0172
0173 jet_x = np.concatenate(df[x].to_numpy()).ravel()
0174
0175
0176 jet_tagged = (jet_tag == 1)
0177
0178 all_flavor = ( jet_flavor > -999.0 )
0179 light_flavor = ( jet_flavor < 4 ) | ( jet_flavor == 21 )
0180 charm_flavor = ( jet_flavor == 4 )
0181
0182 selection = all_flavor
0183 if which == 'charm':
0184 selection = charm_flavor
0185 elif which == 'light':
0186 selection = light_flavor
0187
0188 (tru_counts, bins) = np.histogram(jet_x[ selection ], range=xrange,
0189 bins=xbins)
0190
0191 (tag_counts, bins) = np.histogram(jet_x[ (selection) & (jet_tagged) ], range=xrange,
0192 bins=xbins)
0193
0194
0195 eff = tag_counts/tru_counts
0196 n_err = scipy.stats.binom.interval(1.0-math.exp(-1), tru_counts, eff)
0197
0198 eff_err=[np.fabs(n_err[0]/tru_counts-eff), np.fabs(n_err[1]/tru_counts-eff)]
0199
0200
0201 bin_widths = np.diff(bins)
0202 bin_centers = bins[:-1] + bin_widths/2
0203
0204 print(f"========= Validation Information from DifferentialTaggingEfficiency =========")
0205 print(eff)
0206 print(eff_err)
0207 print(f"Tagging Efficiency above X threshold for {which}:")
0208 for ibin in np.arange(len(bins)-1):
0209
0210
0211
0212 n_tag = np.sum(tag_counts[ibin:])
0213 n_tru = np.sum(tru_counts[ibin:])
0214 eff_above_x = n_tag/n_tru
0215 err_above_x = np.nanmean([np.fabs(np.sum(n_err[0][ibin:])/n_tru-eff_above_x), np.fabs(np.sum(n_err[1][ibin:])/n_tru-eff_above_x)])
0216
0217 print(f"Minimum X: {bins[ibin]:.1f} Tag. Efficiency: ({eff_above_x*100:.3f} +/- {err_above_x*100:.3f})%")
0218
0219
0220 print(f"========= Validation Information from DifferentialTaggingEfficiency =========")
0221
0222 return (bin_centers, bin_widths, eff, eff_err)
0223
0224