Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2024-09-28 07:03:08

0001 import sys
0002 import pandas as pd
0003 import numpy as np
0004 import uproot
0005 import scipy
0006 import math
0007 from uproot_methods import *
0008 from concurrent.futures import ThreadPoolExecutor
0009 
0010 
0011 # Define useful variables
0012 
0013 ## Units
0014 
0015 global u_fb,u_mb,u_pb
0016 
0017 u_fb = 1
0018 u_mb = 1e12*u_fb
0019 u_pb = 1e3*u_fb
0020 
0021 
0022 
0023 def UprootLoad(files=[], treename="", branches=[]):
0024     print(f"::UprootLoad({files}")
0025     executor = ThreadPoolExecutor(8)
0026 
0027     df_list = []
0028     data = uproot.pandas.iterate(files, 
0029                                  treename, 
0030                                  branches=branches,
0031                                  flatten=False,
0032                                  executor=executor)
0033     for dataframe in data:
0034         df_list.append(dataframe)
0035     
0036     df = pd.concat(df_list)
0037 
0038     return df
0039 
0040 def TotalXSection(process='CC_DIS_e18_p275'):
0041     global u_mb
0042     if process == 'CC_DIS_e18_p275':
0043         return 2.408e-08*u_mb
0044     elif process == 'CC_DIS_e10_p100_CT18NNLO':
0045         return 5.44044e-09*u_mb
0046     elif process == 'CC_DIS_e10_p275_CT18NNLO':
0047         return 1.47637e-08*u_mb
0048     elif process == 'CC_DIS_e10_p275_CT1820Rs2':
0049         return 1.47637e-08*u_mb
0050     elif process == 'CC_DIS_e10_p100_CT1820Rs2':
0051         return 5.44842e-09*u_mb
0052     elif process == 'CC_DIS_e10_p275_CT1821Rs2':
0053         return 1.45884e-08*u_mb
0054     elif process == 'CC_DIS_e10_p100_CT1821Rs2':
0055         return 5.36006e-09*u_mb
0056     elif process == 'NC_DIS_e18_p275':
0057         return 3.671e-06*u_mb
0058     
0059     print(f"EICAnalysisTools::TotalXSection == Unable to find requested process, {process}.")
0060     # Don't continue. This is bad. Nonsense will result. The user should fix this.
0061     sys.exit(-1)
0062     return 0.0
0063 
0064 
0065 # Compute auxiliary information for a DataFrame
0066 def DISBjorkenX(row):
0067     """
0068     pProton      = branchParticle.At(0).P4(); #these numbers 0 , 3, 5 are hardcoded in Pythia8
0069     pleptonIn    = branchParticle.At(3).P4();
0070     pleptonOut   = branchParticle.At(5).P4();
0071     pPhoton      = pleptonIn - pleptonOut;
0072   
0073     #Q2, W2, Bjorken x, y, nu.
0074     Q2 = -pPhoton.M2()
0075     W2 = (pProton + pPhoton).M2()
0076     x = Q2 / (2. * pProton.Dot(pPhoton))
0077     y = (pProton.Dot(pPhoton)) / (pProton.Dot(pleptonIn))
0078     """
0079 
0080     particles_px = row['Particle.Px']
0081     particles_py = row['Particle.Py']
0082     particles_pz = row['Particle.Pz']
0083     particles_E = row['Particle.E']
0084 
0085     # Beam Particle 4-vectors
0086     iproton = 0
0087     ilepton1 = 3
0088     ilepton2 = 5
0089     
0090     pProton = TLorentzVector(particles_px[iproton], particles_py[iproton], particles_pz[iproton], particles_E[iproton])
0091     pleptonIn = TLorentzVector(particles_px[ilepton1], particles_py[ilepton1], particles_pz[ilepton1], particles_E[ilepton1])
0092     pleptonOut = TLorentzVector(particles_px[ilepton2], particles_py[ilepton2], particles_pz[ilepton2], particles_E[ilepton2])
0093     pPhoton = pleptonIn - pleptonOut
0094 
0095     Q2 = -pPhoton.mag2
0096     W2 = (pProton + pPhoton).mag2
0097     x = Q2 / (2. * pProton.dot(pPhoton))
0098 
0099     # Write this value of x for each jet, since we need to do jet-level plots
0100     x_array = np.ones(len(row['Jet.PT'])) * x
0101 
0102     return x_array
0103 
0104 
0105 
0106 
0107 ###################################################################
0108 # Differential Computation Functions
0109 ###################################################################
0110 
0111 # Flavour tagging study code
0112 def DifferentialTaggingYield(df, x='Jet.PT', xrange=[10,50], xbins=[10,12.5,15,20,25,30,40,50], which='all', process='CC_DIS_e18_p275', target_lumi = 100):
0113     global u_fb
0114     n_gen = len(df)
0115     print(f"n_gen = {n_gen}")
0116     jet_pt = np.concatenate(df['Jet.PT'].to_numpy()).ravel()
0117     jet_eta = np.concatenate(df['Jet.Eta'].to_numpy()).ravel()
0118     jet_flavor = np.concatenate(df['Jet.Flavor'].to_numpy()).ravel()
0119     jet_tag = np.concatenate(df['Jet.BTag'].to_numpy()).ravel()
0120 
0121     jet_x = np.concatenate(df[x].to_numpy()).ravel()
0122 
0123 
0124     #jet_basics = (0.01 < jet_eta) & (jet_eta < 0.9)
0125     jet_basics = (jet_tag == 1)
0126     
0127     all_flavor = ( jet_flavor > -999.0 ) & (jet_basics)
0128     light_flavor = (( jet_flavor < 4 ) | ( jet_flavor == 21 )) & (jet_basics)
0129     charm_flavor = ( jet_flavor == 4 ) & (jet_basics)
0130 
0131     selection = all_flavor
0132     if which == 'charm':
0133         selection = charm_flavor
0134     elif which == 'light':
0135         selection = light_flavor
0136 
0137     (counts, bins) = np.histogram(jet_x[ selection ], range=xrange, 
0138                                   bins=xbins)
0139                                   #bins=40)
0140 
0141     bin_widths = np.diff(bins)
0142     bin_centers = bins[:-1] + bin_widths/2
0143 
0144     errors = np.sqrt(counts)
0145     rel_errors = errors/counts
0146 
0147 
0148     # convert counts to dsigma/dpT * 100/fb
0149     dsigma_dx = counts * TotalXSection(process)  * target_lumi*u_fb**(-1) / n_gen #/ bin_widths
0150     dsigma_dx_errors = rel_errors * dsigma_dx
0151 
0152 
0153     print(f"========= Validation Information from DifferentialTaggingYield ========= \n \
0154     \n \
0155     Process considered:              {process} \n \
0156     Theory Cross-Section:            {TotalXSection(process)} \n \
0157     Target Integrated Luminosity:    {target_lumi} \n \
0158     Total Predicted Yield of Events: {np.sum(dsigma_dx)} \ \
0159     \n \
0160     ========= Validation Information from DifferentialTaggingYield =========")
0161           
0162     
0163     return (bin_centers, bin_widths, dsigma_dx, dsigma_dx_errors)
0164 
0165 
0166 
0167 def DifferentialTaggingEfficiency(df, x='Jet.PT', xrange=[10,50], xbins=[10,12.5,15,20,25,30,40,50], which='all'):
0168     jet_pt = np.concatenate(df['Jet.PT'].to_numpy()).ravel()
0169     jet_eta = np.concatenate(df['Jet.Eta'].to_numpy()).ravel()
0170     jet_flavor = np.concatenate(df['Jet.Flavor'].to_numpy()).ravel()
0171     jet_tag = np.concatenate(df['Jet.BTag'].to_numpy()).ravel()
0172 
0173     jet_x = np.concatenate(df[x].to_numpy()).ravel()
0174 
0175 
0176     jet_tagged = (jet_tag == 1)
0177     
0178     all_flavor = ( jet_flavor > -999.0 ) 
0179     light_flavor = ( jet_flavor < 4 ) | ( jet_flavor == 21 )
0180     charm_flavor = ( jet_flavor == 4 ) 
0181 
0182     selection = all_flavor
0183     if which == 'charm':
0184         selection = charm_flavor
0185     elif which == 'light':
0186         selection = light_flavor
0187 
0188     (tru_counts, bins) = np.histogram(jet_x[ selection ], range=xrange, 
0189                                       bins=xbins)
0190 
0191     (tag_counts, bins) = np.histogram(jet_x[ (selection) & (jet_tagged) ], range=xrange, 
0192                                       bins=xbins)
0193 
0194 
0195     eff = tag_counts/tru_counts
0196     n_err = scipy.stats.binom.interval(1.0-math.exp(-1), tru_counts, eff) 
0197 
0198     eff_err=[np.fabs(n_err[0]/tru_counts-eff), np.fabs(n_err[1]/tru_counts-eff)]
0199 
0200 
0201     bin_widths = np.diff(bins)
0202     bin_centers = bins[:-1] + bin_widths/2
0203 
0204     print(f"========= Validation Information from DifferentialTaggingEfficiency =========")
0205     print(eff)
0206     print(eff_err)
0207     print(f"Tagging Efficiency above X threshold for {which}:")
0208     for ibin in np.arange(len(bins)-1):
0209         #mean_eff = np.nanmean(eff[ibin:])
0210         #avg_errors = (eff_err[0]+eff_err[1])/2.0
0211         #mean_err = np.sqrt(np.sum((avg_errors[ibin:]/eff[ibin:])**2))*mean_eff
0212         n_tag = np.sum(tag_counts[ibin:])
0213         n_tru = np.sum(tru_counts[ibin:])
0214         eff_above_x = n_tag/n_tru
0215         err_above_x = np.nanmean([np.fabs(np.sum(n_err[0][ibin:])/n_tru-eff_above_x), np.fabs(np.sum(n_err[1][ibin:])/n_tru-eff_above_x)])
0216         #print(f"Minimum X: {bins[ibin]:.1f}   Tag. Efficiency: ({mean_eff*100:.3f} +/- {mean_err*100:.3f})%")
0217         print(f"Minimum X: {bins[ibin]:.1f}   Tag. Efficiency: ({eff_above_x*100:.3f} +/- {err_above_x*100:.3f})%")
0218     
0219 
0220     print(f"========= Validation Information from DifferentialTaggingEfficiency =========")
0221 
0222     return (bin_centers, bin_widths, eff, eff_err)
0223 
0224