PerformanceStudy/SinglePion/run_performance_study.py

0001 '''
0002     check tracking eff, resol for EIC-ePIC single particle simulation.
0003     input: recon rootfiles with default name format: rec_{setting}_eta_{eta_list[ii]:g}_{eta_list[ii+1]:g}_{mom}GeV_{nev}.root
0004     Shujie Li, Sept 2024
0005 '''
0006
0007 ## this block of functions are copied from epic_analysis.ipynb
0008 from matplotlib.backends.backend_pdf import PdfPages
0009 from lmfit.models import GaussianModel
0010
0011 from importlib import reload
0012 import os
0013 import sys
0014 import types
0015 import pandas as pd
0016 import scipy
0017 from scipy.signal import find_peaks
0018 pd.set_option('display.max_rows', 500)
0019 pd.options.display.max_rows = 40
0020 pd.options.display.min_rows = 20
0021 pd.options.display.max_columns = 100
0022
0023 import awkward as ak
0024 # import ROOT
0025 import uproot as ur
0026 print('Uproot version: ' + ur.__version__) ## this script assumed version 4
0027 ur.default_library="pd" ## does not work???
0028 from scipy import stats
0029 import numpy as np
0030 import argparse
0031 from matplotlib import pyplot as plt
0032 from matplotlib.gridspec import GridSpec
0033 import matplotlib.ticker as ticker
0034 import matplotlib.cm as cm
0035 import matplotlib as mpl
0036 import matplotlib.pylab as plt
0037 plt.rcParams['figure.figsize'] = [8.0, 6.0]
0038 plt.rcParams['ytick.direction'] = 'in'
0039 plt.rcParams['xtick.direction'] = 'in'
0040 plt.rcParams['xaxis.labellocation'] = 'right'
0041 plt.rcParams['yaxis.labellocation'] = 'top'
0042 SMALL_SIZE = 10
0043 MEDIUM_SIZE = 12
0044 BIGGER_SIZE = 16
0045 plt.rc('font', size=SMALL_SIZE)          # controls default text sizes
0046 plt.rc('axes', titlesize=MEDIUM_SIZE)     # fontsize of the axes title
0047 plt.rc('axes', labelsize=MEDIUM_SIZE)    # fontsize of the x and y labels
0048 plt.rc('xtick', labelsize=MEDIUM_SIZE)    # fontsize of the tick labels
0049 plt.rc('ytick', labelsize=MEDIUM_SIZE)    # fontsize of the tick labels
0050 plt.rc('legend', fontsize=SMALL_SIZE)    # legend fontsize
0051 plt.rc('figure', titlesize=BIGGER_SIZE)  # fontsize of the figure title]
0052
0053 deg2rad = np.pi/180.0
0054 ## convert theta to eta
0055 def theta2eta(xx, inverse=0):
0056     xx = np.array(xx)
0057     if inverse==1:
0058         return np.arctan((np.e)**(-xx))*2
0059     else:
0060         return -np.log(np.tan(xx/2.))
0061
0062 ## read a root tree with uproot. provide s3_dir to read from a remote server, xrootd required.
0063 # dir = 'EPIC/RECO/23.11.0/epic_craterlake/DIS/NC/18x275/minQ2=10/'
0064 # file = 'pythia8NCDIS_18x275_minQ2=10_beamEffects_xAngle=-0.025_hiDiv_1.0000.eicrecon.tree.edm4eic.root'
0065 def read_ur(fname, tname, s3_dir=""):
0066     if len(s3_dir)>1: # read from JLab server
0067         server = 'root://dtn-eic.jlab.org//work/eic2/'
0068         fname = server+s3_dir+fname
0069     tree     = ur.open(fname)[tname]
0070     print(f"read_ur: read {fname}:{tname}. {tree.num_entries} events in total")
0071     return tree
0072
0073 ## read a branch from uproot tree with an option to flatten, return pandas dataframe
0074 #  use kflatten=0 if want to get the nested dataframe
0075 #  iev: event index, default=-1: get all events
0076 def get_branch(tree,bname="",kflatten=1):
0077     if bname not in tree.keys():
0078         sys.exit("ERROR(get_branch): can't find branch "+bname)
0079     df    = tree[bname].array(library="ak")
0080     df    = ak.to_dataframe(df)
0081     if isinstance(df,pd.Series):
0082         return df #df.to_frame(name=bname.split("_")[-1])
0083
0084     colls = df.columns
0085     if len(colls)<1:
0086         print('cannot find any leaf under branch {}'.format(bname))
0087         return pd.DataFrame()
0088
0089     #remove prefix in name
0090     cols = [str(c) for c in colls if str(c).startswith('{}.'.format(bname))]
0091     #if it's an array member, the only column is "value"
0092     if not cols:
0093         return df
0094     # drop nested entry
0095     for cname in list(cols):
0096         if "[" in cname:
0097             cols.remove(cname) ## can not convert array to python. drop for now
0098         elif "covariance.covariance" in cname: ## for TrackParameters
0099             cols.remove(cname)
0100     # rename and flat
0101     df = df[cols]
0102     df.rename(columns={c: c.replace(bname + '.', '') for c in df.columns}, inplace=True)
0103     if kflatten:
0104         df   = df.reset_index()
0105     # df.rename(columns={c: c[0].upper() + c[1:] for c in df.columns}, inplace=True)
0106     return  df
0107
0108 def hist_gaus(dataset, ax, bins=100, klog=0, header=None):
0109     ## select data in range if bins is provided as an array
0110     if not np.isscalar(bins):
0111         c1 = dataset <= bins[-1]
0112         c2 = dataset >= bins[0]
0113         dataset = dataset[c1 & c2]
0114     ## ----1st fit------
0115     n, bins, _ = ax.hist(dataset, bins, density=False, facecolor='b', alpha=0.3)
0116     xx    = bins[0:-1] + (bins[1] - bins[0]) / 2.0
0117     ymax  = np.max(n)
0118     std   = np.std(dataset)
0119     mean  = np.mean(dataset)
0120
0121     c1 = xx <= (mean + 2 * std)
0122     c2 = xx >= (mean - 2 * std)
0123     cond  = c1 & c2
0124
0125     ii = 0
0126     while len(n[cond]) < len(bins) / 2.0:
0127         # ax.cla()
0128         ax.clear()
0129         diff = (bins[-1] - bins[0]) / 2.0 / 2.0
0130         n, bins, _ = ax.hist(
0131             dataset,
0132             np.linspace(
0133                 bins[0] + diff,
0134                 bins[-1] - diff,
0135                 len(bins)
0136             ),
0137             density=False,
0138             facecolor='b',
0139             alpha=0.3
0140         )
0141         xx = bins[0:-1] + (bins[1]-bins[0]) / 2.0
0142         c1 = xx <= (mean + 2 * std)
0143         c2 = xx >= (mean - 2 * std)
0144         cond = c1 & c2
0145         ii += 1
0146         if ii > 10:
0147             print("ERROR(hist_gaus): can not adjust the range.")
0148             return -1, -1, -1
0149
0150     model = GaussianModel()
0151     # create parameters with initial guesses:
0152     params = model.make_params(center=np.median(xx[cond]), amplitude=np.max(n), sigma=np.std(xx[cond]))
0153     result = model.fit(n, params, x=xx)
0154
0155     # -----2nd fit--------
0156     std = result.params['sigma']
0157     # std = result.params['sigma'].get_value
0158     # print(mean,std)
0159     c1 = xx <= (mean + 2 * std)
0160     c2 = xx >= (mean - 2 * std)
0161     cond = c1 & c2
0162     if len(xx[cond]) < 10:
0163         print("Fit failed")
0164         return -1, -1, -1
0165     model = GaussianModel()
0166     params = model.make_params(center=np.median(xx[cond]), amplitude=np.max(n[cond]), sigma=np.std(xx[cond]))
0167     try:
0168         result = model.fit(n[cond], params, x=xx[cond])
0169     except TypeError:
0170         print("Fit failed")
0171         return -1, -1,-1
0172     if result.params['sigma'].stderr ==  None:
0173         print("Fit failed")
0174         return -1, -1, -1
0175     #     print(result.fit_report())
0176
0177     #     print (result.best_values)
0178     # plt.plot(xx, result.best_fit, 'r-', label='best fit')
0179     rv_mean = float(result.params['center'])
0180     rv_std = float(result.params['sigma'])
0181     rv_std_err = float(result.params['sigma'].stderr)
0182     rv_ampl = float(result.params['amplitude'])
0183
0184     if len(result.best_fit) > 0:
0185         ax.plot(xx[cond], result.best_fit, 'r-', label='sigma=%g, err=%g' %(result.params['sigma'], result.params['sigma'].stderr))
0186
0187     ax.legend(title=header, frameon=False, loc='upper left')
0188
0189     ymax  = np.max(n)
0190     if klog:
0191         ax.set_yscale('log')
0192         ax.set_ylim(1, ymax * 10)
0193     else:
0194         ax.set_ylim(0, ymax * 1.3)
0195     return float(result.params['sigma']), float(result.params['sigma'].stderr)
0196     # return float(result.params['center']), float(result.params['sigma']), float(result.params['sigma'].stderr)
0197
0198
0199 def hist_gaus(dataset,ax, bins=100,klog=0,header=None):
0200     ## select data in range if bins is provided as an array
0201     if not np.isscalar(bins):
0202         c1 = dataset<=bins[-1]
0203         c2 = dataset>=bins[0]
0204         dataset=dataset[c1&c2]
0205     ## ----1st fit------
0206     n, bins, patches = ax.hist(dataset, bins,density=False, facecolor='b', alpha=0.3)
0207     xx    = bins[0:-1]+(bins[1]-bins[0])/2.0
0208     ymax  = np.max(n)
0209     std   = np.std(dataset)
0210     mean  = np.mean(dataset)
0211     c1 = xx<=(mean+2*std)
0212     c2 = xx>=(mean-2*std)
0213     cond  = c1&c2
0214
0215     ii=0
0216     while len(n[cond])<(len(bins)/2.0):
0217         # ax.cla()
0218         ax.clear()
0219         diff = (bins[-1]-bins[0])/2.0/2.0
0220         n, bins, patches = ax.hist(dataset, np.linspace(bins[0]+diff,bins[-1]-diff,len(bins)),density=False, facecolor='b', alpha=0.3)
0221         xx    = bins[0:-1]+(bins[1]-bins[0])/2.0
0222         c1 = xx<=(mean+2*std)
0223         c2 = xx>=(mean-2*std)
0224         cond  = c1&c2
0225         ii+=1
0226         if ii>5:
0227             print("ERROR(hist_gaus): can not adjust the range.")
0228             return -1,-1
0229
0230     model = GaussianModel()
0231     # create parameters with initial guesses:
0232     params = model.make_params(center=np.median(xx[cond]), amplitude=np.max(n), sigma=np.std(xx[cond]))
0233     result = model.fit(n, params, x=xx)
0234
0235     # -----2nd fit--------
0236     std = result.params['sigma'].value
0237     # print(mean,std)
0238     c1 = xx<=(mean+2*std)
0239     c2 = xx>=(mean-2*std)
0240     cond = c1&c2
0241     if len(xx[cond])<10:
0242         print("Fit failed")
0243         return -1, -1
0244     model = GaussianModel()
0245     params = model.make_params(center=np.median(xx[cond]), amplitude=np.max(n[cond]), sigma=np.std(xx[cond]))
0246     try:
0247         result = model.fit(n[cond], params, x=xx[cond])
0248     except TypeError:
0249         print("Fit failed")
0250         return -1,-1
0251     if result.params['sigma'].stderr ==  None:
0252         print("Fit failed")
0253         return -1, -1
0254     #     print(result.fit_report())
0255
0256     #     print (result.best_values)
0257     # plt.plot(xx, result.best_fit, 'r-', label='best fit')
0258     if len(result.best_fit)>0:
0259         ax.plot(xx[cond], result.best_fit, 'r-', label='sigma=%g,err=%g' %(result.params['sigma'].value,result.params['sigma'].stderr))
0260     ax.legend(title=header, frameon=False,loc='upper left')
0261
0262     ymax  = np.max(n)
0263     if klog:
0264         ax.set_yscale('log')
0265         ax.set_ylim(1,ymax*10)
0266     else:
0267         ax.set_ylim(0,ymax*1.3)
0268     return float(result.params['sigma'].value),float(result.params['sigma'].stderr)
0269
0270
0271 def plot_eff(pion_o, pion,eta_bins=np.linspace(-4, 4, 21)):
0272     fig, ax = plt.subplots(1,1,figsize=[6,6])
0273     plt.title("")
0274     ## eff
0275     # original eta of all particle
0276     sim_eta, _ = np.histogram(pion_o['eta'].values, bins=eta_bins)
0277     # original eta of particles get reconstruted
0278     rec_eta, _ = np.histogram(pion['eta'], bins=eta_bins)
0279     track_eff_total = np.sum(rec_eta)/np.sum(sim_eta)
0280
0281     eta_centers = (eta_bins[1:] + eta_bins[:-1])/2.
0282     eta_binsize = np.mean(np.diff(eta_centers))
0283     track_eff = np.nan_to_num(np.array(rec_eta)/np.array(sim_eta))
0284
0285     # binary distribution, pq*sqrt(N)
0286     # TODO check the errors
0287     # eff = np.mean(track_eff)
0288     track_err = np.nan_to_num(track_eff*(1. - track_eff)*np.reciprocal(np.sqrt(sim_eta)))
0289     # rec_err = eff*(1. - eff)*np.sqrt(rec_eta)
0290     # track_eff_lower = track_eff - np.maximum(np.zeros(shape=rec_eta.shape), (rec_eta - rec_err)/sim_eta)
0291     # track_eff_upper = np.minimum(np.ones(shape=rec_eta.shape), (rec_eta + rec_err)/sim_eta) - track_eff
0292     track_eff_lower = track_eff - np.maximum(np.zeros(shape=rec_eta.shape), track_eff - track_err)
0293     track_eff_upper = np.minimum(np.ones(shape=rec_eta.shape), track_eff + track_err) - track_eff
0294
0295     ax.errorbar(eta_centers, track_eff, xerr=eta_binsize/2., yerr=[track_eff_lower, track_eff_upper],
0296                 fmt='o', capsize=3)
0297     ax.set_ylim(0., 1.1)
0298     ax.set_xlim(-4.5, 4.5)
0299     ax.set_ylabel('Tracking Efficiency')#, fontsize=20)
0300     ax.set_xlabel(r'$\eta$')#, fontsize=20)
0301     ax.text(-4,1.04,"recon/generated events= %d / %d =%.2f" %(len(pion),len(pion_o),len(pion)/len(pion_o)))
0302     ax.axhline(1,ls='--',color='grey')
0303     return track_eff,track_err, eta_centers, fig
0304
0305
0306 def plot_resol(pion,params):
0307     fig, axs = plt.subplots(2,2, figsize=(10,6),dpi=300)
0308     plt.title("")
0309
0310     ## calculate resolutions
0311     dp_lim=5*2 #%
0312     th_lim=0.005*2 #rad
0313     ph_lim=0.03*2
0314     dca_lim = 3*2
0315     nbins  = 200
0316
0317     # ----------------momentum resolution----------------------
0318     i  = 0
0319     ax = axs.flat[i]
0320     # obs    = 'theta'
0321     rec   = 1./np.array(params['qOverP'])
0322     sim   = np.array(pion['mom'])
0323     delta = (rec - sim)/sim*100 # in %
0324     sig_mom,err_mom=hist_gaus(delta,ax,np.linspace(-dp_lim, dp_lim, nbins+1),klog=0,header=None)
0325     ax.set_xlabel(r'$\delta p/p$ [%]')#, fontsize=20)
0326
0327     # ----------------theta resolution----------------------
0328     i+=1
0329     ax = axs.flat[i]
0330     obs    = 'theta'
0331     rec    = np.array(params[obs])
0332     sim    = np.array(pion[obs])
0333     delta    = (rec - sim)
0334     sig_th,err_th=hist_gaus(delta,ax,np.linspace(-th_lim, th_lim, nbins+1),klog=0,header=None)
0335     ax.set_xlabel(r'$d\theta$ [rad]')#, fontsize=20)
0336
0337     # ----------------phi resolution----------------------
0338     i+=1
0339     ax = axs.flat[i]
0340     obs   = 'phi'
0341     rec   = np.array(params[obs])
0342     sim   = np.array(pion[obs])
0343     delta = (rec - sim)
0344     sig_ph,err_ph=hist_gaus(delta,ax,np.linspace(-ph_lim, ph_lim, nbins+1),klog=0,header=None)
0345     ax.set_xlabel(r'$d\phi$ [rad]')#, fontsize=20)
0346
0347     # ----------------theta resolution----------------------
0348     i+=1
0349     ax = axs.flat[i]
0350     obs    = 'loc.a'
0351     rec    = np.array(params[obs])
0352     sim    = 0#np.array(pion[obs])
0353     delta    = (rec - sim)
0354     sig_dca,err_dca=hist_gaus(delta,ax,np.linspace(-dca_lim, dca_lim, nbins+1),klog=0,header=None)
0355     ax.set_xlabel(r'DCA$_r$ [mm]')#, fontsize=20)
0356     return sig_mom, err_mom, sig_th, err_th, sig_ph, err_ph, sig_dca, err_dca, fig
0357
0358
0359 ## this only works for single particle simulation, no hit-based track-particle matching
0360 ## set eff_eta_bins to [] to disable eff plots. same for resol
0361 ## degrees of 3,10,40,140,170,177 correspond to eta bin of -3.5,-2.5,-1,1,2.5,3.5
0362 def performance_plot(fname,rootfile_path,eff_eta_bins=np.linspace(-4,4,21),resol_deg_bins=np.array([3,10,40,140,170,177]),pid=211, out_dir="."):
0363     out_dir=out_dir+"/"
0364     ## read events tree
0365     tag     = fname.split('/')[-1][:-5]
0366     tree    = read_ur(fname,"events",rootfile_path)
0367
0368     bname   = "CentralCKFTrackParameters"
0369     params  = get_branch(tree,bname)
0370     ## to deal with the nested subsubentry created by the covariance column
0371     ## also keep only the first track if more than one are reconstructed
0372     params  = params[(params.subentry==0)&(params.subsubentry==0)].reset_index()
0373     params["eta"] = theta2eta(params.theta)
0374     params  = params.drop(['index','subsubentry'],axis=1)
0375
0376     bname   = "MCParticles"
0377     part    = get_branch(tree,bname)
0378     # primary particle
0379     cond1   = part.generatorStatus==1
0380     # pid (defaut=pi+)
0381     cond2   = part.PDG==pid
0382
0383     pion    = part[cond1&cond2].reset_index()
0384     x,y,z   = pion[["momentum.x","momentum.y","momentum.z"]].to_numpy().T
0385     r       = np.sqrt(x**2 + y**2 + z**2)  # Magnitude of the vector (distance to origin)
0386     pion["theta"]= np.arccos(z / r)
0387     pion["phi"]  = np.arctan2(y, x)
0388     pion["eta"]  = theta2eta(pion.theta)
0389     pion["mom"]  = r
0390     # select particles that get reconstructed
0391     pion_o = pion #save all generaged pions
0392     cond = pion.entry.isin(params.entry)
0393     pion = pion.drop('index',axis=1)
0394     pion = pion[cond].reset_index()
0395     # now pion and params should be one-to-one
0396
0397     ## eff plot
0398     if len(eff_eta_bins)>0:
0399         dump=plot_eff(pion_o,pion,eff_eta_bins)
0400         dump[-1].axes[0].set_title(fname)
0401         dump[-1].savefig(f'{out_dir}/eff_{tag}.png')
0402         plt.close()
0403         formatted_string = f"{tag};{dump[0].tolist()};{dump[1].tolist()};{dump[2].tolist()}"
0404         with open(f'{out_dir}eff_out.txt', 'a') as eff_file:
0405             eff_file.write(formatted_string + '\n')
0406
0407     ## resolutions
0408     if len(resol_deg_bins)>0:
0409         ## use this for Joe's rootfiles which is generated with eta_bin=0.5
0410         if len(resol_deg_bins)==1:
0411             dump=plot_resol(pion, params)
0412             dump[-1].axes[0].set_title(f"{tag}")
0413             dump[-1].savefig(f'../output/resol_{tag}.png')
0414             plt.close()
0415             temp = list(dump[0:-1])
0416             temp = ' '.join(map(str,temp))
0417             formatted_string = f"{tag} {temp}"
0418             with open(f'{out_dir}resol_out_whole.txt', 'a') as resol_file:
0419                 resol_file.write(formatted_string + '\n')
0420
0421
0422         ## need to make slices of eta/theta for simulation campaign data
0423         else:
0424             for dd in np.arange(len(resol_deg_bins)-1):
0425                 deg_lo = resol_deg_bins[dd]
0426                 deg_hi = resol_deg_bins[dd+1]
0427                 cond1  = (pion.theta/deg2rad)>deg_lo
0428                 cond2  = (pion.theta/deg2rad)<=deg_hi
0429                 cond   = cond1&cond2
0430                 pion_slice   = pion[cond].reset_index()
0431                 params_slice = params[cond].reset_index()
0432                 ## only proceed with good stats
0433                 if len(pion_slice)>100:
0434                     dump=plot_resol(pion_slice, params_slice)
0435                     dump[-1].axes[0].set_title(f"{deg_lo} to {deg_hi} in "+tag)
0436                     dump[-1].savefig(f'{out_dir}resol_{tag}_theta_{deg_lo}_{deg_hi}.png')
0437                     plt.close()
0438                     temp = list(dump[0:-1])
0439                     temp = ' '.join(map(str,temp))
0440                     formatted_string = f"{tag} {deg_lo} {deg_hi} {temp}"
0441                     with open(f'{out_dir}resol_out_slices.txt', 'a') as resol_file:
0442                         resol_file.write(formatted_string + '\n')
0443
0444
0445 if __name__ == "__main__":
0446     eff_eta_bins   = np.linspace(-4,4,41)
0447     # resol_deg_bins =[0]
0448     resol_deg_bins = np.array([3,10,40,140,170,177])
0449     mom_list = [0.5, 1, 2, 5, 10, 15]
0450     eta_list = [-3.5,-2.5,-1,1,2.5,3.5]
0451     setting="default"
0452     nev=5000
0453     dirname = ""
0454     for mom in mom_list:
0455         for ii in np.arange(len(eta_list)-1):
0456             filename = dirname+f"rec_{setting}_eta_{eta_list[ii]:g}_{eta_list[ii+1]:g}_{mom}GeV_{nev}.root"
0457             print(filename)
0458             performance_plot(filename,"",eff_eta_bins=eff_eta_bins, resol_deg_bins=resol_deg_bins)
0459             break