File indexing completed on 2025-11-05 10:03:02
0001 '''
0002 check tracking eff, resol for EIC-ePIC single particle simulation.
0003 input: recon rootfiles with default name format: rec_{setting}_eta_{eta_list[ii]:g}_{eta_list[ii+1]:g}_{mom}GeV_{nev}.root
0004 Shujie Li, Sept 2024
0005 '''
0006
0007
0008 from matplotlib.backends.backend_pdf import PdfPages
0009 from lmfit.models import GaussianModel
0010
0011 from importlib import reload
0012 import os
0013 import sys
0014 import types
0015 import pandas as pd
0016 import scipy
0017 from scipy.signal import find_peaks
0018 pd.set_option('display.max_rows', 500)
0019 pd.options.display.max_rows = 40
0020 pd.options.display.min_rows = 20
0021 pd.options.display.max_columns = 100
0022
0023 import awkward as ak
0024
0025 import uproot as ur
0026 print('Uproot version: ' + ur.__version__)
0027 ur.default_library="pd"
0028 from scipy import stats
0029 import numpy as np
0030 import argparse
0031 from matplotlib import pyplot as plt
0032 from matplotlib.gridspec import GridSpec
0033 import matplotlib.ticker as ticker
0034 import matplotlib.cm as cm
0035 import matplotlib as mpl
0036 import matplotlib.pylab as plt
0037 plt.rcParams['figure.figsize'] = [8.0, 6.0]
0038 plt.rcParams['ytick.direction'] = 'in'
0039 plt.rcParams['xtick.direction'] = 'in'
0040 plt.rcParams['xaxis.labellocation'] = 'right'
0041 plt.rcParams['yaxis.labellocation'] = 'top'
0042 SMALL_SIZE = 10
0043 MEDIUM_SIZE = 12
0044 BIGGER_SIZE = 16
0045 plt.rc('font', size=SMALL_SIZE)
0046 plt.rc('axes', titlesize=MEDIUM_SIZE)
0047 plt.rc('axes', labelsize=MEDIUM_SIZE)
0048 plt.rc('xtick', labelsize=MEDIUM_SIZE)
0049 plt.rc('ytick', labelsize=MEDIUM_SIZE)
0050 plt.rc('legend', fontsize=SMALL_SIZE)
0051 plt.rc('figure', titlesize=BIGGER_SIZE)
0052
0053 deg2rad = np.pi/180.0
0054
0055 def theta2eta(xx, inverse=0):
0056 xx = np.array(xx)
0057 if inverse==1:
0058 return np.arctan((np.e)**(-xx))*2
0059 else:
0060 return -np.log(np.tan(xx/2.))
0061
0062
0063
0064
0065 def read_ur(fname, tname, s3_dir=""):
0066 if len(s3_dir)>1:
0067 server = 'root://dtn-eic.jlab.org//work/eic2/'
0068 fname = server+s3_dir+fname
0069 tree = ur.open(fname)[tname]
0070 print(f"read_ur: read {fname}:{tname}. {tree.num_entries} events in total")
0071 return tree
0072
0073
0074
0075
0076 def get_branch(tree,bname="",kflatten=1):
0077 if bname not in tree.keys():
0078 sys.exit("ERROR(get_branch): can't find branch "+bname)
0079 df = tree[bname].array(library="ak")
0080 df = ak.to_dataframe(df)
0081 if isinstance(df,pd.Series):
0082 return df
0083
0084 colls = df.columns
0085 if len(colls)<1:
0086 print('cannot find any leaf under branch {}'.format(bname))
0087 return pd.DataFrame()
0088
0089
0090 cols = [str(c) for c in colls if str(c).startswith('{}.'.format(bname))]
0091
0092 if not cols:
0093 return df
0094
0095 for cname in list(cols):
0096 if "[" in cname:
0097 cols.remove(cname)
0098 elif "covariance.covariance" in cname:
0099 cols.remove(cname)
0100
0101 df = df[cols]
0102 df.rename(columns={c: c.replace(bname + '.', '') for c in df.columns}, inplace=True)
0103 if kflatten:
0104 df = df.reset_index()
0105
0106 return df
0107
0108 def hist_gaus(dataset, ax, bins=100, klog=0, header=None):
0109
0110 if not np.isscalar(bins):
0111 c1 = dataset <= bins[-1]
0112 c2 = dataset >= bins[0]
0113 dataset = dataset[c1 & c2]
0114
0115 n, bins, _ = ax.hist(dataset, bins, density=False, facecolor='b', alpha=0.3)
0116 xx = bins[0:-1] + (bins[1] - bins[0]) / 2.0
0117 ymax = np.max(n)
0118 std = np.std(dataset)
0119 mean = np.mean(dataset)
0120
0121 c1 = xx <= (mean + 2 * std)
0122 c2 = xx >= (mean - 2 * std)
0123 cond = c1 & c2
0124
0125 ii = 0
0126 while len(n[cond]) < len(bins) / 2.0:
0127
0128 ax.clear()
0129 diff = (bins[-1] - bins[0]) / 2.0 / 2.0
0130 n, bins, _ = ax.hist(
0131 dataset,
0132 np.linspace(
0133 bins[0] + diff,
0134 bins[-1] - diff,
0135 len(bins)
0136 ),
0137 density=False,
0138 facecolor='b',
0139 alpha=0.3
0140 )
0141 xx = bins[0:-1] + (bins[1]-bins[0]) / 2.0
0142 c1 = xx <= (mean + 2 * std)
0143 c2 = xx >= (mean - 2 * std)
0144 cond = c1 & c2
0145 ii += 1
0146 if ii > 10:
0147 print("ERROR(hist_gaus): can not adjust the range.")
0148 return -1, -1, -1
0149
0150 model = GaussianModel()
0151
0152 params = model.make_params(center=np.median(xx[cond]), amplitude=np.max(n), sigma=np.std(xx[cond]))
0153 result = model.fit(n, params, x=xx)
0154
0155
0156 std = result.params['sigma']
0157
0158
0159 c1 = xx <= (mean + 2 * std)
0160 c2 = xx >= (mean - 2 * std)
0161 cond = c1 & c2
0162 if len(xx[cond]) < 10:
0163 print("Fit failed")
0164 return -1, -1, -1
0165 model = GaussianModel()
0166 params = model.make_params(center=np.median(xx[cond]), amplitude=np.max(n[cond]), sigma=np.std(xx[cond]))
0167 try:
0168 result = model.fit(n[cond], params, x=xx[cond])
0169 except TypeError:
0170 print("Fit failed")
0171 return -1, -1,-1
0172 if result.params['sigma'].stderr == None:
0173 print("Fit failed")
0174 return -1, -1, -1
0175
0176
0177
0178
0179 rv_mean = float(result.params['center'])
0180 rv_std = float(result.params['sigma'])
0181 rv_std_err = float(result.params['sigma'].stderr)
0182 rv_ampl = float(result.params['amplitude'])
0183
0184 if len(result.best_fit) > 0:
0185 ax.plot(xx[cond], result.best_fit, 'r-', label='sigma=%g, err=%g' %(result.params['sigma'], result.params['sigma'].stderr))
0186
0187 ax.legend(title=header, frameon=False, loc='upper left')
0188
0189 ymax = np.max(n)
0190 if klog:
0191 ax.set_yscale('log')
0192 ax.set_ylim(1, ymax * 10)
0193 else:
0194 ax.set_ylim(0, ymax * 1.3)
0195 return float(result.params['sigma']), float(result.params['sigma'].stderr)
0196
0197
0198
0199 def hist_gaus(dataset,ax, bins=100,klog=0,header=None):
0200
0201 if not np.isscalar(bins):
0202 c1 = dataset<=bins[-1]
0203 c2 = dataset>=bins[0]
0204 dataset=dataset[c1&c2]
0205
0206 n, bins, patches = ax.hist(dataset, bins,density=False, facecolor='b', alpha=0.3)
0207 xx = bins[0:-1]+(bins[1]-bins[0])/2.0
0208 ymax = np.max(n)
0209 std = np.std(dataset)
0210 mean = np.mean(dataset)
0211 c1 = xx<=(mean+2*std)
0212 c2 = xx>=(mean-2*std)
0213 cond = c1&c2
0214
0215 ii=0
0216 while len(n[cond])<(len(bins)/2.0):
0217
0218 ax.clear()
0219 diff = (bins[-1]-bins[0])/2.0/2.0
0220 n, bins, patches = ax.hist(dataset, np.linspace(bins[0]+diff,bins[-1]-diff,len(bins)),density=False, facecolor='b', alpha=0.3)
0221 xx = bins[0:-1]+(bins[1]-bins[0])/2.0
0222 c1 = xx<=(mean+2*std)
0223 c2 = xx>=(mean-2*std)
0224 cond = c1&c2
0225 ii+=1
0226 if ii>5:
0227 print("ERROR(hist_gaus): can not adjust the range.")
0228 return -1,-1
0229
0230 model = GaussianModel()
0231
0232 params = model.make_params(center=np.median(xx[cond]), amplitude=np.max(n), sigma=np.std(xx[cond]))
0233 result = model.fit(n, params, x=xx)
0234
0235
0236 std = result.params['sigma'].value
0237
0238 c1 = xx<=(mean+2*std)
0239 c2 = xx>=(mean-2*std)
0240 cond = c1&c2
0241 if len(xx[cond])<10:
0242 print("Fit failed")
0243 return -1, -1
0244 model = GaussianModel()
0245 params = model.make_params(center=np.median(xx[cond]), amplitude=np.max(n[cond]), sigma=np.std(xx[cond]))
0246 try:
0247 result = model.fit(n[cond], params, x=xx[cond])
0248 except TypeError:
0249 print("Fit failed")
0250 return -1,-1
0251 if result.params['sigma'].stderr == None:
0252 print("Fit failed")
0253 return -1, -1
0254
0255
0256
0257
0258 if len(result.best_fit)>0:
0259 ax.plot(xx[cond], result.best_fit, 'r-', label='sigma=%g,err=%g' %(result.params['sigma'].value,result.params['sigma'].stderr))
0260 ax.legend(title=header, frameon=False,loc='upper left')
0261
0262 ymax = np.max(n)
0263 if klog:
0264 ax.set_yscale('log')
0265 ax.set_ylim(1,ymax*10)
0266 else:
0267 ax.set_ylim(0,ymax*1.3)
0268 return float(result.params['sigma'].value),float(result.params['sigma'].stderr)
0269
0270
0271 def plot_eff(pion_o, pion,eta_bins=np.linspace(-4, 4, 21)):
0272 fig, ax = plt.subplots(1,1,figsize=[6,6])
0273 plt.title("")
0274
0275
0276 sim_eta, _ = np.histogram(pion_o['eta'].values, bins=eta_bins)
0277
0278 rec_eta, _ = np.histogram(pion['eta'], bins=eta_bins)
0279 track_eff_total = np.sum(rec_eta)/np.sum(sim_eta)
0280
0281 eta_centers = (eta_bins[1:] + eta_bins[:-1])/2.
0282 eta_binsize = np.mean(np.diff(eta_centers))
0283 track_eff = np.nan_to_num(np.array(rec_eta)/np.array(sim_eta))
0284
0285
0286
0287
0288 track_err = np.nan_to_num(track_eff*(1. - track_eff)*np.reciprocal(np.sqrt(sim_eta)))
0289
0290
0291
0292 track_eff_lower = track_eff - np.maximum(np.zeros(shape=rec_eta.shape), track_eff - track_err)
0293 track_eff_upper = np.minimum(np.ones(shape=rec_eta.shape), track_eff + track_err) - track_eff
0294
0295 ax.errorbar(eta_centers, track_eff, xerr=eta_binsize/2., yerr=[track_eff_lower, track_eff_upper],
0296 fmt='o', capsize=3)
0297 ax.set_ylim(0., 1.1)
0298 ax.set_xlim(-4.5, 4.5)
0299 ax.set_ylabel('Tracking Efficiency')
0300 ax.set_xlabel(r'$\eta$')
0301 ax.text(-4,1.04,"recon/generated events= %d / %d =%.2f" %(len(pion),len(pion_o),len(pion)/len(pion_o)))
0302 ax.axhline(1,ls='--',color='grey')
0303 return track_eff,track_err, eta_centers, fig
0304
0305
0306 def plot_resol(pion,params):
0307 fig, axs = plt.subplots(2,2, figsize=(10,6),dpi=300)
0308 plt.title("")
0309
0310
0311 dp_lim=5*2
0312 th_lim=0.005*2
0313 ph_lim=0.03*2
0314 dca_lim = 3*2
0315 nbins = 200
0316
0317
0318 i = 0
0319 ax = axs.flat[i]
0320
0321 rec = 1./np.array(params['qOverP'])
0322 sim = np.array(pion['mom'])
0323 delta = (rec - sim)/sim*100
0324 sig_mom,err_mom=hist_gaus(delta,ax,np.linspace(-dp_lim, dp_lim, nbins+1),klog=0,header=None)
0325 ax.set_xlabel(r'$\delta p/p$ [%]')
0326
0327
0328 i+=1
0329 ax = axs.flat[i]
0330 obs = 'theta'
0331 rec = np.array(params[obs])
0332 sim = np.array(pion[obs])
0333 delta = (rec - sim)
0334 sig_th,err_th=hist_gaus(delta,ax,np.linspace(-th_lim, th_lim, nbins+1),klog=0,header=None)
0335 ax.set_xlabel(r'$d\theta$ [rad]')
0336
0337
0338 i+=1
0339 ax = axs.flat[i]
0340 obs = 'phi'
0341 rec = np.array(params[obs])
0342 sim = np.array(pion[obs])
0343 delta = (rec - sim)
0344 sig_ph,err_ph=hist_gaus(delta,ax,np.linspace(-ph_lim, ph_lim, nbins+1),klog=0,header=None)
0345 ax.set_xlabel(r'$d\phi$ [rad]')
0346
0347
0348 i+=1
0349 ax = axs.flat[i]
0350 obs = 'loc.a'
0351 rec = np.array(params[obs])
0352 sim = 0
0353 delta = (rec - sim)
0354 sig_dca,err_dca=hist_gaus(delta,ax,np.linspace(-dca_lim, dca_lim, nbins+1),klog=0,header=None)
0355 ax.set_xlabel(r'DCA$_r$ [mm]')
0356 return sig_mom, err_mom, sig_th, err_th, sig_ph, err_ph, sig_dca, err_dca, fig
0357
0358
0359
0360
0361
0362 def performance_plot(fname,rootfile_path,eff_eta_bins=np.linspace(-4,4,21),resol_deg_bins=np.array([3,10,40,140,170,177]),pid=211, out_dir="."):
0363 out_dir=out_dir+"/"
0364
0365 tag = fname.split('/')[-1][:-5]
0366 tree = read_ur(fname,"events",rootfile_path)
0367
0368 bname = "CentralCKFTrackParameters"
0369 params = get_branch(tree,bname)
0370
0371
0372 params = params[(params.subentry==0)&(params.subsubentry==0)].reset_index()
0373 params["eta"] = theta2eta(params.theta)
0374 params = params.drop(['index','subsubentry'],axis=1)
0375
0376 bname = "MCParticles"
0377 part = get_branch(tree,bname)
0378
0379 cond1 = part.generatorStatus==1
0380
0381 cond2 = part.PDG==pid
0382
0383 pion = part[cond1&cond2].reset_index()
0384 x,y,z = pion[["momentum.x","momentum.y","momentum.z"]].to_numpy().T
0385 r = np.sqrt(x**2 + y**2 + z**2)
0386 pion["theta"]= np.arccos(z / r)
0387 pion["phi"] = np.arctan2(y, x)
0388 pion["eta"] = theta2eta(pion.theta)
0389 pion["mom"] = r
0390
0391 pion_o = pion
0392 cond = pion.entry.isin(params.entry)
0393 pion = pion.drop('index',axis=1)
0394 pion = pion[cond].reset_index()
0395
0396
0397
0398 if len(eff_eta_bins)>0:
0399 dump=plot_eff(pion_o,pion,eff_eta_bins)
0400 dump[-1].axes[0].set_title(fname)
0401 dump[-1].savefig(f'{out_dir}/eff_{tag}.png')
0402 plt.close()
0403 formatted_string = f"{tag};{dump[0].tolist()};{dump[1].tolist()};{dump[2].tolist()}"
0404 with open(f'{out_dir}eff_out.txt', 'a') as eff_file:
0405 eff_file.write(formatted_string + '\n')
0406
0407
0408 if len(resol_deg_bins)>0:
0409
0410 if len(resol_deg_bins)==1:
0411 dump=plot_resol(pion, params)
0412 dump[-1].axes[0].set_title(f"{tag}")
0413 dump[-1].savefig(f'../output/resol_{tag}.png')
0414 plt.close()
0415 temp = list(dump[0:-1])
0416 temp = ' '.join(map(str,temp))
0417 formatted_string = f"{tag} {temp}"
0418 with open(f'{out_dir}resol_out_whole.txt', 'a') as resol_file:
0419 resol_file.write(formatted_string + '\n')
0420
0421
0422
0423 else:
0424 for dd in np.arange(len(resol_deg_bins)-1):
0425 deg_lo = resol_deg_bins[dd]
0426 deg_hi = resol_deg_bins[dd+1]
0427 cond1 = (pion.theta/deg2rad)>deg_lo
0428 cond2 = (pion.theta/deg2rad)<=deg_hi
0429 cond = cond1&cond2
0430 pion_slice = pion[cond].reset_index()
0431 params_slice = params[cond].reset_index()
0432
0433 if len(pion_slice)>100:
0434 dump=plot_resol(pion_slice, params_slice)
0435 dump[-1].axes[0].set_title(f"{deg_lo} to {deg_hi} in "+tag)
0436 dump[-1].savefig(f'{out_dir}resol_{tag}_theta_{deg_lo}_{deg_hi}.png')
0437 plt.close()
0438 temp = list(dump[0:-1])
0439 temp = ' '.join(map(str,temp))
0440 formatted_string = f"{tag} {deg_lo} {deg_hi} {temp}"
0441 with open(f'{out_dir}resol_out_slices.txt', 'a') as resol_file:
0442 resol_file.write(formatted_string + '\n')
0443
0444
0445 if __name__ == "__main__":
0446 eff_eta_bins = np.linspace(-4,4,41)
0447
0448 resol_deg_bins = np.array([3,10,40,140,170,177])
0449 mom_list = [0.5, 1, 2, 5, 10, 15]
0450 eta_list = [-3.5,-2.5,-1,1,2.5,3.5]
0451 setting="default"
0452 nev=5000
0453 dirname = ""
0454 for mom in mom_list:
0455 for ii in np.arange(len(eta_list)-1):
0456 filename = dirname+f"rec_{setting}_eta_{eta_list[ii]:g}_{eta_list[ii+1]:g}_{mom}GeV_{nev}.root"
0457 print(filename)
0458 performance_plot(filename,"",eff_eta_bins=eff_eta_bins, resol_deg_bins=resol_deg_bins)
0459 break