File indexing completed on 2026-03-29 08:37:26
0001 import os
0002 from datetime import datetime
0003 from matplotlib.backends.backend_pdf import PdfPages
0004 import matplotlib.pyplot as plt
0005
0006 _PDF_BASENAME = 'plot_single_measurements'
0007 _PDF_COVER_ADDED = False
0008
0009 def _cover_title_from_globals():
0010 for key in ('tag', 'fname'):
0011 if key in globals() and globals()[key] is not None:
0012 val = str(globals()[key])
0013 return os.path.basename(val)
0014 return _PDF_BASENAME
0015
0016 def _add_pdf_cover(fname, pdf_pages):
0017 fig = plt.figure(figsize=(8.5, 11))
0018 fig.text(0.5, 0.6, 'source file: '+fname+'.root', ha='center', va='center', fontsize=16)
0019 fig.text(0.5, 0.5, datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
0020 ha='center', va='center', fontsize=12)
0021 pdf_pages.savefig(fig)
0022 plt.close(fig)
0023
0024 _PDF_PATH = f'{_PDF_BASENAME}_figures.pdf'
0025 _pdf_pages = PdfPages(_PDF_PATH)
0026
0027 def _save_new_figs(figs_before):
0028 global _PDF_COVER_ADDED
0029 if not _PDF_COVER_ADDED:
0030 _add_pdf_cover(_cover_title_from_globals(), _pdf_pages)
0031 _PDF_COVER_ADDED = True
0032 for num in plt.get_fignums():
0033 if num not in figs_before:
0034 _pdf_pages.savefig(plt.figure(num))
0035
0036
0037
0038
0039
0040 import sys
0041 import numpy as np
0042 from pathlib import Path
0043 import matplotlib.pyplot as plt
0044 import pandas as pd
0045 import awkward as ak
0046
0047 path_to_epic_analysis_base_py="/global/homes/s/shujie/eic_dir/worksim/scripts/modules/"
0048 repo_root = Path(path_to_epic_analysis_base_py)
0049 if str(repo_root) not in sys.path:
0050 sys.path.insert(0, str(repo_root))
0051
0052 import epic_analysis_base as ana
0053
0054
0055
0056
0057
0058
0059 mom_name="500MeV"
0060 deg_name="3to50deg"
0061 s3_dir = f"EPIC/RECO/25.12.0/epic_craterlake/SINGLE/pi+/{mom_name}/{deg_name}/"
0062 fname = f"pi+_{mom_name}_{deg_name}.0001.eicrecon.edm4eic.root"
0063 tag=fname.split('/')[-1]
0064 tag=tag.split('.root')[0]
0065
0066 ana.COL_TABLE = ana.get_col_table(fname,s3_dir,0)
0067 tree = ana.read_ur(fname,"events",s3_dir,entry_stop=100)
0068
0069 _fig_nums_before = set(plt.get_fignums())
0070
0071 parts = ana.get_part(tree)
0072 fig,axs=plt.subplots(1,3, figsize=(12,3))
0073 _=axs[0].hist(parts["eta"],bins=100)
0074 axs[0].set_title("eta")
0075 _=axs[1].hist(parts["phi"],bins=100)
0076 axs[1].set_title("phi")
0077 _=axs[2].hist(parts["mom"],bins=np.arange(0,20,0.1))
0078 axs[2].set_title("momentum [GeV]")
0079
0080 _save_new_figs(_fig_nums_before)
0081
0082 traj_meas_hits=ana.get_traj_hits_particle(tree)
0083 traj_out_hits=ana.get_traj_hits_particle(tree,measurement_name="outliers_deprecated")
0084
0085
0086 vname = "measurementChi2"
0087 chi2=ana.get_relation(tree,"CentralCKFTrajectories",vname)
0088 traj_meas_hits[vname]=np.hstack(chi2["values"])
0089
0090 vname = "outlierChi2"
0091 chi2=ana.get_relation(tree,"CentralCKFTrajectories",vname)
0092 traj_out_hits=traj_out_hits[traj_out_hits.nOutliers>0].reset_index()
0093 traj_out_hits[vname]=np.hstack(chi2["values"])
0094
0095
0096
0097
0098
0099 _fig_nums_before = set(plt.get_fignums())
0100 for nn in ["barrel"]:
0101 plt.figure()
0102 if nn=="barrel":
0103 ax='x'
0104 ay='y'
0105 elif nn=="endcap":
0106 ax='z'
0107 ay='r'
0108 plt.xlabel(ax+' [mm]')
0109 plt.ylabel(ay+' [mm]')
0110 ax = 'position.'+ax
0111 ay = 'position.'+ay
0112 cond = traj_meas_hits["hits_colName"].str.contains(nn, case=False, na=False)
0113 df = traj_meas_hits[cond]
0114 plt.scatter(df[ax],df[ay],s=0.1,c='b',label="measurement hits")
0115
0116 cond = traj_out_hits["hits_colName"].str.contains(nn, case=False, na=False)
0117 df = traj_out_hits[cond]
0118 plt.scatter(df[ax],df[ay],s=1,c='r',label="outlier hits")
0119 plt.legend()
0120 plt.title(nn+" region")
0121
0122
0123 _save_new_figs(_fig_nums_before)
0124
0125 _fig_nums_before = set(plt.get_fignums())
0126
0127
0128
0129
0130 plt.figure()
0131 scounts=np.zeros(len(ana.barrel_range))
0132 rcounts=np.zeros(len(ana.barrel_range))
0133
0134 sim_col = pd.Series(ana.name_sim_barrel).drop_duplicates().tolist()
0135 rec_col = pd.Series(ana.name_rec_barrel).drop_duplicates().tolist()
0136 for ss,rr in zip(sim_col, rec_col):
0137 sim_hits= ana.get_branch_df(tree,ss)
0138 sim_hits['position.r']=np.sqrt(sim_hits["position.x"]**2+sim_hits["position.y"]**2)
0139 r_sim = sim_hits["position.r"]
0140 rec_hits= ana.get_branch_df(tree,rr)
0141 rec_hits['position.r']=np.sqrt(rec_hits["position.x"]**2+rec_hits["position.y"]**2)
0142 r_rec = rec_hits["position.r"]
0143
0144 for ii,(r1,r2) in enumerate(ana.barrel_range):
0145 n_sim=len(sim_hits[(r_sim>r1)&(r_sim<r2)]["entry"].unique())
0146 n_rec=len(rec_hits[(r_rec>r1)&(r_rec<r2)]["entry"].unique())
0147 if n_sim>0:
0148 print(ss, r1,r2, n_sim, n_rec)
0149 scounts[ii]+=n_sim
0150 rcounts[ii]+=n_rec
0151
0152
0153
0154 bin_centers=[(a+b)/2 for (a,b) in ana.barrel_range]
0155 _=plt.hist(bin_centers, bins=np.hstack(ana.barrel_range), weights=scounts, histtype="step", label="sim hits")
0156 _=plt.hist(bin_centers, bins=np.hstack(ana.barrel_range), weights=rcounts, histtype="step",label="digi hits")
0157
0158
0159
0160 def check_ranges(arr, ranges):
0161 return [int(((arr >= lower) & (arr < upper))) for (lower, upper) in ranges]
0162
0163
0164 def get_range_counts(traj_hits,axis='r'):
0165 if axis=='r':
0166 my_range=ana.barrel_range
0167 my_var ='position.r'
0168 elif axis=='z':
0169 my_range=ana.disk_range
0170 my_var ='position.z'
0171 rbins = traj_hits[my_var].apply(lambda arr: check_ranges(arr, my_range))
0172
0173 rbins_df = pd.DataFrame(rbins.tolist(), index=traj_hits.index)
0174
0175 rbins_unique = (
0176 rbins_df
0177 .groupby([traj_hits['entry'], traj_hits['subentry']])
0178 .max()
0179 )
0180
0181 return rbins_unique.sum()
0182
0183
0184
0185 rbins = get_range_counts(traj_meas_hits, 'r')
0186 _=plt.hist(bin_centers, bins=np.hstack(ana.barrel_range), weights=rbins,histtype="step",label="measurements")
0187
0188 rbins = get_range_counts(traj_out_hits, 'r')
0189 _=plt.hist(bin_centers, bins=np.hstack(ana.barrel_range), weights=rbins,label="outliers")
0190 plt.legend(frameon=0)
0191
0192
0193 plt.xlabel("R[mm]")
0194 plt.xlim(0,900)
0195 plt.title("Barrel hits in tracking")
0196 plt.ylabel("# of events")
0197
0198 _save_new_figs(_fig_nums_before)
0199
0200 _fig_nums_before = set(plt.get_fignums())
0201 plt.figure()
0202 bins=np.linspace(0,20,41)
0203 df=traj_meas_hits
0204 for nn in set(ana.name_rec_barrel):
0205 print(nn)
0206 cond=df["hits_colName"]==nn
0207 dfc = df[cond]
0208 if len(dfc)>0:
0209 _=plt.hist(np.hstack(dfc.measurementChi2),bins=bins,histtype="step",label=nn)
0210 plt.legend()
0211 plt.yscale('log')
0212 plt.xlabel("chi2")
0213 plt.ylabel("entries")
0214 plt.title("measurement chi2")
0215
0216 _save_new_figs(_fig_nums_before)
0217
0218
0219
0220 seg = ana.get_branch_df(tree, '_CentralTrackSegments_points')
0221 seg['position.r'] = np.sqrt(seg['position.x']**2 + seg['position.y']**2)
0222 seg['position.phi'] = np.arctan2(seg['position.y'], seg['position.x'])
0223
0224 for lower, upper in ana.barrel_range:
0225 in_range = (seg['position.r'] >= lower) & (seg['position.r'] < upper)
0226 if not in_range.any():
0227 continue
0228 top_surfaces = seg.loc[in_range, 'surface'].value_counts().head(2).index
0229 seg = seg[~(in_range & seg['surface'].isin(top_surfaces))]
0230
0231 from scipy.spatial import cKDTree
0232
0233 def _match_closest_seg(traj_df, seg_df):
0234 if traj_df.empty or seg_df.empty:
0235 out = traj_df.copy()
0236 out['closest_seg_x'] = np.nan
0237 out['closest_seg_y'] = np.nan
0238 out['closest_seg_z'] = np.nan
0239 out['closest_seg_dist'] = np.nan
0240 return out
0241
0242 seg_xyz = np.column_stack([seg_df['position.x'], seg_df['position.y'], seg_df['position.z']])
0243 traj_xyz = np.column_stack([traj_df['position.x'], traj_df['position.y'], traj_df['position.z']])
0244 ckdtree = cKDTree(seg_xyz)
0245 dist, idx = ckdtree.query(traj_xyz, k=1)
0246
0247 matched = traj_df.copy()
0248 matched['closest_seg_x'] = seg_df.iloc[idx]['position.x'].to_numpy()
0249 matched['closest_seg_y'] = seg_df.iloc[idx]['position.y'].to_numpy()
0250 matched['closest_seg_z'] = seg_df.iloc[idx]['position.z'].to_numpy()
0251 matched['closest_seg_dist'] = dist
0252 return matched
0253
0254 traj_meas_hits = (
0255 traj_meas_hits
0256 .groupby('entry', group_keys=False)
0257 .apply(lambda g: _match_closest_seg(g, seg[seg['entry'] == g.name]))
0258 )
0259
0260 traj_out_hits = (
0261 traj_out_hits
0262 .groupby('entry', group_keys=False)
0263 .apply(lambda g: _match_closest_seg(g, seg[seg['entry'] == g.name]))
0264 )
0265
0266
0267
0268
0269
0270
0271
0272
0273
0274
0275
0276 _fig_nums_before = set(plt.get_fignums())
0277 bins = np.linspace(0, 1, 41)
0278 fig, ax = plt.subplots(3, 1, sharex=True, figsize=(7, 8), gridspec_kw={'hspace': 0.08})
0279 for ii, rr in enumerate(ana.barrel_range):
0280 df = traj_meas_hits
0281 cond = (df['position.r'] > rr[0]) & (df['position.r'] < rr[1])
0282 counts_meas, _ = np.histogram(df[cond].closest_seg_dist, bins=bins)
0283 ax[0].hist(df[cond].closest_seg_dist, bins=bins, histtype='step', label=ana.barrel_name[ii])
0284
0285 df = traj_out_hits
0286 cond = (df['position.r'] > rr[0]) & (df['position.r'] < rr[1])
0287 counts_out, _ = np.histogram(df[cond].closest_seg_dist, bins=bins)
0288 ax[1].hist(df[cond].closest_seg_dist, bins=bins, histtype='step', label=ana.barrel_name[ii])
0289
0290 sum_counts = counts_meas + counts_out
0291 ax[2].hist(bins[:-1], bins=bins, weights=sum_counts, histtype='step', label=ana.barrel_name[ii])
0292
0293 ax[0].legend()
0294
0295 label_loc = (0.45, 0.92)
0296 ax[0].text(*label_loc, 'measurements', transform=ax[0].transAxes, va='top')
0297 ax[1].text(*label_loc, 'outliers', transform=ax[1].transAxes, va='top')
0298 ax[2].text(*label_loc, 'all', transform=ax[2].transAxes, va='top')
0299
0300 ax[0].set_yscale('log')
0301 ax[1].set_yscale('log')
0302 ax[2].set_yscale('log')
0303 ax[0].title('Hit residuals by category')
0304 plt.tight_layout()
0305
0306
0307
0308
0309 _save_new_figs(_fig_nums_before)
0310
0311
0312 _pdf_pages.close()