Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-03-29 08:37:26

0001 import os
0002 from datetime import datetime
0003 from matplotlib.backends.backend_pdf import PdfPages
0004 import matplotlib.pyplot as plt
0005 
0006 _PDF_BASENAME = 'plot_single_measurements'
0007 _PDF_COVER_ADDED = False
0008 
0009 def _cover_title_from_globals():
0010     for key in ('tag', 'fname'):
0011         if key in globals() and globals()[key] is not None:
0012             val = str(globals()[key])
0013             return os.path.basename(val)
0014     return _PDF_BASENAME
0015 
0016 def _add_pdf_cover(fname, pdf_pages):
0017     fig = plt.figure(figsize=(8.5, 11))
0018     fig.text(0.5, 0.6, 'source file: '+fname+'.root', ha='center', va='center', fontsize=16)
0019     fig.text(0.5, 0.5, datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
0020              ha='center', va='center', fontsize=12)
0021     pdf_pages.savefig(fig)
0022     plt.close(fig)
0023 
0024 _PDF_PATH = f'{_PDF_BASENAME}_figures.pdf'
0025 _pdf_pages = PdfPages(_PDF_PATH)
0026 
0027 def _save_new_figs(figs_before):
0028     global _PDF_COVER_ADDED
0029     if not _PDF_COVER_ADDED:
0030         _add_pdf_cover(_cover_title_from_globals(), _pdf_pages)
0031         _PDF_COVER_ADDED = True
0032     for num in plt.get_fignums():
0033         if num not in figs_before:
0034             _pdf_pages.savefig(plt.figure(num))
0035 
0036 # inspect single particle hit quality: measurement, outlier, chi2, residual
0037 
0038 ## load modules
0039 
0040 import sys
0041 import numpy as np
0042 from pathlib import Path
0043 import matplotlib.pyplot as plt
0044 import pandas as pd
0045 import awkward as ak
0046 # Add repo root to sys.path
0047 path_to_epic_analysis_base_py="/global/homes/s/shujie/eic_dir/worksim/scripts/modules/"
0048 repo_root = Path(path_to_epic_analysis_base_py)
0049 if str(repo_root) not in sys.path:
0050     sys.path.insert(0, str(repo_root))
0051 
0052 import epic_analysis_base as ana
0053 
0054 ## local file
0055 # fname     = "/global/cfs/cdirs/m3763/shujie/worksim/matmap/"+"rec__mom_0.5GeV_Central_2601_athavan_map.root"
0056 # s3_dir    = ''
0057 
0058 ## simulation campaign
0059 mom_name="500MeV"
0060 deg_name="3to50deg"
0061 s3_dir = f"EPIC/RECO/25.12.0/epic_craterlake/SINGLE/pi+/{mom_name}/{deg_name}/"
0062 fname    = f"pi+_{mom_name}_{deg_name}.0001.eicrecon.edm4eic.root"
0063 tag=fname.split('/')[-1]
0064 tag=tag.split('.root')[0]
0065 
0066 ana.COL_TABLE = ana.get_col_table(fname,s3_dir,0)
0067 tree          = ana.read_ur(fname,"events",s3_dir,entry_stop=100)
0068 
0069 _fig_nums_before = set(plt.get_fignums())
0070 
0071 parts = ana.get_part(tree)
0072 fig,axs=plt.subplots(1,3, figsize=(12,3))
0073 _=axs[0].hist(parts["eta"],bins=100)
0074 axs[0].set_title("eta")
0075 _=axs[1].hist(parts["phi"],bins=100)
0076 axs[1].set_title("phi")
0077 _=axs[2].hist(parts["mom"],bins=np.arange(0,20,0.1))
0078 axs[2].set_title("momentum [GeV]")
0079 ## save to pdf
0080 _save_new_figs(_fig_nums_before)
0081 
0082 traj_meas_hits=ana.get_traj_hits_particle(tree)
0083 traj_out_hits=ana.get_traj_hits_particle(tree,measurement_name="outliers_deprecated")
0084 
0085 ## append chi2 values
0086 vname = "measurementChi2"
0087 chi2=ana.get_relation(tree,"CentralCKFTrajectories",vname)
0088 traj_meas_hits[vname]=np.hstack(chi2["values"])
0089 
0090 vname = "outlierChi2"
0091 chi2=ana.get_relation(tree,"CentralCKFTrajectories",vname)
0092 traj_out_hits=traj_out_hits[traj_out_hits.nOutliers>0].reset_index()
0093 traj_out_hits[vname]=np.hstack(chi2["values"])
0094 
0095 # traj=ana.get_traj_relations(tree,l_var=["measurementChi2","outlierChi2"])
0096 
0097 # chi2 (for barrel region)
0098 
0099 _fig_nums_before = set(plt.get_fignums())
0100 for nn in ["barrel"]:#, "endcap"]:
0101     plt.figure()
0102     if nn=="barrel":
0103         ax='x'
0104         ay='y'
0105     elif nn=="endcap":
0106         ax='z'
0107         ay='r'
0108     plt.xlabel(ax+' [mm]')
0109     plt.ylabel(ay+' [mm]')
0110     ax = 'position.'+ax
0111     ay = 'position.'+ay
0112     cond = traj_meas_hits["hits_colName"].str.contains(nn, case=False, na=False)
0113     df = traj_meas_hits[cond]
0114     plt.scatter(df[ax],df[ay],s=0.1,c='b',label="measurement hits")
0115 
0116     cond = traj_out_hits["hits_colName"].str.contains(nn, case=False, na=False)
0117     df = traj_out_hits[cond]
0118     plt.scatter(df[ax],df[ay],s=1,c='r',label="outlier hits")
0119     plt.legend()
0120     plt.title(nn+" region")
0121     ## save to pdf
0122     
0123 _save_new_figs(_fig_nums_before)
0124 
0125 _fig_nums_before = set(plt.get_fignums())
0126 ## Rec hits v.s. R
0127 ## ---------Barrel------------##
0128 ## number of svt hits per layer
0129 ### events have sim/rec hits from given layer 
0130 plt.figure()
0131 scounts=np.zeros(len(ana.barrel_range))
0132 rcounts=np.zeros(len(ana.barrel_range))
0133 # for ss,rr in zip(name_sim_barrel,name_rec_barrel):
0134 sim_col = pd.Series(ana.name_sim_barrel).drop_duplicates().tolist()
0135 rec_col = pd.Series(ana.name_rec_barrel).drop_duplicates().tolist()
0136 for ss,rr in zip(sim_col, rec_col):
0137     sim_hits= ana.get_branch_df(tree,ss)
0138     sim_hits['position.r']=np.sqrt(sim_hits["position.x"]**2+sim_hits["position.y"]**2)
0139     r_sim = sim_hits["position.r"]
0140     rec_hits= ana.get_branch_df(tree,rr)
0141     rec_hits['position.r']=np.sqrt(rec_hits["position.x"]**2+rec_hits["position.y"]**2)
0142     r_rec = rec_hits["position.r"]
0143     
0144     for ii,(r1,r2) in enumerate(ana.barrel_range):
0145         n_sim=len(sim_hits[(r_sim>r1)&(r_sim<r2)]["entry"].unique())
0146         n_rec=len(rec_hits[(r_rec>r1)&(r_rec<r2)]["entry"].unique())
0147         if n_sim>0:
0148             print(ss, r1,r2, n_sim, n_rec)
0149             scounts[ii]+=n_sim            
0150             rcounts[ii]+=n_rec
0151 # plt.scatter((r1+r2)/2, n_meas)
0152 
0153 ## plot rec hits v.s. measurement
0154 bin_centers=[(a+b)/2 for (a,b) in ana.barrel_range]
0155 _=plt.hist(bin_centers, bins=np.hstack(ana.barrel_range), weights=scounts, histtype="step", label="sim hits")
0156 _=plt.hist(bin_centers, bins=np.hstack(ana.barrel_range), weights=rcounts, histtype="step",label="digi hits")
0157 
0158 
0159 ### events have measurements from given layer 
0160 def check_ranges(arr, ranges):
0161     return [int(((arr >= lower) & (arr < upper))) for (lower, upper) in ranges]
0162 
0163 # per row: list of 0/1 for each range
0164 def get_range_counts(traj_hits,axis='r'):
0165     if axis=='r':
0166         my_range=ana.barrel_range
0167         my_var  ='position.r'
0168     elif axis=='z':
0169         my_range=ana.disk_range
0170         my_var  ='position.z'
0171     rbins = traj_hits[my_var].apply(lambda arr: check_ranges(arr, my_range))
0172     # build a DataFrame where each column is a range bin
0173     rbins_df = pd.DataFrame(rbins.tolist(), index=traj_hits.index)
0174     # group by entry/subentry and take max (any hit in range for that pair)
0175     rbins_unique = (
0176         rbins_df
0177         .groupby([traj_hits['entry'], traj_hits['subentry']])
0178         .max()
0179     )
0180     # total counts per range (one per unique entry/subentry)
0181     return rbins_unique.sum()
0182 
0183 # sum the counts per range
0184 # rbins = np.array(rbins.to_list()).sum(axis=0)# counts = range_check_matrix.sum(axis=0)
0185 rbins = get_range_counts(traj_meas_hits, 'r')
0186 _=plt.hist(bin_centers, bins=np.hstack(ana.barrel_range), weights=rbins,histtype="step",label="measurements")
0187 
0188 rbins = get_range_counts(traj_out_hits, 'r')
0189 _=plt.hist(bin_centers, bins=np.hstack(ana.barrel_range), weights=rbins,label="outliers")
0190 plt.legend(frameon=0)
0191 
0192 # _=plt.hist(np.hstack(traj_hits.measurements_r),bins=np.hstack(barrel_range))
0193 plt.xlabel("R[mm]")
0194 plt.xlim(0,900)
0195 plt.title("Barrel hits in tracking")
0196 plt.ylabel("# of events")
0197 ## save to pdf
0198 _save_new_figs(_fig_nums_before)
0199 
0200 _fig_nums_before = set(plt.get_fignums())
0201 plt.figure()
0202 bins=np.linspace(0,20,41)
0203 df=traj_meas_hits
0204 for nn in set(ana.name_rec_barrel):
0205     print(nn)
0206     cond=df["hits_colName"]==nn
0207     dfc = df[cond]
0208     if len(dfc)>0:
0209         _=plt.hist(np.hstack(dfc.measurementChi2),bins=bins,histtype="step",label=nn)
0210 plt.legend()
0211 plt.yscale('log')
0212 plt.xlabel("chi2")
0213 plt.ylabel("entries")
0214 plt.title("measurement chi2")
0215 ## save to pdf
0216 _save_new_figs(_fig_nums_before)
0217 
0218 # residuals
0219 
0220 seg = ana.get_branch_df(tree, '_CentralTrackSegments_points')
0221 seg['position.r'] = np.sqrt(seg['position.x']**2 + seg['position.y']**2)
0222 seg['position.phi'] = np.arctan2(seg['position.y'], seg['position.x'])
0223 
0224 for lower, upper in ana.barrel_range:
0225     in_range = (seg['position.r'] >= lower) & (seg['position.r'] < upper)
0226     if not in_range.any():
0227         continue
0228     top_surfaces = seg.loc[in_range, 'surface'].value_counts().head(2).index ## drop apprach 1 and 2 surfaces
0229     seg = seg[~(in_range & seg['surface'].isin(top_surfaces))]
0230 
0231 from scipy.spatial import cKDTree
0232 
0233 def _match_closest_seg(traj_df, seg_df):
0234     if traj_df.empty or seg_df.empty:
0235         out = traj_df.copy()
0236         out['closest_seg_x'] = np.nan
0237         out['closest_seg_y'] = np.nan
0238         out['closest_seg_z'] = np.nan
0239         out['closest_seg_dist'] = np.nan
0240         return out
0241 
0242     seg_xyz = np.column_stack([seg_df['position.x'], seg_df['position.y'], seg_df['position.z']])
0243     traj_xyz = np.column_stack([traj_df['position.x'], traj_df['position.y'], traj_df['position.z']])
0244     ckdtree = cKDTree(seg_xyz)
0245     dist, idx = ckdtree.query(traj_xyz, k=1)
0246 
0247     matched = traj_df.copy()
0248     matched['closest_seg_x'] = seg_df.iloc[idx]['position.x'].to_numpy()
0249     matched['closest_seg_y'] = seg_df.iloc[idx]['position.y'].to_numpy()
0250     matched['closest_seg_z'] = seg_df.iloc[idx]['position.z'].to_numpy()
0251     matched['closest_seg_dist'] = dist
0252     return matched
0253 
0254 traj_meas_hits = (
0255     traj_meas_hits
0256     .groupby('entry', group_keys=False)
0257     .apply(lambda g: _match_closest_seg(g, seg[seg['entry'] == g.name]))
0258 )
0259 
0260 traj_out_hits = (
0261     traj_out_hits
0262     .groupby('entry', group_keys=False)
0263     .apply(lambda g: _match_closest_seg(g, seg[seg['entry'] == g.name]))
0264 )
0265 
0266 # ientry = 1
0267 # iseg = seg[seg['entry'] == ientry]
0268 # itraj = traj_meas_hits[traj_meas_hits['entry'] == ientry]
0269 
0270 # plt.scatter(itraj['position.x'], itraj['position.y'], s=10, label='traj')
0271 # plt.scatter(iseg['position.x'], iseg['position.y'], s=3, label='seg')
0272 # plt.scatter(itraj['closest_seg_x'], itraj['closest_seg_y'], s=6, label='closest seg')
0273 # plt.legend(frameon=False)
0274 # itraj
0275 
0276 _fig_nums_before = set(plt.get_fignums())
0277 bins = np.linspace(0, 1, 41)
0278 fig, ax = plt.subplots(3, 1, sharex=True, figsize=(7, 8), gridspec_kw={'hspace': 0.08})
0279 for ii, rr in enumerate(ana.barrel_range):
0280     df = traj_meas_hits
0281     cond = (df['position.r'] > rr[0]) & (df['position.r'] < rr[1])
0282     counts_meas, _ = np.histogram(df[cond].closest_seg_dist, bins=bins)
0283     ax[0].hist(df[cond].closest_seg_dist, bins=bins, histtype='step', label=ana.barrel_name[ii])
0284 
0285     df = traj_out_hits
0286     cond = (df['position.r'] > rr[0]) & (df['position.r'] < rr[1])
0287     counts_out, _ = np.histogram(df[cond].closest_seg_dist, bins=bins)
0288     ax[1].hist(df[cond].closest_seg_dist, bins=bins, histtype='step', label=ana.barrel_name[ii])
0289 
0290     sum_counts = counts_meas + counts_out
0291     ax[2].hist(bins[:-1], bins=bins, weights=sum_counts, histtype='step', label=ana.barrel_name[ii])
0292 
0293 ax[0].legend()
0294 
0295 label_loc = (0.45, 0.92)
0296 ax[0].text(*label_loc, 'measurements', transform=ax[0].transAxes, va='top')
0297 ax[1].text(*label_loc, 'outliers', transform=ax[1].transAxes, va='top')
0298 ax[2].text(*label_loc, 'all', transform=ax[2].transAxes, va='top')
0299 
0300 ax[0].set_yscale('log')
0301 ax[1].set_yscale('log')
0302 ax[2].set_yscale('log')
0303 ax[0].title('Hit residuals by category')
0304 plt.tight_layout()
0305 ## save to pdf
0306 
0307 # cond = (df['position.r'] > 400) & (df['position.r'] < 500)
0308 # _=plt.hist(df[cond].closest_seg_dist,bins=100)
0309 _save_new_figs(_fig_nums_before)
0310 
0311 
0312 _pdf_pages.close()