File indexing completed on 2026-03-29 08:37:26
0001 import os
0002 import re
0003 from pathlib import Path
0004
0005 import numpy as np
0006
0007 import epic_analysis_base as ana
0008
0009
0010 def _col_id(name):
0011
0012 return next(k for k, v in ana.COL_TABLE.items() if v == name)
0013
0014
0015 def parse_filename_metadata(path):
0016 """Parse filename for eta range, momentum, and tag/setting.
0017
0018 Returns dict with keys: tag, setting, eta_lo, eta_hi, mom_gev.
0019 """
0020 fname = Path(path).name
0021 tag = fname
0022 if tag.endswith(".root"):
0023 tag = tag[:-5]
0024
0025 setting = None
0026 if "rec_" in tag and "_eta" in tag:
0027 setting = tag.split("rec_", 1)[1].split("_eta", 1)[0]
0028 elif tag.startswith("rec_"):
0029 setting = tag.split("rec_", 1)[1].split("_", 1)[0]
0030
0031 eta_lo = eta_hi = None
0032 eta_match = re.search(r"eta_(-?\d+(?:\.\d+)?)_(-?\d+(?:\.\d+)?)", tag)
0033 if eta_match:
0034 eta_lo = float(eta_match.group(1))
0035 eta_hi = float(eta_match.group(2))
0036
0037 mom_gev = None
0038 mom_match = re.search(r"mom_([0-9]+(?:\.[0-9]+)?)GeV", tag)
0039 if mom_match:
0040 mom_gev = float(mom_match.group(1))
0041 else:
0042 gev_match = re.search(r"([0-9]+(?:\.[0-9]+)?)GeV", tag)
0043 if gev_match:
0044 mom_gev = float(gev_match.group(1))
0045 else:
0046 mev_match = re.search(r"([0-9]+(?:\.[0-9]+)?)MeV", tag)
0047 if mev_match:
0048 mom_gev = float(mev_match.group(1)) / 1000.0
0049
0050 return {
0051 "tag": tag,
0052 "setting": setting,
0053 "eta_lo": eta_lo,
0054 "eta_hi": eta_hi,
0055 "mom_gev": mom_gev,
0056 }
0057
0058
0059 def load_tree(fname, s3_dir="", entry_stop=None):
0060
0061 ana.COL_TABLE = ana.get_col_table(fname, s3_dir=s3_dir)
0062 tree = ana.read_ur(fname, "events", s3_dir=s3_dir, entry_stop=entry_stop)
0063 return tree
0064
0065
0066 def build_track_mc_df(
0067 tree,
0068 track_params="CentralCKFTrackParameters",
0069 assoc_name="CentralCKFTrackAssociations",
0070 track_collection="CentralCKFTracks",
0071 ):
0072
0073 cov_branch = f"{track_params}/{track_params}.covariance.covariance[21]"
0074
0075 params = ana.get_branch_df(tree, track_params)
0076 cov_df = ana.get_branch_df(tree, cov_branch)
0077 cov_wide = cov_df.pivot_table(index=["entry", "subentry"], columns="subsubentry", values="values")
0078 cov_wide.columns = [f"cov_{c}" for c in cov_wide.columns]
0079 cov_wide = cov_wide.reset_index()
0080
0081 params = params.merge(cov_wide, on=["entry", "subentry"], how="left")
0082 params = params.rename(
0083 columns={
0084 "subentry": "track_index",
0085 "theta": "theta_rec",
0086 "phi": "phi_rec",
0087 "qOverP": "qOverP_rec",
0088 }
0089 )
0090
0091 rec_assoc = ana.get_branch_df(tree, f"_{assoc_name}_rec")
0092 sim_assoc = ana.get_branch_df(tree, f"_{assoc_name}_sim")
0093 assoc = rec_assoc[["entry", "subentry", "index", "collectionID"]].rename(
0094 columns={"subentry": "assoc_index", "index": "track_index", "collectionID": "track_colID"}
0095 )
0096 assoc["mc_index"] = sim_assoc["index"]
0097 assoc["mc_colID"] = sim_assoc["collectionID"]
0098
0099 track_col_id = _col_id(track_collection)
0100 mc_col_id = _col_id("MCParticles")
0101 assoc = assoc[(assoc["track_colID"] == track_col_id) & (assoc["mc_colID"] == mc_col_id)]
0102
0103 df = assoc.merge(params, on=["entry", "track_index"], how="inner")
0104
0105 mc = ana.get_branch_df(tree, "MCParticles")
0106 mc = mc.rename(columns={"subentry": "mc_index"})
0107 mc["p_mc"] = np.sqrt(mc["momentum.x"] ** 2 + mc["momentum.y"] ** 2 + mc["momentum.z"] ** 2)
0108 mc["theta_mc"] = np.arccos(mc["momentum.z"] / mc["p_mc"])
0109 mc["phi_mc"] = np.arctan2(mc["momentum.y"], mc["momentum.x"])
0110 mc["eta_mc"] = ana.theta2eta(mc["theta_mc"])
0111 mc["qOverP_true"] = mc["charge"] / mc["p_mc"]
0112
0113 df = df.merge(
0114 mc[["entry", "mc_index", "theta_mc", "phi_mc", "eta_mc", "qOverP_true", "p_mc", "PDG", "generatorStatus"]],
0115 on=["entry", "mc_index"],
0116 how="inner",
0117 )
0118
0119 return df, mc