Back to home page

EIC code displayed by LXR

 
 

    


Warning, /detector_benchmarks/benchmarks/backwards_ecal/backwards_ecal.org is written in an unsupported language. File is not indexed.

0001 #+PROPERTY: header-args:jupyter-python :session /jpy:localhost#8888:backwards_ecal :async yes :results drawer :exports both
0002 
0003 #+TITLE: ePIC EEEMCal benchmark
0004 #+AUTHOR: Dmitry Kalinkin
0005 #+OPTIONS: d:t
0006 
0007 #+LATEX_CLASS_OPTIONS: [9pt,letter]
0008 #+BIND: org-latex-image-default-width ""
0009 #+BIND: org-latex-image-default-option "scale=0.3"
0010 #+BIND: org-latex-images-centered nil
0011 #+BIND: org-latex-minted-options (("breaklines") ("bgcolor" "black!5") ("frame" "single"))
0012 #+LATEX_HEADER: \usepackage[margin=1in]{geometry}
0013 #+LATEX_HEADER: \setlength{\parindent}{0pt}
0014 #+LATEX: \sloppy
0015 
0016 #+begin_src jupyter-python :results silent
0017 import os
0018 from pathlib import Path
0019 
0020 import awkward as ak
0021 import boost_histogram as bh
0022 import dask_histogram as dh
0023 import numpy as np
0024 import uproot
0025 import vector
0026 #+end_src
0027 
0028 #+begin_src jupyter-python :results silent
0029 from dask.distributed import Client
0030 
0031 client = Client(os.environ.get("DASK_SCHEDULER", "localhost:8786"))
0032 #+end_src
0033 
0034 #+begin_src jupyter-python
0035 vector.register_awkward()
0036 
0037 from dask.distributed import WorkerPlugin
0038 class VectorImporter(WorkerPlugin):
0039     idempotent=True
0040 
0041     def setup(self, worker):
0042         import vector
0043 
0044         vector.register_awkward()
0045 
0046 client.register_plugin(VectorImporter())
0047 #+end_src
0048 
0049 * Plotting setup
0050                 
0051 #+begin_src jupyter-python :results silent
0052 import matplotlib as mpl
0053 import matplotlib.pyplot as plt
0054        
0055 def setup_presentation_style():
0056     mpl.rcParams.update(mpl.rcParamsDefault)
0057     plt.style.use('ggplot')
0058     plt.rcParams.update({
0059         'axes.labelsize': 8,
0060         'axes.titlesize': 9,
0061         'figure.titlesize': 9,
0062         'figure.figsize': (4, 3),
0063         'legend.fontsize': 7,
0064         'xtick.labelsize': 8,
0065         'ytick.labelsize': 8,
0066         'xaxis.labellocation': 'right',
0067         'yaxis.labellocation': 'top',
0068         'pgf.rcfonts': False,
0069     })
0070 
0071 setup_presentation_style()
0072 #+end_src       
0073 
0074 * Parameters
0075 
0076 #+begin_src jupyter-python :results silent
0077 DETECTOR_CONFIG=os.environ.get("DETECTOR_CONFIG")
0078 PLOT_TITLE=os.environ.get("PLOT_TITLE")
0079 INPUT_PATH_FORMAT=os.environ.get("INPUT_PATH_FORMAT", "EPIC/RECO/24.04.0/epic_craterlake/SINGLE/{particle}/{energy}/130to177deg/{particle}_{energy}_130to177deg.{ix:04d}.eicrecon.edm4eic.root")
0080 
0081 output_dir=Path(os.environ.get("OUTPUT_DIR", "./"))
0082 output_dir.mkdir(parents=True, exist_ok=True)
0083 #+end_src
0084 
0085 * Analysis
0086 
0087 First, we define a requirement on what phase we will consider for our
0088 analysis. The following function filters single-particle events that
0089 are thrown within $-3.5 < \eta < -2.0$:
0090 
0091 #+begin_src jupyter-python
0092 def filter_pointing(events):
0093     part_momentum = ak.zip({
0094         "m": events["MCParticles.mass"],
0095         "px": events["MCParticles.momentum.x"],
0096         "py": events["MCParticles.momentum.y"],
0097         "pz": events["MCParticles.momentum.z"],
0098     }, with_name="Momentum4D")
0099     cond = (part_momentum.eta[:,0] > -3.5) & (part_momentum.eta[:,0] < -2.)
0100     return events[cond]
0101 #+end_src
0102 
0103 #+begin_src jupyter-python
0104 energies = [
0105     "100MeV",
0106     "200MeV",
0107     "500MeV",
0108     "1GeV",
0109     "2GeV",
0110     "5GeV",
0111     "10GeV",
0112     "20GeV",
0113 ]
0114 filter_name = [
0115     "MCParticles.*",
0116     "EcalEndcapNClusters.energy",
0117 ]
0118 
0119 pi_eval = {}
0120 e_eval = {}
0121 
0122 def readlist(path):
0123     with open(path, "rt") as fp:
0124         paths = [line.rstrip() for line in fp.readlines()]
0125     return paths
0126 
0127 for energy in energies:
0128     pi_eval[energy] = uproot.dask(
0129         {path: "events" for path in readlist(INPUT_PATH_FORMAT.format(particle="pi-", energy=energy))},
0130         filter_name=filter_name,
0131         steps_per_file=1,
0132         open_files=False,
0133     ).map_partitions(filter_pointing)
0134     e_eval[energy] = uproot.dask(
0135         {path: "events" for path in readlist(INPUT_PATH_FORMAT.format(particle="e-", energy=energy))},
0136         filter_name=filter_name,
0137         steps_per_file=1,
0138         open_files=False,
0139     ).map_partitions(filter_pointing)
0140 #+end_src
0141 
0142 ** Energy resolution
0143 
0144 The first step in the analysis is plotting distributions of cluster energies.
0145 Only electron distribution is needed, but we also do pi- for the pion rejection
0146 study.
0147 
0148 #+begin_src jupyter-python
0149 e_over_p_hist = {}
0150 e_over_p_axis = bh.axis.Regular(110, 0., 1.10)
0151 
0152 for ix, energy in enumerate(energies):
0153     for particle_name, dataset in [("e-", e_eval[energy]), ("pi-", pi_eval[energy])]:
0154         energy_value = float(energy.replace("GeV", "").replace("MeV", "e-3"))
0155         def clf(events):
0156             return ak.drop_none(ak.max(events["EcalEndcapNClusters.energy"], axis=-1)) / energy_value
0157         e_over_p_hist[(particle_name, energy)] = dh.factory(
0158             clf(dataset),
0159             axes=(e_over_p_axis,),
0160         )
0161 
0162 e_over_p_hist = client.gather(client.compute(e_over_p_hist))
0163 #+end_src
0164 
0165 The next step is to plot the histograms together with fits and produce a plot
0166 for resolution vs energy that summarizes them.
0167 
0168 #+begin_src jupyter-python
0169 fig, axs = plt.subplots(2, 4, sharex=True, sharey=True, figsize=(15, 6))
0170 
0171 axs = np.ravel(np.array(axs))
0172 
0173 sigmas_rel_FWHM_cb = {}
0174 fractions_below = {}
0175 
0176 def norm_by_sum(arr):
0177     return arr / np.sum(arr)
0178 
0179 for ix, energy in enumerate(energies):
0180     plt.sca(axs[ix])
0181     hist = e_over_p_hist[("e-", energy)]
0182     patch = plt.stairs(norm_by_sum(hist.values()), hist.axes[0].edges, label=rf"$e^-$ {PLOT_TITLE}")
0183     plt.title(f"{energy}")
0184 
0185     import scipy.stats
0186     def f(x, n, beta, m, loc, scale):
0187         return n * scipy.stats.crystalball.pdf(x, beta, m, loc, scale)
0188     p0 = (np.sum(hist.values()[10:]), 2., 3., 0.95, 0.05)
0189 
0190     try:
0191         import scipy.optimize
0192         par, pcov = scipy.optimize.curve_fit(f, hist.axes[0].centers[5:], hist.values()[5:], p0=p0, maxfev=10000)
0193     except RuntimeError:
0194         print(hist)
0195         raise
0196     plt.plot(hist.axes[0].centers, f(hist.axes[0].centers, *par), label=rf"Crystal Ball fit", color="tab:green", lw=0.8)
0197 
0198     def summarize_fit(par):
0199         _, _, _, loc_cb, scale_cb = par
0200         # Calculate FWHM
0201         y_max = np.max(f(np.linspace(0., 1., 100), *par))
0202         f_prime = lambda x: f(x, *par) - y_max / 2
0203         x_plus, = scipy.optimize.root(f_prime, loc_cb + scale_cb).x
0204         x_minus, = scipy.optimize.root(f_prime, loc_cb - scale_cb).x
0205         plt.axvline(x_minus, ls="--", lw=0.75, color=patch.get_facecolor(), label=r"$\mu - $FWHM")
0206         plt.axvline(x_plus, ls=":", lw=0.75, color=patch.get_facecolor(), label=r"$\mu + $FWHM")
0207         fwhm = (x_plus - x_minus) / loc_cb
0208         sigma_rel_FWHM_cb = fwhm / 2 / np.sqrt(2 * np.log(2))
0209 
0210         cutoff_x = loc_cb - fwhm
0211         fraction_below = np.sum(hist.values()[hist.axes[0].centers < cutoff_x]) / np.sum(hist.values())
0212 
0213         return sigma_rel_FWHM_cb, fraction_below
0214 
0215     sigma_rel_FWHM_cb, fraction_below = summarize_fit(par)
0216     sigmas_rel_FWHM_cb.setdefault(PLOT_TITLE, {})[energy] = sigma_rel_FWHM_cb
0217     fractions_below.setdefault(PLOT_TITLE, {})[energy] = fraction_below
0218 
0219     plt.legend()
0220     plt.xlabel("$E/p$", loc="right")
0221     plt.ylabel("Event yield", loc="top")
0222 
0223 fig.savefig(output_dir / f"resolution_plots.pdf", bbox_inches="tight")
0224 fig.savefig(output_dir / f"resolution_plots.png", bbox_inches="tight")
0225 plt.show()
0226 plt.close(fig)
0227 
0228 plt.figure()
0229 energy_values = np.array([float(energy.replace("GeV", "").replace("MeV", "e-3")) for energy in energies])
0230 
0231 for clf_label, sigma_rel_FWHM_cb in sigmas_rel_FWHM_cb.items():
0232     sigma_over_e = np.array([sigma_rel_FWHM_cb[energy] for energy in energies]) * 100 # convert to %
0233 
0234     def f(energy, stochastic, constant):
0235         return np.sqrt((stochastic / np.sqrt(energy)) ** 2 + constant ** 2)
0236     cond = energy_values >= 0.5
0237     try:
0238         import scipy.optimize
0239         par, pcov = scipy.optimize.curve_fit(f, energy_values[cond], sigma_over_e[cond], maxfev=10000)
0240     except RuntimeError:
0241         print("energy_values", energy_values[cond])
0242         print("sigma_over_e", sigma_over_e[cond])
0243         raise
0244     stochastic, constant = par
0245 
0246     plt.plot(
0247         energy_values,
0248         sigma_over_e,
0249         marker=".",
0250         ls="none",
0251         label=f"{clf_label}"
0252     )
0253     xmin = np.min(energy_values[cond])
0254     xmax = np.max(energy_values[cond])
0255     xs = np.arange(xmin, xmax, 0.1)
0256     plt.plot(
0257         xs,
0258         f(xs, *par),
0259         ls="--",
0260         lw=0.5,
0261         label=fr"Functional fit: ${np.ceil(stochastic * 10) / 10:.1f}\% / \sqrt{{E}} \oplus {np.ceil(constant * 10) / 10:.1f}\%$",
0262     )
0263 xmin = np.min(energy_values)
0264 xmax = np.max(energy_values) * 1.05
0265 xs = np.arange(xmin, xmax + 0.1, 0.1)
0266 plt.fill_between(
0267     xs,
0268     np.sqrt((2 / np.sqrt(xs)) ** 2 + 1 ** 2),
0269     np.sqrt((2 / np.sqrt(xs)) ** 2 + 3 ** 2),
0270     lw=0., alpha=0.2, color="black", label=r"YR requirement $2\% / \sqrt{E} \oplus (1-3)\%$",
0271 )
0272 plt.xlim(0., xmax)
0273 plt.ylim(top=6.)
0274 plt.legend()
0275 plt.xlabel("Energy, GeV", loc="right")
0276 plt.ylabel(r"$\sigma_{E} / E$ derived from FWHM, %", loc="top")
0277 plt.savefig(output_dir / f"resolution.pdf", bbox_inches="tight")
0278 plt.savefig(output_dir / f"resolution.png", bbox_inches="tight")
0279 plt.show()
0280 #+end_src
0281 
0282 ** Pion rejection
0283 
0284 #+begin_src jupyter-python
0285 fig, axs = plt.subplots(2, 4, sharex=True, sharey=True, figsize=(15, 6))
0286 fig_log, axs_log = plt.subplots(2, 4, sharex=True, sharey=True, figsize=(15, 6))
0287 fig_roc, axs_roc = plt.subplots(2, 4, sharex=True, sharey=True, figsize=(15, 6))
0288 
0289 axs = np.ravel(np.array(axs))
0290 axs_log = np.ravel(np.array(axs_log))
0291 axs_roc = np.ravel(np.array(axs_roc))
0292 
0293 rocs = {}
0294 
0295 for ix, energy in enumerate(energies):
0296     energy_value = float(energy.replace("GeV", "").replace("MeV", "e-3"))
0297     def clf(events):
0298         return ak.drop_none(ak.max(events["EcalEndcapNClusters.energy"], axis=-1)) / energy_value
0299     e_pred = clf(e_eval[energy])
0300     pi_pred = clf(pi_eval[energy])
0301 
0302     for do_log, ax in [(False, axs[ix]), (True, axs_log[ix])]:
0303         plt.sca(ax)
0304         patch = plt.stairs(norm_by_sum(e_over_p_hist[("e-", energy)].values()), e_over_p_hist[("e-", energy)].axes[0].edges, label=rf"$e^-$ {PLOT_TITLE}")
0305         patch = plt.stairs(norm_by_sum(e_over_p_hist[("pi-", energy)].values()), e_over_p_hist[("pi-", energy)].axes[0].edges, label=rf"$\pi^-$ {PLOT_TITLE}")
0306         plt.title(f"{energy}")
0307         plt.legend()
0308         plt.xlabel("Classifier output")
0309         plt.ylabel("Event yield")
0310         if do_log:
0311             plt.yscale("log")
0312 
0313     plt.sca(axs_roc[ix])
0314     fpr = np.cumsum(e_over_p_hist[("pi-", energy)].values()[::-1])
0315     fpr /= fpr[-1]
0316     tpr = np.cumsum(e_over_p_hist[("e-", energy)].values()[::-1])
0317     tpr /= tpr[-1]
0318     cond = fpr != 0 # avoid infinite rejection (region of large uncertainty)
0319     cond &= tpr != 1 # avoid linear interpolation (region of large uncertainty)
0320     def mk_interp(tpr, fpr):
0321         def interp(eff):
0322             return np.interp(eff, tpr, fpr)
0323         return interp
0324     rocs.setdefault(PLOT_TITLE, {})[energy] = mk_interp(tpr, fpr)
0325     plt.plot(tpr[cond] * 100, 1 / fpr[cond], label=f"{PLOT_TITLE}")
0326     plt.yscale("log")
0327     plt.title(f"{energy}")
0328     plt.legend(loc="lower left")
0329     plt.xlabel("Electron efficiency, %")
0330     plt.ylabel("Pion rejection factor")
0331 
0332 fig.savefig(output_dir / f"pred.pdf", bbox_inches="tight")
0333 fig.savefig(output_dir / f"pred.png", bbox_inches="tight")
0334 plt.close(fig)
0335 fig_log.savefig(output_dir / f"pred_log.pdf", bbox_inches="tight")
0336 fig_log.savefig(output_dir / f"pred_log.png", bbox_inches="tight")
0337 fig_log.show()
0338 fig_roc.savefig(output_dir / f"roc.pdf", bbox_inches="tight")
0339 fig_roc.savefig(output_dir / f"roc.png", bbox_inches="tight")
0340 fig_roc.show()
0341 
0342 plt.figure()
0343 for clf_label, roc in rocs.items():
0344     plt.plot(
0345         [float(energy.replace("GeV", "").replace("MeV", "e-3")) for energy in energies],
0346         [1 / roc[energy](0.95) for energy in energies],
0347         marker=".",
0348         label=f"{clf_label}",
0349     )
0350 xmax = np.max(energy_values) * 1.05
0351 plt.xlim(0., xmax)
0352 plt.yscale("log")
0353 plt.legend()
0354 plt.xlabel("Energy, GeV")
0355 plt.ylabel("Pion rejection at 95%")
0356 plt.savefig(output_dir / f"pion_rej.pdf", bbox_inches="tight")
0357 plt.savefig(output_dir / f"pion_rej.png", bbox_inches="tight")
0358 plt.show()
0359 #+end_src