Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-04-06 07:47:13

0001 import gzip
0002 import os
0003 import re
0004 
0005 import awkward as ak
0006 import click
0007 import numpy as np
0008 import uproot
0009 from bokeh.events import DocumentReady
0010 from bokeh.io import curdoc
0011 from bokeh.layouts import gridplot
0012 from bokeh.models import ColumnDataSource
0013 from bokeh.models import CustomJSExpr
0014 from bokeh.models import Range1d
0015 from bokeh.models import PrintfTickFormatter
0016 from bokeh.plotting import figure, output_file, save
0017 from hist import Hist
0018 from scipy.stats import kstest
0019 
0020 from ..util import skip_common_prefix
0021 
0022 _MIDPOINT_EXPR_CODE = """
0023 const y1 = this.data.y1;
0024 const y2 = this.data.y2;
0025 return y1.map((v, i) => (v + y2[i]) / 2);
0026 """
0027 
0028 
0029 def _is_leaf(obj):
0030     """Check if an uproot branch/field object is a leaf (has no sub-branches/sub-fields).
0031 
0032     Supports both TTree TBranch objects (which use `.branches`) and
0033     RNTuple RField objects (which use `.fields`).
0034     """
0035     if hasattr(obj, 'branches'):
0036         return len(obj.branches) == 0
0037     if hasattr(obj, 'fields'):
0038         return len(obj.fields) == 0
0039     return True
0040 
0041 
0042 def _normalize_key(key):
0043     """Normalize uproot key format between TTree and RNTuple styles.
0044 
0045     TTree EDM4hep keys follow 'CollectionName/CollectionName.fieldPath' pattern.
0046     RNTuple EDM4hep keys follow 'CollectionName.fieldPath' pattern.
0047     This function converts TTree-style keys to the RNTuple-style format so that
0048     the same physics quantity has the same key regardless of the input file format.
0049 
0050     TTree keys for fixed-size array branches include a trailing '[N]' size
0051     annotation (e.g. 'covariance.covariance[21]') which is absent in RNTuple
0052     keys; this suffix is stripped so the two formats match.
0053 
0054     >>> _normalize_key('MCParticles/MCParticles.momentum.x')
0055     'MCParticles.momentum.x'
0056     >>> _normalize_key('MCParticles.momentum.x')
0057     'MCParticles.momentum.x'
0058     >>> _normalize_key('EventHeader/EventHeader.eventNumber')
0059     'EventHeader.eventNumber'
0060     >>> _normalize_key('CentralCKFTrackParameters/CentralCKFTrackParameters.covariance.covariance[21]')
0061     'CentralCKFTrackParameters.covariance.covariance'
0062     """
0063     if "/" in key:
0064         _, field_part = key.split("/", 1)
0065     else:
0066         field_part = key
0067     return re.sub(r'\[\d+\]$', '', field_part)
0068 
0069 
0070 def match_filter(key, match, unmatch):
0071     accept = True
0072     if match:
0073         accept = False
0074         for regex in match:
0075             if regex.match(key):
0076                 accept = True
0077     for regex in unmatch:
0078         if regex.match(key):
0079             accept = False
0080     return accept
0081 
0082 
0083 @click.command()
0084 @click.argument("files", type=click.File('rb'), nargs=-1)
0085 @click.option(
0086     "-m", "--match", multiple=True,
0087     help="Only include collections with names matching a regex"
0088 )
0089 @click.option(
0090     "-M", "--unmatch", multiple=True,
0091     help="Exclude collections with names matching a regex"
0092 )
0093 @click.option(
0094     "--serve", is_flag=True,
0095     default=False,
0096     help="Run a local HTTP server to view the report"
0097 )
0098 def bara(files, match, unmatch, serve):
0099     arr = {}
0100 
0101     match = list(map(re.compile, match))
0102     unmatch = list(map(re.compile, unmatch))
0103 
0104     for _file in files:
0105         tree = uproot.open(_file)["events"]
0106 
0107         sort_by_evtnum = None
0108         for evtnum_key in ["EventHeader/EventHeader.eventNumber", "EventHeader.eventNumber"]:
0109             if evtnum_key in tree.keys(recursive=True):
0110                 evtnum = tree[evtnum_key].array()
0111                 sort_by_evtnum = ak.argsort(ak.flatten(evtnum))
0112                 break
0113 
0114         for key in tree.keys(recursive=True):
0115             if not key.startswith("PARAMETERS") and _is_leaf(tree[key]):
0116                 normalized = _normalize_key(key)
0117                 if match_filter(normalized, match, unmatch):
0118                     val = tree[key].array()
0119                     if sort_by_evtnum is not None:
0120                         val = val[sort_by_evtnum]
0121                     arr.setdefault(normalized, {})[_file] = val
0122 
0123     paths = skip_common_prefix([_file.name.split("/") for _file in files])
0124     paths = skip_common_prefix([reversed(list(path)) for path in paths])
0125     labels = ["/".join(reversed(list(reversed_path))) for reversed_path in paths]
0126 
0127     collection_figs = {}
0128     collection_with_diffs = {}
0129     collection_step_exprs = {}
0130 
0131     for key in sorted(arr.keys()):
0132         if any("string" in str(ak.type(a)) for a in arr[key].values()):
0133             click.echo(f"String value detected for key \"{key}\". Skipping...")
0134             continue
0135         x_min = min(filter(
0136             lambda v: v is not None,
0137             map(lambda a: ak.min(ak.mask(a, np.isfinite(a))), arr[key].values())
0138         ), default=None)
0139         if x_min is None:
0140             continue
0141         x_range = max(filter(
0142             lambda v: v is not None,
0143             map(lambda a: ak.max(ak.mask(a - x_min, np.isfinite(a))), arr[key].values())
0144         ), default=None)
0145         nbins = 10
0146 
0147         if (any("* uint" in str(ak.type(a)) for a in arr[key].values())
0148            or any("* int" in str(ak.type(a)) for a in arr[key].values())):
0149             x_range = x_range + 1
0150             nbins = int(min(100, np.ceil(x_range)))
0151         else:
0152             x_range = x_range * 1.1
0153 
0154         if x_range == 0:
0155             x_range = 1
0156 
0157         if "." in key:
0158             branch_name = key.split(".", 1)[0]
0159             leaf_name = key
0160         else:
0161             branch_name = key
0162             leaf_name = key
0163 
0164         midpoint_expr = collection_step_exprs.setdefault(
0165             branch_name,
0166             CustomJSExpr(code=_MIDPOINT_EXPR_CODE),
0167         )
0168         fig = figure(x_axis_label=leaf_name, y_axis_label="Entries")
0169         if x_range < 1.:
0170             fig.xaxis.formatter = PrintfTickFormatter(format="%.2g")
0171         collection_figs.setdefault(branch_name, []).append(fig)
0172         y_max = 0
0173 
0174         prev_file_arr = None
0175         vis_params = [
0176           ("green", 1.5, "solid", " "),
0177           ("red", 3, "dashed", ","),
0178           ("blue", 2, "dotted", "."),
0179         ]
0180 
0181         if set(arr[key].keys()) != set(files):
0182             # not every file has the key
0183             collection_with_diffs[branch_name] = 0.0
0184 
0185         for _file, label, (color, line_width, line_dash, hatch_pattern) in zip(files, labels, vis_params):
0186             if _file not in arr[key]:
0187                 continue
0188             file_arr = arr[key][_file]
0189 
0190             # diff and KS test
0191             pvalue = None
0192             if prev_file_arr is not None:
0193                 if ((ak.num(file_arr, axis=0) != ak.num(prev_file_arr, axis=0))
0194                    or ak.any(ak.num(file_arr, axis=1)
0195                              != ak.num(prev_file_arr, axis=1))
0196                    or ak.any(ak.nan_to_none(file_arr)
0197                              != ak.nan_to_none(prev_file_arr))):
0198                     if (ak.num(ak.flatten(file_arr, axis=None), axis=0) > 0 and
0199                         ak.num(ak.flatten(prev_file_arr, axis=None), axis=0) > 0):
0200                         # We can only apply the KS test on non-empty arrays
0201                         pvalue = kstest(
0202                                 ak.to_numpy(ak.flatten(file_arr, axis=None)),
0203                                 ak.to_numpy(ak.flatten(prev_file_arr, axis=None))
0204                             ).pvalue
0205                     else:
0206                         pvalue = 0
0207                     print(key)
0208                     print(prev_file_arr, file_arr, f"p = {pvalue:.3f}")
0209                     collection_with_diffs[branch_name] = min(pvalue, collection_with_diffs.get(branch_name, 1.))
0210 
0211             # Figure
0212             h = (
0213                 Hist.new
0214                 .Reg(nbins, 0, x_range, name="x", label=key)
0215                 .Int64()
0216             )
0217             h.fill(x=ak.flatten(file_arr - x_min, axis=None))
0218 
0219             ys, edges = h.to_numpy()
0220             y0 = np.concatenate([ys, [ys[-1]]])
0221             legend_label=label + (f"\n{100*pvalue:.0f}%CL KS" if pvalue is not None else "")
0222             source = ColumnDataSource(
0223                 {
0224                     "x": edges + x_min,
0225                     "y1": y0 - np.sqrt(y0),
0226                     "y2": y0 + np.sqrt(y0),
0227                 }
0228             )
0229             step_r = fig.step(
0230                 x="x",
0231                 y={"expr": midpoint_expr},
0232                 mode="after",
0233                 source=source,
0234                 legend_label=legend_label,
0235                 line_color=color,
0236                 line_width=line_width,
0237                 line_dash=line_dash,
0238             )
0239             step_r.nonselection_glyph = step_r.glyph
0240             varea_r = fig.varea_step(
0241                 x="x",
0242                 y1="y1",
0243                 y2="y2",
0244                 step_mode="after",
0245                 source=source,
0246                 legend_label=legend_label,
0247                 fill_color=color if hatch_pattern == " " else None,
0248                 fill_alpha=0.25,
0249                 hatch_color=color,
0250                 hatch_alpha=0.5,
0251                 hatch_pattern=hatch_pattern,
0252             )
0253             varea_r.nonselection_glyph = varea_r.glyph
0254             fig.legend.background_fill_alpha = 0.5 # make legend more transparent
0255 
0256             y_max = max(y_max, np.max(y0 + np.sqrt(y0)))
0257             prev_file_arr = file_arr
0258 
0259         x_bounds = (x_min - 0.05 * x_range, x_min + 1.05 * x_range)
0260         y_bounds = (- 0.05 * y_max, 1.05 * y_max)
0261         # Set y range for histograms
0262         if np.all(np.isfinite(x_bounds)):
0263             try:
0264                 fig.x_range = Range1d(
0265                     *x_bounds,
0266                     bounds=x_bounds)
0267             except ValueError as e:
0268                 click.secho(str(e), fg="red", err=True)
0269         else:
0270             click.secho(f"overflow while calculating x bounds for \"{key}\"", fg="red", err=True)
0271         if np.all(np.isfinite(y_bounds)):
0272             try:
0273                 fig.y_range = Range1d(
0274                     *y_bounds,
0275                     bounds=y_bounds)
0276             except ValueError as e:
0277                 click.secho(str(e), fg="red", err=True)
0278         else:
0279             click.secho(f"overflow while calculating y bounds for \"{key}\"", fg="red", err=True)
0280 
0281     def to_filename(branch_name):
0282         return branch_name.replace("#", "__pound__")
0283 
0284     def option_key(item):
0285         collection_name, figs = item
0286         key = ""
0287         if collection_name in collection_with_diffs:
0288             if collection_with_diffs[collection_name] > 0.99:
0289                 key += " 0.99"
0290             elif collection_with_diffs[collection_name] > 0.95:
0291                 key += " 0.95"
0292             elif collection_with_diffs[collection_name] > 0.67:
0293                 key += " 0.67"
0294             else:
0295                 key += " 0.00"
0296         key += collection_name.lstrip("_")
0297         return key
0298 
0299     options = [("", "")]
0300     for collection_name, figs in sorted(collection_figs.items(), key=option_key):
0301         marker = ""
0302         if collection_name in collection_with_diffs:
0303             if collection_with_diffs[collection_name] > 0.99:
0304                 marker = " (*)"
0305             elif collection_with_diffs[collection_name] > 0.95:
0306                 marker = " (**)"
0307             elif collection_with_diffs[collection_name] > 0.67:
0308                 marker = " (***)"
0309             else:
0310                 marker = " (****)"
0311         options.append((to_filename(collection_name), collection_name + marker))
0312 
0313     from bokeh.models import CustomJS, Select
0314     def mk_dropdown(value=""):
0315         dropdown = Select(title="Select branch (**** < 67% CL, ..., * > 99% CL stat. equiv.):", value=value, options=options)
0316         dropdown.js_on_change("value", CustomJS(code="""
0317           console.log('dropdown: ' + this.value, this.toString())
0318           if (this.value != "") {
0319             window.location.hash = "#" + this.value;
0320             fetchAndReplaceBokehDocument(this.value);
0321           }
0322         """))
0323         return dropdown
0324 
0325     def mk_dropdown_minimal(value=""):
0326         # Embed only the currently selected option; the full list is stored once
0327         # in index.html's JavaScript and restored client-side after each load.
0328         # This avoids repeating a ~54 KB options list in every .json.gz file.
0329         label = next((lbl for val, lbl in options if val == value), value)
0330         minimal_options = [("", "")] + ([(value, label)] if value else [])
0331         dropdown = Select(title="Select branch (**** < 67% CL, ..., * > 99% CL stat. equiv.):", value=value, options=minimal_options)
0332         dropdown.js_on_change("value", CustomJS(code="""
0333           console.log('dropdown: ' + this.value, this.toString())
0334           if (this.value != "") {
0335             window.location.hash = "#" + this.value;
0336             fetchAndReplaceBokehDocument(this.value);
0337           }
0338         """))
0339         return dropdown
0340 
0341     from bokeh.layouts import column
0342     from bokeh.embed import json_item
0343     import json
0344 
0345     os.makedirs("capybara-reports", exist_ok=True)
0346 
0347     for collection_name, figs in collection_figs.items():
0348         item = column(
0349           mk_dropdown_minimal(collection_name),
0350           gridplot(figs, ncols=3, width=400, height=300),
0351         )
0352 
0353         with gzip.open(f"capybara-reports/{to_filename(collection_name)}.json.gz", "wt") as fp:
0354             json.dump(json_item(item), fp, separators=(',', ':'))
0355 
0356     curdoc().js_on_event(DocumentReady, CustomJS(args={"all_options": options}, code="""
0357       window._bokehSelectOptions = all_options;
0358 
0359       function fetchAndReplaceBokehDocument(location) {
0360         fetch(location + '.json.gz')
0361           .then(async function(response) {
0362             if (!response.ok) {
0363                 throw new Error('Network response was not ok');
0364             }
0365 
0366             const ds = new DecompressionStream('gzip');
0367             const decompressedStream = response.body.pipeThrough(ds);
0368             const decompressedResponse = new Response(decompressedStream);
0369             const item = await decompressedResponse.json();
0370 
0371             Bokeh.documents[0].replace_with_json(item.doc);
0372 
0373             // Restore the full options list to the newly loaded Select widget.
0374             for (const [, model] of Bokeh.documents[0]._all_models) {
0375               if (model.options instanceof Array) {
0376                 model.options = window._bokehSelectOptions;
0377                 model.value = location;
0378                 break;
0379               }
0380             }
0381           })
0382           .catch(function(error) {
0383             console.error('Fetch or decompression failed:', error);
0384           });
0385       }
0386 
0387       window.onhashchange = function() {
0388         var location = window.location.hash.replace(/^#/, "");
0389         if ((location != "") && ((typeof current_location === 'undefined') || (current_location != location))) {
0390           fetchAndReplaceBokehDocument(location);
0391           window.current_location = location;
0392         }
0393       }
0394       window.onhashchange();
0395     """))
0396     output_file(filename="capybara-reports/index.html", title="ePIC capybara report")
0397     save(mk_dropdown())
0398 
0399     if serve:
0400         os.chdir("capybara-reports/")
0401         from http.server import SimpleHTTPRequestHandler
0402         from socketserver import TCPServer
0403         with TCPServer(("127.0.0.1", 24535), SimpleHTTPRequestHandler) as httpd:
0404             print("Serving report at http://127.0.0.1:24535")
0405             try:
0406                 httpd.serve_forever()
0407             except KeyboardInterrupt:
0408                 pass