File indexing completed on 2026-06-05 07:52:55
0001 import gzip
0002 import os
0003 import re
0004
0005 import awkward as ak
0006 import click
0007 import numpy as np
0008 import uproot
0009 from bokeh.events import DocumentReady
0010 from bokeh.io import curdoc
0011 from bokeh.layouts import gridplot
0012 from bokeh.models import ColumnDataSource
0013 from bokeh.models import CustomJSExpr
0014 from bokeh.models import Range1d
0015 from bokeh.models import PrintfTickFormatter
0016 from bokeh.plotting import figure, output_file, save
0017 from hist import Hist
0018 from scipy.stats import kstest
0019
0020 from ..util import skip_common_prefix
0021
0022 _MIDPOINT_EXPR_CODE = """
0023 const y1 = this.data.y1;
0024 const y2 = this.data.y2;
0025 return y1.map((v, i) => (v + y2[i]) / 2);
0026 """
0027
0028
0029 def _is_leaf(obj):
0030 """Check if an uproot branch/field object is a leaf (has no sub-branches/sub-fields).
0031
0032 Supports both TTree TBranch objects (which use `.branches`) and
0033 RNTuple RField objects (which use `.fields`).
0034 """
0035 if hasattr(obj, 'branches'):
0036 return len(obj.branches) == 0
0037 if hasattr(obj, 'fields'):
0038 return len(obj.fields) == 0
0039 return True
0040
0041
0042 def _normalize_key(key):
0043 """Normalize uproot key format between TTree and RNTuple styles.
0044
0045 TTree EDM4hep keys follow 'CollectionName/CollectionName.fieldPath' pattern.
0046 RNTuple EDM4hep keys follow 'CollectionName.fieldPath' pattern.
0047 This function converts TTree-style keys to the RNTuple-style format so that
0048 the same physics quantity has the same key regardless of the input file format.
0049
0050 TTree keys for fixed-size array branches include a trailing '[N]' size
0051 annotation (e.g. 'covariance.covariance[21]') which is absent in RNTuple
0052 keys; this suffix is stripped so the two formats match.
0053
0054 >>> _normalize_key('MCParticles/MCParticles.momentum.x')
0055 'MCParticles.momentum.x'
0056 >>> _normalize_key('MCParticles.momentum.x')
0057 'MCParticles.momentum.x'
0058 >>> _normalize_key('EventHeader/EventHeader.eventNumber')
0059 'EventHeader.eventNumber'
0060 >>> _normalize_key('CentralCKFTrackParameters/CentralCKFTrackParameters.covariance.covariance[21]')
0061 'CentralCKFTrackParameters.covariance.covariance'
0062 """
0063 if "/" in key:
0064 _, field_part = key.split("/", 1)
0065 else:
0066 field_part = key
0067 return re.sub(r'\[\d+\]$', '', field_part)
0068
0069
0070 def match_filter(key, match, unmatch):
0071 accept = True
0072 if match:
0073 accept = False
0074 for regex in match:
0075 if regex.match(key):
0076 accept = True
0077 for regex in unmatch:
0078 if regex.match(key):
0079 accept = False
0080 return accept
0081
0082
0083 @click.command()
0084 @click.argument("files", type=click.File('rb'), nargs=-1)
0085 @click.option(
0086 "-m", "--match", multiple=True,
0087 help="Only include collections with names matching a regex"
0088 )
0089 @click.option(
0090 "-M", "--unmatch", multiple=True,
0091 help="Exclude collections with names matching a regex"
0092 )
0093 @click.option(
0094 "--serve", is_flag=True,
0095 default=False,
0096 help="Run a local HTTP server to view the report"
0097 )
0098 def bara(files, match, unmatch, serve):
0099 arr = {}
0100
0101 match = list(map(re.compile, match))
0102 unmatch = list(map(re.compile, unmatch))
0103
0104 for _file in files:
0105 tree = uproot.open(_file)["events"]
0106
0107 sort_by_evtnum = None
0108 for evtnum_key in ["EventHeader/EventHeader.eventNumber", "EventHeader.eventNumber"]:
0109 if evtnum_key in tree.keys(recursive=True):
0110 evtnum = tree[evtnum_key].array()
0111 sort_by_evtnum = ak.argsort(ak.flatten(evtnum))
0112 break
0113
0114 for key in tree.keys(recursive=True):
0115 if not key.startswith("PARAMETERS") and _is_leaf(tree[key]):
0116 normalized = _normalize_key(key)
0117 if match_filter(normalized, match, unmatch):
0118 val = tree[key].array()
0119 if sort_by_evtnum is not None:
0120 val = val[sort_by_evtnum]
0121 arr.setdefault(normalized, {})[_file] = val
0122
0123 paths = skip_common_prefix([_file.name.split("/") for _file in files])
0124 paths = skip_common_prefix([reversed(list(path)) for path in paths])
0125 labels = ["/".join(reversed(list(reversed_path))) for reversed_path in paths]
0126
0127 collection_figs = {}
0128 collection_with_diffs = {}
0129 collection_step_exprs = {}
0130
0131 for key in sorted(arr.keys()):
0132 if any("string" in str(ak.type(a)) for a in arr[key].values()):
0133 click.echo(f"String value detected for key \"{key}\". Skipping...")
0134 continue
0135 x_min = min(filter(
0136 lambda v: v is not None,
0137 map(lambda a: ak.min(ak.mask(a, np.isfinite(a))), arr[key].values())
0138 ), default=None)
0139 if x_min is None:
0140 continue
0141 x_range = max(filter(
0142 lambda v: v is not None,
0143 map(lambda a: ak.max(ak.mask(a - x_min, np.isfinite(a))), arr[key].values())
0144 ), default=None)
0145 nbins = 10
0146
0147 if (any("* uint" in str(ak.type(a)) for a in arr[key].values())
0148 or any("* int" in str(ak.type(a)) for a in arr[key].values())):
0149 x_range = x_range + 1
0150 nbins = int(min(100, np.ceil(x_range)))
0151 else:
0152 x_range = x_range * 1.1
0153
0154 if x_range == 0:
0155 x_range = 1
0156
0157 if "." in key:
0158 branch_name = key.split(".", 1)[0]
0159 leaf_name = key
0160 else:
0161 branch_name = key
0162 leaf_name = key
0163
0164 midpoint_expr = collection_step_exprs.setdefault(
0165 branch_name,
0166 CustomJSExpr(code=_MIDPOINT_EXPR_CODE),
0167 )
0168 fig = figure(x_axis_label=leaf_name, y_axis_label="Entries")
0169 if x_range < 1.:
0170 fig.xaxis.formatter = PrintfTickFormatter(format="%.2g")
0171 collection_figs.setdefault(branch_name, []).append(fig)
0172 y_max = 0
0173
0174 prev_file_arr = None
0175 vis_params = [
0176 ("green", 1.5, "solid", " "),
0177 ("red", 3, "dashed", ","),
0178 ("blue", 2, "dotted", "."),
0179 ]
0180
0181 if set(arr[key].keys()) != set(files):
0182
0183 collection_with_diffs[branch_name] = 0.0
0184
0185 for _file, label, (color, line_width, line_dash, hatch_pattern) in zip(files, labels, vis_params):
0186 if _file not in arr[key]:
0187 continue
0188 file_arr = arr[key][_file]
0189
0190
0191 pvalue = None
0192 if prev_file_arr is not None:
0193 if ((ak.num(file_arr, axis=0) != ak.num(prev_file_arr, axis=0))
0194 or ak.any(ak.num(file_arr, axis=1)
0195 != ak.num(prev_file_arr, axis=1))
0196 or ak.any(ak.nan_to_none(file_arr)
0197 != ak.nan_to_none(prev_file_arr))):
0198 if (ak.num(ak.flatten(file_arr, axis=None), axis=0) > 0 and
0199 ak.num(ak.flatten(prev_file_arr, axis=None), axis=0) > 0):
0200
0201 pvalue = kstest(
0202 ak.to_numpy(ak.flatten(file_arr, axis=None)),
0203 ak.to_numpy(ak.flatten(prev_file_arr, axis=None))
0204 ).pvalue
0205 else:
0206 pvalue = 0
0207 print(key, f"p = {pvalue:.3f}")
0208 print(prev_file_arr)
0209 print(file_arr)
0210 collection_with_diffs[branch_name] = min(pvalue, collection_with_diffs.get(branch_name, 1.))
0211
0212
0213 h = (
0214 Hist.new
0215 .Reg(nbins, 0, x_range, name="x", label=key)
0216 .Int64()
0217 )
0218 h.fill(x=ak.flatten(file_arr - x_min, axis=None))
0219
0220 ys, edges = h.to_numpy()
0221 y0 = np.concatenate([ys, [ys[-1]]])
0222 legend_label=label + (f"\n{100*pvalue:.0f}%CL KS" if pvalue is not None else "")
0223 source = ColumnDataSource(
0224 {
0225 "x": edges + x_min,
0226 "y1": y0 - np.sqrt(y0),
0227 "y2": y0 + np.sqrt(y0),
0228 }
0229 )
0230 step_r = fig.step(
0231 x="x",
0232 y={"expr": midpoint_expr},
0233 mode="after",
0234 source=source,
0235 legend_label=legend_label,
0236 line_color=color,
0237 line_width=line_width,
0238 line_dash=line_dash,
0239 )
0240 step_r.nonselection_glyph = step_r.glyph
0241 varea_r = fig.varea_step(
0242 x="x",
0243 y1="y1",
0244 y2="y2",
0245 step_mode="after",
0246 source=source,
0247 legend_label=legend_label,
0248 fill_color=color if hatch_pattern == " " else None,
0249 fill_alpha=0.25,
0250 hatch_color=color,
0251 hatch_alpha=0.5,
0252 hatch_pattern=hatch_pattern,
0253 )
0254 varea_r.nonselection_glyph = varea_r.glyph
0255 fig.legend.background_fill_alpha = 0.5
0256
0257 y_max = max(y_max, np.max(y0 + np.sqrt(y0)))
0258 prev_file_arr = file_arr
0259
0260 x_bounds = (x_min - 0.05 * x_range, x_min + 1.05 * x_range)
0261 y_bounds = (- 0.05 * y_max, 1.05 * y_max)
0262
0263 if np.all(np.isfinite(x_bounds)):
0264 try:
0265 fig.x_range = Range1d(
0266 *x_bounds,
0267 bounds=x_bounds)
0268 except ValueError as e:
0269 click.secho(str(e), fg="red", err=True)
0270 else:
0271 click.secho(f"overflow while calculating x bounds for \"{key}\"", fg="red", err=True)
0272 if np.all(np.isfinite(y_bounds)):
0273 try:
0274 fig.y_range = Range1d(
0275 *y_bounds,
0276 bounds=y_bounds)
0277 except ValueError as e:
0278 click.secho(str(e), fg="red", err=True)
0279 else:
0280 click.secho(f"overflow while calculating y bounds for \"{key}\"", fg="red", err=True)
0281
0282 def to_filename(branch_name):
0283 return branch_name.replace("#", "__pound__")
0284
0285 def option_key(item):
0286 collection_name, figs = item
0287 key = ""
0288 if collection_name in collection_with_diffs:
0289 if collection_with_diffs[collection_name] > 0.99:
0290 key += " 0.99"
0291 elif collection_with_diffs[collection_name] > 0.95:
0292 key += " 0.95"
0293 elif collection_with_diffs[collection_name] > 0.67:
0294 key += " 0.67"
0295 else:
0296 key += " 0.00"
0297 key += collection_name.lstrip("_")
0298 return key
0299
0300 options = [("", "")]
0301 for collection_name, figs in sorted(collection_figs.items(), key=option_key):
0302 marker = ""
0303 if collection_name in collection_with_diffs:
0304 if collection_with_diffs[collection_name] > 0.99:
0305 marker = " (*)"
0306 elif collection_with_diffs[collection_name] > 0.95:
0307 marker = " (**)"
0308 elif collection_with_diffs[collection_name] > 0.67:
0309 marker = " (***)"
0310 else:
0311 marker = " (****)"
0312 options.append((to_filename(collection_name), collection_name + marker))
0313
0314 from bokeh.models import CustomJS, Select
0315 def mk_dropdown(value=""):
0316 dropdown = Select(title="Select branch (**** < 67% CL, ..., * > 99% CL stat. equiv.):", value=value, options=options)
0317 dropdown.js_on_change("value", CustomJS(code="""
0318 console.log('dropdown: ' + this.value, this.toString())
0319 if (this.value != "") {
0320 window.location.hash = "#" + this.value;
0321 fetchAndReplaceBokehDocument(this.value);
0322 }
0323 """))
0324 return dropdown
0325
0326 def mk_dropdown_minimal(value=""):
0327
0328
0329
0330 label = next((lbl for val, lbl in options if val == value), value)
0331 minimal_options = [("", "")] + ([(value, label)] if value else [])
0332 dropdown = Select(title="Select branch (**** < 67% CL, ..., * > 99% CL stat. equiv.):", value=value, options=minimal_options)
0333 dropdown.js_on_change("value", CustomJS(code="""
0334 console.log('dropdown: ' + this.value, this.toString())
0335 if (this.value != "") {
0336 window.location.hash = "#" + this.value;
0337 fetchAndReplaceBokehDocument(this.value);
0338 }
0339 """))
0340 return dropdown
0341
0342 from bokeh.layouts import column
0343 from bokeh.embed import json_item
0344 import json
0345
0346 os.makedirs("capybara-reports", exist_ok=True)
0347
0348 for collection_name, figs in collection_figs.items():
0349 item = column(
0350 mk_dropdown_minimal(collection_name),
0351 gridplot(figs, ncols=3, width=400, height=300),
0352 )
0353
0354 with gzip.open(f"capybara-reports/{to_filename(collection_name)}.json.gz", "wt") as fp:
0355 json.dump(json_item(item), fp, separators=(',', ':'))
0356
0357 curdoc().js_on_event(DocumentReady, CustomJS(args={"all_options": options}, code="""
0358 window._bokehSelectOptions = all_options;
0359
0360 function fetchAndReplaceBokehDocument(location) {
0361 fetch(location + '.json.gz')
0362 .then(async function(response) {
0363 if (!response.ok) {
0364 throw new Error('Network response was not ok');
0365 }
0366
0367 const ds = new DecompressionStream('gzip');
0368 const decompressedStream = response.body.pipeThrough(ds);
0369 const decompressedResponse = new Response(decompressedStream);
0370 const item = await decompressedResponse.json();
0371
0372 Bokeh.documents[0].replace_with_json(item.doc);
0373
0374 // Restore the full options list to the newly loaded Select widget.
0375 for (const [, model] of Bokeh.documents[0]._all_models) {
0376 if (model.options instanceof Array) {
0377 model.options = window._bokehSelectOptions;
0378 model.value = location;
0379 break;
0380 }
0381 }
0382 })
0383 .catch(function(error) {
0384 console.error('Fetch or decompression failed:', error);
0385 });
0386 }
0387
0388 window.onhashchange = function() {
0389 var location = window.location.hash.replace(/^#/, "");
0390 if ((location != "") && ((typeof current_location === 'undefined') || (current_location != location))) {
0391 fetchAndReplaceBokehDocument(location);
0392 window.current_location = location;
0393 }
0394 }
0395 window.onhashchange();
0396 """))
0397 output_file(filename="capybara-reports/index.html", title="ePIC capybara report")
0398 save(mk_dropdown())
0399
0400 if serve:
0401 os.chdir("capybara-reports/")
0402 from http.server import SimpleHTTPRequestHandler
0403 from socketserver import TCPServer
0404 with TCPServer(("127.0.0.1", 24535), SimpleHTTPRequestHandler) as httpd:
0405 print("Serving report at http://127.0.0.1:24535")
0406 try:
0407 httpd.serve_forever()
0408 except KeyboardInterrupt:
0409 pass