File indexing completed on 2026-04-06 07:47:13
0001 import gzip
0002 import os
0003 import re
0004
0005 import awkward as ak
0006 import click
0007 import numpy as np
0008 import uproot
0009 from bokeh.events import DocumentReady
0010 from bokeh.io import curdoc
0011 from bokeh.layouts import gridplot
0012 from bokeh.models import ColumnDataSource
0013 from bokeh.models import CustomJSExpr
0014 from bokeh.models import Range1d
0015 from bokeh.models import PrintfTickFormatter
0016 from bokeh.plotting import figure, output_file, save
0017 from hist import Hist
0018 from scipy.stats import kstest
0019
0020 from ..util import skip_common_prefix
0021
0022 _MIDPOINT_EXPR_CODE = """
0023 const y1 = this.data.y1;
0024 const y2 = this.data.y2;
0025 return y1.map((v, i) => (v + y2[i]) / 2);
0026 """
0027
0028
0029 def _is_leaf(obj):
0030 """Check if an uproot branch/field object is a leaf (has no sub-branches/sub-fields).
0031
0032 Supports both TTree TBranch objects (which use `.branches`) and
0033 RNTuple RField objects (which use `.fields`).
0034 """
0035 if hasattr(obj, 'branches'):
0036 return len(obj.branches) == 0
0037 if hasattr(obj, 'fields'):
0038 return len(obj.fields) == 0
0039 return True
0040
0041
0042 def _normalize_key(key):
0043 """Normalize uproot key format between TTree and RNTuple styles.
0044
0045 TTree EDM4hep keys follow 'CollectionName/CollectionName.fieldPath' pattern.
0046 RNTuple EDM4hep keys follow 'CollectionName.fieldPath' pattern.
0047 This function converts TTree-style keys to the RNTuple-style format so that
0048 the same physics quantity has the same key regardless of the input file format.
0049
0050 TTree keys for fixed-size array branches include a trailing '[N]' size
0051 annotation (e.g. 'covariance.covariance[21]') which is absent in RNTuple
0052 keys; this suffix is stripped so the two formats match.
0053
0054 >>> _normalize_key('MCParticles/MCParticles.momentum.x')
0055 'MCParticles.momentum.x'
0056 >>> _normalize_key('MCParticles.momentum.x')
0057 'MCParticles.momentum.x'
0058 >>> _normalize_key('EventHeader/EventHeader.eventNumber')
0059 'EventHeader.eventNumber'
0060 >>> _normalize_key('CentralCKFTrackParameters/CentralCKFTrackParameters.covariance.covariance[21]')
0061 'CentralCKFTrackParameters.covariance.covariance'
0062 """
0063 if "/" in key:
0064 _, field_part = key.split("/", 1)
0065 else:
0066 field_part = key
0067 return re.sub(r'\[\d+\]$', '', field_part)
0068
0069
0070 def match_filter(key, match, unmatch):
0071 accept = True
0072 if match:
0073 accept = False
0074 for regex in match:
0075 if regex.match(key):
0076 accept = True
0077 for regex in unmatch:
0078 if regex.match(key):
0079 accept = False
0080 return accept
0081
0082
0083 @click.command()
0084 @click.argument("files", type=click.File('rb'), nargs=-1)
0085 @click.option(
0086 "-m", "--match", multiple=True,
0087 help="Only include collections with names matching a regex"
0088 )
0089 @click.option(
0090 "-M", "--unmatch", multiple=True,
0091 help="Exclude collections with names matching a regex"
0092 )
0093 @click.option(
0094 "--serve", is_flag=True,
0095 default=False,
0096 help="Run a local HTTP server to view the report"
0097 )
0098 def bara(files, match, unmatch, serve):
0099 arr = {}
0100
0101 match = list(map(re.compile, match))
0102 unmatch = list(map(re.compile, unmatch))
0103
0104 for _file in files:
0105 tree = uproot.open(_file)["events"]
0106
0107 sort_by_evtnum = None
0108 for evtnum_key in ["EventHeader/EventHeader.eventNumber", "EventHeader.eventNumber"]:
0109 if evtnum_key in tree.keys(recursive=True):
0110 evtnum = tree[evtnum_key].array()
0111 sort_by_evtnum = ak.argsort(ak.flatten(evtnum))
0112 break
0113
0114 for key in tree.keys(recursive=True):
0115 if not key.startswith("PARAMETERS") and _is_leaf(tree[key]):
0116 normalized = _normalize_key(key)
0117 if match_filter(normalized, match, unmatch):
0118 val = tree[key].array()
0119 if sort_by_evtnum is not None:
0120 val = val[sort_by_evtnum]
0121 arr.setdefault(normalized, {})[_file] = val
0122
0123 paths = skip_common_prefix([_file.name.split("/") for _file in files])
0124 paths = skip_common_prefix([reversed(list(path)) for path in paths])
0125 labels = ["/".join(reversed(list(reversed_path))) for reversed_path in paths]
0126
0127 collection_figs = {}
0128 collection_with_diffs = {}
0129 collection_step_exprs = {}
0130
0131 for key in sorted(arr.keys()):
0132 if any("string" in str(ak.type(a)) for a in arr[key].values()):
0133 click.echo(f"String value detected for key \"{key}\". Skipping...")
0134 continue
0135 x_min = min(filter(
0136 lambda v: v is not None,
0137 map(lambda a: ak.min(ak.mask(a, np.isfinite(a))), arr[key].values())
0138 ), default=None)
0139 if x_min is None:
0140 continue
0141 x_range = max(filter(
0142 lambda v: v is not None,
0143 map(lambda a: ak.max(ak.mask(a - x_min, np.isfinite(a))), arr[key].values())
0144 ), default=None)
0145 nbins = 10
0146
0147 if (any("* uint" in str(ak.type(a)) for a in arr[key].values())
0148 or any("* int" in str(ak.type(a)) for a in arr[key].values())):
0149 x_range = x_range + 1
0150 nbins = int(min(100, np.ceil(x_range)))
0151 else:
0152 x_range = x_range * 1.1
0153
0154 if x_range == 0:
0155 x_range = 1
0156
0157 if "." in key:
0158 branch_name = key.split(".", 1)[0]
0159 leaf_name = key
0160 else:
0161 branch_name = key
0162 leaf_name = key
0163
0164 midpoint_expr = collection_step_exprs.setdefault(
0165 branch_name,
0166 CustomJSExpr(code=_MIDPOINT_EXPR_CODE),
0167 )
0168 fig = figure(x_axis_label=leaf_name, y_axis_label="Entries")
0169 if x_range < 1.:
0170 fig.xaxis.formatter = PrintfTickFormatter(format="%.2g")
0171 collection_figs.setdefault(branch_name, []).append(fig)
0172 y_max = 0
0173
0174 prev_file_arr = None
0175 vis_params = [
0176 ("green", 1.5, "solid", " "),
0177 ("red", 3, "dashed", ","),
0178 ("blue", 2, "dotted", "."),
0179 ]
0180
0181 if set(arr[key].keys()) != set(files):
0182
0183 collection_with_diffs[branch_name] = 0.0
0184
0185 for _file, label, (color, line_width, line_dash, hatch_pattern) in zip(files, labels, vis_params):
0186 if _file not in arr[key]:
0187 continue
0188 file_arr = arr[key][_file]
0189
0190
0191 pvalue = None
0192 if prev_file_arr is not None:
0193 if ((ak.num(file_arr, axis=0) != ak.num(prev_file_arr, axis=0))
0194 or ak.any(ak.num(file_arr, axis=1)
0195 != ak.num(prev_file_arr, axis=1))
0196 or ak.any(ak.nan_to_none(file_arr)
0197 != ak.nan_to_none(prev_file_arr))):
0198 if (ak.num(ak.flatten(file_arr, axis=None), axis=0) > 0 and
0199 ak.num(ak.flatten(prev_file_arr, axis=None), axis=0) > 0):
0200
0201 pvalue = kstest(
0202 ak.to_numpy(ak.flatten(file_arr, axis=None)),
0203 ak.to_numpy(ak.flatten(prev_file_arr, axis=None))
0204 ).pvalue
0205 else:
0206 pvalue = 0
0207 print(key)
0208 print(prev_file_arr, file_arr, f"p = {pvalue:.3f}")
0209 collection_with_diffs[branch_name] = min(pvalue, collection_with_diffs.get(branch_name, 1.))
0210
0211
0212 h = (
0213 Hist.new
0214 .Reg(nbins, 0, x_range, name="x", label=key)
0215 .Int64()
0216 )
0217 h.fill(x=ak.flatten(file_arr - x_min, axis=None))
0218
0219 ys, edges = h.to_numpy()
0220 y0 = np.concatenate([ys, [ys[-1]]])
0221 legend_label=label + (f"\n{100*pvalue:.0f}%CL KS" if pvalue is not None else "")
0222 source = ColumnDataSource(
0223 {
0224 "x": edges + x_min,
0225 "y1": y0 - np.sqrt(y0),
0226 "y2": y0 + np.sqrt(y0),
0227 }
0228 )
0229 step_r = fig.step(
0230 x="x",
0231 y={"expr": midpoint_expr},
0232 mode="after",
0233 source=source,
0234 legend_label=legend_label,
0235 line_color=color,
0236 line_width=line_width,
0237 line_dash=line_dash,
0238 )
0239 step_r.nonselection_glyph = step_r.glyph
0240 varea_r = fig.varea_step(
0241 x="x",
0242 y1="y1",
0243 y2="y2",
0244 step_mode="after",
0245 source=source,
0246 legend_label=legend_label,
0247 fill_color=color if hatch_pattern == " " else None,
0248 fill_alpha=0.25,
0249 hatch_color=color,
0250 hatch_alpha=0.5,
0251 hatch_pattern=hatch_pattern,
0252 )
0253 varea_r.nonselection_glyph = varea_r.glyph
0254 fig.legend.background_fill_alpha = 0.5
0255
0256 y_max = max(y_max, np.max(y0 + np.sqrt(y0)))
0257 prev_file_arr = file_arr
0258
0259 x_bounds = (x_min - 0.05 * x_range, x_min + 1.05 * x_range)
0260 y_bounds = (- 0.05 * y_max, 1.05 * y_max)
0261
0262 if np.all(np.isfinite(x_bounds)):
0263 try:
0264 fig.x_range = Range1d(
0265 *x_bounds,
0266 bounds=x_bounds)
0267 except ValueError as e:
0268 click.secho(str(e), fg="red", err=True)
0269 else:
0270 click.secho(f"overflow while calculating x bounds for \"{key}\"", fg="red", err=True)
0271 if np.all(np.isfinite(y_bounds)):
0272 try:
0273 fig.y_range = Range1d(
0274 *y_bounds,
0275 bounds=y_bounds)
0276 except ValueError as e:
0277 click.secho(str(e), fg="red", err=True)
0278 else:
0279 click.secho(f"overflow while calculating y bounds for \"{key}\"", fg="red", err=True)
0280
0281 def to_filename(branch_name):
0282 return branch_name.replace("#", "__pound__")
0283
0284 def option_key(item):
0285 collection_name, figs = item
0286 key = ""
0287 if collection_name in collection_with_diffs:
0288 if collection_with_diffs[collection_name] > 0.99:
0289 key += " 0.99"
0290 elif collection_with_diffs[collection_name] > 0.95:
0291 key += " 0.95"
0292 elif collection_with_diffs[collection_name] > 0.67:
0293 key += " 0.67"
0294 else:
0295 key += " 0.00"
0296 key += collection_name.lstrip("_")
0297 return key
0298
0299 options = [("", "")]
0300 for collection_name, figs in sorted(collection_figs.items(), key=option_key):
0301 marker = ""
0302 if collection_name in collection_with_diffs:
0303 if collection_with_diffs[collection_name] > 0.99:
0304 marker = " (*)"
0305 elif collection_with_diffs[collection_name] > 0.95:
0306 marker = " (**)"
0307 elif collection_with_diffs[collection_name] > 0.67:
0308 marker = " (***)"
0309 else:
0310 marker = " (****)"
0311 options.append((to_filename(collection_name), collection_name + marker))
0312
0313 from bokeh.models import CustomJS, Select
0314 def mk_dropdown(value=""):
0315 dropdown = Select(title="Select branch (**** < 67% CL, ..., * > 99% CL stat. equiv.):", value=value, options=options)
0316 dropdown.js_on_change("value", CustomJS(code="""
0317 console.log('dropdown: ' + this.value, this.toString())
0318 if (this.value != "") {
0319 window.location.hash = "#" + this.value;
0320 fetchAndReplaceBokehDocument(this.value);
0321 }
0322 """))
0323 return dropdown
0324
0325 def mk_dropdown_minimal(value=""):
0326
0327
0328
0329 label = next((lbl for val, lbl in options if val == value), value)
0330 minimal_options = [("", "")] + ([(value, label)] if value else [])
0331 dropdown = Select(title="Select branch (**** < 67% CL, ..., * > 99% CL stat. equiv.):", value=value, options=minimal_options)
0332 dropdown.js_on_change("value", CustomJS(code="""
0333 console.log('dropdown: ' + this.value, this.toString())
0334 if (this.value != "") {
0335 window.location.hash = "#" + this.value;
0336 fetchAndReplaceBokehDocument(this.value);
0337 }
0338 """))
0339 return dropdown
0340
0341 from bokeh.layouts import column
0342 from bokeh.embed import json_item
0343 import json
0344
0345 os.makedirs("capybara-reports", exist_ok=True)
0346
0347 for collection_name, figs in collection_figs.items():
0348 item = column(
0349 mk_dropdown_minimal(collection_name),
0350 gridplot(figs, ncols=3, width=400, height=300),
0351 )
0352
0353 with gzip.open(f"capybara-reports/{to_filename(collection_name)}.json.gz", "wt") as fp:
0354 json.dump(json_item(item), fp, separators=(',', ':'))
0355
0356 curdoc().js_on_event(DocumentReady, CustomJS(args={"all_options": options}, code="""
0357 window._bokehSelectOptions = all_options;
0358
0359 function fetchAndReplaceBokehDocument(location) {
0360 fetch(location + '.json.gz')
0361 .then(async function(response) {
0362 if (!response.ok) {
0363 throw new Error('Network response was not ok');
0364 }
0365
0366 const ds = new DecompressionStream('gzip');
0367 const decompressedStream = response.body.pipeThrough(ds);
0368 const decompressedResponse = new Response(decompressedStream);
0369 const item = await decompressedResponse.json();
0370
0371 Bokeh.documents[0].replace_with_json(item.doc);
0372
0373 // Restore the full options list to the newly loaded Select widget.
0374 for (const [, model] of Bokeh.documents[0]._all_models) {
0375 if (model.options instanceof Array) {
0376 model.options = window._bokehSelectOptions;
0377 model.value = location;
0378 break;
0379 }
0380 }
0381 })
0382 .catch(function(error) {
0383 console.error('Fetch or decompression failed:', error);
0384 });
0385 }
0386
0387 window.onhashchange = function() {
0388 var location = window.location.hash.replace(/^#/, "");
0389 if ((location != "") && ((typeof current_location === 'undefined') || (current_location != location))) {
0390 fetchAndReplaceBokehDocument(location);
0391 window.current_location = location;
0392 }
0393 }
0394 window.onhashchange();
0395 """))
0396 output_file(filename="capybara-reports/index.html", title="ePIC capybara report")
0397 save(mk_dropdown())
0398
0399 if serve:
0400 os.chdir("capybara-reports/")
0401 from http.server import SimpleHTTPRequestHandler
0402 from socketserver import TCPServer
0403 with TCPServer(("127.0.0.1", 24535), SimpleHTTPRequestHandler) as httpd:
0404 print("Serving report at http://127.0.0.1:24535")
0405 try:
0406 httpd.serve_forever()
0407 except KeyboardInterrupt:
0408 pass