File indexing completed on 2025-09-15 08:17:54
0001 import gzip
0002 import os
0003 import re
0004
0005 import awkward as ak
0006 import click
0007 import numpy as np
0008 import uproot
0009 from bokeh.events import DocumentReady
0010 from bokeh.io import curdoc
0011 from bokeh.layouts import gridplot
0012 from bokeh.models import Range1d
0013 from bokeh.models import PrintfTickFormatter
0014 from bokeh.plotting import figure, output_file, save
0015 from hist import Hist
0016 from scipy.stats import kstest
0017
0018 from ..util import skip_common_prefix
0019
0020
0021 def match_filter(key, match, unmatch):
0022 accept = True
0023 if match:
0024 accept = False
0025 for regex in match:
0026 if regex.match(key):
0027 accept = True
0028 for regex in unmatch:
0029 if regex.match(key):
0030 accept = False
0031 return accept
0032
0033
0034 @click.command()
0035 @click.argument("files", type=click.File('rb'), nargs=-1)
0036 @click.option(
0037 "-m", "--match", multiple=True,
0038 help="Only include collections with names matching a regex"
0039 )
0040 @click.option(
0041 "-M", "--unmatch", multiple=True,
0042 help="Exclude collections with names matching a regex"
0043 )
0044 @click.option(
0045 "--serve", is_flag=True,
0046 default=False,
0047 help="Run a local HTTP server to view the report"
0048 )
0049 def bara(files, match, unmatch, serve):
0050 arr = {}
0051
0052 match = list(map(re.compile, match))
0053 unmatch = list(map(re.compile, unmatch))
0054
0055 for _file in files:
0056 tree = uproot.open(_file)["events"]
0057
0058 sort_by_evtnum = None
0059 if "EventHeader/EventHeader.eventNumber" in tree.keys(recursive=True):
0060 evtnum = tree["EventHeader/EventHeader.eventNumber"].array()
0061 sort_by_evtnum = ak.argsort(ak.flatten(evtnum))
0062
0063 keys = [
0064 key for key in tree.keys(recursive=True)
0065 if not key.startswith("PARAMETERS")
0066 and len(tree[key].branches) == 0
0067 and match_filter(key, match, unmatch)
0068 ]
0069
0070 for key in keys:
0071 val = tree[key].array()
0072 if sort_by_evtnum is not None:
0073 val = val[sort_by_evtnum]
0074 arr.setdefault(key, {})[_file] = val
0075
0076 paths = skip_common_prefix([_file.name.split("/") for _file in files])
0077 paths = skip_common_prefix([reversed(list(path)) for path in paths])
0078 labels = ["/".join(reversed(list(reversed_path))) for reversed_path in paths]
0079
0080 collection_figs = {}
0081 collection_with_diffs = {}
0082
0083 for key in sorted(arr.keys()):
0084 if any("string" in str(ak.type(a)) for a in arr[key].values()):
0085 click.echo(f"String value detected for key \"{key}\". Skipping...")
0086 continue
0087 x_min = min(filter(
0088 lambda v: v is not None,
0089 map(lambda a: ak.min(ak.mask(a, np.isfinite(a))), arr[key].values())
0090 ), default=None)
0091 if x_min is None:
0092 continue
0093 x_range = max(filter(
0094 lambda v: v is not None,
0095 map(lambda a: ak.max(ak.mask(a - x_min, np.isfinite(a))), arr[key].values())
0096 ), default=None)
0097 nbins = 10
0098
0099 if (any("* uint" in str(ak.type(a)) for a in arr[key].values())
0100 or any("* int" in str(ak.type(a)) for a in arr[key].values())):
0101 x_range = x_range + 1
0102 nbins = int(min(100, np.ceil(x_range)))
0103 else:
0104 x_range = x_range * 1.1
0105
0106 if x_range == 0:
0107 x_range = 1
0108
0109 if "/" in key:
0110 branch_name, leaf_name = key.split("/", 1)
0111 else:
0112 branch_name = key
0113 leaf_name = key
0114
0115 fig = figure(x_axis_label=leaf_name, y_axis_label="Entries")
0116 if x_range < 1.:
0117 fig.xaxis.formatter = PrintfTickFormatter(format="%.2g")
0118 collection_figs.setdefault(branch_name, []).append(fig)
0119 y_max = 0
0120
0121 prev_file_arr = None
0122 vis_params = [
0123 ("green", 1.5, "solid", " "),
0124 ("red", 3, "dashed", ","),
0125 ("blue", 2, "dotted", "."),
0126 ]
0127
0128 if set(arr[key].keys()) != set(files):
0129
0130 collection_with_diffs[branch_name] = 0.0
0131
0132 for _file, label, (color, line_width, line_dash, hatch_pattern) in zip(files, labels, vis_params):
0133 if _file not in arr[key]:
0134 continue
0135 file_arr = arr[key][_file]
0136
0137
0138 pvalue = None
0139 if prev_file_arr is not None:
0140 if ((ak.num(file_arr, axis=0) != ak.num(prev_file_arr, axis=0))
0141 or ak.any(ak.num(file_arr, axis=1)
0142 != ak.num(prev_file_arr, axis=1))
0143 or ak.any(ak.nan_to_none(file_arr)
0144 != ak.nan_to_none(prev_file_arr))):
0145 if (ak.num(ak.flatten(file_arr, axis=None), axis=0) > 0 and
0146 ak.num(ak.flatten(prev_file_arr, axis=None), axis=0) > 0):
0147
0148 pvalue = kstest(
0149 ak.to_numpy(ak.flatten(file_arr, axis=None)),
0150 ak.to_numpy(ak.flatten(prev_file_arr, axis=None))
0151 ).pvalue
0152 else:
0153 pvalue = 0
0154 print(key)
0155 print(prev_file_arr, file_arr, f"p = {pvalue:.3f}")
0156 collection_with_diffs[branch_name] = min(pvalue, collection_with_diffs.get(branch_name, 1.))
0157
0158
0159 h = (
0160 Hist.new
0161 .Reg(nbins, 0, x_range, name="x", label=key)
0162 .Int64()
0163 )
0164 h.fill(x=ak.flatten(file_arr - x_min, axis=None))
0165
0166 ys, edges = h.to_numpy()
0167 y0 = np.concatenate([ys, [ys[-1]]])
0168 legend_label=label + (f"\n{100*pvalue:.0f}%CL KS" if pvalue is not None else "")
0169 fig.step(
0170 x=edges + x_min,
0171 y=y0,
0172 mode="after",
0173 legend_label=legend_label,
0174 line_color=color,
0175 line_width=line_width,
0176 line_dash=line_dash,
0177 )
0178 fig.varea_step(
0179 x=edges + x_min,
0180 y1=y0 - np.sqrt(y0),
0181 y2=y0 + np.sqrt(y0),
0182 step_mode="after",
0183 legend_label=legend_label,
0184 fill_color=color if hatch_pattern == " " else None,
0185 fill_alpha=0.25,
0186 hatch_color=color,
0187 hatch_alpha=0.5,
0188 hatch_pattern=hatch_pattern,
0189 )
0190 fig.legend.background_fill_alpha = 0.5
0191
0192 y_max = max(y_max, np.max(y0 + np.sqrt(y0)))
0193 prev_file_arr = file_arr
0194
0195 x_bounds = (x_min - 0.05 * x_range, x_min + 1.05 * x_range)
0196 y_bounds = (- 0.05 * y_max, 1.05 * y_max)
0197
0198 if np.all(np.isfinite(x_bounds)):
0199 try:
0200 fig.x_range = Range1d(
0201 *x_bounds,
0202 bounds=x_bounds)
0203 except ValueError as e:
0204 click.secho(str(e), fg="red", err=True)
0205 else:
0206 click.secho(f"overflow while calculating x bounds for \"{key}\"", fg="red", err=True)
0207 if np.all(np.isfinite(y_bounds)):
0208 try:
0209 fig.y_range = Range1d(
0210 *y_bounds,
0211 bounds=y_bounds)
0212 except ValueError as e:
0213 click.secho(str(e), fg="red", err=True)
0214 else:
0215 click.secho(f"overflow while calculating y bounds for \"{key}\"", fg="red", err=True)
0216
0217 def to_filename(branch_name):
0218 return branch_name.replace("#", "__pound__")
0219
0220 def option_key(item):
0221 collection_name, figs = item
0222 key = ""
0223 if collection_name in collection_with_diffs:
0224 if collection_with_diffs[collection_name] > 0.99:
0225 key += " 0.99"
0226 elif collection_with_diffs[collection_name] > 0.95:
0227 key += " 0.95"
0228 elif collection_with_diffs[collection_name] > 0.67:
0229 key += " 0.67"
0230 else:
0231 key += " 0.00"
0232 key += collection_name.lstrip("_")
0233 return key
0234
0235 options = [("", "")]
0236 for collection_name, figs in sorted(collection_figs.items(), key=option_key):
0237 marker = ""
0238 if collection_name in collection_with_diffs:
0239 if collection_with_diffs[collection_name] > 0.99:
0240 marker = " (*)"
0241 elif collection_with_diffs[collection_name] > 0.95:
0242 marker = " (**)"
0243 elif collection_with_diffs[collection_name] > 0.67:
0244 marker = " (***)"
0245 else:
0246 marker = " (****)"
0247 options.append((to_filename(collection_name), collection_name + marker))
0248
0249 from bokeh.models import CustomJS, Select
0250 def mk_dropdown(value=""):
0251 dropdown = Select(title="Select branch (**** < 67% CL, ..., * > 99% CL stat. equiv.):", value=value, options=options)
0252 dropdown.js_on_change("value", CustomJS(code="""
0253 console.log('dropdown: ' + this.value, this.toString())
0254 if (this.value != "") {
0255 window.location.hash = "#" + this.value;
0256 fetchAndReplaceBokehDocument(this.value);
0257 }
0258 """))
0259 return dropdown
0260
0261 from bokeh.layouts import column
0262 from bokeh.embed import json_item
0263 import json
0264
0265 os.makedirs("capybara-reports", exist_ok=True)
0266
0267 for collection_name, figs in collection_figs.items():
0268 item = column(
0269 mk_dropdown(collection_name),
0270 gridplot(figs, ncols=3, width=400, height=300),
0271 )
0272
0273 with gzip.open(f"capybara-reports/{to_filename(collection_name)}.json.gz", "wt") as fp:
0274 json.dump(json_item(item), fp)
0275
0276 curdoc().js_on_event(DocumentReady, CustomJS(code="""
0277 function fetchAndReplaceBokehDocument(location) {
0278 fetch(location + '.json.gz')
0279 .then(async function(response) {
0280 if (!response.ok) {
0281 throw new Error('Network response was not ok');
0282 }
0283
0284 const ds = new DecompressionStream('gzip');
0285 const decompressedStream = response.body.pipeThrough(ds);
0286 const decompressedResponse = new Response(decompressedStream);
0287 const item = await decompressedResponse.json();
0288
0289 Bokeh.documents[0].replace_with_json(item.doc);
0290 })
0291 .catch(function(error) {
0292 console.error('Fetch or decompression failed:', error);
0293 });
0294 }
0295
0296 window.onhashchange = function() {
0297 var location = window.location.hash.replace(/^#/, "");
0298 if ((location != "") && ((typeof current_location === 'undefined') || (current_location != location))) {
0299 fetchAndReplaceBokehDocument(location);
0300 window.current_location = location;
0301 }
0302 }
0303 window.onhashchange();
0304 """))
0305 output_file(filename="capybara-reports/index.html", title="ePIC capybara report")
0306 save(mk_dropdown())
0307
0308 if serve:
0309 os.chdir("capybara-reports/")
0310 from http.server import SimpleHTTPRequestHandler
0311 from socketserver import TCPServer
0312 with TCPServer(("127.0.0.1", 24535), SimpleHTTPRequestHandler) as httpd:
0313 print("Serving report at http://127.0.0.1:24535")
0314 try:
0315 httpd.serve_forever()
0316 except KeyboardInterrupt:
0317 pass