File indexing completed on 2025-07-03 07:56:04
0001 import gzip
0002 import os
0003 import re
0004
0005 import awkward as ak
0006 import click
0007 import numpy as np
0008 import uproot
0009 from bokeh.events import DocumentReady
0010 from bokeh.io import curdoc
0011 from bokeh.layouts import gridplot
0012 from bokeh.models import Range1d
0013 from bokeh.models import PrintfTickFormatter
0014 from bokeh.plotting import figure, output_file, save
0015 from hist import Hist
0016 from scipy.stats import kstest
0017
0018 from ..util import skip_common_prefix
0019
0020
0021 def match_filter(key, match, unmatch):
0022 accept = True
0023 if match:
0024 accept = False
0025 for regex in match:
0026 if regex.match(key):
0027 accept = True
0028 for regex in unmatch:
0029 if regex.match(key):
0030 accept = False
0031 return accept
0032
0033
0034 @click.command()
0035 @click.argument("files", type=click.File('rb'), nargs=-1)
0036 @click.option(
0037 "-m", "--match", multiple=True,
0038 help="Only include collections with names matching a regex"
0039 )
0040 @click.option(
0041 "-M", "--unmatch", multiple=True,
0042 help="Exclude collections with names matching a regex"
0043 )
0044 @click.option(
0045 "--serve", is_flag=True,
0046 default=False,
0047 help="Run a local HTTP server to view the report"
0048 )
0049 def bara(files, match, unmatch, serve):
0050 arr = {}
0051
0052 match = list(map(re.compile, match))
0053 unmatch = list(map(re.compile, unmatch))
0054
0055 for _file in files:
0056 tree = uproot.open(_file)["events"]
0057 keys = [
0058 key for key in tree.keys(recursive=True)
0059 if not key.startswith("PARAMETERS")
0060 and len(tree[key].branches) == 0
0061 and match_filter(key, match, unmatch)
0062 ]
0063 for key in keys:
0064 arr.setdefault(key, {})[_file] = tree[key].array()
0065
0066 paths = skip_common_prefix([_file.name.split("/") for _file in files])
0067 paths = skip_common_prefix([reversed(list(path)) for path in paths])
0068 labels = ["/".join(reversed(list(reversed_path))) for reversed_path in paths]
0069
0070 collection_figs = {}
0071 collection_with_diffs = {}
0072
0073 for key in sorted(arr.keys()):
0074 if any("string" in str(ak.type(a)) for a in arr[key].values()):
0075 click.echo(f"String value detected for key \"{key}\". Skipping...")
0076 continue
0077 x_min = min(filter(
0078 lambda v: v is not None,
0079 map(lambda a: ak.min(ak.mask(a, np.isfinite(a))), arr[key].values())
0080 ), default=None)
0081 if x_min is None:
0082 continue
0083 x_range = max(filter(
0084 lambda v: v is not None,
0085 map(lambda a: ak.max(ak.mask(a - x_min, np.isfinite(a))), arr[key].values())
0086 ), default=None)
0087 nbins = 10
0088
0089 if (any("* uint" in str(ak.type(a)) for a in arr[key].values())
0090 or any("* int" in str(ak.type(a)) for a in arr[key].values())):
0091 x_range = x_range + 1
0092 nbins = int(min(100, np.ceil(x_range)))
0093 else:
0094 x_range = x_range * 1.1
0095
0096 if x_range == 0:
0097 x_range = 1
0098
0099 if "/" in key:
0100 branch_name, leaf_name = key.split("/", 1)
0101 else:
0102 branch_name = key
0103 leaf_name = key
0104
0105 fig = figure(x_axis_label=leaf_name, y_axis_label="Entries")
0106 if x_range < 1.:
0107 fig.xaxis.formatter = PrintfTickFormatter(format="%.2g")
0108 collection_figs.setdefault(branch_name, []).append(fig)
0109 y_max = 0
0110
0111 prev_file_arr = None
0112 vis_params = [
0113 ("green", 1.5, "solid", " "),
0114 ("red", 3, "dashed", ","),
0115 ("blue", 2, "dotted", "."),
0116 ]
0117
0118 if set(arr[key].keys()) != set(files):
0119
0120 collection_with_diffs[branch_name] = 0.0
0121
0122 for _file, label, (color, line_width, line_dash, hatch_pattern) in zip(files, labels, vis_params):
0123 if _file not in arr[key]:
0124 continue
0125 file_arr = arr[key][_file]
0126
0127
0128 pvalue = None
0129 if prev_file_arr is not None:
0130 if ((ak.num(file_arr, axis=0) != ak.num(prev_file_arr, axis=0))
0131 or ak.any(ak.num(file_arr, axis=1)
0132 != ak.num(prev_file_arr, axis=1))
0133 or ak.any(ak.nan_to_none(file_arr)
0134 != ak.nan_to_none(prev_file_arr))):
0135 if (ak.num(ak.flatten(file_arr, axis=None), axis=0) > 0 and
0136 ak.num(ak.flatten(prev_file_arr, axis=None), axis=0) > 0):
0137
0138 pvalue = kstest(
0139 ak.to_numpy(ak.flatten(file_arr, axis=None)),
0140 ak.to_numpy(ak.flatten(prev_file_arr, axis=None))
0141 ).pvalue
0142 else:
0143 pvalue = 0
0144 print(key)
0145 print(prev_file_arr, file_arr, f"p = {pvalue:.3f}")
0146 collection_with_diffs[branch_name] = min(pvalue, collection_with_diffs.get(branch_name, 1.))
0147
0148
0149 h = (
0150 Hist.new
0151 .Reg(nbins, 0, x_range, name="x", label=key)
0152 .Int64()
0153 )
0154 h.fill(x=ak.flatten(file_arr - x_min, axis=None))
0155
0156 ys, edges = h.to_numpy()
0157 y0 = np.concatenate([ys, [ys[-1]]])
0158 legend_label=label + (f"\n{100*pvalue:.0f}%CL KS" if pvalue is not None else "")
0159 fig.step(
0160 x=edges + x_min,
0161 y=y0,
0162 mode="after",
0163 legend_label=legend_label,
0164 line_color=color,
0165 line_width=line_width,
0166 line_dash=line_dash,
0167 )
0168 fig.varea_step(
0169 x=edges + x_min,
0170 y1=y0 - np.sqrt(y0),
0171 y2=y0 + np.sqrt(y0),
0172 step_mode="after",
0173 legend_label=legend_label,
0174 fill_color=color if hatch_pattern == " " else None,
0175 fill_alpha=0.25,
0176 hatch_color=color,
0177 hatch_alpha=0.5,
0178 hatch_pattern=hatch_pattern,
0179 )
0180 fig.legend.background_fill_alpha = 0.5
0181
0182 y_max = max(y_max, np.max(y0 + np.sqrt(y0)))
0183 prev_file_arr = file_arr
0184
0185 x_bounds = (x_min - 0.05 * x_range, x_min + 1.05 * x_range)
0186 y_bounds = (- 0.05 * y_max, 1.05 * y_max)
0187
0188 if np.all(np.isfinite(x_bounds)):
0189 try:
0190 fig.x_range = Range1d(
0191 *x_bounds,
0192 bounds=x_bounds)
0193 except ValueError as e:
0194 click.secho(str(e), fg="red", err=True)
0195 else:
0196 click.secho(f"overflow while calculating x bounds for \"{key}\"", fg="red", err=True)
0197 if np.all(np.isfinite(y_bounds)):
0198 try:
0199 fig.y_range = Range1d(
0200 *y_bounds,
0201 bounds=y_bounds)
0202 except ValueError as e:
0203 click.secho(str(e), fg="red", err=True)
0204 else:
0205 click.secho(f"overflow while calculating y bounds for \"{key}\"", fg="red", err=True)
0206
0207 def to_filename(branch_name):
0208 return branch_name.replace("#", "__pound__")
0209
0210 def option_key(item):
0211 collection_name, figs = item
0212 key = ""
0213 if collection_name in collection_with_diffs:
0214 if collection_with_diffs[collection_name] > 0.99:
0215 key += " 0.99"
0216 elif collection_with_diffs[collection_name] > 0.95:
0217 key += " 0.95"
0218 elif collection_with_diffs[collection_name] > 0.67:
0219 key += " 0.67"
0220 else:
0221 key += " 0.00"
0222 key += collection_name.lstrip("_")
0223 return key
0224
0225 options = [("", "")]
0226 for collection_name, figs in sorted(collection_figs.items(), key=option_key):
0227 marker = ""
0228 if collection_name in collection_with_diffs:
0229 if collection_with_diffs[collection_name] > 0.99:
0230 marker = " (*)"
0231 elif collection_with_diffs[collection_name] > 0.95:
0232 marker = " (**)"
0233 elif collection_with_diffs[collection_name] > 0.67:
0234 marker = " (***)"
0235 else:
0236 marker = " (****)"
0237 options.append((to_filename(collection_name), collection_name + marker))
0238
0239 from bokeh.models import CustomJS, Select
0240 def mk_dropdown(value=""):
0241 dropdown = Select(title="Select branch (**** < 67% CL, ..., * > 99% CL stat. equiv.):", value=value, options=options)
0242 dropdown.js_on_change("value", CustomJS(code="""
0243 console.log('dropdown: ' + this.value, this.toString())
0244 if (this.value != "") {
0245 window.location.hash = "#" + this.value;
0246 fetchAndReplaceBokehDocument(this.value);
0247 }
0248 """))
0249 return dropdown
0250
0251 from bokeh.layouts import column
0252 from bokeh.embed import json_item
0253 import json
0254
0255 os.makedirs("capybara-reports", exist_ok=True)
0256
0257 for collection_name, figs in collection_figs.items():
0258 item = column(
0259 mk_dropdown(collection_name),
0260 gridplot(figs, ncols=3, width=400, height=300),
0261 )
0262
0263 with gzip.open(f"capybara-reports/{to_filename(collection_name)}.json.gz", "wt") as fp:
0264 json.dump(json_item(item), fp)
0265
0266 curdoc().js_on_event(DocumentReady, CustomJS(code="""
0267 function fetchAndReplaceBokehDocument(location) {
0268 fetch(location + '.json.gz')
0269 .then(async function(response) {
0270 if (!response.ok) {
0271 throw new Error('Network response was not ok');
0272 }
0273
0274 const ds = new DecompressionStream('gzip');
0275 const decompressedStream = response.body.pipeThrough(ds);
0276 const decompressedResponse = new Response(decompressedStream);
0277 const item = await decompressedResponse.json();
0278
0279 Bokeh.documents[0].replace_with_json(item.doc);
0280 })
0281 .catch(function(error) {
0282 console.error('Fetch or decompression failed:', error);
0283 });
0284 }
0285
0286 window.onhashchange = function() {
0287 var location = window.location.hash.replace(/^#/, "");
0288 if ((location != "") && ((typeof current_location === 'undefined') || (current_location != location))) {
0289 fetchAndReplaceBokehDocument(location);
0290 window.current_location = location;
0291 }
0292 }
0293 window.onhashchange();
0294 """))
0295 output_file(filename="capybara-reports/index.html", title="ePIC capybara report")
0296 save(mk_dropdown())
0297
0298 if serve:
0299 os.chdir("capybara-reports/")
0300 from http.server import SimpleHTTPRequestHandler
0301 from socketserver import TCPServer
0302 with TCPServer(("127.0.0.1", 24535), SimpleHTTPRequestHandler) as httpd:
0303 print("Serving report at http://127.0.0.1:24535")
0304 try:
0305 httpd.serve_forever()
0306 except KeyboardInterrupt:
0307 pass