File indexing completed on 2025-01-30 09:18:57
0001 import os
0002 import re
0003
0004 import awkward as ak
0005 import click
0006 import numpy as np
0007 import uproot
0008 from bokeh.events import DocumentReady
0009 from bokeh.io import curdoc
0010 from bokeh.layouts import gridplot
0011 from bokeh.models import Range1d
0012 from bokeh.plotting import figure, output_file, save
0013 from hist import Hist
0014 from scipy.stats import kstest
0015
0016 from ..util import skip_common_prefix
0017
0018
0019 def match_filter(key, match, unmatch):
0020 accept = True
0021 if match:
0022 accept = False
0023 for regex in match:
0024 if regex.match(key):
0025 accept = True
0026 for regex in unmatch:
0027 if regex.match(key):
0028 accept = False
0029 return accept
0030
0031
0032 @click.command()
0033 @click.argument("files", type=click.File('rb'), nargs=-1)
0034 @click.option(
0035 "-m", "--match", multiple=True,
0036 help="Only include collections with names matching a regex"
0037 )
0038 @click.option(
0039 "-M", "--unmatch", multiple=True,
0040 help="Exclude collections with names matching a regex"
0041 )
0042 @click.option(
0043 "--serve", is_flag=True,
0044 default=False,
0045 help="Run a local HTTP server to view the report"
0046 )
0047 def bara(files, match, unmatch, serve):
0048 arr = {}
0049
0050 match = list(map(re.compile, match))
0051 unmatch = list(map(re.compile, unmatch))
0052
0053 for _file in files:
0054 tree = uproot.open(_file)["events"]
0055 keys = [
0056 key for key in tree.keys(recursive=True)
0057 if not key.startswith("PARAMETERS")
0058 and len(tree[key].branches) == 0
0059 and match_filter(key, match, unmatch)
0060 ]
0061 for key in keys:
0062 arr.setdefault(key, {})[_file] = tree[key].array()
0063
0064 paths = skip_common_prefix([_file.name.split("/") for _file in files])
0065 paths = skip_common_prefix([reversed(list(path)) for path in paths])
0066 labels = ["/".join(reversed(list(reversed_path))) for reversed_path in paths]
0067
0068 collection_figs = {}
0069 collection_with_diffs = {}
0070
0071 for key in sorted(arr.keys()):
0072 if any("string" in str(ak.type(a)) for a in arr[key].values()):
0073 click.echo(f"String value detected for key \"{key}\". Skipping...")
0074 continue
0075 x_min = min(filter(
0076 lambda v: v is not None,
0077 map(lambda a: ak.min(ak.mask(a, np.isfinite(a))), arr[key].values())
0078 ), default=None)
0079 if x_min is None:
0080 continue
0081 x_range = max(filter(
0082 lambda v: v is not None,
0083 map(lambda a: ak.max(ak.mask(a - x_min, np.isfinite(a))), arr[key].values())
0084 ), default=None) + 1
0085
0086 nbins = 10
0087 if (any("* uint" in str(ak.type(a)) for a in arr[key].values())
0088 or any("* int" in str(ak.type(a)) for a in arr[key].values())):
0089 nbins = int(min(100, np.ceil(x_range)))
0090
0091 if "/" in key:
0092 branch_name, leaf_name = key.split("/", 1)
0093 else:
0094 branch_name = key
0095 leaf_name = key
0096
0097 fig = figure(x_axis_label=leaf_name, y_axis_label="Entries")
0098 collection_figs.setdefault(branch_name, []).append(fig)
0099 y_max = 0
0100
0101 prev_file_arr = None
0102 vis_params = [
0103 ("green", 1.5, "solid", " "),
0104 ("red", 3, "dashed", ","),
0105 ("blue", 2, "dotted", "."),
0106 ]
0107
0108 if set(arr[key].keys()) != set(files):
0109
0110 collection_with_diffs[branch_name] = 0.0
0111
0112 for _file, label, (color, line_width, line_dash, hatch_pattern) in zip(files, labels, vis_params):
0113 if _file not in arr[key]:
0114 continue
0115 file_arr = arr[key][_file]
0116
0117
0118 pvalue = None
0119 if prev_file_arr is not None:
0120 if ((ak.num(file_arr, axis=0) != ak.num(prev_file_arr, axis=0))
0121 or ak.any(ak.num(file_arr, axis=1)
0122 != ak.num(prev_file_arr, axis=1))
0123 or ak.any(ak.nan_to_none(file_arr)
0124 != ak.nan_to_none(prev_file_arr))):
0125 if (ak.num(ak.flatten(file_arr, axis=None), axis=0) > 0 and
0126 ak.num(ak.flatten(prev_file_arr, axis=None), axis=0) > 0):
0127
0128 pvalue = kstest(
0129 ak.to_numpy(ak.flatten(file_arr, axis=None)),
0130 ak.to_numpy(ak.flatten(prev_file_arr, axis=None))
0131 ).pvalue
0132 else:
0133 pvalue = 0
0134 print(key)
0135 print(prev_file_arr, file_arr, f"p = {pvalue:.3f}")
0136 collection_with_diffs[branch_name] = min(pvalue, collection_with_diffs.get(branch_name, 1.))
0137
0138
0139 h = (
0140 Hist.new
0141 .Reg(nbins, 0, x_range, name="x", label=key)
0142 .Int64()
0143 )
0144 h.fill(x=ak.flatten(file_arr - x_min, axis=None))
0145
0146 ys, edges = h.to_numpy()
0147 y0 = np.concatenate([ys, [ys[-1]]])
0148 legend_label=label + (f"\n{100*pvalue:.0f}%CL KS" if pvalue is not None else "")
0149 fig.step(
0150 x=edges + x_min,
0151 y=y0,
0152 mode="after",
0153 legend_label=legend_label,
0154 line_color=color,
0155 line_width=line_width,
0156 line_dash=line_dash,
0157 )
0158 fig.varea_step(
0159 x=edges + x_min,
0160 y1=y0 - np.sqrt(y0),
0161 y2=y0 + np.sqrt(y0),
0162 step_mode="after",
0163 legend_label=legend_label,
0164 fill_color=color if hatch_pattern == " " else None,
0165 fill_alpha=0.25,
0166 hatch_color=color,
0167 hatch_alpha=0.5,
0168 hatch_pattern=hatch_pattern,
0169 )
0170
0171 y_max = max(y_max, np.max(y0 + np.sqrt(y0)))
0172 prev_file_arr = file_arr
0173
0174 x_bounds = (x_min - 0.05 * x_range, x_min + 1.05 * x_range)
0175 y_bounds = (- 0.05 * y_max, 1.05 * y_max)
0176
0177 if np.all(np.isfinite(x_bounds)):
0178 try:
0179 fig.x_range = Range1d(
0180 *x_bounds,
0181 bounds=x_bounds)
0182 except ValueError as e:
0183 click.secho(str(e), fg="red", err=True)
0184 else:
0185 click.secho(f"overflow while calculating x bounds for \"{key}\"", fg="red", err=True)
0186 if np.all(np.isfinite(y_bounds)):
0187 try:
0188 fig.y_range = Range1d(
0189 *y_bounds,
0190 bounds=y_bounds)
0191 except ValueError as e:
0192 click.secho(str(e), fg="red", err=True)
0193 else:
0194 click.secho(f"overflow while calculating y bounds for \"{key}\"", fg="red", err=True)
0195
0196 def to_filename(branch_name):
0197 return branch_name.replace("#", "__pound__")
0198
0199 def option_key(item):
0200 collection_name, figs = item
0201 key = ""
0202 if collection_name in collection_with_diffs:
0203 if collection_with_diffs[collection_name] > 0.99:
0204 key += " 0.99"
0205 elif collection_with_diffs[collection_name] > 0.95:
0206 key += " 0.95"
0207 elif collection_with_diffs[collection_name] > 0.67:
0208 key += " 0.67"
0209 else:
0210 key += " 0.00"
0211 key += collection_name.lstrip("_")
0212 return key
0213
0214 options = [("", "")]
0215 for collection_name, figs in sorted(collection_figs.items(), key=option_key):
0216 marker = ""
0217 if collection_name in collection_with_diffs:
0218 if collection_with_diffs[collection_name] > 0.99:
0219 marker = " (*)"
0220 elif collection_with_diffs[collection_name] > 0.95:
0221 marker = " (**)"
0222 elif collection_with_diffs[collection_name] > 0.67:
0223 marker = " (***)"
0224 else:
0225 marker = " (****)"
0226 options.append((to_filename(collection_name), collection_name + marker))
0227
0228 from bokeh.models import CustomJS, Select
0229 def mk_dropdown(value=""):
0230 dropdown = Select(title="Select branch (**** < 67% CL, ..., * > 99% CL stat. equiv.):", value=value, options=options)
0231 dropdown.js_on_change("value", CustomJS(code="""
0232 console.log('dropdown: ' + this.value, this.toString())
0233 if (this.value != "") {
0234 window.location.hash = "#" + this.value;
0235 fetch(this.value + '.json')
0236 .then(function(response) { return response.json(); })
0237 .then(function(item) { Bokeh.documents[0].replace_with_json(item.doc); })
0238 }
0239 """))
0240 return dropdown
0241
0242 from bokeh.layouts import column
0243 from bokeh.embed import json_item
0244 import json
0245
0246 os.makedirs("capybara-reports", exist_ok=True)
0247
0248 for collection_name, figs in collection_figs.items():
0249 item = column(
0250 mk_dropdown(collection_name),
0251 gridplot(figs, ncols=3, width=400, height=300),
0252 )
0253
0254 with open(f"capybara-reports/{to_filename(collection_name)}.json", "w") as fp:
0255 json.dump(json_item(item), fp)
0256
0257 curdoc().js_on_event(DocumentReady, CustomJS(code="""
0258 window.onhashchange = function() {
0259 var location = window.location.hash.replace(/^#/, "");
0260 if ((location != "") && ((typeof current_location === 'undefined') || (current_location != location))) {
0261 fetch(location + '.json')
0262 .then(function(response) { return response.json(); })
0263 .then(function(item) { Bokeh.documents[0].replace_with_json(item.doc); })
0264 window.current_location = location;
0265 }
0266 }
0267 window.onhashchange();
0268 """))
0269 output_file(filename="capybara-reports/index.html", title="ePIC capybara report")
0270 save(mk_dropdown())
0271
0272 if serve:
0273 os.chdir("capybara-reports/")
0274 from http.server import SimpleHTTPRequestHandler
0275 from socketserver import TCPServer
0276 with TCPServer(("127.0.0.1", 24535), SimpleHTTPRequestHandler) as httpd:
0277 print("Serving report at http://127.0.0.1:24535")
0278 try:
0279 httpd.serve_forever()
0280 except KeyboardInterrupt:
0281 pass