Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-30 09:18:57

0001 import os
0002 import re
0003 
0004 import awkward as ak
0005 import click
0006 import numpy as np
0007 import uproot
0008 from bokeh.events import DocumentReady
0009 from bokeh.io import curdoc
0010 from bokeh.layouts import gridplot
0011 from bokeh.models import Range1d
0012 from bokeh.plotting import figure, output_file, save
0013 from hist import Hist
0014 from scipy.stats import kstest
0015 
0016 from ..util import skip_common_prefix
0017 
0018 
0019 def match_filter(key, match, unmatch):
0020     accept = True
0021     if match:
0022         accept = False
0023         for regex in match:
0024             if regex.match(key):
0025                 accept = True
0026     for regex in unmatch:
0027         if regex.match(key):
0028             accept = False
0029     return accept
0030 
0031 
0032 @click.command()
0033 @click.argument("files", type=click.File('rb'), nargs=-1)
0034 @click.option(
0035     "-m", "--match", multiple=True,
0036     help="Only include collections with names matching a regex"
0037 )
0038 @click.option(
0039     "-M", "--unmatch", multiple=True,
0040     help="Exclude collections with names matching a regex"
0041 )
0042 @click.option(
0043     "--serve", is_flag=True,
0044     default=False,
0045     help="Run a local HTTP server to view the report"
0046 )
0047 def bara(files, match, unmatch, serve):
0048     arr = {}
0049 
0050     match = list(map(re.compile, match))
0051     unmatch = list(map(re.compile, unmatch))
0052 
0053     for _file in files:
0054         tree = uproot.open(_file)["events"]
0055         keys = [
0056             key for key in tree.keys(recursive=True)
0057             if not key.startswith("PARAMETERS")
0058             and len(tree[key].branches) == 0
0059             and match_filter(key, match, unmatch)
0060         ]
0061         for key in keys:
0062             arr.setdefault(key, {})[_file] = tree[key].array()
0063 
0064     paths = skip_common_prefix([_file.name.split("/") for _file in files])
0065     paths = skip_common_prefix([reversed(list(path)) for path in paths])
0066     labels = ["/".join(reversed(list(reversed_path))) for reversed_path in paths]
0067 
0068     collection_figs = {}
0069     collection_with_diffs = {}
0070 
0071     for key in sorted(arr.keys()):
0072         if any("string" in str(ak.type(a)) for a in arr[key].values()):
0073             click.echo(f"String value detected for key \"{key}\". Skipping...")
0074             continue
0075         x_min = min(filter(
0076             lambda v: v is not None,
0077             map(lambda a: ak.min(ak.mask(a, np.isfinite(a))), arr[key].values())
0078         ), default=None)
0079         if x_min is None:
0080             continue
0081         x_range = max(filter(
0082             lambda v: v is not None,
0083             map(lambda a: ak.max(ak.mask(a - x_min, np.isfinite(a))), arr[key].values())
0084         ), default=None) + 1
0085 
0086         nbins = 10
0087         if (any("* uint" in str(ak.type(a)) for a in arr[key].values())
0088            or any("* int" in str(ak.type(a)) for a in arr[key].values())):
0089             nbins = int(min(100, np.ceil(x_range)))
0090 
0091         if "/" in key:
0092             branch_name, leaf_name = key.split("/", 1)
0093         else:
0094             branch_name = key
0095             leaf_name = key
0096 
0097         fig = figure(x_axis_label=leaf_name, y_axis_label="Entries")
0098         collection_figs.setdefault(branch_name, []).append(fig)
0099         y_max = 0
0100 
0101         prev_file_arr = None
0102         vis_params = [
0103           ("green", 1.5, "solid", " "),
0104           ("red", 3, "dashed", ","),
0105           ("blue", 2, "dotted", "."),
0106         ]
0107 
0108         if set(arr[key].keys()) != set(files):
0109             # not every file has the key
0110             collection_with_diffs[branch_name] = 0.0
0111 
0112         for _file, label, (color, line_width, line_dash, hatch_pattern) in zip(files, labels, vis_params):
0113             if _file not in arr[key]:
0114                 continue
0115             file_arr = arr[key][_file]
0116 
0117             # diff and KS test
0118             pvalue = None
0119             if prev_file_arr is not None:
0120                 if ((ak.num(file_arr, axis=0) != ak.num(prev_file_arr, axis=0))
0121                    or ak.any(ak.num(file_arr, axis=1)
0122                              != ak.num(prev_file_arr, axis=1))
0123                    or ak.any(ak.nan_to_none(file_arr)
0124                              != ak.nan_to_none(prev_file_arr))):
0125                     if (ak.num(ak.flatten(file_arr, axis=None), axis=0) > 0 and
0126                         ak.num(ak.flatten(prev_file_arr, axis=None), axis=0) > 0):
0127                         # We can only apply the KS test on non-empty arrays
0128                         pvalue = kstest(
0129                                 ak.to_numpy(ak.flatten(file_arr, axis=None)),
0130                                 ak.to_numpy(ak.flatten(prev_file_arr, axis=None))
0131                             ).pvalue
0132                     else:
0133                         pvalue = 0
0134                     print(key)
0135                     print(prev_file_arr, file_arr, f"p = {pvalue:.3f}")
0136                     collection_with_diffs[branch_name] = min(pvalue, collection_with_diffs.get(branch_name, 1.))
0137 
0138             # Figure
0139             h = (
0140                 Hist.new
0141                 .Reg(nbins, 0, x_range, name="x", label=key)
0142                 .Int64()
0143             )
0144             h.fill(x=ak.flatten(file_arr - x_min, axis=None))
0145 
0146             ys, edges = h.to_numpy()
0147             y0 = np.concatenate([ys, [ys[-1]]])
0148             legend_label=label + (f"\n{100*pvalue:.0f}%CL KS" if pvalue is not None else "")
0149             fig.step(
0150                 x=edges + x_min,
0151                 y=y0,
0152                 mode="after",
0153                 legend_label=legend_label,
0154                 line_color=color,
0155                 line_width=line_width,
0156                 line_dash=line_dash,
0157             )
0158             fig.varea_step(
0159                 x=edges + x_min,
0160                 y1=y0 - np.sqrt(y0),
0161                 y2=y0 + np.sqrt(y0),
0162                 step_mode="after",
0163                 legend_label=legend_label,
0164                 fill_color=color if hatch_pattern == " " else None,
0165                 fill_alpha=0.25,
0166                 hatch_color=color,
0167                 hatch_alpha=0.5,
0168                 hatch_pattern=hatch_pattern,
0169             )
0170 
0171             y_max = max(y_max, np.max(y0 + np.sqrt(y0)))
0172             prev_file_arr = file_arr
0173 
0174         x_bounds = (x_min - 0.05 * x_range, x_min + 1.05 * x_range)
0175         y_bounds = (- 0.05 * y_max, 1.05 * y_max)
0176         # Set y range for histograms
0177         if np.all(np.isfinite(x_bounds)):
0178             try:
0179                 fig.x_range = Range1d(
0180                     *x_bounds,
0181                     bounds=x_bounds)
0182             except ValueError as e:
0183                 click.secho(str(e), fg="red", err=True)
0184         else:
0185             click.secho(f"overflow while calculating x bounds for \"{key}\"", fg="red", err=True)
0186         if np.all(np.isfinite(y_bounds)):
0187             try:
0188                 fig.y_range = Range1d(
0189                     *y_bounds,
0190                     bounds=y_bounds)
0191             except ValueError as e:
0192                 click.secho(str(e), fg="red", err=True)
0193         else:
0194             click.secho(f"overflow while calculating y bounds for \"{key}\"", fg="red", err=True)
0195 
0196     def to_filename(branch_name):
0197         return branch_name.replace("#", "__pound__")
0198 
0199     def option_key(item):
0200         collection_name, figs = item
0201         key = ""
0202         if collection_name in collection_with_diffs:
0203             if collection_with_diffs[collection_name] > 0.99:
0204                 key += " 0.99"
0205             elif collection_with_diffs[collection_name] > 0.95:
0206                 key += " 0.95"
0207             elif collection_with_diffs[collection_name] > 0.67:
0208                 key += " 0.67"
0209             else:
0210                 key += " 0.00"
0211         key += collection_name.lstrip("_")
0212         return key
0213 
0214     options = [("", "")]
0215     for collection_name, figs in sorted(collection_figs.items(), key=option_key):
0216         marker = ""
0217         if collection_name in collection_with_diffs:
0218             if collection_with_diffs[collection_name] > 0.99:
0219                 marker = " (*)"
0220             elif collection_with_diffs[collection_name] > 0.95:
0221                 marker = " (**)"
0222             elif collection_with_diffs[collection_name] > 0.67:
0223                 marker = " (***)"
0224             else:
0225                 marker = " (****)"
0226         options.append((to_filename(collection_name), collection_name + marker))
0227 
0228     from bokeh.models import CustomJS, Select
0229     def mk_dropdown(value=""):
0230         dropdown = Select(title="Select branch (**** < 67% CL, ..., * > 99% CL stat. equiv.):", value=value, options=options)
0231         dropdown.js_on_change("value", CustomJS(code="""
0232           console.log('dropdown: ' + this.value, this.toString())
0233           if (this.value != "") {
0234             window.location.hash = "#" + this.value;
0235             fetch(this.value + '.json')
0236               .then(function(response) { return response.json(); })
0237               .then(function(item) { Bokeh.documents[0].replace_with_json(item.doc); })
0238           }
0239         """))
0240         return dropdown
0241 
0242     from bokeh.layouts import column
0243     from bokeh.embed import json_item
0244     import json
0245 
0246     os.makedirs("capybara-reports", exist_ok=True)
0247 
0248     for collection_name, figs in collection_figs.items():
0249         item = column(
0250           mk_dropdown(collection_name),
0251           gridplot(figs, ncols=3, width=400, height=300),
0252         )
0253 
0254         with open(f"capybara-reports/{to_filename(collection_name)}.json", "w") as fp:
0255             json.dump(json_item(item), fp)
0256 
0257     curdoc().js_on_event(DocumentReady, CustomJS(code="""
0258       window.onhashchange = function() {
0259         var location = window.location.hash.replace(/^#/, "");
0260         if ((location != "") && ((typeof current_location === 'undefined') || (current_location != location))) {
0261           fetch(location + '.json')
0262               .then(function(response) { return response.json(); })
0263               .then(function(item) { Bokeh.documents[0].replace_with_json(item.doc); })
0264           window.current_location = location;
0265         }
0266       }
0267       window.onhashchange();
0268     """))
0269     output_file(filename="capybara-reports/index.html", title="ePIC capybara report")
0270     save(mk_dropdown())
0271 
0272     if serve:
0273         os.chdir("capybara-reports/")
0274         from http.server import SimpleHTTPRequestHandler
0275         from socketserver import TCPServer
0276         with TCPServer(("127.0.0.1", 24535), SimpleHTTPRequestHandler) as httpd:
0277             print("Serving report at http://127.0.0.1:24535")
0278             try:
0279                 httpd.serve_forever()
0280             except KeyboardInterrupt:
0281                 pass