Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-07-03 07:56:04

0001 import gzip
0002 import os
0003 import re
0004 
0005 import awkward as ak
0006 import click
0007 import numpy as np
0008 import uproot
0009 from bokeh.events import DocumentReady
0010 from bokeh.io import curdoc
0011 from bokeh.layouts import gridplot
0012 from bokeh.models import Range1d
0013 from bokeh.models import PrintfTickFormatter
0014 from bokeh.plotting import figure, output_file, save
0015 from hist import Hist
0016 from scipy.stats import kstest
0017 
0018 from ..util import skip_common_prefix
0019 
0020 
0021 def match_filter(key, match, unmatch):
0022     accept = True
0023     if match:
0024         accept = False
0025         for regex in match:
0026             if regex.match(key):
0027                 accept = True
0028     for regex in unmatch:
0029         if regex.match(key):
0030             accept = False
0031     return accept
0032 
0033 
0034 @click.command()
0035 @click.argument("files", type=click.File('rb'), nargs=-1)
0036 @click.option(
0037     "-m", "--match", multiple=True,
0038     help="Only include collections with names matching a regex"
0039 )
0040 @click.option(
0041     "-M", "--unmatch", multiple=True,
0042     help="Exclude collections with names matching a regex"
0043 )
0044 @click.option(
0045     "--serve", is_flag=True,
0046     default=False,
0047     help="Run a local HTTP server to view the report"
0048 )
0049 def bara(files, match, unmatch, serve):
0050     arr = {}
0051 
0052     match = list(map(re.compile, match))
0053     unmatch = list(map(re.compile, unmatch))
0054 
0055     for _file in files:
0056         tree = uproot.open(_file)["events"]
0057         keys = [
0058             key for key in tree.keys(recursive=True)
0059             if not key.startswith("PARAMETERS")
0060             and len(tree[key].branches) == 0
0061             and match_filter(key, match, unmatch)
0062         ]
0063         for key in keys:
0064             arr.setdefault(key, {})[_file] = tree[key].array()
0065 
0066     paths = skip_common_prefix([_file.name.split("/") for _file in files])
0067     paths = skip_common_prefix([reversed(list(path)) for path in paths])
0068     labels = ["/".join(reversed(list(reversed_path))) for reversed_path in paths]
0069 
0070     collection_figs = {}
0071     collection_with_diffs = {}
0072 
0073     for key in sorted(arr.keys()):
0074         if any("string" in str(ak.type(a)) for a in arr[key].values()):
0075             click.echo(f"String value detected for key \"{key}\". Skipping...")
0076             continue
0077         x_min = min(filter(
0078             lambda v: v is not None,
0079             map(lambda a: ak.min(ak.mask(a, np.isfinite(a))), arr[key].values())
0080         ), default=None)
0081         if x_min is None:
0082             continue
0083         x_range = max(filter(
0084             lambda v: v is not None,
0085             map(lambda a: ak.max(ak.mask(a - x_min, np.isfinite(a))), arr[key].values())
0086         ), default=None)
0087         nbins = 10
0088 
0089         if (any("* uint" in str(ak.type(a)) for a in arr[key].values())
0090            or any("* int" in str(ak.type(a)) for a in arr[key].values())):
0091             x_range = x_range + 1
0092             nbins = int(min(100, np.ceil(x_range)))
0093         else:
0094             x_range = x_range * 1.1
0095 
0096         if x_range == 0:
0097             x_range = 1
0098 
0099         if "/" in key:
0100             branch_name, leaf_name = key.split("/", 1)
0101         else:
0102             branch_name = key
0103             leaf_name = key
0104 
0105         fig = figure(x_axis_label=leaf_name, y_axis_label="Entries")
0106         if x_range < 1.:
0107             fig.xaxis.formatter = PrintfTickFormatter(format="%.2g")
0108         collection_figs.setdefault(branch_name, []).append(fig)
0109         y_max = 0
0110 
0111         prev_file_arr = None
0112         vis_params = [
0113           ("green", 1.5, "solid", " "),
0114           ("red", 3, "dashed", ","),
0115           ("blue", 2, "dotted", "."),
0116         ]
0117 
0118         if set(arr[key].keys()) != set(files):
0119             # not every file has the key
0120             collection_with_diffs[branch_name] = 0.0
0121 
0122         for _file, label, (color, line_width, line_dash, hatch_pattern) in zip(files, labels, vis_params):
0123             if _file not in arr[key]:
0124                 continue
0125             file_arr = arr[key][_file]
0126 
0127             # diff and KS test
0128             pvalue = None
0129             if prev_file_arr is not None:
0130                 if ((ak.num(file_arr, axis=0) != ak.num(prev_file_arr, axis=0))
0131                    or ak.any(ak.num(file_arr, axis=1)
0132                              != ak.num(prev_file_arr, axis=1))
0133                    or ak.any(ak.nan_to_none(file_arr)
0134                              != ak.nan_to_none(prev_file_arr))):
0135                     if (ak.num(ak.flatten(file_arr, axis=None), axis=0) > 0 and
0136                         ak.num(ak.flatten(prev_file_arr, axis=None), axis=0) > 0):
0137                         # We can only apply the KS test on non-empty arrays
0138                         pvalue = kstest(
0139                                 ak.to_numpy(ak.flatten(file_arr, axis=None)),
0140                                 ak.to_numpy(ak.flatten(prev_file_arr, axis=None))
0141                             ).pvalue
0142                     else:
0143                         pvalue = 0
0144                     print(key)
0145                     print(prev_file_arr, file_arr, f"p = {pvalue:.3f}")
0146                     collection_with_diffs[branch_name] = min(pvalue, collection_with_diffs.get(branch_name, 1.))
0147 
0148             # Figure
0149             h = (
0150                 Hist.new
0151                 .Reg(nbins, 0, x_range, name="x", label=key)
0152                 .Int64()
0153             )
0154             h.fill(x=ak.flatten(file_arr - x_min, axis=None))
0155 
0156             ys, edges = h.to_numpy()
0157             y0 = np.concatenate([ys, [ys[-1]]])
0158             legend_label=label + (f"\n{100*pvalue:.0f}%CL KS" if pvalue is not None else "")
0159             fig.step(
0160                 x=edges + x_min,
0161                 y=y0,
0162                 mode="after",
0163                 legend_label=legend_label,
0164                 line_color=color,
0165                 line_width=line_width,
0166                 line_dash=line_dash,
0167             )
0168             fig.varea_step(
0169                 x=edges + x_min,
0170                 y1=y0 - np.sqrt(y0),
0171                 y2=y0 + np.sqrt(y0),
0172                 step_mode="after",
0173                 legend_label=legend_label,
0174                 fill_color=color if hatch_pattern == " " else None,
0175                 fill_alpha=0.25,
0176                 hatch_color=color,
0177                 hatch_alpha=0.5,
0178                 hatch_pattern=hatch_pattern,
0179             )
0180             fig.legend.background_fill_alpha = 0.5 # make legend more transparent
0181 
0182             y_max = max(y_max, np.max(y0 + np.sqrt(y0)))
0183             prev_file_arr = file_arr
0184 
0185         x_bounds = (x_min - 0.05 * x_range, x_min + 1.05 * x_range)
0186         y_bounds = (- 0.05 * y_max, 1.05 * y_max)
0187         # Set y range for histograms
0188         if np.all(np.isfinite(x_bounds)):
0189             try:
0190                 fig.x_range = Range1d(
0191                     *x_bounds,
0192                     bounds=x_bounds)
0193             except ValueError as e:
0194                 click.secho(str(e), fg="red", err=True)
0195         else:
0196             click.secho(f"overflow while calculating x bounds for \"{key}\"", fg="red", err=True)
0197         if np.all(np.isfinite(y_bounds)):
0198             try:
0199                 fig.y_range = Range1d(
0200                     *y_bounds,
0201                     bounds=y_bounds)
0202             except ValueError as e:
0203                 click.secho(str(e), fg="red", err=True)
0204         else:
0205             click.secho(f"overflow while calculating y bounds for \"{key}\"", fg="red", err=True)
0206 
0207     def to_filename(branch_name):
0208         return branch_name.replace("#", "__pound__")
0209 
0210     def option_key(item):
0211         collection_name, figs = item
0212         key = ""
0213         if collection_name in collection_with_diffs:
0214             if collection_with_diffs[collection_name] > 0.99:
0215                 key += " 0.99"
0216             elif collection_with_diffs[collection_name] > 0.95:
0217                 key += " 0.95"
0218             elif collection_with_diffs[collection_name] > 0.67:
0219                 key += " 0.67"
0220             else:
0221                 key += " 0.00"
0222         key += collection_name.lstrip("_")
0223         return key
0224 
0225     options = [("", "")]
0226     for collection_name, figs in sorted(collection_figs.items(), key=option_key):
0227         marker = ""
0228         if collection_name in collection_with_diffs:
0229             if collection_with_diffs[collection_name] > 0.99:
0230                 marker = " (*)"
0231             elif collection_with_diffs[collection_name] > 0.95:
0232                 marker = " (**)"
0233             elif collection_with_diffs[collection_name] > 0.67:
0234                 marker = " (***)"
0235             else:
0236                 marker = " (****)"
0237         options.append((to_filename(collection_name), collection_name + marker))
0238 
0239     from bokeh.models import CustomJS, Select
0240     def mk_dropdown(value=""):
0241         dropdown = Select(title="Select branch (**** < 67% CL, ..., * > 99% CL stat. equiv.):", value=value, options=options)
0242         dropdown.js_on_change("value", CustomJS(code="""
0243           console.log('dropdown: ' + this.value, this.toString())
0244           if (this.value != "") {
0245             window.location.hash = "#" + this.value;
0246             fetchAndReplaceBokehDocument(this.value);
0247           }
0248         """))
0249         return dropdown
0250 
0251     from bokeh.layouts import column
0252     from bokeh.embed import json_item
0253     import json
0254 
0255     os.makedirs("capybara-reports", exist_ok=True)
0256 
0257     for collection_name, figs in collection_figs.items():
0258         item = column(
0259           mk_dropdown(collection_name),
0260           gridplot(figs, ncols=3, width=400, height=300),
0261         )
0262 
0263         with gzip.open(f"capybara-reports/{to_filename(collection_name)}.json.gz", "wt") as fp:
0264             json.dump(json_item(item), fp)
0265 
0266     curdoc().js_on_event(DocumentReady, CustomJS(code="""
0267       function fetchAndReplaceBokehDocument(location) {
0268         fetch(location + '.json.gz')
0269           .then(async function(response) {
0270             if (!response.ok) {
0271                 throw new Error('Network response was not ok');
0272             }
0273 
0274             const ds = new DecompressionStream('gzip');
0275             const decompressedStream = response.body.pipeThrough(ds);
0276             const decompressedResponse = new Response(decompressedStream);
0277             const item = await decompressedResponse.json();
0278 
0279             Bokeh.documents[0].replace_with_json(item.doc);
0280           })
0281           .catch(function(error) {
0282             console.error('Fetch or decompression failed:', error);
0283           });
0284       }
0285 
0286       window.onhashchange = function() {
0287         var location = window.location.hash.replace(/^#/, "");
0288         if ((location != "") && ((typeof current_location === 'undefined') || (current_location != location))) {
0289           fetchAndReplaceBokehDocument(location);
0290           window.current_location = location;
0291         }
0292       }
0293       window.onhashchange();
0294     """))
0295     output_file(filename="capybara-reports/index.html", title="ePIC capybara report")
0296     save(mk_dropdown())
0297 
0298     if serve:
0299         os.chdir("capybara-reports/")
0300         from http.server import SimpleHTTPRequestHandler
0301         from socketserver import TCPServer
0302         with TCPServer(("127.0.0.1", 24535), SimpleHTTPRequestHandler) as httpd:
0303             print("Serving report at http://127.0.0.1:24535")
0304             try:
0305                 httpd.serve_forever()
0306             except KeyboardInterrupt:
0307                 pass