Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-09-15 08:17:54

0001 import gzip
0002 import os
0003 import re
0004 
0005 import awkward as ak
0006 import click
0007 import numpy as np
0008 import uproot
0009 from bokeh.events import DocumentReady
0010 from bokeh.io import curdoc
0011 from bokeh.layouts import gridplot
0012 from bokeh.models import Range1d
0013 from bokeh.models import PrintfTickFormatter
0014 from bokeh.plotting import figure, output_file, save
0015 from hist import Hist
0016 from scipy.stats import kstest
0017 
0018 from ..util import skip_common_prefix
0019 
0020 
0021 def match_filter(key, match, unmatch):
0022     accept = True
0023     if match:
0024         accept = False
0025         for regex in match:
0026             if regex.match(key):
0027                 accept = True
0028     for regex in unmatch:
0029         if regex.match(key):
0030             accept = False
0031     return accept
0032 
0033 
0034 @click.command()
0035 @click.argument("files", type=click.File('rb'), nargs=-1)
0036 @click.option(
0037     "-m", "--match", multiple=True,
0038     help="Only include collections with names matching a regex"
0039 )
0040 @click.option(
0041     "-M", "--unmatch", multiple=True,
0042     help="Exclude collections with names matching a regex"
0043 )
0044 @click.option(
0045     "--serve", is_flag=True,
0046     default=False,
0047     help="Run a local HTTP server to view the report"
0048 )
0049 def bara(files, match, unmatch, serve):
0050     arr = {}
0051 
0052     match = list(map(re.compile, match))
0053     unmatch = list(map(re.compile, unmatch))
0054 
0055     for _file in files:
0056         tree = uproot.open(_file)["events"]
0057 
0058         sort_by_evtnum = None
0059         if "EventHeader/EventHeader.eventNumber" in tree.keys(recursive=True):
0060             evtnum = tree["EventHeader/EventHeader.eventNumber"].array()
0061             sort_by_evtnum = ak.argsort(ak.flatten(evtnum))
0062 
0063         keys = [
0064             key for key in tree.keys(recursive=True)
0065             if not key.startswith("PARAMETERS")
0066             and len(tree[key].branches) == 0
0067             and match_filter(key, match, unmatch)
0068         ]
0069 
0070         for key in keys:
0071             val = tree[key].array()
0072             if sort_by_evtnum is not None:
0073                 val = val[sort_by_evtnum]
0074             arr.setdefault(key, {})[_file] = val
0075 
0076     paths = skip_common_prefix([_file.name.split("/") for _file in files])
0077     paths = skip_common_prefix([reversed(list(path)) for path in paths])
0078     labels = ["/".join(reversed(list(reversed_path))) for reversed_path in paths]
0079 
0080     collection_figs = {}
0081     collection_with_diffs = {}
0082 
0083     for key in sorted(arr.keys()):
0084         if any("string" in str(ak.type(a)) for a in arr[key].values()):
0085             click.echo(f"String value detected for key \"{key}\". Skipping...")
0086             continue
0087         x_min = min(filter(
0088             lambda v: v is not None,
0089             map(lambda a: ak.min(ak.mask(a, np.isfinite(a))), arr[key].values())
0090         ), default=None)
0091         if x_min is None:
0092             continue
0093         x_range = max(filter(
0094             lambda v: v is not None,
0095             map(lambda a: ak.max(ak.mask(a - x_min, np.isfinite(a))), arr[key].values())
0096         ), default=None)
0097         nbins = 10
0098 
0099         if (any("* uint" in str(ak.type(a)) for a in arr[key].values())
0100            or any("* int" in str(ak.type(a)) for a in arr[key].values())):
0101             x_range = x_range + 1
0102             nbins = int(min(100, np.ceil(x_range)))
0103         else:
0104             x_range = x_range * 1.1
0105 
0106         if x_range == 0:
0107             x_range = 1
0108 
0109         if "/" in key:
0110             branch_name, leaf_name = key.split("/", 1)
0111         else:
0112             branch_name = key
0113             leaf_name = key
0114 
0115         fig = figure(x_axis_label=leaf_name, y_axis_label="Entries")
0116         if x_range < 1.:
0117             fig.xaxis.formatter = PrintfTickFormatter(format="%.2g")
0118         collection_figs.setdefault(branch_name, []).append(fig)
0119         y_max = 0
0120 
0121         prev_file_arr = None
0122         vis_params = [
0123           ("green", 1.5, "solid", " "),
0124           ("red", 3, "dashed", ","),
0125           ("blue", 2, "dotted", "."),
0126         ]
0127 
0128         if set(arr[key].keys()) != set(files):
0129             # not every file has the key
0130             collection_with_diffs[branch_name] = 0.0
0131 
0132         for _file, label, (color, line_width, line_dash, hatch_pattern) in zip(files, labels, vis_params):
0133             if _file not in arr[key]:
0134                 continue
0135             file_arr = arr[key][_file]
0136 
0137             # diff and KS test
0138             pvalue = None
0139             if prev_file_arr is not None:
0140                 if ((ak.num(file_arr, axis=0) != ak.num(prev_file_arr, axis=0))
0141                    or ak.any(ak.num(file_arr, axis=1)
0142                              != ak.num(prev_file_arr, axis=1))
0143                    or ak.any(ak.nan_to_none(file_arr)
0144                              != ak.nan_to_none(prev_file_arr))):
0145                     if (ak.num(ak.flatten(file_arr, axis=None), axis=0) > 0 and
0146                         ak.num(ak.flatten(prev_file_arr, axis=None), axis=0) > 0):
0147                         # We can only apply the KS test on non-empty arrays
0148                         pvalue = kstest(
0149                                 ak.to_numpy(ak.flatten(file_arr, axis=None)),
0150                                 ak.to_numpy(ak.flatten(prev_file_arr, axis=None))
0151                             ).pvalue
0152                     else:
0153                         pvalue = 0
0154                     print(key)
0155                     print(prev_file_arr, file_arr, f"p = {pvalue:.3f}")
0156                     collection_with_diffs[branch_name] = min(pvalue, collection_with_diffs.get(branch_name, 1.))
0157 
0158             # Figure
0159             h = (
0160                 Hist.new
0161                 .Reg(nbins, 0, x_range, name="x", label=key)
0162                 .Int64()
0163             )
0164             h.fill(x=ak.flatten(file_arr - x_min, axis=None))
0165 
0166             ys, edges = h.to_numpy()
0167             y0 = np.concatenate([ys, [ys[-1]]])
0168             legend_label=label + (f"\n{100*pvalue:.0f}%CL KS" if pvalue is not None else "")
0169             fig.step(
0170                 x=edges + x_min,
0171                 y=y0,
0172                 mode="after",
0173                 legend_label=legend_label,
0174                 line_color=color,
0175                 line_width=line_width,
0176                 line_dash=line_dash,
0177             )
0178             fig.varea_step(
0179                 x=edges + x_min,
0180                 y1=y0 - np.sqrt(y0),
0181                 y2=y0 + np.sqrt(y0),
0182                 step_mode="after",
0183                 legend_label=legend_label,
0184                 fill_color=color if hatch_pattern == " " else None,
0185                 fill_alpha=0.25,
0186                 hatch_color=color,
0187                 hatch_alpha=0.5,
0188                 hatch_pattern=hatch_pattern,
0189             )
0190             fig.legend.background_fill_alpha = 0.5 # make legend more transparent
0191 
0192             y_max = max(y_max, np.max(y0 + np.sqrt(y0)))
0193             prev_file_arr = file_arr
0194 
0195         x_bounds = (x_min - 0.05 * x_range, x_min + 1.05 * x_range)
0196         y_bounds = (- 0.05 * y_max, 1.05 * y_max)
0197         # Set y range for histograms
0198         if np.all(np.isfinite(x_bounds)):
0199             try:
0200                 fig.x_range = Range1d(
0201                     *x_bounds,
0202                     bounds=x_bounds)
0203             except ValueError as e:
0204                 click.secho(str(e), fg="red", err=True)
0205         else:
0206             click.secho(f"overflow while calculating x bounds for \"{key}\"", fg="red", err=True)
0207         if np.all(np.isfinite(y_bounds)):
0208             try:
0209                 fig.y_range = Range1d(
0210                     *y_bounds,
0211                     bounds=y_bounds)
0212             except ValueError as e:
0213                 click.secho(str(e), fg="red", err=True)
0214         else:
0215             click.secho(f"overflow while calculating y bounds for \"{key}\"", fg="red", err=True)
0216 
0217     def to_filename(branch_name):
0218         return branch_name.replace("#", "__pound__")
0219 
0220     def option_key(item):
0221         collection_name, figs = item
0222         key = ""
0223         if collection_name in collection_with_diffs:
0224             if collection_with_diffs[collection_name] > 0.99:
0225                 key += " 0.99"
0226             elif collection_with_diffs[collection_name] > 0.95:
0227                 key += " 0.95"
0228             elif collection_with_diffs[collection_name] > 0.67:
0229                 key += " 0.67"
0230             else:
0231                 key += " 0.00"
0232         key += collection_name.lstrip("_")
0233         return key
0234 
0235     options = [("", "")]
0236     for collection_name, figs in sorted(collection_figs.items(), key=option_key):
0237         marker = ""
0238         if collection_name in collection_with_diffs:
0239             if collection_with_diffs[collection_name] > 0.99:
0240                 marker = " (*)"
0241             elif collection_with_diffs[collection_name] > 0.95:
0242                 marker = " (**)"
0243             elif collection_with_diffs[collection_name] > 0.67:
0244                 marker = " (***)"
0245             else:
0246                 marker = " (****)"
0247         options.append((to_filename(collection_name), collection_name + marker))
0248 
0249     from bokeh.models import CustomJS, Select
0250     def mk_dropdown(value=""):
0251         dropdown = Select(title="Select branch (**** < 67% CL, ..., * > 99% CL stat. equiv.):", value=value, options=options)
0252         dropdown.js_on_change("value", CustomJS(code="""
0253           console.log('dropdown: ' + this.value, this.toString())
0254           if (this.value != "") {
0255             window.location.hash = "#" + this.value;
0256             fetchAndReplaceBokehDocument(this.value);
0257           }
0258         """))
0259         return dropdown
0260 
0261     from bokeh.layouts import column
0262     from bokeh.embed import json_item
0263     import json
0264 
0265     os.makedirs("capybara-reports", exist_ok=True)
0266 
0267     for collection_name, figs in collection_figs.items():
0268         item = column(
0269           mk_dropdown(collection_name),
0270           gridplot(figs, ncols=3, width=400, height=300),
0271         )
0272 
0273         with gzip.open(f"capybara-reports/{to_filename(collection_name)}.json.gz", "wt") as fp:
0274             json.dump(json_item(item), fp)
0275 
0276     curdoc().js_on_event(DocumentReady, CustomJS(code="""
0277       function fetchAndReplaceBokehDocument(location) {
0278         fetch(location + '.json.gz')
0279           .then(async function(response) {
0280             if (!response.ok) {
0281                 throw new Error('Network response was not ok');
0282             }
0283 
0284             const ds = new DecompressionStream('gzip');
0285             const decompressedStream = response.body.pipeThrough(ds);
0286             const decompressedResponse = new Response(decompressedStream);
0287             const item = await decompressedResponse.json();
0288 
0289             Bokeh.documents[0].replace_with_json(item.doc);
0290           })
0291           .catch(function(error) {
0292             console.error('Fetch or decompression failed:', error);
0293           });
0294       }
0295 
0296       window.onhashchange = function() {
0297         var location = window.location.hash.replace(/^#/, "");
0298         if ((location != "") && ((typeof current_location === 'undefined') || (current_location != location))) {
0299           fetchAndReplaceBokehDocument(location);
0300           window.current_location = location;
0301         }
0302       }
0303       window.onhashchange();
0304     """))
0305     output_file(filename="capybara-reports/index.html", title="ePIC capybara report")
0306     save(mk_dropdown())
0307 
0308     if serve:
0309         os.chdir("capybara-reports/")
0310         from http.server import SimpleHTTPRequestHandler
0311         from socketserver import TCPServer
0312         with TCPServer(("127.0.0.1", 24535), SimpleHTTPRequestHandler) as httpd:
0313             print("Serving report at http://127.0.0.1:24535")
0314             try:
0315                 httpd.serve_forever()
0316             except KeyboardInterrupt:
0317                 pass