File indexing completed on 2025-01-31 09:17:15
0001
0002 import hashlib
0003 from pathlib import Path
0004 import argparse
0005
0006 import uproot
0007 import numpy as np
0008 import awkward as ak
0009
0010
def _hash_branches_in_order(branches, keys) -> bytes:
    """Return a SHA-256 digest of all branches, hashed in entry order.

    Each branch name in ``keys`` is fed into the hash followed by the raw
    bytes of that branch's fully flattened values.
    """
    h = hashlib.sha256()
    for name in keys:
        h.update(name.encode("utf8"))
        flat = np.array(ak.flatten(branches[name], axis=None))
        h.update(flat.tobytes())
    return h.digest()


def _hash_entries_sorted(branches, keys) -> bytes:
    """Return a SHA-256 digest that does not depend on the order of entries.

    Every entry (one value per branch, branches taken in ``keys`` order) is
    hashed on its own; the per-entry digests are then sorted and hashed
    together with the concatenated branch names.
    """
    digests = []
    for row in zip(*[branches[b] for b in keys]):
        # MD5 serves only as a compact per-entry fingerprint here.
        h = hashlib.md5()
        for obj in row:
            if isinstance(obj, ak.highlevel.Array):
                if obj.ndim == 1:
                    h.update(ak.to_numpy(obj).tobytes())
                else:
                    flat = ak.to_numpy(ak.flatten(obj, axis=None))
                    h.update(flat.tobytes())
            else:
                # Scalar value: wrap it in an array to get at its raw bytes.
                h.update(np.array([obj]).tobytes())
        digests.append(h.digest())

    # Combine all digests in ONE np.append call instead of appending inside
    # the loop (which recopies the whole array per entry and is quadratic).
    # The empty float seed is kept on purpose: NumPy's type promotion between
    # the seed and the byte strings determines the element layout of the
    # resulting array, and therefore the bytes hashed below. Keeping it means
    # the result stays comparable with previously computed hashes.
    items = np.append(np.array([]), digests)
    items.sort()

    h = hashlib.sha256()
    h.update("".join(keys).encode("utf8"))
    h.update(items.tobytes())
    return h.digest()


def hash_root_file(path: Path, ordering_invariant: bool = True) -> str:
    """Compute a SHA-256 hex digest over the TTree contents of a ROOT file.

    The keys returned by ``rf.keys(cycle=False)`` are visited in sorted
    order. Every key name is fed into the overall hash; keys that do not
    resolve to a ``uproot.TTree``, or whose lookup raises
    ``NotImplementedError``, contribute only their name.

    Args:
        path: Location of the ROOT file to read.
        ordering_invariant: If true, each entry of a tree is hashed
            separately and the per-entry digests are sorted before being
            combined, so the result does not change when entries are
            reordered. If false, each branch is hashed as one flattened
            buffer, which is sensitive to entry order.

    Returns:
        The hexadecimal SHA-256 digest.
    """
    overall = hashlib.sha256()

    # Open the file as a context manager so the handle is released even if
    # reading a tree fails part-way through.
    with uproot.open(path) as rf:
        for tree_name in sorted(rf.keys(cycle=False)):
            overall.update(tree_name.encode("utf8"))

            try:
                tree = rf[tree_name]
            except NotImplementedError:
                # Object could not be read by uproot; only its name counts.
                continue
            if not isinstance(tree, uproot.TTree):
                continue

            keys = sorted(tree.keys())
            branches = tree.arrays(library="ak")

            if ordering_invariant:
                tree_digest = _hash_entries_sorted(branches, keys)
            else:
                tree_digest = _hash_branches_in_order(branches, keys)
            overall.update(tree_digest)

    return overall.hexdigest()
0063
0064
if __name__ == "__main__":
    # Command-line entry point: parse the arguments, hash the given ROOT
    # file and print the resulting hex digest.
    parser = argparse.ArgumentParser(
        description="Calculate a hash of the numeric content of a root file"
    )
    parser.add_argument(
        "input_file",
        type=Path,
        help="The input ROOT file to calculate a hash for",
    )
    parser.add_argument(
        "--no-ordering-invariant",
        "-n",
        action="store_true",
        help="Calculate a hash that is not invariant under reordering of entries? (faster than invariant)",
    )
    cli = parser.parse_args()

    # The flag is negative on the command line; invert it for the function.
    digest = hash_root_file(
        path=cli.input_file,
        ordering_invariant=not cli.no_ordering_invariant,
    )
    print(digest)