Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-31 09:17:15

0001 #!/usr/bin/env python3
0002 import hashlib
0003 from pathlib import Path
0004 import argparse
0005 
0006 import uproot
0007 import numpy as np
0008 import awkward as ak
0009 
0010 
def hash_root_file(path: Path, ordering_invariant: bool = True) -> str:
    """Compute a SHA-256 hex digest over the TTree contents of a ROOT file.

    Every key of the file (cycle numbers stripped, sorted by name)
    contributes its name to the digest. Keys that resolve to a
    ``uproot.TTree`` additionally contribute a hash of their branch data.
    Keys that are not trees, or whose lookup raises ``NotImplementedError``,
    contribute only their name.

    Args:
        path: Location of the ROOT file to read.
        ordering_invariant: If true (the default), every entry of a tree is
            hashed on its own and the per-entry digests are sorted before
            being combined, so reordering the entries of a tree does not
            change the result. If false, each branch is hashed as one
            flattened buffer in storage order.

    Returns:
        The hexadecimal SHA-256 digest string.

    Note:
        Nested values are flattened before hashing, so only the values
        themselves are hashed, not the boundaries of the nesting.
    """
    gh = hashlib.sha256()

    # Open the file as a context manager so it is closed on every exit path,
    # including exceptions raised while reading branches.
    with uproot.open(path) as rf:
        for tree_name in sorted(rf.keys(cycle=False)):
            # The key name is hashed before the type check, so non-tree keys
            # still influence the result through their names.
            gh.update(tree_name.encode("utf8"))

            try:
                tree = rf[tree_name]
            except NotImplementedError:
                # Object type that cannot be read here: skip its content.
                continue
            if not isinstance(tree, uproot.TTree):
                continue

            keys = sorted(tree.keys())
            branches = tree.arrays(library="ak")

            if not ordering_invariant:
                # Order-sensitive mode: hash each branch as one flat buffer.
                h = hashlib.sha256()
                for name in keys:
                    h.update(name.encode("utf8"))
                    arr = ak.flatten(branches[name], axis=None)
                    h.update(np.array(arr).tobytes())
                gh.update(h.digest())
                continue

            # Order-invariant mode: one MD5 digest per entry, combined after
            # sorting so the entry order does not matter.
            row_digests = []
            for row in zip(*[branches[b] for b in keys]):
                h = hashlib.md5()
                for obj in row:
                    if isinstance(obj, ak.highlevel.Array):
                        if obj.ndim == 1:
                            h.update(ak.to_numpy(obj).tobytes())
                        else:
                            arr = ak.to_numpy(ak.flatten(obj, axis=None))
                            h.update(arr.tobytes())
                    else:
                        # Scalar value: wrap it so it has a byte representation.
                        h.update(np.array([obj]).tobytes())
                row_digests.append(h.digest())

            # Append all digests in a single call instead of once per entry;
            # np.append copies the whole array each time, which made the loop
            # quadratic. The empty float64 seed array is kept on purpose: it
            # takes part in the dtype promotion of the concatenation, and
            # hence in the exact bytes hashed below, so dropping it could
            # change hashes that were recorded earlier.
            items = np.append(np.array([]), row_digests)
            items.sort()

            h = hashlib.sha256()
            h.update("".join(keys).encode("utf8"))
            h.update(items.tobytes())

            gh.update(h.digest())

    return gh.hexdigest()
0063 
0064 
if __name__ == "__main__":
    # Command-line entry point: print the content hash of a single ROOT file.
    parser = argparse.ArgumentParser(
        description="Calculate a hash of the numeric content of a root file"
    )
    parser.add_argument(
        "input_file", type=Path, help="The input ROOT file to calculate a hash for"
    )
    parser.add_argument(
        "--no-ordering-invariant",
        "-n",
        action="store_true",
        help="Calculate a hash that is not invariant under reordering of entries? (faster than invariant)",
    )
    options = parser.parse_args()

    # The flag is phrased negatively, so invert it for the function argument.
    keep_invariant = not options.no_ordering_invariant
    digest = hash_root_file(
        path=options.input_file,
        ordering_invariant=keep_invariant,
    )
    print(digest)