Calorimetery/ClusterVisualizer/utils.py

0001 '''
0002     A utility script to help the analysis of imaging calorimeter data
0003
0004     Author: Chao Peng (ANL)
0005     Date: 04/30/2021
0006
0007     Added all mc particle info to obtain decaying particles
0008     Author: Jihee Kim (ANL)
0009     Date: 08/06/2021
0010 '''
0011
0012 import os
0013 import ROOT
0014 import numpy as np
0015 import pandas as pd
0016 import matplotlib
0017 import DDG4
0018 from ROOT import gROOT, gInterpreter
0019 from lxml import etree as ET
0020
0021
0022 class ReadoutDecoder:
0023     def __init__(self, compact, readout):
0024         self.readouts = self.getReadouts(compact)
0025         self.changeReadout(readout)
0026
0027     def changeReadout(self, readout):
0028         self.fieldsmap = self.decomposeIDs(self.readouts[readout])
0029
0030     def get(self, idvals, field):
0031         start, width = self.fieldsmap[field]
0032         if width >= 0:
0033             return np.bitwise_and(np.right_shift(idvals, start), (1 << width) - 1)
0034         # first bit is sign bit
0035         else:
0036             width = abs(width) - 1
0037             vals = np.bitwise_and(np.right_shift(idvals, start), (1 << width) - 1)
0038             return np.where(np.bitwise_and(np.right_shift(idvals, start + width), 1), vals - (1 << width), vals)
0039
0040     def mask(self, field):
0041         start, width = self.fieldsmap[field]
0042         return np.uint64((2**abs(width) - 1) << start)
0043
0044     def decode(self, idvals):
0045         return {field: self.get(idvals, field) for field, _ in self.fieldsmap.items()}
0046
0047     @staticmethod
0048     def getReadouts(path):
0049         res = dict()
0050         ReadoutDecoder.__getReadoutsRecur(path, res)
0051         return res
0052
0053     @staticmethod
0054     def __getReadoutsRecur(path, res):
0055         if not os.path.exists(path):
0056             print('Xml file {} not exist! Ignored it.'.format(path))
0057             return
0058         lccdd = ET.parse(path).getroot()
0059         readouts = lccdd.find('readouts')
0060         if readouts is not None:
0061             for readout in readouts.getchildren():
0062                 ids = readout.find('id')
0063                 if ids is not None:
0064                     res[readout.attrib['name']] = ids.text
0065         for child in lccdd.getchildren():
0066             if child.tag == 'include':
0067                 root_dir = os.path.dirname(os.path.realpath(path))
0068                 ReadoutDecoder.__getReadoutsRecur(os.path.join(root_dir, child.attrib['ref']), res)
0069
0070     @staticmethod
0071     def decomposeIDs(id_str):
0072         res = dict()
0073         curr_bit = 0
0074         for field_bits in id_str.split(','):
0075             elements = field_bits.split(':')
0076             field_name = elements[0]
0077             bit_width = int(elements[-1])
0078             if len(elements) == 3:
0079                 curr_bit = int(elements[1])
0080             res[field_name] = (curr_bit, bit_width)
0081             curr_bit += abs(bit_width)
0082         return res
0083
0084
0085 # read from RDataFrame and flatten a given collection, return pandas dataframe
0086 def flatten_collection(rdf, collection, cols=None):
0087     if not cols:
0088         cols = [str(c) for c in rdf.GetColumnNames() if str(c).startswith('{}.'.format(collection))]
0089     else:
0090         cols = ['{}.{}'.format(collection, c) for c in cols]
0091     if not cols:
0092         print('cannot find any branch under collection {}'.format(collection))
0093         return pd.DataFrame()
0094     # print(rdf.GetColumnNames())
0095     data = rdf.AsNumpy(cols)
0096     # flatten the data, add an event id to identify clusters from different events
0097     evns = []
0098     for i, vec in enumerate(data[cols[0]]):
0099         evns += [i]*vec.size()
0100     for n, vals in data.items():
0101         # make sure ints are not converted to floats
0102         typename = vals[0].__class__.__name__.lower()
0103         # default
0104         dtype = np.float64
0105         if 'unsigned int' in typename or 'unsigned long' in typename:
0106             dtype = np.uint64
0107         elif 'int' in typename or 'long' in typename:
0108             dtype = np.int64
0109         # print(n, typename, dtype)
0110         # type safe creation
0111         data[n] = np.asarray([v for vec in vals for v in vec], dtype=dtype)
0112     # build data frame
0113     dfp = pd.DataFrame({c: pd.Series(v) for c, v in data.items()})
0114     dfp.loc[:, 'event'] = evns
0115     return dfp
0116
0117 # helper function to truncate color map (for a better view from the rainbow colormap)
0118 def truncate_colormap(cmap, minval=0.0, maxval=1.0, n=100):
0119     new_cmap = matplotlib.colors.LinearSegmentedColormap.from_list(
0120         'trunc({n},{a:.2f},{b:.2f})'.format(n=cmap.name, a=minval, b=maxval),
0121         cmap(np.linspace(minval, maxval, n)))
0122     return new_cmap
0123
0124
0125 # load root macros, input is an argument string
0126 def load_root_macros(arg_macros):
0127     for path in arg_macros.split(','):
0128         path = path.strip()
0129         if os.path.exists(path):
0130             gROOT.Macro(path)
0131         else:
0132             print('\"{}\" does not exist, skip loading it.'.format(path))
0133
0134
0135 # read mc particles from root file
0136 def get_mcp_data(path, evnums=None, branch='MCParticles'):
0137     f = ROOT.TFile(path)
0138     events = f.events
0139     if evnums is None:
0140         evnums = np.arange(events.GetEntries())
0141     elif isinstance(evnums, int):
0142         evnums = [evnums]
0143
0144     dbuf = np.zeros(shape=(5000*len(evnums), 6))
0145     idb = 0
0146     for iev in evnums:
0147         if iev >= events.GetEntries():
0148             print('Error: event {:d} is out of range (0 - {:d})'.format(iev, events.GetEntries() - 1))
0149             continue
0150
0151         events.GetEntry(iev)
0152         # extract full mc particle data
0153         for part in getattr(events, branch):
0154             dbuf[idb] = (iev, part.momentum.x, part.momentum.y, part.momentum.z, part.PDG, part.simulatorStatus)
0155             idb += 1
0156     return pd.DataFrame(data=dbuf[:idb], columns=['event', 'px', 'py', 'pz', 'pid', 'status'])
0157
0158
0159 # read mc particles from root file
0160 def get_mcp_simple(path, evnums=None, branch='MCParticles'):
0161     f = ROOT.TFile(path)
0162     events = f.events
0163     if evnums is None:
0164         evnums = np.arange(events.GetEntries())
0165     elif isinstance(evnums, int):
0166         evnums = [evnums]
0167
0168     dbuf = np.zeros(shape=(len(evnums), 6))
0169     idb = 0
0170     for iev in evnums:
0171         if iev >= events.GetEntries():
0172             print('Error: event {:d} is out of range (0 - {:d})'.format(iev, events.GetEntries() - 1))
0173             continue
0174
0175         events.GetEntry(iev)
0176         # extract full mc particle data
0177         part = getattr(events, branch)[2]
0178         dbuf[idb] = (iev, part.momentum.x, part.momentum.y, part.momentum.z, part.PDG, part.simulatorStatus)
0179         idb += 1
0180     return pd.DataFrame(data=dbuf[:idb], columns=['event', 'px', 'py', 'pz', 'pid', 'status'])
0181
0182 #######################################
0183 # read all mc particles from root file
0184 #######################################
0185 def get_all_mcp(path, evnums=None, branch='MCParticles'):
0186     f = ROOT.TFile(path)
0187     events = f.events
0188     if evnums is None:
0189         evnums = np.arange(events.GetEntries())
0190     elif isinstance(evnums, int):
0191         evnums = [evnums]
0192
0193     dbuf = np.zeros(shape=(2000*len(evnums), 9))
0194     idb = 0
0195     for iev in evnums:
0196         if iev >= events.GetEntries():
0197             print('Error: event {:d} is out of range (0 - {:d})'.format(iev, events.GetEntries() - 1))
0198             continue
0199
0200         events.GetEntry(iev)
0201         # extract mc particle data
0202         for ptl in getattr(events, branch):
0203             dbuf[idb] = (iev, ptl.momentum.x, ptl.momentum.y, ptl.momentum.z, ptl.PDG, ptl.simulatorStatus, ptl.endpoint.x, ptl.endpoint.y, ptl.endpoint.z)
0204             idb += 1
0205
0206     return pd.DataFrame(data=dbuf[:idb], columns=['event', 'px', 'py', 'pz', 'pid', 'status', 'vex', 'vey', 'vez'])
0207
0208 # read hits data from root file
0209 def get_hits_data(path, evnums=None, branch='RecoEcalBarrelImaginglHits'):
0210     f = ROOT.TFile(path)
0211     events = f.events
0212     if evnums is None:
0213         evnums = np.arange(events.GetEntries())
0214     elif isinstance(evnums, int):
0215         evnums = [evnums]
0216
0217     dbuf = np.zeros(shape=(2000*len(evnums), 7))
0218     idb = 0
0219     for iev in evnums:
0220         if iev >= events.GetEntries():
0221             print('Error: event {:d} is out of range (0 - {:d})'.format(iev, events.GetEntries() - 1))
0222             continue
0223
0224         events.GetEntry(iev)
0225         for ihit, hit in enumerate(getattr(events, branch)):
0226             dbuf[idb] = (iev, ihit, hit.layer, hit.position.x, hit.position.y,
0227                     hit.position.z, hit.energy*1000.)
0228             idb += 1
0229
0230     return pd.DataFrame(data=dbuf[:idb], columns=['event', 'cluster', 'layer', 'x', 'y',
0231         'z', 'energy'])
0232
0233
0234 # read layers data from root file
0235 def get_layers_data(path, evnums=None, branch="EcalBarrelImagingClustersLayers"):
0236     f = ROOT.TFile(path)
0237     events = f.events
0238     if evnums is None:
0239         evnums = np.arange(events.GetEntries())
0240     elif isinstance(evnums, int):
0241         evnums = [evnums]
0242
0243     dbuf = np.zeros(shape=(2000*len(evnums), 7))
0244     idb = 0
0245     for iev in evnums:
0246         if iev >= events.GetEntries():
0247             print('Error: event {:d} is out of range (0 - {:d})'.format(iev, events.GetEntries() - 1))
0248             continue
0249
0250         events.GetEntry(iev)
0251         for icl, layer in enumerate(getattr(events, branch)):
0252             dbuf[idb] = (iev, icl, layer.layer,
0253                          layer.position.x, layer.position.y, layer.position.z,
0254                          layer.energy*1000.)
0255             idb += 1
0256
0257     return pd.DataFrame(data=dbuf[:idb], columns=['event', 'cluster', 'layer', 'x', 'y',
0258         'z', 'energy'])
0259
0260
0261 # read clusters data from root file
0262 def get_clusters_data(path, evnums=None, branch='EcalBarrelImagingClustersReco'):
0263     f = ROOT.TFile(path)
0264     events = f.events
0265     if evnums is None:
0266         evnums = np.arange(events.GetEntries())
0267     elif isinstance(evnums, int):
0268         evnums = [evnums]
0269
0270     dbuf = np.zeros(shape=(20*len(evnums), 6))
0271     idb = 0
0272     for iev in evnums:
0273         if iev >= events.GetEntries():
0274             print('Error: event {:d} is out of range (0 - {:d})'.format(iev, events.GetEntries() - 1))
0275             continue
0276
0277         events.GetEntry(iev)
0278         for k, cl in enumerate(getattr(events, branch)):
0279             dbuf[idb] = (iev, k, cl.nhits, cl.energy*1000., cl.cl_theta, cl.cl_phi)
0280             idb += 1
0281
0282     return pd.DataFrame(data=dbuf[:idb], columns=['event', 'cluster', 'nhits', 'energy', 'cl_theta', 'cl_phi'])
0283
0284
0285 def compact_constants(path, names):
0286     if not os.path.exists(path):
0287         print('Cannot find compact file \"{}\".'.format(path))
0288         return []
0289     if not names:
0290         return []
0291     kernel = DDG4.Kernel()
0292     description = kernel.detectorDescription()
0293     kernel.loadGeometry("file:{}".format(path))
0294     try:
0295         vals = [description.constantAsDouble(n) for n in names]
0296     except:
0297         print('Fail to extract values from {}, return empty.'.format(names))
0298         vals = []
0299     kernel.terminate()
0300     return vals