#!/usr/bin/env python3

# SPDX-License-Identifier: LGPL-3.0-or-later
# Copyright (C) 2023 Chao Peng
'''
A script to compare two detector parameter tables.
Report inconsistent numbers, missing components/columns, or other errors.
'''
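# Example usage (a sketch; the file names below are placeholders, and both inputs
# may also be URLs, in csv or html format, per the command-line arguments defined below):
#
#   compare_detector_parameter_table detector_table.csv simulation_table.csv \
#       -v -o report.json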

import os
import sys
import json
import argparse
import numpy as np
import pandas as pd

pd.set_option('display.max_rows', 500)

# key_cols + value_cols are required columns
key_cols = [
    'Region',
    'Component',
    'Sub-Component',
]
value_cols = [
    'Length (cm)',
    'Inner Radius (cm)',
    'Outer Radius (cm)',
    'Offset from Center (cm)',
    'Physical Start (cm)',
    'Physical End (cm)',
]
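# these column names must match the (whitespace-normalized) headers of both input
# tables; read_par_table prints an error and exits if any of them is missing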

# read parameter table and do a little bit of formatting
def read_par_table(path, **kwargs):
    # read and format
    if path.endswith('html') or path.endswith('htm'):
        dft = pd.read_html(path, **kwargs)[0]
    else:
        dft = pd.read_csv(path, **kwargs)
    dft.columns = [i.replace(' \n', ' ').replace('\n', ' ').strip() for i in dft.columns.astype(str)]
    unnamed_cols = [i for i in dft.columns if i.startswith('Unnamed')]
    dft = dft.drop(columns=unnamed_cols)
    for col in dft.columns:
        if pd.api.types.is_string_dtype(dft[col].dtype):
            dft.loc[:, col] = dft[col].str.replace('\n', ' ').str.replace('  ', ' ').str.strip()

    missing_required_cols = [c for c in key_cols + value_cols if c not in dft.columns]
    if len(missing_required_cols) > 0:
        print('Error: required columns do not exist in "{}": {}'.format(path, missing_required_cols))
        sys.exit(-1)

    dft.loc[:, 'Region'] = dft['Region'].ffill()
    dft.loc[:, 'Component'] = dft['Component'].ffill()
    dft.loc[:, 'Sub-Component'] = dft['Sub-Component'].fillna('')
    # build a key (multi-indexing also works)
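    # e.g. Region 'Barrel', Component 'DIRC', Sub-Component '' becomes the key 'barrel_dirc'
    # (names here are only illustrative; spaces turn into underscores and the key is lower-cased)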
    comp = dft['Region'] + '_' + dft['Component']
    sub_comp = dft['Sub-Component'].copy()
    sub_comp.loc[sub_comp.str.len() > 0] = '_' + sub_comp.astype(str)
    dft.loc[:, 'key'] = (comp + sub_comp).str.replace(' ', '_').str.lower()
    return dft.set_index('key', drop=True)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        prog='compare_detector_parameter_table',
        description='A Python script to compare two detector parameter tables.'
                    + '\n       '  # 7 spaces for 'usage: '
                    + r'It adapts the format as of 2023/10 (see https://eic.jlab.org/Menagerie).'
    )
    parser.add_argument(
        'det_table',
        help='Path or URL to the DETECTOR parameter table (csv or html).'
    )
    parser.add_argument(
        'sim_table',
        help='Path or URL to the SIMULATION parameter table (csv or html).'
    )
    parser.add_argument(
        '--template',
        default='https://raw.githubusercontent.com/eic/epic/main/templates/DetectorParameterTable.csv.jinja2',
        help='Path or URL to the template file of the detector parameter table.'
    )
    parser.add_argument(
        '--det-encoding', default='ISO-8859-1',
        help='encoding for the detector parameter table.'
    )
    parser.add_argument(
        '--sim-encoding', default='utf-8',
        help='encoding for the simulation parameter table.'
    )
    parser.add_argument(
        '-v', '--verbose', action='store_true',
        help='print out the report to stdout.'
    )
    parser.add_argument(
        '-o', '--output', default='par_table_report.json',
        help='output path for the report.'
    )
    args = parser.parse_args()

    dfd = read_par_table(args.det_table, encoding=args.det_encoding)
    # ignore alternatives
    dfd = dfd[~dfd['Region'].str.contains('ALTERNATIVE')]
    dfs = read_par_table(args.sim_table, encoding=args.sim_encoding)

    try:
        temp = read_par_table(args.template)
    except Exception:
        print('Failed to load parameter table template from "{}".'.format(args.template))
        print('Report will not include information about the simulation variable names.')
        temp = pd.DataFrame()

    # check components
    report = dict()
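    # components found only in the detector table are reported as missing;
    # components found only in the simulation table are reported as extra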
    matched_keys = np.intersect1d(dfd.index, dfs.index)
    missing_keys = [k for k in dfd.index if k not in matched_keys]
    extra_keys = [k for k in dfs.index if k not in matched_keys]
    df_mis = dfd.loc[missing_keys, ['Region', 'Component', 'Sub-Component']].reset_index(drop=True)
    df_ext = dfs.loc[extra_keys, ['Region', 'Component', 'Sub-Component']].reset_index(drop=True)
    dfdm = dfd.loc[matched_keys, key_cols + value_cols]
    dfsm = dfs.loc[:, key_cols + value_cols]
    try:
        dftm = temp.loc[:, key_cols + value_cols]
    except Exception:
        dftm = pd.DataFrame()

    comps = []
    total_counts = np.zeros(3, dtype=int)
    # check component values
    for k, drow in dfdm.iterrows():
        srow = dfsm.loc[k].squeeze()
        try:
            tvars = dftm.loc[k].squeeze().to_dict()
        except Exception:
            tvars = dict()
        # check value
        comp = drow[key_cols].to_dict()
        missing_vals, wrong_vals, correct_vals = {}, {}, {}
        # print(tvars)
        for vcol in value_cols:
            # print(k, vcol)
            tvar = str(tvars.get(vcol, 'Not Found')).replace('nan', 'Empty')
            try:
                dval = float(drow[vcol])
            except Exception:
                dval = np.nan
            try:
                sval = float(srow[vcol])
            except Exception:
                sval = np.nan

            if np.any(np.isnan([sval, dval])):
                missing_vals[vcol] = dict(det=dval, sim=sval, template_var=tvar)
            elif not np.isclose(sval, dval):
                wrong_vals[vcol] = dict(det=dval, sim=sval, template_var=tvar)
            else:
                correct_vals[vcol] = dict(det=dval, sim=sval, template_var=tvar)

        comp['Mismatched'] = wrong_vals
        total_counts[0] += len(wrong_vals)
        comp['Missing'] = missing_vals
        total_counts[1] += len(missing_vals)
        comp['Correct'] = correct_vals
        total_counts[2] += len(correct_vals)
        comps.append(comp)

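    # each entry of comps now carries the key columns plus three dicts
    # ('Mismatched', 'Missing', 'Correct'), each mapping a value column to
    # its detector value, simulation value, and template variable name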
    # printout reports
    if args.verbose:
        print('These det table components are missing in the sim table:')
        print(df_mis)

        print('These sim table components are not found in the det table:')
        print(df_ext)

        print('These components are checked:')

        for comp in comps:
            subdet = pd.DataFrame(columns=['det', 'sim', 'template_var', 'stat'], index=value_cols)
            print('{}, {}, {}: '.format(comp['Region'], comp['Component'], comp['Sub-Component']))
            for stat in ['Mismatched', 'Missing', 'Correct']:
                for key, val in comp[stat].items():
                    subdet.loc[key, 'stat'] = stat
                    for k, v in val.items():
                        subdet.loc[key, k] = v
            print(subdet)
            print(' ')

    # save reports to a json file
    report['stats'] = dict(
        checked_columns=value_cols,
        table_components=dict(
            detector=dict(
                total=dfd.shape[0],
                matched=dfdm.shape[0],
                missing=df_mis.shape[0],),
            simulation=dict(
                total=dfs.shape[0],
                matched=dfdm.shape[0],
                missing=df_ext.shape[0],)
        ),
        component_values=dict(
            checked_components=dfdm.shape[0],
            checked_values=int(np.sum(total_counts)),
            mismatched=int(total_counts[0]),
            missing=int(total_counts[1]),
            correct=int(total_counts[2]),
        ),
    )
    report['results'] = dict(
        checked_components=comps,
        missing_components=df_mis.to_dict('records'),
        extra_components=df_ext.to_dict('records')
    )
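    # rough layout of the saved report (values are filled in above):
    #   {"stats": {"checked_columns": ..., "table_components": ..., "component_values": ...},
    #    "results": {"checked_components": ..., "missing_components": ..., "extra_components": ...}}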
    with open(args.output, 'w') as f:
        json.dump(report, f, indent=4)
    print('Comparison reports saved to "{}".'.format(args.output))