Back to home page

EIC code displayed by LXR

 
 

    


Warning: /epic/bin/compare_detector_parameter_table is written in an unsupported language. The file is not indexed.

0001 #!/usr/bin/env python3
0002 
0003 # SPDX-License-Identifier: LGPL-3.0-or-later
0004 # Copyright (C) 2023 Chao Peng
0005 '''
0006     A script to compare two detector parameter tables.
0007     Report inconsistent numbers, missing components/columns, or other errors.
0008 '''
0009 
0010 import os
0011 import json
0012 import argparse
0013 import numpy as np
0014 import pandas as pd
0015 
pd.set_option('display.max_rows', 500)

# key_cols + value_cols are required columns
key_cols = [
        'Region',
        'Component',
        'Sub-Component',
        ]
value_cols = [
        'Length (cm)',
        'Inner Radius (cm)',
        'Outer Radius (cm)',
        'Offset from Center (cm)',
        'Physical Start (cm)',
        'Physical End (cm)',
        ]


# read parameter table and do a little bit formatting
def read_par_table(path, **kwargs):
    '''
    Read a detector parameter table and normalize it for comparison.

    Parameters
    ----------
    path : str
        Path or URL to the table. Files ending in ``html``/``htm`` are read
        with ``pd.read_html`` (first table); anything else with ``pd.read_csv``.
    **kwargs
        Extra keyword arguments forwarded to the pandas reader (e.g. encoding).

    Returns
    -------
    pd.DataFrame
        Table indexed by a normalized ``key`` column of the form
        ``region_component[_sub-component]`` (lower-case, spaces -> ``_``).

    Raises
    ------
    SystemExit
        If any of the required key/value columns is missing.
    '''
    # read and format
    if path.endswith('html') or path.endswith('htm'):
        dft = pd.read_html(path, **kwargs)[0]
    else:
        dft = pd.read_csv(path, **kwargs)
    # collapse embedded newlines / stray spaces in the header row
    dft.columns = [i.replace(' \n', ' ').replace('\n', ' ').strip() for i in dft.columns.astype(str)]
    # drop auto-generated index columns produced by read_html/read_csv
    unnamed_cols = [i for i in dft.columns if i.startswith('Unnamed')]
    dft = dft.drop(columns=unnamed_cols)
    # same newline/space cleanup for string-valued cells
    for col in dft.columns:
        if pd.api.types.is_string_dtype(dft[col].dtype):
            dft.loc[:, col] = dft[col].str.replace('\n', ' ').str.replace('  ', ' ').str.strip()

    missing_required_cols = [c for c in key_cols + value_cols if c not in dft.columns]
    if len(missing_required_cols) > 0:
        print('Error: required columns do not exist in "{}": {}'.format(path, missing_required_cols))
        # equivalent to exit(-1) but does not rely on the site module's helper
        raise SystemExit(-1)

    # merged cells leave NaN in the key columns: forward-fill them.
    # .ffill() replaces the fillna(method='ffill') form deprecated in
    # pandas 2.1 and removed in pandas 3.0.
    dft.loc[:, 'Region'] = dft['Region'].ffill()
    dft.loc[:, 'Component'] = dft['Component'].ffill()
    dft.loc[:, 'Sub-Component'] = dft['Sub-Component'].fillna('')
    # build a key (multi-indexing also works)
    comp = dft['Region'] + '_' + dft['Component']
    sub_comp = dft['Sub-Component'].copy()
    # prefix non-empty sub-components with '_' so empty ones add nothing
    sub_comp.loc[sub_comp.str.len() > 0] = '_' + sub_comp.astype(str)
    dft.loc[:, 'key'] = (comp + sub_comp).str.replace(' ', '_').str.lower()
    return dft.set_index('key', drop=True)
0061 
0062 
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
            prog='compare_detector_parameter_table',
            description = 'A python script to compare two detector parameter tables.'
                        + '\n       ' # 7 spaces for 'usage: '
                        + r'It adapts the format as of 2023/10 (see https://eic.jlab.org/Menagerie).'
            )
    parser.add_argument(
            'det_table',
            help='Path or url to the DETECTOR parameter table (csv or html).'
            )
    parser.add_argument(
            'sim_table',
            help='Path or url to the SIMULATION parameter table (csv or html).'
            )
    parser.add_argument(
            '--template',
            default='https://raw.githubusercontent.com/eic/epic/main/templates/DetectorParameterTable.csv.jinja2',
            help='Path or url to the template file of the detector parameter table'
            )
    parser.add_argument(
            '--det-encoding', default='ISO-8859-1',
            help='encoding for the detector parameter table.'
            )
    parser.add_argument(
            '--sim-encoding', default='utf-8',
            help='encoding for the simulation parameter table.'
            )
    parser.add_argument(
            '-v', '--verbose', action='store_true',
            help='enable to print out the report.'
            )
    parser.add_argument(
            '-o', '--output', default='par_table_report.json',
            help='output path for the report.'
            )
    args = parser.parse_args()

    dfd = read_par_table(args.det_table, encoding=args.det_encoding)
    # ignore alternatives
    dfd = dfd[~dfd['Region'].str.contains('ALTERNATIVE')]
    dfs = read_par_table(args.sim_table, encoding=args.sim_encoding)

    # the template only supplies simulation variable names; it is optional,
    # so degrade gracefully to an empty frame when it cannot be fetched
    try:
        temp = read_par_table(args.template)
    except Exception:
        print('Failed to load parameter table template from "{}".'.format(args.template))
        print('Report will not include information about the simulation variable names.')
        temp = pd.DataFrame()

    # check components: match det/sim rows by the normalized key index
    report = dict()
    matched_keys = np.intersect1d(dfd.index, dfs.index)
    # set gives O(1) membership tests instead of scanning the array per key
    matched_set = set(matched_keys)
    missing_keys = [k for k in dfd.index if k not in matched_set]
    extra_keys = [k for k in dfs.index if k not in matched_set]
    df_mis = dfd.loc[missing_keys, ['Region', 'Component', 'Sub-Component']].reset_index(drop=True)
    df_ext = dfs.loc[extra_keys, ['Region', 'Component', 'Sub-Component']].reset_index(drop=True)
    dfdm = dfd.loc[matched_keys, key_cols + value_cols]
    dfsm = dfs.loc[:, key_cols + value_cols]
    try:
        dftm = temp.loc[:, key_cols + value_cols]
    except Exception:
        # template missing or lacks the required columns
        dftm = pd.DataFrame()

    comps = []
    # total_counts: [mismatched, missing, correct]
    total_counts = np.zeros(3, dtype=int)
    # check component values
    for k, drow in dfdm.iterrows():
        srow = dfsm.loc[k].squeeze()
        try:
            tvars = dftm.loc[k].squeeze().to_dict()
        except Exception:
            # no template row for this component
            tvars = dict()
        # check value
        comp = drow[key_cols].to_dict()
        missing_vals, wrong_vals, correct_vals = {}, {}, {}
        for vcol in value_cols:
            tvar = str(tvars.get(vcol, 'Not Found')).replace('nan', 'Empty')
            # non-numeric or absent cells compare as NaN (-> "Missing")
            try:
                dval = float(drow[vcol])
            except (TypeError, ValueError):
                dval = np.nan
            try:
                sval = float(srow[vcol])
            except (TypeError, ValueError):
                sval = np.nan

            if np.any(np.isnan([sval, dval])):
                missing_vals[vcol] = dict(det=dval, sim=sval, template_var=tvar)
            elif not np.isclose(sval, dval):
                wrong_vals[vcol] = dict(det=dval, sim=sval, template_var=tvar)
            else:
                correct_vals[vcol] = dict(det=dval, sim=sval, template_var=tvar)

        comp['Mismatched'] = wrong_vals
        total_counts[0] += len(wrong_vals)
        comp['Missing'] = missing_vals
        total_counts[1] += len(missing_vals)
        comp['Correct'] = correct_vals
        total_counts[2] += len(correct_vals)
        comps.append(comp)

    # printout reports
    if args.verbose:
        print('These det table components are missing in the sim table:')
        print(df_mis)

        print('These sim table components are not found in the det table:')
        print(df_ext)

        print('These components are checked:')

        for comp in comps:
            subdet = pd.DataFrame(columns=['det', 'sim', 'template_var', 'stat'], index=value_cols)
            print('{}, {}, {}: '.format(comp['Region'], comp['Component'], comp['Sub-Component']))
            for stat in ['Mismatched', 'Missing', 'Correct']:
                for key, val in comp[stat].items():
                    subdet.loc[key, 'stat'] = stat
                    for k, v in val.items():
                        subdet.loc[key, k] = v
            print(subdet)
            print(' ')

    # save reports to a json file
    report['stats'] = dict(
            checked_columns=value_cols,
            table_components=dict(
                detector=dict(
                    total=dfd.shape[0],
                    matched=dfdm.shape[0],
                    missing=df_mis.shape[0],),
                simulation=dict(
                    total=dfs.shape[0],
                    matched=dfdm.shape[0],
                    missing=df_ext.shape[0],)
                ),
            component_values=dict(
                checked_components=dfdm.shape[0],
                checked_values=int(np.sum(total_counts)),
                mismatched=int(total_counts[0]),
                missing=int(total_counts[1]),
                correct=int(total_counts[2]),
                ),
            )
    report['results'] = dict(
            checked_components=comps,
            missing_components=df_mis.to_dict('records'),
            extra_components=df_ext.to_dict('records')
            )
    with open(args.output, 'w') as f:
        json.dump(report, f, indent=4)
    print('Comparison reports saved to "{}".'.format(args.output))