File indexing completed on 2026-04-27 07:41:40
0001
0002
0003 import os
0004 import sys
0005 from datetime import datetime
0006 import yaml
0007
0008 import matplotlib.pyplot as plt
0009 import numpy as np
0010 from matplotlib.backends.backend_pdf import PdfPages
0011
0012 from argparsing import submission_args
0013 from sphenixdbutils import dbQuery, cnxn_string_map, list_to_condition
0014 from sphenixprodrules import RuleConfig
0015 from sphenixmisc import setup_rot_handler
0016 from simpleLogger import slogger, CustomFormatter, CHATTY, DEBUG, INFO, WARN, ERROR, CRITICAL
0017
0018
def get_time_diffs(run_condition, dsttype):
    """
    Fetch submission-to-end durations for finished jobs.

    Queries ``production_status`` for rows matching ``run_condition`` whose
    ``dsttype`` starts with the given prefix, whose status is 'finished' and
    whose ``submitting`` and ``ended`` columns are both non-NULL.

    Args:
        run_condition: SQL boolean expression inserted verbatim after WHERE.
        dsttype: Prefix matched against the ``dsttype`` column.

    Returns:
        None if the query could not be executed, an empty list if no rows
        matched, otherwise one duration in seconds per returned row.
    """
    # NOTE(review): both arguments are interpolated directly into the SQL
    # text; confirm that callers only pass trusted values.
    sql = f"""
    SELECT submitting, ended
    FROM production_status
    WHERE {run_condition}
    AND dsttype like '{dsttype}%'
    AND status = 'finished'
    AND submitting IS NOT NULL
    AND ended IS NOT NULL
    """

    DEBUG(f"Executing query:\n{sql}")

    cursor = dbQuery(cnxn_string_map['statr'], sql)
    if not cursor:
        ERROR("Failed to query production database.")
        return None

    rows = cursor.fetchall()
    if not rows:
        return []

    def _as_datetime(stamp):
        # Timestamps that arrive as ISO-8601 strings are parsed; any other
        # value is passed through unchanged.
        return datetime.fromisoformat(stamp) if isinstance(stamp, str) else stamp

    # Normalise 'submitting' before 'ended' for each row, then subtract.
    spans = ((_as_datetime(begun), _as_datetime(done)) for begun, done in rows)
    return [(done - begun).total_seconds() for begun, done in spans]
0051
def plot_histogram(ax, time_diffs_seconds, title, time_unit='hours'):
    """
    Draw a histogram of job durations on ``ax``.

    Durations are converted from seconds to ``time_unit`` and capped at the
    axis maximum, so every job longer than the maximum is counted in the last
    bin. The final x tick is labelled ``<max>+`` to show this.

    Args:
        ax: Axes-like object the histogram is drawn on.
        time_diffs_seconds: Durations in seconds.
        title: Title set on the axes.
        time_unit: One of 'hours', 'minutes' or 'seconds'.

    Raises:
        ValueError: If ``time_unit`` is not one of the supported units.
    """
    # Longest duration in hours; only consulted for the 'hours' zoom below.
    if time_diffs_seconds:
        longest_hours = np.max(time_diffs_seconds) / 3600
    else:
        longest_hours = 0

    # unit -> (seconds per unit, axis maximum, bin width, tick spacing)
    unit_table = {
        'hours': (3600, 60, 1, 10),
        'minutes': (60, 600, 10, 60),
        'seconds': (1, 1200, 20, 120),
    }

    if time_unit not in unit_table:
        raise ValueError("Invalid time_unit. Must be 'hours', 'minutes', or 'seconds'.")

    divisor, upper, width, tick_gap = unit_table[time_unit]
    unit_label = time_unit  # the axis/legend label is the unit name itself

    durations = np.asarray(time_diffs_seconds) / divisor
    mean_duration = np.mean(durations)

    # When every job finished in under 10 hours, zoom the 'hours' view to
    # 0-10 h with ten-minute bins and hourly ticks.
    if time_unit == 'hours' and longest_hours < 10:
        upper, width, tick_gap = 10, 10 / 60, 1

    # Cap values at the axis maximum so long jobs pile up in the last bin.
    capped = np.minimum(durations, upper)
    edges = np.arange(0, upper + width, width)

    legend_text = f'Time (Avg: {mean_duration:.2f} {unit_label})'
    ax.hist(capped, bins=edges, alpha=0.7, label=legend_text)

    ax.set_title(title)
    ax.set_xlabel(f'Time from Submission to Finish ({unit_label})')
    ax.set_ylabel('Number of Jobs')
    ax.legend()
    ax.grid(True, which='both', linestyle='--', linewidth=0.5)

    ax.set_xlim(0, upper)
    ticks = np.arange(0, upper + width, tick_gap)

    # Make sure the axis maximum itself carries a tick.
    if not np.any(ticks == upper):
        ticks = np.append(ticks, upper)

    tick_text = [f'{pos:g}' for pos in ticks]
    tick_text[-1] = f'{int(upper)}+'  # mark the last tick as the overflow boundary

    ax.set_xticks(ticks)
    ax.set_xticklabels(tick_text)
0110
def main():
    """
    Plot the job time distribution for a production rule.

    Loads the rule named on the command line, then works in one of two modes:

    * If ``--runs`` was given and the rule resolves to more than one run, a
      multi-page PDF ``job_time_distribution_<rulename>.pdf`` is written with
      one 'hours' histogram per run that has finished jobs.
    * Otherwise all runs are combined and three PNG files are written, one
      each for hours, minutes and seconds.

    Exits with status 1 if the rule cannot be loaded or, in combined mode, if
    the database query fails. Exits with status 0 in combined mode when no
    finished jobs are found.
    """
    args = submission_args()

    plt.rcParams.update({'font.size': 16})

    sublogdir = setup_rot_handler(args)
    slogger.setLevel(args.loglevel)
    INFO(f"Logging to {sublogdir}, level {args.loglevel}")

    param_overrides = {
        "runs": args.runs,
        "runlist": args.runlist,
        "nevents": 0,
    }

    if args.physicsmode is not None:
        param_overrides["physicsmode"] = args.physicsmode

    # Default production mode; replaced by the mangle_dirpath value when given.
    param_overrides["prodmode"] = "production"
    if args.mangle_dirpath:
        param_overrides["prodmode"] = args.mangle_dirpath

    try:
        rule = RuleConfig.from_yaml_file(
            yaml_file=args.config,
            rule_name=args.rulename,
            param_overrides=param_overrides
        )
        INFO(f"Successfully loaded rule configuration: {args.rulename}")
    except (ValueError, FileNotFoundError) as e:
        ERROR(f"Error: {e}")
        sys.exit(1)

    # Apply the style once, before any figure is created. Style settings are
    # picked up when figures/axes are built, so selecting it after
    # plt.subplots() would leave the first figure with the previous settings.
    plt.style.use('seaborn-v0_8-deep')

    if args.runs and len(rule.runlist_int) > 1:
        output_pdf_path = f'job_time_distribution_{args.rulename}.pdf'
        pages_written = 0
        with PdfPages(output_pdf_path) as pdf:
            for run in rule.runlist_int:
                INFO(f"Processing run: {run}")
                run_condition = list_to_condition([run], name="run")
                time_diffs_seconds = get_time_diffs(run_condition, rule.dsttype)

                # None signals a failed query; do not report it as "no jobs".
                if time_diffs_seconds is None:
                    ERROR(f"Database query failed for run {run}; skipping.")
                    continue
                if not time_diffs_seconds:
                    INFO(f"No finished jobs found for run {run}.")
                    continue

                fig, ax = plt.subplots(figsize=(12, 7))

                title = f'Job Time Distribution for {args.rulename} (Run: {run})'
                plot_histogram(ax, time_diffs_seconds, title, time_unit='hours')

                fig.tight_layout()
                pdf.savefig(fig)
                plt.close(fig)
                pages_written += 1

        if pages_written:
            INFO(f"Saved multi-page PDF to {output_pdf_path}")
        else:
            WARN(f"No pages were written to {output_pdf_path}: no run had finished jobs.")

    else:
        run_condition = list_to_condition(rule.runlist_int, name="run")
        time_diffs_seconds = get_time_diffs(run_condition, rule.dsttype)
        if time_diffs_seconds is None:
            sys.exit(1)
        if not time_diffs_seconds:
            INFO("No finished jobs found for the specified runs.")
            sys.exit(0)

        # Describe the run selection in the title; when a run-list file is
        # set, show its file name instead of the list of run numbers.
        run_str = f"Run(s): {rule.runlist_int}"
        if rule.runlist is not None:
            run_str = f"Runs from file: {os.path.basename(rule.runlist)}"

        base_title = f'Job Time Distribution for {args.rulename}\n{run_str}'

        for unit in ['hours', 'minutes', 'seconds']:
            fig, ax = plt.subplots(figsize=(12, 7))

            plot_histogram(ax, time_diffs_seconds, base_title, time_unit=unit)

            fig.tight_layout()
            output_file = f'job_time_distribution_{args.rulename}_{unit}.png'
            fig.savefig(output_file)
            INFO(f"Saved plot to {output_file}")
            plt.close(fig)
0200
0201
# Run the plotting workflow only when this file is executed as a script.
if __name__ == '__main__':
    main()