Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-04-27 07:41:40

0001 #!/usr/bin/env python
0002 
0003 import os
0004 import sys
0005 from datetime import datetime
0006 import yaml
0007 
0008 import matplotlib.pyplot as plt
0009 import numpy as np
0010 from matplotlib.backends.backend_pdf import PdfPages
0011 
0012 from argparsing import submission_args
0013 from sphenixdbutils import dbQuery, cnxn_string_map, list_to_condition
0014 from sphenixprodrules import RuleConfig
0015 from sphenixmisc import setup_rot_handler
0016 from simpleLogger import slogger, CustomFormatter, CHATTY, DEBUG, INFO, WARN, ERROR, CRITICAL  # noqa: F401
0017 
0018 
def get_time_diffs(run_condition, dsttype):
    """
    Collect submission-to-end durations of finished jobs, in seconds.

    Queries the ``production_status`` table through ``dbQuery`` (connection
    ``cnxn_string_map['statr']``) for rows with status ``'finished'`` whose
    ``dsttype`` starts with *dsttype* and whose ``submitting`` and ``ended``
    columns are both set.

    NOTE: *run_condition* and *dsttype* are interpolated directly into the
    SQL text, so they must come from trusted configuration.

    Args:
        run_condition: SQL boolean expression restricting the runs.
        dsttype: Prefix matched against the ``dsttype`` column.

    Returns:
        ``None`` if the query returned no usable cursor, an empty list if no
        rows matched, otherwise a list with one ``ended - submitting``
        duration (seconds) per row.
    """
    sql = f"""
    SELECT submitting, ended
    FROM production_status
    WHERE {run_condition}
      AND dsttype like '{dsttype}%'
      AND status = 'finished'
      AND submitting IS NOT NULL
      AND ended IS NOT NULL
    """

    DEBUG(f"Executing query:\n{sql}")

    db_cursor = dbQuery(cnxn_string_map['statr'], sql)
    if not db_cursor:
        ERROR("Failed to query production database.")
        return None

    def _as_datetime(stamp):
        # Timestamps may arrive as ISO-format strings; parse only those.
        return datetime.fromisoformat(stamp) if isinstance(stamp, str) else stamp

    def _elapsed_seconds(start_raw, end_raw):
        started = _as_datetime(start_raw)
        finished = _as_datetime(end_raw)
        return (finished - started).total_seconds()

    rows = db_cursor.fetchall()
    if not rows:
        return []

    return [_elapsed_seconds(start_raw, end_raw) for start_raw, end_raw in rows]
0051 
def plot_histogram(ax, time_diffs_seconds, title, time_unit='hours'):
    """
    Plot a histogram of job durations on *ax*, with an overflow bin.

    Durations above the axis maximum are clamped to that maximum so they are
    counted in the last bin, and the last x tick is labelled ``"<max>+"``.

    Args:
        ax: Axes-like object; ``hist``, ``set_title``, ``set_xlabel``,
            ``set_ylabel``, ``legend``, ``grid``, ``set_xlim``,
            ``set_xticks`` and ``set_xticklabels`` are called on it.
        time_diffs_seconds: Durations in seconds. Any sequence or NumPy
            array is accepted; it may be empty.
        title: Text passed to ``ax.set_title``.
        time_unit: Unit of the x axis: ``'hours'``, ``'minutes'`` or
            ``'seconds'``.

    Raises:
        ValueError: If *time_unit* is not one of the supported units.
    """
    # Unit-specific configurations: seconds per unit, axis maximum, bin width
    # and tick spacing (the last three expressed in the unit itself).
    config = {
        'hours': {'conv': 3600, 'label': 'hours', 'max_val': 60, 'bin_w': 1, 'tick_step': 10},
        'minutes': {'conv': 60, 'label': 'minutes', 'max_val': 600, 'bin_w': 10, 'tick_step': 60},
        'seconds': {'conv': 1, 'label': 'seconds', 'max_val': 1200, 'bin_w': 20, 'tick_step': 120},
    }

    # Validate before touching the data so a bad unit fails fast.
    if time_unit not in config:
        raise ValueError("Invalid time_unit. Must be 'hours', 'minutes', or 'seconds'.")
    cfg = config[time_unit]

    # Normalise to an array once: a bare truth test on a multi-element NumPy
    # array raises, so emptiness is always checked through ``.size``.
    seconds = np.asarray(time_diffs_seconds, dtype=float)
    has_data = seconds.size > 0

    time_diffs = seconds / cfg['conv']
    # The mean of nothing is reported as NaN without calling np.mean on an
    # empty array (which would emit a RuntimeWarning).
    avg_time = time_diffs.mean() if has_data else float('nan')
    max_time_hours = seconds.max() / 3600 if has_data else 0

    max_val = cfg['max_val']
    bin_width = cfg['bin_w']
    tick_step = cfg['tick_step']

    # Special condition for jobs finishing in less than 10 hours
    if time_unit == 'hours' and max_time_hours < 10:
        max_val = 10
        bin_width = 10 / 60  # 10-minute bins
        tick_step = 1

    # Clamp overflow onto the axis maximum so it is counted in the last bin.
    plot_data = np.minimum(time_diffs, max_val)
    # Build edges from an integer bin count: with a fractional step,
    # np.arange can yield one edge more or fewer depending on rounding,
    # whereas linspace always ends exactly on max_val.
    n_bins = int(round(max_val / bin_width))
    bins = np.linspace(0, max_val, n_bins + 1)

    # Plotting
    ax.hist(plot_data, bins=bins, alpha=0.7, label=f'Time (Avg: {avg_time:.2f} {cfg["label"]})')

    # Axes and labels
    ax.set_title(title)
    ax.set_xlabel(f'Time from Submission to Finish ({cfg["label"]})')
    ax.set_ylabel('Number of Jobs')
    ax.legend()
    ax.grid(True, which='both', linestyle='--', linewidth=0.5)

    # X-axis ticks and overflow label
    ax.set_xlim(0, max_val)
    xticks = np.arange(0, max_val + bin_width, tick_step)

    # Ensure the last tick is at max_val
    if max_val not in xticks:
        xticks = np.append(xticks, max_val)

    xticklabels = [f'{t:g}' for t in xticks]
    # The final tick marks the overflow bin.
    xticklabels[-1] = f'{int(max_val)}+'

    ax.set_xticks(xticks)
    ax.set_xticklabels(xticklabels)
0110 
def main():
    """
    Plot the submission-to-finish time distribution of finished jobs.

    Parses the command line with ``submission_args``, loads the rule named
    ``args.rulename`` from ``args.config``, fetches job durations with
    ``get_time_diffs`` and writes the plots to the current directory:

    * If ``--runs`` was given and the rule resolves to more than one run, a
      multi-page PDF ``job_time_distribution_<rulename>.pdf`` is written with
      one hours histogram per run that has finished jobs.
    * Otherwise a single histogram over all runs is written once per time
      unit as ``job_time_distribution_<rulename>_<unit>.png``.

    Exits with status 1 if the rule cannot be loaded or the database query
    fails, and with status 0 if the single-plot mode finds no finished jobs.
    """
    args = submission_args()

    # Activate the style before any figure exists so that every figure,
    # including the first one, is created under it. The font size is set
    # afterwards so it takes precedence over anything the style defines.
    plt.style.use('seaborn-v0_8-deep')
    plt.rcParams.update({'font.size': 16})

    sublogdir = setup_rot_handler(args)
    slogger.setLevel(args.loglevel)
    INFO(f"Logging to {sublogdir}, level {args.loglevel}")

    param_overrides = {}
    param_overrides["runs"] = args.runs
    param_overrides["runlist"] = args.runlist
    param_overrides["nevents"] = 0

    if args.physicsmode is not None:
        param_overrides["physicsmode"] = args.physicsmode

    param_overrides["prodmode"] = "production"
    if args.mangle_dirpath:
        param_overrides["prodmode"] = args.mangle_dirpath

    try:
        rule = RuleConfig.from_yaml_file(
            yaml_file=args.config,
            rule_name=args.rulename,
            param_overrides=param_overrides
        )
        INFO(f"Successfully loaded rule configuration: {args.rulename}")
    except (ValueError, FileNotFoundError) as e:
        ERROR(f"Error: {e}")
        sys.exit(1)

    # If --runs is specified and there are multiple runs, create a multi-page PDF.
    if args.runs and len(rule.runlist_int) > 1:
        output_pdf_path = f'job_time_distribution_{args.rulename}.pdf'
        with PdfPages(output_pdf_path) as pdf:
            for run in rule.runlist_int:
                INFO(f"Processing run: {run}")
                run_condition = list_to_condition([run], name="run")
                time_diffs_seconds = get_time_diffs(run_condition, rule.dsttype)

                if time_diffs_seconds is None:
                    # A failed query (already logged by get_time_diffs) must
                    # not be reported as "no finished jobs"; abort exactly
                    # like the single-plot branch does.
                    sys.exit(1)
                if not time_diffs_seconds:
                    INFO(f"No finished jobs found for run {run}.")
                    continue

                fig, ax = plt.subplots(figsize=(12, 7))

                title = f'Job Time Distribution for {args.rulename} (Run: {run})'
                # Only the hours view goes into the PDF.
                plot_histogram(ax, time_diffs_seconds, title, time_unit='hours')

                fig.tight_layout()
                pdf.savefig(fig)
                plt.close(fig)

            INFO(f"Saved multi-page PDF to {output_pdf_path}")

    else:  # Single plot covering all runs, written once per time unit
        run_condition = list_to_condition(rule.runlist_int, name="run")
        time_diffs_seconds = get_time_diffs(run_condition, rule.dsttype)
        if time_diffs_seconds is None:
            sys.exit(1)  # Error occurred in get_time_diffs
        if not time_diffs_seconds:
            INFO("No finished jobs found for the specified runs.")
            sys.exit(0)

        run_str = f"Run(s): {rule.runlist_int}"
        if rule.runlist is not None:
            # Show only the file name of the run list, not its full path.
            run_str = f"Runs from file: {os.path.basename(rule.runlist)}"

        base_title = f'Job Time Distribution for {args.rulename}\n{run_str}'

        for unit in ['hours', 'minutes', 'seconds']:
            fig, ax = plt.subplots(figsize=(12, 7))

            plot_histogram(ax, time_diffs_seconds, base_title, time_unit=unit)

            fig.tight_layout()
            output_file = f'job_time_distribution_{args.rulename}_{unit}.png'
            fig.savefig(output_file)
            INFO(f"Saved plot to {output_file}")
            plt.close(fig)
0200     
0201 
0202 if __name__ == '__main__':
0203     main()