File indexing completed on 2026-06-04 08:56:25
0001
0002
0003 import sys
0004 from datetime import datetime, timezone, timedelta
0005
0006 import matplotlib.pyplot as plt
0007 import matplotlib.dates as mdates
0008 import numpy as np
0009
0010 from argparsing import submission_args
0011 from sphenixdbutils import dbQuery, cnxn_string_map
0012 from sphenixprodrules import RuleConfig
0013 from sphenixmisc import setup_rot_handler
0014 from simpleLogger import slogger, CustomFormatter, CHATTY, DEBUG, INFO, WARN, ERROR, CRITICAL
0015
0016
# Earliest job start time (UTC) considered; used both as the lower bound of
# the database query and as the left edge of the histogram.
START_DATE = datetime(year=2026, month=4, day=21, tzinfo=timezone.utc)

# Width of a single histogram bin, in hours.
BIN_HOURS = 2

# Number of consecutive bins averaged for the rolling curve
# (ROLLING_BINS * BIN_HOURS hours in total).
ROLLING_BINS = 12
0020
0021
def get_start_times(dsttype, tag, dataset, since):
    """Fetch the start timestamps of finished production jobs.

    Selects the ``started`` column of ``production_jobs`` rows whose ``tag``
    and ``dataset`` match exactly, whose ``dsttype`` begins with *dsttype*,
    whose status is ``'finished'`` and whose start time is not earlier than
    *since*.

    Args:
        dsttype: Prefix matched against the ``dsttype`` column (``LIKE 'x%'``).
        tag: Value compared with the ``tag`` column.
        dataset: Value compared with the ``dataset`` column.
        since: Lower bound for ``started``; its ``isoformat()`` is embedded
            in the query.

    Returns:
        ``None`` if ``dbQuery`` yields a falsy cursor, an empty list if no
        rows match, otherwise a list of timezone-aware ``datetime`` objects.
    """

    def _to_aware(value):
        # String timestamps are parsed first; naive values are then tagged
        # as UTC.  NOTE(review): assumes the database stores UTC - confirm.
        if isinstance(value, str):
            value = datetime.fromisoformat(value)
        if value.tzinfo is None:
            value = value.replace(tzinfo=timezone.utc)
        return value

    # The interpolated values are expected to come from the rule
    # configuration, not from untrusted input.
    query = f"""
    SELECT started
    FROM production_jobs
    WHERE tag = '{tag}'
    AND dataset = '{dataset}'
    AND status = 'finished'
    AND started >= '{since.isoformat()}'
    AND dsttype LIKE '{dsttype}%'
    AND started IS NOT NULL
    """
    DEBUG(f"Executing query:\n{query}")

    cur = dbQuery(cnxn_string_map['statr'], query)
    if not cur:
        ERROR("Failed to query production database.")
        return None

    # A falsy fetch result is treated as "no rows" and produces [].
    rows = cur.fetchall() or ()
    return [_to_aware(raw) for (raw,) in rows]
0053
0054
def _rolling_average(counts, window):
    """Return the centred moving average of *counts* over *window* bins.

    The result always has ``len(counts)`` samples.  ``np.convolve`` with
    ``mode='same'`` returns ``max(len(counts), window)`` samples, which is
    longer than *counts* whenever fewer than *window* bins exist and would
    then not line up with the bin centres.  Slicing the ``'full'``
    convolution gives the same values as ``'same'`` for long inputs while
    keeping the length correct for short ones.
    """
    counts = np.asarray(counts)
    if counts.size == 0:
        return np.zeros(0)
    kernel = np.ones(window) / window
    offset = (window - 1) // 2
    smoothed = np.convolve(counts, kernel, mode='full')
    return smoothed[offset:offset + counts.size]


def main():
    """Plot how many finished jobs started in each time bin for one rule.

    Steps: parse the submission arguments, set up logging, load the rule
    configuration from YAML, query the start times of finished jobs since
    ``START_DATE``, histogram them in ``BIN_HOURS``-hour bins, overlay a
    rolling average over ``ROLLING_BINS`` bins and save the figure as
    ``job_throughput_<rulename>.png`` in the current directory.

    Exits with status 1 if the rule cannot be loaded or the database query
    fails, and with status 0 if no finished jobs are found.
    """
    args = submission_args()

    plt.rcParams.update({'font.size': 16})

    sublogdir = setup_rot_handler(args)
    slogger.setLevel(args.loglevel)
    INFO(f"Logging to {sublogdir}, level {args.loglevel}")

    param_overrides = {
        "runs": args.runs,
        "runlist": args.runlist,
        "nevents": 0,
    }
    if args.physicsmode is not None:
        param_overrides["physicsmode"] = args.physicsmode
    # A non-empty mangle_dirpath replaces the default production mode.
    param_overrides["prodmode"] = args.mangle_dirpath if args.mangle_dirpath else "production"

    try:
        rule = RuleConfig.from_yaml_file(
            yaml_file=args.config,
            rule_name=args.rulename,
            param_overrides=param_overrides
        )
        INFO(f"Successfully loaded rule configuration: {args.rulename}")
    except (ValueError, FileNotFoundError) as e:
        ERROR(f"Error: {e}")
        sys.exit(1)

    start_times = get_start_times(rule.dsttype, rule.outtriplet, rule.dataset, START_DATE)
    if start_times is None:
        # get_start_times has already logged the failure.
        sys.exit(1)
    if not start_times:
        INFO(f"No finished jobs found since {START_DATE.date()}.")
        sys.exit(0)

    INFO(f"Found {len(start_times)} finished jobs.")

    # drange excludes its end point, so extend by one bin to make sure the
    # last edge lies at or beyond the current time.
    bin_width = timedelta(hours=BIN_HOURS)
    now = datetime.now(timezone.utc)
    bin_edges = mdates.drange(START_DATE, now + bin_width, bin_width)
    start_nums = [mdates.date2num(t) for t in start_times]

    counts, _ = np.histogram(start_nums, bins=bin_edges)
    bin_centers = 0.5 * (bin_edges[:-1] + bin_edges[1:])

    rolling = _rolling_average(counts, ROLLING_BINS)

    # The style must be active before the axes are created: an Axes picks up
    # its colour cycle at construction time, so applying it later is a no-op.
    plt.style.use('seaborn-v0_8-deep')
    fig, ax = plt.subplots(figsize=(16, 7))

    ax.bar(bin_edges[:-1], counts, width=(bin_edges[1] - bin_edges[0]), alpha=0.5, align='edge', label=f'{BIN_HOURS}h bins')
    ax.plot(bin_centers, rolling, color='red', linewidth=2, label=f'{ROLLING_BINS * BIN_HOURS}h rolling avg')

    ax.xaxis_date()
    ax.xaxis.set_major_locator(mdates.DayLocator(interval=1))
    ax.xaxis.set_minor_locator(mdates.HourLocator(interval=6))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %d'))
    plt.xticks(rotation=45, ha='right')

    ax.set_xlim(bin_edges[0], bin_edges[-1])
    ax.set_title(f'Finished jobs by start time — {args.rulename}\n(since {START_DATE.date()}, {BIN_HOURS}h bins)')
    ax.set_xlabel('Job start time')
    ax.set_ylabel('Number of jobs finished')
    ax.legend()
    ax.grid(True, which='both', linestyle='--', linewidth=0.5)

    plt.tight_layout()
    output_file = f'job_throughput_{args.rulename}.png'
    plt.savefig(output_file)
    INFO(f"Saved plot to {output_file}")
    plt.close(fig)
0130
0131
# Run the plotting workflow only when executed as a script, not on import.
if __name__ == "__main__":
    main()