Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-06-04 08:56:25

0001 #!/usr/bin/env python
0002 
0003 import sys
0004 from datetime import datetime, timezone, timedelta
0005 
0006 import matplotlib.pyplot as plt
0007 import matplotlib.dates as mdates
0008 import numpy as np
0009 
0010 from argparsing import submission_args
0011 from sphenixdbutils import dbQuery, cnxn_string_map
0012 from sphenixprodrules import RuleConfig
0013 from sphenixmisc import setup_rot_handler
0014 from simpleLogger import slogger, CustomFormatter, CHATTY, DEBUG, INFO, WARN, ERROR, CRITICAL  # noqa: F401
0015 
0016 
# Earliest job start time (UTC) included in the query and the plot.
START_DATE = datetime(year=2026, month=4, day=21, tzinfo=timezone.utc)

# Width of one histogram bin, in hours.
BIN_HOURS = 2

# Rolling-average window, in bins: 12 bins of 2 hours = 24 hours.
ROLLING_BINS = 12
0020 
0021 
def get_start_times(dsttype, tag, dataset, since):
    """Fetch the start times of finished production jobs.

    Selects ``started`` from ``production_jobs`` for rows whose status is
    ``'finished'``, whose ``tag`` and ``dataset`` equal the given values,
    whose ``dsttype`` begins with *dsttype*, and whose ``started`` is at or
    after *since*.

    Args:
        dsttype: Prefix matched against the ``dsttype`` column via ``LIKE``.
        tag: Value compared with the ``tag`` column.
        dataset: Value compared with the ``dataset`` column.
        since: Lower bound for ``started``; its ``isoformat()`` text is
            embedded in the query.

    Returns:
        ``None`` when ``dbQuery`` hands back a falsy cursor; otherwise a list
        of timezone-aware datetimes (empty when no rows match). String values
        are parsed with ``datetime.fromisoformat`` and naive values are
        labelled as UTC.
    """
    # NOTE(review): the arguments are interpolated straight into the SQL text.
    # If dbQuery supports bound parameters, prefer them -- TODO confirm.
    query = f"""
    SELECT started
    FROM production_jobs
    WHERE tag = '{tag}'
      AND dataset = '{dataset}'
      AND status = 'finished'
      AND started >= '{since.isoformat()}'
      AND dsttype LIKE '{dsttype}%'
      AND started IS NOT NULL
    """
    DEBUG(f"Executing query:\n{query}")

    def _as_aware(value):
        # Rows may carry either ISO-format text or datetime objects.
        stamp = datetime.fromisoformat(value) if isinstance(value, str) else value
        if stamp.tzinfo is not None:
            return stamp
        # Naive timestamps are taken to be UTC.
        return stamp.replace(tzinfo=timezone.utc)

    cursor = dbQuery(cnxn_string_map['statr'], query)
    if cursor:
        rows = cursor.fetchall()
        # Each row is a one-element tuple holding the `started` value.
        return [_as_aware(started) for (started,) in rows] if rows else []

    ERROR("Failed to query production database.")
    return None
0053 
0054 
def _rolling_average(counts, window):
    """Return ``(averages, width)``: a moving average of *counts* and the window used."""
    # np.convolve(mode='same') yields max(len(counts), len(kernel)) samples, so
    # a kernel longer than the data would produce an array that no longer
    # lines up with the histogram bins. Clamp the window to the data length.
    width = max(1, min(window, len(counts)))
    kernel = np.ones(width) / width
    # mode='same' zero-pads the input, so the first and last few averages
    # include padding zeros and read low.
    return np.convolve(counts, kernel, mode='same'), width


def main():
    """Plot finished-job throughput for one production rule.

    Parses the submission arguments, loads the rule configuration from the
    YAML file, queries the start times of finished jobs since ``START_DATE``,
    histograms them in ``BIN_HOURS``-wide bins, overlays a rolling average and
    saves the figure as ``job_throughput_<rulename>.png`` in the current
    directory.

    Exits with status 1 if the rule cannot be loaded or the query fails, and
    with status 0 if no finished jobs are found.
    """
    args = submission_args()

    # Apply the style sheet before any figure exists: axes capture their
    # colour cycle when they are created, so a later style.use() would not
    # reach the bars drawn below.
    plt.style.use('seaborn-v0_8-deep')
    plt.rcParams.update({'font.size': 16})

    sublogdir = setup_rot_handler(args)
    slogger.setLevel(args.loglevel)
    INFO(f"Logging to {sublogdir}, level {args.loglevel}")

    param_overrides = {
        "runs": args.runs,
        "runlist": args.runlist,
        "nevents": 0,
        "prodmode": "production",
    }
    if args.physicsmode is not None:
        param_overrides["physicsmode"] = args.physicsmode
    if args.mangle_dirpath:
        param_overrides["prodmode"] = args.mangle_dirpath

    try:
        rule = RuleConfig.from_yaml_file(
            yaml_file=args.config,
            rule_name=args.rulename,
            param_overrides=param_overrides
        )
        INFO(f"Successfully loaded rule configuration: {args.rulename}")
    except (ValueError, FileNotFoundError) as e:
        ERROR(f"Error: {e}")
        sys.exit(1)

    start_times = get_start_times(rule.dsttype, rule.outtriplet, rule.dataset, START_DATE)
    if start_times is None:
        # The query itself failed; get_start_times has already logged it.
        sys.exit(1)
    if not start_times:
        INFO(f"No finished jobs found since {START_DATE.date()}.")
        sys.exit(0)

    INFO(f"Found {len(start_times)} finished jobs.")

    # Bin edges run from START_DATE up to the first edge past "now"; drange is
    # half-open, hence the extra bin width added to the end point.
    bin_width = timedelta(hours=BIN_HOURS)
    now       = datetime.now(timezone.utc)
    bin_edges = mdates.drange(START_DATE, now + bin_width, bin_width)
    start_nums = [mdates.date2num(t) for t in start_times]

    counts, _ = np.histogram(start_nums, bins=bin_edges)
    bin_centers = 0.5 * (bin_edges[:-1] + bin_edges[1:])

    # `window` equals ROLLING_BINS unless fewer bins than that exist yet.
    rolling, window = _rolling_average(counts, ROLLING_BINS)

    fig, ax = plt.subplots(figsize=(16, 7))

    ax.bar(bin_edges[:-1], counts, width=(bin_edges[1] - bin_edges[0]), alpha=0.5, align='edge', label=f'{BIN_HOURS}h bins')
    ax.plot(bin_centers, rolling, color='red', linewidth=2, label=f'{window * BIN_HOURS}h rolling avg')

    # Date axis: one labelled major tick per day, minor ticks every 6 hours.
    ax.xaxis_date()
    ax.xaxis.set_major_locator(mdates.DayLocator(interval=1))
    ax.xaxis.set_minor_locator(mdates.HourLocator(interval=6))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %d'))
    plt.xticks(rotation=45, ha='right')

    ax.set_xlim(bin_edges[0], bin_edges[-1])
    ax.set_title(f'Finished jobs by start time — {args.rulename}\n(since {START_DATE.date()}, {BIN_HOURS}h bins)')
    ax.set_xlabel('Job start time')
    ax.set_ylabel('Number of jobs finished')
    ax.legend()
    ax.grid(True, which='both', linestyle='--', linewidth=0.5)

    plt.tight_layout()
    output_file = f'job_throughput_{args.rulename}.png'
    plt.savefig(output_file)
    INFO(f"Saved plot to {output_file}")
    plt.close(fig)
0130 
0131 
# Run the plot generation only when executed as a script, not on import.
if __name__ == "__main__":
    main()