File indexing completed on 2026-04-11 08:41:05
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010 import os
0011 import logging
0012
0013
0014 from pilot.util.container import execute
0015
0016 logger = logging.getLogger(__name__)
0017
0018
def get_core_count(job):
    """
    Return the core count for the job, preferring ATHENA_PROC_NUMBER when usable.

    On queues whose catchall contains "HPC_HPC" the environment is ignored and a
    missing job.corecount is set to 0. Otherwise ATHENA_PROC_NUMBER replaces
    job.corecount when it holds a valid integer. If the variable is unset or
    malformed, job.corecount is left as it was and a warning is logged (no
    warning is logged when the variable is unset but job.corecount already has
    a value).

    :param job: job object (job.corecount is updated in place).
    :return: core count (int), or the unchanged job.corecount (possibly None) if no value could be determined.
    """

    if "HPC_HPC" in job.infosys.queuedata.catchall:
        if job.corecount is None:
            job.corecount = 0
        return job.corecount

    value = os.environ.get('ATHENA_PROC_NUMBER')
    if value is None:
        # nothing to override with; only worth a warning if there is no fallback value either
        if not job.corecount:
            logger.warning("environment variable ATHENA_PROC_NUMBER is not set. corecount is not set")
        return job.corecount

    try:
        job.corecount = int(value)
    except ValueError as exc:
        # the variable exists but is not an integer: keep whatever job.corecount held before
        if job.corecount:
            logger.warning("ATHENA_PROC_NUMBER is not properly set: %s (will use existing job.corecount value)", exc)
        else:
            logger.warning("ATHENA_PROC_NUMBER is not properly set: %s (corecount is not set)", exc)

    return job.corecount
0045
0046
def add_core_count(corecount, core_counts=[]):
    """
    Append a core count measurement to a list of measurements.

    NOTE: the default list is created once, when the function is defined, and
    is shared by every call that omits core_counts, so such calls keep
    accumulating into the same list. Changing the default would alter that
    behaviour for callers that rely on it.

    :param corecount: current actual core count (int).
    :param core_counts: list of core counts to extend (list); None starts a new list.
    :return: the same list object with corecount appended (list).
    """

    # an explicit None is tolerated and treated as "start a fresh list"
    measurements = [] if core_counts is None else core_counts
    measurements.append(corecount)

    return measurements
0061
0062
def set_core_counts(job):
    """
    Measure the number of cores currently used by the payload and record it on the job.

    Runs a ps pipeline for the payload process group (job.pgrp), stores the
    resulting count in job.actualcorecount and appends it to job.corecounts.
    If the process group is not set, only a debug message is logged.

    :param job: job object (job.actualcorecount and job.corecounts are updated).
    :return: None.
    """

    if not job.pgrp:
        logger.debug('payload process group not set - cannot check number of cores used by payload')
        return

    # list (pgid, psr) for all processes, reduce to unique pairs and count the
    # lines whose pgid is exactly job.pgrp; per ps(1), psr is the processor a
    # process is currently assigned to, so this counts distinct processors
    cmd = "ps axo pgid,psr | sort | grep %d | uniq | awk '{print $1}' | grep -x %d | wc -l" % (job.pgrp, job.pgrp)
    _, stdout, _ = execute(cmd, mute=True)
    logger.debug('%s: %s', cmd, stdout)

    try:
        job.actualcorecount = int(stdout)
    except (ValueError, TypeError) as exc:
        # TypeError covers a non-string result (e.g. None) from the command execution
        logger.warning('failed to convert number of actual cores to int: %s', exc)
        return

    # hand over the job's own list explicitly instead of relying on the default
    # argument of add_core_count(), so measurements are tracked per job;
    # a missing/None attribute makes add_core_count() start a new list
    job.corecounts = add_core_count(job.actualcorecount, core_counts=getattr(job, 'corecounts', None))