0001 /*****************************************************************************\ 0002 * slurm.h - Definitions for all of the Slurm RPCs 0003 ***************************************************************************** 0004 * Copyright (C) 2002-2007 The Regents of the University of California. 0005 * Copyright (C) 2008-2010 Lawrence Livermore National Security. 0006 * Portions Copyright (C) 2010-2017 SchedMD LLC <https://www.schedmd.com>. 0007 * Portions Copyright (C) 2012-2013 Los Alamos National Security, LLC. 0008 * Portions Copyright 2013 Hewlett Packard Enterprise Development LP 0009 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 0010 * Written by Morris Jette <jette1@llnl.gov>, et. al. 0011 * CODE-OCEC-09-009. All rights reserved. 0012 * 0013 * This file is part of Slurm, a resource management program. 0014 * For details, see <https://slurm.schedmd.com/>. 0015 * Please also read the included file: DISCLAIMER. 0016 * 0017 * Slurm is free software; you can redistribute it and/or modify it under 0018 * the terms of the GNU General Public License as published by the Free 0019 * Software Foundation; either version 2 of the License, or (at your option) 0020 * any later version. 0021 * 0022 * In addition, as a special exception, the copyright holders give permission 0023 * to link the code of portions of this program with the OpenSSL library under 0024 * certain conditions as described in each individual source file, and 0025 * distribute linked combinations including the two. You must obey the GNU 0026 * General Public License in all respects for all of the code used other than 0027 * OpenSSL. If you modify file(s) with this exception, you may extend this 0028 * exception to your version of the file(s), but you are not obligated to do 0029 * so. If you do not wish to do so, delete this exception statement from your 0030 * version. If you delete this exception statement from all source files in 0031 * the program, then also delete it here. 0032 * 0033 * Slurm is distributed in the hope that it will be useful, but WITHOUT ANY 0034 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 0035 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 0036 * details. 0037 * 0038 * You should have received a copy of the GNU General Public License along 0039 * with Slurm; if not, write to the Free Software Foundation, Inc., 0040 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
0041 \*****************************************************************************/ 0042 0043 #ifndef _SLURM_H 0044 #define _SLURM_H 0045 0046 /* Number of dimensions the system has */ 0047 #define SYSTEM_DIMENSIONS 1 0048 #define HIGHEST_DIMENSIONS 5 0049 0050 #ifdef __cplusplus 0051 extern "C" { 0052 #endif 0053 0054 #include <slurm/slurm_errno.h> 0055 #include <slurm/slurm_version.h> 0056 0057 #include <inttypes.h> /* for uint16_t, uint32_t definitions */ 0058 #include <netinet/in.h> /* struct sockaddr_in */ 0059 #include <stdbool.h> 0060 #include <stdio.h> /* for FILE definitions */ 0061 #include <sys/types.h> /* for uid_t definition */ 0062 #include <time.h> /* for time_t definitions */ 0063 #include <unistd.h> 0064 0065 /* Define slurm_addr_t below to avoid including extraneous slurm headers */ 0066 typedef struct sockaddr_storage slurm_addr_t; 0067 0068 #ifndef __slurmdb_cluster_rec_t_defined 0069 # define __slurmdb_cluster_rec_t_defined 0070 typedef struct slurmdb_cluster_rec slurmdb_cluster_rec_t; 0071 #endif 0072 0073 /* Define switch_jobinfo_t below to avoid including extraneous slurm headers */ 0074 #ifndef __switch_jobinfo_t_defined 0075 # define __switch_jobinfo_t_defined 0076 typedef struct switch_jobinfo switch_jobinfo_t; /* opaque data type */ 0077 #endif 0078 0079 /* Define job_resources_t below 0080 * to avoid including extraneous slurm headers */ 0081 #ifndef __job_resources_t_defined 0082 # define __job_resources_t_defined /* Opaque data for select plugins */ 0083 typedef struct job_resources job_resources_t; 0084 #endif 0085 0086 /* Define select_jobinfo_t, select_nodeinfo_t below 0087 * to avoid including extraneous slurm headers */ 0088 #ifndef __select_jobinfo_t_defined 0089 # define __select_jobinfo_t_defined /* Opaque data for select plugins */ 0090 typedef struct select_jobinfo select_jobinfo_t; /* for BlueGene */ 0091 typedef struct select_nodeinfo select_nodeinfo_t; /* for BlueGene */ 0092 #endif 0093 0094 /* Define jobacctinfo_t below to avoid including extraneous slurm headers */ 0095 #ifndef __jobacctinfo_t_defined 0096 # define __jobacctinfo_t_defined 0097 typedef struct jobacctinfo jobacctinfo_t; /* opaque data type */ 0098 #endif 0099 0100 /* Define allocation_msg_thread_t below to avoid including extraneous 0101 * slurm headers */ 0102 #ifndef __allocation_msg_thread_t_defined 0103 # define __allocation_msg_thread_t_defined 0104 typedef struct allocation_msg_thread allocation_msg_thread_t; 0105 #endif 0106 0107 #ifndef __sbcast_cred_t_defined 0108 # define __sbcast_cred_t_defined 0109 typedef struct sbcast_cred sbcast_cred_t; /* opaque data type */ 0110 #endif 0111 0112 /*****************************************************************************\ 0113 * DEFINITIONS FOR POSIX VALUES 0114 \*****************************************************************************/ 0115 #ifndef HOST_NAME_MAX 0116 #define HOST_NAME_MAX 64 0117 #endif 0118 0119 /*****************************************************************************\ 0120 * DEFINITIONS FOR INPUT VALUES 0121 \*****************************************************************************/ 0122 0123 /* INFINITE is used to identify unlimited configurations, */ 0124 /* eg. 
the maximum count of nodes any job may use in some partition */ 0125 #define INFINITE8 (0xff) 0126 #define INFINITE16 (0xffff) 0127 #define INFINITE (0xffffffff) 0128 #define INFINITE64 (0xffffffffffffffff) 0129 #define NO_VAL8 (0xfe) 0130 #define NO_VAL16 (0xfffe) 0131 #define NO_VAL (0xfffffffe) 0132 #define NO_VAL64 (0xfffffffffffffffe) 0133 #define NO_CONSUME_VAL64 (0xfffffffffffffffd) 0134 #define MAX_TASKS_PER_NODE 512 0135 #define MAX_JOB_ID (0x03FFFFFF) /* bits 0-25 */ 0136 #define MAX_HET_JOB_COMPONENTS 128 0137 #define MAX_FED_CLUSTERS 63 0138 0139 /* 0140 * Max normal step id leaving a few for special steps like the batch and extern 0141 * steps 0142 */ 0143 #define SLURM_MAX_NORMAL_STEP_ID (0xfffffff0) 0144 /* Job step ID of pending step */ 0145 #define SLURM_PENDING_STEP (0xfffffffd) 0146 /* Job step ID of external process container */ 0147 #define SLURM_EXTERN_CONT (0xfffffffc) 0148 /* Job step ID of batch scripts */ 0149 #define SLURM_BATCH_SCRIPT (0xfffffffb) 0150 /* Job step ID for the interactive step (if used) */ 0151 #define SLURM_INTERACTIVE_STEP (0xfffffffa) 0152 0153 /* How many seconds to wait after eio_signal_shutdown() is called before 0154 * terminating the job and abandoning any I/O remaining to be processed. 0155 */ 0156 #define DEFAULT_EIO_SHUTDOWN_WAIT 60 0157 0158 /* 0159 * SLURM_ID_HASH 0160 * Description: 0161 * Creates a hash of a Slurm JOBID and STEPID 0162 * The JOB STEP ID is in the top 32 bits of the hash with the job id occupying 0163 * the lower 32 bits. 0164 * 0165 * IN _jobid -- SLURM's JOB ID (uint32_t) 0166 * IN _stepid -- SLURM's JOB STEP ID (uint32_t) 0167 * RET id_hash -- (uint64_t) 0168 */ 0169 #define SLURM_ID_HASH(_jobid, _stepid) \ 0170 (uint64_t)(((uint64_t)_stepid << 32) + _jobid) 0171 #define SLURM_ID_HASH_JOB_ID(hash_id) (uint32_t)(hash_id & 0x00000000FFFFFFFF) 0172 #define SLURM_ID_HASH_STEP_ID(hash_id) (uint32_t)(hash_id >> 32) 0173 0174 /* 0175 * Convert a hash ID to its legacy (pre-17.11) equivalent 0176 * Used for backward compatibility for Cray PMI 0177 */ 0178 #define SLURM_ID_HASH_LEGACY(hash_id) \ 0179 ((hash_id >> 32) * 10000000000 + (hash_id & 0x00000000FFFFFFFF)) 0180 0181 /* Slurm hash definition to be used for various purposes */ 0182 typedef struct { 0183 unsigned char type; 0184 unsigned char hash[32]; 0185 } slurm_hash_t; 0186 0187 /* 0188 * Bit definitions when setting flags 0189 * 0190 * SLURM_BIT(0) 0x0000000000000001 0191 * SLURM_BIT(1) 0x0000000000000002 0192 * SLURM_BIT(2) 0x0000000000000004 0193 * SLURM_BIT(3) 0x0000000000000008 0194 * SLURM_BIT(4) 0x0000000000000010 0195 * SLURM_BIT(5) 0x0000000000000020 0196 * SLURM_BIT(6) 0x0000000000000040 0197 * SLURM_BIT(7) 0x0000000000000080 0198 * SLURM_BIT(8) 0x0000000000000100 0199 * SLURM_BIT(9) 0x0000000000000200 0200 * SLURM_BIT(10) 0x0000000000000400 0201 * SLURM_BIT(11) 0x0000000000000800 0202 * SLURM_BIT(12) 0x0000000000001000 0203 * SLURM_BIT(13) 0x0000000000002000 0204 * SLURM_BIT(14) 0x0000000000004000 0205 * SLURM_BIT(15) 0x0000000000008000 0206 * SLURM_BIT(16) 0x0000000000010000 0207 * SLURM_BIT(17) 0x0000000000020000 0208 * SLURM_BIT(18) 0x0000000000040000 0209 * SLURM_BIT(19) 0x0000000000080000 0210 * SLURM_BIT(20) 0x0000000000100000 0211 * SLURM_BIT(21) 0x0000000000200000 0212 * SLURM_BIT(22) 0x0000000000400000 0213 * SLURM_BIT(23) 0x0000000000800000 0214 * SLURM_BIT(24) 0x0000000001000000 0215 * SLURM_BIT(25) 0x0000000002000000 0216 * SLURM_BIT(26) 0x0000000004000000 0217 * SLURM_BIT(27) 0x0000000008000000 0218 * SLURM_BIT(28) 0x0000000010000000 0219 * 
SLURM_BIT(29) 0x0000000020000000 0220 * SLURM_BIT(30) 0x0000000040000000 0221 * SLURM_BIT(31) 0x0000000080000000 0222 * SLURM_BIT(32) 0x0000000100000000 0223 * SLURM_BIT(33) 0x0000000200000000 0224 * SLURM_BIT(34) 0x0000000400000000 0225 * SLURM_BIT(35) 0x0000000800000000 0226 * SLURM_BIT(36) 0x0000001000000000 0227 * SLURM_BIT(37) 0x0000002000000000 0228 * SLURM_BIT(38) 0x0000004000000000 0229 * SLURM_BIT(39) 0x0000008000000000 0230 * SLURM_BIT(40) 0x0000010000000000 0231 * SLURM_BIT(41) 0x0000020000000000 0232 * SLURM_BIT(42) 0x0000040000000000 0233 * SLURM_BIT(43) 0x0000080000000000 0234 * SLURM_BIT(44) 0x0000100000000000 0235 * SLURM_BIT(45) 0x0000200000000000 0236 * SLURM_BIT(46) 0x0000400000000000 0237 * SLURM_BIT(47) 0x0000800000000000 0238 * SLURM_BIT(48) 0x0001000000000000 0239 * SLURM_BIT(49) 0x0002000000000000 0240 * SLURM_BIT(50) 0x0004000000000000 0241 * SLURM_BIT(51) 0x0008000000000000 0242 * SLURM_BIT(52) 0x0010000000000000 0243 * SLURM_BIT(53) 0x0020000000000000 0244 * SLURM_BIT(54) 0x0040000000000000 0245 * SLURM_BIT(55) 0x0080000000000000 0246 * SLURM_BIT(56) 0x0100000000000000 0247 * SLURM_BIT(57) 0x0200000000000000 0248 * SLURM_BIT(58) 0x0400000000000000 0249 * SLURM_BIT(59) 0x0800000000000000 0250 * SLURM_BIT(60) 0x1000000000000000 0251 * SLURM_BIT(61) 0x2000000000000000 0252 * SLURM_BIT(62) 0x4000000000000000 0253 * SLURM_BIT(63) 0x8000000000000000 0254 */ 0255 0256 #define SLURM_BIT(offset) ((uint64_t)1 << offset) 0257 0258 /* last entry must be JOB_END, keep in sync with job_state_string and 0259 * job_state_string_compact. values may be ORed with JOB_STATE_FLAGS 0260 * below. */ 0261 enum job_states { 0262 JOB_PENDING, /* queued waiting for initiation */ 0263 JOB_RUNNING, /* allocated resources and executing */ 0264 JOB_SUSPENDED, /* allocated resources, execution suspended */ 0265 JOB_COMPLETE, /* completed execution successfully */ 0266 JOB_CANCELLED, /* cancelled by user */ 0267 JOB_FAILED, /* completed execution unsuccessfully */ 0268 JOB_TIMEOUT, /* terminated on reaching time limit */ 0269 JOB_NODE_FAIL, /* terminated on node failure */ 0270 JOB_PREEMPTED, /* terminated due to preemption */ 0271 JOB_BOOT_FAIL, /* terminated due to node boot failure */ 0272 JOB_DEADLINE, /* terminated on deadline */ 0273 JOB_OOM, /* experienced out of memory error */ 0274 JOB_END /* not a real state, last entry in table */ 0275 }; 0276 #define JOB_STATE_BASE 0x000000ff /* Used for job_states above */ 0277 #define JOB_STATE_FLAGS 0xffffff00 /* Used for state flags below */ 0278 0279 /* SLURM_BIT(0-7) are already taken with base job_states above */ 0280 #define JOB_LAUNCH_FAILED SLURM_BIT(8) 0281 #define JOB_UPDATE_DB SLURM_BIT(9) /* Send job start to database again */ 0282 #define JOB_REQUEUE SLURM_BIT(10) /* Requeue job in completing state */ 0283 #define JOB_REQUEUE_HOLD SLURM_BIT(11) /* Requeue any job in hold */ 0284 #define JOB_SPECIAL_EXIT SLURM_BIT(12) /* Requeue an exit job in hold */ 0285 #define JOB_RESIZING SLURM_BIT(13) /* Size of job about to change, flag set 0286 * before calling accounting functions 0287 * immediately before job changes size 0288 */ 0289 #define JOB_CONFIGURING SLURM_BIT(14) /* Allocated nodes booting */ 0290 #define JOB_COMPLETING SLURM_BIT(15) /* Waiting for epilog completion */ 0291 #define JOB_STOPPED SLURM_BIT(16) /* Job is stopped state (holding 0292 resources, but sent SIGSTOP */ 0293 #define JOB_RECONFIG_FAIL SLURM_BIT(17) /* Node configuration for job failed, 0294 not job state, just job requeue 0295 flag */ 0296 #define 
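/*
 * Illustrative note, not part of the original header: a job's state value
 * combines one base state from enum job_states above with zero or more of
 * the flag bits defined here, so callers generally mask before comparing.
 * A minimal sketch, assuming job_state holds such a combined value:
 *
 *     uint32_t base = job_state & JOB_STATE_BASE;            // one of enum job_states
 *     int completing = (job_state & JOB_COMPLETING) ? 1 : 0; // flag bit, may be ORed in
 */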
JOB_POWER_UP_NODE SLURM_BIT(18) /* Allocated powered down nodes, 0297 * waiting for reboot */ 0298 #define JOB_REVOKED SLURM_BIT(19) /* Sibling job revoked */ 0299 #define JOB_REQUEUE_FED SLURM_BIT(20) /* Job being requeued by federation */ 0300 #define JOB_RESV_DEL_HOLD SLURM_BIT(21) /* Job is hold */ 0301 #define JOB_SIGNALING SLURM_BIT(22) /* Outgoing signal is pending */ 0302 #define JOB_STAGE_OUT SLURM_BIT(23) /* Staging out data (burst buffer) */ 0303 0304 #define READY_JOB_FATAL -2 /* fatal error */ 0305 #define READY_JOB_ERROR -1 /* ordinary error */ 0306 0307 #define READY_NODE_STATE SLURM_BIT(0) /* job's node's are ready */ 0308 #define READY_JOB_STATE SLURM_BIT(1) /* job is ready to execute */ 0309 #define READY_PROLOG_STATE SLURM_BIT(2) /* PrologSlurmctld is done */ 0310 0311 #define MAIL_JOB_BEGIN SLURM_BIT(0) /* Notify when job begins */ 0312 #define MAIL_JOB_END SLURM_BIT(1) /* Notify when job ends */ 0313 #define MAIL_JOB_FAIL SLURM_BIT(2) /* Notify if job fails */ 0314 #define MAIL_JOB_REQUEUE SLURM_BIT(3) /* Notify if job requeued */ 0315 #define MAIL_JOB_TIME100 SLURM_BIT(4) /* Notify on reaching 100% of time 0316 * limit */ 0317 #define MAIL_JOB_TIME90 SLURM_BIT(5) /* Notify on reaching 90% of time 0318 * limit */ 0319 #define MAIL_JOB_TIME80 SLURM_BIT(6) /* Notify on reaching 80% of time 0320 * limit */ 0321 #define MAIL_JOB_TIME50 SLURM_BIT(7) /* Notify on reaching 50% of time 0322 * limit */ 0323 #define MAIL_JOB_STAGE_OUT SLURM_BIT(8) /* Notify on completion of burst 0324 * buffer stage out */ 0325 #define MAIL_ARRAY_TASKS SLURM_BIT(9) /* Send emails for each array task */ 0326 #define MAIL_INVALID_DEPEND SLURM_BIT(10) /* Notify on job invalid dependency */ 0327 0328 /* Do not fatal on unrecognized input */ 0329 #define PARSE_FLAGS_IGNORE_NEW SLURM_BIT(0) 0330 /* Check 0600 permissions for slurmdbd.conf included files */ 0331 #define PARSE_FLAGS_CHECK_PERMISSIONS SLURM_BIT(1) 0332 /* Only parse Include directives and ignore rest of parsing */ 0333 #define PARSE_FLAGS_INCLUDE_ONLY SLURM_BIT(2) 0334 0335 /* 0336 * job_array_struct_t array_flags definitions. ARRAY_TASK_REQUEUED could be 0337 * substituted in the future to tot_requeued_tasks member in the struct, which 0338 * would provide a more accurated array statistic. 0339 */ 0340 #define ARRAY_TASK_REQUEUED 0x0001 /* At least one task was requeued. */ 0341 0342 #define NICE_OFFSET 0x80000000 /* offset for job's nice value */ 0343 0344 /* Reason for job to be pending rather than executing or reason for job 0345 * failure. 
If multiple reasons exists, only one is given for the sake of 0346 * system efficiency */ 0347 enum job_state_reason { 0348 /* Reasons for job to be pending */ 0349 WAIT_NO_REASON = 0, /* not set or job not pending */ 0350 WAIT_PRIORITY, /* higher priority jobs exist */ 0351 WAIT_DEPENDENCY, /* dependent job has not completed */ 0352 WAIT_RESOURCES, /* required resources not available */ 0353 WAIT_PART_NODE_LIMIT, /* request exceeds partition node limit */ 0354 WAIT_PART_TIME_LIMIT, /* request exceeds partition time limit */ 0355 WAIT_PART_DOWN, /* requested partition is down */ 0356 WAIT_PART_INACTIVE, /* requested partition is inactive */ 0357 WAIT_HELD, /* job is held by administrator */ 0358 WAIT_TIME, /* job waiting for specific begin time */ 0359 WAIT_LICENSES, /* job is waiting for licenses */ 0360 WAIT_ASSOC_JOB_LIMIT, /* user/bank job limit reached */ 0361 WAIT_ASSOC_RESOURCE_LIMIT,/* user/bank resource limit reached */ 0362 WAIT_ASSOC_TIME_LIMIT, /* user/bank time limit reached */ 0363 WAIT_RESERVATION, /* reservation not available */ 0364 WAIT_NODE_NOT_AVAIL, /* required node is DOWN or DRAINED */ 0365 WAIT_HELD_USER, /* job is held by user */ 0366 WAIT_FRONT_END, /* Front end nodes are DOWN */ 0367 FAIL_DEFER, /* individual submit time sched deferred */ 0368 FAIL_DOWN_PARTITION, /* partition for job is DOWN */ 0369 FAIL_DOWN_NODE, /* some node in the allocation failed */ 0370 FAIL_BAD_CONSTRAINTS, /* constraints can not be satisfied */ 0371 FAIL_SYSTEM, /* slurm system failure */ 0372 FAIL_LAUNCH, /* unable to launch job */ 0373 FAIL_EXIT_CODE, /* exit code was non-zero */ 0374 FAIL_TIMEOUT, /* reached end of time limit */ 0375 FAIL_INACTIVE_LIMIT, /* reached slurm InactiveLimit */ 0376 FAIL_ACCOUNT, /* invalid account */ 0377 FAIL_QOS, /* invalid QOS */ 0378 WAIT_QOS_THRES, /* required QOS threshold has been breached */ 0379 WAIT_QOS_JOB_LIMIT, /* QOS job limit reached */ 0380 WAIT_QOS_RESOURCE_LIMIT,/* QOS resource limit reached */ 0381 WAIT_QOS_TIME_LIMIT, /* QOS time limit reached */ 0382 FAIL_SIGNAL, /* raised a signal that caused it to exit */ 0383 DEFUNCT_WAIT_34, /* free for reuse */ 0384 WAIT_CLEANING, /* If a job is requeued and it is 0385 * still cleaning up from the last run. 
*/ 0386 WAIT_PROLOG, /* Prolog is running */ 0387 WAIT_QOS, /* QOS not allowed */ 0388 WAIT_ACCOUNT, /* Account not allowed */ 0389 WAIT_DEP_INVALID, /* Dependency condition invalid or never 0390 * satisfied 0391 */ 0392 WAIT_QOS_GRP_CPU, /* QOS GrpTRES exceeded (CPU) */ 0393 WAIT_QOS_GRP_CPU_MIN, /* QOS GrpTRESMins exceeded (CPU) */ 0394 WAIT_QOS_GRP_CPU_RUN_MIN, /* QOS GrpTRESRunMins exceeded (CPU) */ 0395 WAIT_QOS_GRP_JOB, /* QOS GrpJobs exceeded */ 0396 WAIT_QOS_GRP_MEM, /* QOS GrpTRES exceeded (Memory) */ 0397 WAIT_QOS_GRP_NODE, /* QOS GrpTRES exceeded (Node) */ 0398 WAIT_QOS_GRP_SUB_JOB, /* QOS GrpSubmitJobs exceeded */ 0399 WAIT_QOS_GRP_WALL, /* QOS GrpWall exceeded */ 0400 WAIT_QOS_MAX_CPU_PER_JOB, /* QOS MaxTRESPerJob exceeded (CPU) */ 0401 WAIT_QOS_MAX_CPU_MINS_PER_JOB,/* QOS MaxTRESMinsPerJob exceeded (CPU) */ 0402 WAIT_QOS_MAX_NODE_PER_JOB, /* QOS MaxTRESPerJob exceeded (Node) */ 0403 WAIT_QOS_MAX_WALL_PER_JOB, /* QOS MaxWallDurationPerJob exceeded */ 0404 WAIT_QOS_MAX_CPU_PER_USER, /* QOS MaxTRESPerUser exceeded (CPU) */ 0405 WAIT_QOS_MAX_JOB_PER_USER, /* QOS MaxJobsPerUser exceeded */ 0406 WAIT_QOS_MAX_NODE_PER_USER, /* QOS MaxTRESPerUser exceeded (Node) */ 0407 WAIT_QOS_MAX_SUB_JOB, /* QOS MaxSubmitJobsPerUser exceeded */ 0408 WAIT_QOS_MIN_CPU, /* QOS MinTRESPerJob not reached (CPU) */ 0409 WAIT_ASSOC_GRP_CPU, /* ASSOC GrpTRES exceeded (CPU) */ 0410 WAIT_ASSOC_GRP_CPU_MIN, /* ASSOC GrpTRESMins exceeded (CPU) */ 0411 WAIT_ASSOC_GRP_CPU_RUN_MIN, /* ASSOC GrpTRESRunMins exceeded (CPU) */ 0412 WAIT_ASSOC_GRP_JOB, /* ASSOC GrpJobs exceeded */ 0413 WAIT_ASSOC_GRP_MEM, /* ASSOC GrpTRES exceeded (Memory) */ 0414 WAIT_ASSOC_GRP_NODE, /* ASSOC GrpTRES exceeded (Node) */ 0415 WAIT_ASSOC_GRP_SUB_JOB, /* ASSOC GrpSubmitJobs exceeded */ 0416 WAIT_ASSOC_GRP_WALL, /* ASSOC GrpWall exceeded */ 0417 WAIT_ASSOC_MAX_JOBS, /* ASSOC MaxJobs exceeded */ 0418 WAIT_ASSOC_MAX_CPU_PER_JOB, /* ASSOC MaxTRESPerJob exceeded (CPU) */ 0419 WAIT_ASSOC_MAX_CPU_MINS_PER_JOB,/* ASSOC MaxTRESMinsPerJob 0420 * exceeded (CPU) */ 0421 WAIT_ASSOC_MAX_NODE_PER_JOB, /* ASSOC MaxTRESPerJob exceeded (NODE) */ 0422 WAIT_ASSOC_MAX_WALL_PER_JOB, /* ASSOC MaxWallDurationPerJob 0423 * exceeded */ 0424 WAIT_ASSOC_MAX_SUB_JOB, /* ASSOC MaxSubmitJobsPerUser exceeded */ 0425 0426 WAIT_MAX_REQUEUE, /* MAX_BATCH_REQUEUE reached */ 0427 WAIT_ARRAY_TASK_LIMIT, /* job array running task limit */ 0428 WAIT_BURST_BUFFER_RESOURCE, /* Burst buffer resources */ 0429 WAIT_BURST_BUFFER_STAGING, /* Burst buffer file stage-in */ 0430 FAIL_BURST_BUFFER_OP, /* Burst buffer operation failure */ 0431 WAIT_POWER_NOT_AVAIL, /* not enough power available */ 0432 WAIT_POWER_RESERVED, /* job is waiting for available power 0433 * because of power reservations */ 0434 WAIT_ASSOC_GRP_UNK, /* ASSOC GrpTRES exceeded 0435 * (Unknown) */ 0436 WAIT_ASSOC_GRP_UNK_MIN, /* ASSOC GrpTRESMins exceeded 0437 * (Unknown) */ 0438 WAIT_ASSOC_GRP_UNK_RUN_MIN, /* ASSOC GrpTRESRunMins exceeded 0439 * (Unknown) */ 0440 WAIT_ASSOC_MAX_UNK_PER_JOB, /* ASSOC MaxTRESPerJob exceeded 0441 * (Unknown) */ 0442 WAIT_ASSOC_MAX_UNK_PER_NODE, /* ASSOC MaxTRESPerNode exceeded 0443 * (Unknown) */ 0444 WAIT_ASSOC_MAX_UNK_MINS_PER_JOB,/* ASSOC MaxTRESMinsPerJob 0445 * exceeded (Unknown) */ 0446 WAIT_ASSOC_MAX_CPU_PER_NODE, /* ASSOC MaxTRESPerNode exceeded (CPU) */ 0447 WAIT_ASSOC_GRP_MEM_MIN, /* ASSOC GrpTRESMins exceeded 0448 * (Memory) */ 0449 WAIT_ASSOC_GRP_MEM_RUN_MIN, /* ASSOC GrpTRESRunMins exceeded 0450 * (Memory) */ 0451 WAIT_ASSOC_MAX_MEM_PER_JOB, /* ASSOC MaxTRESPerJob exceeded 
(Memory) */ 0452 WAIT_ASSOC_MAX_MEM_PER_NODE, /* ASSOC MaxTRESPerNode exceeded (CPU) */ 0453 WAIT_ASSOC_MAX_MEM_MINS_PER_JOB,/* ASSOC MaxTRESMinsPerJob 0454 * exceeded (Memory) */ 0455 WAIT_ASSOC_GRP_NODE_MIN, /* ASSOC GrpTRESMins exceeded (Node) */ 0456 WAIT_ASSOC_GRP_NODE_RUN_MIN, /* ASSOC GrpTRESRunMins exceeded (Node) */ 0457 WAIT_ASSOC_MAX_NODE_MINS_PER_JOB,/* ASSOC MaxTRESMinsPerJob 0458 * exceeded (Node) */ 0459 WAIT_ASSOC_GRP_ENERGY, /* ASSOC GrpTRES exceeded 0460 * (Energy) */ 0461 WAIT_ASSOC_GRP_ENERGY_MIN, /* ASSOC GrpTRESMins exceeded 0462 * (Energy) */ 0463 WAIT_ASSOC_GRP_ENERGY_RUN_MIN, /* ASSOC GrpTRESRunMins exceeded 0464 * (Energy) */ 0465 WAIT_ASSOC_MAX_ENERGY_PER_JOB, /* ASSOC MaxTRESPerJob exceeded 0466 * (Energy) */ 0467 WAIT_ASSOC_MAX_ENERGY_PER_NODE, /* ASSOC MaxTRESPerNode 0468 * exceeded (Energy) */ 0469 WAIT_ASSOC_MAX_ENERGY_MINS_PER_JOB,/* ASSOC MaxTRESMinsPerJob 0470 * exceeded (Energy) */ 0471 WAIT_ASSOC_GRP_GRES, /* ASSOC GrpTRES exceeded (GRES) */ 0472 WAIT_ASSOC_GRP_GRES_MIN, /* ASSOC GrpTRESMins exceeded (GRES) */ 0473 WAIT_ASSOC_GRP_GRES_RUN_MIN, /* ASSOC GrpTRESRunMins exceeded (GRES) */ 0474 WAIT_ASSOC_MAX_GRES_PER_JOB, /* ASSOC MaxTRESPerJob exceeded (GRES) */ 0475 WAIT_ASSOC_MAX_GRES_PER_NODE, /* ASSOC MaxTRESPerNode exceeded (GRES) */ 0476 WAIT_ASSOC_MAX_GRES_MINS_PER_JOB,/* ASSOC MaxTRESMinsPerJob 0477 * exceeded (GRES) */ 0478 WAIT_ASSOC_GRP_LIC, /* ASSOC GrpTRES exceeded 0479 * (license) */ 0480 WAIT_ASSOC_GRP_LIC_MIN, /* ASSOC GrpTRESMins exceeded 0481 * (license) */ 0482 WAIT_ASSOC_GRP_LIC_RUN_MIN, /* ASSOC GrpTRESRunMins exceeded 0483 * (license) */ 0484 WAIT_ASSOC_MAX_LIC_PER_JOB, /* ASSOC MaxTRESPerJob exceeded 0485 * (license) */ 0486 WAIT_ASSOC_MAX_LIC_MINS_PER_JOB,/* ASSOC MaxTRESMinsPerJob exceeded 0487 * (license) */ 0488 WAIT_ASSOC_GRP_BB, /* ASSOC GrpTRES exceeded 0489 * (burst buffer) */ 0490 WAIT_ASSOC_GRP_BB_MIN, /* ASSOC GrpTRESMins exceeded 0491 * (burst buffer) */ 0492 WAIT_ASSOC_GRP_BB_RUN_MIN, /* ASSOC GrpTRESRunMins exceeded 0493 * (burst buffer) */ 0494 WAIT_ASSOC_MAX_BB_PER_JOB, /* ASSOC MaxTRESPerJob exceeded 0495 * (burst buffer) */ 0496 WAIT_ASSOC_MAX_BB_PER_NODE, /* ASSOC MaxTRESPerNode exceeded 0497 * (burst buffer) */ 0498 WAIT_ASSOC_MAX_BB_MINS_PER_JOB,/* ASSOC MaxTRESMinsPerJob exceeded 0499 * (burst buffer) */ 0500 WAIT_QOS_GRP_UNK, /* QOS GrpTRES exceeded (Unknown) */ 0501 WAIT_QOS_GRP_UNK_MIN, /* QOS GrpTRESMins exceeded (Unknown) */ 0502 WAIT_QOS_GRP_UNK_RUN_MIN, /* QOS GrpTRESRunMins exceeded (Unknown) */ 0503 WAIT_QOS_MAX_UNK_PER_JOB, /* QOS MaxTRESPerJob exceeded (Unknown) */ 0504 WAIT_QOS_MAX_UNK_PER_NODE, /* QOS MaxTRESPerNode exceeded (Unknown) */ 0505 WAIT_QOS_MAX_UNK_PER_USER, /* QOS MaxTRESPerUser exceeded (Unknown) */ 0506 WAIT_QOS_MAX_UNK_MINS_PER_JOB,/* QOS MaxTRESMinsPerJob 0507 * exceeded (Unknown) */ 0508 WAIT_QOS_MIN_UNK, /* QOS MinTRESPerJob exceeded (Unknown) */ 0509 WAIT_QOS_MAX_CPU_PER_NODE, /* QOS MaxTRESPerNode exceeded (CPU) */ 0510 WAIT_QOS_GRP_MEM_MIN, /* QOS GrpTRESMins exceeded 0511 * (Memory) */ 0512 WAIT_QOS_GRP_MEM_RUN_MIN, /* QOS GrpTRESRunMins exceeded 0513 * (Memory) */ 0514 WAIT_QOS_MAX_MEM_MINS_PER_JOB,/* QOS MaxTRESMinsPerJob 0515 * exceeded (Memory) */ 0516 WAIT_QOS_MAX_MEM_PER_JOB, /* QOS MaxTRESPerJob exceeded (CPU) */ 0517 WAIT_QOS_MAX_MEM_PER_NODE, /* QOS MaxTRESPerNode exceeded (MEM) */ 0518 WAIT_QOS_MAX_MEM_PER_USER, /* QOS MaxTRESPerUser exceeded (CPU) */ 0519 WAIT_QOS_MIN_MEM, /* QOS MinTRESPerJob not reached (Memory) */ 0520 WAIT_QOS_GRP_ENERGY, /* QOS GrpTRES exceeded 
(Energy) */ 0521 WAIT_QOS_GRP_ENERGY_MIN, /* QOS GrpTRESMins exceeded (Energy) */ 0522 WAIT_QOS_GRP_ENERGY_RUN_MIN, /* QOS GrpTRESRunMins exceeded (Energy) */ 0523 WAIT_QOS_MAX_ENERGY_PER_JOB, /* QOS MaxTRESPerJob exceeded (Energy) */ 0524 WAIT_QOS_MAX_ENERGY_PER_NODE,/* QOS MaxTRESPerNode exceeded (Energy) */ 0525 WAIT_QOS_MAX_ENERGY_PER_USER,/* QOS MaxTRESPerUser exceeded (Energy) */ 0526 WAIT_QOS_MAX_ENERGY_MINS_PER_JOB,/* QOS MaxTRESMinsPerJob 0527 * exceeded (Energy) */ 0528 WAIT_QOS_MIN_ENERGY, /* QOS MinTRESPerJob not reached (Energy) */ 0529 WAIT_QOS_GRP_NODE_MIN, /* QOS GrpTRESMins exceeded (Node) */ 0530 WAIT_QOS_GRP_NODE_RUN_MIN, /* QOS GrpTRESRunMins exceeded (Node) */ 0531 WAIT_QOS_MAX_NODE_MINS_PER_JOB, /* QOS MaxTRESMinsPerJob 0532 * exceeded (Node) */ 0533 WAIT_QOS_MIN_NODE, /* QOS MinTRESPerJob not reached (Node) */ 0534 WAIT_QOS_GRP_GRES, /* QOS GrpTRES exceeded (GRES) */ 0535 WAIT_QOS_GRP_GRES_MIN, /* QOS GrpTRESMins exceeded (GRES) */ 0536 WAIT_QOS_GRP_GRES_RUN_MIN, /* QOS GrpTRESRunMins exceeded (GRES) */ 0537 WAIT_QOS_MAX_GRES_PER_JOB, /* QOS MaxTRESPerJob exceeded (GRES) */ 0538 WAIT_QOS_MAX_GRES_PER_NODE, /* QOS MaxTRESPerNode exceeded (GRES) */ 0539 WAIT_QOS_MAX_GRES_PER_USER, /* QOS MaxTRESPerUser exceeded 0540 * (GRES) */ 0541 WAIT_QOS_MAX_GRES_MINS_PER_JOB,/* QOS MaxTRESMinsPerJob 0542 * exceeded (GRES) */ 0543 WAIT_QOS_MIN_GRES, /* QOS MinTRESPerJob not reached (CPU) */ 0544 WAIT_QOS_GRP_LIC, /* QOS GrpTRES exceeded (license) */ 0545 WAIT_QOS_GRP_LIC_MIN, /* QOS GrpTRESMins exceeded (license) */ 0546 WAIT_QOS_GRP_LIC_RUN_MIN, /* QOS GrpTRESRunMins exceeded (license) */ 0547 WAIT_QOS_MAX_LIC_PER_JOB, /* QOS MaxTRESPerJob exceeded (license) */ 0548 WAIT_QOS_MAX_LIC_PER_USER, /* QOS MaxTRESPerUser exceeded (license) */ 0549 WAIT_QOS_MAX_LIC_MINS_PER_JOB,/* QOS MaxTRESMinsPerJob exceeded 0550 * (license) */ 0551 WAIT_QOS_MIN_LIC, /* QOS MinTRESPerJob not reached 0552 * (license) */ 0553 WAIT_QOS_GRP_BB, /* QOS GrpTRES exceeded 0554 * (burst buffer) */ 0555 WAIT_QOS_GRP_BB_MIN, /* QOS GrpTRESMins exceeded 0556 * (burst buffer) */ 0557 WAIT_QOS_GRP_BB_RUN_MIN, /* QOS GrpTRESRunMins exceeded 0558 * (burst buffer) */ 0559 WAIT_QOS_MAX_BB_PER_JOB, /* QOS MaxTRESPerJob exceeded 0560 * (burst buffer) */ 0561 WAIT_QOS_MAX_BB_PER_NODE, /* QOS MaxTRESPerNode exceeded 0562 * (burst buffer) */ 0563 WAIT_QOS_MAX_BB_PER_USER, /* QOS MaxTRESPerUser exceeded 0564 * (burst buffer) */ 0565 WAIT_QOS_MAX_BB_MINS_PER_JOB,/* QOS MaxTRESMinsPerJob exceeded 0566 * (burst buffer) */ 0567 WAIT_QOS_MIN_BB, /* QOS MinTRESPerJob not reached 0568 * (burst buffer) */ 0569 FAIL_DEADLINE, /* reached deadline */ 0570 /* QOS MaxTRESPerAccount */ 0571 WAIT_QOS_MAX_BB_PER_ACCT, /* exceeded burst buffer */ 0572 WAIT_QOS_MAX_CPU_PER_ACCT, /* exceeded CPUs */ 0573 WAIT_QOS_MAX_ENERGY_PER_ACCT, /* exceeded Energy */ 0574 WAIT_QOS_MAX_GRES_PER_ACCT, /* exceeded GRES */ 0575 WAIT_QOS_MAX_NODE_PER_ACCT, /* exceeded Nodes */ 0576 WAIT_QOS_MAX_LIC_PER_ACCT, /* exceeded Licenses */ 0577 WAIT_QOS_MAX_MEM_PER_ACCT, /* exceeded Memory */ 0578 WAIT_QOS_MAX_UNK_PER_ACCT, /* exceeded Unknown */ 0579 /********************/ 0580 WAIT_QOS_MAX_JOB_PER_ACCT, /* QOS MaxJobPerAccount exceeded */ 0581 WAIT_QOS_MAX_SUB_JOB_PER_ACCT,/* QOS MaxJobSubmitSPerAccount exceeded */ 0582 WAIT_PART_CONFIG, /* Generic partition configuration reason */ 0583 WAIT_ACCOUNT_POLICY, /* Generic accounting policy reason */ 0584 0585 WAIT_FED_JOB_LOCK, /* Can't get fed job lock */ 0586 FAIL_OOM, /* Exhausted memory */ 0587 WAIT_PN_MEM_LIMIT, /* 
MaxMemPer[CPU|Node] exceeded */ 0588 0589 /* exceeded Billing TRES limits */ 0590 WAIT_ASSOC_GRP_BILLING, /* GrpTRES */ 0591 WAIT_ASSOC_GRP_BILLING_MIN, /* GrpTRESMins */ 0592 WAIT_ASSOC_GRP_BILLING_RUN_MIN, /* GrpTRESRunMins */ 0593 WAIT_ASSOC_MAX_BILLING_PER_JOB, /* MaxTRESPerJob */ 0594 WAIT_ASSOC_MAX_BILLING_PER_NODE, /* MaxTRESPerNode */ 0595 WAIT_ASSOC_MAX_BILLING_MINS_PER_JOB,/* MaxTRESMinsPerJob */ 0596 0597 WAIT_QOS_GRP_BILLING, /* GrpTRES */ 0598 WAIT_QOS_GRP_BILLING_MIN, /* GrpTRESMins */ 0599 WAIT_QOS_GRP_BILLING_RUN_MIN, /* GrpTRESRunMins */ 0600 WAIT_QOS_MAX_BILLING_PER_JOB, /* MaxTRESPerJob */ 0601 WAIT_QOS_MAX_BILLING_PER_NODE, /* MaxTRESPerNode */ 0602 WAIT_QOS_MAX_BILLING_PER_USER, /* MaxTRESPerUser */ 0603 WAIT_QOS_MAX_BILLING_MINS_PER_JOB, /* MaxTRESMinsPerJob */ 0604 WAIT_QOS_MAX_BILLING_PER_ACCT, /* MaxTRESPerAcct */ 0605 WAIT_QOS_MIN_BILLING, /* MinTRESPerJob */ 0606 0607 WAIT_RESV_DELETED, /* Reservation was deleted */ 0608 WAIT_RESV_INVALID, 0609 FAIL_CONSTRAINTS, /* Constraints cannot currently be satisfied */ 0610 }; 0611 0612 enum job_acct_types { 0613 JOB_START, 0614 JOB_STEP, 0615 JOB_SUSPEND, 0616 JOB_TERMINATED 0617 }; 0618 0619 /* Partition state flags */ 0620 #define PARTITION_SUBMIT 0x01 /* Allow job submission to partition */ 0621 #define PARTITION_SCHED 0x02 /* Allow job startup from partition */ 0622 0623 /* Actual partition states based upon state flags */ 0624 #define PARTITION_DOWN (PARTITION_SUBMIT) 0625 #define PARTITION_UP (PARTITION_SUBMIT | PARTITION_SCHED) 0626 #define PARTITION_DRAIN (PARTITION_SCHED) 0627 #define PARTITION_INACTIVE 0x00 0628 0629 /* Partition enforce flags for jobs */ 0630 #define PARTITION_ENFORCE_NONE 0 0631 #define PARTITION_ENFORCE_ALL 1 /* job limit must be valid for ALL 0632 * partitions */ 0633 #define PARTITION_ENFORCE_ANY 2 /* job limit must be valid for ANY 0634 * partition */ 0635 0636 /* 0637 * Auth plugin (id) used for communication. 0638 * Update auth_plugin_types in slurm_auth.c if changed. 0639 */ 0640 enum auth_plugin_type { 0641 AUTH_PLUGIN_NONE = 100, 0642 AUTH_PLUGIN_MUNGE = 101, 0643 AUTH_PLUGIN_JWT = 102, 0644 AUTH_PLUGIN_SLURM = 103, 0645 }; 0646 0647 /* 0648 * Hash plugin (id) used for communication. 0649 */ 0650 enum hash_plugin_type { 0651 HASH_PLUGIN_DEFAULT = 0, 0652 HASH_PLUGIN_NONE, 0653 HASH_PLUGIN_K12, 0654 HASH_PLUGIN_SHA256, 0655 HASH_PLUGIN_CNT, 0656 }; 0657 0658 /* Select plugin (id) in use by cluster */ 0659 enum select_plugin_type { 0660 /* 100 unused (originally for BlueGene) */ 0661 /* 0662 * 101 cons_res was removed in 23.11. However, this is needed for 0663 * systems that are upgrading from an older version and were using 0664 * cons_res. This can be removed two versions after 23.11. 0665 */ 0666 SELECT_PLUGIN_CONS_RES = 101, 0667 SELECT_PLUGIN_LINEAR = 102, /* Linear on a normal system */ 0668 /* 103 unused (originally used for BGQ) */ 0669 /* 104 unused (originally used for Cray/ALPS with select/linear) */ 0670 /* 105 unused (originally used for Cray/ALPS with select/cons_res) */ 0671 SELECT_PLUGIN_SERIAL = 106, /* Serial */ 0672 SELECT_PLUGIN_CRAY_LINEAR = 107, /* Linear on a Native Cray */ 0673 /* 0674 * 108 cons_res on a cray was removed in 23.11. However, this is needed 0675 * for systems that are upgrading from an older version and were using 0676 * cons_res. This can be removed two versions after 23.11. 
0677 */ 0678 SELECT_PLUGIN_CRAY_CONS_RES = 108, /* Cons Res on a Native Cray */ 0679 SELECT_PLUGIN_CONS_TRES = 109, /* Cons TRES on a normal system */ 0680 SELECT_PLUGIN_CRAY_CONS_TRES = 110 /* Cons TRES on a Native Cray */ 0681 }; 0682 0683 /* switch plugin (id) in use by cluster */ 0684 enum switch_plugin_type { 0685 SWITCH_PLUGIN_NONE = 100, /* NONE */ 0686 SWITCH_PLUGIN_GENERIC = 101, /* Generic */ 0687 SWITCH_PLUGIN_CRAY = 102, /* Cray */ 0688 /* 103 unused (originally used for NRT) */ 0689 SWITCH_PLUGIN_SLINGSHOT = 104, /* HPE Slingshot */ 0690 }; 0691 0692 enum select_jobdata_type { 0693 SELECT_JOBDATA_NETWORK = 23, /* data-> char * network info */ 0694 }; 0695 0696 enum select_nodedata_type { 0697 SELECT_NODEDATA_SUBCNT = 2, /* data-> uint16_t */ 0698 SELECT_NODEDATA_PTR = 5, /* data-> select_nodeinfo_t *nodeinfo */ 0699 SELECT_NODEDATA_MEM_ALLOC = 8, /* data-> uint32_t */ 0700 SELECT_NODEDATA_TRES_ALLOC_FMT_STR = 9, /* data-> char *, 0701 * free with xfree */ 0702 SELECT_NODEDATA_TRES_ALLOC_WEIGHTED = 10, /* data-> double */ 0703 }; 0704 0705 enum select_print_mode { 0706 SELECT_PRINT_HEAD, /* Print just the header */ 0707 SELECT_PRINT_DATA, /* Print just the data */ 0708 SELECT_PRINT_MIXED, /* Print "field=value" */ 0709 SELECT_PRINT_MIXED_SHORT,/* Print less "field=value" */ 0710 SELECT_PRINT_BG_ID, /* Print just the BG_ID */ 0711 SELECT_PRINT_NODES, /* Print the nodelist */ 0712 SELECT_PRINT_CONNECTION,/* Print just the CONNECTION type */ 0713 SELECT_PRINT_ROTATE, /* Print just the ROTATE */ 0714 SELECT_PRINT_GEOMETRY, /* Print just the GEO */ 0715 SELECT_PRINT_START, /* Print just the START location */ 0716 SELECT_PRINT_BLRTS_IMAGE,/* Print just the BLRTS IMAGE */ 0717 SELECT_PRINT_LINUX_IMAGE,/* Print just the LINUX IMAGE */ 0718 SELECT_PRINT_MLOADER_IMAGE,/* Print just the MLOADER IMAGE */ 0719 SELECT_PRINT_RAMDISK_IMAGE,/* Print just the RAMDISK IMAGE */ 0720 SELECT_PRINT_REBOOT, /* Print just the REBOOT */ 0721 SELECT_PRINT_RESV_ID, /* Print just Cray/BASIL reservation ID */ 0722 SELECT_PRINT_START_LOC /* Print just the start location */ 0723 }; 0724 0725 enum select_node_cnt { 0726 SELECT_GET_NODE_SCALING, /* Give scaling factor for node count */ 0727 SELECT_GET_NODE_CPU_CNT, /* Give how many cpus are on a node */ 0728 SELECT_GET_MP_CPU_CNT, /* Give how many cpus are on a 0729 * base partition */ 0730 SELECT_APPLY_NODE_MIN_OFFSET, /* Apply min offset to variable */ 0731 SELECT_APPLY_NODE_MAX_OFFSET, /* Apply max offset to variable */ 0732 SELECT_SET_NODE_CNT, /* Set altered node cnt */ 0733 SELECT_SET_MP_CNT /* Given a node cnt return the 0734 * base partition count */ 0735 }; 0736 0737 enum acct_gather_profile_info { 0738 ACCT_GATHER_PROFILE_DIR, /* Give directory profiling is stored */ 0739 ACCT_GATHER_PROFILE_DEFAULT, /* What is being collected for 0740 * profiling by default */ 0741 ACCT_GATHER_PROFILE_RUNNING /* What is actually be collected 0742 * wither it be user or 0743 * default. 
(Only works in the slurmstepd) 0744 */ 0745 }; 0746 0747 #define ACCT_GATHER_PROFILE_NOT_SET 0x00000000 0748 #define ACCT_GATHER_PROFILE_NONE SLURM_BIT(0) 0749 #define ACCT_GATHER_PROFILE_ENERGY SLURM_BIT(1) 0750 #define ACCT_GATHER_PROFILE_TASK SLURM_BIT(2) 0751 #define ACCT_GATHER_PROFILE_LUSTRE SLURM_BIT(3) 0752 #define ACCT_GATHER_PROFILE_NETWORK SLURM_BIT(4) 0753 #define ACCT_GATHER_PROFILE_ALL 0xffffffff 0754 0755 /* jobacct data types */ 0756 enum jobacct_data_type { 0757 JOBACCT_DATA_TOTAL, /* data-> jobacctinfo_t * */ 0758 JOBACCT_DATA_PIPE, /* data-> file descriptor */ 0759 JOBACCT_DATA_RUSAGE, /* data-> rusage set user_cpu_sec, 0760 * user_cpu_usec, sys_cpu_sec, sys_cpu_usec */ 0761 JOBACCT_DATA_TOT_VSIZE = 5, /* data-> uint64_t vsize */ 0762 JOBACCT_DATA_TOT_RSS = 8, /* data-> uint64_t psize */ 0763 }; 0764 0765 enum acct_energy_type { 0766 ENERGY_DATA_JOULES_TASK, 0767 ENERGY_DATA_STRUCT, 0768 ENERGY_DATA_RECONFIG, 0769 ENERGY_DATA_PROFILE, 0770 ENERGY_DATA_LAST_POLL, 0771 ENERGY_DATA_SENSOR_CNT, 0772 ENERGY_DATA_NODE_ENERGY, 0773 ENERGY_DATA_NODE_ENERGY_UP, 0774 ENERGY_DATA_STEP_PTR 0775 }; 0776 0777 typedef enum { 0778 UPDATE_SET, /* Set to specified value */ 0779 UPDATE_ADD, /* Append to existing value (+=)*/ 0780 UPDATE_REMOVE, /* Remove from existing vale (-=) */ 0781 } update_mode_t; 0782 0783 /* 0784 * Task distribution states/methods 0785 * 0786 * Symbol format is SLURM_DIST_<node>_<socket>_<core> 0787 * 0788 * <node> = Method for distributing tasks to nodes. 0789 * This determines the order in which task ids are 0790 * distributed to the nodes selected for the job/step. 0791 * <socket> = Method for distributing allocated lllps across sockets. 0792 * This determines the order in which allocated lllps are 0793 * distributed across sockets for binding to tasks. 0794 * <core> = Method for distributing allocated lllps across cores. 0795 * This determines the order in which allocated lllps are 0796 * distributed across cores for binding to tasks. 0797 * 0798 * Note that the socket and core distributions apply only to task affinity. 
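 *
 * Illustrative reading, not part of the original header: following the
 * naming rule above, SLURM_DIST_BLOCK_CYCLIC requests block distribution
 * of tasks across nodes and cyclic distribution of allocated lllps across
 * sockets. A minimal sketch of inspecting such a value ("dist" is a
 * hypothetical variable) with the masks defined below this enum:
 *
 *     uint32_t dist = SLURM_DIST_BLOCK_CYCLIC;
 *     uint32_t methods = dist & SLURM_DIST_STATE_BASE;    // distribution methods
 *     int pack = (dist & SLURM_DIST_PACK_NODES) ? 1 : 0;  // optional flag bit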
0799 */ 0800 typedef enum task_dist_states { 0801 /* NOTE: start SLURM_DIST_CYCLIC at 1 for HP MPI */ 0802 SLURM_DIST_CYCLIC = 0x0001, 0803 SLURM_DIST_BLOCK = 0x0002, 0804 SLURM_DIST_ARBITRARY = 0x0003, 0805 SLURM_DIST_PLANE = 0x0004, 0806 SLURM_DIST_CYCLIC_CYCLIC = 0x0011, 0807 SLURM_DIST_CYCLIC_BLOCK = 0x0021, 0808 SLURM_DIST_CYCLIC_CFULL = 0x0031, 0809 SLURM_DIST_BLOCK_CYCLIC = 0x0012, 0810 SLURM_DIST_BLOCK_BLOCK = 0x0022, 0811 SLURM_DIST_BLOCK_CFULL = 0x0032, 0812 SLURM_DIST_CYCLIC_CYCLIC_CYCLIC = 0x0111, 0813 SLURM_DIST_CYCLIC_CYCLIC_BLOCK = 0x0211, 0814 SLURM_DIST_CYCLIC_CYCLIC_CFULL = 0x0311, 0815 SLURM_DIST_CYCLIC_BLOCK_CYCLIC = 0x0121, 0816 SLURM_DIST_CYCLIC_BLOCK_BLOCK = 0x0221, 0817 SLURM_DIST_CYCLIC_BLOCK_CFULL = 0x0321, 0818 SLURM_DIST_CYCLIC_CFULL_CYCLIC = 0x0131, 0819 SLURM_DIST_CYCLIC_CFULL_BLOCK = 0x0231, 0820 SLURM_DIST_CYCLIC_CFULL_CFULL = 0x0331, 0821 SLURM_DIST_BLOCK_CYCLIC_CYCLIC = 0x0112, 0822 SLURM_DIST_BLOCK_CYCLIC_BLOCK = 0x0212, 0823 SLURM_DIST_BLOCK_CYCLIC_CFULL = 0x0312, 0824 SLURM_DIST_BLOCK_BLOCK_CYCLIC = 0x0122, 0825 SLURM_DIST_BLOCK_BLOCK_BLOCK = 0x0222, 0826 SLURM_DIST_BLOCK_BLOCK_CFULL = 0x0322, 0827 SLURM_DIST_BLOCK_CFULL_CYCLIC = 0x0132, 0828 SLURM_DIST_BLOCK_CFULL_BLOCK = 0x0232, 0829 SLURM_DIST_BLOCK_CFULL_CFULL = 0x0332, 0830 0831 SLURM_DIST_NODECYCLIC = 0x0001, 0832 SLURM_DIST_NODEBLOCK = 0x0002, 0833 SLURM_DIST_SOCKCYCLIC = 0x0010, 0834 SLURM_DIST_SOCKBLOCK = 0x0020, 0835 SLURM_DIST_SOCKCFULL = 0x0030, 0836 SLURM_DIST_CORECYCLIC = 0x0100, 0837 SLURM_DIST_COREBLOCK = 0x0200, 0838 SLURM_DIST_CORECFULL = 0x0300, 0839 0840 /* Unused = 0x1000, */ 0841 SLURM_DIST_UNKNOWN = 0x2000 0842 } task_dist_states_t; 0843 0844 #define SLURM_DIST_STATE_BASE 0x00FFFF 0845 #define SLURM_DIST_STATE_FLAGS 0xFF0000 0846 #define SLURM_DIST_PACK_NODES 0x800000 0847 #define SLURM_DIST_NO_PACK_NODES 0x400000 0848 0849 #define SLURM_DIST_NODEMASK 0xF00F 0850 #define SLURM_DIST_SOCKMASK 0xF0F0 0851 #define SLURM_DIST_COREMASK 0xFF00 0852 #define SLURM_DIST_NODESOCKMASK 0xF0FF 0853 0854 /* Open stdout/err file mode, 0 for system default (JobFileAppend) */ 0855 #define OPEN_MODE_APPEND 1 0856 #define OPEN_MODE_TRUNCATE 2 0857 0858 typedef enum cpu_bind_type { /* cpu binding type from --cpu-bind=... */ 0859 /* verbose can be set with any other flag */ 0860 CPU_BIND_VERBOSE = 0x0001, /* =v, */ 0861 /* the following auto-binding flags are mutually exclusive */ 0862 CPU_BIND_TO_THREADS = 0x0002, /* =threads */ 0863 CPU_BIND_TO_CORES = 0x0004, /* =cores */ 0864 CPU_BIND_TO_SOCKETS = 0x0008, /* =sockets */ 0865 CPU_BIND_TO_LDOMS = 0x0010, /* locality domains */ 0866 /* the following manual binding flags are mutually exclusive */ 0867 /* CPU_BIND_NONE needs to be the lowest value among manual bindings */ 0868 CPU_BIND_NONE = 0x0020, /* =no */ 0869 CPU_BIND_RANK = 0x0040, /* =rank */ 0870 CPU_BIND_MAP = 0x0080, /* =map_cpu:<list of CPU IDs> */ 0871 CPU_BIND_MASK = 0x0100, /* =mask_cpu:<list of CPU masks> */ 0872 CPU_BIND_LDRANK = 0x0200, /* =locality domain rank */ 0873 CPU_BIND_LDMAP = 0x0400, /* =map_ldom:<list of locality domains> */ 0874 CPU_BIND_LDMASK = 0x0800, /* =mask_ldom:<list of ldom masks> */ 0875 0876 /* the following is used primarily for the 0877 --hint=nomultithread when -mblock:block is requested. */ 0878 CPU_BIND_ONE_THREAD_PER_CORE = 0x2000,/* Only bind to one 0879 * thread of a core */ 0880 0881 /* default binding if auto binding doesn't match. 
*/ 0882 CPU_AUTO_BIND_TO_THREADS = 0x04000, 0883 CPU_AUTO_BIND_TO_CORES = 0x10000, 0884 CPU_AUTO_BIND_TO_SOCKETS = 0x20000, 0885 0886 /* the following is used only as a flag for expressing 0887 * the contents of TaskPluginParams */ 0888 SLURMD_OFF_SPEC = 0x40000, 0889 CPU_BIND_OFF = 0x80000 /* Disable binding */ 0890 } cpu_bind_type_t; 0891 0892 #define CPU_BIND_T_TO_MASK 0x001e 0893 #define CPU_BIND_T_AUTO_TO_MASK 0x34000 0894 #define CPU_BIND_T_MASK 0x0fe0 0895 #define CPU_BIND_T_TASK_PARAMS_MASK (SLURMD_OFF_SPEC | CPU_BIND_OFF) 0896 0897 /* Flag to indicate that cpu_freq is a range: low,medium,high,high-1 0898 * instead of an integer value in kilohertz */ 0899 #define CPU_FREQ_RANGE_FLAG 0x80000000 0900 #define CPU_FREQ_LOW 0x80000001 0901 #define CPU_FREQ_MEDIUM 0x80000002 0902 #define CPU_FREQ_HIGH 0x80000003 0903 #define CPU_FREQ_HIGHM1 0x80000004 0904 #define CPU_FREQ_CONSERVATIVE 0x88000000 0905 #define CPU_FREQ_ONDEMAND 0x84000000 0906 #define CPU_FREQ_PERFORMANCE 0x82000000 0907 #define CPU_FREQ_POWERSAVE 0x81000000 0908 #define CPU_FREQ_USERSPACE 0x80800000 0909 #define CPU_FREQ_SCHEDUTIL 0x80400000 0910 #define CPU_FREQ_GOV_MASK 0x8ff00000 0911 0912 typedef enum mem_bind_type { /* memory binding type from --mem-bind=... */ 0913 /* verbose can be set with any other flag */ 0914 MEM_BIND_VERBOSE= 0x01, /* =v, */ 0915 /* the following five manual binding flags are mutually exclusive */ 0916 /* MEM_BIND_NONE needs to be the first in this sub-list */ 0917 MEM_BIND_NONE = 0x02, /* =no */ 0918 MEM_BIND_RANK = 0x04, /* =rank */ 0919 MEM_BIND_MAP = 0x08, /* =map_mem:<list of NUMA IDs> */ 0920 MEM_BIND_MASK = 0x10, /* =mask_mem:<list of NUMA masks> */ 0921 MEM_BIND_LOCAL = 0x20, /* =local */ 0922 /* sort and prefer can be set with any other flags */ 0923 MEM_BIND_SORT = 0x40, /* =sort */ 0924 MEM_BIND_PREFER = 0x80 /* =prefer */ 0925 } mem_bind_type_t; 0926 0927 #define MEM_BIND_TYPE_MASK 0x3e 0928 #define MEM_BIND_TYPE_FLAGS_MASK 0xc0 0929 0930 typedef enum accel_bind_type { /* accelerator binding from --accel_bind= */ 0931 ACCEL_BIND_VERBOSE = 0x01, /* 'v' verbose */ 0932 ACCEL_BIND_CLOSEST_GPU = 0x02, /* 'g' Use closest GPU to the CPU */ 0933 ACCEL_BIND_CLOSEST_NIC = 0x08 /* 'n' Use closest NIC to CPU */ 0934 } accel_bind_type_t; 0935 0936 /* The last entry in node_states must be STATE_END, keep in sync with 0937 * node_state_string. values may be ORed with NODE_STATE_FLAGS below. 0938 * Node states typically alternate between NODE_STATE_IDLE and 0939 * NODE_STATE_ALLOCATED. The NODE_STATE_COMPLETING flag will be set 0940 * when jobs are in the process of terminating. */ 0941 enum node_states { 0942 NODE_STATE_UNKNOWN, /* node's initial state, unknown */ 0943 NODE_STATE_DOWN, /* node in non-usable state */ 0944 NODE_STATE_IDLE, /* node idle and available for use */ 0945 NODE_STATE_ALLOCATED, /* node has been allocated to a job */ 0946 NODE_STATE_ERROR, /* UNUSED - node is in an error state */ 0947 NODE_STATE_MIXED, /* node has a mixed state */ 0948 NODE_STATE_FUTURE, /* node slot reserved for future use */ 0949 NODE_STATE_END /* last entry in table */ 0950 }; 0951 #define NODE_STATE_BASE 0x0000000f 0952 #define NODE_STATE_FLAGS 0xfffffff0 0953 0954 /* SLURM_BIT(0-3) taken for base states */ 0955 #define NODE_STATE_NET SLURM_BIT(4) /* If a node is using Cray's 0956 * Network Performance 0957 * Counters but isn't in a 0958 * allocation. 
*/ 0959 #define NODE_STATE_RES SLURM_BIT(5) /* If a node is in a 0960 * reservation (used primarily 0961 * to note a node isn't idle 0962 * for non-reservation jobs) */ 0963 #define NODE_STATE_UNDRAIN SLURM_BIT(6) /* Clear DRAIN flag for a node */ 0964 #define NODE_STATE_CLOUD SLURM_BIT(7) /* node comes from cloud */ 0965 #define NODE_RESUME SLURM_BIT(8) /* Restore a DRAINED, DRAINING, DOWN 0966 * or FAILING node to service (e.g. 0967 * IDLE or ALLOCATED). Used in 0968 * slurm_update_node() request */ 0969 #define NODE_STATE_DRAIN SLURM_BIT(9) /* do not allocated new work */ 0970 #define NODE_STATE_COMPLETING SLURM_BIT(10) /* node is completing allocated 0971 * job */ 0972 #define NODE_STATE_NO_RESPOND SLURM_BIT(11) /* node is not responding */ 0973 #define NODE_STATE_POWERED_DOWN SLURM_BIT(12) /* node is powered down */ 0974 #define NODE_STATE_FAIL SLURM_BIT(13) /* node is failing, do not allocate 0975 * new work */ 0976 #define NODE_STATE_POWERING_UP SLURM_BIT(14) /* node is powering up */ 0977 #define NODE_STATE_MAINT SLURM_BIT(15) /* node in maintenance 0978 * reservation */ 0979 #define NODE_STATE_REBOOT_REQUESTED SLURM_BIT(16) /* node reboot requested */ 0980 #define NODE_STATE_REBOOT_CANCEL SLURM_BIT(17) /* cancel pending reboot */ 0981 #define NODE_STATE_POWERING_DOWN SLURM_BIT(18) /* node is powering down */ 0982 #define NODE_STATE_DYNAMIC_FUTURE SLURM_BIT(19) /* dynamic future node */ 0983 #define NODE_STATE_REBOOT_ISSUED SLURM_BIT(20) /* node reboot passed to agent */ 0984 #define NODE_STATE_PLANNED SLURM_BIT(21) /* node scheduled for a job in the 0985 * future */ 0986 #define NODE_STATE_INVALID_REG SLURM_BIT(22) /* invalid registration, don't 0987 * ping */ 0988 #define NODE_STATE_POWER_DOWN SLURM_BIT(23) /* manual node power down */ 0989 #define NODE_STATE_POWER_UP SLURM_BIT(24) /* manual node power up */ 0990 #define NODE_STATE_POWER_DRAIN SLURM_BIT(25) /* signal power down asap */ 0991 #define NODE_STATE_DYNAMIC_NORM SLURM_BIT(26) /* dynamic norm node */ 0992 0993 /* used to define the size of the credential.signature size 0994 * used to define the key size of the io_stream_header_t 0995 */ 0996 #define SLURM_SSL_SIGNATURE_LENGTH 128 0997 0998 /* Used as show_flags for slurm_get_ and slurm_load_ function calls. 0999 * Values can be ORed */ 1000 #define SHOW_ALL 0x0001 /* Show info for "hidden" partitions */ 1001 #define SHOW_DETAIL 0x0002 /* Show detailed resource information */ 1002 /* was SHOW_DETAIL2 0x0004 Removed v19.05 */ 1003 #define SHOW_MIXED 0x0008 /* Automatically set node MIXED state */ 1004 #define SHOW_LOCAL 0x0010 /* Show only local information, even on 1005 * federated cluster */ 1006 #define SHOW_SIBLING 0x0020 /* Show sibling jobs on a federated cluster */ 1007 #define SHOW_FEDERATION 0x0040 /* Show federated state information. 1008 * Shows local info if not in federation */ 1009 #define SHOW_FUTURE 0x0080 /* Show future nodes */ 1010 1011 /* CR_CPU, CR_SOCKET and CR_CORE are mutually exclusive 1012 * CR_MEMORY may be added to any of the above values or used by itself 1013 * CR_ONE_TASK_PER_CORE may also be added to any of the above values */ 1014 #define CR_CPU 0x0001 /* Resources are shared down to the level of 1015 * logical processors which can be socket, 1016 * core, or thread depending on the system. */ 1017 #define CR_SOCKET 0x0002 /* Resources are shared down to the socket 1018 * level. Jobs will not be co-allocated 1019 * within a socket. */ 1020 #define CR_CORE 0x0004 /* Resources are shared down to the core level. 
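 *
 * (Illustrative note, not part of the original header: the CR_* values in
 * this group are ORed together when configuring the select plugin; for
 * example, a hypothetical cr_type corresponding to "CR_Core_Memory" could
 * be expressed as
 *     uint16_t cr_type = CR_CORE | CR_MEMORY;
 * while CR_CPU, CR_SOCKET and CR_CORE themselves remain mutually
 * exclusive, as noted above.)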
1021 * Jobs will not be co-allocated within a 1022 * core. */ 1023 #define CR_BOARD 0x0008 /* Resources are shared down to the board 1024 * level. Jobs will not be co-allocated 1025 * within a board. */ 1026 #define CR_MEMORY 0x0010 /* Memory as consumable resources. Memory is 1027 * not over-committed when selected as a CR. */ 1028 /* was CR_OTHER_CONS_RES 0x0020, removed v23.11 */ 1029 /* was CR_NHC_STEP_NO 0x0040, removed v19.05 */ 1030 /* was CR_NHC_NO 0x0080, removed v19.05 */ 1031 1032 /* By default, schedule only one task per core. 1033 * Without this option, tasks would be allocated threads. */ 1034 #define CR_ONE_TASK_PER_CORE 0x0100 1035 1036 /* Pack tasks tightly onto allocated nodes rather than distributing them evenly 1037 * across available nodes */ 1038 #define CR_PACK_NODES 0x0200 1039 1040 #define LL_SHARED_GRES 0x0400 /* Prefer least-loaded device for shared GRES */ 1041 #define CR_OTHER_CONS_TRES 0x0800 /* if layering select plugins use 1042 * cons_tres instead of linear (default) 1043 */ 1044 /* By default, distribute cores using a block approach inside the nodes */ 1045 #define CR_CORE_DEFAULT_DIST_BLOCK 0x1000 1046 #define CR_LLN 0x4000 /* Select nodes by "least loaded." */ 1047 #define MULTIPLE_SHARING_GRES_PJ 0x8000 /* Allow multiple sharing gres per job */ 1048 1049 1050 /* 1051 * This is used internally to know whether the job was started with 1052 * cons_tres or linear. It is not a configuration option. 1053 */ 1054 #define CR_LINEAR 0x8000 1055 1056 #define MEM_PER_CPU 0x8000000000000000 1057 #define SHARED_FORCE 0x8000 1058 1059 #define PRIVATE_DATA_JOBS SLURM_BIT(0) /* job/step data is private */ 1060 #define PRIVATE_DATA_NODES SLURM_BIT(1) /* node data is private */ 1061 #define PRIVATE_DATA_PARTITIONS SLURM_BIT(2) /* partition data is private */ 1062 #define PRIVATE_DATA_USAGE SLURM_BIT(3) /* accounting usage data is 1063 * private */ 1064 #define PRIVATE_DATA_USERS SLURM_BIT(4) /* accounting user data is 1065 * private */ 1066 #define PRIVATE_DATA_ACCOUNTS SLURM_BIT(5) /* accounting account data is 1067 * private */ 1068 #define PRIVATE_DATA_RESERVATIONS SLURM_BIT(6) /* reservation data is private */ 1069 /* SLURM_BIT(7) Available 2 versions after 23.02 */ 1070 #define PRIVATE_DATA_EVENTS SLURM_BIT(8) /* events are private */ 1071 1072 #define PRIORITY_RESET_NONE 0x0000 /* never clear */ 1073 #define PRIORITY_RESET_NOW 0x0001 /* clear now (when slurmctld restarts) */ 1074 #define PRIORITY_RESET_DAILY 0x0002 /* clear daily at midnight */ 1075 #define PRIORITY_RESET_WEEKLY 0x0003 /* clear weekly at Sunday 00:00 */ 1076 #define PRIORITY_RESET_MONTHLY 0x0004 /* clear monthly on first at 00:00 */ 1077 #define PRIORITY_RESET_QUARTERLY 0x0005 /* clear quarterly on first at 00:00 */ 1078 #define PRIORITY_RESET_YEARLY 0x0006 /* clear yearly on first at 00:00 */ 1079 1080 #define PROP_PRIO_OFF 0x0000 /* Do not propagage user nice value */ 1081 #define PROP_PRIO_ON 0x0001 /* Propagate user nice value */ 1082 #define PROP_PRIO_NICER 0x0002 /* Ensure that user tasks have a nice 1083 * value that is higher than slurmd */ 1084 1085 #define PRIORITY_FLAGS_ACCRUE_ALWAYS SLURM_BIT(0) /* Flag to always accrue 1086 * age priority to pending 1087 * jobs ignoring 1088 * dependencies or holds 1089 */ 1090 #define PRIORITY_FLAGS_MAX_TRES SLURM_BIT(1) /* Calculate billed_tres 1091 * as the MAX of TRES on a 1092 * node rather than the 1093 * sum or TRES. 
*/ 1094 #define PRIORITY_FLAGS_SIZE_RELATIVE SLURM_BIT(2) /* Enable job size 1095 * measurement relative to 1096 * its time limit */ 1097 #define PRIORITY_FLAGS_DEPTH_OBLIVIOUS SLURM_BIT(3) /* Flag to use depth 1098 * oblivious formula for 1099 * computing hierarchical 1100 * fairshare */ 1101 #define PRIORITY_FLAGS_CALCULATE_RUNNING SLURM_BIT(4) /* Calculate priorities 1102 * for running jobs, not 1103 * only the pending jobs. 1104 */ 1105 #define PRIORITY_FLAGS_FAIR_TREE SLURM_BIT(5) /* Prioritize by level in 1106 * account hierarchy. */ 1107 #define PRIORITY_FLAGS_INCR_ONLY SLURM_BIT(6) /* Priority can only 1108 * increase, never 1109 * decrease in value */ 1110 1111 #define PRIORITY_FLAGS_NO_NORMAL_ASSOC SLURM_BIT(7) 1112 #define PRIORITY_FLAGS_NO_NORMAL_PART SLURM_BIT(8) 1113 #define PRIORITY_FLAGS_NO_NORMAL_QOS SLURM_BIT(9) 1114 #define PRIORITY_FLAGS_NO_NORMAL_TRES SLURM_BIT(10) 1115 1116 /* These bits are set in the bitflags field of job_desc_msg_t */ 1117 #define KILL_INV_DEP SLURM_BIT(0) /* Kill job on invalid dependency */ 1118 #define NO_KILL_INV_DEP SLURM_BIT(1) /* Don't kill job on invalid 1119 * dependency */ 1120 #define HAS_STATE_DIR SLURM_BIT(2) /* Used by slurmctld to track 1121 * state dir */ 1122 #define BACKFILL_TEST SLURM_BIT(3) /* Backfill test in progress */ 1123 #define GRES_ENFORCE_BIND SLURM_BIT(4) /* Enforce CPU/GRES binding */ 1124 #define TEST_NOW_ONLY SLURM_BIT(5) /* Test for immediately start only */ 1125 #define JOB_SEND_ENV SLURM_BIT(6) /* Send env to the dbd */ 1126 /* SLURM_BIT(7) Free to reuse */ 1127 #define SPREAD_JOB SLURM_BIT(8) /* Spread job across max node count */ 1128 #define USE_MIN_NODES SLURM_BIT(9) /* Prefer minimum node count */ 1129 #define JOB_KILL_HURRY SLURM_BIT(10) /* Avoid burst buffer stage out */ 1130 #define TRES_STR_CALC SLURM_BIT(11) /* Avoid calculating TRES strings at 1131 * the end of a job. */ 1132 #define SIB_JOB_FLUSH SLURM_BIT(12) /* Don't send complete to origin */ 1133 #define HET_JOB_FLAG SLURM_BIT(13) /* Heterogeneous job management flag */ 1134 #define JOB_NTASKS_SET SLURM_BIT(14) /* --ntasks explicitly set */ 1135 #define JOB_CPUS_SET SLURM_BIT(15) /* --cpus-per-tasks explicitly set */ 1136 #define BF_WHOLE_NODE_TEST SLURM_BIT(16) /* Backfill test in progress */ 1137 #define TOP_PRIO_TMP SLURM_BIT(17) /* Temporary flag for top priority job 1138 * operation */ 1139 #define JOB_ACCRUE_OVER SLURM_BIT(18) /* We have cleared the accrual count of 1140 * a job. 
*/ 1141 #define GRES_DISABLE_BIND SLURM_BIT(19) /* Disable CPU/GRES binding */ 1142 #define JOB_WAS_RUNNING SLURM_BIT(20) /* Job was running */ 1143 #define RESET_ACCRUE_TIME SLURM_BIT(21) /* Reset the job's accrue time */ 1144 #define CRON_JOB SLURM_BIT(22) /* Job submitted through scrontab */ 1145 #define JOB_MEM_SET SLURM_BIT(23) /* Memory limit explicitly set by job */ 1146 /* SLURM_BIT(24) Removed in 23.11, free in 25.05 */ 1147 1148 #define USE_DEFAULT_ACCT SLURM_BIT(25) /* Job submitted to default account */ 1149 #define USE_DEFAULT_PART SLURM_BIT(26) /* Job submitted to default 1150 * partition */ 1151 #define USE_DEFAULT_QOS SLURM_BIT(27) /* Job submitted with default QOS */ 1152 #define USE_DEFAULT_WCKEY SLURM_BIT(28) /* Job submitted with default WCKEY */ 1153 #define JOB_DEPENDENT SLURM_BIT(29) /* Job dependent or invalid depend */ 1154 #define JOB_MAGNETIC SLURM_BIT(30) /* Job attempting to run in a 1155 * magnetic reservation */ 1156 #define JOB_PART_ASSIGNED SLURM_BIT(31) /* Job didn't request a partition */ 1157 #define BACKFILL_SCHED SLURM_BIT(32) /* Job was considered in last 1158 * backfill attempt if not set the 1159 * normal scheduler set 1160 * last_eval_time */ 1161 #define BACKFILL_LAST SLURM_BIT(33) /* Job was considered in last 1162 * schedule attempt */ 1163 #define TASKS_CHANGED SLURM_BIT(34) /* Reset licenses per job */ 1164 #define JOB_SEND_SCRIPT SLURM_BIT(35) /* Send script to the dbd */ 1165 #define RESET_LIC_TASK SLURM_BIT(36) /* Reset licenses per task */ 1166 #define RESET_LIC_JOB SLURM_BIT(37) /* Reset licenses per job */ 1167 1168 /* These bits are set in the x11 field of job_desc_msg_t */ 1169 #define X11_FORWARD_ALL 0x0001 /* all nodes should setup forward */ 1170 #define X11_FORWARD_BATCH 0x0002 /* only the batch node */ 1171 #define X11_FORWARD_FIRST 0x0004 /* only the first node */ 1172 #define X11_FORWARD_LAST 0x0008 /* only the last node */ 1173 1174 typedef enum { 1175 SSF_NONE = 0, /* No flags set */ 1176 SSF_EXCLUSIVE = 1 << 0, /* CPUs not shared with other steps */ 1177 SSF_NO_KILL = 1 << 1, /* Don't kill step on node failure */ 1178 SSF_OVERCOMMIT = 1 << 2, /* Allow the step allocation of more tasks 1179 * to a node than available processors. */ 1180 SSF_WHOLE = 1 << 3, /* Use whole nodes in step allocation */ 1181 SSF_INTERACTIVE = 1 << 4, /* Request interactive step allocation */ 1182 SSF_MEM_ZERO = 1 << 5, /* Requested --mem=0; use all memory but do not 1183 * count against the job's memory allocation */ 1184 SSF_OVERLAP_FORCE = 1 << 6, /* Force this to overlap with all other 1185 * steps; resources allocated to this step 1186 * are not decremented from the job's 1187 * allocation */ 1188 SSF_NO_SIG_FAIL = 1 << 7, /* Don't fail step due to signal */ 1189 SSF_EXT_LAUNCHER = 1 << 8, /* Request is for an external launcher */ 1190 } step_spec_flags_t; 1191 1192 enum topology_plugin_type { 1193 TOPOLOGY_PLUGIN_DEFAULT = 100, 1194 TOPOLOGY_PLUGIN_3DTORUS = 101, 1195 TOPOLOGY_PLUGIN_TREE = 102, 1196 TOPOLOGY_PLUGIN_BLOCK = 103, 1197 }; 1198 1199 /*****************************************************************************\ 1200 * SLURM LIBRARY INITIALIZATION FUNCTIONS 1201 \*****************************************************************************/ 1202 1203 /* 1204 * MUST be called before any other Slurm library API calls. 1205 * 1206 * conf should be a fully qualified path to a slurm.conf configuration file, 1207 * or more commonly NULL to allow libslurm to automatically locate its own 1208 * configuration. 
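 *
 * Illustrative sketch, not part of the original header, of the intended
 * call sequence together with slurm_fini() declared below:
 *
 *     slurm_init(NULL);   // locate and load slurm.conf automatically
 *     // ... any other Slurm library API calls ...
 *     slurm_fini();       // release internal configuration structures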
1209 */ 1210 extern void slurm_init(const char *conf); 1211 1212 /* 1213 * Call at process termination to cleanup internal configuration structures. 1214 * 1215 * Strongly recommended if valgrind or similar tools will be used to check 1216 * your application for memory leaks. 1217 */ 1218 extern void slurm_fini(void); 1219 1220 /*****************************************************************************\ 1221 * SLURM HOSTLIST FUNCTIONS 1222 \*****************************************************************************/ 1223 1224 /* The hostlist opaque data type 1225 * 1226 * A hostlist is a list of hostnames optimized for a prefixXXXX style 1227 * naming convention, where XXXX is a decimal, numeric suffix. 1228 */ 1229 #ifndef __hostlist_t_defined 1230 # define __hostlist_t_defined 1231 typedef struct hostlist hostlist_t; 1232 #endif 1233 1234 /* 1235 * slurm_hostlist_create(): 1236 * 1237 * Create a new hostlist from a string representation. 1238 * 1239 * The string representation (str) may contain one or more hostnames or 1240 * bracketed hostlists separated by either `,' or whitespace. A bracketed 1241 * hostlist is denoted by a common prefix followed by a list of numeric 1242 * ranges contained within brackets: e.g. "tux[0-5,12,20-25]" 1243 * 1244 * To support systems with 3-D topography, a rectangular prism may 1245 * be described using two three digit numbers separated by "x": e.g. 1246 * "bgl[123x456]". This selects all nodes between 1 and 4 inclusive 1247 * in the first dimension, between 2 and 5 in the second, and between 1248 * 3 and 6 in the third dimension for a total of 4*4*4=64 nodes 1249 * 1250 * Note: if this module is compiled with WANT_RECKLESS_HOSTRANGE_EXPANSION 1251 * defined, a much more loose interpretation of host ranges is used. 1252 * Reckless hostrange expansion allows all of the following (in addition to 1253 * bracketed hostlists): 1254 * 1255 * o tux0-5,tux12,tux20-25 1256 * o tux0-tux5,tux12,tux20-tux25 1257 * o tux0-5,12,20-25 1258 * 1259 * If str is NULL, and empty hostlist is created and returned. 1260 * 1261 * If the create fails, hostlist_create() returns NULL. 1262 * 1263 * The returned hostlist must be freed with hostlist_destroy() 1264 * 1265 */ 1266 extern hostlist_t *slurm_hostlist_create(const char *hostlist); 1267 1268 /* slurm_hostlist_count(): 1269 * 1270 * Return the number of hosts in hostlist hl. 1271 */ 1272 extern int slurm_hostlist_count(hostlist_t *hl); 1273 1274 /* 1275 * slurm_hostlist_destroy(): 1276 * 1277 * Destroy a hostlist object. Frees all memory allocated to the hostlist. 1278 */ 1279 extern void slurm_hostlist_destroy(hostlist_t *hl); 1280 1281 /* slurm_hostlist_find(): 1282 * 1283 * Searches hostlist hl for the first host matching hostname 1284 * and returns position in list if found. 1285 * 1286 * Returns -1 if host is not found. 1287 */ 1288 extern int slurm_hostlist_find(hostlist_t *hl, const char *hostname); 1289 1290 /* slurm_hostlist_push(): 1291 * 1292 * push a string representation of hostnames onto a hostlist. 1293 * 1294 * The hosts argument may take the same form as in slurm_hostlist_create() 1295 * 1296 * Returns the number of hostnames inserted into the list, 1297 * or 0 on failure. 1298 */ 1299 extern int slurm_hostlist_push(hostlist_t *hl, const char *hosts); 1300 1301 /* slurm_hostlist_push_host(): 1302 * 1303 * Push a single host onto the hostlist hl. 1304 * This function is more efficient than slurm_hostlist_push() for a single 1305 * hostname, since the argument does not need to be checked for ranges. 
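 *
 * Illustrative sketch, not part of the original header, combining the
 * hostlist calls declared in this section (the resulting ranged string
 * shown is an assumption of the typical "prefix[ranges]" form):
 *
 *     hostlist_t *hl = slurm_hostlist_create("tux[0-3]");
 *     slurm_hostlist_push_host(hl, "tux10");
 *     int n = slurm_hostlist_count(hl);                    // 5 hosts
 *     char buf[256];
 *     slurm_hostlist_ranged_string(hl, sizeof(buf), buf);  // e.g. "tux[0-3,10]"
 *     slurm_hostlist_destroy(hl);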
1306 * 1307 * return value is 1 for success, 0 for failure. 1308 */ 1309 extern int slurm_hostlist_push_host(hostlist_t *hl, const char *host); 1310 1311 /* slurm_hostlist_ranged_string(): 1312 * 1313 * Write the string representation of the hostlist hl into buf, 1314 * writing at most n chars. Returns the number of bytes written, 1315 * or -1 if truncation occurred. 1316 * 1317 * The result will be NULL terminated. 1318 * 1319 * slurm_hostlist_ranged_string() will write a bracketed hostlist representation 1320 * where possible. 1321 */ 1322 extern ssize_t slurm_hostlist_ranged_string(hostlist_t *hl, size_t n, char *buf); 1323 1324 /* hostlist_ranged_string_xmalloc(): 1325 * 1326 * Wrapper of hostlist_ranged_string(), with result buffer dynamically 1327 * allocated using xmalloc(). 1328 * The result will be NULL on failure (out of memory). 1329 * 1330 * Caller should free the result string using xfree(). 1331 */ 1332 extern char *slurm_hostlist_ranged_string_xmalloc(hostlist_t *hl); 1333 1334 /* 1335 * slurm_hostlist_shift(): 1336 * 1337 * Returns the string representation of the first host in the hostlist 1338 * or NULL if the hostlist is empty or there was an error allocating memory. 1339 * The host is removed from the hostlist. 1340 * 1341 * Note: Caller is responsible for freeing the returned memory. 1342 */ 1343 extern char *slurm_hostlist_shift(hostlist_t *hl); 1344 1345 /* slurm_hostlist_uniq(): 1346 * 1347 * Sort the hostlist hl and remove duplicate entries. 1348 * 1349 */ 1350 extern void slurm_hostlist_uniq(hostlist_t *hl); 1351 1352 /*****************************************************************************\ 1353 * SLURM LIST FUNCTIONS 1354 \*****************************************************************************/ 1355 1356 #ifndef __list_datatypes_defined 1357 # define __list_datatypes_defined 1358 typedef struct xlist * List; 1359 typedef struct xlist list_t; 1360 /* 1361 * List opaque data type. 1362 */ 1363 1364 typedef struct listIterator * ListIterator; 1365 typedef struct listIterator list_itr_t; 1366 /* 1367 * List Iterator opaque data type. 1368 */ 1369 1370 typedef void (*ListDelF) (void *x); 1371 /* 1372 * Function prototype to deallocate data stored in a list. 1373 * This function is responsible for freeing all memory associated 1374 * with an item, including all subordinate items (if applicable). 1375 */ 1376 1377 typedef int (*ListCmpF) (void *x, void *y); 1378 /* 1379 * Function prototype for comparing two items in a list. 1380 * Returns less-than-zero if (x<y), zero if (x==y), and 1381 * greather-than-zero if (x>y). 1382 */ 1383 1384 typedef int (*ListFindF) (void *x, void *key); 1385 /* 1386 * Function prototype for matching items in a list. 1387 * Returns non-zero if (x==key); o/w returns zero. 1388 */ 1389 1390 typedef int (*ListForF) (void *x, void *arg); 1391 /* 1392 * Function prototype for operating on each item in a list. 1393 * Returns less-than-zero on error. 1394 */ 1395 #endif 1396 1397 /* slurm_list_append(): 1398 * 1399 * Inserts data [x] at the end of list [l]. 1400 */ 1401 extern void slurm_list_append(list_t *l, void *x); 1402 1403 /* slurm_list_count(): 1404 * 1405 * Returns the number of items in list [l]. 1406 */ 1407 extern int slurm_list_count(list_t *l); 1408 1409 /* slurm_list_create(): 1410 * 1411 * Creates and returns a new empty list. 
1412 * The deletion function [f] is used to deallocate memory used by items 1413 * in the list; if this is NULL, memory associated with these items 1414 * will not be freed when the list is destroyed. 1415 * Note: Abandoning a list without calling slurm_list_destroy() will result 1416 * in a memory leak. 1417 */ 1418 extern list_t *slurm_list_create(ListDelF f); 1419 1420 /* slurm_list_destroy(): 1421 * 1422 * Destroys list [l], freeing memory used for list iterators and the 1423 * list itself; if a deletion function was specified when the list 1424 * was created, it will be called for each item in the list. 1425 */ 1426 extern void slurm_list_destroy(list_t *l); 1427 1428 /* slurm_list_find(): 1429 * 1430 * Traverses the list from the point of the list iterator [i] 1431 * using [f] to match each item with [key]. 1432 * Returns a ptr to the next item for which the function [f] 1433 * returns non-zero, or NULL once the end of the list is reached. 1434 * Example: slurm_list_iterator_reset(i); 1435 * while ((x=slurm_list_find(i,f,k))) {...} 1436 */ 1437 extern void *slurm_list_find(list_itr_t *i, ListFindF f, void *key); 1438 1439 /* slurm_list_is_empty(): 1440 * 1441 * Returns non-zero if list [l] is empty; o/w returns zero. 1442 */ 1443 extern int slurm_list_is_empty(list_t *l); 1444 1445 /* 1446 * Creates and returns a list iterator for non-destructively traversing 1447 * list [l]. 1448 */ 1449 extern list_itr_t *slurm_list_iterator_create(list_t *l); 1450 1451 /* slurm_list_iterator_reset(): 1452 * 1453 * Resets the list iterator [i] to start traversal at the beginning 1454 * of the list. 1455 */ 1456 extern void slurm_list_iterator_reset(list_itr_t *i); 1457 1458 /* 1459 * Destroys the list iterator [i]; list iterators not explicitly destroyed 1460 * in this manner will be destroyed when the list is deallocated via 1461 * slurm_list_destroy(). 1462 */ 1463 extern void slurm_list_iterator_destroy(list_itr_t *i); 1464 1465 /* slurm_list_next(): 1466 * 1467 * Returns a ptr to the next item's data, 1468 * or NULL once the end of the list is reached. 1469 * Example: i=slurm_list_iterator_create(l); 1470 * while ((x=slurm_list_next(i))) {...} 1471 */ 1472 extern void *slurm_list_next(list_itr_t *i); 1473 1474 /* slurm_list_sort(): 1475 * 1476 * Sorts list [l] into ascending order according to the function [f]. 1477 * Note: Sorting a list resets all iterators associated with the list. 1478 * Note: The sort algorithm is stable. 1479 */ 1480 extern void slurm_list_sort(list_t *l, ListCmpF f); 1481 1482 /* slurm_list_pop(): 1483 * 1484 * Pops the data item at the top of the stack [l]. 1485 * Returns the data's ptr, or NULL if the stack is empty.
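 *
 * Taken together, the list calls above are typically combined as in this
 * illustrative sketch (stdio.h, stdlib.h and string.h assumed); passing
 * free() as the ListDelF lets slurm_list_destroy() release the strdup()'d
 * items:
 *
 *	list_t *l = slurm_list_create(free);
 *	slurm_list_append(l, strdup("alpha"));
 *	slurm_list_append(l, strdup("beta"));
 *
 *	list_itr_t *it = slurm_list_iterator_create(l);
 *	char *s;
 *	while ((s = slurm_list_next(it)))
 *		printf("%s\n", s);
 *	slurm_list_iterator_destroy(it);
 *	slurm_list_destroy(l);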
1486 */ 1487 extern void *slurm_list_pop(list_t *l); 1488 1489 /*****************************************************************************\ 1490 * SLURM BITSTR FUNCTIONS 1491 \*****************************************************************************/ 1492 1493 #ifndef __bitstr_datatypes_defined 1494 # define __bitstr_datatypes_defined 1495 1496 typedef int64_t bitstr_t; 1497 #define BITSTR_SHIFT BITSTR_SHIFT_WORD64 1498 1499 typedef bitstr_t bitoff_t; 1500 1501 #endif 1502 1503 #define ALLOC_SID_ADMIN_HOLD 0x00000001 /* admin job hold */ 1504 #define ALLOC_SID_USER_HOLD 0x00000002 /* user job hold */ 1505 1506 #define JOB_SHARED_NONE 0x0000 1507 #define JOB_SHARED_OK 0x0001 1508 #define JOB_SHARED_USER 0x0002 1509 #define JOB_SHARED_MCS 0x0003 1510 1511 #define SLURM_POWER_FLAGS_LEVEL 0x0001 /* Equal power cap on all nodes */ 1512 1513 /*****************************************************************************\ 1514 * PROTOCOL DATA STRUCTURE DEFINITIONS 1515 \*****************************************************************************/ 1516 typedef struct dynamic_plugin_data { 1517 void *data; 1518 uint32_t plugin_id; 1519 } dynamic_plugin_data_t; 1520 1521 typedef struct acct_gather_energy { 1522 uint32_t ave_watts; /* average power consump of node, in watts */ 1523 uint64_t base_consumed_energy; 1524 uint64_t consumed_energy; /* total energy consumed by node, in joules */ 1525 uint32_t current_watts; /* current power consump of node, in watts */ 1526 uint64_t previous_consumed_energy; 1527 time_t poll_time; /* When information was last retrieved */ 1528 } acct_gather_energy_t; 1529 1530 typedef struct ext_sensors_data { 1531 uint64_t consumed_energy; /* total energy consumed, in joules */ 1532 uint32_t temperature; /* temperature, in celsius */ 1533 time_t energy_update_time; /* last update time for consumed_energy */ 1534 uint32_t current_watts; /* current power consumption, in watts */ 1535 } ext_sensors_data_t; 1536 1537 typedef struct power_mgmt_data { 1538 uint32_t cap_watts; /* power consumption limit of node, in watts */ 1539 uint32_t current_watts; /* current power consumption, in watts */ 1540 uint64_t joule_counter; /* total energy consumption by node, in joules */ 1541 uint32_t new_cap_watts; /* new power consumption limit of node, in watts */ 1542 uint32_t max_watts; /* maximum power consumption by node, in watts */ 1543 uint32_t min_watts; /* minimum power consumption by node, in watts */ 1544 time_t new_job_time; /* set when a new job has been scheduled on the 1545 * node, used to trigger higher cap */ 1546 uint16_t state; /* Power state information */ 1547 uint64_t time_usec; /* Data timestamp in microseconds since start 1548 * of the day */ 1549 } power_mgmt_data_t; 1550 1551 typedef struct { 1552 time_t expiration; 1553 char *net_cred; 1554 slurm_addr_t *node_addrs; 1555 uint32_t node_cnt; 1556 char *node_list; 1557 } slurm_node_alias_addrs_t; 1558 1559 #define CORE_SPEC_THREAD 0x8000 /* If set, this is a thread count not core count */ 1560 1561 /* 1562 * Update: 1563 * _copy_job_desc_to_job_record() 1564 * slurm_free_job_desc_msg() 1565 */ 1566 typedef struct job_descriptor { /* For submit, allocate, and update requests */ 1567 char *account; /* charge to specified account */ 1568 char *acctg_freq; /* accounting polling intervals (seconds) */ 1569 char *admin_comment; /* administrator's arbitrary comment (update only) */ 1570 char *alloc_node; /* node making resource allocation request 1571 * NOTE: Normally set by slurm_submit* or 1572 * slurm_allocate* 
function */ 1573 uint16_t alloc_resp_port; /* port to send allocation confirmation to */ 1574 uint32_t alloc_sid; /* local sid making resource allocation request 1575 * NOTE: Normally set by slurm_submit* or 1576 * slurm_allocate* function 1577 * NOTE: Also used for update flags, see 1578 * ALLOC_SID_* flags */ 1579 uint32_t argc; /* number of arguments to the script */ 1580 char **argv; /* arguments to the script */ 1581 char *array_inx; /* job array index values */ 1582 bitstr_t *array_bitmap; /* NOTE: Set by slurmctld */ 1583 char *batch_features; /* features required for batch script's node */ 1584 time_t begin_time; /* delay initiation until this time */ 1585 uint64_t bitflags; /* bitflags */ 1586 char *burst_buffer; /* burst buffer specifications */ 1587 char *clusters; /* cluster names used for multi-cluster jobs */ 1588 char *cluster_features; /* required cluster feature specification, 1589 * default NONE */ 1590 char *comment; /* arbitrary comment */ 1591 uint16_t contiguous; /* 1 if job requires contiguous nodes, 1592 * 0 otherwise, default=0 */ 1593 char *container; /* OCI container bundle */ 1594 char *container_id; /* OCI container ID */ 1595 uint16_t core_spec; /* specialized core/thread count, 1596 * see CORE_SPEC_THREAD */ 1597 char *cpu_bind; /* binding map for map/mask_cpu - This 1598 * currently does not matter to the 1599 * job allocation, setting this does 1600 * not do anything for steps. */ 1601 uint16_t cpu_bind_type; /* see cpu_bind_type_t - This 1602 * currently does not matter to the 1603 * job allocation, setting this does 1604 * not do anything for steps. */ 1605 uint32_t cpu_freq_min; /* Minimum cpu frequency */ 1606 uint32_t cpu_freq_max; /* Maximum cpu frequency */ 1607 uint32_t cpu_freq_gov; /* cpu frequency governor */ 1608 char *cpus_per_tres; /* semicolon delimited list of TRES=# values */ 1609 void *crontab_entry; /* really cron_entry_t */ 1610 time_t deadline; /* deadline */ 1611 uint32_t delay_boot; /* delay boot for desired node state */ 1612 char *dependency; /* synchronize job execution with other jobs */ 1613 time_t end_time; /* time by which job must complete, used for 1614 * job update only now, possible deadline 1615 * scheduling in the future */ 1616 char **environment; /* environment variables to set for job, 1617 * name=value pairs, one per line */ 1618 slurm_hash_t env_hash; /* hash value of environment DO NOT PACK */ 1619 uint32_t env_size; /* element count in environment */ 1620 char *exc_nodes; /* comma separated list of nodes excluded 1621 * from job's allocation, default NONE */ 1622 char *extra; /* Arbitrary string */ 1623 char *features; /* required feature specification, 1624 * default NONE */ 1625 uint64_t fed_siblings_active; /* Bitmap of active fed sibling ids */ 1626 uint64_t fed_siblings_viable; /* Bitmap of viable fed sibling ids */ 1627 uint32_t group_id; /* group to assume, if run as root. */ 1628 uint32_t het_job_offset; /* HetJob component offset */ 1629 void *id; /* actually identity_t.
DO NOT PACK */ 1630 uint16_t immediate; /* 1 if allocate to run or fail immediately, 1631 * 0 if to be queued awaiting resources */ 1632 uint32_t job_id; /* job ID, default set by Slurm */ 1633 char * job_id_str; /* string representation of the jobid */ 1634 char *job_size_str; 1635 uint16_t kill_on_node_fail; /* 1 to kill job on node failure, 1636 * 0 otherwise, default=1 */ 1637 char *licenses; /* licenses required by the job */ 1638 char *licenses_tot; /* total licenses required by the job included 1639 * from tres requests as well, NOT PACKED */ 1640 uint16_t mail_type; /* see MAIL_JOB_ definitions above */ 1641 char *mail_user; /* user to receive notification */ 1642 char *mcs_label; /* mcs_label if mcs plugin in use */ 1643 char *mem_bind; /* binding map for map/mask_cpu */ 1644 uint16_t mem_bind_type; /* see mem_bind_type_t */ 1645 char *mem_per_tres; /* semicolon delimited list of TRES=# values */ 1646 char *name; /* name of the job, default "" */ 1647 char *network; /* network use spec */ 1648 uint32_t nice; /* requested priority change, 1649 * NICE_OFFSET == no change */ 1650 uint32_t num_tasks; /* number of tasks to be started, 1651 * for batch only */ 1652 uint8_t open_mode; /* out/err open mode truncate or append, 1653 * see OPEN_MODE_* */ 1654 char *origin_cluster; /* cluster name that initiated the job. */ 1655 uint16_t other_port; /* port to send various notification msg to */ 1656 uint8_t overcommit; /* over subscribe resources, for batch only */ 1657 char *partition; /* name of requested partition, 1658 * default in Slurm config */ 1659 uint16_t plane_size; /* plane size when task_dist = 1660 SLURM_DIST_PLANE */ 1661 uint8_t power_flags; /* power management flags, 1662 * see SLURM_POWER_FLAGS_ */ 1663 char *prefer; /* soft feature specification, 1664 * default NONE */ 1665 uint32_t priority; /* relative priority of the job, 1666 * explicitly set only for user root, 1667 * 0 == held (don't initiate) */ 1668 uint32_t profile; /* Level of acct_gather_profile {all | none} */ 1669 char *qos; /* Quality of Service */ 1670 uint16_t reboot; /* force node reboot before startup */ 1671 char *resp_host; /* NOTE: Set by slurmctld */ 1672 uint16_t restart_cnt; /* count of job restarts */ 1673 char *req_nodes; /* comma separated list of required nodes 1674 * default NONE */ 1675 uint16_t requeue; /* enable or disable job requeue option */ 1676 char *reservation; /* name of reservation to use */ 1677 char *script; /* the actual job script, default NONE */ 1678 void *script_buf; /* job script as mmap buf */ 1679 slurm_hash_t script_hash; /* hash value of script DO NOT PACK */ 1680 uint16_t shared; /* 2 if the job can only share nodes with other 1681 * jobs owned by that user, 1682 * 1 if job can share nodes with other jobs, 1683 * 0 if job needs exclusive access to the node, 1684 * or NO_VAL to accept the system default. 1685 * SHARED_FORCE to eliminate user control.
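 *
 * Illustrative sketch of a minimal batch submission built from this
 * structure (time_limit is in minutes; shared = 0 requests exclusive node
 * access as described above). slurm_init_job_desc_msg(),
 * slurm_submit_batch_job() and submit_response_msg_t are declared later in
 * this header; a real submission typically also supplies the job
 * environment and any site-specific fields:
 *
 *	job_desc_msg_t desc;
 *	submit_response_msg_t *resp = NULL;
 *
 *	slurm_init_job_desc_msg(&desc);
 *	desc.name       = "example";
 *	desc.min_nodes  = 1;
 *	desc.time_limit = 10;
 *	desc.shared     = 0;
 *	desc.user_id    = getuid();
 *	desc.group_id   = getgid();
 *	desc.work_dir   = "/tmp";
 *	desc.script     = "#!/bin/sh\nsrun hostname\n";
 *	if (slurm_submit_batch_job(&desc, &resp) == SLURM_SUCCESS)
 *		printf("submitted job %u\n", resp->job_id);
 *
 * The response message should afterwards be released with the matching
 * slurm_free_*() helper.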
*/ 1686 uint32_t site_factor; /* factor to consider in priority */ 1687 char **spank_job_env; /* environment variables for job prolog/epilog 1688 * scripts as set by SPANK plugins */ 1689 uint32_t spank_job_env_size; /* element count in spank_env */ 1690 char *submit_line; /* The command issued with all it's options in a 1691 * string */ 1692 uint32_t task_dist; /* see enum task_dist_state */ 1693 uint32_t time_limit; /* maximum run time in minutes, default is 1694 * partition limit */ 1695 uint32_t time_min; /* minimum run time in minutes, default is 1696 * time_limit */ 1697 char *tres_bind; /* Task to TRES binding directives */ 1698 char *tres_freq; /* TRES frequency directives */ 1699 char *tres_per_job; /* semicolon delimited list of TRES=# values */ 1700 char *tres_per_node; /* semicolon delimited list of TRES=# values */ 1701 char *tres_per_socket; /* semicolon delimited list of TRES=# values */ 1702 char *tres_per_task; /* semicolon delimited list of TRES=# values */ 1703 uint32_t user_id; /* set only if different from current UID, 1704 * can only be explicitly set by user root */ 1705 uint16_t wait_all_nodes;/* 0 to start job immediately after allocation 1706 * 1 to start job after all nodes booted 1707 * or NO_VAL to use system default */ 1708 uint16_t warn_flags; /* flags related to job signals 1709 * (eg. KILL_JOB_BATCH) */ 1710 uint16_t warn_signal; /* signal to send when approaching end time */ 1711 uint16_t warn_time; /* time before end to send signal (seconds) */ 1712 char *work_dir; /* pathname of working directory */ 1713 1714 /* job constraints: */ 1715 uint16_t cpus_per_task; /* number of processors required for 1716 * each task */ 1717 uint32_t min_cpus; /* minimum number of processors required, 1718 * default=0 */ 1719 uint32_t max_cpus; /* maximum number of processors required, 1720 * default=0 */ 1721 uint32_t min_nodes; /* minimum number of nodes required by job, 1722 * default=0 */ 1723 uint32_t max_nodes; /* maximum number of nodes usable by job, 1724 * default=0 */ 1725 uint16_t boards_per_node; /* boards per node required by job */ 1726 uint16_t sockets_per_board;/* sockets per board required by job */ 1727 uint16_t sockets_per_node;/* sockets per node required by job */ 1728 uint16_t cores_per_socket;/* cores per socket required by job */ 1729 uint16_t threads_per_core;/* threads per core required by job */ 1730 uint16_t ntasks_per_node;/* number of tasks to invoke on each node */ 1731 uint16_t ntasks_per_socket;/* number of tasks to invoke on 1732 * each socket */ 1733 uint16_t ntasks_per_core;/* number of tasks to invoke on each core */ 1734 uint16_t ntasks_per_board;/* number of tasks to invoke on each board */ 1735 uint16_t ntasks_per_tres;/* number of tasks that can access each gpu */ 1736 uint16_t pn_min_cpus; /* minimum # CPUs per node, default=0 */ 1737 uint64_t pn_min_memory; /* minimum real memory per node OR 1738 * real memory per CPU | MEM_PER_CPU, 1739 * default=0 (no limit) */ 1740 uint32_t pn_min_tmp_disk;/* minimum tmp disk per node, 1741 * default=0 */ 1742 char *req_context; /* requested selinux context */ 1743 uint32_t req_switch; /* Minimum number of switches */ 1744 char *selinux_context; /* used internally in the slurmctld, 1745 DON'T PACK */ 1746 char *std_err; /* pathname of stderr */ 1747 char *std_in; /* pathname of stdin */ 1748 char *std_out; /* pathname of stdout */ 1749 uint64_t *tres_req_cnt; /* used internally in the slurmctld, 1750 DON'T PACK */ 1751 uint32_t wait4switch; /* Maximum time to wait for minimum switches */ 1752 
char *wckey; /* wckey for job */ 1753 uint16_t x11; /* --x11 flags */ 1754 char *x11_magic_cookie; /* automatically stolen from submit node */ 1755 char *x11_target; /* target hostname, or unix socket if port == 0 */ 1756 uint16_t x11_target_port; /* target tcp port, 6000 + the display number */ 1757 } job_desc_msg_t; 1758 1759 typedef struct job_info { 1760 char *account; /* charge to specified account */ 1761 time_t accrue_time; /* time job is eligible for running */ 1762 char *admin_comment; /* administrator's arbitrary comment */ 1763 char *alloc_node; /* local node making resource alloc */ 1764 uint32_t alloc_sid; /* local sid making resource alloc */ 1765 bitstr_t *array_bitmap; /* NOTE: set on unpack */ 1766 uint32_t array_job_id; /* job_id of a job array or 0 if N/A */ 1767 uint32_t array_task_id; /* task_id of a job array */ 1768 uint32_t array_max_tasks; /* Maximum number of running tasks */ 1769 char *array_task_str; /* string expression of task IDs in this record */ 1770 uint32_t assoc_id; /* association id for job */ 1771 char *batch_features; /* features required for batch script's node */ 1772 uint16_t batch_flag; /* 1 if batch: queued job with script */ 1773 char *batch_host; /* name of host running batch script */ 1774 uint64_t bitflags; /* Various job flags */ 1775 uint16_t boards_per_node; /* boards per node required by job */ 1776 char *burst_buffer; /* burst buffer specifications */ 1777 char *burst_buffer_state; /* burst buffer state info */ 1778 char *cluster; /* name of cluster that the job is on */ 1779 char *cluster_features; /* comma separated list of required cluster 1780 * features */ 1781 char *command; /* command to be executed, built from submitted 1782 * job's argv */ 1783 char *comment; /* arbitrary comment */ 1784 char *container; /* OCI Container bundle path */ 1785 char *container_id; /* OCI Container ID */ 1786 uint16_t contiguous; /* 1 if job requires contiguous nodes */ 1787 uint16_t core_spec; /* specialized core count */ 1788 uint16_t cores_per_socket; /* cores per socket required by job */ 1789 double billable_tres; /* billable TRES cache. 
updated upon resize */ 1790 uint16_t cpus_per_task; /* number of processors required for 1791 * each task */ 1792 uint32_t cpu_freq_min; /* Minimum cpu frequency */ 1793 uint32_t cpu_freq_max; /* Maximum cpu frequency */ 1794 uint32_t cpu_freq_gov; /* cpu frequency governor */ 1795 char *cpus_per_tres; /* semicolon delimited list of TRES=# values */ 1796 char *cronspec; /* cron time specification (scrontab jobs) */ 1797 time_t deadline; /* deadline */ 1798 uint32_t delay_boot; /* delay boot for desired node state */ 1799 char *dependency; /* synchronize job execution with other jobs */ 1800 uint32_t derived_ec; /* highest exit code of all job steps */ 1801 time_t eligible_time; /* time job is eligible for running */ 1802 time_t end_time; /* time of termination, actual or expected */ 1803 char *exc_nodes; /* comma separated list of excluded nodes */ 1804 int32_t *exc_node_inx; /* excluded list index pairs into node_table: 1805 * start_range_1, end_range_1, 1806 * start_range_2, .., -1 */ 1807 uint32_t exit_code; /* exit code for job (status from wait call) */ 1808 char *extra; /* Arbitrary string */ 1809 char *failed_node; /* if set, node that caused job to fail */ 1810 char *features; /* comma separated list of required features */ 1811 char *fed_origin_str; /* Origin cluster's name */ 1812 uint64_t fed_siblings_active; /* bitmap of active fed sibling ids */ 1813 char *fed_siblings_active_str; /* string of active sibling names */ 1814 uint64_t fed_siblings_viable; /* bitmap of viable fed sibling ids */ 1815 char *fed_siblings_viable_str; /* string of viable sibling names */ 1816 uint32_t gres_detail_cnt; /* Count of gres_detail_str records, 1817 * one per allocated node */ 1818 char **gres_detail_str; /* Details of GRES count/index alloc per node */ 1819 char *gres_total; /* Total count of gres used with names */ 1820 uint32_t group_id; /* group job submitted as */ 1821 uint32_t het_job_id; /* job ID of hetjob leader */ 1822 char *het_job_id_set; /* job IDs for all components */ 1823 uint32_t het_job_offset; /* HetJob component offset from leader */ 1824 uint32_t job_id; /* job ID */ 1825 job_resources_t *job_resrcs; /* opaque data type, job resources */ 1826 char *job_size_str; 1827 uint32_t job_state; /* state of the job, see enum job_states */ 1828 time_t last_sched_eval; /* last time job was evaluated for scheduling */ 1829 char *licenses; /* licenses required by the job */ 1830 uint16_t mail_type; /* see MAIL_JOB_ definitions above */ 1831 char *mail_user; /* user to receive notification */ 1832 uint32_t max_cpus; /* maximum number of cpus usable by job */ 1833 uint32_t max_nodes; /* maximum number of nodes usable by job */ 1834 char *mcs_label; /* mcs_label if mcs plugin in use */ 1835 char *mem_per_tres; /* semicolon delimited list of TRES=# values */ 1836 char *name; /* name of the job */ 1837 char *network; /* network specification */ 1838 char *nodes; /* list of nodes allocated to job */ 1839 uint32_t nice; /* requested priority change */ 1840 int32_t *node_inx; /* list index pairs into node_table for *nodes: 1841 * start_range_1, end_range_1, 1842 * start_range_2, .., -1 */ 1843 uint16_t ntasks_per_core;/* number of tasks to invoke on each core */ 1844 uint16_t ntasks_per_tres;/* number of tasks that can access each gpu */ 1845 uint16_t ntasks_per_node;/* number of tasks to invoke on each node */ 1846 uint16_t ntasks_per_socket;/* number of tasks to invoke on each socket*/ 1847 uint16_t ntasks_per_board; /* number of tasks to invoke on each board */ 1848 uint32_t num_cpus; /* 
minimum number of cpus required by job */ 1849 uint32_t num_nodes; /* minimum number of nodes required by job */ 1850 uint32_t num_tasks; /* requested task count */ 1851 char *partition; /* name of assigned partition */ 1852 char *prefer; /* comma separated list of soft features */ 1853 uint64_t pn_min_memory; /* minimum real memory per node, default=0 */ 1854 uint16_t pn_min_cpus; /* minimum # CPUs per node, default=0 */ 1855 uint32_t pn_min_tmp_disk; /* minimum tmp disk per node, default=0 */ 1856 uint8_t power_flags; /* power management flags, 1857 * see SLURM_POWER_FLAGS_ */ 1858 time_t preempt_time; /* preemption signal time */ 1859 time_t preemptable_time;/* job becomes preemptable from 1860 * PreemptExemptTime */ 1861 time_t pre_sus_time; /* time job ran prior to last suspend */ 1862 uint32_t priority; /* relative priority of the job, 1863 * 0=held, 1=required nodes DOWN/DRAINED */ 1864 uint32_t profile; /* Level of acct_gather_profile {all | none} */ 1865 char *qos; /* Quality of Service */ 1866 uint8_t reboot; /* node reboot requested before start */ 1867 char *req_nodes; /* comma separated list of required nodes */ 1868 int32_t *req_node_inx; /* required list index pairs into node_table: 1869 * start_range_1, end_range_1, 1870 * start_range_2, .., -1 */ 1871 uint32_t req_switch; /* Minimum number of switches */ 1872 uint16_t requeue; /* enable or disable job requeue option */ 1873 time_t resize_time; /* time of latest size change */ 1874 uint16_t restart_cnt; /* count of job restarts */ 1875 char *resv_name; /* reservation name */ 1876 char *sched_nodes; /* list of nodes scheduled to be used for job */ 1877 char *selinux_context; 1878 uint16_t shared; /* 1 if job can share nodes with other jobs */ 1879 uint16_t show_flags; /* conveys level of details requested */ 1880 uint32_t site_factor; /* factor to consider in priority */ 1881 uint16_t sockets_per_board;/* sockets per board required by job */ 1882 uint16_t sockets_per_node; /* sockets per node required by job */ 1883 time_t start_time; /* time execution begins, actual or expected */ 1884 uint16_t start_protocol_ver; /* Slurm version step was started with 1885 * either srun or the lowest slurmd version 1886 * it is talking to */ 1887 char *state_desc; /* optional details for state_reason */ 1888 uint32_t state_reason; /* reason job still pending or failed, see 1889 * slurm.h:enum job_state_reason */ 1890 char *std_err; /* pathname of job's stderr file */ 1891 char *std_in; /* pathname of job's stdin file */ 1892 char *std_out; /* pathname of job's stdout file */ 1893 time_t submit_time; /* time of job submission */ 1894 time_t suspend_time; /* time job last suspended or resumed */ 1895 char *system_comment; /* slurmctld's arbitrary comment */ 1896 uint32_t time_limit; /* maximum run time in minutes or INFINITE */ 1897 uint32_t time_min; /* minimum run time in minutes or INFINITE */ 1898 uint16_t threads_per_core; /* threads per core required by job */ 1899 char *tres_bind; /* Task to TRES binding directives */ 1900 char *tres_freq; /* TRES frequency directives */ 1901 char *tres_per_job; /* semicolon delimited list of TRES=# values */ 1902 char *tres_per_node; /* semicolon delimited list of TRES=# values */ 1903 char *tres_per_socket; /* semicolon delimited list of TRES=# values */ 1904 char *tres_per_task; /* semicolon delimited list of TRES=# values */ 1905 char *tres_req_str; /* tres requested in the job */ 1906 char *tres_alloc_str; /* tres used in the job */ 1907 uint32_t user_id; /* user the job runs as */ 1908 char 
*user_name; /* user_name or null. not always set, but 1909 * accurate if set (and can avoid a local 1910 * lookup call) */ 1911 uint32_t wait4switch; /* Maximum time to wait for minimum switches */ 1912 char *wckey; /* wckey for job */ 1913 char *work_dir; /* pathname of working directory */ 1914 } slurm_job_info_t; 1915 1916 typedef slurm_job_info_t job_info_t; 1917 1918 typedef struct { 1919 uint32_t nice; 1920 double priority_age; 1921 double priority_assoc; 1922 double priority_fs; 1923 double priority_js; 1924 double priority_part; 1925 double priority_qos; 1926 uint32_t priority_site; 1927 1928 double *priority_tres;/* tres priorities with weights applied. */ 1929 uint32_t tres_cnt; /* number of configured tres' on system. */ 1930 char **tres_names; /* packed as assoc_mgr_tres_names[] */ 1931 double *tres_weights; /* PriorityWeightTRES weights as an array */ 1932 } priority_factors_t; 1933 1934 typedef struct priority_factors_object { 1935 char *account; 1936 char *cluster_name; /* Cluster name ONLY set in federation */ 1937 double direct_prio; /* Manually set priority. If it is set prio_factors 1938 * will be NULL */ 1939 uint32_t job_id; 1940 char *partition; 1941 priority_factors_t *prio_factors; 1942 char *qos; 1943 uint32_t user_id; 1944 } priority_factors_object_t; 1945 1946 typedef struct priority_factors_response_msg { 1947 list_t *priority_factors_list; /* priority_factors_object_t list */ 1948 } priority_factors_response_msg_t; 1949 1950 typedef struct job_info_msg { 1951 time_t last_backfill; /* time of late backfill run */ 1952 time_t last_update; /* time of latest info */ 1953 uint32_t record_count; /* number of records */ 1954 slurm_job_info_t *job_array; /* the job records */ 1955 } job_info_msg_t; 1956 1957 typedef struct step_update_request_msg { 1958 uint32_t job_id; 1959 uint32_t step_id; 1960 uint32_t time_limit; /* In minutes */ 1961 } step_update_request_msg_t; 1962 1963 typedef struct suspend_exc_update_msg { 1964 char *update_str; 1965 update_mode_t mode; 1966 } suspend_exc_update_msg_t; 1967 1968 typedef struct { 1969 char *node_list; /* nodelist corresponding to task layout */ 1970 uint16_t *cpus_per_node; /* cpus per node */ 1971 uint32_t *cpu_count_reps; /* how many nodes have same cpu count */ 1972 uint32_t num_hosts; /* number of hosts we have */ 1973 uint32_t num_tasks; /* number of tasks to distribute across these cpus*/ 1974 uint16_t *cpus_per_task; /* number of cpus per task */ 1975 uint32_t *cpus_task_reps; /* how many nodes have same per task count */ 1976 uint32_t task_dist; /* type of distribution we are using */ 1977 uint16_t plane_size; /* plane size (only needed for plane distribution*/ 1978 } slurm_step_layout_req_t; 1979 1980 typedef struct slurm_step_layout { 1981 uint16_t *cpt_compact_array; /* Compressed per-node cpus_per_task. 
1982 * Index with slurm_get_rep_count_inx() */ 1983 uint32_t cpt_compact_cnt; /* number of elements in cpt_compact arrays */ 1984 uint32_t *cpt_compact_reps; /* number of consecutive nodes on which a 1985 * value in cpt_compact_array is 1986 * duplicated */ 1987 char *front_end; /* If a front-end architecture, the name of 1988 * of the node running all tasks, 1989 * NULL otherwise */ 1990 slurm_node_alias_addrs_t *alias_addrs; 1991 uint32_t node_cnt; /* node count */ 1992 char *node_list; /* list of nodes in step */ 1993 uint16_t plane_size; /* plane size when task_dist = 1994 * SLURM_DIST_PLANE */ 1995 uint16_t start_protocol_ver; /* Slurm version step was started with 1996 * either srun or the lowest slurmd version 1997 * it is talking to */ 1998 /* Array of length "node_cnt". Each element of the array 1999 * is the number of tasks assigned to the corresponding node */ 2000 uint16_t *tasks; 2001 uint32_t task_cnt; /* total number of tasks in the step */ 2002 uint32_t task_dist; /* see enum task_dist_state */ 2003 /* Array (of length "node_cnt") of task ID arrays. The length 2004 * of each subarray is designated by the corresponding value in 2005 * the tasks array. */ 2006 uint32_t **tids; /* host id => task id mapping */ 2007 } slurm_step_layout_t; 2008 2009 typedef struct slurm_step_id_msg { 2010 uint32_t job_id; 2011 uint32_t step_het_comp; 2012 uint32_t step_id; 2013 } slurm_step_id_t; 2014 2015 typedef struct slurm_step_io_fds { 2016 struct { 2017 int fd; 2018 uint32_t taskid; 2019 uint32_t nodeid; 2020 } input, out, err; 2021 } slurm_step_io_fds_t; 2022 2023 #define SLURM_STEP_IO_FDS_INITIALIZER {{0, (uint32_t)-1, (uint32_t)-1}, \ 2024 {1, (uint32_t)-1, (uint32_t)-1}, \ 2025 {2, (uint32_t)-1, (uint32_t)-1}} 2026 2027 typedef struct launch_tasks_response_msg { 2028 uint32_t return_code; 2029 char *node_name; 2030 uint32_t srun_node_id; 2031 uint32_t count_of_pids; 2032 uint32_t *local_pids; 2033 slurm_step_id_t step_id; 2034 uint32_t *task_ids; /* array of length count_of_pids */ 2035 } launch_tasks_response_msg_t; 2036 2037 typedef struct task_ext_msg { 2038 uint32_t num_tasks; 2039 uint32_t *task_id_list; 2040 uint32_t return_code; 2041 slurm_step_id_t step_id; 2042 } task_exit_msg_t; 2043 2044 typedef struct { 2045 uint32_t job_id; /* slurm job_id */ 2046 uint32_t flags; /* flags */ 2047 uint16_t port; /* target TCP port */ 2048 char *target; /* target host or UNIX socket */ 2049 } net_forward_msg_t; 2050 2051 typedef struct srun_ping_msg { 2052 uint32_t job_id; /* slurm job_id */ 2053 } srun_ping_msg_t; 2054 2055 typedef slurm_step_id_t srun_job_complete_msg_t; 2056 2057 typedef struct srun_timeout_msg { 2058 slurm_step_id_t step_id; 2059 time_t timeout; /* when job scheduled to be killed */ 2060 } srun_timeout_msg_t; 2061 2062 typedef struct srun_user_msg { 2063 uint32_t job_id; /* slurm job_id */ 2064 char *msg; /* message to user's srun */ 2065 } srun_user_msg_t; 2066 2067 typedef struct srun_node_fail_msg { 2068 char *nodelist; /* name of failed node(s) */ 2069 slurm_step_id_t step_id; 2070 } srun_node_fail_msg_t; 2071 2072 typedef struct srun_step_missing_msg { 2073 char *nodelist; /* name of node(s) lacking this step */ 2074 slurm_step_id_t step_id; 2075 } srun_step_missing_msg_t; 2076 2077 enum suspend_opts { 2078 SUSPEND_JOB, /* Suspend a job now */ 2079 RESUME_JOB /* Resume a job now */ 2080 }; 2081 2082 /* NOTE: Set either job_id_str (NULL by default) or job_id */ 2083 typedef struct suspend_msg { 2084 uint16_t op; /* suspend operation, see enum suspend_opts */ 2085 
uint32_t job_id; /* slurm job ID (number) */ 2086 char * job_id_str; /* slurm job ID (string) */ 2087 } suspend_msg_t; 2088 2089 /* NOTE: Set either job_id_str (NULL by default) or job_id */ 2090 typedef struct top_job_msg { 2091 uint16_t op; /* suspend operation, see enum suspend_opts */ 2092 uint32_t job_id; /* slurm job ID (number) */ 2093 char * job_id_str; /* slurm job ID (string) */ 2094 } top_job_msg_t; 2095 2096 typedef struct { 2097 char *alias_list; /* node name/address/hostname aliases */ 2098 uint32_t argc; 2099 char **argv; 2100 uint32_t envc; 2101 char **env; 2102 char *container; /* OCI Container bundle path */ 2103 char *cwd; 2104 uint32_t msg_timeout; /* timeout set for sending message */ 2105 uint16_t ntasks_per_board;/* number of tasks to invoke on each board */ 2106 uint16_t ntasks_per_core; /* number of tasks to invoke on each core */ 2107 uint16_t ntasks_per_tres;/* number of tasks that can access each gpu */ 2108 uint16_t ntasks_per_socket;/* number of tasks to invoke on 2109 * each socket */ 2110 2111 /* I/O handling */ 2112 bool buffered_stdio; 2113 bool labelio; 2114 char *remote_output_filename; 2115 char *remote_error_filename; 2116 char *remote_input_filename; 2117 slurm_step_io_fds_t local_fds; 2118 2119 bool multi_prog; 2120 bool no_alloc; 2121 uint32_t slurmd_debug; /* remote slurmd debug level */ 2122 uint32_t het_job_node_offset; /* Hetjob node offset or NO_VAL */ 2123 uint32_t het_job_id; /* Hetjob ID or NO_VAL */ 2124 uint32_t het_job_nnodes;/* total task count for entire hetjob */ 2125 uint32_t het_job_ntasks;/* total task count for entire hetjob */ 2126 uint32_t het_job_step_cnt;/* total step count for entire hetjob */ 2127 uint16_t *het_job_task_cnts; /* Number of tasks on each node in hetjob */ 2128 uint32_t **het_job_tids; /* Task IDs on each node in hetjob */ 2129 uint32_t *het_job_tid_offsets;/* map of tasks (by id) to originating 2130 * hetjob */ 2131 uint32_t het_job_offset;/* Hetjob offset or NO_VAL */ 2132 uint32_t het_job_task_offset; /* Hetjob task offset or NO_VAL */ 2133 char *het_job_node_list; /* Hetjob step node list */ 2134 bool parallel_debug; 2135 uint32_t profile; /* Level of acct_gather_profile {all | none} */ 2136 char *task_prolog; 2137 char *task_epilog; 2138 uint16_t cpu_bind_type; /* use cpu_bind_type_t */ 2139 char *cpu_bind; 2140 uint32_t cpu_freq_min; /* Minimum cpu frequency */ 2141 uint32_t cpu_freq_max; /* Maximum cpu frequency */ 2142 uint32_t cpu_freq_gov; /* cpu frequency governor */ 2143 uint16_t mem_bind_type; /* use mem_bind_type_t */ 2144 char *mem_bind; 2145 uint16_t accel_bind_type; /* --accel-bind= */ 2146 2147 uint16_t max_sockets; 2148 uint16_t max_cores; 2149 uint16_t max_threads; 2150 uint16_t cpus_per_task; 2151 uint16_t *cpt_compact_array; /* Compressed per-node cpus_per_task. 
2152 * Index with slurm_get_rep_count_inx() */ 2153 uint32_t cpt_compact_cnt; /* number of elements in cpt_compact arrays */ 2154 uint32_t *cpt_compact_reps; /* number of consecutive nodes on which a 2155 * value in cpt_compact_array is 2156 * duplicated */ 2157 uint16_t threads_per_core; 2158 uint32_t task_dist; 2159 uint16_t tree_width; 2160 bool preserve_env; 2161 2162 char *mpi_plugin_name; 2163 uint8_t open_mode; 2164 char *acctg_freq; 2165 bool pty; 2166 char **spank_job_env; /* environment variables for job prolog/epilog 2167 * scripts as set by SPANK plugins */ 2168 uint32_t spank_job_env_size; /* element count in spank_env */ 2169 char *tres_bind; 2170 char *tres_freq; 2171 } slurm_step_launch_params_t; 2172 2173 typedef struct { 2174 void (*step_complete)(srun_job_complete_msg_t *); 2175 void (*step_signal)(int); 2176 void (*step_timeout)(srun_timeout_msg_t *); 2177 void (*task_start)(launch_tasks_response_msg_t *); 2178 void (*task_finish)(task_exit_msg_t *); 2179 } slurm_step_launch_callbacks_t; 2180 2181 typedef struct { 2182 void (*job_complete)(srun_job_complete_msg_t *); 2183 void (*timeout)(srun_timeout_msg_t *); 2184 void (*user_msg)(srun_user_msg_t *); 2185 void (*node_fail)(srun_node_fail_msg_t *); 2186 void (*job_suspend)(suspend_msg_t *); 2187 } slurm_allocation_callbacks_t; 2188 2189 typedef struct { 2190 void (*acct_full)(); 2191 void (*dbd_fail)(); 2192 void (*dbd_resumed)(); 2193 void (*db_fail)(); 2194 void (*db_resumed)(); 2195 } slurm_trigger_callbacks_t; 2196 2197 typedef struct { 2198 uint32_t array_job_id; /* job_id of a job array or 0 if N/A */ 2199 uint32_t array_task_id; /* task_id of a job array */ 2200 char *cluster; /* cluster that the step is running on. */ 2201 char *container; /* OCI container bundle path */ 2202 char *container_id; /* OCI container ID */ 2203 uint32_t cpu_freq_min; /* Minimum cpu frequency */ 2204 uint32_t cpu_freq_max; /* Maximum cpu frequency */ 2205 uint32_t cpu_freq_gov; /* cpu frequency governor */ 2206 char *cpus_per_tres; /* comma delimited list of TRES=# values */ 2207 char *mem_per_tres; /* comma delimited list of TRES=# values */ 2208 char *name; /* name of job step */ 2209 char *network; /* network specs for job step */ 2210 char *nodes; /* list of nodes allocated to job_step */ 2211 int32_t *node_inx; /* list index pairs into node_table for *nodes: 2212 * start_range_1, end_range_1, 2213 * start_range_2, .., -1 */ 2214 uint32_t num_cpus; /* how many cpus are being used by step */ 2215 uint32_t num_tasks; /* number of tasks */ 2216 char *partition; /* name of assigned partition */ 2217 char *resv_ports; /* ports allocated for MPI */ 2218 time_t run_time; /* net run time (factor out time suspended) */ 2219 char *srun_host; /* host of srun command */ 2220 uint32_t srun_pid; /* PID of srun command */ 2221 time_t start_time; /* step start time */ 2222 uint16_t start_protocol_ver; /* Slurm version step was started with 2223 * either srun or the lowest slurmd version 2224 * it is talking to */ 2225 uint32_t state; /* state of the step, see enum job_states */ 2226 slurm_step_id_t step_id; 2227 char *submit_line; /* The command issued with all it's options in a 2228 * string */ 2229 uint32_t task_dist; /* see enum task_dist_state */ 2230 uint32_t time_limit; /* step time limit */ 2231 char *tres_alloc_str; /* tres used in the job */ 2232 char *tres_bind; /* Task to TRES binding directives */ 2233 char *tres_freq; /* TRES frequency directives */ 2234 char *tres_per_step; /* comma delimited list of TRES=# values */ 2235 char 
*tres_per_node; /* comma delimited list of TRES=# values */ 2236 char *tres_per_socket; /* comma delimited list of TRES=# values */ 2237 char *tres_per_task; /* comma delimited list of TRES=# values */ 2238 uint32_t user_id; /* user the job runs as */ 2239 } job_step_info_t; 2240 2241 typedef struct job_step_info_response_msg { 2242 time_t last_update; /* time of latest info */ 2243 uint32_t job_step_count; /* number of records */ 2244 job_step_info_t *job_steps; /* the job step records */ 2245 } job_step_info_response_msg_t; 2246 2247 typedef struct { 2248 char *node_name; 2249 uint32_t *pid; 2250 uint32_t pid_cnt; 2251 } job_step_pids_t; 2252 2253 typedef struct { 2254 list_t *pid_list; /* list of job_step_pids_t *'s */ 2255 slurm_step_id_t step_id; 2256 } job_step_pids_response_msg_t; 2257 2258 typedef struct { 2259 jobacctinfo_t *jobacct; 2260 uint32_t num_tasks; 2261 uint32_t return_code; 2262 job_step_pids_t *step_pids; 2263 } job_step_stat_t; 2264 2265 typedef struct { 2266 list_t *stats_list; /* list of job_step_stat_t *'s */ 2267 slurm_step_id_t step_id; 2268 } job_step_stat_response_msg_t; 2269 2270 typedef struct node_info { 2271 char *arch; /* computer architecture */ 2272 char *bcast_address; /* BcastAddr (optional) */ 2273 uint16_t boards; /* total number of boards per node */ 2274 time_t boot_time; /* time of node boot */ 2275 char *cluster_name; /* Cluster name ONLY set in federation */ 2276 uint16_t cores; /* number of cores per socket */ 2277 uint16_t core_spec_cnt; /* number of specialized cores on node */ 2278 uint32_t cpu_bind; /* Default task binding */ 2279 uint32_t cpu_load; /* CPU load * 100 */ 2280 uint64_t free_mem; /* free memory in MiB */ 2281 uint16_t cpus; /* configured count of cpus running on 2282 * the node */ 2283 uint16_t cpus_efctv; /* count of effective cpus on the node, 2284 i.e. cpus minus specialized cpus */ 2285 char *cpu_spec_list; /* node's specialized cpus */ 2286 acct_gather_energy_t *energy; /* energy data */ 2287 ext_sensors_data_t *ext_sensors; /* external sensor data */ 2288 char *extra; /* arbitrary string */ 2289 power_mgmt_data_t *power; /* power management data */ 2290 char *features; /* list of a node's available features */ 2291 char *features_act; /* list of a node's current active features, 2292 * Same as "features" if NULL */ 2293 char *gres; /* list of a node's generic resources */ 2294 char *gres_drain; /* list of drained GRES */ 2295 char *gres_used; /* list of GRES in current use */ 2296 char *instance_id; /* cloud instance id */ 2297 char *instance_type; /* cloud instance type */ 2298 time_t last_busy; /* time node was last busy (i.e.
no jobs) */ 2299 char *mcs_label; /* mcs label if mcs plugin in use */ 2300 uint64_t mem_spec_limit; /* MB memory limit for specialization */ 2301 char *name; /* node name to slurm */ 2302 uint32_t next_state; /* state after reboot (enum node_states) */ 2303 char *node_addr; /* communication name (optional) */ 2304 char *node_hostname; /* node's hostname (optional) */ 2305 uint32_t node_state; /* see enum node_states */ 2306 char *os; /* operating system currently running */ 2307 uint32_t owner; /* User allowed to use this node or NO_VAL */ 2308 char *partitions; /* Comma separated list of partitions containing 2309 * this node, NOT supplied by slurmctld, but 2310 * populated by scontrol */ 2311 uint16_t port; /* TCP port number of the slurmd */ 2312 uint64_t real_memory; /* configured MB of real memory on the node */ 2313 char *comment; /* arbitrary comment */ 2314 char *reason; /* reason for node being DOWN or DRAINING */ 2315 time_t reason_time; /* Time stamp when reason was set, ignore if 2316 * no reason is set. */ 2317 uint32_t reason_uid; /* User that set the reason, ignore if 2318 * no reason is set. */ 2319 time_t resume_after; /* automatically resume DOWN or DRAINED node at 2320 * this point in time */ 2321 char *resv_name; /* If node is in a reservation this is 2322 * the name of the reservation */ 2323 dynamic_plugin_data_t *select_nodeinfo; /* opaque data structure, 2324 * use 2325 * slurm_get_select_nodeinfo() 2326 * to access contents */ 2327 time_t slurmd_start_time;/* time of slurmd startup */ 2328 uint16_t sockets; /* total number of sockets per node */ 2329 uint16_t threads; /* number of threads per core */ 2330 uint32_t tmp_disk; /* configured MB of total disk in TMP_FS */ 2331 uint32_t weight; /* arbitrary priority of node for scheduling */ 2332 char *tres_fmt_str; /* str representing configured TRES on node */ 2333 char *version; /* Slurm version number */ 2334 } node_info_t; 2335 2336 typedef struct node_info_msg { 2337 time_t last_update; /* time of latest info */ 2338 uint32_t record_count; /* number of records */ 2339 node_info_t *node_array; /* the node records */ 2340 } node_info_msg_t; 2341 2342 typedef struct front_end_info { 2343 char *allow_groups; /* allowed group string */ 2344 char *allow_users; /* allowed user string */ 2345 time_t boot_time; /* Time of node boot, 2346 * computed from up_time */ 2347 char *deny_groups; /* denied group string */ 2348 char *deny_users; /* denied user string */ 2349 char *name; /* node name */ 2350 uint32_t node_state; /* see enum node_states */ 2351 char *reason; /* reason for node being DOWN or 2352 * DRAINING */ 2353 time_t reason_time; /* Time stamp when reason was set, 2354 * ignore if no reason is set. */ 2355 uint32_t reason_uid; /* User that set the reason, 2356 * ignore if no reason is set. 
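 *
 * For reference, the node records defined above are usually obtained and
 * walked as in this sketch; slurm_load_node(), slurm_free_node_info_msg()
 * and SHOW_ALL are declared elsewhere in this header:
 *
 *	node_info_msg_t *nodes = NULL;
 *
 *	if (slurm_load_node((time_t) 0, &nodes, SHOW_ALL) == SLURM_SUCCESS) {
 *		for (uint32_t i = 0; i < nodes->record_count; i++) {
 *			node_info_t *n = &nodes->node_array[i];
 *			printf("%s: %u cpus, %"PRIu64" MB\n", n->name,
 *			       (unsigned) n->cpus, n->real_memory);
 *		}
 *		slurm_free_node_info_msg(nodes);
 *	}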
*/ 2357 time_t slurmd_start_time; /* Time of slurmd startup */ 2358 char *version; /* Slurm version number */ 2359 } front_end_info_t; 2360 2361 typedef struct front_end_info_msg { 2362 time_t last_update; /* time of latest info */ 2363 uint32_t record_count; /* number of records */ 2364 front_end_info_t *front_end_array; /* the front_end records */ 2365 } front_end_info_msg_t; 2366 2367 typedef struct topo_info { 2368 uint16_t level; /* level in hierarchy, leaf=0 */ 2369 uint32_t link_speed; /* link speed, arbitrary units */ 2370 char *name; /* switch name */ 2371 char *nodes; /* names of direct descendant nodes */ 2372 char *switches; /* names of direct descendant switches */ 2373 } topo_info_t; 2374 2375 typedef struct topo_info_response_msg { 2376 uint32_t record_count; /* number of records */ 2377 topo_info_t *topo_array; /* the switch topology records */ 2378 dynamic_plugin_data_t *topo_info; 2379 } topo_info_response_msg_t; 2380 2381 typedef struct job_alloc_info_msg { 2382 uint32_t job_id; /* job ID */ 2383 char *req_cluster; /* requesting cluster */ 2384 } job_alloc_info_msg_t; 2385 2386 typedef struct { 2387 uint32_t array_task_id; /* task_id of a job array or NO_VAL */ 2388 uint32_t het_job_offset; /* het_job_offset or NO_VAL */ 2389 slurm_step_id_t step_id; 2390 } slurm_selected_step_t; 2391 2392 typedef slurm_selected_step_t step_alloc_info_msg_t; 2393 2394 typedef struct acct_gather_node_resp_msg { 2395 acct_gather_energy_t *energy; 2396 char *node_name; /* node name */ 2397 uint16_t sensor_cnt; 2398 } acct_gather_node_resp_msg_t; 2399 2400 typedef struct acct_gather_energy_req_msg { 2401 uint16_t context_id; 2402 uint16_t delta; 2403 } acct_gather_energy_req_msg_t; 2404 2405 #define JOB_DEF_CPU_PER_GPU 0x0001 2406 #define JOB_DEF_MEM_PER_GPU 0x0002 2407 typedef struct job_defaults { 2408 uint16_t type; /* See JOB_DEF_* above */ 2409 uint64_t value; /* Value */ 2410 } job_defaults_t; 2411 2412 /* Current partition state information, also used to set partition options 2413 * using slurm_update_partition(). */ 2414 #define PART_FLAG_DEFAULT SLURM_BIT(0) /* Set if default partition */ 2415 #define PART_FLAG_HIDDEN SLURM_BIT(1) /* Set if partition is hidden */ 2416 #define PART_FLAG_NO_ROOT SLURM_BIT(2) /* Set if user root jobs disabled */ 2417 #define PART_FLAG_ROOT_ONLY SLURM_BIT(3) /* Set if only root can submit jobs */ 2418 #define PART_FLAG_REQ_RESV SLURM_BIT(4) /* Set if reservation is required */ 2419 #define PART_FLAG_LLN SLURM_BIT(5) /* Set if least loaded node selection 2420 * is desired */ 2421 #define PART_FLAG_EXCLUSIVE_USER SLURM_BIT(6)/* Set if nodes allocated exclusively 2422 * by user */ 2423 #define PART_FLAG_PDOI SLURM_BIT(7) /* Set if nodes POWER_DOWN on IDLE, 2424 * after running jobs */ 2425 /* Used with slurm_update_partition() to clear flags associated with existing 2426 * partitions. For example, if a partition is currently hidden and you want 2427 * to make it visible then set flags to PART_FLAG_HIDDEN_CLR and call 2428 * slurm_update_partition().
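 *
 * For example (sketch only; update_part_msg_t, slurm_init_part_desc_msg()
 * and slurm_update_partition() are declared later in this header, and the
 * *_CLR flags are defined just below):
 *
 *	update_part_msg_t part;
 *
 *	slurm_init_part_desc_msg(&part);
 *	part.name  = "debug";
 *	part.flags = PART_FLAG_HIDDEN_CLR;
 *	if (slurm_update_partition(&part) != SLURM_SUCCESS)
 *		... handle the error ...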
*/ 2429 #define PART_FLAG_DEFAULT_CLR SLURM_BIT(8) /* Clear DEFAULT partition flag */ 2430 #define PART_FLAG_HIDDEN_CLR SLURM_BIT(9) /* Clear HIDDEN partition flag */ 2431 #define PART_FLAG_NO_ROOT_CLR SLURM_BIT(10) /* Clear NO_ROOT partition flag */ 2432 #define PART_FLAG_ROOT_ONLY_CLR SLURM_BIT(11) /* Clear ROOT_ONLY partition flag */ 2433 #define PART_FLAG_REQ_RESV_CLR SLURM_BIT(12) /* Clear RES_REQ partition flag */ 2434 #define PART_FLAG_LLN_CLR SLURM_BIT(13) /* Clear LLN partition flag */ 2435 #define PART_FLAG_EXC_USER_CLR SLURM_BIT(14) /* Clear EXCLUSIVE_USER flag */ 2436 #define PART_FLAG_PDOI_CLR SLURM_BIT(15) /* Clear PDOI partition flag */ 2437 2438 typedef struct partition_info { 2439 char *allow_alloc_nodes;/* list names of allowed allocating 2440 * nodes */ 2441 char *allow_accounts; /* comma delimited list of accounts, 2442 * null indicates all */ 2443 char *allow_groups; /* comma delimited list of groups, 2444 * null indicates all */ 2445 char *allow_qos; /* comma delimited list of qos, 2446 * null indicates all */ 2447 char *alternate; /* name of alternate partition */ 2448 char *billing_weights_str;/* per TRES billing weights string */ 2449 char *cluster_name; /* Cluster name ONLY set in federation */ 2450 uint16_t cr_type; /* see CR_* values */ 2451 uint32_t cpu_bind; /* Default task binding */ 2452 uint64_t def_mem_per_cpu; /* default MB memory per allocated CPU */ 2453 uint32_t default_time; /* minutes, NO_VAL or INFINITE */ 2454 char *deny_accounts; /* comma delimited list of denied accounts */ 2455 char *deny_qos; /* comma delimited list of denied qos */ 2456 uint16_t flags; /* see PART_FLAG_* above */ 2457 uint32_t grace_time; /* preemption grace time in seconds */ 2458 list_t *job_defaults_list; /* List of job_defaults_t elements */ 2459 char *job_defaults_str; /* String of job defaults, 2460 * used only for partition update RPC */ 2461 uint32_t max_cpus_per_node; /* maximum allocated CPUs per node */ 2462 uint32_t max_cpus_per_socket; /* maximum allocated CPUs per socket */ 2463 uint64_t max_mem_per_cpu; /* maximum MB memory per allocated CPU */ 2464 uint32_t max_nodes; /* per job or INFINITE */ 2465 uint16_t max_share; /* number of jobs to gang schedule */ 2466 uint32_t max_time; /* minutes or INFINITE */ 2467 uint32_t min_nodes; /* per job */ 2468 char *name; /* name of the partition */ 2469 int32_t *node_inx; /* list index pairs into node_table: 2470 * start_range_1, end_range_1, 2471 * start_range_2, .., -1 */ 2472 char *nodes; /* list names of nodes in partition */ 2473 char *nodesets; /* list of nodesets used by partition */ 2474 uint16_t over_time_limit; /* job's time limit can be exceeded by this 2475 * number of minutes before cancellation */ 2476 uint16_t preempt_mode; /* See PREEMPT_MODE_* in slurm/slurm.h */ 2477 uint16_t priority_job_factor; /* job priority weight factor */ 2478 uint16_t priority_tier; /* tier for scheduling and preemption */ 2479 char *qos_char; /* The partition QOS name */ 2480 uint16_t resume_timeout; /* time required in order to perform a node 2481 * resume operation */ 2482 uint16_t state_up; /* see PARTITION_ states above */ 2483 uint32_t suspend_time; /* node idle for this long before power save 2484 * mode */ 2485 uint16_t suspend_timeout; /* time required in order to perform a node 2486 * suspend operation */ 2487 uint32_t total_cpus; /* total number of cpus in the partition */ 2488 uint32_t total_nodes; /* total number of nodes in the partition */ 2489 char *tres_fmt_str; /* str of configured TRES in partition */ 2490 } 
partition_info_t; 2491 2492 typedef struct delete_partition_msg { 2493 char *name; /* name of partition to be delete */ 2494 } delete_part_msg_t; 2495 2496 typedef struct resource_allocation_response_msg { 2497 char *account; /* allocation account */ 2498 uint32_t job_id; /* assigned job id */ 2499 char *alias_list; /* node name/address/hostname aliases */ 2500 char *batch_host; /* host executing batch script */ 2501 uint32_t cpu_freq_min; /* Minimum cpu frequency */ 2502 uint32_t cpu_freq_max; /* Maximum cpu frequency */ 2503 uint32_t cpu_freq_gov; /* cpu frequency governor */ 2504 uint16_t *cpus_per_node;/* cpus per node */ 2505 uint32_t *cpu_count_reps;/* how many nodes have same cpu count */ 2506 uint32_t env_size; /* element count in environment */ 2507 char **environment; /* environment variables to set for job, 2508 * name=value pairs, one per line */ 2509 uint32_t error_code; /* error code for warning message */ 2510 gid_t gid; /* resolved group id of job */ 2511 char *group_name; /* resolved group name of job */ 2512 char *job_submit_user_msg;/* job_submit plugin user_msg */ 2513 slurm_addr_t *node_addr; /* network addresses */ 2514 uint32_t node_cnt; /* count of nodes */ 2515 char *node_list; /* assigned list of nodes */ 2516 uint16_t ntasks_per_board;/* number of tasks to invoke on each board */ 2517 uint16_t ntasks_per_core; /* number of tasks to invoke on each core */ 2518 uint16_t ntasks_per_tres;/* number of tasks that can access each gpu */ 2519 uint16_t ntasks_per_socket;/* number of tasks to invoke on 2520 * each socket */ 2521 uint32_t num_cpu_groups;/* size of cpus_per_node and cpu_count_reps */ 2522 char *partition; /* name of partition used to run job */ 2523 uint64_t pn_min_memory; /* minimum real memory per node OR 2524 * real memory per CPU | MEM_PER_CPU, 2525 * default=0 (no limit) */ 2526 char *qos; /* allocation qos */ 2527 char *resv_name; /* allocation reservation */ 2528 char *tres_per_node; /* comma delimited list of TRES=# values */ 2529 uid_t uid; /* resolved user id of job */ 2530 char *user_name; /* resolved user name of job */ 2531 void *working_cluster_rec; /* Cluster to direct remaining messages to. 2532 * slurmdb_cluster_rec_t* because slurm.h 2533 * doesn't know about slurmdb.h. */ 2534 } resource_allocation_response_msg_t; 2535 2536 typedef struct partition_info_msg { 2537 time_t last_update; /* time of latest info */ 2538 uint32_t record_count; /* number of records */ 2539 partition_info_t *partition_array; /* the partition records */ 2540 } partition_info_msg_t; 2541 2542 typedef struct will_run_response_msg { 2543 uint32_t job_id; /* ID of job to start */ 2544 char *job_submit_user_msg; /* job submit plugin user_msg */ 2545 char *node_list; /* nodes where job will start */ 2546 char *part_name; /* partition where job will start */ 2547 list_t *preemptee_job_id; /* jobs preempted to start this job */ 2548 uint32_t proc_cnt; /* CPUs allocated to job at start */ 2549 time_t start_time; /* time when job will start */ 2550 double sys_usage_per; /* System usage percentage */ 2551 } will_run_response_msg_t; 2552 2553 /*********************************/ 2554 2555 /* 2556 * Resource reservation data structures. 
2557 * Create, show, modify and delete functions are required 2558 */ 2559 #define RESERVE_FLAG_MAINT SLURM_BIT(0) /* Set MAINT flag */ 2560 #define RESERVE_FLAG_NO_MAINT SLURM_BIT(1) /* Clear MAINT flag */ 2561 #define RESERVE_FLAG_DAILY SLURM_BIT(2) /* Set DAILY flag */ 2562 #define RESERVE_FLAG_NO_DAILY SLURM_BIT(3) /* Clear DAILY flag */ 2563 #define RESERVE_FLAG_WEEKLY SLURM_BIT(4) /* Set WEEKLY flag */ 2564 #define RESERVE_FLAG_NO_WEEKLY SLURM_BIT(5) /* Clear WEEKLY flag */ 2565 #define RESERVE_FLAG_IGN_JOBS SLURM_BIT(6) /* Ignore running jobs */ 2566 #define RESERVE_FLAG_NO_IGN_JOB SLURM_BIT(7) /* Clear ignore running 2567 * jobs flag */ 2568 #define RESERVE_FLAG_ANY_NODES SLURM_BIT(8) /* Use any compute nodes */ 2569 #define RESERVE_FLAG_NO_ANY_NODES SLURM_BIT(9) /* Clear any compute 2570 * node flag */ 2571 #define RESERVE_FLAG_STATIC SLURM_BIT(10) /* Static node allocation */ 2572 #define RESERVE_FLAG_NO_STATIC SLURM_BIT(11) /* Clear static node 2573 * allocation */ 2574 #define RESERVE_FLAG_PART_NODES SLURM_BIT(12) /* Use partition nodes only */ 2575 #define RESERVE_FLAG_NO_PART_NODES SLURM_BIT(13) /* Clear partition 2576 * nodes only flag */ 2577 #define RESERVE_FLAG_OVERLAP SLURM_BIT(14) /* Permit to overlap others */ 2578 #define RESERVE_FLAG_SPEC_NODES SLURM_BIT(15) /* Contains specific nodes */ 2579 /* SLURM_BIT(16) Available 2 versions after 23.11 */ 2580 #define RESERVE_FLAG_TIME_FLOAT SLURM_BIT(17) /* Time offset is relative */ 2581 #define RESERVE_FLAG_REPLACE SLURM_BIT(18) /* Replace resources 2582 * as assigned to jobs */ 2583 #define RESERVE_FLAG_ALL_NODES SLURM_BIT(19) /* Use all compute nodes */ 2584 #define RESERVE_FLAG_PURGE_COMP SLURM_BIT(20) /* Purge reservation 2585 * after last job done */ 2586 #define RESERVE_FLAG_WEEKDAY SLURM_BIT(21) /* Set WEEKDAY flag */ 2587 #define RESERVE_FLAG_NO_WEEKDAY SLURM_BIT(22) /* Clear WEEKDAY flag */ 2588 #define RESERVE_FLAG_WEEKEND SLURM_BIT(23) /* Set WEEKEND flag */ 2589 #define RESERVE_FLAG_NO_WEEKEND SLURM_BIT(24) /* Clear WEEKEND flag */ 2590 #define RESERVE_FLAG_FLEX SLURM_BIT(25) /* Set FLEX flag */ 2591 #define RESERVE_FLAG_NO_FLEX SLURM_BIT(26) /* Clear FLEX flag */ 2592 #define RESERVE_FLAG_DUR_PLUS SLURM_BIT(27) /* Add duration time, 2593 * only used on 2594 * modifying a 2595 * reservation */ 2596 #define RESERVE_FLAG_DUR_MINUS SLURM_BIT(28) /* Remove duration time, 2597 * only used on 2598 * modifying a 2599 * reservation */ 2600 #define RESERVE_FLAG_NO_HOLD_JOBS SLURM_BIT(29) /* No hold jobs after 2601 * end of reservation */ 2602 #define RESERVE_FLAG_REPLACE_DOWN SLURM_BIT(30) /* Replace DOWN or 2603 * DRAINED nodes */ 2604 #define RESERVE_FLAG_NO_PURGE_COMP SLURM_BIT(31) /* Clear PURGE flag */ 2605 2606 #define RESERVE_FLAG_MAGNETIC SLURM_BIT(32) /* Allow jobs to run 2607 * without specifying 2608 * the reservation name 2609 * if they meet 2610 * eligibility status */ 2611 #define RESERVE_FLAG_NO_MAGNETIC SLURM_BIT(33) /* Clear MAGNETIC flag */ 2612 #define RESERVE_FLAG_SKIP SLURM_BIT(34) /* Skip/delete 2613 * next/current 2614 * reservation without 2615 * deleting the 2616 * reservation proper */ 2617 #define RESERVE_FLAG_HOURLY SLURM_BIT(35) /* Set HOURLY flag */ 2618 #define RESERVE_FLAG_NO_HOURLY SLURM_BIT(36) /* Clear HOURLY flag */ 2619 #define RESERVE_FLAG_GRES_REQ SLURM_BIT(37) /* There has been GRES 2620 * requested */ 2621 #define RESERVE_TRES_PER_NODE SLURM_BIT(38) /* If the tres_str is per-node 2622 * instead of per-job */ 2623 2624 #define RESERVE_REOCCURRING (RESERVE_FLAG_HOURLY | 
RESERVE_FLAG_DAILY | \ 2625 RESERVE_FLAG_WEEKLY | RESERVE_FLAG_WEEKDAY | \ 2626 RESERVE_FLAG_WEEKEND) 2627 2628 typedef struct resv_core_spec { 2629 char *node_name; /* Name of reserved node */ 2630 char *core_id; /* IDs of reserved cores */ 2631 } resv_core_spec_t; 2632 2633 typedef struct reserve_info { 2634 char *accounts; /* names of accounts permitted to use */ 2635 char *burst_buffer; /* burst buffer resources to be included */ 2636 char *comment; /* arbitrary comment */ 2637 uint32_t core_cnt; /* count of cores required */ 2638 uint32_t core_spec_cnt; /* count of core_spec records */ 2639 resv_core_spec_t *core_spec; /* reserved cores specification */ 2640 time_t end_time; /* end time of reservation */ 2641 char *features; /* required node features */ 2642 uint64_t flags; /* see RESERVE_FLAG_* above */ 2643 char *groups; /* names of linux Groups permitted to use */ 2644 char *licenses; /* names of licenses to be reserved */ 2645 uint32_t max_start_delay;/* Maximum delay in which jobs outside of the 2646 * reservation will be permitted to overlap 2647 * once any jobs are queued for the 2648 * reservation */ 2649 char *name; /* name of reservation */ 2650 uint32_t node_cnt; /* count of nodes required */ 2651 int32_t *node_inx; /* list index pairs into node_table for *nodes: 2652 * start_range_1, end_range_1, 2653 * start_range_2, .., -1 */ 2654 char *node_list; /* list of reserved nodes or ALL */ 2655 char *partition; /* name of partition to be used */ 2656 uint32_t purge_comp_time; /* If PURGE_COMP flag is set the amount of 2657 * minutes this reservation will sit idle 2658 * until it is revoked. 2659 */ 2660 time_t start_time; /* start time of reservation */ 2661 uint32_t resv_watts; /* amount of power to reserve */ 2662 char *tres_str; /* list of TRES's used by reservation */ 2663 char *users; /* names of users permitted to use */ 2664 } reserve_info_t; 2665 2666 typedef struct reserve_info_msg { 2667 time_t last_update; /* time of latest info */ 2668 uint32_t record_count; /* number of records */ 2669 reserve_info_t *reservation_array; /* the reservation records */ 2670 } reserve_info_msg_t; 2671 2672 typedef struct resv_desc_msg { 2673 char *accounts; /* names of accounts permitted to use */ 2674 char *burst_buffer; /* burst buffer resources to be included */ 2675 char *comment; /* arbitrary comment */ 2676 uint32_t core_cnt; /* Count of cores required */ 2677 uint32_t duration; /* duration of reservation in minutes */ 2678 time_t end_time; /* end time of reservation */ 2679 char *features; /* required node features */ 2680 uint64_t flags; /* see RESERVE_FLAG_* above */ 2681 char *groups; /* names of linux groups permitted to use */ 2682 void *job_ptr; /* internal use only DON'T PACK */ 2683 char *licenses; /* names of licenses to be reserved */ 2684 uint32_t max_start_delay;/* Maximum delay in which jobs outside of the 2685 * reservation will be permitted to overlap 2686 * once any jobs are queued for the 2687 * reservation */ 2688 char *name; /* name of reservation (optional on create) */ 2689 uint32_t node_cnt; /* Count of nodes required. */ 2690 char *node_list; /* list of reserved nodes or ALL */ 2691 char *partition; /* name of partition to be used */ 2692 uint32_t purge_comp_time; /* If PURGE_COMP flag is set the amount of 2693 * minutes this reservation will sit idle 2694 * until it is revoked. 
2695 */ 2696 time_t start_time; /* start time of reservation */ 2697 uint32_t resv_watts; /* amount of power to reserve */ 2698 char *tres_str; /* list of TRES's used by reservation */ 2699 char *users; /* names of users permitted to use */ 2700 } resv_desc_msg_t; 2701 2702 typedef struct reserve_response_msg { 2703 char *name; /* name of reservation */ 2704 } reserve_response_msg_t; 2705 2706 typedef struct reservation_name_msg { 2707 char *name; /* name of reservation just created or 2708 * to be delete */ 2709 } reservation_name_msg_t; 2710 2711 2712 #define DEBUG_FLAG_SELECT_TYPE SLURM_BIT(0) /* SelectType plugin */ 2713 #define DEBUG_FLAG_STEPS SLURM_BIT(1) /* slurmctld steps */ 2714 #define DEBUG_FLAG_TRIGGERS SLURM_BIT(2) /* slurmctld triggers */ 2715 #define DEBUG_FLAG_CPU_BIND SLURM_BIT(3) /* CPU binding */ 2716 #define DEBUG_FLAG_NET_RAW SLURM_BIT(4) /* Raw Network dumps */ 2717 #define DEBUG_FLAG_NO_CONF_HASH SLURM_BIT(5) /* no warning about 2718 * slurm.conf files checksum 2719 * mismatch */ 2720 #define DEBUG_FLAG_GRES SLURM_BIT(6) /* Generic Resource info */ 2721 #define DEBUG_FLAG_MPI SLURM_BIT(7) /* MPI debug */ 2722 #define DEBUG_FLAG_DATA SLURM_BIT(8) /* data_t logging */ 2723 #define DEBUG_FLAG_WORKQ SLURM_BIT(9) /* Work Queue */ 2724 #define DEBUG_FLAG_NET SLURM_BIT(10) /* Network logging */ 2725 #define DEBUG_FLAG_PRIO SLURM_BIT(11) /* debug for priority 2726 * plugin */ 2727 #define DEBUG_FLAG_BACKFILL SLURM_BIT(12) /* debug for 2728 * sched/backfill */ 2729 #define DEBUG_FLAG_GANG SLURM_BIT(13) /* debug gang scheduler */ 2730 #define DEBUG_FLAG_RESERVATION SLURM_BIT(14) /* advanced reservations */ 2731 #define DEBUG_FLAG_FRONT_END SLURM_BIT(15) /* front-end nodes */ 2732 #define DEBUG_FLAG_SACK SLURM_BIT(16) /* SACK handling */ 2733 #define DEBUG_FLAG_SWITCH SLURM_BIT(17) /* SwitchType plugin */ 2734 #define DEBUG_FLAG_ENERGY SLURM_BIT(18) /* AcctGatherEnergy plugin */ 2735 #define DEBUG_FLAG_EXT_SENSORS SLURM_BIT(19) /* ExtSensorsType plugin */ 2736 #define DEBUG_FLAG_LICENSE SLURM_BIT(20) /* AcctGatherProfile 2737 * plugin */ 2738 #define DEBUG_FLAG_PROFILE SLURM_BIT(21) /* AcctGatherProfile 2739 * plugin */ 2740 #define DEBUG_FLAG_INTERCONNECT SLURM_BIT(22) /* AcctGatherInterconnect 2741 * plugin */ 2742 /* #define SLURM_BIT(23) /\* UNUSED *\/ */ 2743 #define DEBUG_FLAG_JOB_CONT SLURM_BIT(24) /* JobContainer plugin */ 2744 /* #define SLURM_BIT(25) /\* UNUSED *\/ */ 2745 #define DEBUG_FLAG_PROTOCOL SLURM_BIT(26) /* Communication protocol */ 2746 #define DEBUG_FLAG_BACKFILL_MAP SLURM_BIT(27) /* Backfill scheduler node 2747 * map */ 2748 #define DEBUG_FLAG_TRACE_JOBS SLURM_BIT(28) /* Trace jobs by id 2749 * and state */ 2750 #define DEBUG_FLAG_ROUTE SLURM_BIT(29) /* Route plugin */ 2751 #define DEBUG_FLAG_DB_ASSOC SLURM_BIT(30) /* Association debug */ 2752 #define DEBUG_FLAG_DB_EVENT SLURM_BIT(31) /* Event debug */ 2753 #define DEBUG_FLAG_DB_JOB SLURM_BIT(32) /* Database job debug */ 2754 #define DEBUG_FLAG_DB_QOS SLURM_BIT(33) /* QOS debug */ 2755 #define DEBUG_FLAG_DB_QUERY SLURM_BIT(34) /* Database query debug */ 2756 #define DEBUG_FLAG_DB_RESV SLURM_BIT(35) /* Reservation debug */ 2757 #define DEBUG_FLAG_DB_RES SLURM_BIT(36) /* Resource debug */ 2758 #define DEBUG_FLAG_DB_STEP SLURM_BIT(37) /* Database step debug */ 2759 #define DEBUG_FLAG_DB_USAGE SLURM_BIT(38) /* Usage/Rollup debug */ 2760 #define DEBUG_FLAG_DB_WCKEY SLURM_BIT(39) /* Database WCKey debug */ 2761 #define DEBUG_FLAG_BURST_BUF SLURM_BIT(40) /* Burst buffer plugin */ 2762 #define 
DEBUG_FLAG_CPU_FREQ SLURM_BIT(41) /* --cpu_freq debug */ 2763 #define DEBUG_FLAG_POWER SLURM_BIT(42) /* Power plugin debug */ 2764 #define DEBUG_FLAG_TIME_CRAY SLURM_BIT(43) /* Time Cray components */ 2765 #define DEBUG_FLAG_DB_ARCHIVE SLURM_BIT(44) /* DBD Archiving/Purging */ 2766 #define DEBUG_FLAG_DB_TRES SLURM_BIT(45) /* Database TRES debug */ 2767 #define DEBUG_FLAG_JOBCOMP SLURM_BIT(46) /* JobComp debug */ 2768 #define DEBUG_FLAG_NODE_FEATURES SLURM_BIT(47) /* Node Features debug */ 2769 #define DEBUG_FLAG_FEDR SLURM_BIT(48) /* Federation debug */ 2770 #define DEBUG_FLAG_HETJOB SLURM_BIT(49) /* Heterogeneous job debug */ 2771 #define DEBUG_FLAG_ACCRUE SLURM_BIT(50) /* Accrue counters debug */ 2772 /* #define SLURM_BIT(51) /\* UNUSED *\/ */ 2773 #define DEBUG_FLAG_AGENT SLURM_BIT(52) /* RPC Agent debug */ 2774 #define DEBUG_FLAG_DEPENDENCY SLURM_BIT(53) /* Dependency debug */ 2775 #define DEBUG_FLAG_JAG SLURM_BIT(54) /* Job Account Gather debug */ 2776 #define DEBUG_FLAG_CGROUP SLURM_BIT(55) /* cgroup debug */ 2777 #define DEBUG_FLAG_SCRIPT SLURM_BIT(56) /* slurmscriptd debug */ 2778 2779 #define PREEMPT_MODE_OFF 0x0000 /* disable job preemption */ 2780 #define PREEMPT_MODE_SUSPEND 0x0001 /* suspend jobs to preempt */ 2781 #define PREEMPT_MODE_REQUEUE 0x0002 /* requeue or kill jobs to preempt */ 2782 2783 #define PREEMPT_MODE_CANCEL 0x0008 /* always cancel the job */ 2784 #define PREEMPT_MODE_COND_OFF 0x0010 /* represents PREEMPT_MODE_OFF in list*/ 2785 #define PREEMPT_MODE_WITHIN 0x4000 /* enable preemption within qos */ 2786 #define PREEMPT_MODE_GANG 0x8000 /* enable gang scheduling */ 2787 2788 #define RECONFIG_KEEP_PART_INFO SLURM_BIT(0) /* keep dynamic partition info on scontrol reconfig */ 2789 #define RECONFIG_KEEP_PART_STAT SLURM_BIT(1) /* keep dynamic partition state on scontrol reconfig */ 2790 #define RECONFIG_KEEP_POWER_SAVE_SETTINGS SLURM_BIT(2) /* keep dynamic power save settings on scontrol reconfig */ 2791 2792 #define HEALTH_CHECK_NODE_IDLE 0x0001 /* execute on idle nodes */ 2793 #define HEALTH_CHECK_NODE_ALLOC 0x0002 /* execute on fully allocated nodes */ 2794 #define HEALTH_CHECK_NODE_MIXED 0x0004 /* execute on partially allocated nodes */ 2795 #define HEALTH_CHECK_NODE_NONDRAINED_IDLE 0x0008 /* execute on idle nodes that 2796 * are not drained */ 2797 #define HEALTH_CHECK_CYCLE 0x8000 /* cycle through nodes node */ 2798 #define HEALTH_CHECK_NODE_ANY 0x000f /* execute on all node states */ 2799 2800 #define PROLOG_FLAG_ALLOC 0x0001 /* execute prolog upon allocation */ 2801 #define PROLOG_FLAG_NOHOLD 0x0002 /* don't block salloc/srun until 2802 * slurmctld knows the prolog has 2803 * run on each node in the allocation */ 2804 #define PROLOG_FLAG_CONTAIN 0x0004 /* Use proctrack plugin to create a 2805 * container upon allocation */ 2806 #define PROLOG_FLAG_SERIAL 0x0008 /* serially execute prolog/epilog */ 2807 #define PROLOG_FLAG_X11 0x0010 /* enable slurm x11 forwarding support */ 2808 #define PROLOG_FLAG_DEFER_BATCH 0x0020 /* defer REQUEST_BATCH_JOB_LAUNCH until prolog end on all nodes */ 2809 #define PROLOG_FLAG_FORCE_REQUEUE_ON_FAIL 0x0040 /* always requeue job on prolog failure */ 2810 2811 #define CTL_CONF_OR SLURM_BIT(0) /*SlurmdParameters=config_overrides*/ 2812 #define CTL_CONF_SJC SLURM_BIT(1) /* AccountingStoreFlags=job_comment*/ 2813 #define CTL_CONF_DRJ SLURM_BIT(2) /* DisableRootJobs */ 2814 #define CTL_CONF_ASRU SLURM_BIT(3) /* AllowSpecResourcesUsage */ 2815 #define CTL_CONF_PAM SLURM_BIT(4) /* UsePam */ 2816 #define CTL_CONF_WCKEY SLURM_BIT(5) /* 
TrackWCKey */ 2817 #define CTL_CONF_IPV4_ENABLED SLURM_BIT(6) /* IPv4 is enabled */ 2818 #define CTL_CONF_IPV6_ENABLED SLURM_BIT(7) /* IPv6 is enabled */ 2819 #define CTL_CONF_SJX SLURM_BIT(8) /* AccountingStoreFlags=job_extra */ 2820 #define CTL_CONF_SJS SLURM_BIT(9) /* AccountingStoreFlags=job_script */ 2821 #define CTL_CONF_SJE SLURM_BIT(10) /* AccountingStoreFlags=job_env */ 2822 2823 #define LOG_FMT_ISO8601_MS 0 2824 #define LOG_FMT_ISO8601 1 2825 #define LOG_FMT_RFC5424_MS 2 2826 #define LOG_FMT_RFC5424 3 2827 #define LOG_FMT_CLOCK 4 2828 #define LOG_FMT_SHORT 5 2829 #define LOG_FMT_THREAD_ID 6 2830 #define LOG_FMT_RFC3339 7 2831 #define LOG_FMT_FORMAT_STDERR SLURM_BIT(15) 2832 2833 /* 2834 * If adding to slurm_conf_t contents that need to be used in the slurmstepd 2835 * please remember to add those to [un]pack_slurm_conf_lite() in 2836 * src/slurmd/common/slurmstepd_init.[h|c] 2837 */ 2838 typedef struct { 2839 time_t last_update; /* last update time of the build parameters */ 2840 char *accounting_storage_tres; /* list of tres */ 2841 uint16_t accounting_storage_enforce; /* job requires valid association: 2842 * user/account/partition/cluster */ 2843 char *accounting_storage_backup_host; /* accounting storage 2844 * backup host */ 2845 char *accounting_storage_ext_host; /* accounting storage ext host */ 2846 char *accounting_storage_host; /* accounting storage host */ 2847 char *accounting_storage_params; /* accounting storage params */ 2848 char *accounting_storage_pass; /* accounting storage 2849 * password */ 2850 uint16_t accounting_storage_port;/* node accounting storage port */ 2851 char *accounting_storage_type; /* accounting storage type */ 2852 char *accounting_storage_user; /* accounting storage user */ 2853 void *acct_gather_conf; /* account gather config */ 2854 char *acct_gather_energy_type; /* energy accounting type */ 2855 char *acct_gather_profile_type; /* profile accounting type */ 2856 char *acct_gather_interconnect_type; /* interconnect accounting type */ 2857 char *acct_gather_filesystem_type; /* filesystem accounting type */ 2858 uint16_t acct_gather_node_freq; /* secs between node acct request */ 2859 char *authalttypes; /* alternate authentication types */ 2860 char *authinfo; /* authentication info */ 2861 char *authalt_params; /* alternate authentication parameters */ 2862 char *authtype; /* authentication type */ 2863 uint16_t batch_start_timeout; /* max secs for batch job to start */ 2864 char *bb_type; /* burst buffer plugin type */ 2865 char *bcast_exclude; /* Bcast exclude library paths */ 2866 char *bcast_parameters; /* bcast options */ 2867 time_t boot_time; /* time slurmctld last booted */ 2868 void *cgroup_conf; /* cgroup support config file */ 2869 char *cli_filter_plugins; /* List of cli_filter plugins to use */ 2870 char *core_spec_plugin; /* core specialization plugin name */ 2871 char *cluster_name; /* general name of the entire cluster */ 2872 char *comm_params; /* Communication parameters */ 2873 uint16_t complete_wait; /* seconds to wait for job completion before 2874 * scheduling another job */ 2875 uint32_t conf_flags; /* various CTL_CONF_* flags to determine 2876 * settings */ 2877 char **control_addr; /* comm path of slurmctld 2878 * primary server and backups */ 2879 uint32_t control_cnt; /* Length of control_addr & control_machine */ 2880 char **control_machine; /* name of slurmctld primary 2881 * server and backups */ 2882 uint32_t cpu_freq_def; /* default cpu frequency / governor */ 2883 uint32_t cpu_freq_govs; /* cpu freq 
governors allowed */ 2884 char *cred_type; /* credential signature plugin */ 2885 uint64_t debug_flags; /* see DEBUG_FLAG_* above for values */ 2886 uint64_t def_mem_per_cpu; /* default MB memory per allocated CPU */ 2887 char *dependency_params; /* DependencyParameters */ 2888 uint16_t eio_timeout; /* timeout for the eio thread */ 2889 uint16_t enforce_part_limits; /* if set, reject job exceeding 2890 * partition size and/or time limits */ 2891 char *epilog; /* pathname of job epilog */ 2892 uint32_t epilog_msg_time; /* usecs for slurmctld to process an 2893 * epilog complete message */ 2894 char *epilog_slurmctld; /* pathname of job epilog run by slurmctld */ 2895 char *ext_sensors_type; /* external sensors plugin type */ 2896 uint16_t ext_sensors_freq; /* secs between ext sensors sampling */ 2897 void *ext_sensors_conf; /* external sensors config file*/ 2898 char *fed_params; /* Federation parameters */ 2899 uint32_t first_job_id; /* first slurm generated job_id to assign */ 2900 uint16_t fs_dampening_factor; /* dampening for Fairshare factor */ 2901 uint16_t getnameinfo_cache_timeout; /* for getnameinfo() cache*/ 2902 uint16_t get_env_timeout; /* timeout for srun --get-user-env option */ 2903 char * gres_plugins; /* list of generic resource plugins */ 2904 uint16_t group_time; /* update group time interval */ 2905 uint16_t group_force; /* update group/partition info even if no change 2906 * detected */ 2907 char *gpu_freq_def; /* default GPU frequency / voltage */ 2908 uint32_t hash_val; /* Hash value of the slurm.conf file */ 2909 uint16_t health_check_interval; /* secs between health checks */ 2910 uint16_t health_check_node_state; /* Node states on which to execute 2911 * health check program, see 2912 * HEALTH_CHECK_NODE_* above */ 2913 char * health_check_program; /* pathname of health check program */ 2914 uint16_t inactive_limit;/* seconds of inactivity before a 2915 * inactive resource allocation is released */ 2916 char *interactive_step_opts; /* InteractiveStepOptions */ 2917 char *job_acct_gather_freq; /* poll frequency for job accounting 2918 * gather plugins */ 2919 char *job_acct_gather_type; /* job accounting gather type */ 2920 char *job_acct_gather_params; /* job accounting gather parameters */ 2921 uint16_t job_acct_oom_kill; /* Enforce mem limit at runtime y|n */ 2922 char *job_comp_host; /* job completion logging host */ 2923 char *job_comp_loc; /* job completion logging location */ 2924 char *job_comp_params; /* job completion parameters for plugin */ 2925 char *job_comp_pass; /* job completion storage password */ 2926 uint32_t job_comp_port; /* job completion storage port */ 2927 char *job_comp_type; /* job completion storage type */ 2928 char *job_comp_user; /* job completion storage user */ 2929 char *job_container_plugin; /* job container plugin type */ 2930 list_t *job_defaults_list; /* list of job_defaults_t elements */ 2931 uint16_t job_file_append; /* if set, append to stdout/err file */ 2932 uint16_t job_requeue; /* If set, jobs get requeued on node failre */ 2933 char *job_submit_plugins; /* List of job_submit plugins to use */ 2934 uint32_t keepalive_interval; /* Interval between keepalive probes */ 2935 uint32_t keepalive_probes; /* Number of keepalive probe attempts */ 2936 uint32_t keepalive_time; /* Keep alive time for srun I/O sockets */ 2937 uint16_t kill_on_bad_exit; /* If set, the job will be 2938 * terminated immediately when one of 2939 * the processes is aborted or crashed */ 2940 uint16_t kill_wait; /* seconds between SIGXCPU to SIGKILL 2941 
* on job termination */ 2942 char *launch_params; /* step launcher plugin options */ 2943 char *licenses; /* licenses available on this cluster */ 2944 uint16_t log_fmt; /* Log file timestamp format */ 2945 char *mail_domain; /* default domain to append to usernames */ 2946 char *mail_prog; /* pathname of mail program */ 2947 uint32_t max_array_sz; /* Maximum job array size */ 2948 uint32_t max_batch_requeue; /* maximum number of requeues */ 2949 uint32_t max_dbd_msgs; /* maximum number of messages queued while DBD 2950 * is not connected */ 2951 uint32_t max_job_cnt; /* maximum number of active jobs */ 2952 uint32_t max_job_id; /* maximum job id before using first_job_id */ 2953 uint64_t max_mem_per_cpu; /* maximum MB memory per allocated CPU */ 2954 uint32_t max_node_cnt; /* max number of static + dynamic nodes */ 2955 uint32_t max_step_cnt; /* maximum number of steps per job */ 2956 uint16_t max_tasks_per_node; /* maximum tasks per node */ 2957 char *mcs_plugin; /* mcs plugin type */ 2958 char *mcs_plugin_params; /* mcs plugin parameters */ 2959 uint32_t min_job_age; /* COMPLETED jobs over this age (secs) 2960 * purged from in memory records */ 2961 void *mpi_conf; /* MPI support config file */ 2962 char *mpi_default; /* Default version of MPI in use */ 2963 char *mpi_params; /* MPI parameters */ 2964 uint16_t msg_timeout; /* message timeout */ 2965 uint32_t next_job_id; /* next slurm generated job_id to assign */ 2966 void *node_features_conf; /* Node Features Plugin config file */ 2967 char *node_features_plugins; /* List of node_features plugins to use */ 2968 char *node_prefix; /* prefix of nodes in partition, only set in 2969 bluegene clusters NULL otherwise */ 2970 uint16_t over_time_limit; /* job's time limit can be exceeded by this 2971 * number of minutes before cancellation */ 2972 char *plugindir; /* pathname to plugins */ 2973 char *plugstack; /* pathname to plugin stack config file */ 2974 char *power_parameters; /* power management parameters */ 2975 char *power_plugin; /* power management plugin type */ 2976 uint32_t preempt_exempt_time; /* Time before jobs are preemptable */ 2977 uint16_t preempt_mode; /* See PREEMPT_MODE_* in slurm/slurm.h */ 2978 char *preempt_params; /* PreemptParameters to tune preemption */ 2979 char *preempt_type; /* job preemption selection plugin */ 2980 char *prep_params; /* PrEp parameters */ 2981 char *prep_plugins; /* PrEp plugins */ 2982 uint32_t priority_decay_hl; /* priority decay half life in 2983 * seconds */ 2984 uint32_t priority_calc_period; /* seconds between priority decay 2985 * calculation */ 2986 uint16_t priority_favor_small; /* favor small jobs over large */ 2987 uint16_t priority_flags; /* set some flags for priority configuration, 2988 * see PRIORITY_FLAGS_* above */ 2989 uint32_t priority_max_age; /* time when not to add any more 2990 * priority to a job if reached */ 2991 char *priority_params; /* priority plugin parameters */ 2992 uint16_t priority_reset_period; /* when to clear usage, 2993 * see PRIORITY_RESET_* */ 2994 char *priority_type; /* priority type plugin */ 2995 uint32_t priority_weight_age; /* weight for age factor */ 2996 uint32_t priority_weight_assoc; /* weight for assoc factor */ 2997 uint32_t priority_weight_fs; /* weight for Fairshare factor */ 2998 uint32_t priority_weight_js; /* weight for Job Size factor */ 2999 uint32_t priority_weight_part; /* weight for Partition factor */ 3000 uint32_t priority_weight_qos; /* weight for QOS factor */ 3001 char *priority_weight_tres; /* weights (str) for different 
TRES' */ 3002 uint16_t private_data; /* block viewing of information, 3003 * see PRIVATE_DATA_* */ 3004 char *proctrack_type; /* process tracking plugin type */ 3005 char *prolog; /* pathname of job prolog run by slurmd */ 3006 uint16_t prolog_epilog_timeout; /* prolog/epilog timeout */ 3007 char *prolog_slurmctld; /* pathname of job prolog run by slurmctld */ 3008 uint16_t propagate_prio_process; /* process priority propagation, 3009 * see PROP_PRIO_* */ 3010 uint16_t prolog_flags; /* set some flags for prolog configuration 3011 see PROLOG_FLAG_* */ 3012 char *propagate_rlimits;/* Propagate (all/specific) resource limits */ 3013 char *propagate_rlimits_except;/* Propagate all rlimits except these */ 3014 char *reboot_program; /* program to reboot the node */ 3015 uint16_t reconfig_flags;/* see RECONFIG_* */ 3016 char *requeue_exit; /* requeue exit values */ 3017 char *requeue_exit_hold; /* requeue exit hold values */ 3018 char *resume_fail_program; /* program to handle failed resume tries */ 3019 char *resume_program; /* program to make nodes full power */ 3020 uint16_t resume_rate; /* nodes to make full power, per minute */ 3021 uint16_t resume_timeout;/* time required in order to perform a node 3022 * resume operation */ 3023 char *resv_epilog; /* path of reservation epilog run by slurmctld */ 3024 uint16_t resv_over_run; /* how long a running job can exceed 3025 * reservation time */ 3026 char *resv_prolog; /* path of reservation prolog run by slurmctld */ 3027 uint16_t ret2service; /* 1 return DOWN node to service at 3028 * registration */ 3029 char *sched_logfile; /* where slurm Scheduler log gets written */ 3030 uint16_t sched_log_level; /* configured level of slurm Scheduler log */ 3031 char *sched_params; /* SchedulerParameters OR 3032 * contents of scheduler plugin config file */ 3033 uint16_t sched_time_slice; /* gang scheduler slice time, secs */ 3034 char *schedtype; /* type of scheduler to use */ 3035 char *scron_params; /* ScronParameters */ 3036 char *select_type; /* type of node selector to use */ 3037 void *select_conf_key_pairs; /* key-pair list which can be 3038 * listed with slurm_print_key_pairs() */ 3039 uint16_t select_type_param; /* Parameters 3040 * describing the select_type plugin */ 3041 char *site_factor_plugin; /* PrioritySiteFactorPlugin */ 3042 char *site_factor_params; /* PrioritySiteFactorParameters */ 3043 char *slurm_conf; /* pathname of slurm config file */ 3044 uint32_t slurm_user_id; /* uid of slurm_user_name */ 3045 char *slurm_user_name; /* user that slurmctld runs as */ 3046 uint32_t slurmd_user_id;/* uid of slurmd_user_name */ 3047 char *slurmd_user_name; /* user that slurmd runs as */ 3048 char *slurmctld_addr; /* Address used for communications to the 3049 * currently active slurmctld daemon */ 3050 uint16_t slurmctld_debug; /* slurmctld logging level */ 3051 char *slurmctld_logfile;/* where slurmctld error log gets written */ 3052 char *slurmctld_pidfile;/* where to put slurmctld pidfile */ 3053 uint32_t slurmctld_port; /* default communications port to slurmctld */ 3054 uint16_t slurmctld_port_count; /* number of slurmctld comm ports */ 3055 char *slurmctld_primary_off_prog; /* Run when becomes slurmctld backup */ 3056 char *slurmctld_primary_on_prog; /* Run when becomes slurmctld primary */ 3057 uint16_t slurmctld_syslog_debug; /* slurmctld output to 3058 * local logfile and syslog*/ 3059 uint16_t slurmctld_timeout;/* seconds that backup controller waits 3060 * on non-responding primarly controller */ 3061 char *slurmctld_params; /* 
SlurmctldParameters */ 3062 uint16_t slurmd_debug; /* slurmd logging level */ 3063 char *slurmd_logfile; /* where slurmd error log gets written */ 3064 char *slurmd_params; /* SlurmdParameters */ 3065 char *slurmd_pidfile; /* where to put slurmd pidfile */ 3066 uint32_t slurmd_port; /* default communications port to slurmd */ 3067 char *slurmd_spooldir; /* where slurmd put temporary state info */ 3068 uint16_t slurmd_syslog_debug; /* slurmd output to 3069 * local logfile and syslog*/ 3070 uint16_t slurmd_timeout;/* how long slurmctld waits for slurmd before 3071 * considering node DOWN */ 3072 char *srun_epilog; /* srun epilog program */ 3073 uint16_t *srun_port_range; /* port range for srun */ 3074 char *srun_prolog; /* srun prolog program */ 3075 char *state_save_location;/* pathname of slurmctld state save 3076 * directory */ 3077 char *suspend_exc_nodes;/* nodes to not make power saving */ 3078 char *suspend_exc_parts;/* partitions to not make power saving */ 3079 char *suspend_exc_states; /* states that should not be powered down */ 3080 char *suspend_program; /* program to make nodes power saving */ 3081 uint16_t suspend_rate; /* nodes to make power saving, per minute */ 3082 uint32_t suspend_time; /* node idle for this long before power save mode */ 3083 uint16_t suspend_timeout;/* time required in order to perform a node 3084 * suspend operation */ 3085 char *switch_type; /* switch or interconnect type */ 3086 char *switch_param; /* SwitchParameters */ 3087 char *task_epilog; /* pathname of task launch epilog */ 3088 char *task_plugin; /* task launch plugin */ 3089 uint32_t task_plugin_param; /* see CPU_BIND_* */ 3090 char *task_prolog; /* pathname of task launch prolog */ 3091 uint16_t tcp_timeout; /* tcp timeout */ 3092 char *tmp_fs; /* pathname of temporary file system */ 3093 char *topology_param; /* network topology parameters */ 3094 char *topology_plugin; /* network topology plugin */ 3095 uint16_t tree_width; /* number of threads per node to span */ 3096 char *unkillable_program; /* program run by the slurmstepd when 3097 * processes in a job step are unkillable */ 3098 uint16_t unkillable_timeout; /* time in seconds, after processes in a 3099 * job step have been signaled, before 3100 * they are considered "unkillable". 
*/ 3101 char *version; /* version of slurmctld */ 3102 uint16_t vsize_factor; /* virtual memory limit size factor */ 3103 uint16_t wait_time; /* default job --wait time */ 3104 char *x11_params; /* X11Parameters */ 3105 } slurm_conf_t; 3106 3107 typedef struct slurmd_status_msg { 3108 time_t booted; /* when daemon was started */ 3109 time_t last_slurmctld_msg; /* time of last slurmctld message */ 3110 uint16_t slurmd_debug; /* logging level */ 3111 uint16_t actual_cpus; /* actual logical processor count */ 3112 uint16_t actual_boards; /* actual total boards count */ 3113 uint16_t actual_sockets; /* actual total sockets count */ 3114 uint16_t actual_cores; /* actual core per socket count */ 3115 uint16_t actual_threads; /* actual thread per core count */ 3116 uint64_t actual_real_mem; /* actual real memory in MB */ 3117 uint32_t actual_tmp_disk; /* actual temp disk space in MB */ 3118 uint32_t pid; /* process ID */ 3119 char *hostname; /* local hostname */ 3120 char *slurmd_logfile; /* slurmd log file location */ 3121 char *step_list; /* list of active job steps */ 3122 char *version; /* version running */ 3123 } slurmd_status_t; 3124 3125 typedef struct submit_response_msg { 3126 uint32_t job_id; /* job ID */ 3127 uint32_t step_id; /* step ID */ 3128 uint32_t error_code; /* error code for warning message */ 3129 char *job_submit_user_msg; /* job submit plugin user_msg */ 3130 } submit_response_msg_t; 3131 3132 /* NOTE: If setting node_addr and/or node_hostname then comma separate names 3133 * and include an equal number of node_names */ 3134 typedef struct slurm_update_node_msg { 3135 char *comment; /* arbitrary comment */ 3136 uint32_t cpu_bind; /* default CPU binding type */ 3137 char *extra; /* arbitrary string */ 3138 char *features; /* new available feature for node */ 3139 char *features_act; /* new active feature for node */ 3140 char *gres; /* new generic resources for node */ 3141 char *instance_id; /* cloud instance id */ 3142 char *instance_type; /* cloud instance type */ 3143 char *node_addr; /* communication name (optional) */ 3144 char *node_hostname; /* node's hostname (optional) */ 3145 char *node_names; /* nodelist expression */ 3146 uint32_t node_state; /* see enum node_states */ 3147 char *reason; /* reason for node being DOWN or DRAINING */ 3148 uint32_t reason_uid; /* user ID of sending (needed if user 3149 * root is sending message) */ 3150 uint32_t resume_after; /* automatically resume DOWN or DRAINED node 3151 * after this amount of seconds */ 3152 uint32_t weight; /* new weight for node */ 3153 } update_node_msg_t; 3154 3155 typedef struct slurm_update_front_end_msg { 3156 char *name; /* comma separated list of front end nodes */ 3157 uint32_t node_state; /* see enum node_states */ 3158 char *reason; /* reason for node being DOWN or DRAINING */ 3159 uint32_t reason_uid; /* user ID of sending (needed if user 3160 * root is sending message) */ 3161 } update_front_end_msg_t; 3162 3163 typedef struct partition_info update_part_msg_t; 3164 3165 typedef struct job_sbcast_cred_msg { 3166 uint32_t job_id; /* assigned job id */ 3167 char *node_list; /* assigned list of nodes */ 3168 void *sbcast_cred; /* opaque data structure */ 3169 } job_sbcast_cred_msg_t; 3170 3171 typedef struct { 3172 uint32_t lifespan; 3173 char *username; 3174 } token_request_msg_t; 3175 3176 typedef struct { 3177 char *token; 3178 } token_response_msg_t; 3179 3180 /* Opaque data type for slurm_step_ctx_* functions */ 3181 typedef struct slurm_step_ctx_struct slurm_step_ctx_t; 3182 3183 #define 
STAT_COMMAND_RESET 0x0000 3184 #define STAT_COMMAND_GET 0x0001 3185 typedef struct stats_info_request_msg { 3186 uint16_t command_id; 3187 } stats_info_request_msg_t; 3188 3189 typedef struct stats_info_response_msg { 3190 uint32_t parts_packed; 3191 time_t req_time; 3192 time_t req_time_start; 3193 uint32_t server_thread_count; 3194 uint32_t agent_queue_size; 3195 uint32_t agent_count; 3196 uint32_t agent_thread_count; 3197 uint32_t dbd_agent_queue_size; 3198 uint32_t gettimeofday_latency; 3199 3200 uint32_t schedule_cycle_max; 3201 uint32_t schedule_cycle_last; 3202 uint32_t schedule_cycle_sum; 3203 uint32_t schedule_cycle_counter; 3204 uint32_t schedule_cycle_depth; 3205 uint32_t *schedule_exit; 3206 uint32_t schedule_exit_cnt; 3207 uint32_t schedule_queue_len; 3208 3209 uint32_t jobs_submitted; 3210 uint32_t jobs_started; 3211 uint32_t jobs_completed; 3212 uint32_t jobs_canceled; 3213 uint32_t jobs_failed; 3214 3215 uint32_t jobs_pending; 3216 uint32_t jobs_running; 3217 time_t job_states_ts; 3218 3219 uint32_t bf_backfilled_jobs; 3220 uint32_t bf_last_backfilled_jobs; 3221 uint32_t bf_backfilled_het_jobs; 3222 uint32_t bf_cycle_counter; 3223 uint64_t bf_cycle_sum; 3224 uint32_t bf_cycle_last; 3225 uint32_t bf_cycle_max; 3226 uint32_t *bf_exit; 3227 uint32_t bf_exit_cnt; 3228 uint32_t bf_last_depth; 3229 uint32_t bf_last_depth_try; 3230 uint32_t bf_depth_sum; 3231 uint32_t bf_depth_try_sum; 3232 uint32_t bf_queue_len; 3233 uint32_t bf_queue_len_sum; 3234 uint32_t bf_table_size; 3235 uint32_t bf_table_size_sum; 3236 time_t bf_when_last_cycle; 3237 uint32_t bf_active; 3238 3239 uint32_t rpc_type_size; 3240 uint16_t *rpc_type_id; 3241 uint32_t *rpc_type_cnt; 3242 uint64_t *rpc_type_time; 3243 3244 uint32_t rpc_user_size; 3245 uint32_t *rpc_user_id; 3246 uint32_t *rpc_user_cnt; 3247 uint64_t *rpc_user_time; 3248 3249 uint32_t rpc_queue_type_count; 3250 uint32_t *rpc_queue_type_id; 3251 uint32_t *rpc_queue_count; 3252 3253 uint32_t rpc_dump_count; 3254 uint32_t *rpc_dump_types; 3255 char **rpc_dump_hostlist; 3256 } stats_info_response_msg_t; 3257 3258 #define TRIGGER_FLAG_PERM 0x0001 3259 3260 #define TRIGGER_RES_TYPE_JOB 0x0001 3261 #define TRIGGER_RES_TYPE_NODE 0x0002 3262 #define TRIGGER_RES_TYPE_SLURMCTLD 0x0003 3263 #define TRIGGER_RES_TYPE_SLURMDBD 0x0004 3264 #define TRIGGER_RES_TYPE_DATABASE 0x0005 3265 #define TRIGGER_RES_TYPE_FRONT_END 0x0006 3266 #define TRIGGER_RES_TYPE_OTHER 0x0007 3267 3268 #define TRIGGER_TYPE_UP SLURM_BIT(0) 3269 #define TRIGGER_TYPE_DOWN SLURM_BIT(1) 3270 #define TRIGGER_TYPE_FAIL SLURM_BIT(2) 3271 #define TRIGGER_TYPE_TIME SLURM_BIT(3) 3272 #define TRIGGER_TYPE_FINI SLURM_BIT(4) 3273 #define TRIGGER_TYPE_RECONFIG SLURM_BIT(5) 3274 /* SLURM_BIT(6), UNUSED */ 3275 #define TRIGGER_TYPE_IDLE SLURM_BIT(7) 3276 #define TRIGGER_TYPE_DRAINED SLURM_BIT(8) 3277 #define TRIGGER_TYPE_PRI_CTLD_FAIL SLURM_BIT(9) 3278 #define TRIGGER_TYPE_PRI_CTLD_RES_OP SLURM_BIT(10) 3279 #define TRIGGER_TYPE_PRI_CTLD_RES_CTRL SLURM_BIT(11) 3280 #define TRIGGER_TYPE_PRI_CTLD_ACCT_FULL SLURM_BIT(12) 3281 #define TRIGGER_TYPE_BU_CTLD_FAIL SLURM_BIT(13) 3282 #define TRIGGER_TYPE_BU_CTLD_RES_OP SLURM_BIT(14) 3283 #define TRIGGER_TYPE_BU_CTLD_AS_CTRL SLURM_BIT(15) 3284 #define TRIGGER_TYPE_PRI_DBD_FAIL SLURM_BIT(16) 3285 #define TRIGGER_TYPE_PRI_DBD_RES_OP SLURM_BIT(17) 3286 #define TRIGGER_TYPE_PRI_DB_FAIL SLURM_BIT(18) 3287 #define TRIGGER_TYPE_PRI_DB_RES_OP SLURM_BIT(19) 3288 #define TRIGGER_TYPE_BURST_BUFFER SLURM_BIT(20) 3289 #define TRIGGER_TYPE_DRAINING SLURM_BIT(21) 3290 #define 
TRIGGER_TYPE_RESUME SLURM_BIT(22) 3291 3292 3293 typedef struct trigger_info { 3294 uint16_t flags; /* TRIGGER_FLAG_* */ 3295 uint32_t trig_id; /* trigger ID */ 3296 uint16_t res_type; /* TRIGGER_RES_TYPE_* */ 3297 char * res_id; /* resource ID */ 3298 uint32_t control_inx; /* controller index */ 3299 uint32_t trig_type; /* TRIGGER_TYPE_* */ 3300 uint16_t offset; /* seconds from trigger, 0x8000 origin */ 3301 uint32_t user_id; /* user requesting trigger */ 3302 char * program; /* program to execute */ 3303 } trigger_info_t; 3304 3305 typedef struct trigger_info_msg { 3306 uint32_t record_count; /* number of records */ 3307 trigger_info_t *trigger_array; /* the trigger records */ 3308 } trigger_info_msg_t; 3309 3310 3311 /* Individual license information 3312 */ 3313 typedef struct slurm_license_info { 3314 char *name; /* license name */ 3315 uint32_t total; /* total number of available licenses */ 3316 uint32_t in_use; /* number of license in use */ 3317 uint32_t available; /* number of available license */ 3318 uint8_t remote; /* non-zero if remote license (not 3319 * defined in slurm.conf) */ 3320 uint32_t reserved; /* number of licenses reserved */ 3321 uint32_t last_consumed; /* number of licenses last known to be 3322 consumed in the license manager 3323 (for remote) */ 3324 uint32_t last_deficit; 3325 time_t last_update; /* last updated (for remote) */ 3326 } slurm_license_info_t; 3327 3328 /* License information array as returned by the controller. 3329 */ 3330 typedef struct license_info_msg { 3331 time_t last_update; 3332 uint32_t num_lic; 3333 slurm_license_info_t *lic_array; 3334 } license_info_msg_t; 3335 3336 typedef struct { 3337 uint32_t job_array_count; 3338 char **job_array_id; /* Note: The string may be truncated */ 3339 uint32_t *error_code; 3340 char **err_msg; 3341 } job_array_resp_msg_t; 3342 3343 /* Association manager state running in the slurmctld */ 3344 typedef struct { 3345 list_t *assoc_list; /* list of slurmdb_assoc_rec_t with usage packed */ 3346 list_t *qos_list; /* list of slurmdb_qos_rec_t with usage packed */ 3347 uint32_t tres_cnt; 3348 char **tres_names; 3349 list_t *user_list; /* list of slurmdb_user_rec_t */ 3350 } assoc_mgr_info_msg_t; 3351 3352 #define ASSOC_MGR_INFO_FLAG_ASSOC 0x00000001 3353 #define ASSOC_MGR_INFO_FLAG_USERS 0x00000002 3354 #define ASSOC_MGR_INFO_FLAG_QOS 0x00000004 3355 3356 typedef struct { 3357 list_t *acct_list; /* char * list of account names */ 3358 uint32_t flags; /* flags determining what is returned */ 3359 list_t *qos_list; /* char * list of qos names */ 3360 list_t *user_list; /* char * list of user names */ 3361 } assoc_mgr_info_request_msg_t; 3362 3363 typedef struct network_callerid_msg { 3364 unsigned char ip_src[16]; 3365 unsigned char ip_dst[16]; 3366 uint32_t port_src; 3367 uint32_t port_dst; 3368 int32_t af; /* NOTE: un/packed as uint32_t */ 3369 } network_callerid_msg_t; 3370 3371 /*****************************************************************************\ 3372 * RESOURCE ALLOCATION FUNCTIONS 3373 \*****************************************************************************/ 3374 3375 /* 3376 * slurm_init_job_desc_msg - initialize job descriptor with 3377 * default values 3378 * OUT job_desc_msg - user defined job descriptor 3379 */ 3380 extern void slurm_init_job_desc_msg(job_desc_msg_t *job_desc_msg); 3381 3382 /* 3383 * slurm_allocate_resources - allocate resources for a job request 3384 * If the requested resources are not immediately available, the slurmctld 3385 * will send the job_alloc_resp_msg to 
the specified node and port. 3386 * IN job_desc_msg - description of resource allocation request 3387 * OUT job_alloc_resp_msg - response to request. This only represents 3388 * a job allocation if resources are immediately. Otherwise it just contains 3389 * the job id of the enqueued job request. 3390 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set 3391 * NOTE: free the response using slurm_free_resource_allocation_response_msg() 3392 */ 3393 extern int slurm_allocate_resources(job_desc_msg_t *job_desc_msg, 3394 resource_allocation_response_msg_t **job_alloc_resp_msg); 3395 3396 /* 3397 * slurm_allocate_resources_blocking 3398 * allocate resources for a job request. This call will block until 3399 * the allocation is granted, or the specified timeout limit is reached. 3400 * IN req - description of resource allocation request 3401 * IN timeout - amount of time, in seconds, to wait for a response before 3402 * giving up. 3403 * A timeout of zero will wait indefinitely. 3404 * IN pending_callback - If the allocation cannot be granted immediately, 3405 * the controller will put the job in the PENDING state. If 3406 * pending callback is not NULL, it will be called with the job_id 3407 * of the pending job as the sole parameter. 3408 * 3409 * RET allocation structure on success, NULL on error set errno to 3410 * indicate the error (errno will be ETIMEDOUT if the timeout is reached 3411 * with no allocation granted) 3412 * NOTE: free the response using slurm_free_resource_allocation_response_msg() 3413 */ 3414 extern resource_allocation_response_msg_t *slurm_allocate_resources_blocking( 3415 const job_desc_msg_t *user_req, 3416 time_t timeout, 3417 void (*pending_callback)(uint32_t job_id)); 3418 3419 /* 3420 * slurm_free_resource_allocation_response_msg - free slurm resource 3421 * allocation response message 3422 * IN msg - pointer to allocation response message 3423 * NOTE: buffer is loaded by slurm_allocate_resources 3424 */ 3425 extern void slurm_free_resource_allocation_response_msg(resource_allocation_response_msg_t *msg); 3426 3427 /* 3428 * slurm_allocate_het_job_blocking 3429 * allocate resources for a list of job requests. This call will block 3430 * until the entire allocation is granted, or the specified timeout limit 3431 * is reached. 3432 * IN job_req_list - list of resource allocation requests, type job_desc_msg_t 3433 * IN timeout - amount of time, in seconds, to wait for a response before 3434 * giving up. 3435 * A timeout of zero will wait indefinitely. 3436 * IN pending_callback - If the allocation cannot be granted immediately, 3437 * the controller will put the job in the PENDING state. If 3438 * pending callback is not NULL, it will be called with the job_id 3439 * of the pending job as the sole parameter. 
3440 * 3441 * RET list of allocation structures on success, NULL on error set errno to 3442 * indicate the error (errno will be ETIMEDOUT if the timeout is reached 3443 * with no allocation granted) 3444 * NOTE: free the response using list_destroy() 3445 */ 3446 extern list_t *slurm_allocate_het_job_blocking( 3447 list_t *job_req_list, 3448 time_t timeout, 3449 void(*pending_callback)(uint32_t job_id)); 3450 3451 /* 3452 * slurm_allocation_lookup - retrieve info for an existing resource 3453 * allocation 3454 * IN job_id - job allocation identifier 3455 * OUT resp - job allocation information 3456 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set 3457 * NOTE: free the response using slurm_free_resource_allocation_response_msg() 3458 */ 3459 extern int slurm_allocation_lookup(uint32_t job_id, 3460 resource_allocation_response_msg_t **resp); 3461 3462 /* 3463 * slurm_het_job_lookup - retrieve info for an existing heterogeneous job 3464 * allocation without the addrs and such 3465 * IN jobid - job allocation identifier 3466 * OUT resp - list of job allocation information, type 3467 * resource_allocation_response_msg_t 3468 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set 3469 * NOTE: returns information an individual job as well 3470 * NOTE: free the response using list_destroy() 3471 */ 3472 extern int slurm_het_job_lookup(uint32_t jobid, list_t **resp); 3473 3474 /* 3475 * slurm_read_hostfile - Read a Slurm hostfile specified by "filename". 3476 * "filename" must contain a list of Slurm NodeNames, one per line. 3477 * Reads up to "n" number of hostnames from the file. Returns a 3478 * string representing a hostlist ranged string of the contents of 3479 * the file. This is a helper function, it does not contact any 3480 * Slurm daemons. 3481 * 3482 * IN filename - name of Slurm Hostlist file to be read. 3483 * IN n - number of NodeNames required 3484 * RET - a string representing the hostlist. Returns NULL if there are 3485 * fewer than "n" hostnames in the file, or if an error occurs. 3486 * 3487 * NOTE: Returned string must be freed with free(). 3488 */ 3489 extern char *slurm_read_hostfile(const char *filename, int n); 3490 3491 /* 3492 * slurm_allocation_msg_thr_create - startup a message handler talking 3493 * with the controller dealing with messages from the controller during an 3494 * allocation. 3495 * IN port - port we are listening for messages on from the controller 3496 * IN callbacks - callbacks for different types of messages 3497 * RET allocation_msg_thread_t * or NULL on failure 3498 */ 3499 extern allocation_msg_thread_t *slurm_allocation_msg_thr_create(uint16_t *port, 3500 const slurm_allocation_callbacks_t *callbacks); 3501 3502 /* 3503 * slurm_allocation_msg_thr_destroy - shutdown the message handler talking 3504 * with the controller dealing with messages from the controller during an 3505 * allocation. 
3506 * IN msg_thr - allocation_msg_thread_t pointer allocated with 3507 * slurm_allocation_msg_thr_create 3508 */ 3509 extern void slurm_allocation_msg_thr_destroy(allocation_msg_thread_t *msg_thr); 3510 3511 /* 3512 * slurm_submit_batch_job - issue RPC to submit a job for later execution 3513 * NOTE: free the response using slurm_free_submit_response_response_msg 3514 * IN job_desc_msg - description of batch job request 3515 * OUT slurm_alloc_msg - response to request 3516 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set 3517 */ 3518 extern int slurm_submit_batch_job(job_desc_msg_t *job_desc_msg, 3519 submit_response_msg_t **slurm_alloc_msg); 3520 3521 /* 3522 * slurm_submit_batch_het_job - issue RPC to submit a heterogeneous job for 3523 * later execution 3524 * NOTE: free the response using slurm_free_submit_response_response_msg 3525 * IN job_req_list - list of resource allocation requests, type job_desc_msg_t 3526 * OUT slurm_alloc_msg - response to request 3527 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set 3528 */ 3529 extern int slurm_submit_batch_het_job(list_t *job_req_list, 3530 submit_response_msg_t **slurm_alloc_msg); 3531 3532 /* 3533 * slurm_free_submit_response_response_msg - free slurm 3534 * job submit response message 3535 * IN msg - pointer to job submit response message 3536 * NOTE: buffer is loaded by slurm_submit_batch_job 3537 */ 3538 extern void slurm_free_submit_response_response_msg(submit_response_msg_t *msg); 3539 3540 /* 3541 * slurm_job_batch_script - retrieve the batch script for a given jobid 3542 * returns SLURM_SUCCESS, or appropriate error code 3543 */ 3544 extern int slurm_job_batch_script(FILE *out, uint32_t jobid); 3545 3546 /* 3547 * slurm_job_will_run - determine if a job would execute immediately if 3548 * submitted now 3549 * IN job_desc_msg - description of resource allocation request 3550 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set 3551 */ 3552 extern int slurm_job_will_run(job_desc_msg_t *job_desc_msg); 3553 3554 /* 3555 * slurm_het_job_will_run - determine if a heterogeneous job would execute 3556 * immediately if submitted now 3557 * IN job_req_list - list of job_desc_msg_t structures describing the resource 3558 * allocation request 3559 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set 3560 */ 3561 extern int slurm_het_job_will_run(list_t *job_req_list); 3562 3563 3564 /* 3565 * slurm_job_will_run2 - determine if a job would execute immediately if 3566 * submitted now 3567 * IN job_desc_msg - description of resource allocation request 3568 * OUT will_run_resp - job run time data 3569 * free using slurm_free_will_run_response_msg() 3570 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set 3571 */ 3572 extern int slurm_job_will_run2(job_desc_msg_t *req, 3573 will_run_response_msg_t **will_run_resp); 3574 3575 /* 3576 * slurm_sbcast_lookup - retrieve info for an existing resource allocation 3577 * including a credential needed for sbcast. 
3578 * IN selected_step - filled in with step_id and het_job_offset 3579 * OUT info - job allocation information including a credential for sbcast 3580 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set 3581 * NOTE: free the "resp" using slurm_free_sbcast_cred_msg 3582 */ 3583 extern int slurm_sbcast_lookup(slurm_selected_step_t *selected_step, 3584 job_sbcast_cred_msg_t **info); 3585 3586 extern void slurm_free_sbcast_cred_msg(job_sbcast_cred_msg_t *msg); 3587 3588 /* slurm_load_licenses() 3589 * 3590 * Retrieve license information from the controller. 3591 * IN feature - feature name or NULL 3592 * OUT 3593 * 3594 */ 3595 extern int slurm_load_licenses(time_t, license_info_msg_t **, uint16_t); 3596 extern void slurm_free_license_info_msg(license_info_msg_t *); 3597 3598 /* get the running assoc_mgr info 3599 * IN assoc_mgr_info_request_msg_t: request filtering data returned 3600 * OUT assoc_mgr_info_msg_t: returned structure filled in with 3601 * assoc_mgr lists, must be freed by slurm_free_assoc_mgr_info_msg 3602 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set 3603 */ 3604 extern int slurm_load_assoc_mgr_info(assoc_mgr_info_request_msg_t *, 3605 assoc_mgr_info_msg_t **); 3606 extern void slurm_free_assoc_mgr_info_msg(assoc_mgr_info_msg_t *); 3607 extern void slurm_free_assoc_mgr_info_request_members(assoc_mgr_info_request_msg_t *); 3608 extern void slurm_free_assoc_mgr_info_request_msg(assoc_mgr_info_request_msg_t *); 3609 3610 3611 /*****************************************************************************\ 3612 * JOB/STEP SIGNALING FUNCTIONS 3613 \*****************************************************************************/ 3614 3615 typedef struct job_step_kill_msg { 3616 char *sjob_id; 3617 uint16_t signal; 3618 uint16_t flags; 3619 char *sibling; 3620 slurm_step_id_t step_id; 3621 } job_step_kill_msg_t; 3622 3623 /* 3624 * NOTE: See _signal_batch_job() controller and _rpc_signal_tasks() in slurmd. 3625 */ 3626 #define KILL_JOB_BATCH SLURM_BIT(0) /* signal batch shell only */ 3627 #define KILL_ARRAY_TASK SLURM_BIT(1) /* kill single task of a job array */ 3628 #define KILL_STEPS_ONLY SLURM_BIT(2) /* Do not signal batch script */ 3629 #define KILL_FULL_JOB SLURM_BIT(3) /* Signal all steps, including batch 3630 * script */ 3631 #define KILL_FED_REQUEUE SLURM_BIT(4) /* Mark job as requeued when requeued */ 3632 #define KILL_HURRY SLURM_BIT(5) /* Skip burst buffer stage out */ 3633 #define KILL_OOM SLURM_BIT(6) /* Kill due to Out-Of-Memory */ 3634 #define KILL_NO_SIBS SLURM_BIT(7) /* Don't kill other sibling jobs */ 3635 #define KILL_JOB_RESV SLURM_BIT(8) /* Job is willing to run on nodes in a 3636 * magnetic reservation. */ 3637 #define KILL_NO_CRON SLURM_BIT(9) /* request killing cron Jobs */ 3638 #define KILL_NO_SIG_FAIL SLURM_BIT(10) /* Don't fail job due to signal (steps only) */ 3639 3640 /* Use top bit of uint16_t in conjuction with KILL_* flags to indicate signal 3641 * has been sent to job previously. Does not need to be passed to slurmd. 
*/ 3642 #define WARN_SENT SLURM_BIT(15) /* warn already sent, clear this on 3643 * requeue */ 3644 3645 /* 3646 * slurm_kill_job - send the specified signal to all steps of an existing job 3647 * IN job_id - the job's id 3648 * IN signal - signal number 3649 * IN flags - see KILL_JOB_* flags above 3650 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set 3651 */ 3652 extern int slurm_kill_job(uint32_t job_id, uint16_t signal, uint16_t flags); 3653 3654 /* 3655 * slurm_kill_job_step - send the specified signal to an existing job step 3656 * IN job_id - the job's id 3657 * IN step_id - the job step's id 3658 * IN signal - signal number 3659 * IN flags - see KILL_* or 0 for no flags 3660 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set 3661 */ 3662 extern int slurm_kill_job_step(uint32_t job_id, uint32_t step_id, 3663 uint16_t signal, uint16_t flags); 3664 /* 3665 * slurm_kill_job2 - send REQUEST_KILL_JOB msg to an existing job or step. 3666 * IN job_id - the job's id (in a string format) 3667 * IN signal - signal to send 3668 * IN flags - see KILL_* flags above (such as KILL_JOB_BATCH) 3669 * IN sibling - optional string of sibling cluster to send the message to. 3670 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set 3671 */ 3672 extern int slurm_kill_job2(const char *job_id, uint16_t signal, uint16_t flags, 3673 const char *sibling); 3674 3675 /* 3676 * slurm_signal_job - send the specified signal to all steps of an existing job 3677 * IN job_id - the job's id 3678 * IN signal - signal number 3679 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set 3680 */ 3681 extern int slurm_signal_job(uint32_t job_id, uint16_t signal); 3682 3683 /* 3684 * slurm_signal_job_step - send the specified signal to an existing job step 3685 * IN job_id - the job's id 3686 * IN step_id - the job step's id - use SLURM_BATCH_SCRIPT as the step_id 3687 * to send a signal to a job's batch script 3688 * IN signal - signal number 3689 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set 3690 */ 3691 extern int slurm_signal_job_step(uint32_t job_id, 3692 uint32_t step_id, 3693 uint32_t signal); 3694 3695 3696 /*****************************************************************************\ 3697 * JOB/STEP COMPLETION FUNCTIONS 3698 \*****************************************************************************/ 3699 3700 /* 3701 * slurm_complete_job - note the completion of a job and all of its steps 3702 * IN job_id - the job's id 3703 * IN job_return_code - the highest exit code of any task of the job 3704 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set 3705 */ 3706 extern int slurm_complete_job(uint32_t job_id, uint32_t job_return_code); 3707 3708 /* 3709 * slurm_terminate_job_step - terminates a job step by sending a 3710 * REQUEST_TERMINATE_TASKS rpc to all slurmd of a job step, and then 3711 * calls slurm_complete_job_step() after verifying that all 3712 * nodes in the job step no longer have running tasks from the job 3713 * step. (May take over 35 seconds to return.) 
3714 * IN job_id - the job's id 3715 * IN step_id - the job step's id - use SLURM_BATCH_SCRIPT as the step_id 3716 * to terminate a job's batch script 3717 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set 3718 */ 3719 extern int slurm_terminate_job_step(uint32_t job_id, uint32_t step_id); 3720 3721 /*****************************************************************************\ 3722 * SLURM TASK SPAWNING FUNCTIONS 3723 \*****************************************************************************/ 3724 3725 /* 3726 * slurm_step_launch_params_t_init - initialize a user-allocated 3727 * slurm_step_launch_params_t structure with default values. 3728 * default values. This function will NOT allocate any new memory. 3729 * IN ptr - pointer to a structure allocated by the use. The structure will 3730 * be initialized. 3731 */ 3732 extern void slurm_step_launch_params_t_init(slurm_step_launch_params_t *ptr); 3733 3734 /* 3735 * slurm_step_launch - launch a parallel job step 3736 * IN ctx - job step context generated by slurm_step_ctx_create 3737 * IN params - job step parameters 3738 * IN callbacks - Identify functions to be called when various events occur 3739 * IN het_job_step_cnt - Total count of hetjob steps to be launched, -1 otherwise 3740 * RET SLURM_SUCCESS or SLURM_ERROR (with errno set) 3741 */ 3742 extern int slurm_step_launch(slurm_step_ctx_t *ctx, 3743 const slurm_step_launch_params_t *params, 3744 const slurm_step_launch_callbacks_t *callbacks); 3745 3746 /* 3747 * slurm_step_launch_add - Add tasks to a step that was already started 3748 * IN ctx - job step context generated by slurm_step_ctx_create 3749 * IN first_ctx - job step context generated by slurm_step_ctx_create for 3750 * first component of the job step 3751 * IN params - job step parameters 3752 * IN node_list - list of extra nodes to add 3753 * IN start_nodeid - in the global scheme which node id is the first 3754 * node in node_list. 3755 * RET SLURM_SUCCESS or SLURM_ERROR (with errno set) 3756 */ 3757 extern int slurm_step_launch_add(slurm_step_ctx_t *ctx, 3758 slurm_step_ctx_t *first_ctx, 3759 const slurm_step_launch_params_t *params, 3760 char *node_list); 3761 3762 /* 3763 * Block until all tasks have started. 3764 */ 3765 extern int slurm_step_launch_wait_start(slurm_step_ctx_t *ctx); 3766 3767 /* 3768 * Block until all tasks have finished (or failed to start altogether). 3769 */ 3770 extern void slurm_step_launch_wait_finish(slurm_step_ctx_t *ctx); 3771 3772 /* 3773 * Abort an in-progress launch, or terminate the fully launched job step. 3774 * 3775 * Can be called from a signal handler. 3776 */ 3777 extern void slurm_step_launch_abort(slurm_step_ctx_t *ctx); 3778 3779 /* 3780 * Forward a signal to all those nodes with running tasks 3781 */ 3782 extern void slurm_step_launch_fwd_signal(slurm_step_ctx_t *ctx, int signo); 3783 3784 /* 3785 * Wake tasks stopped for debugging on nodes with running tasks 3786 */ 3787 extern void slurm_step_launch_fwd_wake(slurm_step_ctx_t *ctx); 3788 3789 /*****************************************************************************\ 3790 * SLURM CONTROL CONFIGURATION READ/PRINT/UPDATE FUNCTIONS 3791 \*****************************************************************************/ 3792 3793 /* 3794 * slurm_api_version - Return a single number reflecting the Slurm API's 3795 * version number. 
Use the macros SLURM_VERSION_NUM, SLURM_VERSION_MAJOR, 3796 * SLURM_VERSION_MINOR, and SLURM_VERSION_MICRO to work with this value 3797 * RET API's version number 3798 */ 3799 extern long slurm_api_version(void); 3800 3801 /* 3802 * slurm_load_ctl_conf - issue RPC to get slurm control configuration 3803 * information if changed since update_time 3804 * IN update_time - time of current configuration data 3805 * IN slurm_ctl_conf_ptr - place to store slurm control configuration 3806 * pointer 3807 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set 3808 * NOTE: free the response using slurm_free_ctl_conf 3809 */ 3810 extern int slurm_load_ctl_conf(time_t update_time, 3811 slurm_conf_t **slurm_ctl_conf_ptr); 3812 3813 /* 3814 * slurm_free_ctl_conf - free slurm control information response message 3815 * IN msg - pointer to slurm control information response message 3816 * NOTE: buffer is loaded by slurm_load_ctl_conf 3817 */ 3818 extern void slurm_free_ctl_conf(slurm_conf_t *slurm_ctl_conf_ptr); 3819 3820 /* 3821 * slurm_print_ctl_conf - output the contents of slurm control configuration 3822 * message as loaded using slurm_load_ctl_conf 3823 * IN out - file to write to 3824 * IN slurm_ctl_conf_ptr - slurm control configuration pointer 3825 */ 3826 extern void slurm_print_ctl_conf(FILE *out, slurm_conf_t *slurm_ctl_conf_ptr); 3827 3828 /* 3829 * slurm_write_ctl_conf - write the contents of slurm control configuration 3830 * message as loaded using slurm_load_ctl_conf to a file 3831 * IN out - file to write to 3832 * IN slurm_ctl_conf_ptr - slurm control configuration pointer 3833 * IN node_info_ptr - pointer to node table of information 3834 * IN part_info_ptr - pointer to partition information 3835 */ 3836 extern void slurm_write_ctl_conf(slurm_conf_t *slurm_ctl_conf_ptr, 3837 node_info_msg_t *node_info_ptr, 3838 partition_info_msg_t *part_info_ptr); 3839 3840 /* 3841 * slurm_ctl_conf_2_key_pairs - put the slurm_conf_t variables into 3842 * a list_t of opaque data type config_key_pair_t 3843 * IN slurm_ctl_conf_ptr - slurm control configuration pointer 3844 * RET list of opaque data type config_key_pair_t 3845 */ 3846 extern void *slurm_ctl_conf_2_key_pairs(slurm_conf_t *slurm_ctl_conf_ptr); 3847 3848 /* 3849 * slurm_print_key_pairs - output the contents of key_pairs 3850 * which is a list of opaque data type config_key_pair_t 3851 * IN out - file to write to 3852 * IN key_pairs - list containing key pairs to be printed 3853 * IN title - title of key pair list 3854 */ 3855 extern void slurm_print_key_pairs(FILE *out, void *key_pairs, char *title); 3856 3857 /* 3858 * slurm_load_slurmd_status - issue RPC to get the status of slurmd 3859 * daemon on this machine 3860 * IN slurmd_status_ptr - place to store slurmd status information 3861 * RET 0 or -1 on error 3862 * NOTE: free the response using slurm_free_slurmd_status() 3863 */ 3864 extern int slurm_load_slurmd_status(slurmd_status_t **slurmd_status_ptr); 3865 3866 /* 3867 * slurm_free_slurmd_status - free slurmd state information 3868 * IN msg - pointer to slurmd state information 3869 * NOTE: buffer is loaded by slurm_load_slurmd_status 3870 */ 3871 extern void slurm_free_slurmd_status(slurmd_status_t* slurmd_status_ptr); 3872 3873 /* 3874 * slurm_print_slurmd_status - output the contents of slurmd status 3875 * message as loaded using slurm_load_slurmd_status 3876 * IN out - file to write to 3877 * IN slurmd_status_ptr - slurmd status pointer 3878 */ 3879 void slurm_print_slurmd_status(FILE *out, slurmd_status_t 
*slurmd_status_ptr); 3880 3881 /* 3882 * slurm_init_update_step_msg - initialize step update message with default 3883 * values before calling slurm_update_step() 3884 * OUT step_msg - step update messasge descriptor 3885 */ 3886 extern void slurm_init_update_step_msg(step_update_request_msg_t *step_msg); 3887 3888 /* Get scheduling statistics */ 3889 extern int slurm_get_statistics(stats_info_response_msg_t **buf, 3890 stats_info_request_msg_t *req); 3891 3892 /* Reset scheduling statistics */ 3893 extern int slurm_reset_statistics(stats_info_request_msg_t *req); 3894 3895 /*****************************************************************************\ 3896 * SLURM JOB RESOURCES READ/PRINT FUNCTIONS 3897 \*****************************************************************************/ 3898 3899 /* 3900 * slurm_job_cpus_allocated_on_node_id - 3901 * get the number of cpus allocated to a job 3902 * on a node by node id 3903 * IN job_resrcs_ptr - pointer to job_resources structure 3904 * IN node_id - zero-origin node id in allocation 3905 * RET number of CPUs allocated to job on this node or -1 on error 3906 */ 3907 extern int slurm_job_cpus_allocated_on_node_id(job_resources_t *job_resrcs_ptr, 3908 int node_id); 3909 3910 /* 3911 * slurm_job_cpus_allocated_on_node - 3912 * get the number of cpus allocated to a job 3913 * on a node by node name 3914 * IN job_resrcs_ptr - pointer to job_resources structure 3915 * IN node_name - name of node 3916 * RET number of CPUs allocated to job on this node or -1 on error 3917 */ 3918 extern int slurm_job_cpus_allocated_on_node(job_resources_t *job_resrcs_ptr, 3919 const char *node_name); 3920 3921 /* 3922 * slurm_job_cpus_allocated_str_on_node_id - 3923 * get the string representation of cpus allocated 3924 * to a job on a node by node id 3925 * IN cpus - str where the resulting cpu list is returned 3926 * IN cpus_len - max size of cpus str 3927 * IN job_resrcs_ptr - pointer to job_resources structure 3928 * IN node_id - zero-origin node id in allocation 3929 * RET 0 on success or -1 on error 3930 */ 3931 extern int slurm_job_cpus_allocated_str_on_node_id(char *cpus, 3932 size_t cpus_len, 3933 job_resources_t *job_resrcs_ptr, 3934 int node_id); 3935 3936 /* 3937 * slurm_job_cpus_allocated_str_on_node - 3938 * get the string representation of cpus allocated 3939 * to a job on a node by node name 3940 * IN cpus - str where the resulting cpu list is returned 3941 * IN cpus_len - max size of cpus str 3942 * IN job_resrcs_ptr - pointer to job_resources structure 3943 * IN node_name - name of node 3944 * RET 0 on success or -1 on error 3945 */ 3946 extern int slurm_job_cpus_allocated_str_on_node(char *cpus, 3947 size_t cpus_len, 3948 job_resources_t *job_resrcs_ptr, 3949 const char *node_name); 3950 3951 /*****************************************************************************\ 3952 * SLURM JOB CONTROL CONFIGURATION READ/PRINT/UPDATE FUNCTIONS 3953 \*****************************************************************************/ 3954 3955 /* 3956 * slurm_free_job_info_msg - free the job information response message 3957 * IN msg - pointer to job information response message 3958 * NOTE: buffer is loaded by slurm_load_jobs() 3959 */ 3960 extern void slurm_free_job_info_msg(job_info_msg_t *job_buffer_ptr); 3961 3962 /* 3963 * slurm_free_priority_factors_response_msg - free the job priority factor 3964 * information response message 3965 * IN msg - pointer to job priority information response message 3966 * NOTE: buffer is loaded by slurm_load_job_prio() 3967 */ 
3968 extern void slurm_free_priority_factors_response_msg( 3969 priority_factors_response_msg_t *factors_resp); 3970 3971 /* 3972 * slurm_get_end_time - get the expected end time for a given slurm job 3973 * IN jobid - slurm job id 3974 * end_time_ptr - location in which to store scheduled end time for job 3975 * RET 0 or -1 on error 3976 */ 3977 extern int slurm_get_end_time(uint32_t jobid, time_t *end_time_ptr); 3978 3979 /* Given a job record pointer, return its stderr path */ 3980 extern void slurm_get_job_stderr(char *buf, int buf_size, job_info_t *job_ptr); 3981 3982 /* Given a job record pointer, return its stdin path */ 3983 extern void slurm_get_job_stdin(char *buf, int buf_size, job_info_t *job_ptr); 3984 3985 /* Given a job record pointer, return its stdout path */ 3986 extern void slurm_get_job_stdout(char *buf, int buf_size, job_info_t *job_ptr); 3987 3988 /* 3989 * slurm_get_rem_time - get the expected time remaining for a given job 3990 * IN jobid - slurm job id 3991 * RET remaining time in seconds or -1 on error 3992 */ 3993 extern long slurm_get_rem_time(uint32_t jobid); 3994 3995 /* 3996 * slurm_job_node_ready - report if nodes are ready for job to execute now 3997 * IN job_id - slurm job id 3998 * RET: READY_* values defined above 3999 */ 4000 extern int slurm_job_node_ready(uint32_t job_id); 4001 4002 /* 4003 * slurm_load_job - issue RPC to get job information for one job ID 4004 * IN job_info_msg_pptr - place to store a job configuration pointer 4005 * IN job_id - ID of job we want information about 4006 * IN show_flags - job filtering options 4007 * RET 0 or -1 on error 4008 * NOTE: free the response using slurm_free_job_info_msg 4009 */ 4010 extern int slurm_load_job(job_info_msg_t **resp, 4011 uint32_t job_id, 4012 uint16_t show_flags); 4013 4014 /* 4015 * slurm_load_job_prio - issue RPC to get job priority information for jobs 4016 * OUT factors_resp - job priority factors 4017 * IN show_flags - job filtering option: 0 or SHOW_LOCAL 4018 * RET 0 or -1 on error 4019 * NOTE: free the response using slurm_free_priority_factors_response_msg() 4020 */ 4021 extern int slurm_load_job_prio(priority_factors_response_msg_t **factors_resp, 4022 uint16_t show_flags); 4023 4024 /* 4025 * slurm_load_job_user - issue RPC to get slurm information about all jobs 4026 * to be run as the specified user 4027 * IN/OUT job_info_msg_pptr - place to store a job configuration pointer 4028 * IN user_id - ID of user we want information for 4029 * IN show_flags - job filtering options 4030 * RET 0 or -1 on error 4031 * NOTE: free the response using slurm_free_job_info_msg 4032 */ 4033 extern int slurm_load_job_user(job_info_msg_t **job_info_msg_pptr, 4034 uint32_t user_id, 4035 uint16_t show_flags); 4036 4037 /* 4038 * slurm_load_jobs - issue RPC to get slurm all job configuration 4039 * information if changed since update_time 4040 * IN update_time - time of current configuration data 4041 * IN/OUT job_info_msg_pptr - place to store a job configuration pointer 4042 * IN show_flags - job filtering options 4043 * RET 0 or -1 on error 4044 * NOTE: free the response using slurm_free_job_info_msg 4045 */ 4046 extern int slurm_load_jobs(time_t update_time, 4047 job_info_msg_t **job_info_msg_pptr, 4048 uint16_t show_flags); 4049 4050 /* 4051 * slurm_notify_job - send message to the job's stdout, 4052 * usable only by user root 4053 * IN job_id - slurm job_id or 0 for all jobs 4054 * IN message - arbitrary message 4055 * RET 0 or -1 on error 4056 */ 4057 extern int slurm_notify_job(uint32_t job_id, 
char *message); 4058 4059 /* 4060 * slurm_pid2jobid - issue RPC to get the slurm job_id given a process_id 4061 * on this machine 4062 * IN job_pid - process_id of interest on this machine 4063 * OUT job_id_ptr - place to store a slurm job_id 4064 * RET 0 or -1 on error 4065 */ 4066 extern int slurm_pid2jobid(pid_t job_pid, uint32_t *job_id_ptr); 4067 4068 /* 4069 * slurm_print_job_info - output information about a specific Slurm 4070 * job based upon message as loaded using slurm_load_jobs 4071 * IN out - file to write to 4072 * IN job_ptr - an individual job information record pointer 4073 * IN one_liner - print as a single line if true 4074 */ 4075 extern void slurm_print_job_info(FILE *out, 4076 slurm_job_info_t *job_ptr, 4077 int one_liner); 4078 4079 /* 4080 * slurm_print_job_info_msg - output information about all Slurm 4081 * jobs based upon message as loaded using slurm_load_jobs 4082 * IN out - file to write to 4083 * IN job_info_msg_ptr - job information message pointer 4084 * IN one_liner - print as a single line if true 4085 */ 4086 extern void slurm_print_job_info_msg(FILE *out, 4087 job_info_msg_t *job_info_msg_ptr, 4088 int one_liner); 4089 4090 /* 4091 * slurm_sprint_job_info - output information about a specific Slurm 4092 * job based upon message as loaded using slurm_load_jobs 4093 * IN job_ptr - an individual job information record pointer 4094 * IN one_liner - print as a single line if true 4095 * RET out - char * containing formatted output (must be freed after call) 4096 * NULL is returned on failure. 4097 */ 4098 extern char *slurm_sprint_job_info(slurm_job_info_t *job_ptr, 4099 int one_liner); 4100 4101 /* 4102 * slurm_update_job - issue RPC to a job's configuration per request, 4103 * only usable by user root or (for some parameters) the job's owner 4104 * IN job_msg - description of job updates 4105 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set 4106 */ 4107 extern int slurm_update_job(job_desc_msg_t *job_msg); 4108 4109 /* 4110 * slurm_update_job2 - issue RPC to a job's configuration per request, 4111 * only usable by user root or (for some parameters) the job's owner 4112 * IN job_msg - description of job updates 4113 * OUT resp - per task response to the request, 4114 * free using slurm_free_job_array_resp() 4115 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set 4116 */ 4117 extern int slurm_update_job2(job_desc_msg_t *job_msg, 4118 job_array_resp_msg_t **resp); 4119 4120 /* 4121 * slurm_xlate_job_id - Translate a Slurm job ID string into a slurm job ID 4122 * number. If this job ID contains an array index, map this to the 4123 * equivalent Slurm job ID number (e.g. "123_2" to 124) 4124 * 4125 * IN job_id_str - String containing a single job ID number 4126 * RET - equivalent job ID number or 0 on error 4127 */ 4128 extern uint32_t slurm_xlate_job_id(char *job_id_str); 4129 4130 4131 /*****************************************************************************\ 4132 * SLURM JOB STEP CONFIGURATION READ/PRINT/UPDATE FUNCTIONS 4133 \*****************************************************************************/ 4134 4135 /* 4136 * slurm_get_job_steps - issue RPC to get specific slurm job step 4137 * configuration information if changed since update_time. 
4138 * a job_id value of NO_VAL implies all jobs, a step_id value of 4139 * NO_VAL implies all steps 4140 * IN update_time - time of current configuration data 4141 * IN job_id - get information for specific job id, NO_VAL for all jobs 4142 * IN step_id - get information for specific job step id, NO_VAL for all 4143 * job steps 4144 * IN step_response_pptr - place to store a step response pointer 4145 * IN show_flags - job step filtering options 4146 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set 4147 * NOTE: free the response using slurm_free_job_step_info_response_msg 4148 */ 4149 extern int slurm_get_job_steps(time_t update_time, 4150 uint32_t job_id, 4151 uint32_t step_id, 4152 job_step_info_response_msg_t **step_response_pptr, 4153 uint16_t show_flags); 4154 4155 /* 4156 * Issue RPC to find all steps matching container id and uid (unless uid=NO_VAL) 4157 * IN show_flags - job step filtering options 4158 * IN/OUT steps - List (of slurm_step_id_t*) to populate. 4159 * Must free step ids with slurm_free_step_id(). 4160 * RET SLURM_SUCCESS or error 4161 */ 4162 extern int slurm_find_step_ids_by_container_id(uint16_t show_flags, uid_t uid, 4163 const char *container_id, 4164 list_t *steps); 4165 4166 /* 4167 * slurm_free_job_step_info_response_msg - free the job step 4168 * information response message 4169 * IN msg - pointer to job step information response message 4170 * NOTE: buffer is loaded by slurm_get_job_steps. 4171 */ 4172 extern void slurm_free_job_step_info_response_msg(job_step_info_response_msg_t *msg); 4173 4174 /* 4175 * slurm_print_job_step_info_msg - output information about all Slurm 4176 * job steps based upon message as loaded using slurm_get_job_steps 4177 * IN out - file to write to 4178 * IN job_step_info_msg_ptr - job step information message pointer 4179 * IN one_liner - print as a single line if true 4180 */ 4181 extern void slurm_print_job_step_info_msg(FILE *out, 4182 job_step_info_response_msg_t *job_step_info_msg_ptr, 4183 int one_liner); 4184 4185 /* 4186 * slurm_print_job_step_info - output information about a specific Slurm 4187 * job step based upon message as loaded using slurm_get_job_steps 4188 * IN out - file to write to 4189 * IN job_ptr - an individual job step information record pointer 4190 * IN one_liner - print as a single line if true 4191 */ 4192 extern void slurm_print_job_step_info(FILE *out, 4193 job_step_info_t *step_ptr, 4194 int one_liner); 4195 4196 /* 4197 * slurm_job_step_layout_get - get the slurm_step_layout_t structure for 4198 * a particular job step 4199 * 4200 * IN step_id 4201 * RET pointer to a slurm_step_layout_t (free with 4202 * slurm_free_step_layout) on success, and NULL on error. 4203 */ 4204 extern slurm_step_layout_t *slurm_job_step_layout_get(slurm_step_id_t *step_id); 4205 4206 /* 4207 * slurm_sprint_job_step_info - output information about a specific Slurm 4208 * job step based upon message as loaded using slurm_get_job_steps 4209 * IN job_ptr - an individual job step information record pointer 4210 * IN one_liner - print as a single line if true 4211 * RET out - char * containing formatted output (must be freed after call) 4212 * NULL is returned on failure. 4213 */ 4214 extern char *slurm_sprint_job_step_info(job_step_info_t *step_ptr, 4215 int one_liner); 4216 4217 /* 4218 * slurm_job_step_stat - status a current step 4219 * 4220 * IN step_id 4221 * IN node_list, optional, if NULL then all nodes in step are returned. 
4222 * OUT resp 4223 * RET SLURM_SUCCESS on success SLURM_ERROR else 4224 */ 4225 extern int slurm_job_step_stat(slurm_step_id_t *step_id, 4226 char *node_list, 4227 uint16_t use_protocol_ver, 4228 job_step_stat_response_msg_t **resp); 4229 4230 /* 4231 * slurm_job_step_get_pids - get the complete list of pids for a given 4232 * job step 4233 * 4234 * IN step_id 4235 * OUT resp 4236 * RET SLURM_SUCCESS on success SLURM_ERROR else 4237 */ 4238 extern int slurm_job_step_get_pids(slurm_step_id_t *step_id, 4239 char *node_list, 4240 job_step_pids_response_msg_t **resp); 4241 4242 extern void slurm_job_step_layout_free(slurm_step_layout_t *layout); 4243 extern void slurm_job_step_pids_free(job_step_pids_t *object); 4244 extern void slurm_job_step_pids_response_msg_free(void *object); 4245 extern void slurm_job_step_stat_free(job_step_stat_t *object); 4246 extern void slurm_job_step_stat_response_msg_free(void *object); 4247 4248 /* Update the time limit of a job step, 4249 * IN step_msg - step update messasge descriptor 4250 * RET 0 or -1 on error */ 4251 extern int slurm_update_step(step_update_request_msg_t *step_msg); 4252 4253 extern void slurm_destroy_selected_step(void *object); 4254 4255 /*****************************************************************************\ 4256 * SLURM NODE CONFIGURATION READ/PRINT/UPDATE FUNCTIONS 4257 \*****************************************************************************/ 4258 4259 /* 4260 * slurm_load_node - issue RPC to get slurm all node configuration information 4261 * if changed since update_time 4262 * IN update_time - time of current configuration data 4263 * OUT resp - place to store a node configuration pointer 4264 * IN show_flags - node filtering options (e.g. SHOW_FEDERATION) 4265 * RET 0 or a slurm error code 4266 * NOTE: free the response using slurm_free_node_info_msg 4267 */ 4268 extern int slurm_load_node(time_t update_time, node_info_msg_t **resp, 4269 uint16_t show_flags); 4270 4271 /* 4272 * slurm_load_node2 - equivalent to slurm_load_node() with addition 4273 * of cluster record for communications in a federation 4274 */ 4275 extern int slurm_load_node2(time_t update_time, node_info_msg_t **resp, 4276 uint16_t show_flags, 4277 slurmdb_cluster_rec_t *cluster); 4278 4279 /* 4280 * slurm_load_node_single - issue RPC to get slurm configuration information 4281 * for a specific node 4282 * OUT resp - place to store a node configuration pointer 4283 * IN node_name - name of the node for which information is requested 4284 * IN show_flags - node filtering options 4285 * RET 0 or a slurm error code 4286 * NOTE: free the response using slurm_free_node_info_msg 4287 */ 4288 extern int slurm_load_node_single(node_info_msg_t **resp, char *node_name, 4289 uint16_t show_flags); 4290 4291 /* 4292 * slurm_load_node_single2 - equivalent to slurm_load_node_single() with 4293 * addition of cluster record for communications in a federation 4294 */ 4295 extern int slurm_load_node_single2(node_info_msg_t **resp, char *node_name, 4296 uint16_t show_flags, 4297 slurmdb_cluster_rec_t *cluster); 4298 4299 /* Given data structures containing information about nodes and partitions, 4300 * populate the node's "partitions" field */ 4301 void 4302 slurm_populate_node_partitions(node_info_msg_t *node_buffer_ptr, 4303 partition_info_msg_t *part_buffer_ptr); 4304 4305 /* 4306 * slurm_get_node_energy - issue RPC to get the energy data of all 4307 * configured sensors on the target machine 4308 * IN host - name of node to query, NULL if localhost 4309 * IN delta - 
Use cache if data is newer than this in seconds 4310 * OUT sensors_cnt - number of sensors 4311 * OUT energy - array of acct_gather_energy_t structures on success or 4312 * NULL otherwise 4313 * RET 0 on success or a slurm error code 4314 * NOTE: free the response using xfree 4315 */ 4316 extern int slurm_get_node_energy(char *host, 4317 uint16_t context_id, 4318 uint16_t delta, 4319 uint16_t *sensors_cnt, 4320 acct_gather_energy_t **energy); 4321 4322 extern int slurm_get_node_alias_addrs(char *node_list, 4323 slurm_node_alias_addrs_t **alias_addrs); 4324 4325 /* 4326 * slurm_free_node_info_msg - free the node information response message 4327 * IN msg - pointer to node information response message 4328 * NOTE: buffer is loaded by slurm_load_node. 4329 */ 4330 extern void slurm_free_node_info_msg(node_info_msg_t *node_buffer_ptr); 4331 4332 /* 4333 * slurm_print_node_info_msg - output information about all Slurm nodes 4334 * based upon message as loaded using slurm_load_node 4335 * IN out - file to write to 4336 * IN node_info_msg_ptr - node information message pointer 4337 * IN one_liner - print as a single line if true 4338 */ 4339 extern void slurm_print_node_info_msg(FILE *out, 4340 node_info_msg_t *node_info_msg_ptr, 4341 int one_liner); 4342 4343 /* 4344 * slurm_print_node_table - output information about a specific Slurm node 4345 * based upon message as loaded using slurm_load_node 4346 * IN out - file to write to 4347 * IN node_ptr - an individual node information record pointer 4348 * IN one_liner - print as a single line if true 4349 */ 4350 extern void slurm_print_node_table(FILE *out, 4351 node_info_t *node_ptr, 4352 int one_liner); 4353 4354 /* 4355 * slurm_sprint_node_table - output information about a specific Slurm node 4356 * based upon message as loaded using slurm_load_node 4357 * IN node_ptr - an individual node information record pointer 4358 * IN one_liner - print as a single line if true 4359 * RET out - char * containing formatted output (must be freed after call) 4360 * NULL is returned on failure.
4361 */ 4362 extern char *slurm_sprint_node_table(node_info_t *node_ptr, 4363 int one_liner); 4364 4365 /* 4366 * slurm_init_update_node_msg - initialize node update message 4367 * OUT update_node_msg - user defined node descriptor 4368 */ 4369 void slurm_init_update_node_msg(update_node_msg_t *update_node_msg); 4370 4371 /* 4372 * slurm_create_node - issue RPC to create node(s), only usable by user root 4373 * IN node_msg - node definition(s) 4374 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set 4375 */ 4376 extern int slurm_create_node(update_node_msg_t *node_msg); 4377 4378 /* 4379 * slurm_update_node - issue RPC to a node's configuration per request, 4380 * only usable by user root 4381 * IN node_msg - description of node updates 4382 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set 4383 */ 4384 extern int slurm_update_node(update_node_msg_t *node_msg); 4385 4386 /* 4387 * slurm_delete_node - issue RPC to delete a node, only usable by user root 4388 * IN node_msg - use to pass nodelist of names to delete 4389 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set 4390 */ 4391 int slurm_delete_node(update_node_msg_t *node_msg); 4392 4393 /*****************************************************************************\ 4394 * SLURM FRONT_END CONFIGURATION READ/PRINT/UPDATE FUNCTIONS 4395 \*****************************************************************************/ 4396 4397 /* 4398 * slurm_load_front_end - issue RPC to get slurm all front_end configuration 4399 * information if changed since update_time 4400 * IN update_time - time of current configuration data 4401 * IN front_end_info_msg_pptr - place to store a front_end configuration pointer 4402 * RET 0 or a slurm error code 4403 * NOTE: free the response using slurm_free_front_end_info_msg 4404 */ 4405 extern int slurm_load_front_end(time_t update_time, 4406 front_end_info_msg_t **resp); 4407 4408 /* 4409 * slurm_free_front_end_info_msg - free the front_end information response 4410 * message 4411 * IN msg - pointer to front_end information response message 4412 * NOTE: buffer is loaded by slurm_load_front_end. 
4413 */ 4414 extern void slurm_free_front_end_info_msg(front_end_info_msg_t *front_end_buffer_ptr); 4415 4416 /* 4417 * slurm_print_front_end_info_msg - output information about all Slurm 4418 * front_ends based upon message as loaded using slurm_load_front_end 4419 * IN out - file to write to 4420 * IN front_end_info_msg_ptr - front_end information message pointer 4421 * IN one_liner - print as a single line if true 4422 */ 4423 extern void slurm_print_front_end_info_msg(FILE *out, 4424 front_end_info_msg_t *front_end_info_msg_ptr, 4425 int one_liner); 4426 /* 4427 * slurm_print_front_end_table - output information about a specific Slurm 4428 * front_ends based upon message as loaded using slurm_load_front_end 4429 * IN out - file to write to 4430 * IN front_end_ptr - an individual front_end information record pointer 4431 * IN one_liner - print as a single line if true 4432 */ 4433 extern void slurm_print_front_end_table(FILE *out, 4434 front_end_info_t *front_end_ptr, 4435 int one_liner); 4436 4437 /* 4438 * slurm_sprint_front_end_table - output information about a specific Slurm 4439 * front_end based upon message as loaded using slurm_load_front_end 4440 * IN front_end_ptr - an individual front_end information record pointer 4441 * IN one_liner - print as a single line if true 4442 * RET out - char * containing formatted output (must be freed after call) 4443 * NULL is returned on failure. 4444 */ 4445 extern char *slurm_sprint_front_end_table(front_end_info_t *front_end_ptr, 4446 int one_liner); 4447 4448 /* 4449 * slurm_init_update_front_end_msg - initialize front_end node update message 4450 * OUT update_front_end_msg - user defined node descriptor 4451 */ 4452 void slurm_init_update_front_end_msg(update_front_end_msg_t *update_front_end_msg); 4453 4454 /* 4455 * slurm_update_front_end - issue RPC to a front_end node's configuration per 4456 * request, only usable by user root 4457 * IN front_end_msg - description of front_end node updates 4458 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set 4459 */ 4460 extern int slurm_update_front_end(update_front_end_msg_t *front_end_msg); 4461 4462 4463 /*****************************************************************************\ 4464 * SLURM SWITCH TOPOLOGY CONFIGURATION READ/PRINT FUNCTIONS 4465 \*****************************************************************************/ 4466 4467 /* 4468 * slurm_load_topo - issue RPC to get slurm all switch topology configuration 4469 * information 4470 * IN node_info_msg_pptr - place to store a node configuration pointer 4471 * RET 0 or a slurm error code 4472 * NOTE: free the response using slurm_free_topo_info_msg 4473 */ 4474 extern int slurm_load_topo(topo_info_response_msg_t **topo_info_msg_pptr); 4475 4476 /* 4477 * slurm_free_topo_info_msg - free the switch topology configuration 4478 * information response message 4479 * IN msg - pointer to switch topology configuration response message 4480 * NOTE: buffer is loaded by slurm_load_topo. 
4481 */ 4482 extern void slurm_free_topo_info_msg(topo_info_response_msg_t *msg); 4483 4484 /* 4485 * slurm_print_topo_info_msg - output information about all switch topology 4486 * configuration information based upon message as loaded using 4487 * slurm_load_topo 4488 * IN out - file to write to 4489 * IN topo_info_msg_ptr - switch topology information message pointer 4490 * IN node_list - NULL to print all topology information 4491 * IN one_liner - print as a single line if not zero 4492 */ 4493 extern void slurm_print_topo_info_msg(FILE *out, 4494 topo_info_response_msg_t *topo_info_msg_ptr, 4495 char *node_list, int one_liner); 4496 4497 /*****************************************************************************\ 4498 * SLURM SELECT READ/PRINT/UPDATE FUNCTIONS 4499 \*****************************************************************************/ 4500 4501 /* 4502 * slurm_get_select_nodeinfo - get data from a select node credential 4503 * IN nodeinfo - updated select node credential 4504 * IN data_type - type of data to enter into node credential 4505 * IN state - state of node query 4506 * IN/OUT data - the data to enter into node credential 4507 * RET 0 or -1 on error 4508 */ 4509 extern int slurm_get_select_nodeinfo(dynamic_plugin_data_t *nodeinfo, 4510 enum select_nodedata_type data_type, 4511 enum node_states state, 4512 void *data); 4513 4514 /*****************************************************************************\ 4515 * SLURM PARTITION CONFIGURATION READ/PRINT/UPDATE FUNCTIONS 4516 \*****************************************************************************/ 4517 4518 /* 4519 * slurm_init_part_desc_msg - initialize partition descriptor with 4520 * default values 4521 * IN/OUT update_part_msg - user defined partition descriptor 4522 */ 4523 extern void slurm_init_part_desc_msg(update_part_msg_t *update_part_msg); 4524 4525 /* 4526 * slurm_load_partitions - issue RPC to get slurm all partition configuration 4527 * information if changed since update_time 4528 * IN update_time - time of current configuration data 4529 * IN partition_info_msg_pptr - place to store a partition configuration 4530 * pointer 4531 * IN show_flags - partitions filtering options (e.g. 
SHOW_FEDERATION) 4532 * RET 0 or a slurm error code 4533 * NOTE: free the response using slurm_free_partition_info_msg 4534 */ 4535 extern int slurm_load_partitions(time_t update_time, 4536 partition_info_msg_t **part_buffer_ptr, 4537 uint16_t show_flags); 4538 4539 /* 4540 * slurm_load_partitions2 - equivalent to slurm_load_partitions() with addition 4541 * of cluster record for communications in a federation 4542 */ 4543 extern int slurm_load_partitions2(time_t update_time, 4544 partition_info_msg_t **resp, 4545 uint16_t show_flags, 4546 slurmdb_cluster_rec_t *cluster); 4547 4548 /* 4549 * slurm_free_partition_info_msg - free the partition information 4550 * response message 4551 * IN msg - pointer to partition information response message 4552 * NOTE: buffer is loaded by slurm_load_partitions 4553 */ 4554 extern void slurm_free_partition_info_msg(partition_info_msg_t *part_info_ptr); 4555 4556 /* 4557 * slurm_print_partition_info_msg - output information about all Slurm 4558 * partitions based upon message as loaded using slurm_load_partitions 4559 * IN out - file to write to 4560 * IN part_info_ptr - partitions information message pointer 4561 * IN one_liner - print as a single line if true 4562 */ 4563 extern void slurm_print_partition_info_msg(FILE *out, partition_info_msg_t *part_info_ptr, int one_liner); 4564 4565 /* 4566 * slurm_print_partition_info - output information about a specific Slurm 4567 * partition based upon message as loaded using slurm_load_partitions 4568 * IN out - file to write to 4569 * IN part_ptr - an individual partition information record pointer 4570 * IN one_liner - print as a single line if true 4571 */ 4572 extern void slurm_print_partition_info(FILE *out, 4573 partition_info_t *part_ptr, 4574 int one_liner); 4575 4576 /* 4577 * slurm_sprint_partition_info - output information about a specific Slurm 4578 * partition based upon message as loaded using slurm_load_partitions 4579 * IN part_ptr - an individual partition information record pointer 4580 * IN one_liner - print as a single line if true 4581 * RET out - char * with formatted output (must be freed after call) 4582 * NULL is returned on failure. 
4583 */ 4584 extern char *slurm_sprint_partition_info(partition_info_t *part_ptr, 4585 int one_liner); 4586 4587 /* 4588 * slurm_create_partition - create a new partition, only usable by user root 4589 * IN part_msg - description of partition configuration 4590 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set 4591 */ 4592 extern int slurm_create_partition(update_part_msg_t *part_msg); 4593 4594 /* 4595 * slurm_update_partition - issue RPC to update a partition's configuration 4596 * per request, only usable by user root 4597 * IN part_msg - description of partition updates 4598 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set 4599 */ 4600 extern int slurm_update_partition(update_part_msg_t *part_msg); 4601 4602 /* 4603 * slurm_delete_partition - issue RPC to delete a partition, only usable 4604 * by user root 4605 * IN part_msg - description of partition to delete 4606 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set 4607 */ 4608 extern int slurm_delete_partition(delete_part_msg_t *part_msg); 4609 4610 /*****************************************************************************\ 4611 * SLURM RESERVATION CONFIGURATION READ/PRINT/UPDATE FUNCTIONS 4612 \*****************************************************************************/ 4613 4614 /* 4615 * slurm_init_resv_desc_msg - initialize reservation descriptor with 4616 * default values 4617 * OUT job_desc_msg - user defined partition descriptor 4618 */ 4619 extern void slurm_init_resv_desc_msg(resv_desc_msg_t *update_resv_msg); 4620 /* 4621 * slurm_create_reservation - create a new reservation, only usable by user root 4622 * IN resv_msg - description of reservation 4623 * RET name of reservation on success (caller must free the memory), 4624 * otherwise return NULL and set errno to indicate the error 4625 */ 4626 extern char *slurm_create_reservation(resv_desc_msg_t *resv_msg); 4627 4628 /* 4629 * slurm_update_reservation - modify an existing reservation, only usable by 4630 * user root 4631 * IN resv_msg - description of reservation 4632 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set 4633 */ 4634 extern int slurm_update_reservation(resv_desc_msg_t *resv_msg); 4635 4636 /* 4637 * slurm_delete_reservation - issue RPC to delete a reservation, only usable 4638 * by user root 4639 * IN resv_msg - description of reservation to delete 4640 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set 4641 */ 4642 extern int slurm_delete_reservation(reservation_name_msg_t *resv_msg); 4643 4644 /* 4645 * slurm_load_reservations - issue RPC to get all slurm reservation 4646 * configuration information if changed since update_time 4647 * IN update_time - time of current configuration data 4648 * IN reserve_info_msg_pptr - place to store a reservation configuration 4649 * pointer 4650 * RET 0 or a slurm error code 4651 * NOTE: free the response using slurm_free_reservation_info_msg 4652 */ 4653 extern int slurm_load_reservations(time_t update_time, 4654 reserve_info_msg_t **resp); 4655 4656 /* 4657 * slurm_print_reservation_info_msg - output information about all Slurm 4658 * reservations based upon message as loaded using slurm_load_reservation 4659 * IN out - file to write to 4660 * IN resv_info_ptr - reservation information message pointer 4661 * IN one_liner - print as a single line if true 4662 */ 4663 void slurm_print_reservation_info_msg(FILE* out, 4664 reserve_info_msg_t *resv_info_ptr, 4665 int one_liner); 4666 4667 /* 
4668 * slurm_print_reservation_info - output information about a specific Slurm 4669 * reservation based upon message as loaded using slurm_load_reservation 4670 * IN out - file to write to 4671 * IN resv_ptr - an individual reservation information record pointer 4672 * IN one_liner - print as a single line if true 4673 */ 4674 void slurm_print_reservation_info(FILE* out, 4675 reserve_info_t *resv_ptr, 4676 int one_liner); 4677 4678 /* 4679 * slurm_sprint_reservation_info - output information about a specific Slurm 4680 * reservation based upon message as loaded using slurm_load_reservations 4681 * IN resv_ptr - an individual reservation information record pointer 4682 * IN one_liner - print as a single line if true 4683 * RET out - char * containing formatted output (must be freed after call) 4684 * NULL is returned on failure. 4685 */ 4686 char *slurm_sprint_reservation_info(reserve_info_t *resv_ptr, int one_liner); 4687 4688 /* 4689 * slurm_free_reservation_info_msg - free the reservation information 4690 * response message 4691 * IN msg - pointer to reservation information response message 4692 * NOTE: buffer is loaded by slurm_load_reservation 4693 */ 4694 extern void slurm_free_reservation_info_msg(reserve_info_msg_t *resv_info_ptr); 4695 4696 /*****************************************************************************\ 4697 * SLURM PING/RECONFIGURE/SHUTDOWN STRUCTURES 4698 \*****************************************************************************/ 4699 4700 typedef struct { 4701 char *hostname; /* symlink - do not xfree() */ 4702 bool pinged; /* true on successful ping */ 4703 long latency; /* time to ping or timeout on !pinged */ 4704 /* 4705 * controller offset which defines default mode: 4706 * 0: primary 4707 * 1: backup 4708 * 2+: backup# 4709 */ 4710 int offset; 4711 } controller_ping_t; 4712 4713 /*****************************************************************************\ 4714 * SLURM PING/RECONFIGURE/SHUTDOWN FUNCTIONS 4715 \*****************************************************************************/ 4716 4717 /* 4718 * slurm_ping - issue RPC to have Slurm controller (slurmctld) 4719 * IN dest - controller to contact (0=primary, 1=backup, 2=backup2, etc.) 4720 * RET 0 or a slurm error code 4721 */ 4722 extern int slurm_ping(int dest); 4723 4724 /* 4725 * RET array of each ping result (NULL terminated). 4726 * Caller must xfree() the result. 4727 */ 4728 extern controller_ping_t *ping_all_controllers(); 4729 4730 /* 4731 * slurm_reconfigure - issue RPC to have Slurm controller (slurmctld) 4732 * reload its configuration file 4733 * RET 0 or a slurm error code 4734 */ 4735 extern int slurm_reconfigure(void); 4736 4737 /* 4738 * slurm_shutdown - issue RPC to have Slurm controller (slurmctld) 4739 * cease operations, both the primary and all backup controllers 4740 * are shutdown. 4741 * IN options - 0: all slurm daemons are shutdown 4742 * 1: slurmctld generates a core file 4743 * 2: only the slurmctld is shutdown (no core file) 4744 * RET 0 or a slurm error code 4745 */ 4746 extern int slurm_shutdown(uint16_t options); 4747 4748 /* 4749 * slurm_takeover - issue RPC to have a Slurm backup controller take over the 4750 * primary controller. 
REQUEST_CONTROL is sent by the backup 4751 * to the primary controller to take control 4752 * backup_inx IN - Index of BackupController to assume controller (typically 1) 4753 * RET 0 or a slurm error code 4754 */ 4755 extern int slurm_takeover(int backup_inx); 4756 4757 /* 4758 * slurm_set_debugflags - issue RPC to set slurm controller debug flags 4759 * IN debug_flags_plus - debug flags to be added 4760 * IN debug_flags_minus - debug flags to be removed 4761 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set 4762 */ 4763 extern int slurm_set_debugflags(uint64_t debug_flags_plus, 4764 uint64_t debug_flags_minus); 4765 /* 4766 * slurm_set_slurmd_debug_flags - issue RPC to set slurmd debug flags 4767 * IN debug_flags_plus - debug flags to be added 4768 * IN debug_flags_minus - debug flags to be removed 4769 * IN debug_flags_set - new debug flags value 4770 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR 4771 */ 4772 extern int slurm_set_slurmd_debug_flags(char *node_list, 4773 uint64_t debug_flags_plus, 4774 uint64_t debug_flags_minus); 4775 4776 /* 4777 * slurm_set_slurmd_debug_level - issue RPC to set slurmd debug level 4778 * IN debug_level - requested debug level 4779 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR 4780 */ 4781 extern int slurm_set_slurmd_debug_level(char *node_list, uint32_t debug_level); 4782 4783 /* 4784 * slurm_set_debug_level - issue RPC to set slurm controller debug level 4785 * IN debug_level - requested debug level 4786 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set 4787 */ 4788 extern int slurm_set_debug_level(uint32_t debug_level); 4789 4790 /* 4791 * slurm_set_schedlog_level - issue RPC to set slurm scheduler log level 4792 * IN schedlog_level - requested scheduler log level 4793 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set 4794 */ 4795 extern int slurm_set_schedlog_level(uint32_t schedlog_level); 4796 4797 /* 4798 * slurm_set_fs_dampeningfactor - issue RPC to set slurm fs dampening factor 4799 * IN factor - requested fs dampening factor 4800 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set 4801 */ 4802 extern int slurm_set_fs_dampeningfactor(uint16_t factor); 4803 4804 /* 4805 * slurm_update_suspend_exc_nodes - issue RPC to set SuspendExcNodes 4806 * IN nodes - string to set 4807 * IN mode - Whether to set, append or remove nodes from the setting 4808 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set 4809 */ 4810 extern int slurm_update_suspend_exc_nodes(char *nodes, update_mode_t mode); 4811 4812 /* 4813 * slurm_update_suspend_exc_parts - issue RPC to set SuspendExcParts 4814 * IN parts - string to set 4815 * IN mode - Whether to set, append or remove partitions from the setting 4816 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set 4817 */ 4818 extern int slurm_update_suspend_exc_parts(char *parts, update_mode_t mode); 4819 4820 /* 4821 * slurm_update_suspend_exc_states - issue RPC to set SuspendExcStates 4822 * IN states - string to set 4823 * IN mode - Whether to set, append or remove states from the setting 4824 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set 4825 */ 4826 extern int slurm_update_suspend_exc_states(char *states, update_mode_t mode); 4827 4828 /*****************************************************************************\ 4829 * SLURM JOB SUSPEND FUNCTIONS 4830 
\*****************************************************************************/ 4831 4832 /* 4833 * slurm_suspend - suspend execution of a job. 4834 * IN job_id - job on which to perform operation 4835 * RET 0 or a slurm error code 4836 */ 4837 extern int slurm_suspend(uint32_t job_id); 4838 4839 /* 4840 * slurm_suspend2 - suspend execution of a job. 4841 * IN job_id in string form - job on which to perform operation, may be job 4842 * array specification (e.g. "123_1-20,44"); 4843 * OUT resp - per task response to the request, 4844 * free using slurm_free_job_array_resp() 4845 * RET 0 or a slurm error code 4846 */ 4847 extern int slurm_suspend2(char *job_id, job_array_resp_msg_t **resp); 4848 4849 /* 4850 * slurm_resume - resume execution of a previously suspended job. 4851 * IN job_id - job on which to perform operation 4852 * RET 0 or a slurm error code 4853 */ 4854 extern int slurm_resume(uint32_t job_id); 4855 4856 /* 4857 * slurm_resume2 - resume execution of a previously suspended job. 4858 * IN job_id in string form - job on which to perform operation, may be job 4859 * array specification (e.g. "123_1-20,44"); 4860 * OUT resp - per task response to the request, 4861 * free using slurm_free_job_array_resp() 4862 * RET 0 or a slurm error code 4863 */ 4864 extern int slurm_resume2(char *job_id, job_array_resp_msg_t **resp); 4865 4866 /* Free job array oriented response with individual return codes by task ID */ 4867 extern void slurm_free_job_array_resp(job_array_resp_msg_t *resp); 4868 4869 /* 4870 * slurm_requeue - re-queue a batch job, if already running 4871 * then terminate it first 4872 * IN job_id - job on which to perform operation 4873 * IN flags - JOB_SPECIAL_EXIT - job should be placed special exit state and 4874 * held. 4875 * JOB_REQUEUE_HOLD - job should be placed JOB_PENDING state and 4876 * held. 4877 * JOB_RECONFIG_FAIL - Node configuration for job failed 4878 * JOB_RUNNING - Operate only on jobs in a state of 4879 * CONFIGURING, RUNNING, STOPPED or SUSPENDED. 4880 * RET 0 or a slurm error code 4881 */ 4882 extern int slurm_requeue(uint32_t job_id, uint32_t flags); 4883 4884 /* 4885 * slurm_requeue2 - re-queue a batch job, if already running 4886 * then terminate it first 4887 * IN job_id in string form - job on which to perform operation, may be job 4888 * array specification (e.g. "123_1-20,44"); 4889 * IN flags - JOB_SPECIAL_EXIT - job should be placed special exit state and 4890 * held. 4891 * JOB_REQUEUE_HOLD - job should be placed JOB_PENDING state and 4892 * held. 4893 * JOB_RECONFIG_FAIL - Node configuration for job failed 4894 * JOB_RUNNING - Operate only on jobs in a state of 4895 * CONFIGURING, RUNNING, STOPPED or SUSPENDED. 
4896 * OUT resp - per task response to the request, 4897 * free using slurm_free_job_array_resp() 4898 * RET 0 or a slurm error code 4899 */ 4900 extern int slurm_requeue2(char *job_id, uint32_t flags, 4901 job_array_resp_msg_t **resp); 4902 4903 /*****************************************************************************\ 4904 * SLURM TRIGGER FUNCTIONS 4905 \*****************************************************************************/ 4906 4907 /* 4908 * slurm_set_trigger - Set an event trigger 4909 * RET 0 or a slurm error code 4910 */ 4911 extern int slurm_set_trigger(trigger_info_t *trigger_set); 4912 4913 /* 4914 * slurm_clear_trigger - Clear (remove) an existing event trigger 4915 * RET 0 or a slurm error code 4916 */ 4917 extern int slurm_clear_trigger(trigger_info_t *trigger_clear); 4918 4919 /* 4920 * slurm_get_triggers - Get all event trigger information 4921 * Use slurm_free_trigger_msg() to free the memory allocated by this function 4922 * RET 0 or a slurm error code 4923 */ 4924 extern int slurm_get_triggers(trigger_info_msg_t **trigger_get); 4925 4926 /* 4927 * slurm_pull_trigger - Pull an event trigger 4928 * RET 0 or a slurm error code 4929 */ 4930 extern int slurm_pull_trigger(trigger_info_t *trigger_pull); 4931 4932 /* 4933 * slurm_free_trigger_msg - Free data structure returned by 4934 * slurm_get_triggers() 4935 */ 4936 extern void slurm_free_trigger_msg(trigger_info_msg_t *trigger_free); 4937 4938 /* 4939 * slurm_init_trigger_msg - initialize trigger clear/update message 4940 * OUT trigger_info_msg - user defined trigger descriptor 4941 */ 4942 void slurm_init_trigger_msg(trigger_info_t *trigger_info_msg); 4943 4944 /*****************************************************************************\ 4945 * SLURM BURST BUFFER FUNCTIONS 4946 \*****************************************************************************/ 4947 #define BB_FLAG_DISABLE_PERSISTENT 0x0001 /* Disable regular user to create 4948 * and destroy persistent burst 4949 * buffers */ 4950 #define BB_FLAG_ENABLE_PERSISTENT 0x0002 /* Allow regular user to create 4951 * and destroy persistent burst 4952 * buffers */ 4953 #define BB_FLAG_EMULATE_CRAY 0x0004 /* Using dw_wlm_cli emulator */ 4954 #define BB_FLAG_PRIVATE_DATA 0x0008 /* Buffers only visible to owner */ 4955 #define BB_FLAG_TEARDOWN_FAILURE 0x0010 /* Teardown after failed staged in/out */ 4956 4957 #define BB_SIZE_IN_NODES 0x8000000000000000 4958 /* 4959 * Burst buffer states: Keep in sync with bb_state_string() and bb_state_num() 4960 * in slurm_protocol_defs.c. 
4961 */ 4962 #define BB_STATE_PENDING 0x0000 /* Placeholder: no action started */ 4963 #define BB_STATE_ALLOCATING 0x0001 /* Cray: bbs_setup started */ 4964 #define BB_STATE_ALLOCATED 0x0002 /* Cray: bbs_setup started */ 4965 #define BB_STATE_DELETING 0x0005 /* Cray: bbs_setup started */ 4966 #define BB_STATE_DELETED 0x0006 /* Cray: bbs_setup started */ 4967 #define BB_STATE_STAGING_IN 0x0011 /* Cray: bbs_data_in started */ 4968 #define BB_STATE_STAGED_IN 0x0012 /* Cray: bbs_data_in complete */ 4969 #define BB_STATE_PRE_RUN 0x0018 /* Cray: bbs_pre_run started */ 4970 #define BB_STATE_ALLOC_REVOKE 0x001a /* Cray: allocation revoked */ 4971 #define BB_STATE_RUNNING 0x0021 /* Job is running */ 4972 #define BB_STATE_SUSPEND 0x0022 /* Job is suspended (future) */ 4973 #define BB_STATE_POST_RUN 0x0029 /* Cray: bbs_post_run started */ 4974 #define BB_STATE_STAGING_OUT 0x0031 /* Cray: bbs_data_out started */ 4975 #define BB_STATE_STAGED_OUT 0x0032 /* Cray: bbs_data_out complete */ 4976 #define BB_STATE_TEARDOWN 0x0041 /* Cray: bbs_teardown started */ 4977 #define BB_STATE_TEARDOWN_FAIL 0x0043 /* Cray: bbs_teardown failed, retrying */ 4978 #define BB_STATE_COMPLETE 0x0045 /* Cray: bbs_teardown complete */ 4979 4980 /* Information about alternate pools or other burst buffer resources */ 4981 typedef struct { 4982 uint64_t granularity; /* Granularity of resource allocation size */ 4983 char *name; /* Resource (pool) name */ 4984 uint64_t total_space; /* Total size of available resources, unused 4985 * by burst_buffer_resv_t */ 4986 uint64_t used_space; /* Allocated space, in bytes */ 4987 uint64_t unfree_space; /* used plus drained space, units are bytes */ 4988 } burst_buffer_pool_t; 4989 4990 typedef struct { 4991 char *account; /* Associated account (for limits) */ 4992 uint32_t array_job_id; 4993 uint32_t array_task_id; 4994 time_t create_time; /* Time of creation */ 4995 uint32_t job_id; 4996 char *name; /* Name of persistent burst buffer */ 4997 char *partition; /* Associated partition (for limits) */ 4998 char *pool; /* Resource (pool) name */ 4999 char *qos; /* Associated QOS (for limits) */ 5000 uint64_t size; /* In bytes by default */ 5001 uint16_t state; /* See BB_STATE_* */ 5002 uint32_t user_id; 5003 } burst_buffer_resv_t; 5004 5005 typedef struct { 5006 uint32_t user_id; 5007 uint64_t used; 5008 } burst_buffer_use_t; 5009 5010 typedef struct { 5011 char *allow_users; 5012 char *default_pool; /* Name of default pool to use */ 5013 char *create_buffer; 5014 char *deny_users; 5015 char *destroy_buffer; 5016 uint32_t flags; /* See BB_FLAG_* above */ 5017 char *get_sys_state; 5018 char *get_sys_status; 5019 uint64_t granularity; /* Granularity of resource allocation */ 5020 uint32_t pool_cnt; /* Count of records in pool_ptr */ 5021 burst_buffer_pool_t *pool_ptr; 5022 char *name; /* Plugin name */ 5023 uint32_t other_timeout; /* Seconds or zero */ 5024 uint32_t stage_in_timeout; /* Seconds or zero */ 5025 uint32_t stage_out_timeout; /* Seconds or zero */ 5026 char *start_stage_in; 5027 char *start_stage_out; 5028 char *stop_stage_in; 5029 char *stop_stage_out; 5030 uint64_t total_space; /* In bytes */ 5031 uint64_t unfree_space; /* Allocated or drained, in bytes */ 5032 uint64_t used_space; /* Allocated, in bytes */ 5033 uint32_t validate_timeout; /* Seconds or zero */ 5034 5035 uint32_t buffer_count; 5036 burst_buffer_resv_t *burst_buffer_resv_ptr; 5037 5038 uint32_t use_count; 5039 burst_buffer_use_t *burst_buffer_use_ptr; 5040 } burst_buffer_info_t; 5041 5042 typedef struct { 5043 
burst_buffer_info_t *burst_buffer_array; 5044 uint32_t record_count; /* Elements in burst_buffer_array */ 5045 } burst_buffer_info_msg_t; 5046 5047 /* 5048 * slurm_burst_buffer_state_string - translate burst buffer state number to 5049 * it string equivalent 5050 */ 5051 extern char *slurm_burst_buffer_state_string(uint16_t state); 5052 5053 /* 5054 * slurm_load_burst_buffer_stat - issue RPC to get burst buffer status 5055 * IN argc - count of status request options 5056 * IN argv - status request options 5057 * OUT status_resp - status response, memory must be released using xfree() 5058 * RET 0 or a slurm error code 5059 */ 5060 extern int slurm_load_burst_buffer_stat(int argc, char **argv, 5061 char **status_resp); 5062 5063 /* 5064 * slurm_load_burst_buffer_info - issue RPC to get slurm all burst buffer plugin 5065 * information 5066 * OUT burst_buffer_info_msg_pptr - place to store a burst buffer configuration 5067 * pointer 5068 * RET 0 or a slurm error code 5069 * NOTE: free the response using slurm_free_burst_buffer_info_msg 5070 */ 5071 extern int slurm_load_burst_buffer_info(burst_buffer_info_msg_t **burst_buffer_info_msg_pptr); 5072 5073 /* 5074 * slurm_free_burst_buffer_info_msg - free buffer returned by 5075 * slurm_load_burst_buffer 5076 * IN burst_buffer_info_msg_ptr - pointer to burst_buffer_info_msg_t 5077 * RET 0 or a slurm error code 5078 */ 5079 extern void slurm_free_burst_buffer_info_msg(burst_buffer_info_msg_t *burst_buffer_info_msg); 5080 5081 /* 5082 * slurm_print_burst_buffer_info_msg - output information about burst buffers 5083 * based upon message as loaded using slurm_load_burst_buffer 5084 * IN out - file to write to 5085 * IN info_ptr - burst_buffer information message pointer 5086 * IN one_liner - print as a single line if true 5087 * IN verbose - higher values to log additional details 5088 */ 5089 extern void slurm_print_burst_buffer_info_msg(FILE *out, 5090 burst_buffer_info_msg_t *info_ptr, 5091 int one_liner, 5092 int verbosity); 5093 5094 /* 5095 * slurm_print_burst_buffer_record - output information about a specific Slurm 5096 * burst_buffer record based upon message as loaded using 5097 * slurm_load_burst_buffer_info() 5098 * IN out - file to write to 5099 * IN burst_buffer_ptr - an individual burst buffer record pointer 5100 * IN one_liner - print as a single line if not zero 5101 * IN verbose - higher values to log additional details 5102 * RET out - char * containing formatted output (must be freed after call) 5103 * NULL is returned on failure. 5104 */ 5105 extern void slurm_print_burst_buffer_record(FILE *out, 5106 burst_buffer_info_t *burst_buffer_ptr, 5107 int one_liner, 5108 int verbose); 5109 5110 /* 5111 * slurm_network_callerid - issue RPC to get the job id of a job from a remote 5112 * slurmd based upon network socket information. 5113 * 5114 * IN req - Information about network connection in question 5115 * OUT job_id - ID of the job or NO_VAL 5116 * OUT node_name - name of the remote slurmd 5117 * IN node_name_size - size of the node_name buffer 5118 * RET SLURM_SUCCESS or SLURM_ERROR on error 5119 */ 5120 extern int slurm_network_callerid(network_callerid_msg_t req, 5121 uint32_t *job_id, 5122 char *node_name, 5123 int node_name_size); 5124 5125 /* 5126 * Move the specified job ID to the top of the queue for a given user ID, 5127 * partition, account, and QOS. 5128 * IN job_id_str - a job id 5129 * RET 0 or -1 on error */ 5130 extern int slurm_top_job(char *job_id_str); 5131 5132 /* 5133 * Fetch an auth token for a given username. 
5134 * IN username - NULL, or a specific username if run as SlurmUser/root. 5135 * IN lifespan - lifespan the token should be valid for. 5136 */ 5137 extern char *slurm_fetch_token(char *username, int lifespan); 5138 5139 /*****************************************************************************\ 5140 * SLURM FEDERATION FUNCTIONS 5141 \*****************************************************************************/ 5142 5143 /* 5144 * slurm_load_federation - issue RPC to get federation status from controller 5145 * IN/OUT fed_pptr - place to store returned federation information. 5146 * slurmdb_federation_rec_t treated as a void pointer to since 5147 * slurm.h doesn't have ties to slurmdb.h. 5148 * NOTE: Use slurm_destroy_federation_rec() to release the returned memory 5149 * RET 0 or -1 on error 5150 */ 5151 extern int slurm_load_federation(void **fed_pptr); 5152 5153 /* 5154 * slurm_print_federation - prints slurmdb_federation_rec_t (passed as void* 5155 * since slurm.h doesn't know about slurmdb.h). 5156 */ 5157 extern void slurm_print_federation(void *fed); 5158 5159 /* 5160 * slurm_destroy_federation_rec - Release memory allocated by 5161 * slurm_load_federation() 5162 */ 5163 extern void slurm_destroy_federation_rec(void *fed); 5164 5165 /*****************************************************************************\ 5166 * SLURM CRONTAB FUNCTIONS 5167 \*****************************************************************************/ 5168 5169 extern int slurm_request_crontab(uid_t uid, char **crontab, 5170 char **disabled_lines); 5171 5172 typedef struct { 5173 char *err_msg; 5174 char *failed_lines; 5175 uint32_t *jobids; 5176 uint32_t jobids_count; 5177 char *job_submit_user_msg; 5178 uint32_t return_code; 5179 } crontab_update_response_msg_t; 5180 5181 extern crontab_update_response_msg_t *slurm_update_crontab(uid_t uid, gid_t gid, 5182 char *crontab, 5183 list_t *jobs); 5184 5185 extern int slurm_remove_crontab(uid_t uid, gid_t gid); 5186 5187 #ifdef __cplusplus 5188 } 5189 #endif 5190 5191 #endif
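To make the load/print/free pattern documented above concrete, here is a minimal sketch that prints the API version and then dumps the controller configuration. It assumes a recent libslurm in which slurm_init() and slurm_fini() are declared in the earlier part of this header, that the SLURM_VERSION_MAJOR/MINOR/MICRO macros take the packed version number as their argument (per the slurm_api_version comment), and that the program is linked with -lslurm; these assumptions go slightly beyond the declarations shown above.

#include <stdio.h>
#include <slurm/slurm.h>
#include <slurm/slurm_errno.h>

int main(void)
{
	slurm_init(NULL);		/* assumed: declared earlier in this header */

	long version = slurm_api_version();
	printf("Slurm API %ld.%ld.%ld\n",
	       SLURM_VERSION_MAJOR(version),
	       SLURM_VERSION_MINOR(version),
	       SLURM_VERSION_MICRO(version));

	slurm_conf_t *conf = NULL;
	/* An update_time of 0 always returns a full copy of the configuration. */
	if (slurm_load_ctl_conf((time_t) 0, &conf) != SLURM_SUCCESS) {
		fprintf(stderr, "slurm_load_ctl_conf failed\n");
		slurm_fini();
		return 1;
	}

	slurm_print_ctl_conf(stdout, conf);	/* similar to "scontrol show config" */
	slurm_free_ctl_conf(conf);		/* free as noted above */

	slurm_fini();
	return 0;
}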
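The job table follows the same pattern with slurm_load_jobs(), slurm_print_job_info_msg() and slurm_free_job_info_msg(). Another sketch under the same slurm_init()/slurm_fini() assumption; show_flags is left at 0 here, though SHOW_ALL and related flags defined earlier in slurm.h could be passed instead.

#include <stdio.h>
#include <slurm/slurm.h>
#include <slurm/slurm_errno.h>

int main(void)
{
	job_info_msg_t *jobs = NULL;

	slurm_init(NULL);		/* assumed: declared earlier in this header */

	/* update_time = 0 requests the full job table. */
	if (slurm_load_jobs((time_t) 0, &jobs, 0) != SLURM_SUCCESS) {
		fprintf(stderr, "slurm_load_jobs failed\n");
		slurm_fini();
		return 1;
	}

	/* one_liner != 0 prints one record per line, much like scontrol -o. */
	slurm_print_job_info_msg(stdout, jobs, 1);
	slurm_free_job_info_msg(jobs);

	slurm_fini();
	return 0;
}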
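Job steps work the same way; per the slurm_get_job_steps comment, passing NO_VAL (defined earlier in slurm.h) for both the job and step IDs selects every step of every job. A sketch under the same slurm_init()/slurm_fini() assumption:

#include <stdio.h>
#include <slurm/slurm.h>
#include <slurm/slurm_errno.h>

int main(void)
{
	job_step_info_response_msg_t *steps = NULL;

	slurm_init(NULL);		/* assumed: declared earlier in this header */

	/* NO_VAL for both IDs asks for every step of every job. */
	if (slurm_get_job_steps((time_t) 0, NO_VAL, NO_VAL, &steps, 0) !=
	    SLURM_SUCCESS) {
		fprintf(stderr, "slurm_get_job_steps failed\n");
		slurm_fini();
		return 1;
	}

	slurm_print_job_step_info_msg(stdout, steps, 0);
	slurm_free_job_step_info_response_msg(steps);

	slurm_fini();
	return 0;
}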
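For nodes, the per-record printer can be driven directly instead of printing the whole message. Beyond the declarations shown here, this sketch assumes that node_info_msg_t (defined earlier in slurm.h) carries record_count and node_array members, and that slurm_init()/slurm_fini() are available as above.

#include <stdio.h>
#include <slurm/slurm.h>
#include <slurm/slurm_errno.h>

int main(void)
{
	node_info_msg_t *nodes = NULL;

	slurm_init(NULL);		/* assumed: declared earlier in this header */

	if (slurm_load_node((time_t) 0, &nodes, 0) != SLURM_SUCCESS) {
		fprintf(stderr, "slurm_load_node failed\n");
		slurm_fini();
		return 1;
	}

	/* Walk the individual records; one_liner = 1 keeps each node on one line. */
	for (uint32_t i = 0; i < nodes->record_count; i++)
		slurm_print_node_table(stdout, &nodes->node_array[i], 1);

	slurm_free_node_info_msg(nodes);
	slurm_fini();
	return 0;
}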
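The ping interface takes a controller index rather than a hostname; per the slurm_ping comment, 0 is the primary controller and 1 the first backup. A short sketch, with the same slurm_init()/slurm_fini() caveat:

#include <stdio.h>
#include <slurm/slurm.h>
#include <slurm/slurm_errno.h>

int main(void)
{
	slurm_init(NULL);		/* assumed: declared earlier in this header */

	/* Probe the primary controller and the first backup. */
	for (int dest = 0; dest < 2; dest++) {
		if (slurm_ping(dest) == SLURM_SUCCESS)
			printf("controller %d is responding\n", dest);
		else
			printf("controller %d did not respond\n", dest);
	}

	slurm_fini();
	return 0;
}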
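Two of the smaller job queries combine naturally: slurm_pid2jobid() maps a local process ID to its job, and slurm_get_rem_time() reports the job's remaining time. The getpid() call below is only a stand-in; the lookup succeeds only on a compute node for a process that actually belongs to a job.

#include <stdio.h>
#include <unistd.h>
#include <slurm/slurm.h>
#include <slurm/slurm_errno.h>

int main(void)
{
	uint32_t job_id = 0;

	slurm_init(NULL);		/* assumed: declared earlier in this header */

	if (slurm_pid2jobid(getpid(), &job_id) == SLURM_SUCCESS) {
		long remaining = slurm_get_rem_time(job_id);
		printf("pid %d belongs to job %u, %ld seconds remain\n",
		       (int) getpid(), job_id, remaining);
	} else {
		fprintf(stderr, "this process is not part of a Slurm job\n");
	}

	slurm_fini();
	return 0;
}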
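Finally, a sketch of the suspend/requeue group. The job ID is a placeholder, these calls require appropriate privileges, and JOB_REQUEUE_HOLD is assumed to be one of the flag macros referenced in the slurm_requeue comment (defined earlier in slurm.h).

#include <stdio.h>
#include <slurm/slurm.h>
#include <slurm/slurm_errno.h>

int main(void)
{
	uint32_t job_id = 12345;	/* placeholder job ID */

	slurm_init(NULL);		/* assumed: declared earlier in this header */

	if (slurm_suspend(job_id) != SLURM_SUCCESS)
		fprintf(stderr, "suspend of job %u failed\n", job_id);

	if (slurm_resume(job_id) != SLURM_SUCCESS)
		fprintf(stderr, "resume of job %u failed\n", job_id);

	/* Requeue the job and leave it held, per the flag description above. */
	if (slurm_requeue(job_id, JOB_REQUEUE_HOLD) != SLURM_SUCCESS)
		fprintf(stderr, "requeue of job %u failed\n", job_id);

	slurm_fini();
	return 0;
}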