Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-06-20 08:46:38

0001 /*****************************************************************************\
0002  *  spank.h - Stackable Plug-in Architecture for Node job Kontrol
0003  *****************************************************************************
0004  *  Copyright (C) 2002-2007 The Regents of the University of California.
0005  *  Copyright (C) 2008-2010 Lawrence Livermore National Security.
0006  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
0007  *  CODE-OCEC-09-009. All rights reserved.
0008  *
0009  *  This file is part of Slurm, a resource management program.
0010  *  For details, see <https://slurm.schedmd.com/>.
0011  *  Please also read the included file: DISCLAIMER.
0012  *
0013  *  Slurm is free software; you can redistribute it and/or modify it under
0014  *  the terms of the GNU General Public License as published by the Free
0015  *  Software Foundation; either version 2 of the License, or (at your option)
0016  *  any later version.
0017  *
0018  *  In addition, as a special exception, the copyright holders give permission
0019  *  to link the code of portions of this program with the OpenSSL library under
0020  *  certain conditions as described in each individual source file, and
0021  *  distribute linked combinations including the two. You must obey the GNU
0022  *  General Public License in all respects for all of the code used other than
0023  *  OpenSSL. If you modify file(s) with this exception, you may extend this
0024  *  exception to your version of the file(s), but you are not obligated to do
0025  *  so. If you do not wish to do so, delete this exception statement from your
0026  *  version.  If you delete this exception statement from all source files in
0027  *  the program, then also delete it here.
0028  *
0029  *  Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
0030  *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
0031  *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
0032  *  details.
0033  *
0034  *  You should have received a copy of the GNU General Public License along
0035  *  with Slurm; if not, write to the Free Software Foundation, Inc.,
0036  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
0037 \*****************************************************************************/
0038 #ifndef SPANK_H
0039 #define SPANK_H
0040 
0041 #include <slurm/slurm_errno.h>
0042 #include <slurm/slurm_version.h>
0043 
0044 /*  SPANK handle. Plug-in's context for running Slurm job
0045  */
0046 typedef struct spank_handle * spank_t;
0047 
0048 /*  Prototype for all spank plugin operations
0049  */
0050 typedef int (spank_f) (spank_t spank, int ac, char *argv[]);
0051 
0052 /*  SPANK plugin operations. SPANK plugin should have at least one of
0053  *   these functions defined non-NULL.
0054  *
0055  *  Plug-in callbacks are completed at the following points in slurmd:
0056  *
0057  *   slurmd
0058  *        `-> init()
0059  *        |
0060  *        `-> job_prolog()
0061  *        |
0062  *        | `-> slurmstepd
0063  *        |      `-> init ()
0064  *        |       -> process spank options
0065  *        |       -> init_post_opt ()
0066  *        |      + drop privileges (initgroups(), seteuid(), chdir())
0067  *        |      `-> user_init ()
0068  *        |      + for each task
0069  *        |      |       + fork ()
0070  *        |      |       |
0071  *        |      |       + reclaim privileges
0072  *        |      |       `-> task_init_privileged ()
0073  *        |      |       |
0074  *        |      |       + become_user ()
0075  *        |      |       `-> task_init ()
0076  *        |      |       |
0077  *        |      |       + execve ()
0078  *        |      |
0079  *        |      + reclaim privileges
0080  *        |      + for each task
0081  *        |      |     `-> task_post_fork ()
0082  *        |      |
0083  *        |      + for each task
0084  *        |      |       + wait ()
0085  *        |      |          `-> task_exit ()
0086  *        |      `-> exit ()
0087  *        |
0088  *        `---> job_epilog()
0089  *        |
0090  *        `-> slurmd_exit()
0091  *
0092  *   In srun only the init(), init_post_opt(), local_user_init(), and exit()
0093  *    callbacks are used.
0094  *
0095  *   In sbatch/salloc only the init(), init_post_opt(), and exit() callbacks
0096  *    are used.
0097  *
0098  *   In slurmd proper, only the init(), slurmd_exit(), and
0099  *    job_prolog/epilog callbacks are used.
0100  *
0101  */
0102 
0103 extern spank_f slurm_spank_init;
0104 extern spank_f slurm_spank_job_prolog;
0105 extern spank_f slurm_spank_init_post_opt;
0106 extern spank_f slurm_spank_local_user_init;
0107 extern spank_f slurm_spank_user_init;
0108 extern spank_f slurm_spank_task_init_privileged;
0109 extern spank_f slurm_spank_task_init;
0110 extern spank_f slurm_spank_task_post_fork;
0111 extern spank_f slurm_spank_task_exit;
0112 extern spank_f slurm_spank_job_epilog;
0113 extern spank_f slurm_spank_slurmd_exit;
0114 extern spank_f slurm_spank_exit;
0115 
0116 
0117 /*  Items which may be obtained from the spank handle using the
0118  *   spank_get_item () call. The expected list of variable arguments may
0119  *   be found in the comments below.
0120  *
0121  *  For example, S_JOB_NCPUS takes (uint16_t *), a pointer to uint16_t, so
0122  *   the get item call would look like:
0123  *
0124  *    uint16_t ncpus;
0125  *    spank_err_t rc = spank_get_item (spank, S_JOB_NCPUS, &ncpus);
0126  *
0127  *   while  S_JOB_PID_TO_GLOBAL_ID takes (pid_t, uint32_t *), so it would
0128  *   be called as:
0129  *
0130  *    uint32_t global_id;
0131  *    spank_err_t rc;
0132  *    rc = spank_get_item (spank, S_JOB_PID_TO_GLOBAL_ID, pid, &global_id);
0133  */
0134 enum spank_item {
0135     S_JOB_UID,               /* User id (uid_t *)                            */
0136     S_JOB_GID,               /* Primary group id (gid_t *)                   */
0137     S_JOB_ID,                /* Slurm job id (uint32_t *)                    */
0138     S_JOB_STEPID,            /* Slurm job step id (uint32_t *)               */
0139     S_JOB_NNODES,            /* Total number of nodes in job (uint32_t *)    */
0140     S_JOB_NODEID,            /* Relative id of this node (uint32_t *)        */
0141     S_JOB_LOCAL_TASK_COUNT,  /* Number of local tasks (uint32_t *)           */
0142     S_JOB_TOTAL_TASK_COUNT,  /* Total number of tasks in job (uint32_t *)    */
0143     S_JOB_NCPUS,             /* Number of CPUs used by this job (uint16_t *) */
0144     S_JOB_ARGV,              /* Command args (int *, char ***)               */
0145     S_JOB_ENV,               /* Job env array (char ***)                     */
0146     S_TASK_ID,               /* Local task id (int *)                        */
0147     S_TASK_GLOBAL_ID,        /* Global task id (uint32_t *)                  */
0148     S_TASK_EXIT_STATUS,      /* Exit status of task if exited (int *)        */
0149     S_TASK_PID,              /* Task pid (pid_t *)                           */
0150     S_JOB_PID_TO_GLOBAL_ID,  /* global task id from pid (pid_t, uint32_t *)  */
0151     S_JOB_PID_TO_LOCAL_ID,   /* local task id from pid (pid_t, uint32_t *)   */
0152     S_JOB_LOCAL_TO_GLOBAL_ID,/* local id to global id (uint32_t, uint32_t *) */
0153     S_JOB_GLOBAL_TO_LOCAL_ID,/* global id to local id (uint32_t, uint32_t *) */
0154     S_JOB_SUPPLEMENTARY_GIDS,/* Array of suppl. gids (gid_t **, int *)       */
0155     S_SLURM_VERSION,         /* Current Slurm version (char **)              */
0156     S_SLURM_VERSION_MAJOR,   /* Slurm version major release (char **)        */
0157     S_SLURM_VERSION_MINOR,   /* Slurm version minor release (char **)        */
0158     S_SLURM_VERSION_MICRO,   /* Slurm version micro release (char **)        */
0159     S_STEP_CPUS_PER_TASK,    /* CPUs allocated per task (=1 if --overcommit
0160                               * option is used, uint32_t *)                  */
0161     S_JOB_ALLOC_CORES,       /* Job allocated cores in list format (char **) */
0162     S_JOB_ALLOC_MEM,         /* Job allocated memory in MB (uint64_t *)      */
0163     S_STEP_ALLOC_CORES,      /* Step alloc'd cores in list format  (char **) */
0164     S_STEP_ALLOC_MEM,        /* Step alloc'd memory in MB (uint64_t *)       */
0165     S_SLURM_RESTART_COUNT,   /* Job restart count (uint32_t *)               */
0166     S_JOB_ARRAY_ID,          /* Slurm job array id (uint32_t *) or 0         */
0167     S_JOB_ARRAY_TASK_ID,     /* Slurm job array task id (uint32_t *)         */
0168 };
0169 
0170 typedef enum spank_item spank_item_t;
0171 
0172 /*
0173  * SPANK error codes match the Slurm internal error codes and the inherited
0174  * POSIX error codes.
0175  */
0176 typedef slurm_err_t spank_err_t;
0177 
0178 /*
0179  *  SPANK plugin context
0180  */
0181 enum spank_context {
0182     S_CTX_ERROR,             /* Error obtaining current context              */
0183     S_CTX_LOCAL,             /* Local context (srun)                         */
0184     S_CTX_REMOTE,            /* Remote context (slurmstepd)                  */
0185     S_CTX_ALLOCATOR,         /* Allocator context (sbatch/salloc)            */
0186     S_CTX_SLURMD,            /* slurmd context                               */
0187     S_CTX_JOB_SCRIPT         /* prolog/epilog context                        */
0188 };
0189 
0190 #define HAVE_S_CTX_SLURMD 1     /* slurmd context supported                  */
0191 #define HAVE_S_CTX_JOB_SCRIPT 1 /* job script (prolog/epilog) supported      */
0192 
0193 typedef enum spank_context spank_context_t;
0194 
0195 /*
0196  *  SPANK plugin options
0197  */
0198 
0199 /*
0200  *  SPANK option callback. `val' is an integer value provided by
0201  *   the plugin to distinguish between plugin-local options, `optarg'
0202  *   is an argument passed by the user (if applicable), and `remote'
0203  *   specifies whether this call is being made locally (e.g. in srun)
0204  *   or remotely (e.g. in slurmstepd/slurmd).
0205  */
0206 typedef int (*spank_opt_cb_f) (int val, const char *optarg, int remote);
0207 
0208 struct spank_option {
0209     char *         name;    /* long option provided by plugin               */
0210     char *         arginfo; /* one word description of argument if required */
0211     char *         usage;   /* Usage text                                   */
0212     int            has_arg; /* Does option require argument?                */
0213     int            val;     /* value to return using callback               */
0214     spank_opt_cb_f cb;      /* Callback function to check option value      */
0215 };
0216 
0217 /*
0218  *  Plugins may export a spank_options option table as symbol "spank_options".
0219  *   This method only works in "local" and "remote" mode. To register options
0220  *   in "allocator" mode (sbatch/salloc), use the preferred
0221  *   spank_option_register function described below.
0222  */
0223 extern struct spank_option spank_options [];
0224 
0225 /*
0226  *  SPANK plugin option table must end with the following entry:
0227  */
0228 #define SPANK_OPTIONS_TABLE_END { NULL, NULL, NULL, 0, 0, NULL }
0229 
0230 /*
0231  *  Maximum allowed length of SPANK option name:
0232  */
0233 #define SPANK_OPTION_MAXLEN      75
0234 
0235 
0236 /*  SPANK interface prototypes
0237  */
0238 #ifdef __cplusplus
0239 extern "C" {
0240 #endif
0241 
0242 /*
0243  *  Return the string representation of a spank_err_t error code.
0244  */
0245 const char *spank_strerror (spank_err_t err);
0246 
0247 /*
0248  *  Determine whether a given spank plugin symbol is supported
0249  *   in this version of SPANK interface.
0250  *
0251  *  Returns:
0252  *  = 1   The symbol is supported
0253  *  = 0   The symbol is not supported
0254  *  = -1  Invalid argument
0255  */
0256 int spank_symbol_supported (const char *symbol);
0257 
0258 /*
0259  *  Determine whether plugin is loaded in "remote" context
0260  *
0261  *  Returns:
0262  *  = 1   remote context, i.e. plugin is loaded in slurmstepd.
0263  *  = 0   not remote context
0264  *  < 0   spank handle was not valid.
0265  */
0266 int spank_remote (spank_t spank);
0267 
0268 /*
0269  *  Return the context in which the calling plugin is loaded.
0270  *
0271  *  Returns the spank_context for the calling plugin, or S_CTX_ERROR
0272  *   if the current context cannot be determined.
0273  */
0274 spank_context_t spank_context (void);
0275 
0276 /*
0277  *  Register a plugin-provided option dynamically. This function
0278  *   is only valid when called from slurm_spank_init(), and must
0279  *   be guaranteed to be called in all contexts in which it is
0280  *   used (local, remote, allocator).
0281  *
0282  *  This function is the only method to register options in
0283  *   allocator context.
0284  *
0285  *  May be called multiple times to register many options.
0286  *
0287  *  Returns ESPANK_SUCCESS on successful registration of the option
0288  *   or ESPANK_BAD_ARG if not called from slurm_spank_init().
0289  */
0290 spank_err_t spank_option_register (spank_t spank, struct spank_option *opt);
0291 
0292 /*
0293  *  Check whether spank plugin option [opt] has been activated.
0294  *   If the option takes an argument, then the option argument
0295  *   (if found) will be returned in *optarg.
0296  *  This function can be invoked from the following functions:
0297  *  slurm_spank_job_prolog, slurm_spank_local_user_init, slurm_spank_user_init,
0298  *  slurm_spank_task_init_privileged, slurm_spank_task_init,
0299  *  slurm_spank_task_exit, and slurm_spank_job_epilog.
0300  *
0301  *  Returns
0302  *   ESPANK_SUCCESS if the option was used by user. In this case
0303  *    *optarg will contain the option argument if opt->has_arg != 0.
0304  *   ESPANK_ERROR if the option wasn't used.
0305  *   ESPANK_BAD_ARG if an invalid argument was passed to the function,
0306  *    such as NULL opt, NULL opt->name, or NULL optarg when opt->has_arg != 0.
0307  *   ESPANK_NOT_AVAIL if called from improper context.
0308  */
0309 spank_err_t spank_option_getopt (spank_t spank, struct spank_option *opt,
0310     char **optarg);
0311 
0312 
0313 /*  Get the value for the current job or task item specified,
0314  *   storing the result in the subsequent pointer argument(s).
0315  *   Refer to the spank_item_t comments for argument types.
0316  *   For S_JOB_ARGV, S_JOB_ENV, and S_SLURM_VERSION* items
0317  *   the result returned to the caller should not be freed or
0318  *   modified.
0319  *
0320  *  Returns ESPANK_SUCCESS on success, ESPANK_NOT_TASK if an S_TASK*
0321  *   item is requested from outside a task context, ESPANK_BAD_ARG
0322  *   if invalid args are passed to spank_get_item or spank_get_item
0323  *   is called from an invalid context, and ESPANK_NOT_REMOTE
0324  *   if not called from slurmstepd context or spank_local_user_init.
0325  */
0326 spank_err_t spank_get_item (spank_t spank, spank_item_t item, ...);
0327 
0328 /*  Place a copy of environment variable "var" from the job's environment
0329  *   into buffer "buf" of size "len".
0330  *
0331  *  Returns ESPANK_SUCCESS on success, o/w spank_err_t on failure:
0332  *    ESPANK_BAD_ARG      = spank handle invalid or len < 0.
0333  *    ESPANK_ENV_NOEXIST  = environment variable doesn't exist in job's env.
0334  *    ESPANK_NOSPACE      = buffer too small, truncation occurred.
0335  *    ESPANK_NOT_REMOTE   = not called in remote context (i.e. from slurmstepd).
0336  */
0337 spank_err_t spank_getenv (spank_t spank, const char *var, char *buf, int len);
0338 
0339 /*
0340  *  Set the environment variable "var" to "val" in the environment of
0341  *   the current job or task in the spank handle. If overwrite != 0 an
0342  *   existing value for var will be overwritten.
0343  *
0344  *  Returns ESPANK_SUCCESS on success, o/w spank_err_t on failure:
0345  *     ESPANK_ENV_EXISTS  = var exists in job env and overwrite == 0.
0346  *     ESPANK_BAD_ARG     = spank handle invalid or var/val are NULL.
0347  *     ESPANK_NOT_REMOTE  = not called from slurmstepd.
0348  */
0349 spank_err_t spank_setenv (spank_t spank, const char *var, const char *val,
0350         int overwrite);
0351 
0352 /*
0353  *  Unset environment variable "var" in the environment of current job or
0354  *   task in the spank handle.
0355  *
0356  *  Returns ESPANK_SUCCESS on success, o/w spank_err_t on failure:
0357  *    ESPANK_BAD_ARG   = spank handle invalid or var is NULL.
0358  *    ESPANK_NOT_REMOTE = not called from slurmstepd.
0359  */
0360 spank_err_t spank_unsetenv (spank_t spank, const char *var);
0361 
0362 /*
0363  *  Set an environment variable "name" to "value" in the "job control"
0364  *   environment, which is an extra set of environment variables
0365  *   included in the environment of the Slurm prolog and epilog
0366  *   programs. Environment variables set via this function will
0367  *   be prepended with SPANK_ to differentiate them from other env
0368  *   vars, and to avoid security issues.
0369  *
0370  *  Returns ESPANK_SUCCESS on success, o/w spank_err_t on failure:
0371  *     ESPANK_ENV_EXISTS  = name exists in control env and overwrite == 0.
0372  *     ESPANK_NOT_LOCAL   = not called in local context
0373  */
0374 spank_err_t spank_job_control_setenv (spank_t sp, const char *name,
0375         const char *value, int overwrite);
0376 
0377 /*
0378  *  Place a copy of environment variable "name" from the job control
0379  *   environment into a buffer buf of size len.
0380  *
0381  *  Returns ESPANK_SUCCESS on success, o/w spank_err_t on failure:
0382  *     ESPANK_BAD_ARG     = invalid spank handle or len <= 0
0383  *     ESPANK_ENV_NOEXIST = environment var does not exist in control env
0384  *     ESPANK_NOSPACE     = buffer too small, truncation occurred.
0385  *     ESPANK_NOT_LOCAL   = not called in local context
0386  */
0387 spank_err_t spank_job_control_getenv (spank_t sp, const char *name,
0388         char *buf, int len);
0389 
0390 /*
0391  *  Unset environment variable "name" in the job control environment.
0392  *
0393  *  Returns ESPANK_SUCCESS on success, o/w spank_err_t on failure:
0394  *     ESPANK_BAD_ARG   = invalid spank handle or name is NULL
0395  *     ESPANK_NOT_LOCAL   = not called in local context
0396  */
0397 spank_err_t spank_job_control_unsetenv (spank_t sp, const char *name);
0398 
0399 /*
0400  *  Prepend the argument vector "argv" of length "argc" to the
0401  *  argument vector of the task to be spawned.
0402  *  This function can be invoked from the following functions to take effect:
0403  *  slurm_spank_task_init_privileged, and slurm_spank_task_init.
0404  *
0405  *  Returns ESPANK_SUCCESS on success, o/w spank_err_t on failure:
0406  *    ESPANK_BAD_ARG   = spank handle invalid or argv is NULL.
0407  *    ESPANK_NOT_TASK  = called from outside a task context.
0408  */
0409 spank_err_t spank_prepend_task_argv(spank_t spank, int argc, const char *argv[]);
0410 
0411 /*
0412  *  Slurm logging functions which are exported to plugins.
0413  */
0414 extern void slurm_info (const char *format, ...)
0415   __attribute__ ((format (printf, 1, 2)));
0416 extern void slurm_error (const char *format, ...)
0417   __attribute__ ((format (printf, 1, 2)));
0418 extern void slurm_verbose (const char *format, ...)
0419   __attribute__ ((format (printf, 1, 2)));
0420 extern void slurm_debug (const char *format, ...)
0421   __attribute__ ((format (printf, 1, 2)));
0422 extern void slurm_debug2 (const char *format, ...)
0423   __attribute__ ((format (printf, 1, 2)));
0424 extern void slurm_debug3 (const char *format, ...)
0425   __attribute__ ((format (printf, 1, 2)));
0426 
0427 /*
0428  * Print at the same log level as error(), but without prefixing the message
0429  * with "error: ". Useful to report back to srun commands from SPANK plugins,
0430  * as info() will only go to the logs.
0431  */
0432 extern void slurm_spank_log(const char *, ...)
0433   __attribute__ ((format (printf, 1, 2)));
0434 
0435 #ifdef __cplusplus
0436 }
0437 #endif
0438 
0439 /*
0440  *  All spank plugins must issue the following for the Slurm plugin
0441  *   loader.
0442  */
0443 #define SPANK_PLUGIN(__name, __ver) \
0444     const char plugin_name [] = #__name; \
0445     const char plugin_type [] = "spank"; \
0446     const unsigned int plugin_version = SLURM_VERSION_NUMBER; \
0447     const unsigned int spank_plugin_version = __ver;
0448 
0449 #endif /* !SPANK_H */