Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-04-25 08:29:08

0001 -- SQL command to create the production jobs table.
0002 -- This schema is designed to store detailed information about each job
0003 -- executed within the sPHENIX production system.
0004 
0005 DO $$
0006 BEGIN
0007     IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'prodstate') THEN
0008         CREATE TYPE prodstate AS ENUM (
0009                'submitting',       -- job is being submitted
0010                'submitted',        -- job is submitted
0011                'started',          -- job has started on the worker node
0012                'running',          -- job is running
0013                'held',             -- job is held
0014                'evicted',          -- job has recieved a bad signal
0015                'failed',           -- job ended with nonzero status code
0016                'finished'          -- job has finished
0017         );
0018     END IF;
0019 END$$;
0020 
0021 CREATE TABLE production_jobs (
0022     job_id                SERIAL                  PRIMARY KEY,
0023     ClusterId             BIGINT,
0024     ProcId                INT,
0025     rulename              TEXT                    NOT NULL,
0026     tag                   TEXT                    NOT NULL,
0027     dsttype               TEXT                    NOT NULL,
0028     run                   INT                     NOT NULL,
0029     segment               INT,
0030     status                prodstate,
0031     submitted             TIMESTAMP WITH TIME ZONE  DEFAULT CURRENT_TIMESTAMP,
0032     started               TIMESTAMP WITH TIME ZONE,
0033     finished              TIMESTAMP WITH TIME ZONE,
0034     RemoteUserCpu         FLOAT,
0035     RemoteSysCpu          FLOAT,
0036     MemoryUsage           INT,
0037     DiskUsage             INT,
0038     ExitCode              INT,
0039     submission_host       TEXT,
0040     execution_node        TEXT,
0041     log                   TEXT,
0042     err                   TEXT,
0043     out                   TEXT,
0044     intriplet             TEXT,
0045     indsttype_str         TEXT,
0046     xferslots             INT,
0047     request_memory        INT,
0048     request_disk          INT,
0049     request_cpus          INT,
0050     neventsper            INT
0051 );
0052 
0053 -- Add comments to the table and columns to explain their purpose.
0054 COMMENT ON TABLE production_jobs IS 'Table to store information about individual production jobs.';
0055 COMMENT ON COLUMN production_jobs.job_id IS 'Unique identifier for each job record in this table.';
0056 COMMENT ON COLUMN production_jobs.ClusterId IS 'The cluster ID assigned by the batch system (e.g., HTCondor).';
0057 COMMENT ON COLUMN production_jobs.ProcId IS 'The process ID within the cluster assigned by the batch system.';
0058 COMMENT ON COLUMN production_jobs.rulename IS 'The name of the production rule from the YAML configuration.';
0059 COMMENT ON COLUMN production_jobs.tag IS 'The output triplet (e.g., new_2025p000_v000) for the production.';
0060 COMMENT ON COLUMN production_jobs.dsttype IS 'The type of data being produced (e.g., DST_CALOFITTING).';
0061 COMMENT ON COLUMN production_jobs.run IS 'The run number being processed.';
0062 COMMENT ON COLUMN production_jobs.segment IS 'The segment number of the data file being processed.';
0063 COMMENT ON COLUMN production_jobs.status IS 'The production status of the job.';
0064 COMMENT ON COLUMN production_jobs.submitted IS 'Timestamp when the job was submitted to the batch system. From Condor''s QDate attribute.';
0065 COMMENT ON COLUMN production_jobs.started IS 'Timestamp when the job execution began. From Condor''s JobStartDate attribute.';
0066 COMMENT ON COLUMN production_jobs.finished IS 'Timestamp when the job execution finished. From Condor''s CompletionDate attribute.';
0067 COMMENT ON COLUMN production_jobs.RemoteUserCpu IS 'User CPU time used by the job, in seconds. From Condor''s RemoteUserCpu attribute.';
0068 COMMENT ON COLUMN production_jobs.RemoteSysCpu IS 'System CPU time used by the job, in seconds. From Condor''s RemoteSysCpu attribute.';
0069 COMMENT ON COLUMN production_jobs.MemoryUsage IS 'Peak memory usage of the job, in MB. From Condor''s MemoryUsage attribute.';
0070 COMMENT ON COLUMN production_jobs.DiskUsage IS 'Peak disk usage of the job, in KB. From Condor''s DiskUsage attribute.';
0071 COMMENT ON COLUMN production_jobs.ExitCode IS 'The exit code of the job process. From Condor''s ExitCode attribute.';
0072 COMMENT ON COLUMN production_jobs.submission_host IS 'The hostname of the machine where the job was submitted from.';
0073 COMMENT ON COLUMN production_jobs.execution_node IS 'The hostname of the node where the job ran. From Condor''s RemoteHost attribute.';
0074 COMMENT ON COLUMN production_jobs.log IS 'Path to the standard output log file.';
0075 COMMENT ON COLUMN production_jobs.err IS 'Path to the standard error log file.';
0076 COMMENT ON COLUMN production_jobs.out IS 'Path to the primary output file produced by the job.';
0077 COMMENT ON COLUMN production_jobs.intriplet IS 'For input data provenance.';
0078 COMMENT ON COLUMN production_jobs.indsttype_str IS 'For input data provenance.';
0079 COMMENT ON COLUMN production_jobs.xferslots IS 'To log requested resources for debugging and planning.';
0080 COMMENT ON COLUMN production_jobs.request_memory IS 'To log requested resources for debugging and planning.';
0081 COMMENT ON COLUMN production_jobs.request_disk IS 'To log requested resources for debugging and planning.';
0082 COMMENT ON COLUMN production_jobs.request_cpus IS 'To log requested resources for debugging and planning.';
0083 COMMENT ON COLUMN production_jobs.neventsper IS 'To record the number of events processed per job.';
0084 
0085 -- Create indexes on frequently queried columns for better performance.
0086 CREATE INDEX ON production_jobs (run);
0087 CREATE INDEX ON production_jobs (status);
0088 CREATE INDEX ON production_jobs (rulename);
0089 CREATE INDEX ON production_jobs (tag);
0090 
0091 -- A composite unique index can be useful for queries that filter on both cluster and process id.
0092 CREATE UNIQUE INDEX production_jobs_cluster_process_id_idx ON production_jobs (ClusterId, ProcId);