File indexing completed on 2026-04-25 08:29:11
0001 """
0002 PCS (Physics Configuration System) data models.
0003
0004 Tag lifecycle: draft (editable) → locked (immutable, usable in datasets).
0005 Tag numbering: physics tags = category.digit * 1000 + N; e/s/r tags increment from 1 via PersistentState.
0006 Datasets: composed from four locked tags, auto-named, with block management for Rucio's 100k file limit.
0007 """
0008 from django.db import models, transaction
0009 from django.core.validators import MinValueValidator, MaxValueValidator
0010 from django.core.exceptions import ValidationError
0011
0012
# Lifecycle states shared by every tag model: (stored value, display label).
TAG_STATUS_CHOICES = [('draft', 'Draft'), ('locked', 'Locked')]
0017
0018
class PhysicsCategory(models.Model):
    """Physics area keyed by a single digit (e.g. DVCS=3, DIS=4).

    The digit is the primary key; per the field help text, physics tag
    numbers for a category are ``digit * 1000 + N``.
    """

    # Primary key. The validators declare the allowed range 1-9.
    digit = models.PositiveSmallIntegerField(
        primary_key=True,
        validators=[MinValueValidator(1), MaxValueValidator(9)],
        help_text="Single digit 1-9. Physics tag numbers = digit * 1000 + N.",
    )
    name = models.CharField(max_length=100, unique=True)
    description = models.TextField(blank=True, default='')
    created_by = models.CharField(max_length=100)
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        db_table = 'pcs_physics_category'
        ordering = ['digit']
        verbose_name_plural = 'physics categories'

    def __str__(self):
        """Return the category as ``"<digit>: <name>"``."""
        return "{}: {}".format(self.digit, self.name)
0038
0039
class PhysicsTag(models.Model):
    """Physics process tag (p3001, p3002, ...).

    ``tag_number`` is ``category.digit * 1000 + N`` (see ``allocate_next``).
    ``tag_label`` is never set by callers: ``save()`` always derives it from
    ``tag_number`` as ``p<tag_number>``.
    """

    tag_number = models.IntegerField(unique=True)
    tag_label = models.CharField(max_length=10, unique=True)
    category = models.ForeignKey(
        PhysicsCategory, on_delete=models.PROTECT, related_name='tags'
    )
    status = models.CharField(max_length=10, choices=TAG_STATUS_CHOICES, default='draft')
    description = models.TextField(blank=True, default='')
    parameters = models.JSONField(default=dict)
    created_by = models.CharField(max_length=100)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        db_table = 'pcs_physics_tag'
        ordering = ['tag_number']

    def __str__(self):
        """Return the tag label (e.g. ``p3001``)."""
        return self.tag_label

    def save(self, *args, **kwargs):
        """Save the tag, keeping ``tag_label`` in sync with ``tag_number``.

        The label is recomputed on every call. If the caller limits the write
        with ``update_fields`` and that collection includes ``tag_number``,
        ``tag_label`` is added to it; otherwise the new number would be
        written while the stored label stayed stale. An ``update_fields`` that
        does not mention ``tag_number`` is passed through unchanged.
        """
        self.tag_label = f"p{self.tag_number}"
        update_fields = kwargs.get('update_fields')
        if update_fields is not None and 'tag_number' in update_fields:
            # Persist the derived label together with the number it depends on.
            kwargs['update_fields'] = {'tag_label'}.union(update_fields)
        super().save(*args, **kwargs)

    @classmethod
    def allocate_next(cls, category):
        """Allocate and return the next physics tag number for ``category``.

        The result is ``category.digit * 1000 + suffix``, where ``suffix`` is
        drawn from the single ``'pcs_next_physics'`` counter through
        ``_allocate_simple_tag``. The counter is shared by all categories, so
        suffixes are not reused between them.

        NOTE(review): nothing here caps ``suffix`` below 1000, so once the
        counter passes 999 the returned number lands in the next category's
        range -- confirm whether a bound is enforced elsewhere.
        """
        suffix = _allocate_simple_tag('pcs_next_physics')
        return category.digit * 1000 + suffix
0070
0071
class EvgenTag(models.Model):
    """Event generation tag (e1, e2, ...).

    ``tag_label`` is never set by callers: ``save()`` always derives it from
    ``tag_number`` as ``e<tag_number>``.
    """

    tag_number = models.IntegerField(unique=True)
    tag_label = models.CharField(max_length=10, unique=True)
    status = models.CharField(max_length=10, choices=TAG_STATUS_CHOICES, default='draft')
    description = models.TextField(blank=True, default='')
    parameters = models.JSONField(default=dict)
    created_by = models.CharField(max_length=100)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        db_table = 'pcs_evgen_tag'
        ordering = ['tag_number']

    def __str__(self):
        """Return the tag label (e.g. ``e1``)."""
        return self.tag_label

    def save(self, *args, **kwargs):
        """Save the tag, keeping ``tag_label`` in sync with ``tag_number``.

        The label is recomputed on every call. If the caller limits the write
        with ``update_fields`` and that collection includes ``tag_number``,
        ``tag_label`` is added to it; otherwise the new number would be
        written while the stored label stayed stale. An ``update_fields`` that
        does not mention ``tag_number`` is passed through unchanged.
        """
        self.tag_label = f"e{self.tag_number}"
        update_fields = kwargs.get('update_fields')
        if update_fields is not None and 'tag_number' in update_fields:
            # Persist the derived label together with the number it depends on.
            kwargs['update_fields'] = {'tag_label'}.union(update_fields)
        super().save(*args, **kwargs)

    @classmethod
    def allocate_next(cls):
        """Return the next tag number from the ``'pcs_next_evgen'`` counter.

        Delegates to ``_allocate_simple_tag``.
        """
        return _allocate_simple_tag('pcs_next_evgen')
0097
0098
class SimuTag(models.Model):
    """Simulation tag (s1, s2, ...).

    ``tag_label`` is never set by callers: ``save()`` always derives it from
    ``tag_number`` as ``s<tag_number>``.
    """

    tag_number = models.IntegerField(unique=True)
    tag_label = models.CharField(max_length=10, unique=True)
    status = models.CharField(max_length=10, choices=TAG_STATUS_CHOICES, default='draft')
    description = models.TextField(blank=True, default='')
    parameters = models.JSONField(default=dict)
    created_by = models.CharField(max_length=100)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        db_table = 'pcs_simu_tag'
        ordering = ['tag_number']

    def __str__(self):
        """Return the tag label (e.g. ``s1``)."""
        return self.tag_label

    def save(self, *args, **kwargs):
        """Save the tag, keeping ``tag_label`` in sync with ``tag_number``.

        The label is recomputed on every call. If the caller limits the write
        with ``update_fields`` and that collection includes ``tag_number``,
        ``tag_label`` is added to it; otherwise the new number would be
        written while the stored label stayed stale. An ``update_fields`` that
        does not mention ``tag_number`` is passed through unchanged.
        """
        self.tag_label = f"s{self.tag_number}"
        update_fields = kwargs.get('update_fields')
        if update_fields is not None and 'tag_number' in update_fields:
            # Persist the derived label together with the number it depends on.
            kwargs['update_fields'] = {'tag_label'}.union(update_fields)
        super().save(*args, **kwargs)

    @classmethod
    def allocate_next(cls):
        """Return the next tag number from the ``'pcs_next_simu'`` counter.

        Delegates to ``_allocate_simple_tag``.
        """
        return _allocate_simple_tag('pcs_next_simu')
0124
0125
class RecoTag(models.Model):
    """Reconstruction tag (r1, r2, ...).

    ``tag_label`` is never set by callers: ``save()`` always derives it from
    ``tag_number`` as ``r<tag_number>``.
    """

    tag_number = models.IntegerField(unique=True)
    tag_label = models.CharField(max_length=10, unique=True)
    status = models.CharField(max_length=10, choices=TAG_STATUS_CHOICES, default='draft')
    description = models.TextField(blank=True, default='')
    parameters = models.JSONField(default=dict)
    created_by = models.CharField(max_length=100)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        db_table = 'pcs_reco_tag'
        ordering = ['tag_number']

    def __str__(self):
        """Return the tag label (e.g. ``r1``)."""
        return self.tag_label

    def save(self, *args, **kwargs):
        """Save the tag, keeping ``tag_label`` in sync with ``tag_number``.

        The label is recomputed on every call. If the caller limits the write
        with ``update_fields`` and that collection includes ``tag_number``,
        ``tag_label`` is added to it; otherwise the new number would be
        written while the stored label stayed stale. An ``update_fields`` that
        does not mention ``tag_number`` is passed through unchanged.
        """
        self.tag_label = f"r{self.tag_number}"
        update_fields = kwargs.get('update_fields')
        if update_fields is not None and 'tag_number' in update_fields:
            # Persist the derived label together with the number it depends on.
            kwargs['update_fields'] = {'tag_label'}.union(update_fields)
        super().save(*args, **kwargs)

    @classmethod
    def allocate_next(cls):
        """Return the next tag number from the ``'pcs_next_reco'`` counter.

        Delegates to ``_allocate_simple_tag``.
        """
        return _allocate_simple_tag('pcs_next_reco')
0151
0152
class Dataset(models.Model):
    """
    One block of a production dataset built from four tags.

    Rows sharing a ``dataset_name`` are blocks of the same dataset;
    ``(dataset_name, block_num)`` is unique and ``block_num`` defaults to 1.
    When not supplied, ``did`` is filled in by ``save()`` as
    ``'{scope}:{dataset_name}.b{block_num}'``. ``clean()`` rejects any
    referenced tag whose status is not ``'locked'``.
    """

    dataset_name = models.CharField(max_length=255)
    scope = models.CharField(max_length=100, default='group.EIC')
    detector_version = models.CharField(max_length=50)
    detector_config = models.CharField(max_length=100)

    # The four tags the dataset is composed from; PROTECT blocks tag deletion
    # while a dataset still references the tag.
    physics_tag = models.ForeignKey(PhysicsTag, on_delete=models.PROTECT, related_name='datasets')
    evgen_tag = models.ForeignKey(EvgenTag, on_delete=models.PROTECT, related_name='datasets')
    simu_tag = models.ForeignKey(SimuTag, on_delete=models.PROTECT, related_name='datasets')
    reco_tag = models.ForeignKey(RecoTag, on_delete=models.PROTECT, related_name='datasets')

    block_num = models.PositiveIntegerField(default=1)
    blocks = models.PositiveIntegerField(default=1)
    did = models.CharField(max_length=300, unique=True)
    file_count = models.IntegerField(default=0)
    data_size = models.BigIntegerField(default=0)
    description = models.TextField(blank=True, default='')
    metadata = models.JSONField(null=True, blank=True)
    created_by = models.CharField(max_length=100)
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        db_table = 'pcs_dataset'
        ordering = ['-created_at']
        unique_together = [('dataset_name', 'block_num')]

    def __str__(self):
        """Return the DID string."""
        return self.did

    def clean(self):
        """Raise ``ValidationError`` for the first set tag that is not locked.

        The error is keyed by the offending field name. Tags that are not set
        are skipped here.
        """
        for field_name in ('physics_tag', 'evgen_tag', 'simu_tag', 'reco_tag'):
            candidate = getattr(self, field_name, None)
            if not candidate or candidate.status == 'locked':
                continue
            raise ValidationError(
                {field_name: f"Tag {candidate.tag_label} must be locked before use in a dataset."}
            )

    def save(self, *args, **kwargs):
        """Fill in derived names, validate, then persist.

        An empty ``dataset_name`` is built from the tags and an empty ``did``
        from scope, name and block number. The name length is checked
        explicitly before ``full_clean()`` runs the remaining validation.

        Raises:
            ValidationError: if the name exceeds 255 characters or
                ``full_clean()`` fails.
        """
        self.dataset_name = self.dataset_name or self.build_dataset_name()
        self.did = self.did or f"{self.scope}:{self.dataset_name}.b{self.block_num}"
        name_too_long = len(self.dataset_name) > 255
        if name_too_long:
            raise ValidationError("Dataset name exceeds 255 characters.")
        self.full_clean()
        super().save(*args, **kwargs)

    def build_dataset_name(self):
        """Auto-name: {scope}.{detector_version}.{detector_config}.{p}.{e}.{s}.{r}"""
        head = f"{self.scope}.{self.detector_version}.{self.detector_config}"
        tags = (self.physics_tag, self.evgen_tag, self.simu_tag, self.reco_tag)
        tail = "".join(f".{tag.tag_label}" for tag in tags)
        return head + tail

    @property
    def task_name(self):
        """Task name: the ``dataset_name`` itself, i.e. without the .bN suffix."""
        return self.dataset_name
0217
0218
class ProdConfig(models.Model):
    """
    Reusable, mutable template of operational settings for job submission.

    Holds what a production submit command needs beyond the four tags and the
    dataset: background mixing, output control, software stack, resource
    targets, condor template, and PanDA/Rucio overrides. Records of this
    model stay editable; the PanDA task/job spec is the immutable record.
    """

    name = models.CharField(
        max_length=200,
        unique=True,
        help_text="Human-readable config name, e.g. 'DVCS 10x100 standard'",
    )
    description = models.TextField(blank=True, default='')

    # --- Background mixing ---
    bg_mixing = models.BooleanField(default=False)
    bg_cross_section = models.CharField(max_length=200, blank=True, default='')
    bg_evtgen_file = models.CharField(max_length=500, blank=True, default='')

    # --- Output control ---
    copy_reco = models.BooleanField(default=True)
    copy_full = models.BooleanField(default=False)
    copy_log = models.BooleanField(default=True)
    use_rucio = models.BooleanField(default=True)

    # --- Software stack ---
    jug_xl_tag = models.CharField(
        max_length=100, blank=True, default='',
        help_text="e.g. 26.02.0-stable",
    )
    container_image = models.CharField(
        max_length=500, blank=True, default='',
        help_text="Singularity/Apptainer image reference",
    )

    # --- Resource targets ---
    target_hours_per_job = models.DecimalField(
        max_digits=6, decimal_places=2, null=True, blank=True,
        help_text="Target walltime per job in hours",
    )
    events_per_task = models.PositiveIntegerField(
        null=True, blank=True,
        help_text="Total events for the task",
    )

    # --- Condor template ---
    condor_template = models.TextField(
        blank=True, default='',
        help_text="HTCondor submission template",
    )

    # --- PanDA overrides ---
    panda_site = models.CharField(max_length=200, blank=True, default='')
    panda_queue = models.CharField(max_length=200, blank=True, default='')
    panda_working_group = models.CharField(max_length=100, blank=True, default='')
    panda_resource_type = models.CharField(max_length=100, blank=True, default='')

    # --- Rucio overrides ---
    rucio_rse = models.CharField(
        max_length=200, blank=True, default='',
        help_text="Rucio Storage Element for output",
    )
    rucio_replication_rules = models.JSONField(
        null=True, blank=True,
        help_text="Rucio replication rule definitions",
    )

    # --- Free-form extras not covered by a dedicated column ---
    data = models.JSONField(
        null=True, blank=True,
        help_text="Additional submission parameters (JSON)",
    )

    created_by = models.CharField(max_length=100)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        db_table = 'pcs_prod_config'
        ordering = ['-updated_at']

    def __str__(self):
        """Return the config name."""
        return self.name
0288
0289
# Status values a ProdTask may hold: (stored value, display label).
PRODTASK_STATUS_CHOICES = [
    ('draft', 'Draft'), ('ready', 'Ready'), ('submitted', 'Submitted'),
    ('completed', 'Completed'), ('failed', 'Failed'),
]
0297
0298
class ProdTask(models.Model):
    """
    One production submission: a Dataset, a ProdConfig and task-specific
    parameters, from which the Condor and PanDA commands are generated.
    """

    name = models.CharField(
        max_length=255, unique=True,
        help_text="Task name (auto-derived from dataset or manual)",
    )
    description = models.TextField(blank=True, default='')
    status = models.CharField(max_length=20, choices=PRODTASK_STATUS_CHOICES, default='draft')

    # --- What to produce and with which settings ---
    dataset = models.ForeignKey(Dataset, on_delete=models.PROTECT, related_name='prod_tasks')
    prod_config = models.ForeignKey(ProdConfig, on_delete=models.PROTECT, related_name='prod_tasks')

    # --- Task-specific inputs ---
    csv_file = models.CharField(
        max_length=500, blank=True, default='',
        help_text="CSV file path in simulation_campaign_datasets",
    )
    overrides = models.JSONField(
        null=True, blank=True,
        help_text="Per-task overrides of ProdConfig fields (JSON)",
    )

    # --- Generated commands (filled by generate_commands) ---
    condor_command = models.TextField(
        blank=True, default='',
        help_text="Generated Condor submission command",
    )
    panda_command = models.TextField(
        blank=True, default='',
        help_text="Generated PanDA submission command",
    )

    # --- Identifiers recorded after submission ---
    panda_task_id = models.BigIntegerField(
        null=True, blank=True,
        help_text="PanDA task ID after submission",
    )
    condor_cluster_id = models.CharField(
        max_length=100, blank=True, default='',
        help_text="Condor cluster ID after submission",
    )

    created_by = models.CharField(max_length=100)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        db_table = 'pcs_prod_task'
        ordering = ['-updated_at']

    def __str__(self):
        """Return the task name."""
        return self.name

    def get_effective_config(self):
        """Return a dict of ProdConfig values with this task's overrides applied.

        Every concrete ProdConfig field except ``id``, ``created_at`` and
        ``updated_at`` is included; a key present in ``overrides`` replaces
        the config value. ``data`` is handled specially: when the override is
        a dict (or absent), the result is the config's ``data`` merged with
        it, override keys winning. A non-dict ``data`` override is left as the
        plain replacement value.
        """
        base = self.prod_config
        task_overrides = self.overrides or {}
        excluded = ('id', 'created_at', 'updated_at')

        # Reverse relations have no ``attname``; only concrete columns count.
        effective = {
            fld.name: task_overrides.get(fld.name, getattr(base, fld.name))
            for fld in base._meta.get_fields()
            if hasattr(fld, 'attname') and fld.name not in excluded
        }

        data_override = task_overrides.get('data', {})
        if isinstance(data_override, dict):
            effective['data'] = {**(base.data or {}), **data_override}
        return effective

    def generate_commands(self):
        """Build both commands and store them on this instance.

        Only the ``condor_command`` and ``panda_command`` attributes are set;
        this method does not call ``save()``.
        """
        # Imported here rather than at module level, as in the original code.
        from .commands import build_condor_command, build_panda_command

        condor = build_condor_command(self)
        self.condor_command = condor
        panda = build_panda_command(self)
        self.panda_command = panda
0367
0368
def _allocate_simple_tag(state_key):
    """Hand out the next number for ``state_key`` from PersistentState.

    Inside one transaction, the PersistentState row with ``id=1`` is fetched
    through ``select_for_update()`` (and created with empty ``state_data`` if
    missing). The value stored under ``state_key`` is read, defaulting to 1
    when absent, the stored value is bumped by one, and the row is saved.

    Returns:
        The value read before the increment.
    """
    from monitor_app.models import PersistentState

    with transaction.atomic():
        locked_rows = PersistentState.objects.select_for_update()
        state, _created = locked_rows.get_or_create(
            id=1, defaults={'state_data': {}}
        )
        allocated = state.state_data.get(state_key, 1)
        state.state_data[state_key] = allocated + 1
        state.save()
    return allocated
0379 return current