Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-04-25 08:29:11

0001 """
0002 PCS (Physics Configuration System) data models.
0003 
0004 Tag lifecycle: draft (editable) → locked (immutable, usable in datasets).
0005 Tag numbering: physics tags = category.digit * 1000 + N; e/s/r tags increment from 1 via PersistentState.
0006 Datasets: composed from four locked tags, auto-named, with block management for Rucio's 100k file limit.
0007 """
0008 from django.db import models, transaction
0009 from django.core.validators import MinValueValidator, MaxValueValidator
0010 from django.core.exceptions import ValidationError
0011 
0012 
# Lifecycle states shared by all tag models, as (stored value, display label)
# pairs. The module docstring describes 'draft' as editable and 'locked' as
# immutable and usable in datasets.
TAG_STATUS_CHOICES = list({
    'draft': 'Draft',
    'locked': 'Locked',
}.items())
0017 
0018 
class PhysicsCategory(models.Model):
    """
    A physics area identified by a single digit (e.g. DVCS=3, DIS=4).

    The digit doubles as the primary key and selects the thousand-block
    from which the category's physics tag numbers are formed.
    """
    # Primary key; the validators restrict it to 1-9.
    digit = models.PositiveSmallIntegerField(
        primary_key=True,
        help_text="Single digit 1-9. Physics tag numbers = digit * 1000 + N.",
        validators=[MinValueValidator(1), MaxValueValidator(9)],
    )
    name = models.CharField(unique=True, max_length=100)
    description = models.TextField(default='', blank=True)
    created_by = models.CharField(max_length=100)
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        verbose_name_plural = 'physics categories'
        ordering = ['digit']
        db_table = 'pcs_physics_category'

    def __str__(self):
        """Render as '<digit>: <name>'."""
        return "{}: {}".format(self.digit, self.name)
0038 
0039 
class PhysicsTag(models.Model):
    """
    Physics process tag (p3001, p3002...).

    The tag number is ``category.digit * 1000 + N`` and the label is the
    number prefixed with 'p'. The label is recomputed on every save, so
    only ``tag_number`` needs to be set by callers.
    """
    tag_number = models.IntegerField(unique=True)
    tag_label = models.CharField(max_length=10, unique=True)
    category = models.ForeignKey(
        PhysicsCategory, on_delete=models.PROTECT, related_name='tags'
    )
    status = models.CharField(max_length=10, choices=TAG_STATUS_CHOICES, default='draft')
    description = models.TextField(blank=True, default='')
    parameters = models.JSONField(default=dict)
    created_by = models.CharField(max_length=100)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        db_table = 'pcs_physics_tag'
        ordering = ['tag_number']

    def __str__(self):
        return self.tag_label

    def save(self, *args, **kwargs):
        """Derive ``tag_label`` from ``tag_number``, then save normally."""
        self.tag_label = f"p{self.tag_number}"
        super().save(*args, **kwargs)

    @classmethod
    def allocate_next(cls, category):
        """
        Allocate the next physics tag number for ``category``.

        The suffix comes from a single counter ('pcs_next_physics') shared
        by all categories, so a given suffix is handed out only once.

        Returns:
            ``category.digit * 1000 + suffix``.

        Raises:
            ValidationError: if the allocated suffix is outside 1-999.
                A larger suffix would push the number into another
                category's thousand-block (e.g. 3 * 1000 + 1001 == 4001)
                and could collide with a tag already issued there.
        """
        suffix = _allocate_simple_tag('pcs_next_physics')
        # Only three digits are available below the category digit.
        if not 1 <= suffix <= 999:
            raise ValidationError(
                f"Physics tag suffix {suffix} is outside the range 1-999 "
                f"available to category {category.digit}."
            )
        return category.digit * 1000 + suffix
0070 
0071 
class EvgenTag(models.Model):
    """
    Event generation tag (e1, e2...).

    Numbers are handed out by ``allocate_next``; the label is the number
    prefixed with 'e' and is rewritten on every save.
    """
    tag_number = models.IntegerField(unique=True)
    tag_label = models.CharField(unique=True, max_length=10)
    status = models.CharField(
        max_length=10,
        default='draft',
        choices=TAG_STATUS_CHOICES,
    )
    description = models.TextField(default='', blank=True)
    parameters = models.JSONField(default=dict)
    created_by = models.CharField(max_length=100)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        ordering = ['tag_number']
        db_table = 'pcs_evgen_tag'

    def __str__(self):
        return self.tag_label

    def save(self, *args, **kwargs):
        """Refresh ``tag_label`` from ``tag_number`` before saving."""
        self.tag_label = "e{}".format(self.tag_number)
        super().save(*args, **kwargs)

    @classmethod
    def allocate_next(cls):
        """Return the next number from the 'pcs_next_evgen' counter."""
        next_number = _allocate_simple_tag('pcs_next_evgen')
        return next_number
0097 
0098 
class SimuTag(models.Model):
    """
    Simulation tag (s1, s2...).

    Numbers are handed out by ``allocate_next``; the label is the number
    prefixed with 's' and is rewritten on every save.
    """
    tag_number = models.IntegerField(unique=True)
    tag_label = models.CharField(unique=True, max_length=10)
    status = models.CharField(
        max_length=10,
        default='draft',
        choices=TAG_STATUS_CHOICES,
    )
    description = models.TextField(default='', blank=True)
    parameters = models.JSONField(default=dict)
    created_by = models.CharField(max_length=100)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        ordering = ['tag_number']
        db_table = 'pcs_simu_tag'

    def __str__(self):
        return self.tag_label

    def save(self, *args, **kwargs):
        """Refresh ``tag_label`` from ``tag_number`` before saving."""
        self.tag_label = "s{}".format(self.tag_number)
        super().save(*args, **kwargs)

    @classmethod
    def allocate_next(cls):
        """Return the next number from the 'pcs_next_simu' counter."""
        next_number = _allocate_simple_tag('pcs_next_simu')
        return next_number
0124 
0125 
class RecoTag(models.Model):
    """
    Reconstruction tag (r1, r2...).

    Numbers are handed out by ``allocate_next``; the label is the number
    prefixed with 'r' and is rewritten on every save.
    """
    tag_number = models.IntegerField(unique=True)
    tag_label = models.CharField(unique=True, max_length=10)
    status = models.CharField(
        max_length=10,
        default='draft',
        choices=TAG_STATUS_CHOICES,
    )
    description = models.TextField(default='', blank=True)
    parameters = models.JSONField(default=dict)
    created_by = models.CharField(max_length=100)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        ordering = ['tag_number']
        db_table = 'pcs_reco_tag'

    def __str__(self):
        return self.tag_label

    def save(self, *args, **kwargs):
        """Refresh ``tag_label`` from ``tag_number`` before saving."""
        self.tag_label = "r{}".format(self.tag_number)
        super().save(*args, **kwargs)

    @classmethod
    def allocate_next(cls):
        """Return the next number from the 'pcs_next_reco' counter."""
        next_number = _allocate_simple_tag('pcs_next_reco')
        return next_number
0151 
0152 
class Dataset(models.Model):
    """
    One block of a production dataset built from four locked tags.

    Every row is a single block and block 1 always exists; rows that share
    a dataset_name are blocks of the same dataset. The DID has the form
    '{scope}:{dataset_name}.b{N}'. A dataset can only be created once all
    of its tags are locked.
    """
    dataset_name = models.CharField(max_length=255)
    scope = models.CharField(default='group.EIC', max_length=100)
    detector_version = models.CharField(max_length=50)
    detector_config = models.CharField(max_length=100)

    # The four tags the dataset is composed from. PROTECT blocks deletion
    # of a tag that a dataset still references.
    physics_tag = models.ForeignKey(
        PhysicsTag, related_name='datasets', on_delete=models.PROTECT,
    )
    evgen_tag = models.ForeignKey(
        EvgenTag, related_name='datasets', on_delete=models.PROTECT,
    )
    simu_tag = models.ForeignKey(
        SimuTag, related_name='datasets', on_delete=models.PROTECT,
    )
    reco_tag = models.ForeignKey(
        RecoTag, related_name='datasets', on_delete=models.PROTECT,
    )

    # Block bookkeeping and the identifier derived from it.
    block_num = models.PositiveIntegerField(default=1)
    blocks = models.PositiveIntegerField(default=1)
    did = models.CharField(unique=True, max_length=300)

    file_count = models.IntegerField(default=0)
    data_size = models.BigIntegerField(default=0)
    description = models.TextField(default='', blank=True)
    metadata = models.JSONField(blank=True, null=True)
    created_by = models.CharField(max_length=100)
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        unique_together = [('dataset_name', 'block_num')]
        ordering = ['-created_at']
        db_table = 'pcs_dataset'

    def __str__(self):
        return self.did

    def clean(self):
        """Raise ValidationError for the first assigned tag that is not locked."""
        for tag_field in ('physics_tag', 'evgen_tag', 'simu_tag', 'reco_tag'):
            tag = getattr(self, tag_field, None)
            # Nothing to check for a missing tag or one that is already locked.
            if not tag or tag.status == 'locked':
                continue
            raise ValidationError(
                {tag_field: f"Tag {tag.tag_label} must be locked before use in a dataset."}
            )

    def save(self, *args, **kwargs):
        """
        Fill in derived identifiers, validate, then save.

        An empty dataset_name is auto-built and an empty did is derived
        from scope, dataset_name and block_num. full_clean() runs on
        every save.
        """
        if not self.dataset_name:
            self.dataset_name = self.build_dataset_name()
        if not self.did:
            self.did = "{}:{}.b{}".format(
                self.scope, self.dataset_name, self.block_num
            )
        name_length = len(self.dataset_name)
        if name_length > 255:
            raise ValidationError("Dataset name exceeds 255 characters.")
        self.full_clean()
        super().save(*args, **kwargs)

    def build_dataset_name(self):
        """Auto-name: {scope}.{detector_version}.{detector_config}.{p}.{e}.{s}.{r}"""
        return "{}.{}.{}.{}.{}.{}.{}".format(
            self.scope,
            self.detector_version,
            self.detector_config,
            self.physics_tag.tag_label,
            self.evgen_tag.tag_label,
            self.simu_tag.tag_label,
            self.reco_tag.tag_label,
        )

    @property
    def task_name(self):
        """The task name is the dataset_name, i.e. without the .bN block suffix."""
        return self.dataset_name
0217 
0218 
class ProdConfig(models.Model):
    """
    Production configuration template: mutable operational settings for
    job submission.

    Holds what a production submit command needs beyond the four tags and
    the dataset: background mixing, output control, software stack,
    resource targets, condor template, and PanDA/Rucio overrides.
    Always mutable; the PanDA task/job spec is the immutable record.
    """
    name = models.CharField(
        max_length=200,
        unique=True,
        help_text="Human-readable config name, e.g. 'DVCS 10x100 standard'",
    )
    description = models.TextField(default='', blank=True)

    # --- Background mixing ---
    bg_mixing = models.BooleanField(default=False)
    bg_cross_section = models.CharField(max_length=200, default='', blank=True)
    bg_evtgen_file = models.CharField(max_length=500, default='', blank=True)

    # --- Output file control ---
    copy_reco = models.BooleanField(default=True)
    copy_full = models.BooleanField(default=False)
    copy_log = models.BooleanField(default=True)
    use_rucio = models.BooleanField(default=True)

    # --- Software stack ---
    jug_xl_tag = models.CharField(
        max_length=100,
        default='',
        blank=True,
        help_text="e.g. 26.02.0-stable",
    )
    container_image = models.CharField(
        max_length=500,
        default='',
        blank=True,
        help_text="Singularity/Apptainer image reference",
    )

    # --- Resource targets (both optional) ---
    target_hours_per_job = models.DecimalField(
        max_digits=6,
        decimal_places=2,
        blank=True,
        null=True,
        help_text="Target walltime per job in hours",
    )
    events_per_task = models.PositiveIntegerField(
        blank=True,
        null=True,
        help_text="Total events for the task",
    )

    # --- Condor template ---
    condor_template = models.TextField(
        default='',
        blank=True,
        help_text="HTCondor submission template",
    )

    # --- PanDA overrides (may be left blank) ---
    panda_site = models.CharField(max_length=200, default='', blank=True)
    panda_queue = models.CharField(max_length=200, default='', blank=True)
    panda_working_group = models.CharField(max_length=100, default='', blank=True)
    panda_resource_type = models.CharField(max_length=100, default='', blank=True)

    # --- Rucio overrides (may be left blank / null) ---
    rucio_rse = models.CharField(
        max_length=200,
        default='',
        blank=True,
        help_text="Rucio Storage Element for output",
    )
    rucio_replication_rules = models.JSONField(
        blank=True,
        null=True,
        help_text="Rucio replication rule definitions",
    )

    # --- Extensible submission parameters ---
    # New keys need no migration. Keys: transformation, processing_type,
    # prod_source_label, vo, n_jobs, events_per_job, events_per_file,
    # files_per_job, corecount, no_build, skip_scout, exec_command, scope
    data = models.JSONField(
        blank=True,
        null=True,
        help_text="Additional submission parameters (JSON)",
    )

    created_by = models.CharField(max_length=100)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        ordering = ['-updated_at']
        db_table = 'pcs_prod_config'

    def __str__(self):
        return self.name
0288 
0289 
# Status values a ProdTask can hold, as (stored value, display label) pairs.
PRODTASK_STATUS_CHOICES = list({
    'draft': 'Draft',
    'ready': 'Ready',
    'submitted': 'Submitted',
    'completed': 'Completed',
    'failed': 'Failed',
}.items())
0297 
0298 
class ProdTask(models.Model):
    """
    A production task: Dataset + ProdConfig + submission-specific params.

    Fully defines a production submission from which Condor and PanDA
    commands can be generated.
    """
    name = models.CharField(max_length=255, unique=True,
                            help_text="Task name (auto-derived from dataset or manual)")
    description = models.TextField(blank=True, default='')
    status = models.CharField(max_length=20, choices=PRODTASK_STATUS_CHOICES, default='draft')

    # Core composition
    dataset = models.ForeignKey(Dataset, on_delete=models.PROTECT, related_name='prod_tasks')
    prod_config = models.ForeignKey(ProdConfig, on_delete=models.PROTECT, related_name='prod_tasks')

    # Task-specific submission params
    csv_file = models.CharField(max_length=500, blank=True, default='',
                                help_text="CSV file path in simulation_campaign_datasets")
    overrides = models.JSONField(null=True, blank=True,
                                 help_text="Per-task overrides of ProdConfig fields (JSON)")

    # Generated commands. These are filled in by generate_commands(); this
    # class does not override save(), so callers must invoke it and then
    # save the task themselves.
    condor_command = models.TextField(blank=True, default='',
                                      help_text="Generated Condor submission command")
    panda_command = models.TextField(blank=True, default='',
                                     help_text="Generated PanDA submission command")

    # Submission tracking
    panda_task_id = models.BigIntegerField(null=True, blank=True,
                                            help_text="PanDA task ID after submission")
    condor_cluster_id = models.CharField(max_length=100, blank=True, default='',
                                          help_text="Condor cluster ID after submission")

    created_by = models.CharField(max_length=100)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        db_table = 'pcs_prod_task'
        ordering = ['-updated_at']

    def __str__(self):
        return self.name

    def get_effective_config(self):
        """
        Return ProdConfig field values with per-task overrides applied.

        Every concrete ProdConfig field except id, created_at and
        updated_at is included, with the value from ``overrides`` taking
        precedence when the key is present.

        ``data`` is merged key by key instead of being replaced, and the
        returned ``data`` is always a dict. A ``data`` value on either
        side that is not a dict (including JSON null) contributes no keys.

        ``overrides`` itself is ignored unless it is a dict, because the
        JSONField would also accept a list or scalar.

        Returns:
            dict mapping ProdConfig field names to effective values.
        """
        config = self.prod_config
        # Only a JSON object can carry per-field overrides.
        overrides = self.overrides if isinstance(self.overrides, dict) else {}
        result = {}
        for field in config._meta.get_fields():
            # Reverse relations have no attname and hold no config value.
            if not hasattr(field, 'attname'):
                continue
            name = field.name
            if name in ('id', 'created_at', 'updated_at'):
                continue
            result[name] = overrides.get(name, getattr(config, name))

        # Merge the data dicts key by key rather than replacing the whole dict.
        base_data = config.data if isinstance(config.data, dict) else {}
        override_data = overrides.get('data')
        if not isinstance(override_data, dict):
            override_data = {}
        result['data'] = {**base_data, **override_data}
        return result

    def generate_commands(self):
        """
        Build the Condor and PanDA commands and store them on this instance.

        The instance is not saved here; callers persist the new values.
        """
        # Imported at call time rather than at module import.
        from .commands import build_condor_command, build_panda_command
        self.condor_command = build_condor_command(self)
        self.panda_command = build_panda_command(self)
0361 
0362     def generate_commands(self):
0363         """Generate and cache both Condor and PanDA commands."""
0364         from .commands import build_condor_command, build_panda_command
0365         self.condor_command = build_condor_command(self)
0366         self.panda_command = build_panda_command(self)
0367 
0368 
def _allocate_simple_tag(state_key):
    """
    Hand out the next number of the counter stored under ``state_key``.

    The counter lives in the ``state_data`` of the PersistentState row
    with id=1, which is created with empty state if it does not exist.
    The row is fetched with select_for_update() inside a transaction, the
    current value (1 when the key is absent) is returned, and the stored
    value is advanced by one.
    """
    from monitor_app.models import PersistentState

    with transaction.atomic():
        locked_rows = PersistentState.objects.select_for_update()
        state_row, _created = locked_rows.get_or_create(
            id=1,
            defaults={'state_data': {}},
        )
        allocated = state_row.state_data.get(state_key, 1)
        state_row.state_data[state_key] = allocated + 1
        state_row.save()
    return allocated