Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-06-26 08:40:22

0001 #!/usr/bin/env python3
0002 """
0003 pcs_automatch_dryrun.py — read-only preview of the physics-tag automatch.
0004 
0005 The catalog import pins every legacy ``csv_import`` row to one placeholder
0006 physics tag; the automatch (``derive_physics`` + ``find_or_create_physics_tag``,
0007 wired into ``import_default_datasets_csv``) derives each row's real physics from
0008 its EVGEN path and rebinds it to the matching locked tag, creating or locking
0009 tags as needed. This script runs that derivation + matching against the live
0010 catalog **without writing anything** (``dry_run=True`` throughout, no save) and
0011 reports what a reload would do: tags reused, drafts that would be locked in
0012 place, distinct new tags created, rows rebound, backgrounds parked, and any
0013 unrecognized path.
0014 
0015 Usage::
0016 
0017     cd /data/wenauseic/github/swf-monitor/src
0018     source ../../swf-testbed/.venv/bin/activate && source ~/.env
0019     python ../scripts/pcs_automatch_dryrun.py
0020 """
0021 import os
0022 import sys
0023 
# Absolute directory containing this script; anchors the relative path below.
THIS_DIR = os.path.dirname(os.path.abspath(__file__))
# Put ``<script dir>/../src`` at the front of the import path
# (presumably where the ``pcs`` package and the Django project live — confirm).
sys.path.insert(0, os.path.join(THIS_DIR, '..', 'src'))
# Only a default: a DJANGO_SETTINGS_MODULE already set in the environment wins.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'swf_monitor_project.settings')

import django  # noqa: E402
# Initialise Django here, ahead of the model/service imports that follow.
django.setup()
0030 
0031 from pcs.models import Dataset, PhysicsTag  # noqa: E402
0032 from pcs.physics_match import derive_physics  # noqa: E402
0033 from pcs.services import (  # noqa: E402
0034     find_or_create_physics_tag,
0035     _task_name_from_path,
0036     _extract_csv_filters,
0037 )
0038 
0039 
0040 def _param_key(derived):
0041     """Identity of a would-be tag, for deduping creates into distinct tags."""
0042     if derived.get('process') == 'SINGLE':
0043         return ('SINGLE', derived.get('particle', ''), derived.get('gun_energy', ''))
0044     return (derived.get('process'),
0045             derived.get('beam_energy_electron'),
0046             derived.get('beam_energy_hadron'))
0047 
0048 
def main():
    """Run the automatch in dry-run mode over the catalog and print a summary.

    For every ``Dataset`` whose name starts with ``csv_import.`` the physics is
    derived from the row's ``source``/``location`` metadata (falling back to
    the dataset name when no task name can be taken from the path) and passed
    to ``find_or_create_physics_tag`` with ``dry_run=True``. Rows are tallied
    by outcome and the totals are printed; this function itself saves nothing.

    Returns:
        0, which the caller passes to ``sys.exit``.
    """
    placeholder = (PhysicsTag.objects
                   .filter(status='locked')
                   .order_by('tag_number')
                   .first())
    datasets = (Dataset.objects
                .filter(dataset_name__startswith='csv_import.')
                .order_by('id'))

    reused_labels = set()    # labels of tags reported as 'reuse-locked'
    locked_labels = set()    # labels of tags reported as 'lock-in-place'
    created_keys = set()     # parameter keys of tags that would be created
    rows_on_existing = 0     # rows landing on a tag that already exists
    rows_on_new = 0          # rows landing on a tag that would be created
    n_parked = 0             # BEAMGAS / SYNRAD rows, not matched at all
    unparsed = []            # task names for which derivation returned None

    for dataset in datasets:
        location = dataset.get_metadata_value('source', 'location', default='') or ''
        name = _task_name_from_path(location) or dataset.dataset_name
        csv_filters = _extract_csv_filters(location, 'epic_craterlake')
        physics = derive_physics(name, beam=csv_filters.get('beam', ''))

        if physics is None:
            unparsed.append(name)
        elif physics.get('process') in ('BEAMGAS', 'SYNRAD'):
            n_parked += 1
        else:
            tag, action = find_or_create_physics_tag(physics, dry_run=True)
            if action == 'reuse-locked':
                reused_labels.add(tag.tag_label)
                rows_on_existing += 1
            elif action == 'lock-in-place':
                locked_labels.add(tag.tag_label)
                rows_on_existing += 1
            else:
                # Any other action counts as a create; dedupe by parameter key.
                created_keys.add(_param_key(physics))
                rows_on_new += 1

    # Every row that reached the matcher counts as rebound.
    n_rebound = rows_on_existing + rows_on_new
    n_reused = len(reused_labels)
    n_locked = len(locked_labels)
    n_created = len(created_keys)

    print('PCS physics-tag automatch — DRY RUN (no writes)')
    print(f'  placeholder anchor tag: {placeholder.tag_label if placeholder else "(none)"}')
    print(f'  csv_import rows:        {datasets.count()}')
    print(f'  rows rebound:           {n_rebound}  ({rows_on_existing} reuse existing, {rows_on_new} bind new)')
    print(f'  backgrounds parked:     {n_parked}')
    print(f'  tags reused (locked):   {n_reused}')
    print(f'  draft tags locked in place: {n_locked}')
    print(f'  distinct new tags:      {n_created}')
    print(f'  reused + new total:     {n_reused + n_locked + n_created}')
    if locked_labels:
        print('  would lock in place: ' + ', '.join(sorted(locked_labels)))
    if unparsed:
        print(f'  UNRECOGNIZED paths ({len(unparsed)}):')
        for entry in unparsed:
            print(f'    - {entry}')
    return 0
0101 
0102 
0103 if __name__ == '__main__':
0104     sys.exit(main())