File indexing completed on 2026-06-26 08:40:22
0001
"""
pcs_automatch_dryrun.py — read-only preview of the physics-tag automatch.

The catalog import pins every legacy ``csv_import`` row to one placeholder
physics tag; the automatch (``derive_physics`` + ``find_or_create_physics_tag``,
wired into ``import_default_datasets_csv``) derives each row's real physics from
its EVGEN path and rebinds it to the matching locked tag, creating or locking
tags as needed. This script runs that derivation + matching against the live
catalog **without writing anything** (``dry_run=True`` throughout, no save) and
reports what a reload would do: tags reused, drafts that would be locked in
place, distinct new tags created, rows rebound, backgrounds parked, and any
unrecognized paths.

Usage::

    cd /data/wenauseic/github/swf-monitor/src
    source ../../swf-testbed/.venv/bin/activate && source ~/.env
    python ../scripts/pcs_automatch_dryrun.py
"""
import os
import sys

# Put the sibling ``src`` directory (resolved relative to this script) at the
# front of sys.path so the imports below can resolve against it.
THIS_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.join(THIS_DIR, '..', 'src'))
# setdefault: a DJANGO_SETTINGS_MODULE already present in the environment wins.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'swf_monitor_project.settings')

# Django has to be configured before any model module is imported, which is
# why these imports sit below the path/environment setup instead of at the top.
import django
django.setup()

from pcs.models import Dataset, PhysicsTag
from pcs.physics_match import derive_physics
from pcs.services import (
    find_or_create_physics_tag,
    _task_name_from_path,
    _extract_csv_filters,
)
0038
0039
0040 def _param_key(derived):
0041 """Identity of a would-be tag, for deduping creates into distinct tags."""
0042 if derived.get('process') == 'SINGLE':
0043 return ('SINGLE', derived.get('particle', ''), derived.get('gun_energy', ''))
0044 return (derived.get('process'),
0045 derived.get('beam_energy_electron'),
0046 derived.get('beam_energy_hadron'))
0047
0048
def main():
    """Preview the automatch over the ``csv_import.`` datasets and print a summary.

    For every ``Dataset`` whose ``dataset_name`` starts with ``csv_import.``
    (ordered by id) the physics is derived from the dataset's source location
    and matched with ``find_or_create_physics_tag(..., dry_run=True)``. Each
    row ends up in exactly one bucket:

    * unrecognized -- ``derive_physics`` returned ``None``;
    * parked -- the derived process is ``BEAMGAS`` or ``SYNRAD``;
    * rebound -- everything else, split by the action the matcher reports
      (``reuse-locked``, ``lock-in-place``, or any other value, which is
      counted as a new tag).

    Returns:
        int: always 0; the script entry point passes it to ``sys.exit``.
    """
    # Lowest-numbered locked tag, reported as the placeholder anchor.
    anchor = PhysicsTag.objects.filter(status='locked').order_by('tag_number').first()
    rows = Dataset.objects.filter(dataset_name__startswith='csv_import.').order_by('id')

    reuse_locked = set()     # labels of locked tags that would be reused
    lock_in_place = set()    # labels of draft tags that would be locked
    new_tags = set()         # _param_key identities of tags that would be created
    rebound = 0
    reuse_rows = 0
    new_rows = 0
    parked = 0
    unrecognized = []

    for ds in rows:
        ds_path = ds.get_metadata_value('source', 'location', default='') or ''
        # Fall back to the dataset name when no task name comes out of the path.
        task_name = _task_name_from_path(ds_path) or ds.dataset_name
        filters = _extract_csv_filters(ds_path, 'epic_craterlake')
        derived = derive_physics(task_name, beam=filters.get('beam', ''))
        if derived is None:
            unrecognized.append(task_name)
            continue
        if derived.get('process') in ('BEAMGAS', 'SYNRAD'):
            # Background processes are only counted, never matched to a tag.
            parked += 1
            continue
        tag, action = find_or_create_physics_tag(derived, dry_run=True)
        rebound += 1
        if action == 'reuse-locked':
            reuse_locked.add(tag.tag_label)
            reuse_rows += 1
        elif action == 'lock-in-place':
            lock_in_place.add(tag.tag_label)
            reuse_rows += 1
        else:
            # Any other action counts as a new tag; dedupe on the derived
            # parameters rather than on the returned tag.
            new_tags.add(_param_key(derived))
            new_rows += 1

    print('PCS physics-tag automatch — DRY RUN (no writes)')
    print(f' placeholder anchor tag: {anchor.tag_label if anchor else "(none)"}')
    print(f' csv_import rows: {rows.count()}')
    print(f' rows rebound: {rebound} ({reuse_rows} reuse existing, {new_rows} bind new)')
    print(f' backgrounds parked: {parked}')
    print(f' tags reused (locked): {len(reuse_locked)}')
    print(f' draft tags locked in place: {len(lock_in_place)}')
    print(f' distinct new tags: {len(new_tags)}')
    print(f' reused + new total: {len(reuse_locked) + len(lock_in_place) + len(new_tags)}')
    if lock_in_place:
        print(' would lock in place: ' + ', '.join(sorted(lock_in_place)))
    if unrecognized:
        print(f' UNRECOGNIZED paths ({len(unrecognized)}):')
        for p in unrecognized:
            print(f' - {p}')
    return 0
0101
0102
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == '__main__':
    sys.exit(main())