Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-06-26 08:40:22

0001 #!/usr/bin/env python3
0002 """
0003 pcs_physics_completion_dryrun.py — read-only preview of the physics-tag re-tag.
0004 
0005 Runs the real derivation + matching (``physics_match.derive_physics`` +
0006 ``services.find_or_create_physics_tag(dry_run=True)``) over both catalog
0007 populations — the csv_import EVGEN paths and the 4900 past Rucio DIDs — and
0008 reports what a catalog reload WOULD do once the completed-physics code is
0009 deployed: rows resolved, existing tags reused, distinct new tags created (per
0010 category), backgrounds routed to the signal-free p6001 tag, the p1006 anchor
0011 unwind, and any path whose physics cannot be derived. Writes nothing.
0012 
0013 Usage::
0014     cd /data/wenauseic/github/swf-monitor/src
0015     source ../../swf-testbed/.venv/bin/activate && source ~/.env
0016     python ../scripts/pcs_physics_completion_dryrun.py
0017 """
0018 import os
0019 import sys
0020 from collections import Counter, defaultdict
0021 
0022 THIS_DIR = os.path.dirname(os.path.abspath(__file__))
0023 sys.path.insert(0, os.path.join(THIS_DIR, '..', 'src'))
0024 os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'swf_monitor_project.settings')
0025 import django  # noqa: E402
0026 django.setup()
0027 from pcs.models import Dataset  # noqa: E402
0028 from pcs.physics_match import derive_physics  # noqa: E402
0029 from pcs.services import (  # noqa: E402
0030     find_or_create_physics_tag, _task_name_from_path, _extract_csv_filters,
0031     _physics_key, _PROCESS_CATEGORY,
0032 )
0033 
# Display names for the per-category table printed by main(), keyed by the
# category value looked up in _PROCESS_CATEGORY (presumably small ints —
# confirm against pcs.services). Unknown keys are rendered as "?" by the caller.
CAT_NAME = {
    1: 'Single',
    2: 'DIS',
    3: 'DVCS',
    4: 'SIDIS',
    5: 'Exclusive',
    6: 'Background',
}
0035 
0036 
def _rows():
    """Yield ``(population, current_tag_label, path, derived)`` per catalog row.

    Two populations are walked in order, selected by dataset-name prefix:

    * ``'csv_import'`` — datasets named ``csv_import.*``. The path is the task
      name taken from the ``source``/``location`` metadata value, falling back
      to the dataset name when no task name comes out of the location.
    * ``'past'`` — datasets named ``past.*``. The path is the
      ``path_remainder`` stored under the ``past_output`` metadata value.

    ``current_tag_label`` is the label of the physics tag the row is bound to
    now; ``derived`` is whatever ``derive_physics(path, beam=...)`` returns for
    the row.
    """
    csv_datasets = (
        Dataset.objects
        .filter(dataset_name__startswith='csv_import.')
        .select_related('physics_tag')
    )
    for dataset in csv_datasets.iterator():
        location = dataset.get_metadata_value('source', 'location', default='') or ''
        task_name = _task_name_from_path(location) or dataset.dataset_name
        csv_filters = _extract_csv_filters(location, 'epic_craterlake')
        beam = csv_filters.get('beam', '')
        current_label = dataset.physics_tag.tag_label
        derived = derive_physics(task_name, beam=beam)
        yield 'csv_import', current_label, task_name, derived

    past_datasets = (
        Dataset.objects
        .filter(dataset_name__startswith='past.')
        .select_related('physics_tag')
    )
    for dataset in past_datasets.iterator():
        past_output = dataset.get_metadata_value('past_output', default={}) or {}
        # Either sub-dict may be missing or empty; fall back to {} so the
        # lookups below default to ''.
        path_info = past_output.get('path') or {}
        filter_info = past_output.get('filters') or {}
        remainder = path_info.get('path_remainder', '')
        beam = filter_info.get('beam', '')
        current_label = dataset.physics_tag.tag_label
        derived = derive_physics(remainder, beam=beam)
        yield 'past', current_label, remainder, derived
0051 
0052 
def main():
    """Scan every catalog row, tally the re-tag outcome, and print the report.

    Each row produced by ``_rows()`` falls into exactly one bucket:

    * unrecognised — ``derived`` is ``None``; the path is remembered for the
      listing at the end of the report;
    * background — the derived process is ``BEAMGAS`` or ``SYNRAD``; the row
      is only counted and no tag lookup is made;
    * reuse — ``find_or_create_physics_tag(derived, dry_run=True)`` reports
      the action ``'reuse'``; counted as a move only when the returned tag's
      label differs from the row's current one;
    * create — any other action; the physics key is recorded as a new tag and
      the row always counts as a move.

    An empty catalog produces a report of zeros instead of failing.

    Returns:
        int: always ``0``, used as the process exit status.
    """
    n = Counter()
    new_keys = defaultdict(set)         # category -> set of new param-keys
    reuse_by_cat = defaultdict(set)     # category -> reused tag labels
    current = Counter()                 # current physics_tag of scanned rows
    moved_off_anchor = 0
    unrecognised = []

    for pop, cur_label, path, derived in _rows():
        n[pop] += 1
        current[cur_label] += 1
        if derived is None:
            unrecognised.append(path)
            continue
        proc = derived.get('process')
        if proc in ('BEAMGAS', 'SYNRAD'):
            n['background_p6001'] += 1
            continue
        tag, action = find_or_create_physics_tag(derived, dry_run=True)
        cat = _PROCESS_CATEGORY.get(proc)
        if action == 'reuse':
            reuse_by_cat[cat].add(tag.tag_label)
            if tag.tag_label != cur_label:
                moved_off_anchor += 1
        else:
            new_keys[cat].add(_physics_key(derived))
            moved_off_anchor += 1            # a brand-new tag is always a move

    # The anchor is whichever current binding holds the most rows. With no
    # rows at all there is no anchor, and most_common(1) would be empty.
    if current:
        anchor_label, anchor_n = current.most_common(1)[0]
        anchor_line = (f'\ncurrent dominant binding (the anchor): '
                       f'{anchor_label} holds {anchor_n} rows')
    else:
        anchor_line = '\ncurrent dominant binding (the anchor): none (no rows scanned)'

    print('=' * 70)
    print('PCS PHYSICS-TAG RE-TAG — DRY RUN (no writes)')
    print('=' * 70)
    print(f'rows scanned: csv_import={n["csv_import"]}  past={n["past"]}')
    print(f'  resolved to a physics tag, moved off current binding: {moved_off_anchor}')
    print(f'  backgrounds -> p6001 (signal-free): {n["background_p6001"]}')
    print(f'  unrecognised (kept on anchor): {len(unrecognised)}')
    print(anchor_line)

    print('\nresulting physics tags per category (reuse existing + create new):')
    total_reuse = total_new = 0
    # A process missing from _PROCESS_CATEGORY yields the category None, which
    # cannot be ordered against the other keys; sort it last instead.
    categories = sorted(set(reuse_by_cat) | set(new_keys),
                        key=lambda c: (c is None, c))
    for cat in categories:
        r, c = len(reuse_by_cat.get(cat, ())), len(new_keys.get(cat, ()))
        total_reuse += r
        total_new += c
        print(f'  cat {cat} {CAT_NAME.get(cat, "?"):10s} reuse {r:3d}  create {c:3d}  total {r + c:3d}')
    print(f'  {"":15s} reuse {total_reuse:3d}  create {total_new:3d}  '
          f'TOTAL {total_reuse + total_new:3d} distinct physics tags')

    if unrecognised:
        u = Counter(unrecognised)
        print(f'\nUNRECOGNISED paths ({len(unrecognised)} rows, {len(u)} distinct):')
        # Only the 20 most frequent distinct paths are listed.
        for p, c in u.most_common(20):
            print(f'  {c:4d}x  {p!r}')
    return 0
0109 
0110 
# Script entry point: run the dry-run report and exit with main()'s status.
if __name__ == '__main__':
    raise SystemExit(main())