Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-06-26 08:40:25

0001 """Derive canonical physics-tag parameters from a catalog task's EVGEN path.
0002 
0003 The legacy ``csv_import`` catalog encodes a task's real physics in its path
0004 (``EVGEN/<category>/<sub>/.../<beam>/...``), not in a physics tag — the import
0005 pinned every row to one placeholder anchor tag, so the bound physics (beam,
0006 process) is wrong for almost all of them. This module parses the path into the
0007 parameters a physics tag carries, so each task can be matched to the correct
0008 existing tag, or have the right tag created. See EPICPROD_TASK_CATALOG.md.
0009 
0010 The single source of truth is the path. ``derive_physics(path)`` returns the
0011 canonical params; the caller matches/creates the PhysicsTag and rebinds.
0012 """
0013 import re
0014 
0015 
0016 #: EXCLUSIVE / SIDIS sub-process folders carry this afterburner-conversion suffix.
0017 _ABCONV = '_ABCONV'
0018 
0019 
0020 def _strip_abconv(s):
0021     return s[:-len(_ABCONV)] if s.endswith(_ABCONV) else s
0022 
0023 
0024 def _beam_pair(beam):
0025     """'18x275' -> ('18', '275'); '' -> ('N/A', 'N/A')."""
0026     if beam and 'x' in beam:
0027         e, h = beam.split('x', 1)
0028         return (e or 'N/A', h or 'N/A')
0029     return ('N/A', 'N/A')
0030 
0031 
# ── physics-tag token vocabulary (scanned at ANY path position) ───────────────
# The catalog paths are not positional: a physics token (q2, species, decay,
# beam-config, ...) can sit at any depth, be compounded (coherent_ep), or live
# in a filename (UPSILON). Derivation recognises tokens by pattern, not slot.
#: Physics areas; path parsing starts at the first segment found in this set.
_KNOWN_AREAS = {'SINGLE', 'DIS', 'DDIS', 'SIDIS', 'EXCLUSIVE', 'EW_BSM', 'BACKGROUNDS'}
#: Whole-token beam pair: digits, 'x', digits (e.g. '18x275').
_BEAM_RE    = re.compile(r'^\d+x\d+$')
#: Whole-token Q2 selection: 'minQ2=<digits>' or 'q2_<non-space text>'.
_Q2_RE      = re.compile(r'^(minQ2=\d+|q2_\S+)$')
#: Whole-token 'e' + ion label: H<n>, He<n>, Li with optional digits, or one of
#: Ca/Cu/Ru/Pb/Au/C/O.
_ION_RE     = re.compile(r'^e(H[0-9]+|He[0-9]+|Li[0-9]*|Ca|Cu|Ru|Pb|Au|C|O)$')
#: Nucleon tokens; derive_physics uses the first one as ``nucleon``, or as
#: ``beam_species`` when the path carries no ion token.
_NUCLEON    = {'ep', 'en'}
#: Prefix match: 'ma_' followed by a digit -> ``mass``.
_MASS_RE    = re.compile(r'^ma_[0-9]')
#: Prefix match: token starting with 'aem' -> ``channel``.
_CHANNEL_RE = re.compile(r'^aem')
#: Prefix match on a whole path segment; the groups are, in order, the state
#: (1s/2s/3s), the mechanism (photo/_threshold), the beam config and the beam pair.
_UPSILON_RE = re.compile(r'^upsilon(1s|2s|3s)(photo|_threshold)_ab_(hiAcc|hiDiv)_(\d+x\d+)')
#: Exact-match vocabularies, each feeding one field of the derived params:
#: beam_config, decay_mode, hadron_charge, helicity, coherence (stored as the
#: lower-case 'coherent' for either spelling), model, polarization, final_state.
_BEAMCONFIG = {'hiAcc', 'hiDiv'}
_DECAY      = {'edecay', 'mudecay'}
_CHARGE     = {'hplus', 'hminus'}
_HELICITY   = {'hel_plus', 'hel_minus'}
_COHERENCE  = {'coherent', 'Coherent'}
_MODEL      = {'bsat'}
_POLAR      = {'unpolarised', 'polarised'}
_FINAL_STATE = {'pi+', 'pi-', 'pi0', 'K+', 'K-', 'K0', 'K+Lambda'}
#: radiation (Rad/noRad) and generator tokens are EVGEN-tag axes — not physics.
0053 
0054 
0055 def _split_compounds(tok):
0056     """Yield a segment and its '_'-split parts so embedded physics tokens are
0057     seen: 'coherent_ep' -> coherent, ep; 'ep_noradcor' -> ep (noradcor ignored)."""
0058     yield tok
0059     if '_' in tok:
0060         yield from tok.split('_')
0061 
0062 
0063 def _physics_area_segments(path):
0064     """Return path segments from the first recognised physics area onward,
0065     dropping any leading prefix — the ``EVGEN`` root, or a past Rucio-DID
0066     background-overlay chain (``Bkg_*/Synrad_*/GoldC*/<um>/…``). ``[]`` if no
0067     physics area is present."""
0068     segs = [s for s in (path or '').split('/') if s]
0069     for i, s in enumerate(segs):
0070         if s in _KNOWN_AREAS:
0071             return segs[i:]
0072     return []
0073 
0074 
0075 def _beam_split(tok):
0076     e, h = tok.split('x', 1)
0077     return (e or 'N/A', h or 'N/A')
0078 
0079 
def derive_physics(path, beam=''):
    """Derive the physics-tag parameter dict for an EVGEN path or a past
    Rucio-DID path remainder.

    Parsing begins at the first recognised physics area and is token-driven
    rather than positional. The result uses the schema field names; ``None`` is
    returned when the path contains no physics area. The angle range (a sample
    variant), radiation and generator (EVGEN-tag axes) are deliberately left out.

    ``beam`` is an optional '<e>x<h>' label. It supplies the beam energies for
    BACKGROUNDS, and for signal paths only when the path itself carries no beam
    token; without either, both energies are 'N/A'.

    BACKGROUNDS resolve to process BEAMGAS/SYNRAD so the caller can route them to
    the signal-free p6001 physics tag plus a k background tag.
    """
    segments = _physics_area_segments(path)
    if not segments:
        return None
    area, *tail = segments
    head = tail[0] if tail else None

    # Particle gun: no colliding beams, just the particle and its energy.
    if area == 'SINGLE':
        return {
            'process': 'SINGLE',
            'beam_energy_electron': 'N/A',
            'beam_energy_hadron': 'N/A',
            'particle': '' if head is None else head,
            'gun_energy': tail[1] if len(tail) > 1 else '',
        }

    # Backgrounds take their beam from the caller, never from the path.
    if area == 'BACKGROUNDS':
        electron, hadron = _beam_pair(beam)
        if head == 'SYNRAD':
            return {'process': 'SYNRAD',
                    'beam_energy_electron': electron,
                    'beam_energy_hadron': hadron}
        return {
            'process': 'BEAMGAS',
            'beam_energy_electron': electron,
            'beam_energy_hadron': hadron,
            'bg_source': tail[1] if len(tail) > 1 else '',
            'bg_mechanism': tail[2] if len(tail) > 2 else '',
        }

    # Signal areas: resolve the process name first.
    if area in ('DIS', 'DDIS'):
        if 'NC' in tail:
            process = 'DIS_NC'
        elif 'CC' in tail:
            process = 'DIS_CC'
        else:
            process = area
    elif area == 'SIDIS':
        flavour = '' if head is None else _strip_abconv(head)
        process = 'SIDIS_' + flavour if flavour in ('D0', 'DIJET', 'Lc') else 'SIDIS'
    elif area == 'EXCLUSIVE':
        process = 'EXCLUSIVE' if head is None else _strip_abconv(head)
    elif area == 'EW_BSM':
        process = 'EW_BSM' if head is None else head       # ALP
    else:
        process = area
    sig = {'process': process}

    # Exact-match vocabularies and prefix patterns, tried in this order after
    # the beam / q2 / ion / nucleon checks.
    exact_fields = (
        (_BEAMCONFIG, 'beam_config'),
        (_DECAY, 'decay_mode'),
        (_CHARGE, 'hadron_charge'),
        (_HELICITY, 'helicity'),
        (_COHERENCE, 'coherence'),
        (_MODEL, 'model'),
        (_POLAR, 'polarization'),
        (_FINAL_STATE, 'final_state'),
    )
    pattern_fields = (
        (_MASS_RE, 'mass'),
        (_CHANNEL_RE, 'channel'),
    )

    first_ion = first_nucleon = None
    for segment in tail:
        upsilon = _UPSILON_RE.match(segment)
        if upsilon:
            # An upsilon name packs state, mechanism, beam config and beam pair
            # into one segment; it is consumed whole and not token-scanned.
            state, mechanism, config, pair = upsilon.groups()
            sig['state'] = state
            sig['mechanism'] = mechanism.lstrip('_')
            sig['beam_config'] = config
            sig['beam_energy_electron'], sig['beam_energy_hadron'] = _beam_split(pair)
            continue
        for tok in _split_compounds(segment):
            if _BEAM_RE.match(tok):
                sig['beam_energy_electron'], sig['beam_energy_hadron'] = _beam_split(tok)
                continue
            if _Q2_RE.match(tok):
                sig['q2_range'] = tok
                continue
            if _ION_RE.match(tok):
                if first_ion is None:
                    first_ion = tok
                continue
            if tok in _NUCLEON:
                if first_nucleon is None:
                    first_nucleon = tok
                continue
            field = next((name for vocab, name in exact_fields if tok in vocab), None)
            if field is not None:
                # Either spelling of the coherence token is stored lower-case.
                sig[field] = 'coherent' if field == 'coherence' else tok
                continue
            field = next((name for rx, name in pattern_fields if rx.match(tok)), None)
            if field is not None:
                sig[field] = tok

    # An ion names the beam species; a nucleon token alongside it is recorded
    # separately, while a nucleon on its own is itself the species.
    if first_ion is not None:
        sig['beam_species'] = first_ion
        if first_nucleon is not None:
            sig['nucleon'] = first_nucleon
    elif first_nucleon is not None:
        sig['beam_species'] = first_nucleon        # bare 'ep' = electron-proton beam

    # No beam token in the path: fall back to the caller's label, else 'N/A'.
    if 'beam_energy_electron' not in sig:
        sig['beam_energy_electron'], sig['beam_energy_hadron'] = _beam_pair(beam)
    return sig
0165 
0166 
0167 #: BEAMGAS 4th path segment is a physical mechanism when it is one of these;
0168 #: otherwise it names the generator.
0169 _BG_MECHANISMS = ('brems', 'coulomb', 'touschek')
0170 _BG_BEAM_NXM = re.compile(r'^(\d+)x(\d+)$')
0171 _BG_BEAM_SINGLE = re.compile(r'^(\d+)GeV$')
0172 
0173 
0174 def _bg_beam(segs, source):
0175     """Beam energies from a backgrounds path. 'NxM' -> (N, M). A bare 'NGeV' is
0176     assigned by source: a proton source to the hadron beam, otherwise electron."""
0177     for s in segs:
0178         m = _BG_BEAM_NXM.match(s)
0179         if m:
0180             return m.group(1), m.group(2)
0181         m = _BG_BEAM_SINGLE.match(s)
0182         if m:
0183             return ('N/A', m.group(1)) if source == 'proton' else (m.group(1), 'N/A')
0184     return 'N/A', 'N/A'
0185 
0186 
def derive_background(path):
    """Derive the canonical background (k) tag params from a stripped
    EVGEN/BACKGROUNDS path; ``None`` when the path is not a backgrounds entry
    (it must start ``EVGEN/BACKGROUNDS/`` and have at least three segments).

    BEAMGAS layout: ``EVGEN/BACKGROUNDS/BEAMGAS/<source>/<mechanism-or-generator>/…/<beam>/…``.
    When the 4th segment is a physical mechanism (brems/coulomb/touschek) the
    generator is assembled from the next two segments, skipping beam labels;
    otherwise the 4th segment is itself the generator. SYNRAD carries neither
    source nor mechanism, only an optional generator segment. Values are open
    strings passed through from the path.

    Every match field is always present in the result (blank where the path
    does not supply it) so the tag-dedup lookup is consistent.
    """
    segs = (path or '').split('/')
    if segs[:2] != ['EVGEN', 'BACKGROUNDS'] or len(segs) < 3:
        return None

    kind = segs[2]
    source = mechanism = generator = ''
    if kind == 'SYNRAD':
        if len(segs) > 3:
            generator = segs[3]
    else:
        if len(segs) > 3:
            source = segs[3]
        fourth = segs[4] if len(segs) > 4 else ''
        if fourth in _BG_MECHANISMS:
            mechanism = fourth
            # The generator follows the mechanism; beam labels and empty
            # segments in that two-segment window are not part of its name.
            generator = '/'.join(
                seg for seg in segs[5:7]
                if seg and not (_BG_BEAM_NXM.match(seg) or _BG_BEAM_SINGLE.match(seg))
            )
        else:
            generator = fourth

    electron, hadron = _bg_beam(segs, source)
    return {
        'background_type': kind,
        'bg_source': source,
        'bg_mechanism': mechanism,
        'bg_generator': generator,
        'beam_energy_electron': electron,
        'beam_energy_hadron': hadron,
    }
0225 
0226 
0227 #: Generator family names whose version follows the name in a token. pythia is
0228 #: handled separately because its family carries the major-version digit
0229 #: (pythia8 / pythia6).
0230 _EVGEN_NAMES = ('EpIC', 'BeAGLE', 'sartre', 'eSTARlight', 'eicMesonSFGen',
0231                 'lAger', 'rapgap', 'DEMPgen', 'DJANGOH', 'GETaLM')
0232 _PYTHIA_RE = re.compile(r'^[Pp]ythia[ _]?(\d)(.*)$')
0233 
0234 
0235 def _split_gen_token(tok):
0236     """Split a '<Generator><Version>' token into (generator, generator_version),
0237     or (None, None) when there is no known generator *with a non-empty version*.
0238 
0239     A bare generator name (e.g. 'pythia8', 'eSTARlight') has no version in the
0240     source and resolves to (None, None) — left for manual association, not
0241     guessed. The leading 'v' of a version is preserved (EpIC 'v1.1.6-1.2');
0242     pythiaN keeps its major-version digit in both the family and the version.
0243     """
0244     t = (tok or '').strip()
0245     if not t:
0246         return None, None
0247     m = _PYTHIA_RE.match(t)
0248     if m:
0249         if not m.group(2).strip(' ._-'):     # bare 'pythia8' / 'Pythia 8'
0250             return None, None
0251         return f'pythia{m.group(1)}', f'{m.group(1)}{m.group(2)}'.lstrip('._- ')
0252     for g in _EVGEN_NAMES:
0253         if t.lower().startswith(g.lower()):
0254             ver = t[len(g):].lstrip('._- ')   # keep a leading 'v'
0255             return (g, ver) if ver else (None, None)
0256     return None, None
0257 
0258 
def derive_evgen(path, gen_version=''):
    """Resolve the curated evgen params (generator, generator_version) for a
    catalog row. ``None`` means no confident resolution exists and the row is
    left for manual association — nothing is guessed.

    - ``EVGEN/SINGLE/...`` samples are the particle gun.
    - A background ``dataprod_rel`` release names no generator; the generator is
      the repository (e.g. EIC_ESR_Xsuite, EIC_SR_Geant4).
    - Otherwise the first '<Generator><Version>' token found — trying the
      release tag, then the whole gen_version, then each path segment — is
      split; a bare generator with no version does not resolve.

    A resolved non-particle-gun result also carries ``radiative`` ('on'/'off')
    when the path has a 'Rad' / 'noRad' segment.
    """
    segs = (path or '').split('/')
    if segs[:2] == ['EVGEN', 'SINGLE']:
        return {'generator': 'particle_gun', 'generator_version': ''}

    # Radiative corrections are a generator run-mode that genuinely changes the
    # physics, so they belong in the evgen tag (not a sample discriminator): a
    # 'Rad' / 'noRad' path segment splits an otherwise-identical generator into
    # distinct tags. Only set when the path names it; absent elsewhere.
    radiative = 'off' if 'noRad' in segs else ('on' if 'Rad' in segs else '')

    def _tag(generator, version):
        tag = {'generator': generator, 'generator_version': version}
        if radiative:
            tag['radiative'] = radiative
        return tag

    gv = (gen_version or '').strip()
    # PYTHIA-RAD-CORR releases are bare versions and the path's pythia6 segment
    # is not a clean generator+version — ambiguous, left for manual association.
    if 'pythia-rad-corr' in gv.lower():
        return None

    # Last path component of the release reference, ignoring trailing slashes.
    release = gv.rstrip('/').rsplit('/', 1)[-1]
    if release.startswith('dataprod_rel') and 'github.com/' in gv:
        # '<...>github.com/<owner>/<repo>/releases/...' -> '<repo>'
        after_host = gv.partition('github.com/')[2]
        repo = after_host.partition('/releases')[0].rsplit('/', 1)[-1]
        if repo:
            return _tag(repo, release)

    for candidate in (release, gv, *segs):
        generator, version = _split_gen_token(candidate)
        if generator:
            return _tag(generator, version)
    return None
0302 
0303 
def single_particle_angle(path):
    """Return the angular-range tail of a single-particle path, '' if none.

    Single-particle samples share one ``(particle, gun_energy)`` physics tag
    and differ only by polar-angle range. The angle is a per-task detail that
    ``derive_physics`` deliberately omits from the reusable tag, so the importer
    can store it as a per-task override instead.

    The tail is whatever follows ``EVGEN/SINGLE/<particle>/<energy>/``, with
    its '/' separators kept. A path that does not start with ``EVGEN/SINGLE``
    (including an empty or ``None`` path) yields ''.
    """
    parts = (path or '').split('/')
    if parts[:2] == ['EVGEN', 'SINGLE']:
        return '/'.join(parts[4:])
    return ''