# File indexing completed on 2026-04-25 08:28:59
0001
"""Backfill Rucio DID metadata by parsing each DID name."""
0003
0004 import argparse
0005 import re
0006
0007 from rucio.client import Client
0008 from shared_utils import detect_generator, detect_q2, detect_pwg, detect_dsc
# Ion symbol as it appears in a DID name -> species label stored in the
# ``ion_species`` metadata field.
ion_map = {
    "Au": "Au197",
    "Ru": "Ru96",
    "Cu": "Cu63",
    "He": "He3",
    "H2": "H2",
}

# Particle-gun species looked for in single-particle DID names. Order matters:
# the per-DID loop keeps the first entry it finds in the name.
gun_particles = [
    "e-", "e+", "proton", "neutron",
    "pi+", "pi-", "pi0",
    "kaon-", "kaon+",
    "gamma", "mu-",
]

# Name fragment -> (electron, ion) beam energies, used for samples whose name
# carries no "<electron>x<ion>" token.
beam_energy_override_map = {
    "UPSILON_ABCONV": (18, 275),
}

# Matches an "e<ion>" token (e.g. "eAu") delimited by "/", "_" or the string
# boundaries; group 1 is the bare ion symbol. The alternation is generated
# from ``ion_map`` so that adding a species only requires touching the map.
ion_regex = re.compile(
    r'(?:^|[/_])e('
    + "|".join(re.escape(symbol) for symbol in ion_map)
    + r')(?:[/_]|$)'
)
0029
0030
# Command-line interface: which campaigns to process, and whether to only
# report what would be written instead of writing it.
parser = argparse.ArgumentParser(description="Backfill metadata from DID name")
parser.add_argument(
    "-c",
    "--campaigns",
    required=True,
    nargs="+",
    help="List of campaign version strings (e.g. 26.03.0 26.03.1)",
)
parser.add_argument(
    "--dry-run",
    action="store_true",
    help="Print metadata that would be set without making any changes in Rucio",
)
args = parser.parse_args()

# Campaign version -> release tag ("<version>-stable") recorded as the
# ``software_release`` of every DID found under that version.
version_map = dict(
    (campaign, f"{campaign}-stable") for campaign in args.campaigns
)
0044
# Rucio client used for the DID listing below.
client = Client()

# Collect the DIDs in scope "epic" whose name matches /RECO/<version>/* for
# each requested campaign version, then put them in ascending order.
datasets_dids = []
for version in version_map:
    datasets_dids.extend(
        client.list_dids(scope="epic", filters={"name": f"/RECO/{version}/*"})
    )
datasets_dids.sort()
0052
# ---------------------------------------------------------------------------
# Loop-invariant tables and patterns: built once rather than once per DID.
# ---------------------------------------------------------------------------

# Beam-gas names encode only one beam energy; these tables supply the partner
# energy used when composing the geometry suffix (electron -> ion and back).
_ION_ENERGY_FOR_ELECTRON = {10: 100, 18: 275}
_ELECTRON_ENERGY_FOR_ION = {
    ion: electron for electron, ion in _ION_ENERGY_FOR_ELECTRON.items()
}

# First path segment -> value of the ``data_level`` metadata field.
_DATA_LEVELS = {"RECO": "reconstruction", "FULL": "simulation"}

_RELEASE_RE = re.compile(r'/RECO/([^/]+)/')
_BEAM_RE = re.compile(r'(\d+)x(\d+)')
_GEV_RE = re.compile(r'(\d+)GeV')
_GUN_GEV_RE = re.compile(r'/(\d+)GeV/')
_GUN_MEV_RE = re.compile(r'/(\d+)MeV/')
_GUN_THETA_RE = re.compile(r'/(\d+)to(\d+)deg')
_SEPARATOR_RE = re.compile(r'[_/]')


def _beam_energies(path):
    """Return ``(electron_beam_energy, ion_beam_energy)`` parsed from *path*.

    Resolution order:
      1. an ``<electron>x<ion>`` token anywhere in the name;
      2. ``BEAMGAS/electron`` or ``BEAMGAS/proton`` names, where the first
         ``<N>GeV`` token gives the one beam that is present;
      3. a fragment listed in ``beam_energy_override_map``.
    Either element is ``None`` when it cannot be determined.
    """
    pair = _BEAM_RE.search(path)
    if pair:
        return int(pair.group(1)), int(pair.group(2))

    if 'BEAMGAS/electron' in path:
        gev = _GEV_RE.search(path)
        return (int(gev.group(1)) if gev else None), None

    if 'BEAMGAS/proton' in path:
        gev = _GEV_RE.search(path)
        return None, (int(gev.group(1)) if gev else None)

    for key, energies in beam_energy_override_map.items():
        if f"/{key}" in path:
            return energies
    return None, None


def _geometry_and_level(path, electron_beam_energy, ion_beam_energy, ion_species):
    """Return ``(data_level, geometry_base, geometry_config)`` for *path*.

    ``geometry_base`` is the third path segment with any ``epic_`` prefix
    removed; ``geometry_config`` is that base plus a beam (and, for non-proton
    ions, species) suffix. Both are ``None`` when the name does not start with
    a known data-level segment or has fewer than three segments.
    """
    parts = path.strip("/").split("/")
    data_level = _DATA_LEVELS.get(parts[0]) if len(parts) > 1 else None

    # The geometry lives in the third segment, so require three segments
    # before indexing (a two-segment name used to raise IndexError here).
    geometry_base = parts[2] if (data_level and len(parts) > 2) else None
    if not geometry_base:
        return data_level, geometry_base, geometry_base

    if geometry_base.startswith("epic_"):
        geometry_base = geometry_base[len("epic_"):]

    geometry_config = geometry_base
    if electron_beam_energy and ion_beam_energy:
        geometry_config = f"{geometry_base}_{electron_beam_energy}x{ion_beam_energy}"
        if ion_species and ion_species != "p":
            geometry_config = f"{geometry_config}_{ion_species}"
    elif electron_beam_energy and 'BEAMGAS/electron' in path:
        default_ion = _ION_ENERGY_FOR_ELECTRON.get(electron_beam_energy)
        if default_ion:
            geometry_config = f"{geometry_base}_{electron_beam_energy}x{default_ion}"
    elif ion_beam_energy and 'BEAMGAS/proton' in path:
        default_ebeam = _ELECTRON_ENERGY_FOR_ION.get(ion_beam_energy)
        if default_ebeam:
            geometry_config = f"{geometry_base}_{default_ebeam}x{ion_beam_energy}"
    return data_level, geometry_base, geometry_config


def _gun_kinematics(path):
    """Return ``(momentum_gev, theta_min, theta_max, distribution)`` for a gun sample.

    The momentum comes from a ``/<N>GeV/`` segment, or a ``/<N>MeV/`` segment
    converted to GeV; the polar-angle range from ``/<a>to<b>deg``. Elements
    that are absent from the name are ``None``.
    """
    momentum = None
    gev = _GUN_GEV_RE.search(path)
    mev = _GUN_MEV_RE.search(path)
    if gev:
        momentum = float(gev.group(1))
    elif mev:
        momentum = float(mev.group(1)) / 1000

    theta_min = theta_max = distribution = None
    theta = _GUN_THETA_RE.search(path)
    if theta:
        theta_min = float(theta.group(1))
        theta_max = float(theta.group(2))
        distribution = "cos(theta)"
    elif "etaScan" in path:
        distribution = "uniform"
    return momentum, theta_min, theta_max, distribution


# ---------------------------------------------------------------------------
# Derive and apply (or, with --dry-run, only report) the metadata of each DID.
# ---------------------------------------------------------------------------
for did in datasets_dids:
    path = did

    # Release tag of the campaign; "other" when there is no /RECO/<version>/
    # segment. A version missing from version_map yields None and the field
    # is then omitted from the metadata.
    release_match = _RELEASE_RE.search(path)
    software_release = (
        version_map.get(release_match.group(1)) if release_match else "other"
    )

    electron_beam_energy, ion_beam_energy = _beam_energies(path)

    is_background_mixed = "Bkg" in path

    q2_min, q2_max = detect_q2(path)

    generator = detect_generator(path, is_single="SINGLE" in path)

    # No "e<ion>" token in the name means a proton beam.
    ion_match = ion_regex.search(path)
    ion_species = ion_map.get(ion_match.group(1)) if ion_match else "p"

    data_level, geometry_base, geometry_config = _geometry_and_level(
        path, electron_beam_energy, ion_beam_energy, ion_species
    )

    requester_pwg = detect_pwg(path)
    requester_dsc = detect_dsc(path, is_background_mixed=is_background_mixed)

    # Background samples carry no beam / Q2 / working-group metadata. The
    # geometry suffix computed above is intentionally left in place.
    if "BACKGROUNDS" in path:
        requester_pwg = None
        electron_beam_energy = None
        ion_beam_energy = None
        ion_species = None
        q2_min = None
        q2_max = None

    gun_particle = None
    gun_momentum_min = gun_momentum_max = None
    gun_theta_min = gun_theta_max = None
    gun_phi_min = gun_phi_max = None
    gun_distribution = None
    if "SINGLE" in path:
        requester_pwg = None
        generator = "single_particle"

        # First listed gun particle that occurs in the lower-cased name
        # (separators replaced by spaces); None if none occurs.
        normalized_path = _SEPARATOR_RE.sub(' ', path.lower())
        gun_particle = next(
            (particle for particle in gun_particles if particle in normalized_path),
            None,
        )

        # Single-particle samples are always tagged "<base>_5x41". Build it
        # from the un-suffixed base so that neither an ion-species suffix nor
        # a missing geometry can corrupt the value (previously this could
        # produce e.g. "None_5x41" or "<base>_10x100_5x41").
        if geometry_base:
            geometry_config = f"{geometry_base}_5x41"
        ion_species = None
        electron_beam_energy = None
        ion_beam_energy = None
        q2_min = None
        q2_max = None

        (
            gun_momentum_min,
            gun_theta_min,
            gun_theta_max,
            gun_distribution,
        ) = _gun_kinematics(path)
        # The name encodes a single momentum, so min and max coincide.
        gun_momentum_max = gun_momentum_min

        gun_phi_min = 0
        gun_phi_max = 360

    print(f"\nDID: {did}")
    print(f"software_release: {software_release}")
    print(f"electron_beam_energy_gev: {electron_beam_energy}")
    print(f"ion_beam_energy_gev: {ion_beam_energy}")
    print(f"ion_species: {ion_species}")
    print(f"q2_min_gev2: {q2_min}")
    print(f"q2_max_gev2: {q2_max}")
    print(f"is_background_mixed: {is_background_mixed}")
    print(f"data_level: {data_level}")
    if gun_particle:
        print(f"gun_particle: {gun_particle}")
    print(f"generator: {generator}")
    print(f"geometry_config: {geometry_config}")
    print(f"requester_pwg: {requester_pwg}")
    if requester_dsc:
        print(f"requester_dsc: {requester_dsc}")
    if gun_momentum_min is not None:
        print(f"gun_momentum_min_gev: {gun_momentum_min}")
        print(f"gun_momentum_max_gev: {gun_momentum_max}")
    if gun_theta_min is not None:
        print(f"gun_theta_min_deg: {gun_theta_min}")
        print(f"gun_theta_max_deg: {gun_theta_max}")
    if gun_phi_min is not None:
        print(f"gun_phi_min_deg: {gun_phi_min}")
        print(f"gun_phi_max_deg: {gun_phi_max}")
    if gun_distribution:
        print(f"gun_distribution: {gun_distribution}")
    print("-" * 40)

    metadata = {
        "software_release": software_release,
        "electron_beam_energy_gev": electron_beam_energy,
        "ion_beam_energy_gev": ion_beam_energy,
        "ion_species": ion_species,
        "q2_min_gev2": q2_min,
        "q2_max_gev2": q2_max,
        "is_background_mixed": is_background_mixed,
        "data_level": data_level,
        "generator": generator,
        "geometry_config": geometry_config,
        "gun_momentum_min_gev": gun_momentum_min,
        "gun_momentum_max_gev": gun_momentum_max,
        "gun_theta_min_deg": gun_theta_min,
        "gun_theta_max_deg": gun_theta_max,
        "gun_phi_min_deg": gun_phi_min,
        "gun_phi_max_deg": gun_phi_max,
        "gun_distribution": gun_distribution,
    }
    if gun_particle:
        metadata["gun_particle"] = gun_particle
    if requester_pwg:
        metadata["requester_pwg"] = requester_pwg
    if requester_dsc:
        metadata["requester_dsc"] = requester_dsc
    # Only fields that could actually be determined are sent.
    metadata = {key: value for key, value in metadata.items() if value is not None}

    if args.dry_run:
        print(f"[DRY RUN] Would set metadata for DID: {did}")
        print(f"[DRY RUN] metadata: {metadata}")
    else:
        # Best effort per DID: report the failure and carry on with the rest.
        try:
            client.set_metadata_bulk(scope="epic", name=did, meta=metadata, recursive=False)
            print(f"Metadata added successfully for DID: {did}")
        except Exception as e:
            print(f"Error adding metadata for DID: {did}, error: {e}")
0256