Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-04-11 08:41:05

0001 #!/usr/bin/env python
0002 # Licensed under the Apache License, Version 2.0 (the "License");
0003 # you may not use this file except in compliance with the License.
0004 # You may obtain a copy of the License at
0005 # http://www.apache.org/licenses/LICENSE-2.0
0006 #
0007 # Authors:
0008 # - Paul Nilsson, paul.nilsson@cern.ch, 2018-2019
0009 
0010 import os
0011 import logging
0012 from xml.dom import minidom
0013 from xml.etree import ElementTree
0014 
0015 from pilot.util.filehandling import write_file
0016 
0017 logger = logging.getLogger(__name__)
0018 
0019 
def create_input_file_metadata(file_dictionary, workdir, filename="PoolFileCatalog.xml"):
    """
    Build a Pool File Catalog (PFC) from a {guid: pfn} dictionary and store it on disk.

    One <File> element is generated per dictionary entry. The tree is serialized, pretty printed with a two-space
    indent, post-processed (every '&' is replaced with '&#038;' and a DOCTYPE line is inserted in front of the opening
    <POOLFILECATALOG> tag) and finally written to <workdir>/<filename> with write_file().
    Note: the pfn values are written as given. Any environment variable expansion is expected to have happened
    upstream (the original note points to pilot/control/data::get_input_file_dictionary(), not visible here).

    Format:
    dictionary = {'guid': 'pfn', ..}
    ->
    <?xml version="1.0" ?>
    <!DOCTYPE POOLFILECATALOG SYSTEM "InMemory">
    <POOLFILECATALOG>
      <File ID="guid">
        <physical>
          <pfn filetype="ROOT_All" name="pfn"/>
        </physical>
        <logical/>
      </File>
    </POOLFILECATALOG>

    :param file_dictionary: file dictionary, {guid: pfn, ..}.
    :param workdir: job work directory (string).
    :param filename: PFC file name (string).
    :return: xml (string)
    """

    # assemble the element tree: POOLFILECATALOG -> File -> (physical -> pfn, logical)
    catalog = ElementTree.Element('POOLFILECATALOG')
    for guid, pfn in file_dictionary.items():
        file_element = ElementTree.SubElement(catalog, 'File')
        file_element.set('ID', guid)
        physical = ElementTree.SubElement(file_element, 'physical')
        ElementTree.SubElement(physical, 'pfn', {'filetype': 'ROOT_All', 'name': pfn})
        ElementTree.SubElement(file_element, 'logical')

    # serialize and let minidom take care of the pretty printing
    serialized = ElementTree.tostring(catalog, encoding='utf8')
    pretty = minidom.parseString(serialized).toprettyxml(indent="  ")

    # add escape character for & (needed for google turls)
    # NOTE(review): minidom has already written a literal '&' as '&amp;' at this point, so this replacement produces
    # '&#038;amp;' - kept exactly as before since consumers may rely on it; confirm that this is intended
    xml = pretty.replace('&', '&#038;')

    # stitch in the DOCTYPE in front of the opening root tag
    # NOTE(review): an empty dictionary serializes the root as '<POOLFILECATALOG/>', which this pattern does not match
    xml = xml.replace('<POOLFILECATALOG>', '<!DOCTYPE POOLFILECATALOG SYSTEM "InMemory">\n<POOLFILECATALOG>')

    write_file(os.path.join(workdir, filename), xml, mute=False)

    return xml
0071 
0072 
def get_file_info_from_xml(workdir, filename="PoolFileCatalog.xml"):
    """
    Return a file info dictionary based on the metadata in the given XML file.
    The file info dictionary is used to replace the input file LFN list in the job parameters with the full PFNs
    which are needed for direct access in production jobs.

    Only <pfn> elements located directly below a <physical> element of a <File> element are considered. Other
    entries, e.g. <lfn> elements inside <logical>, are ignored so that they can never be mistaken for a PFN.
    The LFN used as dictionary key is the base name of the PFN.

    Example of PoolFileCatalog.xml:

    <?xml version="1.0" ?>
    <POOLFILECATALOG>
      <File ID="4ACC5018-2EA3-B441-BC11-0C0992847FD1">
        <physical>
          <pfn filetype="ROOT_ALL" name="root://dcgftp.usatlas.bnl.gov:1096//../AOD.11164242._001522.pool.root.1"/>
        </physical>
        <logical/>
      </File>
    </POOLFILECATALOG>

    which gives the following dictionary:

    {'AOD.11164242._001522.pool.root.1': ['root://dcgftp.usatlas.bnl.gov:1096//../AOD.11164242._001522.pool.root.1',
    '4ACC5018-2EA3-B441-BC11-0C0992847FD1']}

    :param workdir: directory of PoolFileCatalog.xml (string).
    :param filename: file name (default: PoolFileCatalog.xml) (string).
    :raises KeyError: if a File element has no ID attribute or a pfn element has no name attribute.
    :return: dictionary { LFN: [PFN, GUID], .. }
    """

    file_info_dictionary = {}

    # note: ElementTree.parse() raises if the file is missing or is not well-formed XML
    root = ElementTree.parse(os.path.join(workdir, filename)).getroot()
    # root.tag = POOLFILECATALOG

    for file_element in root.findall('File'):
        # file_element.attrib = {'ID': '4ACC5018-2EA3-B441-BC11-0C0992847FD1'}
        guid = file_element.attrib['ID']

        # select by tag instead of walking all descendants; a <logical><lfn name=".."/></logical> entry would
        # otherwise be read as a PFN and replace the real PFN stored for the same LFN
        for pfn_element in file_element.findall('physical/pfn'):
            # pfn_element.attrib = {'filetype': 'ROOT_ALL', 'name': 'root://dcgftp.usatlas.bnl ..'}
            pfn = pfn_element.attrib['name']
            file_info_dictionary[os.path.basename(pfn)] = [pfn, guid]

    return file_info_dictionary
0118 
0119 
def get_metadata_from_xml(workdir, filename="metadata.xml"):
    """
    Parse the payload metadata.xml file.

    For every File element, the LFN(s) are taken from the name attribute of the elements inside <logical>, and all
    <metadata att_name=".." att_value=".."/> entries are collected into a dictionary stored under each LFN. The File
    ID, when present, is added to that dictionary under the key 'guid'. The result does not depend on the order of
    the <logical> and <metadata> elements. File elements without a usable LFN are skipped with a warning.

    Example of metadata.xml:

    <?xml version="1.0" encoding="UTF-8"?>
    <!DOCTYPE POOLFILECATALOG SYSTEM 'InMemory'>
    <POOLFILECATALOG>
      <File ID="D2A6D6F4-ADB2-B140-9C2E-D2D5C099B342">
        <logical>
          <lfn name="RDO_011a43ba-7c98-488d-8741-08da579c5de7.root"/>
        </logical>
        <metadata att_name="geometryVersion" att_value="ATLAS-R2-2015-03-01-00"/>
        <metadata att_name="conditionsTag" att_value="OFLCOND-RUN12-SDR-19"/>
        <metadata att_name="size" att_value="3250143"/>
        <metadata att_name="events" att_value="3"/>
        <metadata att_name="beamType" att_value="collisions"/>
        <metadata att_name="fileType" att_value="RDO"/>
      </File>
    </POOLFILECATALOG>

    which gives the following dictionary:

    {'RDO_011a43ba-7c98-488d-8741-08da579c5de7.root': {'conditionsTag': 'OFLCOND-RUN12-SDR-19',
    'beamType': 'collisions', 'fileType': 'RDO', 'geometryVersion': 'ATLAS-R2-2015-03-01-00', 'events': '3',
    'size': '3250143', 'guid': 'D2A6D6F4-ADB2-B140-9C2E-D2D5C099B342'}}

    :param workdir: payload work directory (string).
    :param filename: metadata file name (string).
    :return: metadata dictionary, { lfn: { att_name1: att_value1, .., 'guid': guid }, ..}. An empty dictionary is
             returned if the file does not exist.
    """

    # metadata_dictionary = { lfn: { att_name1: att_value1, .. }, ..}
    metadata_dictionary = {}
    path = os.path.join(workdir, filename)
    if not os.path.exists(path):
        logger.warning('file does not exist: %s', path)
        return metadata_dictionary

    root = ElementTree.parse(path).getroot()
    # root.tag = POOLFILECATALOG

    for file_element in root:
        # file_element.tag = 'File', file_element.attrib = {'ID': '4ACC5018-2EA3-B441-BC11-0C0992847FD1'}
        guid = file_element.attrib.get('ID')

        # resolve the LFN(s) before looking at the metadata, so that a <metadata> (or unknown) element placed in
        # front of <logical> can no longer trigger a KeyError
        lfns = []
        for logical in file_element.findall('logical'):
            for lfn_element in logical:
                # lfn_element.attrib = {'name': 'RDO_011a43ba-7c98-488d-8741-08da579c5de7.root'}
                lfn = lfn_element.attrib.get('name')
                if lfn:
                    lfns.append(lfn)
        if not lfns:
            logger.warning('skipping File element without LFN (ID=%s)', guid)
            continue

        # metadata.attrib = {'att_name': 'events', 'att_value': '3'}; elements with other tags are ignored
        attributes = {}
        for metadata in file_element.findall('metadata'):
            attributes[metadata.attrib.get('att_name')] = metadata.attrib.get('att_value')
        if guid:
            attributes['guid'] = guid

        # each LFN gets its own copy so that later modifications of one entry do not affect the others
        for lfn in lfns:
            metadata_dictionary[lfn] = dict(attributes)

    return metadata_dictionary
0188 
0189 
def get_number_of_events(metadata_dictionary, filename=''):
    """
    Get the number of events for the given file from the metadata dictionary (from metadata.xml).

    :param metadata_dictionary: dictionary from parsed metadata.xml file, { lfn: { 'events': '3', .. }, .. }.
    :param filename: file name for which the number of events relates to (string).
    :return: number of events (int). -1 is returned if the events could not be extracted from the dictionary, i.e. if
             the file name is empty or unknown, or if the 'events' entry is missing or cannot be converted to int.
    """

    nevents = -1
    if filename != '' and filename in metadata_dictionary:
        try:
            nevents = int(metadata_dictionary[filename].get('events'))
        except (TypeError, ValueError) as exc:
            # TypeError: the 'events' entry is missing (int(None)); ValueError: the value is not an integer string
            logger.warning('failed to convert number of events to int: %s', exc)
    else:
        logger.warning('number of events could not be extracted from metadata dictionary (based on metadata.xml)')

    return nevents
0209 
0210 
def get_total_number_of_events(metadata_dictionary):
    """
    Sum up the number of events over all files listed in the metadata dictionary.

    The per-file value is obtained with get_number_of_events(); files for which that function reports -1 (number of
    events unavailable) do not contribute to the sum.

    :param metadata_dictionary: dictionary from parsed metadata.xml file.
    :return: total number of processed events (int).
    """

    # lazily evaluated per-file event counts, one get_number_of_events() call per file name
    per_file_counts = (get_number_of_events(metadata_dictionary, filename=lfn) for lfn in metadata_dictionary)

    # -1 is the "unknown" marker and must not be subtracted from the total
    return sum(count for count in per_file_counts if count != -1)
0226 
0227 
def get_guid(metadata_dictionary, filename=''):
    """
    Get the guid from the metadata dictionary for the given LFN.

    :param metadata_dictionary: dictionary from parsed metadata.xml file, { lfn: { 'guid': guid, .. }, .. }.
    :param filename: file name (LFN) for which the guid should be returned (string).
    :return: guid (string, None is returned if guid could not be extracted).
    """

    if filename != '' and filename in metadata_dictionary:
        # a missing 'guid' entry gives None; dict.get() does not raise, so no exception handling is needed here
        return metadata_dictionary[filename].get('guid')

    logger.warning('guid could not be extracted from metadata dictionary (based on metadata.xml)')
    return None
0247 
0248 
def get_guid_from_xml(metadata_dictionary, lfn):
    """
    Get the guid for the given LFN in the metadata dictionary.

    Unlike get_guid(), no warning is logged when the LFN is not present in the dictionary.

    :param metadata_dictionary: dictionary from parsed metadata.xml file.
    :param lfn: LFN (string).
    :return: guid (string, None is returned if the LFN is unknown or if it has no guid).
    """

    # direct dictionary membership test instead of scanning all keys for a match
    if lfn in metadata_dictionary:
        return get_guid(metadata_dictionary, filename=lfn)

    return None