Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-04-10 08:39:17

0001 #!/usr/bin/env python
0002 # Licensed under the Apache License, Version 2.0 (the "License");
0003 # you may not use this file except in compliance with the License.
0004 # You may obtain a copy of the License at
0005 # http://www.apache.org/licenses/LICENSE-2.0
0006 #
0007 # Authors:
0008 # - Paul Nilsson, paul.nilsson@cern.ch, 2017-2018
0009 
0010 import collections
0011 import hashlib
0012 import io
0013 import os
0014 import re
0015 import tarfile
0016 import time
0017 import uuid
0018 from glob import glob
0019 from json import load
0020 from json import dump as dumpjson
0021 from shutil import copy2, rmtree
0022 import sys
0023 from zlib import adler32
0024 
0025 from pilot.common.exception import ConversionFailure, FileHandlingFailure, MKDirFailure, NoSuchFile
0026 from pilot.util.config import config
0027 #from pilot.util.mpi import get_ranks_info
0028 from .container import execute
0029 from .math import diff_lists
0030 
0031 import logging
0032 logger = logging.getLogger(__name__)
0033 
0034 
def get_pilot_work_dir(workdir):
    """
    Construct the full path to the main PanDA Pilot work directory.
    Called once at the beginning of the batch job.

    :param workdir: full path to where the main work directory should be created (string).
    :return: full path of the main work directory (string).
    """

    dir_name = "PanDA_Pilot2_%d_%s" % (os.getpid(), str(int(time.time())))
    return os.path.join(workdir, dir_name)
0044 
0045 
def mkdirs(workdir, chmod=0o770):  # Python 2/3
    """
    Create the given directory (including missing parent directories).
    Apply the given permissions to it afterwards, unless chmod is falsy.

    :param workdir: full path to the directory to be created (string).
    :param chmod: chmod code (default 0770) (octal).
    :raises PilotException: MKDirFailure.
    :return:
    """

    try:
        os.makedirs(workdir)
    except Exception as exc:
        raise MKDirFailure(exc)

    if not chmod:
        return
    try:
        os.chmod(workdir, chmod)
    except Exception as exc:
        raise MKDirFailure(exc)
0063 
0064 
def rmdirs(path):
    """
    Remove the directory tree rooted at the given path.

    :param path: path to directory to be removed (string).
    :return: Boolean (True if success).
    """

    try:
        rmtree(path)
    except OSError as exc:
        logger.warning("failed to remove directories %s: %s", path, exc)
        return False

    return True
0083 
0084 
def read_file(filename, mode='r'):
    """
    Open a file, return its entire contents and close it again.

    :param filename: file name (string).
    :param mode: file mode passed on to open_file() (string).
    :return: file contents, or "" if the file could not be opened (string).
    """

    contents = ""
    filehandle = open_file(filename, mode)
    if filehandle:
        contents = filehandle.read()
        filehandle.close()

    return contents
0100 
0101 
def write_file(path, contents, mute=True, mode='w', unique=False):
    """
    Write the given contents to a file.
    If unique=True, then if the file already exists, an index will be added (e.g. 'out.txt' -> 'out-1.txt')

    :param path: full path for file (string).
    :param contents: file contents (object).
    :param mute: boolean to control stdout info message.
    :param mode: file mode (e.g. 'w', 'r', 'a', 'wb', 'rb') (string).
    :param unique: file must be unique (Boolean).
    :raises PilotException: FileHandlingFailure.
    :return: True if successful, otherwise False.
    """

    status = False

    # add an incremental file name (add -%d if path already exists) if necessary
    if unique:
        path = get_nonexistant_path(path)

    f = open_file(path, mode)
    if f:
        try:
            f.write(contents)
        except IOError as exc:
            raise FileHandlingFailure(exc)
        else:
            status = True
        finally:
            # bug fix: the file was previously leaked (not closed) when write() raised
            f.close()

    if not mute:
        if 'w' in mode:
            logger.info('created file: %s', path)
        if 'a' in mode:
            logger.info('appended file: %s', path)

    return status
0138 
0139 
def open_file(filename, mode):
    """
    Open the named file in the given mode and return the file object.
    Note: the caller needs to close the file.

    :param filename: file name (string).
    :param mode: file mode (character).
    :raises PilotException: FileHandlingFailure.
    :return: file pointer.
    """

    try:
        return open(filename, mode)
    except IOError as exc:
        raise FileHandlingFailure(exc)
0158 
0159 
def find_text_files():
    """
    Find all non-binary (text) files below the current directory.

    :return: list of files.
    """

    # grep -Iq: -I ignores binary files, -q is quiet mode; -print lists the matches
    cmd = r"find . -type f -exec grep -Iq . {} \; -print"

    exit_code, stdout, stderr = execute(cmd)
    files = []
    if stdout:
        # remove a single trailing newline if present, then split into lines
        trimmed = stdout[:-1] if stdout.endswith('\n') else stdout
        files = trimmed.split('\n')

    return files
0179 
0180 
def get_files(pattern="*.log"):
    """
    Find all files below the current directory whose names follow the given pattern.

    :param pattern: file name pattern (string).
    :return: list of files.
    """

    cmd = "find . -name %s" % pattern

    exit_code, stdout, stderr = execute(cmd)
    files = []
    if stdout:
        # remove a single trailing newline if present, then split into lines
        trimmed = stdout[:-1] if stdout.endswith('\n') else stdout
        files = trimmed.split('\n')

    return files
0200 
0201 
def tail(filename, nlines=10):
    """
    Return the last n lines of a file.
    Note: the function uses the posix tail function.

    :param filename: name of file to do the tail on (string).
    :param nlines: number of lines (int).
    :return: file tail (str).
    """

    exit_code, stdout, stderr = execute('tail -n %d %s' % (nlines, filename))
    # protection: execute() might hand back a non-string value on failure
    if not isinstance(stdout, str):
        stdout = ""
    return stdout
0217 
0218 
def grep(patterns, file_name):
    """
    Search the given file for lines matching any of the regexp patterns.

    Example:
      grep(["St9bad_alloc", "FATAL"], "athena_stdout.txt")
      -> [list containing the lines below]
        CaloTrkMuIdAlg2.sysExecute()             ERROR St9bad_alloc
        AthAlgSeq.sysExecute()                   FATAL  Standard std::exception is caught

    :param patterns: list of regexp patterns.
    :param file_name: file name (string).
    :return: list of matched lines in file (a line is stored once per pattern that matches it).
    """

    # compile all patterns once, before looping over the file
    compiled_patterns = [re.compile(pattern) for pattern in patterns]

    matched_lines = []
    filehandle = open_file(file_name, 'r')
    if filehandle:
        for line in filehandle:
            for compiled in compiled_patterns:
                if compiled.search(line):
                    matched_lines.append(line)
        filehandle.close()

    return matched_lines
0254 
0255 
def convert(data):
    """
    Convert unicode data to utf-8.

    Usage examples:
    1. Dictionary:
      data = {u'Max': {u'maxRSS': 3664, u'maxSwap': 0, u'maxVMEM': 142260, u'maxPSS': 1288}, u'Avg':
             {u'avgVMEM': 94840, u'avgPSS': 850, u'avgRSS': 2430, u'avgSwap': 0}}
    convert(data)
      {'Max': {'maxRSS': 3664, 'maxSwap': 0, 'maxVMEM': 142260, 'maxPSS': 1288}, 'Avg': {'avgVMEM': 94840,
       'avgPSS': 850, 'avgRSS': 2430, 'avgSwap': 0}}
    2. String:
      data = u'hello'
    convert(data)
      'hello'
    3. List:
      data = [u'1',u'2','3']
    convert(data)
      ['1', '2', '3']

    :param data: unicode object to be converted to utf-8
    :return: converted data to utf-8
    """

    # bug fix: the abstract base classes moved to collections.abc in Python 3.3
    # and looking them up on the collections module fails on Python 3.10+
    try:
        from collections.abc import Mapping, Iterable  # Python 3
    except ImportError:
        from collections import Mapping, Iterable  # Python 2  # noqa
    try:
        _basestring = basestring  # Python 2  # noqa: F821
    except NameError:
        _basestring = str  # Python 3 (note order in try statement)

    if isinstance(data, _basestring):
        return str(data)
    if isinstance(data, Mapping):
        # convert keys and values; a plain dict is returned, as before
        return dict(map(convert, data.items()))
    if isinstance(data, Iterable):
        # preserve the container type (list, tuple, ..)
        return type(data)(convert(item) for item in data)
    return data
0300 
0301 
def is_json(input_file):
    """
    Check if the file is in JSON format.
    Only the first character of the file is inspected: a leading "{" is taken to mean JSON.

    :param input_file: file name (string)
    :return: Boolean.
    """

    with open(input_file) as unknown_file:
        return unknown_file.read(1) == '{'
0316 
0317 
def read_list(filename):
    """
    Read a list from a JSON file.

    :param filename: file name (string).
    :return: list (unicode converted to utf-8 via convert()).
    """

    parsed = []

    # open output file for reading
    try:
        with open(filename, 'r') as filehandle:
            parsed = load(filehandle)
    except IOError as exc:
        logger.warning('failed to read %s: %s', filename, exc)

    return convert(parsed)
0336 
0337 
def read_json(filename):
    """
    Read a dictionary with unicode to utf-8 conversion

    :param filename: file name (string).
    :raises PilotException: FileHandlingFailure, ConversionFailure
    :return: json dictionary
    """

    dictionary = None
    f = open_file(filename, 'r')
    if f:
        try:
            dictionary = load(f)
        except Exception as exc:
            logger.warning('exception caught: %s', exc)
            #raise FileHandlingFailure(str(error))
        finally:
            # bug fix: the file handle was previously leaked when load() raised
            f.close()

        if dictionary is not None and dictionary != {}:
            # try to convert the dictionary from unicode to utf-8
            try:
                dictionary = convert(dictionary)
            except Exception as exc:
                raise ConversionFailure(exc)

    return dictionary
0366 
0367 
def write_json(filename, data, sort_keys=True, indent=4, separators=(',', ': ')):
    """
    Write the dictionary to a JSON file.

    :param filename: file name (string).
    :param data: object to be written to file (dictionary or list).
    :param sort_keys: should entries be sorted? (boolean).
    :param indent: indentation level, default 4 (int).
    :param separators: field separators (default (',', ': ') for dictionaries, use e.g. (',\n') for lists) (tuple)
    :raises PilotException: FileHandlingFailure.
    :return: status (boolean).
    """

    try:
        with open(filename, 'w') as filehandle:
            dumpjson(data, filehandle, sort_keys=sort_keys, indent=indent, separators=separators)
    except IOError as exc:
        raise FileHandlingFailure(exc)

    return True
0392 
0393 
def touch(path):
    """
    Touch the given file: create it if needed and update its modification time.
    Falls back to the shell touch command in case of a python problem with
    appending to a non-existent path.

    :param path: full path to file to be touched (string).
    :return:
    """

    try:
        with open(path, 'a'):
            os.utime(path, None)
    except Exception:
        _, _, _ = execute('touch %s' % path)
0408 
0409 
def remove_empty_directories(src_dir):
    """
    Remove empty directories from the given src_dir tree.
    Only empty directories are removed; src_dir itself is never removed.

    :param src_dir: directory to be purged of empty directories.
    :return:
    """

    # walk bottom-up so that parents emptied by earlier removals can go too
    for dirpath, _, _ in os.walk(src_dir, topdown=False):
        if dirpath == src_dir:
            break
        try:
            os.rmdir(dirpath)  # only succeeds when the directory is empty
        except OSError:
            pass
0426 
0427 
def remove(path):
    """
    Remove a single file.

    :param path: path to file (string).
    :return: 0 if successful, -1 if failed (int)
    """

    try:
        os.remove(path)
    except OSError as exc:
        logger.warning("failed to remove file: %s (%s, %s)", path, exc.errno, exc.strerror)
        return -1

    logger.debug('removed %s', path)
    return 0
0444 
0445 
def remove_dir_tree(path):
    """
    Remove an entire directory tree.

    :param path: path to directory (string).
    :return: 0 if successful, -1 if failed (int)
    """

    status = 0
    try:
        rmtree(path)
    except OSError as exc:
        logger.warning("failed to remove directory: %s (%s, %s)", path, exc.errno, exc.strerror)
        status = -1

    return status
0459 
0460 
def remove_files(workdir, files):
    """
    Remove all given files from workdir.

    :param workdir: working directory (string).
    :param files: list of file names (list).
    :return: exit code (0 if all went well, -1 otherwise)
    """

    if not isinstance(files, list):
        # bug fix: the original logged type(list) - the builtin - instead of the argument's type
        logger.warning('files parameter not a list: %s', str(type(files)))
        return -1

    ec = 0
    for item in files:
        _ec = remove(os.path.join(workdir, item))
        # remember the first failure, but keep removing the remaining files
        if _ec != 0 and ec == 0:
            ec = _ec

    return ec
0481 
0482 
def tar_files(wkdir, excludedfiles, logfile_name, attempt=0):
    """
    Tar all files in the given directory into logfile_name, then remove the packed files.

    :param wkdir: work directory (string)
    :param excludedfiles: list of files to be excluded from tar operation (list)
    :param logfile_name: name of the tarball, relative to wkdir (string)
    :param attempt: attempt number (integer)
    :return: 0 if successful, 1 in case of error (int)
    """

    to_pack = []
    pack_start = time.time()
    for path, subdir, files in os.walk(wkdir):
        for fname in files:  # renamed from 'file' to avoid shadowing the builtin
            if fname not in excludedfiles:
                rel_dir = os.path.relpath(path, wkdir)
                file_rel_path = os.path.join(rel_dir, fname)
                file_path = os.path.join(path, fname)
                to_pack.append((file_path, file_rel_path))
    if to_pack:
        try:
            # bug fix: do not rebind logfile_name - the retry call would otherwise
            # join wkdir onto an already joined path
            log_pack = tarfile.open(os.path.join(wkdir, logfile_name), 'w:gz')
            for file_path, file_rel_path in to_pack:
                log_pack.add(file_path, arcname=file_rel_path)
            log_pack.close()
        except IOError:
            if attempt == 0:
                safe_delay = 15
                logger.warning('i/o error - will retry in {0} seconds'.format(safe_delay))
                time.sleep(safe_delay)
                # bug fix: propagate the retry result (it was previously discarded,
                # so a failed retry still reported success)
                return tar_files(wkdir, excludedfiles, logfile_name, attempt=1)
            logger.warning("continues i/o errors during packing of logs - job will fail")
            return 1

    # the packed files are no longer needed
    for file_path, _ in to_pack:
        remove(file_path)

    remove_empty_directories(wkdir)
    pack_time = time.time() - pack_start
    logger.debug("packing of logs took {0} seconds".format(pack_time))

    return 0
0528 
0529 
def move(path1, path2):
    """
    Move a file from path1 to path2.

    :param path1: source path (string).
    :param path2: destination path (string).
    :raises PilotException: NoSuchFile if the source is missing, FileHandlingFailure on move errors.
    """

    if not os.path.exists(path1):
        logger.warning('file copy failure: path does not exist: %s', path1)
        raise NoSuchFile("File does not exist: %s" % path1)

    try:
        import shutil
        shutil.move(path1, path2)
    except IOError as exc:
        logger.warning("exception caught during file move: %s", exc)
        raise FileHandlingFailure(exc)

    logger.info("moved %s to %s", path1, path2)
0550 
0551 
def copy(path1, path2):
    """
    Copy path1 to path2.

    :param path1: source file path (string).
    :param path2: destination file path (string).
    :raises PilotException: FileHandlingFailure, NoSuchFile
    :return:
    """

    if not os.path.exists(path1):
        logger.warning('file copy failure: path does not exist: %s', path1)
        raise NoSuchFile("File does not exist: %s" % path1)

    try:
        copy2(path1, path2)
    except IOError as exc:
        logger.warning("exception caught during file copy: %s", exc)
        raise FileHandlingFailure(exc)

    logger.info("copied %s to %s", path1, path2)
0573 
0574 
def find_executable(name):
    """
    Is the command 'name' available locally?

    :param name: command name (string).
    :return: full path to command if it exists, otherwise None.
    """

    try:
        # distutils was removed in Python 3.12; shutil.which (Python 3.3+)
        # is the supported replacement
        from shutil import which
    except ImportError:
        from distutils.spawn import find_executable as which  # Python 2
    return which(name)
0585 
0586 
def add_to_total_size(path, total_size):
    """
    Add the size of the file in the given path to the total size of all in/output files.

    :param path: path to file (string).
    :param total_size: prior total size of all input/output files (long).
    :return: total size of all input/output files (long).
    """

    if not os.path.exists(path):
        logger.warning("skipping file %s since it is not present", path)
        return total_size

    # get the file size
    fsize = get_local_file_size(path)
    if fsize:
        logger.info("size of file %s: %d B", path, fsize)
        try:
            total_size += long(fsize)  # Python 2  # noqa: F821
        except Exception:
            total_size += int(fsize)  # Python 3 (note order in try statement)

    return total_size
0609 
0610 
def get_local_file_size(filename):
    """
    Get the size of a local file.

    :param filename: file name (string).
    :return: file size in bytes, or None on failure (int).
    """

    if not os.path.exists(filename):
        logger.warning("local file does not exist: %s", filename)
        return None

    try:
        return os.path.getsize(filename)
    except Exception as exc:
        logger.warning("failed to get file size: %s", exc)
        return None
0630 
0631 
def get_guid():
    """
    Generate an upper-case GUID using the uuid library.
    E.g. guid = '92008FAF-BE4C-49CF-9C5C-E12BC74ACD19'

    :return: a random GUID (string)
    """

    return ('%s' % uuid.uuid4()).upper()
0641 
0642 
def get_table_from_file(filename, header=None, separator="\t", convert_to_float=True):
    """
    Extract a table of data from a txt file.
    E.g.
    header="Time VMEM PSS RSS Swap rchar wchar rbytes wbytes"
    or the first line in the file is
    Time VMEM PSS RSS Swap rchar wchar rbytes wbytes
    each of which will become keys in the dictionary, whose corresponding values are stored in lists, with the entries
    corresponding to the values in the rows of the input file.

    The output dictionary will have the format
    {'Time': [ .. data from first row .. ], 'VMEM': [.. data from second row], ..}

    Note: if header is given, the first line of the file is treated as data;
    if header is None, the first line of the file is consumed as the header.

    :param filename: name of input text file, full path (string).
    :param header: header string.
    :param separator: separator character (char).
    :param convert_to_float: boolean, if True, all values will be converted to floats.
    :return: dictionary ({} if the file could not be opened, None if a float conversion failed).
    """

    tabledict = {}
    keylist = []  # ordered list of dictionary key names

    try:
        f = open_file(filename, 'r')
    except Exception as exc:
        logger.warning("failed to open file: %s, %s", filename, exc)
    else:
        firstline = True
        for line in f:
            fields = line.split(separator)
            if firstline:
                firstline = False
                # define the dictionary keys, taken either from the given header
                # or from the first line of the file
                tabledict, keylist = _define_tabledict_keys(header, fields, separator)
                if not header:
                    # the first line was the header, not data - move on to the next line
                    continue

            # from now on, fill the dictionary fields with the input data
            i = 0
            for field in fields:
                # get the corresponding dictionary key from the keylist
                key = keylist[i]
                # store the field value in the correct list
                if convert_to_float:
                    try:
                        field = float(field)
                    except Exception as exc:
                        # a single bad value aborts the whole parse
                        logger.warning("failed to convert %s to float: %s (aborting)", field, exc)
                        return None
                tabledict[key].append(field)
                i += 1
        f.close()

    return tabledict
0697 
0698 
0699 def _define_tabledict_keys(header, fields, separator):
0700     """
0701     Define the keys for the tabledict dictionary.
0702     Note: this function is only used by parse_table_from_file().
0703 
0704     :param header: header string.
0705     :param fields: header content string.
0706     :param separator: separator character (char).
0707     :return: tabledict (dictionary), keylist (ordered list with dictionary key names).
0708     """
0709 
0710     tabledict = {}
0711     keylist = []
0712 
0713     if not header:
0714         # get the dictionary keys from the header of the file
0715         for key in fields:
0716             # first line defines the header, whose elements will be used as dictionary keys
0717             if key == '':
0718                 continue
0719             if key.endswith('\n'):
0720                 key = key[:-1]
0721             tabledict[key] = []
0722             keylist.append(key)
0723     else:
0724         # get the dictionary keys from the provided header
0725         keys = header.split(separator)
0726         for key in keys:
0727             if key == '':
0728                 continue
0729             if key.endswith('\n'):
0730                 key = key[:-1]
0731             tabledict[key] = []
0732             keylist.append(key)
0733 
0734     return tabledict, keylist
0735 
0736 
def calculate_checksum(filename, algorithm='adler32'):
    """
    Calculate the checksum value for the given file.
    The default algorithm is adler32. Md5 is also supported.
    Valid algorithms are 1) adler32/adler/ad32/ad, 2) md5/md5sum/md.

    :param filename: file name (string).
    :param algorithm: optional algorithm string.
    :raises FileHandlingFailure, NotImplementedError: exception raised when file does not exist or for unknown algorithm.
    :return: checksum value (string).
    """

    if not os.path.exists(filename):
        raise FileHandlingFailure('file does not exist: %s' % filename)

    if algorithm in ('adler32', 'adler', 'ad', 'ad32'):
        return calculate_adler32_checksum(filename)
    if algorithm in ('md5', 'md5sum', 'md'):
        return calculate_md5_checksum(filename)

    msg = 'unknown checksum algorithm: %s' % algorithm
    logger.warning(msg)
    # bug fix: include the message in the exception (it was previously dropped)
    raise NotImplementedError(msg)
0760 
0761 
def calculate_adler32_checksum(filename):
    """
    Calculate the adler32 checksum for the given file.
    The file is assumed to exist.

    :param filename: file name (string).
    :return: checksum value as an eight-digit hex string (string).
    """

    blocksize = 64 * 1024 * 1024  # read buffer size, 64 Mb
    asum = 1  # default adler32 starting value

    with open(filename, 'rb') as filehandle:
        for data in iter(lambda: filehandle.read(blocksize), b''):
            asum = adler32(data, asum)
            # adler32() can return a signed value on python 2 - normalise it
            if asum < 0:
                asum += 2**32

    # convert to hex
    return "{0:08x}".format(asum)
0785 
0786 
def calculate_md5_checksum(filename):
    """
    Calculate the md5 checksum for the given file.
    The file is assumed to exist.

    :param filename: file name (string).
    :return: checksum value (string).
    """

    digest = hashlib.md5()
    chunksize = io.DEFAULT_BUFFER_SIZE

    with io.open(filename, mode="rb") as filehandle:
        while True:
            chunk = filehandle.read(chunksize)
            if not chunk:
                break
            digest.update(chunk)

    return digest.hexdigest()
0804 
0805 
def get_checksum_value(checksum):
    """
    Return the checksum value.
    The given checksum might either be a standard ad32 or md5 string, or a dictionary with the format
    { checksum_type: value } as defined in the `FileSpec` class. This function extracts the checksum value from this
    dictionary (or immediately returns the checksum value if the given value is a string).

    :param checksum: checksum object (string or dictionary).
    :return: checksum. checksum string.
    """

    if type(checksum) == str:
        return checksum

    if type(checksum) == dict:
        return checksum.get(get_checksum_type(checksum))

    return ''
0827 
0828 
def get_checksum_type(checksum):
    """
    Return the checksum type (ad32 or md5).
    The given checksum can be either be a standard ad32 or md5 value, or a dictionary with the format
    { checksum_type: value } as defined in the `FileSpec` class.
    In case the checksum type cannot be identified, the function returns 'unknown'.

    :param checksum: checksum string or dictionary.
    :return: checksum type (string).
    """

    checksum_type = 'unknown'
    # idiom fix: use isinstance() instead of type() comparisons; also avoid
    # materialising the key list just to take the first key
    if isinstance(checksum, dict):
        for key in checksum:
            # the dictionary is assumed to only contain one key-value pair
            checksum_type = key
            break
    elif isinstance(checksum, str):
        if len(checksum) == 8:
            checksum_type = 'ad32'
        elif len(checksum) == 32:
            checksum_type = 'md5'

    return checksum_type
0853 
0854 
def scan_file(path, error_messages, warning_message=None):
    """
    Scan file for known error messages.

    :param path: path to file (string).
    :param error_messages: list of error messages.
    :param warning_message: optional warning message to be printed when any of the error messages have been found (string).
    :return: Boolean. (note: True means the error was found)
    """

    matched_lines = grep(error_messages, path)
    if not matched_lines:
        return False

    if warning_message:
        logger.warning(warning_message)
    for line in matched_lines:
        logger.info(line)

    return True
0876 
0877 
def verify_file_list(list_of_files):
    """
    Make sure that the files in the given list exist, return the list of files that does exist.

    :param list_of_files: file list.
    :return: list of existing files.
    """

    # drop any non-existent files from the input file list
    filtered_list = [path for path in list_of_files if os.path.exists(path)]

    missing = diff_lists(list_of_files, filtered_list)
    if missing:
        logger.debug('found %d file(s) that do not exist (e.g. %s)', len(missing), missing[0])

    return filtered_list
0894 
0895 
def find_latest_modified_file(list_of_files):
    """
    Find the most recently modified file among the list of given files.
    In case the modification time cannot be read (OSError), an empty file name
    and mtime=None are returned.

    :param list_of_files: list of files with full paths.
    :return: most recently updated file (string), modification time (int or None).
    """

    if not list_of_files:
        logger.warning('there were no files to check mod time for')
        return None, None

    try:
        # pick the file with the largest modification time
        latest_file = max(list_of_files, key=os.path.getmtime)
        mtime = int(os.path.getmtime(latest_file))
    except OSError as exc:
        # e.g. a file disappeared between the max() call and the second getmtime()
        logger.warning("int conversion failed for mod time: %s", exc)
        latest_file = ""
        mtime = None

    return latest_file, mtime
0918 
0919 
def dump(path, cmd="cat"):
    """
    Dump the content of the file in the given path to the log.
    The "echo" command is allowed to run even when the path does not exist.

    :param path: file path (string).
    :param cmd: optional command (string).
    :return:
    """

    if not os.path.exists(path) and cmd != "echo":
        logger.info("path %s does not exist", path)
        return

    _cmd = "%s %s" % (cmd, path)
    exit_code, stdout, stderr = execute(_cmd)
    logger.info("%s:\n%s", _cmd, stdout + stderr)
0935 
0936 
def establish_logging(debug=True, nopilotlog=False, filename=config.Pilot.pilotlog, loglevel=0):
    """
    Setup and establish logging.

    Option loglevel can be used to decide which (predetermined) logging format to use.
    Example:
      loglevel=0: '%(asctime)s | %(levelname)-8s | %(name)-32s | %(funcName)-25s | %(message)s'
      loglevel=1: 'ts=%(asctime)s level=%(levelname)-8s event=%(name)-32s.%(funcName)-25s msg="%(message)s"'

    NOTE(review): loglevel is not read anywhere in the body - the format is chosen
    from the debug flag only; confirm whether the parameter is still needed.

    :param debug: debug mode (Boolean),
    :param nopilotlog: True when pilot log is not known (Boolean).
    :param filename: name of log file (string).
    :param loglevel: selector for logging level (int).
    :return:
    """

    # reset the root logger: drop any existing handlers and stop propagation
    _logger = logging.getLogger('')
    _logger.handlers = []
    _logger.propagate = False

    # mirror all log records on stdout in addition to the (optional) log file
    console = logging.StreamHandler(sys.stdout)
    if debug:
        format_str = '%(asctime)s | %(levelname)-8s | %(name)-32s | %(funcName)-25s | %(message)s'
        level = logging.DEBUG
    else:
        format_str = '%(asctime)s | %(levelname)-8s | %(message)s'
        level = logging.INFO
    #rank, maxrank = get_ranks_info()
    #if rank is not None:
    #    format_str = 'Rank {0} |'.format(rank) + format_str
    if nopilotlog:
        # no pilot log file is known - configure the root handlers without a file
        logging.basicConfig(level=level, format=format_str, filemode='w')
    else:
        logging.basicConfig(filename=filename, level=level, format=format_str, filemode='w')
    console.setLevel(level)
    console.setFormatter(logging.Formatter(format_str))
    # use UTC timestamps in all log records
    logging.Formatter.converter = time.gmtime
    #if not len(_logger.handlers):
    _logger.addHandler(console)
0976 
0977 
def remove_core_dumps(workdir, pid=None):
    """
    Remove any remaining core dumps so they do not end up in the log tarball

    A core dump from the payload process should not be deleted if in debug mode (checked by the called). Also,
    a found core dump from a non-payload process, should be removed but should result in function returning False.

    :param workdir: working directory for payload (string).
    :param pid: payload pid (integer).
    :return: Boolean (True if a payload core dump is found)
    """

    payload_dump_found = False

    # both "core.<pid>" and plain "core" naming schemes are cleaned up
    dumps = glob("%s/core.*" % workdir) + glob("%s/core" % workdir)
    for dump_path in dumps:
        if pid and os.path.basename(dump_path) == "core.%d" % pid:
            payload_dump_found = True
        logger.info("removing core dump: %s", str(dump_path))
        remove(dump_path)

    return payload_dump_found
1001 
1002 
def get_nonexistant_path(fname_path):
    """
    Get the path to a filename which does not exist by incrementing path.

    E.g. if "log.txt" exists, "log-1.txt" is tried, then "log-2.txt", etc.

    :param fname_path: file name path (string).
    :return: file name path (string).
    """

    if not os.path.exists(fname_path):
        return fname_path

    stem, extension = os.path.splitext(fname_path)
    counter = 0
    while True:
        counter += 1
        candidate = "{}-{}{}".format(stem, counter, extension)
        if not os.path.exists(candidate):
            return candidate
1020 
1021 
def update_extension(path='', extension=''):
    """
    Update the file name extension to the given extension.

    A leading dot is added to the extension if missing.

    :param path: file path (string).
    :param extension: new extension (string).
    :return: file path with new extension (string).
    """

    base = os.path.splitext(path)[0]
    suffix = extension if extension.startswith('.') else '.' + extension

    return base + suffix
1037 
1038 
def get_valid_path_from_list(paths):
    """
    Return the first valid path from the given list.

    :param paths: list of file paths.
    :return: first valid path from list, or None if no path exists (string).
    """

    return next((path for path in paths if os.path.exists(path)), None)
1054 
1055 
def copy_pilot_source(workdir):
    """
    Copy the pilot source into the work directory.

    The source location is $PILOT_SOURCE_DIR/pilot2 (cwd if the variable is unset).

    :param workdir: working directory (string).
    :return: diagnostics (string).
    """

    diagnostics = ""
    srcdir = os.path.join(os.environ.get('PILOT_SOURCE_DIR', '.'), 'pilot2')
    logger.debug('copy %s to %s', srcdir, workdir)
    cmd = 'cp -r %s/* %s' % (srcdir, workdir)
    try:
        exit_code, stdout, _ = execute(cmd)
    except Exception as exc:
        diagnostics = 'exception caught when copying pilot2 source: %s' % exc
        logger.warning(diagnostics)
    else:
        if exit_code != 0:
            diagnostics = 'file copy failed: %d, %s' % (exit_code, stdout)
            logger.warning(diagnostics)

    return diagnostics
1078 
1079 
def create_symlink(from_path='', to_path=''):
    """
    Create a symlink from/to the given paths.

    A failure is logged as a warning, not raised.

    :param from_path: from path (string).
    :param to_path: to path (string).
    """

    try:
        os.symlink(from_path, to_path)
    except Exception as exc:
        logger.warning('failed to create symlink from %s to %s: %s', from_path, to_path, exc)
        return
    logger.debug('created symlink from %s to %s', from_path, to_path)
1094 
1095 
def locate_file(pattern):
    """
    Locate a file defined by the pattern.

    Example:
        pattern = os.path.join(os.getcwd(), '**/core.123')
        -> /Users/Paul/Development/python/tt/core.123

    If several files match, the last one found is returned.

    :param pattern: pattern name (string).
    :return: path (string).
    """

    matches = [name for name in glob(pattern) if os.path.isfile(name)]

    return matches[-1] if matches else None
1114 
1115 
def find_last_line(filename):
    """
    Find the last line in a (not too large) file.

    Returns an empty string for an empty file (the original code referenced the
    loop variable after the loop, which raised UnboundLocalError in that case).

    :param filename: file name, full path (string).
    :return: last line (string), or "" if the file is empty.
    """

    last_line = ""
    with open(filename) as fileobj:
        # keep overwriting so the final value is the last line read
        for line in fileobj:
            last_line = line

    return last_line
1131 
1132 
def get_disk_usage(start_path='.'):
    """
    Calculate the disk usage of the given directory (including any sub-directories).

    Symbolic links are not followed, and files that vanish between the directory
    walk and the size lookup are skipped with a warning.

    :param start_path: directory (string).
    :return: disk usage in bytes (int).
    """

    total_size = 0
    for dirpath, dirnames, filenames in os.walk(start_path):
        for name in filenames:
            filepath = os.path.join(dirpath, name)
            # skip if it is symbolic link
            if os.path.exists(filepath) and not os.path.islink(filepath):
                try:
                    total_size += os.path.getsize(filepath)
                except OSError as exc:  # narrowed from Exception; covers FileNotFoundError
                    logger.warning('caught exception: %s (skipping this file)', exc)

    return total_size