File indexing completed on 2026-04-10 08:39:17
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010 import collections
0011 import hashlib
0012 import io
0013 import os
0014 import re
0015 import tarfile
0016 import time
0017 import uuid
0018 from glob import glob
0019 from json import load
0020 from json import dump as dumpjson
0021 from shutil import copy2, rmtree
0022 import sys
0023 from zlib import adler32
0024
0025 from pilot.common.exception import ConversionFailure, FileHandlingFailure, MKDirFailure, NoSuchFile
0026 from pilot.util.config import config
0027
0028 from .container import execute
0029 from .math import diff_lists
0030
0031 import logging
0032 logger = logging.getLogger(__name__)
0033
0034
def get_pilot_work_dir(workdir):
    """
    Return the full path to the main PanDA Pilot work directory. Called once at the beginning of the batch job.

    :param workdir: The full path to where the main work directory should be created
    :return: The name of main work directory
    """

    # directory name encodes the pilot pid and the (integer) creation time
    dirname = "PanDA_Pilot2_%d_%s" % (os.getpid(), str(int(time.time())))
    return os.path.join(workdir, dirname)
0044
0045
def mkdirs(workdir, chmod=0o770):
    """
    Create a directory (including any missing parents).
    Perform a chmod if set.

    :param workdir: Full path to the directory to be created
    :param chmod: chmod code (default 0770) (octal); skipped when falsy.
    :raises PilotException: MKDirFailure.
    :return:
    """

    try:
        os.makedirs(workdir)
    except Exception as exc:
        raise MKDirFailure(exc)
    if not chmod:
        return
    try:
        os.chmod(workdir, chmod)
    except Exception as exc:
        raise MKDirFailure(exc)
0063
0064
def rmdirs(path):
    """
    Remove the directory tree rooted at path.

    :param path: path to directory to be removed (string).
    :return: Boolean (True if success).
    """

    try:
        rmtree(path)
    except OSError as exc:
        logger.warning("failed to remove directories %s: %s", path, exc)
        return False
    return True
0083
0084
def read_file(filename, mode='r'):
    """
    Open, read and close a file.

    :param filename: file name (string).
    :param mode: file mode (string).
    :return: file contents (string), "" if the file could not be opened.
    """

    contents = ""
    filehandle = open_file(filename, mode)
    if filehandle:
        contents = filehandle.read()
        filehandle.close()

    return contents
0100
0101
def write_file(path, contents, mute=True, mode='w', unique=False):
    """
    Write the given contents to a file.
    If unique=True, then if the file already exists, an index will be added (e.g. 'out.txt' -> 'out-1.txt')

    :param path: full path for file (string).
    :param contents: file contents (object).
    :param mute: boolean to control stdout info message.
    :param mode: file mode (e.g. 'w', 'r', 'a', 'wb', 'rb') (string).
    :param unique: file must be unique (Boolean).
    :raises PilotException: FileHandlingFailure.
    :return: True if successful, otherwise False.
    """

    status = False

    if unique:
        path = get_nonexistant_path(path)

    f = open_file(path, mode)
    if f:
        try:
            f.write(contents)
        except IOError as exc:
            raise FileHandlingFailure(exc)
        else:
            status = True
        finally:
            # original closed the file only on the success path, leaking the
            # handle when write() raised - always close it
            f.close()

    if not mute:
        if 'w' in mode:
            logger.info('created file: %s', path)
        if 'a' in mode:
            logger.info('appended file: %s', path)

    return status
0138
0139
def open_file(filename, mode):
    """
    Open and return a file pointer for the given mode.
    Note: the caller needs to close the file.

    :param filename: file name (string).
    :param mode: file mode (character).
    :raises PilotException: FileHandlingFailure.
    :return: file pointer.
    """

    try:
        return open(filename, mode)
    except IOError as exc:
        raise FileHandlingFailure(exc)
0158
0159
def find_text_files():
    """
    Find all non-binary files under the current directory.

    :return: list of files.
    """

    # grep -I skips binary files, -q only sets the exit status;
    # -print then emits only the paths grep accepted
    cmd = r"find . -type f -exec grep -Iq . {} \; -print"
    _, stdout, _ = execute(cmd)
    if not stdout:
        return []

    output = stdout
    if output.endswith('\n'):
        output = output[:-1]
    return output.split('\n')
0179
0180
def get_files(pattern="*.log"):
    """
    Find all files whose names follow the given pattern.

    :param pattern: file name pattern (string).
    :return: list of files.
    """

    # NOTE(review): the pattern is passed unquoted, so a shell could expand
    # it before find sees it - confirm execute() semantics before changing
    _, stdout, _ = execute("find . -name %s" % pattern)
    if not stdout:
        return []

    output = stdout
    if output.endswith('\n'):
        output = output[:-1]
    return output.split('\n')
0200
0201
def tail(filename, nlines=10):
    """
    Return the last nlines lines of a file.
    Note: the function uses the posix tail command.

    :param filename: name of file to do the tail on (string).
    :param nlines: number of lines (int).
    :return: file tail (str).
    """

    _, stdout, _ = execute('tail -n %d %s' % (nlines, filename))
    # execute() may hand back a non-string stdout; normalize to ""
    return stdout if type(stdout) == str else ""
0217
0218
def grep(patterns, file_name):
    """
    Search for the patterns in the given list in a file.

    Example:
      grep(["St9bad_alloc", "FATAL"], "athena_stdout.txt")
      -> [list containing the lines below]
        CaloTrkMuIdAlg2.sysExecute()        ERROR St9bad_alloc
        AthAlgSeq.sysExecute()              FATAL  Standard std::exception is caught

    :param patterns: list of regexp patterns.
    :param file_name: file name (string).
    :return: list of matched lines in file.
    """

    compiled = [re.compile(pattern) for pattern in patterns]

    matched_lines = []
    filehandle = open_file(file_name, 'r')
    if filehandle:
        for line in filehandle:
            # a line matching several patterns is appended once per match,
            # same as the original implementation
            for regex in compiled:
                if regex.search(line):
                    matched_lines.append(line)
        filehandle.close()

    return matched_lines
0254
0255
def convert(data):
    """
    Recursively convert unicode data to utf-8 (native str).

    Usage examples:
    1. Dictionary:
      data = {u'Max': {u'maxRSS': 3664, u'maxSwap': 0, u'maxVMEM': 142260, u'maxPSS': 1288}, u'Avg':
             {u'avgVMEM': 94840, u'avgPSS': 850, u'avgRSS': 2430, u'avgSwap': 0}}
      convert(data)
        {'Max': {'maxRSS': 3664, 'maxSwap': 0, 'maxVMEM': 142260, 'maxPSS': 1288}, 'Avg': {'avgVMEM': 94840,
         'avgPSS': 850, 'avgRSS': 2430, 'avgSwap': 0}}
    2. String:
      data = u'hello'
      convert(data)
        'hello'
    3. List:
      data = [u'1',u'2','3']
      convert(data)
        ['1', '2', '3']

    :param data: unicode object to be converted to utf-8
    :return: converted data to utf-8
    """

    try:
        _basestring = basestring  # noqa: F821 - Python 2 only
    except NameError:
        _basestring = str

    # collections.Mapping/Iterable were removed in Python 3.10 (deprecated
    # since 3.3) - import from collections.abc with a Python 2 fallback
    try:
        from collections.abc import Mapping, Iterable
    except ImportError:  # Python 2
        from collections import Mapping, Iterable

    if isinstance(data, _basestring):
        return str(data)
    if isinstance(data, Mapping):
        # convert each (key, value) pair recursively
        return dict(map(convert, data.items()))
    if isinstance(data, Iterable):
        # rebuild the container (list, tuple, set, ...) with converted items
        return type(data)(map(convert, data))
    return data
0300
0301
def is_json(input_file):
    """
    Check if the file is in JSON format.
    The function reads the first character of the file, and if it is "{" then returns True.

    :param input_file: file name (string)
    :return: Boolean.
    """

    with open(input_file) as unknown_file:
        first_char = unknown_file.read(1)
    return first_char == '{'
0316
0317
def read_list(filename):
    """
    Read a list from a JSON file.

    :param filename: file name (string).
    :return: list (empty when the file could not be read).
    """

    items = []
    try:
        with open(filename, 'r') as filehandle:
            items = load(filehandle)
    except IOError as exc:
        logger.warning('failed to read %s: %s', filename, exc)

    # normalize any unicode content to native str
    return convert(items)
0336
0337
def read_json(filename):
    """
    Read a dictionary with unicode to utf-8 conversion

    :param filename: file name (string).
    :raises PilotException: FileHandlingFailure, ConversionFailure
    :return: json dictionary
    """

    dictionary = None
    f = open_file(filename, 'r')
    if f:
        try:
            dictionary = load(f)
        except Exception as exc:
            logger.warning('exception caught: %s', exc)
        finally:
            # the original closed the file only in the else clause, leaking
            # the handle whenever load() raised - always close it
            f.close()

    if dictionary != {}:
        try:
            dictionary = convert(dictionary)
        except Exception as exc:
            raise ConversionFailure(exc)

    return dictionary
0366
0367
def write_json(filename, data, sort_keys=True, indent=4, separators=(',', ': ')):
    """
    Write the dictionary to a JSON file.

    :param filename: file name (string).
    :param data: object to be written to file (dictionary or list).
    :param sort_keys: should entries be sorted? (boolean).
    :param indent: indentation level, default 4 (int).
    :param separators: field separators (default (',', ': ') for dictionaries, use e.g. (',\n') for lists) (tuple)
    :raises PilotException: FileHandlingFailure.
    :return: status (boolean).
    """

    try:
        with open(filename, 'w') as fh:
            dumpjson(data, fh, sort_keys=sort_keys, indent=indent, separators=separators)
    except IOError as exc:
        raise FileHandlingFailure(exc)

    return True
0392
0393
def touch(path):
    """
    Touch a file and update mtime in case the file exists.
    Default to use execute() if case of python problem with appending to non-existant path.

    :param path: full path to file to be touched (string).
    :return:
    """

    try:
        with open(path, 'a'):
            os.utime(path, None)
    except Exception:
        # fall back to the posix touch command
        execute('touch %s' % path)
0408
0409
def remove_empty_directories(src_dir):
    """
    Removal of empty directories in the given src_dir tree.
    Only empty directories will be removed (rmdir fails, and is ignored, otherwise).
    The top directory src_dir itself is never removed.

    :param src_dir: directory to be purged of empty directories.
    :return:
    """

    # bottom-up walk so children are removed before their parents are tried;
    # src_dir is visited last with topdown=False, so the break skips only it
    for dirpath, _, _ in os.walk(src_dir, topdown=False):
        if dirpath == src_dir:
            break
        try:
            os.rmdir(dirpath)
        except OSError:
            pass  # not empty - keep it
0426
0427
def remove(path):
    """
    Remove a single file.

    :param path: path to file (string).
    :return: 0 if successful, -1 if failed (int)
    """

    try:
        os.remove(path)
    except OSError as exc:
        logger.warning("failed to remove file: %s (%s, %s)", path, exc.errno, exc.strerror)
        return -1

    logger.debug('removed %s', path)
    return 0
0444
0445
def remove_dir_tree(path):
    """
    Remove a directory tree.

    :param path: path to directory (string).
    :return: 0 if successful, -1 if failed (int)
    """

    status = 0
    try:
        rmtree(path)
    except OSError as exc:
        logger.warning("failed to remove directory: %s (%s, %s)", path, exc.errno, exc.strerror)
        status = -1

    return status
0459
0460
def remove_files(workdir, files):
    """
    Remove all given files from workdir.

    :param workdir: working directory (string).
    :param files: file list.
    :return: exit code (0 if all went well, -1 otherwise)
    """

    if not isinstance(files, list):
        # the original logged type(list) - i.e. the builtin - instead of the
        # type of the actual argument
        logger.warning('files parameter not a list: %s', str(type(files)))
        return -1

    ec = 0
    for fname in files:
        _ec = remove(os.path.join(workdir, fname))
        if _ec != 0 and ec == 0:
            # remember the first failure, but keep removing the rest
            ec = _ec

    return ec
0481
0482
def tar_files(wkdir, excludedfiles, logfile_name, attempt=0):
    """
    Tarring of files in given directory.
    The packed files are removed afterwards, along with any directories left empty.

    :param wkdir: work directory (string)
    :param excludedfiles: list of files to be excluded from tar operation (list)
    :param logfile_name: file name (string)
    :param attempt: attempt number (integer)
    :return: 0 if successful, 1 in case of error (int)
    """

    to_pack = []
    pack_start = time.time()
    # collect every file under wkdir (except the excluded ones) together with
    # its archive name relative to wkdir
    for path, _, files in os.walk(wkdir):
        for fname in files:
            if fname in excludedfiles:
                continue
            rel_dir = os.path.relpath(path, wkdir)
            to_pack.append((os.path.join(path, fname), os.path.join(rel_dir, fname)))

    if to_pack:
        try:
            logfile_name = os.path.join(wkdir, logfile_name)
            with tarfile.open(logfile_name, 'w:gz') as log_pack:
                for file_path, arcname in to_pack:
                    log_pack.add(file_path, arcname=arcname)
        except IOError:
            if attempt == 0:
                safe_delay = 15
                logger.warning('i/o error - will retry in %d seconds', safe_delay)
                time.sleep(safe_delay)
                # propagate the retry result: the original discarded it and fell
                # through, removing the files and returning 0 even if the retry
                # failed
                return tar_files(wkdir, excludedfiles, logfile_name, attempt=1)
            logger.warning("continues i/o errors during packing of logs - job will fail")
            return 1

    # remove the packed files and any directories that are now empty
    for file_path, _ in to_pack:
        remove(file_path)

    remove_empty_directories(wkdir)
    logger.debug("packing of logs took %f seconds", time.time() - pack_start)
    return 0
0528
0529
def move(path1, path2):
    """
    Move a file from path1 to path2.

    :param path1: source path (string).
    :param path2: destination path2 (string).
    :raises PilotException: NoSuchFile, FileHandlingFailure.
    """

    if not os.path.exists(path1):
        logger.warning('file copy failure: path does not exist: %s', path1)
        raise NoSuchFile("File does not exist: %s" % path1)

    try:
        import shutil
        shutil.move(path1, path2)
    except IOError as exc:
        logger.warning("exception caught during file move: %s", exc)
        raise FileHandlingFailure(exc)

    logger.info("moved %s to %s", path1, path2)
0550
0551
def copy(path1, path2):
    """
    Copy path1 to path2 (metadata-preserving copy via copy2).

    :param path1: file path (string).
    :param path2: file path (string).
    :raises PilotException: FileHandlingFailure, NoSuchFile
    :return:
    """

    if not os.path.exists(path1):
        logger.warning('file copy failure: path does not exist: %s', path1)
        raise NoSuchFile("File does not exist: %s" % path1)

    try:
        copy2(path1, path2)
    except IOError as exc:
        logger.warning("exception caught during file copy: %s", exc)
        raise FileHandlingFailure(exc)

    logger.info("copied %s to %s", path1, path2)
0573
0574
def find_executable(name):
    """
    Is the command 'name' available locally?

    :param name: command name (string).
    :return: full path to command if it exists, otherwise None.
    """

    # distutils was removed in Python 3.12 (PEP 632);
    # shutil.which is the supported replacement for distutils.spawn.find_executable
    from shutil import which
    return which(name)
0585
0586
def add_to_total_size(path, total_size):
    """
    Add the size of file in the given path to the total size of all in/output files.

    :param path: path to file (string).
    :param total_size: prior total size of all input/output files (long).
    :return: total size of all input/output files (long).
    """

    if not os.path.exists(path):
        logger.warning("skipping file %s since it is not present", path)
        return total_size

    fsize = get_local_file_size(path)
    if fsize:
        logger.info("size of file %s: %d B", path, fsize)
        try:
            total_size += long(fsize)  # noqa: F821 - Python 2 only
        except Exception:
            # Python 3: long does not exist, plain int is unbounded
            total_size += int(fsize)

    return total_size
0609
0610
def get_local_file_size(filename):
    """
    Get the file size of a local file.

    :param filename: file name (string).
    :return: file size (int), None when it cannot be determined.
    """

    if not os.path.exists(filename):
        logger.warning("local file does not exist: %s", filename)
        return None

    try:
        return os.path.getsize(filename)
    except Exception as exc:
        logger.warning("failed to get file size: %s", exc)
        return None
0630
0631
def get_guid():
    """
    Generate a GUID using the uuid library.
    E.g. guid = '92008FAF-BE4C-49CF-9C5C-E12BC74ACD19'

    :return: a random GUID (string)
    """

    guid = uuid.uuid4()
    return str(guid).upper()
0641
0642
def get_table_from_file(filename, header=None, separator="\t", convert_to_float=True):
    """
    Extract a table of data from a txt file.
    E.g.
    header="Time VMEM PSS RSS Swap rchar wchar rbytes wbytes"
    or the first line in the file is
    Time VMEM PSS RSS Swap rchar wchar rbytes wbytes
    each of which will become keys in the dictionary, whose corresponding values are stored in lists, with the entries
    corresponding to the values in the rows of the input file.

    The output dictionary will have the format
    {'Time': [ .. data from first row .. ], 'VMEM': [.. data from second row], ..}

    :param filename: name of input text file, full path (string).
    :param header: header string.
    :param separator: separator character (char).
    :param convert_to_float: boolean, if True, all values will be converted to floats.
    :return: dictionary (empty when the file could not be opened, None on a float conversion failure).
    """

    tabledict = {}
    keylist = []

    try:
        f = open_file(filename, 'r')
    except Exception as exc:
        logger.warning("failed to open file: %s, %s", filename, exc)
    else:
        firstline = True
        for line in f:
            fields = line.split(separator)
            if firstline:
                firstline = False
                # the first line defines the keys; when no explicit header was
                # given, the first line IS the header and carries no data
                tabledict, keylist = _define_tabledict_keys(header, fields, separator)
                if not header:
                    continue

            # append each field of the row to the list belonging to the
            # corresponding column key (same order as keylist)
            i = 0
            for field in fields:

                key = keylist[i]

                if convert_to_float:
                    try:
                        field = float(field)
                    except Exception as exc:
                        logger.warning("failed to convert %s to float: %s (aborting)", field, exc)
                        return None  # NOTE(review): returns without closing f - confirm whether this leak matters here
                tabledict[key].append(field)
                i += 1
        f.close()

    return tabledict
0697
0698
0699 def _define_tabledict_keys(header, fields, separator):
0700 """
0701 Define the keys for the tabledict dictionary.
0702 Note: this function is only used by parse_table_from_file().
0703
0704 :param header: header string.
0705 :param fields: header content string.
0706 :param separator: separator character (char).
0707 :return: tabledict (dictionary), keylist (ordered list with dictionary key names).
0708 """
0709
0710 tabledict = {}
0711 keylist = []
0712
0713 if not header:
0714
0715 for key in fields:
0716
0717 if key == '':
0718 continue
0719 if key.endswith('\n'):
0720 key = key[:-1]
0721 tabledict[key] = []
0722 keylist.append(key)
0723 else:
0724
0725 keys = header.split(separator)
0726 for key in keys:
0727 if key == '':
0728 continue
0729 if key.endswith('\n'):
0730 key = key[:-1]
0731 tabledict[key] = []
0732 keylist.append(key)
0733
0734 return tabledict, keylist
0735
0736
def calculate_checksum(filename, algorithm='adler32'):
    """
    Calculate the checksum value for the given file.
    The default algorithm is adler32. Md5 is also supported.
    Valid algorithms are 1) adler32/adler/ad32/ad, 2) md5/md5sum/md.

    :param filename: file name (string).
    :param algorithm: optional algorithm string.
    :raises FileHandlingFailure, NotImplementedError: exception raised when file does not exist or for unknown algorithm.
    :return: checksum value (string).
    """

    if not os.path.exists(filename):
        raise FileHandlingFailure('file does not exist: %s' % filename)

    if algorithm in ('adler32', 'adler', 'ad', 'ad32'):
        return calculate_adler32_checksum(filename)
    if algorithm in ('md5', 'md5sum', 'md'):
        return calculate_md5_checksum(filename)

    msg = 'unknown checksum algorithm: %s' % algorithm
    logger.warning(msg)
    # include the message in the exception (the original raised a bare
    # NotImplementedError and discarded msg)
    raise NotImplementedError(msg)
0760
0761
def calculate_adler32_checksum(filename):
    """
    Calculate the adler32 checksum for the given file.
    The file is assumed to exist.

    :param filename: file name (string).
    :return: checksum value (string, 8 lowercase hex digits).
    """

    blocksize = 64 * 1024 * 1024
    value = 1  # adler32 seed
    with open(filename, 'rb') as infile:
        for chunk in iter(lambda: infile.read(blocksize), b''):
            value = adler32(chunk, value)
            if value < 0:
                # Python 2 could return signed values; normalize to unsigned
                value += 2**32

    return "{0:08x}".format(value)
0785
0786
def calculate_md5_checksum(filename):
    """
    Calculate the md5 checksum for the given file.
    The file is assumed to exist.

    :param filename: file name (string).
    :return: checksum value (string, hex digest).
    """

    hasher = hashlib.md5()
    chunk_size = io.DEFAULT_BUFFER_SIZE

    with io.open(filename, mode="rb") as fd:
        while True:
            chunk = fd.read(chunk_size)
            if not chunk:
                break
            hasher.update(chunk)

    return hasher.hexdigest()
0804
0805
def get_checksum_value(checksum):
    """
    Return the checksum value.
    The given checksum might either be a standard ad32 or md5 string, or a dictionary with the format
    { checksum_type: value } as defined in the `FileSpec` class. This function extracts the checksum value from this
    dictionary (or immediately returns the checksum value if the given value is a string).

    :param checksum: checksum object (string or dictionary).
    :return: checksum value (string).
    """

    if isinstance(checksum, str):
        return checksum
    if isinstance(checksum, dict):
        return checksum.get(get_checksum_type(checksum))
    return ''
0827
0828
def get_checksum_type(checksum):
    """
    Return the checksum type (ad32 or md5).
    The given checksum can be either be a standard ad32 or md5 value, or a dictionary with the format
    { checksum_type: value } as defined in the `FileSpec` class.
    In case the checksum type cannot be identified, the function returns 'unknown'.

    :param checksum: checksum string or dictionary.
    :return: checksum type (string).
    """

    if isinstance(checksum, dict):
        # the (first) key of the dictionary is the checksum type
        for key in checksum:
            return key
        return 'unknown'

    if isinstance(checksum, str):
        # identify the type from the digest length
        if len(checksum) == 8:
            return 'ad32'
        if len(checksum) == 32:
            return 'md5'

    return 'unknown'
0853
0854
def scan_file(path, error_messages, warning_message=None):
    """
    Scan file for known error messages.

    :param path: path to file (string).
    :param error_messages: list of error messages.
    :param warning_message: optional warning message to printed with any of the error_messages have been found (string).
    :return: Boolean. (note: True means the error was found)
    """

    matched_lines = grep(error_messages, path)
    if not matched_lines:
        return False

    if warning_message:
        logger.warning(warning_message)
    for line in matched_lines:
        logger.info(line)

    return True
0876
0877
def verify_file_list(list_of_files):
    """
    Make sure that the files in the given list exist, return the list of files that does exist.

    :param list_of_files: file list.
    :return: list of existing files.
    """

    existing = [path for path in list_of_files if os.path.exists(path)]

    missing = diff_lists(list_of_files, existing)
    if missing:
        logger.debug('found %d file(s) that do not exist (e.g. %s)', len(missing), missing[0])

    return existing
0894
0895
def find_latest_modified_file(list_of_files):
    """
    Find the most recently modified file among the list of given files.

    :param list_of_files: list of files with full paths.
    :return: most recently updated file (string), modification time (int); (None, None) for an empty list,
             ("", None) when the modification time cannot be read.
    """

    if not list_of_files:
        logger.warning('there were no files to check mod time for')
        return None, None

    try:
        latest_file = max(list_of_files, key=os.path.getmtime)
        mtime = int(os.path.getmtime(latest_file))
    except OSError as exc:
        logger.warning("int conversion failed for mod time: %s", exc)
        return "", None

    return latest_file, mtime
0918
0919
def dump(path, cmd="cat"):
    """
    Dump the content of the file in the given path to the log.

    :param path: file path (string).
    :param cmd: optional command (string).
    :return: cat (string).
    """

    # "echo" is allowed even for non-existing paths (it just prints the path)
    if not os.path.exists(path) and cmd != "echo":
        logger.info("path %s does not exist", path)
        return

    _cmd = "%s %s" % (cmd, path)
    _, stdout, stderr = execute(_cmd)
    logger.info("%s:\n%s", _cmd, stdout + stderr)
0935
0936
def establish_logging(debug=True, nopilotlog=False, filename=config.Pilot.pilotlog, loglevel=0):
    """
    Setup and establish logging.

    Option loglevel can be used to decide which (predetermined) logging format to use.
    Example:
        loglevel=0: '%(asctime)s | %(levelname)-8s | %(name)-32s | %(funcName)-25s | %(message)s'
        loglevel=1: 'ts=%(asctime)s level=%(levelname)-8s event=%(name)-32s.%(funcName)-25s msg="%(message)s"'

    :param debug: debug mode (Boolean),
    :param nopilotlog: True when pilot log is not known (Boolean).
    :param filename: name of log file (string).
    :param loglevel: selector for logging level (int). NOTE: currently unused in the body.
    :return:
    """

    # reset the root logger: drop any previously installed handlers
    _logger = logging.getLogger('')
    _logger.handlers = []
    _logger.propagate = False

    # console handler mirrors the log to stdout
    console = logging.StreamHandler(sys.stdout)
    if debug:
        format_str = '%(asctime)s | %(levelname)-8s | %(name)-32s | %(funcName)-25s | %(message)s'
        level = logging.DEBUG
    else:
        format_str = '%(asctime)s | %(levelname)-8s | %(message)s'
        level = logging.INFO

    # configure the file (or bare) logging; filemode='w' truncates any old log
    if nopilotlog:
        logging.basicConfig(level=level, format=format_str, filemode='w')
    else:
        logging.basicConfig(filename=filename, level=level, format=format_str, filemode='w')
    console.setLevel(level)
    console.setFormatter(logging.Formatter(format_str))
    # timestamps in GMT/UTC rather than local time
    logging.Formatter.converter = time.gmtime

    _logger.addHandler(console)
0976
0977
def remove_core_dumps(workdir, pid=None):
    """
    Remove any remaining core dumps so they do not end up in the log tarball

    A core dump from the payload process should not be deleted if in debug mode (checked by the caller). Also,
    a found core dump from a non-payload process, should be removed but should result in function returning False.

    :param workdir: working directory for payload (string).
    :param pid: payload pid (integer).
    :return: Boolean (True if a payload core dump is found)
    """

    found = False

    for coredump in glob("%s/core.*" % workdir) + glob("%s/core" % workdir):
        # only a dump named core.<payload pid> counts as a payload core dump
        if pid and os.path.basename(coredump) == "core.%d" % pid:
            found = True
        logger.info("removing core dump: %s", str(coredump))
        remove(coredump)

    return found
1001
1002
def get_nonexistant_path(fname_path):
    """
    Get the path to a filename which does not exist by incrementing path.

    :param fname_path: file name path (string).
    :return: file name path (string).
    """

    if not os.path.exists(fname_path):
        return fname_path

    root, ext = os.path.splitext(fname_path)
    index = 1
    candidate = "{}-{}{}".format(root, index, ext)
    # keep bumping the index until a free name is found
    while os.path.exists(candidate):
        index += 1
        candidate = "{}-{}{}".format(root, index, ext)
    return candidate
1020
1021
def update_extension(path='', extension=''):
    """
    Update the file name extension to the given extension.

    :param path: file path (string).
    :param extension: new extension (string, with or without leading dot).
    :return: file path with new extension (string).
    """

    stem, _ = os.path.splitext(path)
    suffix = extension if extension.startswith('.') else '.' + extension
    return stem + suffix
1037
1038
def get_valid_path_from_list(paths):
    """
    Return the first valid path from the given list.

    :param paths: list of file paths.
    :return: first valid path from list (string), None when none exists.
    """

    for path in paths:
        if os.path.exists(path):
            return path
    return None
1054
1055
def copy_pilot_source(workdir):
    """
    Copy the pilot source into the work directory.

    :param workdir: working directory (string).
    :return: diagnostics (string, empty on success).
    """

    diagnostics = ""
    srcdir = os.path.join(os.environ.get('PILOT_SOURCE_DIR', '.'), 'pilot2')

    try:
        logger.debug('copy %s to %s', srcdir, workdir)
        exit_code, stdout, _ = execute('cp -r %s/* %s' % (srcdir, workdir))
        if exit_code != 0:
            diagnostics = 'file copy failed: %d, %s' % (exit_code, stdout)
            logger.warning(diagnostics)
    except Exception as exc:
        diagnostics = 'exception caught when copying pilot2 source: %s' % exc
        logger.warning(diagnostics)

    return diagnostics
1078
1079
def create_symlink(from_path='', to_path=''):
    """
    Create a symlink from/to the given paths.
    Failures are logged, not raised.

    :param from_path: from path (string).
    :param to_path: to path (string).
    """

    try:
        os.symlink(from_path, to_path)
    except Exception as exc:
        logger.warning('failed to create symlink from %s to %s: %s', from_path, to_path, exc)
        return
    logger.debug('created symlink from %s to %s', from_path, to_path)
1094
1095
def locate_file(pattern):
    """
    Locate a file defined by the pattern.

    Example:
      pattern = os.path.join(os.getcwd(), '**/core.123')
      -> /Users/Paul/Development/python/tt/core.123

    :param pattern: pattern name (string).
    :return: path (string), None when no regular file matches.
    """

    # like the original loop, the last matching regular file wins
    matches = [name for name in glob(pattern) if os.path.isfile(name)]
    return matches[-1] if matches else None
1114
1115
def find_last_line(filename):
    """
    Find the last line in a (not too large) file.

    :param filename: file name, full path (string).
    :return: last line (string), "" for an empty file.
    """

    last_line = ""
    with open(filename) as f:
        # the original assigned `line` after the loop, which raised NameError
        # for an empty file; overwrite inside the loop instead
        for line in f:
            last_line = line

    return last_line
1131
1132
def get_disk_usage(start_path='.'):
    """
    Calculate the disk usage of the given directory (including any sub-directories).

    :param start_path: directory (string).
    :return: disk usage in bytes (int).
    """

    total_size = 0
    for dirpath, _, filenames in os.walk(start_path):
        for name in filenames:
            fpath = os.path.join(dirpath, name)
            # skip vanished entries and symlinks (do not count link targets)
            if not os.path.exists(fpath) or os.path.islink(fpath):
                continue
            try:
                total_size += os.path.getsize(fpath)
            except Exception as exc:
                logger.warning('caught exception: %s (skipping this file)', exc)

    return total_size