Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-04-09 08:38:53

0001 #!/usr/bin/env python
0002 # Licensed under the Apache License, Version 2.0 (the "License");
0003 # you may not use this file except in compliance with the License.
0004 # You may obtain a copy of the License at
0005 # http://www.apache.org/licenses/LICENSE-2.0
0006 #
0007 # Authors:
0008 # - Wen Guan, wen.guan@cern.ch, 2017-2018
0009 # - Paul Nilsson, paul.nilsson@cern.ch, 2017-2019
0010 
0011 """
0012 Exceptions in pilot
0013 """
0014 
0015 import time
0016 import threading
0017 import traceback
0018 from sys import exc_info, version_info
0019 
0020 from .errorcodes import ErrorCodes
0021 errors = ErrorCodes()
0022 
0023 
def is_python3():
    """
    Tell whether the interpreter running this module is Python 3 or later.

    :return: True when the major version of the interpreter is at least 3, False otherwise (boolean).
    """

    major_version = version_info[0]
    return major_version >= 3
0032 
0033 
class PilotException(Exception):
    """
    The basic exception class.
    The pilot error code can be defined here, where the pilot error code will
    be propagated to job server.
    """

    def __init__(self, *args, **kwargs):
        """
        Store the details of the exception and resolve its error code and message.

        :param args: free-form details; they are appended to the string representation.
        :param kwargs: keyword options; 'code' selects the error code (errors.UNKNOWNEXCEPTION is used when it is
                       missing or falsy), and the whole dictionary is used to %-format the error message.
        """
        super(PilotException, self).__init__(args, kwargs)
        self.args = args
        self.kwargs = kwargs
        self._errorCode = self.kwargs.get('code', None) or errors.UNKNOWNEXCEPTION
        self._message = errors.get_error_message(self._errorCode)
        self._error_string = None
        # snapshot of whatever exception is being handled at construction time
        self._stack_trace = traceback.format_exc()

    def _summary(self):
        """
        Build the 'error code / message' line shared by __str__() and get_detail().

        :return: summary line (string).
        """
        try:
            message = self._message % self.kwargs
        except Exception:
            # at least get the core message out if the formatting failed
            message = self._message
        return "error code: %s, message: %s" % (self._errorCode, message)

    def __str__(self):
        """
        Return the summary line followed by the truthy positional details, one per line.

        The result is also cached in self._error_string (before stripping).

        :return: stripped description (string).
        """
        description = self._summary()
        if self.args:
            # positional arguments are assumed to be the error message or reason
            # description and are tacked on to the end of the exception message;
            # note that "%s" % item treats a tuple item as the formatting arguments
            try:
                details = ["%s" % item for item in self.args if item]
            except Exception:
                details = ["{}".format(self.args)]
            description = description + "\ndetails: %s" % '\n'.join(details)
        self._error_string = description
        return description.strip()

    def get_detail(self):
        """
        Return the summary line followed by the stack trace captured in __init__().

        The summary line (without the stack trace) is cached in self._error_string.

        :return: summary and stack trace (string).
        """
        self._error_string = self._summary()
        return self._error_string + "\nstacktrace: %s" % self._stack_trace

    def get_error_code(self):
        """
        Return the error code of this exception.

        :return: error code.
        """
        return self._errorCode

    def get_last_error(self):
        """
        Return the last positional detail, or the error message when there are no details.

        :return: last element of self.args if any, otherwise self._message.
        """
        return self.args[-1] if self.args else self._message
0089 
0090 
0091 #class NotImplementedError(PilotException):
0092 #    """
0093 #    Not implemented exception.
0094 #    """
0095 #    def __init__(self, *args, **kwargs):
0096 #        super(NotImplementedError, self).__init__(args, kwargs)
0097 #        self._errorCode = errors.NOTIMPLEMENTED
0098 #        self._message = errors.get_error_message(self._errorCode)
0099 
0100 
class UnknownException(PilotException):
    """
    Unknown exception.
    """
    def __init__(self, *args, **kwargs):
        """
        Create the exception and pin its error code to errors.UNKNOWNEXCEPTION.

        :param args: free-form details, forwarded to PilotException.
        :param kwargs: keyword options, forwarded to PilotException.
        """
        # unpack when delegating: handing over the tuple and the dict themselves makes the base
        # class store them as two positional details and leaves it without any keyword options
        super(UnknownException, self).__init__(*args, **kwargs)
        self._errorCode = errors.UNKNOWNEXCEPTION
        self._message = errors.get_error_message(self._errorCode)
0109 
0110 
class NoLocalSpace(PilotException):
    """
    Not enough local space.
    """
    def __init__(self, *args, **kwargs):
        """
        Create the exception and pin its error code to errors.NOLOCALSPACE.

        :param args: free-form details, forwarded to PilotException.
        :param kwargs: keyword options, forwarded to PilotException.
        """
        # unpack when delegating: handing over the tuple and the dict themselves makes the base
        # class store them as two positional details and leaves it without any keyword options
        super(NoLocalSpace, self).__init__(*args, **kwargs)
        self._errorCode = errors.NOLOCALSPACE
        self._message = errors.get_error_message(self._errorCode)
0119 
0120 
class SizeTooLarge(PilotException):
    """
    Too large input files.
    """
    def __init__(self, *args, **kwargs):
        """
        Create the exception and pin its error code to errors.SIZETOOLARGE.

        :param args: free-form details, forwarded to PilotException.
        :param kwargs: keyword options, forwarded to PilotException.
        """
        # unpack when delegating: handing over the tuple and the dict themselves makes the base
        # class store them as two positional details and leaves it without any keyword options
        super(SizeTooLarge, self).__init__(*args, **kwargs)
        self._errorCode = errors.SIZETOOLARGE
        self._message = errors.get_error_message(self._errorCode)
0129 
0130 
class StageInFailure(PilotException):
    """
    Failed to stage-in file.
    """
    def __init__(self, *args, **kwargs):
        """
        Create the exception and pin its error code to errors.STAGEINFAILED.

        :param args: free-form details, forwarded to PilotException.
        :param kwargs: keyword options, forwarded to PilotException.
        """
        # unpack when delegating: handing over the tuple and the dict themselves makes the base
        # class store them as two positional details and leaves it without any keyword options
        super(StageInFailure, self).__init__(*args, **kwargs)
        self._errorCode = errors.STAGEINFAILED
        self._message = errors.get_error_message(self._errorCode)
0139 
0140 
class StageOutFailure(PilotException):
    """
    Failed to stage-out file.
    """
    def __init__(self, *args, **kwargs):
        """
        Create the exception and pin its error code to errors.STAGEOUTFAILED.

        :param args: free-form details, forwarded to PilotException.
        :param kwargs: keyword options, forwarded to PilotException.
        """
        # unpack when delegating: handing over the tuple and the dict themselves makes the base
        # class store them as two positional details and leaves it without any keyword options
        super(StageOutFailure, self).__init__(*args, **kwargs)
        self._errorCode = errors.STAGEOUTFAILED
        self._message = errors.get_error_message(self._errorCode)
0149 
0150 
class SetupFailure(PilotException):
    """
    Failed to setup environment.
    """
    def __init__(self, *args, **kwargs):
        """
        Create the exception and pin its error code to errors.SETUPFAILURE.

        :param args: free-form details, forwarded to PilotException.
        :param kwargs: keyword options, forwarded to PilotException.
        """
        # unpack when delegating: handing over the tuple and the dict themselves makes the base
        # class store them as two positional details and leaves it without any keyword options
        super(SetupFailure, self).__init__(*args, **kwargs)
        self._errorCode = errors.SETUPFAILURE
        self._message = errors.get_error_message(self._errorCode)
0159 
0160 
class RunPayloadFailure(PilotException):
    """
    Failed to execute payload.
    """
    def __init__(self, *args, **kwargs):
        """
        Create the exception and pin its error code to errors.PAYLOADEXECUTIONFAILURE.

        :param args: free-form details, forwarded to PilotException.
        :param kwargs: keyword options, forwarded to PilotException.
        """
        # unpack when delegating: handing over the tuple and the dict themselves makes the base
        # class store them as two positional details and leaves it without any keyword options
        super(RunPayloadFailure, self).__init__(*args, **kwargs)
        self._errorCode = errors.PAYLOADEXECUTIONFAILURE
        self._message = errors.get_error_message(self._errorCode)
0169 
0170 
class MessageFailure(PilotException):
    """
    Failed to handle messages.
    """
    def __init__(self, *args, **kwargs):
        """
        Create the exception and pin its error code to errors.MESSAGEHANDLINGFAILURE.

        :param args: free-form details, forwarded to PilotException.
        :param kwargs: keyword options, forwarded to PilotException.
        """
        # unpack when delegating: handing over the tuple and the dict themselves makes the base
        # class store them as two positional details and leaves it without any keyword options
        super(MessageFailure, self).__init__(*args, **kwargs)
        self._errorCode = errors.MESSAGEHANDLINGFAILURE
        self._message = errors.get_error_message(self._errorCode)
0179 
0180 
class CommunicationFailure(PilotException):
    """
    Failed to communicate with servers such as Panda, Harvester, ACT and so on.
    """
    def __init__(self, *args, **kwargs):
        """
        Create the exception and pin its error code to errors.COMMUNICATIONFAILURE.

        :param args: free-form details, forwarded to PilotException.
        :param kwargs: keyword options, forwarded to PilotException.
        """
        # unpack when delegating: handing over the tuple and the dict themselves makes the base
        # class store them as two positional details and leaves it without any keyword options
        super(CommunicationFailure, self).__init__(*args, **kwargs)
        self._errorCode = errors.COMMUNICATIONFAILURE
        self._message = errors.get_error_message(self._errorCode)
0189 
0190 
class FileHandlingFailure(PilotException):
    """
    Failed during file handling.
    """
    def __init__(self, *args, **kwargs):
        """
        Create the exception and pin its error code to errors.FILEHANDLINGFAILURE.

        :param args: free-form details, forwarded to PilotException.
        :param kwargs: keyword options, forwarded to PilotException.
        """
        # unpack when delegating: handing over the tuple and the dict themselves makes the base
        # class store them as two positional details and leaves it without any keyword options
        super(FileHandlingFailure, self).__init__(*args, **kwargs)
        self._errorCode = errors.FILEHANDLINGFAILURE
        self._message = errors.get_error_message(self._errorCode)
0199 
0200 
class NoSuchFile(PilotException):
    """
    No such file or directory.
    """
    def __init__(self, *args, **kwargs):
        """
        Create the exception and pin its error code to errors.NOSUCHFILE.

        :param args: free-form details, forwarded to PilotException.
        :param kwargs: keyword options, forwarded to PilotException.
        """
        # unpack when delegating: handing over the tuple and the dict themselves makes the base
        # class store them as two positional details and leaves it without any keyword options
        super(NoSuchFile, self).__init__(*args, **kwargs)
        self._errorCode = errors.NOSUCHFILE
        self._message = errors.get_error_message(self._errorCode)
0209 
0210 
class ConversionFailure(PilotException):
    """
    Failed to convert object data.
    """
    def __init__(self, *args, **kwargs):
        """
        Create the exception and pin its error code to errors.CONVERSIONFAILURE.

        :param args: free-form details, forwarded to PilotException.
        :param kwargs: keyword options, forwarded to PilotException.
        """
        # unpack when delegating: handing over the tuple and the dict themselves makes the base
        # class store them as two positional details and leaves it without any keyword options
        super(ConversionFailure, self).__init__(*args, **kwargs)
        self._errorCode = errors.CONVERSIONFAILURE
        self._message = errors.get_error_message(self._errorCode)
0219 
0220 
class MKDirFailure(PilotException):
    """
    Failed to create local directory.
    """
    def __init__(self, *args, **kwargs):
        """
        Create the exception and pin its error code to errors.MKDIR.

        :param args: free-form details, forwarded to PilotException.
        :param kwargs: keyword options, forwarded to PilotException.
        """
        # unpack when delegating: handing over the tuple and the dict themselves makes the base
        # class store them as two positional details and leaves it without any keyword options
        super(MKDirFailure, self).__init__(*args, **kwargs)
        self._errorCode = errors.MKDIR
        self._message = errors.get_error_message(self._errorCode)
0229 
0230 
class NoGridProxy(PilotException):
    """
    Grid proxy not valid.
    """
    def __init__(self, *args, **kwargs):
        """
        Create the exception and pin its error code to errors.NOPROXY.

        :param args: free-form details, forwarded to PilotException.
        :param kwargs: keyword options, forwarded to PilotException.
        """
        # unpack when delegating: handing over the tuple and the dict themselves makes the base
        # class store them as two positional details and leaves it without any keyword options
        super(NoGridProxy, self).__init__(*args, **kwargs)
        self._errorCode = errors.NOPROXY
        self._message = errors.get_error_message(self._errorCode)
0239 
0240 
class NoVomsProxy(PilotException):
    """
    Voms proxy not valid.
    """
    def __init__(self, *args, **kwargs):
        """
        Create the exception and pin its error code to errors.NOVOMSPROXY.

        :param args: free-form details, forwarded to PilotException.
        :param kwargs: keyword options, forwarded to PilotException.
        """
        # unpack when delegating: handing over the tuple and the dict themselves makes the base
        # class store them as two positional details and leaves it without any keyword options
        super(NoVomsProxy, self).__init__(*args, **kwargs)
        self._errorCode = errors.NOVOMSPROXY
        self._message = errors.get_error_message(self._errorCode)
0249 
0250 
class TrfDownloadFailure(PilotException):
    """
    Transform could not be downloaded.
    """
    def __init__(self, *args, **kwargs):
        """
        Create the exception and pin its error code to errors.TRFDOWNLOADFAILURE.

        :param args: free-form details, forwarded to PilotException.
        :param kwargs: keyword options, forwarded to PilotException.
        """
        # unpack when delegating: handing over the tuple and the dict themselves makes the base
        # class store them as two positional details and leaves it without any keyword options
        super(TrfDownloadFailure, self).__init__(*args, **kwargs)
        self._errorCode = errors.TRFDOWNLOADFAILURE
        self._message = errors.get_error_message(self._errorCode)
0259 
0260 
class NotDefined(PilotException):
    """
    Not defined exception.
    """
    def __init__(self, *args, **kwargs):
        """
        Create the exception and pin its error code to errors.NOTDEFINED.

        :param args: free-form details, forwarded to PilotException.
        :param kwargs: keyword options, forwarded to PilotException.
        """
        # unpack when delegating: handing over the tuple and the dict themselves makes the base
        # class store them as two positional details and leaves it without any keyword options
        super(NotDefined, self).__init__(*args, **kwargs)
        self._errorCode = errors.NOTDEFINED
        self._message = errors.get_error_message(self._errorCode)
0269 
0270 
class NotSameLength(PilotException):
    """
    Not same length exception.
    """
    def __init__(self, *args, **kwargs):
        """
        Create the exception and pin its error code to errors.NOTSAMELENGTH.

        :param args: free-form details, forwarded to PilotException.
        :param kwargs: keyword options, forwarded to PilotException.
        """
        # unpack when delegating: handing over the tuple and the dict themselves makes the base
        # class store them as two positional details and leaves it without any keyword options
        super(NotSameLength, self).__init__(*args, **kwargs)
        self._errorCode = errors.NOTSAMELENGTH
        self._message = errors.get_error_message(self._errorCode)
0279 
0280 
class ESRecoverable(PilotException):
    """
    Eventservice recoverable exception.
    """
    def __init__(self, *args, **kwargs):
        """
        Create the exception and pin its error code to errors.ESRECOVERABLE.

        :param args: free-form details, forwarded to PilotException.
        :param kwargs: keyword options, forwarded to PilotException.
        """
        # unpack when delegating: handing over the tuple and the dict themselves makes the base
        # class store them as two positional details and leaves it without any keyword options
        super(ESRecoverable, self).__init__(*args, **kwargs)
        self._errorCode = errors.ESRECOVERABLE
        self._message = errors.get_error_message(self._errorCode)
0289 
0290 
class ESFatal(PilotException):
    """
    Eventservice fatal exception.
    """
    def __init__(self, *args, **kwargs):
        """
        Create the exception and pin its error code to errors.ESFATAL.

        :param args: free-form details, forwarded to PilotException.
        :param kwargs: keyword options, forwarded to PilotException.
        """
        # unpack when delegating: handing over the tuple and the dict themselves makes the base
        # class store them as two positional details and leaves it without any keyword options
        super(ESFatal, self).__init__(*args, **kwargs)
        self._errorCode = errors.ESFATAL
        self._message = errors.get_error_message(self._errorCode)
0299 
0300 
class ExecutedCloneJob(PilotException):
    """
    Clone job executed exception.
    """
    def __init__(self, *args, **kwargs):
        """
        Create the exception and pin its error code to errors.EXECUTEDCLONEJOB.

        :param args: free-form details, forwarded to PilotException.
        :param kwargs: keyword options, forwarded to PilotException.
        """
        # unpack when delegating: handing over the tuple and the dict themselves makes the base
        # class store them as two positional details and leaves it without any keyword options
        super(ExecutedCloneJob, self).__init__(*args, **kwargs)
        self._errorCode = errors.EXECUTEDCLONEJOB
        self._message = errors.get_error_message(self._errorCode)
0309 
0310 
class ESNoEvents(PilotException):
    """
    Eventservice no events exception.
    """
    def __init__(self, *args, **kwargs):
        """
        Create the exception and pin its error code to errors.ESNOEVENTS.

        :param args: free-form details, forwarded to PilotException.
        :param kwargs: keyword options, forwarded to PilotException.
        """
        # unpack when delegating: handing over the tuple and the dict themselves makes the base
        # class store them as two positional details and leaves it without any keyword options
        super(ESNoEvents, self).__init__(*args, **kwargs)
        self._errorCode = errors.ESNOEVENTS
        self._message = errors.get_error_message(self._errorCode)
0319 
0320 
class ExceededMaxWaitTime(PilotException):
    """
    Exceeded maximum waiting time (after abort_job has been set).
    """
    def __init__(self, *args, **kwargs):
        """
        Create the exception and pin its error code to errors.EXCEEDEDMAXWAITTIME.

        :param args: free-form details, forwarded to PilotException.
        :param kwargs: keyword options, forwarded to PilotException.
        """
        # unpack when delegating: handing over the tuple and the dict themselves makes the base
        # class store them as two positional details and leaves it without any keyword options
        super(ExceededMaxWaitTime, self).__init__(*args, **kwargs)
        self._errorCode = errors.EXCEEDEDMAXWAITTIME
        self._message = errors.get_error_message(self._errorCode)
0329 
0330 
class BadXML(PilotException):
    """
    Badly formed XML.
    """
    def __init__(self, *args, **kwargs):
        """
        Create the exception and pin its error code to errors.BADXML.

        :param args: free-form details, forwarded to PilotException.
        :param kwargs: keyword options, forwarded to PilotException.
        """
        # unpack when delegating: handing over the tuple and the dict themselves makes the base
        # class store them as two positional details and leaves it without any keyword options
        super(BadXML, self).__init__(*args, **kwargs)
        self._errorCode = errors.BADXML
        self._message = errors.get_error_message(self._errorCode)
0339 
0340 
class NoSoftwareDir(PilotException):
    """
    Software applications directory does not exist.
    """
    def __init__(self, *args, **kwargs):
        """
        Create the exception and pin its error code to errors.NOSOFTWAREDIR.

        :param args: free-form details, forwarded to PilotException.
        :param kwargs: keyword options, forwarded to PilotException.
        """
        # unpack when delegating: handing over the tuple and the dict themselves makes the base
        # class store them as two positional details and leaves it without any keyword options
        super(NoSoftwareDir, self).__init__(*args, **kwargs)
        self._errorCode = errors.NOSOFTWAREDIR
        self._message = errors.get_error_message(self._errorCode)
0349 
0350 
class LogFileCreationFailure(PilotException):
    """
    Log file could not be created.
    """
    def __init__(self, *args, **kwargs):
        """
        Create the exception and pin its error code to errors.LOGFILECREATIONFAILURE.

        :param args: free-form details, forwarded to PilotException.
        :param kwargs: keyword options, forwarded to PilotException.
        """
        # unpack when delegating: handing over the tuple and the dict themselves makes the base
        # class store them as two positional details and leaves it without any keyword options
        super(LogFileCreationFailure, self).__init__(*args, **kwargs)
        self._errorCode = errors.LOGFILECREATIONFAILURE
        self._message = errors.get_error_message(self._errorCode)
0359 
0360 
class QueuedataFailure(PilotException):
    """
    Failed to download queuedata.
    """
    def __init__(self, *args, **kwargs):
        """
        Create the exception and pin its error code to errors.QUEUEDATA.

        :param args: free-form details, forwarded to PilotException.
        :param kwargs: keyword options, forwarded to PilotException.
        """
        # unpack when delegating: handing over the tuple and the dict themselves makes the base
        # class store them as two positional details and leaves it without any keyword options
        super(QueuedataFailure, self).__init__(*args, **kwargs)
        self._errorCode = errors.QUEUEDATA
        self._message = errors.get_error_message(self._errorCode)
0369 
0370 
class QueuedataNotOK(PilotException):
    """
    Corrupt queuedata.
    """
    def __init__(self, *args, **kwargs):
        """
        Create the exception and pin its error code to errors.QUEUEDATANOTOK.

        :param args: free-form details, forwarded to PilotException.
        :param kwargs: keyword options, forwarded to PilotException.
        """
        # unpack when delegating: handing over the tuple and the dict themselves makes the base
        # class store them as two positional details and leaves it without any keyword options
        super(QueuedataNotOK, self).__init__(*args, **kwargs)
        self._errorCode = errors.QUEUEDATANOTOK
        self._message = errors.get_error_message(self._errorCode)
0379 
0380 
class ReplicasNotFound(PilotException):
    """
    No matching replicas were found in list_replicas() output.
    """
    def __init__(self, *args, **kwargs):
        """
        Create the exception and pin its error code to errors.NOREPLICAS.

        :param args: free-form details, forwarded to PilotException.
        :param kwargs: keyword options, forwarded to PilotException.
        """
        # unpack when delegating: handing over the tuple and the dict themselves makes the base
        # class store them as two positional details and leaves it without any keyword options
        super(ReplicasNotFound, self).__init__(*args, **kwargs)
        self._errorCode = errors.NOREPLICAS
        self._message = errors.get_error_message(self._errorCode)
0389 
0390 
class JobAlreadyRunning(PilotException):
    """
    Job is already running elsewhere.
    """
    def __init__(self, *args, **kwargs):
        """
        Create the exception and pin its error code to errors.JOBALREADYRUNNING.

        :param args: free-form details, forwarded to PilotException.
        :param kwargs: keyword options, forwarded to PilotException; an optional 'timeout' entry is kept for
                       the string representation (None when it is not given).
        """
        # unpack when delegating: handing over the tuple and the dict themselves makes the base
        # class store them as two positional details and leaves it without any keyword options
        super(JobAlreadyRunning, self).__init__(*args, **kwargs)
        self._errorCode = errors.JOBALREADYRUNNING
        self._message = errors.get_error_message(self._errorCode)
        # __str__() reads this attribute; it must always exist or str(exception) raises AttributeError
        self._timeout = kwargs.get('timeout', None)

    def __str__(self):
        """
        Return the class name, error message, timeout and (when present) the positional details.

        :return: description (string).
        """
        details = ' : %s' % repr(self.args) if self.args else ''
        return "%s: %s, timeout=%s seconds%s" % (self.__class__.__name__, self._message, self._timeout, details)
0402 
0403 
class ExcThread(threading.Thread):
    """
    Support class that allows for catching exceptions in threads.
    """

    def __init__(self, bucket, target, kwargs, name):
        """
        Init function with a bucket that can be used to communicate exceptions to the caller.
        The bucket is a Queue.queue() or queue.Queue() object that can hold an exception thrown by a thread.

        :param bucket: queue based bucket.
        :param target: target function to execute.
        :param kwargs: target function options.
        :param name: name of the thread.
        """
        threading.Thread.__init__(self, target=target, kwargs=kwargs, name=name)
        self.name = name
        self.bucket = bucket

    def _get_target_and_kwargs(self):
        """
        Return the target function and its options as stored by threading.Thread.
        The attributes holding them are named differently on Python 2 and Python 3.

        :return: target function, target function options (dictionary).
        """
        if is_python3():
            return self._target, self._kwargs
        return self._Thread__target, self._Thread__kwargs

    def run(self):
        """
        Thread run function.
        Any exceptions in the threads are caught in this function and placed in the bucket of the current thread.
        The bucket will be emptied by the control module that launched the thread. E.g. an exception is thrown in
        the retrieve thread (in function retrieve()) that is created by the job.control thread. The exception is caught
        by the run() function and placed in the bucket belonging to the retrieve thread. The bucket is emptied in
        job.control().

        If the target function options contain a truthy 'args' entry, its graceful_stop event is set ten seconds
        after the exception has been reported.

        :return:
        """
        try:
            target, options = self._get_target_and_kwargs()
            target(**options)
        except Exception:
            # grab the exception information once so that every report below describes the same exception
            info = exc_info()
            # logger object can't be used here for some reason:
            # IOError: [Errno 2] No such file or directory: '/state/partition1/scratch/PanDA_Pilot2_*/pilotlog.txt'
            print('exception caught by thread run() function: %s' % str(info))
            print(traceback.format_exc())
            # print_tb() writes the traceback itself and returns None, so its result must not be printed
            traceback.print_tb(info[2])
            self.bucket.put(info)
            print("exception has been put in bucket queue belonging to thread '%s'" % self.name)
            args = self._get_target_and_kwargs()[1].get('args', None)
            if args:
                # the sleep is needed to allow the threads to catch up
                print('setting graceful stop in 10 s since there is no point in continuing')
                time.sleep(10)
                args.graceful_stop.set()

    def get_bucket(self):
        """
        Return the bucket object that holds any information about thrown exceptions.

        :return: bucket (Queue object)
        """
        return self.bucket