File indexing completed on 2026-04-09 08:38:56
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010 from pilot.common.exception import NotDefined
0011
0012 from decimal import Decimal
0013 from re import split, sub
0014
0015 import logging
0016 logger = logging.getLogger(__name__)
0017
# Unit names recognised by bytes2human() and human2bytes(), grouped by naming
# scheme. Within each tuple the position is the power of 1024 the name stands
# for (index 0 -> 1024**0, index 1 -> 1024**1, ...).
# NOTE: 'iotta' is the spelling this table has always used for the 1024**8 name
# in 'customary_ext'; it is kept verbatim because it is matched at runtime.
SYMBOLS = dict(
    customary=('B', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'),
    customary_ext=(
        'byte', 'kilo', 'mega', 'giga', 'tera', 'peta', 'exa', 'zetta', 'iotta',
    ),
    iec=('Bi', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi', 'Yi'),
    iec_ext=(
        'byte', 'kibi', 'mebi', 'gibi', 'tebi', 'pebi', 'exbi', 'zebi', 'yobi',
    ),
)
0025
0026
def mean(data):
    """
    Compute the arithmetic mean of a sized collection of numbers.

    :param data: list of floats or ints (must support len()).
    :raises ValueError: if data is empty.
    :return: mean value (float).
    """

    count = len(data)
    if not count:
        raise ValueError('mean requires at least one data point')

    # divide by a float so integer input still yields a float result
    total = sum(data)
    return total / float(count)
0041
0042
def sum_square_dev(data):
    """
    Return the sum of squared deviations from the mean.
    Sum (x - x_mean)**2 over all x in data.

    :param data: list of floats or ints.
    :raises ValueError: propagated from mean() when data is empty.
    :return: sum of squares (float).
    """

    center = mean(data)
    squared_deviations = [(value - center) ** 2 for value in data]

    return sum(squared_deviations)
0055
0056
def sum_dev(x, y):
    """
    Return the sum of the products of the paired deviations of x and y.
    Sum (x - x_mean) * (y - y_mean)

    The pairs are formed with zip(), so if the inputs differ in length the
    surplus elements of the longer one are not included in the sum (they do
    still contribute to that sequence's mean).

    :param x: list of ints or floats.
    :param y: list of ints or floats.
    :raises ValueError: propagated from mean() when x or y is empty.
    :return: sum of deviations (float).
    """

    mean_x = mean(x)
    mean_y = mean(y)
    products = [(xi - mean_x) * (yi - mean_y) for xi, yi in zip(x, y)]

    return sum(products)
0071
0072
def chi2(observed, expected):
    """
    Return the chi2-like sum for the provided observed and expected values.

    Each term is (observed - expected)**2 / expected**2. If any expected value
    is zero the function gives up and returns 0.0 instead of dividing by zero.

    NOTE(review): the textbook Pearson statistic divides by expected, not by
    expected**2 - confirm the squared denominator is intended before changing.

    :param observed: list of floats.
    :param expected: list of floats.
    :return: chi2 (float).
    """

    if 0 not in expected:
        terms = [(obs - exp) ** 2 / exp ** 2 for obs, exp in zip(observed, expected)]
        return sum(terms)

    return 0.0
0086
0087
def float_to_rounded_string(num, precision=3):
    """
    Convert float to a string with a desired number of digits (the precision).
    E.g. num=3.1415, precision=2 -> '3.14'.

    The number is converted through its str() representation to a Decimal and
    quantized to 10**-precision using the Decimal context's rounding mode.

    :param num: number to be converted (float).
    :param precision: number of desired digits (int)
    :raises NotDefined: for undefined precisions and float conversions to Decimal.
    :return: rounded string.
    """

    try:
        _precision = Decimal(10) ** -precision
    except Exception as e:
        # the exception must be formatted with %s: %e only accepts real numbers,
        # so using it here raised TypeError and masked the intended NotDefined
        raise NotDefined('failed to define precision=%s: %s' % (str(precision), e))

    try:
        s = Decimal(str(num)).quantize(_precision)
    except Exception as e:
        raise NotDefined('failed to convert %s to Decimal: %s' % (str(num), e))

    return str(s)
0110
0111
def tryint(x):
    """
    Used by numbered string comparison (to protect against unexpected letters in version number).

    :param x: possible int.
    :return: converted int or original value in case of ValueError.
    """

    try:
        return int(x)
    except ValueError:
        return x


def split_version(s):
    """
    Split version string into parts and convert the parts into integers when possible.
    Any encountered strings are left as they are.
    The function is used with release strings.
    split_version("1.2.3") = (1,2,3)
    split_version("1.2.Nightly") = (1,2,"Nightly")

    The function can also be used as a sort key for purely numeric versions:
    > versions = ['4.10', '4.3', '4.2']
    > sorted(versions, key=split_version)
    ['4.2', '4.3', '4.10']

    :param s: release string.
    :return: converted release tuple.
    """

    # split on the dots only, so that the separators (and the empty strings a
    # capturing regex split produces at both ends) do not end up in the tuple
    return tuple(tryint(part) for part in s.split('.'))


def is_greater_or_equal(a, b):
    """
    Is the numbered string a >= b?
    "1.2.3" > "1.2" -- more digits
    "1.2.3" > "1.2.2" -- rank based comparison
    "1.3.2" > "1.2.3" -- rank based comparison
    "1.2.N" > "1.2.2" -- nightlies checker, always greater

    :param a: numbered string.
    :param b: numbered string.
    :return: boolean.
    """

    def _rank(version):
        # Tag every part with its kind so that an integer part is only ever
        # compared with another integer and a text part with another text part;
        # text parts (e.g. nightlies) rank above any number. Comparing the raw
        # parts directly raises TypeError on Python 3 for e.g. "N" vs 2.
        return tuple((0, part) if isinstance(part, int) else (1, part)
                     for part in split_version(version))

    return _rank(a) >= _rank(b)
0160
0161
def add_lists(list1, list2):
    """
    Add list1 and list2 and remove any duplicates.
    Example:
    list1=[1,2,3,4]
    list2=[3,4,5,6]
    add_lists(list1, list2) = [1, 2, 3, 4, 5, 6]

    list1 is kept as it is; the elements of list2 that are not already present
    are appended once each, in the order in which they first appear in list2,
    so the result is reproducible from run to run. Elements must be hashable.

    :param list1: input list 1
    :param list2: input list 2
    :return: added lists with removed duplicates
    """

    merged = list(list1)
    seen = set(merged)
    for item in list2:
        if item not in seen:
            # remember the item so repeats inside list2 are added only once
            seen.add(item)
            merged.append(item)

    return merged
0175
0176
def convert_mb_to_b(size):
    """
    Convert a size given in MB to the corresponding number of bytes.
    The size is first converted with int(), so a float loses its fractional
    part before the conversion (e.g. 1.9 MB is treated as 1 MB).

    :param size: size in MB (float or int).
    :return: size in B (int).
    :raises: ValueError for conversion error.
    """

    try:
        megabytes = int(size)
    except Exception as exc:
        raise ValueError('cannot convert %s to int: %s' % (str(size), exc))
    else:
        # 1 MB = 1024 * 1024 B
        return megabytes * 1024 * 1024
0193
0194
def diff_lists(list_a, list_b):
    """
    Return the difference between list_a and list_b.

    The result holds every element of list_a that does not occur in list_b,
    once each, in the order of first appearance in list_a (so the output is
    reproducible from run to run). Elements must be hashable.

    :param list_a: input list a.
    :param list_b: input list b.
    :return: difference (list).
    """

    skip = set(list_b)
    difference = []
    for item in list_a:
        if item not in skip:
            # adding the item to the skip set drops later repeats from list_a
            skip.add(item)
            difference.append(item)

    return difference
0205
0206
def bytes2human(n, _format='%(value).1f %(symbol)s', symbols='customary'):
    """
    Convert n bytes into a human readable string based on format.
    symbols can be either "customary", "customary_ext", "iec" or "iec_ext",
    see: http://goo.gl/kTQMs

    The format string is filled from a mapping with exactly two keys:
    "value" (the scaled number) and "symbol" (the unit name).

    >>> bytes2human(0)
    '0.0 B'
    >>> bytes2human(0.9)
    '0.0 B'
    >>> bytes2human(1)
    '1.0 B'
    >>> bytes2human(1.9)
    '1.0 B'
    >>> bytes2human(1024)
    '1.0 K'
    >>> bytes2human(1048576)
    '1.0 M'
    >>> bytes2human(1099511627776127398123789121)
    '909.5 Y'

    >>> bytes2human(9856, symbols="customary")
    '9.6 K'
    >>> bytes2human(9856, symbols="customary_ext")
    '9.6 kilo'
    >>> bytes2human(9856, symbols="iec")
    '9.6 Ki'
    >>> bytes2human(9856, symbols="iec_ext")
    '9.6 kibi'

    >>> bytes2human(10000, "%(value).1f %(symbol)s/sec")
    '9.8 K/sec'

    >>> # precision can be adjusted by playing with %f operator
    >>> bytes2human(10000, _format="%(value).5f %(symbol)s")
    '9.76562 K'

    :param n: number of bytes (int or float; truncated with int()).
    :param _format: %-style format using the "value" and "symbol" keys (string).
    :param symbols: name of the symbol set in SYMBOLS (string).
    :raises ValueError: if n is negative.
    :raises KeyError: if symbols is not a known symbol set.
    :return: human readable size (string).
    """
    n = int(n)
    if n < 0:
        raise ValueError("n < 0")
    names = SYMBOLS[symbols]

    # try the largest unit first; the unit at index i is worth 1024**i bytes
    for power in range(len(names) - 1, 0, -1):
        threshold = 1 << (power * 10)
        if n >= threshold:
            # pass an explicit mapping rather than locals(), so the keys offered
            # to the format string are the same on both return paths and do not
            # depend on how the local variables happen to be named
            return _format % {'symbol': names[power], 'value': float(n) / threshold}

    # smaller than the first multiple: report plain bytes
    return _format % {'symbol': names[0], 'value': n}
0256
0257
def human2bytes(s, divider=None):
    """
    Parse a human readable size string and return the size as an integer.
    The unit is looked up in all the symbol sets of SYMBOLS; ValueError is
    raised when it is not found in any of them.

    A string without a number means one unit, e.g. "KB" = "1 KB".
    A string without a unit is taken to be in bytes, e.g. "512" = "512 B".
    A trailing "B", "Bi", "byte" or "bytes" after the unit is ignored
    (case-insensitively), e.g. "KB" is read as "K".

    The optional divider is itself a size string; the result is then expressed
    in multiples of that size instead of in bytes (e.g. "K" to get KB, or a
    cluster size such as "512 B" or "0.2 K").

    >>> human2bytes('0 B')
    0
    >>> human2bytes('3')
    3
    >>> human2bytes('K')
    1024
    >>> human2bytes('1 K')
    1024
    >>> human2bytes('1 M')
    1048576
    >>> human2bytes('1 Gi')
    1073741824
    >>> human2bytes('1 tera')
    1099511627776

    >>> human2bytes('0.5kilo')
    512
    >>> human2bytes('0.1 byte')
    0
    >>> human2bytes('1 k') # k is an alias for K
    1024
    >>> human2bytes('12 foo')
    Traceback (most recent call last):
    ...
    ValueError: can't interpret '12 foo'

    >>> human2bytes('1 M', 'K')
    1024
    >>> human2bytes('2 G', 'M')
    2048
    >>> human2bytes('G', '2M')
    512

    :param s: size string, e.g. "1 K" (string).
    :param divider: optional size string to divide the result by (string or None).
    :raises ValueError: if the unit (or the numeric part) cannot be interpreted.
    :return: size in bytes, or in units of divider (int).
    """
    original = s

    # the numeric part is the leading run of digits and dots
    end = 0
    while s[end:end + 1].isdigit() or s[end:end + 1] == '.':
        end += 1
    digits, remainder = s[:end], s[end:]

    # no number at all means exactly one unit
    number = float(digits) if digits else float("1")

    # drop an optional trailing B/Bi/byte/bytes that follows the actual unit
    unit = sub(r'(?i)(?<=.)(bi?|bytes?)$', "", remainder.strip())
    if not unit:
        unit = "B"

    # first symbol set that knows the unit, if any
    family = next((names for names in SYMBOLS.values() if unit in names), None)
    if family is None:
        if unit != 'k':
            raise ValueError("can't interpret %r" % original)
        # lower case k is accepted as an alias for the customary K
        family = SYMBOLS['customary']
        unit = unit.upper()

    # the symbol at index i stands for 1024**i bytes
    multipliers = {symbol: 1 << (power * 10) for power, symbol in enumerate(family)}

    if divider is None:
        unit_size = 1
    else:
        unit_size = human2bytes(divider)

    return int(number * multipliers[unit] / unit_size)
0332 return int(num * prefix[letter] / div)