pilot/util/math.py

0001 #!/usr/bin/env python
0002 # Licensed under the Apache License, Version 2.0 (the "License");
0003 # you may not use this file except in compliance with the License.
0004 # You may obtain a copy of the License at
0005 # http://www.apache.org/licenses/LICENSE-2.0
0006 #
0007 # Authors:
0008 # - Paul Nilsson, paul.nilsson@cern.ch, 2018-2020
0009
0010 from pilot.common.exception import NotDefined
0011
0012 from decimal import Decimal
0013 from re import split, sub
0014
0015 import logging
0016 logger = logging.getLogger(__name__)
0017
# Unit symbols grouped by naming scheme. Within each tuple the position is the
# power of 1024 the symbol stands for (index 0 = bytes, index 1 = 1024 bytes, ...).
# NOTE(review): 'iotta' is presumably meant to be 'yotta'; the spelling is kept
# because it is a runtime string accepted by human2bytes() and emitted by bytes2human().
SYMBOLS = dict(
    # single-letter symbols
    customary=('B', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'),
    # spelled-out names
    customary_ext=('byte', 'kilo', 'mega', 'giga', 'tera', 'peta', 'exa', 'zetta', 'iotta'),

    # IEC style symbols
    iec=('Bi', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi', 'Yi'),
    # IEC style spelled-out names
    iec_ext=('byte', 'kibi', 'mebi', 'gibi', 'tebi', 'pebi', 'exbi', 'zebi', 'yobi'),
)
0025
0026
def mean(data):
    """
    Compute the arithmetic mean of a sequence of numbers.

    :param data: list of floats or ints (must support len()).
    :raises ValueError: if data is empty.
    :return: mean value (float).
    """

    count = len(data)
    if count >= 1:
        # float() keeps the division a true division also for integer input
        return sum(data) / float(count)

    raise ValueError('mean requires at least one data point')
0041
0042
def sum_square_dev(data):
    """
    Compute the sum of squared deviations from the mean.
    Sum (x - x_mean)**2

    :param data: list of floats or ints.
    :raises ValueError: if data is empty (raised by mean()).
    :return: sum of squares (float).
    """

    center = mean(data)

    total = 0
    for value in data:
        total = total + (value - center) ** 2

    return total
0055
0056
def sum_dev(x, y):
    """
    Compute the sum of the products of the deviations of x and y from their means.
    Sum (x - x_mean) * (y - y_mean)

    The values are paired with zip(), i.e. any surplus elements of the longer list are ignored.

    :param x: list of ints or floats.
    :param y: list of ints or floats.
    :raises ValueError: if x or y is empty (raised by mean()).
    :return: sum of deviation products (float).
    """

    x_mean = mean(x)
    y_mean = mean(y)

    products = ((x_value - x_mean) * (y_value - y_mean) for x_value, y_value in zip(x, y))

    return sum(products)
0071
0072
def chi2(observed, expected):
    """
    Compute a chi2-like sum from the observed and expected values.
    Sum (observed - expected)**2 / expected**2

    If any expected value is zero, 0.0 is returned instead of performing the division.

    NOTE(review): each term is normalised by the expected value squared, not by the expected
    value as in Pearson's chi2 - confirm that this is intended before relying on absolute values.

    :param observed: list of floats.
    :param expected: list of floats.
    :return: chi2 (float).
    """

    # a zero expectation would lead to a division by zero below
    if 0 in expected:
        return 0.0

    total = 0
    for obs, exp in zip(observed, expected):
        total = total + (obs - exp) ** 2 / exp ** 2

    return total
0086
0087
def float_to_rounded_string(num, precision=3):
    """
    Convert float to a string with a desired number of digits (the precision).
    E.g. num=3.1415, precision=2 -> '3.14'.

    The rounding is done by Decimal.quantize() using the rounding mode of the current decimal context.

    :param num: number to be converted (float).
    :param precision: number of desired digits (int)
    :raises NotDefined: for undefined precisions and float conversions to Decimal.
    :return: rounded string.
    """

    try:
        # quantization template, e.g. precision=3 -> Decimal('0.001')
        _precision = Decimal(10) ** -precision
    except Exception as e:
        # the exception must be formatted with %s; %e only accepts real numbers and would
        # raise a TypeError here, hiding the NotDefined exception from the caller
        raise NotDefined('failed to define precision=%s: %s' % (str(precision), e))

    try:
        # go via str() so that the Decimal is built from the printed value of the float
        s = Decimal(str(num)).quantize(_precision)
    except Exception as e:
        raise NotDefined('failed to convert %s to Decimal: %s' % (str(num), e))

    return str(s)
0110
0111
def tryint(x):
    """
    Convert x to an integer if possible, otherwise hand it back untouched.
    Used by numbered string comparison (to protect against unexpected letters in version number).

    Only ValueError is handled; any other exception raised by int() propagates to the caller.

    :param x: possible int.
    :return: converted int or original value in case of ValueError.
    """

    try:
        converted = int(x)
    except ValueError:
        return x

    return converted
0124
0125
def split_version(s):
    """
    Split a version string at the dots and convert the parts into integers when possible.
    Any part that cannot be converted is left as a string.
    The function is used with release strings.

    The string is split with a capturing regular expression, so the returned tuple also holds the
    separators and the (empty) strings found before the first and after the last part:
    split_version("1.2.3") = ('', 1, '.', 2, '.', 3, '')
    split_version("1.2.Nightly") = ('', 1, '.', 2, '.', 'Nightly', '')

    The tuples can be compared with each other (and used as sort keys) as long as the parts found
    at the same position are of the same type; on Python 3, comparing an int part with a str part
    raises TypeError.

    NOTE(review): earlier documentation promised (1, 2, 3) for "1.2.3" - confirm which form the
    callers expect before changing the regular expression.

    :param s: release string.
    :return: converted release tuple.
    """

    pieces = split('([^.]+)', s)

    return tuple(map(tryint, pieces))
0144
0145
def is_greater_or_equal(a, b):
    """
    Is the numbered string a >= b?
    "1.2.3" > "1.2"  -- more digits
    "1.2.3" > "1.2.2"  -- rank based comparison
    "1.3.2" > "1.2.3"  -- rank based comparison
    "1.2.N" > "1.2.2"  -- nightlies checker, always greater

    The strings are split with split_version(). A non-numerical part (e.g. "N") always ranks above
    a numerical part at the same position, also on Python 3 where a plain comparison of a str with
    an int raises TypeError.

    :param a: numbered string.
    :param b: numbered string.
    :return: boolean.
    """

    def _ranked(version):
        # tag every part with a type rank (0 for ints, 1 for strings) so that parts of different
        # types are ordered by the rank and never compared with each other directly
        return tuple((0, part) if isinstance(part, int) else (1, part) for part in split_version(version))

    return _ranked(a) >= _ranked(b)
0160
0161
def add_lists(list1, list2):
    """
    Add list1 and list2 and remove any duplicates.
    Example:
    list1=[1,2,3,4]
    list2=[3,4,5,6]
    add_lists(list1, list2) = [1, 2, 3, 4, 5, 6]

    list1 is copied as it is (neither list is modified); the elements of list2 that are not yet
    present are appended in the order in which they appear in list2, each of them only once.
    All elements must be hashable.

    :param list1: input list 1
    :param list2: input list 2
    :return: added lists with removed duplicates
    """

    seen = set(list1)
    merged = list(list1)
    # walk list2 in order rather than via a set difference, so that the result is deterministic
    for item in list2:
        if item not in seen:
            seen.add(item)
            merged.append(item)

    return merged
0175
0176
def convert_mb_to_b(size):
    """
    Convert a size given in MB into B.
    The size is converted to an integer before the multiplication, i.e. a float is truncated
    to whole MB first.

    :param size: size in MB (float or int).
    :return: size in B (int).
    :raises: ValueError for conversion error.
    """

    try:
        megabytes = int(size)
    except Exception as exc:
        raise ValueError('cannot convert %s to int: %s' % (str(size), exc))

    bytes_per_mb = 1024 ** 2

    return megabytes * bytes_per_mb
0193
0194
def diff_lists(list_a, list_b):
    """
    Return the elements of list_a that are not present in list_b.
    The difference is computed with sets, i.e. duplicates are removed, the order of the
    returned elements is not defined and all elements must be hashable.

    :param list_a: input list a.
    :param list_b: input list b.
    :return: difference (list).
    """

    kept = set(list_a)
    removed = set(list_b)

    return list(kept - removed)
0205
0206
def bytes2human(n, _format='%(value).1f %(symbol)s', symbols='customary'):
    """
    Convert n bytes into a human readable string based on format.
    symbols can be either "customary", "customary_ext", "iec" or "iec_ext",
    see: http://goo.gl/kTQMs

    n is converted to an integer first (i.e. a float is truncated). The largest symbol whose
    multiplier (a power of 1024) does not exceed n is used; below 1024 the value is given
    with the first symbol of the set.

      >>> bytes2human(0)
      '0.0 B'
      >>> bytes2human(0.9)
      '0.0 B'
      >>> bytes2human(1)
      '1.0 B'
      >>> bytes2human(1.9)
      '1.0 B'
      >>> bytes2human(1024)
      '1.0 K'
      >>> bytes2human(1048576)
      '1.0 M'
      >>> bytes2human(1099511627776127398123789121)
      '909.5 Y'

      >>> bytes2human(9856, symbols="customary")
      '9.6 K'
      >>> bytes2human(9856, symbols="customary_ext")
      '9.6 kilo'
      >>> bytes2human(9856, symbols="iec")
      '9.6 Ki'
      >>> bytes2human(9856, symbols="iec_ext")
      '9.6 kibi'

      >>> bytes2human(10000, "%(value).1f %(symbol)s/sec")
      '9.8 K/sec'

      >>> # precision can be adjusted by playing with %f operator
      >>> bytes2human(10000, _format="%(value).5f %(symbol)s")
      '9.76562 K'

    :param n: number of bytes (int or float).
    :param _format: format string using the mapping keys 'value' and 'symbol' (string).
    :param symbols: name of the symbol set, a key in SYMBOLS (string).
    :raises ValueError: if n is negative.
    :raises KeyError: if symbols is not a key in SYMBOLS.
    :return: human readable string.
    """
    n = int(n)
    if n < 0:
        raise ValueError("n < 0")
    # from here on, symbols is the tuple of unit names rather than the name of the set
    symbols = SYMBOLS[symbols]
    prefix = {}
    for i, s in enumerate(symbols[1:]):
        # '*' binds tighter than '<<': the multiplier is 1 << ((i + 1) * 10), i.e. 1024 ** (i + 1)
        prefix[s] = 1 << (i + 1) * 10
    # try the largest unit first and return on the first one that fits
    for symbol in reversed(symbols[1:]):
        if n >= prefix[symbol]:
            value = float(n) / prefix[symbol]
            # the format is filled from the local variables, so the local names 'value' and
            # 'symbol' are what the mapping keys in _format refer to - do not rename them
            return _format % locals()
    # n is smaller than the first multiplier: report it using the first symbol of the set
    return _format % dict(symbol=symbols[0], value=n)
0256
0257
def human2bytes(s, divider=None):
    """
    Attempts to guess the string format based on default symbols
    set and return the corresponding bytes as an integer.
    When unable to recognize the format ValueError is raised.

    If no digit passed, only a letter, it is interpreted as a one of a kind. Eg "KB" = "1 KB".
    If no letter passed, it is assumed to be in bytes. Eg "512" = "512 B"

    The second argument is used to convert to another magnitude (eg return not bytes but KB).
    It can be interpreted as a cluster size. Eg "512 B", or "0.2 K".

    The unit is first looked up as written and then again without a trailing "b", "bi", "byte"
    or "bytes" (in any case), so that the iec_ext names ("kibi", "mebi", ..), which end in "bi",
    are recognized as well as forms like "KB", "KiB" or "kilobytes".

      >>> human2bytes('0 B')
      0
      >>> human2bytes('3')
      3
      >>> human2bytes('K')
      1024
      >>> human2bytes('1 K')
      1024
      >>> human2bytes('1 M')
      1048576
      >>> human2bytes('1 Gi')
      1073741824
      >>> human2bytes('1 tera')
      1099511627776
      >>> human2bytes('1 kibi')
      1024
      >>> human2bytes('2 KiB')
      2048

      >>> human2bytes('0.5kilo')
      512
      >>> human2bytes('0.1  byte')
      0
      >>> human2bytes('1 k')  # k is an alias for K
      1024
      >>> human2bytes('12 foo')
      Traceback (most recent call last):
          ...
      ValueError: can't interpret '12 foo'

      >>> human2bytes('1 M', 'K')
      1024
      >>> human2bytes('2 G', 'M')
      2048
      >>> human2bytes('G', '2M')
      512

    :param s: size string, e.g. '1 K', '0.5kilo' or '2 KiB' (string).
    :param divider: optional size string; when given, the result is expressed in units of it (string or None).
    :raises ValueError: if the number or the unit cannot be interpreted.
    :return: size in bytes, or in units of divider (int).
    """
    init = s

    # the number is the leading run of digits and dots; slicing keeps the test safe at the end of the string
    pos = 0
    while s[pos:pos + 1].isdigit() or s[pos:pos + 1] == '.':
        pos += 1
    # a bare unit such as 'K' counts as one of that unit
    num = float(s[:pos]) if pos else 1.0

    # no unit at all means bytes
    unit = s[pos:].strip() or "B"
    # the look-behind makes sure that at least one character is left after the removal
    trimmed = sub(r'(?i)(?<=.)(bi?|bytes?)$', "", unit)

    # the unit as written takes precedence: removing the suffix first would truncate the
    # iec_ext names (e.g. 'kibi' -> 'ki') and make them impossible to interpret
    matches = ((candidate, sset)
               for candidate in (unit, trimmed)
               for sset in SYMBOLS.values()
               if candidate in sset)
    letter, sset = next(matches, (None, None))
    if sset is None:
        if trimmed == 'k':
            # treat 'k' as an alias for 'K' as per: http://goo.gl/kTQMs
            sset = SYMBOLS['customary']
            letter = 'K'
        else:
            raise ValueError("can't interpret %r" % init)

    # the position of the symbol in its set is the power of 1024 it stands for
    multiplier = 1 << (sset.index(letter) * 10)

    div = 1 if divider is None else human2bytes(divider)
    return int(num * multiplier / div)
0331     div = 1 if divider is None else human2bytes(divider)
0332     return int(num * prefix[letter] / div)