src/Tensor/TensorUInt128.h

0001 // This file is part of Eigen, a lightweight C++ template library
0002 // for linear algebra.
0003 //
0004 // Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
0005 //
0006 // This Source Code Form is subject to the terms of the Mozilla
0007 // Public License v. 2.0. If a copy of the MPL was not distributed
0008 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
0009
0010 #ifndef EIGEN_CXX11_TENSOR_TENSOR_UINT128_H
0011 #define EIGEN_CXX11_TENSOR_TENSOR_UINT128_H
0012
0013 namespace Eigen {
0014 namespace internal {
0015
0016
0017 template <uint64_t n>
0018 struct static_val {
0019   static const uint64_t value = n;
0020   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE operator uint64_t() const { return n; }
0021
0022   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static_val() { }
0023
0024   template <typename T>
0025   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static_val(const T& v) {
0026     EIGEN_UNUSED_VARIABLE(v);
0027     eigen_assert(v == n);
0028   }
0029 };
0030
0031
0032 template <typename HIGH = uint64_t, typename LOW = uint64_t>
0033 struct TensorUInt128
0034 {
0035   HIGH high;
0036   LOW low;
0037
0038   template<typename OTHER_HIGH, typename OTHER_LOW>
0039   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
0040   TensorUInt128(const TensorUInt128<OTHER_HIGH, OTHER_LOW>& other) : high(other.high), low(other.low) {
0041     EIGEN_STATIC_ASSERT(sizeof(OTHER_HIGH) <= sizeof(HIGH), YOU_MADE_A_PROGRAMMING_MISTAKE);
0042     EIGEN_STATIC_ASSERT(sizeof(OTHER_LOW) <= sizeof(LOW), YOU_MADE_A_PROGRAMMING_MISTAKE);
0043   }
0044
0045   template<typename OTHER_HIGH, typename OTHER_LOW>
0046   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
0047   TensorUInt128& operator = (const TensorUInt128<OTHER_HIGH, OTHER_LOW>& other) {
0048     EIGEN_STATIC_ASSERT(sizeof(OTHER_HIGH) <= sizeof(HIGH), YOU_MADE_A_PROGRAMMING_MISTAKE);
0049     EIGEN_STATIC_ASSERT(sizeof(OTHER_LOW) <= sizeof(LOW), YOU_MADE_A_PROGRAMMING_MISTAKE);
0050     high = other.high;
0051     low = other.low;
0052     return *this;
0053   }
0054
0055   template<typename T>
0056   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
0057   explicit TensorUInt128(const T& x) : high(0), low(x) {
0058     eigen_assert((static_cast<typename conditional<sizeof(T) == 8, uint64_t, uint32_t>::type>(x) <= NumTraits<uint64_t>::highest()));
0059     eigen_assert(x >= 0);
0060   }
0061
0062   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
0063   TensorUInt128(HIGH y, LOW x) : high(y), low(x) { }
0064
0065   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE operator LOW() const {
0066     return low;
0067   }
0068   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LOW lower() const {
0069     return low;
0070   }
0071   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HIGH upper() const {
0072     return high;
0073   }
0074 };
0075
0076
0077 template <typename HL, typename LL, typename HR, typename LR>
0078 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
0079 bool operator == (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
0080 {
0081   return (lhs.high == rhs.high) & (lhs.low == rhs.low);
0082 }
0083
0084 template <typename HL, typename LL, typename HR, typename LR>
0085 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
0086 bool operator != (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
0087 {
0088   return (lhs.high != rhs.high) | (lhs.low != rhs.low);
0089 }
0090
0091 template <typename HL, typename LL, typename HR, typename LR>
0092 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
0093 bool operator >= (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
0094 {
0095   if (lhs.high != rhs.high) {
0096     return lhs.high > rhs.high;
0097   }
0098   return lhs.low >= rhs.low;
0099 }
0100
0101 template <typename HL, typename LL, typename HR, typename LR>
0102 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
0103 bool operator < (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
0104 {
0105   if (lhs.high != rhs.high) {
0106     return lhs.high < rhs.high;
0107   }
0108   return lhs.low < rhs.low;
0109 }
0110
0111 template <typename HL, typename LL, typename HR, typename LR>
0112 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
0113 TensorUInt128<uint64_t, uint64_t> operator + (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
0114 {
0115   TensorUInt128<uint64_t, uint64_t> result(lhs.high + rhs.high, lhs.low + rhs.low);
0116   if (result.low < rhs.low) {
0117     result.high += 1;
0118   }
0119   return result;
0120 }
0121
0122 template <typename HL, typename LL, typename HR, typename LR>
0123 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
0124 TensorUInt128<uint64_t, uint64_t> operator - (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
0125 {
0126   TensorUInt128<uint64_t, uint64_t> result(lhs.high - rhs.high, lhs.low - rhs.low);
0127   if (result.low > lhs.low) {
0128     result.high -= 1;
0129   }
0130   return result;
0131 }
0132
0133
0134 template <typename HL, typename LL, typename HR, typename LR>
0135 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
0136 TensorUInt128<uint64_t, uint64_t> operator * (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
0137 {
0138   // Split each 128-bit integer into 4 32-bit integers, and then do the
0139   // multiplications by hand as follow:
0140   //   lhs      a  b  c  d
0141   //   rhs      e  f  g  h
0142   //           -----------
0143   //           ah bh ch dh
0144   //           bg cg dg
0145   //           cf df
0146   //           de
0147   // The result is stored in 2 64bit integers, high and low.
0148
0149   const uint64_t LOW = 0x00000000FFFFFFFFLL;
0150   const uint64_t HIGH = 0xFFFFFFFF00000000LL;
0151
0152   uint64_t d = lhs.low & LOW;
0153   uint64_t c = (lhs.low & HIGH) >> 32LL;
0154   uint64_t b = lhs.high & LOW;
0155   uint64_t a = (lhs.high & HIGH) >> 32LL;
0156
0157   uint64_t h = rhs.low & LOW;
0158   uint64_t g = (rhs.low & HIGH) >> 32LL;
0159   uint64_t f = rhs.high & LOW;
0160   uint64_t e = (rhs.high & HIGH) >> 32LL;
0161
0162   // Compute the low 32 bits of low
0163   uint64_t acc = d * h;
0164   uint64_t low = acc & LOW;
0165   //  Compute the high 32 bits of low. Add a carry every time we wrap around
0166   acc >>= 32LL;
0167   uint64_t carry = 0;
0168   uint64_t acc2 = acc + c * h;
0169   if (acc2 < acc) {
0170     carry++;
0171   }
0172   acc = acc2 + d * g;
0173   if (acc < acc2) {
0174     carry++;
0175   }
0176   low |= (acc << 32LL);
0177
0178   // Carry forward the high bits of acc to initiate the computation of the
0179   // low 32 bits of high
0180   acc2 = (acc >> 32LL) | (carry << 32LL);
0181   carry = 0;
0182
0183   acc = acc2 + b * h;
0184   if (acc < acc2) {
0185     carry++;
0186   }
0187   acc2 = acc + c * g;
0188   if (acc2 < acc) {
0189     carry++;
0190   }
0191   acc = acc2 + d * f;
0192   if (acc < acc2) {
0193     carry++;
0194   }
0195   uint64_t high = acc & LOW;
0196
0197   // Start to compute the high 32 bits of high.
0198   acc2 = (acc >> 32LL) | (carry << 32LL);
0199
0200   acc = acc2 + a * h;
0201   acc2 = acc + b * g;
0202   acc = acc2 + c * f;
0203   acc2 = acc + d * e;
0204   high |= (acc2 << 32LL);
0205
0206   return TensorUInt128<uint64_t, uint64_t>(high, low);
0207 }
0208
0209 template <typename HL, typename LL, typename HR, typename LR>
0210 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
0211 TensorUInt128<uint64_t, uint64_t> operator / (const TensorUInt128<HL, LL>& lhs, const TensorUInt128<HR, LR>& rhs)
0212 {
0213   if (rhs == TensorUInt128<static_val<0>, static_val<1> >(1)) {
0214     return TensorUInt128<uint64_t, uint64_t>(lhs.high, lhs.low);
0215   } else if (lhs < rhs) {
0216     return TensorUInt128<uint64_t, uint64_t>(0);
0217   } else {
0218     // calculate the biggest power of 2 times rhs that's less than or equal to lhs
0219     TensorUInt128<uint64_t, uint64_t> power2(1);
0220     TensorUInt128<uint64_t, uint64_t> d(rhs);
0221     TensorUInt128<uint64_t, uint64_t> tmp(lhs - d);
0222     while (lhs >= d) {
0223       tmp = tmp - d;
0224       d = d + d;
0225       power2 = power2 + power2;
0226     }
0227
0228     tmp = TensorUInt128<uint64_t, uint64_t>(lhs.high, lhs.low);
0229     TensorUInt128<uint64_t, uint64_t> result(0);
0230     while (power2 != TensorUInt128<static_val<0>, static_val<0> >(0)) {
0231       if (tmp >= d) {
0232         tmp = tmp - d;
0233         result = result + power2;
0234       }
0235       // Shift right
0236       power2 = TensorUInt128<uint64_t, uint64_t>(power2.high >> 1, (power2.low >> 1) | (power2.high << 63));
0237       d = TensorUInt128<uint64_t, uint64_t>(d.high >> 1, (d.low >> 1) | (d.high << 63));
0238     }
0239
0240     return result;
0241   }
0242 }
0243
0244
0245 }  // namespace internal
0246 }  // namespace Eigen
0247
0248
0249 #endif  // EIGEN_CXX11_TENSOR_TENSOR_UINT128_H