detail/fast_float/digit_comparison.hpp

0001 // Copyright 2020-2023 Daniel Lemire
0002 // Copyright 2023 Matt Borland
0003 // Distributed under the Boost Software License, Version 1.0.
0004 // https://www.boost.org/LICENSE_1_0.txt
0005 //
0006 // Derivative of: https://github.com/fastfloat/fast_float
0007
0008 #ifndef BOOST_CHARCONV_DETAIL_FASTFLOAT_DIGIT_COMPARISON_HPP
0009 #define BOOST_CHARCONV_DETAIL_FASTFLOAT_DIGIT_COMPARISON_HPP
0010
0011 #include <boost/charconv/detail/fast_float/float_common.hpp>
0012 #include <boost/charconv/detail/fast_float/bigint.hpp>
0013 #include <boost/charconv/detail/fast_float/ascii_number.hpp>
0014 #include <algorithm>
0015 #include <cstdint>
0016 #include <cstring>
0017 #include <iterator>
0018
0019 namespace boost { namespace charconv { namespace detail { namespace fast_float {
0020
0021 // 1e0 to 1e19
0022 constexpr static uint64_t powers_of_ten_uint64[] = {
0023     1UL, 10UL, 100UL, 1000UL, 10000UL, 100000UL, 1000000UL, 10000000UL, 100000000UL,
0024     1000000000UL, 10000000000UL, 100000000000UL, 1000000000000UL, 10000000000000UL,
0025     100000000000000UL, 1000000000000000UL, 10000000000000000UL, 100000000000000000UL,
0026     1000000000000000000UL, 10000000000000000000UL};
0027
0028 // calculate the exponent, in scientific notation, of the number.
0029 // this algorithm is not even close to optimized, but it has no practical
0030 // effect on performance: in order to have a faster algorithm, we'd need
0031 // to slow down performance for faster algorithms, and this is still fast.
0032 template <typename UC>
0033 BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14
0034 int32_t scientific_exponent(parsed_number_string_t<UC> & num) noexcept {
0035   uint64_t mantissa = num.mantissa;
0036   int32_t exponent = int32_t(num.exponent);
0037   while (mantissa >= 10000) {
0038     mantissa /= 10000;
0039     exponent += 4;
0040   }
0041   while (mantissa >= 100) {
0042     mantissa /= 100;
0043     exponent += 2;
0044   }
0045   while (mantissa >= 10) {
0046     mantissa /= 10;
0047     exponent += 1;
0048   }
0049   return exponent;
0050 }
0051
0052 // this converts a native floating-point number to an extended-precision float.
0053 template <typename T>
0054 BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
0055 adjusted_mantissa to_extended(T value) noexcept {
0056   using equiv_uint = typename binary_format<T>::equiv_uint;
0057   constexpr equiv_uint exponent_mask = binary_format<T>::exponent_mask();
0058   constexpr equiv_uint mantissa_mask = binary_format<T>::mantissa_mask();
0059   constexpr equiv_uint hidden_bit_mask = binary_format<T>::hidden_bit_mask();
0060
0061   adjusted_mantissa am;
0062   int32_t bias = binary_format<T>::mantissa_explicit_bits() - binary_format<T>::minimum_exponent();
0063   equiv_uint bits;
0064 #if BOOST_CHARCONV_FASTFLOAT_HAS_BIT_CAST
0065   bits = std::bit_cast<equiv_uint>(value);
0066 #else
0067   ::memcpy(&bits, &value, sizeof(T));
0068 #endif
0069   if ((bits & exponent_mask) == 0) {
0070     // denormal
0071     am.power2 = 1 - bias;
0072     am.mantissa = bits & mantissa_mask;
0073   } else {
0074     // normal
0075     am.power2 = int32_t((bits & exponent_mask) >> binary_format<T>::mantissa_explicit_bits());
0076     am.power2 -= bias;
0077     am.mantissa = (bits & mantissa_mask) | hidden_bit_mask;
0078   }
0079
0080   return am;
0081 }
0082
0083 // get the extended precision value of the halfway point between b and b+u.
0084 // we are given a native float that represents b, so we need to adjust it
0085 // halfway between b and b+u.
0086 template <typename T>
0087 BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
0088 adjusted_mantissa to_extended_halfway(T value) noexcept {
0089   adjusted_mantissa am = to_extended(value);
0090   am.mantissa <<= 1;
0091   am.mantissa += 1;
0092   am.power2 -= 1;
0093   return am;
0094 }
0095
0096 // round an extended-precision float to the nearest machine float.
0097 template <typename T, typename callback>
0098 BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14
0099 void round(adjusted_mantissa& am, callback cb) noexcept {
0100   int32_t mantissa_shift = 64 - binary_format<T>::mantissa_explicit_bits() - 1;
0101   if (-am.power2 >= mantissa_shift) {
0102     // have a denormal float
0103     int32_t shift = -am.power2 + 1;
0104     cb(am, std::min<int32_t>(shift, 64));
0105     // check for round-up: if rounding-nearest carried us to the hidden bit.
0106     am.power2 = (am.mantissa < (uint64_t(1) << binary_format<T>::mantissa_explicit_bits())) ? 0 : 1;
0107     return;
0108   }
0109
0110   // have a normal float, use the default shift.
0111   cb(am, mantissa_shift);
0112
0113   // check for carry
0114   if (am.mantissa >= (uint64_t(2) << binary_format<T>::mantissa_explicit_bits())) {
0115     am.mantissa = (uint64_t(1) << binary_format<T>::mantissa_explicit_bits());
0116     am.power2++;
0117   }
0118
0119   // check for infinite: we could have carried to an infinite power
0120   am.mantissa &= ~(uint64_t(1) << binary_format<T>::mantissa_explicit_bits());
0121   if (am.power2 >= binary_format<T>::infinite_power()) {
0122     am.power2 = binary_format<T>::infinite_power();
0123     am.mantissa = 0;
0124   }
0125 }
0126
0127 template <typename callback>
0128 BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14
0129 void round_nearest_tie_even(adjusted_mantissa& am, int32_t shift, callback cb) noexcept {
0130   const uint64_t mask
0131   = (shift == 64)
0132     ? UINT64_MAX
0133     : (uint64_t(1) << shift) - 1;
0134   const uint64_t halfway
0135   = (shift == 0)
0136     ? 0
0137     : uint64_t(1) << (shift - 1);
0138   uint64_t truncated_bits = am.mantissa & mask;
0139   bool is_above = truncated_bits > halfway;
0140   bool is_halfway = truncated_bits == halfway;
0141
0142   // shift digits into position
0143   if (shift == 64) {
0144     am.mantissa = 0;
0145   } else {
0146     am.mantissa >>= shift;
0147   }
0148   am.power2 += shift;
0149
0150   bool is_odd = (am.mantissa & 1) == 1;
0151   am.mantissa += uint64_t(cb(is_odd, is_halfway, is_above));
0152 }
0153
0154 BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14
0155 void round_down(adjusted_mantissa& am, int32_t shift) noexcept {
0156   if (shift == 64) {
0157     am.mantissa = 0;
0158   } else {
0159     am.mantissa >>= shift;
0160   }
0161   am.power2 += shift;
0162 }
0163 template <typename UC>
0164 BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
0165 void skip_zeros(UC const * & first, UC const * last) noexcept {
0166   uint64_t val;
0167   while (!cpp20_and_in_constexpr() && std::distance(first, last) >= int_cmp_len<UC>()) {
0168     ::memcpy(&val, first, sizeof(uint64_t));
0169     if (val != int_cmp_zeros<UC>()) {
0170       break;
0171     }
0172     first += int_cmp_len<UC>();
0173   }
0174   while (first != last) {
0175     if (*first != UC('0')) {
0176       break;
0177     }
0178     first++;
0179   }
0180 }
0181
0182 // determine if any non-zero digits were truncated.
0183 // all characters must be valid digits.
0184 template <typename UC>
0185 BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
0186 bool is_truncated(UC const * first, UC const * last) noexcept {
0187   // do 8-bit optimizations, can just compare to 8 literal 0s.
0188   uint64_t val;
0189   while (!cpp20_and_in_constexpr() && std::distance(first, last) >= int_cmp_len<UC>()) {
0190     ::memcpy(&val, first, sizeof(uint64_t));
0191     if (val != int_cmp_zeros<UC>()) {
0192       return true;
0193     }
0194     first += int_cmp_len<UC>();
0195   }
0196   while (first != last) {
0197     if (*first != UC('0')) {
0198       return true;
0199     }
0200     ++first;
0201   }
0202   return false;
0203 }
0204 template <typename UC>
0205 BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
0206 bool is_truncated(span<const UC> s) noexcept {
0207   return is_truncated(s.ptr, s.ptr + s.len());
0208 }
0209
0210 BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
0211 void parse_eight_digits(const char16_t*& , limb& , size_t& , size_t& ) noexcept {
0212   // currently unused
0213 }
0214
0215 BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
0216 void parse_eight_digits(const char32_t*& , limb& , size_t& , size_t& ) noexcept {
0217   // currently unused
0218 }
0219
0220 BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
0221 void parse_eight_digits(const char*& p, limb& value, size_t& counter, size_t& count) noexcept {
0222   value = value * 100000000 + parse_eight_digits_unrolled(p);
0223   p += 8;
0224   counter += 8;
0225   count += 8;
0226 }
0227
0228 template <typename UC>
0229 BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14
0230 void parse_one_digit(UC const *& p, limb& value, size_t& counter, size_t& count) noexcept {
0231   value = value * 10 + limb(*p - UC('0'));
0232   p++;
0233   counter++;
0234   count++;
0235 }
0236
0237 BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
0238 void add_native(bigint& big, limb power, limb value) noexcept {
0239   big.mul(power);
0240   big.add(value);
0241 }
0242
0243 BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
0244 void round_up_bigint(bigint& big, size_t& count) noexcept {
0245   // need to round-up the digits, but need to avoid rounding
0246   // ....9999 to ...10000, which could cause a false halfway point.
0247   add_native(big, 10, 1);
0248   count++;
0249 }
0250
0251 // parse the significant digits into a big integer
0252 template <typename UC>
0253 inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
0254 void parse_mantissa(bigint& result, parsed_number_string_t<UC>& num, size_t max_digits, size_t& digits) noexcept {
0255   // try to minimize the number of big integer and scalar multiplication.
0256   // therefore, try to parse 8 digits at a time, and multiply by the largest
0257   // scalar value (9 or 19 digits) for each step.
0258   size_t counter = 0;
0259   digits = 0;
0260   limb value = 0;
0261 #ifdef BOOST_CHARCONV_FASTFLOAT_64BIT_LIMB
0262   constexpr size_t step = 19;
0263 #else
0264   constexpr size_t step = 9;
0265 #endif
0266
0267   // process all integer digits.
0268   UC const * p = num.integer.ptr;
0269   UC const * pend = p + num.integer.len();
0270   skip_zeros(p, pend);
0271   // process all digits, in increments of step per loop
0272   while (p != pend) {
0273     if (std::is_same<UC,char>::value) {
0274       while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && (max_digits - digits >= 8)) {
0275         parse_eight_digits(p, value, counter, digits);
0276       }
0277     }
0278     while (counter < step && p != pend && digits < max_digits) {
0279       parse_one_digit(p, value, counter, digits);
0280     }
0281     if (digits == max_digits) {
0282       // add the temporary value, then check if we've truncated any digits
0283       add_native(result, limb(powers_of_ten_uint64[counter]), value);
0284       bool truncated = is_truncated(p, pend);
0285       if (num.fraction.ptr != nullptr) {
0286         truncated |= is_truncated(num.fraction);
0287       }
0288       if (truncated) {
0289         round_up_bigint(result, digits);
0290       }
0291       return;
0292     } else {
0293       add_native(result, limb(powers_of_ten_uint64[counter]), value);
0294       counter = 0;
0295       value = 0;
0296     }
0297   }
0298
0299   // add our fraction digits, if they're available.
0300   if (num.fraction.ptr != nullptr) {
0301     p = num.fraction.ptr;
0302     pend = p + num.fraction.len();
0303     if (digits == 0) {
0304       skip_zeros(p, pend);
0305     }
0306     // process all digits, in increments of step per loop
0307     while (p != pend) {
0308       if (std::is_same<UC,char>::value) {
0309         while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && (max_digits - digits >= 8)) {
0310           parse_eight_digits(p, value, counter, digits);
0311         }
0312       }
0313       while (counter < step && p != pend && digits < max_digits) {
0314         parse_one_digit(p, value, counter, digits);
0315       }
0316       if (digits == max_digits) {
0317         // add the temporary value, then check if we've truncated any digits
0318         add_native(result, limb(powers_of_ten_uint64[counter]), value);
0319         bool truncated = is_truncated(p, pend);
0320         if (truncated) {
0321           round_up_bigint(result, digits);
0322         }
0323         return;
0324       } else {
0325         add_native(result, limb(powers_of_ten_uint64[counter]), value);
0326         counter = 0;
0327         value = 0;
0328       }
0329     }
0330   }
0331
0332   if (counter != 0) {
0333     add_native(result, limb(powers_of_ten_uint64[counter]), value);
0334   }
0335 }
0336
0337 template <typename T>
0338 inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
0339 adjusted_mantissa positive_digit_comp(bigint& bigmant, int32_t exponent) noexcept {
0340   BOOST_CHARCONV_FASTFLOAT_ASSERT(bigmant.pow10(uint32_t(exponent)));
0341   adjusted_mantissa answer;
0342   bool truncated;
0343   answer.mantissa = bigmant.hi64(truncated);
0344   int bias = binary_format<T>::mantissa_explicit_bits() - binary_format<T>::minimum_exponent();
0345   answer.power2 = bigmant.bit_length() - 64 + bias;
0346
0347   round<T>(answer, [truncated](adjusted_mantissa& a, int32_t shift) {
0348     round_nearest_tie_even(a, shift, [truncated](bool is_odd, bool is_halfway, bool is_above) -> bool {
0349       return is_above || (is_halfway && truncated) || (is_odd && is_halfway);
0350     });
0351   });
0352
0353   return answer;
0354 }
0355
0356 // the scaling here is quite simple: we have, for the real digits `m * 10^e`,
0357 // and for the theoretical digits `n * 2^f`. Since `e` is always negative,
0358 // to scale them identically, we do `n * 2^f * 5^-f`, so we now have `m * 2^e`.
0359 // we then need to scale by `2^(f- e)`, and then the two significant digits
0360 // are of the same magnitude.
0361 template <typename T>
0362 inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
0363 adjusted_mantissa negative_digit_comp(bigint& bigmant, adjusted_mantissa am, int32_t exponent) noexcept {
0364   bigint& real_digits = bigmant;
0365   int32_t real_exp = exponent;
0366
0367   // get the value of `b`, rounded down, and get a bigint representation of b+h
0368   adjusted_mantissa am_b = am;
0369   // gcc7 buf: use a lambda to remove the noexcept qualifier bug with -Wnoexcept-type.
0370   round<T>(am_b, [](adjusted_mantissa&a, int32_t shift) { round_down(a, shift); });
0371   T b;
0372   to_float(false, am_b, b);
0373   adjusted_mantissa theor = to_extended_halfway(b);
0374   bigint theor_digits(theor.mantissa);
0375   int32_t theor_exp = theor.power2;
0376
0377   // scale real digits and theor digits to be same power.
0378   int32_t pow2_exp = theor_exp - real_exp;
0379   uint32_t pow5_exp = uint32_t(-real_exp);
0380   if (pow5_exp != 0) {
0381     BOOST_CHARCONV_FASTFLOAT_ASSERT(theor_digits.pow5(pow5_exp));
0382   }
0383   if (pow2_exp > 0) {
0384     BOOST_CHARCONV_FASTFLOAT_ASSERT(theor_digits.pow2(uint32_t(pow2_exp)));
0385   } else if (pow2_exp < 0) {
0386     BOOST_CHARCONV_FASTFLOAT_ASSERT(real_digits.pow2(uint32_t(-pow2_exp)));
0387   }
0388
0389   // compare digits, and use it to director rounding
0390   int ord = real_digits.compare(theor_digits);
0391   adjusted_mantissa answer = am;
0392   round<T>(answer, [ord](adjusted_mantissa& a, int32_t shift) {
0393     round_nearest_tie_even(a, shift, [ord](bool is_odd, bool, bool) -> bool {
0394       if (ord > 0) {
0395         return true;
0396       } else if (ord < 0) {
0397         return false;
0398       } else {
0399         return is_odd;
0400       }
0401     });
0402   });
0403
0404   return answer;
0405 }
0406
0407 // parse the significant digits as a big integer to unambiguously round
0408 // the significant digits. here, we are trying to determine how to round
0409 // an extended float representation close to `b+h`, halfway between `b`
0410 // (the float rounded-down) and `b+u`, the next positive float. this
0411 // algorithm is always correct, and uses one of two approaches. when
0412 // the exponent is positive relative to the significant digits (such as
0413 // 1234), we create a big-integer representation, get the high 64-bits,
0414 // determine if any lower bits are truncated, and use that to direct
0415 // rounding. in case of a negative exponent relative to the significant
0416 // digits (such as 1.2345), we create a theoretical representation of
0417 // `b` as a big-integer type, scaled to the same binary exponent as
0418 // the actual digits. we then compare the big integer representations
0419 // of both, and use that to direct rounding.
0420 template <typename T, typename UC>
0421 inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
0422 adjusted_mantissa digit_comp(parsed_number_string_t<UC>& num, adjusted_mantissa am) noexcept {
0423   // remove the invalid exponent bias
0424   am.power2 -= invalid_am_bias;
0425
0426   int32_t sci_exp = scientific_exponent(num);
0427   size_t max_digits = binary_format<T>::max_digits();
0428   size_t digits = 0;
0429   bigint bigmant;
0430   parse_mantissa(bigmant, num, max_digits, digits);
0431   // can't underflow, since digits is at most max_digits.
0432   int32_t exponent = sci_exp + 1 - int32_t(digits);
0433   if (exponent >= 0) {
0434     return positive_digit_comp<T>(bigmant, exponent);
0435   } else {
0436     return negative_digit_comp<T>(bigmant, am, exponent);
0437   }
0438 }
0439
0440 }}}} // namespace fast_float
0441
0442 #endif