charconv/detail/compute_float64.hpp

0001 // Copyright 2020-2023 Daniel Lemire
0002 // Copyright 2023 Matt Borland
0003 // Distributed under the Boost Software License, Version 1.0.
0004 // https://www.boost.org/LICENSE_1_0.txt
0005
0006 #ifndef BOOST_CHARCONV_DETAIL_COMPUTE_FLOAT64_HPP
0007 #define BOOST_CHARCONV_DETAIL_COMPUTE_FLOAT64_HPP
0008
0009 #include <boost/charconv/detail/config.hpp>
0010 #include <boost/charconv/detail/significand_tables.hpp>
0011 #include <boost/charconv/detail/emulated128.hpp>
0012 #include <boost/core/bit.hpp>
0013 #include <cstdint>
0014 #include <cfloat>
0015 #include <cstring>
0016 #include <cmath>
0017
0018 namespace boost { namespace charconv { namespace detail {
0019
0020 static constexpr double powers_of_ten[] = {
0021     1e0,  1e1,  1e2,  1e3,  1e4,  1e5,  1e6,  1e7,  1e8,  1e9,  1e10, 1e11,
0022     1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22
0023 };
0024
0025 // Attempts to compute i * 10^(power) exactly; and if "negative" is true, negate the result.
0026 //
0027 // This function will only work in some cases, when it does not work, success is
0028 // set to false. This should work *most of the time* (like 99% of the time).
0029 // We assume that power is in the [-325, 308] interval.
0030 inline double compute_float64(std::int64_t power, std::uint64_t i, bool negative, bool& success) noexcept
0031 {
0032     static constexpr auto smallest_power = -325;
0033     static constexpr auto largest_power = 308;
0034
0035     // We start with a fast path
0036     // It was described in Clinger WD.
0037     // How to read floating point numbers accurately.
0038     // ACM SIGPLAN Notices. 1990
0039 #if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0)
0040     if (0 <= power && power <= 22 && i <= UINT64_C(9007199254740991))
0041 #else
0042     if (-22 <= power && power <= 22 && i <= UINT64_C(9007199254740991))
0043 #endif
0044     {
0045         // The general idea is as follows.
0046         // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then
0047         // 1) Both s and p can be represented exactly as 64-bit floating-point
0048         // values
0049         // (binary64).
0050         // 2) Because s and p can be represented exactly as floating-point values,
0051         // then s * p
0052         // and s / p will produce correctly rounded values.
0053
0054         auto d = static_cast<double>(i);
0055
0056         if (power < 0)
0057         {
0058             d = d / powers_of_ten[-power];
0059         }
0060         else
0061         {
0062             d = d * powers_of_ten[power];
0063         }
0064
0065         if (negative)
0066         {
0067             d = -d;
0068         }
0069
0070         success = true;
0071         return d;
0072     }
0073
0074     // When 22 < power && power <  22 + 16, we could
0075     // hope for another, secondary fast path.  It was
0076     // described by David M. Gay in  "Correctly rounded
0077     // binary-decimal and decimal-binary conversions." (1990)
0078     // If you need to compute i * 10^(22 + x) for x < 16,
0079     // first compute i * 10^x, if you know that result is exact
0080     // (e.g., when i * 10^x < 2^53),
0081     // then you can still proceed and do (i * 10^x) * 10^22.
0082     // Is this worth your time?
0083     // You need  22 < power *and* power <  22 + 16 *and* (i * 10^(x-22) < 2^53)
0084     // for this second fast path to work.
0085     // If you have 22 < power *and* power <  22 + 16, and then you
0086     // optimistically compute "i * 10^(x-22)", there is still a chance that you
0087     // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of
0088     // this optimization maybe less common than we would like. Source:
0089     // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/
0090     // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html
0091
0092     if (i == 0 || power < smallest_power)
0093     {
0094         return negative ? -0.0 : 0.0;
0095     }
0096     else if (power > largest_power)
0097     {
0098         return negative ? -HUGE_VAL : HUGE_VAL;
0099     }
0100
0101     const std::uint64_t factor_significand = significands_table::significand_64[power - smallest_power];
0102     const std::int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63;
0103     int leading_zeros = boost::core::countl_zero(i);
0104     i <<= static_cast<std::uint64_t>(leading_zeros);
0105
0106     uint128 product = umul128(i, factor_significand);
0107     std::uint64_t low = product.low;
0108     std::uint64_t high = product.high;
0109
0110     // We know that upper has at most one leading zero because
0111     // both i and  factor_mantissa have a leading one. This means
0112     // that the result is at least as large as ((1<<63)*(1<<63))/(1<<64).
0113     //
0114     // As long as the first 9 bits of "upper" are not "1", then we
0115     // know that we have an exact computed value for the leading
0116     // 55 bits because any imprecision would play out as a +1, in the worst case.
0117     // Having 55 bits is necessary because we need 53 bits for the mantissa,
0118     // but we have to have one rounding bit and, we can waste a bit if the most
0119     // significant bit of the product is zero.
0120     //
0121     // We expect this next branch to be rarely taken (say 1% of the time).
0122     // When (upper & 0x1FF) == 0x1FF, it can be common for
0123     // lower + i < lower to be true (proba. much higher than 1%).
0124     if (BOOST_UNLIKELY((high & 0x1FF) == 0x1FF) && (low + i < low))
0125     {
0126         const std::uint64_t factor_significand_low = significands_table::significand_128[power - smallest_power];
0127         product = umul128(i, factor_significand_low);
0128         //const std::uint64_t product_low = product.low;
0129         const std::uint64_t product_middle2 = product.high;
0130         const std::uint64_t product_middle1 = low;
0131         std::uint64_t product_high = high;
0132         const std::uint64_t product_middle = product_middle1 + product_middle2;
0133
0134         if (product_middle < product_middle1)
0135         {
0136             product_high++;
0137         }
0138
0139         // Commented out because possibly unneeded
0140         // See: https://arxiv.org/pdf/2212.06644.pdf
0141         /*
0142         // we want to check whether mantissa *i + i would affect our result
0143         // This does happen, e.g. with 7.3177701707893310e+15
0144         if (((product_middle + 1 == 0) && ((product_high & 0x1FF) == 0x1FF) && (product_low + i < product_low)))
0145         {
0146             success = false;
0147             return 0;
0148         }
0149         */
0150
0151         low = product_middle;
0152         high = product_high;
0153     }
0154
0155     // The final significand should be 53 bits with a leading 1
0156     // We shift it so that it occupies 54 bits with a leading 1
0157     const std::uint64_t upper_bit = high >> 63;
0158     std::uint64_t significand = high >> (upper_bit + 9);
0159     leading_zeros += static_cast<int>(1 ^ upper_bit);
0160
0161     // If we have lots of trailing zeros we may fall between two values
0162     if (BOOST_UNLIKELY((low == 0) && ((high & 0x1FF) == 0) && ((significand & 3) == 1)))
0163     {
0164         // if significand & 1 == 1 we might need to round up
0165         success = false;
0166         return 0;
0167     }
0168
0169     significand += significand & 1;
0170     significand >>= 1;
0171
0172     // Here the significand < (1<<53), unless there is an overflow
0173     if (significand >= (UINT64_C(1) << 53))
0174     {
0175         significand = (UINT64_C(1) << 52);
0176         leading_zeros--;
0177     }
0178
0179     significand &= ~(UINT64_C(1) << 52);
0180     const auto real_exponent = static_cast<std::uint64_t>(exponent - leading_zeros);
0181
0182     // We have to check that real_exponent is in range, otherwise fail
0183     if (BOOST_UNLIKELY((real_exponent < 1) || (real_exponent > 2046)))
0184     {
0185         success = false;
0186         return 0;
0187     }
0188
0189     significand |= real_exponent << 52;
0190     significand |= ((static_cast<std::uint64_t>(negative) << 63));
0191
0192     double d;
0193     std::memcpy(&d, &significand, sizeof(d));
0194
0195     success = true;
0196     return d;
0197 }
0198
0199 }}} // Namespaces
0200
0201 #endif // BOOST_CHARCONV_DETAIL_COMPUTE_FLOAT64_HPP