root/ROOT/RFloat16.hxx

0001 // @(#)root/base
0002
0003 /*************************************************************************
0004  * Copyright (C) 1995-2023, Rene Brun and Fons Rademakers.               *
0005  * All rights reserved.                                                  *
0006  *                                                                       *
0007  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0008  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0009  *************************************************************************/
0010
0011 #include <cstdint>
0012 #include <cstring>
0013
0014 #ifndef ROOT_RFloat16
0015 #define ROOT_RFloat16
0016
0017 /**
0018  * Conversion functions between full- and half-precision floats. The code used here is taken (with some modifications)
0019  * from the `half` C++ library (https://half.sourceforge.net/index.html), distributed under the MIT license.
0020  *
0021  * Original license:
0022  *
0023  * The MIT License
0024  *
0025  * Copyright (c) 2012-2021 Christian Rau
0026  *
0027  * Permission is hereby granted, free of charge, to any person obtaining a copy
0028  * of this software and associated documentation files (the "Software"), to deal
0029  * in the Software without restriction, including without limitation the rights
0030  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
0031  * copies of the Software, and to permit persons to whom the Software is
0032  * furnished to do so, subject to the following conditions:
0033  *
0034  * The above copyright notice and this permission notice shall be included in
0035  * all copies or substantial portions of the Software.
0036  *
0037  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
0038  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
0039  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
0040  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
0041  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
0042  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
0043  * THE SOFTWARE.
0044  */
0045
#ifndef HALF_ENABLE_F16C_INTRINSICS
/// Enable F16C instruction set intrinsics.
/// Defining this to 1 enables the use of [F16C compiler intrinsics](https://en.wikipedia.org/wiki/F16C) for converting
/// between half-precision and single-precision values which may result in improved performance. This will not perform
/// additional checks for support of the F16C instruction set, so an appropriate target platform is required when
/// enabling this feature.
///
/// Unless predefined it will be enabled automatically when the `__F16C__` symbol is defined, which some compilers do on
/// supporting platforms. The macro always ends up with an explicit value (1 or 0) so that it never expands to an
/// undefined identifier, which would trigger `-Wundef` diagnostics in the `#if` below.
#ifdef __F16C__
#define HALF_ENABLE_F16C_INTRINSICS 1
#else
#define HALF_ENABLE_F16C_INTRINSICS 0
#endif
#endif
#if HALF_ENABLE_F16C_INTRINSICS
#include <immintrin.h>
#endif
0060
namespace ROOT {
namespace Internal {
////////////////////////////////////////////////////////////////////////////////
/// \brief Produce the half-precision bit pattern returned when a value overflows.
///
/// Every exponent bit (mask 0x7C00) is set on top of the bits already present in \p value; for a sign-only input
/// this yields the bit pattern of a signed infinity.
///
/// \param[in] value Half-precision bit pattern holding only the sign bit (defaults to a cleared sign bit)
///
/// \return \p value with all exponent bits set
constexpr std::uint16_t GetOverflowedValue(std::uint16_t value = 0)
{
   return static_cast<std::uint16_t>(0x7C00 | value);
}

////////////////////////////////////////////////////////////////////////////////
/// \brief Apply round-to-nearest (ties to even) to a truncated half-precision bit pattern.
///
/// With \p guardBit and \p stickyBit being 0 or 1, the pattern is incremented by one exactly when the guard bit is
/// set and either the sticky bit or the lowest bit of \p value is set. A carry out of the mantissa propagates into
/// the exponent bits through plain integer addition.
///
/// \param[in] value The finite, truncated half-precision bit pattern
/// \param[in] guardBit The most significant of the discarded bits
/// \param[in] stickyBit Logical OR of the remaining discarded bits
///
/// \return The rounded half-precision bit pattern
constexpr std::uint16_t GetRoundedValue(std::uint16_t value, int guardBit, int stickyBit)
{
   return static_cast<std::uint16_t>(((value | stickyBit) & guardBit) + value);
}

////////////////////////////////////////////////////////////////////////////////
/// \brief Convert an IEEE single-precision float into a half-precision bit pattern.
///
/// Uses the F16C intrinsics when `HALF_ENABLE_F16C_INTRINSICS` is set; otherwise the bits are converted in software,
/// classifying the input by magnitude and rounding to nearest.
///
/// Credit for this goes to [Jeroen van der Zijp](http://fox-toolkit.org/ftp/fasthalffloatconversion.pdf).
///
/// \param[in] value The single-precision value to convert
///
/// \return The half-precision bit pattern corresponding to \p value
inline std::uint16_t FloatToHalf(float value)
{
#if HALF_ENABLE_F16C_INTRINSICS
   const __m128i packedHalf = _mm_cvtps_ph(_mm_set_ss(value), _MM_FROUND_TO_NEAREST_INT);
   return static_cast<std::uint16_t>(_mm_cvtsi128_si32(packedHalf));
#else
   std::uint32_t rawBits;
   std::memcpy(&rawBits, &value, sizeof(float));

   // Move the float sign bit (bit 31) to the half sign position (bit 15) and keep the magnitude separately.
   const std::uint16_t signBit = static_cast<std::uint16_t>((rawBits >> 16) & 0x8000);
   const std::uint32_t magnitude = rawBits & 0x7FFFFFFF;

   // Below 2^-25: the value vanishes, only the sign survives.
   if (magnitude < 0x33000000)
      return signBit;

   // [2^-25, 2^-14): the result is a subnormal half. Make the implicit leading one explicit and shift the
   // significand right by an exponent-dependent amount, keeping track of the discarded bits for rounding.
   if (magnitude < 0x38800000) {
      const int shift = static_cast<int>(125 - (magnitude >> 23));
      const std::uint32_t significand = (magnitude & 0x7FFFFF) | 0x800000;
      const std::uint32_t discardedMask = (static_cast<std::uint32_t>(1) << shift) - 1;
      const int guard = static_cast<int>((significand >> shift) & 1);
      const int sticky = (significand & discardedMask) != 0;
      return GetRoundedValue(static_cast<std::uint16_t>(signBit | (significand >> (shift + 1))), guard, sticky);
   }

   // [2^-14, 2^16): the result is a normal half. Rebias the exponent (127 -> 15, i.e. subtract 112) and keep the
   // top 10 mantissa bits; the 13 discarded bits drive the rounding.
   if (magnitude < 0x47800000) {
      const std::uint32_t exponentField = ((magnitude >> 23) - 112) << 10;
      const std::uint32_t mantissaField = (magnitude >> 13) & 0x3FF;
      const int guard = static_cast<int>((magnitude >> 12) & 1);
      const int sticky = (magnitude & 0xFFF) != 0;
      return GetRoundedValue(static_cast<std::uint16_t>(signBit | exponentField | mantissaField), guard, sticky);
   }

   // Finite but at least 2^16: too large for half-precision.
   if (magnitude < 0x7F800000)
      return GetOverflowedValue(signBit);

   // Infinity or NaN: all exponent bits set. For NaN additionally set the top mantissa bit (0x200) and carry over
   // the upper 10 bits of the float mantissa.
   std::uint16_t special = static_cast<std::uint16_t>(signBit | 0x7C00);
   if (magnitude > 0x7F800000)
      special = static_cast<std::uint16_t>(special | 0x200 | ((magnitude >> 13) & 0x3FF));
   return special;
#endif
}

////////////////////////////////////////////////////////////////////////////////
/// \brief Convert a half-precision bit pattern into an IEEE single-precision float.
///
/// Uses the F16C intrinsics when `HALF_ENABLE_F16C_INTRINSICS` is set; otherwise the single-precision bit pattern is
/// assembled in software.
///
/// Credit for this goes to [Jeroen van der Zijp](http://fox-toolkit.org/ftp/fasthalffloatconversion.pdf).
///
/// \param[in] value The half-precision bit pattern to convert
///
/// \return The single-precision value corresponding to \p value
inline float HalfToFloat(std::uint16_t value)
{
#if HALF_ENABLE_F16C_INTRINSICS
   const __m128 widened = _mm_cvtph_ps(_mm_cvtsi32_si128(value));
   return _mm_cvtss_f32(widened);
#else
   // Start from the sign bit moved from bit 15 to bit 31.
   std::uint32_t rawBits = static_cast<std::uint32_t>(value & 0x8000) << 16;
   int magnitude = value & 0x7FFF;

   if (magnitude != 0) {
      // Exponent rebias: 0x38000000 is 112 << 23 (127 - 15) for finite values; infinities and NaNs use
      // 0x70000000 (224 << 23) so that adding the all-ones half exponent yields the all-ones float exponent.
      rawBits |= (magnitude >= 0x7C00) ? 0x70000000 : 0x38000000;

      // Normalize subnormal halves: shift the mantissa up until the implicit-one position (0x400) is reached,
      // lowering the float exponent by one for every step.
      while (magnitude < 0x400) {
         magnitude <<= 1;
         rawBits -= 0x800000;
      }

      // Place exponent and mantissa at their single-precision positions.
      rawBits += static_cast<std::uint32_t>(magnitude) << 13;
   }

   float result;
   std::memcpy(&result, &rawBits, sizeof(float));
   return result;
#endif
}
} // namespace Internal
} // namespace ROOT
0151
0152 #endif // ROOT_RFloat16