![]() |
|
|||
File indexing completed on 2025-09-17 09:14:28
// @(#)root/base

/*************************************************************************
 * Copyright (C) 1995-2023, Rene Brun and Fons Rademakers.               *
 * All rights reserved.                                                  *
 *                                                                       *
 * For the licensing terms see $ROOTSYS/LICENSE.                         *
 * For the list of contributors see $ROOTSYS/README/CREDITS.             *
 *************************************************************************/

#include <cstdint>
#include <cstring>

#ifndef ROOT_RFloat16
#define ROOT_RFloat16

/**
 * Conversion functions between full- and half-precision floats. The code used here is taken (with some modifications)
 * from the `half` C++ library (https://half.sourceforge.net/index.html), distributed under the MIT license.
 *
 * Original license:
 *
 * The MIT License
 *
 * Copyright (c) 2012-2021 Christian Rau
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#ifndef HALF_ENABLE_F16C_INTRINSICS
/// Enable F16C instruction set intrinsics.
/// Defining this to 1 enables the use of [F16C compiler intrinsics](https://en.wikipedia.org/wiki/F16C) for converting
/// between half-precision and single-precision values which may result in improved performance. This will not perform
/// additional checks for support of the F16C instruction set, so an appropriate target platform is required when
/// enabling this feature.
///
/// Unless predefined it will be enabled automatically when the `__F16C__` symbol is defined, which some compilers do on
/// supporting platforms. The macro always expands to an explicit 0 or 1 so that the `#if` below never evaluates an
/// undefined identifier (which would trigger `-Wundef`).
#ifdef __F16C__
#define HALF_ENABLE_F16C_INTRINSICS 1
#else
#define HALF_ENABLE_F16C_INTRINSICS 0
#endif
#endif
#if HALF_ENABLE_F16C_INTRINSICS
#include <immintrin.h>
#endif

namespace ROOT {
namespace Internal {
////////////////////////////////////////////////////////////////////////////////
/// \brief Get the half-precision overflow.
///
/// Sets all five exponent bits (0x7C00) on top of the given bits, which for a sign-only input yields the
/// half-precision infinity of that sign.
///
/// \param[in] value Half-precision value with sign bit only
///
/// \return Rounded overflowing half-precision value
constexpr std::uint16_t GetOverflowedValue(std::uint16_t value = 0)
{
   return static_cast<std::uint16_t>(value | 0x7C00);
}

////////////////////////////////////////////////////////////////////////////////
/// \brief Round the given half-precision number to the nearest representable value.
///
/// The value is incremented by one unit in the last place when the guard bit is set and either the sticky bit or the
/// lowest bit of `value` is set, i.e. ties are resolved towards the even mantissa.
///
/// \param[in] value The finite half-precision number to round
/// \param[in] guardBit The most significant discarded bit
/// \param[in] stickyBit Logical OR of all but the most significant discarded bits
///
/// \return The nearest-rounded half-precision value
constexpr std::uint16_t GetRoundedValue(std::uint16_t value, int guardBit, int stickyBit)
{
   return static_cast<std::uint16_t>(value + (guardBit & (stickyBit | value)));
}

////////////////////////////////////////////////////////////////////////////////
/// \brief Convert an IEEE single-precision float to half-precision.
///
/// Credit for this goes to [Jeroen van der Zijp](http://fox-toolkit.org/ftp/fasthalffloatconversion.pdf).
///
/// \param[in] value The single-precision value to convert
///
/// \return The converted half-precision value
inline std::uint16_t FloatToHalf(float value)
{
#if HALF_ENABLE_F16C_INTRINSICS
   return static_cast<std::uint16_t>(
      _mm_cvtsi128_si32(_mm_cvtps_ph(_mm_set_ss(value), _MM_FROUND_TO_NEAREST_INT)));
#else
   static_assert(sizeof(float) == sizeof(std::uint32_t), "FloatToHalf requires a 32-bit float");

   // Reinterpret the float as its raw bit pattern without violating aliasing rules.
   std::uint32_t fbits;
   std::memcpy(&fbits, &value, sizeof(float));

   // Move the sign bit from position 31 to position 15, then work on the magnitude only.
   const std::uint16_t sign = static_cast<std::uint16_t>((fbits >> 16) & 0x8000);
   fbits &= 0x7FFFFFFF;

   // Infinity or NaN: all exponent bits set. For NaN, force mantissa bit 0x200 and keep the top mantissa bits.
   if (fbits >= 0x7F800000)
      return static_cast<std::uint16_t>(sign | 0x7C00 |
                                        ((fbits > 0x7F800000) ? (0x200 | ((fbits >> 13) & 0x3FF)) : 0));

   // Magnitude of at least 2^16 (0x47800000): not representable, return the overflow value.
   if (fbits >= 0x47800000)
      return GetOverflowedValue(sign);

   // Magnitude of at least 2^-14 (0x38800000): normal half. Re-bias the exponent (127 - 15 = 112), keep the top
   // 10 mantissa bits and round using bit 12 as guard and bits 0-11 as sticky.
   if (fbits >= 0x38800000)
      return GetRoundedValue(
         static_cast<std::uint16_t>(sign | (((fbits >> 23) - 112) << 10) | ((fbits >> 13) & 0x3FF)),
         static_cast<int>((fbits >> 12) & 1), (fbits & 0xFFF) != 0);

   // Magnitude of at least 2^-25 (0x33000000): subnormal half. Make the implicit leading one explicit and shift the
   // mantissa right by an exponent-dependent amount; bit `shift` is the guard, everything below it is sticky.
   if (fbits >= 0x33000000) {
      const int shift = static_cast<int>(125 - (fbits >> 23));
      fbits = (fbits & 0x7FFFFF) | 0x800000;
      return GetRoundedValue(static_cast<std::uint16_t>(sign | (fbits >> (shift + 1))),
                             static_cast<int>((fbits >> shift) & 1),
                             (fbits & ((static_cast<std::uint32_t>(1) << shift) - 1)) != 0);
   }

   // Anything smaller becomes a zero carrying the original sign.
   return sign;
#endif
}

////////////////////////////////////////////////////////////////////////////////
/// \brief Convert an IEEE half-precision float to single-precision.
///
/// Credit for this goes to [Jeroen van der Zijp](http://fox-toolkit.org/ftp/fasthalffloatconversion.pdf).
///
/// \param[in] value The half-precision value to convert
///
/// \return The converted single-precision value
inline float HalfToFloat(std::uint16_t value)
{
#if HALF_ENABLE_F16C_INTRINSICS
   return _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(value)));
#else
   static_assert(sizeof(float) == sizeof(std::uint32_t), "HalfToFloat requires a 32-bit float");

   // Start from the sign bit moved from position 15 to position 31.
   std::uint32_t fbits = static_cast<std::uint32_t>(value & 0x8000) << 16;
   int magnitude = value & 0x7FFF;
   if (magnitude) {
      // Exponent re-bias: 0x38000000 is (127 - 15) << 23. It is doubled for infinity/NaN inputs so that adding the
      // shifted half exponent below produces an all-ones single-precision exponent.
      fbits |= static_cast<std::uint32_t>(0x38000000) << static_cast<unsigned>(magnitude >= 0x7C00);
      // Subnormal input: shift the mantissa up until bit 0x400 is set, lowering the exponent by one per shift.
      while (magnitude < 0x400) {
         magnitude <<= 1;
         fbits -= 0x800000;
      }
      fbits += static_cast<std::uint32_t>(magnitude) << 13;
   }
   float out;
   std::memcpy(&out, &fbits, sizeof(float));
   return out;
#endif
}
} // namespace Internal
} // namespace ROOT

#endif // ROOT_RFloat16
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
![]() ![]() |