|
|
|||
File indexing completed on 2025-12-16 10:14:57
0001 #pragma once 0002 #ifndef FP16_FP16_H 0003 #define FP16_FP16_H 0004 0005 #if defined(__cplusplus) && (__cplusplus >= 201103L) 0006 #include <cstdint> 0007 #include <cmath> 0008 #elif !defined(__OPENCL_VERSION__) 0009 #include <stdint.h> 0010 #include <math.h> 0011 #endif 0012 0013 #ifdef _MSC_VER 0014 #include <intrin.h> 0015 #endif 0016 0017 #include <fp16/bitcasts.h> 0018 0019 0020 /* 0021 * Convert a 16-bit floating-point number in IEEE half-precision format, in bit representation, to 0022 * a 32-bit floating-point number in IEEE single-precision format, in bit representation. 0023 * 0024 * @note The implementation doesn't use any floating-point operations. 0025 */ 0026 static inline uint32_t fp16_ieee_to_fp32_bits(uint16_t h) { 0027 /* 0028 * Extend the half-precision floating-point number to 32 bits and shift to the upper part of the 32-bit word: 0029 * +---+-----+------------+-------------------+ 0030 * | S |EEEEE|MM MMMM MMMM|0000 0000 0000 0000| 0031 * +---+-----+------------+-------------------+ 0032 * Bits 31 26-30 16-25 0-15 0033 * 0034 * S - sign bit, E - bits of the biased exponent, M - bits of the mantissa, 0 - zero bits. 0035 */ 0036 const uint32_t w = (uint32_t) h << 16; 0037 /* 0038 * Extract the sign of the input number into the high bit of the 32-bit word: 0039 * 0040 * +---+----------------------------------+ 0041 * | S |0000000 00000000 00000000 00000000| 0042 * +---+----------------------------------+ 0043 * Bits 31 0-31 0044 */ 0045 const uint32_t sign = w & UINT32_C(0x80000000); 0046 /* 0047 * Extract mantissa and biased exponent of the input number into the bits 0-30 of the 32-bit word: 0048 * 0049 * +---+-----+------------+-------------------+ 0050 * | 0 |EEEEE|MM MMMM MMMM|0000 0000 0000 0000| 0051 * +---+-----+------------+-------------------+ 0052 * Bits 30 27-31 17-26 0-16 0053 */ 0054 const uint32_t nonsign = w & UINT32_C(0x7FFFFFFF); 0055 /* 0056 * Renorm shift is the number of bits to shift mantissa left to make the half-precision number normalized. 0057 * If the initial number is normalized, some of its high 6 bits (sign == 0 and 5-bit exponent) equals one. 0058 * In this case renorm_shift == 0. If the number is denormalize, renorm_shift > 0. Note that if we shift 0059 * denormalized nonsign by renorm_shift, the unit bit of mantissa will shift into exponent, turning the 0060 * biased exponent into 1, and making mantissa normalized (i.e. without leading 1). 0061 */ 0062 #ifdef _MSC_VER 0063 unsigned long nonsign_bsr; 0064 _BitScanReverse(&nonsign_bsr, (unsigned long) nonsign); 0065 uint32_t renorm_shift = (uint32_t) nonsign_bsr ^ 31; 0066 #else 0067 uint32_t renorm_shift = __builtin_clz(nonsign); 0068 #endif 0069 renorm_shift = renorm_shift > 5 ? renorm_shift - 5 : 0; 0070 /* 0071 * Iff half-precision number has exponent of 15, the addition overflows it into bit 31, 0072 * and the subsequent shift turns the high 9 bits into 1. Thus 0073 * inf_nan_mask == 0074 * 0x7F800000 if the half-precision number had exponent of 15 (i.e. was NaN or infinity) 0075 * 0x00000000 otherwise 0076 */ 0077 const int32_t inf_nan_mask = ((int32_t) (nonsign + 0x04000000) >> 8) & INT32_C(0x7F800000); 0078 /* 0079 * Iff nonsign is 0, it overflows into 0xFFFFFFFF, turning bit 31 into 1. Otherwise, bit 31 remains 0. 0080 * The signed shift right by 31 broadcasts bit 31 into all bits of the zero_mask. Thus 0081 * zero_mask == 0082 * 0xFFFFFFFF if the half-precision number was zero (+0.0h or -0.0h) 0083 * 0x00000000 otherwise 0084 */ 0085 const int32_t zero_mask = (int32_t) (nonsign - 1) >> 31; 0086 /* 0087 * 1. Shift nonsign left by renorm_shift to normalize it (if the input was denormal) 0088 * 2. Shift nonsign right by 3 so the exponent (5 bits originally) becomes an 8-bit field and 10-bit mantissa 0089 * shifts into the 10 high bits of the 23-bit mantissa of IEEE single-precision number. 0090 * 3. Add 0x70 to the exponent (starting at bit 23) to compensate the different in exponent bias 0091 * (0x7F for single-precision number less 0xF for half-precision number). 0092 * 4. Subtract renorm_shift from the exponent (starting at bit 23) to account for renormalization. As renorm_shift 0093 * is less than 0x70, this can be combined with step 3. 0094 * 5. Binary OR with inf_nan_mask to turn the exponent into 0xFF if the input was NaN or infinity. 0095 * 6. Binary ANDNOT with zero_mask to turn the mantissa and exponent into zero if the input was zero. 0096 * 7. Combine with the sign of the input number. 0097 */ 0098 return sign | ((((nonsign << renorm_shift >> 3) + ((0x70 - renorm_shift) << 23)) | inf_nan_mask) & ~zero_mask); 0099 } 0100 0101 /* 0102 * Convert a 16-bit floating-point number in IEEE half-precision format, in bit representation, to 0103 * a 32-bit floating-point number in IEEE single-precision format. 0104 * 0105 * @note The implementation relies on IEEE-like (no assumption about rounding mode and no operations on denormals) 0106 * floating-point operations and bitcasts between integer and floating-point variables. 0107 */ 0108 static inline float fp16_ieee_to_fp32_value(uint16_t h) { 0109 /* 0110 * Extend the half-precision floating-point number to 32 bits and shift to the upper part of the 32-bit word: 0111 * +---+-----+------------+-------------------+ 0112 * | S |EEEEE|MM MMMM MMMM|0000 0000 0000 0000| 0113 * +---+-----+------------+-------------------+ 0114 * Bits 31 26-30 16-25 0-15 0115 * 0116 * S - sign bit, E - bits of the biased exponent, M - bits of the mantissa, 0 - zero bits. 0117 */ 0118 const uint32_t w = (uint32_t) h << 16; 0119 /* 0120 * Extract the sign of the input number into the high bit of the 32-bit word: 0121 * 0122 * +---+----------------------------------+ 0123 * | S |0000000 00000000 00000000 00000000| 0124 * +---+----------------------------------+ 0125 * Bits 31 0-31 0126 */ 0127 const uint32_t sign = w & UINT32_C(0x80000000); 0128 /* 0129 * Extract mantissa and biased exponent of the input number into the high bits of the 32-bit word: 0130 * 0131 * +-----+------------+---------------------+ 0132 * |EEEEE|MM MMMM MMMM|0 0000 0000 0000 0000| 0133 * +-----+------------+---------------------+ 0134 * Bits 27-31 17-26 0-16 0135 */ 0136 const uint32_t two_w = w + w; 0137 0138 /* 0139 * Shift mantissa and exponent into bits 23-28 and bits 13-22 so they become mantissa and exponent 0140 * of a single-precision floating-point number: 0141 * 0142 * S|Exponent | Mantissa 0143 * +-+---+-----+------------+----------------+ 0144 * |0|000|EEEEE|MM MMMM MMMM|0 0000 0000 0000| 0145 * +-+---+-----+------------+----------------+ 0146 * Bits | 23-31 | 0-22 0147 * 0148 * Next, there are some adjustments to the exponent: 0149 * - The exponent needs to be corrected by the difference in exponent bias between single-precision and half-precision 0150 * formats (0x7F - 0xF = 0x70) 0151 * - Inf and NaN values in the inputs should become Inf and NaN values after conversion to the single-precision number. 0152 * Therefore, if the biased exponent of the half-precision input was 0x1F (max possible value), the biased exponent 0153 * of the single-precision output must be 0xFF (max possible value). We do this correction in two steps: 0154 * - First, we adjust the exponent by (0xFF - 0x1F) = 0xE0 (see exp_offset below) rather than by 0x70 suggested 0155 * by the difference in the exponent bias (see above). 0156 * - Then we multiply the single-precision result of exponent adjustment by 2**(-112) to reverse the effect of 0157 * exponent adjustment by 0xE0 less the necessary exponent adjustment by 0x70 due to difference in exponent bias. 0158 * The floating-point multiplication hardware would ensure than Inf and NaN would retain their value on at least 0159 * partially IEEE754-compliant implementations. 0160 * 0161 * Note that the above operations do not handle denormal inputs (where biased exponent == 0). However, they also do not 0162 * operate on denormal inputs, and do not produce denormal results. 0163 */ 0164 const uint32_t exp_offset = UINT32_C(0xE0) << 23; 0165 #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__) 0166 const float exp_scale = 0x1.0p-112f; 0167 #else 0168 const float exp_scale = fp32_from_bits(UINT32_C(0x7800000)); 0169 #endif 0170 const float normalized_value = fp32_from_bits((two_w >> 4) + exp_offset) * exp_scale; 0171 0172 /* 0173 * Convert denormalized half-precision inputs into single-precision results (always normalized). 0174 * Zero inputs are also handled here. 0175 * 0176 * In a denormalized number the biased exponent is zero, and mantissa has on-zero bits. 0177 * First, we shift mantissa into bits 0-9 of the 32-bit word. 0178 * 0179 * zeros | mantissa 0180 * +---------------------------+------------+ 0181 * |0000 0000 0000 0000 0000 00|MM MMMM MMMM| 0182 * +---------------------------+------------+ 0183 * Bits 10-31 0-9 0184 * 0185 * Now, remember that denormalized half-precision numbers are represented as: 0186 * FP16 = mantissa * 2**(-24). 0187 * The trick is to construct a normalized single-precision number with the same mantissa and thehalf-precision input 0188 * and with an exponent which would scale the corresponding mantissa bits to 2**(-24). 0189 * A normalized single-precision floating-point number is represented as: 0190 * FP32 = (1 + mantissa * 2**(-23)) * 2**(exponent - 127) 0191 * Therefore, when the biased exponent is 126, a unit change in the mantissa of the input denormalized half-precision 0192 * number causes a change of the constructud single-precision number by 2**(-24), i.e. the same ammount. 0193 * 0194 * The last step is to adjust the bias of the constructed single-precision number. When the input half-precision number 0195 * is zero, the constructed single-precision number has the value of 0196 * FP32 = 1 * 2**(126 - 127) = 2**(-1) = 0.5 0197 * Therefore, we need to subtract 0.5 from the constructed single-precision number to get the numerical equivalent of 0198 * the input half-precision number. 0199 */ 0200 const uint32_t magic_mask = UINT32_C(126) << 23; 0201 const float magic_bias = 0.5f; 0202 const float denormalized_value = fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias; 0203 0204 /* 0205 * - Choose either results of conversion of input as a normalized number, or as a denormalized number, depending on the 0206 * input exponent. The variable two_w contains input exponent in bits 27-31, therefore if its smaller than 2**27, the 0207 * input is either a denormal number, or zero. 0208 * - Combine the result of conversion of exponent and mantissa with the sign of the input number. 0209 */ 0210 const uint32_t denormalized_cutoff = UINT32_C(1) << 27; 0211 const uint32_t result = sign | 0212 (two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value) : fp32_to_bits(normalized_value)); 0213 return fp32_from_bits(result); 0214 } 0215 0216 /* 0217 * Convert a 32-bit floating-point number in IEEE single-precision format to a 16-bit floating-point number in 0218 * IEEE half-precision format, in bit representation. 0219 * 0220 * @note The implementation relies on IEEE-like (no assumption about rounding mode and no operations on denormals) 0221 * floating-point operations and bitcasts between integer and floating-point variables. 0222 */ 0223 static inline uint16_t fp16_ieee_from_fp32_value(float f) { 0224 #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__) 0225 const float scale_to_inf = 0x1.0p+112f; 0226 const float scale_to_zero = 0x1.0p-110f; 0227 #else 0228 const float scale_to_inf = fp32_from_bits(UINT32_C(0x77800000)); 0229 const float scale_to_zero = fp32_from_bits(UINT32_C(0x08800000)); 0230 #endif 0231 float base = (fabsf(f) * scale_to_inf) * scale_to_zero; 0232 0233 const uint32_t w = fp32_to_bits(f); 0234 const uint32_t shl1_w = w + w; 0235 const uint32_t sign = w & UINT32_C(0x80000000); 0236 uint32_t bias = shl1_w & UINT32_C(0xFF000000); 0237 if (bias < UINT32_C(0x71000000)) { 0238 bias = UINT32_C(0x71000000); 0239 } 0240 0241 base = fp32_from_bits((bias >> 1) + UINT32_C(0x07800000)) + base; 0242 const uint32_t bits = fp32_to_bits(base); 0243 const uint32_t exp_bits = (bits >> 13) & UINT32_C(0x00007C00); 0244 const uint32_t mantissa_bits = bits & UINT32_C(0x00000FFF); 0245 const uint32_t nonsign = exp_bits + mantissa_bits; 0246 return (sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign); 0247 } 0248 0249 /* 0250 * Convert a 16-bit floating-point number in ARM alternative half-precision format, in bit representation, to 0251 * a 32-bit floating-point number in IEEE single-precision format, in bit representation. 0252 * 0253 * @note The implementation doesn't use any floating-point operations. 0254 */ 0255 static inline uint32_t fp16_alt_to_fp32_bits(uint16_t h) { 0256 /* 0257 * Extend the half-precision floating-point number to 32 bits and shift to the upper part of the 32-bit word: 0258 * +---+-----+------------+-------------------+ 0259 * | S |EEEEE|MM MMMM MMMM|0000 0000 0000 0000| 0260 * +---+-----+------------+-------------------+ 0261 * Bits 31 26-30 16-25 0-15 0262 * 0263 * S - sign bit, E - bits of the biased exponent, M - bits of the mantissa, 0 - zero bits. 0264 */ 0265 const uint32_t w = (uint32_t) h << 16; 0266 /* 0267 * Extract the sign of the input number into the high bit of the 32-bit word: 0268 * 0269 * +---+----------------------------------+ 0270 * | S |0000000 00000000 00000000 00000000| 0271 * +---+----------------------------------+ 0272 * Bits 31 0-31 0273 */ 0274 const uint32_t sign = w & UINT32_C(0x80000000); 0275 /* 0276 * Extract mantissa and biased exponent of the input number into the bits 0-30 of the 32-bit word: 0277 * 0278 * +---+-----+------------+-------------------+ 0279 * | 0 |EEEEE|MM MMMM MMMM|0000 0000 0000 0000| 0280 * +---+-----+------------+-------------------+ 0281 * Bits 30 27-31 17-26 0-16 0282 */ 0283 const uint32_t nonsign = w & UINT32_C(0x7FFFFFFF); 0284 /* 0285 * Renorm shift is the number of bits to shift mantissa left to make the half-precision number normalized. 0286 * If the initial number is normalized, some of its high 6 bits (sign == 0 and 5-bit exponent) equals one. 0287 * In this case renorm_shift == 0. If the number is denormalize, renorm_shift > 0. Note that if we shift 0288 * denormalized nonsign by renorm_shift, the unit bit of mantissa will shift into exponent, turning the 0289 * biased exponent into 1, and making mantissa normalized (i.e. without leading 1). 0290 */ 0291 #ifdef _MSC_VER 0292 unsigned long nonsign_bsr; 0293 _BitScanReverse(&nonsign_bsr, (unsigned long) nonsign); 0294 uint32_t renorm_shift = (uint32_t) nonsign_bsr ^ 31; 0295 #else 0296 uint32_t renorm_shift = __builtin_clz(nonsign); 0297 #endif 0298 renorm_shift = renorm_shift > 5 ? renorm_shift - 5 : 0; 0299 /* 0300 * Iff nonsign is 0, it overflows into 0xFFFFFFFF, turning bit 31 into 1. Otherwise, bit 31 remains 0. 0301 * The signed shift right by 31 broadcasts bit 31 into all bits of the zero_mask. Thus 0302 * zero_mask == 0303 * 0xFFFFFFFF if the half-precision number was zero (+0.0h or -0.0h) 0304 * 0x00000000 otherwise 0305 */ 0306 const int32_t zero_mask = (int32_t) (nonsign - 1) >> 31; 0307 /* 0308 * 1. Shift nonsign left by renorm_shift to normalize it (if the input was denormal) 0309 * 2. Shift nonsign right by 3 so the exponent (5 bits originally) becomes an 8-bit field and 10-bit mantissa 0310 * shifts into the 10 high bits of the 23-bit mantissa of IEEE single-precision number. 0311 * 3. Add 0x70 to the exponent (starting at bit 23) to compensate the different in exponent bias 0312 * (0x7F for single-precision number less 0xF for half-precision number). 0313 * 4. Subtract renorm_shift from the exponent (starting at bit 23) to account for renormalization. As renorm_shift 0314 * is less than 0x70, this can be combined with step 3. 0315 * 5. Binary ANDNOT with zero_mask to turn the mantissa and exponent into zero if the input was zero. 0316 * 6. Combine with the sign of the input number. 0317 */ 0318 return sign | (((nonsign << renorm_shift >> 3) + ((0x70 - renorm_shift) << 23)) & ~zero_mask); 0319 } 0320 0321 /* 0322 * Convert a 16-bit floating-point number in ARM alternative half-precision format, in bit representation, to 0323 * a 32-bit floating-point number in IEEE single-precision format. 0324 * 0325 * @note The implementation relies on IEEE-like (no assumption about rounding mode and no operations on denormals) 0326 * floating-point operations and bitcasts between integer and floating-point variables. 0327 */ 0328 static inline float fp16_alt_to_fp32_value(uint16_t h) { 0329 /* 0330 * Extend the half-precision floating-point number to 32 bits and shift to the upper part of the 32-bit word: 0331 * +---+-----+------------+-------------------+ 0332 * | S |EEEEE|MM MMMM MMMM|0000 0000 0000 0000| 0333 * +---+-----+------------+-------------------+ 0334 * Bits 31 26-30 16-25 0-15 0335 * 0336 * S - sign bit, E - bits of the biased exponent, M - bits of the mantissa, 0 - zero bits. 0337 */ 0338 const uint32_t w = (uint32_t) h << 16; 0339 /* 0340 * Extract the sign of the input number into the high bit of the 32-bit word: 0341 * 0342 * +---+----------------------------------+ 0343 * | S |0000000 00000000 00000000 00000000| 0344 * +---+----------------------------------+ 0345 * Bits 31 0-31 0346 */ 0347 const uint32_t sign = w & UINT32_C(0x80000000); 0348 /* 0349 * Extract mantissa and biased exponent of the input number into the high bits of the 32-bit word: 0350 * 0351 * +-----+------------+---------------------+ 0352 * |EEEEE|MM MMMM MMMM|0 0000 0000 0000 0000| 0353 * +-----+------------+---------------------+ 0354 * Bits 27-31 17-26 0-16 0355 */ 0356 const uint32_t two_w = w + w; 0357 0358 /* 0359 * Shift mantissa and exponent into bits 23-28 and bits 13-22 so they become mantissa and exponent 0360 * of a single-precision floating-point number: 0361 * 0362 * S|Exponent | Mantissa 0363 * +-+---+-----+------------+----------------+ 0364 * |0|000|EEEEE|MM MMMM MMMM|0 0000 0000 0000| 0365 * +-+---+-----+------------+----------------+ 0366 * Bits | 23-31 | 0-22 0367 * 0368 * Next, the exponent is adjusted for the difference in exponent bias between single-precision and half-precision 0369 * formats (0x7F - 0xF = 0x70). This operation never overflows or generates non-finite values, as the largest 0370 * half-precision exponent is 0x1F and after the adjustment is can not exceed 0x8F < 0xFE (largest single-precision 0371 * exponent for non-finite values). 0372 * 0373 * Note that this operation does not handle denormal inputs (where biased exponent == 0). However, they also do not 0374 * operate on denormal inputs, and do not produce denormal results. 0375 */ 0376 const uint32_t exp_offset = UINT32_C(0x70) << 23; 0377 const float normalized_value = fp32_from_bits((two_w >> 4) + exp_offset); 0378 0379 /* 0380 * Convert denormalized half-precision inputs into single-precision results (always normalized). 0381 * Zero inputs are also handled here. 0382 * 0383 * In a denormalized number the biased exponent is zero, and mantissa has on-zero bits. 0384 * First, we shift mantissa into bits 0-9 of the 32-bit word. 0385 * 0386 * zeros | mantissa 0387 * +---------------------------+------------+ 0388 * |0000 0000 0000 0000 0000 00|MM MMMM MMMM| 0389 * +---------------------------+------------+ 0390 * Bits 10-31 0-9 0391 * 0392 * Now, remember that denormalized half-precision numbers are represented as: 0393 * FP16 = mantissa * 2**(-24). 0394 * The trick is to construct a normalized single-precision number with the same mantissa and thehalf-precision input 0395 * and with an exponent which would scale the corresponding mantissa bits to 2**(-24). 0396 * A normalized single-precision floating-point number is represented as: 0397 * FP32 = (1 + mantissa * 2**(-23)) * 2**(exponent - 127) 0398 * Therefore, when the biased exponent is 126, a unit change in the mantissa of the input denormalized half-precision 0399 * number causes a change of the constructud single-precision number by 2**(-24), i.e. the same ammount. 0400 * 0401 * The last step is to adjust the bias of the constructed single-precision number. When the input half-precision number 0402 * is zero, the constructed single-precision number has the value of 0403 * FP32 = 1 * 2**(126 - 127) = 2**(-1) = 0.5 0404 * Therefore, we need to subtract 0.5 from the constructed single-precision number to get the numerical equivalent of 0405 * the input half-precision number. 0406 */ 0407 const uint32_t magic_mask = UINT32_C(126) << 23; 0408 const float magic_bias = 0.5f; 0409 const float denormalized_value = fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias; 0410 0411 /* 0412 * - Choose either results of conversion of input as a normalized number, or as a denormalized number, depending on the 0413 * input exponent. The variable two_w contains input exponent in bits 27-31, therefore if its smaller than 2**27, the 0414 * input is either a denormal number, or zero. 0415 * - Combine the result of conversion of exponent and mantissa with the sign of the input number. 0416 */ 0417 const uint32_t denormalized_cutoff = UINT32_C(1) << 27; 0418 const uint32_t result = sign | 0419 (two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value) : fp32_to_bits(normalized_value)); 0420 return fp32_from_bits(result); 0421 } 0422 0423 /* 0424 * Convert a 32-bit floating-point number in IEEE single-precision format to a 16-bit floating-point number in 0425 * ARM alternative half-precision format, in bit representation. 0426 * 0427 * @note The implementation relies on IEEE-like (no assumption about rounding mode and no operations on denormals) 0428 * floating-point operations and bitcasts between integer and floating-point variables. 0429 */ 0430 static inline uint16_t fp16_alt_from_fp32_value(float f) { 0431 const uint32_t w = fp32_to_bits(f); 0432 const uint32_t sign = w & UINT32_C(0x80000000); 0433 const uint32_t shl1_w = w + w; 0434 0435 const uint32_t shl1_max_fp16_fp32 = UINT32_C(0x8FFFC000); 0436 const uint32_t shl1_base = shl1_w > shl1_max_fp16_fp32 ? shl1_max_fp16_fp32 : shl1_w; 0437 uint32_t shl1_bias = shl1_base & UINT32_C(0xFF000000); 0438 const uint32_t exp_difference = 23 - 10; 0439 const uint32_t shl1_bias_min = (127 - 1 - exp_difference) << 24; 0440 if (shl1_bias < shl1_bias_min) { 0441 shl1_bias = shl1_bias_min; 0442 } 0443 0444 const float bias = fp32_from_bits((shl1_bias >> 1) + ((exp_difference + 2) << 23)); 0445 const float base = fp32_from_bits((shl1_base >> 1) + (2 << 23)) + bias; 0446 0447 const uint32_t exp_f = fp32_to_bits(base) >> 13; 0448 return (sign >> 16) | ((exp_f & UINT32_C(0x00007C00)) + (fp32_to_bits(base) & UINT32_C(0x00000FFF))); 0449 } 0450 0451 #endif /* FP16_FP16_H */
| [ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
|
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
|