![]() |
|
|||
File indexing completed on 2025-02-21 10:16:43
0001 /* 0002 * vdtcore_common.h 0003 * Common functions for the vdt routines. 0004 * The basic idea is to exploit Pade polynomials. 0005 * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier 0006 * moshier@na-net.ornl.gov) as well as actual code for the exp, log, sin, cos, 0007 * tan, asin, acos and atan functions. The Cephes library can be found here: 0008 * http://www.netlib.org/cephes/ 0009 * 0010 * Created on: Jun 23, 2012 0011 * Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente 0012 */ 0013 0014 /* 0015 * VDT is free software: you can redistribute it and/or modify 0016 * it under the terms of the GNU Lesser Public License as published by 0017 * the Free Software Foundation, either version 3 of the License, or 0018 * (at your option) any later version. 0019 * 0020 * This program is distributed in the hope that it will be useful, 0021 * but WITHOUT ANY WARRANTY; without even the implied warranty of 0022 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 0023 * GNU Lesser Public License for more details. 0024 * 0025 * You should have received a copy of the GNU Lesser Public License 0026 * along with this program. If not, see <http://www.gnu.org/licenses/>. 0027 */ 0028 0029 #ifndef VDTCOMMON_H_ 0030 #define VDTCOMMON_H_ 0031 0032 #include "inttypes.h" 0033 #include <cmath> 0034 0035 namespace vdt{ 0036 0037 namespace details{ 0038 0039 // Constants 0040 const double TWOPI = 2.*M_PI; 0041 const double PI = M_PI; 0042 const double PIO2 = M_PI_2; 0043 const double PIO4 = M_PI_4; 0044 const double ONEOPIO4 = 4./M_PI; 0045 0046 const float TWOPIF = 2.*M_PI; 0047 const float PIF = M_PI; 0048 const float PIO2F = M_PI_2; 0049 const float PIO4F = M_PI_4; 0050 const float ONEOPIO4F = 4./M_PI; 0051 0052 const double MOREBITS = 6.123233995736765886130E-17; 0053 0054 0055 const float MAXNUMF = 3.4028234663852885981170418348451692544e38f; 0056 0057 //------------------------------------------------------------------------------ 0058 0059 /// Used to switch between different type of interpretations of the data (64 bits) 0060 union ieee754{ 0061 inline ieee754 () {}; 0062 inline ieee754 (double thed) {d=thed;}; 0063 inline ieee754 (uint64_t thell) {ll=thell;}; 0064 inline ieee754 (float thef) {f[0]=thef;}; 0065 inline ieee754 (uint32_t thei) {i[0]=thei;}; 0066 double d; 0067 float f[2]; 0068 uint32_t i[2]; 0069 uint64_t ll; 0070 uint16_t s[4]; 0071 }; 0072 0073 //------------------------------------------------------------------------------ 0074 0075 /// Converts an unsigned long long to a double 0076 inline double uint642dp(uint64_t ll) { 0077 ieee754 tmp; 0078 tmp.ll=ll; 0079 return tmp.d; 0080 } 0081 0082 //------------------------------------------------------------------------------ 0083 0084 /// Converts a double to an unsigned long long 0085 inline uint64_t dp2uint64(double x) { 0086 ieee754 tmp; 0087 tmp.d=x; 0088 return tmp.ll; 0089 } 0090 0091 //------------------------------------------------------------------------------ 0092 /// Makes an AND of a double and a unsigned long long 0093 inline double dpANDuint64(const double x, const uint64_t i ){ 0094 return uint642dp(dp2uint64(x) & i); 0095 } 0096 //------------------------------------------------------------------------------ 0097 /// Makes an OR of a double and a unsigned long long 0098 inline double dpORuint64(const double x, const uint64_t i ){ 0099 return uint642dp(dp2uint64(x) | i); 0100 } 0101 0102 /// Makes a XOR of a double and a unsigned long long 0103 inline double dpXORuint64(const double x, const uint64_t i ){ 0104 return uint642dp(dp2uint64(x) ^ i); 0105 } 0106 0107 //------------------------------------------------------------------------------ 0108 inline uint64_t getSignMask(const double x){ 0109 const uint64_t mask=0x8000000000000000ULL; 0110 return dp2uint64(x) & mask; 0111 } 0112 0113 //------------------------------------------------------------------------------ 0114 /// Converts an int to a float 0115 inline float uint322sp(int x) { 0116 ieee754 tmp; 0117 tmp.i[0]=x; 0118 return tmp.f[0]; 0119 } 0120 0121 //------------------------------------------------------------------------------ 0122 /// Converts a float to an int 0123 inline uint32_t sp2uint32(float x) { 0124 ieee754 tmp; 0125 tmp.f[0]=x; 0126 return tmp.i[0]; 0127 } 0128 0129 //------------------------------------------------------------------------------ 0130 /// Makes an AND of a float and a unsigned long 0131 inline float spANDuint32(const float x, const uint32_t i ){ 0132 return uint322sp(sp2uint32(x) & i); 0133 } 0134 //------------------------------------------------------------------------------ 0135 /// Makes an OR of a float and a unsigned long 0136 inline float spORuint32(const float x, const uint32_t i ){ 0137 return uint322sp(sp2uint32(x) | i); 0138 } 0139 0140 //------------------------------------------------------------------------------ 0141 /// Makes an OR of a float and a unsigned long 0142 inline float spXORuint32(const float x, const uint32_t i ){ 0143 return uint322sp(sp2uint32(x) ^ i); 0144 } 0145 //------------------------------------------------------------------------------ 0146 /// Get the sign mask 0147 inline uint32_t getSignMask(const float x){ 0148 const uint32_t mask=0x80000000; 0149 return sp2uint32(x) & mask; 0150 } 0151 0152 //------------------------------------------------------------------------------ 0153 /// Like frexp but vectorising and the exponent is a double. 0154 inline double getMantExponent(const double x, double & fe){ 0155 0156 uint64_t n = dp2uint64(x); 0157 0158 // Shift to the right up to the beginning of the exponent. 0159 // Then with a mask, cut off the sign bit 0160 uint64_t le = (n >> 52); 0161 0162 // chop the head of the number: an int contains more than 11 bits (32) 0163 int32_t e = le; // This is important since sums on uint64_t do not vectorise 0164 fe = e-1023 ; 0165 0166 // This puts to 11 zeroes the exponent 0167 n &=0x800FFFFFFFFFFFFFULL; 0168 // build a mask which is 0.5, i.e. an exponent equal to 1022 0169 // which means *2, see the above +1. 0170 const uint64_t p05 = 0x3FE0000000000000ULL; //dp2uint64(0.5); 0171 n |= p05; 0172 0173 return uint642dp(n); 0174 } 0175 0176 //------------------------------------------------------------------------------ 0177 /// Like frexp but vectorising and the exponent is a float. 0178 inline float getMantExponentf(const float x, float & fe){ 0179 0180 uint32_t n = sp2uint32(x); 0181 int32_t e = (n >> 23)-127; 0182 fe = e; 0183 0184 // fractional part 0185 const uint32_t p05f = 0x3f000000; // //sp2uint32(0.5); 0186 n &= 0x807fffff;// ~0x7f800000; 0187 n |= p05f; 0188 0189 return uint322sp(n); 0190 0191 } 0192 0193 //------------------------------------------------------------------------------ 0194 /// Converts a fp to an int 0195 inline uint32_t fp2uint(float x) { 0196 return sp2uint32(x); 0197 } 0198 /// Converts a fp to an int 0199 inline uint64_t fp2uint(double x) { 0200 return dp2uint64(x); 0201 } 0202 /// Converts an int to fp 0203 inline float int2fp(uint32_t i) { 0204 return uint322sp(i); 0205 } 0206 /// Converts an int to fp 0207 inline double int2fp(uint64_t i) { 0208 return uint642dp(i); 0209 } 0210 0211 //------------------------------------------------------------------------------ 0212 /** 0213 * A vectorisable floor implementation, not only triggered by fast-math. 0214 * These functions do not distinguish between -0.0 and 0.0, so are not IEC6509 0215 * compliant for argument -0.0 0216 **/ 0217 inline double fpfloor(const double x){ 0218 // no problem since exp is defined between -708 and 708. Int is enough for it! 0219 int32_t ret = int32_t (x); 0220 ret-=(sp2uint32(x)>>31); 0221 return ret; 0222 0223 } 0224 //------------------------------------------------------------------------------ 0225 /** 0226 * A vectorisable floor implementation, not only triggered by fast-math. 0227 * These functions do not distinguish between -0.0 and 0.0, so are not IEC6509 0228 * compliant for argument -0.0 0229 **/ 0230 inline float fpfloor(const float x){ 0231 int32_t ret = int32_t (x); 0232 ret-=(sp2uint32(x)>>31); 0233 return ret; 0234 0235 } 0236 0237 //------------------------------------------------------------------------------ 0238 0239 0240 0241 0242 } 0243 0244 } // end of namespace vdt 0245 0246 #endif /* VDTCOMMON_H_ */
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
![]() ![]() |