include/vdt/vdtcore_common.h

0001 /*
0002  * vdtcore_common.h
0003  * Common functions for the vdt routines.
0004  * The basic idea is to exploit Pade polynomials.
0005  * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier
0006  * moshier@na-net.ornl.gov) as well as actual code for the exp, log, sin, cos,
0007  * tan, asin, acos and atan functions. The Cephes library can be found here:
0008  * http://www.netlib.org/cephes/
0009  *
0010  *  Created on: Jun 23, 2012
0011  *      Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente
0012  */
0013
0014 /*
0015  * VDT is free software: you can redistribute it and/or modify
0016  * it under the terms of the GNU Lesser Public License as published by
0017  * the Free Software Foundation, either version 3 of the License, or
0018  * (at your option) any later version.
0019  *
0020  * This program is distributed in the hope that it will be useful,
0021  * but WITHOUT ANY WARRANTY; without even the implied warranty of
0022  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0023  * GNU Lesser Public License for more details.
0024  *
0025  * You should have received a copy of the GNU Lesser Public License
0026  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
0027  */
0028
0029 #ifndef VDTCOMMON_H_
0030 #define VDTCOMMON_H_
0031
0032 #include "inttypes.h"
0033 #include <cmath>
0034
0035 namespace vdt{
0036
0037 namespace details{
0038
0039 // Constants
0040 const double TWOPI = 2.*M_PI;
0041 const double PI = M_PI;
0042 const double PIO2 = M_PI_2;
0043 const double PIO4 = M_PI_4;
0044 const double ONEOPIO4 = 4./M_PI;
0045
0046 const float TWOPIF = 2.*M_PI;
0047 const float PIF = M_PI;
0048 const float PIO2F = M_PI_2;
0049 const float PIO4F = M_PI_4;
0050 const float ONEOPIO4F = 4./M_PI;
0051
0052 const double MOREBITS = 6.123233995736765886130E-17;
0053
0054
0055 const float MAXNUMF = 3.4028234663852885981170418348451692544e38f;
0056
0057 //------------------------------------------------------------------------------
0058
0059 /// Used to switch between different type of interpretations of the data (64 bits)
0060 union ieee754{
0061     inline ieee754 () {};
0062     inline ieee754 (double thed) {d=thed;};
0063     inline ieee754 (uint64_t thell) {ll=thell;};
0064     inline ieee754 (float thef) {f[0]=thef;};
0065     inline ieee754 (uint32_t thei) {i[0]=thei;};
0066   double d;
0067   float f[2];
0068   uint32_t i[2];
0069   uint64_t ll;
0070   uint16_t s[4];
0071 };
0072
0073 //------------------------------------------------------------------------------
0074
0075 /// Converts an unsigned long long to a double
0076 inline double uint642dp(uint64_t ll) {
0077   ieee754 tmp;
0078   tmp.ll=ll;
0079   return tmp.d;
0080 }
0081
0082 //------------------------------------------------------------------------------
0083
0084 /// Converts a double to an unsigned long long
0085 inline uint64_t dp2uint64(double x) {
0086   ieee754 tmp;
0087   tmp.d=x;
0088   return tmp.ll;
0089 }
0090
0091 //------------------------------------------------------------------------------
0092 /// Makes an AND of a double and a unsigned long long
0093 inline double dpANDuint64(const double x, const uint64_t i ){
0094   return uint642dp(dp2uint64(x) & i);
0095 }
0096 //------------------------------------------------------------------------------
0097 /// Makes an OR of a double and a unsigned long long
0098 inline double dpORuint64(const double x, const uint64_t i ){
0099   return uint642dp(dp2uint64(x) | i);
0100 }
0101
0102 /// Makes a XOR of a double and a unsigned long long
0103 inline double dpXORuint64(const double x, const uint64_t i ){
0104   return uint642dp(dp2uint64(x) ^ i);
0105 }
0106
0107 //------------------------------------------------------------------------------
0108 inline uint64_t getSignMask(const double x){
0109   const uint64_t mask=0x8000000000000000ULL;
0110   return dp2uint64(x) & mask;
0111 }
0112
0113 //------------------------------------------------------------------------------
0114 /// Converts an int to a float
0115 inline float uint322sp(int x) {
0116     ieee754 tmp;
0117     tmp.i[0]=x;
0118     return tmp.f[0];
0119   }
0120
0121 //------------------------------------------------------------------------------
0122 /// Converts a float to an int
0123 inline uint32_t sp2uint32(float x) {
0124     ieee754 tmp;
0125     tmp.f[0]=x;
0126     return tmp.i[0];
0127   }
0128
0129 //------------------------------------------------------------------------------
0130 /// Makes an AND of a float and a unsigned long
0131 inline float spANDuint32(const float x, const uint32_t i ){
0132   return uint322sp(sp2uint32(x) & i);
0133 }
0134 //------------------------------------------------------------------------------
0135 /// Makes an OR of a float and a unsigned long
0136 inline float spORuint32(const float x, const uint32_t i ){
0137   return uint322sp(sp2uint32(x) | i);
0138 }
0139
0140 //------------------------------------------------------------------------------
0141 /// Makes an OR of a float and a unsigned long
0142 inline float spXORuint32(const float x, const uint32_t i ){
0143   return uint322sp(sp2uint32(x) ^ i);
0144 }
0145 //------------------------------------------------------------------------------
0146 /// Get the sign mask
0147 inline uint32_t getSignMask(const float x){
0148   const uint32_t mask=0x80000000;
0149   return sp2uint32(x) & mask;
0150 }
0151
0152 //------------------------------------------------------------------------------
0153 /// Like frexp but vectorising and the exponent is a double.
0154 inline double getMantExponent(const double x, double & fe){
0155
0156   uint64_t n = dp2uint64(x);
0157
0158   // Shift to the right up to the beginning of the exponent.
0159   // Then with a mask, cut off the sign bit
0160   uint64_t le = (n >> 52);
0161
0162   // chop the head of the number: an int contains more than 11 bits (32)
0163   int32_t e = le; // This is important since sums on uint64_t do not vectorise
0164   fe = e-1023 ;
0165
0166   // This puts to 11 zeroes the exponent
0167   n &=0x800FFFFFFFFFFFFFULL;
0168   // build a mask which is 0.5, i.e. an exponent equal to 1022
0169   // which means *2, see the above +1.
0170   const uint64_t p05 = 0x3FE0000000000000ULL; //dp2uint64(0.5);
0171   n |= p05;
0172
0173   return uint642dp(n);
0174 }
0175
0176 //------------------------------------------------------------------------------
0177 /// Like frexp but vectorising and the exponent is a float.
0178 inline float getMantExponentf(const float x, float & fe){
0179
0180     uint32_t n = sp2uint32(x);
0181     int32_t e = (n >> 23)-127;
0182     fe = e;
0183
0184     // fractional part
0185     const uint32_t p05f = 0x3f000000; // //sp2uint32(0.5);
0186     n &= 0x807fffff;// ~0x7f800000;
0187     n |= p05f;
0188
0189     return uint322sp(n);
0190
0191 }
0192
0193 //------------------------------------------------------------------------------
0194 /// Converts a fp to an int
0195 inline uint32_t fp2uint(float x) {
0196     return sp2uint32(x);
0197   }
0198 /// Converts a fp to an int
0199 inline uint64_t fp2uint(double x) {
0200     return dp2uint64(x);
0201   }
0202 /// Converts an int to fp
0203 inline float int2fp(uint32_t i) {
0204     return uint322sp(i);
0205   }
0206 /// Converts an int to fp
0207 inline double int2fp(uint64_t i) {
0208     return uint642dp(i);
0209   }
0210
0211 //------------------------------------------------------------------------------
0212 /**
0213  * A vectorisable floor implementation, not only triggered by fast-math.
0214  * These functions do not distinguish between -0.0 and 0.0, so are not IEC6509
0215  * compliant for argument -0.0
0216 **/
0217 inline double fpfloor(const double x){
0218   // no problem since exp is defined between -708 and 708. Int is enough for it!
0219   int32_t ret = int32_t (x);
0220   ret-=(sp2uint32(x)>>31);
0221   return ret;
0222
0223 }
0224 //------------------------------------------------------------------------------
0225 /**
0226  * A vectorisable floor implementation, not only triggered by fast-math.
0227  * These functions do not distinguish between -0.0 and 0.0, so are not IEC6509
0228  * compliant for argument -0.0
0229 **/
0230 inline float fpfloor(const float x){
0231   int32_t ret = int32_t (x);
0232   ret-=(sp2uint32(x)>>31);
0233   return ret;
0234
0235 }
0236
0237 //------------------------------------------------------------------------------
0238
0239
0240
0241
0242 }
0243
0244 } // end of namespace vdt
0245
0246 #endif /* VDTCOMMON_H_ */