include/vdt/tan.h

0001 /*
0002  * tan.h
0003  * The basic idea is to exploit Pade polynomials.
0004  * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier
0005  * moshier@na-net.ornl.gov) as well as actual code.
0006  * The Cephes library can be found here:  http://www.netlib.org/cephes/
0007  *
0008  *  Created on: Jun 23, 2012
0009  *      Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente
0010  */
0011
0012 /*
0013  * VDT is free software: you can redistribute it and/or modify
0014  * it under the terms of the GNU Lesser Public License as published by
0015  * the Free Software Foundation, either version 3 of the License, or
0016  * (at your option) any later version.
0017  *
0018  * This program is distributed in the hope that it will be useful,
0019  * but WITHOUT ANY WARRANTY; without even the implied warranty of
0020  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0021  * GNU Lesser Public License for more details.
0022  *
0023  * You should have received a copy of the GNU Lesser Public License
0024  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
0025  */
0026
0027 #ifndef TAN_H_
0028 #define TAN_H_
0029
0030 #include "vdtcore_common.h"
0031 #include "sincos.h"
0032
0033 namespace vdt{
0034
0035
0036 namespace details{
0037
0038 const double PX1tan=-1.30936939181383777646E4;
0039 const double PX2tan=1.15351664838587416140E6;
0040 const double PX3tan=-1.79565251976484877988E7;
0041
0042 const double QX1tan = 1.36812963470692954678E4;
0043 const double QX2tan = -1.32089234440210967447E6;
0044 const double QX3tan = 2.50083801823357915839E7;
0045 const double QX4tan = -5.38695755929454629881E7;
0046
0047 const double DP1tan = 7.853981554508209228515625E-1;
0048 const double DP2tan = 7.94662735614792836714E-9;
0049 const double DP3tan = 3.06161699786838294307E-17;
0050
0051 const float DP1Ftan = 0.78515625;
0052 const float DP2Ftan = 2.4187564849853515625e-4;
0053 const float DP3Ftan = 3.77489497744594108e-8;
0054
0055
0056 //------------------------------------------------------------------------------
0057 /// Reduce to -45 to 45
0058 inline double reduce2quadranttan(double x, int32_t& quad) {
0059
0060     x = fabs(x);
0061     quad = int( ONEOPIO4 * x ); // always positive, so (int) == std::floor
0062     quad = (quad+1) & (~1);
0063     const double y = quad;
0064     // Extended precision modular arithmetic
0065     return ((x - y * DP1tan) - y * DP2tan) - y * DP3tan;
0066   }
0067
0068 //------------------------------------------------------------------------------
0069 /// Reduce to -45 to 45
0070 inline float reduce2quadranttan(float x, int32_t& quad) {
0071
0072     x = fabs(x);
0073     quad = int( ONEOPIO4F * x ); // always positive, so (int) == std::floor
0074     quad = (quad+1) & (~1);
0075     const float y = quad;
0076     // Extended precision modular arithmetic
0077     return ((x - y * DP1Ftan) - y * DP2Ftan) - y * DP3Ftan;
0078   }
0079
0080 }
0081
0082 //------------------------------------------------------------------------------
0083 /// Double precision tangent implementation
0084 inline double fast_tan(double x){
0085
0086     const uint64_t sign_mask = details::getSignMask(x);
0087
0088     int32_t quad =0;
0089     const double z=details::reduce2quadranttan(x,quad);
0090
0091     const double zz = z * z;
0092
0093     double res=z;
0094
0095     if( zz > 1.0e-14 ){
0096         double px = details::PX1tan;
0097         px *= zz;
0098         px += details::PX2tan;
0099         px *= zz;
0100         px += details::PX3tan;
0101
0102         double qx=zz;
0103         qx += details::QX1tan;
0104         qx *=zz;
0105         qx += details::QX2tan;
0106         qx *=zz;
0107         qx += details::QX3tan;
0108         qx *=zz;
0109         qx += details::QX4tan;
0110
0111         res = z + z * zz * px / qx;
0112     }
0113
0114     // A no branching way to say: if j&2 res = -1/res. You can!!!
0115     quad &=2;
0116     quad >>=1;
0117     const int32_t alt = quad^1;
0118     // Avoid fpe generated by 1/0 if res is 0
0119     const double zeroIfXNonZero = (x==0.);
0120     res += zeroIfXNonZero;
0121     res = quad * (-1./res) + alt * res; // one coeff is one and one is 0!
0122
0123     // Again, return 0 if res==0, the correct result otherwhise
0124     return details::dpXORuint64(res,sign_mask) * (1.-zeroIfXNonZero);
0125
0126 }
0127
0128 // Single precision ------------------------------------------------------------
0129
0130 inline float fast_tanf(float x){
0131     const uint32_t sign_mask = details::getSignMask(x);
0132
0133     int32_t quad =0;
0134     const float z=details::reduce2quadranttan(x,quad);
0135
0136     const float zz = z * z;
0137
0138     float res=z;
0139
0140     if( zz > 1.0e-14f ){
0141       res =
0142         ((((( 9.38540185543E-3f * zz
0143         + 3.11992232697E-3f) * zz
0144         + 2.44301354525E-2f) * zz
0145         + 5.34112807005E-2f) * zz
0146         + 1.33387994085E-1f) * zz
0147         + 3.33331568548E-1f) * zz * z
0148         + z;
0149     }
0150
0151     // A no branching way to say: if j&2 res = -1/res. You can!!!
0152     quad &=2;
0153     quad >>=1;
0154     const int32_t alt = quad^1;
0155     // Avoid fpe generated by 1/0 if res is 0
0156     const float zeroIfXNonZero = (x==0.f);
0157     res += zeroIfXNonZero;
0158     res = quad * (-1.f/res) + alt * res; // one coeff is one and one is 0!
0159
0160     return details::spXORuint32(res,sign_mask) * (1.f-zeroIfXNonZero);
0161
0162 }
0163
0164 //------------------------------------------------------------------------------
0165
0166 void tanv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray);
0167 void fast_tanv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray);
0168 void tanfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray);
0169 void fast_tanfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray);
0170
0171 //------------------------------------------------------------------------------
0172
0173 } //vdt namespace
0174
0175
0176 #endif /* TAN_H_ */