include/vdt/atan.h

0001 /*
0002  * atan.h
0003  * The basic idea is to exploit Pade polynomials.
0004  * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier
0005  * moshier@na-net.ornl.gov) as well as actual code.
0006  * The Cephes library can be found here:  http://www.netlib.org/cephes/
0007  *
0008  *  Created on: Jun 23, 2012
0009  *      Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente
0010  */
0011
0012 /*
0013  * VDT is free software: you can redistribute it and/or modify
0014  * it under the terms of the GNU Lesser Public License as published by
0015  * the Free Software Foundation, either version 3 of the License, or
0016  * (at your option) any later version.
0017  *
0018  * This program is distributed in the hope that it will be useful,
0019  * but WITHOUT ANY WARRANTY; without even the implied warranty of
0020  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0021  * GNU Lesser Public License for more details.
0022  *
0023  * You should have received a copy of the GNU Lesser Public License
0024  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
0025  */
0026
0027 #ifndef ATAN_H_
0028 #define ATAN_H_
0029
0030 #include "vdtcore_common.h"
0031
0032 namespace vdt{
0033
0034 namespace details{
0035 const double T3PO8 = 2.41421356237309504880;
0036 const double MOREBITSO2 = MOREBITS * 0.5;
0037
0038 inline double get_atan_px(const double x2){
0039
0040     const double PX1atan = -8.750608600031904122785E-1;
0041     const double PX2atan = -1.615753718733365076637E1;
0042     const double PX3atan = -7.500855792314704667340E1;
0043     const double PX4atan = -1.228866684490136173410E2;
0044     const double PX5atan = -6.485021904942025371773E1;
0045
0046     double px = PX1atan;
0047     px *= x2;
0048     px += PX2atan;
0049     px *= x2;
0050     px += PX3atan;
0051     px *= x2;
0052     px += PX4atan;
0053     px *= x2;
0054     px += PX5atan;
0055
0056     return px;
0057 }
0058
0059
0060 inline double get_atan_qx(const double x2){
0061     const double QX1atan = 2.485846490142306297962E1;
0062     const double QX2atan = 1.650270098316988542046E2;
0063     const double QX3atan = 4.328810604912902668951E2;
0064     const double QX4atan = 4.853903996359136964868E2;
0065     const double QX5atan = 1.945506571482613964425E2;
0066
0067     double qx=x2;
0068     qx += QX1atan;
0069     qx *=x2;
0070     qx += QX2atan;
0071     qx *=x2;
0072     qx += QX3atan;
0073     qx *=x2;
0074     qx += QX4atan;
0075     qx *=x2;
0076     qx += QX5atan;
0077
0078     return qx;
0079 }
0080
0081 }
0082
0083
0084
0085 /// Fast Atan implementation double precision
0086 inline double fast_atan(double x){
0087
0088     /* make argument positive and save the sign */
0089     const uint64_t sign_mask = details::getSignMask(x);
0090     x=std::fabs(x);
0091
0092     /* range reduction */
0093     const double originalx=x;
0094
0095     double y = details::PIO4;
0096     double factor = details::MOREBITSO2;
0097     x = (x-1.0) / (x+1.0);
0098
0099     if( originalx > details::T3PO8 ) {
0100         y = details::PIO2;
0101         factor = details::MOREBITS;
0102         x = -1.0 / originalx ;
0103     }
0104     if ( originalx <= 0.66 ) {
0105         y = 0.;
0106         factor = 0.;
0107         x = originalx;
0108     }
0109
0110     const double x2 = x * x;
0111
0112     const double px = details::get_atan_px(x2);
0113     const double qx = details::get_atan_qx(x2);
0114
0115     //double res = y +x * x2 * px / qx + x +factor;
0116
0117     const double poq=px / qx;
0118
0119     double res = x * x2 * poq + x;
0120     res+=y;
0121
0122     res+=factor;
0123
0124     return details::dpORuint64(res,sign_mask);
0125 }
0126
0127 //------------------------------------------------------------------------------
0128 /// Fast Atan implementation single precision
0129 inline float fast_atanf( float xx ) {
0130
0131     const uint32_t sign_mask = details::getSignMask(xx);
0132
0133     float x= std::fabs(xx);
0134     const float x0=x;
0135     float y=0.0f;
0136
0137     /* range reduction */
0138     if( x0 > 0.4142135623730950f ){ // * tan pi/8
0139         x = (x0-1.0f)/(x0+1.0f);
0140         y = details::PIO4F;
0141     }
0142     if( x0 > 2.414213562373095f ){  // tan 3pi/8
0143         x = -( 1.0f/x0 );
0144         y = details::PIO2F;
0145     }
0146
0147
0148     const float x2 = x * x;
0149     y +=
0150             ((( 8.05374449538e-2f * x2
0151                     - 1.38776856032E-1f) * x2
0152                     + 1.99777106478E-1f) * x2
0153                     - 3.33329491539E-1f) * x2 * x
0154                     + x;
0155
0156     return details::spORuint32(y,sign_mask);
0157 }
0158
0159 //------------------------------------------------------------------------------
0160 // Vector signatures
0161
0162 void atanv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray);
0163 void fast_atanv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray);
0164 void atanfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray);
0165 void fast_atanfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray);
0166
0167 }// end of vdt
0168
0169 #endif // end of atan