include/vdt/asin.h

0001 /*
0002  * aasin.h
0003  * The basic idea is to exploit Pade' polynomials.
0004  * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier
0005  * moshier@na-net.ornl.gov) as well as actual code.
0006  * The Cephes library can be found here:  http://www.netlib.org/cephes/
0007  *
0008  *  Created on: Jun 23, 2012
0009  *      Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente
0010  */
0011
0012 /*
0013  * VDT is free software: you can redistribute it and/or modify
0014  * it under the terms of the GNU Lesser Public License as published by
0015  * the Free Software Foundation, either version 3 of the License, or
0016  * (at your option) any later version.
0017  *
0018  * This program is distributed in the hope that it will be useful,
0019  * but WITHOUT ANY WARRANTY; without even the implied warranty of
0020  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0021  * GNU Lesser Public License for more details.
0022  *
0023  * You should have received a copy of the GNU Lesser Public License
0024  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
0025  */
0026
0027 #ifndef ASIN_H_
0028 #define ASIN_H_
0029
0030 #include "vdtcore_common.h"
0031
0032 namespace vdt{
0033
0034 namespace details{
0035
0036 const double RX1asin = 2.967721961301243206100E-3;
0037 const double RX2asin = -5.634242780008963776856E-1;
0038 const double RX3asin = 6.968710824104713396794E0;
0039 const double RX4asin = -2.556901049652824852289E1;
0040 const double RX5asin = 2.853665548261061424989E1;
0041
0042 const double SX1asin = -2.194779531642920639778E1;
0043 const double SX2asin =  1.470656354026814941758E2;
0044 const double SX3asin = -3.838770957603691357202E2;
0045 const double SX4asin = 3.424398657913078477438E2;
0046
0047 const double PX1asin = 4.253011369004428248960E-3;
0048 const double PX2asin = -6.019598008014123785661E-1;
0049 const double PX3asin = 5.444622390564711410273E0;
0050 const double PX4asin = -1.626247967210700244449E1;
0051 const double PX5asin = 1.956261983317594739197E1;
0052 const double PX6asin = -8.198089802484824371615E0;
0053
0054 const double QX1asin = -1.474091372988853791896E1;
0055 const double QX2asin =  7.049610280856842141659E1;
0056 const double QX3asin = -1.471791292232726029859E2;
0057 const double QX4asin = 1.395105614657485689735E2;
0058 const double QX5asin = -4.918853881490881290097E1;
0059
0060 inline double getRX(const double x){
0061     double rx = RX1asin;
0062     rx*= x;
0063     rx+= RX2asin;
0064     rx*= x;
0065     rx+= RX3asin;
0066     rx*= x;
0067     rx+= RX4asin;
0068     rx*= x;
0069     rx+= RX5asin;
0070     return rx;
0071 }
0072 inline double getSX(const double x){
0073     double sx = x;
0074     sx+= SX1asin;
0075     sx*= x;
0076     sx+= SX2asin;
0077     sx*= x;
0078     sx+= SX3asin;
0079     sx*= x;
0080     sx+= SX4asin;
0081     return sx;
0082 }
0083
0084 inline double getPX(const double x){
0085     double px = PX1asin;
0086     px*= x;
0087     px+= PX2asin;
0088     px*= x;
0089     px+= PX3asin;
0090     px*= x;
0091     px+= PX4asin;
0092     px*= x;
0093     px+= PX5asin;
0094     px*= x;
0095     px+= PX6asin;
0096     return px;
0097 }
0098
0099 inline double getQX(const double x){
0100     double qx = x;
0101     qx+= QX1asin;
0102     qx*= x;
0103     qx+= QX2asin;
0104     qx*= x;
0105     qx+= QX3asin;
0106     qx*= x;
0107     qx+= QX4asin;
0108     qx*= x;
0109     qx+= QX5asin;
0110     return qx;
0111     }
0112 }
0113
0114 }
0115
0116 namespace vdt{
0117
0118 // asin double precision --------------------------------------------------------
0119 /// Double Precision asin
0120 inline double fast_asin(double x){
0121
0122     const uint64_t sign_mask = details::getSignMask(x);
0123     x = std::fabs(x);
0124     const double a = x;
0125
0126
0127     double zz = 1.0 - a;
0128     double px = details::getRX(zz);
0129     double qx = details::getSX(zz);
0130
0131     const double p = zz * px/qx;
0132
0133     zz = std::sqrt(zz+zz);
0134     double z = details::PIO4 - zz;
0135     zz = zz * p - details::MOREBITS;
0136     z -= zz;
0137     z += details::PIO4;
0138
0139     if( a < 0.625 ){
0140         zz = a * a;
0141         px = details::getPX(zz);
0142         qx = details::getQX(zz);
0143         z = zz*px/qx;
0144         z = a * z + a;
0145     }
0146
0147
0148     // Linear approx, not sooo needed but seable. Price is cheap though
0149     double res = a < 1e-8? a : z ;
0150         // Restore Sign
0151     return details::dpORuint64(res,sign_mask);
0152
0153 }
0154
0155 //------------------------------------------------------------------------------
0156 /// Single Precision asin
0157 inline float fast_asinf(float x){
0158
0159
0160     uint32_t flag=0;
0161
0162     const uint32_t sign_mask = details::getSignMask(x);
0163     const float a = std::fabs(x);
0164
0165     float z;
0166     if( a > 0.5f )
0167         {
0168         z = 0.5f * (1.0f - a);
0169         x = sqrtf( z );
0170         flag = 1;
0171         }
0172     else
0173         {
0174         x = a;
0175         z = x * x;
0176         }
0177
0178     z = (((( 4.2163199048E-2f * z
0179             + 2.4181311049E-2f) * z
0180             + 4.5470025998E-2f) * z
0181             + 7.4953002686E-2f) * z
0182             + 1.6666752422E-1f) * z * x
0183             + x;
0184
0185 //     if( flag != 0 )
0186 //       {
0187 //       z = z + z;
0188 //       z = PIO2F - z;
0189 //       }
0190
0191     // No branch with the two coefficients
0192
0193     float tmp = z + z;
0194     tmp = details::PIO2F - tmp;
0195
0196     // Linear approx, not sooo needed but seable. Price is cheap though
0197     float res = a < 1e-4f? a : tmp * flag + (1-flag) * z ;
0198
0199     // Restore Sign
0200     return details::spORuint32(res,sign_mask);
0201
0202 }
0203
0204 //------------------------------------------------------------------------------
0205 // The cos is in this file as well
0206
0207 inline double fast_acos( double x ){return details::PIO2  - fast_asin(x);}
0208
0209 //------------------------------------------------------------------------------
0210
0211 inline float fast_acosf( float x ){return details::PIO2F  - fast_asinf(x);}
0212
0213 //------------------------------------------------------------------------------
0214
0215 // Vector signatures
0216
0217 void asinv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray);
0218 void fast_asinv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray);
0219 void asinfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray);
0220 void fast_asinfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray);
0221
0222 void acosv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray);
0223 void fast_acosv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray);
0224 void acosfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray);
0225 void fast_acosfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray);
0226
0227 } //vdt namespace
0228
0229 #endif /* ASIN_H_ */