File indexing completed on 2025-02-21 10:16:42
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027 #ifndef ASIN_H_
0028 #define ASIN_H_
0029
0030 #include "vdtcore_common.h"
0031
0032 namespace vdt{
0033
0034 namespace details{
0035
0036 const double RX1asin = 2.967721961301243206100E-3;
0037 const double RX2asin = -5.634242780008963776856E-1;
0038 const double RX3asin = 6.968710824104713396794E0;
0039 const double RX4asin = -2.556901049652824852289E1;
0040 const double RX5asin = 2.853665548261061424989E1;
0041
0042 const double SX1asin = -2.194779531642920639778E1;
0043 const double SX2asin = 1.470656354026814941758E2;
0044 const double SX3asin = -3.838770957603691357202E2;
0045 const double SX4asin = 3.424398657913078477438E2;
0046
0047 const double PX1asin = 4.253011369004428248960E-3;
0048 const double PX2asin = -6.019598008014123785661E-1;
0049 const double PX3asin = 5.444622390564711410273E0;
0050 const double PX4asin = -1.626247967210700244449E1;
0051 const double PX5asin = 1.956261983317594739197E1;
0052 const double PX6asin = -8.198089802484824371615E0;
0053
0054 const double QX1asin = -1.474091372988853791896E1;
0055 const double QX2asin = 7.049610280856842141659E1;
0056 const double QX3asin = -1.471791292232726029859E2;
0057 const double QX4asin = 1.395105614657485689735E2;
0058 const double QX5asin = -4.918853881490881290097E1;
0059
0060 inline double getRX(const double x){
0061 double rx = RX1asin;
0062 rx*= x;
0063 rx+= RX2asin;
0064 rx*= x;
0065 rx+= RX3asin;
0066 rx*= x;
0067 rx+= RX4asin;
0068 rx*= x;
0069 rx+= RX5asin;
0070 return rx;
0071 }
0072 inline double getSX(const double x){
0073 double sx = x;
0074 sx+= SX1asin;
0075 sx*= x;
0076 sx+= SX2asin;
0077 sx*= x;
0078 sx+= SX3asin;
0079 sx*= x;
0080 sx+= SX4asin;
0081 return sx;
0082 }
0083
0084 inline double getPX(const double x){
0085 double px = PX1asin;
0086 px*= x;
0087 px+= PX2asin;
0088 px*= x;
0089 px+= PX3asin;
0090 px*= x;
0091 px+= PX4asin;
0092 px*= x;
0093 px+= PX5asin;
0094 px*= x;
0095 px+= PX6asin;
0096 return px;
0097 }
0098
0099 inline double getQX(const double x){
0100 double qx = x;
0101 qx+= QX1asin;
0102 qx*= x;
0103 qx+= QX2asin;
0104 qx*= x;
0105 qx+= QX3asin;
0106 qx*= x;
0107 qx+= QX4asin;
0108 qx*= x;
0109 qx+= QX5asin;
0110 return qx;
0111 }
0112 }
0113
0114 }
0115
0116 namespace vdt{
0117
0118
0119
0120 inline double fast_asin(double x){
0121
0122 const uint64_t sign_mask = details::getSignMask(x);
0123 x = std::fabs(x);
0124 const double a = x;
0125
0126
0127 double zz = 1.0 - a;
0128 double px = details::getRX(zz);
0129 double qx = details::getSX(zz);
0130
0131 const double p = zz * px/qx;
0132
0133 zz = std::sqrt(zz+zz);
0134 double z = details::PIO4 - zz;
0135 zz = zz * p - details::MOREBITS;
0136 z -= zz;
0137 z += details::PIO4;
0138
0139 if( a < 0.625 ){
0140 zz = a * a;
0141 px = details::getPX(zz);
0142 qx = details::getQX(zz);
0143 z = zz*px/qx;
0144 z = a * z + a;
0145 }
0146
0147
0148
0149 double res = a < 1e-8? a : z ;
0150
0151 return details::dpORuint64(res,sign_mask);
0152
0153 }
0154
0155
0156
0157 inline float fast_asinf(float x){
0158
0159
0160 uint32_t flag=0;
0161
0162 const uint32_t sign_mask = details::getSignMask(x);
0163 const float a = std::fabs(x);
0164
0165 float z;
0166 if( a > 0.5f )
0167 {
0168 z = 0.5f * (1.0f - a);
0169 x = sqrtf( z );
0170 flag = 1;
0171 }
0172 else
0173 {
0174 x = a;
0175 z = x * x;
0176 }
0177
0178 z = (((( 4.2163199048E-2f * z
0179 + 2.4181311049E-2f) * z
0180 + 4.5470025998E-2f) * z
0181 + 7.4953002686E-2f) * z
0182 + 1.6666752422E-1f) * z * x
0183 + x;
0184
0185
0186
0187
0188
0189
0190
0191
0192
0193 float tmp = z + z;
0194 tmp = details::PIO2F - tmp;
0195
0196
0197 float res = a < 1e-4f? a : tmp * flag + (1-flag) * z ;
0198
0199
0200 return details::spORuint32(res,sign_mask);
0201
0202 }
0203
0204
0205
0206
0207 inline double fast_acos( double x ){return details::PIO2 - fast_asin(x);}
0208
0209
0210
0211 inline float fast_acosf( float x ){return details::PIO2F - fast_asinf(x);}
0212
0213
0214
0215
0216
0217 void asinv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray);
0218 void fast_asinv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray);
0219 void asinfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray);
0220 void fast_asinfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray);
0221
0222 void acosv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray);
0223 void fast_acosv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray);
0224 void acosfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray);
0225 void fast_acosfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray);
0226
0227 }
0228
0229 #endif