include/vdt/atan2.h

0001 /*
0002  * atan2.h
0003  * The basic idea is to exploit Pade polynomials.
0004  * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier
0005  * moshier@na-net.ornl.gov) as well as actual code.
0006  * The Cephes library can be found here:  http://www.netlib.org/cephes/
0007  *
0008  *  Created on: Sept 20, 2012
0009  *      Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente
0010  */
0011
0012 /*
0013  * VDT is free software: you can redistribute it and/or modify
0014  * it under the terms of the GNU Lesser Public License as published by
0015  * the Free Software Foundation, either version 3 of the License, or
0016  * (at your option) any later version.
0017  *
0018  * This program is distributed in the hope that it will be useful,
0019  * but WITHOUT ANY WARRANTY; without even the implied warranty of
0020  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0021  * GNU Lesser Public License for more details.
0022  *
0023  * You should have received a copy of the GNU Lesser Public License
0024  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
0025  */
0026
0027 #ifndef ATAN2_H_
0028 #define ATAN2_H_
0029
0030 #include "vdtcore_common.h"
0031 #include "atan.h"
0032
0033 namespace vdt{
0034
0035 inline double fast_atan2( double y, double x ) {
0036     // move in first octant
0037     double xx = std::fabs(x);
0038     double yy = std::fabs(y);
0039     double tmp (0.0);
0040     if (yy>xx) {
0041         tmp = yy;
0042         yy=xx; xx=tmp;
0043         tmp=1.;
0044     }
0045
0046     // To avoid the fpe, we protect against /0.
0047     const double oneIfXXZero = (xx==0.);
0048     double t=yy/(xx+oneIfXXZero);
0049     double z=t;
0050
0051     double s = details::PIO4;
0052     double factor = details::MOREBITSO2;
0053     t = (t-1.0) / (t+1.0);
0054
0055     if( z > details::T3PO8 ) {
0056         s = details::PIO2;
0057         factor = details::MOREBITS;
0058         t = -1.0 / z ;
0059     }
0060     if ( z <= 0.66 ) {
0061         s = 0.;
0062         factor = 0.;
0063         t = z;
0064     }
0065
0066     const double t2 = t * t;
0067
0068     const double px = details::get_atan_px(t2);
0069     const double qx = details::get_atan_qx(t2);
0070
0071     //double res = y +x * x2 * px / qx + x +factor;
0072
0073     const double poq=px / qx;
0074
0075     double ret = t * t2 * poq + t;
0076     ret+=s;
0077
0078     ret+=factor;
0079
0080     // Here we put the result to 0 if xx was 0, if not nothing happens!
0081     ret*= (1.-oneIfXXZero);
0082
0083     // move back in place
0084     if (y==0) ret=0.0;
0085     if (tmp!=0) ret = details::PIO2 - ret;
0086     if (x<0) ret = details::PI - ret;
0087     if (y<0) ret = -ret;
0088
0089
0090     return ret;
0091 }
0092
0093 inline float fast_atan2f( float y, float x ) {
0094
0095     // move in first octant
0096     float xx = std::fabs(x);
0097     float yy = std::fabs(y);
0098     float tmp (0.0f);
0099     if (yy>xx) {
0100         tmp = yy;
0101         yy=xx; xx=tmp;
0102         tmp =1.f;
0103     }
0104
0105     // To avoid the fpe, we protect against /0.
0106     const float oneIfXXZero = (xx==0.f);
0107
0108     float t=yy/(xx/*+oneIfXXZero*/);
0109     float z=t;
0110     if( t > 0.4142135623730950f ) // * tan pi/8
0111             z = (t-1.0f)/(t+1.0f);
0112
0113     //printf("%e %e %e %e\n",yy,xx,t,z);
0114     float z2 = z * z;
0115
0116     float ret =(((( 8.05374449538e-2f * z2
0117                     - 1.38776856032E-1f) * z2
0118                     + 1.99777106478E-1f) * z2
0119                     - 3.33329491539E-1f) * z2 * z
0120                     + z );
0121
0122     // Here we put the result to 0 if xx was 0, if not nothing happens!
0123     ret*= (1.f - oneIfXXZero);
0124
0125     // move back in place
0126     if (y==0.f) ret=0.f;
0127     if( t > 0.4142135623730950f ) ret += details::PIO4F;
0128     if (tmp!=0) ret = details::PIO2F - ret;
0129     if (x<0.f) ret = details::PIF - ret;
0130     if (y<0.f) ret = -ret;
0131
0132     return ret;
0133
0134 }
0135
0136
0137
0138 //------------------------------------------------------------------------------
0139 // Vector signatures
0140
0141 void atan2v(const uint32_t size, double const * __restrict__ iarray, double const * __restrict__ iarray2, double* __restrict__ oarray);
0142 void fast_atan2v(const uint32_t size, double const * __restrict__ iarray, double const * __restrict__ iarray2, double* __restrict__ oarray);
0143 void atan2fv(const uint32_t size, float const * __restrict__ iarray, float const * __restrict__ iarray2, float* __restrict__ oarray);
0144 void fast_atan2fv(const uint32_t size, float const * __restrict__ iarray, float const * __restrict__ iarray2, float* __restrict__ oarray);
0145
0146 } // end namespace vdt
0147
0148
0149 #endif