arch/ZVector/MathFunctions.h

0001 // This file is part of Eigen, a lightweight C++ template library
0002 // for linear algebra.
0003 //
0004 // Copyright (C) 2007 Julien Pommier
0005 // Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
0006 // Copyright (C) 2016 Konstantinos Margaritis <markos@freevec.org>
0007 //
0008 // This Source Code Form is subject to the terms of the Mozilla
0009 // Public License v. 2.0. If a copy of the MPL was not distributed
0010 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
0011
0012 /* The sin, cos, exp, and log functions of this file come from
0013  * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
0014  */
0015
0016 #ifndef EIGEN_MATH_FUNCTIONS_ALTIVEC_H
0017 #define EIGEN_MATH_FUNCTIONS_ALTIVEC_H
0018
0019 namespace Eigen {
0020
0021 namespace internal {
0022
0023 #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12)
0024 static _EIGEN_DECLARE_CONST_Packet4f(1 , 1.0f);
0025 static _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
0026 static _EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
0027 static _EIGEN_DECLARE_CONST_Packet4i(23, 23);
0028
0029 static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000);
0030
0031 /* the smallest non denormalized float number */
0032 static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos,  0x00800000);
0033 static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf,     0xff800000); // -1.f/0.f
0034 static _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_nan,     0xffffffff);
0035
0036 /* natural logarithm computed for 4 simultaneous float
0037   return NaN for x <= 0
0038 */
0039 static _EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f);
0040 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2f);
0041 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1f);
0042 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1f);
0043 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1f);
0044 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1f);
0045 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1f);
0046 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1f);
0047 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1f);
0048 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1f);
0049 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f);
0050 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f);
0051
0052 static _EIGEN_DECLARE_CONST_Packet4f(exp_hi,  88.3762626647950f);
0053 static _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
0054
0055 static _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
0056 static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
0057 static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
0058
0059 static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4f);
0060 static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3f);
0061 static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3f);
0062 static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2f);
0063 static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
0064 static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);
0065 #endif
0066
0067 static _EIGEN_DECLARE_CONST_Packet2d(1 , 1.0);
0068 static _EIGEN_DECLARE_CONST_Packet2d(2 , 2.0);
0069 static _EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
0070
0071 static _EIGEN_DECLARE_CONST_Packet2d(exp_hi,  709.437);
0072 static _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -709.436139303);
0073
0074 static _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
0075
0076 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
0077 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
0078 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
0079
0080 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
0081 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
0082 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
0083 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
0084
0085 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
0086 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
0087
0088 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
0089 Packet2d pexp<Packet2d>(const Packet2d& _x)
0090 {
0091   Packet2d x = _x;
0092
0093   Packet2d tmp, fx;
0094   Packet2l emm0;
0095
0096   // clamp x
0097   x = pmax(pmin(x, p2d_exp_hi), p2d_exp_lo);
0098   /* express exp(x) as exp(g + n*log(2)) */
0099   fx = pmadd(p2d_cephes_LOG2EF, x, p2d_half);
0100
0101   fx = vec_floor(fx);
0102
0103   tmp = pmul(fx, p2d_cephes_exp_C1);
0104   Packet2d z = pmul(fx, p2d_cephes_exp_C2);
0105   x = psub(x, tmp);
0106   x = psub(x, z);
0107
0108   Packet2d x2 = pmul(x,x);
0109
0110   Packet2d px = p2d_cephes_exp_p0;
0111   px = pmadd(px, x2, p2d_cephes_exp_p1);
0112   px = pmadd(px, x2, p2d_cephes_exp_p2);
0113   px = pmul (px, x);
0114
0115   Packet2d qx = p2d_cephes_exp_q0;
0116   qx = pmadd(qx, x2, p2d_cephes_exp_q1);
0117   qx = pmadd(qx, x2, p2d_cephes_exp_q2);
0118   qx = pmadd(qx, x2, p2d_cephes_exp_q3);
0119
0120   x = pdiv(px,psub(qx,px));
0121   x = pmadd(p2d_2,x,p2d_1);
0122
0123   // build 2^n
0124   emm0 = vec_ctsl(fx, 0);
0125
0126   static const Packet2l p2l_1023 = { 1023, 1023 };
0127   static const Packet2ul p2ul_52 = { 52, 52 };
0128
0129   emm0 = emm0 + p2l_1023;
0130   emm0 = emm0 << reinterpret_cast<Packet2l>(p2ul_52);
0131
0132   // Altivec's max & min operators just drop silent NaNs. Check NaNs in
0133   // inputs and return them unmodified.
0134   Packet2ul isnumber_mask = reinterpret_cast<Packet2ul>(vec_cmpeq(_x, _x));
0135   return vec_sel(_x, pmax(pmul(x, reinterpret_cast<Packet2d>(emm0)), _x),
0136                  isnumber_mask);
0137 }
0138
0139 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
0140 Packet4f pexp<Packet4f>(const Packet4f& _x)
0141 {
0142 #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12)
0143   Packet4f x = _x;
0144
0145   Packet4f tmp, fx;
0146   Packet4i emm0;
0147
0148   // clamp x
0149   x = pmax(pmin(x, p4f_exp_hi), p4f_exp_lo);
0150
0151   // express exp(x) as exp(g + n*log(2))
0152   fx = pmadd(x, p4f_cephes_LOG2EF, p4f_half);
0153
0154   fx = pfloor(fx);
0155
0156   tmp = pmul(fx, p4f_cephes_exp_C1);
0157   Packet4f z = pmul(fx, p4f_cephes_exp_C2);
0158   x = psub(x, tmp);
0159   x = psub(x, z);
0160
0161   z = pmul(x,x);
0162
0163   Packet4f y = p4f_cephes_exp_p0;
0164   y = pmadd(y, x, p4f_cephes_exp_p1);
0165   y = pmadd(y, x, p4f_cephes_exp_p2);
0166   y = pmadd(y, x, p4f_cephes_exp_p3);
0167   y = pmadd(y, x, p4f_cephes_exp_p4);
0168   y = pmadd(y, x, p4f_cephes_exp_p5);
0169   y = pmadd(y, z, x);
0170   y = padd(y, p4f_1);
0171
0172   // build 2^n
0173   emm0 = (Packet4i){ (int)fx[0], (int)fx[1], (int)fx[2], (int)fx[3] };
0174   emm0 = emm0 + p4i_0x7f;
0175   emm0 = emm0 << reinterpret_cast<Packet4i>(p4i_23);
0176
0177   return pmax(pmul(y, reinterpret_cast<Packet4f>(emm0)), _x);
0178 #else
0179   Packet4f res;
0180   res.v4f[0] = pexp<Packet2d>(_x.v4f[0]);
0181   res.v4f[1] = pexp<Packet2d>(_x.v4f[1]);
0182   return res;
0183 #endif
0184 }
0185
0186 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
0187 Packet2d psqrt<Packet2d>(const Packet2d& x)
0188 {
0189   return vec_sqrt(x);
0190 }
0191
0192 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
0193 Packet4f psqrt<Packet4f>(const Packet4f& x)
0194 {
0195   Packet4f res;
0196 #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12)
0197   res = vec_sqrt(x);
0198 #else
0199   res.v4f[0] = psqrt<Packet2d>(x.v4f[0]);
0200   res.v4f[1] = psqrt<Packet2d>(x.v4f[1]);
0201 #endif
0202   return res;
0203 }
0204
0205 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
0206 Packet2d prsqrt<Packet2d>(const Packet2d& x) {
0207   return pset1<Packet2d>(1.0) / psqrt<Packet2d>(x);
0208 }
0209
0210 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
0211 Packet4f prsqrt<Packet4f>(const Packet4f& x) {
0212   Packet4f res;
0213 #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12)
0214   res = pset1<Packet4f>(1.0) / psqrt<Packet4f>(x);
0215 #else
0216   res.v4f[0] = prsqrt<Packet2d>(x.v4f[0]);
0217   res.v4f[1] = prsqrt<Packet2d>(x.v4f[1]);
0218 #endif
0219   return res;
0220 }
0221
0222 // Hyperbolic Tangent function.
0223 template <>
0224 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
0225 ptanh<Packet4f>(const Packet4f& x) {
0226   return internal::generic_fast_tanh_float(x);
0227 }
0228
0229 }  // end namespace internal
0230
0231 }  // end namespace Eigen
0232
0233 #endif  // EIGEN_MATH_FUNCTIONS_ALTIVEC_H