File indexing completed on 2025-01-19 09:51:39
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016 #ifndef EIGEN_ARCH_GENERIC_PACKET_MATH_FUNCTIONS_H
0017 #define EIGEN_ARCH_GENERIC_PACKET_MATH_FUNCTIONS_H
0018
0019 namespace Eigen {
0020 namespace internal {
0021
0022
0023 template<typename T> struct make_integer;
0024 template<> struct make_integer<float> { typedef numext::int32_t type; };
0025 template<> struct make_integer<double> { typedef numext::int64_t type; };
0026 template<> struct make_integer<half> { typedef numext::int16_t type; };
0027 template<> struct make_integer<bfloat16> { typedef numext::int16_t type; };
0028
0029 template<typename Packet> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
0030 Packet pfrexp_generic_get_biased_exponent(const Packet& a) {
0031 typedef typename unpacket_traits<Packet>::type Scalar;
0032 typedef typename unpacket_traits<Packet>::integer_packet PacketI;
0033 enum { mantissa_bits = numext::numeric_limits<Scalar>::digits - 1};
0034 return pcast<PacketI, Packet>(plogical_shift_right<mantissa_bits>(preinterpret<PacketI>(pabs(a))));
0035 }
0036
0037
0038
0039 template<typename Packet> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
0040 Packet pfrexp_generic(const Packet& a, Packet& exponent) {
0041 typedef typename unpacket_traits<Packet>::type Scalar;
0042 typedef typename make_unsigned<typename make_integer<Scalar>::type>::type ScalarUI;
0043 enum {
0044 TotalBits = sizeof(Scalar) * CHAR_BIT,
0045 MantissaBits = numext::numeric_limits<Scalar>::digits - 1,
0046 ExponentBits = int(TotalBits) - int(MantissaBits) - 1
0047 };
0048
0049 EIGEN_CONSTEXPR ScalarUI scalar_sign_mantissa_mask =
0050 ~(((ScalarUI(1) << int(ExponentBits)) - ScalarUI(1)) << int(MantissaBits));
0051 const Packet sign_mantissa_mask = pset1frombits<Packet>(static_cast<ScalarUI>(scalar_sign_mantissa_mask));
0052 const Packet half = pset1<Packet>(Scalar(0.5));
0053 const Packet zero = pzero(a);
0054 const Packet normal_min = pset1<Packet>((numext::numeric_limits<Scalar>::min)());
0055
0056
0057 const Packet is_denormal = pcmp_lt(pabs(a), normal_min);
0058 EIGEN_CONSTEXPR ScalarUI scalar_normalization_offset = ScalarUI(int(MantissaBits) + 1);
0059
0060 const Scalar scalar_normalization_factor = Scalar(ScalarUI(1) << int(scalar_normalization_offset));
0061 const Packet normalization_factor = pset1<Packet>(scalar_normalization_factor);
0062 const Packet normalized_a = pselect(is_denormal, pmul(a, normalization_factor), a);
0063
0064
0065 const Scalar scalar_exponent_offset = -Scalar((ScalarUI(1)<<(int(ExponentBits)-1)) - ScalarUI(2));
0066 Packet exponent_offset = pset1<Packet>(scalar_exponent_offset);
0067 const Packet normalization_offset = pset1<Packet>(-Scalar(scalar_normalization_offset));
0068 exponent_offset = pselect(is_denormal, padd(exponent_offset, normalization_offset), exponent_offset);
0069
0070
0071 exponent = pfrexp_generic_get_biased_exponent(normalized_a);
0072
0073
0074 const Scalar scalar_non_finite_exponent = Scalar((ScalarUI(1) << int(ExponentBits)) - ScalarUI(1));
0075 const Packet non_finite_exponent = pset1<Packet>(scalar_non_finite_exponent);
0076 const Packet is_zero_or_not_finite = por(pcmp_eq(a, zero), pcmp_eq(exponent, non_finite_exponent));
0077 const Packet m = pselect(is_zero_or_not_finite, a, por(pand(normalized_a, sign_mantissa_mask), half));
0078 exponent = pselect(is_zero_or_not_finite, zero, padd(exponent, exponent_offset));
0079 return m;
0080 }
0081
0082
0083
0084 template<typename Packet> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
0085 Packet pldexp_generic(const Packet& a, const Packet& exponent) {
0086
0087
0088
0089
0090
0091
0092
0093
0094
0095
0096
0097
0098
0099
0100
0101
0102
0103
0104
0105
0106
0107
0108 typedef typename unpacket_traits<Packet>::integer_packet PacketI;
0109 typedef typename unpacket_traits<Packet>::type Scalar;
0110 typedef typename unpacket_traits<PacketI>::type ScalarI;
0111 enum {
0112 TotalBits = sizeof(Scalar) * CHAR_BIT,
0113 MantissaBits = numext::numeric_limits<Scalar>::digits - 1,
0114 ExponentBits = int(TotalBits) - int(MantissaBits) - 1
0115 };
0116
0117 const Packet max_exponent = pset1<Packet>(Scalar((ScalarI(1)<<int(ExponentBits)) + ScalarI(int(MantissaBits) - 1)));
0118 const PacketI bias = pset1<PacketI>((ScalarI(1)<<(int(ExponentBits)-1)) - ScalarI(1));
0119 const PacketI e = pcast<Packet, PacketI>(pmin(pmax(exponent, pnegate(max_exponent)), max_exponent));
0120 PacketI b = parithmetic_shift_right<2>(e);
0121 Packet c = preinterpret<Packet>(plogical_shift_left<int(MantissaBits)>(padd(b, bias)));
0122 Packet out = pmul(pmul(pmul(a, c), c), c);
0123 b = psub(psub(psub(e, b), b), b);
0124 c = preinterpret<Packet>(plogical_shift_left<int(MantissaBits)>(padd(b, bias)));
0125 out = pmul(out, c);
0126 return out;
0127 }
0128
0129
0130
0131
0132
0133
0134
0135
0136
0137
0138 template<typename Packet>
0139 struct pldexp_fast_impl {
0140 typedef typename unpacket_traits<Packet>::integer_packet PacketI;
0141 typedef typename unpacket_traits<Packet>::type Scalar;
0142 typedef typename unpacket_traits<PacketI>::type ScalarI;
0143 enum {
0144 TotalBits = sizeof(Scalar) * CHAR_BIT,
0145 MantissaBits = numext::numeric_limits<Scalar>::digits - 1,
0146 ExponentBits = int(TotalBits) - int(MantissaBits) - 1
0147 };
0148
0149 static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
0150 Packet run(const Packet& a, const Packet& exponent) {
0151 const Packet bias = pset1<Packet>(Scalar((ScalarI(1)<<(int(ExponentBits)-1)) - ScalarI(1)));
0152 const Packet limit = pset1<Packet>(Scalar((ScalarI(1)<<int(ExponentBits)) - ScalarI(1)));
0153
0154 const PacketI e = pcast<Packet, PacketI>(pmin(pmax(padd(exponent, bias), pzero(limit)), limit));
0155
0156 return pmul(a, preinterpret<Packet>(plogical_shift_left<int(MantissaBits)>(e)));
0157 }
0158 };
0159
0160
0161
0162
0163
0164
0165
0166 template <typename Packet, bool base2>
0167 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
0168 EIGEN_UNUSED
0169 Packet plog_impl_float(const Packet _x)
0170 {
0171 Packet x = _x;
0172
0173 const Packet cst_1 = pset1<Packet>(1.0f);
0174 const Packet cst_neg_half = pset1<Packet>(-0.5f);
0175
0176 const Packet cst_min_norm_pos = pset1frombits<Packet>( 0x00800000u);
0177 const Packet cst_minus_inf = pset1frombits<Packet>( 0xff800000u);
0178 const Packet cst_pos_inf = pset1frombits<Packet>( 0x7f800000u);
0179
0180
0181 const Packet cst_cephes_SQRTHF = pset1<Packet>(0.707106781186547524f);
0182 const Packet cst_cephes_log_p0 = pset1<Packet>(7.0376836292E-2f);
0183 const Packet cst_cephes_log_p1 = pset1<Packet>(-1.1514610310E-1f);
0184 const Packet cst_cephes_log_p2 = pset1<Packet>(1.1676998740E-1f);
0185 const Packet cst_cephes_log_p3 = pset1<Packet>(-1.2420140846E-1f);
0186 const Packet cst_cephes_log_p4 = pset1<Packet>(+1.4249322787E-1f);
0187 const Packet cst_cephes_log_p5 = pset1<Packet>(-1.6668057665E-1f);
0188 const Packet cst_cephes_log_p6 = pset1<Packet>(+2.0000714765E-1f);
0189 const Packet cst_cephes_log_p7 = pset1<Packet>(-2.4999993993E-1f);
0190 const Packet cst_cephes_log_p8 = pset1<Packet>(+3.3333331174E-1f);
0191
0192
0193 x = pmax(x, cst_min_norm_pos);
0194
0195 Packet e;
0196
0197 x = pfrexp(x,e);
0198
0199
0200
0201
0202
0203
0204
0205
0206 Packet mask = pcmp_lt(x, cst_cephes_SQRTHF);
0207 Packet tmp = pand(x, mask);
0208 x = psub(x, cst_1);
0209 e = psub(e, pand(cst_1, mask));
0210 x = padd(x, tmp);
0211
0212 Packet x2 = pmul(x, x);
0213 Packet x3 = pmul(x2, x);
0214
0215
0216
0217 Packet y, y1, y2;
0218 y = pmadd(cst_cephes_log_p0, x, cst_cephes_log_p1);
0219 y1 = pmadd(cst_cephes_log_p3, x, cst_cephes_log_p4);
0220 y2 = pmadd(cst_cephes_log_p6, x, cst_cephes_log_p7);
0221 y = pmadd(y, x, cst_cephes_log_p2);
0222 y1 = pmadd(y1, x, cst_cephes_log_p5);
0223 y2 = pmadd(y2, x, cst_cephes_log_p8);
0224 y = pmadd(y, x3, y1);
0225 y = pmadd(y, x3, y2);
0226 y = pmul(y, x3);
0227
0228 y = pmadd(cst_neg_half, x2, y);
0229 x = padd(x, y);
0230
0231
0232 if (base2) {
0233 const Packet cst_log2e = pset1<Packet>(static_cast<float>(EIGEN_LOG2E));
0234 x = pmadd(x, cst_log2e, e);
0235 } else {
0236 const Packet cst_ln2 = pset1<Packet>(static_cast<float>(EIGEN_LN2));
0237 x = pmadd(e, cst_ln2, x);
0238 }
0239
0240 Packet invalid_mask = pcmp_lt_or_nan(_x, pzero(_x));
0241 Packet iszero_mask = pcmp_eq(_x,pzero(_x));
0242 Packet pos_inf_mask = pcmp_eq(_x,cst_pos_inf);
0243
0244
0245
0246
0247 return pselect(iszero_mask, cst_minus_inf,
0248 por(pselect(pos_inf_mask,cst_pos_inf,x), invalid_mask));
0249 }
0250
0251 template <typename Packet>
0252 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
0253 EIGEN_UNUSED
0254 Packet plog_float(const Packet _x)
0255 {
0256 return plog_impl_float<Packet, false>(_x);
0257 }
0258
0259 template <typename Packet>
0260 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
0261 EIGEN_UNUSED
0262 Packet plog2_float(const Packet _x)
0263 {
0264 return plog_impl_float<Packet, true>(_x);
0265 }
0266
0267
0268
0269
0270
0271
0272
0273
0274
0275
0276 template <typename Packet, bool base2>
0277 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
0278 EIGEN_UNUSED
0279 Packet plog_impl_double(const Packet _x)
0280 {
0281 Packet x = _x;
0282
0283 const Packet cst_1 = pset1<Packet>(1.0);
0284 const Packet cst_neg_half = pset1<Packet>(-0.5);
0285
0286 const Packet cst_min_norm_pos = pset1frombits<Packet>( static_cast<uint64_t>(0x0010000000000000ull));
0287 const Packet cst_minus_inf = pset1frombits<Packet>( static_cast<uint64_t>(0xfff0000000000000ull));
0288 const Packet cst_pos_inf = pset1frombits<Packet>( static_cast<uint64_t>(0x7ff0000000000000ull));
0289
0290
0291
0292
0293 const Packet cst_cephes_SQRTHF = pset1<Packet>(0.70710678118654752440E0);
0294 const Packet cst_cephes_log_p0 = pset1<Packet>(1.01875663804580931796E-4);
0295 const Packet cst_cephes_log_p1 = pset1<Packet>(4.97494994976747001425E-1);
0296 const Packet cst_cephes_log_p2 = pset1<Packet>(4.70579119878881725854E0);
0297 const Packet cst_cephes_log_p3 = pset1<Packet>(1.44989225341610930846E1);
0298 const Packet cst_cephes_log_p4 = pset1<Packet>(1.79368678507819816313E1);
0299 const Packet cst_cephes_log_p5 = pset1<Packet>(7.70838733755885391666E0);
0300
0301 const Packet cst_cephes_log_q0 = pset1<Packet>(1.0);
0302 const Packet cst_cephes_log_q1 = pset1<Packet>(1.12873587189167450590E1);
0303 const Packet cst_cephes_log_q2 = pset1<Packet>(4.52279145837532221105E1);
0304 const Packet cst_cephes_log_q3 = pset1<Packet>(8.29875266912776603211E1);
0305 const Packet cst_cephes_log_q4 = pset1<Packet>(7.11544750618563894466E1);
0306 const Packet cst_cephes_log_q5 = pset1<Packet>(2.31251620126765340583E1);
0307
0308
0309 x = pmax(x, cst_min_norm_pos);
0310
0311 Packet e;
0312
0313 x = pfrexp(x,e);
0314
0315
0316
0317
0318
0319
0320
0321
0322 Packet mask = pcmp_lt(x, cst_cephes_SQRTHF);
0323 Packet tmp = pand(x, mask);
0324 x = psub(x, cst_1);
0325 e = psub(e, pand(cst_1, mask));
0326 x = padd(x, tmp);
0327
0328 Packet x2 = pmul(x, x);
0329 Packet x3 = pmul(x2, x);
0330
0331
0332
0333 Packet y, y1, y_;
0334 y = pmadd(cst_cephes_log_p0, x, cst_cephes_log_p1);
0335 y1 = pmadd(cst_cephes_log_p3, x, cst_cephes_log_p4);
0336 y = pmadd(y, x, cst_cephes_log_p2);
0337 y1 = pmadd(y1, x, cst_cephes_log_p5);
0338 y_ = pmadd(y, x3, y1);
0339
0340 y = pmadd(cst_cephes_log_q0, x, cst_cephes_log_q1);
0341 y1 = pmadd(cst_cephes_log_q3, x, cst_cephes_log_q4);
0342 y = pmadd(y, x, cst_cephes_log_q2);
0343 y1 = pmadd(y1, x, cst_cephes_log_q5);
0344 y = pmadd(y, x3, y1);
0345
0346 y_ = pmul(y_, x3);
0347 y = pdiv(y_, y);
0348
0349 y = pmadd(cst_neg_half, x2, y);
0350 x = padd(x, y);
0351
0352
0353 if (base2) {
0354 const Packet cst_log2e = pset1<Packet>(static_cast<double>(EIGEN_LOG2E));
0355 x = pmadd(x, cst_log2e, e);
0356 } else {
0357 const Packet cst_ln2 = pset1<Packet>(static_cast<double>(EIGEN_LN2));
0358 x = pmadd(e, cst_ln2, x);
0359 }
0360
0361 Packet invalid_mask = pcmp_lt_or_nan(_x, pzero(_x));
0362 Packet iszero_mask = pcmp_eq(_x,pzero(_x));
0363 Packet pos_inf_mask = pcmp_eq(_x,cst_pos_inf);
0364
0365
0366
0367
0368 return pselect(iszero_mask, cst_minus_inf,
0369 por(pselect(pos_inf_mask,cst_pos_inf,x), invalid_mask));
0370 }
0371
0372 template <typename Packet>
0373 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
0374 EIGEN_UNUSED
0375 Packet plog_double(const Packet _x)
0376 {
0377 return plog_impl_double<Packet, false>(_x);
0378 }
0379
0380 template <typename Packet>
0381 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
0382 EIGEN_UNUSED
0383 Packet plog2_double(const Packet _x)
0384 {
0385 return plog_impl_double<Packet, true>(_x);
0386 }
0387
0388
0389
0390
0391 template<typename Packet>
0392 Packet generic_plog1p(const Packet& x)
0393 {
0394 typedef typename unpacket_traits<Packet>::type ScalarType;
0395 const Packet one = pset1<Packet>(ScalarType(1));
0396 Packet xp1 = padd(x, one);
0397 Packet small_mask = pcmp_eq(xp1, one);
0398 Packet log1 = plog(xp1);
0399 Packet inf_mask = pcmp_eq(xp1, log1);
0400 Packet log_large = pmul(x, pdiv(log1, psub(xp1, one)));
0401 return pselect(por(small_mask, inf_mask), x, log_large);
0402 }
0403
0404
0405
0406
0407 template<typename Packet>
0408 Packet generic_expm1(const Packet& x)
0409 {
0410 typedef typename unpacket_traits<Packet>::type ScalarType;
0411 const Packet one = pset1<Packet>(ScalarType(1));
0412 const Packet neg_one = pset1<Packet>(ScalarType(-1));
0413 Packet u = pexp(x);
0414 Packet one_mask = pcmp_eq(u, one);
0415 Packet u_minus_one = psub(u, one);
0416 Packet neg_one_mask = pcmp_eq(u_minus_one, neg_one);
0417 Packet logu = plog(u);
0418
0419
0420
0421
0422 Packet pos_inf_mask = pcmp_eq(logu, u);
0423 Packet expm1 = pmul(u_minus_one, pdiv(x, logu));
0424 expm1 = pselect(pos_inf_mask, u, expm1);
0425 return pselect(one_mask,
0426 x,
0427 pselect(neg_one_mask,
0428 neg_one,
0429 expm1));
0430 }
0431
0432
0433
0434
0435
0436 template <typename Packet>
0437 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
0438 EIGEN_UNUSED
0439 Packet pexp_float(const Packet _x)
0440 {
0441 const Packet cst_1 = pset1<Packet>(1.0f);
0442 const Packet cst_half = pset1<Packet>(0.5f);
0443 const Packet cst_exp_hi = pset1<Packet>( 88.723f);
0444 const Packet cst_exp_lo = pset1<Packet>(-88.723f);
0445
0446 const Packet cst_cephes_LOG2EF = pset1<Packet>(1.44269504088896341f);
0447 const Packet cst_cephes_exp_p0 = pset1<Packet>(1.9875691500E-4f);
0448 const Packet cst_cephes_exp_p1 = pset1<Packet>(1.3981999507E-3f);
0449 const Packet cst_cephes_exp_p2 = pset1<Packet>(8.3334519073E-3f);
0450 const Packet cst_cephes_exp_p3 = pset1<Packet>(4.1665795894E-2f);
0451 const Packet cst_cephes_exp_p4 = pset1<Packet>(1.6666665459E-1f);
0452 const Packet cst_cephes_exp_p5 = pset1<Packet>(5.0000001201E-1f);
0453
0454
0455 Packet x = pmax(pmin(_x, cst_exp_hi), cst_exp_lo);
0456
0457
0458
0459 Packet m = pfloor(pmadd(x, cst_cephes_LOG2EF, cst_half));
0460
0461
0462
0463
0464 const Packet cst_cephes_exp_C1 = pset1<Packet>(-0.693359375f);
0465 const Packet cst_cephes_exp_C2 = pset1<Packet>(2.12194440e-4f);
0466 Packet r = pmadd(m, cst_cephes_exp_C1, x);
0467 r = pmadd(m, cst_cephes_exp_C2, r);
0468
0469 Packet r2 = pmul(r, r);
0470 Packet r3 = pmul(r2, r);
0471
0472
0473 Packet y, y1, y2;
0474 y = pmadd(cst_cephes_exp_p0, r, cst_cephes_exp_p1);
0475 y1 = pmadd(cst_cephes_exp_p3, r, cst_cephes_exp_p4);
0476 y2 = padd(r, cst_1);
0477 y = pmadd(y, r, cst_cephes_exp_p2);
0478 y1 = pmadd(y1, r, cst_cephes_exp_p5);
0479 y = pmadd(y, r3, y1);
0480 y = pmadd(y, r2, y2);
0481
0482
0483
0484 return pmax(pldexp(y,m), _x);
0485 }
0486
0487 template <typename Packet>
0488 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
0489 EIGEN_UNUSED
0490 Packet pexp_double(const Packet _x)
0491 {
0492 Packet x = _x;
0493
0494 const Packet cst_1 = pset1<Packet>(1.0);
0495 const Packet cst_2 = pset1<Packet>(2.0);
0496 const Packet cst_half = pset1<Packet>(0.5);
0497
0498 const Packet cst_exp_hi = pset1<Packet>(709.784);
0499 const Packet cst_exp_lo = pset1<Packet>(-709.784);
0500
0501 const Packet cst_cephes_LOG2EF = pset1<Packet>(1.4426950408889634073599);
0502 const Packet cst_cephes_exp_p0 = pset1<Packet>(1.26177193074810590878e-4);
0503 const Packet cst_cephes_exp_p1 = pset1<Packet>(3.02994407707441961300e-2);
0504 const Packet cst_cephes_exp_p2 = pset1<Packet>(9.99999999999999999910e-1);
0505 const Packet cst_cephes_exp_q0 = pset1<Packet>(3.00198505138664455042e-6);
0506 const Packet cst_cephes_exp_q1 = pset1<Packet>(2.52448340349684104192e-3);
0507 const Packet cst_cephes_exp_q2 = pset1<Packet>(2.27265548208155028766e-1);
0508 const Packet cst_cephes_exp_q3 = pset1<Packet>(2.00000000000000000009e0);
0509 const Packet cst_cephes_exp_C1 = pset1<Packet>(0.693145751953125);
0510 const Packet cst_cephes_exp_C2 = pset1<Packet>(1.42860682030941723212e-6);
0511
0512 Packet tmp, fx;
0513
0514
0515 x = pmax(pmin(x, cst_exp_hi), cst_exp_lo);
0516
0517 fx = pmadd(cst_cephes_LOG2EF, x, cst_half);
0518
0519
0520 fx = pfloor(fx);
0521
0522
0523
0524
0525 tmp = pmul(fx, cst_cephes_exp_C1);
0526 Packet z = pmul(fx, cst_cephes_exp_C2);
0527 x = psub(x, tmp);
0528 x = psub(x, z);
0529
0530 Packet x2 = pmul(x, x);
0531
0532
0533 Packet px = cst_cephes_exp_p0;
0534 px = pmadd(px, x2, cst_cephes_exp_p1);
0535 px = pmadd(px, x2, cst_cephes_exp_p2);
0536 px = pmul(px, x);
0537
0538
0539 Packet qx = cst_cephes_exp_q0;
0540 qx = pmadd(qx, x2, cst_cephes_exp_q1);
0541 qx = pmadd(qx, x2, cst_cephes_exp_q2);
0542 qx = pmadd(qx, x2, cst_cephes_exp_q3);
0543
0544
0545
0546
0547 x = pdiv(px, psub(qx, px));
0548 x = pmadd(cst_2, x, cst_1);
0549
0550
0551
0552
0553 return pmax(pldexp(x,fx), _x);
0554 }
0555
0556
0557
0558
0559
0560
0561
0562
0563
0564
0565 inline float trig_reduce_huge (float xf, int *quadrant)
0566 {
0567 using Eigen::numext::int32_t;
0568 using Eigen::numext::uint32_t;
0569 using Eigen::numext::int64_t;
0570 using Eigen::numext::uint64_t;
0571
0572 const double pio2_62 = 3.4061215800865545e-19;
0573 const uint64_t zero_dot_five = uint64_t(1) << 61;
0574
0575
0576
0577 static const uint32_t two_over_pi [] =
0578 {
0579 0x00000028, 0x000028be, 0x0028be60, 0x28be60db,
0580 0xbe60db93, 0x60db9391, 0xdb939105, 0x9391054a,
0581 0x91054a7f, 0x054a7f09, 0x4a7f09d5, 0x7f09d5f4,
0582 0x09d5f47d, 0xd5f47d4d, 0xf47d4d37, 0x7d4d3770,
0583 0x4d377036, 0x377036d8, 0x7036d8a5, 0x36d8a566,
0584 0xd8a5664f, 0xa5664f10, 0x664f10e4, 0x4f10e410,
0585 0x10e41000, 0xe4100000
0586 };
0587
0588 uint32_t xi = numext::bit_cast<uint32_t>(xf);
0589
0590
0591
0592
0593 uint32_t e = (xi >> 23) - 118;
0594
0595 xi = ((xi & 0x007fffffu)| 0x00800000u) << (e & 0x7);
0596
0597 uint32_t i = e >> 3;
0598 uint32_t twoopi_1 = two_over_pi[i-1];
0599 uint32_t twoopi_2 = two_over_pi[i+3];
0600 uint32_t twoopi_3 = two_over_pi[i+7];
0601
0602
0603 uint64_t p;
0604 p = uint64_t(xi) * twoopi_3;
0605 p = uint64_t(xi) * twoopi_2 + (p >> 32);
0606 p = (uint64_t(xi * twoopi_1) << 32) + p;
0607
0608
0609 uint64_t q = (p + zero_dot_five) >> 62;
0610 *quadrant = int(q);
0611
0612
0613
0614
0615 p -= q<<62;
0616 return float(double(int64_t(p)) * pio2_62);
0617 }
0618
0619 template<bool ComputeSine,typename Packet>
0620 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
0621 EIGEN_UNUSED
0622 #if EIGEN_GNUC_AT_LEAST(4,4) && EIGEN_COMP_GNUC_STRICT
0623 __attribute__((optimize("-fno-unsafe-math-optimizations")))
0624 #endif
0625 Packet psincos_float(const Packet& _x)
0626 {
0627 typedef typename unpacket_traits<Packet>::integer_packet PacketI;
0628
0629 const Packet cst_2oPI = pset1<Packet>(0.636619746685028076171875f);
0630 const Packet cst_rounding_magic = pset1<Packet>(12582912);
0631 const PacketI csti_1 = pset1<PacketI>(1);
0632 const Packet cst_sign_mask = pset1frombits<Packet>(0x80000000u);
0633
0634 Packet x = pabs(_x);
0635
0636
0637 Packet y = pmul(x, cst_2oPI);
0638
0639
0640 Packet y_round = padd(y, cst_rounding_magic);
0641 EIGEN_OPTIMIZATION_BARRIER(y_round)
0642 PacketI y_int = preinterpret<PacketI>(y_round);
0643 y = psub(y_round, cst_rounding_magic);
0644
0645
0646
0647 #if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD)
0648
0649
0650 const float huge_th = ComputeSine ? 117435.992f : 71476.0625f;
0651 x = pmadd(y, pset1<Packet>(-1.57079601287841796875f), x);
0652 x = pmadd(y, pset1<Packet>(-3.1391647326017846353352069854736328125e-07f), x);
0653 x = pmadd(y, pset1<Packet>(-5.390302529957764765544681040410068817436695098876953125e-15f), x);
0654 #else
0655
0656
0657
0658
0659
0660
0661 const float huge_th = ComputeSine ? 25966.f : 18838.f;
0662 x = pmadd(y, pset1<Packet>(-1.5703125), x);
0663 EIGEN_OPTIMIZATION_BARRIER(x)
0664 x = pmadd(y, pset1<Packet>(-0.000483989715576171875), x);
0665 EIGEN_OPTIMIZATION_BARRIER(x)
0666 x = pmadd(y, pset1<Packet>(1.62865035235881805419921875e-07), x);
0667 x = pmadd(y, pset1<Packet>(5.5644315544167710640977020375430583953857421875e-11), x);
0668
0669
0670
0671
0672
0673
0674
0675
0676
0677
0678
0679
0680
0681 #endif
0682
0683 if(predux_any(pcmp_le(pset1<Packet>(huge_th),pabs(_x))))
0684 {
0685 const int PacketSize = unpacket_traits<Packet>::size;
0686 EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) float vals[PacketSize];
0687 EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) float x_cpy[PacketSize];
0688 EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) int y_int2[PacketSize];
0689 pstoreu(vals, pabs(_x));
0690 pstoreu(x_cpy, x);
0691 pstoreu(y_int2, y_int);
0692 for(int k=0; k<PacketSize;++k)
0693 {
0694 float val = vals[k];
0695 if(val>=huge_th && (numext::isfinite)(val))
0696 x_cpy[k] = trig_reduce_huge(val,&y_int2[k]);
0697 }
0698 x = ploadu<Packet>(x_cpy);
0699 y_int = ploadu<PacketI>(y_int2);
0700 }
0701
0702
0703
0704
0705 Packet sign_bit = ComputeSine ? pxor(_x, preinterpret<Packet>(plogical_shift_left<30>(y_int)))
0706 : preinterpret<Packet>(plogical_shift_left<30>(padd(y_int,csti_1)));
0707 sign_bit = pand(sign_bit, cst_sign_mask);
0708
0709
0710
0711 Packet poly_mask = preinterpret<Packet>(pcmp_eq(pand(y_int, csti_1), pzero(y_int)));
0712
0713 Packet x2 = pmul(x,x);
0714
0715
0716 Packet y1 = pset1<Packet>(2.4372266125283204019069671630859375e-05f);
0717 y1 = pmadd(y1, x2, pset1<Packet>(-0.00138865201734006404876708984375f ));
0718 y1 = pmadd(y1, x2, pset1<Packet>(0.041666619479656219482421875f ));
0719 y1 = pmadd(y1, x2, pset1<Packet>(-0.5f));
0720 y1 = pmadd(y1, x2, pset1<Packet>(1.f));
0721
0722
0723
0724
0725
0726
0727
0728
0729
0730 Packet y2 = pset1<Packet>(-0.0001959234114083702898469196984621021329076029360294342041015625f);
0731 y2 = pmadd(y2, x2, pset1<Packet>( 0.0083326873655616851693794799871284340042620897293090820312500000f));
0732 y2 = pmadd(y2, x2, pset1<Packet>(-0.1666666203982298255503735617821803316473960876464843750000000000f));
0733 y2 = pmul(y2, x2);
0734 y2 = pmadd(y2, x, x);
0735
0736
0737 y = ComputeSine ? pselect(poly_mask,y2,y1)
0738 : pselect(poly_mask,y1,y2);
0739
0740
0741 return pxor(y, sign_bit);
0742 }
0743
0744 template<typename Packet>
0745 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
0746 EIGEN_UNUSED
0747 Packet psin_float(const Packet& x)
0748 {
0749 return psincos_float<true>(x);
0750 }
0751
0752 template<typename Packet>
0753 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
0754 EIGEN_UNUSED
0755 Packet pcos_float(const Packet& x)
0756 {
0757 return psincos_float<false>(x);
0758 }
0759
0760
0761 template<typename Packet>
0762 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
0763 EIGEN_UNUSED
0764 Packet psqrt_complex(const Packet& a) {
0765 typedef typename unpacket_traits<Packet>::type Scalar;
0766 typedef typename Scalar::value_type RealScalar;
0767 typedef typename unpacket_traits<Packet>::as_real RealPacket;
0768
0769
0770
0771
0772
0773
0774
0775
0776
0777
0778
0779
0780
0781
0782
0783
0784
0785
0786
0787
0788
0789
0790
0791
0792
0793
0794
0795
0796
0797
0798
0799
0800
0801
0802
0803
0804
0805 RealPacket a_abs = pabs(a.v);
0806 RealPacket a_abs_flip = pcplxflip(Packet(a_abs)).v;
0807 RealPacket a_max = pmax(a_abs, a_abs_flip);
0808 RealPacket a_min = pmin(a_abs, a_abs_flip);
0809 RealPacket a_min_zero_mask = pcmp_eq(a_min, pzero(a_min));
0810 RealPacket a_max_zero_mask = pcmp_eq(a_max, pzero(a_max));
0811 RealPacket r = pdiv(a_min, a_max);
0812 const RealPacket cst_one = pset1<RealPacket>(RealScalar(1));
0813 RealPacket l = pmul(a_max, psqrt(padd(cst_one, pmul(r, r))));
0814
0815 l = pselect(a_min_zero_mask, a_max, l);
0816
0817
0818
0819
0820 const RealPacket cst_half = pset1<RealPacket>(RealScalar(0.5));
0821 Packet rho;
0822 rho.v = psqrt(pmul(cst_half, padd(a_abs, l)));
0823
0824
0825
0826
0827 RealPacket eta = pandnot(pmul(cst_half, pdiv(a.v, pcplxflip(rho).v)), a_max_zero_mask);
0828 RealPacket real_mask = peven_mask(a.v);
0829 Packet positive_real_result;
0830
0831 positive_real_result.v = pselect(real_mask, rho.v, eta);
0832
0833
0834
0835 const RealScalar neg_zero = RealScalar(numext::bit_cast<float>(0x80000000u));
0836 const RealPacket cst_imag_sign_mask = pset1<Packet>(Scalar(RealScalar(0.0), neg_zero)).v;
0837 RealPacket imag_signs = pand(a.v, cst_imag_sign_mask);
0838 Packet negative_real_result;
0839
0840 negative_real_result.v = por(pabs(pcplxflip(positive_real_result).v), imag_signs);
0841
0842
0843 Packet negative_real_mask;
0844 negative_real_mask.v = pcmp_lt(pand(real_mask, a.v), pzero(a.v));
0845 negative_real_mask.v = por(negative_real_mask.v, pcplxflip(negative_real_mask).v);
0846 Packet result = pselect(negative_real_mask, negative_real_result, positive_real_result);
0847
0848
0849
0850
0851
0852
0853 const RealPacket cst_pos_inf = pset1<RealPacket>(NumTraits<RealScalar>::infinity());
0854 Packet is_inf;
0855 is_inf.v = pcmp_eq(a_abs, cst_pos_inf);
0856 Packet is_real_inf;
0857 is_real_inf.v = pand(is_inf.v, real_mask);
0858 is_real_inf = por(is_real_inf, pcplxflip(is_real_inf));
0859
0860 Packet real_inf_result;
0861 real_inf_result.v = pmul(a_abs, pset1<Packet>(Scalar(RealScalar(1.0), RealScalar(0.0))).v);
0862 real_inf_result.v = pselect(negative_real_mask.v, pcplxflip(real_inf_result).v, real_inf_result.v);
0863
0864 Packet is_imag_inf;
0865 is_imag_inf.v = pandnot(is_inf.v, real_mask);
0866 is_imag_inf = por(is_imag_inf, pcplxflip(is_imag_inf));
0867 Packet imag_inf_result;
0868 imag_inf_result.v = por(pand(cst_pos_inf, real_mask), pandnot(a.v, real_mask));
0869
0870 return pselect(is_imag_inf, imag_inf_result,
0871 pselect(is_real_inf, real_inf_result,result));
0872 }
0873
0874
0875
0876
0877
0878
0879
0880
0881 template<typename Packet>
0882 EIGEN_STRONG_INLINE
0883 void absolute_split(const Packet& x, Packet& n, Packet& r) {
0884 n = pround(x);
0885 r = psub(x, n);
0886 }
0887
0888
0889
0890 template<typename Packet>
0891 EIGEN_STRONG_INLINE
0892 void fast_twosum(const Packet& x, const Packet& y, Packet& s_hi, Packet& s_lo) {
0893 s_hi = padd(x, y);
0894 const Packet t = psub(s_hi, x);
0895 s_lo = psub(y, t);
0896 }
0897
0898 #ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
0899
0900
0901
0902
0903 template<typename Packet>
0904 EIGEN_STRONG_INLINE
0905 void twoprod(const Packet& x, const Packet& y,
0906 Packet& p_hi, Packet& p_lo) {
0907 p_hi = pmul(x, y);
0908 p_lo = pmadd(x, y, pnegate(p_hi));
0909 }
0910
0911 #else
0912
0913
0914
0915
0916
0917
0918 template<typename Packet>
0919 EIGEN_STRONG_INLINE
0920 void veltkamp_splitting(const Packet& x, Packet& x_hi, Packet& x_lo) {
0921 typedef typename unpacket_traits<Packet>::type Scalar;
0922 EIGEN_CONSTEXPR int shift = (NumTraits<Scalar>::digits() + 1) / 2;
0923 const Scalar shift_scale = Scalar(uint64_t(1) << shift);
0924 const Packet gamma = pmul(pset1<Packet>(shift_scale + Scalar(1)), x);
0925 Packet rho = psub(x, gamma);
0926 x_hi = padd(rho, gamma);
0927 x_lo = psub(x, x_hi);
0928 }
0929
0930
0931
0932
0933
0934 template<typename Packet>
0935 EIGEN_STRONG_INLINE
0936 void twoprod(const Packet& x, const Packet& y,
0937 Packet& p_hi, Packet& p_lo) {
0938 Packet x_hi, x_lo, y_hi, y_lo;
0939 veltkamp_splitting(x, x_hi, x_lo);
0940 veltkamp_splitting(y, y_hi, y_lo);
0941
0942 p_hi = pmul(x, y);
0943 p_lo = pmadd(x_hi, y_hi, pnegate(p_hi));
0944 p_lo = pmadd(x_hi, y_lo, p_lo);
0945 p_lo = pmadd(x_lo, y_hi, p_lo);
0946 p_lo = pmadd(x_lo, y_lo, p_lo);
0947 }
0948
0949 #endif
0950
0951
0952
0953
0954
0955
0956
0957
0958 template<typename Packet>
0959 EIGEN_STRONG_INLINE
0960 void twosum(const Packet& x_hi, const Packet& x_lo,
0961 const Packet& y_hi, const Packet& y_lo,
0962 Packet& s_hi, Packet& s_lo) {
0963 const Packet x_greater_mask = pcmp_lt(pabs(y_hi), pabs(x_hi));
0964 Packet r_hi_1, r_lo_1;
0965 fast_twosum(x_hi, y_hi,r_hi_1, r_lo_1);
0966 Packet r_hi_2, r_lo_2;
0967 fast_twosum(y_hi, x_hi,r_hi_2, r_lo_2);
0968 const Packet r_hi = pselect(x_greater_mask, r_hi_1, r_hi_2);
0969
0970 const Packet s1 = padd(padd(y_lo, r_lo_1), x_lo);
0971 const Packet s2 = padd(padd(x_lo, r_lo_2), y_lo);
0972 const Packet s = pselect(x_greater_mask, s1, s2);
0973
0974 fast_twosum(r_hi, s, s_hi, s_lo);
0975 }
0976
0977
0978
0979 template<typename Packet>
0980 EIGEN_STRONG_INLINE
0981 void fast_twosum(const Packet& x_hi, const Packet& x_lo,
0982 const Packet& y_hi, const Packet& y_lo,
0983 Packet& s_hi, Packet& s_lo) {
0984 Packet r_hi, r_lo;
0985 fast_twosum(x_hi, y_hi, r_hi, r_lo);
0986 const Packet s = padd(padd(y_lo, r_lo), x_lo);
0987 fast_twosum(r_hi, s, s_hi, s_lo);
0988 }
0989
0990
0991
0992
0993 template<typename Packet>
0994 EIGEN_STRONG_INLINE
0995 void fast_twosum(const Packet& x,
0996 const Packet& y_hi, const Packet& y_lo,
0997 Packet& s_hi, Packet& s_lo) {
0998 Packet r_hi, r_lo;
0999 fast_twosum(x, y_hi, r_hi, r_lo);
1000 const Packet s = padd(y_lo, r_lo);
1001 fast_twosum(r_hi, s, s_hi, s_lo);
1002 }
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012 template<typename Packet>
1013 EIGEN_STRONG_INLINE
1014 void twoprod(const Packet& x_hi, const Packet& x_lo, const Packet& y,
1015 Packet& p_hi, Packet& p_lo) {
1016 Packet c_hi, c_lo1;
1017 twoprod(x_hi, y, c_hi, c_lo1);
1018 const Packet c_lo2 = pmul(x_lo, y);
1019 Packet t_hi, t_lo1;
1020 fast_twosum(c_hi, c_lo2, t_hi, t_lo1);
1021 const Packet t_lo2 = padd(t_lo1, c_lo1);
1022 fast_twosum(t_hi, t_lo2, p_hi, p_lo);
1023 }
1024
1025
1026
1027
1028
1029
1030
1031 template<typename Packet>
1032 EIGEN_STRONG_INLINE
1033 void twoprod(const Packet& x_hi, const Packet& x_lo,
1034 const Packet& y_hi, const Packet& y_lo,
1035 Packet& p_hi, Packet& p_lo) {
1036 Packet p_hi_hi, p_hi_lo;
1037 twoprod(x_hi, x_lo, y_hi, p_hi_hi, p_hi_lo);
1038 Packet p_lo_hi, p_lo_lo;
1039 twoprod(x_hi, x_lo, y_lo, p_lo_hi, p_lo_lo);
1040 fast_twosum(p_hi_hi, p_hi_lo, p_lo_hi, p_lo_lo, p_hi, p_lo);
1041 }
1042
1043
1044
1045 template <typename Packet>
1046 void doubleword_reciprocal(const Packet& x, Packet& recip_hi, Packet& recip_lo) {
1047 typedef typename unpacket_traits<Packet>::type Scalar;
1048
1049 Packet approx_recip = prsqrt(x);
1050 approx_recip = pmul(approx_recip, approx_recip);
1051
1052
1053
1054
1055
1056
1057 Packet t1_hi, t1_lo;
1058 twoprod(pnegate(x), approx_recip, t1_hi, t1_lo);
1059
1060 Packet t2_hi, t2_lo;
1061 fast_twosum(pset1<Packet>(Scalar(2)), t1_hi, t2_hi, t2_lo);
1062 Packet t3_hi, t3_lo;
1063 fast_twosum(t2_hi, padd(t2_lo, t1_lo), t3_hi, t3_lo);
1064
1065 twoprod(t3_hi, t3_lo, approx_recip, recip_hi, recip_lo);
1066 }
1067
1068
1069
1070 template <typename Scalar>
1071 struct accurate_log2 {
1072 template <typename Packet>
1073 EIGEN_STRONG_INLINE
1074 void operator()(const Packet& x, Packet& log2_x_hi, Packet& log2_x_lo) {
1075 log2_x_hi = plog2(x);
1076 log2_x_lo = pzero(x);
1077 }
1078 };
1079
1080
1081
1082
1083
1084
1085
1086 template <>
1087 struct accurate_log2<float> {
1088 template <typename Packet>
1089 EIGEN_STRONG_INLINE
1090 void operator()(const Packet& z, Packet& log2_x_hi, Packet& log2_x_lo) {
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106 const Packet p6 = pset1<Packet>( 9.703654795885e-2f);
1107 const Packet p5 = pset1<Packet>(-0.1690667718648f);
1108 const Packet p4 = pset1<Packet>( 0.1720575392246f);
1109 const Packet p3 = pset1<Packet>(-0.1789081543684f);
1110 const Packet p2 = pset1<Packet>( 0.2050433009862f);
1111 const Packet p1 = pset1<Packet>(-0.2404672354459f);
1112 const Packet p0 = pset1<Packet>( 0.2885761857032f);
1113
1114 const Packet C3_hi = pset1<Packet>(-0.360674142838f);
1115 const Packet C3_lo = pset1<Packet>(-6.13283912543e-09f);
1116 const Packet C2_hi = pset1<Packet>(0.480897903442f);
1117 const Packet C2_lo = pset1<Packet>(-1.44861207474e-08f);
1118 const Packet C1_hi = pset1<Packet>(-0.721347510815f);
1119 const Packet C1_lo = pset1<Packet>(-4.84483164698e-09f);
1120 const Packet C0_hi = pset1<Packet>(1.44269502163f);
1121 const Packet C0_lo = pset1<Packet>(2.01711713999e-08f);
1122 const Packet one = pset1<Packet>(1.0f);
1123
1124 const Packet x = psub(z, one);
1125
1126
1127
1128 Packet x2 = pmul(x,x);
1129 Packet p_even = pmadd(p6, x2, p4);
1130 p_even = pmadd(p_even, x2, p2);
1131 p_even = pmadd(p_even, x2, p0);
1132 Packet p_odd = pmadd(p5, x2, p3);
1133 p_odd = pmadd(p_odd, x2, p1);
1134 Packet p = pmadd(p_odd, x, p_even);
1135
1136
1137
1138
1139
1140 Packet q_hi, q_lo;
1141 Packet t_hi, t_lo;
1142
1143 twoprod(p, x, t_hi, t_lo);
1144 fast_twosum(C3_hi, C3_lo, t_hi, t_lo, q_hi, q_lo);
1145
1146 twoprod(q_hi, q_lo, x, t_hi, t_lo);
1147 fast_twosum(C2_hi, C2_lo, t_hi, t_lo, q_hi, q_lo);
1148
1149 twoprod(q_hi, q_lo, x, t_hi, t_lo);
1150 fast_twosum(C1_hi, C1_lo, t_hi, t_lo, q_hi, q_lo);
1151
1152 twoprod(q_hi, q_lo, x, t_hi, t_lo);
1153 fast_twosum(C0_hi, C0_lo, t_hi, t_lo, q_hi, q_lo);
1154
1155
1156 twoprod(q_hi, q_lo, x, log2_x_hi, log2_x_lo);
1157 }
1158 };
1159
1160
1161
1162
1163
1164
1165
1166
1167 template <>
1168 struct accurate_log2<double> {
1169 template <typename Packet>
1170 EIGEN_STRONG_INLINE
1171 void operator()(const Packet& x, Packet& log2_x_hi, Packet& log2_x_lo) {
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193 const Packet q12 = pset1<Packet>(2.87074255468000586e-9);
1194 const Packet q10 = pset1<Packet>(2.38957980901884082e-8);
1195 const Packet q8 = pset1<Packet>(2.31032094540014656e-7);
1196 const Packet q6 = pset1<Packet>(2.27279857398537278e-6);
1197 const Packet q4 = pset1<Packet>(2.31271023278625638e-5);
1198 const Packet q2 = pset1<Packet>(2.47556738444535513e-4);
1199 const Packet q0 = pset1<Packet>(2.88543873228900172e-3);
1200 const Packet C_hi = pset1<Packet>(0.0400377511598501157);
1201 const Packet C_lo = pset1<Packet>(-4.77726582251425391e-19);
1202 const Packet one = pset1<Packet>(1.0);
1203
1204 const Packet cst_2_log2e_hi = pset1<Packet>(2.88539008177792677);
1205 const Packet cst_2_log2e_lo = pset1<Packet>(4.07660016854549667e-17);
1206
1207 Packet num_hi, num_lo;
1208 twoprod(cst_2_log2e_hi, cst_2_log2e_lo, psub(x, one), num_hi, num_lo);
1209
1210
1211
1212 Packet denom_hi, denom_lo;
1213 doubleword_reciprocal(padd(x, one), denom_hi, denom_lo);
1214
1215 Packet r_hi, r_lo;
1216 twoprod(num_hi, num_lo, denom_hi, denom_lo, r_hi, r_lo);
1217
1218 Packet r2_hi, r2_lo;
1219 twoprod(r_hi, r_lo, r_hi, r_lo, r2_hi, r2_lo);
1220
1221 Packet r4_hi, r4_lo;
1222 twoprod(r2_hi, r2_lo, r2_hi, r2_lo, r4_hi, r4_lo);
1223
1224
1225
1226 Packet q_even = pmadd(q12, r4_hi, q8);
1227 Packet q_odd = pmadd(q10, r4_hi, q6);
1228 q_even = pmadd(q_even, r4_hi, q4);
1229 q_odd = pmadd(q_odd, r4_hi, q2);
1230 q_even = pmadd(q_even, r4_hi, q0);
1231 Packet q = pmadd(q_odd, r2_hi, q_even);
1232
1233
1234
1235
1236
1237
1238 Packet p_hi, p_lo;
1239 twoprod(r2_hi, r2_lo, q, p_hi, p_lo);
1240
1241 Packet p1_hi, p1_lo;
1242 fast_twosum(C_hi, C_lo, p_hi, p_lo, p1_hi, p1_lo);
1243
1244 Packet p2_hi, p2_lo;
1245 twoprod(r2_hi, r2_lo, p1_hi, p1_lo, p2_hi, p2_lo);
1246
1247 Packet p3_hi, p3_lo;
1248 fast_twosum(one, p2_hi, p2_lo, p3_hi, p3_lo);
1249
1250
1251 twoprod(p3_hi, p3_lo, r_hi, r_lo, log2_x_hi, log2_x_lo);
1252 }
1253 };
1254
1255
1256 template <typename Scalar>
1257 struct fast_accurate_exp2 {
1258 template <typename Packet>
1259 EIGEN_STRONG_INLINE
1260 Packet operator()(const Packet& x) {
1261
1262 return pexp(pmul(pset1<Packet>(Scalar(EIGEN_LN2)), x));
1263 }
1264 };
1265
1266
1267
1268
1269
1270 template <>
1271 struct fast_accurate_exp2<float> {
1272 template <typename Packet>
1273 EIGEN_STRONG_INLINE
1274 Packet operator()(const Packet& x) {
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286 const Packet p4 = pset1<Packet>(1.539513905e-4f);
1287 const Packet p3 = pset1<Packet>(1.340007293e-3f);
1288 const Packet p2 = pset1<Packet>(9.618283249e-3f);
1289 const Packet p1 = pset1<Packet>(5.550328270e-2f);
1290 const Packet p0 = pset1<Packet>(0.2402264923f);
1291
1292 const Packet C_hi = pset1<Packet>(0.6931471825f);
1293 const Packet C_lo = pset1<Packet>(2.36836577e-08f);
1294 const Packet one = pset1<Packet>(1.0f);
1295
1296
1297
1298
1299 Packet x2 = pmul(x,x);
1300 Packet p_even = pmadd(p4, x2, p2);
1301 Packet p_odd = pmadd(p3, x2, p1);
1302 p_even = pmadd(p_even, x2, p0);
1303 Packet p = pmadd(p_odd, x, p_even);
1304
1305
1306
1307 Packet p_hi, p_lo;
1308
1309 twoprod(p, x, p_hi, p_lo);
1310
1311 Packet q1_hi, q1_lo;
1312 twosum(p_hi, p_lo, C_hi, C_lo, q1_hi, q1_lo);
1313
1314 Packet q2_hi, q2_lo;
1315 twoprod(q1_hi, q1_lo, x, q2_hi, q2_lo);
1316
1317 Packet q3_hi, q3_lo;
1318
1319
1320 fast_twosum(one, q2_hi, q3_hi, q3_lo);
1321 return padd(q3_hi, padd(q2_lo, q3_lo));
1322 }
1323 };
1324
1325
1326
1327
1328 template <>
1329 struct fast_accurate_exp2<double> {
1330 template <typename Packet>
1331 EIGEN_STRONG_INLINE
1332 Packet operator()(const Packet& x) {
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344 const Packet p9 = pset1<Packet>(4.431642109085495276e-10);
1345 const Packet p8 = pset1<Packet>(7.073829923303358410e-9);
1346 const Packet p7 = pset1<Packet>(1.017822306737031311e-7);
1347 const Packet p6 = pset1<Packet>(1.321543498017646657e-6);
1348 const Packet p5 = pset1<Packet>(1.525273342728892877e-5);
1349 const Packet p4 = pset1<Packet>(1.540353045780084423e-4);
1350 const Packet p3 = pset1<Packet>(1.333355814685869807e-3);
1351 const Packet p2 = pset1<Packet>(9.618129107593478832e-3);
1352 const Packet p1 = pset1<Packet>(5.550410866481961247e-2);
1353 const Packet p0 = pset1<Packet>(0.240226506959101332);
1354 const Packet C_hi = pset1<Packet>(0.693147180559945286);
1355 const Packet C_lo = pset1<Packet>(4.81927865669806721e-17);
1356 const Packet one = pset1<Packet>(1.0);
1357
1358
1359
1360
1361 Packet x2 = pmul(x,x);
1362 Packet p_even = pmadd(p8, x2, p6);
1363 Packet p_odd = pmadd(p9, x2, p7);
1364 p_even = pmadd(p_even, x2, p4);
1365 p_odd = pmadd(p_odd, x2, p5);
1366 p_even = pmadd(p_even, x2, p2);
1367 p_odd = pmadd(p_odd, x2, p3);
1368 p_even = pmadd(p_even, x2, p0);
1369 p_odd = pmadd(p_odd, x2, p1);
1370 Packet p = pmadd(p_odd, x, p_even);
1371
1372
1373
1374 Packet p_hi, p_lo;
1375
1376 twoprod(p, x, p_hi, p_lo);
1377
1378 Packet q1_hi, q1_lo;
1379 twosum(p_hi, p_lo, C_hi, C_lo, q1_hi, q1_lo);
1380
1381 Packet q2_hi, q2_lo;
1382 twoprod(q1_hi, q1_lo, x, q2_hi, q2_lo);
1383
1384 Packet q3_hi, q3_lo;
1385
1386
1387 fast_twosum(one, q2_hi, q3_hi, q3_lo);
1388 return padd(q3_hi, padd(q2_lo, q3_lo));
1389 }
1390 };
1391
1392
1393
1394
1395
1396
1397 template <typename Packet>
1398 EIGEN_STRONG_INLINE Packet generic_pow_impl(const Packet& x, const Packet& y) {
1399 typedef typename unpacket_traits<Packet>::type Scalar;
1400
1401 Packet e_x;
1402 Packet m_x = pfrexp(x, e_x);
1403
1404
1405 EIGEN_CONSTEXPR Scalar sqrt_half = Scalar(0.70710678118654752440);
1406 const Packet m_x_scale_mask = pcmp_lt(m_x, pset1<Packet>(sqrt_half));
1407 m_x = pselect(m_x_scale_mask, pmul(pset1<Packet>(Scalar(2)), m_x), m_x);
1408 e_x = pselect(m_x_scale_mask, psub(e_x, pset1<Packet>(Scalar(1))), e_x);
1409
1410
1411 Packet rx_hi, rx_lo;
1412 accurate_log2<Scalar>()(m_x, rx_hi, rx_lo);
1413
1414
1415
1416 Packet f1_hi, f1_lo, f2_hi, f2_lo;
1417 twoprod(e_x, y, f1_hi, f1_lo);
1418 twoprod(rx_hi, rx_lo, y, f2_hi, f2_lo);
1419
1420
1421
1422
1423
1424
1425 Packet f_hi, f_lo;
1426 fast_twosum(f1_hi, f1_lo, f2_hi, f2_lo, f_hi, f_lo);
1427
1428
1429 Packet n_z, r_z;
1430 absolute_split(f_hi, n_z, r_z);
1431 r_z = padd(r_z, f_lo);
1432 Packet n_r;
1433 absolute_split(r_z, n_r, r_z);
1434 n_z = padd(n_z, n_r);
1435
1436
1437
1438
1439
1440
1441 const Packet e_r = fast_accurate_exp2<Scalar>()(r_z);
1442 return pldexp(e_r, n_z);
1443 }
1444
1445
1446 template<typename Packet>
1447 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
1448 EIGEN_UNUSED
1449 Packet generic_pow(const Packet& x, const Packet& y) {
1450 typedef typename unpacket_traits<Packet>::type Scalar;
1451
1452 const Packet cst_pos_inf = pset1<Packet>(NumTraits<Scalar>::infinity());
1453 const Packet cst_zero = pset1<Packet>(Scalar(0));
1454 const Packet cst_one = pset1<Packet>(Scalar(1));
1455 const Packet cst_nan = pset1<Packet>(NumTraits<Scalar>::quiet_NaN());
1456
1457 const Packet abs_x = pabs(x);
1458
1459 const Packet x_is_zero = pcmp_eq(x, cst_zero);
1460 const Packet x_is_neg = pcmp_lt(x, cst_zero);
1461 const Packet abs_x_is_inf = pcmp_eq(abs_x, cst_pos_inf);
1462 const Packet abs_x_is_one = pcmp_eq(abs_x, cst_one);
1463 const Packet abs_x_is_gt_one = pcmp_lt(cst_one, abs_x);
1464 const Packet abs_x_is_lt_one = pcmp_lt(abs_x, cst_one);
1465 const Packet x_is_one = pandnot(abs_x_is_one, x_is_neg);
1466 const Packet x_is_neg_one = pand(abs_x_is_one, x_is_neg);
1467 const Packet x_is_nan = pandnot(ptrue(x), pcmp_eq(x, x));
1468
1469
1470 const Packet y_is_one = pcmp_eq(y, cst_one);
1471 const Packet y_is_zero = pcmp_eq(y, cst_zero);
1472 const Packet y_is_neg = pcmp_lt(y, cst_zero);
1473 const Packet y_is_pos = pandnot(ptrue(y), por(y_is_zero, y_is_neg));
1474 const Packet y_is_nan = pandnot(ptrue(y), pcmp_eq(y, y));
1475 const Packet abs_y_is_inf = pcmp_eq(pabs(y), cst_pos_inf);
1476 EIGEN_CONSTEXPR Scalar huge_exponent =
1477 (NumTraits<Scalar>::max_exponent() * Scalar(EIGEN_LN2)) /
1478 NumTraits<Scalar>::epsilon();
1479 const Packet abs_y_is_huge = pcmp_le(pset1<Packet>(huge_exponent), pabs(y));
1480
1481
1482 const Packet y_is_int = pcmp_eq(pfloor(y), y);
1483 const Packet y_div_2 = pmul(y, pset1<Packet>(Scalar(0.5)));
1484 const Packet y_is_even = pcmp_eq(pround(y_div_2), y_div_2);
1485
1486
1487 const Packet invalid_negative_x = pandnot(pandnot(pandnot(x_is_neg, abs_x_is_inf),
1488 y_is_int),
1489 abs_y_is_inf);
1490 const Packet pow_is_one = por(por(x_is_one, y_is_zero),
1491 pand(x_is_neg_one,
1492 por(abs_y_is_inf, pandnot(y_is_even, invalid_negative_x))));
1493 const Packet pow_is_nan = por(invalid_negative_x, por(x_is_nan, y_is_nan));
1494 const Packet pow_is_zero = por(por(por(pand(x_is_zero, y_is_pos),
1495 pand(abs_x_is_inf, y_is_neg)),
1496 pand(pand(abs_x_is_lt_one, abs_y_is_huge),
1497 y_is_pos)),
1498 pand(pand(abs_x_is_gt_one, abs_y_is_huge),
1499 y_is_neg));
1500 const Packet pow_is_inf = por(por(por(pand(x_is_zero, y_is_neg),
1501 pand(abs_x_is_inf, y_is_pos)),
1502 pand(pand(abs_x_is_lt_one, abs_y_is_huge),
1503 y_is_neg)),
1504 pand(pand(abs_x_is_gt_one, abs_y_is_huge),
1505 y_is_pos));
1506
1507
1508 const Packet negate_pow_abs = pandnot(x_is_neg, y_is_even);
1509 const Packet pow_abs = generic_pow_impl(abs_x, y);
1510 return pselect(y_is_one, x,
1511 pselect(pow_is_one, cst_one,
1512 pselect(pow_is_nan, cst_nan,
1513 pselect(pow_is_inf, cst_pos_inf,
1514 pselect(pow_is_zero, cst_zero,
1515 pselect(negate_pow_abs, pnegate(pow_abs), pow_abs))))));
1516 }
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559 template <typename Packet, int N>
1560 struct ppolevl {
1561 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& x, const typename unpacket_traits<Packet>::type coeff[]) {
1562 EIGEN_STATIC_ASSERT((N > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
1563 return pmadd(ppolevl<Packet, N-1>::run(x, coeff), x, pset1<Packet>(coeff[N]));
1564 }
1565 };
1566
1567 template <typename Packet>
1568 struct ppolevl<Packet, 0> {
1569 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& x, const typename unpacket_traits<Packet>::type coeff[]) {
1570 EIGEN_UNUSED_VARIABLE(x);
1571 return pset1<Packet>(coeff[0]);
1572 }
1573 };
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627 template <typename Packet, int N>
1628 struct pchebevl {
1629 EIGEN_DEVICE_FUNC
1630 static EIGEN_STRONG_INLINE Packet run(Packet x, const typename unpacket_traits<Packet>::type coef[]) {
1631 typedef typename unpacket_traits<Packet>::type Scalar;
1632 Packet b0 = pset1<Packet>(coef[0]);
1633 Packet b1 = pset1<Packet>(static_cast<Scalar>(0.f));
1634 Packet b2;
1635
1636 for (int i = 1; i < N; i++) {
1637 b2 = b1;
1638 b1 = b0;
1639 b0 = psub(pmadd(x, b1, pset1<Packet>(coef[i])), b2);
1640 }
1641
1642 return pmul(pset1<Packet>(static_cast<Scalar>(0.5f)), psub(b0, b2));
1643 }
1644 };
1645
1646 }
1647 }
1648
1649 #endif