File indexing completed on 2025-08-28 09:11:29
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012 #ifndef XSIMD_GENERIC_TRIGO_HPP
0013 #define XSIMD_GENERIC_TRIGO_HPP
0014
0015 #include "./xsimd_generic_details.hpp"
0016
0017 #include <array>
0018
0019 namespace xsimd
0020 {
0021
0022 namespace kernel
0023 {
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034 using namespace types;
0035
0036
0037 template <class A, class T>
0038 XSIMD_INLINE batch<T, A> acos(batch<T, A> const& self, requires_arch<generic>) noexcept
0039 {
0040 using batch_type = batch<T, A>;
0041 batch_type x = abs(self);
0042 auto x_larger_05 = x > batch_type(0.5);
0043 x = select(x_larger_05, sqrt(fma(batch_type(-0.5), x, batch_type(0.5))), self);
0044 x = asin(x);
0045 x = select(x_larger_05, x + x, x);
0046 x = select(self < batch_type(-0.5), constants::pi<batch_type>() - x, x);
0047 return select(x_larger_05, x, constants::pio2<batch_type>() - x);
0048 }
0049 template <class A, class T>
0050 XSIMD_INLINE batch<std::complex<T>, A> acos(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
0051 {
0052 using batch_type = batch<std::complex<T>, A>;
0053 using real_batch = typename batch_type::real_batch;
0054 batch_type tmp = asin(z);
0055 return { constants::pio2<real_batch>() - tmp.real(), -tmp.imag() };
0056 }
0057
0058
0059
0060
0061
0062
0063
0064
0065
0066
0067
0068 template <class A, class T>
0069 XSIMD_INLINE batch<T, A> acosh(batch<T, A> const& self, requires_arch<generic>) noexcept
0070 {
0071 using batch_type = batch<T, A>;
0072 batch_type x = self - batch_type(1.);
0073 auto test = x > constants::oneotwoeps<batch_type>();
0074 batch_type z = select(test, self, x + sqrt(x + x + x * x));
0075 batch_type l1pz = log1p(z);
0076 return select(test, l1pz + constants::log_2<batch_type>(), l1pz);
0077 }
0078 template <class A, class T>
0079 XSIMD_INLINE batch<std::complex<T>, A> acosh(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
0080 {
0081 using batch_type = batch<std::complex<T>, A>;
0082 batch_type w = acos(z);
0083 w = batch_type(-w.imag(), w.real());
0084 return w;
0085 }
0086
0087
0088 template <class A>
0089 XSIMD_INLINE batch<float, A> asin(batch<float, A> const& self, requires_arch<generic>) noexcept
0090 {
0091 using batch_type = batch<float, A>;
0092 batch_type x = abs(self);
0093 batch_type sign = bitofsign(self);
0094 auto x_larger_05 = x > batch_type(0.5);
0095 batch_type z = select(x_larger_05, batch_type(0.5) * (batch_type(1.) - x), x * x);
0096 x = select(x_larger_05, sqrt(z), x);
0097 batch_type z1 = detail::horner<batch_type,
0098 0x3e2aaae4,
0099 0x3d9980f6,
0100 0x3d3a3ec7,
0101 0x3cc617e3,
0102 0x3d2cb352>(z);
0103 z1 = fma(z1, z * x, x);
0104 z = select(x_larger_05, constants::pio2<batch_type>() - (z1 + z1), z1);
0105 return z ^ sign;
0106 }
0107 template <class A>
0108 XSIMD_INLINE batch<double, A> asin(batch<double, A> const& self, requires_arch<generic>) noexcept
0109 {
0110 using batch_type = batch<double, A>;
0111 batch_type x = abs(self);
0112 auto small_cond = x < constants::sqrteps<batch_type>();
0113 batch_type ct1 = batch_type(bit_cast<double>(int64_t(0x3fe4000000000000)));
0114 batch_type zz1 = batch_type(1.) - x;
0115 batch_type vp = zz1 * detail::horner<batch_type, 0x403c896240f3081dull, 0xc03991aaac01ab68ull, 0x401bdff5baf33e6aull, 0xbfe2079259f9290full, 0x3f684fc3988e9f08ull>(zz1) / detail::horner1<batch_type, 0x40756709b0b644beull, 0xc077fe08959063eeull, 0x40626219af6a7f42ull, 0xc035f2a2b6bf5d8cull>(zz1);
0116 zz1 = sqrt(zz1 + zz1);
0117 batch_type z = constants::pio4<batch_type>() - zz1;
0118 zz1 = fms(zz1, vp, constants::pio_2lo<batch_type>());
0119 z = z - zz1;
0120 zz1 = z + constants::pio4<batch_type>();
0121 batch_type zz2 = self * self;
0122 z = zz2 * detail::horner<batch_type, 0xc020656c06ceafd5ull, 0x40339007da779259ull, 0xc0304331de27907bull, 0x4015c74b178a2dd9ull, 0xbfe34341333e5c16ull, 0x3f716b9b0bd48ad3ull>(zz2) / detail::horner1<batch_type, 0xc04898220a3607acull, 0x4061705684ffbf9dull, 0xc06265bb6d3576d7ull, 0x40519fc025fe9054ull, 0xc02d7b590b5e0eabull>(zz2);
0123 zz2 = fma(x, z, x);
0124 return select(x > batch_type(1.), constants::nan<batch_type>(),
0125 select(small_cond, x,
0126 select(x > ct1, zz1, zz2))
0127 ^ bitofsign(self));
0128 }
0129 template <class A, class T>
0130 XSIMD_INLINE batch<std::complex<T>, A> asin(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
0131 {
0132 using batch_type = batch<std::complex<T>, A>;
0133 using real_batch = typename batch_type::real_batch;
0134 real_batch x = z.real();
0135 real_batch y = z.imag();
0136
0137 batch_type ct(-y, x);
0138 batch_type zz(real_batch(1.) - (x - y) * (x + y), -2 * x * y);
0139 zz = log(ct + sqrt(zz));
0140 batch_type resg(zz.imag(), -zz.real());
0141
0142 return select(y == real_batch(0.),
0143 select(fabs(x) > real_batch(1.),
0144 batch_type(constants::pio2<real_batch>(), real_batch(0.)),
0145 batch_type(asin(x), real_batch(0.))),
0146 resg);
0147 }
0148
0149
0150
0151
0152
0153
0154
0155
0156
0157
0158
0159 namespace detail
0160 {
0161 template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
0162 XSIMD_INLINE batch<T, A>
0163 average(const batch<T, A>& x1, const batch<T, A>& x2) noexcept
0164 {
0165 return (x1 & x2) + ((x1 ^ x2) >> 1);
0166 }
0167
0168 template <class A, class T>
0169 XSIMD_INLINE batch<T, A>
0170 averagef(const batch<T, A>& x1, const batch<T, A>& x2) noexcept
0171 {
0172 using batch_type = batch<T, A>;
0173 return fma(x1, batch_type(0.5), x2 * batch_type(0.5));
0174 }
0175 template <class A>
0176 XSIMD_INLINE batch<float, A> average(batch<float, A> const& x1, batch<float, A> const& x2) noexcept
0177 {
0178 return averagef(x1, x2);
0179 }
0180 template <class A>
0181 XSIMD_INLINE batch<double, A> average(batch<double, A> const& x1, batch<double, A> const& x2) noexcept
0182 {
0183 return averagef(x1, x2);
0184 }
0185 }
0186 template <class A>
0187 XSIMD_INLINE batch<float, A> asinh(batch<float, A> const& self, requires_arch<generic>) noexcept
0188 {
0189 using batch_type = batch<float, A>;
0190 batch_type x = abs(self);
0191 auto lthalf = x < batch_type(0.5);
0192 batch_type x2 = x * x;
0193 batch_type bts = bitofsign(self);
0194 batch_type z(0.);
0195 if (any(lthalf))
0196 {
0197 z = detail::horner<batch_type,
0198 0x3f800000,
0199 0xbe2aa9ad,
0200 0x3d9949b1,
0201 0xbd2ee581,
0202 0x3ca4d6e6>(x2)
0203 * x;
0204 if (all(lthalf))
0205 return z ^ bts;
0206 }
0207 batch_type tmp = select(x > constants::oneosqrteps<batch_type>(), x, detail::average(x, hypot(batch_type(1.), x)));
0208 #ifndef XSIMD_NO_NANS
0209 return select(isnan(self), constants::nan<batch_type>(), select(lthalf, z, log(tmp) + constants::log_2<batch_type>()) ^ bts);
0210 #else
0211 return select(lthalf, z, log(tmp) + constants::log_2<batch_type>()) ^ bts;
0212 #endif
0213 }
0214 template <class A>
0215 XSIMD_INLINE batch<double, A> asinh(batch<double, A> const& self, requires_arch<generic>) noexcept
0216 {
0217 using batch_type = batch<double, A>;
0218 batch_type x = abs(self);
0219 auto test = x > constants::oneosqrteps<batch_type>();
0220 batch_type z = select(test, x - batch_type(1.), x + x * x / (batch_type(1.) + hypot(batch_type(1.), x)));
0221 #ifndef XSIMD_NO_INFINITIES
0222 z = select(x == constants::infinity<batch_type>(), x, z);
0223 #endif
0224 batch_type l1pz = log1p(z);
0225 z = select(test, l1pz + constants::log_2<batch_type>(), l1pz);
0226 return bitofsign(self) ^ z;
0227 }
0228 template <class A, class T>
0229 XSIMD_INLINE batch<std::complex<T>, A> asinh(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
0230 {
0231 using batch_type = batch<std::complex<T>, A>;
0232 batch_type w = asin(batch_type(-z.imag(), z.real()));
0233 w = batch_type(w.imag(), -w.real());
0234 return w;
0235 }
0236
0237
0238 namespace detail
0239 {
0240 template <class A>
0241 static XSIMD_INLINE batch<float, A> kernel_atan(const batch<float, A>& x, const batch<float, A>& recx) noexcept
0242 {
0243 using batch_type = batch<float, A>;
0244 const auto flag1 = x < constants::tan3pio8<batch_type>();
0245 const auto flag2 = (x >= batch_type(bit_cast<float>((uint32_t)0x3ed413cd))) && flag1;
0246 batch_type yy = select(flag1, batch_type(0.), constants::pio2<batch_type>());
0247 yy = select(flag2, constants::pio4<batch_type>(), yy);
0248 batch_type xx = select(flag1, x, -recx);
0249 xx = select(flag2, (x - batch_type(1.)) / (x + batch_type(1.)), xx);
0250 const batch_type z = xx * xx;
0251 batch_type z1 = detail::horner<batch_type,
0252 0xbeaaaa2aul,
0253 0x3e4c925ful,
0254 0xbe0e1b85ul,
0255 0x3da4f0d1ul>(z);
0256 z1 = fma(xx, z1 * z, xx);
0257 z1 = select(flag2, z1 + constants::pio_4lo<batch_type>(), z1);
0258 z1 = select(!flag1, z1 + constants::pio_2lo<batch_type>(), z1);
0259 return yy + z1;
0260 }
0261 template <class A>
0262 static XSIMD_INLINE batch<double, A> kernel_atan(const batch<double, A>& x, const batch<double, A>& recx) noexcept
0263 {
0264 using batch_type = batch<double, A>;
0265 const auto flag1 = x < constants::tan3pio8<batch_type>();
0266 const auto flag2 = (x >= constants::tanpio8<batch_type>()) && flag1;
0267 batch_type yy = select(flag1, batch_type(0.), constants::pio2<batch_type>());
0268 yy = select(flag2, constants::pio4<batch_type>(), yy);
0269 batch_type xx = select(flag1, x, -recx);
0270 xx = select(flag2, (x - batch_type(1.)) / (x + batch_type(1.)), xx);
0271 batch_type z = xx * xx;
0272 z *= detail::horner<batch_type,
0273 0xc0503669fd28ec8eull,
0274 0xc05eb8bf2d05ba25ull,
0275 0xc052c08c36880273ull,
0276 0xc03028545b6b807aull,
0277 0xbfec007fa1f72594ull>(z)
0278 / detail::horner1<batch_type,
0279 0x4068519efbbd62ecull,
0280 0x407e563f13b049eaull,
0281 0x407b0e18d2e2be3bull,
0282 0x4064a0dd43b8fa25ull,
0283 0x4038dbc45b14603cull>(z);
0284 z = fma(xx, z, xx);
0285 z = select(flag2, z + constants::pio_4lo<batch_type>(), z);
0286 z = z + select(flag1, batch_type(0.), constants::pio_2lo<batch_type>());
0287 return yy + z;
0288 }
0289 }
0290 template <class A, class T>
0291 XSIMD_INLINE batch<T, A> atan(batch<T, A> const& self, requires_arch<generic>) noexcept
0292 {
0293 using batch_type = batch<T, A>;
0294 const batch_type absa = abs(self);
0295 const batch_type x = detail::kernel_atan(absa, batch_type(1.) / absa);
0296 return x ^ bitofsign(self);
0297 }
0298 template <class A, class T>
0299 XSIMD_INLINE batch<std::complex<T>, A> atan(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
0300 {
0301 using batch_type = batch<std::complex<T>, A>;
0302 using real_batch = typename batch_type::real_batch;
0303 real_batch x = z.real();
0304 real_batch y = z.imag();
0305 real_batch x2 = x * x;
0306 real_batch one(1.);
0307 real_batch a = one - x2 - (y * y);
0308 real_batch w = 0.5 * atan2(2. * x, a);
0309 real_batch num = y + one;
0310 num = x2 + num * num;
0311 real_batch den = y - one;
0312 den = x2 + den * den;
0313 batch_type res = select((x == real_batch(0.)) && (y == real_batch(1.)),
0314 batch_type(real_batch(0.), constants::infinity<real_batch>()),
0315 batch_type(w, 0.25 * log(num / den)));
0316 return res;
0317 }
0318
0319
0320
0321
0322
0323
0324
0325
0326
0327
0328
0329 template <class A, class T>
0330 XSIMD_INLINE batch<T, A> atanh(batch<T, A> const& self, requires_arch<generic>) noexcept
0331 {
0332 using batch_type = batch<T, A>;
0333 batch_type x = abs(self);
0334 batch_type t = x + x;
0335 batch_type z = batch_type(1.) - x;
0336 auto test = x < batch_type(0.5);
0337 batch_type tmp = select(test, x, t) / z;
0338 return bitofsign(self) ^ (batch_type(0.5) * log1p(select(test, fma(t, tmp, t), tmp)));
0339 }
0340 template <class A, class T>
0341 XSIMD_INLINE batch<std::complex<T>, A> atanh(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
0342 {
0343 using batch_type = batch<std::complex<T>, A>;
0344 batch_type w = atan(batch_type(-z.imag(), z.real()));
0345 w = batch_type(w.imag(), -w.real());
0346 return w;
0347 }
0348
0349
0350 template <class A, class T>
0351 XSIMD_INLINE batch<T, A> atan2(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
0352 {
0353 using batch_type = batch<T, A>;
0354 const batch_type q = abs(self / other);
0355 const batch_type z = detail::kernel_atan(q, batch_type(1.) / q);
0356 return select(other > batch_type(0.), z, constants::pi<batch_type>() - z) * signnz(self);
0357 }
0358
0359
0360 namespace detail
0361 {
0362 template <class T, class A>
0363 XSIMD_INLINE batch<T, A> quadrant(const batch<T, A>& x) noexcept
0364 {
0365 return x & batch<T, A>(3);
0366 }
0367
0368 template <class A>
0369 XSIMD_INLINE batch<float, A> quadrant(const batch<float, A>& x) noexcept
0370 {
0371 return to_float(quadrant(to_int(x)));
0372 }
0373
0374 template <class A>
0375 XSIMD_INLINE batch<double, A> quadrant(const batch<double, A>& x) noexcept
0376 {
0377 using batch_type = batch<double, A>;
0378 batch_type a = x * batch_type(0.25);
0379 return (a - floor(a)) * batch_type(4.);
0380 }
0381
0382
0383
0384
0385
0386
0387
0388
0389
0390
0391 template <class A>
0392 XSIMD_INLINE batch<float, A> cos_eval(const batch<float, A>& z) noexcept
0393 {
0394 using batch_type = batch<float, A>;
0395 batch_type y = detail::horner<batch_type,
0396 0x3d2aaaa5,
0397 0xbab60619,
0398 0x37ccf5ce>(z);
0399 return batch_type(1.) + fma(z, batch_type(-0.5), y * z * z);
0400 }
0401
0402 template <class A>
0403 XSIMD_INLINE batch<float, A> sin_eval(const batch<float, A>& z, const batch<float, A>& x) noexcept
0404 {
0405 using batch_type = batch<float, A>;
0406 batch_type y = detail::horner<batch_type,
0407 0xbe2aaaa2,
0408 0x3c08839d,
0409 0xb94ca1f9>(z);
0410 return fma(y * z, x, x);
0411 }
0412
0413 template <class A>
0414 static XSIMD_INLINE batch<float, A> base_tancot_eval(const batch<float, A>& z) noexcept
0415 {
0416 using batch_type = batch<float, A>;
0417 batch_type zz = z * z;
0418 batch_type y = detail::horner<batch_type,
0419 0x3eaaaa6f,
0420 0x3e0896dd,
0421 0x3d5ac5c9,
0422 0x3cc821b5,
0423 0x3b4c779c,
0424 0x3c19c53b>(zz);
0425 return fma(y, zz * z, z);
0426 }
0427
0428 template <class A, class BB>
0429 static XSIMD_INLINE batch<float, A> tan_eval(const batch<float, A>& z, const BB& test) noexcept
0430 {
0431 using batch_type = batch<float, A>;
0432 batch_type y = base_tancot_eval(z);
0433 return select(test, y, -batch_type(1.) / y);
0434 }
0435
0436 template <class A, class BB>
0437 static XSIMD_INLINE batch<float, A> cot_eval(const batch<float, A>& z, const BB& test) noexcept
0438 {
0439 using batch_type = batch<float, A>;
0440 batch_type y = base_tancot_eval(z);
0441 return select(test, batch_type(1.) / y, -y);
0442 }
0443
0444
0445
0446
0447
0448
0449
0450
0451
0452
0453 template <class A>
0454 static XSIMD_INLINE batch<double, A> cos_eval(const batch<double, A>& z) noexcept
0455 {
0456 using batch_type = batch<double, A>;
0457 batch_type y = detail::horner<batch_type,
0458 0x3fe0000000000000ull,
0459 0xbfa5555555555551ull,
0460 0x3f56c16c16c15d47ull,
0461 0xbefa01a019ddbcd9ull,
0462 0x3e927e4f8e06d9a5ull,
0463 0xbe21eea7c1e514d4ull,
0464 0x3da8ff831ad9b219ull>(z);
0465 return batch_type(1.) - y * z;
0466 }
0467
0468 template <class A>
0469 static XSIMD_INLINE batch<double, A> sin_eval(const batch<double, A>& z, const batch<double, A>& x) noexcept
0470 {
0471 using batch_type = batch<double, A>;
0472 batch_type y = detail::horner<batch_type,
0473 0xbfc5555555555548ull,
0474 0x3f8111111110f7d0ull,
0475 0xbf2a01a019bfdf03ull,
0476 0x3ec71de3567d4896ull,
0477 0xbe5ae5e5a9291691ull,
0478 0x3de5d8fd1fcf0ec1ull>(z);
0479 return fma(y * z, x, x);
0480 }
0481
0482 template <class A>
0483 static XSIMD_INLINE batch<double, A> base_tancot_eval(const batch<double, A>& z) noexcept
0484 {
0485 using batch_type = batch<double, A>;
0486 batch_type zz = z * z;
0487 batch_type num = detail::horner<batch_type,
0488 0xc1711fead3299176ull,
0489 0x413199eca5fc9dddull,
0490 0xc0c992d8d24f3f38ull>(zz);
0491 batch_type den = detail::horner1<batch_type,
0492 0xc189afe03cbe5a31ull,
0493 0x4177d98fc2ead8efull,
0494 0xc13427bc582abc96ull,
0495 0x40cab8a5eeb36572ull>(zz);
0496 return fma(z, (zz * (num / den)), z);
0497 }
0498
0499 template <class A, class BB>
0500 static XSIMD_INLINE batch<double, A> tan_eval(const batch<double, A>& z, const BB& test) noexcept
0501 {
0502 using batch_type = batch<double, A>;
0503 batch_type y = base_tancot_eval(z);
0504 return select(test, y, -batch_type(1.) / y);
0505 }
0506
0507 template <class A, class BB>
0508 static XSIMD_INLINE batch<double, A> cot_eval(const batch<double, A>& z, const BB& test) noexcept
0509 {
0510 using batch_type = batch<double, A>;
0511 batch_type y = base_tancot_eval(z);
0512 return select(test, batch_type(1.) / y, -y);
0513 }
0514
0515
0516
0517
0518
0519
0520
0521
0522
0523
// Empty tag type; it is the default Tag of trigo_reducer and of detail::sin.
struct trigo_radian_tag {};

// Empty tag type selecting the trigo_reducer specialization that scales the
// reduced argument by pi.
struct trigo_pi_tag {};
0530
0531 template <class B, class Tag = trigo_radian_tag>
0532 struct trigo_reducer
0533 {
0534 static XSIMD_INLINE B reduce(const B& x, B& xr) noexcept
0535 {
0536 if (all(x <= constants::pio4<B>()))
0537 {
0538 xr = x;
0539 return B(0.);
0540 }
0541 else if (all(x <= constants::pio2<B>()))
0542 {
0543 auto test = x > constants::pio4<B>();
0544 xr = x - constants::pio2_1<B>();
0545 xr -= constants::pio2_2<B>();
0546 xr -= constants::pio2_3<B>();
0547 xr = select(test, xr, x);
0548 return select(test, B(1.), B(0.));
0549 }
0550 else if (all(x <= constants::twentypi<B>()))
0551 {
0552 B xi = nearbyint(x * constants::twoopi<B>());
0553 xr = fnma(xi, constants::pio2_1<B>(), x);
0554 xr -= xi * constants::pio2_2<B>();
0555 xr -= xi * constants::pio2_3<B>();
0556 return quadrant(xi);
0557 }
0558 else if (all(x <= constants::mediumpi<B>()))
0559 {
0560 B fn = nearbyint(x * constants::twoopi<B>());
0561 B r = x - fn * constants::pio2_1<B>();
0562 B w = fn * constants::pio2_1t<B>();
0563 B t = r;
0564 w = fn * constants::pio2_2<B>();
0565 r = t - w;
0566 w = fn * constants::pio2_2t<B>() - ((t - r) - w);
0567 t = r;
0568 w = fn * constants::pio2_3<B>();
0569 r = t - w;
0570 w = fn * constants::pio2_3t<B>() - ((t - r) - w);
0571 xr = r - w;
0572 return quadrant(fn);
0573 }
0574 else
0575 {
0576 static constexpr std::size_t size = B::size;
0577 using value_type = typename B::value_type;
0578 alignas(B) std::array<value_type, size> tmp;
0579 alignas(B) std::array<value_type, size> txr;
0580 alignas(B) std::array<value_type, size> args;
0581 x.store_aligned(args.data());
0582
0583 for (std::size_t i = 0; i < size; ++i)
0584 {
0585 double arg = args[i];
0586 if (arg == std::numeric_limits<value_type>::infinity())
0587 {
0588 tmp[i] = 0.;
0589 txr[i] = std::numeric_limits<value_type>::quiet_NaN();
0590 }
0591 else
0592 {
0593 double y[2];
0594 std::int32_t n = ::xsimd::detail::__ieee754_rem_pio2(arg, y);
0595 tmp[i] = value_type(n & 3);
0596 txr[i] = value_type(y[0]);
0597 }
0598 }
0599 xr = B::load_aligned(&txr[0]);
0600 B res = B::load_aligned(&tmp[0]);
0601 return res;
0602 }
0603 }
0604 };
0605
0606 template <class B>
0607 struct trigo_reducer<B, trigo_pi_tag>
0608 {
0609 static XSIMD_INLINE B reduce(const B& x, B& xr) noexcept
0610 {
0611 B xi = nearbyint(x * B(2.));
0612 B x2 = x - xi * B(0.5);
0613 xr = x2 * constants::pi<B>();
0614 return quadrant(xi);
0615 }
0616 };
0617
0618 }
0619 template <class A, class T>
0620 XSIMD_INLINE batch<T, A> cos(batch<T, A> const& self, requires_arch<generic>) noexcept
0621 {
0622 using batch_type = batch<T, A>;
0623 const batch_type x = abs(self);
0624 batch_type xr = constants::nan<batch_type>();
0625 const batch_type n = detail::trigo_reducer<batch_type>::reduce(x, xr);
0626 auto tmp = select(n >= batch_type(2.), batch_type(1.), batch_type(0.));
0627 auto swap_bit = fma(batch_type(-2.), tmp, n);
0628 auto sign_bit = select((swap_bit ^ tmp) != batch_type(0.), constants::signmask<batch_type>(), batch_type(0.));
0629 const batch_type z = xr * xr;
0630 const batch_type se = detail::sin_eval(z, xr);
0631 const batch_type ce = detail::cos_eval(z);
0632 const batch_type z1 = select(swap_bit != batch_type(0.), se, ce);
0633 return z1 ^ sign_bit;
0634 }
0635
0636 template <class A, class T>
0637 XSIMD_INLINE batch<std::complex<T>, A> cos(batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept
0638 {
0639 return { cos(z.real()) * cosh(z.imag()), -sin(z.real()) * sinh(z.imag()) };
0640 }
0641
0642
0643
0644
0645
0646
0647
0648
0649
0650
0651
0652
0653
0654 template <class A, class T>
0655 XSIMD_INLINE batch<T, A> cosh(batch<T, A> const& self, requires_arch<generic>) noexcept
0656 {
0657 using batch_type = batch<T, A>;
0658 batch_type x = abs(self);
0659 auto test1 = x > (constants::maxlog<batch_type>() - constants::log_2<batch_type>());
0660 batch_type fac = select(test1, batch_type(0.5), batch_type(1.));
0661 batch_type tmp = exp(x * fac);
0662 batch_type tmp1 = batch_type(0.5) * tmp;
0663 return select(test1, tmp1 * tmp, detail::average(tmp, batch_type(1.) / tmp));
0664 }
0665 template <class A, class T>
0666 XSIMD_INLINE batch<std::complex<T>, A> cosh(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
0667 {
0668 auto x = z.real();
0669 auto y = z.imag();
0670 return { cosh(x) * cos(y), sinh(x) * sin(y) };
0671 }
0672
0673
0674 namespace detail
0675 {
0676 template <class A, class T, class Tag = trigo_radian_tag>
0677 XSIMD_INLINE batch<T, A> sin(batch<T, A> const& self, Tag = Tag()) noexcept
0678 {
0679 using batch_type = batch<T, A>;
0680 const batch_type x = abs(self);
0681 batch_type xr = constants::nan<batch_type>();
0682 const batch_type n = detail::trigo_reducer<batch_type, Tag>::reduce(x, xr);
0683 auto tmp = select(n >= batch_type(2.), batch_type(1.), batch_type(0.));
0684 auto swap_bit = fma(batch_type(-2.), tmp, n);
0685 auto sign_bit = bitofsign(self) ^ select(tmp != batch_type(0.), constants::signmask<batch_type>(), batch_type(0.));
0686 const batch_type z = xr * xr;
0687 const batch_type se = detail::sin_eval(z, xr);
0688 const batch_type ce = detail::cos_eval(z);
0689 const batch_type z1 = select(swap_bit == batch_type(0.), se, ce);
0690 return z1 ^ sign_bit;
0691 }
0692 }
0693
0694 template <class A, class T>
0695 XSIMD_INLINE batch<T, A> sin(batch<T, A> const& self, requires_arch<generic>) noexcept
0696 {
0697 return detail::sin(self);
0698 }
0699
0700 template <class A, class T>
0701 XSIMD_INLINE batch<std::complex<T>, A> sin(batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept
0702 {
0703 return { sin(z.real()) * cosh(z.imag()), cos(z.real()) * sinh(z.imag()) };
0704 }
0705
0706
0707 template <class A, class T>
0708 XSIMD_INLINE std::pair<batch<T, A>, batch<T, A>> sincos(batch<T, A> const& self, requires_arch<generic>) noexcept
0709 {
0710 using batch_type = batch<T, A>;
0711 const batch_type x = abs(self);
0712 batch_type xr = constants::nan<batch_type>();
0713 const batch_type n = detail::trigo_reducer<batch_type>::reduce(x, xr);
0714 auto tmp = select(n >= batch_type(2.), batch_type(1.), batch_type(0.));
0715 auto swap_bit = fma(batch_type(-2.), tmp, n);
0716 const batch_type z = xr * xr;
0717 const batch_type se = detail::sin_eval(z, xr);
0718 const batch_type ce = detail::cos_eval(z);
0719 auto sin_sign_bit = bitofsign(self) ^ select(tmp != batch_type(0.), constants::signmask<batch_type>(), batch_type(0.));
0720 const batch_type sin_z1 = select(swap_bit == batch_type(0.), se, ce);
0721 auto cos_sign_bit = select((swap_bit ^ tmp) != batch_type(0.), constants::signmask<batch_type>(), batch_type(0.));
0722 const batch_type cos_z1 = select(swap_bit != batch_type(0.), se, ce);
0723 return std::make_pair(sin_z1 ^ sin_sign_bit, cos_z1 ^ cos_sign_bit);
0724 }
0725
0726 template <class A, class T>
0727 XSIMD_INLINE std::pair<batch<std::complex<T>, A>, batch<std::complex<T>, A>>
0728 sincos(batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept
0729 {
0730 using batch_type = batch<std::complex<T>, A>;
0731 using real_batch = typename batch_type::real_batch;
0732 real_batch rcos = cos(z.real());
0733 real_batch rsin = sin(z.real());
0734 real_batch icosh = cosh(z.imag());
0735 real_batch isinh = sinh(z.imag());
0736 return std::make_pair(batch_type(rsin * icosh, rcos * isinh), batch_type(rcos * icosh, -rsin * isinh));
0737 }
0738
0739
0740 namespace detail
0741 {
0742
0743
0744
0745
0746
0747
0748
0749
0750
0751 template <class A>
0752 XSIMD_INLINE batch<float, A> sinh_kernel(batch<float, A> const& self) noexcept
0753 {
0754 using batch_type = batch<float, A>;
0755 batch_type sqr_self = self * self;
0756 return detail::horner<batch_type,
0757 0x3f800000,
0758 0x3e2aaacc,
0759 0x3c087bbe,
0760 0x39559e2f
0761 >(sqr_self)
0762 * self;
0763 }
0764
0765 template <class A>
0766 XSIMD_INLINE batch<double, A> sinh_kernel(batch<double, A> const& self) noexcept
0767 {
0768 using batch_type = batch<double, A>;
0769 batch_type sqrself = self * self;
0770 return fma(self, (detail::horner<batch_type,
0771 0xc115782bdbf6ab05ull,
0772 0xc0c694b8c71d6182ull,
0773 0xc064773a398ff4feull,
0774 0xbfe9435fe8bb3cd6ull
0775 >(sqrself)
0776 / detail::horner1<batch_type,
0777 0xc1401a20e4f90044ull,
0778 0x40e1a7ba7ed72245ull,
0779 0xc0715b6096e96484ull
0780 >(sqrself))
0781 * sqrself,
0782 self);
0783 }
0784 }
0785
0786
0787
0788
0789
0790
0791
0792
0793
0794 template <class A, class T>
0795 XSIMD_INLINE batch<T, A> sinh(batch<T, A> const& a, requires_arch<generic>) noexcept
0796 {
0797 using batch_type = batch<T, A>;
0798 batch_type half(0.5);
0799 batch_type x = abs(a);
0800 auto lt1 = x < batch_type(1.);
0801 batch_type bts = bitofsign(a);
0802 batch_type z(0.);
0803 if (any(lt1))
0804 {
0805 z = detail::sinh_kernel(x);
0806 if (all(lt1))
0807 return z ^ bts;
0808 }
0809 auto test1 = x > (constants::maxlog<batch_type>() - constants::log_2<batch_type>());
0810 batch_type fac = select(test1, half, batch_type(1.));
0811 batch_type tmp = exp(x * fac);
0812 batch_type tmp1 = half * tmp;
0813 batch_type r = select(test1, tmp1 * tmp, tmp1 - half / tmp);
0814 return select(lt1, z, r) ^ bts;
0815 }
0816 template <class A, class T>
0817 XSIMD_INLINE batch<std::complex<T>, A> sinh(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
0818 {
0819 auto x = z.real();
0820 auto y = z.imag();
0821 return { sinh(x) * cos(y), cosh(x) * sin(y) };
0822 }
0823
0824
0825 template <class A, class T>
0826 XSIMD_INLINE batch<T, A> tan(batch<T, A> const& self, requires_arch<generic>) noexcept
0827 {
0828 using batch_type = batch<T, A>;
0829 const batch_type x = abs(self);
0830 batch_type xr = constants::nan<batch_type>();
0831 const batch_type n = detail::trigo_reducer<batch_type>::reduce(x, xr);
0832 auto tmp = select(n >= batch_type(2.), batch_type(1.), batch_type(0.));
0833 auto swap_bit = fma(batch_type(-2.), tmp, n);
0834 auto test = (swap_bit == batch_type(0.));
0835 const batch_type y = detail::tan_eval(xr, test);
0836 return y ^ bitofsign(self);
0837 }
0838 template <class A, class T>
0839 XSIMD_INLINE batch<std::complex<T>, A> tan(batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept
0840 {
0841 using batch_type = batch<std::complex<T>, A>;
0842 using real_batch = typename batch_type::real_batch;
0843 real_batch d = cos(2 * z.real()) + cosh(2 * z.imag());
0844 batch_type winf(constants::infinity<real_batch>(), constants::infinity<real_batch>());
0845 real_batch wreal = sin(2 * z.real()) / d;
0846 real_batch wimag = sinh(2 * z.imag());
0847 batch_type wres = select(isinf(wimag), batch_type(wreal, real_batch(1.)), batch_type(wreal, wimag / d));
0848 return select(d == real_batch(0.), winf, wres);
0849 }
0850
0851
0852 namespace detail
0853 {
0854
0855
0856
0857
0858
0859
0860
0861
0862
0863 template <class B>
0864 struct tanh_kernel;
0865
0866 template <class A>
0867 struct tanh_kernel<batch<float, A>>
0868 {
0869 using batch_type = batch<float, A>;
0870 static XSIMD_INLINE batch_type tanh(const batch_type& x) noexcept
0871 {
0872 batch_type sqrx = x * x;
0873 return fma(detail::horner<batch_type,
0874 0xbeaaaa99,
0875 0x3e088393,
0876 0xbd5c1e2d,
0877 0x3ca9134e,
0878 0xbbbaf0ea
0879 >(sqrx)
0880 * sqrx,
0881 x, x);
0882 }
0883
0884 static XSIMD_INLINE batch_type cotanh(const batch_type& x) noexcept
0885 {
0886 return batch_type(1.) / tanh(x);
0887 }
0888 };
0889
0890 template <class A>
0891 struct tanh_kernel<batch<double, A>>
0892 {
0893 using batch_type = batch<double, A>;
0894 static XSIMD_INLINE batch_type tanh(const batch_type& x) noexcept
0895 {
0896 batch_type sqrx = x * x;
0897 return fma(sqrx * p(sqrx) / q(sqrx), x, x);
0898 }
0899
0900 static XSIMD_INLINE batch_type cotanh(const batch_type& x) noexcept
0901 {
0902 batch_type sqrx = x * x;
0903 batch_type qval = q(sqrx);
0904 return qval / (x * fma(p(sqrx), sqrx, qval));
0905 }
0906
0907 static XSIMD_INLINE batch_type p(const batch_type& x) noexcept
0908 {
0909 return detail::horner<batch_type,
0910 0xc0993ac030580563,
0911 0xc058d26a0e26682d,
0912 0xbfeedc5baafd6f4b
0913 >(x);
0914 }
0915
0916 static XSIMD_INLINE batch_type q(const batch_type& x) noexcept
0917 {
0918 return detail::horner1<batch_type,
0919 0x40b2ec102442040c,
0920 0x40a176fa0e5535fa,
0921 0x405c33f28a581B86
0922 >(x);
0923 }
0924 };
0925
0926 }
0927
0928
0929
0930
0931
0932
0933
0934
0935
0936 template <class A, class T>
0937 XSIMD_INLINE batch<T, A> tanh(batch<T, A> const& self, requires_arch<generic>) noexcept
0938 {
0939 using batch_type = batch<T, A>;
0940 batch_type one(1.);
0941 batch_type x = abs(self);
0942 auto test = x < (batch_type(5.) / batch_type(8.));
0943 batch_type bts = bitofsign(self);
0944 batch_type z = one;
0945 if (any(test))
0946 {
0947 z = detail::tanh_kernel<batch_type>::tanh(x);
0948 if (all(test))
0949 return z ^ bts;
0950 }
0951 batch_type r = fma(batch_type(-2.), one / (one + exp(x + x)), one);
0952 return select(test, z, r) ^ bts;
0953 }
0954 template <class A, class T>
0955 XSIMD_INLINE batch<std::complex<T>, A> tanh(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
0956 {
0957 using real_batch = typename batch<std::complex<T>, A>::real_batch;
0958 auto x = z.real();
0959 auto y = z.imag();
0960 real_batch two(2);
0961 auto d = cosh(two * x) + cos(two * y);
0962 return { sinh(two * x) / d, sin(two * y) / d };
0963 }
0964
0965 }
0966
0967 }
0968
0969 #endif