arch/generic/xsimd_generic_trigo.hpp

0001 /***************************************************************************
0002  * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and         *
0003  * Martin Renou                                                             *
0004  * Copyright (c) QuantStack                                                 *
0005  * Copyright (c) Serge Guelton                                              *
0006  *                                                                          *
0007  * Distributed under the terms of the BSD 3-Clause License.                 *
0008  *                                                                          *
0009  * The full license is in the file LICENSE, distributed with this software. *
0010  ****************************************************************************/
0011
0012 #ifndef XSIMD_GENERIC_TRIGO_HPP
0013 #define XSIMD_GENERIC_TRIGO_HPP
0014
0015 #include "./xsimd_generic_details.hpp"
0016
0017 #include <array>
0018
0019 namespace xsimd
0020 {
0021
0022     namespace kernel
0023     {
0024         /* origin: boost/simd/arch/common/detail/simd/trig_base.hpp */
0025         /*
0026          * ====================================================
0027          * copyright 2016 NumScale SAS
0028          *
0029          * Distributed under the Boost Software License, Version 1.0.
0030          * (See copy at http://boost.org/LICENSE_1_0.txt)
0031          * ====================================================
0032          */
0033
0034         using namespace types;
0035
0036         // acos
0037         template <class A, class T>
0038         XSIMD_INLINE batch<T, A> acos(batch<T, A> const& self, requires_arch<generic>) noexcept
0039         {
0040             using batch_type = batch<T, A>;
0041             batch_type x = abs(self);
0042             auto x_larger_05 = x > batch_type(0.5);
0043             x = select(x_larger_05, sqrt(fma(batch_type(-0.5), x, batch_type(0.5))), self);
0044             x = asin(x);
0045             x = select(x_larger_05, x + x, x);
0046             x = select(self < batch_type(-0.5), constants::pi<batch_type>() - x, x);
0047             return select(x_larger_05, x, constants::pio2<batch_type>() - x);
0048         }
0049         template <class A, class T>
0050         XSIMD_INLINE batch<std::complex<T>, A> acos(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
0051         {
0052             using batch_type = batch<std::complex<T>, A>;
0053             using real_batch = typename batch_type::real_batch;
0054             batch_type tmp = asin(z);
0055             return { constants::pio2<real_batch>() - tmp.real(), -tmp.imag() };
0056         }
0057
0058         // acosh
0059         /* origin: boost/simd/arch/common/simd/function/acosh.hpp */
0060         /*
0061          * ====================================================
0062          * copyright 2016 NumScale SAS
0063          *
0064          * Distributed under the Boost Software License, Version 1.0.
0065          * (See copy at http://boost.org/LICENSE_1_0.txt)
0066          * ====================================================
0067          */
0068         template <class A, class T>
0069         XSIMD_INLINE batch<T, A> acosh(batch<T, A> const& self, requires_arch<generic>) noexcept
0070         {
0071             using batch_type = batch<T, A>;
0072             batch_type x = self - batch_type(1.);
0073             auto test = x > constants::oneotwoeps<batch_type>();
0074             batch_type z = select(test, self, x + sqrt(x + x + x * x));
0075             batch_type l1pz = log1p(z);
0076             return select(test, l1pz + constants::log_2<batch_type>(), l1pz);
0077         }
0078         template <class A, class T>
0079         XSIMD_INLINE batch<std::complex<T>, A> acosh(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
0080         {
0081             using batch_type = batch<std::complex<T>, A>;
0082             batch_type w = acos(z);
0083             w = batch_type(-w.imag(), w.real());
0084             return w;
0085         }
0086
0087         // asin
0088         template <class A>
0089         XSIMD_INLINE batch<float, A> asin(batch<float, A> const& self, requires_arch<generic>) noexcept
0090         {
0091             using batch_type = batch<float, A>;
0092             batch_type x = abs(self);
0093             batch_type sign = bitofsign(self);
0094             auto x_larger_05 = x > batch_type(0.5);
0095             batch_type z = select(x_larger_05, batch_type(0.5) * (batch_type(1.) - x), x * x);
0096             x = select(x_larger_05, sqrt(z), x);
0097             batch_type z1 = detail::horner<batch_type,
0098                                            0x3e2aaae4,
0099                                            0x3d9980f6,
0100                                            0x3d3a3ec7,
0101                                            0x3cc617e3,
0102                                            0x3d2cb352>(z);
0103             z1 = fma(z1, z * x, x);
0104             z = select(x_larger_05, constants::pio2<batch_type>() - (z1 + z1), z1);
0105             return z ^ sign;
0106         }
0107         template <class A>
0108         XSIMD_INLINE batch<double, A> asin(batch<double, A> const& self, requires_arch<generic>) noexcept
0109         {
0110             using batch_type = batch<double, A>;
0111             batch_type x = abs(self);
0112             auto small_cond = x < constants::sqrteps<batch_type>();
0113             batch_type ct1 = batch_type(bit_cast<double>(int64_t(0x3fe4000000000000)));
0114             batch_type zz1 = batch_type(1.) - x;
0115             batch_type vp = zz1 * detail::horner<batch_type, 0x403c896240f3081dull, 0xc03991aaac01ab68ull, 0x401bdff5baf33e6aull, 0xbfe2079259f9290full, 0x3f684fc3988e9f08ull>(zz1) / detail::horner1<batch_type, 0x40756709b0b644beull, 0xc077fe08959063eeull, 0x40626219af6a7f42ull, 0xc035f2a2b6bf5d8cull>(zz1);
0116             zz1 = sqrt(zz1 + zz1);
0117             batch_type z = constants::pio4<batch_type>() - zz1;
0118             zz1 = fms(zz1, vp, constants::pio_2lo<batch_type>());
0119             z = z - zz1;
0120             zz1 = z + constants::pio4<batch_type>();
0121             batch_type zz2 = self * self;
0122             z = zz2 * detail::horner<batch_type, 0xc020656c06ceafd5ull, 0x40339007da779259ull, 0xc0304331de27907bull, 0x4015c74b178a2dd9ull, 0xbfe34341333e5c16ull, 0x3f716b9b0bd48ad3ull>(zz2) / detail::horner1<batch_type, 0xc04898220a3607acull, 0x4061705684ffbf9dull, 0xc06265bb6d3576d7ull, 0x40519fc025fe9054ull, 0xc02d7b590b5e0eabull>(zz2);
0123             zz2 = fma(x, z, x);
0124             return select(x > batch_type(1.), constants::nan<batch_type>(),
0125                           select(small_cond, x,
0126                                  select(x > ct1, zz1, zz2))
0127                               ^ bitofsign(self));
0128         }
0129         template <class A, class T>
0130         XSIMD_INLINE batch<std::complex<T>, A> asin(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
0131         {
0132             using batch_type = batch<std::complex<T>, A>;
0133             using real_batch = typename batch_type::real_batch;
0134             real_batch x = z.real();
0135             real_batch y = z.imag();
0136
0137             batch_type ct(-y, x);
0138             batch_type zz(real_batch(1.) - (x - y) * (x + y), -2 * x * y);
0139             zz = log(ct + sqrt(zz));
0140             batch_type resg(zz.imag(), -zz.real());
0141
0142             return select(y == real_batch(0.),
0143                           select(fabs(x) > real_batch(1.),
0144                                  batch_type(constants::pio2<real_batch>(), real_batch(0.)),
0145                                  batch_type(asin(x), real_batch(0.))),
0146                           resg);
0147         }
0148
0149         // asinh
0150         /* origin: boost/simd/arch/common/simd/function/asinh.hpp */
0151         /*
0152          * ====================================================
0153          * copyright 2016 NumScale SAS
0154          *
0155          * Distributed under the Boost Software License, Version 1.0.
0156          * (See copy at http://boost.org/LICENSE_1_0.txt)
0157          * ====================================================
0158          */
0159         namespace detail
0160         {
0161             template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
0162             XSIMD_INLINE batch<T, A>
0163             average(const batch<T, A>& x1, const batch<T, A>& x2) noexcept
0164             {
0165                 return (x1 & x2) + ((x1 ^ x2) >> 1);
0166             }
0167
0168             template <class A, class T>
0169             XSIMD_INLINE batch<T, A>
0170             averagef(const batch<T, A>& x1, const batch<T, A>& x2) noexcept
0171             {
0172                 using batch_type = batch<T, A>;
0173                 return fma(x1, batch_type(0.5), x2 * batch_type(0.5));
0174             }
0175             template <class A>
0176             XSIMD_INLINE batch<float, A> average(batch<float, A> const& x1, batch<float, A> const& x2) noexcept
0177             {
0178                 return averagef(x1, x2);
0179             }
0180             template <class A>
0181             XSIMD_INLINE batch<double, A> average(batch<double, A> const& x1, batch<double, A> const& x2) noexcept
0182             {
0183                 return averagef(x1, x2);
0184             }
0185         }
0186         template <class A>
0187         XSIMD_INLINE batch<float, A> asinh(batch<float, A> const& self, requires_arch<generic>) noexcept
0188         {
0189             using batch_type = batch<float, A>;
0190             batch_type x = abs(self);
0191             auto lthalf = x < batch_type(0.5);
0192             batch_type x2 = x * x;
0193             batch_type bts = bitofsign(self);
0194             batch_type z(0.);
0195             if (any(lthalf))
0196             {
0197                 z = detail::horner<batch_type,
0198                                    0x3f800000,
0199                                    0xbe2aa9ad,
0200                                    0x3d9949b1,
0201                                    0xbd2ee581,
0202                                    0x3ca4d6e6>(x2)
0203                     * x;
0204                 if (all(lthalf))
0205                     return z ^ bts;
0206             }
0207             batch_type tmp = select(x > constants::oneosqrteps<batch_type>(), x, detail::average(x, hypot(batch_type(1.), x)));
0208 #ifndef XSIMD_NO_NANS
0209             return select(isnan(self), constants::nan<batch_type>(), select(lthalf, z, log(tmp) + constants::log_2<batch_type>()) ^ bts);
0210 #else
0211             return select(lthalf, z, log(tmp) + constants::log_2<batch_type>()) ^ bts;
0212 #endif
0213         }
0214         template <class A>
0215         XSIMD_INLINE batch<double, A> asinh(batch<double, A> const& self, requires_arch<generic>) noexcept
0216         {
0217             using batch_type = batch<double, A>;
0218             batch_type x = abs(self);
0219             auto test = x > constants::oneosqrteps<batch_type>();
0220             batch_type z = select(test, x - batch_type(1.), x + x * x / (batch_type(1.) + hypot(batch_type(1.), x)));
0221 #ifndef XSIMD_NO_INFINITIES
0222             z = select(x == constants::infinity<batch_type>(), x, z);
0223 #endif
0224             batch_type l1pz = log1p(z);
0225             z = select(test, l1pz + constants::log_2<batch_type>(), l1pz);
0226             return bitofsign(self) ^ z;
0227         }
0228         template <class A, class T>
0229         XSIMD_INLINE batch<std::complex<T>, A> asinh(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
0230         {
0231             using batch_type = batch<std::complex<T>, A>;
0232             batch_type w = asin(batch_type(-z.imag(), z.real()));
0233             w = batch_type(w.imag(), -w.real());
0234             return w;
0235         }
0236
0237         // atan
0238         namespace detail
0239         {
0240             template <class A>
0241             static XSIMD_INLINE batch<float, A> kernel_atan(const batch<float, A>& x, const batch<float, A>& recx) noexcept
0242             {
0243                 using batch_type = batch<float, A>;
0244                 const auto flag1 = x < constants::tan3pio8<batch_type>();
0245                 const auto flag2 = (x >= batch_type(bit_cast<float>((uint32_t)0x3ed413cd))) && flag1;
0246                 batch_type yy = select(flag1, batch_type(0.), constants::pio2<batch_type>());
0247                 yy = select(flag2, constants::pio4<batch_type>(), yy);
0248                 batch_type xx = select(flag1, x, -recx);
0249                 xx = select(flag2, (x - batch_type(1.)) / (x + batch_type(1.)), xx);
0250                 const batch_type z = xx * xx;
0251                 batch_type z1 = detail::horner<batch_type,
0252                                                0xbeaaaa2aul,
0253                                                0x3e4c925ful,
0254                                                0xbe0e1b85ul,
0255                                                0x3da4f0d1ul>(z);
0256                 z1 = fma(xx, z1 * z, xx);
0257                 z1 = select(flag2, z1 + constants::pio_4lo<batch_type>(), z1);
0258                 z1 = select(!flag1, z1 + constants::pio_2lo<batch_type>(), z1);
0259                 return yy + z1;
0260             }
0261             template <class A>
0262             static XSIMD_INLINE batch<double, A> kernel_atan(const batch<double, A>& x, const batch<double, A>& recx) noexcept
0263             {
0264                 using batch_type = batch<double, A>;
0265                 const auto flag1 = x < constants::tan3pio8<batch_type>();
0266                 const auto flag2 = (x >= constants::tanpio8<batch_type>()) && flag1;
0267                 batch_type yy = select(flag1, batch_type(0.), constants::pio2<batch_type>());
0268                 yy = select(flag2, constants::pio4<batch_type>(), yy);
0269                 batch_type xx = select(flag1, x, -recx);
0270                 xx = select(flag2, (x - batch_type(1.)) / (x + batch_type(1.)), xx);
0271                 batch_type z = xx * xx;
0272                 z *= detail::horner<batch_type,
0273                                     0xc0503669fd28ec8eull,
0274                                     0xc05eb8bf2d05ba25ull,
0275                                     0xc052c08c36880273ull,
0276                                     0xc03028545b6b807aull,
0277                                     0xbfec007fa1f72594ull>(z)
0278                     / detail::horner1<batch_type,
0279                                       0x4068519efbbd62ecull,
0280                                       0x407e563f13b049eaull,
0281                                       0x407b0e18d2e2be3bull,
0282                                       0x4064a0dd43b8fa25ull,
0283                                       0x4038dbc45b14603cull>(z);
0284                 z = fma(xx, z, xx);
0285                 z = select(flag2, z + constants::pio_4lo<batch_type>(), z);
0286                 z = z + select(flag1, batch_type(0.), constants::pio_2lo<batch_type>());
0287                 return yy + z;
0288             }
0289         }
0290         template <class A, class T>
0291         XSIMD_INLINE batch<T, A> atan(batch<T, A> const& self, requires_arch<generic>) noexcept
0292         {
0293             using batch_type = batch<T, A>;
0294             const batch_type absa = abs(self);
0295             const batch_type x = detail::kernel_atan(absa, batch_type(1.) / absa);
0296             return x ^ bitofsign(self);
0297         }
0298         template <class A, class T>
0299         XSIMD_INLINE batch<std::complex<T>, A> atan(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
0300         {
0301             using batch_type = batch<std::complex<T>, A>;
0302             using real_batch = typename batch_type::real_batch;
0303             real_batch x = z.real();
0304             real_batch y = z.imag();
0305             real_batch x2 = x * x;
0306             real_batch one(1.);
0307             real_batch a = one - x2 - (y * y);
0308             real_batch w = 0.5 * atan2(2. * x, a);
0309             real_batch num = y + one;
0310             num = x2 + num * num;
0311             real_batch den = y - one;
0312             den = x2 + den * den;
0313             batch_type res = select((x == real_batch(0.)) && (y == real_batch(1.)),
0314                                     batch_type(real_batch(0.), constants::infinity<real_batch>()),
0315                                     batch_type(w, 0.25 * log(num / den)));
0316             return res;
0317         }
0318
0319         // atanh
        /* origin: boost/simd/arch/common/simd/function/atanh.hpp */
0321         /*
0322          * ====================================================
0323          * copyright 2016 NumScale SAS
0324          *
0325          * Distributed under the Boost Software License, Version 1.0.
0326          * (See copy at http://boost.org/LICENSE_1_0.txt)
0327          * ====================================================
0328          */
0329         template <class A, class T>
0330         XSIMD_INLINE batch<T, A> atanh(batch<T, A> const& self, requires_arch<generic>) noexcept
0331         {
0332             using batch_type = batch<T, A>;
0333             batch_type x = abs(self);
0334             batch_type t = x + x;
0335             batch_type z = batch_type(1.) - x;
0336             auto test = x < batch_type(0.5);
0337             batch_type tmp = select(test, x, t) / z;
0338             return bitofsign(self) ^ (batch_type(0.5) * log1p(select(test, fma(t, tmp, t), tmp)));
0339         }
0340         template <class A, class T>
0341         XSIMD_INLINE batch<std::complex<T>, A> atanh(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
0342         {
0343             using batch_type = batch<std::complex<T>, A>;
0344             batch_type w = atan(batch_type(-z.imag(), z.real()));
0345             w = batch_type(w.imag(), -w.real());
0346             return w;
0347         }
0348
0349         // atan2
0350         template <class A, class T>
0351         XSIMD_INLINE batch<T, A> atan2(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
0352         {
0353             using batch_type = batch<T, A>;
0354             const batch_type q = abs(self / other);
0355             const batch_type z = detail::kernel_atan(q, batch_type(1.) / q);
0356             return select(other > batch_type(0.), z, constants::pi<batch_type>() - z) * signnz(self);
0357         }
0358
0359         // cos
0360         namespace detail
0361         {
0362             template <class T, class A>
0363             XSIMD_INLINE batch<T, A> quadrant(const batch<T, A>& x) noexcept
0364             {
0365                 return x & batch<T, A>(3);
0366             }
0367
0368             template <class A>
0369             XSIMD_INLINE batch<float, A> quadrant(const batch<float, A>& x) noexcept
0370             {
0371                 return to_float(quadrant(to_int(x)));
0372             }
0373
0374             template <class A>
0375             XSIMD_INLINE batch<double, A> quadrant(const batch<double, A>& x) noexcept
0376             {
0377                 using batch_type = batch<double, A>;
0378                 batch_type a = x * batch_type(0.25);
0379                 return (a - floor(a)) * batch_type(4.);
0380             }
0381             /* origin: boost/simd/arch/common/detail/simd/f_trig_evaluation.hpp */
0382             /*
0383              * ====================================================
0384              * copyright 2016 NumScale SAS
0385              *
0386              * Distributed under the Boost Software License, Version 1.0.
0387              * (See copy at http://boost.org/LICENSE_1_0.txt)
0388              * ====================================================
0389              */
0390
0391             template <class A>
0392             XSIMD_INLINE batch<float, A> cos_eval(const batch<float, A>& z) noexcept
0393             {
0394                 using batch_type = batch<float, A>;
0395                 batch_type y = detail::horner<batch_type,
0396                                               0x3d2aaaa5,
0397                                               0xbab60619,
0398                                               0x37ccf5ce>(z);
0399                 return batch_type(1.) + fma(z, batch_type(-0.5), y * z * z);
0400             }
0401
0402             template <class A>
0403             XSIMD_INLINE batch<float, A> sin_eval(const batch<float, A>& z, const batch<float, A>& x) noexcept
0404             {
0405                 using batch_type = batch<float, A>;
0406                 batch_type y = detail::horner<batch_type,
0407                                               0xbe2aaaa2,
0408                                               0x3c08839d,
0409                                               0xb94ca1f9>(z);
0410                 return fma(y * z, x, x);
0411             }
0412
0413             template <class A>
0414             static XSIMD_INLINE batch<float, A> base_tancot_eval(const batch<float, A>& z) noexcept
0415             {
0416                 using batch_type = batch<float, A>;
0417                 batch_type zz = z * z;
0418                 batch_type y = detail::horner<batch_type,
0419                                               0x3eaaaa6f,
0420                                               0x3e0896dd,
0421                                               0x3d5ac5c9,
0422                                               0x3cc821b5,
0423                                               0x3b4c779c,
0424                                               0x3c19c53b>(zz);
0425                 return fma(y, zz * z, z);
0426             }
0427
0428             template <class A, class BB>
0429             static XSIMD_INLINE batch<float, A> tan_eval(const batch<float, A>& z, const BB& test) noexcept
0430             {
0431                 using batch_type = batch<float, A>;
0432                 batch_type y = base_tancot_eval(z);
0433                 return select(test, y, -batch_type(1.) / y);
0434             }
0435
0436             template <class A, class BB>
0437             static XSIMD_INLINE batch<float, A> cot_eval(const batch<float, A>& z, const BB& test) noexcept
0438             {
0439                 using batch_type = batch<float, A>;
0440                 batch_type y = base_tancot_eval(z);
0441                 return select(test, batch_type(1.) / y, -y);
0442             }
0443
0444             /* origin: boost/simd/arch/common/detail/simd/d_trig_evaluation.hpp */
0445             /*
0446              * ====================================================
0447              * copyright 2016 NumScale SAS
0448              *
0449              * Distributed under the Boost Software License, Version 1.0.
0450              * (See copy at http://boost.org/LICENSE_1_0.txt)
0451              * ====================================================
0452              */
0453             template <class A>
0454             static XSIMD_INLINE batch<double, A> cos_eval(const batch<double, A>& z) noexcept
0455             {
0456                 using batch_type = batch<double, A>;
0457                 batch_type y = detail::horner<batch_type,
0458                                               0x3fe0000000000000ull,
0459                                               0xbfa5555555555551ull,
0460                                               0x3f56c16c16c15d47ull,
0461                                               0xbefa01a019ddbcd9ull,
0462                                               0x3e927e4f8e06d9a5ull,
0463                                               0xbe21eea7c1e514d4ull,
0464                                               0x3da8ff831ad9b219ull>(z);
0465                 return batch_type(1.) - y * z;
0466             }
0467
0468             template <class A>
0469             static XSIMD_INLINE batch<double, A> sin_eval(const batch<double, A>& z, const batch<double, A>& x) noexcept
0470             {
0471                 using batch_type = batch<double, A>;
0472                 batch_type y = detail::horner<batch_type,
0473                                               0xbfc5555555555548ull,
0474                                               0x3f8111111110f7d0ull,
0475                                               0xbf2a01a019bfdf03ull,
0476                                               0x3ec71de3567d4896ull,
0477                                               0xbe5ae5e5a9291691ull,
0478                                               0x3de5d8fd1fcf0ec1ull>(z);
0479                 return fma(y * z, x, x);
0480             }
0481
0482             template <class A>
0483             static XSIMD_INLINE batch<double, A> base_tancot_eval(const batch<double, A>& z) noexcept
0484             {
0485                 using batch_type = batch<double, A>;
0486                 batch_type zz = z * z;
0487                 batch_type num = detail::horner<batch_type,
0488                                                 0xc1711fead3299176ull,
0489                                                 0x413199eca5fc9dddull,
0490                                                 0xc0c992d8d24f3f38ull>(zz);
0491                 batch_type den = detail::horner1<batch_type,
0492                                                  0xc189afe03cbe5a31ull,
0493                                                  0x4177d98fc2ead8efull,
0494                                                  0xc13427bc582abc96ull,
0495                                                  0x40cab8a5eeb36572ull>(zz);
0496                 return fma(z, (zz * (num / den)), z);
0497             }
0498
0499             template <class A, class BB>
0500             static XSIMD_INLINE batch<double, A> tan_eval(const batch<double, A>& z, const BB& test) noexcept
0501             {
0502                 using batch_type = batch<double, A>;
0503                 batch_type y = base_tancot_eval(z);
0504                 return select(test, y, -batch_type(1.) / y);
0505             }
0506
0507             template <class A, class BB>
0508             static XSIMD_INLINE batch<double, A> cot_eval(const batch<double, A>& z, const BB& test) noexcept
0509             {
0510                 using batch_type = batch<double, A>;
0511                 batch_type y = base_tancot_eval(z);
0512                 return select(test, batch_type(1.) / y, -y);
0513             }
0514             /* origin: boost/simd/arch/common/detail/simd/trig_reduction.hpp */
0515             /*
0516              * ====================================================
0517              * copyright 2016 NumScale SAS
0518              *
0519              * Distributed under the Boost Software License, Version 1.0.
0520              * (See copy at http://boost.org/LICENSE_1_0.txt)
0521              * ====================================================
0522              */
0523
0524             struct trigo_radian_tag
0525             {
0526             };
0527             struct trigo_pi_tag
0528             {
0529             };
0530
0531             template <class B, class Tag = trigo_radian_tag>
0532             struct trigo_reducer
0533             {
0534                 static XSIMD_INLINE B reduce(const B& x, B& xr) noexcept
0535                 {
0536                     if (all(x <= constants::pio4<B>()))
0537                     {
0538                         xr = x;
0539                         return B(0.);
0540                     }
0541                     else if (all(x <= constants::pio2<B>()))
0542                     {
0543                         auto test = x > constants::pio4<B>();
0544                         xr = x - constants::pio2_1<B>();
0545                         xr -= constants::pio2_2<B>();
0546                         xr -= constants::pio2_3<B>();
0547                         xr = select(test, xr, x);
0548                         return select(test, B(1.), B(0.));
0549                     }
0550                     else if (all(x <= constants::twentypi<B>()))
0551                     {
0552                         B xi = nearbyint(x * constants::twoopi<B>());
0553                         xr = fnma(xi, constants::pio2_1<B>(), x);
0554                         xr -= xi * constants::pio2_2<B>();
0555                         xr -= xi * constants::pio2_3<B>();
0556                         return quadrant(xi);
0557                     }
0558                     else if (all(x <= constants::mediumpi<B>()))
0559                     {
0560                         B fn = nearbyint(x * constants::twoopi<B>());
0561                         B r = x - fn * constants::pio2_1<B>();
0562                         B w = fn * constants::pio2_1t<B>();
0563                         B t = r;
0564                         w = fn * constants::pio2_2<B>();
0565                         r = t - w;
0566                         w = fn * constants::pio2_2t<B>() - ((t - r) - w);
0567                         t = r;
0568                         w = fn * constants::pio2_3<B>();
0569                         r = t - w;
0570                         w = fn * constants::pio2_3t<B>() - ((t - r) - w);
0571                         xr = r - w;
0572                         return quadrant(fn);
0573                     }
0574                     else
0575                     {
0576                         static constexpr std::size_t size = B::size;
0577                         using value_type = typename B::value_type;
0578                         alignas(B) std::array<value_type, size> tmp;
0579                         alignas(B) std::array<value_type, size> txr;
0580                         alignas(B) std::array<value_type, size> args;
0581                         x.store_aligned(args.data());
0582
0583                         for (std::size_t i = 0; i < size; ++i)
0584                         {
0585                             double arg = args[i];
0586                             if (arg == std::numeric_limits<value_type>::infinity())
0587                             {
0588                                 tmp[i] = 0.;
0589                                 txr[i] = std::numeric_limits<value_type>::quiet_NaN();
0590                             }
0591                             else
0592                             {
0593                                 double y[2];
0594                                 std::int32_t n = ::xsimd::detail::__ieee754_rem_pio2(arg, y);
0595                                 tmp[i] = value_type(n & 3);
0596                                 txr[i] = value_type(y[0]);
0597                             }
0598                         }
0599                         xr = B::load_aligned(&txr[0]);
0600                         B res = B::load_aligned(&tmp[0]);
0601                         return res;
0602                     }
0603                 }
0604             };
0605
0606             template <class B>
0607             struct trigo_reducer<B, trigo_pi_tag>
0608             {
0609                 static XSIMD_INLINE B reduce(const B& x, B& xr) noexcept
0610                 {
0611                     B xi = nearbyint(x * B(2.));
0612                     B x2 = x - xi * B(0.5);
0613                     xr = x2 * constants::pi<B>();
0614                     return quadrant(xi);
0615                 }
0616             };
0617
0618         }
0619         template <class A, class T>
0620         XSIMD_INLINE batch<T, A> cos(batch<T, A> const& self, requires_arch<generic>) noexcept
0621         {
0622             using batch_type = batch<T, A>;
0623             const batch_type x = abs(self);
0624             batch_type xr = constants::nan<batch_type>();
0625             const batch_type n = detail::trigo_reducer<batch_type>::reduce(x, xr);
0626             auto tmp = select(n >= batch_type(2.), batch_type(1.), batch_type(0.));
0627             auto swap_bit = fma(batch_type(-2.), tmp, n);
0628             auto sign_bit = select((swap_bit ^ tmp) != batch_type(0.), constants::signmask<batch_type>(), batch_type(0.));
0629             const batch_type z = xr * xr;
0630             const batch_type se = detail::sin_eval(z, xr);
0631             const batch_type ce = detail::cos_eval(z);
0632             const batch_type z1 = select(swap_bit != batch_type(0.), se, ce);
0633             return z1 ^ sign_bit;
0634         }
0635
0636         template <class A, class T>
0637         XSIMD_INLINE batch<std::complex<T>, A> cos(batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept
0638         {
0639             return { cos(z.real()) * cosh(z.imag()), -sin(z.real()) * sinh(z.imag()) };
0640         }
0641
0642         // cosh
0643
0644         /* origin: boost/simd/arch/common/simd/function/cosh.hpp */
0645         /*
0646          * ====================================================
0647          * copyright 2016 NumScale SAS
0648          *
0649          * Distributed under the Boost Software License, Version 1.0.
0650          * (See copy at http://boost.org/LICENSE_1_0.txt)
0651          * ====================================================
0652          */
0653
0654         template <class A, class T>
0655         XSIMD_INLINE batch<T, A> cosh(batch<T, A> const& self, requires_arch<generic>) noexcept
0656         {
0657             using batch_type = batch<T, A>;
0658             batch_type x = abs(self);
0659             auto test1 = x > (constants::maxlog<batch_type>() - constants::log_2<batch_type>());
0660             batch_type fac = select(test1, batch_type(0.5), batch_type(1.));
0661             batch_type tmp = exp(x * fac);
0662             batch_type tmp1 = batch_type(0.5) * tmp;
0663             return select(test1, tmp1 * tmp, detail::average(tmp, batch_type(1.) / tmp));
0664         }
0665         template <class A, class T>
0666         XSIMD_INLINE batch<std::complex<T>, A> cosh(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
0667         {
0668             auto x = z.real();
0669             auto y = z.imag();
0670             return { cosh(x) * cos(y), sinh(x) * sin(y) };
0671         }
0672
0673         // sin
0674         namespace detail
0675         {
0676             template <class A, class T, class Tag = trigo_radian_tag>
0677             XSIMD_INLINE batch<T, A> sin(batch<T, A> const& self, Tag = Tag()) noexcept
0678             {
0679                 using batch_type = batch<T, A>;
0680                 const batch_type x = abs(self);
0681                 batch_type xr = constants::nan<batch_type>();
0682                 const batch_type n = detail::trigo_reducer<batch_type, Tag>::reduce(x, xr);
0683                 auto tmp = select(n >= batch_type(2.), batch_type(1.), batch_type(0.));
0684                 auto swap_bit = fma(batch_type(-2.), tmp, n);
0685                 auto sign_bit = bitofsign(self) ^ select(tmp != batch_type(0.), constants::signmask<batch_type>(), batch_type(0.));
0686                 const batch_type z = xr * xr;
0687                 const batch_type se = detail::sin_eval(z, xr);
0688                 const batch_type ce = detail::cos_eval(z);
0689                 const batch_type z1 = select(swap_bit == batch_type(0.), se, ce);
0690                 return z1 ^ sign_bit;
0691             }
0692         }
0693
0694         template <class A, class T>
0695         XSIMD_INLINE batch<T, A> sin(batch<T, A> const& self, requires_arch<generic>) noexcept
0696         {
0697             return detail::sin(self);
0698         }
0699
0700         template <class A, class T>
0701         XSIMD_INLINE batch<std::complex<T>, A> sin(batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept
0702         {
0703             return { sin(z.real()) * cosh(z.imag()), cos(z.real()) * sinh(z.imag()) };
0704         }
0705
0706         // sincos
0707         template <class A, class T>
0708         XSIMD_INLINE std::pair<batch<T, A>, batch<T, A>> sincos(batch<T, A> const& self, requires_arch<generic>) noexcept
0709         {
0710             using batch_type = batch<T, A>;
0711             const batch_type x = abs(self);
0712             batch_type xr = constants::nan<batch_type>();
0713             const batch_type n = detail::trigo_reducer<batch_type>::reduce(x, xr);
0714             auto tmp = select(n >= batch_type(2.), batch_type(1.), batch_type(0.));
0715             auto swap_bit = fma(batch_type(-2.), tmp, n);
0716             const batch_type z = xr * xr;
0717             const batch_type se = detail::sin_eval(z, xr);
0718             const batch_type ce = detail::cos_eval(z);
0719             auto sin_sign_bit = bitofsign(self) ^ select(tmp != batch_type(0.), constants::signmask<batch_type>(), batch_type(0.));
0720             const batch_type sin_z1 = select(swap_bit == batch_type(0.), se, ce);
0721             auto cos_sign_bit = select((swap_bit ^ tmp) != batch_type(0.), constants::signmask<batch_type>(), batch_type(0.));
0722             const batch_type cos_z1 = select(swap_bit != batch_type(0.), se, ce);
0723             return std::make_pair(sin_z1 ^ sin_sign_bit, cos_z1 ^ cos_sign_bit);
0724         }
0725
0726         template <class A, class T>
0727         XSIMD_INLINE std::pair<batch<std::complex<T>, A>, batch<std::complex<T>, A>>
0728         sincos(batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept
0729         {
0730             using batch_type = batch<std::complex<T>, A>;
0731             using real_batch = typename batch_type::real_batch;
0732             real_batch rcos = cos(z.real());
0733             real_batch rsin = sin(z.real());
0734             real_batch icosh = cosh(z.imag());
0735             real_batch isinh = sinh(z.imag());
0736             return std::make_pair(batch_type(rsin * icosh, rcos * isinh), batch_type(rcos * icosh, -rsin * isinh));
0737         }
0738
0739         // sinh
0740         namespace detail
0741         {
0742             /* origin: boost/simd/arch/common/detail/generic/sinh_kernel.hpp */
0743             /*
0744              * ====================================================
0745              * copyright 2016 NumScale SAS
0746              *
0747              * Distributed under the Boost Software License, Version 1.0.
0748              * (See copy at http://boost.org/LICENSE_1_0.txt)
0749              * ====================================================
0750              */
0751             template <class A>
0752             XSIMD_INLINE batch<float, A> sinh_kernel(batch<float, A> const& self) noexcept
0753             {
0754                 using batch_type = batch<float, A>;
0755                 batch_type sqr_self = self * self;
0756                 return detail::horner<batch_type,
0757                                       0x3f800000, // 1.0f
0758                                       0x3e2aaacc, // 1.66667160211E-1f
0759                                       0x3c087bbe, // 8.33028376239E-3f
0760                                       0x39559e2f // 2.03721912945E-4f
0761                                       >(sqr_self)
0762                     * self;
0763             }
0764
0765             template <class A>
0766             XSIMD_INLINE batch<double, A> sinh_kernel(batch<double, A> const& self) noexcept
0767             {
0768                 using batch_type = batch<double, A>;
0769                 batch_type sqrself = self * self;
0770                 return fma(self, (detail::horner<batch_type,
0771                                                  0xc115782bdbf6ab05ull, //  -3.51754964808151394800E5
0772                                                  0xc0c694b8c71d6182ull, //  -1.15614435765005216044E4,
0773                                                  0xc064773a398ff4feull, //  -1.63725857525983828727E2,
0774                                                  0xbfe9435fe8bb3cd6ull //  -7.89474443963537015605E-1
0775                                                  >(sqrself)
0776                                   / detail::horner1<batch_type,
0777                                                     0xc1401a20e4f90044ull, //  -2.11052978884890840399E6
0778                                                     0x40e1a7ba7ed72245ull, //   3.61578279834431989373E4,
0779                                                     0xc0715b6096e96484ull //  -2.77711081420602794433E2,
0780                                                     >(sqrself))
0781                                * sqrself,
0782                            self);
0783             }
0784         }
0785         /* origin: boost/simd/arch/common/simd/function/sinh.hpp */
0786         /*
0787          * ====================================================
0788          * copyright 2016 NumScale SAS
0789          *
0790          * Distributed under the Boost Software License, Version 1.0.
0791          * (See copy at http://boost.org/LICENSE_1_0.txt)
0792          * ====================================================
0793          */
0794         template <class A, class T>
0795         XSIMD_INLINE batch<T, A> sinh(batch<T, A> const& a, requires_arch<generic>) noexcept
0796         {
0797             using batch_type = batch<T, A>;
0798             batch_type half(0.5);
0799             batch_type x = abs(a);
0800             auto lt1 = x < batch_type(1.);
0801             batch_type bts = bitofsign(a);
0802             batch_type z(0.);
0803             if (any(lt1))
0804             {
0805                 z = detail::sinh_kernel(x);
0806                 if (all(lt1))
0807                     return z ^ bts;
0808             }
0809             auto test1 = x > (constants::maxlog<batch_type>() - constants::log_2<batch_type>());
0810             batch_type fac = select(test1, half, batch_type(1.));
0811             batch_type tmp = exp(x * fac);
0812             batch_type tmp1 = half * tmp;
0813             batch_type r = select(test1, tmp1 * tmp, tmp1 - half / tmp);
0814             return select(lt1, z, r) ^ bts;
0815         }
0816         template <class A, class T>
0817         XSIMD_INLINE batch<std::complex<T>, A> sinh(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
0818         {
0819             auto x = z.real();
0820             auto y = z.imag();
0821             return { sinh(x) * cos(y), cosh(x) * sin(y) };
0822         }
0823
0824         // tan
0825         template <class A, class T>
0826         XSIMD_INLINE batch<T, A> tan(batch<T, A> const& self, requires_arch<generic>) noexcept
0827         {
0828             using batch_type = batch<T, A>;
0829             const batch_type x = abs(self);
0830             batch_type xr = constants::nan<batch_type>();
0831             const batch_type n = detail::trigo_reducer<batch_type>::reduce(x, xr);
0832             auto tmp = select(n >= batch_type(2.), batch_type(1.), batch_type(0.));
0833             auto swap_bit = fma(batch_type(-2.), tmp, n);
0834             auto test = (swap_bit == batch_type(0.));
0835             const batch_type y = detail::tan_eval(xr, test);
0836             return y ^ bitofsign(self);
0837         }
0838         template <class A, class T>
0839         XSIMD_INLINE batch<std::complex<T>, A> tan(batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept
0840         {
0841             using batch_type = batch<std::complex<T>, A>;
0842             using real_batch = typename batch_type::real_batch;
0843             real_batch d = cos(2 * z.real()) + cosh(2 * z.imag());
0844             batch_type winf(constants::infinity<real_batch>(), constants::infinity<real_batch>());
0845             real_batch wreal = sin(2 * z.real()) / d;
0846             real_batch wimag = sinh(2 * z.imag());
0847             batch_type wres = select(isinf(wimag), batch_type(wreal, real_batch(1.)), batch_type(wreal, wimag / d));
0848             return select(d == real_batch(0.), winf, wres);
0849         }
0850
0851         // tanh
0852         namespace detail
0853         {
0854             /* origin: boost/simd/arch/common/detail/generic/tanh_kernel.hpp */
0855             /*
0856              * ====================================================
0857              * copyright 2016 NumScale SAS
0858              *
0859              * Distributed under the Boost Software License, Version 1.0.
0860              * (See copy at http://boost.org/LICENSE_1_0.txt)
0861              * ====================================================
0862              */
0863             template <class B>
0864             struct tanh_kernel;
0865
0866             template <class A>
0867             struct tanh_kernel<batch<float, A>>
0868             {
0869                 using batch_type = batch<float, A>;
0870                 static XSIMD_INLINE batch_type tanh(const batch_type& x) noexcept
0871                 {
0872                     batch_type sqrx = x * x;
0873                     return fma(detail::horner<batch_type,
0874                                               0xbeaaaa99, //    -3.33332819422E-1F
0875                                               0x3e088393, //    +1.33314422036E-1F
0876                                               0xbd5c1e2d, //    -5.37397155531E-2F
0877                                               0x3ca9134e, //    +2.06390887954E-2F
0878                                               0xbbbaf0ea //    -5.70498872745E-3F
0879                                               >(sqrx)
0880                                    * sqrx,
0881                                x, x);
0882                 }
0883
0884                 static XSIMD_INLINE batch_type cotanh(const batch_type& x) noexcept
0885                 {
0886                     return batch_type(1.) / tanh(x);
0887                 }
0888             };
0889
0890             template <class A>
0891             struct tanh_kernel<batch<double, A>>
0892             {
0893                 using batch_type = batch<double, A>;
0894                 static XSIMD_INLINE batch_type tanh(const batch_type& x) noexcept
0895                 {
0896                     batch_type sqrx = x * x;
0897                     return fma(sqrx * p(sqrx) / q(sqrx), x, x);
0898                 }
0899
0900                 static XSIMD_INLINE batch_type cotanh(const batch_type& x) noexcept
0901                 {
0902                     batch_type sqrx = x * x;
0903                     batch_type qval = q(sqrx);
0904                     return qval / (x * fma(p(sqrx), sqrx, qval));
0905                 }
0906
0907                 static XSIMD_INLINE batch_type p(const batch_type& x) noexcept
0908                 {
0909                     return detail::horner<batch_type,
0910                                           0xc0993ac030580563, // -1.61468768441708447952E3
0911                                           0xc058d26a0e26682d, // -9.92877231001918586564E1,
0912                                           0xbfeedc5baafd6f4b // -9.64399179425052238628E-1
0913                                           >(x);
0914                 }
0915
0916                 static XSIMD_INLINE batch_type q(const batch_type& x) noexcept
0917                 {
0918                     return detail::horner1<batch_type,
0919                                            0x40b2ec102442040c, //  4.84406305325125486048E3
0920                                            0x40a176fa0e5535fa, //  2.23548839060100448583E3,
0921                                            0x405c33f28a581B86 //  1.12811678491632931402E2,
0922                                            >(x);
0923                 }
0924             };
0925
0926         }
0927         /* origin: boost/simd/arch/common/simd/function/tanh.hpp */
0928         /*
0929          * ====================================================
0930          * copyright 2016 NumScale SAS
0931          *
0932          * Distributed under the Boost Software License, Version 1.0.
0933          * (See copy at http://boost.org/LICENSE_1_0.txt)
0934          * ====================================================
0935          */
0936         template <class A, class T>
0937         XSIMD_INLINE batch<T, A> tanh(batch<T, A> const& self, requires_arch<generic>) noexcept
0938         {
0939             using batch_type = batch<T, A>;
0940             batch_type one(1.);
0941             batch_type x = abs(self);
0942             auto test = x < (batch_type(5.) / batch_type(8.));
0943             batch_type bts = bitofsign(self);
0944             batch_type z = one;
0945             if (any(test))
0946             {
0947                 z = detail::tanh_kernel<batch_type>::tanh(x);
0948                 if (all(test))
0949                     return z ^ bts;
0950             }
0951             batch_type r = fma(batch_type(-2.), one / (one + exp(x + x)), one);
0952             return select(test, z, r) ^ bts;
0953         }
0954         template <class A, class T>
0955         XSIMD_INLINE batch<std::complex<T>, A> tanh(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
0956         {
0957             using real_batch = typename batch<std::complex<T>, A>::real_batch;
0958             auto x = z.real();
0959             auto y = z.imag();
0960             real_batch two(2);
0961             auto d = cosh(two * x) + cos(two * y);
0962             return { sinh(two * x) / d, sin(two * y) / d };
0963         }
0964
0965     }
0966
0967 }
0968
0969 #endif