xsimd/arch/xsimd_scalar.hpp

0001 /***************************************************************************
0002  * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and         *
0003  * Martin Renou                                                             *
0004  * Copyright (c) QuantStack                                                 *
0005  * Copyright (c) Serge Guelton                                              *
0006  *                                                                          *
0007  * Distributed under the terms of the BSD 3-Clause License.                 *
0008  *                                                                          *
0009  * The full license is in the file LICENSE, distributed with this software. *
0010  ****************************************************************************/
0011
0012 #ifndef XSIMD_SCALAR_HPP
0013 #define XSIMD_SCALAR_HPP
0014
0015 #include <cassert>
0016 #include <cmath>
0017 #include <complex>
0018 #include <cstdint>
0019 #include <cstring>
0020 #include <limits>
0021 #include <type_traits>
0022
0023 #include "xsimd/config/xsimd_inline.hpp"
0024
0025 #ifdef XSIMD_ENABLE_XTL_COMPLEX
0026 #include "xtl/xcomplex.hpp"
0027 #endif
0028
0029 #ifdef __APPLE__
0030 #include <AvailabilityMacros.h>
0031 #endif
0032
0033 namespace xsimd
0034 {
0035     template <class T, class A>
0036     class batch;
0037     template <class T, class A>
0038     class batch_bool;
0039
0040     using std::abs;
0041
0042     using std::acos;
0043     using std::acosh;
0044     using std::arg;
0045     using std::asin;
0046     using std::asinh;
0047     using std::atan;
0048     using std::atan2;
0049     using std::atanh;
0050     using std::cbrt;
0051     using std::ceil;
0052     using std::conj;
0053     using std::copysign;
0054     using std::cos;
0055     using std::cosh;
0056     using std::erf;
0057     using std::erfc;
0058     using std::exp;
0059     using std::exp2;
0060     using std::expm1;
0061     using std::fabs;
0062     using std::fdim;
0063     using std::floor;
0064     using std::fmax;
0065     using std::fmin;
0066     using std::fmod;
0067     using std::hypot;
0068     using std::ldexp;
0069     using std::lgamma;
0070     using std::log;
0071     using std::log10;
0072     using std::log1p;
0073     using std::log2;
0074     using std::modf;
0075     using std::nearbyint;
0076     using std::nextafter;
0077     using std::norm;
0078     using std::polar;
0079     using std::proj;
0080     using std::remainder;
0081     using std::rint;
0082     using std::round;
0083     using std::sin;
0084     using std::sinh;
0085     using std::sqrt;
0086     using std::tan;
0087     using std::tanh;
0088     using std::tgamma;
0089     using std::trunc;
0090
0091     XSIMD_INLINE signed char abs(signed char v)
0092     {
0093         return v < 0 ? -v : v;
0094     }
0095
0096     namespace detail
0097     {
0098         // Use templated type here to prevent automatic instantiation that may
0099         // ends up in a warning
0100         template <typename char_type>
0101         XSIMD_INLINE char abs(char_type v, std::true_type)
0102         {
0103             return v;
0104         }
0105         template <typename char_type>
0106         XSIMD_INLINE char abs(char_type v, std::false_type)
0107         {
0108             return v < 0 ? -v : v;
0109         }
0110     }
0111
0112     XSIMD_INLINE char abs(char v)
0113     {
0114         return detail::abs(v, std::is_unsigned<char>::type {});
0115     }
0116
0117     XSIMD_INLINE short abs(short v)
0118     {
0119         return v < 0 ? -v : v;
0120     }
0121     XSIMD_INLINE unsigned char abs(unsigned char v)
0122     {
0123         return v;
0124     }
0125     XSIMD_INLINE unsigned short abs(unsigned short v)
0126     {
0127         return v;
0128     }
0129     XSIMD_INLINE unsigned int abs(unsigned int v)
0130     {
0131         return v;
0132     }
0133     XSIMD_INLINE unsigned long abs(unsigned long v)
0134     {
0135         return v;
0136     }
0137     XSIMD_INLINE unsigned long long abs(unsigned long long v)
0138     {
0139         return v;
0140     }
0141
0142 #ifndef _WIN32
0143     using std::isfinite;
0144     using std::isinf;
0145     using std::isnan;
0146 #else
0147
0148     // Windows defines catch all templates
0149     template <class T>
0150     XSIMD_INLINE typename std::enable_if<std::is_floating_point<T>::value, bool>::type
0151     isfinite(T var) noexcept
0152     {
0153         return std::isfinite(var);
0154     }
0155
0156     template <class T>
0157     XSIMD_INLINE typename std::enable_if<std::is_integral<T>::value, bool>::type
0158     isfinite(T var) noexcept
0159     {
0160         return isfinite(double(var));
0161     }
0162
0163     template <class T>
0164     XSIMD_INLINE typename std::enable_if<std::is_floating_point<T>::value, bool>::type
0165     isinf(T var) noexcept
0166     {
0167         return std::isinf(var);
0168     }
0169
0170     template <class T>
0171     XSIMD_INLINE typename std::enable_if<std::is_integral<T>::value, bool>::type
0172     isinf(T var) noexcept
0173     {
0174         return isinf(double(var));
0175     }
0176
0177     template <class T>
0178     XSIMD_INLINE typename std::enable_if<std::is_floating_point<T>::value, bool>::type
0179     isnan(T var) noexcept
0180     {
0181         return std::isnan(var);
0182     }
0183
0184     template <class T>
0185     XSIMD_INLINE typename std::enable_if<std::is_integral<T>::value, bool>::type
0186     isnan(T var) noexcept
0187     {
0188         return isnan(double(var));
0189     }
0190 #endif
0191
0192     template <class T, class Tp>
0193     XSIMD_INLINE typename std::common_type<T, Tp>::type add(T const& x, Tp const& y) noexcept
0194     {
0195         return x + y;
0196     }
0197
0198     template <class T, class Tp>
0199     XSIMD_INLINE typename std::common_type<T, Tp>::type avg(T const& x, Tp const& y) noexcept
0200     {
0201         using common_type = typename std::common_type<T, Tp>::type;
0202         if (std::is_floating_point<common_type>::value)
0203             return (x + y) / 2;
0204         else if (std::is_unsigned<common_type>::value)
0205         {
0206             return (x & y) + ((x ^ y) >> 1);
0207         }
0208         else
0209         {
0210             // Inspired by
0211             // https://stackoverflow.com/questions/5697500/take-the-average-of-two-signed-numbers-in-c
0212             auto t = (x & y) + ((x ^ y) >> 1);
0213             auto t_u = static_cast<typename std::make_unsigned<common_type>::type>(t);
0214             auto avg = t + (static_cast<T>(t_u >> (8 * sizeof(T) - 1)) & (x ^ y));
0215             return avg;
0216         }
0217     }
0218
0219     template <class T, class Tp>
0220     XSIMD_INLINE typename std::common_type<T, Tp>::type avgr(T const& x, Tp const& y) noexcept
0221     {
0222         using common_type = typename std::common_type<T, Tp>::type;
0223         if (std::is_floating_point<common_type>::value)
0224             return avg(x, y);
0225         else
0226         {
0227             return avg(x, y) + ((x ^ y) & 1);
0228         }
0229     }
0230
0231     template <class T>
0232     XSIMD_INLINE T incr(T const& x) noexcept
0233     {
0234         return x + T(1);
0235     }
0236
0237     template <class T>
0238     XSIMD_INLINE T incr_if(T const& x, bool mask) noexcept
0239     {
0240         return x + T(mask ? 1 : 0);
0241     }
0242
0243     XSIMD_INLINE bool all(bool mask)
0244     {
0245         return mask;
0246     }
0247
0248     XSIMD_INLINE bool any(bool mask)
0249     {
0250         return mask;
0251     }
0252
0253     XSIMD_INLINE bool none(bool mask)
0254     {
0255         return !mask;
0256     }
0257
0258     template <class T>
0259     XSIMD_INLINE typename std::enable_if<std::is_integral<T>::value, T>::type
0260     bitwise_and(T x, T y) noexcept
0261     {
0262         return x & y;
0263     }
0264
0265     template <class T_out, class T_in>
0266     XSIMD_INLINE T_out bitwise_cast(T_in x) noexcept
0267     {
0268         static_assert(sizeof(T_in) == sizeof(T_out), "bitwise_cast between types of the same size");
0269         T_out r;
0270         std::memcpy((void*)&r, (void*)&x, sizeof(T_in));
0271         return r;
0272     }
0273
0274     XSIMD_INLINE float bitwise_and(float x, float y) noexcept
0275     {
0276         uint32_t ix, iy;
0277         std::memcpy((void*)&ix, (void*)&x, sizeof(float));
0278         std::memcpy((void*)&iy, (void*)&y, sizeof(float));
0279         uint32_t ir = bitwise_and(ix, iy);
0280         float r;
0281         std::memcpy((void*)&r, (void*)&ir, sizeof(float));
0282         return r;
0283     }
0284
0285     XSIMD_INLINE double bitwise_and(double x, double y) noexcept
0286     {
0287         uint64_t ix, iy;
0288         std::memcpy((void*)&ix, (void*)&x, sizeof(double));
0289         std::memcpy((void*)&iy, (void*)&y, sizeof(double));
0290         uint64_t ir = bitwise_and(ix, iy);
0291         double r;
0292         std::memcpy((void*)&r, (void*)&ir, sizeof(double));
0293         return r;
0294     }
0295
0296     template <class T0, class T1>
0297     XSIMD_INLINE typename std::enable_if<std::is_integral<T0>::value && std::is_integral<T1>::value, T0>::type
0298     bitwise_lshift(T0 x, T1 shift) noexcept
0299     {
0300         return x << shift;
0301     }
0302
0303     template <class T0, class T1>
0304     XSIMD_INLINE typename std::enable_if<std::is_integral<T0>::value && std::is_integral<T1>::value, T0>::type
0305     bitwise_rshift(T0 x, T1 shift) noexcept
0306     {
0307         return x >> shift;
0308     }
0309
0310     template <class T>
0311     XSIMD_INLINE typename std::enable_if<std::is_integral<T>::value, T>::type
0312     bitwise_not(T x) noexcept
0313     {
0314         return ~x;
0315     }
0316
0317     XSIMD_INLINE bool bitwise_not(bool x) noexcept
0318     {
0319         return !x;
0320     }
0321
0322     XSIMD_INLINE float bitwise_not(float x) noexcept
0323     {
0324         uint32_t ix;
0325         std::memcpy((void*)&ix, (void*)&x, sizeof(float));
0326         uint32_t ir = bitwise_not(ix);
0327         float r;
0328         std::memcpy((void*)&r, (void*)&ir, sizeof(float));
0329         return r;
0330     }
0331
0332     XSIMD_INLINE double bitwise_not(double x) noexcept
0333     {
0334         uint64_t ix;
0335         std::memcpy((void*)&ix, (void*)&x, sizeof(double));
0336         uint64_t ir = bitwise_not(ix);
0337         double r;
0338         std::memcpy((void*)&r, (void*)&ir, sizeof(double));
0339         return r;
0340     }
0341
0342     template <class T>
0343     XSIMD_INLINE typename std::enable_if<std::is_scalar<T>::value, T>::type bitwise_andnot(T x, T y) noexcept
0344     {
0345         return bitwise_and(x, bitwise_not(y));
0346     }
0347
0348     template <class T>
0349     XSIMD_INLINE typename std::enable_if<std::is_integral<T>::value, T>::type
0350     bitwise_or(T x, T y) noexcept
0351     {
0352         return x | y;
0353     }
0354
0355     XSIMD_INLINE float bitwise_or(float x, float y) noexcept
0356     {
0357         uint32_t ix, iy;
0358         std::memcpy((void*)&ix, (void*)&x, sizeof(float));
0359         std::memcpy((void*)&iy, (void*)&y, sizeof(float));
0360         uint32_t ir = bitwise_or(ix, iy);
0361         float r;
0362         std::memcpy((void*)&r, (void*)&ir, sizeof(float));
0363         return r;
0364     }
0365
0366     XSIMD_INLINE double bitwise_or(double x, double y) noexcept
0367     {
0368         uint64_t ix, iy;
0369         std::memcpy((void*)&ix, (void*)&x, sizeof(double));
0370         std::memcpy((void*)&iy, (void*)&y, sizeof(double));
0371         uint64_t ir = bitwise_or(ix, iy);
0372         double r;
0373         std::memcpy((void*)&r, (void*)&ir, sizeof(double));
0374         return r;
0375     }
0376
0377     template <class T>
0378     XSIMD_INLINE typename std::enable_if<std::is_integral<T>::value, T>::type
0379     bitwise_xor(T x, T y) noexcept
0380     {
0381         return x ^ y;
0382     }
0383
0384     XSIMD_INLINE float bitwise_xor(float x, float y) noexcept
0385     {
0386         uint32_t ix, iy;
0387         std::memcpy((void*)&ix, (void*)&x, sizeof(float));
0388         std::memcpy((void*)&iy, (void*)&y, sizeof(float));
0389         uint32_t ir = bitwise_xor(ix, iy);
0390         float r;
0391         std::memcpy((void*)&r, (void*)&ir, sizeof(float));
0392         return r;
0393     }
0394
0395     XSIMD_INLINE double bitwise_xor(double x, double y) noexcept
0396     {
0397         uint64_t ix, iy;
0398         std::memcpy((void*)&ix, (void*)&x, sizeof(double));
0399         std::memcpy((void*)&iy, (void*)&y, sizeof(double));
0400         uint64_t ir = bitwise_xor(ix, iy);
0401         double r;
0402         std::memcpy((void*)&r, (void*)&ir, sizeof(double));
0403         return r;
0404     }
0405
0406     template <class T, class Tp>
0407     XSIMD_INLINE typename std::common_type<T, Tp>::type div(T const& x, Tp const& y) noexcept
0408     {
0409         return x / y;
0410     }
0411
0412     template <class T, class Tp>
0413     XSIMD_INLINE auto mod(T const& x, Tp const& y) noexcept -> decltype(x % y)
0414     {
0415         return x % y;
0416     }
0417
0418     template <class T, class Tp>
0419     XSIMD_INLINE typename std::common_type<T, Tp>::type mul(T const& x, Tp const& y) noexcept
0420     {
0421         return x * y;
0422     }
0423
0424     template <class T>
0425     XSIMD_INLINE T neg(T const& x) noexcept
0426     {
0427         return -x;
0428     }
0429
0430     template <class T>
0431     XSIMD_INLINE auto pos(T const& x) noexcept -> decltype(+x)
0432     {
0433         return +x;
0434     }
0435
0436     XSIMD_INLINE float reciprocal(float const& x) noexcept
0437     {
0438         return 1.f / x;
0439     }
0440
0441     XSIMD_INLINE double reciprocal(double const& x) noexcept
0442     {
0443         return 1. / x;
0444     }
0445
0446     template <class T0, class T1>
0447     XSIMD_INLINE typename std::enable_if<std::is_integral<T0>::value && std::is_integral<T1>::value, T0>::type
0448     rotl(T0 x, T1 shift) noexcept
0449     {
0450         constexpr auto N = std::numeric_limits<T0>::digits;
0451         return (x << shift) | (x >> (N - shift));
0452     }
0453
0454     template <class T0, class T1>
0455     XSIMD_INLINE typename std::enable_if<std::is_integral<T0>::value && std::is_integral<T1>::value, T0>::type
0456     rotr(T0 x, T1 shift) noexcept
0457     {
0458         constexpr auto N = std::numeric_limits<T0>::digits;
0459         return (x >> shift) | (x << (N - shift));
0460     }
0461
0462     template <class T>
0463     XSIMD_INLINE bool isnan(std::complex<T> var) noexcept
0464     {
0465         return std::isnan(std::real(var)) || std::isnan(std::imag(var));
0466     }
0467
0468     template <class T>
0469     XSIMD_INLINE bool isinf(std::complex<T> var) noexcept
0470     {
0471         return std::isinf(std::real(var)) || std::isinf(std::imag(var));
0472     }
0473
0474     template <class T>
0475     XSIMD_INLINE bool isfinite(std::complex<T> var) noexcept
0476     {
0477         return std::isfinite(std::real(var)) && std::isfinite(std::imag(var));
0478     }
0479
0480 #ifdef XSIMD_ENABLE_XTL_COMPLEX
0481     using xtl::abs;
0482     using xtl::acos;
0483     using xtl::acosh;
0484     using xtl::asin;
0485     using xtl::asinh;
0486     using xtl::atan;
0487     using xtl::atanh;
0488     using xtl::cos;
0489     using xtl::cosh;
0490     using xtl::exp;
0491     using xtl::log;
0492     using xtl::log10;
0493     using xtl::norm;
0494     using xtl::pow;
0495     using xtl::proj;
0496     using xtl::sin;
0497     using xtl::sinh;
0498     using xtl::sqrt;
0499     using xtl::tan;
0500     using xtl::tanh;
0501 #endif
0502
0503     template <typename T, class = typename std::enable_if<std::is_scalar<T>::value>::type>
0504     XSIMD_INLINE T clip(const T& val, const T& low, const T& hi) noexcept
0505     {
0506         assert(low <= hi && "ordered clipping bounds");
0507         return low > val ? low : (hi < val ? hi : val);
0508     }
0509
0510     template <class T, class = typename std::enable_if<std::is_scalar<T>::value>::type>
0511     XSIMD_INLINE bool is_flint(const T& x) noexcept
0512     {
0513         return std::isnan(x - x) ? false : (x - std::trunc(x)) == T(0);
0514     }
0515
0516     template <class T, class = typename std::enable_if<std::is_scalar<T>::value>::type>
0517     XSIMD_INLINE bool is_even(const T& x) noexcept
0518     {
0519         return is_flint(x * T(0.5));
0520     }
0521
0522     template <class T, class = typename std::enable_if<std::is_scalar<T>::value>::type>
0523     XSIMD_INLINE bool is_odd(const T& x) noexcept
0524     {
0525         return is_even(x - 1.);
0526     }
0527
0528     XSIMD_INLINE int32_t nearbyint_as_int(float var) noexcept
0529     {
0530         return static_cast<int32_t>(std::nearbyint(var));
0531     }
0532
0533     XSIMD_INLINE int64_t nearbyint_as_int(double var) noexcept
0534     {
0535         return static_cast<int64_t>(std::nearbyint(var));
0536     }
0537
0538     template <class T, class = typename std::enable_if<std::is_scalar<T>::value>::type>
0539     XSIMD_INLINE bool eq(const T& x0, const T& x1) noexcept
0540     {
0541         return x0 == x1;
0542     }
0543
0544     template <class T>
0545     XSIMD_INLINE bool eq(const std::complex<T>& x0, const std::complex<T>& x1) noexcept
0546     {
0547         return x0 == x1;
0548     }
0549
0550     template <class T, class = typename std::enable_if<std::is_scalar<T>::value>::type>
0551     XSIMD_INLINE bool ge(const T& x0, const T& x1) noexcept
0552     {
0553         return x0 >= x1;
0554     }
0555
0556     template <class T, class = typename std::enable_if<std::is_scalar<T>::value>::type>
0557     XSIMD_INLINE bool gt(const T& x0, const T& x1) noexcept
0558     {
0559         return x0 > x1;
0560     }
0561
0562     template <class T, class = typename std::enable_if<std::is_scalar<T>::value>::type>
0563     XSIMD_INLINE bool le(const T& x0, const T& x1) noexcept
0564     {
0565         return x0 <= x1;
0566     }
0567
0568     template <class T, class = typename std::enable_if<std::is_scalar<T>::value>::type>
0569     XSIMD_INLINE bool lt(const T& x0, const T& x1) noexcept
0570     {
0571         return x0 < x1;
0572     }
0573
0574     template <class T, class = typename std::enable_if<std::is_scalar<T>::value>::type>
0575     XSIMD_INLINE bool neq(const T& x0, const T& x1) noexcept
0576     {
0577         return x0 != x1;
0578     }
0579
0580     template <class T>
0581     XSIMD_INLINE bool neq(const std::complex<T>& x0, const std::complex<T>& x1) noexcept
0582     {
0583         return !(x0 == x1);
0584     }
0585
0586 #if defined(__APPLE__) && (MAC_OS_X_VERSION_MIN_REQUIRED > 1080)
0587     XSIMD_INLINE float exp10(const float& x) noexcept
0588     {
0589         return __exp10f(x);
0590     }
0591     XSIMD_INLINE double exp10(const double& x) noexcept
0592     {
0593         return __exp10(x);
0594     }
0595 #elif defined(__GLIBC__)
0596     XSIMD_INLINE float exp10(const float& x) noexcept
0597     {
0598         return ::exp10f(x);
0599     }
0600     XSIMD_INLINE double exp10(const double& x) noexcept
0601     {
0602         return ::exp10(x);
0603     }
0604 #elif !defined(__clang__) && defined(__GNUC__) && (__GNUC__ >= 5)
0605     XSIMD_INLINE float exp10(const float& x) noexcept
0606     {
0607         return __builtin_exp10f(x);
0608     }
0609     XSIMD_INLINE double exp10(const double& x) noexcept
0610     {
0611         return __builtin_exp10(x);
0612     }
0613 #elif defined(_WIN32)
0614     template <class T, class = typename std::enable_if<std::is_scalar<T>::value>::type>
0615     XSIMD_INLINE T exp10(const T& x) noexcept
0616     {
0617         // Very inefficient but other implementations give incorrect results
0618         // on Windows
0619         return std::pow(T(10), x);
0620     }
0621 #else
0622     XSIMD_INLINE float exp10(const float& x) noexcept
0623     {
0624         const float ln10 = std::log(10.f);
0625         return std::exp(ln10 * x);
0626     }
0627     XSIMD_INLINE double exp10(const double& x) noexcept
0628     {
0629         const double ln10 = std::log(10.);
0630         return std::exp(ln10 * x);
0631     }
0632 #endif
0633
0634     template <class T, class = typename std::enable_if<std::is_scalar<T>::value>::type>
0635     XSIMD_INLINE auto rsqrt(const T& x) noexcept -> decltype(std::sqrt(x))
0636     {
0637         using float_type = decltype(std::sqrt(x));
0638         return static_cast<float_type>(1) / std::sqrt(x);
0639     }
0640
0641     namespace detail
0642     {
0643         template <class C>
0644         XSIMD_INLINE C expm1_complex_scalar_impl(const C& val) noexcept
0645         {
0646             using T = typename C::value_type;
0647             T isin = std::sin(val.imag());
0648             T rem1 = std::expm1(val.real());
0649             T re = rem1 + T(1.);
0650             T si = std::sin(val.imag() * T(0.5));
0651             return std::complex<T>(rem1 - T(2.) * re * si * si, re * isin);
0652         }
0653     }
0654
0655     template <class T>
0656     XSIMD_INLINE std::complex<T> expm1(const std::complex<T>& val) noexcept
0657     {
0658         return detail::expm1_complex_scalar_impl(val);
0659     }
0660
0661 #ifdef XSIMD_ENABLE_XTL_COMPLEX
0662     template <class T, bool i3ec>
0663     XSIMD_INLINE xtl::xcomplex<T, T, i3ec> expm1(const xtl::xcomplex<T, T, i3ec>& val) noexcept
0664     {
0665         return detail::expm1_complex_scalar_impl(val);
0666     }
0667 #endif
0668
0669     namespace detail
0670     {
0671         template <class C>
0672         XSIMD_INLINE C log1p_complex_scalar_impl(const C& val) noexcept
0673         {
0674             using T = typename C::value_type;
0675             C u = C(1.) + val;
0676             return u == C(1.) ? val : (u.real() <= T(0.) ? log(u) : log(u) * val / (u - C(1.)));
0677         }
0678     }
0679
0680     template <class T>
0681     XSIMD_INLINE std::complex<T> log1p(const std::complex<T>& val) noexcept
0682     {
0683         return detail::log1p_complex_scalar_impl(val);
0684     }
0685
0686     template <class T>
0687     XSIMD_INLINE std::complex<T> log2(const std::complex<T>& val) noexcept
0688     {
0689         return log(val) / std::log(T(2));
0690     }
0691
0692     template <typename T, class = typename std::enable_if<std::is_scalar<T>::value>::type>
0693     XSIMD_INLINE T sadd(const T& lhs, const T& rhs) noexcept
0694     {
0695         if (std::numeric_limits<T>::is_signed)
0696         {
0697             if ((lhs > 0) && (rhs > std::numeric_limits<T>::max() - lhs))
0698             {
0699                 return std::numeric_limits<T>::max();
0700             }
0701             else if ((lhs < 0) && (rhs < std::numeric_limits<T>::lowest() - lhs))
0702             {
0703                 return std::numeric_limits<T>::lowest();
0704             }
0705             else
0706             {
0707                 return lhs + rhs;
0708             }
0709         }
0710         else
0711         {
0712             if (rhs > std::numeric_limits<T>::max() - lhs)
0713             {
0714                 return std::numeric_limits<T>::max();
0715             }
0716             else
0717             {
0718                 return lhs + rhs;
0719             }
0720         }
0721     }
0722
0723     template <typename T, class = typename std::enable_if<std::is_scalar<T>::value>::type>
0724     XSIMD_INLINE T ssub(const T& lhs, const T& rhs) noexcept
0725     {
0726         if (std::numeric_limits<T>::is_signed)
0727         {
0728             return sadd(lhs, (T)-rhs);
0729         }
0730         else
0731         {
0732             if (lhs < rhs)
0733             {
0734                 return std::numeric_limits<T>::lowest();
0735             }
0736             else
0737             {
0738                 return lhs - rhs;
0739             }
0740         }
0741     }
0742
0743     namespace detail
0744     {
0745         template <class T>
0746         struct value_type_or_type_helper
0747         {
0748             using type = T;
0749         };
0750         template <class T, class A>
0751         struct value_type_or_type_helper<batch<T, A>>
0752         {
0753             using type = T;
0754         };
0755
0756         template <class T>
0757         using value_type_or_type = typename value_type_or_type_helper<T>::type;
0758
0759         template <class T0, class T1>
0760         XSIMD_INLINE typename std::enable_if<std::is_integral<T1>::value, T0>::type
0761         ipow(const T0& x, const T1& n) noexcept
0762         {
0763             static_assert(std::is_integral<T1>::value, "second argument must be an integer");
0764             T0 a = x;
0765             T1 b = n;
0766             bool const recip = b < 0;
0767             T0 r(static_cast<value_type_or_type<T0>>(1));
0768             while (1)
0769             {
0770                 if (b & 1)
0771                 {
0772                     r *= a;
0773                 }
0774                 b /= 2;
0775                 if (b == 0)
0776                 {
0777                     break;
0778                 }
0779                 a *= a;
0780             }
0781             return recip ? static_cast<T0>(1) / r : r;
0782         }
0783     }
0784
0785     template <class T0, class T1>
0786     XSIMD_INLINE typename std::enable_if<std::is_integral<T1>::value, T0>::type
0787     pow(const T0& x, const T1& n) noexcept
0788     {
0789         return detail::ipow(x, n);
0790     }
0791
0792     template <class T0, class T1>
0793     XSIMD_INLINE auto
0794     pow(const T0& t0, const T1& t1) noexcept
0795         -> typename std::enable_if<std::is_scalar<T0>::value && std::is_floating_point<T1>::value, decltype(std::pow(t0, t1))>::type
0796     {
0797         return std::pow(t0, t1);
0798     }
0799
0800     template <class T0, class T1>
0801     XSIMD_INLINE typename std::enable_if<std::is_integral<T1>::value, std::complex<T0>>::type
0802     pow(const std::complex<T0>& t0, const T1& t1) noexcept
0803     {
0804         return detail::ipow(t0, t1);
0805     }
0806
0807     template <class T0, class T1>
0808     XSIMD_INLINE typename std::enable_if<!std::is_integral<T1>::value, std::complex<T0>>::type
0809     pow(const std::complex<T0>& t0, const T1& t1) noexcept
0810     {
0811         return std::pow(t0, t1);
0812     }
0813
0814     template <class T0, class T1>
0815     XSIMD_INLINE auto
0816     pow(const T0& t0, const std::complex<T1>& t1) noexcept
0817         -> typename std::enable_if<std::is_scalar<T0>::value, decltype(std::pow(t0, t1))>::type
0818     {
0819         return std::pow(t0, t1);
0820     }
0821
0822     template <class T, class = typename std::enable_if<std::is_scalar<T>::value>::type>
0823     XSIMD_INLINE T bitofsign(T const& x) noexcept
0824     {
0825         return T(x < T(0));
0826     }
0827
0828     XSIMD_INLINE float bitofsign(float const& x) noexcept
0829     {
0830         return float(std::signbit(x));
0831     }
0832
0833     XSIMD_INLINE double bitofsign(double const& x) noexcept
0834     {
0835         return double(std::signbit(x));
0836     }
0837
0838     XSIMD_INLINE long double bitofsign(long double const& x) noexcept
0839     {
0840         return static_cast<long double>(std::signbit(x));
0841     }
0842
0843     template <class T>
0844     XSIMD_INLINE auto signbit(T const& v) noexcept -> decltype(bitofsign(v))
0845     {
0846         return bitofsign(v);
0847     }
0848
0849     XSIMD_INLINE double sign(bool const& v) noexcept
0850     {
0851         return v;
0852     }
0853
0854     template <class T, class = typename std::enable_if<std::is_scalar<T>::value>::type>
0855     XSIMD_INLINE T sign(const T& v) noexcept
0856     {
0857         return v < T(0) ? T(-1.) : v == T(0) ? T(0.)
0858                                              : T(1.);
0859     }
0860
0861     namespace detail
0862     {
0863         template <class C>
0864         XSIMD_INLINE C sign_complex_scalar_impl(const C& v) noexcept
0865         {
0866             using value_type = typename C::value_type;
0867             if (v.real())
0868             {
0869                 return C(sign(v.real()), value_type(0));
0870             }
0871             else
0872             {
0873                 return C(sign(v.imag()), value_type(0));
0874             }
0875         }
0876     }
0877
0878     template <class T>
0879     XSIMD_INLINE std::complex<T> sign(const std::complex<T>& v) noexcept
0880     {
0881         return detail::sign_complex_scalar_impl(v);
0882     }
0883
0884 #ifdef XSIMD_ENABLE_XTL_COMPLEX
0885     template <class T, bool i3ec>
0886     XSIMD_INLINE xtl::xcomplex<T, T, i3ec> sign(const xtl::xcomplex<T, T, i3ec>& v) noexcept
0887     {
0888         return detail::sign_complex_scalar_impl(v);
0889     }
0890 #endif
0891
0892     XSIMD_INLINE double signnz(bool const&) noexcept
0893     {
0894         return 1;
0895     }
0896
0897     template <class T, class = typename std::enable_if<std::is_scalar<T>::value>::type>
0898     XSIMD_INLINE T signnz(const T& v) noexcept
0899     {
0900         return v < T(0) ? T(-1.) : T(1.);
0901     }
0902
0903     template <class T, class Tp>
0904     XSIMD_INLINE typename std::common_type<T, Tp>::type sub(T const& x, Tp const& y) noexcept
0905     {
0906         return x - y;
0907     }
0908
0909     template <class T>
0910     XSIMD_INLINE T decr(T const& x) noexcept
0911     {
0912         return x - T(1);
0913     }
0914
0915     template <class T>
0916     XSIMD_INLINE T decr_if(T const& x, bool mask) noexcept
0917     {
0918         return x - T(mask ? 1 : 0);
0919     }
0920
0921 #ifdef XSIMD_ENABLE_XTL_COMPLEX
0922     template <class T, bool i3ec>
0923     XSIMD_INLINE xtl::xcomplex<T, T, i3ec> log2(const xtl::xcomplex<T, T, i3ec>& val) noexcept
0924     {
0925         return log(val) / log(T(2));
0926     }
0927 #endif
0928
0929 #ifdef XSIMD_ENABLE_XTL_COMPLEX
0930     template <class T, bool i3ec>
0931     XSIMD_INLINE xtl::xcomplex<T, T, i3ec> log1p(const xtl::xcomplex<T, T, i3ec>& val) noexcept
0932     {
0933         return detail::log1p_complex_scalar_impl(val);
0934     }
0935 #endif
0936
0937     template <class T0, class T1>
0938     XSIMD_INLINE auto min(T0 const& self, T1 const& other) noexcept
0939         -> typename std::enable_if<std::is_scalar<T0>::value && std::is_scalar<T1>::value,
0940                                    typename std::decay<decltype(self > other ? other : self)>::type>::type
0941     {
0942         return self > other ? other : self;
0943     }
0944
0945     // numpy defines minimum operator on complex using lexical comparison
0946     template <class T0, class T1>
0947     XSIMD_INLINE std::complex<typename std::common_type<T0, T1>::type>
0948     min(std::complex<T0> const& self, std::complex<T1> const& other) noexcept
0949     {
0950         return (self.real() < other.real()) ? (self) : (self.real() == other.real() ? (self.imag() < other.imag() ? self : other) : other);
0951     }
0952
0953     template <class T0, class T1>
0954     XSIMD_INLINE auto max(T0 const& self, T1 const& other) noexcept
0955         -> typename std::enable_if<std::is_scalar<T0>::value && std::is_scalar<T1>::value,
0956                                    typename std::decay<decltype(self > other ? other : self)>::type>::type
0957     {
0958         return self < other ? other : self;
0959     }
0960
0961     // numpy defines maximum operator on complex using lexical comparison
0962     template <class T0, class T1>
0963     XSIMD_INLINE std::complex<typename std::common_type<T0, T1>::type>
0964     max(std::complex<T0> const& self, std::complex<T1> const& other) noexcept
0965     {
0966         return (self.real() > other.real()) ? (self) : (self.real() == other.real() ? (self.imag() > other.imag() ? self : other) : other);
0967     }
0968
0969     template <class T>
0970     XSIMD_INLINE typename std::enable_if<std::is_integral<T>::value, T>::type fma(const T& a, const T& b, const T& c) noexcept
0971     {
0972         return a * b + c;
0973     }
0974
0975     template <class T>
0976     XSIMD_INLINE typename std::enable_if<std::is_floating_point<T>::value, T>::type fma(const T& a, const T& b, const T& c) noexcept
0977     {
0978         return std::fma(a, b, c);
0979     }
0980
0981     template <class T>
0982     XSIMD_INLINE typename std::enable_if<std::is_scalar<T>::value, T>::type fms(const T& a, const T& b, const T& c) noexcept
0983     {
0984         return a * b - c;
0985     }
0986
0987     namespace detail
0988     {
0989         template <class C>
0990         XSIMD_INLINE C fma_complex_scalar_impl(const C& a, const C& b, const C& c) noexcept
0991         {
0992             return { fms(a.real(), b.real(), fms(a.imag(), b.imag(), c.real())),
0993                      fma(a.real(), b.imag(), fma(a.imag(), b.real(), c.imag())) };
0994         }
0995     }
0996
0997     template <class T>
0998     XSIMD_INLINE std::complex<T> fma(const std::complex<T>& a, const std::complex<T>& b, const std::complex<T>& c) noexcept
0999     {
1000         return detail::fma_complex_scalar_impl(a, b, c);
1001     }
1002
1003 #ifdef XSIMD_ENABLE_XTL_COMPLEX
1004     template <class T, bool i3ec>
1005     XSIMD_INLINE xtl::xcomplex<T, T, i3ec> fma(const xtl::xcomplex<T, T, i3ec>& a, const xtl::xcomplex<T, T, i3ec>& b, const xtl::xcomplex<T, T, i3ec>& c) noexcept
1006     {
1007         return detail::fma_complex_scalar_impl(a, b, c);
1008     }
1009 #endif
1010
1011     namespace detail
1012     {
1013         template <class C>
1014         XSIMD_INLINE C fms_complex_scalar_impl(const C& a, const C& b, const C& c) noexcept
1015         {
1016             return { fms(a.real(), b.real(), fma(a.imag(), b.imag(), c.real())),
1017                      fma(a.real(), b.imag(), fms(a.imag(), b.real(), c.imag())) };
1018         }
1019     }
1020
1021     template <class T>
1022     XSIMD_INLINE std::complex<T> fms(const std::complex<T>& a, const std::complex<T>& b, const std::complex<T>& c) noexcept
1023     {
1024         return detail::fms_complex_scalar_impl(a, b, c);
1025     }
1026
1027 #ifdef XSIMD_ENABLE_XTL_COMPLEX
1028     template <class T, bool i3ec>
1029     XSIMD_INLINE xtl::xcomplex<T, T, i3ec> fms(const xtl::xcomplex<T, T, i3ec>& a, const xtl::xcomplex<T, T, i3ec>& b, const xtl::xcomplex<T, T, i3ec>& c) noexcept
1030     {
1031         return detail::fms_complex_scalar_impl(a, b, c);
1032     }
1033 #endif
1034
1035     template <class T>
1036     XSIMD_INLINE typename std::enable_if<std::is_integral<T>::value, T>::type fnma(const T& a, const T& b, const T& c) noexcept
1037     {
1038         return -(a * b) + c;
1039     }
1040
1041     template <class T>
1042     XSIMD_INLINE typename std::enable_if<std::is_floating_point<T>::value, T>::type fnma(const T& a, const T& b, const T& c) noexcept
1043     {
1044         return std::fma(-a, b, c);
1045     }
1046
1047     namespace detail
1048     {
1049         template <class C>
1050         XSIMD_INLINE C fnma_complex_scalar_impl(const C& a, const C& b, const C& c) noexcept
1051         {
1052             return { fms(a.imag(), b.imag(), fms(a.real(), b.real(), c.real())),
1053                      -fma(a.real(), b.imag(), fms(a.imag(), b.real(), c.imag())) };
1054         }
1055     }
1056
1057     template <class T>
1058     XSIMD_INLINE std::complex<T> fnma(const std::complex<T>& a, const std::complex<T>& b, const std::complex<T>& c) noexcept
1059     {
1060         return detail::fnma_complex_scalar_impl(a, b, c);
1061     }
1062
1063 #ifdef XSIMD_ENABLE_XTL_COMPLEX
1064     template <class T, bool i3ec>
1065     XSIMD_INLINE xtl::xcomplex<T, T, i3ec> fnma(const xtl::xcomplex<T, T, i3ec>& a, const xtl::xcomplex<T, T, i3ec>& b, const xtl::xcomplex<T, T, i3ec>& c) noexcept
1066     {
1067         return detail::fnma_complex_scalar_impl(a, b, c);
1068     }
1069 #endif
1070
1071     template <class T>
1072     XSIMD_INLINE typename std::enable_if<std::is_integral<T>::value, T>::type fnms(const T& a, const T& b, const T& c) noexcept
1073     {
1074         return -(a * b) - c;
1075     }
1076
1077     template <class T>
1078     XSIMD_INLINE typename std::enable_if<std::is_floating_point<T>::value, T>::type fnms(const T& a, const T& b, const T& c) noexcept
1079     {
1080         return -std::fma(a, b, c);
1081     }
1082
1083     namespace detail
1084     {
1085         template <class C>
1086         XSIMD_INLINE C fnms_complex_scalar_impl(const C& a, const C& b, const C& c) noexcept
1087         {
1088             return { fms(a.imag(), b.imag(), fma(a.real(), b.real(), c.real())),
1089                      -fma(a.real(), b.imag(), fma(a.imag(), b.real(), c.imag())) };
1090         }
1091     }
1092
1093     template <class T>
1094     XSIMD_INLINE std::complex<T> fnms(const std::complex<T>& a, const std::complex<T>& b, const std::complex<T>& c) noexcept
1095     {
1096         return detail::fnms_complex_scalar_impl(a, b, c);
1097     }
1098
1099 #ifdef XSIMD_ENABLE_XTL_COMPLEX
1100     template <class T, bool i3ec>
1101     XSIMD_INLINE xtl::xcomplex<T, T, i3ec> fnms(const xtl::xcomplex<T, T, i3ec>& a, const xtl::xcomplex<T, T, i3ec>& b, const xtl::xcomplex<T, T, i3ec>& c) noexcept
1102     {
1103         return detail::fnms_complex_scalar_impl(a, b, c);
1104     }
1105 #endif
1106
1107     namespace detail
1108     {
1109 #define XSIMD_HASSINCOS_TRAIT(func)                                                                                                           \
1110     template <class S>                                                                                                                        \
1111     struct has##func                                                                                                                          \
1112     {                                                                                                                                         \
1113         template <class T>                                                                                                                    \
1114         static XSIMD_INLINE auto get(T* ptr) -> decltype(func(std::declval<T>(), std::declval<T*>(), std::declval<T*>()), std::true_type {}); \
1115         static XSIMD_INLINE std::false_type get(...);                                                                                         \
1116         static constexpr bool value = decltype(get((S*)nullptr))::value;                                                                      \
1117     }
1118
1119 #define XSIMD_HASSINCOS(func, T) has##func<T>::value
1120
1121         XSIMD_HASSINCOS_TRAIT(sincos);
1122         XSIMD_HASSINCOS_TRAIT(sincosf);
1123         XSIMD_HASSINCOS_TRAIT(__sincos);
1124         XSIMD_HASSINCOS_TRAIT(__sincosf);
1125
1126         struct generic_sincosf
1127         {
1128             template <class T>
1129             XSIMD_INLINE typename std::enable_if<XSIMD_HASSINCOS(sincosf, T), void>::type
1130             operator()(float val, T& s, T& c)
1131             {
1132                 sincosf(val, &s, &c);
1133             }
1134
1135             template <class T>
1136             XSIMD_INLINE typename std::enable_if<!XSIMD_HASSINCOS(sincosf, T) && XSIMD_HASSINCOS(__sincosf, T), void>::type
1137             operator()(float val, T& s, T& c)
1138             {
1139                 __sincosf(val, &s, &c);
1140             }
1141
1142             template <class T>
1143             XSIMD_INLINE typename std::enable_if<!XSIMD_HASSINCOS(sincosf, T) && !XSIMD_HASSINCOS(__sincosf, T), void>::type
1144             operator()(float val, T& s, T& c)
1145             {
1146                 s = std::sin(val);
1147                 c = std::cos(val);
1148             }
1149         };
1150
1151         struct generic_sincos
1152         {
1153             template <class T>
1154             XSIMD_INLINE typename std::enable_if<XSIMD_HASSINCOS(sincos, T), void>::type
1155             operator()(double val, T& s, T& c)
1156             {
1157                 sincos(val, &s, &c);
1158             }
1159
1160             template <class T>
1161             XSIMD_INLINE typename std::enable_if<!XSIMD_HASSINCOS(sincos, T) && XSIMD_HASSINCOS(__sincos, T), void>::type
1162             operator()(double val, T& s, T& c)
1163             {
1164                 __sincos(val, &s, &c);
1165             }
1166
1167             template <class T>
1168             XSIMD_INLINE typename std::enable_if<!XSIMD_HASSINCOS(sincos, T) && !XSIMD_HASSINCOS(__sincos, T), void>::type
1169             operator()(double val, T& s, T& c)
1170             {
1171                 s = std::sin(val);
1172                 c = std::cos(val);
1173             }
1174         };
1175
1176 #undef XSIMD_HASSINCOS_TRAIT
1177 #undef XSIMD_HASSINCOS
1178     }
1179
1180     XSIMD_INLINE std::pair<float, float> sincos(float val) noexcept
1181     {
1182         float s, c;
1183         detail::generic_sincosf {}(val, s, c);
1184         return std::make_pair(s, c);
1185     }
1186
1187     XSIMD_INLINE std::pair<double, double> sincos(double val) noexcept
1188     {
1189         double s, c;
1190         detail::generic_sincos {}(val, s, c);
1191         return std::make_pair(s, c);
1192     }
1193
1194     template <class T>
1195     XSIMD_INLINE std::pair<std::complex<T>, std::complex<T>>
1196     sincos(const std::complex<T>& val) noexcept
1197     {
1198         return std::make_pair(std::sin(val), std::cos(val));
1199     }
1200
1201 #ifdef XSIMD_ENABLE_XTL_COMPLEX
1202     template <class T>
1203     XSIMD_INLINE std::pair<xtl::xcomplex<T>, xtl::xcomplex<T>> sincos(const xtl::xcomplex<T>& val) noexcept
1204     {
1205         return std::make_pair(sin(val), cos(val));
1206     }
1207 #endif
1208
1209     template <class T, class _ = typename std::enable_if<std::is_floating_point<T>::value, void>::type>
1210     XSIMD_INLINE T frexp(T const& val, int& exp) noexcept
1211     {
1212         return std::frexp(val, &exp);
1213     }
1214
1215     template <class T>
1216     XSIMD_INLINE T select(bool cond, T const& true_br, T const& false_br) noexcept
1217     {
1218         return cond ? true_br : false_br;
1219     }
1220
1221 }
1222
1223 #endif