arch/generic/xsimd_generic_arithmetic.hpp

0001 /***************************************************************************
0002  * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and         *
0003  * Martin Renou                                                             *
0004  * Copyright (c) QuantStack                                                 *
0005  * Copyright (c) Serge Guelton                                              *
0006  *                                                                          *
0007  * Distributed under the terms of the BSD 3-Clause License.                 *
0008  *                                                                          *
0009  * The full license is in the file LICENSE, distributed with this software. *
0010  ****************************************************************************/
0011
0012 #ifndef XSIMD_GENERIC_ARITHMETIC_HPP
0013 #define XSIMD_GENERIC_ARITHMETIC_HPP
0014
0015 #include <complex>
0016 #include <limits>
0017 #include <type_traits>
0018
0019 #include "./xsimd_generic_details.hpp"
0020
0021 namespace xsimd
0022 {
0023
0024     namespace kernel
0025     {
0026
0027         using namespace types;
0028
0029         // bitwise_lshift
0030         template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
0031         XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
0032         {
0033             return detail::apply([](T x, T y) noexcept
0034                                  { return x << y; },
0035                                  self, other);
0036         }
0037
0038         // bitwise_rshift
0039         template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
0040         XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
0041         {
0042             return detail::apply([](T x, T y) noexcept
0043                                  { return x >> y; },
0044                                  self, other);
0045         }
0046
0047         // decr
0048         template <class A, class T>
0049         XSIMD_INLINE batch<T, A> decr(batch<T, A> const& self, requires_arch<generic>) noexcept
0050         {
0051             return self - T(1);
0052         }
0053
0054         // decr_if
0055         template <class A, class T, class Mask>
0056         XSIMD_INLINE batch<T, A> decr_if(batch<T, A> const& self, Mask const& mask, requires_arch<generic>) noexcept
0057         {
0058             return select(mask, decr(self), self);
0059         }
0060
0061         // div
0062         template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
0063         XSIMD_INLINE batch<T, A> div(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
0064         {
0065             return detail::apply([](T x, T y) noexcept -> T
0066                                  { return x / y; },
0067                                  self, other);
0068         }
0069
0070         // fma
0071         template <class A, class T>
0072         XSIMD_INLINE batch<T, A> fma(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<generic>) noexcept
0073         {
0074             return x * y + z;
0075         }
0076
0077         template <class A, class T>
0078         XSIMD_INLINE batch<std::complex<T>, A> fma(batch<std::complex<T>, A> const& x, batch<std::complex<T>, A> const& y, batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept
0079         {
0080             auto res_r = fms(x.real(), y.real(), fms(x.imag(), y.imag(), z.real()));
0081             auto res_i = fma(x.real(), y.imag(), fma(x.imag(), y.real(), z.imag()));
0082             return { res_r, res_i };
0083         }
0084
0085         // fms
0086         template <class A, class T>
0087         XSIMD_INLINE batch<T, A> fms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<generic>) noexcept
0088         {
0089             return x * y - z;
0090         }
0091
0092         template <class A, class T>
0093         XSIMD_INLINE batch<std::complex<T>, A> fms(batch<std::complex<T>, A> const& x, batch<std::complex<T>, A> const& y, batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept
0094         {
0095             auto res_r = fms(x.real(), y.real(), fma(x.imag(), y.imag(), z.real()));
0096             auto res_i = fma(x.real(), y.imag(), fms(x.imag(), y.real(), z.imag()));
0097             return { res_r, res_i };
0098         }
0099
0100         // fnma
0101         template <class A, class T>
0102         XSIMD_INLINE batch<T, A> fnma(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<generic>) noexcept
0103         {
0104             return -x * y + z;
0105         }
0106
0107         template <class A, class T>
0108         XSIMD_INLINE batch<std::complex<T>, A> fnma(batch<std::complex<T>, A> const& x, batch<std::complex<T>, A> const& y, batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept
0109         {
0110             auto res_r = -fms(x.real(), y.real(), fma(x.imag(), y.imag(), z.real()));
0111             auto res_i = -fma(x.real(), y.imag(), fms(x.imag(), y.real(), z.imag()));
0112             return { res_r, res_i };
0113         }
0114
0115         // fnms
0116         template <class A, class T>
0117         XSIMD_INLINE batch<T, A> fnms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<generic>) noexcept
0118         {
0119             return -x * y - z;
0120         }
0121
0122         template <class A, class T>
0123         XSIMD_INLINE batch<std::complex<T>, A> fnms(batch<std::complex<T>, A> const& x, batch<std::complex<T>, A> const& y, batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept
0124         {
0125             auto res_r = -fms(x.real(), y.real(), fms(x.imag(), y.imag(), z.real()));
0126             auto res_i = -fma(x.real(), y.imag(), fma(x.imag(), y.real(), z.imag()));
0127             return { res_r, res_i };
0128         }
0129
0130         // hadd
0131         template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
0132         XSIMD_INLINE T hadd(batch<T, A> const& self, requires_arch<generic>) noexcept
0133         {
0134             alignas(A::alignment()) T buffer[batch<T, A>::size];
0135             self.store_aligned(buffer);
0136             T res = 0;
0137             for (T val : buffer)
0138             {
0139                 res += val;
0140             }
0141             return res;
0142         }
0143
0144         // incr
0145         template <class A, class T>
0146         XSIMD_INLINE batch<T, A> incr(batch<T, A> const& self, requires_arch<generic>) noexcept
0147         {
0148             return self + T(1);
0149         }
0150
0151         // incr_if
0152         template <class A, class T, class Mask>
0153         XSIMD_INLINE batch<T, A> incr_if(batch<T, A> const& self, Mask const& mask, requires_arch<generic>) noexcept
0154         {
0155             return select(mask, incr(self), self);
0156         }
0157
0158         // mul
0159         template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
0160         XSIMD_INLINE batch<T, A> mul(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
0161         {
0162             return detail::apply([](T x, T y) noexcept -> T
0163                                  { return x * y; },
0164                                  self, other);
0165         }
0166
0167         // rotl
0168         template <class A, class T, class STy>
0169         XSIMD_INLINE batch<T, A> rotl(batch<T, A> const& self, STy other, requires_arch<generic>) noexcept
0170         {
0171             constexpr auto N = std::numeric_limits<T>::digits;
0172             return (self << other) | (self >> (N - other));
0173         }
0174
0175         // rotr
0176         template <class A, class T, class STy>
0177         XSIMD_INLINE batch<T, A> rotr(batch<T, A> const& self, STy other, requires_arch<generic>) noexcept
0178         {
0179             constexpr auto N = std::numeric_limits<T>::digits;
0180             return (self >> other) | (self << (N - other));
0181         }
0182
0183         // sadd
0184         template <class A>
0185         XSIMD_INLINE batch<float, A> sadd(batch<float, A> const& self, batch<float, A> const& other, requires_arch<generic>) noexcept
0186         {
0187             return add(self, other); // no saturated arithmetic on floating point numbers
0188         }
0189         template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
0190         XSIMD_INLINE batch<T, A> sadd(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
0191         {
0192             if (std::is_signed<T>::value)
0193             {
0194                 auto mask = (other >> (8 * sizeof(T) - 1));
0195                 auto self_pos_branch = min(std::numeric_limits<T>::max() - other, self);
0196                 auto self_neg_branch = max(std::numeric_limits<T>::min() - other, self);
0197                 return other + select(batch_bool<T, A>(mask.data), self_neg_branch, self_pos_branch);
0198             }
0199             else
0200             {
0201                 const auto diffmax = std::numeric_limits<T>::max() - self;
0202                 const auto mindiff = min(diffmax, other);
0203                 return self + mindiff;
0204             }
0205         }
0206         template <class A>
0207         XSIMD_INLINE batch<double, A> sadd(batch<double, A> const& self, batch<double, A> const& other, requires_arch<generic>) noexcept
0208         {
0209             return add(self, other); // no saturated arithmetic on floating point numbers
0210         }
0211
0212         // ssub
0213         template <class A>
0214         XSIMD_INLINE batch<float, A> ssub(batch<float, A> const& self, batch<float, A> const& other, requires_arch<generic>) noexcept
0215         {
0216             return sub(self, other); // no saturated arithmetic on floating point numbers
0217         }
0218         template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
0219         XSIMD_INLINE batch<T, A> ssub(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
0220         {
0221             if (std::is_signed<T>::value)
0222             {
0223                 return sadd(self, -other);
0224             }
0225             else
0226             {
0227                 const auto diff = min(self, other);
0228                 return self - diff;
0229             }
0230         }
0231         template <class A>
0232         XSIMD_INLINE batch<double, A> ssub(batch<double, A> const& self, batch<double, A> const& other, requires_arch<generic>) noexcept
0233         {
0234             return sub(self, other); // no saturated arithmetic on floating point numbers
0235         }
0236
0237     }
0238
0239 }
0240
0241 #endif