File indexing completed on 2025-08-28 09:11:27
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012 #ifndef XSIMD_GENERIC_ARITHMETIC_HPP
0013 #define XSIMD_GENERIC_ARITHMETIC_HPP
0014
0015 #include <complex>
0016 #include <limits>
0017 #include <type_traits>
0018
0019 #include "./xsimd_generic_details.hpp"
0020
0021 namespace xsimd
0022 {
0023
0024 namespace kernel
0025 {
0026
0027 using namespace types;
0028
0029
0030 template <class A, class T, class >
0031 XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
0032 {
0033 return detail::apply([](T x, T y) noexcept
0034 { return x << y; },
0035 self, other);
0036 }
0037
0038
0039 template <class A, class T, class >
0040 XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
0041 {
0042 return detail::apply([](T x, T y) noexcept
0043 { return x >> y; },
0044 self, other);
0045 }
0046
0047
0048 template <class A, class T>
0049 XSIMD_INLINE batch<T, A> decr(batch<T, A> const& self, requires_arch<generic>) noexcept
0050 {
0051 return self - T(1);
0052 }
0053
0054
0055 template <class A, class T, class Mask>
0056 XSIMD_INLINE batch<T, A> decr_if(batch<T, A> const& self, Mask const& mask, requires_arch<generic>) noexcept
0057 {
0058 return select(mask, decr(self), self);
0059 }
0060
0061
0062 template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
0063 XSIMD_INLINE batch<T, A> div(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
0064 {
0065 return detail::apply([](T x, T y) noexcept -> T
0066 { return x / y; },
0067 self, other);
0068 }
0069
0070
0071 template <class A, class T>
0072 XSIMD_INLINE batch<T, A> fma(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<generic>) noexcept
0073 {
0074 return x * y + z;
0075 }
0076
0077 template <class A, class T>
0078 XSIMD_INLINE batch<std::complex<T>, A> fma(batch<std::complex<T>, A> const& x, batch<std::complex<T>, A> const& y, batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept
0079 {
0080 auto res_r = fms(x.real(), y.real(), fms(x.imag(), y.imag(), z.real()));
0081 auto res_i = fma(x.real(), y.imag(), fma(x.imag(), y.real(), z.imag()));
0082 return { res_r, res_i };
0083 }
0084
0085
0086 template <class A, class T>
0087 XSIMD_INLINE batch<T, A> fms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<generic>) noexcept
0088 {
0089 return x * y - z;
0090 }
0091
0092 template <class A, class T>
0093 XSIMD_INLINE batch<std::complex<T>, A> fms(batch<std::complex<T>, A> const& x, batch<std::complex<T>, A> const& y, batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept
0094 {
0095 auto res_r = fms(x.real(), y.real(), fma(x.imag(), y.imag(), z.real()));
0096 auto res_i = fma(x.real(), y.imag(), fms(x.imag(), y.real(), z.imag()));
0097 return { res_r, res_i };
0098 }
0099
0100
0101 template <class A, class T>
0102 XSIMD_INLINE batch<T, A> fnma(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<generic>) noexcept
0103 {
0104 return -x * y + z;
0105 }
0106
0107 template <class A, class T>
0108 XSIMD_INLINE batch<std::complex<T>, A> fnma(batch<std::complex<T>, A> const& x, batch<std::complex<T>, A> const& y, batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept
0109 {
0110 auto res_r = -fms(x.real(), y.real(), fma(x.imag(), y.imag(), z.real()));
0111 auto res_i = -fma(x.real(), y.imag(), fms(x.imag(), y.real(), z.imag()));
0112 return { res_r, res_i };
0113 }
0114
0115
0116 template <class A, class T>
0117 XSIMD_INLINE batch<T, A> fnms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<generic>) noexcept
0118 {
0119 return -x * y - z;
0120 }
0121
0122 template <class A, class T>
0123 XSIMD_INLINE batch<std::complex<T>, A> fnms(batch<std::complex<T>, A> const& x, batch<std::complex<T>, A> const& y, batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept
0124 {
0125 auto res_r = -fms(x.real(), y.real(), fms(x.imag(), y.imag(), z.real()));
0126 auto res_i = -fma(x.real(), y.imag(), fma(x.imag(), y.real(), z.imag()));
0127 return { res_r, res_i };
0128 }
0129
0130
0131 template <class A, class T, class >
0132 XSIMD_INLINE T hadd(batch<T, A> const& self, requires_arch<generic>) noexcept
0133 {
0134 alignas(A::alignment()) T buffer[batch<T, A>::size];
0135 self.store_aligned(buffer);
0136 T res = 0;
0137 for (T val : buffer)
0138 {
0139 res += val;
0140 }
0141 return res;
0142 }
0143
0144
0145 template <class A, class T>
0146 XSIMD_INLINE batch<T, A> incr(batch<T, A> const& self, requires_arch<generic>) noexcept
0147 {
0148 return self + T(1);
0149 }
0150
0151
0152 template <class A, class T, class Mask>
0153 XSIMD_INLINE batch<T, A> incr_if(batch<T, A> const& self, Mask const& mask, requires_arch<generic>) noexcept
0154 {
0155 return select(mask, incr(self), self);
0156 }
0157
0158
0159 template <class A, class T, class >
0160 XSIMD_INLINE batch<T, A> mul(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
0161 {
0162 return detail::apply([](T x, T y) noexcept -> T
0163 { return x * y; },
0164 self, other);
0165 }
0166
0167
0168 template <class A, class T, class STy>
0169 XSIMD_INLINE batch<T, A> rotl(batch<T, A> const& self, STy other, requires_arch<generic>) noexcept
0170 {
0171 constexpr auto N = std::numeric_limits<T>::digits;
0172 return (self << other) | (self >> (N - other));
0173 }
0174
0175
0176 template <class A, class T, class STy>
0177 XSIMD_INLINE batch<T, A> rotr(batch<T, A> const& self, STy other, requires_arch<generic>) noexcept
0178 {
0179 constexpr auto N = std::numeric_limits<T>::digits;
0180 return (self >> other) | (self << (N - other));
0181 }
0182
0183
0184 template <class A>
0185 XSIMD_INLINE batch<float, A> sadd(batch<float, A> const& self, batch<float, A> const& other, requires_arch<generic>) noexcept
0186 {
0187 return add(self, other);
0188 }
0189 template <class A, class T, class >
0190 XSIMD_INLINE batch<T, A> sadd(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
0191 {
0192 if (std::is_signed<T>::value)
0193 {
0194 auto mask = (other >> (8 * sizeof(T) - 1));
0195 auto self_pos_branch = min(std::numeric_limits<T>::max() - other, self);
0196 auto self_neg_branch = max(std::numeric_limits<T>::min() - other, self);
0197 return other + select(batch_bool<T, A>(mask.data), self_neg_branch, self_pos_branch);
0198 }
0199 else
0200 {
0201 const auto diffmax = std::numeric_limits<T>::max() - self;
0202 const auto mindiff = min(diffmax, other);
0203 return self + mindiff;
0204 }
0205 }
0206 template <class A>
0207 XSIMD_INLINE batch<double, A> sadd(batch<double, A> const& self, batch<double, A> const& other, requires_arch<generic>) noexcept
0208 {
0209 return add(self, other);
0210 }
0211
0212
0213 template <class A>
0214 XSIMD_INLINE batch<float, A> ssub(batch<float, A> const& self, batch<float, A> const& other, requires_arch<generic>) noexcept
0215 {
0216 return sub(self, other);
0217 }
0218 template <class A, class T, class >
0219 XSIMD_INLINE batch<T, A> ssub(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
0220 {
0221 if (std::is_signed<T>::value)
0222 {
0223 return sadd(self, -other);
0224 }
0225 else
0226 {
0227 const auto diff = min(self, other);
0228 return self - diff;
0229 }
0230 }
0231 template <class A>
0232 XSIMD_INLINE batch<double, A> ssub(batch<double, A> const& self, batch<double, A> const& other, requires_arch<generic>) noexcept
0233 {
0234 return sub(self, other);
0235 }
0236
0237 }
0238
0239 }
0240
0241 #endif