#ifndef XSIMD_EMULATED_HPP
#define XSIMD_EMULATED_HPP

#include <algorithm>
#include <array>
#include <complex>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <limits>
#include <numeric>
#include <type_traits>

#include "../arch/xsimd_scalar.hpp"

#include "../types/xsimd_emulated_register.hpp"
#include "../types/xsimd_utils.hpp"
namespace xsimd
{
    template <typename T, class A, bool... Values>
    struct batch_bool_constant;

    template <class T_out, class T_in, class A>
    XSIMD_INLINE batch<T_out, A> bitwise_cast(batch<T_in, A> const& x) noexcept;

    template <typename T, class A, T... Values>
    struct batch_constant;

    namespace kernel
    {
        using namespace types;

        template <class A, class T, size_t I>
        XSIMD_INLINE batch<T, A> insert(batch<T, A> const& self, T val, index<I>, requires_arch<generic>) noexcept;
        template <class A, typename T, typename ITy, ITy... Indices>
        XSIMD_INLINE batch<T, A> shuffle(batch<T, A> const& x, batch<T, A> const& y, batch_constant<ITy, A, Indices...>, requires_arch<generic>) noexcept;
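        // detail::emulated_apply maps a scalar functor over every lane of one
        // or more batches: an index_sequence expands into one scalar call per
        // lane, and the results are collected into a std::array that converts
        // back into a batch. Nearly every kernel below reduces to this pattern.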
        namespace detail
        {
            template <size_t I, class F, class... Bs>
            auto emulated_apply(F func, Bs const&... bs) -> decltype(func(bs.data[I]...))
            {
                return func(bs.data[I]...);
            }

            template <class F, class B, class... Bs, size_t... Is>
            auto emulated_apply(F func, ::xsimd::detail::index_sequence<Is...>, B const& b, Bs const&... bs) -> std::array<decltype(func(b.data[0], bs.data[0]...)), B::size>
            {
                return { emulated_apply<Is>(func, b, bs...)... };
            }

            template <class B, class F, class... Bs>
            auto emulated_apply(F func, B const& b, Bs const&... bs) -> std::array<decltype(func(b.data[0], bs.data[0]...)), B::size>
            {
                return emulated_apply(func, ::xsimd::detail::make_index_sequence<B::size>(), b, bs...);
            }
        }
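        // Each kernel carries a defaulted template parameter
        // N = 8 * sizeof(T) * batch<T, A>::size, i.e. the register width in
        // bits, so that requires_arch<emulated<N>> selects the emulated
        // backend of the matching width.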
        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch<T, A> abs(batch<T, A> const& self, requires_arch<emulated<N>>) noexcept
        {
            return detail::emulated_apply([](T v)
                                          { return xsimd::abs(v); },
                                          self);
        }

        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch<T, A> add(batch<T, A> const& self, batch<T, A> const& other, requires_arch<emulated<N>>) noexcept
        {
            return detail::emulated_apply([](T v0, T v1)
                                          { return xsimd::add(v0, v1); },
                                          self, other);
        }

        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE bool all(batch_bool<T, A> const& self, requires_arch<emulated<N>>) noexcept
        {
            return std::all_of(self.data.begin(), self.data.end(), [](T v)
                               { return bool(v); });
        }

        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE bool any(batch_bool<T, A> const& self, requires_arch<emulated<N>>) noexcept
        {
            return std::any_of(self.data.begin(), self.data.end(), [](T v)
                               { return bool(v); });
        }
        template <class A, class T_out, class T_in, size_t N = 8 * sizeof(T_in) * batch<T_in, A>::size>
        XSIMD_INLINE batch_bool<T_out, A> batch_bool_cast(batch_bool<T_in, A> const& self, batch_bool<T_out, A> const&, requires_arch<emulated<N>>) noexcept
        {
            return { self.data };
        }

        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch<T, A> bitwise_and(batch<T, A> const& self, batch<T, A> const& other, requires_arch<emulated<N>>) noexcept
        {
            return detail::emulated_apply([](T v0, T v1)
                                          { return xsimd::bitwise_and(v0, v1); },
                                          self, other);
        }

        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch_bool<T, A> bitwise_and(batch_bool<T, A> const& self, batch_bool<T, A> const& other, requires_arch<emulated<N>>) noexcept
        {
            return detail::emulated_apply([](bool v0, bool v1)
                                          { return xsimd::bitwise_and(v0, v1); },
                                          self, other);
        }

        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch<T, A> bitwise_andnot(batch<T, A> const& self, batch<T, A> const& other, requires_arch<emulated<N>>) noexcept
        {
            return detail::emulated_apply([](T v0, T v1)
                                          { return xsimd::bitwise_andnot(v0, v1); },
                                          self, other);
        }

        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch_bool<T, A> bitwise_andnot(batch_bool<T, A> const& self, batch_bool<T, A> const& other, requires_arch<emulated<N>>) noexcept
        {
            return detail::emulated_apply([](bool v0, bool v1)
                                          { return xsimd::bitwise_andnot(v0, v1); },
                                          self, other);
        }

        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, int32_t other, requires_arch<emulated<N>>) noexcept
        {
            return detail::emulated_apply([other](T v)
                                          { return xsimd::bitwise_lshift(v, other); },
                                          self);
        }

        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch<T, A> bitwise_not(batch<T, A> const& self, requires_arch<emulated<N>>) noexcept
        {
            return detail::emulated_apply([](T v)
                                          { return xsimd::bitwise_not(v); },
                                          self);
        }

        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch_bool<T, A> bitwise_not(batch_bool<T, A> const& self, requires_arch<emulated<N>>) noexcept
        {
            return detail::emulated_apply([](bool v)
                                          { return xsimd::bitwise_not(v); },
                                          self);
        }

        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch<T, A> bitwise_or(batch<T, A> const& self, batch<T, A> const& other, requires_arch<emulated<N>>) noexcept
        {
            return detail::emulated_apply([](T v0, T v1)
                                          { return xsimd::bitwise_or(v0, v1); },
                                          self, other);
        }

        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch_bool<T, A> bitwise_or(batch_bool<T, A> const& self, batch_bool<T, A> const& other, requires_arch<emulated<N>>) noexcept
        {
            return detail::emulated_apply([](bool v0, bool v1)
                                          { return xsimd::bitwise_or(v0, v1); },
                                          self, other);
        }

        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& self, int32_t other, requires_arch<emulated<N>>) noexcept
        {
            return detail::emulated_apply([other](T v)
                                          { return xsimd::bitwise_rshift(v, other); },
                                          self);
        }

        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch<T, A> bitwise_xor(batch<T, A> const& self, batch<T, A> const& other, requires_arch<emulated<N>>) noexcept
        {
            return detail::emulated_apply([](T v0, T v1)
                                          { return xsimd::bitwise_xor(v0, v1); },
                                          self, other);
        }

        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch_bool<T, A> bitwise_xor(batch_bool<T, A> const& self, batch_bool<T, A> const& other, requires_arch<emulated<N>>) noexcept
        {
            return detail::emulated_apply([](bool v0, bool v1)
                                          { return xsimd::bitwise_xor(v0, v1); },
                                          self, other);
        }
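        // bitwise_cast reinterprets the register bytes as another element
        // type of the same overall width; the memcpy keeps the type punning
        // well-defined.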
        template <class A, class T_in, class T_out, size_t N = 8 * sizeof(T_in) * batch<T_in, A>::size>
        XSIMD_INLINE batch<T_out, A> bitwise_cast(batch<T_in, A> const& self, batch<T_out, A> const&, requires_arch<emulated<N>>) noexcept
        {
            constexpr size_t size = batch<T_out, A>::size;
            std::array<T_out, size> result;
            char* raw_data = reinterpret_cast<char*>(result.data());
            const char* raw_input = reinterpret_cast<const char*>(self.data.data());
            memcpy(raw_data, raw_input, size * sizeof(T_out));
            return result;
        }

        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch<T, A> broadcast(T val, requires_arch<emulated<N>>) noexcept
        {
            constexpr size_t size = batch<T, A>::size;
            std::array<T, size> r;
            std::fill(r.begin(), r.end(), val);
            return r;
        }
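        // count is kept disabled: it would pop-count the packed lane mask with
        // a branch-free SWAR reduction (sum bits in parallel within each byte,
        // then gather the byte sums into the top byte via one multiply).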
#if 0
        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE size_t count(batch_bool<T, A> const& x, requires_arch<emulated<N>>) noexcept
        {
            uint64_t m = x.mask();
            m = m - ((m >> 1) & (uint64_t)~(uint64_t)0 / 3);
            m = (m & (uint64_t)~(uint64_t)0 / 15 * 3) + ((m >> 2) & (uint64_t)~(uint64_t)0 / 15 * 3);
            m = (m + (m >> 4)) & (uint64_t)~(uint64_t)0 / 255 * 15;
            return (m * ((uint64_t)~(uint64_t)0 / 255)) >> (sizeof(uint64_t) - 1) * CHAR_BIT;
        }
#endif
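        // Complex batches keep real and imaginary parts in two separate
        // registers; complex_low/complex_high re-interleave them into the
        // (re, im, re, im, ...) memory order, for the low and high half of
        // the lanes respectively.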
        namespace detail
        {
            template <class A, typename T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
            XSIMD_INLINE batch<T, A> complex_low(batch<std::complex<T>, A> const& self, requires_arch<emulated<N>>) noexcept
            {
                constexpr size_t size = batch<T, A>::size;
                std::array<T, size> result;
                for (size_t i = 0; i < size / 2; ++i)
                {
                    result[2 * i] = self.real().data[i];
                    result[1 + 2 * i] = self.imag().data[i];
                }
                return result;
            }

            template <class A, typename T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
            XSIMD_INLINE batch<T, A> complex_high(batch<std::complex<T>, A> const& self, requires_arch<emulated<N>>) noexcept
            {
                constexpr size_t size = batch<T, A>::size;
                std::array<T, size> result;
                for (size_t i = 0; i < size / 2; ++i)
                {
                    result[2 * i] = self.real().data[i + size / 2];
                    result[1 + 2 * i] = self.imag().data[i + size / 2];
                }
                return result;
            }
        }

        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch<T, A> decr_if(batch<T, A> const& self, batch_bool<T, A> const& mask, requires_arch<emulated<N>>) noexcept
        {
            return self - batch<T, A>(mask.data);
        }

        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch<T, A> div(batch<T, A> const& self, batch<T, A> const& other, requires_arch<emulated<N>>) noexcept
        {
            return detail::emulated_apply([](T v0, T v1)
                                          { return xsimd::div(v0, v1); },
                                          self, other);
        }
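        // fast_cast provides element-wise value conversions between integer
        // and floating-point batches of the same width.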
        namespace detail
        {
            template <class A, size_t N = 8 * sizeof(float) * batch<float, A>::size>
            XSIMD_INLINE batch<float, A> fast_cast(batch<int32_t, A> const& self, batch<float, A> const&, requires_arch<emulated<N>>) noexcept
            {
                return detail::emulated_apply([](int32_t v)
                                              { return float(v); },
                                              self);
            }

            template <class A, size_t N = 8 * sizeof(float) * batch<float, A>::size>
            XSIMD_INLINE batch<float, A> fast_cast(batch<uint32_t, A> const& self, batch<float, A> const&, requires_arch<emulated<N>>) noexcept
            {
                return detail::emulated_apply([](uint32_t v)
                                              { return float(v); },
                                              self);
            }

            template <class A, size_t N = 8 * sizeof(double) * batch<double, A>::size>
            XSIMD_INLINE batch<double, A> fast_cast(batch<int64_t, A> const& self, batch<double, A> const&, requires_arch<emulated<N>>) noexcept
            {
                return detail::emulated_apply([](int64_t v)
                                              { return double(v); },
                                              self);
            }

            template <class A, size_t N = 8 * sizeof(double) * batch<double, A>::size>
            XSIMD_INLINE batch<double, A> fast_cast(batch<uint64_t, A> const& self, batch<double, A> const&, requires_arch<emulated<N>>) noexcept
            {
                return detail::emulated_apply([](uint64_t v)
                                              { return double(v); },
                                              self);
            }

            template <class A, size_t N = 8 * sizeof(int32_t) * batch<int32_t, A>::size>
            XSIMD_INLINE batch<int32_t, A> fast_cast(batch<float, A> const& self, batch<int32_t, A> const&, requires_arch<emulated<N>>) noexcept
            {
                return detail::emulated_apply([](float v)
                                              { return int32_t(v); },
                                              self);
            }

            template <class A, size_t N = 8 * sizeof(double) * batch<double, A>::size>
            XSIMD_INLINE batch<int64_t, A> fast_cast(batch<double, A> const& self, batch<int64_t, A> const&, requires_arch<emulated<N>>) noexcept
            {
                return detail::emulated_apply([](double v)
                                              { return int64_t(v); },
                                              self);
            }
        }
        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch_bool<T, emulated<N>> eq(batch<T, emulated<N>> const& self, batch<T, emulated<N>> const& other, requires_arch<emulated<N>>) noexcept
        {
            return detail::emulated_apply([](T v0, T v1)
                                          { return xsimd::eq(v0, v1); },
                                          self, other);
        }

        template <class A, class T, size_t N = 8 * sizeof(T) * batch_bool<T, A>::size>
        XSIMD_INLINE batch_bool<T, emulated<N>> eq(batch_bool<T, emulated<N>> const& self, batch_bool<T, emulated<N>> const& other, requires_arch<emulated<N>>) noexcept
        {
            return detail::emulated_apply([](bool v0, bool v1)
                                          { return xsimd::eq(v0, v1); },
                                          self, other);
        }

        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch<T, A> from_bool(batch_bool<T, A> const& self, requires_arch<emulated<N>>) noexcept
        {
            return detail::emulated_apply([](bool v)
                                          { return T(v); },
                                          self);
        }
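        // from_mask rebuilds per-lane booleans from a packed bit mask:
        // bit i of `mask` becomes lane i of the resulting batch_bool.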
        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch_bool<T, A> from_mask(batch_bool<T, A> const&, uint64_t mask, requires_arch<emulated<N>>) noexcept
        {
            constexpr size_t size = batch<T, A>::size;
            std::array<bool, size> vmask;
            for (size_t i = 0; i < size; ++i)
                vmask[i] = (mask >> i) & 1u;
            return vmask;
        }
        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch_bool<T, emulated<N>> ge(batch<T, emulated<N>> const& self, batch<T, emulated<N>> const& other, requires_arch<emulated<N>>) noexcept
        {
            return detail::emulated_apply([](T v0, T v1)
                                          { return xsimd::ge(v0, v1); },
                                          self, other);
        }

        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch_bool<T, emulated<N>> gt(batch<T, emulated<N>> const& self, batch<T, emulated<N>> const& other, requires_arch<emulated<N>>) noexcept
        {
            return detail::emulated_apply([](T v0, T v1)
                                          { return xsimd::gt(v0, v1); },
                                          self, other);
        }
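        // haddp horizontally sums each input batch: lane i of the result is
        // the full sum of row[i]'s lanes.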
        template <class A, typename T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch<T, A> haddp(batch<T, A> const* row, requires_arch<emulated<N>>) noexcept
        {
            constexpr size_t size = batch<T, A>::size;
            std::array<T, size> r;
            for (size_t i = 0; i < size; ++i)
                r[i] = std::accumulate(row[i].data.begin() + 1, row[i].data.end(), row[i].data.front());
            return r;
        }
        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch<T, A> incr_if(batch<T, A> const& self, batch_bool<T, A> const& mask, requires_arch<emulated<N>>) noexcept
        {
            return self + batch<T, A>(mask.data);
        }

        template <class A, class T, size_t I, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch<T, A> insert(batch<T, A> const& self, T val, index<I>, requires_arch<emulated<N>>) noexcept
        {
            batch<T, A> other = self;
            other.data[I] = val;
            return other;
        }

        template <class A, typename T, size_t N = 8 * sizeof(T) * batch<T, A>::size, class = typename std::enable_if<std::is_floating_point<T>::value, void>::type>
        XSIMD_INLINE batch_bool<T, A> isnan(batch<T, A> const& self, requires_arch<emulated<N>>) noexcept
        {
            return detail::emulated_apply([](T v)
                                          { return xsimd::isnan(v); },
                                          self);
        }
        template <class A, typename T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch<T, A> load_aligned(T const* mem, convert<T>, requires_arch<emulated<N>>) noexcept
        {
            constexpr size_t size = batch<T, A>::size;
            std::array<T, size> res;
            std::copy(mem, mem + size, res.begin());
            return res;
        }

        template <class A, typename T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch<T, A> load_unaligned(T const* mem, convert<T>, requires_arch<emulated<N>>) noexcept
        {
            constexpr size_t size = batch<T, A>::size;
            std::array<T, size> res;
            std::copy(mem, mem + size, res.begin());
            return res;
        }
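        // load_complex is the inverse of complex_low/complex_high: it
        // deinterleaves two (re, im, ...) registers into separate real and
        // imaginary batches.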
        namespace detail
        {
            template <class A, typename T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
            XSIMD_INLINE batch<std::complex<T>, A> load_complex(batch<T, A> const& hi, batch<T, A> const& lo, requires_arch<emulated<N>>) noexcept
            {
                constexpr size_t size = batch<T, A>::size;
                std::array<T, size> real, imag;
                for (size_t i = 0; i < size / 2; ++i)
                {
                    real[i] = hi.data[2 * i];
                    imag[i] = hi.data[1 + 2 * i];
                }
                for (size_t i = 0; i < size / 2; ++i)
                {
                    real[size / 2 + i] = lo.data[2 * i];
                    imag[size / 2 + i] = lo.data[1 + 2 * i];
                }
                return { real, imag };
            }
        }
        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch_bool<T, emulated<N>> le(batch<T, emulated<N>> const& self, batch<T, emulated<N>> const& other, requires_arch<emulated<N>>) noexcept
        {
            return detail::emulated_apply([](T v0, T v1)
                                          { return xsimd::le(v0, v1); },
                                          self, other);
        }

        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch_bool<T, emulated<N>> lt(batch<T, emulated<N>> const& self, batch<T, emulated<N>> const& other, requires_arch<emulated<N>>) noexcept
        {
            return detail::emulated_apply([](T v0, T v1)
                                          { return xsimd::lt(v0, v1); },
                                          self, other);
        }
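        // mask is the inverse of from_mask: it packs lane i of the batch_bool
        // into bit i of a uint64_t (hence at most 64 lanes).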
        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE uint64_t mask(batch_bool<T, A> const& self, requires_arch<emulated<N>>) noexcept
        {
            constexpr size_t size = batch<T, A>::size;
            uint64_t res = 0;
            for (size_t i = 0; i < size; ++i)
                res |= (self.data[i] ? uint64_t(1) : uint64_t(0)) << i; // 64-bit operand avoids UB when i >= 32
            return res;
        }
        template <class A, typename T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch<T, A> max(batch<T, A> const& self, batch<T, A> const& other, requires_arch<emulated<N>>) noexcept
        {
            return detail::emulated_apply([](T v0, T v1)
                                          { return xsimd::max(v0, v1); },
                                          self, other);
        }

        template <class A, typename T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch<T, A> min(batch<T, A> const& self, batch<T, A> const& other, requires_arch<emulated<N>>) noexcept
        {
            return detail::emulated_apply([](T v0, T v1)
                                          { return xsimd::min(v0, v1); },
                                          self, other);
        }

        template <class A, typename T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch<T, A> mul(batch<T, A> const& self, batch<T, A> const& other, requires_arch<emulated<N>>) noexcept
        {
            return detail::emulated_apply([](T v0, T v1)
                                          { return xsimd::mul(v0, v1); },
                                          self, other);
        }

        template <class A, typename T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch<as_integer_t<T>, A> nearbyint_as_int(batch<T, A> const& self, requires_arch<emulated<N>>) noexcept
        {
            return detail::emulated_apply([](T v)
                                          { return xsimd::nearbyint_as_int(v); },
                                          self);
        }
        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch<T, A> neg(batch<T, A> const& self, requires_arch<emulated<N>>) noexcept
        {
            return detail::emulated_apply([](T v)
                                          { return xsimd::neg(v); },
                                          self);
        }

        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch_bool<T, A> neq(batch<T, A> const& self, batch<T, A> const& other, requires_arch<emulated<N>>) noexcept
        {
            return detail::emulated_apply([](T v0, T v1)
                                          { return xsimd::neq(v0, v1); },
                                          self, other);
        }

        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch_bool<T, A> neq(batch_bool<T, A> const& self, batch_bool<T, A> const& other, requires_arch<emulated<N>>) noexcept
        {
            return detail::emulated_apply([](bool v0, bool v1)
                                          { return xsimd::neq(v0, v1); },
                                          self, other);
        }
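        // Horizontal reductions fold all lanes into a single scalar with
        // std::accumulate, seeded with the first lane.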
        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE T reduce_add(batch<T, A> const& self, requires_arch<emulated<N>>) noexcept
        {
            constexpr size_t size = batch<T, A>::size;
            std::array<T, size> buffer;
            self.store_unaligned(buffer.data());
            return std::accumulate(buffer.begin() + 1, buffer.end(), *buffer.begin());
        }

        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE T reduce_max(batch<T, A> const& self, requires_arch<emulated<N>>) noexcept
        {
            return std::accumulate(self.data.begin() + 1, self.data.end(), *self.data.begin(), [](T const& x, T const& y)
                                   { return xsimd::max(x, y); });
        }

        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE T reduce_min(batch<T, A> const& self, requires_arch<emulated<N>>) noexcept
        {
            return std::accumulate(self.data.begin() + 1, self.data.end(), *self.data.begin(), [](T const& x, T const& y)
                                   { return xsimd::min(x, y); });
        }
        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch<T, A> rsqrt(batch<T, A> const& self, requires_arch<emulated<N>>) noexcept
        {
            return detail::emulated_apply([](T v)
                                          { return xsimd::rsqrt(v); },
                                          self);
        }

        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch<T, A> select(batch_bool<T, A> const& cond, batch<T, A> const& true_br, batch<T, A> const& false_br, requires_arch<emulated<N>>) noexcept
        {
            return detail::emulated_apply([](bool c, T t, T f)
                                          { return xsimd::select(c, t, f); },
                                          cond, true_br, false_br);
        }

        template <class A, class T, bool... Values>
        XSIMD_INLINE batch<T, A> select(batch_bool_constant<T, A, Values...> const& cond, batch<T, A> const& true_br, batch<T, A> const& false_br, requires_arch<emulated<8 * sizeof(T) * batch<T, A>::size>>) noexcept
        {
            constexpr size_t size = batch<T, A>::size;
            static_assert(sizeof...(Values) == size, "consistent init");
            return select((batch_bool<T, A>)cond, true_br, false_br, emulated<8 * sizeof(T) * size> {});
        }
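        // shuffle selects lanes from the virtual concatenation [x, y]:
        // index i < size picks x.data[i], otherwise y.data[i - size].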
        template <class A, typename T, class ITy, ITy... Is>
        XSIMD_INLINE batch<T, A> shuffle(batch<T, A> const& x, batch<T, A> const& y, batch_constant<ITy, A, Is...> mask, requires_arch<emulated<8 * sizeof(T) * batch<T, A>::size>>) noexcept
        {
            constexpr size_t size = batch<T, A>::size;
            batch<ITy, A> bmask = mask;
            std::array<T, size> res;
            for (size_t i = 0; i < size; ++i)
                res[i] = bmask.data[i] < size ? x.data[bmask.data[i]] : y.data[bmask.data[i] - size];
            return res;
        }
        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch<T, A> sqrt(batch<T, A> const& self, requires_arch<emulated<N>>) noexcept
        {
            return detail::emulated_apply([](T v)
                                          { return xsimd::sqrt(v); },
                                          self);
        }
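        // slide_left/slide_right shift the whole register by M bytes (not
        // lanes), filling the vacated bytes with zeros.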
        template <size_t M, class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch<T, A> slide_left(batch<T, A> const& x, requires_arch<emulated<N>>) noexcept
        {
            constexpr size_t size = batch<T, A>::size;
            std::array<T, size> result;
            char* raw_data = reinterpret_cast<char*>(result.data());
            memset(raw_data, 0, M);
            memcpy(raw_data + M, reinterpret_cast<const char*>(x.data.data()), sizeof(T) * result.size() - M);
            return result;
        }

        template <size_t M, class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch<T, A> slide_right(batch<T, A> const& x, requires_arch<emulated<N>>) noexcept
        {
            constexpr size_t size = batch<T, A>::size;
            std::array<T, size> result;
            char* raw_data = reinterpret_cast<char*>(result.data());
            memcpy(raw_data, reinterpret_cast<const char*>(x.data.data()) + M, sizeof(T) * result.size() - M);
            memset(raw_data + sizeof(T) * result.size() - M, 0, M);
            return result;
        }
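        // sadd/ssub are saturating: lane results are clamped to the
        // representable range by the scalar xsimd::sadd / xsimd::ssub.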
        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch<T, A> sadd(batch<T, A> const& self, batch<T, A> const& other, requires_arch<emulated<N>>) noexcept
        {
            return detail::emulated_apply([](T v0, T v1)
                                          { return xsimd::sadd(v0, v1); },
                                          self, other);
        }

        template <class A, class T, size_t N, class... Values>
        XSIMD_INLINE batch<T, emulated<N>> set(batch<T, emulated<N>> const&, requires_arch<emulated<N>>, Values... values) noexcept
        {
            static_assert(sizeof...(Values) == batch<T, emulated<N>>::size, "consistent init");
            return { typename batch<T, emulated<N>>::register_type { static_cast<T>(values)... } };
        }

        template <class A, class T, size_t N, class... Values>
        XSIMD_INLINE batch_bool<T, emulated<N>> set(batch_bool<T, emulated<N>> const&, requires_arch<emulated<N>>, Values... values) noexcept
        {
            static_assert(sizeof...(Values) == batch<T, emulated<N>>::size, "consistent init");
            return { std::array<bool, sizeof...(Values)> { static_cast<bool>(values)... } };
        }
        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch<T, A> ssub(batch<T, A> const& self, batch<T, A> const& other, requires_arch<emulated<N>>) noexcept
        {
            return detail::emulated_apply([](T v0, T v1)
                                          { return xsimd::ssub(v0, v1); },
                                          self, other);
        }

        template <class A, class T, size_t N>
        XSIMD_INLINE void store_aligned(T* mem, batch<T, emulated<N>> const& self, requires_arch<emulated<N>>) noexcept
        {
            std::copy(self.data.begin(), self.data.end(), mem);
        }

        template <class A, class T, size_t N>
        XSIMD_INLINE void store_unaligned(T* mem, batch<T, emulated<N>> const& self, requires_arch<emulated<N>>) noexcept
        {
            std::copy(self.data.begin(), self.data.end(), mem);
        }

        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch<T, A> sub(batch<T, A> const& self, batch<T, A> const& other, requires_arch<emulated<N>>) noexcept
        {
            return detail::emulated_apply([](T v0, T v1)
                                          { return xsimd::sub(v0, v1); },
                                          self, other);
        }
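        // swizzle permutes lanes within a single batch according to the
        // compile-time index mask.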
        template <class A, typename T, class ITy, ITy... Is>
        XSIMD_INLINE batch<T, A> swizzle(batch<T, A> const& self, batch_constant<ITy, A, Is...> mask, requires_arch<emulated<8 * sizeof(T) * batch<T, A>::size>>) noexcept
        {
            constexpr size_t size = batch<T, A>::size;
            batch<ITy, A> bmask = mask;
            std::array<T, size> res;
            for (size_t i = 0; i < size; ++i)
                res[i] = self.data[bmask.data[i]];
            return res;
        }
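        // zip_lo/zip_hi interleave lanes from the lower/upper halves of the
        // two operands; zip_hi special-cases odd lane counts, where the
        // operand order flips.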
        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch<T, A> zip_hi(batch<T, A> const& self, batch<T, A> const& other, requires_arch<emulated<N>>) noexcept
        {
            constexpr size_t size = batch<T, A>::size;
            std::array<T, size> res;
            if (size % 2)
            {
                for (size_t i = 0; i < size; ++i)
                    res[i] = (i % 2 ? self : other).data[size / 2 + i / 2];
            }
            else
            {
                for (size_t i = 0; i < size; ++i)
                    res[i] = (i % 2 ? other : self).data[size / 2 + i / 2];
            }
            return res;
        }

        template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
        XSIMD_INLINE batch<T, A> zip_lo(batch<T, A> const& self, batch<T, A> const& other, requires_arch<emulated<N>>) noexcept
        {
            constexpr size_t size = batch<T, A>::size;
            std::array<T, size> res;
            for (size_t i = 0; i < size; ++i)
                res[i] = (i % 2 ? other : self).data[i / 2];
            return res;
        }
    }
}

#endif