#ifndef XSIMD_SVE_HPP
#define XSIMD_SVE_HPP

#include <cassert>
#include <complex>
#include <type_traits>

#include "../types/xsimd_sve_register.hpp"

namespace xsimd
{
    template <typename T, class A, T... Values>
    struct batch_constant;

    namespace kernel
    {
        namespace detail
        {
            using xsimd::index;
            using xsimd::types::detail::sve_vector_type;

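            // predicate creation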
            XSIMD_INLINE svbool_t sve_ptrue_impl(index<1>) noexcept { return svptrue_b8(); }
            XSIMD_INLINE svbool_t sve_ptrue_impl(index<2>) noexcept { return svptrue_b16(); }
            XSIMD_INLINE svbool_t sve_ptrue_impl(index<4>) noexcept { return svptrue_b32(); }
            XSIMD_INLINE svbool_t sve_ptrue_impl(index<8>) noexcept { return svptrue_b64(); }

            template <class T>
            XSIMD_INLINE svbool_t sve_ptrue() noexcept { return sve_ptrue_impl(index<sizeof(T)> {}); }

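            // count active lanes in a predicate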
            XSIMD_INLINE uint64_t sve_pcount_impl(svbool_t p, index<1>) noexcept { return svcntp_b8(p, p); }
            XSIMD_INLINE uint64_t sve_pcount_impl(svbool_t p, index<2>) noexcept { return svcntp_b16(p, p); }
            XSIMD_INLINE uint64_t sve_pcount_impl(svbool_t p, index<4>) noexcept { return svcntp_b32(p, p); }
            XSIMD_INLINE uint64_t sve_pcount_impl(svbool_t p, index<8>) noexcept { return svcntp_b64(p, p); }

            template <class T>
            XSIMD_INLINE uint64_t sve_pcount(svbool_t p) noexcept { return sve_pcount_impl(p, index<sizeof(T)> {}); }

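            // enable_if shorthands for the type categories used below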
            template <class T>
            using sve_enable_signed_int_t = typename std::enable_if<std::is_integral<T>::value && std::is_signed<T>::value, int>::type;

            template <class T>
            using sve_enable_unsigned_int_t = typename std::enable_if<std::is_integral<T>::value && !std::is_signed<T>::value, int>::type;

            template <class T>
            using sve_enable_floating_point_t = typename std::enable_if<std::is_floating_point<T>::value, int>::type;

            // note: std::is_signed is also true for floating-point types
            template <class T>
            using sve_enable_signed_int_or_floating_point_t = typename std::enable_if<std::is_signed<T>::value, int>::type;

            template <class T>
            using sve_enable_all_t = typename std::enable_if<std::is_arithmetic<T>::value, int>::type;
        }
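
        /********
         * Load *
         ********/
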
        namespace detail
        {
            // "char" is not allowed in SVE load/store operations, so route it
            // through the same-signedness fixed-width type
            using sve_fix_char_t_impl = typename std::conditional<std::is_signed<char>::value, int8_t, uint8_t>::type;

            template <class T>
            using sve_fix_char_t = typename std::conditional<std::is_same<char, typename std::decay<T>::type>::value,
                                                             sve_fix_char_t_impl, T>::type;
        }

        template <class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> load_aligned(T const* src, convert<T>, requires_arch<sve>) noexcept
        {
            return svld1(detail::sve_ptrue<T>(), reinterpret_cast<detail::sve_fix_char_t<T> const*>(src));
        }

        template <class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> load_unaligned(T const* src, convert<T>, requires_arch<sve>) noexcept
        {
            return load_aligned<A>(src, convert<T>(), sve {});
        }

        // load_complex
        template <class A, class T, detail::sve_enable_floating_point_t<T> = 0>
        XSIMD_INLINE batch<std::complex<T>, A> load_complex_aligned(std::complex<T> const* mem, convert<std::complex<T>>, requires_arch<sve>) noexcept
        {
            // de-interleave the real and imaginary parts with a structured load
            const T* buf = reinterpret_cast<const T*>(mem);
            const auto tmp = svld2(detail::sve_ptrue<T>(), buf);
            const auto real = svget2(tmp, 0);
            const auto imag = svget2(tmp, 1);
            return batch<std::complex<T>, A> { real, imag };
        }

        template <class A, class T, detail::sve_enable_floating_point_t<T> = 0>
        XSIMD_INLINE batch<std::complex<T>, A> load_complex_unaligned(std::complex<T> const* mem, convert<std::complex<T>>, requires_arch<sve>) noexcept
        {
            return load_complex_aligned<A>(mem, convert<std::complex<T>> {}, sve {});
        }
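
        /*********
         * Store *
         *********/
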
        template <class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE void store_aligned(T* dst, batch<T, A> const& src, requires_arch<sve>) noexcept
        {
            svst1(detail::sve_ptrue<T>(), reinterpret_cast<detail::sve_fix_char_t<T>*>(dst), src);
        }

        template <class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE void store_unaligned(T* dst, batch<T, A> const& src, requires_arch<sve>) noexcept
        {
            store_aligned<A>(dst, src, sve {});
        }

        // store_complex
        template <class A, class T, detail::sve_enable_floating_point_t<T> = 0>
        XSIMD_INLINE void store_complex_aligned(std::complex<T>* dst, batch<std::complex<T>, A> const& src, requires_arch<sve>) noexcept
        {
            // re-interleave the real and imaginary parts with a structured store
            using v2type = typename std::conditional<(sizeof(T) == 4), svfloat32x2_t, svfloat64x2_t>::type;
            v2type tmp {};
            tmp = svset2(tmp, 0, src.real());
            tmp = svset2(tmp, 1, src.imag());
            T* buf = reinterpret_cast<T*>(dst);
            svst2(detail::sve_ptrue<T>(), buf, tmp);
        }

        template <class A, class T, detail::sve_enable_floating_point_t<T> = 0>
        XSIMD_INLINE void store_complex_unaligned(std::complex<T>* dst, batch<std::complex<T>, A> const& src, requires_arch<sve>) noexcept
        {
            store_complex_aligned(dst, src, sve {});
        }
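
        /******************
         * scatter/gather *
         ******************/
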
        namespace detail
        {
            // SVE gather/scatter supports only 32- and 64-bit elements,
            // with indices of the same width as the elements
            template <class T, class U>
            using sve_enable_sg_t = typename std::enable_if<(sizeof(T) == sizeof(U) && (sizeof(T) == 4 || sizeof(T) == 8)), int>::type;
        }

        // scatter
        template <class A, class T, class U, detail::sve_enable_sg_t<T, U> = 0>
        XSIMD_INLINE void scatter(batch<T, A> const& src, T* dst, batch<U, A> const& index, kernel::requires_arch<sve>) noexcept
        {
            svst1_scatter_index(detail::sve_ptrue<T>(), dst, index.data, src.data);
        }

        // gather
        template <class A, class T, class U, detail::sve_enable_sg_t<T, U> = 0>
        XSIMD_INLINE batch<T, A> gather(batch<T, A> const&, T const* src, batch<U, A> const& index, kernel::requires_arch<sve>) noexcept
        {
            return svld1_gather_index(detail::sve_ptrue<T>(), src, index.data);
        }
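
        /*************
         * broadcast *
         *************/
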
        template <class A, class T, detail::enable_sized_unsigned_t<T, 1> = 0>
        XSIMD_INLINE batch<T, A> broadcast(T arg, requires_arch<sve>) noexcept
        {
            return svdup_n_u8(uint8_t(arg));
        }

        template <class A, class T, detail::enable_sized_signed_t<T, 1> = 0>
        XSIMD_INLINE batch<T, A> broadcast(T arg, requires_arch<sve>) noexcept
        {
            return svdup_n_s8(int8_t(arg));
        }

        template <class A, class T, detail::enable_sized_unsigned_t<T, 2> = 0>
        XSIMD_INLINE batch<T, A> broadcast(T arg, requires_arch<sve>) noexcept
        {
            return svdup_n_u16(uint16_t(arg));
        }

        template <class A, class T, detail::enable_sized_signed_t<T, 2> = 0>
        XSIMD_INLINE batch<T, A> broadcast(T arg, requires_arch<sve>) noexcept
        {
            return svdup_n_s16(int16_t(arg));
        }

        template <class A, class T, detail::enable_sized_unsigned_t<T, 4> = 0>
        XSIMD_INLINE batch<T, A> broadcast(T arg, requires_arch<sve>) noexcept
        {
            return svdup_n_u32(uint32_t(arg));
        }

        template <class A, class T, detail::enable_sized_signed_t<T, 4> = 0>
        XSIMD_INLINE batch<T, A> broadcast(T arg, requires_arch<sve>) noexcept
        {
            return svdup_n_s32(int32_t(arg));
        }

        template <class A, class T, detail::enable_sized_unsigned_t<T, 8> = 0>
        XSIMD_INLINE batch<T, A> broadcast(T arg, requires_arch<sve>) noexcept
        {
            return svdup_n_u64(uint64_t(arg));
        }

        template <class A, class T, detail::enable_sized_signed_t<T, 8> = 0>
        XSIMD_INLINE batch<T, A> broadcast(T arg, requires_arch<sve>) noexcept
        {
            return svdup_n_s64(int64_t(arg));
        }

        template <class A>
        XSIMD_INLINE batch<float, A> broadcast(float arg, requires_arch<sve>) noexcept
        {
            return svdup_n_f32(arg);
        }

        template <class A>
        XSIMD_INLINE batch<double, A> broadcast(double arg, requires_arch<sve>) noexcept
        {
            return svdup_n_f64(arg);
        }

        template <class A, class T, detail::sve_enable_floating_point_t<T> = 0>
        XSIMD_INLINE batch<T, A> broadcast(T val, requires_arch<sve>) noexcept
        {
            return broadcast<sve>(val, sve {});
        }
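
        /**************
         * Arithmetic *
         **************/
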
        // add
        template <class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> add(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<sve>) noexcept
        {
            return svadd_x(detail::sve_ptrue<T>(), lhs, rhs);
        }

        // saturating add
        template <class A, class T, detail::enable_integral_t<T> = 0>
        XSIMD_INLINE batch<T, A> sadd(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<sve>) noexcept
        {
            return svqadd(lhs, rhs);
        }

        // sub
        template <class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> sub(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<sve>) noexcept
        {
            return svsub_x(detail::sve_ptrue<T>(), lhs, rhs);
        }

        // saturating sub
        template <class A, class T, detail::enable_integral_t<T> = 0>
        XSIMD_INLINE batch<T, A> ssub(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<sve>) noexcept
        {
            return svqsub(lhs, rhs);
        }

        // mul
        template <class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> mul(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<sve>) noexcept
        {
            return svmul_x(detail::sve_ptrue<T>(), lhs, rhs);
        }

        // div: SVE provides division for 32- and 64-bit elements only
        template <class A, class T, typename std::enable_if<(sizeof(T) >= 4), int>::type = 0>
        XSIMD_INLINE batch<T, A> div(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<sve>) noexcept
        {
            return svdiv_x(detail::sve_ptrue<T>(), lhs, rhs);
        }

        // max
        template <class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> max(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<sve>) noexcept
        {
            return svmax_x(detail::sve_ptrue<T>(), lhs, rhs);
        }

        // min
        template <class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> min(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<sve>) noexcept
        {
            return svmin_x(detail::sve_ptrue<T>(), lhs, rhs);
        }

        // neg: negate unsigned batches through the same-width signed view
        template <class A, class T, detail::enable_sized_unsigned_t<T, 1> = 0>
        XSIMD_INLINE batch<T, A> neg(batch<T, A> const& arg, requires_arch<sve>) noexcept
        {
            return svreinterpret_u8(svneg_x(detail::sve_ptrue<T>(), svreinterpret_s8(arg)));
        }

        template <class A, class T, detail::enable_sized_unsigned_t<T, 2> = 0>
        XSIMD_INLINE batch<T, A> neg(batch<T, A> const& arg, requires_arch<sve>) noexcept
        {
            return svreinterpret_u16(svneg_x(detail::sve_ptrue<T>(), svreinterpret_s16(arg)));
        }

        template <class A, class T, detail::enable_sized_unsigned_t<T, 4> = 0>
        XSIMD_INLINE batch<T, A> neg(batch<T, A> const& arg, requires_arch<sve>) noexcept
        {
            return svreinterpret_u32(svneg_x(detail::sve_ptrue<T>(), svreinterpret_s32(arg)));
        }

        template <class A, class T, detail::enable_sized_unsigned_t<T, 8> = 0>
        XSIMD_INLINE batch<T, A> neg(batch<T, A> const& arg, requires_arch<sve>) noexcept
        {
            return svreinterpret_u64(svneg_x(detail::sve_ptrue<T>(), svreinterpret_s64(arg)));
        }

        template <class A, class T, detail::sve_enable_signed_int_or_floating_point_t<T> = 0>
        XSIMD_INLINE batch<T, A> neg(batch<T, A> const& arg, requires_arch<sve>) noexcept
        {
            return svneg_x(detail::sve_ptrue<T>(), arg);
        }

        // abs: identity for unsigned batches
        template <class A, class T, detail::sve_enable_unsigned_int_t<T> = 0>
        XSIMD_INLINE batch<T, A> abs(batch<T, A> const& arg, requires_arch<sve>) noexcept
        {
            return arg;
        }

        template <class A, class T, detail::sve_enable_signed_int_or_floating_point_t<T> = 0>
        XSIMD_INLINE batch<T, A> abs(batch<T, A> const& arg, requires_arch<sve>) noexcept
        {
            return svabs_x(detail::sve_ptrue<T>(), arg);
        }

        // fma: x * y + z
        template <class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> fma(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<sve>) noexcept
        {
            return svmad_x(detail::sve_ptrue<T>(), x, y, z);
        }

        // fnma: z - x * y
        template <class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> fnma(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<sve>) noexcept
        {
            return svmsb_x(detail::sve_ptrue<T>(), x, y, z);
        }

        // fms: x * y - z == -(z - x * y)
        template <class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> fms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<sve>) noexcept
        {
            return -fnma(x, y, z, sve {});
        }

        // fnms: -(x * y + z)
        template <class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> fnms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<sve>) noexcept
        {
            return -fma(x, y, z, sve {});
        }
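
        /**********************
         * Bitwise operations *
         **********************/
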
        // bitwise_and
        template <class A, class T, detail::enable_integral_t<T> = 0>
        XSIMD_INLINE batch<T, A> bitwise_and(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<sve>) noexcept
        {
            return svand_x(detail::sve_ptrue<T>(), lhs, rhs);
        }

        // floating-point overloads operate on the underlying bit patterns
        template <class A>
        XSIMD_INLINE batch<float, A> bitwise_and(batch<float, A> const& lhs, batch<float, A> const& rhs, requires_arch<sve>) noexcept
        {
            const auto lhs_bits = svreinterpret_u32(lhs);
            const auto rhs_bits = svreinterpret_u32(rhs);
            const auto result_bits = svand_x(detail::sve_ptrue<float>(), lhs_bits, rhs_bits);
            return svreinterpret_f32(result_bits);
        }

        template <class A>
        XSIMD_INLINE batch<double, A> bitwise_and(batch<double, A> const& lhs, batch<double, A> const& rhs, requires_arch<sve>) noexcept
        {
            const auto lhs_bits = svreinterpret_u64(lhs);
            const auto rhs_bits = svreinterpret_u64(rhs);
            const auto result_bits = svand_x(detail::sve_ptrue<double>(), lhs_bits, rhs_bits);
            return svreinterpret_f64(result_bits);
        }

        template <class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE batch_bool<T, A> bitwise_and(batch_bool<T, A> const& lhs, batch_bool<T, A> const& rhs, requires_arch<sve>) noexcept
        {
            return svand_z(detail::sve_ptrue<T>(), lhs, rhs);
        }

        // bitwise_andnot: lhs & ~rhs
        template <class A, class T, detail::enable_integral_t<T> = 0>
        XSIMD_INLINE batch<T, A> bitwise_andnot(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<sve>) noexcept
        {
            return svbic_x(detail::sve_ptrue<T>(), lhs, rhs);
        }

        template <class A>
        XSIMD_INLINE batch<float, A> bitwise_andnot(batch<float, A> const& lhs, batch<float, A> const& rhs, requires_arch<sve>) noexcept
        {
            const auto lhs_bits = svreinterpret_u32(lhs);
            const auto rhs_bits = svreinterpret_u32(rhs);
            const auto result_bits = svbic_x(detail::sve_ptrue<float>(), lhs_bits, rhs_bits);
            return svreinterpret_f32(result_bits);
        }

        template <class A>
        XSIMD_INLINE batch<double, A> bitwise_andnot(batch<double, A> const& lhs, batch<double, A> const& rhs, requires_arch<sve>) noexcept
        {
            const auto lhs_bits = svreinterpret_u64(lhs);
            const auto rhs_bits = svreinterpret_u64(rhs);
            const auto result_bits = svbic_x(detail::sve_ptrue<double>(), lhs_bits, rhs_bits);
            return svreinterpret_f64(result_bits);
        }

        template <class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE batch_bool<T, A> bitwise_andnot(batch_bool<T, A> const& lhs, batch_bool<T, A> const& rhs, requires_arch<sve>) noexcept
        {
            return svbic_z(detail::sve_ptrue<T>(), lhs, rhs);
        }

        // bitwise_or
        template <class A, class T, detail::enable_integral_t<T> = 0>
        XSIMD_INLINE batch<T, A> bitwise_or(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<sve>) noexcept
        {
            return svorr_x(detail::sve_ptrue<T>(), lhs, rhs);
        }

        template <class A>
        XSIMD_INLINE batch<float, A> bitwise_or(batch<float, A> const& lhs, batch<float, A> const& rhs, requires_arch<sve>) noexcept
        {
            const auto lhs_bits = svreinterpret_u32(lhs);
            const auto rhs_bits = svreinterpret_u32(rhs);
            const auto result_bits = svorr_x(detail::sve_ptrue<float>(), lhs_bits, rhs_bits);
            return svreinterpret_f32(result_bits);
        }

        template <class A>
        XSIMD_INLINE batch<double, A> bitwise_or(batch<double, A> const& lhs, batch<double, A> const& rhs, requires_arch<sve>) noexcept
        {
            const auto lhs_bits = svreinterpret_u64(lhs);
            const auto rhs_bits = svreinterpret_u64(rhs);
            const auto result_bits = svorr_x(detail::sve_ptrue<double>(), lhs_bits, rhs_bits);
            return svreinterpret_f64(result_bits);
        }

        template <class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE batch_bool<T, A> bitwise_or(batch_bool<T, A> const& lhs, batch_bool<T, A> const& rhs, requires_arch<sve>) noexcept
        {
            return svorr_z(detail::sve_ptrue<T>(), lhs, rhs);
        }

        // bitwise_xor
        template <class A, class T, detail::enable_integral_t<T> = 0>
        XSIMD_INLINE batch<T, A> bitwise_xor(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<sve>) noexcept
        {
            return sveor_x(detail::sve_ptrue<T>(), lhs, rhs);
        }

        template <class A>
        XSIMD_INLINE batch<float, A> bitwise_xor(batch<float, A> const& lhs, batch<float, A> const& rhs, requires_arch<sve>) noexcept
        {
            const auto lhs_bits = svreinterpret_u32(lhs);
            const auto rhs_bits = svreinterpret_u32(rhs);
            const auto result_bits = sveor_x(detail::sve_ptrue<float>(), lhs_bits, rhs_bits);
            return svreinterpret_f32(result_bits);
        }

        template <class A>
        XSIMD_INLINE batch<double, A> bitwise_xor(batch<double, A> const& lhs, batch<double, A> const& rhs, requires_arch<sve>) noexcept
        {
            const auto lhs_bits = svreinterpret_u64(lhs);
            const auto rhs_bits = svreinterpret_u64(rhs);
            const auto result_bits = sveor_x(detail::sve_ptrue<double>(), lhs_bits, rhs_bits);
            return svreinterpret_f64(result_bits);
        }

        template <class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE batch_bool<T, A> bitwise_xor(batch_bool<T, A> const& lhs, batch_bool<T, A> const& rhs, requires_arch<sve>) noexcept
        {
            return sveor_z(detail::sve_ptrue<T>(), lhs, rhs);
        }

        // bitwise_not
        template <class A, class T, detail::enable_integral_t<T> = 0>
        XSIMD_INLINE batch<T, A> bitwise_not(batch<T, A> const& arg, requires_arch<sve>) noexcept
        {
            return svnot_x(detail::sve_ptrue<T>(), arg);
        }

        template <class A>
        XSIMD_INLINE batch<float, A> bitwise_not(batch<float, A> const& arg, requires_arch<sve>) noexcept
        {
            const auto arg_bits = svreinterpret_u32(arg);
            const auto result_bits = svnot_x(detail::sve_ptrue<float>(), arg_bits);
            return svreinterpret_f32(result_bits);
        }

        template <class A>
        XSIMD_INLINE batch<double, A> bitwise_not(batch<double, A> const& arg, requires_arch<sve>) noexcept
        {
            const auto arg_bits = svreinterpret_u64(arg);
            const auto result_bits = svnot_x(detail::sve_ptrue<double>(), arg_bits);
            return svreinterpret_f64(result_bits);
        }

        template <class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE batch_bool<T, A> bitwise_not(batch_bool<T, A> const& arg, requires_arch<sve>) noexcept
        {
            return svnot_z(detail::sve_ptrue<T>(), arg);
        }
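
        /*********
         * Shift *
         *********/
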
        namespace detail
        {
            // reinterpret a batch as its same-width unsigned counterpart
            // (SVE vector shift amounts must be unsigned)
            template <class A, class T, class U>
            XSIMD_INLINE batch<U, A> sve_to_unsigned_batch_impl(batch<T, A> const& arg, index<1>) noexcept
            {
                return svreinterpret_u8(arg);
            }

            template <class A, class T, class U>
            XSIMD_INLINE batch<U, A> sve_to_unsigned_batch_impl(batch<T, A> const& arg, index<2>) noexcept
            {
                return svreinterpret_u16(arg);
            }

            template <class A, class T, class U>
            XSIMD_INLINE batch<U, A> sve_to_unsigned_batch_impl(batch<T, A> const& arg, index<4>) noexcept
            {
                return svreinterpret_u32(arg);
            }

            template <class A, class T, class U>
            XSIMD_INLINE batch<U, A> sve_to_unsigned_batch_impl(batch<T, A> const& arg, index<8>) noexcept
            {
                return svreinterpret_u64(arg);
            }

            template <class A, class T, class U = as_unsigned_integer_t<T>>
            XSIMD_INLINE batch<U, A> sve_to_unsigned_batch(batch<T, A> const& arg) noexcept
            {
                return sve_to_unsigned_batch_impl<A, T, U>(arg, index<sizeof(T)> {});
            }
        }

        // bitwise_lshift
        template <class A, class T, detail::enable_integral_t<T> = 0>
        XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& arg, int n, requires_arch<sve>) noexcept
        {
            constexpr std::size_t size = sizeof(typename batch<T, A>::value_type) * 8;
            assert(0 <= n && static_cast<std::size_t>(n) < size && "shift amount within bounds");
            return svlsl_x(detail::sve_ptrue<T>(), arg, n);
        }

        template <class A, class T, detail::enable_integral_t<T> = 0>
        XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<sve>) noexcept
        {
            return svlsl_x(detail::sve_ptrue<T>(), lhs, detail::sve_to_unsigned_batch<A, T>(rhs));
        }

        // bitwise_rshift: logical shift for unsigned types
        template <class A, class T, detail::sve_enable_unsigned_int_t<T> = 0>
        XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& arg, int n, requires_arch<sve>) noexcept
        {
            constexpr std::size_t size = sizeof(typename batch<T, A>::value_type) * 8;
            assert(0 <= n && static_cast<std::size_t>(n) < size && "shift amount within bounds");
            return svlsr_x(detail::sve_ptrue<T>(), arg, static_cast<T>(n));
        }

        template <class A, class T, detail::sve_enable_unsigned_int_t<T> = 0>
        XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<sve>) noexcept
        {
            return svlsr_x(detail::sve_ptrue<T>(), lhs, rhs);
        }

        // bitwise_rshift: arithmetic shift for signed types
        template <class A, class T, detail::sve_enable_signed_int_t<T> = 0>
        XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& arg, int n, requires_arch<sve>) noexcept
        {
            constexpr std::size_t size = sizeof(typename batch<T, A>::value_type) * 8;
            assert(0 <= n && static_cast<std::size_t>(n) < size && "shift amount within bounds");
            return svasr_x(detail::sve_ptrue<T>(), arg, static_cast<as_unsigned_integer_t<T>>(n));
        }

        template <class A, class T, detail::sve_enable_signed_int_t<T> = 0>
        XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<sve>) noexcept
        {
            return svasr_x(detail::sve_ptrue<T>(), lhs, detail::sve_to_unsigned_batch<A, T>(rhs));
        }
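
        /*************
         * Reduction *
         *************/
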
        // reduce_add
        template <class A, class T, class V = typename batch<T, A>::value_type, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE V reduce_add(batch<T, A> const& arg, requires_arch<sve>) noexcept
        {
            // SVE integer reductions widen to 64 bits, so narrow back to the value type
            return static_cast<V>(svaddv(detail::sve_ptrue<T>(), arg));
        }

        // reduce_max
        template <class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE T reduce_max(batch<T, A> const& arg, requires_arch<sve>) noexcept
        {
            return svmaxv(detail::sve_ptrue<T>(), arg);
        }

        // reduce_min
        template <class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE T reduce_min(batch<T, A> const& arg, requires_arch<sve>) noexcept
        {
            return svminv(detail::sve_ptrue<T>(), arg);
        }

        // haddp: lane i of the result holds the horizontal sum of row[i]
        template <class A, class T, detail::sve_enable_floating_point_t<T> = 0>
        XSIMD_INLINE batch<T, A> haddp(const batch<T, A>* row, requires_arch<sve>) noexcept
        {
            constexpr std::size_t size = batch<T, A>::size;
            T sums[size];
            for (std::size_t i = 0; i < size; ++i)
            {
                sums[i] = reduce_add(row[i], sve {});
            }
            return svld1(detail::sve_ptrue<T>(), sums);
        }
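
        /**************
         * Comparison *
         **************/
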
        // eq
        template <class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE batch_bool<T, A> eq(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<sve>) noexcept
        {
            return svcmpeq(detail::sve_ptrue<T>(), lhs, rhs);
        }

        // eq on predicates: NOT (lhs XOR rhs)
        template <class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE batch_bool<T, A> eq(batch_bool<T, A> const& lhs, batch_bool<T, A> const& rhs, requires_arch<sve>) noexcept
        {
            const auto neq_result = sveor_z(detail::sve_ptrue<T>(), lhs, rhs);
            return svnot_z(detail::sve_ptrue<T>(), neq_result);
        }

        // neq
        template <class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE batch_bool<T, A> neq(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<sve>) noexcept
        {
            return svcmpne(detail::sve_ptrue<T>(), lhs, rhs);
        }

        template <class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE batch_bool<T, A> neq(batch_bool<T, A> const& lhs, batch_bool<T, A> const& rhs, requires_arch<sve>) noexcept
        {
            return sveor_z(detail::sve_ptrue<T>(), lhs, rhs);
        }

        // lt
        template <class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE batch_bool<T, A> lt(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<sve>) noexcept
        {
            return svcmplt(detail::sve_ptrue<T>(), lhs, rhs);
        }

        // le
        template <class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE batch_bool<T, A> le(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<sve>) noexcept
        {
            return svcmple(detail::sve_ptrue<T>(), lhs, rhs);
        }

        // gt
        template <class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE batch_bool<T, A> gt(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<sve>) noexcept
        {
            return svcmpgt(detail::sve_ptrue<T>(), lhs, rhs);
        }

        // ge
        template <class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE batch_bool<T, A> ge(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<sve>) noexcept
        {
            return svcmpge(detail::sve_ptrue<T>(), lhs, rhs);
        }
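
        /***************
         * Permutation *
         ***************/
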
        // rotate_left: result[i] = a[(i + N) % size]
        template <size_t N, class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> rotate_left(batch<T, A> const& a, requires_arch<sve>) noexcept
        {
            return svext(a, a, N);
        }

        // swizzle (dynamic index version)
        template <class A, class T, class I>
        XSIMD_INLINE batch<T, A> swizzle(batch<T, A> const& arg, batch<I, A> indices, requires_arch<sve>) noexcept
        {
            return svtbl(arg, indices);
        }

        template <class A, class T, class I>
        XSIMD_INLINE batch<std::complex<T>, A> swizzle(batch<std::complex<T>, A> const& self,
                                                       batch<I, A> indices,
                                                       requires_arch<sve>) noexcept
        {
            const auto real = swizzle(self.real(), indices, sve {});
            const auto imag = swizzle(self.imag(), indices, sve {});
            return batch<std::complex<T>, A>(real, imag);
        }

        // swizzle (static index version)
        template <class A, class T, class I, I... idx>
        XSIMD_INLINE batch<T, A> swizzle(batch<T, A> const& arg, batch_constant<I, A, idx...> indices, requires_arch<sve>) noexcept
        {
            static_assert(batch<T, A>::size == sizeof...(idx), "invalid swizzle indices");
            return swizzle(arg, indices.as_batch(), sve {});
        }

        template <class A, class T, class I, I... idx>
        XSIMD_INLINE batch<std::complex<T>, A> swizzle(batch<std::complex<T>, A> const& arg,
                                                       batch_constant<I, A, idx...> indices,
                                                       requires_arch<sve>) noexcept
        {
            static_assert(batch<std::complex<T>, A>::size == sizeof...(idx), "invalid swizzle indices");
            return swizzle(arg, indices.as_batch(), sve {});
        }
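
        /*************
         * Selection *
         *************/
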
        namespace detail
        {
            // recursion terminator: only reachable if n >= size, which the
            // caller's bounds assertion excludes
            template <class A, class T>
            XSIMD_INLINE batch<T, A> sve_extract_pair(batch<T, A> const&, batch<T, A> const&, std::size_t, ::xsimd::detail::index_sequence<>) noexcept
            {
                assert(false && "extract_pair out of bounds");
                return batch<T, A> {};
            }

            // linear search for the runtime offset n among the compile-time
            // candidates, since svext requires an immediate index
            template <class A, class T, size_t I, size_t... Is>
            XSIMD_INLINE batch<T, A> sve_extract_pair(batch<T, A> const& lhs, batch<T, A> const& rhs, std::size_t n, ::xsimd::detail::index_sequence<I, Is...>) noexcept
            {
                if (n == I)
                {
                    return svext(rhs, lhs, I);
                }
                else
                {
                    return sve_extract_pair(lhs, rhs, n, ::xsimd::detail::index_sequence<Is...>());
                }
            }

            template <class A, class T, size_t... Is>
            XSIMD_INLINE batch<T, A> sve_extract_pair_impl(batch<T, A> const& lhs, batch<T, A> const& rhs, std::size_t n, ::xsimd::detail::index_sequence<0, Is...>) noexcept
            {
                if (n == 0)
                {
                    return rhs;
                }
                else
                {
                    return sve_extract_pair(lhs, rhs, n, ::xsimd::detail::index_sequence<Is...>());
                }
            }
        }

        template <class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> extract_pair(batch<T, A> const& lhs, batch<T, A> const& rhs, std::size_t n, requires_arch<sve>) noexcept
        {
            constexpr std::size_t size = batch<T, A>::size;
            assert(n < size && "index in bounds");
            return detail::sve_extract_pair_impl(lhs, rhs, n, ::xsimd::detail::make_index_sequence<size>());
        }

        // select
        template <class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> select(batch_bool<T, A> const& cond, batch<T, A> const& a, batch<T, A> const& b, requires_arch<sve>) noexcept
        {
            return svsel(cond, a, b);
        }

        template <class A, class T, bool... b>
        XSIMD_INLINE batch<T, A> select(batch_bool_constant<T, A, b...> const&, batch<T, A> const& true_br, batch<T, A> const& false_br, requires_arch<sve>) noexcept
        {
            return select(batch_bool<T, A> { b... }, true_br, false_br, sve {});
        }

        // zip_lo
        template <class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> zip_lo(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<sve>) noexcept
        {
            return svzip1(lhs, rhs);
        }

        // zip_hi
        template <class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> zip_hi(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<sve>) noexcept
        {
            return svzip2(lhs, rhs);
        }
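
        /********
         * Math *
         ********/
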
        // rsqrt: reciprocal square-root estimate
        template <class A, class T, detail::sve_enable_floating_point_t<T> = 0>
        XSIMD_INLINE batch<T, A> rsqrt(batch<T, A> const& arg, requires_arch<sve>) noexcept
        {
            return svrsqrte(arg);
        }

        // sqrt
        template <class A, class T, detail::sve_enable_floating_point_t<T> = 0>
        XSIMD_INLINE batch<T, A> sqrt(batch<T, A> const& arg, requires_arch<sve>) noexcept
        {
            return svsqrt_x(detail::sve_ptrue<T>(), arg);
        }

        // reciprocal estimate
        template <class A, class T, detail::sve_enable_floating_point_t<T> = 0>
        XSIMD_INLINE batch<T, A> reciprocal(const batch<T, A>& arg, requires_arch<sve>) noexcept
        {
            return svrecpe(arg);
        }
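
        /**************
         * Conversion *
         **************/
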
        namespace detail
        {
            // same-width integer <-> floating-point conversions
            template <class A, class T, detail::enable_sized_integral_t<T, 4> = 0>
            XSIMD_INLINE batch<float, A> fast_cast(batch<T, A> const& arg, batch<float, A> const&, requires_arch<sve>) noexcept
            {
                return svcvt_f32_x(detail::sve_ptrue<T>(), arg);
            }

            template <class A, class T, detail::enable_sized_integral_t<T, 8> = 0>
            XSIMD_INLINE batch<double, A> fast_cast(batch<T, A> const& arg, batch<double, A> const&, requires_arch<sve>) noexcept
            {
                return svcvt_f64_x(detail::sve_ptrue<T>(), arg);
            }

            template <class A>
            XSIMD_INLINE batch<int32_t, A> fast_cast(batch<float, A> const& arg, batch<int32_t, A> const&, requires_arch<sve>) noexcept
            {
                return svcvt_s32_x(detail::sve_ptrue<float>(), arg);
            }

            template <class A>
            XSIMD_INLINE batch<uint32_t, A> fast_cast(batch<float, A> const& arg, batch<uint32_t, A> const&, requires_arch<sve>) noexcept
            {
                return svcvt_u32_x(detail::sve_ptrue<float>(), arg);
            }

            template <class A>
            XSIMD_INLINE batch<int64_t, A> fast_cast(batch<double, A> const& arg, batch<int64_t, A> const&, requires_arch<sve>) noexcept
            {
                return svcvt_s64_x(detail::sve_ptrue<double>(), arg);
            }

            template <class A>
            XSIMD_INLINE batch<uint64_t, A> fast_cast(batch<double, A> const& arg, batch<uint64_t, A> const&, requires_arch<sve>) noexcept
            {
                return svcvt_u64_x(detail::sve_ptrue<double>(), arg);
            }
        }
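
        /*****************
         * Miscellaneous *
         *****************/
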
        // set
        template <class A, class T, class... Args>
        XSIMD_INLINE batch<T, A> set(batch<T, A> const&, requires_arch<sve>, Args... args) noexcept
        {
            return detail::sve_vector_type<T> { args... };
        }

        template <class A, class T, class... Args>
        XSIMD_INLINE batch<std::complex<T>, A> set(batch<std::complex<T>, A> const&, requires_arch<sve>,
                                                   Args... args_complex) noexcept
        {
            return batch<std::complex<T>, A>(detail::sve_vector_type<T> { args_complex.real()... },
                                             detail::sve_vector_type<T> { args_complex.imag()... });
        }

        template <class A, class T, class... Args>
        XSIMD_INLINE batch_bool<T, A> set(batch_bool<T, A> const&, requires_arch<sve>, Args... args) noexcept
        {
            // materialize the flags as integers, then compare against zero to form a predicate
            using U = as_unsigned_integer_t<T>;
            const auto values = detail::sve_vector_type<U> { static_cast<U>(args)... };
            const auto zero = broadcast<A, U>(static_cast<U>(0), sve {});
            return svcmpne(detail::sve_ptrue<T>(), values, zero);
        }

        namespace detail
        {
            // lane index vector: 0, 1, 2, ...
            XSIMD_INLINE svuint8_t sve_iota_impl(index<1>) noexcept { return svindex_u8(0, 1); }
            XSIMD_INLINE svuint16_t sve_iota_impl(index<2>) noexcept { return svindex_u16(0, 1); }
            XSIMD_INLINE svuint32_t sve_iota_impl(index<4>) noexcept { return svindex_u32(0, 1); }
            XSIMD_INLINE svuint64_t sve_iota_impl(index<8>) noexcept { return svindex_u64(0, 1); }

            template <class T, class V = sve_vector_type<as_unsigned_integer_t<T>>>
            XSIMD_INLINE V sve_iota() noexcept { return sve_iota_impl(index<sizeof(T)> {}); }
        }

        // insert
        template <class A, class T, size_t I, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> insert(batch<T, A> const& arg, T val, index<I>, requires_arch<sve>) noexcept
        {
            // build a predicate that is true only in lane I, then blend val into that lane
            const auto iota = detail::sve_iota<T>();
            const auto index_predicate = svcmpeq(detail::sve_ptrue<T>(), iota, static_cast<as_unsigned_integer_t<T>>(I));
            return svsel(index_predicate, broadcast<A, T>(val, sve {}), arg);
        }

        // all
        template <class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE bool all(batch_bool<T, A> const& arg, requires_arch<sve>) noexcept
        {
            return detail::sve_pcount<T>(arg) == batch_bool<T, A>::size;
        }

        // any
        template <class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE bool any(batch_bool<T, A> const& arg, requires_arch<sve>) noexcept
        {
            return svptest_any(arg, arg);
        }

        // bitwise_cast: reinterpret the register as the requested element type
        template <class A, class T, class R, detail::sve_enable_all_t<T> = 0, detail::enable_sized_unsigned_t<R, 1> = 0>
        XSIMD_INLINE batch<R, A> bitwise_cast(batch<T, A> const& arg, batch<R, A> const&, requires_arch<sve>) noexcept
        {
            return svreinterpret_u8(arg);
        }

        template <class A, class T, class R, detail::sve_enable_all_t<T> = 0, detail::enable_sized_signed_t<R, 1> = 0>
        XSIMD_INLINE batch<R, A> bitwise_cast(batch<T, A> const& arg, batch<R, A> const&, requires_arch<sve>) noexcept
        {
            return svreinterpret_s8(arg);
        }

        template <class A, class T, class R, detail::sve_enable_all_t<T> = 0, detail::enable_sized_unsigned_t<R, 2> = 0>
        XSIMD_INLINE batch<R, A> bitwise_cast(batch<T, A> const& arg, batch<R, A> const&, requires_arch<sve>) noexcept
        {
            return svreinterpret_u16(arg);
        }

        template <class A, class T, class R, detail::sve_enable_all_t<T> = 0, detail::enable_sized_signed_t<R, 2> = 0>
        XSIMD_INLINE batch<R, A> bitwise_cast(batch<T, A> const& arg, batch<R, A> const&, requires_arch<sve>) noexcept
        {
            return svreinterpret_s16(arg);
        }

        template <class A, class T, class R, detail::sve_enable_all_t<T> = 0, detail::enable_sized_unsigned_t<R, 4> = 0>
        XSIMD_INLINE batch<R, A> bitwise_cast(batch<T, A> const& arg, batch<R, A> const&, requires_arch<sve>) noexcept
        {
            return svreinterpret_u32(arg);
        }

        template <class A, class T, class R, detail::sve_enable_all_t<T> = 0, detail::enable_sized_signed_t<R, 4> = 0>
        XSIMD_INLINE batch<R, A> bitwise_cast(batch<T, A> const& arg, batch<R, A> const&, requires_arch<sve>) noexcept
        {
            return svreinterpret_s32(arg);
        }

        template <class A, class T, class R, detail::sve_enable_all_t<T> = 0, detail::enable_sized_unsigned_t<R, 8> = 0>
        XSIMD_INLINE batch<R, A> bitwise_cast(batch<T, A> const& arg, batch<R, A> const&, requires_arch<sve>) noexcept
        {
            return svreinterpret_u64(arg);
        }

        template <class A, class T, class R, detail::sve_enable_all_t<T> = 0, detail::enable_sized_signed_t<R, 8> = 0>
        XSIMD_INLINE batch<R, A> bitwise_cast(batch<T, A> const& arg, batch<R, A> const&, requires_arch<sve>) noexcept
        {
            return svreinterpret_s64(arg);
        }

        template <class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE batch<float, A> bitwise_cast(batch<T, A> const& arg, batch<float, A> const&, requires_arch<sve>) noexcept
        {
            return svreinterpret_f32(arg);
        }

        template <class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE batch<double, A> bitwise_cast(batch<T, A> const& arg, batch<double, A> const&, requires_arch<sve>) noexcept
        {
            return svreinterpret_f64(arg);
        }

        // batch_bool_cast: SVE predicates are untyped, so the register is reused as-is
        template <class A, class T_out, class T_in, detail::sve_enable_all_t<T_in> = 0>
        XSIMD_INLINE batch_bool<T_out, A> batch_bool_cast(batch_bool<T_in, A> const& arg, batch_bool<T_out, A> const&, requires_arch<sve>) noexcept
        {
            return arg.data;
        }

        // from_bool: 1 in active lanes, 0 elsewhere
        template <class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> from_bool(batch_bool<T, A> const& arg, requires_arch<sve>) noexcept
        {
            return select(arg, batch<T, A>(1), batch<T, A>(0));
        }

        // slide_left: slide the batch by N bytes, filling the vacated lanes with zeros
        namespace detail
        {
            template <size_t N>
            struct sve_slider_left
            {
                template <class A, class T>
                XSIMD_INLINE batch<T, A> operator()(batch<T, A> const& arg) noexcept
                {
                    using u8_vector = batch<uint8_t, A>;
                    const auto left = svdup_n_u8(0);
                    const auto right = bitwise_cast(arg, u8_vector {}, sve {}).data;
                    const u8_vector result(svext(left, right, u8_vector::size - N));
                    return bitwise_cast(result, batch<T, A> {}, sve {});
                }
            };

            // sliding by zero is the identity
            template <>
            struct sve_slider_left<0>
            {
                template <class A, class T>
                XSIMD_INLINE batch<T, A> operator()(batch<T, A> const& arg) noexcept
                {
                    return arg;
                }
            };
        }

        template <size_t N, class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> slide_left(batch<T, A> const& arg, requires_arch<sve>) noexcept
        {
            return detail::sve_slider_left<N>()(arg);
        }

        // slide_right: the mirror of slide_left
        namespace detail
        {
            template <size_t N>
            struct sve_slider_right
            {
                template <class A, class T>
                XSIMD_INLINE batch<T, A> operator()(batch<T, A> const& arg) noexcept
                {
                    using u8_vector = batch<uint8_t, A>;
                    const auto left = bitwise_cast(arg, u8_vector {}, sve {}).data;
                    const auto right = svdup_n_u8(0);
                    const u8_vector result(svext(left, right, N));
                    return bitwise_cast(result, batch<T, A> {}, sve {});
                }
            };

            // sliding by the full register width yields an all-zero batch
            template <>
            struct sve_slider_right<batch<uint8_t, sve>::size>
            {
                template <class A, class T>
                XSIMD_INLINE batch<T, A> operator()(batch<T, A> const&) noexcept
                {
                    return batch<T, A> {};
                }
            };
        }

        template <size_t N, class A, class T, detail::sve_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> slide_right(batch<T, A> const& arg, requires_arch<sve>) noexcept
        {
            return detail::sve_slider_right<N>()(arg);
        }

        // isnan: NaN is the only value that compares unequal to itself
        template <class A, class T, detail::sve_enable_floating_point_t<T> = 0>
        XSIMD_INLINE batch_bool<T, A> isnan(batch<T, A> const& arg, requires_arch<sve>) noexcept
        {
            return !(arg == arg);
        }

        // nearbyint: round to integral, using the current rounding mode
        template <class A, class T, detail::sve_enable_floating_point_t<T> = 0>
        XSIMD_INLINE batch<T, A> nearbyint(batch<T, A> const& arg, requires_arch<sve>) noexcept
        {
            return svrintx_x(detail::sve_ptrue<T>(), arg);
        }

        // nearbyint_as_int
        template <class A>
        XSIMD_INLINE batch<int32_t, A> nearbyint_as_int(batch<float, A> const& arg, requires_arch<sve>) noexcept
        {
            const auto nearest = svrintx_x(detail::sve_ptrue<float>(), arg);
            return svcvt_s32_x(detail::sve_ptrue<float>(), nearest);
        }

        template <class A>
        XSIMD_INLINE batch<int64_t, A> nearbyint_as_int(batch<double, A> const& arg, requires_arch<sve>) noexcept
        {
            const auto nearest = svrintx_x(detail::sve_ptrue<double>(), arg);
            return svcvt_s64_x(detail::sve_ptrue<double>(), nearest);
        }

        // ldexp: svscale computes x * 2^exp directly
        template <class A, class T, detail::sve_enable_floating_point_t<T> = 0>
        XSIMD_INLINE batch<T, A> ldexp(const batch<T, A>& x, const batch<as_integer_t<T>, A>& exp, requires_arch<sve>) noexcept
        {
            return svscale_x(detail::sve_ptrue<T>(), x, exp);
        }
    }
}

#endif