#ifndef XSIMD_RVV_HPP
#define XSIMD_RVV_HPP

#include <array>
#include <cassert>
#include <complex>
#include <limits>
#include <type_traits>
#include <utility>

#include "../types/xsimd_rvv_register.hpp"
#include "xsimd_constants.hpp"

#define XSIMD_RVV_JOIN_(x, y) x##y
#define XSIMD_RVV_JOIN(x, y) XSIMD_RVV_JOIN_(x, y)
#define XSIMD_RVV_PREFIX_T(T, S, then) XSIMD_RVV_JOIN(T, then)
#define XSIMD_RVV_PREFIX_S(T, S, then) XSIMD_RVV_JOIN(S, then)
#define XSIMD_RVV_PREFIX_M(T, S, then) XSIMD_RVV_JOIN(m1, then)
#define XSIMD_RVV_PREFIX(T, S, then) then
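// Macros to synthesise RVV intrinsic names from a type letter T (i, u or
// f), an element width S in bits, and a fixed LMUL suffix (m1). Each
// XSIMD_RVV_PREFIX_x selector pastes one of those pieces onto the front of
// the rest of the name; the bare XSIMD_RVV_PREFIX passes the tail through
// unchanged. The XSIMD_RVV_IDENTIFIER chain below folds a whole list of
// (token, selector) pairs into a single identifier.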
#define XSIMD_RVV_IDENTIFIER9(T, S, t, ...) t
#define XSIMD_RVV_IDENTIFIER8(T, S, t, p, ...) XSIMD_RVV_JOIN(t, XSIMD_RVV_PREFIX##p(T, S, XSIMD_RVV_IDENTIFIER9(T, S, __VA_ARGS__)))
#define XSIMD_RVV_IDENTIFIER7(T, S, t, p, ...) XSIMD_RVV_JOIN(t, XSIMD_RVV_PREFIX##p(T, S, XSIMD_RVV_IDENTIFIER8(T, S, __VA_ARGS__)))
#define XSIMD_RVV_IDENTIFIER6(T, S, t, p, ...) XSIMD_RVV_JOIN(t, XSIMD_RVV_PREFIX##p(T, S, XSIMD_RVV_IDENTIFIER7(T, S, __VA_ARGS__)))
#define XSIMD_RVV_IDENTIFIER5(T, S, t, p, ...) XSIMD_RVV_JOIN(t, XSIMD_RVV_PREFIX##p(T, S, XSIMD_RVV_IDENTIFIER6(T, S, __VA_ARGS__)))
#define XSIMD_RVV_IDENTIFIER4(T, S, t, p, ...) XSIMD_RVV_JOIN(t, XSIMD_RVV_PREFIX##p(T, S, XSIMD_RVV_IDENTIFIER5(T, S, __VA_ARGS__)))
#define XSIMD_RVV_IDENTIFIER3(T, S, t, p, ...) XSIMD_RVV_JOIN(t, XSIMD_RVV_PREFIX##p(T, S, XSIMD_RVV_IDENTIFIER4(T, S, __VA_ARGS__)))
#define XSIMD_RVV_IDENTIFIER2(T, S, t, p, ...) XSIMD_RVV_JOIN(t, XSIMD_RVV_PREFIX##p(T, S, XSIMD_RVV_IDENTIFIER3(T, S, __VA_ARGS__)))
#define XSIMD_RVV_IDENTIFIER1(T, S, t, p, ...) XSIMD_RVV_JOIN(t, XSIMD_RVV_PREFIX##p(T, S, XSIMD_RVV_IDENTIFIER2(T, S, __VA_ARGS__)))
#define XSIMD_RVV_IDENTIFIER0(T, S, t, p, ...) XSIMD_RVV_JOIN(t, XSIMD_RVV_PREFIX##p(T, S, XSIMD_RVV_IDENTIFIER1(T, S, __VA_ARGS__)))
#define XSIMD_RVV_REPARSE(v) (v)
#define XSIMD_RVV_UNBRACKET(...) __VA_ARGS__
#define XSIMD_RVV_ARG_LIST(T, S, name) (T, S, XSIMD_RVV_UNBRACKET name, , , , , , , , , , , , , , , , , , , , , )
#define XSIMD_RVV_IDENTIFIER(T, S, name) XSIMD_RVV_REPARSE(XSIMD_RVV_IDENTIFIER0 XSIMD_RVV_ARG_LIST(T, S, name))

#define XSIMD_RVV_T , _T,
#define XSIMD_RVV_S , _S,
#define XSIMD_RVV_M , _M,
#define XSIMD_RVV_TSM XSIMD_RVV_T XSIMD_RVV_S XSIMD_RVV_M
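// XSIMD_RVV_T, _S and _M expand to `, _T,` etc., so a bracketed template
// such as (__riscv_vle XSIMD_RVV_S _v_ XSIMD_RVV_TSM) turns into an
// alternating token/selector argument list for the identifier chain.
// For example (derived by hand from the macros above), with T = u, S = 32:
//     XSIMD_RVV_IDENTIFIER(u, 32, (__riscv_vle XSIMD_RVV_S _v_ XSIMD_RVV_TSM))
// pastes back together as __riscv_vle32_v_u32m1.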
#define XSIMD_RVV_WRAPPER_HEAD(NAME, SIGNATURE, ...)                      \
    namespace NAME##_cruft                                                \
    {                                                                     \
        template <class T>                                                \
        struct ctx                                                        \
        {                                                                 \
            static constexpr size_t width = XSIMD_RVV_BITS;               \
            static constexpr size_t vl = width / (sizeof(T) * 8);         \
            using vec = rvv_reg_t<T, width>;                              \
            using uvec = rvv_reg_t<as_unsigned_relaxed_t<T>, width>;      \
            using svec = rvv_reg_t<as_signed_relaxed_t<T>, width>;        \
            using fvec = rvv_reg_t<as_float_relaxed_t<T>, width>;         \
            using bvec = rvv_bool_t<T, width>;                            \
            using scalar_vec = rvv_reg_t<T, types::detail::rvv_width_m1>; \
            using wide_vec = rvv_reg_t<T, width * 2>;                     \
            using narrow_vec = rvv_reg_t<T, width / 2>;                   \
            using type = SIGNATURE;                                       \
        };                                                                \
        template <class T>                                                \
        using sig_t = typename ctx<T>::type;                              \
        template <class K, class T>                                       \
        struct impl                                                       \
        {                                                                 \
            void operator()() const noexcept {}                           \
        };                                                                \
        template <class K>                                                \
        using impl_t = impl<K, sig_t<K>>;
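// ctx<T> gathers everything a wrapper needs for element type T: the batch
// width in bits, the vector length vl (lanes per register), the register
// type vec plus its unsigned/signed/float reinterpretations (uvec, svec,
// fvec), the predicate type bvec, an LMUL=1 scalar_vec for reductions,
// double- and half-width variants, and the signature of the wrapped
// function. The primary `impl` template is a do-nothing fallback so the
// overload set assembled below is always well-formed. Note the namespace
// is deliberately left open here; XSIMD_RVV_WRAPPER_TAIL closes it.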
#define XSIMD_RVV_WRAPPER_HEAD_NOVL(...) XSIMD_RVV_WRAPPER_HEAD(__VA_ARGS__)
#define XSIMD_RVV_WRAPPER_HEAD_DROP_1ST(...) XSIMD_RVV_WRAPPER_HEAD(__VA_ARGS__)
#define XSIMD_RVV_WRAPPER_HEAD_DROP_1ST_CUSTOM_ARGS(...) XSIMD_RVV_WRAPPER_HEAD(__VA_ARGS__)
#define XSIMD_RVV_WRAPPER_HEAD_DROP_1ST_CUSTOM_ARGS_NOVL(...) XSIMD_RVV_WRAPPER_HEAD(__VA_ARGS__)

#define XSIMD_RVV_WRAPPER(KEY, CALLEE, ...)                       \
    template <class Ret, class... Args>                           \
    struct impl<KEY, Ret(Args...)>                                \
    {                                                             \
        using ctx = ctx<KEY>;                                     \
        constexpr Ret operator()(Args... args) const noexcept     \
        {                                                         \
            return CALLEE(args..., ctx::vl);                      \
        }                                                         \
    };
#define XSIMD_RVV_WRAPPER_NOVL(KEY, CALLEE, ...)                  \
    template <class Ret, class... Args>                           \
    struct impl<KEY, Ret(Args...)>                                \
    {                                                             \
        constexpr Ret operator()(Args... args) const noexcept     \
        {                                                         \
            return CALLEE(args...);                               \
        }                                                         \
    };
#define XSIMD_RVV_WRAPPER_DROP_1ST(KEY, CALLEE, ...)                 \
    template <class Ret, class First, class... Args>                 \
    struct impl<KEY, Ret(First, Args...)>                            \
    {                                                                \
        using ctx = ctx<KEY>;                                        \
        constexpr Ret operator()(First, Args... args) const noexcept \
        {                                                            \
            return CALLEE(args..., ctx::vl);                         \
        }                                                            \
    };
#define XSIMD_RVV_WRAPPER_DROP_1ST_CUSTOM_ARGS(KEY, CALLEE, SIGNATURE, ...) \
    template <class Ret, class First, class... Args>                        \
    struct impl<KEY, Ret(First, Args...)>                                   \
    {                                                                       \
        using ctx = ctx<KEY>;                                               \
        constexpr Ret operator()(First, Args... args) const noexcept        \
        {                                                                   \
            return CALLEE(__VA_ARGS__, ctx::vl);                            \
        }                                                                   \
    };
#define XSIMD_RVV_WRAPPER_DROP_1ST_CUSTOM_ARGS_NOVL(KEY, CALLEE, SIGNATURE, ...) \
    template <class Ret, class First, class... Args>                             \
    struct impl<KEY, Ret(First, Args...)>                                        \
    {                                                                            \
        constexpr Ret operator()(First, Args... args) const noexcept             \
        {                                                                        \
            return CALLEE(__VA_ARGS__);                                          \
        }                                                                        \
    };
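// Wrapper flavours: the plain XSIMD_RVV_WRAPPER appends the vector length
// ctx::vl to the intrinsic call; _NOVL forwards the arguments unchanged;
// _DROP_1ST discards the first argument, which serves only as a type tag
// for overload resolution; and the _CUSTOM_ARGS forms substitute an
// explicit argument expression (which may itself expand args...) for the
// plain argument list.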
#define XSIMD_RVV_WRAPPER_TAIL(NAME, ...)                     \
    }                                                         \
    static constexpr struct : NAME##_cruft::impl_t<int8_t>,   \
                              NAME##_cruft::impl_t<uint8_t>,  \
                              NAME##_cruft::impl_t<int16_t>,  \
                              NAME##_cruft::impl_t<uint16_t>, \
                              NAME##_cruft::impl_t<int32_t>,  \
                              NAME##_cruft::impl_t<uint32_t>, \
                              NAME##_cruft::impl_t<int64_t>,  \
                              NAME##_cruft::impl_t<uint64_t>, \
                              NAME##_cruft::impl_t<float>,    \
                              NAME##_cruft::impl_t<double>    \
    {                                                         \
        using NAME##_cruft::impl_t<int8_t>::operator();       \
        using NAME##_cruft::impl_t<uint8_t>::operator();      \
        using NAME##_cruft::impl_t<int16_t>::operator();      \
        using NAME##_cruft::impl_t<uint16_t>::operator();     \
        using NAME##_cruft::impl_t<int32_t>::operator();      \
        using NAME##_cruft::impl_t<uint32_t>::operator();     \
        using NAME##_cruft::impl_t<int64_t>::operator();      \
        using NAME##_cruft::impl_t<uint64_t>::operator();     \
        using NAME##_cruft::impl_t<float>::operator();        \
        using NAME##_cruft::impl_t<double>::operator();       \
    } NAME {};
#define XSIMD_RVV_WRAPPER_TAIL_NOVL(...) XSIMD_RVV_WRAPPER_TAIL(__VA_ARGS__)
#define XSIMD_RVV_WRAPPER_TAIL_DROP_1ST(...) XSIMD_RVV_WRAPPER_TAIL(__VA_ARGS__)
#define XSIMD_RVV_WRAPPER_TAIL_DROP_1ST_CUSTOM_ARGS(...) XSIMD_RVV_WRAPPER_TAIL(__VA_ARGS__)
#define XSIMD_RVV_WRAPPER_TAIL_DROP_1ST_CUSTOM_ARGS_NOVL(...) XSIMD_RVV_WRAPPER_TAIL(__VA_ARGS__)

#define XSIMD_RVV_OVERLOAD_head(my_name, variant, ...) \
    XSIMD_RVV_WRAPPER_HEAD##variant(my_name, __VA_ARGS__)
#define XSIMD_RVV_OVERLOAD_i(name, variant, ...)                                         \
    XSIMD_RVV_WRAPPER##variant(int8_t, XSIMD_RVV_IDENTIFIER(i, 8, name), __VA_ARGS__)    \
    XSIMD_RVV_WRAPPER##variant(int16_t, XSIMD_RVV_IDENTIFIER(i, 16, name), __VA_ARGS__)  \
    XSIMD_RVV_WRAPPER##variant(int32_t, XSIMD_RVV_IDENTIFIER(i, 32, name), __VA_ARGS__)  \
    XSIMD_RVV_WRAPPER##variant(int64_t, XSIMD_RVV_IDENTIFIER(i, 64, name), __VA_ARGS__)
#define XSIMD_RVV_OVERLOAD_u(name, variant, ...)                                         \
    XSIMD_RVV_WRAPPER##variant(uint8_t, XSIMD_RVV_IDENTIFIER(u, 8, name), __VA_ARGS__)   \
    XSIMD_RVV_WRAPPER##variant(uint16_t, XSIMD_RVV_IDENTIFIER(u, 16, name), __VA_ARGS__) \
    XSIMD_RVV_WRAPPER##variant(uint32_t, XSIMD_RVV_IDENTIFIER(u, 32, name), __VA_ARGS__) \
    XSIMD_RVV_WRAPPER##variant(uint64_t, XSIMD_RVV_IDENTIFIER(u, 64, name), __VA_ARGS__)
#define XSIMD_RVV_OVERLOAD_f(name, variant, ...)                                         \
    XSIMD_RVV_WRAPPER##variant(float, XSIMD_RVV_IDENTIFIER(f, 32, name), __VA_ARGS__)    \
    XSIMD_RVV_WRAPPER##variant(double, XSIMD_RVV_IDENTIFIER(f, 64, name), __VA_ARGS__)
#define XSIMD_RVV_OVERLOAD_tail(my_name, variant, ...) \
    XSIMD_RVV_WRAPPER_TAIL##variant(my_name, __VA_ARGS__)

#define XSIMD_RVV_OVERLOAD2(my_name, name_i, name_u, variant, ...) \
    XSIMD_RVV_OVERLOAD_head(my_name, variant, __VA_ARGS__)         \
    XSIMD_RVV_OVERLOAD_i(name_i, variant, __VA_ARGS__)             \
    XSIMD_RVV_OVERLOAD_u(name_u, variant, __VA_ARGS__)             \
    XSIMD_RVV_OVERLOAD_tail(my_name, variant, __VA_ARGS__)

#define XSIMD_RVV_OVERLOAD3(my_name, name_i, name_u, name_f, variant, ...) \
    XSIMD_RVV_OVERLOAD_head(my_name, variant, __VA_ARGS__)                 \
    XSIMD_RVV_OVERLOAD_i(name_i, variant, __VA_ARGS__)                     \
    XSIMD_RVV_OVERLOAD_u(name_u, variant, __VA_ARGS__)                     \
    XSIMD_RVV_OVERLOAD_f(name_f, variant, __VA_ARGS__)                     \
    XSIMD_RVV_OVERLOAD_tail(my_name, variant, __VA_ARGS__)

#define XSIMD_RVV_OVERLOAD(my_name, name, ...) XSIMD_RVV_OVERLOAD3(my_name, name, name, name, __VA_ARGS__)
#define XSIMD_RVV_OVERLOAD_INTS(my_name, name, ...) XSIMD_RVV_OVERLOAD2(my_name, name, name, __VA_ARGS__)

#define XSIMD_RVV_OVERLOAD_SINTS(my_name, name, variant, ...) \
    XSIMD_RVV_OVERLOAD_head(my_name, variant, __VA_ARGS__)    \
    XSIMD_RVV_OVERLOAD_i(name, variant, __VA_ARGS__)          \
    XSIMD_RVV_OVERLOAD_tail(my_name, variant, __VA_ARGS__)

#define XSIMD_RVV_OVERLOAD_UINTS(my_name, name, variant, ...) \
    XSIMD_RVV_OVERLOAD_head(my_name, variant, __VA_ARGS__)    \
    XSIMD_RVV_OVERLOAD_u(name, variant, __VA_ARGS__)          \
    XSIMD_RVV_OVERLOAD_tail(my_name, variant, __VA_ARGS__)

#define XSIMD_RVV_OVERLOAD_FLOATS(my_name, name, variant, ...) \
    XSIMD_RVV_OVERLOAD_head(my_name, variant, __VA_ARGS__)     \
    XSIMD_RVV_OVERLOAD_f(name, variant, __VA_ARGS__)           \
    XSIMD_RVV_OVERLOAD_tail(my_name, variant, __VA_ARGS__)
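// As a rough sketch of the expansion, an invocation like
//     XSIMD_RVV_OVERLOAD(rvvmerge, (__riscv_vmerge), , vec(vec, vec, bvec))
// opens namespace rvvmerge_cruft, specialises impl<K, sig> for all ten
// element types so that each operator() calls __riscv_vmerge(args..., vl)
// with the appropriate vector length, and finally defines one constexpr
// functor `rvvmerge` inheriting every specialisation as a single overload
// set. OVERLOAD2/OVERLOAD3 accept distinct intrinsic names for the
// signed/unsigned/float families (e.g. vmax / vmaxu / vfmax).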
namespace xsimd
{
    template <typename T, class A, T... Values>
    struct batch_constant;

    namespace kernel
    {
        namespace detail
        {
            template <class T>
            using rvv_fix_char_t = types::detail::rvv_fix_char_t<T>;
            template <class T, size_t Width = XSIMD_RVV_BITS>
            using rvv_reg_t = types::detail::rvv_reg_t<T, Width>;
            template <class T, size_t Width = XSIMD_RVV_BITS>
            using rvv_bool_t = types::detail::rvv_bool_t<T, Width>;

            // "Relaxed" type mappings: the signed/unsigned/float counterpart
            // of the same size, falling back to an integer type where no
            // float of that size exists.
            template <size_t>
            struct as_signed_relaxed;
            template <>
            struct as_signed_relaxed<1>
            {
                using type = int8_t;
            };
            template <>
            struct as_signed_relaxed<2>
            {
                using type = int16_t;
            };
            template <>
            struct as_signed_relaxed<4>
            {
                using type = int32_t;
            };
            template <>
            struct as_signed_relaxed<8>
            {
                using type = int64_t;
            };
            template <class T>
            using as_signed_relaxed_t = typename as_signed_relaxed<sizeof(T)>::type;
            template <size_t>
            struct as_unsigned_relaxed;
            template <>
            struct as_unsigned_relaxed<1>
            {
                using type = uint8_t;
            };
            template <>
            struct as_unsigned_relaxed<2>
            {
                using type = uint16_t;
            };
            template <>
            struct as_unsigned_relaxed<4>
            {
                using type = uint32_t;
            };
            template <>
            struct as_unsigned_relaxed<8>
            {
                using type = uint64_t;
            };
            template <class T>
            using as_unsigned_relaxed_t = typename as_unsigned_relaxed<sizeof(T)>::type;
            template <size_t>
            struct as_float_relaxed;
            template <>
            struct as_float_relaxed<1>
            {
                using type = int8_t;
            };
            template <>
            struct as_float_relaxed<2>
            {
                using type = int16_t;
            };
            template <>
            struct as_float_relaxed<4>
            {
                using type = float;
            };
            template <>
            struct as_float_relaxed<8>
            {
                using type = double;
            };
            template <class T>
            using as_float_relaxed_t = typename as_float_relaxed<sizeof(T)>::type;

            template <class T, class U>
            rvv_reg_t<T, U::width> rvvreinterpret(U const& arg) noexcept
            {
                return rvv_reg_t<T, U::width>(arg, types::detail::XSIMD_RVV_BITCAST);
            }
            template <class T, class A, class U>
            rvv_reg_t<T, A::width> rvvreinterpret(batch<U, A> const& arg) noexcept
            {
                typename batch<U, A>::register_type r = arg;
                return rvvreinterpret<T>(r);
            }

            template <class A, class T, class U = as_unsigned_integer_t<T>>
            XSIMD_INLINE batch<U, A> rvv_to_unsigned_batch(batch<T, A> const& arg) noexcept
            {
                return rvvreinterpret<U>(arg.data);
            }

            XSIMD_RVV_OVERLOAD(rvvid,
                               (__riscv_vid_v_u XSIMD_RVV_S XSIMD_RVV_M), _DROP_1ST, uvec(T))

            XSIMD_RVV_OVERLOAD3(rvvmv_splat,
                                (__riscv_vmv_v_x_ XSIMD_RVV_TSM),
                                (__riscv_vmv_v_x_ XSIMD_RVV_TSM),
                                (__riscv_vfmv_v_f_ XSIMD_RVV_TSM), , vec(T))

            XSIMD_RVV_OVERLOAD3(rvvmv_lane0,
                                (__riscv_vmv_x),
                                (__riscv_vmv_x),
                                (__riscv_vfmv_f), _NOVL, T(vec))

            XSIMD_RVV_OVERLOAD(rvvmerge, (__riscv_vmerge), , vec(vec, vec, bvec))
            XSIMD_RVV_OVERLOAD3(rvvmerge_splat,
                                (__riscv_vmerge),
                                (__riscv_vmerge),
                                (__riscv_vfmerge), , vec(vec, T, bvec))

            // Count the active lanes in a predicate.
            XSIMD_RVV_OVERLOAD(rvvcpop, (__riscv_vcpop), , size_t(bvec))

            template <class T, size_t Width>
            XSIMD_INLINE rvv_bool_t<T, Width> pmask8(uint8_t mask) noexcept
            {
                return rvv_bool_t<T, Width>(mask);
            }
            template <class T, size_t Width>
            XSIMD_INLINE rvv_bool_t<T, Width> pmask(uint64_t mask) noexcept
            {
                return rvv_bool_t<T, Width>(mask);
            }

            template <class A, class T, size_t offset = 0, int shift = 0>
            XSIMD_INLINE rvv_reg_t<T, A::width> vindex() noexcept
            {
                auto index = rvvid(T {});
                if (shift < 0)
                    index = __riscv_vsrl(index, -shift, batch<T, A>::size);
                else
                    index = __riscv_vsll(index, shift, batch<T, A>::size);
                return __riscv_vadd(index, T(offset), batch<T, A>::size);
            }
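            // vindex() above yields { offset + (0 << shift), offset + (1 << shift), ... },
            // with a negative shift meaning a right shift of the lane id.
            // E.g. vindex<A, uint32_t, 0, 1>() gives 0, 2, 4, ... (even lanes),
            // vindex<A, uint32_t, 1, 1>() gives 1, 3, 5, ..., and
            // vindex<A, uint32_t, 0, -1>() gives 0, 0, 1, 1, 2, 2, ...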
            template <class T>
            using rvv_enable_signed_int_t = typename std::enable_if<std::is_integral<T>::value && std::is_signed<T>::value, int>::type;

            template <class T>
            using rvv_enable_unsigned_int_t = typename std::enable_if<std::is_integral<T>::value && std::is_unsigned<T>::value, int>::type;

            template <class T>
            using rvv_enable_floating_point_t = typename std::enable_if<std::is_floating_point<T>::value, int>::type;

            template <class T>
            using rvv_enable_signed_int_or_floating_point_t = typename std::enable_if<std::is_signed<T>::value, int>::type;

            template <class T>
            using rvv_enable_all_t = typename std::enable_if<std::is_arithmetic<T>::value, int>::type;
        } // namespace detail

        /*************
         * Broadcast *
         *************/

        namespace detail
        {
            template <class T, size_t Width>
            XSIMD_INLINE detail::rvv_reg_t<T, Width> broadcast(T arg) noexcept
            {
                // Splat through rvv_fix_char_t (the intrinsics have no
                // register type for plain char), then bitcast the bytes back
                // to the requested register type.
                detail::rvv_fix_char_t<T> arg_not_char(arg);
                const auto splat = detail::rvvmv_splat(arg_not_char);
                return detail::rvv_reg_t<T, Width>(splat.get_bytes(), types::detail::XSIMD_RVV_BITCAST);
            }
        } // namespace detail

        template <class A, class T>
        XSIMD_INLINE batch<T, A> broadcast(T arg, requires_arch<rvv>) noexcept
        {
            return detail::broadcast<T, A::width>(arg);
        }

        /********
         * Load *
         ********/

        namespace detail
        {
            XSIMD_RVV_OVERLOAD(rvvle, (__riscv_vle XSIMD_RVV_S _v_ XSIMD_RVV_TSM), , vec(T const*))
            XSIMD_RVV_OVERLOAD(rvvse, (__riscv_vse XSIMD_RVV_S _v_ XSIMD_RVV_TSM), , void(T*, vec))
        }

        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> load_aligned(T const* src, convert<T>, requires_arch<rvv>) noexcept
        {
            return detail::rvvle(reinterpret_cast<detail::rvv_fix_char_t<T> const*>(src));
        }

        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> load_unaligned(T const* src, convert<T>, requires_arch<rvv>) noexcept
        {
            return load_aligned<A>(src, convert<T>(), rvv {});
        }

        namespace detail
        {
            template <class T, size_t W, typename std::enable_if<(W >= types::detail::rvv_width_m1), int>::type = 0>
            XSIMD_INLINE rvv_reg_t<T, W * 2> rvvabut(rvv_reg_t<T, W> const& lo, rvv_reg_t<T, W> const& hi) noexcept
            {
                typename rvv_reg_t<T, W * 2>::register_type tmp;
                tmp = __riscv_vset(tmp, 0, lo);
                return __riscv_vset(tmp, 1, hi);
            }

            template <class T, size_t W, typename std::enable_if<(W < types::detail::rvv_width_m1), int>::type = 0>
            XSIMD_INLINE rvv_reg_t<T, W * 2> rvvabut(rvv_reg_t<T, W> const& lo, rvv_reg_t<T, W> const& hi) noexcept
            {
                return __riscv_vslideup(lo, hi, lo.vl, lo.vl * 2);
            }
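            // rvvabut concatenates two W-bit registers into one 2W-bit value:
            // at LMUL >= 1 the halves become members of a register group via
            // __riscv_vset, while below LMUL 1 the doubled width still fits
            // in a single register, so the high half is slid up behind the
            // low half instead.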
            XSIMD_RVV_OVERLOAD(rvvget_lo_, (__riscv_vget_ XSIMD_RVV_TSM), _DROP_1ST_CUSTOM_ARGS_NOVL, vec(T, wide_vec), args..., 0)
            XSIMD_RVV_OVERLOAD(rvvget_hi_, (__riscv_vget_ XSIMD_RVV_TSM), _DROP_1ST_CUSTOM_ARGS_NOVL, vec(T, wide_vec), args..., 1)

            template <class T, size_t W, typename std::enable_if<(W >= types::detail::rvv_width_m1), int>::type = 0>
            rvv_reg_t<T, W> rvvget_lo(rvv_reg_t<T, W * 2> const& vv) noexcept
            {
                typename rvv_reg_t<T, W>::register_type tmp = rvvget_lo_(T {}, vv);
                return tmp;
            }
            template <class T, size_t W, typename std::enable_if<(W >= types::detail::rvv_width_m1), int>::type = 0>
            rvv_reg_t<T, W> rvvget_hi(rvv_reg_t<T, W * 2> const& vv) noexcept
            {
                typename rvv_reg_t<T, W>::register_type tmp = rvvget_hi_(T {}, vv);
                return tmp;
            }
            template <class T, size_t W, typename std::enable_if<(W < types::detail::rvv_width_m1), int>::type = 0>
            rvv_reg_t<T, W> rvvget_lo(rvv_reg_t<T, W * 2> const& vv) noexcept
            {
                typename rvv_reg_t<T, W>::register_type tmp = vv;
                return tmp;
            }
            template <class T, size_t W, typename std::enable_if<(W < types::detail::rvv_width_m1), int>::type = 0>
            rvv_reg_t<T, W> rvvget_hi(rvv_reg_t<T, W * 2> const& vv) noexcept
            {
                return __riscv_vslidedown(vv, vv.vl / 2, vv.vl);
            }

            template <class A, class T, detail::rvv_enable_floating_point_t<T> = 0>
            XSIMD_INLINE batch<std::complex<T>, A> load_complex(batch<T, A> const& lo, batch<T, A> const& hi, requires_arch<rvv>) noexcept
            {
                const auto real_index = vindex<A, as_unsigned_integer_t<T>, 0, 1>();
                const auto imag_index = vindex<A, as_unsigned_integer_t<T>, 1, 1>();
                const auto index = rvvabut<as_unsigned_integer_t<T>, A::width>(real_index, imag_index);
                const auto input = rvvabut<T, A::width>(lo.data, hi.data);
                const rvv_reg_t<T, A::width * 2> result = __riscv_vrgather(input, index, index.vl);

                return { rvvget_lo<T, A::width>(result), rvvget_hi<T, A::width>(result) };
            }
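            // The double-width gather above deinterleaves the complex data:
            // input holds r0, i0, r1, i1, ... and index is
            // { 0, 2, 4, ... | 1, 3, 5, ... }, so the result's low half
            // collects the real parts and its high half the imaginary parts.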
        } // namespace detail

        /*********
         * Store *
         *********/

        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE void store_aligned(T* dst, batch<T, A> const& src, requires_arch<rvv>) noexcept
        {
            detail::rvvse(reinterpret_cast<detail::rvv_fix_char_t<T>*>(dst), src);
        }

        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE void store_unaligned(T* dst, batch<T, A> const& src, requires_arch<rvv>) noexcept
        {
            store_aligned<A>(dst, src, rvv {});
        }

        /******************
         * Scatter/gather *
         ******************/

        namespace detail
        {
            template <class T, class U>
            using rvv_enable_sg_t = typename std::enable_if<(sizeof(T) == sizeof(U) && (sizeof(T) == 4 || sizeof(T) == 8)), int>::type;
            XSIMD_RVV_OVERLOAD(rvvloxei, (__riscv_vloxei XSIMD_RVV_S), , vec(T const*, uvec))
            XSIMD_RVV_OVERLOAD(rvvsoxei, (__riscv_vsoxei XSIMD_RVV_S), , void(T*, uvec, vec))
            XSIMD_RVV_OVERLOAD3(rvvmul_splat,
                                (__riscv_vmul),
                                (__riscv_vmul),
                                (__riscv_vfmul), , vec(vec, T))
        }

        template <class A, class T, class U, detail::rvv_enable_sg_t<T, U> = 0>
        XSIMD_INLINE void scatter(batch<T, A> const& vals, T* dst, batch<U, A> const& index, kernel::requires_arch<rvv>) noexcept
        {
            using UU = as_unsigned_integer_t<U>;
            const auto uindex = detail::rvv_to_unsigned_batch(index);
            auto* base = reinterpret_cast<detail::rvv_fix_char_t<T>*>(dst);
            // scale the element index by the element size to get byte offsets
            const auto bi = detail::rvvmul_splat(uindex, sizeof(T));
            detail::rvvsoxei(base, bi, vals);
        }
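        // The indexed-access intrinsics (vloxei/vsoxei) take unsigned byte
        // offsets rather than element indices, hence the sizeof(T) scaling
        // here and in gather() below.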
        template <class A, class T, class U, detail::rvv_enable_sg_t<T, U> = 0>
        XSIMD_INLINE batch<T, A> gather(batch<T, A> const&, T const* src, batch<U, A> const& index, kernel::requires_arch<rvv>) noexcept
        {
            using UU = as_unsigned_integer_t<U>;
            const auto uindex = detail::rvv_to_unsigned_batch(index);
            auto const* base = reinterpret_cast<detail::rvv_fix_char_t<T> const*>(src);
            // scale the element index by the element size to get byte offsets
            const auto bi = detail::rvvmul_splat(uindex, sizeof(T));
            return detail::rvvloxei(base, bi);
        }

        /**************
         * Arithmetic *
         **************/

        namespace detail
        {
            XSIMD_RVV_OVERLOAD3(rvvadd,
                                (__riscv_vadd),
                                (__riscv_vadd),
                                (__riscv_vfadd), , vec(vec, vec))
            XSIMD_RVV_OVERLOAD2(rvvsadd,
                                (__riscv_vsadd),
                                (__riscv_vsaddu), , vec(vec, vec))
            XSIMD_RVV_OVERLOAD3(rvvsub,
                                (__riscv_vsub),
                                (__riscv_vsub),
                                (__riscv_vfsub), , vec(vec, vec))
            XSIMD_RVV_OVERLOAD2(rvvssub,
                                (__riscv_vssub),
                                (__riscv_vssubu), , vec(vec, vec))
            XSIMD_RVV_OVERLOAD2(rvvaadd,
                                (__riscv_vaadd),
                                (__riscv_vaaddu), , vec(vec, vec))
            XSIMD_RVV_OVERLOAD3(rvvmul,
                                (__riscv_vmul),
                                (__riscv_vmul),
                                (__riscv_vfmul), , vec(vec, vec))
            XSIMD_RVV_OVERLOAD3(rvvdiv,
                                (__riscv_vdiv),
                                (__riscv_vdivu),
                                (__riscv_vfdiv), , vec(vec, vec))
            XSIMD_RVV_OVERLOAD3(rvvmax,
                                (__riscv_vmax),
                                (__riscv_vmaxu),
                                (__riscv_vfmax), , vec(vec, vec))
            XSIMD_RVV_OVERLOAD3(rvvmin,
                                (__riscv_vmin),
                                (__riscv_vminu),
                                (__riscv_vfmin), , vec(vec, vec))
            // The unsigned entry is a placeholder that is never instantiated;
            // neg() below reinterprets unsigned batches through signed.
            XSIMD_RVV_OVERLOAD3(rvvneg,
                                (__riscv_vneg),
                                (abort),
                                (__riscv_vfneg), , vec(vec))
            XSIMD_RVV_OVERLOAD_FLOATS(rvvabs,
                                      (__riscv_vfabs), , vec(vec))
            XSIMD_RVV_OVERLOAD3(rvvmacc,
                                (__riscv_vmacc),
                                (__riscv_vmacc),
                                (__riscv_vfmacc), , vec(vec, vec, vec))
            XSIMD_RVV_OVERLOAD3(rvvnmsac,
                                (__riscv_vnmsac),
                                (__riscv_vnmsac),
                                (__riscv_vfnmsac), , vec(vec, vec, vec))
            XSIMD_RVV_OVERLOAD3(rvvmadd,
                                (__riscv_vmadd),
                                (__riscv_vmadd),
                                (__riscv_vfmadd), , vec(vec, vec, vec))
            XSIMD_RVV_OVERLOAD3(rvvnmsub,
                                (__riscv_vnmsub),
                                (__riscv_vnmsub),
                                (__riscv_vfnmsub), , vec(vec, vec, vec))

            // Generate eq/ne/lt/le/gt/ge comparisons for all families. The
            // unsigned integer forms take a trailing u, but vmseq/vmsne have
            // no unsigned variant, hence the temporary aliases.
#define RISCV_VMSXX(XX)                                      \
    XSIMD_RVV_OVERLOAD3(rvvms##XX,                           \
                        (__riscv_vms##XX),                   \
                        (__riscv_vms##XX##u),                \
                        (__riscv_vmf##XX), , bvec(vec, vec)) \
    XSIMD_RVV_OVERLOAD3(rvvms##XX##_splat,                   \
                        (__riscv_vms##XX),                   \
                        (__riscv_vms##XX##u),                \
                        (__riscv_vmf##XX), , bvec(vec, T))
#define __riscv_vmsequ __riscv_vmseq
#define __riscv_vmsneu __riscv_vmsne
            RISCV_VMSXX(eq)
            RISCV_VMSXX(ne)
            RISCV_VMSXX(lt)
            RISCV_VMSXX(le)
            RISCV_VMSXX(gt)
            RISCV_VMSXX(ge)
#undef __riscv_vmsequ
#undef __riscv_vmsneu
#undef RISCV_VMSXX
        } // namespace detail
        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> add(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
        {
            return detail::rvvadd(lhs, rhs);
        }

        template <class A, class T, detail::enable_integral_t<T> = 0>
        XSIMD_INLINE batch<T, A> sadd(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
        {
            return detail::rvvsadd(lhs, rhs);
        }

        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> sub(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
        {
            return detail::rvvsub(lhs, rhs);
        }

        template <class A, class T, detail::enable_integral_t<T> = 0>
        XSIMD_INLINE batch<T, A> ssub(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
        {
            return detail::rvvssub(lhs, rhs);
        }

        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> mul(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
        {
            return detail::rvvmul(lhs, rhs);
        }

        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> div(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
        {
            return detail::rvvdiv(lhs, rhs);
        }

        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> max(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
        {
            return detail::rvvmax(lhs, rhs);
        }

        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> min(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
        {
            return detail::rvvmin(lhs, rhs);
        }

        template <class A, class T, detail::rvv_enable_unsigned_int_t<T> = 0>
        XSIMD_INLINE batch<T, A> neg(batch<T, A> const& arg, requires_arch<rvv>) noexcept
        {
            using S = as_signed_integer_t<T>;
            const auto as_signed = detail::rvvreinterpret<S>(arg);
            const auto result = detail::rvvneg(as_signed);
            return detail::rvvreinterpret<T>(result);
        }

        template <class A, class T, detail::rvv_enable_signed_int_or_floating_point_t<T> = 0>
        XSIMD_INLINE batch<T, A> neg(batch<T, A> const& arg, requires_arch<rvv>) noexcept
        {
            return detail::rvvneg(arg);
        }

        template <class A, class T, detail::rvv_enable_unsigned_int_t<T> = 0>
        XSIMD_INLINE batch<T, A> abs(batch<T, A> const& arg, requires_arch<rvv>) noexcept
        {
            return arg;
        }

        template <class A, class T, detail::rvv_enable_floating_point_t<T> = 0>
        XSIMD_INLINE batch<T, A> abs(batch<T, A> const& arg, requires_arch<rvv>) noexcept
        {
            return detail::rvvabs(arg);
        }

        // fma: x * y + z
        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> fma(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<rvv>) noexcept
        {
            return detail::rvvmacc(z, x, y);
        }

        // fnma: -(x * y) + z
        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> fnma(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<rvv>) noexcept
        {
            return detail::rvvnmsac(z, x, y);
        }
        // fms: x * y - z
        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> fms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<rvv>) noexcept
        {
            // x * y - z = -(z - x * y)
            return -fnma(x, y, z, rvv {});
        }

        // fnms: -(x * y) - z
        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> fnms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<rvv>) noexcept
        {
            // -(x * y) - z = -(x * y + z)
            return -fma(x, y, z, rvv {});
        }
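        // Identities used above, with vmacc/vnmsac accumulating into their
        // first operand:
        //   fma(x, y, z)  =   x * y  + z -> vmacc(z, x, y)
        //   fnma(x, y, z) = -(x * y) + z -> vnmsac(z, x, y)
        //   fms(x, y, z)  =   x * y  - z = -fnma(x, y, z)
        //   fnms(x, y, z) = -(x * y) - z = -fma(x, y, z)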
        /***********
         * Logical *
         ***********/

        namespace detail
        {
            XSIMD_RVV_OVERLOAD_INTS(rvvand, (__riscv_vand), , vec(vec, vec))
            XSIMD_RVV_OVERLOAD_INTS(rvvor, (__riscv_vor), , vec(vec, vec))
            XSIMD_RVV_OVERLOAD_INTS(rvvor_splat, (__riscv_vor), , vec(vec, T))
            XSIMD_RVV_OVERLOAD_INTS(rvvxor, (__riscv_vxor), , vec(vec, vec))
            XSIMD_RVV_OVERLOAD_INTS(rvvnot, (__riscv_vnot), , vec(vec))
            XSIMD_RVV_OVERLOAD(rvvmand, (__riscv_vmand_mm_b XSIMD_RVV_S), , bvec(bvec, bvec))
            XSIMD_RVV_OVERLOAD(rvvmor, (__riscv_vmor_mm_b XSIMD_RVV_S), , bvec(bvec, bvec))
            XSIMD_RVV_OVERLOAD(rvvmxor, (__riscv_vmxor_mm_b XSIMD_RVV_S), , bvec(bvec, bvec))
            XSIMD_RVV_OVERLOAD(rvvmandn, (__riscv_vmandn_mm_b XSIMD_RVV_S), , bvec(bvec, bvec))
            XSIMD_RVV_OVERLOAD(rvvmnot, (__riscv_vmnot), , bvec(bvec))
        }

        // Floating-point variants reinterpret to unsigned bits, operate
        // there, and reinterpret back.
        template <class A, class T, detail::enable_integral_t<T> = 0>
        XSIMD_INLINE batch<T, A> bitwise_and(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
        {
            return detail::rvvand(lhs, rhs);
        }

        template <class A, class T, detail::rvv_enable_floating_point_t<T> = 0>
        XSIMD_INLINE batch<T, A> bitwise_and(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
        {
            const auto lhs_bits = detail::rvv_to_unsigned_batch(lhs);
            const auto rhs_bits = detail::rvv_to_unsigned_batch(rhs);
            const auto result_bits = detail::rvvand(lhs_bits, rhs_bits);
            return detail::rvvreinterpret<T>(result_bits);
        }

        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE batch_bool<T, A> bitwise_and(batch_bool<T, A> const& lhs, batch_bool<T, A> const& rhs, requires_arch<rvv>) noexcept
        {
            return detail::rvvmand(lhs, rhs);
        }

        template <class A, class T, detail::enable_integral_t<T> = 0>
        XSIMD_INLINE batch<T, A> bitwise_andnot(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
        {
            const auto not_rhs = detail::rvvnot(rhs);
            return detail::rvvand(lhs, not_rhs);
        }

        template <class A, class T, detail::rvv_enable_floating_point_t<T> = 0>
        XSIMD_INLINE batch<T, A> bitwise_andnot(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
        {
            const auto lhs_bits = detail::rvv_to_unsigned_batch(lhs);
            const auto rhs_bits = detail::rvv_to_unsigned_batch(rhs);
            const auto not_rhs = detail::rvvnot(rhs_bits);
            const auto result_bits = detail::rvvand(lhs_bits, not_rhs);
            return detail::rvvreinterpret<T>(result_bits);
        }

        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE batch_bool<T, A> bitwise_andnot(batch_bool<T, A> const& lhs, batch_bool<T, A> const& rhs, requires_arch<rvv>) noexcept
        {
            return detail::rvvmandn(lhs, rhs);
        }

        template <class A, class T, detail::enable_integral_t<T> = 0>
        XSIMD_INLINE batch<T, A> bitwise_or(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
        {
            return detail::rvvor(lhs, rhs);
        }

        template <class A, class T, detail::rvv_enable_floating_point_t<T> = 0>
        XSIMD_INLINE batch<T, A> bitwise_or(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
        {
            const auto lhs_bits = detail::rvv_to_unsigned_batch(lhs);
            const auto rhs_bits = detail::rvv_to_unsigned_batch(rhs);
            const auto result_bits = detail::rvvor(lhs_bits, rhs_bits);
            return detail::rvvreinterpret<T>(result_bits);
        }

        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE batch_bool<T, A> bitwise_or(batch_bool<T, A> const& lhs, batch_bool<T, A> const& rhs, requires_arch<rvv>) noexcept
        {
            return detail::rvvmor(lhs, rhs);
        }

        template <class A, class T, detail::enable_integral_t<T> = 0>
        XSIMD_INLINE batch<T, A> bitwise_xor(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
        {
            return detail::rvvxor(lhs, rhs);
        }

        template <class A, class T, detail::rvv_enable_floating_point_t<T> = 0>
        XSIMD_INLINE batch<T, A> bitwise_xor(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
        {
            const auto lhs_bits = detail::rvv_to_unsigned_batch(lhs);
            const auto rhs_bits = detail::rvv_to_unsigned_batch(rhs);
            const auto result_bits = detail::rvvxor(lhs_bits, rhs_bits);
            return detail::rvvreinterpret<T>(result_bits);
        }

        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE batch_bool<T, A> bitwise_xor(batch_bool<T, A> const& lhs, batch_bool<T, A> const& rhs, requires_arch<rvv>) noexcept
        {
            return detail::rvvmxor(lhs, rhs);
        }

        template <class A, class T, detail::enable_integral_t<T> = 0>
        XSIMD_INLINE batch<T, A> bitwise_not(batch<T, A> const& arg, requires_arch<rvv>) noexcept
        {
            return detail::rvvnot(arg);
        }

        template <class A, class T, detail::rvv_enable_floating_point_t<T> = 0>
        XSIMD_INLINE batch<T, A> bitwise_not(batch<T, A> const& arg, requires_arch<rvv>) noexcept
        {
            const auto arg_bits = detail::rvv_to_unsigned_batch(arg);
            const auto result_bits = detail::rvvnot(arg_bits);
            return detail::rvvreinterpret<T>(result_bits);
        }

        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE batch_bool<T, A> bitwise_not(batch_bool<T, A> const& arg, requires_arch<rvv>) noexcept
        {
            return detail::rvvmnot(arg);
        }

        /**********
         * Shifts *
         **********/

        namespace detail
        {
            XSIMD_RVV_OVERLOAD_INTS(rvvsll_splat, (__riscv_vsll), , vec(vec, size_t))
            XSIMD_RVV_OVERLOAD_INTS(rvvsll, (__riscv_vsll), , vec(vec, uvec))
            XSIMD_RVV_OVERLOAD2(rvvsr_splat,
                                (__riscv_vsra),
                                (__riscv_vsrl), , vec(vec, size_t))
            XSIMD_RVV_OVERLOAD2(rvvsr,
                                (__riscv_vsra),
                                (__riscv_vsrl), , vec(vec, uvec))
        } // namespace detail
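        // rvvsr pairs the arithmetic right shift (vsra) for signed types
        // with the logical right shift (vsrl) for unsigned ones; left
        // shifts are the same in either case.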
        template <class A, class T, detail::enable_integral_t<T> = 0>
        XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& arg, int n, requires_arch<rvv>) noexcept
        {
            constexpr size_t size = sizeof(typename batch<T, A>::value_type) * 8;
            assert(0 <= n && static_cast<size_t>(n) < size && "index in bounds");
            return detail::rvvsll_splat(arg, n);
        }

        template <class A, class T, detail::enable_integral_t<T> = 0>
        XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
        {
            return detail::rvvsll(lhs, detail::rvv_to_unsigned_batch<A, T>(rhs));
        }

        template <class A, class T, detail::enable_integral_t<T> = 0>
        XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& arg, int n, requires_arch<rvv>) noexcept
        {
            constexpr size_t size = sizeof(typename batch<T, A>::value_type) * 8;
            assert(0 <= n && static_cast<size_t>(n) < size && "index in bounds");
            return detail::rvvsr_splat(arg, n);
        }

        template <class A, class T, detail::enable_integral_t<T> = 0>
        XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
        {
            return detail::rvvsr(lhs, detail::rvv_to_unsigned_batch<A, T>(rhs));
        }

        /**************
         * Reductions *
         **************/

        namespace detail
        {
            XSIMD_RVV_OVERLOAD3(rvvredsum,
                                (__riscv_vredsum),
                                (__riscv_vredsum),
                                (__riscv_vfredosum),
                                , scalar_vec(vec, scalar_vec))
            XSIMD_RVV_OVERLOAD3(rvvredmax,
                                (__riscv_vredmax),
                                (__riscv_vredmaxu),
                                (__riscv_vfredmax), , scalar_vec(vec, scalar_vec))
            XSIMD_RVV_OVERLOAD3(rvvredmin,
                                (__riscv_vredmin),
                                (__riscv_vredminu),
                                (__riscv_vfredmin), , scalar_vec(vec, scalar_vec))
            XSIMD_RVV_OVERLOAD3(rvvslide1up,
                                (__riscv_vslide1up),
                                (__riscv_vslide1up),
                                (__riscv_vfslide1up), , vec(vec, vec))
            XSIMD_RVV_OVERLOAD3(rvvslide1down,
                                (__riscv_vslide1down),
                                (__riscv_vslide1down),
                                (__riscv_vfslide1down), , vec(vec, T))

            template <class A, class T>
            XSIMD_INLINE T reduce_scalar(rvv_reg_t<T, types::detail::rvv_width_m1> const& arg)
            {
                return detail::rvvmv_lane0(rvv_reg_t<T, A::width>(arg.get_bytes(), types::detail::XSIMD_RVV_BITCAST));
            }
        } // namespace detail
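        // The RVV reductions take an LMUL=1 accumulator whose lane 0 holds
        // the initial value and return an LMUL=1 register whose lane 0
        // holds the result; reduce_scalar() above rebinds that register to
        // the batch width and extracts lane 0. Note that vfredosum is the
        // ordered (sequential) float sum.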
        template <class A, class T, class V = typename batch<T, A>::value_type, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE V reduce_add(batch<T, A> const& arg, requires_arch<rvv>) noexcept
        {
            const auto zero = detail::broadcast<T, types::detail::rvv_width_m1>(T(0));
            const auto r = detail::rvvredsum(arg, zero);
            return detail::reduce_scalar<A, T>(r);
        }

        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE T reduce_max(batch<T, A> const& arg, requires_arch<rvv>) noexcept
        {
            const auto lowest = detail::broadcast<T, types::detail::rvv_width_m1>(std::numeric_limits<T>::lowest());
            const auto r = detail::rvvredmax(arg, lowest);
            return detail::reduce_scalar<A, T>(r);
        }

        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE T reduce_min(batch<T, A> const& arg, requires_arch<rvv>) noexcept
        {
            const auto max = detail::broadcast<T, types::detail::rvv_width_m1>(std::numeric_limits<T>::max());
            const auto r = detail::rvvredmin(arg, max);
            return detail::reduce_scalar<A, T>(r);
        }

        template <class A, class T, detail::rvv_enable_floating_point_t<T> = 0>
        XSIMD_INLINE batch<T, A> haddp(const batch<T, A>* row, requires_arch<rvv>) noexcept
        {
            constexpr std::size_t size = batch<T, A>::size;
            T sums[size];
#pragma unroll size
            for (std::size_t i = 0; i < size; ++i)
            {
                sums[i] = reduce_add(row[i], rvv {});
            }
            return load_aligned<A>(sums, convert<T>(), rvv {});
        }

        /***************
         * Comparisons *
         ***************/

        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE batch_bool<T, A> eq(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
        {
            return detail::rvvmseq(lhs, rhs);
        }

        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE batch_bool<T, A> eq(batch_bool<T, A> const& lhs, batch_bool<T, A> const& rhs, requires_arch<rvv>) noexcept
        {
            const auto neq_result = detail::rvvmxor(lhs, rhs);
            return detail::rvvmnot(neq_result);
        }

        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE batch_bool<T, A> neq(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
        {
            return detail::rvvmsne(lhs, rhs);
        }

        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE batch_bool<T, A> neq(batch_bool<T, A> const& lhs, batch_bool<T, A> const& rhs, requires_arch<rvv>) noexcept
        {
            return detail::rvvmxor(lhs, rhs);
        }

        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE batch_bool<T, A> lt(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
        {
            return detail::rvvmslt(lhs, rhs);
        }

        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE batch_bool<T, A> le(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
        {
            return detail::rvvmsle(lhs, rhs);
        }

        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE batch_bool<T, A> gt(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
        {
            return detail::rvvmsgt(lhs, rhs);
        }

        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE batch_bool<T, A> ge(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
        {
            return detail::rvvmsge(lhs, rhs);
        }

        /*************
         * Selection *
         *************/

        namespace detail
        {
            XSIMD_RVV_OVERLOAD(rvvcompress, (__riscv_vcompress_tu), , vec(vec, vec, bvec))
        }

        template <class A, class T>
        XSIMD_INLINE batch<T, A> compress(batch<T, A> const& x, batch_bool<T, A> const& mask, requires_arch<rvv>) noexcept
        {
            auto zero = broadcast<A>(T(0), rvv {});
            return detail::rvvcompress(zero, x, mask);
        }

        /***************
         * Permutation *
         ***************/

        namespace detail
        {
            XSIMD_RVV_OVERLOAD(rvvrgather, (__riscv_vrgather), , vec(vec, uvec))
            XSIMD_RVV_OVERLOAD(rvvslideup, (__riscv_vslideup), , vec(vec, vec, size_t))
            XSIMD_RVV_OVERLOAD(rvvslidedown, (__riscv_vslidedown), , vec(vec, size_t))
        }

        template <class A, class T, class I, I... idx>
        XSIMD_INLINE batch<T, A> swizzle(batch<T, A> const& arg, batch_constant<I, A, idx...>, requires_arch<rvv>) noexcept
        {
            static_assert(batch<T, A>::size == sizeof...(idx), "invalid swizzle indices");
            const batch<I, A> indices { idx... };
            return detail::rvvrgather(arg, indices);
        }

        template <class A, class T, class I, I... idx>
        XSIMD_INLINE batch<std::complex<T>, A> swizzle(batch<std::complex<T>, A> const& self,
                                                       batch_constant<I, A, idx...>,
                                                       requires_arch<rvv>) noexcept
        {
            const auto real = swizzle(self.real(), batch_constant<I, A, idx...> {}, rvv {});
            const auto imag = swizzle(self.imag(), batch_constant<I, A, idx...> {}, rvv {});
            return batch<std::complex<T>, A>(real, imag);
        }

        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> extract_pair(batch<T, A> const& lhs, batch<T, A> const& rhs, size_t n, requires_arch<rvv>) noexcept
        {
            const auto tmp = detail::rvvslidedown(rhs, n);
            return detail::rvvslideup(tmp, lhs, lhs.size - n);
        }

        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> select(batch_bool<T, A> const& cond, batch<T, A> const& a, batch<T, A> const& b, requires_arch<rvv>) noexcept
        {
            return detail::rvvmerge(b, a, cond);
        }

        template <class A, class T, bool... b>
        XSIMD_INLINE batch<T, A> select(batch_bool_constant<T, A, b...> const&, batch<T, A> const& true_br, batch<T, A> const& false_br, requires_arch<rvv>) noexcept
        {
            return select(batch_bool<T, A> { b... }, true_br, false_br, rvv {});
        }

        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> zip_lo(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
        {
            const auto index = detail::vindex<A, as_unsigned_integer_t<T>, 0, -1>();
            const auto mask = detail::pmask8<T, A::width>(0xaa);
            return detail::rvvmerge(detail::rvvrgather(lhs, index),
                                    detail::rvvrgather(rhs, index),
                                    mask);
        }
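        // In zip_lo the gather index is vid >> 1 = { 0, 0, 1, 1, ... }, and
        // the repeating 0xaa mask selects the rhs gather in odd lanes, so
        // the result interleaves the low halves: l0, r0, l1, r1, ...
        // zip_hi below does the same starting from the middle of each input.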
        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> zip_hi(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
        {
            const auto index = detail::vindex<A, as_unsigned_integer_t<T>, batch<T, A>::size / 2, -1>();
            const auto mask = detail::pmask8<T, A::width>(0xaa);
            return detail::rvvmerge(detail::rvvrgather(lhs, index),
                                    detail::rvvrgather(rhs, index),
                                    mask);
        }

        template <class A, class T, detail::rvv_enable_floating_point_t<T> = 0>
        XSIMD_INLINE void store_complex_aligned(std::complex<T>* dst, batch<std::complex<T>, A> const& src, requires_arch<rvv>) noexcept
        {
            const auto lo = zip_lo(src.real(), src.imag());
            const auto hi = zip_hi(src.real(), src.imag());
            T* buf = reinterpret_cast<T*>(dst);
            store_aligned(buf, lo, rvv {});
            store_aligned(buf + lo.size, hi, rvv {});
        }

        template <class A, class T, detail::rvv_enable_floating_point_t<T> = 0>
        XSIMD_INLINE void store_complex_unaligned(std::complex<T>* dst, batch<std::complex<T>, A> const& src, requires_arch<rvv>) noexcept
        {
            store_complex_aligned(dst, src, rvv {});
        }

        /**************
         * Basic math *
         **************/

        namespace detail
        {
            XSIMD_RVV_OVERLOAD_FLOATS(rvvfsqrt, (__riscv_vfsqrt), , vec(vec))
            XSIMD_RVV_OVERLOAD_FLOATS(rvvfrec7, (__riscv_vfrec7), , vec(vec))
            XSIMD_RVV_OVERLOAD_FLOATS(rvvfrsqrt7, (__riscv_vfrsqrt7), , vec(vec))
        }

        template <class A, class T, detail::rvv_enable_floating_point_t<T> = 0>
        XSIMD_INLINE batch<T, A> rsqrt(batch<T, A> const& arg, requires_arch<rvv>) noexcept
        {
            auto approx = detail::rvvfrsqrt7(arg);
            approx = approx * (1.5 - (0.5 * arg * approx * approx));
            return approx;
        }
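        // vfrsqrt7 returns an estimate of 1/sqrt(x) accurate to about 7
        // bits; the Newton-Raphson step y <- y * (1.5 - 0.5 * x * y * y)
        // above roughly doubles the number of correct bits.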
        template <class A, class T, detail::rvv_enable_floating_point_t<T> = 0>
        XSIMD_INLINE batch<T, A> sqrt(batch<T, A> const& arg, requires_arch<rvv>) noexcept
        {
            return detail::rvvfsqrt(arg);
        }

        // Note: vfrec7 likewise gives only a ~7-bit approximation of 1/x.
        template <class A, class T, detail::rvv_enable_floating_point_t<T> = 0>
        XSIMD_INLINE batch<T, A> reciprocal(const batch<T, A>& arg, requires_arch<rvv>) noexcept
        {
            return detail::rvvfrec7(arg);
        }

        /***************
         * Conversions *
         ***************/

        namespace detail
        {
            XSIMD_RVV_OVERLOAD2(rvvfcvt_rtz,
                                (__riscv_vfcvt_rtz_x),
                                (__riscv_vfcvt_rtz_xu), _DROP_1ST, vec(T, fvec))
            XSIMD_RVV_OVERLOAD2(rvvfcvt_rne,
                                (__riscv_vfcvt_x),
                                (__riscv_vfcvt_xu), _DROP_1ST_CUSTOM_ARGS, vec(T, fvec), args..., __RISCV_FRM_RNE)
            XSIMD_RVV_OVERLOAD2(rvvfcvt_rmm,
                                (__riscv_vfcvt_x),
                                (__riscv_vfcvt_xu), _DROP_1ST_CUSTOM_ARGS, vec(T, fvec), args..., __RISCV_FRM_RMM)
            XSIMD_RVV_OVERLOAD2(rvvfcvt,
                                (__riscv_vfcvt_x),
                                (__riscv_vfcvt_xu), _DROP_1ST, vec(T, fvec))
            XSIMD_RVV_OVERLOAD_INTS(rvvfcvt_f, (__riscv_vfcvt_f), , fvec(vec))

            template <class T, class U>
            using rvv_enable_ftoi_t = typename std::enable_if<(sizeof(T) == sizeof(U) && std::is_floating_point<T>::value && !std::is_floating_point<U>::value), int>::type;
            template <class T, class U>
            using rvv_enable_itof_t = typename std::enable_if<(sizeof(T) == sizeof(U) && !std::is_floating_point<T>::value && std::is_floating_point<U>::value), int>::type;

            template <class A, class T, class U, rvv_enable_ftoi_t<T, U> = 0>
            XSIMD_INLINE batch<U, A> fast_cast(batch<T, A> const& arg, batch<U, A> const&, requires_arch<rvv>) noexcept
            {
                return rvvfcvt_rtz(U {}, arg);
            }
            template <class A, class T, class U, rvv_enable_itof_t<T, U> = 0>
            XSIMD_INLINE batch<U, A> fast_cast(batch<T, A> const& arg, batch<U, A> const&, requires_arch<rvv>) noexcept
            {
                return rvvfcvt_f(arg);
            }
        } // namespace detail
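        // Rounding-mode variants of the float-to-int conversion: _rtz
        // truncates toward zero (C cast semantics), _rne rounds to nearest
        // with ties to even, and _rmm rounds to nearest with ties away from
        // zero. The latter two pass an explicit frm argument
        // (__RISCV_FRM_RNE / __RISCV_FRM_RMM) to the intrinsic.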
        /******************
         * Batch creation *
         ******************/

        template <class A, class T, class... Args>
        XSIMD_INLINE batch<T, A> set(batch<T, A> const&, requires_arch<rvv>, Args... args) noexcept
        {
            const std::array<T, batch<T, A>::size> tmp { args... };
            return load_unaligned<A>(tmp.data(), convert<T>(), rvv {});
        }

        template <class A, class T, class... Args>
        XSIMD_INLINE batch<std::complex<T>, A> set(batch<std::complex<T>, A> const&, requires_arch<rvv>,
                                                   Args... args_complex) noexcept
        {
            return batch<std::complex<T>, A>(set(batch<T, rvv> {}, rvv {}, args_complex.real()...),
                                             set(batch<T, rvv> {}, rvv {}, args_complex.imag()...));
        }

        template <class A, class T, class... Args>
        XSIMD_INLINE batch_bool<T, A> set(batch_bool<T, A> const&, requires_arch<rvv>, Args... args) noexcept
        {
            using U = as_unsigned_integer_t<T>;
            const auto values = set(batch<U, rvv> {}, rvv {}, static_cast<U>(args)...);
            const auto zero = broadcast<A>(U(0), rvv {});
            detail::rvv_bool_t<T> result = detail::rvvmsne(values, zero);
            return result;
        }

        template <class A, class T, size_t I, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> insert(batch<T, A> const& arg, T val, index<I>, requires_arch<rvv>) noexcept
        {
            const auto mask = detail::pmask<T, A::width>(uint64_t(1) << I);
            return detail::rvvmerge_splat(arg, val, mask);
        }
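        // insert() builds a one-hot predicate (1 << I) and uses a masked
        // merge with the scalar to replace just lane I.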
        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE T get(batch<T, A> const& arg, size_t i, requires_arch<rvv>) noexcept
        {
            const auto tmp = detail::rvvslidedown(arg, i);
            return detail::rvvmv_lane0(tmp);
        }

        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE std::complex<T> get(batch<std::complex<T>, A> const& arg, size_t i, requires_arch<rvv>) noexcept
        {
            const auto tmpr = detail::rvvslidedown(arg.real(), i);
            const auto tmpi = detail::rvvslidedown(arg.imag(), i);
            return std::complex<T> { detail::rvvmv_lane0(tmpr), detail::rvvmv_lane0(tmpi) };
        }

        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE bool all(batch_bool<T, A> const& arg, requires_arch<rvv>) noexcept
        {
            return detail::rvvcpop(arg) == batch_bool<T, A>::size;
        }

        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE bool any(batch_bool<T, A> const& arg, requires_arch<rvv>) noexcept
        {
            return detail::rvvcpop(arg) > 0;
        }

        template <class A, class T, class R, detail::rvv_enable_all_t<T> = 0, detail::rvv_enable_all_t<R> = 0>
        XSIMD_INLINE batch<R, A> bitwise_cast(batch<T, A> const& arg, batch<R, A> const&, requires_arch<rvv>) noexcept
        {
            return detail::rvv_reg_t<R, A::width>(arg.data.get_bytes(), types::detail::XSIMD_RVV_BITCAST);
        }

        template <class A, class T_out, class T_in, detail::rvv_enable_all_t<T_in> = 0>
        XSIMD_INLINE batch_bool<T_out, A> batch_bool_cast(batch_bool<T_in, A> const& arg, batch_bool<T_out, A> const&, requires_arch<rvv>) noexcept
        {
            using intermediate_t = typename detail::rvv_bool_t<T_out>;
            return intermediate_t(arg.data);
        }

        template <class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> from_bool(batch_bool<T, A> const& arg, requires_arch<rvv>) noexcept
        {
            const auto zero = broadcast<A>(T(0), rvv {});
            return detail::rvvmerge_splat(zero, T(1), arg);
        }

        namespace detail
        {
            // Slide the register's byte representation down by i bytes,
            // truncating to the fractional-LMUL width first where necessary
            // so the slide's vl matches the register's actual size.
            template <size_t Width>
            XSIMD_INLINE vuint8m1_t rvvslidedownbytes(vuint8m1_t arg, size_t i)
            {
                return __riscv_vslidedown(arg, i, types::detail::rvv_width_m1 / 8);
            }
            template <>
            XSIMD_INLINE vuint8m1_t rvvslidedownbytes<types::detail::rvv_width_mf2>(vuint8m1_t arg, size_t i)
            {
                const auto bytes = __riscv_vlmul_trunc_u8mf2(arg);
                const auto result = __riscv_vslidedown(bytes, i, types::detail::rvv_width_mf2 / 8);
                return __riscv_vlmul_ext_u8m1(result);
            }
            template <>
            XSIMD_INLINE vuint8m1_t rvvslidedownbytes<types::detail::rvv_width_mf4>(vuint8m1_t arg, size_t i)
            {
                const auto bytes = __riscv_vlmul_trunc_u8mf4(arg);
                const auto result = __riscv_vslidedown(bytes, i, types::detail::rvv_width_mf4 / 8);
                return __riscv_vlmul_ext_u8m1(result);
            }
            template <>
            XSIMD_INLINE vuint8m1_t rvvslidedownbytes<types::detail::rvv_width_mf8>(vuint8m1_t arg, size_t i)
            {
                const auto bytes = __riscv_vlmul_trunc_u8mf8(arg);
                const auto result = __riscv_vslidedown(bytes, i, types::detail::rvv_width_mf8 / 8);
                return __riscv_vlmul_ext_u8m1(result);
            }
        } // namespace detail

        template <size_t N, class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> slide_left(batch<T, A> const& arg, requires_arch<rvv>) noexcept
        {
            const auto zero = broadcast<A>(uint8_t(0), rvv {});
            const auto bytes = arg.data.get_bytes();
            return detail::rvvreinterpret<T>(detail::rvvslideup(zero, bytes, N));
        }

        template <size_t N, class A, class T, detail::rvv_enable_all_t<T> = 0>
        XSIMD_INLINE batch<T, A> slide_right(batch<T, A> const& arg, requires_arch<rvv>) noexcept
        {
            using reg_t = detail::rvv_reg_t<T, A::width>;
            const auto bytes = arg.data.get_bytes();
            return reg_t(detail::rvvslidedownbytes<A::width>(bytes, N), types::detail::XSIMD_RVV_BITCAST);
        }

        template <class A, class T, detail::rvv_enable_floating_point_t<T> = 0>
        XSIMD_INLINE batch_bool<T, A> isnan(batch<T, A> const& arg, requires_arch<rvv>) noexcept
        {
            // NaN is the only value that does not compare equal to itself.
            return !(arg == arg);
        }

        /************
         * Rounding *
         ************/

        namespace detail
        {
            template <class T>
            using rvv_as_signed_integer_t = as_signed_integer_t<as_unsigned_integer_t<T>>;

            template <class A, class T, class U = rvv_as_signed_integer_t<T>>
            XSIMD_INLINE batch<U, A> rvvfcvt_default(batch<T, A> const& arg) noexcept
            {
                return rvvfcvt_rne(U {}, arg);
            }

            template <class A, class T, class U = rvv_as_signed_integer_t<T>>
            XSIMD_INLINE batch<U, A> rvvfcvt_afz(batch<T, A> const& arg) noexcept
            {
                return rvvfcvt_rmm(U {}, arg);
            }
        } // namespace detail

        // nearbyint_as_int: round to nearest, ties to even
        template <class A, class T, class U = detail::rvv_as_signed_integer_t<T>>
        XSIMD_INLINE batch<U, A> nearbyint_as_int(batch<T, A> const& arg, requires_arch<rvv>) noexcept
        {
            return detail::rvvfcvt_default(arg);
        }

        // round: ties away from zero
        template <class A, class T, detail::rvv_enable_floating_point_t<T> = 0>
        XSIMD_INLINE batch<T, A> round(batch<T, A> const& arg, requires_arch<rvv>) noexcept
        {
            const auto mask = abs(arg) < constants::maxflint<batch<T, A>>();
            return select(mask, to_float(detail::rvvfcvt_afz(arg)), arg, rvv {});
        }
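        // Values with |x| >= maxflint are already integral (the mantissa
        // can no longer represent a fractional part) and could overflow the
        // integer intermediate, so round() and nearbyint() pass them
        // through unchanged.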
        // nearbyint: ties to even
        template <class A, class T, detail::rvv_enable_floating_point_t<T> = 0>
        XSIMD_INLINE batch<T, A> nearbyint(batch<T, A> const& arg, requires_arch<rvv>) noexcept
        {
            const auto mask = abs(arg) < constants::maxflint<batch<T, A>>();
            return select(mask, to_float(detail::rvvfcvt_default(arg)), arg, rvv {});
        }
    } // namespace kernel
} // namespace xsimd

#endif