0013 #ifndef XSIMD_WASM_HPP
0014 #define XSIMD_WASM_HPP
0015
0016 #include <type_traits>
0017
0018 #include "../types/xsimd_wasm_register.hpp"
0019
0020 namespace xsimd
0021 {
0022 template <typename T, class A, bool... Values>
0023 struct batch_bool_constant;
0024
0025 template <class T_out, class T_in, class A>
0026 XSIMD_INLINE batch<T_out, A> bitwise_cast(batch<T_in, A> const& x) noexcept;
0027
0028 template <typename T, class A, T... Values>
0029 struct batch_constant;
0030
0031 namespace kernel
0032 {
0033 using namespace types;
0034
0035
0036 template <class A, class T, size_t I>
0037 XSIMD_INLINE batch<T, A> insert(batch<T, A> const& self, T val, index<I>, requires_arch<generic>) noexcept;
0038 template <class A, typename T, typename ITy, ITy... Indices>
0039 XSIMD_INLINE batch<T, A> shuffle(batch<T, A> const& x, batch<T, A> const& y, batch_constant<ITy, A, Indices...>, requires_arch<generic>) noexcept;
0040 template <class A, class T>
0041 XSIMD_INLINE batch<T, A> avg(batch<T, A> const&, batch<T, A> const&, requires_arch<generic>) noexcept;
0042 template <class A, class T>
0043 XSIMD_INLINE void transpose(batch<T, A>* matrix_begin, batch<T, A>* matrix_end, requires_arch<generic>) noexcept;
0044
0045
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value && std::is_signed<T>::value, void>::type>
0047 XSIMD_INLINE batch<T, A> abs(batch<T, A> const& self, requires_arch<wasm>) noexcept
0048 {
0049 XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
0050 {
0051 return wasm_i8x16_abs(self);
0052 }
0053 else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
0054 {
0055 return wasm_i16x8_abs(self);
0056 }
0057 else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
0058 {
0059 return wasm_i32x4_abs(self);
0060 }
0061 else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
0062 {
0063 return wasm_i64x2_abs(self);
0064 }
0065 else
0066 {
0067 assert(false && "unsupported arch/op combination");
0068 return {};
0069 }
0070 }
0071
0072 template <class A>
0073 XSIMD_INLINE batch<float, A> abs(batch<float, A> const& self, requires_arch<wasm>) noexcept
0074 {
0075 return wasm_f32x4_abs(self);
0076 }
0077
0078 template <class A>
0079 XSIMD_INLINE batch<double, A> abs(batch<double, A> const& self, requires_arch<wasm>) noexcept
0080 {
0081 return wasm_f64x2_abs(self);
0082 }
0083
0084
0085 template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
0086 XSIMD_INLINE batch<T, A> add(batch<T, A> const& self, batch<T, A> const& other, requires_arch<wasm>) noexcept
0087 {
0088 XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
0089 {
0090 return wasm_i8x16_add(self, other);
0091 }
0092 else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
0093 {
0094 return wasm_i16x8_add(self, other);
0095 }
0096 else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
0097 {
0098 return wasm_i32x4_add(self, other);
0099 }
0100 else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
0101 {
0102 return wasm_i64x2_add(self, other);
0103 }
0104 else
0105 {
0106 assert(false && "unsupported arch/op combination");
0107 return {};
0108 }
0109 }
0110
0111 template <class A>
0112 XSIMD_INLINE batch<float, A> add(batch<float, A> const& self, batch<float, A> const& other, requires_arch<wasm>) noexcept
0113 {
0114 return wasm_f32x4_add(self, other);
0115 }
0116
0117 template <class A>
0118 XSIMD_INLINE batch<double, A> add(batch<double, A> const& self, batch<double, A> const& other, requires_arch<wasm>) noexcept
0119 {
0120 return wasm_f64x2_add(self, other);
0121 }
0122
0123
0124 template <class A, class T, class = typename std::enable_if<std::is_unsigned<T>::value, void>::type>
0125 XSIMD_INLINE batch<T, A> avgr(batch<T, A> const& self, batch<T, A> const& other, requires_arch<wasm>) noexcept
0126 {
0127 XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
0128 {
0129 return wasm_u8x16_avgr(self, other);
0130 }
0131 else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
0132 {
0133 return wasm_u16x8_avgr(self, other);
0134 }
0135 else
0136 {
0137 return avgr(self, other, generic {});
0138 }
0139 }
0140
0141
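// avg: wasm only provides the rounding average avgr ((a + b + 1) >> 1) for u8/u16 lanes.
// The truncating average (a + b) >> 1 is recovered by subtracting the carry bit
// (a ^ b) & 1, isolated below by the shift pair; e.g. avg(1, 2) = avgr(1, 2) - 1 = 1.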
0142 template <class A, class T, class = typename std::enable_if<std::is_unsigned<T>::value, void>::type>
0143 XSIMD_INLINE batch<T, A> avg(batch<T, A> const& self, batch<T, A> const& other, requires_arch<wasm>) noexcept
0144 {
0145 XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
0146 {
0147 auto adj = ((self ^ other) << 7) >> 7;
0148 return avgr(self, other, A {}) - adj;
0149 }
0150 else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
0151 {
0152 auto adj = ((self ^ other) << 15) >> 15;
0153 return avgr(self, other, A {}) - adj;
0154 }
0155 else
0156 {
0157 return avg(self, other, generic {});
0158 }
0159 }
0160
0161
0162 template <class A>
0163 XSIMD_INLINE bool all(batch_bool<float, A> const& self, requires_arch<wasm>) noexcept
0164 {
0165 return wasm_i32x4_bitmask(self) == 0x0F;
0166 }
0167 template <class A>
0168 XSIMD_INLINE bool all(batch_bool<double, A> const& self, requires_arch<wasm>) noexcept
0169 {
0170 return wasm_i64x2_bitmask(self) == 0x03;
0171 }
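// For integer batch_bool every lane is all-ones or all-zero, so testing all 16 bits of
// the byte-granularity bitmask works for any element width.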
0172 template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
0173 XSIMD_INLINE bool all(batch_bool<T, A> const& self, requires_arch<wasm>) noexcept
0174 {
0175 return wasm_i8x16_bitmask(self) == 0xFFFF;
0176 }
0177
0178
0179 template <class A>
0180 XSIMD_INLINE bool any(batch_bool<float, A> const& self, requires_arch<wasm>) noexcept
0181 {
0182 return wasm_i32x4_bitmask(self) != 0;
0183 }
0184 template <class A>
0185 XSIMD_INLINE bool any(batch_bool<double, A> const& self, requires_arch<wasm>) noexcept
0186 {
0187 return wasm_i64x2_bitmask(self) != 0;
0188 }
0189 template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
0190 XSIMD_INLINE bool any(batch_bool<T, A> const& self, requires_arch<wasm>) noexcept
0191 {
0192 return wasm_i8x16_bitmask(self) != 0;
0193 }
0194
0195
0196 template <class A, class T_out, class T_in>
0197 XSIMD_INLINE batch_bool<T_out, A> batch_bool_cast(batch_bool<T_in, A> const& self, batch_bool<T_out, A> const&, requires_arch<wasm>) noexcept
0198 {
0199 return { bitwise_cast<T_out>(batch<T_in, A>(self.data)).data };
0200 }
0201
0202
0203 template <class A, class T>
0204 XSIMD_INLINE batch<T, A> bitwise_and(batch<T, A> const& self, batch<T, A> const& other, requires_arch<wasm>) noexcept
0205 {
0206 return wasm_v128_and(self, other);
0207 }
0208
0209 template <class A, class T>
0210 XSIMD_INLINE batch_bool<T, A> bitwise_and(batch_bool<T, A> const& self, batch_bool<T, A> const& other, requires_arch<wasm>) noexcept
0211 {
0212 return wasm_v128_and(self, other);
0213 }
0214
0215
0216 template <class A, class T>
0217 XSIMD_INLINE batch<T, A> bitwise_andnot(batch<T, A> const& self, batch<T, A> const& other, requires_arch<wasm>) noexcept
0218 {
0219 return wasm_v128_andnot(self, other);
0220 }
0221
0222 template <class A, class T>
0223 XSIMD_INLINE batch_bool<T, A> bitwise_andnot(batch_bool<T, A> const& self, batch_bool<T, A> const& other, requires_arch<wasm>) noexcept
0224 {
0225 return wasm_v128_andnot(self, other);
0226 }
0227
0228
0229 template <class A, class T, class Tp>
0230 XSIMD_INLINE batch<Tp, A> bitwise_cast(batch<T, A> const& self, batch<Tp, A> const&, requires_arch<wasm>) noexcept
0231 {
0232 return batch<Tp, A>(self.data);
0233 }
0234
0235
0236 template <class A, class T>
0237 XSIMD_INLINE batch<T, A> bitwise_or(batch<T, A> const& self, batch<T, A> const& other, requires_arch<wasm>) noexcept
0238 {
0239 return wasm_v128_or(self, other);
0240 }
0241
0242 template <class A, class T>
0243 XSIMD_INLINE batch_bool<T, A> bitwise_or(batch_bool<T, A> const& self, batch_bool<T, A> const& other, requires_arch<wasm>) noexcept
0244 {
0245 return wasm_v128_or(self, other);
0246 }
0247
0248
0249 template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
0250 XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, int32_t other, requires_arch<wasm>) noexcept
0251 {
0252 XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
0253 {
0254 return wasm_i8x16_shl(self, other);
0255 }
0256 else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
0257 {
0258 return wasm_i16x8_shl(self, other);
0259 }
0260 else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
0261 {
0262 return wasm_i32x4_shl(self, other);
0263 }
0264 else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
0265 {
0266 return wasm_i64x2_shl(self, other);
0267 }
0268 else
0269 {
0270 assert(false && "unsupported arch/op combination");
0271 return {};
0272 }
0273 }
0274
0275
0276 template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
0277 XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& self, int32_t other, requires_arch<wasm>) noexcept
0278 {
0279 if (std::is_signed<T>::value)
0280 {
0281 XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
0282 {
0283 return wasm_i8x16_shr(self, other);
0284 }
0285 else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
0286 {
0287 return wasm_i16x8_shr(self, other);
0288 }
0289 else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
0290 {
0291 return wasm_i32x4_shr(self, other);
0292 }
0293 else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
0294 {
0295 return wasm_i64x2_shr(self, other);
0296 }
0297 else
0298 {
0299 assert(false && "unsupported arch/op combination");
0300 return {};
0301 }
0302 }
0303 else
0304 {
0305 XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
0306 {
0307 return wasm_u8x16_shr(self, other);
0308 }
0309 else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
0310 {
0311 return wasm_u16x8_shr(self, other);
0312 }
0313 else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
0314 {
0315 return wasm_u32x4_shr(self, other);
0316 }
0317 else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
0318 {
0319 return wasm_u64x2_shr(self, other);
0320 }
0321 else
0322 {
0323 assert(false && "unsupported arch/op combination");
0324 return {};
0325 }
0326 }
0327 }
0328
0329
0330 template <class A, class T>
0331 XSIMD_INLINE batch<T, A> bitwise_not(batch<T, A> const& self, requires_arch<wasm>) noexcept
0332 {
0333 return wasm_v128_not(self);
0334 }
0335
0336 template <class A, class T>
0337 XSIMD_INLINE batch_bool<T, A> bitwise_not(batch_bool<T, A> const& self, requires_arch<wasm>) noexcept
0338 {
0339 return wasm_v128_not(self);
0340 }
0341
0342
0343 template <class A, class T>
0344 XSIMD_INLINE batch<T, A> bitwise_xor(batch<T, A> const& self, batch<T, A> const& other, requires_arch<wasm>) noexcept
0345 {
0346 return wasm_v128_xor(self, other);
0347 }
0348
0349 template <class A, class T>
0350 XSIMD_INLINE batch_bool<T, A> bitwise_xor(batch_bool<T, A> const& self, batch_bool<T, A> const& other, requires_arch<wasm>) noexcept
0351 {
0352 return wasm_v128_xor(self, other);
0353 }
0354
0355
0356 template <class A>
XSIMD_INLINE batch<float, A> broadcast(float val, requires_arch<wasm>) noexcept
0358 {
0359 return wasm_f32x4_splat(val);
0360 }
0361 template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
0362 XSIMD_INLINE batch<T, A> broadcast(T val, requires_arch<wasm>) noexcept
0363 {
0364 XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
0365 {
0366 return wasm_i8x16_splat(val);
0367 }
0368 else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
0369 {
0370 return wasm_i16x8_splat(val);
0371 }
0372 else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
0373 {
0374 return wasm_i32x4_splat(val);
0375 }
0376 else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
0377 {
0378 return wasm_i64x2_splat(val);
0379 }
0380 else
0381 {
0382 assert(false && "unsupported arch/op combination");
0383 return {};
0384 }
0385 }
0386 template <class A>
0387 XSIMD_INLINE batch<double, A> broadcast(double val, requires_arch<wasm>) noexcept
0388 {
0389 return wasm_f64x2_splat(val);
0390 }
0391
0392
0393 template <class A>
0394 XSIMD_INLINE batch<float, A> ceil(batch<float, A> const& self, requires_arch<wasm>) noexcept
0395 {
0396 return wasm_f32x4_ceil(self);
0397 }
0398 template <class A>
0399 XSIMD_INLINE batch<double, A> ceil(batch<double, A> const& self, requires_arch<wasm>) noexcept
0400 {
0401 return wasm_f64x2_ceil(self);
0402 }
0403
0404
0405 template <class A>
0406 XSIMD_INLINE batch<float, A> div(batch<float, A> const& self, batch<float, A> const& other, requires_arch<wasm>) noexcept
0407 {
0408 return wasm_f32x4_div(self, other);
0409 }
0410 template <class A>
0411 XSIMD_INLINE batch<double, A> div(batch<double, A> const& self, batch<double, A> const& other, requires_arch<wasm>) noexcept
0412 {
0413 return wasm_f64x2_div(self, other);
0414 }
0415
0416
0417 template <class A>
0418 XSIMD_INLINE batch_bool<float, A> eq(batch<float, A> const& self, batch<float, A> const& other, requires_arch<wasm>) noexcept
0419 {
0420 return wasm_f32x4_eq(self, other);
0421 }
0422 template <class A>
0423 XSIMD_INLINE batch_bool<float, A> eq(batch_bool<float, A> const& self, batch_bool<float, A> const& other, requires_arch<wasm>) noexcept
0424 {
0425 return wasm_i32x4_eq(self, other);
0426 }
0427 template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
0428 XSIMD_INLINE batch_bool<T, A> eq(batch<T, A> const& self, batch<T, A> const& other, requires_arch<wasm>) noexcept
0429 {
0430 XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
0431 {
0432 return wasm_i8x16_eq(self, other);
0433 }
0434 else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
0435 {
0436 return wasm_i16x8_eq(self, other);
0437 }
0438 else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
0439 {
0440 return wasm_i32x4_eq(self, other);
0441 }
0442 else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
0443 {
0444 return wasm_i64x2_eq(self, other);
0445 }
0446 else
0447 {
0448 assert(false && "unsupported arch/op combination");
0449 return {};
0450 }
0451 }
0452 template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
0453 XSIMD_INLINE batch_bool<T, A> eq(batch_bool<T, A> const& self, batch_bool<T, A> const& other, requires_arch<wasm>) noexcept
0454 {
0455 XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
0456 {
0457 return wasm_i8x16_eq(self, other);
0458 }
0459 else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
0460 {
0461 return wasm_i16x8_eq(self, other);
0462 }
0463 else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
0464 {
0465 return wasm_i32x4_eq(self, other);
0466 }
0467 else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
0468 {
0469 return wasm_i64x2_eq(self, other);
0470 }
0471 else
0472 {
0473 assert(false && "unsupported arch/op combination");
0474 return {};
0475 }
0476 }
0477 template <class A>
0478 XSIMD_INLINE batch_bool<double, A> eq(batch<double, A> const& self, batch<double, A> const& other, requires_arch<wasm>) noexcept
0479 {
0480 return wasm_f64x2_eq(self, other);
0481 }
0482 template <class A>
0483 XSIMD_INLINE batch_bool<double, A> eq(batch_bool<double, A> const& self, batch_bool<double, A> const& other, requires_arch<wasm>) noexcept
0484 {
0485 return wasm_i64x2_eq(self, other);
0486 }
0487
0488
0489 namespace detail
0490 {
0491 template <class A>
0492 XSIMD_INLINE batch<float, A> fast_cast(batch<int32_t, A> const& self, batch<float, A> const&, requires_arch<wasm>) noexcept
0493 {
0494 return wasm_f32x4_convert_i32x4(self);
0495 }
0496
0497 template <class A>
0498 XSIMD_INLINE batch<double, A> fast_cast(batch<uint64_t, A> const& x, batch<double, A> const&, requires_arch<wasm>) noexcept
0499 {
0500
0501
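// No u64x2 -> f64x2 conversion in wasm SIMD; use the classic double-precision bit trick:
// fold the high and low 32-bit halves into the mantissas of the magic constants
// 2^84 and 2^52, then subtract the combined bias 2^84 + 2^52 when recombining.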
0502 v128_t xH = wasm_u64x2_shr(x, 32);
0503 xH = wasm_v128_or(xH, wasm_f64x2_splat(19342813113834066795298816.));
0504 v128_t mask = wasm_i16x8_make(0xFFFF, 0xFFFF, 0x0000, 0x0000, 0xFFFF, 0xFFFF, 0x0000, 0x0000);
0505 v128_t xL = wasm_v128_or(wasm_v128_and(mask, x), wasm_v128_andnot(wasm_f64x2_splat(0x0010000000000000), mask));
0506 v128_t f = wasm_f64x2_sub(xH, wasm_f64x2_splat(19342813118337666422669312.));
0507 return wasm_f64x2_add(f, xL);
0508 }
0509
0510 template <class A>
0511 XSIMD_INLINE batch<double, A> fast_cast(batch<int64_t, A> const& x, batch<double, A> const&, requires_arch<wasm>) noexcept
0512 {
0513
0514
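// Signed counterpart of the trick above: the top 16 bits are sign-extended and folded
// into 3 * 2^67, the low 48 bits into 2^52, and the bias 3 * 2^67 + 2^52 is subtracted
// when recombining.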
0515 v128_t xH = wasm_i32x4_shr(x, 16);
0516 xH = wasm_v128_and(xH, wasm_i16x8_make(0x0000, 0x0000, 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0xFFFF, 0xFFFF));
0517 xH = wasm_i64x2_add(xH, wasm_f64x2_splat(442721857769029238784.));
0518 v128_t mask = wasm_i16x8_make(0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000);
0519 v128_t xL = wasm_v128_or(wasm_v128_and(mask, x), wasm_v128_andnot(wasm_f64x2_splat(0x0010000000000000), mask));
0520 v128_t f = wasm_f64x2_sub(xH, wasm_f64x2_splat(442726361368656609280.));
0521 return wasm_f64x2_add(f, xL);
0522 }
0523
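// Lane-by-lane scalar conversion; presumably preferred over wasm_i32x4_trunc_sat_f32x4,
// whose saturating handling of out-of-range values and NaN differs from a plain C cast.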
0524 template <class A>
0525 XSIMD_INLINE batch<int32_t, A> fast_cast(batch<float, A> const& self, batch<int32_t, A> const&, requires_arch<wasm>) noexcept
0526 {
0527 return wasm_i32x4_make(
0528 static_cast<int32_t>(wasm_f32x4_extract_lane(self, 0)),
0529 static_cast<int32_t>(wasm_f32x4_extract_lane(self, 1)),
0530 static_cast<int32_t>(wasm_f32x4_extract_lane(self, 2)),
0531 static_cast<int32_t>(wasm_f32x4_extract_lane(self, 3)));
0532 }
0533 }
0534
0535
0536 template <class A>
0537 XSIMD_INLINE batch<float, A> floor(batch<float, A> const& self, requires_arch<wasm>) noexcept
0538 {
0539 return wasm_f32x4_floor(self);
0540 }
0541
0542 template <class A>
0543 XSIMD_INLINE batch<double, A> floor(batch<double, A> const& self, requires_arch<wasm>) noexcept
0544 {
0545 return wasm_f64x2_floor(self);
0546 }
0547
0548
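// from_mask: expand an N-bit scalar mask into a batch_bool via a lookup table holding
// one full lane pattern per possible mask value.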
0549 template <class A>
0550 XSIMD_INLINE batch_bool<float, A> from_mask(batch_bool<float, A> const&, uint64_t mask, requires_arch<wasm>) noexcept
0551 {
0552 alignas(A::alignment()) static const uint32_t lut[][4] = {
0553 { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
0554 { 0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000 },
0555 { 0x00000000, 0xFFFFFFFF, 0x00000000, 0x00000000 },
0556 { 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000 },
0557 { 0x00000000, 0x00000000, 0xFFFFFFFF, 0x00000000 },
0558 { 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0x00000000 },
0559 { 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 },
0560 { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 },
0561 { 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF },
0562 { 0xFFFFFFFF, 0x00000000, 0x00000000, 0xFFFFFFFF },
0563 { 0x00000000, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF },
0564 { 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF },
0565 { 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF },
0566 { 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF },
0567 { 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF },
0568 { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF },
0569 };
0570 assert(!(mask & ~0xFul) && "inbound mask");
0571 return wasm_v128_load((const v128_t*)lut[mask]);
0572 }
0573 template <class A>
0574 XSIMD_INLINE batch_bool<double, A> from_mask(batch_bool<double, A> const&, uint64_t mask, requires_arch<wasm>) noexcept
0575 {
alignas(A::alignment()) static const uint64_t lut[][2] = {
0577 { 0x0000000000000000ul, 0x0000000000000000ul },
0578 { 0xFFFFFFFFFFFFFFFFul, 0x0000000000000000ul },
0579 { 0x0000000000000000ul, 0xFFFFFFFFFFFFFFFFul },
0580 { 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul },
0581 };
0582 assert(!(mask & ~0x3ul) && "inbound mask");
0583 return wasm_v128_load((const v128_t*)lut[mask]);
0584 }
0585 template <class T, class A, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
0586 XSIMD_INLINE batch_bool<T, A> from_mask(batch_bool<T, A> const&, uint64_t mask, requires_arch<wasm>) noexcept
0587 {
0588 alignas(A::alignment()) static const uint64_t lut64[] = {
0589 0x0000000000000000,
0590 0x000000000000FFFF,
0591 0x00000000FFFF0000,
0592 0x00000000FFFFFFFF,
0593 0x0000FFFF00000000,
0594 0x0000FFFF0000FFFF,
0595 0x0000FFFFFFFF0000,
0596 0x0000FFFFFFFFFFFF,
0597 0xFFFF000000000000,
0598 0xFFFF00000000FFFF,
0599 0xFFFF0000FFFF0000,
0600 0xFFFF0000FFFFFFFF,
0601 0xFFFFFFFF00000000,
0602 0xFFFFFFFF0000FFFF,
0603 0xFFFFFFFFFFFF0000,
0604 0xFFFFFFFFFFFFFFFF,
0605 };
0606 alignas(A::alignment()) static const uint32_t lut32[] = {
0607 0x00000000,
0608 0x000000FF,
0609 0x0000FF00,
0610 0x0000FFFF,
0611 0x00FF0000,
0612 0x00FF00FF,
0613 0x00FFFF00,
0614 0x00FFFFFF,
0615 0xFF000000,
0616 0xFF0000FF,
0617 0xFF00FF00,
0618 0xFF00FFFF,
0619 0xFFFF0000,
0620 0xFFFF00FF,
0621 0xFFFFFF00,
0622 0xFFFFFFFF,
0623 };
0624 alignas(A::alignment()) static const uint32_t lut16[][4] = {
0625 { 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
0626 { 0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000 },
0627 { 0x00000000, 0xFFFFFFFF, 0x00000000, 0x00000000 },
0628 { 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000 },
0629 { 0x00000000, 0x00000000, 0xFFFFFFFF, 0x00000000 },
0630 { 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0x00000000 },
0631 { 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 },
0632 { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 },
0633 { 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF },
0634 { 0xFFFFFFFF, 0x00000000, 0x00000000, 0xFFFFFFFF },
0635 { 0x00000000, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF },
0636 { 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF },
0637 { 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF },
0638 { 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF },
0639 { 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF },
0640 { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF },
0641 };
alignas(A::alignment()) static const uint64_t lut8[][2] = {
0643 { 0x0000000000000000ul, 0x0000000000000000ul },
0644 { 0xFFFFFFFFFFFFFFFFul, 0x0000000000000000ul },
0645 { 0x0000000000000000ul, 0xFFFFFFFFFFFFFFFFul },
0646 { 0xFFFFFFFFFFFFFFFFul, 0xFFFFFFFFFFFFFFFFul },
0647 };
0648 XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
0649 {
0650 assert(!(mask & ~0xFFFF) && "inbound mask");
0651 return wasm_i32x4_make(lut32[mask & 0xF], lut32[(mask >> 4) & 0xF], lut32[(mask >> 8) & 0xF], lut32[mask >> 12]);
0652 }
0653 else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
0654 {
0655 assert(!(mask & ~0xFF) && "inbound mask");
0656 return wasm_i64x2_make(lut64[mask & 0xF], lut64[mask >> 4]);
0657 }
0658 else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
0659 {
0660 assert(!(mask & ~0xFul) && "inbound mask");
0661 return wasm_v128_load((const v128_t*)lut16[mask]);
0662 }
0663 else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
0664 {
0665 assert(!(mask & ~0x3ul) && "inbound mask");
0666 return wasm_v128_load((const v128_t*)lut8[mask]);
0667 }
0668 }
0669
0670
0671 template <class A>
0672 XSIMD_INLINE batch_bool<float, A> ge(batch<float, A> const& self, batch<float, A> const& other, requires_arch<wasm>) noexcept
0673 {
0674 return wasm_f32x4_ge(self, other);
0675 }
0676 template <class A>
0677 XSIMD_INLINE batch_bool<double, A> ge(batch<double, A> const& self, batch<double, A> const& other, requires_arch<wasm>) noexcept
0678 {
0679 return wasm_f64x2_ge(self, other);
0680 }
0681
0682
0683 template <class A>
0684 XSIMD_INLINE batch_bool<float, A> gt(batch<float, A> const& self, batch<float, A> const& other, requires_arch<wasm>) noexcept
0685 {
0686 return wasm_f32x4_gt(self, other);
0687 }
0688 template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
0689 XSIMD_INLINE batch_bool<T, A> gt(batch<T, A> const& self, batch<T, A> const& other, requires_arch<wasm>) noexcept
0690 {
0691 if (std::is_signed<T>::value)
0692 {
0693 XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
0694 {
0695 return wasm_i8x16_gt(self, other);
0696 }
0697 else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
0698 {
0699 return wasm_i16x8_gt(self, other);
0700 }
0701 else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
0702 {
0703 return wasm_i32x4_gt(self, other);
0704 }
0705 else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
0706 {
0707 return wasm_i64x2_gt(self, other);
0708 }
0709 else
0710 {
0711 assert(false && "unsupported arch/op combination");
0712 return {};
0713 }
0714 }
0715 else
0716 {
0717 XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
0718 {
0719 return wasm_u8x16_gt(self, other);
0720 }
0721 else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
0722 {
0723 return wasm_u16x8_gt(self, other);
0724 }
0725 else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
0726 {
0727 return wasm_u32x4_gt(self, other);
0728 }
0729 else
0730 {
0731 return gt(self, other, generic {});
0732 }
0733 }
0734 }
0735
0736 template <class A>
0737 XSIMD_INLINE batch_bool<double, A> gt(batch<double, A> const& self, batch<double, A> const& other, requires_arch<wasm>) noexcept
0738 {
0739 return wasm_f64x2_gt(self, other);
0740 }
0741
0742
0743 template <class A>
0744 XSIMD_INLINE batch<float, A> haddp(batch<float, A> const* row, requires_arch<wasm>) noexcept
0745 {
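// Interleave and add so that lane i of the result is the horizontal sum of row[i].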
0746 v128_t tmp0 = wasm_i32x4_shuffle(row[0], row[1], 0, 4, 1, 5);
0747 v128_t tmp1 = wasm_i32x4_shuffle(row[0], row[1], 2, 6, 3, 7);
0748 v128_t tmp2 = wasm_i32x4_shuffle(row[2], row[3], 2, 6, 3, 7);
0749 tmp0 = wasm_f32x4_add(tmp0, tmp1);
0750 tmp1 = wasm_i32x4_shuffle(row[2], row[3], 0, 4, 1, 5);
0751 tmp1 = wasm_f32x4_add(tmp1, tmp2);
0752 tmp2 = wasm_i32x4_shuffle(tmp1, tmp0, 6, 7, 2, 3);
0753 tmp0 = wasm_i32x4_shuffle(tmp0, tmp1, 0, 1, 4, 5);
0754 return wasm_f32x4_add(tmp0, tmp2);
0755 }
0756 template <class A>
0757 XSIMD_INLINE batch<double, A> haddp(batch<double, A> const* row, requires_arch<wasm>) noexcept
0758 {
0759 return wasm_f64x2_add(wasm_i64x2_shuffle(row[0], row[1], 0, 2),
0760 wasm_i64x2_shuffle(row[0], row[1], 1, 3));
0761 }
0762
0763
0764 template <class A, size_t I>
0765 XSIMD_INLINE batch<float, A> insert(batch<float, A> const& self, float val, index<I> pos, requires_arch<wasm>) noexcept
0766 {
0767 return wasm_f32x4_replace_lane(self, pos, val);
0768 }
0769 template <class A, class T, size_t I, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
0770 XSIMD_INLINE batch<T, A> insert(batch<T, A> const& self, T val, index<I> pos, requires_arch<wasm>) noexcept
0771 {
0772 if (std::is_signed<T>::value)
0773 {
0774 XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
0775 {
0776 return wasm_i8x16_replace_lane(self, pos, val);
0777 }
0778 else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
0779 {
0780 return wasm_i16x8_replace_lane(self, pos, val);
0781 }
0782 else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
0783 {
0784 return wasm_i32x4_replace_lane(self, pos, val);
0785 }
0786 else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
0787 {
0788 return wasm_i64x2_replace_lane(self, pos, val);
0789 }
0790 else
0791 {
0792 assert(false && "unsupported arch/op combination");
0793 return {};
0794 }
0795 }
0796 else
0797 {
0798 XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
0799 {
0800 return wasm_u8x16_replace_lane(self, pos, val);
0801 }
0802 else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
0803 {
0804 return wasm_u16x8_replace_lane(self, pos, val);
0805 }
0806 else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
0807 {
0808 return wasm_u32x4_replace_lane(self, pos, val);
0809 }
0810 else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
0811 {
0812 return wasm_u64x2_replace_lane(self, pos, val);
0813 }
0814 else
0815 {
0816 assert(false && "unsupported arch/op combination");
0817 return {};
0818 }
0819 }
0820 }
0821
0822 template <class A, size_t I>
0823 XSIMD_INLINE batch<double, A> insert(batch<double, A> const& self, double val, index<I> pos, requires_arch<wasm>) noexcept
0824 {
0825 return wasm_f64x2_replace_lane(self, pos, val);
0826 }
0827
0828
0829 template <class A>
0830 XSIMD_INLINE batch_bool<float, A> isnan(batch<float, A> const& self, requires_arch<wasm>) noexcept
0831 {
return wasm_f32x4_ne(self, self);
0833 }
0834 template <class A>
0835 XSIMD_INLINE batch_bool<double, A> isnan(batch<double, A> const& self, requires_arch<wasm>) noexcept
0836 {
return wasm_f64x2_ne(self, self);
0838 }
0839
0840
0841 template <class A>
0842 XSIMD_INLINE batch_bool<float, A> le(batch<float, A> const& self, batch<float, A> const& other, requires_arch<wasm>) noexcept
0843 {
0844 return wasm_f32x4_le(self, other);
0845 }
0846 template <class A>
0847 XSIMD_INLINE batch_bool<double, A> le(batch<double, A> const& self, batch<double, A> const& other, requires_arch<wasm>) noexcept
0848 {
0849 return wasm_f64x2_le(self, other);
0850 }
0851
0852
0853 template <class A>
0854 XSIMD_INLINE batch<float, A> load_aligned(float const* mem, convert<float>, requires_arch<wasm>) noexcept
0855 {
0856 return wasm_v128_load(mem);
0857 }
0858 template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
0859 XSIMD_INLINE batch<T, A> load_aligned(T const* mem, convert<T>, requires_arch<wasm>) noexcept
0860 {
0861 return wasm_v128_load((v128_t const*)mem);
0862 }
0863 template <class A>
0864 XSIMD_INLINE batch<double, A> load_aligned(double const* mem, convert<double>, requires_arch<wasm>) noexcept
0865 {
0866 return wasm_v128_load(mem);
0867 }
0868
0869
0870 namespace detail
0871 {
0872 template <class A>
0873 XSIMD_INLINE batch<std::complex<float>, A> load_complex(batch<float, A> const& hi, batch<float, A> const& lo, requires_arch<wasm>) noexcept
0874 {
0875 return { wasm_i32x4_shuffle(hi, lo, 0, 2, 4, 6), wasm_i32x4_shuffle(hi, lo, 1, 3, 5, 7) };
0876 }
0877 template <class A>
0878 XSIMD_INLINE batch<std::complex<double>, A> load_complex(batch<double, A> const& hi, batch<double, A> const& lo, requires_arch<wasm>) noexcept
0879 {
0880 return { wasm_i64x2_shuffle(hi, lo, 0, 2), wasm_i64x2_shuffle(hi, lo, 1, 3) };
0881 }
0882 }
0883
0884
0885 template <class A>
0886 XSIMD_INLINE batch<float, A> load_unaligned(float const* mem, convert<float>, requires_arch<wasm>) noexcept
0887 {
0888 return wasm_v128_load(mem);
0889 }
0890 template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
0891 XSIMD_INLINE batch<T, A> load_unaligned(T const* mem, convert<T>, requires_arch<wasm>) noexcept
0892 {
0893 return wasm_v128_load((v128_t const*)mem);
0894 }
0895 template <class A>
0896 XSIMD_INLINE batch<double, A> load_unaligned(double const* mem, convert<double>, requires_arch<wasm>) noexcept
0897 {
0898 return wasm_v128_load(mem);
0899 }
0900
0901
0902 template <class A>
0903 XSIMD_INLINE batch_bool<float, A> lt(batch<float, A> const& self, batch<float, A> const& other, requires_arch<wasm>) noexcept
0904 {
0905 return wasm_f32x4_lt(self, other);
0906 }
0907 template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
0908 XSIMD_INLINE batch_bool<T, A> lt(batch<T, A> const& self, batch<T, A> const& other, requires_arch<wasm>) noexcept
0909 {
0910 if (std::is_signed<T>::value)
0911 {
0912 XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
0913 {
0914 return wasm_i8x16_lt(self, other);
0915 }
0916 else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
0917 {
0918 return wasm_i16x8_lt(self, other);
0919 }
0920 else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
0921 {
0922 return wasm_i32x4_lt(self, other);
0923 }
0924 else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
0925 {
0926 return wasm_i64x2_lt(self, other);
0927 }
0928 else
0929 {
0930 assert(false && "unsupported arch/op combination");
0931 return {};
0932 }
0933 }
0934 else
0935 {
0936 XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
0937 {
0938 return wasm_u8x16_lt(self, other);
0939 }
0940 else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
0941 {
0942 return wasm_u16x8_lt(self, other);
0943 }
0944 else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
0945 {
0946 return wasm_u32x4_lt(self, other);
0947 }
0948 else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
0949 {
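// wasm SIMD has no unsigned 64-bit compare: bias both operands by 2^63 so the
// comparison becomes signed, take the sign of (a & ~b) | ((a - b) & ~(a ^ b))
// (Hacker's Delight), and broadcast it across each 64-bit lane.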
0950 auto xself = wasm_v128_xor(self, wasm_i64x2_splat(std::numeric_limits<int64_t>::lowest()));
0951 auto xother = wasm_v128_xor(other, wasm_i64x2_splat(std::numeric_limits<int64_t>::lowest()));
0952 v128_t tmp1 = wasm_i64x2_sub(xself, xother);
0953 v128_t tmp2 = wasm_v128_xor(xself, xother);
0954 v128_t tmp3 = wasm_v128_andnot(xself, xother);
0955 v128_t tmp4 = wasm_v128_andnot(tmp1, tmp2);
0956 v128_t tmp5 = wasm_v128_or(tmp3, tmp4);
0957 v128_t tmp6 = wasm_i32x4_shr(tmp5, 31);
0958 return wasm_i32x4_shuffle(tmp6, wasm_i32x4_splat(0), 1, 1, 3, 3);
0959 }
0960 else
0961 {
0962 assert(false && "unsupported arch/op combination");
0963 return {};
0964 }
0965 }
0966 }
0967
0968 template <class A>
0969 XSIMD_INLINE batch_bool<double, A> lt(batch<double, A> const& self, batch<double, A> const& other, requires_arch<wasm>) noexcept
0970 {
0971 return wasm_f64x2_lt(self, other);
0972 }
0973
0974
0975 template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
0976 XSIMD_INLINE uint64_t mask(batch_bool<T, A> const& self, requires_arch<wasm>) noexcept
0977 {
0978 XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
0979 {
0980 return wasm_i8x16_bitmask(self);
0981 }
0982 else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
0983 {
0984 return wasm_i16x8_bitmask(self);
0985 }
0986 else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
0987 {
0988 return wasm_i32x4_bitmask(self);
0989 }
0990 else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
0991 {
0992 return wasm_i64x2_bitmask(self);
0993 }
0994 else
0995 {
0996 assert(false && "unsupported arch/op combination");
0997 return {};
0998 }
0999 }
1000 template <class A>
1001 XSIMD_INLINE uint64_t mask(batch_bool<float, A> const& self, requires_arch<wasm>) noexcept
1002 {
1003 return wasm_i32x4_bitmask(self);
1004 }
1005
1006 template <class A>
1007 XSIMD_INLINE uint64_t mask(batch_bool<double, A> const& self, requires_arch<wasm>) noexcept
1008 {
1009 return wasm_i64x2_bitmask(self);
1010 }
1011
1012
1013 template <class A>
1014 XSIMD_INLINE batch<float, A> max(batch<float, A> const& self, batch<float, A> const& other, requires_arch<wasm>) noexcept
1015 {
1016 return wasm_f32x4_pmax(self, other);
1017 }
1018 template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1019 XSIMD_INLINE batch<T, A> max(batch<T, A> const& self, batch<T, A> const& other, requires_arch<wasm>) noexcept
1020 {
1021 return select(self > other, self, other);
1022 }
1023 template <class A>
1024 XSIMD_INLINE batch<double, A> max(batch<double, A> const& self, batch<double, A> const& other, requires_arch<wasm>) noexcept
1025 {
1026 return wasm_f64x2_pmax(self, other);
1027 }
1028
1029
1030 template <class A>
1031 XSIMD_INLINE batch<float, A> min(batch<float, A> const& self, batch<float, A> const& other, requires_arch<wasm>) noexcept
1032 {
1033 return wasm_f32x4_pmin(self, other);
1034 }
1035 template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1036 XSIMD_INLINE batch<T, A> min(batch<T, A> const& self, batch<T, A> const& other, requires_arch<wasm>) noexcept
1037 {
1038 return select(self <= other, self, other);
1039 }
1040 template <class A>
1041 XSIMD_INLINE batch<double, A> min(batch<double, A> const& self, batch<double, A> const& other, requires_arch<wasm>) noexcept
1042 {
1043 return wasm_f64x2_pmin(self, other);
1044 }
1045
1046
1047 template <class A>
1048 XSIMD_INLINE batch<float, A> mul(batch<float, A> const& self, batch<float, A> const& other, requires_arch<wasm>) noexcept
1049 {
1050 return wasm_f32x4_mul(self, other);
1051 }
1052 template <class A>
1053 XSIMD_INLINE batch<double, A> mul(batch<double, A> const& self, batch<double, A> const& other, requires_arch<wasm>) noexcept
1054 {
1055 return wasm_f64x2_mul(self, other);
1056 }
1057
1058
1059 template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1060 XSIMD_INLINE batch<T, A> neg(batch<T, A> const& self, requires_arch<wasm>) noexcept
1061 {
1062 XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
1063 {
1064 return wasm_i8x16_neg(self);
1065 }
1066 else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
1067 {
1068 return wasm_i16x8_neg(self);
1069 }
1070 else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
1071 {
1072 return wasm_i32x4_neg(self);
1073 }
1074 else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
1075 {
1076 return wasm_i64x2_neg(self);
1077 }
1078 else
1079 {
1080 assert(false && "unsupported arch/op combination");
1081 return {};
1082 }
1083 }
1084
1085 template <class A>
1086 XSIMD_INLINE batch<float, A> neg(batch<float, A> const& self, requires_arch<wasm>) noexcept
1087 {
1088 return wasm_f32x4_neg(self);
1089 }
1090
1091 template <class A>
1092 XSIMD_INLINE batch<double, A> neg(batch<double, A> const& self, requires_arch<wasm>) noexcept
1093 {
1094 return wasm_f64x2_neg(self);
1095 }
1096
1097
1098 template <class A>
1099 XSIMD_INLINE batch_bool<float, A> neq(batch<float, A> const& self, batch<float, A> const& other, requires_arch<wasm>) noexcept
1100 {
1101 return wasm_f32x4_ne(self, other);
1102 }
1103 template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1104 XSIMD_INLINE batch_bool<T, A> neq(batch<T, A> const& self, batch<T, A> const& other, requires_arch<wasm>) noexcept
1105 {
1106 return ~(self == other);
1107 }
1108 template <class A>
1109 XSIMD_INLINE batch_bool<float, A> neq(batch_bool<float, A> const& self, batch_bool<float, A> const& other, requires_arch<wasm>) noexcept
1110 {
1111 return wasm_f32x4_ne(self, other);
1112 }
1113 template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1114 XSIMD_INLINE batch_bool<T, A> neq(batch_bool<T, A> const& self, batch_bool<T, A> const& other, requires_arch<wasm>) noexcept
1115 {
1116 return ~(self == other);
1117 }
1118
1119 template <class A>
1120 XSIMD_INLINE batch_bool<double, A> neq(batch<double, A> const& self, batch<double, A> const& other, requires_arch<wasm>) noexcept
1121 {
1122 return wasm_f64x2_ne(self, other);
1123 }
1124 template <class A>
1125 XSIMD_INLINE batch_bool<double, A> neq(batch_bool<double, A> const& self, batch_bool<double, A> const& other, requires_arch<wasm>) noexcept
1126 {
1127 return wasm_f64x2_ne(self, other);
1128 }
1129
1130
1131 template <class A>
1132 XSIMD_INLINE batch<float, A> reciprocal(batch<float, A> const& self, requires_arch<wasm>) noexcept
1133 {
1134 v128_t one = wasm_f32x4_splat(1.0f);
1135 return wasm_f32x4_div(one, self);
1136 }
1137 template <class A>
1138 XSIMD_INLINE batch<double, A> reciprocal(batch<double, A> const& self, requires_arch<wasm>) noexcept
1139 {
1140 v128_t one = wasm_f64x2_splat(1.0);
1141 return wasm_f64x2_div(one, self);
1142 }
1143
1144
1145 template <class A>
1146 XSIMD_INLINE float reduce_add(batch<float, A> const& self, requires_arch<wasm>) noexcept
1147 {
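// Horizontal sum: fold the upper half onto the lower half, then fold the two
// remaining lanes.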
1148 v128_t tmp0 = wasm_f32x4_add(self, wasm_i32x4_shuffle(self, self, 6, 7, 2, 3));
1149 v128_t tmp1 = wasm_i32x4_shuffle(tmp0, tmp0, 1, 0, 4, 4);
1150 v128_t tmp2 = wasm_f32x4_add(tmp0, tmp1);
1151 v128_t tmp3 = wasm_i32x4_shuffle(tmp0, tmp2, 4, 1, 2, 3);
1152 return wasm_f32x4_extract_lane(tmp3, 0);
1153 }
1154 template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1155 XSIMD_INLINE T reduce_add(batch<T, A> const& self, requires_arch<wasm>) noexcept
1156 {
1157 XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
1158 {
1159 v128_t tmp0 = wasm_i32x4_shuffle(self, wasm_i32x4_splat(0), 2, 3, 0, 0);
1160 v128_t tmp1 = wasm_i32x4_add(self, tmp0);
1161 v128_t tmp2 = wasm_i32x4_shuffle(tmp1, wasm_i32x4_splat(0), 1, 0, 0, 0);
1162 v128_t tmp3 = wasm_i32x4_add(tmp1, tmp2);
1163 return wasm_i32x4_extract_lane(tmp3, 0);
1164 }
1165 else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
1166 {
1167 v128_t tmp0 = wasm_i32x4_shuffle(self, wasm_i32x4_splat(0), 2, 3, 0, 0);
1168 v128_t tmp1 = wasm_i64x2_add(self, tmp0);
1169 return wasm_i64x2_extract_lane(tmp1, 0);
1170 }
1171 else
1172 {
1173 return hadd(self, generic {});
1174 }
1175 }
1176 template <class A>
1177 XSIMD_INLINE double reduce_add(batch<double, A> const& self, requires_arch<wasm>) noexcept
1178 {
1179 v128_t tmp0 = wasm_i64x2_shuffle(self, self, 1, 3);
1180 v128_t tmp1 = wasm_f64x2_add(self, tmp0);
1181 v128_t tmp2 = wasm_i64x2_shuffle(tmp0, tmp1, 2, 1);
1182 return wasm_f64x2_extract_lane(tmp2, 0);
1183 }
1184
1185
1186 template <class A>
1187 XSIMD_INLINE batch<float, A> rsqrt(batch<float, A> const& self, requires_arch<wasm>) noexcept
1188 {
1189 v128_t one = wasm_f32x4_splat(1.0f);
1190 return wasm_f32x4_div(one, wasm_f32x4_sqrt(self));
1191 }
1192 template <class A>
1193 XSIMD_INLINE batch<double, A> rsqrt(batch<double, A> const& self, requires_arch<wasm>) noexcept
1194 {
1195 v128_t one = wasm_f64x2_splat(1.0);
1196 return wasm_f64x2_div(one, wasm_f64x2_sqrt(self));
1197 }
1198
1199
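// slide_left<N>: move lanes towards higher byte positions by N bytes, zero-filling
// the low bytes (result byte i = x byte i - N, or 0 for i < N); any N >= 16 yields an
// all-zero register. E.g. slide_left<4> on int32_t lanes shifts each lane up by one slot.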
1200 template <size_t N, class A, class T>
1201 XSIMD_INLINE batch<T, A> slide_left(batch<T, A> const& x, requires_arch<wasm>) noexcept
1202 {
1203 return wasm_i8x16_shuffle(
1204 wasm_i64x2_const(0, 0), x, ((N) & 0xF0) ? 0 : 16 - ((N) & 0xF),
1205 ((N) & 0xF0) ? 0 : 17 - ((N) & 0xF), ((N) & 0xF0) ? 0 : 18 - ((N) & 0xF),
1206 ((N) & 0xF0) ? 0 : 19 - ((N) & 0xF), ((N) & 0xF0) ? 0 : 20 - ((N) & 0xF),
1207 ((N) & 0xF0) ? 0 : 21 - ((N) & 0xF), ((N) & 0xF0) ? 0 : 22 - ((N) & 0xF),
1208 ((N) & 0xF0) ? 0 : 23 - ((N) & 0xF), ((N) & 0xF0) ? 0 : 24 - ((N) & 0xF),
1209 ((N) & 0xF0) ? 0 : 25 - ((N) & 0xF), ((N) & 0xF0) ? 0 : 26 - ((N) & 0xF),
1210 ((N) & 0xF0) ? 0 : 27 - ((N) & 0xF), ((N) & 0xF0) ? 0 : 28 - ((N) & 0xF),
1211 ((N) & 0xF0) ? 0 : 29 - ((N) & 0xF), ((N) & 0xF0) ? 0 : 30 - ((N) & 0xF),
1212 ((N) & 0xF0) ? 0 : 31 - ((N) & 0xF));
1213 }
1214
1215
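// slide_right<N>: the mirror image, moving lanes towards lower byte positions
// (result byte i = x byte i + N, or 0 once past the end); any N >= 16 yields zero.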
1216 template <size_t N, class A, class T>
1217 XSIMD_INLINE batch<T, A> slide_right(batch<T, A> const& x, requires_arch<wasm>) noexcept
1218 {
1219 return wasm_i8x16_shuffle(
1220 x, wasm_i64x2_const(0, 0), ((N) & 0xF0) ? 16 : ((N) & 0xF) + 0,
1221 ((N) & 0xF0) ? 16 : ((N) & 0xF) + 1, ((N) & 0xF0) ? 16 : ((N) & 0xF) + 2,
1222 ((N) & 0xF0) ? 16 : ((N) & 0xF) + 3, ((N) & 0xF0) ? 16 : ((N) & 0xF) + 4,
1223 ((N) & 0xF0) ? 16 : ((N) & 0xF) + 5, ((N) & 0xF0) ? 16 : ((N) & 0xF) + 6,
1224 ((N) & 0xF0) ? 16 : ((N) & 0xF) + 7, ((N) & 0xF0) ? 16 : ((N) & 0xF) + 8,
1225 ((N) & 0xF0) ? 16 : ((N) & 0xF) + 9, ((N) & 0xF0) ? 16 : ((N) & 0xF) + 10,
1226 ((N) & 0xF0) ? 16 : ((N) & 0xF) + 11, ((N) & 0xF0) ? 16 : ((N) & 0xF) + 12,
1227 ((N) & 0xF0) ? 16 : ((N) & 0xF) + 13, ((N) & 0xF0) ? 16 : ((N) & 0xF) + 14,
1228 ((N) & 0xF0) ? 16 : ((N) & 0xF) + 15);
1229 }
1230
1231
1232 template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1233 XSIMD_INLINE batch<T, A> sadd(batch<T, A> const& self, batch<T, A> const& other, requires_arch<wasm>) noexcept
1234 {
1235 if (std::is_signed<T>::value)
1236 {
1237 XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
1238 {
1239 return wasm_i8x16_add_sat(self, other);
1240 }
1241 else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
1242 {
1243 return wasm_i16x8_add_sat(self, other);
1244 }
1245 else
1246 {
1247 return sadd(self, other, generic {});
1248 }
1249 }
1250 else
1251 {
1252 XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
1253 {
1254 return wasm_u8x16_add_sat(self, other);
1255 }
1256 else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
1257 {
1258 return wasm_u16x8_add_sat(self, other);
1259 }
1260 else
1261 {
1262 return sadd(self, other, generic {});
1263 }
1264 }
1265 }
1266
1267
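// Blend as (cond & true_br) | (false_br & ~cond); note that wasm_v128_andnot(a, b)
// computes a & ~b, hence the argument order below.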
1268 template <class A>
1269 XSIMD_INLINE batch<float, A> select(batch_bool<float, A> const& cond, batch<float, A> const& true_br, batch<float, A> const& false_br, requires_arch<wasm>) noexcept
1270 {
1271 return wasm_v128_or(wasm_v128_and(cond, true_br), wasm_v128_andnot(false_br, cond));
1272 }
1273
1274 template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1275 XSIMD_INLINE batch<T, A> select(batch_bool<T, A> const& cond, batch<T, A> const& true_br, batch<T, A> const& false_br, requires_arch<wasm>) noexcept
1276 {
1277 return wasm_v128_or(wasm_v128_and(cond, true_br), wasm_v128_andnot(false_br, cond));
1278 }
1279 template <class A, class T, bool... Values, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1280 XSIMD_INLINE batch<T, A> select(batch_bool_constant<T, A, Values...> const&, batch<T, A> const& true_br, batch<T, A> const& false_br, requires_arch<wasm>) noexcept
1281 {
1282 return select(batch_bool<T, A> { Values... }, true_br, false_br, wasm {});
1283 }
1284 template <class A>
1285 XSIMD_INLINE batch<double, A> select(batch_bool<double, A> const& cond, batch<double, A> const& true_br, batch<double, A> const& false_br, requires_arch<wasm>) noexcept
1286 {
1287 return wasm_v128_or(wasm_v128_and(cond, true_br), wasm_v128_andnot(false_br, cond));
1288 }
1289
1290
1291 template <class A, class ITy, ITy I0, ITy I1, ITy I2, ITy I3>
1292 XSIMD_INLINE batch<float, A> shuffle(batch<float, A> const& x, batch<float, A> const& y, batch_constant<ITy, A, I0, I1, I2, I3>, requires_arch<wasm>) noexcept
1293 {
1294 return wasm_i32x4_shuffle(x, y, I0, I1, I2, I3);
1295 }
1296
1297 template <class A, class ITy, ITy I0, ITy I1>
1298 XSIMD_INLINE batch<double, A> shuffle(batch<double, A> const& x, batch<double, A> const& y, batch_constant<ITy, A, I0, I1>, requires_arch<wasm>) noexcept
1299 {
1300 return wasm_i64x2_shuffle(x, y, I0, I1);
1301 }
1302
1303
1304 template <class A, class... Values>
1305 XSIMD_INLINE batch<float, A> set(batch<float, A> const&, requires_arch<wasm>, Values... values) noexcept
1306 {
1307 static_assert(sizeof...(Values) == batch<float, A>::size, "consistent init");
1308 return wasm_f32x4_make(values...);
1309 }
1310
1311 template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1312 XSIMD_INLINE batch<T, A> set(batch<T, A> const&, requires_arch<wasm>, T v0, T v1) noexcept
1313 {
1314 return wasm_i64x2_make(v0, v1);
1315 }
1316
1317 template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1318 XSIMD_INLINE batch<T, A> set(batch<T, A> const&, requires_arch<wasm>, T v0, T v1, T v2, T v3) noexcept
1319 {
1320 return wasm_i32x4_make(v0, v1, v2, v3);
1321 }
1322
1323 template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1324 XSIMD_INLINE batch<T, A> set(batch<T, A> const&, requires_arch<wasm>, T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7) noexcept
1325 {
1326 return wasm_i16x8_make(v0, v1, v2, v3, v4, v5, v6, v7);
1327 }
1328
1329 template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1330 XSIMD_INLINE batch<T, A> set(batch<T, A> const&, requires_arch<wasm>, T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15) noexcept
1331 {
1332 return wasm_i8x16_make(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15);
1333 }
1334
1335 template <class A, class... Values>
1336 XSIMD_INLINE batch<double, A> set(batch<double, A> const&, requires_arch<wasm>, Values... values) noexcept
1337 {
1338 static_assert(sizeof...(Values) == batch<double, A>::size, "consistent init");
1339 return wasm_f64x2_make(values...);
1340 }
1341
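// batch_bool set: booleans are materialised as integer lanes of all-ones (-1) or
// all-zero by reusing the integer set() overloads above.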
1342 template <class A, class T, class... Values, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1343 XSIMD_INLINE batch_bool<T, A> set(batch_bool<T, A> const&, requires_arch<wasm>, Values... values) noexcept
1344 {
static_assert(sizeof...(Values) == batch_bool<T, A>::size, "consistent init");
return set(batch<T, A>(), A {}, static_cast<T>(values ? -1LL : 0LL)...).data;
1346 }
1347
1348 template <class A, class... Values>
1349 XSIMD_INLINE batch_bool<float, A> set(batch_bool<float, A> const&, requires_arch<wasm>, Values... values) noexcept
1350 {
1351 static_assert(sizeof...(Values) == batch_bool<float, A>::size, "consistent init");
1352 return set(batch<int32_t, A>(), A {}, static_cast<int32_t>(values ? -1LL : 0LL)...).data;
1353 }
1354
1355 template <class A, class... Values>
1356 XSIMD_INLINE batch_bool<double, A> set(batch_bool<double, A> const&, requires_arch<wasm>, Values... values) noexcept
1357 {
1358 static_assert(sizeof...(Values) == batch_bool<double, A>::size, "consistent init");
1359 return set(batch<int64_t, A>(), A {}, static_cast<int64_t>(values ? -1LL : 0LL)...).data;
1360 }
1361
1362
1363 template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1364 XSIMD_INLINE batch<T, A> ssub(batch<T, A> const& self, batch<T, A> const& other, requires_arch<wasm>) noexcept
1365 {
1366 if (std::is_signed<T>::value)
1367 {
1368 XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
1369 {
1370 return wasm_i8x16_sub_sat(self, other);
1371 }
1372 else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
1373 {
1374 return wasm_i16x8_sub_sat(self, other);
1375 }
1376 else
1377 {
1378 return ssub(self, other, generic {});
1379 }
1380 }
1381 else
1382 {
1383 XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
1384 {
1385 return wasm_u8x16_sub_sat(self, other);
1386 }
1387 else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
1388 {
1389 return wasm_u16x8_sub_sat(self, other);
1390 }
1391 else
1392 {
1393 return ssub(self, other, generic {});
1394 }
1395 }
1396 }
1397
1398
1399 template <class A>
1400 XSIMD_INLINE void store_aligned(float* mem, batch<float, A> const& self, requires_arch<wasm>) noexcept
1401 {
1402 return wasm_v128_store(mem, self);
1403 }
1404 template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1405 XSIMD_INLINE void store_aligned(T* mem, batch<T, A> const& self, requires_arch<wasm>) noexcept
1406 {
1407 return wasm_v128_store((v128_t*)mem, self);
1408 }
1409 template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1410 XSIMD_INLINE void store_aligned(T* mem, batch_bool<T, A> const& self, requires_arch<wasm>) noexcept
1411 {
1412 return wasm_v128_store((v128_t*)mem, self);
1413 }
1414 template <class A>
1415 XSIMD_INLINE void store_aligned(double* mem, batch<double, A> const& self, requires_arch<wasm>) noexcept
1416 {
1417 return wasm_v128_store(mem, self);
1418 }
1419
1420
1421 namespace detail
1422 {
1423
1424 template <class A>
1425 XSIMD_INLINE batch<float, A> complex_low(batch<std::complex<float>, A> const& self, requires_arch<wasm>) noexcept
1426 {
1427 return wasm_i32x4_shuffle(self.real(), self.imag(), 0, 4, 1, 5);
1428 }
1429
1430 template <class A>
1431 XSIMD_INLINE batch<float, A> complex_high(batch<std::complex<float>, A> const& self, requires_arch<wasm>) noexcept
1432 {
1433 return wasm_i32x4_shuffle(self.real(), self.imag(), 2, 6, 3, 7);
1434 }
1435 template <class A>
1436 XSIMD_INLINE batch<double, A> complex_low(batch<std::complex<double>, A> const& self, requires_arch<wasm>) noexcept
1437 {
1438 return wasm_i64x2_shuffle(self.real(), self.imag(), 0, 2);
1439 }
1440 template <class A>
1441 XSIMD_INLINE batch<double, A> complex_high(batch<std::complex<double>, A> const& self, requires_arch<wasm>) noexcept
1442 {
1443 return wasm_i64x2_shuffle(self.real(), self.imag(), 1, 3);
1444 }
1445 }
1446
1447
1448 template <class A>
1449 XSIMD_INLINE void store_unaligned(float* mem, batch<float, A> const& self, requires_arch<wasm>) noexcept
1450 {
1451 return wasm_v128_store(mem, self);
1452 }
1453 template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1454 XSIMD_INLINE void store_unaligned(T* mem, batch<T, A> const& self, requires_arch<wasm>) noexcept
1455 {
1456 return wasm_v128_store((v128_t*)mem, self);
1457 }
1458 template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1459 XSIMD_INLINE void store_unaligned(T* mem, batch_bool<T, A> const& self, requires_arch<wasm>) noexcept
1460 {
1461 return wasm_v128_store((v128_t*)mem, self);
1462 }
1463 template <class A>
1464 XSIMD_INLINE void store_unaligned(double* mem, batch<double, A> const& self, requires_arch<wasm>) noexcept
1465 {
1466 return wasm_v128_store(mem, self);
1467 }
1468
1469
1470 template <class A>
1471 XSIMD_INLINE batch<float, A> sub(batch<float, A> const& self, batch<float, A> const& other, requires_arch<wasm>) noexcept
1472 {
1473 return wasm_f32x4_sub(self, other);
1474 }
1475 template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1476 XSIMD_INLINE batch<T, A> sub(batch<T, A> const& self, batch<T, A> const& other, requires_arch<wasm>) noexcept
1477 {
1478 XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
1479 {
1480 return wasm_i8x16_sub(self, other);
1481 }
1482 else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
1483 {
1484 return wasm_i16x8_sub(self, other);
1485 }
1486 else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
1487 {
1488 return wasm_i32x4_sub(self, other);
1489 }
1490 else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
1491 {
1492 return wasm_i64x2_sub(self, other);
1493 }
1494 else
1495 {
1496 assert(false && "unsupported arch/op combination");
1497 return {};
1498 }
1499 }
1500 template <class A>
1501 XSIMD_INLINE batch<double, A> sub(batch<double, A> const& self, batch<double, A> const& other, requires_arch<wasm>) noexcept
1502 {
1503 return wasm_f64x2_sub(self, other);
1504 }
1505
1506
1507 template <class A>
1508 XSIMD_INLINE batch<float, A> sqrt(batch<float, A> const& val, requires_arch<wasm>) noexcept
1509 {
1510 return wasm_f32x4_sqrt(val);
1511 }
1512 template <class A>
1513 XSIMD_INLINE batch<double, A> sqrt(batch<double, A> const& val, requires_arch<wasm>) noexcept
1514 {
1515 return wasm_f64x2_sqrt(val);
1516 }
1517
1518
1519 template <class A, uint32_t V0, uint32_t V1, uint32_t V2, uint32_t V3>
1520 XSIMD_INLINE batch<float, A> swizzle(batch<float, A> const& self, batch_constant<uint32_t, A, V0, V1, V2, V3>, requires_arch<wasm>) noexcept
1521 {
1522 return wasm_i32x4_shuffle(self, self, V0, V1, V2, V3);
1523 }
1524
1525 template <class A, uint64_t V0, uint64_t V1>
1526 XSIMD_INLINE batch<double, A> swizzle(batch<double, A> const& self, batch_constant<uint64_t, A, V0, V1>, requires_arch<wasm>) noexcept
1527 {
1528 return wasm_i64x2_shuffle(self, self, V0, V1);
1529 }
1530
1531 template <class A, uint64_t V0, uint64_t V1>
1532 XSIMD_INLINE batch<uint64_t, A> swizzle(batch<uint64_t, A> const& self, batch_constant<uint64_t, A, V0, V1>, requires_arch<wasm>) noexcept
1533 {
1534 return wasm_i64x2_shuffle(self, self, V0, V1);
1535 }
1536
1537 template <class A, uint64_t V0, uint64_t V1>
1538 XSIMD_INLINE batch<int64_t, A> swizzle(batch<int64_t, A> const& self, batch_constant<uint64_t, A, V0, V1> mask, requires_arch<wasm>) noexcept
1539 {
1540 return bitwise_cast<int64_t>(swizzle(bitwise_cast<uint64_t>(self), mask, wasm {}));
1541 }
1542
1543 template <class A, uint32_t V0, uint32_t V1, uint32_t V2, uint32_t V3>
1544 XSIMD_INLINE batch<uint32_t, A> swizzle(batch<uint32_t, A> const& self, batch_constant<uint32_t, A, V0, V1, V2, V3>, requires_arch<wasm>) noexcept
1545 {
1546 return wasm_i32x4_shuffle(self, self, V0, V1, V2, V3);
1547 }
1548
1549 template <class A, uint32_t V0, uint32_t V1, uint32_t V2, uint32_t V3>
1550 XSIMD_INLINE batch<int32_t, A> swizzle(batch<int32_t, A> const& self, batch_constant<uint32_t, A, V0, V1, V2, V3> mask, requires_arch<wasm>) noexcept
1551 {
1552 return bitwise_cast<int32_t>(swizzle(bitwise_cast<uint32_t>(self), mask, wasm {}));
1553 }
1554
1555 template <class A, uint16_t V0, uint16_t V1, uint16_t V2, uint16_t V3, uint16_t V4, uint16_t V5, uint16_t V6, uint16_t V7>
1556 XSIMD_INLINE batch<uint16_t, A> swizzle(batch<uint16_t, A> const& self, batch_constant<uint16_t, A, V0, V1, V2, V3, V4, V5, V6, V7>, requires_arch<wasm>) noexcept
1557 {
1558 return wasm_i16x8_shuffle(self, self, V0, V1, V2, V3, V4, V5, V6, V7);
1559 }
1560
1561 template <class A, uint16_t V0, uint16_t V1, uint16_t V2, uint16_t V3, uint16_t V4, uint16_t V5, uint16_t V6, uint16_t V7>
1562 XSIMD_INLINE batch<int16_t, A> swizzle(batch<int16_t, A> const& self, batch_constant<uint16_t, A, V0, V1, V2, V3, V4, V5, V6, V7> mask, requires_arch<wasm>) noexcept
1563 {
1564 return bitwise_cast<int16_t>(swizzle(bitwise_cast<uint16_t>(self), mask, wasm {}));
1565 }
1566
1567 template <class A, uint8_t V0, uint8_t V1, uint8_t V2, uint8_t V3, uint8_t V4, uint8_t V5, uint8_t V6, uint8_t V7,
1568 uint8_t V8, uint8_t V9, uint8_t V10, uint8_t V11, uint8_t V12, uint8_t V13, uint8_t V14, uint8_t V15>
1569 XSIMD_INLINE batch<uint8_t, A> swizzle(batch<uint8_t, A> const& self, batch_constant<uint8_t, A, V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13, V14, V15>, requires_arch<wasm>) noexcept
1570 {
1571 return wasm_i8x16_shuffle(self, self, V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13, V14, V15);
1572 }
1573
1574 template <class A, uint8_t V0, uint8_t V1, uint8_t V2, uint8_t V3, uint8_t V4, uint8_t V5, uint8_t V6, uint8_t V7,
1575 uint8_t V8, uint8_t V9, uint8_t V10, uint8_t V11, uint8_t V12, uint8_t V13, uint8_t V14, uint8_t V15>
1576 XSIMD_INLINE batch<int8_t, A> swizzle(batch<int8_t, A> const& self, batch_constant<uint8_t, A, V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13, V14, V15> mask, requires_arch<wasm>) noexcept
1577 {
1578 return bitwise_cast<int8_t>(swizzle(bitwise_cast<uint8_t>(self), mask, wasm {}));
1579 }
1580
1581
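// transpose: 4x4 blocks of 32-bit lanes via two interleave passes, 2x2 blocks of
// 64-bit lanes directly; other element sizes fall back to the generic implementation.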
1582 template <class A, class T>
1583 XSIMD_INLINE void transpose(batch<T, A>* matrix_begin, batch<T, A>* matrix_end, requires_arch<wasm>) noexcept
1584 {
1585 assert((matrix_end - matrix_begin == batch<T, A>::size) && "correctly sized matrix");
1586 (void)matrix_end;
1587 XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
1588 {
1589 auto r0 = matrix_begin[0], r1 = matrix_begin[1], r2 = matrix_begin[2], r3 = matrix_begin[3];
1590
1591 auto t0 = wasm_i32x4_shuffle(r0, r1, 0, 4, 1, 5);
1592 auto t1 = wasm_i32x4_shuffle(r0, r1, 2, 6, 3, 7);
1593
1594 auto t2 = wasm_i32x4_shuffle(r2, r3, 0, 4, 1, 5);
1595 auto t3 = wasm_i32x4_shuffle(r2, r3, 2, 6, 3, 7);
1596
1597 matrix_begin[0] = wasm_i32x4_shuffle(t0, t2, 0, 1, 4, 5);
1598 matrix_begin[1] = wasm_i32x4_shuffle(t0, t2, 2, 3, 6, 7);
1599 matrix_begin[2] = wasm_i32x4_shuffle(t1, t3, 0, 1, 4, 5);
1600 matrix_begin[3] = wasm_i32x4_shuffle(t1, t3, 2, 3, 6, 7);
1601 }
1602 else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
1603 {
1604 auto r0 = matrix_begin[0], r1 = matrix_begin[1];
1605
1606 matrix_begin[0] = wasm_i64x2_shuffle(r0, r1, 0, 2);
1607 matrix_begin[1] = wasm_i64x2_shuffle(r0, r1, 1, 3);
1608 }
1609 else
1610 {
1611 transpose(matrix_begin, matrix_end, generic {});
1612 }
1613 }
1614
1615
1616 template <class A>
1617 XSIMD_INLINE batch<float, A> trunc(batch<float, A> const& self, requires_arch<wasm>) noexcept
1618 {
1619 return wasm_f32x4_trunc(self);
1620 }
1621 template <class A>
1622 XSIMD_INLINE batch<double, A> trunc(batch<double, A> const& self, requires_arch<wasm>) noexcept
1623 {
1624 return wasm_f64x2_trunc(self);
1625 }
1626
1627
1628 template <class A>
1629 XSIMD_INLINE batch<float, A> zip_hi(batch<float, A> const& self, batch<float, A> const& other, requires_arch<wasm>) noexcept
1630 {
1631 return wasm_i32x4_shuffle(self, other, 2, 6, 3, 7);
1632 }
1633 template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1634 XSIMD_INLINE batch<T, A> zip_hi(batch<T, A> const& self, batch<T, A> const& other, requires_arch<wasm>) noexcept
1635 {
1636 XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
1637 {
1638 return wasm_i8x16_shuffle(self, other, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31);
1639 }
1640 else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
1641 {
1642 return wasm_i16x8_shuffle(self, other, 4, 12, 5, 13, 6, 14, 7, 15);
1643 }
1644 else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
1645 {
1646 return wasm_i32x4_shuffle(self, other, 2, 6, 3, 7);
1647 }
1648 else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
1649 {
1650 return wasm_i64x2_shuffle(self, other, 1, 3);
1651 }
1652 else
1653 {
1654 assert(false && "unsupported arch/op combination");
1655 return {};
1656 }
1657 }
1658 template <class A>
1659 XSIMD_INLINE batch<double, A> zip_hi(batch<double, A> const& self, batch<double, A> const& other, requires_arch<wasm>) noexcept
1660 {
1661 return wasm_i64x2_shuffle(self, other, 1, 3);
1662 }
1663
1664
1665 template <class A>
1666 XSIMD_INLINE batch<float, A> zip_lo(batch<float, A> const& self, batch<float, A> const& other, requires_arch<wasm>) noexcept
1667 {
1668 return wasm_i32x4_shuffle(self, other, 0, 4, 1, 5);
1669 }
1670 template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1671 XSIMD_INLINE batch<T, A> zip_lo(batch<T, A> const& self, batch<T, A> const& other, requires_arch<wasm>) noexcept
1672 {
1673 XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
1674 {
1675 return wasm_i8x16_shuffle(self, other, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
1676 }
1677 else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
1678 {
1679 return wasm_i16x8_shuffle(self, other, 0, 8, 1, 9, 2, 10, 3, 11);
1680 }
1681 else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
1682 {
1683 return wasm_i32x4_shuffle(self, other, 0, 4, 1, 5);
1684 }
1685 else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
1686 {
1687 return wasm_i64x2_shuffle(self, other, 0, 2);
1688 }
1689 else
1690 {
1691 assert(false && "unsupported arch/op combination");
1692 return {};
1693 }
1694 }
1695 template <class A>
1696 XSIMD_INLINE batch<double, A> zip_lo(batch<double, A> const& self, batch<double, A> const& other, requires_arch<wasm>) noexcept
1697 {
1698 return wasm_i64x2_shuffle(self, other, 0, 2);
1699 }
1700 }
1701 }
1702
#endif