#ifndef XSIMD_GENERIC_MEMORY_HPP
#define XSIMD_GENERIC_MEMORY_HPP

#include <algorithm>
#include <cassert>
#include <complex>
#include <stdexcept>

#include "../../types/xsimd_batch_constant.hpp"
#include "./xsimd_generic_details.hpp"

namespace xsimd
{
    template <typename T, class A, T... Values>
    struct batch_constant;

    template <typename T, class A, bool... Values>
    struct batch_bool_constant;

    namespace kernel
    {

        using namespace types;

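        // compress
        // Pack the elements of x selected by mask into the lowest lanes of the
        // result; the remaining lanes are set to zero. The detail helper builds
        // the required runtime swizzle mask from the mask's bit pattern.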
        namespace detail
        {
            template <class IT, class A, class I, size_t... Is>
            XSIMD_INLINE batch<IT, A> create_compress_swizzle_mask(I bitmask, ::xsimd::detail::index_sequence<Is...>)
            {
                alignas(A::alignment()) IT mask_buffer[batch<IT, A>::size] = { Is... };
                size_t inserted = 0;
                for (size_t i = 0; i < sizeof...(Is); ++i)
                    if ((bitmask >> i) & 1u)
                        std::swap(mask_buffer[inserted++], mask_buffer[i]);
                return batch<IT, A>::load_aligned(&mask_buffer[0]);
            }
        }

        template <typename A, typename T>
        XSIMD_INLINE batch<T, A>
        compress(batch<T, A> const& x, batch_bool<T, A> const& mask,
                 kernel::requires_arch<generic>) noexcept
        {
            using IT = as_unsigned_integer_t<T>;
            constexpr std::size_t size = batch_bool<T, A>::size;
            auto bitmask = mask.mask();
            auto z = select(mask, x, batch<T, A>((T)0));
            auto compress_mask = detail::create_compress_swizzle_mask<IT, A>(bitmask, ::xsimd::detail::make_index_sequence<size>());
            return swizzle(z, compress_mask);
        }

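        // expand
        // Inverse of compress: distribute the lowest lanes of x into the lanes
        // selected by mask, zeroing the unselected lanes.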
        namespace detail
        {
            template <class IT, class A, class I, size_t... Is>
            XSIMD_INLINE batch<IT, A> create_expand_swizzle_mask(I bitmask, ::xsimd::detail::index_sequence<Is...>)
            {
                batch<IT, A> swizzle_mask(IT(0));
                IT j = 0;
                (void)std::initializer_list<bool> { ((swizzle_mask = insert(swizzle_mask, j, index<Is>())), (j += ((bitmask >> Is) & 1u)), true)... };
                return swizzle_mask;
            }
        }

        template <typename A, typename T>
        XSIMD_INLINE batch<T, A>
        expand(batch<T, A> const& x, batch_bool<T, A> const& mask,
               kernel::requires_arch<generic>) noexcept
        {
            constexpr std::size_t size = batch_bool<T, A>::size;
            auto bitmask = mask.mask();
            auto swizzle_mask = detail::create_expand_swizzle_mask<as_unsigned_integer_t<T>, A>(bitmask, ::xsimd::detail::make_index_sequence<size>());
            auto z = swizzle(x, swizzle_mask);
            return select(mask, z, batch<T, A>(T(0)));
        }

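        // extract_pair
        // Return the window of `size` elements starting at position i of the
        // concatenation (other, self), built through aligned scalar buffers.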
        template <class A, class T>
        XSIMD_INLINE batch<T, A> extract_pair(batch<T, A> const& self, batch<T, A> const& other, std::size_t i, requires_arch<generic>) noexcept
        {
            constexpr std::size_t size = batch<T, A>::size;
            assert(i < size && "index in bounds");

            alignas(A::alignment()) T self_buffer[size];
            self.store_aligned(self_buffer);

            alignas(A::alignment()) T other_buffer[size];
            other.store_aligned(other_buffer);

            alignas(A::alignment()) T concat_buffer[size];

            for (std::size_t j = 0; j < (size - i); ++j)
            {
                concat_buffer[j] = other_buffer[i + j];
                if (j < i)
                {
                    concat_buffer[size - 1 - j] = self_buffer[i - 1 - j];
                }
            }
            return batch<T, A>::load_aligned(concat_buffer);
        }

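        // gather
        // Generic emulation of a gather: lane k of the result is src[index.get(k)].
        // The detail helpers unroll the per-lane loads via compile-time recursion.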
        namespace detail
        {
            template <size_t N, typename T, typename A, typename U, typename V, typename std::enable_if<N == 0, int>::type = 0>
            inline batch<T, A> gather(U const* src, batch<V, A> const& index,
                                      ::xsimd::index<N> I) noexcept
            {
                return insert(batch<T, A> {}, static_cast<T>(src[index.get(I)]), I);
            }

            template <size_t N, typename T, typename A, typename U, typename V, typename std::enable_if<N != 0, int>::type = 0>
            inline batch<T, A>
            gather(U const* src, batch<V, A> const& index, ::xsimd::index<N> I) noexcept
            {
                static_assert(N <= batch<V, A>::size, "Incorrect value in recursion!");

                const auto test = gather<N - 1, T, A>(src, index, {});
                return insert(test, static_cast<T>(src[index.get(I)]), I);
            }
        }

        template <typename T, typename A, typename V>
        XSIMD_INLINE batch<T, A>
        gather(batch<T, A> const&, T const* src, batch<V, A> const& index,
               kernel::requires_arch<generic>) noexcept
        {
            static_assert(batch<T, A>::size == batch<V, A>::size,
                          "Index and destination sizes must match");

            return detail::gather<batch<V, A>::size - 1, T, A>(src, index, {});
        }

        template <typename T, typename A, typename U, typename V>
        XSIMD_INLINE detail::sizes_mismatch_t<T, U, batch<T, A>>
        gather(batch<T, A> const&, U const* src, batch<V, A> const& index,
               kernel::requires_arch<generic>) noexcept
        {
            static_assert(batch<T, A>::size == batch<V, A>::size,
                          "Index and destination sizes must match");

            return detail::gather<batch<V, A>::size - 1, T, A>(src, index, {});
        }

        template <typename T, typename A, typename U, typename V>
        XSIMD_INLINE detail::stride_match_t<T, U, batch<T, A>>
        gather(batch<T, A> const&, U const* src, batch<V, A> const& index,
               kernel::requires_arch<generic>) noexcept
        {
            static_assert(batch<T, A>::size == batch<V, A>::size,
                          "Index and destination sizes must match");

            return batch_cast<T>(kernel::gather(batch<U, A> {}, src, index, A {}));
        }

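        // insert
        // Replace lane I of self with val, implemented as a select between self
        // and a broadcast of val driven by a compile-time mask.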
        template <class A, class T, size_t I>
        XSIMD_INLINE batch<T, A> insert(batch<T, A> const& self, T val, index<I>, requires_arch<generic>) noexcept
        {
            struct index_mask
            {
                static constexpr bool get(size_t index, size_t /*size*/)
                {
                    return index != I;
                }
            };
            batch<T, A> tmp(val);
            return select(make_batch_bool_constant<T, A, index_mask>(), self, tmp);
        }

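        // get
        // Extract a single lane by storing the batch to an aligned scalar buffer
        // and reading the requested element back.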
        template <class A, size_t I, class T>
        XSIMD_INLINE T get(batch<T, A> const& self, ::xsimd::index<I>, requires_arch<generic>) noexcept
        {
            alignas(A::alignment()) T buffer[batch<T, A>::size];
            self.store_aligned(&buffer[0]);
            return buffer[I];
        }

        template <class A, size_t I, class T>
        XSIMD_INLINE T get(batch_bool<T, A> const& self, ::xsimd::index<I>, requires_arch<generic>) noexcept
        {
            alignas(A::alignment()) bool buffer[batch_bool<T, A>::size];
            self.store_aligned(&buffer[0]);
            return buffer[I];
        }

        template <class A, size_t I, class T>
        XSIMD_INLINE auto get(batch<std::complex<T>, A> const& self, ::xsimd::index<I>, requires_arch<generic>) noexcept -> typename batch<std::complex<T>, A>::value_type
        {
            using T2 = typename batch<std::complex<T>, A>::value_type;
            alignas(A::alignment()) T2 buffer[batch<std::complex<T>, A>::size];
            self.store_aligned(&buffer[0]);
            return buffer[I];
        }

        template <class A, class T>
        XSIMD_INLINE T get(batch<T, A> const& self, std::size_t i, requires_arch<generic>) noexcept
        {
            alignas(A::alignment()) T buffer[batch<T, A>::size];
            self.store_aligned(&buffer[0]);
            return buffer[i];
        }

        template <class A, class T>
        XSIMD_INLINE T get(batch_bool<T, A> const& self, std::size_t i, requires_arch<generic>) noexcept
        {
            alignas(A::alignment()) bool buffer[batch_bool<T, A>::size];
            self.store_aligned(&buffer[0]);
            return buffer[i];
        }

        template <class A, class T>
        XSIMD_INLINE auto get(batch<std::complex<T>, A> const& self, std::size_t i, requires_arch<generic>) noexcept -> typename batch<std::complex<T>, A>::value_type
        {
            using T2 = typename batch<std::complex<T>, A>::value_type;
            alignas(A::alignment()) T2 buffer[batch<std::complex<T>, A>::size];
            self.store_aligned(&buffer[0]);
            return buffer[i];
        }

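        // load_aligned with conversion
        // Dispatch on the conversion kind: use fast_cast when a vectorized
        // conversion exists, otherwise fall back to an element-wise scalar copy.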
        namespace detail
        {
            template <class A, class T_in, class T_out>
            XSIMD_INLINE batch<T_out, A> load_aligned(T_in const* mem, convert<T_out>, requires_arch<generic>, with_fast_conversion) noexcept
            {
                using batch_type_in = batch<T_in, A>;
                using batch_type_out = batch<T_out, A>;
                return fast_cast(batch_type_in::load_aligned(mem), batch_type_out(), A {});
            }

            template <class A, class T_in, class T_out>
            XSIMD_INLINE batch<T_out, A> load_aligned(T_in const* mem, convert<T_out>, requires_arch<generic>, with_slow_conversion) noexcept
            {
                static_assert(!std::is_same<T_in, T_out>::value, "there should be a direct load for this type combination");
                using batch_type_out = batch<T_out, A>;
                alignas(A::alignment()) T_out buffer[batch_type_out::size];
                std::copy(mem, mem + batch_type_out::size, std::begin(buffer));
                return batch_type_out::load_aligned(buffer);
            }
        }

        template <class A, class T_in, class T_out>
        XSIMD_INLINE batch<T_out, A> load_aligned(T_in const* mem, convert<T_out> cvt, requires_arch<generic>) noexcept
        {
            return detail::load_aligned<A>(mem, cvt, A {}, detail::conversion_type<A, T_in, T_out> {});
        }

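        // load_unaligned with conversion
        // Same dispatch as load_aligned; the slow path forwards to the aligned
        // scalar fallback, which copies element by element and therefore does not
        // require aligned memory.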
        namespace detail
        {
            template <class A, class T_in, class T_out>
            XSIMD_INLINE batch<T_out, A> load_unaligned(T_in const* mem, convert<T_out>, requires_arch<generic>, with_fast_conversion) noexcept
            {
                using batch_type_in = batch<T_in, A>;
                using batch_type_out = batch<T_out, A>;
                return fast_cast(batch_type_in::load_unaligned(mem), batch_type_out(), A {});
            }

            template <class A, class T_in, class T_out>
            XSIMD_INLINE batch<T_out, A> load_unaligned(T_in const* mem, convert<T_out> cvt, requires_arch<generic>, with_slow_conversion) noexcept
            {
                static_assert(!std::is_same<T_in, T_out>::value, "there should be a direct load for this type combination");
                return load_aligned<A>(mem, cvt, generic {}, with_slow_conversion {});
            }
        }

        template <class A, class T_in, class T_out>
        XSIMD_INLINE batch<T_out, A> load_unaligned(T_in const* mem, convert<T_out> cvt, requires_arch<generic>) noexcept
        {
            return detail::load_unaligned<A>(mem, cvt, generic {}, detail::conversion_type<A, T_in, T_out> {});
        }

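        // rotate_right
        // Rotate the lanes of self by N positions: lane i of the result is lane
        // (i - N) % size of the input, expressed as a compile-time swizzle.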
        template <size_t N, class A, class T>
        XSIMD_INLINE batch<T, A> rotate_right(batch<T, A> const& self, requires_arch<generic>) noexcept
        {
            struct rotate_generator
            {
                static constexpr size_t get(size_t index, size_t size)
                {
                    return (index - N) % size;
                }
            };

            return swizzle(self, make_batch_constant<as_unsigned_integer_t<T>, A, rotate_generator>(), A {});
        }

        template <size_t N, class A, class T>
        XSIMD_INLINE batch<std::complex<T>, A> rotate_right(batch<std::complex<T>, A> const& self, requires_arch<generic>) noexcept
        {
            return { rotate_right<N>(self.real()), rotate_right<N>(self.imag()) };
        }

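        // rotate_left
        // Rotate the lanes of self by N positions in the opposite direction: lane i
        // of the result is lane (i + N) % size of the input.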
        template <size_t N, class A, class T>
        XSIMD_INLINE batch<T, A> rotate_left(batch<T, A> const& self, requires_arch<generic>) noexcept
        {
            struct rotate_generator
            {
                static constexpr size_t get(size_t index, size_t size)
                {
                    return (index + N) % size;
                }
            };

            return swizzle(self, make_batch_constant<as_unsigned_integer_t<T>, A, rotate_generator>(), A {});
        }

        template <size_t N, class A, class T>
        XSIMD_INLINE batch<std::complex<T>, A> rotate_left(batch<std::complex<T>, A> const& self, requires_arch<generic>) noexcept
        {
            return { rotate_left<N>(self.real()), rotate_left<N>(self.imag()) };
        }

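        // scatter
        // Generic emulation of a scatter: write src.get(k) to dst[index.get(k)] for
        // every lane k, unrolled via compile-time recursion in the detail helpers.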
        namespace detail
        {
            template <size_t N, typename T, typename A, typename U, typename V, typename std::enable_if<N == 0, int>::type = 0>
            XSIMD_INLINE void scatter(batch<T, A> const& src, U* dst,
                                      batch<V, A> const& index,
                                      ::xsimd::index<N> I) noexcept
            {
                dst[index.get(I)] = static_cast<U>(src.get(I));
            }

            template <size_t N, typename T, typename A, typename U, typename V, typename std::enable_if<N != 0, int>::type = 0>
            XSIMD_INLINE void
            scatter(batch<T, A> const& src, U* dst, batch<V, A> const& index,
                    ::xsimd::index<N> I) noexcept
            {
                static_assert(N <= batch<V, A>::size, "Incorrect value in recursion!");

                kernel::detail::scatter<N - 1, T, A, U, V>(
                    src, dst, index, {});
                dst[index.get(I)] = static_cast<U>(src.get(I));
            }
        }

        template <typename A, typename T, typename V>
        XSIMD_INLINE void
        scatter(batch<T, A> const& src, T* dst,
                batch<V, A> const& index,
                kernel::requires_arch<generic>) noexcept
        {
            static_assert(batch<T, A>::size == batch<V, A>::size,
                          "Source and index sizes must match");
            kernel::detail::scatter<batch<V, A>::size - 1, T, A, T, V>(
                src, dst, index, {});
        }

        template <typename A, typename T, typename U, typename V>
        XSIMD_INLINE detail::sizes_mismatch_t<T, U, void>
        scatter(batch<T, A> const& src, U* dst,
                batch<V, A> const& index,
                kernel::requires_arch<generic>) noexcept
        {
            static_assert(batch<T, A>::size == batch<V, A>::size,
                          "Source and index sizes must match");
            kernel::detail::scatter<batch<V, A>::size - 1, T, A, U, V>(
                src, dst, index, {});
        }

        template <typename A, typename T, typename U, typename V>
        XSIMD_INLINE detail::stride_match_t<T, U, void>
        scatter(batch<T, A> const& src, U* dst,
                batch<V, A> const& index,
                kernel::requires_arch<generic>) noexcept
        {
            static_assert(batch<T, A>::size == batch<V, A>::size,
                          "Source and index sizes must match");
            const auto tmp = batch_cast<U>(src);
            kernel::scatter<A>(tmp, dst, index, A {});
        }

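        // Compile-time classification of shuffle index patterns; shuffle below uses
        // these predicates to lower onto cheaper primitives (swizzle, zip_lo,
        // zip_hi, select) whenever the index sequence matches one of them.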
        namespace detail
        {
            constexpr bool is_swizzle_fst(size_t)
            {
                return true;
            }
            template <typename ITy, typename... ITys>
            constexpr bool is_swizzle_fst(size_t bsize, ITy index, ITys... indices)
            {
                return index < bsize && is_swizzle_fst(bsize, indices...);
            }
            constexpr bool is_swizzle_snd(size_t)
            {
                return true;
            }
            template <typename ITy, typename... ITys>
            constexpr bool is_swizzle_snd(size_t bsize, ITy index, ITys... indices)
            {
                return index >= bsize && is_swizzle_snd(bsize, indices...);
            }

            constexpr bool is_zip_lo(size_t)
            {
                return true;
            }

            template <typename ITy>
            constexpr bool is_zip_lo(size_t, ITy)
            {
                return false;
            }

            template <typename ITy0, typename ITy1, typename... ITys>
            constexpr bool is_zip_lo(size_t bsize, ITy0 index0, ITy1 index1, ITys... indices)
            {
                // the pair at position p must be (p, bsize + p), with
                // p = (bsize - (sizeof...(indices) + 2)) / 2
                return index0 == ((bsize - (sizeof...(indices) + 2)) / 2) && index1 == (bsize + (bsize - (sizeof...(indices) + 2)) / 2) && is_zip_lo(bsize, indices...);
            }

            constexpr bool is_zip_hi(size_t)
            {
                return true;
            }

            template <typename ITy>
            constexpr bool is_zip_hi(size_t, ITy)
            {
                return false;
            }

            template <typename ITy0, typename ITy1, typename... ITys>
            constexpr bool is_zip_hi(size_t bsize, ITy0 index0, ITy1 index1, ITys... indices)
            {
                // the pair at position p must be (bsize / 2 + p, bsize + bsize / 2 + p)
                return index0 == (bsize / 2 + (bsize - (sizeof...(indices) + 2)) / 2) && index1 == (bsize / 2 + bsize + (bsize - (sizeof...(indices) + 2)) / 2) && is_zip_hi(bsize, indices...);
            }

            constexpr bool is_select(size_t)
            {
                return true;
            }

            template <typename ITy, typename... ITys>
            constexpr bool is_select(size_t bsize, ITy index, ITys... indices)
            {
                // the index at position p must be either p or bsize + p
                return (index < bsize ? index : index - bsize) == (bsize - sizeof...(ITys) - 1) && is_select(bsize, indices...);
            }

        }

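        // shuffle
        // Combine x and y according to the compile-time index sequence: indices in
        // [0, size) pick lanes from x, indices in [size, 2*size) pick lanes from y.
        // Recognized patterns are dispatched to specialized kernels; otherwise the
        // shuffle is emulated with two swizzles and a select (or a compiler builtin
        // when available).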
        template <class A, typename T, typename ITy, ITy... Indices>
        XSIMD_INLINE batch<T, A> shuffle(batch<T, A> const& x, batch<T, A> const& y, batch_constant<ITy, A, Indices...>, requires_arch<generic>) noexcept
        {
            constexpr size_t bsize = sizeof...(Indices);
            static_assert(bsize == batch<T, A>::size, "valid shuffle");

            XSIMD_IF_CONSTEXPR(detail::is_swizzle_fst(bsize, Indices...))
            {
                return swizzle(x, batch_constant<ITy, A, ((Indices >= bsize) ? 0 : Indices)...>());
            }

            XSIMD_IF_CONSTEXPR(detail::is_swizzle_snd(bsize, Indices...))
            {
                return swizzle(y, batch_constant<ITy, A, ((Indices >= bsize) ? (Indices - bsize) : 0)...>());
            }

            XSIMD_IF_CONSTEXPR(detail::is_zip_lo(bsize, Indices...))
            {
                return zip_lo(x, y);
            }

            XSIMD_IF_CONSTEXPR(detail::is_zip_hi(bsize, Indices...))
            {
                return zip_hi(x, y);
            }

            XSIMD_IF_CONSTEXPR(detail::is_select(bsize, Indices...))
            {
                return select(batch_bool_constant<T, A, (Indices < bsize)...>(), x, y);
            }

#if defined(__has_builtin) && !defined(XSIMD_WITH_EMULATED)
#if __has_builtin(__builtin_shufflevector)
#define builtin_shuffle __builtin_shufflevector
#endif
#endif

#if defined(builtin_shuffle)
            typedef T vty __attribute__((__vector_size__(sizeof(batch<T, A>))));
            return (typename batch<T, A>::register_type)builtin_shuffle((vty)x.data, (vty)y.data, Indices...);
#else
            batch<T, A> x_lane = swizzle(x, batch_constant<ITy, A, ((Indices >= bsize) ? (Indices - bsize) : Indices)...>());
            batch<T, A> y_lane = swizzle(y, batch_constant<ITy, A, ((Indices >= bsize) ? (Indices - bsize) : Indices)...>());
            batch_bool_constant<T, A, (Indices < bsize)...> select_x_lane;
            return select(select_x_lane, x_lane, y_lane);
#endif
        }

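        // store
        // Store a batch_bool to a plain bool array: materialize the mask as a batch
        // of T, then narrow each element to bool.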
        template <class T, class A>
        XSIMD_INLINE void store(batch_bool<T, A> const& self, bool* mem, requires_arch<generic>) noexcept
        {
            using batch_type = batch<T, A>;
            constexpr auto size = batch_bool<T, A>::size;
            alignas(A::alignment()) T buffer[size];
            kernel::store_aligned<A>(&buffer[0], batch_type(self), A {});
            for (std::size_t i = 0; i < size; ++i)
                mem[i] = bool(buffer[i]);
        }

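        // store_aligned / store_unaligned with conversion
        // Stage the batch in an aligned scalar buffer of T_in, then copy the
        // elements to the destination type one by one.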
        template <class A, class T_in, class T_out>
        XSIMD_INLINE void store_aligned(T_out* mem, batch<T_in, A> const& self, requires_arch<generic>) noexcept
        {
            static_assert(!std::is_same<T_in, T_out>::value, "there should be a direct store for this type combination");
            alignas(A::alignment()) T_in buffer[batch<T_in, A>::size];
            store_aligned(&buffer[0], self);
            std::copy(std::begin(buffer), std::end(buffer), mem);
        }

        template <class A, class T_in, class T_out>
        XSIMD_INLINE void store_unaligned(T_out* mem, batch<T_in, A> const& self, requires_arch<generic>) noexcept
        {
            static_assert(!std::is_same<T_in, T_out>::value, "there should be a direct store for this type combination");
            return store_aligned<A>(mem, self, generic {});
        }

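        // swizzle
        // The complex overloads forward the mask to the real and imaginary parts;
        // the runtime-mask overload permutes through aligned scalar buffers, with
        // out[i] = self[mask[i]].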
        template <class A, class T, class ITy, ITy... Vs>
        XSIMD_INLINE batch<std::complex<T>, A> swizzle(batch<std::complex<T>, A> const& self, batch_constant<ITy, A, Vs...> mask, requires_arch<generic>) noexcept
        {
            return { swizzle(self.real(), mask), swizzle(self.imag(), mask) };
        }

        template <class A, class T, class ITy>
        XSIMD_INLINE batch<T, A> swizzle(batch<T, A> const& self, batch<ITy, A> mask, requires_arch<generic>) noexcept
        {
            constexpr size_t size = batch<T, A>::size;
            alignas(A::alignment()) T self_buffer[size];
            store_aligned(&self_buffer[0], self);

            alignas(A::alignment()) ITy mask_buffer[size];
            store_aligned(&mask_buffer[0], mask);

            alignas(A::alignment()) T out_buffer[size];
            for (size_t i = 0; i < size; ++i)
                out_buffer[i] = self_buffer[mask_buffer[i]];
            return batch<T, A>::load_aligned(out_buffer);
        }

        template <class A, class T, class ITy>
        XSIMD_INLINE batch<std::complex<T>, A> swizzle(batch<std::complex<T>, A> const& self, batch<ITy, A> mask, requires_arch<generic>) noexcept
        {
            return { swizzle(self.real(), mask), swizzle(self.imag(), mask) };
        }

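        // Architecture hooks for complex (de)interleaving. The generic versions are
        // never meant to be selected: the static_assert fires whenever no
        // architecture-specific implementation is available.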
        namespace detail
        {
            template <class A, class T>
            XSIMD_INLINE batch<std::complex<T>, A> load_complex(batch<T, A> const&, batch<T, A> const&, requires_arch<generic>) noexcept
            {
                static_assert(std::is_same<T, void>::value, "load_complex not implemented for the required architecture");
            }

            template <class A, class T>
            XSIMD_INLINE batch<T, A> complex_high(batch<std::complex<T>, A> const&, requires_arch<generic>) noexcept
            {
                static_assert(std::is_same<T, void>::value, "complex_high not implemented for the required architecture");
            }

            template <class A, class T>
            XSIMD_INLINE batch<T, A> complex_low(batch<std::complex<T>, A> const&, requires_arch<generic>) noexcept
            {
                static_assert(std::is_same<T, void>::value, "complex_low not implemented for the required architecture");
            }
        }

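        // load_complex_aligned / load_complex_unaligned
        // Load the two consecutive real batches holding the interleaved
        // (real, imag) pairs and deinterleave them through detail::load_complex.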
        template <class A, class T_out, class T_in>
        XSIMD_INLINE batch<std::complex<T_out>, A> load_complex_aligned(std::complex<T_in> const* mem, convert<std::complex<T_out>>, requires_arch<generic>) noexcept
        {
            using real_batch = batch<T_out, A>;
            T_in const* buffer = reinterpret_cast<T_in const*>(mem);
            real_batch hi = real_batch::load_aligned(buffer),
                       lo = real_batch::load_aligned(buffer + real_batch::size);
            return detail::load_complex(hi, lo, A {});
        }

        template <class A, class T_out, class T_in>
        XSIMD_INLINE batch<std::complex<T_out>, A> load_complex_unaligned(std::complex<T_in> const* mem, convert<std::complex<T_out>>, requires_arch<generic>) noexcept
        {
            using real_batch = batch<T_out, A>;
            T_in const* buffer = reinterpret_cast<T_in const*>(mem);
            real_batch hi = real_batch::load_unaligned(buffer),
                       lo = real_batch::load_unaligned(buffer + real_batch::size);
            return detail::load_complex(hi, lo, A {});
        }

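        // store_complex_aligned / store_complex_unaligned
        // Re-interleave the real and imaginary parts through detail::complex_high
        // and detail::complex_low, then store the two halves back to memory.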
        template <class A, class T_out, class T_in>
        XSIMD_INLINE void store_complex_aligned(std::complex<T_out>* dst, batch<std::complex<T_in>, A> const& src, requires_arch<generic>) noexcept
        {
            using real_batch = batch<T_in, A>;
            real_batch hi = detail::complex_high(src, A {});
            real_batch lo = detail::complex_low(src, A {});
            T_out* buffer = reinterpret_cast<T_out*>(dst);
            lo.store_aligned(buffer);
            hi.store_aligned(buffer + real_batch::size);
        }

        template <class A, class T_out, class T_in>
        XSIMD_INLINE void store_complex_unaligned(std::complex<T_out>* dst, batch<std::complex<T_in>, A> const& src, requires_arch<generic>) noexcept
        {
            using real_batch = batch<T_in, A>;
            real_batch hi = detail::complex_high(src, A {});
            real_batch lo = detail::complex_low(src, A {});
            T_out* buffer = reinterpret_cast<T_out*>(dst);
            lo.store_unaligned(buffer);
            hi.store_unaligned(buffer + real_batch::size);
        }

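        // transpose
        // Generic transposition of a size x size matrix of batches, done in place
        // through an aligned scratch buffer.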
        template <class A, class T>
        XSIMD_INLINE void transpose(batch<T, A>* matrix_begin, batch<T, A>* matrix_end, requires_arch<generic>) noexcept
        {
            assert((matrix_end - matrix_begin == batch<T, A>::size) && "correctly sized matrix");
            (void)matrix_end;
            alignas(A::alignment()) T scratch_buffer[batch<T, A>::size * batch<T, A>::size];
            for (size_t i = 0; i < batch<T, A>::size; ++i)
            {
                matrix_begin[i].store_aligned(&scratch_buffer[i * batch<T, A>::size]);
            }

            for (size_t i = 0; i < batch<T, A>::size; ++i)
            {
                for (size_t j = 0; j < i; ++j)
                {
                    std::swap(scratch_buffer[i * batch<T, A>::size + j],
                              scratch_buffer[j * batch<T, A>::size + i]);
                }
            }
            for (size_t i = 0; i < batch<T, A>::size; ++i)
            {
                matrix_begin[i] = batch<T, A>::load_aligned(&scratch_buffer[i * batch<T, A>::size]);
            }
        }

    }

}

#endif