File indexing completed on 2025-01-30 10:25:49
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028 #ifndef VC_COMMON_SIMDARRAY_H_
0029 #define VC_COMMON_SIMDARRAY_H_
0030
0031
0032
0033
0034
0035 #include <array>
0036 #include <limits>
0037
0038 #include "writemaskedvector.h"
0039 #include "simdarrayhelper.h"
0040 #include "simdmaskarray.h"
0041 #include "utility.h"
0042 #include "interleave.h"
0043 #include "indexsequence.h"
0044 #include "transpose.h"
0045 #include "macros.h"
0046
0047 namespace Vc_VERSIONED_NAMESPACE
0048 {
0049
0050 namespace Common
0051 {
0052
0053
0054
0055
0056
0057
// Metafunction: from the list of Candidates (ordered widest-first by the user
// of this template), pick the first vector type whose lane count fits into N.
template <std::size_t N, class... Candidates> struct select_best_vector_type_impl;

// Recursion terminator: a single remaining candidate is chosen unconditionally
// (this is the narrowest fallback, e.g. the scalar implementation).
template <std::size_t N, class T> struct select_best_vector_type_impl<N, T> {
    using type = T;
};

// Recursive case: if N is smaller than the current candidate's width, keep
// looking at the remaining (narrower) candidates; otherwise T is the widest
// candidate that still fits and is selected.
template <std::size_t N, class T, class... Candidates>
struct select_best_vector_type_impl<N, T, Candidates...> {
    using type = typename std::conditional<
        (N < T::Size), typename select_best_vector_type_impl<N, Candidates...>::type,
        T>::type;
};
// Selects the widest native Vector<T> implementation whose width does not
// exceed N. The candidate list is assembled at preprocessing time from the
// compiled-in implementation: AVX2 (or AVX), then SSE, with the scalar
// implementation as the unconditional fallback.
template <class T, std::size_t N>
struct select_best_vector_type : select_best_vector_type_impl<N,
#ifdef Vc_IMPL_AVX2
                                     Vc::AVX2::Vector<T>,
#elif defined Vc_IMPL_AVX
                                     Vc::AVX::Vector<T>,
#endif
#ifdef Vc_IMPL_SSE
                                     Vc::SSE::Vector<T>,
#endif
                                     Vc::Scalar::Vector<T>> {
};
0082
0083 }
0084
0085
0086 namespace internal
0087 {
0088 template <typename T> T Vc_INTRINSIC Vc_PURE product_helper_(const T &l, const T &r) { return l * r; }
0089 template <typename T> T Vc_INTRINSIC Vc_PURE sum_helper_(const T &l, const T &r) { return l + r; }
0090 }
0091
0092
// Forward declarations of the lane-wise min/max overloads for SimdArray; the
// definitions follow later in this header.
template <typename T, std::size_t N, typename V, std::size_t M>
inline fixed_size_simd<T, N> min(const SimdArray<T, N, V, M> &x,
                                 const SimdArray<T, N, V, M> &y);
template <typename T, std::size_t N, typename V, std::size_t M>
inline fixed_size_simd<T, N> max(const SimdArray<T, N, V, M> &x,
                                 const SimdArray<T, N, V, M> &y);
0099
0100
0101
0102
0103
0104
0105 #define Vc_CURRENT_CLASS_NAME SimdArray
0106
0107
0108
0109
0110
0111
0112
0113
0114
/// Specialization of SimdArray for the case where N equals the width of the
/// best native vector type for T: the entire array lives in ONE native SIMD
/// vector ("atomic" storage) and every operation forwards to that vector.
template <typename T, std::size_t N, typename VectorType_>
class SimdArray<T, N, VectorType_, N>
{
    // Only element types with native SIMD support are permitted.
    static_assert(std::is_same<T, double>::value || std::is_same<T, float>::value ||
                      std::is_same<T, int32_t>::value ||
                      std::is_same<T, uint32_t>::value ||
                      std::is_same<T, int16_t>::value ||
                      std::is_same<T, uint16_t>::value,
                  "SimdArray<T, N> may only be used with T = { double, float, int32_t, uint32_t, "
                  "int16_t, uint16_t }");
    // The 3rd/4th template parameters are deduced implementation details;
    // users must not supply them explicitly.
    static_assert(
        std::is_same<VectorType_,
                     typename Common::select_best_vector_type<T, N>::type>::value &&
            VectorType_::size() == N,
        "ERROR: leave the third and fourth template parameters with their defaults. They "
        "are implementation details.");

public:
    // Distinguishes this single-vector specialization from the generic split
    // SimdArray (which stores two halves and sets is_atomic = false).
    static constexpr bool is_atomic = true;
    using VectorType = VectorType_;
    using vector_type = VectorType;
    using storage_type = vector_type;
    using vectorentry_type = typename vector_type::VectorEntryType;
    using value_type = T;
    using mask_type = fixed_size_simd_mask<T, N>;
    using index_type = fixed_size_simd<int, N>;
    // Number of elements; identical to the native vector's width here.
    static constexpr std::size_t size() { return N; }
    using Mask = mask_type;
    using MaskType = Mask;
    using MaskArgument = const MaskType &;
    using VectorEntryType = vectorentry_type;
    using EntryType = value_type;
    using IndexType = index_type;
    using AsArg = const SimdArray &;
    // Proxy type returned by the non-const operator[] (see get/set below).
    using reference = Detail::ElementReference<SimdArray>;
    static constexpr std::size_t Size = size();
    static constexpr std::size_t MemoryAlignment = storage_type::MemoryAlignment;

    // Default constructor: data is left uninitialized, like the native type.
    Vc_INTRINSIC SimdArray() = default;

    // Copy/move are trivial; everything lives in the single member `data`.
    Vc_INTRINSIC SimdArray(const SimdArray &) = default;
    Vc_INTRINSIC SimdArray(SimdArray &&) = default;
    Vc_INTRINSIC SimdArray &operator=(const SimdArray &) = default;

    // Broadcast constructors: replicate one scalar into all N lanes. The
    // non-const lvalue and rvalue overloads keep overload resolution away
    // from the templated converting constructors below.
    Vc_INTRINSIC SimdArray(const value_type &a) : data(a) {}
    Vc_INTRINSIC SimdArray(value_type &a) : data(a) {}
    Vc_INTRINSIC SimdArray(value_type &&a) : data(a) {}
    // Broadcast from an int when value_type is not int (e.g. SimdArray<float, N>(1)).
    template <
        typename U,
        typename = enable_if<std::is_same<U, int>::value && !std::is_same<int, value_type>::value>>
    Vc_INTRINSIC SimdArray(U a)
        : SimdArray(static_cast<value_type>(a))
    {
    }

    // Converting constructor from a SimdArray of equal N stored in one piece.
    template <class U, class V, class = enable_if<N == V::Size>>
    Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x)
        : data(simd_cast<vector_type>(internal_data(x)))
    {
    }
    // Converting constructor from a SimdArray split into two pieces
    // (V::Size < N <= 2 * V::Size): simd_cast from both halves.
    template <class U, class V, class = enable_if<(N > V::Size && N <= 2 * V::Size)>,
              class = U>
    Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x)
        : data(simd_cast<vector_type>(internal_data(internal_data0(x)),
                                      internal_data(internal_data1(x))))
    {
    }
    // Converting constructor from a SimdArray split into four pieces
    // (2 * V::Size < N <= 4 * V::Size): simd_cast from all four quarters.
    template <class U, class V, class = enable_if<(N > 2 * V::Size && N <= 4 * V::Size)>,
              class = U, class = U>
    Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x)
        : data(simd_cast<vector_type>(internal_data(internal_data0(internal_data0(x))),
                                      internal_data(internal_data1(internal_data0(x))),
                                      internal_data(internal_data0(internal_data1(x))),
                                      internal_data(internal_data1(internal_data1(x)))))
    {
    }

    // Construct from piece `Index` (of `Pieces`) of a larger vector.
    template <typename V, std::size_t Pieces, std::size_t Index>
    Vc_INTRINSIC SimdArray(Common::Segment<V, Pieces, Index> &&x)
        : data(simd_cast<vector_type, Index>(x.data))
    {
    }

    // Construct from an initializer list; it must contain exactly N values
    // (checked by Vc_ASSERT in debug builds).
    Vc_INTRINSIC SimdArray(const std::initializer_list<value_type> &init)
        : data(init.begin(), Vc::Unaligned)
    {
        Vc_ASSERT(init.size() == size());
    }

    // Converting constructor from any native SIMD vector type (explicitly
    // excluding SimdArray, which is handled by the constructors above).
    template <
        typename V,
        typename = enable_if<Traits::is_simd_vector<V>::value && !Traits::isSimdArray<V>::value>>
    Vc_INTRINSIC SimdArray(const V &x)
        : data(simd_cast<vector_type>(x))
    {
    }

    // Implicit conversion to a native Vector<U, A> of the same width. The
    // fixed_size ABI is excluded; it is covered by the reference conversions
    // below instead.
    template <typename U, typename A,
              typename =
                  enable_if<std::is_convertible<T, U>::value && Vector<U, A>::Size == N &&
                            !std::is_same<A, simd_abi::fixed_size<N>>::value>>
    Vc_INTRINSIC operator Vector<U, A>() const
    {
        return simd_cast<Vector<U, A>>(data);
    }
    // fixed_size_simd<T, N> derives from SimdArray, so the downcasts below
    // are assumed valid for objects created through that interface.
    operator fixed_size_simd<T, N> &()
    {
        return static_cast<fixed_size_simd<T, N> &>(*this);
    }
    operator const fixed_size_simd<T, N> &() const
    {
        return static_cast<const fixed_size_simd<T, N> &>(*this);
    }

    // The gather/scatter member-function interface is textually included; it
    // calls the gatherImplementation/scatterImplementation members defined
    // out-of-line after this class.
#include "gatherinterface.h"
#include "scatterinterface.h"

    // Special-initializer constructors (tag types Vc::Zero, Vc::One, ...).
    explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerZero) : data() {}
    explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerOne o) : data(o) {}
    explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerIndexesFromZero i) : data(i)
    {
    }
    // IndexesFromZero shifted by a compile-time Offset: {Offset, Offset+1, ...}.
    template <std::size_t Offset>
    explicit Vc_INTRINSIC SimdArray(
        Common::AddOffset<VectorSpecialInitializerIndexesFromZero, Offset>)
        : data(Vc::IndexesFromZero)
    {
        data += value_type(Offset);
    }

    // Zeroing helpers; the masked overloads only touch the selected lanes.
    Vc_INTRINSIC void setZero() { data.setZero(); }
    Vc_INTRINSIC void setZero(mask_type k) { data.setZero(internal_data(k)); }
    Vc_INTRINSIC void setZeroInverted() { data.setZeroInverted(); }
    Vc_INTRINSIC void setZeroInverted(mask_type k) { data.setZeroInverted(internal_data(k)); }

    // Set lanes to quiet NaN (all lanes, or only those selected by m).
    Vc_INTRINSIC void setQnan() { data.setQnan(); }
    Vc_INTRINSIC void setQnan(mask_type m) { data.setQnan(internal_data(m)); }

    // Build a result by handing the storage to an internal operation object
    // (part of the Vc implementation machinery; see simdarrayhelper.h).
    template <typename Op, typename... Args>
    static Vc_INTRINSIC fixed_size_simd<T, N> fromOperation(Op op, Args &&... args)
    {
        fixed_size_simd<T, N> r;
        Common::unpackArgumentsAuto(op, r.data, std::forward<Args>(args)...);
        return r;
    }

    // Like fromOperation, but for operations that produce no result.
    template <typename Op, typename... Args>
    static Vc_INTRINSIC void callOperation(Op op, Args &&... args)
    {
        Common::unpackArgumentsAuto(op, nullptr, std::forward<Args>(args)...);
    }

    static Vc_INTRINSIC fixed_size_simd<T, N> Zero()
    {
        return SimdArray(Vc::Zero);
    }
    static Vc_INTRINSIC fixed_size_simd<T, N> One()
    {
        return SimdArray(Vc::One);
    }
    static Vc_INTRINSIC fixed_size_simd<T, N> IndexesFromZero()
    {
        return SimdArray(Vc::IndexesFromZero);
    }
    static Vc_INTRINSIC fixed_size_simd<T, N> Random()
    {
        return fromOperation(Common::Operations::random());
    }

    // Load constructor: reads N values of arithmetic type U from mem (with
    // conversion to value_type); Flags selects aligned/unaligned/etc.
    template <class U, class Flags = DefaultLoadTag,
              class = enable_if<std::is_arithmetic<U>::value &&
                                Traits::is_load_store_flag<Flags>::value>>
    explicit Vc_INTRINSIC SimdArray(const U *mem, Flags f = {}) : data(mem, f)
    {
    }

    // Loads and stores forward directly to the underlying native vector.
    template <typename... Args> Vc_INTRINSIC void load(Args &&... args)
    {
        data.load(std::forward<Args>(args)...);
    }

    template <typename... Args> Vc_INTRINSIC void store(Args &&... args) const
    {
        data.store(std::forward<Args>(args)...);
    }

    // Logical negation: mask of the lanes for which !data holds.
    Vc_INTRINSIC mask_type operator!() const
    {
        return {private_init, !data};
    }

    Vc_INTRINSIC fixed_size_simd<T, N> operator-() const
    {
        return {private_init, -data};
    }

    // Unary plus returns an unmodified copy.
    Vc_INTRINSIC fixed_size_simd<T, N> operator+() const { return *this; }

    Vc_INTRINSIC fixed_size_simd<T, N> operator~() const
    {
        return {private_init, ~data};
    }

    // Shift operators: only enabled when both T and the shift count type U
    // are integral.
    template <typename U,
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
    Vc_INTRINSIC Vc_CONST fixed_size_simd<T, N> operator<<(U x) const
    {
        return {private_init, data << x};
    }
    template <typename U,
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
    Vc_INTRINSIC fixed_size_simd<T, N> &operator<<=(U x)
    {
        data <<= x;
        return *this;
    }
    template <typename U,
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
    Vc_INTRINSIC Vc_CONST fixed_size_simd<T, N> operator>>(U x) const
    {
        return {private_init, data >> x};
    }
    template <typename U,
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
    Vc_INTRINSIC fixed_size_simd<T, N> &operator>>=(U x)
    {
        data >>= x;
        return *this;
    }

    // Compound-assignment operators (+=, -=, *=, /=, %=, &=, |=, ^=, <<=,
    // >>=) are macro-generated; each forwards to the native vector.
#define Vc_BINARY_OPERATOR_(op)                                                          \
    Vc_INTRINSIC fixed_size_simd<T, N> &operator op##=(const SimdArray &rhs)             \
    {                                                                                    \
        data op## = rhs.data;                                                            \
        return *this;                                                                    \
    }
    Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATOR_);
    Vc_ALL_BINARY(Vc_BINARY_OPERATOR_);
    Vc_ALL_SHIFTS(Vc_BINARY_OPERATOR_);
#undef Vc_BINARY_OPERATOR_

    // Deprecated: prefer the free function isnegative(x).
    Vc_DEPRECATED("use isnegative(x) instead") Vc_INTRINSIC MaskType isNegative() const
    {
        return {private_init, isnegative(data)};
    }

private:
    // Element access backend for Detail::ElementReference.
    friend reference;
    Vc_INTRINSIC static value_type get(const SimdArray &o, int i) noexcept
    {
        return o.data[i];
    }
    template <typename U>
    Vc_INTRINSIC static void set(SimdArray &o, int i, U &&v) noexcept(
        noexcept(std::declval<value_type &>() = v))
    {
        o.data[i] = v;
    }

public:
    // Mutable element access returns a proxy so that writes go through set()
    // above; reference construction must be noexcept (checked statically).
    Vc_INTRINSIC reference operator[](size_t i) noexcept
    {
        static_assert(noexcept(reference{std::declval<SimdArray &>(), int()}), "");
        return {*this, int(i)};
    }
    Vc_INTRINSIC value_type operator[](size_t i) const noexcept
    {
        return get(*this, int(i));
    }

    // Write-masking: v(mask) = x; only assigns lanes selected by the mask.
    Vc_INTRINSIC Common::WriteMaskedVector<SimdArray, mask_type> operator()(const mask_type &k)
    {
        return {*this, k};
    }

    Vc_INTRINSIC void assign(const SimdArray &v, const mask_type &k)
    {
        data.assign(v.data, internal_data(k));
    }

    // Horizontal reductions (min, max, product, sum), each with an unmasked
    // and a masked overload forwarding to the native vector.
#define Vc_REDUCTION_FUNCTION_(name_)                                                    \
    Vc_INTRINSIC Vc_PURE value_type name_() const { return data.name_(); }               \
    Vc_INTRINSIC Vc_PURE value_type name_(mask_type mask) const                          \
    {                                                                                    \
        return data.name_(internal_data(mask));                                         \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
    Vc_REDUCTION_FUNCTION_(min);
    Vc_REDUCTION_FUNCTION_(max);
    Vc_REDUCTION_FUNCTION_(product);
    Vc_REDUCTION_FUNCTION_(sum);
#undef Vc_REDUCTION_FUNCTION_
    // Prefix-sum across the lanes.
    Vc_INTRINSIC Vc_PURE fixed_size_simd<T, N> partialSum() const
    {
        return {private_init, data.partialSum()};
    }

    // Apply f to every lane (optionally only to the lanes selected by k).
    template <typename F> Vc_INTRINSIC fixed_size_simd<T, N> apply(F &&f) const
    {
        return {private_init, data.apply(std::forward<F>(f))};
    }
    // NOTE(review): unlike the other masked members, this passes `k` rather
    // than internal_data(k); it appears to rely on an implicit conversion of
    // the fixed-size mask — confirm against Vector::apply's signature.
    template <typename F> Vc_INTRINSIC fixed_size_simd<T, N> apply(F &&f, const mask_type &k) const
    {
        return {private_init, data.apply(std::forward<F>(f), k)};
    }

    // Shift lanes by `amount` positions, filling with zeros.
    Vc_INTRINSIC fixed_size_simd<T, N> shifted(int amount) const
    {
        return {private_init, data.shifted(amount)};
    }

    // Shift lanes, filling vacated positions from shiftIn.
    template <std::size_t NN>
    Vc_INTRINSIC fixed_size_simd<T, N> shifted(int amount, const SimdArray<value_type, NN> &shiftIn)
        const
    {
        return {private_init, data.shifted(amount, simd_cast<VectorType>(shiftIn))};
    }

    // Rotate lanes by `amount` positions.
    Vc_INTRINSIC fixed_size_simd<T, N> rotated(int amount) const
    {
        return {private_init, data.rotated(amount)};
    }

    // Deprecated: prefer the free function exponent(x).
    Vc_DEPRECATED("use exponent(x) instead") Vc_INTRINSIC fixed_size_simd<T, N> exponent() const
    {
        return {private_init, exponent(data)};
    }

    // Interleave the low/high halves of *this with x.
    Vc_INTRINSIC fixed_size_simd<T, N> interleaveLow(SimdArray x) const
    {
        return {private_init, data.interleaveLow(x.data)};
    }
    Vc_INTRINSIC fixed_size_simd<T, N> interleaveHigh(SimdArray x) const
    {
        return {private_init, data.interleaveHigh(x.data)};
    }

    Vc_INTRINSIC fixed_size_simd<T, N> reversed() const
    {
        return {private_init, data.reversed()};
    }

    Vc_INTRINSIC fixed_size_simd<T, N> sorted() const
    {
        return {private_init, data.sorted()};
    }

    // Generator constructor: data[i] = gen(i). Enabled only for callables
    // invocable with std::size_t that are not SIMD vectors.
    template <class G, class = decltype(std::declval<G>()(std::size_t())),
              class = enable_if<!Traits::is_simd_vector<G>::value>>
    Vc_INTRINSIC SimdArray(const G &gen) : data(gen)
    {
    }
    template <typename G> static Vc_INTRINSIC fixed_size_simd<T, N> generate(const G &gen)
    {
        return {private_init, VectorType::generate(gen)};
    }

    // Deprecated: prefer the free function copysign(x, y).
    Vc_DEPRECATED("use copysign(x, y) instead")
    Vc_INTRINSIC fixed_size_simd<T, N> copySign(const SimdArray &x) const
    {
        return {private_init, Vc::copysign(data, x.data)};
    }

    // The free internal_data functions (defined below) are the only external
    // accessors of the private native vector.
    friend VectorType &internal_data<>(SimdArray &x);
    friend const VectorType &internal_data<>(const SimdArray &x);

    // Internal: wrap an already-computed native vector without conversion.
    Vc_INTRINSIC SimdArray(private_init_t, VectorType &&x) : data(std::move(x)) {}

    // Aligned operator new/delete so heap-allocated objects keep the
    // over-alignment required by the storage below.
    Vc_FREE_STORE_OPERATORS_ALIGNED(alignof(storage_type));

private:
    // The single native vector holding all N lanes. Alignment is widened (up
    // to the bound imposed by Common::BoundedAlignment) to the next power of
    // two of the total byte size, so that arrays of SimdArray pack without
    // breaking vector alignment.
    alignas(static_cast<std::size_t>(
        Common::BoundedAlignment<Common::NextPowerOfTwo<N>::value * sizeof(VectorType_) /
                                 VectorType_::size()>::value)) storage_type data;
};
// Out-of-line definitions of the static constexpr data members, required for
// odr-use before C++17 made constexpr static members implicitly inline.
template <typename T, std::size_t N, typename VectorType> constexpr std::size_t SimdArray<T, N, VectorType, N>::Size;
template <typename T, std::size_t N, typename VectorType>
constexpr std::size_t SimdArray<T, N, VectorType, N>::MemoryAlignment;
// Accessor for the private native vector inside an atomic SimdArray (a friend
// of the class). Not marked Vc_INTRINSIC on MSVC.
template <typename T, std::size_t N, typename VectorType>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
VectorType &internal_data(SimdArray<T, N, VectorType, N> &x)
{
    return x.data;
}
// Const overload of internal_data (see above). Not marked Vc_INTRINSIC on MSVC.
template <typename T, std::size_t N, typename VectorType>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
const VectorType &internal_data(const SimdArray<T, N, VectorType, N> &x)
{
    return x.data;
}
0532
0533
0534 template <class T> Vc_INTRINSIC T unwrap(const T &x) { return x; }
0535
0536 template <class T, size_t N, class V>
0537 Vc_INTRINSIC V unwrap(const SimdArray<T, N, V, N> &x)
0538 {
0539 return internal_data(x);
0540 }
0541
0542 template <class T, size_t Pieces, size_t Index>
0543 Vc_INTRINSIC auto unwrap(const Common::Segment<T, Pieces, Index> &x)
0544 -> decltype(x.to_fixed_size())
0545 {
0546 return unwrap(x.to_fixed_size());
0547 }
0548
0549
// Unmasked gather for the atomic SimdArray: unwraps the index container to
// its native representation and delegates to the native vector's gather.
template <typename T, std::size_t N, typename VectorType>
template <class MT, class IT, int Scale>
Vc_INTRINSIC void SimdArray<T, N, VectorType, N>::gatherImplementation(
    const Common::GatherArguments<MT, IT, Scale> &args)
{
    data.gather(Common::make_gather<Scale>(args.address, unwrap(args.indexes)));
}
// Masked gather: like the overload above, but only the lanes selected by
// `mask` are loaded.
template <typename T, std::size_t N, typename VectorType>
template <class MT, class IT, int Scale>
Vc_INTRINSIC void SimdArray<T, N, VectorType, N>::gatherImplementation(
    const Common::GatherArguments<MT, IT, Scale> &args, MaskArgument mask)
{
    data.gather(Common::make_gather<Scale>(args.address, unwrap(args.indexes)),
                mask);
}
0565
0566
// Unmasked scatter for the atomic SimdArray: stores each lane to
// mem[indexes[i]], delegating to the native vector's scatter.
template <typename T, std::size_t N, typename VectorType>
template <typename MT, typename IT>
inline void SimdArray<T, N, VectorType, N>::scatterImplementation(MT *mem,
                                                                  IT &&indexes) const
{
    data.scatter(mem, unwrap(std::forward<IT>(indexes)));
}
// Masked scatter: only the lanes selected by `mask` are written to memory.
template <typename T, std::size_t N, typename VectorType>
template <typename MT, typename IT>
inline void SimdArray<T, N, VectorType, N>::scatterImplementation(MT *mem,
                                                                  IT &&indexes,
                                                                  MaskArgument mask) const
{
    data.scatter(mem, unwrap(std::forward<IT>(indexes)), mask);
}
0582
0583
0584
0585
0586
0587
0588
0589
0590
0591
0592
0593
0594
0595
0596
0597
0598
0599
0600
0601
0602
0603
0604
0605
0606
0607
0608
0609
0610
0611
0612
0613
0614
0615
0616 template <typename T, size_t N, typename V, size_t Wt> class SimdArray
0617 {
0618 static_assert(std::is_same<T, double>::value ||
0619 std::is_same<T, float>::value ||
0620 std::is_same<T, int32_t>::value ||
0621 std::is_same<T, uint32_t>::value ||
0622 std::is_same<T, int16_t>::value ||
0623 std::is_same<T, uint16_t>::value, "SimdArray<T, N> may only be used with T = { double, float, int32_t, uint32_t, int16_t, uint16_t }");
0624 static_assert(
0625 std::is_same<V, typename Common::select_best_vector_type<T, N>::type>::value &&
0626 V::size() == Wt,
0627 "ERROR: leave the third and fourth template parameters with their defaults. They "
0628 "are implementation details.");
0629 static_assert(
0630
0631 std::is_same<typename V::EntryType, typename V::VectorEntryType>::value ||
0632
0633 (N % V::size() == 0),
0634 "SimdArray<(un)signed short, N> on MIC only works correctly for N = k * "
0635 "MIC::(u)short_v::size(), i.e. k * 16.");
0636
0637 using my_traits = SimdArrayTraits<T, N>;
0638 static constexpr std::size_t N0 = my_traits::N0;
0639 static constexpr std::size_t N1 = my_traits::N1;
0640 using Split = Common::Split<N0>;
0641 template <typename U, std::size_t K> using CArray = U[K];
0642
0643 public:
0644 static constexpr bool is_atomic = false;
0645 using storage_type0 = typename my_traits::storage_type0;
0646 using storage_type1 = typename my_traits::storage_type1;
0647 static_assert(storage_type0::size() == N0, "");
0648
0649
0650
0651
0652 using vector_type = V;
0653 using vectorentry_type = typename storage_type0::vectorentry_type;
0654 typedef vectorentry_type alias_type Vc_MAY_ALIAS;
0655
0656
0657 using value_type = T;
0658
0659
0660 using mask_type = fixed_size_simd_mask<T, N>;
0661
0662
0663 using index_type = fixed_size_simd<int, N>;
0664
0665
0666
0667
0668
0669
0670
0671
0672
0673
0674
0675 static constexpr std::size_t size() { return N; }
0676
0677
0678 using Mask = mask_type;
0679
0680 using MaskType = Mask;
0681 using MaskArgument = const MaskType &;
0682 using VectorEntryType = vectorentry_type;
0683
0684 using EntryType = value_type;
0685
0686 using IndexType = index_type;
0687 using AsArg = const SimdArray &;
0688
0689 using reference = Detail::ElementReference<SimdArray>;
0690
0691
0692 static constexpr std::size_t MemoryAlignment =
0693 storage_type0::MemoryAlignment > storage_type1::MemoryAlignment
0694 ? storage_type0::MemoryAlignment
0695 : storage_type1::MemoryAlignment;
0696
0697
0698
0699
0700
0701 static Vc_INTRINSIC fixed_size_simd<T, N> Zero()
0702 {
0703 return SimdArray(Vc::Zero);
0704 }
0705
0706
0707 static Vc_INTRINSIC fixed_size_simd<T, N> One()
0708 {
0709 return SimdArray(Vc::One);
0710 }
0711
0712
0713 static Vc_INTRINSIC fixed_size_simd<T, N> IndexesFromZero()
0714 {
0715 return SimdArray(Vc::IndexesFromZero);
0716 }
0717
0718
0719 static Vc_INTRINSIC fixed_size_simd<T, N> Random()
0720 {
0721 return fromOperation(Common::Operations::random());
0722 }
0723
0724 template <class G, class = decltype(std::declval<G>()(std::size_t())),
0725 class = enable_if<!Traits::is_simd_vector<G>::value>>
0726 Vc_INTRINSIC SimdArray(const G &gen)
0727 : data0(gen), data1([&](std::size_t i) { return gen(i + storage_type0::size()); })
0728 {
0729 }
0730
0731
0732 template <typename G> static Vc_INTRINSIC fixed_size_simd<T, N> generate(const G &gen)
0733 {
0734 auto tmp = storage_type0::generate(gen);
0735
0736
0737
0738
0739 return {std::move(tmp),
0740 storage_type1::generate([&](std::size_t i) { return gen(i + N0); })};
0741 }
0742
0743
0744
0745
0746
0747
0748 SimdArray() = default;
0749
0750
0751
0752
0753
0754
0755 Vc_INTRINSIC SimdArray(value_type a) : data0(a), data1(a) {}
0756 template <
0757 typename U,
0758 typename = enable_if<std::is_same<U, int>::value && !std::is_same<int, value_type>::value>>
0759 SimdArray(U a)
0760 : SimdArray(static_cast<value_type>(a))
0761 {
0762 }
0763
0764
0765
0766 SimdArray(const SimdArray &) = default;
0767 SimdArray(SimdArray &&) = default;
0768 SimdArray &operator=(const SimdArray &) = default;
0769
0770
0771 template <typename U, typename Flags = DefaultLoadTag,
0772 typename = enable_if<std::is_arithmetic<U>::value &&
0773 Traits::is_load_store_flag<Flags>::value>>
0774 explicit Vc_INTRINSIC SimdArray(const U *mem, Flags f = {})
0775 : data0(mem, f), data1(mem + storage_type0::size(), f)
0776 {
0777 }
0778
0779
0780 #ifndef Vc_MSVC
0781
0782
0783
0784
0785
0786
0787 template <typename U, std::size_t Extent, typename Flags = DefaultLoadTag,
0788 typename = enable_if<std::is_arithmetic<U>::value &&
0789 Traits::is_load_store_flag<Flags>::value>>
0790 explicit Vc_INTRINSIC SimdArray(CArray<U, Extent> &mem, Flags f = {})
0791 : data0(&mem[0], f), data1(&mem[storage_type0::size()], f)
0792 {
0793 }
0794
0795
0796
0797 template <typename U, std::size_t Extent, typename Flags = DefaultLoadTag,
0798 typename = enable_if<std::is_arithmetic<U>::value &&
0799 Traits::is_load_store_flag<Flags>::value>>
0800 explicit Vc_INTRINSIC SimdArray(const CArray<U, Extent> &mem, Flags f = {})
0801 : data0(&mem[0], f), data1(&mem[storage_type0::size()], f)
0802 {
0803 }
0804 #endif
0805
0806
0807 Vc_INTRINSIC SimdArray(const std::initializer_list<value_type> &init)
0808 : data0(init.begin(), Vc::Unaligned)
0809 , data1(init.begin() + storage_type0::size(), Vc::Unaligned)
0810 {
0811 Vc_ASSERT(init.size() == size());
0812 }
0813
0814 #include "gatherinterface.h"
0815 #include "scatterinterface.h"
0816
0817 explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerZero) : data0(), data1() {}
0818 explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerOne o) : data0(o), data1(o) {}
0819 explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerIndexesFromZero i)
0820 : data0(i)
0821 , data1(Common::AddOffset<VectorSpecialInitializerIndexesFromZero,
0822 storage_type0::size()>())
0823 {
0824 }
0825 template <size_t Offset>
0826 explicit Vc_INTRINSIC SimdArray(
0827 Common::AddOffset<VectorSpecialInitializerIndexesFromZero, Offset> i)
0828 : data0(i)
0829 , data1(Common::AddOffset<VectorSpecialInitializerIndexesFromZero,
0830 storage_type0::size() + Offset>())
0831 {
0832 }
0833
0834
0835 template <class W, class = enable_if<
0836 (Traits::is_simd_vector<W>::value &&
0837 Traits::simd_vector_size<W>::value == N &&
0838 !(std::is_convertible<Traits::entry_type_of<W>, T>::value &&
0839 Traits::isSimdArray<W>::value))>>
0840 Vc_INTRINSIC explicit SimdArray(W &&x) : data0(Split::lo(x)), data1(Split::hi(x))
0841 {
0842 }
0843
0844
0845 template <class W, class = enable_if<
0846 (Traits::isSimdArray<W>::value &&
0847 Traits::simd_vector_size<W>::value == N &&
0848 std::is_convertible<Traits::entry_type_of<W>, T>::value)>,
0849 class = W>
0850 Vc_INTRINSIC SimdArray(W &&x) : data0(Split::lo(x)), data1(Split::hi(x))
0851 {
0852 }
0853
0854 template <class W, std::size_t Pieces, std::size_t Index>
0855 Vc_INTRINSIC SimdArray(Common::Segment<W, Pieces, Index> &&x)
0856 : data0(Common::Segment<W, 2 * Pieces, 2 * Index>{x.data})
0857 , data1(Common::Segment<W, 2 * Pieces, 2 * Index + 1>{x.data})
0858 {
0859 }
0860
0861
0862
0863 template <typename U, typename A,
0864 typename =
0865 enable_if<std::is_convertible<T, U>::value && Vector<U, A>::Size == N &&
0866 !std::is_same<A, simd_abi::fixed_size<N>>::value>>
0867 operator Vector<U, A>() const
0868 {
0869 auto r = simd_cast<Vector<U, A>>(data0, data1);
0870 return r;
0871 }
0872 Vc_INTRINSIC operator fixed_size_simd<T, N> &()
0873 {
0874 return static_cast<fixed_size_simd<T, N> &>(*this);
0875 }
0876 Vc_INTRINSIC operator const fixed_size_simd<T, N> &() const
0877 {
0878 return static_cast<const fixed_size_simd<T, N> &>(*this);
0879 }
0880
0881
0882
0883 Vc_INTRINSIC void setZero()
0884 {
0885 data0.setZero();
0886 data1.setZero();
0887 }
0888 Vc_INTRINSIC void setZero(const mask_type &k)
0889 {
0890 data0.setZero(Split::lo(k));
0891 data1.setZero(Split::hi(k));
0892 }
0893 Vc_INTRINSIC void setZeroInverted()
0894 {
0895 data0.setZeroInverted();
0896 data1.setZeroInverted();
0897 }
0898 Vc_INTRINSIC void setZeroInverted(const mask_type &k)
0899 {
0900 data0.setZeroInverted(Split::lo(k));
0901 data1.setZeroInverted(Split::hi(k));
0902 }
0903
0904
0905 Vc_INTRINSIC void setQnan() {
0906 data0.setQnan();
0907 data1.setQnan();
0908 }
0909 Vc_INTRINSIC void setQnan(const mask_type &m) {
0910 data0.setQnan(Split::lo(m));
0911 data1.setQnan(Split::hi(m));
0912 }
0913
0914
0915 template <typename Op, typename... Args>
0916 static Vc_INTRINSIC fixed_size_simd<T, N> fromOperation(Op op, Args &&... args)
0917 {
0918 fixed_size_simd<T, N> r = {
0919 storage_type0::fromOperation(op, Split::lo(args)...),
0920
0921
0922 storage_type1::fromOperation(op, Split::hi(std::forward<Args>(args))...)};
0923 return r;
0924 }
0925
0926
0927 template <typename Op, typename... Args>
0928 static Vc_INTRINSIC void callOperation(Op op, Args &&... args)
0929 {
0930 storage_type0::callOperation(op, Split::lo(args)...);
0931 storage_type1::callOperation(op, Split::hi(std::forward<Args>(args))...);
0932 }
0933
0934
0935 template <typename U, typename... Args> Vc_INTRINSIC void load(const U *mem, Args &&... args)
0936 {
0937 data0.load(mem, Split::lo(args)...);
0938
0939 data1.load(mem + storage_type0::size(), Split::hi(std::forward<Args>(args))...);
0940 }
0941
0942 template <typename U, typename... Args> Vc_INTRINSIC void store(U *mem, Args &&... args) const
0943 {
0944 data0.store(mem, Split::lo(args)...);
0945
0946 data1.store(mem + storage_type0::size(), Split::hi(std::forward<Args>(args))...);
0947 }
0948
0949 Vc_INTRINSIC mask_type operator!() const
0950 {
0951 return {!data0, !data1};
0952 }
0953
0954 Vc_INTRINSIC fixed_size_simd<T, N> operator-() const
0955 {
0956 return {-data0, -data1};
0957 }
0958
0959
0960 Vc_INTRINSIC fixed_size_simd<T, N> operator+() const { return *this; }
0961
0962 Vc_INTRINSIC fixed_size_simd<T, N> operator~() const
0963 {
0964 return {~data0, ~data1};
0965 }
0966
0967
0968 template <typename U,
0969 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
0970 Vc_INTRINSIC Vc_CONST fixed_size_simd<T, N> operator<<(U x) const
0971 {
0972 return {data0 << x, data1 << x};
0973 }
0974 template <typename U,
0975 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
0976 Vc_INTRINSIC fixed_size_simd<T, N> &operator<<=(U x)
0977 {
0978 data0 <<= x;
0979 data1 <<= x;
0980 return *this;
0981 }
0982 template <typename U,
0983 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
0984 Vc_INTRINSIC Vc_CONST fixed_size_simd<T, N> operator>>(U x) const
0985 {
0986 return {data0 >> x, data1 >> x};
0987 }
0988 template <typename U,
0989 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
0990 Vc_INTRINSIC fixed_size_simd<T, N> &operator>>=(U x)
0991 {
0992 data0 >>= x;
0993 data1 >>= x;
0994 return *this;
0995 }
0996
0997
0998 #define Vc_BINARY_OPERATOR_(op) \
0999 Vc_INTRINSIC fixed_size_simd<T, N> &operator op##=(const SimdArray &rhs) \
1000 { \
1001 data0 op## = rhs.data0; \
1002 data1 op## = rhs.data1; \
1003 return *this; \
1004 }
1005 Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATOR_);
1006 Vc_ALL_BINARY(Vc_BINARY_OPERATOR_);
1007 Vc_ALL_SHIFTS(Vc_BINARY_OPERATOR_);
1008 #undef Vc_BINARY_OPERATOR_
1009
1010
1011
1012
1013
1014 private:
1015 friend reference;
1016 Vc_INTRINSIC static value_type get(const SimdArray &o, int i) noexcept
1017 {
1018 return reinterpret_cast<const alias_type *>(&o)[i];
1019 }
1020 template <typename U>
1021 Vc_INTRINSIC static void set(SimdArray &o, int i, U &&v) noexcept(
1022 noexcept(std::declval<value_type &>() = v))
1023 {
1024 reinterpret_cast<alias_type *>(&o)[i] = v;
1025 }
1026
1027 public:
1028
1029
1030
1031
1032
1033
1034
1035 Vc_INTRINSIC reference operator[](size_t i) noexcept
1036 {
1037 static_assert(noexcept(reference{std::declval<SimdArray &>(), int()}), "");
1038 return {*this, int(i)};
1039 }
1040
1041
1042 Vc_INTRINSIC value_type operator[](size_t index) const noexcept
1043 {
1044 return get(*this, int(index));
1045 }
1046
1047
1048
1049
1050 Vc_INTRINSIC Common::WriteMaskedVector<SimdArray, mask_type> operator()(
1051 const mask_type &mask)
1052 {
1053 return {*this, mask};
1054 }
1055
1056
1057 Vc_INTRINSIC void assign(const SimdArray &v, const mask_type &k)
1058 {
1059 data0.assign(v.data0, internal_data0(k));
1060 data1.assign(v.data1, internal_data1(k));
1061 }
1062
1063
1064 #define Vc_REDUCTION_FUNCTION_(name_, binary_fun_, scalar_fun_) \
1065 private: \
1066 template <typename ForSfinae = void> \
1067 Vc_INTRINSIC enable_if<std::is_same<ForSfinae, void>::value && \
1068 storage_type0::Size == storage_type1::Size, \
1069 value_type> name_##_impl() const \
1070 { \
1071 return binary_fun_(data0, data1).name_(); \
1072 } \
1073 \
1074 template <typename ForSfinae = void> \
1075 Vc_INTRINSIC enable_if<std::is_same<ForSfinae, void>::value && \
1076 storage_type0::Size != storage_type1::Size, \
1077 value_type> name_##_impl() const \
1078 { \
1079 return scalar_fun_(data0.name_(), data1.name_()); \
1080 } \
1081 \
1082 public: \
1083 \
1084 Vc_INTRINSIC value_type name_() const { return name_##_impl(); } \
1085 \
1086 Vc_INTRINSIC value_type name_(const mask_type &mask) const \
1087 { \
1088 if (Vc_IS_UNLIKELY(Split::lo(mask).isEmpty())) { \
1089 return data1.name_(Split::hi(mask)); \
1090 } else if (Vc_IS_UNLIKELY(Split::hi(mask).isEmpty())) { \
1091 return data0.name_(Split::lo(mask)); \
1092 } else { \
1093 return scalar_fun_(data0.name_(Split::lo(mask)), \
1094 data1.name_(Split::hi(mask))); \
1095 } \
1096 } \
1097 Vc_NOTHING_EXPECTING_SEMICOLON
1098 Vc_REDUCTION_FUNCTION_(min, Vc::min, std::min);
1099 Vc_REDUCTION_FUNCTION_(max, Vc::max, std::max);
1100 Vc_REDUCTION_FUNCTION_(product, internal::product_helper_, internal::product_helper_);
1101 Vc_REDUCTION_FUNCTION_(sum, internal::sum_helper_, internal::sum_helper_);
1102 #undef Vc_REDUCTION_FUNCTION_
1103
1104 Vc_INTRINSIC Vc_PURE fixed_size_simd<T, N> partialSum() const
1105 {
1106 auto ps0 = data0.partialSum();
1107 auto tmp = data1;
1108 tmp[0] += ps0[data0.size() - 1];
1109 return {std::move(ps0), tmp.partialSum()};
1110 }
1111
1112
1113
    /// Returns a new vector with \p f applied to every entry.
    /// \p f is deliberately not forwarded: it is used twice (once per half).
    template <typename F> inline fixed_size_simd<T, N> apply(F &&f) const
    {
        return {data0.apply(f), data1.apply(f)};
    }
1118
    /// Returns a new vector with \p f applied to the entries selected by \p k.
    template <typename F>
    inline fixed_size_simd<T, N> apply(F &&f, const mask_type &k) const
    {
        return {data0.apply(f, Split::lo(k)), data1.apply(f, Split::hi(k))};
    }
1124
1125
1126
    /// Shifts the vector entries by \p amount: result[i] = (*this)[i + amount],
    /// with zeros shifted in at the ends (see the generic overload below for
    /// the same index semantics spelled out element-wise).
    inline fixed_size_simd<T, N> shifted(int amount) const
    {
        constexpr int SSize = Size;
        constexpr int SSize0 = storage_type0::Size;
        constexpr int SSize1 = storage_type1::Size;
        if (amount == 0) {
            return *this;
        }
        if (amount < 0) {
            // shift towards higher indices: data0 entries shift into data1
            if (amount > -SSize0) {
                return {data0.shifted(amount), data1.shifted(amount, data0)};
            }
            if (amount == -SSize0) {
                // data0 moves wholesale into the data1 slot
                return {storage_type0(0), simd_cast<storage_type1>(data0)};
            }
            if (amount < -SSize0) {
                return {storage_type0(0), simd_cast<storage_type1>(data0.shifted(
                                              amount + SSize0))};
            }
            return Zero();
        } else {
            if (amount >= SSize) {
                return Zero();
            } else if (amount >= SSize0) {
                // everything remaining comes from data1
                return {
                    simd_cast<storage_type0>(data1).shifted(amount - SSize0),
                    storage_type1(0)};
            } else if (amount >= SSize1) {
                // data1 shifted by >= its own Size would be all zeros
                return {data0.shifted(amount, data1), storage_type1(0)};
            } else {
                return {data0.shifted(amount, data1), data1.shifted(amount)};
            }
        }
    }
1161
    /// Generic (element-wise) shifted() with a shift-in vector; used when the
    /// two storage halves differ in type or \p shiftIn has a different size.
    /// result[i] = (*this)[i + amount]; entries shifted out on one end are
    /// replaced with entries from \p shiftIn on the other.
    template <std::size_t NN>
    inline enable_if<
        !(std::is_same<storage_type0, storage_type1>::value &&
          N == NN),
        fixed_size_simd<T, N>>
        shifted(int amount, const SimdArray<value_type, NN> &shiftIn) const
    {
        constexpr int SSize = Size;
        if (amount < 0) {
            return fixed_size_simd<T, N>([&](int i) -> value_type {
                i += amount;
                if (i >= 0) {
                    return operator[](i);
                } else if (i >= -SSize) {
                    return shiftIn[i + SSize];
                }
                return 0;
            });
        }
        return fixed_size_simd<T, N>([&](int i) -> value_type {
            i += amount;
            if (i < SSize) {
                return operator[](i);
            } else if (i < 2 * SSize) {
                return shiftIn[i - SSize];
            }
            return 0;
        });
    }
1191
private:
    // True when the optimized bisecting shifted() overload below applies: both
    // storage halves have the same type and the shift-in array has the same
    // number of entries as this SimdArray.
    template <std::size_t NN> struct bisectable_shift
        : public std::integral_constant<bool,
                                        std::is_same<storage_type0, storage_type1>::value &&
                                            N == NN>
    {
    };
1201
public:
    /// Optimized shifted() with a shift-in vector for the bisectable case
    /// (equal-type storage halves, same N): covers every \p amount in
    /// [-2*Size, 2*Size] by composing half-vector shifts instead of the
    /// element-wise generator used in the generic overload.
    template <std::size_t NN>
    inline fixed_size_simd<T, N> shifted(
        enable_if<bisectable_shift<NN>::value, int> amount,
        const SimdArray<value_type, NN> &shiftIn) const
    {
        constexpr int SSize = Size;
        if (amount < 0) {
            // negative amounts: shiftIn supplies the entries below index 0
            if (amount > -static_cast<int>(storage_type0::Size)) {
                return {data0.shifted(amount, internal_data1(shiftIn)),
                        data1.shifted(amount, data0)};
            }
            if (amount == -static_cast<int>(storage_type0::Size)) {
                // exact half-vector shift: halves move over wholesale
                return {storage_type0(internal_data1(shiftIn)), storage_type1(data0)};
            }
            if (amount > -SSize) {
                return {
                    internal_data1(shiftIn)
                        .shifted(amount + static_cast<int>(storage_type0::Size), internal_data0(shiftIn)),
                    data0.shifted(amount + static_cast<int>(storage_type0::Size), internal_data1(shiftIn))};
            }
            if (amount == -SSize) {
                // shifted entirely out: the result is shiftIn itself
                return shiftIn;
            }
            if (amount > -2 * SSize) {
                return shiftIn.shifted(amount + SSize);
            }
        }
        if (amount == 0) {
            return *this;
        }
        // positive amounts: shiftIn supplies the entries above index Size-1
        if (amount < static_cast<int>(storage_type0::Size)) {
            return {data0.shifted(amount, data1),
                    data1.shifted(amount, internal_data0(shiftIn))};
        }
        if (amount == static_cast<int>(storage_type0::Size)) {
            return {storage_type0(data1), storage_type1(internal_data0(shiftIn))};
        }
        if (amount < SSize) {
            return {data1.shifted(amount - static_cast<int>(storage_type0::Size), internal_data0(shiftIn)),
                    internal_data0(shiftIn)
                        .shifted(amount - static_cast<int>(storage_type0::Size), internal_data1(shiftIn))};
        }
        if (amount == SSize) {
            return shiftIn;
        }
        if (amount < 2 * SSize) {
            return shiftIn.shifted(amount - SSize);
        }
        return Zero();
    }
1253
1254
1255
    /// Rotates the vector entries by \p amount: entries shifted out at one end
    /// re-enter at the other. Negative amounts are normalized into [0, size()).
    Vc_INTRINSIC fixed_size_simd<T, N> rotated(int amount) const
    {
        amount %= int(size());
        if (amount == 0) {
            return *this;
        } else if (amount < 0) {
            amount += size();
        }

#ifdef Vc_MSVC
        // MSVC fallback: store both halves plus a wrap-around copy of data0
        // into a stack buffer and reload the two rotated halves with unaligned
        // loads.
        alignas(MemoryAlignment) T tmp[N + data0.size()];
        data0.store(&tmp[0], Vc::Aligned);
        data1.store(&tmp[data0.size()], Vc::Aligned);
        data0.store(&tmp[N], Vc::Unaligned);
        fixed_size_simd<T, N> r;
        r.data0.load(&tmp[amount], Vc::Unaligned);
        r.data1.load(&tmp[(amount + data0.size()) % size()], Vc::Unaligned);
        return r;
#else
        // Register-only path: express the rotation as half-vector shifts with
        // the opposite half (cast to the matching storage type) shifted in.
        auto &&d0cvtd = simd_cast<storage_type1>(data0);
        auto &&d1cvtd = simd_cast<storage_type0>(data1);
        constexpr int size0 = storage_type0::size();
        constexpr int size1 = storage_type1::size();

        if (amount == size0 && std::is_same<storage_type0, storage_type1>::value) {
            // exact half rotation: just swap the halves
            return {std::move(d1cvtd), std::move(d0cvtd)};
        } else if (amount < size1) {
            return {data0.shifted(amount, d1cvtd), data1.shifted(amount, d0cvtd)};
        } else if (amount == size1) {
            return {data0.shifted(amount, d1cvtd), std::move(d0cvtd)};
        } else if (int(size()) - amount < size1) {
            return {data0.shifted(amount - int(size()), d1cvtd.shifted(size1 - size0)),
                    data1.shifted(amount - int(size()), data0.shifted(size0 - size1))};
        } else if (int(size()) - amount == size1) {
            return {data0.shifted(-size1, d1cvtd.shifted(size1 - size0)),
                    simd_cast<storage_type1>(data0.shifted(size0 - size1))};
        } else if (amount <= size0) {
            return {data0.shifted(size1, d1cvtd).shifted(amount - size1, data0),
                    simd_cast<storage_type1>(data0.shifted(amount - size1))};
        } else {
            return {data0.shifted(size1, d1cvtd).shifted(amount - size1, data0),
                    simd_cast<storage_type1>(data0.shifted(amount - size1, d1cvtd))};
        }
        return *this;  // unreachable: the chain above covers all amounts
#endif
    }
1305
1306
1307
    /// Interleaves the low halves of *this and \p x:
    /// (this[0], x[0], this[1], x[1], ...). Both result halves come from
    /// data0; the second one is cast since the storage halves may differ in type.
    Vc_INTRINSIC fixed_size_simd<T, N> interleaveLow(const SimdArray &x) const
    {
        return {data0.interleaveLow(x.data0),
                simd_cast<storage_type1>(data0.interleaveHigh(x.data0))};
    }
1314
    /// Interleaves the high halves of *this and \p x; dispatches on whether the
    /// two storage halves have equal Size.
    Vc_INTRINSIC fixed_size_simd<T, N> interleaveHigh(const SimdArray &x) const
    {
        return interleaveHighImpl(
            x,
            std::integral_constant<bool, storage_type0::Size == storage_type1::Size>());
    }
1321
private:
    /// \internal interleaveHigh for equal-size halves: the result is built
    /// entirely from data1.
    Vc_INTRINSIC fixed_size_simd<T, N> interleaveHighImpl(const SimdArray &x, std::true_type) const
    {
        return {data1.interleaveLow(x.data1), data1.interleaveHigh(x.data1)};
    }
1328
    /// \internal interleaveHigh for unequal-size halves: the first result half
    /// mixes the tail of data0's interleave with the start of data1's.
    inline fixed_size_simd<T, N> interleaveHighImpl(const SimdArray &x, std::false_type) const
    {
        return {data0.interleaveHigh(x.data0)
                    .shifted(storage_type1::Size,
                             simd_cast<storage_type0>(data1.interleaveLow(x.data1))),
                data1.interleaveHigh(x.data1)};
    }
1336
public:
    /// Returns the vector with its entries in reverse order.
    inline fixed_size_simd<T, N> reversed() const
    {
        if (std::is_same<storage_type0, storage_type1>::value) {
            // equal-size halves: swap the halves and reverse each one
            return {simd_cast<storage_type0>(data1).reversed(),
                    simd_cast<storage_type1>(data0).reversed()};
        } else {
#ifdef Vc_MSVC
            // MSVC fallback via a stack buffer: store the reversed halves in
            // swapped order and reload the whole vector.
            alignas(MemoryAlignment) T tmp[N];
            data1.reversed().store(&tmp[0], Vc::Aligned);
            data0.reversed().store(&tmp[data1.size()], Vc::Unaligned);
            return fixed_size_simd<T, N>{&tmp[0], Vc::Aligned};
#else
            return {data0.shifted(storage_type1::Size, data1).reversed(),
                    simd_cast<storage_type1>(data0.reversed().shifted(
                        storage_type0::Size - storage_type1::Size))};
#endif
        }
    }
1360
    /// Returns the vector with its entries sorted in ascending order;
    /// dispatches on whether the two storage halves have equal Size.
    inline fixed_size_simd<T, N> sorted() const
    {
        return sortedImpl(
            std::integral_constant<bool, storage_type0::Size == storage_type1::Size>());
    }
1366
1367
    /// \internal Sorting for equal-size halves: sort each half (one reversed),
    /// take element-wise min/max — which splits the values so every entry of
    /// \c lo is <= every entry of \c hi — then sort each result half.
    Vc_INTRINSIC fixed_size_simd<T, N> sortedImpl(std::true_type) const
    {
#ifdef Vc_DEBUG_SORTED
        std::cerr << "-- " << data0 << data1 << '\n';
#endif
        const auto a = data0.sorted();
        const auto b = data1.sorted().reversed();
        const auto lo = Vc::min(a, b);
        const auto hi = Vc::max(a, b);
        return {lo.sorted(), hi.sorted()};
    }
1379
1380
    /// \internal Sorting for unequal-size halves: widen to the next power of
    /// two, pad with the largest representable value (infinity where the type
    /// has one) so the padding sorts to the back, sort, and truncate to N.
    Vc_INTRINSIC fixed_size_simd<T, N> sortedImpl(std::false_type) const
    {
        using SortableArray =
            fixed_size_simd<value_type, Common::NextPowerOfTwo<size()>::value>;
        auto sortable = simd_cast<SortableArray>(*this);
        for (std::size_t i = Size; i < SortableArray::Size; ++i) {
            using limits = std::numeric_limits<value_type>;
            if (limits::has_infinity) {
                sortable[i] = limits::infinity();
            } else {
                sortable[i] = std::numeric_limits<value_type>::max();
            }
        }
        return simd_cast<fixed_size_simd<T, N>>(sortable.sorted());
    }
1422
1423
1424
1425
1426
1427
    /// Number of entries in the SimdArray (equals the template parameter N).
    static constexpr std::size_t Size = size();
1429
1430
    /// \deprecated Use the free function Vc::exponent(x) instead.
    Vc_DEPRECATED("use exponent(x) instead")
    Vc_INTRINSIC fixed_size_simd<T, N> exponent() const
    {
        return {exponent(data0), exponent(data1)};
    }
1436
1437
    /// \deprecated Use the free function Vc::isnegative(x) instead.
    Vc_DEPRECATED("use isnegative(x) instead") Vc_INTRINSIC MaskType isNegative() const
    {
        return {isnegative(data0), isnegative(data1)};
    }
1442
1443
1444 Vc_DEPRECATED("use copysign(x, y) instead")
1445 Vc_INTRINSIC fixed_size_simd<T, N> copySign(const SimdArray &x) const
1446 {
1447 return {Vc::copysign(data0, x.data0),
1448 Vc::copysign(data1, x.data1)};
1449 }
1450
1451
1452
1453 friend storage_type0 &internal_data0<>(SimdArray &x);
1454 friend storage_type1 &internal_data1<>(SimdArray &x);
1455 friend const storage_type0 &internal_data0<>(const SimdArray &x);
1456 friend const storage_type1 &internal_data1<>(const SimdArray &x);
1457
1458
1459 Vc_INTRINSIC SimdArray(storage_type0 &&x, storage_type1 &&y)
1460 : data0(std::move(x)), data1(std::move(y))
1461 {
1462 }
1463
    Vc_FREE_STORE_OPERATORS_ALIGNED(alignof(storage_type0));

private:
    // data0 carries an explicit alignas so the whole object is aligned to the
    // next power of two of its total byte size (capped by
    // Common::BoundedAlignment); data1 follows directly after data0.
    alignas(static_cast<std::size_t>(
        Common::BoundedAlignment<Common::NextPowerOfTwo<N>::value * sizeof(V) /
                                 V::size()>::value)) storage_type0 data0;
    storage_type1 data1;
};
#undef Vc_CURRENT_CLASS_NAME
// Out-of-class definitions for the static constexpr data members (needed for
// ODR-use prior to C++17 inline variables).
template <typename T, std::size_t N, typename V, std::size_t M>
constexpr std::size_t SimdArray<T, N, V, M>::Size;
template <typename T, std::size_t N, typename V, std::size_t M>
constexpr std::size_t SimdArray<T, N, V, M>::MemoryAlignment;
1480
1481
// Unmasked gather: splits the index vector with Split::lo/hi and lets each
// storage half perform its own native gather from args.address.
template <typename T, std::size_t N, typename VectorType, std::size_t M>
template <class MT, class IT, int Scale>
inline void SimdArray<T, N, VectorType, M>::gatherImplementation(
    const Common::GatherArguments<MT, IT, Scale> &args)
{
    data0.gather(Common::make_gather<Scale>(
        args.address, Split::lo(Common::Operations::gather(), args.indexes)));
    data1.gather(Common::make_gather<Scale>(
        args.address, Split::hi(Common::Operations::gather(), args.indexes)));
}
// Masked gather: like the unmasked overload, but each half additionally
// receives its half of the mask.
template <typename T, std::size_t N, typename VectorType, std::size_t M>
template <class MT, class IT, int Scale>
inline void SimdArray<T, N, VectorType, M>::gatherImplementation(
    const Common::GatherArguments<MT, IT, Scale> &args, MaskArgument mask)
{
    data0.gather(Common::make_gather<Scale>(
                     args.address, Split::lo(Common::Operations::gather(), args.indexes)),
                 Split::lo(mask));
    data1.gather(Common::make_gather<Scale>(
                     args.address, Split::hi(Common::Operations::gather(), args.indexes)),
                 Split::hi(mask));
}
1504
1505
// Unmasked scatter: each half writes through its half of the index vector.
// Note: Split reuses the gather() operation tag for splitting indexes, and
// `indexes` is forwarded only on its last use so the first split cannot
// observe a moved-from value.
template <typename T, std::size_t N, typename VectorType, std::size_t M>
template <typename MT, typename IT>
inline void SimdArray<T, N, VectorType, M>::scatterImplementation(MT *mem,
                                                                  IT &&indexes) const
{
    data0.scatter(mem, Split::lo(Common::Operations::gather(),
                                 indexes));  // used again below; don't forward here

    data1.scatter(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)));
}
// Masked scatter: like the unmasked overload, plus the split mask halves.
template <typename T, std::size_t N, typename VectorType, std::size_t M>
template <typename MT, typename IT>
inline void SimdArray<T, N, VectorType, M>::scatterImplementation(MT *mem,
                                                                  IT &&indexes, MaskArgument mask) const
{
    data0.scatter(mem, Split::lo(Common::Operations::gather(), indexes),
                  Split::lo(mask));  // used again below; don't forward here

    data1.scatter(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)),
                  Split::hi(mask));
}
1527
1528
1529
/// \internal Returns a mutable reference to the low (first) storage half of \p x.
template <typename T, std::size_t N, typename V, std::size_t M>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
typename SimdArrayTraits<T, N>::storage_type0 &internal_data0(
    SimdArray<T, N, V, M> &x)
{
    return x.data0;
}
1539
/// \internal Returns a mutable reference to the high (second) storage half of \p x.
template <typename T, std::size_t N, typename V, std::size_t M>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
typename SimdArrayTraits<T, N>::storage_type1 &internal_data1(
    SimdArray<T, N, V, M> &x)
{
    return x.data1;
}
1549
/// \internal Returns a const reference to the low (first) storage half of \p x.
template <typename T, std::size_t N, typename V, std::size_t M>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
const typename SimdArrayTraits<T, N>::storage_type0 &internal_data0(
    const SimdArray<T, N, V, M> &x)
{
    return x.data0;
}
1559
/// \internal Returns a const reference to the high (second) storage half of \p x.
template <typename T, std::size_t N, typename V, std::size_t M>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
const typename SimdArrayTraits<T, N>::storage_type1 &internal_data1(
    const SimdArray<T, N, V, M> &x)
{
    return x.data1;
}
1569
1570
1571
1572
#if defined Vc_MSVC && defined Vc_IMPL_SSE && !defined Vc_IMPL_AVX
// MSVC/SSE-only specialization: initializes data1 by assignment in the body
// instead of in the mem-initializer list. NOTE(review): presumably works
// around an MSVC code-generation problem with this instantiation — TODO
// confirm against upstream Vc history.
template <>
Vc_INTRINSIC SimdArray<double, 8>::SimdArray(fixed_size_simd<double, 4> &&x,
                                             fixed_size_simd<double, 4> &&y)
    : data0(x), data1(0)
{
    data1 = y;
}
#endif
1582
1583
namespace Detail
{
// Binary operators on fixed_size_simd. Each macro generates two overloads:
// one for the "atomic" case (backed by a single native vector), which applies
// the operator directly to the native data, and one that recurses into the
// two storage halves. The first block yields value results
// (arithmetic/bitwise/shift), the second yields mask results (compares).
#define Vc_FIXED_OP(op)                                                                  \
    template <class T, int N,                                                            \
              class = typename std::enable_if<fixed_size_simd<T, N>::is_atomic>::type>   \
    Vc_INTRINSIC fixed_size_simd<T, N> operator op(const fixed_size_simd<T, N> &a,       \
                                                   const fixed_size_simd<T, N> &b)       \
    {                                                                                    \
        return {private_init, internal_data(a) op internal_data(b)};                     \
    }                                                                                    \
    template <class T, int N,                                                            \
              class = typename std::enable_if<!fixed_size_simd<T, N>::is_atomic>::type,  \
              class = T>                                                                 \
    Vc_INTRINSIC fixed_size_simd<T, N> operator op(const fixed_size_simd<T, N> &a,       \
                                                   const fixed_size_simd<T, N> &b)       \
    {                                                                                    \
        return {internal_data0(a) op internal_data0(b),                                  \
                internal_data1(a) op internal_data1(b)};                                 \
    }
Vc_ALL_ARITHMETICS(Vc_FIXED_OP);
Vc_ALL_BINARY(Vc_FIXED_OP);
Vc_ALL_SHIFTS(Vc_FIXED_OP);
#undef Vc_FIXED_OP
#define Vc_FIXED_OP(op)                                                                  \
    template <class T, int N,                                                            \
              class = typename std::enable_if<fixed_size_simd<T, N>::is_atomic>::type>   \
    Vc_INTRINSIC fixed_size_simd_mask<T, N> operator op(const fixed_size_simd<T, N> &a,  \
                                                        const fixed_size_simd<T, N> &b)  \
    {                                                                                    \
        return {private_init, internal_data(a) op internal_data(b)};                     \
    }                                                                                    \
    template <class T, int N,                                                            \
              class = typename std::enable_if<!fixed_size_simd<T, N>::is_atomic>::type,  \
              class = T>                                                                 \
    Vc_INTRINSIC fixed_size_simd_mask<T, N> operator op(const fixed_size_simd<T, N> &a,  \
                                                        const fixed_size_simd<T, N> &b)  \
    {                                                                                    \
        return {internal_data0(a) op internal_data0(b),                                  \
                internal_data1(a) op internal_data1(b)};                                 \
    }
Vc_ALL_COMPARES(Vc_FIXED_OP);
#undef Vc_FIXED_OP
}
1627
1628
1629
namespace result_vector_type_internal
{
template <typename T>
using remove_cvref = typename std::remove_cv<typename std::remove_reference<T>::type>::type;

// True for integral types whose values may not be exactly representable in
// int: anything larger than int, plus long/unsigned long even where they have
// the same size as int.
template <typename T>
using is_integer_larger_than_int = std::integral_constant<
    bool, std::is_integral<T>::value &&(sizeof(T) > sizeof(int) ||
                                        std::is_same<T, long>::value ||
                                        std::is_same<T, unsigned long>::value)>;

// Computes the result type of a mixed binary operator involving at least one
// SimdArray. The bool parameter gates the overload set: it is true only for
// SimdArray <op> (safe arithmetic scalar | equal-size vector type)
// combinations that are not already handled by the fixed_size operators.
template <
    typename L, typename R,
    std::size_t N = Traits::isSimdArray<L>::value ? Traits::simd_vector_size<L>::value
                                                  : Traits::simd_vector_size<R>::value,
    bool = (Traits::isSimdArray<L>::value ||
            Traits::isSimdArray<R>::value) &&
           !(Traits::is_fixed_size_simd<L>::value &&
             Traits::is_fixed_size_simd<R>::value) &&
           ((std::is_arithmetic<remove_cvref<L>>::value &&
             !is_integer_larger_than_int<remove_cvref<L>>::value) ||
            (std::is_arithmetic<remove_cvref<R>>::value &&
             !is_integer_larger_than_int<remove_cvref<R>>::value) ||
            Traits::simd_vector_size<L>::value == Traits::simd_vector_size<R>::value)>
struct evaluate;

template <typename L, typename R, std::size_t N> struct evaluate<L, R, N, true>
{
private:
    using LScalar = Traits::entry_type_of<L>;
    using RScalar = Traits::entry_type_of<R>;

    template <bool B, typename T, typename F>
    using conditional = typename std::conditional<B, T, F>::type;

public:
    // Entry type selection: when both scalars are integral and smaller than
    // int, avoid the usual promotion to int — keep the wider of the two types
    // (at equal size, the unsigned one). Otherwise use the type of
    // LScalar + RScalar, i.e. the usual arithmetic conversions.
    using type = fixed_size_simd<
        conditional<(std::is_integral<LScalar>::value &&std::is_integral<RScalar>::value &&
                     sizeof(LScalar) < sizeof(int) &&
                     sizeof(RScalar) < sizeof(int)),
                    conditional<(sizeof(LScalar) == sizeof(RScalar)),
                                conditional<std::is_unsigned<LScalar>::value, LScalar, RScalar>,
                                conditional<(sizeof(LScalar) > sizeof(RScalar)), LScalar, RScalar>>,
                    decltype(std::declval<LScalar>() + std::declval<RScalar>())>,
        N>;
};

}
1690
// Convenience alias for the promoted fixed_size_simd result type of L <op> R.
template <typename L, typename R>
using result_vector_type = typename result_vector_type_internal::evaluate<L, R>::type;
1693
// Arithmetic/bitwise operators for SimdArray mixed with scalars or equal-size
// vector types: both operands are converted to the common result_vector_type
// and the operator is forwarded to the Vc::Detail implementation above.
// SFINAE on result_vector_type restricts these to valid combinations.
#define Vc_BINARY_OPERATORS_(op_)                                                        \
    template <typename L, typename R>                                                    \
    Vc_INTRINSIC result_vector_type<L, R> operator op_(L &&lhs, R &&rhs)                 \
    {                                                                                    \
        using Return = result_vector_type<L, R>;                                         \
        return Vc::Detail::operator op_(                                                 \
            static_cast<const Return &>(std::forward<L>(lhs)),                           \
            static_cast<const Return &>(std::forward<R>(rhs)));                          \
    }
Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATORS_);
Vc_ALL_BINARY(Vc_BINARY_OPERATORS_);
#undef Vc_BINARY_OPERATORS_
// Comparison operators for the same mixed SimdArray combinations; both
// operands are promoted to the common result_vector_type and its mask_type is
// returned.
#define Vc_BINARY_OPERATORS_(op_)                                                        \
    template <typename L, typename R>                                                    \
    Vc_INTRINSIC typename result_vector_type<L, R>::mask_type operator op_(L &&lhs,     \
                                                                            R &&rhs)    \
    {                                                                                    \
        using Promote = result_vector_type<L, R>;                                        \
        return Promote(std::forward<L>(lhs)) op_ Promote(std::forward<R>(rhs));          \
    }
Vc_ALL_COMPARES(Vc_BINARY_OPERATORS_);

#undef Vc_BINARY_OPERATORS_
1756
1757
// Helper macros for the math-function wrappers below: each generated function
// forwards to fromOperation with the matching Forward_* operation tag, which
// applies the native implementation to every storage segment.

// Unary function returning a fixed_size_simd (e.g. abs, sqrt).
#define Vc_FORWARD_UNARY_OPERATOR(name_)                                                 \
    template <typename T, std::size_t N, typename V, std::size_t M>                      \
    inline fixed_size_simd<T, N> name_(const SimdArray<T, N, V, M> &x)                   \
    {                                                                                    \
        return fixed_size_simd<T, N>::fromOperation(                                     \
            Common::Operations::Forward_##name_(), x);                                   \
    }                                                                                    \
    template <class T, int N>                                                            \
    fixed_size_simd<T, N> name_(const fixed_size_simd<T, N> &x)                          \
    {                                                                                    \
        return fixed_size_simd<T, N>::fromOperation(                                     \
            Common::Operations::Forward_##name_(), x);                                   \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

// Unary predicate returning a fixed_size_simd_mask (e.g. isnan).
#define Vc_FORWARD_UNARY_BOOL_OPERATOR(name_)                                            \
    template <typename T, std::size_t N, typename V, std::size_t M>                      \
    inline fixed_size_simd_mask<T, N> name_(const SimdArray<T, N, V, M> &x)              \
    {                                                                                    \
        return fixed_size_simd_mask<T, N>::fromOperation(                                \
            Common::Operations::Forward_##name_(), x);                                   \
    }                                                                                    \
    template <class T, int N>                                                            \
    fixed_size_simd_mask<T, N> name_(const fixed_size_simd<T, N> &x)                     \
    {                                                                                    \
        return fixed_size_simd_mask<T, N>::fromOperation(                                \
            Common::Operations::Forward_##name_(), x);                                   \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

// Binary function returning a fixed_size_simd (e.g. atan2, copysign).
#define Vc_FORWARD_BINARY_OPERATOR(name_)                                                \
    template <typename T, std::size_t N, typename V, std::size_t M>                      \
    inline fixed_size_simd<T, N> name_(const SimdArray<T, N, V, M> &x,                   \
                                       const SimdArray<T, N, V, M> &y)                   \
    {                                                                                    \
        return fixed_size_simd<T, N>::fromOperation(                                     \
            Common::Operations::Forward_##name_(), x, y);                                \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
1800
1801
1802
1803
1804
1805
// Instantiate the forwarding wrappers for the elementary math functions.
Vc_FORWARD_UNARY_OPERATOR(abs);
Vc_FORWARD_UNARY_OPERATOR(asin);
Vc_FORWARD_UNARY_OPERATOR(atan);
Vc_FORWARD_BINARY_OPERATOR(atan2);
Vc_FORWARD_UNARY_OPERATOR(ceil);
Vc_FORWARD_BINARY_OPERATOR(copysign);
Vc_FORWARD_UNARY_OPERATOR(cos);
Vc_FORWARD_UNARY_OPERATOR(exp);
Vc_FORWARD_UNARY_OPERATOR(exponent);
Vc_FORWARD_UNARY_OPERATOR(floor);
1816
1817 template <typename T, std::size_t N>
1818 inline SimdArray<T, N> fma(const SimdArray<T, N> &a, const SimdArray<T, N> &b,
1819 const SimdArray<T, N> &c)
1820 {
1821 return SimdArray<T, N>::fromOperation(Common::Operations::Forward_fma(), a, b, c);
1822 }
// Classification predicates returning masks.
Vc_FORWARD_UNARY_BOOL_OPERATOR(isfinite);
Vc_FORWARD_UNARY_BOOL_OPERATOR(isinf);
Vc_FORWARD_UNARY_BOOL_OPERATOR(isnan);
Vc_FORWARD_UNARY_BOOL_OPERATOR(isnegative);
1827
1828 template <typename T, std::size_t N>
1829 inline SimdArray<T, N> frexp(const SimdArray<T, N> &x, SimdArray<int, N> *e)
1830 {
1831 return SimdArray<T, N>::fromOperation(Common::Operations::Forward_frexp(), x, e);
1832 }
1833
1834 template <typename T, std::size_t N>
1835 inline SimdArray<T, N> ldexp(const SimdArray<T, N> &x, const SimdArray<int, N> &e)
1836 {
1837 return SimdArray<T, N>::fromOperation(Common::Operations::Forward_ldexp(), x, e);
1838 }
// Logarithms, reciprocals, rounding, and sine.
Vc_FORWARD_UNARY_OPERATOR(log);
Vc_FORWARD_UNARY_OPERATOR(log10);
Vc_FORWARD_UNARY_OPERATOR(log2);
Vc_FORWARD_UNARY_OPERATOR(reciprocal);
Vc_FORWARD_UNARY_OPERATOR(round);
Vc_FORWARD_UNARY_OPERATOR(rsqrt);
Vc_FORWARD_UNARY_OPERATOR(sin);
1846
1847 template <typename T, std::size_t N>
1848 void sincos(const SimdArray<T, N> &x, SimdArray<T, N> *sin, SimdArray<T, N> *cos)
1849 {
1850 SimdArray<T, N>::callOperation(Common::Operations::Forward_sincos(), x, sin, cos);
1851 }
Vc_FORWARD_UNARY_OPERATOR(sqrt);
Vc_FORWARD_UNARY_OPERATOR(trunc);
Vc_FORWARD_BINARY_OPERATOR(min);
Vc_FORWARD_BINARY_OPERATOR(max);
1856
#undef Vc_FORWARD_UNARY_OPERATOR
#undef Vc_FORWARD_UNARY_BOOL_OPERATOR
#undef Vc_FORWARD_BINARY_OPERATOR

// Defaulted dummy parameters appended to the simd_cast overloads below on
// MSVC (they expand to nothing elsewhere). NOTE(review): presumably needed
// because MSVC does not treat overloads that differ only in their enable_if
// conditions as distinct — TODO confirm.
#ifdef Vc_MSVC
#define Vc_DUMMY_ARG0 , int = 0
#define Vc_DUMMY_ARG1 , long = 0
#define Vc_DUMMY_ARG2 , short = 0
#define Vc_DUMMY_ARG3 , char = '0'
#define Vc_DUMMY_ARG4 , unsigned = 0u
#define Vc_DUMMY_ARG5 , unsigned short = 0u
#else
#define Vc_DUMMY_ARG0
#define Vc_DUMMY_ARG1
#define Vc_DUMMY_ARG2
#define Vc_DUMMY_ARG3
#define Vc_DUMMY_ARG4
#define Vc_DUMMY_ARG5
#endif
1877
1878
1879
1880
1881
1882 template <typename Return, std::size_t N, typename T, typename... From>
1883 Vc_INTRINSIC Vc_CONST enable_if<sizeof...(From) != 0, Return>
1884 simd_cast_impl_smaller_input(const From &... xs, const T &last)
1885 {
1886 Return r = simd_cast<Return>(xs...);
1887 for (size_t i = 0; i < N; ++i) {
1888 r[i + N * sizeof...(From)] = static_cast<typename Return::EntryType>(last[i]);
1889 }
1890 return r;
1891 }
1892 template <typename Return, std::size_t N, typename T>
1893 Vc_INTRINSIC Vc_CONST Return simd_cast_impl_smaller_input(const T &last)
1894 {
1895 Return r = Return();
1896 for (size_t i = 0; i < N; ++i) {
1897 r[i] = static_cast<typename Return::EntryType>(last[i]);
1898 }
1899 return r;
1900 }
1901 template <typename Return, std::size_t N, typename T, typename... From>
1902 Vc_INTRINSIC Vc_CONST enable_if<sizeof...(From) != 0, Return> simd_cast_impl_larger_input(
1903 const From &... xs, const T &last)
1904 {
1905 Return r = simd_cast<Return>(xs...);
1906 for (size_t i = N * sizeof...(From); i < Return::Size; ++i) {
1907 r[i] = static_cast<typename Return::EntryType>(last[i - N * sizeof...(From)]);
1908 }
1909 return r;
1910 }
1911 template <typename Return, std::size_t N, typename T>
1912 Vc_INTRINSIC Vc_CONST Return simd_cast_impl_larger_input(const T &last)
1913 {
1914 Return r = Return();
1915 for (size_t i = 0; i < Return::size(); ++i) {
1916 r[i] = static_cast<typename Return::EntryType>(last[i]);
1917 }
1918 return r;
1919 }
1920
1921
// \internal Declaration: casts all arguments except the (ignored) last one.
template <typename Return, typename T, typename... From>
Vc_INTRINSIC_L Vc_CONST_L Return
simd_cast_without_last(const From &... xs, const T &) Vc_INTRINSIC_R Vc_CONST_R;
1925
1926
// Trait: true when all types in the pack are the same (recursively compares
// adjacent pairs; a single type is trivially equal to itself).
template <typename... Ts> struct are_all_types_equal;
template <typename T>
struct are_all_types_equal<T> : public std::integral_constant<bool, true>
{
};
template <typename T0, typename T1, typename... Ts>
struct are_all_types_equal<T0, T1, Ts...>
    : public std::integral_constant<
          bool, std::is_same<T0, T1>::value && are_all_types_equal<T1, Ts...>::value>
{
};
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
// \internal Declaration: simd_cast over the arguments reordered as
// a0, b0, a1, b1, ... (interleaved instead of concatenated).
template <typename Return, typename... Ts>
Vc_INTRINSIC Vc_CONST Return
simd_cast_interleaved_argument_order(const Ts &... a, const Ts &... b);
1962
1963
1964
// \internal simd_cast_with_offset overload set: casts starting from entry
// `offset` of the (conceptually concatenated) input. The four declarations
// cover: offset == 0 with equal argument types; a Size-aligned offset into a
// single argument; a misaligned offset into a bisectable (non-atomic)
// SimdArray/SimdMaskArray Return; and a misaligned offset into an atomic one.
template <typename Return, std::size_t offset, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<From, Froms...>::value && offset == 0), Return>
    simd_cast_with_offset(const From &x, const Froms &... xs);

template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size == 0), Return>
    simd_cast_with_offset(const From &x);

template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
               ((Traits::isSimdArray<Return>::value &&
                 !Traits::isAtomicSimdArray<Return>::value) ||
                (Traits::isSimdMaskArray<Return>::value &&
                 !Traits::isAtomicSimdMaskArray<Return>::value))),
              Return>
    simd_cast_with_offset(const From &x);

template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
               ((Traits::isSimdArray<Return>::value &&
                 Traits::isAtomicSimdArray<Return>::value) ||
                (Traits::isSimdMaskArray<Return>::value &&
                 Traits::isAtomicSimdMaskArray<Return>::value))),
              Return>
    simd_cast_with_offset(const From &x);
1994
// \internal The offset skips the first argument entirely: drop it and recurse
// with the offset reduced by that argument's Size.
template <typename Return, std::size_t offset, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST enable_if<
    (are_all_types_equal<From, Froms...>::value && From::Size <= offset), Return>
simd_cast_with_offset(const From &, const Froms &... xs)
{
    return simd_cast_with_offset<Return, offset - From::Size>(xs...);
}
2002
2003
// \internal Terminal case: the offset lies past the only remaining argument,
// so there is no input left and the result is all zeros.
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST enable_if<(From::Size <= offset), Return> simd_cast_with_offset(
    const From &)
{
    return Return(0);
}
2010
2011
// Yields the first type of a parameter pack.
template <typename T, typename... Ts> struct first_type_of_impl
{
    using type = T;
};
template <typename... Ts> using first_type_of = typename first_type_of_impl<Ts...>::type;
2017
2018
// \internal simd_cast_drop_arguments declarations: casts as many of the
// leading arguments as fit into Return and ignores the rest. The overloads
// cover a single argument, a pack that still fits, and packs whose trailing
// argument(s) must be dropped.
template <typename Return, typename From>
Vc_INTRINSIC Vc_CONST Return simd_cast_drop_arguments(From x);
template <typename Return, typename... Froms>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<Froms...>::value &&
               sizeof...(Froms) * first_type_of<Froms...>::Size < Return::Size),
              Return>
    simd_cast_drop_arguments(Froms... xs, first_type_of<Froms...> x);

template <typename Return, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST enable_if<
    (are_all_types_equal<From, Froms...>::value &&
     (1 + sizeof...(Froms)) * From::Size >= Return::Size && sizeof...(Froms) != 0),
    Return>
simd_cast_drop_arguments(Froms... xs, From x, From);
template <typename Return, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<From>::value && From::Size >= Return::Size), Return>
    simd_cast_drop_arguments(From x, From);
2040
namespace
{
// Debug hook for the simd_cast overloads: with Vc_DEBUG_SIMD_CAST defined it
// prints all arguments to std::cerr; otherwise it compiles to nothing.
// NOTE(review): anonymous namespace in a header gives every TU its own copy
// of these helpers — harmless here since they are stateless.
#ifdef Vc_DEBUG_SIMD_CAST
void debugDoNothing(const std::initializer_list<void *> &) {}
template <typename T0, typename... Ts>
inline void vc_debug_(const char *prefix, const char *suffix, const T0 &arg0,
                      const Ts &... args)
{
    std::cerr << prefix << arg0;
    debugDoNothing({&(std::cerr << ", " << args)...});
    std::cerr << suffix;
}
#else
template <typename T0, typename... Ts>
Vc_INTRINSIC void vc_debug_(const char *, const char *, const T0 &, const Ts &...)
{
}
#endif
}
2060
2061
// Compile-time A < B as a type (usable inside enable_if expressions).
template <size_t A, size_t B>
struct is_less : public std::integral_constant<bool, (A < B)> {
};

// Compile-time power-of-two test (note: also true for N == 0).
template <size_t N>
struct is_power_of_2 : public std::integral_constant<bool, ((N - 1) & N) == 0> {
};
2070
2071
2072 #define Vc_SIMDARRAY_CASTS(SimdArrayType_, NativeType_) \
2073 template <typename Return, typename T, typename A, typename... Froms> \
2074 Vc_INTRINSIC Vc_CONST enable_if< \
2075 (Traits::isAtomic##SimdArrayType_<Return>::value && \
2076 is_less<NativeType_<T, A>::Size * sizeof...(Froms), Return::Size>::value && \
2077 are_all_types_equal<NativeType_<T, A>, Froms...>::value && \
2078 !detail::is_fixed_size_abi<A>::value), \
2079 Return> \
2080 simd_cast(NativeType_<T, A> x, Froms... xs) \
2081 { \
2082 vc_debug_("simd_cast{1}(", ")\n", x, xs...); \
2083 return {private_init, simd_cast<typename Return::storage_type>(x, xs...)}; \
2084 } \
2085 template <typename Return, typename T, typename A, typename... Froms> \
2086 Vc_INTRINSIC Vc_CONST enable_if< \
2087 (Traits::isAtomic##SimdArrayType_<Return>::value && \
2088 !is_less<NativeType_<T, A>::Size * sizeof...(Froms), Return::Size>::value && \
2089 are_all_types_equal<NativeType_<T, A>, Froms...>::value && \
2090 !detail::is_fixed_size_abi<A>::value), \
2091 Return> \
2092 simd_cast(NativeType_<T, A> x, Froms... xs) \
2093 { \
2094 vc_debug_("simd_cast{2}(", ")\n", x, xs...); \
2095 return {simd_cast_without_last<Return, NativeType_<T, A>, Froms...>(x, xs...)}; \
2096 } \
2097 template <typename Return, typename T, typename A, typename... Froms> \
2098 Vc_INTRINSIC Vc_CONST \
2099 enable_if<(Traits::is##SimdArrayType_<Return>::value && \
2100 !Traits::isAtomic##SimdArrayType_<Return>::value && \
2101 is_less<Common::left_size<Return::Size>(), \
2102 NativeType_<T, A>::Size *(1 + sizeof...(Froms))>::value && \
2103 are_all_types_equal<NativeType_<T, A>, Froms...>::value && \
2104 !detail::is_fixed_size_abi<A>::value), \
2105 Return> \
2106 simd_cast(NativeType_<T, A> x, Froms... xs) \
2107 { \
2108 vc_debug_("simd_cast{3}(", ")\n", x, xs...); \
2109 using R0 = typename Return::storage_type0; \
2110 using R1 = typename Return::storage_type1; \
2111 return {simd_cast_drop_arguments<R0, Froms...>(x, xs...), \
2112 simd_cast_with_offset<R1, R0::Size>(x, xs...)}; \
2113 } \
2114 template <typename Return, typename T, typename A, typename... Froms> \
2115 Vc_INTRINSIC Vc_CONST \
2116 enable_if<(Traits::is##SimdArrayType_<Return>::value && \
2117 !Traits::isAtomic##SimdArrayType_<Return>::value && \
2118 !is_less<Common::left_size<Return::Size>(), \
2119 NativeType_<T, A>::Size *(1 + sizeof...(Froms))>::value && \
2120 are_all_types_equal<NativeType_<T, A>, Froms...>::value && \
2121 !detail::is_fixed_size_abi<A>::value), \
2122 Return> \
2123 simd_cast(NativeType_<T, A> x, Froms... xs) \
2124 { \
2125 vc_debug_("simd_cast{4}(", ")\n", x, xs...); \
2126 using R0 = typename Return::storage_type0; \
2127 using R1 = typename Return::storage_type1; \
2128 return {simd_cast<R0>(x, xs...), R1(0)}; \
2129 } \
2130 Vc_NOTHING_EXPECTING_SEMICOLON
2131
// Instantiate the preceding casts-from-native-vector macro for both variants:
// value vectors (Vc::Vector -> SimdArray) and mask vectors (Vc::Mask ->
// SimdMaskArray); the helper macro is dropped immediately afterwards.
Vc_SIMDARRAY_CASTS(SimdArray, Vc::Vector);
Vc_SIMDARRAY_CASTS(SimdMaskArray, Vc::Mask);
#undef Vc_SIMDARRAY_CASTS
2135
2136
// simd_cast<Return, offset>(native vector): casts the slice starting at scalar
// entry offset * Return::Size of a native Vc::Vector/Vc::Mask into Return, which
// is an atomic or bisected SimdArray/SimdMaskArray type. The three overloads are
// selected via enable_if on the Return kind and on whether the source vector
// still provides entries for Return's right half.
#define Vc_SIMDARRAY_CASTS(SimdArrayType_, NativeType_)                                  \
    /* atomic Return (single storage vector): cast with the block offset */              \
    template <typename Return, int offset, typename T, typename A>                       \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<Traits::isAtomic##SimdArrayType_<Return>::value, Return>               \
        simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG0)                                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, atomic}(", ")\n", offset, x);                       \
        return {private_init, simd_cast<typename Return::storage_type, offset>(x)};      \
    }                                                                                    \
    /* bisected Return and the source still covers its right half: fill both */          \
    template <typename Return, int offset, typename T, typename A>                       \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType_<Return>::value &&                          \
                   !Traits::isAtomic##SimdArrayType_<Return>::value &&                   \
                   Return::Size * offset + Common::left_size<Return::Size>() <           \
                       NativeType_<T, A>::Size),                                         \
                  Return>                                                                \
        simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG1)                                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, split Return}(", ")\n", offset, x);                 \
        using R0 = typename Return::storage_type0;                                       \
        constexpr int entries_offset = offset * Return::Size;                            \
        constexpr int entries_offset_right = entries_offset + R0::Size;                  \
        return {                                                                         \
            simd_cast_with_offset<typename Return::storage_type0, entries_offset>(x),    \
            simd_cast_with_offset<typename Return::storage_type1, entries_offset_right>( \
                x)};                                                                     \
    }                                                                                    \
    /* bisected Return but the source ends before its right half: zero-fill R1 */        \
    template <typename Return, int offset, typename T, typename A>                       \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType_<Return>::value &&                          \
                   !Traits::isAtomic##SimdArrayType_<Return>::value &&                   \
                   Return::Size * offset + Common::left_size<Return::Size>() >=          \
                       NativeType_<T, A>::Size),                                         \
                  Return>                                                                \
        simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG2)                                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, R1::Zero}(", ")\n", offset, x);                     \
        using R0 = typename Return::storage_type0;                                       \
        using R1 = typename Return::storage_type1;                                       \
        constexpr int entries_offset = offset * Return::Size;                            \
        return {simd_cast_with_offset<R0, entries_offset>(x), R1(0)};                    \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

// Instantiate the offset casts for the vector and the mask variants.
Vc_SIMDARRAY_CASTS(SimdArray, Vc::Vector);
Vc_SIMDARRAY_CASTS(SimdMaskArray, Vc::Mask);
#undef Vc_SIMDARRAY_CASTS
2188
2189
2190 #define Vc_SIMDARRAY_CASTS(SimdArrayType_) \
2191 \
2192 template <typename Return, typename T, std::size_t N, typename V, typename... From> \
2193 Vc_INTRINSIC Vc_CONST \
2194 enable_if<(are_all_types_equal<SimdArrayType_<T, N, V, N>, From...>::value && \
2195 (sizeof...(From) == 0 || N * sizeof...(From) < Return::Size) && \
2196 !std::is_same<Return, SimdArrayType_<T, N, V, N>>::value), \
2197 Return> \
2198 simd_cast(const SimdArrayType_<T, N, V, N> &x0, const From &... xs) \
2199 { \
2200 vc_debug_("simd_cast{indivisible}(", ")\n", x0, xs...); \
2201 return simd_cast<Return>(internal_data(x0), internal_data(xs)...); \
2202 } \
2203 \
2204 template <typename Return, typename T, std::size_t N, typename V, typename... From> \
2205 Vc_INTRINSIC Vc_CONST \
2206 enable_if<(are_all_types_equal<SimdArrayType_<T, N, V, N>, From...>::value && \
2207 (sizeof...(From) > 0 && (N * sizeof...(From) >= Return::Size)) && \
2208 !std::is_same<Return, SimdArrayType_<T, N, V, N>>::value), \
2209 Return> \
2210 simd_cast(const SimdArrayType_<T, N, V, N> &x0, const From &... xs) \
2211 { \
2212 vc_debug_("simd_cast{indivisible2}(", ")\n", x0, xs...); \
2213 return simd_cast_without_last<Return, \
2214 typename SimdArrayType_<T, N, V, N>::storage_type, \
2215 typename From::storage_type...>( \
2216 internal_data(x0), internal_data(xs)...); \
2217 } \
2218 \
2219 template <typename Return, typename T, std::size_t N, typename V, std::size_t M, \
2220 typename... From> \
2221 Vc_INTRINSIC Vc_CONST enable_if< \
2222 (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value && \
2223 !std::is_same<Return, SimdArrayType_<T, N, V, M>>::value && \
2224 is_less<N * sizeof...(From), Return::Size>::value && is_power_of_2<N>::value), \
2225 Return> \
2226 simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs) \
2227 { \
2228 vc_debug_("simd_cast{bisectable}(", ")\n", x0, xs...); \
2229 return simd_cast_interleaved_argument_order< \
2230 Return, typename SimdArrayType_<T, N, V, M>::storage_type0, \
2231 typename From::storage_type0...>(internal_data0(x0), internal_data0(xs)..., \
2232 internal_data1(x0), internal_data1(xs)...); \
2233 } \
2234
2235 \
2236 template <typename Return, typename T, std::size_t N, typename V, std::size_t M, \
2237 typename... From> \
2238 Vc_INTRINSIC Vc_CONST enable_if< \
2239 (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value && \
2240 !is_less<N * sizeof...(From), Return::Size>::value && is_power_of_2<N>::value), \
2241 Return> \
2242 simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs) \
2243 { \
2244 vc_debug_("simd_cast{bisectable2}(", ")\n", x0, xs...); \
2245 return simd_cast_without_last<Return, SimdArrayType_<T, N, V, M>, From...>( \
2246 x0, xs...); \
2247 } \
2248 \
2249 template <typename Return, typename T, std::size_t N, typename V, std::size_t M, \
2250 typename... From> \
2251 Vc_INTRINSIC Vc_CONST enable_if< \
2252 (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value && \
2253 N * (1 + sizeof...(From)) <= Return::Size && !is_power_of_2<N>::value), \
2254 Return> \
2255 simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs) \
2256 { \
2257 vc_debug_("simd_cast{remaining}(", ")\n", x0, xs...); \
2258 return simd_cast_impl_smaller_input<Return, N, SimdArrayType_<T, N, V, M>, \
2259 From...>(x0, xs...); \
2260 } \
2261 \
2262 template <typename Return, typename T, std::size_t N, typename V, std::size_t M, \
2263 typename... From> \
2264 Vc_INTRINSIC Vc_CONST enable_if< \
2265 (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value && \
2266 N * (1 + sizeof...(From)) > Return::Size && !is_power_of_2<N>::value), \
2267 Return> \
2268 simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs) \
2269 { \
2270 vc_debug_("simd_cast{remaining2}(", ")\n", x0, xs...); \
2271 return simd_cast_impl_larger_input<Return, N, SimdArrayType_<T, N, V, M>, \
2272 From...>(x0, xs...); \
2273 } \
2274 \
2275 template <typename Return, typename T, std::size_t N, typename V, std::size_t M> \
2276 Vc_INTRINSIC Vc_CONST \
2277 enable_if<(N != M && N >= 2 * Return::Size && is_power_of_2<N>::value), Return> \
2278 simd_cast(const SimdArrayType_<T, N, V, M> &x) \
2279 { \
2280 vc_debug_("simd_cast{single bisectable}(", ")\n", x); \
2281 return simd_cast<Return>(internal_data0(x)); \
2282 } \
2283 template <typename Return, typename T, std::size_t N, typename V, std::size_t M> \
2284 Vc_INTRINSIC Vc_CONST enable_if<(N != M && N > Return::Size && \
2285 N < 2 * Return::Size && is_power_of_2<N>::value), \
2286 Return> \
2287 simd_cast(const SimdArrayType_<T, N, V, M> &x) \
2288 { \
2289 vc_debug_("simd_cast{single bisectable2}(", ")\n", x); \
2290 return simd_cast<Return>(internal_data0(x), internal_data1(x)); \
2291 } \
2292 Vc_NOTHING_EXPECTING_SEMICOLON
2293
2294 Vc_SIMDARRAY_CASTS(SimdArray);
2295 Vc_SIMDARRAY_CASTS(SimdMaskArray);
2296 #undef Vc_SIMDARRAY_CASTS
2297 template <class Return, class T, int N, class... Ts,
2298 class = enable_if<!std::is_same<Return, fixed_size_simd<T, N>>::value>>
2299 Vc_INTRINSIC Return simd_cast(const fixed_size_simd<T, N> &x, const Ts &... xs)
2300 {
2301 return simd_cast<Return>(static_cast<const SimdArray<T, N> &>(x),
2302 static_cast<const SimdArray<T, N> &>(xs)...);
2303 }
2304 template <class Return, class T, int N, class... Ts,
2305 class = enable_if<!std::is_same<Return, fixed_size_simd_mask<T, N>>::value>>
2306 Vc_INTRINSIC Return simd_cast(const fixed_size_simd_mask<T, N> &x, const Ts &... xs)
2307 {
2308 return simd_cast<Return>(static_cast<const SimdMaskArray<T, N> &>(x),
2309 static_cast<const SimdMaskArray<T, N> &>(xs)...);
2310 }
2311
2312
2313 #define Vc_SIMDARRAY_CASTS(SimdArrayType_) \
2314 \
2315 template <typename Return, int offset, typename T, std::size_t N, typename V, \
2316 std::size_t M> \
2317 Vc_INTRINSIC Vc_CONST enable_if<(offset == 0), Return> simd_cast( \
2318 const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG0) \
2319 { \
2320 vc_debug_("simd_cast{offset == 0}(", ")\n", offset, x); \
2321 return simd_cast<Return>(x); \
2322 } \
2323 \
2324 template <typename Return, int offset, typename T, std::size_t N, typename V> \
2325 Vc_INTRINSIC Vc_CONST enable_if<(offset != 0), Return> simd_cast( \
2326 const SimdArrayType_<T, N, V, N> &x Vc_DUMMY_ARG1) \
2327 { \
2328 vc_debug_("simd_cast{offset, forward}(", ")\n", offset, x); \
2329 return simd_cast<Return, offset>(internal_data(x)); \
2330 } \
2331 \
2332 template <typename Return, int offset, typename T, std::size_t N, typename V, \
2333 std::size_t M> \
2334 Vc_INTRINSIC Vc_CONST \
2335 enable_if<(N != M && offset * Return::Size >= Common::left_size<N>() && \
2336 offset != 0 && Common::left_size<N>() % Return::Size == 0), \
2337 Return> \
2338 simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG2) \
2339 { \
2340 vc_debug_("simd_cast{offset, right}(", ")\n", offset, x); \
2341 return simd_cast<Return, offset - Common::left_size<N>() / Return::Size>( \
2342 internal_data1(x)); \
2343 } \
2344
2345 \
2346 template <typename Return, int offset, typename T, std::size_t N, typename V, \
2347 std::size_t M> \
2348 Vc_INTRINSIC Vc_CONST \
2349 enable_if<(N != M && offset * Return::Size >= Common::left_size<N>() && \
2350 offset != 0 && Common::left_size<N>() % Return::Size != 0), \
2351 Return> \
2352 simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG3) \
2353 { \
2354 vc_debug_("simd_cast{offset, right, nofit}(", ")\n", offset, x); \
2355 return simd_cast_with_offset<Return, \
2356 offset * Return::Size - Common::left_size<N>()>( \
2357 internal_data1(x)); \
2358 } \
2359 \
2360 template <typename Return, int offset, typename T, std::size_t N, typename V, \
2361 std::size_t M> \
2362 Vc_INTRINSIC Vc_CONST enable_if< \
2363 (N != M && \
2364 offset != 0 && (offset + 1) * Return::Size <= Common::left_size<N>()), \
2365 Return> \
2366 simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG4) \
2367 { \
2368 vc_debug_("simd_cast{offset, left}(", ")\n", offset, x); \
2369 return simd_cast<Return, offset>(internal_data0(x)); \
2370 } \
2371 \
2372 template <typename Return, int offset, typename T, std::size_t N, typename V, \
2373 std::size_t M> \
2374 Vc_INTRINSIC Vc_CONST \
2375 enable_if<(N != M && (offset * Return::Size < Common::left_size<N>()) && \
2376 offset != 0 && (offset + 1) * Return::Size > Common::left_size<N>()), \
2377 Return> \
2378 simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG5) \
2379 { \
2380 vc_debug_("simd_cast{offset, copy scalars}(", ")\n", offset, x); \
2381 using R = typename Return::EntryType; \
2382 Return r = Return(0); \
2383 for (std::size_t i = offset * Return::Size; \
2384 i < std::min(N, (offset + 1) * Return::Size); ++i) { \
2385 r[i - offset * Return::Size] = static_cast<R>(x[i]); \
2386 } \
2387 return r; \
2388 } \
2389 Vc_NOTHING_EXPECTING_SEMICOLON
2390 Vc_SIMDARRAY_CASTS(SimdArray);
2391 Vc_SIMDARRAY_CASTS(SimdMaskArray);
2392 #undef Vc_SIMDARRAY_CASTS
2393
// simd_cast_drop_arguments<Return>(...): calls simd_cast<Return> with only as
// many leading arguments as Return has room for, silently discarding the rest.
// The "extra" parameter deliberately sits *after* the pack so that overload
// resolution (driven by explicit template arguments at the call site) peels
// trailing arguments one at a time.

// Terminal case: a single argument is forwarded directly.
template <typename Return, typename From>
Vc_INTRINSIC Vc_CONST Return simd_cast_drop_arguments(From x)
{
    return simd_cast<Return>(x);
}
// The full pack (including the trailing argument) still fits into Return:
// nothing to drop, forward everything.
template <typename Return, typename... Froms>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<Froms...>::value &&
               sizeof...(Froms) * first_type_of<Froms...>::Size < Return::Size),
              Return>
    simd_cast_drop_arguments(Froms... xs, first_type_of<Froms...> x)
{
    return simd_cast<Return>(xs..., x);
}


// Too many arguments: drop the unnamed trailing argument and recurse with one
// argument less. (sizeof...(Froms) != 0 keeps this overload from competing
// with the two-argument terminal case below.)
template <typename Return, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST enable_if<
    (are_all_types_equal<From, Froms...>::value &&
     (1 + sizeof...(Froms)) * From::Size >= Return::Size && sizeof...(Froms) != 0),
    Return>
simd_cast_drop_arguments(Froms... xs, From x, From)
{
    return simd_cast_drop_arguments<Return, Froms...>(xs..., x);
}
// Two arguments, but the first alone already covers Return: drop the second.
template <typename Return, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<From>::value && From::Size >= Return::Size), Return>
    simd_cast_drop_arguments(From x, From)
{
    return simd_cast_drop_arguments<Return>(x);
}
2427
2428
// simd_cast_with_offset<Return, offset>(x): like simd_cast<Return, index>, but
// the offset is given in scalar entries of the source instead of blocks of
// Return::Size entries.

// offset is a multiple of Return::Size: translate it into a block index for the
// regular offset cast.
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size == 0),
              Return> simd_cast_with_offset(const From &x)
{
    return simd_cast<Return, offset / Return::Size>(x);
}
// Unaligned offset, bisected (non-atomic) Return: construct the two storage
// halves independently; the right half reads R0::Size entries further in.
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
               ((Traits::isSimdArray<Return>::value &&
                 !Traits::isAtomicSimdArray<Return>::value) ||
                (Traits::isSimdMaskArray<Return>::value &&
                 !Traits::isAtomicSimdMaskArray<Return>::value))),
              Return>
    simd_cast_with_offset(const From &x)
{
    using R0 = typename Return::storage_type0;
    using R1 = typename Return::storage_type1;
    return {simd_cast_with_offset<R0, offset>(x),
            simd_cast_with_offset<R1, offset + R0::Size>(x)};
}
// Unaligned offset, atomic Return: shift the source by the sub-block remainder
// first, then do an aligned block-offset cast.
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
               ((Traits::isSimdArray<Return>::value &&
                 Traits::isAtomicSimdArray<Return>::value) ||
                (Traits::isSimdMaskArray<Return>::value &&
                 Traits::isAtomicSimdMaskArray<Return>::value))),
              Return>
    simd_cast_with_offset(const From &x)
{
    return simd_cast<Return, offset / Return::Size>(x.shifted(offset % Return::Size));
}
// offset == 0 (any number of arguments): identical to a plain simd_cast.
template <typename Return, std::size_t offset, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<From, Froms...>::value && offset == 0), Return>
    simd_cast_with_offset(const From &x, const Froms &... xs)
{
    return simd_cast<Return>(x, xs...);
}
2470
2471
// Calls simd_cast<Return> with all arguments except the (unnamed) trailing one.
// The split of the argument list into From... and T is fixed by the explicit
// template arguments supplied at the call site.
template <typename Return, typename T, typename... From>
Vc_INTRINSIC Vc_CONST Return simd_cast_without_last(const From &... xs, const T &)
{
    return simd_cast<Return>(xs...);
}
2477
2478
2479
#ifdef Vc_MSVC
// MSVC-only explicit overloads of extract_interleaved for the single-type case
// (sizeof...(Ts) == 0 in the variadic versions below).
// NOTE(review): presumably works around an MSVC pack-expansion/overload
// resolution limitation — TODO confirm against the supported MSVC versions.
template <std::size_t I, typename T0>
Vc_INTRINSIC Vc_CONST enable_if<(I == 0), T0> extract_interleaved(const T0 &a0, const T0 &)
{
    return a0;
}
// I == 1: the element from the second pack.
template <std::size_t I, typename T0>
Vc_INTRINSIC Vc_CONST enable_if<(I == 1), T0> extract_interleaved(const T0 &, const T0 &b0)
{
    return b0;
}
#endif
2495
2496
// extract_interleaved<I>(a0, a..., b0, b...): returns the I-th element of the
// interleaved sequence a0, b0, a1, b1, ... Both packs have identical types.

// I == 0: the head of the first pack is the answer.
template <std::size_t I, typename T0, typename... Ts>
Vc_INTRINSIC Vc_CONST enable_if<(I == 0), T0> extract_interleaved(const T0 &a0,
                                                                  const Ts &...,
                                                                  const T0 &,
                                                                  const Ts &...)
{
    return a0;
}

// I == 1: the head of the second pack is the answer.
template <std::size_t I, typename T0, typename... Ts>
Vc_INTRINSIC Vc_CONST enable_if<(I == 1), T0> extract_interleaved(const T0 &,
                                                                  const Ts &...,
                                                                  const T0 &b0,
                                                                  const Ts &...)
{
    return b0;
}

// I > 1: drop the head of both packs and search for element I - 2.
template <std::size_t I, typename T0, typename... Ts>
Vc_INTRINSIC Vc_CONST enable_if<(I > 1), T0> extract_interleaved(const T0 &,
                                                                 const Ts &... a,
                                                                 const T0 &,
                                                                 const Ts &... b)
{
    return extract_interleaved<I - 2, Ts...>(a..., b...);
}
2523
// Helper: expands Indexes = 0 ... 2*sizeof...(Ts)-1 and uses
// extract_interleaved to reorder the concatenated packs a..., b... into the
// interleaved order a0, b0, a1, b1, ... before calling simd_cast.
template <typename Return, typename... Ts, std::size_t... Indexes>
Vc_INTRINSIC Vc_CONST Return
simd_cast_interleaved_argument_order_1(index_sequence<Indexes...>, const Ts &... a,
                                       const Ts &... b)
{
    return simd_cast<Return>(extract_interleaved<Indexes, Ts...>(a..., b...)...);
}

// Calls simd_cast<Return> with the two argument packs interleaved
// (a0, b0, a1, b1, ...) rather than concatenated.
template <typename Return, typename... Ts>
Vc_INTRINSIC Vc_CONST Return
simd_cast_interleaved_argument_order(const Ts &... a, const Ts &... b)
{
    using seq = make_index_sequence<sizeof...(Ts)*2>;
    return simd_cast_interleaved_argument_order_1<Return, Ts...>(seq(), a..., b...);
}
2540
2541
// Generates one conditional_assign overload per compound-assignment operator:
// applies `lhs(mask) op_ rhs`, i.e. the assignment takes effect only in the
// lanes selected by mask (write-masked assignment).
#define Vc_CONDITIONAL_ASSIGN(name_, op_)                                                \
    template <Operator O, typename T, std::size_t N, typename V, size_t VN, typename M,  \
              typename U>                                                                \
    Vc_INTRINSIC enable_if<O == Operator::name_, void> conditional_assign(               \
        SimdArray<T, N, V, VN> &lhs, M &&mask, U &&rhs)                                  \
    {                                                                                    \
        lhs(mask) op_ rhs;                                                               \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
Vc_CONDITIONAL_ASSIGN( Assign, =);
Vc_CONDITIONAL_ASSIGN( PlusAssign, +=);
Vc_CONDITIONAL_ASSIGN( MinusAssign, -=);
Vc_CONDITIONAL_ASSIGN( MultiplyAssign, *=);
Vc_CONDITIONAL_ASSIGN( DivideAssign, /=);
Vc_CONDITIONAL_ASSIGN( RemainderAssign, %=);
Vc_CONDITIONAL_ASSIGN( XorAssign, ^=);
Vc_CONDITIONAL_ASSIGN( AndAssign, &=);
Vc_CONDITIONAL_ASSIGN( OrAssign, |=);
Vc_CONDITIONAL_ASSIGN( LeftShiftAssign,<<=);
Vc_CONDITIONAL_ASSIGN(RightShiftAssign,>>=);
#undef Vc_CONDITIONAL_ASSIGN
2563
// Generates the unary increment/decrement variants of conditional_assign: the
// operation is applied through the write-masked proxy lhs(mask) and its result
// is returned.
#define Vc_CONDITIONAL_ASSIGN(name_, expr_)                                              \
    template <Operator O, typename T, std::size_t N, typename V, size_t VN, typename M>  \
    Vc_INTRINSIC enable_if<O == Operator::name_, SimdArray<T, N, V, VN>>                 \
    conditional_assign(SimdArray<T, N, V, VN> &lhs, M &&mask)                            \
    {                                                                                    \
        return expr_;                                                                    \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
Vc_CONDITIONAL_ASSIGN(PostIncrement, lhs(mask)++);
Vc_CONDITIONAL_ASSIGN( PreIncrement, ++lhs(mask));
Vc_CONDITIONAL_ASSIGN(PostDecrement, lhs(mask)--);
Vc_CONDITIONAL_ASSIGN( PreDecrement, --lhs(mask));
#undef Vc_CONDITIONAL_ASSIGN
2577
2578 namespace Common
2579 {
// 4x4 transpose of SimdArrays that wrap exactly one native vector each
// (N == VN): unwrap results and inputs to the native vector type V and defer
// to V's transpose_impl.
template <typename T, size_t N, typename V>
inline void transpose_impl(
    TransposeTag<4, 4>, SimdArray<T, N, V, N> *Vc_RESTRICT r[],
    const TransposeProxy<SimdArray<T, N, V, N>, SimdArray<T, N, V, N>,
                         SimdArray<T, N, V, N>, SimdArray<T, N, V, N>> &proxy)
{
    // Pointers to the native vectors stored inside the four result SimdArrays.
    V *Vc_RESTRICT r2[4] = {&internal_data(*r[0]), &internal_data(*r[1]),
                            &internal_data(*r[2]), &internal_data(*r[3])};
    transpose_impl(TransposeTag<4, 4>(), &r2[0],
                   TransposeProxy<V, V, V, V>{internal_data(std::get<0>(proxy.in)),
                                              internal_data(std::get<1>(proxy.in)),
                                              internal_data(std::get<2>(proxy.in)),
                                              internal_data(std::get<3>(proxy.in))});
}
2594
2595 template <typename T, typename V>
2596 inline void transpose_impl(
2597 TransposeTag<2, 4>, SimdArray<T, 4, V, 1> *Vc_RESTRICT r[],
2598 const TransposeProxy<SimdArray<T, 2, V, 1>, SimdArray<T, 2, V, 1>,
2599 SimdArray<T, 2, V, 1>, SimdArray<T, 2, V, 1>> &proxy)
2600 {
2601 auto &lo = *r[0];
2602 auto &hi = *r[1];
2603 internal_data0(internal_data0(lo)) = internal_data0(std::get<0>(proxy.in));
2604 internal_data1(internal_data0(lo)) = internal_data0(std::get<1>(proxy.in));
2605 internal_data0(internal_data1(lo)) = internal_data0(std::get<2>(proxy.in));
2606 internal_data1(internal_data1(lo)) = internal_data0(std::get<3>(proxy.in));
2607 internal_data0(internal_data0(hi)) = internal_data1(std::get<0>(proxy.in));
2608 internal_data1(internal_data0(hi)) = internal_data1(std::get<1>(proxy.in));
2609 internal_data0(internal_data1(hi)) = internal_data1(std::get<2>(proxy.in));
2610 internal_data1(internal_data1(hi)) = internal_data1(std::get<3>(proxy.in));
2611 }
2612
// 4x4 transpose for single-entry SimdArrays (N == 1, scalar-backed): same
// unwrap-and-forward pattern as the N == VN overload above.
template <typename T, typename V>
inline void transpose_impl(
    TransposeTag<4, 4>, SimdArray<T, 1, V, 1> *Vc_RESTRICT r[],
    const TransposeProxy<SimdArray<T, 1, V, 1>, SimdArray<T, 1, V, 1>,
                         SimdArray<T, 1, V, 1>, SimdArray<T, 1, V, 1>> &proxy)
{
    // Pointers to the wrapped native (scalar) vectors of the four results.
    V *Vc_RESTRICT r2[4] = {&internal_data(*r[0]), &internal_data(*r[1]),
                            &internal_data(*r[2]), &internal_data(*r[3])};
    transpose_impl(TransposeTag<4, 4>(), &r2[0],
                   TransposeProxy<V, V, V, V>{internal_data(std::get<0>(proxy.in)),
                                              internal_data(std::get<1>(proxy.in)),
                                              internal_data(std::get<2>(proxy.in)),
                                              internal_data(std::get<3>(proxy.in))});
}
2627
// 4x4 transpose for bisected scalar-backed SimdArrays (VN == 1, general N):
// split the problem into two 2x4 sub-transposes, one over the low halves
// (internal_data0) and one over the high halves (internal_data1) of the inputs.
template <typename T, size_t N, typename V>
inline void transpose_impl(
    TransposeTag<4, 4>, SimdArray<T, N, V, 1> *Vc_RESTRICT r[],
    const TransposeProxy<SimdArray<T, N, V, 1>, SimdArray<T, N, V, 1>,
                         SimdArray<T, N, V, 1>, SimdArray<T, N, V, 1>> &proxy)
{
    // First two results receive the low halves, last two the high halves.
    SimdArray<T, N, V, 1> *Vc_RESTRICT r0[4 / 2] = {r[0], r[1]};
    SimdArray<T, N, V, 1> *Vc_RESTRICT r1[4 / 2] = {r[2], r[3]};
    using H = SimdArray<T, 2>;
    transpose_impl(TransposeTag<2, 4>(), &r0[0],
                   TransposeProxy<H, H, H, H>{internal_data0(std::get<0>(proxy.in)),
                                              internal_data0(std::get<1>(proxy.in)),
                                              internal_data0(std::get<2>(proxy.in)),
                                              internal_data0(std::get<3>(proxy.in))});
    transpose_impl(TransposeTag<2, 4>(), &r1[0],
                   TransposeProxy<H, H, H, H>{internal_data1(std::get<0>(proxy.in)),
                                              internal_data1(std::get<1>(proxy.in)),
                                              internal_data1(std::get<2>(proxy.in)),
                                              internal_data1(std::get<3>(proxy.in))});
}
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680 }
2681
2682
2683 namespace Detail
2684 {
2685
2686
// Interleave/deinterleave support for SimdArrays wrapping exactly one native
// vector (N == VN): unwrap each operand via internal_data and forward to the
// native vector's InterleaveImpl.
template <class T, size_t N, class V, size_t VSizeof>
struct InterleaveImpl<SimdArray<T, N, V, N>, N, VSizeof> {
    // Forwards the interleaved store of vv... at indexes i into data.
    template <class I, class... VV>
    static Vc_INTRINSIC void interleave(T *const data, const I &i, const VV &... vv)
    {
        InterleaveImpl<V, N, VSizeof>::interleave(data, i, internal_data(vv)...);
    }
    // Forwards the interleaved load from data at indexes i into vv...
    template <class I, class... VV>
    static Vc_INTRINSIC void deinterleave(T const *const data, const I &i, VV &... vv)
    {
        InterleaveImpl<V, N, VSizeof>::deinterleave(data, i, internal_data(vv)...);
    }
};
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727 }
2728
2729
2730
2731 }
2732
2733
2734 namespace std
2735 {
2736 template <typename T, size_t N, typename V, size_t VN>
2737 struct numeric_limits<Vc::SimdArray<T, N, V, VN>> : public numeric_limits<T> {
2738 private:
2739 using R = Vc::SimdArray<T, N, V, VN>;
2740
2741 public:
2742 static Vc_ALWAYS_INLINE Vc_CONST R max() noexcept { return numeric_limits<T>::max(); }
2743 static Vc_ALWAYS_INLINE Vc_CONST R min() noexcept { return numeric_limits<T>::min(); }
2744 static Vc_ALWAYS_INLINE Vc_CONST R lowest() noexcept
2745 {
2746 return numeric_limits<T>::lowest();
2747 }
2748 static Vc_ALWAYS_INLINE Vc_CONST R epsilon() noexcept
2749 {
2750 return numeric_limits<T>::epsilon();
2751 }
2752 static Vc_ALWAYS_INLINE Vc_CONST R round_error() noexcept
2753 {
2754 return numeric_limits<T>::round_error();
2755 }
2756 static Vc_ALWAYS_INLINE Vc_CONST R infinity() noexcept
2757 {
2758 return numeric_limits<T>::infinity();
2759 }
2760 static Vc_ALWAYS_INLINE Vc_CONST R quiet_NaN() noexcept
2761 {
2762 return numeric_limits<T>::quiet_NaN();
2763 }
2764 static Vc_ALWAYS_INLINE Vc_CONST R signaling_NaN() noexcept
2765 {
2766 return numeric_limits<T>::signaling_NaN();
2767 }
2768 static Vc_ALWAYS_INLINE Vc_CONST R denorm_min() noexcept
2769 {
2770 return numeric_limits<T>::denorm_min();
2771 }
2772 };
2773 }
2774
2775
2776 #endif
2777
2778