File indexing completed on 2025-01-31 10:25:36
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028 #ifndef VC_AVX_VECTOR_H_
0029 #define VC_AVX_VECTOR_H_
0030
0031 #include "intrinsics.h"
0032 #include "casts.h"
0033 #include "../sse/vector.h"
0034 #include "shuffle.h"
0035 #include "vectorhelper.h"
0036 #include "mask.h"
0037 #include <algorithm>
0038 #include <cmath>
0039 #include "../common/aliasingentryhelper.h"
0040 #include "../common/memoryfwd.h"
0041 #include "../common/where.h"
0042 #include "macros.h"
0043
0044 #ifdef isfinite
0045 #undef isfinite
0046 #endif
0047 #ifdef isnan
0048 #undef isnan
0049 #endif
0050
0051 namespace Vc_VERSIONED_NAMESPACE
0052 {
0053 namespace Detail
0054 {
0055 template <typename T, typename Abi> struct VectorTraits  // bundle of the types that belong to Vector<T, Abi>
0056 {
0057 using mask_type = Vc::Mask<T, Abi>;
0058 using vector_type = Vc::Vector<T, Abi>;
0059 using writemasked_vector_type = Common::WriteMaskedVector<vector_type, mask_type>;  // the type Vector::operator()(mask) returns
0060 using intrinsic_type = typename AVX::VectorTypeHelper<T>::Type;  // storage type selected by AVX::VectorTypeHelper for T
0061 };
0062 }
0063
0064 #define Vc_CURRENT_CLASS_NAME Vector
0065 template <typename T> class Vector<T, VectorAbi::Avx>
0066 {
0067 public:
0068 using abi = VectorAbi::Avx;
0069 // traits_type bundles the mask, write-masked and intrinsic types for this T/abi pair.
0070 private:
0071 using traits_type = Detail::VectorTraits<T, abi>;
0072 static_assert(
0073 std::is_arithmetic<T>::value,
0074 "Vector<T> only accepts arithmetic builtin types as template parameter T.");
0075
0076 using WriteMaskedVector = typename traits_type::writemasked_vector_type;
0077 // Public type vocabulary; several names alias one another (Mask/MaskType/mask_type, EntryType/value_type/VectorEntryType).
0078 public:
0079 using VectorType = typename traits_type::intrinsic_type;
0080 using vector_type = VectorType;
0081
0082 using mask_type = typename traits_type::mask_type;
0083 using Mask = mask_type;
0084 using MaskType = mask_type;
0085 using MaskArg Vc_DEPRECATED_ALIAS("Use MaskArgument instead.") = typename Mask::AsArg;
0086 using MaskArgument = typename Mask::AsArg;
0087 using reference = Detail::ElementReference<Vector>;
0088 // NOTE(review): macro defined elsewhere; by its name it presumably supplies aligned operator new/delete - confirm.
0089 Vc_FREE_STORE_OPERATORS_ALIGNED(alignof(VectorType));
0090 // Size is the number of EntryType values that fit into one VectorType; MemoryAlignment is alignof(VectorType).
0091 using EntryType = T;
0092 using value_type = EntryType;
0093 typedef EntryType VectorEntryType;
0094 static constexpr size_t Size = sizeof(VectorType) / sizeof(EntryType);
0095 static constexpr size_t MemoryAlignment = alignof(VectorType);
0096 using IndexType = fixed_size_simd<int, Size>;
0097 using index_type = IndexType;
0098 typedef Vector<T, abi> AsArg;
0099 typedef VectorType VectorTypeArg;
0100
0101 protected:
0102 template <typename U> using V = Vector<U, abi>;
0103
0104 // helper that is specialized on VectorType
0105 typedef AVX::VectorHelper<VectorType> HV;
0106
0107 // helper that is specialized on T
0108 typedef AVX::VectorHelper<T> HT;
0109
0110 // converts v to VectorType through AVX::avx_cast
0111 template <typename V> static Vc_INTRINSIC VectorType _cast(V v)
0112 {
0113 return AVX::avx_cast<VectorType>(v);
0114 }
0115 // d holds the vector's data; members in this class reach it through d.v(), d.m(i) and d.set(i, x).
0116 typedef Common::VectorMemoryUnion<VectorType, EntryType> StorageType;
0117 StorageType d;
0118
0119 using WidthT = Common::WidthT<VectorType>;
0120
0121
0122
0123 public:
0124 #include "../common/generalinterface.h"
0125 // Declaration only; the definition is not part of this listing. NOTE(review): presumably returns a vector of random entries - confirm.
0126 static Vc_ALWAYS_INLINE_L Vector Random() Vc_ALWAYS_INLINE_R;
0127
0128
0129 // Wraps a raw VectorTypeArg value; not explicit, so such values convert to Vector implicitly.
0130 Vc_ALWAYS_INLINE Vector(VectorTypeArg x) : d(x) {}
0131
0132 // Implicit conversion from a vector of element type U; participates only when Traits::is_implicit_cast_allowed<U, T> holds.
0133 template <typename U>
0134 Vc_INTRINSIC Vector(
0135 V<U> x, typename std::enable_if<Traits::is_implicit_cast_allowed<U, T>::value,
0136 void *>::type = nullptr)
0137 : d(AVX::convert<U, T>(x.data()))
0138 {
0139 }
0140
0141 #if Vc_IS_VERSION_1
0142 // Compiled only under Vc_IS_VERSION_1: deprecated explicit conversion for the element types the implicit constructor rejects.
0143 template <typename U>
0144 Vc_DEPRECATED("use simd_cast instead of explicit type casting to convert between "
0145 "vector types") Vc_INTRINSIC explicit Vector(
0146 V<U> x,
0147 typename std::enable_if<!Traits::is_implicit_cast_allowed<U, T>::value,
0148 void *>::type = nullptr)
0149 : d(Detail::zeroExtendIfNeeded(AVX::convert<U, T>(x.data())))
0150 {
0151 }
0152
0153 // Deprecated explicit conversion from any type that satisfies Traits::is_simd_vector and does not
0154 // decay to this Vector type. Declaration only; the definition is not part of this listing.
0155 template <typename U,
0156 typename = enable_if<Traits::is_simd_vector<U>::value &&
0157 !std::is_same<Vector, Traits::decay<U>>::value>>
0158 Vc_DEPRECATED("use simd_cast instead of explicit type casting to convert between "
0159 "vector types") Vc_INTRINSIC_L
0160 explicit Vector(U &&x) Vc_INTRINSIC_R;
0161 #endif
0162 // Converts the element reference to EntryType and delegates to the EntryType constructor.
0163 Vc_INTRINSIC explicit Vector(reference a) : Vector(static_cast<EntryType>(a)) {}
0164
0165 // Scalar constructors. The first initialises d from Detail::avx_broadcast(a). The template overload
0166 // accepts exactly int when EntryType is not int, casts it to EntryType and delegates to the first.
0167 Vc_INTRINSIC Vector(EntryType a) : d(Detail::avx_broadcast(a)) {}
0168 template <typename U>
0169 Vc_INTRINSIC Vector(U a,
0170 typename std::enable_if<std::is_same<U, int>::value &&
0171 !std::is_same<U, EntryType>::value,
0172 void *>::type = nullptr)
0173 : Vector(static_cast<EntryType>(a))
0174 {
0175 }
0176
0177 // Deliberately unusable: EntryType is never void, so instantiating this constructor always fails the static_assert.
0178 explicit Vector(std::initializer_list<EntryType>)
0179 {
0180 static_assert(std::is_same<EntryType, void>::value,
0181 "A SIMD vector object cannot be initialized from an initializer list "
0182 "because the number of entries in the vector is target-dependent.");
0183 }
0184
0185 #include "../common/loadinterface.h"
0186 #include "../common/storeinterface.h"
0187
0188
0189 // Declarations only. NOTE(review): by name these zero the vector (all entries, or those selected / not selected by k) - confirm in the definitions.
0190 Vc_INTRINSIC_L void setZero() Vc_INTRINSIC_R;
0191 Vc_INTRINSIC_L void setZero(const Mask &k) Vc_INTRINSIC_R;
0192 Vc_INTRINSIC_L void setZeroInverted(const Mask &k) Vc_INTRINSIC_R;
0193 // NOTE(review): presumably writes quiet NaNs (all entries, or those selected by k) - confirm in the definitions.
0194 Vc_INTRINSIC_L void setQnan() Vc_INTRINSIC_R;
0195 Vc_INTRINSIC_L void setQnan(MaskArgument k) Vc_INTRINSIC_R;
0196
0197 #include "../common/gatherinterface.h"
0198 #include "../common/scatterinterface.h"
0199 #if defined Vc_IMPL_AVX2 && !defined Vc_MSVC
0200
0201
0202 // Unmasked gather, enabled when sizeof(T) >= 4 and the index vector has at least size() entries.
0203 // The indexes are converted to SSE::int_v (Size == 4) or AVX2::int_v and passed to AVX::gather<sizeof(T) * Scale>.
0204 template <class U, class A, int Scale, int N = Vector<U, A>::size(),
0205 class = enable_if<(Vector<U, A>::size() >= size() && sizeof(T) >= 4)>>
0206 Vc_INTRINSIC void gatherImplementation(
0207 const Common::GatherArguments<T, Vector<U, A>, Scale> &args)
0208 {
0209 d.v() = AVX::gather<sizeof(T) * Scale>(
0210 args.address,
0211 simd_cast<conditional_t<Size == 4, SSE::int_v, AVX2::int_v>>(args.indexes)
0212 .data());
0213 }
0214
0215 // Masked variant of the gather above: the current d.v() and k.data() are passed to AVX::gather ahead of the address and indexes.
0216 template <class U, class A, int Scale, int N = Vector<U, A>::size(),
0217 class = enable_if<(Vector<U, A>::size() >= size() && sizeof(T) >= 4)>>
0218 Vc_INTRINSIC void gatherImplementation(
0219 const Common::GatherArguments<T, Vector<U, A>, Scale> &args, MaskArgument k)
0220 {
0221 d.v() = AVX::gather<sizeof(T) * Scale>(
0222 d.v(), k.data(), args.address,
0223 simd_cast<conditional_t<Size == 4, SSE::int_v, AVX2::int_v>>(args.indexes)
0224 .data());
0225 }
0226
0227
0228 // Unmasked gather for 2-byte T from an integral memory type MT of at most 2 bytes. Parts 0 and 1 of the index
0229 // vector are gathered separately as int through an int-typed view of args.address, then combined by simd_cast.
0230 template <
0231 class MT, class U, class A, int Scale,
0232 class = enable_if<(sizeof(T) == 2 && std::is_integral<MT>::value &&
0233 (sizeof(MT) <= 2) && Vector<U, A>::size() >= size())>>
0234 Vc_INTRINSIC void gatherImplementation(
0235 const Common::GatherArguments<MT, Vector<U, A>, Scale> &args)
0236 {
0237 using AVX2::int_v;
0238 const auto idx0 = simd_cast<int_v, 0>(args.indexes).data();
0239 const auto idx1 = simd_cast<int_v, 1>(args.indexes).data();
0240 *this = simd_cast<Vector>(int_v(AVX::gather<sizeof(MT) * Scale>(
0241 aliasing_cast<int>(args.address), idx0)),
0242 int_v(AVX::gather<sizeof(MT) * Scale>(
0243 aliasing_cast<int>(args.address), idx1)));
0244 if (sizeof(MT) == 1) {  // one-byte source: keep only the low byte of every entry
0245 if (std::is_signed<MT>::value) {
0246 using Signed = AVX2::Vector<typename std::make_signed<T>::type>;
0247 *this = (simd_cast<Signed>(*this) << 8) >> 8;  // shift up and back down on the signed vector type
0248 } else {
0249 *this &= 0xff;
0250 }
0251 }
0252 }
0253
0254 // Masked variant of the 2-byte gather: both parts gather into a zero vector under masks k0/k1, and the result is merged into *this by assign(v, k).
0255 template <
0256 class MT, class U, class A, int Scale,
0257 class = enable_if<(sizeof(T) == 2 && std::is_integral<MT>::value &&
0258 (sizeof(MT) <= 2) && Vector<U, A>::size() >= size())>>
0259 Vc_INTRINSIC void gatherImplementation(
0260 const Common::GatherArguments<MT, Vector<U, A>, Scale> &args, MaskArgument k)
0261 {
0262 using AVX2::int_v;
0263 const auto idx0 = simd_cast<int_v, 0>(args.indexes).data();
0264 const auto idx1 = simd_cast<int_v, 1>(args.indexes).data();
0265 const auto k0 = simd_cast<AVX2::int_m, 0>(k).data();
0266 const auto k1 = simd_cast<AVX2::int_m, 1>(k).data();
0267 auto v = simd_cast<Vector>(
0268 int_v(AVX::gather<sizeof(MT) * Scale>(
0269 _mm256_setzero_si256(), k0, aliasing_cast<int>(args.address), idx0)),
0270 int_v(AVX::gather<sizeof(MT) * Scale>(
0271 _mm256_setzero_si256(), k1, aliasing_cast<int>(args.address), idx1)));
0272 if (sizeof(MT) == 1) {  // one-byte source: keep only the low byte of every entry
0273 if (std::is_signed<MT>::value) {
0274 using Signed = AVX2::Vector<typename std::make_signed<T>::type>;
0275 v = (simd_cast<Signed>(v) << 8) >> 8;
0276 } else {
0277 v &= 0xff;
0278 }
0279 }
0280 assign(v, k);
0281 }
0282
0283 // Fallback for a memory type MT different from T (outside the 2-byte-T / at-most-2-byte-MT case handled by the
0284 // 2-byte overloads): constructs fixed_size_simd<MT, Size> from args and converts it with simd_cast.
0285 template <class MT, class U, class A, int Scale>
0286 Vc_INTRINSIC enable_if<((sizeof(T) != 2 || sizeof(MT) > 2) &&
0287 Traits::is_valid_vector_argument<MT>::value &&
0288 !std::is_same<MT, T>::value &&
0289 Vector<U, A>::size() >= size()),
0290 void>
0291 gatherImplementation(const Common::GatherArguments<MT, Vector<U, A>, Scale> &args)
0292 {
0293 *this = simd_cast<Vector>(fixed_size_simd<MT, Size>(args));
0294 }
0295
0296 // Masked variant of the converting fallback: k is used for constructing the fixed_size_simd and again for assign().
0297 template <class MT, class U, class A, int Scale>
0298 Vc_INTRINSIC enable_if<((sizeof(T) != 2 || sizeof(MT) > 2) &&
0299 Traits::is_valid_vector_argument<MT>::value &&
0300 !std::is_same<MT, T>::value &&
0301 Vector<U, A>::size() >= size()),
0302 void>
0303 gatherImplementation(const Common::GatherArguments<MT, Vector<U, A>, Scale> &args,
0304 MaskArgument k)
0305 {
0306 assign(simd_cast<Vector>(fixed_size_simd<MT, Size>(args, k)), k);
0307 }
0308 #endif
0309
0310
0311 // Prefix forms add/subtract Detail::one(T()) in place; postfix forms return the value held before that update.
0312 Vc_ALWAYS_INLINE Vector &operator++() { d.v() = Detail::add(d.v(), Detail::one(T()), T()); return *this; }
0313 Vc_ALWAYS_INLINE Vector &operator--() { d.v() = Detail::sub(d.v(), Detail::one(T()), T()); return *this; }
0314
0315 Vc_ALWAYS_INLINE Vector operator++(int) { const Vector before = *this; ++*this; return before; }
0316 Vc_ALWAYS_INLINE Vector operator--(int) { const Vector before = *this; --*this; return before; }
0317 // Element accessors for the befriended `reference` type (Detail::ElementReference<Vector>).
0318 private:
0319 friend reference;
0320 Vc_INTRINSIC static value_type get(const Vector &o, int i) noexcept
0321 {
0322 return o.d.m(i);
0323 }
0324 template <typename U>
0325 Vc_INTRINSIC static void set(Vector &o, int i, U &&v) noexcept(
0326 noexcept(std::declval<value_type &>() = v))  // noexcept exactly when assigning v to a value_type is
0327 {
0328 return o.d.set(i, v);
0329 }
0330
0331 public:
0332
0333
0334
0335
0336 // Subscript: the non-const overload returns a `reference` built from {*this, int(index)}, the const overload
0337 // returns the entry by value. No bounds check is performed in either overload.
0338 Vc_ALWAYS_INLINE reference operator[](size_t index) noexcept
0339 {
0340 static_assert(noexcept(reference{std::declval<Vector &>(), int()}), "");
0341 return {*this, int(index)};
0342 }
0343 Vc_ALWAYS_INLINE value_type operator[](size_t index) const noexcept
0344 {
0345 return d.m(index);
0346 }
0347 // Permuting subscripts; declarations only, the definitions are not part of this listing.
0348 Vc_INTRINSIC_L Vc_PURE_L Vector operator[](Permutation::ReversedTag) const Vc_INTRINSIC_R Vc_PURE_R;
0349 Vc_INTRINSIC_L Vc_PURE_L Vector operator[](const IndexType &perm) const Vc_INTRINSIC_R Vc_PURE_R;
0350 // Unary operators. operator! is the result of comparing *this with Zero().
0351 Vc_INTRINSIC Vc_PURE Mask operator!() const
0352 {
0353 return *this == Zero();
0354 }
0355 Vc_ALWAYS_INLINE Vector operator~() const
0356 {
0357 #ifndef Vc_ENABLE_FLOAT_BIT_OPERATORS
0358 static_assert(std::is_integral<T>::value,
0359 "bit-complement can only be used with Vectors of integral type");
0360 #endif
0361 return Detail::andnot_(data(), Detail::allone<VectorType>());  // NOTE(review): presumably (~data) & all-ones - confirm andnot_ operand order
0362 }
0363 Vc_ALWAYS_INLINE_L Vc_PURE_L Vector operator-() const Vc_ALWAYS_INLINE_R Vc_PURE_R;  // declaration only
0364 Vc_INTRINSIC Vc_PURE Vector operator+() const { return *this; }  // returns a copy of *this
0365 // Vector-by-vector shifts, generated for every operator in Vc_ALL_SHIFTS: op= is only declared, op carries a static_assert requiring integral T.
0366 // NOTE(review): the generated `operator op` body has no return statement although it returns Vector - confirm every usable instantiation is specialized elsewhere.
0367 #define Vc_OP_VEC(op) \
0368 Vc_INTRINSIC Vector &operator op##=(AsArg x); \
0369 Vc_INTRINSIC Vc_PURE Vector operator op(AsArg x) const \
0370 { \
0371 static_assert( \
0372 std::is_integral<T>::value, \
0373 "bitwise-operators can only be used with Vectors of integral type"); \
0374 }
0375 Vc_ALL_SHIFTS(Vc_OP_VEC);
0376 #undef Vc_OP_VEC
0377 // Shifts by a scalar amount; declarations only, the definitions are not part of this listing.
0378 Vc_ALWAYS_INLINE_L Vector &operator>>=(int x) Vc_ALWAYS_INLINE_R;
0379 Vc_ALWAYS_INLINE_L Vector &operator<<=(int x) Vc_ALWAYS_INLINE_R;
0380 Vc_ALWAYS_INLINE_L Vector operator>>(int x) const Vc_ALWAYS_INLINE_R;
0381 Vc_ALWAYS_INLINE_L Vector operator<<(int x) const Vc_ALWAYS_INLINE_R;
0382 // Deprecated member wrapper: forwards to the free function Vc::isnegative(*this) and returns its mask.
0383 Vc_DEPRECATED("use isnegative(x) instead") Vc_INTRINSIC Vc_PURE Mask
0384 isNegative() const
0385 {
0386 return Vc::isnegative(*this);
0387 }
0388 // Replaces data() with Detail::blend(data(), v.data(), mask.data()). NOTE(review): presumably takes v where mask is set - confirm blend semantics.
0389 Vc_ALWAYS_INLINE void assign( const Vector &v, const Mask &mask ) {
0390 data() = Detail::blend(data(), v.data(), mask.data());
0391 }
0392 // Deprecated conversions: staticCast constructs V2 from *this; reinterpretCast passes data() through AVX::avx_cast to V2's VectorType.
0393 template <typename V2>
0394 Vc_DEPRECATED("Use simd_cast instead of Vector::staticCast") Vc_ALWAYS_INLINE V2
0395 staticCast() const
0396 {
0397 return V2(*this);
0398 }
0399 template <typename V2>
0400 Vc_DEPRECATED("use reinterpret_components_cast instead") Vc_ALWAYS_INLINE V2
0401 reinterpretCast() const
0402 {
0403 return AVX::avx_cast<typename V2::VectorType>(data());
0404 }
0405 // Builds a WriteMaskedVector from *this and k; the conditional_assign helpers in this file use it as lhs(mask).
0406 Vc_ALWAYS_INLINE WriteMaskedVector operator()(const Mask &k)
0407 {
0408 return {*this, k};
0409 }
0410 // Direct access to the stored VectorType (d.v()), as a mutable or a const reference.
0411 Vc_ALWAYS_INLINE VectorType &data() { return d.v(); }
0412 Vc_ALWAYS_INLINE const VectorType &data() const { return d.v(); }
0413 // Declaration only. NOTE(review): presumably replicates entry Index into every entry - confirm in the definition.
0414 template<int Index>
0415 Vc_INTRINSIC_L Vector broadcast() const Vc_INTRINSIC_R;
0416 // Declarations only; each returns a (Vector, int) pair. NOTE(review): meaning of the two members is not visible here - confirm.
0417 Vc_INTRINSIC_L std::pair<Vector, int> minIndex() const Vc_INTRINSIC_R;
0418 Vc_INTRINSIC_L std::pair<Vector, int> maxIndex() const Vc_INTRINSIC_R;
0419 // Reductions to a single EntryType, forwarding data() to the two-argument Detail::min/max/mul/add overloads.
0420 Vc_ALWAYS_INLINE EntryType min() const { return Detail::min(data(), T()); }
0421 Vc_ALWAYS_INLINE EntryType max() const { return Detail::max(data(), T()); }
0422 Vc_ALWAYS_INLINE EntryType product() const { return Detail::mul(data(), T()); }
0423 Vc_ALWAYS_INLINE EntryType sum() const { return Detail::add(data(), T()); }
0424 Vc_ALWAYS_INLINE_L Vector partialSum() const Vc_ALWAYS_INLINE_R;
0425 // Masked reductions; declarations only. NOTE(review): presumably restricted to the entries selected by m - confirm.
0426 Vc_ALWAYS_INLINE_L EntryType min(MaskArgument m) const Vc_ALWAYS_INLINE_R;
0427 Vc_ALWAYS_INLINE_L EntryType max(MaskArgument m) const Vc_ALWAYS_INLINE_R;
0428 Vc_ALWAYS_INLINE_L EntryType product(MaskArgument m) const Vc_ALWAYS_INLINE_R;
0429 Vc_ALWAYS_INLINE_L EntryType sum(MaskArgument m) const Vc_ALWAYS_INLINE_R;
0430 // Entry-reordering operations; all const and returning a new Vector. Declarations only, definitions are not part of this listing.
0431 Vc_INTRINSIC_L Vector shifted(int amount, Vector shiftIn) const Vc_INTRINSIC_R;
0432 Vc_INTRINSIC_L Vector shifted(int amount) const Vc_INTRINSIC_R;
0433 Vc_INTRINSIC_L Vector rotated(int amount) const Vc_INTRINSIC_R;
0434 Vc_INTRINSIC_L Vc_PURE_L Vector reversed() const Vc_INTRINSIC_R Vc_PURE_R;
0435 Vc_ALWAYS_INLINE_L Vc_PURE_L Vector sorted() const Vc_ALWAYS_INLINE_R Vc_PURE_R;
0436 // Calls f for entry 0 and then for every entry that differs from the value passed to the previous call.
0437 template <typename F> void callWithValuesSorted(F &&f)
0438 {
0439 EntryType last = d.m(0);  // value handed to the most recent call of f
0440 f(last);
0441 for (size_t pos = 1; pos < Size; ++pos) {
0442 const EntryType current = d.m(pos);
0443 if (current == last) { continue; }  // same as the previously reported value: skip
0444 last = current;
0445 f(last);
0446 }
0447 }
0448 // Invokes f with a copy of each entry (converted to EntryType), via Common::for_all_vector_entries<Size>.
0449 template <typename F> Vc_INTRINSIC void call(F &&f) const
0450 {
0451 Common::for_all_vector_entries<Size>([&](size_t i) { f(EntryType(d.m(i))); });
0452 }
0453 // Same, but only for the indexes produced by iterating where(mask).
0454 template <typename F> Vc_INTRINSIC void call(F &&f, const Mask &mask) const
0455 {
0456 for (size_t i : where(mask)) {
0457 f(EntryType(d.m(i)));
0458 }
0459 }
0460 // Returns a new vector whose entry i is f(entry i of *this); *this is not modified.
0461 template <typename F> Vc_INTRINSIC Vector apply(F &&f) const
0462 {
0463 Vector r;
0464 Common::for_all_vector_entries<Size>(
0465 [&](size_t i) { r.d.set(i, f(EntryType(d.m(i)))); });
0466 return r;
0467 }
0468 // Masked form: starts from a copy of *this and replaces only the entries whose index is produced by where(mask).
0469 template <typename F> Vc_INTRINSIC Vector apply(F &&f, const Mask &mask) const
0470 {
0471 Vector r(*this);
0472 for (size_t i : where(mask)) {
0473 r.d.set(i, f(EntryType(r.d.m(i))));
0474 }
0475 return r;
0476 }
0477 // Overwrites every entry with the result of a plain function: f(i) for the indexed overload, f() for the nullary one.
0478 template<typename IndexT> Vc_INTRINSIC void fill(EntryType (&f)(IndexT)) {
0479 Common::for_all_vector_entries<Size>([&](size_t i) { d.set(i, f(i)); });
0480 }
0481 Vc_INTRINSIC void fill(EntryType (&f)()) {
0482 Common::for_all_vector_entries<Size>([&](size_t i) { d.set(i, f()); });
0483 }
0484 // Declaration only. NOTE(review): presumably builds a Vector from the values returned by gen - confirm in the definition.
0485 template <typename G> static Vc_INTRINSIC_L Vector generate(G gen) Vc_INTRINSIC_R;
0486 // Deprecated member wrapper: forwards to the free function Vc::copysign(*this, x).
0487 Vc_DEPRECATED("use copysign(x, y) instead") Vc_INTRINSIC Vector
0488 copySign(AsArg x) const
0489 {
0490 return Vc::copysign(*this, x);
0491 }
0492
0493 Vc_DEPRECATED("use exponent(x) instead") Vc_INTRINSIC Vector exponent() const
0494 {
0495 Vc::exponent(*this);
0496 }
0497 // Declarations only. NOTE(review): presumably interleave the low / high halves of *this and x - confirm in the definitions.
0498 Vc_INTRINSIC_L Vector interleaveLow(Vector x) const Vc_INTRINSIC_R;
0499 Vc_INTRINSIC_L Vector interleaveHigh(Vector x) const Vc_INTRINSIC_R;
0500 };
0501 #undef Vc_CURRENT_CLASS_NAME
0502 template <typename T> constexpr size_t Vector<T, VectorAbi::Avx>::Size;  // namespace-scope definitions of the in-class constexpr members
0503 template <typename T> constexpr size_t Vector<T, VectorAbi::Avx>::MemoryAlignment;
0504 // Generates one conditional_assign overload per listed Operator value; each executes `lhs(mask) op_ rhs` on an AVX2::Vector<T>.
0505 #define Vc_CONDITIONAL_ASSIGN(name_, op_) \
0506 template <Operator O, typename T, typename M, typename U> \
0507 Vc_INTRINSIC enable_if<O == Operator::name_, void> conditional_assign( \
0508 AVX2::Vector<T> &lhs, M &&mask, U &&rhs) \
0509 { \
0510 lhs(mask) op_ rhs; \
0511 } \
0512 Vc_NOTHING_EXPECTING_SEMICOLON
0513 Vc_CONDITIONAL_ASSIGN( Assign, =);
0514 Vc_CONDITIONAL_ASSIGN( PlusAssign, +=);
0515 Vc_CONDITIONAL_ASSIGN( MinusAssign, -=);
0516 Vc_CONDITIONAL_ASSIGN( MultiplyAssign, *=);
0517 Vc_CONDITIONAL_ASSIGN( DivideAssign, /=);
0518 Vc_CONDITIONAL_ASSIGN( RemainderAssign, %=);
0519 Vc_CONDITIONAL_ASSIGN( XorAssign, ^=);
0520 Vc_CONDITIONAL_ASSIGN( AndAssign, &=);
0521 Vc_CONDITIONAL_ASSIGN( OrAssign, |=);
0522 Vc_CONDITIONAL_ASSIGN( LeftShiftAssign,<<=);
0523 Vc_CONDITIONAL_ASSIGN(RightShiftAssign,>>=);
0524 #undef Vc_CONDITIONAL_ASSIGN
0525 // Two-argument form for increment/decrement: each overload returns the value of the given expression on lhs(mask) as AVX2::Vector<T>.
0526 #define Vc_CONDITIONAL_ASSIGN(name_, expr_) \
0527 template <Operator O, typename T, typename M> \
0528 Vc_INTRINSIC enable_if<O == Operator::name_, AVX2::Vector<T>> conditional_assign( \
0529 AVX2::Vector<T> &lhs, M &&mask) \
0530 { \
0531 return expr_; \
0532 } \
0533 Vc_NOTHING_EXPECTING_SEMICOLON
0534 Vc_CONDITIONAL_ASSIGN(PostIncrement, lhs(mask)++);
0535 Vc_CONDITIONAL_ASSIGN( PreIncrement, ++lhs(mask));
0536 Vc_CONDITIONAL_ASSIGN(PostDecrement, lhs(mask)--);
0537 Vc_CONDITIONAL_ASSIGN( PreDecrement, --lhs(mask));
0538 #undef Vc_CONDITIONAL_ASSIGN
0539
0540 }
0541
0542 #include "vector.tcc"
0543 #include "simd_cast.h"
0544
0545 #endif