File indexing completed on 2025-01-31 10:25:44
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028 #ifndef VC_SSE_MASK_H_
0029 #define VC_SSE_MASK_H_
0030
0031 #include "intrinsics.h"
0032 #include "../common/maskbool.h"
0033 #include "detail.h"
0034 #include "macros.h"
0035
0036 namespace Vc_VERSIONED_NAMESPACE
0037 {
0038 namespace Detail // helper templates called by Mask<T, VectorAbi::Sse>; only declared here, their definitions are not in this header
0039 {
0040 template <size_t Size> // called by Mask::count() with the mask's Size and its __m128i view
0041 Vc_INTRINSIC_L Vc_CONST_L int mask_count(__m128i) Vc_INTRINSIC_R Vc_CONST_R;
0042 template <size_t Size> // called by Mask::toInt(); Mask::get() reads entry i as bit i of the returned int
0043 Vc_INTRINSIC_L Vc_CONST_L int mask_to_int(__m128i) Vc_INTRINSIC_R Vc_CONST_R;
0044 template <size_t Size> // called by Mask::operator== with the __m128 views of both operands
0045 Vc_INTRINSIC_L Vc_CONST_L bool is_equal(__m128, __m128) Vc_INTRINSIC_R Vc_CONST_R;
0046 template <size_t Size> // called by Mask::operator!= with the __m128 views of both operands
0047 Vc_INTRINSIC_L Vc_CONST_L bool is_not_equal(__m128, __m128) Vc_INTRINSIC_R Vc_CONST_R;
0048 }
0049
0050 using SSE::sse_cast;
0051
0052 template <typename T> class Mask<T, VectorAbi::Sse>
0053 { // Mask specialization for VectorAbi::Sse: wraps a single Storage member (d) and offers bitwise, comparison and per-entry access on it.
0054 using abi = VectorAbi::Sse;
0055 friend class Mask< double, abi>; // the Sse masks of these six element types are friends of every Sse mask
0056 friend class Mask< float, abi>;
0057 friend class Mask< int32_t, abi>;
0058 friend class Mask<uint32_t, abi>;
0059 friend class Mask< int16_t, abi>;
0060 friend class Mask<uint16_t, abi>;
0061
0062 // Wrapper type built from Common::MaskBool with the byte size of T as its argument;
0063 // set() converts incoming values to it before writing them into the storage.
0064
0065 typedef Common::MaskBool<sizeof(T)> MaskBool;
0066 // Common::Storage instantiated for SSE::VectorTraits<T>::Size entries of T; type of the data member d.
0067 typedef Common::Storage<T, SSE::VectorTraits<T>::Size> Storage;
0068
0069 public:
0070
0071 // Type obtained when reading one entry: always bool, regardless of T.
0072 // value_type is an alias for the same type.
0073
0074 typedef bool EntryType;
0075 using value_type = EntryType;
0076
0077 // Proxy type returned by the non-const operator[]. It is befriended further down,
0078 // presumably so that it can reach the private get()/set() helpers (ElementReference is defined elsewhere - confirm).
0079
0080 using EntryReference = Detail::ElementReference<Mask>;
0081 using reference = EntryReference;
0082
0083 // Alias of the MaskBool wrapper declared above.
0084
0085
0086
0087 typedef MaskBool VectorEntryType;
0088
0089 // Register type exposed by Storage; every sse_cast<VectorType>(...) in this class targets it.
0090
0091
0092 using VectorType = typename Storage::VectorType;
0093
0094 // The SSE vector class with the same element type T as this mask.
0095
0096
0097 using Vector = SSE::Vector<T>;
0098
0099 public:
0100 Vc_FREE_STORE_OPERATORS_ALIGNED(16); // macro defined elsewhere; presumably supplies 16-byte aligned new/delete - confirm
0101 static constexpr size_t Size = SSE::VectorTraits<T>::Size; // number of entries, taken from SSE::VectorTraits<T>
0102 static constexpr size_t MemoryAlignment = Size; // defined equal to the entry count
0103 static constexpr std::size_t size() { return Size; } // function form of Size
0104 // Argument: the type to use for Mask parameters - a const reference when both Vc_MSVC and _WIN32
0105 // are defined, a by-value Mask otherwise (the reason is not stated in this file).
0106 #if defined Vc_MSVC && defined _WIN32
0107 typedef const Mask &Argument;
0108 #else
0109 typedef Mask Argument;
0110 #endif
0111 // Default construction, copy construction and copy assignment are compiler-generated.
0112 Vc_INTRINSIC Mask() = default;
0113 Vc_INTRINSIC Mask(const Mask &) = default;
0114 Vc_INTRINSIC Mask &operator=(const Mask &) = default;
0115 // Non-explicit constructors from the three SSE register types (bits reinterpreted via sse_cast); the explicit Zero/One/bool forms delegate to the __m128 one.
0116 Vc_INTRINSIC Mask(const __m128 &x) : d(sse_cast<VectorType>(x)) {}
0117 Vc_INTRINSIC Mask(const __m128d &x) : d(sse_cast<VectorType>(x)) {}
0118 Vc_INTRINSIC Mask(const __m128i &x) : d(sse_cast<VectorType>(x)) {}
0119 Vc_INTRINSIC explicit Mask(VectorSpecialInitializerZero) : Mask(_mm_setzero_ps()) {}
0120 Vc_INTRINSIC explicit Mask(VectorSpecialInitializerOne) : Mask(SSE::_mm_setallone_ps()) {}
0121 Vc_INTRINSIC explicit Mask(bool b) : Mask(b ? SSE::_mm_setallone_ps() : _mm_setzero_ps()) {} // one bool selects the same register for the whole mask
0122 Vc_INTRINSIC static Mask Zero() { return Mask{Vc::Zero}; }
0123 Vc_INTRINSIC static Mask One() { return Mask{Vc::One}; }
0124
0125 // Converting constructor from another mask type U, enabled through enable_if_mask_converts_implicitly; mask_cast is instantiated with U's entry count, this mask's Size and __m128.
0126 template <typename U>
0127 Vc_INTRINSIC Mask(
0128 U &&rhs, Common::enable_if_mask_converts_implicitly<Mask, T, U> = nullarg)
0129 : d(sse_cast<VectorType>(
0130 Detail::mask_cast<Traits::simd_vector_size<U>::value, Size, __m128>(
0131 rhs.dataI())))
0132 {
0133 }
0134
0135 #if Vc_IS_VERSION_1
0136 // Deprecated explicit conversion, declared only when Vc_IS_VERSION_1 is true; no definition appears in this header.
0137 template <typename U>
0138 Vc_DEPRECATED("use simd_cast instead of explicit type casting to convert between "
0139 "mask types") Vc_INTRINSIC
0140 explicit Mask(U &&rhs,
0141 Common::enable_if_mask_converts_explicitly<T, U> = nullarg);
0142 #endif
0143 // Construction from a bool array: both forms forward to the matching load() overload.
0144 Vc_ALWAYS_INLINE explicit Mask(const bool *mem) { load(mem); }
0145 template<typename Flags> Vc_ALWAYS_INLINE explicit Mask(const bool *mem, Flags f) { load(mem, f); }
0146 // load(): declared here, defined elsewhere (presumably in mask.tcc, included at the end of this header). The Flags overload ignores its flags argument.
0147 Vc_ALWAYS_INLINE_L void load(const bool *mem) Vc_ALWAYS_INLINE_R;
0148 template<typename Flags> Vc_ALWAYS_INLINE void load(const bool *mem, Flags) { load(mem); }
0149 // store(): same arrangement as load() - declaration only, and the Flags overload ignores its flags argument.
0150 Vc_ALWAYS_INLINE_L void store(bool *) const Vc_ALWAYS_INLINE_R;
0151 template<typename Flags> Vc_ALWAYS_INLINE void store(bool *mem, Flags) const { store(mem); }
0152 // Whole-mask comparison yielding a single bool; delegated to Detail::is_equal / Detail::is_not_equal on the __m128 views.
0153 Vc_ALWAYS_INLINE Vc_PURE bool operator==(const Mask &rhs) const
0154 {
0155 return Detail::is_equal<Size>(dataF(), rhs.dataF());
0156 }
0157 Vc_ALWAYS_INLINE Vc_PURE bool operator!=(const Mask &rhs) const
0158 {
0159 return Detail::is_not_equal<Size>(dataF(), rhs.dataF());
0160 }
0161 // Bitwise complement. The Vc_GCC branch applies ~ directly to the __m128i; the other branch computes (~data) & _mm_setallone_si128().
0162 Vc_ALWAYS_INLINE Vc_PURE Mask operator!() const
0163 {
0164 #ifdef Vc_GCC
0165 return ~dataI();
0166 #else
0167 return _mm_andnot_si128(dataI(), SSE::_mm_setallone_si128());
0168 #endif
0169 }
0170 // In-place bitwise AND / OR / XOR, computed on the __m128 views and cast back to VectorType.
0171 Vc_ALWAYS_INLINE Mask &operator&=(const Mask &rhs) { d.v() = SSE::sse_cast<VectorType>(_mm_and_ps(dataF(), rhs.dataF())); return *this; }
0172 Vc_ALWAYS_INLINE Mask &operator|=(const Mask &rhs) { d.v() = SSE::sse_cast<VectorType>(_mm_or_ps (dataF(), rhs.dataF())); return *this; }
0173 Vc_ALWAYS_INLINE Mask &operator^=(const Mask &rhs) { d.v() = SSE::sse_cast<VectorType>(_mm_xor_ps(dataF(), rhs.dataF())); return *this; }
0174 // Bitwise AND / OR / XOR returning a new Mask (built through the implicit __m128 constructor).
0175 Vc_ALWAYS_INLINE Vc_PURE Mask operator&(const Mask &rhs) const { return _mm_and_ps(dataF(), rhs.dataF()); }
0176 Vc_ALWAYS_INLINE Vc_PURE Mask operator|(const Mask &rhs) const { return _mm_or_ps (dataF(), rhs.dataF()); }
0177 Vc_ALWAYS_INLINE Vc_PURE Mask operator^(const Mask &rhs) const { return _mm_xor_ps(dataF(), rhs.dataF()); }
0178 // && and || run the same intrinsics as & and |: they return a Mask, not a bool, and being overloaded operators they always evaluate both operands.
0179 Vc_ALWAYS_INLINE Vc_PURE Mask operator&&(const Mask &rhs) const { return _mm_and_ps(dataF(), rhs.dataF()); }
0180 Vc_ALWAYS_INLINE Vc_PURE Mask operator||(const Mask &rhs) const { return _mm_or_ps (dataF(), rhs.dataF()); }
0181 // Whole-register reductions. Without Vc_USE_PTEST they inspect the 16-bit result of _mm_movemask_epi8 (top bit of each of the 16 bytes).
0182 Vc_ALWAYS_INLINE Vc_PURE bool isFull () const { return
0183 #ifdef Vc_USE_PTEST
0184 _mm_testc_si128(dataI(), SSE::_mm_setallone_si128()); // non-zero when (~data & second operand) is all zero
0185 #else
0186 _mm_movemask_epi8(dataI()) == 0xffff; // all 16 byte sign bits set
0187 #endif
0188 }
0189 Vc_ALWAYS_INLINE Vc_PURE bool isNotEmpty() const { return
0190 #ifdef Vc_USE_PTEST
0191 0 == _mm_testz_si128(dataI(), dataI()); // testz(a, a) is 1 only when a is all zero
0192 #else
0193 _mm_movemask_epi8(dataI()) != 0x0000; // at least one byte sign bit set
0194 #endif
0195 }
0196 Vc_ALWAYS_INLINE Vc_PURE bool isEmpty() const { return
0197 #ifdef Vc_USE_PTEST
0198 0 != _mm_testz_si128(dataI(), dataI()); // exact negation of isNotEmpty()
0199 #else
0200 _mm_movemask_epi8(dataI()) == 0x0000; // no byte sign bit set
0201 #endif
0202 }
0203 Vc_ALWAYS_INLINE Vc_PURE bool isMix() const {
0204 #ifdef Vc_USE_PTEST
0205 return _mm_test_mix_ones_zeros(dataI(), SSE::_mm_setallone_si128());
0206 #else
0207 const int tmp = _mm_movemask_epi8(dataI());
0208 return tmp != 0 && (tmp ^ 0xffff) != 0; // neither 0x0000 nor 0xffff, i.e. neither isEmpty() nor isFull()
0209 #endif
0210 }
0211 // Raw _mm_movemask_epi8 result of the register (one bit per byte).
0212 Vc_ALWAYS_INLINE Vc_PURE int shiftMask() const { return _mm_movemask_epi8(dataI()); }
0213 // Integer form produced by Detail::mask_to_int<Size>; get() below reads entry i as bit i of this value.
0214 Vc_ALWAYS_INLINE Vc_PURE int toInt() const { return Detail::mask_to_int<Size>(dataI()); }
0215 // Accessors: the stored register as VectorType, and the same bits reinterpreted as __m128 / __m128i / __m128d.
0216 Vc_ALWAYS_INLINE Vc_PURE VectorType data() const { return d.v(); }
0217 Vc_ALWAYS_INLINE Vc_PURE __m128 dataF() const { return SSE::sse_cast<__m128 >(d.v()); }
0218 Vc_ALWAYS_INLINE Vc_PURE __m128i dataI() const { return SSE::sse_cast<__m128i>(d.v()); }
0219 Vc_ALWAYS_INLINE Vc_PURE __m128d dataD() const { return SSE::sse_cast<__m128d>(d.v()); }
0220
0221 private:
0222 friend reference;
0223 static Vc_INTRINSIC Vc_PURE value_type get(const Mask &m, int i) noexcept // reads entry i of m
0224 {
0225 return m.toInt() & (1 << i); // isolates bit i; a non-zero int converts to true
0226 }
0227 template <typename U> // set(): converts v to MaskBool and writes it at position i of the storage; noexcept exactly when that conversion is
0228 static Vc_INTRINSIC void set(Mask &m, int i,
0229 U &&v) noexcept(noexcept(MaskBool(std::declval<U>())))
0230 {
0231 m.d.set(i, MaskBool(std::forward<U>(v)));
0232 }
0233
0234 public:
0235
0236 // Entry access. The non-const overload returns a reference proxy constructed from
0237 // *this and the index narrowed to int; the const overload returns the bool from
0238 // get(), with the size_t index implicitly converted to get()'s int parameter.
0239 // Neither overload checks index against Size.
0240
0241 Vc_ALWAYS_INLINE reference operator[](size_t index) noexcept
0242 {
0243 return {*this, int(index)};
0244 }
0245 Vc_ALWAYS_INLINE Vc_PURE value_type operator[](size_t index) const noexcept
0246 {
0247 return get(*this, index);
0248 }
0249 // Returns Detail::mask_count<Size> of the register - presumably the number of true entries (helper defined elsewhere; confirm).
0250 Vc_ALWAYS_INLINE Vc_PURE int count() const
0251 {
0252 return Detail::mask_count<Size>(dataI());
0253 }
0254
0255 // firstOne(), generate() and shifted() are only declared in this class;
0256 // their definitions are not in this header (presumably in mask.tcc,
0257 // which is included after the namespace closes - confirm).
0258
0259
0260 Vc_ALWAYS_INLINE_L Vc_PURE_L int firstOne() const Vc_ALWAYS_INLINE_R Vc_PURE_R;
0261
0262 template <typename G> static Vc_INTRINSIC_L Mask generate(G &&gen) Vc_INTRINSIC_R;
0263 Vc_INTRINSIC_L Vc_PURE_L Mask shifted(int amount) const Vc_INTRINSIC_R Vc_PURE_R;
0264 // The only data member. It is private unless Vc_COMPILE_BENCHMARKS is defined, which switches the access specifier to public.
0265 private:
0266 #ifdef Vc_COMPILE_BENCHMARKS
0267 public:
0268 #endif
0269 Storage d;
0270 };
0271 template <typename T> constexpr size_t Mask<T, VectorAbi::Sse>::Size; // out-of-class definition of the static constexpr member declared in the class (needed before C++17 if the member is odr-used)
0272 template <typename T> constexpr size_t Mask<T, VectorAbi::Sse>::MemoryAlignment; // same, for MemoryAlignment
0273
0274 }
0275
0276 #include "mask.tcc"
0277
0278 #endif