Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-31 10:25:44

0001 /*  This file is part of the Vc library. {{{
0002 Copyright © 2009-2015 Matthias Kretz <kretz@kde.org>
0003 
0004 Redistribution and use in source and binary forms, with or without
0005 modification, are permitted provided that the following conditions are met:
0006     * Redistributions of source code must retain the above copyright
0007       notice, this list of conditions and the following disclaimer.
0008     * Redistributions in binary form must reproduce the above copyright
0009       notice, this list of conditions and the following disclaimer in the
0010       documentation and/or other materials provided with the distribution.
0011     * Neither the names of contributing organizations nor the
0012       names of its contributors may be used to endorse or promote products
0013       derived from this software without specific prior written permission.
0014 
0015 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
0016 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
0017 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
0018 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
0019 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
0020 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
0021 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
0022 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
0023 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
0024 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
0025 
0026 }}}*/
0027 
0028 #ifndef VC_SSE_MASK_H_
0029 #define VC_SSE_MASK_H_
0030 
0031 #include "intrinsics.h"
0032 #include "../common/maskbool.h"
0033 #include "detail.h"
0034 #include "macros.h"
0035 
0036 namespace Vc_VERSIONED_NAMESPACE
0037 {
0038 namespace Detail
0039 {  // declarations only: the bodies are not in this header (presumably in mask.tcc, included at the end - confirm)
0040 template <size_t Size>  // instantiated by Mask::count() as mask_count<Size>(dataI())
0041 Vc_INTRINSIC_L Vc_CONST_L int mask_count(__m128i) Vc_INTRINSIC_R Vc_CONST_R;
0042 template <size_t Size>  // instantiated by Mask::toInt() as mask_to_int<Size>(dataI())
0043 Vc_INTRINSIC_L Vc_CONST_L int mask_to_int(__m128i) Vc_INTRINSIC_R Vc_CONST_R;
0044 template <size_t Size>  // instantiated by Mask::operator== on the two masks' dataF() values
0045 Vc_INTRINSIC_L Vc_CONST_L bool is_equal(__m128, __m128) Vc_INTRINSIC_R Vc_CONST_R;
0046 template <size_t Size>  // instantiated by Mask::operator!= on the two masks' dataF() values
0047 Vc_INTRINSIC_L Vc_CONST_L bool is_not_equal(__m128, __m128) Vc_INTRINSIC_R Vc_CONST_R;
0048 }  // namespace Detail
0049 
0050 using SSE::sse_cast;  // lets the Mask constructors below call sse_cast<VectorType>(...) unqualified
0051 
0052 template <typename T> class Mask<T, VectorAbi::Sse>  // partial specialization of Mask for the SSE ABI
0053 {
0054     using abi = VectorAbi::Sse;
0055     friend class Mask<  double, abi>;  // friends: the SSE masks of every listed entry type may access private members
0056     friend class Mask<   float, abi>;
0057     friend class Mask< int32_t, abi>;
0058     friend class Mask<uint32_t, abi>;
0059     friend class Mask< int16_t, abi>;
0060     friend class Mask<uint16_t, abi>;
0061 
0062     /**
0063      * A helper type for aliasing the entries in the mask but behaving like a bool.
0064      */
0065     typedef Common::MaskBool<sizeof(T)> MaskBool;
0066     // Type of the data member d; this class uses its v() and set() members.
0067     typedef Common::Storage<T, SSE::VectorTraits<T>::Size> Storage;
0068 
0069 public:
0070 
0071     /**
0072      * The \c EntryType of masks is always bool, independent of \c T.
0073      */
0074     typedef bool EntryType;
0075     using value_type = EntryType;
0076 
0077     /**
0078      * The return type of the non-const subscript operator.
0079      */
0080     using EntryReference = Detail::ElementReference<Mask>;
0081     using reference = EntryReference;
0082 
0083     /**
0084      * The \c VectorEntryType, in contrast to \c EntryType, reveals information about the SIMD
0085      * implementation. This type is useful for the \c sizeof operator in generic functions.
0086      */
0087     typedef MaskBool VectorEntryType;
0088 
0089     /**
0090      * The \c VectorType reveals the implementation-specific internal type used for the SIMD type.
0091      */
0092     using VectorType = typename Storage::VectorType;
0093 
0094     /**
0095      * The associated Vector<T> type.
0096      */
0097     using Vector = SSE::Vector<T>;
0098 
0099 public:
0100     Vc_FREE_STORE_OPERATORS_ALIGNED(16);  // macro defined elsewhere; presumably aligned operator new/delete - confirm
0101     static constexpr size_t Size = SSE::VectorTraits<T>::Size;  // number of mask entries, taken from the vector traits of T
0102     static constexpr size_t MemoryAlignment = Size;  // equals the entry count; presumably counted in bools for load/store - confirm
0103     static constexpr std::size_t size() { return Size; }
0104 
0105         // abstracts the way Masks are passed to functions, it can easily be changed to const ref here
0106 #if defined Vc_MSVC && defined _WIN32
0107         typedef const Mask &Argument;
0108 #else
0109         typedef Mask Argument;
0110 #endif
0111 
0112         Vc_INTRINSIC Mask() = default;
0113         Vc_INTRINSIC Mask(const Mask &) = default;
0114         Vc_INTRINSIC Mask &operator=(const Mask &) = default;
0115         // implicit constructors from the three __m128 flavours; the operators below rely on them to return intrinsic results
0116         Vc_INTRINSIC Mask(const __m128  &x) : d(sse_cast<VectorType>(x)) {}
0117         Vc_INTRINSIC Mask(const __m128d &x) : d(sse_cast<VectorType>(x)) {}
0118         Vc_INTRINSIC Mask(const __m128i &x) : d(sse_cast<VectorType>(x)) {}
0119         Vc_INTRINSIC explicit Mask(VectorSpecialInitializerZero) : Mask(_mm_setzero_ps()) {}
0120         Vc_INTRINSIC explicit Mask(VectorSpecialInitializerOne) : Mask(SSE::_mm_setallone_ps()) {}
0121         Vc_INTRINSIC explicit Mask(bool b) : Mask(b ? SSE::_mm_setallone_ps() : _mm_setzero_ps()) {}
0122         Vc_INTRINSIC static Mask Zero() { return Mask{Vc::Zero}; }
0123         Vc_INTRINSIC static Mask One() { return Mask{Vc::One}; }
0124 
0125         // implicit cast from another mask type U, enabled only via Common::enable_if_mask_converts_implicitly; rhs.dataI() goes through Detail::mask_cast
0126         template <typename U>
0127         Vc_INTRINSIC Mask(
0128             U &&rhs, Common::enable_if_mask_converts_implicitly<Mask, T, U> = nullarg)
0129             : d(sse_cast<VectorType>(
0130                   Detail::mask_cast<Traits::simd_vector_size<U>::value, Size, __m128>(
0131                       rhs.dataI())))
0132         {
0133         }
0134 
0135 #if Vc_IS_VERSION_1
0136         // explicit cast, implemented via simd_cast (implementation in sse/simd_cast.h)
0137         template <typename U>
0138         Vc_DEPRECATED("use simd_cast instead of explicit type casting to convert between "
0139                       "mask types") Vc_INTRINSIC
0140             explicit Mask(U &&rhs,
0141                           Common::enable_if_mask_converts_explicitly<T, U> = nullarg);
0142 #endif
0143         // construct from an array of bool by delegating to load(); the two-argument form passes its flag on to load(mem, f)
0144         Vc_ALWAYS_INLINE explicit Mask(const bool *mem) { load(mem); }
0145         template<typename Flags> Vc_ALWAYS_INLINE explicit Mask(const bool *mem, Flags f) { load(mem, f); }
0146         // load(const bool *) is only declared here; the Flags overload ignores its flag and forwards to it
0147         Vc_ALWAYS_INLINE_L void load(const bool *mem) Vc_ALWAYS_INLINE_R;
0148         template<typename Flags> Vc_ALWAYS_INLINE void load(const bool *mem, Flags) { load(mem); }
0149         // store(bool *) is only declared here; the Flags overload ignores its flag and forwards to it
0150         Vc_ALWAYS_INLINE_L void store(bool *) const Vc_ALWAYS_INLINE_R;
0151         template<typename Flags> Vc_ALWAYS_INLINE void store(bool *mem, Flags) const { store(mem); }
0152         // whole-mask comparison yielding one bool; delegated to Detail::is_equal / is_not_equal on the __m128 views
0153         Vc_ALWAYS_INLINE Vc_PURE bool operator==(const Mask &rhs) const
0154         {
0155             return Detail::is_equal<Size>(dataF(), rhs.dataF());
0156         }
0157         Vc_ALWAYS_INLINE Vc_PURE bool operator!=(const Mask &rhs) const
0158         {
0159             return Detail::is_not_equal<Size>(dataF(), rhs.dataF());
0160         }
0161         // complement of the mask: ~dataI() when Vc_GCC is defined, otherwise _mm_andnot_si128 against the all-one register
0162         Vc_ALWAYS_INLINE Vc_PURE Mask operator!() const
0163         {
0164 #ifdef Vc_GCC
0165             return ~dataI();
0166 #else
0167             return _mm_andnot_si128(dataI(), SSE::_mm_setallone_si128());
0168 #endif
0169         }
0170         // compound assignment: combine with rhs through the __m128 and/or/xor intrinsics and write the result back into d
0171         Vc_ALWAYS_INLINE Mask &operator&=(const Mask &rhs) { d.v() = SSE::sse_cast<VectorType>(_mm_and_ps(dataF(), rhs.dataF())); return *this; }
0172         Vc_ALWAYS_INLINE Mask &operator|=(const Mask &rhs) { d.v() = SSE::sse_cast<VectorType>(_mm_or_ps (dataF(), rhs.dataF())); return *this; }
0173         Vc_ALWAYS_INLINE Mask &operator^=(const Mask &rhs) { d.v() = SSE::sse_cast<VectorType>(_mm_xor_ps(dataF(), rhs.dataF())); return *this; }
0174         // binary forms; the __m128 result becomes a Mask through the implicit __m128 constructor
0175         Vc_ALWAYS_INLINE Vc_PURE Mask operator&(const Mask &rhs) const { return _mm_and_ps(dataF(), rhs.dataF()); }
0176         Vc_ALWAYS_INLINE Vc_PURE Mask operator|(const Mask &rhs) const { return _mm_or_ps (dataF(), rhs.dataF()); }
0177         Vc_ALWAYS_INLINE Vc_PURE Mask operator^(const Mask &rhs) const { return _mm_xor_ps(dataF(), rhs.dataF()); }
0178         // && and || have the same bodies as & and |; being overloaded operators they evaluate both operands (no short-circuit)
0179         Vc_ALWAYS_INLINE Vc_PURE Mask operator&&(const Mask &rhs) const { return _mm_and_ps(dataF(), rhs.dataF()); }
0180         Vc_ALWAYS_INLINE Vc_PURE Mask operator||(const Mask &rhs) const { return _mm_or_ps (dataF(), rhs.dataF()); }
0181         // reductions to one bool; each has a PTEST variant (Vc_USE_PTEST) and a fallback that inspects _mm_movemask_epi8(dataI())
0182         Vc_ALWAYS_INLINE Vc_PURE bool isFull () const { return
0183 #ifdef Vc_USE_PTEST
0184             _mm_testc_si128(dataI(), SSE::_mm_setallone_si128()); // return 1 if (0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff) == (~0 & d.v())
0185 #else
0186             _mm_movemask_epi8(dataI()) == 0xffff;
0187 #endif
0188         }
0189         Vc_ALWAYS_INLINE Vc_PURE bool isNotEmpty() const { return
0190 #ifdef Vc_USE_PTEST
0191             0 == _mm_testz_si128(dataI(), dataI()); // return 1 if (0, 0, 0, 0) == (d.v() & d.v())
0192 #else
0193             _mm_movemask_epi8(dataI()) != 0x0000;
0194 #endif
0195         }
0196         Vc_ALWAYS_INLINE Vc_PURE bool isEmpty() const { return
0197 #ifdef Vc_USE_PTEST
0198             0 != _mm_testz_si128(dataI(), dataI()); // return 1 if (0, 0, 0, 0) == (d.v() & d.v())
0199 #else
0200             _mm_movemask_epi8(dataI()) == 0x0000;
0201 #endif
0202         }
0203         Vc_ALWAYS_INLINE Vc_PURE bool isMix() const {
0204 #ifdef Vc_USE_PTEST
0205             return _mm_test_mix_ones_zeros(dataI(), SSE::_mm_setallone_si128());
0206 #else
0207             const int tmp = _mm_movemask_epi8(dataI());
0208             return tmp != 0 && (tmp ^ 0xffff) != 0;  // neither all movemask bits clear nor all 16 set
0209 #endif
0210         }
0211         // shiftMask(): the raw _mm_movemask_epi8 result of the integer view
0212         Vc_ALWAYS_INLINE Vc_PURE int shiftMask() const { return _mm_movemask_epi8(dataI()); }
0213         // toInt(): integer form produced by Detail::mask_to_int<Size>; get() below tests bit i of it for entry i
0214         Vc_ALWAYS_INLINE Vc_PURE int toInt() const { return Detail::mask_to_int<Size>(dataI()); }
0215         // accessors: the stored value as VectorType, or cast to __m128 / __m128i / __m128d
0216         Vc_ALWAYS_INLINE Vc_PURE VectorType  data() const { return d.v(); }
0217         Vc_ALWAYS_INLINE Vc_PURE __m128  dataF() const { return SSE::sse_cast<__m128 >(d.v()); }
0218         Vc_ALWAYS_INLINE Vc_PURE __m128i dataI() const { return SSE::sse_cast<__m128i>(d.v()); }
0219         Vc_ALWAYS_INLINE Vc_PURE __m128d dataD() const { return SSE::sse_cast<__m128d>(d.v()); }
0220 
0221 private:
0222     friend reference;  // presumably so Detail::ElementReference can reach the private get()/set() - confirm
0223     static Vc_INTRINSIC Vc_PURE value_type get(const Mask &m, int i) noexcept
0224     {
0225         return m.toInt() & (1 << i);  // converts to true iff bit i of toInt() is set
0226     }
0227     template <typename U>
0228     static Vc_INTRINSIC void set(Mask &m, int i,
0229                                  U &&v) noexcept(noexcept(MaskBool(std::declval<U>())))
0230     {
0231         m.d.set(i, MaskBool(std::forward<U>(v)));  // v is converted to MaskBool, then stored as entry i
0232     }
0233 
0234 public:
0235     /**
0236      * \note the returned object models the concept of a reference and
0237      * as such it can exist longer than the data it is referencing.
0238      * \note to avoid lifetime issues, we strongly advise not to store
0239      * any reference objects.
0240      */
0241     Vc_ALWAYS_INLINE reference operator[](size_t index) noexcept
0242     {
0243         return {*this, int(index)};  // builds the reference proxy from this mask and the index
0244     }
0245     Vc_ALWAYS_INLINE Vc_PURE value_type operator[](size_t index) const noexcept
0246     {
0247         return get(*this, index);  // index converts implicitly from size_t to get()'s int parameter
0248     }
0249         // count(): result of Detail::mask_count<Size> on the integer view (presumably the number of true entries - confirm)
0250         Vc_ALWAYS_INLINE Vc_PURE int count() const
0251         {
0252             return Detail::mask_count<Size>(dataI());
0253         }
0254 
0255         /**
0256          * Returns the index of the first one in the mask.
0257          *
0258          * The return value is undefined if the mask is empty.
0259          */
0260         Vc_ALWAYS_INLINE_L Vc_PURE_L int firstOne() const Vc_ALWAYS_INLINE_R Vc_PURE_R;
0261         // generate() and shifted() are declared only; their definitions are not in this header
0262         template <typename G> static Vc_INTRINSIC_L Mask generate(G &&gen) Vc_INTRINSIC_R;
0263         Vc_INTRINSIC_L Vc_PURE_L Mask shifted(int amount) const Vc_INTRINSIC_R Vc_PURE_R;
0264 
0265     private:
0266 #ifdef Vc_COMPILE_BENCHMARKS
0267     public:
0268 #endif
0269         Storage d;  // the only data member; public instead of private when Vc_COMPILE_BENCHMARKS is defined
0270 };
0271 template <typename T> constexpr size_t Mask<T, VectorAbi::Sse>::Size;  // out-of-class definitions of the static constexpr members (needed for odr-use before C++17)
0272 template <typename T> constexpr size_t Mask<T, VectorAbi::Sse>::MemoryAlignment;
0273 
0274 }  // namespace Vc
0275 
0276 #include "mask.tcc"
0277 
0278 #endif // VC_SSE_MASK_H_