// /include/Vc/avx/mask.tcc
/*  This file is part of the Vc library. {{{
Copyright © 2011-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

namespace Vc_VERSIONED_NAMESPACE
{
// store {{{1
// Write one bool per mask entry to mem.
template <typename T>
template <typename Flags>
Vc_INTRINSIC void Mask<T, VectorAbi::Avx>::store(bool *mem, Flags f) const
{
    Detail::mask_store<Size>(dataI(), mem, f);
}

// load {{{1
// Read Size bools from mem and expand each one into a full mask entry.
template <typename T>
template <typename Flags>
Vc_INTRINSIC void Mask<T, VectorAbi::Avx>::load(const bool *mem, Flags f)
{
    d.v() = AVX::avx_cast<VectorType>(Detail::mask_load<VectorTypeF, Size>(mem, f));
}

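// Usage sketch (illustrative only, not part of the library): the two members
// above let a mask round-trip through a plain bool array. Assumes Vc is built
// with AVX support and that the caller includes <Vc/Vc> and <cassert>;
// operator== for masks (defined further down) returns a plain bool.
//
//   void roundTripMask()
//   {
//       Vc::float_v v = Vc::float_v::IndexesFromZero();
//       Vc::float_m m = v > 2.f;           // comparison yields a mask
//
//       bool raw[Vc::float_m::Size];
//       m.store(raw, Vc::Unaligned);       // one bool per mask entry
//       Vc::float_m n;
//       n.load(raw, Vc::Unaligned);        // reconstruct the mask from the bools
//       assert(m == n);
//   }
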
// operator[] {{{1
#ifdef Vc_IMPL_AVX2
// Each 16-bit entry contributes two bits to the byte-wise movemask returned by
// shiftMask(), hence the factor 2 in the bit position.
template <>
Vc_INTRINSIC Vc_PURE bool AVX2::Mask<int16_t>::get(const AVX2::Mask<int16_t> &m,
                                                   int index) noexcept
{
    return m.shiftMask() & (1 << 2 * index);
}
template <>
Vc_INTRINSIC Vc_PURE bool AVX2::Mask<uint16_t>::get(const AVX2::Mask<uint16_t> &m,
                                                    int index) noexcept
{
    return m.shiftMask() & (1 << 2 * index);
}
#endif
// operator== {{{1
// Equal masks have identical movemask bit patterns, so one integer comparison
// checks all entries at once.
template <> Vc_INTRINSIC Vc_PURE bool AVX2::double_m::operator==(const AVX2::double_m &rhs) const
{ return Detail::movemask(dataD()) == Detail::movemask(rhs.dataD()); }
#ifdef Vc_IMPL_AVX2
template <> Vc_INTRINSIC Vc_PURE bool AVX2::short_m::operator==(const AVX2::short_m &rhs) const
{ return Detail::movemask(dataI()) == Detail::movemask(rhs.dataI()); }
template <> Vc_INTRINSIC Vc_PURE bool AVX2::ushort_m::operator==(const AVX2::ushort_m &rhs) const
{ return Detail::movemask(dataI()) == Detail::movemask(rhs.dataI()); }
#endif

// isFull, isNotEmpty, isEmpty, isMix specializations {{{1
// Each predicate dispatches on sizeof(T) to use the matching double/float/
// integer test intrinsic.
// isFull: the carry-flag test against an all-ones reference is non-zero only
// when every entry is set.
template <typename T> Vc_INTRINSIC bool Mask<T, VectorAbi::Avx>::isFull() const {
    if (sizeof(T) == 8) {
        return 0 != Detail::testc(dataD(), Detail::allone<VectorTypeD>());
    } else if (sizeof(T) == 4) {
        return 0 != Detail::testc(dataF(), Detail::allone<VectorTypeF>());
    } else {
        return 0 != Detail::testc(dataI(), Detail::allone<VectorTypeI>());
    }
}

// isNotEmpty: the zero-flag test of the mask with itself is zero when at least
// one entry is set.
template <typename T> Vc_INTRINSIC bool Mask<T, VectorAbi::Avx>::isNotEmpty() const {
    if (sizeof(T) == 8) {
        return 0 == Detail::testz(dataD(), dataD());
    } else if (sizeof(T) == 4) {
        return 0 == Detail::testz(dataF(), dataF());
    } else {
        return 0 == Detail::testz(dataI(), dataI());
    }
}

// isEmpty: the same zero-flag test is non-zero when no entry is set.
template <typename T> Vc_INTRINSIC bool Mask<T, VectorAbi::Avx>::isEmpty() const {
    if (sizeof(T) == 8) {
        return 0 != Detail::testz(dataD(), dataD());
    } else if (sizeof(T) == 4) {
        return 0 != Detail::testz(dataF(), dataF());
    } else {
        return 0 != Detail::testz(dataI(), dataI());
    }
}

// isMix: testnzc is non-zero when some, but not all, entries are set.
template <typename T> Vc_INTRINSIC bool Mask<T, VectorAbi::Avx>::isMix() const {
    if (sizeof(T) == 8) {
        return 0 != Detail::testnzc(dataD(), Detail::allone<VectorTypeD>());
    } else if (sizeof(T) == 4) {
        return 0 != Detail::testnzc(dataF(), Detail::allone<VectorTypeF>());
    } else {
        return 0 != Detail::testnzc(dataI(), Detail::allone<VectorTypeI>());
    }
}

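// Usage sketch (illustrative only, not part of the library): the predicates
// above classify every mask into exactly one of three states. Assumes the
// caller includes <Vc/Vc> and builds with AVX enabled.
//
//   const char *classify(const Vc::float_m &m)
//   {
//       if (m.isFull())  { return "all entries true"; }
//       if (m.isEmpty()) { return "all entries false"; }
//       return "mixed";  // exactly the case isMix() reports
//   }
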
// generate {{{1
// The integral_constant tag encodes Size + sizeof(Storage), i.e. the entry
// count plus the register width in bytes, and selects the overload with the
// matching element width.
template <typename M, typename G>
Vc_INTRINSIC M generate_impl(G &&gen, std::integral_constant<int, 4 + 32>)
{
    return _mm256_setr_epi64x(
        gen(0) ? 0xffffffffffffffffull : 0, gen(1) ? 0xffffffffffffffffull : 0,
        gen(2) ? 0xffffffffffffffffull : 0, gen(3) ? 0xffffffffffffffffull : 0);
}
template <typename M, typename G>
Vc_INTRINSIC M generate_impl(G &&gen, std::integral_constant<int, 8 + 32>)
{
    return _mm256_setr_epi32(gen(0) ? 0xfffffffful : 0, gen(1) ? 0xfffffffful : 0,
                             gen(2) ? 0xfffffffful : 0, gen(3) ? 0xfffffffful : 0,
                             gen(4) ? 0xfffffffful : 0, gen(5) ? 0xfffffffful : 0,
                             gen(6) ? 0xfffffffful : 0, gen(7) ? 0xfffffffful : 0);
}
template <typename M, typename G>
Vc_INTRINSIC M generate_impl(G &&gen, std::integral_constant<int, 16 + 32>)
{
    return _mm256_setr_epi16(gen(0) ? 0xfffful : 0, gen(1) ? 0xfffful : 0,
                             gen(2) ? 0xfffful : 0, gen(3) ? 0xfffful : 0,
                             gen(4) ? 0xfffful : 0, gen(5) ? 0xfffful : 0,
                             gen(6) ? 0xfffful : 0, gen(7) ? 0xfffful : 0,
                             gen(8) ? 0xfffful : 0, gen(9) ? 0xfffful : 0,
                             gen(10) ? 0xfffful : 0, gen(11) ? 0xfffful : 0,
                             gen(12) ? 0xfffful : 0, gen(13) ? 0xfffful : 0,
                             gen(14) ? 0xfffful : 0, gen(15) ? 0xfffful : 0);
}
template <typename T>
template <typename G>
Vc_INTRINSIC AVX2::Mask<T> Mask<T, VectorAbi::Avx>::generate(G &&gen)
{
    return generate_impl<AVX2::Mask<T>>(std::forward<G>(gen),
                                        std::integral_constant<int, Size + sizeof(Storage)>());
}
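// Usage sketch (illustrative only, not part of the library): generate() calls
// the functor once per entry index and builds the mask from the returned
// bools. Assumes <Vc/Vc> is included and AVX is enabled.
//
//   Vc::float_m everyOtherEntry()
//   {
//       // true for even indices, false for odd ones
//       return Vc::float_m::generate([](int i) { return (i & 1) == 0; });
//   }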
// shifted {{{1
// Convert the entry offset to a byte offset at run time and dispatch to a
// compile-time byte shift; amounts that would shift everything out of the
// register fall through to an all-false mask.
template <typename T> Vc_INTRINSIC Vc_PURE AVX2::Mask<T> Mask<T, VectorAbi::Avx>::shifted(int amount) const
{
    switch (amount * int(sizeof(VectorEntryType))) {
    case   0: return *this;
    case   1: return Detail::shifted<  1>(dataI());
    case   2: return Detail::shifted<  2>(dataI());
    case   3: return Detail::shifted<  3>(dataI());
    case   4: return Detail::shifted<  4>(dataI());
    case   5: return Detail::shifted<  5>(dataI());
    case   6: return Detail::shifted<  6>(dataI());
    case   7: return Detail::shifted<  7>(dataI());
    case   8: return Detail::shifted<  8>(dataI());
    case   9: return Detail::shifted<  9>(dataI());
    case  10: return Detail::shifted< 10>(dataI());
    case  11: return Detail::shifted< 11>(dataI());
    case  12: return Detail::shifted< 12>(dataI());
    case  13: return Detail::shifted< 13>(dataI());
    case  14: return Detail::shifted< 14>(dataI());
    case  15: return Detail::shifted< 15>(dataI());
    case  16: return Detail::shifted< 16>(dataI());
    case  17: return Detail::shifted< 17>(dataI());
    case  18: return Detail::shifted< 18>(dataI());
    case  19: return Detail::shifted< 19>(dataI());
    case  20: return Detail::shifted< 20>(dataI());
    case  21: return Detail::shifted< 21>(dataI());
    case  22: return Detail::shifted< 22>(dataI());
    case  23: return Detail::shifted< 23>(dataI());
    case  24: return Detail::shifted< 24>(dataI());
    case  25: return Detail::shifted< 25>(dataI());
    case  26: return Detail::shifted< 26>(dataI());
    case  27: return Detail::shifted< 27>(dataI());
    case  28: return Detail::shifted< 28>(dataI());
    case  29: return Detail::shifted< 29>(dataI());
    case  30: return Detail::shifted< 30>(dataI());
    case  31: return Detail::shifted< 31>(dataI());
    case  -1: return Detail::shifted< -1>(dataI());
    case  -2: return Detail::shifted< -2>(dataI());
    case  -3: return Detail::shifted< -3>(dataI());
    case  -4: return Detail::shifted< -4>(dataI());
    case  -5: return Detail::shifted< -5>(dataI());
    case  -6: return Detail::shifted< -6>(dataI());
    case  -7: return Detail::shifted< -7>(dataI());
    case  -8: return Detail::shifted< -8>(dataI());
    case  -9: return Detail::shifted< -9>(dataI());
    case -10: return Detail::shifted<-10>(dataI());
    case -11: return Detail::shifted<-11>(dataI());
    case -12: return Detail::shifted<-12>(dataI());
    case -13: return Detail::shifted<-13>(dataI());
    case -14: return Detail::shifted<-14>(dataI());
    case -15: return Detail::shifted<-15>(dataI());
    case -16: return Detail::shifted<-16>(dataI());
    case -17: return Detail::shifted<-17>(dataI());
    case -18: return Detail::shifted<-18>(dataI());
    case -19: return Detail::shifted<-19>(dataI());
    case -20: return Detail::shifted<-20>(dataI());
    case -21: return Detail::shifted<-21>(dataI());
    case -22: return Detail::shifted<-22>(dataI());
    case -23: return Detail::shifted<-23>(dataI());
    case -24: return Detail::shifted<-24>(dataI());
    case -25: return Detail::shifted<-25>(dataI());
    case -26: return Detail::shifted<-26>(dataI());
    case -27: return Detail::shifted<-27>(dataI());
    case -28: return Detail::shifted<-28>(dataI());
    case -29: return Detail::shifted<-29>(dataI());
    case -30: return Detail::shifted<-30>(dataI());
    case -31: return Detail::shifted<-31>(dataI());
    }
    return Zero();
}
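// Usage sketch (illustrative only, not part of the library): shifted() moves
// the mask entries by the given number of positions and shifts in false
// entries (the underlying byte shift fills with zeros); a shift by the full
// entry count falls through to the all-false Zero() mask. Assumes <Vc/Vc>,
// <cassert> and AVX.
//
//   void shiftedExample(const Vc::float_m &m)
//   {
//       const Vc::float_m byOne = m.shifted(1);                       // shift by one entry
//       const Vc::float_m gone  = m.shifted(int(Vc::float_m::Size));  // shifted fully out
//       assert(gone.isEmpty());
//   }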
// }}}1

/*
template<> Vc_INTRINSIC AVX2::Mask< 4, 32> &AVX2::Mask< 4, 32>::operator=(const std::array<bool, 4> &values) {
    static_assert(sizeof(bool) == 1, "Vc expects bool to have a sizeof 1 Byte");
    unsigned int x = *reinterpret_cast<const unsigned int *>(values.data());
    x *= 0xffu;
    __m128i y = _mm_cvtsi32_si128(x); //  4 Bytes
    y = _mm_unpacklo_epi8(y, y);    //  8 Bytes
    y = _mm_unpacklo_epi16(y, y);   // 16 Bytes
    d.v() = AVX::avx_cast<__m256>(AVX::concat(_mm_unpacklo_epi32(y, y), _mm_unpackhi_epi32(y, y)));
    return *this;
}
template<> Vc_INTRINSIC AVX2::Mask< 8, 32> &AVX2::Mask< 8, 32>::operator=(const std::array<bool, 8> &values) {
    static_assert(sizeof(bool) == 1, "Vc expects bool to have a sizeof 1 Byte");
    unsigned long long x = *reinterpret_cast<const unsigned long long *>(values.data());
    x *= 0xffull;
    __m128i y = _mm_cvtsi64_si128(x); //  8 Bytes
    y = _mm_unpacklo_epi8(y, y);   // 16 Bytes
    d.v() = AVX::avx_cast<__m256>(AVX::concat(_mm_unpacklo_epi16(y, y), _mm_unpackhi_epi16(y, y)));
    return *this;
}
template<> Vc_INTRINSIC AVX2::Mask< 8, 16> &AVX2::Mask< 8, 16>::operator=(const std::array<bool, 8> &values) {
    static_assert(sizeof(bool) == 1, "Vc expects bool to have a sizeof 1 Byte");
    unsigned long long x = *reinterpret_cast<const unsigned long long *>(values.data());
    x *= 0xffull;
    __m128i y = _mm_cvtsi64_si128(x); //  8 Bytes
    d.v() = AVX::avx_cast<__m128>(_mm_unpacklo_epi8(y, y));
    return *this;
}
template<> Vc_INTRINSIC AVX2::Mask<16, 16> &AVX2::Mask<16, 16>::operator=(const std::array<bool, 16> &values) {
    static_assert(sizeof(bool) == 1, "Vc expects bool to have a sizeof 1 Byte");
    __m128i x = _mm_loadu_si128(reinterpret_cast<const __m128i *>(values.data()));
    d.v() = _mm_andnot_ps(AVX::_mm_setallone_ps(), AVX::avx_cast<__m128>(_mm_sub_epi8(x, _mm_set1_epi8(1))));
    return *this;
}

template<> Vc_INTRINSIC AVX2::Mask< 4, 32>::operator std::array<bool, 4>() const {
    static_assert(sizeof(bool) == 1, "Vc expects bool to have a sizeof 1 Byte");
    __m128i x = _mm_packs_epi32(AVX::lo128(dataI()), AVX::hi128(dataI())); // 64bit -> 32bit
    x = _mm_packs_epi32(x, x); // 32bit -> 16bit
    x = _mm_srli_epi16(x, 15);
    x = _mm_packs_epi16(x, x); // 16bit ->  8bit
    std::array<bool, 4> r;
    asm volatile("vmovd %1,%0" : "=m"(*r.data()) : "x"(x));
    return r;
}
template<> Vc_INTRINSIC AVX2::Mask< 8, 32>::operator std::array<bool, 8>() const {
    static_assert(sizeof(bool) == 1, "Vc expects bool to have a sizeof 1 Byte");
    __m128i x = _mm_packs_epi32(AVX::lo128(dataI()), AVX::hi128(dataI())); // 32bit -> 16bit
    x = _mm_srli_epi16(x, 15);
    x = _mm_packs_epi16(x, x); // 16bit ->  8bit
    std::array<bool, 8> r;
    asm volatile("vmovq %1,%0" : "=m"(*r.data()) : "x"(x));
    return r;
}
template<> Vc_INTRINSIC AVX2::Mask< 8, 16>::operator std::array<bool, 8>() const {
    static_assert(sizeof(bool) == 1, "Vc expects bool to have a sizeof 1 Byte");
    __m128i x = _mm_srli_epi16(dataI(), 15);
    x = _mm_packs_epi16(x, x); // 16bit ->  8bit
    std::array<bool, 8> r;
    asm volatile("vmovq %1,%0" : "=m"(*r.data()) : "x"(x));
    return r;
}
template<> Vc_INTRINSIC AVX2::Mask<16, 16>::operator std::array<bool, 16>() const {
    static_assert(sizeof(bool) == 1, "Vc expects bool to have a sizeof 1 Byte");
    __m128 x = _mm_and_ps(d.v(), AVX::avx_cast<__m128>(_mm_set1_epi32(0x01010101)));
    std::array<bool, 16> r;
    asm volatile("vmovups %1,%0" : "=m"(*r.data()) : "x"(x));
    return r;
}
*/

}

// vim: foldmethod=marker