Warning, /include/Vc/avx/mask.tcc is written in an unsupported language. File is not indexed.
0001 /* This file is part of the Vc library. {{{
0002 Copyright © 2011-2015 Matthias Kretz <kretz@kde.org>
0003
0004 Redistribution and use in source and binary forms, with or without
0005 modification, are permitted provided that the following conditions are met:
0006 * Redistributions of source code must retain the above copyright
0007 notice, this list of conditions and the following disclaimer.
0008 * Redistributions in binary form must reproduce the above copyright
0009 notice, this list of conditions and the following disclaimer in the
0010 documentation and/or other materials provided with the distribution.
0011 * Neither the names of contributing organizations nor the
0012 names of its contributors may be used to endorse or promote products
0013 derived from this software without specific prior written permission.
0014
0015 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
0016 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
0017 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
0018 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
0019 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
0020 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
0021 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
0022 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
0023 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
0024 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
0025
0026 }}}*/
0027
0028 namespace Vc_VERSIONED_NAMESPACE
0029 {
0030 // store {{{1
0031 template <typename T>
0032 template <typename Flags>
0033 Vc_INTRINSIC void Mask<T, VectorAbi::Avx>::store(bool *mem, Flags f) const
0034 {
0035 Detail::mask_store<Size>(dataI(), mem, f);
0036 }
0037
0038 // load {{{1
0039 template <typename T>
0040 template <typename Flags>
0041 Vc_INTRINSIC void Mask<T, VectorAbi::Avx>::load(const bool *mem, Flags f)
0042 {
0043 d.v() = AVX::avx_cast<VectorType>(Detail::mask_load<VectorTypeF, Size>(mem, f));
0044 }
0045
0046 // operator[] {{{1
0047 #ifdef Vc_IMPL_AVX2
0048 template <>
0049 Vc_INTRINSIC Vc_PURE bool AVX2::Mask<int16_t>::get(const AVX2::Mask<int16_t> &m,
0050 int index) noexcept
0051 {
0052 return m.shiftMask() & (1 << 2 * index);
0053 }
0054 template <>
0055 Vc_INTRINSIC Vc_PURE bool AVX2::Mask<uint16_t>::get(const AVX2::Mask<uint16_t> &m,
0056 int index) noexcept
0057 {
0058 return m.shiftMask() & (1 << 2 * index);
0059 }
0060 #endif
0061 // operator== {{{1
0062 template <> Vc_INTRINSIC Vc_PURE bool AVX2::double_m::operator==(const AVX2::double_m &rhs) const
0063 { return Detail::movemask(dataD()) == Detail::movemask(rhs.dataD()); }
0064 #ifdef Vc_IMPL_AVX2
0065 template <> Vc_INTRINSIC Vc_PURE bool AVX2::short_m::operator==(const AVX2::short_m &rhs) const
0066 { return Detail::movemask(dataI()) == Detail::movemask(rhs.dataI()); }
0067 template <> Vc_INTRINSIC Vc_PURE bool AVX2::ushort_m::operator==(const AVX2::ushort_m &rhs) const
0068 { return Detail::movemask(dataI()) == Detail::movemask(rhs.dataI()); }
0069 #endif
0070
0071 // isFull, isNotEmpty, isEmpty, isMix specializations{{{1
0072 template <typename T> Vc_INTRINSIC bool Mask<T, VectorAbi::Avx>::isFull() const {
0073 if (sizeof(T) == 8) {
0074 return 0 != Detail::testc(dataD(), Detail::allone<VectorTypeD>());
0075 } else if (sizeof(T) == 4) {
0076 return 0 != Detail::testc(dataF(), Detail::allone<VectorTypeF>());
0077 } else {
0078 return 0 != Detail::testc(dataI(), Detail::allone<VectorTypeI>());
0079 }
0080 }
0081
0082 template <typename T> Vc_INTRINSIC bool Mask<T, VectorAbi::Avx>::isNotEmpty() const {
0083 if (sizeof(T) == 8) {
0084 return 0 == Detail::testz(dataD(), dataD());
0085 } else if (sizeof(T) == 4) {
0086 return 0 == Detail::testz(dataF(), dataF());
0087 } else {
0088 return 0 == Detail::testz(dataI(), dataI());
0089 }
0090 }
0091
0092 template <typename T> Vc_INTRINSIC bool Mask<T, VectorAbi::Avx>::isEmpty() const {
0093 if (sizeof(T) == 8) {
0094 return 0 != Detail::testz(dataD(), dataD());
0095 } else if (sizeof(T) == 4) {
0096 return 0 != Detail::testz(dataF(), dataF());
0097 } else {
0098 return 0 != Detail::testz(dataI(), dataI());
0099 }
0100 }
0101
0102 template <typename T> Vc_INTRINSIC bool Mask<T, VectorAbi::Avx>::isMix() const {
0103 if (sizeof(T) == 8) {
0104 return 0 != Detail::testnzc(dataD(), Detail::allone<VectorTypeD>());
0105 } else if (sizeof(T) == 4) {
0106 return 0 != Detail::testnzc(dataF(), Detail::allone<VectorTypeF>());
0107 } else {
0108 return 0 != Detail::testnzc(dataI(), Detail::allone<VectorTypeI>());
0109 }
0110 }
0111
0112 // generate {{{1
0113 template <typename M, typename G>
0114 Vc_INTRINSIC M generate_impl(G &&gen, std::integral_constant<int, 4 + 32>)
0115 {
0116 return _mm256_setr_epi64x(
0117 gen(0) ? 0xffffffffffffffffull : 0, gen(1) ? 0xffffffffffffffffull : 0,
0118 gen(2) ? 0xffffffffffffffffull : 0, gen(3) ? 0xffffffffffffffffull : 0);
0119 }
0120 template <typename M, typename G>
0121 Vc_INTRINSIC M generate_impl(G &&gen, std::integral_constant<int, 8 + 32>)
0122 {
0123 return _mm256_setr_epi32(gen(0) ? 0xfffffffful : 0, gen(1) ? 0xfffffffful : 0,
0124 gen(2) ? 0xfffffffful : 0, gen(3) ? 0xfffffffful : 0,
0125 gen(4) ? 0xfffffffful : 0, gen(5) ? 0xfffffffful : 0,
0126 gen(6) ? 0xfffffffful : 0, gen(7) ? 0xfffffffful : 0);
0127 }
0128 template <typename M, typename G>
0129 Vc_INTRINSIC M generate_impl(G &&gen, std::integral_constant<int, 16 + 32>)
0130 {
0131 return _mm256_setr_epi16(gen(0) ? 0xfffful : 0, gen(1) ? 0xfffful : 0,
0132 gen(2) ? 0xfffful : 0, gen(3) ? 0xfffful : 0,
0133 gen(4) ? 0xfffful : 0, gen(5) ? 0xfffful : 0,
0134 gen(6) ? 0xfffful : 0, gen(7) ? 0xfffful : 0,
0135 gen(8) ? 0xfffful : 0, gen(9) ? 0xfffful : 0,
0136 gen(10) ? 0xfffful : 0, gen(11) ? 0xfffful : 0,
0137 gen(12) ? 0xfffful : 0, gen(13) ? 0xfffful : 0,
0138 gen(14) ? 0xfffful : 0, gen(15) ? 0xfffful : 0);
0139 }
0140 template <typename T>
0141 template <typename G>
0142 Vc_INTRINSIC AVX2::Mask<T> Mask<T, VectorAbi::Avx>::generate(G &&gen)
0143 {
0144 return generate_impl<AVX2::Mask<T>>(std::forward<G>(gen),
0145 std::integral_constant<int, Size + sizeof(Storage)>());
0146 }
0147 // shifted {{{1
0148 template <typename T> Vc_INTRINSIC Vc_PURE AVX2::Mask<T> Mask<T, VectorAbi::Avx>::shifted(int amount) const
0149 {
0150 switch (amount * int(sizeof(VectorEntryType))) {
0151 case 0: return *this;
0152 case 1: return Detail::shifted< 1>(dataI());
0153 case 2: return Detail::shifted< 2>(dataI());
0154 case 3: return Detail::shifted< 3>(dataI());
0155 case 4: return Detail::shifted< 4>(dataI());
0156 case 5: return Detail::shifted< 5>(dataI());
0157 case 6: return Detail::shifted< 6>(dataI());
0158 case 7: return Detail::shifted< 7>(dataI());
0159 case 8: return Detail::shifted< 8>(dataI());
0160 case 9: return Detail::shifted< 9>(dataI());
0161 case 10: return Detail::shifted< 10>(dataI());
0162 case 11: return Detail::shifted< 11>(dataI());
0163 case 12: return Detail::shifted< 12>(dataI());
0164 case 13: return Detail::shifted< 13>(dataI());
0165 case 14: return Detail::shifted< 14>(dataI());
0166 case 15: return Detail::shifted< 15>(dataI());
0167 case 16: return Detail::shifted< 16>(dataI());
0168 case 17: return Detail::shifted< 17>(dataI());
0169 case 18: return Detail::shifted< 18>(dataI());
0170 case 19: return Detail::shifted< 19>(dataI());
0171 case 20: return Detail::shifted< 20>(dataI());
0172 case 21: return Detail::shifted< 21>(dataI());
0173 case 22: return Detail::shifted< 22>(dataI());
0174 case 23: return Detail::shifted< 23>(dataI());
0175 case 24: return Detail::shifted< 24>(dataI());
0176 case 25: return Detail::shifted< 25>(dataI());
0177 case 26: return Detail::shifted< 26>(dataI());
0178 case 27: return Detail::shifted< 27>(dataI());
0179 case 28: return Detail::shifted< 28>(dataI());
0180 case 29: return Detail::shifted< 29>(dataI());
0181 case 30: return Detail::shifted< 30>(dataI());
0182 case 31: return Detail::shifted< 31>(dataI());
0183 case -1: return Detail::shifted< -1>(dataI());
0184 case -2: return Detail::shifted< -2>(dataI());
0185 case -3: return Detail::shifted< -3>(dataI());
0186 case -4: return Detail::shifted< -4>(dataI());
0187 case -5: return Detail::shifted< -5>(dataI());
0188 case -6: return Detail::shifted< -6>(dataI());
0189 case -7: return Detail::shifted< -7>(dataI());
0190 case -8: return Detail::shifted< -8>(dataI());
0191 case -9: return Detail::shifted< -9>(dataI());
0192 case -10: return Detail::shifted<-10>(dataI());
0193 case -11: return Detail::shifted<-11>(dataI());
0194 case -12: return Detail::shifted<-12>(dataI());
0195 case -13: return Detail::shifted<-13>(dataI());
0196 case -14: return Detail::shifted<-14>(dataI());
0197 case -15: return Detail::shifted<-15>(dataI());
0198 case -16: return Detail::shifted<-16>(dataI());
0199 case -17: return Detail::shifted<-17>(dataI());
0200 case -18: return Detail::shifted<-18>(dataI());
0201 case -19: return Detail::shifted<-19>(dataI());
0202 case -20: return Detail::shifted<-20>(dataI());
0203 case -21: return Detail::shifted<-21>(dataI());
0204 case -22: return Detail::shifted<-22>(dataI());
0205 case -23: return Detail::shifted<-23>(dataI());
0206 case -24: return Detail::shifted<-24>(dataI());
0207 case -25: return Detail::shifted<-25>(dataI());
0208 case -26: return Detail::shifted<-26>(dataI());
0209 case -27: return Detail::shifted<-27>(dataI());
0210 case -28: return Detail::shifted<-28>(dataI());
0211 case -29: return Detail::shifted<-29>(dataI());
0212 case -30: return Detail::shifted<-30>(dataI());
0213 case -31: return Detail::shifted<-31>(dataI());
0214 }
0215 return Zero();
0216 }
0217 // }}}1
0218
0219 /*
0220 template<> Vc_INTRINSIC AVX2::Mask< 4, 32> &AVX2::Mask< 4, 32>::operator=(const std::array<bool, 4> &values) {
0221 static_assert(sizeof(bool) == 1, "Vc expects bool to have a sizeof 1 Byte");
0222 unsigned int x = *reinterpret_cast<const unsigned int *>(values.data());
0223 x *= 0xffu;
0224 __m128i y = _mm_cvtsi32_si128(x); // 4 Bytes
0225 y = _mm_unpacklo_epi8(y, y); // 8 Bytes
0226 y = _mm_unpacklo_epi16(y, y); // 16 Bytes
0227 d.v() = AVX::avx_cast<__m256>(AVX::concat(_mm_unpacklo_epi32(y, y), _mm_unpackhi_epi32(y, y)));
0228 return *this;
0229 }
0230 template<> Vc_INTRINSIC AVX2::Mask< 8, 32> &AVX2::Mask< 8, 32>::operator=(const std::array<bool, 8> &values) {
0231 static_assert(sizeof(bool) == 1, "Vc expects bool to have a sizeof 1 Byte");
0232 unsigned long long x = *reinterpret_cast<const unsigned long long *>(values.data());
0233 x *= 0xffull;
0234 __m128i y = _mm_cvtsi64_si128(x); // 8 Bytes
0235 y = _mm_unpacklo_epi8(y, y); // 16 Bytes
0236 d.v() = AVX::avx_cast<__m256>(AVX::concat(_mm_unpacklo_epi16(y, y), _mm_unpackhi_epi16(y, y)));
0237 return *this;
0238 }
0239 template<> Vc_INTRINSIC AVX2::Mask< 8, 16> &AVX2::Mask< 8, 16>::operator=(const std::array<bool, 8> &values) {
0240 static_assert(sizeof(bool) == 1, "Vc expects bool to have a sizeof 1 Byte");
0241 unsigned long long x = *reinterpret_cast<const unsigned long long *>(values.data());
0242 x *= 0xffull;
0243 __m128i y = _mm_cvtsi64_si128(x); // 8 Bytes
0244 d.v() = AVX::avx_cast<__m128>(_mm_unpacklo_epi8(y, y));
0245 return *this;
0246 }
0247 template<> Vc_INTRINSIC AVX2::Mask<16, 16> &AVX2::Mask<16, 16>::operator=(const std::array<bool, 16> &values) {
0248 static_assert(sizeof(bool) == 1, "Vc expects bool to have a sizeof 1 Byte");
0249 __m128i x = _mm_loadu_si128(reinterpret_cast<const __m128i *>(values.data()));
0250 d.v() = _mm_andnot_ps(AVX::_mm_setallone_ps(), AVX::avx_cast<__m128>(_mm_sub_epi8(x, _mm_set1_epi8(1))));
0251 return *this;
0252 }
0253
0254 template<> Vc_INTRINSIC AVX2::Mask< 4, 32>::operator std::array<bool, 4>() const {
0255 static_assert(sizeof(bool) == 1, "Vc expects bool to have a sizeof 1 Byte");
0256 __m128i x = _mm_packs_epi32(AVX::lo128(dataI()), AVX::hi128(dataI())); // 64bit -> 32bit
0257 x = _mm_packs_epi32(x, x); // 32bit -> 16bit
0258 x = _mm_srli_epi16(x, 15);
0259 x = _mm_packs_epi16(x, x); // 16bit -> 8bit
0260 std::array<bool, 4> r;
0261 asm volatile("vmovd %1,%0" : "=m"(*r.data()) : "x"(x));
0262 return r;
0263 }
0264 template<> Vc_INTRINSIC AVX2::Mask< 8, 32>::operator std::array<bool, 8>() const {
0265 static_assert(sizeof(bool) == 1, "Vc expects bool to have a sizeof 1 Byte");
0266 __m128i x = _mm_packs_epi32(AVX::lo128(dataI()), AVX::hi128(dataI())); // 32bit -> 16bit
0267 x = _mm_srli_epi16(x, 15);
0268 x = _mm_packs_epi16(x, x); // 16bit -> 8bit
0269 std::array<bool, 8> r;
0270 asm volatile("vmovq %1,%0" : "=m"(*r.data()) : "x"(x));
0271 return r;
0272 }
0273 template<> Vc_INTRINSIC AVX2::Mask< 8, 16>::operator std::array<bool, 8>() const {
0274 static_assert(sizeof(bool) == 1, "Vc expects bool to have a sizeof 1 Byte");
0275 __m128i x = _mm_srli_epi16(dataI(), 15);
0276 x = _mm_packs_epi16(x, x); // 16bit -> 8bit
0277 std::array<bool, 8> r;
0278 asm volatile("vmovq %1,%0" : "=m"(*r.data()) : "x"(x));
0279 return r;
0280 }
0281 template<> Vc_INTRINSIC AVX2::Mask<16, 16>::operator std::array<bool, 16>() const {
0282 static_assert(sizeof(bool) == 1, "Vc expects bool to have a sizeof 1 Byte");
0283 __m128 x = _mm_and_ps(d.v(), AVX::avx_cast<__m128>(_mm_set1_epi32(0x01010101)));
0284 std::array<bool, 16> r;
0285 asm volatile("vmovups %1,%0" : "=m"(*r.data()) : "x"(x));
0286 return r;
0287 }
0288 */
0289
0290 }
0291
0292 // vim: foldmethod=marker