Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-31 10:25:45

0001 /*  This file is part of the Vc library. {{{
0002 Copyright © 2014-2015 Matthias Kretz <kretz@kde.org>
0003 
0004 Redistribution and use in source and binary forms, with or without
0005 modification, are permitted provided that the following conditions are met:
0006     * Redistributions of source code must retain the above copyright
0007       notice, this list of conditions and the following disclaimer.
0008     * Redistributions in binary form must reproduce the above copyright
0009       notice, this list of conditions and the following disclaimer in the
0010       documentation and/or other materials provided with the distribution.
0011     * Neither the names of contributing organizations nor the
0012       names of its contributors may be used to endorse or promote products
0013       derived from this software without specific prior written permission.
0014 
0015 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
0016 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
0017 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
0018 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
0019 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
0020 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
0021 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
0022 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
0023 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
0024 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
0025 
0026 }}}*/
0027 
0028 #ifndef VC_SSE_SIMD_CAST_H_
0029 #define VC_SSE_SIMD_CAST_H_
0030 
0031 #include "../common/utility.h"
0032 #ifdef Vc_IMPL_AVX
0033 #include "../avx/casts.h"
0034 #endif
0035 
0036 #ifndef VC_SSE_VECTOR_H_
0037 #error "Vc/sse/vector.h needs to be included before Vc/sse/simd_cast.h"
0038 #endif
0039 #include "macros.h"
0040 
0041 namespace Vc_VERSIONED_NAMESPACE
0042 {
0043 namespace SSE
0044 {
0045 
0046 // Declarations: helper macros Vc_SIMD_CAST_[1248] {{{1
// Vc_SIMD_CAST_<N>(from_, to_) expands to the template header and signature (no
// body and no trailing semicolon) of a simd_cast<To> overload that takes N
// arguments of type from_, named x for N == 1 and x0..x<N-1> otherwise.
// The unnamed last parameter is keyed on std::is_same<To, to_>, so the overload
// is meant to be usable only when the requested To is exactly to_. In this
// flavour of the macros that parameter is defaulted to nullarg, which makes them
// suitable for the forward declarations that follow.
// NOTE(review): enable_if and nullarg are defined elsewhere in Vc and are not
// visible in this file - confirm their exact semantics there.
// NOTE(review): no use of Vc_SIMD_CAST_8 is visible in this part of the file.
0047 #define Vc_SIMD_CAST_1(from_, to_)                                                       \
0048     template <typename To>                                                               \
0049     Vc_INTRINSIC Vc_CONST To simd_cast(                                                  \
0050         from_ x, enable_if<std::is_same<To, to_>::value> = nullarg)
0051 
0052 #define Vc_SIMD_CAST_2(from_, to_)                                                       \
0053     template <typename To>                                                               \
0054     Vc_INTRINSIC Vc_CONST To simd_cast(                                                  \
0055         from_ x0, from_ x1, enable_if<std::is_same<To, to_>::value> = nullarg)
0056 
0057 #define Vc_SIMD_CAST_4(from_, to_)                                                       \
0058     template <typename To>                                                               \
0059     Vc_INTRINSIC Vc_CONST To simd_cast(                                                  \
0060         from_ x0, from_ x1, from_ x2, from_ x3,                                          \
0061         enable_if<std::is_same<To, to_>::value> = nullarg)
0062 
0063 #define Vc_SIMD_CAST_8(from_, to_)                                                       \
0064     template <typename To>                                                               \
0065     Vc_INTRINSIC Vc_CONST To simd_cast(                                                  \
0066         from_ x0, from_ x1, from_ x2, from_ x3, from_ x4, from_ x5, from_ x6, from_ x7,  \
0067         enable_if<std::is_same<To, to_>::value> = nullarg)
0068 
0069 // Declarations: Vector casts without offset {{{1
0070 // 1 SSE::Vector to 1 SSE::Vector {{{2
// Forward declarations of the single-argument simd_cast overloads between SSE
// vector types. There is one declaration per ordered pair of distinct vector
// types out of int_v, uint_v, float_v, double_v, short_v and ushort_v
// (6 targets x 5 sources = 30 overloads). The first macro argument is the source
// type, the second the destination type; the definitions follow further down.
0071 Vc_SIMD_CAST_1( float_v,    int_v);
0072 Vc_SIMD_CAST_1(double_v,    int_v);
0073 Vc_SIMD_CAST_1(  uint_v,    int_v);
0074 Vc_SIMD_CAST_1( short_v,    int_v);
0075 Vc_SIMD_CAST_1(ushort_v,    int_v);
0076 Vc_SIMD_CAST_1( float_v,   uint_v);
0077 Vc_SIMD_CAST_1(double_v,   uint_v);
0078 Vc_SIMD_CAST_1(   int_v,   uint_v);
0079 Vc_SIMD_CAST_1( short_v,   uint_v);
0080 Vc_SIMD_CAST_1(ushort_v,   uint_v);
0081 Vc_SIMD_CAST_1(double_v,  float_v);
0082 Vc_SIMD_CAST_1(   int_v,  float_v);
0083 Vc_SIMD_CAST_1(  uint_v,  float_v);
0084 Vc_SIMD_CAST_1( short_v,  float_v);
0085 Vc_SIMD_CAST_1(ushort_v,  float_v);
0086 Vc_SIMD_CAST_1( float_v, double_v);
0087 Vc_SIMD_CAST_1(   int_v, double_v);
0088 Vc_SIMD_CAST_1(  uint_v, double_v);
0089 Vc_SIMD_CAST_1( short_v, double_v);
0090 Vc_SIMD_CAST_1(ushort_v, double_v);
0091 Vc_SIMD_CAST_1(   int_v,  short_v);
0092 Vc_SIMD_CAST_1(  uint_v,  short_v);
0093 Vc_SIMD_CAST_1( float_v,  short_v);
0094 Vc_SIMD_CAST_1(double_v,  short_v);
0095 Vc_SIMD_CAST_1(ushort_v,  short_v);
0096 Vc_SIMD_CAST_1(   int_v, ushort_v);
0097 Vc_SIMD_CAST_1(  uint_v, ushort_v);
0098 Vc_SIMD_CAST_1( float_v, ushort_v);
0099 Vc_SIMD_CAST_1(double_v, ushort_v);
0100 Vc_SIMD_CAST_1( short_v, ushort_v);
0101 
0102 // 2 SSE::Vector to 1 SSE::Vector {{{2
// Forward declarations of the two-argument simd_cast overloads: two source
// vectors of the same type (x0, x1) are converted into a single destination
// vector. Only source/destination pairs where the source type has fewer
// elements than the destination type are declared. The definitions follow
// further down in this file.
0103 Vc_SIMD_CAST_2(double_v,    int_v);
0104 Vc_SIMD_CAST_2(double_v,   uint_v);
0105 Vc_SIMD_CAST_2(double_v,  float_v);
0106 Vc_SIMD_CAST_2(   int_v,  short_v);
0107 Vc_SIMD_CAST_2(  uint_v,  short_v);
0108 Vc_SIMD_CAST_2( float_v,  short_v);
0109 Vc_SIMD_CAST_2(double_v,  short_v);
0110 Vc_SIMD_CAST_2(   int_v, ushort_v);
0111 Vc_SIMD_CAST_2(  uint_v, ushort_v);
0112 Vc_SIMD_CAST_2( float_v, ushort_v);
0113 Vc_SIMD_CAST_2(double_v, ushort_v);
0114 
0115 // 3 SSE::Vector to 1 SSE::Vector {{{2
// Vc_CAST_(To_) expands to the template header plus return type of a simd_cast
// overload. Unlike the Vc_SIMD_CAST_<N> macros, the constraint
// std::is_same<Return, To_> sits in the return type (enable_if<cond, Return>)
// instead of in an extra defaulted parameter, so declaration and definition can
// share the same macro. It stays defined until the #undef that follows the
// matching definitions further down.
0116 #define Vc_CAST_(To_)                                                                    \
0117     template <typename Return>                                                           \
0118     Vc_INTRINSIC Vc_CONST enable_if<std::is_same<Return, To_>::value, Return>
// Three double_v arguments converted into one short_v / ushort_v.
0119 Vc_CAST_(short_v) simd_cast(double_v a, double_v b, double_v c);
0120 Vc_CAST_(ushort_v) simd_cast(double_v a, double_v b, double_v c);
0121 
0122 // 4 SSE::Vector to 1 SSE::Vector {{{2
// Forward declarations: four double_v arguments (x0..x3) converted into one
// short_v / ushort_v. The definitions follow further down in this file.
0123 Vc_SIMD_CAST_4(double_v,  short_v);
0124 Vc_SIMD_CAST_4(double_v, ushort_v);
0125 //}}}2
0126 }  // namespace SSE
0127 using SSE::simd_cast;
0128 
0129 // 1 Scalar::Vector to 1 SSE::Vector {{{2
// Forward declarations of simd_cast overloads that build an SSE vector from a
// single Scalar::Vector<T>. There is one overload per SSE destination type
// (double_v, float_v, int_v, uint_v, short_v, ushort_v); the defaulted trailing
// enable_if parameter is keyed on std::is_same<Return, ...> to pick the overload
// matching the requested Return type. Definitions follow further down.
0130 template <typename Return, typename T>
0131 Vc_INTRINSIC Vc_CONST Return
0132 simd_cast(Scalar::Vector<T> x,
0133           enable_if<std::is_same<Return, SSE::double_v>::value> = nullarg);
0134 template <typename Return, typename T>
0135 Vc_INTRINSIC Vc_CONST Return
0136 simd_cast(Scalar::Vector<T> x,
0137           enable_if<std::is_same<Return, SSE::float_v>::value> = nullarg);
0138 template <typename Return, typename T>
0139 Vc_INTRINSIC Vc_CONST Return
0140 simd_cast(Scalar::Vector<T> x,
0141           enable_if<std::is_same<Return, SSE::int_v>::value> = nullarg);
0142 template <typename Return, typename T>
0143 Vc_INTRINSIC Vc_CONST Return
0144 simd_cast(Scalar::Vector<T> x,
0145           enable_if<std::is_same<Return, SSE::uint_v>::value> = nullarg);
0146 template <typename Return, typename T>
0147 Vc_INTRINSIC Vc_CONST Return
0148 simd_cast(Scalar::Vector<T> x,
0149           enable_if<std::is_same<Return, SSE::short_v>::value> = nullarg);
0150 template <typename Return, typename T>
0151 Vc_INTRINSIC Vc_CONST Return
0152 simd_cast(Scalar::Vector<T> x,
0153           enable_if<std::is_same<Return, SSE::ushort_v>::value> = nullarg);
0154 
0155 // 2 Scalar::Vector to 1 SSE::Vector {{{2
// Forward declarations of simd_cast overloads that build an SSE vector from two
// Scalar::Vector<T> arguments (x0, x1), one overload per SSE destination type
// (double_v, float_v, int_v, uint_v, short_v, ushort_v). Definitions follow
// further down in this file.
0156 template <typename Return, typename T>
0157 Vc_INTRINSIC Vc_CONST Return
0158 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1,
0159           enable_if<std::is_same<Return, SSE::double_v>::value> = nullarg);
0160 template <typename Return, typename T>
0161 Vc_INTRINSIC Vc_CONST Return
0162 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1,
0163           enable_if<std::is_same<Return, SSE::float_v>::value> = nullarg);
0164 template <typename Return, typename T>
0165 Vc_INTRINSIC Vc_CONST Return
0166 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1,
0167           enable_if<std::is_same<Return, SSE::int_v>::value> = nullarg);
0168 template <typename Return, typename T>
0169 Vc_INTRINSIC Vc_CONST Return
0170 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1,
0171           enable_if<std::is_same<Return, SSE::uint_v>::value> = nullarg);
0172 template <typename Return, typename T>
0173 Vc_INTRINSIC Vc_CONST Return
0174 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1,
0175           enable_if<std::is_same<Return, SSE::short_v>::value> = nullarg);
0176 template <typename Return, typename T>
0177 Vc_INTRINSIC Vc_CONST Return
0178 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1,
0179           enable_if<std::is_same<Return, SSE::ushort_v>::value> = nullarg);
0180 
0181 // 3 Scalar::Vector to 1 SSE::Vector {{{2
// Forward declarations of simd_cast overloads that build an SSE vector from three
// Scalar::Vector<T> arguments, one overload per SSE destination type with at
// least three elements (float_v, int_v, uint_v, short_v, ushort_v; there is no
// double_v variant). The parameters are named x0, x1, x2 to match the
// definitions further down in this file.
0182 template <typename Return, typename T>
0183 Vc_INTRINSIC Vc_CONST Return
0184 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0185           enable_if<std::is_same<Return, SSE::float_v>::value> = nullarg);
0186 template <typename Return, typename T>
0187 Vc_INTRINSIC Vc_CONST Return
0188 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0189           enable_if<std::is_same<Return, SSE::int_v>::value> = nullarg);
0190 template <typename Return, typename T>
0191 Vc_INTRINSIC Vc_CONST Return
0192 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0193           enable_if<std::is_same<Return, SSE::uint_v>::value> = nullarg);
0194 template <typename Return, typename T>
0195 Vc_INTRINSIC Vc_CONST Return
0196 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0197           enable_if<std::is_same<Return, SSE::short_v>::value> = nullarg);
0198 template <typename Return, typename T>
0199 Vc_INTRINSIC Vc_CONST Return
0200 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0201           enable_if<std::is_same<Return, SSE::ushort_v>::value> = nullarg);
0202 
0203 // 4 Scalar::Vector to 1 SSE::Vector {{{2
// Forward declarations of simd_cast overloads that build an SSE vector from four
// Scalar::Vector<T> arguments (x0..x3), one overload per destination type
// float_v, int_v, uint_v, short_v and ushort_v. Definitions follow further down.
0204 template <typename Return, typename T>
0205 Vc_INTRINSIC Vc_CONST Return
0206 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0207           Scalar::Vector<T> x3,
0208           enable_if<std::is_same<Return, SSE::float_v>::value> = nullarg);
0209 template <typename Return, typename T>
0210 Vc_INTRINSIC Vc_CONST Return
0211 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0212           Scalar::Vector<T> x3,
0213           enable_if<std::is_same<Return, SSE::int_v>::value> = nullarg);
0214 template <typename Return, typename T>
0215 Vc_INTRINSIC Vc_CONST Return
0216 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0217           Scalar::Vector<T> x3,
0218           enable_if<std::is_same<Return, SSE::uint_v>::value> = nullarg);
0219 template <typename Return, typename T>
0220 Vc_INTRINSIC Vc_CONST Return
0221 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0222           Scalar::Vector<T> x3,
0223           enable_if<std::is_same<Return, SSE::short_v>::value> = nullarg);
0224 template <typename Return, typename T>
0225 Vc_INTRINSIC Vc_CONST Return
0226 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0227           Scalar::Vector<T> x3,
0228           enable_if<std::is_same<Return, SSE::ushort_v>::value> = nullarg);
0229 
0230 // 5 Scalar::Vector to 1 SSE::Vector {{{2
// Forward declarations: five Scalar::Vector<T> arguments (x0..x4) combined into
// one short_v or ushort_v. Only these two destination types are declared for
// five arguments. Definitions follow further down in this file.
0231 template <typename Return, typename T>
0232 Vc_INTRINSIC Vc_CONST Return
0233 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0234           Scalar::Vector<T> x3, Scalar::Vector<T> x4,
0235           enable_if<std::is_same<Return, SSE::short_v>::value> = nullarg);
0236 template <typename Return, typename T>
0237 Vc_INTRINSIC Vc_CONST Return
0238 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0239           Scalar::Vector<T> x3, Scalar::Vector<T> x4,
0240           enable_if<std::is_same<Return, SSE::ushort_v>::value> = nullarg);
0241 
0242 // 6 Scalar::Vector to 1 SSE::Vector {{{2
// Forward declarations: six Scalar::Vector<T> arguments (x0..x5) combined into
// one short_v or ushort_v. Definitions follow further down in this file.
0243 template <typename Return, typename T>
0244 Vc_INTRINSIC Vc_CONST Return
0245 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0246           Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0247           enable_if<std::is_same<Return, SSE::short_v>::value> = nullarg);
0248 template <typename Return, typename T>
0249 Vc_INTRINSIC Vc_CONST Return
0250 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0251           Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0252           enable_if<std::is_same<Return, SSE::ushort_v>::value> = nullarg);
0253 
0254 // 7 Scalar::Vector to 1 SSE::Vector {{{2
// Forward declarations: seven Scalar::Vector<T> arguments (x0..x6) combined into
// one short_v or ushort_v. Definitions follow further down in this file.
0255 template <typename Return, typename T>
0256 Vc_INTRINSIC Vc_CONST Return
0257 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0258           Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0259           Scalar::Vector<T> x6,
0260           enable_if<std::is_same<Return, SSE::short_v>::value> = nullarg);
0261 template <typename Return, typename T>
0262 Vc_INTRINSIC Vc_CONST Return
0263 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0264           Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0265           Scalar::Vector<T> x6,
0266           enable_if<std::is_same<Return, SSE::ushort_v>::value> = nullarg);
0267 
0268 // 8 Scalar::Vector to 1 SSE::Vector {{{2
// Forward declarations: eight Scalar::Vector<T> arguments (x0..x7) combined into
// one short_v or ushort_v.
// NOTE(review): the matching definitions are not within the visible part of
// this file.
0269 template <typename Return, typename T>
0270 Vc_INTRINSIC Vc_CONST Return
0271 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0272           Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0273           Scalar::Vector<T> x6, Scalar::Vector<T> x7,
0274           enable_if<std::is_same<Return, SSE::short_v>::value> = nullarg);
0275 template <typename Return, typename T>
0276 Vc_INTRINSIC Vc_CONST Return
0277 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0278           Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0279           Scalar::Vector<T> x6, Scalar::Vector<T> x7,
0280           enable_if<std::is_same<Return, SSE::ushort_v>::value> = nullarg);
0281 
0282 // SSE::Vector to Scalar::Vector {{{2
// Forward declaration of the cast from any SSE::Vector<FromT> to a scalar
// vector type. The overload is constrained on Scalar::is_vector<To>, i.e. it is
// meant for destination types that are Scalar vectors.
// NOTE(review): the definition is not within the visible part of this file.
0283 template <typename To, typename FromT>
0284 Vc_INTRINSIC Vc_CONST To
0285 simd_cast(SSE::Vector<FromT> x, enable_if<Scalar::is_vector<To>::value> = nullarg);
0286 
0287 // helper macros Vc_SIMD_CAST_[1248] {{{1
// The declaration flavour of the macros is dropped and replaced by a definition
// flavour: identical signatures, but without the "= nullarg" default argument on
// the trailing enable_if parameter. C++ does not allow a default argument to be
// repeated on a later declaration of the same function, so the default lives on
// the forward declarations only. Each macro use below is followed directly by
// the function body.
0288 #undef Vc_SIMD_CAST_1
0289 #undef Vc_SIMD_CAST_2
0290 #undef Vc_SIMD_CAST_4
0291 #undef Vc_SIMD_CAST_8
0292 #define Vc_SIMD_CAST_1(from_, to_)                                                       \
0293     template <typename To>                                                               \
0294     Vc_INTRINSIC Vc_CONST To simd_cast(from_ x, enable_if<std::is_same<To, to_>::value>)
0295 
0296 #define Vc_SIMD_CAST_2(from_, to_)                                                       \
0297     template <typename To>                                                               \
0298     Vc_INTRINSIC Vc_CONST To simd_cast(from_ x0, from_ x1,                               \
0299                                        enable_if<std::is_same<To, to_>::value>)
0300 
0301 #define Vc_SIMD_CAST_4(from_, to_)                                                       \
0302     template <typename To>                                                               \
0303     Vc_INTRINSIC Vc_CONST To simd_cast(from_ x0, from_ x1, from_ x2, from_ x3,           \
0304                                        enable_if<std::is_same<To, to_>::value>)
0305 
0306 #define Vc_SIMD_CAST_8(from_, to_)                                                       \
0307     template <typename To>                                                               \
0308     Vc_INTRINSIC Vc_CONST To simd_cast(from_ x0, from_ x1, from_ x2, from_ x3, from_ x4, \
0309                                        from_ x5, from_ x6, from_ x7,                     \
0310                                        enable_if<std::is_same<To, to_>::value>)
0311 
0312 // Vector casts without offset {{{1
0313 namespace SSE
0314 {
0315 // helper functions {{{2
// Narrows eight 32-bit integers to eight 16-bit integers by keeping only the low
// 16 bits of each one (plain truncation, no saturation).
//   a: source elements 0..3, b: source elements 4..7.
// Returns one register holding the low halves of elements 0..7 in order.
// In the lane diagrams a digit is the index of the source element whose low half
// sits in that 16-bit lane; X marks a lane holding a (discarded) high half.
0316 Vc_INTRINSIC __m128i convert_int32_to_int16(__m128i a, __m128i b)
0317 {
0318     const __m128i mixLow = _mm_unpacklo_epi16(a, b);              // 0 4 X X 1 5 X X
0319     const __m128i mixHigh = _mm_unpackhi_epi16(a, b);             // 2 6 X X 3 7 X X
0320     const __m128i evens = _mm_unpacklo_epi16(mixLow, mixHigh);    // 0 2 4 6 X X X X
0321     const __m128i odds = _mm_unpackhi_epi16(mixLow, mixHigh);     // 1 3 5 7 X X X X
0322     return _mm_unpacklo_epi16(evens, odds);                       // 0 1 2 3 4 5 6 7
0323 }
0324 
0325 // 1 SSE::Vector to 1 SSE::Vector {{{2
// Definitions of the single-argument casts to int_v, uint_v, float_v and
// double_v. Every overload passes the raw register of the argument (x.data()) to
// convert<From, To> and returns the result as the requested vector type.
// NOTE(review): convert<> is declared elsewhere in Vc and is not visible in this
// file - element-count and rounding behaviour are determined there.
0326 // to int_v {{{3
0327 Vc_SIMD_CAST_1( float_v,    int_v) { return convert< float, int>(x.data()); }
0328 Vc_SIMD_CAST_1(double_v,    int_v) { return convert<double, int>(x.data()); }
0329 Vc_SIMD_CAST_1(  uint_v,    int_v) { return convert<  uint, int>(x.data()); }
0330 Vc_SIMD_CAST_1( short_v,    int_v) { return convert< short, int>(x.data()); }
0331 Vc_SIMD_CAST_1(ushort_v,    int_v) { return convert<ushort, int>(x.data()); }
0332 // to uint_v {{{3
0333 Vc_SIMD_CAST_1( float_v,   uint_v) { return convert< float, uint>(x.data()); }
0334 Vc_SIMD_CAST_1(double_v,   uint_v) { return convert<double, uint>(x.data()); }
0335 Vc_SIMD_CAST_1(   int_v,   uint_v) { return convert<   int, uint>(x.data()); }
0336 Vc_SIMD_CAST_1( short_v,   uint_v) { return convert< short, uint>(x.data()); }
0337 Vc_SIMD_CAST_1(ushort_v,   uint_v) { return convert<ushort, uint>(x.data()); }
0338 // to float_v {{{3
0339 Vc_SIMD_CAST_1(double_v,  float_v) { return convert<double, float>(x.data()); }
0340 Vc_SIMD_CAST_1(   int_v,  float_v) { return convert<   int, float>(x.data()); }
0341 Vc_SIMD_CAST_1(  uint_v,  float_v) { return convert<  uint, float>(x.data()); }
0342 Vc_SIMD_CAST_1( short_v,  float_v) { return convert< short, float>(x.data()); }
0343 Vc_SIMD_CAST_1(ushort_v,  float_v) { return convert<ushort, float>(x.data()); }
0344 // to double_v {{{3
0345 Vc_SIMD_CAST_1( float_v, double_v) { return convert< float, double>(x.data()); }
0346 Vc_SIMD_CAST_1(   int_v, double_v) { return convert<   int, double>(x.data()); }
0347 Vc_SIMD_CAST_1(  uint_v, double_v) { return convert<  uint, double>(x.data()); }
0348 Vc_SIMD_CAST_1( short_v, double_v) { return convert< short, double>(x.data()); }
0349 Vc_SIMD_CAST_1(ushort_v, double_v) { return convert<ushort, double>(x.data()); }
0350 // to short_v {{{3
0351 /*
0352  * C++ standard, §4.7 p3 (integral conversions):
0353  *  If the destination type is signed, the value is unchanged if it can be represented in the
0354  *  destination type (and bit-field width); otherwise, the value is implementation-defined.
0355  *
0356  * See also below for the Vc_SIMD_CAST_2 overloads.
0357  *
0358  * The alternative below narrows with _mm_packs_epi32 and is probably incorrect for all
     * compilers out there:
0359     Vc_SIMD_CAST_1(   int_v,  short_v) { return _mm_packs_epi32(x.data(), _mm_setzero_si128()); }
0360     Vc_SIMD_CAST_1(  uint_v,  short_v) { return _mm_packs_epi32(x.data(), _mm_setzero_si128()); }
0361     Vc_SIMD_CAST_2(   int_v,  short_v) { return _mm_packs_epi32(x0.data(), x1.data()); }
0362     Vc_SIMD_CAST_2(  uint_v,  short_v) { return _mm_packs_epi32(x0.data(), x1.data()); }
0363  */
// int_v / uint_v sources: the four 32-bit elements are truncated to their low 16
// bits via convert_int32_to_int16; the second operand is all zeros, so the upper
// four elements of the result are zero.
0364 Vc_SIMD_CAST_1(   int_v,  short_v) { return SSE::convert_int32_to_int16(x.data(), _mm_setzero_si128()); }
0365 Vc_SIMD_CAST_1(  uint_v,  short_v) { return SSE::convert_int32_to_int16(x.data(), _mm_setzero_si128()); }
// float_v / double_v sources: converted to int_v first, then packed together with
// a zero register by _mm_packs_epi32.
// NOTE(review): _mm_packs_epi32 packs with signed saturation, whereas the
// int_v/uint_v overloads above truncate - confirm this asymmetry is intended.
0366 Vc_SIMD_CAST_1( float_v,  short_v) { return _mm_packs_epi32(simd_cast<SSE::int_v>(x).data(), _mm_setzero_si128()); }
0367 Vc_SIMD_CAST_1(double_v,  short_v) { return _mm_packs_epi32(simd_cast<SSE::int_v>(x).data(), _mm_setzero_si128()); }
// ushort_v source: the register is handed over unchanged (bit pattern reused).
0368 Vc_SIMD_CAST_1(ushort_v,  short_v) { return x.data(); }
0369 // to ushort_v {{{3
// int_v / uint_v sources: truncate each 32-bit element to its low 16 bits via
// convert_int32_to_int16; the zero second operand leaves the upper four result
// elements zero.
0370 Vc_SIMD_CAST_1(   int_v, ushort_v) { return SSE::convert_int32_to_int16(x.data(), _mm_setzero_si128()); }
0371 Vc_SIMD_CAST_1(  uint_v, ushort_v) { return SSE::convert_int32_to_int16(x.data(), _mm_setzero_si128()); }
// float_v / double_v sources: two-step cast, first to int_v and from there with
// the int_v -> ushort_v overload above.
0372 Vc_SIMD_CAST_1( float_v, ushort_v) { return simd_cast<SSE::ushort_v>(simd_cast<SSE::int_v>(x)); }
0373 Vc_SIMD_CAST_1(double_v, ushort_v) { return simd_cast<SSE::ushort_v>(simd_cast<SSE::int_v>(x)); }
// short_v source: the register is handed over unchanged (bit pattern reused).
0374 Vc_SIMD_CAST_1( short_v, ushort_v) { return x.data(); }
0375 // 2 SSE::Vector to 1 SSE::Vector {{{2
// Two double_v arguments converted into one int_v / uint_v / float_v.
// With Vc_IMPL_AVX defined, both inputs are joined by AVX::concat and converted
// in a single step. Otherwise each input is converted on its own and the low
// 64 bits of both results are combined, x0 first and x1 second
// (_mm_unpacklo_epi64 for the integer targets, _mm_movelh_ps for float_v).
// NOTE(review): AVX::concat and AVX::convert come from ../avx/casts.h and are not
// visible here - presumably concat yields a 256-bit register with x0 in the low
// half; confirm there.
0376 Vc_SIMD_CAST_2(double_v,    int_v) {
0377 #ifdef Vc_IMPL_AVX
0378     return AVX::convert<double, int>(AVX::concat(x0.data(), x1.data()));
0379 #else
0380     return _mm_unpacklo_epi64(convert<double, int>(x0.data()), convert<double, int>(x1.data()));
0381 #endif
0382 }
0383 Vc_SIMD_CAST_2(double_v,   uint_v) {
0384 #ifdef Vc_IMPL_AVX
0385     return AVX::convert<double, uint>(AVX::concat(x0.data(), x1.data()));
0386 #else
0387     return _mm_unpacklo_epi64(convert<double, uint>(x0.data()), convert<double, uint>(x1.data()));
0388 #endif
0389 }
0390 Vc_SIMD_CAST_2(double_v,  float_v) {
0391 #ifdef Vc_IMPL_AVX
0392     return _mm256_cvtpd_ps(AVX::concat(x0.data(), x1.data()));
0393 #else
0394     return _mm_movelh_ps(_mm_cvtpd_ps(x0.data()), _mm_cvtpd_ps(x1.data()));
0395 #endif
0396 }
0397 
// Two source vectors converted into one short_v.
// int_v / uint_v pairs: all eight 32-bit elements are truncated to 16 bits by
// convert_int32_to_int16 (x0 supplies elements 0..3, x1 elements 4..7).
0398 Vc_SIMD_CAST_2(   int_v,  short_v) { return SSE::convert_int32_to_int16(x0.data(), x1.data()); }
0399 Vc_SIMD_CAST_2(  uint_v,  short_v) { return SSE::convert_int32_to_int16(x0.data(), x1.data()); }
// float_v pair: each input is cast to int_v, then both are packed with
// _mm_packs_epi32 (signed saturation rather than truncation).
0400 Vc_SIMD_CAST_2( float_v,  short_v) { return _mm_packs_epi32(simd_cast<SSE::int_v>(x0).data(), simd_cast<SSE::int_v>(x1).data()); }
// double_v pair: both inputs become one int_v, which is packed with a zero
// register, so the upper four result elements are zero.
0401 Vc_SIMD_CAST_2(double_v,  short_v) { return _mm_packs_epi32(simd_cast<SSE::int_v>(x0, x1).data(), _mm_setzero_si128()); }
0402 
// Two source vectors converted into one ushort_v.
// int_v / uint_v pairs: all eight 32-bit elements are truncated to 16 bits by
// convert_int32_to_int16 (x0 supplies elements 0..3, x1 elements 4..7).
0403 Vc_SIMD_CAST_2(   int_v, ushort_v) { return SSE::convert_int32_to_int16(x0.data(), x1.data()); }
0404 Vc_SIMD_CAST_2(  uint_v, ushort_v) { return SSE::convert_int32_to_int16(x0.data(), x1.data()); }
// float_v pair: each input is cast to int_v, then the int_v pair overload above
// is used.
0405 Vc_SIMD_CAST_2( float_v, ushort_v) { return simd_cast<SSE::ushort_v>(simd_cast<SSE::int_v>(x0), simd_cast<SSE::int_v>(x1)); }
// double_v pair: both inputs become one int_v, which is then cast with the
// single-argument int_v -> ushort_v overload.
0406 Vc_SIMD_CAST_2(double_v, ushort_v) { return simd_cast<SSE::ushort_v>(simd_cast<SSE::int_v>(x0, x1)); }
0407 
0408 // 3 SSE::Vector to 1 SSE::Vector {{{2
// Three double_v arguments converted into one short_v / ushort_v: a and b are
// cast together into one int_v, c is cast into a second int_v, and the two int_v
// values are then narrowed with the two-argument int_v overload.
// NOTE(review): for short_v this route narrows by truncation
// (convert_int32_to_int16), while the 2- and 4-double_v -> short_v overloads use
// _mm_packs_epi32 (saturation) - confirm the difference for out-of-range values
// is acceptable.
0409 Vc_CAST_(short_v) simd_cast(double_v a, double_v b, double_v c)
0410 {
0411     return simd_cast<short_v>(simd_cast<int_v>(a, b), simd_cast<int_v>(c));
0412 }
0413 Vc_CAST_(ushort_v) simd_cast(double_v a, double_v b, double_v c)
0414 {
0415     return simd_cast<ushort_v>(simd_cast<int_v>(a, b), simd_cast<int_v>(c));
0416 }
// The helper macro is only needed for the two overloads above.
0417 #undef Vc_CAST_
0418 
0419 // 4 SSE::Vector to 1 SSE::Vector {{{2
// Four double_v arguments converted into one short_v / ushort_v: (x0, x1) and
// (x2, x3) are each cast into one int_v first. For short_v the two int_v
// registers are packed with _mm_packs_epi32; for ushort_v the two-argument
// int_v -> ushort_v overload is used.
0420 Vc_SIMD_CAST_4(double_v,  short_v) { return _mm_packs_epi32(simd_cast<SSE::int_v>(x0, x1).data(), simd_cast<SSE::int_v>(x2, x3).data()); }
0421 Vc_SIMD_CAST_4(double_v, ushort_v) { return simd_cast<SSE::ushort_v>(simd_cast<SSE::int_v>(x0, x1), simd_cast<SSE::int_v>(x2, x3)); }
0422 }  // namespace SSE
0423 
0424 // 1 Scalar::Vector to 1 SSE::Vector {{{2
// Definitions: build an SSE vector from one scalar value. x.data() is passed as
// the first argument of the matching _mm_setr_* intrinsic, i.e. it becomes
// element 0 of the result, and every remaining element is set to zero.
// The trailing enable_if parameter carries no default here because the default
// is already given on the forward declarations.
0425 template <typename Return, typename T>
0426 Vc_INTRINSIC Vc_CONST Return
0427     simd_cast(Scalar::Vector<T> x,
0428               enable_if<std::is_same<Return, SSE::double_v>::value> )
0429 {
0430     return _mm_setr_pd(x.data(), 0.);  // FIXME: use register-register mov
0431 }
0432 template <typename Return, typename T>
0433 Vc_INTRINSIC Vc_CONST Return
0434     simd_cast(Scalar::Vector<T> x,
0435               enable_if<std::is_same<Return, SSE::float_v>::value> )
0436 {
0437     return _mm_setr_ps(x.data(), 0.f, 0.f, 0.f);  // FIXME: use register-register mov
0438 }
0439 template <typename Return, typename T>
0440 Vc_INTRINSIC Vc_CONST Return
0441     simd_cast(Scalar::Vector<T> x,
0442               enable_if<std::is_same<Return, SSE::int_v>::value> )
0443 {
0444     return _mm_setr_epi32(x.data(), 0, 0, 0);  // FIXME: use register-register mov
0445 }
// uint_v: the value is converted to uint explicitly before it is handed to the
// (int-taking) intrinsic.
0446 template <typename Return, typename T>
0447 Vc_INTRINSIC Vc_CONST Return
0448     simd_cast(Scalar::Vector<T> x,
0449               enable_if<std::is_same<Return, SSE::uint_v>::value> )
0450 {
0451     return _mm_setr_epi32(uint(x.data()), 0, 0, 0);  // FIXME: use register-register mov
0452 }
0453 template <typename Return, typename T>
0454 Vc_INTRINSIC Vc_CONST Return
0455     simd_cast(Scalar::Vector<T> x,
0456               enable_if<std::is_same<Return, SSE::short_v>::value> )
0457 {
0458     return _mm_setr_epi16(
0459         x.data(), 0, 0, 0, 0, 0, 0, 0);  // FIXME: use register-register mov
0460 }
0461 template <typename Return, typename T>
0462 Vc_INTRINSIC Vc_CONST Return
0463     simd_cast(Scalar::Vector<T> x,
0464               enable_if<std::is_same<Return, SSE::ushort_v>::value> )
0465 {
0466     return _mm_setr_epi16(
0467         x.data(), 0, 0, 0, 0, 0, 0, 0);  // FIXME: use register-register mov
0468 }
0469 
0470 // 2 Scalar::Vector to 1 SSE::Vector {{{2
// Definitions: build an SSE vector from two scalar values. x0.data() and
// x1.data() become elements 0 and 1 of the result (argument order of the
// _mm_setr_* intrinsics); all remaining elements are set to zero. For double_v
// the two values fill the whole vector.
0471 template <typename Return, typename T>
0472 Vc_INTRINSIC Vc_CONST Return
0473     simd_cast(Scalar::Vector<T> x0,
0474               Scalar::Vector<T> x1,
0475               enable_if<std::is_same<Return, SSE::double_v>::value> )
0476 {
0477     return _mm_setr_pd(x0.data(), x1.data());  // FIXME: use register-register mov
0478 }
0479 template <typename Return, typename T>
0480 Vc_INTRINSIC Vc_CONST Return
0481     simd_cast(Scalar::Vector<T> x0,
0482               Scalar::Vector<T> x1,
0483               enable_if<std::is_same<Return, SSE::float_v>::value> )
0484 {
0485     return _mm_setr_ps(x0.data(), x1.data(), 0.f, 0.f);  // FIXME: use register-register mov
0486 }
0487 template <typename Return, typename T>
0488 Vc_INTRINSIC Vc_CONST Return
0489     simd_cast(Scalar::Vector<T> x0,
0490               Scalar::Vector<T> x1,
0491               enable_if<std::is_same<Return, SSE::int_v>::value> )
0492 {
0493     return _mm_setr_epi32(x0.data(), x1.data(), 0, 0);  // FIXME: use register-register mov
0494 }
// uint_v: both values are converted to uint explicitly before they are handed to
// the (int-taking) intrinsic.
0495 template <typename Return, typename T>
0496 Vc_INTRINSIC Vc_CONST Return
0497     simd_cast(Scalar::Vector<T> x0,
0498               Scalar::Vector<T> x1,
0499               enable_if<std::is_same<Return, SSE::uint_v>::value> )
0500 {
0501     return _mm_setr_epi32(uint(x0.data()), uint(x1.data()), 0,
0502                           0);  // FIXME: use register-register mov
0503 }
0504 template <typename Return, typename T>
0505 Vc_INTRINSIC Vc_CONST Return
0506     simd_cast(Scalar::Vector<T> x0,
0507               Scalar::Vector<T> x1,
0508               enable_if<std::is_same<Return, SSE::short_v>::value> )
0509 {
0510     return _mm_setr_epi16(
0511         x0.data(), x1.data(), 0, 0, 0, 0, 0, 0);  // FIXME: use register-register mov
0512 }
0513 template <typename Return, typename T>
0514 Vc_INTRINSIC Vc_CONST Return
0515     simd_cast(Scalar::Vector<T> x0,
0516               Scalar::Vector<T> x1,
0517               enable_if<std::is_same<Return, SSE::ushort_v>::value> )
0518 {
0519     return _mm_setr_epi16(
0520         x0.data(), x1.data(), 0, 0, 0, 0, 0, 0);  // FIXME: use register-register mov
0521 }
0522 
0523 // 3 Scalar::Vector to 1 SSE::Vector {{{2
// Definitions: build an SSE vector from three scalar values. x0, x1 and x2
// become elements 0, 1 and 2 of the result (argument order of the _mm_setr_*
// intrinsics); all remaining elements are set to zero.
0524 template <typename Return, typename T>
0525 Vc_INTRINSIC Vc_CONST Return
0526 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0527           enable_if<std::is_same<Return, SSE::float_v>::value>)
0528 {
0529     return _mm_setr_ps(x0.data(), x1.data(), x2.data(), 0.f);
0530 }
0531 template <typename Return, typename T>
0532 Vc_INTRINSIC Vc_CONST Return
0533 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0534           enable_if<std::is_same<Return, SSE::int_v>::value>)
0535 {
0536     return _mm_setr_epi32(x0.data(), x1.data(), x2.data(), 0);
0537 }
// uint_v: the values are converted to uint explicitly before they are handed to
// the (int-taking) intrinsic.
0538 template <typename Return, typename T>
0539 Vc_INTRINSIC Vc_CONST Return
0540 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0541           enable_if<std::is_same<Return, SSE::uint_v>::value>)
0542 {
0543     return _mm_setr_epi32(uint(x0.data()), uint(x1.data()), uint(x2.data()),
0544                           0);  // FIXME: use register-register mov
0545 }
0546 template <typename Return, typename T>
0547 Vc_INTRINSIC Vc_CONST Return
0548 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0549           enable_if<std::is_same<Return, SSE::short_v>::value>)
0550 {
0551     return _mm_setr_epi16(x0.data(), x1.data(), x2.data(), 0, 0, 0, 0, 0);
0552 }
0553 template <typename Return, typename T>
0554 Vc_INTRINSIC Vc_CONST Return
0555 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0556           enable_if<std::is_same<Return, SSE::ushort_v>::value>)
0557 {
0558     return _mm_setr_epi16(x0.data(), x1.data(), x2.data(), 0, 0, 0, 0, 0);
0559 }
0560 
0561 // 4 Scalar::Vector to 1 SSE::Vector {{{2
// Definitions: build an SSE vector from four scalar values. x0..x3 become
// elements 0..3 of the result (argument order of the _mm_setr_* intrinsics).
// For float_v, int_v and uint_v this fills the whole vector; for short_v and
// ushort_v the upper four elements are set to zero.
0562 template <typename Return, typename T>
0563 Vc_INTRINSIC Vc_CONST Return
0564     simd_cast(Scalar::Vector<T> x0,
0565               Scalar::Vector<T> x1,
0566               Scalar::Vector<T> x2,
0567               Scalar::Vector<T> x3,
0568               enable_if<std::is_same<Return, SSE::float_v>::value> )
0569 {
0570     return _mm_setr_ps(
0571         x0.data(), x1.data(), x2.data(), x3.data());  // FIXME: use register-register mov
0572 }
0573 template <typename Return, typename T>
0574 Vc_INTRINSIC Vc_CONST Return
0575     simd_cast(Scalar::Vector<T> x0,
0576               Scalar::Vector<T> x1,
0577               Scalar::Vector<T> x2,
0578               Scalar::Vector<T> x3,
0579               enable_if<std::is_same<Return, SSE::int_v>::value> )
0580 {
0581     return _mm_setr_epi32(
0582         x0.data(), x1.data(), x2.data(), x3.data());  // FIXME: use register-register mov
0583 }
// uint_v: the values are converted to uint explicitly before they are handed to
// the (int-taking) intrinsic.
0584 template <typename Return, typename T>
0585 Vc_INTRINSIC Vc_CONST Return
0586     simd_cast(Scalar::Vector<T> x0,
0587               Scalar::Vector<T> x1,
0588               Scalar::Vector<T> x2,
0589               Scalar::Vector<T> x3,
0590               enable_if<std::is_same<Return, SSE::uint_v>::value> )
0591 {
0592     return _mm_setr_epi32(uint(x0.data()), uint(x1.data()), uint(x2.data()),
0593                           uint(x3.data()));  // FIXME: use register-register mov
0594 }
0595 template <typename Return, typename T>
0596 Vc_INTRINSIC Vc_CONST Return
0597     simd_cast(Scalar::Vector<T> x0,
0598               Scalar::Vector<T> x1,
0599               Scalar::Vector<T> x2,
0600               Scalar::Vector<T> x3,
0601               enable_if<std::is_same<Return, SSE::short_v>::value> )
0602 {
0603     return _mm_setr_epi16(
0604         x0.data(), x1.data(), x2.data(), x3.data(), 0, 0, 0, 0);  // FIXME: use register-register mov
0605 }
0606 template <typename Return, typename T>
0607 Vc_INTRINSIC Vc_CONST Return
0608     simd_cast(Scalar::Vector<T> x0,
0609               Scalar::Vector<T> x1,
0610               Scalar::Vector<T> x2,
0611               Scalar::Vector<T> x3,
0612               enable_if<std::is_same<Return, SSE::ushort_v>::value> )
0613 {
0614     return _mm_setr_epi16(
0615         x0.data(), x1.data(), x2.data(), x3.data(), 0, 0, 0, 0);  // FIXME: use register-register mov
0616 }
0617 
0618 // 5 Scalar::Vector to 1 SSE::Vector {{{2
// Definitions: build a short_v / ushort_v from five scalar values. x0..x4 become
// elements 0..4 of the result (argument order of _mm_setr_epi16); elements 5..7
// are set to zero.
0619 template <typename Return, typename T>
0620 Vc_INTRINSIC Vc_CONST Return
0621 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0622           Scalar::Vector<T> x3, Scalar::Vector<T> x4,
0623           enable_if<std::is_same<Return, SSE::short_v>::value>)
0624 {
0625     return _mm_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), 0, 0, 0);
0626 }
0627 template <typename Return, typename T>
0628 Vc_INTRINSIC Vc_CONST Return
0629 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0630           Scalar::Vector<T> x3, Scalar::Vector<T> x4,
0631           enable_if<std::is_same<Return, SSE::ushort_v>::value>)
0632 {
0633     return _mm_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), 0, 0, 0);
0634 }
0635 
0636 // 6 Scalar::Vector to 1 SSE::Vector {{{2
0637 template <typename Return, typename T>
0638 Vc_INTRINSIC Vc_CONST Return
0639 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0640           Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0641           enable_if<std::is_same<Return, SSE::short_v>::value>)
0642 {
0643     return _mm_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
0644                           x5.data(), 0, 0);
0645 }
0646 template <typename Return, typename T>
0647 Vc_INTRINSIC Vc_CONST Return
0648 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0649           Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0650           enable_if<std::is_same<Return, SSE::ushort_v>::value>)
0651 {
0652     return _mm_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
0653                           x5.data(), 0, 0);
0654 }
0655 
0656 // 7 Scalar::Vector to 1 SSE::Vector {{{2
0657 template <typename Return, typename T>
0658 Vc_INTRINSIC Vc_CONST Return
0659 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0660           Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0661           Scalar::Vector<T> x6, enable_if<std::is_same<Return, SSE::short_v>::value>)
0662 {
0663     return _mm_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
0664                           x5.data(), x6.data(), 0);
0665 }
0666 template <typename Return, typename T>
0667 Vc_INTRINSIC Vc_CONST Return
0668 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0669           Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0670           Scalar::Vector<T> x6, enable_if<std::is_same<Return, SSE::ushort_v>::value>)
0671 {
0672     return _mm_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
0673                           x5.data(), x6.data(), 0);
0674 }
0675 
0676 // 8 Scalar::Vector to 1 SSE::Vector {{{2
0677 template <typename Return, typename T>
0678 Vc_INTRINSIC Vc_CONST Return
0679     simd_cast(Scalar::Vector<T> x0,
0680               Scalar::Vector<T> x1,
0681               Scalar::Vector<T> x2,
0682               Scalar::Vector<T> x3,
0683               Scalar::Vector<T> x4,
0684               Scalar::Vector<T> x5,
0685               Scalar::Vector<T> x6,
0686               Scalar::Vector<T> x7,
0687               enable_if<std::is_same<Return, SSE::short_v>::value> )
0688 {
0689     return _mm_setr_epi16(x0.data(),
0690                           x1.data(),
0691                           x2.data(),
0692                           x3.data(),
0693                           x4.data(),
0694                           x5.data(),
0695                           x6.data(),
0696                           x7.data());  // FIXME: use register-register mov
0697 }
0698 template <typename Return, typename T>
0699 Vc_INTRINSIC Vc_CONST Return
0700     simd_cast(Scalar::Vector<T> x0,
0701               Scalar::Vector<T> x1,
0702               Scalar::Vector<T> x2,
0703               Scalar::Vector<T> x3,
0704               Scalar::Vector<T> x4,
0705               Scalar::Vector<T> x5,
0706               Scalar::Vector<T> x6,
0707               Scalar::Vector<T> x7,
0708               enable_if<std::is_same<Return, SSE::ushort_v>::value> )
0709 {
0710     return _mm_setr_epi16(x0.data(),
0711                           x1.data(),
0712                           x2.data(),
0713                           x3.data(),
0714                           x4.data(),
0715                           x5.data(),
0716                           x6.data(),
0717                           x7.data());  // FIXME: use register-register mov
0718 }
0719 
0720 // SSE::Vector to Scalar::Vector {{{2
0721 template <typename To, typename FromT>
0722 Vc_INTRINSIC Vc_CONST To
0723     simd_cast(SSE::Vector<FromT> x, enable_if<Scalar::is_vector<To>::value> )
0724 {
0725     return static_cast<To>(x[0]);
0726 }
0727 
0728 // Mask casts without offset {{{1
0729 // 1 SSE Mask to 1 SSE Mask {{{2
0730 template <typename Return, typename T>
0731 Vc_INTRINSIC Vc_CONST Return
0732     simd_cast(SSE::Mask<T> x, enable_if<SSE::is_mask<Return>::value> = nullarg)
0733 {
0734     using M = SSE::Mask<T>;
0735     return {Detail::mask_cast<M::Size, Return::Size, __m128>(x.dataI())};
0736 }
0737 // 2 SSE Masks to 1 SSE Mask {{{2
0738 template <typename Return, typename T>
0739 Vc_INTRINSIC Vc_CONST Return simd_cast(
0740     SSE::Mask<T> x0,
0741     SSE::Mask<T> x1,
0742     enable_if<SSE::is_mask<Return>::value && Mask<T, VectorAbi::Sse>::Size * 2 == Return::Size> = nullarg)
0743 {
0744     return SSE::sse_cast<__m128>(_mm_packs_epi16(x0.dataI(), x1.dataI()));
0745 }
0746 template <typename Return, typename T>
0747 Vc_INTRINSIC Vc_CONST Return simd_cast(
0748     SSE::Mask<T> x0,
0749     SSE::Mask<T> x1,
0750     enable_if<SSE::is_mask<Return>::value && Mask<T, VectorAbi::Sse>::Size * 4 == Return::Size> = nullarg)
0751 {
0752     return SSE::sse_cast<__m128>(
0753         _mm_packs_epi16(_mm_packs_epi16(x0.dataI(), x1.dataI()), _mm_setzero_si128()));
0754 }
0755 // 4 SSE Masks to 1 SSE Mask {{{2
0756 template <typename Return, typename T>
0757 Vc_INTRINSIC Vc_CONST Return simd_cast(
0758     SSE::Mask<T> x0,
0759     SSE::Mask<T> x1,
0760     SSE::Mask<T> x2,
0761     SSE::Mask<T> x3,
0762     enable_if<SSE::is_mask<Return>::value && Mask<T, VectorAbi::Sse>::Size * 4 == Return::Size> = nullarg)
0763 {
0764     return SSE::sse_cast<__m128>(_mm_packs_epi16(_mm_packs_epi16(x0.dataI(), x1.dataI()),
0765                                                  _mm_packs_epi16(x2.dataI(), x3.dataI())));
0766 }
0767 
0768 // 1 Scalar Mask to 1 SSE Mask {{{2
0769 template <typename Return, typename T>
0770 Vc_INTRINSIC Vc_CONST Return
0771     simd_cast(Scalar::Mask<T> x, enable_if<SSE::is_mask<Return>::value> = nullarg)
0772 {
0773     Return m(false);
0774     m[0] = x[0];
0775     return m;
0776 }
0777 // 2 Scalar Masks to 1 SSE Mask {{{2
0778 template <typename Return, typename T>
0779 Vc_INTRINSIC Vc_CONST Return
0780     simd_cast(Scalar::Mask<T> x0, Scalar::Mask<T> x1, enable_if<SSE::is_mask<Return>::value> = nullarg)
0781 {
0782     Return m(false);
0783     m[0] = x0[0];
0784     m[1] = x1[0];
0785     return m;
0786 }
0787 // 4 Scalar Masks to 1 SSE Mask {{{2
0788 template <typename Return, typename T>
0789 Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Mask<T> x0,
0790                                        Scalar::Mask<T> x1,
0791                                        Scalar::Mask<T> x2,
0792                                        Scalar::Mask<T> x3,
0793                                        enable_if<SSE::is_mask<Return>::value> = nullarg)
0794 {
0795     Return m(false);
0796     m[0] = x0[0];
0797     m[1] = x1[0];
0798     if (Return::Size >= 4) {
0799         m[2] = x2[0];
0800         m[3] = x3[0];
0801     }
0802     return m;
0803 }
0804 // 8 Scalar Masks to 1 SSE Mask {{{2
0805 template <typename Return, typename T>
0806 Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Mask<T> x0,
0807                                        Scalar::Mask<T> x1,
0808                                        Scalar::Mask<T> x2,
0809                                        Scalar::Mask<T> x3,
0810                                        Scalar::Mask<T> x4,
0811                                        Scalar::Mask<T> x5,
0812                                        Scalar::Mask<T> x6,
0813                                        Scalar::Mask<T> x7,
0814                                        enable_if<SSE::is_mask<Return>::value> = nullarg)
0815 {
0816     Return m(false);
0817     m[0] = x0[0];
0818     m[1] = x1[0];
0819     if (Return::Size >= 4) {
0820         m[2] = x2[0];
0821         m[3] = x3[0];
0822     }
0823     if (Return::Size >= 8) {
0824         m[4] = x4[0];
0825         m[5] = x5[0];
0826         m[6] = x6[0];
0827         m[7] = x7[0];
0828     }
0829     return m;
0830 }
0831 
0832 // 1 SSE::Mask to 1 Scalar::Mask {{{2
0833 template <typename To, typename FromT>
0834 Vc_INTRINSIC Vc_CONST To
0835     simd_cast(SSE::Mask<FromT> x, enable_if<Scalar::is_mask<To>::value> = nullarg)
0836 {
0837     return static_cast<To>(x[0]);
0838 }
0839 // offset == 0 | convert from SSE::Mask/Vector to SSE::Mask/Vector {{{1
0840 template <typename Return, int offset, typename V>
0841 Vc_INTRINSIC Vc_CONST Return
0842     simd_cast(V &&x, enable_if<offset == 0 && ((SSE::is_vector<Traits::decay<V>>::value &&
0843                                                 SSE::is_vector<Return>::value) ||
0844                                                (SSE::is_mask<Traits::decay<V>>::value &&
0845                                                 SSE::is_mask<Return>::value))> = nullarg)
0846 {
0847     return simd_cast<Return>(x);
0848 }
0849 
0850 template <typename Return, int offset, typename V>
0851 Vc_INTRINSIC Vc_CONST Return
0852     simd_cast(V &&x,
0853               enable_if<offset == 0 && ((Scalar::is_vector<Traits::decay<V>>::value &&
0854                                          SSE::is_vector<Return>::value) ||
0855                                         (Scalar::is_mask<Traits::decay<V>>::value &&
0856                                          SSE::is_mask<Return>::value))> = nullarg)
0857 {
0858     return simd_cast<Return>(x);
0859 }
0860 
0861 // Vector casts with offset {{{1
0862 // SSE to SSE (Vector) {{{2
0863 template <typename Return, int offset, typename V>
0864 Vc_INTRINSIC Vc_CONST Return simd_cast(
0865     V x,
0866     enable_if<offset != 0 && (SSE::is_vector<Return>::value && SSE::is_vector<V>::value)> = nullarg)
0867 {
0868     constexpr int shift = (sizeof(V) / V::Size) * offset * Return::Size;
0869     static_assert(shift > 0 && shift < 16, "");
0870     return simd_cast<Return>(V{SSE::sse_cast<typename V::VectorType>(
0871         _mm_srli_si128(SSE::sse_cast<__m128i>(x.data()), shift & 0xff))});
0872 }
0873 
0874 // SSE to Scalar (Vector) {{{2
0875 template <typename Return, int offset, typename T>
0876 Vc_INTRINSIC Vc_CONST Return
0877     simd_cast(SSE::Vector<T> x,
0878               enable_if<offset != 0 && Scalar::is_vector<Return>::value> = nullarg)
0879 {
0880     return static_cast<typename Return::EntryType>(x[offset]);
0881 }
0882 
0883 // Mask casts with offset {{{1
0884 // SSE to SSE (Mask)
0885 template <typename Return, int offset, typename V>
0886 Vc_INTRINSIC Vc_CONST Return simd_cast(
0887     V x,
0888     enable_if<offset != 0 && (SSE::is_mask<Return>::value && SSE::is_mask<V>::value)> = nullarg)
0889 {
0890     constexpr int shift = (sizeof(V) / V::Size) * offset * Return::Size;
0891     static_assert(shift > 0 && shift < 16, "");
0892     return simd_cast<Return>(V{SSE::sse_cast<typename V::VectorType>(
0893         _mm_srli_si128(SSE::sse_cast<__m128i>(x.data()), shift & 0xff))});
0894 }
0895 
// undef Vc_SIMD_CAST_[1248] {{{1
// Remove the Vc_SIMD_CAST_N macros so they are no longer defined for any code that
// follows this point.
#undef Vc_SIMD_CAST_8
#undef Vc_SIMD_CAST_4
#undef Vc_SIMD_CAST_2
#undef Vc_SIMD_CAST_1
// }}}1
0902 
0903 }  // namespace Vc
0904 
0905 #endif // VC_SSE_SIMD_CAST_H_
0906 
0907 // vim: foldmethod=marker