File indexing completed on 2025-01-31 10:25:45
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028 #ifndef VC_SSE_SIMD_CAST_H_
0029 #define VC_SSE_SIMD_CAST_H_
0030
0031 #include "../common/utility.h"
0032 #ifdef Vc_IMPL_AVX
0033 #include "../avx/casts.h"
0034 #endif
0035
0036 #ifndef VC_SSE_VECTOR_H_
0037 #error "Vc/sse/vector.h needs to be included before Vc/sse/simd_cast.h"
0038 #endif
0039 #include "macros.h"
0040
0041 namespace Vc_VERSIONED_NAMESPACE
0042 {
0043 namespace SSE
0044 {
0045
0046
// Declaration-form helper macros.
//
// Vc_SIMD_CAST_<N>(From_, To_) expands to the head of a function template
//     template <typename To> To simd_cast(From_ x0, ..., enable_if<...> = nullarg)
// that takes N vectors of type From_. The trailing enable_if parameter only admits
// To == To_; it defaults to nullarg, so callers never have to pass it.
#define Vc_SIMD_CAST_1(From_, To_) \
    template <typename To> \
    Vc_INTRINSIC Vc_CONST To simd_cast( \
        From_ x, \
        enable_if<std::is_same<To, To_>::value> = nullarg)

#define Vc_SIMD_CAST_2(From_, To_) \
    template <typename To> \
    Vc_INTRINSIC Vc_CONST To simd_cast( \
        From_ x0, From_ x1, \
        enable_if<std::is_same<To, To_>::value> = nullarg)

#define Vc_SIMD_CAST_4(From_, To_) \
    template <typename To> \
    Vc_INTRINSIC Vc_CONST To simd_cast( \
        From_ x0, From_ x1, From_ x2, From_ x3, \
        enable_if<std::is_same<To, To_>::value> = nullarg)

#define Vc_SIMD_CAST_8(From_, To_) \
    template <typename To> \
    Vc_INTRINSIC Vc_CONST To simd_cast( \
        From_ x0, From_ x1, From_ x2, From_ x3, \
        From_ x4, From_ x5, From_ x6, From_ x7, \
        enable_if<std::is_same<To, To_>::value> = nullarg)
0068
0069
0070
0071 Vc_SIMD_CAST_1( float_v, int_v);
0072 Vc_SIMD_CAST_1(double_v, int_v);
0073 Vc_SIMD_CAST_1( uint_v, int_v);
0074 Vc_SIMD_CAST_1( short_v, int_v);
0075 Vc_SIMD_CAST_1(ushort_v, int_v);
0076 Vc_SIMD_CAST_1( float_v, uint_v);
0077 Vc_SIMD_CAST_1(double_v, uint_v);
0078 Vc_SIMD_CAST_1( int_v, uint_v);
0079 Vc_SIMD_CAST_1( short_v, uint_v);
0080 Vc_SIMD_CAST_1(ushort_v, uint_v);
0081 Vc_SIMD_CAST_1(double_v, float_v);
0082 Vc_SIMD_CAST_1( int_v, float_v);
0083 Vc_SIMD_CAST_1( uint_v, float_v);
0084 Vc_SIMD_CAST_1( short_v, float_v);
0085 Vc_SIMD_CAST_1(ushort_v, float_v);
0086 Vc_SIMD_CAST_1( float_v, double_v);
0087 Vc_SIMD_CAST_1( int_v, double_v);
0088 Vc_SIMD_CAST_1( uint_v, double_v);
0089 Vc_SIMD_CAST_1( short_v, double_v);
0090 Vc_SIMD_CAST_1(ushort_v, double_v);
0091 Vc_SIMD_CAST_1( int_v, short_v);
0092 Vc_SIMD_CAST_1( uint_v, short_v);
0093 Vc_SIMD_CAST_1( float_v, short_v);
0094 Vc_SIMD_CAST_1(double_v, short_v);
0095 Vc_SIMD_CAST_1(ushort_v, short_v);
0096 Vc_SIMD_CAST_1( int_v, ushort_v);
0097 Vc_SIMD_CAST_1( uint_v, ushort_v);
0098 Vc_SIMD_CAST_1( float_v, ushort_v);
0099 Vc_SIMD_CAST_1(double_v, ushort_v);
0100 Vc_SIMD_CAST_1( short_v, ushort_v);
0101
0102
0103 Vc_SIMD_CAST_2(double_v, int_v);
0104 Vc_SIMD_CAST_2(double_v, uint_v);
0105 Vc_SIMD_CAST_2(double_v, float_v);
0106 Vc_SIMD_CAST_2( int_v, short_v);
0107 Vc_SIMD_CAST_2( uint_v, short_v);
0108 Vc_SIMD_CAST_2( float_v, short_v);
0109 Vc_SIMD_CAST_2(double_v, short_v);
0110 Vc_SIMD_CAST_2( int_v, ushort_v);
0111 Vc_SIMD_CAST_2( uint_v, ushort_v);
0112 Vc_SIMD_CAST_2( float_v, ushort_v);
0113 Vc_SIMD_CAST_2(double_v, ushort_v);
0114
0115
0116 #define Vc_CAST_(To_) \
0117 template <typename Return> \
0118 Vc_INTRINSIC Vc_CONST enable_if<std::is_same<Return, To_>::value, Return>
0119 Vc_CAST_(short_v) simd_cast(double_v a, double_v b, double_v c);
0120 Vc_CAST_(ushort_v) simd_cast(double_v a, double_v b, double_v c);
0121
0122
0123 Vc_SIMD_CAST_4(double_v, short_v);
0124 Vc_SIMD_CAST_4(double_v, ushort_v);
0125
0126 }
0127 using SSE::simd_cast;
0128
0129
0130 template <typename Return, typename T>
0131 Vc_INTRINSIC Vc_CONST Return
0132 simd_cast(Scalar::Vector<T> x,
0133 enable_if<std::is_same<Return, SSE::double_v>::value> = nullarg);
0134 template <typename Return, typename T>
0135 Vc_INTRINSIC Vc_CONST Return
0136 simd_cast(Scalar::Vector<T> x,
0137 enable_if<std::is_same<Return, SSE::float_v>::value> = nullarg);
0138 template <typename Return, typename T>
0139 Vc_INTRINSIC Vc_CONST Return
0140 simd_cast(Scalar::Vector<T> x,
0141 enable_if<std::is_same<Return, SSE::int_v>::value> = nullarg);
0142 template <typename Return, typename T>
0143 Vc_INTRINSIC Vc_CONST Return
0144 simd_cast(Scalar::Vector<T> x,
0145 enable_if<std::is_same<Return, SSE::uint_v>::value> = nullarg);
0146 template <typename Return, typename T>
0147 Vc_INTRINSIC Vc_CONST Return
0148 simd_cast(Scalar::Vector<T> x,
0149 enable_if<std::is_same<Return, SSE::short_v>::value> = nullarg);
0150 template <typename Return, typename T>
0151 Vc_INTRINSIC Vc_CONST Return
0152 simd_cast(Scalar::Vector<T> x,
0153 enable_if<std::is_same<Return, SSE::ushort_v>::value> = nullarg);
0154
0155
0156 template <typename Return, typename T>
0157 Vc_INTRINSIC Vc_CONST Return
0158 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1,
0159 enable_if<std::is_same<Return, SSE::double_v>::value> = nullarg);
0160 template <typename Return, typename T>
0161 Vc_INTRINSIC Vc_CONST Return
0162 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1,
0163 enable_if<std::is_same<Return, SSE::float_v>::value> = nullarg);
0164 template <typename Return, typename T>
0165 Vc_INTRINSIC Vc_CONST Return
0166 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1,
0167 enable_if<std::is_same<Return, SSE::int_v>::value> = nullarg);
0168 template <typename Return, typename T>
0169 Vc_INTRINSIC Vc_CONST Return
0170 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1,
0171 enable_if<std::is_same<Return, SSE::uint_v>::value> = nullarg);
0172 template <typename Return, typename T>
0173 Vc_INTRINSIC Vc_CONST Return
0174 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1,
0175 enable_if<std::is_same<Return, SSE::short_v>::value> = nullarg);
0176 template <typename Return, typename T>
0177 Vc_INTRINSIC Vc_CONST Return
0178 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1,
0179 enable_if<std::is_same<Return, SSE::ushort_v>::value> = nullarg);
0180
0181
0182 template <typename Return, typename T>
0183 Vc_INTRINSIC Vc_CONST Return
0184 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x3,
0185 enable_if<std::is_same<Return, SSE::float_v>::value> = nullarg);
0186 template <typename Return, typename T>
0187 Vc_INTRINSIC Vc_CONST Return
0188 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x3,
0189 enable_if<std::is_same<Return, SSE::int_v>::value> = nullarg);
0190 template <typename Return, typename T>
0191 Vc_INTRINSIC Vc_CONST Return
0192 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x3,
0193 enable_if<std::is_same<Return, SSE::uint_v>::value> = nullarg);
0194 template <typename Return, typename T>
0195 Vc_INTRINSIC Vc_CONST Return
0196 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x3,
0197 enable_if<std::is_same<Return, SSE::short_v>::value> = nullarg);
0198 template <typename Return, typename T>
0199 Vc_INTRINSIC Vc_CONST Return
0200 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x3,
0201 enable_if<std::is_same<Return, SSE::ushort_v>::value> = nullarg);
0202
0203
0204 template <typename Return, typename T>
0205 Vc_INTRINSIC Vc_CONST Return
0206 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0207 Scalar::Vector<T> x3,
0208 enable_if<std::is_same<Return, SSE::float_v>::value> = nullarg);
0209 template <typename Return, typename T>
0210 Vc_INTRINSIC Vc_CONST Return
0211 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0212 Scalar::Vector<T> x3,
0213 enable_if<std::is_same<Return, SSE::int_v>::value> = nullarg);
0214 template <typename Return, typename T>
0215 Vc_INTRINSIC Vc_CONST Return
0216 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0217 Scalar::Vector<T> x3,
0218 enable_if<std::is_same<Return, SSE::uint_v>::value> = nullarg);
0219 template <typename Return, typename T>
0220 Vc_INTRINSIC Vc_CONST Return
0221 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0222 Scalar::Vector<T> x3,
0223 enable_if<std::is_same<Return, SSE::short_v>::value> = nullarg);
0224 template <typename Return, typename T>
0225 Vc_INTRINSIC Vc_CONST Return
0226 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0227 Scalar::Vector<T> x3,
0228 enable_if<std::is_same<Return, SSE::ushort_v>::value> = nullarg);
0229
0230
0231 template <typename Return, typename T>
0232 Vc_INTRINSIC Vc_CONST Return
0233 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0234 Scalar::Vector<T> x3, Scalar::Vector<T> x4,
0235 enable_if<std::is_same<Return, SSE::short_v>::value> = nullarg);
0236 template <typename Return, typename T>
0237 Vc_INTRINSIC Vc_CONST Return
0238 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0239 Scalar::Vector<T> x3, Scalar::Vector<T> x4,
0240 enable_if<std::is_same<Return, SSE::ushort_v>::value> = nullarg);
0241
0242
0243 template <typename Return, typename T>
0244 Vc_INTRINSIC Vc_CONST Return
0245 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0246 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0247 enable_if<std::is_same<Return, SSE::short_v>::value> = nullarg);
0248 template <typename Return, typename T>
0249 Vc_INTRINSIC Vc_CONST Return
0250 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0251 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0252 enable_if<std::is_same<Return, SSE::ushort_v>::value> = nullarg);
0253
0254
0255 template <typename Return, typename T>
0256 Vc_INTRINSIC Vc_CONST Return
0257 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0258 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0259 Scalar::Vector<T> x6,
0260 enable_if<std::is_same<Return, SSE::short_v>::value> = nullarg);
0261 template <typename Return, typename T>
0262 Vc_INTRINSIC Vc_CONST Return
0263 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0264 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0265 Scalar::Vector<T> x6,
0266 enable_if<std::is_same<Return, SSE::ushort_v>::value> = nullarg);
0267
0268
0269 template <typename Return, typename T>
0270 Vc_INTRINSIC Vc_CONST Return
0271 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0272 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0273 Scalar::Vector<T> x6, Scalar::Vector<T> x7,
0274 enable_if<std::is_same<Return, SSE::short_v>::value> = nullarg);
0275 template <typename Return, typename T>
0276 Vc_INTRINSIC Vc_CONST Return
0277 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0278 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0279 Scalar::Vector<T> x6, Scalar::Vector<T> x7,
0280 enable_if<std::is_same<Return, SSE::ushort_v>::value> = nullarg);
0281
0282
0283 template <typename To, typename FromT>
0284 Vc_INTRINSIC Vc_CONST To
0285 simd_cast(SSE::Vector<FromT> x, enable_if<Scalar::is_vector<To>::value> = nullarg);
0286
0287
// Switch the helper macros from declaration form to definition form: the signature is
// the same, but the enable_if parameter is unnamed and has no default argument (the
// default already appears on the forward declarations). The vector parameter names
// (x, x0 ... x7) are what the function bodies written after each macro use refer to.
#undef Vc_SIMD_CAST_8
#undef Vc_SIMD_CAST_4
#undef Vc_SIMD_CAST_2
#undef Vc_SIMD_CAST_1

#define Vc_SIMD_CAST_1(From_, To_) \
    template <typename To> \
    Vc_INTRINSIC Vc_CONST To simd_cast( \
        From_ x, \
        enable_if<std::is_same<To, To_>::value>)

#define Vc_SIMD_CAST_2(From_, To_) \
    template <typename To> \
    Vc_INTRINSIC Vc_CONST To simd_cast( \
        From_ x0, From_ x1, \
        enable_if<std::is_same<To, To_>::value>)

#define Vc_SIMD_CAST_4(From_, To_) \
    template <typename To> \
    Vc_INTRINSIC Vc_CONST To simd_cast( \
        From_ x0, From_ x1, From_ x2, From_ x3, \
        enable_if<std::is_same<To, To_>::value>)

#define Vc_SIMD_CAST_8(From_, To_) \
    template <typename To> \
    Vc_INTRINSIC Vc_CONST To simd_cast( \
        From_ x0, From_ x1, From_ x2, From_ x3, \
        From_ x4, From_ x5, From_ x6, From_ x7, \
        enable_if<std::is_same<To, To_>::value>)
0311
0312
0313 namespace SSE
0314 {
0315
0316 Vc_INTRINSIC __m128i convert_int32_to_int16(__m128i a, __m128i b)
0317 {
0318 auto tmp0 = _mm_unpacklo_epi16(a, b);
0319 auto tmp1 = _mm_unpackhi_epi16(a, b);
0320 auto tmp2 = _mm_unpacklo_epi16(tmp0, tmp1);
0321 auto tmp3 = _mm_unpackhi_epi16(tmp0, tmp1);
0322 return _mm_unpacklo_epi16(tmp2, tmp3);
0323 }
0324
0325
0326
0327 Vc_SIMD_CAST_1( float_v, int_v) { return convert< float, int>(x.data()); }
0328 Vc_SIMD_CAST_1(double_v, int_v) { return convert<double, int>(x.data()); }
0329 Vc_SIMD_CAST_1( uint_v, int_v) { return convert< uint, int>(x.data()); }
0330 Vc_SIMD_CAST_1( short_v, int_v) { return convert< short, int>(x.data()); }
0331 Vc_SIMD_CAST_1(ushort_v, int_v) { return convert<ushort, int>(x.data()); }
0332
0333 Vc_SIMD_CAST_1( float_v, uint_v) { return convert< float, uint>(x.data()); }
0334 Vc_SIMD_CAST_1(double_v, uint_v) { return convert<double, uint>(x.data()); }
0335 Vc_SIMD_CAST_1( int_v, uint_v) { return convert< int, uint>(x.data()); }
0336 Vc_SIMD_CAST_1( short_v, uint_v) { return convert< short, uint>(x.data()); }
0337 Vc_SIMD_CAST_1(ushort_v, uint_v) { return convert<ushort, uint>(x.data()); }
0338
0339 Vc_SIMD_CAST_1(double_v, float_v) { return convert<double, float>(x.data()); }
0340 Vc_SIMD_CAST_1( int_v, float_v) { return convert< int, float>(x.data()); }
0341 Vc_SIMD_CAST_1( uint_v, float_v) { return convert< uint, float>(x.data()); }
0342 Vc_SIMD_CAST_1( short_v, float_v) { return convert< short, float>(x.data()); }
0343 Vc_SIMD_CAST_1(ushort_v, float_v) { return convert<ushort, float>(x.data()); }
0344
0345 Vc_SIMD_CAST_1( float_v, double_v) { return convert< float, double>(x.data()); }
0346 Vc_SIMD_CAST_1( int_v, double_v) { return convert< int, double>(x.data()); }
0347 Vc_SIMD_CAST_1( uint_v, double_v) { return convert< uint, double>(x.data()); }
0348 Vc_SIMD_CAST_1( short_v, double_v) { return convert< short, double>(x.data()); }
0349 Vc_SIMD_CAST_1(ushort_v, double_v) { return convert<ushort, double>(x.data()); }
0350
0351
0352
0353
0354
0355
0356
0357
0358
0359
0360
0361
0362
0363
0364 Vc_SIMD_CAST_1( int_v, short_v) { return SSE::convert_int32_to_int16(x.data(), _mm_setzero_si128()); }
0365 Vc_SIMD_CAST_1( uint_v, short_v) { return SSE::convert_int32_to_int16(x.data(), _mm_setzero_si128()); }
0366 Vc_SIMD_CAST_1( float_v, short_v) { return _mm_packs_epi32(simd_cast<SSE::int_v>(x).data(), _mm_setzero_si128()); }
0367 Vc_SIMD_CAST_1(double_v, short_v) { return _mm_packs_epi32(simd_cast<SSE::int_v>(x).data(), _mm_setzero_si128()); }
0368 Vc_SIMD_CAST_1(ushort_v, short_v) { return x.data(); }
0369
0370 Vc_SIMD_CAST_1( int_v, ushort_v) { return SSE::convert_int32_to_int16(x.data(), _mm_setzero_si128()); }
0371 Vc_SIMD_CAST_1( uint_v, ushort_v) { return SSE::convert_int32_to_int16(x.data(), _mm_setzero_si128()); }
0372 Vc_SIMD_CAST_1( float_v, ushort_v) { return simd_cast<SSE::ushort_v>(simd_cast<SSE::int_v>(x)); }
0373 Vc_SIMD_CAST_1(double_v, ushort_v) { return simd_cast<SSE::ushort_v>(simd_cast<SSE::int_v>(x)); }
0374 Vc_SIMD_CAST_1( short_v, ushort_v) { return x.data(); }
0375
0376 Vc_SIMD_CAST_2(double_v, int_v) {
0377 #ifdef Vc_IMPL_AVX
0378 return AVX::convert<double, int>(AVX::concat(x0.data(), x1.data()));
0379 #else
0380 return _mm_unpacklo_epi64(convert<double, int>(x0.data()), convert<double, int>(x1.data()));
0381 #endif
0382 }
0383 Vc_SIMD_CAST_2(double_v, uint_v) {
0384 #ifdef Vc_IMPL_AVX
0385 return AVX::convert<double, uint>(AVX::concat(x0.data(), x1.data()));
0386 #else
0387 return _mm_unpacklo_epi64(convert<double, uint>(x0.data()), convert<double, uint>(x1.data()));
0388 #endif
0389 }
0390 Vc_SIMD_CAST_2(double_v, float_v) {
0391 #ifdef Vc_IMPL_AVX
0392 return _mm256_cvtpd_ps(AVX::concat(x0.data(), x1.data()));
0393 #else
0394 return _mm_movelh_ps(_mm_cvtpd_ps(x0.data()), _mm_cvtpd_ps(x1.data()));
0395 #endif
0396 }
0397
0398 Vc_SIMD_CAST_2( int_v, short_v) { return SSE::convert_int32_to_int16(x0.data(), x1.data()); }
0399 Vc_SIMD_CAST_2( uint_v, short_v) { return SSE::convert_int32_to_int16(x0.data(), x1.data()); }
0400 Vc_SIMD_CAST_2( float_v, short_v) { return _mm_packs_epi32(simd_cast<SSE::int_v>(x0).data(), simd_cast<SSE::int_v>(x1).data()); }
0401 Vc_SIMD_CAST_2(double_v, short_v) { return _mm_packs_epi32(simd_cast<SSE::int_v>(x0, x1).data(), _mm_setzero_si128()); }
0402
0403 Vc_SIMD_CAST_2( int_v, ushort_v) { return SSE::convert_int32_to_int16(x0.data(), x1.data()); }
0404 Vc_SIMD_CAST_2( uint_v, ushort_v) { return SSE::convert_int32_to_int16(x0.data(), x1.data()); }
0405 Vc_SIMD_CAST_2( float_v, ushort_v) { return simd_cast<SSE::ushort_v>(simd_cast<SSE::int_v>(x0), simd_cast<SSE::int_v>(x1)); }
0406 Vc_SIMD_CAST_2(double_v, ushort_v) { return simd_cast<SSE::ushort_v>(simd_cast<SSE::int_v>(x0, x1)); }
0407
0408
0409 Vc_CAST_(short_v) simd_cast(double_v a, double_v b, double_v c)
0410 {
0411 return simd_cast<short_v>(simd_cast<int_v>(a, b), simd_cast<int_v>(c));
0412 }
0413 Vc_CAST_(ushort_v) simd_cast(double_v a, double_v b, double_v c)
0414 {
0415 return simd_cast<ushort_v>(simd_cast<int_v>(a, b), simd_cast<int_v>(c));
0416 }
0417 #undef Vc_CAST_
0418
0419
0420 Vc_SIMD_CAST_4(double_v, short_v) { return _mm_packs_epi32(simd_cast<SSE::int_v>(x0, x1).data(), simd_cast<SSE::int_v>(x2, x3).data()); }
0421 Vc_SIMD_CAST_4(double_v, ushort_v) { return simd_cast<SSE::ushort_v>(simd_cast<SSE::int_v>(x0, x1), simd_cast<SSE::int_v>(x2, x3)); }
0422 }
0423
0424
0425 template <typename Return, typename T>
0426 Vc_INTRINSIC Vc_CONST Return
0427 simd_cast(Scalar::Vector<T> x,
0428 enable_if<std::is_same<Return, SSE::double_v>::value> )
0429 {
0430 return _mm_setr_pd(x.data(), 0.);
0431 }
0432 template <typename Return, typename T>
0433 Vc_INTRINSIC Vc_CONST Return
0434 simd_cast(Scalar::Vector<T> x,
0435 enable_if<std::is_same<Return, SSE::float_v>::value> )
0436 {
0437 return _mm_setr_ps(x.data(), 0.f, 0.f, 0.f);
0438 }
0439 template <typename Return, typename T>
0440 Vc_INTRINSIC Vc_CONST Return
0441 simd_cast(Scalar::Vector<T> x,
0442 enable_if<std::is_same<Return, SSE::int_v>::value> )
0443 {
0444 return _mm_setr_epi32(x.data(), 0, 0, 0);
0445 }
0446 template <typename Return, typename T>
0447 Vc_INTRINSIC Vc_CONST Return
0448 simd_cast(Scalar::Vector<T> x,
0449 enable_if<std::is_same<Return, SSE::uint_v>::value> )
0450 {
0451 return _mm_setr_epi32(uint(x.data()), 0, 0, 0);
0452 }
0453 template <typename Return, typename T>
0454 Vc_INTRINSIC Vc_CONST Return
0455 simd_cast(Scalar::Vector<T> x,
0456 enable_if<std::is_same<Return, SSE::short_v>::value> )
0457 {
0458 return _mm_setr_epi16(
0459 x.data(), 0, 0, 0, 0, 0, 0, 0);
0460 }
0461 template <typename Return, typename T>
0462 Vc_INTRINSIC Vc_CONST Return
0463 simd_cast(Scalar::Vector<T> x,
0464 enable_if<std::is_same<Return, SSE::ushort_v>::value> )
0465 {
0466 return _mm_setr_epi16(
0467 x.data(), 0, 0, 0, 0, 0, 0, 0);
0468 }
0469
0470
0471 template <typename Return, typename T>
0472 Vc_INTRINSIC Vc_CONST Return
0473 simd_cast(Scalar::Vector<T> x0,
0474 Scalar::Vector<T> x1,
0475 enable_if<std::is_same<Return, SSE::double_v>::value> )
0476 {
0477 return _mm_setr_pd(x0.data(), x1.data());
0478 }
0479 template <typename Return, typename T>
0480 Vc_INTRINSIC Vc_CONST Return
0481 simd_cast(Scalar::Vector<T> x0,
0482 Scalar::Vector<T> x1,
0483 enable_if<std::is_same<Return, SSE::float_v>::value> )
0484 {
0485 return _mm_setr_ps(x0.data(), x1.data(), 0.f, 0.f);
0486 }
0487 template <typename Return, typename T>
0488 Vc_INTRINSIC Vc_CONST Return
0489 simd_cast(Scalar::Vector<T> x0,
0490 Scalar::Vector<T> x1,
0491 enable_if<std::is_same<Return, SSE::int_v>::value> )
0492 {
0493 return _mm_setr_epi32(x0.data(), x1.data(), 0, 0);
0494 }
0495 template <typename Return, typename T>
0496 Vc_INTRINSIC Vc_CONST Return
0497 simd_cast(Scalar::Vector<T> x0,
0498 Scalar::Vector<T> x1,
0499 enable_if<std::is_same<Return, SSE::uint_v>::value> )
0500 {
0501 return _mm_setr_epi32(uint(x0.data()), uint(x1.data()), 0,
0502 0);
0503 }
0504 template <typename Return, typename T>
0505 Vc_INTRINSIC Vc_CONST Return
0506 simd_cast(Scalar::Vector<T> x0,
0507 Scalar::Vector<T> x1,
0508 enable_if<std::is_same<Return, SSE::short_v>::value> )
0509 {
0510 return _mm_setr_epi16(
0511 x0.data(), x1.data(), 0, 0, 0, 0, 0, 0);
0512 }
0513 template <typename Return, typename T>
0514 Vc_INTRINSIC Vc_CONST Return
0515 simd_cast(Scalar::Vector<T> x0,
0516 Scalar::Vector<T> x1,
0517 enable_if<std::is_same<Return, SSE::ushort_v>::value> )
0518 {
0519 return _mm_setr_epi16(
0520 x0.data(), x1.data(), 0, 0, 0, 0, 0, 0);
0521 }
0522
0523
0524 template <typename Return, typename T>
0525 Vc_INTRINSIC Vc_CONST Return
0526 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0527 enable_if<std::is_same<Return, SSE::float_v>::value>)
0528 {
0529 return _mm_setr_ps(x0.data(), x1.data(), x2.data(), 0.f);
0530 }
0531 template <typename Return, typename T>
0532 Vc_INTRINSIC Vc_CONST Return
0533 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0534 enable_if<std::is_same<Return, SSE::int_v>::value>)
0535 {
0536 return _mm_setr_epi32(x0.data(), x1.data(), x2.data(), 0);
0537 }
0538 template <typename Return, typename T>
0539 Vc_INTRINSIC Vc_CONST Return
0540 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0541 enable_if<std::is_same<Return, SSE::uint_v>::value>)
0542 {
0543 return _mm_setr_epi32(uint(x0.data()), uint(x1.data()), uint(x2.data()),
0544 0);
0545 }
0546 template <typename Return, typename T>
0547 Vc_INTRINSIC Vc_CONST Return
0548 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0549 enable_if<std::is_same<Return, SSE::short_v>::value>)
0550 {
0551 return _mm_setr_epi16(x0.data(), x1.data(), x2.data(), 0, 0, 0, 0, 0);
0552 }
0553 template <typename Return, typename T>
0554 Vc_INTRINSIC Vc_CONST Return
0555 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0556 enable_if<std::is_same<Return, SSE::ushort_v>::value>)
0557 {
0558 return _mm_setr_epi16(x0.data(), x1.data(), x2.data(), 0, 0, 0, 0, 0);
0559 }
0560
0561
0562 template <typename Return, typename T>
0563 Vc_INTRINSIC Vc_CONST Return
0564 simd_cast(Scalar::Vector<T> x0,
0565 Scalar::Vector<T> x1,
0566 Scalar::Vector<T> x2,
0567 Scalar::Vector<T> x3,
0568 enable_if<std::is_same<Return, SSE::float_v>::value> )
0569 {
0570 return _mm_setr_ps(
0571 x0.data(), x1.data(), x2.data(), x3.data());
0572 }
0573 template <typename Return, typename T>
0574 Vc_INTRINSIC Vc_CONST Return
0575 simd_cast(Scalar::Vector<T> x0,
0576 Scalar::Vector<T> x1,
0577 Scalar::Vector<T> x2,
0578 Scalar::Vector<T> x3,
0579 enable_if<std::is_same<Return, SSE::int_v>::value> )
0580 {
0581 return _mm_setr_epi32(
0582 x0.data(), x1.data(), x2.data(), x3.data());
0583 }
0584 template <typename Return, typename T>
0585 Vc_INTRINSIC Vc_CONST Return
0586 simd_cast(Scalar::Vector<T> x0,
0587 Scalar::Vector<T> x1,
0588 Scalar::Vector<T> x2,
0589 Scalar::Vector<T> x3,
0590 enable_if<std::is_same<Return, SSE::uint_v>::value> )
0591 {
0592 return _mm_setr_epi32(uint(x0.data()), uint(x1.data()), uint(x2.data()),
0593 uint(x3.data()));
0594 }
0595 template <typename Return, typename T>
0596 Vc_INTRINSIC Vc_CONST Return
0597 simd_cast(Scalar::Vector<T> x0,
0598 Scalar::Vector<T> x1,
0599 Scalar::Vector<T> x2,
0600 Scalar::Vector<T> x3,
0601 enable_if<std::is_same<Return, SSE::short_v>::value> )
0602 {
0603 return _mm_setr_epi16(
0604 x0.data(), x1.data(), x2.data(), x3.data(), 0, 0, 0, 0);
0605 }
0606 template <typename Return, typename T>
0607 Vc_INTRINSIC Vc_CONST Return
0608 simd_cast(Scalar::Vector<T> x0,
0609 Scalar::Vector<T> x1,
0610 Scalar::Vector<T> x2,
0611 Scalar::Vector<T> x3,
0612 enable_if<std::is_same<Return, SSE::ushort_v>::value> )
0613 {
0614 return _mm_setr_epi16(
0615 x0.data(), x1.data(), x2.data(), x3.data(), 0, 0, 0, 0);
0616 }
0617
0618
0619 template <typename Return, typename T>
0620 Vc_INTRINSIC Vc_CONST Return
0621 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0622 Scalar::Vector<T> x3, Scalar::Vector<T> x4,
0623 enable_if<std::is_same<Return, SSE::short_v>::value>)
0624 {
0625 return _mm_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), 0, 0, 0);
0626 }
0627 template <typename Return, typename T>
0628 Vc_INTRINSIC Vc_CONST Return
0629 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0630 Scalar::Vector<T> x3, Scalar::Vector<T> x4,
0631 enable_if<std::is_same<Return, SSE::ushort_v>::value>)
0632 {
0633 return _mm_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), 0, 0, 0);
0634 }
0635
0636
0637 template <typename Return, typename T>
0638 Vc_INTRINSIC Vc_CONST Return
0639 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0640 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0641 enable_if<std::is_same<Return, SSE::short_v>::value>)
0642 {
0643 return _mm_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
0644 x5.data(), 0, 0);
0645 }
0646 template <typename Return, typename T>
0647 Vc_INTRINSIC Vc_CONST Return
0648 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0649 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0650 enable_if<std::is_same<Return, SSE::ushort_v>::value>)
0651 {
0652 return _mm_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
0653 x5.data(), 0, 0);
0654 }
0655
0656
0657 template <typename Return, typename T>
0658 Vc_INTRINSIC Vc_CONST Return
0659 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0660 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0661 Scalar::Vector<T> x6, enable_if<std::is_same<Return, SSE::short_v>::value>)
0662 {
0663 return _mm_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
0664 x5.data(), x6.data(), 0);
0665 }
0666 template <typename Return, typename T>
0667 Vc_INTRINSIC Vc_CONST Return
0668 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0669 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0670 Scalar::Vector<T> x6, enable_if<std::is_same<Return, SSE::ushort_v>::value>)
0671 {
0672 return _mm_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
0673 x5.data(), x6.data(), 0);
0674 }
0675
0676
0677 template <typename Return, typename T>
0678 Vc_INTRINSIC Vc_CONST Return
0679 simd_cast(Scalar::Vector<T> x0,
0680 Scalar::Vector<T> x1,
0681 Scalar::Vector<T> x2,
0682 Scalar::Vector<T> x3,
0683 Scalar::Vector<T> x4,
0684 Scalar::Vector<T> x5,
0685 Scalar::Vector<T> x6,
0686 Scalar::Vector<T> x7,
0687 enable_if<std::is_same<Return, SSE::short_v>::value> )
0688 {
0689 return _mm_setr_epi16(x0.data(),
0690 x1.data(),
0691 x2.data(),
0692 x3.data(),
0693 x4.data(),
0694 x5.data(),
0695 x6.data(),
0696 x7.data());
0697 }
0698 template <typename Return, typename T>
0699 Vc_INTRINSIC Vc_CONST Return
0700 simd_cast(Scalar::Vector<T> x0,
0701 Scalar::Vector<T> x1,
0702 Scalar::Vector<T> x2,
0703 Scalar::Vector<T> x3,
0704 Scalar::Vector<T> x4,
0705 Scalar::Vector<T> x5,
0706 Scalar::Vector<T> x6,
0707 Scalar::Vector<T> x7,
0708 enable_if<std::is_same<Return, SSE::ushort_v>::value> )
0709 {
0710 return _mm_setr_epi16(x0.data(),
0711 x1.data(),
0712 x2.data(),
0713 x3.data(),
0714 x4.data(),
0715 x5.data(),
0716 x6.data(),
0717 x7.data());
0718 }
0719
0720
0721 template <typename To, typename FromT>
0722 Vc_INTRINSIC Vc_CONST To
0723 simd_cast(SSE::Vector<FromT> x, enable_if<Scalar::is_vector<To>::value> )
0724 {
0725 return static_cast<To>(x[0]);
0726 }
0727
0728
0729
0730 template <typename Return, typename T>
0731 Vc_INTRINSIC Vc_CONST Return
0732 simd_cast(SSE::Mask<T> x, enable_if<SSE::is_mask<Return>::value> = nullarg)
0733 {
0734 using M = SSE::Mask<T>;
0735 return {Detail::mask_cast<M::Size, Return::Size, __m128>(x.dataI())};
0736 }
0737
0738 template <typename Return, typename T>
0739 Vc_INTRINSIC Vc_CONST Return simd_cast(
0740 SSE::Mask<T> x0,
0741 SSE::Mask<T> x1,
0742 enable_if<SSE::is_mask<Return>::value && Mask<T, VectorAbi::Sse>::Size * 2 == Return::Size> = nullarg)
0743 {
0744 return SSE::sse_cast<__m128>(_mm_packs_epi16(x0.dataI(), x1.dataI()));
0745 }
0746 template <typename Return, typename T>
0747 Vc_INTRINSIC Vc_CONST Return simd_cast(
0748 SSE::Mask<T> x0,
0749 SSE::Mask<T> x1,
0750 enable_if<SSE::is_mask<Return>::value && Mask<T, VectorAbi::Sse>::Size * 4 == Return::Size> = nullarg)
0751 {
0752 return SSE::sse_cast<__m128>(
0753 _mm_packs_epi16(_mm_packs_epi16(x0.dataI(), x1.dataI()), _mm_setzero_si128()));
0754 }
0755
0756 template <typename Return, typename T>
0757 Vc_INTRINSIC Vc_CONST Return simd_cast(
0758 SSE::Mask<T> x0,
0759 SSE::Mask<T> x1,
0760 SSE::Mask<T> x2,
0761 SSE::Mask<T> x3,
0762 enable_if<SSE::is_mask<Return>::value && Mask<T, VectorAbi::Sse>::Size * 4 == Return::Size> = nullarg)
0763 {
0764 return SSE::sse_cast<__m128>(_mm_packs_epi16(_mm_packs_epi16(x0.dataI(), x1.dataI()),
0765 _mm_packs_epi16(x2.dataI(), x3.dataI())));
0766 }
0767
0768
0769 template <typename Return, typename T>
0770 Vc_INTRINSIC Vc_CONST Return
0771 simd_cast(Scalar::Mask<T> x, enable_if<SSE::is_mask<Return>::value> = nullarg)
0772 {
0773 Return m(false);
0774 m[0] = x[0];
0775 return m;
0776 }
0777
0778 template <typename Return, typename T>
0779 Vc_INTRINSIC Vc_CONST Return
0780 simd_cast(Scalar::Mask<T> x0, Scalar::Mask<T> x1, enable_if<SSE::is_mask<Return>::value> = nullarg)
0781 {
0782 Return m(false);
0783 m[0] = x0[0];
0784 m[1] = x1[0];
0785 return m;
0786 }
0787
0788 template <typename Return, typename T>
0789 Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Mask<T> x0,
0790 Scalar::Mask<T> x1,
0791 Scalar::Mask<T> x2,
0792 Scalar::Mask<T> x3,
0793 enable_if<SSE::is_mask<Return>::value> = nullarg)
0794 {
0795 Return m(false);
0796 m[0] = x0[0];
0797 m[1] = x1[0];
0798 if (Return::Size >= 4) {
0799 m[2] = x2[0];
0800 m[3] = x3[0];
0801 }
0802 return m;
0803 }
0804
0805 template <typename Return, typename T>
0806 Vc_INTRINSIC Vc_CONST Return simd_cast(Scalar::Mask<T> x0,
0807 Scalar::Mask<T> x1,
0808 Scalar::Mask<T> x2,
0809 Scalar::Mask<T> x3,
0810 Scalar::Mask<T> x4,
0811 Scalar::Mask<T> x5,
0812 Scalar::Mask<T> x6,
0813 Scalar::Mask<T> x7,
0814 enable_if<SSE::is_mask<Return>::value> = nullarg)
0815 {
0816 Return m(false);
0817 m[0] = x0[0];
0818 m[1] = x1[0];
0819 if (Return::Size >= 4) {
0820 m[2] = x2[0];
0821 m[3] = x3[0];
0822 }
0823 if (Return::Size >= 8) {
0824 m[4] = x4[0];
0825 m[5] = x5[0];
0826 m[6] = x6[0];
0827 m[7] = x7[0];
0828 }
0829 return m;
0830 }
0831
0832
0833 template <typename To, typename FromT>
0834 Vc_INTRINSIC Vc_CONST To
0835 simd_cast(SSE::Mask<FromT> x, enable_if<Scalar::is_mask<To>::value> = nullarg)
0836 {
0837 return static_cast<To>(x[0]);
0838 }
0839
0840 template <typename Return, int offset, typename V>
0841 Vc_INTRINSIC Vc_CONST Return
0842 simd_cast(V &&x, enable_if<offset == 0 && ((SSE::is_vector<Traits::decay<V>>::value &&
0843 SSE::is_vector<Return>::value) ||
0844 (SSE::is_mask<Traits::decay<V>>::value &&
0845 SSE::is_mask<Return>::value))> = nullarg)
0846 {
0847 return simd_cast<Return>(x);
0848 }
0849
0850 template <typename Return, int offset, typename V>
0851 Vc_INTRINSIC Vc_CONST Return
0852 simd_cast(V &&x,
0853 enable_if<offset == 0 && ((Scalar::is_vector<Traits::decay<V>>::value &&
0854 SSE::is_vector<Return>::value) ||
0855 (Scalar::is_mask<Traits::decay<V>>::value &&
0856 SSE::is_mask<Return>::value))> = nullarg)
0857 {
0858 return simd_cast<Return>(x);
0859 }
0860
0861
0862
0863 template <typename Return, int offset, typename V>
0864 Vc_INTRINSIC Vc_CONST Return simd_cast(
0865 V x,
0866 enable_if<offset != 0 && (SSE::is_vector<Return>::value && SSE::is_vector<V>::value)> = nullarg)
0867 {
0868 constexpr int shift = (sizeof(V) / V::Size) * offset * Return::Size;
0869 static_assert(shift > 0 && shift < 16, "");
0870 return simd_cast<Return>(V{SSE::sse_cast<typename V::VectorType>(
0871 _mm_srli_si128(SSE::sse_cast<__m128i>(x.data()), shift & 0xff))});
0872 }
0873
0874
0875 template <typename Return, int offset, typename T>
0876 Vc_INTRINSIC Vc_CONST Return
0877 simd_cast(SSE::Vector<T> x,
0878 enable_if<offset != 0 && Scalar::is_vector<Return>::value> = nullarg)
0879 {
0880 return static_cast<typename Return::EntryType>(x[offset]);
0881 }
0882
0883
0884
0885 template <typename Return, int offset, typename V>
0886 Vc_INTRINSIC Vc_CONST Return simd_cast(
0887 V x,
0888 enable_if<offset != 0 && (SSE::is_mask<Return>::value && SSE::is_mask<V>::value)> = nullarg)
0889 {
0890 constexpr int shift = (sizeof(V) / V::Size) * offset * Return::Size;
0891 static_assert(shift > 0 && shift < 16, "");
0892 return simd_cast<Return>(V{SSE::sse_cast<typename V::VectorType>(
0893 _mm_srli_si128(SSE::sse_cast<__m128i>(x.data()), shift & 0xff))});
0894 }
0895
0896
// Remove the helper macros so they do not leak out of this header.
#undef Vc_SIMD_CAST_8
#undef Vc_SIMD_CAST_4
#undef Vc_SIMD_CAST_2
#undef Vc_SIMD_CAST_1
0901
0902
0903 }
0904
0905 #endif
0906
0907