File indexing completed on 2025-01-31 10:25:35
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028 #ifndef VC_AVX_SIMD_CAST_H_
0029 #define VC_AVX_SIMD_CAST_H_
0030
0031 #ifndef VC_AVX_VECTOR_H_
0032 #error "Vc/avx/vector.h needs to be included before Vc/avx/simd_cast.h"
0033 #endif
0034 #include "macros.h"
0035
0036 namespace Vc_VERSIONED_NAMESPACE
0037 {
0038
// Expands to the head (template header + signature, without a trailing ';' or
// body) of a one-argument simd_cast overload. The argument is an AVX2::from_
// named `x`; the overload is enabled only when the explicitly specified
// template parameter `To` is exactly AVX2::to_. The trailing enable_if
// parameter is defaulted to nullarg, so callers need not pass it.
// NOTE(review): the parameter name `x` is part of the expansion; code that
// supplies a body after this macro presumably relies on it - confirm before
// renaming.
0039 #define Vc_SIMD_CAST_AVX_1(from_, to_) \
0040 template <typename To> \
0041 Vc_INTRINSIC Vc_CONST To simd_cast( \
0042 AVX2::from_ x, enable_if<std::is_same<To, AVX2::to_>::value> = nullarg)
0043
// Two-argument variant: expands to the head of a simd_cast overload taking
// two AVX2::from_ vectors (`x0`, `x1`), enabled only when `To` is exactly
// AVX2::to_. No ';' or body is emitted; the user of the macro supplies it.
0044 #define Vc_SIMD_CAST_AVX_2(from_, to_) \
0045 template <typename To> \
0046 Vc_INTRINSIC Vc_CONST To simd_cast( \
0047 AVX2::from_ x0, AVX2::from_ x1, \
0048 enable_if<std::is_same<To, AVX2::to_>::value> = nullarg)
0049
// Three-argument variant: expands to the head of a simd_cast overload taking
// three AVX2::from_ vectors (`x0`..`x2`), enabled only when `To` is exactly
// AVX2::to_. No ';' or body is emitted; the user of the macro supplies it.
0050 #define Vc_SIMD_CAST_AVX_3(from_, to_) \
0051 template <typename To> \
0052 Vc_INTRINSIC Vc_CONST To simd_cast( \
0053 AVX2::from_ x0, AVX2::from_ x1, AVX2::from_ x2, \
0054 enable_if<std::is_same<To, AVX2::to_>::value> = nullarg)
0055
// Four-argument variant: expands to the head of a simd_cast overload taking
// four AVX2::from_ vectors (`x0`..`x3`), enabled only when `To` is exactly
// AVX2::to_. No ';' or body is emitted; the user of the macro supplies it.
0056 #define Vc_SIMD_CAST_AVX_4(from_, to_) \
0057 template <typename To> \
0058 Vc_INTRINSIC Vc_CONST To simd_cast( \
0059 AVX2::from_ x0, AVX2::from_ x1, AVX2::from_ x2, AVX2::from_ x3, \
0060 enable_if<std::is_same<To, AVX2::to_>::value> = nullarg)
0061
// Namespace-agnostic counterpart of the Vc_SIMD_CAST_AVX_* macros: from_ and
// to_ are used verbatim (no AVX2:: prefix is added), so callers pass fully
// qualified types such as SSE::float_v or AVX2::double_v.
// Expands to the head of a one-argument simd_cast overload (argument `x`),
// enabled only when `To` is exactly to_. No ';' or body is emitted.
0062 #define Vc_SIMD_CAST_1(from_, to_) \
0063 template <typename To> \
0064 Vc_INTRINSIC Vc_CONST To simd_cast( \
0065 from_ x, enable_if<std::is_same<To, to_>::value> = nullarg)
0066
// Head of a two-argument simd_cast overload (`x0`, `x1`, both of type from_),
// enabled only when `To` is exactly to_. from_/to_ are used verbatim.
0067 #define Vc_SIMD_CAST_2(from_, to_) \
0068 template <typename To> \
0069 Vc_INTRINSIC Vc_CONST To simd_cast( \
0070 from_ x0, from_ x1, enable_if<std::is_same<To, to_>::value> = nullarg)
0071
// Head of a three-argument simd_cast overload (`x0`..`x2`, all of type
// from_), enabled only when `To` is exactly to_. from_/to_ are used verbatim.
0072 #define Vc_SIMD_CAST_3(from_, to_) \
0073 template <typename To> \
0074 Vc_INTRINSIC Vc_CONST To simd_cast( \
0075 from_ x0, from_ x1, from_ x2, enable_if<std::is_same<To, to_>::value> = nullarg)
0076
// Head of a four-argument simd_cast overload (`x0`..`x3`, all of type
// from_), enabled only when `To` is exactly to_. from_/to_ are used verbatim.
0077 #define Vc_SIMD_CAST_4(from_, to_) \
0078 template <typename To> \
0079 Vc_INTRINSIC Vc_CONST To simd_cast( \
0080 from_ x0, from_ x1, from_ x2, from_ x3, \
0081 enable_if<std::is_same<To, to_>::value> = nullarg)
0082
// Head of a five-argument simd_cast overload (`x0`..`x4`, all of type
// from_), enabled only when `To` is exactly to_. from_/to_ are used verbatim.
0083 #define Vc_SIMD_CAST_5(from_, to_) \
0084 template <typename To> \
0085 Vc_INTRINSIC Vc_CONST To simd_cast( \
0086 from_ x0, from_ x1, from_ x2, from_ x3, from_ x4, \
0087 enable_if<std::is_same<To, to_>::value> = nullarg)
0088
// Head of a six-argument simd_cast overload (`x0`..`x5`, all of type
// from_), enabled only when `To` is exactly to_. from_/to_ are used verbatim.
0089 #define Vc_SIMD_CAST_6(from_, to_) \
0090 template <typename To> \
0091 Vc_INTRINSIC Vc_CONST To simd_cast( \
0092 from_ x0, from_ x1, from_ x2, from_ x3, from_ x4, from_ x5, \
0093 enable_if<std::is_same<To, to_>::value> = nullarg)
0094
// Head of a seven-argument simd_cast overload (`x0`..`x6`, all of type
// from_), enabled only when `To` is exactly to_. from_/to_ are used verbatim.
0095 #define Vc_SIMD_CAST_7(from_, to_) \
0096 template <typename To> \
0097 Vc_INTRINSIC Vc_CONST To simd_cast( \
0098 from_ x0, from_ x1, from_ x2, from_ x3, from_ x4, from_ x5, from_ x6, \
0099 enable_if<std::is_same<To, to_>::value> = nullarg)
0100
// Head of an eight-argument simd_cast overload (`x0`..`x7`, all of type
// from_), enabled only when `To` is exactly to_. from_/to_ are used verbatim.
0101 #define Vc_SIMD_CAST_8(from_, to_) \
0102 template <typename To> \
0103 Vc_INTRINSIC Vc_CONST To simd_cast( \
0104 from_ x0, from_ x1, from_ x2, from_ x3, from_ x4, from_ x5, from_ x6, from_ x7, \
0105 enable_if<std::is_same<To, to_>::value> = nullarg)
0106
// Expands to two things:
//  1. a static_assert that from_ holds at least to_::size() * (offset_ + 1)
//     entries, rejecting offsets that cannot exist for the type pair;
//  2. the head of a simd_cast overload with template parameters
//     <typename To, int offset> and a single from_ argument `x`, enabled only
//     when offset == offset_ and `To` is exactly to_.
// As with the other macros here, no ';' or body follows the signature.
// Because the expansion begins with a complete static_assert statement, the
// macro can only be used where a declaration sequence is allowed.
0107 #define Vc_SIMD_CAST_OFFSET(from_, to_, offset_) \
0108 static_assert(from_::size() >= to_::size() * (offset_ + 1), \
0109 "this offset cannot exist for this type combination"); \
0110 template <typename To, int offset> \
0111 Vc_INTRINSIC Vc_CONST To simd_cast( \
0112 from_ x, \
0113 enable_if<(offset == offset_ && std::is_same<To, to_>::value)> = nullarg)
0114
0115
0116
// Forward declaration: generic one-argument SSE -> AVX2 simd_cast.
// Enabled only when To is an AVX2 vector, From is an SSE vector, and the SSE
// vector with To's EntryType has the same Size as To.
// NOTE(review): the Size equality presumably selects AVX2 vector types that
// are no wider than their SSE counterpart - confirm against the definitions.
0117 template <typename To, typename From>
0118 Vc_INTRINSIC Vc_CONST To
0119 simd_cast(From x, enable_if<(AVX2::is_vector<To>::value && SSE::is_vector<From>::value &&
0120 SSE::Vector<typename To::EntryType>::Size == To::Size)> =
0121 nullarg);
// Forward declaration: generic two-argument SSE -> AVX2 simd_cast.
// Same constraint as the one-argument form: To is an AVX2 vector, From is an
// SSE vector, and SSE::Vector<To::EntryType>::Size equals To::Size.
0122 template <typename To, typename From>
0123 Vc_INTRINSIC Vc_CONST To simd_cast(
0124 From x0, From x1,
0125 enable_if<(AVX2::is_vector<To>::value && SSE::is_vector<From>::value &&
0126 SSE::Vector<typename To::EntryType>::Size == To::Size)> = nullarg);
// Forward declaration: generic three-argument SSE -> AVX2 simd_cast.
// Enabled only when To is an AVX2 vector, From is an SSE vector, and
// SSE::Vector<To::EntryType>::Size equals To::Size.
0127 template <typename To, typename From>
0128 Vc_INTRINSIC Vc_CONST To simd_cast(
0129 From x0, From x1, From x2,
0130 enable_if<(AVX2::is_vector<To>::value && SSE::is_vector<From>::value &&
0131 SSE::Vector<typename To::EntryType>::Size == To::Size)> = nullarg);
// Forward declaration: generic four-argument SSE -> AVX2 simd_cast.
// Enabled only when To is an AVX2 vector, From is an SSE vector, and
// SSE::Vector<To::EntryType>::Size equals To::Size.
0132 template <typename To, typename From>
0133 Vc_INTRINSIC Vc_CONST To simd_cast(
0134 From x0, From x1, From x2, From x3,
0135 enable_if<(AVX2::is_vector<To>::value && SSE::is_vector<From>::value &&
0136 SSE::Vector<typename To::EntryType>::Size == To::Size)> = nullarg);
// Forward declaration: generic eight-argument SSE -> AVX2 simd_cast.
// Enabled only when To is an AVX2 vector, From is an SSE vector, and
// SSE::Vector<To::EntryType>::Size equals To::Size.
// NOTE(review): in this part of the file the generic overloads with this
// constraint exist for 1-4 and 8 arguments only; there are no 5-, 6- or
// 7-argument forms here - confirm that this gap is intentional.
0137 template <typename To, typename From>
0138 Vc_INTRINSIC Vc_CONST To simd_cast(
0139 From x0, From x1, From x2, From x3, From x4, From x5, From x6, From x7,
0140 enable_if<(AVX2::is_vector<To>::value && SSE::is_vector<From>::value &&
0141 SSE::Vector<typename To::EntryType>::Size == To::Size)> = nullarg);
0142
0143
0144
// AVX2 -> AVX2 declarations between float_v and double_v. These sit outside
// the Vc_IMPL_AVX2 guard that follows. Each line expands to one simd_cast
// declaration; the numeric macro suffix is the number of source vectors.
0145 Vc_SIMD_CAST_AVX_1( float_v, double_v);
0146
0147 Vc_SIMD_CAST_AVX_1(double_v, float_v);
0148 Vc_SIMD_CAST_AVX_2(double_v, float_v);
0149
// AVX2 -> AVX2 declarations that involve the integer vector types (int_v,
// uint_v, short_v, ushort_v) as source or destination. Only declared when
// Vc_IMPL_AVX2 is defined. Lines are grouped by destination type; the macro
// suffix (_1.._4) is the number of source vectors of the first listed type.
0150 #ifdef Vc_IMPL_AVX2
// integer sources -> double_v
0151 Vc_SIMD_CAST_AVX_1( int_v, double_v);
0152 Vc_SIMD_CAST_AVX_1( uint_v, double_v);
0153 Vc_SIMD_CAST_AVX_1( short_v, double_v);
0154 Vc_SIMD_CAST_AVX_1(ushort_v, double_v);
0155
// integer sources -> float_v
0156 Vc_SIMD_CAST_AVX_1( int_v, float_v);
0157 Vc_SIMD_CAST_AVX_1( uint_v, float_v);
0158 Vc_SIMD_CAST_AVX_1( short_v, float_v);
0159 Vc_SIMD_CAST_AVX_1(ushort_v, float_v);
0160
// -> int_v (one source of any other type, or two double_v)
0161 Vc_SIMD_CAST_AVX_1(double_v, int_v);
0162 Vc_SIMD_CAST_AVX_1( float_v, int_v);
0163 Vc_SIMD_CAST_AVX_1( uint_v, int_v);
0164 Vc_SIMD_CAST_AVX_1( short_v, int_v);
0165 Vc_SIMD_CAST_AVX_1(ushort_v, int_v);
0166 Vc_SIMD_CAST_AVX_2(double_v, int_v);
0167
// -> uint_v (one source of any other type, or two double_v)
0168 Vc_SIMD_CAST_AVX_1(double_v, uint_v);
0169 Vc_SIMD_CAST_AVX_1( float_v, uint_v);
0170 Vc_SIMD_CAST_AVX_1( int_v, uint_v);
0171 Vc_SIMD_CAST_AVX_1( short_v, uint_v);
0172 Vc_SIMD_CAST_AVX_1(ushort_v, uint_v);
0173 Vc_SIMD_CAST_AVX_2(double_v, uint_v);
0174
// -> short_v (up to two float_v/int_v/uint_v sources, up to four double_v)
0175 Vc_SIMD_CAST_AVX_1(double_v, short_v);
0176 Vc_SIMD_CAST_AVX_1( float_v, short_v);
0177 Vc_SIMD_CAST_AVX_1( int_v, short_v);
0178 Vc_SIMD_CAST_AVX_1( uint_v, short_v);
0179 Vc_SIMD_CAST_AVX_1(ushort_v, short_v);
0180 Vc_SIMD_CAST_AVX_2(double_v, short_v);
0181 Vc_SIMD_CAST_AVX_2( float_v, short_v);
0182 Vc_SIMD_CAST_AVX_2( int_v, short_v);
0183 Vc_SIMD_CAST_AVX_2( uint_v, short_v);
0184 Vc_SIMD_CAST_AVX_3(double_v, short_v);
0185 Vc_SIMD_CAST_AVX_4(double_v, short_v);
0186
// -> ushort_v (up to two float_v/int_v/uint_v sources, up to four double_v)
0187 Vc_SIMD_CAST_AVX_1(double_v, ushort_v);
0188 Vc_SIMD_CAST_AVX_1( float_v, ushort_v);
0189 Vc_SIMD_CAST_AVX_1( int_v, ushort_v);
0190 Vc_SIMD_CAST_AVX_1( uint_v, ushort_v);
0191 Vc_SIMD_CAST_AVX_1( short_v, ushort_v);
0192 Vc_SIMD_CAST_AVX_2(double_v, ushort_v);
0193 Vc_SIMD_CAST_AVX_2( float_v, ushort_v);
0194 Vc_SIMD_CAST_AVX_2( int_v, ushort_v);
0195 Vc_SIMD_CAST_AVX_2( uint_v, ushort_v);
0196 Vc_SIMD_CAST_AVX_3(double_v, ushort_v);
0197 Vc_SIMD_CAST_AVX_4(double_v, ushort_v);
0198 #endif
0199
0200
// SSE -> AVX2, one source vector: declarations for every listed SSE vector
// type converting to AVX2::double_v and AVX2::float_v. These are outside the
// Vc_IMPL_AVX2 guard.
0201 Vc_SIMD_CAST_1(SSE::double_v, AVX2::double_v);
0202 Vc_SIMD_CAST_1(SSE:: float_v, AVX2::double_v);
0203 Vc_SIMD_CAST_1(SSE:: int_v, AVX2::double_v);
0204 Vc_SIMD_CAST_1(SSE:: uint_v, AVX2::double_v);
0205 Vc_SIMD_CAST_1(SSE:: short_v, AVX2::double_v);
0206 Vc_SIMD_CAST_1(SSE::ushort_v, AVX2::double_v);
0207
0208 Vc_SIMD_CAST_1(SSE::double_v, AVX2:: float_v);
0209 Vc_SIMD_CAST_1(SSE:: float_v, AVX2:: float_v);
0210 Vc_SIMD_CAST_1(SSE:: int_v, AVX2:: float_v);
0211 Vc_SIMD_CAST_1(SSE:: uint_v, AVX2:: float_v);
0212 Vc_SIMD_CAST_1(SSE:: short_v, AVX2:: float_v);
0213 Vc_SIMD_CAST_1(SSE::ushort_v, AVX2:: float_v);
0214
// SSE -> AVX2, one source vector, integer destination types (int_v, uint_v,
// short_v, ushort_v). Only declared when Vc_IMPL_AVX2 is defined.
0215 #ifdef Vc_IMPL_AVX2
// SSE::double_v source
0216 Vc_SIMD_CAST_1(SSE::double_v, AVX2:: int_v);
0217 Vc_SIMD_CAST_1(SSE::double_v, AVX2:: uint_v);
0218 Vc_SIMD_CAST_1(SSE::double_v, AVX2:: short_v);
0219 Vc_SIMD_CAST_1(SSE::double_v, AVX2::ushort_v);
0220
// SSE::float_v source
0221 Vc_SIMD_CAST_1(SSE:: float_v, AVX2:: int_v);
0222 Vc_SIMD_CAST_1(SSE:: float_v, AVX2:: uint_v);
0223 Vc_SIMD_CAST_1(SSE:: float_v, AVX2:: short_v);
0224 Vc_SIMD_CAST_1(SSE:: float_v, AVX2::ushort_v);
0225
// SSE integer sources, grouped by AVX2 destination type
0226 Vc_SIMD_CAST_1(SSE:: int_v, AVX2:: int_v);
0227 Vc_SIMD_CAST_1(SSE:: uint_v, AVX2:: int_v);
0228 Vc_SIMD_CAST_1(SSE:: short_v, AVX2:: int_v);
0229 Vc_SIMD_CAST_1(SSE::ushort_v, AVX2:: int_v);
0230
0231 Vc_SIMD_CAST_1(SSE:: int_v, AVX2:: uint_v);
0232 Vc_SIMD_CAST_1(SSE:: uint_v, AVX2:: uint_v);
0233 Vc_SIMD_CAST_1(SSE:: short_v, AVX2:: uint_v);
0234 Vc_SIMD_CAST_1(SSE::ushort_v, AVX2:: uint_v);
0235
0236 Vc_SIMD_CAST_1(SSE:: int_v, AVX2:: short_v);
0237 Vc_SIMD_CAST_1(SSE:: uint_v, AVX2:: short_v);
0238 Vc_SIMD_CAST_1(SSE:: short_v, AVX2:: short_v);
0239 Vc_SIMD_CAST_1(SSE::ushort_v, AVX2:: short_v);
0240
0241 Vc_SIMD_CAST_1(SSE:: int_v, AVX2::ushort_v);
0242 Vc_SIMD_CAST_1(SSE:: uint_v, AVX2::ushort_v);
0243 Vc_SIMD_CAST_1(SSE:: short_v, AVX2::ushort_v);
0244 Vc_SIMD_CAST_1(SSE::ushort_v, AVX2::ushort_v);
0245 #endif
0246
0247
// SSE -> AVX2, two source vectors, floating-point destinations
// (AVX2::double_v and AVX2::float_v). Outside the Vc_IMPL_AVX2 guard.
0248 Vc_SIMD_CAST_2(SSE::double_v, AVX2::double_v);
0249
0250 Vc_SIMD_CAST_2(SSE::double_v, AVX2:: float_v);
0251 Vc_SIMD_CAST_2(SSE:: float_v, AVX2:: float_v);
0252 Vc_SIMD_CAST_2(SSE:: int_v, AVX2:: float_v);
0253 Vc_SIMD_CAST_2(SSE:: uint_v, AVX2:: float_v);
0254
// SSE -> AVX2, two source vectors, integer destination types. Only declared
// when Vc_IMPL_AVX2 is defined. Note that SSE::short_v / SSE::ushort_v appear
// as two-vector sources only for the short_v / ushort_v destinations.
0255 #ifdef Vc_IMPL_AVX2
0256 Vc_SIMD_CAST_2(SSE::double_v, AVX2:: int_v);
0257 Vc_SIMD_CAST_2(SSE::double_v, AVX2:: uint_v);
0258 Vc_SIMD_CAST_2(SSE::double_v, AVX2:: short_v);
0259 Vc_SIMD_CAST_2(SSE::double_v, AVX2::ushort_v);
0260
0261 Vc_SIMD_CAST_2(SSE:: float_v, AVX2:: int_v);
0262 Vc_SIMD_CAST_2(SSE:: float_v, AVX2:: uint_v);
0263 Vc_SIMD_CAST_2(SSE:: float_v, AVX2:: short_v);
0264 Vc_SIMD_CAST_2(SSE:: float_v, AVX2::ushort_v);
0265
0266 Vc_SIMD_CAST_2(SSE:: int_v, AVX2:: int_v);
0267 Vc_SIMD_CAST_2(SSE:: uint_v, AVX2:: int_v);
0268
0269 Vc_SIMD_CAST_2(SSE:: int_v, AVX2:: uint_v);
0270 Vc_SIMD_CAST_2(SSE:: uint_v, AVX2:: uint_v);
0271
0272 Vc_SIMD_CAST_2(SSE:: int_v, AVX2:: short_v);
0273 Vc_SIMD_CAST_2(SSE:: uint_v, AVX2:: short_v);
0274 Vc_SIMD_CAST_2(SSE:: short_v, AVX2:: short_v);
0275 Vc_SIMD_CAST_2(SSE::ushort_v, AVX2:: short_v);
0276
0277 Vc_SIMD_CAST_2(SSE:: int_v, AVX2::ushort_v);
0278 Vc_SIMD_CAST_2(SSE:: uint_v, AVX2::ushort_v);
0279 Vc_SIMD_CAST_2(SSE:: short_v, AVX2::ushort_v);
0280 Vc_SIMD_CAST_2(SSE::ushort_v, AVX2::ushort_v);
0281 #endif
0282
0283
// SSE -> AVX2, three source vectors. The double_v -> float_v form is always
// declared; the integer-destination forms require Vc_IMPL_AVX2.
0284 Vc_SIMD_CAST_3(SSE::double_v, AVX2:: float_v);
0285
0286 #ifdef Vc_IMPL_AVX2
0287 Vc_SIMD_CAST_3(SSE::double_v, AVX2:: int_v);
0288 Vc_SIMD_CAST_3(SSE::double_v, AVX2:: uint_v);
0289 Vc_SIMD_CAST_3(SSE::double_v, AVX2:: short_v);
0290 Vc_SIMD_CAST_3(SSE::double_v, AVX2::ushort_v);
0291
0292 Vc_SIMD_CAST_3(SSE:: float_v, AVX2:: short_v);
0293 Vc_SIMD_CAST_3(SSE:: float_v, AVX2::ushort_v);
0294
0295 Vc_SIMD_CAST_3(SSE:: int_v, AVX2:: short_v);
0296 Vc_SIMD_CAST_3(SSE:: uint_v, AVX2:: short_v);
0297
0298 Vc_SIMD_CAST_3(SSE:: int_v, AVX2::ushort_v);
0299 Vc_SIMD_CAST_3(SSE:: uint_v, AVX2::ushort_v);
0300 #endif
0301
0302
// SSE -> AVX2, four source vectors. The double_v -> float_v form is always
// declared; the integer-destination forms require Vc_IMPL_AVX2.
0303 Vc_SIMD_CAST_4(SSE::double_v, AVX2:: float_v);
0304
0305 #ifdef Vc_IMPL_AVX2
0306 Vc_SIMD_CAST_4(SSE::double_v, AVX2:: int_v);
0307 Vc_SIMD_CAST_4(SSE::double_v, AVX2:: uint_v);
0308 Vc_SIMD_CAST_4(SSE::double_v, AVX2:: short_v);
0309 Vc_SIMD_CAST_4(SSE::double_v, AVX2::ushort_v);
0310
0311 Vc_SIMD_CAST_4(SSE:: float_v, AVX2:: short_v);
0312 Vc_SIMD_CAST_4(SSE:: float_v, AVX2::ushort_v);
0313
0314 Vc_SIMD_CAST_4(SSE:: int_v, AVX2:: short_v);
0315 Vc_SIMD_CAST_4(SSE:: uint_v, AVX2:: short_v);
0316
0317 Vc_SIMD_CAST_4(SSE:: int_v, AVX2::ushort_v);
0318 Vc_SIMD_CAST_4(SSE:: uint_v, AVX2::ushort_v);
0319 #endif
0320
0321
// SSE -> AVX2 with five to eight source vectors. Only SSE::double_v sources
// and AVX2::short_v / AVX2::ushort_v destinations are declared at these
// arities, and only when Vc_IMPL_AVX2 is defined.
// five sources
0322 #ifdef Vc_IMPL_AVX2
0323 Vc_SIMD_CAST_5(SSE::double_v, AVX2:: short_v);
0324 Vc_SIMD_CAST_5(SSE::double_v, AVX2::ushort_v);
0325 #endif
0326
0327
// six sources
0328 #ifdef Vc_IMPL_AVX2
0329 Vc_SIMD_CAST_6(SSE::double_v, AVX2:: short_v);
0330 Vc_SIMD_CAST_6(SSE::double_v, AVX2::ushort_v);
0331 #endif
0332
0333
// seven sources
0334 #ifdef Vc_IMPL_AVX2
0335 Vc_SIMD_CAST_7(SSE::double_v, AVX2:: short_v);
0336 Vc_SIMD_CAST_7(SSE::double_v, AVX2::ushort_v);
0337 #endif
0338
0339
// eight sources
0340 #ifdef Vc_IMPL_AVX2
0341 Vc_SIMD_CAST_8(SSE::double_v, AVX2:: short_v);
0342 Vc_SIMD_CAST_8(SSE::double_v, AVX2::ushort_v);
0343 #endif
0344
0345
// AVX2 -> SSE, one source vector: AVX2::double_v and AVX2::float_v converting
// to every listed SSE vector type. Outside the Vc_IMPL_AVX2 guard.
0346 Vc_SIMD_CAST_1(AVX2::double_v, SSE::double_v);
0347 Vc_SIMD_CAST_1(AVX2::double_v, SSE:: float_v);
0348 Vc_SIMD_CAST_1(AVX2::double_v, SSE:: int_v);
0349 Vc_SIMD_CAST_1(AVX2::double_v, SSE:: uint_v);
0350 Vc_SIMD_CAST_1(AVX2::double_v, SSE:: short_v);
0351 Vc_SIMD_CAST_1(AVX2::double_v, SSE::ushort_v);
0352
0353 Vc_SIMD_CAST_1(AVX2:: float_v, SSE::double_v);
0354 Vc_SIMD_CAST_1(AVX2:: float_v, SSE:: float_v);
0355 Vc_SIMD_CAST_1(AVX2:: float_v, SSE:: int_v);
0356 Vc_SIMD_CAST_1(AVX2:: float_v, SSE:: uint_v);
0357 Vc_SIMD_CAST_1(AVX2:: float_v, SSE:: short_v);
0358 Vc_SIMD_CAST_1(AVX2:: float_v, SSE::ushort_v);
0359
// AVX2 -> SSE, one source vector, for the AVX2 integer vector types (int_v,
// uint_v, short_v, ushort_v) converting to every listed SSE vector type.
// Only declared when Vc_IMPL_AVX2 is defined.
0360 #ifdef Vc_IMPL_AVX2
0361 Vc_SIMD_CAST_1(AVX2:: int_v, SSE::double_v);
0362 Vc_SIMD_CAST_1(AVX2:: int_v, SSE:: float_v);
0363 Vc_SIMD_CAST_1(AVX2:: int_v, SSE:: uint_v);
0364 Vc_SIMD_CAST_1(AVX2:: int_v, SSE:: int_v);
0365 Vc_SIMD_CAST_1(AVX2:: int_v, SSE:: short_v);
0366 Vc_SIMD_CAST_1(AVX2:: int_v, SSE::ushort_v);
0367
0368 Vc_SIMD_CAST_1(AVX2:: uint_v, SSE::double_v);
0369 Vc_SIMD_CAST_1(AVX2:: uint_v, SSE:: float_v);
0370 Vc_SIMD_CAST_1(AVX2:: uint_v, SSE:: int_v);
0371 Vc_SIMD_CAST_1(AVX2:: uint_v, SSE:: uint_v);
0372 Vc_SIMD_CAST_1(AVX2:: uint_v, SSE:: short_v);
0373 Vc_SIMD_CAST_1(AVX2:: uint_v, SSE::ushort_v);
0374
0375 Vc_SIMD_CAST_1(AVX2:: short_v, SSE::double_v);
0376 Vc_SIMD_CAST_1(AVX2:: short_v, SSE:: float_v);
0377 Vc_SIMD_CAST_1(AVX2:: short_v, SSE:: int_v);
0378 Vc_SIMD_CAST_1(AVX2:: short_v, SSE:: uint_v);
0379 Vc_SIMD_CAST_1(AVX2:: short_v, SSE:: short_v);
0380 Vc_SIMD_CAST_1(AVX2:: short_v, SSE::ushort_v);
0381
0382 Vc_SIMD_CAST_1(AVX2::ushort_v, SSE::double_v);
0383 Vc_SIMD_CAST_1(AVX2::ushort_v, SSE:: float_v);
0384 Vc_SIMD_CAST_1(AVX2::ushort_v, SSE:: int_v);
0385 Vc_SIMD_CAST_1(AVX2::ushort_v, SSE:: uint_v);
0386 Vc_SIMD_CAST_1(AVX2::ushort_v, SSE:: short_v);
0387 Vc_SIMD_CAST_1(AVX2::ushort_v, SSE::ushort_v);
0388 #endif
0389
0390
// AVX2 -> SSE, two source vectors: the only two-vector AVX2 -> SSE forms
// declared here take two AVX2::double_v and produce SSE::short_v or
// SSE::ushort_v. Outside the Vc_IMPL_AVX2 guard.
0391 Vc_SIMD_CAST_2(AVX2::double_v, SSE:: short_v);
0392 Vc_SIMD_CAST_2(AVX2::double_v, SSE::ushort_v);
0393
0394
// Scalar -> AVX2, one argument: forward declarations of simd_cast overloads
// taking a single Scalar::Vector<T>. One overload per destination type,
// selected by requiring Return to be exactly that AVX2 vector type.
// double_v and float_v are always declared; int_v, uint_v, short_v and
// ushort_v only when Vc_IMPL_AVX2 is defined.
0395 template <typename Return, typename T>
0396 Vc_INTRINSIC Vc_CONST Return
0397 simd_cast(Scalar::Vector<T> x,
0398 enable_if<std::is_same<Return, AVX2::double_v>::value> = nullarg);
0399 template <typename Return, typename T>
0400 Vc_INTRINSIC Vc_CONST Return
0401 simd_cast(Scalar::Vector<T> x,
0402 enable_if<std::is_same<Return, AVX2::float_v>::value> = nullarg);
0403 #ifdef Vc_IMPL_AVX2
0404 template <typename Return, typename T>
0405 Vc_INTRINSIC Vc_CONST Return
0406 simd_cast(Scalar::Vector<T> x,
0407 enable_if<std::is_same<Return, AVX2::int_v>::value> = nullarg);
0408 template <typename Return, typename T>
0409 Vc_INTRINSIC Vc_CONST Return
0410 simd_cast(Scalar::Vector<T> x,
0411 enable_if<std::is_same<Return, AVX2::uint_v>::value> = nullarg);
0412 template <typename Return, typename T>
0413 Vc_INTRINSIC Vc_CONST Return
0414 simd_cast(Scalar::Vector<T> x,
0415 enable_if<std::is_same<Return, AVX2::short_v>::value> = nullarg);
0416 template <typename Return, typename T>
0417 Vc_INTRINSIC Vc_CONST Return
0418 simd_cast(Scalar::Vector<T> x,
0419 enable_if<std::is_same<Return, AVX2::ushort_v>::value> = nullarg);
0420 #endif
0421
0422
// Scalar -> AVX2, two arguments: forward declarations of simd_cast overloads
// taking two Scalar::Vector<T> values. One overload per destination type,
// selected by requiring Return to be exactly that AVX2 vector type.
// double_v and float_v are always declared; the integer destinations only
// when Vc_IMPL_AVX2 is defined.
0423 template <typename Return, typename T>
0424 Vc_INTRINSIC Vc_CONST Return
0425 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1,
0426 enable_if<std::is_same<Return, AVX2::double_v>::value> = nullarg);
0427 template <typename Return, typename T>
0428 Vc_INTRINSIC Vc_CONST Return
0429 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1,
0430 enable_if<std::is_same<Return, AVX2::float_v>::value> = nullarg);
0431 #ifdef Vc_IMPL_AVX2
0432 template <typename Return, typename T>
0433 Vc_INTRINSIC Vc_CONST Return
0434 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1,
0435 enable_if<std::is_same<Return, AVX2::int_v>::value> = nullarg);
0436 template <typename Return, typename T>
0437 Vc_INTRINSIC Vc_CONST Return
0438 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1,
0439 enable_if<std::is_same<Return, AVX2::uint_v>::value> = nullarg);
0440 template <typename Return, typename T>
0441 Vc_INTRINSIC Vc_CONST Return
0442 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1,
0443 enable_if<std::is_same<Return, AVX2::short_v>::value> = nullarg);
0444 template <typename Return, typename T>
0445 Vc_INTRINSIC Vc_CONST Return
0446 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1,
0447 enable_if<std::is_same<Return, AVX2::ushort_v>::value> = nullarg);
0448 #endif
0449
0450
// Scalar -> AVX2, three arguments: forward declarations of simd_cast
// overloads taking three Scalar::Vector<T> values. One overload per
// destination type, selected by requiring Return to be exactly that AVX2
// vector type. double_v and float_v are always declared; the integer
// destinations only when Vc_IMPL_AVX2 is defined.
0451 template <typename Return, typename T>
0452 Vc_INTRINSIC Vc_CONST Return
0453 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0454 enable_if<std::is_same<Return, AVX2::double_v>::value> = nullarg);
0455 template <typename Return, typename T>
0456 Vc_INTRINSIC Vc_CONST Return
0457 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0458 enable_if<std::is_same<Return, AVX2::float_v>::value> = nullarg);
0459 #ifdef Vc_IMPL_AVX2
0460 template <typename Return, typename T>
0461 Vc_INTRINSIC Vc_CONST Return
0462 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0463 enable_if<std::is_same<Return, AVX2::int_v>::value> = nullarg);
0464 template <typename Return, typename T>
0465 Vc_INTRINSIC Vc_CONST Return
0466 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0467 enable_if<std::is_same<Return, AVX2::uint_v>::value> = nullarg);
0468 template <typename Return, typename T>
0469 Vc_INTRINSIC Vc_CONST Return
0470 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0471 enable_if<std::is_same<Return, AVX2::short_v>::value> = nullarg);
0472 template <typename Return, typename T>
0473 Vc_INTRINSIC Vc_CONST Return
0474 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0475 enable_if<std::is_same<Return, AVX2::ushort_v>::value> = nullarg);
0476 #endif
0477
0478
// Scalar -> AVX2, four arguments: forward declarations of simd_cast overloads
// taking four Scalar::Vector<T> values. One overload per destination type,
// selected by requiring Return to be exactly that AVX2 vector type.
// double_v and float_v are always declared; the integer destinations only
// when Vc_IMPL_AVX2 is defined. In this part of the file, four is the
// highest arity declared for AVX2::double_v.
0479 template <typename Return, typename T>
0480 Vc_INTRINSIC Vc_CONST Return
0481 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0482 Scalar::Vector<T> x3,
0483 enable_if<std::is_same<Return, AVX2::double_v>::value> = nullarg);
0484 template <typename Return, typename T>
0485 Vc_INTRINSIC Vc_CONST Return
0486 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0487 Scalar::Vector<T> x3,
0488 enable_if<std::is_same<Return, AVX2::float_v>::value> = nullarg);
0489 #ifdef Vc_IMPL_AVX2
0490 template <typename Return, typename T>
0491 Vc_INTRINSIC Vc_CONST Return
0492 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0493 Scalar::Vector<T> x3,
0494 enable_if<std::is_same<Return, AVX2::int_v>::value> = nullarg);
0495 template <typename Return, typename T>
0496 Vc_INTRINSIC Vc_CONST Return
0497 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0498 Scalar::Vector<T> x3,
0499 enable_if<std::is_same<Return, AVX2::uint_v>::value> = nullarg);
0500 template <typename Return, typename T>
0501 Vc_INTRINSIC Vc_CONST Return
0502 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0503 Scalar::Vector<T> x3,
0504 enable_if<std::is_same<Return, AVX2::short_v>::value> = nullarg);
0505 template <typename Return, typename T>
0506 Vc_INTRINSIC Vc_CONST Return
0507 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0508 Scalar::Vector<T> x3,
0509 enable_if<std::is_same<Return, AVX2::ushort_v>::value> = nullarg);
0510 #endif
0511
0512
// Scalar -> AVX2, five arguments: forward declarations of simd_cast overloads
// taking five Scalar::Vector<T> values. There is no AVX2::double_v overload
// at this arity. float_v is always declared; int_v, uint_v, short_v and
// ushort_v only when Vc_IMPL_AVX2 is defined.
0513 template <typename Return, typename T>
0514 Vc_INTRINSIC Vc_CONST Return
0515 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0516 Scalar::Vector<T> x3, Scalar::Vector<T> x4,
0517 enable_if<std::is_same<Return, AVX2::float_v>::value> = nullarg);
0518 #ifdef Vc_IMPL_AVX2
0519 template <typename Return, typename T>
0520 Vc_INTRINSIC Vc_CONST Return
0521 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0522 Scalar::Vector<T> x3, Scalar::Vector<T> x4,
0523 enable_if<std::is_same<Return, AVX2::int_v>::value> = nullarg);
0524 template <typename Return, typename T>
0525 Vc_INTRINSIC Vc_CONST Return
0526 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0527 Scalar::Vector<T> x3, Scalar::Vector<T> x4,
0528 enable_if<std::is_same<Return, AVX2::uint_v>::value> = nullarg);
0529 template <typename Return, typename T>
0530 Vc_INTRINSIC Vc_CONST Return
0531 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0532 Scalar::Vector<T> x3, Scalar::Vector<T> x4,
0533 enable_if<std::is_same<Return, AVX2::short_v>::value> = nullarg);
0534 template <typename Return, typename T>
0535 Vc_INTRINSIC Vc_CONST Return
0536 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0537 Scalar::Vector<T> x3, Scalar::Vector<T> x4,
0538 enable_if<std::is_same<Return, AVX2::ushort_v>::value> = nullarg);
0539 #endif
0540
0541
// Scalar -> AVX2, six arguments: forward declarations of simd_cast overloads
// taking six Scalar::Vector<T> values. float_v is always declared; int_v,
// uint_v, short_v and ushort_v only when Vc_IMPL_AVX2 is defined.
0542 template <typename Return, typename T>
0543 Vc_INTRINSIC Vc_CONST Return
0544 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0545 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0546 enable_if<std::is_same<Return, AVX2::float_v>::value> = nullarg);
0547 #ifdef Vc_IMPL_AVX2
0548 template <typename Return, typename T>
0549 Vc_INTRINSIC Vc_CONST Return
0550 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0551 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0552 enable_if<std::is_same<Return, AVX2::int_v>::value> = nullarg);
0553 template <typename Return, typename T>
0554 Vc_INTRINSIC Vc_CONST Return
0555 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0556 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0557 enable_if<std::is_same<Return, AVX2::uint_v>::value> = nullarg);
0558 template <typename Return, typename T>
0559 Vc_INTRINSIC Vc_CONST Return
0560 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0561 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0562 enable_if<std::is_same<Return, AVX2::short_v>::value> = nullarg);
0563 template <typename Return, typename T>
0564 Vc_INTRINSIC Vc_CONST Return
0565 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0566 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0567 enable_if<std::is_same<Return, AVX2::ushort_v>::value> = nullarg);
0568 #endif
0569
0570
// Scalar -> AVX2, seven arguments: forward declarations of simd_cast
// overloads taking seven Scalar::Vector<T> values. float_v is always
// declared; int_v, uint_v, short_v and ushort_v only when Vc_IMPL_AVX2 is
// defined.
0571 template <typename Return, typename T>
0572 Vc_INTRINSIC Vc_CONST Return
0573 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0574 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0575 Scalar::Vector<T> x6,
0576 enable_if<std::is_same<Return, AVX2::float_v>::value> = nullarg);
0577 #ifdef Vc_IMPL_AVX2
0578 template <typename Return, typename T>
0579 Vc_INTRINSIC Vc_CONST Return
0580 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0581 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0582 Scalar::Vector<T> x6,
0583 enable_if<std::is_same<Return, AVX2::int_v>::value> = nullarg);
0584 template <typename Return, typename T>
0585 Vc_INTRINSIC Vc_CONST Return
0586 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0587 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0588 Scalar::Vector<T> x6,
0589 enable_if<std::is_same<Return, AVX2::uint_v>::value> = nullarg);
0590 template <typename Return, typename T>
0591 Vc_INTRINSIC Vc_CONST Return
0592 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0593 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0594 Scalar::Vector<T> x6,
0595 enable_if<std::is_same<Return, AVX2::short_v>::value> = nullarg);
0596 template <typename Return, typename T>
0597 Vc_INTRINSIC Vc_CONST Return
0598 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0599 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0600 Scalar::Vector<T> x6,
0601 enable_if<std::is_same<Return, AVX2::ushort_v>::value> = nullarg);
0602 #endif
0603
0604
// Scalar -> AVX2, eight arguments: forward declarations of simd_cast
// overloads taking eight Scalar::Vector<T> values. float_v is always
// declared; int_v, uint_v, short_v and ushort_v only when Vc_IMPL_AVX2 is
// defined. In this part of the file, eight is the highest arity declared for
// float_v, int_v and uint_v.
0605 template <typename Return, typename T>
0606 Vc_INTRINSIC Vc_CONST Return
0607 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0608 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0609 Scalar::Vector<T> x6, Scalar::Vector<T> x7,
0610 enable_if<std::is_same<Return, AVX2::float_v>::value> = nullarg);
0611 #ifdef Vc_IMPL_AVX2
0612 template <typename Return, typename T>
0613 Vc_INTRINSIC Vc_CONST Return
0614 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0615 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0616 Scalar::Vector<T> x6, Scalar::Vector<T> x7,
0617 enable_if<std::is_same<Return, AVX2::int_v>::value> = nullarg);
0618 template <typename Return, typename T>
0619 Vc_INTRINSIC Vc_CONST Return
0620 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0621 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0622 Scalar::Vector<T> x6, Scalar::Vector<T> x7,
0623 enable_if<std::is_same<Return, AVX2::uint_v>::value> = nullarg);
0624 template <typename Return, typename T>
0625 Vc_INTRINSIC Vc_CONST Return
0626 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0627 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0628 Scalar::Vector<T> x6, Scalar::Vector<T> x7,
0629 enable_if<std::is_same<Return, AVX2::short_v>::value> = nullarg);
0630 template <typename Return, typename T>
0631 Vc_INTRINSIC Vc_CONST Return
0632 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0633 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0634 Scalar::Vector<T> x6, Scalar::Vector<T> x7,
0635 enable_if<std::is_same<Return, AVX2::ushort_v>::value> = nullarg);
0636 #endif
0637
0638
// Scalar -> AVX2, nine arguments: only AVX2::short_v and AVX2::ushort_v
// destinations are declared at this arity, and only when Vc_IMPL_AVX2 is
// defined.
0639 #ifdef Vc_IMPL_AVX2
0640 template <typename Return, typename T>
0641 Vc_INTRINSIC Vc_CONST Return
0642 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0643 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0644 Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
0645 enable_if<std::is_same<Return, AVX2::short_v>::value> = nullarg);
0646 template <typename Return, typename T>
0647 Vc_INTRINSIC Vc_CONST Return
0648 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0649 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0650 Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
0651 enable_if<std::is_same<Return, AVX2::ushort_v>::value> = nullarg);
0652 #endif
0653
0654
// Scalar -> AVX2, ten arguments: only AVX2::short_v and AVX2::ushort_v
// destinations are declared at this arity, and only when Vc_IMPL_AVX2 is
// defined.
0655 #ifdef Vc_IMPL_AVX2
0656 template <typename Return, typename T>
0657 Vc_INTRINSIC Vc_CONST Return
0658 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0659 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0660 Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
0661 Scalar::Vector<T> x9,
0662 enable_if<std::is_same<Return, AVX2::short_v>::value> = nullarg);
0663 template <typename Return, typename T>
0664 Vc_INTRINSIC Vc_CONST Return
0665 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0666 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0667 Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
0668 Scalar::Vector<T> x9,
0669 enable_if<std::is_same<Return, AVX2::ushort_v>::value> = nullarg);
0670 #endif
0671
0672
// Scalar -> AVX2, eleven arguments: only AVX2::short_v and AVX2::ushort_v
// destinations are declared at this arity, and only when Vc_IMPL_AVX2 is
// defined.
0673 #ifdef Vc_IMPL_AVX2
0674 template <typename Return, typename T>
0675 Vc_INTRINSIC Vc_CONST Return
0676 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0677 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0678 Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
0679 Scalar::Vector<T> x9, Scalar::Vector<T> x10,
0680 enable_if<std::is_same<Return, AVX2::short_v>::value> = nullarg);
0681 template <typename Return, typename T>
0682 Vc_INTRINSIC Vc_CONST Return
0683 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0684 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0685 Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
0686 Scalar::Vector<T> x9, Scalar::Vector<T> x10,
0687 enable_if<std::is_same<Return, AVX2::ushort_v>::value> = nullarg);
0688 #endif
0689
0690
// Scalar -> AVX2, twelve arguments: only AVX2::short_v and AVX2::ushort_v
// destinations are declared at this arity, and only when Vc_IMPL_AVX2 is
// defined.
0691 #ifdef Vc_IMPL_AVX2
0692 template <typename Return, typename T>
0693 Vc_INTRINSIC Vc_CONST Return
0694 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0695 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0696 Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
0697 Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
0698 enable_if<std::is_same<Return, AVX2::short_v>::value> = nullarg);
0699 template <typename Return, typename T>
0700 Vc_INTRINSIC Vc_CONST Return
0701 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0702 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0703 Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
0704 Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
0705 enable_if<std::is_same<Return, AVX2::ushort_v>::value> = nullarg);
0706 #endif
0707
0708
// Scalar -> AVX2, thirteen arguments: only AVX2::short_v and AVX2::ushort_v
// destinations are declared at this arity, and only when Vc_IMPL_AVX2 is
// defined.
0709 #ifdef Vc_IMPL_AVX2
0710 template <typename Return, typename T>
0711 Vc_INTRINSIC Vc_CONST Return
0712 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0713 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0714 Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
0715 Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
0716 Scalar::Vector<T> x12,
0717 enable_if<std::is_same<Return, AVX2::short_v>::value> = nullarg);
0718 template <typename Return, typename T>
0719 Vc_INTRINSIC Vc_CONST Return
0720 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0721 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0722 Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
0723 Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
0724 Scalar::Vector<T> x12,
0725 enable_if<std::is_same<Return, AVX2::ushort_v>::value> = nullarg);
0726 #endif
0727
0728
// Scalar -> AVX2, fourteen arguments: only AVX2::short_v and AVX2::ushort_v
// destinations are declared at this arity, and only when Vc_IMPL_AVX2 is
// defined.
0729 #ifdef Vc_IMPL_AVX2
0730 template <typename Return, typename T>
0731 Vc_INTRINSIC Vc_CONST Return
0732 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0733 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0734 Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
0735 Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
0736 Scalar::Vector<T> x12, Scalar::Vector<T> x13,
0737 enable_if<std::is_same<Return, AVX2::short_v>::value> = nullarg);
0738 template <typename Return, typename T>
0739 Vc_INTRINSIC Vc_CONST Return
0740 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0741 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0742 Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
0743 Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
0744 Scalar::Vector<T> x12, Scalar::Vector<T> x13,
0745 enable_if<std::is_same<Return, AVX2::ushort_v>::value> = nullarg);
0746 #endif
0747
0748
// Scalar -> AVX2, fifteen arguments: only AVX2::short_v and AVX2::ushort_v
// destinations are declared at this arity, and only when Vc_IMPL_AVX2 is
// defined.
0749 #ifdef Vc_IMPL_AVX2
0750 template <typename Return, typename T>
0751 Vc_INTRINSIC Vc_CONST Return
0752 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0753 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0754 Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
0755 Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
0756 Scalar::Vector<T> x12, Scalar::Vector<T> x13, Scalar::Vector<T> x14,
0757 enable_if<std::is_same<Return, AVX2::short_v>::value> = nullarg);
0758 template <typename Return, typename T>
0759 Vc_INTRINSIC Vc_CONST Return
0760 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0761 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0762 Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
0763 Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
0764 Scalar::Vector<T> x12, Scalar::Vector<T> x13, Scalar::Vector<T> x14,
0765 enable_if<std::is_same<Return, AVX2::ushort_v>::value> = nullarg);
0766 #endif
0767
0768
// Declarations, present only when Vc_IMPL_AVX2 is defined: simd_cast overloads taking
// 16 Scalar::Vector<T> arguments (x0..x15). The trailing enable_if parameter
// (defaulted to nullarg) restricts the first overload to Return == AVX2::short_v and
// the second to Return == AVX2::ushort_v.
0769 #ifdef Vc_IMPL_AVX2
// 16 scalars -> AVX2::short_v
0770 template <typename Return, typename T>
0771 Vc_INTRINSIC Vc_CONST Return
0772 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0773 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0774 Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
0775 Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
0776 Scalar::Vector<T> x12, Scalar::Vector<T> x13, Scalar::Vector<T> x14,
0777 Scalar::Vector<T> x15,
0778 enable_if<std::is_same<Return, AVX2::short_v>::value> = nullarg);
// 16 scalars -> AVX2::ushort_v
0779 template <typename Return, typename T>
0780 Vc_INTRINSIC Vc_CONST Return
0781 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0782 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0783 Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
0784 Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
0785 Scalar::Vector<T> x12, Scalar::Vector<T> x13, Scalar::Vector<T> x14,
0786 Scalar::Vector<T> x15,
0787 enable_if<std::is_same<Return, AVX2::ushort_v>::value> = nullarg);
0788 #endif
0789
0790
// Declaration only: cast of one AVX2::Vector<FromT> to a type To for which
// Scalar::is_vector<To> holds. The enable_if parameter defaults to nullarg.
0791 template <typename To, typename FromT>
0792 Vc_INTRINSIC Vc_CONST To simd_cast(AVX2::Vector<FromT> x,
0793 enable_if<Scalar::is_vector<To>::value> = nullarg);
0794
0795
0796
// Declaration only: cast of one AVX2::Mask<T> (taken by const reference) to any
// Return type for which AVX2::is_mask<Return> holds. The enable_if parameter
// defaults to nullarg.
0797 template <typename Return, typename T>
0798 Vc_INTRINSIC Vc_CONST Return
0799 simd_cast(const AVX2::Mask<T> &k, enable_if<AVX2::is_mask<Return>::value> = nullarg);
0800
0801
// Declarations: two AVX2 masks of type from_ (x0, x1) -> one AVX2 mask of type to_.
// Vc_SIMD_CAST_AVX_2(from_, to_), in the form defined near the top of this file,
// expands to the template header and signature of
//   simd_cast<To>(AVX2::from_ x0, AVX2::from_ x1, enable_if<To == AVX2::to_> = nullarg);
// the trailing ';' turns each line into a declaration without a body.
// Only double_m -> float_m is declared unconditionally; all other combinations are
// declared only when Vc_IMPL_AVX2 is defined.
0802 Vc_SIMD_CAST_AVX_2(double_m, float_m);
0803 #ifdef Vc_IMPL_AVX2
0804 Vc_SIMD_CAST_AVX_2(double_m, int_m);
0805 Vc_SIMD_CAST_AVX_2(double_m, uint_m);
0806 Vc_SIMD_CAST_AVX_2(double_m, short_m);
0807 Vc_SIMD_CAST_AVX_2(double_m, ushort_m);
0808
0809 Vc_SIMD_CAST_AVX_2( float_m, short_m);
0810 Vc_SIMD_CAST_AVX_2( float_m, ushort_m);
0811
0812 Vc_SIMD_CAST_AVX_2( int_m, short_m);
0813 Vc_SIMD_CAST_AVX_2( int_m, ushort_m);
0814
0815 Vc_SIMD_CAST_AVX_2( uint_m, short_m);
0816 Vc_SIMD_CAST_AVX_2( uint_m, ushort_m);
0817 #endif
0818
0819
// Declarations, present only when Vc_IMPL_AVX2 is defined: four AVX2::double_m
// arguments (x0..x3, see the Vc_SIMD_CAST_AVX_4 macro near the top of this file)
// -> one AVX2::short_m or AVX2::ushort_m.
0820 #ifdef Vc_IMPL_AVX2
0821 Vc_SIMD_CAST_AVX_4(double_m, short_m);
0822 Vc_SIMD_CAST_AVX_4(double_m, ushort_m);
0823 #endif
0824
0825
// Declarations: one SSE mask -> one AVX2 mask.
// Vc_SIMD_CAST_1(from_, to_), in the form defined near the top of this file, expands to
//   simd_cast<To>(from_ x, enable_if<To == to_> = nullarg);
// the trailing ';' makes each line a declaration.
// Targets AVX2::double_m and AVX2::float_m are declared unconditionally; the
// int_m/uint_m/short_m/ushort_m targets only when Vc_IMPL_AVX2 is defined.

// SSE::double_m -> every AVX2 mask type
0826 Vc_SIMD_CAST_1(SSE::double_m, AVX2::double_m);
0827 Vc_SIMD_CAST_1(SSE::double_m, AVX2:: float_m);
0828 #ifdef Vc_IMPL_AVX2
0829 Vc_SIMD_CAST_1(SSE::double_m, AVX2:: int_m);
0830 Vc_SIMD_CAST_1(SSE::double_m, AVX2:: uint_m);
0831 Vc_SIMD_CAST_1(SSE::double_m, AVX2:: short_m);
0832 Vc_SIMD_CAST_1(SSE::double_m, AVX2::ushort_m);
0833 #endif
0834
// remaining SSE mask types -> AVX2::double_m
0835 Vc_SIMD_CAST_1(SSE:: float_m, AVX2::double_m);
0836 Vc_SIMD_CAST_1(SSE:: int_m, AVX2::double_m);
0837 Vc_SIMD_CAST_1(SSE:: uint_m, AVX2::double_m);
0838 Vc_SIMD_CAST_1(SSE:: short_m, AVX2::double_m);
0839 Vc_SIMD_CAST_1(SSE::ushort_m, AVX2::double_m);
0840
// remaining SSE mask types -> AVX2::float_m
0841 Vc_SIMD_CAST_1(SSE:: float_m, AVX2:: float_m);
0842 Vc_SIMD_CAST_1(SSE:: int_m, AVX2:: float_m);
0843 Vc_SIMD_CAST_1(SSE:: uint_m, AVX2:: float_m);
0844 Vc_SIMD_CAST_1(SSE:: short_m, AVX2:: float_m);
0845 Vc_SIMD_CAST_1(SSE::ushort_m, AVX2:: float_m);
// remaining SSE mask types -> AVX2 integer mask types (AVX2 builds only)
0846 #ifdef Vc_IMPL_AVX2
0847 Vc_SIMD_CAST_1(SSE:: float_m, AVX2:: int_m);
0848 Vc_SIMD_CAST_1(SSE:: float_m, AVX2:: uint_m);
0849 Vc_SIMD_CAST_1(SSE:: int_m, AVX2:: int_m);
0850 Vc_SIMD_CAST_1(SSE:: int_m, AVX2:: uint_m);
0851 Vc_SIMD_CAST_1(SSE:: uint_m, AVX2:: int_m);
0852 Vc_SIMD_CAST_1(SSE:: uint_m, AVX2:: uint_m);
0853
0854 Vc_SIMD_CAST_1(SSE:: float_m, AVX2:: short_m);
0855 Vc_SIMD_CAST_1(SSE:: int_m, AVX2:: short_m);
0856 Vc_SIMD_CAST_1(SSE:: uint_m, AVX2:: short_m);
0857 Vc_SIMD_CAST_1(SSE:: short_m, AVX2:: short_m);
0858 Vc_SIMD_CAST_1(SSE::ushort_m, AVX2:: short_m);
0859 Vc_SIMD_CAST_1(SSE:: float_m, AVX2::ushort_m);
0860 Vc_SIMD_CAST_1(SSE:: int_m, AVX2::ushort_m);
0861 Vc_SIMD_CAST_1(SSE:: uint_m, AVX2::ushort_m);
0862 Vc_SIMD_CAST_1(SSE:: short_m, AVX2::ushort_m);
0863 Vc_SIMD_CAST_1(SSE::ushort_m, AVX2::ushort_m);
0864
0865 Vc_SIMD_CAST_1(SSE:: short_m, AVX2:: int_m);
0866 Vc_SIMD_CAST_1(SSE:: short_m, AVX2:: uint_m);
0867
0868 Vc_SIMD_CAST_1(SSE::ushort_m, AVX2:: int_m);
0869 Vc_SIMD_CAST_1(SSE::ushort_m, AVX2:: uint_m);
0870 #endif
0871
0872
// Declarations: two SSE masks of the same type (x0, x1) -> one AVX2 mask.
// Vc_SIMD_CAST_2(from_, to_), in the form defined near the top of this file, expands to
//   simd_cast<To>(from_ x0, from_ x1, enable_if<To == to_> = nullarg);
// Targets AVX2::double_m / AVX2::float_m are declared unconditionally; integer mask
// targets only when Vc_IMPL_AVX2 is defined.
0873 Vc_SIMD_CAST_2(SSE::double_m, AVX2::double_m);
0874 Vc_SIMD_CAST_2(SSE::double_m, AVX2:: float_m);
0875 Vc_SIMD_CAST_2(SSE:: float_m, AVX2:: float_m);
0876 Vc_SIMD_CAST_2(SSE:: int_m, AVX2:: float_m);
0877 Vc_SIMD_CAST_2(SSE:: uint_m, AVX2:: float_m);
0878 #ifdef Vc_IMPL_AVX2
0879 Vc_SIMD_CAST_2(SSE::double_m, AVX2:: int_m);
0880 Vc_SIMD_CAST_2(SSE::double_m, AVX2:: uint_m);
0881 Vc_SIMD_CAST_2(SSE::double_m, AVX2:: short_m);
0882 Vc_SIMD_CAST_2(SSE::double_m, AVX2::ushort_m);
0883
0884 Vc_SIMD_CAST_2(SSE:: float_m, AVX2:: int_m);
0885 Vc_SIMD_CAST_2(SSE:: float_m, AVX2:: uint_m);
0886 Vc_SIMD_CAST_2(SSE:: float_m, AVX2:: short_m);
0887 Vc_SIMD_CAST_2(SSE:: float_m, AVX2::ushort_m);
0888
0889 Vc_SIMD_CAST_2(SSE:: int_m, AVX2:: int_m);
0890 Vc_SIMD_CAST_2(SSE:: int_m, AVX2:: uint_m);
0891 Vc_SIMD_CAST_2(SSE:: int_m, AVX2:: short_m);
0892 Vc_SIMD_CAST_2(SSE:: int_m, AVX2::ushort_m);
0893
0894 Vc_SIMD_CAST_2(SSE:: uint_m, AVX2:: int_m);
0895 Vc_SIMD_CAST_2(SSE:: uint_m, AVX2:: uint_m);
0896 Vc_SIMD_CAST_2(SSE:: uint_m, AVX2:: short_m);
0897 Vc_SIMD_CAST_2(SSE:: uint_m, AVX2::ushort_m);
0898
0899 Vc_SIMD_CAST_2(SSE:: short_m, AVX2:: short_m);
0900 Vc_SIMD_CAST_2(SSE:: short_m, AVX2::ushort_m);
0901 Vc_SIMD_CAST_2(SSE::ushort_m, AVX2:: short_m);
0902 Vc_SIMD_CAST_2(SSE::ushort_m, AVX2::ushort_m);
0903 #endif
0904
0905
// Declarations: four SSE masks of the same type -> one AVX2 mask. The four-argument
// shape (x0..x3) is taken from the later redefinition of Vc_SIMD_CAST_4 in this file;
// the declaring form of the macro is defined near the top of the file.
// Only SSE::double_m -> AVX2::float_m is declared unconditionally; the remaining
// combinations require Vc_IMPL_AVX2.
0906 Vc_SIMD_CAST_4(SSE::double_m, AVX2:: float_m);
0907 #ifdef Vc_IMPL_AVX2
0908 Vc_SIMD_CAST_4(SSE::double_m, AVX2:: int_m);
0909 Vc_SIMD_CAST_4(SSE::double_m, AVX2:: uint_m);
0910 Vc_SIMD_CAST_4(SSE::double_m, AVX2:: short_m);
0911 Vc_SIMD_CAST_4(SSE::double_m, AVX2::ushort_m);
0912 Vc_SIMD_CAST_4(SSE:: float_m, AVX2:: short_m);
0913 Vc_SIMD_CAST_4(SSE:: float_m, AVX2::ushort_m);
0914 Vc_SIMD_CAST_4(SSE:: int_m, AVX2:: short_m);
0915 Vc_SIMD_CAST_4(SSE:: int_m, AVX2::ushort_m);
0916 Vc_SIMD_CAST_4(SSE:: uint_m, AVX2:: short_m);
0917 Vc_SIMD_CAST_4(SSE:: uint_m, AVX2::ushort_m);
0918 #endif
0919
0920
// Declaration only: one Scalar::Mask<T> -> any Return for which
// AVX2::is_mask<Return> holds. The enable_if parameter defaults to nullarg.
0921 template <typename Return, typename T>
0922 Vc_INTRINSIC Vc_CONST Return
0923 simd_cast(Scalar::Mask<T> k,
0924 enable_if<AVX2::is_mask<Return>::value> = nullarg);
0925
0926
// Declaration only: two Scalar::Mask<T> (k0, k1) -> any Return for which
// AVX2::is_mask<Return> holds. No minimum Return::Size is required here, unlike the
// 4-, 8- and 16-argument overloads.
0927 template <typename Return, typename T>
0928 Vc_INTRINSIC Vc_CONST Return
0929 simd_cast(Scalar::Mask<T> k0, Scalar::Mask<T> k1,
0930 enable_if<AVX2::is_mask<Return>::value> = nullarg);
0931
0932
// Declaration only: four Scalar::Mask<T> (k0..k3) -> an AVX2 mask type Return.
// Enabled only when AVX2::is_mask<Return> holds and Return::Size >= 4.
0933 template <typename Return, typename T>
0934 Vc_INTRINSIC Vc_CONST Return simd_cast(
0935 Scalar::Mask<T> k0, Scalar::Mask<T> k1, Scalar::Mask<T> k2, Scalar::Mask<T> k3,
0936 enable_if<(AVX2::is_mask<Return>::value && Return::Size >= 4)> = nullarg);
0937
0938
// Declaration only: eight Scalar::Mask<T> (k0..k7) -> an AVX2 mask type Return.
// Enabled only when AVX2::is_mask<Return> holds and Return::Size >= 8.
0939 template <typename Return, typename T>
0940 Vc_INTRINSIC Vc_CONST Return simd_cast(
0941 Scalar::Mask<T> k0, Scalar::Mask<T> k1, Scalar::Mask<T> k2, Scalar::Mask<T> k3,
0942 Scalar::Mask<T> k4, Scalar::Mask<T> k5, Scalar::Mask<T> k6, Scalar::Mask<T> k7,
0943 enable_if<(AVX2::is_mask<Return>::value && Return::Size >= 8)> = nullarg);
0944
0945
// Declaration only: sixteen Scalar::Mask<T> (k0..k15) -> an AVX2 mask type Return.
// Enabled only when AVX2::is_mask<Return> holds and Return::Size >= 16.
0946 template <typename Return, typename T>
0947 Vc_INTRINSIC Vc_CONST Return
0948 simd_cast(Scalar::Mask<T> k0, Scalar::Mask<T> k1, Scalar::Mask<T> k2, Scalar::Mask<T> k3,
0949 Scalar::Mask<T> k4, Scalar::Mask<T> k5, Scalar::Mask<T> k6, Scalar::Mask<T> k7,
0950 Scalar::Mask<T> k8, Scalar::Mask<T> k9, Scalar::Mask<T> k10,
0951 Scalar::Mask<T> k11, Scalar::Mask<T> k12, Scalar::Mask<T> k13,
0952 Scalar::Mask<T> k14, Scalar::Mask<T> k15,
0953 enable_if<(AVX2::is_mask<Return>::value && Return::Size >= 16)> = nullarg);
0954
0955
// Declarations: one AVX2 mask -> one SSE mask, for every SSE mask type as target.
// Vc_SIMD_CAST_1(from_, to_), in the form defined near the top of this file, expands to
//   simd_cast<To>(from_ x, enable_if<To == to_> = nullarg);
// Sources AVX2::double_m and AVX2::float_m are declared unconditionally; the integer
// mask sources only when Vc_IMPL_AVX2 is defined.
0956 Vc_SIMD_CAST_1(AVX2::double_m, SSE::double_m);
0957 Vc_SIMD_CAST_1(AVX2::double_m, SSE:: float_m);
0958 Vc_SIMD_CAST_1(AVX2::double_m, SSE:: int_m);
0959 Vc_SIMD_CAST_1(AVX2::double_m, SSE:: uint_m);
0960 Vc_SIMD_CAST_1(AVX2::double_m, SSE:: short_m);
0961 Vc_SIMD_CAST_1(AVX2::double_m, SSE::ushort_m);
0962
0963 Vc_SIMD_CAST_1(AVX2:: float_m, SSE::double_m);
0964 Vc_SIMD_CAST_1(AVX2:: float_m, SSE:: float_m);
0965 Vc_SIMD_CAST_1(AVX2:: float_m, SSE:: int_m);
0966 Vc_SIMD_CAST_1(AVX2:: float_m, SSE:: uint_m);
0967 Vc_SIMD_CAST_1(AVX2:: float_m, SSE:: short_m);
0968 Vc_SIMD_CAST_1(AVX2:: float_m, SSE::ushort_m);
0969
// integer AVX2 mask sources (AVX2 builds only)
0970 #ifdef Vc_IMPL_AVX2
0971 Vc_SIMD_CAST_1(AVX2:: int_m, SSE::double_m);
0972 Vc_SIMD_CAST_1(AVX2:: int_m, SSE:: float_m);
0973 Vc_SIMD_CAST_1(AVX2:: int_m, SSE:: int_m);
0974 Vc_SIMD_CAST_1(AVX2:: int_m, SSE:: uint_m);
0975 Vc_SIMD_CAST_1(AVX2:: int_m, SSE:: short_m);
0976 Vc_SIMD_CAST_1(AVX2:: int_m, SSE::ushort_m);
0977
0978 Vc_SIMD_CAST_1(AVX2:: uint_m, SSE::double_m);
0979 Vc_SIMD_CAST_1(AVX2:: uint_m, SSE:: float_m);
0980 Vc_SIMD_CAST_1(AVX2:: uint_m, SSE:: int_m);
0981 Vc_SIMD_CAST_1(AVX2:: uint_m, SSE:: uint_m);
0982 Vc_SIMD_CAST_1(AVX2:: uint_m, SSE:: short_m);
0983 Vc_SIMD_CAST_1(AVX2:: uint_m, SSE::ushort_m);
0984
0985 Vc_SIMD_CAST_1(AVX2:: short_m, SSE::double_m);
0986 Vc_SIMD_CAST_1(AVX2:: short_m, SSE:: float_m);
0987 Vc_SIMD_CAST_1(AVX2:: short_m, SSE:: int_m);
0988 Vc_SIMD_CAST_1(AVX2:: short_m, SSE:: uint_m);
0989 Vc_SIMD_CAST_1(AVX2:: short_m, SSE:: short_m);
0990 Vc_SIMD_CAST_1(AVX2:: short_m, SSE::ushort_m);
0991
0992 Vc_SIMD_CAST_1(AVX2::ushort_m, SSE::double_m);
0993 Vc_SIMD_CAST_1(AVX2::ushort_m, SSE:: float_m);
0994 Vc_SIMD_CAST_1(AVX2::ushort_m, SSE:: int_m);
0995 Vc_SIMD_CAST_1(AVX2::ushort_m, SSE:: uint_m);
0996 Vc_SIMD_CAST_1(AVX2::ushort_m, SSE:: short_m);
0997 Vc_SIMD_CAST_1(AVX2::ushort_m, SSE::ushort_m);
0998 #endif
0999
1000
// Declarations: two AVX2::double_m (x0, x1) -> one SSE::short_m or SSE::ushort_m.
// These are the only two-AVX2-mask -> SSE-mask combinations declared here.
1001 Vc_SIMD_CAST_2(AVX2::double_m, SSE:: short_m);
1002 Vc_SIMD_CAST_2(AVX2::double_m, SSE::ushort_m);
1003
1004
// Declaration only: one AVX2::Mask<FromT> -> a type To for which
// Scalar::is_mask<To> holds. The enable_if parameter defaults to nullarg.
1005 template <typename To, typename FromT>
1006 Vc_INTRINSIC Vc_CONST To simd_cast(AVX2::Mask<FromT> x,
1007 enable_if<Scalar::is_mask<To>::value> = nullarg);
1008
1009
// Declaration only: offset form simd_cast<Return, offset>(x) for offset == 0.
// Here the constraint is expressed through the return type (enable_if<cond, Return>)
// rather than through an extra defaulted parameter. The overload is enabled when
// offset == 0 and one of the following holds:
//   - From is an AVX2 vector and Return is a SIMD vector that is neither a Scalar
//     vector nor a SimdArray;
//   - From is an AVX2 mask and Return is a SIMD mask that is neither a Scalar mask
//     nor a SimdMaskArray.
1010 template <typename Return, int offset, typename From>
1011 Vc_INTRINSIC Vc_CONST enable_if<
1012 (offset == 0 &&
1013 ((AVX2::is_vector<From>::value && !Scalar::is_vector<Return>::value &&
1014 Traits::is_simd_vector<Return>::value && !Traits::isSimdArray<Return>::value) ||
1015 (AVX2::is_mask<From>::value && !Scalar::is_mask<Return>::value &&
1016 Traits::is_simd_mask<Return>::value &&
1017 !Traits::isSimdMaskArray<Return>::value))),
1018 Return>
1019 simd_cast(const From &x);
1020
// Declaration only: offset form simd_cast<Return, offset>(x) for offset == 0 where
// the source is an SSE type and the result an AVX2 type. Enabled when offset == 0 and
// either (From is an SSE vector and Return an AVX2 vector) or (From is an SSE mask
// and Return an AVX2 mask). The enable_if parameter defaults to nullarg.
1021 template <typename Return, int offset, typename From>
1022 Vc_INTRINSIC Vc_CONST Return simd_cast(
1023 const From &x,
1024 enable_if<offset == 0 && ((SSE::is_vector<From>::value &&
1025 AVX2::is_vector<Return>::value) ||
1026 (SSE::is_mask<From>::value &&
1027 AVX2::is_mask<Return>::value))> = nullarg);
1028
1029
1030
// Declaration only: offset form simd_cast<Return, offset>(x) with a non-zero offset,
// from an AVX2::Vector<T> to an AVX2 vector type Return. The constraint
// (AVX2::is_vector<Return> && offset != 0) is carried by the return type.
1031 template <typename Return, int offset, typename T>
1032 Vc_INTRINSIC Vc_CONST enable_if<(AVX2::is_vector<Return>::value && offset != 0),
1033 Return>
1034 simd_cast(AVX2::Vector<T> x);
1035
// Declarations only: offset form simd_cast<Return, offset>(x) with a non-zero offset,
// from an AVX2::Vector<T> to an SSE vector type Return. Two mutually exclusive
// overloads are declared, distinguished solely by sizeof(AVX2::Vector<T>):
// the first applies when the source object is 32 bytes, the second when it is 16 bytes.
1036 template <typename Return, int offset, typename T>
1037 Vc_INTRINSIC Vc_CONST enable_if<(offset != 0 && SSE::is_vector<Return>::value &&
1038 sizeof(AVX2::Vector<T>) == 32),
1039 Return>
1040 simd_cast(AVX2::Vector<T> x);
// Same constraint, but for a 16-byte AVX2::Vector<T>.
1041 template <typename Return, int offset, typename T>
1042 Vc_INTRINSIC Vc_CONST enable_if<(offset != 0 && SSE::is_vector<Return>::value &&
1043 sizeof(AVX2::Vector<T>) == 16),
1044 Return>
1045 simd_cast(AVX2::Vector<T> x);
1046
// Declarations: offset-1 cast from SSE::short_v / SSE::ushort_v to AVX2::double_v.
// NOTE(review): the declaring form of Vc_SIMD_CAST_OFFSET is not visible in this part
// of the file; judging by its later redefinition it introduces
//   simd_cast<To, offset>(from_ x, enable_if<offset == offset_ && To == to_>)
// -- confirm against the macro's first definition.
1047 Vc_SIMD_CAST_OFFSET(SSE:: short_v, AVX2::double_v, 1);
1048 Vc_SIMD_CAST_OFFSET(SSE::ushort_v, AVX2::double_v, 1);
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
// Declarations: offset-1 cast from SSE::short_m / SSE::ushort_m to AVX2::double_m
// (mask counterpart of the SSE::short_v / SSE::ushort_v -> AVX2::double_v offset
// declarations).
// NOTE(review): the declaring form of Vc_SIMD_CAST_OFFSET is not visible in this part
// of the file -- confirm the generated signature against its first definition.
1077 Vc_SIMD_CAST_OFFSET(SSE:: short_m, AVX2::double_m, 1);
1078 Vc_SIMD_CAST_OFFSET(SSE::ushort_m, AVX2::double_m, 1);
1079
1080
// Declarations only: offset form simd_cast<Return, offset>(x) with a non-zero offset,
// from an AVX2::Mask<T> to an SSE mask type Return. As with the vector versions, two
// mutually exclusive overloads are distinguished by sizeof(AVX2::Mask<T>):
// 32 bytes for the first, 16 bytes for the second.
1081 template <typename Return, int offset, typename T>
1082 Vc_INTRINSIC Vc_CONST enable_if<(offset != 0 && SSE::is_mask<Return>::value &&
1083 sizeof(AVX2::Mask<T>) == 32),
1084 Return>
1085 simd_cast(AVX2::Mask<T> x);
// Same constraint, but for a 16-byte AVX2::Mask<T>.
1086 template <typename Return, int offset, typename T>
1087 Vc_INTRINSIC Vc_CONST enable_if<(offset != 0 && SSE::is_mask<Return>::value &&
1088 sizeof(AVX2::Mask<T>) == 16),
1089 Return>
1090 simd_cast(AVX2::Mask<T> x);
1091
1092
// Redefinition of the Vc_SIMD_CAST_AVX_<N> helper macros for the definitions part of
// this file. Each macro still expands to the template header and signature of a
// simd_cast<To> overload taking N arguments of type AVX2::from_, constrained to
// To == AVX2::to_, but the enable_if parameter is now unnamed and has no
// "= nullarg" default: the default was already supplied by the earlier declarations,
// and C++ does not allow a default argument to be given again on a later declaration
// of the same function. The macro invocation is followed by the function body.

// One AVX2::from_ argument (x).
1093 #undef Vc_SIMD_CAST_AVX_1
1094 #define Vc_SIMD_CAST_AVX_1(from_, to_) \
1095 template <typename To> \
1096 Vc_INTRINSIC Vc_CONST To simd_cast(AVX2::from_ x, \
1097 enable_if<std::is_same<To, AVX2::to_>::value>)
1098
// Two AVX2::from_ arguments (x0, x1). Additionally asserts at compile time that two
// source vectors fit into the destination: from_::size() * 2 <= to_::size().
1099 #undef Vc_SIMD_CAST_AVX_2
1100 #define Vc_SIMD_CAST_AVX_2(from_, to_) \
1101 static_assert(AVX2::from_::size() * 2 <= AVX2::to_::size(), \
1102 "this type combination is wrong"); \
1103 template <typename To> \
1104 Vc_INTRINSIC Vc_CONST To simd_cast(AVX2::from_ x0, AVX2::from_ x1, \
1105 enable_if<std::is_same<To, AVX2::to_>::value>)
1106
// Three AVX2::from_ arguments (x0..x2).
1107 #undef Vc_SIMD_CAST_AVX_3
1108 #define Vc_SIMD_CAST_AVX_3(from_, to_) \
1109 template <typename To> \
1110 Vc_INTRINSIC Vc_CONST To simd_cast(AVX2::from_ x0, AVX2::from_ x1, AVX2::from_ x2, \
1111 enable_if<std::is_same<To, AVX2::to_>::value>)
1112
// Four AVX2::from_ arguments (x0..x3).
1113 #undef Vc_SIMD_CAST_AVX_4
1114 #define Vc_SIMD_CAST_AVX_4(from_, to_) \
1115 template <typename To> \
1116 Vc_INTRINSIC Vc_CONST To simd_cast(AVX2::from_ x0, AVX2::from_ x1, AVX2::from_ x2, \
1117 AVX2::from_ x3, \
1118 enable_if<std::is_same<To, AVX2::to_>::value>)
1119
// Redefinition of the Vc_SIMD_CAST_<N> helper macros (N = 1..8) for the definitions
// part of this file. Unlike the Vc_SIMD_CAST_AVX_<N> family, from_ and to_ are used
// verbatim (no AVX2:: prefix is added), so callers spell out the namespace.
// Each macro expands to the template header and signature of a simd_cast<To> overload
// taking N arguments of type from_ (named x for N == 1, otherwise x0..x<N-1>),
// constrained to To == to_. The enable_if parameter is unnamed and carries no
// "= nullarg" default, because a default argument may not be repeated on a later
// declaration of a function that already has one.
1120 #undef Vc_SIMD_CAST_1
1121 #define Vc_SIMD_CAST_1(from_, to_) \
1122 template <typename To> \
1123 Vc_INTRINSIC Vc_CONST To simd_cast(from_ x, enable_if<std::is_same<To, to_>::value>)
1124
1125 #undef Vc_SIMD_CAST_2
1126 #define Vc_SIMD_CAST_2(from_, to_) \
1127 template <typename To> \
1128 Vc_INTRINSIC Vc_CONST To simd_cast(from_ x0, from_ x1, \
1129 enable_if<std::is_same<To, to_>::value>)
1130
1131 #undef Vc_SIMD_CAST_3
1132 #define Vc_SIMD_CAST_3(from_, to_) \
1133 template <typename To> \
1134 Vc_INTRINSIC Vc_CONST To simd_cast(from_ x0, from_ x1, from_ x2, \
1135 enable_if<std::is_same<To, to_>::value>)
1136
1137 #undef Vc_SIMD_CAST_4
1138 #define Vc_SIMD_CAST_4(from_, to_) \
1139 template <typename To> \
1140 Vc_INTRINSIC Vc_CONST To simd_cast(from_ x0, from_ x1, from_ x2, from_ x3, \
1141 enable_if<std::is_same<To, to_>::value>)
1142
1143 #undef Vc_SIMD_CAST_5
1144 #define Vc_SIMD_CAST_5(from_, to_) \
1145 template <typename To> \
1146 Vc_INTRINSIC Vc_CONST To simd_cast(from_ x0, from_ x1, from_ x2, from_ x3, from_ x4, \
1147 enable_if<std::is_same<To, to_>::value>)
1148
1149 #undef Vc_SIMD_CAST_6
1150 #define Vc_SIMD_CAST_6(from_, to_) \
1151 template <typename To> \
1152 Vc_INTRINSIC Vc_CONST To simd_cast(from_ x0, from_ x1, from_ x2, from_ x3, from_ x4, \
1153 from_ x5, \
1154 enable_if<std::is_same<To, to_>::value>)
1155
1156 #undef Vc_SIMD_CAST_7
1157 #define Vc_SIMD_CAST_7(from_, to_) \
1158 template <typename To> \
1159 Vc_INTRINSIC Vc_CONST To simd_cast(from_ x0, from_ x1, from_ x2, from_ x3, from_ x4, \
1160 from_ x5, from_ x6, \
1161 enable_if<std::is_same<To, to_>::value>)
1162
1163 #undef Vc_SIMD_CAST_8
1164 #define Vc_SIMD_CAST_8(from_, to_) \
1165 template <typename To> \
1166 Vc_INTRINSIC Vc_CONST To simd_cast(from_ x0, from_ x1, from_ x2, from_ x3, from_ x4, \
1167 from_ x5, from_ x6, from_ x7, \
1168 enable_if<std::is_same<To, to_>::value>)
1169
// Redefinition of Vc_SIMD_CAST_OFFSET for the definitions part of this file.
// Expands to:
//   - a static_assert that the requested slice exists, i.e. that from_ holds at least
//     (offset_ + 1) times as many elements as to_;
//   - the template header and signature of simd_cast<To, offset>(from_ x, ...),
//     constrained to offset == offset_ and To == to_.
// The enable_if parameter is unnamed and has no default argument here.
1170 #undef Vc_SIMD_CAST_OFFSET
1171 #define Vc_SIMD_CAST_OFFSET(from_, to_, offset_) \
1172 static_assert(from_::size() >= to_::size() * (offset_ + 1), \
1173 "this offset cannot exist for this type combination"); \
1174 template <typename To, int offset> \
1175 Vc_INTRINSIC Vc_CONST To simd_cast( \
1176 from_ x, enable_if<(offset == offset_ && std::is_same<To, to_>::value)>)
1177
1178
1179
// Generic definition: one SSE vector -> AVX2 vector To, for the case where the SSE
// vector with To's EntryType has exactly as many elements as To
// (SSE::Vector<To::EntryType>::Size == To::Size).
// The work is delegated to the cast to SSE::Vector<To::EntryType>; its .data() is
// returned and converted to To by the return statement.
// NOTE(review): presumably this covers AVX2:: vector types that are only 128 bits
// wide in builds without Vc_IMPL_AVX2 -- confirm.
1180 template <typename To, typename From>
1181 Vc_INTRINSIC Vc_CONST To
1182 simd_cast(From x, enable_if<(AVX2::is_vector<To>::value && SSE::is_vector<From>::value &&
1183 SSE::Vector<typename To::EntryType>::Size == To::Size)>)
1184 {
1185 return simd_cast<SSE::Vector<typename To::EntryType>>(x).data();
1186 }
// Generic definition: two SSE vectors (x0, x1) -> AVX2 vector To, enabled when
// SSE::Vector<To::EntryType>::Size == To::Size. Forwards both arguments to the
// two-argument cast to SSE::Vector<To::EntryType> and returns that result's .data().
1187 template <typename To, typename From>
1188 Vc_INTRINSIC Vc_CONST To
1189 simd_cast(From x0, From x1,
1190 enable_if<(AVX2::is_vector<To>::value && SSE::is_vector<From>::value &&
1191 SSE::Vector<typename To::EntryType>::Size == To::Size)>)
1192 {
1193 return simd_cast<SSE::Vector<typename To::EntryType>>(x0, x1).data();
1194 }
// Generic definition: three SSE vectors (x0..x2) -> AVX2 vector To, enabled when
// SSE::Vector<To::EntryType>::Size == To::Size. Forwards the arguments to the
// three-argument cast to SSE::Vector<To::EntryType> and returns that result's .data().
1195 template <typename To, typename From>
1196 Vc_INTRINSIC Vc_CONST To
1197 simd_cast(From x0, From x1, From x2,
1198 enable_if<(AVX2::is_vector<To>::value && SSE::is_vector<From>::value &&
1199 SSE::Vector<typename To::EntryType>::Size == To::Size)>)
1200 {
1201 return simd_cast<SSE::Vector<typename To::EntryType>>(x0, x1, x2).data();
1202 }
// Generic definition: four SSE vectors (x0..x3) -> AVX2 vector To, enabled when
// SSE::Vector<To::EntryType>::Size == To::Size. Forwards the arguments to the
// four-argument cast to SSE::Vector<To::EntryType> and returns that result's .data().
1203 template <typename To, typename From>
1204 Vc_INTRINSIC Vc_CONST To
1205 simd_cast(From x0, From x1, From x2, From x3,
1206 enable_if<(AVX2::is_vector<To>::value && SSE::is_vector<From>::value &&
1207 SSE::Vector<typename To::EntryType>::Size == To::Size)>)
1208 {
1209 return simd_cast<SSE::Vector<typename To::EntryType>>(x0, x1, x2, x3).data();
1210 }
// Generic definition: eight SSE vectors (x0..x7) -> AVX2 vector To, enabled when
// SSE::Vector<To::EntryType>::Size == To::Size. Forwards the arguments to the
// eight-argument cast to SSE::Vector<To::EntryType> and returns that result's .data().
// Note that no 5-, 6- or 7-argument variants of this generic overload are defined here.
1211 template <typename To, typename From>
1212 Vc_INTRINSIC Vc_CONST To
1213 simd_cast(From x0, From x1, From x2, From x3, From x4, From x5, From x6, From x7,
1214 enable_if<(AVX2::is_vector<To>::value && SSE::is_vector<From>::value &&
1215 SSE::Vector<typename To::EntryType>::Size == To::Size)>)
1216 {
1217 return simd_cast<SSE::Vector<typename To::EntryType>>(x0, x1, x2, x3, x4, x5, x6, x7)
1218 .data();
1219 }
1220
1221
1222
1223
// Definitions: one AVX2 vector -> AVX2::double_v.
// In every case only AVX::lo128(x.data()) (presumably the low 128-bit half of the
// source register) is converted; the remaining source elements are not used.
// float_v uses _mm256_cvtps_pd directly (4 floats -> 4 doubles); the integer sources,
// available only with Vc_IMPL_AVX2, go through the AVX::convert<From, To> helper
// defined elsewhere.
1224 Vc_SIMD_CAST_AVX_1( float_v, double_v) { return _mm256_cvtps_pd(AVX::lo128(x.data())); }
1225 #ifdef Vc_IMPL_AVX2
1226 Vc_SIMD_CAST_AVX_1( int_v, double_v) { return AVX::convert< int, double>(AVX::lo128(x.data())); }
1227 Vc_SIMD_CAST_AVX_1( uint_v, double_v) { return AVX::convert< uint, double>(AVX::lo128(x.data())); }
1228 Vc_SIMD_CAST_AVX_1( short_v, double_v) { return AVX::convert< short, double>(AVX::lo128(x.data())); }
1229 Vc_SIMD_CAST_AVX_1(ushort_v, double_v) { return AVX::convert<ushort, double>(AVX::lo128(x.data())); }
1230 #endif
1231
1232
// Definitions: one AVX2 vector -> AVX2::float_v.
// double_v: _mm256_cvtpd_ps yields a 128-bit result (4 floats), which
//   AVX::zeroExtend widens to the 256-bit return type (presumably with the upper
//   half zeroed, as the name suggests).
// int_v / uint_v: the whole register is converted via AVX::convert.
// short_v / ushort_v: only AVX::lo128(x.data()) is passed to AVX::convert.
// All integer sources require Vc_IMPL_AVX2.
1233 Vc_SIMD_CAST_AVX_1(double_v, float_v) { return AVX::zeroExtend(_mm256_cvtpd_ps(x.data())); }
1234 #ifdef Vc_IMPL_AVX2
1235 Vc_SIMD_CAST_AVX_1( int_v, float_v) { return AVX::convert< int, float>(x.data()); }
1236 Vc_SIMD_CAST_AVX_1( uint_v, float_v) { return AVX::convert< uint, float>(x.data()); }
1237 Vc_SIMD_CAST_AVX_1( short_v, float_v) { return AVX::convert< short, float>(AVX::lo128(x.data())); }
1238 Vc_SIMD_CAST_AVX_1(ushort_v, float_v) { return AVX::convert<ushort, float>(AVX::lo128(x.data())); }
1239 #endif
1240
1241
// Definition: two AVX2::double_v -> one AVX2::float_v. Each input is narrowed to a
// 128-bit float result with _mm256_cvtpd_ps; AVX::concat joins them, x0's result
// being passed first and x1's second.
1242 Vc_SIMD_CAST_AVX_2(double_v, float_v) { return AVX::concat(_mm256_cvtpd_ps(x0.data()), _mm256_cvtpd_ps(x1.data())); }
1243
1244
// Definitions (Vc_IMPL_AVX2 only): one AVX2 vector -> AVX2::int_v.
// double_v: truncating conversion (_mm256_cvttpd_epi32, 128-bit result) widened with
//   AVX::zeroExtend.
// float_v:  truncating conversion of the whole register (_mm256_cvttps_epi32).
// uint_v:   the register is returned unchanged (same bits, signed interpretation).
// short_v:  _mm256_cvtepi16_epi32 sign-extends the 16-bit elements of lo128.
// ushort_v: _mm256_cvtepu16_epi32 zero-extends the 16-bit elements of lo128.
1245 #ifdef Vc_IMPL_AVX2
1246 Vc_SIMD_CAST_AVX_1(double_v, int_v) { return AVX::zeroExtend(_mm256_cvttpd_epi32(x.data())); }
1247 Vc_SIMD_CAST_AVX_1( float_v, int_v) { return _mm256_cvttps_epi32(x.data()); }
1248 Vc_SIMD_CAST_AVX_1( uint_v, int_v) { return x.data(); }
1249 Vc_SIMD_CAST_AVX_1( short_v, int_v) { return _mm256_cvtepi16_epi32(AVX::lo128(x.data())); }
1250 Vc_SIMD_CAST_AVX_1(ushort_v, int_v) { return _mm256_cvtepu16_epi32(AVX::lo128(x.data())); }
1251 #endif
1252
1253
// Definition (Vc_IMPL_AVX2 only): two AVX2::double_v -> one AVX2::int_v. Each input
// is converted with truncation to a 128-bit int32 result (_mm256_cvttpd_epi32); the
// two results are joined with AVX::concat, x0's first.
1254 #ifdef Vc_IMPL_AVX2
1255 Vc_SIMD_CAST_AVX_2(double_v, int_v) { return AVX::concat(_mm256_cvttpd_epi32(x0.data()), _mm256_cvttpd_epi32(x1.data())); }
1256 #endif
1257
1258
// Definitions (Vc_IMPL_AVX2 only): one AVX2 vector -> AVX2::uint_v.
// double_v: AVX::convert<double, uint> result widened with AVX::zeroExtend.
// float_v:  there is no unsigned float->int32 intrinsic in use here, so two candidate
//   results are computed and blended per element:
//     - the plain truncating signed conversion of x, and
//     - the signed conversion of (x - 2^31) with 2^31 added back as an integer.
//   The second candidate is selected where x >= 2^31 (mask from AVX::cmpge_ps),
//   i.e. for values that do not fit a signed 32-bit integer.
// int_v:    the register is returned unchanged (same bits, unsigned interpretation).
// short_v:  sign-extending widening (_mm256_cvtepi16_epi32) of lo128, so negative
//   inputs yield their two's-complement 32-bit pattern.
// ushort_v: zero-extending widening (_mm256_cvtepu16_epi32) of lo128.
1259 #ifdef Vc_IMPL_AVX2
1260 Vc_SIMD_CAST_AVX_1(double_v, uint_v) { return AVX::zeroExtend(AVX::convert<double, uint>(x.data())); }
1261 Vc_SIMD_CAST_AVX_1( float_v, uint_v) {
1262 return _mm256_blendv_epi8(
1263 _mm256_cvttps_epi32(x.data()),
1264 _mm256_add_epi32(
1265 _mm256_cvttps_epi32(_mm256_sub_ps(x.data(), AVX::set2power31_ps())),
1266 AVX::set2power31_epu32()),
1267 _mm256_castps_si256(AVX::cmpge_ps(x.data(), AVX::set2power31_ps())));
1268 }
1269 Vc_SIMD_CAST_AVX_1( int_v, uint_v) { return x.data(); }
1270 Vc_SIMD_CAST_AVX_1( short_v, uint_v) { return _mm256_cvtepi16_epi32(AVX::lo128(x.data())); }
1271 Vc_SIMD_CAST_AVX_1(ushort_v, uint_v) { return _mm256_cvtepu16_epi32(AVX::lo128(x.data())); }
1272 #endif
1273
1274
// Definition (Vc_IMPL_AVX2 only): two AVX2::double_v -> one AVX2::uint_v. Each input
// is converted with AVX::convert<double, uint>; the two results are joined with
// AVX::concat, x0's first.
1275 #ifdef Vc_IMPL_AVX2
1276 Vc_SIMD_CAST_AVX_2(double_v, uint_v) { return AVX::concat(AVX::convert<double, uint>(x0.data()), AVX::convert<double, uint>(x1.data())); }
1277 #endif
1278
1279
// Definitions (Vc_IMPL_AVX2 only): one AVX2 vector -> AVX2::short_v.
// double_v: truncating conversion to int32 (128-bit result), then _mm_packs_epi32
//   narrows to int16 with signed saturation; the second pack operand is all zeros.
// float_v:  truncating conversion to eight int32, whose two 128-bit halves are packed
//   to eight int16 with signed saturation.
// int_v / uint_v: narrowed by the AVX::convert helper.
// ushort_v: the register is returned unchanged (same bits, signed interpretation).
// Results narrower than 256 bits are widened with AVX::zeroExtend.
1280 #ifdef Vc_IMPL_AVX2
1281 Vc_SIMD_CAST_AVX_1(double_v, short_v) { return AVX::zeroExtend(_mm_packs_epi32(_mm256_cvttpd_epi32(x.data()), _mm_setzero_si128())); }
1282 Vc_SIMD_CAST_AVX_1( float_v, short_v) {
1283 const auto tmp = _mm256_cvttps_epi32(x.data());
1284 return AVX::zeroExtend(_mm_packs_epi32(AVX::lo128(tmp), AVX::hi128(tmp)));
1285 }
1286 Vc_SIMD_CAST_AVX_1( int_v, short_v) { return AVX::zeroExtend(AVX::convert< int, short>(x.data())); }
1287 Vc_SIMD_CAST_AVX_1( uint_v, short_v) { return AVX::zeroExtend(AVX::convert<uint, short>(x.data())); }
1288 Vc_SIMD_CAST_AVX_1(ushort_v, short_v) { return x.data(); }
1289 #endif
1290
1291
// Definitions (Vc_IMPL_AVX2 only): two AVX2 vectors (x0, x1) -> one AVX2::short_v.
1292 #ifdef Vc_IMPL_AVX2
// double_v: both inputs are truncated to int32 (128 bits each) and packed together
// into eight int16 with signed saturation; the 128-bit result is widened with
// AVX::zeroExtend.
1293 Vc_SIMD_CAST_AVX_2(double_v, short_v) {
1294 const auto tmp0 = _mm256_cvttpd_epi32(x0.data());
1295 const auto tmp1 = _mm256_cvttpd_epi32(x1.data());
1296 return AVX::zeroExtend(_mm_packs_epi32(tmp0, tmp1));
1297 }
// float_v: each input is first cast to int_v, then the two-int_v -> short_v cast
// is applied.
1298 Vc_SIMD_CAST_AVX_2( float_v, short_v) {
1299 using AVX2::short_v;
1300 using AVX2::int_v;
1301 return simd_cast<short_v>(simd_cast<int_v>(x0), simd_cast<int_v>(x1));
1302 }
// int_v: per 128-bit lane the shuffle gathers bytes 0,1,4,5,8,9,12,13 (the low 16 bits
// of each 32-bit element) into the low 8 bytes; index -0x80 has its top bit set, which
// makes _mm256_shuffle_epi8 write zero to the upper 8 bytes. This truncates without
// saturation. _mm256_unpacklo_epi64 then interleaves the low 64 bits of a and b per
// lane, giving [a.lane0, b.lane0, a.lane1, b.lane1]; the final
// Mem::permute4x64<X0, X2, X1, X3> presumably swaps the two middle 64-bit quarters so
// that all of x0's elements precede x1's.
1303 Vc_SIMD_CAST_AVX_2( int_v, short_v) {
1304 const auto shuf = _mm256_setr_epi8(
1305 0, 1, 4, 5, 8, 9, 12, 13, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80,
1306 0, 1, 4, 5, 8, 9, 12, 13, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80);
1307 auto a = _mm256_shuffle_epi8(x0.data(), shuf);
1308 auto b = _mm256_shuffle_epi8(x1.data(), shuf);
1309 return Mem::permute4x64<X0, X2, X1, X3>(_mm256_unpacklo_epi64(a, b));
1310 }
// uint_v: identical byte-level sequence to the int_v version above (truncation to the
// low 16 bits does not depend on signedness).
1311 Vc_SIMD_CAST_AVX_2( uint_v, short_v) {
1312 const auto shuf = _mm256_setr_epi8(
1313 0, 1, 4, 5, 8, 9, 12, 13, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80,
1314 0, 1, 4, 5, 8, 9, 12, 13, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80);
1315 auto a = _mm256_shuffle_epi8(x0.data(), shuf);
1316 auto b = _mm256_shuffle_epi8(x1.data(), shuf);
1317 return Mem::permute4x64<X0, X2, X1, X3>(_mm256_unpacklo_epi64(a, b));
1318 }
1319 #endif
1320
1321
// Definition (Vc_IMPL_AVX2 only): three AVX2::double_v -> one AVX2::short_v.
// Each input is truncated to int32 (128 bits). x0 and x1 are packed with signed
// saturation into the first 128-bit half passed to AVX::concat; x2 is packed together
// with an all-zero operand into the second half, so the elements that a fourth
// input would have supplied are zero.
1322 #ifdef Vc_IMPL_AVX2
1323 Vc_SIMD_CAST_AVX_3(double_v, short_v) {
1324 const auto tmp0 = _mm256_cvttpd_epi32(x0.data());
1325 const auto tmp1 = _mm256_cvttpd_epi32(x1.data());
1326 const auto tmp2 = _mm256_cvttpd_epi32(x2.data());
1327 return AVX::concat(_mm_packs_epi32(tmp0, tmp1), _mm_packs_epi32(tmp2, _mm_setzero_si128()));
1328 }
1329 #endif
1330
1331
// Definition (Vc_IMPL_AVX2 only): four AVX2::double_v -> one AVX2::short_v.
// Each input is truncated to int32 (128 bits); (x0, x1) and (x2, x3) are packed
// pairwise to int16 with signed saturation and the two halves are joined with
// AVX::concat, the (x0, x1) half first.
1332 #ifdef Vc_IMPL_AVX2
1333 Vc_SIMD_CAST_AVX_4(double_v, short_v) {
1334 const auto tmp0 = _mm256_cvttpd_epi32(x0.data());
1335 const auto tmp1 = _mm256_cvttpd_epi32(x1.data());
1336 const auto tmp2 = _mm256_cvttpd_epi32(x2.data());
1337 const auto tmp3 = _mm256_cvttpd_epi32(x3.data());
1338 return AVX::concat(_mm_packs_epi32(tmp0, tmp1), _mm_packs_epi32(tmp2, tmp3));
1339 }
1340 #endif
1341
1342
// Definitions (Vc_IMPL_AVX2 only): one AVX2 vector -> AVX2::ushort_v.
// These mirror the -> short_v definitions, except that the floating-point paths pack
// with _mm_packus_epi32 (unsigned saturation) instead of _mm_packs_epi32.
// double_v: truncate to int32, pack with an all-zero second operand.
// float_v:  truncate to eight int32, pack the two 128-bit halves together.
// int_v / uint_v: narrowed by the AVX::convert helper.
// short_v:  the register is returned unchanged (same bits, unsigned interpretation).
// Results narrower than 256 bits are widened with AVX::zeroExtend.
1343 #ifdef Vc_IMPL_AVX2
1344 Vc_SIMD_CAST_AVX_1(double_v, ushort_v) {
1345 const auto tmp = _mm256_cvttpd_epi32(x.data());
1346 return AVX::zeroExtend(_mm_packus_epi32(tmp, _mm_setzero_si128()));
1347 }
1348 Vc_SIMD_CAST_AVX_1( float_v, ushort_v) {
1349 const auto tmp = _mm256_cvttps_epi32(x.data());
1350 return AVX::zeroExtend(_mm_packus_epi32(AVX::lo128(tmp), AVX::hi128(tmp)));
1351 }
1352 Vc_SIMD_CAST_AVX_1( int_v, ushort_v) { return AVX::zeroExtend(AVX::convert< int, ushort>(x.data())); }
1353 Vc_SIMD_CAST_AVX_1( uint_v, ushort_v) { return AVX::zeroExtend(AVX::convert<uint, ushort>(x.data())); }
1354 Vc_SIMD_CAST_AVX_1( short_v, ushort_v) { return x.data(); }
1355 #endif
1356
1357
// Definitions (Vc_IMPL_AVX2 only): two AVX2 vectors (x0, x1) -> one AVX2::ushort_v.
1358 #ifdef Vc_IMPL_AVX2
// double_v: both inputs are truncated to int32 (128 bits each) and packed together
// into eight uint16 with unsigned saturation; widened with AVX::zeroExtend.
1359 Vc_SIMD_CAST_AVX_2(double_v, ushort_v) {
1360 const auto tmp0 = _mm256_cvttpd_epi32(x0.data());
1361 const auto tmp1 = _mm256_cvttpd_epi32(x1.data());
1362 return AVX::zeroExtend(_mm_packus_epi32(tmp0, tmp1));
1363 }
// float_v: each input is first cast to int_v, then the two-int_v -> ushort_v cast
// is applied.
1364 Vc_SIMD_CAST_AVX_2( float_v, ushort_v) {
1365 using AVX2::ushort_v;
1366 using AVX2::int_v;
1367 return simd_cast<ushort_v>(simd_cast<int_v>(x0), simd_cast<int_v>(x1));
1368 }
// int_v: three rounds of 16-bit interleaving (each working per 128-bit lane) sort the
// 16-bit halves so that, in every lane, the low halves of x0's four elements end up
// in the low 64 bits followed by the low halves of x1's four elements; the high
// halves are discarded (truncation, no saturation). The final
// Mem::permute4x64<X0, X2, X1, X3> presumably swaps the two middle 64-bit quarters so
// that all of x0's elements precede x1's.
1369 Vc_SIMD_CAST_AVX_2( int_v, ushort_v) {
1370 auto tmp0 = _mm256_unpacklo_epi16(x0.data(), x1.data());
1371 auto tmp1 = _mm256_unpackhi_epi16(x0.data(), x1.data());
1372 auto tmp2 = _mm256_unpacklo_epi16(tmp0, tmp1);
1373 auto tmp3 = _mm256_unpackhi_epi16(tmp0, tmp1);
1374 return Mem::permute4x64<X0, X2, X1, X3>(_mm256_unpacklo_epi16(tmp2, tmp3));
1375 }
// uint_v: identical instruction sequence to the int_v version above.
1376 Vc_SIMD_CAST_AVX_2( uint_v, ushort_v) {
1377 auto tmp0 = _mm256_unpacklo_epi16(x0.data(), x1.data());
1378 auto tmp1 = _mm256_unpackhi_epi16(x0.data(), x1.data());
1379 auto tmp2 = _mm256_unpacklo_epi16(tmp0, tmp1);
1380 auto tmp3 = _mm256_unpackhi_epi16(tmp0, tmp1);
1381 return Mem::permute4x64<X0, X2, X1, X3>(_mm256_unpacklo_epi16(tmp2, tmp3));
1382 }
1383 #endif
1384
1385
// Definition (Vc_IMPL_AVX2 only): three AVX2::double_v -> one AVX2::ushort_v.
// Each input is truncated to int32 (128 bits). x0 and x1 are packed with unsigned
// saturation into the first half passed to AVX::concat; x2 is packed together with an
// all-zero operand into the second half, so the elements that a fourth input would
// have supplied are zero.
1386 #ifdef Vc_IMPL_AVX2
1387 Vc_SIMD_CAST_AVX_3(double_v, ushort_v) {
1388 const auto tmp0 = _mm256_cvttpd_epi32(x0.data());
1389 const auto tmp1 = _mm256_cvttpd_epi32(x1.data());
1390 const auto tmp2 = _mm256_cvttpd_epi32(x2.data());
1391 return AVX::concat(_mm_packus_epi32(tmp0, tmp1),
1392 _mm_packus_epi32(tmp2, _mm_setzero_si128()));
1393 }
1394 #endif
1395
1396
// Definition (Vc_IMPL_AVX2 only): four AVX2::double_v -> one AVX2::ushort_v.
// Each input is truncated to int32 (128 bits); (x0, x1) and (x2, x3) are packed
// pairwise to uint16 with unsigned saturation and the two halves are joined with
// AVX::concat, the (x0, x1) half first.
1397 #ifdef Vc_IMPL_AVX2
1398 Vc_SIMD_CAST_AVX_4(double_v, ushort_v) {
1399 const auto tmp0 = _mm256_cvttpd_epi32(x0.data());
1400 const auto tmp1 = _mm256_cvttpd_epi32(x1.data());
1401 const auto tmp2 = _mm256_cvttpd_epi32(x2.data());
1402 const auto tmp3 = _mm256_cvttpd_epi32(x3.data());
1403 return AVX::concat(_mm_packus_epi32(tmp0, tmp1), _mm_packus_epi32(tmp2, tmp3));
1404 }
1405 #endif
1406
1407
// Definitions: one SSE vector -> AVX2::double_v (available without Vc_IMPL_AVX2).
// double_v: the 128-bit register is widened with AVX::zeroExtend.
// float_v:  _mm256_cvtps_pd converts four floats to four doubles.
// int_v:    _mm256_cvtepi32_pd converts four signed int32 to four doubles.
// uint_v:   only a signed int32 -> double conversion is used, so the input is first
//   shifted into signed range by subtracting _mm_setmin_epi32() (helper from
//   AvxIntrinsics; presumably INT_MIN in every element -- confirm), converted, and
//   then 2^31 (1u << 31) is added back in double precision.
// short_v / ushort_v: cast to SSE::int_v first, then reuse the int_v conversion.
1408 Vc_SIMD_CAST_1(SSE::double_v, AVX2::double_v) { return AVX::zeroExtend(x.data()); }
1409 Vc_SIMD_CAST_1(SSE:: float_v, AVX2::double_v) { return _mm256_cvtps_pd(x.data()); }
1410 Vc_SIMD_CAST_1(SSE:: int_v, AVX2::double_v) { return _mm256_cvtepi32_pd(x.data()); }
1411 Vc_SIMD_CAST_1(SSE:: uint_v, AVX2::double_v) { using namespace AvxIntrinsics; return _mm256_add_pd(_mm256_cvtepi32_pd(_mm_sub_epi32(x.data(), _mm_setmin_epi32())), set1_pd(1u << 31)); }
1412 Vc_SIMD_CAST_1(SSE:: short_v, AVX2::double_v) { return simd_cast<AVX2::double_v>(simd_cast<SSE::int_v>(x)); }
1413 Vc_SIMD_CAST_1(SSE::ushort_v, AVX2::double_v) { return simd_cast<AVX2::double_v>(simd_cast<SSE::int_v>(x)); }
1414
// Definitions: one SSE vector -> AVX2::float_v (available without Vc_IMPL_AVX2).
// double_v / uint_v: converted to SSE::float_v by the SSE simd_cast, then widened
//   with AVX::zeroExtend.
// float_v: widened with AVX::zeroExtend, no conversion.
// int_v:   _mm_cvtepi32_ps converts four int32 to floats, then AVX::zeroExtend.
// short_v / ushort_v: passed directly to AVX::convert, whose result is returned
//   without zeroExtend.
1415 Vc_SIMD_CAST_1(SSE::double_v, AVX2:: float_v) { return AVX::zeroExtend(simd_cast<SSE:: float_v>(x).data()); }
1416 Vc_SIMD_CAST_1(SSE:: float_v, AVX2:: float_v) { return AVX::zeroExtend(x.data()); }
1417 Vc_SIMD_CAST_1(SSE:: int_v, AVX2:: float_v) { return AVX::zeroExtend(_mm_cvtepi32_ps(x.data())); }
1418 Vc_SIMD_CAST_1(SSE:: uint_v, AVX2:: float_v) { return AVX::zeroExtend(simd_cast<SSE::float_v>(x).data()); }
1419 Vc_SIMD_CAST_1(SSE:: short_v, AVX2:: float_v) { return AVX::convert< short, float>(x.data()); }
1420 Vc_SIMD_CAST_1(SSE::ushort_v, AVX2:: float_v) { return AVX::convert<ushort, float>(x.data()); }
1421
// Definitions (Vc_IMPL_AVX2 only): one SSE vector -> AVX2 integer vector
// (int_v, uint_v, short_v, ushort_v). Three patterns are used:
//   - element type changes and the result fits 128 bits: cast to the SSE vector of
//     the target element type, then widen with AVX::zeroExtend;
//   - source and target elements have the same width (e.g. int_v <-> uint_v,
//     short_v <-> ushort_v): the bits are only widened with AVX::zeroExtend;
//   - 16-bit source to 32-bit target: AVX::convert widens the elements.
1422 #ifdef Vc_IMPL_AVX2
// SSE::double_v -> AVX2 integer vectors
1423 Vc_SIMD_CAST_1(SSE::double_v, AVX2:: int_v) { return AVX::zeroExtend(simd_cast<SSE:: int_v>(x).data()); }
1424 Vc_SIMD_CAST_1(SSE::double_v, AVX2:: uint_v) { return AVX::zeroExtend(simd_cast<SSE:: uint_v>(x).data()); }
1425 Vc_SIMD_CAST_1(SSE::double_v, AVX2:: short_v) { return AVX::zeroExtend(simd_cast<SSE:: short_v>(x).data()); }
1426 Vc_SIMD_CAST_1(SSE::double_v, AVX2::ushort_v) { return AVX::zeroExtend(simd_cast<SSE::ushort_v>(x).data()); }
1427
// SSE::float_v -> AVX2 integer vectors
1428 Vc_SIMD_CAST_1(SSE:: float_v, AVX2:: int_v) { return AVX::zeroExtend(simd_cast<SSE::int_v>(x).data()); }
1429 Vc_SIMD_CAST_1(SSE:: float_v, AVX2:: uint_v) { return AVX::zeroExtend(simd_cast<SSE::uint_v>(x).data()); }
1430 Vc_SIMD_CAST_1(SSE:: float_v, AVX2:: short_v) { return AVX::zeroExtend(simd_cast<SSE::short_v>(x).data()); }
1431 Vc_SIMD_CAST_1(SSE:: float_v, AVX2::ushort_v) { return AVX::zeroExtend(simd_cast<SSE::ushort_v>(x).data()); }
1432
// SSE integer vectors -> AVX2::int_v
1433 Vc_SIMD_CAST_1(SSE:: int_v, AVX2:: int_v) { return AVX::zeroExtend(x.data()); }
1434 Vc_SIMD_CAST_1(SSE:: uint_v, AVX2:: int_v) { return AVX::zeroExtend(x.data()); }
1435 Vc_SIMD_CAST_1(SSE:: short_v, AVX2:: int_v) { return AVX::convert< short, int>(x.data()); }
1436 Vc_SIMD_CAST_1(SSE::ushort_v, AVX2:: int_v) { return AVX::convert<ushort, int>(x.data()); }
1437
// SSE integer vectors -> AVX2::uint_v
1438 Vc_SIMD_CAST_1(SSE:: int_v, AVX2:: uint_v) { return AVX::zeroExtend(x.data()); }
1439 Vc_SIMD_CAST_1(SSE:: uint_v, AVX2:: uint_v) { return AVX::zeroExtend(x.data()); }
1440 Vc_SIMD_CAST_1(SSE:: short_v, AVX2:: uint_v) { return AVX::convert< short, uint>(x.data()); }
1441 Vc_SIMD_CAST_1(SSE::ushort_v, AVX2:: uint_v) { return AVX::convert<ushort, uint>(x.data()); }
1442
// SSE integer vectors -> AVX2::short_v
1443 Vc_SIMD_CAST_1(SSE:: int_v, AVX2:: short_v) { return AVX::zeroExtend(simd_cast<SSE::short_v>(x).data()); }
1444 Vc_SIMD_CAST_1(SSE:: uint_v, AVX2:: short_v) { return AVX::zeroExtend(simd_cast<SSE::short_v>(x).data()); }
1445 Vc_SIMD_CAST_1(SSE:: short_v, AVX2:: short_v) { return AVX::zeroExtend(x.data()); }
1446 Vc_SIMD_CAST_1(SSE::ushort_v, AVX2:: short_v) { return AVX::zeroExtend(x.data()); }
1447
// SSE integer vectors -> AVX2::ushort_v
1448 Vc_SIMD_CAST_1(SSE:: int_v, AVX2::ushort_v) { return AVX::zeroExtend(simd_cast<SSE::ushort_v>(x).data()); }
1449 Vc_SIMD_CAST_1(SSE:: uint_v, AVX2::ushort_v) { return AVX::zeroExtend(simd_cast<SSE::ushort_v>(x).data()); }
1450 Vc_SIMD_CAST_1(SSE:: short_v, AVX2::ushort_v) { return AVX::zeroExtend(x.data()); }
1451 Vc_SIMD_CAST_1(SSE::ushort_v, AVX2::ushort_v) { return AVX::zeroExtend(x.data()); }
1452 #endif
1453
1454
// Definition: two SSE::double_v -> one AVX2::double_v. No conversion is needed; the
// two 128-bit registers are joined with AVX::concat, x0 passed first.
1455 Vc_SIMD_CAST_2(SSE::double_v, AVX2::double_v) { return AVX::concat(x0.data(), x1.data()); }
1456
// Definitions: two SSE vectors (x0, x1) -> one AVX2::float_v (available without
// Vc_IMPL_AVX2).
// double_v: the two-argument SSE cast produces one SSE::float_v, widened with
//   AVX::zeroExtend.
// float_v:  the two registers are joined with AVX::concat, no conversion.
// int_v / uint_v: the registers are joined with AVX::concat first and the combined
//   256-bit value is then converted with AVX::convert.
1457 Vc_SIMD_CAST_2(SSE::double_v, AVX2:: float_v) { return AVX::zeroExtend(simd_cast<SSE:: float_v>(x0, x1).data()); }
1458 Vc_SIMD_CAST_2(SSE:: float_v, AVX2:: float_v) { return AVX::concat(x0.data(), x1.data()); }
1459 Vc_SIMD_CAST_2(SSE:: int_v, AVX2:: float_v) { return AVX::convert< int, float>(AVX::concat(x0.data(), x1.data())); }
1460 Vc_SIMD_CAST_2(SSE:: uint_v, AVX2:: float_v) { return AVX::convert<uint, float>(AVX::concat(x0.data(), x1.data())); }
1461
// Definitions (Vc_IMPL_AVX2 only): two SSE vectors (x0, x1) -> one AVX2 integer
// vector. Patterns used:
//   - the combined result fits 128 bits: use the two-argument SSE cast to the SSE
//     vector of the target element type and widen with AVX::zeroExtend;
//   - source and target elements have the same width: join the registers with
//     AVX::concat (x0 first), no conversion;
//   - SSE::float_v -> int_v / uint_v: build an AVX2::float_v from both inputs first,
//     then apply the one-argument AVX2 cast.
1462 #ifdef Vc_IMPL_AVX2
// 2 x SSE::double_v
1463 Vc_SIMD_CAST_2(SSE::double_v, AVX2:: int_v) { return AVX::zeroExtend(simd_cast<SSE:: int_v>(x0, x1).data()); }
1464 Vc_SIMD_CAST_2(SSE::double_v, AVX2:: uint_v) { return AVX::zeroExtend(simd_cast<SSE:: uint_v>(x0, x1).data()); }
1465 Vc_SIMD_CAST_2(SSE::double_v, AVX2:: short_v) { return AVX::zeroExtend(simd_cast<SSE:: short_v>(x0, x1).data()); }
1466 Vc_SIMD_CAST_2(SSE::double_v, AVX2::ushort_v) { return AVX::zeroExtend(simd_cast<SSE::ushort_v>(x0, x1).data()); }
1467
// 2 x SSE::float_v
1468 Vc_SIMD_CAST_2(SSE:: float_v, AVX2:: int_v) { return simd_cast<AVX2:: int_v>(simd_cast<AVX2::float_v>(x0, x1)); }
1469 Vc_SIMD_CAST_2(SSE:: float_v, AVX2:: uint_v) { return simd_cast<AVX2::uint_v>(simd_cast<AVX2::float_v>(x0, x1)); }
1470 Vc_SIMD_CAST_2(SSE:: float_v, AVX2:: short_v) { return AVX::zeroExtend(simd_cast<SSE:: short_v>(x0, x1).data()); }
1471 Vc_SIMD_CAST_2(SSE:: float_v, AVX2::ushort_v) { return AVX::zeroExtend(simd_cast<SSE::ushort_v>(x0, x1).data()); }
1472
// 2 x SSE 32-bit integer vectors -> AVX2::int_v
1473 Vc_SIMD_CAST_2(SSE:: int_v, AVX2:: int_v) { return AVX::concat(x0.data(), x1.data()); }
1474 Vc_SIMD_CAST_2(SSE:: uint_v, AVX2:: int_v) { return AVX::concat(x0.data(), x1.data()); }
1475
// 2 x SSE 32-bit integer vectors -> AVX2::uint_v
1476 Vc_SIMD_CAST_2(SSE:: int_v, AVX2:: uint_v) { return AVX::concat(x0.data(), x1.data()); }
1477 Vc_SIMD_CAST_2(SSE:: uint_v, AVX2:: uint_v) { return AVX::concat(x0.data(), x1.data()); }
1478
// 2 x SSE integer vectors -> AVX2::short_v
1479 Vc_SIMD_CAST_2(SSE:: int_v, AVX2:: short_v) { return AVX::zeroExtend(simd_cast<SSE:: short_v>(x0, x1).data()); }
1480 Vc_SIMD_CAST_2(SSE:: uint_v, AVX2:: short_v) { return AVX::zeroExtend(simd_cast<SSE:: short_v>(x0, x1).data()); }
1481 Vc_SIMD_CAST_2(SSE:: short_v, AVX2:: short_v) { return AVX::concat(x0.data(), x1.data()); }
1482 Vc_SIMD_CAST_2(SSE::ushort_v, AVX2:: short_v) { return AVX::concat(x0.data(), x1.data()); }
1483
// 2 x SSE integer vectors -> AVX2::ushort_v
1484 Vc_SIMD_CAST_2(SSE:: int_v, AVX2::ushort_v) { return AVX::zeroExtend(simd_cast<SSE::ushort_v>(x0, x1).data()); }
1485 Vc_SIMD_CAST_2(SSE:: uint_v, AVX2::ushort_v) { return AVX::zeroExtend(simd_cast<SSE::ushort_v>(x0, x1).data()); }
1486 Vc_SIMD_CAST_2(SSE:: short_v, AVX2::ushort_v) { return AVX::concat(x0.data(), x1.data()); }
1487 Vc_SIMD_CAST_2(SSE::ushort_v, AVX2::ushort_v) { return AVX::concat(x0.data(), x1.data()); }
1488 #endif
1489
1490 Vc_SIMD_CAST_3(SSE::double_v, AVX2:: float_v) { return simd_cast<AVX2:: float_v>(simd_cast<AVX2::double_v>(x0, x1), simd_cast<AVX2::double_v>(x2)); }
1491
#ifdef Vc_IMPL_AVX2
// Three SSE vectors -> one AVX2 integer vector (only compiled with Vc_IMPL_AVX2).

// Source SSE::double_v.
Vc_SIMD_CAST_3(SSE::double_v, AVX2::int_v)
{
    const AVX2::double_v first = simd_cast<AVX2::double_v>(x0, x1);
    const AVX2::double_v second = simd_cast<AVX2::double_v>(x2);
    return simd_cast<AVX2::int_v>(first, second);
}
Vc_SIMD_CAST_3(SSE::double_v, AVX2::uint_v)
{
    const AVX2::double_v first = simd_cast<AVX2::double_v>(x0, x1);
    const AVX2::double_v second = simd_cast<AVX2::double_v>(x2);
    return simd_cast<AVX2::uint_v>(first, second);
}
Vc_SIMD_CAST_3(SSE::double_v, AVX2::short_v)
{
    const SSE::short_v narrowed = simd_cast<SSE::short_v>(x0, x1, x2);
    return AVX::zeroExtend(narrowed.data());
}
Vc_SIMD_CAST_3(SSE::double_v, AVX2::ushort_v)
{
    const SSE::ushort_v narrowed = simd_cast<SSE::ushort_v>(x0, x1, x2);
    return AVX::zeroExtend(narrowed.data());
}

// Source SSE::float_v.
Vc_SIMD_CAST_3(SSE::float_v, AVX2::short_v)
{
    const AVX2::float_v first = simd_cast<AVX2::float_v>(x0, x1);
    const AVX2::float_v second = simd_cast<AVX2::float_v>(x2);
    return simd_cast<AVX2::short_v>(first, second);
}
Vc_SIMD_CAST_3(SSE::float_v, AVX2::ushort_v)
{
    const AVX2::float_v first = simd_cast<AVX2::float_v>(x0, x1);
    const AVX2::float_v second = simd_cast<AVX2::float_v>(x2);
    return simd_cast<AVX2::ushort_v>(first, second);
}

// 32-bit integer sources -> AVX2::short_v.
Vc_SIMD_CAST_3(SSE::int_v, AVX2::short_v)
{
    const AVX2::int_v first = simd_cast<AVX2::int_v>(x0, x1);
    const AVX2::int_v second = simd_cast<AVX2::int_v>(x2);
    return simd_cast<AVX2::short_v>(first, second);
}
Vc_SIMD_CAST_3(SSE::uint_v, AVX2::short_v)
{
    const AVX2::uint_v first = simd_cast<AVX2::uint_v>(x0, x1);
    const AVX2::uint_v second = simd_cast<AVX2::uint_v>(x2);
    return simd_cast<AVX2::short_v>(first, second);
}

// 32-bit integer sources -> AVX2::ushort_v.
Vc_SIMD_CAST_3(SSE::int_v, AVX2::ushort_v)
{
    const AVX2::int_v first = simd_cast<AVX2::int_v>(x0, x1);
    const AVX2::int_v second = simd_cast<AVX2::int_v>(x2);
    return simd_cast<AVX2::ushort_v>(first, second);
}
Vc_SIMD_CAST_3(SSE::uint_v, AVX2::ushort_v)
{
    const AVX2::uint_v first = simd_cast<AVX2::uint_v>(x0, x1);
    const AVX2::uint_v second = simd_cast<AVX2::uint_v>(x2);
    return simd_cast<AVX2::ushort_v>(first, second);
}
#endif
1507
1508
1509 Vc_SIMD_CAST_4(SSE::double_v, AVX2:: float_v) { return simd_cast<AVX2:: float_v>(simd_cast<AVX2::double_v>(x0, x1), simd_cast<AVX2::double_v>(x2, x3)); }
1510
#ifdef Vc_IMPL_AVX2
// Four SSE vectors -> one AVX2 integer vector (only compiled with Vc_IMPL_AVX2).

// Source SSE::double_v.
Vc_SIMD_CAST_4(SSE::double_v, AVX2::int_v)
{
    const AVX2::double_v first = simd_cast<AVX2::double_v>(x0, x1);
    const AVX2::double_v second = simd_cast<AVX2::double_v>(x2, x3);
    return simd_cast<AVX2::int_v>(first, second);
}
Vc_SIMD_CAST_4(SSE::double_v, AVX2::uint_v)
{
    const AVX2::double_v first = simd_cast<AVX2::double_v>(x0, x1);
    const AVX2::double_v second = simd_cast<AVX2::double_v>(x2, x3);
    return simd_cast<AVX2::uint_v>(first, second);
}
Vc_SIMD_CAST_4(SSE::double_v, AVX2::short_v)
{
    const SSE::short_v narrowed = simd_cast<SSE::short_v>(x0, x1, x2, x3);
    return AVX::zeroExtend(narrowed.data());
}
Vc_SIMD_CAST_4(SSE::double_v, AVX2::ushort_v)
{
    const SSE::ushort_v narrowed = simd_cast<SSE::ushort_v>(x0, x1, x2, x3);
    return AVX::zeroExtend(narrowed.data());
}

// Source SSE::float_v.
Vc_SIMD_CAST_4(SSE::float_v, AVX2::short_v)
{
    const AVX2::float_v first = simd_cast<AVX2::float_v>(x0, x1);
    const AVX2::float_v second = simd_cast<AVX2::float_v>(x2, x3);
    return simd_cast<AVX2::short_v>(first, second);
}
Vc_SIMD_CAST_4(SSE::float_v, AVX2::ushort_v)
{
    const AVX2::float_v first = simd_cast<AVX2::float_v>(x0, x1);
    const AVX2::float_v second = simd_cast<AVX2::float_v>(x2, x3);
    return simd_cast<AVX2::ushort_v>(first, second);
}

// 32-bit integer sources -> AVX2::short_v.
Vc_SIMD_CAST_4(SSE::int_v, AVX2::short_v)
{
    const AVX2::int_v first = simd_cast<AVX2::int_v>(x0, x1);
    const AVX2::int_v second = simd_cast<AVX2::int_v>(x2, x3);
    return simd_cast<AVX2::short_v>(first, second);
}
Vc_SIMD_CAST_4(SSE::uint_v, AVX2::short_v)
{
    const AVX2::uint_v first = simd_cast<AVX2::uint_v>(x0, x1);
    const AVX2::uint_v second = simd_cast<AVX2::uint_v>(x2, x3);
    return simd_cast<AVX2::short_v>(first, second);
}

// 32-bit integer sources -> AVX2::ushort_v.
Vc_SIMD_CAST_4(SSE::int_v, AVX2::ushort_v)
{
    const AVX2::int_v first = simd_cast<AVX2::int_v>(x0, x1);
    const AVX2::int_v second = simd_cast<AVX2::int_v>(x2, x3);
    return simd_cast<AVX2::ushort_v>(first, second);
}
Vc_SIMD_CAST_4(SSE::uint_v, AVX2::ushort_v)
{
    const AVX2::uint_v first = simd_cast<AVX2::uint_v>(x0, x1);
    const AVX2::uint_v second = simd_cast<AVX2::uint_v>(x2, x3);
    return simd_cast<AVX2::ushort_v>(first, second);
}
#endif
1526
1527
#ifdef Vc_IMPL_AVX2
// Five SSE::double_v -> one AVX2 16-bit vector, going through three AVX2::double_v
// intermediates (x0+x1, x2+x3, x4 alone).
Vc_SIMD_CAST_5(SSE::double_v, AVX2::short_v)
{
    const AVX2::double_v d01 = simd_cast<AVX2::double_v>(x0, x1);
    const AVX2::double_v d23 = simd_cast<AVX2::double_v>(x2, x3);
    const AVX2::double_v d4 = simd_cast<AVX2::double_v>(x4);
    return simd_cast<AVX2::short_v>(d01, d23, d4);
}
Vc_SIMD_CAST_5(SSE::double_v, AVX2::ushort_v)
{
    const AVX2::double_v d01 = simd_cast<AVX2::double_v>(x0, x1);
    const AVX2::double_v d23 = simd_cast<AVX2::double_v>(x2, x3);
    const AVX2::double_v d4 = simd_cast<AVX2::double_v>(x4);
    return simd_cast<AVX2::ushort_v>(d01, d23, d4);
}
#endif
1532
1533
#ifdef Vc_IMPL_AVX2
// Six SSE::double_v -> one AVX2 16-bit vector, going through three AVX2::double_v
// intermediates built from consecutive input pairs.
Vc_SIMD_CAST_6(SSE::double_v, AVX2::short_v)
{
    const AVX2::double_v d01 = simd_cast<AVX2::double_v>(x0, x1);
    const AVX2::double_v d23 = simd_cast<AVX2::double_v>(x2, x3);
    const AVX2::double_v d45 = simd_cast<AVX2::double_v>(x4, x5);
    return simd_cast<AVX2::short_v>(d01, d23, d45);
}
Vc_SIMD_CAST_6(SSE::double_v, AVX2::ushort_v)
{
    const AVX2::double_v d01 = simd_cast<AVX2::double_v>(x0, x1);
    const AVX2::double_v d23 = simd_cast<AVX2::double_v>(x2, x3);
    const AVX2::double_v d45 = simd_cast<AVX2::double_v>(x4, x5);
    return simd_cast<AVX2::ushort_v>(d01, d23, d45);
}
#endif
1538
1539
#ifdef Vc_IMPL_AVX2
// Seven SSE::double_v -> one AVX2 16-bit vector, going through four AVX2::double_v
// intermediates (three pairs plus x6 alone).
Vc_SIMD_CAST_7(SSE::double_v, AVX2::short_v)
{
    const AVX2::double_v d01 = simd_cast<AVX2::double_v>(x0, x1);
    const AVX2::double_v d23 = simd_cast<AVX2::double_v>(x2, x3);
    const AVX2::double_v d45 = simd_cast<AVX2::double_v>(x4, x5);
    const AVX2::double_v d6 = simd_cast<AVX2::double_v>(x6);
    return simd_cast<AVX2::short_v>(d01, d23, d45, d6);
}
Vc_SIMD_CAST_7(SSE::double_v, AVX2::ushort_v)
{
    const AVX2::double_v d01 = simd_cast<AVX2::double_v>(x0, x1);
    const AVX2::double_v d23 = simd_cast<AVX2::double_v>(x2, x3);
    const AVX2::double_v d45 = simd_cast<AVX2::double_v>(x4, x5);
    const AVX2::double_v d6 = simd_cast<AVX2::double_v>(x6);
    return simd_cast<AVX2::ushort_v>(d01, d23, d45, d6);
}
#endif
1544
1545
#ifdef Vc_IMPL_AVX2
// Eight SSE::double_v -> one AVX2 16-bit vector, going through four AVX2::double_v
// intermediates built from consecutive input pairs.
Vc_SIMD_CAST_8(SSE::double_v, AVX2::short_v)
{
    const AVX2::double_v d01 = simd_cast<AVX2::double_v>(x0, x1);
    const AVX2::double_v d23 = simd_cast<AVX2::double_v>(x2, x3);
    const AVX2::double_v d45 = simd_cast<AVX2::double_v>(x4, x5);
    const AVX2::double_v d67 = simd_cast<AVX2::double_v>(x6, x7);
    return simd_cast<AVX2::short_v>(d01, d23, d45, d67);
}
Vc_SIMD_CAST_8(SSE::double_v, AVX2::ushort_v)
{
    const AVX2::double_v d01 = simd_cast<AVX2::double_v>(x0, x1);
    const AVX2::double_v d23 = simd_cast<AVX2::double_v>(x2, x3);
    const AVX2::double_v d45 = simd_cast<AVX2::double_v>(x4, x5);
    const AVX2::double_v d67 = simd_cast<AVX2::double_v>(x6, x7);
    return simd_cast<AVX2::ushort_v>(d01, d23, d45, d67);
}
#endif
1550
1551
1552 Vc_SIMD_CAST_1(AVX2::double_v, SSE::double_v) { return AVX::lo128(x.data()); }
1553 Vc_SIMD_CAST_1(AVX2:: float_v, SSE:: float_v) { return AVX::lo128(x.data()); }
1554 #ifdef Vc_IMPL_AVX2
1555 Vc_SIMD_CAST_1(AVX2:: int_v, SSE:: int_v) { return AVX::lo128(x.data()); }
1556 Vc_SIMD_CAST_1(AVX2:: uint_v, SSE:: uint_v) { return AVX::lo128(x.data()); }
1557 Vc_SIMD_CAST_1(AVX2:: short_v, SSE:: short_v) { return AVX::lo128(x.data()); }
1558 Vc_SIMD_CAST_1(AVX2::ushort_v, SSE::ushort_v) { return AVX::lo128(x.data()); }
1559 #endif
1560
1561 Vc_SIMD_CAST_1(AVX2::double_v, SSE:: float_v) { return simd_cast<SSE:: float_v>(simd_cast<AVX2:: float_v>(x)); }
1562 Vc_SIMD_CAST_1(AVX2::double_v, SSE:: int_v) { return AVX::convert<double, int>(x.data()); }
1563 Vc_SIMD_CAST_1(AVX2::double_v, SSE:: uint_v) { return AVX::convert<double, unsigned int>(x.data()); }
1564 Vc_SIMD_CAST_1(AVX2::double_v, SSE:: short_v) { return AVX::convert<double, short>(x.data()); }
1565 Vc_SIMD_CAST_1(AVX2::double_v, SSE::ushort_v) { return AVX::convert<double, unsigned short>(x.data()); }
1566
1567 Vc_SIMD_CAST_1(AVX2:: float_v, SSE::double_v) { return simd_cast<SSE::double_v>(simd_cast<SSE:: float_v>(x)); }
1568 Vc_SIMD_CAST_1(AVX2:: float_v, SSE:: int_v) { return simd_cast<SSE:: int_v>(simd_cast<SSE:: float_v>(x)); }
1569 Vc_SIMD_CAST_1(AVX2:: float_v, SSE:: uint_v) { return simd_cast<SSE:: uint_v>(simd_cast<SSE:: float_v>(x)); }
1570 Vc_SIMD_CAST_1(AVX2:: float_v, SSE:: short_v) { return AVX::convert<float, short>(x.data()); }
1571 Vc_SIMD_CAST_1(AVX2:: float_v, SSE::ushort_v) { return AVX::convert<float, unsigned short>(x.data()); }
1572
#ifdef Vc_IMPL_AVX2
// One AVX2 integer vector -> SSE vector of another element type
// (only compiled with Vc_IMPL_AVX2).

// Source AVX2::int_v.
Vc_SIMD_CAST_1(AVX2::int_v, SSE::double_v)
{
    const auto low = AVX::lo128(x.data());
    return SSE::convert<int, double>(low);
}
Vc_SIMD_CAST_1(AVX2::int_v, SSE::float_v)
{
    const auto low = AVX::lo128(x.data());
    return SSE::convert<int, float>(low);
}
Vc_SIMD_CAST_1(AVX2::int_v, SSE::uint_v)
{
    return AVX::lo128(x.data());
}
Vc_SIMD_CAST_1(AVX2::int_v, SSE::short_v)
{
    return AVX::convert<int, short>(x.data());
}
Vc_SIMD_CAST_1(AVX2::int_v, SSE::ushort_v)
{
    return AVX::convert<int, ushort>(x.data());
}

// Source AVX2::uint_v.
Vc_SIMD_CAST_1(AVX2::uint_v, SSE::double_v)
{
    const auto low = AVX::lo128(x.data());
    return SSE::convert<uint, double>(low);
}
Vc_SIMD_CAST_1(AVX2::uint_v, SSE::float_v)
{
    const auto low = AVX::lo128(x.data());
    return SSE::convert<uint, float>(low);
}
Vc_SIMD_CAST_1(AVX2::uint_v, SSE::int_v)
{
    return AVX::lo128(x.data());
}
Vc_SIMD_CAST_1(AVX2::uint_v, SSE::short_v)
{
    return AVX::convert<uint, short>(x.data());
}
Vc_SIMD_CAST_1(AVX2::uint_v, SSE::ushort_v)
{
    return AVX::convert<uint, ushort>(x.data());
}

// Source AVX2::short_v: reduce to SSE::short_v first, then reuse the SSE casts.
Vc_SIMD_CAST_1(AVX2::short_v, SSE::double_v)
{
    const SSE::short_v lowShorts = simd_cast<SSE::short_v>(x);
    return simd_cast<SSE::double_v>(lowShorts);
}
Vc_SIMD_CAST_1(AVX2::short_v, SSE::float_v)
{
    const SSE::short_v lowShorts = simd_cast<SSE::short_v>(x);
    return simd_cast<SSE::float_v>(lowShorts);
}
Vc_SIMD_CAST_1(AVX2::short_v, SSE::int_v)
{
    const SSE::short_v lowShorts = simd_cast<SSE::short_v>(x);
    return simd_cast<SSE::int_v>(lowShorts);
}
Vc_SIMD_CAST_1(AVX2::short_v, SSE::uint_v)
{
    const SSE::short_v lowShorts = simd_cast<SSE::short_v>(x);
    return simd_cast<SSE::uint_v>(lowShorts);
}
Vc_SIMD_CAST_1(AVX2::short_v, SSE::ushort_v)
{
    const SSE::short_v lowShorts = simd_cast<SSE::short_v>(x);
    return simd_cast<SSE::ushort_v>(lowShorts);
}

// Source AVX2::ushort_v: reduce to SSE::ushort_v first, then reuse the SSE casts.
Vc_SIMD_CAST_1(AVX2::ushort_v, SSE::double_v)
{
    const SSE::ushort_v lowShorts = simd_cast<SSE::ushort_v>(x);
    return simd_cast<SSE::double_v>(lowShorts);
}
Vc_SIMD_CAST_1(AVX2::ushort_v, SSE::float_v)
{
    const SSE::ushort_v lowShorts = simd_cast<SSE::ushort_v>(x);
    return simd_cast<SSE::float_v>(lowShorts);
}
Vc_SIMD_CAST_1(AVX2::ushort_v, SSE::int_v)
{
    const SSE::ushort_v lowShorts = simd_cast<SSE::ushort_v>(x);
    return simd_cast<SSE::int_v>(lowShorts);
}
Vc_SIMD_CAST_1(AVX2::ushort_v, SSE::uint_v)
{
    const SSE::ushort_v lowShorts = simd_cast<SSE::ushort_v>(x);
    return simd_cast<SSE::uint_v>(lowShorts);
}
Vc_SIMD_CAST_1(AVX2::ushort_v, SSE::short_v)
{
    const SSE::ushort_v lowShorts = simd_cast<SSE::ushort_v>(x);
    return simd_cast<SSE::short_v>(lowShorts);
}
#endif
1598
1599
1600 Vc_SIMD_CAST_2(AVX2::double_v, SSE:: short_v) {
1601 const auto tmp0 = _mm256_cvttpd_epi32(x0.data());
1602 const auto tmp1 = _mm256_cvttpd_epi32(x1.data());
1603 return _mm_packs_epi32(tmp0, tmp1);
1604 }
1605 Vc_SIMD_CAST_2(AVX2::double_v, SSE::ushort_v) {
1606 const auto tmp0 = _mm256_cvttpd_epi32(x0.data());
1607 const auto tmp1 = _mm256_cvttpd_epi32(x1.data());
1608 return _mm_packus_epi32(tmp0, tmp1);
1609 }
1610
1611
1612 template <typename Return, typename T>
1613 Vc_INTRINSIC Vc_CONST Return
1614 simd_cast(Scalar::Vector<T> x,
1615 enable_if<std::is_same<Return, AVX2::double_v>::value>)
1616 {
1617 return AVX::zeroExtend(_mm_setr_pd(x.data(), 0.));
1618 }
1619 template <typename Return, typename T>
1620 Vc_INTRINSIC Vc_CONST Return
1621 simd_cast(Scalar::Vector<T> x,
1622 enable_if<std::is_same<Return, AVX2::float_v>::value>)
1623 {
1624 return AVX::zeroExtend(_mm_setr_ps(x.data(), 0.f, 0.f, 0.f));
1625 }
1626 #ifdef Vc_IMPL_AVX2
1627 template <typename Return, typename T>
1628 Vc_INTRINSIC Vc_CONST Return
1629 simd_cast(Scalar::Vector<T> x,
1630 enable_if<std::is_same<Return, AVX2::int_v>::value>)
1631 {
1632 return _mm256_setr_epi32(x.data(), 0, 0, 0, 0, 0, 0, 0);
1633 }
1634 template <typename Return, typename T>
1635 Vc_INTRINSIC Vc_CONST Return
1636 simd_cast(Scalar::Vector<T> x,
1637 enable_if<std::is_same<Return, AVX2::uint_v>::value>)
1638 {
1639 return _mm256_setr_epi32(uint(x.data()), 0, 0, 0, 0, 0, 0, 0);
1640 }
1641 template <typename Return, typename T>
1642 Vc_INTRINSIC Vc_CONST Return
1643 simd_cast(Scalar::Vector<T> x,
1644 enable_if<std::is_same<Return, AVX2::short_v>::value>)
1645 {
1646 return _mm256_setr_epi16(x.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1647 }
1648 template <typename Return, typename T>
1649 Vc_INTRINSIC Vc_CONST Return
1650 simd_cast(Scalar::Vector<T> x,
1651 enable_if<std::is_same<Return, AVX2::ushort_v>::value>)
1652 {
1653 return _mm256_setr_epi16(x.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1654 }
1655 #endif
1656
1657
1658 template <typename Return, typename T>
1659 Vc_INTRINSIC Vc_CONST Return
1660 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1,
1661 enable_if<std::is_same<Return, AVX2::double_v>::value>)
1662 {
1663 return AVX::zeroExtend(_mm_setr_pd(x0.data(), x1.data()));
1664 }
1665 template <typename Return, typename T>
1666 Vc_INTRINSIC Vc_CONST Return
1667 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1,
1668 enable_if<std::is_same<Return, AVX2::float_v>::value>)
1669 {
1670 return AVX::zeroExtend(_mm_setr_ps(x0.data(), x1.data(), 0.f, 0.f));
1671 }
1672 #ifdef Vc_IMPL_AVX2
1673 template <typename Return, typename T>
1674 Vc_INTRINSIC Vc_CONST Return
1675 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1,
1676 enable_if<std::is_same<Return, AVX2::int_v>::value>)
1677 {
1678 return _mm256_setr_epi32(x0.data(), x1.data(), 0, 0, 0, 0, 0, 0);
1679 }
1680 template <typename Return, typename T>
1681 Vc_INTRINSIC Vc_CONST Return
1682 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1,
1683 enable_if<std::is_same<Return, AVX2::uint_v>::value>)
1684 {
1685 return _mm256_setr_epi32(uint(x0.data()), uint(x1.data()), 0, 0, 0, 0, 0, 0);
1686 }
1687 template <typename Return, typename T>
1688 Vc_INTRINSIC Vc_CONST Return
1689 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1,
1690 enable_if<std::is_same<Return, AVX2::short_v>::value>)
1691 {
1692 return _mm256_setr_epi16(x0.data(), x1.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1693 }
1694 template <typename Return, typename T>
1695 Vc_INTRINSIC Vc_CONST Return
1696 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1,
1697 enable_if<std::is_same<Return, AVX2::ushort_v>::value>)
1698 {
1699 return _mm256_setr_epi16(x0.data(), x1.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1700 }
1701 #endif
1702
1703
1704 template <typename Return, typename T>
1705 Vc_INTRINSIC Vc_CONST Return
1706 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
1707 enable_if<std::is_same<Return, AVX2::double_v>::value>)
1708 {
1709 return _mm256_setr_pd(x0.data(), x1.data(), x2.data(), 0);
1710 }
1711 template <typename Return, typename T>
1712 Vc_INTRINSIC Vc_CONST Return
1713 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
1714 enable_if<std::is_same<Return, AVX2::float_v>::value>)
1715 {
1716 return AVX::zeroExtend(_mm_setr_ps(x0.data(), x1.data(), x2.data(), 0));
1717 }
1718 #ifdef Vc_IMPL_AVX2
1719 template <typename Return, typename T>
1720 Vc_INTRINSIC Vc_CONST Return
1721 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
1722 enable_if<std::is_same<Return, AVX2::int_v>::value>)
1723 {
1724 return _mm256_setr_epi32(x0.data(), x1.data(), x2.data(), 0, 0, 0, 0, 0);
1725 }
1726 template <typename Return, typename T>
1727 Vc_INTRINSIC Vc_CONST Return
1728 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
1729 enable_if<std::is_same<Return, AVX2::uint_v>::value>)
1730 {
1731 return _mm256_setr_epi32(uint(x0.data()), uint(x1.data()), uint(x2.data()), 0, 0, 0,
1732 0, 0);
1733 }
1734 template <typename Return, typename T>
1735 Vc_INTRINSIC Vc_CONST Return
1736 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
1737 enable_if<std::is_same<Return, AVX2::short_v>::value>)
1738 {
1739 return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1740 }
1741 template <typename Return, typename T>
1742 Vc_INTRINSIC Vc_CONST Return
1743 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
1744 enable_if<std::is_same<Return, AVX2::ushort_v>::value>)
1745 {
1746 return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1747 }
1748 #endif
1749
1750
1751 template <typename Return, typename T>
1752 Vc_INTRINSIC Vc_CONST Return
1753 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
1754 Scalar::Vector<T> x3,
1755 enable_if<std::is_same<Return, AVX2::double_v>::value>)
1756 {
1757 return _mm256_setr_pd(x0.data(), x1.data(), x2.data(), x3.data());
1758 }
1759 template <typename Return, typename T>
1760 Vc_INTRINSIC Vc_CONST Return
1761 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
1762 Scalar::Vector<T> x3,
1763 enable_if<std::is_same<Return, AVX2::float_v>::value>)
1764 {
1765 return AVX::zeroExtend(_mm_setr_ps(x0.data(), x1.data(), x2.data(), x3.data()));
1766 }
1767 #ifdef Vc_IMPL_AVX2
1768 template <typename Return, typename T>
1769 Vc_INTRINSIC Vc_CONST Return
1770 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
1771 Scalar::Vector<T> x3,
1772 enable_if<std::is_same<Return, AVX2::int_v>::value>)
1773 {
1774 return _mm256_setr_epi32(x0.data(), x1.data(), x2.data(), x3.data(), 0, 0, 0, 0);
1775 }
1776 template <typename Return, typename T>
1777 Vc_INTRINSIC Vc_CONST Return
1778 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
1779 Scalar::Vector<T> x3,
1780 enable_if<std::is_same<Return, AVX2::uint_v>::value>)
1781 {
1782 return _mm256_setr_epi32(uint(x0.data()), uint(x1.data()), uint(x2.data()),
1783 uint(x3.data()), 0, 0, 0, 0);
1784 }
1785 template <typename Return, typename T>
1786 Vc_INTRINSIC Vc_CONST Return
1787 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
1788 Scalar::Vector<T> x3,
1789 enable_if<std::is_same<Return, AVX2::short_v>::value>)
1790 {
1791 return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1792 }
1793 template <typename Return, typename T>
1794 Vc_INTRINSIC Vc_CONST Return
1795 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
1796 Scalar::Vector<T> x3,
1797 enable_if<std::is_same<Return, AVX2::ushort_v>::value>)
1798 {
1799 return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1800 }
1801 #endif
1802
1803
1804 template <typename Return, typename T>
1805 Vc_INTRINSIC Vc_CONST Return
1806 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
1807 Scalar::Vector<T> x3, Scalar::Vector<T> x4,
1808 enable_if<std::is_same<Return, AVX2::float_v>::value>)
1809 {
1810 return _mm256_setr_ps(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), 0, 0, 0);
1811 }
1812 #ifdef Vc_IMPL_AVX2
1813 template <typename Return, typename T>
1814 Vc_INTRINSIC Vc_CONST Return
1815 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
1816 Scalar::Vector<T> x3, Scalar::Vector<T> x4,
1817 enable_if<std::is_same<Return, AVX2::int_v>::value>)
1818 {
1819 return _mm256_setr_epi32(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), 0, 0, 0);
1820 }
1821 template <typename Return, typename T>
1822 Vc_INTRINSIC Vc_CONST Return
1823 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
1824 Scalar::Vector<T> x3, Scalar::Vector<T> x4,
1825 enable_if<std::is_same<Return, AVX2::uint_v>::value>)
1826 {
1827 return _mm256_setr_epi32(uint(x0.data()), uint(x1.data()), uint(x2.data()),
1828 uint(x3.data()), uint(x4.data()), 0, 0, 0);
1829 }
1830 template <typename Return, typename T>
1831 Vc_INTRINSIC Vc_CONST Return
1832 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
1833 Scalar::Vector<T> x3, Scalar::Vector<T> x4,
1834 enable_if<std::is_same<Return, AVX2::short_v>::value>)
1835 {
1836 return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1837 }
1838 template <typename Return, typename T>
1839 Vc_INTRINSIC Vc_CONST Return
1840 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
1841 Scalar::Vector<T> x3, Scalar::Vector<T> x4,
1842 enable_if<std::is_same<Return, AVX2::ushort_v>::value>)
1843 {
1844 return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1845 }
1846 #endif
1847
1848
1849 template <typename Return, typename T>
1850 Vc_INTRINSIC Vc_CONST Return
1851 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
1852 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
1853 enable_if<std::is_same<Return, AVX2::float_v>::value>)
1854 {
1855 return _mm256_setr_ps(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
1856 x5.data(), 0, 0);
1857 }
1858 #ifdef Vc_IMPL_AVX2
1859 template <typename Return, typename T>
1860 Vc_INTRINSIC Vc_CONST Return
1861 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
1862 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
1863 enable_if<std::is_same<Return, AVX2::int_v>::value>)
1864 {
1865 return _mm256_setr_epi32(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
1866 x5.data(), 0, 0);
1867 }
1868 template <typename Return, typename T>
1869 Vc_INTRINSIC Vc_CONST Return
1870 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
1871 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
1872 enable_if<std::is_same<Return, AVX2::uint_v>::value>)
1873 {
1874 return _mm256_setr_epi32(uint(x0.data()), uint(x1.data()), uint(x2.data()),
1875 uint(x3.data()), uint(x4.data()), uint(x5.data()), 0, 0);
1876 }
1877 template <typename Return, typename T>
1878 Vc_INTRINSIC Vc_CONST Return
1879 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
1880 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
1881 enable_if<std::is_same<Return, AVX2::short_v>::value>)
1882 {
1883 return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
1884 x5.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1885 }
1886 template <typename Return, typename T>
1887 Vc_INTRINSIC Vc_CONST Return
1888 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
1889 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
1890 enable_if<std::is_same<Return, AVX2::ushort_v>::value>)
1891 {
1892 return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
1893 x5.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
1894 }
1895 #endif
1896
1897
1898 template <typename Return, typename T>
1899 Vc_INTRINSIC Vc_CONST Return
1900 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
1901 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
1902 Scalar::Vector<T> x6,
1903 enable_if<std::is_same<Return, AVX2::float_v>::value>)
1904 {
1905 return _mm256_setr_ps(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
1906 x5.data(), x6.data(), 0);
1907 }
1908 #ifdef Vc_IMPL_AVX2
1909 template <typename Return, typename T>
1910 Vc_INTRINSIC Vc_CONST Return
1911 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
1912 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
1913 Scalar::Vector<T> x6,
1914 enable_if<std::is_same<Return, AVX2::int_v>::value>)
1915 {
1916 return _mm256_setr_epi32(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
1917 x5.data(), x6.data(), 0);
1918 }
1919 template <typename Return, typename T>
1920 Vc_INTRINSIC Vc_CONST Return
1921 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
1922 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
1923 Scalar::Vector<T> x6,
1924 enable_if<std::is_same<Return, AVX2::uint_v>::value>)
1925 {
1926 return _mm256_setr_epi32(uint(x0.data()), uint(x1.data()), uint(x2.data()),
1927 uint(x3.data()), uint(x4.data()), uint(x5.data()),
1928 uint(x6.data()), 0);
1929 }
1930 template <typename Return, typename T>
1931 Vc_INTRINSIC Vc_CONST Return
1932 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
1933 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
1934 Scalar::Vector<T> x6,
1935 enable_if<std::is_same<Return, AVX2::short_v>::value>)
1936 {
1937 return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
1938 x5.data(), x6.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0);
1939 }
1940 template <typename Return, typename T>
1941 Vc_INTRINSIC Vc_CONST Return
1942 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
1943 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
1944 Scalar::Vector<T> x6,
1945 enable_if<std::is_same<Return, AVX2::ushort_v>::value>)
1946 {
1947 return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
1948 x5.data(), x6.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0);
1949 }
1950 #endif
1951
1952
1953 template <typename Return, typename T>
1954 Vc_INTRINSIC Vc_CONST Return
1955 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
1956 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
1957 Scalar::Vector<T> x6, Scalar::Vector<T> x7,
1958 enable_if<std::is_same<Return, AVX2::float_v>::value>)
1959 {
1960 return _mm256_setr_ps(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
1961 x5.data(), x6.data(), x7.data());
1962 }
1963 #ifdef Vc_IMPL_AVX2
1964 template <typename Return, typename T>
1965 Vc_INTRINSIC Vc_CONST Return
1966 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
1967 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
1968 Scalar::Vector<T> x6, Scalar::Vector<T> x7,
1969 enable_if<std::is_same<Return, AVX2::int_v>::value>)
1970 {
1971 return _mm256_setr_epi32(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
1972 x5.data(), x6.data(), x7.data());
1973 }
1974 template <typename Return, typename T>
1975 Vc_INTRINSIC Vc_CONST Return
1976 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
1977 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
1978 Scalar::Vector<T> x6, Scalar::Vector<T> x7,
1979 enable_if<std::is_same<Return, AVX2::uint_v>::value>)
1980 {
1981 return _mm256_setr_epi32(uint(x0.data()), uint(x1.data()), uint(x2.data()),
1982 uint(x3.data()), uint(x4.data()), uint(x5.data()),
1983 uint(x6.data()), uint(x7.data()));
1984 }
1985 template <typename Return, typename T>
1986 Vc_INTRINSIC Vc_CONST Return
1987 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
1988 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
1989 Scalar::Vector<T> x6, Scalar::Vector<T> x7,
1990 enable_if<std::is_same<Return, AVX2::short_v>::value>)
1991 {
1992 return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
1993 x5.data(), x6.data(), x7.data(), 0, 0, 0, 0, 0, 0, 0, 0);
1994 }
1995 template <typename Return, typename T>
1996 Vc_INTRINSIC Vc_CONST Return
1997 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
1998 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
1999 Scalar::Vector<T> x6, Scalar::Vector<T> x7,
2000 enable_if<std::is_same<Return, AVX2::ushort_v>::value>)
2001 {
2002 return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
2003 x5.data(), x6.data(), x7.data(), 0, 0, 0, 0, 0, 0, 0, 0);
2004 }
2005 #endif
2006
2007
2008 #ifdef Vc_IMPL_AVX2
2009 template <typename Return, typename T>
2010 Vc_INTRINSIC Vc_CONST Return
2011 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
2012 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
2013 Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
2014 enable_if<std::is_same<Return, AVX2::short_v>::value>)
2015 {
2016 return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
2017 x5.data(), x6.data(), x7.data(), x8.data(), 0, 0, 0, 0, 0, 0,
2018 0);
2019 }
2020 template <typename Return, typename T>
2021 Vc_INTRINSIC Vc_CONST Return
2022 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
2023 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
2024 Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
2025 enable_if<std::is_same<Return, AVX2::ushort_v>::value>)
2026 {
2027 return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
2028 x5.data(), x6.data(), x7.data(), x8.data(), 0, 0, 0, 0, 0, 0,
2029 0);
2030 }
2031 #endif
2032
2033
2034 #ifdef Vc_IMPL_AVX2
2035 template <typename Return, typename T>
2036 Vc_INTRINSIC Vc_CONST Return
2037 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
2038 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
2039 Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
2040 Scalar::Vector<T> x9, enable_if<std::is_same<Return, AVX2::short_v>::value>)
2041 {
2042 return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
2043 x5.data(), x6.data(), x7.data(), x8.data(), x9.data(), 0, 0,
2044 0, 0, 0, 0);
2045 }
2046 template <typename Return, typename T>
2047 Vc_INTRINSIC Vc_CONST Return
2048 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
2049 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
2050 Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
2051 Scalar::Vector<T> x9, enable_if<std::is_same<Return, AVX2::ushort_v>::value>)
2052 {
2053 return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
2054 x5.data(), x6.data(), x7.data(), x8.data(), x9.data(), 0, 0,
2055 0, 0, 0, 0);
2056 }
2057 #endif
2058
2059
2060 #ifdef Vc_IMPL_AVX2
2061 template <typename Return, typename T>
2062 Vc_INTRINSIC Vc_CONST Return
2063 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
2064 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
2065 Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
2066 Scalar::Vector<T> x9, Scalar::Vector<T> x10,
2067 enable_if<std::is_same<Return, AVX2::short_v>::value>)
2068 {
2069 return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
2070 x5.data(), x6.data(), x7.data(), x8.data(), x9.data(),
2071 x10.data(), 0, 0, 0, 0, 0);
2072 }
2073 template <typename Return, typename T>
2074 Vc_INTRINSIC Vc_CONST Return
2075 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
2076 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
2077 Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
2078 Scalar::Vector<T> x9, Scalar::Vector<T> x10,
2079 enable_if<std::is_same<Return, AVX2::ushort_v>::value>)
2080 {
2081 return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
2082 x5.data(), x6.data(), x7.data(), x8.data(), x9.data(),
2083 x10.data(), 0, 0, 0, 0, 0);
2084 }
2085 #endif
2086
2087
2088 #ifdef Vc_IMPL_AVX2
2089 template <typename Return, typename T>
2090 Vc_INTRINSIC Vc_CONST Return
2091 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
2092 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
2093 Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
2094 Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
2095 enable_if<std::is_same<Return, AVX2::short_v>::value>)
2096 {
2097 return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
2098 x5.data(), x6.data(), x7.data(), x8.data(), x9.data(),
2099 x10.data(), x11.data(), 0, 0, 0, 0);
2100 }
2101 template <typename Return, typename T>
2102 Vc_INTRINSIC Vc_CONST Return
2103 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
2104 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
2105 Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
2106 Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
2107 enable_if<std::is_same<Return, AVX2::ushort_v>::value>)
2108 {
2109 return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
2110 x5.data(), x6.data(), x7.data(), x8.data(), x9.data(),
2111 x10.data(), x11.data(), 0, 0, 0, 0);
2112 }
2113 #endif
2114
2115
// 13 x Scalar::Vector<T> -> AVX2::short_v / AVX2::ushort_v.
// Only compiled when Vc_IMPL_AVX2 is defined.
// The unnamed trailing enable_if parameter selects the overload: Return must
// be exactly the named vector type.
// x0..x12 are handed to _mm256_setr_epi16 in argument order (x0 becomes the
// lowest 16-bit element); the remaining three elements are set to 0.
2116 #ifdef Vc_IMPL_AVX2
2117 template <typename Return, typename T>
2118 Vc_INTRINSIC Vc_CONST Return
2119 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
2120 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
2121 Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
2122 Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
2123 Scalar::Vector<T> x12, enable_if<std::is_same<Return, AVX2::short_v>::value>)
2124 {
2125 return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
2126 x5.data(), x6.data(), x7.data(), x8.data(), x9.data(),
2127 x10.data(), x11.data(), x12.data(), 0, 0, 0);
2128 }
// Same construction, selected when Return is AVX2::ushort_v.
2129 template <typename Return, typename T>
2130 Vc_INTRINSIC Vc_CONST Return
2131 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
2132 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
2133 Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
2134 Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
2135 Scalar::Vector<T> x12, enable_if<std::is_same<Return, AVX2::ushort_v>::value>)
2136 {
2137 return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
2138 x5.data(), x6.data(), x7.data(), x8.data(), x9.data(),
2139 x10.data(), x11.data(), x12.data(), 0, 0, 0);
2140 }
2141 #endif
2142
2143
// 14 x Scalar::Vector<T> -> AVX2::short_v / AVX2::ushort_v.
// Only compiled when Vc_IMPL_AVX2 is defined.
// The unnamed trailing enable_if parameter selects the overload: Return must
// be exactly the named vector type.
// x0..x13 are handed to _mm256_setr_epi16 in argument order (x0 becomes the
// lowest 16-bit element); the remaining two elements are set to 0.
2144 #ifdef Vc_IMPL_AVX2
2145 template <typename Return, typename T>
2146 Vc_INTRINSIC Vc_CONST Return
2147 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
2148 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
2149 Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
2150 Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
2151 Scalar::Vector<T> x12, Scalar::Vector<T> x13,
2152 enable_if<std::is_same<Return, AVX2::short_v>::value>)
2153 {
2154 return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
2155 x5.data(), x6.data(), x7.data(), x8.data(), x9.data(),
2156 x10.data(), x11.data(), x12.data(), x13.data(), 0, 0);
2157 }
// Same construction, selected when Return is AVX2::ushort_v.
2158 template <typename Return, typename T>
2159 Vc_INTRINSIC Vc_CONST Return
2160 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
2161 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
2162 Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
2163 Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
2164 Scalar::Vector<T> x12, Scalar::Vector<T> x13,
2165 enable_if<std::is_same<Return, AVX2::ushort_v>::value>)
2166 {
2167 return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
2168 x5.data(), x6.data(), x7.data(), x8.data(), x9.data(),
2169 x10.data(), x11.data(), x12.data(), x13.data(), 0, 0);
2170 }
2171 #endif
2172
2173
// 15 x Scalar::Vector<T> -> AVX2::short_v / AVX2::ushort_v.
// Only compiled when Vc_IMPL_AVX2 is defined.
// The unnamed trailing enable_if parameter selects the overload: Return must
// be exactly the named vector type.
// x0..x14 are handed to _mm256_setr_epi16 in argument order (x0 becomes the
// lowest 16-bit element); the last element is set to 0.
2174 #ifdef Vc_IMPL_AVX2
2175 template <typename Return, typename T>
2176 Vc_INTRINSIC Vc_CONST Return
2177 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
2178 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
2179 Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
2180 Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
2181 Scalar::Vector<T> x12, Scalar::Vector<T> x13, Scalar::Vector<T> x14,
2182 enable_if<std::is_same<Return, AVX2::short_v>::value>)
2183 {
2184 return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
2185 x5.data(), x6.data(), x7.data(), x8.data(), x9.data(),
2186 x10.data(), x11.data(), x12.data(), x13.data(), x14.data(),
2187 0);
2188 }
// Same construction, selected when Return is AVX2::ushort_v.
2189 template <typename Return, typename T>
2190 Vc_INTRINSIC Vc_CONST Return
2191 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
2192 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
2193 Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
2194 Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
2195 Scalar::Vector<T> x12, Scalar::Vector<T> x13, Scalar::Vector<T> x14,
2196 enable_if<std::is_same<Return, AVX2::ushort_v>::value>)
2197 {
2198 return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
2199 x5.data(), x6.data(), x7.data(), x8.data(), x9.data(),
2200 x10.data(), x11.data(), x12.data(), x13.data(), x14.data(),
2201 0);
2202 }
2203 #endif
2204
2205
// 16 x Scalar::Vector<T> -> AVX2::short_v / AVX2::ushort_v.
// Only compiled when Vc_IMPL_AVX2 is defined.
// The unnamed trailing enable_if parameter selects the overload: Return must
// be exactly the named vector type.
// All sixteen 16-bit elements come from the arguments: x0..x15 are handed to
// _mm256_setr_epi16 in argument order (x0 becomes the lowest element), so no
// zero padding is needed.
2206 #ifdef Vc_IMPL_AVX2
2207 template <typename Return, typename T>
2208 Vc_INTRINSIC Vc_CONST Return
2209 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
2210 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
2211 Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
2212 Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
2213 Scalar::Vector<T> x12, Scalar::Vector<T> x13, Scalar::Vector<T> x14,
2214 Scalar::Vector<T> x15, enable_if<std::is_same<Return, AVX2::short_v>::value>)
2215 {
2216 return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
2217 x5.data(), x6.data(), x7.data(), x8.data(), x9.data(),
2218 x10.data(), x11.data(), x12.data(), x13.data(), x14.data(),
2219 x15.data());
2220 }
// Same construction, selected when Return is AVX2::ushort_v.
2221 template <typename Return, typename T>
2222 Vc_INTRINSIC Vc_CONST Return
2223 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
2224 Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
2225 Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
2226 Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
2227 Scalar::Vector<T> x12, Scalar::Vector<T> x13, Scalar::Vector<T> x14,
2228 Scalar::Vector<T> x15, enable_if<std::is_same<Return, AVX2::ushort_v>::value>)
2229 {
2230 return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
2231 x5.data(), x6.data(), x7.data(), x8.data(), x9.data(),
2232 x10.data(), x11.data(), x12.data(), x13.data(), x14.data(),
2233 x15.data());
2234 }
2235 #endif
2236
2237
// AVX2::Vector<FromT> -> Scalar vector type To.
// Enabled when Scalar::is_vector<To> holds. Only element 0 of x is read and
// static_cast to To; the other elements of x are not used.
2238 template <typename To, typename FromT>
2239 Vc_INTRINSIC Vc_CONST To
2240 simd_cast(AVX2::Vector<FromT> x, enable_if<Scalar::is_vector<To>::value>)
2241 {
2242 return static_cast<To>(x[0]);
2243 }
2244
2245
2246
// AVX2::Mask<T> -> another AVX2 mask type (enabled when AVX2::is_mask<Return>).
// Forwards the integer view of the mask (k.dataI()) to Detail::mask_cast,
// parameterised on the source entry count (Mask<T, VectorAbi::Avx>::Size),
// the target entry count (Return::Size) and the target's VectorTypeF; the
// result brace-initialises the returned mask.
2247 template <typename Return, typename T>
2248 Vc_INTRINSIC Vc_CONST Return
2249 simd_cast(const AVX2::Mask<T> &k, enable_if<AVX2::is_mask<Return>::value>)
2250 {
2251 return {Detail::mask_cast<Mask<T, VectorAbi::Avx>::Size, Return::Size,
2252 typename Return::VectorTypeF>(k.dataI())};
2253 }
2254
2255
// Two AVX2 masks -> one AVX2 mask with narrower entries.
// Vc_SIMD_CAST_AVX_2(from_, to_) (defined at the top of this header) declares
// simd_cast<To>(AVX2::from_ x0, AVX2::from_ x1), enabled only for
// To == AVX2::to_.
// Narrowing is done with the signed-saturating pack intrinsics
// (packs_epi32: 32 -> 16 bit, packs_epi16: 16 -> 8 bit) on the integer view of
// the mask data.
// NOTE(review): this presumably relies on every mask entry being all-ones or
// all-zero, so that halving the width keeps the entry's truth value - confirm
// against the Mask invariants.
// double_m -> float_m: pack each input's two 128-bit halves into 128 bits and
// concatenate the results (x0 first, then x1).
2256 Vc_SIMD_CAST_AVX_2(double_m, float_m) { return AVX::concat(_mm_packs_epi32(AVX::lo128(x0.dataI()), AVX::hi128(x0.dataI())), _mm_packs_epi32(AVX::lo128(x1.dataI()), AVX::hi128(x1.dataI()))); }
2257 #ifdef Vc_IMPL_AVX2
// The 256-bit pack intrinsics operate on each 128-bit lane separately, so
// their output holds x0/x1 data interleaved per lane. The following
// Mem::permute4x64<X0, X2, X1, X3> reorders the 64-bit quarters (presumably
// to 0, 2, 1, 3, per the template arguments) so that x0's entries precede x1's.
2258 Vc_SIMD_CAST_AVX_2(double_m, int_m) { return Mem::permute4x64<X0, X2, X1, X3>(_mm256_packs_epi32(x0.dataI(), x1.dataI())); }
2259 Vc_SIMD_CAST_AVX_2(double_m, uint_m) { return Mem::permute4x64<X0, X2, X1, X3>(_mm256_packs_epi32(x0.dataI(), x1.dataI())); }
// double_m -> (u)short_m: two pack steps on the 128-bit halves yield 128 bits
// of 16-bit entries, which AVX::zeroExtend widens to the 256-bit result.
2260 Vc_SIMD_CAST_AVX_2(double_m, short_m) { return AVX::zeroExtend(_mm_packs_epi16(_mm_packs_epi32(AVX::lo128(x0.dataI()), AVX::hi128(x0.dataI())), _mm_packs_epi32(AVX::lo128(x1.dataI()), AVX::hi128(x1.dataI())))); }
2261 Vc_SIMD_CAST_AVX_2(double_m, ushort_m) { return AVX::zeroExtend(_mm_packs_epi16(_mm_packs_epi32(AVX::lo128(x0.dataI()), AVX::hi128(x0.dataI())), _mm_packs_epi32(AVX::lo128(x1.dataI()), AVX::hi128(x1.dataI())))); }
2262
// float_m / int_m / uint_m -> (u)short_m: one 256-bit pack followed by the
// same quarter reordering as above.
2263 Vc_SIMD_CAST_AVX_2( float_m, short_m) { return Mem::permute4x64<X0, X2, X1, X3>(_mm256_packs_epi16(x0.dataI(), x1.dataI())); }
2264 Vc_SIMD_CAST_AVX_2( float_m, ushort_m) { return Mem::permute4x64<X0, X2, X1, X3>(_mm256_packs_epi16(x0.dataI(), x1.dataI())); }
2265
2266 Vc_SIMD_CAST_AVX_2( int_m, short_m) { return Mem::permute4x64<X0, X2, X1, X3>(_mm256_packs_epi16(x0.dataI(), x1.dataI())); }
2267 Vc_SIMD_CAST_AVX_2( int_m, ushort_m) { return Mem::permute4x64<X0, X2, X1, X3>(_mm256_packs_epi16(x0.dataI(), x1.dataI())); }
2268
2269 Vc_SIMD_CAST_AVX_2( uint_m, short_m) { return Mem::permute4x64<X0, X2, X1, X3>(_mm256_packs_epi16(x0.dataI(), x1.dataI())); }
2270 Vc_SIMD_CAST_AVX_2( uint_m, ushort_m) { return Mem::permute4x64<X0, X2, X1, X3>(_mm256_packs_epi16(x0.dataI(), x1.dataI())); }
2271 #endif
2272
2273
// Four AVX2::double_m -> one AVX2::short_m / ushort_m (AVX2 builds only).
// Vc_SIMD_CAST_AVX_4(from_, to_) (defined at the top of this header) declares
// simd_cast<To>(AVX2::from_ x0, x1, x2, x3), enabled only for To == AVX2::to_.
2274 #ifdef Vc_IMPL_AVX2
2275 Vc_SIMD_CAST_AVX_4(double_m, short_m)
2276 {
2277 using namespace AVX;
// Two rounds of 256-bit packs_epi32 shrink the four inputs into one register.
// The pack works per 128-bit lane, so afterwards the low lane holds the
// entries that came from the low halves of x0..x3 and the high lane those
// from the high halves.
2278 const auto tmp = _mm256_packs_epi32(
2279 _mm256_packs_epi32(x0.dataI(), x1.dataI())
2280 ,
2281 _mm256_packs_epi32(x2.dataI(), x3.dataI())
2282 );
// Interleaving 32-bit units of the two lanes restores the
// x0, x1, x2, x3 entry order.
2283 return concat(_mm_unpacklo_epi32(lo128(tmp), hi128(tmp)),
2284 _mm_unpackhi_epi32(lo128(tmp), hi128(tmp)));
2285 }
// The ushort_m variant reuses the short_m cast and returns its raw data().
2286 Vc_SIMD_CAST_AVX_4(double_m, ushort_m) { return simd_cast<AVX2::short_m>(x0, x1, x2, x3).data(); }
2287 #endif
2288
2289
// One SSE::double_m -> one AVX2 mask.
// Vc_SIMD_CAST_1(from_, to_) (defined at the top of this header) declares
// simd_cast<To>(from_ x), enabled only for To == to_.
// Same entry width: widen the 128-bit data with AVX::zeroExtend (presumably
// zero-filling the upper 128 bits, per its name).
2290 Vc_SIMD_CAST_1(SSE::double_m, AVX2::double_m) { return AVX::zeroExtend(x.data()); }
// Narrower target entries: first cast to the SSE mask with the target's entry
// type, then zero-extend that result.
2291 Vc_SIMD_CAST_1(SSE::double_m, AVX2:: float_m) { return AVX::zeroExtend(simd_cast<SSE:: float_m>(x).data()); }
2292 #ifdef Vc_IMPL_AVX2
2293 Vc_SIMD_CAST_1(SSE::double_m, AVX2:: int_m) { return AVX::zeroExtend(simd_cast<SSE:: int_m>(x).data()); }
2294 Vc_SIMD_CAST_1(SSE::double_m, AVX2:: uint_m) { return AVX::zeroExtend(simd_cast<SSE:: uint_m>(x).data()); }
2295 Vc_SIMD_CAST_1(SSE::double_m, AVX2:: short_m) { return AVX::zeroExtend(simd_cast<SSE:: short_m>(x).data()); }
2296 Vc_SIMD_CAST_1(SSE::double_m, AVX2::ushort_m) { return AVX::zeroExtend(simd_cast<SSE::ushort_m>(x).data()); }
2297 #endif
2298
// One SSE mask -> AVX2::double_m (entries become wider).
// 32-bit sources: unpacklo/unpackhi of x with itself duplicates every 32-bit
// unit, turning each one into a 64-bit entry; the two 128-bit results are
// concatenated (low half first).
2299 Vc_SIMD_CAST_1(SSE:: float_m, AVX2::double_m) { return AVX::concat(_mm_unpacklo_ps(x.dataF(), x.dataF()), _mm_unpackhi_ps(x.dataF(), x.dataF())); }
2300 Vc_SIMD_CAST_1(SSE:: int_m, AVX2::double_m) { return AVX::concat(_mm_unpacklo_ps(x.dataF(), x.dataF()), _mm_unpackhi_ps(x.dataF(), x.dataF())); }
2301 Vc_SIMD_CAST_1(SSE:: uint_m, AVX2::double_m) { return AVX::concat(_mm_unpacklo_ps(x.dataF(), x.dataF()), _mm_unpackhi_ps(x.dataF(), x.dataF())); }
// 16-bit sources: duplicate the low four 16-bit units to 32 bit first
// (unpacklo_epi16), then duplicate those to 64 bit. Only the first four
// source entries contribute.
2302 Vc_SIMD_CAST_1(SSE:: short_m, AVX2::double_m) { auto tmp = _mm_unpacklo_epi16(x.dataI(), x.dataI()); return AVX::concat(_mm_unpacklo_epi32(tmp, tmp), _mm_unpackhi_epi32(tmp, tmp)); }
2303 Vc_SIMD_CAST_1(SSE::ushort_m, AVX2::double_m) { auto tmp = _mm_unpacklo_epi16(x.dataI(), x.dataI()); return AVX::concat(_mm_unpacklo_epi32(tmp, tmp), _mm_unpackhi_epi32(tmp, tmp)); }
2304
// One SSE mask -> AVX2::float_m.
// 32-bit sources already have the target entry width: widen the 128-bit data
// with AVX::zeroExtend (presumably zero-filling the upper half, per its name).
2305 Vc_SIMD_CAST_1(SSE:: float_m, AVX2:: float_m) { return AVX::zeroExtend(x.dataF()); }
2306 Vc_SIMD_CAST_1(SSE:: int_m, AVX2:: float_m) { return AVX::zeroExtend(x.dataF()); }
2307 Vc_SIMD_CAST_1(SSE:: uint_m, AVX2:: float_m) { return AVX::zeroExtend(x.dataF()); }
// 16-bit sources: duplicating every 16-bit unit (unpacklo/unpackhi_epi16 of x
// with itself) turns the eight source entries into eight 32-bit entries.
2308 Vc_SIMD_CAST_1(SSE:: short_m, AVX2:: float_m) { return AVX::concat(_mm_unpacklo_epi16(x.dataI(), x.dataI()), _mm_unpackhi_epi16(x.dataI(), x.dataI())); }
2309 Vc_SIMD_CAST_1(SSE::ushort_m, AVX2:: float_m) { return AVX::concat(_mm_unpacklo_epi16(x.dataI(), x.dataI()), _mm_unpackhi_epi16(x.dataI(), x.dataI())); }
2310
// One SSE mask -> AVX2 integer masks (only compiled when Vc_IMPL_AVX2 is
// defined).
2311 #ifdef Vc_IMPL_AVX2
// 32-bit source to 32-bit target: widen the 128-bit data with AVX::zeroExtend.
2312 Vc_SIMD_CAST_1(SSE:: float_m, AVX2:: int_m) { return AVX::zeroExtend(x.data()); }
2313 Vc_SIMD_CAST_1(SSE:: float_m, AVX2:: uint_m) { return AVX::zeroExtend(x.data()); }
2314 Vc_SIMD_CAST_1(SSE:: int_m, AVX2:: int_m) { return AVX::zeroExtend(x.data()); }
2315 Vc_SIMD_CAST_1(SSE:: int_m, AVX2:: uint_m) { return AVX::zeroExtend(x.data()); }
2316 Vc_SIMD_CAST_1(SSE:: uint_m, AVX2:: int_m) { return AVX::zeroExtend(x.data()); }
2317 Vc_SIMD_CAST_1(SSE:: uint_m, AVX2:: uint_m) { return AVX::zeroExtend(x.data()); }
2318
// Any source to a 16-bit target: cast to the SSE mask of the target entry type
// first, then zero-extend that 128-bit result.
2319 Vc_SIMD_CAST_1(SSE:: float_m, AVX2:: short_m) { return AVX::zeroExtend(simd_cast<SSE:: short_m>(x).data()); }
2320 Vc_SIMD_CAST_1(SSE:: int_m, AVX2:: short_m) { return AVX::zeroExtend(simd_cast<SSE:: short_m>(x).data()); }
2321 Vc_SIMD_CAST_1(SSE:: uint_m, AVX2:: short_m) { return AVX::zeroExtend(simd_cast<SSE:: short_m>(x).data()); }
2322 Vc_SIMD_CAST_1(SSE:: short_m, AVX2:: short_m) { return AVX::zeroExtend(simd_cast<SSE:: short_m>(x).data()); }
2323 Vc_SIMD_CAST_1(SSE::ushort_m, AVX2:: short_m) { return AVX::zeroExtend(simd_cast<SSE:: short_m>(x).data()); }
2324 Vc_SIMD_CAST_1(SSE:: float_m, AVX2::ushort_m) { return AVX::zeroExtend(simd_cast<SSE::ushort_m>(x).data()); }
2325 Vc_SIMD_CAST_1(SSE:: int_m, AVX2::ushort_m) { return AVX::zeroExtend(simd_cast<SSE::ushort_m>(x).data()); }
2326 Vc_SIMD_CAST_1(SSE:: uint_m, AVX2::ushort_m) { return AVX::zeroExtend(simd_cast<SSE::ushort_m>(x).data()); }
2327 Vc_SIMD_CAST_1(SSE:: short_m, AVX2::ushort_m) { return AVX::zeroExtend(simd_cast<SSE::ushort_m>(x).data()); }
2328 Vc_SIMD_CAST_1(SSE::ushort_m, AVX2::ushort_m) { return AVX::zeroExtend(simd_cast<SSE::ushort_m>(x).data()); }
2329
// 16-bit source to 32-bit target: permute4x64<X0, X2, X1, X3> presumably puts
// the source's two 64-bit halves at the bottom of the two 128-bit lanes;
// _mm256_unpacklo_epi16(v, v) then duplicates the low four 16-bit units of
// each lane, giving eight 32-bit entries.
2330 Vc_SIMD_CAST_1(SSE:: short_m, AVX2:: int_m) { const auto v = Mem::permute4x64<X0, X2, X1, X3>(AVX::avx_cast<__m256i>(x.data())); return _mm256_unpacklo_epi16(v, v); }
2331 Vc_SIMD_CAST_1(SSE:: short_m, AVX2:: uint_m) { const auto v = Mem::permute4x64<X0, X2, X1, X3>(AVX::avx_cast<__m256i>(x.data())); return _mm256_unpacklo_epi16(v, v); }
2332
2333 Vc_SIMD_CAST_1(SSE::ushort_m, AVX2:: int_m) { const auto v = Mem::permute4x64<X0, X2, X1, X3>(AVX::avx_cast<__m256i>(x.data())); return _mm256_unpacklo_epi16(v, v); }
2334 Vc_SIMD_CAST_1(SSE::ushort_m, AVX2:: uint_m) { const auto v = Mem::permute4x64<X0, X2, X1, X3>(AVX::avx_cast<__m256i>(x.data())); return _mm256_unpacklo_epi16(v, v); }
2335 #endif
2336
2337
// Two SSE masks -> one AVX2 mask (variants available without AVX2).
// Vc_SIMD_CAST_2(from_, to_) (defined at the top of this header) declares
// simd_cast<To>(from_ x0, from_ x1), enabled only for To == to_.
// Same entry width: the two 128-bit inputs are simply concatenated
// (x0 first, then x1).
2338 Vc_SIMD_CAST_2(SSE::double_m, AVX2::double_m) { return AVX::concat(x0.data(), x1.data()); }
// double_m -> float_m: packs_epi32 narrows both inputs into one 128-bit
// register, which AVX::zeroExtend widens to the 256-bit result.
2339 Vc_SIMD_CAST_2(SSE::double_m, AVX2:: float_m) { return AVX::zeroExtend(_mm_packs_epi32(x0.dataI(), x1.dataI())); }
2340 Vc_SIMD_CAST_2(SSE:: float_m, AVX2:: float_m) { return AVX::concat(x0.data(), x1.data()); }
2341 Vc_SIMD_CAST_2(SSE:: int_m, AVX2:: float_m) { return AVX::concat(x0.data(), x1.data()); }
2342 Vc_SIMD_CAST_2(SSE:: uint_m, AVX2:: float_m) { return AVX::concat(x0.data(), x1.data()); }
2343
// Two SSE masks -> one AVX2 integer mask (only compiled when Vc_IMPL_AVX2 is
// defined). Three patterns occur:
//  - same entry width: AVX::concat(x0, x1);
//  - target entries half as wide: one signed-saturating pack of x0 and x1 into
//    128 bits, then AVX::zeroExtend;
//  - double_m -> (u)short_m: two pack steps (the second against a zero
//    register), then AVX::zeroExtend.
2344 #ifdef Vc_IMPL_AVX2
2345 Vc_SIMD_CAST_2(SSE::double_m, AVX2:: int_m) { return AVX::zeroExtend(_mm_packs_epi32(x0.dataI(), x1.dataI())); }
2346 Vc_SIMD_CAST_2(SSE::double_m, AVX2:: uint_m) { return AVX::zeroExtend(_mm_packs_epi32(x0.dataI(), x1.dataI())); }
2347 Vc_SIMD_CAST_2(SSE::double_m, AVX2:: short_m) { return AVX::zeroExtend(_mm_packs_epi16(_mm_packs_epi32(x0.dataI(), x1.dataI()), _mm_setzero_si128())); }
2348 Vc_SIMD_CAST_2(SSE::double_m, AVX2::ushort_m) { return AVX::zeroExtend(_mm_packs_epi16(_mm_packs_epi32(x0.dataI(), x1.dataI()), _mm_setzero_si128())); }
2349
2350 Vc_SIMD_CAST_2(SSE:: float_m, AVX2:: int_m) { return AVX::concat(x0.data(), x1.data()); }
2351 Vc_SIMD_CAST_2(SSE:: float_m, AVX2:: uint_m) { return AVX::concat(x0.data(), x1.data()); }
2352 Vc_SIMD_CAST_2(SSE:: float_m, AVX2:: short_m) { return AVX::zeroExtend(_mm_packs_epi16(x0.dataI(), x1.dataI())); }
2353 Vc_SIMD_CAST_2(SSE:: float_m, AVX2::ushort_m) { return AVX::zeroExtend(_mm_packs_epi16(x0.dataI(), x1.dataI())); }
2354
2355 Vc_SIMD_CAST_2(SSE:: int_m, AVX2:: int_m) { return AVX::concat(x0.data(), x1.data()); }
2356 Vc_SIMD_CAST_2(SSE:: int_m, AVX2:: uint_m) { return AVX::concat(x0.data(), x1.data()); }
2357 Vc_SIMD_CAST_2(SSE:: int_m, AVX2:: short_m) { return AVX::zeroExtend(_mm_packs_epi16(x0.dataI(), x1.dataI())); }
2358 Vc_SIMD_CAST_2(SSE:: int_m, AVX2::ushort_m) { return AVX::zeroExtend(_mm_packs_epi16(x0.dataI(), x1.dataI())); }
2359
2360 Vc_SIMD_CAST_2(SSE:: uint_m, AVX2:: int_m) { return AVX::concat(x0.data(), x1.data()); }
2361 Vc_SIMD_CAST_2(SSE:: uint_m, AVX2:: uint_m) { return AVX::concat(x0.data(), x1.data()); }
2362 Vc_SIMD_CAST_2(SSE:: uint_m, AVX2:: short_m) { return AVX::zeroExtend(_mm_packs_epi16(x0.dataI(), x1.dataI())); }
2363 Vc_SIMD_CAST_2(SSE:: uint_m, AVX2::ushort_m) { return AVX::zeroExtend(_mm_packs_epi16(x0.dataI(), x1.dataI())); }
2364
2365 Vc_SIMD_CAST_2(SSE:: short_m, AVX2:: short_m) { return AVX::concat(x0.data(), x1.data()); }
2366 Vc_SIMD_CAST_2(SSE:: short_m, AVX2::ushort_m) { return AVX::concat(x0.data(), x1.data()); }
2367 Vc_SIMD_CAST_2(SSE::ushort_m, AVX2:: short_m) { return AVX::concat(x0.data(), x1.data()); }
2368 Vc_SIMD_CAST_2(SSE::ushort_m, AVX2::ushort_m) { return AVX::concat(x0.data(), x1.data()); }
2369 #endif
2370
2371
// Four SSE masks (x0..x3) -> one AVX2 mask with narrower entries.
// Each pair of inputs is narrowed with a signed-saturating pack; the two
// 128-bit results are either concatenated (x0/x1 part first) or, for
// double_m -> (u)short_m, packed once more and widened with AVX::zeroExtend.
2372 Vc_SIMD_CAST_4(SSE::double_m, AVX2:: float_m) { return AVX::concat(_mm_packs_epi32(x0.dataI(), x1.dataI()), _mm_packs_epi32(x2.dataI(), x3.dataI())); }
2373 #ifdef Vc_IMPL_AVX2
2374 Vc_SIMD_CAST_4(SSE::double_m, AVX2:: int_m) { return AVX::concat(_mm_packs_epi32(x0.dataI(), x1.dataI()), _mm_packs_epi32(x2.dataI(), x3.dataI())); }
2375 Vc_SIMD_CAST_4(SSE::double_m, AVX2:: uint_m) { return AVX::concat(_mm_packs_epi32(x0.dataI(), x1.dataI()), _mm_packs_epi32(x2.dataI(), x3.dataI())); }
2376 Vc_SIMD_CAST_4(SSE::double_m, AVX2:: short_m) { return AVX::zeroExtend(_mm_packs_epi16(_mm_packs_epi32(x0.dataI(), x1.dataI()), _mm_packs_epi32(x2.dataI(), x3.dataI()))); }
2377 Vc_SIMD_CAST_4(SSE::double_m, AVX2::ushort_m) { return AVX::zeroExtend(_mm_packs_epi16(_mm_packs_epi32(x0.dataI(), x1.dataI()), _mm_packs_epi32(x2.dataI(), x3.dataI()))); }
2378 Vc_SIMD_CAST_4(SSE:: float_m, AVX2:: short_m) { return AVX::concat(_mm_packs_epi16(x0.dataI(), x1.dataI()), _mm_packs_epi16(x2.dataI(), x3.dataI())); }
2379 Vc_SIMD_CAST_4(SSE:: float_m, AVX2::ushort_m) { return AVX::concat(_mm_packs_epi16(x0.dataI(), x1.dataI()), _mm_packs_epi16(x2.dataI(), x3.dataI())); }
2380 Vc_SIMD_CAST_4(SSE:: int_m, AVX2:: short_m) { return AVX::concat(_mm_packs_epi16(x0.dataI(), x1.dataI()), _mm_packs_epi16(x2.dataI(), x3.dataI())); }
2381 Vc_SIMD_CAST_4(SSE:: int_m, AVX2::ushort_m) { return AVX::concat(_mm_packs_epi16(x0.dataI(), x1.dataI()), _mm_packs_epi16(x2.dataI(), x3.dataI())); }
2382 Vc_SIMD_CAST_4(SSE:: uint_m, AVX2:: short_m) { return AVX::concat(_mm_packs_epi16(x0.dataI(), x1.dataI()), _mm_packs_epi16(x2.dataI(), x3.dataI())); }
2383 Vc_SIMD_CAST_4(SSE:: uint_m, AVX2::ushort_m) { return AVX::concat(_mm_packs_epi16(x0.dataI(), x1.dataI()), _mm_packs_epi16(x2.dataI(), x3.dataI())); }
2384 #endif
2385
2386
// One Scalar::Mask<T> -> AVX2 mask (enabled when AVX2::is_mask<Return>).
// The result is constructed from `false`; only entry 0 is then overwritten
// with the scalar mask's value.
2387 template <typename Return, typename T>
2388 Vc_INTRINSIC Vc_CONST Return
2389 simd_cast(Scalar::Mask<T> k, enable_if<AVX2::is_mask<Return>::value>)
2390 {
2391 Return result{false};
2392 result[0] = k.data();
2393 return result;
2394 }
2395
2396
// Two Scalar::Mask<T> -> AVX2 mask (enabled when AVX2::is_mask<Return>).
// The result is constructed from `false`; entries 0 and 1 are then
// overwritten with k0 and k1.
2397 template <typename Return, typename T>
2398 Vc_INTRINSIC Vc_CONST Return
2399 simd_cast(Scalar::Mask<T> k0, Scalar::Mask<T> k1,
2400 enable_if<AVX2::is_mask<Return>::value>)
2401 {
2402 Return result{false};
2403 result[0] = k0.data();
2404 result[1] = k1.data();
2405 return result;
2406 }
2407
2408
// Four Scalar::Mask<T> -> AVX2 mask.
// Enabled when Return is an AVX2 mask with at least 4 entries.
// The result is constructed from `false`; entries 0..3 are then overwritten
// with k0..k3 in argument order.
2409 template <typename Return, typename T>
2410 Vc_INTRINSIC Vc_CONST Return
2411 simd_cast(Scalar::Mask<T> k0, Scalar::Mask<T> k1, Scalar::Mask<T> k2, Scalar::Mask<T> k3,
2412 enable_if<(AVX2::is_mask<Return>::value && Return::Size >= 4)>)
2413 {
2414 Return result{false};
2415 result[0] = k0.data();
2416 result[1] = k1.data();
2417 result[2] = k2.data();
2418 result[3] = k3.data();
2419 return result;
2420 }
2421
2422
// Eight Scalar::Mask<T> -> AVX2 mask.
// Enabled when Return is an AVX2 mask with at least 8 entries.
// The result is constructed from `false`; entries 0..7 are then overwritten
// with k0..k7 in argument order.
2423 template <typename Return, typename T>
2424 Vc_INTRINSIC Vc_CONST Return
2425 simd_cast(Scalar::Mask<T> k0, Scalar::Mask<T> k1, Scalar::Mask<T> k2, Scalar::Mask<T> k3,
2426 Scalar::Mask<T> k4, Scalar::Mask<T> k5, Scalar::Mask<T> k6, Scalar::Mask<T> k7,
2427 enable_if<(AVX2::is_mask<Return>::value && Return::Size >= 8)>)
2428 {
2429 Return result{false};
2430 result[0] = k0.data();
2431 result[1] = k1.data();
2432 result[2] = k2.data();
2433 result[3] = k3.data();
2434 result[4] = k4.data();
2435 result[5] = k5.data();
2436 result[6] = k6.data();
2437 result[7] = k7.data();
2438 return result;
2439 }
2440
2441
// Sixteen Scalar::Mask<T> -> AVX2 mask.
// Enabled when Return is an AVX2 mask with at least 16 entries.
// The result is constructed from `false`; entries 0..15 are then overwritten
// with k0..k15 in argument order.
2442 template <typename Return, typename T>
2443 Vc_INTRINSIC Vc_CONST Return
2444 simd_cast(Scalar::Mask<T> k0, Scalar::Mask<T> k1, Scalar::Mask<T> k2, Scalar::Mask<T> k3,
2445 Scalar::Mask<T> k4, Scalar::Mask<T> k5, Scalar::Mask<T> k6, Scalar::Mask<T> k7,
2446 Scalar::Mask<T> k8, Scalar::Mask<T> k9, Scalar::Mask<T> k10,
2447 Scalar::Mask<T> k11, Scalar::Mask<T> k12, Scalar::Mask<T> k13,
2448 Scalar::Mask<T> k14, Scalar::Mask<T> k15,
2449 enable_if<(AVX2::is_mask<Return>::value && Return::Size >= 16)>)
2450 {
2451 Return result{false};
2452 result[0] = k0.data();
2453 result[1] = k1.data();
2454 result[2] = k2.data();
2455 result[3] = k3.data();
2456 result[4] = k4.data();
2457 result[5] = k5.data();
2458 result[6] = k6.data();
2459 result[7] = k7.data();
2460 result[8] = k8.data();
2461 result[9] = k9.data();
2462 result[10] = k10.data();
2463 result[11] = k11.data();
2464 result[12] = k12.data();
2465 result[13] = k13.data();
2466 result[14] = k14.data();
2467 result[15] = k15.data();
2468 return result;
2469 }
2470
2471
// One AVX2::double_m -> one SSE mask.
// Vc_SIMD_CAST_1(from_, to_) (defined at the top of this header) declares
// simd_cast<To>(from_ x), enabled only for To == to_.
// Same entry width: keep the low 128-bit half.
2472 Vc_SIMD_CAST_1(AVX2::double_m, SSE::double_m) { return AVX::lo128(x.data()); }
// 32-bit targets: a signed-saturating pack of the low and high halves narrows
// all source entries into 128 bits.
2473 Vc_SIMD_CAST_1(AVX2::double_m, SSE:: float_m) { return _mm_packs_epi32(AVX::lo128(x.dataI()), AVX::hi128(x.dataI())); }
2474 Vc_SIMD_CAST_1(AVX2::double_m, SSE:: int_m) { return _mm_packs_epi32(AVX::lo128(x.dataI()), AVX::hi128(x.dataI())); }
2475 Vc_SIMD_CAST_1(AVX2::double_m, SSE:: uint_m) { return _mm_packs_epi32(AVX::lo128(x.dataI()), AVX::hi128(x.dataI())); }
// 16-bit targets: a second pack against a zero register narrows once more;
// the upper 64 bits of the result come from that zero register.
2476 Vc_SIMD_CAST_1(AVX2::double_m, SSE:: short_m) { return _mm_packs_epi16(_mm_packs_epi32(AVX::lo128(x.dataI()), AVX::hi128(x.dataI())), _mm_setzero_si128()); }
2477 Vc_SIMD_CAST_1(AVX2::double_m, SSE::ushort_m) { return _mm_packs_epi16(_mm_packs_epi32(AVX::lo128(x.dataI()), AVX::hi128(x.dataI())), _mm_setzero_si128()); }
2478
// One AVX2::float_m -> one SSE mask.
// -> double_m: duplicate the two lowest 32-bit units of the low half
// (unpacklo_ps with itself) so each becomes a 64-bit entry.
2479 Vc_SIMD_CAST_1(AVX2:: float_m, SSE::double_m) { return _mm_unpacklo_ps(AVX::lo128(x.data()), AVX::lo128(x.data())); }
// -> 32-bit targets: keep the low 128-bit half.
2480 Vc_SIMD_CAST_1(AVX2:: float_m, SSE:: float_m) { return AVX::lo128(x.data()); }
2481 Vc_SIMD_CAST_1(AVX2:: float_m, SSE:: int_m) { return AVX::lo128(x.data()); }
2482 Vc_SIMD_CAST_1(AVX2:: float_m, SSE:: uint_m) { return AVX::lo128(x.data()); }
// -> 16-bit targets: a signed-saturating pack of the low and high halves
// narrows all eight source entries into 128 bits.
2483 Vc_SIMD_CAST_1(AVX2:: float_m, SSE:: short_m) { return _mm_packs_epi16(AVX::lo128(x.dataI()), AVX::hi128(x.dataI())); }
2484 Vc_SIMD_CAST_1(AVX2:: float_m, SSE::ushort_m) { return _mm_packs_epi16(AVX::lo128(x.dataI()), AVX::hi128(x.dataI())); }
2485
// One AVX2 integer mask -> one SSE mask (only compiled when Vc_IMPL_AVX2 is
// defined).
2486 #ifdef Vc_IMPL_AVX2
// int_m / uint_m sources (32-bit entries):
//  -> double_m: duplicate the two lowest 32-bit units of the low half;
//  -> 32-bit targets: keep the low 128-bit half;
//  -> 16-bit targets: signed-saturating pack of the low and high halves.
2487 Vc_SIMD_CAST_1(AVX2:: int_m, SSE::double_m) { return _mm_unpacklo_epi32(AVX::lo128(x.dataI()), AVX::lo128(x.dataI())); }
2488 Vc_SIMD_CAST_1(AVX2:: int_m, SSE:: float_m) { return AVX::lo128(x.dataI()); }
2489 Vc_SIMD_CAST_1(AVX2:: int_m, SSE:: int_m) { return AVX::lo128(x.dataI()); }
2490 Vc_SIMD_CAST_1(AVX2:: int_m, SSE:: uint_m) { return AVX::lo128(x.dataI()); }
2491 Vc_SIMD_CAST_1(AVX2:: int_m, SSE:: short_m) { return _mm_packs_epi16(AVX::lo128(x.dataI()), AVX::hi128(x.dataI())); }
2492 Vc_SIMD_CAST_1(AVX2:: int_m, SSE::ushort_m) { return _mm_packs_epi16(AVX::lo128(x.dataI()), AVX::hi128(x.dataI())); }
2493
2494 Vc_SIMD_CAST_1(AVX2:: uint_m, SSE::double_m) { return _mm_unpacklo_epi32(AVX::lo128(x.dataI()), AVX::lo128(x.dataI())); }
2495 Vc_SIMD_CAST_1(AVX2:: uint_m, SSE:: float_m) { return AVX::lo128(x.dataI()); }
2496 Vc_SIMD_CAST_1(AVX2:: uint_m, SSE:: int_m) { return AVX::lo128(x.dataI()); }
2497 Vc_SIMD_CAST_1(AVX2:: uint_m, SSE:: uint_m) { return AVX::lo128(x.dataI()); }
2498 Vc_SIMD_CAST_1(AVX2:: uint_m, SSE:: short_m) { return _mm_packs_epi16(AVX::lo128(x.dataI()), AVX::hi128(x.dataI())); }
2499 Vc_SIMD_CAST_1(AVX2:: uint_m, SSE::ushort_m) { return _mm_packs_epi16(AVX::lo128(x.dataI()), AVX::hi128(x.dataI())); }
2500
// short_m / ushort_m sources (16-bit entries): wrap the low 128-bit half in
// the SSE mask of the same entry type and delegate to the SSE -> SSE
// simd_cast for the requested target.
2501 Vc_SIMD_CAST_1(AVX2:: short_m, SSE::double_m) { return simd_cast<SSE::double_m>(SSE::short_m(AVX::lo128(x.data()))); }
2502 Vc_SIMD_CAST_1(AVX2:: short_m, SSE:: float_m) { return simd_cast<SSE:: float_m>(SSE::short_m(AVX::lo128(x.data()))); }
2503 Vc_SIMD_CAST_1(AVX2:: short_m, SSE:: int_m) { return simd_cast<SSE:: int_m>(SSE::short_m(AVX::lo128(x.data()))); }
2504 Vc_SIMD_CAST_1(AVX2:: short_m, SSE:: uint_m) { return simd_cast<SSE:: uint_m>(SSE::short_m(AVX::lo128(x.data()))); }
2505 Vc_SIMD_CAST_1(AVX2:: short_m, SSE:: short_m) { return simd_cast<SSE:: short_m>(SSE::short_m(AVX::lo128(x.data()))); }
2506 Vc_SIMD_CAST_1(AVX2:: short_m, SSE::ushort_m) { return simd_cast<SSE::ushort_m>(SSE::short_m(AVX::lo128(x.data()))); }
2507
2508 Vc_SIMD_CAST_1(AVX2::ushort_m, SSE::double_m) { return simd_cast<SSE::double_m>(SSE::ushort_m(AVX::lo128(x.data()))); }
2509 Vc_SIMD_CAST_1(AVX2::ushort_m, SSE:: float_m) { return simd_cast<SSE:: float_m>(SSE::ushort_m(AVX::lo128(x.data()))); }
2510 Vc_SIMD_CAST_1(AVX2::ushort_m, SSE:: int_m) { return simd_cast<SSE:: int_m>(SSE::ushort_m(AVX::lo128(x.data()))); }
2511 Vc_SIMD_CAST_1(AVX2::ushort_m, SSE:: uint_m) { return simd_cast<SSE:: uint_m>(SSE::ushort_m(AVX::lo128(x.data()))); }
2512 Vc_SIMD_CAST_1(AVX2::ushort_m, SSE:: short_m) { return simd_cast<SSE:: short_m>(SSE::ushort_m(AVX::lo128(x.data()))); }
2513 Vc_SIMD_CAST_1(AVX2::ushort_m, SSE::ushort_m) { return simd_cast<SSE::ushort_m>(SSE::ushort_m(AVX::lo128(x.data()))); }
2514 #endif
2515
2516
// Two AVX2::double_m -> one SSE::short_m / ushort_m.
// Each input's two 128-bit halves are packed to 32-bit units (packs_epi32);
// the two results are packed again (packs_epi16) so that x0's entries precede
// x1's in the 128-bit result.
2517 Vc_SIMD_CAST_2(AVX2::double_m, SSE:: short_m) { return _mm_packs_epi16(_mm_packs_epi32(AVX::lo128(x0.dataI()), AVX::hi128(x0.dataI())), _mm_packs_epi32(AVX::lo128(x1.dataI()), AVX::hi128(x1.dataI()))); }
2518 Vc_SIMD_CAST_2(AVX2::double_m, SSE::ushort_m) { return _mm_packs_epi16(_mm_packs_epi32(AVX::lo128(x0.dataI()), AVX::hi128(x0.dataI())), _mm_packs_epi32(AVX::lo128(x1.dataI()), AVX::hi128(x1.dataI()))); }
2519
2520
// AVX2::Mask<FromT> -> Scalar mask type To.
// Enabled when Scalar::is_mask<To> holds. Only entry 0 of x is read and
// static_cast to To; the other entries are not used.
2521 template <typename To, typename FromT>
2522 Vc_INTRINSIC Vc_CONST To
2523 simd_cast(AVX2::Mask<FromT> x, enable_if<Scalar::is_mask<To>::value>)
2524 {
2525 return static_cast<To>(x[0]);
2526 }
2527
2528
// Offset form simd_cast<Return, offset>(x) for offset == 0 with an AVX2 source.
// An offset of 0 is the same as the plain cast, so this forwards to
// simd_cast<Return>(x).
// Enabled (via the return type) when either
//  - From is an AVX2 vector and Return is a SIMD vector that is neither a
//    Scalar vector nor a SimdArray, or
//  - From is an AVX2 mask and Return is a SIMD mask that is neither a Scalar
//    mask nor a SimdMaskArray.
2529 template <typename Return, int offset, typename From>
2530 Vc_INTRINSIC Vc_CONST enable_if<
2531 (offset == 0 &&
2532 ((AVX2::is_vector<From>::value && !Scalar::is_vector<Return>::value &&
2533 Traits::is_simd_vector<Return>::value && !Traits::isSimdArray<Return>::value) ||
2534 (AVX2::is_mask<From>::value && !Scalar::is_mask<Return>::value &&
2535 Traits::is_simd_mask<Return>::value &&
2536 !Traits::isSimdMaskArray<Return>::value))),
2537 Return>
2538 simd_cast(const From &x)
2539 {
2540 return simd_cast<Return>(x);
2541 }
2542
2543
// Offset form simd_cast<Return, offset>(x) for offset == 0, SSE -> AVX2.
// Enabled (via the unnamed enable_if parameter) when From is an SSE vector
// and Return an AVX2 vector, or From is an SSE mask and Return an AVX2 mask.
// Forwards to the plain simd_cast<Return>(x).
2544 template <typename Return, int offset, typename From>
2545 Vc_INTRINSIC Vc_CONST Return
2546 simd_cast(const From &x,
2547 enable_if<offset == 0 && ((SSE::is_vector<From>::value &&
2548 AVX2::is_vector<Return>::value) ||
2549 (SSE::is_mask<From>::value &&
2550 AVX2::is_mask<Return>::value))>)
2551 {
2552 return simd_cast<Return>(x);
2553 }
2554
2555
2556
// Offset cast AVX2::Vector<T> -> AVX2 vector Return for offset != 0.
// The slice of x that starts `shift` bytes into the register is moved to the
// front of a temporary V, which is then converted with the plain
// simd_cast<Return>.
2557 template <typename Return, int offset, typename T>
2558 Vc_INTRINSIC Vc_CONST enable_if<(AVX2::is_vector<Return>::value && offset != 0),
2559 Return>
2560 simd_cast(AVX2::Vector<T> x)
2561 {
2562
2563
2564 using V = AVX2::Vector<T>;
// Byte position of the first source element to convert:
// `offset` chunks of Return::Size elements, each sizeof(T) bytes.
2565 constexpr int shift = sizeof(T) * offset * Return::Size;
2566 static_assert(shift > 0 && shift < sizeof(x), "");
// Slice starts inside the low 128-bit half: byte-shift that half right.
2567 if (shift < 16) {
2568 return simd_cast<Return>(V{AVX::avx_cast<typename V::VectorType>(
2569 _mm_srli_si128(AVX::avx_cast<__m128i>(AVX::lo128(x.data())), shift))});
// Slice is exactly the high half: permute128<X1, Const0> presumably moves the
// high half down and fills the upper half with zero (per its arguments).
2570 } else if (shift == 16) {
2571 return simd_cast<Return>(V{Mem::permute128<X1, Const0>(x.data())});
// Slice starts inside the high half: byte-shift the high half right by the
// remaining shift - 16 bytes.
2572 } else {
// NOTE(review): every branch is compiled for every instantiation, so
// `shift - 16` can be out of range where this branch is not taken; MSVC
// warning 4556 is presumably silenced for that reason - confirm.
2573 #ifdef Vc_MSVC
2574 #pragma warning(push)
2575 #pragma warning(disable : 4556)
2576
2577 #endif
2578 return simd_cast<Return>(V{AVX::avx_cast<typename V::VectorType>(
2579 _mm_srli_si128(AVX::avx_cast<__m128i>(AVX::hi128(x.data())), shift - 16))});
2580 #ifdef Vc_MSVC
2581 #pragma warning(pop)
2582 #endif
2583 }
2584 }
2585
// Offset cast AVX2::Vector<T> -> SSE vector Return, for offset != 0 and a
// 32-byte source vector.
// The 128 bits that start byteShift bytes into x are extracted into an SSE
// vector of the source entry type, which the plain simd_cast<Return> then
// converts.
2586 template <typename Return, int offset, typename T>
2587 Vc_INTRINSIC Vc_CONST enable_if<(offset != 0 && SSE::is_vector<Return>::value &&
2588 sizeof(AVX2::Vector<T>) == 32),
2589 Return>
2590 simd_cast(AVX2::Vector<T> x)
2591 {
2592 using Source = AVX2::Vector<T>;
// Bytes per source entry times the number of entries to skip.
2593 constexpr int byteShift = sizeof(Source) / Source::Size * offset * Return::Size;
2594 static_assert(byteShift > 0, "");
2595 static_assert(byteShift < sizeof(Source), "");
2596 using HalfVector = SSE::Vector<typename Source::EntryType>;
// Exactly the upper half: no shifting required.
2597 if (byteShift == 16) {
2598 return simd_cast<Return>(HalfVector{AVX::hi128(x.data())});
2599 }
2600 using Raw = typename HalfVector::VectorType;
// Otherwise shift the hi:lo pair right by byteShift bytes and keep the low
// 128 bits of the result.
2601 return simd_cast<Return>(HalfVector{AVX::avx_cast<Raw>(
2602 _mm_alignr_epi8(AVX::avx_cast<__m128i>(AVX::hi128(x.data())),
2603 AVX::avx_cast<__m128i>(AVX::lo128(x.data())), byteShift))});
2604 }
// Offset cast AVX2::Vector<T> -> SSE vector Return, for offset != 0 when the
// AVX2 vector type is only 16 bytes wide.
// x's data is byte-shifted right by byteShift and wrapped in an SSE vector of
// the source entry type, which the plain simd_cast<Return> then converts.
2605 template <typename Return, int offset, typename T>
2606 Vc_INTRINSIC Vc_CONST enable_if<(offset != 0 && SSE::is_vector<Return>::value &&
2607 sizeof(AVX2::Vector<T>) == 16),
2608 Return>
2609 simd_cast(AVX2::Vector<T> x)
2610 {
2611 using Source = AVX2::Vector<T>;
// Bytes per source entry times the number of entries to skip.
2612 constexpr int byteShift = sizeof(Source) / Source::Size * offset * Return::Size;
2613 static_assert(byteShift > 0, "");
2614 static_assert(byteShift < sizeof(Source), "");
2615 using SameWidthVector = SSE::Vector<typename Source::EntryType>;
2616 return simd_cast<Return>(SameWidthVector{_mm_srli_si128(x.data(), byteShift)});
2617 }
2618
// Offset-1 cast from SSE::short_v / SSE::ushort_v to AVX2::double_v.
// Done in two steps: the offset-1 cast to SSE::int_v, followed by the plain
// cast of that result to AVX2::double_v.
// NOTE(review): Vc_SIMD_CAST_OFFSET is defined outside the visible part of
// this header; from its use here it takes (from, to, offset) and names the
// argument x.
2619 Vc_SIMD_CAST_OFFSET(SSE:: short_v, AVX2::double_v, 1) { return simd_cast<AVX2::double_v>(simd_cast<SSE::int_v, 1>(x)); }
2620 Vc_SIMD_CAST_OFFSET(SSE::ushort_v, AVX2::double_v, 1) { return simd_cast<AVX2::double_v>(simd_cast<SSE::int_v, 1>(x)); }
2621
2622
2623
2624
2625
2626
2627
2628
// Offset-1 cast between AVX2 masks where the source has twice as many entries
// as Return (so each entry doubles in width).
// Offset 1 selects the second half of the source entries, i.e. the high
// 128 bits of k. Interleaving that half with itself byte-wise duplicates
// every byte, so each source entry occupies twice as many bytes in the
// concatenated result.
// NOTE(review): this presumably relies on all bytes of a mask entry being
// equal (all-ones or all-zero) - confirm.
2629 template <typename Return, int offset, typename T>
2630 Vc_INTRINSIC Vc_CONST Return
2631 simd_cast(const AVX2::Mask<T> &k,
2632 enable_if<(AVX2::is_mask<Return>::value && offset == 1 &&
2633 AVX2::Mask<T>::Size == Return::Size * 2)> = nullarg)
2634 {
2635 const auto tmp = AVX::hi128(k.dataI());
2636 return AVX::concat(_mm_unpacklo_epi8(tmp, tmp), _mm_unpackhi_epi8(tmp, tmp));
2637 }
// Offset-1 cast between AVX2 masks where the source has four times as many
// entries as Return (so each entry becomes four times as wide).
// Offset 1 selects the second quarter of the source entries, i.e. the upper
// 8 bytes of the low 128-bit half.
2638 template <typename Return, int offset, typename T>
2639 Vc_INTRINSIC Vc_CONST Return
2640 simd_cast(const AVX2::Mask<T> &k,
2641 enable_if<(AVX2::is_mask<Return>::value && offset == 1 &&
2642 AVX2::Mask<T>::Size == Return::Size * 4)> = nullarg)
2643 {
2644 auto tmp = AVX::lo128(k.dataI());
// First doubling: duplicate each byte of the upper 8 bytes.
2645 tmp = _mm_unpackhi_epi8(tmp, tmp);
// Second doubling: duplicate each 16-bit unit across both result halves.
2646 return AVX::concat(_mm_unpacklo_epi16(tmp, tmp), _mm_unpackhi_epi16(tmp, tmp));
2647 }
// Offset-2 cast between AVX2 masks where the source has four times as many
// entries as Return (so each entry becomes four times as wide).
// Offset 2 selects the third quarter of the source entries, i.e. the lower
// 8 bytes of the high 128-bit half.
2648 template <typename Return, int offset, typename T>
2649 Vc_INTRINSIC Vc_CONST Return
2650 simd_cast(const AVX2::Mask<T> &k,
2651 enable_if<(AVX2::is_mask<Return>::value && offset == 2 &&
2652 AVX2::Mask<T>::Size == Return::Size * 4)> = nullarg)
2653 {
2654 auto tmp = AVX::hi128(k.dataI());
// First doubling: duplicate each byte of the lower 8 bytes.
2655 tmp = _mm_unpacklo_epi8(tmp, tmp);
// Second doubling: duplicate each 16-bit unit across both result halves.
2656 return AVX::concat(_mm_unpacklo_epi16(tmp, tmp), _mm_unpackhi_epi16(tmp, tmp));
2657 }
// Offset-3 cast between AVX2 masks where the source has four times as many
// entries as Return (so each entry becomes four times as wide).
// Offset 3 selects the last quarter of the source entries, i.e. the upper
// 8 bytes of the high 128-bit half.
2658 template <typename Return, int offset, typename T>
2659 Vc_INTRINSIC Vc_CONST Return
2660 simd_cast(const AVX2::Mask<T> &k,
2661 enable_if<(AVX2::is_mask<Return>::value && offset == 3 &&
2662 AVX2::Mask<T>::Size == Return::Size * 4)> = nullarg)
2663 {
2664 auto tmp = AVX::hi128(k.dataI());
// First doubling: duplicate each byte of the upper 8 bytes.
2665 tmp = _mm_unpackhi_epi8(tmp, tmp);
// Second doubling: duplicate each 16-bit unit across both result halves.
2666 return AVX::concat(_mm_unpacklo_epi16(tmp, tmp), _mm_unpackhi_epi16(tmp, tmp));
2667 }
2668
2669
// Offset-1 cast from SSE::short_m / SSE::ushort_m to AVX2::double_m.
// unpackhi_epi16 of x with itself duplicates the upper four 16-bit units
// (source entries 4..7) to 32 bit; duplicating those 32-bit units once more
// yields the four 64-bit entries of the result.
// NOTE(review): Vc_SIMD_CAST_OFFSET is defined outside the visible part of
// this header; from its use here it takes (from, to, offset) and names the
// argument x.
2670 Vc_SIMD_CAST_OFFSET(SSE:: short_m, AVX2::double_m, 1) { auto tmp = _mm_unpackhi_epi16(x.dataI(), x.dataI()); return AVX::concat(_mm_unpacklo_epi32(tmp, tmp), _mm_unpackhi_epi32(tmp, tmp)); }
2671 Vc_SIMD_CAST_OFFSET(SSE::ushort_m, AVX2::double_m, 1) { auto tmp = _mm_unpackhi_epi16(x.dataI(), x.dataI()); return AVX::concat(_mm_unpacklo_epi32(tmp, tmp), _mm_unpackhi_epi32(tmp, tmp)); }
2672
2673
// Offset cast AVX2::Mask<T> -> SSE mask Return, for offset != 0 and a 32-byte
// source mask.
// The 128 bits that start byteShift bytes into x are extracted into an SSE
// mask with the source's entry type, which the plain simd_cast<Return> then
// converts.
2674 template <typename Return, int offset, typename T>
2675 Vc_INTRINSIC Vc_CONST enable_if<(offset != 0 && SSE::is_mask<Return>::value &&
2676 sizeof(AVX2::Mask<T>) == 32),
2677 Return>
2678 simd_cast(AVX2::Mask<T> x)
2679 {
2680 using Source = AVX2::Mask<T>;
// Bytes per source entry times the number of entries to skip.
2681 constexpr int byteShift = sizeof(Source) / Source::Size * offset * Return::Size;
2682 static_assert(byteShift > 0, "");
2683 static_assert(byteShift < sizeof(Source), "");
2684 using SseMask = SSE::Mask<Traits::entry_type_of<typename Source::Vector>>;
// Exactly the upper half: no shifting required.
2685 if (byteShift == 16) {
2686 return simd_cast<Return>(SseMask{AVX::hi128(x.data())});
2687 }
2688 using Raw = typename SseMask::VectorType;
// Otherwise shift the hi:lo pair right by byteShift bytes and keep the low
// 128 bits of the result.
2689 return simd_cast<Return>(SseMask{AVX::avx_cast<Raw>(
2690 _mm_alignr_epi8(AVX::hi128(x.dataI()), AVX::lo128(x.dataI()), byteShift))});
2691 }
2692
// Offset cast AVX2::Mask<T> -> SSE mask Return, for offset != 0 when the AVX2
// mask type is only 16 bytes wide.
// The mask is first converted to SSE::Mask<T> with the plain cast; the offset
// cast is then delegated to the simd_cast<Return, offset> overload for that
// SSE mask.
2693 template <typename Return, int offset, typename T>
2694 Vc_INTRINSIC Vc_CONST enable_if<(offset != 0 && SSE::is_mask<Return>::value &&
2695 sizeof(AVX2::Mask<T>) == 16),
2696 Return>
2697 simd_cast(AVX2::Mask<T> x)
2698 {
2699 return simd_cast<Return, offset>(simd_cast<SSE::Mask<T>>(x));
2700 }
2701
2702
// Remove the helper macros used to declare the simd_cast overloads in this
// header so that they are not visible to code that includes it.
// AVX2-namespace shorthands (1 to 4 arguments):
2703 #undef Vc_SIMD_CAST_AVX_1
2704 #undef Vc_SIMD_CAST_AVX_2
2705 #undef Vc_SIMD_CAST_AVX_3
2706 #undef Vc_SIMD_CAST_AVX_4
2707
// Fully qualified from/to variants (1 to 8 arguments):
2708 #undef Vc_SIMD_CAST_1
2709 #undef Vc_SIMD_CAST_2
2710 #undef Vc_SIMD_CAST_3
2711 #undef Vc_SIMD_CAST_4
2712 #undef Vc_SIMD_CAST_5
2713 #undef Vc_SIMD_CAST_6
2714 #undef Vc_SIMD_CAST_7
2715 #undef Vc_SIMD_CAST_8
2716
// Offset-cast variant:
2717 #undef Vc_SIMD_CAST_OFFSET
2718
2719
2720 }
2721
2722 #endif
2723
2724