Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-31 10:25:35

0001 /*  This file is part of the Vc library. {{{
0002 Copyright © 2014-2015 Matthias Kretz <kretz@kde.org>
0003 
0004 Redistribution and use in source and binary forms, with or without
0005 modification, are permitted provided that the following conditions are met:
0006     * Redistributions of source code must retain the above copyright
0007       notice, this list of conditions and the following disclaimer.
0008     * Redistributions in binary form must reproduce the above copyright
0009       notice, this list of conditions and the following disclaimer in the
0010       documentation and/or other materials provided with the distribution.
0011     * Neither the names of contributing organizations nor the
0012       names of its contributors may be used to endorse or promote products
0013       derived from this software without specific prior written permission.
0014 
0015 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
0016 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
0017 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
0018 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
0019 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
0020 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
0021 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
0022 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
0023 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
0024 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
0025 
0026 }}}*/
0027 
0028 #ifndef VC_AVX_SIMD_CAST_H_
0029 #define VC_AVX_SIMD_CAST_H_
0030 
0031 #ifndef VC_AVX_VECTOR_H_
0032 #error "Vc/avx/vector.h needs to be included before Vc/avx/simd_cast.h"
0033 #endif
0034 #include "macros.h"
0035 
0036 namespace Vc_VERSIONED_NAMESPACE
0037 {
0038 // Declarations: helper macros Vc_SIMD_CAST_AVX_[124] & Vc_SIMD_CAST_[124] {{{1
// Each Vc_SIMD_CAST_AVX_n macro expands to the signature of a simd_cast
// overload converting n source vectors of type AVX2::from_ into one
// AVX2::to_.  The enable_if default argument SFINAE-restricts the overload
// to the requested destination type To.  The macros are used both for the
// declarations below and (followed by a body) for the definitions later in
// this file, so the parameter names x / x0..x3 are part of the contract.
0039 #define Vc_SIMD_CAST_AVX_1(from_, to_)                                                   \
0040     template <typename To>                                                               \
0041     Vc_INTRINSIC Vc_CONST To simd_cast(                                                  \
0042         AVX2::from_ x, enable_if<std::is_same<To, AVX2::to_>::value> = nullarg)
0043
0044 #define Vc_SIMD_CAST_AVX_2(from_, to_)                                                   \
0045     template <typename To>                                                               \
0046     Vc_INTRINSIC Vc_CONST To simd_cast(                                                  \
0047         AVX2::from_ x0, AVX2::from_ x1,                                                  \
0048         enable_if<std::is_same<To, AVX2::to_>::value> = nullarg)
0049
0050 #define Vc_SIMD_CAST_AVX_3(from_, to_)                                                   \
0051     template <typename To>                                                               \
0052     Vc_INTRINSIC Vc_CONST To simd_cast(                                                  \
0053         AVX2::from_ x0, AVX2::from_ x1, AVX2::from_ x2,                                  \
0054         enable_if<std::is_same<To, AVX2::to_>::value> = nullarg)
0055
0056 #define Vc_SIMD_CAST_AVX_4(from_, to_)                                                   \
0057     template <typename To>                                                               \
0058     Vc_INTRINSIC Vc_CONST To simd_cast(                                                  \
0059         AVX2::from_ x0, AVX2::from_ x1, AVX2::from_ x2, AVX2::from_ x3,                  \
0060         enable_if<std::is_same<To, AVX2::to_>::value> = nullarg)
0061
// Vc_SIMD_CAST_n (n = 1..8): same pattern, but from_ and to_ are taken as
// fully qualified types (e.g. SSE::float_v, AVX2::int_v), so these cover
// cross-implementation casts as well.
0062 #define Vc_SIMD_CAST_1(from_, to_)                                                       \
0063     template <typename To>                                                               \
0064     Vc_INTRINSIC Vc_CONST To simd_cast(                                                  \
0065         from_ x, enable_if<std::is_same<To, to_>::value> = nullarg)
0066
0067 #define Vc_SIMD_CAST_2(from_, to_)                                                       \
0068     template <typename To>                                                               \
0069     Vc_INTRINSIC Vc_CONST To simd_cast(                                                  \
0070         from_ x0, from_ x1, enable_if<std::is_same<To, to_>::value> = nullarg)
0071
0072 #define Vc_SIMD_CAST_3(from_, to_)                                                       \
0073     template <typename To>                                                               \
0074     Vc_INTRINSIC Vc_CONST To simd_cast(                                                  \
0075         from_ x0, from_ x1, from_ x2, enable_if<std::is_same<To, to_>::value> = nullarg)
0076
0077 #define Vc_SIMD_CAST_4(from_, to_)                                                       \
0078     template <typename To>                                                               \
0079     Vc_INTRINSIC Vc_CONST To simd_cast(                                                  \
0080         from_ x0, from_ x1, from_ x2, from_ x3,                                          \
0081         enable_if<std::is_same<To, to_>::value> = nullarg)
0082
0083 #define Vc_SIMD_CAST_5(from_, to_)                                                       \
0084     template <typename To>                                                               \
0085     Vc_INTRINSIC Vc_CONST To simd_cast(                                                  \
0086         from_ x0, from_ x1, from_ x2, from_ x3, from_ x4,                                \
0087         enable_if<std::is_same<To, to_>::value> = nullarg)
0088
0089 #define Vc_SIMD_CAST_6(from_, to_)                                                       \
0090     template <typename To>                                                               \
0091     Vc_INTRINSIC Vc_CONST To simd_cast(                                                  \
0092         from_ x0, from_ x1, from_ x2, from_ x3, from_ x4, from_ x5,                      \
0093         enable_if<std::is_same<To, to_>::value> = nullarg)
0094
0095 #define Vc_SIMD_CAST_7(from_, to_)                                                       \
0096     template <typename To>                                                               \
0097     Vc_INTRINSIC Vc_CONST To simd_cast(                                                  \
0098         from_ x0, from_ x1, from_ x2, from_ x3, from_ x4, from_ x5, from_ x6,            \
0099         enable_if<std::is_same<To, to_>::value> = nullarg)
0100
0101 #define Vc_SIMD_CAST_8(from_, to_)                                                       \
0102     template <typename To>                                                               \
0103     Vc_INTRINSIC Vc_CONST To simd_cast(                                                  \
0104         from_ x0, from_ x1, from_ x2, from_ x3, from_ x4, from_ x5, from_ x6, from_ x7,  \
0105         enable_if<std::is_same<To, to_>::value> = nullarg)
0106
// Vc_SIMD_CAST_OFFSET: declares simd_cast<To, offset>(from_) for exactly one
// compile-time offset value offset_; the leading static_assert rejects
// instantiations where to_::size() * (offset_ + 1) would read past from_.
0107 #define Vc_SIMD_CAST_OFFSET(from_, to_, offset_)                                         \
0108     static_assert(from_::size() >= to_::size() * (offset_ + 1),                          \
0109                   "this offset cannot exist for this type combination");                 \
0110     template <typename To, int offset>                                                   \
0111     Vc_INTRINSIC Vc_CONST To simd_cast(                                                  \
0112         from_ x,                                                                         \
0113         enable_if<(offset == offset_ && std::is_same<To, to_>::value)> = nullarg)
0114 
0115 // Declaration: SSE -> AVX where the AVX Vector is integral and thus of equal size() {{{1
0116 // as the equivalent SSE Vector
// These overloads are enabled only when the SSE vector of To's entry type has
// the same element count as the AVX2 destination To itself (i.e. no widening
// of the element count happens in the cast).  Overloads are declared for
// 1, 2, 3, 4 and 8 source vectors; 5-7 are not declared here.
0117 template <typename To, typename From>
0118 Vc_INTRINSIC Vc_CONST To
0119 simd_cast(From x, enable_if<(AVX2::is_vector<To>::value && SSE::is_vector<From>::value &&
0120                              SSE::Vector<typename To::EntryType>::Size == To::Size)> =
0121                       nullarg);
0122 template <typename To, typename From>
0123 Vc_INTRINSIC Vc_CONST To simd_cast(
0124     From x0, From x1,
0125     enable_if<(AVX2::is_vector<To>::value && SSE::is_vector<From>::value &&
0126                SSE::Vector<typename To::EntryType>::Size == To::Size)> = nullarg);
0127 template <typename To, typename From>
0128 Vc_INTRINSIC Vc_CONST To simd_cast(
0129     From x0, From x1, From x2,
0130     enable_if<(AVX2::is_vector<To>::value && SSE::is_vector<From>::value &&
0131                SSE::Vector<typename To::EntryType>::Size == To::Size)> = nullarg);
0132 template <typename To, typename From>
0133 Vc_INTRINSIC Vc_CONST To simd_cast(
0134     From x0, From x1, From x2, From x3,
0135     enable_if<(AVX2::is_vector<To>::value && SSE::is_vector<From>::value &&
0136                SSE::Vector<typename To::EntryType>::Size == To::Size)> = nullarg);
// 8-argument form; note there is no 5/6/7-argument variant in this group.
0137 template <typename To, typename From>
0138 Vc_INTRINSIC Vc_CONST To simd_cast(
0139     From x0, From x1, From x2, From x3, From x4, From x5, From x6, From x7,
0140     enable_if<(AVX2::is_vector<To>::value && SSE::is_vector<From>::value &&
0141                SSE::Vector<typename To::EntryType>::Size == To::Size)> = nullarg);
0142 
0143 // Declarations: Vector casts without offset {{{1
0144 // AVX2::Vector {{{2
// AVX2 -> AVX2 conversions.  Each Vc_SIMD_CAST_AVX_n line declares one
// overload taking n source vectors.  Only float_v/double_v conversions are
// available without AVX2; all integral combinations are guarded by
// Vc_IMPL_AVX2 because the integral AVX2 vector types require AVX2.
0145 Vc_SIMD_CAST_AVX_1( float_v, double_v);
0146
0147 Vc_SIMD_CAST_AVX_1(double_v,  float_v);
0148 Vc_SIMD_CAST_AVX_2(double_v,  float_v);
0149
0150 #ifdef Vc_IMPL_AVX2
0151 Vc_SIMD_CAST_AVX_1(   int_v, double_v);
0152 Vc_SIMD_CAST_AVX_1(  uint_v, double_v);
0153 Vc_SIMD_CAST_AVX_1( short_v, double_v);
0154 Vc_SIMD_CAST_AVX_1(ushort_v, double_v);
0155
0156 Vc_SIMD_CAST_AVX_1(   int_v,  float_v);
0157 Vc_SIMD_CAST_AVX_1(  uint_v,  float_v);
0158 Vc_SIMD_CAST_AVX_1( short_v,  float_v);
0159 Vc_SIMD_CAST_AVX_1(ushort_v,  float_v);
0160
0161 Vc_SIMD_CAST_AVX_1(double_v,    int_v);
0162 Vc_SIMD_CAST_AVX_1( float_v,    int_v);
0163 Vc_SIMD_CAST_AVX_1(  uint_v,    int_v);
0164 Vc_SIMD_CAST_AVX_1( short_v,    int_v);
0165 Vc_SIMD_CAST_AVX_1(ushort_v,    int_v);
0166 Vc_SIMD_CAST_AVX_2(double_v,    int_v);
0167
0168 Vc_SIMD_CAST_AVX_1(double_v,   uint_v);
0169 Vc_SIMD_CAST_AVX_1( float_v,   uint_v);
0170 Vc_SIMD_CAST_AVX_1(   int_v,   uint_v);
0171 Vc_SIMD_CAST_AVX_1( short_v,   uint_v);
0172 Vc_SIMD_CAST_AVX_1(ushort_v,   uint_v);
0173 Vc_SIMD_CAST_AVX_2(double_v,   uint_v);
0174
// short_v/ushort_v destinations hold more elements, hence up to 4 double_v
// and up to 2 float_v/int_v/uint_v sources are accepted.
0175 Vc_SIMD_CAST_AVX_1(double_v,  short_v);
0176 Vc_SIMD_CAST_AVX_1( float_v,  short_v);
0177 Vc_SIMD_CAST_AVX_1(   int_v,  short_v);
0178 Vc_SIMD_CAST_AVX_1(  uint_v,  short_v);
0179 Vc_SIMD_CAST_AVX_1(ushort_v,  short_v);
0180 Vc_SIMD_CAST_AVX_2(double_v,  short_v);
0181 Vc_SIMD_CAST_AVX_2( float_v,  short_v);
0182 Vc_SIMD_CAST_AVX_2(   int_v,  short_v);
0183 Vc_SIMD_CAST_AVX_2(  uint_v,  short_v);
0184 Vc_SIMD_CAST_AVX_3(double_v,  short_v);
0185 Vc_SIMD_CAST_AVX_4(double_v,  short_v);
0186
0187 Vc_SIMD_CAST_AVX_1(double_v, ushort_v);
0188 Vc_SIMD_CAST_AVX_1( float_v, ushort_v);
0189 Vc_SIMD_CAST_AVX_1(   int_v, ushort_v);
0190 Vc_SIMD_CAST_AVX_1(  uint_v, ushort_v);
0191 Vc_SIMD_CAST_AVX_1( short_v, ushort_v);
0192 Vc_SIMD_CAST_AVX_2(double_v, ushort_v);
0193 Vc_SIMD_CAST_AVX_2( float_v, ushort_v);
0194 Vc_SIMD_CAST_AVX_2(   int_v, ushort_v);
0195 Vc_SIMD_CAST_AVX_2(  uint_v, ushort_v);
0196 Vc_SIMD_CAST_AVX_3(double_v, ushort_v);
0197 Vc_SIMD_CAST_AVX_4(double_v, ushort_v);
0198 #endif
0199 
0200 // 1 SSE::Vector to 1 AVX2::Vector {{{2
// SSE -> AVX2 conversions.  The n-argument variants concatenate n SSE
// vectors and convert the result into a single AVX2 vector; larger n is
// only declared where the destination has enough elements (e.g. up to
// 8 SSE::double_v into one AVX2::short_v/ushort_v).
0201 Vc_SIMD_CAST_1(SSE::double_v, AVX2::double_v);
0202 Vc_SIMD_CAST_1(SSE:: float_v, AVX2::double_v);
0203 Vc_SIMD_CAST_1(SSE::   int_v, AVX2::double_v);
0204 Vc_SIMD_CAST_1(SSE::  uint_v, AVX2::double_v);
0205 Vc_SIMD_CAST_1(SSE:: short_v, AVX2::double_v);
0206 Vc_SIMD_CAST_1(SSE::ushort_v, AVX2::double_v);
0207
0208 Vc_SIMD_CAST_1(SSE::double_v, AVX2:: float_v);
0209 Vc_SIMD_CAST_1(SSE:: float_v, AVX2:: float_v);
0210 Vc_SIMD_CAST_1(SSE::   int_v, AVX2:: float_v);
0211 Vc_SIMD_CAST_1(SSE::  uint_v, AVX2:: float_v);
0212 Vc_SIMD_CAST_1(SSE:: short_v, AVX2:: float_v);
0213 Vc_SIMD_CAST_1(SSE::ushort_v, AVX2:: float_v);
0214
0215 #ifdef Vc_IMPL_AVX2
0216 Vc_SIMD_CAST_1(SSE::double_v, AVX2::   int_v);
0217 Vc_SIMD_CAST_1(SSE::double_v, AVX2::  uint_v);
0218 Vc_SIMD_CAST_1(SSE::double_v, AVX2:: short_v);
0219 Vc_SIMD_CAST_1(SSE::double_v, AVX2::ushort_v);
0220
0221 Vc_SIMD_CAST_1(SSE:: float_v, AVX2::   int_v);
0222 Vc_SIMD_CAST_1(SSE:: float_v, AVX2::  uint_v);
0223 Vc_SIMD_CAST_1(SSE:: float_v, AVX2:: short_v);
0224 Vc_SIMD_CAST_1(SSE:: float_v, AVX2::ushort_v);
0225
0226 Vc_SIMD_CAST_1(SSE::   int_v, AVX2::   int_v);
0227 Vc_SIMD_CAST_1(SSE::  uint_v, AVX2::   int_v);
0228 Vc_SIMD_CAST_1(SSE:: short_v, AVX2::   int_v);
0229 Vc_SIMD_CAST_1(SSE::ushort_v, AVX2::   int_v);
0230
0231 Vc_SIMD_CAST_1(SSE::   int_v, AVX2::  uint_v);
0232 Vc_SIMD_CAST_1(SSE::  uint_v, AVX2::  uint_v);
0233 Vc_SIMD_CAST_1(SSE:: short_v, AVX2::  uint_v);
0234 Vc_SIMD_CAST_1(SSE::ushort_v, AVX2::  uint_v);
0235
0236 Vc_SIMD_CAST_1(SSE::   int_v, AVX2:: short_v);
0237 Vc_SIMD_CAST_1(SSE::  uint_v, AVX2:: short_v);
0238 Vc_SIMD_CAST_1(SSE:: short_v, AVX2:: short_v);
0239 Vc_SIMD_CAST_1(SSE::ushort_v, AVX2:: short_v);
0240
0241 Vc_SIMD_CAST_1(SSE::   int_v, AVX2::ushort_v);
0242 Vc_SIMD_CAST_1(SSE::  uint_v, AVX2::ushort_v);
0243 Vc_SIMD_CAST_1(SSE:: short_v, AVX2::ushort_v);
0244 Vc_SIMD_CAST_1(SSE::ushort_v, AVX2::ushort_v);
0245 #endif
0246
0247 // 2 SSE::Vector to 1 AVX2::Vector {{{2
0248 Vc_SIMD_CAST_2(SSE::double_v, AVX2::double_v);
0249
0250 Vc_SIMD_CAST_2(SSE::double_v, AVX2:: float_v);
0251 Vc_SIMD_CAST_2(SSE:: float_v, AVX2:: float_v);
0252 Vc_SIMD_CAST_2(SSE::   int_v, AVX2:: float_v);
0253 Vc_SIMD_CAST_2(SSE::  uint_v, AVX2:: float_v);
0254
0255 #ifdef Vc_IMPL_AVX2
0256 Vc_SIMD_CAST_2(SSE::double_v, AVX2::   int_v);
0257 Vc_SIMD_CAST_2(SSE::double_v, AVX2::  uint_v);
0258 Vc_SIMD_CAST_2(SSE::double_v, AVX2:: short_v);
0259 Vc_SIMD_CAST_2(SSE::double_v, AVX2::ushort_v);
0260
0261 Vc_SIMD_CAST_2(SSE:: float_v, AVX2::   int_v);
0262 Vc_SIMD_CAST_2(SSE:: float_v, AVX2::  uint_v);
0263 Vc_SIMD_CAST_2(SSE:: float_v, AVX2:: short_v);
0264 Vc_SIMD_CAST_2(SSE:: float_v, AVX2::ushort_v);
0265
0266 Vc_SIMD_CAST_2(SSE::   int_v, AVX2::   int_v);
0267 Vc_SIMD_CAST_2(SSE::  uint_v, AVX2::   int_v);
0268
0269 Vc_SIMD_CAST_2(SSE::   int_v, AVX2::  uint_v);
0270 Vc_SIMD_CAST_2(SSE::  uint_v, AVX2::  uint_v);
0271
0272 Vc_SIMD_CAST_2(SSE::   int_v, AVX2:: short_v);
0273 Vc_SIMD_CAST_2(SSE::  uint_v, AVX2:: short_v);
0274 Vc_SIMD_CAST_2(SSE:: short_v, AVX2:: short_v);
0275 Vc_SIMD_CAST_2(SSE::ushort_v, AVX2:: short_v);
0276
0277 Vc_SIMD_CAST_2(SSE::   int_v, AVX2::ushort_v);
0278 Vc_SIMD_CAST_2(SSE::  uint_v, AVX2::ushort_v);
0279 Vc_SIMD_CAST_2(SSE:: short_v, AVX2::ushort_v);
0280 Vc_SIMD_CAST_2(SSE::ushort_v, AVX2::ushort_v);
0281 #endif
0282
0283 // 3 SSE::Vector to 1 AVX2::Vector {{{2
0284 Vc_SIMD_CAST_3(SSE::double_v, AVX2:: float_v);
0285
0286 #ifdef Vc_IMPL_AVX2
0287 Vc_SIMD_CAST_3(SSE::double_v, AVX2::   int_v);
0288 Vc_SIMD_CAST_3(SSE::double_v, AVX2::  uint_v);
0289 Vc_SIMD_CAST_3(SSE::double_v, AVX2:: short_v);
0290 Vc_SIMD_CAST_3(SSE::double_v, AVX2::ushort_v);
0291
0292 Vc_SIMD_CAST_3(SSE:: float_v, AVX2:: short_v);
0293 Vc_SIMD_CAST_3(SSE:: float_v, AVX2::ushort_v);
0294
0295 Vc_SIMD_CAST_3(SSE::   int_v, AVX2:: short_v);
0296 Vc_SIMD_CAST_3(SSE::  uint_v, AVX2:: short_v);
0297
0298 Vc_SIMD_CAST_3(SSE::   int_v, AVX2::ushort_v);
0299 Vc_SIMD_CAST_3(SSE::  uint_v, AVX2::ushort_v);
0300 #endif
0301
0302 // 4 SSE::Vector to 1 AVX2::Vector {{{2
0303 Vc_SIMD_CAST_4(SSE::double_v, AVX2:: float_v);
0304
0305 #ifdef Vc_IMPL_AVX2
0306 Vc_SIMD_CAST_4(SSE::double_v, AVX2::   int_v);
0307 Vc_SIMD_CAST_4(SSE::double_v, AVX2::  uint_v);
0308 Vc_SIMD_CAST_4(SSE::double_v, AVX2:: short_v);
0309 Vc_SIMD_CAST_4(SSE::double_v, AVX2::ushort_v);
0310
0311 Vc_SIMD_CAST_4(SSE:: float_v, AVX2:: short_v);
0312 Vc_SIMD_CAST_4(SSE:: float_v, AVX2::ushort_v);
0313
0314 Vc_SIMD_CAST_4(SSE::   int_v, AVX2:: short_v);
0315 Vc_SIMD_CAST_4(SSE::  uint_v, AVX2:: short_v);
0316
0317 Vc_SIMD_CAST_4(SSE::   int_v, AVX2::ushort_v);
0318 Vc_SIMD_CAST_4(SSE::  uint_v, AVX2::ushort_v);
0319 #endif
0320
// 5..8 sources only make sense for the 16-element short_v/ushort_v
// destinations, which require AVX2.
0321 // 5 SSE::Vector to 1 AVX2::Vector {{{2
0322 #ifdef Vc_IMPL_AVX2
0323 Vc_SIMD_CAST_5(SSE::double_v, AVX2:: short_v);
0324 Vc_SIMD_CAST_5(SSE::double_v, AVX2::ushort_v);
0325 #endif
0326
0327 // 6 SSE::Vector to 1 AVX2::Vector {{{2
0328 #ifdef Vc_IMPL_AVX2
0329 Vc_SIMD_CAST_6(SSE::double_v, AVX2:: short_v);
0330 Vc_SIMD_CAST_6(SSE::double_v, AVX2::ushort_v);
0331 #endif
0332
0333 // 7 SSE::Vector to 1 AVX2::Vector {{{2
0334 #ifdef Vc_IMPL_AVX2
0335 Vc_SIMD_CAST_7(SSE::double_v, AVX2:: short_v);
0336 Vc_SIMD_CAST_7(SSE::double_v, AVX2::ushort_v);
0337 #endif
0338
0339 // 8 SSE::Vector to 1 AVX2::Vector {{{2
0340 #ifdef Vc_IMPL_AVX2
0341 Vc_SIMD_CAST_8(SSE::double_v, AVX2:: short_v);
0342 Vc_SIMD_CAST_8(SSE::double_v, AVX2::ushort_v);
0343 #endif
0344 
0345 // 1 AVX2::Vector to 1 SSE::Vector {{{2
// Narrowing direction: one AVX2 vector converted into one SSE vector.
// double_v/float_v sources work on plain AVX; integral AVX2 sources are
// guarded by Vc_IMPL_AVX2.
0346 Vc_SIMD_CAST_1(AVX2::double_v, SSE::double_v);
0347 Vc_SIMD_CAST_1(AVX2::double_v, SSE:: float_v);
0348 Vc_SIMD_CAST_1(AVX2::double_v, SSE::   int_v);
0349 Vc_SIMD_CAST_1(AVX2::double_v, SSE::  uint_v);
0350 Vc_SIMD_CAST_1(AVX2::double_v, SSE:: short_v);
0351 Vc_SIMD_CAST_1(AVX2::double_v, SSE::ushort_v);
0352
0353 Vc_SIMD_CAST_1(AVX2:: float_v, SSE::double_v);
0354 Vc_SIMD_CAST_1(AVX2:: float_v, SSE:: float_v);
0355 Vc_SIMD_CAST_1(AVX2:: float_v, SSE::   int_v);
0356 Vc_SIMD_CAST_1(AVX2:: float_v, SSE::  uint_v);
0357 Vc_SIMD_CAST_1(AVX2:: float_v, SSE:: short_v);
0358 Vc_SIMD_CAST_1(AVX2:: float_v, SSE::ushort_v);
0359
0360 #ifdef Vc_IMPL_AVX2
0361 Vc_SIMD_CAST_1(AVX2::   int_v, SSE::double_v);
0362 Vc_SIMD_CAST_1(AVX2::   int_v, SSE:: float_v);
0363 Vc_SIMD_CAST_1(AVX2::   int_v, SSE::  uint_v);
0364 Vc_SIMD_CAST_1(AVX2::   int_v, SSE::   int_v);
0365 Vc_SIMD_CAST_1(AVX2::   int_v, SSE:: short_v);
0366 Vc_SIMD_CAST_1(AVX2::   int_v, SSE::ushort_v);
0367
0368 Vc_SIMD_CAST_1(AVX2::  uint_v, SSE::double_v);
0369 Vc_SIMD_CAST_1(AVX2::  uint_v, SSE:: float_v);
0370 Vc_SIMD_CAST_1(AVX2::  uint_v, SSE::   int_v);
0371 Vc_SIMD_CAST_1(AVX2::  uint_v, SSE::  uint_v);
0372 Vc_SIMD_CAST_1(AVX2::  uint_v, SSE:: short_v);
0373 Vc_SIMD_CAST_1(AVX2::  uint_v, SSE::ushort_v);
0374
0375 Vc_SIMD_CAST_1(AVX2:: short_v, SSE::double_v);
0376 Vc_SIMD_CAST_1(AVX2:: short_v, SSE:: float_v);
0377 Vc_SIMD_CAST_1(AVX2:: short_v, SSE::   int_v);
0378 Vc_SIMD_CAST_1(AVX2:: short_v, SSE::  uint_v);
0379 Vc_SIMD_CAST_1(AVX2:: short_v, SSE:: short_v);
0380 Vc_SIMD_CAST_1(AVX2:: short_v, SSE::ushort_v);
0381
0382 Vc_SIMD_CAST_1(AVX2::ushort_v, SSE::double_v);
0383 Vc_SIMD_CAST_1(AVX2::ushort_v, SSE:: float_v);
0384 Vc_SIMD_CAST_1(AVX2::ushort_v, SSE::   int_v);
0385 Vc_SIMD_CAST_1(AVX2::ushort_v, SSE::  uint_v);
0386 Vc_SIMD_CAST_1(AVX2::ushort_v, SSE:: short_v);
0387 Vc_SIMD_CAST_1(AVX2::ushort_v, SSE::ushort_v);
0388 #endif
0389
0390 // 2 AVX2::Vector to 1 SSE::Vector {{{2
// Two 4-element AVX2::double_v concatenated and narrowed into one 8-element
// SSE short_v/ushort_v.
0391 Vc_SIMD_CAST_2(AVX2::double_v, SSE:: short_v);
0392 Vc_SIMD_CAST_2(AVX2::double_v, SSE::ushort_v);
0393 
0394 // 1 Scalar::Vector to 1 AVX2::Vector {{{2
// Scalar -> AVX2 conversions: n Scalar::Vector<T> arguments (one element
// each) are gathered into one AVX2 vector.  One set of overloads exists per
// destination type; integral destinations are guarded by Vc_IMPL_AVX2.
0395 template <typename Return, typename T>
0396 Vc_INTRINSIC Vc_CONST Return
0397 simd_cast(Scalar::Vector<T> x,
0398           enable_if<std::is_same<Return, AVX2::double_v>::value> = nullarg);
0399 template <typename Return, typename T>
0400 Vc_INTRINSIC Vc_CONST Return
0401 simd_cast(Scalar::Vector<T> x,
0402           enable_if<std::is_same<Return, AVX2::float_v>::value> = nullarg);
0403 #ifdef Vc_IMPL_AVX2
0404 template <typename Return, typename T>
0405 Vc_INTRINSIC Vc_CONST Return
0406 simd_cast(Scalar::Vector<T> x,
0407           enable_if<std::is_same<Return, AVX2::int_v>::value> = nullarg);
0408 template <typename Return, typename T>
0409 Vc_INTRINSIC Vc_CONST Return
0410 simd_cast(Scalar::Vector<T> x,
0411           enable_if<std::is_same<Return, AVX2::uint_v>::value> = nullarg);
0412 template <typename Return, typename T>
0413 Vc_INTRINSIC Vc_CONST Return
0414 simd_cast(Scalar::Vector<T> x,
0415           enable_if<std::is_same<Return, AVX2::short_v>::value> = nullarg);
0416 template <typename Return, typename T>
0417 Vc_INTRINSIC Vc_CONST Return
0418 simd_cast(Scalar::Vector<T> x,
0419           enable_if<std::is_same<Return, AVX2::ushort_v>::value> = nullarg);
0420 #endif
0421
0422 // 2 Scalar::Vector to 1 AVX2::Vector {{{2
0423 template <typename Return, typename T>
0424 Vc_INTRINSIC Vc_CONST Return
0425 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1,
0426           enable_if<std::is_same<Return, AVX2::double_v>::value> = nullarg);
0427 template <typename Return, typename T>
0428 Vc_INTRINSIC Vc_CONST Return
0429 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1,
0430           enable_if<std::is_same<Return, AVX2::float_v>::value> = nullarg);
0431 #ifdef Vc_IMPL_AVX2
0432 template <typename Return, typename T>
0433 Vc_INTRINSIC Vc_CONST Return
0434 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1,
0435           enable_if<std::is_same<Return, AVX2::int_v>::value> = nullarg);
0436 template <typename Return, typename T>
0437 Vc_INTRINSIC Vc_CONST Return
0438 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1,
0439           enable_if<std::is_same<Return, AVX2::uint_v>::value> = nullarg);
0440 template <typename Return, typename T>
0441 Vc_INTRINSIC Vc_CONST Return
0442 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1,
0443           enable_if<std::is_same<Return, AVX2::short_v>::value> = nullarg);
0444 template <typename Return, typename T>
0445 Vc_INTRINSIC Vc_CONST Return
0446 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1,
0447           enable_if<std::is_same<Return, AVX2::ushort_v>::value> = nullarg);
0448 #endif
0449
0450 // 3 Scalar::Vector to 1 AVX2::Vector {{{2
0451 template <typename Return, typename T>
0452 Vc_INTRINSIC Vc_CONST Return
0453 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0454           enable_if<std::is_same<Return, AVX2::double_v>::value> = nullarg);
0455 template <typename Return, typename T>
0456 Vc_INTRINSIC Vc_CONST Return
0457 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0458           enable_if<std::is_same<Return, AVX2::float_v>::value> = nullarg);
0459 #ifdef Vc_IMPL_AVX2
0460 template <typename Return, typename T>
0461 Vc_INTRINSIC Vc_CONST Return
0462 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0463           enable_if<std::is_same<Return, AVX2::int_v>::value> = nullarg);
0464 template <typename Return, typename T>
0465 Vc_INTRINSIC Vc_CONST Return
0466 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0467           enable_if<std::is_same<Return, AVX2::uint_v>::value> = nullarg);
0468 template <typename Return, typename T>
0469 Vc_INTRINSIC Vc_CONST Return
0470 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0471           enable_if<std::is_same<Return, AVX2::short_v>::value> = nullarg);
0472 template <typename Return, typename T>
0473 Vc_INTRINSIC Vc_CONST Return
0474 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0475           enable_if<std::is_same<Return, AVX2::ushort_v>::value> = nullarg);
0476 #endif
0477
0478 // 4 Scalar::Vector to 1 AVX2::Vector {{{2
0479 template <typename Return, typename T>
0480 Vc_INTRINSIC Vc_CONST Return
0481 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0482           Scalar::Vector<T> x3,
0483           enable_if<std::is_same<Return, AVX2::double_v>::value> = nullarg);
0484 template <typename Return, typename T>
0485 Vc_INTRINSIC Vc_CONST Return
0486 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0487           Scalar::Vector<T> x3,
0488           enable_if<std::is_same<Return, AVX2::float_v>::value> = nullarg);
0489 #ifdef Vc_IMPL_AVX2
0490 template <typename Return, typename T>
0491 Vc_INTRINSIC Vc_CONST Return
0492 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0493           Scalar::Vector<T> x3,
0494           enable_if<std::is_same<Return, AVX2::int_v>::value> = nullarg);
0495 template <typename Return, typename T>
0496 Vc_INTRINSIC Vc_CONST Return
0497 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0498           Scalar::Vector<T> x3,
0499           enable_if<std::is_same<Return, AVX2::uint_v>::value> = nullarg);
0500 template <typename Return, typename T>
0501 Vc_INTRINSIC Vc_CONST Return
0502 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0503           Scalar::Vector<T> x3,
0504           enable_if<std::is_same<Return, AVX2::short_v>::value> = nullarg);
0505 template <typename Return, typename T>
0506 Vc_INTRINSIC Vc_CONST Return
0507 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0508           Scalar::Vector<T> x3,
0509           enable_if<std::is_same<Return, AVX2::ushort_v>::value> = nullarg);
0510 #endif
0511 
0512 // 5 Scalar::Vector to 1 AVX2::Vector {{{2
// From 5 arguments onward, AVX2::double_v (4 elements) can no longer be a
// destination, so only float_v and the integral types are declared.
0513 template <typename Return, typename T>
0514 Vc_INTRINSIC Vc_CONST Return
0515 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0516           Scalar::Vector<T> x3, Scalar::Vector<T> x4,
0517           enable_if<std::is_same<Return, AVX2::float_v>::value> = nullarg);
0518 #ifdef Vc_IMPL_AVX2
0519 template <typename Return, typename T>
0520 Vc_INTRINSIC Vc_CONST Return
0521 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0522           Scalar::Vector<T> x3, Scalar::Vector<T> x4,
0523           enable_if<std::is_same<Return, AVX2::int_v>::value> = nullarg);
0524 template <typename Return, typename T>
0525 Vc_INTRINSIC Vc_CONST Return
0526 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0527           Scalar::Vector<T> x3, Scalar::Vector<T> x4,
0528           enable_if<std::is_same<Return, AVX2::uint_v>::value> = nullarg);
0529 template <typename Return, typename T>
0530 Vc_INTRINSIC Vc_CONST Return
0531 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0532           Scalar::Vector<T> x3, Scalar::Vector<T> x4,
0533           enable_if<std::is_same<Return, AVX2::short_v>::value> = nullarg);
0534 template <typename Return, typename T>
0535 Vc_INTRINSIC Vc_CONST Return
0536 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0537           Scalar::Vector<T> x3, Scalar::Vector<T> x4,
0538           enable_if<std::is_same<Return, AVX2::ushort_v>::value> = nullarg);
0539 #endif
0540
0541 // 6 Scalar::Vector to 1 AVX2::Vector {{{2
0542 template <typename Return, typename T>
0543 Vc_INTRINSIC Vc_CONST Return
0544 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0545           Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0546           enable_if<std::is_same<Return, AVX2::float_v>::value> = nullarg);
0547 #ifdef Vc_IMPL_AVX2
0548 template <typename Return, typename T>
0549 Vc_INTRINSIC Vc_CONST Return
0550 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0551           Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0552           enable_if<std::is_same<Return, AVX2::int_v>::value> = nullarg);
0553 template <typename Return, typename T>
0554 Vc_INTRINSIC Vc_CONST Return
0555 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0556           Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0557           enable_if<std::is_same<Return, AVX2::uint_v>::value> = nullarg);
0558 template <typename Return, typename T>
0559 Vc_INTRINSIC Vc_CONST Return
0560 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0561           Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0562           enable_if<std::is_same<Return, AVX2::short_v>::value> = nullarg);
0563 template <typename Return, typename T>
0564 Vc_INTRINSIC Vc_CONST Return
0565 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0566           Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0567           enable_if<std::is_same<Return, AVX2::ushort_v>::value> = nullarg);
0568 #endif
0569
0570 // 7 Scalar::Vector to 1 AVX2::Vector {{{2
0571 template <typename Return, typename T>
0572 Vc_INTRINSIC Vc_CONST Return
0573 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0574           Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0575           Scalar::Vector<T> x6,
0576           enable_if<std::is_same<Return, AVX2::float_v>::value> = nullarg);
0577 #ifdef Vc_IMPL_AVX2
0578 template <typename Return, typename T>
0579 Vc_INTRINSIC Vc_CONST Return
0580 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0581           Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0582           Scalar::Vector<T> x6,
0583           enable_if<std::is_same<Return, AVX2::int_v>::value> = nullarg);
0584 template <typename Return, typename T>
0585 Vc_INTRINSIC Vc_CONST Return
0586 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0587           Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0588           Scalar::Vector<T> x6,
0589           enable_if<std::is_same<Return, AVX2::uint_v>::value> = nullarg);
0590 template <typename Return, typename T>
0591 Vc_INTRINSIC Vc_CONST Return
0592 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0593           Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0594           Scalar::Vector<T> x6,
0595           enable_if<std::is_same<Return, AVX2::short_v>::value> = nullarg);
0596 template <typename Return, typename T>
0597 Vc_INTRINSIC Vc_CONST Return
0598 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
0599           Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
0600           Scalar::Vector<T> x6,
0601           enable_if<std::is_same<Return, AVX2::ushort_v>::value> = nullarg);
0602 #endif
0603 
// 8 Scalar::Vector to 1 AVX2::Vector {{{2
// Forward declarations: merge 8 scalar values into one AVX2 vector.  float_v is
// available without AVX2; the integral targets require Vc_IMPL_AVX2.  Each
// overload is selected via enable_if on the exact requested Return type.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7,
          enable_if<std::is_same<Return, AVX2::float_v>::value> = nullarg);
#ifdef Vc_IMPL_AVX2
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7,
          enable_if<std::is_same<Return, AVX2::int_v>::value> = nullarg);
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7,
          enable_if<std::is_same<Return, AVX2::uint_v>::value> = nullarg);
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7,
          enable_if<std::is_same<Return, AVX2::short_v>::value> = nullarg);
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7,
          enable_if<std::is_same<Return, AVX2::short_v>::value> = nullarg);
#endif
0637 
// 9 Scalar::Vector to 1 AVX2::Vector {{{2
// For more than 8 inputs only the short_v/ushort_v targets are declared — they
// are the only AVX2 vectors with enough lanes (cf. the 16-input overloads
// below).  All are AVX2-only forward declarations; definitions come later.
#ifdef Vc_IMPL_AVX2
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
          enable_if<std::is_same<Return, AVX2::short_v>::value> = nullarg);
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
          enable_if<std::is_same<Return, AVX2::ushort_v>::value> = nullarg);
#endif

// 10 Scalar::Vector to 1 AVX2::Vector {{{2
#ifdef Vc_IMPL_AVX2
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
          Scalar::Vector<T> x9,
          enable_if<std::is_same<Return, AVX2::short_v>::value> = nullarg);
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
          Scalar::Vector<T> x9,
          enable_if<std::is_same<Return, AVX2::ushort_v>::value> = nullarg);
#endif

// 11 Scalar::Vector to 1 AVX2::Vector {{{2
#ifdef Vc_IMPL_AVX2
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
          Scalar::Vector<T> x9, Scalar::Vector<T> x10,
          enable_if<std::is_same<Return, AVX2::short_v>::value> = nullarg);
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
          Scalar::Vector<T> x9, Scalar::Vector<T> x10,
          enable_if<std::is_same<Return, AVX2::ushort_v>::value> = nullarg);
#endif
0689 
// 12 Scalar::Vector to 1 AVX2::Vector {{{2
// Continuation of the >8-input series: short_v/ushort_v targets only,
// AVX2-only forward declarations.
#ifdef Vc_IMPL_AVX2
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
          Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
          enable_if<std::is_same<Return, AVX2::short_v>::value> = nullarg);
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
          Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
          enable_if<std::is_same<Return, AVX2::ushort_v>::value> = nullarg);
#endif

// 13 Scalar::Vector to 1 AVX2::Vector {{{2
#ifdef Vc_IMPL_AVX2
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
          Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
          Scalar::Vector<T> x12,
          enable_if<std::is_same<Return, AVX2::short_v>::value> = nullarg);
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
          Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
          Scalar::Vector<T> x12,
          enable_if<std::is_same<Return, AVX2::ushort_v>::value> = nullarg);
#endif

// 14 Scalar::Vector to 1 AVX2::Vector {{{2
#ifdef Vc_IMPL_AVX2
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
          Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
          Scalar::Vector<T> x12, Scalar::Vector<T> x13,
          enable_if<std::is_same<Return, AVX2::short_v>::value> = nullarg);
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
          Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
          Scalar::Vector<T> x12, Scalar::Vector<T> x13,
          enable_if<std::is_same<Return, AVX2::ushort_v>::value> = nullarg);
#endif
0747 
// 15 Scalar::Vector to 1 AVX2::Vector {{{2
// Final entries of the scalar-merge series; 16 inputs fill a 16-lane
// short_v/ushort_v exactly.  AVX2-only forward declarations.
#ifdef Vc_IMPL_AVX2
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
          Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
          Scalar::Vector<T> x12, Scalar::Vector<T> x13, Scalar::Vector<T> x14,
          enable_if<std::is_same<Return, AVX2::short_v>::value> = nullarg);
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
          Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
          Scalar::Vector<T> x12, Scalar::Vector<T> x13, Scalar::Vector<T> x14,
          enable_if<std::is_same<Return, AVX2::ushort_v>::value> = nullarg);
#endif

// 16 Scalar::Vector to 1 AVX2::Vector {{{2
#ifdef Vc_IMPL_AVX2
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
          Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
          Scalar::Vector<T> x12, Scalar::Vector<T> x13, Scalar::Vector<T> x14,
          Scalar::Vector<T> x15,
          enable_if<std::is_same<Return, AVX2::short_v>::value> = nullarg);
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
          Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
          Scalar::Vector<T> x12, Scalar::Vector<T> x13, Scalar::Vector<T> x14,
          Scalar::Vector<T> x15,
          enable_if<std::is_same<Return, AVX2::ushort_v>::value> = nullarg);
#endif
0789 
// 1 AVX2::Vector to 1 Scalar::Vector {{{2
// Forward declaration: narrowing cast of an AVX2 vector to a single-lane
// Scalar::Vector (presumably keeps lane 0 — TODO confirm against definition).
template <typename To, typename FromT>
Vc_INTRINSIC Vc_CONST To simd_cast(AVX2::Vector<FromT> x,
                                   enable_if<Scalar::is_vector<To>::value> = nullarg);
0794 
// Declarations: Mask casts without offset {{{1
// 1 AVX2::Mask to 1 AVX2::Mask {{{2
// Generic 1:1 mask conversion between AVX2 mask types.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
    simd_cast(const AVX2::Mask<T> &k, enable_if<AVX2::is_mask<Return>::value> = nullarg);

// 2 AVX2::Mask to 1 AVX2::Mask {{{2
// Each line declares a 2-input concatenating mask cast.  The definition-phase
// macro (redefined near the end of this chunk) static_asserts
// from::size() * 2 <= to::size() for these combinations.
Vc_SIMD_CAST_AVX_2(double_m,  float_m);
#ifdef Vc_IMPL_AVX2
Vc_SIMD_CAST_AVX_2(double_m,    int_m);
Vc_SIMD_CAST_AVX_2(double_m,   uint_m);
Vc_SIMD_CAST_AVX_2(double_m,  short_m);
Vc_SIMD_CAST_AVX_2(double_m, ushort_m);

Vc_SIMD_CAST_AVX_2( float_m,  short_m);
Vc_SIMD_CAST_AVX_2( float_m, ushort_m);

Vc_SIMD_CAST_AVX_2(   int_m,  short_m);
Vc_SIMD_CAST_AVX_2(   int_m, ushort_m);

Vc_SIMD_CAST_AVX_2(  uint_m,  short_m);
Vc_SIMD_CAST_AVX_2(  uint_m, ushort_m);
#endif

// 4 AVX2::Mask to 1 AVX2::Mask {{{2
// 4-input concatenating mask casts (only double_m has few enough lanes to
// need four sources for a 16-lane target).
#ifdef Vc_IMPL_AVX2
Vc_SIMD_CAST_AVX_4(double_m,  short_m);
Vc_SIMD_CAST_AVX_4(double_m, ushort_m);
#endif
0824 
// 1 SSE::Mask to 1 AVX2::Mask {{{2
// 1:1 widening/converting mask casts from every SSE mask type to every
// applicable AVX2 mask type.  Integral AVX2 targets require Vc_IMPL_AVX2.
Vc_SIMD_CAST_1(SSE::double_m, AVX2::double_m);
Vc_SIMD_CAST_1(SSE::double_m, AVX2:: float_m);
#ifdef Vc_IMPL_AVX2
Vc_SIMD_CAST_1(SSE::double_m, AVX2::   int_m);
Vc_SIMD_CAST_1(SSE::double_m, AVX2::  uint_m);
Vc_SIMD_CAST_1(SSE::double_m, AVX2:: short_m);
Vc_SIMD_CAST_1(SSE::double_m, AVX2::ushort_m);
#endif

Vc_SIMD_CAST_1(SSE:: float_m, AVX2::double_m);
Vc_SIMD_CAST_1(SSE::   int_m, AVX2::double_m);
Vc_SIMD_CAST_1(SSE::  uint_m, AVX2::double_m);
Vc_SIMD_CAST_1(SSE:: short_m, AVX2::double_m);
Vc_SIMD_CAST_1(SSE::ushort_m, AVX2::double_m);

Vc_SIMD_CAST_1(SSE:: float_m, AVX2:: float_m);
Vc_SIMD_CAST_1(SSE::   int_m, AVX2:: float_m);
Vc_SIMD_CAST_1(SSE::  uint_m, AVX2:: float_m);
Vc_SIMD_CAST_1(SSE:: short_m, AVX2:: float_m);
Vc_SIMD_CAST_1(SSE::ushort_m, AVX2:: float_m);
#ifdef Vc_IMPL_AVX2
Vc_SIMD_CAST_1(SSE:: float_m, AVX2::   int_m);
Vc_SIMD_CAST_1(SSE:: float_m, AVX2::  uint_m);
Vc_SIMD_CAST_1(SSE::   int_m, AVX2::   int_m);
Vc_SIMD_CAST_1(SSE::   int_m, AVX2::  uint_m);
Vc_SIMD_CAST_1(SSE::  uint_m, AVX2::   int_m);
Vc_SIMD_CAST_1(SSE::  uint_m, AVX2::  uint_m);

Vc_SIMD_CAST_1(SSE:: float_m, AVX2:: short_m);
Vc_SIMD_CAST_1(SSE::   int_m, AVX2:: short_m);
Vc_SIMD_CAST_1(SSE::  uint_m, AVX2:: short_m);
Vc_SIMD_CAST_1(SSE:: short_m, AVX2:: short_m);
Vc_SIMD_CAST_1(SSE::ushort_m, AVX2:: short_m);
Vc_SIMD_CAST_1(SSE:: float_m, AVX2::ushort_m);
Vc_SIMD_CAST_1(SSE::   int_m, AVX2::ushort_m);
Vc_SIMD_CAST_1(SSE::  uint_m, AVX2::ushort_m);
Vc_SIMD_CAST_1(SSE:: short_m, AVX2::ushort_m);
Vc_SIMD_CAST_1(SSE::ushort_m, AVX2::ushort_m);

Vc_SIMD_CAST_1(SSE:: short_m, AVX2::   int_m);
Vc_SIMD_CAST_1(SSE:: short_m, AVX2::  uint_m);

Vc_SIMD_CAST_1(SSE::ushort_m, AVX2::   int_m);
Vc_SIMD_CAST_1(SSE::ushort_m, AVX2::  uint_m);
#endif
0871 
// 2 SSE::Mask to 1 AVX2::Mask {{{2
// Concatenating casts: two SSE masks into one (wider) AVX2 mask.
Vc_SIMD_CAST_2(SSE::double_m, AVX2::double_m);
Vc_SIMD_CAST_2(SSE::double_m, AVX2:: float_m);
Vc_SIMD_CAST_2(SSE:: float_m, AVX2:: float_m);
Vc_SIMD_CAST_2(SSE::   int_m, AVX2:: float_m);
Vc_SIMD_CAST_2(SSE::  uint_m, AVX2:: float_m);
#ifdef Vc_IMPL_AVX2
Vc_SIMD_CAST_2(SSE::double_m, AVX2::   int_m);
Vc_SIMD_CAST_2(SSE::double_m, AVX2::  uint_m);
Vc_SIMD_CAST_2(SSE::double_m, AVX2:: short_m);
Vc_SIMD_CAST_2(SSE::double_m, AVX2::ushort_m);

Vc_SIMD_CAST_2(SSE:: float_m, AVX2::   int_m);
Vc_SIMD_CAST_2(SSE:: float_m, AVX2::  uint_m);
Vc_SIMD_CAST_2(SSE:: float_m, AVX2:: short_m);
Vc_SIMD_CAST_2(SSE:: float_m, AVX2::ushort_m);

Vc_SIMD_CAST_2(SSE::   int_m, AVX2::   int_m);
Vc_SIMD_CAST_2(SSE::   int_m, AVX2::  uint_m);
Vc_SIMD_CAST_2(SSE::   int_m, AVX2:: short_m);
Vc_SIMD_CAST_2(SSE::   int_m, AVX2::ushort_m);

Vc_SIMD_CAST_2(SSE::  uint_m, AVX2::   int_m);
Vc_SIMD_CAST_2(SSE::  uint_m, AVX2::  uint_m);
Vc_SIMD_CAST_2(SSE::  uint_m, AVX2:: short_m);
Vc_SIMD_CAST_2(SSE::  uint_m, AVX2::ushort_m);

Vc_SIMD_CAST_2(SSE:: short_m, AVX2:: short_m);
Vc_SIMD_CAST_2(SSE:: short_m, AVX2::ushort_m);
Vc_SIMD_CAST_2(SSE::ushort_m, AVX2:: short_m);
Vc_SIMD_CAST_2(SSE::ushort_m, AVX2::ushort_m);
#endif

// 4 SSE::Mask to 1 AVX2::Mask {{{2
// Concatenating casts: four SSE masks into one AVX2 mask.
Vc_SIMD_CAST_4(SSE::double_m, AVX2:: float_m);
#ifdef Vc_IMPL_AVX2
Vc_SIMD_CAST_4(SSE::double_m, AVX2::   int_m);
Vc_SIMD_CAST_4(SSE::double_m, AVX2::  uint_m);
Vc_SIMD_CAST_4(SSE::double_m, AVX2:: short_m);
Vc_SIMD_CAST_4(SSE::double_m, AVX2::ushort_m);
Vc_SIMD_CAST_4(SSE:: float_m, AVX2:: short_m);
Vc_SIMD_CAST_4(SSE:: float_m, AVX2::ushort_m);
Vc_SIMD_CAST_4(SSE::   int_m, AVX2:: short_m);
Vc_SIMD_CAST_4(SSE::   int_m, AVX2::ushort_m);
Vc_SIMD_CAST_4(SSE::  uint_m, AVX2:: short_m);
Vc_SIMD_CAST_4(SSE::  uint_m, AVX2::ushort_m);
#endif
0919 
// 1 Scalar::Mask to 1 AVX2::Mask {{{2
// Merge N scalar mask values into one AVX2 mask.  The Return::Size >= N
// constraints on the larger overloads ensure the target has enough lanes.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Mask<T> k,
          enable_if<AVX2::is_mask<Return>::value> = nullarg);

// 2 Scalar::Mask to 1 AVX2::Mask {{{2
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Mask<T> k0, Scalar::Mask<T> k1,
          enable_if<AVX2::is_mask<Return>::value> = nullarg);

// 4 Scalar::Mask to 1 AVX2::Mask {{{2
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return simd_cast(
    Scalar::Mask<T> k0, Scalar::Mask<T> k1, Scalar::Mask<T> k2, Scalar::Mask<T> k3,
    enable_if<(AVX2::is_mask<Return>::value && Return::Size >= 4)> = nullarg);

// 8 Scalar::Mask to 1 AVX2::Mask {{{2
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return simd_cast(
    Scalar::Mask<T> k0, Scalar::Mask<T> k1, Scalar::Mask<T> k2, Scalar::Mask<T> k3,
    Scalar::Mask<T> k4, Scalar::Mask<T> k5, Scalar::Mask<T> k6, Scalar::Mask<T> k7,
    enable_if<(AVX2::is_mask<Return>::value && Return::Size >= 8)> = nullarg);

// 16 Scalar::Mask to 1 AVX2::Mask {{{2
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Mask<T> k0, Scalar::Mask<T> k1, Scalar::Mask<T> k2, Scalar::Mask<T> k3,
          Scalar::Mask<T> k4, Scalar::Mask<T> k5, Scalar::Mask<T> k6, Scalar::Mask<T> k7,
          Scalar::Mask<T> k8, Scalar::Mask<T> k9, Scalar::Mask<T> k10,
          Scalar::Mask<T> k11, Scalar::Mask<T> k12, Scalar::Mask<T> k13,
          Scalar::Mask<T> k14, Scalar::Mask<T> k15,
          enable_if<(AVX2::is_mask<Return>::value && Return::Size >= 16)> = nullarg);
0954 
// 1 AVX2::Mask to 1 SSE::Mask {{{2
// Narrowing 1:1 mask casts from AVX2 down to SSE mask types.  double_m and
// float_m sources are available in plain AVX; integral sources need AVX2.
Vc_SIMD_CAST_1(AVX2::double_m, SSE::double_m);
Vc_SIMD_CAST_1(AVX2::double_m, SSE:: float_m);
Vc_SIMD_CAST_1(AVX2::double_m, SSE::   int_m);
Vc_SIMD_CAST_1(AVX2::double_m, SSE::  uint_m);
Vc_SIMD_CAST_1(AVX2::double_m, SSE:: short_m);
Vc_SIMD_CAST_1(AVX2::double_m, SSE::ushort_m);

Vc_SIMD_CAST_1(AVX2:: float_m, SSE::double_m);
Vc_SIMD_CAST_1(AVX2:: float_m, SSE:: float_m);
Vc_SIMD_CAST_1(AVX2:: float_m, SSE::   int_m);
Vc_SIMD_CAST_1(AVX2:: float_m, SSE::  uint_m);
Vc_SIMD_CAST_1(AVX2:: float_m, SSE:: short_m);
Vc_SIMD_CAST_1(AVX2:: float_m, SSE::ushort_m);

#ifdef Vc_IMPL_AVX2
Vc_SIMD_CAST_1(AVX2::   int_m, SSE::double_m);
Vc_SIMD_CAST_1(AVX2::   int_m, SSE:: float_m);
Vc_SIMD_CAST_1(AVX2::   int_m, SSE::   int_m);
Vc_SIMD_CAST_1(AVX2::   int_m, SSE::  uint_m);
Vc_SIMD_CAST_1(AVX2::   int_m, SSE:: short_m);
Vc_SIMD_CAST_1(AVX2::   int_m, SSE::ushort_m);

Vc_SIMD_CAST_1(AVX2::  uint_m, SSE::double_m);
Vc_SIMD_CAST_1(AVX2::  uint_m, SSE:: float_m);
Vc_SIMD_CAST_1(AVX2::  uint_m, SSE::   int_m);
Vc_SIMD_CAST_1(AVX2::  uint_m, SSE::  uint_m);
Vc_SIMD_CAST_1(AVX2::  uint_m, SSE:: short_m);
Vc_SIMD_CAST_1(AVX2::  uint_m, SSE::ushort_m);

Vc_SIMD_CAST_1(AVX2:: short_m, SSE::double_m);
Vc_SIMD_CAST_1(AVX2:: short_m, SSE:: float_m);
Vc_SIMD_CAST_1(AVX2:: short_m, SSE::   int_m);
Vc_SIMD_CAST_1(AVX2:: short_m, SSE::  uint_m);
Vc_SIMD_CAST_1(AVX2:: short_m, SSE:: short_m);
Vc_SIMD_CAST_1(AVX2:: short_m, SSE::ushort_m);

Vc_SIMD_CAST_1(AVX2::ushort_m, SSE::double_m);
Vc_SIMD_CAST_1(AVX2::ushort_m, SSE:: float_m);
Vc_SIMD_CAST_1(AVX2::ushort_m, SSE::   int_m);
Vc_SIMD_CAST_1(AVX2::ushort_m, SSE::  uint_m);
Vc_SIMD_CAST_1(AVX2::ushort_m, SSE:: short_m);
Vc_SIMD_CAST_1(AVX2::ushort_m, SSE::ushort_m);
#endif

// 2 AVX2::Mask to 1 SSE::Mask {{{2
// Two 4-lane double masks concatenated into one 8-lane SSE mask.
Vc_SIMD_CAST_2(AVX2::double_m, SSE:: short_m);
Vc_SIMD_CAST_2(AVX2::double_m, SSE::ushort_m);

// 1 AVX2::Mask to 1 Scalar::Mask {{{2
// Narrow an AVX2 mask to a single scalar mask value.
template <typename To, typename FromT>
Vc_INTRINSIC Vc_CONST To simd_cast(AVX2::Mask<FromT> x,
                                   enable_if<Scalar::is_mask<To>::value> = nullarg);
1008 
// Declaration: offset == 0 | convert from AVX2::Mask/Vector {{{1
// offset == 0 is the identity offset: forwards to the offset-less overloads.
// The Scalar exclusions and the isSimdArray/isSimdMaskArray exclusions keep
// this overload from competing with the scalar and SimdArray cast overloads.
template <typename Return, int offset, typename From>
Vc_INTRINSIC Vc_CONST enable_if<
    (offset == 0 &&
     ((AVX2::is_vector<From>::value && !Scalar::is_vector<Return>::value &&
       Traits::is_simd_vector<Return>::value && !Traits::isSimdArray<Return>::value) ||
      (AVX2::is_mask<From>::value && !Scalar::is_mask<Return>::value &&
       Traits::is_simd_mask<Return>::value &&
       !Traits::isSimdMaskArray<Return>::value))),
    Return>
simd_cast(const From &x);
// Declaration: offset == 0 | convert from SSE::Mask/Vector to AVX2::Mask/Vector {{{1
template <typename Return, int offset, typename From>
Vc_INTRINSIC Vc_CONST Return simd_cast(
    const From &x,
    enable_if<offset == 0 && ((SSE::is_vector<From>::value &&
                               AVX2::is_vector<Return>::value) ||
                              (SSE::is_mask<From>::value &&
                               AVX2::is_mask<Return>::value))> = nullarg);

// Declarations: Vector casts with offset {{{1
// AVX2 to AVX2 {{{2
template <typename Return, int offset, typename T>
Vc_INTRINSIC Vc_CONST enable_if<(AVX2::is_vector<Return>::value && offset != 0),
                                Return>
simd_cast(AVX2::Vector<T> x);
// AVX2 to SSE (Vector<T>) {{{2
// Two overloads distinguished by the register width of the source vector
// (32 bytes = full AVX register, 16 bytes = SSE-sized storage).
template <typename Return, int offset, typename T>
Vc_INTRINSIC Vc_CONST enable_if<(offset != 0 && SSE::is_vector<Return>::value &&
                                 sizeof(AVX2::Vector<T>) == 32),
                                Return>
simd_cast(AVX2::Vector<T> x);
template <typename Return, int offset, typename T>
Vc_INTRINSIC Vc_CONST enable_if<(offset != 0 && SSE::is_vector<Return>::value &&
                                 sizeof(AVX2::Vector<T>) == 16),
                                Return>
simd_cast(AVX2::Vector<T> x);
// SSE to AVX2 {{{2
Vc_SIMD_CAST_OFFSET(SSE:: short_v, AVX2::double_v, 1);
Vc_SIMD_CAST_OFFSET(SSE::ushort_v, AVX2::double_v, 1);
1049 
// Declarations: Mask casts with offset {{{1
// 1 AVX2::Mask to N AVX2::Mask {{{2
/* This declaration confuses GCC (4.9.2). If the declarations are there the definitions
 * are ignored by the compiler. ;-(
template <typename Return, int offset, typename T>
Vc_INTRINSIC_L Vc_CONST_L Return
simd_cast(const AVX2::Mask<T> &k,
          enable_if<sizeof(k) == 32 && sizeof(Return) == 32 && offset == 1 &&
                    AVX2::is_mask<Return>::value> = nullarg) Vc_INTRINSIC_R Vc_CONST_R;
template <typename Return, int offset, typename T>
Vc_INTRINSIC_L Vc_CONST_L Return
simd_cast(const AVX2::Mask<T> &k,
          enable_if<sizeof(k) == 32 && sizeof(Return) == 16 && offset == 1 &&
                    AVX2::is_mask<Return>::value> = nullarg) Vc_INTRINSIC_R Vc_CONST_R;
template <typename Return, int offset, typename T>
Vc_INTRINSIC_L Vc_CONST_L Return
simd_cast(const AVX2::Mask<T> &k,
          enable_if<sizeof(k) == 16 && sizeof(Return) == 32 && offset == 1 &&
                    AVX2::is_mask<Return>::value> = nullarg) Vc_INTRINSIC_R Vc_CONST_R;
template <typename Return, int offset, typename T>
Vc_INTRINSIC_L Vc_CONST_L Return
simd_cast(const AVX2::Mask<T> &k,
          enable_if<sizeof(k) == 16 && sizeof(Return) == 16 && offset == 1 &&
                    AVX2::is_mask<Return>::value> = nullarg) Vc_INTRINSIC_R Vc_CONST_R;
                    */

// 1 SSE::Mask to N AVX(2)::Mask {{{2
Vc_SIMD_CAST_OFFSET(SSE:: short_m, AVX2::double_m, 1);
Vc_SIMD_CAST_OFFSET(SSE::ushort_m, AVX2::double_m, 1);

// AVX2 to SSE (Mask<T>) {{{2
// Extract an SSE-sized mask slice at a non-zero offset; two overloads keyed
// on the storage size of the source mask (32 vs. 16 bytes).
template <typename Return, int offset, typename T>
Vc_INTRINSIC Vc_CONST enable_if<(offset != 0 && SSE::is_mask<Return>::value &&
                                 sizeof(AVX2::Mask<T>) == 32),
                                Return>
simd_cast(AVX2::Mask<T> x);
template <typename Return, int offset, typename T>
Vc_INTRINSIC Vc_CONST enable_if<(offset != 0 && SSE::is_mask<Return>::value &&
                                 sizeof(AVX2::Mask<T>) == 16),
                                Return>
simd_cast(AVX2::Mask<T> x);
1091 
// helper macros Vc_SIMD_CAST_AVX_[124] & Vc_SIMD_CAST_[124] {{{1
// The macros are redefined here (they carry earlier definitions — note the
// #undef — which presumably expanded to pure declarations, since the
// invocations above end in ';').  From this point on, each invocation emits a
// function *definition head*; the function body follows the macro invocation
// directly, as used in the cast definitions below.
#undef Vc_SIMD_CAST_AVX_1
// 1 AVX2 input -> 1 AVX2 output, selected by exact match of To.
#define Vc_SIMD_CAST_AVX_1(from_, to_)                                                   \
    template <typename To>                                                               \
    Vc_INTRINSIC Vc_CONST To simd_cast(AVX2::from_ x,                                    \
                                       enable_if<std::is_same<To, AVX2::to_>::value>)

#undef Vc_SIMD_CAST_AVX_2
// 2 AVX2 inputs -> 1 AVX2 output; rejects combinations where the target has
// too few lanes to hold both sources.
#define Vc_SIMD_CAST_AVX_2(from_, to_)                                                   \
    static_assert(AVX2::from_::size() * 2 <= AVX2::to_::size(),                          \
                  "this type combination is wrong");                                     \
    template <typename To>                                                               \
    Vc_INTRINSIC Vc_CONST To simd_cast(AVX2::from_ x0, AVX2::from_ x1,                   \
                                       enable_if<std::is_same<To, AVX2::to_>::value>)

#undef Vc_SIMD_CAST_AVX_3
#define Vc_SIMD_CAST_AVX_3(from_, to_)                                                   \
    template <typename To>                                                               \
    Vc_INTRINSIC Vc_CONST To simd_cast(AVX2::from_ x0, AVX2::from_ x1, AVX2::from_ x2,   \
                                       enable_if<std::is_same<To, AVX2::to_>::value>)

#undef Vc_SIMD_CAST_AVX_4
#define Vc_SIMD_CAST_AVX_4(from_, to_)                                                   \
    template <typename To>                                                               \
    Vc_INTRINSIC Vc_CONST To simd_cast(AVX2::from_ x0, AVX2::from_ x1, AVX2::from_ x2,   \
                                       AVX2::from_ x3,                                   \
                                       enable_if<std::is_same<To, AVX2::to_>::value>)

// The Vc_SIMD_CAST_[1-8] variants take fully qualified types (any ISA
// namespace), unlike the AVX-only variants above.
#undef Vc_SIMD_CAST_1
#define Vc_SIMD_CAST_1(from_, to_)                                                       \
    template <typename To>                                                               \
    Vc_INTRINSIC Vc_CONST To simd_cast(from_ x, enable_if<std::is_same<To, to_>::value>)

#undef Vc_SIMD_CAST_2
#define Vc_SIMD_CAST_2(from_, to_)                                                       \
    template <typename To>                                                               \
    Vc_INTRINSIC Vc_CONST To simd_cast(from_ x0, from_ x1,                               \
                                       enable_if<std::is_same<To, to_>::value>)

#undef Vc_SIMD_CAST_3
#define Vc_SIMD_CAST_3(from_, to_)                                                       \
    template <typename To>                                                               \
    Vc_INTRINSIC Vc_CONST To simd_cast(from_ x0, from_ x1, from_ x2,                     \
                                       enable_if<std::is_same<To, to_>::value>)

#undef Vc_SIMD_CAST_4
#define Vc_SIMD_CAST_4(from_, to_)                                                       \
    template <typename To>                                                               \
    Vc_INTRINSIC Vc_CONST To simd_cast(from_ x0, from_ x1, from_ x2, from_ x3,           \
                                       enable_if<std::is_same<To, to_>::value>)

#undef Vc_SIMD_CAST_5
#define Vc_SIMD_CAST_5(from_, to_)                                                       \
    template <typename To>                                                               \
    Vc_INTRINSIC Vc_CONST To simd_cast(from_ x0, from_ x1, from_ x2, from_ x3, from_ x4, \
                                       enable_if<std::is_same<To, to_>::value>)

#undef Vc_SIMD_CAST_6
#define Vc_SIMD_CAST_6(from_, to_)                                                       \
    template <typename To>                                                               \
    Vc_INTRINSIC Vc_CONST To simd_cast(from_ x0, from_ x1, from_ x2, from_ x3, from_ x4, \
                                       from_ x5,                                         \
                                       enable_if<std::is_same<To, to_>::value>)

#undef Vc_SIMD_CAST_7
#define Vc_SIMD_CAST_7(from_, to_)                                                       \
    template <typename To>                                                               \
    Vc_INTRINSIC Vc_CONST To simd_cast(from_ x0, from_ x1, from_ x2, from_ x3, from_ x4, \
                                       from_ x5, from_ x6,                               \
                                       enable_if<std::is_same<To, to_>::value>)

#undef Vc_SIMD_CAST_8
#define Vc_SIMD_CAST_8(from_, to_)                                                       \
    template <typename To>                                                               \
    Vc_INTRINSIC Vc_CONST To simd_cast(from_ x0, from_ x1, from_ x2, from_ x3, from_ x4, \
                                       from_ x5, from_ x6, from_ x7,                     \
                                       enable_if<std::is_same<To, to_>::value>)

#undef Vc_SIMD_CAST_OFFSET
// Offset-selecting cast: only enabled for the one compile-time offset given;
// the static_assert guarantees the source actually has a slice at that offset.
#define Vc_SIMD_CAST_OFFSET(from_, to_, offset_)                                         \
    static_assert(from_::size() >= to_::size() * (offset_ + 1),                          \
                  "this offset cannot exist for this type combination");                 \
    template <typename To, int offset>                                                   \
    Vc_INTRINSIC Vc_CONST To simd_cast(                                                  \
        from_ x, enable_if<(offset == offset_ && std::is_same<To, to_>::value)>)
1177 
1178 // SSE -> AVX2 where the AVX2 Vector is integral and thus of equal size() as the {{{1
1179 // equivalent SSE Vector
1180 template <typename To, typename From>
1181 Vc_INTRINSIC Vc_CONST To
1182 simd_cast(From x, enable_if<(AVX2::is_vector<To>::value && SSE::is_vector<From>::value &&
1183                              SSE::Vector<typename To::EntryType>::Size == To::Size)>)
1184 {
1185     return simd_cast<SSE::Vector<typename To::EntryType>>(x).data();
1186 }
1187 template <typename To, typename From>
1188 Vc_INTRINSIC Vc_CONST To
1189 simd_cast(From x0, From x1,
1190           enable_if<(AVX2::is_vector<To>::value && SSE::is_vector<From>::value &&
1191                      SSE::Vector<typename To::EntryType>::Size == To::Size)>)
1192 {
1193     return simd_cast<SSE::Vector<typename To::EntryType>>(x0, x1).data();
1194 }
1195 template <typename To, typename From>
1196 Vc_INTRINSIC Vc_CONST To
1197 simd_cast(From x0, From x1, From x2,
1198           enable_if<(AVX2::is_vector<To>::value && SSE::is_vector<From>::value &&
1199                      SSE::Vector<typename To::EntryType>::Size == To::Size)>)
1200 {
1201     return simd_cast<SSE::Vector<typename To::EntryType>>(x0, x1, x2).data();
1202 }
1203 template <typename To, typename From>
1204 Vc_INTRINSIC Vc_CONST To
1205 simd_cast(From x0, From x1, From x2, From x3,
1206           enable_if<(AVX2::is_vector<To>::value && SSE::is_vector<From>::value &&
1207                      SSE::Vector<typename To::EntryType>::Size == To::Size)>)
1208 {
1209     return simd_cast<SSE::Vector<typename To::EntryType>>(x0, x1, x2, x3).data();
1210 }
1211 template <typename To, typename From>
1212 Vc_INTRINSIC Vc_CONST To
1213 simd_cast(From x0, From x1, From x2, From x3, From x4, From x5, From x6, From x7,
1214           enable_if<(AVX2::is_vector<To>::value && SSE::is_vector<From>::value &&
1215                      SSE::Vector<typename To::EntryType>::Size == To::Size)>)
1216 {
1217     return simd_cast<SSE::Vector<typename To::EntryType>>(x0, x1, x2, x3, x4, x5, x6, x7)
1218         .data();
1219 }
1220 
// Vector casts without offset {{{1
// AVX2::Vector {{{2
// 1: to double_v {{{3
// The target double_v holds fewer lanes than the source vector, so only the
// low 128 bits of the source register (its leading elements) are converted.
Vc_SIMD_CAST_AVX_1( float_v, double_v) { return _mm256_cvtps_pd(AVX::lo128(x.data())); }
#ifdef Vc_IMPL_AVX2
Vc_SIMD_CAST_AVX_1(   int_v, double_v) { return AVX::convert<   int, double>(AVX::lo128(x.data())); }
Vc_SIMD_CAST_AVX_1(  uint_v, double_v) { return AVX::convert<  uint, double>(AVX::lo128(x.data())); }
Vc_SIMD_CAST_AVX_1( short_v, double_v) { return AVX::convert< short, double>(AVX::lo128(x.data())); }
Vc_SIMD_CAST_AVX_1(ushort_v, double_v) { return AVX::convert<ushort, double>(AVX::lo128(x.data())); }
#endif
1231 
// 1: to float_v {{{3
// double_v supplies only half as many values as float_v holds; the converted
// values land in the low half and zeroExtend clears the upper half.
Vc_SIMD_CAST_AVX_1(double_v,  float_v) { return AVX::zeroExtend(_mm256_cvtpd_ps(x.data())); }
#ifdef Vc_IMPL_AVX2
Vc_SIMD_CAST_AVX_1(   int_v,  float_v) { return AVX::convert<   int, float>(x.data()); }
Vc_SIMD_CAST_AVX_1(  uint_v,  float_v) { return AVX::convert<  uint, float>(x.data()); }
// short_v/ushort_v have more lanes than float_v; only the low 128 bits
// (the leading 8 values) are converted.
Vc_SIMD_CAST_AVX_1( short_v,  float_v) { return AVX::convert< short, float>(AVX::lo128(x.data())); }
Vc_SIMD_CAST_AVX_1(ushort_v,  float_v) { return AVX::convert<ushort, float>(AVX::lo128(x.data())); }
#endif

// 2: to float_v {{{3
// Two double_v inputs fill the whole float_v: x0 -> low half, x1 -> high half.
Vc_SIMD_CAST_AVX_2(double_v,  float_v) { return AVX::concat(_mm256_cvtpd_ps(x0.data()), _mm256_cvtpd_ps(x1.data())); }
1243 
// 1: to int_v {{{3
#ifdef Vc_IMPL_AVX2
// double_v supplies only half the lanes; the remainder is zeroed.
Vc_SIMD_CAST_AVX_1(double_v,    int_v) { return AVX::zeroExtend(_mm256_cvttpd_epi32(x.data())); }
// cvttps truncates toward zero, matching C++ float-to-int conversion.
Vc_SIMD_CAST_AVX_1( float_v,    int_v) { return _mm256_cvttps_epi32(x.data()); }
// uint -> int is a pure reinterpretation of the same bits.
Vc_SIMD_CAST_AVX_1(  uint_v,    int_v) { return x.data(); }
// Sign-/zero-extend the leading 16-bit lanes (low 128 bits) to 32 bits.
Vc_SIMD_CAST_AVX_1( short_v,    int_v) { return _mm256_cvtepi16_epi32(AVX::lo128(x.data())); }
Vc_SIMD_CAST_AVX_1(ushort_v,    int_v) { return _mm256_cvtepu16_epi32(AVX::lo128(x.data())); }
#endif

// 2: to int_v {{{3
#ifdef Vc_IMPL_AVX2
// Two double_v inputs fill all lanes: x0 -> low half, x1 -> high half.
Vc_SIMD_CAST_AVX_2(double_v,    int_v) { return AVX::concat(_mm256_cvttpd_epi32(x0.data()), _mm256_cvttpd_epi32(x1.data())); }
#endif
1257 
// 1: to uint_v {{{3
#ifdef Vc_IMPL_AVX2
Vc_SIMD_CAST_AVX_1(double_v,   uint_v) { return AVX::zeroExtend(AVX::convert<double, uint>(x.data())); }
// AVX2 has no float -> unsigned conversion instruction. cvttps only covers the
// signed int range, so inputs >= 2^31 are handled separately: subtract 2^31,
// convert, then add 2^31 back as an unsigned bias; blendv selects per lane
// based on the x >= 2^31 comparison mask.
Vc_SIMD_CAST_AVX_1( float_v,   uint_v) {
    return _mm256_blendv_epi8(
        _mm256_cvttps_epi32(x.data()),
        _mm256_add_epi32(
            _mm256_cvttps_epi32(_mm256_sub_ps(x.data(), AVX::set2power31_ps())),
            AVX::set2power31_epu32()),
        _mm256_castps_si256(AVX::cmpge_ps(x.data(), AVX::set2power31_ps())));
}
// int -> uint is a pure reinterpretation of the same bits.
Vc_SIMD_CAST_AVX_1(   int_v,   uint_v) { return x.data(); }
Vc_SIMD_CAST_AVX_1( short_v,   uint_v) { return _mm256_cvtepi16_epi32(AVX::lo128(x.data())); }
Vc_SIMD_CAST_AVX_1(ushort_v,   uint_v) { return _mm256_cvtepu16_epi32(AVX::lo128(x.data())); }
#endif

// 2: to uint_v {{{3
#ifdef Vc_IMPL_AVX2
Vc_SIMD_CAST_AVX_2(double_v,   uint_v) { return AVX::concat(AVX::convert<double, uint>(x0.data()), AVX::convert<double, uint>(x1.data())); }
#endif
1278 
// 1: to short_v {{{3
#ifdef Vc_IMPL_AVX2
// double_v only yields 4 values; pack them against zero and zero the rest.
Vc_SIMD_CAST_AVX_1(double_v, short_v) { return AVX::zeroExtend(_mm_packs_epi32(_mm256_cvttpd_epi32(x.data()), _mm_setzero_si128())); }
// Truncate 8 floats to 8 ints, pack the two 128-bit halves into one 128-bit
// register of shorts, and zero the upper half of the result.
Vc_SIMD_CAST_AVX_1( float_v, short_v) {
    const auto tmp = _mm256_cvttps_epi32(x.data());
    return AVX::zeroExtend(_mm_packs_epi32(AVX::lo128(tmp), AVX::hi128(tmp)));
}
Vc_SIMD_CAST_AVX_1(   int_v,  short_v) { return AVX::zeroExtend(AVX::convert< int, short>(x.data())); }
Vc_SIMD_CAST_AVX_1(  uint_v,  short_v) { return AVX::zeroExtend(AVX::convert<uint, short>(x.data())); }
// ushort -> short is a pure reinterpretation of the same bits.
Vc_SIMD_CAST_AVX_1(ushort_v, short_v) { return x.data(); }
#endif

// 2: to short_v {{{3
#ifdef Vc_IMPL_AVX2
Vc_SIMD_CAST_AVX_2(double_v,  short_v) {
    const auto tmp0 = _mm256_cvttpd_epi32(x0.data());
    const auto tmp1 = _mm256_cvttpd_epi32(x1.data());
    return AVX::zeroExtend(_mm_packs_epi32(tmp0, tmp1));
}
Vc_SIMD_CAST_AVX_2( float_v,  short_v) {
    using AVX2::short_v;
    using AVX2::int_v;
    return simd_cast<short_v>(simd_cast<int_v>(x0), simd_cast<int_v>(x1));
}
// The shuffle keeps the low 16 bits of each 32-bit lane (plain truncation;
// -0x80 in the control byte zeroes the destination byte). NOTE(review): this
// truncates while the double_v paths above use saturating packs — presumably
// intentional (C-style conversion semantics); confirm against Vc's cast spec.
// The final permute4x64 fixes up the 128-bit-lane-local shuffle/unpack order.
Vc_SIMD_CAST_AVX_2(   int_v,  short_v) {
    const auto shuf = _mm256_setr_epi8(
        0, 1, 4, 5, 8, 9, 12, 13, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80,
        0, 1, 4, 5, 8, 9, 12, 13, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80);
    auto a = _mm256_shuffle_epi8(x0.data(), shuf);
    auto b = _mm256_shuffle_epi8(x1.data(), shuf);
    return Mem::permute4x64<X0, X2, X1, X3>(_mm256_unpacklo_epi64(a, b));
}
// Same truncating shuffle as the int_v overload above.
Vc_SIMD_CAST_AVX_2(  uint_v,  short_v) {
    const auto shuf = _mm256_setr_epi8(
        0, 1, 4, 5, 8, 9, 12, 13, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80,
        0, 1, 4, 5, 8, 9, 12, 13, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80);
    auto a = _mm256_shuffle_epi8(x0.data(), shuf);
    auto b = _mm256_shuffle_epi8(x1.data(), shuf);
    return Mem::permute4x64<X0, X2, X1, X3>(_mm256_unpacklo_epi64(a, b));
}
#endif

// 3: to short_v {{{3
#ifdef Vc_IMPL_AVX2
// Three double_v inputs provide 12 values; the last 4 lanes are packed
// against zero.
Vc_SIMD_CAST_AVX_3(double_v,  short_v) {
    const auto tmp0 = _mm256_cvttpd_epi32(x0.data());
    const auto tmp1 = _mm256_cvttpd_epi32(x1.data());
    const auto tmp2 = _mm256_cvttpd_epi32(x2.data());
    return AVX::concat(_mm_packs_epi32(tmp0, tmp1), _mm_packs_epi32(tmp2, _mm_setzero_si128()));
}
#endif

// 4: to short_v {{{3
#ifdef Vc_IMPL_AVX2
// Four double_v inputs fill all 16 short lanes.
Vc_SIMD_CAST_AVX_4(double_v,  short_v) {
    const auto tmp0 = _mm256_cvttpd_epi32(x0.data());
    const auto tmp1 = _mm256_cvttpd_epi32(x1.data());
    const auto tmp2 = _mm256_cvttpd_epi32(x2.data());
    const auto tmp3 = _mm256_cvttpd_epi32(x3.data());
    return AVX::concat(_mm_packs_epi32(tmp0, tmp1), _mm_packs_epi32(tmp2, tmp3));
}
#endif
1341 
// 1: to ushort_v {{{3
#ifdef Vc_IMPL_AVX2
// Same structure as the short_v overloads, but using _mm_packus_epi32
// (unsigned saturation) where packing is needed.
Vc_SIMD_CAST_AVX_1(double_v, ushort_v) {
    const auto tmp = _mm256_cvttpd_epi32(x.data());
    return AVX::zeroExtend(_mm_packus_epi32(tmp, _mm_setzero_si128()));
}
Vc_SIMD_CAST_AVX_1( float_v, ushort_v) {
    const auto tmp = _mm256_cvttps_epi32(x.data());
    return AVX::zeroExtend(_mm_packus_epi32(AVX::lo128(tmp), AVX::hi128(tmp)));
}
Vc_SIMD_CAST_AVX_1(   int_v, ushort_v) { return AVX::zeroExtend(AVX::convert< int, ushort>(x.data())); }
Vc_SIMD_CAST_AVX_1(  uint_v, ushort_v) { return AVX::zeroExtend(AVX::convert<uint, ushort>(x.data())); }
// short -> ushort is a pure reinterpretation of the same bits.
Vc_SIMD_CAST_AVX_1( short_v, ushort_v) { return x.data(); }
#endif

// 2: to ushort_v {{{3
#ifdef Vc_IMPL_AVX2
Vc_SIMD_CAST_AVX_2(double_v, ushort_v) {
    const auto tmp0 = _mm256_cvttpd_epi32(x0.data());
    const auto tmp1 = _mm256_cvttpd_epi32(x1.data());
    return AVX::zeroExtend(_mm_packus_epi32(tmp0, tmp1));
}
Vc_SIMD_CAST_AVX_2( float_v, ushort_v) {
    using AVX2::ushort_v;
    using AVX2::int_v;
    return simd_cast<ushort_v>(simd_cast<int_v>(x0), simd_cast<int_v>(x1));
}
// The unpack cascade interleaves the low 16 bits of each 32-bit lane from x0
// and x1 (truncation, no saturation); the final permute4x64 repairs the
// 128-bit-lane-local ordering of the unpack instructions.
Vc_SIMD_CAST_AVX_2(   int_v, ushort_v) {
    auto tmp0 = _mm256_unpacklo_epi16(x0.data(), x1.data());
    auto tmp1 = _mm256_unpackhi_epi16(x0.data(), x1.data());
    auto tmp2 = _mm256_unpacklo_epi16(tmp0, tmp1);
    auto tmp3 = _mm256_unpackhi_epi16(tmp0, tmp1);
    return Mem::permute4x64<X0, X2, X1, X3>(_mm256_unpacklo_epi16(tmp2, tmp3));
}
// Same truncating unpack cascade as the int_v overload above.
Vc_SIMD_CAST_AVX_2(  uint_v, ushort_v) {
    auto tmp0 = _mm256_unpacklo_epi16(x0.data(), x1.data());
    auto tmp1 = _mm256_unpackhi_epi16(x0.data(), x1.data());
    auto tmp2 = _mm256_unpacklo_epi16(tmp0, tmp1);
    auto tmp3 = _mm256_unpackhi_epi16(tmp0, tmp1);
    return Mem::permute4x64<X0, X2, X1, X3>(_mm256_unpacklo_epi16(tmp2, tmp3));
}
#endif

// 3: to ushort_v {{{3
#ifdef Vc_IMPL_AVX2
// Three double_v inputs provide 12 values; the last 4 lanes pack against zero.
Vc_SIMD_CAST_AVX_3(double_v, ushort_v) {
    const auto tmp0 = _mm256_cvttpd_epi32(x0.data());
    const auto tmp1 = _mm256_cvttpd_epi32(x1.data());
    const auto tmp2 = _mm256_cvttpd_epi32(x2.data());
    return AVX::concat(_mm_packus_epi32(tmp0, tmp1),
                       _mm_packus_epi32(tmp2, _mm_setzero_si128()));
}
#endif

// 4: to ushort_v {{{3
#ifdef Vc_IMPL_AVX2
// Four double_v inputs fill all 16 ushort lanes.
Vc_SIMD_CAST_AVX_4(double_v, ushort_v) {
    const auto tmp0 = _mm256_cvttpd_epi32(x0.data());
    const auto tmp1 = _mm256_cvttpd_epi32(x1.data());
    const auto tmp2 = _mm256_cvttpd_epi32(x2.data());
    const auto tmp3 = _mm256_cvttpd_epi32(x3.data());
    return AVX::concat(_mm_packus_epi32(tmp0, tmp1), _mm_packus_epi32(tmp2, tmp3));
}
#endif
1406 
// 1 SSE::Vector to 1 AVX2::Vector {{{2
// A single SSE input fills at most the low half of the wider AVX2 register;
// zeroExtend clears the upper half where the lane counts differ.
Vc_SIMD_CAST_1(SSE::double_v, AVX2::double_v) { return AVX::zeroExtend(x.data()); }
Vc_SIMD_CAST_1(SSE:: float_v, AVX2::double_v) { return _mm256_cvtps_pd(x.data()); }
Vc_SIMD_CAST_1(SSE::   int_v, AVX2::double_v) { return _mm256_cvtepi32_pd(x.data()); }
// No unsigned epi32 -> pd conversion exists: bias into signed range by
// subtracting INT_MIN, convert, then add 2^31 back in the double domain.
Vc_SIMD_CAST_1(SSE::  uint_v, AVX2::double_v) { using namespace AvxIntrinsics; return _mm256_add_pd(_mm256_cvtepi32_pd(_mm_sub_epi32(x.data(), _mm_setmin_epi32())), set1_pd(1u << 31)); }
Vc_SIMD_CAST_1(SSE:: short_v, AVX2::double_v) { return simd_cast<AVX2::double_v>(simd_cast<SSE::int_v>(x)); }
Vc_SIMD_CAST_1(SSE::ushort_v, AVX2::double_v) { return simd_cast<AVX2::double_v>(simd_cast<SSE::int_v>(x)); }

Vc_SIMD_CAST_1(SSE::double_v, AVX2:: float_v) { return AVX::zeroExtend(simd_cast<SSE:: float_v>(x).data()); }
Vc_SIMD_CAST_1(SSE:: float_v, AVX2:: float_v) { return AVX::zeroExtend(x.data()); }
Vc_SIMD_CAST_1(SSE::   int_v, AVX2:: float_v) { return AVX::zeroExtend(_mm_cvtepi32_ps(x.data())); }
Vc_SIMD_CAST_1(SSE::  uint_v, AVX2:: float_v) { return AVX::zeroExtend(simd_cast<SSE::float_v>(x).data()); }
// 8 shorts widen to 8 floats, filling the whole AVX register.
Vc_SIMD_CAST_1(SSE:: short_v, AVX2:: float_v) { return AVX::convert< short, float>(x.data()); }
Vc_SIMD_CAST_1(SSE::ushort_v, AVX2:: float_v) { return AVX::convert<ushort, float>(x.data()); }

#ifdef Vc_IMPL_AVX2
Vc_SIMD_CAST_1(SSE::double_v, AVX2::   int_v) { return AVX::zeroExtend(simd_cast<SSE::   int_v>(x).data()); }
Vc_SIMD_CAST_1(SSE::double_v, AVX2::  uint_v) { return AVX::zeroExtend(simd_cast<SSE::  uint_v>(x).data()); }
Vc_SIMD_CAST_1(SSE::double_v, AVX2:: short_v) { return AVX::zeroExtend(simd_cast<SSE:: short_v>(x).data()); }
Vc_SIMD_CAST_1(SSE::double_v, AVX2::ushort_v) { return AVX::zeroExtend(simd_cast<SSE::ushort_v>(x).data()); }

Vc_SIMD_CAST_1(SSE:: float_v, AVX2::   int_v) { return AVX::zeroExtend(simd_cast<SSE::int_v>(x).data()); }
Vc_SIMD_CAST_1(SSE:: float_v, AVX2::  uint_v) { return AVX::zeroExtend(simd_cast<SSE::uint_v>(x).data()); }
Vc_SIMD_CAST_1(SSE:: float_v, AVX2:: short_v) { return AVX::zeroExtend(simd_cast<SSE::short_v>(x).data()); }
Vc_SIMD_CAST_1(SSE:: float_v, AVX2::ushort_v) { return AVX::zeroExtend(simd_cast<SSE::ushort_v>(x).data()); }

Vc_SIMD_CAST_1(SSE::   int_v, AVX2::   int_v) { return AVX::zeroExtend(x.data()); }
Vc_SIMD_CAST_1(SSE::  uint_v, AVX2::   int_v) { return AVX::zeroExtend(x.data()); }
Vc_SIMD_CAST_1(SSE:: short_v, AVX2::   int_v) { return AVX::convert< short,  int>(x.data()); }
Vc_SIMD_CAST_1(SSE::ushort_v, AVX2::   int_v) { return AVX::convert<ushort,  int>(x.data()); }

Vc_SIMD_CAST_1(SSE::   int_v, AVX2::  uint_v) { return AVX::zeroExtend(x.data()); }
Vc_SIMD_CAST_1(SSE::  uint_v, AVX2::  uint_v) { return AVX::zeroExtend(x.data()); }
Vc_SIMD_CAST_1(SSE:: short_v, AVX2::  uint_v) { return AVX::convert< short, uint>(x.data()); }
Vc_SIMD_CAST_1(SSE::ushort_v, AVX2::  uint_v) { return AVX::convert<ushort, uint>(x.data()); }

Vc_SIMD_CAST_1(SSE::   int_v, AVX2:: short_v) { return AVX::zeroExtend(simd_cast<SSE::short_v>(x).data()); }
Vc_SIMD_CAST_1(SSE::  uint_v, AVX2:: short_v) { return AVX::zeroExtend(simd_cast<SSE::short_v>(x).data()); }
Vc_SIMD_CAST_1(SSE:: short_v, AVX2:: short_v) { return AVX::zeroExtend(x.data()); }
Vc_SIMD_CAST_1(SSE::ushort_v, AVX2:: short_v) { return AVX::zeroExtend(x.data()); }

Vc_SIMD_CAST_1(SSE::   int_v, AVX2::ushort_v) { return AVX::zeroExtend(simd_cast<SSE::ushort_v>(x).data()); }
Vc_SIMD_CAST_1(SSE::  uint_v, AVX2::ushort_v) { return AVX::zeroExtend(simd_cast<SSE::ushort_v>(x).data()); }
Vc_SIMD_CAST_1(SSE:: short_v, AVX2::ushort_v) { return AVX::zeroExtend(x.data()); }
Vc_SIMD_CAST_1(SSE::ushort_v, AVX2::ushort_v) { return AVX::zeroExtend(x.data()); }
#endif
1453 
// 2 SSE::Vector to 1 AVX2::Vector {{{2
// Two SSE inputs together match the AVX2 lane count for same-width element
// types, in which case they are simply concatenated (x0 low, x1 high);
// narrowing conversions route through the SSE- or AVX-level casts.
Vc_SIMD_CAST_2(SSE::double_v, AVX2::double_v) { return AVX::concat(x0.data(), x1.data()); }

Vc_SIMD_CAST_2(SSE::double_v, AVX2:: float_v) { return AVX::zeroExtend(simd_cast<SSE:: float_v>(x0, x1).data()); }
Vc_SIMD_CAST_2(SSE:: float_v, AVX2:: float_v) { return AVX::concat(x0.data(), x1.data()); }
Vc_SIMD_CAST_2(SSE::   int_v, AVX2:: float_v) { return AVX::convert< int, float>(AVX::concat(x0.data(), x1.data())); }
Vc_SIMD_CAST_2(SSE::  uint_v, AVX2:: float_v) { return AVX::convert<uint, float>(AVX::concat(x0.data(), x1.data())); }

#ifdef Vc_IMPL_AVX2
Vc_SIMD_CAST_2(SSE::double_v, AVX2::   int_v) { return AVX::zeroExtend(simd_cast<SSE::   int_v>(x0, x1).data()); }
Vc_SIMD_CAST_2(SSE::double_v, AVX2::  uint_v) { return AVX::zeroExtend(simd_cast<SSE::  uint_v>(x0, x1).data()); }
Vc_SIMD_CAST_2(SSE::double_v, AVX2:: short_v) { return AVX::zeroExtend(simd_cast<SSE:: short_v>(x0, x1).data()); }
Vc_SIMD_CAST_2(SSE::double_v, AVX2::ushort_v) { return AVX::zeroExtend(simd_cast<SSE::ushort_v>(x0, x1).data()); }

Vc_SIMD_CAST_2(SSE:: float_v, AVX2::   int_v) { return simd_cast<AVX2:: int_v>(simd_cast<AVX2::float_v>(x0, x1)); }
Vc_SIMD_CAST_2(SSE:: float_v, AVX2::  uint_v) { return simd_cast<AVX2::uint_v>(simd_cast<AVX2::float_v>(x0, x1)); }
Vc_SIMD_CAST_2(SSE:: float_v, AVX2:: short_v) { return AVX::zeroExtend(simd_cast<SSE:: short_v>(x0, x1).data()); }
Vc_SIMD_CAST_2(SSE:: float_v, AVX2::ushort_v) { return AVX::zeroExtend(simd_cast<SSE::ushort_v>(x0, x1).data()); }

Vc_SIMD_CAST_2(SSE::   int_v, AVX2::   int_v) { return AVX::concat(x0.data(), x1.data()); }
Vc_SIMD_CAST_2(SSE::  uint_v, AVX2::   int_v) { return AVX::concat(x0.data(), x1.data()); }

Vc_SIMD_CAST_2(SSE::   int_v, AVX2::  uint_v) { return AVX::concat(x0.data(), x1.data()); }
Vc_SIMD_CAST_2(SSE::  uint_v, AVX2::  uint_v) { return AVX::concat(x0.data(), x1.data()); }

Vc_SIMD_CAST_2(SSE::   int_v, AVX2:: short_v) { return AVX::zeroExtend(simd_cast<SSE:: short_v>(x0, x1).data()); }
Vc_SIMD_CAST_2(SSE::  uint_v, AVX2:: short_v) { return AVX::zeroExtend(simd_cast<SSE:: short_v>(x0, x1).data()); }
Vc_SIMD_CAST_2(SSE:: short_v, AVX2:: short_v) { return AVX::concat(x0.data(), x1.data()); }
Vc_SIMD_CAST_2(SSE::ushort_v, AVX2:: short_v) { return AVX::concat(x0.data(), x1.data()); }

Vc_SIMD_CAST_2(SSE::   int_v, AVX2::ushort_v) { return AVX::zeroExtend(simd_cast<SSE::ushort_v>(x0, x1).data()); }
Vc_SIMD_CAST_2(SSE::  uint_v, AVX2::ushort_v) { return AVX::zeroExtend(simd_cast<SSE::ushort_v>(x0, x1).data()); }
Vc_SIMD_CAST_2(SSE:: short_v, AVX2::ushort_v) { return AVX::concat(x0.data(), x1.data()); }
Vc_SIMD_CAST_2(SSE::ushort_v, AVX2::ushort_v) { return AVX::concat(x0.data(), x1.data()); }
#endif
// 3 SSE::Vector to 1 AVX2::Vector {{{2
// Three inputs are handled by pairing the first two into one AVX2 vector and
// routing the third through the 1-input cast, then combining at AVX2 level.
Vc_SIMD_CAST_3(SSE::double_v, AVX2:: float_v) { return simd_cast<AVX2:: float_v>(simd_cast<AVX2::double_v>(x0, x1), simd_cast<AVX2::double_v>(x2)); }

#ifdef Vc_IMPL_AVX2
Vc_SIMD_CAST_3(SSE::double_v, AVX2::   int_v) { return simd_cast<AVX2:: int_v>(simd_cast<AVX2::double_v>(x0, x1), simd_cast<AVX2::double_v>(x2)); }
Vc_SIMD_CAST_3(SSE::double_v, AVX2::  uint_v) { return simd_cast<AVX2::uint_v>(simd_cast<AVX2::double_v>(x0, x1), simd_cast<AVX2::double_v>(x2)); }
Vc_SIMD_CAST_3(SSE::double_v, AVX2:: short_v) { return AVX::zeroExtend(simd_cast<SSE:: short_v>(x0, x1, x2).data()); }
Vc_SIMD_CAST_3(SSE::double_v, AVX2::ushort_v) { return AVX::zeroExtend(simd_cast<SSE::ushort_v>(x0, x1, x2).data()); }

Vc_SIMD_CAST_3(SSE:: float_v, AVX2:: short_v) { return simd_cast<AVX2:: short_v>(simd_cast<AVX2::float_v>(x0, x1), simd_cast<AVX2::float_v>(x2)); }
Vc_SIMD_CAST_3(SSE:: float_v, AVX2::ushort_v) { return simd_cast<AVX2::ushort_v>(simd_cast<AVX2::float_v>(x0, x1), simd_cast<AVX2::float_v>(x2)); }

Vc_SIMD_CAST_3(SSE::   int_v, AVX2:: short_v) { return simd_cast<AVX2:: short_v>(simd_cast<AVX2:: int_v>(x0, x1), simd_cast<AVX2:: int_v>(x2)); }
Vc_SIMD_CAST_3(SSE::  uint_v, AVX2:: short_v) { return simd_cast<AVX2:: short_v>(simd_cast<AVX2::uint_v>(x0, x1), simd_cast<AVX2::uint_v>(x2)); }

Vc_SIMD_CAST_3(SSE::   int_v, AVX2::ushort_v) { return simd_cast<AVX2::ushort_v>(simd_cast<AVX2:: int_v>(x0, x1), simd_cast<AVX2:: int_v>(x2)); }
Vc_SIMD_CAST_3(SSE::  uint_v, AVX2::ushort_v) { return simd_cast<AVX2::ushort_v>(simd_cast<AVX2::uint_v>(x0, x1), simd_cast<AVX2::uint_v>(x2)); }
#endif
1507 
// 4 SSE::Vector to 1 AVX2::Vector {{{2
// Four inputs are combined pairwise into two AVX2 vectors, then cast at
// AVX2 level.
Vc_SIMD_CAST_4(SSE::double_v, AVX2:: float_v) { return simd_cast<AVX2:: float_v>(simd_cast<AVX2::double_v>(x0, x1), simd_cast<AVX2::double_v>(x2, x3)); }

#ifdef Vc_IMPL_AVX2
Vc_SIMD_CAST_4(SSE::double_v, AVX2::   int_v) { return simd_cast<AVX2:: int_v>(simd_cast<AVX2::double_v>(x0, x1), simd_cast<AVX2::double_v>(x2, x3)); }
Vc_SIMD_CAST_4(SSE::double_v, AVX2::  uint_v) { return simd_cast<AVX2::uint_v>(simd_cast<AVX2::double_v>(x0, x1), simd_cast<AVX2::double_v>(x2, x3)); }
Vc_SIMD_CAST_4(SSE::double_v, AVX2:: short_v) { return AVX::zeroExtend(simd_cast<SSE:: short_v>(x0, x1, x2, x3).data()); }
Vc_SIMD_CAST_4(SSE::double_v, AVX2::ushort_v) { return AVX::zeroExtend(simd_cast<SSE::ushort_v>(x0, x1, x2, x3).data()); }

Vc_SIMD_CAST_4(SSE:: float_v, AVX2:: short_v) { return simd_cast<AVX2:: short_v>(simd_cast<AVX2::float_v>(x0, x1), simd_cast<AVX2::float_v>(x2, x3)); }
Vc_SIMD_CAST_4(SSE:: float_v, AVX2::ushort_v) { return simd_cast<AVX2::ushort_v>(simd_cast<AVX2::float_v>(x0, x1), simd_cast<AVX2::float_v>(x2, x3)); }

Vc_SIMD_CAST_4(SSE::   int_v, AVX2:: short_v) { return simd_cast<AVX2:: short_v>(simd_cast<AVX2:: int_v>(x0, x1), simd_cast<AVX2:: int_v>(x2, x3)); }
Vc_SIMD_CAST_4(SSE::  uint_v, AVX2:: short_v) { return simd_cast<AVX2:: short_v>(simd_cast<AVX2::uint_v>(x0, x1), simd_cast<AVX2::uint_v>(x2, x3)); }

Vc_SIMD_CAST_4(SSE::   int_v, AVX2::ushort_v) { return simd_cast<AVX2::ushort_v>(simd_cast<AVX2:: int_v>(x0, x1), simd_cast<AVX2:: int_v>(x2, x3)); }
Vc_SIMD_CAST_4(SSE::  uint_v, AVX2::ushort_v) { return simd_cast<AVX2::ushort_v>(simd_cast<AVX2::uint_v>(x0, x1), simd_cast<AVX2::uint_v>(x2, x3)); }
#endif
1526 
// 5 SSE::Vector to 1 AVX2::Vector {{{2
// 5..8 double_v inputs (2 lanes each) feed the 16-lane short_v/ushort_v
// targets; inputs are paired into AVX2::double_v values and a trailing odd
// input goes through the 1-input cast.
#ifdef Vc_IMPL_AVX2
Vc_SIMD_CAST_5(SSE::double_v, AVX2:: short_v) { return simd_cast<AVX2:: short_v>(simd_cast<AVX2::double_v>(x0, x1), simd_cast<AVX2::double_v>(x2, x3), simd_cast<AVX2::double_v>(x4)); }
Vc_SIMD_CAST_5(SSE::double_v, AVX2::ushort_v) { return simd_cast<AVX2::ushort_v>(simd_cast<AVX2::double_v>(x0, x1), simd_cast<AVX2::double_v>(x2, x3), simd_cast<AVX2::double_v>(x4)); }
#endif

// 6 SSE::Vector to 1 AVX2::Vector {{{2
#ifdef Vc_IMPL_AVX2
Vc_SIMD_CAST_6(SSE::double_v, AVX2:: short_v) { return simd_cast<AVX2:: short_v>(simd_cast<AVX2::double_v>(x0, x1), simd_cast<AVX2::double_v>(x2, x3), simd_cast<AVX2::double_v>(x4, x5)); }
Vc_SIMD_CAST_6(SSE::double_v, AVX2::ushort_v) { return simd_cast<AVX2::ushort_v>(simd_cast<AVX2::double_v>(x0, x1), simd_cast<AVX2::double_v>(x2, x3), simd_cast<AVX2::double_v>(x4, x5)); }
#endif

// 7 SSE::Vector to 1 AVX2::Vector {{{2
#ifdef Vc_IMPL_AVX2
Vc_SIMD_CAST_7(SSE::double_v, AVX2:: short_v) { return simd_cast<AVX2:: short_v>(simd_cast<AVX2::double_v>(x0, x1), simd_cast<AVX2::double_v>(x2, x3), simd_cast<AVX2::double_v>(x4, x5), simd_cast<AVX2::double_v>(x6)); }
Vc_SIMD_CAST_7(SSE::double_v, AVX2::ushort_v) { return simd_cast<AVX2::ushort_v>(simd_cast<AVX2::double_v>(x0, x1), simd_cast<AVX2::double_v>(x2, x3), simd_cast<AVX2::double_v>(x4, x5), simd_cast<AVX2::double_v>(x6)); }
#endif

// 8 SSE::Vector to 1 AVX2::Vector {{{2
#ifdef Vc_IMPL_AVX2
Vc_SIMD_CAST_8(SSE::double_v, AVX2:: short_v) { return simd_cast<AVX2:: short_v>(simd_cast<AVX2::double_v>(x0, x1), simd_cast<AVX2::double_v>(x2, x3), simd_cast<AVX2::double_v>(x4, x5), simd_cast<AVX2::double_v>(x6, x7)); }
Vc_SIMD_CAST_8(SSE::double_v, AVX2::ushort_v) { return simd_cast<AVX2::ushort_v>(simd_cast<AVX2::double_v>(x0, x1), simd_cast<AVX2::double_v>(x2, x3), simd_cast<AVX2::double_v>(x4, x5), simd_cast<AVX2::double_v>(x6, x7)); }
#endif
1550 
// 1 AVX2::Vector to 1 SSE::Vector {{{2
// Same element type: the narrower SSE result is simply the low 128 bits.
Vc_SIMD_CAST_1(AVX2::double_v, SSE::double_v) { return AVX::lo128(x.data()); }
Vc_SIMD_CAST_1(AVX2:: float_v, SSE:: float_v) { return AVX::lo128(x.data()); }
#ifdef Vc_IMPL_AVX2
Vc_SIMD_CAST_1(AVX2::   int_v, SSE::   int_v) { return AVX::lo128(x.data()); }
Vc_SIMD_CAST_1(AVX2::  uint_v, SSE::  uint_v) { return AVX::lo128(x.data()); }
Vc_SIMD_CAST_1(AVX2:: short_v, SSE:: short_v) { return AVX::lo128(x.data()); }
Vc_SIMD_CAST_1(AVX2::ushort_v, SSE::ushort_v) { return AVX::lo128(x.data()); }
#endif

// double_v source: converting all 4 values already fits in (or under-fills)
// the SSE target, so the full 256-bit register is converted.
Vc_SIMD_CAST_1(AVX2::double_v, SSE:: float_v) { return simd_cast<SSE:: float_v>(simd_cast<AVX2:: float_v>(x)); }
Vc_SIMD_CAST_1(AVX2::double_v, SSE::   int_v) { return AVX::convert<double, int>(x.data()); }
Vc_SIMD_CAST_1(AVX2::double_v, SSE::  uint_v) { return AVX::convert<double, unsigned int>(x.data()); }
Vc_SIMD_CAST_1(AVX2::double_v, SSE:: short_v) { return AVX::convert<double, short>(x.data()); }
Vc_SIMD_CAST_1(AVX2::double_v, SSE::ushort_v) { return AVX::convert<double, unsigned short>(x.data()); }

Vc_SIMD_CAST_1(AVX2:: float_v, SSE::double_v) { return simd_cast<SSE::double_v>(simd_cast<SSE:: float_v>(x)); }
Vc_SIMD_CAST_1(AVX2:: float_v, SSE::   int_v) { return simd_cast<SSE::   int_v>(simd_cast<SSE:: float_v>(x)); }
Vc_SIMD_CAST_1(AVX2:: float_v, SSE::  uint_v) { return simd_cast<SSE::  uint_v>(simd_cast<SSE:: float_v>(x)); }
Vc_SIMD_CAST_1(AVX2:: float_v, SSE:: short_v) { return AVX::convert<float, short>(x.data()); }
Vc_SIMD_CAST_1(AVX2:: float_v, SSE::ushort_v) { return AVX::convert<float, unsigned short>(x.data()); }

#ifdef Vc_IMPL_AVX2
// Widening targets (double/float from int): only the low 128 bits of the
// source fit; narrowing targets (short/ushort) convert the full register.
Vc_SIMD_CAST_1(AVX2::   int_v, SSE::double_v) { return SSE::convert<int, double>(AVX::lo128(x.data())); }
Vc_SIMD_CAST_1(AVX2::   int_v, SSE:: float_v) { return SSE::convert<int, float>(AVX::lo128(x.data())); }
Vc_SIMD_CAST_1(AVX2::   int_v, SSE::  uint_v) { return AVX::lo128(x.data()); }
Vc_SIMD_CAST_1(AVX2::   int_v, SSE:: short_v) { return AVX::convert<int,  short>(x.data()); }
Vc_SIMD_CAST_1(AVX2::   int_v, SSE::ushort_v) { return AVX::convert<int, ushort>(x.data()); }

Vc_SIMD_CAST_1(AVX2::  uint_v, SSE::double_v) { return SSE::convert<uint, double>(AVX::lo128(x.data())); }
Vc_SIMD_CAST_1(AVX2::  uint_v, SSE:: float_v) { return SSE::convert<uint, float>(AVX::lo128(x.data())); }
Vc_SIMD_CAST_1(AVX2::  uint_v, SSE::   int_v) { return AVX::lo128(x.data()); }
Vc_SIMD_CAST_1(AVX2::  uint_v, SSE:: short_v) { return AVX::convert<uint,  short>(x.data()); }
Vc_SIMD_CAST_1(AVX2::  uint_v, SSE::ushort_v) { return AVX::convert<uint, ushort>(x.data()); }

// 16-bit sources go through the SSE 16-bit vector first, then convert.
Vc_SIMD_CAST_1(AVX2:: short_v, SSE::double_v) { return simd_cast<SSE::double_v>(simd_cast<SSE:: short_v>(x)); }
Vc_SIMD_CAST_1(AVX2:: short_v, SSE:: float_v) { return simd_cast<SSE:: float_v>(simd_cast<SSE:: short_v>(x)); }
Vc_SIMD_CAST_1(AVX2:: short_v, SSE::   int_v) { return simd_cast<SSE::   int_v>(simd_cast<SSE:: short_v>(x)); }
Vc_SIMD_CAST_1(AVX2:: short_v, SSE::  uint_v) { return simd_cast<SSE::  uint_v>(simd_cast<SSE:: short_v>(x)); }
Vc_SIMD_CAST_1(AVX2:: short_v, SSE::ushort_v) { return simd_cast<SSE::ushort_v>(simd_cast<SSE:: short_v>(x)); }

Vc_SIMD_CAST_1(AVX2::ushort_v, SSE::double_v) { return simd_cast<SSE::double_v>(simd_cast<SSE::ushort_v>(x)); }
Vc_SIMD_CAST_1(AVX2::ushort_v, SSE:: float_v) { return simd_cast<SSE:: float_v>(simd_cast<SSE::ushort_v>(x)); }
Vc_SIMD_CAST_1(AVX2::ushort_v, SSE::   int_v) { return simd_cast<SSE::   int_v>(simd_cast<SSE::ushort_v>(x)); }
Vc_SIMD_CAST_1(AVX2::ushort_v, SSE::  uint_v) { return simd_cast<SSE::  uint_v>(simd_cast<SSE::ushort_v>(x)); }
Vc_SIMD_CAST_1(AVX2::ushort_v, SSE:: short_v) { return simd_cast<SSE:: short_v>(simd_cast<SSE::ushort_v>(x)); }
#endif
1598 
// 2 AVX2::Vector to 1 SSE::Vector {{{2
// Two double_v inputs (4 values each) fill all 8 lanes of the 128-bit target.
// packs saturates to the signed, packus to the unsigned 16-bit range.
Vc_SIMD_CAST_2(AVX2::double_v, SSE:: short_v) {
    const auto tmp0 = _mm256_cvttpd_epi32(x0.data());
    const auto tmp1 = _mm256_cvttpd_epi32(x1.data());
    return _mm_packs_epi32(tmp0, tmp1);
}
Vc_SIMD_CAST_2(AVX2::double_v, SSE::ushort_v) {
    const auto tmp0 = _mm256_cvttpd_epi32(x0.data());
    const auto tmp1 = _mm256_cvttpd_epi32(x1.data());
    return _mm_packus_epi32(tmp0, tmp1);
}
1610 
// 1 Scalar::Vector to 1 AVX2::Vector {{{2
// A single scalar value becomes lane 0 of the AVX2 vector; all remaining
// lanes are zero. Dispatch on the return type is done via enable_if so that
// each AVX2 vector type gets its own setr-based overload.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x,
          enable_if<std::is_same<Return, AVX2::double_v>::value>)
{
    return AVX::zeroExtend(_mm_setr_pd(x.data(), 0.));
}
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x,
          enable_if<std::is_same<Return, AVX2::float_v>::value>)
{
    return AVX::zeroExtend(_mm_setr_ps(x.data(), 0.f, 0.f, 0.f));
}
#ifdef Vc_IMPL_AVX2
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x,
          enable_if<std::is_same<Return, AVX2::int_v>::value>)
{
    return _mm256_setr_epi32(x.data(), 0, 0, 0, 0, 0, 0, 0);
}
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x,
          enable_if<std::is_same<Return, AVX2::uint_v>::value>)
{
    // Explicit uint() conversion avoids a narrowing/sign warning for the
    // int-typed intrinsic parameter.
    return _mm256_setr_epi32(uint(x.data()), 0, 0, 0, 0, 0, 0, 0);
}
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x,
          enable_if<std::is_same<Return, AVX2::short_v>::value>)
{
    return _mm256_setr_epi16(x.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x,
          enable_if<std::is_same<Return, AVX2::ushort_v>::value>)
{
    return _mm256_setr_epi16(x.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}
#endif
1656 
// 2 Scalar::Vector to 1 AVX2::Vector {{{2
// Two scalar values fill lanes 0 and 1; all remaining lanes are zero.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1,
          enable_if<std::is_same<Return, AVX2::double_v>::value>)
{
    return AVX::zeroExtend(_mm_setr_pd(x0.data(), x1.data()));
}
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1,
          enable_if<std::is_same<Return, AVX2::float_v>::value>)
{
    return AVX::zeroExtend(_mm_setr_ps(x0.data(), x1.data(), 0.f, 0.f));
}
#ifdef Vc_IMPL_AVX2
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1,
          enable_if<std::is_same<Return, AVX2::int_v>::value>)
{
    return _mm256_setr_epi32(x0.data(), x1.data(), 0, 0, 0, 0, 0, 0);
}
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1,
          enable_if<std::is_same<Return, AVX2::uint_v>::value>)
{
    return _mm256_setr_epi32(uint(x0.data()), uint(x1.data()), 0, 0, 0, 0, 0, 0);
}
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1,
          enable_if<std::is_same<Return, AVX2::short_v>::value>)
{
    return _mm256_setr_epi16(x0.data(), x1.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1,
          enable_if<std::is_same<Return, AVX2::ushort_v>::value>)
{
    return _mm256_setr_epi16(x0.data(), x1.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}
#endif
1702 
// 3 Scalar::Vector to 1 AVX2::Vector {{{2
// Three scalar values fill lanes 0..2; all remaining lanes are zero.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          enable_if<std::is_same<Return, AVX2::double_v>::value>)
{
    return _mm256_setr_pd(x0.data(), x1.data(), x2.data(), 0);
}
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          enable_if<std::is_same<Return, AVX2::float_v>::value>)
{
    return AVX::zeroExtend(_mm_setr_ps(x0.data(), x1.data(), x2.data(), 0));
}
#ifdef Vc_IMPL_AVX2
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          enable_if<std::is_same<Return, AVX2::int_v>::value>)
{
    return _mm256_setr_epi32(x0.data(), x1.data(), x2.data(), 0, 0, 0, 0, 0);
}
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          enable_if<std::is_same<Return, AVX2::uint_v>::value>)
{
    return _mm256_setr_epi32(uint(x0.data()), uint(x1.data()), uint(x2.data()), 0, 0, 0,
                             0, 0);
}
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          enable_if<std::is_same<Return, AVX2::short_v>::value>)
{
    return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          enable_if<std::is_same<Return, AVX2::ushort_v>::value>)
{
    return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}
#endif
1749 
// 4 Scalar::Vector to 1 AVX2::Vector {{{2
// Four scalar values fill lanes 0..3 (the whole vector for double_v); all
// remaining lanes are zero.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3,
          enable_if<std::is_same<Return, AVX2::double_v>::value>)
{
    return _mm256_setr_pd(x0.data(), x1.data(), x2.data(), x3.data());
}
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3,
          enable_if<std::is_same<Return, AVX2::float_v>::value>)
{
    return AVX::zeroExtend(_mm_setr_ps(x0.data(), x1.data(), x2.data(), x3.data()));
}
#ifdef Vc_IMPL_AVX2
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3,
          enable_if<std::is_same<Return, AVX2::int_v>::value>)
{
    return _mm256_setr_epi32(x0.data(), x1.data(), x2.data(), x3.data(), 0, 0, 0, 0);
}
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3,
          enable_if<std::is_same<Return, AVX2::uint_v>::value>)
{
    return _mm256_setr_epi32(uint(x0.data()), uint(x1.data()), uint(x2.data()),
                             uint(x3.data()), 0, 0, 0, 0);
}
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3,
          enable_if<std::is_same<Return, AVX2::short_v>::value>)
{
    return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3,
          enable_if<std::is_same<Return, AVX2::ushort_v>::value>)
{
    return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}
#endif
1802 
1803 // 5 Scalar::Vector to 1 AVX2::Vector {{{2
1804 template <typename Return, typename T>
1805 Vc_INTRINSIC Vc_CONST Return
1806 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
1807           Scalar::Vector<T> x3, Scalar::Vector<T> x4,
1808           enable_if<std::is_same<Return, AVX2::float_v>::value>)
1809 {
1810     return _mm256_setr_ps(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), 0, 0, 0);
1811 }
1812 #ifdef Vc_IMPL_AVX2
// Five scalars -> AVX2::int_v: lanes 0-4 take the inputs, lanes 5-7 are zero.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4,
          enable_if<std::is_same<Return, AVX2::int_v>::value>)
{
    return _mm256_setr_epi32(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), 0, 0, 0);
}
// Unsigned variant: explicit uint() conversion of each payload.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4,
          enable_if<std::is_same<Return, AVX2::uint_v>::value>)
{
    return _mm256_setr_epi32(uint(x0.data()), uint(x1.data()), uint(x2.data()),
                             uint(x3.data()), uint(x4.data()), 0, 0, 0);
}
// Five scalars -> AVX2::short_v: lanes 0-4 take the inputs, lanes 5-15 zero.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4,
          enable_if<std::is_same<Return, AVX2::short_v>::value>)
{
    return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}
// Unsigned variant: identical lane placement.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4,
          enable_if<std::is_same<Return, AVX2::ushort_v>::value>)
{
    return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}
1846 #endif
1847 
1848 // 6 Scalar::Vector to 1 AVX2::Vector {{{2
1849 template <typename Return, typename T>
1850 Vc_INTRINSIC Vc_CONST Return
1851 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
1852           Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
1853           enable_if<std::is_same<Return, AVX2::float_v>::value>)
1854 {
1855     return _mm256_setr_ps(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
1856                           x5.data(), 0, 0);
1857 }
1858 #ifdef Vc_IMPL_AVX2
// Six scalars -> AVX2::int_v: lanes 0-5 take the inputs, lanes 6-7 are zero.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          enable_if<std::is_same<Return, AVX2::int_v>::value>)
{
    return _mm256_setr_epi32(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
                             x5.data(), 0, 0);
}
// Unsigned variant: explicit uint() conversion of each payload.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          enable_if<std::is_same<Return, AVX2::uint_v>::value>)
{
    return _mm256_setr_epi32(uint(x0.data()), uint(x1.data()), uint(x2.data()),
                             uint(x3.data()), uint(x4.data()), uint(x5.data()), 0, 0);
}
// Six scalars -> AVX2::short_v: lanes 0-5 take the inputs, lanes 6-15 zero.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          enable_if<std::is_same<Return, AVX2::short_v>::value>)
{
    return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
                          x5.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}
// Unsigned variant: identical lane placement.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          enable_if<std::is_same<Return, AVX2::ushort_v>::value>)
{
    return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
                          x5.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}
1895 #endif
1896 
1897 // 7 Scalar::Vector to 1 AVX2::Vector {{{2
1898 template <typename Return, typename T>
1899 Vc_INTRINSIC Vc_CONST Return
1900 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
1901           Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
1902           Scalar::Vector<T> x6,
1903           enable_if<std::is_same<Return, AVX2::float_v>::value>)
1904 {
1905     return _mm256_setr_ps(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
1906                           x5.data(), x6.data(), 0);
1907 }
1908 #ifdef Vc_IMPL_AVX2
// Seven scalars -> AVX2::int_v: lanes 0-6 take the inputs, lane 7 is zero.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6,
          enable_if<std::is_same<Return, AVX2::int_v>::value>)
{
    return _mm256_setr_epi32(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
                             x5.data(), x6.data(), 0);
}
// Unsigned variant: explicit uint() conversion of each payload.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6,
          enable_if<std::is_same<Return, AVX2::uint_v>::value>)
{
    return _mm256_setr_epi32(uint(x0.data()), uint(x1.data()), uint(x2.data()),
                             uint(x3.data()), uint(x4.data()), uint(x5.data()),
                             uint(x6.data()), 0);
}
// Seven scalars -> AVX2::short_v: lanes 0-6 take the inputs, lanes 7-15 zero.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6,
          enable_if<std::is_same<Return, AVX2::short_v>::value>)
{
    return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
                          x5.data(), x6.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0);
}
// Unsigned variant: identical lane placement.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6,
          enable_if<std::is_same<Return, AVX2::ushort_v>::value>)
{
    return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
                          x5.data(), x6.data(), 0, 0, 0, 0, 0, 0, 0, 0, 0);
}
1950 #endif
1951 
1952 // 8 Scalar::Vector to 1 AVX2::Vector {{{2
1953 template <typename Return, typename T>
1954 Vc_INTRINSIC Vc_CONST Return
1955 simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
1956           Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
1957           Scalar::Vector<T> x6, Scalar::Vector<T> x7,
1958           enable_if<std::is_same<Return, AVX2::float_v>::value>)
1959 {
1960     return _mm256_setr_ps(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
1961                           x5.data(), x6.data(), x7.data());
1962 }
1963 #ifdef Vc_IMPL_AVX2
// Eight scalars -> AVX2::int_v: all eight epi32 lanes are filled.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7,
          enable_if<std::is_same<Return, AVX2::int_v>::value>)
{
    return _mm256_setr_epi32(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
                             x5.data(), x6.data(), x7.data());
}
// Unsigned variant: explicit uint() conversion of each payload.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7,
          enable_if<std::is_same<Return, AVX2::uint_v>::value>)
{
    return _mm256_setr_epi32(uint(x0.data()), uint(x1.data()), uint(x2.data()),
                             uint(x3.data()), uint(x4.data()), uint(x5.data()),
                             uint(x6.data()), uint(x7.data()));
}
// Eight scalars -> AVX2::short_v: lanes 0-7 take the inputs, lanes 8-15 zero.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7,
          enable_if<std::is_same<Return, AVX2::short_v>::value>)
{
    return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
                          x5.data(), x6.data(), x7.data(), 0, 0, 0, 0, 0, 0, 0, 0);
}
// Unsigned variant: identical lane placement.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7,
          enable_if<std::is_same<Return, AVX2::ushort_v>::value>)
{
    return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
                          x5.data(), x6.data(), x7.data(), 0, 0, 0, 0, 0, 0, 0, 0);
}
2005 #endif
2006 
2007 // 9 Scalar::Vector to 1 AVX2::Vector {{{2
2008 #ifdef Vc_IMPL_AVX2
// Nine scalars -> AVX2::short_v: lanes 0-8 take the inputs, lanes 9-15 zero.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
          enable_if<std::is_same<Return, AVX2::short_v>::value>)
{
    return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
                             x5.data(), x6.data(), x7.data(), x8.data(), 0, 0, 0, 0, 0, 0,
                             0);
}
// Unsigned variant: identical lane placement.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
          enable_if<std::is_same<Return, AVX2::ushort_v>::value>)
{
    return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
                             x5.data(), x6.data(), x7.data(), x8.data(), 0, 0, 0, 0, 0, 0,
                             0);
}
#endif

// 10 Scalar::Vector to 1 AVX2::Vector {{{2
#ifdef Vc_IMPL_AVX2
// Ten scalars -> AVX2::short_v: lanes 0-9 take the inputs, lanes 10-15 zero.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
          Scalar::Vector<T> x9, enable_if<std::is_same<Return, AVX2::short_v>::value>)
{
    return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
                             x5.data(), x6.data(), x7.data(), x8.data(), x9.data(), 0, 0,
                             0, 0, 0, 0);
}
// Unsigned variant: identical lane placement.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
          Scalar::Vector<T> x9, enable_if<std::is_same<Return, AVX2::ushort_v>::value>)
{
    return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
                             x5.data(), x6.data(), x7.data(), x8.data(), x9.data(), 0, 0,
                             0, 0, 0, 0);
}
#endif

// 11 Scalar::Vector to 1 AVX2::Vector {{{2
#ifdef Vc_IMPL_AVX2
// Eleven scalars -> AVX2::short_v: lanes 0-10 take the inputs, rest zero.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
          Scalar::Vector<T> x9, Scalar::Vector<T> x10,
          enable_if<std::is_same<Return, AVX2::short_v>::value>)
{
    return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
                             x5.data(), x6.data(), x7.data(), x8.data(), x9.data(),
                             x10.data(), 0, 0, 0, 0, 0);
}
// Unsigned variant: identical lane placement.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
          Scalar::Vector<T> x9, Scalar::Vector<T> x10,
          enable_if<std::is_same<Return, AVX2::ushort_v>::value>)
{
    return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
                             x5.data(), x6.data(), x7.data(), x8.data(), x9.data(),
                             x10.data(), 0, 0, 0, 0, 0);
}
#endif

// 12 Scalar::Vector to 1 AVX2::Vector {{{2
#ifdef Vc_IMPL_AVX2
// Twelve scalars -> AVX2::short_v: lanes 0-11 take the inputs, rest zero.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
          Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
          enable_if<std::is_same<Return, AVX2::short_v>::value>)
{
    return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
                             x5.data(), x6.data(), x7.data(), x8.data(), x9.data(),
                             x10.data(), x11.data(), 0, 0, 0, 0);
}
// Unsigned variant: identical lane placement.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
          Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
          enable_if<std::is_same<Return, AVX2::ushort_v>::value>)
{
    return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
                             x5.data(), x6.data(), x7.data(), x8.data(), x9.data(),
                             x10.data(), x11.data(), 0, 0, 0, 0);
}
#endif

// 13 Scalar::Vector to 1 AVX2::Vector {{{2
#ifdef Vc_IMPL_AVX2
// Thirteen scalars -> AVX2::short_v: lanes 0-12 take the inputs, rest zero.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
          Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
          Scalar::Vector<T> x12, enable_if<std::is_same<Return, AVX2::short_v>::value>)
{
    return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
                             x5.data(), x6.data(), x7.data(), x8.data(), x9.data(),
                             x10.data(), x11.data(), x12.data(), 0, 0, 0);
}
// Unsigned variant: identical lane placement.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
          Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
          Scalar::Vector<T> x12, enable_if<std::is_same<Return, AVX2::ushort_v>::value>)
{
    return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
                             x5.data(), x6.data(), x7.data(), x8.data(), x9.data(),
                             x10.data(), x11.data(), x12.data(), 0, 0, 0);
}
#endif

// 14 Scalar::Vector to 1 AVX2::Vector {{{2
#ifdef Vc_IMPL_AVX2
// Fourteen scalars -> AVX2::short_v: lanes 0-13 take the inputs, rest zero.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
          Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
          Scalar::Vector<T> x12, Scalar::Vector<T> x13,
          enable_if<std::is_same<Return, AVX2::short_v>::value>)
{
    return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
                             x5.data(), x6.data(), x7.data(), x8.data(), x9.data(),
                             x10.data(), x11.data(), x12.data(), x13.data(), 0, 0);
}
// Unsigned variant: identical lane placement.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
          Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
          Scalar::Vector<T> x12, Scalar::Vector<T> x13,
          enable_if<std::is_same<Return, AVX2::ushort_v>::value>)
{
    return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
                             x5.data(), x6.data(), x7.data(), x8.data(), x9.data(),
                             x10.data(), x11.data(), x12.data(), x13.data(), 0, 0);
}
#endif

// 15 Scalar::Vector to 1 AVX2::Vector {{{2
#ifdef Vc_IMPL_AVX2
// Fifteen scalars -> AVX2::short_v: lanes 0-14 take the inputs, lane 15 zero.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
          Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
          Scalar::Vector<T> x12, Scalar::Vector<T> x13, Scalar::Vector<T> x14,
          enable_if<std::is_same<Return, AVX2::short_v>::value>)
{
    return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
                             x5.data(), x6.data(), x7.data(), x8.data(), x9.data(),
                             x10.data(), x11.data(), x12.data(), x13.data(), x14.data(),
                             0);
}
// Unsigned variant: identical lane placement.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
          Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
          Scalar::Vector<T> x12, Scalar::Vector<T> x13, Scalar::Vector<T> x14,
          enable_if<std::is_same<Return, AVX2::ushort_v>::value>)
{
    return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
                             x5.data(), x6.data(), x7.data(), x8.data(), x9.data(),
                             x10.data(), x11.data(), x12.data(), x13.data(), x14.data(),
                             0);
}
#endif

// 16 Scalar::Vector to 1 AVX2::Vector {{{2
#ifdef Vc_IMPL_AVX2
// Sixteen scalars -> AVX2::short_v: all sixteen epi16 lanes are filled.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
          Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
          Scalar::Vector<T> x12, Scalar::Vector<T> x13, Scalar::Vector<T> x14,
          Scalar::Vector<T> x15, enable_if<std::is_same<Return, AVX2::short_v>::value>)
{
    return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
                             x5.data(), x6.data(), x7.data(), x8.data(), x9.data(),
                             x10.data(), x11.data(), x12.data(), x13.data(), x14.data(),
                             x15.data());
}
// Unsigned variant: identical lane placement.
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(Scalar::Vector<T> x0, Scalar::Vector<T> x1, Scalar::Vector<T> x2,
          Scalar::Vector<T> x3, Scalar::Vector<T> x4, Scalar::Vector<T> x5,
          Scalar::Vector<T> x6, Scalar::Vector<T> x7, Scalar::Vector<T> x8,
          Scalar::Vector<T> x9, Scalar::Vector<T> x10, Scalar::Vector<T> x11,
          Scalar::Vector<T> x12, Scalar::Vector<T> x13, Scalar::Vector<T> x14,
          Scalar::Vector<T> x15, enable_if<std::is_same<Return, AVX2::ushort_v>::value>)
{
    return _mm256_setr_epi16(x0.data(), x1.data(), x2.data(), x3.data(), x4.data(),
                             x5.data(), x6.data(), x7.data(), x8.data(), x9.data(),
                             x10.data(), x11.data(), x12.data(), x13.data(), x14.data(),
                             x15.data());
}
2235 #endif
2236 
2237 // 1 AVX2::Vector to 1 Scalar::Vector {{{2
2238 template <typename To, typename FromT>
2239 Vc_INTRINSIC Vc_CONST To
2240 simd_cast(AVX2::Vector<FromT> x, enable_if<Scalar::is_vector<To>::value>)
2241 {
2242     return static_cast<To>(x[0]);
2243 }
2244 
2245 // Mask casts without offset {{{1
2246 // 1 AVX2::Mask to 1 AVX2::Mask {{{2
// Generic AVX2 mask -> AVX2 mask conversion. Delegates to Detail::mask_cast,
// parameterized on the source mask's size, the destination's size, and the
// destination's float-typed vector register (VectorTypeF), starting from the
// source's integer representation (dataI()).
template <typename Return, typename T>
Vc_INTRINSIC Vc_CONST Return
    simd_cast(const AVX2::Mask<T> &k, enable_if<AVX2::is_mask<Return>::value>)
{
    return {Detail::mask_cast<Mask<T, VectorAbi::Avx>::Size, Return::Size,
                              typename Return::VectorTypeF>(k.dataI())};
}
2254 
2255 // 2 AVX2::Mask to 1 AVX2::Mask {{{2
// 2 x double_m (4 lanes) -> float_m (8 lanes): pack each source's 32-bit
// halves down within its 128-bit halves, then concatenate the two results.
// NOTE(review): relies on mask lanes being canonical all-0/all-1 patterns,
// which the saturating packs preserve — confirm against Mask's invariants.
Vc_SIMD_CAST_AVX_2(double_m,  float_m) { return AVX::concat(_mm_packs_epi32(AVX::lo128(x0.dataI()), AVX::hi128(x0.dataI())), _mm_packs_epi32(AVX::lo128(x1.dataI()), AVX::hi128(x1.dataI()))); }
#ifdef Vc_IMPL_AVX2
// _mm256_packs_epi32 operates per 128-bit lane, so its output interleaves the
// two sources; permute4x64<X0, X2, X1, X3> restores ascending lane order.
Vc_SIMD_CAST_AVX_2(double_m,    int_m) { return Mem::permute4x64<X0, X2, X1, X3>(_mm256_packs_epi32(x0.dataI(), x1.dataI())); }
Vc_SIMD_CAST_AVX_2(double_m,   uint_m) { return Mem::permute4x64<X0, X2, X1, X3>(_mm256_packs_epi32(x0.dataI(), x1.dataI())); }
// 2 x double_m -> 16-bit masks: pack 32->16 twice in the low 128 bits, then
// zero-extend; only the low 8 of 16 lanes carry source data.
Vc_SIMD_CAST_AVX_2(double_m,  short_m) { return AVX::zeroExtend(_mm_packs_epi16(_mm_packs_epi32(AVX::lo128(x0.dataI()), AVX::hi128(x0.dataI())), _mm_packs_epi32(AVX::lo128(x1.dataI()), AVX::hi128(x1.dataI())))); }
Vc_SIMD_CAST_AVX_2(double_m, ushort_m) { return AVX::zeroExtend(_mm_packs_epi16(_mm_packs_epi32(AVX::lo128(x0.dataI()), AVX::hi128(x0.dataI())), _mm_packs_epi32(AVX::lo128(x1.dataI()), AVX::hi128(x1.dataI())))); }

// 2 x 32-bit mask (8 lanes each) -> 16-bit mask (16 lanes): per-lane pack
// plus the same permute4x64 lane-order fixup as above.
Vc_SIMD_CAST_AVX_2( float_m,  short_m) { return Mem::permute4x64<X0, X2, X1, X3>(_mm256_packs_epi16(x0.dataI(), x1.dataI())); }
Vc_SIMD_CAST_AVX_2( float_m, ushort_m) { return Mem::permute4x64<X0, X2, X1, X3>(_mm256_packs_epi16(x0.dataI(), x1.dataI())); }

Vc_SIMD_CAST_AVX_2(   int_m,  short_m) { return Mem::permute4x64<X0, X2, X1, X3>(_mm256_packs_epi16(x0.dataI(), x1.dataI())); }
Vc_SIMD_CAST_AVX_2(   int_m, ushort_m) { return Mem::permute4x64<X0, X2, X1, X3>(_mm256_packs_epi16(x0.dataI(), x1.dataI())); }

Vc_SIMD_CAST_AVX_2(  uint_m,  short_m) { return Mem::permute4x64<X0, X2, X1, X3>(_mm256_packs_epi16(x0.dataI(), x1.dataI())); }
Vc_SIMD_CAST_AVX_2(  uint_m, ushort_m) { return Mem::permute4x64<X0, X2, X1, X3>(_mm256_packs_epi16(x0.dataI(), x1.dataI())); }
2271 #endif
2272 
2273 // 4 AVX2::Mask to 1 AVX2::Mask {{{2
2274 #ifdef Vc_IMPL_AVX2
// 4 x double_m (4 lanes each) -> short_m (16 lanes). The two pack steps leave
// the quarters interleaved across 128-bit halves (see inline lane traces);
// the final unpacklo/unpackhi on 32-bit groups restores a-b-c-d order.
Vc_SIMD_CAST_AVX_4(double_m, short_m)
{
    using namespace AVX;
    const auto tmp = _mm256_packs_epi32(
        _mm256_packs_epi32(x0.dataI(), x1.dataI())  // a0 a1 b0 b1 a2 a3 b2 b3
        ,
        _mm256_packs_epi32(x2.dataI(), x3.dataI())  // c0 c1 d0 d1 c2 c3 d2 d3
        );  // a0 a1 b0 b1 c0 c1 d0 d1 a2 a3 b2 b3 c2 c3 d2 d3
    return concat(_mm_unpacklo_epi32(lo128(tmp), hi128(tmp)),   // a0 a1 a2 a3 b0 b1 b2 b3
                  _mm_unpackhi_epi32(lo128(tmp), hi128(tmp)));  // c0 c1 c2 c3 d0 d1 d2 d3
}
// Unsigned variant reuses the signed conversion; masks are bit patterns.
Vc_SIMD_CAST_AVX_4(double_m, ushort_m) { return simd_cast<AVX2::short_m>(x0, x1, x2, x3).data(); }
2287 #endif
2288 
2289 // 1 SSE::Mask to 1 AVX2::Mask {{{2
// SSE::double_m (2 lanes) -> AVX2 masks: widen into the low half of a 256-bit
// register; each target first reduces to the matching SSE mask type, then
// zero-extends (upper lanes become false).
Vc_SIMD_CAST_1(SSE::double_m, AVX2::double_m) { return AVX::zeroExtend(x.data()); }
Vc_SIMD_CAST_1(SSE::double_m, AVX2:: float_m) { return AVX::zeroExtend(simd_cast<SSE:: float_m>(x).data()); }
#ifdef Vc_IMPL_AVX2
Vc_SIMD_CAST_1(SSE::double_m, AVX2::   int_m) { return AVX::zeroExtend(simd_cast<SSE::   int_m>(x).data()); }
Vc_SIMD_CAST_1(SSE::double_m, AVX2::  uint_m) { return AVX::zeroExtend(simd_cast<SSE::  uint_m>(x).data()); }
Vc_SIMD_CAST_1(SSE::double_m, AVX2:: short_m) { return AVX::zeroExtend(simd_cast<SSE:: short_m>(x).data()); }
Vc_SIMD_CAST_1(SSE::double_m, AVX2::ushort_m) { return AVX::zeroExtend(simd_cast<SSE::ushort_m>(x).data()); }
2297 #endif
2298 
// SSE 32-bit masks (4 lanes) -> AVX2::double_m (4 lanes): duplicate each
// 32-bit mask lane into a 64-bit lane via unpacklo/unpackhi, then concat.
Vc_SIMD_CAST_1(SSE:: float_m, AVX2::double_m) { return AVX::concat(_mm_unpacklo_ps(x.dataF(), x.dataF()), _mm_unpackhi_ps(x.dataF(), x.dataF())); }
Vc_SIMD_CAST_1(SSE::   int_m, AVX2::double_m) { return AVX::concat(_mm_unpacklo_ps(x.dataF(), x.dataF()), _mm_unpackhi_ps(x.dataF(), x.dataF())); }
Vc_SIMD_CAST_1(SSE::  uint_m, AVX2::double_m) { return AVX::concat(_mm_unpacklo_ps(x.dataF(), x.dataF()), _mm_unpackhi_ps(x.dataF(), x.dataF())); }
// 16-bit masks -> double_m: duplicate 16->32 bits first, then 32->64 bits;
// only the low four source lanes are used.
Vc_SIMD_CAST_1(SSE:: short_m, AVX2::double_m) { auto tmp = _mm_unpacklo_epi16(x.dataI(), x.dataI()); return AVX::concat(_mm_unpacklo_epi32(tmp, tmp), _mm_unpackhi_epi32(tmp, tmp)); }
Vc_SIMD_CAST_1(SSE::ushort_m, AVX2::double_m) { auto tmp = _mm_unpacklo_epi16(x.dataI(), x.dataI()); return AVX::concat(_mm_unpacklo_epi32(tmp, tmp), _mm_unpackhi_epi32(tmp, tmp)); }

// 32-bit SSE masks -> AVX2::float_m: low half carries the source, high half
// is zero (false). 16-bit masks widen each lane 16->32 bits instead.
Vc_SIMD_CAST_1(SSE:: float_m, AVX2:: float_m) { return AVX::zeroExtend(x.dataF()); }
Vc_SIMD_CAST_1(SSE::   int_m, AVX2:: float_m) { return AVX::zeroExtend(x.dataF()); }
Vc_SIMD_CAST_1(SSE::  uint_m, AVX2:: float_m) { return AVX::zeroExtend(x.dataF()); }
Vc_SIMD_CAST_1(SSE:: short_m, AVX2:: float_m) { return AVX::concat(_mm_unpacklo_epi16(x.dataI(), x.dataI()), _mm_unpackhi_epi16(x.dataI(), x.dataI())); }
Vc_SIMD_CAST_1(SSE::ushort_m, AVX2:: float_m) { return AVX::concat(_mm_unpacklo_epi16(x.dataI(), x.dataI()), _mm_unpackhi_epi16(x.dataI(), x.dataI())); }
2310 
2311 #ifdef Vc_IMPL_AVX2
// 32-bit SSE masks -> AVX2 32-bit masks: plain zero-extension (upper false).
Vc_SIMD_CAST_1(SSE:: float_m, AVX2::   int_m) { return AVX::zeroExtend(x.data()); }
Vc_SIMD_CAST_1(SSE:: float_m, AVX2::  uint_m) { return AVX::zeroExtend(x.data()); }
Vc_SIMD_CAST_1(SSE::   int_m, AVX2::   int_m) { return AVX::zeroExtend(x.data()); }
Vc_SIMD_CAST_1(SSE::   int_m, AVX2::  uint_m) { return AVX::zeroExtend(x.data()); }
Vc_SIMD_CAST_1(SSE::  uint_m, AVX2::   int_m) { return AVX::zeroExtend(x.data()); }
Vc_SIMD_CAST_1(SSE::  uint_m, AVX2::  uint_m) { return AVX::zeroExtend(x.data()); }

// Any SSE mask -> AVX2 16-bit masks: convert to the matching SSE 16-bit mask
// first, then zero-extend into the 256-bit register.
Vc_SIMD_CAST_1(SSE:: float_m, AVX2:: short_m) { return AVX::zeroExtend(simd_cast<SSE:: short_m>(x).data()); }
Vc_SIMD_CAST_1(SSE::   int_m, AVX2:: short_m) { return AVX::zeroExtend(simd_cast<SSE:: short_m>(x).data()); }
Vc_SIMD_CAST_1(SSE::  uint_m, AVX2:: short_m) { return AVX::zeroExtend(simd_cast<SSE:: short_m>(x).data()); }
Vc_SIMD_CAST_1(SSE:: short_m, AVX2:: short_m) { return AVX::zeroExtend(simd_cast<SSE:: short_m>(x).data()); }
Vc_SIMD_CAST_1(SSE::ushort_m, AVX2:: short_m) { return AVX::zeroExtend(simd_cast<SSE:: short_m>(x).data()); }
Vc_SIMD_CAST_1(SSE:: float_m, AVX2::ushort_m) { return AVX::zeroExtend(simd_cast<SSE::ushort_m>(x).data()); }
Vc_SIMD_CAST_1(SSE::   int_m, AVX2::ushort_m) { return AVX::zeroExtend(simd_cast<SSE::ushort_m>(x).data()); }
Vc_SIMD_CAST_1(SSE::  uint_m, AVX2::ushort_m) { return AVX::zeroExtend(simd_cast<SSE::ushort_m>(x).data()); }
Vc_SIMD_CAST_1(SSE:: short_m, AVX2::ushort_m) { return AVX::zeroExtend(simd_cast<SSE::ushort_m>(x).data()); }
Vc_SIMD_CAST_1(SSE::ushort_m, AVX2::ushort_m) { return AVX::zeroExtend(simd_cast<SSE::ushort_m>(x).data()); }

// 16-bit SSE masks (8 lanes) -> AVX2 32-bit masks (8 lanes): move the high
// 64 bits of the source into lane 2 of the 256-bit register (permute4x64),
// then duplicate each 16-bit lane to 32 bits with unpacklo within each half.
Vc_SIMD_CAST_1(SSE:: short_m, AVX2::   int_m) { const auto v = Mem::permute4x64<X0, X2, X1, X3>(AVX::avx_cast<__m256i>(x.data())); return _mm256_unpacklo_epi16(v, v); }
Vc_SIMD_CAST_1(SSE:: short_m, AVX2::  uint_m) { const auto v = Mem::permute4x64<X0, X2, X1, X3>(AVX::avx_cast<__m256i>(x.data())); return _mm256_unpacklo_epi16(v, v); }

Vc_SIMD_CAST_1(SSE::ushort_m, AVX2::   int_m) { const auto v = Mem::permute4x64<X0, X2, X1, X3>(AVX::avx_cast<__m256i>(x.data())); return _mm256_unpacklo_epi16(v, v); }
Vc_SIMD_CAST_1(SSE::ushort_m, AVX2::  uint_m) { const auto v = Mem::permute4x64<X0, X2, X1, X3>(AVX::avx_cast<__m256i>(x.data())); return _mm256_unpacklo_epi16(v, v); }
2335 #endif
2336 
// 2 SSE::Mask to 1 AVX2::Mask {{{2
// Two inputs double the entry count; the saturating packs halve the entry
// width (all-ones packs to all-ones, all-zeros to all-zeros), concat joins two
// 128-bit halves into a 256-bit register.
Vc_SIMD_CAST_2(SSE::double_m, AVX2::double_m) { return AVX::concat(x0.data(), x1.data()); }
Vc_SIMD_CAST_2(SSE::double_m, AVX2:: float_m) { return AVX::zeroExtend(_mm_packs_epi32(x0.dataI(), x1.dataI())); }
Vc_SIMD_CAST_2(SSE:: float_m, AVX2:: float_m) { return AVX::concat(x0.data(), x1.data()); }
Vc_SIMD_CAST_2(SSE::   int_m, AVX2:: float_m) { return AVX::concat(x0.data(), x1.data()); }
Vc_SIMD_CAST_2(SSE::  uint_m, AVX2:: float_m) { return AVX::concat(x0.data(), x1.data()); }

#ifdef Vc_IMPL_AVX2
// 64-bit entries: one packs narrows to 32 bit; a second packs (against zero)
// narrows further to 16 bit for the (u)short targets
Vc_SIMD_CAST_2(SSE::double_m, AVX2::   int_m) { return AVX::zeroExtend(_mm_packs_epi32(x0.dataI(), x1.dataI())); }
Vc_SIMD_CAST_2(SSE::double_m, AVX2::  uint_m) { return AVX::zeroExtend(_mm_packs_epi32(x0.dataI(), x1.dataI())); }
Vc_SIMD_CAST_2(SSE::double_m, AVX2:: short_m) { return AVX::zeroExtend(_mm_packs_epi16(_mm_packs_epi32(x0.dataI(), x1.dataI()), _mm_setzero_si128())); }
Vc_SIMD_CAST_2(SSE::double_m, AVX2::ushort_m) { return AVX::zeroExtend(_mm_packs_epi16(_mm_packs_epi32(x0.dataI(), x1.dataI()), _mm_setzero_si128())); }

// 32-bit entries: concat for same-width targets, packs_epi16 to narrow to 16 bit
Vc_SIMD_CAST_2(SSE:: float_m, AVX2::   int_m) { return AVX::concat(x0.data(), x1.data()); }
Vc_SIMD_CAST_2(SSE:: float_m, AVX2::  uint_m) { return AVX::concat(x0.data(), x1.data()); }
Vc_SIMD_CAST_2(SSE:: float_m, AVX2:: short_m) { return AVX::zeroExtend(_mm_packs_epi16(x0.dataI(), x1.dataI())); }
Vc_SIMD_CAST_2(SSE:: float_m, AVX2::ushort_m) { return AVX::zeroExtend(_mm_packs_epi16(x0.dataI(), x1.dataI())); }

Vc_SIMD_CAST_2(SSE::   int_m, AVX2::   int_m) { return AVX::concat(x0.data(), x1.data()); }
Vc_SIMD_CAST_2(SSE::   int_m, AVX2::  uint_m) { return AVX::concat(x0.data(), x1.data()); }
Vc_SIMD_CAST_2(SSE::   int_m, AVX2:: short_m) { return AVX::zeroExtend(_mm_packs_epi16(x0.dataI(), x1.dataI())); }
Vc_SIMD_CAST_2(SSE::   int_m, AVX2::ushort_m) { return AVX::zeroExtend(_mm_packs_epi16(x0.dataI(), x1.dataI())); }

Vc_SIMD_CAST_2(SSE::  uint_m, AVX2::   int_m) { return AVX::concat(x0.data(), x1.data()); }
Vc_SIMD_CAST_2(SSE::  uint_m, AVX2::  uint_m) { return AVX::concat(x0.data(), x1.data()); }
Vc_SIMD_CAST_2(SSE::  uint_m, AVX2:: short_m) { return AVX::zeroExtend(_mm_packs_epi16(x0.dataI(), x1.dataI())); }
Vc_SIMD_CAST_2(SSE::  uint_m, AVX2::ushort_m) { return AVX::zeroExtend(_mm_packs_epi16(x0.dataI(), x1.dataI())); }

// 16-bit entries: widths already match, just concatenate
Vc_SIMD_CAST_2(SSE:: short_m, AVX2:: short_m) { return AVX::concat(x0.data(), x1.data()); }
Vc_SIMD_CAST_2(SSE:: short_m, AVX2::ushort_m) { return AVX::concat(x0.data(), x1.data()); }
Vc_SIMD_CAST_2(SSE::ushort_m, AVX2:: short_m) { return AVX::concat(x0.data(), x1.data()); }
Vc_SIMD_CAST_2(SSE::ushort_m, AVX2::ushort_m) { return AVX::concat(x0.data(), x1.data()); }
#endif
2370 
// 4 SSE::Mask to 1 AVX2::Mask {{{2
// Four inputs quadruple the entry count; each saturating pack halves the entry
// width, so double_m needs two pack stages and the 32-bit masks need one.
Vc_SIMD_CAST_4(SSE::double_m, AVX2:: float_m) { return AVX::concat(_mm_packs_epi32(x0.dataI(), x1.dataI()), _mm_packs_epi32(x2.dataI(), x3.dataI())); }
#ifdef Vc_IMPL_AVX2
Vc_SIMD_CAST_4(SSE::double_m, AVX2::   int_m) { return AVX::concat(_mm_packs_epi32(x0.dataI(), x1.dataI()), _mm_packs_epi32(x2.dataI(), x3.dataI())); }
Vc_SIMD_CAST_4(SSE::double_m, AVX2::  uint_m) { return AVX::concat(_mm_packs_epi32(x0.dataI(), x1.dataI()), _mm_packs_epi32(x2.dataI(), x3.dataI())); }
Vc_SIMD_CAST_4(SSE::double_m, AVX2:: short_m) { return AVX::zeroExtend(_mm_packs_epi16(_mm_packs_epi32(x0.dataI(), x1.dataI()), _mm_packs_epi32(x2.dataI(), x3.dataI()))); }
Vc_SIMD_CAST_4(SSE::double_m, AVX2::ushort_m) { return AVX::zeroExtend(_mm_packs_epi16(_mm_packs_epi32(x0.dataI(), x1.dataI()), _mm_packs_epi32(x2.dataI(), x3.dataI()))); }
Vc_SIMD_CAST_4(SSE:: float_m, AVX2:: short_m) { return AVX::concat(_mm_packs_epi16(x0.dataI(), x1.dataI()), _mm_packs_epi16(x2.dataI(), x3.dataI())); }
Vc_SIMD_CAST_4(SSE:: float_m, AVX2::ushort_m) { return AVX::concat(_mm_packs_epi16(x0.dataI(), x1.dataI()), _mm_packs_epi16(x2.dataI(), x3.dataI())); }
Vc_SIMD_CAST_4(SSE::   int_m, AVX2:: short_m) { return AVX::concat(_mm_packs_epi16(x0.dataI(), x1.dataI()), _mm_packs_epi16(x2.dataI(), x3.dataI())); }
Vc_SIMD_CAST_4(SSE::   int_m, AVX2::ushort_m) { return AVX::concat(_mm_packs_epi16(x0.dataI(), x1.dataI()), _mm_packs_epi16(x2.dataI(), x3.dataI())); }
Vc_SIMD_CAST_4(SSE::  uint_m, AVX2:: short_m) { return AVX::concat(_mm_packs_epi16(x0.dataI(), x1.dataI()), _mm_packs_epi16(x2.dataI(), x3.dataI())); }
Vc_SIMD_CAST_4(SSE::  uint_m, AVX2::ushort_m) { return AVX::concat(_mm_packs_epi16(x0.dataI(), x1.dataI()), _mm_packs_epi16(x2.dataI(), x3.dataI())); }
#endif
2385 
2386 // 1 Scalar::Mask to 1 AVX2::Mask {{{2
2387 template <typename Return, typename T>
2388 Vc_INTRINSIC Vc_CONST Return
2389 simd_cast(Scalar::Mask<T> k, enable_if<AVX2::is_mask<Return>::value>)
2390 {
2391     Return r{false};
2392     r[0] = k.data();
2393     return r;
2394 }
2395 
2396 // 2 Scalar::Mask to 1 AVX2::Mask {{{2
2397 template <typename Return, typename T>
2398 Vc_INTRINSIC Vc_CONST Return
2399 simd_cast(Scalar::Mask<T> k0, Scalar::Mask<T> k1,
2400           enable_if<AVX2::is_mask<Return>::value>)
2401 {
2402     Return r{false};
2403     r[0] = k0.data();
2404     r[1] = k1.data();
2405     return r;
2406 }
2407 
2408 // 4 Scalar::Mask to 1 AVX2::Mask {{{2
2409 template <typename Return, typename T>
2410 Vc_INTRINSIC Vc_CONST Return
2411 simd_cast(Scalar::Mask<T> k0, Scalar::Mask<T> k1, Scalar::Mask<T> k2, Scalar::Mask<T> k3,
2412           enable_if<(AVX2::is_mask<Return>::value && Return::Size >= 4)>)
2413 {
2414     Return r{false};
2415     r[0] = k0.data();
2416     r[1] = k1.data();
2417     r[2] = k2.data();
2418     r[3] = k3.data();
2419     return r;
2420 }
2421 
2422 // 8 Scalar::Mask to 1 AVX2::Mask {{{2
2423 template <typename Return, typename T>
2424 Vc_INTRINSIC Vc_CONST Return
2425 simd_cast(Scalar::Mask<T> k0, Scalar::Mask<T> k1, Scalar::Mask<T> k2, Scalar::Mask<T> k3,
2426           Scalar::Mask<T> k4, Scalar::Mask<T> k5, Scalar::Mask<T> k6, Scalar::Mask<T> k7,
2427           enable_if<(AVX2::is_mask<Return>::value && Return::Size >= 8)>)
2428 {
2429     Return r{false};
2430     r[0] = k0.data();
2431     r[1] = k1.data();
2432     r[2] = k2.data();
2433     r[3] = k3.data();
2434     r[4] = k4.data();
2435     r[5] = k5.data();
2436     r[6] = k6.data();
2437     r[7] = k7.data();
2438     return r;
2439 }
2440 
2441 // 16 Scalar::Mask to 1 AVX2::Mask {{{2
2442 template <typename Return, typename T>
2443 Vc_INTRINSIC Vc_CONST Return
2444 simd_cast(Scalar::Mask<T> k0, Scalar::Mask<T> k1, Scalar::Mask<T> k2, Scalar::Mask<T> k3,
2445           Scalar::Mask<T> k4, Scalar::Mask<T> k5, Scalar::Mask<T> k6, Scalar::Mask<T> k7,
2446           Scalar::Mask<T> k8, Scalar::Mask<T> k9, Scalar::Mask<T> k10,
2447           Scalar::Mask<T> k11, Scalar::Mask<T> k12, Scalar::Mask<T> k13,
2448           Scalar::Mask<T> k14, Scalar::Mask<T> k15,
2449           enable_if<(AVX2::is_mask<Return>::value && Return::Size >= 16)>)
2450 {
2451     Return r{false};
2452     r[0] = k0.data();
2453     r[1] = k1.data();
2454     r[2] = k2.data();
2455     r[3] = k3.data();
2456     r[4] = k4.data();
2457     r[5] = k5.data();
2458     r[6] = k6.data();
2459     r[7] = k7.data();
2460     r[8] = k8.data();
2461     r[9] = k9.data();
2462     r[10] = k10.data();
2463     r[11] = k11.data();
2464     r[12] = k12.data();
2465     r[13] = k13.data();
2466     r[14] = k14.data();
2467     r[15] = k15.data();
2468     return r;
2469 }
2470 
// 1 AVX2::Mask to 1 SSE::Mask {{{2
// Same entry width: take the low 128-bit half. Narrower target: saturating
// packs halve the entry width. Wider target (double_m): unpack duplicates the
// low entries to twice their width.
Vc_SIMD_CAST_1(AVX2::double_m, SSE::double_m) { return AVX::lo128(x.data()); }
Vc_SIMD_CAST_1(AVX2::double_m, SSE:: float_m) { return _mm_packs_epi32(AVX::lo128(x.dataI()), AVX::hi128(x.dataI())); }
Vc_SIMD_CAST_1(AVX2::double_m, SSE::   int_m) { return _mm_packs_epi32(AVX::lo128(x.dataI()), AVX::hi128(x.dataI())); }
Vc_SIMD_CAST_1(AVX2::double_m, SSE::  uint_m) { return _mm_packs_epi32(AVX::lo128(x.dataI()), AVX::hi128(x.dataI())); }
Vc_SIMD_CAST_1(AVX2::double_m, SSE:: short_m) { return _mm_packs_epi16(_mm_packs_epi32(AVX::lo128(x.dataI()), AVX::hi128(x.dataI())), _mm_setzero_si128()); }
Vc_SIMD_CAST_1(AVX2::double_m, SSE::ushort_m) { return _mm_packs_epi16(_mm_packs_epi32(AVX::lo128(x.dataI()), AVX::hi128(x.dataI())), _mm_setzero_si128()); }

Vc_SIMD_CAST_1(AVX2:: float_m, SSE::double_m) { return _mm_unpacklo_ps(AVX::lo128(x.data()), AVX::lo128(x.data())); }
Vc_SIMD_CAST_1(AVX2:: float_m, SSE:: float_m) { return AVX::lo128(x.data()); }
Vc_SIMD_CAST_1(AVX2:: float_m, SSE::   int_m) { return AVX::lo128(x.data()); }
Vc_SIMD_CAST_1(AVX2:: float_m, SSE::  uint_m) { return AVX::lo128(x.data()); }
Vc_SIMD_CAST_1(AVX2:: float_m, SSE:: short_m) { return _mm_packs_epi16(AVX::lo128(x.dataI()), AVX::hi128(x.dataI())); }
Vc_SIMD_CAST_1(AVX2:: float_m, SSE::ushort_m) { return _mm_packs_epi16(AVX::lo128(x.dataI()), AVX::hi128(x.dataI())); }

#ifdef Vc_IMPL_AVX2
Vc_SIMD_CAST_1(AVX2::   int_m, SSE::double_m) { return _mm_unpacklo_epi32(AVX::lo128(x.dataI()), AVX::lo128(x.dataI())); }
Vc_SIMD_CAST_1(AVX2::   int_m, SSE:: float_m) { return AVX::lo128(x.dataI()); }
Vc_SIMD_CAST_1(AVX2::   int_m, SSE::   int_m) { return AVX::lo128(x.dataI()); }
Vc_SIMD_CAST_1(AVX2::   int_m, SSE::  uint_m) { return AVX::lo128(x.dataI()); }
Vc_SIMD_CAST_1(AVX2::   int_m, SSE:: short_m) { return _mm_packs_epi16(AVX::lo128(x.dataI()), AVX::hi128(x.dataI())); }
Vc_SIMD_CAST_1(AVX2::   int_m, SSE::ushort_m) { return _mm_packs_epi16(AVX::lo128(x.dataI()), AVX::hi128(x.dataI())); }

Vc_SIMD_CAST_1(AVX2::  uint_m, SSE::double_m) { return _mm_unpacklo_epi32(AVX::lo128(x.dataI()), AVX::lo128(x.dataI())); }
Vc_SIMD_CAST_1(AVX2::  uint_m, SSE:: float_m) { return AVX::lo128(x.dataI()); }
Vc_SIMD_CAST_1(AVX2::  uint_m, SSE::   int_m) { return AVX::lo128(x.dataI()); }
Vc_SIMD_CAST_1(AVX2::  uint_m, SSE::  uint_m) { return AVX::lo128(x.dataI()); }
Vc_SIMD_CAST_1(AVX2::  uint_m, SSE:: short_m) { return _mm_packs_epi16(AVX::lo128(x.dataI()), AVX::hi128(x.dataI())); }
Vc_SIMD_CAST_1(AVX2::  uint_m, SSE::ushort_m) { return _mm_packs_epi16(AVX::lo128(x.dataI()), AVX::hi128(x.dataI())); }

// 16-bit-entry sources: wrap the low 128 bits in an SSE mask and defer to the
// existing SSE-to-SSE mask casts
Vc_SIMD_CAST_1(AVX2:: short_m, SSE::double_m) { return simd_cast<SSE::double_m>(SSE::short_m(AVX::lo128(x.data()))); }
Vc_SIMD_CAST_1(AVX2:: short_m, SSE:: float_m) { return simd_cast<SSE:: float_m>(SSE::short_m(AVX::lo128(x.data()))); }
Vc_SIMD_CAST_1(AVX2:: short_m, SSE::   int_m) { return simd_cast<SSE::   int_m>(SSE::short_m(AVX::lo128(x.data()))); }
Vc_SIMD_CAST_1(AVX2:: short_m, SSE::  uint_m) { return simd_cast<SSE::  uint_m>(SSE::short_m(AVX::lo128(x.data()))); }
Vc_SIMD_CAST_1(AVX2:: short_m, SSE:: short_m) { return simd_cast<SSE:: short_m>(SSE::short_m(AVX::lo128(x.data()))); }
Vc_SIMD_CAST_1(AVX2:: short_m, SSE::ushort_m) { return simd_cast<SSE::ushort_m>(SSE::short_m(AVX::lo128(x.data()))); }

Vc_SIMD_CAST_1(AVX2::ushort_m, SSE::double_m) { return simd_cast<SSE::double_m>(SSE::ushort_m(AVX::lo128(x.data()))); }
Vc_SIMD_CAST_1(AVX2::ushort_m, SSE:: float_m) { return simd_cast<SSE:: float_m>(SSE::ushort_m(AVX::lo128(x.data()))); }
Vc_SIMD_CAST_1(AVX2::ushort_m, SSE::   int_m) { return simd_cast<SSE::   int_m>(SSE::ushort_m(AVX::lo128(x.data()))); }
Vc_SIMD_CAST_1(AVX2::ushort_m, SSE::  uint_m) { return simd_cast<SSE::  uint_m>(SSE::ushort_m(AVX::lo128(x.data()))); }
Vc_SIMD_CAST_1(AVX2::ushort_m, SSE:: short_m) { return simd_cast<SSE:: short_m>(SSE::ushort_m(AVX::lo128(x.data()))); }
Vc_SIMD_CAST_1(AVX2::ushort_m, SSE::ushort_m) { return simd_cast<SSE::ushort_m>(SSE::ushort_m(AVX::lo128(x.data()))); }
#endif

// 2 AVX2::Mask to 1 SSE::Mask {{{2
// 4+4 double entries -> 8 short entries: two pack stages (64->32->16 bit)
Vc_SIMD_CAST_2(AVX2::double_m, SSE:: short_m) { return _mm_packs_epi16(_mm_packs_epi32(AVX::lo128(x0.dataI()), AVX::hi128(x0.dataI())), _mm_packs_epi32(AVX::lo128(x1.dataI()), AVX::hi128(x1.dataI()))); }
Vc_SIMD_CAST_2(AVX2::double_m, SSE::ushort_m) { return _mm_packs_epi16(_mm_packs_epi32(AVX::lo128(x0.dataI()), AVX::hi128(x0.dataI())), _mm_packs_epi32(AVX::lo128(x1.dataI()), AVX::hi128(x1.dataI()))); }
2519 
2520 // 1 AVX2::Mask to 1 Scalar::Mask {{{2
2521 template <typename To, typename FromT>
2522 Vc_INTRINSIC Vc_CONST To
2523 simd_cast(AVX2::Mask<FromT> x, enable_if<Scalar::is_mask<To>::value>)
2524 {
2525     return static_cast<To>(x[0]);
2526 }
2527 
// offset == 0 | convert from AVX2::Mask/Vector {{{1
// With offset == 0 no element shifting is needed, so the offset variant simply
// forwards to the plain (offset-free) simd_cast overload. The enable_if
// excludes Scalar targets and the SimdArray wrappers, which have their own
// offset-0 overloads elsewhere.
template <typename Return, int offset, typename From>
Vc_INTRINSIC Vc_CONST enable_if<
    (offset == 0 &&
     ((AVX2::is_vector<From>::value && !Scalar::is_vector<Return>::value &&
       Traits::is_simd_vector<Return>::value && !Traits::isSimdArray<Return>::value) ||
      (AVX2::is_mask<From>::value && !Scalar::is_mask<Return>::value &&
       Traits::is_simd_mask<Return>::value &&
       !Traits::isSimdMaskArray<Return>::value))),
    Return>
simd_cast(const From &x)
{
    return simd_cast<Return>(x);
}

// offset == 0 | convert from SSE::Mask/Vector to AVX2::Mask/Vector {{{1
// Same idea for SSE sources: offset 0 means the plain conversion applies.
template <typename Return, int offset, typename From>
Vc_INTRINSIC Vc_CONST Return
simd_cast(const From &x,
          enable_if<offset == 0 && ((SSE::is_vector<From>::value &&
                                     AVX2::is_vector<Return>::value) ||
                                    (SSE::is_mask<From>::value &&
                                     AVX2::is_mask<Return>::value))>)
{
    return simd_cast<Return>(x);
}
2554 
// Vector casts with offset {{{1
// AVX2 to AVX2 {{{2
// Selects the `offset`-th chunk of Return::Size elements from x, then converts
// it with the offset-free simd_cast. `shift` is the chunk's start position in
// bytes within the 32-byte register.
template <typename Return, int offset, typename T>
Vc_INTRINSIC Vc_CONST enable_if<(AVX2::is_vector<Return>::value && offset != 0),
                                Return>
    simd_cast(AVX2::Vector<T> x)
{
    // TODO: there certainly is potential for leaving out the shift/permute
    // instruction at the cost of a lot more specializations
    using V = AVX2::Vector<T>;
    constexpr int shift = sizeof(T) * offset * Return::Size;
    static_assert(shift > 0 && shift < sizeof(x), "");
    if (shift < 16) {
        // chunk starts inside the low 128-bit half: byte-shift the low half down
        return simd_cast<Return>(V{AVX::avx_cast<typename V::VectorType>(
            _mm_srli_si128(AVX::avx_cast<__m128i>(AVX::lo128(x.data())), shift))});
    } else if (shift == 16) {
        // chunk is exactly the high 128-bit half: move it down, zero the rest
        return simd_cast<Return>(V{Mem::permute128<X1, Const0>(x.data())});
    } else {
        // chunk starts inside the high 128-bit half: byte-shift the high half down
#ifdef Vc_MSVC
#pragma warning(push)
#pragma warning(disable : 4556)  // value of intrinsic immediate argument '-8' is out of
                                 // range '0 - 255'
#endif
        return simd_cast<Return>(V{AVX::avx_cast<typename V::VectorType>(
            _mm_srli_si128(AVX::avx_cast<__m128i>(AVX::hi128(x.data())), shift - 16))});
#ifdef Vc_MSVC
#pragma warning(pop)
#endif
    }
}
// AVX2 to SSE (Vector<T>) {{{2
// 32-byte source: extract the `offset`-th chunk of Return::Size elements into
// a 16-byte SSE vector of the same entry type, then convert.
template <typename Return, int offset, typename T>
Vc_INTRINSIC Vc_CONST enable_if<(offset != 0 && SSE::is_vector<Return>::value &&
                                 sizeof(AVX2::Vector<T>) == 32),
                                Return>
    simd_cast(AVX2::Vector<T> x)
{
    using V = AVX2::Vector<T>;
    // byte offset of the chunk: entry size (sizeof(V)/V::Size) times element offset
    constexpr int shift = sizeof(V) / V::Size * offset * Return::Size;
    static_assert(shift > 0, "");
    static_assert(shift < sizeof(V), "");
    using SseVector = SSE::Vector<typename V::EntryType>;
    if (shift == 16) {
        // chunk is exactly the high 128-bit half
        return simd_cast<Return>(SseVector{AVX::hi128(x.data())});
    }
    // chunk straddles or sits inside the halves: alignr concatenates
    // hi:lo and shifts the pair right by `shift` bytes
    using Intrin = typename SseVector::VectorType;
    return simd_cast<Return>(SseVector{AVX::avx_cast<Intrin>(
        _mm_alignr_epi8(AVX::avx_cast<__m128i>(AVX::hi128(x.data())),
                        AVX::avx_cast<__m128i>(AVX::lo128(x.data())), shift))});
}
// 16-byte source (AVX target without AVX2 stores some vectors in 128 bit): a
// plain byte shift selects the chunk.
template <typename Return, int offset, typename T>
Vc_INTRINSIC Vc_CONST enable_if<(offset != 0 && SSE::is_vector<Return>::value &&
                                 sizeof(AVX2::Vector<T>) == 16),
                                Return>
    simd_cast(AVX2::Vector<T> x)
{
    using V = AVX2::Vector<T>;
    constexpr int shift = sizeof(V) / V::Size * offset * Return::Size;
    static_assert(shift > 0, "");
    static_assert(shift < sizeof(V), "");
    using SseVector = SSE::Vector<typename V::EntryType>;
    return simd_cast<Return>(SseVector{_mm_srli_si128(x.data(), shift)});
}
// SSE to AVX2 {{{2
// offset 1 of a 16-bit SSE vector: widen the upper four entries to int first,
// then convert those to double
Vc_SIMD_CAST_OFFSET(SSE:: short_v, AVX2::double_v, 1) { return simd_cast<AVX2::double_v>(simd_cast<SSE::int_v, 1>(x)); }
Vc_SIMD_CAST_OFFSET(SSE::ushort_v, AVX2::double_v, 1) { return simd_cast<AVX2::double_v>(simd_cast<SSE::int_v, 1>(x)); }
2621 
// Mask casts with offset {{{1
// 1 AVX2::Mask to N AVX2::Mask {{{2
// float_v and (u)int_v have size 8, double_v has size 4, and (u)short_v have size 16. Consequently,
// offset can 0, 1, 2, or 3.
// - offset == 0 is already done.
// - offset == 1 can be 16 -> 8, 16 -> 4, 8 -> 4, and 16 -> 4
// - offset == 2 && offset == 3 can only be 16 -> 4
// The unpack-with-self trick doubles the width of every (all-ones/all-zeros)
// mask entry: epi8 unpack doubles once, a following epi16 unpack doubles again.
//
// offset 1, size ratio 2: upper half of the entries, width doubled
template <typename Return, int offset, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(const AVX2::Mask<T> &k,
          enable_if<(AVX2::is_mask<Return>::value && offset == 1 &&
                     AVX2::Mask<T>::Size == Return::Size * 2)> = nullarg)
{
    const auto tmp = AVX::hi128(k.dataI());
    return AVX::concat(_mm_unpacklo_epi8(tmp, tmp), _mm_unpackhi_epi8(tmp, tmp));
}
// offset 1, size ratio 4: second quarter of the entries, width quadrupled
template <typename Return, int offset, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(const AVX2::Mask<T> &k,
          enable_if<(AVX2::is_mask<Return>::value && offset == 1 &&
                     AVX2::Mask<T>::Size == Return::Size * 4)> = nullarg)
{
    auto tmp = AVX::lo128(k.dataI());
    tmp = _mm_unpackhi_epi8(tmp, tmp);
    return AVX::concat(_mm_unpacklo_epi16(tmp, tmp), _mm_unpackhi_epi16(tmp, tmp));
}
// offset 2, size ratio 4: third quarter of the entries, width quadrupled
template <typename Return, int offset, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(const AVX2::Mask<T> &k,
          enable_if<(AVX2::is_mask<Return>::value && offset == 2 &&
                     AVX2::Mask<T>::Size == Return::Size * 4)> = nullarg)
{
    auto tmp = AVX::hi128(k.dataI());
    tmp = _mm_unpacklo_epi8(tmp, tmp);
    return AVX::concat(_mm_unpacklo_epi16(tmp, tmp), _mm_unpackhi_epi16(tmp, tmp));
}
// offset 3, size ratio 4: last quarter of the entries, width quadrupled
template <typename Return, int offset, typename T>
Vc_INTRINSIC Vc_CONST Return
simd_cast(const AVX2::Mask<T> &k,
          enable_if<(AVX2::is_mask<Return>::value && offset == 3 &&
                     AVX2::Mask<T>::Size == Return::Size * 4)> = nullarg)
{
    auto tmp = AVX::hi128(k.dataI());
    tmp = _mm_unpackhi_epi8(tmp, tmp);
    return AVX::concat(_mm_unpacklo_epi16(tmp, tmp), _mm_unpackhi_epi16(tmp, tmp));
}

// 1 SSE::Mask to N AVX2::Mask {{{2
// offset 1 of a 16-bit SSE mask into a 64-bit-entry AVX mask: duplicate the
// upper entries twice (16 -> 32 -> 64 bit) and concat the halves
Vc_SIMD_CAST_OFFSET(SSE:: short_m, AVX2::double_m, 1) { auto tmp = _mm_unpackhi_epi16(x.dataI(), x.dataI()); return AVX::concat(_mm_unpacklo_epi32(tmp, tmp), _mm_unpackhi_epi32(tmp, tmp)); }
Vc_SIMD_CAST_OFFSET(SSE::ushort_m, AVX2::double_m, 1) { auto tmp = _mm_unpackhi_epi16(x.dataI(), x.dataI()); return AVX::concat(_mm_unpacklo_epi32(tmp, tmp), _mm_unpackhi_epi32(tmp, tmp)); }
2672 
// AVX2 to SSE (Mask<T>) {{{2
// 32-byte mask: extract the `offset`-th chunk of Return::Size entries into an
// SSE mask of the same entry type, then convert (mirrors the vector variant).
template <typename Return, int offset, typename T>
Vc_INTRINSIC Vc_CONST enable_if<(offset != 0 && SSE::is_mask<Return>::value &&
                                 sizeof(AVX2::Mask<T>) == 32),
                                Return>
    simd_cast(AVX2::Mask<T> x)
{
    using M = AVX2::Mask<T>;
    // byte offset of the chunk: entry size (sizeof(M)/M::Size) times element offset
    constexpr int shift = sizeof(M) / M::Size * offset * Return::Size;
    static_assert(shift > 0, "");
    static_assert(shift < sizeof(M), "");
    using SseVector = SSE::Mask<Traits::entry_type_of<typename M::Vector>>;
    if (shift == 16) {
        // chunk is exactly the high 128-bit half
        return simd_cast<Return>(SseVector{AVX::hi128(x.data())});
    }
    // alignr concatenates hi:lo and shifts the pair right by `shift` bytes
    using Intrin = typename SseVector::VectorType;
    return simd_cast<Return>(SseVector{AVX::avx_cast<Intrin>(
        _mm_alignr_epi8(AVX::hi128(x.dataI()), AVX::lo128(x.dataI()), shift))});
}

// 16-byte mask: go through the offset-free AVX2-to-SSE mask cast, then apply
// the SSE-to-SSE offset cast.
template <typename Return, int offset, typename T>
Vc_INTRINSIC Vc_CONST enable_if<(offset != 0 && SSE::is_mask<Return>::value &&
                                 sizeof(AVX2::Mask<T>) == 16),
                                Return>
    simd_cast(AVX2::Mask<T> x)
{
    return simd_cast<Return, offset>(simd_cast<SSE::Mask<T>>(x));
}
2701 
2702 // undef Vc_SIMD_CAST_AVX_[1234] & Vc_SIMD_CAST_[12345678] {{{1
2703 #undef Vc_SIMD_CAST_AVX_1
2704 #undef Vc_SIMD_CAST_AVX_2
2705 #undef Vc_SIMD_CAST_AVX_3
2706 #undef Vc_SIMD_CAST_AVX_4
2707 
2708 #undef Vc_SIMD_CAST_1
2709 #undef Vc_SIMD_CAST_2
2710 #undef Vc_SIMD_CAST_3
2711 #undef Vc_SIMD_CAST_4
2712 #undef Vc_SIMD_CAST_5
2713 #undef Vc_SIMD_CAST_6
2714 #undef Vc_SIMD_CAST_7
2715 #undef Vc_SIMD_CAST_8
2716 
2717 #undef Vc_SIMD_CAST_OFFSET
2718 // }}}1
2719 
2720 }  // namespace Vc
2721 
2722 #endif // VC_AVX_SIMD_CAST_H_
2723 
2724 // vim: foldmethod=marker