Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-30 10:25:49

0001 /*  This file is part of the Vc library. {{{
0002 Copyright © 2013-2015 Matthias Kretz <kretz@kde.org>
0003 
0004 Redistribution and use in source and binary forms, with or without
0005 modification, are permitted provided that the following conditions are met:
0006     * Redistributions of source code must retain the above copyright
0007       notice, this list of conditions and the following disclaimer.
0008     * Redistributions in binary form must reproduce the above copyright
0009       notice, this list of conditions and the following disclaimer in the
0010       documentation and/or other materials provided with the distribution.
0011     * Neither the names of contributing organizations nor the
0012       names of its contributors may be used to endorse or promote products
0013       derived from this software without specific prior written permission.
0014 
0015 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
0016 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
0017 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
0018 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
0019 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
0020 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
0021 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
0022 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
0023 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
0024 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
0025 
0026 }}}*/
0027 
0028 #ifndef VC_COMMON_SIMDARRAY_H_
0029 #define VC_COMMON_SIMDARRAY_H_
0030 
0031 //#define Vc_DEBUG_SIMD_CAST 1
0032 //#define Vc_DEBUG_SORTED 1
0033 //#include "../IO"
0034 
0035 #include <array>
0036 #include <limits>
0037 
0038 #include "writemaskedvector.h"
0039 #include "simdarrayhelper.h"
0040 #include "simdmaskarray.h"
0041 #include "utility.h"
0042 #include "interleave.h"
0043 #include "indexsequence.h"
0044 #include "transpose.h"
0045 #include "macros.h"
0046 
0047 namespace Vc_VERSIONED_NAMESPACE
0048 {
0049 // select_best_vector_type {{{
namespace Common
{
/// \addtogroup SimdArray
/// @{
/**
 * \internal
 * Selects the best SIMD type out of a typelist to store N scalar values.
 */
template <std::size_t N, class... Candidates> struct select_best_vector_type_impl;
// last candidate; this one must work; assume it does:
template <std::size_t N, class T> struct select_best_vector_type_impl<N, T> {
    using type = T;
};
// check the next candidate; use it if N >= T::size(); recurse otherwise:
template <std::size_t N, class T, class... Candidates>
struct select_best_vector_type_impl<N, T, Candidates...> {
    using type = typename std::conditional<
        (N < T::Size), typename select_best_vector_type_impl<N, Candidates...>::type,
        T>::type;
};
// Entry point: the candidate list is ordered widest-first (AVX2/AVX before SSE
// before Scalar), pruned at compile time to the instruction sets enabled for
// this translation unit. Scalar::Vector<T> is always last and always works.
template <class T, std::size_t N>
struct select_best_vector_type : select_best_vector_type_impl<N,
#ifdef Vc_IMPL_AVX2
                                                              Vc::AVX2::Vector<T>,
#elif defined Vc_IMPL_AVX
                                                              Vc::AVX::Vector<T>,
#endif
#ifdef Vc_IMPL_SSE
                                                              Vc::SSE::Vector<T>,
#endif
                                                              Vc::Scalar::Vector<T>> {
};
/// @}
}  // namespace Common
0084 // }}}
0085 // internal namespace (product & sum helper) {{{1
0086 namespace internal
0087 {
0088 template <typename T> T Vc_INTRINSIC Vc_PURE product_helper_(const T &l, const T &r) { return l * r; }
0089 template <typename T> T Vc_INTRINSIC Vc_PURE sum_helper_(const T &l, const T &r) { return l + r; }
0090 }  // namespace internal
0091 
0092 // min & max declarations {{{1
// Forward declarations: min/max over two SimdArrays of identical geometry,
// returning a fixed-size result. Declared here so members/friends below can
// reference them; the definitions are not part of this chunk.
template <typename T, std::size_t N, typename V, std::size_t M>
inline fixed_size_simd<T, N> min(const SimdArray<T, N, V, M> &x,
                                 const SimdArray<T, N, V, M> &y);
template <typename T, std::size_t N, typename V, std::size_t M>
inline fixed_size_simd<T, N> max(const SimdArray<T, N, V, M> &x,
                                 const SimdArray<T, N, V, M> &y);
0099 
0100 // SimdArray class {{{1
0101 /// \addtogroup SimdArray
0102 /// @{
0103 
0104 // atomic SimdArray {{{1
0105 #define Vc_CURRENT_CLASS_NAME SimdArray
0106 /**\internal
0107  * Specialization of `SimdArray<T, N, VectorType, VectorSize>` for the case where `N ==
0108  * VectorSize`.
0109  *
0110  * This is specialized for implementation purposes: Since the general implementation uses
0111  * two SimdArray data members it recurses over different SimdArray instantiations. The
0112  * recursion is ended by this specialization, which has a single \p VectorType_ data
0113  * member to which all functions are forwarded more or less directly.
0114  */
template <typename T, std::size_t N, typename VectorType_>
class SimdArray<T, N, VectorType_, N>
{
    // Restrict T to the arithmetic types Vc implements Vector<T> for.
    static_assert(std::is_same<T, double>::value || std::is_same<T, float>::value ||
                      std::is_same<T, int32_t>::value ||
                      std::is_same<T, uint32_t>::value ||
                      std::is_same<T, int16_t>::value ||
                      std::is_same<T, uint16_t>::value,
                  "SimdArray<T, N> may only be used with T = { double, float, int32_t, uint32_t, "
                  "int16_t, uint16_t }");
    // The 3rd/4th template arguments are implementation details; they must be
    // exactly what select_best_vector_type computes for this (T, N) pair.
    static_assert(
        std::is_same<VectorType_,
                     typename Common::select_best_vector_type<T, N>::type>::value &&
            VectorType_::size() == N,
        "ERROR: leave the third and fourth template parameters with their defaults. They "
        "are implementation details.");

public:
    // This specialization wraps exactly one native vector — no recursion.
    static constexpr bool is_atomic = true;
    using VectorType = VectorType_;
    using vector_type = VectorType;
    using storage_type = vector_type;
    using vectorentry_type = typename vector_type::VectorEntryType;
    using value_type = T;
    using mask_type = fixed_size_simd_mask<T, N>;
    using index_type = fixed_size_simd<int, N>;
    static constexpr std::size_t size() { return N; }
    using Mask = mask_type;
    using MaskType = Mask;
    using MaskArgument = const MaskType &;
    using VectorEntryType = vectorentry_type;
    using EntryType = value_type;
    using IndexType = index_type;
    using AsArg = const SimdArray &;
    using reference = Detail::ElementReference<SimdArray>;
    static constexpr std::size_t Size = size();
    static constexpr std::size_t MemoryAlignment = storage_type::MemoryAlignment;

    // zero init
    Vc_INTRINSIC SimdArray() = default;

    // default copy ctor/operator
    Vc_INTRINSIC SimdArray(const SimdArray &) = default;
    Vc_INTRINSIC SimdArray(SimdArray &&) = default;
    Vc_INTRINSIC SimdArray &operator=(const SimdArray &) = default;

    // broadcast: replicate one scalar into all N lanes. Three overloads cover
    // every value category of the argument directly.
    Vc_INTRINSIC SimdArray(const value_type &a) : data(a) {}
    Vc_INTRINSIC SimdArray(value_type &a) : data(a) {}
    Vc_INTRINSIC SimdArray(value_type &&a) : data(a) {}
    // Broadcast from a plain int literal when value_type is not int,
    // e.g. SimdArray<float, N>(1).
    template <
        typename U,
        typename = enable_if<std::is_same<U, int>::value && !std::is_same<int, value_type>::value>>
    Vc_INTRINSIC SimdArray(U a)
        : SimdArray(static_cast<value_type>(a))
    {
    }

    // implicit casts from a SimdArray of equal N but different element type /
    // underlying vector. Depending on the size ratio N : V::Size, one, two, or
    // four native pieces of the source are passed to a single simd_cast.
    template <class U, class V, class = enable_if<N == V::Size>>
    Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x)
        : data(simd_cast<vector_type>(internal_data(x)))
    {
    }
    template <class U, class V, class = enable_if<(N > V::Size && N <= 2 * V::Size)>,
              class = U>
    Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x)
        : data(simd_cast<vector_type>(internal_data(internal_data0(x)),
                                      internal_data(internal_data1(x))))
    {
    }
    template <class U, class V, class = enable_if<(N > 2 * V::Size && N <= 4 * V::Size)>,
              class = U, class = U>
    Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x)
        : data(simd_cast<vector_type>(internal_data(internal_data0(internal_data0(x))),
                                      internal_data(internal_data1(internal_data0(x))),
                                      internal_data(internal_data0(internal_data1(x))),
                                      internal_data(internal_data1(internal_data1(x)))))
    {
    }

    // Construct from the Index-th piece of a segmented vector.
    template <typename V, std::size_t Pieces, std::size_t Index>
    Vc_INTRINSIC SimdArray(Common::Segment<V, Pieces, Index> &&x)
        : data(simd_cast<vector_type, Index>(x.data))
    {
    }

    // Element-wise initialization; the list must contain exactly N values
    // (checked by assertion only).
    Vc_INTRINSIC SimdArray(const std::initializer_list<value_type> &init)
        : data(init.begin(), Vc::Unaligned)
    {
        Vc_ASSERT(init.size() == size());
    }

    // implicit conversion from underlying vector_type
    template <
        typename V,
        typename = enable_if<Traits::is_simd_vector<V>::value && !Traits::isSimdArray<V>::value>>
    Vc_INTRINSIC SimdArray(const V &x)
        : data(simd_cast<vector_type>(x))
    {
    }

    // implicit conversion to Vector<U, AnyAbi> if Vector<U, AnyAbi>::size() == N and
    // T is implicitly convertible to U
    template <typename U, typename A,
              typename =
                  enable_if<std::is_convertible<T, U>::value && Vector<U, A>::Size == N &&
                            !std::is_same<A, simd_abi::fixed_size<N>>::value>>
    Vc_INTRINSIC operator Vector<U, A>() const
    {
        return simd_cast<Vector<U, A>>(data);
    }
    // Reinterpret *this as the fixed_size_simd interface type. NOTE(review):
    // the downcast presumes fixed_size_simd<T, N> derives from this class and
    // adds no state — confirm against its definition.
    operator fixed_size_simd<T, N> &()
    {
        return static_cast<fixed_size_simd<T, N> &>(*this);
    }
    operator const fixed_size_simd<T, N> &() const
    {
        return static_cast<const fixed_size_simd<T, N> &>(*this);
    }

// Textually inject the shared gather/scatter member-function interfaces.
#include "gatherinterface.h"
#include "scatterinterface.h"

    // Special-initializer ctors (Vc::Zero, Vc::One, Vc::IndexesFromZero).
    explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerZero) : data() {}
    explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerOne o) : data(o) {}
    explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerIndexesFromZero i) : data(i)
    {
    }
    // IndexesFromZero with a compile-time offset added to every lane.
    template <std::size_t Offset>
    explicit Vc_INTRINSIC SimdArray(
        Common::AddOffset<VectorSpecialInitializerIndexesFromZero, Offset>)
        : data(Vc::IndexesFromZero)
    {
        data += value_type(Offset);
    }

    // Zeroing, forwarded to the wrapped vector (masked variants affect only
    // lanes selected by k).
    Vc_INTRINSIC void setZero() { data.setZero(); }
    Vc_INTRINSIC void setZero(mask_type k) { data.setZero(internal_data(k)); }
    Vc_INTRINSIC void setZeroInverted() { data.setZeroInverted(); }
    Vc_INTRINSIC void setZeroInverted(mask_type k) { data.setZeroInverted(internal_data(k)); }

    Vc_INTRINSIC void setQnan() { data.setQnan(); }
    Vc_INTRINSIC void setQnan(mask_type m) { data.setQnan(internal_data(m)); }

    // internal: execute specified Operation, writing the result into a fresh
    // fixed_size_simd
    template <typename Op, typename... Args>
    static Vc_INTRINSIC fixed_size_simd<T, N> fromOperation(Op op, Args &&... args)
    {
        fixed_size_simd<T, N> r;
        Common::unpackArgumentsAuto(op, r.data, std::forward<Args>(args)...);
        return r;
    }

    // internal: execute specified Operation without producing a result
    template <typename Op, typename... Args>
    static Vc_INTRINSIC void callOperation(Op op, Args &&... args)
    {
        Common::unpackArgumentsAuto(op, nullptr, std::forward<Args>(args)...);
    }

    static Vc_INTRINSIC fixed_size_simd<T, N> Zero()
    {
        return SimdArray(Vc::Zero);
    }
    static Vc_INTRINSIC fixed_size_simd<T, N> One()
    {
        return SimdArray(Vc::One);
    }
    static Vc_INTRINSIC fixed_size_simd<T, N> IndexesFromZero()
    {
        return SimdArray(Vc::IndexesFromZero);
    }
    static Vc_INTRINSIC fixed_size_simd<T, N> Random()
    {
        return fromOperation(Common::Operations::random());
    }

    // load ctor: reads N values of arithmetic type U starting at mem,
    // converting to value_type; Flags select aligned/unaligned/streaming loads
    template <class U, class Flags = DefaultLoadTag,
              class = enable_if<std::is_arithmetic<U>::value &&
                                Traits::is_load_store_flag<Flags>::value>>
    explicit Vc_INTRINSIC SimdArray(const U *mem, Flags f = {}) : data(mem, f)
    {
    }

    // Loads and stores are forwarded verbatim to the wrapped vector.
    template <typename... Args> Vc_INTRINSIC void load(Args &&... args)
    {
        data.load(std::forward<Args>(args)...);
    }

    template <typename... Args> Vc_INTRINSIC void store(Args &&... args) const
    {
        data.store(std::forward<Args>(args)...);
    }

    // Element-wise logical negation, returned as a mask.
    Vc_INTRINSIC mask_type operator!() const
    {
        return {private_init, !data};
    }

    Vc_INTRINSIC fixed_size_simd<T, N> operator-() const
    {
        return {private_init, -data};
    }

    /// Returns a copy of itself
    Vc_INTRINSIC fixed_size_simd<T, N> operator+() const { return *this; }

    Vc_INTRINSIC fixed_size_simd<T, N> operator~() const
    {
        return {private_init, ~data};
    }

    // Shifts: only enabled when both T and the shift amount are integral.
    template <typename U,
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
    Vc_INTRINSIC Vc_CONST fixed_size_simd<T, N> operator<<(U x) const
    {
        return {private_init, data << x};
    }
    template <typename U,
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
    Vc_INTRINSIC fixed_size_simd<T, N> &operator<<=(U x)
    {
        data <<= x;
        return *this;
    }
    template <typename U,
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
    Vc_INTRINSIC Vc_CONST fixed_size_simd<T, N> operator>>(U x) const
    {
        return {private_init, data >> x};
    }
    template <typename U,
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
    Vc_INTRINSIC fixed_size_simd<T, N> &operator>>=(U x)
    {
        data >>= x;
        return *this;
    }

// Compound-assignment operators for all arithmetic, bitwise, and shift
// operators, each forwarding straight to the wrapped vector.
#define Vc_BINARY_OPERATOR_(op)                                                          \
    Vc_INTRINSIC fixed_size_simd<T, N> &operator op##=(const SimdArray &rhs)             \
    {                                                                                    \
        data op## = rhs.data;                                                            \
        return *this;                                                                    \
    }
    Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATOR_);
    Vc_ALL_BINARY(Vc_BINARY_OPERATOR_);
    Vc_ALL_SHIFTS(Vc_BINARY_OPERATOR_);
#undef Vc_BINARY_OPERATOR_

    /// \copydoc Vector::isNegative
    Vc_DEPRECATED("use isnegative(x) instead") Vc_INTRINSIC MaskType isNegative() const
    {
        return {private_init, isnegative(data)};
    }

private:
    // Single-lane accessors used by Detail::ElementReference (the proxy type
    // returned by the non-const operator[]).
    friend reference;
    Vc_INTRINSIC static value_type get(const SimdArray &o, int i) noexcept
    {
        return o.data[i];
    }
    template <typename U>
    Vc_INTRINSIC static void set(SimdArray &o, int i, U &&v) noexcept(
        noexcept(std::declval<value_type &>() = v))
    {
        o.data[i] = v;
    }

public:
    /**
     * \note the returned object models the concept of a reference and
     * as such it can exist longer than the data it is referencing.
     * \note to avoid lifetime issues, we strongly advise not to store
     * any reference objects.
     */
    Vc_INTRINSIC reference operator[](size_t i) noexcept
    {
        static_assert(noexcept(reference{std::declval<SimdArray &>(), int()}), "");
        return {*this, int(i)};
    }
    Vc_INTRINSIC value_type operator[](size_t i) const noexcept
    {
        return get(*this, int(i));
    }

    // Write-masked view: `v(mask) = x` assigns only the lanes selected by k.
    Vc_INTRINSIC Common::WriteMaskedVector<SimdArray, mask_type> operator()(const mask_type &k)
    {
        return {*this, k};
    }

    Vc_INTRINSIC void assign(const SimdArray &v, const mask_type &k)
    {
        data.assign(v.data, internal_data(k));
    }

    // reductions ////////////////////////////////////////////////////////
    // Each reduction comes in an unmasked and a masked flavor, both forwarded
    // to the wrapped vector.
#define Vc_REDUCTION_FUNCTION_(name_)                                                    \
    Vc_INTRINSIC Vc_PURE value_type name_() const { return data.name_(); }               \
    Vc_INTRINSIC Vc_PURE value_type name_(mask_type mask) const                          \
    {                                                                                    \
        return data.name_(internal_data(mask));                                          \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
    Vc_REDUCTION_FUNCTION_(min);
    Vc_REDUCTION_FUNCTION_(max);
    Vc_REDUCTION_FUNCTION_(product);
    Vc_REDUCTION_FUNCTION_(sum);
#undef Vc_REDUCTION_FUNCTION_
    Vc_INTRINSIC Vc_PURE fixed_size_simd<T, N> partialSum() const
    {
        return {private_init, data.partialSum()};
    }

    // Apply f to every lane; the masked overload passes k through as-is.
    // NOTE(review): unlike the reduction members above, the mask is not
    // unwrapped via internal_data(k) here — presumably an implicit conversion
    // applies; confirm against Vector::apply.
    template <typename F> Vc_INTRINSIC fixed_size_simd<T, N> apply(F &&f) const
    {
        return {private_init, data.apply(std::forward<F>(f))};
    }
    template <typename F> Vc_INTRINSIC fixed_size_simd<T, N> apply(F &&f, const mask_type &k) const
    {
        return {private_init, data.apply(std::forward<F>(f), k)};
    }

    Vc_INTRINSIC fixed_size_simd<T, N> shifted(int amount) const
    {
        return {private_init, data.shifted(amount)};
    }

    // Shift lanes, filling vacated positions from shiftIn (cast to the native
    // vector type first).
    template <std::size_t NN>
    Vc_INTRINSIC fixed_size_simd<T, N> shifted(int amount, const SimdArray<value_type, NN> &shiftIn)
        const
    {
        return {private_init, data.shifted(amount, simd_cast<VectorType>(shiftIn))};
    }

    Vc_INTRINSIC fixed_size_simd<T, N> rotated(int amount) const
    {
        return {private_init, data.rotated(amount)};
    }

    /// \copydoc Vector::exponent
    Vc_DEPRECATED("use exponent(x) instead") Vc_INTRINSIC fixed_size_simd<T, N> exponent() const
    {
        return {private_init, exponent(data)};
    }

    Vc_INTRINSIC fixed_size_simd<T, N> interleaveLow(SimdArray x) const
    {
        return {private_init, data.interleaveLow(x.data)};
    }
    Vc_INTRINSIC fixed_size_simd<T, N> interleaveHigh(SimdArray x) const
    {
        return {private_init, data.interleaveHigh(x.data)};
    }

    Vc_INTRINSIC fixed_size_simd<T, N> reversed() const
    {
        return {private_init, data.reversed()};
    }

    Vc_INTRINSIC fixed_size_simd<T, N> sorted() const
    {
        return {private_init, data.sorted()};
    }

    // Generator ctor: lane i is initialized with gen(i). SFINAE'd out for
    // SIMD-vector arguments so it does not shadow the conversion ctors.
    template <class G, class = decltype(std::declval<G>()(std::size_t())),
              class = enable_if<!Traits::is_simd_vector<G>::value>>
    Vc_INTRINSIC SimdArray(const G &gen) : data(gen)
    {
    }
    template <typename G> static Vc_INTRINSIC fixed_size_simd<T, N> generate(const G &gen)
    {
        return {private_init, VectorType::generate(gen)};
    }

    Vc_DEPRECATED("use copysign(x, y) instead")
        Vc_INTRINSIC fixed_size_simd<T, N> copySign(const SimdArray &x) const
    {
        return {private_init, Vc::copysign(data, x.data)};
    }

    friend VectorType &internal_data<>(SimdArray &x);
    friend const VectorType &internal_data<>(const SimdArray &x);

    /// \internal Wraps an already-computed native vector without conversion.
    Vc_INTRINSIC SimdArray(private_init_t, VectorType &&x) : data(std::move(x)) {}

    Vc_FREE_STORE_OPERATORS_ALIGNED(alignof(storage_type));

private:
    // The alignas attribute attached to the class declaration above is ignored by ICC
    // 17.0.0 (at least). So just move the alignas attribute down here where it works for
    // all compilers.
    alignas(static_cast<std::size_t>(
        Common::BoundedAlignment<Common::NextPowerOfTwo<N>::value * sizeof(VectorType_) /
                                 VectorType_::size()>::value)) storage_type data;
};
// Out-of-class definitions for the static constexpr data members (required for
// ODR-use before C++17 made constexpr static members implicitly inline).
template <typename T, std::size_t N, typename VectorType> constexpr std::size_t SimdArray<T, N, VectorType, N>::Size;
template <typename T, std::size_t N, typename VectorType>
constexpr std::size_t SimdArray<T, N, VectorType, N>::MemoryAlignment;
/// \internal Mutable access to the native vector wrapped by an atomic SimdArray.
template <typename T, std::size_t N, typename VectorType>
#ifndef Vc_MSVC
// Vc_INTRINSIC is skipped for MSVC — presumably a compiler workaround; TODO confirm.
Vc_INTRINSIC
#endif
VectorType &internal_data(SimdArray<T, N, VectorType, N> &x)
{
    return x.data;
}
/// \internal Read-only access to the native vector wrapped by an atomic SimdArray.
template <typename T, std::size_t N, typename VectorType>
#ifndef Vc_MSVC
// Vc_INTRINSIC is skipped for MSVC — presumably a compiler workaround; TODO confirm.
Vc_INTRINSIC
#endif
const VectorType &internal_data(const SimdArray<T, N, VectorType, N> &x)
{
    return x.data;
}
0532 
0533 // unwrap {{{2
0534 template <class T> Vc_INTRINSIC T unwrap(const T &x) { return x; }
0535 
0536 template <class T, size_t N, class V>
0537 Vc_INTRINSIC V unwrap(const SimdArray<T, N, V, N> &x)
0538 {
0539     return internal_data(x);
0540 }
0541 
/// \internal Unwraps a Segment by first materializing it as a fixed-size value.
/// NOTE(review): the declared return type is the fixed-size value itself while
/// the body returns unwrap(...) of it — the result converts back through the
/// fixed-size type's constructors; confirm this round-trip is intended.
template <class T, size_t Pieces, size_t Index>
Vc_INTRINSIC auto unwrap(const Common::Segment<T, Pieces, Index> &x)
    -> decltype(x.to_fixed_size())
{
    return unwrap(x.to_fixed_size());
}
0548 
0549 // gatherImplementation {{{2
// Unmasked gather: forwards to the wrapped vector's gather after unwrapping
// the index vector to its native representation.
template <typename T, std::size_t N, typename VectorType>
template <class MT, class IT, int Scale>
Vc_INTRINSIC void SimdArray<T, N, VectorType, N>::gatherImplementation(
    const Common::GatherArguments<MT, IT, Scale> &args)
{
    data.gather(Common::make_gather<Scale>(args.address, unwrap(args.indexes)));
}
// Masked gather: only lanes selected by mask are loaded; the rest keep their
// previous values.
template <typename T, std::size_t N, typename VectorType>
template <class MT, class IT, int Scale>
Vc_INTRINSIC void SimdArray<T, N, VectorType, N>::gatherImplementation(
    const Common::GatherArguments<MT, IT, Scale> &args, MaskArgument mask)
{
    data.gather(Common::make_gather<Scale>(args.address, unwrap(args.indexes)),
                mask);
}
0565 
0566 // scatterImplementation {{{2
// Unmasked scatter: writes each lane to mem at the corresponding (unwrapped)
// index.
template <typename T, std::size_t N, typename VectorType>
template <typename MT, typename IT>
inline void SimdArray<T, N, VectorType, N>::scatterImplementation(MT *mem,
                                                                 IT &&indexes) const
{
    data.scatter(mem, unwrap(std::forward<IT>(indexes)));
}
// Masked scatter: only lanes selected by mask are written to memory.
template <typename T, std::size_t N, typename VectorType>
template <typename MT, typename IT>
inline void SimdArray<T, N, VectorType, N>::scatterImplementation(MT *mem,
                                                                 IT &&indexes,
                                                                 MaskArgument mask) const
{
    data.scatter(mem, unwrap(std::forward<IT>(indexes)), mask);
}
0582 
0583 // generic SimdArray {{{1
0584 /**
0585  * Data-parallel arithmetic type with user-defined number of elements.
0586  *
0587  * \tparam T The type of the vector's elements. The supported types currently are limited
0588  *           to the types supported by Vc::Vector<T>.
0589  *
0590  * \tparam N The number of elements to store and process concurrently. You can choose an
0591  *           arbitrary number, though not every number is a good idea.
0592  *           Generally, a power of two value or the sum of two power of two values might
0593  *           work efficiently, though this depends a lot on the target system.
0594  *
0595  * \tparam V Don't change the default value unless you really know what you are doing.
0596  *           This type is set to the underlying native Vc::Vector type used in the
0597  *           implementation of the type.
0598  *           Having it as part of the type name guards against some cases of ODR
0599  *           violations (i.e. linking incompatible translation units / libraries).
0600  *
0601  * \tparam Wt Don't ever change the default value.
0602  *           This parameter is an unfortunate implementation detail shining through.
0603  *
0604  * \warning Choosing \p N too large (what “too large” means depends on the target) will
0605  *          result in excessive compilation times and high (or too high) register
0606  *          pressure, thus potentially negating the improvement from concurrent execution.
0607  *          As a rule of thumb, keep \p N less or equal to `2 * float_v::size()`.
0608  *
0609  * \warning A special portability concern arises from a current limitation in the MIC
0610  *          implementation (Intel Knights Corner), where SimdArray types with \p T = \p
0611  *          (u)short require an \p N either less than short_v::size() or a multiple of
0612  *          short_v::size().
0613  *
0614  * \headerfile simdarray.h <Vc/SimdArray>
0615  */
0616 template <typename T, size_t N, typename V, size_t Wt> class SimdArray
0617 {
0618     static_assert(std::is_same<T,   double>::value ||
0619                   std::is_same<T,    float>::value ||
0620                   std::is_same<T,  int32_t>::value ||
0621                   std::is_same<T, uint32_t>::value ||
0622                   std::is_same<T,  int16_t>::value ||
0623                   std::is_same<T, uint16_t>::value, "SimdArray<T, N> may only be used with T = { double, float, int32_t, uint32_t, int16_t, uint16_t }");
0624     static_assert(
0625         std::is_same<V, typename Common::select_best_vector_type<T, N>::type>::value &&
0626             V::size() == Wt,
0627         "ERROR: leave the third and fourth template parameters with their defaults. They "
0628         "are implementation details.");
0629     static_assert(
0630         // either the EntryType and VectorEntryType of the main V are equal
0631         std::is_same<typename V::EntryType, typename V::VectorEntryType>::value ||
0632             // or N is a multiple of V::size()
0633             (N % V::size() == 0),
0634         "SimdArray<(un)signed short, N> on MIC only works correctly for N = k * "
0635         "MIC::(u)short_v::size(), i.e. k * 16.");
0636 
0637     using my_traits = SimdArrayTraits<T, N>;
0638     static constexpr std::size_t N0 = my_traits::N0;
0639     static constexpr std::size_t N1 = my_traits::N1;
0640     using Split = Common::Split<N0>;
0641     template <typename U, std::size_t K> using CArray = U[K];
0642 
0643 public:
0644     static constexpr bool is_atomic = false;
0645     using storage_type0 = typename my_traits::storage_type0;
0646     using storage_type1 = typename my_traits::storage_type1;
0647     static_assert(storage_type0::size() == N0, "");
0648 
0649     /**\internal
0650      * This type reveals the implementation-specific type used for the data member.
0651      */
0652     using vector_type = V;
0653     using vectorentry_type = typename storage_type0::vectorentry_type;
0654     typedef vectorentry_type alias_type Vc_MAY_ALIAS;
0655 
0656     /// The type of the elements (i.e.\ \p T)
0657     using value_type = T;
0658 
0659     /// The type of the mask used for masked operations and returned from comparisons.
0660     using mask_type = fixed_size_simd_mask<T, N>;
0661 
0662     /// The type of the vector used for indexes in gather and scatter operations.
0663     using index_type = fixed_size_simd<int, N>;
0664 
0665     /**
0666      * Returns \p N, the number of scalar components in an object of this type.
0667      *
0668      * The size of the SimdArray, i.e. the number of scalar elements in the vector. In
0669      * contrast to Vector::size() you have control over this value via the \p N template
0670      * parameter of the SimdArray class template.
0671      *
0672      * \returns The number of scalar values stored and manipulated concurrently by objects
0673      * of this type.
0674      */
0675     static constexpr std::size_t size() { return N; }
0676 
0677     /// \copydoc mask_type
0678     using Mask = mask_type;
0679     /// \copydoc mask_type
0680     using MaskType = Mask;
0681     using MaskArgument = const MaskType &;
0682     using VectorEntryType = vectorentry_type;
0683     /// \copydoc value_type
0684     using EntryType = value_type;
0685     /// \copydoc index_type
0686     using IndexType = index_type;
0687     using AsArg = const SimdArray &;
0688 
0689     using reference = Detail::ElementReference<SimdArray>;
0690 
0691     ///\copydoc Vector::MemoryAlignment
0692     static constexpr std::size_t MemoryAlignment =
0693         storage_type0::MemoryAlignment > storage_type1::MemoryAlignment
0694             ? storage_type0::MemoryAlignment
0695             : storage_type1::MemoryAlignment;
0696 
0697     /// \name Generators
0698     ///@{
0699 
0700     ///\copybrief Vector::Zero
0701     static Vc_INTRINSIC fixed_size_simd<T, N> Zero()
0702     {
0703         return SimdArray(Vc::Zero);
0704     }
0705 
0706     ///\copybrief Vector::One
0707     static Vc_INTRINSIC fixed_size_simd<T, N> One()
0708     {
0709         return SimdArray(Vc::One);
0710     }
0711 
0712     ///\copybrief Vector::IndexesFromZero
0713     static Vc_INTRINSIC fixed_size_simd<T, N> IndexesFromZero()
0714     {
0715         return SimdArray(Vc::IndexesFromZero);
0716     }
0717 
0718     ///\copydoc Vector::Random
0719     static Vc_INTRINSIC fixed_size_simd<T, N> Random()
0720     {
0721         return fromOperation(Common::Operations::random());
0722     }
0723 
    // Generator constructor: gen(i) produces the value of element i.
    // Enabled only for callables invocable with std::size_t that are not
    // themselves SIMD vectors (those go through the conversion constructors).
    // The high half re-bases the index by the low half's size.
    template <class G, class = decltype(std::declval<G>()(std::size_t())),
              class = enable_if<!Traits::is_simd_vector<G>::value>>
    Vc_INTRINSIC SimdArray(const G &gen)
        : data0(gen), data1([&](std::size_t i) { return gen(i + storage_type0::size()); })
    {
    }

    ///\copybrief Vector::generate
    // Static variant of the generator constructor; see the tmp comment below
    // for why the low half is generated into a named variable first.
    template <typename G> static Vc_INTRINSIC fixed_size_simd<T, N> generate(const G &gen) // {{{2
    {
        auto tmp = storage_type0::generate(gen);  // GCC bug: the order of evaluation in
                                                  // an initializer list is well-defined
                                                  // (front to back), but GCC 4.8 doesn't
                                                  // implement this correctly. Therefore
                                                  // we enforce correct order.
        return {std::move(tmp),
                storage_type1::generate([&](std::size_t i) { return gen(i + N0); })};
    }
0742     ///@}
0743 
    /// \name Compile-Time Constant Initialization
    ///@{

    ///\copydoc Vector::Vector()
    SimdArray() = default;
    ///@}

    /// \name Conversion/Broadcast Constructors
    ///@{

    ///\copydoc Vector::Vector(EntryType)
    // Broadcast: both halves are filled with the same scalar.
    Vc_INTRINSIC SimdArray(value_type a) : data0(a), data1(a) {}
    // Broadcast from int when value_type is not int. Restricting U to exactly
    // int keeps this from competing with the value_type overload above while
    // still accepting plain integer literals.
    template <
        typename U,
        typename = enable_if<std::is_same<U, int>::value && !std::is_same<int, value_type>::value>>
    SimdArray(U a)
        : SimdArray(static_cast<value_type>(a))
    {
    }
    ///@}

    // default copy ctor/operator
    SimdArray(const SimdArray &) = default;
    SimdArray(SimdArray &&) = default;
    SimdArray &operator=(const SimdArray &) = default;
0769 
    // load ctor
    // Loads N elements from mem, splitting the range between the two halves.
    // Flags selects aligned/unaligned/streaming loads.
    template <typename U, typename Flags = DefaultLoadTag,
              typename = enable_if<std::is_arithmetic<U>::value &&
                                   Traits::is_load_store_flag<Flags>::value>>
    explicit Vc_INTRINSIC SimdArray(const U *mem, Flags f = {})
        : data0(mem, f), data1(mem + storage_type0::size(), f)
    {
    }

// MSVC does overload resolution differently and takes the const U *mem overload (I hope)
#ifndef Vc_MSVC
    /**\internal
     * Load from a C-array. This is basically the same function as the load constructor
     * above, except that the forwarding reference overload would steal the deal and the
     * constructor above doesn't get called. This overload is required to enable loads
     * from C-arrays.
     */
    template <typename U, std::size_t Extent, typename Flags = DefaultLoadTag,
              typename = enable_if<std::is_arithmetic<U>::value &&
                                   Traits::is_load_store_flag<Flags>::value>>
    explicit Vc_INTRINSIC SimdArray(CArray<U, Extent> &mem, Flags f = {})
        : data0(&mem[0], f), data1(&mem[storage_type0::size()], f)
    {
    }
    /**\internal
     * Const overload of the above.
     */
    template <typename U, std::size_t Extent, typename Flags = DefaultLoadTag,
              typename = enable_if<std::is_arithmetic<U>::value &&
                                   Traits::is_load_store_flag<Flags>::value>>
    explicit Vc_INTRINSIC SimdArray(const CArray<U, Extent> &mem, Flags f = {})
        : data0(&mem[0], f), data1(&mem[storage_type0::size()], f)
    {
    }
#endif

    // initializer list
    // NOTE(review): the two loads in the init list execute before the
    // assertion below, so a list shorter than size() is read past its end in
    // release builds — callers must pass exactly size() elements.
    Vc_INTRINSIC SimdArray(const std::initializer_list<value_type> &init)
        : data0(init.begin(), Vc::Unaligned)
        , data1(init.begin() + storage_type0::size(), Vc::Unaligned)
    {
        Vc_ASSERT(init.size() == size());
    }
0813 
0814 #include "gatherinterface.h"
0815 #include "scatterinterface.h"
0816 
    // Tag-dispatched special initializers. Zero default-constructs both halves
    // (presumably the storage types zero-initialize by default — confirm in
    // Vector's default ctor); One broadcasts the tag to both halves.
    explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerZero) : data0(), data1() {}
    explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerOne o) : data0(o), data1(o) {}
    // IndexesFromZero: the high half starts counting at storage_type0::size().
    explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerIndexesFromZero i)
        : data0(i)
        , data1(Common::AddOffset<VectorSpecialInitializerIndexesFromZero,
                                  storage_type0::size()>())
    {
    }
    // IndexesFromZero with an additional compile-time offset; the high half
    // accumulates the low half's size on top of Offset.
    template <size_t Offset>
    explicit Vc_INTRINSIC SimdArray(
        Common::AddOffset<VectorSpecialInitializerIndexesFromZero, Offset> i)
        : data0(i)
        , data1(Common::AddOffset<VectorSpecialInitializerIndexesFromZero,
                                  storage_type0::size() + Offset>())
    {
    }
0833 
    // explicit casts
    // Explicit conversion from any SIMD vector of the same element count,
    // unless it is a SimdArray with an implicitly convertible entry type
    // (that case is handled by the implicit constructor below).
    template <class W, class = enable_if<
                           (Traits::is_simd_vector<W>::value &&
                            Traits::simd_vector_size<W>::value == N &&
                            !(std::is_convertible<Traits::entry_type_of<W>, T>::value &&
                              Traits::isSimdArray<W>::value))>>
    Vc_INTRINSIC explicit SimdArray(W &&x) : data0(Split::lo(x)), data1(Split::hi(x))
    {
    }

    // implicit casts
    // Implicit conversion from a SimdArray of the same element count whose
    // entry type converts implicitly to T. The extra `class = W` defaulted
    // parameter keeps this overload distinct from the explicit one above.
    template <class W, class = enable_if<
                           (Traits::isSimdArray<W>::value &&
                            Traits::simd_vector_size<W>::value == N &&
                            std::is_convertible<Traits::entry_type_of<W>, T>::value)>,
              class = W>
    Vc_INTRINSIC SimdArray(W &&x) : data0(Split::lo(x)), data1(Split::hi(x))
    {
    }

    // Construct from a Segment (a 1/Pieces view of W at position Index) by
    // subdividing it into two twice-as-fine segments, one per half.
    template <class W, std::size_t Pieces, std::size_t Index>
    Vc_INTRINSIC SimdArray(Common::Segment<W, Pieces, Index> &&x)
        : data0(Common::Segment<W, 2 * Pieces, 2 * Index>{x.data})
        , data1(Common::Segment<W, 2 * Pieces, 2 * Index + 1>{x.data})
    {
    }
0860 
    // implicit conversion to Vector<U, AnyAbi> for if Vector<U, AnyAbi>::size() == N and
    // T implicitly convertible to U
    // The fixed_size ABI is excluded here because that conversion is provided
    // by the reference-returning operators below.
    template <typename U, typename A,
              typename =
                  enable_if<std::is_convertible<T, U>::value && Vector<U, A>::Size == N &&
                            !std::is_same<A, simd_abi::fixed_size<N>>::value>>
    operator Vector<U, A>() const
    {
        auto r = simd_cast<Vector<U, A>>(data0, data1);
        return r;
    }
    // Reinterpret this object as the fixed_size_simd<T, N> subclass/view; no
    // data is copied, only the static type changes.
    Vc_INTRINSIC operator fixed_size_simd<T, N> &()
    {
        return static_cast<fixed_size_simd<T, N> &>(*this);
    }
    Vc_INTRINSIC operator const fixed_size_simd<T, N> &() const
    {
        return static_cast<const fixed_size_simd<T, N> &>(*this);
    }
0880 
0881     //////////////////// other functions ///////////////
0882 
0883     Vc_INTRINSIC void setZero()
0884     {
0885         data0.setZero();
0886         data1.setZero();
0887     }
0888     Vc_INTRINSIC void setZero(const mask_type &k)
0889     {
0890         data0.setZero(Split::lo(k));
0891         data1.setZero(Split::hi(k));
0892     }
0893     Vc_INTRINSIC void setZeroInverted()
0894     {
0895         data0.setZeroInverted();
0896         data1.setZeroInverted();
0897     }
0898     Vc_INTRINSIC void setZeroInverted(const mask_type &k)
0899     {
0900         data0.setZeroInverted(Split::lo(k));
0901         data1.setZeroInverted(Split::hi(k));
0902     }
0903 
0904 
0905     Vc_INTRINSIC void setQnan() {
0906         data0.setQnan();
0907         data1.setQnan();
0908     }
0909     Vc_INTRINSIC void setQnan(const mask_type &m) {
0910         data0.setQnan(Split::lo(m));
0911         data1.setQnan(Split::hi(m));
0912     }
0913 
    ///\internal execute specified Operation
    // Runs op on each half, splitting every argument with Split::lo/hi.
    // Arguments are forwarded only on the *second* use (see inline comment):
    // forwarding on the first use could move from an argument that the second
    // call still needs.
    template <typename Op, typename... Args>
    static Vc_INTRINSIC fixed_size_simd<T, N> fromOperation(Op op, Args &&... args)
    {
        fixed_size_simd<T, N> r = {
            storage_type0::fromOperation(op, Split::lo(args)...),  // no forward here - it
                                                                   // could move and thus
                                                                   // break the next line
            storage_type1::fromOperation(op, Split::hi(std::forward<Args>(args))...)};
        return r;
    }

    ///\internal
    // Same splitting scheme as fromOperation, for operations without a result.
    template <typename Op, typename... Args>
    static Vc_INTRINSIC void callOperation(Op op, Args &&... args)
    {
        storage_type0::callOperation(op, Split::lo(args)...);
        storage_type1::callOperation(op, Split::hi(std::forward<Args>(args))...);
    }
0933 
0934 
    // Load N elements starting at mem; flags/masks in args are split per half.
    // As in fromOperation, args are forwarded only on their last use.
    template <typename U, typename... Args> Vc_INTRINSIC void load(const U *mem, Args &&... args)
    {
        data0.load(mem, Split::lo(args)...);  // no forward here - it could move and thus
                                              // break the next line
        data1.load(mem + storage_type0::size(), Split::hi(std::forward<Args>(args))...);
    }

    // Store N elements to mem; mirror image of load().
    template <typename U, typename... Args> Vc_INTRINSIC void store(U *mem, Args &&... args) const
    {
        data0.store(mem, Split::lo(args)...);  // no forward here - it could move and thus
                                               // break the next line
        data1.store(mem + storage_type0::size(), Split::hi(std::forward<Args>(args))...);
    }
0948 
0949     Vc_INTRINSIC mask_type operator!() const
0950     {
0951         return {!data0, !data1};
0952     }
0953 
0954     Vc_INTRINSIC fixed_size_simd<T, N> operator-() const
0955     {
0956         return {-data0, -data1};
0957     }
0958 
0959     /// Returns a copy of itself
0960     Vc_INTRINSIC fixed_size_simd<T, N> operator+() const { return *this; }
0961 
0962     Vc_INTRINSIC fixed_size_simd<T, N> operator~() const
0963     {
0964         return {~data0, ~data1};
0965     }
0966 
0967     // left/right shift operators {{{2
0968     template <typename U,
0969               typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
0970     Vc_INTRINSIC Vc_CONST fixed_size_simd<T, N> operator<<(U x) const
0971     {
0972         return {data0 << x, data1 << x};
0973     }
0974     template <typename U,
0975               typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
0976     Vc_INTRINSIC fixed_size_simd<T, N> &operator<<=(U x)
0977     {
0978         data0 <<= x;
0979         data1 <<= x;
0980         return *this;
0981     }
0982     template <typename U,
0983               typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
0984     Vc_INTRINSIC Vc_CONST fixed_size_simd<T, N> operator>>(U x) const
0985     {
0986         return {data0 >> x, data1 >> x};
0987     }
0988     template <typename U,
0989               typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
0990     Vc_INTRINSIC fixed_size_simd<T, N> &operator>>=(U x)
0991     {
0992         data0 >>= x;
0993         data1 >>= x;
0994         return *this;
0995     }
0996 
0997     // binary operators {{{2
// Generates the compound-assignment operator (op=) for every arithmetic,
// bitwise, and shift operator; each simply applies op= to both halves.
#define Vc_BINARY_OPERATOR_(op)                                                          \
    Vc_INTRINSIC fixed_size_simd<T, N> &operator op##=(const SimdArray &rhs)             \
    {                                                                                    \
        data0 op## = rhs.data0;                                                          \
        data1 op## = rhs.data1;                                                          \
        return *this;                                                                    \
    }
    Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATOR_);
    Vc_ALL_BINARY(Vc_BINARY_OPERATOR_);
    Vc_ALL_SHIFTS(Vc_BINARY_OPERATOR_);
#undef Vc_BINARY_OPERATOR_
1009 
1010     // operator[] {{{2
1011     /// \name Scalar Subscript Operators
1012     ///@{
1013 
private:
    friend reference;
    // Read element i by reinterpreting the object as an array of alias_type.
    // alias_type is declared elsewhere in the class; presumably a may-alias
    // wrapper that makes this cast well-defined — TODO confirm.
    Vc_INTRINSIC static value_type get(const SimdArray &o, int i) noexcept
    {
        return reinterpret_cast<const alias_type *>(&o)[i];
    }
    // Write element i through the same aliasing scheme; noexcept iff the
    // scalar assignment from U is noexcept.
    template <typename U>
    Vc_INTRINSIC static void set(SimdArray &o, int i, U &&v) noexcept(
        noexcept(std::declval<value_type &>() = v))
    {
        reinterpret_cast<alias_type *>(&o)[i] = v;
    }
1026 
public:
    ///\copydoc Vector::operator[](size_t)
    /**
     * \note the returned object models the concept of a reference and
     * as such it can exist longer than the data it is referencing.
     * \note to avoid lifetime issues, we strongly advice not to store
     * any reference objects.
     */
    // Returns a proxy-reference (Detail::ElementReference) that reads and
    // writes through get()/set() above.
    Vc_INTRINSIC reference operator[](size_t i) noexcept
    {
        static_assert(noexcept(reference{std::declval<SimdArray &>(), int()}), "");
        return {*this, int(i)};
    }

    ///\copydoc Vector::operator[](size_t) const
    // Const access returns the element by value.
    Vc_INTRINSIC value_type operator[](size_t index) const noexcept
    {
        return get(*this, int(index));
    }
    ///@}
1047 
    // operator(){{{2
    ///\copydoc Vector::operator()(MaskType)
    // Returns a write-masked view: assignments through it affect only the
    // elements selected by mask.
    Vc_INTRINSIC Common::WriteMaskedVector<SimdArray, mask_type> operator()(
        const mask_type &mask)
    {
        return {*this, mask};
    }

    ///\internal
    // Masked assignment: copy elements of v where k is set, per half.
    Vc_INTRINSIC void assign(const SimdArray &v, const mask_type &k) //{{{2
    {
        data0.assign(v.data0, internal_data0(k));
        data1.assign(v.data1, internal_data1(k));
    }
1062 
1063     // reductions {{{2
// Generates a horizontal reduction (name_) plus its masked overload.
// If the two halves have equal size, the halves are first combined
// element-wise with binary_fun_ and the half-width reduction finishes the
// job; otherwise each half is reduced separately and the two scalar results
// are combined with scalar_fun_. The masked overload skips a half whose mask
// part is empty (its elements must not contribute to the result).
#define Vc_REDUCTION_FUNCTION_(name_, binary_fun_, scalar_fun_)                          \
private:                                                                                 \
    template <typename ForSfinae = void>                                                 \
    Vc_INTRINSIC enable_if<std::is_same<ForSfinae, void>::value &&                       \
                               storage_type0::Size == storage_type1::Size,           \
                           value_type> name_##_impl() const                              \
    {                                                                                    \
        return binary_fun_(data0, data1).name_();                                        \
    }                                                                                    \
                                                                                         \
    template <typename ForSfinae = void>                                                 \
    Vc_INTRINSIC enable_if<std::is_same<ForSfinae, void>::value &&                       \
                               storage_type0::Size != storage_type1::Size,           \
                           value_type> name_##_impl() const                              \
    {                                                                                    \
        return scalar_fun_(data0.name_(), data1.name_());                                \
    }                                                                                    \
                                                                                         \
public:                                                                                  \
    /**\copybrief Vector::##name_ */                                                     \
    Vc_INTRINSIC value_type name_() const { return name_##_impl(); }                     \
    /**\copybrief Vector::##name_ */                                                     \
    Vc_INTRINSIC value_type name_(const mask_type &mask) const                           \
    {                                                                                    \
        if (Vc_IS_UNLIKELY(Split::lo(mask).isEmpty())) {                                 \
            return data1.name_(Split::hi(mask));                                         \
        } else if (Vc_IS_UNLIKELY(Split::hi(mask).isEmpty())) {                          \
            return data0.name_(Split::lo(mask));                                         \
        } else {                                                                         \
            return scalar_fun_(data0.name_(Split::lo(mask)),                             \
                               data1.name_(Split::hi(mask)));                            \
        }                                                                                \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
    Vc_REDUCTION_FUNCTION_(min, Vc::min, std::min);
    Vc_REDUCTION_FUNCTION_(max, Vc::max, std::max);
    Vc_REDUCTION_FUNCTION_(product, internal::product_helper_, internal::product_helper_);
    Vc_REDUCTION_FUNCTION_(sum, internal::sum_helper_, internal::sum_helper_);
#undef Vc_REDUCTION_FUNCTION_
1103     ///\copybrief Vector::partialSum
1104     Vc_INTRINSIC Vc_PURE fixed_size_simd<T, N> partialSum() const //{{{2
1105     {
1106         auto ps0 = data0.partialSum();
1107         auto tmp = data1;
1108         tmp[0] += ps0[data0.size() - 1];
1109         return {std::move(ps0), tmp.partialSum()};
1110     }
1111 
1112     // apply {{{2
1113     ///\copybrief Vector::apply(F &&) const
1114     template <typename F> inline fixed_size_simd<T, N> apply(F &&f) const
1115     {
1116         return {data0.apply(f), data1.apply(f)};
1117     }
1118     ///\copybrief Vector::apply(F &&, MaskType) const
1119     template <typename F>
1120     inline fixed_size_simd<T, N> apply(F &&f, const mask_type &k) const
1121     {
1122         return {data0.apply(f, Split::lo(k)), data1.apply(f, Split::hi(k))};
1123     }
1124 
1125     // shifted {{{2
1126     ///\copybrief Vector::shifted(int) const
    // Shift elements across the whole N-wide array by `amount` positions,
    // filling vacated positions with zero. Negative amounts shift toward
    // higher indices, positive toward lower (matching Vector::shifted).
    inline fixed_size_simd<T, N> shifted(int amount) const
    {
        constexpr int SSize = Size;
        constexpr int SSize0 = storage_type0::Size;
        constexpr int SSize1 = storage_type1::Size;
        if (amount == 0) {
            return *this;
        }
        if (amount < 0) {
            // Small negative shift: both halves shift; data1 pulls the
            // elements shifted out of data0 via the two-argument overload.
            if (amount > -SSize0) {
                return {data0.shifted(amount), data1.shifted(amount, data0)};
            }
            // data0 moves exactly into the position of data1.
            if (amount == -SSize0) {
                return {storage_type0(0), simd_cast<storage_type1>(data0)};
            }
            // Larger shift: only a pre-shifted part of data0 survives in the
            // high half.
            if (amount < -SSize0) {
                return {storage_type0(0), simd_cast<storage_type1>(data0.shifted(
                                                   amount + SSize0))};
            }
            // Unreachable: the three conditions above cover every negative
            // amount; kept for safety/readability.
            return Zero();
        } else {
            if (amount >= SSize) {
                return Zero();
            } else if (amount >= SSize0) {
                // All surviving elements come from data1.
                return {
                    simd_cast<storage_type0>(data1).shifted(amount - SSize0),
                    storage_type1(0)};
            } else if (amount >= SSize1) {
                // data1 is shifted out entirely (SSize1 <= amount < SSize0);
                // data0 pulls from data1 via the two-argument overload.
                return {data0.shifted(amount, data1), storage_type1(0)};
            } else {
                return {data0.shifted(amount, data1), data1.shifted(amount)};
            }
        }
    }
1161 
    // Generic (non-bisectable) shift-with-carry-in: enabled when the faster
    // overload below does not apply. Falls back to element-wise generation:
    // indices shifted past the edge are taken from shiftIn, everything out of
    // range of both arrays becomes 0.
    template <std::size_t NN>
    inline enable_if<
        !(std::is_same<storage_type0, storage_type1>::value &&  // not bisectable
          N == NN),
        fixed_size_simd<T, N>>
        shifted(int amount, const SimdArray<value_type, NN> &shiftIn) const
    {
        constexpr int SSize = Size;
        if (amount < 0) {
            // Negative shift: element i comes from index i+amount of *this,
            // or from the tail of shiftIn when that underflows.
            return fixed_size_simd<T, N>([&](int i) -> value_type {
                i += amount;
                if (i >= 0) {
                    return operator[](i);
                } else if (i >= -SSize) {
                    return shiftIn[i + SSize];
                }
                return 0;
            });
        }
        // Positive shift: element i comes from index i+amount of *this, or
        // from the head of shiftIn when that overflows past SSize.
        return fixed_size_simd<T, N>([&](int i) -> value_type {
            i += amount;
            if (i < SSize) {
                return operator[](i);
            } else if (i < 2 * SSize) {
                return shiftIn[i - SSize];
            }
            return 0;
        });
    }
1191 
private:
    // workaround for MSVC not understanding the simpler and shorter expression of the boolean
    // expression directly in the enable_if below
    // "Bisectable" means both halves have the same storage type (so N is
    // split evenly) and the shift-in array has the same width N.
    template <std::size_t NN> struct bisectable_shift
        : public std::integral_constant<bool,
                                        std::is_same<storage_type0, storage_type1>::value &&  // bisectable
                                        N == NN>
    {
    };
1201 
public:
    // Fast shift-with-carry-in for the bisectable case (equal-size halves,
    // NN == N). The branch ladder walks the shift distance from small to
    // large; each case stitches the result from the appropriate pieces of
    // data0/data1 and the two halves of shiftIn.
    template <std::size_t NN>
    inline fixed_size_simd<T, N> shifted(
        enable_if<bisectable_shift<NN>::value, int> amount,
        const SimdArray<value_type, NN> &shiftIn) const
    {
        constexpr int SSize = Size;
        if (amount < 0) {
            // |amount| < half: shift within halves, pulling carried elements
            // from the neighbor (shiftIn's high half feeds data0).
            if (amount > -static_cast<int>(storage_type0::Size)) {
                return {data0.shifted(amount, internal_data1(shiftIn)),
                        data1.shifted(amount, data0)};
            }
            // Shift by exactly one half: whole halves move over.
            if (amount == -static_cast<int>(storage_type0::Size)) {
                return {storage_type0(internal_data1(shiftIn)), storage_type1(data0)};
            }
            // half < |amount| < full: result is mostly shiftIn plus the start
            // of data0.
            if (amount > -SSize) {
                return {
                    internal_data1(shiftIn)
                        .shifted(amount + static_cast<int>(storage_type0::Size), internal_data0(shiftIn)),
                    data0.shifted(amount + static_cast<int>(storage_type0::Size), internal_data1(shiftIn))};
            }
            // Shift by exactly the full width: the result is shiftIn itself.
            if (amount == -SSize) {
                return shiftIn;
            }
            // Beyond full width: only a shifted part of shiftIn remains.
            if (amount > -2 * SSize) {
                return shiftIn.shifted(amount + SSize);
            }
        }
        if (amount == 0) {
            return *this;
        }
        // Positive shifts mirror the negative cases with shiftIn's low half
        // feeding in from the high end.
        if (amount < static_cast<int>(storage_type0::Size)) {
            return {data0.shifted(amount, data1),
                    data1.shifted(amount, internal_data0(shiftIn))};
        }
        if (amount == static_cast<int>(storage_type0::Size)) {
            return {storage_type0(data1), storage_type1(internal_data0(shiftIn))};
        }
        if (amount < SSize) {
            return {data1.shifted(amount - static_cast<int>(storage_type0::Size), internal_data0(shiftIn)),
                    internal_data0(shiftIn)
                        .shifted(amount - static_cast<int>(storage_type0::Size), internal_data1(shiftIn))};
        }
        if (amount == SSize) {
            return shiftIn;
        }
        if (amount < 2 * SSize) {
            return shiftIn.shifted(amount - SSize);
        }
        // Shift distance >= 2*SSize in either direction: nothing survives.
        return Zero();
    }
1253 
1254     // rotated {{{2
1255     ///\copybrief Vector::rotated
    // Rotate all N elements by `amount` positions. The amount is first
    // normalized into [0, size()).
    Vc_INTRINSIC fixed_size_simd<T, N> rotated(int amount) const
    {
        amount %= int(size());
        if (amount == 0) {
            return *this;
        } else if (amount < 0) {
            amount += size();
        }

#ifdef Vc_MSVC
        // MSVC fails to find a SimdArray::shifted function with 2 arguments. So use store
        // ->
        // load to implement the function instead.
        // The buffer holds data0|data1|data0 so any rotation window of either
        // half can be loaded contiguously.
        alignas(MemoryAlignment) T tmp[N + data0.size()];
        data0.store(&tmp[0], Vc::Aligned);
        data1.store(&tmp[data0.size()], Vc::Aligned);
        data0.store(&tmp[N], Vc::Unaligned);
        fixed_size_simd<T, N> r;
        r.data0.load(&tmp[amount], Vc::Unaligned);
        r.data1.load(&tmp[(amount + data0.size()) % size()], Vc::Unaligned);
        return r;
#else
        // Cross-converted copies of each half in the other half's storage
        // type, needed as carry-in for the two-argument shifted() calls.
        auto &&d0cvtd = simd_cast<storage_type1>(data0);
        auto &&d1cvtd = simd_cast<storage_type0>(data1);
        constexpr int size0 = storage_type0::size();
        constexpr int size1 = storage_type1::size();

        // Branch ladder over the rotation distance; each case composes the
        // rotation out of shifted() calls with the appropriate carry-in.
        if (amount == size0 && std::is_same<storage_type0, storage_type1>::value) {
            // Equal halves, rotation by exactly one half: swap the halves.
            return {std::move(d1cvtd), std::move(d0cvtd)};
        } else if (amount < size1) {
            return {data0.shifted(amount, d1cvtd), data1.shifted(amount, d0cvtd)};
        } else if (amount == size1) {
            return {data0.shifted(amount, d1cvtd), std::move(d0cvtd)};
        } else if (int(size()) - amount < size1) {
            return {data0.shifted(amount - int(size()), d1cvtd.shifted(size1 - size0)),
                    data1.shifted(amount - int(size()), data0.shifted(size0 - size1))};
        } else if (int(size()) - amount == size1) {
            return {data0.shifted(-size1, d1cvtd.shifted(size1 - size0)),
                    simd_cast<storage_type1>(data0.shifted(size0 - size1))};
        } else if (amount <= size0) {
            return {data0.shifted(size1, d1cvtd).shifted(amount - size1, data0),
                    simd_cast<storage_type1>(data0.shifted(amount - size1))};
        } else {
            return {data0.shifted(size1, d1cvtd).shifted(amount - size1, data0),
                    simd_cast<storage_type1>(data0.shifted(amount - size1, d1cvtd))};
        }
        // Unreachable: the if/else chain above always returns.
        return *this;
#endif
    }
1305 
1306     // interleaveLow/-High {{{2
1307     ///\internal \copydoc Vector::interleaveLow
    Vc_INTRINSIC fixed_size_simd<T, N> interleaveLow(const SimdArray &x) const
    {
        // return data0[0], x.data0[0], data0[1], x.data0[1], ...
        // Both result halves come from interleaving the low storage halves.
        return {data0.interleaveLow(x.data0),
                simd_cast<storage_type1>(data0.interleaveHigh(x.data0))};
    }
    ///\internal \copydoc Vector::interleaveHigh
    // Dispatch on whether the two halves have equal size; the unequal case
    // needs extra stitching (see interleaveHighImpl below).
    Vc_INTRINSIC fixed_size_simd<T, N> interleaveHigh(const SimdArray &x) const
    {
        return interleaveHighImpl(
            x,
            std::integral_constant<bool, storage_type0::Size == storage_type1::Size>());
    }

private:
    ///\internal
    // Equal-size halves: the high interleave is built entirely from data1.
    Vc_INTRINSIC fixed_size_simd<T, N> interleaveHighImpl(const SimdArray &x, std::true_type) const
    {
        return {data1.interleaveLow(x.data1), data1.interleaveHigh(x.data1)};
    }
    ///\internal
    // Unequal-size halves: part of data0's high interleave spills into the
    // low result half and is combined with data1's low interleave via a
    // carry-in shift.
    inline fixed_size_simd<T, N> interleaveHighImpl(const SimdArray &x, std::false_type) const
    {
        return {data0.interleaveHigh(x.data0)
                    .shifted(storage_type1::Size,
                             simd_cast<storage_type0>(data1.interleaveLow(x.data1))),
                data1.interleaveHigh(x.data1)};
    }
1336 
1337 public:
1338     ///\copybrief Vector::reversed
    // Returns the elements in reverse order.
    inline fixed_size_simd<T, N> reversed() const //{{{2
    {
        if (std::is_same<storage_type0, storage_type1>::value) {
            // Equal halves: reverse each half and swap them (with casts to
            // satisfy the differing static types).
            return {simd_cast<storage_type0>(data1).reversed(),
                    simd_cast<storage_type1>(data0).reversed()};
        } else {
#ifdef Vc_MSVC
            // MSVC fails to find a SimdArray::shifted function with 2 arguments. So use
            // store
            // -> load to implement the function instead.
            alignas(MemoryAlignment) T tmp[N];
            data1.reversed().store(&tmp[0], Vc::Aligned);
            data0.reversed().store(&tmp[data1.size()], Vc::Unaligned);
            return fixed_size_simd<T, N>{&tmp[0], Vc::Aligned};
#else
            // Unequal halves: stitch the reversal with carry-in shifts to
            // account for the size difference between the halves.
            return {data0.shifted(storage_type1::Size, data1).reversed(),
                    simd_cast<storage_type1>(data0.reversed().shifted(
                        storage_type0::Size - storage_type1::Size))};
#endif
        }
    }
1360     ///\copydoc Vector::sorted
    // Returns the elements sorted in ascending order; dispatches on whether
    // the two halves have equal size.
    inline fixed_size_simd<T, N> sorted() const  //{{{2
    {
        return sortedImpl(
            std::integral_constant<bool, storage_type0::Size == storage_type1::Size>());
    }

    ///\internal
    // Equal-size halves: merge step over the two pre-sorted halves. Sorting
    // one half ascending and the other descending, then taking element-wise
    // min/max, yields two half-vectors that only need a final per-half sort.
    Vc_INTRINSIC fixed_size_simd<T, N> sortedImpl(std::true_type) const
    {
#ifdef Vc_DEBUG_SORTED
        std::cerr << "-- " << data0 << data1 << '\n';
#endif
        const auto a = data0.sorted();
        const auto b = data1.sorted().reversed();
        const auto lo = Vc::min(a, b);
        const auto hi = Vc::max(a, b);
        return {lo.sorted(), hi.sorted()};
    }

    ///\internal
    // Unequal-size halves: pad up to the next power of two with the maximum
    // representable value (or +inf) so the padding sorts to the end, sort the
    // padded array, then cast back down to N elements.
    Vc_INTRINSIC fixed_size_simd<T, N> sortedImpl(std::false_type) const
    {
        using SortableArray =
            fixed_size_simd<value_type, Common::NextPowerOfTwo<size()>::value>;
        auto sortable = simd_cast<SortableArray>(*this);
        for (std::size_t i = Size; i < SortableArray::Size; ++i) {
            using limits = std::numeric_limits<value_type>;
            if (limits::has_infinity) {
                sortable[i] = limits::infinity();
            } else {
                sortable[i] = std::numeric_limits<value_type>::max();
            }
        }
        return simd_cast<fixed_size_simd<T, N>>(sortable.sorted());

        /* The following implementation appears to be less efficient. But this may need further
         * work.
        const auto a = data0.sorted();
        const auto b = data1.sorted();
#ifdef Vc_DEBUG_SORTED
        std::cerr << "== " << a << b << '\n';
#endif
        auto aIt = Vc::begin(a);
        auto bIt = Vc::begin(b);
        const auto aEnd = Vc::end(a);
        const auto bEnd = Vc::end(b);
        return SimdArray::generate([&](std::size_t) {
            if (aIt == aEnd) {
                return *(bIt++);
            }
            if (bIt == bEnd) {
                return *(aIt++);
            }
            if (*aIt < *bIt) {
                return *(aIt++);
            } else {
                return *(bIt++);
            }
        });
        */
    }
1422 
    /// \name Deprecated Members
    ///@{

    ///\copydoc size
    ///\deprecated Use size() instead.
    static constexpr std::size_t Size = size();

    /// \copydoc Vector::exponent
    // Per-half application of the free function exponent().
    Vc_DEPRECATED("use exponent(x) instead")
        Vc_INTRINSIC fixed_size_simd<T, N> exponent() const
    {
        return {exponent(data0), exponent(data1)};
    }

    /// \copydoc Vector::isNegative
    // Per-half application of the free function isnegative().
    Vc_DEPRECATED("use isnegative(x) instead") Vc_INTRINSIC MaskType isNegative() const
    {
        return {isnegative(data0), isnegative(data1)};
    }

    ///\copydoc Vector::copySign
    // Per-half application of Vc::copysign with the matching halves of x.
    Vc_DEPRECATED("use copysign(x, y) instead")
        Vc_INTRINSIC fixed_size_simd<T, N> copySign(const SimdArray &x) const
    {
        return {Vc::copysign(data0, x.data0),
                Vc::copysign(data1, x.data1)};
    }
    ///@}
1451 
1452     // internal_data0/1 {{{2
    // Grant the free accessor functions direct access to data0/data1.
    friend storage_type0 &internal_data0<>(SimdArray &x);
    friend storage_type1 &internal_data1<>(SimdArray &x);
    friend const storage_type0 &internal_data0<>(const SimdArray &x);
    friend const storage_type1 &internal_data1<>(const SimdArray &x);

    /// \internal
    // Construct directly from the two storage halves (used by the factory
    // functions and braced-init returns throughout this class).
    Vc_INTRINSIC SimdArray(storage_type0 &&x, storage_type1 &&y) //{{{2
        : data0(std::move(x)), data1(std::move(y))
    {
    }

    // Aligned operator new/delete matching the low half's alignment.
    Vc_FREE_STORE_OPERATORS_ALIGNED(alignof(storage_type0));

private: //{{{2
    // The alignas attribute attached to the class declaration above is ignored by ICC
    // 17.0.0 (at least). So just move the alignas attribute down here where it works for
    // all compilers.
    alignas(static_cast<std::size_t>(
        Common::BoundedAlignment<Common::NextPowerOfTwo<N>::value * sizeof(V) /
                                 V::size()>::value)) storage_type0 data0;
    storage_type1 data1;
1474 };
#undef Vc_CURRENT_CLASS_NAME
// Out-of-class definitions of the static constexpr data members. Required (before
// C++17 inline variables) so that odr-uses of Size and MemoryAlignment link.
template <typename T, std::size_t N, typename V, std::size_t M>
constexpr std::size_t SimdArray<T, N, V, M>::Size;
template <typename T, std::size_t N, typename V, std::size_t M>
constexpr std::size_t SimdArray<T, N, V, M>::MemoryAlignment;
1480 
// gatherImplementation {{{2
///\internal Unmasked gather: the index vector is split so that data0 gathers the
/// low half of the indexes and data1 the high half.
template <typename T, std::size_t N, typename VectorType, std::size_t M>
template <class MT, class IT, int Scale>
inline void SimdArray<T, N, VectorType, M>::gatherImplementation(
    const Common::GatherArguments<MT, IT, Scale> &args)
{
    data0.gather(Common::make_gather<Scale>(
        args.address, Split::lo(Common::Operations::gather(), args.indexes)));
    data1.gather(Common::make_gather<Scale>(
        args.address, Split::hi(Common::Operations::gather(), args.indexes)));
}
///\internal Masked gather: the mask is split the same way as the index vector so
/// each half only loads its own active lanes.
template <typename T, std::size_t N, typename VectorType, std::size_t M>
template <class MT, class IT, int Scale>
inline void SimdArray<T, N, VectorType, M>::gatherImplementation(
    const Common::GatherArguments<MT, IT, Scale> &args, MaskArgument mask)
{
    data0.gather(Common::make_gather<Scale>(
                     args.address, Split::lo(Common::Operations::gather(), args.indexes)),
                 Split::lo(mask));
    data1.gather(Common::make_gather<Scale>(
                     args.address, Split::hi(Common::Operations::gather(), args.indexes)),
                 Split::hi(mask));
}
1504 
// scatterImplementation {{{2
///\internal Unmasked scatter: data0 stores to the addresses given by the low half
/// of the indexes, data1 to the high half. The first call must NOT forward
/// \p indexes (see the inline comment) because the second call still needs it.
template <typename T, std::size_t N, typename VectorType, std::size_t M>
template <typename MT, typename IT>
inline void SimdArray<T, N, VectorType, M>::scatterImplementation(MT *mem,
                                                                 IT &&indexes) const
{
    data0.scatter(mem, Split::lo(Common::Operations::gather(),
                                indexes));  // don't forward indexes - it could move and
                                            // thus break the next line
    data1.scatter(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)));
}
///\internal Masked scatter: mask and indexes are split consistently; only active
/// lanes of each half are written.
template <typename T, std::size_t N, typename VectorType, std::size_t M>
template <typename MT, typename IT>
inline void SimdArray<T, N, VectorType, M>::scatterImplementation(MT *mem,
                                                                 IT &&indexes, MaskArgument mask) const
{
    data0.scatter(mem, Split::lo(Common::Operations::gather(), indexes),
                 Split::lo(mask));  // don't forward indexes - it could move and
                                    // thus break the next line
    data1.scatter(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)),
                 Split::hi(mask));
}
1527 
// internal_data0/1 (SimdArray) {{{1
// NOTE(review): Vc_INTRINSIC (forced inlining) is omitted under MSVC — presumably a
// compiler workaround; confirm against upstream history before changing.
///\internal Returns the first data member of a generic SimdArray
template <typename T, std::size_t N, typename V, std::size_t M>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
typename SimdArrayTraits<T, N>::storage_type0 &internal_data0(
    SimdArray<T, N, V, M> &x)
{
    return x.data0;
}
///\internal Returns the second data member of a generic SimdArray
template <typename T, std::size_t N, typename V, std::size_t M>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
typename SimdArrayTraits<T, N>::storage_type1 &internal_data1(
    SimdArray<T, N, V, M> &x)
{
    return x.data1;
}
///\internal Returns the first data member of a generic SimdArray (const overload)
template <typename T, std::size_t N, typename V, std::size_t M>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
const typename SimdArrayTraits<T, N>::storage_type0 &internal_data0(
    const SimdArray<T, N, V, M> &x)
{
    return x.data0;
}
///\internal Returns the second data member of a generic SimdArray (const overload)
template <typename T, std::size_t N, typename V, std::size_t M>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
const typename SimdArrayTraits<T, N>::storage_type1 &internal_data1(
    const SimdArray<T, N, V, M> &x)
{
    return x.data1;
}
1569 
// MSVC workaround for SimdArray(storage_type0, storage_type1) ctor{{{1
// MSVC sometimes stores x to data1. By first broadcasting 0 and then assigning y
// in the body the bug is suppressed.
#if defined Vc_MSVC && defined Vc_IMPL_SSE && !defined Vc_IMPL_AVX
/// \internal Specialization working around the MSVC codegen bug described above.
/// Keep the exact init-then-assign sequence; folding it back into the mem-init
/// list re-triggers the bug.
template <>
Vc_INTRINSIC SimdArray<double, 8>::SimdArray(fixed_size_simd<double, 4> &&x,
                                             fixed_size_simd<double, 4> &&y)
    : data0(x), data1(0)
{
    data1 = y;
}
#endif
1582 
// binary operators {{{
namespace Detail
{
// Component-wise binary operators for fixed_size_simd. Each operator gets two
// overloads: the first is selected when the array is "atomic" (backed by a single
// native vector) and applies op directly to the internal vector; the second is
// selected for bisected arrays and applies op to both halves.
#define Vc_FIXED_OP(op)                                                                  \
    template <class T, int N,                                                            \
              class = typename std::enable_if<fixed_size_simd<T, N>::is_atomic>::type>   \
    Vc_INTRINSIC fixed_size_simd<T, N> operator op(const fixed_size_simd<T, N> &a,       \
                                                   const fixed_size_simd<T, N> &b)       \
    {                                                                                    \
        return {private_init, internal_data(a) op internal_data(b)};                     \
    }                                                                                    \
    template <class T, int N,                                                            \
              class = typename std::enable_if<!fixed_size_simd<T, N>::is_atomic>::type,  \
              class = T>                                                                 \
    Vc_INTRINSIC fixed_size_simd<T, N> operator op(const fixed_size_simd<T, N> &a,       \
                                                   const fixed_size_simd<T, N> &b)       \
    {                                                                                    \
        return {internal_data0(a) op internal_data0(b),                                  \
                internal_data1(a) op internal_data1(b)};                                 \
    }
Vc_ALL_ARITHMETICS(Vc_FIXED_OP);
Vc_ALL_BINARY(Vc_FIXED_OP);
Vc_ALL_SHIFTS(Vc_FIXED_OP);
#undef Vc_FIXED_OP
// Same atomic/bisected dispatch for the compare operators, which return a mask
// instead of a vector.
#define Vc_FIXED_OP(op)                                                                  \
    template <class T, int N,                                                            \
              class = typename std::enable_if<fixed_size_simd<T, N>::is_atomic>::type>   \
    Vc_INTRINSIC fixed_size_simd_mask<T, N> operator op(const fixed_size_simd<T, N> &a,  \
                                                        const fixed_size_simd<T, N> &b)  \
    {                                                                                    \
        return {private_init, internal_data(a) op internal_data(b)};                     \
    }                                                                                    \
    template <class T, int N,                                                            \
              class = typename std::enable_if<!fixed_size_simd<T, N>::is_atomic>::type,  \
              class = T>                                                                 \
    Vc_INTRINSIC fixed_size_simd_mask<T, N> operator op(const fixed_size_simd<T, N> &a,  \
                                                        const fixed_size_simd<T, N> &b)  \
    {                                                                                    \
        return {internal_data0(a) op internal_data0(b),                                  \
                internal_data1(a) op internal_data1(b)};                                 \
    }
Vc_ALL_COMPARES(Vc_FIXED_OP);
#undef Vc_FIXED_OP
}  // namespace Detail
1627 
1628 // }}}
1629 // binary operators {{{1
namespace result_vector_type_internal
{
/// \internal Strips cv-qualifiers and references (pre-C++20 std::remove_cvref).
template <typename T>
using remove_cvref = typename std::remove_cv<typename std::remove_reference<T>::type>::type;

/// \internal True for integral types that may be wider than int. `long` and
/// `unsigned long` are always included because they can be 64 bits on some
/// platforms even when they are 32 bits here.
template <typename T>
using is_integer_larger_than_int = std::integral_constant<
    bool, std::is_integral<T>::value &&(sizeof(T) > sizeof(int) ||
                                        std::is_same<T, long>::value ||
                                        std::is_same<T, unsigned long>::value)>;

/// \internal Primary template. The trailing bool parameter encodes whether the
/// mixed binary operator should participate in overload resolution; only the
/// `true` case is specialized, so the `false` case SFINAEs out.
template <
    typename L, typename R,
    std::size_t N = Traits::isSimdArray<L>::value ? Traits::simd_vector_size<L>::value
                                                  : Traits::simd_vector_size<R>::value,
    bool = (Traits::isSimdArray<L>::value ||
            Traits::isSimdArray<R>::value) &&  // one of the operands must be a SimdArray
           !(Traits::is_fixed_size_simd<L>::value &&        // if both are fixed_size, use
             Traits::is_fixed_size_simd<R>::value) &&       // common/operators.h
           ((std::is_arithmetic<remove_cvref<L>>::value &&  // one of the operands is a
             !is_integer_larger_than_int<remove_cvref<L>>::value) ||  // scalar type
            (std::is_arithmetic<remove_cvref<R>>::value &&
             !is_integer_larger_than_int<remove_cvref<R>>::value) ||
            // or one of the operands is Vector<T> with Vector<T>::size() ==
            // SimdArray::size()
            Traits::simd_vector_size<L>::value == Traits::simd_vector_size<R>::value)>
struct evaluate;

template <typename L, typename R, std::size_t N> struct evaluate<L, R, N, true>
{
private:
    using LScalar = Traits::entry_type_of<L>;
    using RScalar = Traits::entry_type_of<R>;

    template <bool B, typename T, typename F>
    using conditional = typename std::conditional<B, T, F>::type;

public:
    // In principle we want the exact same rules for SimdArray<T> ⨉ SimdArray<U> as the standard
    // defines for T ⨉ U. BUT: short ⨉ short returns int (because all integral types smaller than
    // int are promoted to int before any operation). This would imply that SIMD types with integral
    // types smaller than int are more or less useless - and you could use SimdArray<int> from the
    // start. Therefore we special-case those operations where the scalar type of both operands is
    // integral and smaller than int.
    // In addition, there is no generic support for 64-bit int SIMD types. Therefore
    // promotion to a 64-bit integral type (including `long` because it can potentially have 64
    // bits) also is not done. But if one of the operands is a scalar type that is larger than int
    // then the operator is disabled altogether. We do not want an implicit demotion.
    using type = fixed_size_simd<
        conditional<(std::is_integral<LScalar>::value &&std::is_integral<RScalar>::value &&
                     sizeof(LScalar) < sizeof(int) &&
                     sizeof(RScalar) < sizeof(int)),
                    conditional<(sizeof(LScalar) == sizeof(RScalar)),
                                conditional<std::is_unsigned<LScalar>::value, LScalar, RScalar>,
                                conditional<(sizeof(LScalar) > sizeof(RScalar)), LScalar, RScalar>>,
                    decltype(std::declval<LScalar>() + std::declval<RScalar>())>,
        N>;
};

}  // namespace result_vector_type_internal

/// \internal The promoted result type of a mixed binary operator on L and R.
template <typename L, typename R>
using result_vector_type = typename result_vector_type_internal::evaluate<L, R>::type;
1693 
// Mixed-operand operators: both sides are converted to the common promoted type
// (result_vector_type) and the work is forwarded to the Detail:: operators above.
#define Vc_BINARY_OPERATORS_(op_)                                                        \
    /*!\brief Applies op_ component-wise and concurrently.  */                           \
    template <typename L, typename R>                                                    \
    Vc_INTRINSIC result_vector_type<L, R> operator op_(L &&lhs, R &&rhs)                 \
    {                                                                                    \
        using Return = result_vector_type<L, R>;                                         \
        return Vc::Detail::operator op_(                                                 \
            static_cast<const Return &>(std::forward<L>(lhs)),                           \
            static_cast<const Return &>(std::forward<R>(rhs)));                          \
    }
/**
 * \name Arithmetic and Bitwise Operators
 *
 * Applies the operator component-wise and concurrently on \p lhs and \p rhs and returns
 * a new SimdArray object containing the result values.
 *
 * This operator only participates in overload resolution if:
 * \li At least one of the template parameters \p L or \p R is a SimdArray type.
 * \li Either \p L or \p R is a fundamental arithmetic type but not an integral type
 *     larger than \c int \n
 *     or \n
 *     \p L or \p R is a Vc::Vector type with equal number of elements (Vector::size() ==
 *     SimdArray::size()).
 *
 * The return type of the operator is a SimdArray type using the more precise EntryType of
 * \p L or \p R and the same number of elements as the SimdArray argument(s).
 */
///@{
Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATORS_);
Vc_ALL_BINARY(Vc_BINARY_OPERATORS_);
///@}
#undef Vc_BINARY_OPERATORS_
// Mixed-operand compare operators: convert both sides to the promoted type and
// compare; the result is the promoted type's mask.
#define Vc_BINARY_OPERATORS_(op_)                                                        \
    /*!\brief Applies op_ component-wise and concurrently.  */                           \
    template <typename L, typename R>                                                    \
    Vc_INTRINSIC typename result_vector_type<L, R>::mask_type operator op_(L &&lhs,      \
                                                                           R &&rhs)      \
    {                                                                                    \
        using Promote = result_vector_type<L, R>;                                        \
        return Promote(std::forward<L>(lhs)) op_ Promote(std::forward<R>(rhs));          \
    }
/**
 * \name Compare Operators
 *
 * Applies the operator component-wise and concurrently on \p lhs and \p rhs and returns
 * a new SimdMaskArray object containing the result values.
 *
 * This operator only participates in overload resolution if (same rules as above):
 * \li At least one of the template parameters \p L or \p R is a SimdArray type.
 * \li Either \p L or \p R is a fundamental arithmetic type but not an integral type
 *     larger than \c int \n
 *     or \n
 *     \p L or \p R is a Vc::Vector type with equal number of elements (Vector::size() ==
 *     SimdArray::size()).
 *
 * The return type of the operator is a SimdMaskArray type using the more precise EntryType of
 * \p L or \p R and the same number of elements as the SimdArray argument(s).
 */
///@{
Vc_ALL_COMPARES(Vc_BINARY_OPERATORS_);
///@}
#undef Vc_BINARY_OPERATORS_
1756 
// math functions {{{1
// Forwards a unary math function to the per-element implementation via the
// corresponding Common::Operations forwarder. Two overloads: one for generic
// SimdArray, one for fixed_size_simd; both return a vector of the same shape.
#define Vc_FORWARD_UNARY_OPERATOR(name_)                                                 \
    /*!\brief Applies the std::name_ function component-wise and concurrently. */        \
    template <typename T, std::size_t N, typename V, std::size_t M>                      \
    inline fixed_size_simd<T, N> name_(const SimdArray<T, N, V, M> &x)                   \
    {                                                                                    \
        return fixed_size_simd<T, N>::fromOperation(                                     \
            Common::Operations::Forward_##name_(), x);                                   \
    }                                                                                    \
    template <class T, int N>                                                            \
    fixed_size_simd<T, N> name_(const fixed_size_simd<T, N> &x)                          \
    {                                                                                    \
        return fixed_size_simd<T, N>::fromOperation(                                     \
            Common::Operations::Forward_##name_(), x);                                   \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

// Same forwarding for classification functions, which return a mask.
#define Vc_FORWARD_UNARY_BOOL_OPERATOR(name_)                                            \
    /*!\brief Applies the std::name_ function component-wise and concurrently. */        \
    template <typename T, std::size_t N, typename V, std::size_t M>                      \
    inline fixed_size_simd_mask<T, N> name_(const SimdArray<T, N, V, M> &x)              \
    {                                                                                    \
        return fixed_size_simd_mask<T, N>::fromOperation(                                \
            Common::Operations::Forward_##name_(), x);                                   \
    }                                                                                    \
    template <class T, int N>                                                            \
    fixed_size_simd_mask<T, N> name_(const fixed_size_simd<T, N> &x)                     \
    {                                                                                    \
        return fixed_size_simd_mask<T, N>::fromOperation(                                \
            Common::Operations::Forward_##name_(), x);                                   \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

// Forwarding for binary math functions; both operands are SimdArrays of equal type.
#define Vc_FORWARD_BINARY_OPERATOR(name_)                                                \
    /*!\brief Applies the std::name_ function component-wise and concurrently. */        \
    template <typename T, std::size_t N, typename V, std::size_t M>                      \
    inline fixed_size_simd<T, N> name_(const SimdArray<T, N, V, M> &x,                   \
                                       const SimdArray<T, N, V, M> &y)                   \
    {                                                                                    \
        return fixed_size_simd<T, N>::fromOperation(                                     \
            Common::Operations::Forward_##name_(), x, y);                                \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
1800 
1801 /**
1802  * \name Math functions
1803  * These functions evaluate the
1804  */
1805 ///@{
1806 Vc_FORWARD_UNARY_OPERATOR(abs);
1807 Vc_FORWARD_UNARY_OPERATOR(asin);
1808 Vc_FORWARD_UNARY_OPERATOR(atan);
1809 Vc_FORWARD_BINARY_OPERATOR(atan2);
1810 Vc_FORWARD_UNARY_OPERATOR(ceil);
1811 Vc_FORWARD_BINARY_OPERATOR(copysign);
1812 Vc_FORWARD_UNARY_OPERATOR(cos);
1813 Vc_FORWARD_UNARY_OPERATOR(exp);
1814 Vc_FORWARD_UNARY_OPERATOR(exponent);
1815 Vc_FORWARD_UNARY_OPERATOR(floor);
1816 /// Applies the std::fma function component-wise and concurrently.
1817 template <typename T, std::size_t N>
1818 inline SimdArray<T, N> fma(const SimdArray<T, N> &a, const SimdArray<T, N> &b,
1819                            const SimdArray<T, N> &c)
1820 {
1821     return SimdArray<T, N>::fromOperation(Common::Operations::Forward_fma(), a, b, c);
1822 }
1823 Vc_FORWARD_UNARY_BOOL_OPERATOR(isfinite);
1824 Vc_FORWARD_UNARY_BOOL_OPERATOR(isinf);
1825 Vc_FORWARD_UNARY_BOOL_OPERATOR(isnan);
1826 Vc_FORWARD_UNARY_BOOL_OPERATOR(isnegative);
1827 /// Applies the std::frexp function component-wise and concurrently.
1828 template <typename T, std::size_t N>
1829 inline SimdArray<T, N> frexp(const SimdArray<T, N> &x, SimdArray<int, N> *e)
1830 {
1831     return SimdArray<T, N>::fromOperation(Common::Operations::Forward_frexp(), x, e);
1832 }
1833 /// Applies the std::ldexp function component-wise and concurrently.
1834 template <typename T, std::size_t N>
1835 inline SimdArray<T, N> ldexp(const SimdArray<T, N> &x, const SimdArray<int, N> &e)
1836 {
1837     return SimdArray<T, N>::fromOperation(Common::Operations::Forward_ldexp(), x, e);
1838 }
1839 Vc_FORWARD_UNARY_OPERATOR(log);
1840 Vc_FORWARD_UNARY_OPERATOR(log10);
1841 Vc_FORWARD_UNARY_OPERATOR(log2);
1842 Vc_FORWARD_UNARY_OPERATOR(reciprocal);
1843 Vc_FORWARD_UNARY_OPERATOR(round);
1844 Vc_FORWARD_UNARY_OPERATOR(rsqrt);
1845 Vc_FORWARD_UNARY_OPERATOR(sin);
1846 /// Determines sine and cosine concurrently and component-wise on \p x.
1847 template <typename T, std::size_t N>
1848 void sincos(const SimdArray<T, N> &x, SimdArray<T, N> *sin, SimdArray<T, N> *cos)
1849 {
1850     SimdArray<T, N>::callOperation(Common::Operations::Forward_sincos(), x, sin, cos);
1851 }
1852 Vc_FORWARD_UNARY_OPERATOR(sqrt);
1853 Vc_FORWARD_UNARY_OPERATOR(trunc);
1854 Vc_FORWARD_BINARY_OPERATOR(min);
1855 Vc_FORWARD_BINARY_OPERATOR(max);
1856 ///@}
1857 #undef Vc_FORWARD_UNARY_OPERATOR
1858 #undef Vc_FORWARD_UNARY_BOOL_OPERATOR
1859 #undef Vc_FORWARD_BINARY_OPERATOR
1860 
// simd_cast {{{1
// NOTE(review): Under MSVC the simd_cast overloads are given distinct dummy default
// parameters — presumably to keep the compiler from treating otherwise SFINAE-
// distinguished overloads as redefinitions; confirm against upstream history
// before removing. On other compilers the macros expand to nothing.
#ifdef Vc_MSVC
#define Vc_DUMMY_ARG0 , int = 0
#define Vc_DUMMY_ARG1 , long = 0
#define Vc_DUMMY_ARG2 , short = 0
#define Vc_DUMMY_ARG3 , char = '0'
#define Vc_DUMMY_ARG4 , unsigned = 0u
#define Vc_DUMMY_ARG5 , unsigned short = 0u
#else
#define Vc_DUMMY_ARG0
#define Vc_DUMMY_ARG1
#define Vc_DUMMY_ARG2
#define Vc_DUMMY_ARG3
#define Vc_DUMMY_ARG4
#define Vc_DUMMY_ARG5
#endif  // Vc_MSVC
1877 
1878 // simd_cast_impl_smaller_input {{{2
1879 // The following function can be implemented without the sizeof...(From) overload.
1880 // However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the
1881 // function in two works around the issue.
1882 template <typename Return, std::size_t N, typename T, typename... From>
1883 Vc_INTRINSIC Vc_CONST enable_if<sizeof...(From) != 0, Return>
1884 simd_cast_impl_smaller_input(const From &... xs, const T &last)
1885 {
1886     Return r = simd_cast<Return>(xs...);
1887     for (size_t i = 0; i < N; ++i) {
1888         r[i + N * sizeof...(From)] = static_cast<typename Return::EntryType>(last[i]);
1889     }
1890     return r;
1891 }
1892 template <typename Return, std::size_t N, typename T>
1893 Vc_INTRINSIC Vc_CONST Return simd_cast_impl_smaller_input(const T &last)
1894 {
1895     Return r = Return();
1896     for (size_t i = 0; i < N; ++i) {
1897         r[i] = static_cast<typename Return::EntryType>(last[i]);
1898     }
1899     return r;
1900 }
1901 template <typename Return, std::size_t N, typename T, typename... From>
1902 Vc_INTRINSIC Vc_CONST enable_if<sizeof...(From) != 0, Return> simd_cast_impl_larger_input(
1903     const From &... xs, const T &last)
1904 {
1905     Return r = simd_cast<Return>(xs...);
1906     for (size_t i = N * sizeof...(From); i < Return::Size; ++i) {
1907         r[i] = static_cast<typename Return::EntryType>(last[i - N * sizeof...(From)]);
1908     }
1909     return r;
1910 }
1911 template <typename Return, std::size_t N, typename T>
1912 Vc_INTRINSIC Vc_CONST Return simd_cast_impl_larger_input(const T &last)
1913 {
1914     Return r = Return();
1915     for (size_t i = 0; i < Return::size(); ++i) {
1916         r[i] = static_cast<typename Return::EntryType>(last[i]);
1917     }
1918     return r;
1919 }
1920 
// simd_cast_without_last (declaration) {{{2
/// \internal Casts all arguments except the trailing one to \p Return
/// (definition appears later in the file).
template <typename Return, typename T, typename... From>
Vc_INTRINSIC_L Vc_CONST_L Return
    simd_cast_without_last(const From &... xs, const T &) Vc_INTRINSIC_R Vc_CONST_R;
1925 
// are_all_types_equal {{{2
/// \internal Evaluates to true iff all types in the pack are the same type.
template <typename... Ts> struct are_all_types_equal;
/// \internal A pack of one type is trivially all-equal.
template <typename T>
struct are_all_types_equal<T> : public std::true_type
{
};
/// \internal Recursion: the first two types must match and the tail (including the
/// second type) must itself be all-equal.
template <typename T0, typename T1, typename... Ts>
struct are_all_types_equal<T0, T1, Ts...>
    : public std::integral_constant<
          bool, std::is_same<T0, T1>::value && are_all_types_equal<T1, Ts...>::value>
{
};
1938 
// simd_cast_interleaved_argument_order (declarations) {{{2
/*! \internal
  The need for simd_cast_interleaved_argument_order stems from a shortcoming in pack
  expansion of variadic templates in C++. For a simd_cast with SimdArray arguments that
  are bisectable (i.e.  \c storage_type0 and \c storage_type1 are equal) the generic
  implementation needs to forward to a simd_cast of the \c internal_data0 and \c
  internal_data1 of the arguments. But the required order of arguments is
  `internal_data0(arg0), internal_data1(arg0), internal_data0(arg1), ...`. This is
  impossible to achieve with pack expansion. It is only possible to write
  `internal_data0(args)..., internal_data1(args)...` and thus have the argument order
  mixed up. The simd_cast_interleaved_argument_order “simply” calls simd_cast with the
  arguments correctly reordered (i.e. interleaved).

  The implementation of simd_cast_interleaved_argument_order is done generically, so that
  it supports any number of arguments. The central idea of the implementation is an
  `extract` function which returns one value of an argument pack determined via an index
  passed as template argument. This index is generated via an index_sequence. The
  `extract` function uses two argument packs (of equal size) to easily return values from
  the front and middle of the argument pack (for doing the deinterleave).
 */
/// \internal Declaration only; the definition appears later in the file.
template <typename Return, typename... Ts>
Vc_INTRINSIC Vc_CONST Return
    simd_cast_interleaved_argument_order(const Ts &... a, const Ts &... b);
1962 
// simd_cast_with_offset (declarations and one impl) {{{2
// offset == 0 {{{3
template <typename Return, std::size_t offset, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<From, Froms...>::value && offset == 0), Return>
        simd_cast_with_offset(const From &x, const Froms &... xs);
// offset > 0 && offset divisible by Return::Size {{{3
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size == 0), Return>
        simd_cast_with_offset(const From &x);
// offset > 0 && offset NOT divisible && Return is non-atomic simd(mask)array {{{3
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
               ((Traits::isSimdArray<Return>::value &&
                 !Traits::isAtomicSimdArray<Return>::value) ||
                (Traits::isSimdMaskArray<Return>::value &&
                 !Traits::isAtomicSimdMaskArray<Return>::value))),
              Return>
        simd_cast_with_offset(const From &x);
// offset > 0 && offset NOT divisible && Return is atomic simd(mask)array {{{3
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
               ((Traits::isSimdArray<Return>::value &&
                 Traits::isAtomicSimdArray<Return>::value) ||
                (Traits::isSimdMaskArray<Return>::value &&
                 Traits::isAtomicSimdMaskArray<Return>::value))),
              Return>
        simd_cast_with_offset(const From &x);
// offset > first argument (drops first arg) {{{3
/// \internal The offset lies entirely beyond the first argument: drop it and
/// recurse with the offset reduced by the dropped argument's element count.
template <typename Return, std::size_t offset, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST enable_if<
    (are_all_types_equal<From, Froms...>::value && From::Size <= offset), Return>
    simd_cast_with_offset(const From &, const Froms &... xs)
{
    return simd_cast_with_offset<Return, offset - From::Size>(xs...);
}

// offset > first and only argument (returns Zero) {{{3
/// \internal No input remains at this offset: the result is zero.
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST enable_if<(From::Size <= offset), Return> simd_cast_with_offset(
    const From &)
{
    return Return(0);
}
2010 
// first_type_of {{{2
/// \internal Yields the first type of a non-empty template parameter pack.
template <typename T, typename... Ts> struct first_type_of_impl
{
    using type = T;
};
template <typename... Ts> using first_type_of = typename first_type_of_impl<Ts...>::type;
2017 
// simd_cast_drop_arguments (declarations) {{{2
/// \internal Single argument: nothing to drop.
template <typename Return, typename From>
Vc_INTRINSIC Vc_CONST Return simd_cast_drop_arguments(From x);
/// \internal The trailing argument still contributes entries to Return, so all
/// arguments are kept.
template <typename Return, typename... Froms>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<Froms...>::value &&
               sizeof...(Froms) * first_type_of<Froms...>::Size < Return::Size),
              Return>
        simd_cast_drop_arguments(Froms... xs, first_type_of<Froms...> x);
// The following function can be implemented without the sizeof...(From) overload.
// However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the
// function in two works around the issue.
/// \internal Return is already full before the last (unnamed) argument: drop it.
template <typename Return, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST enable_if<
    (are_all_types_equal<From, Froms...>::value &&
     (1 + sizeof...(Froms)) * From::Size >= Return::Size && sizeof...(Froms) != 0),
    Return>
simd_cast_drop_arguments(Froms... xs, From x, From);
/// \internal Two-argument case of the above: the unnamed trailing argument is dropped.
template <typename Return, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<From>::value && From::Size >= Return::Size), Return>
    simd_cast_drop_arguments(From x, From);
2040 
// NOTE(review): anonymous namespace in a header gives each translation unit its own
// copy of vc_debug_ — presumably intentional for a debug-only helper, but worth
// confirming since it is unusual for headers.
namespace
{
#ifdef Vc_DEBUG_SIMD_CAST
// Sequencing helper: its braced-init-list argument forces left-to-right evaluation
// of the streaming side effects; the function body is intentionally empty.
void debugDoNothing(const std::initializer_list<void *> &) {}
/// \internal Prints all arguments to std::cerr, comma-separated, wrapped in
/// \p prefix and \p suffix. Enabled only with Vc_DEBUG_SIMD_CAST.
template <typename T0, typename... Ts>
inline void vc_debug_(const char *prefix, const char *suffix, const T0 &arg0,
                      const Ts &... args)
{
    std::cerr << prefix << arg0;
    debugDoNothing({&(std::cerr << ", " << args)...});
    std::cerr << suffix;
}
#else
/// \internal No-op stub used when Vc_DEBUG_SIMD_CAST is not defined.
template <typename T0, typename... Ts>
Vc_INTRINSIC void vc_debug_(const char *, const char *, const T0 &, const Ts &...)
{
}
#endif
}  // unnamed namespace
2060 
// is_less trait{{{2
/// \internal Compile-time predicate: true iff A < B.
template <size_t A, size_t B>
struct is_less
    : public std::conditional<(A < B), std::true_type, std::false_type>::type {
};
2065 
// is_power_of_2 trait{{{2
/// \internal Compile-time predicate: true iff N is a power of two.
/// The original `((N - 1) & N) == 0` alone wrongly accepted N == 0 (0 is not a
/// power of two); the N != 0 guard fixes that edge case.
template <size_t N>
struct is_power_of_2
    : public std::integral_constant<bool, N != 0 && ((N - 1) & N) == 0> {
};
2070 
// simd_cast<T>(xs...) to SimdArray/-mask {{{2
// Generates the four simd_cast overloads that convert one or more native SIMD
// vectors/masks (NativeType_ = Vc::Vector or Vc::Mask) into a SimdArray /
// SimdMaskArray Return type:
//  {1} atomic Return, inputs provide fewer entries than Return::Size -> cast all inputs
//      into Return's single storage member;
//  {2} atomic Return, inputs cover at least Return::Size entries -> drop the last
//      argument and retry;
//  {3} non-atomic (bisected) Return, inputs reach past its left half -> fill both
//      storage halves (left via simd_cast_drop_arguments, right via
//      simd_cast_with_offset);
//  {4} non-atomic Return, inputs fill only the left half -> right half is zero.
// Fixed-size ABIs are excluded; they are handled by separate overloads below.
#define Vc_SIMDARRAY_CASTS(SimdArrayType_, NativeType_)                                  \
    template <typename Return, typename T, typename A, typename... Froms>                \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (Traits::isAtomic##SimdArrayType_<Return>::value &&                              \
         is_less<NativeType_<T, A>::Size * sizeof...(Froms), Return::Size>::value &&     \
         are_all_types_equal<NativeType_<T, A>, Froms...>::value &&                      \
         !detail::is_fixed_size_abi<A>::value),                                          \
        Return>                                                                          \
    simd_cast(NativeType_<T, A> x, Froms... xs)                                          \
    {                                                                                    \
        vc_debug_("simd_cast{1}(", ")\n", x, xs...);                                     \
        return {private_init, simd_cast<typename Return::storage_type>(x, xs...)};       \
    }                                                                                    \
    template <typename Return, typename T, typename A, typename... Froms>                \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (Traits::isAtomic##SimdArrayType_<Return>::value &&                              \
         !is_less<NativeType_<T, A>::Size * sizeof...(Froms), Return::Size>::value &&    \
         are_all_types_equal<NativeType_<T, A>, Froms...>::value &&                      \
         !detail::is_fixed_size_abi<A>::value),                                          \
        Return>                                                                          \
    simd_cast(NativeType_<T, A> x, Froms... xs)                                          \
    {                                                                                    \
        vc_debug_("simd_cast{2}(", ")\n", x, xs...);                                     \
        return {simd_cast_without_last<Return, NativeType_<T, A>, Froms...>(x, xs...)};  \
    }                                                                                    \
    template <typename Return, typename T, typename A, typename... Froms>                \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType_<Return>::value &&                          \
                   !Traits::isAtomic##SimdArrayType_<Return>::value &&                   \
                   is_less<Common::left_size<Return::Size>(),                            \
                           NativeType_<T, A>::Size *(1 + sizeof...(Froms))>::value &&    \
                   are_all_types_equal<NativeType_<T, A>, Froms...>::value &&            \
                   !detail::is_fixed_size_abi<A>::value),                                \
                  Return>                                                                \
        simd_cast(NativeType_<T, A> x, Froms... xs)                                      \
    {                                                                                    \
        vc_debug_("simd_cast{3}(", ")\n", x, xs...);                                     \
        using R0 = typename Return::storage_type0;                                       \
        using R1 = typename Return::storage_type1;                                       \
        return {simd_cast_drop_arguments<R0, Froms...>(x, xs...),                        \
                simd_cast_with_offset<R1, R0::Size>(x, xs...)};                          \
    }                                                                                    \
    template <typename Return, typename T, typename A, typename... Froms>                \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType_<Return>::value &&                          \
                   !Traits::isAtomic##SimdArrayType_<Return>::value &&                   \
                   !is_less<Common::left_size<Return::Size>(),                           \
                            NativeType_<T, A>::Size *(1 + sizeof...(Froms))>::value &&   \
                   are_all_types_equal<NativeType_<T, A>, Froms...>::value &&            \
                   !detail::is_fixed_size_abi<A>::value),                                \
                  Return>                                                                \
        simd_cast(NativeType_<T, A> x, Froms... xs)                                      \
    {                                                                                    \
        vc_debug_("simd_cast{4}(", ")\n", x, xs...);                                     \
        using R0 = typename Return::storage_type0;                                       \
        using R1 = typename Return::storage_type1;                                       \
        return {simd_cast<R0>(x, xs...), R1(0)};                                         \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

// Instantiate the overload set for vectors and for masks.
Vc_SIMDARRAY_CASTS(SimdArray, Vc::Vector);
Vc_SIMDARRAY_CASTS(SimdMaskArray, Vc::Mask);
#undef Vc_SIMDARRAY_CASTS
2135 
// simd_cast<SimdArray/-mask, offset>(V) {{{2
// Generates the simd_cast<Return, offset>(native vector/mask) overloads. `offset`
// counts in units of Return::Size entries of the input. Three cases:
//  - atomic Return: delegate to the native offset-cast of the single storage member;
//  - non-atomic Return whose entries [offset*Size, offset*Size + left_size) lie fully
//    inside the input: extract both halves of Return from the argument;
//  - otherwise only the left half can be extracted and the right half is zero.
#define Vc_SIMDARRAY_CASTS(SimdArrayType_, NativeType_)                                  \
    /* SIMD Vector/Mask to atomic SimdArray/simdmaskarray */                             \
    template <typename Return, int offset, typename T, typename A>                       \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<Traits::isAtomic##SimdArrayType_<Return>::value, Return>               \
        simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG0)                                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, atomic}(", ")\n", offset, x);                       \
        return {private_init, simd_cast<typename Return::storage_type, offset>(x)};      \
    }                                                                                    \
    /* both halves of Return array are extracted from argument */                        \
    template <typename Return, int offset, typename T, typename A>                       \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType_<Return>::value &&                          \
                   !Traits::isAtomic##SimdArrayType_<Return>::value &&                   \
                   Return::Size * offset + Common::left_size<Return::Size>() <           \
                       NativeType_<T, A>::Size),                                         \
                  Return>                                                                \
        simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG1)                                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, split Return}(", ")\n", offset, x);                 \
        using R0 = typename Return::storage_type0;                                       \
        constexpr int entries_offset = offset * Return::Size;                            \
        constexpr int entries_offset_right = entries_offset + R0::Size;                  \
        return {                                                                         \
            simd_cast_with_offset<typename Return::storage_type0, entries_offset>(x),    \
            simd_cast_with_offset<typename Return::storage_type1, entries_offset_right>( \
                x)};                                                                     \
    }                                                                                    \
    /* SIMD Vector/Mask to non-atomic SimdArray/simdmaskarray */                         \
    /* right half of Return array is zero */                                             \
    template <typename Return, int offset, typename T, typename A>                       \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType_<Return>::value &&                          \
                   !Traits::isAtomic##SimdArrayType_<Return>::value &&                   \
                   Return::Size * offset + Common::left_size<Return::Size>() >=          \
                       NativeType_<T, A>::Size),                                         \
                  Return>                                                                \
        simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG2)                                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, R1::Zero}(", ")\n", offset, x);                     \
        using R0 = typename Return::storage_type0;                                       \
        using R1 = typename Return::storage_type1;                                       \
        constexpr int entries_offset = offset * Return::Size;                            \
        return {simd_cast_with_offset<R0, entries_offset>(x), R1(0)};                    \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

// Instantiate the overload set for vectors and for masks.
Vc_SIMDARRAY_CASTS(SimdArray, Vc::Vector);
Vc_SIMDARRAY_CASTS(SimdMaskArray, Vc::Mask);
#undef Vc_SIMDARRAY_CASTS
2188 
// simd_cast<T>(xs...) from SimdArray/-mask {{{2
// Generates the simd_cast overloads taking SimdArray / SimdMaskArray arguments. The
// overloads split on whether the array is atomic (N == M: one storage member) or
// bisected (N != M: storage_type0/storage_type1 halves), whether N is a power of two
// (halves are equal-sized and can be recursed into directly), and whether the inputs
// provide more entries than Return holds (then trailing arguments are dropped). The
// inline /* ... */ comments label each case.
#define Vc_SIMDARRAY_CASTS(SimdArrayType_)                                               \
    /* indivisible SimdArrayType_ */                                                     \
    template <typename Return, typename T, std::size_t N, typename V, typename... From>  \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(are_all_types_equal<SimdArrayType_<T, N, V, N>, From...>::value &&    \
                   (sizeof...(From) == 0 || N * sizeof...(From) < Return::Size) &&       \
                   !std::is_same<Return, SimdArrayType_<T, N, V, N>>::value),            \
                  Return>                                                                \
        simd_cast(const SimdArrayType_<T, N, V, N> &x0, const From &... xs)              \
    {                                                                                    \
        vc_debug_("simd_cast{indivisible}(", ")\n", x0, xs...);                          \
        return simd_cast<Return>(internal_data(x0), internal_data(xs)...);               \
    }                                                                                    \
    /* indivisible SimdArrayType_ && can drop arguments from the end */                  \
    template <typename Return, typename T, std::size_t N, typename V, typename... From>  \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(are_all_types_equal<SimdArrayType_<T, N, V, N>, From...>::value &&    \
                   (sizeof...(From) > 0 && (N * sizeof...(From) >= Return::Size)) &&     \
                   !std::is_same<Return, SimdArrayType_<T, N, V, N>>::value),            \
                  Return>                                                                \
        simd_cast(const SimdArrayType_<T, N, V, N> &x0, const From &... xs)              \
    {                                                                                    \
        vc_debug_("simd_cast{indivisible2}(", ")\n", x0, xs...);                         \
        return simd_cast_without_last<Return,                                            \
                                      typename SimdArrayType_<T, N, V, N>::storage_type, \
                                      typename From::storage_type...>(                   \
            internal_data(x0), internal_data(xs)...);                                    \
    }                                                                                    \
    /* bisectable SimdArrayType_ (N = 2^n) && never too large */                         \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M,     \
              typename... From>                                                          \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value &&    \
         !std::is_same<Return, SimdArrayType_<T, N, V, M>>::value &&                     \
         is_less<N * sizeof...(From), Return::Size>::value && is_power_of_2<N>::value),  \
        Return>                                                                          \
    simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs)                  \
    {                                                                                    \
        vc_debug_("simd_cast{bisectable}(", ")\n", x0, xs...);                           \
        return simd_cast_interleaved_argument_order<                                     \
            Return, typename SimdArrayType_<T, N, V, M>::storage_type0,                  \
            typename From::storage_type0...>(internal_data0(x0), internal_data0(xs)...,  \
                                             internal_data1(x0), internal_data1(xs)...); \
    }                                                                                    \
    /* bisectable SimdArrayType_ (N = 2^n) && input so large that at least the last      \
     * input can be dropped */                                                           \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M,     \
              typename... From>                                                          \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value &&    \
         !is_less<N * sizeof...(From), Return::Size>::value && is_power_of_2<N>::value), \
        Return>                                                                          \
    simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs)                  \
    {                                                                                    \
        vc_debug_("simd_cast{bisectable2}(", ")\n", x0, xs...);                          \
        return simd_cast_without_last<Return, SimdArrayType_<T, N, V, M>, From...>(      \
            x0, xs...);                                                                  \
    }                                                                                    \
    /* remaining SimdArrayType_ input never larger (N != 2^n) */                         \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M,     \
              typename... From>                                                          \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value &&    \
         N * (1 + sizeof...(From)) <= Return::Size && !is_power_of_2<N>::value),         \
        Return>                                                                          \
    simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs)                  \
    {                                                                                    \
        vc_debug_("simd_cast{remaining}(", ")\n", x0, xs...);                            \
        return simd_cast_impl_smaller_input<Return, N, SimdArrayType_<T, N, V, M>,       \
                                            From...>(x0, xs...);                         \
    }                                                                                    \
    /* remaining SimdArrayType_ input larger (N != 2^n) */                               \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M,     \
              typename... From>                                                          \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value &&    \
         N * (1 + sizeof...(From)) > Return::Size && !is_power_of_2<N>::value),          \
        Return>                                                                          \
    simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs)                  \
    {                                                                                    \
        vc_debug_("simd_cast{remaining2}(", ")\n", x0, xs...);                           \
        return simd_cast_impl_larger_input<Return, N, SimdArrayType_<T, N, V, M>,        \
                                           From...>(x0, xs...);                          \
    }                                                                                    \
    /* a single bisectable SimdArrayType_ (N = 2^n) too large */                         \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M>     \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && N >= 2 * Return::Size && is_power_of_2<N>::value), Return>  \
        simd_cast(const SimdArrayType_<T, N, V, M> &x)                                   \
    {                                                                                    \
        vc_debug_("simd_cast{single bisectable}(", ")\n", x);                            \
        return simd_cast<Return>(internal_data0(x));                                     \
    }                                                                                    \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M>     \
    Vc_INTRINSIC Vc_CONST enable_if<(N != M && N > Return::Size &&                       \
                                     N < 2 * Return::Size && is_power_of_2<N>::value),   \
                                    Return>                                              \
    simd_cast(const SimdArrayType_<T, N, V, M> &x)                                       \
    {                                                                                    \
        vc_debug_("simd_cast{single bisectable2}(", ")\n", x);                           \
        return simd_cast<Return>(internal_data0(x), internal_data1(x));                  \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

// Instantiate the overload set for arrays and for masks.
Vc_SIMDARRAY_CASTS(SimdArray);
Vc_SIMDARRAY_CASTS(SimdMaskArray);
#undef Vc_SIMDARRAY_CASTS
2297 template <class Return, class T, int N, class... Ts,
2298           class = enable_if<!std::is_same<Return, fixed_size_simd<T, N>>::value>>
2299 Vc_INTRINSIC Return simd_cast(const fixed_size_simd<T, N> &x, const Ts &... xs)
2300 {
2301     return simd_cast<Return>(static_cast<const SimdArray<T, N> &>(x),
2302                              static_cast<const SimdArray<T, N> &>(xs)...);
2303 }
2304 template <class Return, class T, int N, class... Ts,
2305           class = enable_if<!std::is_same<Return, fixed_size_simd_mask<T, N>>::value>>
2306 Vc_INTRINSIC Return simd_cast(const fixed_size_simd_mask<T, N> &x, const Ts &... xs)
2307 {
2308     return simd_cast<Return>(static_cast<const SimdMaskArray<T, N> &>(x),
2309                              static_cast<const SimdMaskArray<T, N> &>(xs)...);
2310 }
2311 
// simd_cast<T, offset>(SimdArray/-mask) {{{2
// Generates the simd_cast<Return, offset>(SimdArray/-mask) overloads. `offset` counts
// in units of Return::Size entries of the input array. The overloads select, via the
// SFINAE conditions, whether the requested window lies in the left storage member,
// the right storage member, or straddles the split (then a scalar copy loop is the
// fallback). The inline /* ... */ comments label each case.
#define Vc_SIMDARRAY_CASTS(SimdArrayType_)                                               \
    /* offset == 0 is like without offset */                                             \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST enable_if<(offset == 0), Return> simd_cast(                    \
        const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG0)                               \
    {                                                                                    \
        vc_debug_("simd_cast{offset == 0}(", ")\n", offset, x);                          \
        return simd_cast<Return>(x);                                                     \
    }                                                                                    \
    /* forward to V */                                                                   \
    template <typename Return, int offset, typename T, std::size_t N, typename V>        \
    Vc_INTRINSIC Vc_CONST enable_if<(offset != 0), Return> simd_cast(                    \
        const SimdArrayType_<T, N, V, N> &x Vc_DUMMY_ARG1)                               \
    {                                                                                    \
        vc_debug_("simd_cast{offset, forward}(", ")\n", offset, x);                      \
        return simd_cast<Return, offset>(internal_data(x));                              \
    }                                                                                    \
    /* convert from right member of SimdArray */                                         \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && offset * Return::Size >= Common::left_size<N>() &&          \
                   offset != 0 && Common::left_size<N>() % Return::Size == 0),           \
                  Return>                                                                \
        simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG2)                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, right}(", ")\n", offset, x);                        \
        return simd_cast<Return, offset - Common::left_size<N>() / Return::Size>(        \
            internal_data1(x));                                                          \
    }                                                                                    \
    /* same as above except for odd cases where offset * Return::Size doesn't fit the    \
     * left side of the SimdArray */                                                     \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && offset * Return::Size >= Common::left_size<N>() &&          \
                   offset != 0 && Common::left_size<N>() % Return::Size != 0),           \
                  Return>                                                                \
        simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG3)                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, right, nofit}(", ")\n", offset, x);                 \
        return simd_cast_with_offset<Return,                                             \
                                     offset * Return::Size - Common::left_size<N>()>(    \
            internal_data1(x));                                                          \
    }                                                                                    \
    /* convert from left member of SimdArray */                                          \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (N != M && /*offset * Return::Size < Common::left_size<N>() &&*/                 \
         offset != 0 && (offset + 1) * Return::Size <= Common::left_size<N>()),          \
        Return>                                                                          \
    simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG4)                         \
    {                                                                                    \
        vc_debug_("simd_cast{offset, left}(", ")\n", offset, x);                         \
        return simd_cast<Return, offset>(internal_data0(x));                             \
    }                                                                                    \
    /* fallback to copying scalars */                                                    \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && (offset * Return::Size < Common::left_size<N>()) &&         \
                   offset != 0 && (offset + 1) * Return::Size > Common::left_size<N>()), \
                  Return>                                                                \
        simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG5)                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, copy scalars}(", ")\n", offset, x);                 \
        using R = typename Return::EntryType;                                            \
        Return r = Return(0);                                                            \
        for (std::size_t i = offset * Return::Size;                                      \
             i < std::min(N, (offset + 1) * Return::Size); ++i) {                        \
            r[i - offset * Return::Size] = static_cast<R>(x[i]);                         \
        }                                                                                \
        return r;                                                                        \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
// Instantiate the overload set for arrays and for masks.
Vc_SIMDARRAY_CASTS(SimdArray);
Vc_SIMDARRAY_CASTS(SimdMaskArray);
#undef Vc_SIMDARRAY_CASTS
// simd_cast_drop_arguments (definitions) {{{2
// Base case: a single argument, nothing left to drop — plain simd_cast.
template <typename Return, typename From>
Vc_INTRINSIC Vc_CONST Return simd_cast_drop_arguments(From x)
{
    return simd_cast<Return>(x);
}
// All arguments together still provide fewer entries than Return holds: forward
// everything to simd_cast.
template <typename Return, typename... Froms>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<Froms...>::value &&
               sizeof...(Froms) * first_type_of<Froms...>::Size < Return::Size),
              Return>
        simd_cast_drop_arguments(Froms... xs, first_type_of<Froms...> x)
{
    return simd_cast<Return>(xs..., x);
}
// The following function can be implemented without the sizeof...(From) overload.
// However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the
// function in two works around the issue.
// More entries passed in than Return can hold: discard the trailing (unnamed)
// argument and recurse until the remaining arguments fit.
template <typename Return, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST enable_if<
    (are_all_types_equal<From, Froms...>::value &&
     (1 + sizeof...(Froms)) * From::Size >= Return::Size && sizeof...(Froms) != 0),
    Return>
simd_cast_drop_arguments(Froms... xs, From x, From)
{
    return simd_cast_drop_arguments<Return, Froms...>(xs..., x);
}
// Two arguments and the first alone covers Return::Size: drop the second.
template <typename Return, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<From>::value && From::Size >= Return::Size), Return>
    simd_cast_drop_arguments(From x, From)
{
    return simd_cast_drop_arguments<Return>(x);
}
2427 
2428 // simd_cast_with_offset (definitions) {{{2
2429     template <typename Return, std::size_t offset, typename From>
2430     Vc_INTRINSIC Vc_CONST
2431     enable_if<(From::Size > offset && offset > 0 && offset % Return::Size == 0),
2432               Return> simd_cast_with_offset(const From &x)
2433 {
2434     return simd_cast<Return, offset / Return::Size>(x);
2435 }
2436 template <typename Return, std::size_t offset, typename From>
2437 Vc_INTRINSIC Vc_CONST
2438     enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
2439                ((Traits::isSimdArray<Return>::value &&
2440                  !Traits::isAtomicSimdArray<Return>::value) ||
2441                 (Traits::isSimdMaskArray<Return>::value &&
2442                  !Traits::isAtomicSimdMaskArray<Return>::value))),
2443               Return>
2444         simd_cast_with_offset(const From &x)
2445 {
2446     using R0 = typename Return::storage_type0;
2447     using R1 = typename Return::storage_type1;
2448     return {simd_cast_with_offset<R0, offset>(x),
2449             simd_cast_with_offset<R1, offset + R0::Size>(x)};
2450 }
/// \internal
/// Offset does not divide evenly into Return-sized chunks and Return is an
/// atomic SimdArray/SimdMaskArray (single underlying vector): first shift the
/// input by the misaligned remainder, then cast the now chunk-aligned data via
/// the offset parameter of simd_cast.
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
               ((Traits::isSimdArray<Return>::value &&
                 Traits::isAtomicSimdArray<Return>::value) ||
                (Traits::isSimdMaskArray<Return>::value &&
                 Traits::isAtomicSimdMaskArray<Return>::value))),
              Return>
        simd_cast_with_offset(const From &x)
{
    // shifted() removes the sub-chunk misalignment; the division selects the chunk.
    return simd_cast<Return, offset / Return::Size>(x.shifted(offset % Return::Size));
}
/// \internal
/// Zero offset with one or more homogeneous arguments: no shifting is needed,
/// simply forward everything to the plain simd_cast.
template <typename Return, std::size_t offset, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<From, Froms...>::value && offset == 0), Return>
        simd_cast_with_offset(const From &x, const Froms &... xs)
{
    return simd_cast<Return>(x, xs...);
}
2470 
2471 // simd_cast_without_last (definition) {{{2
/// \internal
/// Casts all arguments except the trailing one. Note that \p From must be
/// given explicitly by the caller: a parameter pack before a regular parameter
/// is not deducible, which is exactly what forces the last argument out of the
/// pack so it can be dropped here.
template <typename Return, typename T, typename... From>
Vc_INTRINSIC Vc_CONST Return simd_cast_without_last(const From &... xs, const T &)
{
    return simd_cast<Return>(xs...);
}
2477 
2478 // simd_cast_interleaved_argument_order (definitions) {{{2
2479 
#ifdef Vc_MSVC
// MSVC doesn't see that the Ts pack below can be empty and thus complains when extract_interleaved
// is called with only 2 arguments. These overloads here are *INCORRECT standard C++*, but they make
// MSVC do the right thing.
/// \internal MSVC-only: two-argument variant of the I == 0 case (returns the first argument).
template <std::size_t I, typename T0>
Vc_INTRINSIC Vc_CONST enable_if<(I == 0), T0> extract_interleaved(const T0 &a0, const T0 &)
{
    return a0;
}
/// \internal MSVC-only: two-argument variant of the I == 1 case (returns the second argument).
template <std::size_t I, typename T0>
Vc_INTRINSIC Vc_CONST enable_if<(I == 1), T0> extract_interleaved(const T0 &, const T0 &b0)
{
    return b0;
}
#endif  // Vc_MSVC
2495 
2496 /// \internal returns the first argument
/// \internal returns the first argument. The argument list is two equally long
/// sequences (a0, a..., b0, b...); Ts must be passed explicitly so the split
/// point between the two sequences is known.
template <std::size_t I, typename T0, typename... Ts>
Vc_INTRINSIC Vc_CONST enable_if<(I == 0), T0> extract_interleaved(const T0 &a0,
                                                                  const Ts &...,
                                                                  const T0 &,
                                                                  const Ts &...)
{
    return a0;
}
2505 /// \internal returns the center argument
/// \internal returns the center argument, i.e. the first element of the second
/// sequence (a0, a..., b0, b...) — the value at interleaved index 1.
template <std::size_t I, typename T0, typename... Ts>
Vc_INTRINSIC Vc_CONST enable_if<(I == 1), T0> extract_interleaved(const T0 &,
                                                                  const Ts &...,
                                                                  const T0 &b0,
                                                                  const Ts &...)
{
    return b0;
}
2514 /// \internal drops the first and center arguments and recurses
/// \internal drops the first and center arguments and recurses. Removing the
/// head of both sequences drops interleaved indexes 0 and 1, so the recursion
/// continues with I - 2.
template <std::size_t I, typename T0, typename... Ts>
Vc_INTRINSIC Vc_CONST enable_if<(I > 1), T0> extract_interleaved(const T0 &,
                                                                 const Ts &... a,
                                                                 const T0 &,
                                                                 const Ts &... b)
{
    return extract_interleaved<I - 2, Ts...>(a..., b...);
}
2523 /// \internal calls simd_cast with correct argument order thanks to extract_interleaved
/// \internal calls simd_cast with correct argument order thanks to extract_interleaved:
/// expanding extract_interleaved over Indexes = 0, 1, 2, ... yields the sequence
/// a0, b0, a1, b1, ... as the arguments of simd_cast.
template <typename Return, typename... Ts, std::size_t... Indexes>
Vc_INTRINSIC Vc_CONST Return
    simd_cast_interleaved_argument_order_1(index_sequence<Indexes...>, const Ts &... a,
                                           const Ts &... b)
{
    return simd_cast<Return>(extract_interleaved<Indexes, Ts...>(a..., b...)...);
}
2531 /// \internal constructs the necessary index_sequence to pass it to
2532 /// simd_cast_interleaved_argument_order_1
/// \internal constructs the necessary index_sequence (one index per argument,
/// i.e. 2 * sizeof...(Ts) of them) and passes it on to
/// simd_cast_interleaved_argument_order_1, which does the actual reordering.
template <typename Return, typename... Ts>
Vc_INTRINSIC Vc_CONST Return
    simd_cast_interleaved_argument_order(const Ts &... a, const Ts &... b)
{
    using seq = make_index_sequence<sizeof...(Ts)*2>;
    return simd_cast_interleaved_argument_order_1<Return, Ts...>(seq(), a..., b...);
}
2540 
2541 // conditional_assign {{{1
// Generates one conditional_assign overload per compound-assignment operator:
// conditional_assign<Operator::name_>(lhs, mask, rhs) applies `op_` to the
// lanes of lhs selected by mask, via SimdArray's write-masked lhs(mask) proxy.
// (Comments cannot go inside the macro body: a // before the line-continuation
// backslash would swallow it.)
#define Vc_CONDITIONAL_ASSIGN(name_, op_)                                                \
    template <Operator O, typename T, std::size_t N, typename V, size_t VN, typename M,  \
              typename U>                                                                \
    Vc_INTRINSIC enable_if<O == Operator::name_, void> conditional_assign(               \
        SimdArray<T, N, V, VN> &lhs, M &&mask, U &&rhs)                                  \
    {                                                                                    \
        lhs(mask) op_ rhs;                                                               \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
// One instantiation per supported compound assignment:
Vc_CONDITIONAL_ASSIGN(          Assign,  =);
Vc_CONDITIONAL_ASSIGN(      PlusAssign, +=);
Vc_CONDITIONAL_ASSIGN(     MinusAssign, -=);
Vc_CONDITIONAL_ASSIGN(  MultiplyAssign, *=);
Vc_CONDITIONAL_ASSIGN(    DivideAssign, /=);
Vc_CONDITIONAL_ASSIGN( RemainderAssign, %=);
Vc_CONDITIONAL_ASSIGN(       XorAssign, ^=);
Vc_CONDITIONAL_ASSIGN(       AndAssign, &=);
Vc_CONDITIONAL_ASSIGN(        OrAssign, |=);
Vc_CONDITIONAL_ASSIGN( LeftShiftAssign,<<=);
Vc_CONDITIONAL_ASSIGN(RightShiftAssign,>>=);
#undef Vc_CONDITIONAL_ASSIGN
2563 
// Same macro name, second use: generates the unary increment/decrement flavor
// of conditional_assign. These take no rhs and return the SimdArray produced
// by applying expr_ to the write-masked lhs(mask) proxy.
#define Vc_CONDITIONAL_ASSIGN(name_, expr_)                                              \
    template <Operator O, typename T, std::size_t N, typename V, size_t VN, typename M>  \
    Vc_INTRINSIC enable_if<O == Operator::name_, SimdArray<T, N, V, VN>>                 \
    conditional_assign(SimdArray<T, N, V, VN> &lhs, M &&mask)                            \
    {                                                                                    \
        return expr_;                                                                    \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
Vc_CONDITIONAL_ASSIGN(PostIncrement, lhs(mask)++);
Vc_CONDITIONAL_ASSIGN( PreIncrement, ++lhs(mask));
Vc_CONDITIONAL_ASSIGN(PostDecrement, lhs(mask)--);
Vc_CONDITIONAL_ASSIGN( PreDecrement, --lhs(mask));
#undef Vc_CONDITIONAL_ASSIGN
2577 // transpose_impl {{{1
2578 namespace Common
2579 {
/// \internal 4x4 transpose for atomic SimdArrays (VN == N): unwrap each
/// SimdArray to its single underlying vector via internal_data and delegate to
/// the transpose_impl overload operating on plain V vectors.
template <typename T, size_t N, typename V>
inline void transpose_impl(
    TransposeTag<4, 4>, SimdArray<T, N, V, N> *Vc_RESTRICT r[],
    const TransposeProxy<SimdArray<T, N, V, N>, SimdArray<T, N, V, N>,
                         SimdArray<T, N, V, N>, SimdArray<T, N, V, N>> &proxy)
{
    // Pointers to the wrapped vectors of the four output SimdArrays.
    V *Vc_RESTRICT r2[4] = {&internal_data(*r[0]), &internal_data(*r[1]),
                            &internal_data(*r[2]), &internal_data(*r[3])};
    transpose_impl(TransposeTag<4, 4>(), &r2[0],
                   TransposeProxy<V, V, V, V>{internal_data(std::get<0>(proxy.in)),
                                              internal_data(std::get<1>(proxy.in)),
                                              internal_data(std::get<2>(proxy.in)),
                                              internal_data(std::get<3>(proxy.in))});
}
2594 
/// \internal 2x4 transpose: four 2-wide inputs are distributed into two 4-wide
/// outputs, element by element. `lo` (*r[0]) collects the first entry
/// (internal_data0) of every input, `hi` (*r[1]) the second (internal_data1);
/// the nested internal_data0/1 calls address the quarter slots of the 4-wide
/// outputs.
template <typename T, typename V>
inline void transpose_impl(
    TransposeTag<2, 4>, SimdArray<T, 4, V, 1> *Vc_RESTRICT r[],
    const TransposeProxy<SimdArray<T, 2, V, 1>, SimdArray<T, 2, V, 1>,
                         SimdArray<T, 2, V, 1>, SimdArray<T, 2, V, 1>> &proxy)
{
    auto &lo = *r[0];
    auto &hi = *r[1];
    internal_data0(internal_data0(lo)) = internal_data0(std::get<0>(proxy.in));
    internal_data1(internal_data0(lo)) = internal_data0(std::get<1>(proxy.in));
    internal_data0(internal_data1(lo)) = internal_data0(std::get<2>(proxy.in));
    internal_data1(internal_data1(lo)) = internal_data0(std::get<3>(proxy.in));
    internal_data0(internal_data0(hi)) = internal_data1(std::get<0>(proxy.in));
    internal_data1(internal_data0(hi)) = internal_data1(std::get<1>(proxy.in));
    internal_data0(internal_data1(hi)) = internal_data1(std::get<2>(proxy.in));
    internal_data1(internal_data1(hi)) = internal_data1(std::get<3>(proxy.in));
}
2612 
/// \internal 4x4 transpose for single-element SimdArrays (N == 1, VN == 1):
/// identical delegation pattern to the atomic overload above — unwrap to the
/// underlying V and forward to the plain-vector transpose_impl.
template <typename T, typename V>
inline void transpose_impl(
    TransposeTag<4, 4>, SimdArray<T, 1, V, 1> *Vc_RESTRICT r[],
    const TransposeProxy<SimdArray<T, 1, V, 1>, SimdArray<T, 1, V, 1>,
                         SimdArray<T, 1, V, 1>, SimdArray<T, 1, V, 1>> &proxy)
{
    V *Vc_RESTRICT r2[4] = {&internal_data(*r[0]), &internal_data(*r[1]),
                            &internal_data(*r[2]), &internal_data(*r[3])};
    transpose_impl(TransposeTag<4, 4>(), &r2[0],
                   TransposeProxy<V, V, V, V>{internal_data(std::get<0>(proxy.in)),
                                              internal_data(std::get<1>(proxy.in)),
                                              internal_data(std::get<2>(proxy.in)),
                                              internal_data(std::get<3>(proxy.in))});
}
2627 
/// \internal 4x4 transpose for VN == 1 SimdArrays of general width N: divide
/// and conquer. The four output rows are split into two pairs (r0 = rows 0/1,
/// r1 = rows 2/3) and each pair is filled by a 2x4 transpose of the low
/// (internal_data0) respectively high (internal_data1) halves of the inputs.
template <typename T, size_t N, typename V>
inline void transpose_impl(
    TransposeTag<4, 4>, SimdArray<T, N, V, 1> *Vc_RESTRICT r[],
    const TransposeProxy<SimdArray<T, N, V, 1>, SimdArray<T, N, V, 1>,
                         SimdArray<T, N, V, 1>, SimdArray<T, N, V, 1>> &proxy)
{
    SimdArray<T, N, V, 1> *Vc_RESTRICT r0[4 / 2] = {r[0], r[1]};
    SimdArray<T, N, V, 1> *Vc_RESTRICT r1[4 / 2] = {r[2], r[3]};
    // H is the half-width type the 2x4 overload expects.
    using H = SimdArray<T, 2>;
    transpose_impl(TransposeTag<2, 4>(), &r0[0],
                   TransposeProxy<H, H, H, H>{internal_data0(std::get<0>(proxy.in)),
                                              internal_data0(std::get<1>(proxy.in)),
                                              internal_data0(std::get<2>(proxy.in)),
                                              internal_data0(std::get<3>(proxy.in))});
    transpose_impl(TransposeTag<2, 4>(), &r1[0],
                   TransposeProxy<H, H, H, H>{internal_data1(std::get<0>(proxy.in)),
                                              internal_data1(std::get<1>(proxy.in)),
                                              internal_data1(std::get<2>(proxy.in)),
                                              internal_data1(std::get<3>(proxy.in))});
}
2648 
2649 /* TODO:
2650 template <typename T, std::size_t N, typename V, std::size_t VSize>
2651 inline enable_if<(N > VSize), void> transpose_impl(
2652     std::array<SimdArray<T, N, V, VSize> * Vc_RESTRICT, 4> & r,
2653     const TransposeProxy<SimdArray<T, N, V, VSize>, SimdArray<T, N, V, VSize>,
2654                          SimdArray<T, N, V, VSize>, SimdArray<T, N, V, VSize>> &proxy)
2655 {
2656     typedef SimdArray<T, N, V, VSize> SA;
2657     std::array<typename SA::storage_type0 * Vc_RESTRICT, 4> r0 = {
2658         {&internal_data0(*r[0]), &internal_data0(*r[1]), &internal_data0(*r[2]),
2659          &internal_data0(*r[3])}};
2660     transpose_impl(
2661         r0, TransposeProxy<typename SA::storage_type0, typename SA::storage_type0,
2662                            typename SA::storage_type0, typename SA::storage_type0>{
2663                 internal_data0(std::get<0>(proxy.in)),
2664                 internal_data0(std::get<1>(proxy.in)),
2665                 internal_data0(std::get<2>(proxy.in)),
2666                 internal_data0(std::get<3>(proxy.in))});
2667 
2668     std::array<typename SA::storage_type1 * Vc_RESTRICT, 4> r1 = {
2669         {&internal_data1(*r[0]), &internal_data1(*r[1]), &internal_data1(*r[2]),
2670          &internal_data1(*r[3])}};
2671     transpose_impl(
2672         r1, TransposeProxy<typename SA::storage_type1, typename SA::storage_type1,
2673                            typename SA::storage_type1, typename SA::storage_type1>{
2674                 internal_data1(std::get<0>(proxy.in)),
2675                 internal_data1(std::get<1>(proxy.in)),
2676                 internal_data1(std::get<2>(proxy.in)),
2677                 internal_data1(std::get<3>(proxy.in))});
2678 }
2679 */
2680 }  // namespace Common
2681 
2682 // }}}1
2683 namespace Detail
2684 {
2685 // InterleaveImpl for SimdArrays {{{
2686 // atomic {{{1
/// \internal InterleaveImpl specialization for atomic SimdArrays (VN == N):
/// both operations unwrap the SimdArray arguments to their single underlying
/// vector via internal_data and delegate to the InterleaveImpl of V.
template <class T, size_t N,  class V, size_t VSizeof>
struct InterleaveImpl<SimdArray<T, N, V, N>, N, VSizeof> {
    /// Interleaves the entries of vv... into memory at data, indexed by i.
    template <class I, class... VV>
    static Vc_INTRINSIC void interleave(T *const data, const I &i, const VV &... vv)
    {
        InterleaveImpl<V, N, VSizeof>::interleave(data, i, internal_data(vv)...);
    }
    /// Gathers interleaved entries from data (indexed by i) back into vv....
    template <class I, class... VV>
    static Vc_INTRINSIC void deinterleave(T const *const data, const I &i, VV &... vv)
    {
        InterleaveImpl<V, N, VSizeof>::deinterleave(data, i, internal_data(vv)...);
    }
};
2700 
2701 // generic (TODO) {{{1
2702 /*
2703 template <class T, size_t N, class V, size_t Wt, size_t VSizeof>
2704 struct InterleaveImpl<SimdArray<T, N, V, Wt>, N, VSizeof> {
2705     using SA = SimdArray<T, N, V, Wt>;
2706     using SA0 = typename SA::storage_type0;
2707     using SA1 = typename SA::storage_type1;
2708 
2709     template <class I, class... VV>
2710     static Vc_INTRINSIC void interleave(T *const data, const I &i, const VV &... vv)
2711     {
2712         InterleaveImpl<SA0, SA0::size(), sizeof(SA0)>::interleave(
2713             data, i,  // i needs to be split
2714             internal_data0(vv)...);
2715         InterleaveImpl<SA1, SA1::size(), sizeof(SA1)>::interleave(
2716             data,  // how far to advance data?
2717             i,     // i needs to be split
2718             internal_data1(vv)...);
2719     }
2720     template <class I, class... VV>
2721     static Vc_INTRINSIC void deinterleave(T const *const data, const I &i, VV &... vv)
2722     {
2723         InterleaveImpl<V, N, VSizeof>::deinterleave(data, i, internal_data(vv)...);
2724     }
2725 };
2726 */
2727 }  // namespace Detail
2728 // }}}
2729 /// @}
2730 
2731 } // namespace Vc_VERSIONED_NAMESPACE
2732 
2733 // numeric_limits {{{1
2734 namespace std
2735 {
2736 template <typename T, size_t N, typename V, size_t VN>
2737 struct numeric_limits<Vc::SimdArray<T, N, V, VN>> : public numeric_limits<T> {
2738 private:
2739     using R = Vc::SimdArray<T, N, V, VN>;
2740 
2741 public:
2742     static Vc_ALWAYS_INLINE Vc_CONST R max() noexcept { return numeric_limits<T>::max(); }
2743     static Vc_ALWAYS_INLINE Vc_CONST R min() noexcept { return numeric_limits<T>::min(); }
2744     static Vc_ALWAYS_INLINE Vc_CONST R lowest() noexcept
2745     {
2746         return numeric_limits<T>::lowest();
2747     }
2748     static Vc_ALWAYS_INLINE Vc_CONST R epsilon() noexcept
2749     {
2750         return numeric_limits<T>::epsilon();
2751     }
2752     static Vc_ALWAYS_INLINE Vc_CONST R round_error() noexcept
2753     {
2754         return numeric_limits<T>::round_error();
2755     }
2756     static Vc_ALWAYS_INLINE Vc_CONST R infinity() noexcept
2757     {
2758         return numeric_limits<T>::infinity();
2759     }
2760     static Vc_ALWAYS_INLINE Vc_CONST R quiet_NaN() noexcept
2761     {
2762         return numeric_limits<T>::quiet_NaN();
2763     }
2764     static Vc_ALWAYS_INLINE Vc_CONST R signaling_NaN() noexcept
2765     {
2766         return numeric_limits<T>::signaling_NaN();
2767     }
2768     static Vc_ALWAYS_INLINE Vc_CONST R denorm_min() noexcept
2769     {
2770         return numeric_limits<T>::denorm_min();
2771     }
2772 };
2773 }  // namespace std
2774 //}}}1
2775 
2776 #endif // VC_COMMON_SIMDARRAY_H_
2777 
2778 // vim: foldmethod=marker