Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-30 10:25:50

0001 /*  This file is part of the Vc library. {{{
0002 Copyright © 2014-2015 Matthias Kretz <kretz@kde.org>
0003 
0004 Redistribution and use in source and binary forms, with or without
0005 modification, are permitted provided that the following conditions are met:
0006     * Redistributions of source code must retain the above copyright
0007       notice, this list of conditions and the following disclaimer.
0008     * Redistributions in binary form must reproduce the above copyright
0009       notice, this list of conditions and the following disclaimer in the
0010       documentation and/or other materials provided with the distribution.
0011     * Neither the names of contributing organizations nor the
0012       names of its contributors may be used to endorse or promote products
0013       derived from this software without specific prior written permission.
0014 
0015 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
0016 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
0017 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
0018 DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
0019 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
0020 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
0021 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
0022 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
0023 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
0024 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
0025 
0026 }}}*/
0027 
0028 #ifndef VC_COMMON_SIMDIZE_H_
0029 #define VC_COMMON_SIMDIZE_H_
0030 
0031 #include <tuple>
0032 #include <array>
0033 
0034 #include "../Allocator"
0035 #include "interleavedmemory.h"
0036 
0037 /*!
0038 \addtogroup Simdize
0039 
0040 Automatic type vectorization.
0041 
0042 Struct Vectorization
0043 ======================
0044 
0045 The `Vc::simdize<T>` expression transforms the type \c T to a vectorized type. This requires the type
0046 \c T to be a class template instance or an arithmetic type.
0047 
0048 Example:
0049 First, we declare a class template for a three-dimensional point. The template parameter \c T
0050 determines the type of the members and is \c float in the scalar (classical) case.
0051 \code
0052 template <typename T> struct PointTemplate
0053 {
0054   T x, y, z;
0055 
0056   // Declares tuple_size and makes the members accessible via get<N>(point), allowing
0057   // the simdize implementation to convert between Point and PointV (see below).
0058   Vc_SIMDIZE_INTERFACE((x, y, z));
0059 
0060   PointTemplate(T xx, T yy, T zz) : x{xx}, y{yy}, z{zz} {};
0061 
0062   // The following function will automatically be vectorized in the PointV type.
0063   T distance_to_origin() const {
0064     using std::sqrt;
0065     return sqrt(x * x + y * y + z * z);
0066   }
0067 };
0068 \endcode
0069 
0070 In the following we create a type alias for the scalar type, which simply means instantiating
0071 \c PointTemplate with \c float. The resulting type can then be transformed with \ref simdize.
0072 \code
0073 using Point  = PointTemplate<float>;  // A simple struct with three floats and two functions.
0074 using PointV = Vc::simdize<Point>;    // The vectorization of Point stores three float_v and thus
0075                                       // float_v::size() Points.
0076 \endcode
0077 
0078 The following shows a code example using the above \c Point and \c PointV types.
0079 \code
0080 PointV pv = Point{0.f, 1.f, 2.f};  // Constructs a PointV containing PointV::size()
0081                                    // copies of Point{0, 1, 2}.
0082 for (int i = 1; i < int(pv.size()); ++i) {
0083   assign(pv, i, {i + 0.f, i + 1.f, i + 2.f});
0084 }
0085 
0086 const Vc::float_v l = pv.distance_to_origin();
0087 std::cout << l << '\n';
0088 // prints [2.23607, 3.74166, 5.38516, 7.07107, 8.77496, 10.4881, 12.2066, 13.9284] with
0089 // float_v::size() == 8
0090 
0091 const Point most_distant = extract(pv, (l.max() == l).firstOne());
0092 std::cout << '(' << most_distant.x << ", " << most_distant.y << ", " << most_distant.z << ")\n";
0093 // prints (7, 8, 9) with float_v::size() == 8
0094 \endcode
0095 
0096 Iterator Vectorization
0097 ======================
0098 
0099 `Vc::simdize<Iterator>` can also be used to turn an iterator type into a new iterator type with `Vc::simdize<Iterator::value_type>` as its `value_type`.
0100 Note that `Vc::simdize<double>` turns into `Vc::Vector<double>`, which makes it easy to iterate over a given container of builtin arithmetics using `Vc::Vector`.
0101 \code
0102 void classic(const std::vector<Point> &data) {
0103   using It = std::vector<Point>::const_iterator;
0104   const It end = data.end();
0105   for (It it = data.begin(); it != end; ++it) {
0106     Point x = *it;
0107     do_something(x);
0108   }
0109 }
0110 
0111 void vectorized(const std::vector<Point> &data) {
0112   using It = Vc::simdize<std::vector<Point>::const_iterator>;
0113   const It end = data.end();
0114   for (It it = data.begin(); it != end; ++it) {
0115     Vc::simdize<Point> x = *it;  // i.e. PointV
0116     do_something(x);
0117   }
0118 }
0119 \endcode
0120 
0121  */
0122 namespace Vc_VERSIONED_NAMESPACE
0123 {
0124 /**\internal
0125  * \ingroup Simdize
0126  * This namespace contains all the required code for implementing simdize<T>. None of this
0127  * code should be directly accessed by users, though the unit test for simdize<T>
0128  * certainly may look into some of the details if necessary.
0129  */
0130 namespace SimdizeDetail  // {{{
0131 {
0132 /**
0133  * \addtogroup Simdize
0134  * @{
0135  */
0136 using std::is_same;
0137 using std::is_base_of;
0138 using std::false_type;
0139 using std::true_type;
0140 using std::iterator_traits;
0141 using std::conditional;
0142 using std::size_t;
0143 
0144 /**\internal
0145  * Typelist is a simple helper class for supporting multiple parameter packs in one class
0146  * template.
0147  */
0148 template <typename... Ts> struct Typelist;
0149 
0150 /**\internal
0151  * The Category identifies how the type argument to simdize<T> has to be transformed.
0152  */
0153 enum class Category {
0154     ///\internal No transformation
0155     NoTransformation,
0156     ///\internal simple Vector<T> transformation
0157     ArithmeticVectorizable,
0158     ///\internal transform an input iterator to return vectorized entries
0159     InputIterator,
0160     ///\internal transform an output iterator to return vectorized entries
0161     OutputIterator,
0162     ///\internal transform a forward iterator to return vectorized entries
0163     ForwardIterator,
0164     ///\internal transform a bidirectional iterator to return vectorized entries
0165     BidirectionalIterator,
0166     ///\internal transform a random access iterator to return vectorized entries
0167     RandomAccessIterator,
0168     ///\internal transform a class template recursively
0169     ClassTemplate
0170 };
0171 
0172 /**\internal
0173  * iteratorCategories<T>(int()) maps the member type T::iterator_category to a Category by testing for the
0174  * random access, bidirectional, forward, output, and input iterator tags (in that order); NoTransformation if none matches. Only viable if T::iterator_category names a type.
0175  */
0176 template <typename T, typename ItCat = typename T::iterator_category>
0177 constexpr Category iteratorCategories(int, ItCat * = nullptr)
0178 {
0179     return is_base_of<std::random_access_iterator_tag, ItCat>::value
0180                ? Category::RandomAccessIterator
0181                : is_base_of<std::bidirectional_iterator_tag, ItCat>::value
0182                      ? Category::BidirectionalIterator
0183                      : is_base_of<std::forward_iterator_tag, ItCat>::value
0184                            ? Category::ForwardIterator
0185                            : is_base_of<std::output_iterator_tag, ItCat>::value
0186                                  ? Category::OutputIterator
0187                                  : is_base_of<std::input_iterator_tag, ItCat>::value
0188                                        ? Category::InputIterator
0189                                        : Category::NoTransformation;
0190 }
0191 /**\internal
0192  * This overload is selected for pointer types => RandomAccessIterator.
0193  */
0194 template <typename T>
0195 constexpr enable_if<std::is_pointer<T>::value, Category> iteratorCategories(float)
0196 {
0197     return Category::RandomAccessIterator;
0198 }
0199 /**\internal
0200  * This fallback overload is selected if no other overload is viable, i.e. T has no iterator_category member type and is not a pointer.
0201  */
0202 template <typename T> constexpr Category iteratorCategories(...)
0203 {
0204     return Category::NoTransformation;
0205 }
0206 
0207 /**\internal
0208  * Simple trait to identify whether a type T is a class template or not.
0209  */
0210 template <typename T> struct is_class_template : public false_type
0211 {
0212 };
0213 template <template <typename...> class C, typename... Ts>
0214 struct is_class_template<C<Ts...>> : public true_type
0215 {
0216 };
0217 
0218 /**\internal
0219  * Returns the Category for the given type \p T: ArithmeticVectorizable for the builtin types listed below, otherwise the iterator category (if iteratorCategories finds one), otherwise ClassTemplate for class template instances, and NoTransformation for everything else.
0220  */
0221 template <typename T> constexpr Category typeCategory()
0222 {
0223     return (is_same<T, bool>::value || is_same<T, short>::value ||
0224             is_same<T, unsigned short>::value || is_same<T, int>::value ||
0225             is_same<T, unsigned int>::value || is_same<T, float>::value ||
0226             is_same<T, double>::value)
0227                ? Category::ArithmeticVectorizable
0228                : iteratorCategories<T>(int()) != Category::NoTransformation
0229                      ? iteratorCategories<T>(int())
0230                      : is_class_template<T>::value ? Category::ClassTemplate
0231                                                    : Category::NoTransformation;
0232 }
0233 
0234 /**\internal
0235  * Trait determining the number of data members that get<N>(x) can access.
0236  * The type \p T either has to provide a std::tuple_size specialization or contain a
0237  * constexpr tuple_size member.
0238  */
0239 template <typename T, size_t TupleSize = std::tuple_size<T>::value>
0240 constexpr size_t determine_tuple_size()
0241 {
0242     return TupleSize;
0243 }
0244 template <typename T, size_t TupleSize = T::tuple_size>
0245 constexpr size_t determine_tuple_size(size_t = T::tuple_size)
0246 {
0247     return TupleSize;
0248 }
0249 
0250 // workaround for MSVC limitation: constexpr functions in template arguments
0251 // confuse the compiler
0252 template <typename T> struct determine_tuple_size_
0253 : public std::integral_constant<size_t, determine_tuple_size<T>()>
0254 {};
0255 
0256 namespace
0257 {
0258 template <typename T> struct The_simdization_for_the_requested_type_is_not_implemented;
0259 }  // unnamed namespace
0260 
0261 /**\internal
0262  * The type behind the simdize expression whose member type \c type determines the
0263  * transformed type.
0264  *
0265  * \tparam T The type to be transformed.
0266  * \tparam N The width the resulting vectorized type should have. A value of 0 lets the
0267  *           implementation choose the width.
0268  * \tparam MT The base type to use for mask types. If set to \c void the implementation
0269  *            chooses the type itself.
0270  * \tparam Category The type category of \p T. This determines the implementation strategy
0271  *                  (via template specialization).
0272  */
0273 template <typename T, size_t N, typename MT, Category = typeCategory<T>()>
0274 struct ReplaceTypes : public The_simdization_for_the_requested_type_is_not_implemented<T>
0275 {
0276 };
0277 
0278 /**\internal
0279  * Specialization of ReplaceTypes that is used for types that should not be transformed by
0280  * simdize.
0281  */
0282 template <typename T, size_t N, typename MT> struct ReplaceTypes<T, N, MT, Category::NoTransformation>
0283 {
0284     typedef T type;
0285 };
0286 
0287 /**\internal
0288  * The ReplaceTypes class template is nicer to use as an alias template. This is exported
0289  * to the outer Vc namespace.
0290  */
0291 template <typename T, size_t N = 0, typename MT = void>
0292 using simdize = typename SimdizeDetail::ReplaceTypes<T, N, MT>::type;
0293 
0294 // Alias for Vector<T, Abi> with size() == N, or SimdArray<T, N> otherwise.
0295 template <class T, size_t N,
0296           class Best = typename Common::select_best_vector_type<T, N>::type>
0297 using deduce_vector_t =
0298     typename std::conditional<Best::size() == N, Best, SimdArray<T, N>>::type;
0299 
0300 /**\internal
0301  * ReplaceTypes specialization for simdizable arithmetic types. This results in either
0302  * Vector<T> or SimdArray<T, N>.
0303  */
0304 template <typename T, size_t N, typename MT>
0305 struct ReplaceTypes<T, N, MT, Category::ArithmeticVectorizable>
0306     : public conditional<N == 0, Vector<T>, deduce_vector_t<T, N>> {
0307 };
0308 
0309 /**\internal
0310  * ReplaceTypes specialization for bool. This results either in Mask<MT> or
0311  * SimdMaskArray<MT, N>.
0312  */
0313 template <size_t N, typename MT>
0314 struct ReplaceTypes<bool, N, MT, Category::ArithmeticVectorizable>
0315     : public std::enable_if<true, typename ReplaceTypes<MT, N, MT>::type::mask_type> {
0316 };
0317 /**\internal
0318  * ReplaceTypes specialization for bool and MT = void. In that case MT is set to float.
0319  */
0320 template <size_t N>
0321 struct ReplaceTypes<bool, N, void, Category::ArithmeticVectorizable>
0322     : public ReplaceTypes<bool, N, float, Category::ArithmeticVectorizable>
0323 {
0324 };
0325 
0326 /**\internal
0327  * This type substitutes the first type (\p T) in \p Remaining via simdize<T, N, MT> and
0328  * appends it to the Typelist in \p Replaced. If \p N = 0, the first simdize expression
0329  * that yields a vectorized type determines \p N for the subsequent SubstituteOneByOne
0330  * instances.
0331  */
0332 template <size_t N, typename MT, typename Replaced, typename... Remaining>
0333 struct SubstituteOneByOne;
0334 
0335 /**\internal
0336  * Template specialization for the case that there is at least one type in \p Remaining.
0337  * The member type \p type recurses via SubstituteOneByOne.
0338  */
0339 template <size_t N, typename MT, typename... Replaced, typename T,
0340           typename... Remaining>
0341 struct SubstituteOneByOne<N, MT, Typelist<Replaced...>, T, Remaining...>
0342 {
0343 private:
0344     /**\internal
0345      * If \p U::Size is a constant expression convertible to size_t then value will
0346      * be equal to U::Size, 0 otherwise.
0347      */
0348     template <typename U, size_t M = U::Size>
0349     static std::integral_constant<size_t, M> size_or_0(int);
0350     template <typename U> static std::integral_constant<size_t, 0> size_or_0(...);
0351 
0352     ///\internal The vectorized type for \p T.
0353     using V = simdize<T, N, MT>;
0354 
0355     /**\internal
0356      * Determine the new \p N to use for the SubstituteOneByOne expression below. If N is
0357      * non-zero that value is used. Otherwise size_or_0<V> determines the new value.
0358      */
0359     static constexpr auto NewN = N != 0 ? N : decltype(size_or_0<V>(int()))::value;
0360 
0361     /**\internal
0362      * Determine the new \p MT type to use for the SubstituteOneByOne expression below.
0363      * This is normally the old \p MT type. However, if N != NewN and MT = void, NewMT is
0364      * set to either \c float or \p T, depending on whether \p T is \c bool or not.
0365      */
0366     typedef conditional_t<(N != NewN && is_same<MT, void>::value),
0367                           conditional_t<is_same<T, bool>::value, float, T>, MT> NewMT;
0368 
0369 public:
0370     /**\internal
0371      * An alias to the type member of the completed recursion over SubstituteOneByOne.
0372      */
0373     using type = typename SubstituteOneByOne<NewN, NewMT, Typelist<Replaced..., V>,
0374                                              Remaining...>::type;
0375 };
0376 
0377 ///\internal Generates the SubstitutedWithValues member. This needs specialization for the
0378 /// number of types in the template argument list.
0379 template <size_t Size, typename... Replaced> struct SubstitutedBase;
0380 ///\internal Specialization for one type parameter.
0381 template <typename Replaced> struct SubstitutedBase<1, Replaced> {
0382     template <typename ValueT, template <typename, ValueT...> class C, ValueT... Values>
0383     using SubstitutedWithValues = C<Replaced, Values...>;
0384 };
0385 ///\internal Specialization for two type parameters.
0386 template <typename R0, typename R1> struct SubstitutedBase<2, R0, R1>
0387 {
0388     template <typename ValueT, template <typename, typename, ValueT...> class C,
0389               ValueT... Values>
0390     using SubstitutedWithValues = C<R0, R1, Values...>;
0391 };
0392 ///\internal Specialization for three type parameters.
0393 template <typename R0, typename R1, typename R2> struct SubstitutedBase<3, R0, R1, R2>
0394 {
0395     template <typename ValueT, template <typename, typename, typename, ValueT...> class C,
0396               ValueT... Values>
0397     using SubstitutedWithValues = C<R0, R1, R2, Values...>;
0398 };
0399 #if defined Vc_ICC || defined Vc_MSVC
0400 #define Vc_VALUE_PACK_EXPANSION_IS_BROKEN 1
0401 #endif
0402 ///\internal Specialization for four type parameters.
0403 template <typename... Replaced> struct SubstitutedBase<4, Replaced...> {
0404 #ifndef Vc_VALUE_PACK_EXPANSION_IS_BROKEN
0405     template <typename ValueT,
0406               template <typename, typename, typename, typename, ValueT...> class C,
0407               ValueT... Values>
0408     using SubstitutedWithValues = C<Replaced..., Values...>;
0409 #endif // Vc_VALUE_PACK_EXPANSION_IS_BROKEN
0410 };
0411 ///\internal Specialization for five type parameters.
0412 template <typename... Replaced> struct SubstitutedBase<5, Replaced...> {
0413 #ifndef Vc_VALUE_PACK_EXPANSION_IS_BROKEN
0414     template <typename ValueT, template <typename, typename, typename, typename, typename,
0415                                          ValueT...> class C,
0416               ValueT... Values>
0417     using SubstitutedWithValues = C<Replaced..., Values...>;
0418 #endif // Vc_VALUE_PACK_EXPANSION_IS_BROKEN
0419 };
0420 ///\internal Specialization for six type parameters.
0421 template <typename... Replaced> struct SubstitutedBase<6, Replaced...> {
0422 #ifndef Vc_VALUE_PACK_EXPANSION_IS_BROKEN
0423     template <typename ValueT, template <typename, typename, typename, typename, typename,
0424                                          typename, ValueT...> class C,
0425               ValueT... Values>
0426     using SubstitutedWithValues = C<Replaced..., Values...>;
0427 #endif // Vc_VALUE_PACK_EXPANSION_IS_BROKEN
0428 };
0429 ///\internal Specialization for seven type parameters.
0430 template <typename... Replaced> struct SubstitutedBase<7, Replaced...> {
0431 #ifndef Vc_VALUE_PACK_EXPANSION_IS_BROKEN
0432     template <typename ValueT, template <typename, typename, typename, typename, typename,
0433                                          typename, typename, ValueT...> class C,
0434               ValueT... Values>
0435     using SubstitutedWithValues = C<Replaced..., Values...>;
0436 #endif // Vc_VALUE_PACK_EXPANSION_IS_BROKEN
0437 };
0438 ///\internal Specialization for eight type parameters.
0439 template <typename... Replaced> struct SubstitutedBase<8, Replaced...> {
0440 #ifndef Vc_VALUE_PACK_EXPANSION_IS_BROKEN
0441     template <typename ValueT, template <typename, typename, typename, typename, typename,
0442                                          typename, typename, typename, ValueT...> class C,
0443               ValueT... Values>
0444     using SubstitutedWithValues = C<Replaced..., Values...>;
0445 #endif // Vc_VALUE_PACK_EXPANSION_IS_BROKEN
0446 };
0447 
0448 /**\internal
0449  * Template specialization that ends the recursion and determines the return type \p type.
0450  * The end of the recursion is identified by an empty typelist (i.e. no template
0451  * parameters) after the Typelist parameter.
0452  */
0453 template <size_t N_, typename MT, typename Replaced0, typename... Replaced>
0454 struct SubstituteOneByOne<N_, MT, Typelist<Replaced0, Replaced...>>
0455 {
0456     /**\internal
0457      * Return type for returning the vector width and list of substituted types
0458      */
0459     struct type
0460         : public SubstitutedBase<sizeof...(Replaced) + 1, Replaced0, Replaced...> {
0461         static constexpr auto N = N_;
0462         /**\internal
0463          * Alias template to construct a class template instantiation with the replaced
0464          * types.
0465          */
0466         template <template <typename...> class C>
0467         using Substituted = C<Replaced0, Replaced...>;
0468     };
0469 };
0470 
0471 /**\internal
0472  * Vectorized class templates are not substituted directly by ReplaceTypes/simdize.
0473  * Instead the replaced type is used as a base class for an adapter type which enables
0474  * the addition of extra operations. Specifically the following features are added:
0475  * \li a constexpr \p size() function, which returns the width of the vectorization. Note
0476  *     that this may hide a \p size() member in the original class template (e.g. for STL
0477  *     container classes).
0478  * \li The member type \p base_type is an alias for the vectorized (i.e. substituted)
0479  *     class template
0480  * \li The member type \p scalar_type is an alias for the class template argument
0481  *     originally passed to the \ref simdize expression.
0482  *
0483  * \tparam Scalar The original (scalar) class template instance that was passed to simdize.
0484  * \tparam Base   The vectorized (substituted) class template instance.
0485  * \tparam N      The width of the vectorization.
0486  */
0487 template <typename Scalar, typename Base, size_t N> class Adapter;
0488 
0489 /**\internal
0490  * Specialization of ReplaceTypes for class templates (\p C) where each template argument
0491  * needs to be substituted via SubstituteOneByOne.
0492  */
0493 template <template <typename...> class C, typename... Ts, size_t N, typename MT>
0494 struct ReplaceTypes<C<Ts...>, N, MT, Category::ClassTemplate>
0495 {
0496     ///\internal The \p type member of the SubstituteOneByOne instantiation
0497     using SubstitutionResult =
0498         typename SubstituteOneByOne<N, MT, Typelist<>, Ts...>::type;
0499     /**\internal
0500      * This expression instantiates the class template \p C with the substituted template
0501      * arguments in the \p Ts parameter pack. The alias \p Vectorized thus is the
0502      * vectorized equivalent to \p C<Ts...>.
0503      */
0504     using Vectorized = typename SubstitutionResult::template Substituted<C>;
0505     /**\internal
0506      * The result type of this ReplaceTypes instantiation is set to \p C<Ts...> if no
0507      * template parameter substitution was done in SubstituteOneByOne. Otherwise, the type
0508      * aliases an Adapter instantiation.
0509      */
0510     using type = conditional_t<is_same<C<Ts...>, Vectorized>::value, C<Ts...>,
0511                                Adapter<C<Ts...>, Vectorized, SubstitutionResult::N>>;
0512 };
0513 
0514 /**\internal
0515  * Specialization of the ReplaceTypes class template allowing transformation of class
0516  * templates with non-type parameters. This is impossible to express with variadic
0517  * templates and therefore requires a lot of code duplication.
0518  */
0519 #ifdef Vc_VALUE_PACK_EXPANSION_IS_BROKEN
0520 // ICC and MSVC cannot handle packs of values (see Vc_VALUE_PACK_EXPANSION_IS_BROKEN)
0521 #define Vc_DEFINE_NONTYPE_REPLACETYPES_(ValueType_)                                      \
0522     template <template <typename, ValueType_...> class C, typename T, ValueType_ Value0, \
0523               ValueType_... Values>                                                      \
0524     struct is_class_template<C<T, Value0, Values...>> : public true_type {               \
0525     };                                                                                   \
0526     template <template <typename, typename, ValueType_...> class C, typename T0,         \
0527               typename T1, ValueType_ Value0, ValueType_... Values>                      \
0528     struct is_class_template<C<T0, T1, Value0, Values...>> : public true_type {          \
0529     };                                                                                   \
0530     template <template <typename, typename, typename, ValueType_...> class C,            \
0531               typename T0, typename T1, typename T2, ValueType_ Value0,                  \
0532               ValueType_... Values>                                                      \
0533     struct is_class_template<C<T0, T1, T2, Value0, Values...>> : public true_type {      \
0534     };                                                                                   \
0535     template <template <typename, typename, typename, typename, ValueType_...> class C,  \
0536               typename T0, typename T1, typename T2, typename T3, ValueType_ Value0,     \
0537               ValueType_... Values>                                                      \
0538     struct is_class_template<C<T0, T1, T2, T3, Value0, Values...>> : public true_type {  \
0539     };                                                                                   \
0540     template <template <typename, typename, typename, typename, typename, ValueType_...> \
0541               class C,                                                                   \
0542               typename T0, typename T1, typename T2, typename T3, typename T4,           \
0543               ValueType_ Value0, ValueType_... Values>                                   \
0544     struct is_class_template<C<T0, T1, T2, T3, T4, Value0, Values...>>                   \
0545         : public true_type {                                                             \
0546     };                                                                                   \
0547     template <template <typename, typename, typename, typename, typename, typename,      \
0548                         ValueType_...> class C,                                          \
0549               typename T0, typename T1, typename T2, typename T3, typename T4,           \
0550               typename T5, ValueType_ Value0, ValueType_... Values>                      \
0551     struct is_class_template<C<T0, T1, T2, T3, T4, T5, Value0, Values...>>               \
0552         : public true_type {                                                             \
0553     };                                                                                   \
0554     template <template <typename, typename, typename, typename, typename, typename,      \
0555                         typename, ValueType_...> class C,                                \
0556               typename T0, typename T1, typename T2, typename T3, typename T4,           \
0557               typename T5, typename T6, ValueType_ Value0, ValueType_... Values>         \
0558     struct is_class_template<C<T0, T1, T2, T3, T4, T5, T6, Value0, Values...>>           \
0559         : public true_type {                                                             \
0560     };                                                                                   \
0561     template <template <typename, ValueType_> class C, typename T0, ValueType_ Value0,   \
0562               size_t N, typename MT>                                                     \
0563     struct ReplaceTypes<C<T0, Value0>, N, MT, Category::ClassTemplate> {                 \
0564         typedef typename SubstituteOneByOne<N, MT, Typelist<>, T0>::type tmp;            \
0565         typedef typename tmp::template SubstitutedWithValues<ValueType_, C, Value0>      \
0566             Substituted;                                                                 \
0567         static constexpr auto NN = tmp::N;                                               \
0568         typedef conditional_t<is_same<C<T0, Value0>, Substituted>::value, C<T0, Value0>, \
0569                               Adapter<C<T0, Value0>, Substituted, NN>> type;             \
0570     };                                                                                   \
0571     template <template <typename, typename, ValueType_> class C, typename T0,            \
0572               typename T1, ValueType_ Value0, size_t N, typename MT>                     \
0573     struct ReplaceTypes<C<T0, T1, Value0>, N, MT, Category::ClassTemplate> {             \
0574         typedef typename SubstituteOneByOne<N, MT, Typelist<>, T0, T1>::type tmp;        \
0575         typedef typename tmp::template SubstitutedWithValues<ValueType_, C, Value0>      \
0576             Substituted;                                                                 \
0577         static constexpr auto NN = tmp::N;                                               \
0578         typedef conditional_t<is_same<C<T0, T1, Value0>, Substituted>::value,            \
0579                               C<T0, T1, Value0>,                                         \
0580                               Adapter<C<T0, T1, Value0>, Substituted, NN>> type;         \
0581     };                                                                                   \
0582     template <template <typename, typename, typename, ValueType_> class C, typename T0,  \
0583               typename T1, typename T2, ValueType_ Value0, size_t N, typename MT>        \
0584     struct ReplaceTypes<C<T0, T1, T2, Value0>, N, MT, Category::ClassTemplate> {         \
0585         typedef typename SubstituteOneByOne<N, MT, Typelist<>, T0, T1, T2>::type tmp;    \
0586         typedef typename tmp::template SubstitutedWithValues<ValueType_, C, Value0>      \
0587             Substituted;                                                                 \
0588         static constexpr auto NN = tmp::N;                                               \
0589         typedef conditional_t<is_same<C<T0, T1, T2, Value0>, Substituted>::value,        \
0590                               C<T0, T1, T2, Value0>,                                     \
0591                               Adapter<C<T0, T1, T2, Value0>, Substituted, NN>> type;     \
0592     }
0593 #else
/*
 * Variant used in the #else branch of the Vc_VALUE_PACK_EXPANSION_IS_BROKEN
 * conditional. For one non-type template parameter type ValueType_ it generates:
 *  - is_class_template specializations matching class templates with one to seven
 *    leading type parameters followed by at least one ValueType_ value, and
 *  - ReplaceTypes specializations for one to three leading type parameters. These
 *    substitute the type arguments via SubstituteOneByOne, re-attach the values via
 *    SubstitutedWithValues and yield the original type if nothing changed, or an
 *    Adapter wrapping the substituted type otherwise.
 * The expansion ends without a semicolon; the invocation supplies it.
 */
#define Vc_DEFINE_NONTYPE_REPLACETYPES_(ValueType_)                                      \
    template <template <typename, ValueType_...> class C, typename T,                    \
              ValueType_ Value0, ValueType_... Values>                                   \
    struct is_class_template<C<T, Value0, Values...>> : true_type {                      \
    };                                                                                   \
    template <template <typename, typename, ValueType_...> class C, typename T0,         \
              typename T1, ValueType_ Value0, ValueType_... Values>                      \
    struct is_class_template<C<T0, T1, Value0, Values...>> : true_type {                 \
    };                                                                                   \
    template <template <typename, typename, typename, ValueType_...> class C,            \
              typename T0, typename T1, typename T2, ValueType_ Value0,                  \
              ValueType_... Values>                                                      \
    struct is_class_template<C<T0, T1, T2, Value0, Values...>> : true_type {             \
    };                                                                                   \
    template <template <typename, typename, typename, typename, ValueType_...> class C,  \
              typename T0, typename T1, typename T2, typename T3, ValueType_ Value0,     \
              ValueType_... Values>                                                      \
    struct is_class_template<C<T0, T1, T2, T3, Value0, Values...>> : true_type {         \
    };                                                                                   \
    template <template <typename, typename, typename, typename, typename, ValueType_...> \
              class C,                                                                   \
              typename T0, typename T1, typename T2, typename T3, typename T4,           \
              ValueType_ Value0, ValueType_... Values>                                   \
    struct is_class_template<C<T0, T1, T2, T3, T4, Value0, Values...>> : true_type {     \
    };                                                                                   \
    template <template <typename, typename, typename, typename, typename, typename,      \
                        ValueType_...> class C,                                          \
              typename T0, typename T1, typename T2, typename T3, typename T4,           \
              typename T5, ValueType_ Value0, ValueType_... Values>                      \
    struct is_class_template<C<T0, T1, T2, T3, T4, T5, Value0, Values...>>               \
        : true_type {                                                                    \
    };                                                                                   \
    template <template <typename, typename, typename, typename, typename, typename,      \
                        typename, ValueType_...> class C,                                \
              typename T0, typename T1, typename T2, typename T3, typename T4,           \
              typename T5, typename T6, ValueType_ Value0, ValueType_... Values>         \
    struct is_class_template<C<T0, T1, T2, T3, T4, T5, T6, Value0, Values...>>           \
        : true_type {                                                                    \
    };                                                                                   \
    template <template <typename, ValueType_...> class C, typename T0,                   \
              ValueType_ Value0, ValueType_... Values, size_t N, typename MT>            \
    struct ReplaceTypes<C<T0, Value0, Values...>, N, MT, Category::ClassTemplate> {      \
        using tmp = typename SubstituteOneByOne<N, MT, Typelist<>, T0>::type;            \
        using Substituted =                                                              \
            typename tmp::template SubstitutedWithValues<ValueType_, C, Value0,          \
                                                         Values...>;                     \
        static constexpr auto NN = tmp::N;                                               \
        using type = conditional_t<                                                      \
            is_same<C<T0, Value0, Values...>, Substituted>::value,                       \
            C<T0, Value0, Values...>,                                                    \
            Adapter<C<T0, Value0, Values...>, Substituted, NN>>;                         \
    };                                                                                   \
    template <template <typename, typename, ValueType_...> class C, typename T0,         \
              typename T1, ValueType_ Value0, ValueType_... Values, size_t N,            \
              typename MT>                                                               \
    struct ReplaceTypes<C<T0, T1, Value0, Values...>, N, MT, Category::ClassTemplate> {  \
        using tmp = typename SubstituteOneByOne<N, MT, Typelist<>, T0, T1>::type;        \
        using Substituted =                                                              \
            typename tmp::template SubstitutedWithValues<ValueType_, C, Value0,          \
                                                         Values...>;                     \
        static constexpr auto NN = tmp::N;                                               \
        using type = conditional_t<                                                      \
            is_same<C<T0, T1, Value0, Values...>, Substituted>::value,                   \
            C<T0, T1, Value0, Values...>,                                                \
            Adapter<C<T0, T1, Value0, Values...>, Substituted, NN>>;                     \
    };                                                                                   \
    template <template <typename, typename, typename, ValueType_...> class C,            \
              typename T0, typename T1, typename T2, ValueType_ Value0,                  \
              ValueType_... Values, size_t N, typename MT>                               \
    struct ReplaceTypes<C<T0, T1, T2, Value0, Values...>, N, MT,                         \
                        Category::ClassTemplate> {                                       \
        using tmp = typename SubstituteOneByOne<N, MT, Typelist<>, T0, T1, T2>::type;    \
        using Substituted =                                                              \
            typename tmp::template SubstitutedWithValues<ValueType_, C, Value0,          \
                                                         Values...>;                     \
        static constexpr auto NN = tmp::N;                                               \
        using type = conditional_t<                                                      \
            is_same<C<T0, T1, T2, Value0, Values...>, Substituted>::value,               \
            C<T0, T1, T2, Value0, Values...>,                                            \
            Adapter<C<T0, T1, T2, Value0, Values...>, Substituted, NN>>;                 \
    }
0672 #endif  // Vc_VALUE_PACK_EXPANSION_IS_BROKEN
0673 Vc_DEFINE_NONTYPE_REPLACETYPES_(bool);
0674 Vc_DEFINE_NONTYPE_REPLACETYPES_(wchar_t);
0675 Vc_DEFINE_NONTYPE_REPLACETYPES_(char);
0676 Vc_DEFINE_NONTYPE_REPLACETYPES_(  signed char);
0677 Vc_DEFINE_NONTYPE_REPLACETYPES_(unsigned char);
0678 Vc_DEFINE_NONTYPE_REPLACETYPES_(  signed short);
0679 Vc_DEFINE_NONTYPE_REPLACETYPES_(unsigned short);
0680 Vc_DEFINE_NONTYPE_REPLACETYPES_(  signed int);
0681 Vc_DEFINE_NONTYPE_REPLACETYPES_(unsigned int);
0682 Vc_DEFINE_NONTYPE_REPLACETYPES_(  signed long);
0683 Vc_DEFINE_NONTYPE_REPLACETYPES_(unsigned long);
0684 Vc_DEFINE_NONTYPE_REPLACETYPES_(  signed long long);
0685 Vc_DEFINE_NONTYPE_REPLACETYPES_(unsigned long long);
0686 #undef Vc_DEFINE_NONTYPE_REPLACETYPES_
0687 
0688 // preferred_construction {{{
namespace preferred_construction_impl
{
/// Declaration-only factory used inside decltype to obtain a value of type \p T.
template <typename T> T create();

/// Result 0: `Target(args...)` (parenthesis initialization) is well-formed.
template <class Target, class... Args,
          class = decltype(Target(create<Args>()...))>
constexpr auto test(int) -> std::integral_constant<int, 0>;

/// Result 1: `Target{args...}` (single-brace initialization) is well-formed.
template <class Target, class... Args,
          class = decltype(Target{create<Args>()...})>
constexpr auto test(float) -> std::integral_constant<int, 1>;

/// Result 2: `Target{{args...}}` (double-brace initialization) is well-formed.
template <class Target, class... Args, class Any,
          class = decltype(Target{{create<Args>()...}})>
constexpr auto test(Any) -> std::integral_constant<int, 2>;

/// Result 3: fallback that is viable for every Target/Args combination.
template <class Target, class... Args>
constexpr auto test(...) -> std::integral_constant<int, 3>;
}  // namespace preferred_construction_impl
0704 
0705 template <class Type, class... Init>
0706 constexpr inline decltype(preferred_construction_impl::test<Type, Init...>(0))
0707 preferred_construction()
0708 {
0709     return {};
0710 }
0711 
0712 // }}}
0713 // get_dispatcher {{{
/**\internal
 * Retrieves the `I`-th member of \p object. The overloads below are enabled via their
 * defaulted `Member` template argument: the first pair requires a `vc_get_<I>()` member
 * function, the second pair requires `std::get<I>(object)` to be well-formed.
 */
/// Mutable object exposing `vc_get_<I>()`.
template <size_t I, typename Object,
          typename Member = decltype(std::declval<Object &>().template vc_get_<I>())>
Member get_dispatcher(Object &object, void * = nullptr)
{
    return object.template vc_get_<I>();
}

/// Constant object exposing `vc_get_<I>() const`.
template <size_t I, typename Object,
          typename Member =
              decltype(std::declval<const Object &>().template vc_get_<I>())>
Member get_dispatcher(const Object &object, void * = nullptr)
{
    return object.template vc_get_<I>();
}

/// Mutable object supporting `std::get<I>`.
template <size_t I, typename Object,
          typename Member = decltype(std::get<I>(std::declval<Object &>()))>
Member get_dispatcher(Object &object, int = 0)
{
    return std::get<I>(object);
}

/// Constant object supporting `std::get<I>`.
template <size_t I, typename Object,
          typename Member = decltype(std::get<I>(std::declval<const Object &>()))>
Member get_dispatcher(const Object &object, int = 0)
{
    return std::get<I>(object);
}
0741 
0742 // }}}
0743 // my_tuple_element {{{
/// Element type lookup; by default simply forwards to std::tuple_element.
template <size_t I, class T, class = void>
struct my_tuple_element : std::tuple_element<I, T> {
};

/// Preferred when `T` offers `vc_get_<I>()`: the element type is the decayed return
/// type of that member function. The std::conditional with a constant `true` only
/// serves to make the third argument `void` whenever the decltype is well-formed.
template <size_t I, class T>
struct my_tuple_element<
    I, T,
    typename std::conditional<
        true, void, decltype(std::declval<T>().template vc_get_<I>())>::type> {
    typedef typename std::decay<decltype(std::declval<T>().template vc_get_<I>())>::type
        type;
};
0755 
0756 // }}}
0757 // homogeneous_sizeof {{{
0758 /**\internal
0759  * This trait determines the `sizeof` of all fundamental types (i.e. recursively, when
0760  * needed) in the template parameter pack \p Ts. If all fundamental types have equal
0761  * `sizeof`, the value is "returned" in the `value` member. Otherwise `value` is 0.
0762  */
0763 template <class... Ts> struct homogeneous_sizeof;
0764 template <class T, class = void> struct homogeneous_sizeof_one;
0765 template <class T>
0766 struct homogeneous_sizeof_one<T,
0767                               typename std::enable_if<std::is_arithmetic<T>::value>::type>
0768     : std::integral_constant<size_t, sizeof(T)> {
0769 };
0770 template <class T0> struct homogeneous_sizeof<T0> : homogeneous_sizeof_one<T0> {
0771 };
0772 
0773 template <class T0, class... Ts>
0774 struct homogeneous_sizeof<T0, Ts...>
0775     : std::integral_constant<size_t, homogeneous_sizeof<T0>::value ==
0776                                              homogeneous_sizeof<Ts...>::value
0777                                          ? homogeneous_sizeof<T0>::value
0778                                          : 0> {
0779 };
0780 
0781 template <class T, size_t... Is>
0782 std::integral_constant<
0783     size_t, homogeneous_sizeof<typename my_tuple_element<Is, T>::type...>::value>
0784     homogeneous_sizeof_helper(index_sequence<Is...>);
0785 
0786 template <class T>
0787 struct homogeneous_sizeof_one<T, typename std::enable_if<std::is_class<T>::value>::type>
0788     : decltype(homogeneous_sizeof_helper<T>(
0789           make_index_sequence<determine_tuple_size_<T>::value>())) {
0790 };
0791 
0792 // }}}
0793 // class Adapter {{{
0794 template <typename Scalar, typename Base, size_t N> class Adapter : public Base
0795 {
0796 private:
0797     /// helper for the broadcast ctor below, error case
0798     template <std::size_t... Indexes, int X>
0799     Adapter(Vc::index_sequence<Indexes...>, const Scalar,
0800             std::integral_constant<int, X>)
0801     {
0802         static_assert(
0803             X < 3, "Failed to construct an object of type Base. Neither via "
0804                    "parenthesis-init, brace-init, nor double-brace init appear to work.");
0805     }
0806 
0807     /// helper for the broadcast ctor below using double braces for Base initialization
0808     template <std::size_t... Indexes>
0809     Adapter(Vc::index_sequence<Indexes...>, const Scalar &x_,
0810             std::integral_constant<int, 2>)
0811         : Base{{get_dispatcher<Indexes>(x_)...}}
0812     {
0813     }
0814 
0815     /// helper for the broadcast ctor below using single braces for Base initialization
0816     template <std::size_t... Indexes>
0817     Adapter(Vc::index_sequence<Indexes...>, const Scalar &x_,
0818             std::integral_constant<int, 1>)
0819         : Base{get_dispatcher<Indexes>(x_)...}
0820     {
0821     }
0822 
0823     /// helper for the broadcast ctor below using parenthesis for Base initialization
0824     template <std::size_t... Indexes>
0825     Adapter(Vc::index_sequence<Indexes...>, const Scalar &x_,
0826             std::integral_constant<int, 0>)
0827         : Base(get_dispatcher<Indexes>(x_)...)
0828     {
0829     }
0830 
0831     template <std::size_t... Indexes>
0832     Adapter(Vc::index_sequence<Indexes...> seq_, const Scalar &x_)
0833         : Adapter(seq_, x_,
0834                   preferred_construction<Base, decltype(get_dispatcher<Indexes>(
0835                                                    std::declval<const Scalar &>()))...>())
0836     {
0837     }
0838 
0839 public:
0840     /// The SIMD vector width of the members.
0841     static constexpr size_t size() { return N; }
0842     static constexpr size_t Size = N;
0843 
0844     /// The vectorized base class template instantiation this Adapter class derives from.
0845     using base_type = Base;
0846     /// The original non-vectorized class template instantiation that was passed to the
0847     /// simdize expression.
0848     using scalar_type = Scalar;
0849 
0850     /// Allow default construction. This is automatically ill-formed if Base() is
0851     /// ill-formed.
0852     Adapter() = default;
0853 
0854     /// Defaulted copy and move construction and assignment
0855 #if defined Vc_CLANG && Vc_CLANG < 0x30700
0856     Vc_INTRINSIC Adapter(const Adapter &x) : Base(x) {}
0857 #else
0858     Adapter(const Adapter &) = default;
0859 #endif
0860     /// Defaulted copy and move construction and assignment
0861     Adapter(Adapter &&) = default;
0862     /// Defaulted copy and move construction and assignment
0863     Adapter &operator=(const Adapter &) = default;
0864     /// Defaulted copy and move construction and assignment
0865     Adapter &operator=(Adapter &&) = default;
0866 
0867     /// Broadcast constructor
0868     template <typename U, size_t TupleSize = determine_tuple_size_<Scalar>::value,
0869               typename Seq = Vc::make_index_sequence<TupleSize>,
0870               typename = enable_if<std::is_convertible<U, Scalar>::value>>
0871     Adapter(U &&x_)
0872         : Adapter(Seq(), static_cast<const Scalar &>(x_))
0873     {
0874     }
0875 
0876     /// Generator constructor {{{
0877     template <class F,
0878               class = decltype(static_cast<Scalar>(std::declval<F>()(
0879                   size_t())))>  // F returns objects that are convertible to S
0880     Adapter(F &&fun);           // implementation below
0881 
0882     // }}}
0883     /// perfect forward all Base constructors
0884     template <typename A0, typename... Args,
0885               typename = typename std::enable_if<
0886                   !Traits::is_index_sequence<A0>::value &&
0887                   (sizeof...(Args) > 0 || !std::is_convertible<A0, Scalar>::value)>::type>
0888     Adapter(A0 &&arg0_, Args &&... arguments_)
0889         : Base(std::forward<A0>(arg0_), std::forward<Args>(arguments_)...)
0890     {
0891     }
0892 
0893     /// perfect forward Base constructors that accept an initializer_list
0894     template <typename T,
0895               typename = decltype(Base(std::declval<const std::initializer_list<T> &>()))>
0896     Adapter(const std::initializer_list<T> &l_)
0897         : Base(l_)
0898     {
0899     }
0900 
0901     /// Overload the new operator to adhere to the alignment requirements which C++11
0902     /// ignores by default.
0903     void *operator new(size_t size)
0904     {
0905         return Vc::Common::aligned_malloc<alignof(Adapter)>(size);
0906     }
0907     void *operator new(size_t, void *p_) { return p_; }
0908     void *operator new[](size_t size)
0909     {
0910         return Vc::Common::aligned_malloc<alignof(Adapter)>(size);
0911     }
0912     void *operator new[](size_t , void *p_) { return p_; }
0913     void operator delete(void *ptr_, size_t) { Vc::Common::free(ptr_); }
0914     void operator delete(void *, void *) {}
0915     void operator delete[](void *ptr_, size_t) { Vc::Common::free(ptr_); }
0916     void operator delete[](void *, void *) {}
0917 };  // }}}
0918 // delete compare operators for Adapter {{{
0919 /**\internal
0920  * Delete compare operators for simdize<tuple<...>> types because the tuple compares
0921  * require the compares to be bool based.
0922  */
0923 template <class... TTypes, class... TTypesV, class... UTypes, class... UTypesV, size_t N>
0924 inline void operator==(
0925     const Adapter<std::tuple<TTypes...>, std::tuple<TTypesV...>, N> &t,
0926     const Adapter<std::tuple<UTypes...>, std::tuple<UTypesV...>, N> &u) = delete;
0927 template <class... TTypes, class... TTypesV, class... UTypes, class... UTypesV, size_t N>
0928 inline void operator!=(
0929     const Adapter<std::tuple<TTypes...>, std::tuple<TTypesV...>, N> &t,
0930     const Adapter<std::tuple<UTypes...>, std::tuple<UTypesV...>, N> &u) = delete;
0931 template <class... TTypes, class... TTypesV, class... UTypes, class... UTypesV, size_t N>
0932 inline void operator<=(
0933     const Adapter<std::tuple<TTypes...>, std::tuple<TTypesV...>, N> &t,
0934     const Adapter<std::tuple<UTypes...>, std::tuple<UTypesV...>, N> &u) = delete;
0935 template <class... TTypes, class... TTypesV, class... UTypes, class... UTypesV, size_t N>
0936 inline void operator>=(
0937     const Adapter<std::tuple<TTypes...>, std::tuple<TTypesV...>, N> &t,
0938     const Adapter<std::tuple<UTypes...>, std::tuple<UTypesV...>, N> &u) = delete;
0939 template <class... TTypes, class... TTypesV, class... UTypes, class... UTypesV, size_t N>
0940 inline void operator<(
0941     const Adapter<std::tuple<TTypes...>, std::tuple<TTypesV...>, N> &t,
0942     const Adapter<std::tuple<UTypes...>, std::tuple<UTypesV...>, N> &u) = delete;
0943 template <class... TTypes, class... TTypesV, class... UTypes, class... UTypesV, size_t N>
0944 inline void operator>(
0945     const Adapter<std::tuple<TTypes...>, std::tuple<TTypesV...>, N> &t,
0946     const Adapter<std::tuple<UTypes...>, std::tuple<UTypesV...>, N> &u) = delete;
0947 // }}}
0948 /** @}*/
0949 }  // namespace SimdizeDetail }}}
0950 }  // namespace Vc
0951 
0952 namespace std  // {{{
0953 {
0954 /**\internal
0955  * A std::tuple_size specialization for the SimdizeDetail::Adapter class.
0956  */
0957 template <typename Scalar, typename Base, size_t N>
0958 class tuple_size<Vc::SimdizeDetail::Adapter<Scalar, Base, N>> : public tuple_size<Base>
0959 {
0960 };
0961 /**\internal
0962  * A std::tuple_element specialization for the SimdizeDetail::Adapter class.
0963  */
0964 template <size_t I, typename Scalar, typename Base, size_t N>
0965 class tuple_element<I, Vc::SimdizeDetail::Adapter<Scalar, Base, N>>
0966     : public tuple_element<I, Base>
0967 {
0968 };
0969 // std::get does not need additional work because Vc::Adapter derives from
0970 // C<Ts...> and therefore if get<N>(C<Ts...>) works it works for Adapter as well.
0971 
0972 /**\internal
0973  * A std::allocator specialization for SimdizeDetail::Adapter which uses the Vc::Allocator
0974  * class to make allocation correctly aligned per default.
0975  */
0976 template <typename S, typename T, size_t N>
0977 class allocator<Vc::SimdizeDetail::Adapter<S, T, N>>
0978     : public Vc::Allocator<Vc::SimdizeDetail::Adapter<S, T, N>>
0979 {
0980 public:
0981     template <typename U> struct rebind
0982     {
0983         typedef std::allocator<U> other;
0984     };
0985 };
0986 }  // namespace std }}}
0987 
0988 namespace Vc_VERSIONED_NAMESPACE
0989 {
0990 namespace SimdizeDetail
0991 {
0992 /**\addtogroup Simdize
0993  * @{
0994  */
/**\internal
 * Stand-in for std::decay, which can ICE GCC for types declared as may_alias. Applying
 * decltype to a call of this function yields the decayed type, because template type
 * deduction strips the reference and const qualifier. The function returns a copy of
 * \p value.
 */
template <typename T>
static inline T decay_workaround(const T &value)
{
    return value;
}
1001 
1002 // assign_impl {{{
1003 /**\internal
1004  * Generic implementation of assign using the std::tuple get interface.
1005  */
1006 template <typename S, typename T, size_t N, size_t... Indexes>
1007 inline void assign_impl(Adapter<S, T, N> &a, size_t i, const S &x,
1008                         Vc::index_sequence<Indexes...>)
1009 {
1010     const std::tuple<decltype(decay_workaround(get_dispatcher<Indexes>(x)))...> tmp(
1011         decay_workaround(get_dispatcher<Indexes>(x))...);
1012     auto &&unused = {(get_dispatcher<Indexes>(a)[i] = get_dispatcher<Indexes>(tmp), 0)...};
1013     if (&unused == &unused) {}
1014 }  // }}}
1015 // construct (parens, braces, double-braces) {{{
/// Tag 0: builds an \p S with parenthesis initialization from the forwarded arguments.
template <class S, class... Args>
S construct(std::integral_constant<int, 0>, Args &&... init)
{
    return S(std::forward<Args>(init)...);
}

/// Tag 1: builds an \p S with single-brace initialization from the forwarded arguments.
template <class S, class... Args>
S construct(std::integral_constant<int, 1>, Args &&... init)
{
    return S{std::forward<Args>(init)...};
}

/// Tag 2: builds an \p S with double-brace initialization from the forwarded arguments.
template <class S, class... Args>
S construct(std::integral_constant<int, 2>, Args &&... init)
{
    return S{{std::forward<Args>(init)...}};
}
1031 // }}}
1032 // extract_impl {{{
1033 /**\internal
1034  * index_sequence based implementation for extract below.
1035  */
1036 template <typename S, typename T, size_t N, size_t... Indexes>
1037 inline S extract_impl(const Adapter<S, T, N> &a, size_t i, Vc::index_sequence<Indexes...>)
1038 {
1039     const std::tuple<decltype(decay_workaround(get_dispatcher<Indexes>(a)[i]))...> tmp(
1040         decay_workaround(get_dispatcher<Indexes>(a)[i])...);
1041     return construct<S>(
1042         preferred_construction<S, decltype(decay_workaround(
1043                                       get_dispatcher<Indexes>(a)[i]))...>(),
1044         decay_workaround(get_dispatcher<Indexes>(a)[i])...);
1045     //return S(get_dispatcher<Indexes>(tmp)...);
1046 }
1047 // }}}
1048 // shifted_impl {{{
1049 template <typename S, typename T, std::size_t N, std::size_t... Indexes>
1050 inline Adapter<S, T, N> shifted_impl(const Adapter<S, T, N> &a, int shift,
1051                                      Vc::index_sequence<Indexes...>)
1052 {
1053     Adapter<S, T, N> r;
1054     auto &&unused = {(get_dispatcher<Indexes>(r) = get_dispatcher<Indexes>(a).shifted(shift), 0)...};
1055     if (&unused == &unused) {}
1056     return r;
1057 }
1058 // }}}
1059 // shifted(Adapter) {{{
1060 /**
1061  * Returns a new vectorized object where each entry is shifted by \p shift. This basically
1062  * calls Vector<T>::shifted on every entry.
1063  *
1064  * \param a The object to apply the shift on.
1065  * \param shift The number of entries to shift by.
1066  * \returns a copy of \p a shifted by \p shift.
1067  */
1068 template <typename S, typename T, size_t N>
1069 inline Adapter<S, T, N> shifted(const Adapter<S, T, N> &a, int shift)
1070 {
1071     return shifted_impl(a, shift, Vc::make_index_sequence<determine_tuple_size<T>()>());
1072 }
1073 // }}}
1074 // swap_impl {{{
1075 /** \internal
1076  * Generic implementation of simdize_swap using the std::tuple get interface.
1077  */
1078 template <typename S, typename T, std::size_t N, std::size_t... Indexes>
1079 inline void swap_impl(Adapter<S, T, N> &a, std::size_t i, S &x,
1080                       Vc::index_sequence<Indexes...>)
1081 {
1082     const auto &a_const = a;
1083     const std::tuple<decltype(decay_workaround(get_dispatcher<Indexes>(a_const)[0]))...>
1084         tmp{decay_workaround(get_dispatcher<Indexes>(a_const)[i])...};
1085     auto &&unused = {(get_dispatcher<Indexes>(a)[i] = get_dispatcher<Indexes>(x), 0)...};
1086     auto &&unused2 = {(get_dispatcher<Indexes>(x) = get_dispatcher<Indexes>(tmp), 0)...};
1087     if (&unused == &unused2) {}
1088 }
1089 template <typename S, typename T, std::size_t N, std::size_t... Indexes>
1090 inline void swap_impl(Adapter<S, T, N> &a, std::size_t i, Adapter<S, T, N> &b,
1091                       std::size_t j, Vc::index_sequence<Indexes...>)
1092 {
1093     const auto &a_const = a;
1094     const auto &b_const = b;
1095     const std::tuple<decltype(decay_workaround(get_dispatcher<Indexes>(a_const)[0]))...>
1096         tmp{decay_workaround(get_dispatcher<Indexes>(a_const)[i])...};
1097     auto &&unused = {(get_dispatcher<Indexes>(a)[i] = get_dispatcher<Indexes>(b_const)[j], 0)...};
1098     auto &&unused2 = {(get_dispatcher<Indexes>(b)[j] = get_dispatcher<Indexes>(tmp), 0)...};
1099     if (&unused == &unused2) {}
1100 }
1101 // }}}
1102 // swap(Adapter) {{{
1103 /**
1104  * Swaps one scalar object \p x with a SIMD slot at offset \p i in the simdized object \p
1105  * a.
1106  */
1107 template <typename S, typename T, std::size_t N>
1108 inline void swap(Adapter<S, T, N> &a, std::size_t i, S &x)
1109 {
1110     swap_impl(a, i, x, Vc::make_index_sequence<determine_tuple_size<T>()>());
1111 }
1112 template <typename S, typename T, std::size_t N>
1113 inline void swap(Adapter<S, T, N> &a, std::size_t i, Adapter<S, T, N> &b, std::size_t j)
1114 {
1115     swap_impl(a, i, b, j, Vc::make_index_sequence<determine_tuple_size<T>()>());
1116 }
1117 // }}}
1118 template <typename A> class Scalar  // {{{
1119 {
1120     using reference = typename std::add_lvalue_reference<A>::type;
1121     using S = typename A::scalar_type;
1122     using IndexSeq = Vc::make_index_sequence<determine_tuple_size<S>()>;
1123 
1124 public:
1125     Scalar(reference aa, size_t ii) : a(aa), i(ii) {}
1126 
1127     // delete copy and move to keep the type a pure proxy temporary object.
1128     Scalar(const Scalar &) = delete;
1129     Scalar(Scalar &&) = delete;
1130     Scalar &operator=(const Scalar &) = delete;
1131     Scalar &operator=(Scalar &&) = delete;
1132 
1133     void operator=(const S &x) { assign_impl(a, i, x, IndexSeq()); }
1134     operator S() const { return extract_impl(a, i, IndexSeq()); }
1135 
1136     template <typename AA>
1137     friend inline void swap(Scalar<AA> &&a, typename AA::scalar_type &b);
1138     template <typename AA>
1139     friend inline void swap(typename AA::scalar_type &b, Scalar<AA> &&a);
1140     template <typename AA> friend inline void swap(Scalar<AA> &&a, Scalar<AA> &&b);
1141 
1142 private:
1143     reference a;
1144     size_t i;
1145 };  // }}}
1146 // swap(Scalar) {{{
1147 /// std::swap interface to swapping one scalar object with a (virtual) reference to
1148 /// another object inside a vectorized object
1149 template <typename A> inline void swap(Scalar<A> &&a, typename A::scalar_type &b)
1150 {
1151     swap_impl(a.a, a.i, b, typename Scalar<A>::IndexSeq());
1152 }
1153 /// std::swap interface to swapping one scalar object with a (virtual) reference to
1154 /// another object inside a vectorized object
1155 template <typename A> inline void swap(typename A::scalar_type &b, Scalar<A> &&a)
1156 {
1157     swap_impl(a.a, a.i, b, typename Scalar<A>::IndexSeq());
1158 }
1159 
1160 template <typename A> inline void swap(Scalar<A> &&a, Scalar<A> &&b)
1161 {
1162     swap_impl(a.a, a.i, b.a, b.i, typename Scalar<A>::IndexSeq());
1163 }
1164 // }}}
1165 // load_interleaved_impl {{{
1166 template <class S, class T, size_t N, size_t... I>
1167 inline void load_interleaved_impl(Vc::index_sequence<I...>, Adapter<S, T, N> &a,
1168                                   const S *mem)
1169 {
1170     const InterleavedMemoryWrapper<S, decltype(decay_workaround(get_dispatcher<0>(a)))>
1171     wrapper(const_cast<S *>(mem));
1172     Vc::tie(get_dispatcher<I>(a)...) = wrapper[0];
1173 }
1174 // }}}
1175 // store_interleaved_impl {{{
1176 template <class S, class T, size_t N, size_t... I>
1177 inline void store_interleaved_impl(Vc::index_sequence<I...>, const Adapter<S, T, N> &a,
1178                                    S *mem)
1179 {
1180     InterleavedMemoryWrapper<S, decltype(decay_workaround(get_dispatcher<0>(a)))> wrapper(
1181         mem);
1182     wrapper[0] = Vc::tie(get_dispatcher<I>(a)...);
1183 }
1184 // }}}
1185 template <typename A> class Interface  // {{{
1186 {
1187     using reference = typename std::add_lvalue_reference<A>::type;
1188     using IndexSeq =
1189         Vc::make_index_sequence<determine_tuple_size<typename A::scalar_type>()>;
1190 
1191 public:
1192     Interface(reference aa) : a(aa) {}
1193 
1194     Scalar<A> operator[](size_t i)
1195     {
1196         return {a, i};
1197     }
1198     typename A::scalar_type operator[](size_t i) const
1199     {
1200         return extract_impl(a, i, IndexSeq());
1201     }
1202 
1203     A shifted(int amount) const
1204     {
1205         return shifted_impl(a, amount, IndexSeq());
1206     }
1207 
1208     void load(const typename A::scalar_type *mem) { load_interleaved(*this, mem); }
1209     void store(typename A::scalar_type *mem) { store_interleaved(*this, mem); }
1210 
1211 private:
1212     reference a;
1213 };  // }}}
1214 }  // namespace SimdizeDetail
1215 // assign {{{
1216 /**
1217  * Assigns one scalar object \p x to a SIMD slot at offset \p i in the simdized object \p
1218  * a.
1219  */
1220 template <typename S, typename T, size_t N>
1221 inline void assign(SimdizeDetail::Adapter<S, T, N> &a, size_t i, const S &x)
1222 {
1223     SimdizeDetail::assign_impl(
1224         a, i, x, Vc::make_index_sequence<SimdizeDetail::determine_tuple_size<T>()>());
1225 }
1226 /**\internal
1227  * Overload for standard Vector/SimdArray types.
1228  */
1229 template <typename V, typename = enable_if<Traits::is_simd_vector<V>::value>>
1230 Vc_INTRINSIC void assign(V &v, size_t i, typename V::EntryType x)
1231 {
1232     v[i] = x;
1233 }
1234 // }}}
1235 // extract {{{
1236 /**
1237  * Extracts and returns one scalar object from a SIMD slot at offset \p i in the simdized
1238  * object \p a.
1239  */
1240 template <typename S, typename T, size_t N>
1241 inline S extract(const SimdizeDetail::Adapter<S, T, N> &a, size_t i)
1242 {
1243     return SimdizeDetail::extract_impl(
1244         a, i, Vc::make_index_sequence<SimdizeDetail::determine_tuple_size<S>()>());
1245 }
1246 /**\internal
1247  * Overload for standard Vector/SimdArray types.
1248  */
1249 template <typename V, typename = enable_if<Traits::is_simd_vector<V>::value>>
1250 Vc_INTRINSIC typename V::EntryType extract(const V &v, size_t i)
1251 {
1252     return v[i];
1253 }
1254 // }}}
1255 // load_interleaved {{{
1256 template <class S, class T, size_t N>
1257 inline void load_interleaved(SimdizeDetail::Adapter<S, T, N> &a, const S *mem)
1258 {
1259     if (SimdizeDetail::homogeneous_sizeof<S>::value == 0) {
1260         Common::unrolled_loop<std::size_t, 0, N>(
1261             [&](std::size_t i) { assign(a, i, mem[i]); });
1262     } else {
1263         constexpr size_t TupleSize = SimdizeDetail::determine_tuple_size_<S>::value;
1264         SimdizeDetail::load_interleaved_impl(Vc::make_index_sequence<TupleSize>(), a,
1265                                              mem);
1266     }
1267 }
1268 template <
1269     class V, class T,
1270     class = enable_if<Traits::is_simd_vector<V>::value && std::is_arithmetic<T>::value>>
1271 Vc_INTRINSIC void load_interleaved(V &a, const T *mem)
1272 {
1273     a.load(mem, Vc::Unaligned);
1274 }
1275 // }}}
1276 // store_interleaved {{{
1277 template <class S, class T, size_t N>
1278 inline void store_interleaved(const SimdizeDetail::Adapter<S, T, N> &a, S *mem)
1279 {
1280     if (SimdizeDetail::homogeneous_sizeof<S>::value == 0) {
1281         Common::unrolled_loop<std::size_t, 0, N>(
1282             [&](std::size_t i) { mem[i] = extract(a, i); });
1283     } else {
1284         constexpr size_t TupleSize = SimdizeDetail::determine_tuple_size_<S>::value;
1285         SimdizeDetail::store_interleaved_impl(Vc::make_index_sequence<TupleSize>(), a,
1286                                               mem);
1287     }
1288 }
1289 template <
1290     class V, class T,
1291     class = enable_if<Traits::is_simd_vector<V>::value && std::is_arithmetic<T>::value>>
1292 Vc_INTRINSIC void store_interleaved(const V &a, T *mem)
1293 {
1294     a.store(mem, Vc::Unaligned);
1295 }
1296 // }}}
1297 // decorate(Adapter) {{{
1298 template <typename S, typename T, size_t N>
1299 SimdizeDetail::Interface<SimdizeDetail::Adapter<S, T, N>> decorate(
1300     SimdizeDetail::Adapter<S, T, N> &a)
1301 {
1302     return {a};
1303 }
1304 template <typename S, typename T, size_t N>
1305 const SimdizeDetail::Interface<const SimdizeDetail::Adapter<S, T, N>> decorate(
1306     const SimdizeDetail::Adapter<S, T, N> &a)
1307 {
1308     return {a};
1309 }
1310 template <class V, class = typename std::enable_if<
1311                        Traits::is_simd_vector<typename std::decay<V>::type>::value>>
1312 V &&decorate(V &&v)
1313 {
1314     return std::forward<V>(v);
1315 }
1316 // }}}
1317 namespace SimdizeDetail
1318 {
1319 // Adapter::Adapter(F) Generator {{{
1320 template <typename Scalar, typename Base, size_t N>
1321 template <class F, class>
1322 Adapter<Scalar, Base, N>::Adapter(F &&fun)
1323 {
1324     for (size_t i = 0; i < N; ++i) {
1325         Vc::assign(*this, i, fun(i));
1326     }
1327 }
1328 // }}}
1329 namespace IteratorDetails  // {{{
1330 {
/// Selects between the mutable and the immutable variant of the Pointer and Reference
/// proxy classes.
enum class Mutable {
    Yes,  ///< the proxy writes values back through the scalar iterator
    No    ///< the proxy only reads through the scalar iterator
};
1332 
1333 template <typename It, typename V, size_t I, size_t End>
1334 Vc_INTRINSIC V fromIteratorImpl(enable_if<(I == End), It>)
1335 {
1336     return {};
1337 }
1338 template <typename It, typename V, size_t I, size_t End>
1339 Vc_INTRINSIC V fromIteratorImpl(enable_if<(I < End), It> it)
1340 {
1341     V r = fromIteratorImpl<It, V, I + 1, End>(it);
1342     Traits::decay<decltype(get_dispatcher<I>(r))> tmp;
1343     for (size_t j = 0; j < V::size(); ++j, ++it) {
1344         tmp[j] = get_dispatcher<I>(*it);
1345     }
1346     get_dispatcher<I>(r) = tmp;
1347     return r;
1348 }
1349 template <typename It, typename V>
1350 Vc_INTRINSIC V fromIterator(enable_if<!Traits::is_simd_vector<V>::value, const It &> it)
1351 {
1352     return fromIteratorImpl<It, V, 0, determine_tuple_size<V>()>(it);
1353 }
1354 
1355 template <typename It, typename V>
1356 Vc_INTRINSIC V fromIterator(
1357     enable_if<
1358         Traits::is_simd_vector<V>::value && Traits::has_contiguous_storage<It>::value, It>
1359         it)
1360 {
1361 #ifndef _MSC_VER
1362     // this check potentially moves it past the end of a container, which is UB. Some STL
1363     // implementations, like MS STL, trap this.
1364     Vc_ASSERT(&*it + 1 == &*(it + 1));
1365 #endif
1366     return V(&*it, Vc::Unaligned);
1367 }
1368 
1369 template <typename It, typename V>
1370 Vc_INTRINSIC V fromIterator(enable_if<Traits::is_simd_vector<V>::value &&
1371                                           !Traits::has_contiguous_storage<It>::value,
1372                                       It>
1373                                 it)
1374 {
1375     V r;
1376     for (size_t j = 0; j < V::size(); ++j, ++it) {
1377         r[j] = *it;
1378     }
1379     return r;
1380 }
1381 
1382 // Note: §13.5.6 says: “An expression x->m is interpreted as (x.operator->())->m for a
1383 // class object x of type T if T::operator->() exists and if the operator is selected as
1384 // the best match function by the overload resolution mechanism (13.3).”
1385 template <typename T, typename value_vector, Mutable> class Pointer;
1386 
1387 /**\internal
1388  * Proxy type for a pointer returned from operator->(). The mutable variant requires at
1389  * least a ForwardIterator. An InputIterator cannot work since no valid copies and
1390  * independent iteration can be guaranteed.
1391  *
1392  * The implementation creates the pointer-like behavior by creating an lvalue for the
1393  * proxied data. This
1394  */
1395 template <typename T, typename value_vector> class Pointer<T, value_vector, Mutable::Yes>
1396 {
1397     static constexpr auto Size = value_vector::size();
1398 
1399 public:
1400     /// \returns a pointer to the (temporary) member object.
1401     value_vector *operator->() { return &data; }
1402 
1403     /**
1404      * A Pointer can only be constructed from a scalar iterator or move constructed (for
1405      * function returns).
1406      */
1407     Pointer() = delete;
1408     Pointer(const Pointer &) = delete;
1409     Pointer &operator=(const Pointer &) = delete;
1410     Pointer &operator=(Pointer &&) = delete;
1411 
1412     /// required for returning the Pointer
1413     Pointer(Pointer &&) = default;
1414 
1415     /**
1416      * Writes the vectorized object back to the scalar objects referenced by the
1417      * iterator. This store is done unconditionally for the mutable variant of the
1418      * Pointer. The immutable Pointer OTOH does not store back at all.
1419      */
1420     ~Pointer()
1421     {
1422         // store data back to where it came from
1423         for (size_t i = 0; i < Size; ++i, ++begin_iterator) {
1424             *begin_iterator = extract(data, i);
1425         }
1426     }
1427 
1428     /// Construct the Pointer object from the values returned by the scalar iterator \p it.
1429     Pointer(const T &it) : data(fromIterator<T, value_vector>(it)), begin_iterator(it) {}
1430 
1431 private:
1432     /// The vectorized object needed for dereferencing the pointer.
1433     value_vector data;
1434     /// A copy of the scalar iterator, used for storing the results back.
1435     T begin_iterator;
1436 };
1437 /**\internal
1438  * The immutable variant of the Pointer class specialization above. It behaves the same as
1439  * the mutable Pointer except that it returns a const pointer from \c operator-> and
1440  * avoids the write back in the destructor.
1441  */
1442 template <typename T, typename value_vector> class Pointer<T, value_vector, Mutable::No>
1443 {
1444     static constexpr auto Size = value_vector::size();
1445 
1446 public:
1447     const value_vector *operator->() const { return &data; }
1448 
1449     Pointer() = delete;
1450     Pointer(const Pointer &) = delete;
1451     Pointer &operator=(const Pointer &) = delete;
1452     Pointer &operator=(Pointer &&) = delete;
1453 
1454     Pointer(Pointer &&) = default;  // required for returning the Pointer
1455 
1456     Pointer(const T &it) : data(fromIterator<T, value_vector>(it)) {}
1457 
1458 private:
1459     value_vector data;
1460 };
1461 
1462 /**\internal
1463  * The Reference class behaves as much as possible like a reference to an object of type
1464  * \p value_vector. The \p Mutable parameter determines whether the referenced object my
1465  * be modified or not (basically whether it's a ref or a const-ref, though the semantics
1466  * of mutable are actually stricter than that of const. Const only determines the logical
1467  * constness whereas mutability identifies the constness on the bit-level.)
1468  *
1469  * \tparam T The scalar iterator type.
1470  * \tparam value_vector The vector object the scalar iterator needs to fill.
1471  * \tparam M A flag that determines whether the reference acts as a mutable or immutable
1472  *           reference.
1473  */
1474 template <typename T, typename value_vector, Mutable M> class Reference;
1475 
1476 ///\internal mutable specialization of the Reference proxy class
1477 template <typename T, typename value_vector>
1478 class Reference<T, value_vector, Mutable::Yes> : public value_vector
1479 {
1480     static constexpr auto Size = value_vector::size();
1481 
1482     using reference = typename std::add_lvalue_reference<T>::type;
1483     reference scalar_it;
1484 
1485 public:
1486     /// Construct the reference from the given iterator \p first_it and store a reference
1487     /// to the iterator for write back in the assignment operator.
1488     Reference(reference first_it)
1489         : value_vector(fromIterator<T, value_vector>(first_it)), scalar_it(first_it)
1490     {
1491     }
1492 
1493     /// disable all copy and move operations, except the one needed for function returns
1494     Reference(const Reference &) = delete;
1495     Reference(Reference &&) = default;
1496     Reference &operator=(const Reference &) = delete;
1497     Reference &operator=(Reference &&) = delete;
1498 
1499     /**
1500      * Assignment to the reference assigns to the storage pointed to by the scalar
1501      * iterator as well as the reference object itself. (The compiler should eliminate the
1502      * store to \c this if it's never used since it is clearly a dead store.)
1503      */
1504     void operator=(const value_vector &x)
1505     {
1506         static_cast<value_vector &>(*this) = x;
1507         auto it = scalar_it;
1508         for (size_t i = 0; i < Size; ++i, ++it) {
1509             *it = extract(x, i);
1510         }
1511     }
1512 };
1513 #define Vc_OP(op_)                                                                       \
1514     template <typename T0, typename V0, typename T1, typename V1>                        \
1515     decltype(std::declval<const V0 &>() op_ std::declval<const V1 &>()) operator op_(    \
1516         const Reference<T0, V0, Mutable::Yes> &x,                                        \
1517         const Reference<T1, V1, Mutable::Yes> &y)                                        \
1518     {                                                                                    \
1519         return static_cast<const V0 &>(x) op_ static_cast<const V1 &>(y);                \
1520     }
1521 Vc_ALL_COMPARES(Vc_OP);
1522 Vc_ALL_ARITHMETICS(Vc_OP);
1523 Vc_ALL_BINARY(Vc_OP);
1524 Vc_ALL_LOGICAL(Vc_OP);
1525 Vc_ALL_SHIFTS(Vc_OP);
1526 #undef Vc_OP
1527 
1528 ///\internal immutable specialization of the Reference proxy class
1529 template <typename T, typename value_vector>
1530 class Reference<T, value_vector, Mutable::No> : public value_vector
1531 {
1532     static constexpr auto Size = value_vector::size();
1533 
1534 public:
1535     Reference(const T &it) : value_vector(fromIterator<T, value_vector>(it)) {}
1536 
1537     Reference(const Reference &) = delete;
1538     Reference(Reference &&) = default;
1539     Reference &operator=(const Reference &) = delete;
1540     Reference &operator=(Reference &&) = delete;
1541 
1542     /// Explicitly disable assignment to an immutable reference.
1543     void operator=(const value_vector &x) = delete;
1544 };
1545 
1546 template <typename T, size_t N,
1547           IteratorDetails::Mutable M =
1548               (Traits::is_output_iterator<T>::value ? Mutable::Yes : Mutable::No),
1549           typename V = simdize<typename std::iterator_traits<T>::value_type, N>,
1550           size_t Size = V::Size,
1551           typename = typename std::iterator_traits<T>::iterator_category>
1552 class Iterator;
1553 
1554 template <typename T, size_t N, IteratorDetails::Mutable M, typename V, size_t Size_>
1555 class Iterator<T, N, M, V, Size_, std::forward_iterator_tag>
1556 {
1557 public:
1558     using iterator_category = typename std::iterator_traits<T>::iterator_category;
1559     using difference_type = typename std::iterator_traits<T>::difference_type;
1560     using value_type = V;
1561     using pointer = IteratorDetails::Pointer<T, V, M>;
1562     using reference = IteratorDetails::Reference<T, V, M>;
1563     using const_pointer = IteratorDetails::Pointer<T, V, IteratorDetails::Mutable::No>;
1564     using const_reference =
1565         IteratorDetails::Reference<T, V, IteratorDetails::Mutable::No>;
1566 
1567     /// Returns the vector width the iterator covers with each step.
1568     static constexpr std::size_t size() { return Size_; }
1569     static constexpr std::size_t Size = Size_;
1570 
1571     Iterator() = default;
1572 
1573     /**
1574      * A vectorizing iterator is typically initialized from a scalar iterator. The
1575      * scalar iterator points to the first entry to place into the vectorized object.
1576      * Subsequent entries returned by the iterator are used to fill the rest of the
1577      * vectorized object.
1578      */
1579     Iterator(const T &x) : scalar_it(x) {}
1580     /**
1581      * Move optimization of the above constructor.
1582      */
1583     Iterator(T &&x) : scalar_it(std::move(x)) {}
1584     /**
1585      * Reset the vectorizing iterator to the given start point \p x.
1586      */
1587     Iterator &operator=(const T &x)
1588     {
1589         scalar_it = x;
1590         return *this;
1591     }
1592     /**
1593      * Move optimization of the above constructor.
1594      */
1595     Iterator &operator=(T &&x)
1596     {
1597         scalar_it = std::move(x);
1598         return *this;
1599     }
1600 
1601     /// Default copy constructor.
1602     Iterator(const Iterator &) = default;
1603     /// Default move constructor.
1604     Iterator(Iterator &&) = default;
1605     /// Default copy assignment.
1606     Iterator &operator=(const Iterator &) = default;
1607     /// Default move assignment.
1608     Iterator &operator=(Iterator &&) = default;
1609 
1610     /// Advances the iterator by one vector width, or respectively N scalar steps.
1611     Iterator &operator++()
1612     {
1613         std::advance(scalar_it, Size);
1614         return *this;
1615     }
1616     /// Postfix overload of the above.
1617     Iterator operator++(int)
1618     {
1619         Iterator copy(*this);
1620         operator++();
1621         return copy;
1622     }
1623 
1624     /**
1625      * Returns whether the two iterators point to the same scalar entry.
1626      *
1627      * \warning If the end iterator you compare against is not a multiple of the SIMD
1628      * width away from the incrementing iterator then the two iterators may pass each
1629      * other without ever comparing equal. In debug builds (when NDEBUG is not
1630      * defined) an assertion tries to locate such passing iterators.
1631      */
1632     bool operator==(const Iterator &rhs) const
1633     {
1634 #ifndef NDEBUG
1635         if (scalar_it == rhs.scalar_it) {
1636             return true;
1637         } else {
1638             T it(scalar_it);
1639             for (size_t i = 1; i < Size; ++i) {
1640                 Vc_ASSERT((++it != rhs.scalar_it));
1641             }
1642             return false;
1643         }
1644 #else
1645         return scalar_it == rhs.scalar_it;
1646 #endif
1647     }
1648     /**
1649      * Returns whether the two iterators point to different scalar entries.
1650      *
1651      * \warning If the end iterator you compare against is not a multiple of the SIMD
1652      * width away from the incrementing iterator then the two iterators may pass each
1653      * other without ever comparing equal. In debug builds (when NDEBUG is not
1654      * defined) an assertion tries to locate such passing iterators.
1655      */
1656     bool operator!=(const Iterator &rhs) const
1657     {
1658         return !operator==(rhs);
1659     }
1660 
1661     pointer operator->() { return scalar_it; }
1662 
1663     /**
1664      * Returns a copy of the objects behind the iterator in a vectorized type. You can use
1665      * the assignment operator to modify the values in the container referenced by the
1666      * iterator. Use of any other mutating operation is undefined behavior and will most
1667      * likely not be reflected in the container.
1668      */
1669     reference operator*() { return scalar_it; }
1670 
1671     const_pointer operator->() const { return scalar_it; }
1672 
1673     /**
1674      * Returns a copy of the objects behind the iterator in a vectorized type.
1675      *
1676      * \warning This does not behave like the standard iterator interface as it does not
1677      * return an lvalue reference. Thus, changes to the container the iterator references
1678      * will not be reflected in the reference object you receive.
1679      */
1680     const_reference operator*() const { return scalar_it; }
1681 
1682     /**
1683      * Returns a const lvalue reference to the underlying scalar iterator. This
1684      * effectively allows you to cast simdized iterator objects to their scalar ancestor
1685      * type.
1686      *
1687      * Example:
1688      * \code
1689         const auto mask = *it == value_v;
1690         if (any_of(mask)) {
1691           return static_cast<ScalarIt>(it) + mask.firstOne();
1692         }
1693      * \endcode
1694      */
1695     operator const T &() const { return scalar_it; }
1696 
1697 protected:
1698     T scalar_it;
1699 };
1700 
1701 /**
1702  * This is the iterator type created when applying simdize to a bidirectional
1703  * iterator type.
1704  */
1705 template <typename T, size_t N, IteratorDetails::Mutable M, typename V, size_t Size>
1706 class Iterator<T, N, M, V, Size, std::bidirectional_iterator_tag>
1707     : public Iterator<T, N, M, V, Size, std::forward_iterator_tag>
1708 {
1709     using Base = Iterator<T, N, M, V, Size, std::forward_iterator_tag>;
1710 
1711 protected:
1712     using Base::scalar_it;
1713 
1714 public:
1715     using pointer = typename Base::pointer;
1716     using reference = typename Base::reference;
1717     using const_pointer = typename Base::const_pointer;
1718     using const_reference = typename Base::const_reference;
1719 
1720     using Iterator<T, N, M, V, Size,
1721                    std::forward_iterator_tag>::Iterator;  // in short: "using
1722                                                           // Base::Iterator", but that
1723                                                           // confuses ICC
1724     /// Advances the iterator by one vector width, or respectively N scalar steps.
1725     Iterator &operator--()
1726     {
1727         std::advance(scalar_it, -Size);
1728         return *this;
1729     }
1730     /// Postfix overload of the above.
1731     Iterator operator--(int)
1732     {
1733         Iterator copy(*this);
1734         operator--();
1735         return copy;
1736     }
1737 };
1738 
1739 /**
1740  * This is the iterator type created when applying simdize to a random access iterator
1741  * type.
1742  */
1743 template <typename T, size_t N, IteratorDetails::Mutable M, typename V, size_t Size>
1744 class Iterator<T, N, M, V, Size, std::random_access_iterator_tag>
1745     : public Iterator<T, N, M, V, Size, std::bidirectional_iterator_tag>
1746 {
1747     using Base = Iterator<T, N, M, V, Size, std::bidirectional_iterator_tag>;
1748 
1749 protected:
1750     using Base::scalar_it;
1751 
1752 public:
1753     using pointer = typename Base::pointer;
1754     using reference = typename Base::reference;
1755     using const_pointer = typename Base::const_pointer;
1756     using const_reference = typename Base::const_reference;
1757     using difference_type = typename std::iterator_traits<T>::difference_type;
1758 
1759     using Iterator<T, N, M, V, Size, std::bidirectional_iterator_tag>::
1760         Iterator;  // in short: "using Base::Iterator", but that confuses ICC
1761 
1762     Iterator &operator+=(difference_type n)
1763     {
1764         scalar_it += n * difference_type(Size);
1765         return *this;
1766     }
1767     Iterator operator+(difference_type n) const { return Iterator(*this) += n; }
1768 
1769     Iterator &operator-=(difference_type n)
1770     {
1771         scalar_it -= n * difference_type(Size);
1772         return *this;
1773     }
1774     Iterator operator-(difference_type n) const { return Iterator(*this) -= n; }
1775 
1776     difference_type operator-(const Iterator &rhs) const
1777     {
1778         constexpr difference_type n = Size;
1779         Vc_ASSERT((scalar_it - rhs.scalar_it) % n ==
1780                   0);  // if this fails the two iterators are not a multiple of the vector
1781                        // width apart. The distance would be fractional and that doesn't
1782                        // make too much sense for iteration. Therefore, it is a
1783                        // precondition for the distance of the two iterators to be a
1784                        // multiple of Size.
1785         return (scalar_it - rhs.scalar_it) / n;
1786     }
1787 
1788     /**
1789      * Returns whether all entries accessed via iterator dereferencing come before the
1790      * iterator \p rhs.
1791      */
1792     bool operator<(const Iterator &rhs) const
1793     {
1794         return rhs.scalar_it - scalar_it >= difference_type(Size);
1795     }
1796 
1797     bool operator>(const Iterator &rhs) const
1798     {
1799         return scalar_it - rhs.scalar_it >= difference_type(Size);
1800     }
1801 
1802     bool operator<=(const Iterator &rhs) const
1803     {
1804         return rhs.scalar_it - scalar_it >= difference_type(Size) - 1;
1805     }
1806 
1807     bool operator>=(const Iterator &rhs) const
1808     {
1809         return scalar_it - rhs.scalar_it >= difference_type(Size) - 1;
1810     }
1811 
1812     reference operator[](difference_type i) { return *(*this + i); }
1813     const_reference operator[](difference_type i) const { return *(*this + i); }
1814 };
1815 
1816 template <typename T, size_t N, IteratorDetails::Mutable M, typename V, size_t Size>
1817 Iterator<T, N, M, V, Size, std::random_access_iterator_tag> operator+(
1818     typename Iterator<T, N, M, V, Size, std::random_access_iterator_tag>::difference_type
1819         n,
1820     const Iterator<T, N, M, V, Size, std::random_access_iterator_tag> &i)
1821 {
1822     return i + n;
1823 }
1824 
1825 }  // namespace IteratorDetails }}}
1826 
1827 /**\internal
1828  *
1829  * Creates a member type \p type that acts as a vectorizing bidirectional iterator.
1830  *
1831  * \tparam T The bidirectional iterator type to be transformed.
1832  * \tparam N The width the resulting vectorized type should have.
1833  * \tparam MT The base type to use for mask types. Ignored for this specialization.
1834  */
1835 template <typename T, size_t N, typename MT>
1836 struct ReplaceTypes<T, N, MT, Category::ForwardIterator>
1837 {
1838     using type = IteratorDetails::Iterator<T, N>;
1839 };
1840 template <typename T, size_t N, typename MT>
1841 struct ReplaceTypes<T, N, MT, Category::BidirectionalIterator>
1842 {
1843     using type = IteratorDetails::Iterator<T, N>;
1844 };
1845 template <typename T, size_t N, typename MT>
1846 struct ReplaceTypes<T, N, MT, Category::RandomAccessIterator>
1847 {
1848     using type = IteratorDetails::Iterator<T, N>;
1849 };
1850 
1851 /**\internal
1852  * Implementation for conditional assignment of whole vectorized objects.
1853  */
1854 template <Vc::Operator Op, typename S, typename T, std::size_t N, typename M, typename U,
1855           std::size_t Offset>
1856 Vc_INTRINSIC Vc::enable_if<(Offset >= determine_tuple_size_<S>::value && M::Size == N), void>
1857     conditional_assign(Adapter<S, T, N> &, const M &, const U &)
1858 {
1859 }
1860 template <Vc::Operator Op, typename S, typename T, std::size_t N, typename M, typename U,
1861           std::size_t Offset = 0>
1862 Vc_INTRINSIC Vc::enable_if<(Offset < determine_tuple_size_<S>::value && M::Size == N), void>
1863     conditional_assign(Adapter<S, T, N> &lhs, const M &mask, const U &rhs)
1864 {
1865     using V = typename std::decay<decltype(get_dispatcher<Offset>(lhs))>::type;
1866     using M2 = typename V::mask_type;
1867     conditional_assign<Op>(get_dispatcher<Offset>(lhs), simd_cast<M2>(mask), get_dispatcher<Offset>(rhs));
1868     conditional_assign<Op, S, T, N, M, U, Offset + 1>(lhs, mask, rhs);
1869 }
1870 template <Vc::Operator Op, typename S, typename T, std::size_t N, typename M,
1871           std::size_t Offset>
1872 Vc_INTRINSIC Vc::enable_if<(Offset >= determine_tuple_size_<S>::value && M::Size == N), void>
1873     conditional_assign(Adapter<S, T, N> &, const M &)
1874 {
1875 }
1876 template <Vc::Operator Op, typename S, typename T, std::size_t N, typename M,
1877           std::size_t Offset = 0>
1878 Vc_INTRINSIC Vc::enable_if<(Offset < determine_tuple_size_<S>::value && M::Size == N), void>
1879     conditional_assign(Adapter<S, T, N> &lhs, const M &mask)
1880 {
1881     using V = typename std::decay<decltype(get_dispatcher<Offset>(lhs))>::type;
1882     using M2 = typename V::mask_type;
1883     conditional_assign<Op>(get_dispatcher<Offset>(lhs), simd_cast<M2>(mask));
1884     conditional_assign<Op, S, T, N, M, Offset + 1>(lhs, mask);
1885 }
1886 
1887 /** @}*/
1888 }  // namespace SimdizeDetail
1889 
1890 // user API {{{
1891 /*!\ingroup Simdize
1892  * Vectorize/Simdize the given type T.
1893  *
1894  * \tparam T This type must be a class template instance where the template arguments can
1895  * be recursively replaced with their vectorized variant. If the type implements a
1896  * specific interface for introspection and member modification, the resulting type can
1897  * easily be constructed from objects of type T and scalar objects of type T can be
1898  * extracted from it.
1899  *
1900  * \tparam N This value determines the width of the vectorization. Per default it is set
1901  * to 0 making the implementation choose the value considering the compilation target and
1902  * the given type T.
1903  *
1904  * \tparam MT This type determines the type to be used when replacing bool with Mask<MT>.
1905  * If it is set to void the implementation choosed the type as smart as possible.
1906  *
1907  * \see Vc_SIMDIZE_STRUCT, Vc_SIMDIZE_MEMBER
1908  */
1909 template <typename T, size_t N = 0, typename MT = void>
1910 using simdize = SimdizeDetail::simdize<T, N, MT>;
1911 
/*!\ingroup Simdize
 * Declares functions and constants for introspection by the simdize functions. This
 * allows e.g. conversion between scalar \c T and \c simdize<T>.
 *
 * The macro adds a mutable and a const \c vc_get_<N_>() member function template, which
 * return the N_-th listed member via \c std::get on \c std::tie of the members, and an
 * enumerator \c tuple_size holding the number of listed members.
 *
 * \param MEMBERS_ The data members of this struct/class listed inside extra parentheses.
 * The extra parentheses are required because the macro would otherwise see a variable
 * number of arguments.
 *
 * Example:
 * \code
 * template <typename T, typename U> struct X {
 *   T a;
 *   U b;
 *   Vc_SIMDIZE_INTERFACE((a, b));
 * };
 * \endcode
 *
 * \note You must use this macro in the public section of a class.
 */
#define Vc_SIMDIZE_INTERFACE(MEMBERS_)                                                   \
    template <std::size_t N_>                                                            \
    auto vc_get_() -> decltype(std::get<N_>(std::tie MEMBERS_))                          \
    {                                                                                    \
        return std::get<N_>(std::tie MEMBERS_);                                          \
    }                                                                                    \
    template <std::size_t N_>                                                            \
    auto vc_get_() const -> decltype(std::get<N_>(std::tie MEMBERS_))                    \
    {                                                                                    \
        return std::get<N_>(std::tie MEMBERS_);                                          \
    }                                                                                    \
    enum : std::size_t {                                                                 \
        tuple_size = std::tuple_size<decltype(std::make_tuple MEMBERS_)>::value          \
    }
1945 // }}}
1946 }  // namespace Vc
1947 
1948 namespace std  // {{{
1949 {
1950 using Vc::SimdizeDetail::swap;
1951 }  // namespace std }}}
1952 
1953 #endif  // VC_COMMON_SIMDIZE_H_
1954 
1955 // vim: foldmethod=marker