xsimd/types/xsimd_api.hpp

0001 /***************************************************************************
0002  * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and         *
0003  * Martin Renou                                                             *
0004  * Copyright (c) QuantStack                                                 *
0005  * Copyright (c) Serge Guelton                                              *
0006  *                                                                          *
0007  * Distributed under the terms of the BSD 3-Clause License.                 *
0008  *                                                                          *
0009  * The full license is in the file LICENSE, distributed with this software. *
0010  ****************************************************************************/
0011
0012 #ifndef XSIMD_API_HPP
0013 #define XSIMD_API_HPP
0014
0015 #include <complex>
0016 #include <cstddef>
0017 #include <limits>
0018 #include <ostream>
0019
0020 #include "../arch/xsimd_isa.hpp"
0021 #include "../types/xsimd_batch.hpp"
0022 #include "../types/xsimd_traits.hpp"
0023
0024 namespace xsimd
0025 {
0026     /**
0027      * high level free functions
0028      *
0029      * @defgroup batch_arithmetic Arithmetic operators
0030      * @defgroup batch_constant Constant batches
0031      * @defgroup batch_cond Conditional operators
0032      * @defgroup batch_data_transfer Memory operators
0033      * @defgroup batch_math Basic math operators
0034      * @defgroup batch_math_extra Extra math operators
0035      * @defgroup batch_fp Floating point manipulation
0036      * @defgroup batch_rounding Rounding operators
0037      * @defgroup batch_conversion Conversion operators
0038      * @defgroup batch_complex Complex operators
0039      * @defgroup batch_logical Logical operators
0040      * @defgroup batch_bitwise Bitwise operators
0041      * @defgroup batch_reducers Reducers
0042      * @defgroup batch_miscellaneous Miscellaneous
0043      * @defgroup batch_trigo Trigonometry
0044      *
0045      * @defgroup batch_bool_logical Boolean logical operators
0046      * @defgroup batch_bool_reducers Boolean reducers
0047      */
0048
0049     /**
0050      * @ingroup batch_math
0051      *
0052      * Computes the absolute values of each scalar in the batch \c x.
0053      * @param x batch of integer or floating point values.
0054      * @return the absolute values of \c x.
0055      */
0056     template <class T, class A>
0057     XSIMD_INLINE batch<T, A> abs(batch<T, A> const& x) noexcept
0058     {
0059         detail::static_check_supported_config<T, A>();
0060         return kernel::abs<A>(x, A {});
0061     }
0062
0063     /**
0064      * @ingroup batch_complex
0065      *
0066      * Computes the absolute values of each complex in the batch \c z.
0067      * @param z batch of complex values.
0068      * @return the absolute values of \c z.
0069      */
0070     template <class T, class A>
0071     XSIMD_INLINE batch<T, A> abs(batch<std::complex<T>, A> const& z) noexcept
0072     {
0073         detail::static_check_supported_config<T, A>();
0074         return kernel::abs<A>(z, A {});
0075     }
0076
0077     /**
0078      * @ingroup batch_arithmetic
0079      *
0080      * Computes the sum of the batches \c x and \c y.
0081      * @param x batch or scalar involved in the addition.
0082      * @param y batch or scalar involved in the addition.
0083      * @return the sum of \c x and \c y
0084      */
0085     template <class T, class A>
0086     XSIMD_INLINE auto add(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x + y)
0087     {
0088         detail::static_check_supported_config<T, A>();
0089         return x + y;
0090     }
0091
0092     /**
0093      * @ingroup batch_trigo
0094      *
0095      * Computes the arc cosine of the batch \c x.
0096      * @param x batch of floating point values.
0097      * @return the arc cosine of \c x.
0098      */
0099     template <class T, class A>
0100     XSIMD_INLINE batch<T, A> acos(batch<T, A> const& x) noexcept
0101     {
0102         detail::static_check_supported_config<T, A>();
0103         return kernel::acos<A>(x, A {});
0104     }
0105
0106     /**
0107      * @ingroup batch_trigo
0108      *
0109      * Computes the inverse hyperbolic cosine of the batch \c x.
0110      * @param x batch of floating point values.
0111      * @return the inverse hyperbolic cosine of \c x.
0112      */
0113     template <class T, class A>
0114     XSIMD_INLINE batch<T, A> acosh(batch<T, A> const& x) noexcept
0115     {
0116         detail::static_check_supported_config<T, A>();
0117         return kernel::acosh<A>(x, A {});
0118     }
0119
0120     /**
0121      * @ingroup batch_complex
0122      *
0123      * Computes the argument of the batch \c z.
0124      * @param z batch of complex or real values.
0125      * @return the argument of \c z.
0126      */
0127     template <class T, class A>
0128     XSIMD_INLINE real_batch_type_t<batch<T, A>> arg(batch<T, A> const& z) noexcept
0129     {
0130         detail::static_check_supported_config<T, A>();
0131         return kernel::arg<A>(z, A {});
0132     }
0133
0134     /**
0135      * @ingroup batch_trigo
0136      *
0137      * Computes the arc sine of the batch \c x.
0138      * @param x batch of floating point values.
0139      * @return the arc sine of \c x.
0140      */
0141     template <class T, class A>
0142     XSIMD_INLINE batch<T, A> asin(batch<T, A> const& x) noexcept
0143     {
0144         detail::static_check_supported_config<T, A>();
0145         return kernel::asin<A>(x, A {});
0146     }
0147
0148     /**
0149      * @ingroup batch_trigo
0150      *
0151      * Computes the inverse hyperbolic sine of the batch \c x.
0152      * @param x batch of floating point values.
0153      * @return the inverse hyperbolic sine of \c x.
0154      */
0155     template <class T, class A>
0156     XSIMD_INLINE batch<T, A> asinh(batch<T, A> const& x) noexcept
0157     {
0158         detail::static_check_supported_config<T, A>();
0159         return kernel::asinh<A>(x, A {});
0160     }
0161
0162     /**
0163      * @ingroup batch_trigo
0164      *
0165      * Computes the arc tangent of the batch \c x.
0166      * @param x batch of floating point values.
0167      * @return the arc tangent of \c x.
0168      */
0169     template <class T, class A>
0170     XSIMD_INLINE batch<T, A> atan(batch<T, A> const& x) noexcept
0171     {
0172         detail::static_check_supported_config<T, A>();
0173         return kernel::atan<A>(x, A {});
0174     }
0175
0176     /**
0177      * @ingroup batch_trigo
0178      *
0179      * Computes the arc tangent of the batch \c x/y, using the signs of the
0180      * arguments to determine the correct quadrant.
0181      * @param x batch of floating point values.
0182      * @param y batch of floating point values.
0183      * @return the arc tangent of \c x/y.
0184      */
0185     template <class T, class A>
0186     XSIMD_INLINE batch<T, A> atan2(batch<T, A> const& x, batch<T, A> const& y) noexcept
0187     {
0188         detail::static_check_supported_config<T, A>();
0189         return kernel::atan2<A>(x, y, A {});
0190     }
0191
0192     /**
0193      * @ingroup batch_trigo
0194      *
0195      * Computes the inverse hyperbolic tangent of the batch \c x.
0196      * @param x batch of floating point values.
0197      * @return the inverse hyperbolic tangent of \c x.
0198      */
0199     template <class T, class A>
0200     XSIMD_INLINE batch<T, A> atanh(batch<T, A> const& x) noexcept
0201     {
0202         detail::static_check_supported_config<T, A>();
0203         return kernel::atanh<A>(x, A {});
0204     }
0205
0206     /**
0207      * @ingroup batch_math
0208      *
0209      * Computes the average of batches \c x and \c y
0210      * @param x batch of T
0211      * @param y batch of T
0212      * @return the average of elements between \c x and \c y.
0213      */
0214     template <class T, class A>
0215     XSIMD_INLINE batch<T, A> avg(batch<T, A> const& x, batch<T, A> const& y) noexcept
0216     {
0217         detail::static_check_supported_config<T, A>();
0218         return kernel::avg<A>(x, y, A {});
0219     }
0220
0221     /**
0222      * @ingroup batch_math
0223      *
0224      * Computes the rounded average of batches \c x and \c y
0225      * @param x batch of T
0226      * @param y batch of T
0227      * @return the rounded average of elements between \c x and \c y.
0228      */
0229     template <class T, class A>
0230     XSIMD_INLINE batch<T, A> avgr(batch<T, A> const& x, batch<T, A> const& y) noexcept
0231     {
0232         detail::static_check_supported_config<T, A>();
0233         return kernel::avgr<A>(x, y, A {});
0234     }
0235
0236     /**
0237      * @ingroup batch_conversion
0238      *
0239      * Perform a static_cast from \c T_in to \c T_out on \c \c x.
0240      * @param x batch_bool of \c T_in
0241      * @return \c x cast to \c T_out
0242      */
0243     template <class T_out, class T_in, class A>
0244     XSIMD_INLINE batch_bool<T_out, A> batch_bool_cast(batch_bool<T_in, A> const& x) noexcept
0245     {
0246         detail::static_check_supported_config<T_out, A>();
0247         detail::static_check_supported_config<T_in, A>();
0248         static_assert(batch_bool<T_out, A>::size == batch_bool<T_in, A>::size, "Casting between incompatibles batch_bool types.");
0249         return kernel::batch_bool_cast<A>(x, batch_bool<T_out, A> {}, A {});
0250     }
0251
0252     /**
0253      * @ingroup batch_conversion
0254      *
0255      * Perform a static_cast from \c T_in to \c T_out on \c \c x.
0256      * @param x batch of \c T_in
0257      * @return \c x cast to \c T_out
0258      */
0259     template <class T_out, class T_in, class A>
0260     XSIMD_INLINE batch<T_out, A> batch_cast(batch<T_in, A> const& x) noexcept
0261     {
0262         detail::static_check_supported_config<T_out, A>();
0263         detail::static_check_supported_config<T_in, A>();
0264         return kernel::batch_cast<A>(x, batch<T_out, A> {}, A {});
0265     }
0266
0267     /**
0268      * @ingroup batch_miscellaneous
0269      *
0270      * Computes the bit of sign of \c x
0271      * @param x batch of scalar
0272      * @return bit of sign of \c x
0273      */
0274     template <class T, class A>
0275     XSIMD_INLINE batch<T, A> bitofsign(batch<T, A> const& x) noexcept
0276     {
0277         detail::static_check_supported_config<T, A>();
0278         return kernel::bitofsign<A>(x, A {});
0279     }
0280
0281     /**
0282      * @ingroup batch_bitwise
0283      *
0284      * Computes the bitwise and of the batches \c x and \c y.
0285      * @param x batch involved in the operation.
0286      * @param y batch involved in the operation.
0287      * @return the result of the bitwise and.
0288      */
0289     template <class T, class A>
0290     XSIMD_INLINE auto bitwise_and(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x & y)
0291     {
0292         detail::static_check_supported_config<T, A>();
0293         return x & y;
0294     }
0295
0296     /**
0297      * @ingroup batch_bitwise
0298      *
0299      * Computes the bitwise and of the batches \c x and \c y.
0300      * @param x batch involved in the operation.
0301      * @param y batch involved in the operation.
0302      * @return the result of the bitwise and.
0303      */
0304     template <class T, class A>
0305     XSIMD_INLINE auto bitwise_and(batch_bool<T, A> const& x, batch_bool<T, A> const& y) noexcept -> decltype(x & y)
0306     {
0307         detail::static_check_supported_config<T, A>();
0308         return x & y;
0309     }
0310
0311     /**
0312      * @ingroup batch_bitwise
0313      *
0314      * Computes the bitwise and not of batches \c x and \c y.
0315      * @param x batch involved in the operation.
0316      * @param y batch involved in the operation.
0317      * @return the result of the bitwise and not.
0318      */
0319     template <class T, class A>
0320     XSIMD_INLINE batch<T, A> bitwise_andnot(batch<T, A> const& x, batch<T, A> const& y) noexcept
0321     {
0322         detail::static_check_supported_config<T, A>();
0323         return kernel::bitwise_andnot<A>(x, y, A {});
0324     }
0325
0326     /**
0327      * @ingroup batch_bool_logical
0328      *
0329      * Computes the bitwise and not of batches \c x and \c y.
0330      * @param x batch involved in the operation.
0331      * @param y batch involved in the operation.
0332      * @return the result of the bitwise and not.
0333      */
0334     template <class T, class A>
0335     XSIMD_INLINE batch_bool<T, A> bitwise_andnot(batch_bool<T, A> const& x, batch_bool<T, A> const& y) noexcept
0336     {
0337         detail::static_check_supported_config<T, A>();
0338         return kernel::bitwise_andnot<A>(x, y, A {});
0339     }
0340
0341     /**
0342      * @ingroup batch_conversion
0343      *
0344      * Perform a reinterpret_cast from \c T_in to \c T_out on \c x.
0345      * @param x batch of \c T_in
0346      * @return \c x reinterpreted as \c T_out
0347      */
0348     template <class T_out, class T_in, class A>
0349     XSIMD_INLINE batch<T_out, A> bitwise_cast(batch<T_in, A> const& x) noexcept
0350     {
0351         detail::static_check_supported_config<T_in, A>();
0352         detail::static_check_supported_config<T_out, A>();
0353         return kernel::bitwise_cast<A>(x, batch<T_out, A> {}, A {});
0354     }
0355
0356     /**
0357      * @ingroup batch_bitwise
0358      *
0359      * Perform a bitwise shift to the left
0360      * @param x batch of \c T_in
0361      * @param shift scalar amount to shift
0362      * @return shifted \c x.
0363      */
0364     template <class T, class A>
0365     XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& x, int shift) noexcept
0366     {
0367         detail::static_check_supported_config<T, A>();
0368         return kernel::bitwise_lshift<A>(x, shift, A {});
0369     }
0370     template <class T, class A>
0371     XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& x, batch<T, A> const& shift) noexcept
0372     {
0373         detail::static_check_supported_config<T, A>();
0374         return kernel::bitwise_lshift<A>(x, shift, A {});
0375     }
0376
0377     /**
0378      * @ingroup batch_bitwise
0379      *
0380      * Computes the bitwise not of batch \c x.
0381      * @param x batch involved in the operation.
0382      * @return the result of the bitwise not.
0383      */
0384     template <class T, class A>
0385     XSIMD_INLINE batch<T, A> bitwise_not(batch<T, A> const& x) noexcept
0386     {
0387         detail::static_check_supported_config<T, A>();
0388         return kernel::bitwise_not<A>(x, A {});
0389     }
0390
0391     /**
0392      * @ingroup batch_bitwise
0393      *
0394      * Computes the bitwise not of batch \c x.
0395      * @param x batch involved in the operation.
0396      * @return the result of the bitwise not.
0397      */
0398     template <class T, class A>
0399     XSIMD_INLINE batch_bool<T, A> bitwise_not(batch_bool<T, A> const& x) noexcept
0400     {
0401         detail::static_check_supported_config<T, A>();
0402         return kernel::bitwise_not<A>(x, A {});
0403     }
0404
0405     /**
0406      * @ingroup batch_bitwise
0407      *
0408      * Computes the bitwise or of the batches \c x and \c y.
0409      * @param x scalar or batch of scalars
0410      * @param y scalar or batch of scalars
0411      * @return the result of the bitwise or.
0412      */
0413     template <class T, class A>
0414     XSIMD_INLINE auto bitwise_or(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x | y)
0415     {
0416         detail::static_check_supported_config<T, A>();
0417         return x | y;
0418     }
0419
0420     /**
0421      * @ingroup batch_bitwise
0422      *
0423      * Computes the bitwise or of the batches \c x and \c y.
0424      * @param x scalar or batch of scalars
0425      * @param y scalar or batch of scalars
0426      * @return the result of the bitwise or.
0427      */
0428     template <class T, class A>
0429     XSIMD_INLINE auto bitwise_or(batch_bool<T, A> const& x, batch_bool<T, A> const& y) noexcept -> decltype(x | y)
0430     {
0431         detail::static_check_supported_config<T, A>();
0432         return x | y;
0433     }
0434
0435     /**
0436      * @ingroup batch_bitwise
0437      *
0438      * Perform a bitwise shift to the right
0439      * @param x batch of \c T_in
0440      * @param shift scalar amount to shift
0441      * @return shifted \c x.
0442      */
0443     template <class T, class A>
0444     XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& x, int shift) noexcept
0445     {
0446         detail::static_check_supported_config<T, A>();
0447         return kernel::bitwise_rshift<A>(x, shift, A {});
0448     }
0449     template <class T, class A>
0450     XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& x, batch<T, A> const& shift) noexcept
0451     {
0452         detail::static_check_supported_config<T, A>();
0453         return kernel::bitwise_rshift<A>(x, shift, A {});
0454     }
0455
0456     /**
0457      * @ingroup batch_bitwise
0458      *
0459      * Computes the bitwise xor of the batches \c x and \c y.
0460      * @param x scalar or batch of scalars
0461      * @param y scalar or batch of scalars
0462      * @return the result of the bitwise xor.
0463      */
0464     template <class T, class A>
0465     XSIMD_INLINE auto bitwise_xor(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x ^ y)
0466     {
0467         detail::static_check_supported_config<T, A>();
0468         return x ^ y;
0469     }
0470
0471     /**
0472      * @ingroup batch_bitwise
0473      *
0474      * Computes the bitwise xor of the batches \c x and \c y.
0475      * @param x scalar or batch of scalars
0476      * @param y scalar or batch of scalars
0477      * @return the result of the bitwise xor.
0478      */
0479     template <class T, class A>
0480     XSIMD_INLINE auto bitwise_xor(batch_bool<T, A> const& x, batch_bool<T, A> const& y) noexcept -> decltype(x ^ y)
0481     {
0482         detail::static_check_supported_config<T, A>();
0483         return x ^ y;
0484     }
0485
0486     /**
0487      * @ingroup batch_data_transfer
0488      *
0489      * Creates a batch from the single value \c v.
0490      * @param v the value used to initialize the batch
0491      * @return a new batch instance
0492      */
0493     template <class T, class A = default_arch>
0494     XSIMD_INLINE batch<T, A> broadcast(T v) noexcept
0495     {
0496         detail::static_check_supported_config<T, A>();
0497         return batch<T, A>::broadcast(v);
0498     }
0499
0500     /**
0501      * @ingroup batch_data_transfer
0502      *
0503      * Creates a batch from the single value \c v and
0504      * the specified batch value type \c To.
0505      * @param v the value used to initialize the batch
0506      * @return a new batch instance
0507      */
0508     template <class To, class A = default_arch, class From>
0509     XSIMD_INLINE simd_return_type<From, To, A> broadcast_as(From v) noexcept
0510     {
0511         detail::static_check_supported_config<From, A>();
0512         using batch_value_type = typename simd_return_type<From, To, A>::value_type;
0513         using value_type = typename std::conditional<std::is_same<From, bool>::value,
0514                                                      bool,
0515                                                      batch_value_type>::type;
0516         return simd_return_type<From, To, A>(value_type(v));
0517     }
0518
0519     /**
0520      * @ingroup batch_math
0521      *
0522      * Computes the cubic root of the batch \c x.
0523      * @param x batch of floating point values.
0524      * @return the cubic root of \c x.
0525      */
0526     template <class T, class A>
0527     XSIMD_INLINE batch<T, A> cbrt(batch<T, A> const& x) noexcept
0528     {
0529         detail::static_check_supported_config<T, A>();
0530         return kernel::cbrt<A>(x, A {});
0531     }
0532
0533     /**
0534      * @ingroup batch_rounding
0535      *
0536      * Computes the batch of smallest integer values not less than
0537      * scalars in \c x.
0538      * @param x batch of floating point values.
0539      * @return the batch of smallest integer values not less than \c x.
0540      */
0541     template <class T, class A>
0542     XSIMD_INLINE batch<T, A> ceil(batch<T, A> const& x) noexcept
0543     {
0544         detail::static_check_supported_config<T, A>();
0545         return kernel::ceil<A>(x, A {});
0546     }
0547
0548     /**
0549      * @ingroup batch_math
0550      *
0551      * Clips the values of the batch \c x between those of the batches \c lo and \c hi.
0552      * @param x batch of scalar values.
0553      * @param lo batch of scalar values.
0554      * @param hi batch of scalar values.
0555      * @return the result of the clipping.
0556      */
0557     template <class T, class A>
0558     XSIMD_INLINE batch<T, A> clip(batch<T, A> const& x, batch<T, A> const& lo, batch<T, A> const& hi) noexcept
0559     {
0560         detail::static_check_supported_config<T, A>();
0561         return kernel::clip(x, lo, hi, A {});
0562     }
0563
0564     /**
0565      * @ingroup batch_data_transfer
0566      *
0567      * Pick elements from \c x selected by \c mask, and append them to the
0568      * resulting vector, zeroing the remaining slots
0569      */
0570     template <class T, class A>
0571     XSIMD_INLINE batch<T, A> compress(batch<T, A> const& x, batch_bool<T, A> const& mask) noexcept
0572     {
0573         detail::static_check_supported_config<T, A>();
0574         return kernel::compress<A>(x, mask, A {});
0575     }
0576
0577     /**
0578      * @ingroup batch_complex
0579      *
0580      * Computes the conjugate of the batch \c z.
0581      * @param z batch of complex values.
0582      * @return the argument of \c z.
0583      */
0584     template <class A, class T>
0585     XSIMD_INLINE complex_batch_type_t<batch<T, A>> conj(batch<T, A> const& z) noexcept
0586     {
0587         return kernel::conj(z, A {});
0588     }
0589
0590     /**
0591      * @ingroup batch_miscellaneous
0592      *
0593      * Computes a value whose  absolute  value  matches
0594      *        that of \c x, but whose sign bit matches that of \c y.
0595      * @param x batch of scalars
0596      * @param y batch of scalars
0597      * @return batch whose absolute  value  matches that of \c x, but whose sign bit
0598      * matches that of \c y.
0599      */
0600     template <class T, class A>
0601     XSIMD_INLINE batch<T, A> copysign(batch<T, A> const& x, batch<T, A> const& y) noexcept
0602     {
0603         detail::static_check_supported_config<T, A>();
0604         return kernel::copysign<A>(x, y, A {});
0605     }
0606
0607     /**
0608      * @ingroup batch_trigo
0609      *
0610      * Computes the cosine of the batch \c x.
0611      * @param x batch of floating point values.
0612      * @return the cosine of \c x.
0613      */
0614     template <class T, class A>
0615     XSIMD_INLINE batch<T, A> cos(batch<T, A> const& x) noexcept
0616     {
0617         detail::static_check_supported_config<T, A>();
0618         return kernel::cos<A>(x, A {});
0619     }
0620
0621     /**
0622      * @ingroup batch_trigo
0623      *
0624      * computes the hyperbolic cosine of the batch \c x.
0625      * @param x batch of floating point values.
0626      * @return the hyperbolic cosine of \c x.
0627      */
0628     template <class T, class A>
0629     XSIMD_INLINE batch<T, A> cosh(batch<T, A> const& x) noexcept
0630     {
0631         detail::static_check_supported_config<T, A>();
0632         return kernel::cosh<A>(x, A {});
0633     }
0634
0635     /**
0636      * @ingroup batch_reducers
0637      *
0638      * Count the number of values set to true in the batch \c x
0639      * @param x boolean or batch of boolean
0640      * @return the result of the counting.
0641      */
0642     template <class T, class A>
0643     XSIMD_INLINE size_t count(batch_bool<T, A> const& x) noexcept
0644     {
0645         detail::static_check_supported_config<T, A>();
0646         return kernel::count<A>(x, A {});
0647     }
0648
0649     /**
0650      * @ingroup batch_arithmetic
0651      *
0652      * Subtract 1 to batch \c x.
0653      * @param x batch involved in the decrement.
0654      * @return the subtraction of \c x and 1.
0655      */
0656     template <class T, class A>
0657     XSIMD_INLINE batch<T, A> decr(batch<T, A> const& x) noexcept
0658     {
0659         detail::static_check_supported_config<T, A>();
0660         return kernel::decr<A>(x, A {});
0661     }
0662
0663     /**
0664      * @ingroup batch_arithmetic
0665      *
0666      * Subtract 1 to batch \c x for each element where \c mask is true.
0667      * @param x batch involved in the increment.
0668      * @param mask whether to perform the increment or not. Can be a \c
0669      *             batch_bool or a \c batch_bool_constant.
0670      * @return the subtraction of \c x and 1 when \c mask is true.
0671      */
0672     template <class T, class A, class Mask>
0673     XSIMD_INLINE batch<T, A> decr_if(batch<T, A> const& x, Mask const& mask) noexcept
0674     {
0675         detail::static_check_supported_config<T, A>();
0676         return kernel::decr_if<A>(x, mask, A {});
0677     }
0678
0679     /**
0680      * @ingroup batch_arithmetic
0681      *
0682      * Computes the division of the batch \c x by the batch \c y.
0683      * @param x scalar or batch of scalars
0684      * @param y scalar or batch of scalars
0685      * @return the result of the division.
0686      */
0687     template <class T, class A>
0688     XSIMD_INLINE auto div(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x / y)
0689     {
0690         detail::static_check_supported_config<T, A>();
0691         return x / y;
0692     }
0693
0694     /**
0695      * @ingroup batch_logical
0696      *
0697      * Element-wise equality comparison of batches \c x and \c y.
0698      * @param x batch of scalars
0699      * @param y batch of scalars
0700      * @return a boolean batch.
0701      */
0702     template <class T, class A>
0703     XSIMD_INLINE auto eq(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x == y)
0704     {
0705         detail::static_check_supported_config<T, A>();
0706         return x == y;
0707     }
0708
0709     /**
0710      * @ingroup batch_logical
0711      *
0712      * Element-wise equality comparison of batches of boolean values \c x and \c y.
0713      * @param x batch of booleans involved in the comparison.
0714      * @param y batch of booleans involved in the comparison.
0715      * @return a boolean batch.
0716      */
0717     template <class T, class A>
0718     XSIMD_INLINE auto eq(batch_bool<T, A> const& x, batch_bool<T, A> const& y) noexcept -> decltype(x == y)
0719     {
0720         detail::static_check_supported_config<T, A>();
0721         return x == y;
0722     }
0723
0724     /**
0725      * @ingroup batch_math
0726      *
0727      * Computes the natural exponential of the batch \c x.
0728      * @param x batch of floating point values.
0729      * @return the natural exponential of \c x.
0730      */
0731     template <class T, class A>
0732     XSIMD_INLINE batch<T, A> exp(batch<T, A> const& x) noexcept
0733     {
0734         detail::static_check_supported_config<T, A>();
0735         return kernel::exp<A>(x, A {});
0736     }
0737
0738     /**
0739      * @ingroup batch_math
0740      *
0741      * Computes the base 10 exponential of the batch \c x.
0742      * @param x batch of floating point values.
0743      * @return the base 10 exponential of \c x.
0744      */
0745     template <class T, class A>
0746     XSIMD_INLINE batch<T, A> exp10(batch<T, A> const& x) noexcept
0747     {
0748         detail::static_check_supported_config<T, A>();
0749         return kernel::exp10<A>(x, A {});
0750     }
0751
0752     /**
0753      * @ingroup batch_math
0754      *
0755      * Computes the base 2 exponential of the batch \c x.
0756      * @param x batch of floating point values.
0757      * @return the base 2 exponential of \c x.
0758      */
0759     template <class T, class A>
0760     XSIMD_INLINE batch<T, A> exp2(batch<T, A> const& x) noexcept
0761     {
0762         detail::static_check_supported_config<T, A>();
0763         return kernel::exp2<A>(x, A {});
0764     }
0765
0766     /**
0767      * @ingroup batch_data_transfer
0768      *
0769      * Load contiguous elements from \c x and place them in slots selected by \c
0770      * mask, zeroing the other slots
0771      */
0772     template <class T, class A>
0773     XSIMD_INLINE batch<T, A> expand(batch<T, A> const& x, batch_bool<T, A> const& mask) noexcept
0774     {
0775         detail::static_check_supported_config<T, A>();
0776         return kernel::expand<A>(x, mask, A {});
0777     }
0778
0779     /**
0780      * @ingroup batch_math
0781      *
0782      * Computes the natural exponential of the batch \c x, minus one.
0783      * @param x batch of floating point values.
0784      * @return the natural exponential of \c x, minus one.
0785      */
0786     template <class T, class A>
0787     XSIMD_INLINE batch<T, A> expm1(batch<T, A> const& x) noexcept
0788     {
0789         detail::static_check_supported_config<T, A>();
0790         return kernel::expm1<A>(x, A {});
0791     }
0792
0793     /**
0794      * @ingroup batch_math_extra
0795      *
0796      * Computes the error function of the batch \c x.
0797      * @param x batch of floating point values.
0798      * @return the error function of \c x.
0799      */
0800     template <class T, class A>
0801     XSIMD_INLINE batch<T, A> erf(batch<T, A> const& x) noexcept
0802     {
0803         detail::static_check_supported_config<T, A>();
0804         return kernel::erf<A>(x, A {});
0805     }
0806
0807     /**
0808      * @ingroup batch_math_extra
0809      *
0810      * Computes the complementary error function of the batch \c x.
0811      * @param x batch of floating point values.
0812      * @return the error function of \c x.
0813      */
0814     template <class T, class A>
0815     XSIMD_INLINE batch<T, A> erfc(batch<T, A> const& x) noexcept
0816     {
0817         detail::static_check_supported_config<T, A>();
0818         return kernel::erfc<A>(x, A {});
0819     }
0820
0821     /**
0822      * Extract vector from pair of vectors
0823      * extracts the lowest vector elements from the second source \c x
0824      * and the highest vector elements from the first source \c y
0825      * Concatenates the results into th Return value.
0826      * @param x batch of integer or floating point values.
0827      * @param y batch of integer or floating point values.
0828      * @param i integer specifying the lowest vector element to extract from the first source register
0829      * @return.
0830      */
0831     template <class T, class A>
0832     XSIMD_INLINE batch<T, A> extract_pair(batch<T, A> const& x, batch<T, A> const& y, std::size_t i) noexcept
0833     {
0834         detail::static_check_supported_config<T, A>();
0835         return kernel::extract_pair<A>(x, y, i, A {});
0836     }
0837
0838     /**
0839      * @ingroup batch_math
0840      *
0841      * Computes the absolute values of each scalar in the batch \c x.
0842      * @param x batch floating point values.
0843      * @return the absolute values of \c x.
0844      */
0845     template <class T, class A>
0846     XSIMD_INLINE batch<T, A> fabs(batch<T, A> const& x) noexcept
0847     {
0848         detail::static_check_supported_config<T, A>();
0849         return kernel::abs<A>(x, A {});
0850     }
0851
0852     /**
0853      * @ingroup batch_math
0854      *
0855      * Computes the positive difference between \c x and \c y, that is,
0856      * <tt>max(0, x-y)</tt>.
0857      * @param x batch of floating point values.
0858      * @param y batch of floating point values.
0859      * @return the positive difference.
0860      */
0861     template <class T, class A>
0862     XSIMD_INLINE batch<T, A> fdim(batch<T, A> const& x, batch<T, A> const& y) noexcept
0863     {
0864         detail::static_check_supported_config<T, A>();
0865         return kernel::fdim<A>(x, y, A {});
0866     }
0867
0868     /**
0869      * @ingroup batch_rounding
0870      *
0871      * Computes the batch of largest integer values not greater than
0872      * scalars in \c x.
0873      * @param x batch of floating point values.
0874      * @return the batch of largest integer values not greater than \c x.
0875      */
0876     template <class T, class A>
0877     XSIMD_INLINE batch<T, A> floor(batch<T, A> const& x) noexcept
0878     {
0879         detail::static_check_supported_config<T, A>();
0880         return kernel::floor<A>(x, A {});
0881     }
0882
0883     /**
0884      * @ingroup batch_arithmetic
0885      *
0886      * Computes <tt>(x*y) + z</tt> in a single instruction when possible.
0887      * @param x a batch of integer or floating point values.
0888      * @param y a batch of integer or floating point values.
0889      * @param z a batch of integer or floating point values.
0890      * @return the result of the fused multiply-add operation.
0891      */
0892     template <class T, class A>
0893     XSIMD_INLINE batch<T, A> fma(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z) noexcept
0894     {
0895         detail::static_check_supported_config<T, A>();
0896         return kernel::fma<A>(x, y, z, A {});
0897     }
0898
0899     /**
0900      * @ingroup batch_math
0901      *
0902      * Computes the larger values of the batches \c x and \c y.
0903      * @param x a batch of integer or floating point values.
0904      * @param y a batch of integer or floating point values.
0905      * @return a batch of the larger values.
0906      */
0907     template <class T, class A>
0908     XSIMD_INLINE batch<T, A> fmax(batch<T, A> const& x, batch<T, A> const& y) noexcept
0909     {
0910         detail::static_check_supported_config<T, A>();
0911         return kernel::max<A>(x, y, A {});
0912     }
0913
0914     /**
0915      * @ingroup batch_math
0916      *
0917      * Computes the smaller values of the batches \c x and \c y.
0918      * @param x a batch of integer or floating point values.
0919      * @param y a batch of integer or floating point values.
0920      * @return a batch of the smaller values.
0921      */
0922     template <class T, class A>
0923     XSIMD_INLINE batch<T, A> fmin(batch<T, A> const& x, batch<T, A> const& y) noexcept
0924     {
0925         detail::static_check_supported_config<T, A>();
0926         return kernel::min<A>(x, y, A {});
0927     }
0928
0929     /**
0930      * @ingroup batch_math
0931      *
0932      * Computes the modulo of the batch \c x by the batch \c y.
0933      * @param x batch involved in the modulo.
0934      * @param y batch involved in the modulo.
0935      * @return the result of the modulo.
0936      */
0937     template <class T, class A>
0938     XSIMD_INLINE batch<T, A> fmod(batch<T, A> const& x, batch<T, A> const& y) noexcept
0939     {
0940         detail::static_check_supported_config<T, A>();
0941         return kernel::fmod<A>(x, y, A {});
0942     }
0943
0944     /**
0945      * @ingroup batch_arithmetic
0946      *
0947      * Computes <tt>(x*y) - z</tt> in a single instruction when possible.
0948      * @param x a batch of integer or floating point values.
0949      * @param y a batch of integer or floating point values.
0950      * @param z a batch of integer or floating point values.
0951      * @return the result of the fused multiply-sub operation.
0952      */
0953     template <class T, class A>
0954     XSIMD_INLINE batch<T, A> fms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z) noexcept
0955     {
0956         detail::static_check_supported_config<T, A>();
0957         return kernel::fms<A>(x, y, z, A {});
0958     }
0959
0960     /**
0961      * @ingroup batch_arithmetic
0962      *
0963      * Computes <tt>-(x*y) + z</tt> in a single instruction when possible.
0964      * @param x a batch of integer or floating point values.
0965      * @param y a batch of integer or floating point values.
0966      * @param z a batch of integer or floating point values.
0967      * @return the result of the fused negated multiply-add operation.
0968      */
0969     template <class T, class A>
0970     XSIMD_INLINE batch<T, A> fnma(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z) noexcept
0971     {
0972         detail::static_check_supported_config<T, A>();
0973         return kernel::fnma<A>(x, y, z, A {});
0974     }
0975
0976     /**
0977      * @ingroup batch_arithmetic
0978      *
0979      * Computes <tt>-(x*y) - z</tt> in a single instruction when possible.
0980      * @param x a batch of integer or floating point values.
0981      * @param y a batch of integer or floating point values.
0982      * @param z a batch of integer or floating point values.
0983      * @return the result of the fused negated multiply-sub operation.
0984      */
0985     template <class T, class A>
0986     XSIMD_INLINE batch<T, A> fnms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z) noexcept
0987     {
0988         detail::static_check_supported_config<T, A>();
0989         return kernel::fnms<A>(x, y, z, A {});
0990     }
0991
0992     /**
0993      * @ingroup batch_fp
0994      *
0995      * Split split the number x into a normalized fraction and an exponent which is stored in exp
0996      * @param x a batch of integer or floating point values.
0997      * @param y a batch of integer or floating point values.
0998      * @return the normalized fraction of x
0999      */
1000     template <class T, class A>
1001     XSIMD_INLINE batch<T, A> frexp(const batch<T, A>& x, batch<as_integer_t<T>, A>& y) noexcept
1002     {
1003         detail::static_check_supported_config<T, A>();
1004         return kernel::frexp<A>(x, y, A {});
1005     }
1006
1007     /**
1008      * @ingroup batch_logical
1009      *
1010      * Element-wise greater or equal comparison of batches \c x and \c y.
1011      * @tparam X the actual type of batch.
1012      * @param x batch involved in the comparison.
1013      * @param y batch involved in the comparison.
1014      * @return a boolean batch.
1015      */
1016     template <class T, class A>
1017     XSIMD_INLINE batch_bool<T, A> ge(batch<T, A> const& x, batch<T, A> const& y) noexcept
1018     {
1019         detail::static_check_supported_config<T, A>();
1020         return x >= y;
1021     }
1022
1023     /**
1024      * @ingroup batch_logical
1025      *
1026      * Element-wise greater than comparison of batches \c x and \c y.
1027      * @tparam X the actual type of batch.
1028      * @param x batch involved in the comparison.
1029      * @param y batch involved in the comparison.
1030      * @return a boolean batch.
1031      */
1032     template <class T, class A>
1033     XSIMD_INLINE batch_bool<T, A> gt(batch<T, A> const& x, batch<T, A> const& y) noexcept
1034     {
1035         detail::static_check_supported_config<T, A>();
1036         return x > y;
1037     }
1038
1039     /**
1040      * @ingroup batch_reducers
1041      *
1042      * Parallel horizontal addition: adds the scalars of each batch
1043      * in the array pointed by \c row and store them in a returned
1044      * batch.
1045      * @param row an array of \c N batches
1046      * @return the result of the reduction.
1047      */
1048     template <class T, class A>
1049     XSIMD_INLINE batch<T, A> haddp(batch<T, A> const* row) noexcept
1050     {
1051         detail::static_check_supported_config<T, A>();
1052         return kernel::haddp<A>(row, A {});
1053     }
1054
1055     /**
1056      * @ingroup batch_math
1057      *
1058      * Computes the square root of the sum of the squares of the batches
1059      * \c x, and \c y.
1060      * @param x batch of floating point values.
1061      * @param y batch of floating point values.
1062      * @return the square root of the sum of the squares of \c x and \c y.
1063      */
1064     template <class T, class A>
1065     XSIMD_INLINE batch<T, A> hypot(batch<T, A> const& x, batch<T, A> const& y) noexcept
1066     {
1067         detail::static_check_supported_config<T, A>();
1068         return kernel::hypot<A>(x, y, A {});
1069     }
1070
1071     /**
1072      * @ingroup batch_complex
1073      *
1074      * Computes the imaginary part of the batch \c x.
1075      * @param x batch of complex or real values.
1076      * @return the argument of \c x.
1077      */
1078     template <class T, class A>
1079     XSIMD_INLINE real_batch_type_t<batch<T, A>> imag(batch<T, A> const& x) noexcept
1080     {
1081         detail::static_check_supported_config<T, A>();
1082         return kernel::imag<A>(x, A {});
1083     }
1084
1085     /**
1086      * @ingroup batch_arithmetic
1087      *
1088      * Add 1 to batch \c x.
1089      * @param x batch involved in the increment.
1090      * @return the sum of \c x and 1.
1091      */
1092     template <class T, class A>
1093     XSIMD_INLINE batch<T, A> incr(batch<T, A> const& x) noexcept
1094     {
1095         detail::static_check_supported_config<T, A>();
1096         return kernel::incr<A>(x, A {});
1097     }
1098
1099     /**
1100      * @ingroup batch_arithmetic
1101      *
1102      * Add 1 to batch \c x for each element where \c mask is true.
1103      * @param x batch involved in the increment.
1104      * @param mask whether to perform the increment or not. Can be a \c
1105      *             batch_bool or a \c batch_bool_constant.
1106      * @return the sum of \c x and 1 when \c mask is true.
1107      */
1108     template <class T, class A, class Mask>
1109     XSIMD_INLINE batch<T, A> incr_if(batch<T, A> const& x, Mask const& mask) noexcept
1110     {
1111         detail::static_check_supported_config<T, A>();
1112         return kernel::incr_if<A>(x, mask, A {});
1113     }
1114
1115     /**
1116      * @ingroup batch_constant
1117      *
1118      * Return a batch of scalars representing positive infinity
1119      * @return a batch of positive infinity
1120      */
1121     template <class B>
1122     XSIMD_INLINE B infinity()
1123     {
1124         using T = typename B::value_type;
1125         using A = typename B::arch_type;
1126         detail::static_check_supported_config<T, A>();
1127         return B(std::numeric_limits<T>::infinity());
1128     }
1129
1130     /**
1131      * @ingroup batch_data_transfer
1132      *
1133      * Create a new batch equivalent to \c x but with element \c val set at position \c pos
1134      * @param x batch
1135      * @param val value to set
1136      * @param pos index of the updated slot
1137      * @return copy of \c x with position \c pos set to \c val
1138      */
1139     template <class T, class A, size_t I>
1140     XSIMD_INLINE batch<T, A> insert(batch<T, A> const& x, T val, index<I> pos) noexcept
1141     {
1142         detail::static_check_supported_config<T, A>();
1143         return kernel::insert<A>(x, val, pos, A {});
1144     }
1145
1146     /**
1147      * @ingroup batch_logical
1148      *
1149      * Determines if the scalars in the given batch \c x represent an even integer value
1150      * @param x batch of floating point values.
1151      * @return a batch of booleans.
1152      */
1153     template <class T, class A>
1154     XSIMD_INLINE batch_bool<T, A> is_even(batch<T, A> const& x) noexcept
1155     {
1156         detail::static_check_supported_config<T, A>();
1157         return kernel::is_even<A>(x, A {});
1158     }
1159
1160     /**
1161      * @ingroup batch_logical
1162      *
1163      * Determines if the floating-point scalars in the given batch \c x represent integer value
1164      * @param x batch of floating point values.
1165      * @return a batch of booleans.
1166      */
1167     template <class T, class A>
1168     XSIMD_INLINE batch_bool<T, A> is_flint(batch<T, A> const& x) noexcept
1169     {
1170         detail::static_check_supported_config<T, A>();
1171         return kernel::is_flint<A>(x, A {});
1172     }
1173
1174     /**
1175      * @ingroup batch_logical
1176      *
1177      * Determines if the scalars in the given batch \c x represent an odd integer value
1178      * @param x batch of floating point values.
1179      * @return a batch of booleans.
1180      */
1181     template <class T, class A>
1182     XSIMD_INLINE batch_bool<T, A> is_odd(batch<T, A> const& x) noexcept
1183     {
1184         detail::static_check_supported_config<T, A>();
1185         return kernel::is_odd<A>(x, A {});
1186     }
1187
1188     /**
1189      * @ingroup batch_logical
1190      *
1191      * Determines if the scalars in the given batch \c x are inf values.
1192      * @param x batch of floating point values.
1193      * @return a batch of booleans.
1194      */
1195     template <class T, class A>
1196     XSIMD_INLINE typename batch<T, A>::batch_bool_type isinf(batch<T, A> const& x) noexcept
1197     {
1198         detail::static_check_supported_config<T, A>();
1199         return kernel::isinf<A>(x, A {});
1200     }
1201
1202     /**
1203      * @ingroup batch_logical
1204      *
1205      * Determines if the scalars in the given batch \c x are finite values.
1206      * @param x batch of floating point values.
1207      * @return a batch of booleans.
1208      */
1209     template <class T, class A>
1210     XSIMD_INLINE typename batch<T, A>::batch_bool_type isfinite(batch<T, A> const& x) noexcept
1211     {
1212         detail::static_check_supported_config<T, A>();
1213         return kernel::isfinite<A>(x, A {});
1214     }
1215
1216     /**
1217      * @ingroup batch_logical
1218      *
1219      * Determines if the scalars in the given batch \c x are NaN values.
1220      * @param x batch of floating point values.
1221      * @return a batch of booleans.
1222      */
1223     template <class T, class A>
1224     XSIMD_INLINE typename batch<T, A>::batch_bool_type isnan(batch<T, A> const& x) noexcept
1225     {
1226         detail::static_check_supported_config<T, A>();
1227         return kernel::isnan<A>(x, A {});
1228     }
1229
1230     /**
1231      * @ingroup batch_math_extra
1232      *
1233      * Computes the multiplication of the floating point number \c x by 2 raised to the power \c y.
1234      * @param x batch of floating point values.
1235      * @param y batch of integer values.
1236      * @return a batch of floating point values.
1237      */
1238     template <class T, class A>
1239     XSIMD_INLINE batch<T, A> ldexp(const batch<T, A>& x, const batch<as_integer_t<T>, A>& y) noexcept
1240     {
1241         detail::static_check_supported_config<T, A>();
1242         return kernel::ldexp<A>(x, y, A {});
1243     }
1244
1245     /**
1246      * @ingroup batch_logical
1247      *
1248      * Element-wise lesser or equal to comparison of batches \c x and \c y.
1249      * @param x batch involved in the comparison.
1250      * @param y batch involved in the comparison.
1251      * @return a boolean batch.
1252      */
1253     template <class T, class A>
1254     XSIMD_INLINE batch_bool<T, A> le(batch<T, A> const& x, batch<T, A> const& y) noexcept
1255     {
1256         detail::static_check_supported_config<T, A>();
1257         return x <= y;
1258     }
1259
1260     /**
1261      * @ingroup batch_math_extra
1262      *
1263      * Computes the natural logarithm of the gamma function of the batch \c x.
1264      * @param x batch of floating point values.
1265      * @return the natural logarithm of the gamma function of \c x.
1266      */
1267     template <class T, class A>
1268     XSIMD_INLINE batch<T, A> lgamma(batch<T, A> const& x) noexcept
1269     {
1270         detail::static_check_supported_config<T, A>();
1271         return kernel::lgamma<A>(x, A {});
1272     }
1273
1274     /**
1275      * @ingroup batch_data_transfer
1276      *
1277      * Creates a batch from the buffer \c ptr and the specifed
1278      * batch value type \c To. The memory needs to be aligned.
1279      * @param ptr the memory buffer to read
1280      * @return a new batch instance
1281      */
1282     template <class To, class A = default_arch, class From>
1283     XSIMD_INLINE simd_return_type<From, To, A> load_as(From const* ptr, aligned_mode) noexcept
1284     {
1285         using batch_value_type = typename simd_return_type<From, To, A>::value_type;
1286         detail::static_check_supported_config<From, A>();
1287         detail::static_check_supported_config<To, A>();
1288         return kernel::load_aligned<A>(ptr, kernel::convert<batch_value_type> {}, A {});
1289     }
1290
1291     template <class To, class A = default_arch>
1292     XSIMD_INLINE simd_return_type<bool, To, A> load_as(bool const* ptr, aligned_mode) noexcept
1293     {
1294         detail::static_check_supported_config<To, A>();
1295         return simd_return_type<bool, To, A>::load_aligned(ptr);
1296     }
1297
1298     template <class To, class A = default_arch, class From>
1299     XSIMD_INLINE simd_return_type<std::complex<From>, To, A> load_as(std::complex<From> const* ptr, aligned_mode) noexcept
1300     {
1301         detail::static_check_supported_config<To, A>();
1302         using batch_value_type = typename simd_return_type<std::complex<From>, To, A>::value_type;
1303         return kernel::load_complex_aligned<A>(ptr, kernel::convert<batch_value_type> {}, A {});
1304     }
1305
1306 #ifdef XSIMD_ENABLE_XTL_COMPLEX
1307     template <class To, class A = default_arch, class From, bool i3ec>
1308     XSIMD_INLINE simd_return_type<xtl::xcomplex<From, From, i3ec>, To, A> load_as(xtl::xcomplex<From, From, i3ec> const* ptr, aligned_mode) noexcept
1309     {
1310         detail::static_check_supported_config<To, A>();
1311         detail::static_check_supported_config<From, A>();
1312         return load_as<To>(reinterpret_cast<std::complex<From> const*>(ptr), aligned_mode());
1313     }
1314 #endif
1315
1316     /**
1317      * @ingroup batch_data_transfer
1318      *
1319      * Creates a batch from the buffer \c ptr and the specifed
1320      * batch value type \c To. The memory does not need to be aligned.
1321      * @param ptr the memory buffer to read
1322      * @return a new batch instance
1323      */
1324     template <class To, class A = default_arch, class From>
1325     XSIMD_INLINE simd_return_type<From, To, A> load_as(From const* ptr, unaligned_mode) noexcept
1326     {
1327         using batch_value_type = typename simd_return_type<From, To, A>::value_type;
1328         detail::static_check_supported_config<To, A>();
1329         detail::static_check_supported_config<From, A>();
1330         return kernel::load_unaligned<A>(ptr, kernel::convert<batch_value_type> {}, A {});
1331     }
1332
1333     template <class To, class A = default_arch>
1334     XSIMD_INLINE simd_return_type<bool, To, A> load_as(bool const* ptr, unaligned_mode) noexcept
1335     {
1336         return simd_return_type<bool, To, A>::load_unaligned(ptr);
1337     }
1338
1339     template <class To, class A = default_arch, class From>
1340     XSIMD_INLINE simd_return_type<std::complex<From>, To, A> load_as(std::complex<From> const* ptr, unaligned_mode) noexcept
1341     {
1342         detail::static_check_supported_config<To, A>();
1343         detail::static_check_supported_config<From, A>();
1344         using batch_value_type = typename simd_return_type<std::complex<From>, To, A>::value_type;
1345         return kernel::load_complex_unaligned<A>(ptr, kernel::convert<batch_value_type> {}, A {});
1346     }
1347
1348 #ifdef XSIMD_ENABLE_XTL_COMPLEX
1349     template <class To, class A = default_arch, class From, bool i3ec>
1350     XSIMD_INLINE simd_return_type<xtl::xcomplex<From, From, i3ec>, To, A> load_as(xtl::xcomplex<From, From, i3ec> const* ptr, unaligned_mode) noexcept
1351     {
1352         detail::static_check_supported_config<To, A>();
1353         detail::static_check_supported_config<From, A>();
1354         return load_as<To>(reinterpret_cast<std::complex<From> const*>(ptr), unaligned_mode());
1355     }
1356 #endif
1357
1358     /**
1359      * @ingroup batch_data_transfer
1360      *
1361      * Creates a batch from the buffer \c ptr. The
1362      * memory needs to be aligned.
1363      * @param ptr the memory buffer to read
1364      * @return a new batch instance
1365      */
1366     template <class A = default_arch, class From>
1367     XSIMD_INLINE batch<From, A> load(From const* ptr, aligned_mode = {}) noexcept
1368     {
1369         detail::static_check_supported_config<From, A>();
1370         return load_as<From, A>(ptr, aligned_mode {});
1371     }
1372
1373     /**
1374      * @ingroup batch_data_transfer
1375      *
1376      * Creates a batch from the buffer \c ptr. The
1377      * memory does not need to be aligned.
1378      * @param ptr the memory buffer to read
1379      * @return a new batch instance
1380      */
1381     template <class A = default_arch, class From>
1382     XSIMD_INLINE batch<From, A> load(From const* ptr, unaligned_mode) noexcept
1383     {
1384         detail::static_check_supported_config<From, A>();
1385         return load_as<From, A>(ptr, unaligned_mode {});
1386     }
1387
1388     /**
1389      * @ingroup batch_data_transfer
1390      *
1391      * Creates a batch from the buffer \c ptr. The
1392      * memory needs to be aligned.
1393      * @param ptr the memory buffer to read
1394      * @return a new batch instance
1395      */
1396     template <class A = default_arch, class From>
1397     XSIMD_INLINE batch<From, A> load_aligned(From const* ptr) noexcept
1398     {
1399         detail::static_check_supported_config<From, A>();
1400         return load_as<From, A>(ptr, aligned_mode {});
1401     }
1402
1403     /**
1404      * @ingroup batch_data_transfer
1405      *
1406      * Creates a batch from the buffer \c ptr. The
1407      * memory does not need to be aligned.
1408      * @param ptr the memory buffer to read
1409      * @return a new batch instance
1410      */
1411     template <class A = default_arch, class From>
1412     XSIMD_INLINE batch<From, A> load_unaligned(From const* ptr) noexcept
1413     {
1414         detail::static_check_supported_config<From, A>();
1415         return load_as<From, A>(ptr, unaligned_mode {});
1416     }
1417
1418     /**
1419      * @ingroup batch_math
1420      *
1421      * Computes the natural logarithm of the batch \c x.
1422      * @param x batch of floating point values.
1423      * @return the natural logarithm of \c x.
1424      */
1425     template <class T, class A>
1426     XSIMD_INLINE batch<T, A> log(batch<T, A> const& x) noexcept
1427     {
1428         detail::static_check_supported_config<T, A>();
1429         return kernel::log<A>(x, A {});
1430     }
1431
1432     /**
1433      * @ingroup batch_math
1434      * Computes the base 2 logarithm of the batch \c x.
1435      * @param x batch of floating point values.
1436      * @return the base 2 logarithm of \c x.
1437      */
1438     template <class T, class A>
1439     XSIMD_INLINE batch<T, A> log2(batch<T, A> const& x) noexcept
1440     {
1441         detail::static_check_supported_config<T, A>();
1442         return kernel::log2<A>(x, A {});
1443     }
1444
1445     /**
1446      * @ingroup batch_math
1447      * Computes the base 10 logarithm of the batch \c x.
1448      * @param x batch of floating point values.
1449      * @return the base 10 logarithm of \c x.
1450      */
1451     template <class T, class A>
1452     XSIMD_INLINE batch<T, A> log10(batch<T, A> const& x) noexcept
1453     {
1454         detail::static_check_supported_config<T, A>();
1455         return kernel::log10<A>(x, A {});
1456     }
1457
1458     /**
1459      * @ingroup batch_math
1460      * Computes the natural logarithm of one plus the batch \c x.
1461      * @param x batch of floating point values.
1462      * @return the natural logarithm of one plus \c x.
1463      */
1464     template <class T, class A>
1465     XSIMD_INLINE batch<T, A> log1p(batch<T, A> const& x) noexcept
1466     {
1467         detail::static_check_supported_config<T, A>();
1468         return kernel::log1p<A>(x, A {});
1469     }
1470
1471     /**
1472      * @ingroup batch_logical
1473      *
1474      * Element-wise lesser than comparison of batches \c x and \c y.
1475      * @param x batch involved in the comparison.
1476      * @param y batch involved in the comparison.
1477      * @return a boolean batch.
1478      */
1479     template <class T, class A>
1480     XSIMD_INLINE batch_bool<T, A> lt(batch<T, A> const& x, batch<T, A> const& y) noexcept
1481     {
1482         detail::static_check_supported_config<T, A>();
1483         return x < y;
1484     }
1485
1486     /**
1487      * @ingroup batch_math
1488      *
1489      * Computes the larger values of the batches \c x and \c y.
1490      * @param x a batch of integer or floating point values.
1491      * @param y a batch of integer or floating point values.
1492      * @return a batch of the larger values.
1493      */
1494     template <class T, class A>
1495     XSIMD_INLINE batch<T, A> max(batch<T, A> const& x, batch<T, A> const& y) noexcept
1496     {
1497         detail::static_check_supported_config<T, A>();
1498         return kernel::max<A>(x, y, A {});
1499     }
1500
1501     /**
1502      * @ingroup batch_math
1503      *
1504      * Computes the smaller values of the batches \c x and \c y.
1505      * @param x a batch of integer or floating point values.
1506      * @param y a batch of integer or floating point values.
1507      * @return a batch of the smaller values.
1508      */
1509     template <class T, class A>
1510     XSIMD_INLINE batch<T, A> min(batch<T, A> const& x, batch<T, A> const& y) noexcept
1511     {
1512         detail::static_check_supported_config<T, A>();
1513         return kernel::min<A>(x, y, A {});
1514     }
1515
1516     /**
1517      * @ingroup batch_constant
1518      *
1519      * Return a batch of scalars representing positive infinity
1520      * @return a batch of positive infinity
1521      */
1522     template <class B>
1523     XSIMD_INLINE B minusinfinity() noexcept
1524     {
1525         using T = typename B::value_type;
1526         using A = typename B::arch_type;
1527         detail::static_check_supported_config<T, A>();
1528         return B(-std::numeric_limits<T>::infinity());
1529     }
1530
1531     /**
1532      * @ingroup batch_arithmetic
1533      *
1534      * Computes the integer modulo of the batch \c x by the batch \c y.
1535      * @param x batch involved in the modulo.
1536      * @param y batch involved in the modulo.
1537      * @return the result of the modulo.
1538      */
1539     template <class T, class A>
1540     XSIMD_INLINE auto mod(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x % y)
1541     {
1542         detail::static_check_supported_config<T, A>();
1543         return x % y;
1544     }
1545
1546     /**
1547      * @ingroup batch_arithmetic
1548      *
1549      * Computes the product of the batches \c x and \c y.
1550      * @tparam X the actual type of batch.
1551      * @param x batch involved in the product.
1552      * @param y batch involved in the product.
1553      * @return the result of the product.
1554      */
1555     template <class T, class A>
1556     XSIMD_INLINE auto mul(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x * y)
1557     {
1558         detail::static_check_supported_config<T, A>();
1559         return x * y;
1560     }
1561
1562     /**
1563      * @ingroup batch_rounding
1564      *
1565      * Rounds the scalars in \c x to integer values (in floating point format), using
1566      * the current rounding mode.
1567      * @param x batch of floating point values.
1568      * @return the batch of nearest integer values.
1569      */
1570     template <class T, class A>
1571     XSIMD_INLINE batch<T, A> nearbyint(batch<T, A> const& x) noexcept
1572     {
1573         detail::static_check_supported_config<T, A>();
1574         return kernel::nearbyint<A>(x, A {});
1575     }
1576
1577     /**
1578      * @ingroup batch_rounding
1579      *
1580      * Rounds the scalars in \c x to integer values (in integer format) using
1581      * the current rounding mode.
1582      * @param x batch of floating point values.
1583      * @return the batch of nearest integer values.
1584      *
1585      * @warning For very large values the conversion to int silently overflows.
1586      */
1587     template <class T, class A>
1588     XSIMD_INLINE batch<as_integer_t<T>, A>
1589     nearbyint_as_int(batch<T, A> const& x) noexcept
1590     {
1591         detail::static_check_supported_config<T, A>();
1592         return kernel::nearbyint_as_int(x, A {});
1593     }
1594
1595     /**
1596      * @ingroup batch_logical
1597      *
1598      * Element-wise inequality comparison of batches \c x and \c y.
1599      * @param x batch involved in the comparison.
1600      * @param y batch involved in the comparison.
1601      * @return a boolean batch.
1602      */
1603     template <class T, class A>
1604     XSIMD_INLINE auto neq(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x != y)
1605     {
1606         detail::static_check_supported_config<T, A>();
1607         return x != y;
1608     }
1609
1610     /**
1611      * @ingroup batch_logical
1612      *
1613      * Element-wise inequality comparison of batches of boolean values \c x and \c y.
1614      * @param x batch of booleans involved in the comparison.
1615      * @param y batch of booleans involved in the comparison.
1616      * @return a boolean batch.
1617      */
1618     template <class T, class A>
1619     XSIMD_INLINE auto neq(batch_bool<T, A> const& x, batch_bool<T, A> const& y) noexcept -> decltype(x != y)
1620     {
1621         detail::static_check_supported_config<T, A>();
1622         return x != y;
1623     }
1624
1625     /**
1626      * @ingroup batch_arithmetic
1627      *
1628      * Computes the opposite of the batch \c x.
1629      * @param x batch involved in the operation.
1630      * @return the opposite of \c x.
1631      */
1632     template <class T, class A>
1633     XSIMD_INLINE batch<T, A> neg(batch<T, A> const& x) noexcept
1634     {
1635         detail::static_check_supported_config<T, A>();
1636         return -x;
1637     }
1638
1639     /**
1640      * @ingroup batch_math_extra
1641      *
1642      * Computes  the next representable  floating-point
1643      *        value  following  x  in the direction of y
1644      * @param x batch of floating point values.
1645      * @param y batch of floating point values.
1646      * @return \c x raised to the power \c y.
1647      */
1648     template <class T, class A>
1649     XSIMD_INLINE batch<T, A> nextafter(batch<T, A> const& x, batch<T, A> const& y) noexcept
1650     {
1651         detail::static_check_supported_config<T, A>();
1652         return kernel::nextafter<A>(x, y, A {});
1653     }
1654
1655     /**
1656      * @ingroup batch_complex
1657      *
1658      * Computes the norm of the batch \c x.
1659      * @param x batch of complex or real values.
1660      * @return the norm of \c x.
1661      */
1662     template <class T, class A>
1663     XSIMD_INLINE real_batch_type_t<batch<T, A>> norm(batch<T, A> const& x) noexcept
1664     {
1665         detail::static_check_supported_config<T, A>();
1666         return kernel::norm(x, A {});
1667     }
1668
1669     /**
1670      * @ingroup batch_math
1671      *
1672      * Returns a complex batch with magnitude \c r and phase angle \c theta.
1673      * @param r The magnitude of the desired complex result.
1674      * @param theta The phase angle of the desired complex result.
1675      * @return \c r exp(i * \c theta).
1676      */
1677     template <class T, class A>
1678     XSIMD_INLINE complex_batch_type_t<batch<T, A>> polar(batch<T, A> const& r, batch<T, A> const& theta = batch<T, A> {}) noexcept
1679     {
1680         detail::static_check_supported_config<T, A>();
1681         return kernel::polar<A>(r, theta, A {});
1682     }
1683
1684     /**
1685      * @ingroup batch_arithmetic
1686      *
1687      * No-op on \c x.
1688      * @param x batch involved in the operation.
1689      * @return \c x.
1690      */
1691     template <class T, class A>
1692     XSIMD_INLINE batch<T, A> pos(batch<T, A> const& x) noexcept
1693     {
1694         detail::static_check_supported_config<T, A>();
1695         return +x;
1696     }
1697
1698     /**
1699      * @ingroup batch_math
1700      *
1701      * Computes the value of the batch \c x raised to the power
1702      * \c y.
1703      * @param x batch of floating point values.
1704      * @param y batch of floating point values.
1705      * @return \c x raised to the power \c y.
1706      */
1707     template <class T, class A>
1708     XSIMD_INLINE batch<T, A> pow(batch<T, A> const& x, batch<T, A> const& y) noexcept
1709     {
1710         detail::static_check_supported_config<T, A>();
1711         return kernel::pow<A>(x, y, A {});
1712     }
1713
1714     /**
1715      * @ingroup batch_math
1716      *
1717      * Computes the value of the batch \c x raised to the power
1718      * \c y.
1719      * @param x batch of complex floating point values.
1720      * @param y batch of floating point values.
1721      * @return \c x raised to the power \c y.
1722      */
1723     template <class T, class A>
1724     XSIMD_INLINE batch<std::complex<T>, A> pow(batch<std::complex<T>, A> const& x, batch<T, A> const& y) noexcept
1725     {
1726         detail::static_check_supported_config<T, A>();
1727         return kernel::pow<A>(x, y, A {});
1728     }
1729
1730     /**
1731      * @ingroup batch_math
1732      *
1733      * Computes the value of the batch \c x raised to the power
1734      * \c y.
1735      * @param x batch of complex floating point values.
1736      * @param y batch of floating point values.
1737      * @return \c x raised to the power \c y.
1738      */
1739     template <class T, class A>
1740     XSIMD_INLINE batch<std::complex<T>, A> pow(batch<T, A> const& x, batch<std::complex<T>, A> const& y) noexcept
1741     {
1742         detail::static_check_supported_config<T, A>();
1743         return kernel::pow<A>(x, y, A {});
1744     }
1745
1746     /**
1747      * @ingroup batch_math
1748      *
1749      * Computes the value of the batch \c x raised to the power
1750      * \c y.
1751      * @param x batch of integral values.
1752      * @param y batch of integral values.
1753      * @return \c x raised to the power \c y.
1754      */
1755     template <class T, class ITy, class A, class = typename std::enable_if<std::is_integral<ITy>::value, void>::type>
1756     XSIMD_INLINE batch<T, A> pow(batch<T, A> const& x, ITy y) noexcept
1757     {
1758         detail::static_check_supported_config<T, A>();
1759         return kernel::ipow<A>(x, y, A {});
1760     }
1761
1762     /**
1763      * @ingroup batch_complex
1764      *
1765      * Computes the projection of the batch \c z.
1766      * @param z batch of complex or real values.
1767      * @return the projection of \c z.
1768      */
1769     template <class T, class A>
1770     XSIMD_INLINE complex_batch_type_t<batch<T, A>> proj(batch<T, A> const& z) noexcept
1771     {
1772         detail::static_check_supported_config<T, A>();
1773         return kernel::proj(z, A {});
1774     }
1775
1776     /**
1777      * @ingroup batch_complex
1778      *
1779      * Computes the real part of the batch \c z.
1780      * @param z batch of complex or real values.
1781      * @return the argument of \c z.
1782      */
1783     template <class T, class A>
1784     XSIMD_INLINE real_batch_type_t<batch<T, A>> real(batch<T, A> const& z) noexcept
1785     {
1786         detail::static_check_supported_config<T, A>();
1787         return kernel::real<A>(z, A {});
1788     }
1789
1790     /**
1791      * @ingroup batch_arithmetic
1792      *
1793      * Computes the approximate reciprocal of the batch \c x.
1794      * The maximum relative error for this approximation is
1795      * less than 1.5*2^-12.
1796      * @param x batch of floating point numbers.
1797      * @return the reciprocal.
1798      */
1799     template <class T, class A, class = typename std::enable_if<std::is_floating_point<T>::value, void>::type>
1800     XSIMD_INLINE batch<T, A> reciprocal(batch<T, A> const& x) noexcept
1801     {
1802         detail::static_check_supported_config<T, A>();
1803         return kernel::reciprocal(x, A {});
1804     }
1805
1806     /**
1807      * @ingroup batch_reducers
1808      *
1809      * Generic reducer using only batch operations
1810      * @param f reducing function, accepting `batch ()(batch, batch)`
1811      * @param x batch involved in the reduction
1812      * @return the result of the reduction, as a scalar.
1813      */
1814     template <class T, class A, class F>
1815     XSIMD_INLINE T reduce(F&& f, batch<T, A> const& x) noexcept
1816     {
1817         detail::static_check_supported_config<T, A>();
1818         return kernel::detail::reduce(std::forward<F>(f), x, std::integral_constant<unsigned, batch<T, A>::size>());
1819     }
1820
1821     /**
1822      * @ingroup batch_reducers
1823      *
1824      * Adds all the scalars of the batch \c x.
1825      * @param x batch involved in the reduction
1826      * @return the result of the reduction.
1827      */
1828     template <class T, class A>
1829     XSIMD_INLINE T reduce_add(batch<T, A> const& x) noexcept
1830     {
1831         detail::static_check_supported_config<T, A>();
1832         return kernel::reduce_add<A>(x, A {});
1833     }
1834
1835     /**
1836      * @ingroup batch_reducers
1837      *
1838      * Max of all the scalars of the batch \c x.
1839      * @param x batch involved in the reduction
1840      * @return the result of the reduction.
1841      */
1842     template <class T, class A>
1843     XSIMD_INLINE T reduce_max(batch<T, A> const& x) noexcept
1844     {
1845         detail::static_check_supported_config<T, A>();
1846         return kernel::reduce_max<A>(x, A {});
1847     }
1848
1849     /**
1850      * @ingroup batch_reducers
1851      *
1852      * Min of all the scalars of the batch \c x.
1853      * @param x batch involved in the reduction
1854      * @return the result of the reduction.
1855      */
1856     template <class T, class A>
1857     XSIMD_INLINE T reduce_min(batch<T, A> const& x) noexcept
1858     {
1859         detail::static_check_supported_config<T, A>();
1860         return kernel::reduce_min<A>(x, A {});
1861     }
1862
1863     /**
1864      * @ingroup batch_math
1865      *
1866      * Computes the remainder of dividing \c x by \c y
1867      * @param x batch of scalar values
1868      * @param y batch of scalar values
1869      * @return the result of the addition.
1870      */
1871     template <class T, class A>
1872     XSIMD_INLINE batch<T, A> remainder(batch<T, A> const& x, batch<T, A> const& y) noexcept
1873     {
1874         detail::static_check_supported_config<T, A>();
1875         return kernel::remainder<A>(x, y, A {});
1876     }
1877
1878     /**
1879      * @ingroup batch_rounding
1880      *
1881      * Rounds the scalars in \c x to integer values (in floating point format), using
1882      * the current rounding mode.
1883      * @param x batch of floating point values.
1884      * @return the batch of rounded values.
1885      */
1886     template <class T, class A>
1887     XSIMD_INLINE batch<T, A> rint(batch<T, A> const& x) noexcept
1888     {
1889         detail::static_check_supported_config<T, A>();
1890         return nearbyint(x);
1891     }
1892
1893     /**
1894      * @ingroup batch_data_transfer
1895      *
1896      * Slide the whole batch to the left by \c n elements, and reintroduce the
1897      * slided out elements from the right. This is different from
1898      * \c rotl that rotates each batch element to the left.
1899      *
1900      * @tparam N Amount of elements to rotate to the left.
1901      * @param x batch of integer values.
1902      * @return rotated batch.
1903      */
1904     template <size_t N, class T, class A>
1905     XSIMD_INLINE batch<T, A> rotate_left(batch<T, A> const& x) noexcept
1906     {
1907         detail::static_check_supported_config<T, A>();
1908         return kernel::rotate_left<N, A>(x, A {});
1909     }
1910
1911     /**
1912      * @ingroup batch_data_transfer
1913      *
1914      * Slide the whole batch to the right by \c n elements, and reintroduce the
1915      * slided out elements from the left. This is different from
1916      * \c rotr that rotates each batch element to the right.
1917      *
1918      * @tparam N Amount of elements to rotate to the right.
1919      * @param x batch of integer values.
1920      * @return rotated batch.
1921      */
1922     template <size_t N, class T, class A>
1923     XSIMD_INLINE batch<T, A> rotate_right(batch<T, A> const& x) noexcept
1924     {
1925         detail::static_check_supported_config<T, A>();
1926         return kernel::rotate_right<N, A>(x, A {});
1927     }
1928
1929     /**
1930      * @ingroup batch_bitwise
1931      *
1932      * Perform a bitwise shift to the left, reintroducing the shifted out bits
1933      * to the right
1934      * @param x batch to rotate
1935      * @param shift scalar amount to shift
1936      * @return rotated \c x.
1937      */
1938     template <class T, class A>
1939     XSIMD_INLINE batch<T, A> rotl(batch<T, A> const& x, int shift) noexcept
1940     {
1941         detail::static_check_supported_config<T, A>();
1942         return kernel::rotl<A>(x, shift, A {});
1943     }
1944     template <class T, class A>
1945     XSIMD_INLINE batch<T, A> rotl(batch<T, A> const& x, batch<T, A> const& shift) noexcept
1946     {
1947         detail::static_check_supported_config<T, A>();
1948         return kernel::rotl<A>(x, shift, A {});
1949     }
1950
1951     /**
1952      * @ingroup batch_bitwise
1953      *
1954      * Perform a bitwise shift to the right, reintroducing the shifted out bits
1955      * to the left.
1956      * @param x batch to rotate
1957      * @param shift scalar amount to shift
1958      * @return rotated \c x.
1959      */
1960     template <class T, class A>
1961     XSIMD_INLINE batch<T, A> rotr(batch<T, A> const& x, int shift) noexcept
1962     {
1963         detail::static_check_supported_config<T, A>();
1964         return kernel::rotr<A>(x, shift, A {});
1965     }
1966     template <class T, class A>
1967     XSIMD_INLINE batch<T, A> rotr(batch<T, A> const& x, batch<T, A> const& shift) noexcept
1968     {
1969         detail::static_check_supported_config<T, A>();
1970         return kernel::rotr<A>(x, shift, A {});
1971     }
1972
1973     /**
1974      * @ingroup batch_rounding
1975      *
1976      * Computes the batch of nearest integer values to scalars in \c x (in
1977      * floating point format), rounding halfway cases away from zero, regardless
1978      * of the current rounding mode.
1979      * @param x batch of flaoting point values.
1980      * @return the batch of nearest integer values.
1981      */
1982     template <class T, class A>
1983     XSIMD_INLINE batch<T, A> round(batch<T, A> const& x) noexcept
1984     {
1985         detail::static_check_supported_config<T, A>();
1986         return kernel::round<A>(x, A {});
1987     }
1988
1989     /**
1990      * @ingroup batch_math
1991      *
1992      * Computes an estimate of the inverse square root of the batch \c x.
1993      *
1994      * @warning Unlike most xsimd function, this does not return the same result as the
1995      * equivalent scalar operation, trading accuracy for speed.
1996      *
1997      * @param x batch of floating point values.
1998      * @return the inverse square root of \c x.
1999      */
2000     template <class T, class A>
2001     XSIMD_INLINE batch<T, A> rsqrt(batch<T, A> const& x) noexcept
2002     {
2003         detail::static_check_supported_config<T, A>();
2004         return kernel::rsqrt<A>(x, A {});
2005     }
2006
2007     /**
2008      * @ingroup batch_arithmetic
2009      *
2010      * Computes the saturate sum of the batch \c x and the batch \c y.
2011
2012      * @tparam X the actual type of batch.
2013      * @param x batch involved in the saturated addition.
2014      * @param y batch involved in the saturated addition.
2015      * @return the result of the saturated addition.
2016      */
2017     template <class T, class A>
2018     XSIMD_INLINE batch<T, A> sadd(batch<T, A> const& x, batch<T, A> const& y) noexcept
2019     {
2020         detail::static_check_supported_config<T, A>();
2021         return kernel::sadd<A>(x, y, A {});
2022     }
2023
2024     /**
2025      * @ingroup batch_cond
2026      *
2027      * Ternary operator for batches: selects values from the batches \c true_br or \c false_br
2028      * depending on the boolean values in the constant batch \c cond. Equivalent to
2029      * \code{.cpp}
2030      * for(std::size_t i = 0; i < N; ++i)
2031      *     res[i] = cond[i] ? true_br[i] : false_br[i];
2032      * \endcode
2033      * @param cond batch condition.
2034      * @param true_br batch values for truthy condition.
2035      * @param false_br batch value for falsy condition.
2036      * @return the result of the selection.
2037      */
2038     template <class T, class A>
2039     XSIMD_INLINE batch<T, A> select(batch_bool<T, A> const& cond, batch<T, A> const& true_br, batch<T, A> const& false_br) noexcept
2040     {
2041         detail::static_check_supported_config<T, A>();
2042         return kernel::select<A>(cond, true_br, false_br, A {});
2043     }
2044
2045     /**
2046      * @ingroup batch_cond
2047      *
2048      * Ternary operator for batches: selects values from the batches \c true_br or \c false_br
2049      * depending on the boolean values in the constant batch \c cond. Equivalent to
2050      * \code{.cpp}
2051      * for(std::size_t i = 0; i < N; ++i)
2052      *     res[i] = cond[i] ? true_br[i] : false_br[i];
2053      * \endcode
2054      * @param cond batch condition.
2055      * @param true_br batch values for truthy condition.
2056      * @param false_br batch value for falsy condition.
2057      * @return the result of the selection.
2058      */
2059     template <class T, class A>
2060     XSIMD_INLINE batch<std::complex<T>, A> select(batch_bool<T, A> const& cond, batch<std::complex<T>, A> const& true_br, batch<std::complex<T>, A> const& false_br) noexcept
2061     {
2062         detail::static_check_supported_config<T, A>();
2063         return kernel::select<A>(cond, true_br, false_br, A {});
2064     }
2065
2066     /**
2067      * @ingroup batch_cond
2068      *
2069      * Ternary operator for batches: selects values from the batches \c true_br or \c false_br
2070      * depending on the boolean values in the constant batch \c cond. Equivalent to
2071      * \code{.cpp}
2072      * for(std::size_t i = 0; i < N; ++i)
2073      *     res[i] = cond[i] ? true_br[i] : false_br[i];
2074      * \endcode
2075      * @param cond constant batch condition.
2076      * @param true_br batch values for truthy condition.
2077      * @param false_br batch value for falsy condition.
2078      * @return the result of the selection.
2079      */
2080     template <class T, class A, bool... Values>
2081     XSIMD_INLINE batch<T, A> select(batch_bool_constant<T, A, Values...> const& cond, batch<T, A> const& true_br, batch<T, A> const& false_br) noexcept
2082     {
2083         detail::static_check_supported_config<T, A>();
2084         return kernel::select<A>(cond, true_br, false_br, A {});
2085     }
2086
2087     /**
2088      * @ingroup batch_data_transfer
2089      *
2090      * Combine elements from \c x and \c y according to selector \c mask
2091      * @param x batch
2092      * @param y batch
2093      * @param mask constant batch mask of integer elements of the same size as
2094      * element of \c x and \c y. Each element of the mask index the vector that
2095      * would be formed by the concatenation of \c x and \c y. For instance
2096      * \code{.cpp}
2097      * batch_constant<uint32_t, sse2, 0, 4, 3, 7>
2098      * \endcode
2099      * Picks \c x[0], \c y[0], \c x[3], \c y[3]
2100      *
2101      * @return combined batch
2102      */
2103     template <class T, class A, class Vt, Vt... Values>
2104     XSIMD_INLINE typename std::enable_if<std::is_arithmetic<T>::value, batch<T, A>>::type
2105     shuffle(batch<T, A> const& x, batch<T, A> const& y, batch_constant<Vt, A, Values...> mask) noexcept
2106     {
2107         static_assert(sizeof(T) == sizeof(Vt), "consistent mask");
2108         detail::static_check_supported_config<T, A>();
2109         return kernel::shuffle<A>(x, y, mask, A {});
2110     }
2111
2112     /**
2113      * @ingroup batch_miscellaneous
2114      *
2115      * Computes the sign of \c x
2116      * @param x batch
2117      * @return -1 for each negative element, -1 or +1 for each null element and +1 for each element
2118      */
2119     template <class T, class A>
2120     XSIMD_INLINE batch<T, A> sign(batch<T, A> const& x) noexcept
2121     {
2122         detail::static_check_supported_config<T, A>();
2123         return kernel::sign<A>(x, A {});
2124     }
2125
2126     /**
2127      * @ingroup batch_miscellaneous
2128      *
2129      * Computes the sign of \c x, assuming x doesn't have any zero
2130      * @param x batch
2131      * @return -1 for each negative element, -1 or +1 for each null element and +1 for each element
2132      */
2133     template <class T, class A>
2134     XSIMD_INLINE batch<T, A> signnz(batch<T, A> const& x) noexcept
2135     {
2136         detail::static_check_supported_config<T, A>();
2137         return kernel::signnz<A>(x, A {});
2138     }
2139
2140     /**
2141      * @ingroup batch_trigo
2142      *
2143      * Computes the sine of the batch \c x.
2144      * @param x batch of floating point values.
2145      * @return the sine of \c x.
2146      */
2147     template <class T, class A>
2148     XSIMD_INLINE batch<T, A> sin(batch<T, A> const& x) noexcept
2149     {
2150         detail::static_check_supported_config<T, A>();
2151         return kernel::sin<A>(x, A {});
2152     }
2153
2154     /**
2155      * @ingroup batch_trigo
2156      *
2157      * Computes the sine and the cosine of the batch \c x. This method is faster
2158      * than calling sine and cosine independently.
2159      * @param x batch of floating point values.
2160      * @return a pair containing the sine then the cosine of  batch \c x
2161      */
2162     template <class T, class A>
2163     XSIMD_INLINE std::pair<batch<T, A>, batch<T, A>> sincos(batch<T, A> const& x) noexcept
2164     {
2165         detail::static_check_supported_config<T, A>();
2166         return kernel::sincos<A>(x, A {});
2167     }
2168
2169     /**
2170      * @ingroup batch_trigo
2171      *
2172      * Computes the hyperbolic sine of the batch \c x.
2173      * @param x batch of floating point values.
2174      * @return the hyperbolic sine of \c x.
2175      */
2176     template <class T, class A>
2177     XSIMD_INLINE batch<T, A> sinh(batch<T, A> const& x) noexcept
2178     {
2179         detail::static_check_supported_config<T, A>();
2180         return kernel::sinh<A>(x, A {});
2181     }
2182
2183     /**
2184      * @ingroup batch_data_transfer
2185      *
2186      * Slide the whole batch to the left by \c n bytes. This is different from
2187      * \c bitwise_lshift that shifts each batch element to the left.
2188      *
2189      * @tparam N Amount of bytes to slide to the left.
2190      * @param x batch of integer values.
2191      * @return slided batch.
2192      */
2193     template <size_t N, class T, class A>
2194     XSIMD_INLINE batch<T, A> slide_left(batch<T, A> const& x) noexcept
2195     {
2196         static_assert(std::is_integral<T>::value, "can only slide batch of integers");
2197         detail::static_check_supported_config<T, A>();
2198         return kernel::slide_left<N, A>(x, A {});
2199     }
2200
2201     /**
2202      * @ingroup batch_data_transfer
2203      *
2204      * Slide the whole batch to the right by \c N bytes. This is different from
2205      * \c bitwise_rshift that shifts each batch element to the right.
2206      *
2207      * @tparam N Amount of bytes to slide to the right.
2208      * @param x batch of integer values.
2209      * @return slided batch.
2210      */
2211     template <size_t N, class T, class A>
2212     XSIMD_INLINE batch<T, A> slide_right(batch<T, A> const& x) noexcept
2213     {
2214         static_assert(std::is_integral<T>::value, "can only slide batch of integers");
2215         detail::static_check_supported_config<T, A>();
2216         return kernel::slide_right<N, A>(x, A {});
2217     }
2218
2219     /**
2220      * @ingroup batch_math
2221      *
2222      * Computes the square root of the batch \c x.
2223      * @param x batch of floating point values.
2224      * @return the square root of \c x.
2225      */
2226     template <class T, class A>
2227     XSIMD_INLINE batch<T, A> sqrt(batch<T, A> const& x) noexcept
2228     {
2229         detail::static_check_supported_config<T, A>();
2230         return kernel::sqrt<A>(x, A {});
2231     }
2232
2233     /**
2234      * @ingroup batch_arithmetic
2235      *
2236      * Computes the saturate difference of the batch \c x and the batch \c y.
2237      * @tparam X the actual type of batch.
2238      * @param x batch involved in the saturated difference.
2239      * @param y batch involved in the saturated difference.
2240      * @return the result of the saturated difference.
2241      */
2242     template <class T, class A>
2243     XSIMD_INLINE batch<T, A> ssub(batch<T, A> const& x, batch<T, A> const& y) noexcept
2244     {
2245         detail::static_check_supported_config<T, A>();
2246         return kernel::ssub<A>(x, y, A {});
2247     }
2248
2249     /**
2250      * @ingroup batch_data_transfer
2251      *
2252      * Copy content of batch \c src to the buffer \c dst. The
2253      * memory needs to be aligned.
2254      * @param dst the memory buffer to write to
2255      * @param src the batch to copy
2256      */
2257     template <class To, class A = default_arch, class From>
2258     XSIMD_INLINE void store_as(To* dst, batch<From, A> const& src, aligned_mode) noexcept
2259     {
2260         detail::static_check_supported_config<From, A>();
2261         kernel::store_aligned<A>(dst, src, A {});
2262     }
2263
2264     template <class A = default_arch, class From>
2265     XSIMD_INLINE void store_as(bool* dst, batch_bool<From, A> const& src, aligned_mode) noexcept
2266     {
2267         detail::static_check_supported_config<From, A>();
2268         kernel::store<A>(src, dst, A {});
2269     }
2270
2271     template <class To, class A = default_arch, class From>
2272     XSIMD_INLINE void store_as(std::complex<To>* dst, batch<std::complex<From>, A> const& src, aligned_mode) noexcept
2273     {
2274         detail::static_check_supported_config<std::complex<From>, A>();
2275         kernel::store_complex_aligned<A>(dst, src, A {});
2276     }
2277
2278 #ifdef XSIMD_ENABLE_XTL_COMPLEX
2279     template <class To, class A = default_arch, class From, bool i3ec>
2280     XSIMD_INLINE void store_as(xtl::xcomplex<To, To, i3ec>* dst, batch<std::complex<From>, A> const& src, aligned_mode) noexcept
2281     {
2282         store_as(reinterpret_cast<std::complex<To>*>(dst), src, aligned_mode());
2283     }
2284 #endif
2285
2286     /**
2287      * @ingroup batch_data_transfer
2288      *
2289      * Copy content of batch \c src to the buffer \c dst. The
2290      * memory does not need to be aligned.
2291      * @param dst the memory buffer to write to
2292      * @param src the batch to copy
2293      */
2294     template <class To, class A = default_arch, class From>
2295     XSIMD_INLINE void store_as(To* dst, batch<From, A> const& src, unaligned_mode) noexcept
2296     {
2297         detail::static_check_supported_config<From, A>();
2298         kernel::store_unaligned<A>(dst, src, A {});
2299     }
2300
2301     template <class A = default_arch, class From>
2302     XSIMD_INLINE void store_as(bool* dst, batch_bool<From, A> const& src, unaligned_mode) noexcept
2303     {
2304         detail::static_check_supported_config<From, A>();
2305         kernel::store<A>(src, dst, A {});
2306     }
2307
2308     template <class To, class A = default_arch, class From>
2309     XSIMD_INLINE void store_as(std::complex<To>* dst, batch<std::complex<From>, A> const& src, unaligned_mode) noexcept
2310     {
2311         detail::static_check_supported_config<std::complex<From>, A>();
2312         kernel::store_complex_unaligned<A>(dst, src, A {});
2313     }
2314
2315 #ifdef XSIMD_ENABLE_XTL_COMPLEX
2316     template <class To, class A = default_arch, class From, bool i3ec>
2317     XSIMD_INLINE void store_as(xtl::xcomplex<To, To, i3ec>* dst, batch<std::complex<From>, A> const& src, unaligned_mode) noexcept
2318     {
2319         detail::static_check_supported_config<std::complex<From>, A>();
2320         store_as(reinterpret_cast<std::complex<To>*>(dst), src, unaligned_mode());
2321     }
2322 #endif
2323
2324     /**
2325      * @ingroup batch_data_transfer
2326      *
2327      * Copy content of batch \c val to the buffer \c mem. The
2328      * memory does not need to be aligned.
2329      * @param mem the memory buffer to write to
2330      * @param val the batch to copy from
2331      */
2332     template <class A, class T>
2333     XSIMD_INLINE void store(T* mem, batch<T, A> const& val, aligned_mode = {}) noexcept
2334     {
2335         store_as<T, A>(mem, val, aligned_mode {});
2336     }
2337
2338     /**
2339      * @ingroup batch_data_transfer
2340      *
2341      * Copy content of batch \c val to the buffer \c mem. The
2342      * memory does not need to be aligned.
2343      * @param mem the memory buffer to write to
2344      * @param val the batch to copy from
2345      */
2346     template <class A, class T>
2347     XSIMD_INLINE void store(T* mem, batch<T, A> const& val, unaligned_mode) noexcept
2348     {
2349         store_as<T, A>(mem, val, unaligned_mode {});
2350     }
2351
2352     /**
2353      * @ingroup batch_data_transfer
2354      *
2355      * Copy content of batch \c val to the buffer \c mem. The
2356      * memory needs to be aligned.
2357      * @param mem the memory buffer to write to
2358      * @param val the batch to copy from
2359      */
2360     template <class A, class T>
2361     XSIMD_INLINE void store_aligned(T* mem, batch<T, A> const& val) noexcept
2362     {
2363         store_as<T, A>(mem, val, aligned_mode {});
2364     }
2365
2366     /**
2367      * @ingroup batch_data_transfer
2368      *
2369      * Copy content of batch \c val to the buffer \c mem. The
2370      * memory does not need to be aligned.
2371      * @param mem the memory buffer to write to
2372      * @param val the batch to copy
2373      */
2374     template <class A, class T>
2375     XSIMD_INLINE void store_unaligned(T* mem, batch<T, A> const& val) noexcept
2376     {
2377         store_as<T, A>(mem, val, unaligned_mode {});
2378     }
2379
2380     /**
2381      * @ingroup batch_arithmetic
2382      *
2383      * Computes the difference between \c x and \c y
2384      * @tparam X the actual type of batch.
2385      * @param x scalar or batch of scalars
2386      * @param y scalar or batch of scalars
2387      * @return the difference between \c x and \c y
2388      */
2389     template <class T, class A>
2390     XSIMD_INLINE auto sub(batch<T, A> const& x, batch<T, A> const& y) noexcept -> decltype(x - y)
2391     {
2392         detail::static_check_supported_config<T, A>();
2393         return x - y;
2394     }
2395
2396     /**
2397      * @ingroup batch_data_transfer
2398      *
2399      * Rearrange elements from \c x according to constant mask \c mask
2400      * @param x batch
2401      * @param mask constant batch mask of integer elements of the same size as
2402      * element of \c x
2403      * @return swizzled batch
2404      */
2405     template <class T, class A, class Vt, Vt... Values>
2406     XSIMD_INLINE typename std::enable_if<std::is_arithmetic<T>::value, batch<T, A>>::type
2407     swizzle(batch<T, A> const& x, batch_constant<Vt, A, Values...> mask) noexcept
2408     {
2409         static_assert(sizeof(T) == sizeof(Vt), "consistent mask");
2410         detail::static_check_supported_config<T, A>();
2411         return kernel::swizzle<A>(x, mask, A {});
2412     }
2413     template <class T, class A, class Vt, Vt... Values>
2414     XSIMD_INLINE batch<std::complex<T>, A> swizzle(batch<std::complex<T>, A> const& x, batch_constant<Vt, A, Values...> mask) noexcept
2415     {
2416         static_assert(sizeof(T) == sizeof(Vt), "consistent mask");
2417         detail::static_check_supported_config<T, A>();
2418         return kernel::swizzle<A>(x, mask, A {});
2419     }
2420
2421     /**
2422      * @ingroup batch_data_transfer
2423      *
2424      * Rearrange elements from \c x according to mask \c mask
2425      * @param x batch
2426      * @param mask batch mask of integer elements of the same size as
2427      * element of \c x
2428      * @return swizzled batch
2429      */
2430     template <class T, class A, class Vt>
2431     XSIMD_INLINE typename std::enable_if<std::is_arithmetic<T>::value, batch<T, A>>::type
2432     swizzle(batch<T, A> const& x, batch<Vt, A> mask) noexcept
2433     {
2434         static_assert(sizeof(T) == sizeof(Vt), "consistent mask");
2435         detail::static_check_supported_config<T, A>();
2436         return kernel::swizzle<A>(x, mask, A {});
2437     }
2438
2439     template <class T, class A, class Vt>
2440     XSIMD_INLINE batch<std::complex<T>, A> swizzle(batch<std::complex<T>, A> const& x, batch<Vt, A> mask) noexcept
2441     {
2442         static_assert(sizeof(T) == sizeof(Vt), "consistent mask");
2443         detail::static_check_supported_config<T, A>();
2444         return kernel::swizzle<A>(x, mask, A {});
2445     }
2446
2447     /**
2448      * @ingroup batch_trigo
2449      *
2450      * Computes the tangent of the batch \c x.
2451      * @param x batch of floating point values.
2452      * @return the tangent of \c x.
2453      */
2454     template <class T, class A>
2455     XSIMD_INLINE batch<T, A> tan(batch<T, A> const& x) noexcept
2456     {
2457         detail::static_check_supported_config<T, A>();
2458         return kernel::tan<A>(x, A {});
2459     }
2460
2461     /**
2462      * @ingroup batch_trigo
2463      *
2464      * Computes the hyperbolic tangent of the batch \c x.
2465      * @param x batch of floating point values.
2466      * @return the hyperbolic tangent of \c x.
2467      */
2468     template <class T, class A>
2469     XSIMD_INLINE batch<T, A> tanh(batch<T, A> const& x) noexcept
2470     {
2471         detail::static_check_supported_config<T, A>();
2472         return kernel::tanh<A>(x, A {});
2473     }
2474
2475     /**
2476      * @ingroup batch_math_extra
2477      *
2478      * Computes the gamma function of the batch \c x.
2479      * @param x batch of floating point values.
2480      * @return the gamma function of \c x.
2481      */
2482     template <class T, class A>
2483     XSIMD_INLINE batch<T, A> tgamma(batch<T, A> const& x) noexcept
2484     {
2485         detail::static_check_supported_config<T, A>();
2486         return kernel::tgamma<A>(x, A {});
2487     }
2488
2489     /**
2490      * @ingroup batch_conversion
2491      *
2492      * Perform a conversion from \c i to a value of an floating point type of the same size as \c T.
2493      * This is equivalent to \c batch_cast<as_float_t<T>>(i)
2494      * @param i batch of integers.
2495      * @return \c i converted to a value of an floating point type of the same size as \c T
2496      */
2497     template <class T, class A>
2498     XSIMD_INLINE batch<as_float_t<T>, A> to_float(batch<T, A> const& i) noexcept
2499     {
2500         detail::static_check_supported_config<T, A>();
2501         return batch_cast<as_float_t<T>>(i);
2502     }
2503
2504     /**
2505      * @ingroup batch_conversion
2506      *
2507      * Perform a conversion from \c x to a value of an integer type of the same size as \c T
2508      * This is equivalent to \c batch_cast<as_integer_t<T>>(x)
2509      * @param x batch.
2510      * @return \c x converted to a value of an integer type of the same size as \c T
2511      */
2512     template <class T, class A>
2513     XSIMD_INLINE batch<as_integer_t<T>, A> to_int(batch<T, A> const& x) noexcept
2514     {
2515         detail::static_check_supported_config<T, A>();
2516         return batch_cast<as_integer_t<T>>(x);
2517     }
2518
2519     /**
2520      * @ingroup batch_data_transfer
2521      *
2522      * Transposes in place the matrix whose line are each of the batch passed as
2523      * argument.
2524      * @param matrix_begin pointer to the first line of the matrix to transpose
2525      * @param matrix_end pointer to one element after the last line of the matrix to transpose
2526      *
2527      */
2528     template <class T, class A>
2529     XSIMD_INLINE void transpose(batch<T, A>* matrix_begin, batch<T, A>* matrix_end) noexcept
2530     {
2531         assert((matrix_end - matrix_begin == batch<T, A>::size) && "correctly sized matrix");
2532         detail::static_check_supported_config<T, A>();
2533         return kernel::transpose(matrix_begin, matrix_end, A {});
2534     }
2535
2536     /**
2537      * @ingroup batch_rounding
2538      *
2539      * Computes the batch of nearest integer values not greater in magnitude
2540      * than scalars in \c x.
2541      * @param x batch of floating point values.
2542      * @return the batch of nearest integer values not greater in magnitude than \c x.
2543      */
2544     template <class T, class A>
2545     XSIMD_INLINE batch<T, A> trunc(batch<T, A> const& x) noexcept
2546     {
2547         detail::static_check_supported_config<T, A>();
2548         return kernel::trunc<A>(x, A {});
2549     }
2550
2551     /**
2552      * @ingroup batch_data_transfer
2553      *
2554      * Unpack and interleave data from the HIGH half of batches \c x and \c y.
2555      * Store the results in the Return value.
2556      * @param x a batch of integer or floating point or double precision values.
2557      * @param y a batch of integer or floating point or double precision values.
2558      * @return a batch of the high part of shuffled values.
2559      */
2560     template <class T, class A>
2561     XSIMD_INLINE batch<T, A> zip_hi(batch<T, A> const& x, batch<T, A> const& y) noexcept
2562     {
2563         detail::static_check_supported_config<T, A>();
2564         return kernel::zip_hi<A>(x, y, A {});
2565     }
2566
2567     /**
2568      * @ingroup batch_data_transfer
2569      *
2570      * Unpack and interleave data from the LOW half of batches \c x and \c y.
2571      * Store the results in the Return value.
2572      * @param x a batch of integer or floating point or double precision values.
2573      * @param y a batch of integer or floating point or double precision values.
2574      * @return a batch of the low part of shuffled values.
2575      */
2576     template <class T, class A>
2577     XSIMD_INLINE batch<T, A> zip_lo(batch<T, A> const& x, batch<T, A> const& y) noexcept
2578     {
2579         detail::static_check_supported_config<T, A>();
2580         return kernel::zip_lo<A>(x, y, A {});
2581     }
2582
2583     /**
2584      * @ingroup batch_conversion
2585      *
2586      * Cast a \c batch_bool of \c T into a \c batch of the same type using the
2587      * following rule: if an element of \c self is true, it maps to -1 in the
2588      * returned integral batch, otherwise it maps to 0.
2589      *
2590      * @param self batch_bool of \c T
2591      * @return \c self cast to a \c batch of \c T
2592      */
2593     template <class T, class A, typename std::enable_if<std::is_integral<T>::value, int>::type = 3>
2594     XSIMD_INLINE batch<T, A> bitwise_cast(batch_bool<T, A> const& self) noexcept
2595     {
2596         T z(0);
2597         detail::static_check_supported_config<T, A>();
2598         return select(self, batch<T, A>(T(~z)), batch<T, A>(z));
2599     }
2600
2601     template <class T, class A, typename std::enable_if<std::is_floating_point<T>::value, int>::type = 3>
2602     XSIMD_INLINE batch<T, A> bitwise_cast(batch_bool<T, A> const& self) noexcept
2603     {
2604         T z0(0), z1(0);
2605         using int_type = as_unsigned_integer_t<T>;
2606         int_type value(~int_type(0));
2607         std::memcpy(&z1, &value, sizeof(int_type));
2608         detail::static_check_supported_config<T, A>();
2609         return select(self, batch<T, A>(z1), batch<T, A>(z0));
2610     }
2611
2612     /**
2613      * @ingroup batch_bool_reducers
2614      *
2615      * Returns true if all the boolean values in the batch are true,
2616      * false otherwise.
2617      * @param x the batch to reduce.
2618      * @return a boolean scalar.
2619      */
2620     template <class T, class A>
2621     XSIMD_INLINE bool all(batch_bool<T, A> const& x) noexcept
2622     {
2623         detail::static_check_supported_config<T, A>();
2624         return kernel::all<A>(x, A {});
2625     }
2626
2627     /**
2628      * @ingroup batch_bool_reducers
2629      *
2630      * Return true if any of the boolean values in the batch is true,
2631      * false otherwise.
2632      * @param x the batch to reduce.
2633      * @return a boolean scalar.
2634      */
2635     template <class T, class A>
2636     XSIMD_INLINE bool any(batch_bool<T, A> const& x) noexcept
2637     {
2638         detail::static_check_supported_config<T, A>();
2639         return kernel::any<A>(x, A {});
2640     }
2641
2642     /**
2643      * @ingroup batch_bool_reducers
2644      *
2645      * Return true if none of the boolean values in the batch is true,
2646      * false otherwise.
2647      * @param x the batch to reduce.
2648      * @return a boolean scalar.
2649      */
2650     template <class T, class A>
2651     XSIMD_INLINE bool none(batch_bool<T, A> const& x) noexcept
2652     {
2653         detail::static_check_supported_config<T, A>();
2654         return !xsimd::any(x);
2655     }
2656
2657     /**
2658      * @ingroup batch_miscellaneous
2659      *
2660      * Dump the content of batch \c x to stream \c o
2661      * @param o the stream where the batch is dumped
2662      * @param x batch to dump.
2663      * @return a reference to \c o
2664      */
2665     template <class T, class A>
2666     XSIMD_INLINE std::ostream& operator<<(std::ostream& o, batch<T, A> const& x) noexcept
2667     {
2668         detail::static_check_supported_config<T, A>();
2669         constexpr auto size = batch<T, A>::size;
2670         alignas(A::alignment()) T buffer[size];
2671         x.store_aligned(&buffer[0]);
2672         o << '(';
2673         for (std::size_t i = 0; i < size - 1; ++i)
2674             o << buffer[i] << ", ";
2675         return o << buffer[size - 1] << ')';
2676     }
2677
2678     /**
2679      * @ingroup batch_miscellaneous
2680      *
2681      * Dump the content of batch \c x to stream \c o
2682      * @param o the stream where the batch is dumped
2683      * @param x batch to dump.
2684      * @return a reference to \c o
2685      */
2686     template <class T, class A>
2687     XSIMD_INLINE std::ostream& operator<<(std::ostream& o, batch_bool<T, A> const& x) noexcept
2688     {
2689         detail::static_check_supported_config<T, A>();
2690         constexpr auto size = batch_bool<T, A>::size;
2691         alignas(A::alignment()) bool buffer[size];
2692         x.store_aligned(&buffer[0]);
2693         o << '(';
2694         for (std::size_t i = 0; i < size - 1; ++i)
2695             o << buffer[i] << ", ";
2696         return o << buffer[size - 1] << ')';
2697     }
2698 }
2699
2700 #endif