src/Core/GenericPacketMath.h

0001 // This file is part of Eigen, a lightweight C++ template library
0002 // for linear algebra.
0003 //
0004 // Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
0005 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
0006 //
0007 // This Source Code Form is subject to the terms of the Mozilla
0008 // Public License v. 2.0. If a copy of the MPL was not distributed
0009 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
0010
0011 #ifndef EIGEN_GENERIC_PACKET_MATH_H
0012 #define EIGEN_GENERIC_PACKET_MATH_H
0013
0014 namespace Eigen {
0015
0016 namespace internal {
0017
/** \internal
  * \file GenericPacketMath.h
  *
  * Default implementations for types not supported by the vectorization.
  * In practice, these functions are provided to make it easier to write
  * generic vectorized code.
  */
0025
// Debug hooks: each macro below defaults to an empty expansion unless it has
// already been defined before this point.
#if !defined(EIGEN_DEBUG_ALIGNED_LOAD)
#  define EIGEN_DEBUG_ALIGNED_LOAD
#endif

#if !defined(EIGEN_DEBUG_UNALIGNED_LOAD)
#  define EIGEN_DEBUG_UNALIGNED_LOAD
#endif

#if !defined(EIGEN_DEBUG_ALIGNED_STORE)
#  define EIGEN_DEBUG_ALIGNED_STORE
#endif

#if !defined(EIGEN_DEBUG_UNALIGNED_STORE)
#  define EIGEN_DEBUG_UNALIGNED_STORE
#endif
0041
/** \internal
  * Default values of the packet capability flags. A flag equal to 1 is
  * enabled by default, a flag equal to 0 is disabled by default.
  */
struct default_packet_traits
{
  enum {
    HasHalfPacket = 0,

    // --- basic arithmetic / utility operations ---
    HasAdd = 1,
    HasSub = 1,
    HasShift = 1,
    HasMul = 1,
    HasNegate = 1,
    HasAbs = 1,
    HasArg = 0,
    HasAbs2 = 1,
    HasAbsDiff = 0,
    HasMin = 1,
    HasMax = 1,
    HasConj = 1,
    HasSetLinear = 1,
    HasBlend = 0,
    // Tells whether packet comparison is supported; pcmp_eq, pcmp_lt and
    // pcmp_le should all be defined for this flag to be true.
    HasCmp = 0,

    // --- division, roots, exponentials and logarithms ---
    HasDiv = 0,
    HasSqrt = 0,
    HasRsqrt = 0,
    HasExp = 0,
    HasExpm1 = 0,
    HasLog = 0,
    HasLog1p = 0,
    HasLog10 = 0,
    HasPow = 0,

    // --- trigonometric, hyperbolic and special functions ---
    HasSin = 0,
    HasCos = 0,
    HasTan = 0,
    HasASin = 0,
    HasACos = 0,
    HasATan = 0,
    HasSinh = 0,
    HasCosh = 0,
    HasTanh = 0,
    HasLGamma = 0,
    HasDiGamma = 0,
    HasZeta = 0,
    HasPolygamma = 0,
    HasErf = 0,
    HasErfc = 0,
    HasNdtri = 0,
    HasBessel = 0,
    HasIGamma = 0,
    HasIGammaDerA = 0,
    HasGammaSampleDerAlpha = 0,
    HasIGammac = 0,
    HasBetaInc = 0,

    // --- rounding and sign ---
    HasRound = 0,
    HasRint = 0,
    HasFloor = 0,
    HasCeil = 0,
    HasSign = 0
  };
};
0105
0106 template<typename T> struct packet_traits : default_packet_traits
0107 {
0108   typedef T type;
0109   typedef T half;
0110   enum {
0111     Vectorizable = 0,
0112     size = 1,
0113     AlignedOnScalar = 0,
0114     HasHalfPacket = 0
0115   };
0116   enum {
0117     HasAdd    = 0,
0118     HasSub    = 0,
0119     HasMul    = 0,
0120     HasNegate = 0,
0121     HasAbs    = 0,
0122     HasAbs2   = 0,
0123     HasMin    = 0,
0124     HasMax    = 0,
0125     HasConj   = 0,
0126     HasSetLinear = 0
0127   };
0128 };
0129
0130 template<typename T> struct packet_traits<const T> : packet_traits<T> { };
0131
/** \internal
  * Generic traits describing a packet type; by default T is treated as a
  * one-element, non-vectorizable packet of itself.
  */
template<typename T>
struct unpacket_traits
{
  typedef T type;
  typedef T half;

  enum {
    size                   = 1,
    alignment              = 1,
    vectorizable           = false,
    masked_load_available  = false,
    masked_store_available = false
  };
};
0145
0146 template<typename T> struct unpacket_traits<const T> : unpacket_traits<T> { };
0147
/** \internal
  * Default traits for casting from Src to Tgt: no vectorized cast, and a
  * one-to-one coefficient ratio on both sides.
  */
template <typename Src, typename Tgt>
struct type_casting_traits
{
  enum {
    VectorizedCast = 0,
    SrcCoeffRatio  = 1,
    TgtCoeffRatio  = 1
  };
};
0155
0156 /** \internal Wrapper to ensure that multiple packet types can map to the same
0157     same underlying vector type. */
0158 template<typename T, int unique_id = 0>
0159 struct eigen_packet_wrapper
0160 {
0161   EIGEN_ALWAYS_INLINE operator T&() { return m_val; }
0162   EIGEN_ALWAYS_INLINE operator const T&() const { return m_val; }
0163   EIGEN_ALWAYS_INLINE eigen_packet_wrapper() {}
0164   EIGEN_ALWAYS_INLINE eigen_packet_wrapper(const T &v) : m_val(v) {}
0165   EIGEN_ALWAYS_INLINE eigen_packet_wrapper& operator=(const T &v) {
0166     m_val = v;
0167     return *this;
0168   }
0169
0170   T m_val;
0171 };
0172
0173
0174 /** \internal A convenience utility for determining if the type is a scalar.
0175  * This is used to enable some generic packet implementations.
0176  */
0177 template<typename Packet>
0178 struct is_scalar {
0179   typedef typename unpacket_traits<Packet>::type Scalar;
0180   enum {
0181     value = internal::is_same<Packet, Scalar>::value
0182   };
0183 };
0184
0185 /** \internal \returns static_cast<TgtType>(a) (coeff-wise) */
0186 template <typename SrcPacket, typename TgtPacket>
0187 EIGEN_DEVICE_FUNC inline TgtPacket
0188 pcast(const SrcPacket& a) {
0189   return static_cast<TgtPacket>(a);
0190 }
0191 template <typename SrcPacket, typename TgtPacket>
0192 EIGEN_DEVICE_FUNC inline TgtPacket
0193 pcast(const SrcPacket& a, const SrcPacket& /*b*/) {
0194   return static_cast<TgtPacket>(a);
0195 }
0196 template <typename SrcPacket, typename TgtPacket>
0197 EIGEN_DEVICE_FUNC inline TgtPacket
0198 pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/) {
0199   return static_cast<TgtPacket>(a);
0200 }
0201 template <typename SrcPacket, typename TgtPacket>
0202 EIGEN_DEVICE_FUNC inline TgtPacket
0203 pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/,
0204       const SrcPacket& /*e*/, const SrcPacket& /*f*/, const SrcPacket& /*g*/, const SrcPacket& /*h*/) {
0205   return static_cast<TgtPacket>(a);
0206 }
0207
0208 /** \internal \returns reinterpret_cast<Target>(a) */
0209 template <typename Target, typename Packet>
0210 EIGEN_DEVICE_FUNC inline Target
0211 preinterpret(const Packet& a); /* { return reinterpret_cast<const Target&>(a); } */
0212
0213 /** \internal \returns a + b (coeff-wise) */
0214 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
0215 padd(const Packet& a, const Packet& b) { return a+b; }
0216 // Avoid compiler warning for boolean algebra.
0217 template<> EIGEN_DEVICE_FUNC inline bool
0218 padd(const bool& a, const bool& b) { return a || b; }
0219
0220 /** \internal \returns a - b (coeff-wise) */
0221 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
0222 psub(const Packet& a, const Packet& b) { return a-b; }
0223
0224 /** \internal \returns -a (coeff-wise) */
0225 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
0226 pnegate(const Packet& a) { return -a; }
0227
0228 template<> EIGEN_DEVICE_FUNC inline bool
0229 pnegate(const bool& a) { return !a; }
0230
0231 /** \internal \returns conj(a) (coeff-wise) */
0232 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
0233 pconj(const Packet& a) { return numext::conj(a); }
0234
0235 /** \internal \returns a * b (coeff-wise) */
0236 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
0237 pmul(const Packet& a, const Packet& b) { return a*b; }
0238 // Avoid compiler warning for boolean algebra.
0239 template<> EIGEN_DEVICE_FUNC inline bool
0240 pmul(const bool& a, const bool& b) { return a && b; }
0241
0242 /** \internal \returns a / b (coeff-wise) */
0243 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
0244 pdiv(const Packet& a, const Packet& b) { return a/b; }
0245
0246 // In the generic case, memset to all one bits.
0247 template<typename Packet, typename EnableIf = void>
0248 struct ptrue_impl {
0249   static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/){
0250     Packet b;
0251     memset(static_cast<void*>(&b), 0xff, sizeof(Packet));
0252     return b;
0253   }
0254 };
0255
0256 // For non-trivial scalars, set to Scalar(1) (i.e. a non-zero value).
0257 // Although this is technically not a valid bitmask, the scalar path for pselect
0258 // uses a comparison to zero, so this should still work in most cases. We don't
0259 // have another option, since the scalar type requires initialization.
0260 template<typename T>
0261 struct ptrue_impl<T,
0262     typename internal::enable_if<is_scalar<T>::value && NumTraits<T>::RequireInitialization>::type > {
0263   static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/){
0264     return T(1);
0265   }
0266 };
0267
0268 /** \internal \returns one bits. */
0269 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
0270 ptrue(const Packet& a) {
0271   return ptrue_impl<Packet>::run(a);
0272 }
0273
0274 // In the general case, memset to zero.
0275 template<typename Packet, typename EnableIf = void>
0276 struct pzero_impl {
0277   static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/) {
0278     Packet b;
0279     memset(static_cast<void*>(&b), 0x00, sizeof(Packet));
0280     return b;
0281   }
0282 };
0283
0284 // For scalars, explicitly set to Scalar(0), since the underlying representation
0285 // for zero may not consist of all-zero bits.
0286 template<typename T>
0287 struct pzero_impl<T,
0288     typename internal::enable_if<is_scalar<T>::value>::type> {
0289   static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/) {
0290     return T(0);
0291   }
0292 };
0293
0294 /** \internal \returns packet of zeros */
0295 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
0296 pzero(const Packet& a) {
0297   return pzero_impl<Packet>::run(a);
0298 }
0299
0300 /** \internal \returns a <= b as a bit mask */
0301 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
0302 pcmp_le(const Packet& a, const Packet& b)  { return a<=b ? ptrue(a) : pzero(a); }
0303
0304 /** \internal \returns a < b as a bit mask */
0305 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
0306 pcmp_lt(const Packet& a, const Packet& b)  { return a<b ? ptrue(a) : pzero(a); }
0307
0308 /** \internal \returns a == b as a bit mask */
0309 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
0310 pcmp_eq(const Packet& a, const Packet& b) { return a==b ? ptrue(a) : pzero(a); }
0311
0312 /** \internal \returns a < b or a==NaN or b==NaN as a bit mask */
0313 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
0314 pcmp_lt_or_nan(const Packet& a, const Packet& b) { return a>=b ? pzero(a) : ptrue(a); }
0315
0316 template<typename T>
0317 struct bit_and {
0318   EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const {
0319     return a & b;
0320   }
0321 };
0322
0323 template<typename T>
0324 struct bit_or {
0325   EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const {
0326     return a | b;
0327   }
0328 };
0329
0330 template<typename T>
0331 struct bit_xor {
0332   EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const {
0333     return a ^ b;
0334   }
0335 };
0336
0337 template<typename T>
0338 struct bit_not {
0339   EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a) const {
0340     return ~a;
0341   }
0342 };
0343
0344 // Use operators &, |, ^, ~.
0345 template<typename T>
0346 struct operator_bitwise_helper {
0347   EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) { return bit_and<T>()(a, b); }
0348   EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) { return bit_or<T>()(a, b); }
0349   EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) { return bit_xor<T>()(a, b); }
0350   EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) { return bit_not<T>()(a); }
0351 };
0352
0353 // Apply binary operations byte-by-byte
0354 template<typename T>
0355 struct bytewise_bitwise_helper {
0356   EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) {
0357     return binary(a, b, bit_and<unsigned char>());
0358   }
0359   EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) {
0360     return binary(a, b, bit_or<unsigned char>());
0361    }
0362   EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) {
0363     return binary(a, b, bit_xor<unsigned char>());
0364   }
0365   EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) {
0366     return unary(a,bit_not<unsigned char>());
0367    }
0368
0369  private:
0370   template<typename Op>
0371   EIGEN_DEVICE_FUNC static inline T unary(const T& a, Op op) {
0372     const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
0373     T c;
0374     unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);
0375     for (size_t i = 0; i < sizeof(T); ++i) {
0376       *c_ptr++ = op(*a_ptr++);
0377     }
0378     return c;
0379   }
0380
0381   template<typename Op>
0382   EIGEN_DEVICE_FUNC static inline T binary(const T& a, const T& b, Op op) {
0383     const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
0384     const unsigned char* b_ptr = reinterpret_cast<const unsigned char*>(&b);
0385     T c;
0386     unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);
0387     for (size_t i = 0; i < sizeof(T); ++i) {
0388       *c_ptr++ = op(*a_ptr++, *b_ptr++);
0389     }
0390     return c;
0391   }
0392 };
0393
0394 // In the general case, use byte-by-byte manipulation.
0395 template<typename T, typename EnableIf = void>
0396 struct bitwise_helper : public bytewise_bitwise_helper<T> {};
0397
0398 // For integers or non-trivial scalars, use binary operators.
0399 template<typename T>
0400 struct bitwise_helper<T,
0401   typename internal::enable_if<
0402     is_scalar<T>::value && (NumTraits<T>::IsInteger || NumTraits<T>::RequireInitialization)>::type
0403   > : public operator_bitwise_helper<T> {};
0404
0405 /** \internal \returns the bitwise and of \a a and \a b */
0406 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
0407 pand(const Packet& a, const Packet& b) {
0408   return bitwise_helper<Packet>::bitwise_and(a, b);
0409 }
0410
0411 /** \internal \returns the bitwise or of \a a and \a b */
0412 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
0413 por(const Packet& a, const Packet& b) {
0414   return bitwise_helper<Packet>::bitwise_or(a, b);
0415 }
0416
0417 /** \internal \returns the bitwise xor of \a a and \a b */
0418 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
0419 pxor(const Packet& a, const Packet& b) {
0420   return bitwise_helper<Packet>::bitwise_xor(a, b);
0421 }
0422
0423 /** \internal \returns the bitwise not of \a a */
0424 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
0425 pnot(const Packet& a) {
0426   return bitwise_helper<Packet>::bitwise_not(a);
0427 }
0428
0429 /** \internal \returns the bitwise and of \a a and not \a b */
0430 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
0431 pandnot(const Packet& a, const Packet& b) { return pand(a, pnot(b)); }
0432
0433 // In the general case, use bitwise select.
0434 template<typename Packet, typename EnableIf = void>
0435 struct pselect_impl {
0436   static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
0437     return por(pand(a,mask),pandnot(b,mask));
0438   }
0439 };
0440
0441 // For scalars, use ternary select.
0442 template<typename Packet>
0443 struct pselect_impl<Packet,
0444     typename internal::enable_if<is_scalar<Packet>::value>::type > {
0445   static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
0446     return numext::equal_strict(mask, Packet(0)) ? b : a;
0447   }
0448 };
0449
0450 /** \internal \returns \a or \b for each field in packet according to \mask */
0451 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
0452 pselect(const Packet& mask, const Packet& a, const Packet& b) {
0453   return pselect_impl<Packet>::run(mask, a, b);
0454 }
0455
0456 template<> EIGEN_DEVICE_FUNC inline bool pselect<bool>(
0457     const bool& cond, const bool& a, const bool& b) {
0458   return cond ? a : b;
0459 }
0460
0461 /** \internal \returns the min or of \a a and \a b (coeff-wise)
0462     If either \a a or \a b are NaN, the result is implementation defined. */
0463 template<int NaNPropagation>
0464 struct pminmax_impl {
0465   template <typename Packet, typename Op>
0466   static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
0467     return op(a,b);
0468   }
0469 };
0470
0471 /** \internal \returns the min or max of \a a and \a b (coeff-wise)
0472     If either \a a or \a b are NaN, NaN is returned. */
0473 template<>
0474 struct pminmax_impl<PropagateNaN> {
0475   template <typename Packet, typename Op>
0476   static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
0477   Packet not_nan_mask_a = pcmp_eq(a, a);
0478   Packet not_nan_mask_b = pcmp_eq(b, b);
0479   return pselect(not_nan_mask_a,
0480                  pselect(not_nan_mask_b, op(a, b), b),
0481                  a);
0482   }
0483 };
0484
0485 /** \internal \returns the min or max of \a a and \a b (coeff-wise)
0486     If both \a a and \a b are NaN, NaN is returned.
0487     Equivalent to std::fmin(a, b).  */
0488 template<>
0489 struct pminmax_impl<PropagateNumbers> {
0490   template <typename Packet, typename Op>
0491   static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
0492   Packet not_nan_mask_a = pcmp_eq(a, a);
0493   Packet not_nan_mask_b = pcmp_eq(b, b);
0494   return pselect(not_nan_mask_a,
0495                  pselect(not_nan_mask_b, op(a, b), a),
0496                  b);
0497   }
0498 };
0499
0500
// Produces the binary operation handed to pminmax_impl::run. For SYCL device
// compilation Func is wrapped in a lambda; otherwise Func is passed directly.
#ifdef SYCL_DEVICE_ONLY
#define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) \
[](const Type& a, const Type& b) { \
        return Func(a, b);}
#else
#define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) Func
#endif
0508
0509 /** \internal \returns the min of \a a and \a b  (coeff-wise).
0510     If \a a or \b b is NaN, the return value is implementation defined. */
0511 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
0512 pmin(const Packet& a, const Packet& b) { return numext::mini(a,b); }
0513
0514 /** \internal \returns the min of \a a and \a b  (coeff-wise).
0515     NaNPropagation determines the NaN propagation semantics. */
0516 template <int NaNPropagation, typename Packet>
0517 EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, const Packet& b) {
0518   return pminmax_impl<NaNPropagation>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmin<Packet>)));
0519 }
0520
0521 /** \internal \returns the max of \a a and \a b  (coeff-wise)
0522     If \a a or \b b is NaN, the return value is implementation defined. */
0523 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
0524 pmax(const Packet& a, const Packet& b) { return numext::maxi(a, b); }
0525
0526 /** \internal \returns the max of \a a and \a b  (coeff-wise).
0527     NaNPropagation determines the NaN propagation semantics. */
0528 template <int NaNPropagation, typename Packet>
0529 EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, const Packet& b) {
0530   return pminmax_impl<NaNPropagation>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet,(pmax<Packet>)));
0531 }
0532
0533 /** \internal \returns the absolute value of \a a */
0534 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
0535 pabs(const Packet& a) { return numext::abs(a); }
0536 template<> EIGEN_DEVICE_FUNC inline unsigned int
0537 pabs(const unsigned int& a) { return a; }
0538 template<> EIGEN_DEVICE_FUNC inline unsigned long
0539 pabs(const unsigned long& a) { return a; }
0540 template<> EIGEN_DEVICE_FUNC inline unsigned long long
0541 pabs(const unsigned long long& a) { return a; }
0542
0543 /** \internal \returns the addsub value of \a a,b */
0544 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
0545 paddsub(const Packet& a, const Packet& b) {
0546   return pselect(peven_mask(a), padd(a, b), psub(a, b));
0547  }
0548
0549 /** \internal \returns the phase angle of \a a */
0550 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
0551 parg(const Packet& a) { using numext::arg; return arg(a); }
0552
0553
0554 /** \internal \returns \a a logically shifted by N bits to the right */
0555 template<int N> EIGEN_DEVICE_FUNC inline int
0556 parithmetic_shift_right(const int& a) { return a >> N; }
0557 template<int N> EIGEN_DEVICE_FUNC inline long int
0558 parithmetic_shift_right(const long int& a) { return a >> N; }
0559
0560 /** \internal \returns \a a arithmetically shifted by N bits to the right */
0561 template<int N> EIGEN_DEVICE_FUNC inline int
0562 plogical_shift_right(const int& a) { return static_cast<int>(static_cast<unsigned int>(a) >> N); }
0563 template<int N> EIGEN_DEVICE_FUNC inline long int
0564 plogical_shift_right(const long int& a) { return static_cast<long>(static_cast<unsigned long>(a) >> N); }
0565
0566 /** \internal \returns \a a shifted by N bits to the left */
0567 template<int N> EIGEN_DEVICE_FUNC inline int
0568 plogical_shift_left(const int& a) { return a << N; }
0569 template<int N> EIGEN_DEVICE_FUNC inline long int
0570 plogical_shift_left(const long int& a) { return a << N; }
0571
0572 /** \internal \returns the significant and exponent of the underlying floating point numbers
0573   * See https://en.cppreference.com/w/cpp/numeric/math/frexp
0574   */
0575 template <typename Packet>
0576 EIGEN_DEVICE_FUNC inline Packet pfrexp(const Packet& a, Packet& exponent) {
0577   int exp;
0578   EIGEN_USING_STD(frexp);
0579   Packet result = static_cast<Packet>(frexp(a, &exp));
0580   exponent = static_cast<Packet>(exp);
0581   return result;
0582 }
0583
0584 /** \internal \returns a * 2^((int)exponent)
0585   * See https://en.cppreference.com/w/cpp/numeric/math/ldexp
0586   */
0587 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
0588 pldexp(const Packet &a, const Packet &exponent) {
0589   EIGEN_USING_STD(ldexp)
0590   return static_cast<Packet>(ldexp(a, static_cast<int>(exponent)));
0591 }
0592
0593 /** \internal \returns the min of \a a and \a b  (coeff-wise) */
0594 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
0595 pabsdiff(const Packet& a, const Packet& b) { return pselect(pcmp_lt(a, b), psub(b, a), psub(a, b)); }
0596
0597 /** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */
0598 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
0599 pload(const typename unpacket_traits<Packet>::type* from) { return *from; }
0600
0601 /** \internal \returns a packet version of \a *from, (un-aligned load) */
0602 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
0603 ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }
0604
0605 /** \internal \returns a packet version of \a *from, (un-aligned masked load)
0606  * There is no generic implementation. We only have implementations for specialized
0607  * cases. Generic case should not be called.
0608  */
0609 template<typename Packet> EIGEN_DEVICE_FUNC inline
0610 typename enable_if<unpacket_traits<Packet>::masked_load_available, Packet>::type
0611 ploadu(const typename unpacket_traits<Packet>::type* from, typename unpacket_traits<Packet>::mask_t umask);
0612
0613 /** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */
0614 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
0615 pset1(const typename unpacket_traits<Packet>::type& a) { return a; }
0616
0617 /** \internal \returns a packet with constant coefficients set from bits */
0618 template<typename Packet,typename BitsType> EIGEN_DEVICE_FUNC inline Packet
0619 pset1frombits(BitsType a);
0620
0621 /** \internal \returns a packet with constant coefficients \a a[0], e.g.: (a[0],a[0],a[0],a[0]) */
0622 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
0623 pload1(const typename unpacket_traits<Packet>::type  *a) { return pset1<Packet>(*a); }
0624
0625 /** \internal \returns a packet with elements of \a *from duplicated.
0626   * For instance, for a packet of 8 elements, 4 scalars will be read from \a *from and
0627   * duplicated to form: {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]}
0628   * Currently, this function is only used for scalar * complex products.
0629   */
0630 template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
0631 ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
0632
0633 /** \internal \returns a packet with elements of \a *from quadrupled.
0634   * For instance, for a packet of 8 elements, 2 scalars will be read from \a *from and
0635   * replicated to form: {from[0],from[0],from[0],from[0],from[1],from[1],from[1],from[1]}
0636   * Currently, this function is only used in matrix products.
0637   * For packet-size smaller or equal to 4, this function is equivalent to pload1
0638   */
0639 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
0640 ploadquad(const typename unpacket_traits<Packet>::type* from)
0641 { return pload1<Packet>(from); }
0642
0643 /** \internal equivalent to
0644   * \code
0645   * a0 = pload1(a+0);
0646   * a1 = pload1(a+1);
0647   * a2 = pload1(a+2);
0648   * a3 = pload1(a+3);
0649   * \endcode
0650   * \sa pset1, pload1, ploaddup, pbroadcast2
0651   */
0652 template<typename Packet> EIGEN_DEVICE_FUNC
0653 inline void pbroadcast4(const typename unpacket_traits<Packet>::type *a,
0654                         Packet& a0, Packet& a1, Packet& a2, Packet& a3)
0655 {
0656   a0 = pload1<Packet>(a+0);
0657   a1 = pload1<Packet>(a+1);
0658   a2 = pload1<Packet>(a+2);
0659   a3 = pload1<Packet>(a+3);
0660 }
0661
0662 /** \internal equivalent to
0663   * \code
0664   * a0 = pload1(a+0);
0665   * a1 = pload1(a+1);
0666   * \endcode
0667   * \sa pset1, pload1, ploaddup, pbroadcast4
0668   */
0669 template<typename Packet> EIGEN_DEVICE_FUNC
0670 inline void pbroadcast2(const typename unpacket_traits<Packet>::type *a,
0671                         Packet& a0, Packet& a1)
0672 {
0673   a0 = pload1<Packet>(a+0);
0674   a1 = pload1<Packet>(a+1);
0675 }
0676
0677 /** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */
0678 template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
0679 plset(const typename unpacket_traits<Packet>::type& a) { return a; }
0680
0681 /** \internal \returns a packet with constant coefficients \a a, e.g.: (x, 0, x, 0),
0682      where x is the value of all 1-bits. */
0683 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
0684 peven_mask(const Packet& /*a*/) {
0685   typedef typename unpacket_traits<Packet>::type Scalar;
0686   const size_t n = unpacket_traits<Packet>::size;
0687   EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];
0688   for(size_t i = 0; i < n; ++i) {
0689     memset(elements+i, ((i & 1) == 0 ? 0xff : 0), sizeof(Scalar));
0690   }
0691   return ploadu<Packet>(elements);
0692 }
0693
0694
0695 /** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */
0696 template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from)
0697 { (*to) = from; }
0698
0699 /** \internal copy the packet \a from to \a *to, (un-aligned store) */
0700 template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from)
0701 {  (*to) = from; }
0702
0703 /** \internal copy the packet \a from to \a *to, (un-aligned store with a mask)
0704  * There is no generic implementation. We only have implementations for specialized
0705  * cases. Generic case should not be called.
0706  */
0707 template<typename Scalar, typename Packet>
0708 EIGEN_DEVICE_FUNC inline
0709 typename enable_if<unpacket_traits<Packet>::masked_store_available, void>::type
0710 pstoreu(Scalar* to, const Packet& from, typename unpacket_traits<Packet>::mask_t umask);
0711
0712  template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/)
0713  { return ploadu<Packet>(from); }
0714
0715  template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, Index /*stride*/)
0716  { pstore(to, from); }
0717
0718 /** \internal tries to do cache prefetching of \a addr */
0719 template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr)
0720 {
0721 #if defined(EIGEN_HIP_DEVICE_COMPILE)
0722   // do nothing
0723 #elif defined(EIGEN_CUDA_ARCH)
0724 #if defined(__LP64__) || EIGEN_OS_WIN64
0725   // 64-bit pointer operand constraint for inlined asm
0726   asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr));
0727 #else
0728   // 32-bit pointer operand constraint for inlined asm
0729   asm(" prefetch.L1 [ %1 ];" : "=r"(addr) : "r"(addr));
0730 #endif
0731 #elif (!EIGEN_COMP_MSVC) && (EIGEN_COMP_GNUC || EIGEN_COMP_CLANG || EIGEN_COMP_ICC)
0732   __builtin_prefetch(addr);
0733 #endif
0734 }
0735
0736 /** \internal \returns the reversed elements of \a a*/
0737 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a)
0738 { return a; }
0739
0740 /** \internal \returns \a a with real and imaginary part flipped (for complex type only) */
0741 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a)
0742 {
0743   return Packet(numext::imag(a),numext::real(a));
0744 }
0745
0746 /**************************
0747 * Special math functions
0748 ***************************/
0749
0750 /** \internal \returns the sine of \a a (coeff-wise) */
0751 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
0752 Packet psin(const Packet& a) { EIGEN_USING_STD(sin); return sin(a); }
0753
0754 /** \internal \returns the cosine of \a a (coeff-wise) */
0755 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
0756 Packet pcos(const Packet& a) { EIGEN_USING_STD(cos); return cos(a); }
0757
0758 /** \internal \returns the tan of \a a (coeff-wise) */
0759 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
0760 Packet ptan(const Packet& a) { EIGEN_USING_STD(tan); return tan(a); }
0761
0762 /** \internal \returns the arc sine of \a a (coeff-wise) */
0763 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
0764 Packet pasin(const Packet& a) { EIGEN_USING_STD(asin); return asin(a); }
0765
0766 /** \internal \returns the arc cosine of \a a (coeff-wise) */
0767 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
0768 Packet pacos(const Packet& a) { EIGEN_USING_STD(acos); return acos(a); }
0769
0770 /** \internal \returns the arc tangent of \a a (coeff-wise) */
0771 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
0772 Packet patan(const Packet& a) { EIGEN_USING_STD(atan); return atan(a); }
0773
0774 /** \internal \returns the hyperbolic sine of \a a (coeff-wise) */
0775 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
0776 Packet psinh(const Packet& a) { EIGEN_USING_STD(sinh); return sinh(a); }
0777
0778 /** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */
0779 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
0780 Packet pcosh(const Packet& a) { EIGEN_USING_STD(cosh); return cosh(a); }
0781
0782 /** \internal \returns the hyperbolic tan of \a a (coeff-wise) */
0783 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
0784 Packet ptanh(const Packet& a) { EIGEN_USING_STD(tanh); return tanh(a); }
0785
0786 /** \internal \returns the exp of \a a (coeff-wise) */
0787 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
0788 Packet pexp(const Packet& a) { EIGEN_USING_STD(exp); return exp(a); }
0789
0790 /** \internal \returns the expm1 of \a a (coeff-wise) */
0791 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
0792 Packet pexpm1(const Packet& a) { return numext::expm1(a); }
0793
0794 /** \internal \returns the log of \a a (coeff-wise) */
0795 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
0796 Packet plog(const Packet& a) { EIGEN_USING_STD(log); return log(a); }
0797
0798 /** \internal \returns the log1p of \a a (coeff-wise) */
0799 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
0800 Packet plog1p(const Packet& a) { return numext::log1p(a); }
0801
0802 /** \internal \returns the log10 of \a a (coeff-wise) */
0803 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
0804 Packet plog10(const Packet& a) { EIGEN_USING_STD(log10); return log10(a); }
0805
0806 /** \internal \returns the log10 of \a a (coeff-wise) */
0807 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
0808 Packet plog2(const Packet& a) {
0809   typedef typename internal::unpacket_traits<Packet>::type Scalar;
0810   return pmul(pset1<Packet>(Scalar(EIGEN_LOG2E)), plog(a));
0811 }
0812
0813 /** \internal \returns the square-root of \a a (coeff-wise) */
0814 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
0815 Packet psqrt(const Packet& a) { return numext::sqrt(a); }
0816
0817 /** \internal \returns the reciprocal square-root of \a a (coeff-wise) */
0818 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
0819 Packet prsqrt(const Packet& a) {
0820   typedef typename internal::unpacket_traits<Packet>::type Scalar;
0821   return pdiv(pset1<Packet>(Scalar(1)), psqrt(a));
0822 }
0823
0824 /** \internal \returns the rounded value of \a a (coeff-wise) */
0825 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
0826 Packet pround(const Packet& a) { using numext::round; return round(a); }
0827
0828 /** \internal \returns the floor of \a a (coeff-wise) */
0829 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
0830 Packet pfloor(const Packet& a) { using numext::floor; return floor(a); }
0831
0832 /** \internal \returns the rounded value of \a a (coeff-wise) with current
0833  * rounding mode */
0834 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
0835 Packet print(const Packet& a) { using numext::rint; return rint(a); }
0836
0837 /** \internal \returns the ceil of \a a (coeff-wise) */
0838 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
0839 Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); }
0840
0841 /** \internal \returns the first element of a packet */
0842 template<typename Packet>
0843 EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type
0844 pfirst(const Packet& a)
0845 { return a; }
0846
0847 /** \internal \returns the sum of the elements of upper and lower half of \a a if \a a is larger than 4.
0848   * For a packet {a0, a1, a2, a3, a4, a5, a6, a7}, it returns a half packet {a0+a4, a1+a5, a2+a6, a3+a7}
0849   * For packet-size smaller or equal to 4, this boils down to a noop.
0850   */
0851 template<typename Packet>
0852 EIGEN_DEVICE_FUNC inline typename conditional<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>::type
0853 predux_half_dowto4(const Packet& a)
0854 { return a; }
0855
0856 // Slow generic implementation of Packet reduction.
0857 template <typename Packet, typename Op>
0858 EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type
0859 predux_helper(const Packet& a, Op op) {
0860   typedef typename unpacket_traits<Packet>::type Scalar;
0861   const size_t n = unpacket_traits<Packet>::size;
0862   EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];
0863   pstoreu<Scalar>(elements, a);
0864   for(size_t k = n / 2; k > 0; k /= 2)  {
0865     for(size_t i = 0; i < k; ++i) {
0866       elements[i] = op(elements[i], elements[i + k]);
0867     }
0868   }
0869   return elements[0];
0870 }
0871
0872 /** \internal \returns the sum of the elements of \a a*/
0873 template<typename Packet>
0874 EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type
0875 predux(const Packet& a)
0876 {
0877   return a;
0878 }
0879
0880 /** \internal \returns the product of the elements of \a a */
0881 template <typename Packet>
0882 EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(
0883     const Packet& a) {
0884   typedef typename unpacket_traits<Packet>::type Scalar;
0885   return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmul<Scalar>)));
0886 }
0887
0888 /** \internal \returns the min of the elements of \a a */
0889 template <typename Packet>
0890 EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(
0891     const Packet &a) {
0892   typedef typename unpacket_traits<Packet>::type Scalar;
0893   return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<PropagateFast, Scalar>)));
0894 }
0895
0896 template <int NaNPropagation, typename Packet>
0897 EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(
0898     const Packet& a) {
0899   typedef typename unpacket_traits<Packet>::type Scalar;
0900   return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<NaNPropagation, Scalar>)));
0901 }
0902
0903 /** \internal \returns the min of the elements of \a a */
0904 template <typename Packet>
0905 EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(
0906     const Packet &a) {
0907   typedef typename unpacket_traits<Packet>::type Scalar;
0908   return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<PropagateFast, Scalar>)));
0909 }
0910
0911 template <int NaNPropagation, typename Packet>
0912 EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(
0913     const Packet& a) {
0914   typedef typename unpacket_traits<Packet>::type Scalar;
0915   return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<NaNPropagation, Scalar>)));
0916 }
0917
0918 #undef EIGEN_BINARY_OP_NAN_PROPAGATION
0919
0920 /** \internal \returns true if all coeffs of \a a means "true"
0921   * It is supposed to be called on values returned by pcmp_*.
0922   */
0923 // not needed yet
0924 // template<typename Packet> EIGEN_DEVICE_FUNC inline bool predux_all(const Packet& a)
0925 // { return bool(a); }
0926
0927 /** \internal \returns true if any coeffs of \a a means "true"
0928   * It is supposed to be called on values returned by pcmp_*.
0929   */
0930 template<typename Packet> EIGEN_DEVICE_FUNC inline bool predux_any(const Packet& a)
0931 {
0932   // Dirty but generic implementation where "true" is assumed to be non 0 and all the sames.
0933   // It is expected that "true" is either:
0934   //  - Scalar(1)
0935   //  - bits full of ones (NaN for floats),
0936   //  - or first bit equals to 1 (1 for ints, smallest denormal for floats).
0937   // For all these cases, taking the sum is just fine, and this boils down to a no-op for scalars.
0938   typedef typename unpacket_traits<Packet>::type Scalar;
0939   return numext::not_equal_strict(predux(a), Scalar(0));
0940 }
0941
0942 /***************************************************************************
0943 * The following functions might not have to be overwritten for vectorized types
0944 ***************************************************************************/
0945
0946 /** \internal copy a packet with constant coefficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned */
0947 // NOTE: this function must really be templated on the packet type (think about different packet types for the same scalar type)
0948 template<typename Packet>
0949 inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a)
0950 {
0951   pstore(to, pset1<Packet>(a));
0952 }
0953
0954 /** \internal \returns a * b + c (coeff-wise) */
0955 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
0956 pmadd(const Packet&  a,
0957          const Packet&  b,
0958          const Packet&  c)
0959 { return padd(pmul(a, b),c); }
0960
0961 /** \internal \returns a packet version of \a *from.
0962   * The pointer \a from must be aligned on a \a Alignment bytes boundary. */
0963 template<typename Packet, int Alignment>
0964 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from)
0965 {
0966   if(Alignment >= unpacket_traits<Packet>::alignment)
0967     return pload<Packet>(from);
0968   else
0969     return ploadu<Packet>(from);
0970 }
0971
0972 /** \internal copy the packet \a from to \a *to.
0973   * The pointer \a from must be aligned on a \a Alignment bytes boundary. */
0974 template<typename Scalar, typename Packet, int Alignment>
0975 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from)
0976 {
0977   if(Alignment >= unpacket_traits<Packet>::alignment)
0978     pstore(to, from);
0979   else
0980     pstoreu(to, from);
0981 }
0982
0983 /** \internal \returns a packet version of \a *from.
0984   * Unlike ploadt, ploadt_ro takes advantage of the read-only memory path on the
0985   * hardware if available to speedup the loading of data that won't be modified
0986   * by the current computation.
0987   */
0988 template<typename Packet, int LoadMode>
0989 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from)
0990 {
0991   return ploadt<Packet, LoadMode>(from);
0992 }
0993
0994 /***************************************************************************
0995 * Fast complex products (GCC generates a function call which is very slow)
0996 ***************************************************************************/
0997
0998 // Eigen+CUDA does not support complexes.
0999 #if !defined(EIGEN_GPUCC)
1000
1001 template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
1002 { return std::complex<float>(a.real()*b.real() - a.imag()*b.imag(), a.imag()*b.real() + a.real()*b.imag()); }
1003
1004 template<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b)
1005 { return std::complex<double>(a.real()*b.real() - a.imag()*b.imag(), a.imag()*b.real() + a.real()*b.imag()); }
1006
1007 #endif
1008
1009
1010 /***************************************************************************
1011  * PacketBlock, that is a collection of N packets where the number of words
1012  * in the packet is a multiple of N.
1013 ***************************************************************************/
1014 template <typename Packet,int N=unpacket_traits<Packet>::size> struct PacketBlock {
1015   Packet packet[N];
1016 };
1017
1018 template<typename Packet> EIGEN_DEVICE_FUNC inline void
1019 ptranspose(PacketBlock<Packet,1>& /*kernel*/) {
1020   // Nothing to do in the scalar case, i.e. a 1x1 matrix.
1021 }
1022
1023 /***************************************************************************
1024  * Selector, i.e. vector of N boolean values used to select (i.e. blend)
1025  * words from 2 packets.
1026 ***************************************************************************/
/** \internal
  * Array of \c N boolean values used to select (i.e. blend) words from two
  * packets.
  */
template <size_t N>
struct Selector
{
  bool select[N];
};
1030
1031 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
1032 pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& thenPacket, const Packet& elsePacket) {
1033   return ifPacket.select[0] ? thenPacket : elsePacket;
1034 }
1035
1036 } // end namespace internal
1037
1038 } // end namespace Eigen
1039
1040 #endif // EIGEN_GENERIC_PACKET_MATH_H