Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-12-16 10:13:27

0001 // This file is part of Eigen, a lightweight C++ template library
0002 // for linear algebra.
0003 //
0004 // Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
0005 //
0006 // This Source Code Form is subject to the terms of the Mozilla
0007 // Public License v. 2.0. If a copy of the MPL was not distributed
0008 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
0009 
0010 #ifndef EIGEN_ASSIGNMENT_FUNCTORS_H
0011 #define EIGEN_ASSIGNMENT_FUNCTORS_H
0012 
0013 namespace Eigen {
0014 
0015 namespace internal {
0016   
0017 /** \internal
0018   * \brief Template functor for scalar/packet assignment
0019   *
0020   */
0021 template<typename DstScalar,typename SrcScalar> struct assign_op {
0022 
0023   EIGEN_EMPTY_STRUCT_CTOR(assign_op)
0024   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a = b; }
0025   
0026   template<int Alignment, typename Packet>
0027   EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
0028   { internal::pstoret<DstScalar,Packet,Alignment>(a,b); }
0029 };
0030 
0031 // Empty overload for void type (used by PermutationMatrix)
0032 template<typename DstScalar> struct assign_op<DstScalar,void> {};
0033 
0034 template<typename DstScalar,typename SrcScalar>
0035 struct functor_traits<assign_op<DstScalar,SrcScalar> > {
0036   enum {
0037     Cost = NumTraits<DstScalar>::ReadCost,
0038     PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::Vectorizable && packet_traits<SrcScalar>::Vectorizable
0039   };
0040 };
0041 
0042 /** \internal
0043   * \brief Template functor for scalar/packet assignment with addition
0044   *
0045   */
0046 template<typename DstScalar,typename SrcScalar> struct add_assign_op {
0047 
0048   EIGEN_EMPTY_STRUCT_CTOR(add_assign_op)
0049   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a += b; }
0050   
0051   template<int Alignment, typename Packet>
0052   EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
0053   { internal::pstoret<DstScalar,Packet,Alignment>(a,internal::padd(internal::ploadt<Packet,Alignment>(a),b)); }
0054 };
0055 template<typename DstScalar,typename SrcScalar>
0056 struct functor_traits<add_assign_op<DstScalar,SrcScalar> > {
0057   enum {
0058     Cost = NumTraits<DstScalar>::ReadCost + NumTraits<DstScalar>::AddCost,
0059     PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasAdd
0060   };
0061 };
0062 
0063 /** \internal
0064   * \brief Template functor for scalar/packet assignment with subtraction
0065   *
0066   */
0067 template<typename DstScalar,typename SrcScalar> struct sub_assign_op {
0068 
0069   EIGEN_EMPTY_STRUCT_CTOR(sub_assign_op)
0070   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a -= b; }
0071   
0072   template<int Alignment, typename Packet>
0073   EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
0074   { internal::pstoret<DstScalar,Packet,Alignment>(a,internal::psub(internal::ploadt<Packet,Alignment>(a),b)); }
0075 };
0076 template<typename DstScalar,typename SrcScalar>
0077 struct functor_traits<sub_assign_op<DstScalar,SrcScalar> > {
0078   enum {
0079     Cost = NumTraits<DstScalar>::ReadCost + NumTraits<DstScalar>::AddCost,
0080     PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasSub
0081   };
0082 };
0083 
0084 /** \internal
0085   * \brief Template functor for scalar/packet assignment with multiplication
0086   *
0087   */
0088 template<typename DstScalar, typename SrcScalar=DstScalar>
0089 struct mul_assign_op {
0090 
0091   EIGEN_EMPTY_STRUCT_CTOR(mul_assign_op)
0092   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a *= b; }
0093   
0094   template<int Alignment, typename Packet>
0095   EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
0096   { internal::pstoret<DstScalar,Packet,Alignment>(a,internal::pmul(internal::ploadt<Packet,Alignment>(a),b)); }
0097 };
0098 template<typename DstScalar, typename SrcScalar>
0099 struct functor_traits<mul_assign_op<DstScalar,SrcScalar> > {
0100   enum {
0101     Cost = NumTraits<DstScalar>::ReadCost + NumTraits<DstScalar>::MulCost,
0102     PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasMul
0103   };
0104 };
0105 
0106 /** \internal
0107   * \brief Template functor for scalar/packet assignment with diviving
0108   *
0109   */
0110 template<typename DstScalar, typename SrcScalar=DstScalar> struct div_assign_op {
0111 
0112   EIGEN_EMPTY_STRUCT_CTOR(div_assign_op)
0113   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a /= b; }
0114   
0115   template<int Alignment, typename Packet>
0116   EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
0117   { internal::pstoret<DstScalar,Packet,Alignment>(a,internal::pdiv(internal::ploadt<Packet,Alignment>(a),b)); }
0118 };
0119 template<typename DstScalar, typename SrcScalar>
0120 struct functor_traits<div_assign_op<DstScalar,SrcScalar> > {
0121   enum {
0122     Cost = NumTraits<DstScalar>::ReadCost + NumTraits<DstScalar>::MulCost,
0123     PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasDiv
0124   };
0125 };
0126 
0127 /** \internal
0128   * \brief Template functor for scalar/packet assignment with swapping
0129   *
0130   * It works as follow. For a non-vectorized evaluation loop, we have:
0131   *   for(i) func(A.coeffRef(i), B.coeff(i));
0132   * where B is a SwapWrapper expression. The trick is to make SwapWrapper::coeff behaves like a non-const coeffRef.
0133   * Actually, SwapWrapper might not even be needed since even if B is a plain expression, since it has to be writable
0134   * B.coeff already returns a const reference to the underlying scalar value.
0135   * 
0136   * The case of a vectorized loop is more tricky:
0137   *   for(i,j) func.assignPacket<A_Align>(&A.coeffRef(i,j), B.packet<B_Align>(i,j));
0138   * Here, B must be a SwapWrapper whose packet function actually returns a proxy object holding a Scalar*,
0139   * the actual alignment and Packet type.
0140   *
0141   */
0142 template<typename Scalar> struct swap_assign_op {
0143 
0144   EIGEN_EMPTY_STRUCT_CTOR(swap_assign_op)
0145   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const
0146   {
0147 #ifdef EIGEN_GPUCC
0148     // FIXME is there some kind of cuda::swap?
0149     Scalar t=b; const_cast<Scalar&>(b)=a; a=t;
0150 #else
0151     using std::swap;
0152     swap(a,const_cast<Scalar&>(b));
0153 #endif
0154   }
0155 };
0156 template<typename Scalar>
0157 struct functor_traits<swap_assign_op<Scalar> > {
0158   enum {
0159     Cost = 3 * NumTraits<Scalar>::ReadCost,
0160     PacketAccess = 
0161     #if defined(EIGEN_VECTORIZE_AVX) && EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<800 || defined(__apple_build_version__))
0162     // This is a partial workaround for a bug in clang generating bad code
0163     // when mixing 256/512 bits loads and 128 bits moves.
0164     // See http://eigen.tuxfamily.org/bz/show_bug.cgi?id=1684
0165     //     https://bugs.llvm.org/show_bug.cgi?id=40815
0166     0
0167     #else
0168     packet_traits<Scalar>::Vectorizable
0169     #endif
0170   };
0171 };
0172 
0173 } // namespace internal
0174 
0175 } // namespace Eigen
0176 
0177 #endif // EIGEN_ASSIGNMENT_FUNCTORS_H