File indexing completed on 2025-01-18 09:56:10
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034 #ifndef EIGEN_ASSIGN_VML_H
0035 #define EIGEN_ASSIGN_VML_H
0036
0037 namespace Eigen {
0038
0039 namespace internal {
0040
0041 template<typename Dst, typename Src>
0042 class vml_assign_traits
0043 {
0044 private:
0045 enum {
0046 DstHasDirectAccess = Dst::Flags & DirectAccessBit,
0047 SrcHasDirectAccess = Src::Flags & DirectAccessBit,
0048 StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)),
0049 InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
0050 : int(Dst::Flags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
0051 : int(Dst::RowsAtCompileTime),
0052 InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
0053 : int(Dst::Flags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
0054 : int(Dst::MaxRowsAtCompileTime),
0055 MaxSizeAtCompileTime = Dst::SizeAtCompileTime,
0056
0057 MightEnableVml = StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1,
0058 MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit),
0059 VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize,
0060 LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD
0061 };
0062 public:
0063 enum {
0064 EnableVml = MightEnableVml && LargeEnough,
0065 Traversal = MightLinearize ? LinearTraversal : DefaultTraversal
0066 };
0067 };
0068
// Expands to its argument unchanged.
#define EIGEN_PP_EXPAND(ARG) ARG

// Extra trailing argument appended to VML calls declared with mode "LA":
// ", VML_LA" when EIGEN_FAST_MATH is defined to 1, ", VML_HA" in every other case.
#if defined(EIGEN_FAST_MATH) && (EIGEN_FAST_MATH == 1)
#define EIGEN_VMLMODE_EXPAND_xLA , VML_LA
#else
#define EIGEN_VMLMODE_EXPAND_xLA , VML_HA
#endif

// Calls declared with mode "_" receive no extra argument.
#define EIGEN_VMLMODE_EXPAND_x_

// Function-name prefix per mode: "vm" for mode "LA", "v" for mode "_".
#define EIGEN_VMLMODE_PREFIX_xLA vm
#define EIGEN_VMLMODE_PREFIX_x_  v
#define EIGEN_VMLMODE_PREFIX(VMLMODE) EIGEN_CAT(EIGEN_VMLMODE_PREFIX_x,VMLMODE)
0081
// Declares an Assignment specialization for "dst = EIGENOP(src)" with scalar type EIGENTYPE,
// selected only when vml_assign_traits<...>::EnableVml holds. Its run() forwards the work to
// the function VMLOP, passing the coefficient pointers reinterpreted as VMLTYPE.
//   EIGENOP   functor stem, i.e. the specialization matches scalar_<EIGENOP>_op<EIGENTYPE>
//   VMLOP     name of the function to call
//   EIGENTYPE scalar type on the Eigen side
//   VMLTYPE   element type expected by VMLOP
//   VMLMODE   mode suffix ("LA" or "_") selecting the extra trailing argument, if any
#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \
  template< typename DstXprType, typename SrcXprNested> \
  struct Assignment<DstXprType, \
                    CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, \
                    assign_op<EIGENTYPE,EIGENTYPE>, \
                    Dense2Dense, \
                    typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> { \
    typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType; \
    static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &func) { \
      resize_if_allowed(dst, src, func); \
      eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
      if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal!=LinearTraversal) { \
        /* No linear traversal: issue one call per inner vector. */ \
        const Index outerSize = dst.outerSize(); \
        for(Index k = 0; k < outerSize; ++k) { \
          /* First coefficient of the k-th inner vector, addressed as (k,0) or (0,k). */ \
          const Index srcRow = src.IsRowMajor ? k : 0; \
          const Index srcCol = src.IsRowMajor ? 0 : k; \
          const Index dstRow = dst.IsRowMajor ? k : 0; \
          const Index dstCol = dst.IsRowMajor ? 0 : k; \
          const EIGENTYPE *in = &(src.nestedExpression().coeffRef(srcRow, srcCol)); \
          EIGENTYPE *out = &(dst.coeffRef(dstRow, dstCol)); \
          VMLOP( dst.innerSize(), (const VMLTYPE*)in, \
                 (VMLTYPE*)out EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE)); \
        } \
      } else { \
        /* Linear traversal: a single call covers all dst.size() coefficients. */ \
        VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(), \
              (VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE) ); \
      } \
    } \
  };
0105
0106
// Declares the float and double specializations for EIGENOP. The called function name is
// built by EIGEN_CAT from the mode prefix and "s<VMLOP>" / "d<VMLOP>".
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, \
                                   EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),s##VMLOP), \
                                   float, float, VMLMODE) \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, \
                                   EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),d##VMLOP), \
                                   double, double, VMLMODE)

// Declares the scomplex and dcomplex specializations for EIGENOP. The called function name is
// built from the mode prefix and "c<VMLOP>" / "z<VMLOP>"; pointers are passed as
// MKL_Complex8 / MKL_Complex16 respectively.
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE) \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, \
                                   EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),c##VMLOP), \
                                   scomplex, MKL_Complex8, VMLMODE) \
  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, \
                                   EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),z##VMLOP), \
                                   dcomplex, MKL_Complex16, VMLMODE)

// Declares all four specializations (real first, then complex) for EIGENOP.
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP, VMLMODE) \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \
  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE)
0118
0119
0120 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sin, Sin, LA)
0121 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(asin, Asin, LA)
0122 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sinh, Sinh, LA)
0123 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(cos, Cos, LA)
0124 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(acos, Acos, LA)
0125 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(cosh, Cosh, LA)
0126 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(tan, Tan, LA)
0127 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(atan, Atan, LA)
0128 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(tanh, Tanh, LA)
0129
0130 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(exp, Exp, LA)
0131 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(log, Ln, LA)
0132 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(log10, Log10, LA)
0133 EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sqrt, Sqrt, _)
0134
0135 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr, _)
0136 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(arg, Arg, _)
0137 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(round, Round, _)
0138 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(floor, Floor, _)
0139 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil, Ceil, _)
0140
// Declares an Assignment specialization for "dst = EIGENOP(src, constant)", i.e. a binary
// expression whose right-hand side is a CwiseNullaryOp built on scalar_constant_op<EIGENTYPE>.
// It is selected only when vml_assign_traits<...>::EnableVml holds. Its run() reads the constant
// from the right-hand functor and forwards the work to the function VMLOP.
//   EIGENOP   functor stem, i.e. the specialization matches scalar_<EIGENOP>_op<EIGENTYPE,EIGENTYPE>
//   VMLOP     name of the function to call
//   EIGENTYPE scalar type on the Eigen side
//   VMLTYPE   element type expected by VMLOP
//   VMLMODE   mode suffix ("LA" or "_") selecting the extra trailing argument, if any
#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \
  template< typename DstXprType, typename SrcXprNested, typename Plain> \
  struct Assignment<DstXprType, \
                    CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE,EIGENTYPE>, SrcXprNested, \
                                  const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>,Plain> >, \
                    assign_op<EIGENTYPE,EIGENTYPE>, \
                    Dense2Dense, \
                    typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> { \
    typedef CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE,EIGENTYPE>, SrcXprNested, \
                          const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>,Plain> > SrcXprType; \
    static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &func) { \
      resize_if_allowed(dst, src, func); \
      eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
      /* The constant stored in the right-hand functor, viewed as a VMLTYPE value. */ \
      VMLTYPE exponent = reinterpret_cast<const VMLTYPE&>(src.rhs().functor().m_other); \
      if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal!=LinearTraversal) { \
        /* No linear traversal: issue one call per inner vector. */ \
        const Index outerSize = dst.outerSize(); \
        for(Index k = 0; k < outerSize; ++k) { \
          /* First coefficient of the k-th inner vector, addressed as (k,0) or (0,k). */ \
          const Index srcRow = src.IsRowMajor ? k : 0; \
          const Index srcCol = src.IsRowMajor ? 0 : k; \
          const Index dstRow = dst.IsRowMajor ? k : 0; \
          const Index dstCol = dst.IsRowMajor ? 0 : k; \
          const EIGENTYPE *in = &(src.lhs().coeffRef(srcRow, srcCol)); \
          EIGENTYPE *out = &(dst.coeffRef(dstRow, dstCol)); \
          VMLOP( dst.innerSize(), (const VMLTYPE*)in, exponent, \
                 (VMLTYPE*)out EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE)); \
        } \
      } else { \
        /* Linear traversal: a single call covers all dst.size() coefficients. */ \
        VMLOP( dst.size(), (const VMLTYPE*)src.lhs().data(), exponent, \
               (VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE) ); \
      } \
    } \
  };
0168
0169 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmsPowx, float, float, LA)
0170 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdPowx, double, double, LA)
0171 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcPowx, scomplex, MKL_Complex8, LA)
0172 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzPowx, dcomplex, MKL_Complex16, LA)
0173
0174 }
0175
0176 }
0177
0178 #endif