Back to home page

EIC code displayed by LXR

 
 

    


Warning, file /include/eigen3/Eigen/src/Core/util/BlasUtil.h was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).

0001 // This file is part of Eigen, a lightweight C++ template library
0002 // for linear algebra.
0003 //
0004 // Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
0005 //
0006 // This Source Code Form is subject to the terms of the Mozilla
0007 // Public License v. 2.0. If a copy of the MPL was not distributed
0008 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
0009 
0010 #ifndef EIGEN_BLASUTIL_H
0011 #define EIGEN_BLASUTIL_H
0012 
0013 // This file contains many lightweight helper classes used to
0014 // implement and control fast level 2 and level 3 BLAS-like routines.
0015 
0016 namespace Eigen {
0017 
0018 namespace internal {
0019 
0020 // forward declarations
0021 template<typename LhsScalar, typename RhsScalar, typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs=false, bool ConjugateRhs=false>
0022 struct gebp_kernel;
0023 
0024 template<typename Scalar, typename Index, typename DataMapper, int nr, int StorageOrder, bool Conjugate = false, bool PanelMode=false>
0025 struct gemm_pack_rhs;
0026 
0027 template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet, int StorageOrder, bool Conjugate = false, bool PanelMode = false>
0028 struct gemm_pack_lhs;
0029 
0030 template<
0031   typename Index,
0032   typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
0033   typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
0034   int ResStorageOrder, int ResInnerStride>
0035 struct general_matrix_matrix_product;
0036 
0037 template<typename Index,
0038          typename LhsScalar, typename LhsMapper, int LhsStorageOrder, bool ConjugateLhs,
0039          typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version=Specialized>
0040 struct general_matrix_vector_product;
0041 
0042 template<typename From,typename To> struct get_factor {
0043   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE To run(const From& x) { return To(x); }
0044 };
0045 
0046 template<typename Scalar> struct get_factor<Scalar,typename NumTraits<Scalar>::Real> {
0047   EIGEN_DEVICE_FUNC
0048   static EIGEN_STRONG_INLINE typename NumTraits<Scalar>::Real run(const Scalar& x) { return numext::real(x); }
0049 };
0050 
0051 
0052 template<typename Scalar, typename Index>
0053 class BlasVectorMapper {
0054   public:
0055   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasVectorMapper(Scalar *data) : m_data(data) {}
0056 
0057   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const {
0058     return m_data[i];
0059   }
0060   template <typename Packet, int AlignmentType>
0061   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet load(Index i) const {
0062     return ploadt<Packet, AlignmentType>(m_data + i);
0063   }
0064 
0065   template <typename Packet>
0066   EIGEN_DEVICE_FUNC bool aligned(Index i) const {
0067     return (UIntPtr(m_data+i)%sizeof(Packet))==0;
0068   }
0069 
0070   protected:
0071   Scalar* m_data;
0072 };
0073 
0074 template<typename Scalar, typename Index, int AlignmentType, int Incr=1>
0075 class BlasLinearMapper;
0076 
0077 template<typename Scalar, typename Index, int AlignmentType>
0078 class BlasLinearMapper<Scalar,Index,AlignmentType>
0079 {
0080 public:
0081   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data, Index incr=1)
0082     : m_data(data)
0083   {
0084     EIGEN_ONLY_USED_FOR_DEBUG(incr);
0085     eigen_assert(incr==1);
0086   }
0087 
0088   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(int i) const {
0089     internal::prefetch(&operator()(i));
0090   }
0091 
0092   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar& operator()(Index i) const {
0093     return m_data[i];
0094   }
0095 
0096   template<typename PacketType>
0097   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacket(Index i) const {
0098     return ploadt<PacketType, AlignmentType>(m_data + i);
0099   }
0100 
0101   template<typename PacketType>
0102   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const PacketType &p) const {
0103     pstoret<Scalar, PacketType, AlignmentType>(m_data + i, p);
0104   }
0105 
0106 protected:
0107   Scalar *m_data;
0108 };
0109 
0110 // Lightweight helper class to access matrix coefficients.
0111 template<typename Scalar, typename Index, int StorageOrder, int AlignmentType = Unaligned, int Incr = 1>
0112 class blas_data_mapper;
0113 
0114 // TMP to help PacketBlock store implementation.
0115 // There's currently no known use case for PacketBlock load.
0116 // The default implementation assumes ColMajor order.
0117 // It always store each packet sequentially one `stride` apart.
0118 template<typename Index, typename Scalar, typename Packet, int n, int idx, int StorageOrder>
0119 struct PacketBlockManagement
0120 {
0121   PacketBlockManagement<Index, Scalar, Packet, n, idx - 1, StorageOrder> pbm;
0122   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(Scalar *to, const Index stride, Index i, Index j, const PacketBlock<Packet, n> &block) const {
0123     pbm.store(to, stride, i, j, block);
0124     pstoreu<Scalar>(to + i + (j + idx)*stride, block.packet[idx]);
0125   }
0126 };
0127 
0128 // PacketBlockManagement specialization to take care of RowMajor order without ifs.
0129 template<typename Index, typename Scalar, typename Packet, int n, int idx>
0130 struct PacketBlockManagement<Index, Scalar, Packet, n, idx, RowMajor>
0131 {
0132   PacketBlockManagement<Index, Scalar, Packet, n, idx - 1, RowMajor> pbm;
0133   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(Scalar *to, const Index stride, Index i, Index j, const PacketBlock<Packet, n> &block) const {
0134     pbm.store(to, stride, i, j, block);
0135     pstoreu<Scalar>(to + j + (i + idx)*stride, block.packet[idx]);
0136   }
0137 };
0138 
0139 template<typename Index, typename Scalar, typename Packet, int n, int StorageOrder>
0140 struct PacketBlockManagement<Index, Scalar, Packet, n, -1, StorageOrder>
0141 {
0142   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(Scalar *to, const Index stride, Index i, Index j, const PacketBlock<Packet, n> &block) const {
0143     EIGEN_UNUSED_VARIABLE(to);
0144     EIGEN_UNUSED_VARIABLE(stride);
0145     EIGEN_UNUSED_VARIABLE(i);
0146     EIGEN_UNUSED_VARIABLE(j);
0147     EIGEN_UNUSED_VARIABLE(block);
0148   }
0149 };
0150 
0151 template<typename Index, typename Scalar, typename Packet, int n>
0152 struct PacketBlockManagement<Index, Scalar, Packet, n, -1, RowMajor>
0153 {
0154   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(Scalar *to, const Index stride, Index i, Index j, const PacketBlock<Packet, n> &block) const {
0155     EIGEN_UNUSED_VARIABLE(to);
0156     EIGEN_UNUSED_VARIABLE(stride);
0157     EIGEN_UNUSED_VARIABLE(i);
0158     EIGEN_UNUSED_VARIABLE(j);
0159     EIGEN_UNUSED_VARIABLE(block);
0160   }
0161 };
0162 
0163 template<typename Scalar, typename Index, int StorageOrder, int AlignmentType>
0164 class blas_data_mapper<Scalar,Index,StorageOrder,AlignmentType,1>
0165 {
0166 public:
0167   typedef BlasLinearMapper<Scalar, Index, AlignmentType> LinearMapper;
0168   typedef BlasVectorMapper<Scalar, Index> VectorMapper;
0169 
0170   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride, Index incr=1)
0171    : m_data(data), m_stride(stride)
0172   {
0173     EIGEN_ONLY_USED_FOR_DEBUG(incr);
0174     eigen_assert(incr==1);
0175   }
0176 
0177   EIGEN_DEVICE_FUNC  EIGEN_ALWAYS_INLINE blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>
0178   getSubMapper(Index i, Index j) const {
0179     return blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>(&operator()(i, j), m_stride);
0180   }
0181 
0182   EIGEN_DEVICE_FUNC  EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const {
0183     return LinearMapper(&operator()(i, j));
0184   }
0185 
0186   EIGEN_DEVICE_FUNC  EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const {
0187     return VectorMapper(&operator()(i, j));
0188   }
0189 
0190 
0191   EIGEN_DEVICE_FUNC
0192   EIGEN_ALWAYS_INLINE Scalar& operator()(Index i, Index j) const {
0193     return m_data[StorageOrder==RowMajor ? j + i*m_stride : i + j*m_stride];
0194   }
0195 
0196   template<typename PacketType>
0197   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacket(Index i, Index j) const {
0198     return ploadt<PacketType, AlignmentType>(&operator()(i, j));
0199   }
0200 
0201   template <typename PacketT, int AlignmentT>
0202   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i, Index j) const {
0203     return ploadt<PacketT, AlignmentT>(&operator()(i, j));
0204   }
0205 
0206   template<typename SubPacket>
0207   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const {
0208     pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride);
0209   }
0210 
0211   template<typename SubPacket>
0212   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SubPacket gatherPacket(Index i, Index j) const {
0213     return pgather<Scalar, SubPacket>(&operator()(i, j), m_stride);
0214   }
0215 
0216   EIGEN_DEVICE_FUNC const Index stride() const { return m_stride; }
0217   EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; }
0218 
0219   EIGEN_DEVICE_FUNC Index firstAligned(Index size) const {
0220     if (UIntPtr(m_data)%sizeof(Scalar)) {
0221       return -1;
0222     }
0223     return internal::first_default_aligned(m_data, size);
0224   }
0225 
0226   template<typename SubPacket, int n>
0227   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacketBlock(Index i, Index j, const PacketBlock<SubPacket, n> &block) const {
0228     PacketBlockManagement<Index, Scalar, SubPacket, n, n-1, StorageOrder> pbm;
0229     pbm.store(m_data, m_stride, i, j, block);
0230   }
0231 protected:
0232   Scalar* EIGEN_RESTRICT m_data;
0233   const Index m_stride;
0234 };
0235 
0236 // Implementation of non-natural increment (i.e. inner-stride != 1)
0237 // The exposed API is not complete yet compared to the Incr==1 case
0238 // because some features makes less sense in this case.
0239 template<typename Scalar, typename Index, int AlignmentType, int Incr>
0240 class BlasLinearMapper
0241 {
0242 public:
0243   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data,Index incr) : m_data(data), m_incr(incr) {}
0244 
0245   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(int i) const {
0246     internal::prefetch(&operator()(i));
0247   }
0248 
0249   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar& operator()(Index i) const {
0250     return m_data[i*m_incr.value()];
0251   }
0252 
0253   template<typename PacketType>
0254   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacket(Index i) const {
0255     return pgather<Scalar,PacketType>(m_data + i*m_incr.value(), m_incr.value());
0256   }
0257 
0258   template<typename PacketType>
0259   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const PacketType &p) const {
0260     pscatter<Scalar, PacketType>(m_data + i*m_incr.value(), p, m_incr.value());
0261   }
0262 
0263 protected:
0264   Scalar *m_data;
0265   const internal::variable_if_dynamic<Index,Incr> m_incr;
0266 };
0267 
0268 template<typename Scalar, typename Index, int StorageOrder, int AlignmentType,int Incr>
0269 class blas_data_mapper
0270 {
0271 public:
0272   typedef BlasLinearMapper<Scalar, Index, AlignmentType,Incr> LinearMapper;
0273 
0274   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride, Index incr) : m_data(data), m_stride(stride), m_incr(incr) {}
0275 
0276   EIGEN_DEVICE_FUNC  EIGEN_ALWAYS_INLINE blas_data_mapper
0277   getSubMapper(Index i, Index j) const {
0278     return blas_data_mapper(&operator()(i, j), m_stride, m_incr.value());
0279   }
0280 
0281   EIGEN_DEVICE_FUNC  EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const {
0282     return LinearMapper(&operator()(i, j), m_incr.value());
0283   }
0284 
0285   EIGEN_DEVICE_FUNC
0286   EIGEN_ALWAYS_INLINE Scalar& operator()(Index i, Index j) const {
0287     return m_data[StorageOrder==RowMajor ? j*m_incr.value() + i*m_stride : i*m_incr.value() + j*m_stride];
0288   }
0289 
0290   template<typename PacketType>
0291   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacket(Index i, Index j) const {
0292     return pgather<Scalar,PacketType>(&operator()(i, j),m_incr.value());
0293   }
0294 
0295   template <typename PacketT, int AlignmentT>
0296   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i, Index j) const {
0297     return pgather<Scalar,PacketT>(&operator()(i, j),m_incr.value());
0298   }
0299 
0300   template<typename SubPacket>
0301   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const {
0302     pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride);
0303   }
0304 
0305   template<typename SubPacket>
0306   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SubPacket gatherPacket(Index i, Index j) const {
0307     return pgather<Scalar, SubPacket>(&operator()(i, j), m_stride);
0308   }
0309 
0310   // storePacketBlock_helper defines a way to access values inside the PacketBlock, this is essentially required by the Complex types.
0311   template<typename SubPacket, typename ScalarT, int n, int idx>
0312   struct storePacketBlock_helper
0313   {
0314     storePacketBlock_helper<SubPacket, ScalarT, n, idx-1> spbh;
0315     EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>* sup, Index i, Index j, const PacketBlock<SubPacket, n>& block) const {
0316       spbh.store(sup, i,j,block);
0317       for(int l = 0; l < unpacket_traits<SubPacket>::size; l++)
0318       {
0319         ScalarT *v = &sup->operator()(i+l, j+idx);
0320         *v = block.packet[idx][l];
0321       }
0322     }
0323   };
0324 
0325   template<typename SubPacket, int n, int idx>
0326   struct storePacketBlock_helper<SubPacket, std::complex<float>, n, idx>
0327   {
0328     storePacketBlock_helper<SubPacket, std::complex<float>, n, idx-1> spbh;
0329     EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>* sup, Index i, Index j, const PacketBlock<SubPacket, n>& block) const {
0330       spbh.store(sup,i,j,block);
0331       for(int l = 0; l < unpacket_traits<SubPacket>::size; l++)
0332       {
0333         std::complex<float> *v = &sup->operator()(i+l, j+idx);
0334         v->real(block.packet[idx].v[2*l+0]);
0335         v->imag(block.packet[idx].v[2*l+1]);
0336       }
0337     }
0338   };
0339 
0340   template<typename SubPacket, int n, int idx>
0341   struct storePacketBlock_helper<SubPacket, std::complex<double>, n, idx>
0342   {
0343     storePacketBlock_helper<SubPacket, std::complex<double>, n, idx-1> spbh;
0344     EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>* sup, Index i, Index j, const PacketBlock<SubPacket, n>& block) const {
0345       spbh.store(sup,i,j,block);
0346       for(int l = 0; l < unpacket_traits<SubPacket>::size; l++)
0347       {
0348         std::complex<double> *v = &sup->operator()(i+l, j+idx);
0349         v->real(block.packet[idx].v[2*l+0]);
0350         v->imag(block.packet[idx].v[2*l+1]);
0351       }
0352     }
0353   };
0354 
0355   template<typename SubPacket, typename ScalarT, int n>
0356   struct storePacketBlock_helper<SubPacket, ScalarT, n, -1>
0357   {
0358     EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>*, Index, Index, const PacketBlock<SubPacket, n>& ) const {
0359     }
0360   };
0361 
0362   template<typename SubPacket, int n>
0363   struct storePacketBlock_helper<SubPacket, std::complex<float>, n, -1>
0364   {
0365     EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>*, Index, Index, const PacketBlock<SubPacket, n>& ) const {
0366     }
0367   };
0368 
0369   template<typename SubPacket, int n>
0370   struct storePacketBlock_helper<SubPacket, std::complex<double>, n, -1>
0371   {
0372     EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType, Incr>*, Index, Index, const PacketBlock<SubPacket, n>& ) const {
0373     }
0374   };
0375   // This function stores a PacketBlock on m_data, this approach is really quite slow compare to Incr=1 and should be avoided when possible.
0376   template<typename SubPacket, int n>
0377   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacketBlock(Index i, Index j, const PacketBlock<SubPacket, n>&block) const {
0378     storePacketBlock_helper<SubPacket, Scalar, n, n-1> spb;
0379     spb.store(this, i,j,block);
0380   }
0381 protected:
0382   Scalar* EIGEN_RESTRICT m_data;
0383   const Index m_stride;
0384   const internal::variable_if_dynamic<Index,Incr> m_incr;
0385 };
0386 
0387 // lightweight helper class to access matrix coefficients (const version)
0388 template<typename Scalar, typename Index, int StorageOrder>
0389 class const_blas_data_mapper : public blas_data_mapper<const Scalar, Index, StorageOrder> {
0390   public:
0391   EIGEN_ALWAYS_INLINE const_blas_data_mapper(const Scalar *data, Index stride) : blas_data_mapper<const Scalar, Index, StorageOrder>(data, stride) {}
0392 
0393   EIGEN_ALWAYS_INLINE const_blas_data_mapper<Scalar, Index, StorageOrder> getSubMapper(Index i, Index j) const {
0394     return const_blas_data_mapper<Scalar, Index, StorageOrder>(&(this->operator()(i, j)), this->m_stride);
0395   }
0396 };
0397 
0398 
0399 /* Helper class to analyze the factors of a Product expression.
0400  * In particular it allows to pop out operator-, scalar multiples,
0401  * and conjugate */
0402 template<typename XprType> struct blas_traits
0403 {
0404   typedef typename traits<XprType>::Scalar Scalar;
0405   typedef const XprType& ExtractType;
0406   typedef XprType _ExtractType;
0407   enum {
0408     IsComplex = NumTraits<Scalar>::IsComplex,
0409     IsTransposed = false,
0410     NeedToConjugate = false,
0411     HasUsableDirectAccess = (    (int(XprType::Flags)&DirectAccessBit)
0412                               && (   bool(XprType::IsVectorAtCompileTime)
0413                                   || int(inner_stride_at_compile_time<XprType>::ret) == 1)
0414                              ) ?  1 : 0,
0415     HasScalarFactor = false
0416   };
0417   typedef typename conditional<bool(HasUsableDirectAccess),
0418     ExtractType,
0419     typename _ExtractType::PlainObject
0420     >::type DirectLinearAccessType;
0421   static inline EIGEN_DEVICE_FUNC ExtractType extract(const XprType& x) { return x; }
0422   static inline EIGEN_DEVICE_FUNC const Scalar extractScalarFactor(const XprType&) { return Scalar(1); }
0423 };
0424 
0425 // pop conjugate
0426 template<typename Scalar, typename NestedXpr>
0427 struct blas_traits<CwiseUnaryOp<scalar_conjugate_op<Scalar>, NestedXpr> >
0428  : blas_traits<NestedXpr>
0429 {
0430   typedef blas_traits<NestedXpr> Base;
0431   typedef CwiseUnaryOp<scalar_conjugate_op<Scalar>, NestedXpr> XprType;
0432   typedef typename Base::ExtractType ExtractType;
0433 
0434   enum {
0435     IsComplex = NumTraits<Scalar>::IsComplex,
0436     NeedToConjugate = Base::NeedToConjugate ? 0 : IsComplex
0437   };
0438   static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
0439   static inline Scalar extractScalarFactor(const XprType& x) { return conj(Base::extractScalarFactor(x.nestedExpression())); }
0440 };
0441 
0442 // pop scalar multiple
0443 template<typename Scalar, typename NestedXpr, typename Plain>
0444 struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain>, NestedXpr> >
0445  : blas_traits<NestedXpr>
0446 {
0447   enum {
0448     HasScalarFactor = true
0449   };
0450   typedef blas_traits<NestedXpr> Base;
0451   typedef CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain>, NestedXpr> XprType;
0452   typedef typename Base::ExtractType ExtractType;
0453   static inline EIGEN_DEVICE_FUNC ExtractType extract(const XprType& x) { return Base::extract(x.rhs()); }
0454   static inline EIGEN_DEVICE_FUNC Scalar extractScalarFactor(const XprType& x)
0455   { return x.lhs().functor().m_other * Base::extractScalarFactor(x.rhs()); }
0456 };
0457 template<typename Scalar, typename NestedXpr, typename Plain>
0458 struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, NestedXpr, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain> > >
0459  : blas_traits<NestedXpr>
0460 {
0461   enum {
0462     HasScalarFactor = true
0463   };
0464   typedef blas_traits<NestedXpr> Base;
0465   typedef CwiseBinaryOp<scalar_product_op<Scalar>, NestedXpr, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain> > XprType;
0466   typedef typename Base::ExtractType ExtractType;
0467   static inline ExtractType extract(const XprType& x) { return Base::extract(x.lhs()); }
0468   static inline Scalar extractScalarFactor(const XprType& x)
0469   { return Base::extractScalarFactor(x.lhs()) * x.rhs().functor().m_other; }
0470 };
0471 template<typename Scalar, typename Plain1, typename Plain2>
0472 struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain1>,
0473                                                             const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain2> > >
0474  : blas_traits<CwiseNullaryOp<scalar_constant_op<Scalar>,Plain1> >
0475 {};
0476 
0477 // pop opposite
0478 template<typename Scalar, typename NestedXpr>
0479 struct blas_traits<CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> >
0480  : blas_traits<NestedXpr>
0481 {
0482   enum {
0483     HasScalarFactor = true
0484   };
0485   typedef blas_traits<NestedXpr> Base;
0486   typedef CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> XprType;
0487   typedef typename Base::ExtractType ExtractType;
0488   static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
0489   static inline Scalar extractScalarFactor(const XprType& x)
0490   { return - Base::extractScalarFactor(x.nestedExpression()); }
0491 };
0492 
0493 // pop/push transpose
0494 template<typename NestedXpr>
0495 struct blas_traits<Transpose<NestedXpr> >
0496  : blas_traits<NestedXpr>
0497 {
0498   typedef typename NestedXpr::Scalar Scalar;
0499   typedef blas_traits<NestedXpr> Base;
0500   typedef Transpose<NestedXpr> XprType;
0501   typedef Transpose<const typename Base::_ExtractType>  ExtractType; // const to get rid of a compile error; anyway blas traits are only used on the RHS
0502   typedef Transpose<const typename Base::_ExtractType> _ExtractType;
0503   typedef typename conditional<bool(Base::HasUsableDirectAccess),
0504     ExtractType,
0505     typename ExtractType::PlainObject
0506     >::type DirectLinearAccessType;
0507   enum {
0508     IsTransposed = Base::IsTransposed ? 0 : 1
0509   };
0510   static inline ExtractType extract(const XprType& x) { return ExtractType(Base::extract(x.nestedExpression())); }
0511   static inline Scalar extractScalarFactor(const XprType& x) { return Base::extractScalarFactor(x.nestedExpression()); }
0512 };
0513 
0514 template<typename T>
0515 struct blas_traits<const T>
0516      : blas_traits<T>
0517 {};
0518 
0519 template<typename T, bool HasUsableDirectAccess=blas_traits<T>::HasUsableDirectAccess>
0520 struct extract_data_selector {
0521   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static const typename T::Scalar* run(const T& m)
0522   {
0523     return blas_traits<T>::extract(m).data();
0524   }
0525 };
0526 
0527 template<typename T>
0528 struct extract_data_selector<T,false> {
0529   static typename T::Scalar* run(const T&) { return 0; }
0530 };
0531 
0532 template<typename T>
0533 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE const typename T::Scalar* extract_data(const T& m)
0534 {
0535   return extract_data_selector<T>::run(m);
0536 }
0537 
0538 /**
0539  * \c combine_scalar_factors extracts and multiplies factors from GEMM and GEMV products.
0540  * There is a specialization for booleans
0541  */
0542 template<typename ResScalar, typename Lhs, typename Rhs>
0543 struct combine_scalar_factors_impl
0544 {
0545   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static ResScalar run(const Lhs& lhs, const Rhs& rhs)
0546   {
0547     return blas_traits<Lhs>::extractScalarFactor(lhs) * blas_traits<Rhs>::extractScalarFactor(rhs);
0548   }
0549   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static ResScalar run(const ResScalar& alpha, const Lhs& lhs, const Rhs& rhs)
0550   {
0551     return alpha * blas_traits<Lhs>::extractScalarFactor(lhs) * blas_traits<Rhs>::extractScalarFactor(rhs);
0552   }
0553 };
0554 template<typename Lhs, typename Rhs>
0555 struct combine_scalar_factors_impl<bool, Lhs, Rhs>
0556 {
0557   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static bool run(const Lhs& lhs, const Rhs& rhs)
0558   {
0559     return blas_traits<Lhs>::extractScalarFactor(lhs) && blas_traits<Rhs>::extractScalarFactor(rhs);
0560   }
0561   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static bool run(const bool& alpha, const Lhs& lhs, const Rhs& rhs)
0562   {
0563     return alpha && blas_traits<Lhs>::extractScalarFactor(lhs) && blas_traits<Rhs>::extractScalarFactor(rhs);
0564   }
0565 };
0566 
0567 template<typename ResScalar, typename Lhs, typename Rhs>
0568 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE ResScalar combine_scalar_factors(const ResScalar& alpha, const Lhs& lhs, const Rhs& rhs)
0569 {
0570   return combine_scalar_factors_impl<ResScalar,Lhs,Rhs>::run(alpha, lhs, rhs);
0571 }
0572 template<typename ResScalar, typename Lhs, typename Rhs>
0573 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE ResScalar combine_scalar_factors(const Lhs& lhs, const Rhs& rhs)
0574 {
0575   return combine_scalar_factors_impl<ResScalar,Lhs,Rhs>::run(lhs, rhs);
0576 }
0577 
0578 
0579 } // end namespace internal
0580 
0581 } // end namespace Eigen
0582 
0583 #endif // EIGEN_BLASUTIL_H