Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-04-19 09:06:40

0001 // This file is part of Eigen, a lightweight C++ template library
0002 // for linear algebra.
0003 //
0004 // Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
0005 //
0006 // This Source Code Form is subject to the terms of the Mozilla
0007 // Public License v. 2.0. If a copy of the MPL was not distributed
0008 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
0009 
0010 #ifndef EIGEN_SPARSEDENSEPRODUCT_H
0011 #define EIGEN_SPARSEDENSEPRODUCT_H
0012 
0013 namespace RivetEigen { 
0014 
0015 namespace internal {
0016 
     // Storage-kind promotion for outer products that mix one Sparse and one
     // Dense operand: in either operand order the promoted kind `ret` is Sparse.
0017 template <> struct product_promote_storage_type<Sparse,Dense, OuterProduct> { typedef Sparse ret; };
0018 template <> struct product_promote_storage_type<Dense,Sparse, OuterProduct> { typedef Sparse ret; };
0019 
     // Primary template of the sparse * dense kernel. It is only declared here;
     // the partial specializations that follow in this file supply
     //   static void run(lhs, rhs, res, alpha)
     // for the four (LhsStorageOrder, ColPerCol) combinations.
     //
     // Template parameters:
     //   SparseLhsType / DenseRhsType / DenseResType - operand and result types.
     //   AlphaType       - type of the scaling factor passed to run().
     //   LhsStorageOrder - defaults to RowMajor when SparseLhsType::Flags has
     //                     RowMajorBit set, ColMajor otherwise.
     //   ColPerCol       - defaults to true when the rhs is not flagged row-major
     //                     or has exactly one column at compile time.
0020 template<typename SparseLhsType, typename DenseRhsType, typename DenseResType,
0021          typename AlphaType,
0022          int LhsStorageOrder = ((SparseLhsType::Flags&RowMajorBit)==RowMajorBit) ? RowMajor : ColMajor,
0023          bool ColPerCol = ((DenseRhsType::Flags&RowMajorBit)==0) || DenseRhsType::ColsAtCompileTime==1>
0024 struct sparse_time_dense_product_impl;
0025 
     // Kernel for LhsStorageOrder == RowMajor and ColPerCol == true. This
     // specialization only matches when AlphaType is exactly DenseResType::Scalar.
     // run() adds alpha * (lhs * rhs) to res one coefficient at a time; res is
     // only ever updated with +=, so its previous content is kept.
0026 template<typename SparseLhsType, typename DenseRhsType, typename DenseResType>
0027 struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, typename DenseResType::Scalar, RowMajor, true>
0028 {
0029   typedef typename internal::remove_all<SparseLhsType>::type Lhs;
0030   typedef typename internal::remove_all<DenseRhsType>::type Rhs;
0031   typedef typename internal::remove_all<DenseResType>::type Res;
0032   typedef typename evaluator<Lhs>::InnerIterator LhsInnerIterator;
0033   typedef evaluator<Lhs> LhsEval;
       // Accumulates alpha * lhs * rhs into res.
       //   lhs   - sparse operand, traversed through its evaluator.
       //   rhs   - dense operand, read with coeff(row, col).
       //   res   - dense destination, updated in place.
       //   alpha - scaling factor applied to every computed dot product.
0034   static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha)
0035   {
0036     LhsEval lhsEval(lhs);
0037     
         // One processRow() call is issued per outer index of lhs; that index
         // is used as the row of res.
0038     Index n = lhs.outerSize();
0039 #ifdef EIGEN_HAS_OPENMP
0040     RivetEigen::initParallel();
0041     Index threads = RivetEigen::nbThreads();
0042 #endif
0043     
0044     for(Index c=0; c<rhs.cols(); ++c)
0045     {
0046 #ifdef EIGEN_HAS_OPENMP
0047       // This 20000 threshold has been found experimentally on 2D and 3D Poisson problems.
0048       // It basically represents the minimal amount of work to be done to be worth it.
0049       if(threads>1 && lhsEval.nonZerosEstimate() > 20000)
0050       {
             // Chunk size is ceil(n / (4*threads)), i.e. about four chunks per
             // thread. Each iteration writes only res.coeffRef(i,c).
0051         #pragma omp parallel for schedule(dynamic,(n+threads*4-1)/(threads*4)) num_threads(threads)
0052         for(Index i=0; i<n; ++i)
0053           processRow(lhsEval,rhs,res,alpha,i,c);
0054       }
0055       else
0056 #endif
           // With OpenMP this block is the `else` branch (serial fallback);
           // without OpenMP it is a plain scope that always runs.
0057       {
0058         for(Index i=0; i<n; ++i)
0059           processRow(lhsEval,rhs,res,alpha,i,c);
0060       }
0061     }
0062   }
0063   
       // Computes the dot product of outer vector i of lhs with column `col` of
       // rhs in a local accumulator, then adds alpha times that sum to
       // res(i,col) with a single update.
0064   static void processRow(const LhsEval& lhsEval, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha, Index i, Index col)
0065   {
0066     typename Res::Scalar tmp(0);
0067     for(LhsInnerIterator it(lhsEval,i); it ;++it)
0068       tmp += it.value() * rhs.coeff(it.index(),col);
0069     res.coeffRef(i,col) += alpha * tmp;
0070   }
0071   
0072 };
0073 
0074 // FIXME: what is the purpose of the following specialization? Is it for the BlockedSparse format?
0075 // -> let's disable it for now as it is conflicting with generic scalar*matrix and matrix*scalar operators
0076 // template<typename T1, typename T2/*, int _Options, typename _StrideType*/>
0077 // struct ScalarBinaryOpTraits<T1, Ref<T2/*, _Options, _StrideType*/> >
0078 // {
0079 //   enum {
0080 //     Defined = 1
0081 //   };
0082 //   typedef typename CwiseUnaryOp<scalar_multiple2_op<T1, typename T2::Scalar>, T2>::PlainObject ReturnType;
0083 // };
0084 
     // Kernel for LhsStorageOrder == ColMajor and ColPerCol == true. Unlike the
     // other three specializations in this file, AlphaType is left generic here.
     // run() adds alpha * (lhs * rhs) to res by scattering each stored lhs entry
     // into the matching coefficient of res; res is only updated with +=.
0085 template<typename SparseLhsType, typename DenseRhsType, typename DenseResType, typename AlphaType>
0086 struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, AlphaType, ColMajor, true>
0087 {
0088   typedef typename internal::remove_all<SparseLhsType>::type Lhs;
0089   typedef typename internal::remove_all<DenseRhsType>::type Rhs;
0090   typedef typename internal::remove_all<DenseResType>::type Res;
0091   typedef evaluator<Lhs> LhsEval;
0092   typedef typename LhsEval::InnerIterator LhsInnerIterator;
       // Accumulates alpha * lhs * rhs into res, column by column of rhs.
0093   static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const AlphaType& alpha)
0094   {
0095     LhsEval lhsEval(lhs);
0096     for(Index c=0; c<rhs.cols(); ++c)
0097     {
0098       for(Index j=0; j<lhs.outerSize(); ++j)
0099       {
             // Previous form, kept for reference; the live line below lets
             // ScalarBinaryOpTraits choose the type of alpha * rhs(j,c) instead
             // of forcing Res::Scalar.
0100 //        typename Res::Scalar rhs_j = alpha * rhs.coeff(j,c);
0101         typename ScalarBinaryOpTraits<AlphaType, typename Rhs::Scalar>::ReturnType rhs_j(alpha * rhs.coeff(j,c));
             // Scatter: every stored entry (it.index(), j) of lhs contributes
             // it.value() * rhs_j to res(it.index(), c).
0102         for(LhsInnerIterator it(lhsEval,j); it ;++it)
0103           res.coeffRef(it.index(),c) += it.value() * rhs_j;
0104       }
0105     }
0106   }
0107 };
0108 
     // Kernel for LhsStorageOrder == RowMajor and ColPerCol == false. This
     // specialization only matches when AlphaType is exactly DenseResType::Scalar.
     // run() adds alpha * (lhs * rhs) to res one whole row of res at a time;
     // res is only updated with +=.
0109 template<typename SparseLhsType, typename DenseRhsType, typename DenseResType>
0110 struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, typename DenseResType::Scalar, RowMajor, false>
0111 {
0112   typedef typename internal::remove_all<SparseLhsType>::type Lhs;
0113   typedef typename internal::remove_all<DenseRhsType>::type Rhs;
0114   typedef typename internal::remove_all<DenseResType>::type Res;
0115   typedef evaluator<Lhs> LhsEval;
0116   typedef typename LhsEval::InnerIterator LhsInnerIterator;
       // Accumulates alpha * lhs * rhs into res, issuing one processRow() call
       // per row of lhs.
0117   static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha)
0118   {
0119     Index n = lhs.rows();
0120     LhsEval lhsEval(lhs);
0121 
0122 #ifdef EIGEN_HAS_OPENMP
0123     RivetEigen::initParallel();
0124     Index threads = RivetEigen::nbThreads();
0125     // This 20000 threshold has been found experimentally on 2D and 3D Poisson problems.
0126     // It basically represents the minimal amount of work to be done to be worth it.
         // Here the work estimate is scaled by rhs.cols(), because each call to
         // processRow() handles a full row of rhs per stored lhs entry.
0127     if(threads>1 && lhsEval.nonZerosEstimate()*rhs.cols() > 20000)
0128     {
           // Chunk size is ceil(n / (4*threads)); iteration i writes only
           // res.row(i).
0129       #pragma omp parallel for schedule(dynamic,(n+threads*4-1)/(threads*4)) num_threads(threads)
0130       for(Index i=0; i<n; ++i)
0131         processRow(lhsEval,rhs,res,alpha,i);
0132     }
0133     else
0134 #endif
         // With OpenMP this block is the `else` branch (serial fallback);
         // without OpenMP it is a plain scope that always runs.
0135     {
0136       for(Index i=0; i<n; ++i)
0137         processRow(lhsEval, rhs, res, alpha, i);
0138     }
0139   }
0140 
       // Adds to row i of res the sum over stored entries of lhs row i of
       // (alpha * value) * rhs.row(index). Note that res is taken as Res&
       // (DenseResType with remove_all applied) rather than DenseResType&.
0141   static void processRow(const LhsEval& lhsEval, const DenseRhsType& rhs, Res& res, const typename Res::Scalar& alpha, Index i)
0142   {
0143     typename Res::RowXpr res_i(res.row(i));
0144     for(LhsInnerIterator it(lhsEval,i); it ;++it)
0145       res_i += (alpha*it.value()) * rhs.row(it.index());
0146   }
0147 };
0148 
     // Kernel for LhsStorageOrder == ColMajor and ColPerCol == false. This
     // specialization only matches when AlphaType is exactly DenseResType::Scalar.
     // run() adds alpha * (lhs * rhs) to res by adding scaled rows of rhs into
     // rows of res; res is only updated with +=. No OpenMP path is provided here.
0149 template<typename SparseLhsType, typename DenseRhsType, typename DenseResType>
0150 struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, typename DenseResType::Scalar, ColMajor, false>
0151 {
0152   typedef typename internal::remove_all<SparseLhsType>::type Lhs;
0153   typedef typename internal::remove_all<DenseRhsType>::type Rhs;
0154   typedef typename internal::remove_all<DenseResType>::type Res;
0155   typedef typename evaluator<Lhs>::InnerIterator LhsInnerIterator;
       // Accumulates alpha * lhs * rhs into res.
0156   static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha)
0157   {
0158     evaluator<Lhs> lhsEval(lhs);
0159     for(Index j=0; j<lhs.outerSize(); ++j)
0160     {
           // Row j of rhs is shared by every stored entry of outer vector j.
0161       typename Rhs::ConstRowXpr rhs_j(rhs.row(j));
           // Entry (it.index(), j) of lhs adds (alpha*value) * rhs.row(j) to
           // res.row(it.index()).
0162       for(LhsInnerIterator it(lhsEval,j); it ;++it)
0163         res.row(it.index()) += (alpha*it.value()) * rhs_j;
0164     }
0165   }
0166 };
0167 
     // Entry point of the sparse * dense kernels: forwards to
     // sparse_time_dense_product_impl<...>::run(lhs, rhs, res, alpha). The last
     // two template arguments (LhsStorageOrder, ColPerCol) are left to their
     // defaults, so the specialization is picked from the operand Flags.
0168 template<typename SparseLhsType, typename DenseRhsType, typename DenseResType,typename AlphaType>
0169 inline void sparse_time_dense_product(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const AlphaType& alpha)
0170 {
0171   sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, AlphaType>::run(lhs, rhs, res, alpha);
0172 }
0173 
0174 } // end namespace internal
0175 
0176 namespace internal {
0177 
     // Product implementation for a SparseShape lhs times a DenseShape rhs.
     // Only scaleAndAddTo() is defined here; the rest of the interface comes
     // from generic_product_impl_base.
0178 template<typename Lhs, typename Rhs, int ProductType>
0179 struct generic_product_impl<Lhs, Rhs, SparseShape, DenseShape, ProductType>
0180  : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,SparseShape,DenseShape,ProductType> >
0181 {
0182   typedef typename Product<Lhs,Rhs>::Scalar Scalar;
0183   
       // Performs dst += alpha * lhs * rhs through sparse_time_dense_product().
0184   template<typename Dest>
0185   static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
0186   {
         // NOTE(review): the second nested_eval argument presumably tells it
         // how many times each operand will be traversed, so it can decide
         // whether to evaluate into a temporary - confirm against nested_eval.
0187     typedef typename nested_eval<Lhs,((Rhs::Flags&RowMajorBit)==0) ? 1 : Rhs::ColsAtCompileTime>::type LhsNested;
0188     typedef typename nested_eval<Rhs,((Lhs::Flags&RowMajorBit)==0) ? 1 : Dynamic>::type RhsNested;
0189     LhsNested lhsNested(lhs);
0190     RhsNested rhsNested(rhs);
0191     internal::sparse_time_dense_product(lhsNested, rhsNested, dst, alpha);
0192   }
0193 };
0194 
     // A SparseTriangularShape lhs times a DenseShape rhs reuses the
     // SparseShape * DenseShape implementation unchanged (empty body).
0195 template<typename Lhs, typename Rhs, int ProductType>
0196 struct generic_product_impl<Lhs, Rhs, SparseTriangularShape, DenseShape, ProductType>
0197   : generic_product_impl<Lhs, Rhs, SparseShape, DenseShape, ProductType>
0198 {};
0199 
     // Product implementation for a DenseShape lhs times a SparseShape rhs.
     // It has no dedicated kernel: the product is rewritten in transposed form
     // so that the sparse operand comes first and the sparse * dense kernel
     // can be reused.
0200 template<typename Lhs, typename Rhs, int ProductType>
0201 struct generic_product_impl<Lhs, Rhs, DenseShape, SparseShape, ProductType>
0202   : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,SparseShape,ProductType> >
0203 {
0204   typedef typename Product<Lhs,Rhs>::Scalar Scalar;
0205   
       // Performs dst += alpha * lhs * rhs.
0206   template<typename Dst>
0207   static void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
0208   {
         // NOTE(review): the second nested_eval argument presumably tells it
         // how many times each operand will be traversed - confirm against
         // nested_eval.
0209     typedef typename nested_eval<Lhs,((Rhs::Flags&RowMajorBit)==0) ? Dynamic : 1>::type LhsNested;
0210     typedef typename nested_eval<Rhs,((Lhs::Flags&RowMajorBit)==RowMajorBit) ? 1 : Lhs::RowsAtCompileTime>::type RhsNested;
0211     LhsNested lhsNested(lhs);
0212     RhsNested rhsNested(rhs);
0213     
0214     // transpose everything
         // Uses (lhs*rhs)^T == rhs^T * lhs^T: dstT is a transposed view of dst,
         // so accumulating rhs^T * lhs^T into dstT updates dst itself.
0215     Transpose<Dst> dstT(dst);
0216     internal::sparse_time_dense_product(rhsNested.transpose(), lhsNested.transpose(), dstT, alpha);
0217   }
0218 };
0219 
     // A DenseShape lhs times a SparseTriangularShape rhs reuses the
     // DenseShape * SparseShape implementation unchanged (empty body).
0220 template<typename Lhs, typename Rhs, int ProductType>
0221 struct generic_product_impl<Lhs, Rhs, DenseShape, SparseTriangularShape, ProductType>
0222   : generic_product_impl<Lhs, Rhs, DenseShape, SparseShape, ProductType>
0223 {};
0224 
     // Shared evaluator for outer products that mix a sparse and a dense
     // operand. The result is exposed through InnerIterator only: outer vector
     // `outer` of the result is the "actual lhs" vector scaled by coefficient
     // `outer` of the "actual rhs".
     //
     // Template parameters:
     //   LhsT, RhsT      - operand types of the product expression.
     //   NeedToTranspose - when true the roles are swapped (RhsT is iterated,
     //                     LhsT provides the factor) and the result is flagged
     //                     row-major.
0225 template<typename LhsT, typename RhsT, bool NeedToTranspose>
0226 struct sparse_dense_outer_product_evaluator
0227 {
0228 protected:
       // Lhs1 is the operand that gets iterated, ActualRhs the one providing
       // the scaling factor.
0229   typedef typename conditional<NeedToTranspose,RhsT,LhsT>::type Lhs1;
0230   typedef typename conditional<NeedToTranspose,LhsT,RhsT>::type ActualRhs;
0231   typedef Product<LhsT,RhsT,DefaultProduct> ProdXprType;
0232   
0233   // if the actual left-hand side is a dense vector,
0234   // then build a sparse-view so that we can seamlessly iterate over it.
0235   typedef typename conditional<is_same<typename internal::traits<Lhs1>::StorageKind,Sparse>::value,
0236             Lhs1, SparseView<Lhs1> >::type ActualLhs;
       // Storage type of m_lhs: a const reference when Lhs1 is already sparse,
       // otherwise a SparseView object held by value.
0237   typedef typename conditional<is_same<typename internal::traits<Lhs1>::StorageKind,Sparse>::value,
0238             Lhs1 const&, SparseView<Lhs1> >::type LhsArg;
0239             
0240   typedef evaluator<ActualLhs> LhsEval;
0241   typedef evaluator<ActualRhs> RhsEval;
0242   typedef typename evaluator<ActualLhs>::InnerIterator LhsIterator;
0243   typedef typename ProdXprType::Scalar Scalar;
0244   
0245 public:
0246   enum {
0247     Flags = NeedToTranspose ? RowMajorBit : 0,
0248     CoeffReadCost = HugeCost
0249   };
0250   
       // Iterates over one outer vector of the product. It walks outer vector
       // 0 of the actual lhs and multiplies every value by m_factor, the
       // coefficient of the actual rhs at position `outer`.
0251   class InnerIterator : public LhsIterator
0252   {
0253   public:
         // m_empty is declared before m_factor, so it is already initialised
         // to false when get() runs and may set it to true.
0254     InnerIterator(const sparse_dense_outer_product_evaluator &xprEval, Index outer)
0255       : LhsIterator(xprEval.m_lhsXprImpl, 0),
0256         m_outer(outer),
0257         m_empty(false),
0258         m_factor(get(xprEval.m_rhsXprImpl, outer, typename internal::traits<ActualRhs>::StorageKind() ))
0259     {}
0260     
         // The inner index comes from the lhs iterator; whether it is the row
         // or the column of the result depends on NeedToTranspose.
0261     EIGEN_STRONG_INLINE Index outer() const { return m_outer; }
0262     EIGEN_STRONG_INLINE Index row()   const { return NeedToTranspose ? m_outer : LhsIterator::index(); }
0263     EIGEN_STRONG_INLINE Index col()   const { return NeedToTranspose ? LhsIterator::index() : m_outer; }
0264 
0265     EIGEN_STRONG_INLINE Scalar value() const { return LhsIterator::value() * m_factor; }
         // Valid while the lhs iterator is valid, unless get() flagged this
         // outer vector as empty.
0266     EIGEN_STRONG_INLINE operator bool() const { return LhsIterator::operator bool() && (!m_empty); }
0267     
0268   protected:
         // Dense rhs: the factor is simply rhs.coeff(outer). The third
         // parameter is only a tag used for overload selection.
0269     Scalar get(const RhsEval &rhs, Index outer, Dense = Dense()) const
0270     {
0271       return rhs.coeff(outer);
0272     }
0273     
         // Sparse rhs: returns the first stored entry of outer vector `outer`
         // when it sits at inner index 0 and is nonzero. Otherwise marks the
         // iterator as empty and returns 0. Non-const because it may write
         // m_empty.
         // NOTE(review): the index()==0 test presumably relies on the rhs
         // being a vector with a single inner index - confirm.
0274     Scalar get(const RhsEval &rhs, Index outer, Sparse = Sparse())
0275     {
0276       typename RhsEval::InnerIterator it(rhs, outer);
0277       if (it && it.index()==0 && it.value()!=Scalar(0))
0278         return it.value();
0279       m_empty = true;
0280       return Scalar(0);
0281     }
0282     
0283     Index m_outer;
0284     bool m_empty;
0285     Scalar m_factor;
0286   };
0287   
       // Non-transposed case: arguments arrive as (iterated operand, factor
       // operand).
0288   sparse_dense_outer_product_evaluator(const Lhs1 &lhs, const ActualRhs &rhs)
0289      : m_lhs(lhs), m_lhsXprImpl(m_lhs), m_rhsXprImpl(rhs)
0290   {
0291     EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
0292   }
0293   
0294   // transpose case
       // Same initialisation as above; only the parameter order is swapped, so
       // the factor operand is passed first.
0295   sparse_dense_outer_product_evaluator(const ActualRhs &rhs, const Lhs1 &lhs)
0296      : m_lhs(lhs), m_lhsXprImpl(m_lhs), m_rhsXprImpl(rhs)
0297   {
0298     EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
0299   }
0300     
0301 protected:
       // m_lhsXprImpl is built from m_lhs, so m_lhs must stay declared first.
0302   const LhsArg m_lhs;
0303   evaluator<ActualLhs> m_lhsXprImpl;
0304   evaluator<ActualRhs> m_rhsXprImpl;
0305 };
0306 
0307 // sparse * dense outer product
     // Evaluator for an outer product with a SparseShape lhs and a DenseShape
     // rhs. All the work is done by sparse_dense_outer_product_evaluator, with
     // NeedToTranspose set to Lhs::IsRowMajor.
0308 template<typename Lhs, typename Rhs>
0309 struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, OuterProduct, SparseShape, DenseShape>
0310   : sparse_dense_outer_product_evaluator<Lhs,Rhs, Lhs::IsRowMajor>
0311 {
0312   typedef sparse_dense_outer_product_evaluator<Lhs,Rhs, Lhs::IsRowMajor> Base;
0313   
0314   typedef Product<Lhs, Rhs> XprType;
0315   typedef typename XprType::PlainObject PlainObject;
0316 
       // Operands are always passed as (lhs, rhs); which of the two Base
       // constructors receives them follows from how NeedToTranspose maps
       // Lhs/Rhs onto Base's parameter types.
0317   explicit product_evaluator(const XprType& xpr)
0318     : Base(xpr.lhs(), xpr.rhs())
0319   {}
0320   
0321 };
0322 
     // dense * sparse outer product
     // Evaluator for an outer product with a DenseShape lhs and a SparseShape
     // rhs. All the work is done by sparse_dense_outer_product_evaluator, with
     // NeedToTranspose set to Rhs::IsRowMajor.
0323 template<typename Lhs, typename Rhs>
0324 struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, OuterProduct, DenseShape, SparseShape>
0325   : sparse_dense_outer_product_evaluator<Lhs,Rhs, Rhs::IsRowMajor>
0326 {
0327   typedef sparse_dense_outer_product_evaluator<Lhs,Rhs, Rhs::IsRowMajor> Base;
0328   
0329   typedef Product<Lhs, Rhs> XprType;
0330   typedef typename XprType::PlainObject PlainObject;
0331 
       // Operands are always passed as (lhs, rhs); which of the two Base
       // constructors receives them follows from how NeedToTranspose maps
       // Lhs/Rhs onto Base's parameter types.
0332   explicit product_evaluator(const XprType& xpr)
0333     : Base(xpr.lhs(), xpr.rhs())
0334   {}
0335   
0336 };
0337 
0338 } // end namespace internal
0339 
0340 } // end namespace RivetEigen
0341 
0342 #endif // EIGEN_SPARSEDENSEPRODUCT_H