Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:56:15

0001 // This file is part of Eigen, a lightweight C++ template library
0002 // for linear algebra.
0003 //
0004 // Copyright (C) 2011-2018 Gael Guennebaud <gael.guennebaud@inria.fr>
0005 //
0006 // This Source Code Form is subject to the terms of the Mozilla
0007 // Public License v. 2.0. If a copy of the MPL was not distributed
0008 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
0009 
0010 #ifndef EIGEN_PARTIALREDUX_H
0011 #define EIGEN_PARTIALREDUX_H
0012 
0013 namespace Eigen { 
0014 
0015 namespace internal {
0016 
0017 
0018 /***************************************************************************
0019 *
0020 * This file provides evaluators for partial reductions.
0021 * There are two modes:
0022 *
0023 *  - scalar path: simply calls the respective function on the column or row.
0024 *    -> nothing special here, all the tricky part is handled by the return
0025 *       types of VectorwiseOp's members. They embed the functor calling the
0026 *       respective DenseBase's member function.
0027 *
0028 *  - vectorized path: implements a packet-wise reduction followed by
0029 *    some (optional) processing of the outcome, e.g., division by n for mean.
0030 *
0031 * For the vectorized path let's observe that the packet-size and outer-unrolling
0032 * are both decided by the assignment logic. So all we have to do is to decide
0033 * on the inner unrolling.
0034 *
0035 * For the unrolling, we can reuse "internal::redux_vec_unroller" from Redux.h,
0036 * but we need to be careful to specify the correct increment.
0037 *
0038 ***************************************************************************/
0039 
0040 
0041 /* logic deciding a strategy for unrolling of vectorized paths */
0042 template<typename Func, typename Evaluator>
0043 struct packetwise_redux_traits
0044 {
0045   enum {
0046     OuterSize = int(Evaluator::IsRowMajor) ? Evaluator::RowsAtCompileTime : Evaluator::ColsAtCompileTime,
0047     Cost = OuterSize == Dynamic ? HugeCost
0048          : OuterSize * Evaluator::CoeffReadCost + (OuterSize-1) * functor_traits<Func>::Cost,
0049     Unrolling = Cost <= EIGEN_UNROLLING_LIMIT ? CompleteUnrolling : NoUnrolling
0050   };
0051 
0052 };
0053 
0054 /* Value to be returned when size==0 , by default let's return 0 */
0055 template<typename PacketType,typename Func>
0056 EIGEN_DEVICE_FUNC
0057 PacketType packetwise_redux_empty_value(const Func& ) { return pset1<PacketType>(0); }
0058 
0059 /* For products the default is 1 */
0060 template<typename PacketType,typename Scalar>
0061 EIGEN_DEVICE_FUNC
0062 PacketType packetwise_redux_empty_value(const scalar_product_op<Scalar,Scalar>& ) { return pset1<PacketType>(1); }
0063 
0064 /* Perform the actual reduction */
0065 template<typename Func, typename Evaluator,
0066          int Unrolling = packetwise_redux_traits<Func, Evaluator>::Unrolling
0067 >
0068 struct packetwise_redux_impl;
0069 
0070 /* Perform the actual reduction with unrolling */
0071 template<typename Func, typename Evaluator>
0072 struct packetwise_redux_impl<Func, Evaluator, CompleteUnrolling>
0073 {
0074   typedef redux_novec_unroller<Func,Evaluator, 0, Evaluator::SizeAtCompileTime> Base;
0075   typedef typename Evaluator::Scalar Scalar;
0076 
0077   template<typename PacketType>
0078   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
0079   PacketType run(const Evaluator &eval, const Func& func, Index /*size*/)
0080   {
0081     return redux_vec_unroller<Func, Evaluator, 0, packetwise_redux_traits<Func, Evaluator>::OuterSize>::template run<PacketType>(eval,func);
0082   }
0083 };
0084 
0085 /* Add a specialization of redux_vec_unroller for size==0 at compiletime.
0086  * This specialization is not required for general reductions, which is
0087  * why it is defined here.
0088  */
0089 template<typename Func, typename Evaluator, int Start>
0090 struct redux_vec_unroller<Func, Evaluator, Start, 0>
0091 {
0092   template<typename PacketType>
0093   EIGEN_DEVICE_FUNC
0094   static EIGEN_STRONG_INLINE PacketType run(const Evaluator &, const Func& f)
0095   {
0096     return packetwise_redux_empty_value<PacketType>(f);
0097   }
0098 };
0099 
0100 /* Perform the actual reduction for dynamic sizes */
0101 template<typename Func, typename Evaluator>
0102 struct packetwise_redux_impl<Func, Evaluator, NoUnrolling>
0103 {
0104   typedef typename Evaluator::Scalar Scalar;
0105   typedef typename redux_traits<Func, Evaluator>::PacketType PacketScalar;
0106 
0107   template<typename PacketType>
0108   EIGEN_DEVICE_FUNC
0109   static PacketType run(const Evaluator &eval, const Func& func, Index size)
0110   {
0111     if(size==0)
0112       return packetwise_redux_empty_value<PacketType>(func);
0113     
0114     const Index size4 = (size-1)&(~3);
0115     PacketType p = eval.template packetByOuterInner<Unaligned,PacketType>(0,0);
0116     Index i = 1;
0117     // This loop is optimized for instruction pipelining:
0118     // - each iteration generates two independent instructions
0119     // - thanks to branch prediction and out-of-order execution we have independent instructions across loops
0120     for(; i<size4; i+=4)
0121       p = func.packetOp(p,
0122             func.packetOp(
0123               func.packetOp(eval.template packetByOuterInner<Unaligned,PacketType>(i+0,0),eval.template packetByOuterInner<Unaligned,PacketType>(i+1,0)),
0124               func.packetOp(eval.template packetByOuterInner<Unaligned,PacketType>(i+2,0),eval.template packetByOuterInner<Unaligned,PacketType>(i+3,0))));
0125     for(; i<size; ++i)
0126       p = func.packetOp(p, eval.template packetByOuterInner<Unaligned,PacketType>(i,0));
0127     return p;
0128   }
0129 };
0130 
0131 template< typename ArgType, typename MemberOp, int Direction>
0132 struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
0133   : evaluator_base<PartialReduxExpr<ArgType, MemberOp, Direction> >
0134 {
0135   typedef PartialReduxExpr<ArgType, MemberOp, Direction> XprType;
0136   typedef typename internal::nested_eval<ArgType,1>::type ArgTypeNested;
0137   typedef typename internal::add_const_on_value_type<ArgTypeNested>::type ConstArgTypeNested;
0138   typedef typename internal::remove_all<ArgTypeNested>::type ArgTypeNestedCleaned;
0139   typedef typename ArgType::Scalar InputScalar;
0140   typedef typename XprType::Scalar Scalar;
0141   enum {
0142     TraversalSize = Direction==int(Vertical) ? int(ArgType::RowsAtCompileTime) :  int(ArgType::ColsAtCompileTime)
0143   };
0144   typedef typename MemberOp::template Cost<int(TraversalSize)> CostOpType;
0145   enum {
0146     CoeffReadCost = TraversalSize==Dynamic ? HugeCost
0147                   : TraversalSize==0 ? 1
0148                   : int(TraversalSize) * int(evaluator<ArgType>::CoeffReadCost) + int(CostOpType::value),
0149     
0150     _ArgFlags = evaluator<ArgType>::Flags,
0151 
0152     _Vectorizable =  bool(int(_ArgFlags)&PacketAccessBit)
0153                   && bool(MemberOp::Vectorizable)
0154                   && (Direction==int(Vertical) ? bool(_ArgFlags&RowMajorBit) : (_ArgFlags&RowMajorBit)==0)
0155                   && (TraversalSize!=0),
0156                   
0157     Flags = (traits<XprType>::Flags&RowMajorBit)
0158           | (evaluator<ArgType>::Flags&(HereditaryBits&(~RowMajorBit)))
0159           | (_Vectorizable ? PacketAccessBit : 0)
0160           | LinearAccessBit,
0161     
0162     Alignment = 0 // FIXME this will need to be improved once PartialReduxExpr is vectorized
0163   };
0164 
0165   EIGEN_DEVICE_FUNC explicit evaluator(const XprType xpr)
0166     : m_arg(xpr.nestedExpression()), m_functor(xpr.functor())
0167   {
0168     EIGEN_INTERNAL_CHECK_COST_VALUE(TraversalSize==Dynamic ? HugeCost : (TraversalSize==0 ? 1 : int(CostOpType::value)));
0169     EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
0170   }
0171 
0172   typedef typename XprType::CoeffReturnType CoeffReturnType;
0173 
0174   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
0175   const Scalar coeff(Index i, Index j) const
0176   {
0177     return coeff(Direction==Vertical ? j : i);
0178   }
0179 
0180   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
0181   const Scalar coeff(Index index) const
0182   {
0183     return m_functor(m_arg.template subVector<DirectionType(Direction)>(index));
0184   }
0185 
0186   template<int LoadMode,typename PacketType>
0187   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
0188   PacketType packet(Index i, Index j) const
0189   {
0190     return packet<LoadMode,PacketType>(Direction==Vertical ? j : i);
0191   }
0192   
0193   template<int LoadMode,typename PacketType>
0194   EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
0195   PacketType packet(Index idx) const
0196   {
0197     enum { PacketSize = internal::unpacket_traits<PacketType>::size };
0198     typedef Block<const ArgTypeNestedCleaned,
0199                   Direction==Vertical ? int(ArgType::RowsAtCompileTime) : int(PacketSize),
0200                   Direction==Vertical ? int(PacketSize) : int(ArgType::ColsAtCompileTime),
0201                   true /* InnerPanel */> PanelType;
0202     
0203     PanelType panel(m_arg,
0204                     Direction==Vertical ? 0 : idx,
0205                     Direction==Vertical ? idx : 0,
0206                     Direction==Vertical ? m_arg.rows() : Index(PacketSize),
0207                     Direction==Vertical ? Index(PacketSize) : m_arg.cols());
0208 
0209     // FIXME
0210     // See bug 1612, currently if PacketSize==1 (i.e. complex<double> with 128bits registers) then the storage-order of panel get reversed
0211     // and methods like packetByOuterInner do not make sense anymore in this context.
0212     // So let's just by pass "vectorization" in this case:
0213     if(PacketSize==1)
0214       return internal::pset1<PacketType>(coeff(idx));
0215     
0216     typedef typename internal::redux_evaluator<PanelType> PanelEvaluator;
0217     PanelEvaluator panel_eval(panel);
0218     typedef typename MemberOp::BinaryOp BinaryOp;
0219     PacketType p = internal::packetwise_redux_impl<BinaryOp,PanelEvaluator>::template run<PacketType>(panel_eval,m_functor.binaryFunc(),m_arg.outerSize());
0220     return p;
0221   }
0222 
0223 protected:
0224   ConstArgTypeNested m_arg;
0225   const MemberOp m_functor;
0226 };
0227 
0228 } // end namespace internal
0229 
0230 } // end namespace Eigen
0231 
0232 #endif // EIGEN_PARTIALREDUX_H