/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h

// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_ASSIGN_H
#define EIGEN_CXX11_TENSOR_TENSOR_ASSIGN_H

namespace Eigen {

/** \class TensorAssign
  * \ingroup CXX11_Tensor_Module
  *
  * \brief The tensor assignment class.
  *
  * This class represents the assignment of the values resulting from the evaluation of
  * the rhs expression to the memory locations denoted by the lhs expression.
  */
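// A minimal usage sketch (illustrative, not part of this header): a tensor
// assignment such as
//
//   Eigen::Tensor<float, 2> a(3, 4), b(3, 4);
//   b.setRandom();
//   a = b * b;  // builds a TensorAssignOp and hands it to an executor
//
// is rewritten into a TensorAssignOp whose evaluator, defined below, carries
// out the writes coefficient by coefficient, packet by packet, or block by
// block, depending on what both sides support.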
namespace internal {
template<typename LhsXprType, typename RhsXprType>
struct traits<TensorAssignOp<LhsXprType, RhsXprType> >
{
  typedef typename LhsXprType::Scalar Scalar;
  typedef typename traits<LhsXprType>::StorageKind StorageKind;
  typedef typename promote_index_type<typename traits<LhsXprType>::Index,
                                      typename traits<RhsXprType>::Index>::type Index;
  typedef typename LhsXprType::Nested LhsNested;
  typedef typename RhsXprType::Nested RhsNested;
  typedef typename remove_reference<LhsNested>::type _LhsNested;
  typedef typename remove_reference<RhsNested>::type _RhsNested;
  static const std::size_t NumDimensions = internal::traits<LhsXprType>::NumDimensions;
  static const int Layout = internal::traits<LhsXprType>::Layout;
  typedef typename traits<LhsXprType>::PointerType PointerType;

  enum {
    Flags = 0
  };
};

template<typename LhsXprType, typename RhsXprType>
struct eval<TensorAssignOp<LhsXprType, RhsXprType>, Eigen::Dense>
{
  typedef const TensorAssignOp<LhsXprType, RhsXprType>& type;
};

template<typename LhsXprType, typename RhsXprType>
struct nested<TensorAssignOp<LhsXprType, RhsXprType>, 1, typename eval<TensorAssignOp<LhsXprType, RhsXprType> >::type>
{
  typedef TensorAssignOp<LhsXprType, RhsXprType> type;
};

}  // end namespace internal


template<typename LhsXprType, typename RhsXprType>
class TensorAssignOp : public TensorBase<TensorAssignOp<LhsXprType, RhsXprType> >
{
  public:
  typedef typename Eigen::internal::traits<TensorAssignOp>::Scalar Scalar;
  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
  typedef typename LhsXprType::CoeffReturnType CoeffReturnType;
  typedef typename Eigen::internal::nested<TensorAssignOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorAssignOp>::StorageKind StorageKind;
  typedef typename Eigen::internal::traits<TensorAssignOp>::Index Index;

  static const int NumDims = Eigen::internal::traits<TensorAssignOp>::NumDimensions;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorAssignOp(LhsXprType& lhs, const RhsXprType& rhs)
      : m_lhs_xpr(lhs), m_rhs_xpr(rhs) {}

  /** \returns the nested expressions */
  EIGEN_DEVICE_FUNC
  typename internal::remove_all<typename LhsXprType::Nested>::type&
  lhsExpression() const { return *((typename internal::remove_all<typename LhsXprType::Nested>::type*)&m_lhs_xpr); }

  EIGEN_DEVICE_FUNC
  const typename internal::remove_all<typename RhsXprType::Nested>::type&
  rhsExpression() const { return m_rhs_xpr; }

  protected:
  typename internal::remove_all<typename LhsXprType::Nested>::type& m_lhs_xpr;
  const typename internal::remove_all<typename RhsXprType::Nested>::type& m_rhs_xpr;
};


template<typename LeftArgType, typename RightArgType, typename Device>
struct TensorEvaluator<const TensorAssignOp<LeftArgType, RightArgType>, Device>
{
  typedef TensorAssignOp<LeftArgType, RightArgType> XprType;
  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef typename TensorEvaluator<RightArgType, Device>::Dimensions Dimensions;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  static const int NumDims = XprType::NumDims;

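  // Aligned, packet and block access are advertised only when *both* sides
  // support them (bitwise AND of the two evaluators' flags), while blocked
  // evaluation is preferred as soon as *either* side prefers it (bitwise OR).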
  enum {
    IsAligned         = int(TensorEvaluator<LeftArgType, Device>::IsAligned) &
                        int(TensorEvaluator<RightArgType, Device>::IsAligned),
    PacketAccess      = int(TensorEvaluator<LeftArgType, Device>::PacketAccess) &
                        int(TensorEvaluator<RightArgType, Device>::PacketAccess),
    BlockAccess       = int(TensorEvaluator<LeftArgType, Device>::BlockAccess) &
                        int(TensorEvaluator<RightArgType, Device>::BlockAccess),
    PreferBlockAccess = int(TensorEvaluator<LeftArgType, Device>::PreferBlockAccess) |
                        int(TensorEvaluator<RightArgType, Device>::PreferBlockAccess),
    Layout            = TensorEvaluator<LeftArgType, Device>::Layout,
    RawAccess         = TensorEvaluator<LeftArgType, Device>::RawAccess
  };

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename TensorEvaluator<const RightArgType, Device>::TensorBlock
      RightTensorBlock;
  //===--------------------------------------------------------------------===//

  TensorEvaluator(const XprType& op, const Device& device) :
      m_leftImpl(op.lhsExpression(), device),
      m_rightImpl(op.rhsExpression(), device)
  {
    EIGEN_STATIC_ASSERT(
        (static_cast<int>(TensorEvaluator<LeftArgType, Device>::Layout) ==
         static_cast<int>(TensorEvaluator<RightArgType, Device>::Layout)),
        YOU_MADE_A_PROGRAMMING_MISTAKE);
  }

  EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
  {
    // The dimensions of the lhs and the rhs tensors should be equal to prevent
    // overflows and ensure the result is fully initialized.
    // TODO: use left impl instead if right impl dimensions are known at compile time.
    return m_rightImpl.dimensions();
  }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions()));
    m_leftImpl.evalSubExprsIfNeeded(NULL);
    // If the lhs provides raw access to its storage area (i.e. if m_leftImpl.data() returns a non
    // null value), attempt to evaluate the rhs expression in place. Returns true iff in place
    // evaluation isn't supported and the caller still needs to manually assign the values generated
    // by the rhs to the lhs.
    return m_rightImpl.evalSubExprsIfNeeded(m_leftImpl.data());
  }
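
  // Caller-side sketch (simplified; the real driver lives in TensorExecutor):
  // the assignment loop below only runs when in-place evaluation of the rhs
  // was not possible and evalSubExprsIfNeeded returned true.
  //
  //   TensorEvaluator<const TensorAssignOp<L, R>, Device> eval(op, device);
  //   if (eval.evalSubExprsIfNeeded(NULL)) {
  //     const Index size = internal::array_prod(eval.dimensions());
  //     for (Index i = 0; i < size; ++i) eval.evalScalar(i);
  //   }
  //   eval.cleanup();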

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    m_leftImpl.evalSubExprsIfNeededAsync(nullptr, [this, done](bool) {
      m_rightImpl.evalSubExprsIfNeededAsync(
          m_leftImpl.data(), [done](bool need_assign) { done(need_assign); });
    });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_leftImpl.cleanup();
    m_rightImpl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalScalar(Index i) {
    m_leftImpl.coeffRef(i) = m_rightImpl.coeff(i);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalPacket(Index i) {
    const int LhsStoreMode = TensorEvaluator<LeftArgType, Device>::IsAligned ? Aligned : Unaligned;
    const int RhsLoadMode = TensorEvaluator<RightArgType, Device>::IsAligned ? Aligned : Unaligned;
    m_leftImpl.template writePacket<LhsStoreMode>(i, m_rightImpl.template packet<RhsLoadMode>(i));
  }
  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
  {
    return m_leftImpl.coeff(index);
  }
  template<int LoadMode>
  EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const
  {
    return m_leftImpl.template packet<LoadMode>(index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    // We assume that evalPacket or evalScalar is called to perform the
    // assignment and account for the cost of the write here, but reduce left
    // cost by one load because we are using m_leftImpl.coeffRef.
    TensorOpCost left = m_leftImpl.costPerCoeff(vectorized);
    return m_rightImpl.costPerCoeff(vectorized) +
           TensorOpCost(
               numext::maxi(0.0, left.bytes_loaded() - sizeof(CoeffReturnType)),
               left.bytes_stored(), left.compute_cycles()) +
           TensorOpCost(0, sizeof(CoeffReturnType), 0, vectorized, PacketSize);
  }
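  // Worked example (assuming both sides are plain float tensors): the lhs
  // evaluator reports one 4-byte load per coefficient, which the maxi(...)
  // term cancels exactly, so the total reduces to the rhs read (4 bytes
  // loaded) plus the 4-byte store contributed by the last TensorOpCost term.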

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    return internal::TensorBlockResourceRequirements::merge(
        m_leftImpl.getResourceRequirements(),
        m_rightImpl.getResourceRequirements());
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalBlock(
      TensorBlockDesc& desc, TensorBlockScratch& scratch) {
    if (TensorEvaluator<LeftArgType, Device>::RawAccess &&
        m_leftImpl.data() != NULL) {
      // If destination has raw data access, we pass it as a potential
      // destination for a block descriptor evaluation.
      desc.template AddDestinationBuffer<Layout>(
          /*dst_base=*/m_leftImpl.data() + desc.offset(),
          /*dst_strides=*/internal::strides<Layout>(m_leftImpl.dimensions()));
    }

    RightTensorBlock block = m_rightImpl.block(desc, scratch, /*root_of_expr_ast=*/true);
    // If block was evaluated into a destination, there is no need to do assignment.
    if (block.kind() != internal::TensorBlockKind::kMaterializedInOutput) {
      m_leftImpl.writeBlock(desc, block);
    }
    block.cleanup();
  }
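  // In short, the block path has two outcomes: either the rhs block is
  // materialized directly into the lhs buffer registered above (its kind()
  // is then kMaterializedInOutput and no copy is needed), or it lands in
  // scratch memory and writeBlock() copies it into the lhs.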

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_leftImpl.bind(cgh);
    m_rightImpl.bind(cgh);
  }
#endif

  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return m_leftImpl.data(); }

 private:
  TensorEvaluator<LeftArgType, Device> m_leftImpl;
  TensorEvaluator<RightArgType, Device> m_rightImpl;
};

}  // end namespace Eigen


#endif // EIGEN_CXX11_TENSOR_TENSOR_ASSIGN_H