// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Navdeep Jaitly <ndjaitly@google.com>
//                    Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H
#define EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H
namespace Eigen {

/** \class TensorReverse
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Expression that reverses the order of coefficients along a chosen
  * subset of tensor dimensions.
  *
  */
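// Usage sketch (illustrative example, not part of the original header):
// TensorReverseOp expressions are normally created through
// TensorBase::reverse(), which takes one boolean per dimension selecting the
// dimensions to flip:
//
//   Eigen::Tensor<float, 2> input(3, 4);
//   input.setRandom();
//   Eigen::array<bool, 2> reverse_dims{{true, false}};  // flip dimension 0 only
//   Eigen::Tensor<float, 2> output = input.reverse(reverse_dims);
//   // output(i, j) == input(2 - i, j)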
namespace internal {
template<typename ReverseDimensions, typename XprType>
struct traits<TensorReverseOp<ReverseDimensions,
                              XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = XprTraits::NumDimensions;
  static const int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};

template<typename ReverseDimensions, typename XprType>
struct eval<TensorReverseOp<ReverseDimensions, XprType>, Eigen::Dense>
{
  typedef const TensorReverseOp<ReverseDimensions, XprType>& type;
};

template<typename ReverseDimensions, typename XprType>
struct nested<TensorReverseOp<ReverseDimensions, XprType>, 1,
            typename eval<TensorReverseOp<ReverseDimensions, XprType> >::type>
{
  typedef TensorReverseOp<ReverseDimensions, XprType> type;
};

}  // end namespace internal

template<typename ReverseDimensions, typename XprType>
class TensorReverseOp : public TensorBase<TensorReverseOp<ReverseDimensions,
                                          XprType>, WriteAccessors>
{
  public:
    typedef TensorBase<TensorReverseOp<ReverseDimensions, XprType>, WriteAccessors> Base;
    typedef typename Eigen::internal::traits<TensorReverseOp>::Scalar Scalar;
    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorReverseOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorReverseOp>::StorageKind
                                                                      StorageKind;
    typedef typename Eigen::internal::traits<TensorReverseOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReverseOp(
      const XprType& expr, const ReverseDimensions& reverse_dims)
      : m_xpr(expr), m_reverse_dims(reverse_dims) { }

    EIGEN_DEVICE_FUNC
    const ReverseDimensions& reverse() const { return m_reverse_dims; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

    EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorReverseOp)


  protected:
    typename XprType::Nested m_xpr;
    const ReverseDimensions m_reverse_dims;
};

// Eval as rvalue
template<typename ReverseDimensions, typename ArgType, typename Device>
struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device>
{
  typedef TensorReverseOp<ReverseDimensions, ArgType> XprType;
  typedef typename XprType::Index Index;
  static const int NumDims = internal::array_size<ReverseDimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  enum {
    IsAligned         = false,
    PacketAccess      = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess       = NumDims > 0,
    PreferBlockAccess = true,
    Layout            = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess       = false,  // to be implemented
    RawAccess         = false
  };

  typedef internal::TensorIntDivisor<Index> IndexDivisor;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
      ArgTensorBlock;

  typedef typename internal::TensorMaterializedBlock<CoeffReturnType, NumDims,
                                                     Layout, Index>
      TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device),
        m_reverse(op.reverse()),
        m_device(device)
  {
    // Reversing a scalar isn't supported yet. It would be a no-op anyway.
    EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);

    // Compute strides
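    // Illustrative example (added comment): for a col-major 3x4x5 tensor the
    // strides computed below are {1, 3, 12}; for a row-major one they are
    // {20, 5, 1}.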
    m_dimensions = m_impl.dimensions();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_strides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_strides[i] = m_strides[i-1] * m_dimensions[i-1];
        if (m_strides[i] > 0) m_fastStrides[i] = IndexDivisor(m_strides[i]);
      }
    } else {
      m_strides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_strides[i] = m_strides[i+1] * m_dimensions[i+1];
        if (m_strides[i] > 0) m_fastStrides[i] = IndexDivisor(m_strides[i]);
      }
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

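  // Maps a coefficient index of the output (reversed) expression to the
  // corresponding index in the input expression. Worked example (added
  // comment, assuming a col-major 3x4 tensor with m_reverse = {true, false},
  // so m_strides = {1, 3}): output index 5 is coefficient (2, 1); dimension 1
  // is not reversed and contributes 1 * 3, dimension 0 is reversed and
  // contributes 3 - 2 - 1 = 0, so reverseIndex(5) == 3, i.e. input
  // coefficient (0, 1).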
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index reverseIndex(
      Index index) const {
    eigen_assert(index < dimensions().TotalSize());
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        Index idx = index / m_fastStrides[i];
        index -= idx * m_strides[i];
        if (m_reverse[i]) {
          idx = m_dimensions[i] - idx - 1;
        }
        inputIndex += idx * m_strides[i];
      }
      if (m_reverse[0]) {
        inputIndex += (m_dimensions[0] - index - 1);
      } else {
        inputIndex += index;
      }
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 1; ++i) {
        Index idx = index / m_fastStrides[i];
        index -= idx * m_strides[i];
        if (m_reverse[i]) {
          idx = m_dimensions[i] - idx - 1;
        }
        inputIndex += idx * m_strides[i];
      }
      if (m_reverse[NumDims-1]) {
        inputIndex += (m_dimensions[NumDims-1] - index - 1);
      } else {
        inputIndex += index;
      }
    }
    return inputIndex;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(
      Index index) const {
    return m_impl.coeff(reverseIndex(index));
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketReturnType packet(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    // TODO(ndjaitly): write a better packing routine that uses
    // local structure.
    EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type
                                                            values[PacketSize];
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < PacketSize; ++i) {
      values[i] = coeff(index+i);
    }
    PacketReturnType rslt = internal::pload<PacketReturnType>(values);
    return rslt;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    const size_t target_size = m_device.lastLevelCacheSize();
    // Block evaluation reads underlying memory in reverse order, and default
    // cost model does not properly catch this in bytes stored/loaded.
    return internal::TensorBlockResourceRequirements::skewed<Scalar>(
               target_size)
        .addCostPerCoeff({0, 0, 24});
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
          bool /*root_of_expr_ast*/ = false) const {
    // TODO(ezhulenev): If underlying tensor expression supports and prefers
    // block evaluation we must use it. Currently we use coeff and packet
    // access into the underlying tensor expression.
    // static const bool useBlockAccessForArgType =
    //     TensorEvaluator<ArgType, Device>::BlockAccess &&
    //     TensorEvaluator<ArgType, Device>::PreferBlockAccess;

    static const bool isColMajor =
        static_cast<int>(Layout) == static_cast<int>(ColMajor);

    static const Index inner_dim_idx = isColMajor ? 0 : NumDims - 1;
    const bool inner_dim_reversed = m_reverse[inner_dim_idx];

    // Offset in the output block.
    Index block_offset = 0;

    // Offset in the input Tensor.
    Index input_offset = reverseIndex(desc.offset());

    // Initialize output block iterator state. Dimensions in this array are
    // always in inner-most -> outer-most order (col-major layout).
    array<BlockIteratorState, NumDims> it;
    for (int i = 0; i < NumDims; ++i) {
      const int dim = isColMajor ? i : NumDims - 1 - i;
      it[i].size = desc.dimension(dim);
      it[i].count = 0;
      it[i].reverse = m_reverse[dim];

      it[i].block_stride =
          i == 0 ? 1 : (it[i - 1].size * it[i - 1].block_stride);
      it[i].block_span = it[i].block_stride * (it[i].size - 1);

      it[i].input_stride = m_strides[dim];
      it[i].input_span = it[i].input_stride * (it[i].size - 1);

      if (it[i].reverse) {
        it[i].input_stride = -1 * it[i].input_stride;
        it[i].input_span = -1 * it[i].input_span;
      }
    }

    // If multiple inner dimensions have the same reverse flag, check if we can
    // merge them into a single virtual inner dimension.
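    // Illustrative example (added comment): for a col-major 2x3 block covering
    // the whole tensor with neither dimension reversed, dim 1 has
    // block_stride == 2 == it[0].size == abs(input_stride), so dims 0 and 1
    // merge into one virtual inner dimension of size 6 that the copy loop
    // below traverses contiguously.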
    int effective_inner_dim = 0;
    for (int i = 1; i < NumDims; ++i) {
      if (it[i].reverse != it[effective_inner_dim].reverse) break;
      if (it[i].block_stride != it[effective_inner_dim].size) break;
      if (it[i].block_stride != numext::abs(it[i].input_stride)) break;

      it[i].size = it[effective_inner_dim].size * it[i].size;

      it[i].block_stride = 1;
      it[i].input_stride = (inner_dim_reversed ? -1 : 1);

      it[i].block_span = it[i].block_stride * (it[i].size - 1);
      it[i].input_span = it[i].input_stride * (it[i].size - 1);

      effective_inner_dim = i;
    }

    eigen_assert(it[effective_inner_dim].block_stride == 1);
    eigen_assert(it[effective_inner_dim].input_stride ==
                 (inner_dim_reversed ? -1 : 1));

    const Index inner_dim_size = it[effective_inner_dim].size;

    // Prepare storage for the materialized reverse result.
    const typename TensorBlock::Storage block_storage =
        TensorBlock::prepareStorage(desc, scratch);
    CoeffReturnType* block_buffer = block_storage.data();

    while (it[NumDims - 1].count < it[NumDims - 1].size) {
      // Copy inner-most dimension data from reversed location in input.
      Index dst = block_offset;
      Index src = input_offset;

      // NOTE(ezhulenev): Adding vectorized path with internal::preverse showed
      // worse results in benchmarks than a simple coefficient loop.
      if (inner_dim_reversed) {
        for (Index i = 0; i < inner_dim_size; ++i) {
          block_buffer[dst] = m_impl.coeff(src);
          ++dst;
          --src;
        }
      } else {
        for (Index i = 0; i < inner_dim_size; ++i) {
          block_buffer[dst] = m_impl.coeff(src);
          ++dst;
          ++src;
        }
      }

      // For an (effectively) 1-d tensor a single pass over the inner-most
      // dimension is enough.
      if ((NumDims - effective_inner_dim) == 1) break;

      // Update offset.
      for (Index i = effective_inner_dim + 1; i < NumDims; ++i) {
        if (++it[i].count < it[i].size) {
          block_offset += it[i].block_stride;
          input_offset += it[i].input_stride;
          break;
        }
        if (i != NumDims - 1) it[i].count = 0;
        block_offset -= it[i].block_span;
        input_offset -= it[i].input_span;
      }
    }

    return block_storage.AsTensorMaterializedBlock();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    double compute_cost = NumDims * (2 * TensorOpCost::AddCost<Index>() +
                                     2 * TensorOpCost::MulCost<Index>() +
                                     TensorOpCost::DivCost<Index>());
    for (int i = 0; i < NumDims; ++i) {
      if (m_reverse[i]) {
        compute_cost += 2 * TensorOpCost::AddCost<Index>();
      }
    }
    return m_impl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, compute_cost, false /* vectorized */, PacketSize);
  }

  EIGEN_DEVICE_FUNC typename Storage::Type data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_impl.bind(cgh);
  }
#endif

 protected:
  Dimensions m_dimensions;
  array<Index, NumDims> m_strides;
  array<IndexDivisor, NumDims> m_fastStrides;
  TensorEvaluator<ArgType, Device> m_impl;
  ReverseDimensions m_reverse;
  const Device EIGEN_DEVICE_REF m_device;

 private:
  struct BlockIteratorState {
    BlockIteratorState()
        : size(0),
          count(0),
          reverse(false),
          block_stride(0),
          block_span(0),
          input_stride(0),
          input_span(0) {}

    Index size;
    Index count;
    bool reverse;
    Index block_stride;
    Index block_span;
    Index input_stride;
    Index input_span;
  };
};

// Eval as lvalue

template <typename ReverseDimensions, typename ArgType, typename Device>
struct TensorEvaluator<TensorReverseOp<ReverseDimensions, ArgType>, Device>
    : public TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>,
                             Device> {
  typedef TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>,
                          Device> Base;
  typedef TensorReverseOp<ReverseDimensions, ArgType> XprType;
  typedef typename XprType::Index Index;
  static const int NumDims = internal::array_size<ReverseDimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;

  enum {
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = false,
    PreferBlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = false
  };
  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : Base(op, device) {}

  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  const Dimensions& dimensions() const { return this->m_dimensions; }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) {
    return this->m_impl.coeffRef(this->reverseIndex(index));
  }

  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x) {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    // This code is pilfered from TensorMorphing.h
    EIGEN_ALIGN_MAX CoeffReturnType values[PacketSize];
    internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < PacketSize; ++i) {
      this->coeffRef(index+i) = values[i];
    }
  }
};


}  // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H