/include/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h

0001 // This file is part of Eigen, a lightweight C++ template library
0002 // for linear algebra.
0003 //
0004 // Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
0005 //
0006 // This Source Code Form is subject to the terms of the Mozilla
0007 // Public License v. 2.0. If a copy of the MPL was not distributed
0008 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
0009 
0010 #ifndef EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H
0011 #define EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H
0012 
0013 namespace Eigen {
0014 
0015 /** \class TensorReshaping
0016   * \ingroup CXX11_Tensor_Module
0017   *
0018   * \brief Tensor reshaping class.
0019   *
0020   *
0021   */
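// Illustrative usage sketch (not part of the indexed header; the tensor and
// variable names below are assumptions). A TensorReshapingOp is normally
// created through TensorBase::reshape():
//
//   Eigen::Tensor<float, 3> input(2, 3, 4);
//   input.setRandom();
//   Eigen::array<Eigen::Index, 2> new_dims{{6, 4}};      // 6 * 4 == 2 * 3 * 4
//   Eigen::Tensor<float, 2> output = input.reshape(new_dims);
//
// The reshaped expression must preserve the total number of coefficients; the
// evaluator below asserts this at construction time.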
0022 namespace internal {
0023 template<typename NewDimensions, typename XprType>
0024 struct traits<TensorReshapingOp<NewDimensions, XprType> > : public traits<XprType>
0025 {
0026   typedef typename XprType::Scalar Scalar;
0027   typedef traits<XprType> XprTraits;
0028   typedef typename XprTraits::StorageKind StorageKind;
0029   typedef typename XprTraits::Index Index;
0030   typedef typename XprType::Nested Nested;
0031   typedef typename remove_reference<Nested>::type _Nested;
0032   static const int NumDimensions = array_size<NewDimensions>::value;
0033   static const int Layout = XprTraits::Layout;
0034   typedef typename XprTraits::PointerType PointerType;
0035 };
0036 
0037 template<typename NewDimensions, typename XprType>
0038 struct eval<TensorReshapingOp<NewDimensions, XprType>, Eigen::Dense>
0039 {
0040   typedef const TensorReshapingOp<NewDimensions, XprType> EIGEN_DEVICE_REF type;
0041 };
0042 
0043 template<typename NewDimensions, typename XprType>
0044 struct nested<TensorReshapingOp<NewDimensions, XprType>, 1, typename eval<TensorReshapingOp<NewDimensions, XprType> >::type>
0045 {
0046   typedef TensorReshapingOp<NewDimensions, XprType> type;
0047 };
0048 
0049 }  // end namespace internal
0050 
0051 
0052 
0053 template<typename NewDimensions, typename XprType>
0054 class TensorReshapingOp : public TensorBase<TensorReshapingOp<NewDimensions, XprType>, WriteAccessors>
0055 {
0056   public:
0057   typedef TensorBase<TensorReshapingOp<NewDimensions, XprType>, WriteAccessors> Base;
0058   typedef typename Eigen::internal::traits<TensorReshapingOp>::Scalar Scalar;
0059   typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
0060   typedef typename Eigen::internal::nested<TensorReshapingOp>::type Nested;
0061   typedef typename Eigen::internal::traits<TensorReshapingOp>::StorageKind StorageKind;
0062   typedef typename Eigen::internal::traits<TensorReshapingOp>::Index Index;
0063 
0064   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReshapingOp(const XprType& expr, const NewDimensions& dims)
0065       : m_xpr(expr), m_dims(dims) {}
0066 
0067     EIGEN_DEVICE_FUNC
0068     const NewDimensions& dimensions() const { return m_dims; }
0069 
0070     EIGEN_DEVICE_FUNC
0071     const typename internal::remove_all<typename XprType::Nested>::type&
0072     expression() const { return m_xpr; }
0073 
0074     EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorReshapingOp)
0075 
0076   protected:
0077     typename XprType::Nested m_xpr;
0078     const NewDimensions m_dims;
0079 };
0080 
0081 
0082 // Eval as rvalue
0083 template<typename NewDimensions, typename ArgType, typename Device>
0084 struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
0085 {
0086   typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
0087   typedef NewDimensions Dimensions;
0088 
0089   typedef typename XprType::Index Index;
0090   typedef typename XprType::Scalar Scalar;
0091   typedef typename XprType::CoeffReturnType CoeffReturnType;
0092   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
0093   typedef StorageMemory<CoeffReturnType, Device> Storage;
0094   typedef typename Storage::Type EvaluatorPointerType;
0095   typedef StorageMemory<typename internal::remove_const<CoeffReturnType>::type, Device> ConstCastStorage;
0096 
0097   static const int NumOutputDims = internal::array_size<Dimensions>::value;
0098   static const int NumInputDims  = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
0099 
0100   enum ReshapingKind {
0101     // We do not use layout information to determine reshaping kind.
0102     // Depending on the layout `N` can be inner or outer dimension.
0103     OneByN = 0,  // expr.reshape(1, N)
0104     NByOne = 1,  // expr.reshape(N, 1)
0105     Runtime = 2  // Reshape dimensions are dynamic (specified at runtime).
0106   };
0107 
0108   // clang-format off
0109   static const ReshapingKind kind =
0110 #if defined(EIGEN_HAS_INDEX_LIST)
0111         (NumOutputDims == 2 && internal::index_statically_eq<NewDimensions>(/*index=*/0, /*value=*/1)) ? OneByN
0112       : (NumOutputDims == 2 && internal::index_statically_eq<NewDimensions>(/*index=*/1, /*value=*/1)) ? NByOne
0113       : Runtime;
0114 #else
0115         Runtime;
0116 #endif
0117   // clang-format on
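// Illustrative sketch of how a static reshaping kind arises (assumes
// EIGEN_HAS_INDEX_LIST; `t` and `n` are placeholder names). An IndexList whose
// first dimension is the compile-time constant 1 lets index_statically_eq()
// classify the reshape as OneByN, so block() below can return a plain view:
//
//   Eigen::IndexList<Eigen::type2index<1>, Eigen::Index> dims;
//   dims.set(1, n);                  // runtime size of the second dimension
//   auto one_by_n = t.reshape(dims); // kind == OneByN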
0118 
0119   enum {
0120     IsAligned         = TensorEvaluator<ArgType, Device>::IsAligned,
0121     PacketAccess      = TensorEvaluator<ArgType, Device>::PacketAccess,
0122     // For trivial reshapes with raw access to underlying data we will provide
0123     // zero overhead block access.
0124     // TODO(ezhulenev): Consider adding block access without raw access?
0125     BlockAccess       = TensorEvaluator<ArgType, Device>::RawAccess &&
0126                         NumInputDims > 0 && NumOutputDims > 0,
0127     PreferBlockAccess = false,
0128     Layout            = TensorEvaluator<ArgType, Device>::Layout,
0129     CoordAccess       = false,  // to be implemented
0130     RawAccess         = TensorEvaluator<ArgType, Device>::RawAccess
0131   };
0132 
0133   typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
0134 
0135   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
0136   typedef internal::TensorBlockDescriptor<NumOutputDims, Index> TensorBlockDesc;
0137   typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
0138 
0139   typedef
0140       typename internal::TensorMaterializedBlock<ScalarNoConst, NumOutputDims,
0141                                                  Layout, Index>
0142           TensorBlock;
0143   //===--------------------------------------------------------------------===//
0144 
0145   EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
0146       : m_impl(op.expression(), device), m_dimensions(op.dimensions())
0147   {
0148     // The total size of the reshaped tensor must be equal to the total size
0149     // of the input tensor.
0150     eigen_assert(internal::array_prod(m_impl.dimensions()) == internal::array_prod(op.dimensions()));
0151   }
0152 
0153   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
0154 
0155 #ifdef EIGEN_USE_THREADS
0156   template <typename EvalSubExprsCallback>
0157   EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
0158       EvaluatorPointerType data, EvalSubExprsCallback done) {
0159     m_impl.evalSubExprsIfNeededAsync(data, std::move(done));
0160   }
0161 #endif
0162 
0163   EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
0164     return m_impl.evalSubExprsIfNeeded(data);
0165   }
0166   EIGEN_STRONG_INLINE void cleanup() {
0167     m_impl.cleanup();
0168   }
0169 
0170   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
0171   {
0172     return m_impl.coeff(index);
0173   }
0174 
0175   template<int LoadMode>
0176   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
0177   {
0178     return m_impl.template packet<LoadMode>(index);
0179   }
0180 
0181   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
0182     return m_impl.costPerCoeff(vectorized);
0183   }
0184 
0185   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
0186   internal::TensorBlockResourceRequirements getResourceRequirements() const {
0187     return internal::TensorBlockResourceRequirements::any();
0188   }
0189 
0190   // required in block(OutputTensorBlock* output_block) const
0191   // For C++03 compatibility this must be defined outside the method
0192   struct BlockIteratorState {
0193     Index stride;
0194     Index span;
0195     Index size;
0196     Index count;
0197   };
0198 
0199   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
0200   block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
0201           bool /*root_of_expr_ast*/ = false) const {
0202     eigen_assert(m_impl.data() != NULL);
0203     eigen_assert((kind == Runtime) ||
0204                  (kind == OneByN && desc.dimensions()[0] == 1) ||
0205                  (kind == NByOne && desc.dimensions()[1] == 1));
0206 
0207     if (kind == OneByN || kind == NByOne) {
0208       // We can guarantee at compile time that block is just a contiguous slice
0209       // of the underlying expression memory buffer.
0210       return TensorBlock(internal::TensorBlockKind::kView,
0211                            m_impl.data() + desc.offset(), desc.dimensions());
0212     } else {
0213       // This will do additional runtime checks, and in the end it might also
0214       // be a view, or it might be a block materialized in a temporary buffer.
0215       return TensorBlock::materialize(m_impl.data(), m_dimensions, desc,
0216                                         scratch);
0217     }
0218   }
0219 
0220   EIGEN_DEVICE_FUNC typename Storage::Type data() const {
0221     return constCast(m_impl.data());
0222   }
0223 
0224   EIGEN_DEVICE_FUNC const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
0225 
0226   #ifdef EIGEN_USE_SYCL
0227   // binding placeholder accessors to a command group handler for SYCL
0228   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
0229     m_impl.bind(cgh);
0230   }
0231   #endif
0232  protected:
0233   TensorEvaluator<ArgType, Device> m_impl;
0234   NewDimensions m_dimensions;
0235 };
0236 
0237 
0238 // Eval as lvalue
0239 template<typename NewDimensions, typename ArgType, typename Device>
0240   struct TensorEvaluator<TensorReshapingOp<NewDimensions, ArgType>, Device>
0241   : public TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
0242 
0243 {
0244   typedef TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> Base;
0245   typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
0246   typedef NewDimensions Dimensions;
0247 
0248   enum {
0249     IsAligned         = TensorEvaluator<ArgType, Device>::IsAligned,
0250     PacketAccess      = TensorEvaluator<ArgType, Device>::PacketAccess,
0251     BlockAccess       = TensorEvaluator<ArgType, Device>::RawAccess,
0252     PreferBlockAccess = false,
0253     Layout            = TensorEvaluator<ArgType, Device>::Layout,
0254     CoordAccess       = false,  // to be implemented
0255     RawAccess         = TensorEvaluator<ArgType, Device>::RawAccess
0256   };
0257 
0258   EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
0259     : Base(op, device)
0260   { }
0261 
0262   typedef typename XprType::Index Index;
0263   typedef typename XprType::Scalar Scalar;
0264   typedef typename XprType::CoeffReturnType CoeffReturnType;
0265   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
0266 
0267   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
0268   typedef internal::TensorBlockDescriptor<TensorEvaluator::NumOutputDims, Index>
0269       TensorBlockDesc;
0270   //===--------------------------------------------------------------------===//
0271 
0272   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
0273   {
0274     return this->m_impl.coeffRef(index);
0275   }
0276 
0277   template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
0278   void writePacket(Index index, const PacketReturnType& x)
0279   {
0280     this->m_impl.template writePacket<StoreMode>(index, x);
0281   }
0282 
0283   template <typename TensorBlock>
0284   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
0285       const TensorBlockDesc& desc, const TensorBlock& block) {
0286     eigen_assert(this->m_impl.data() != NULL);
0287 
0288     typedef typename TensorBlock::XprType TensorBlockExpr;
0289     typedef internal::TensorBlockAssignment<
0290         Scalar, TensorEvaluator::NumOutputDims, TensorBlockExpr, Index>
0291         TensorBlockAssign;
0292 
0293     TensorBlockAssign::Run(
0294         TensorBlockAssign::target(desc.dimensions(),
0295                                   internal::strides<Layout>(this->dimensions()),
0296                                   this->m_impl.data(), desc.offset()),
0297         block.expr());
0298   }
0299 };
0300 
0301 
0302 /** \class TensorSlicing
0303   * \ingroup CXX11_Tensor_Module
0304   *
0305   * \brief Tensor slicing class.
0306   *
0307   *
0308   */
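// Illustrative usage sketch (not part of the indexed header; tensor and index
// names are assumptions). A TensorSlicingOp is normally created through
// TensorBase::slice(), which takes per-dimension start offsets and extents:
//
//   Eigen::Tensor<float, 2> input(4, 5);
//   input.setRandom();
//   Eigen::array<Eigen::Index, 2> offsets{{1, 2}};
//   Eigen::array<Eigen::Index, 2> extents{{2, 3}};
//   Eigen::Tensor<float, 2> patch = input.slice(offsets, extents);
//
// Each dimension must satisfy offsets[i] + extents[i] <= input.dimension(i),
// which the evaluator below asserts at construction time.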
0309 namespace internal {
0310 template<typename StartIndices, typename Sizes, typename XprType>
0311 struct traits<TensorSlicingOp<StartIndices, Sizes, XprType> > : public traits<XprType>
0312 {
0313   typedef typename XprType::Scalar Scalar;
0314   typedef traits<XprType> XprTraits;
0315   typedef typename XprTraits::StorageKind StorageKind;
0316   typedef typename XprTraits::Index Index;
0317   typedef typename XprType::Nested Nested;
0318   typedef typename remove_reference<Nested>::type _Nested;
0319   static const int NumDimensions = array_size<StartIndices>::value;
0320   static const int Layout = XprTraits::Layout;
0321   typedef typename XprTraits::PointerType PointerType;
0322 };
0323 
0324 template<typename StartIndices, typename Sizes, typename XprType>
0325 struct eval<TensorSlicingOp<StartIndices, Sizes, XprType>, Eigen::Dense>
0326 {
0327   typedef const TensorSlicingOp<StartIndices, Sizes, XprType> EIGEN_DEVICE_REF type;
0328 };
0329 
0330 template<typename StartIndices, typename Sizes, typename XprType>
0331 struct nested<TensorSlicingOp<StartIndices, Sizes, XprType>, 1, typename eval<TensorSlicingOp<StartIndices, Sizes, XprType> >::type>
0332 {
0333   typedef TensorSlicingOp<StartIndices, Sizes, XprType> type;
0334 };
0335 
0336 }  // end namespace internal
0337 
0338 
0339 
0340 template<typename StartIndices, typename Sizes, typename XprType>
0341 class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, XprType> >
0342 {
0343   public:
0344   typedef TensorBase<TensorSlicingOp<StartIndices, Sizes, XprType> > Base;
0345   typedef typename Eigen::internal::traits<TensorSlicingOp>::Scalar Scalar;
0346   typedef typename XprType::CoeffReturnType CoeffReturnType;
0347   typedef typename Eigen::internal::nested<TensorSlicingOp>::type Nested;
0348   typedef typename Eigen::internal::traits<TensorSlicingOp>::StorageKind StorageKind;
0349   typedef typename Eigen::internal::traits<TensorSlicingOp>::Index Index;
0350 
0351   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorSlicingOp(const XprType& expr, const StartIndices& indices, const Sizes& sizes)
0352       : m_xpr(expr), m_indices(indices), m_sizes(sizes) {}
0353 
0354     EIGEN_DEVICE_FUNC
0355     const StartIndices& startIndices() const { return m_indices; }
0356     EIGEN_DEVICE_FUNC
0357     const Sizes& sizes() const { return m_sizes; }
0358 
0359     EIGEN_DEVICE_FUNC
0360     const typename internal::remove_all<typename XprType::Nested>::type&
0361     expression() const { return m_xpr; }
0362 
0363     EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorSlicingOp)
0364 
0365   protected:
0366     typename XprType::Nested m_xpr;
0367     const StartIndices m_indices;
0368     const Sizes m_sizes;
0369 };
0370 
0371 
0372 // Fixme: figure out the exact threshold
0373 namespace {
0374 template <typename Index, typename Device, bool BlockAccess> struct MemcpyTriggerForSlicing {
0375   EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const Device& device) : threshold_(2 * device.numThreads()) { }
0376   EIGEN_DEVICE_FUNC bool operator ()(Index total, Index contiguous) const {
0377     const bool prefer_block_evaluation = BlockAccess && total > 32*1024;
0378     return !prefer_block_evaluation && contiguous > threshold_;
0379   }
0380 
0381  private:
0382   Index threshold_;
0383 };
0384 
0385 // It is very expensive to start the memcpy kernel on GPU: we therefore only
0386 // use it for large copies.
0387 #ifdef EIGEN_USE_GPU
0388 template <typename Index, bool BlockAccess> struct MemcpyTriggerForSlicing<Index, GpuDevice, BlockAccess>  {
0389   EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const GpuDevice&) { }
0390   EIGEN_DEVICE_FUNC bool operator ()(Index, Index contiguous) const { return contiguous > 4*1024*1024; }
0391 };
0392 #endif
0393 
0394 // It is also very expensive to start the memcpy kernel on SYCL devices: we
0395 // therefore only use it for large copies.
0396 #ifdef EIGEN_USE_SYCL
0397 template <typename Index, bool BlockAccess> struct MemcpyTriggerForSlicing<Index, Eigen::SyclDevice, BlockAccess>  {
0398   EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const SyclDevice&) { }
0399   EIGEN_DEVICE_FUNC bool operator ()(Index, Index contiguous) const { return contiguous > 4*1024*1024; }
0400 };
0401 #endif
0402 
0403 }
0404 
0405 // Eval as rvalue
0406 template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
0407 struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
0408 {
0409   typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
0410   static const int NumDims = internal::array_size<Sizes>::value;
0411 
0412   typedef typename XprType::Index Index;
0413   typedef typename XprType::Scalar Scalar;
0414   typedef typename XprType::CoeffReturnType CoeffReturnType;
0415   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
0416   typedef Sizes Dimensions;
0417   typedef StorageMemory<CoeffReturnType, Device> Storage;
0418   typedef StorageMemory<typename internal::remove_const<CoeffReturnType>::type, Device> ConstCastStorage;
0419   typedef typename Storage::Type EvaluatorPointerType;
0420 
0421   enum {
0422     // Alignment can't be guaranteed at compile time since it depends on the
0423     // slice offsets and sizes.
0424     IsAligned         = false,
0425     PacketAccess      = TensorEvaluator<ArgType, Device>::PacketAccess,
0426     BlockAccess       = TensorEvaluator<ArgType, Device>::BlockAccess &&
0427                         // FIXME: Temporary workaround for bug in slicing of bool tensors.
0428                         !internal::is_same<typename internal::remove_const<Scalar>::type, bool>::value,
0429     PreferBlockAccess = true,
0430     Layout            = TensorEvaluator<ArgType, Device>::Layout,
0431     CoordAccess       = false,
0432     RawAccess         = false
0433   };
0434 
0435   typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
0436 
0437   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
0438   typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
0439   typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
0440 
0441   // Tensor slicing does not change the block type.
0442   typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
0443       TensorBlock;
0444   //===--------------------------------------------------------------------===//
0445 
0446   EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
0447       : m_impl(op.expression(), device), m_device(device), m_dimensions(op.sizes()), m_offsets(op.startIndices())
0448   {
0449     m_is_identity = true;
0450     for (int i = 0; i < internal::array_size<Dimensions>::value; ++i) {
0451       eigen_assert(m_impl.dimensions()[i] >=
0452                    op.sizes()[i] + op.startIndices()[i]);
0453       if (m_impl.dimensions()[i] != op.sizes()[i] ||
0454           op.startIndices()[i] != 0) {
0455         m_is_identity = false;
0456       }
0457     }
0458 
0459     // No strides for scalars.
0460     if (NumDims == 0) return;
0461 
0462     const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
0463     const Sizes& output_dims = op.sizes();
0464     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
0465       m_inputStrides[0] = 1;
0466       for (int i = 1; i < NumDims; ++i) {
0467         m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
0468       }
0469 
0470      // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed.
0471       m_outputStrides[0] = 1;
0472       for (int i = 1; i < NumDims; ++i) {
0473         m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
0474         m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
0475       }
0476     } else {
0477       m_inputStrides[NumDims-1] = 1;
0478       for (int i = NumDims - 2; i >= 0; --i) {
0479         m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
0480       }
0481 
0482      // Don't initialize m_fastOutputStrides[NumDims-1] since it won't ever be accessed.
0483       m_outputStrides[NumDims-1] = 1;
0484       for (int i = NumDims - 2; i >= 0; --i) {
0485         m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
0486         m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
0487       }
0488     }
0489   }
0490 
0491   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
0492 
0493   EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
0494     m_impl.evalSubExprsIfNeeded(NULL);
0495     if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization
0496         && data && m_impl.data()) {
0497       Index contiguous_values = 1;
0498       if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
0499         for (int i = 0; i < NumDims; ++i) {
0500           contiguous_values *= dimensions()[i];
0501           if (dimensions()[i] != m_impl.dimensions()[i]) {
0502             break;
0503           }
0504         }
0505       } else {
0506         for (int i = NumDims-1; i >= 0; --i) {
0507           contiguous_values *= dimensions()[i];
0508           if (dimensions()[i] != m_impl.dimensions()[i]) {
0509             break;
0510           }
0511         }
0512       }
0513       // Use memcpy if it's going to be faster than using the regular evaluation.
0514       const MemcpyTriggerForSlicing<Index, Device, BlockAccess> trigger(m_device);
0515       if (trigger(internal::array_prod(dimensions()), contiguous_values)) {
0516         EvaluatorPointerType src = (EvaluatorPointerType)m_impl.data();
0517         for (Index i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) {
0518           Index offset = srcCoeff(i);
0519           m_device.memcpy((void*)(m_device.get(data + i)), m_device.get(src+offset), contiguous_values * sizeof(Scalar));
0520         }
0521         return false;
0522       }
0523     }
0524     return true;
0525   }
0526 
0527 #ifdef EIGEN_USE_THREADS
0528   template <typename EvalSubExprsCallback>
0529   EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
0530       EvaluatorPointerType /*data*/, EvalSubExprsCallback done) {
0531     m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
0532   }
0533 #endif  // EIGEN_USE_THREADS
0534 
0535   EIGEN_STRONG_INLINE void cleanup() {
0536     m_impl.cleanup();
0537   }
0538 
0539   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
0540   {
0541     if (m_is_identity) {
0542       return m_impl.coeff(index);
0543     } else {
0544       return m_impl.coeff(srcCoeff(index));
0545     }
0546   }
0547 
0548   template<int LoadMode>
0549   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
0550   {
0551     const int packetSize = PacketType<CoeffReturnType, Device>::size;
0552     EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
0553     eigen_assert(index+packetSize-1 < internal::array_prod(dimensions()));
0554 
0555     if (m_is_identity) {
0556       return m_impl.template packet<LoadMode>(index);
0557     }
0558 
0559     Index inputIndices[] = {0, 0};
0560     Index indices[] = {index, index + packetSize - 1};
0561     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
0562       EIGEN_UNROLL_LOOP
0563       for (int i = NumDims - 1; i > 0; --i) {
0564         const Index idx0 = indices[0] / m_fastOutputStrides[i];
0565         const Index idx1 = indices[1] / m_fastOutputStrides[i];
0566         inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
0567         inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
0568         indices[0] -= idx0 * m_outputStrides[i];
0569         indices[1] -= idx1 * m_outputStrides[i];
0570       }
0571       inputIndices[0] += (indices[0] + m_offsets[0]);
0572       inputIndices[1] += (indices[1] + m_offsets[0]);
0573     } else {
0574       EIGEN_UNROLL_LOOP
0575       for (int i = 0; i < NumDims - 1; ++i) {
0576         const Index idx0 = indices[0] / m_fastOutputStrides[i];
0577         const Index idx1 = indices[1] / m_fastOutputStrides[i];
0578         inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
0579         inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
0580         indices[0] -= idx0 * m_outputStrides[i];
0581         indices[1] -= idx1 * m_outputStrides[i];
0582       }
0583       inputIndices[0] += (indices[0] + m_offsets[NumDims-1]);
0584       inputIndices[1] += (indices[1] + m_offsets[NumDims-1]);
0585     }
0586     if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
0587       PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]);
0588       return rslt;
0589     }
0590     else {
0591       EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize];
0592       values[0] = m_impl.coeff(inputIndices[0]);
0593       values[packetSize-1] = m_impl.coeff(inputIndices[1]);
0594       EIGEN_UNROLL_LOOP
0595       for (int i = 1; i < packetSize-1; ++i) {
0596         values[i] = coeff(index+i);
0597       }
0598       PacketReturnType rslt = internal::pload<PacketReturnType>(values);
0599       return rslt;
0600     }
0601   }
0602 
0603   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
0604     return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, m_is_identity ? 1 : NumDims);
0605   }
0606 
0607   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
0608   internal::TensorBlockResourceRequirements getResourceRequirements() const {
0609     const size_t target_size = m_device.lastLevelCacheSize();
0610     return internal::TensorBlockResourceRequirements::merge(
0611         internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size),
0612         m_impl.getResourceRequirements());
0613   }
0614 
0615   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
0616   block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
0617           bool /*root_of_expr_ast*/ = false) const {
0618     TensorBlockDesc arg_desc = desc.WithOffset(srcCoeff(desc.offset()));
0619     TensorBlock block = m_impl.block(arg_desc, scratch);
0620     if (!arg_desc.HasDestinationBuffer()) desc.DropDestinationBuffer();
0621     return block;
0622   }
0623 
0624   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Storage::Type data() const {
0625     typename Storage::Type result = constCast(m_impl.data());
0626     if (result) {
0627       Index offset = 0;
0628       if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
0629         for (int i = 0; i < NumDims; ++i) {
0630           if (m_dimensions[i] != m_impl.dimensions()[i]) {
0631             offset += m_offsets[i] * m_inputStrides[i];
0632             for (int j = i+1; j < NumDims; ++j) {
0633               if (m_dimensions[j] > 1) {
0634                 return NULL;
0635               }
0636               offset += m_offsets[j] * m_inputStrides[j];
0637             }
0638             break;
0639           }
0640         }
0641       } else {
0642         for (int i = NumDims - 1; i >= 0; --i) {
0643           if (m_dimensions[i] != m_impl.dimensions()[i]) {
0644             offset += m_offsets[i] * m_inputStrides[i];
0645             for (int j = i-1; j >= 0; --j) {
0646               if (m_dimensions[j] > 1) {
0647                 return NULL;
0648               }
0649               offset += m_offsets[j] * m_inputStrides[j];
0650             }
0651             break;
0652           }
0653         }
0654       }
0655       return result + offset;
0656     }
0657     return NULL;
0658   }
0659 #ifdef EIGEN_USE_SYCL
0660   // binding placeholder accessors to a command group handler for SYCL
0661   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
0662     m_impl.bind(cgh);
0663   }
0664 #endif
0665 
0666  protected:
0667   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
0668   {
0669     Index inputIndex = 0;
0670     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
0671       EIGEN_UNROLL_LOOP
0672       for (int i = NumDims - 1; i > 0; --i) {
0673         const Index idx = index / m_fastOutputStrides[i];
0674         inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
0675         index -= idx * m_outputStrides[i];
0676       }
0677       inputIndex += (index + m_offsets[0]);
0678     } else {
0679       EIGEN_UNROLL_LOOP
0680       for (int i = 0; i < NumDims - 1; ++i) {
0681         const Index idx = index / m_fastOutputStrides[i];
0682         inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
0683         index -= idx * m_outputStrides[i];
0684       }
0685       inputIndex += (index + m_offsets[NumDims-1]);
0686     }
0687     return inputIndex;
0688   }
0689 
0690   array<Index, NumDims> m_outputStrides;
0691   array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
0692   array<Index, NumDims> m_inputStrides;
0693   TensorEvaluator<ArgType, Device> m_impl;
0694   const Device EIGEN_DEVICE_REF m_device;
0695   Dimensions m_dimensions;
0696   bool m_is_identity;
0697   const StartIndices m_offsets;
0698 };
0699 
0700 
0701 // Eval as lvalue
0702 template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
0703 struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
0704   : public TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
0705 {
0706   typedef TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> Base;
0707   typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
0708   static const int NumDims = internal::array_size<Sizes>::value;
0709 
0710   typedef typename XprType::Index Index;
0711   typedef typename XprType::Scalar Scalar;
0712   typedef typename XprType::CoeffReturnType CoeffReturnType;
0713   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
0714   typedef Sizes Dimensions;
0715 
0716   enum {
0717     IsAligned         = false,
0718     PacketAccess      = TensorEvaluator<ArgType, Device>::PacketAccess,
0719     BlockAccess       = TensorEvaluator<ArgType, Device>::BlockAccess,
0720     PreferBlockAccess = true,
0721     Layout            = TensorEvaluator<ArgType, Device>::Layout,
0722     CoordAccess       = false,
0723     RawAccess         = (NumDims == 1) & TensorEvaluator<ArgType, Device>::RawAccess
0724   };
0725 
0726   typedef typename internal::remove_const<Scalar>::type ScalarNoConst;
0727 
0728   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
0729   typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
0730   typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
0731   //===--------------------------------------------------------------------===//
0732 
0733   EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
0734     : Base(op, device)
0735     { }
0736 
0737   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
0738   {
0739     if (this->m_is_identity) {
0740       return this->m_impl.coeffRef(index);
0741     } else {
0742       return this->m_impl.coeffRef(this->srcCoeff(index));
0743     }
0744   }
0745 
0746   template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
0747   void writePacket(Index index, const PacketReturnType& x)
0748   {
0749     if (this->m_is_identity) {
0750       this->m_impl.template writePacket<StoreMode>(index, x);
0751       return;
0752     }
0753 
0754     const int packetSize = PacketType<CoeffReturnType, Device>::size;
0755     Index inputIndices[] = {0, 0};
0756     Index indices[] = {index, index + packetSize - 1};
0757     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
0758       EIGEN_UNROLL_LOOP
0759       for (int i = NumDims - 1; i > 0; --i) {
0760         const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
0761         const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
0762         inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
0763         inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
0764         indices[0] -= idx0 * this->m_outputStrides[i];
0765         indices[1] -= idx1 * this->m_outputStrides[i];
0766       }
0767       inputIndices[0] += (indices[0] + this->m_offsets[0]);
0768       inputIndices[1] += (indices[1] + this->m_offsets[0]);
0769     } else {
0770       EIGEN_UNROLL_LOOP
0771       for (int i = 0; i < NumDims - 1; ++i) {
0772         const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
0773         const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
0774         inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
0775         inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
0776         indices[0] -= idx0 * this->m_outputStrides[i];
0777         indices[1] -= idx1 * this->m_outputStrides[i];
0778       }
0779       inputIndices[0] += (indices[0] + this->m_offsets[NumDims-1]);
0780       inputIndices[1] += (indices[1] + this->m_offsets[NumDims-1]);
0781     }
0782     if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
0783       this->m_impl.template writePacket<StoreMode>(inputIndices[0], x);
0784     }
0785     else {
0786       EIGEN_ALIGN_MAX CoeffReturnType values[packetSize];
0787       internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
0788       this->m_impl.coeffRef(inputIndices[0]) = values[0];
0789       this->m_impl.coeffRef(inputIndices[1]) = values[packetSize-1];
0790       EIGEN_UNROLL_LOOP
0791       for (int i = 1; i < packetSize-1; ++i) {
0792         this->coeffRef(index+i) = values[i];
0793       }
0794     }
0795   }
0796 
0797   template<typename TensorBlock>
0798   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
0799       const TensorBlockDesc& desc, const TensorBlock& block) {
0800     TensorBlockDesc arg_desc = desc.WithOffset(this->srcCoeff(desc.offset()));
0801     this->m_impl.writeBlock(arg_desc, block);
0802   }
0803 };
0804 
0805 namespace internal {
0806 template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
0807 struct traits<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> > : public traits<XprType>
0808 {
0809   typedef typename XprType::Scalar Scalar;
0810   typedef traits<XprType> XprTraits;
0811   typedef typename XprTraits::StorageKind StorageKind;
0812   typedef typename XprTraits::Index Index;
0813   typedef typename XprType::Nested Nested;
0814   typedef typename remove_reference<Nested>::type _Nested;
0815   static const int NumDimensions = array_size<StartIndices>::value;
0816   static const int Layout = XprTraits::Layout;
0817   typedef typename XprTraits::PointerType PointerType;
0818 };
0819 
0820 template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
0821 struct eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, Eigen::Dense>
0822 {
0823   typedef const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> EIGEN_DEVICE_REF type;
0824 };
0825 
0826 template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
0827 struct nested<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, 1, typename eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >::type>
0828 {
0829   typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> type;
0830 };
0831 
0832 }  // end namespace internal
0833 
0834 
0835 template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
0836 class TensorStridingSlicingOp : public TensorBase<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >
0837 {
0838   public:
0839   typedef TensorBase<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> > Base;
0840   typedef typename internal::traits<TensorStridingSlicingOp>::Scalar Scalar;
0841   typedef typename XprType::CoeffReturnType CoeffReturnType;
0842   typedef typename internal::nested<TensorStridingSlicingOp>::type Nested;
0843   typedef typename internal::traits<TensorStridingSlicingOp>::StorageKind StorageKind;
0844   typedef typename internal::traits<TensorStridingSlicingOp>::Index Index;
0845 
0846   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingSlicingOp(
0847     const XprType& expr, const StartIndices& startIndices,
0848     const StopIndices& stopIndices, const Strides& strides)
0849       : m_xpr(expr), m_startIndices(startIndices), m_stopIndices(stopIndices),
0850         m_strides(strides) {}
0851 
0852     EIGEN_DEVICE_FUNC
0853     const StartIndices& startIndices() const { return m_startIndices; }
0854     EIGEN_DEVICE_FUNC
0855     const StopIndices& stopIndices() const { return m_stopIndices; }
0856     EIGEN_DEVICE_FUNC
0857     const Strides& strides() const { return m_strides; }
0858 
0859     EIGEN_DEVICE_FUNC
0860     const typename internal::remove_all<typename XprType::Nested>::type&
0861     expression() const { return m_xpr; }
0862 
0863     EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorStridingSlicingOp)
0864 
0865   protected:
0866     typename XprType::Nested m_xpr;
0867     const StartIndices m_startIndices;
0868     const StopIndices m_stopIndices;
0869     const Strides m_strides;
0870 };
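// Illustrative usage sketch (not part of the indexed header; variable names
// are assumptions). A TensorStridingSlicingOp is normally created through
// TensorBase::stridedSlice(), with per-dimension start, stop and stride:
//
//   Eigen::Tensor<float, 1> input(10);
//   input.setRandom();
//   Eigen::array<Eigen::Index, 1> start{{1}};
//   Eigen::array<Eigen::Index, 1> stop{{8}};
//   Eigen::array<Eigen::Index, 1> step{{2}};
//   Eigen::Tensor<float, 1> out = input.stridedSlice(start, stop, step);
//   // out holds input(1), input(3), input(5), input(7)
//
// Negative strides walk a dimension backwards; the evaluator below clamps the
// start/stop indices and allows zero-sized (empty) result dimensions.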
0871 
0872 // Eval as rvalue
0873 template<typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device>
0874 struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
0875 {
0876   typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
0877   static const int NumDims = internal::array_size<Strides>::value;
0878   typedef typename XprType::Index Index;
0879   typedef typename XprType::Scalar Scalar;
0880   typedef typename XprType::CoeffReturnType CoeffReturnType;
0881   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
0882   typedef StorageMemory<CoeffReturnType, Device> Storage;
0883   typedef typename Storage::Type EvaluatorPointerType;
0884   typedef Strides Dimensions;
0885 
0886   enum {
0887     // Alignment can't be guaranteed at compile time since it depends on the
0888     // slice offsets and sizes.
0889     IsAligned = false,
0890     PacketAccess = false,
0891     BlockAccess = false,
0892     PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
0893     Layout = TensorEvaluator<ArgType, Device>::Layout,
0894     RawAccess = false
0895   };
0896 
0897   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
0898   typedef internal::TensorBlockNotImplemented TensorBlock;
0899   //===--------------------------------------------------------------------===//
0900 
0901   EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
0902       : m_impl(op.expression(), device),
0903         m_device(device),
0904         m_strides(op.strides())
0905   {
0906     // Handle degenerate intervals by gracefully clamping and allowing m_dimensions to be zero
0907     DSizes<Index, NumDims> startIndicesClamped, stopIndicesClamped;
0908     for (ptrdiff_t i = 0; i < internal::array_size<Dimensions>::value; ++i) {
0909       eigen_assert(m_strides[i] != 0 && "0 stride is invalid");
0910       if (m_strides[i] > 0) {
0911         startIndicesClamped[i] =
0912             clamp(op.startIndices()[i], 0, m_impl.dimensions()[i]);
0913         stopIndicesClamped[i] =
0914             clamp(op.stopIndices()[i], 0, m_impl.dimensions()[i]);
0915       } else {
0916         /* implies m_strides[i] < 0 by assert */
0917         startIndicesClamped[i] =
0918             clamp(op.startIndices()[i], -1, m_impl.dimensions()[i] - 1);
0919         stopIndicesClamped[i] =
0920             clamp(op.stopIndices()[i], -1, m_impl.dimensions()[i] - 1);
0921       }
0922       m_startIndices[i] = startIndicesClamped[i];
0923     }
0924 
0925     typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
0926     const InputDimensions& input_dims = m_impl.dimensions();
0927 
0928     // compute output tensor shape
0929     m_is_identity = true;
0930     for (int i = 0; i < NumDims; i++) {
0931       Index interval = stopIndicesClamped[i] - startIndicesClamped[i];
0932       if (interval == 0 || ((interval < 0) != (m_strides[i] < 0))) {
0933         m_dimensions[i] = 0;
0934       } else {
0935         m_dimensions[i] =
0936             (interval / m_strides[i]) + (interval % m_strides[i] != 0 ? 1 : 0);
0937         eigen_assert(m_dimensions[i] >= 0);
0938       }
0939       if (m_strides[i] != 1 || interval != m_impl.dimensions()[i]) {
0940         m_is_identity = false;
0941       }
0942     }
0943 
0944     Strides output_dims = m_dimensions;
0945 
0946     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
0947       m_inputStrides[0] = m_strides[0];
0948       m_offsets[0] = startIndicesClamped[0];
0949       Index previousDimProduct = 1;
0950       for (int i = 1; i < NumDims; ++i) {
0951         previousDimProduct *= input_dims[i-1];
0952         m_inputStrides[i] = previousDimProduct * m_strides[i];
0953         m_offsets[i] = startIndicesClamped[i] * previousDimProduct;
0954       }
0955 
0956       // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed.
0957       m_outputStrides[0] = 1;
0958       for (int i = 1; i < NumDims; ++i) {
0959         m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
0960         m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
0961       }
0962     } else {
0963       m_inputStrides[NumDims-1] = m_strides[NumDims-1];
0964       m_offsets[NumDims-1] = startIndicesClamped[NumDims-1];
0965       Index previousDimProduct = 1;
0966       for (int i = NumDims - 2; i >= 0; --i) {
0967         previousDimProduct *= input_dims[i+1];
0968         m_inputStrides[i] = previousDimProduct * m_strides[i];
0969         m_offsets[i] = startIndicesClamped[i] * previousDimProduct;
0970       }
0971 
0972       m_outputStrides[NumDims-1] = 1;
0973       for (int i = NumDims - 2; i >= 0; --i) {
0974         m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
0975         m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i] > 0 ? m_outputStrides[i] : 1);
0976       }
0977     }
0978   }
0979 
0980   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
0981 
0982 
0983   EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
0984     m_impl.evalSubExprsIfNeeded(NULL);
0985     return true;
0986   }
0987 
0988   EIGEN_STRONG_INLINE void cleanup() {
0989     m_impl.cleanup();
0990   }
0991 
0992   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
0993   {
0994     if (m_is_identity) {
0995       return m_impl.coeff(index);
0996     } else {
0997       return m_impl.coeff(srcCoeff(index));
0998     }
0999   }
1000 
1001   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
1002     return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, m_is_identity ? 1 : NumDims);
1003   }
1004 
1005   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Storage::Type data() const {
1006     return NULL;
1007   }
1008 #ifdef EIGEN_USE_SYCL
1009   // binding placeholder accessors to a command group handler for SYCL
1010   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
1011     m_impl.bind(cgh);
1012   }
1013 #endif
1014  protected:
1015   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
1016   {
1017     Index inputIndex = 0;
1018     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
1019       EIGEN_UNROLL_LOOP
1020       for (int i = NumDims - 1; i >= 0; --i) {
1021         const Index idx = index / m_fastOutputStrides[i];
1022         inputIndex += idx * m_inputStrides[i] + m_offsets[i];
1023         index -= idx * m_outputStrides[i];
1024       }
1025     } else {
1026       EIGEN_UNROLL_LOOP
1027       for (int i = 0; i < NumDims; ++i) {
1028         const Index idx = index / m_fastOutputStrides[i];
1029         inputIndex += idx * m_inputStrides[i] + m_offsets[i];
1030         index -= idx * m_outputStrides[i];
1031       }
1032     }
1033     return inputIndex;
1034   }
1035 
1036   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index clamp(Index value, Index min, Index max) {
1037 #ifndef SYCL_DEVICE_ONLY
1038     return numext::maxi(min, numext::mini(max,value));
1039 #else
1040     return cl::sycl::clamp(value, min, max);
1041 #endif
1042   }
1043 
1044   array<Index, NumDims> m_outputStrides;
1045   array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
1046   array<Index, NumDims> m_inputStrides;
1047   bool m_is_identity;
1048   TensorEvaluator<ArgType, Device> m_impl;
1049   const Device EIGEN_DEVICE_REF m_device;
1050   DSizes<Index, NumDims> m_startIndices; // clamped startIndices
1051   DSizes<Index, NumDims> m_dimensions;
1052   DSizes<Index, NumDims> m_offsets; // offset in a flattened shape
1053   const Strides m_strides;
1054 };
1055 
1056 // Eval as lvalue
1057 template<typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device>
1058 struct TensorEvaluator<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
1059   : public TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
1060 {
1061   typedef TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device> Base;
1062   typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
1063   static const int NumDims = internal::array_size<Strides>::value;
1064 
1065   enum {
1066     IsAligned = false,
1067     PacketAccess = false,
1068     BlockAccess = false,
1069     PreferBlockAccess = TensorEvaluator<ArgType, Device>::PreferBlockAccess,
1070     Layout = TensorEvaluator<ArgType, Device>::Layout,
1071     CoordAccess = TensorEvaluator<ArgType, Device>::CoordAccess,
1072     RawAccess = false
1073   };
1074 
1075   //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
1076   typedef internal::TensorBlockNotImplemented TensorBlock;
1077   //===--------------------------------------------------------------------===//
1078 
1079   EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
1080     : Base(op, device)
1081     { }
1082 
1083   typedef typename XprType::Index Index;
1084   typedef typename XprType::Scalar Scalar;
1085   typedef typename XprType::CoeffReturnType CoeffReturnType;
1086   typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
1087   typedef Strides Dimensions;
1088 
1089   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
1090   {
1091     if (this->m_is_identity) {
1092       return this->m_impl.coeffRef(index);
1093     } else {
1094       return this->m_impl.coeffRef(this->srcCoeff(index));
1095     }
1096   }
1097 };
1098 
1099 
1100 } // end namespace Eigen
1101 
1102 #endif // EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H