// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_PADDING_H
#define EIGEN_CXX11_TENSOR_TENSOR_PADDING_H

namespace Eigen {

/** \class TensorPadding
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor padding class.
  * At the moment only padding with a constant value is supported.
  *
  */
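// Illustrative only, not part of the original header: a minimal usage sketch.
// pad() is the TensorBase entry point that builds a TensorPaddingOp; the
// tensor shapes and padding amounts below are arbitrary example values.
//
//   Eigen::Tensor<float, 2> input(2, 3);
//   input.setConstant(1.0f);
//   Eigen::array<std::pair<int, int>, 2> paddings;
//   paddings[0] = std::make_pair(1, 2);  // 1 element before, 2 after dim 0
//   paddings[1] = std::make_pair(0, 1);  // 0 elements before, 1 after dim 1
//   Eigen::Tensor<float, 2> padded = input.pad(paddings, 0.0f);
//   // padded has dimensions (1 + 2 + 2) x (0 + 3 + 1) = 5 x 4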
namespace internal {
template<typename PaddingDimensions, typename XprType>
struct traits<TensorPaddingOp<PaddingDimensions, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = XprTraits::NumDimensions;
  static const int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};

template<typename PaddingDimensions, typename XprType>
struct eval<TensorPaddingOp<PaddingDimensions, XprType>, Eigen::Dense>
{
  typedef const TensorPaddingOp<PaddingDimensions, XprType>& type;
};

template<typename PaddingDimensions, typename XprType>
struct nested<TensorPaddingOp<PaddingDimensions, XprType>, 1, typename eval<TensorPaddingOp<PaddingDimensions, XprType> >::type>
{
  typedef TensorPaddingOp<PaddingDimensions, XprType> type;
};

}  // end namespace internal


template<typename PaddingDimensions, typename XprType>
class TensorPaddingOp : public TensorBase<TensorPaddingOp<PaddingDimensions, XprType>, ReadOnlyAccessors>
{
  public:
  typedef typename Eigen::internal::traits<TensorPaddingOp>::Scalar Scalar;
  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename Eigen::internal::nested<TensorPaddingOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorPaddingOp>::StorageKind StorageKind;
  typedef typename Eigen::internal::traits<TensorPaddingOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorPaddingOp(const XprType& expr, const PaddingDimensions& padding_dims, const Scalar padding_value)
      : m_xpr(expr), m_padding_dims(padding_dims), m_padding_value(padding_value) {}

    EIGEN_DEVICE_FUNC
    const PaddingDimensions& padding() const { return m_padding_dims; }
    EIGEN_DEVICE_FUNC
    Scalar padding_value() const { return m_padding_value; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

  protected:
    typename XprType::Nested m_xpr;
    const PaddingDimensions m_padding_dims;
    const Scalar m_padding_value;
};


// Eval as rvalue
template<typename PaddingDimensions, typename ArgType, typename Device>
struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device>
{
  typedef TensorPaddingOp<PaddingDimensions, ArgType> XprType;
  typedef typename XprType::Index Index;
  static const int NumDims = internal::array_size<PaddingDimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  enum {
    IsAligned         = true,
    PacketAccess      = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess       = TensorEvaluator<ArgType, Device>::RawAccess,
    PreferBlockAccess = true,
    Layout            = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess       = true,
    RawAccess         = false
  };

  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumDims,
                                                     Layout, Index>
      TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_padding(op.padding()), m_paddingValue(op.padding_value()), m_device(device)
  {
    // The padding op doesn't change the rank of the tensor. Directly padding a scalar would
    // yield a vector, which doesn't make sense. Instead, reshape the scalar into a one-element
    // vector first and then pad.
    EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);

    // Compute dimensions
    m_dimensions = m_impl.dimensions();
    for (int i = 0; i < NumDims; ++i) {
      m_dimensions[i] += m_padding[i].first + m_padding[i].second;
    }
    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
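    // Descriptive note: m_outputStrides has NumDims + 1 entries. In ColMajor,
    // m_outputStrides[i] is the stride of dimension i and the extra entry
    // m_outputStrides[NumDims] holds the total output size. In RowMajor the
    // entries are shifted by one (m_outputStrides[i+1] is the stride of
    // dimension i), so m_outputStrides[0] holds the total output size. The
    // packet routines below rely on the extra entry to bound a dimension.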
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_inputStrides[0] = 1;
      m_outputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
        m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
      }
      m_outputStrides[NumDims] = m_outputStrides[NumDims-1] * m_dimensions[NumDims-1];
    } else {
      m_inputStrides[NumDims - 1] = 1;
      m_outputStrides[NumDims] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
        m_outputStrides[i+1] = m_outputStrides[i+2] * m_dimensions[i+1];
      }
      m_outputStrides[0] = m_outputStrides[1] * m_dimensions[0];
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    eigen_assert(index < dimensions().TotalSize());
    Index inputIndex = 0;
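    // Decompose the linear output index into per-dimension coordinates, from
    // the outermost stride down to the innermost. A coordinate that falls in
    // a padded region short-circuits to the padding value; otherwise it is
    // shifted by the left padding and folded into inputIndex.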
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_outputStrides[i];
        if (isPaddingAtIndexForDim(idx, i)) {
          return m_paddingValue;
        }
        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      if (isPaddingAtIndexForDim(index, 0)) {
        return m_paddingValue;
      }
      inputIndex += (index - m_padding[0].first);
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_outputStrides[i+1];
        if (isPaddingAtIndexForDim(idx, i)) {
          return m_paddingValue;
        }
        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
        index -= idx * m_outputStrides[i+1];
      }
      if (isPaddingAtIndexForDim(index, NumDims-1)) {
        return m_paddingValue;
      }
      inputIndex += (index - m_padding[NumDims-1].first);
    }
    return m_impl.coeff(inputIndex);
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      return packetColMajor(index);
    }
    return packetRowMajor(index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    TensorOpCost cost = m_impl.costPerCoeff(vectorized);
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims; ++i)
        updateCostPerDimension(cost, i, i == 0);
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i >= 0; --i)
        updateCostPerDimension(cost, i, i == NumDims - 1);
    }
    return cost;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    const size_t target_size = m_device.lastLevelCacheSize();
    return internal::TensorBlockResourceRequirements::merge(
        internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size),
        m_impl.getResourceRequirements());
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
          bool /*root_of_expr_ast*/ = false) const {
    // If one of the dimensions is zero, return an empty block view.
    if (desc.size() == 0) {
      return TensorBlock(internal::TensorBlockKind::kView, NULL,
                           desc.dimensions());
    }

    static const bool IsColMajor = Layout == static_cast<int>(ColMajor);
    const int inner_dim_idx = IsColMajor ? 0 : NumDims - 1;

    Index offset = desc.offset();

    // Compute offsets in the output tensor corresponding to desc.offset().
    DSizes<Index, NumDims> output_offsets;
    for (int i = NumDims - 1; i > 0; --i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      const int stride_dim = IsColMajor ? dim : dim + 1;
      output_offsets[dim] = offset / m_outputStrides[stride_dim];
      offset -= output_offsets[dim] * m_outputStrides[stride_dim];
    }
    output_offsets[inner_dim_idx] = offset;

    // Offsets in the input corresponding to the output offsets.
    DSizes<Index, NumDims> input_offsets = output_offsets;
    for (int i = 0; i < NumDims; ++i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      input_offsets[dim] = input_offsets[dim] - m_padding[dim].first;
    }

    // Compute the offset in the input buffer. At this point it might be
    // invalid and point outside of the input buffer, because we don't check
    // for negative offsets; it will be corrected in the block iteration loop
    // below.
    Index input_offset = 0;
    for (int i = 0; i < NumDims; ++i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      input_offset += input_offsets[dim] * m_inputStrides[dim];
    }

    // Destination buffer and scratch buffer are both indexed from 0 and have
    // the same dimensions as the requested block (for the destination buffer
    // this property is guaranteed by `desc.destination()`).
    Index output_offset = 0;
    const DSizes<Index, NumDims> output_strides =
        internal::strides<Layout>(desc.dimensions());

    // NOTE(ezhulenev): We initialize block iteration state for `NumDims - 1`
    // dimensions, skipping the innermost dimension. In theory it should be
    // possible to squeeze matching innermost dimensions, but in practice that
    // did not show any improvement in benchmarks. Also in practice the first
    // outer dimension usually has padding, which will prevent squeezing.

    // Initialize the output block iterator state. Dimensions in this array
    // are always in innermost -> outermost order (col major layout).
    array<BlockIteratorState, NumDims - 1> it;
    for (int i = 0; i < NumDims - 1; ++i) {
      const int dim = IsColMajor ? i + 1 : NumDims - i - 2;
      it[i].count = 0;
      it[i].size = desc.dimension(dim);

      it[i].input_stride = m_inputStrides[dim];
      it[i].input_span = it[i].input_stride * (it[i].size - 1);

      it[i].output_stride = output_strides[dim];
      it[i].output_span = it[i].output_stride * (it[i].size - 1);
    }

    const Index input_inner_dim_size =
        static_cast<Index>(m_impl.dimensions()[inner_dim_idx]);

    // Total output size.
    const Index output_size = desc.size();

    // We will fill the inner dimension of this size in the output. It might
    // be larger than the inner dimension of the input, so we might have to
    // pad before/after copying values from the input inner dimension.
    const Index output_inner_dim_size = desc.dimension(inner_dim_idx);

    // How many values to fill with padding BEFORE reading from the input
    // inner dimension.
    const Index output_inner_pad_before_size =
        input_offsets[inner_dim_idx] < 0
            ? numext::mini(numext::abs(input_offsets[inner_dim_idx]),
                           output_inner_dim_size)
            : 0;

    // How many values we can actually copy from the input inner dimension.
    const Index output_inner_copy_size = numext::mini(
        // Want to copy from input.
        (output_inner_dim_size - output_inner_pad_before_size),
        // Can copy from input.
        numext::maxi(input_inner_dim_size - (input_offsets[inner_dim_idx] +
                                             output_inner_pad_before_size),
                     Index(0)));

    eigen_assert(output_inner_copy_size >= 0);

    // How many values to fill with padding AFTER reading from the input inner
    // dimension.
    const Index output_inner_pad_after_size =
        (output_inner_dim_size - output_inner_copy_size -
         output_inner_pad_before_size);

    // Sanity check: the sum of all sizes must be equal to the output size.
    eigen_assert(output_inner_dim_size ==
                 (output_inner_pad_before_size + output_inner_copy_size +
                  output_inner_pad_after_size));

    // Keep track of the current coordinates and padding in the output.
    DSizes<Index, NumDims> output_coord = output_offsets;
    DSizes<Index, NumDims> output_padded;
    for (int i = 0; i < NumDims; ++i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
    }

    typedef internal::StridedLinearBufferCopy<ScalarNoConst, Index> LinCopy;

    // Prepare storage for the materialized padding result.
    const typename TensorBlock::Storage block_storage =
        TensorBlock::prepareStorage(desc, scratch);

    // TODO(ezhulenev): Squeeze multiple non-padded inner dimensions into a
    // single logical inner dimension.

    // When possible we squeeze writes for the innermost (only if non-padded)
    // dimension with the first padded dimension. This allows us to reduce the
    // number of calls to LinCopy and to make better use of vector
    // instructions.
    const bool squeeze_writes =
        NumDims > 1 &&
        // inner dimension is not padded
        (input_inner_dim_size == m_dimensions[inner_dim_idx]) &&
        // and equal to the block inner dimension
        (input_inner_dim_size == output_inner_dim_size);

    const int squeeze_dim = IsColMajor ? inner_dim_idx + 1 : inner_dim_idx - 1;

    // Maximum coordinate on a squeeze dimension that we can write to.
    const Index squeeze_max_coord =
        squeeze_writes ? numext::mini(
                             // max non-padded element in the input
                             static_cast<Index>(m_dimensions[squeeze_dim] -
                                                m_padding[squeeze_dim].second),
                             // max element in the output buffer
                             static_cast<Index>(output_offsets[squeeze_dim] +
                                                desc.dimension(squeeze_dim)))
                       : static_cast<Index>(0);

    // Iterate copying data from `m_impl.data()` to the output buffer.
    for (Index size = 0; size < output_size;) {
      // Detect if we are in the padded region (excluding the innermost
      // dimension).
      bool is_padded = false;
      for (int j = 1; j < NumDims; ++j) {
        const int dim = IsColMajor ? j : NumDims - j - 1;
        is_padded = output_padded[dim];
        if (is_padded) break;
      }

      if (is_padded) {
        // Fill the single innermost dimension with the padding value.
        size += output_inner_dim_size;

        LinCopy::template Run<LinCopy::Kind::FillLinear>(
            typename LinCopy::Dst(output_offset, 1, block_storage.data()),
            typename LinCopy::Src(0, 0, &m_paddingValue),
            output_inner_dim_size);

      } else if (squeeze_writes) {
        // Squeeze multiple reads from the innermost dimensions.
        const Index squeeze_num = squeeze_max_coord - output_coord[squeeze_dim];
        size += output_inner_dim_size * squeeze_num;

        // Copy `squeeze_num` inner dimensions from input to output.
        LinCopy::template Run<LinCopy::Kind::Linear>(
            typename LinCopy::Dst(output_offset, 1, block_storage.data()),
            typename LinCopy::Src(input_offset, 1, m_impl.data()),
            output_inner_dim_size * squeeze_num);

        // Update the iteration state for only `squeeze_num - 1` processed
        // inner dimensions, because the iteration state update at the end of
        // the loop handles the last processed inner dimension.
        it[0].count += (squeeze_num - 1);
        input_offset += it[0].input_stride * (squeeze_num - 1);
        output_offset += it[0].output_stride * (squeeze_num - 1);
        output_coord[squeeze_dim] += (squeeze_num - 1);

      } else {
        // Single read from the innermost dimension.
        size += output_inner_dim_size;

        {  // Fill with padding before copying from the input inner dimension.
          const Index out = output_offset;

          LinCopy::template Run<LinCopy::Kind::FillLinear>(
              typename LinCopy::Dst(out, 1, block_storage.data()),
              typename LinCopy::Src(0, 0, &m_paddingValue),
              output_inner_pad_before_size);
        }

        {  // Copy data from the input inner dimension.
          const Index out = output_offset + output_inner_pad_before_size;
          const Index in = input_offset + output_inner_pad_before_size;

          eigen_assert(output_inner_copy_size == 0 || m_impl.data() != NULL);

          LinCopy::template Run<LinCopy::Kind::Linear>(
              typename LinCopy::Dst(out, 1, block_storage.data()),
              typename LinCopy::Src(in, 1, m_impl.data()),
              output_inner_copy_size);
        }

        {  // Fill with padding after copying from the input inner dimension.
          const Index out = output_offset + output_inner_pad_before_size +
                            output_inner_copy_size;

          LinCopy::template Run<LinCopy::Kind::FillLinear>(
              typename LinCopy::Dst(out, 1, block_storage.data()),
              typename LinCopy::Src(0, 0, &m_paddingValue),
              output_inner_pad_after_size);
        }
      }

      for (int j = 0; j < NumDims - 1; ++j) {
        const int dim = IsColMajor ? j + 1 : NumDims - j - 2;

        if (++it[j].count < it[j].size) {
          input_offset += it[j].input_stride;
          output_offset += it[j].output_stride;
          output_coord[dim] += 1;
          output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
          break;
        }
        it[j].count = 0;
        input_offset -= it[j].input_span;
        output_offset -= it[j].output_span;
        output_coord[dim] -= it[j].size - 1;
        output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
      }
    }

    return block_storage.AsTensorMaterializedBlock();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_impl.bind(cgh);
  }
#endif

 private:
  struct BlockIteratorState {
    BlockIteratorState()
        : count(0),
          size(0),
          input_stride(0),
          input_span(0),
          output_stride(0),
          output_span(0) {}

    Index count;
    Index size;
    Index input_stride;
    Index input_span;
    Index output_stride;
    Index output_span;
  };

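  // When PaddingDimensions carries compile-time values (EIGEN_HAS_INDEX_LIST),
  // padding amounts that are statically known to be zero let the compiler
  // drop the corresponding bound check below.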
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isPaddingAtIndexForDim(
      Index index, int dim_index) const {
#if defined(EIGEN_HAS_INDEX_LIST)
    return (!internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0) &&
            index < m_padding[dim_index].first) ||
        (!internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0) &&
         index >= m_dimensions[dim_index] - m_padding[dim_index].second);
#else
    return (index < m_padding[dim_index].first) ||
           (index >= m_dimensions[dim_index] - m_padding[dim_index].second);
#endif
  }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isLeftPaddingCompileTimeZero(
      int dim_index) const {
#if defined(EIGEN_HAS_INDEX_LIST)
    return internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0);
#else
    EIGEN_UNUSED_VARIABLE(dim_index);
    return false;
#endif
  }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isRightPaddingCompileTimeZero(
      int dim_index) const {
#if defined(EIGEN_HAS_INDEX_LIST)
    return internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0);
#else
    EIGEN_UNUSED_VARIABLE(dim_index);
    return false;
#endif
  }

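  // Cost model: scale the wrapped evaluator's cost by the fraction of output
  // coefficients that actually come from the input (in / out), then add the
  // per-dimension index arithmetic; the innermost dimension (`first`) skips
  // the stride division and multiplications.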
  void updateCostPerDimension(TensorOpCost& cost, int i, bool first) const {
    const double in = static_cast<double>(m_impl.dimensions()[i]);
    const double out = in + m_padding[i].first + m_padding[i].second;
    if (out == 0)
      return;
    const double reduction = in / out;
    cost *= reduction;
    if (first) {
      cost += TensorOpCost(0, 0, 2 * TensorOpCost::AddCost<Index>() +
                    reduction * (1 * TensorOpCost::AddCost<Index>()));
    } else {
      cost += TensorOpCost(0, 0, 2 * TensorOpCost::AddCost<Index>() +
                                 2 * TensorOpCost::MulCost<Index>() +
                    reduction * (2 * TensorOpCost::MulCost<Index>() +
                                 1 * TensorOpCost::DivCost<Index>()));
    }
  }

 protected:

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    const Index initialIndex = index;
    Index inputIndex = 0;
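    // For each dimension, outermost first, classify the packet's index range:
    // entirely inside the left padding, entirely inside the right padding,
    // entirely inside the input region (peel off this dimension's coordinate
    // and continue), or straddling a boundary (fall back to the scalar gather
    // in packetWithPossibleZero).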
    EIGEN_UNROLL_LOOP
    for (int i = NumDims - 1; i > 0; --i) {
      const Index firstIdx = index;
      const Index lastIdx = index + PacketSize - 1;
      const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i];
      const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i];
      const Index lastPaddedRight = m_outputStrides[i+1];

      if (!isLeftPaddingCompileTimeZero(i) && lastIdx < lastPaddedLeft) {
        // all the coefficients are in the padding zone.
        return internal::pset1<PacketReturnType>(m_paddingValue);
      }
      else if (!isRightPaddingCompileTimeZero(i) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
        // all the coefficients are in the padding zone.
        return internal::pset1<PacketReturnType>(m_paddingValue);
      }
      else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
        // all the coefficients are between the two padding zones.
        const Index idx = index / m_outputStrides[i];
        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      else {
        // Every other case
        return packetWithPossibleZero(initialIndex);
      }
    }

    const Index lastIdx = index + PacketSize - 1;
    const Index firstIdx = index;
    const Index lastPaddedLeft = m_padding[0].first;
    const Index firstPaddedRight = (m_dimensions[0] - m_padding[0].second);
    const Index lastPaddedRight = m_outputStrides[1];

    if (!isLeftPaddingCompileTimeZero(0) && lastIdx < lastPaddedLeft) {
      // all the coefficients are in the padding zone.
      return internal::pset1<PacketReturnType>(m_paddingValue);
    }
    else if (!isRightPaddingCompileTimeZero(0) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
      // all the coefficients are in the padding zone.
      return internal::pset1<PacketReturnType>(m_paddingValue);
    }
    else if ((isLeftPaddingCompileTimeZero(0) && isRightPaddingCompileTimeZero(0)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
      // all the coefficients are between the two padding zones.
      inputIndex += (index - m_padding[0].first);
      return m_impl.template packet<Unaligned>(inputIndex);
    }
    // Every other case
    return packetWithPossibleZero(initialIndex);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    const Index initialIndex = index;
    Index inputIndex = 0;
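    // Mirror of packetColMajor: the same classification, walking dimensions
    // in row-major order with the shifted output strides.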
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < NumDims - 1; ++i) {
      const Index firstIdx = index;
      const Index lastIdx = index + PacketSize - 1;
      const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i+1];
      const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i+1];
      const Index lastPaddedRight = m_outputStrides[i];

      if (!isLeftPaddingCompileTimeZero(i) && lastIdx < lastPaddedLeft) {
        // all the coefficients are in the padding zone.
        return internal::pset1<PacketReturnType>(m_paddingValue);
      }
      else if (!isRightPaddingCompileTimeZero(i) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
        // all the coefficients are in the padding zone.
        return internal::pset1<PacketReturnType>(m_paddingValue);
      }
      else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
        // all the coefficients are between the two padding zones.
        const Index idx = index / m_outputStrides[i+1];
        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
        index -= idx * m_outputStrides[i+1];
      }
      else {
        // Every other case
        return packetWithPossibleZero(initialIndex);
      }
    }

    const Index lastIdx = index + PacketSize - 1;
    const Index firstIdx = index;
    const Index lastPaddedLeft = m_padding[NumDims-1].first;
    const Index firstPaddedRight = (m_dimensions[NumDims-1] - m_padding[NumDims-1].second);
    const Index lastPaddedRight = m_outputStrides[NumDims-1];

    if (!isLeftPaddingCompileTimeZero(NumDims-1) && lastIdx < lastPaddedLeft) {
      // all the coefficients are in the padding zone.
      return internal::pset1<PacketReturnType>(m_paddingValue);
    }
    else if (!isRightPaddingCompileTimeZero(NumDims-1) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
      // all the coefficients are in the padding zone.
      return internal::pset1<PacketReturnType>(m_paddingValue);
    }
    else if ((isLeftPaddingCompileTimeZero(NumDims-1) && isRightPaddingCompileTimeZero(NumDims-1)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
      // all the coefficients are between the two padding zones.
      inputIndex += (index - m_padding[NumDims-1].first);
      return m_impl.template packet<Unaligned>(inputIndex);
    }
    // Every other case
    return packetWithPossibleZero(initialIndex);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const
  {
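    // Fallback path: the packet straddles a padding boundary, so gather the
    // coefficients one by one (each may be the padding value) into an aligned
    // buffer and load the packet from there.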
    EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < PacketSize; ++i) {
      values[i] = coeff(index+i);
    }
    PacketReturnType rslt = internal::pload<PacketReturnType>(values);
    return rslt;
  }

  Dimensions m_dimensions;
  array<Index, NumDims+1> m_outputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
  PaddingDimensions m_padding;

  Scalar m_paddingValue;

  const Device EIGEN_DEVICE_REF m_device;
};

} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_PADDING_H