File indexing completed on 2025-01-18 09:56:10
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012 #ifndef EIGEN_ASSIGN_EVALUATOR_H
0013 #define EIGEN_ASSIGN_EVALUATOR_H
0014
0015 namespace Eigen {
0016
0017
0018
0019 namespace internal {
0020 // copy_using_evaluator_traits: compile-time selection of the traversal and unrolling
0021 // strategy used when assigning a source expression to a destination expression.
0022 //   DstEvaluator / SrcEvaluator : evaluator types; their Flags, Alignment and CoeffReadCost are read.
0023 //   AssignFunc                  : assignment functor; functor_traits<AssignFunc>::PacketAccess is read.
0024 //   MaxPacketSize               : bound passed to EIGEN_SIZE_MIN_PREFER_FIXED when choosing packet types
0025 //                                 (NOTE(review): the default -1 presumably means "no limit" - confirm).
0026 // Results are exposed through the public enums (Traversal, Vectorized, Unrolling, ...) and PacketType.
0027 template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc, int MaxPacketSize = -1>
0028 struct copy_using_evaluator_traits
0029 {
0030 typedef typename DstEvaluator::XprType Dst;
0031 typedef typename Dst::Scalar DstScalar;
0032 // Raw evaluator flag sets of both sides; tested against the *Bit masks below.
0033 enum {
0034 DstFlags = DstEvaluator::Flags,
0035 SrcFlags = SrcEvaluator::Flags
0036 };
0037 // Alignment facts: JointAlignment is the smaller of the destination and source alignments.
0038 public:
0039 enum {
0040 DstAlignment = DstEvaluator::Alignment,
0041 SrcAlignment = SrcEvaluator::Alignment,
0042 DstHasDirectAccess = (DstFlags & DirectAccessBit) == DirectAccessBit,
0043 JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment)
0044 };
0045 // Inner (max) size: the full size for compile-time vectors, otherwise cols if row-major, rows if not.
0046 private:
0047 enum {
0048 InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
0049 : int(DstFlags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
0050 : int(Dst::RowsAtCompileTime),
0051 InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
0052 : int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
0053 : int(Dst::MaxRowsAtCompileTime),
0054 RestrictedInnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(InnerSize,MaxPacketSize),
0055 RestrictedLinearSize = EIGEN_SIZE_MIN_PREFER_FIXED(Dst::SizeAtCompileTime,MaxPacketSize),
0056 OuterStride = int(outer_stride_at_compile_time<Dst>::ret),
0057 MaxSizeAtCompileTime = Dst::SizeAtCompileTime
0058 };
0059 // Two candidate packet types are computed: one from the (restricted) total size for linear
0060 // traversal, one from the (restricted) inner size for inner/slice traversal.
0061 typedef typename find_best_packet<DstScalar,RestrictedLinearSize>::type LinearPacketType;
0062 typedef typename find_best_packet<DstScalar,RestrictedInnerSize>::type InnerPacketType;
0063 // Number of scalars held by each candidate packet.
0064 enum {
0065 LinearPacketSize = unpacket_traits<LinearPacketType>::size,
0066 InnerPacketSize = unpacket_traits<InnerPacketType>::size
0067 };
0068 // Alignment reported by unpacket_traits for each candidate packet.
0069 public:
0070 enum {
0071 LinearRequiredAlignment = unpacket_traits<LinearPacketType>::alignment,
0072 InnerRequiredAlignment = unpacket_traits<InnerPacketType>::alignment
0073 };
0074 // Feasibility predicates for each traversal kind; combined into Traversal further down.
0075 private:
0076 enum {
0077 DstIsRowMajor = DstFlags&RowMajorBit,
0078 SrcIsRowMajor = SrcFlags&RowMajorBit,
0079 StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)),
0080 MightVectorize = bool(StorageOrdersAgree)
0081 && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
0082 && bool(functor_traits<AssignFunc>::PacketAccess),
0083 MayInnerVectorize = MightVectorize
0084 && int(InnerSize)!=Dynamic && int(InnerSize)%int(InnerPacketSize)==0
0085 && int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0
0086 && (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)),
0087 MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
0088 MayLinearVectorize = bool(MightVectorize) && bool(MayLinearize) && bool(DstHasDirectAccess)
0089 && (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
0090 // Slice vectorization needs direct access to the destination and an inner max size that is
0091 // Dynamic or at least one packet (three packets when EIGEN_UNALIGNED_VECTORIZE is 0).
0092 MaySliceVectorize = bool(MightVectorize) && bool(DstHasDirectAccess)
0093 && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=(EIGEN_UNALIGNED_VECTORIZE?InnerPacketSize:(3*InnerPacketSize)))
0094
0095
0096
0097
0098 };
0099 // Traversal: the first matching case wins. Linear vectorization is tried first only when its
0100 public:
0101 enum {
0102 Traversal = int(Dst::SizeAtCompileTime) == 0 ? int(AllAtOnceTraversal)
0103 : (int(MayLinearVectorize) && (LinearPacketSize>InnerPacketSize)) ? int(LinearVectorizedTraversal)
0104 : int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
0105 : int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
0106 : int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
0107 : int(MayLinearize) ? int(LinearTraversal)
0108 : int(DefaultTraversal),
0109 Vectorized = int(Traversal) == InnerVectorizedTraversal
0110 || int(Traversal) == LinearVectorizedTraversal
0111 || int(Traversal) == SliceVectorizedTraversal
0112 };
0113 // (cont.) packet is larger than the inner packet. PacketType follows the chosen traversal.
0114 typedef typename conditional<int(Traversal)==LinearVectorizedTraversal, LinearPacketType, InnerPacketType>::type PacketType;
0115 // Unrolling budget: EIGEN_UNROLLING_LIMIT scaled by the packet size actually used, compared
0116 private:
0117 enum {
0118 ActualPacketSize = int(Traversal)==LinearVectorizedTraversal ? LinearPacketSize
0119 : Vectorized ? InnerPacketSize
0120 : 1,
0121 UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize,
0122 MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic
0123 && int(Dst::SizeAtCompileTime) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit),
0124 MayUnrollInner = int(InnerSize) != Dynamic
0125 && int(InnerSize) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit)
0126 };
0127 // (cont.) against size * (dst read cost + src read cost). Unrolling is then chosen per traversal.
0128 public:
0129 enum {
0130 Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
0131 ? (
0132 int(MayUnrollCompletely) ? int(CompleteUnrolling)
0133 : int(MayUnrollInner) ? int(InnerUnrolling)
0134 : int(NoUnrolling)
0135 )
0136 : int(Traversal) == int(LinearVectorizedTraversal)
0137 ? ( bool(MayUnrollCompletely) && ( EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)))
0138 ? int(CompleteUnrolling)
0139 : int(NoUnrolling) )
0140 : int(Traversal) == int(LinearTraversal)
0141 ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
0142 : int(NoUnrolling) )
0143 #if EIGEN_UNALIGNED_VECTORIZE
0144 : int(Traversal) == int(SliceVectorizedTraversal)
0145 ? ( bool(MayUnrollInner) ? int(InnerUnrolling)
0146 : int(NoUnrolling) )
0147 #endif
0148 : int(NoUnrolling)
0149 };
0150 // Diagnostic dump of every intermediate trait to std::cerr; compiled only with EIGEN_DEBUG_ASSIGN.
0151 #ifdef EIGEN_DEBUG_ASSIGN
0152 static void debug()
0153 {
0154 std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl;
0155 std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl;
0156 std::cerr.setf(std::ios::hex, std::ios::basefield);
0157 std::cerr << "DstFlags" << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl;
0158 std::cerr << "SrcFlags" << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl;
0159 std::cerr.unsetf(std::ios::hex);
0160 EIGEN_DEBUG_VAR(DstAlignment)
0161 EIGEN_DEBUG_VAR(SrcAlignment)
0162 EIGEN_DEBUG_VAR(LinearRequiredAlignment)
0163 EIGEN_DEBUG_VAR(InnerRequiredAlignment)
0164 EIGEN_DEBUG_VAR(JointAlignment)
0165 EIGEN_DEBUG_VAR(InnerSize)
0166 EIGEN_DEBUG_VAR(InnerMaxSize)
0167 EIGEN_DEBUG_VAR(LinearPacketSize)
0168 EIGEN_DEBUG_VAR(InnerPacketSize)
0169 EIGEN_DEBUG_VAR(ActualPacketSize)
0170 EIGEN_DEBUG_VAR(StorageOrdersAgree)
0171 EIGEN_DEBUG_VAR(MightVectorize)
0172 EIGEN_DEBUG_VAR(MayLinearize)
0173 EIGEN_DEBUG_VAR(MayInnerVectorize)
0174 EIGEN_DEBUG_VAR(MayLinearVectorize)
0175 EIGEN_DEBUG_VAR(MaySliceVectorize)
0176 std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl;
0177 EIGEN_DEBUG_VAR(SrcEvaluator::CoeffReadCost)
0178 EIGEN_DEBUG_VAR(DstEvaluator::CoeffReadCost)
0179 EIGEN_DEBUG_VAR(Dst::SizeAtCompileTime)
0180 EIGEN_DEBUG_VAR(UnrollingLimit)
0181 EIGEN_DEBUG_VAR(MayUnrollCompletely)
0182 EIGEN_DEBUG_VAR(MayUnrollInner)
0183 std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl;
0184 std::cerr << std::endl;
0185 }
0186 #endif
0187 };
0188
0189
0190
0191
0192
0193
0194
0195
0196
0197 template<typename Kernel, int Index, int Stop>
0198 struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
0199 {
0200
0201 typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
0202 typedef typename DstEvaluatorType::XprType DstXprType;
0203
0204 enum {
0205 outer = Index / DstXprType::InnerSizeAtCompileTime,
0206 inner = Index % DstXprType::InnerSizeAtCompileTime
0207 };
0208
0209 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
0210 {
0211 kernel.assignCoeffByOuterInner(outer, inner);
0212 copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
0213 }
0214 };
0215
0216 template<typename Kernel, int Stop>
0217 struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, Stop, Stop>
0218 {
0219 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
0220 };
0221
0222 template<typename Kernel, int Index_, int Stop>
0223 struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
0224 {
0225 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
0226 {
0227 kernel.assignCoeffByOuterInner(outer, Index_);
0228 copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Index_+1, Stop>::run(kernel, outer);
0229 }
0230 };
0231
0232 template<typename Kernel, int Stop>
0233 struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Stop, Stop>
0234 {
0235 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index) { }
0236 };
0237
0238
0239
0240
0241
0242 template<typename Kernel, int Index, int Stop>
0243 struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
0244 {
0245 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel)
0246 {
0247 kernel.assignCoeff(Index);
0248 copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index+1, Stop>::run(kernel);
0249 }
0250 };
0251
0252 template<typename Kernel, int Stop>
0253 struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Stop, Stop>
0254 {
0255 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
0256 };
0257
0258
0259
0260
0261
0262 template<typename Kernel, int Index, int Stop>
0263 struct copy_using_evaluator_innervec_CompleteUnrolling
0264 {
0265
0266 typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
0267 typedef typename DstEvaluatorType::XprType DstXprType;
0268 typedef typename Kernel::PacketType PacketType;
0269
0270 enum {
0271 outer = Index / DstXprType::InnerSizeAtCompileTime,
0272 inner = Index % DstXprType::InnerSizeAtCompileTime,
0273 SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
0274 DstAlignment = Kernel::AssignmentTraits::DstAlignment
0275 };
0276
0277 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
0278 {
0279 kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
0280 enum { NextIndex = Index + unpacket_traits<PacketType>::size };
0281 copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
0282 }
0283 };
0284
0285 template<typename Kernel, int Stop>
0286 struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
0287 {
0288 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
0289 };
0290
0291 template<typename Kernel, int Index_, int Stop, int SrcAlignment, int DstAlignment>
0292 struct copy_using_evaluator_innervec_InnerUnrolling
0293 {
0294 typedef typename Kernel::PacketType PacketType;
0295 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
0296 {
0297 kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Index_);
0298 enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
0299 copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop, SrcAlignment, DstAlignment>::run(kernel, outer);
0300 }
0301 };
0302
0303 template<typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
0304 struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop, SrcAlignment, DstAlignment>
0305 {
0306 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { }
0307 };
0308
0309
0310
0311
0312
0313
0314 // Primary template (declaration only). Traversal and Unrolling default to the values computed by Kernel::AssignmentTraits; the specializations keyed on that pair provide a static run(Kernel&).
0315 template<typename Kernel,
0316 int Traversal = Kernel::AssignmentTraits::Traversal,
0317 int Unrolling = Kernel::AssignmentTraits::Unrolling>
0318 struct dense_assignment_loop;
0319
0320
0321
0322
0323
0324
0325 template<typename Kernel, int Unrolling>
0326 struct dense_assignment_loop<Kernel, AllAtOnceTraversal, Unrolling>
0327 {
0328 EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel& )
0329 {
0330 typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
0331 EIGEN_STATIC_ASSERT(int(DstXprType::SizeAtCompileTime) == 0,
0332 EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT)
0333 }
0334 };
0335
0336
0337
0338
0339
0340 template<typename Kernel>
0341 struct dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling>
0342 {
0343 EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel &kernel)
0344 {
0345 for(Index outer = 0; outer < kernel.outerSize(); ++outer) {
0346 for(Index inner = 0; inner < kernel.innerSize(); ++inner) {
0347 kernel.assignCoeffByOuterInner(outer, inner);
0348 }
0349 }
0350 }
0351 };
0352
0353 template<typename Kernel>
0354 struct dense_assignment_loop<Kernel, DefaultTraversal, CompleteUnrolling>
0355 {
0356 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
0357 {
0358 typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
0359 copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
0360 }
0361 };
0362
0363 template<typename Kernel>
0364 struct dense_assignment_loop<Kernel, DefaultTraversal, InnerUnrolling>
0365 {
0366 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
0367 {
0368 typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
0369
0370 const Index outerSize = kernel.outerSize();
0371 for(Index outer = 0; outer < outerSize; ++outer)
0372 copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
0373 }
0374 };
0375
0376
0377
0378
0379
0380
0381
0382
0383
0384 template <bool IsAligned = false>
0385 struct unaligned_dense_assignment_loop
0386 {
0387
0388 template <typename Kernel>
0389 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index, Index) {}
0390 };
0391
0392 template <>
0393 struct unaligned_dense_assignment_loop<false>
0394 {
0395
0396
0397
0398 #if EIGEN_COMP_MSVC
0399 template <typename Kernel>
0400 static EIGEN_DONT_INLINE void run(Kernel &kernel,
0401 Index start,
0402 Index end)
0403 #else
0404 template <typename Kernel>
0405 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel,
0406 Index start,
0407 Index end)
0408 #endif
0409 {
0410 for (Index index = start; index < end; ++index)
0411 kernel.assignCoeff(index);
0412 }
0413 };
0414
0415 template<typename Kernel>
0416 struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling>
0417 {
0418 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
0419 {
0420 const Index size = kernel.size();
0421 typedef typename Kernel::Scalar Scalar;
0422 typedef typename Kernel::PacketType PacketType;
0423 enum {
0424 requestedAlignment = Kernel::AssignmentTraits::LinearRequiredAlignment,
0425 packetSize = unpacket_traits<PacketType>::size,
0426 dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
0427 dstAlignment = packet_traits<Scalar>::AlignedOnScalar ? int(requestedAlignment)
0428 : int(Kernel::AssignmentTraits::DstAlignment),
0429 srcAlignment = Kernel::AssignmentTraits::JointAlignment
0430 };
0431 const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned<requestedAlignment>(kernel.dstDataPtr(), size);
0432 const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
0433
0434 unaligned_dense_assignment_loop<dstIsAligned!=0>::run(kernel, 0, alignedStart);
0435
0436 for(Index index = alignedStart; index < alignedEnd; index += packetSize)
0437 kernel.template assignPacket<dstAlignment, srcAlignment, PacketType>(index);
0438
0439 unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size);
0440 }
0441 };
0442
0443 template<typename Kernel>
0444 struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling>
0445 {
0446 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
0447 {
0448 typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
0449 typedef typename Kernel::PacketType PacketType;
0450
0451 enum { size = DstXprType::SizeAtCompileTime,
0452 packetSize =unpacket_traits<PacketType>::size,
0453 alignedSize = (int(size)/packetSize)*packetSize };
0454
0455 copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
0456 copy_using_evaluator_DefaultTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel);
0457 }
0458 };
0459
0460
0461
0462
0463
0464 template<typename Kernel>
0465 struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling>
0466 {
0467 typedef typename Kernel::PacketType PacketType;
0468 enum {
0469 SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
0470 DstAlignment = Kernel::AssignmentTraits::DstAlignment
0471 };
0472 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
0473 {
0474 const Index innerSize = kernel.innerSize();
0475 const Index outerSize = kernel.outerSize();
0476 const Index packetSize = unpacket_traits<PacketType>::size;
0477 for(Index outer = 0; outer < outerSize; ++outer)
0478 for(Index inner = 0; inner < innerSize; inner+=packetSize)
0479 kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
0480 }
0481 };
0482
0483 template<typename Kernel>
0484 struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling>
0485 {
0486 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
0487 {
0488 typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
0489 copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
0490 }
0491 };
0492
0493 template<typename Kernel>
0494 struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
0495 {
0496 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
0497 {
0498 typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
0499 typedef typename Kernel::AssignmentTraits Traits;
0500 const Index outerSize = kernel.outerSize();
0501 for(Index outer = 0; outer < outerSize; ++outer)
0502 copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime,
0503 Traits::SrcAlignment, Traits::DstAlignment>::run(kernel, outer);
0504 }
0505 };
0506
0507
0508
0509
0510
0511 template<typename Kernel>
0512 struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling>
0513 {
0514 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
0515 {
0516 const Index size = kernel.size();
0517 for(Index i = 0; i < size; ++i)
0518 kernel.assignCoeff(i);
0519 }
0520 };
0521
0522 template<typename Kernel>
0523 struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling>
0524 {
0525 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
0526 {
0527 typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
0528 copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
0529 }
0530 };
0531
0532
0533
0534
0535
0536 template<typename Kernel>
0537 struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
0538 {
0539 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
0540 {
0541 typedef typename Kernel::Scalar Scalar;
0542 typedef typename Kernel::PacketType PacketType;
0543 enum {
0544 packetSize = unpacket_traits<PacketType>::size,
0545 requestedAlignment = int(Kernel::AssignmentTraits::InnerRequiredAlignment),
0546 alignable = packet_traits<Scalar>::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar),
0547 dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment),
0548 dstAlignment = alignable ? int(requestedAlignment)
0549 : int(Kernel::AssignmentTraits::DstAlignment)
0550 };
0551 const Scalar *dst_ptr = kernel.dstDataPtr();
0552 if((!bool(dstIsAligned)) && (UIntPtr(dst_ptr) % sizeof(Scalar))>0)
0553 {
0554
0555 return dense_assignment_loop<Kernel,DefaultTraversal,NoUnrolling>::run(kernel);
0556 }
0557 const Index packetAlignedMask = packetSize - 1;
0558 const Index innerSize = kernel.innerSize();
0559 const Index outerSize = kernel.outerSize();
0560 const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0;
0561 Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned<requestedAlignment>(dst_ptr, innerSize);
0562
0563 for(Index outer = 0; outer < outerSize; ++outer)
0564 {
0565 const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
0566
0567 for(Index inner = 0; inner<alignedStart ; ++inner)
0568 kernel.assignCoeffByOuterInner(outer, inner);
0569
0570
0571 for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
0572 kernel.template assignPacketByOuterInner<dstAlignment, Unaligned, PacketType>(outer, inner);
0573
0574
0575 for(Index inner = alignedEnd; inner<innerSize ; ++inner)
0576 kernel.assignCoeffByOuterInner(outer, inner);
0577
0578 alignedStart = numext::mini((alignedStart+alignedStep)%packetSize, innerSize);
0579 }
0580 }
0581 };
0582
0583 #if EIGEN_UNALIGNED_VECTORIZE
0584 template<typename Kernel>
0585 struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling>
0586 {
0587 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
0588 {
0589 typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
0590 typedef typename Kernel::PacketType PacketType;
0591
0592 enum { innerSize = DstXprType::InnerSizeAtCompileTime,
0593 packetSize =unpacket_traits<PacketType>::size,
0594 vectorizableSize = (int(innerSize) / int(packetSize)) * int(packetSize),
0595 size = DstXprType::SizeAtCompileTime };
0596
0597 for(Index outer = 0; outer < kernel.outerSize(); ++outer)
0598 {
0599 copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, vectorizableSize, 0, 0>::run(kernel, outer);
0600 copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, vectorizableSize, innerSize>::run(kernel, outer);
0601 }
0602 }
0603 };
0604 #endif
0605
0606
0607
0608
0609
0610
0611
0612
0613
0614
0615
0616
0617 template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>
0618 class generic_dense_assignment_kernel
0619 {
0620 protected:
0621 typedef typename DstEvaluatorTypeT::XprType DstXprType;
0622 typedef typename SrcEvaluatorTypeT::XprType SrcXprType;
0623 public:
0624
0625 typedef DstEvaluatorTypeT DstEvaluatorType;
0626 typedef SrcEvaluatorTypeT SrcEvaluatorType;
0627 typedef typename DstEvaluatorType::Scalar Scalar;
0628 typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits;
0629 typedef typename AssignmentTraits::PacketType PacketType;
0630
0631
0632 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
0633 generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
0634 : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr)
0635 {
0636 #ifdef EIGEN_DEBUG_ASSIGN
0637 AssignmentTraits::debug();
0638 #endif
0639 }
0640
0641 EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index size() const EIGEN_NOEXCEPT { return m_dstExpr.size(); }
0642 EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index innerSize() const EIGEN_NOEXCEPT { return m_dstExpr.innerSize(); }
0643 EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index outerSize() const EIGEN_NOEXCEPT { return m_dstExpr.outerSize(); }
0644 EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index rows() const EIGEN_NOEXCEPT { return m_dstExpr.rows(); }
0645 EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index cols() const EIGEN_NOEXCEPT { return m_dstExpr.cols(); }
0646 EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR Index outerStride() const EIGEN_NOEXCEPT { return m_dstExpr.outerStride(); }
0647
0648 EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() EIGEN_NOEXCEPT { return m_dst; }
0649 EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const EIGEN_NOEXCEPT { return m_src; }
0650
0651
0652 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col)
0653 {
0654 m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col));
0655 }
0656
0657
0658 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index)
0659 {
0660 m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index));
0661 }
0662
0663
0664 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner)
0665 {
0666 Index row = rowIndexByOuterInner(outer, inner);
0667 Index col = colIndexByOuterInner(outer, inner);
0668 assignCoeff(row, col);
0669 }
0670
0671
0672 template<int StoreMode, int LoadMode, typename PacketType>
0673 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col)
0674 {
0675 m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(row,col), m_src.template packet<LoadMode,PacketType>(row,col));
0676 }
0677
0678 template<int StoreMode, int LoadMode, typename PacketType>
0679 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index)
0680 {
0681 m_functor.template assignPacket<StoreMode>(&m_dst.coeffRef(index), m_src.template packet<LoadMode,PacketType>(index));
0682 }
0683
0684 template<int StoreMode, int LoadMode, typename PacketType>
0685 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner)
0686 {
0687 Index row = rowIndexByOuterInner(outer, inner);
0688 Index col = colIndexByOuterInner(outer, inner);
0689 assignPacket<StoreMode,LoadMode,PacketType>(row, col);
0690 }
0691
0692 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner)
0693 {
0694 typedef typename DstEvaluatorType::ExpressionTraits Traits;
0695 return int(Traits::RowsAtCompileTime) == 1 ? 0
0696 : int(Traits::ColsAtCompileTime) == 1 ? inner
0697 : int(DstEvaluatorType::Flags)&RowMajorBit ? outer
0698 : inner;
0699 }
0700
0701 EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner)
0702 {
0703 typedef typename DstEvaluatorType::ExpressionTraits Traits;
0704 return int(Traits::ColsAtCompileTime) == 1 ? 0
0705 : int(Traits::RowsAtCompileTime) == 1 ? inner
0706 : int(DstEvaluatorType::Flags)&RowMajorBit ? inner
0707 : outer;
0708 }
0709
0710 EIGEN_DEVICE_FUNC const Scalar* dstDataPtr() const
0711 {
0712 return m_dstExpr.data();
0713 }
0714
0715 protected:
0716 DstEvaluatorType& m_dst;
0717 const SrcEvaluatorType& m_src;
0718 const Functor &m_functor;
0719
0720 DstXprType& m_dstExpr;
0721 };
0722
0723
0724
0725
0726
0727 template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor>
0728 class restricted_packet_dense_assignment_kernel : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, BuiltIn>
0729 {
0730 protected:
0731 typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, BuiltIn> Base;
0732 public:
0733 typedef typename Base::Scalar Scalar;
0734 typedef typename Base::DstXprType DstXprType;
0735 typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, 4> AssignmentTraits;
0736 typedef typename AssignmentTraits::PacketType PacketType;
0737
0738 EIGEN_DEVICE_FUNC restricted_packet_dense_assignment_kernel(DstEvaluatorTypeT &dst, const SrcEvaluatorTypeT &src, const Functor &func, DstXprType& dstExpr)
0739 : Base(dst, src, func, dstExpr)
0740 {
0741 }
0742 };
0743
0744
0745
0746
0747
0748 template<typename DstXprType,typename SrcXprType, typename Functor>
0749 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
0750 void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const Functor &)
0751 {
0752 EIGEN_ONLY_USED_FOR_DEBUG(dst);
0753 EIGEN_ONLY_USED_FOR_DEBUG(src);
0754 eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
0755 }
0756
0757 template<typename DstXprType,typename SrcXprType, typename T1, typename T2>
0758 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
0759 void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const internal::assign_op<T1,T2> &)
0760 {
0761 Index dstRows = src.rows();
0762 Index dstCols = src.cols();
0763 if(((dst.rows()!=dstRows) || (dst.cols()!=dstCols)))
0764 dst.resize(dstRows, dstCols);
0765 eigen_assert(dst.rows() == dstRows && dst.cols() == dstCols);
0766 }
0767
0768 template<typename DstXprType, typename SrcXprType, typename Functor>
0769 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func)
0770 {
0771 typedef evaluator<DstXprType> DstEvaluatorType;
0772 typedef evaluator<SrcXprType> SrcEvaluatorType;
0773
0774 SrcEvaluatorType srcEvaluator(src);
0775
0776
0777
0778 resize_if_allowed(dst, src, func);
0779
0780 DstEvaluatorType dstEvaluator(dst);
0781
0782 typedef generic_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;
0783 Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
0784
0785 dense_assignment_loop<Kernel>::run(kernel);
0786 }
0787
0788
0789 #ifndef EIGEN_GPU_COMPILE_PHASE
0790 template<typename DstXprType>
0791 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const Eigen::CwiseNullaryOp<Eigen::internal::scalar_constant_op<typename DstXprType::Scalar>, DstXprType>& src, const internal::assign_op<typename DstXprType::Scalar,typename DstXprType::Scalar>& func)
0792 {
0793 resize_if_allowed(dst, src, func);
0794 std::fill_n(dst.data(), dst.size(), src.functor()());
0795 }
0796 #endif
0797
0798 template<typename DstXprType, typename SrcXprType>
0799 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src)
0800 {
0801 call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());
0802 }
0803
0804
0805
0806
0807
0808
0809
0810
0811 template<typename DstShape, typename SrcShape> struct AssignmentKind;
0812
0813
0814 struct Dense2Dense {};
0815 struct EigenBase2EigenBase {};
0816
0817 template<typename,typename> struct AssignmentKind { typedef EigenBase2EigenBase Kind; };
0818 template<> struct AssignmentKind<DenseShape,DenseShape> { typedef Dense2Dense Kind; };
0819
0820 // Assignment: declaration only. Kind defaults to the AssignmentKind tag of the two evaluator shapes; EnableIf defaults to void (NOTE(review): presumably a SFINAE hook for specializations - confirm).
0821 template< typename DstXprType, typename SrcXprType, typename Functor,
0822 typename Kind = typename AssignmentKind< typename evaluator_traits<DstXprType>::Shape , typename evaluator_traits<SrcXprType>::Shape >::Kind,
0823 typename EnableIf = void>
0824 struct Assignment;
0825
0826
0827
0828
0829
0830
0831
0832 template<typename Dst, typename Src>
0833 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
0834 void call_assignment(Dst& dst, const Src& src)
0835 {
0836 call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
0837 }
0838 template<typename Dst, typename Src>
0839 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
0840 void call_assignment(const Dst& dst, const Src& src)
0841 {
0842 call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
0843 }
0844
0845
0846 template<typename Dst, typename Src, typename Func>
0847 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
0848 void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing<Src>::value, void*>::type = 0)
0849 {
0850 typename plain_matrix_type<Src>::type tmp(src);
0851 call_assignment_no_alias(dst, tmp, func);
0852 }
0853
0854 template<typename Dst, typename Src, typename Func>
0855 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
0856 void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<!evaluator_assume_aliasing<Src>::value, void*>::type = 0)
0857 {
0858 call_assignment_no_alias(dst, src, func);
0859 }
0860
0861
0862
0863 template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
0864 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
0865 void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
0866 {
0867 call_assignment_no_alias(dst.expression(), src, func);
0868 }
0869
0870
0871 template<typename Dst, typename Src, typename Func>
0872 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
0873 void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
0874 {
0875 enum {
0876 NeedToTranspose = ( (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1)
0877 || (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1)
0878 ) && int(Dst::SizeAtCompileTime) != 1
0879 };
0880
0881 typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst>::type ActualDstTypeCleaned;
0882 typedef typename internal::conditional<NeedToTranspose, Transpose<Dst>, Dst&>::type ActualDstType;
0883 ActualDstType actualDst(dst);
0884
0885
0886 EIGEN_STATIC_ASSERT_LVALUE(Dst)
0887 EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src)
0888 EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar);
0889
0890 Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func);
0891 }
0892
// Assigns src to dst by running dense_assignment_loop<> on a
// restricted_packet_dense_assignment_kernel built from the evaluators of both
// operands and the functor func.
//   dst  : destination expression; must be an lvalue (checked at compile time).
//   src  : source expression.
//   func : assignment functor; its compatibility with the scalar types of Dst
//          and Src is checked at compile time.
0893 template<typename Dst, typename Src, typename Func>
0894 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
0895 void call_restricted_packet_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
0896 {
0897 typedef evaluator<Dst> DstEvaluatorType;
0898 typedef evaluator<Src> SrcEvaluatorType;
0899 typedef restricted_packet_dense_assignment_kernel<DstEvaluatorType,SrcEvaluatorType,Func> Kernel;
0900
0901 EIGEN_STATIC_ASSERT_LVALUE(Dst)
0902 EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar);
0903
// Order matters here: the source evaluator is created first, then the
// destination is given the chance to be resized, and only afterwards is the
// destination evaluator created.
// NOTE(review): presumably this protects sources that refer to dst and lets the
// destination evaluator see the post-resize storage -- confirm before reordering.
0904 SrcEvaluatorType srcEvaluator(src);
0905 resize_if_allowed(dst, src, func);
0906
0907 DstEvaluatorType dstEvaluator(dst);
0908 Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());
0909
0910 dense_assignment_loop<Kernel>::run(kernel);
0911 }
0912
0913 template<typename Dst, typename Src>
0914 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
0915 void call_assignment_no_alias(Dst& dst, const Src& src)
0916 {
0917 call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
0918 }
0919
0920 template<typename Dst, typename Src, typename Func>
0921 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
0922 void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
0923 {
0924
0925 EIGEN_STATIC_ASSERT_LVALUE(Dst)
0926 EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src)
0927 EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar);
0928
0929 Assignment<Dst,Src,Func>::run(dst, src, func);
0930 }
0931 template<typename Dst, typename Src>
0932 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
0933 void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
0934 {
0935 call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
0936 }
0937
0938
// Declaration only; no definition is visible in this part of the file.
// Called by the Dense2Dense Assignment specialization on (dst, src) before the
// assignment loop, unless EIGEN_NO_DEBUG is defined.
0939 template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, const Src &src);
0940
0941
0942
0943
0944 template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
0945 struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak>
0946 {
0947 EIGEN_DEVICE_FUNC
0948 static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
0949 {
0950 #ifndef EIGEN_NO_DEBUG
0951 internal::check_for_aliasing(dst, src);
0952 #endif
0953
0954 call_dense_assignment_loop(dst, src, func);
0955 }
0956 };
0957
0958
0959
0960
0961
0962 template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
0963 struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Weak>
0964 {
0965 EIGEN_DEVICE_FUNC
0966 static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &)
0967 {
0968 Index dstRows = src.rows();
0969 Index dstCols = src.cols();
0970 if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
0971 dst.resize(dstRows, dstCols);
0972
0973 eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
0974 src.evalTo(dst);
0975 }
0976
0977
0978
0979 template<typename SrcScalarType>
0980 EIGEN_DEVICE_FUNC
0981 static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,SrcScalarType> &)
0982 {
0983 Index dstRows = src.rows();
0984 Index dstCols = src.cols();
0985 if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
0986 dst.resize(dstRows, dstCols);
0987
0988 eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
0989 src.addTo(dst);
0990 }
0991
0992 template<typename SrcScalarType>
0993 EIGEN_DEVICE_FUNC
0994 static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,SrcScalarType> &)
0995 {
0996 Index dstRows = src.rows();
0997 Index dstCols = src.cols();
0998 if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
0999 dst.resize(dstRows, dstCols);
1000
1001 eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
1002 src.subTo(dst);
1003 }
1004 };
1005
1006 }
1007
1008 }
1009
1010 #endif