// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_MEMORY_H
#define EIGEN_MEMORY_H

#ifndef EIGEN_MALLOC_ALREADY_ALIGNED

// Try to determine automatically whether the system's malloc already returns
// pointers with a suitable alignment, in which case Eigen can skip its own
// aligned allocation wrappers.

// glibc 2.8 and later return 16-byte aligned pointers from malloc on 64-bit
// (LP64) systems, provided the default alignment requirement is 16 bytes.
#if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \
 && defined(__LP64__) && ! defined( __SANITIZE_ADDRESS__ ) && (EIGEN_DEFAULT_ALIGN_BYTES == 16)
  #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1
#else
  #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0
#endif

// FreeBSD's malloc returns 16-byte aligned pointers, except on the ARM and
// MIPS architectures.
#if defined(__FreeBSD__) && !(EIGEN_ARCH_ARM || EIGEN_ARCH_MIPS) && (EIGEN_DEFAULT_ALIGN_BYTES == 16)
  #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1
#else
  #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0
#endif

#if (EIGEN_OS_MAC && (EIGEN_DEFAULT_ALIGN_BYTES == 16))   \
 || (EIGEN_OS_WIN64 && (EIGEN_DEFAULT_ALIGN_BYTES == 16)) \
 || EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED                    \
 || EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED
  #define EIGEN_MALLOC_ALREADY_ALIGNED 1
#else
  #define EIGEN_MALLOC_ALREADY_ALIGNED 0
#endif

#endif

namespace Eigen {

namespace internal {

EIGEN_DEVICE_FUNC
inline void throw_std_bad_alloc()
{
  #ifdef EIGEN_EXCEPTIONS
    throw std::bad_alloc();
  #else
    std::size_t huge = static_cast<std::size_t>(-1);
    #if defined(EIGEN_HIPCC)
    // HIP device code cannot call ::operator new, so trigger the allocation
    // failure with a plain array-new of an impossibly large size instead.
    new int[huge];
    #else
    void* unused = ::operator new(huge);
    EIGEN_UNUSED_VARIABLE(unused);
    #endif
  #endif
}

/*****************************************************************************
*** Implementation of handmade aligned functions                          ***
*****************************************************************************/

/** \internal Like malloc, but the returned pointer is guaranteed to be aligned
  * to \a alignment. Fast, but wastes \a alignment additional bytes of memory.
  * Does not throw any exception.
  */
EIGEN_DEVICE_FUNC inline void* handmade_aligned_malloc(std::size_t size, std::size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES)
{
  eigen_assert(alignment >= sizeof(void*) && (alignment & (alignment-1)) == 0 && "Alignment must be at least sizeof(void*) and a power of 2");

  EIGEN_USING_STD(malloc)
  void *original = malloc(size+alignment);

  if (original == 0) return 0;
  // Round up to the next multiple of `alignment`, and stash the pointer
  // returned by malloc just before the aligned block so that
  // handmade_aligned_free can retrieve it.
  void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(alignment-1))) + alignment);
  *(reinterpret_cast<void**>(aligned) - 1) = original;
  return aligned;
}

/** \internal Frees memory allocated with handmade_aligned_malloc */
EIGEN_DEVICE_FUNC inline void handmade_aligned_free(void *ptr)
{
  if (ptr) {
    EIGEN_USING_STD(free)
    // The pointer originally returned by malloc sits right before the aligned block.
    free(*(reinterpret_cast<void**>(ptr) - 1));
  }
}
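
// Illustrative example (not from the original sources): with alignment == 16,
// if malloc returns 0x1003 the block is rounded up to the next 16-byte
// boundary past it, 0x1010, and the value 0x1003 is stored in the
// sizeof(void*) bytes just below 0x1010 so handmade_aligned_free can recover
// and free the original allocation.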

/** \internal Reallocates an aligned block obtained from handmade_aligned_malloc.
  * Since realloc may move the underlying allocation, the payload is shifted back
  * to the new aligned position with memmove when needed.
  */
inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = 0)
{
  if (ptr == 0) return handmade_aligned_malloc(size);
  void *original = *(reinterpret_cast<void**>(ptr) - 1);
  std::ptrdiff_t previous_offset = static_cast<char *>(ptr)-static_cast<char *>(original);
  original = std::realloc(original,size+EIGEN_DEFAULT_ALIGN_BYTES);
  if (original == 0) return 0;
  void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) + EIGEN_DEFAULT_ALIGN_BYTES);
  void *previous_aligned = static_cast<char *>(original)+previous_offset;
  if(aligned!=previous_aligned)
    std::memmove(aligned, previous_aligned, size);

  *(reinterpret_cast<void**>(aligned) - 1) = original;
  return aligned;
}

/*****************************************************************************
*** Implementation of portable aligned versions of malloc/free/realloc    ***
*****************************************************************************/

#ifdef EIGEN_NO_MALLOC
EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
{
  eigen_assert(false && "heap allocation is forbidden (EIGEN_NO_MALLOC is defined)");
}
#elif defined EIGEN_RUNTIME_NO_MALLOC
EIGEN_DEVICE_FUNC inline bool is_malloc_allowed_impl(bool update, bool new_value = false)
{
  static bool value = true;
  if (update)
    value = new_value;
  return value;
}
EIGEN_DEVICE_FUNC inline bool is_malloc_allowed() { return is_malloc_allowed_impl(false); }
EIGEN_DEVICE_FUNC inline bool set_is_malloc_allowed(bool new_value) { return is_malloc_allowed_impl(true, new_value); }
EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
{
  eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)");
}
#else
EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
{}
#endif
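
// Usage sketch (assuming EIGEN_RUNTIME_NO_MALLOC is defined before including
// Eigen): heap allocations can be temporarily forbidden around a code region
// to verify that it runs allocation-free, e.g.
//
//   Eigen::internal::set_is_malloc_allowed(false);
//   x.noalias() = A * b;   // would assert if this line allocated
//   Eigen::internal::set_is_malloc_allowed(true);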

/** \internal Allocates \a size bytes. The returned pointer is guaranteed to have
  * EIGEN_DEFAULT_ALIGN_BYTES alignment. On allocation error, throw_std_bad_alloc()
  * is called (which throws std::bad_alloc when exceptions are enabled).
  */
EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size)
{
  check_that_malloc_is_allowed();

  void *result;
  #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED

    EIGEN_USING_STD(malloc)
    result = malloc(size);

    #if EIGEN_DEFAULT_ALIGN_BYTES==16
    eigen_assert((size<16 || (std::size_t(result)%16)==0) && "System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fallback to handmade aligned memory allocator.");
    #endif
  #else
    result = handmade_aligned_malloc(size);
  #endif

  if(!result && size)
    throw_std_bad_alloc();

  return result;
}

/** \internal Frees memory allocated with aligned_malloc. */
EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr)
{
  #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED

    EIGEN_USING_STD(free)
    free(ptr);

  #else
    handmade_aligned_free(ptr);
  #endif
}

/** \internal Reallocates an aligned block of memory obtained from aligned_malloc.
  * \sa aligned_malloc, aligned_free
  */
inline void* aligned_realloc(void *ptr, std::size_t new_size, std::size_t old_size)
{
  EIGEN_UNUSED_VARIABLE(old_size)

  void *result;
  #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
    result = std::realloc(ptr,new_size);
  #else
    result = handmade_aligned_realloc(ptr,new_size,old_size);
  #endif

  if (!result && new_size)
    throw_std_bad_alloc();

  return result;
}

/*****************************************************************************
*** Implementation of conditionally aligned functions                     ***
*****************************************************************************/

/** \internal Allocates \a size bytes. If Align is true, the returned pointer is
  * guaranteed to have EIGEN_DEFAULT_ALIGN_BYTES alignment; otherwise plain
  * malloc is used.
  */
template<bool Align> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc(std::size_t size)
{
  return aligned_malloc(size);
}

template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(std::size_t size)
{
  check_that_malloc_is_allowed();

  EIGEN_USING_STD(malloc)
  void *result = malloc(size);

  if(!result && size)
    throw_std_bad_alloc();
  return result;
}

/** \internal Frees memory allocated with conditional_aligned_malloc */
template<bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void *ptr)
{
  aligned_free(ptr);
}

template<> EIGEN_DEVICE_FUNC inline void conditional_aligned_free<false>(void *ptr)
{
  EIGEN_USING_STD(free)
  free(ptr);
}

template<bool Align> inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size)
{
  return aligned_realloc(ptr, new_size, old_size);
}

template<> inline void* conditional_aligned_realloc<false>(void* ptr, std::size_t new_size, std::size_t)
{
  return std::realloc(ptr, new_size);
}

/*****************************************************************************
*** Construction/destruction of array elements                            ***
*****************************************************************************/

/** \internal Destructs the elements of an array.
  * The \a size parameter tells on how many objects to call the destructor of T.
  */
template<typename T> EIGEN_DEVICE_FUNC inline void destruct_elements_of_array(T *ptr, std::size_t size)
{
  // always destruct an array starting from the end.
  if(ptr)
    while(size) ptr[--size].~T();
}

/** \internal Constructs the elements of an array.
  * The \a size parameter tells on how many objects to call the constructor of T.
  * If a constructor throws, the already-constructed elements are destructed
  * before rethrowing.
  */
template<typename T> EIGEN_DEVICE_FUNC inline T* construct_elements_of_array(T *ptr, std::size_t size)
{
  std::size_t i;
  EIGEN_TRY
  {
    for (i = 0; i < size; ++i) ::new (ptr + i) T;
    return ptr;
  }
  EIGEN_CATCH(...)
  {
    destruct_elements_of_array(ptr, i);
    EIGEN_THROW;
  }
  return NULL;
}

/*****************************************************************************
*** Implementation of aligned new/delete-like functions                   ***
*****************************************************************************/

// Calls throw_std_bad_alloc if allocating \a size objects of type T would
// overflow std::size_t when computing the byte count.
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void check_size_for_overflow(std::size_t size)
{
  if(size > std::size_t(-1) / sizeof(T))
    throw_std_bad_alloc();
}

/** \internal Allocates \a size objects of type T. The returned pointer is
  * guaranteed to have EIGEN_DEFAULT_ALIGN_BYTES alignment. The default
  * constructor of T is called on each element; on error, the memory is freed
  * and the exception is rethrown.
  */
template<typename T> EIGEN_DEVICE_FUNC inline T* aligned_new(std::size_t size)
{
  check_size_for_overflow<T>(size);
  T *result = reinterpret_cast<T*>(aligned_malloc(sizeof(T)*size));
  EIGEN_TRY
  {
    return construct_elements_of_array(result, size);
  }
  EIGEN_CATCH(...)
  {
    aligned_free(result);
    EIGEN_THROW;
  }
  return result;
}

template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new(std::size_t size)
{
  check_size_for_overflow<T>(size);
  T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
  EIGEN_TRY
  {
    return construct_elements_of_array(result, size);
  }
  EIGEN_CATCH(...)
  {
    conditional_aligned_free<Align>(result);
    EIGEN_THROW;
  }
  return result;
}

/** \internal Deletes objects constructed with aligned_new.
  * The \a size parameter tells on how many objects to call the destructor of T.
  */
template<typename T> EIGEN_DEVICE_FUNC inline void aligned_delete(T *ptr, std::size_t size)
{
  destruct_elements_of_array<T>(ptr, size);
  Eigen::internal::aligned_free(ptr);
}

/** \internal Deletes objects constructed with conditional_aligned_new.
  * The \a size parameter tells on how many objects to call the destructor of T.
  */
template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete(T *ptr, std::size_t size)
{
  destruct_elements_of_array<T>(ptr, size);
  conditional_aligned_free<Align>(ptr);
}

template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new(T* pts, std::size_t new_size, std::size_t old_size)
{
  check_size_for_overflow<T>(new_size);
  check_size_for_overflow<T>(old_size);
  if(new_size < old_size)
    destruct_elements_of_array(pts+new_size, old_size-new_size);
  T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size));
  if(new_size > old_size)
  {
    EIGEN_TRY
    {
      construct_elements_of_array(result+old_size, new_size-old_size);
    }
    EIGEN_CATCH(...)
    {
      conditional_aligned_free<Align>(result);
      EIGEN_THROW;
    }
  }
  return result;
}

template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new_auto(std::size_t size)
{
  if(size==0)
    return 0; // short-circuit for empty arrays
  check_size_for_overflow<T>(size);
  T *result = reinterpret_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
  if(NumTraits<T>::RequireInitialization)
  {
    EIGEN_TRY
    {
      construct_elements_of_array(result, size);
    }
    EIGEN_CATCH(...)
    {
      conditional_aligned_free<Align>(result);
      EIGEN_THROW;
    }
  }
  return result;
}

template<typename T, bool Align> inline T* conditional_aligned_realloc_new_auto(T* pts, std::size_t new_size, std::size_t old_size)
{
  check_size_for_overflow<T>(new_size);
  check_size_for_overflow<T>(old_size);
  if(NumTraits<T>::RequireInitialization && (new_size < old_size))
    destruct_elements_of_array(pts+new_size, old_size-new_size);
  T *result = reinterpret_cast<T*>(conditional_aligned_realloc<Align>(reinterpret_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size));
  if(NumTraits<T>::RequireInitialization && (new_size > old_size))
  {
    EIGEN_TRY
    {
      construct_elements_of_array(result+old_size, new_size-old_size);
    }
    EIGEN_CATCH(...)
    {
      conditional_aligned_free<Align>(result);
      EIGEN_THROW;
    }
  }
  return result;
}

template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete_auto(T *ptr, std::size_t size)
{
  if(NumTraits<T>::RequireInitialization)
    destruct_elements_of_array<T>(ptr, size);
  conditional_aligned_free<Align>(ptr);
}

/****************************************************************************/

/** \internal Returns the index of the first element of the array that is well
  * aligned with respect to the requested \a Alignment (in bytes), or \a size
  * if no such element exists in the range.
  *
  * \tparam Alignment requested alignment in bytes.
  * \param array the address of the start of the array
  * \param size the size of the array
  */
template<int Alignment, typename Scalar, typename Index>
EIGEN_DEVICE_FUNC inline Index first_aligned(const Scalar* array, Index size)
{
  const Index ScalarSize = sizeof(Scalar);
  const Index AlignmentSize = Alignment / ScalarSize;
  const Index AlignmentMask = AlignmentSize-1;

  if(AlignmentSize<=1)
  {
    // The requested alignment does not exceed the scalar size, so element 0
    // is already as aligned as it can get.
    return 0;
  }
  else if( (UIntPtr(array) & (sizeof(Scalar)-1)) || (Alignment%ScalarSize)!=0)
  {
    // The array is not aligned to the size of a single scalar, or the requested
    // alignment is not a multiple of the scalar size: no element is well aligned.
    return size;
  }
  else
  {
    Index first = (AlignmentSize - (Index((UIntPtr(array)/sizeof(Scalar))) & AlignmentMask)) & AlignmentMask;
    return (first < size) ? first : size;
  }
}
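
// Worked example (illustrative, not from the original sources): for a float
// array whose address is 8 bytes past a 16-byte boundary and Alignment==16,
// we get ScalarSize==4, AlignmentSize==4, AlignmentMask==3, and
//   first = (4 - ((addr/4) & 3)) & 3 = (4 - 2) & 3 = 2,
// i.e. element 2 is the first one sitting on a 16-byte boundary.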

/** \internal Returns the index of the first element of the array that is well
  * aligned with respect to the largest packet requirement for the scalar type.
  */
template<typename Scalar, typename Index>
EIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index size)
{
  typedef typename packet_traits<Scalar>::type DefaultPacketType;
  return first_aligned<unpacket_traits<DefaultPacketType>::alignment>(array, size);
}

/** \internal Returns the smallest integer multiple of \a base that is greater or equal to \a size */
template<typename Index>
inline Index first_multiple(Index size, Index base)
{
  return ((size+base-1)/base)*base;
}

// smart_copy statically dispatches to memcpy for types that do not require
// initialization, and to std::copy otherwise.
template<typename T, bool UseMemcpy> struct smart_copy_helper;

template<typename T> EIGEN_DEVICE_FUNC void smart_copy(const T* start, const T* end, T* target)
{
  smart_copy_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target);
}

template<typename T> struct smart_copy_helper<T,true> {
  EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target)
  {
    IntPtr size = IntPtr(end)-IntPtr(start);
    if(size==0) return;
    eigen_internal_assert(start!=0 && end!=0 && target!=0);
    EIGEN_USING_STD(memcpy)
    memcpy(target, start, size);
  }
};

template<typename T> struct smart_copy_helper<T,false> {
  EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target)
  { std::copy(start, end, target); }
};

// Like smart_copy, but handles overlapping source and destination ranges
// (memmove semantics).
template<typename T, bool UseMemmove> struct smart_memmove_helper;

template<typename T> void smart_memmove(const T* start, const T* end, T* target)
{
  smart_memmove_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target);
}

template<typename T> struct smart_memmove_helper<T,true> {
  static inline void run(const T* start, const T* end, T* target)
  {
    IntPtr size = IntPtr(end)-IntPtr(start);
    if(size==0) return;
    eigen_internal_assert(start!=0 && end!=0 && target!=0);
    std::memmove(target, start, size);
  }
};

template<typename T> struct smart_memmove_helper<T,false> {
  static inline void run(const T* start, const T* end, T* target)
  {
    if (UIntPtr(target) < UIntPtr(start))
    {
      std::copy(start, end, target);
    }
    else
    {
      std::ptrdiff_t count = (std::ptrdiff_t(end)-std::ptrdiff_t(start)) / sizeof(T);
      std::copy_backward(start, end, target + count);
    }
  }
};

#if EIGEN_HAS_RVALUE_REFERENCES
template<typename T> EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target)
{
  return std::move(start, end, target);
}
#else
template<typename T> EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target)
{
  return std::copy(start, end, target);
}
#endif

/*****************************************************************************
*** Implementation of runtime stack allocation (falling back to malloc)   ***
*****************************************************************************/

// You can overwrite Eigen's default behavior regarding alloca by defining
// EIGEN_ALLOCA to the stack allocation function of your platform.
#if ! defined EIGEN_ALLOCA && ! defined EIGEN_GPU_COMPILE_PHASE
  #if EIGEN_OS_LINUX || EIGEN_OS_MAC || (defined alloca)
    #define EIGEN_ALLOCA alloca
  #elif EIGEN_COMP_MSVC
    #define EIGEN_ALLOCA _alloca
  #endif
#endif

// alloca is not reliably usable with clang when targeting Thumb, so it is
// disabled there and the heap fallback is used instead.
#if defined(__clang__) && defined(__thumb__)
  #undef EIGEN_ALLOCA
#endif

/** \internal
  * Helper used by the ei_declare_aligned_stack_constructed_variable macro below.
  * It constructs the elements of a buffer (when T requires initialization),
  * destructs them when it goes out of scope, and frees the buffer with
  * aligned_free if it was heap allocated.
  */
template<typename T> class aligned_stack_memory_handler : noncopyable
{
  public:
    /* Creates a handler responsible for the buffer \a ptr of \a size elements.
     * \a ptr may be 0 regardless of the other parameters. If \a dealloc is
     * true, the buffer is released with aligned_free on destruction.
     */
    EIGEN_DEVICE_FUNC
    aligned_stack_memory_handler(T* ptr, std::size_t size, bool dealloc)
      : m_ptr(ptr), m_size(size), m_deallocate(dealloc)
    {
      if(NumTraits<T>::RequireInitialization && m_ptr)
        Eigen::internal::construct_elements_of_array(m_ptr, size);
    }
    EIGEN_DEVICE_FUNC
    ~aligned_stack_memory_handler()
    {
      if(NumTraits<T>::RequireInitialization && m_ptr)
        Eigen::internal::destruct_elements_of_array<T>(m_ptr, m_size);
      if(m_deallocate)
        Eigen::internal::aligned_free(m_ptr);
    }
  protected:
    T* m_ptr;
    std::size_t m_size;
    bool m_deallocate;
};

#ifdef EIGEN_ALLOCA

// Helper for the ei_declare_local_nested_eval macro below: evaluates the
// expression Xpr either into a plain nested object or, for dynamically sized
// expressions that must be evaluated, into an externally provided (typically
// stack allocated) buffer mapped with the required alignment.
template<typename Xpr, int NbEvaluations,
         bool MapExternalBuffer = nested_eval<Xpr,NbEvaluations>::Evaluate && Xpr::MaxSizeAtCompileTime==Dynamic
         >
struct local_nested_eval_wrapper
{
  static const bool NeedExternalBuffer = false;
  typedef typename Xpr::Scalar Scalar;
  typedef typename nested_eval<Xpr,NbEvaluations>::type ObjectType;
  ObjectType object;

  EIGEN_DEVICE_FUNC
  local_nested_eval_wrapper(const Xpr& xpr, Scalar* ptr) : object(xpr)
  {
    EIGEN_UNUSED_VARIABLE(ptr);
    eigen_internal_assert(ptr==0);
  }
};

template<typename Xpr, int NbEvaluations>
struct local_nested_eval_wrapper<Xpr,NbEvaluations,true>
{
  static const bool NeedExternalBuffer = true;
  typedef typename Xpr::Scalar Scalar;
  typedef typename plain_object_eval<Xpr>::type PlainObject;
  typedef Map<PlainObject,EIGEN_DEFAULT_ALIGN_BYTES> ObjectType;
  ObjectType object;

  EIGEN_DEVICE_FUNC
  local_nested_eval_wrapper(const Xpr& xpr, Scalar* ptr)
    : object(ptr==0 ? reinterpret_cast<Scalar*>(Eigen::internal::aligned_malloc(sizeof(Scalar)*xpr.size())) : ptr, xpr.rows(), xpr.cols()),
      m_deallocate(ptr==0)
  {
    if(NumTraits<Scalar>::RequireInitialization && object.data())
      Eigen::internal::construct_elements_of_array(object.data(), object.size());
    object = xpr;
  }

  EIGEN_DEVICE_FUNC
  ~local_nested_eval_wrapper()
  {
    if(NumTraits<Scalar>::RequireInitialization && object.data())
      Eigen::internal::destruct_elements_of_array(object.data(), object.size());
    if(m_deallocate)
      Eigen::internal::aligned_free(object.data());
  }

private:
  bool m_deallocate;
};

#endif

// Minimal RAII wrapper around a heap-allocated array of T (plain new[]/delete[]).
template<typename T> class scoped_array : noncopyable
{
  T* m_ptr;
public:
  explicit scoped_array(std::ptrdiff_t size)
  {
    m_ptr = new T[size];
  }
  ~scoped_array()
  {
    delete[] m_ptr;
  }
  T& operator[](std::ptrdiff_t i) { return m_ptr[i]; }
  const T& operator[](std::ptrdiff_t i) const { return m_ptr[i]; }
  T* &ptr() { return m_ptr; }
  const T* ptr() const { return m_ptr; }
  operator const T*() const { return m_ptr; }
};

template<typename T> void swap(scoped_array<T> &a, scoped_array<T> &b)
{
  std::swap(a.ptr(),b.ptr());
}

} // end namespace internal

/*****************************************************************************
*** Implementation of stack allocation                                    ***
*****************************************************************************/

/** \internal
  *
  * The macro ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER)
  * declares, allocates and constructs an aligned buffer named NAME of SIZE
  * elements of type TYPE on the stack if the total byte size is smaller than
  * EIGEN_STACK_ALLOCATION_LIMIT and stack allocation is supported by the
  * platform (see EIGEN_ALLOCA above). Otherwise the memory is allocated on the
  * heap. If BUFFER is non null, then the declared variable is simply an alias
  * for BUFFER and no allocation/construction happens. The variable NAME has the
  * scope of the declaration and is automatically destructed and, if needed,
  * deallocated when the scope ends.
  */
#ifdef EIGEN_ALLOCA

  #if EIGEN_DEFAULT_ALIGN_BYTES>0
    // We always manually re-align the result of EIGEN_ALLOCA. If alloca is
    // already aligned, the compiler should be able to optimize the re-alignment away.
    #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((internal::UIntPtr(EIGEN_ALLOCA(SIZE+EIGEN_DEFAULT_ALIGN_BYTES-1)) + EIGEN_DEFAULT_ALIGN_BYTES-1) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1)))
  #else
    #define EIGEN_ALIGNED_ALLOCA(SIZE) EIGEN_ALLOCA(SIZE)
  #endif

  #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
    Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
    TYPE* NAME = (BUFFER)!=0 ? (BUFFER) \
               : reinterpret_cast<TYPE*>( \
                   (sizeof(TYPE)*SIZE<=EIGEN_STACK_ALLOCATION_LIMIT) ? EIGEN_ALIGNED_ALLOCA(sizeof(TYPE)*SIZE) \
                   : Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE) ); \
    Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT)


  #define ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) \
    Eigen::internal::local_nested_eval_wrapper<XPR_T,N> EIGEN_CAT(NAME,_wrapper)(XPR, reinterpret_cast<typename XPR_T::Scalar*>( \
      ( (Eigen::internal::local_nested_eval_wrapper<XPR_T,N>::NeedExternalBuffer) && ((sizeof(typename XPR_T::Scalar)*XPR.size())<=EIGEN_STACK_ALLOCATION_LIMIT) ) \
        ? EIGEN_ALIGNED_ALLOCA( sizeof(typename XPR_T::Scalar)*XPR.size() ) : 0 ) ) ; \
    typename Eigen::internal::local_nested_eval_wrapper<XPR_T,N>::ObjectType NAME(EIGEN_CAT(NAME,_wrapper).object)

#else

  #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
    Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
    TYPE* NAME = (BUFFER)!=0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE)); \
    Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,true)


  #define ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) typename Eigen::internal::nested_eval<XPR_T,N>::type NAME(XPR)

#endif
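
// Usage sketch (illustrative only): inside Eigen's own code, a temporary
// aligned workspace of `n` floats that lives on the stack when small enough,
// and on the heap otherwise, is typically declared as
//
//   ei_declare_aligned_stack_constructed_variable(float, workspace, n, 0);
//
// `workspace` is then used like a plain float* and is destructed/deallocated
// automatically at the end of the scope.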

/*****************************************************************************
*** Implementation of EIGEN_MAKE_ALIGNED_OPERATOR_NEW [_IF]               ***
*****************************************************************************/

#if EIGEN_HAS_CXX17_OVERALIGN

// C++17 handles over-aligned types in new/delete natively, so these macros
// can expand to nothing.
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign)
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size)

#else

// Disabled when compiling HIP device code.
#if EIGEN_MAX_ALIGN_BYTES!=0 && !defined(EIGEN_HIP_DEVICE_COMPILE)
  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
    EIGEN_DEVICE_FUNC \
    void* operator new(std::size_t size, const std::nothrow_t&) EIGEN_NO_THROW { \
      EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \
      EIGEN_CATCH (...) { return 0; } \
    }
  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \
    EIGEN_DEVICE_FUNC \
    void *operator new(std::size_t size) { \
      return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
    } \
    EIGEN_DEVICE_FUNC \
    void *operator new[](std::size_t size) { \
      return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
    } \
    EIGEN_DEVICE_FUNC \
    void operator delete(void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
    EIGEN_DEVICE_FUNC \
    void operator delete[](void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
    EIGEN_DEVICE_FUNC \
    void operator delete(void * ptr, std::size_t ) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
    EIGEN_DEVICE_FUNC \
    void operator delete[](void * ptr, std::size_t ) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
    \
    /* placement new/delete: no memory is actually allocated here, so it is */ \
    /* safe to forward to the default implementations.                      */ \
    EIGEN_DEVICE_FUNC \
    static void *operator new(std::size_t size, void *ptr) { return ::operator new(size,ptr); } \
    EIGEN_DEVICE_FUNC \
    static void *operator new[](std::size_t size, void* ptr) { return ::operator new[](size,ptr); } \
    EIGEN_DEVICE_FUNC \
    void operator delete(void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete(memory,ptr); } \
    EIGEN_DEVICE_FUNC \
    void operator delete[](void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete[](memory,ptr); } \
    \
    /* nothrow-new: returns zero instead of throwing std::bad_alloc. */ \
    EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
    EIGEN_DEVICE_FUNC \
    void operator delete(void *ptr, const std::nothrow_t&) EIGEN_NO_THROW { \
      Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
    } \
    typedef void eigen_aligned_operator_new_marker_type;
#else
  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
#endif

#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true)
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \
  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool( \
    ((Size)!=Eigen::Dynamic) && \
    (((EIGEN_MAX_ALIGN_BYTES>=16) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES  )==0)) || \
     ((EIGEN_MAX_ALIGN_BYTES>=32) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES/2)==0)) || \
     ((EIGEN_MAX_ALIGN_BYTES>=64) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES/4)==0))   )))

#endif
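
// Usage sketch (illustrative only, mirroring Eigen's documented guidance for
// structures having fixed-size vectorizable Eigen members): adding the macro
// to a user type makes dynamic allocation of that type properly aligned:
//
//   struct MyState {                      // hypothetical user type
//     Eigen::Vector4f position;
//     float weight;
//     EIGEN_MAKE_ALIGNED_OPERATOR_NEW
//   };
//
//   MyState* s = new MyState;             // storage is suitably aligned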

/****************************************************************************/

/** \class aligned_allocator
  * \ingroup Core_Module
  *
  * \brief STL compatible allocator to use with types requiring a non-standard alignment.
  *
  * Memory is obtained through internal::aligned_malloc, so it gets the same
  * alignment guarantee (EIGEN_DEFAULT_ALIGN_BYTES) as Eigen's dynamically
  * allocated matrices and arrays.
  */
template<class T>
class aligned_allocator : public std::allocator<T>
{
public:
  typedef std::size_t     size_type;
  typedef std::ptrdiff_t  difference_type;
  typedef T*              pointer;
  typedef const T*        const_pointer;
  typedef T&              reference;
  typedef const T&        const_reference;
  typedef T               value_type;

  template<class U>
  struct rebind
  {
    typedef aligned_allocator<U> other;
  };

  aligned_allocator() : std::allocator<T>() {}

  aligned_allocator(const aligned_allocator& other) : std::allocator<T>(other) {}

  template<class U>
  aligned_allocator(const aligned_allocator<U>& other) : std::allocator<T>(other) {}

  ~aligned_allocator() {}

  #if EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_LEAST(7,0)
  // The max_size() inherited from std::allocator can make GCC 7+ emit spurious
  // warnings in some situations, so provide an explicit, conservative
  // implementation here.
  size_type max_size() const {
    return (std::numeric_limits<std::ptrdiff_t>::max)()/sizeof(T);
  }
  #endif

  pointer allocate(size_type num, const void* = 0)
  {
    internal::check_size_for_overflow<T>(num);
    return static_cast<pointer>( internal::aligned_malloc(num * sizeof(T)) );
  }

  void deallocate(pointer p, size_type )
  {
    internal::aligned_free(p);
  }
};
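
// Usage sketch (illustrative only): storing fixed-size vectorizable Eigen
// types in an STL container requires the aligned allocator, e.g.
//
//   std::vector<Eigen::Vector4f, Eigen::aligned_allocator<Eigen::Vector4f> > pts;
//   pts.push_back(Eigen::Vector4f::Zero());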

/*****************************************************************************
*** Implementation of runtime CPU cache size queries                      ***
*****************************************************************************/

#if !defined(EIGEN_NO_CPUID)
#  if EIGEN_COMP_GNUC && EIGEN_ARCH_i386_OR_x86_64
#    if defined(__PIC__) && EIGEN_ARCH_i386
       // PIC on i386: %ebx may hold the GOT pointer and cannot be clobbered
       // directly, so preserve it around cpuid.
#      define EIGEN_CPUID(abcd,func,id) \
         __asm__ __volatile__ ("xchgl %%ebx, %k1;cpuid; xchgl %%ebx,%k1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id));
#    elif defined(__PIC__) && EIGEN_ARCH_x86_64
       // PIC on x86_64: %rbx may also be reserved, so preserve it around cpuid
       // as well.
#      define EIGEN_CPUID(abcd,func,id) \
         __asm__ __volatile__ ("xchg{q}\t{%%}rbx, %q1; cpuid; xchg{q}\t{%%}rbx, %q1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id));
#    else
       // Default case: cpuid may clobber %ebx/%rbx directly.
#      define EIGEN_CPUID(abcd,func,id) \
         __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id) );
#    endif
#  elif EIGEN_COMP_MSVC
#    if (EIGEN_COMP_MSVC > 1500) && EIGEN_ARCH_i386_OR_x86_64
#      define EIGEN_CPUID(abcd,func,id) __cpuidex((int*)abcd,func,id)
#    endif
#  endif
#endif

namespace internal {

#ifdef EIGEN_CPUID

inline bool cpuid_is_vendor(int abcd[4], const int vendor[3])
{
  return abcd[1]==vendor[0] && abcd[3]==vendor[1] && abcd[2]==vendor[2];
}

inline void queryCacheSizes_intel_direct(int& l1, int& l2, int& l3)
{
  int abcd[4];
  l1 = l2 = l3 = 0;
  int cache_id = 0;
  int cache_type = 0;
  do {
    abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
    EIGEN_CPUID(abcd,0x4,cache_id);
    cache_type  = (abcd[0] & 0x0F) >> 0;
    if(cache_type==1||cache_type==3) // data or unified cache
    {
      int cache_level = (abcd[0] & 0xE0) >> 5;         // A[7:5]
      int ways        = (abcd[1] & 0xFFC00000) >> 22;  // B[31:22]
      int partitions  = (abcd[1] & 0x003FF000) >> 12;  // B[21:12]
      int line_size   = (abcd[1] & 0x00000FFF) >>  0;  // B[11:0]
      int sets        = (abcd[2]);                     // C[31:0]

      int cache_size = (ways+1) * (partitions+1) * (line_size+1) * (sets+1);

      switch(cache_level)
      {
        case 1: l1 = cache_size; break;
        case 2: l2 = cache_size; break;
        case 3: l3 = cache_size; break;
        default: break;
      }
    }
    cache_id++;
  } while(cache_type>0 && cache_id<16);
}

inline void queryCacheSizes_intel_codes(int& l1, int& l2, int& l3)
{
  int abcd[4];
  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
  l1 = l2 = l3 = 0;
  EIGEN_CPUID(abcd,0x00000002,0);
  unsigned char * bytes = reinterpret_cast<unsigned char *>(abcd)+2;
  bool check_for_p2_core2 = false;
  // Decode the CPUID leaf 0x2 cache descriptor bytes; the sizes below are in KB
  // and converted to bytes at the end.
  for(int i=0; i<14; ++i)
  {
    switch(bytes[i])
    {
      case 0x0A: l1 = 8; break;
      case 0x0C: l1 = 16; break;
      case 0x0E: l1 = 24; break;
      case 0x10: l1 = 16; break;
      case 0x15: l1 = 16; break;
      case 0x2C: l1 = 32; break;
      case 0x30: l1 = 32; break;
      case 0x60: l1 = 16; break;
      case 0x66: l1 = 8; break;
      case 0x67: l1 = 16; break;
      case 0x68: l1 = 32; break;
      case 0x1A: l2 = 96; break;
      case 0x22: l3 = 512; break;
      case 0x23: l3 = 1024; break;
      case 0x25: l3 = 2048; break;
      case 0x29: l3 = 4096; break;
      case 0x39: l2 = 128; break;
      case 0x3A: l2 = 192; break;
      case 0x3B: l2 = 128; break;
      case 0x3C: l2 = 256; break;
      case 0x3D: l2 = 384; break;
      case 0x3E: l2 = 512; break;
      case 0x40: l2 = 0; break;
      case 0x41: l2 = 128; break;
      case 0x42: l2 = 256; break;
      case 0x43: l2 = 512; break;
      case 0x44: l2 = 1024; break;
      case 0x45: l2 = 2048; break;
      case 0x46: l3 = 4096; break;
      case 0x47: l3 = 8192; break;
      case 0x48: l2 = 3072; break;
      case 0x49: if(l2!=0) l3 = 4096; else {check_for_p2_core2=true; l3 = l2 = 4096;} break;
      case 0x4A: l3 = 6144; break;
      case 0x4B: l3 = 8192; break;
      case 0x4C: l3 = 12288; break;
      case 0x4D: l3 = 16384; break;
      case 0x4E: l2 = 6144; break;
      case 0x78: l2 = 1024; break;
      case 0x79: l2 = 128; break;
      case 0x7A: l2 = 256; break;
      case 0x7B: l2 = 512; break;
      case 0x7C: l2 = 1024; break;
      case 0x7D: l2 = 2048; break;
      case 0x7E: l2 = 256; break;
      case 0x7F: l2 = 512; break;
      case 0x80: l2 = 512; break;
      case 0x81: l2 = 128; break;
      case 0x82: l2 = 256; break;
      case 0x83: l2 = 512; break;
      case 0x84: l2 = 1024; break;
      case 0x85: l2 = 2048; break;
      case 0x86: l2 = 512; break;
      case 0x87: l2 = 1024; break;
      case 0x88: l3 = 2048; break;
      case 0x89: l3 = 4096; break;
      case 0x8A: l3 = 8192; break;
      case 0x8D: l3 = 3072; break;

      default: break;
    }
  }
  if(check_for_p2_core2 && l2 == l3)
    l3 = 0;
  l1 *= 1024;
  l2 *= 1024;
  l3 *= 1024;
}

inline void queryCacheSizes_intel(int& l1, int& l2, int& l3, int max_std_funcs)
{
  if(max_std_funcs>=4)
    queryCacheSizes_intel_direct(l1,l2,l3);
  else if(max_std_funcs>=2)
    queryCacheSizes_intel_codes(l1,l2,l3);
  else
    l1 = l2 = l3 = 0;
}

inline void queryCacheSizes_amd(int& l1, int& l2, int& l3)
{
  int abcd[4];
  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;

  // First query the maximum supported extended function.
  EIGEN_CPUID(abcd,0x80000000,0);
  if(static_cast<numext::uint32_t>(abcd[0]) >= static_cast<numext::uint32_t>(0x80000006))
  {
    EIGEN_CPUID(abcd,0x80000005,0);
    l1 = (abcd[2] >> 24) * 1024;                      // L1 data cache size, reported in KB
    abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
    EIGEN_CPUID(abcd,0x80000006,0);
    l2 = (abcd[2] >> 16) * 1024;                      // L2 cache size, reported in KB
    l3 = ((abcd[3] & 0xFFFC000) >> 18) * 512 * 1024;  // L3 cache size, reported in 512 KB units
  }
  else
  {
    l1 = l2 = l3 = 0;
  }
}
#endif

/** \internal
  * Queries and returns the cache sizes in Bytes of the L1, L2, and L3 data caches respectively. */
inline void queryCacheSizes(int& l1, int& l2, int& l3)
{
  #ifdef EIGEN_CPUID
  int abcd[4];
  const int GenuineIntel[] = {0x756e6547, 0x49656e69, 0x6c65746e};
  const int AuthenticAMD[] = {0x68747541, 0x69746e65, 0x444d4163};
  const int AMDisbetter_[] = {0x69444d41, 0x74656273, 0x21726574}; // vendor string of early AMD K5 engineering samples

  // identify the CPU vendor
  EIGEN_CPUID(abcd,0x0,0);
  int max_std_funcs = abcd[0];
  if(cpuid_is_vendor(abcd,GenuineIntel))
    queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
  else if(cpuid_is_vendor(abcd,AuthenticAMD) || cpuid_is_vendor(abcd,AMDisbetter_))
    queryCacheSizes_amd(l1,l2,l3);
  else
    // unknown vendor: fall back to Intel's query mechanism
    queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
  #else
  l1 = l2 = l3 = -1;
  #endif
}

/** \internal
  * \returns the size in Bytes of the L1 data cache */
inline int queryL1CacheSize()
{
  int l1(-1), l2, l3;
  queryCacheSizes(l1,l2,l3);
  return l1;
}

/** \internal
  * \returns the size in Bytes of the L2 or L3 cache if this later is present */
inline int queryTopLevelCacheSize()
{
  int l1, l2(-1), l3(-1);
  queryCacheSizes(l1,l2,l3);
  return (std::max)(l2,l3);
}
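
// Usage sketch (illustrative only): the detected cache sizes can be inspected
// directly, e.g. when tuning block sizes:
//
//   int l1  = Eigen::internal::queryL1CacheSize();        // bytes, or -1 if unknown
//   int top = Eigen::internal::queryTopLevelCacheSize();  // L2/L3 in bytes, or -1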

} // end namespace internal

} // end namespace Eigen

#endif // EIGEN_MEMORY_H