File indexing completed on 2025-10-31 09:00:33
0001 
0002 
0003 
0004 
0005 
0006 
0007 
0008 
0009 // Compile-time configuration header: derives the alignment limits, selects the SIMD
0010 // back-end (EIGEN_VECTORIZE_*) from compiler macros, and includes the matching intrinsic headers.
0011 #ifndef EIGEN_CONFIGURE_VECTORIZATION_H
0012 #define EIGEN_CONFIGURE_VECTORIZATION_H
0013 
0014 
0015 
0016 
0017 
0018 
0019 
0020 
0021 
0022 
0023 
0024 
0025 
0026 
0027 
0028 
0029 
0030 
0031 
0032 
0033 
0034 // Pick the compiler-specific spelling of "align to n bytes" (EIGEN_ALIGN_TO_BOUNDARY)
0035 // and "alignment of x" (EIGEN_ALIGNOF). The first matching branch wins; no match is a hard error.
0036 #if (defined EIGEN_CUDACC)
0037   #define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n)
0038   #define EIGEN_ALIGNOF(x) __alignof(x)
0039 #elif EIGEN_HAS_ALIGNAS
0040   #define EIGEN_ALIGN_TO_BOUNDARY(n) alignas(n)
0041   #define EIGEN_ALIGNOF(x) alignof(x)
0042 #elif EIGEN_COMP_GNUC || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM
0043   #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
0044   #define EIGEN_ALIGNOF(x) __alignof(x)
0045 #elif EIGEN_COMP_MSVC
0046   #define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n))
0047   #define EIGEN_ALIGNOF(x) __alignof(x)
0048 #elif EIGEN_COMP_SUNCC
0049   // Same attribute spelling as the GNU-style branch above.
0050   #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
0051   #define EIGEN_ALIGNOF(x) __alignof(x)
0052 #else
0053   #error Please tell me what is the equivalent of alignas(n) and alignof(x) for your compiler
0054 #endif
0055 
0056 // EIGEN_IDEAL_MAX_ALIGN_BYTES: alignment in bytes chosen from the vectorization switch and the x86 ISA macros.
0057 #if defined(EIGEN_DONT_VECTORIZE)
0058   #if defined(EIGEN_GPUCC)
0059     // Vectorization is disabled but a GPU compiler is in use:
0060     // keep 16 instead of falling to 0.
0061     #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
0062   #else
0063     #define EIGEN_IDEAL_MAX_ALIGN_BYTES 0
0064   #endif
0065 #elif defined(__AVX512F__)
0066   // AVX-512 target: 64 bytes.
0067   #define EIGEN_IDEAL_MAX_ALIGN_BYTES 64
0068 #elif defined(__AVX__)
0069   // AVX target: 32 bytes.
0070   #define EIGEN_IDEAL_MAX_ALIGN_BYTES 32
0071 #else
0072   #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
0073 #endif
0074 
0075 
0076 // EIGEN_MIN_ALIGN_BYTES is an unconditional constant; no conditional in this file changes it.
0077 #define EIGEN_MIN_ALIGN_BYTES 16
0078 
0079 
0080 
0081 // EIGEN_DONT_ALIGN / EIGEN_DONT_ALIGN_STATICALLY contradict a positive
0082 // EIGEN_MAX_STATIC_ALIGN_BYTES; reject that combination outright.
0083 #if (defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN))  && defined(EIGEN_MAX_STATIC_ALIGN_BYTES) && EIGEN_MAX_STATIC_ALIGN_BYTES>0
0084 #error EIGEN_MAX_STATIC_ALIGN_BYTES and EIGEN_DONT_ALIGN[_STATICALLY] are both defined with EIGEN_MAX_STATIC_ALIGN_BYTES!=0. Use EIGEN_MAX_STATIC_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN_STATICALLY.
0085 #endif
0086 
0087 // With either "don't align" switch set, force EIGEN_MAX_STATIC_ALIGN_BYTES to 0,
0088 // replacing any user value (which can only be non-positive after the check above).
0089 #if defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)
0090   #ifdef EIGEN_MAX_STATIC_ALIGN_BYTES
0091     #undef EIGEN_MAX_STATIC_ALIGN_BYTES
0092   #endif
0093   #define EIGEN_MAX_STATIC_ALIGN_BYTES 0
0094 #endif
0095 
0096 #ifndef EIGEN_MAX_STATIC_ALIGN_BYTES
0097 
0098   // No static-alignment limit has been fixed yet (neither by the user nor by the
0099 
0100   // "don't align" switches), so derive a default in three steps.
0101   //
0102   // Step 1: EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT becomes 1 when the compiler
0103   // is GNU-compatible and the architecture is none of x86/x86-64, ARM/ARM64, PPC, IA64
0104   // or MIPS (as the EIGEN_ARCH_* names indicate), or in the ARM case handled by the
0105   // #elif below; otherwise it is 0.
0106   #if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64 || EIGEN_ARCH_MIPS)
0107   #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
0108   #elif EIGEN_ARCH_ARM_OR_ARM64 && EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_MOST(4, 6)
0109   // ARM/ARM64 built with strict GCC at version 4.6 or lower also opts out.
0110   // NOTE(review): the underlying compiler issue is not visible in this file --
0111   // confirm the rationale before relaxing the version bound.
0112   #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
0113   #else
0114   #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
0115   #endif
0116 
0117   // Step 2: stack alignment is wanted unless step 1 opted out or one of the three exclusion macros below is set.
0118   #if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \
0119   && !EIGEN_GCC3_OR_OLDER \
0120   && !EIGEN_COMP_SUNCC \
0121   && !EIGEN_OS_QNX
0122     #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1
0123   #else
0124     #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0
0125   #endif
0126 
0127   #if EIGEN_ARCH_WANTS_STACK_ALIGNMENT
0128     #define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
0129   #else
0130     #define EIGEN_MAX_STATIC_ALIGN_BYTES 0
0131   #endif
0132 
0133 #endif
0134 
0135 // A user-provided EIGEN_MAX_ALIGN_BYTES that is smaller than the static limit also caps the static limit.
0136 #if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES<EIGEN_MAX_STATIC_ALIGN_BYTES
0137 #undef EIGEN_MAX_STATIC_ALIGN_BYTES
0138 #define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
0139 #endif
0140 // When the static limit is exactly 0, define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT unless it is already defined.
0141 #if EIGEN_MAX_STATIC_ALIGN_BYTES==0 && !defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT)
0142   #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
0143 #endif
0144 
0145 
0146 
0147 
0148 
0149 
0150 // Alignment specifiers for fixed boundaries (8/16/32/64 bytes), plus EIGEN_ALIGN_MAX, which aligns to
0151 // EIGEN_MAX_STATIC_ALIGN_BYTES and expands to nothing when that limit is not positive.
0152 #define EIGEN_ALIGN8  EIGEN_ALIGN_TO_BOUNDARY(8)
0153 #define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16)
0154 #define EIGEN_ALIGN32 EIGEN_ALIGN_TO_BOUNDARY(32)
0155 #define EIGEN_ALIGN64 EIGEN_ALIGN_TO_BOUNDARY(64)
0156 #if EIGEN_MAX_STATIC_ALIGN_BYTES>0
0157 #define EIGEN_ALIGN_MAX EIGEN_ALIGN_TO_BOUNDARY(EIGEN_MAX_STATIC_ALIGN_BYTES)
0158 #else
0159 #define EIGEN_ALIGN_MAX
0160 #endif
0161 
0162 
0163 // EIGEN_DONT_ALIGN contradicts a positive EIGEN_MAX_ALIGN_BYTES; reject that combination.
0164 // This mirrors the earlier check made for the static limit.
0165 #if defined(EIGEN_DONT_ALIGN) && defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES>0
0166 #error EIGEN_MAX_ALIGN_BYTES and EIGEN_DONT_ALIGN are both defined with EIGEN_MAX_ALIGN_BYTES!=0. Use EIGEN_MAX_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN.
0167 #endif
0168 // Resolve EIGEN_MAX_ALIGN_BYTES: 0 under EIGEN_DONT_ALIGN, else the user value, else the ideal value.
0169 #ifdef EIGEN_DONT_ALIGN
0170   #ifdef EIGEN_MAX_ALIGN_BYTES
0171     #undef EIGEN_MAX_ALIGN_BYTES
0172   #endif
0173   #define EIGEN_MAX_ALIGN_BYTES 0
0174 #elif !defined(EIGEN_MAX_ALIGN_BYTES)
0175   #define EIGEN_MAX_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
0176 #endif
0177 // EIGEN_DEFAULT_ALIGN_BYTES is the larger of the ideal and the maximum alignment.
0178 #if EIGEN_IDEAL_MAX_ALIGN_BYTES > EIGEN_MAX_ALIGN_BYTES
0179 #define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
0180 #else
0181 #define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
0182 #endif
0183 
0184 // EIGEN_UNALIGNED_VECTORIZE defaults to 1 unless the user has already defined it.
0185 #ifndef EIGEN_UNALIGNED_VECTORIZE
0186 #define EIGEN_UNALIGNED_VECTORIZE 1
0187 #endif
0188 
0189 
0190 
0191 
0192 // Alignment fully disabled (EIGEN_MAX_ALIGN_BYTES==0) implies EIGEN_DONT_VECTORIZE.
0193 #if EIGEN_MAX_ALIGN_BYTES==0
0194   #ifndef EIGEN_DONT_VECTORIZE
0195     #define EIGEN_DONT_VECTORIZE
0196   #endif
0197 #endif
0198 
0199 
0200 
0201 // Detect SSE2 availability, with one marker macro for MSVC and another for every other compiler.
0202 #if EIGEN_COMP_MSVC
0203   #include <malloc.h> // for _aligned_malloc -- need it regardless of whether vectorization is enabled
0204   #if (EIGEN_COMP_MSVC >= 1500) 
0205     // MSVC value >= 1500: SSE2 is assumed when _M_IX86_FP >= 2 or when the x86-64 arch macro is set.
0206     #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64
0207       #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER
0208     #endif
0209   #endif
0210 #else
0211   #if (defined __SSE2__) && ( (!EIGEN_COMP_GNUC) || EIGEN_COMP_ICC || EIGEN_GNUC_AT_LEAST(4,2) )
0212     #define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC
0213   #endif
0214 #endif
0215 // Back-end selection. Skipped entirely when vectorization is disabled or a GPU compiler is in use.
0216 #if !(defined(EIGEN_DONT_VECTORIZE) || defined(EIGEN_GPUCC))
0217 
0218   #if defined (EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
0219 
0220     // x86 branch. SSE2 is the baseline: once one of the SSE2 marker macros is set,
0221     // enable EIGEN_VECTORIZE together with the SSE and SSE2 levels.
0222     
0223     #define EIGEN_VECTORIZE
0224     #define EIGEN_VECTORIZE_SSE
0225     #define EIGEN_VECTORIZE_SSE2
0226 
0227     // Higher levels follow the compiler's predefined feature macros. __AVX__, __AVX2__ and
0228     // __AVX512F__ additionally switch on every lower SSE level; repeating an identical
0229     // object-like #define is permitted, so the overlaps below are harmless.
0230     // Under EIGEN_USE_SYCL the AVX-family macros are withheld and only the SSE levels are set.
0231     #ifdef __SSE3__
0232       #define EIGEN_VECTORIZE_SSE3
0233     #endif
0234     #ifdef __SSSE3__
0235       #define EIGEN_VECTORIZE_SSSE3
0236     #endif
0237     #ifdef __SSE4_1__
0238       #define EIGEN_VECTORIZE_SSE4_1
0239     #endif
0240     #ifdef __SSE4_2__
0241       #define EIGEN_VECTORIZE_SSE4_2
0242     #endif
0243     #ifdef __AVX__
0244       #ifndef EIGEN_USE_SYCL 
0245         #define EIGEN_VECTORIZE_AVX
0246       #endif
0247       #define EIGEN_VECTORIZE_SSE3
0248       #define EIGEN_VECTORIZE_SSSE3
0249       #define EIGEN_VECTORIZE_SSE4_1
0250       #define EIGEN_VECTORIZE_SSE4_2
0251     #endif
0252     #ifdef __AVX2__
0253       #ifndef EIGEN_USE_SYCL 
0254         #define EIGEN_VECTORIZE_AVX2
0255         #define EIGEN_VECTORIZE_AVX
0256       #endif
0257       #define EIGEN_VECTORIZE_SSE3
0258       #define EIGEN_VECTORIZE_SSSE3
0259       #define EIGEN_VECTORIZE_SSE4_1
0260       #define EIGEN_VECTORIZE_SSE4_2
0261     #endif
0262     #if defined(__FMA__) || (EIGEN_COMP_MSVC && defined(__AVX2__))
0263       // On MSVC, __AVX2__ alone is accepted as evidence of FMA
0264       // (presumably because MSVC predefines no __FMA__ macro -- confirm).
0265       #define EIGEN_VECTORIZE_FMA
0266     #endif
0267     #if defined(__AVX512F__)
0268       #ifndef EIGEN_VECTORIZE_FMA
0269       #if EIGEN_COMP_GNUC
0270       #error Please add -mfma to your compiler flags: compiling with -mavx512f alone without SSE/AVX FMA is not supported (bug 1638).
0271       #else
0272       #error Please enable FMA in your compiler flags (e.g. -mfma): compiling with AVX512 alone without SSE/AVX FMA is not supported (bug 1638).
0273       #endif
0274       #endif
0275       #ifndef EIGEN_USE_SYCL
0276         #define EIGEN_VECTORIZE_AVX512
0277         #define EIGEN_VECTORIZE_AVX2
0278         #define EIGEN_VECTORIZE_AVX
0279       #endif
0280       #define EIGEN_VECTORIZE_FMA
0281       #define EIGEN_VECTORIZE_SSE3
0282       #define EIGEN_VECTORIZE_SSSE3
0283       #define EIGEN_VECTORIZE_SSE4_1
0284       #define EIGEN_VECTORIZE_SSE4_2
0285       #ifndef EIGEN_USE_SYCL
0286         #ifdef __AVX512DQ__
0287           #define EIGEN_VECTORIZE_AVX512DQ
0288         #endif
0289         #ifdef __AVX512ER__
0290           #define EIGEN_VECTORIZE_AVX512ER
0291         #endif
0292         #ifdef __AVX512BF16__
0293           #define EIGEN_VECTORIZE_AVX512BF16
0294         #endif
0295       #endif
0296     #endif
0297 
0298     // Workaround limited to Apple clang build 11000033 with a minimum macOS target of 101500.
0299     #if defined(__apple_build_version__) && (__apple_build_version__ == 11000033 ) && ( __MAC_OS_X_VERSION_MIN_REQUIRED == 101500 )
0300       // If AVX had been enabled, withdraw it with a warning, together with AVX2, FMA,
0301       // AVX512, AVX512DQ and AVX512ER. NOTE(review): EIGEN_VECTORIZE_AVX512BF16 is not withdrawn here -- confirm intent.
0302       #ifdef EIGEN_VECTORIZE_AVX
0303         #undef EIGEN_VECTORIZE_AVX
0304         #warning "Disabling AVX support: clang compiler shipped with XCode 11.[012] generates broken assembly with -macosx-version-min=10.15 and AVX enabled. "
0305         #ifdef EIGEN_VECTORIZE_AVX2
0306           #undef EIGEN_VECTORIZE_AVX2
0307         #endif
0308         #ifdef EIGEN_VECTORIZE_FMA
0309           #undef EIGEN_VECTORIZE_FMA
0310         #endif
0311         #ifdef EIGEN_VECTORIZE_AVX512
0312           #undef EIGEN_VECTORIZE_AVX512
0313         #endif
0314         #ifdef EIGEN_VECTORIZE_AVX512DQ
0315           #undef EIGEN_VECTORIZE_AVX512DQ
0316         #endif
0317         #ifdef EIGEN_VECTORIZE_AVX512ER
0318           #undef EIGEN_VECTORIZE_AVX512ER
0319         #endif
0320       #endif
0321       
0322       
0323       
0324       
0325       
0326       
0327       
0328     #endif
0329 
0330     
0331 
0332     
0333     
0334     
0335     
0336     
0337     // Include the x86 intrinsic headers inside an extern "C" block. Which headers are
0338     // included follows the EIGEN_VECTORIZE_* levels decided earlier in this x86 branch.
0339     extern "C" {
0340       // ICC with a version value of at least 1110 takes everything from <immintrin.h>;
0341       // other compilers get one header per enabled level.
0342       #if EIGEN_COMP_ICC >= 1110
0343         #include <immintrin.h>
0344       #else
0345         #include <mmintrin.h>
0346         #include <emmintrin.h>
0347         #include <xmmintrin.h>
0348         #ifdef  EIGEN_VECTORIZE_SSE3
0349         #include <pmmintrin.h>
0350         #endif
0351         #ifdef EIGEN_VECTORIZE_SSSE3
0352         #include <tmmintrin.h>
0353         #endif
0354         #ifdef EIGEN_VECTORIZE_SSE4_1
0355         #include <smmintrin.h>
0356         #endif
0357         #ifdef EIGEN_VECTORIZE_SSE4_2
0358         #include <nmmintrin.h>
0359         #endif
0360         #if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512)
0361         #include <immintrin.h>
0362         #endif
0363       #endif
0364     } 
0365 
0366   #elif defined __VSX__
0367     // VSX back-end. It is tested before __ALTIVEC__, so only one of the two is ever selected.
0368     #define EIGEN_VECTORIZE
0369     #define EIGEN_VECTORIZE_VSX
0370     #include <altivec.h>
0371     // Drop the bool/vector/pixel macros right after the include
0372     // (presumably <altivec.h> defines them and they would clash with C++ code -- confirm).
0373     #undef bool
0374     #undef vector
0375     #undef pixel
0376 
0377   #elif defined __ALTIVEC__
0378     // AltiVec back-end: same header as the VSX branch.
0379     #define EIGEN_VECTORIZE
0380     #define EIGEN_VECTORIZE_ALTIVEC
0381     #include <altivec.h>
0382     // Same macro clean-up as in the VSX branch.
0383     
0384     #undef bool
0385     #undef vector
0386     #undef pixel
0387 
0388   #elif ((defined  __ARM_NEON) || (defined __ARM_NEON__)) && !(defined EIGEN_ARM64_USE_SVE)
0389     // NEON back-end, skipped when the user has asked for SVE through EIGEN_ARM64_USE_SVE.
0390     #define EIGEN_VECTORIZE
0391     #define EIGEN_VECTORIZE_NEON
0392     #include <arm_neon.h>
0393 
0394   
0395   
0396   #elif (defined __ARM_FEATURE_SVE) && (defined EIGEN_ARM64_USE_SVE)
0397 
0398     #define EIGEN_VECTORIZE
0399     #define EIGEN_VECTORIZE_SVE
0400     #include <arm_sve.h>
0401 
0402     
0403     
0404     #if defined __ARM_FEATURE_SVE_BITS
0405       #define EIGEN_ARM64_SVE_VL __ARM_FEATURE_SVE_BITS
0406     #else
0407 #error "Eigen requires a fixed SVE lector length but EIGEN_ARM64_SVE_VL is not set."
0408 #endif
0409 
0410 #elif (defined __s390x__ && defined __VEC__)
0411 // ZVECTOR back-end: requires both __s390x__ and __VEC__.
0412 #define EIGEN_VECTORIZE
0413 #define EIGEN_VECTORIZE_ZVECTOR
0414 #include <vecintrin.h>
0415 
0416 #elif defined __mips_msa
0417 
0418 // MIPS MSA back-end: enabled for little-endian targets only; any other byte order defines
0419 // nothing here. EIGEN_MIPS_64 vs EIGEN_MIPS_32 is decided by __LP64__.
0420 #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
0421 #if defined(__LP64__)
0422 #define EIGEN_MIPS_64
0423 #else
0424 #define EIGEN_MIPS_32
0425 #endif
0426 #define EIGEN_VECTORIZE
0427 #define EIGEN_VECTORIZE_MSA
0428 #include <msa.h>
0429 #endif
0430 
0431 #endif // end of the back-end #if/#elif chain
0432 #endif // !(EIGEN_DONT_VECTORIZE || EIGEN_GPUCC)
0433 
0434 
0435 // Half-precision and GPU set-up. These checks sit outside the back-end chain,
0436 // so they are evaluated even when EIGEN_DONT_VECTORIZE is defined.
0437 #if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
0438   #include <arm_fp16.h>
0439 #endif
0440 
0441 #if defined(__F16C__) && (!defined(EIGEN_GPUCC) && (!defined(EIGEN_COMP_CLANG) || EIGEN_COMP_CLANG>=380))
0442   // F16C is used only off-GPU, and with clang only when EIGEN_COMP_CLANG is at least 380.
0443   #define EIGEN_HAS_FP16_C
0444 
0445   #if defined(EIGEN_COMP_CLANG)
0446     // Whenever EIGEN_COMP_CLANG is defined, include <immintrin.h> explicitly
0447     // (presumably to make the F16C conversion intrinsics visible -- confirm).
0448     
0449     #include <immintrin.h>
0450   #endif
0451 #endif
0452 
0453 #if defined EIGEN_CUDACC
0454   #define EIGEN_VECTORIZE_GPU
0455   #include <vector_types.h>
0456   #if EIGEN_CUDA_SDK_VER >= 70500
0457     #define EIGEN_HAS_CUDA_FP16
0458   #endif
0459 #endif
0460 
0461 #if defined(EIGEN_HAS_CUDA_FP16)
0462   #include <cuda_runtime_api.h>
0463   #include <cuda_fp16.h>
0464 #endif
0465 
0466 #if defined(EIGEN_HIPCC)
0467   #define EIGEN_VECTORIZE_GPU
0468   #include <hip/hip_vector_types.h>
0469   #define EIGEN_HAS_HIP_FP16
0470   #include <hip/hip_fp16.h>
0471 #endif
0472 
0473 
0474 
namespace Eigen {

/**
 * Reports which SIMD instruction sets this translation unit was configured to use.
 *
 * The answer is fixed at preprocessing time: the first EIGEN_VECTORIZE_* macro that is
 * defined, in the order tested below, selects the string. Nothing is probed at run time.
 *
 * \returns a pointer to a string literal holding a comma-separated list of instruction
 *          set names, or "None" when none of the macros tested here is defined.
 */
inline static const char *SimdInstructionSetsInUse(void) {
#if defined(EIGEN_VECTORIZE_AVX512)
  return "AVX512, FMA, AVX2, AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
#elif defined(EIGEN_VECTORIZE_AVX)
  // Every entry is comma-separated, like the other lists in this function.
  return "AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
#elif defined(EIGEN_VECTORIZE_SSE4_2)
  return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
#elif defined(EIGEN_VECTORIZE_SSE4_1)
  return "SSE, SSE2, SSE3, SSSE3, SSE4.1";
#elif defined(EIGEN_VECTORIZE_SSSE3)
  return "SSE, SSE2, SSE3, SSSE3";
#elif defined(EIGEN_VECTORIZE_SSE3)
  return "SSE, SSE2, SSE3";
#elif defined(EIGEN_VECTORIZE_SSE2)
  return "SSE, SSE2";
#elif defined(EIGEN_VECTORIZE_ALTIVEC)
  return "AltiVec";
#elif defined(EIGEN_VECTORIZE_VSX)
  return "VSX";
#elif defined(EIGEN_VECTORIZE_NEON)
  return "ARM NEON";
#elif defined(EIGEN_VECTORIZE_SVE)
  return "ARM SVE";
#elif defined(EIGEN_VECTORIZE_ZVECTOR)
  return "S390X ZVECTOR";
#elif defined(EIGEN_VECTORIZE_MSA)
  return "MIPS MSA";
#else
  return "None";
#endif
}

}  // namespace Eigen
0510 
0511 
0512 #endif