Warning, file /include/eigen3/Eigen/src/Core/util/ConfigureVectorization.h was not indexed
or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011 #ifndef EIGEN_CONFIGURE_VECTORIZATION_H
0012 #define EIGEN_CONFIGURE_VECTORIZATION_H
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036 #if (defined EIGEN_CUDACC)
0037 #define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n)
0038 #define EIGEN_ALIGNOF(x) __alignof(x)
0039 #elif EIGEN_HAS_ALIGNAS
0040 #define EIGEN_ALIGN_TO_BOUNDARY(n) alignas(n)
0041 #define EIGEN_ALIGNOF(x) alignof(x)
0042 #elif EIGEN_COMP_GNUC || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM
0043 #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
0044 #define EIGEN_ALIGNOF(x) __alignof(x)
0045 #elif EIGEN_COMP_MSVC
0046 #define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n))
0047 #define EIGEN_ALIGNOF(x) __alignof(x)
0048 #elif EIGEN_COMP_SUNCC
0049
0050 #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
0051 #define EIGEN_ALIGNOF(x) __alignof(x)
0052 #else
0053 #error Please tell me what is the equivalent of alignas(n) and alignof(x) for your compiler
0054 #endif
0055
0056
// EIGEN_IDEAL_MAX_ALIGN_BYTES: alignment (in bytes) matching the widest enabled
// instruction set. With vectorization disabled it is 0, except under a GPU
// compiler where it stays at 16.
#if defined(EIGEN_DONT_VECTORIZE) && defined(EIGEN_GPUCC)
  #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
#elif defined(EIGEN_DONT_VECTORIZE)
  #define EIGEN_IDEAL_MAX_ALIGN_BYTES 0
#elif defined(__AVX512F__)
  #define EIGEN_IDEAL_MAX_ALIGN_BYTES 64
#elif defined(__AVX__)
  #define EIGEN_IDEAL_MAX_ALIGN_BYTES 32
#else
  #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
#endif

// Fixed lower bound, independent of the instruction set.
#define EIGEN_MIN_ALIGN_BYTES 16
0078
0079
0080
0081
0082
// EIGEN_DONT_ALIGN_STATICALLY and EIGEN_DONT_ALIGN both force
// EIGEN_MAX_STATIC_ALIGN_BYTES to 0. Asking for a positive value at the same
// time is contradictory and rejected.
#if defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)
  #if defined(EIGEN_MAX_STATIC_ALIGN_BYTES) && EIGEN_MAX_STATIC_ALIGN_BYTES > 0
    #error EIGEN_MAX_STATIC_ALIGN_BYTES and EIGEN_DONT_ALIGN[_STATICALLY] are both defined with EIGEN_MAX_STATIC_ALIGN_BYTES!=0. Use EIGEN_MAX_STATIC_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN_STATICALLY.
  #endif
  // #undef is a no-op when the macro was never defined.
  #undef EIGEN_MAX_STATIC_ALIGN_BYTES
  #define EIGEN_MAX_STATIC_ALIGN_BYTES 0
#endif
0095
0096 #ifndef EIGEN_MAX_STATIC_ALIGN_BYTES
0097
0098
0099
0100
0101
0102
0103
0104
0105
0106 #if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64 || EIGEN_ARCH_MIPS)
0107 #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
0108 #elif EIGEN_ARCH_ARM_OR_ARM64 && EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_MOST(4, 6)
0109
0110
0111
0112 #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
0113 #else
0114 #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
0115 #endif
0116
0117
0118 #if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \
0119 && !EIGEN_GCC3_OR_OLDER \
0120 && !EIGEN_COMP_SUNCC \
0121 && !EIGEN_OS_QNX
0122 #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1
0123 #else
0124 #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0
0125 #endif
0126
0127 #if EIGEN_ARCH_WANTS_STACK_ALIGNMENT
0128 #define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
0129 #else
0130 #define EIGEN_MAX_STATIC_ALIGN_BYTES 0
0131 #endif
0132
0133 #endif
0134
0135
// A user-provided EIGEN_MAX_ALIGN_BYTES caps the static alignment as well.
#ifdef EIGEN_MAX_ALIGN_BYTES
  #if EIGEN_MAX_STATIC_ALIGN_BYTES > EIGEN_MAX_ALIGN_BYTES
    #undef EIGEN_MAX_STATIC_ALIGN_BYTES
    #define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
  #endif
#endif

// With static alignment switched off, the unaligned-array assertion is disabled too.
#ifndef EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
  #if EIGEN_MAX_STATIC_ALIGN_BYTES == 0
    #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
  #endif
#endif
0144
0145
0146
0147
0148
0149
0150
0151
// Fixed-size alignment shortcuts built on EIGEN_ALIGN_TO_BOUNDARY.
#define EIGEN_ALIGN8  EIGEN_ALIGN_TO_BOUNDARY(8)
#define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16)
#define EIGEN_ALIGN32 EIGEN_ALIGN_TO_BOUNDARY(32)
#define EIGEN_ALIGN64 EIGEN_ALIGN_TO_BOUNDARY(64)

// EIGEN_ALIGN_MAX applies the maximum static alignment, and expands to nothing
// when static alignment is disabled.
#if EIGEN_MAX_STATIC_ALIGN_BYTES <= 0
  #define EIGEN_ALIGN_MAX
#else
  #define EIGEN_ALIGN_MAX EIGEN_ALIGN_TO_BOUNDARY(EIGEN_MAX_STATIC_ALIGN_BYTES)
#endif
0161
0162
0163
0164
// EIGEN_MAX_ALIGN_BYTES: EIGEN_DONT_ALIGN forces it to 0 (a positive user value
// alongside EIGEN_DONT_ALIGN is an error); otherwise a user value is kept and the
// fallback is EIGEN_IDEAL_MAX_ALIGN_BYTES.
#ifdef EIGEN_DONT_ALIGN
  #if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES > 0
    #error EIGEN_MAX_ALIGN_BYTES and EIGEN_DONT_ALIGN are both defined with EIGEN_MAX_ALIGN_BYTES!=0. Use EIGEN_MAX_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN.
  #endif
  // #undef is a no-op when the macro was never defined.
  #undef EIGEN_MAX_ALIGN_BYTES
  #define EIGEN_MAX_ALIGN_BYTES 0
#elif !defined(EIGEN_MAX_ALIGN_BYTES)
  #define EIGEN_MAX_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
#endif

// EIGEN_DEFAULT_ALIGN_BYTES is the larger of the maximum and the ideal alignment.
#if EIGEN_MAX_ALIGN_BYTES >= EIGEN_IDEAL_MAX_ALIGN_BYTES
  #define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
#else
  #define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
#endif
0183
0184
// EIGEN_UNALIGNED_VECTORIZE defaults to 1 unless the user set it beforehand.
#if !defined(EIGEN_UNALIGNED_VECTORIZE)
  #define EIGEN_UNALIGNED_VECTORIZE 1
#endif
0188
0189
0190
0191
0192
// A maximum alignment of 0 implies that vectorization is disabled.
#if !defined(EIGEN_DONT_VECTORIZE) && EIGEN_MAX_ALIGN_BYTES == 0
  #define EIGEN_DONT_VECTORIZE
#endif
0198
0199
0200
0201
0202 #if EIGEN_COMP_MSVC
0203 #include <malloc.h> // for _aligned_malloc -- need it regardless of whether vectorization is enabled
0204 #if (EIGEN_COMP_MSVC >= 1500)
0205
0206 #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64
0207 #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER
0208 #endif
0209 #endif
0210 #else
0211 #if (defined __SSE2__) && ( (!EIGEN_COMP_GNUC) || EIGEN_COMP_ICC || EIGEN_GNUC_AT_LEAST(4,2) )
0212 #define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC
0213 #endif
0214 #endif
0215
// Select the SIMD back end from the compiler's target macros and include the
// matching intrinsics headers. Nothing in this block is active when
// EIGEN_DONT_VECTORIZE is set or when a GPU compiler is in use. The back ends
// form a single #if/#elif chain, so at most one of them is selected.
#if !(defined(EIGEN_DONT_VECTORIZE) || defined(EIGEN_GPUCC))

  #if defined (EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)

    // x86 path: SSE and SSE2 are always enabled here; newer extensions are
    // added according to the compiler-provided feature macros.
    #define EIGEN_VECTORIZE
    #define EIGEN_VECTORIZE_SSE
    #define EIGEN_VECTORIZE_SSE2

    #ifdef __SSE3__
      #define EIGEN_VECTORIZE_SSE3
    #endif
    #ifdef __SSSE3__
      #define EIGEN_VECTORIZE_SSSE3
    #endif
    #ifdef __SSE4_1__
      #define EIGEN_VECTORIZE_SSE4_1
    #endif
    #ifdef __SSE4_2__
      #define EIGEN_VECTORIZE_SSE4_2
    #endif
    // AVX switches on every SSE level; the AVX macro itself is withheld under SYCL.
    #ifdef __AVX__
      #ifndef EIGEN_USE_SYCL
        #define EIGEN_VECTORIZE_AVX
      #endif
      #define EIGEN_VECTORIZE_SSE3
      #define EIGEN_VECTORIZE_SSSE3
      #define EIGEN_VECTORIZE_SSE4_1
      #define EIGEN_VECTORIZE_SSE4_2
    #endif
    #ifdef __AVX2__
      #ifndef EIGEN_USE_SYCL
        #define EIGEN_VECTORIZE_AVX2
        #define EIGEN_VECTORIZE_AVX
      #endif
      #define EIGEN_VECTORIZE_SSE3
      #define EIGEN_VECTORIZE_SSSE3
      #define EIGEN_VECTORIZE_SSE4_1
      #define EIGEN_VECTORIZE_SSE4_2
    #endif
    // With MSVC, AVX2 alone is enough to enable FMA; other compilers need __FMA__.
    #if defined(__FMA__) || (EIGEN_COMP_MSVC && defined(__AVX2__))
      #define EIGEN_VECTORIZE_FMA
    #endif
    #if defined(__AVX512F__)
      // AVX512 without FMA is rejected outright (bug 1638).
      #ifndef EIGEN_VECTORIZE_FMA
        #if EIGEN_COMP_GNUC
          #error Please add -mfma to your compiler flags: compiling with -mavx512f alone without SSE/AVX FMA is not supported (bug 1638).
        #else
          #error Please enable FMA in your compiler flags (e.g. -mfma): compiling with AVX512 alone without SSE/AVX FMA is not supported (bug 1638).
        #endif
      #endif
      #ifndef EIGEN_USE_SYCL
        #define EIGEN_VECTORIZE_AVX512
        #define EIGEN_VECTORIZE_AVX2
        #define EIGEN_VECTORIZE_AVX
      #endif
      #define EIGEN_VECTORIZE_FMA
      #define EIGEN_VECTORIZE_SSE3
      #define EIGEN_VECTORIZE_SSSE3
      #define EIGEN_VECTORIZE_SSE4_1
      #define EIGEN_VECTORIZE_SSE4_2
      // Optional AVX512 sub-extensions, also withheld under SYCL.
      #ifndef EIGEN_USE_SYCL
        #ifdef __AVX512DQ__
          #define EIGEN_VECTORIZE_AVX512DQ
        #endif
        #ifdef __AVX512ER__
          #define EIGEN_VECTORIZE_AVX512ER
        #endif
        #ifdef __AVX512BF16__
          #define EIGEN_VECTORIZE_AVX512BF16
        #endif
      #endif
    #endif

    // Workaround for one specific Apple clang build targeting macOS 10.15:
    // AVX and everything layered on top of it is switched off again.
    #if defined(__apple_build_version__) && (__apple_build_version__ == 11000033 ) && ( __MAC_OS_X_VERSION_MIN_REQUIRED == 101500 )
      #ifdef EIGEN_VECTORIZE_AVX
        #undef EIGEN_VECTORIZE_AVX
        #warning "Disabling AVX support: clang compiler shipped with XCode 11.[012] generates broken assembly with -macosx-version-min=10.15 and AVX enabled. "
        #ifdef EIGEN_VECTORIZE_AVX2
          #undef EIGEN_VECTORIZE_AVX2
        #endif
        #ifdef EIGEN_VECTORIZE_FMA
          #undef EIGEN_VECTORIZE_FMA
        #endif
        #ifdef EIGEN_VECTORIZE_AVX512
          #undef EIGEN_VECTORIZE_AVX512
        #endif
        #ifdef EIGEN_VECTORIZE_AVX512DQ
          #undef EIGEN_VECTORIZE_AVX512DQ
        #endif
        #ifdef EIGEN_VECTORIZE_AVX512ER
          #undef EIGEN_VECTORIZE_AVX512ER
        #endif
        // Keep the AVX512 sub-extension flags consistent: BF16 must not stay
        // enabled once AVX512 itself has been removed.
        #ifdef EIGEN_VECTORIZE_AVX512BF16
          #undef EIGEN_VECTORIZE_AVX512BF16
        #endif
      #endif
    #endif

    // Intrinsics headers are included with C linkage.
    extern "C" {
      // ICC >= 1110 gets the umbrella header; other compilers include one
      // header per enabled extension.
      #if EIGEN_COMP_ICC >= 1110
        #include <immintrin.h>
      #else
        #include <mmintrin.h>
        #include <emmintrin.h>
        #include <xmmintrin.h>
        #ifdef EIGEN_VECTORIZE_SSE3
          #include <pmmintrin.h>
        #endif
        #ifdef EIGEN_VECTORIZE_SSSE3
          #include <tmmintrin.h>
        #endif
        #ifdef EIGEN_VECTORIZE_SSE4_1
          #include <smmintrin.h>
        #endif
        #ifdef EIGEN_VECTORIZE_SSE4_2
          #include <nmmintrin.h>
        #endif
        #if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512)
          #include <immintrin.h>
        #endif
      #endif
    }

  #elif defined __VSX__

    #define EIGEN_VECTORIZE
    #define EIGEN_VECTORIZE_VSX
    #include <altivec.h>
    // Drop these names again right after the include so they cannot clash
    // with ordinary identifiers (presumably <altivec.h> defines them as macros).
    #undef bool
    #undef vector
    #undef pixel

  #elif defined __ALTIVEC__

    #define EIGEN_VECTORIZE
    #define EIGEN_VECTORIZE_ALTIVEC
    #include <altivec.h>
    // Same clean-up as in the VSX branch.
    #undef bool
    #undef vector
    #undef pixel

  #elif ((defined __ARM_NEON) || (defined __ARM_NEON__)) && !(defined EIGEN_ARM64_USE_SVE)

    // NEON is used unless the user explicitly asked for SVE.
    #define EIGEN_VECTORIZE
    #define EIGEN_VECTORIZE_NEON
    #include <arm_neon.h>

  #elif (defined __ARM_FEATURE_SVE) && (defined EIGEN_ARM64_USE_SVE)

    #define EIGEN_VECTORIZE
    #define EIGEN_VECTORIZE_SVE
    #include <arm_sve.h>

    // EIGEN_ARM64_SVE_VL is taken from the compiler-provided
    // __ARM_FEATURE_SVE_BITS; without it the build is rejected.
    #if defined __ARM_FEATURE_SVE_BITS
      #define EIGEN_ARM64_SVE_VL __ARM_FEATURE_SVE_BITS
    #else
      #error "Eigen requires a fixed SVE vector length but __ARM_FEATURE_SVE_BITS is not defined (e.g. compile with -msve-vector-bits=<bits>)."
    #endif

  #elif (defined __s390x__ && defined __VEC__)

    #define EIGEN_VECTORIZE
    #define EIGEN_VECTORIZE_ZVECTOR
    #include <vecintrin.h>

  #elif defined __mips_msa

    // MSA is only enabled for little-endian targets; EIGEN_MIPS_64 or
    // EIGEN_MIPS_32 is chosen from __LP64__.
    #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
      #if defined(__LP64__)
        #define EIGEN_MIPS_64
      #else
        #define EIGEN_MIPS_32
      #endif
      #define EIGEN_VECTORIZE
      #define EIGEN_VECTORIZE_MSA
      #include <msa.h>
    #endif

  #endif
#endif
0433
0434
0435
0436
// Scalar half-precision arithmetic on ARM64 needs <arm_fp16.h>.
#if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
  #include <arm_fp16.h>
#endif

// Enable the F16C conversion path when the target supports it, outside GPU
// compilation, and with any compiler except clang older than 3.8.
//
// EIGEN_COMP_CLANG is tested by value, like the other EIGEN_COMP_* flags in this
// file, not with defined(): a flag defined as 0 on non-clang compilers
// (NOTE(review): presumably how it is set up elsewhere -- confirm) would otherwise
// make the version test fail and silently disable F16C on those compilers.
// An undefined macro still evaluates to 0 here, so that case is unchanged.
#if defined(__F16C__) && !defined(EIGEN_GPUCC) && (!EIGEN_COMP_CLANG || EIGEN_COMP_CLANG>=380)

  #define EIGEN_HAS_FP16_C

  // Make the F16C intrinsics visible on every compiler taking this path, even
  // when the vectorization headers were not included.
  #include <immintrin.h>

#endif
0452
// CUDA compilation: mark GPU vectorization, include the CUDA vector types and
// enable half-precision support when EIGEN_CUDA_SDK_VER is at least 70500.
#ifdef EIGEN_CUDACC
  #define EIGEN_VECTORIZE_GPU
  #include <vector_types.h>
  #if EIGEN_CUDA_SDK_VER >= 70500
    #define EIGEN_HAS_CUDA_FP16
  #endif
#endif

// The fp16 headers follow the flag itself, wherever it was defined.
#ifdef EIGEN_HAS_CUDA_FP16
  #include <cuda_runtime_api.h>
  #include <cuda_fp16.h>
#endif
0465
// HIP compilation: mark GPU vectorization and always enable half-precision
// support, including the HIP vector-type and fp16 headers.
#ifdef EIGEN_HIPCC
  #define EIGEN_VECTORIZE_GPU
  #include <hip/hip_vector_types.h>
  #define EIGEN_HAS_HIP_FP16
  #include <hip/hip_fp16.h>
#endif
0472
0473
0474
namespace Eigen {

/** \returns a static, comma-separated list naming the SIMD instruction sets
  * selected at compile time through the EIGEN_VECTORIZE_* macros, or "None"
  * when none of them is defined.
  *
  * The most capable x86 level is tested first and its string lists the lower
  * levels as well. The returned pointer refers to a string literal and must
  * not be freed or modified.
  */
inline static const char *SimdInstructionSetsInUse(void) {
#if defined(EIGEN_VECTORIZE_AVX512)
  return "AVX512, FMA, AVX2, AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
#elif defined(EIGEN_VECTORIZE_AVX)
  // Comma after "AVX" added so every entry uses the same ", " separator.
  return "AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
#elif defined(EIGEN_VECTORIZE_SSE4_2)
  return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
#elif defined(EIGEN_VECTORIZE_SSE4_1)
  return "SSE, SSE2, SSE3, SSSE3, SSE4.1";
#elif defined(EIGEN_VECTORIZE_SSSE3)
  return "SSE, SSE2, SSE3, SSSE3";
#elif defined(EIGEN_VECTORIZE_SSE3)
  return "SSE, SSE2, SSE3";
#elif defined(EIGEN_VECTORIZE_SSE2)
  return "SSE, SSE2";
#elif defined(EIGEN_VECTORIZE_ALTIVEC)
  return "AltiVec";
#elif defined(EIGEN_VECTORIZE_VSX)
  return "VSX";
#elif defined(EIGEN_VECTORIZE_NEON)
  return "ARM NEON";
#elif defined(EIGEN_VECTORIZE_SVE)
  return "ARM SVE";
#elif defined(EIGEN_VECTORIZE_ZVECTOR)
  return "S390X ZVECTOR";
#elif defined(EIGEN_VECTORIZE_MSA)
  return "MIPS MSA";
#else
  return "None";
#endif
}

} // end namespace Eigen
0510
0511
0512 #endif