Back to home page

EIC code displayed by LXR

 
 

    


Warning, file /include/eigen3/Eigen/src/Core/util/ConfigureVectorization.h was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).

0001 // This file is part of Eigen, a lightweight C++ template library
0002 // for linear algebra.
0003 //
0004 // Copyright (C) 2008-2018 Gael Guennebaud <gael.guennebaud@inria.fr>
0005 // Copyright (C) 2020, Arm Limited and Contributors
0006 //
0007 // This Source Code Form is subject to the terms of the Mozilla
0008 // Public License v. 2.0. If a copy of the MPL was not distributed
0009 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
0010 
0011 #ifndef EIGEN_CONFIGURE_VECTORIZATION_H
0012 #define EIGEN_CONFIGURE_VECTORIZATION_H
0013 
0014 //------------------------------------------------------------------------------------------
0015 // Static and dynamic alignment control
0016 //
0017 // The main purpose of this section is to define EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES
0018 // as the maximal boundaries in bytes on which dynamically and statically allocated data may be aligned, respectively.
0019 // The values of EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES can be specified by the user. If not,
0020 // a default value is automatically computed based on architecture, compiler, and OS.
0021 //
0022 // This section also defines the macro EIGEN_ALIGN_TO_BOUNDARY(N) and the shortcuts EIGEN_ALIGN{8,16,32,64,_MAX}
0023 // to be used to declare statically aligned buffers.
0024 //------------------------------------------------------------------------------------------
0025 
0026 
0027 /* EIGEN_ALIGN_TO_BOUNDARY(n) forces data to be n-byte aligned. This is used to satisfy SIMD requirements.
0028  * However, we do that EVEN if vectorization (EIGEN_VECTORIZE) is disabled,
0029  * so that vectorization doesn't affect binary compatibility.
0030  *
0031  * If we made alignment depend on whether or not EIGEN_VECTORIZE is defined, it would be impossible to link
0032  * vectorized and non-vectorized code.
0033  *
      * EIGEN_ALIGNOF(x) is defined next to it in every branch and expands to the matching alignment query.
      * Branches are tried in this order: CUDA compilers, alignas/alignof when EIGEN_HAS_ALIGNAS is non-zero,
      * then compiler-specific extensions. A compiler matching none of them is rejected with #error.
      *
0034  * FIXME: this code can be cleaned up once we switch to proper C++11 only.
0035  */
0036 #if (defined EIGEN_CUDACC)
0037   #define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n)
0038   #define EIGEN_ALIGNOF(x) __alignof(x)
0039 #elif EIGEN_HAS_ALIGNAS
0040   #define EIGEN_ALIGN_TO_BOUNDARY(n) alignas(n)
0041   #define EIGEN_ALIGNOF(x) alignof(x)
0042 #elif EIGEN_COMP_GNUC || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM
0043   #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
0044   #define EIGEN_ALIGNOF(x) __alignof(x)
0045 #elif EIGEN_COMP_MSVC
0046   #define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n))
0047   #define EIGEN_ALIGNOF(x) __alignof(x)
0048 #elif EIGEN_COMP_SUNCC
0049   // FIXME not sure about this one:
0050   #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
0051   #define EIGEN_ALIGNOF(x) __alignof(x)
0052 #else
0053   #error Please tell me what is the equivalent of alignas(n) and alignof(x) for your compiler
0054 #endif
0055 
0056 // EIGEN_IDEAL_MAX_ALIGN_BYTES: 64 with __AVX512F__, 32 with __AVX__, 16 otherwise. If the user explicitly
     // disables vectorization (EIGEN_DONT_VECTORIZE), alignment is disabled as well (0), except for GPU compilers (16).
0057 #if defined(EIGEN_DONT_VECTORIZE)
0058   #if defined(EIGEN_GPUCC)
0059     // GPU code is always vectorized and requires memory alignment for
0060     // statically allocated buffers.
0061     #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
0062   #else
0063     #define EIGEN_IDEAL_MAX_ALIGN_BYTES 0
0064   #endif
0065 #elif defined(__AVX512F__)
0066   // 64 bytes static alignment is preferred only if really required
0067   #define EIGEN_IDEAL_MAX_ALIGN_BYTES 64
0068 #elif defined(__AVX__)
0069   // 32 bytes static alignment is preferred only if really required
0070   #define EIGEN_IDEAL_MAX_ALIGN_BYTES 32
0071 #else
0072   #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
0073 #endif
0074 
0075 
0076 // EIGEN_MIN_ALIGN_BYTES defines the minimal value for which the notion of explicit alignment makes sense
0077 #define EIGEN_MIN_ALIGN_BYTES 16
0078 
0079 // Defines the boundary (in bytes) on which the data needs to be aligned. Note
0080 // that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be
0081 // aligned at all regardless of the value of this #define.
0082 
     // Reject contradictory user settings: a non-zero EIGEN_MAX_STATIC_ALIGN_BYTES combined with
     // EIGEN_DONT_ALIGN or EIGEN_DONT_ALIGN_STATICALLY is a hard error.
0083 #if (defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN))  && defined(EIGEN_MAX_STATIC_ALIGN_BYTES) && EIGEN_MAX_STATIC_ALIGN_BYTES>0
0084 #error EIGEN_MAX_STATIC_ALIGN_BYTES and EIGEN_DONT_ALIGN[_STATICALLY] are both defined with EIGEN_MAX_STATIC_ALIGN_BYTES!=0. Use EIGEN_MAX_STATIC_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN_STATICALLY.
0085 #endif
0086 
0087 // EIGEN_DONT_ALIGN_STATICALLY and EIGEN_DONT_ALIGN are deprecated
0088 // They imply EIGEN_MAX_STATIC_ALIGN_BYTES=0
     // (any previous definition is dropped first so that the macro can be redefined to 0)
0089 #if defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)
0090   #ifdef EIGEN_MAX_STATIC_ALIGN_BYTES
0091     #undef EIGEN_MAX_STATIC_ALIGN_BYTES
0092   #endif
0093   #define EIGEN_MAX_STATIC_ALIGN_BYTES 0
0094 #endif
0095 
0096 #ifndef EIGEN_MAX_STATIC_ALIGN_BYTES
0097 
0098   // Try to automatically guess what is the best default value for EIGEN_MAX_STATIC_ALIGN_BYTES
0099 
0100   // 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable
0101   // 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always
0102   // enable alignment, but it can be a cause of problems on some platforms, so we just disable it on
0103   // certain common platforms (compiler+architecture combinations) to avoid these problems.
0104   // Only static alignment is really problematic (relies on nonstandard compiler extensions),
0105   // try to keep heap alignment even when we have to disable static alignment.
       // First case: GCC on an architecture other than the ones listed in the condition.
0106   #if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64 || EIGEN_ARCH_MIPS)
0107   #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
0108   #elif EIGEN_ARCH_ARM_OR_ARM64 && EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_MOST(4, 6)
0109   // Old versions of GCC on ARM, at least 4.4, were once seen to have buggy static alignment support.
0110   // Not sure which version fixed it, hopefully it doesn't affect 4.7, which is still somewhat in use.
0111   // 4.8 and newer seem definitely unaffected.
0112   #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
0113   #else
0114   #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
0115   #endif
0116 
0117   // static alignment is completely disabled with GCC 3, Sun Studio, and QCC/QNX,
       // as well as for the GCC/architecture combinations flagged just above
0118   #if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \
0119   && !EIGEN_GCC3_OR_OLDER \
0120   && !EIGEN_COMP_SUNCC \
0121   && !EIGEN_OS_QNX
0122     #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1
0123   #else
0124     #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0
0125   #endif
0126 
       // Default: the ideal alignment when static alignment is wanted, 0 (disabled) otherwise.
0127   #if EIGEN_ARCH_WANTS_STACK_ALIGNMENT
0128     #define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
0129   #else
0130     #define EIGEN_MAX_STATIC_ALIGN_BYTES 0
0131   #endif
0132 
0133 #endif
0134 
0135 // If EIGEN_MAX_ALIGN_BYTES is defined, then it is considered as an upper bound for EIGEN_MAX_STATIC_ALIGN_BYTES
0136 #if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES<EIGEN_MAX_STATIC_ALIGN_BYTES
0137 #undef EIGEN_MAX_STATIC_ALIGN_BYTES
0138 #define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
0139 #endif
0140 
     // When static alignment ends up disabled (0), also define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
     // unless the user already did.
0141 #if EIGEN_MAX_STATIC_ALIGN_BYTES==0 && !defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT)
0142   #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
0143 #endif
0144 
0145 // At this stage, EIGEN_MAX_STATIC_ALIGN_BYTES>0 is the true test of whether we want to align arrays on the stack or not.
0146 // It takes into account both the user choice to explicitly enable/disable alignment (by setting EIGEN_MAX_STATIC_ALIGN_BYTES)
0147 // and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT).
0148 // Henceforth, only EIGEN_MAX_STATIC_ALIGN_BYTES should be used.
0149 
0150 
0151 // Shortcuts to EIGEN_ALIGN_TO_BOUNDARY for fixed boundaries of 8, 16, 32 and 64 bytes.
     // EIGEN_ALIGN_MAX aligns to EIGEN_MAX_STATIC_ALIGN_BYTES and expands to nothing when that value is 0.
0152 #define EIGEN_ALIGN8  EIGEN_ALIGN_TO_BOUNDARY(8)
0153 #define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16)
0154 #define EIGEN_ALIGN32 EIGEN_ALIGN_TO_BOUNDARY(32)
0155 #define EIGEN_ALIGN64 EIGEN_ALIGN_TO_BOUNDARY(64)
0156 #if EIGEN_MAX_STATIC_ALIGN_BYTES>0
0157 #define EIGEN_ALIGN_MAX EIGEN_ALIGN_TO_BOUNDARY(EIGEN_MAX_STATIC_ALIGN_BYTES)
0158 #else
0159 #define EIGEN_ALIGN_MAX
0160 #endif
0161 
0162 
0163 // Dynamic alignment control: settles EIGEN_MAX_ALIGN_BYTES, EIGEN_DEFAULT_ALIGN_BYTES and EIGEN_UNALIGNED_VECTORIZE
0164 
     // Reject contradictory user settings: EIGEN_DONT_ALIGN together with a non-zero EIGEN_MAX_ALIGN_BYTES.
0165 #if defined(EIGEN_DONT_ALIGN) && defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES>0
0166 #error EIGEN_MAX_ALIGN_BYTES and EIGEN_DONT_ALIGN are both defined with EIGEN_MAX_ALIGN_BYTES!=0. Use EIGEN_MAX_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN.
0167 #endif
0168 
     // EIGEN_DONT_ALIGN forces EIGEN_MAX_ALIGN_BYTES to 0; otherwise a user-provided value is kept and the
     // ideal alignment is used as the default.
0169 #ifdef EIGEN_DONT_ALIGN
0170   #ifdef EIGEN_MAX_ALIGN_BYTES
0171     #undef EIGEN_MAX_ALIGN_BYTES
0172   #endif
0173   #define EIGEN_MAX_ALIGN_BYTES 0
0174 #elif !defined(EIGEN_MAX_ALIGN_BYTES)
0175   #define EIGEN_MAX_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
0176 #endif
0177 
     // EIGEN_DEFAULT_ALIGN_BYTES is the larger of EIGEN_IDEAL_MAX_ALIGN_BYTES and EIGEN_MAX_ALIGN_BYTES.
0178 #if EIGEN_IDEAL_MAX_ALIGN_BYTES > EIGEN_MAX_ALIGN_BYTES
0179 #define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
0180 #else
0181 #define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
0182 #endif
0183 
0184 
     // EIGEN_UNALIGNED_VECTORIZE defaults to 1 unless the user defined it beforehand.
0185 #ifndef EIGEN_UNALIGNED_VECTORIZE
0186 #define EIGEN_UNALIGNED_VECTORIZE 1
0187 #endif
0188 
0189 //----------------------------------------------------------------------
0190 
0191 // If alignment is disabled, then disable vectorization. Note: EIGEN_MAX_ALIGN_BYTES is the proper check, it takes into
0192 // account both the user's will (EIGEN_MAX_ALIGN_BYTES,EIGEN_DONT_ALIGN) and our own platform checks
0193 #if EIGEN_MAX_ALIGN_BYTES==0
0194   #ifndef EIGEN_DONT_VECTORIZE
0195     #define EIGEN_DONT_VECTORIZE
0196   #endif
0197 #endif
0198 
0199 
0200 // The following (except #include <malloc.h> and _M_IX86_FP ??) can likely be
0201 // removed as gcc 4.1 and msvc 2008 are not supported anyways.
     // It defines one of two helper flags recording that SSE2 is available:
     // EIGEN_SSE2_ON_MSVC_2008_OR_LATER (MSVC) or EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC (everything else).
0202 #if EIGEN_COMP_MSVC
0203   #include <malloc.h> // for _aligned_malloc -- need it regardless of whether vectorization is enabled
0204   #if (EIGEN_COMP_MSVC >= 1500) // 2008 or later
0205     // a user reported that in 64-bit mode, MSVC doesn't care to define _M_IX86_FP.
0206     #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64
0207       #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER
0208     #endif
0209   #endif
0210 #else
       // Requires __SSE2__; a GNU-compatible compiler must additionally be ICC or report GCC version >= 4.2.
0211   #if (defined __SSE2__) && ( (!EIGEN_COMP_GNUC) || EIGEN_COMP_ICC || EIGEN_GNUC_AT_LEAST(4,2) )
0212     #define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC
0213   #endif
0214 #endif
0215 
0216 #if !(defined(EIGEN_DONT_VECTORIZE) || defined(EIGEN_GPUCC))
0217 
0218   #if defined (EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
0219 
0220     // Defines symbols for compile-time detection of which instructions are
0221     // used.
0222     // EIGEN_VECTORIZE_YY is defined if and only if the instruction set YY is used
0223     #define EIGEN_VECTORIZE
0224     #define EIGEN_VECTORIZE_SSE
0225     #define EIGEN_VECTORIZE_SSE2
0226 
0227     // Detect sse3/ssse3/sse4:
0228     // gcc and icc defines __SSE3__, ...
0229     // there is no way to know about this on msvc. You can define EIGEN_VECTORIZE_SSE* if you
0230     // want to force the use of those instructions with msvc.
0231     #ifdef __SSE3__
0232       #define EIGEN_VECTORIZE_SSE3
0233     #endif
0234     #ifdef __SSSE3__
0235       #define EIGEN_VECTORIZE_SSSE3
0236     #endif
0237     #ifdef __SSE4_1__
0238       #define EIGEN_VECTORIZE_SSE4_1
0239     #endif
0240     #ifdef __SSE4_2__
0241       #define EIGEN_VECTORIZE_SSE4_2
0242     #endif
         // __AVX__ enables every SSE level up to SSE4.2; the AVX macro itself is withheld when EIGEN_USE_SYCL is defined.
0243     #ifdef __AVX__
0244       #ifndef EIGEN_USE_SYCL 
0245         #define EIGEN_VECTORIZE_AVX
0246       #endif
0247       #define EIGEN_VECTORIZE_SSE3
0248       #define EIGEN_VECTORIZE_SSSE3
0249       #define EIGEN_VECTORIZE_SSE4_1
0250       #define EIGEN_VECTORIZE_SSE4_2
0251     #endif
         // __AVX2__ additionally enables AVX2 (and AVX), again unless EIGEN_USE_SYCL is defined.
0252     #ifdef __AVX2__
0253       #ifndef EIGEN_USE_SYCL 
0254         #define EIGEN_VECTORIZE_AVX2
0255         #define EIGEN_VECTORIZE_AVX
0256       #endif
0257       #define EIGEN_VECTORIZE_SSE3
0258       #define EIGEN_VECTORIZE_SSSE3
0259       #define EIGEN_VECTORIZE_SSE4_1
0260       #define EIGEN_VECTORIZE_SSE4_2
0261     #endif
0262     #if defined(__FMA__) || (EIGEN_COMP_MSVC && defined(__AVX2__))
0263       // MSVC does not expose a switch dedicated for FMA
0264       // For MSVC, AVX2 => FMA
0265       #define EIGEN_VECTORIZE_FMA
0266     #endif
         // AVX512F requires FMA to have been detected above, otherwise compilation stops with #error.
         // It then enables AVX512, AVX2, AVX (unless EIGEN_USE_SYCL) and the optional DQ/ER/BF16 subsets.
0267     #if defined(__AVX512F__)
0268       #ifndef EIGEN_VECTORIZE_FMA
0269       #if EIGEN_COMP_GNUC
0270       #error Please add -mfma to your compiler flags: compiling with -mavx512f alone without SSE/AVX FMA is not supported (bug 1638).
0271       #else
0272       #error Please enable FMA in your compiler flags (e.g. -mfma): compiling with AVX512 alone without SSE/AVX FMA is not supported (bug 1638).
0273       #endif
0274       #endif
0275       #ifndef EIGEN_USE_SYCL
0276         #define EIGEN_VECTORIZE_AVX512
0277         #define EIGEN_VECTORIZE_AVX2
0278         #define EIGEN_VECTORIZE_AVX
0279       #endif
0280       #define EIGEN_VECTORIZE_FMA
0281       #define EIGEN_VECTORIZE_SSE3
0282       #define EIGEN_VECTORIZE_SSSE3
0283       #define EIGEN_VECTORIZE_SSE4_1
0284       #define EIGEN_VECTORIZE_SSE4_2
0285       #ifndef EIGEN_USE_SYCL
0286         #ifdef __AVX512DQ__
0287           #define EIGEN_VECTORIZE_AVX512DQ
0288         #endif
0289         #ifdef __AVX512ER__
0290           #define EIGEN_VECTORIZE_AVX512ER
0291         #endif
0292         #ifdef __AVX512BF16__
0293           #define EIGEN_VECTORIZE_AVX512BF16
0294         #endif
0295       #endif
0296     #endif
0297 
0298     // Disable AVX support on broken xcode versions
0299     #if defined(__apple_build_version__) && (__apple_build_version__ == 11000033 ) && ( __MAC_OS_X_VERSION_MIN_REQUIRED == 101500 )
0300       // Works around a nasty bug in the clang compiler shipped with Xcode 11.0 that shows up in a common
0301       // configuration (Xcode 11.0 with deployment target macOS 10.15), see https://trac.macports.org/ticket/58776#no1
           // When AVX was enabled above, AVX and every macro that builds on it (AVX2, FMA, AVX512*) are undefined again.
           // NOTE(review): EIGEN_VECTORIZE_AVX512BF16 is not undefined here, unlike the other AVX512 macros;
           // confirm whether that is intentional.
0302       #ifdef EIGEN_VECTORIZE_AVX
0303         #undef EIGEN_VECTORIZE_AVX
0304         #warning "Disabling AVX support: clang compiler shipped with XCode 11.[012] generates broken assembly with -macosx-version-min=10.15 and AVX enabled. "
0305         #ifdef EIGEN_VECTORIZE_AVX2
0306           #undef EIGEN_VECTORIZE_AVX2
0307         #endif
0308         #ifdef EIGEN_VECTORIZE_FMA
0309           #undef EIGEN_VECTORIZE_FMA
0310         #endif
0311         #ifdef EIGEN_VECTORIZE_AVX512
0312           #undef EIGEN_VECTORIZE_AVX512
0313         #endif
0314         #ifdef EIGEN_VECTORIZE_AVX512DQ
0315           #undef EIGEN_VECTORIZE_AVX512DQ
0316         #endif
0317         #ifdef EIGEN_VECTORIZE_AVX512ER
0318           #undef EIGEN_VECTORIZE_AVX512ER
0319         #endif
0320       #endif
0321       // NOTE: Confirmed test failures in XCode 11.0, and XCode 11.2 with  -macosx-version-min=10.15 and AVX
0322       // NOTE using -macosx-version-min=10.15 with Xcode 11.0 results in runtime segmentation faults in many tests, 11.2 produce core dumps in 3 tests
0323       // NOTE using -macosx-version-min=10.14 produces functioning and passing tests in all cases
0324       // NOTE __clang_version__ "11.0.0 (clang-1100.0.33.8)"  XCode 11.0 <- Produces many segfault and core dumping tests
0325       //                                                                    with  -macosx-version-min=10.15 and AVX
0326       // NOTE __clang_version__ "11.0.0 (clang-1100.0.33.12)" XCode 11.2 <- Produces 3 core dumping tests with  
0327       //                                                                    -macosx-version-min=10.15 and AVX
0328     #endif
0329 
0330     // include files: pull in the x86 intrinsic headers matching the instruction sets selected above
0331 
0332     // This extern "C" works around a MINGW-w64 compilation issue
0333     // https://sourceforge.net/tracker/index.php?func=detail&aid=3018394&group_id=202880&atid=983354
0334     // In essence, intrin.h is included by windows.h and also declares intrinsics (just as emmintrin.h etc. below do).
0335     // However, intrin.h uses an extern "C" declaration, and g++ thus complains of duplicate declarations
0336     // with conflicting linkage.  The linkage for intrinsics doesn't matter, but at that stage the compiler doesn't know;
0337     // so, to avoid compile errors when windows.h is included after Eigen/Core, ensure intrinsics are extern "C" here too.
0338     // Notice that since these are C headers, the extern "C" is theoretically needed anyways.
0339     extern "C" {
0340       // In theory we should only include immintrin.h and not the other *mmintrin.h header files directly.
0341       // Doing so triggers some issues with ICC. However old gcc versions seem to not have this file, thus:
0342       #if EIGEN_COMP_ICC >= 1110
0343         #include <immintrin.h>
0344       #else
0345         #include <mmintrin.h>
0346         #include <emmintrin.h>
0347         #include <xmmintrin.h>
0348         #ifdef  EIGEN_VECTORIZE_SSE3
0349         #include <pmmintrin.h>
0350         #endif
0351         #ifdef EIGEN_VECTORIZE_SSSE3
0352         #include <tmmintrin.h>
0353         #endif
0354         #ifdef EIGEN_VECTORIZE_SSE4_1
0355         #include <smmintrin.h>
0356         #endif
0357         #ifdef EIGEN_VECTORIZE_SSE4_2
0358         #include <nmmintrin.h>
0359         #endif
0360         #if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512)
0361         #include <immintrin.h>
0362         #endif
0363       #endif
0364     } // end extern "C"
0365 
0366   #elif defined __VSX__
0367 
0368     #define EIGEN_VECTORIZE
0369     #define EIGEN_VECTORIZE_VSX
0370     #include <altivec.h>
0371     // <altivec.h> defines the tokens bool, vector and pixel; we #undef all of them right after the include
0372     // (presumably so they cannot clash with ordinary identifiers) => use __vector instead of vector
0373     #undef bool
0374     #undef vector
0375     #undef pixel
0376 
0377   #elif defined __ALTIVEC__
0378 
0379     #define EIGEN_VECTORIZE
0380     #define EIGEN_VECTORIZE_ALTIVEC
0381     #include <altivec.h>
0382     // <altivec.h> defines the tokens bool, vector and pixel; we #undef all of them right after the include
0383     // (presumably so they cannot clash with ordinary identifiers) => use __vector instead of vector
0384     #undef bool
0385     #undef vector
0386     #undef pixel
0387 
0388   #elif ((defined  __ARM_NEON) || (defined __ARM_NEON__)) && !(defined EIGEN_ARM64_USE_SVE)
0389 
0390     #define EIGEN_VECTORIZE
0391     #define EIGEN_VECTORIZE_NEON
0392     #include <arm_neon.h>
0393 
0394   // We currently require SVE to be enabled explicitly via EIGEN_ARM64_USE_SVE and
0395   // will not select the backend automatically
0396   #elif (defined __ARM_FEATURE_SVE) && (defined EIGEN_ARM64_USE_SVE)
0397 
0398     #define EIGEN_VECTORIZE
0399     #define EIGEN_VECTORIZE_SVE
0400     #include <arm_sve.h>
0401 
0402     // Since we depend on knowing SVE vector lengths at compile-time, we need
0403     // to ensure a fixed lengths is set
0404     #if defined __ARM_FEATURE_SVE_BITS
0405       #define EIGEN_ARM64_SVE_VL __ARM_FEATURE_SVE_BITS
0406     #else
0407 #error "Eigen requires a fixed SVE lector length but EIGEN_ARM64_SVE_VL is not set."
0408 #endif
0409 
0410 #elif (defined __s390x__ && defined __VEC__)
0411 
0412 #define EIGEN_VECTORIZE
0413 #define EIGEN_VECTORIZE_ZVECTOR
0414 #include <vecintrin.h>
0415 
0416 #elif defined __mips_msa
0417 
0418 // Limit MSA optimizations to little-endian CPUs for now.
0419 // TODO: Perhaps, eventually support MSA optimizations on big-endian CPUs?
     // When the byte order is unknown or not little-endian, nothing is defined here and this
     // branch leaves vectorization off.
0420 #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
     // EIGEN_MIPS_64 or EIGEN_MIPS_32 is selected according to whether __LP64__ is defined.
0421 #if defined(__LP64__)
0422 #define EIGEN_MIPS_64
0423 #else
0424 #define EIGEN_MIPS_32
0425 #endif
0426 #define EIGEN_VECTORIZE
0427 #define EIGEN_VECTORIZE_MSA
0428 #include <msa.h>
0429 #endif
0430 
0431 #endif
0432 #endif
0433 
0434 // According to the Arm ACLE, arm_neon.h should also include arm_fp16.h, but not all
0435 // compilers seem to follow this. We therefore include it explicitly.
0436 // See also: https://bugs.llvm.org/show_bug.cgi?id=47955
0437 #if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
0438   #include <arm_fp16.h>
0439 #endif
0440 
     // NOTE(review): other EIGEN_COMP_* macros in this file are tested by value (e.g. `#if EIGEN_COMP_MSVC`).
     // If EIGEN_COMP_CLANG is likewise always defined (0 for non-clang compilers), `!defined(EIGEN_COMP_CLANG)`
     // is never true, so EIGEN_HAS_FP16_C would only be enabled for clang >= 380 and the include below would be
     // unconditional; `!EIGEN_COMP_CLANG` / `#if EIGEN_COMP_CLANG` may have been intended. Confirm against the
     // header that defines EIGEN_COMP_CLANG.
0441 #if defined(__F16C__) && (!defined(EIGEN_GPUCC) && (!defined(EIGEN_COMP_CLANG) || EIGEN_COMP_CLANG>=380))
0442   // We can use the optimized fp16 to float and float to fp16 conversion routines
0443   #define EIGEN_HAS_FP16_C
0444 
0445   #if defined(EIGEN_COMP_CLANG)
0446     // Workaround for clang: The FP16C intrinsics for clang are included by
0447     // immintrin.h, as opposed to emmintrin.h as suggested by Intel:
0448     // https://software.intel.com/sites/landingpage/IntrinsicsGuide/#othertechs=FP16C&expand=1711
0449     #include <immintrin.h>
0450   #endif
0451 #endif
0452 
0453 #if defined EIGEN_CUDACC
       // CUDA compilation: flag GPU vectorization and pull in the CUDA vector types.
0454   #define EIGEN_VECTORIZE_GPU
0455   #include <vector_types.h>
       // Half-precision support is enabled for EIGEN_CUDA_SDK_VER >= 70500 (presumably CUDA 7.5; confirm).
0456   #if EIGEN_CUDA_SDK_VER >= 70500
0457     #define EIGEN_HAS_CUDA_FP16
0458   #endif
0459 #endif
0460 
     // Not nested in the CUDA block above, so a user-defined EIGEN_HAS_CUDA_FP16 triggers these includes too.
0461 #if defined(EIGEN_HAS_CUDA_FP16)
0462   #include <cuda_runtime_api.h>
0463   #include <cuda_fp16.h>
0464 #endif
0465 
0466 #if defined(EIGEN_HIPCC)
       // HIP compilation: GPU vectorization and half-precision support are both enabled unconditionally.
0467   #define EIGEN_VECTORIZE_GPU
0468   #include <hip/hip_vector_types.h>
0469   #define EIGEN_HAS_HIP_FP16
0470   #include <hip/hip_fp16.h>
0471 #endif
0472 
0473 
0474 /** \brief Namespace containing all symbols from the %Eigen library. */
0475 namespace Eigen {
0476 
/**
 * \brief Returns a human-readable list of the SIMD instruction sets selected at compile time.
 *
 * The answer is fixed by the EIGEN_VECTORIZE_* macros that are defined when this
 * header is compiled. The branches are tested in order and the first match wins,
 * so on x86 the widest enabled extension determines the returned list.
 *
 * \returns a pointer to a string literal (never null, not to be freed); "None" when
 *          none of the EIGEN_VECTORIZE_* macros tested here is defined.
 */
inline static const char *SimdInstructionSetsInUse(void) {
#if defined(EIGEN_VECTORIZE_AVX512)
  return "AVX512, FMA, AVX2, AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
#elif defined(EIGEN_VECTORIZE_AVX)
  // The separator after "AVX" used to be missing ("AVX SSE, ..."), unlike every other list here.
  return "AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
#elif defined(EIGEN_VECTORIZE_SSE4_2)
  return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
#elif defined(EIGEN_VECTORIZE_SSE4_1)
  return "SSE, SSE2, SSE3, SSSE3, SSE4.1";
#elif defined(EIGEN_VECTORIZE_SSSE3)
  return "SSE, SSE2, SSE3, SSSE3";
#elif defined(EIGEN_VECTORIZE_SSE3)
  return "SSE, SSE2, SSE3";
#elif defined(EIGEN_VECTORIZE_SSE2)
  return "SSE, SSE2";
#elif defined(EIGEN_VECTORIZE_ALTIVEC)
  return "AltiVec";
#elif defined(EIGEN_VECTORIZE_VSX)
  return "VSX";
#elif defined(EIGEN_VECTORIZE_NEON)
  return "ARM NEON";
#elif defined(EIGEN_VECTORIZE_SVE)
  return "ARM SVE";
#elif defined(EIGEN_VECTORIZE_ZVECTOR)
  return "S390X ZVECTOR";
#elif defined(EIGEN_VECTORIZE_MSA)
  return "MIPS MSA";
#else
  // No vectorization backend was selected.
  return "None";
#endif
}
0508 
0509 } // end namespace Eigen
0510 
0511 
0512 #endif // EIGEN_CONFIGURE_VECTORIZATION_H