Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-08-28 09:11:38

0001 /***************************************************************************
0002  * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and         *
0003  * Martin Renou                                                             *
0004  * Copyright (c) QuantStack                                                 *
0005  * Copyright (c) Serge Guelton                                              *
0006  *                                                                          *
0007  * Distributed under the terms of the BSD 3-Clause License.                 *
0008  *                                                                          *
0009  * The full license is in the file LICENSE, distributed with this software. *
0010  ****************************************************************************/
0011 
0012 #ifndef XSIMD_CPUID_HPP
0013 #define XSIMD_CPUID_HPP
0014 
0015 #include <algorithm>
0016 #include <cstring>
0017 
0018 #if defined(__linux__) && (defined(__ARM_NEON) || defined(_M_ARM) || defined(__riscv_vector))
0019 #include <asm/hwcap.h>
0020 #include <sys/auxv.h>
0021 
0022 #ifndef HWCAP2_I8MM
0023 #define HWCAP2_I8MM (1 << 13)
0024 #endif
0025 
0026 #endif
0027 
0028 #if defined(_MSC_VER)
0029 // Contains the definition of __cpuidex
0030 #include <intrin.h>
0031 #endif
0032 
0033 #include "../types/xsimd_all_registers.hpp"
0034 
0035 namespace xsimd
0036 {
0037     namespace detail
0038     {
0039         struct supported_arch
0040         {
0041 
0042 #define ARCH_FIELD_EX(arch, field_name) \
0043     unsigned field_name;                \
0044     XSIMD_INLINE bool has(::xsimd::arch) const { return this->field_name; }
0045 
0046 #define ARCH_FIELD_EX_REUSE(arch, field_name) \
0047     XSIMD_INLINE bool has(::xsimd::arch) const { return this->field_name; }
0048 
0049 #define ARCH_FIELD(name) ARCH_FIELD_EX(name, name)
0050 
0051             ARCH_FIELD(sse2)
0052             ARCH_FIELD(sse3)
0053 
0054             ARCH_FIELD(ssse3)
0055             ARCH_FIELD(sse4_1)
0056             ARCH_FIELD(sse4_2)
0057             // ARCH_FIELD(sse4a)
0058             ARCH_FIELD_EX(fma3<::xsimd::sse4_2>, fma3_sse42)
0059             ARCH_FIELD(fma4)
0060             // ARCH_FIELD(xop)
0061             ARCH_FIELD(avx)
0062             ARCH_FIELD_EX(fma3<::xsimd::avx>, fma3_avx)
0063             ARCH_FIELD(avx2)
0064             ARCH_FIELD(avxvnni)
0065             ARCH_FIELD_EX(fma3<::xsimd::avx2>, fma3_avx2)
0066             ARCH_FIELD(avx512f)
0067             ARCH_FIELD(avx512cd)
0068             ARCH_FIELD(avx512dq)
0069             ARCH_FIELD(avx512bw)
0070             ARCH_FIELD(avx512er)
0071             ARCH_FIELD(avx512pf)
0072             ARCH_FIELD(avx512ifma)
0073             ARCH_FIELD(avx512vbmi)
0074             ARCH_FIELD_EX(avx512vnni<::xsimd::avx512bw>, avx512vnni_bw)
0075             ARCH_FIELD_EX(avx512vnni<::xsimd::avx512vbmi>, avx512vnni_vbmi)
0076             ARCH_FIELD(neon)
0077             ARCH_FIELD(neon64)
0078             ARCH_FIELD_EX(i8mm<::xsimd::neon64>, i8mm_neon64)
0079             ARCH_FIELD_EX(detail::sve<512>, sve)
0080             ARCH_FIELD_EX_REUSE(detail::sve<256>, sve)
0081             ARCH_FIELD_EX_REUSE(detail::sve<128>, sve)
0082             ARCH_FIELD_EX(detail::rvv<512>, rvv)
0083             ARCH_FIELD_EX_REUSE(detail::rvv<256>, rvv)
0084             ARCH_FIELD_EX_REUSE(detail::rvv<128>, rvv)
0085             ARCH_FIELD(wasm)
0086 
0087 #undef ARCH_FIELD
0088 
0089             XSIMD_INLINE supported_arch() noexcept
0090             {
0091                 memset(this, 0, sizeof(supported_arch));
0092 
0093 #if XSIMD_WITH_WASM
0094                 wasm = 1;
0095 #endif
0096 
0097 #if defined(__aarch64__) || defined(_M_ARM64)
0098                 neon = 1;
0099                 neon64 = 1;
0100 #if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18)
0101                 i8mm_neon64 = bool(getauxval(AT_HWCAP2) & HWCAP2_I8MM);
0102 #endif
0103 #elif defined(__ARM_NEON) || defined(_M_ARM)
0104 
0105 #if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18)
0106                 neon = bool(getauxval(AT_HWCAP) & HWCAP_NEON);
0107 #endif
0108 
0109 #elif defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_SVE_BITS) && __ARM_FEATURE_SVE_BITS > 0
0110 
0111 #if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18)
0112                 sve = bool(getauxval(AT_HWCAP) & HWCAP_SVE);
0113 #endif
0114 
0115 #elif defined(__riscv_vector) && defined(__riscv_v_fixed_vlen) && __riscv_v_fixed_vlen > 0
0116 
0117 #if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18)
0118 #ifndef HWCAP_V
0119 #define HWCAP_V (1 << ('V' - 'A'))
0120 #endif
0121                 rvv = bool(getauxval(AT_HWCAP) & HWCAP_V);
0122 #endif
0123 
0124 #elif defined(__x86_64__) || defined(__i386__) || defined(_M_AMD64) || defined(_M_IX86)
0125 
0126                 auto get_xcr0_low = []() noexcept
0127                 {
0128                     uint32_t xcr0;
0129 
0130 #if defined(_MSC_VER) && _MSC_VER >= 1400
0131 
0132                     xcr0 = (uint32_t)_xgetbv(0);
0133 
0134 #elif defined(__GNUC__)
0135 
0136                     __asm__(
0137                         "xorl %%ecx, %%ecx\n"
0138                         "xgetbv\n"
0139                         : "=a"(xcr0)
0140                         :
0141 #if defined(__i386__)
0142                         : "ecx", "edx"
0143 #else
0144                         : "rcx", "rdx"
0145 #endif
0146                     );
0147 
0148 #else /* _MSC_VER < 1400 */
0149 #error "_MSC_VER < 1400 is not supported"
0150 #endif /* _MSC_VER && _MSC_VER >= 1400 */
0151                     return xcr0;
0152                 };
0153 
0154                 auto get_cpuid = [](int reg[4], int level, int count = 0) noexcept
0155                 {
0156 
0157 #if defined(_MSC_VER)
0158                     __cpuidex(reg, level, count);
0159 
0160 #elif defined(__INTEL_COMPILER)
0161                     __cpuid(reg, level);
0162 
0163 #elif defined(__GNUC__) || defined(__clang__)
0164 
0165 #if defined(__i386__) && defined(__PIC__)
0166                     // %ebx may be the PIC register
0167                     __asm__("xchg{l}\t{%%}ebx, %1\n\t"
0168                             "cpuid\n\t"
0169                             "xchg{l}\t{%%}ebx, %1\n\t"
0170                             : "=a"(reg[0]), "=r"(reg[1]), "=c"(reg[2]), "=d"(reg[3])
0171                             : "0"(level), "2"(count));
0172 
0173 #else
0174                     __asm__("cpuid\n\t"
0175                             : "=a"(reg[0]), "=b"(reg[1]), "=c"(reg[2]), "=d"(reg[3])
0176                             : "0"(level), "2"(count));
0177 #endif
0178 
0179 #else
0180 #error "Unsupported configuration"
0181 #endif
0182                 };
0183 
0184                 int regs1[4];
0185 
0186                 get_cpuid(regs1, 0x1);
0187 
0188                 // OS can explicitly disable the usage of SSE/AVX extensions
0189                 // by setting an appropriate flag in CR0 register
0190                 //
0191                 // https://docs.kernel.org/admin-guide/hw-vuln/gather_data_sampling.html
0192 
0193                 unsigned sse_state_os_enabled = 1;
0194                 unsigned avx_state_os_enabled = 1;
0195                 unsigned avx512_state_os_enabled = 1;
0196 
0197                 // OSXSAVE: A value of 1 indicates that the OS has set CR4.OSXSAVE[bit
0198                 // 18] to enable XSETBV/XGETBV instructions to access XCR0 and
0199                 // to support processor extended state management using
0200                 // XSAVE/XRSTOR.
0201                 bool osxsave = regs1[2] >> 27 & 1;
0202                 if (osxsave)
0203                 {
0204 
0205                     uint32_t xcr0 = get_xcr0_low();
0206 
0207                     sse_state_os_enabled = xcr0 >> 1 & 1;
0208                     avx_state_os_enabled = xcr0 >> 2 & sse_state_os_enabled;
0209                     avx512_state_os_enabled = xcr0 >> 6 & avx_state_os_enabled;
0210                 }
0211 
0212                 sse2 = regs1[3] >> 26 & sse_state_os_enabled;
0213                 sse3 = regs1[2] >> 0 & sse_state_os_enabled;
0214                 ssse3 = regs1[2] >> 9 & sse_state_os_enabled;
0215                 sse4_1 = regs1[2] >> 19 & sse_state_os_enabled;
0216                 sse4_2 = regs1[2] >> 20 & sse_state_os_enabled;
0217                 fma3_sse42 = regs1[2] >> 12 & sse_state_os_enabled;
0218 
0219                 avx = regs1[2] >> 28 & avx_state_os_enabled;
0220                 fma3_avx = avx && fma3_sse42;
0221 
0222                 int regs8[4];
0223                 get_cpuid(regs8, 0x80000001);
0224                 fma4 = regs8[2] >> 16 & avx_state_os_enabled;
0225 
0226                 // sse4a = regs[2] >> 6 & 1;
0227 
0228                 // xop = regs[2] >> 11 & 1;
0229 
0230                 int regs7[4];
0231                 get_cpuid(regs7, 0x7);
0232                 avx2 = regs7[1] >> 5 & avx_state_os_enabled;
0233 
0234                 int regs7a[4];
0235                 get_cpuid(regs7a, 0x7, 0x1);
0236                 avxvnni = regs7a[0] >> 4 & avx_state_os_enabled;
0237 
0238                 fma3_avx2 = avx2 && fma3_sse42;
0239 
0240                 avx512f = regs7[1] >> 16 & avx512_state_os_enabled;
0241                 avx512cd = regs7[1] >> 28 & avx512_state_os_enabled;
0242                 avx512dq = regs7[1] >> 17 & avx512_state_os_enabled;
0243                 avx512bw = regs7[1] >> 30 & avx512_state_os_enabled;
0244                 avx512er = regs7[1] >> 27 & avx512_state_os_enabled;
0245                 avx512pf = regs7[1] >> 26 & avx512_state_os_enabled;
0246                 avx512ifma = regs7[1] >> 21 & avx512_state_os_enabled;
0247                 avx512vbmi = regs7[2] >> 1 & avx512_state_os_enabled;
0248                 avx512vnni_bw = regs7[2] >> 11 & avx512_state_os_enabled;
0249                 avx512vnni_vbmi = avx512vbmi && avx512vnni_bw;
0250 #endif
0251             }
0252         };
0253     } // namespace detail
0254 
0255     XSIMD_INLINE detail::supported_arch available_architectures() noexcept
0256     {
0257         static detail::supported_arch supported;
0258         return supported;
0259     }
0260 }
0261 
0262 #endif