File indexing completed on 2025-08-28 09:11:38
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012 #ifndef XSIMD_CPUID_HPP
0013 #define XSIMD_CPUID_HPP
0014
0015 #include <algorithm>
0016 #include <cstring>
0017
0018 #if defined(__linux__) && (defined(__ARM_NEON) || defined(_M_ARM) || defined(__riscv_vector))
0019 #include <asm/hwcap.h>
0020 #include <sys/auxv.h>
0021
0022 #ifndef HWCAP2_I8MM
0023 #define HWCAP2_I8MM (1 << 13)
0024 #endif
0025
0026 #endif
0027
0028 #if defined(_MSC_VER)
0029
0030 #include <intrin.h>
0031 #endif
0032
0033 #include "../types/xsimd_all_registers.hpp"
0034
0035 namespace xsimd
0036 {
0037 namespace detail
0038 {
0039 struct supported_arch
0040 {
0041
0042 #define ARCH_FIELD_EX(arch, field_name) \
0043 unsigned field_name; \
0044 XSIMD_INLINE bool has(::xsimd::arch) const { return this->field_name; }
0045
0046 #define ARCH_FIELD_EX_REUSE(arch, field_name) \
0047 XSIMD_INLINE bool has(::xsimd::arch) const { return this->field_name; }
0048
0049 #define ARCH_FIELD(name) ARCH_FIELD_EX(name, name)
0050
0051 ARCH_FIELD(sse2)
0052 ARCH_FIELD(sse3)
0053
0054 ARCH_FIELD(ssse3)
0055 ARCH_FIELD(sse4_1)
0056 ARCH_FIELD(sse4_2)
0057
0058 ARCH_FIELD_EX(fma3<::xsimd::sse4_2>, fma3_sse42)
0059 ARCH_FIELD(fma4)
0060
0061 ARCH_FIELD(avx)
0062 ARCH_FIELD_EX(fma3<::xsimd::avx>, fma3_avx)
0063 ARCH_FIELD(avx2)
0064 ARCH_FIELD(avxvnni)
0065 ARCH_FIELD_EX(fma3<::xsimd::avx2>, fma3_avx2)
0066 ARCH_FIELD(avx512f)
0067 ARCH_FIELD(avx512cd)
0068 ARCH_FIELD(avx512dq)
0069 ARCH_FIELD(avx512bw)
0070 ARCH_FIELD(avx512er)
0071 ARCH_FIELD(avx512pf)
0072 ARCH_FIELD(avx512ifma)
0073 ARCH_FIELD(avx512vbmi)
0074 ARCH_FIELD_EX(avx512vnni<::xsimd::avx512bw>, avx512vnni_bw)
0075 ARCH_FIELD_EX(avx512vnni<::xsimd::avx512vbmi>, avx512vnni_vbmi)
0076 ARCH_FIELD(neon)
0077 ARCH_FIELD(neon64)
0078 ARCH_FIELD_EX(i8mm<::xsimd::neon64>, i8mm_neon64)
0079 ARCH_FIELD_EX(detail::sve<512>, sve)
0080 ARCH_FIELD_EX_REUSE(detail::sve<256>, sve)
0081 ARCH_FIELD_EX_REUSE(detail::sve<128>, sve)
0082 ARCH_FIELD_EX(detail::rvv<512>, rvv)
0083 ARCH_FIELD_EX_REUSE(detail::rvv<256>, rvv)
0084 ARCH_FIELD_EX_REUSE(detail::rvv<128>, rvv)
0085 ARCH_FIELD(wasm)
0086
0087 #undef ARCH_FIELD
0088
0089 XSIMD_INLINE supported_arch() noexcept
0090 {
0091 memset(this, 0, sizeof(supported_arch));
0092
0093 #if XSIMD_WITH_WASM
0094 wasm = 1;
0095 #endif
0096
0097 #if defined(__aarch64__) || defined(_M_ARM64)
0098 neon = 1;
0099 neon64 = 1;
0100 #if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18)
0101 i8mm_neon64 = bool(getauxval(AT_HWCAP2) & HWCAP2_I8MM);
0102 #endif
0103 #elif defined(__ARM_NEON) || defined(_M_ARM)
0104
0105 #if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18)
0106 neon = bool(getauxval(AT_HWCAP) & HWCAP_NEON);
0107 #endif
0108
0109 #elif defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_SVE_BITS) && __ARM_FEATURE_SVE_BITS > 0
0110
0111 #if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18)
0112 sve = bool(getauxval(AT_HWCAP) & HWCAP_SVE);
0113 #endif
0114
0115 #elif defined(__riscv_vector) && defined(__riscv_v_fixed_vlen) && __riscv_v_fixed_vlen > 0
0116
0117 #if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18)
0118 #ifndef HWCAP_V
0119 #define HWCAP_V (1 << ('V' - 'A'))
0120 #endif
0121 rvv = bool(getauxval(AT_HWCAP) & HWCAP_V);
0122 #endif
0123
0124 #elif defined(__x86_64__) || defined(__i386__) || defined(_M_AMD64) || defined(_M_IX86)
0125
0126 auto get_xcr0_low = []() noexcept
0127 {
0128 uint32_t xcr0;
0129
0130 #if defined(_MSC_VER) && _MSC_VER >= 1400
0131
0132 xcr0 = (uint32_t)_xgetbv(0);
0133
0134 #elif defined(__GNUC__)
0135
0136 __asm__(
0137 "xorl %%ecx, %%ecx\n"
0138 "xgetbv\n"
0139 : "=a"(xcr0)
0140 :
0141 #if defined(__i386__)
0142 : "ecx", "edx"
0143 #else
0144 : "rcx", "rdx"
0145 #endif
0146 );
0147
0148 #else
0149 #error "_MSC_VER < 1400 is not supported"
0150 #endif
0151 return xcr0;
0152 };
0153
0154 auto get_cpuid = [](int reg[4], int level, int count = 0) noexcept
0155 {
0156
0157 #if defined(_MSC_VER)
0158 __cpuidex(reg, level, count);
0159
0160 #elif defined(__INTEL_COMPILER)
0161 __cpuid(reg, level);
0162
0163 #elif defined(__GNUC__) || defined(__clang__)
0164
0165 #if defined(__i386__) && defined(__PIC__)
0166
0167 __asm__("xchg{l}\t{%%}ebx, %1\n\t"
0168 "cpuid\n\t"
0169 "xchg{l}\t{%%}ebx, %1\n\t"
0170 : "=a"(reg[0]), "=r"(reg[1]), "=c"(reg[2]), "=d"(reg[3])
0171 : "0"(level), "2"(count));
0172
0173 #else
0174 __asm__("cpuid\n\t"
0175 : "=a"(reg[0]), "=b"(reg[1]), "=c"(reg[2]), "=d"(reg[3])
0176 : "0"(level), "2"(count));
0177 #endif
0178
0179 #else
0180 #error "Unsupported configuration"
0181 #endif
0182 };
0183
0184 int regs1[4];
0185
0186 get_cpuid(regs1, 0x1);
0187
0188
0189
0190
0191
0192
0193 unsigned sse_state_os_enabled = 1;
0194 unsigned avx_state_os_enabled = 1;
0195 unsigned avx512_state_os_enabled = 1;
0196
0197
0198
0199
0200
0201 bool osxsave = regs1[2] >> 27 & 1;
0202 if (osxsave)
0203 {
0204
0205 uint32_t xcr0 = get_xcr0_low();
0206
0207 sse_state_os_enabled = xcr0 >> 1 & 1;
0208 avx_state_os_enabled = xcr0 >> 2 & sse_state_os_enabled;
0209 avx512_state_os_enabled = xcr0 >> 6 & avx_state_os_enabled;
0210 }
0211
0212 sse2 = regs1[3] >> 26 & sse_state_os_enabled;
0213 sse3 = regs1[2] >> 0 & sse_state_os_enabled;
0214 ssse3 = regs1[2] >> 9 & sse_state_os_enabled;
0215 sse4_1 = regs1[2] >> 19 & sse_state_os_enabled;
0216 sse4_2 = regs1[2] >> 20 & sse_state_os_enabled;
0217 fma3_sse42 = regs1[2] >> 12 & sse_state_os_enabled;
0218
0219 avx = regs1[2] >> 28 & avx_state_os_enabled;
0220 fma3_avx = avx && fma3_sse42;
0221
0222 int regs8[4];
0223 get_cpuid(regs8, 0x80000001);
0224 fma4 = regs8[2] >> 16 & avx_state_os_enabled;
0225
0226
0227
0228
0229
0230 int regs7[4];
0231 get_cpuid(regs7, 0x7);
0232 avx2 = regs7[1] >> 5 & avx_state_os_enabled;
0233
0234 int regs7a[4];
0235 get_cpuid(regs7a, 0x7, 0x1);
0236 avxvnni = regs7a[0] >> 4 & avx_state_os_enabled;
0237
0238 fma3_avx2 = avx2 && fma3_sse42;
0239
0240 avx512f = regs7[1] >> 16 & avx512_state_os_enabled;
0241 avx512cd = regs7[1] >> 28 & avx512_state_os_enabled;
0242 avx512dq = regs7[1] >> 17 & avx512_state_os_enabled;
0243 avx512bw = regs7[1] >> 30 & avx512_state_os_enabled;
0244 avx512er = regs7[1] >> 27 & avx512_state_os_enabled;
0245 avx512pf = regs7[1] >> 26 & avx512_state_os_enabled;
0246 avx512ifma = regs7[1] >> 21 & avx512_state_os_enabled;
0247 avx512vbmi = regs7[2] >> 1 & avx512_state_os_enabled;
0248 avx512vnni_bw = regs7[2] >> 11 & avx512_state_os_enabled;
0249 avx512vnni_vbmi = avx512vbmi && avx512vnni_bw;
0250 #endif
0251 }
0252 };
0253 }
0254
0255 XSIMD_INLINE detail::supported_arch available_architectures() noexcept
0256 {
0257 static detail::supported_arch supported;
0258 return supported;
0259 }
0260 }
0261
0262 #endif