File indexing completed on 2025-01-30 10:25:57
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028 #ifndef VC_GLOBAL_H_
0029 #define VC_GLOBAL_H_
0030
0031 #include <cstdint>
0032 #include "fwddecl.h"
0033
#ifdef DOXYGEN

/**
 * \name Compiler identification macros (documentation-only branch)
 *
 * This branch is preprocessed only when DOXYGEN is defined. Every macro is defined with the
 * same expansion the real branch below uses and, with the exception of Vc_GCC, undefined
 * again on the next line. Version numbers built from three components are encoded as
 * `major * 0x10000 + minor * 0x100 + patchlevel`.
 */

/// Intel compiler identification; expands to `__INTEL_COMPILER_BUILD_DATE`.
#define Vc_ICC __INTEL_COMPILER_BUILD_DATE
#undef Vc_ICC

/// clang identification; encoded from `__clang_major__`, `__clang_minor__`, `__clang_patchlevel__`.
#define Vc_CLANG (__clang_major__ * 0x10000 + __clang_minor__ * 0x100 + __clang_patchlevel__)
#undef Vc_CLANG

/// Apple clang identification; same encoding as Vc_CLANG.
#define Vc_APPLECLANG (__clang_major__ * 0x10000 + __clang_minor__ * 0x100 + __clang_patchlevel__)
#undef Vc_APPLECLANG

/// GCC identification; encoded from `__GNUC__`, `__GNUC_MINOR__`, `__GNUC_PATCHLEVEL__`.
/// This is the only identification macro that stays defined in the DOXYGEN branch.
#define Vc_GCC (__GNUC__ * 0x10000 + __GNUC_MINOR__ * 0x100 + __GNUC_PATCHLEVEL__)

/// Microsoft compiler identification; expands to `_MSC_FULL_VER`.
#define Vc_MSVC _MSC_FULL_VER
#undef Vc_MSVC

#else  // !DOXYGEN

// ---------------------------------------------------------------------------------------
// Compiler identification.
// The chain defines exactly one of Vc_ICC, Vc_APPLECLANG, Vc_CLANG, Vc_GCC, Vc_MSVC or
// Vc_UNSUPPORTED_COMPILER. The order of the tests is significant: a compiler matched
// early in the chain is never reported as one of the later ones.
// ---------------------------------------------------------------------------------------
#ifdef __INTEL_COMPILER
#  define Vc_ICC __INTEL_COMPILER_BUILD_DATE
#elif defined(__clang__) && defined(__apple_build_version__)
#  define Vc_APPLECLANG (__clang_major__ * 0x10000 + __clang_minor__ * 0x100 + __clang_patchlevel__)
#elif defined(__clang__)
#  define Vc_CLANG (__clang_major__ * 0x10000 + __clang_minor__ * 0x100 + __clang_patchlevel__)
#elif defined(__GNUC__)
#  define Vc_GCC (__GNUC__ * 0x10000 + __GNUC_MINOR__ * 0x100 + __GNUC_PATCHLEVEL__)
#elif defined(_MSC_VER)
#  define Vc_MSVC _MSC_FULL_VER
#else
#  define Vc_UNSUPPORTED_COMPILER 1
#endif

// GCC >= 6: save the diagnostic state and silence -Wignored-attributes from here on.
// Vc_RESET_DIAGNOSTICS expands to the matching "diagnostic pop"; the pop is not issued in
// this section (presumably a later header expands the macro - confirm against its users).
// On every other compiler Vc_RESET_DIAGNOSTICS expands to nothing.
#if defined Vc_GCC && Vc_GCC >= 0x60000
#  define Vc_RESET_DIAGNOSTICS _Pragma("GCC diagnostic pop")
#  pragma GCC diagnostic push
#  pragma GCC diagnostic ignored "-Wignored-attributes"
#else
#  define Vc_RESET_DIAGNOSTICS
#endif

// ICC: disable diagnostic 2922.
// NOTE(review): the rationale for this number is not preserved in this copy of the file.
#if defined Vc_ICC
#  pragma warning disable 2922
#endif

// Language level: anything older than C++11 is rejected, unless the compiler is MSVC with
// _MSC_VER >= 1900. Vc_CXX14 / Vc_CXX17 are set from the value of __cplusplus.
#if __cplusplus < 201103 && (!defined Vc_MSVC || _MSC_VER < 1900)
#  error "Vc requires support for C++11."
#elif __cplusplus >= 201402L
#  define Vc_CXX14 1
#  if __cplusplus > 201700L
#    define Vc_CXX17 1
#  endif
#endif

// GNU-style inline assembly is enabled for every compiler that defines __GNUC__, unless
// the user opts out by defining Vc_NO_INLINE_ASM.
#if defined(__GNUC__) && !defined(Vc_NO_INLINE_ASM)
#  define Vc_GNU_ASM 1
#endif

// max_align_t availability:
//  - Vc_HAVE_STD_MAX_ALIGN_T : set for GCC >= 4.9, and for every compiler that is neither
//    GCC, clang nor ICC.
//  - Vc_HAVE_MAX_ALIGN_T     : set for GCC older than 4.9.
//  - GCC >= 7 targeting i386 : Vc_HAVE_STD_MAX_ALIGN_T is set only if __GLIBC_PREREQ
//    exists and reports glibc >= 2.26; otherwise neither macro is defined.
//  - clang and ICC           : neither macro is defined.
#ifdef Vc_GCC
#  if Vc_GCC >= 0x70000 && defined __i386__
#    ifdef __GLIBC_PREREQ
#      if __GLIBC_PREREQ(2,26)
#        define Vc_HAVE_STD_MAX_ALIGN_T 1
#      endif
#    endif
#  elif Vc_GCC >= 0x40900
#    define Vc_HAVE_STD_MAX_ALIGN_T 1
#  else
#    define Vc_HAVE_MAX_ALIGN_T 1
#  endif
#elif !defined(Vc_CLANG) && !defined(Vc_ICC)
#  define Vc_HAVE_STD_MAX_ALIGN_T 1
#endif

// Builtin vector types are used with GCC, clang and Apple clang.
#if defined(Vc_GCC) || defined(Vc_CLANG) || defined Vc_APPLECLANG
#  define Vc_USE_BUILTIN_VECTOR_TYPES 1
#endif

// Calling-convention keywords; they expand to nothing on anything but MSVC.
#ifdef Vc_MSVC
#  define Vc_CDECL __cdecl
#  define Vc_VDECL __vectorcall
#else
#  define Vc_CDECL
#  define Vc_VDECL
#endif

// ---------------------------------------------------------------------------------------
// Temporary names used to decode a user-supplied Vc_IMPL value. All of them are #undef'ed
// again further down. The upper 12 bits (IMPL_MASK) carry the base implementation, the
// lower 20 bits (EXT_MASK) carry extension flags that can be combined.
// ---------------------------------------------------------------------------------------
#define Scalar 0x00100000
#define SSE    0x00200000
#define SSE2   0x00300000
#define SSE3   0x00400000
#define SSSE3  0x00500000
#define SSE4_1 0x00600000
#define SSE4_2 0x00700000
#define AVX    0x00800000
#define AVX2   0x00900000

#define XOP    0x00000001
#define FMA4   0x00000002
#define F16C   0x00000004
#define POPCNT 0x00000008
#define SSE4a  0x00000010
#define FMA    0x00000020
#define BMI2   0x00000040

#define IMPL_MASK 0xFFF00000
#define EXT_MASK  0x000FFFFF  // defined for symmetry; not referenced in this section

// MSVC: derive the GNU-style __SSE__ / __SSE2__ macros from _M_IX86_FP (>= 1 -> SSE,
// >= 2 -> SSE2) or, when _M_IX86_FP is absent, from _M_AMD64 (both are set).
#ifdef Vc_MSVC
#  ifdef _M_IX86_FP
#    if _M_IX86_FP >= 1
#      ifndef __SSE__
#        define __SSE__ 1
#      endif
#    endif
#    if _M_IX86_FP >= 2
#      ifndef __SSE2__
#        define __SSE2__ 1
#      endif
#    endif
#  elif defined(_M_AMD64)
#    ifndef __SSE__
#      define __SSE__ 1
#    endif
#    ifndef __SSE2__
#      define __SSE2__ 1
#    endif
#  endif
#endif

// ICC: supply __POPCNT__ when it is missing but __SSE4_2__ or __SSE4A__ is defined.
#if defined Vc_ICC && !defined __POPCNT__
#  if defined __SSE4_2__ || defined __SSE4A__
#    define __POPCNT__ 1
#  endif
#endif

#ifdef VC_IMPL
#error "You are using the old VC_IMPL macro. Since Vc 1.0 all Vc macros start with Vc_, i.e. a lower-case 'c'"
#endif

#ifndef Vc_IMPL

// No explicit request: derive the Vc_IMPL_* macros from the compiler's target macros.
#  if defined(__AVX2__)
#    define Vc_IMPL_AVX2 1
#    define Vc_IMPL_AVX 1
#  elif defined(__AVX__)
#    define Vc_IMPL_AVX 1
#  else
#    if defined(__SSE4_2__)
#      define Vc_IMPL_SSE 1
#      define Vc_IMPL_SSE4_2 1
#    endif
#    if defined(__SSE4_1__)
#      define Vc_IMPL_SSE 1
#      define Vc_IMPL_SSE4_1 1
#    endif
#    if defined(__SSE3__)
#      define Vc_IMPL_SSE 1
#      define Vc_IMPL_SSE3 1
#    endif
#    if defined(__SSSE3__)
#      define Vc_IMPL_SSE 1
#      define Vc_IMPL_SSSE3 1
#    endif
#    if defined(__SSE2__)
#      define Vc_IMPL_SSE 1
#      define Vc_IMPL_SSE2 1
#    endif

// None of the SSE levels matched: use the scalar implementation.
#    if !defined(Vc_IMPL_SSE)
#      define Vc_IMPL_Scalar 1
#    endif
#  endif
// Extension macros are only derived for non-scalar implementations.
#  if !defined(Vc_IMPL_Scalar)
#    ifdef __FMA4__
#      define Vc_IMPL_FMA4 1
#    endif
#    ifdef __XOP__
#      define Vc_IMPL_XOP 1
#    endif
#    ifdef __F16C__
#      define Vc_IMPL_F16C 1
#    endif
#    ifdef __POPCNT__
#      define Vc_IMPL_POPCNT 1
#    endif
#    ifdef __SSE4A__
#      define Vc_IMPL_SSE4a 1
#    endif
#    ifdef __FMA__
#      define Vc_IMPL_FMA 1
#    endif
#    ifdef __BMI2__
#      define Vc_IMPL_BMI2 1
#    endif
#  endif

#else  // Vc_IMPL

// Explicit request: decode the base implementation from the IMPL_MASK bits of Vc_IMPL.
// A specific SSE level implies all lower SSE levels.
#  if (Vc_IMPL & IMPL_MASK) == AVX2
#    define Vc_IMPL_AVX2 1
#    define Vc_IMPL_AVX 1
#  elif (Vc_IMPL & IMPL_MASK) == AVX
#    define Vc_IMPL_AVX 1
#  elif (Vc_IMPL & IMPL_MASK) == Scalar
#    define Vc_IMPL_Scalar 1
#  elif (Vc_IMPL & IMPL_MASK) == SSE4_2
#    define Vc_IMPL_SSE4_2 1
#    define Vc_IMPL_SSE4_1 1
#    define Vc_IMPL_SSSE3 1
#    define Vc_IMPL_SSE3 1
#    define Vc_IMPL_SSE2 1
#    define Vc_IMPL_SSE 1
#  elif (Vc_IMPL & IMPL_MASK) == SSE4_1
#    define Vc_IMPL_SSE4_1 1
#    define Vc_IMPL_SSSE3 1
#    define Vc_IMPL_SSE3 1
#    define Vc_IMPL_SSE2 1
#    define Vc_IMPL_SSE 1
#  elif (Vc_IMPL & IMPL_MASK) == SSSE3
#    define Vc_IMPL_SSSE3 1
#    define Vc_IMPL_SSE3 1
#    define Vc_IMPL_SSE2 1
#    define Vc_IMPL_SSE 1
#  elif (Vc_IMPL & IMPL_MASK) == SSE3
#    define Vc_IMPL_SSE3 1
#    define Vc_IMPL_SSE2 1
#    define Vc_IMPL_SSE 1
#  elif (Vc_IMPL & IMPL_MASK) == SSE2
#    define Vc_IMPL_SSE2 1
#    define Vc_IMPL_SSE 1
#  elif (Vc_IMPL & IMPL_MASK) == SSE
// Generic "SSE": the concrete levels are taken from the compiler's target macros.
#    define Vc_IMPL_SSE 1
#    if defined(__SSE4_2__)
#      define Vc_IMPL_SSE4_2 1
#    endif
#    if defined(__SSE4_1__)
#      define Vc_IMPL_SSE4_1 1
#    endif
#    if defined(__SSE3__)
#      define Vc_IMPL_SSE3 1
#    endif
#    if defined(__SSSE3__)
#      define Vc_IMPL_SSSE3 1
#    endif
#    if defined(__SSE2__)
#      define Vc_IMPL_SSE2 1
#    endif
#  elif (Vc_IMPL & IMPL_MASK) == 0 && (Vc_IMPL & SSE4a)
// No base implementation given, but the SSE4a flag is set: select SSE up to SSE3.
#    define Vc_IMPL_SSE3 1
#    define Vc_IMPL_SSE2 1
#    define Vc_IMPL_SSE 1
#  endif
// Extension flags are decoded independently of the base implementation.
#  if (Vc_IMPL & XOP)
#    define Vc_IMPL_XOP 1
#  endif
#  if (Vc_IMPL & FMA4)
#    define Vc_IMPL_FMA4 1
#  endif
#  if (Vc_IMPL & F16C)
#    define Vc_IMPL_F16C 1
#  endif
// POPCNT is additionally enabled when the compiler reports it and the implementation is
// not scalar.
#  if (!defined(Vc_IMPL_Scalar) && defined(__POPCNT__)) || (Vc_IMPL & POPCNT)
#    define Vc_IMPL_POPCNT 1
#  endif
#  if (Vc_IMPL & SSE4a)
#    define Vc_IMPL_SSE4a 1
#  endif
#  if (Vc_IMPL & FMA)
#    define Vc_IMPL_FMA 1
#  endif
#  if (Vc_IMPL & BMI2)
#    define Vc_IMPL_BMI2 1
#  endif
// The request has been consumed; only the Vc_IMPL_* macros remain.
#  undef Vc_IMPL

#endif  // Vc_IMPL

// VEX coding follows the compiler target (__AVX__), not the selected implementation.
#ifdef __AVX__
#  define Vc_USE_VEX_CODING 1
#endif

// AVX implies every SSE level.
#ifdef Vc_IMPL_AVX
#  define Vc_IMPL_SSE4_2 1
#  define Vc_IMPL_SSE4_1 1
#  define Vc_IMPL_SSSE3 1
#  define Vc_IMPL_SSE3 1
#  define Vc_IMPL_SSE2 1
#  define Vc_IMPL_SSE 1
#endif

// clang 3.6.x: drop AVX/AVX2 again. The SSE macros defined just above stay in place, so
// the SSE4.2 implementation is what remains selected.
#if defined(Vc_CLANG) && Vc_CLANG >= 0x30600 && Vc_CLANG < 0x30700
#  if defined(Vc_IMPL_AVX)
#    warning "clang 3.6.x miscompiles AVX code, frequently losing 50% of the data. Vc will fall back to SSE4 instead."
#    undef Vc_IMPL_AVX
#    if defined(Vc_IMPL_AVX2)
#      undef Vc_IMPL_AVX2
#    endif
#  endif
#endif

// Sanity checks on the outcome of the selection above.
#if !defined(Vc_IMPL_Scalar) && !defined(Vc_IMPL_SSE) && !defined(Vc_IMPL_AVX)
#  error "No suitable Vc implementation was selected! Probably Vc_IMPL was set to an invalid value."
#elif defined(Vc_IMPL_SSE) && !defined(Vc_IMPL_SSE2)
#  error "SSE requested but no SSE2 support. Vc needs at least SSE2!"
#endif

// Remove the temporary decoding names so they do not leak into code that follows.
#undef Scalar
#undef SSE
#undef SSE2
#undef SSE3
#undef SSSE3
#undef SSE4_1
#undef SSE4_2
#undef AVX
#undef AVX2

#undef XOP
#undef FMA4
#undef F16C
#undef POPCNT
#undef SSE4a
#undef FMA
#undef BMI2

#undef IMPL_MASK
#undef EXT_MASK

// Exactly one Vc_DEFAULT_IMPL_* macro names the selected implementation family.
#if defined Vc_IMPL_AVX2
#  define Vc_DEFAULT_IMPL_AVX2
#elif defined Vc_IMPL_AVX
#  define Vc_DEFAULT_IMPL_AVX
#elif defined Vc_IMPL_SSE
#  define Vc_DEFAULT_IMPL_SSE
#elif defined Vc_IMPL_Scalar
#  define Vc_DEFAULT_IMPL_Scalar
#else
#  error "Preprocessor logic broken. Please report a bug."
#endif

#endif  // DOXYGEN
0433
namespace Vc_VERSIONED_NAMESPACE
{

// Integer aliases declared inside this namespace. They reuse the <cstdint> names but are
// bound to fixed fundamental types.
// NOTE(review): the widths implied by the names are not checked here - they hold on the
// usual data models, confirm for any new target.
using int8_t   = signed char;
using uint8_t  = unsigned char;
using int16_t  = signed short;
using uint16_t = unsigned short;
using int32_t  = signed int;
using uint32_t = unsigned int;
using int64_t  = signed long long;
using uint64_t = unsigned long long;

/**
 * Alignment selector with three enumerators, named after a vector, a cache line and a
 * page. How the values are consumed is not visible in this header.
 */
enum MallocAlignment {
    AlignOnVector,     ///< value 0
    AlignOnCacheline,  ///< value 1
    AlignOnPage        ///< value 2
};

/**
 * Base implementation identifiers.
 *
 * The enumerators are numbered consecutively from 0 (ScalarImpl) to 8 (MICImpl), so they
 * can be compared with relational operators (see ImplementationT::is_between).
 * ImplementationMask selects the bits of a feature word that hold one of these values.
 */
enum Implementation : std::uint_least32_t {
    ScalarImpl,
    SSE2Impl,
    SSE3Impl,
    SSSE3Impl,
    SSE41Impl,
    SSE42Impl,
    AVXImpl,
    AVX2Impl,
    /// Present in the enumeration, but never selected by CurrentImplementation below.
    MICImpl,
    /// Mask for the low 12 bits of a feature word (the Implementation part).
    ImplementationMask = 0xfff
};

/**
 * Extension flags that are combined with an Implementation value in one feature word.
 *
 * Each flag occupies one bit at or above bit 12, i.e. outside ImplementationMask.
 */
enum ExtraInstructions : std::uint_least32_t {
    Float16cInstructions = 0x01000,
    Fma4Instructions     = 0x02000,
    XopInstructions      = 0x04000,
    PopcntInstructions   = 0x08000,
    Sse4aInstructions    = 0x10000,
    FmaInstructions      = 0x20000,
    VexInstructions      = 0x40000,
    Bmi2Instructions     = 0x80000,
    /// Mask for all bits of a feature word that are not covered by ImplementationMask.
    ExtraInstructionsMask = 0xfffff000u
};

/**
 * Compile-time description of an implementation.
 *
 * \tparam Features  Feature word: an Implementation value in the ImplementationMask bits,
 *                   combined with any number of ExtraInstructions flags.
 */
template <unsigned int Features> struct ImplementationT {
    /// Returns the Implementation encoded in \p Features (extension flags masked off).
    static constexpr Implementation current()
    {
        return static_cast<Implementation>(Features & ImplementationMask);
    }

    /// Returns whether \p impl equals current().
    static constexpr bool is(Implementation impl)
    {
        return static_cast<unsigned int>(impl) == current();
    }

    /// Returns whether current() lies in the closed interval [\p low, \p high].
    static constexpr bool is_between(Implementation low, Implementation high)
    {
        return static_cast<unsigned int>(low) <= current() &&
               static_cast<unsigned int>(high) >= current();
    }

    /**
     * Returns whether every extension flag set in \p Features is also set in
     * \p extraInstructions. Flags set only in \p extraInstructions do not matter, and
     * bits inside ImplementationMask are ignored on both sides.
     */
    static constexpr bool runs_on(unsigned int extraInstructions)
    {
        return (extraInstructions & Features & ExtraInstructionsMask) ==
               (Features & ExtraInstructionsMask);
    }
};

/**
 * The implementation selected by the Vc_IMPL_* configuration macros.
 *
 * The first conditional picks the base Implementation (Scalar first, then the highest
 * level from AVX2 down to SSE2); the following ones add one ExtraInstructions flag per
 * configured extension.
 *
 * If no Vc_IMPL_* base macro is defined at all - which is the case when this header is
 * preprocessed with DOXYGEN defined - the alias falls back to ScalarImpl instead of
 * leaving the template argument without a leading operand.
 *
 * NOTE(review): XopInstructions is only added when Vc_IMPL_SSE4a is defined, and
 * Fma4Instructions only when Vc_IMPL_XOP is defined as well. The nesting is kept as it
 * was; confirm that it is intended.
 *
 * The flags are qualified with `Vc::`, which is not declared in this header (presumably
 * an alias provided by "fwddecl.h" - confirm).
 */
using CurrentImplementation = ImplementationT<
#ifdef Vc_IMPL_Scalar
    ScalarImpl
#elif defined(Vc_IMPL_AVX2)
    AVX2Impl
#elif defined(Vc_IMPL_AVX)
    AVXImpl
#elif defined(Vc_IMPL_SSE4_2)
    SSE42Impl
#elif defined(Vc_IMPL_SSE4_1)
    SSE41Impl
#elif defined(Vc_IMPL_SSSE3)
    SSSE3Impl
#elif defined(Vc_IMPL_SSE3)
    SSE3Impl
#elif defined(Vc_IMPL_SSE2)
    SSE2Impl
#else
    ScalarImpl  // no Vc_IMPL_* base macro defined (e.g. DOXYGEN pass)
#endif
#ifdef Vc_IMPL_SSE4a
    + Vc::Sse4aInstructions
#  ifdef Vc_IMPL_XOP
    + Vc::XopInstructions
#    ifdef Vc_IMPL_FMA4
    + Vc::Fma4Instructions
#    endif
#  endif
#endif
#ifdef Vc_IMPL_POPCNT
    + Vc::PopcntInstructions
#endif
#ifdef Vc_IMPL_FMA
    + Vc::FmaInstructions
#endif
#ifdef Vc_IMPL_BMI2
    + Vc::Bmi2Instructions
#endif
#ifdef Vc_USE_VEX_CODING
    + Vc::VexInstructions
#endif
    >;

}  // namespace Vc_VERSIONED_NAMESPACE
0623
0624 #include "version.h"
0625
0626 #endif
0627
0628