File indexing completed on 2025-07-30 08:46:18
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017 #ifndef __TBB_detail__machine_H
0018 #define __TBB_detail__machine_H
0019
0020 #include "_config.h"
0021 #include "_assert.h"
0022
0023 #include <atomic>
0024 #include <climits>
0025 #include <cstdint>
0026 #include <cstddef>
0027
0028 #ifdef _WIN32
0029 #include <intrin.h>
0030 #ifdef __TBBMALLOC_BUILD
0031 #define WIN32_LEAN_AND_MEAN
0032 #ifndef NOMINMAX
0033 #define NOMINMAX
0034 #endif
0035 #include <windows.h> // SwitchToThread()
0036 #endif
0037 #ifdef _MSC_VER
0038 #if __TBB_x86_64 || __TBB_x86_32
0039 #pragma intrinsic(__rdtsc)
0040 #endif
0041 #endif
0042 #endif
0043 #if __TBB_x86_64 || __TBB_x86_32
0044 #include <immintrin.h> // _mm_pause
0045 #endif
0046 #if (_WIN32)
0047 #include <float.h> // _control87
0048 #endif
0049
0050 #if __TBB_GLIBCXX_THIS_THREAD_YIELD_BROKEN
0051 #include <sched.h> // sched_yield
0052 #else
0053 #include <thread> // std::this_thread::yield()
0054 #endif
0055
0056 namespace tbb {
0057 namespace detail {
0058 inline namespace d0 {
0059
0060
0061
0062
0063
#if __TBB_GLIBCXX_THIS_THREAD_YIELD_BROKEN
// Workaround for libstdc++ configurations where std::this_thread::yield() is
// broken (see the __TBB_GLIBCXX_THIS_THREAD_YIELD_BROKEN macro in _config.h):
// call the POSIX primitive directly.
static inline void yield() {
    int err = sched_yield();
    // _EX variant so that `err` stays referenced when assertions are compiled
    // out (avoids an unused-variable warning in release builds).
    __TBB_ASSERT_EX(err == 0, "sched_yield has failed");
}
#elif __TBBMALLOC_BUILD && _WIN32
// tbbmalloc on Windows yields through the raw Win32 API (see the <windows.h>
// include above) — presumably to avoid a dependency on the C++ runtime's
// <thread> machinery in the allocator; confirm against build requirements.
static inline void yield() {
    SwitchToThread();
}
#else
// Everywhere else the standard yield is adequate; import it under the same name.
using std::this_thread::yield;
#endif
0077
0078
0079
0080
0081
// Issues a full memory fence with sequentially consistent semantics.
static inline void atomic_fence_seq_cst() {
#if (__TBB_x86_64 || __TBB_x86_32) && defined(__GNUC__) && __GNUC__ < 11
    // On x86 a locked read-modify-write on a dummy stack byte acts as a full
    // fence. This hand-written sequence is used for GCC older than 11,
    // presumably because those versions emit a slower MFENCE for
    // atomic_thread_fence(seq_cst) — confirm against GCC changelogs.
    unsigned char dummy = 0u;
    __asm__ __volatile__ ("lock; notb %0" : "+m" (dummy) :: "memory");
#else
    std::atomic_thread_fence(std::memory_order_seq_cst);
#endif
}
0090
0091
0092
0093
0094
// Spins for `delay` iterations using the cheapest CPU spin-wait hint
// available on this architecture; falls back to an OS-level yield where no
// pause instruction exists.
static inline void machine_pause(int32_t delay) {
#if __TBB_x86_64 || __TBB_x86_32
    // PAUSE: tells the core this is a spin-wait loop (saves power, helps SMT).
    while (delay-- > 0) { _mm_pause(); }
#elif __ARM_ARCH_7A__ || __aarch64__
    // ARM YIELD hint; "memory" clobber keeps the compiler from caching
    // values across spin iterations.
    while (delay-- > 0) { __asm__ __volatile__("yield" ::: "memory"); }
#else
    // No pause primitive: ignore the requested delay and give up the
    // time slice once instead.
    (void)delay;
    yield();
#endif
}
0105
0106
0107
0108
0109
0110
#if defined(__GNUC__) || defined(__clang__)
// Overload set that dispatches to the correctly sized count-leading-zeros
// builtin. NOTE: __builtin_clz* has an undefined result for x == 0; callers
// (machine_log2) must pass a non-zero value.
namespace gnu_builtins {
    inline uintptr_t clz(unsigned int x) { return static_cast<uintptr_t>(__builtin_clz(x)); }
    inline uintptr_t clz(unsigned long int x) { return static_cast<uintptr_t>(__builtin_clzl(x)); }
    inline uintptr_t clz(unsigned long long int x) { return static_cast<uintptr_t>(__builtin_clzll(x)); }
}
#elif defined(_MSC_VER)
// __TBB_W presumably expands to the word-sized variant of the intrinsic
// (e.g. _BitScanReverse64 on 64-bit targets) — defined in _config.h; confirm.
#pragma intrinsic(__TBB_W(_BitScanReverse))
namespace msvc_intrinsics {
    // Returns the index of the highest set bit of i, i.e. floor(log2(i)).
    // _BitScanReverse leaves the output operand unmodified when i == 0, in
    // which case j would be read uninitialized — callers must pass i != 0.
    static inline uintptr_t bit_scan_reverse(uintptr_t i) {
        unsigned long j;
        __TBB_W(_BitScanReverse)( &j, i );
        return j;
    }
}
#endif
0127
// Width of T in bits, computable at compile time.
template <typename T>
constexpr std::uintptr_t number_of_bits() {
    constexpr std::uintptr_t bits_per_byte = CHAR_BIT;
    return bits_per_byte * sizeof(T);
}
0132
0133
0134 static inline uintptr_t machine_log2(uintptr_t x) {
0135 #if defined(__GNUC__) || defined(__clang__)
0136
0137 return (number_of_bits<decltype(x)>() - 1) ^ gnu_builtins::clz(x);
0138 #elif defined(_MSC_VER)
0139 return msvc_intrinsics::bit_scan_reverse(x);
0140 #elif __i386__ || __i386 || __MINGW32__
0141 uintptr_t j, i = x;
0142 __asm__("bsr %1,%0" : "=r"(j) : "r"(i));
0143 return j;
0144 #elif __powerpc__ || __POWERPC__
0145 #if __TBB_WORDSIZE==8
0146 __asm__ __volatile__ ("cntlzd %0,%0" : "+r"(x));
0147 return 63 - static_cast<intptr_t>(x);
0148 #else
0149 __asm__ __volatile__ ("cntlzw %0,%0" : "+r"(x));
0150 return 31 - static_cast<intptr_t>(x);
0151 #endif
0152 #elif __sparc
0153 uint64_t count;
0154
0155 x |= (x >> 1);
0156 x |= (x >> 2);
0157 x |= (x >> 4);
0158 x |= (x >> 8);
0159 x |= (x >> 16);
0160 x |= (x >> 32);
0161
0162 __asm__ ("popc %1, %0" : "=r"(count) : "r"(x) );
0163 return count - 1;
0164 #else
0165 intptr_t result = 0;
0166
0167 if( sizeof(x) > 4 && (uintptr_t tmp = x >> 32) ) { x = tmp; result += 32; }
0168 if( uintptr_t tmp = x >> 16 ) { x = tmp; result += 16; }
0169 if( uintptr_t tmp = x >> 8 ) { x = tmp; result += 8; }
0170 if( uintptr_t tmp = x >> 4 ) { x = tmp; result += 4; }
0171 if( uintptr_t tmp = x >> 2 ) { x = tmp; result += 2; }
0172
0173 return (x & 2) ? result + 1 : result;
0174 #endif
0175 }
0176
0177
0178
0179
#if TBB_USE_CLANG_BITREVERSE_BUILTINS
// Thin overload set over the clang bit-reversal builtins, one per width.
namespace llvm_builtins {
    inline uint8_t builtin_bitreverse(uint8_t x) { return __builtin_bitreverse8 (x); }
    inline uint16_t builtin_bitreverse(uint16_t x) { return __builtin_bitreverse16(x); }
    inline uint32_t builtin_bitreverse(uint32_t x) { return __builtin_bitreverse32(x); }
    inline uint64_t builtin_bitreverse(uint64_t x) { return __builtin_bitreverse64(x); }
}
#else
// Lookup table: byte_table[b] is byte b with its bit order reversed.
// Templated so the static array can be defined in this header (the template
// definition avoids a multiply-defined symbol across translation units).
template<typename T>
struct reverse {
    static const T byte_table[256];
};

template<typename T>
const T reverse<T>::byte_table[256] = {
    0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
    0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
    0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
    0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
    0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
    0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
    0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
    0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
    0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
    0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
    0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
    0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
    0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
    0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
    0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
    0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
};

// Reverses the bit order within a single byte via table lookup.
inline unsigned char reverse_byte(unsigned char src) {
    return reverse<unsigned char>::byte_table[src];
}
#endif // TBB_USE_CLANG_BITREVERSE_BUILTINS

// Returns src with the order of all of its bits reversed (bit 0 <-> bit N-1).
template<typename T>
T machine_reverse_bits(T src) {
#if TBB_USE_CLANG_BITREVERSE_BUILTINS
    return builtin_bitreverse(fixed_width_cast(src));
#else
    // Reverse the byte order of the object representation while bit-reversing
    // each individual byte; together that reverses the whole bit string.
    T dst;
    unsigned char *src_bytes = reinterpret_cast<unsigned char *>(&src);
    unsigned char *dst_bytes = reinterpret_cast<unsigned char *>(&dst);

    for (std::size_t i = 0; i < sizeof(T); ++i) {
        dst_bytes[sizeof(T) - 1 - i] = reverse_byte(src_bytes[i]);
    }

    return dst;
#endif
}
0234
0235 }
0236
0237 namespace d1 {
0238
#if (_WIN32)
// Snapshot of the FPU control state — the x87 control word plus, on x86, the
// SSE MXCSR register. Used to capture the floating-point settings (rounding
// mode, exception masks, ...) of one thread and re-apply them on another.
#define __TBB_CPU_CTL_ENV_PRESENT 1
struct cpu_ctl_env {
    unsigned int x87cw{};   // x87 control word as reported by _control87
#if (__TBB_x86_64)
    // On x64 only denormal control, exception masks and rounding control are
    // written back; per the MSVC _control87 documentation, changing the
    // infinity mode or precision is not supported there (the attempt asserts)
    // — confirm against current CRT docs.
    static constexpr unsigned int X87CW_CONTROL_MASK = _MCW_DN | _MCW_EM | _MCW_RC;
#else
    static constexpr unsigned int X87CW_CONTROL_MASK = ~0U;   // restore every field
#endif
#if (__TBB_x86_32 || __TBB_x86_64)
    unsigned int mxcsr{};   // SSE control/status register
    // The low 6 MXCSR bits are sticky exception *status* flags, not control
    // bits, so they are excluded when the state is restored.
    static constexpr unsigned int MXCSR_CONTROL_MASK = ~0x3fu;
#endif

    // True when the two captured control states differ.
    bool operator!=( const cpu_ctl_env& ctl ) const {
        return
#if (__TBB_x86_32 || __TBB_x86_64)
            mxcsr != ctl.mxcsr ||
#endif
            x87cw != ctl.x87cw;
    }
    // Reads the current thread's FPU control state into this object.
    void get_env() {
        x87cw = _control87(0, 0);   // (0, 0) queries without modifying
#if (__TBB_x86_32 || __TBB_x86_64)
        mxcsr = _mm_getcsr();
#endif
    }
    // Applies the stored control state to the current thread.
    void set_env() const {
        _control87(x87cw, X87CW_CONTROL_MASK);
#if (__TBB_x86_32 || __TBB_x86_64)
        _mm_setcsr(mxcsr & MXCSR_CONTROL_MASK);
#endif
    }
};
#elif (__TBB_x86_32 || __TBB_x86_64)
// Non-Windows x86/x64: read and write the control registers directly with
// inline assembly instead of going through the CRT.
#define __TBB_CPU_CTL_ENV_PRESENT 1
struct cpu_ctl_env {
    int mxcsr{};      // SSE control/status register
    short x87cw{};    // x87 control word
    // Strip the 6 sticky exception-status bits; see the Windows variant above.
    static const int MXCSR_CONTROL_MASK = ~0x3f;

    // True when the two captured control states differ.
    bool operator!=(const cpu_ctl_env& ctl) const {
        return mxcsr != ctl.mxcsr || x87cw != ctl.x87cw;
    }
    // Captures MXCSR and the x87 control word from the current thread.
    void get_env() {
        __asm__ __volatile__(
            "stmxcsr %0\n\t"
            "fstcw %1"
            : "=m"(mxcsr), "=m"(x87cw)
        );
        // Mask the status bits at capture time so operator!= compares
        // control bits only.
        mxcsr &= MXCSR_CONTROL_MASK;
    }
    // Installs the stored MXCSR and x87 control word into the current thread.
    void set_env() const {
        __asm__ __volatile__(
            "ldmxcsr %0\n\t"
            "fldcw %1"
            : : "m"(mxcsr), "m"(x87cw)
        );
    }
};
#endif
0305
0306 }
0307
0308 }
0309 }
0310
0311 #if !__TBB_CPU_CTL_ENV_PRESENT
0312 #include <fenv.h>
0313
0314 #include <cstring>
0315
0316 namespace tbb {
0317 namespace detail {
0318
0319 namespace r1 {
0320 void* __TBB_EXPORTED_FUNC cache_aligned_allocate(std::size_t size);
0321 void __TBB_EXPORTED_FUNC cache_aligned_deallocate(void* p);
0322 }
0323
0324 namespace d1 {
0325
// Portable fallback used when no specialized cpu_ctl_env was defined above:
// stores the entire C floating-point environment (fenv_t) captured with
// fegetenv/fesetenv. The fenv_t lives in a lazily heap-allocated buffer
// obtained from r1::cache_aligned_allocate (presumably to keep this class
// itself small and fenv_t's platform-specific size out of the ABI — confirm).
class cpu_ctl_env {
    fenv_t *my_fenv_ptr;   // nullptr until get_env() or assignment initializes it
public:
    cpu_ctl_env() : my_fenv_ptr(nullptr) {}
    ~cpu_ctl_env() {
        if ( my_fenv_ptr )
            r1::cache_aligned_deallocate( (void*)my_fenv_ptr );
    }

    // Copying deep-copies the environment rather than sharing the pointer, so
    // each object owns its buffer independently. Copying from a source that
    // was never initialized via get_env() violates the precondition asserted
    // in operator= below.
    cpu_ctl_env( const cpu_ctl_env &src ) : my_fenv_ptr(nullptr) {
        *this = src;   // delegate to copy assignment (allocates on first use)
    }
    cpu_ctl_env& operator=( const cpu_ctl_env &src ) {
        __TBB_ASSERT( src.my_fenv_ptr, nullptr);
        if ( !my_fenv_ptr )
            my_fenv_ptr = (fenv_t*)r1::cache_aligned_allocate(sizeof(fenv_t));
        *my_fenv_ptr = *src.my_fenv_ptr;
        return *this;
    }
    // Byte-wise comparison of the stored environments; both sides must have
    // been initialized. NOTE(review): memcmp also compares any padding inside
    // fenv_t — presumed benign since both values come from fegetenv; confirm.
    bool operator!=( const cpu_ctl_env &ctl ) const {
        __TBB_ASSERT( my_fenv_ptr, "cpu_ctl_env is not initialized." );
        __TBB_ASSERT( ctl.my_fenv_ptr, "cpu_ctl_env is not initialized." );
        return std::memcmp( (void*)my_fenv_ptr, (void*)ctl.my_fenv_ptr, sizeof(fenv_t) );
    }
    // Captures the current thread's floating-point environment, allocating
    // the buffer on first use.
    void get_env () {
        if ( !my_fenv_ptr )
            my_fenv_ptr = (fenv_t*)r1::cache_aligned_allocate(sizeof(fenv_t));
        fegetenv( my_fenv_ptr );
    }
    // Installs the stored environment into the current thread and returns
    // *this for chaining.
    const cpu_ctl_env& set_env () const {
        __TBB_ASSERT( my_fenv_ptr, "cpu_ctl_env is not initialized." );
        fesetenv( my_fenv_ptr );
        return *this;
    }
};
0366
0367 }
0368 }
0369 }
0370
0371 #endif
0372
0373 #endif