#ifndef __TBB_machine_H
#define __TBB_machine_H

/** This header selects the platform-specific "machine" layer (a header under
    machine/ that supplies the primitive fences, compare-and-swap, pause, and
    yield operations) and then completes the porting layer with generic,
    CAS-based implementations of whatever the platform layer did not provide:
    part-word CAS, fetch-and-add, fetch-and-store, and fenced 64-bit
    load/store on 32-bit platforms. */

#include "tbb_stddef.h"

namespace tbb {
namespace internal {

// Forward declarations of the size-dispatched load/store templates. Each is
// specialized either by the platform-specific machine header or by the
// generic implementations further below.
template <typename T, std::size_t S>
struct machine_load_store;

template <typename T, std::size_t S>
struct machine_load_store_relaxed;

template <typename T, std::size_t S>
struct machine_load_store_seq_cst;

// Maps an operand size to an integral "word" type of that size and carries
// the fetch-and-store primitive used for sequentially consistent stores.
template<size_t S> struct atomic_selector;

template<> struct atomic_selector<1> {
    typedef int8_t word;
    inline static word fetch_store ( volatile void* location, word value );
};

template<> struct atomic_selector<2> {
    typedef int16_t word;
    inline static word fetch_store ( volatile void* location, word value );
};

template<> struct atomic_selector<4> {
#if _MSC_VER && !_WIN64
    // On 32-bit MSVC builds, intptr_t (which has the same width as int32_t
    // there) is used in place of int32_t.
    typedef intptr_t word;
#else
    typedef int32_t word;
#endif
    inline static word fetch_store ( volatile void* location, word value );
};

template<> struct atomic_selector<8> {
    typedef int64_t word;
    inline static word fetch_store ( volatile void* location, word value );
};

}} // namespace tbb::internal

// Generic fenced store to a 64-bit location, emulated with a CAS loop:
// re-read the current value and attempt the swap until the CAS succeeds.
#define __TBB_MACHINE_DEFINE_STORE8_GENERIC_FENCED(M)                                        \
    inline void __TBB_machine_generic_store8##M(volatile void *ptr, int64_t value) {         \
        for(;;) {                                                                            \
            int64_t result = *(volatile int64_t *)ptr;                                       \
            if( __TBB_machine_cmpswp8##M(ptr,value,result)==result ) break;                  \
        }                                                                                    \
    }

// Generic fenced load from a 64-bit location, emulated via CAS: the comparand
// and the new value may be anything as long as they are equal, so the location
// is never modified and the CAS atomically returns its current value.
#define __TBB_MACHINE_DEFINE_LOAD8_GENERIC_FENCED(M)                                         \
    inline int64_t __TBB_machine_generic_load8##M(const volatile void *ptr) {                \
        const int64_t anyvalue = 2305843009213693951LL;                                      \
        return __TBB_machine_cmpswp8##M(const_cast<volatile void *>(ptr),anyvalue,anyvalue); \
    }

// Endianness codes for __TBB_ENDIANNESS; __TBB_ENDIAN_DETECT requests
// detection via a run-time probe (see __TBB_MaskedCompareAndSwap below).
#define __TBB_ENDIAN_UNSUPPORTED -1
#define __TBB_ENDIAN_LITTLE 0
#define __TBB_ENDIAN_BIG 1
#define __TBB_ENDIAN_DETECT 2

#if _WIN32||_WIN64

#ifdef _MANAGED
#pragma managed(push, off)
#endif

    #if __MINGW64__ || __MINGW32__
        extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
        #define __TBB_Yield() SwitchToThread()
        #if (TBB_USE_GCC_BUILTINS && __TBB_GCC_BUILTIN_ATOMICS_PRESENT)
            #include "machine/gcc_generic.h"
        #elif __MINGW64__
            #include "machine/linux_intel64.h"
        #elif __MINGW32__
            #include "machine/linux_ia32.h"
        #endif
    #elif (TBB_USE_ICC_BUILTINS && __TBB_ICC_BUILTIN_ATOMICS_PRESENT)
        #include "machine/icc_generic.h"
    #elif defined(_M_IX86) && !defined(__TBB_WIN32_USE_CL_BUILTINS)
        #include "machine/windows_ia32.h"
    #elif defined(_M_X64)
        #include "machine/windows_intel64.h"
    #elif defined(_M_ARM) || defined(__TBB_WIN32_USE_CL_BUILTINS)
        #include "machine/msvc_armv7.h"
    #endif

#ifdef _MANAGED
#pragma managed(pop)
#endif

#elif __TBB_DEFINE_MIC

    #include "machine/mic_common.h"
    #if (TBB_USE_ICC_BUILTINS && __TBB_ICC_BUILTIN_ATOMICS_PRESENT)
        #include "machine/icc_generic.h"
    #else
        #include "machine/linux_intel64.h"
    #endif

#elif __linux__ || __FreeBSD__ || __NetBSD__ || __OpenBSD__

    #if (TBB_USE_GCC_BUILTINS && __TBB_GCC_BUILTIN_ATOMICS_PRESENT)
        #include "machine/gcc_generic.h"
    #elif (TBB_USE_ICC_BUILTINS && __TBB_ICC_BUILTIN_ATOMICS_PRESENT)
        #include "machine/icc_generic.h"
    #elif __i386__
        #include "machine/linux_ia32.h"
    #elif __x86_64__
        #include "machine/linux_intel64.h"
    #elif __ia64__
        #include "machine/linux_ia64.h"
    #elif __powerpc__
        #include "machine/mac_ppc.h"
    #elif __ARM_ARCH_7A__ || __aarch64__
        #include "machine/gcc_arm.h"
    #elif __TBB_GCC_BUILTIN_ATOMICS_PRESENT
        #include "machine/gcc_generic.h"
    #endif
    #include "machine/linux_common.h"

#elif __APPLE__

    #if (TBB_USE_ICC_BUILTINS && __TBB_ICC_BUILTIN_ATOMICS_PRESENT)
        #include "machine/icc_generic.h"
    #elif __TBB_x86_32
        #include "machine/linux_ia32.h"
    #elif __TBB_x86_64
        #include "machine/linux_intel64.h"
    #elif __POWERPC__
        #include "machine/mac_ppc.h"
    #endif
    #include "machine/macos_common.h"

#elif _AIX

    #include "machine/ibm_aix51.h"

#elif __sun || __SUNPRO_CC

    #define __asm__ asm
    #define __volatile__ volatile

    #if __i386 || __i386__
        #include "machine/linux_ia32.h"
    #elif __x86_64__
        #include "machine/linux_intel64.h"
    #elif __sparc
        #include "machine/sunos_sparc.h"
    #endif
    #include <sched.h>

    #define __TBB_Yield() sched_yield()

#endif /* OS selection */

#ifndef __TBB_64BIT_ATOMICS
    #define __TBB_64BIT_ATOMICS 1
#endif

// If the platform layer provides explicitly fenced primitives, map the plain
// names to the full-fence flavors and define the word-sized acquire/release
// helpers in terms of the appropriately fenced fetch-and-add.
#if __TBB_USE_FENCED_ATOMICS
#define __TBB_machine_cmpswp1 __TBB_machine_cmpswp1full_fence
#define __TBB_machine_cmpswp2 __TBB_machine_cmpswp2full_fence
#define __TBB_machine_cmpswp4 __TBB_machine_cmpswp4full_fence
#define __TBB_machine_cmpswp8 __TBB_machine_cmpswp8full_fence

#if __TBB_WORDSIZE==8
#define __TBB_machine_fetchadd8 __TBB_machine_fetchadd8full_fence
#define __TBB_machine_fetchstore8 __TBB_machine_fetchstore8full_fence
#define __TBB_FetchAndAddWrelease(P,V) __TBB_machine_fetchadd8release(P,V)
#define __TBB_FetchAndIncrementWacquire(P) __TBB_machine_fetchadd8acquire(P,1)
#define __TBB_FetchAndDecrementWrelease(P) __TBB_machine_fetchadd8release(P,(-1))
#else
#define __TBB_machine_fetchadd4 __TBB_machine_fetchadd4full_fence
#define __TBB_machine_fetchstore4 __TBB_machine_fetchstore4full_fence
#define __TBB_FetchAndAddWrelease(P,V) __TBB_machine_fetchadd4release(P,V)
#define __TBB_FetchAndIncrementWacquire(P) __TBB_machine_fetchadd4acquire(P,1)
#define __TBB_FetchAndDecrementWrelease(P) __TBB_machine_fetchadd4release(P,(-1))
#endif /* __TBB_WORDSIZE==8 */
#else /* !__TBB_USE_FENCED_ATOMICS */
#define __TBB_FetchAndAddWrelease(P,V) __TBB_FetchAndAddW(P,V)
#define __TBB_FetchAndIncrementWacquire(P) __TBB_FetchAndAddW(P,1)
#define __TBB_FetchAndDecrementWrelease(P) __TBB_FetchAndAddW(P,(-1))
#endif /* !__TBB_USE_FENCED_ATOMICS */

#if __TBB_WORDSIZE==4
#define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswp4(P,V,C)
#define __TBB_FetchAndAddW(P,V) __TBB_machine_fetchadd4(P,V)
#define __TBB_FetchAndStoreW(P,V) __TBB_machine_fetchstore4(P,V)
#elif __TBB_WORDSIZE==8
#if __TBB_USE_GENERIC_DWORD_LOAD_STORE || __TBB_USE_GENERIC_DWORD_FETCH_ADD || __TBB_USE_GENERIC_DWORD_FETCH_STORE
#error These macros should only be used on 32-bit platforms.
#endif

#define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswp8(P,V,C)
#define __TBB_FetchAndAddW(P,V) __TBB_machine_fetchadd8(P,V)
#define __TBB_FetchAndStoreW(P,V) __TBB_machine_fetchstore8(P,V)
#else
#error Unsupported machine word size.
#endif /* __TBB_WORDSIZE */

#ifndef __TBB_Pause
// Fallback for platforms whose machine header does not provide a pause
// primitive: just yield to the OS scheduler.
inline void __TBB_Pause(int32_t) {
    __TBB_Yield();
}
#endif

namespace tbb {

//! Sequentially consistent full memory fence.
inline void atomic_fence () { __TBB_full_memory_fence(); }

namespace internal {

//! Class that implements exponential backoff.
/** See the usage sketch after the class definition for the typical pattern. */
class atomic_backoff : no_copy {
    //! Time delay, in units of "pause" instructions.
    /** Roughly the number of "pause" instructions that take the same time as
        a context switch; beyond this threshold, yielding to the OS is cheaper. */
    static const int32_t LOOPS_BEFORE_YIELD = 16;
    int32_t count;
public:
    // Objects of this type are often constructed eagerly on a hot path
    // (e.g. in the header of a spin loop), so construction must stay cheap.
    atomic_backoff() : count(1) {}
    // The dummy bool parameter merely selects this constructor, which also
    // performs an initial pause.
    atomic_backoff( bool ) : count(1) { pause(); }

    //! Pause for a while.
    void pause() {
        if( count<=LOOPS_BEFORE_YIELD ) {
            __TBB_Pause(count);
            // Pause twice as long the next time.
            count*=2;
        } else {
            // Pause is so long that we might as well yield the CPU to the scheduler.
            __TBB_Yield();
        }
    }

    //! Pause with doubling delay; return false once the backoff is saturated.
    bool bounded_pause() {
        __TBB_Pause(count);
        if( count<LOOPS_BEFORE_YIELD ) {
            // Pause twice as long the next time.
            count*=2;
            return true;
        } else {
            return false;
        }
    }

    void reset() {
        count = 1;
    }
};
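
// A minimal usage sketch (illustrative only; the variable name is hypothetical):
// retry a word-sized CAS with exponential backoff until it succeeds.
//
//     for( tbb::internal::atomic_backoff b; ; b.pause() ) {
//         intptr_t snapshot = shared_word;
//         if( __TBB_CompareAndSwapW( &shared_word, snapshot+1, snapshot )==snapshot )
//             break;   // increment published atomically
//     }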

//! Spin WHILE the value of the variable is equal to a given value.
/** T and U should be comparable types. */
template<typename T, typename U>
void spin_wait_while_eq( const volatile T& location, U value ) {
    atomic_backoff backoff;
    while( location==value ) backoff.pause();
}

//! Spin UNTIL the value of the variable becomes equal to a given value.
/** T and U should be comparable types. */
template<typename T, typename U>
void spin_wait_until_eq( const volatile T& location, const U value ) {
    atomic_backoff backoff;
    while( location!=value ) backoff.pause();
}

//! Spin while the predicate returns true.
template <typename predicate_type>
void spin_wait_while(predicate_type condition){
    atomic_backoff backoff;
    while( condition() ) backoff.pause();
}
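
// Illustrative example (hypothetical names): wait for a producer thread to
// publish a flag, backing off between polls.
//
//     volatile int ready = 0;               // a producer thread sets this to 1
//     tbb::internal::spin_wait_until_eq( ready, 1 );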

// Unless the platform layer has set the endianness explicitly, detect it.
#ifndef __TBB_ENDIANNESS
#define __TBB_ENDIANNESS __TBB_ENDIAN_DETECT
#endif

#if __TBB_USE_GENERIC_PART_WORD_CAS && __TBB_ENDIANNESS==__TBB_ENDIAN_UNSUPPORTED
#error Generic implementation of part-word CAS may not be used with __TBB_ENDIAN_UNSUPPORTED
#endif

#if __TBB_ENDIANNESS!=__TBB_ENDIAN_UNSUPPORTED
//! Emulates a 1- or 2-byte compare-and-swap on top of the 4-byte primitive.
/** The byte(s) of interest live inside an aligned 32-bit word; the surrounding
    bytes are read, combined with the shifted comparand and value, and a
    full-word CAS is retried until it either succeeds or fails specifically
    in the masked region. */
template<typename T>
inline T __TBB_MaskedCompareAndSwap (volatile T * const ptr, const T value, const T comparand ) {
    struct endianness{ static bool is_big_endian(){
        #if __TBB_ENDIANNESS==__TBB_ENDIAN_DETECT
            const uint32_t probe = 0x03020100;
            return (((const char*)(&probe))[0]==0x03);
        #elif __TBB_ENDIANNESS==__TBB_ENDIAN_BIG || __TBB_ENDIANNESS==__TBB_ENDIAN_LITTLE
            return __TBB_ENDIANNESS==__TBB_ENDIAN_BIG;
        #else
            #error Unexpected value of __TBB_ENDIANNESS
        #endif
    }};

    const uint32_t byte_offset = (uint32_t) ((uintptr_t)ptr & 0x3);
    volatile uint32_t * const aligned_ptr = (uint32_t*)((uintptr_t)ptr - byte_offset );

    // Location of the byte(s) of interest within the enclosing 32-bit word.
    const uint32_t bits_to_shift = 8*(endianness::is_big_endian() ? (4 - sizeof(T) - (byte_offset)) : byte_offset);
    const uint32_t mask = (((uint32_t)1<<(sizeof(T)*8)) - 1 )<<bits_to_shift;
    // The comparand and the new value, positioned inside the enclosing word.
    const uint32_t shifted_comparand = ((uint32_t)comparand << bits_to_shift)&mask;
    const uint32_t shifted_value = ((uint32_t)value << bits_to_shift)&mask;

    for( atomic_backoff b;;b.pause() ) {
        const uint32_t surroundings = *aligned_ptr & ~mask ; // may have changed during the pause
        const uint32_t big_comparand = surroundings | shifted_comparand ;
        const uint32_t big_value = surroundings | shifted_value ;
        // 32-bit CAS that carries the semantics requested for the part-word CAS.
        const uint32_t big_result = (uint32_t)__TBB_machine_cmpswp4( aligned_ptr, big_value, big_comparand );
        if( big_result == big_comparand                    // CAS succeeded
            || ((big_result ^ big_comparand) & mask) != 0) // CAS failed in the masked part
        {
            return T((big_result & mask) >> bits_to_shift);
        }
        else continue;                                     // only the surroundings changed; retry
    }
}
#endif /* __TBB_ENDIANNESS!=__TBB_ENDIAN_UNSUPPORTED */
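
// Worked example (illustrative): a 16-bit CAS at an address whose low two bits
// are 2, on a little-endian machine, gives byte_offset==2, hence
// bits_to_shift==16 and mask==0xFFFF0000; the comparand and value occupy the
// upper half of the enclosing 32-bit word.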

//! Generic compare-and-swap, dispatched on operand size.
template<size_t S, typename T>
inline T __TBB_CompareAndSwapGeneric (volatile void *ptr, T value, T comparand );

template<>
inline int8_t __TBB_CompareAndSwapGeneric <1,int8_t> (volatile void *ptr, int8_t value, int8_t comparand ) {
#if __TBB_USE_GENERIC_PART_WORD_CAS
    return __TBB_MaskedCompareAndSwap<int8_t>((volatile int8_t *)ptr,value,comparand);
#else
    return __TBB_machine_cmpswp1(ptr,value,comparand);
#endif
}

template<>
inline int16_t __TBB_CompareAndSwapGeneric <2,int16_t> (volatile void *ptr, int16_t value, int16_t comparand ) {
#if __TBB_USE_GENERIC_PART_WORD_CAS
    return __TBB_MaskedCompareAndSwap<int16_t>((volatile int16_t *)ptr,value,comparand);
#else
    return __TBB_machine_cmpswp2(ptr,value,comparand);
#endif
}

template<>
inline int32_t __TBB_CompareAndSwapGeneric <4,int32_t> (volatile void *ptr, int32_t value, int32_t comparand ) {
    // Cast is needed on platforms where the primitive returns a wider word.
    return (int32_t)__TBB_machine_cmpswp4(ptr,value,comparand);
}

#if __TBB_64BIT_ATOMICS
template<>
inline int64_t __TBB_CompareAndSwapGeneric <8,int64_t> (volatile void *ptr, int64_t value, int64_t comparand ) {
    return __TBB_machine_cmpswp8(ptr,value,comparand);
}
#endif

//! Generic fetch-and-add, emulated with a CAS retry loop.
template<size_t S, typename T>
inline T __TBB_FetchAndAddGeneric (volatile void *ptr, T addend) {
    T result;
    for( atomic_backoff b;;b.pause() ) {
        result = *reinterpret_cast<volatile T *>(ptr);
        // The CAS succeeds only if *ptr still equals the snapshot just read.
        if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, result+addend, result )==result )
            break;
    }
    return result;
}

//! Generic fetch-and-store, emulated with a CAS retry loop.
template<size_t S, typename T>
inline T __TBB_FetchAndStoreGeneric (volatile void *ptr, T value) {
    T result;
    for( atomic_backoff b;;b.pause() ) {
        result = *reinterpret_cast<volatile T *>(ptr);
        // The CAS succeeds only if *ptr still equals the snapshot just read.
        if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, value, result )==result )
            break;
    }
    return result;
}
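
// Illustrative use of the generic emulation (hypothetical variable):
//
//     volatile int32_t counter = 0;
//     int32_t old = tbb::internal::__TBB_FetchAndAddGeneric<4,int32_t>( &counter, 1 );
//     // old == 0, counter == 1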

#if __TBB_USE_GENERIC_PART_WORD_CAS
#define __TBB_machine_cmpswp1 tbb::internal::__TBB_CompareAndSwapGeneric<1,int8_t>
#define __TBB_machine_cmpswp2 tbb::internal::__TBB_CompareAndSwapGeneric<2,int16_t>
#endif

#if __TBB_USE_GENERIC_FETCH_ADD || __TBB_USE_GENERIC_PART_WORD_FETCH_ADD
#define __TBB_machine_fetchadd1 tbb::internal::__TBB_FetchAndAddGeneric<1,int8_t>
#define __TBB_machine_fetchadd2 tbb::internal::__TBB_FetchAndAddGeneric<2,int16_t>
#endif

#if __TBB_USE_GENERIC_FETCH_ADD
#define __TBB_machine_fetchadd4 tbb::internal::__TBB_FetchAndAddGeneric<4,int32_t>
#endif

#if __TBB_USE_GENERIC_FETCH_ADD || __TBB_USE_GENERIC_DWORD_FETCH_ADD
#define __TBB_machine_fetchadd8 tbb::internal::__TBB_FetchAndAddGeneric<8,int64_t>
#endif

#if __TBB_USE_GENERIC_FETCH_STORE || __TBB_USE_GENERIC_PART_WORD_FETCH_STORE
#define __TBB_machine_fetchstore1 tbb::internal::__TBB_FetchAndStoreGeneric<1,int8_t>
#define __TBB_machine_fetchstore2 tbb::internal::__TBB_FetchAndStoreGeneric<2,int16_t>
#endif

#if __TBB_USE_GENERIC_FETCH_STORE
#define __TBB_machine_fetchstore4 tbb::internal::__TBB_FetchAndStoreGeneric<4,int32_t>
#endif

#if __TBB_USE_GENERIC_FETCH_STORE || __TBB_USE_GENERIC_DWORD_FETCH_STORE
#define __TBB_machine_fetchstore8 tbb::internal::__TBB_FetchAndStoreGeneric<8,int64_t>
#endif

#if __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE
#define __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(S)                                             \
    atomic_selector<S>::word atomic_selector<S>::fetch_store ( volatile void* location, word value ) { \
        return __TBB_machine_fetchstore##S( location, value );                                          \
    }

__TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(1)
__TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(2)
__TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(4)
__TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(8)

#undef __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE
#endif /* __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */

#if __TBB_USE_GENERIC_DWORD_LOAD_STORE
#if ! __TBB_USE_FENCED_ATOMICS
    // If fenced variants are not provided, the plain primitive already has
    // full-fence semantics, so alias it for the duration of the definitions.
    #define __TBB_machine_cmpswp8full_fence __TBB_machine_cmpswp8
#endif
__TBB_MACHINE_DEFINE_LOAD8_GENERIC_FENCED(full_fence)
__TBB_MACHINE_DEFINE_STORE8_GENERIC_FENCED(full_fence)

#if ! __TBB_USE_FENCED_ATOMICS
    #undef __TBB_machine_cmpswp8full_fence
#endif

#define __TBB_machine_store8 tbb::internal::__TBB_machine_generic_store8full_fence
#define __TBB_machine_load8  tbb::internal::__TBB_machine_generic_load8full_fence
#endif /* __TBB_USE_GENERIC_DWORD_LOAD_STORE */

#if __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE
/** Generic acquire-load and release-store. The volatile access keeps the
    compiler from caching or reordering the memory operation itself, and the
    consistency helpers supply whatever compiler/hardware fence the target
    needs for acquire/release semantics. **/
template <typename T, size_t S>
struct machine_load_store {
    static T load_with_acquire ( const volatile T& location ) {
        T to_return = location;
        __TBB_acquire_consistency_helper();
        return to_return;
    }
    static void store_with_release ( volatile T &location, T value ) {
        __TBB_release_consistency_helper();
        location = value;
    }
};

// Specialization for 64-bit operations on 32-bit platforms, which must go
// through the (CAS-based) 8-byte load/store primitives to remain atomic.
#if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
template <typename T>
struct machine_load_store<T,8> {
    static T load_with_acquire ( const volatile T& location ) {
        return (T)__TBB_machine_load8( (const volatile void*)&location );
    }
    static void store_with_release ( volatile T& location, T value ) {
        __TBB_machine_store8( (volatile void*)&location, (int64_t)value );
    }
};
#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
#endif /* __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE */

#if __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE
template <typename T, size_t S>
struct machine_load_store_seq_cst {
    static T load ( const volatile T& location ) {
        __TBB_full_memory_fence();
        return machine_load_store<T,S>::load_with_acquire( location );
    }
#if __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE
    static void store ( volatile T &location, T value ) {
        atomic_selector<S>::fetch_store( (volatile void*)&location, (typename atomic_selector<S>::word)value );
    }
#else /* !__TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */
    static void store ( volatile T &location, T value ) {
        machine_load_store<T,S>::store_with_release( location, value );
        __TBB_full_memory_fence();
    }
#endif /* !__TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */
};

#if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
// Specialization for 64-bit operations on 32-bit platforms. It bypasses
// __TBB_machine_load8/store8, which are not required to be sequentially
// consistent, and uses the full-fence 8-byte CAS directly.
template <typename T>
struct machine_load_store_seq_cst<T,8> {
    static T load ( const volatile T& location ) {
        // Comparand and new value may be anything as long as they are equal;
        // the CAS then never modifies the location and just returns its value.
        const int64_t anyvalue = 2305843009213693951LL;
        return __TBB_machine_cmpswp8( (volatile void*)const_cast<volatile T*>(&location), anyvalue, anyvalue );
    }
    static void store ( volatile T &location, T value ) {
#if __TBB_GCC_VERSION >= 40702
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#endif
        // Read an initial snapshot; the CAS loop below corrects for any race.
        int64_t result = (volatile int64_t&)location;
#if __TBB_GCC_VERSION >= 40702
#pragma GCC diagnostic pop
#endif
        while ( __TBB_machine_cmpswp8((volatile void*)&location, (int64_t)value, result) != result )
            result = (volatile int64_t&)location;
    }
};
#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
#endif /* __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE */

#if __TBB_USE_GENERIC_RELAXED_LOAD_STORE
// Relaxed (unordered) load and store: only atomicity of a properly aligned,
// word-sized access is relied upon; the volatile qualifier merely keeps the
// compiler from eliding or caching the access.
template <typename T, size_t S>
struct machine_load_store_relaxed {
    static inline T load ( const volatile T& location ) {
        return location;
    }
    static inline void store ( volatile T& location, T value ) {
        location = value;
    }
};

// 64-bit operations on 32-bit platforms must go through the atomic 8-byte
// load/store primitives.
#if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
template <typename T>
struct machine_load_store_relaxed<T,8> {
    static inline T load ( const volatile T& location ) {
        return (T)__TBB_machine_load8( (const volatile void*)&location );
    }
    static inline void store ( volatile T& location, T value ) {
        __TBB_machine_store8( (volatile void*)&location, (int64_t)value );
    }
};
#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
#endif /* __TBB_USE_GENERIC_RELAXED_LOAD_STORE */

#undef __TBB_WORDSIZE // this macro is forbidden to use outside of atomic machinery

template<typename T>
inline T __TBB_load_with_acquire(const volatile T &location) {
    return machine_load_store<T,sizeof(T)>::load_with_acquire( location );
}
template<typename T, typename V>
inline void __TBB_store_with_release(volatile T& location, V value) {
    machine_load_store<T,sizeof(T)>::store_with_release( location, T(value) );
}
//! Non-template overload for the common size_t case.
inline void __TBB_store_with_release(volatile size_t& location, size_t value) {
    machine_load_store<size_t,sizeof(size_t)>::store_with_release( location, value );
}

template<typename T>
inline T __TBB_load_full_fence(const volatile T &location) {
    return machine_load_store_seq_cst<T,sizeof(T)>::load( location );
}
template<typename T, typename V>
inline void __TBB_store_full_fence(volatile T& location, V value) {
    machine_load_store_seq_cst<T,sizeof(T)>::store( location, T(value) );
}
//! Non-template overload for the common size_t case.
inline void __TBB_store_full_fence(volatile size_t& location, size_t value) {
    machine_load_store_seq_cst<size_t,sizeof(size_t)>::store( location, value );
}

template<typename T>
inline T __TBB_load_relaxed (const volatile T& location) {
    return machine_load_store_relaxed<T,sizeof(T)>::load( location );
}
template<typename T, typename V>
inline void __TBB_store_relaxed ( volatile T& location, V value ) {
    machine_load_store_relaxed<T,sizeof(T)>::store( location, T(value) );
}
//! Non-template overload for the common size_t case.
inline void __TBB_store_relaxed ( volatile size_t& location, size_t value ) {
    machine_load_store_relaxed<size_t,sizeof(size_t)>::store( location, value );
}
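
// Illustrative acquire/release pairing (hypothetical names): a producer
// publishes a payload behind a flag; the consumer's acquire-load of the flag
// orders its subsequent read of the payload.
//
//     // producer thread:
//     payload = 42;
//     __TBB_store_with_release( ready_flag, 1 );
//
//     // consumer thread:
//     while( !__TBB_load_with_acquire( ready_flag ) ) continue;
//     int v = payload;   // guaranteed to observe 42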

/** __TBB_TypeWithAlignmentAtLeastAsStrict(T) must evaluate to a type whose
    alignment is at least as strict as that of T. Unless the platform layer
    already provided a definition, it is built here from a family of helper
    types with known power-of-two alignments. */
#ifndef __TBB_TypeWithAlignmentAtLeastAsStrict

#if __TBB_ALIGNAS_PRESENT

// Use C++11 alignas and alignof.
#define __TBB_DefineTypeWithAlignment(PowerOf2)                          \
struct alignas(PowerOf2) __TBB_machine_type_with_alignment_##PowerOf2 { \
    uint32_t member[PowerOf2/sizeof(uint32_t)];                          \
};
#define __TBB_alignof(T) alignof(T)

#elif __TBB_ATTRIBUTE_ALIGNED_PRESENT

#define __TBB_DefineTypeWithAlignment(PowerOf2)       \
struct __TBB_machine_type_with_alignment_##PowerOf2 { \
    uint32_t member[PowerOf2/sizeof(uint32_t)];       \
} __attribute__((aligned(PowerOf2)));
#define __TBB_alignof(T) __alignof__(T)

#elif __TBB_DECLSPEC_ALIGN_PRESENT

#define __TBB_DefineTypeWithAlignment(PowerOf2)       \
__declspec(align(PowerOf2))                           \
struct __TBB_machine_type_with_alignment_##PowerOf2 { \
    uint32_t member[PowerOf2/sizeof(uint32_t)];       \
};
#define __TBB_alignof(T) __alignof(T)

#else
#error Must define __TBB_TypeWithAlignmentAtLeastAsStrict(T)
#endif

__TBB_DefineTypeWithAlignment(8)
__TBB_DefineTypeWithAlignment(16)
__TBB_DefineTypeWithAlignment(32)
__TBB_DefineTypeWithAlignment(64)

typedef __TBB_machine_type_with_alignment_64 __TBB_machine_type_with_strictest_alignment;

// Primary template is never instantiated; only the specializations below are.
template<size_t N> struct type_with_alignment;

// Specializations for allowed alignments.
template<> struct type_with_alignment<1> { char member; };
template<> struct type_with_alignment<2> { uint16_t member; };
template<> struct type_with_alignment<4> { uint32_t member; };
template<> struct type_with_alignment<8> { __TBB_machine_type_with_alignment_8 member; };
template<> struct type_with_alignment<16> {__TBB_machine_type_with_alignment_16 member; };
template<> struct type_with_alignment<32> {__TBB_machine_type_with_alignment_32 member; };
template<> struct type_with_alignment<64> {__TBB_machine_type_with_alignment_64 member; };

#if __TBB_ALIGNOF_NOT_INSTANTIATED_TYPES_BROKEN
//! Work-around for bug in some compilers.
/** The bug is that __alignof(T) can return 0 when T has not yet been
    instantiated. The work-around forces instantiation by computing
    sizeof(T) before __alignof(T). */
template<size_t Size, typename T>
struct work_around_alignment_bug {
    static const size_t alignment = __TBB_alignof(T);
};
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<tbb::internal::work_around_alignment_bug<sizeof(T),T>::alignment>
#else
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<__TBB_alignof(T)>
#endif

#endif  /* __TBB_TypeWithAlignmentAtLeastAsStrict */
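
// Illustrative use (Foo is a hypothetical type): declare raw storage whose
// alignment is at least as strict as Foo's, without constructing a Foo.
//
//     typedef __TBB_TypeWithAlignmentAtLeastAsStrict(Foo) aligned_unit;
//     aligned_unit storage[(sizeof(Foo)+sizeof(aligned_unit)-1)/sizeof(aligned_unit)];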

// A template class is used here to avoid instantiation of the static data
// for modules that do not use it.
template<typename T>
struct reverse {
    static const T byte_table[256];
};

// A 2^8-entry lookup table holding the bit-reversed values of [0..2^8 - 1];
// an efficient basis for reversing the bits of wider types byte by byte.
template<typename T>
const T reverse<T>::byte_table[256] = {
    0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
    0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
    0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
    0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
    0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
    0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
    0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
    0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
    0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
    0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
    0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
    0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
    0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
    0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
    0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
    0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
};

} // namespace internal
} // namespace tbb

// Preserve access to legacy APIs
using tbb::internal::__TBB_load_with_acquire;
using tbb::internal::__TBB_store_with_release;

// Map historically used names to the above wrappers
#define __TBB_load_acquire  __TBB_load_with_acquire
#define __TBB_store_release __TBB_store_with_release

#ifndef __TBB_Log2
//! Computes floor(log2(x)); returns -1 for x==0.
inline intptr_t __TBB_Log2( uintptr_t x ) {
    if( x==0 ) return -1;
    intptr_t result = 0;

#if !defined(_M_ARM)
    uintptr_t tmp_;
    if( sizeof(x)>4 && (tmp_ = ((uint64_t)x)>>32) ) { x=tmp_; result += 32; }
#endif
    if( uintptr_t tmp = x>>16 ) { x=tmp; result += 16; }
    if( uintptr_t tmp = x>>8 )  { x=tmp; result += 8; }
    if( uintptr_t tmp = x>>4 )  { x=tmp; result += 4; }
    if( uintptr_t tmp = x>>2 )  { x=tmp; result += 2; }
    // At this point x is in [1,3]; bit 1 decides the final step.
    return (x&2)? result+1: result;
}
#endif
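
// A few spot checks of the bit-scan above (floor of log2):
//     __TBB_Log2(1)  == 0
//     __TBB_Log2(2)  == 1
//     __TBB_Log2(40) == 5    // 2^5 = 32 <= 40 < 64 = 2^6
//     __TBB_Log2(0)  == -1   // by convention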

#ifndef __TBB_AtomicOR
// Atomically ORs the given bits into the machine word at *operand,
// emulated with a CAS retry loop.
inline void __TBB_AtomicOR( volatile void *operand, uintptr_t addend ) {
    for( tbb::internal::atomic_backoff b;;b.pause() ) {
        uintptr_t tmp = *(volatile uintptr_t *)operand;
        uintptr_t result = __TBB_CompareAndSwapW(operand, tmp|addend, tmp);
        if( result==tmp ) break;
    }
}
#endif

#ifndef __TBB_AtomicAND
// Atomically ANDs the machine word at *operand with the given mask,
// emulated with a CAS retry loop.
inline void __TBB_AtomicAND( volatile void *operand, uintptr_t addend ) {
    for( tbb::internal::atomic_backoff b;;b.pause() ) {
        uintptr_t tmp = *(volatile uintptr_t *)operand;
        uintptr_t result = __TBB_CompareAndSwapW(operand, tmp&addend, tmp);
        if( result==tmp ) break;
    }
}
#endif
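
// Illustrative flag manipulation (hypothetical variable):
//
//     volatile uintptr_t flags = 0;
//     __TBB_AtomicOR( &flags, 0x4 );              // set bit 2
//     __TBB_AtomicAND( &flags, ~(uintptr_t)0x4 ); // clear bit 2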

#if __TBB_PREFETCHING
#ifndef __TBB_cl_prefetch
#error This platform does not define cache management primitives required for __TBB_PREFETCHING
#endif

#ifndef __TBB_cl_evict
#define __TBB_cl_evict(p)
#endif
#endif

#ifndef __TBB_Flag
typedef unsigned char __TBB_Flag;
#endif
typedef __TBB_atomic __TBB_Flag __TBB_atomic_flag;

#ifndef __TBB_TryLockByte
// Acquires the byte lock iff it is currently free (0); returns true on success.
inline bool __TBB_TryLockByte( __TBB_atomic_flag &flag ) {
    return __TBB_machine_cmpswp1(&flag,1,0)==0;
}
#endif

#ifndef __TBB_LockByte
// Spins with backoff until the byte lock is acquired.
inline __TBB_Flag __TBB_LockByte( __TBB_atomic_flag& flag ) {
    tbb::internal::atomic_backoff backoff;
    while( !__TBB_TryLockByte(flag) ) backoff.pause();
    return 0;
}
#endif

#ifndef __TBB_UnlockByte
#define __TBB_UnlockByte(addr) __TBB_store_with_release((addr),0)
#endif
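
// Illustrative byte-lock usage (hypothetical variable):
//
//     __TBB_atomic_flag lock = 0;
//     __TBB_LockByte( lock );      // spin until acquired
//     /* ... critical section ... */
//     __TBB_UnlockByte( lock );    // release with release semantics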

// Lock-elision helpers, used on x86 where hardware transactional memory
// (e.g. Intel TSX) may be available.
#if ( __TBB_x86_32 || __TBB_x86_64 )
inline void __TBB_TryLockByteElidedCancel() { __TBB_machine_try_lock_elided_cancel(); }

inline bool __TBB_TryLockByteElided( __TBB_atomic_flag& flag ) {
    bool res = __TBB_machine_try_lock_elided( &flag )!=0;
    // To avoid the "lemming" effect, abort the transaction if
    // __TBB_machine_try_lock_elided returned false (i.e. no lock acquired).
    if( !res ) __TBB_TryLockByteElidedCancel();
    return res;
}

inline void __TBB_LockByteElided( __TBB_atomic_flag& flag )
{
    for(;;) {
        tbb::internal::spin_wait_while_eq( flag, 1 );
        if( __TBB_machine_try_lock_elided( &flag ) )
            return;
        // Another thread acquired the lock "for real"; to avoid the
        // "lemming" effect, abort the transaction before retrying.
        __TBB_TryLockByteElidedCancel();
    }
}

inline void __TBB_UnlockByteElided( __TBB_atomic_flag& flag ) {
    __TBB_machine_unlock_elided( &flag );
}
#endif

#ifndef __TBB_ReverseByte
// Reverses the bit order of a byte via the lookup table above.
inline unsigned char __TBB_ReverseByte(unsigned char src) {
    return tbb::internal::reverse<unsigned char>::byte_table[src];
}
#endif

// Reverses the bit order of an arbitrary integral type by bit-reversing
// each byte and swapping the byte order.
template<typename T>
T __TBB_ReverseBits(T src) {
    T dst;
    unsigned char *original = (unsigned char *) &src;
    unsigned char *reversed = (unsigned char *) &dst;

    for( int i = sizeof(T)-1; i >= 0; i-- )
        reversed[i] = __TBB_ReverseByte( original[sizeof(T)-i-1] );

    return dst;
}
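
// Spot check (illustrative): for a uint16_t, reversing 0x0001 yields 0x8000,
// since the lowest-order bit becomes the highest-order bit.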

#endif /* __TBB_machine_H */