0001 /*
0002     Copyright (c) 2005-2020 Intel Corporation
0003 
0004     Licensed under the Apache License, Version 2.0 (the "License");
0005     you may not use this file except in compliance with the License.
0006     You may obtain a copy of the License at
0007 
0008         http://www.apache.org/licenses/LICENSE-2.0
0009 
0010     Unless required by applicable law or agreed to in writing, software
0011     distributed under the License is distributed on an "AS IS" BASIS,
0012     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0013     See the License for the specific language governing permissions and
0014     limitations under the License.
0015 */
0016 
0017 #ifndef __TBB_machine_H
0018 #define __TBB_machine_H
0019 
0020 /** This header provides a basic platform abstraction layer by hooking up the appropriate
0021     architecture/OS/compiler specific headers from the /include/tbb/machine directory.
0022     If a plug-in header does not implement all the required APIs, it must specify
0023     the missing ones by setting one or more of the following macros:
0024 
0025     __TBB_USE_GENERIC_PART_WORD_CAS
0026     __TBB_USE_GENERIC_PART_WORD_FETCH_ADD
0027     __TBB_USE_GENERIC_PART_WORD_FETCH_STORE
0028     __TBB_USE_GENERIC_FETCH_ADD
0029     __TBB_USE_GENERIC_FETCH_STORE
0030     __TBB_USE_GENERIC_DWORD_FETCH_ADD
0031     __TBB_USE_GENERIC_DWORD_FETCH_STORE
0032     __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE
0033     __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE
0034     __TBB_USE_GENERIC_RELAXED_LOAD_STORE
0035     __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE
0036 
0037     In this case tbb_machine.h will add missing functionality based on a minimal set
0038     of APIs that are required to be implemented by all plug-in headers as described
0039     further.
0040     Note that these generic implementations may be sub-optimal for a particular
0041     architecture, and thus should be relied upon only after careful evaluation
0042     or as the last resort.
0043 
0044     Additionally __TBB_64BIT_ATOMICS can be set to 0 on a 32-bit architecture to
0045     indicate that the port is not going to support double word atomics. It may also
0046     be set to 1 explicitly, though normally this is not necessary as tbb_machine.h
0047     will set it automatically.
0048 
0049     __TBB_ENDIANNESS macro can be defined by the implementation as well.
0050     It is used only if __TBB_USE_GENERIC_PART_WORD_CAS is set (or for testing),
0051     and must specify the layout of aligned 16-bit and 32-bit data anywhere within a process
0052     (while the details of unaligned 16-bit or 32-bit data or of 64-bit data are irrelevant).
0053     The layout must be the same at all relevant memory locations within the current process;
0054     in case of page-specific endianness, one endianness must be kept "out of sight".
0055     Possible settings, reflecting hardware and possibly O.S. convention, are:
0056     -  __TBB_ENDIAN_BIG for big-endian data,
0057     -  __TBB_ENDIAN_LITTLE for little-endian data,
0058     -  __TBB_ENDIAN_DETECT for run-time detection iff exactly one of the above,
0059     -  __TBB_ENDIAN_UNSUPPORTED to prevent undefined behavior if none of the above.
0060 
0061     Prerequisites for each architecture port
0062     ----------------------------------------
0063     The following functions and macros have no generic implementation. Therefore they must be
0064     implemented in each machine architecture specific header either as a conventional
0065     function or as a functional macro.
0066 
0067     __TBB_WORDSIZE
0068         The size of a machine word in bytes, i.e. for 32-bit systems it
0069         should be defined as 4.
0070 
0071     __TBB_Yield()
0072         Signals OS that the current thread is willing to relinquish the remainder
0073         of its time quantum.
0074 
0075     __TBB_full_memory_fence()
0076         Must prevent all memory operations from being reordered across it (both
0077         by hardware and compiler). All such fences must be totally ordered (or
0078         sequentially consistent).
0079 
0080     __TBB_machine_cmpswp4( volatile void *ptr, int32_t value, int32_t comparand )
0081         Must be provided if __TBB_USE_FENCED_ATOMICS is not set.
0082 
0083     __TBB_machine_cmpswp8( volatile void *ptr, int64_t value, int64_t comparand )
0084         Must be provided for 64-bit architectures if __TBB_USE_FENCED_ATOMICS is not set,
0085         and for 32-bit architectures if __TBB_64BIT_ATOMICS is set
0086 
0087     __TBB_machine_<op><S><fence>(...), where
0088         <op> = {cmpswp, fetchadd, fetchstore}
0089         <S> = {1, 2, 4, 8}
0090         <fence> = {full_fence, acquire, release, relaxed}
0091         Must be provided if __TBB_USE_FENCED_ATOMICS is set.
0092 
0093     __TBB_control_consistency_helper()
0094         Bridges the memory-semantics gap between architectures providing only
0095         implicit C++0x "consume" semantics (like Power Architecture) and those
0096         also implicitly obeying control dependencies (like IA-64 architecture).
0097         It must be used only in conditional code where the condition is itself
0098         data-dependent, and will then make subsequent code behave as if the
0099         original data dependency were acquired.
0100         It needs only a compiler fence where implied by the architecture
0101         either specifically (like IA-64 architecture) or because generally stronger
0102         "acquire" semantics are enforced (like x86).
0103         It is always valid, though potentially suboptimal, to replace
0104         control with acquire on the load and then remove the helper.
0105 
0106     __TBB_acquire_consistency_helper(), __TBB_release_consistency_helper()
0107         Must be provided if __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE is set.
0108         Enforce acquire and release semantics in generic implementations of fenced
0109         store and load operations. Depending on the particular architecture/compiler
0110         combination they may be a hardware fence, a compiler fence, both or nothing.
0111  **/
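/* Illustrative sketch (not part of TBB): a hypothetical plug-in machine header
   covering the prerequisites above might look roughly as follows, assuming a
   little-endian 64-bit target where GCC-style builtins and <sched.h> are available:

       #define __TBB_WORDSIZE   8
       #define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE

       #define __TBB_full_memory_fence()          __sync_synchronize()
       #define __TBB_compiler_fence()             __asm__ __volatile__("": : :"memory")
       #define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
       #define __TBB_release_consistency_helper() __TBB_compiler_fence()
       #define __TBB_control_consistency_helper() __TBB_compiler_fence()
       #define __TBB_Yield()                      sched_yield()

       static inline int32_t __TBB_machine_cmpswp4( volatile void *ptr, int32_t value, int32_t comparand ) {
           return __sync_val_compare_and_swap( (volatile int32_t*)ptr, comparand, value );
       }
       static inline int64_t __TBB_machine_cmpswp8( volatile void *ptr, int64_t value, int64_t comparand ) {
           return __sync_val_compare_and_swap( (volatile int64_t*)ptr, comparand, value );
       }

       // Everything not implemented natively is requested from tbb_machine.h:
       #define __TBB_USE_GENERIC_PART_WORD_CAS                     1
       #define __TBB_USE_GENERIC_FETCH_ADD                         1
       #define __TBB_USE_GENERIC_FETCH_STORE                       1
       #define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE            1
       #define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
       #define __TBB_USE_GENERIC_RELAXED_LOAD_STORE                1
*/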
0112 
0113 #include "tbb_stddef.h"
0114 
0115 namespace tbb {
0116 namespace internal { //< @cond INTERNAL
0117 
0118 ////////////////////////////////////////////////////////////////////////////////
0119 // Overridable helpers declarations
0120 //
0121 // A machine/*.h file may choose to define these templates, otherwise it must
0122 // request default implementation by setting appropriate __TBB_USE_GENERIC_XXX macro(s).
0123 //
0124 template <typename T, std::size_t S>
0125 struct machine_load_store;
0126 
0127 template <typename T, std::size_t S>
0128 struct machine_load_store_relaxed;
0129 
0130 template <typename T, std::size_t S>
0131 struct machine_load_store_seq_cst;
0132 //
0133 // End of overridable helpers declarations
0134 ////////////////////////////////////////////////////////////////////////////////
0135 
0136 template<size_t S> struct atomic_selector;
0137 
0138 template<> struct atomic_selector<1> {
0139     typedef int8_t word;
0140     inline static word fetch_store ( volatile void* location, word value );
0141 };
0142 
0143 template<> struct atomic_selector<2> {
0144     typedef int16_t word;
0145     inline static word fetch_store ( volatile void* location, word value );
0146 };
0147 
0148 template<> struct atomic_selector<4> {
0149 #if _MSC_VER && !_WIN64
0150     // Work-around that avoids spurious /Wp64 warnings
0151     typedef intptr_t word;
0152 #else
0153     typedef int32_t word;
0154 #endif
0155     inline static word fetch_store ( volatile void* location, word value );
0156 };
0157 
0158 template<> struct atomic_selector<8> {
0159     typedef int64_t word;
0160     inline static word fetch_store ( volatile void* location, word value );
0161 };
0162 
0163 }} //< namespaces internal @endcond, tbb
0164 
0165 #define __TBB_MACHINE_DEFINE_STORE8_GENERIC_FENCED(M)                                        \
0166     inline void __TBB_machine_generic_store8##M(volatile void *ptr, int64_t value) {         \
0167         for(;;) {                                                                            \
0168             int64_t result = *(volatile int64_t *)ptr;                                       \
0169             if( __TBB_machine_cmpswp8##M(ptr,value,result)==result ) break;                  \
0170         }                                                                                    \
0171     }                                                                                        \
0172 
0173 #define __TBB_MACHINE_DEFINE_LOAD8_GENERIC_FENCED(M)                                         \
0174     inline int64_t __TBB_machine_generic_load8##M(const volatile void *ptr) {                \
0175         /* Comparand and new value may be anything, they only must be equal, and      */     \
0176         /* the value should have a low probability to be actually found in 'location'.*/     \
0177         const int64_t anyvalue = 2305843009213693951LL;                                      \
0178         return __TBB_machine_cmpswp8##M(const_cast<volatile void *>(ptr),anyvalue,anyvalue); \
0179     }                                                                                        \
0180 
0181 // The set of allowed values for __TBB_ENDIANNESS (see above for details)
0182 #define __TBB_ENDIAN_UNSUPPORTED -1
0183 #define __TBB_ENDIAN_LITTLE       0
0184 #define __TBB_ENDIAN_BIG          1
0185 #define __TBB_ENDIAN_DETECT       2
0186 
0187 #if _WIN32||_WIN64
0188 
0189 #ifdef _MANAGED
0190 #pragma managed(push, off)
0191 #endif
0192 
0193     #if __MINGW64__ || __MINGW32__
0194         extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
0195         #define __TBB_Yield()  SwitchToThread()
0196         #if (TBB_USE_GCC_BUILTINS && __TBB_GCC_BUILTIN_ATOMICS_PRESENT)
0197             #include "machine/gcc_generic.h"
0198         #elif __MINGW64__
0199             #include "machine/linux_intel64.h"
0200         #elif __MINGW32__
0201             #include "machine/linux_ia32.h"
0202         #endif
0203     #elif (TBB_USE_ICC_BUILTINS && __TBB_ICC_BUILTIN_ATOMICS_PRESENT)
0204         #include "machine/icc_generic.h"
0205     #elif defined(_M_IX86) && !defined(__TBB_WIN32_USE_CL_BUILTINS)
0206         #include "machine/windows_ia32.h"
0207     #elif defined(_M_X64)
0208         #include "machine/windows_intel64.h"
0209     #elif defined(_M_ARM) || defined(__TBB_WIN32_USE_CL_BUILTINS)
0210         #include "machine/msvc_armv7.h"
0211     #endif
0212 
0213 #ifdef _MANAGED
0214 #pragma managed(pop)
0215 #endif
0216 
0217 #elif __TBB_DEFINE_MIC
0218 
0219     #include "machine/mic_common.h"
0220     #if (TBB_USE_ICC_BUILTINS && __TBB_ICC_BUILTIN_ATOMICS_PRESENT)
0221         #include "machine/icc_generic.h"
0222     #else
0223         #include "machine/linux_intel64.h"
0224     #endif
0225 
0226 #elif __linux__ || __FreeBSD__ || __NetBSD__ || __OpenBSD__
0227 
0228     #if (TBB_USE_GCC_BUILTINS && __TBB_GCC_BUILTIN_ATOMICS_PRESENT)
0229         #include "machine/gcc_generic.h"
0230     #elif (TBB_USE_ICC_BUILTINS && __TBB_ICC_BUILTIN_ATOMICS_PRESENT)
0231         #include "machine/icc_generic.h"
0232     #elif __i386__
0233         #include "machine/linux_ia32.h"
0234     #elif __x86_64__
0235         #include "machine/linux_intel64.h"
0236     #elif __ia64__
0237         #include "machine/linux_ia64.h"
0238     #elif __powerpc__
0239         #include "machine/mac_ppc.h"
0240     #elif __ARM_ARCH_7A__ || __aarch64__
0241         #include "machine/gcc_arm.h"
0242     #elif __TBB_GCC_BUILTIN_ATOMICS_PRESENT
0243         #include "machine/gcc_generic.h"
0244     #endif
0245     #include "machine/linux_common.h"
0246 
0247 #elif __APPLE__
0248     //TODO:  TBB_USE_GCC_BUILTINS is not used for Mac, Sun, Aix
0249     #if (TBB_USE_ICC_BUILTINS && __TBB_ICC_BUILTIN_ATOMICS_PRESENT)
0250         #include "machine/icc_generic.h"
0251     #elif __TBB_x86_32
0252         #include "machine/linux_ia32.h"
0253     #elif __TBB_x86_64
0254         #include "machine/linux_intel64.h"
0255     #elif __POWERPC__
0256         #include "machine/mac_ppc.h"
0257     #endif
0258     #include "machine/macos_common.h"
0259 
0260 #elif _AIX
0261 
0262     #include "machine/ibm_aix51.h"
0263 
0264 #elif __sun || __SUNPRO_CC
0265 
0266     #define __asm__ asm
0267     #define __volatile__ volatile
0268 
0269     #if __i386  || __i386__
0270         #include "machine/linux_ia32.h"
0271     #elif __x86_64__
0272         #include "machine/linux_intel64.h"
0273     #elif __sparc
0274         #include "machine/sunos_sparc.h"
0275     #endif
0276     #include <sched.h>
0277 
0278     #define __TBB_Yield() sched_yield()
0279 
0280 #endif /* OS selection */
0281 
0282 #ifndef __TBB_64BIT_ATOMICS
0283     #define __TBB_64BIT_ATOMICS 1
0284 #endif
0285 
0286 //TODO: replace usage of these functions with usage of tbb::atomic, and then remove them
0287 //TODO: map functions with W suffix to use cast to tbb::atomic and according op, i.e. as_atomic().op()
0288 // Special atomic functions
0289 #if __TBB_USE_FENCED_ATOMICS
0290     #define __TBB_machine_cmpswp1   __TBB_machine_cmpswp1full_fence
0291     #define __TBB_machine_cmpswp2   __TBB_machine_cmpswp2full_fence
0292     #define __TBB_machine_cmpswp4   __TBB_machine_cmpswp4full_fence
0293     #define __TBB_machine_cmpswp8   __TBB_machine_cmpswp8full_fence
0294 
0295     #if __TBB_WORDSIZE==8
0296         #define __TBB_machine_fetchadd8             __TBB_machine_fetchadd8full_fence
0297         #define __TBB_machine_fetchstore8           __TBB_machine_fetchstore8full_fence
0298         #define __TBB_FetchAndAddWrelease(P,V)      __TBB_machine_fetchadd8release(P,V)
0299         #define __TBB_FetchAndIncrementWacquire(P)  __TBB_machine_fetchadd8acquire(P,1)
0300         #define __TBB_FetchAndDecrementWrelease(P)  __TBB_machine_fetchadd8release(P,(-1))
0301     #else
0302         #define __TBB_machine_fetchadd4             __TBB_machine_fetchadd4full_fence
0303         #define __TBB_machine_fetchstore4           __TBB_machine_fetchstore4full_fence
0304         #define __TBB_FetchAndAddWrelease(P,V)      __TBB_machine_fetchadd4release(P,V)
0305         #define __TBB_FetchAndIncrementWacquire(P)  __TBB_machine_fetchadd4acquire(P,1)
0306         #define __TBB_FetchAndDecrementWrelease(P)  __TBB_machine_fetchadd4release(P,(-1))
0307     #endif /* __TBB_WORDSIZE==8 */
0308 #else /* !__TBB_USE_FENCED_ATOMICS */
0309     #define __TBB_FetchAndAddWrelease(P,V)      __TBB_FetchAndAddW(P,V)
0310     #define __TBB_FetchAndIncrementWacquire(P)  __TBB_FetchAndAddW(P,1)
0311     #define __TBB_FetchAndDecrementWrelease(P)  __TBB_FetchAndAddW(P,(-1))
0312 #endif /* !__TBB_USE_FENCED_ATOMICS */
0313 
0314 #if __TBB_WORDSIZE==4
0315     #define __TBB_CompareAndSwapW(P,V,C)    __TBB_machine_cmpswp4(P,V,C)
0316     #define __TBB_FetchAndAddW(P,V)         __TBB_machine_fetchadd4(P,V)
0317     #define __TBB_FetchAndStoreW(P,V)       __TBB_machine_fetchstore4(P,V)
0318 #elif  __TBB_WORDSIZE==8
0319     #if __TBB_USE_GENERIC_DWORD_LOAD_STORE || __TBB_USE_GENERIC_DWORD_FETCH_ADD || __TBB_USE_GENERIC_DWORD_FETCH_STORE
0320         #error These macros should only be used on 32-bit platforms.
0321     #endif
0322 
0323     #define __TBB_CompareAndSwapW(P,V,C)    __TBB_machine_cmpswp8(P,V,C)
0324     #define __TBB_FetchAndAddW(P,V)         __TBB_machine_fetchadd8(P,V)
0325     #define __TBB_FetchAndStoreW(P,V)       __TBB_machine_fetchstore8(P,V)
0326 #else /* __TBB_WORDSIZE != 8 */
0327     #error Unsupported machine word size.
0328 #endif /* __TBB_WORDSIZE */
0329 
0330 #ifndef __TBB_Pause
0331     inline void __TBB_Pause(int32_t) {
0332         __TBB_Yield();
0333     }
0334 #endif
0335 
0336 namespace tbb {
0337 
0338 //! Sequentially consistent full memory fence.
0339 inline void atomic_fence () { __TBB_full_memory_fence(); }
0340 
0341 namespace internal { //< @cond INTERNAL
0342 
0343 //! Class that implements exponential backoff.
0344 /** See implementation of spin_wait_while_eq for an example. */
0345 class atomic_backoff : no_copy {
0346     //! Time delay, in units of "pause" instructions.
0347     /** Should be equal to approximately the number of "pause" instructions
0348         that take the same time as a context switch. Must be a power of two.*/
0349     static const int32_t LOOPS_BEFORE_YIELD = 16;
0350     int32_t count;
0351 public:
0352     // In many cases, an object of this type is initialized eagerly on a hot path,
0353     // as in for(atomic_backoff b; ; b.pause()) { /*loop body*/ }
0354     // For this reason, the construction cost must be very small!
0355     atomic_backoff() : count(1) {}
0356     // This constructor pauses immediately; do not use on hot paths!
0357     atomic_backoff( bool ) : count(1) { pause(); }
0358 
0359     //! Pause for a while.
0360     void pause() {
0361         if( count<=LOOPS_BEFORE_YIELD ) {
0362             __TBB_Pause(count);
0363             // Pause twice as long the next time.
0364             count*=2;
0365         } else {
0366             // Pause is so long that we might as well yield CPU to scheduler.
0367             __TBB_Yield();
0368         }
0369     }
0370 
0371     //! Pause for a few times and return false if saturated.
0372     bool bounded_pause() {
0373         __TBB_Pause(count);
0374         if( count<LOOPS_BEFORE_YIELD ) {
0375             // Pause twice as long the next time.
0376             count*=2;
0377             return true;
0378         } else {
0379             return false;
0380         }
0381     }
0382 
0383     void reset() {
0384         count = 1;
0385     }
0386 };
0387 
0388 //! Spin WHILE the value of the variable is equal to a given value
0389 /** T and U should be comparable types. */
0390 template<typename T, typename U>
0391 void spin_wait_while_eq( const volatile T& location, U value ) {
0392     atomic_backoff backoff;
0393     while( location==value ) backoff.pause();
0394 }
0395 
0396 //! Spin UNTIL the value of the variable is equal to a given value
0397 /** T and U should be comparable types. */
0398 template<typename T, typename U>
0399 void spin_wait_until_eq( const volatile T& location, const U value ) {
0400     atomic_backoff backoff;
0401     while( location!=value ) backoff.pause();
0402 }
0403 
0404 template <typename predicate_type>
0405 void spin_wait_while(predicate_type condition){
0406     atomic_backoff backoff;
0407     while( condition() ) backoff.pause();
0408 }
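/* Usage sketch (illustrative, not part of TBB): spin_wait_while accepts any nullary
   predicate; a hypothetical functor that waits until either of two flags clears:

       struct both_busy {
           const volatile int &a, &b;
           both_busy( const volatile int& a_, const volatile int& b_ ) : a(a_), b(b_) {}
           bool operator()() const { return a!=0 && b!=0; }
       };
       // ...
       tbb::internal::spin_wait_while( both_busy(first_flag, second_flag) );
*/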
0409 
0410 ////////////////////////////////////////////////////////////////////////////////
0411 // Generic compare-and-swap applied to only a part of a machine word.
0412 //
0413 #ifndef __TBB_ENDIANNESS
0414 #define __TBB_ENDIANNESS __TBB_ENDIAN_DETECT
0415 #endif
0416 
0417 #if __TBB_USE_GENERIC_PART_WORD_CAS && __TBB_ENDIANNESS==__TBB_ENDIAN_UNSUPPORTED
0418 #error Generic implementation of part-word CAS may not be used with __TBB_ENDIAN_UNSUPPORTED
0419 #endif
0420 
0421 #if __TBB_ENDIANNESS!=__TBB_ENDIAN_UNSUPPORTED
0422 //
0423 // This function is the only use of __TBB_ENDIANNESS.
0424 // The following restrictions/limitations apply for this operation:
0425 //  - T must be an integer type of at most 4 bytes for the casts and calculations to work
0426 //  - T must also be less than 4 bytes to avoid compiler warnings when computing mask
0427 //      (and for the operation to be useful at all, so no workaround is applied)
0428 //  - the architecture must consistently use either little-endian or big-endian (same for all locations)
0429 //
0430 // TODO: static_assert for the type requirements stated above
0431 template<typename T>
0432 inline T __TBB_MaskedCompareAndSwap (volatile T * const ptr, const T value, const T comparand ) {
0433     struct endianness{ static bool is_big_endian(){
0434         #if __TBB_ENDIANNESS==__TBB_ENDIAN_DETECT
0435             const uint32_t probe = 0x03020100;
0436             return (((const char*)(&probe))[0]==0x03);
0437         #elif __TBB_ENDIANNESS==__TBB_ENDIAN_BIG || __TBB_ENDIANNESS==__TBB_ENDIAN_LITTLE
0438             return __TBB_ENDIANNESS==__TBB_ENDIAN_BIG;
0439         #else
0440             #error Unexpected value of __TBB_ENDIANNESS
0441         #endif
0442     }};
0443 
0444     const uint32_t byte_offset            = (uint32_t) ((uintptr_t)ptr & 0x3);
0445     volatile uint32_t * const aligned_ptr = (uint32_t*)((uintptr_t)ptr - byte_offset );
0446 
0447     // location of T within uint32_t for a C++ shift operation
0448     const uint32_t bits_to_shift     = 8*(endianness::is_big_endian() ? (4 - sizeof(T) - (byte_offset)) : byte_offset);
0449     const uint32_t mask              = (((uint32_t)1<<(sizeof(T)*8)) - 1 )<<bits_to_shift;
0450     // for signed T, any sign extension bits in cast value/comparand are immediately clipped by mask
0451     const uint32_t shifted_comparand = ((uint32_t)comparand << bits_to_shift)&mask;
0452     const uint32_t shifted_value     = ((uint32_t)value     << bits_to_shift)&mask;
0453 
0454     for( atomic_backoff b;;b.pause() ) {
0455         const uint32_t surroundings  = *aligned_ptr & ~mask ; // may have changed during the pause
0456         const uint32_t big_comparand = surroundings | shifted_comparand ;
0457         const uint32_t big_value     = surroundings | shifted_value     ;
0458         // __TBB_machine_cmpswp4 presumed to have full fence.
0459         // Cast shuts up /Wp64 warning
0460         const uint32_t big_result = (uint32_t)__TBB_machine_cmpswp4( aligned_ptr, big_value, big_comparand );
0461         if( big_result == big_comparand                    // CAS succeeded
0462           || ((big_result ^ big_comparand) & mask) != 0)   // CAS failed and the bits of interest have changed
0463         {
0464             return T((big_result & mask) >> bits_to_shift);
0465         }
0466         else continue;                                     // CAS failed but the bits of interest were not changed
0467     }
0468 }
0469 #endif // __TBB_ENDIANNESS!=__TBB_ENDIAN_UNSUPPORTED
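/* Worked example (illustrative): on a little-endian machine, a masked CAS of an
   int8_t that lives at byte offset 1 within its aligned 4-byte word uses
       byte_offset       = 1
       bits_to_shift     = 8*1       = 8
       mask              = 0xFF << 8 = 0x0000FF00
       shifted_comparand = (uint32_t)comparand << 8, clipped by mask
       shifted_value     = (uint32_t)value     << 8, clipped by mask
   Each iteration reads the three surrounding bytes, splices them around the shifted
   comparand/value, and issues a full 4-byte __TBB_machine_cmpswp4; the loop retries
   only when the CAS failed because one of the *other* bytes changed in the meantime. */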
0470 ////////////////////////////////////////////////////////////////////////////////
0471 
0472 template<size_t S, typename T>
0473 inline T __TBB_CompareAndSwapGeneric (volatile void *ptr, T value, T comparand );
0474 
0475 template<>
0476 inline int8_t __TBB_CompareAndSwapGeneric <1,int8_t> (volatile void *ptr, int8_t value, int8_t comparand ) {
0477 #if __TBB_USE_GENERIC_PART_WORD_CAS
0478     return __TBB_MaskedCompareAndSwap<int8_t>((volatile int8_t *)ptr,value,comparand);
0479 #else
0480     return __TBB_machine_cmpswp1(ptr,value,comparand);
0481 #endif
0482 }
0483 
0484 template<>
0485 inline int16_t __TBB_CompareAndSwapGeneric <2,int16_t> (volatile void *ptr, int16_t value, int16_t comparand ) {
0486 #if __TBB_USE_GENERIC_PART_WORD_CAS
0487     return __TBB_MaskedCompareAndSwap<int16_t>((volatile int16_t *)ptr,value,comparand);
0488 #else
0489     return __TBB_machine_cmpswp2(ptr,value,comparand);
0490 #endif
0491 }
0492 
0493 template<>
0494 inline int32_t __TBB_CompareAndSwapGeneric <4,int32_t> (volatile void *ptr, int32_t value, int32_t comparand ) {
0495     // Cast shuts up /Wp64 warning
0496     return (int32_t)__TBB_machine_cmpswp4(ptr,value,comparand);
0497 }
0498 
0499 #if __TBB_64BIT_ATOMICS
0500 template<>
0501 inline int64_t __TBB_CompareAndSwapGeneric <8,int64_t> (volatile void *ptr, int64_t value, int64_t comparand ) {
0502     return __TBB_machine_cmpswp8(ptr,value,comparand);
0503 }
0504 #endif
0505 
0506 template<size_t S, typename T>
0507 inline T __TBB_FetchAndAddGeneric (volatile void *ptr, T addend) {
0508     T result;
0509     for( atomic_backoff b;;b.pause() ) {
0510         result = *reinterpret_cast<volatile T *>(ptr);
0511         // __TBB_CompareAndSwapGeneric presumed to have full fence.
0512         if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, result+addend, result )==result )
0513             break;
0514     }
0515     return result;
0516 }
0517 
0518 template<size_t S, typename T>
0519 inline T __TBB_FetchAndStoreGeneric (volatile void *ptr, T value) {
0520     T result;
0521     for( atomic_backoff b;;b.pause() ) {
0522         result = *reinterpret_cast<volatile T *>(ptr);
0523         // __TBB_CompareAndSwapGeneric presumed to have full fence.
0524         if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, value, result )==result )
0525             break;
0526     }
0527     return result;
0528 }
0529 
0530 #if __TBB_USE_GENERIC_PART_WORD_CAS
0531 #define __TBB_machine_cmpswp1 tbb::internal::__TBB_CompareAndSwapGeneric<1,int8_t>
0532 #define __TBB_machine_cmpswp2 tbb::internal::__TBB_CompareAndSwapGeneric<2,int16_t>
0533 #endif
0534 
0535 #if __TBB_USE_GENERIC_FETCH_ADD || __TBB_USE_GENERIC_PART_WORD_FETCH_ADD
0536 #define __TBB_machine_fetchadd1 tbb::internal::__TBB_FetchAndAddGeneric<1,int8_t>
0537 #define __TBB_machine_fetchadd2 tbb::internal::__TBB_FetchAndAddGeneric<2,int16_t>
0538 #endif
0539 
0540 #if __TBB_USE_GENERIC_FETCH_ADD
0541 #define __TBB_machine_fetchadd4 tbb::internal::__TBB_FetchAndAddGeneric<4,int32_t>
0542 #endif
0543 
0544 #if __TBB_USE_GENERIC_FETCH_ADD || __TBB_USE_GENERIC_DWORD_FETCH_ADD
0545 #define __TBB_machine_fetchadd8 tbb::internal::__TBB_FetchAndAddGeneric<8,int64_t>
0546 #endif
0547 
0548 #if __TBB_USE_GENERIC_FETCH_STORE || __TBB_USE_GENERIC_PART_WORD_FETCH_STORE
0549 #define __TBB_machine_fetchstore1 tbb::internal::__TBB_FetchAndStoreGeneric<1,int8_t>
0550 #define __TBB_machine_fetchstore2 tbb::internal::__TBB_FetchAndStoreGeneric<2,int16_t>
0551 #endif
0552 
0553 #if __TBB_USE_GENERIC_FETCH_STORE
0554 #define __TBB_machine_fetchstore4 tbb::internal::__TBB_FetchAndStoreGeneric<4,int32_t>
0555 #endif
0556 
0557 #if __TBB_USE_GENERIC_FETCH_STORE || __TBB_USE_GENERIC_DWORD_FETCH_STORE
0558 #define __TBB_machine_fetchstore8 tbb::internal::__TBB_FetchAndStoreGeneric<8,int64_t>
0559 #endif
0560 
0561 #if __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE
0562 #define __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(S)                                             \
0563     atomic_selector<S>::word atomic_selector<S>::fetch_store ( volatile void* location, word value ) {  \
0564         return __TBB_machine_fetchstore##S( location, value );                                          \
0565     }
0566 
0567 __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(1)
0568 __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(2)
0569 __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(4)
0570 __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(8)
0571 
0572 #undef __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE
0573 #endif /* __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */
0574 
0575 #if __TBB_USE_GENERIC_DWORD_LOAD_STORE
0576 /*TODO: find a more elegant way to handle function names difference*/
0577 #if ! __TBB_USE_FENCED_ATOMICS
0578     /* This name forwarding is needed for generic implementation of
0579      * load8/store8 defined below (via macro) to pick the right CAS function*/
0580     #define   __TBB_machine_cmpswp8full_fence __TBB_machine_cmpswp8
0581 #endif
0582 __TBB_MACHINE_DEFINE_LOAD8_GENERIC_FENCED(full_fence)
0583 __TBB_MACHINE_DEFINE_STORE8_GENERIC_FENCED(full_fence)
0584 
0585 #if ! __TBB_USE_FENCED_ATOMICS
0586     #undef   __TBB_machine_cmpswp8full_fence
0587 #endif
0588 
0589 #define __TBB_machine_store8 tbb::internal::__TBB_machine_generic_store8full_fence
0590 #define __TBB_machine_load8  tbb::internal::__TBB_machine_generic_load8full_fence
0591 #endif /* __TBB_USE_GENERIC_DWORD_LOAD_STORE */
0592 
0593 #if __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE
0594 /** Fenced operations use volatile qualifier to prevent compiler from optimizing
0595     them out, and on architectures with weak memory ordering to induce compiler
0596     to generate code with appropriate acquire/release semantics.
0597     On architectures like IA32, Intel64 (and likely Sparc TSO) volatile has
0598     no effect on code gen, and consistency helpers serve as a compiler fence (the
0599     latter being true for IA64/gcc as well to fix a bug in some gcc versions).
0600     This code assumes that the generated instructions will operate atomically,
0601     which typically requires a type that can be moved in a single instruction,
0602     cooperation from the compiler for effective use of such an instruction,
0603     and appropriate alignment of the data. **/
0604 template <typename T, size_t S>
0605 struct machine_load_store {
0606     static T load_with_acquire ( const volatile T& location ) {
0607         T to_return = location;
0608         __TBB_acquire_consistency_helper();
0609         return to_return;
0610     }
0611     static void store_with_release ( volatile T &location, T value ) {
0612         __TBB_release_consistency_helper();
0613         location = value;
0614     }
0615 };
0616 
0617 // In general, plain loads and stores generated by a 32-bit compiler are not atomic for 64-bit types
0618 #if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
0619 template <typename T>
0620 struct machine_load_store<T,8> {
0621     static T load_with_acquire ( const volatile T& location ) {
0622         return (T)__TBB_machine_load8( (const volatile void*)&location );
0623     }
0624     static void store_with_release ( volatile T& location, T value ) {
0625         __TBB_machine_store8( (volatile void*)&location, (int64_t)value );
0626     }
0627 };
0628 #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
0629 #endif /* __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE */
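/* Usage sketch (illustrative, not part of TBB): machine_load_store (whether this
   generic version or a machine-specific one) backs the __TBB_load_with_acquire and
   __TBB_store_with_release wrappers defined below, which are typically used for
   flag-based publication; 'payload' and 'ready' are hypothetical shared variables:

       // producer
       payload = 42;                                        // ordinary store
       tbb::internal::__TBB_store_with_release( ready, 1 ); // payload ordered before the flag

       // consumer
       while( !tbb::internal::__TBB_load_with_acquire( ready ) )
           __TBB_Yield();
       int v = payload;  // acquire on the flag orders this read after the flag read
*/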
0630 
0631 #if __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE
0632 template <typename T, size_t S>
0633 struct machine_load_store_seq_cst {
0634     static T load ( const volatile T& location ) {
0635         __TBB_full_memory_fence();
0636         return machine_load_store<T,S>::load_with_acquire( location );
0637     }
0638 #if __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE
0639     static void store ( volatile T &location, T value ) {
0640         atomic_selector<S>::fetch_store( (volatile void*)&location, (typename atomic_selector<S>::word)value );
0641     }
0642 #else /* !__TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */
0643     static void store ( volatile T &location, T value ) {
0644         machine_load_store<T,S>::store_with_release( location, value );
0645         __TBB_full_memory_fence();
0646     }
0647 #endif /* !__TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */
0648 };
0649 
0650 #if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
0651 /** The implementation does not use functions __TBB_machine_load8/store8 as they
0652     are not required to be sequentially consistent. **/
0653 template <typename T>
0654 struct machine_load_store_seq_cst<T,8> {
0655     static T load ( const volatile T& location ) {
0656         // Comparand and new value may be anything, they only must be equal, and
0657         // the value should have a low probability to be actually found in 'location'.
0658         const int64_t anyvalue = 2305843009213693951LL;
0659         return __TBB_machine_cmpswp8( (volatile void*)const_cast<volatile T*>(&location), anyvalue, anyvalue );
0660     }
0661     static void store ( volatile T &location, T value ) {
0662 #if __TBB_GCC_VERSION >= 40702
0663 #pragma GCC diagnostic push
0664 #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
0665 #endif
0666         // An atomic initialization leads to reading of uninitialized memory
0667         int64_t result = (volatile int64_t&)location;
0668 #if __TBB_GCC_VERSION >= 40702
0669 #pragma GCC diagnostic pop
0670 #endif
0671         while ( __TBB_machine_cmpswp8((volatile void*)&location, (int64_t)value, result) != result )
0672             result = (volatile int64_t&)location;
0673     }
0674 };
0675 #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
0676 #endif /*__TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE */
0677 
0678 #if __TBB_USE_GENERIC_RELAXED_LOAD_STORE
0679 // Relaxed operations add volatile qualifier to prevent compiler from optimizing them out.
0680 /** Volatile should not incur any additional cost on IA32, Intel64, and Sparc TSO
0681     architectures. However on architectures with weak memory ordering compiler may
0682     generate code with acquire/release semantics for operations on volatile data. **/
0683 template <typename T, size_t S>
0684 struct machine_load_store_relaxed {
0685     static inline T load ( const volatile T& location ) {
0686         return location;
0687     }
0688     static inline void store ( volatile T& location, T value ) {
0689         location = value;
0690     }
0691 };
0692 
0693 #if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
0694 template <typename T>
0695 struct machine_load_store_relaxed<T,8> {
0696     static inline T load ( const volatile T& location ) {
0697         return (T)__TBB_machine_load8( (const volatile void*)&location );
0698     }
0699     static inline void store ( volatile T& location, T value ) {
0700         __TBB_machine_store8( (volatile void*)&location, (int64_t)value );
0701     }
0702 };
0703 #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
0704 #endif /* __TBB_USE_GENERIC_RELAXED_LOAD_STORE */
0705 
0706 #undef __TBB_WORDSIZE // this macro must not be used outside of the atomic machinery
0707 
0708 template<typename T>
0709 inline T __TBB_load_with_acquire(const volatile T &location) {
0710     return machine_load_store<T,sizeof(T)>::load_with_acquire( location );
0711 }
0712 template<typename T, typename V>
0713 inline void __TBB_store_with_release(volatile T& location, V value) {
0714     machine_load_store<T,sizeof(T)>::store_with_release( location, T(value) );
0715 }
0716 //! Overload that exists solely to avoid /Wp64 warnings.
0717 inline void __TBB_store_with_release(volatile size_t& location, size_t value) {
0718     machine_load_store<size_t,sizeof(size_t)>::store_with_release( location, value );
0719 }
0720 
0721 template<typename T>
0722 inline T __TBB_load_full_fence(const volatile T &location) {
0723     return machine_load_store_seq_cst<T,sizeof(T)>::load( location );
0724 }
0725 template<typename T, typename V>
0726 inline void __TBB_store_full_fence(volatile T& location, V value) {
0727     machine_load_store_seq_cst<T,sizeof(T)>::store( location, T(value) );
0728 }
0729 //! Overload that exists solely to avoid /Wp64 warnings.
0730 inline void __TBB_store_full_fence(volatile size_t& location, size_t value) {
0731     machine_load_store_seq_cst<size_t,sizeof(size_t)>::store( location, value );
0732 }
0733 
0734 template<typename T>
0735 inline T __TBB_load_relaxed (const volatile T& location) {
0736     return machine_load_store_relaxed<T,sizeof(T)>::load( const_cast<T&>(location) );
0737 }
0738 template<typename T, typename V>
0739 inline void __TBB_store_relaxed ( volatile T& location, V value ) {
0740     machine_load_store_relaxed<T,sizeof(T)>::store( const_cast<T&>(location), T(value) );
0741 }
0742 //! Overload that exists solely to avoid /Wp64 warnings.
0743 inline void __TBB_store_relaxed ( volatile size_t& location, size_t value ) {
0744     machine_load_store_relaxed<size_t,sizeof(size_t)>::store( const_cast<size_t&>(location), value );
0745 }
0746 
0747 // Macro __TBB_TypeWithAlignmentAtLeastAsStrict(T) should expand to a type with alignment at least
0748 // as strict as that of T.  The type should have a trivial default constructor and destructor, so that
0749 // arrays of that type can be declared without initializers.
0750 // It is correct (but perhaps a waste of space) if __TBB_TypeWithAlignmentAtLeastAsStrict(T) expands
0751 // to a type bigger than T.
0752 // The default definition here works on machines where integers are naturally aligned and the
0753 // strictest alignment is 64.
0754 #ifndef __TBB_TypeWithAlignmentAtLeastAsStrict
0755 
0756 #if __TBB_ALIGNAS_PRESENT
0757 
0758 // Use C++11 keywords alignas and alignof
0759 #define __TBB_DefineTypeWithAlignment(PowerOf2)       \
0760 struct alignas(PowerOf2) __TBB_machine_type_with_alignment_##PowerOf2 { \
0761     uint32_t member[PowerOf2/sizeof(uint32_t)];       \
0762 };
0763 #define __TBB_alignof(T) alignof(T)
0764 
0765 #elif __TBB_ATTRIBUTE_ALIGNED_PRESENT
0766 
0767 #define __TBB_DefineTypeWithAlignment(PowerOf2)       \
0768 struct __TBB_machine_type_with_alignment_##PowerOf2 { \
0769     uint32_t member[PowerOf2/sizeof(uint32_t)];       \
0770 } __attribute__((aligned(PowerOf2)));
0771 #define __TBB_alignof(T) __alignof__(T)
0772 
0773 #elif __TBB_DECLSPEC_ALIGN_PRESENT
0774 
0775 #define __TBB_DefineTypeWithAlignment(PowerOf2)       \
0776 __declspec(align(PowerOf2))                           \
0777 struct __TBB_machine_type_with_alignment_##PowerOf2 { \
0778     uint32_t member[PowerOf2/sizeof(uint32_t)];       \
0779 };
0780 #define __TBB_alignof(T) __alignof(T)
0781 
0782 #else /* A compiler with unknown syntax for data alignment */
0783 #error Must define __TBB_TypeWithAlignmentAtLeastAsStrict(T)
0784 #endif
0785 
0786 /* Now declare types aligned to useful powers of two */
0787 __TBB_DefineTypeWithAlignment(8) // i386 ABI says that uint64_t is aligned on 4 bytes  
0788 __TBB_DefineTypeWithAlignment(16)
0789 __TBB_DefineTypeWithAlignment(32)
0790 __TBB_DefineTypeWithAlignment(64)
0791 
0792 typedef __TBB_machine_type_with_alignment_64 __TBB_machine_type_with_strictest_alignment;
0793 
0794 // Primary template is a declaration of incomplete type so that it fails with unknown alignments
0795 template<size_t N> struct type_with_alignment;
0796 
0797 // Specializations for allowed alignments
0798 template<> struct type_with_alignment<1> { char member; };
0799 template<> struct type_with_alignment<2> { uint16_t member; };
0800 template<> struct type_with_alignment<4> { uint32_t member; };
0801 template<> struct type_with_alignment<8> { __TBB_machine_type_with_alignment_8 member; };
0802 template<> struct type_with_alignment<16> {__TBB_machine_type_with_alignment_16 member; };
0803 template<> struct type_with_alignment<32> {__TBB_machine_type_with_alignment_32 member; };
0804 template<> struct type_with_alignment<64> {__TBB_machine_type_with_alignment_64 member; };
0805 
0806 #if __TBB_ALIGNOF_NOT_INSTANTIATED_TYPES_BROKEN
0807 //! Workaround for a bug in GNU 3.2 and MSVC compilers.
0808 /** The bug is that the compiler sometimes returns 0 for __alignof(T) when T has not yet been instantiated.
0809     The workaround forces instantiation by forcing computation of sizeof(T) before __alignof(T). */
0810 template<size_t Size, typename T>
0811 struct work_around_alignment_bug {
0812     static const size_t alignment = __TBB_alignof(T);
0813 };
0814 #define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<tbb::internal::work_around_alignment_bug<sizeof(T),T>::alignment>
0815 #else
0816 #define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<__TBB_alignof(T)>
0817 #endif  /* __TBB_ALIGNOF_NOT_INSTANTIATED_TYPES_BROKEN */
0818 
0819 #endif  /* __TBB_TypeWithAlignmentAtLeastAsStrict */
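/* Usage sketch (illustrative): the macro is typically used to declare raw storage
   aligned at least as strictly as some type T, into which an object is later placed,
   e.g. via placement new (requires <new>); 'T' here is any hypothetical type:

       typedef __TBB_TypeWithAlignmentAtLeastAsStrict(T) element_t;
       element_t storage[ (sizeof(T)+sizeof(element_t)-1) / sizeof(element_t) ];
       T* obj = new( (void*)storage ) T();

   tbb::aligned_space is built on the same idea. */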
0820 
0821 // A template class is used here to avoid instantiating the static data in modules that don't use it
0822 template<typename T>
0823 struct reverse {
0824     static const T byte_table[256];
0825 };
0826 // An efficient implementation of the reverse function utilizes a 2^8 lookup table holding the bit-reversed
0827 // values of [0..2^8 - 1]. Those values can also be computed on the fly at a slightly higher cost.
0828 template<typename T>
0829 const T reverse<T>::byte_table[256] = {
0830     0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
0831     0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
0832     0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
0833     0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
0834     0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
0835     0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
0836     0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
0837     0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
0838     0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
0839     0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
0840     0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
0841     0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
0842     0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
0843     0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
0844     0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
0845     0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
0846 };
0847 
0848 } // namespace internal @endcond
0849 } // namespace tbb
0850 
0851 // Preserving access to legacy APIs
0852 using tbb::internal::__TBB_load_with_acquire;
0853 using tbb::internal::__TBB_store_with_release;
0854 
0855 // Mapping historically used names to the ones expected by atomic_load_store_traits
0856 #define __TBB_load_acquire  __TBB_load_with_acquire
0857 #define __TBB_store_release __TBB_store_with_release
0858 
0859 #ifndef __TBB_Log2
0860 inline intptr_t __TBB_Log2( uintptr_t x ) {
0861     if( x==0 ) return -1;
0862     intptr_t result = 0;
0863 
0864 #if !defined(_M_ARM)
0865     uintptr_t tmp_;
0866     if( sizeof(x)>4 && (tmp_ = ((uint64_t)x)>>32) ) { x=tmp_; result += 32; }
0867 #endif
0868     if( uintptr_t tmp = x>>16 ) { x=tmp; result += 16; }
0869     if( uintptr_t tmp = x>>8 )  { x=tmp; result += 8; }
0870     if( uintptr_t tmp = x>>4 )  { x=tmp; result += 4; }
0871     if( uintptr_t tmp = x>>2 )  { x=tmp; result += 2; }
0872 
0873     return (x&2)? result+1: result;
0874 }
0875 #endif
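/* Example values (illustrative): __TBB_Log2 computes floor(log2(x)) for x>0, e.g.
   __TBB_Log2(1)==0, __TBB_Log2(5)==2, __TBB_Log2(64)==6; by convention it returns -1 for x==0. */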
0876 
0877 #ifndef __TBB_AtomicOR
0878 inline void __TBB_AtomicOR( volatile void *operand, uintptr_t addend ) {
0879     for( tbb::internal::atomic_backoff b;;b.pause() ) {
0880         uintptr_t tmp = *(volatile uintptr_t *)operand;
0881         uintptr_t result = __TBB_CompareAndSwapW(operand, tmp|addend, tmp);
0882         if( result==tmp ) break;
0883     }
0884 }
0885 #endif
0886 
0887 #ifndef __TBB_AtomicAND
0888 inline void __TBB_AtomicAND( volatile void *operand, uintptr_t addend ) {
0889     for( tbb::internal::atomic_backoff b;;b.pause() ) {
0890         uintptr_t tmp = *(volatile uintptr_t *)operand;
0891         uintptr_t result = __TBB_CompareAndSwapW(operand, tmp&addend, tmp);
0892         if( result==tmp ) break;
0893     }
0894 }
0895 #endif
0896 
0897 #if __TBB_PREFETCHING
0898 #ifndef __TBB_cl_prefetch
0899 #error This platform does not define cache management primitives required for __TBB_PREFETCHING
0900 #endif
0901 
0902 #ifndef __TBB_cl_evict
0903 #define __TBB_cl_evict(p)
0904 #endif
0905 #endif
0906 
0907 #ifndef __TBB_Flag
0908 typedef unsigned char __TBB_Flag;
0909 #endif
0910 typedef __TBB_atomic __TBB_Flag __TBB_atomic_flag;
0911 
0912 #ifndef __TBB_TryLockByte
0913 inline bool __TBB_TryLockByte( __TBB_atomic_flag &flag ) {
0914     return __TBB_machine_cmpswp1(&flag,1,0)==0;
0915 }
0916 #endif
0917 
0918 #ifndef __TBB_LockByte
0919 inline __TBB_Flag __TBB_LockByte( __TBB_atomic_flag& flag ) {
0920     tbb::internal::atomic_backoff backoff;
0921     while( !__TBB_TryLockByte(flag) ) backoff.pause();
0922     return 0;
0923 }
0924 #endif
0925 
0926 #ifndef  __TBB_UnlockByte
0927 #define __TBB_UnlockByte(addr) __TBB_store_with_release((addr),0)
0928 #endif
0929 
0930 // lock primitives with Intel(R) Transactional Synchronization Extensions (Intel(R) TSX)
0931 #if ( __TBB_x86_32 || __TBB_x86_64 )  /* only on ia32/intel64 */
0932 inline void __TBB_TryLockByteElidedCancel() { __TBB_machine_try_lock_elided_cancel(); }
0933 
0934 inline bool __TBB_TryLockByteElided( __TBB_atomic_flag& flag ) {
0935     bool res = __TBB_machine_try_lock_elided( &flag )!=0;
0936     // to avoid the "lemming" effect, we need to abort the transaction
0937     // if  __TBB_machine_try_lock_elided returns false (i.e., someone else
0938     // has acquired the mutex non-speculatively).
0939     if( !res ) __TBB_TryLockByteElidedCancel();
0940     return res;
0941 }
0942 
0943 inline void __TBB_LockByteElided( __TBB_atomic_flag& flag )
0944 {
0945     for(;;) {
0946         tbb::internal::spin_wait_while_eq( flag, 1 );
0947         if( __TBB_machine_try_lock_elided( &flag ) )
0948             return;
0949         // Another thread acquired the lock "for real".
0950         // To avoid the "lemming" effect, we abort the transaction.
0951         __TBB_TryLockByteElidedCancel();
0952     }
0953 }
0954 
0955 inline void __TBB_UnlockByteElided( __TBB_atomic_flag& flag ) {
0956     __TBB_machine_unlock_elided( &flag );
0957 }
0958 #endif
0959 
0960 #ifndef __TBB_ReverseByte
0961 inline unsigned char __TBB_ReverseByte(unsigned char src) {
0962     return tbb::internal::reverse<unsigned char>::byte_table[src];
0963 }
0964 #endif
0965 
0966 template<typename T>
0967 T __TBB_ReverseBits(T src) {
0968     T dst;
0969     unsigned char *original = (unsigned char *) &src;
0970     unsigned char *reversed = (unsigned char *) &dst;
0971 
0972     for( int i = sizeof(T)-1; i >= 0; i-- )
0973         reversed[i] = __TBB_ReverseByte( original[sizeof(T)-i-1] );
0974 
0975     return dst;
0976 }
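/* Example (illustrative): reversing the byte order of the representation while also
   reversing the bits within each byte reverses the bits of the whole value, regardless
   of endianness, e.g.
       __TBB_ReverseBits<uint16_t>(0x0001) == 0x8000
       __TBB_ReverseBits<uint8_t>(0xB4)    == 0x2D    // 10110100b -> 00101101b
*/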
0977 
0978 #endif /* __TBB_machine_H */