// include/oneapi/tbb/enumerable_thread_specific.h
/*
    Copyright (c) 2005-2024 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#ifndef __TBB_enumerable_thread_specific_H
#define __TBB_enumerable_thread_specific_H

#include "detail/_config.h"
#include "detail/_namespace_injection.h"
#include "detail/_assert.h"
#include "detail/_template_helpers.h"
#include "detail/_aligned_space.h"

#include "concurrent_vector.h"
#include "tbb_allocator.h"
#include "cache_aligned_allocator.h"
#include "profiling.h"

#include <atomic>
#include <thread>
#include <cstring> // memcpy
#include <cstddef> // std::ptrdiff_t

#include "task.h" // for task::suspend_point

#if _WIN32 || _WIN64
#ifndef NOMINMAX
#define NOMINMAX
#define __TBB_DEFINED_NOMINMAX 1
#endif
#include <windows.h>
#if __TBB_DEFINED_NOMINMAX
#undef NOMINMAX
#undef __TBB_DEFINED_NOMINMAX
#endif
#else
#include <pthread.h>
#endif

namespace tbb {
namespace detail {
namespace d1 {

//! enum for selecting between single key and key-per-instance versions
enum ets_key_usage_type {
    ets_key_per_instance
    , ets_no_key
#if __TBB_RESUMABLE_TASKS
    , ets_suspend_aware
#endif
};
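
// Illustration (not part of the original header): the key usage type is the
// third template argument of enumerable_thread_specific, declared below. For
// example, a hypothetical
//     tbb::enumerable_thread_specific<int, tbb::cache_aligned_allocator<int>,
//                                     tbb::ets_key_per_instance> per_thread_counter;
// selects the native-TLS-backed ets_base<ets_key_per_instance> specialization.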

// Forward declaration to use in internal classes
template <typename T, typename Allocator, ets_key_usage_type ETS_key_type>
class enumerable_thread_specific;

template <std::size_t ThreadIDSize>
struct internal_ets_key_selector {
    using key_type = std::thread::id;
    static key_type current_key() {
        return std::this_thread::get_id();
    }
};

// Intel Compiler on OSX cannot create atomic objects instantiated from non-fundamental types
#if __INTEL_COMPILER && __APPLE__
template<>
struct internal_ets_key_selector<sizeof(std::size_t)> {
    using key_type = std::size_t;
    static key_type current_key() {
        auto id = std::this_thread::get_id();
        return reinterpret_cast<key_type&>(id);
    }
};
#endif

template <ets_key_usage_type ETS_key_type>
struct ets_key_selector : internal_ets_key_selector<sizeof(std::thread::id)> {};

#if __TBB_RESUMABLE_TASKS
template <>
struct ets_key_selector<ets_suspend_aware> {
    using key_type = suspend_point;
    static key_type current_key() {
        return r1::current_suspend_point();
    }
};
#endif

template<ets_key_usage_type ETS_key_type>
class ets_base : detail::no_copy {
protected:
    using key_type = typename ets_key_selector<ETS_key_type>::key_type;

public:
    struct slot;
    struct array {
        array* next;
        std::size_t lg_size;
        slot& at( std::size_t k ) {
            return (reinterpret_cast<slot*>(reinterpret_cast<void*>(this+1)))[k];
        }
        std::size_t size() const { return std::size_t(1) << lg_size; }
        std::size_t mask() const { return size() - 1; }
        std::size_t start( std::size_t h ) const {
            return h >> (8 * sizeof(std::size_t) - lg_size);
        }
    };
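
    // Probing geometry of array above, for illustration: with lg_size == 4 on
    // a 64-bit platform, start(h) == h >> 60, so the top 4 bits of the hash
    // choose the initial slot; collisions then probe linearly via (i+1) & mask().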
    struct slot {
        std::atomic<key_type> key;
        void* ptr;
        bool empty() const { return key.load(std::memory_order_relaxed) == key_type(); }
        bool match( key_type k ) const { return key.load(std::memory_order_relaxed) == k; }
        bool claim( key_type k ) {
            // TODO: maybe claim ptr, because key_type is not guaranteed to fit into word size
            key_type expected = key_type();
            return key.compare_exchange_strong(expected, k);
        }
    };

protected:
    //! Root of linked list of arrays of decreasing size.
    /** nullptr if and only if my_count==0.
        Each array in the list is half the size of its predecessor. */
    std::atomic<array*> my_root;
    std::atomic<std::size_t> my_count;

    virtual void* create_local() = 0;
    virtual void* create_array(std::size_t _size) = 0;  // _size in bytes
    virtual void free_array(void* ptr, std::size_t _size) = 0; // _size in bytes

    array* allocate( std::size_t lg_size ) {
        std::size_t n = std::size_t(1) << lg_size;
        array* a = static_cast<array*>(create_array(sizeof(array) + n * sizeof(slot)));
        a->lg_size = lg_size;
        std::memset( a + 1, 0, n * sizeof(slot) );
        return a;
    }
    void deallocate(array* a) {
        std::size_t n = std::size_t(1) << (a->lg_size);
        free_array( static_cast<void*>(a), std::size_t(sizeof(array) + n * sizeof(slot)) );
    }

    ets_base() : my_root{nullptr}, my_count{0} {}
    virtual ~ets_base();  // g++ complains if this is not virtual

    void* table_lookup( bool& exists );
    void  table_clear();
    // The following functions are not used in concurrent context,
    // so we don't need synchronization and ITT annotations there.
    template <ets_key_usage_type E2>
    void table_elementwise_copy( const ets_base& other,
                                 void*(*add_element)(ets_base<E2>&, void*) ) {
        __TBB_ASSERT(!my_root.load(std::memory_order_relaxed), nullptr);
        __TBB_ASSERT(!my_count.load(std::memory_order_relaxed), nullptr);
        if( !other.my_root.load(std::memory_order_relaxed) ) return;
        array* root = allocate(other.my_root.load(std::memory_order_relaxed)->lg_size);
        my_root.store(root, std::memory_order_relaxed);
        root->next = nullptr;
        my_count.store(other.my_count.load(std::memory_order_relaxed), std::memory_order_relaxed);
        std::size_t mask = root->mask();
        for( array* r = other.my_root.load(std::memory_order_relaxed); r; r = r->next ) {
            for( std::size_t i = 0; i < r->size(); ++i ) {
                slot& s1 = r->at(i);
                if( !s1.empty() ) {
                    for( std::size_t j = root->start(std::hash<key_type>{}(s1.key.load(std::memory_order_relaxed))); ; j = (j+1)&mask ) {
                        slot& s2 = root->at(j);
                        if( s2.empty() ) {
                            s2.ptr = add_element(static_cast<ets_base<E2>&>(*this), s1.ptr);
                            s2.key.store(s1.key.load(std::memory_order_relaxed), std::memory_order_relaxed);
                            break;
                        }
                        else if( s2.match(s1.key.load(std::memory_order_relaxed)) )
                            break;
                    }
                }
            }
        }
    }
    void table_swap( ets_base& other ) {
       __TBB_ASSERT(this!=&other, "Don't swap an instance with itself");
       swap_atomics_relaxed(my_root, other.my_root);
       swap_atomics_relaxed(my_count, other.my_count);
    }
};

template<ets_key_usage_type ETS_key_type>
ets_base<ETS_key_type>::~ets_base() {
    __TBB_ASSERT(!my_root.load(std::memory_order_relaxed), nullptr);
}

template<ets_key_usage_type ETS_key_type>
void ets_base<ETS_key_type>::table_clear() {
    while ( array* r = my_root.load(std::memory_order_relaxed) ) {
        my_root.store(r->next, std::memory_order_relaxed);
        deallocate(r);
    }
    my_count.store(0, std::memory_order_relaxed);
}

template<ets_key_usage_type ETS_key_type>
void* ets_base<ETS_key_type>::table_lookup( bool& exists ) {
    const key_type k = ets_key_selector<ETS_key_type>::current_key();

    __TBB_ASSERT(k != key_type(), nullptr);
    void* found;
    std::size_t h = std::hash<key_type>{}(k);
    for( array* r = my_root.load(std::memory_order_acquire); r; r = r->next ) {
        call_itt_notify(acquired,r);
        std::size_t mask=r->mask();
        for(std::size_t i = r->start(h); ;i=(i+1)&mask) {
            slot& s = r->at(i);
            if( s.empty() ) break;
            if( s.match(k) ) {
                if( r == my_root.load(std::memory_order_acquire) ) {
                    // Success at top level
                    exists = true;
                    return s.ptr;
                } else {
                    // Success at some other level.  Need to insert at top level.
                    exists = true;
                    found = s.ptr;
                    goto insert;
                }
            }
        }
    }
    // Key does not yet exist. The density of slots in the table never exceeds 0.5:
    // if an insertion would push it past that, a new table of double the current
    // size is allocated and swapped in as the new root table. So an empty slot is guaranteed.
    exists = false;
    found = create_local();
    {
        std::size_t c = ++my_count;
        array* r = my_root.load(std::memory_order_acquire);
        call_itt_notify(acquired,r);
        if( !r || c > r->size()/2 ) {
            std::size_t s = r ? r->lg_size : 2;
            while( c > std::size_t(1)<<(s-1) ) ++s;
            array* a = allocate(s);
            for(;;) {
                a->next = r;
                call_itt_notify(releasing,a);
                array* new_r = r;
                if( my_root.compare_exchange_strong(new_r, a) ) break;
                call_itt_notify(acquired, new_r);
                __TBB_ASSERT(new_r != nullptr, nullptr);
                if( new_r->lg_size >= s ) {
                    // Another thread inserted an equal or bigger array, so our array is superfluous.
                    deallocate(a);
                    break;
                }
                r = new_r;
            }
        }
    }
    insert:
    // Whether a slot has been found in an older table, or if it has been inserted at this level,
    // it has already been accounted for in the total.  There is guaranteed to be room for it,
    // and it is not present, so search for an empty slot and use it.
    array* ir = my_root.load(std::memory_order_acquire);
    call_itt_notify(acquired, ir);
    std::size_t mask = ir->mask();
    for(std::size_t i = ir->start(h);; i = (i+1)&mask) {
        slot& s = ir->at(i);
        if( s.empty() ) {
            if( s.claim(k) ) {
                s.ptr = found;
                return found;
            }
        }
    }
}
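
// Growth sketch for table_lookup above: when the incremented count exceeds half
// the root array's capacity, a new array of at least double the size is pushed
// (by CAS) at the head of the list. Older arrays stay linked behind it, and an
// entry found in an older array is re-inserted into the current root on lookup.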

//! Specialization that exploits native TLS
template <>
class ets_base<ets_key_per_instance>: public ets_base<ets_no_key> {
    using super = ets_base<ets_no_key>;
#if _WIN32||_WIN64
#if __TBB_WIN8UI_SUPPORT
    using tls_key_t = DWORD;
    void create_key() { my_key = FlsAlloc(nullptr); }
    void destroy_key() { FlsFree(my_key); }
    void set_tls(void * value) { FlsSetValue(my_key, (LPVOID)value); }
    void* get_tls() { return (void *)FlsGetValue(my_key); }
#else
    using tls_key_t = DWORD;
    void create_key() { my_key = TlsAlloc(); }
    void destroy_key() { TlsFree(my_key); }
    void set_tls(void * value) { TlsSetValue(my_key, (LPVOID)value); }
    void* get_tls() { return (void *)TlsGetValue(my_key); }
#endif
#else
    using tls_key_t = pthread_key_t;
    void create_key() { pthread_key_create(&my_key, nullptr); }
    void destroy_key() { pthread_key_delete(my_key); }
    void set_tls( void * value ) const { pthread_setspecific(my_key, value); }
    void* get_tls() const { return pthread_getspecific(my_key); }
#endif
    tls_key_t my_key;
    virtual void* create_local() override = 0;
    virtual void* create_array(std::size_t _size) override = 0;  // _size in bytes
    virtual void free_array(void* ptr, std::size_t _size) override = 0; // _size in bytes
protected:
    ets_base() {create_key();}
    ~ets_base() {destroy_key();}
    void* table_lookup( bool& exists ) {
        void* found = get_tls();
        if( found ) {
            exists=true;
        } else {
            found = super::table_lookup(exists);
            set_tls(found);
        }
        return found;
    }
    void table_clear() {
        destroy_key();
        create_key();
        super::table_clear();
    }
    void table_swap( ets_base& other ) {
       using std::swap;
       __TBB_ASSERT(this!=&other, "Don't swap an instance with itself");
       swap(my_key, other.my_key);
       super::table_swap(other);
    }
};
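
// Fast-path sketch: the native TLS slot above caches the pointer that
// ets_base<ets_no_key>::table_lookup computes, so only the first lookup on a
// given thread (or the first one after table_clear) walks the hash table.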

//! Random access iterator for traversing the thread local copies.
template< typename Container, typename Value >
class enumerable_thread_specific_iterator
{
    //! current position in the concurrent_vector

    Container *my_container;
    typename Container::size_type my_index;
    mutable Value *my_value;

    template<typename C, typename T, typename U>
    friend bool operator==( const enumerable_thread_specific_iterator<C, T>& i,
                     const enumerable_thread_specific_iterator<C, U>& j );

    template<typename C, typename T, typename U>
    friend bool operator<( const enumerable_thread_specific_iterator<C,T>& i,
                           const enumerable_thread_specific_iterator<C,U>& j );

    template<typename C, typename T, typename U>
    friend std::ptrdiff_t operator-( const enumerable_thread_specific_iterator<C,T>& i,
                                const enumerable_thread_specific_iterator<C,U>& j );

    template<typename C, typename U>
    friend class enumerable_thread_specific_iterator;

public:
    //! STL support
    using difference_type = std::ptrdiff_t;
    using value_type = Value;
    using pointer = Value*;
    using reference = Value&;
    using iterator_category = std::random_access_iterator_tag;

    enumerable_thread_specific_iterator( const Container &container, typename Container::size_type index ) :
        my_container(&const_cast<Container &>(container)), my_index(index), my_value(nullptr) {}

    //! Default constructor
    enumerable_thread_specific_iterator() : my_container(nullptr), my_index(0), my_value(nullptr) {}

    template<typename U>
    enumerable_thread_specific_iterator( const enumerable_thread_specific_iterator<Container, U>& other ) :
            my_container( other.my_container ), my_index( other.my_index), my_value( const_cast<Value *>(other.my_value) ) {}

    enumerable_thread_specific_iterator operator+( std::ptrdiff_t offset ) const {
        return enumerable_thread_specific_iterator(*my_container, my_index + offset);
    }

    friend enumerable_thread_specific_iterator operator+( std::ptrdiff_t offset, enumerable_thread_specific_iterator v ) {
        return enumerable_thread_specific_iterator(*v.my_container, v.my_index + offset);
    }

    enumerable_thread_specific_iterator &operator+=( std::ptrdiff_t offset ) {
        my_index += offset;
        my_value = nullptr;
        return *this;
    }

    enumerable_thread_specific_iterator operator-( std::ptrdiff_t offset ) const {
        return enumerable_thread_specific_iterator( *my_container, my_index-offset );
    }

    enumerable_thread_specific_iterator &operator-=( std::ptrdiff_t offset ) {
        my_index -= offset;
        my_value = nullptr;
        return *this;
    }

    Value& operator*() const {
        Value* value = my_value;
        if( !value ) {
            value = my_value = (*my_container)[my_index].value();
        }
        __TBB_ASSERT( value==(*my_container)[my_index].value(), "corrupt cache" );
        return *value;
    }

    Value& operator[]( std::ptrdiff_t k ) const {
       return *(*my_container)[my_index + k].value();
    }

    Value* operator->() const {return &operator*();}

    enumerable_thread_specific_iterator& operator++() {
        ++my_index;
        my_value = nullptr;
        return *this;
    }

    enumerable_thread_specific_iterator& operator--() {
        --my_index;
        my_value = nullptr;
        return *this;
    }

    //! Post increment
    enumerable_thread_specific_iterator operator++(int) {
        enumerable_thread_specific_iterator result = *this;
        ++my_index;
        my_value = nullptr;
        return result;
    }

    //! Post decrement
    enumerable_thread_specific_iterator operator--(int) {
        enumerable_thread_specific_iterator result = *this;
        --my_index;
        my_value = nullptr;
        return result;
    }
};

template<typename Container, typename T, typename U>
bool operator==( const enumerable_thread_specific_iterator<Container, T>& i,
                 const enumerable_thread_specific_iterator<Container, U>& j ) {
    return i.my_index == j.my_index && i.my_container == j.my_container;
}

template<typename Container, typename T, typename U>
bool operator!=( const enumerable_thread_specific_iterator<Container,T>& i,
                 const enumerable_thread_specific_iterator<Container,U>& j ) {
    return !(i==j);
}

template<typename Container, typename T, typename U>
bool operator<( const enumerable_thread_specific_iterator<Container,T>& i,
                const enumerable_thread_specific_iterator<Container,U>& j ) {
    return i.my_index<j.my_index;
}

template<typename Container, typename T, typename U>
bool operator>( const enumerable_thread_specific_iterator<Container,T>& i,
                const enumerable_thread_specific_iterator<Container,U>& j ) {
    return j<i;
}

template<typename Container, typename T, typename U>
bool operator>=( const enumerable_thread_specific_iterator<Container,T>& i,
                 const enumerable_thread_specific_iterator<Container,U>& j ) {
    return !(i<j);
}

template<typename Container, typename T, typename U>
bool operator<=( const enumerable_thread_specific_iterator<Container,T>& i,
                 const enumerable_thread_specific_iterator<Container,U>& j ) {
    return !(j<i);
}

template<typename Container, typename T, typename U>
std::ptrdiff_t operator-( const enumerable_thread_specific_iterator<Container,T>& i,
                     const enumerable_thread_specific_iterator<Container,U>& j ) {
    return i.my_index-j.my_index;
}

template<typename SegmentedContainer, typename Value >
class segmented_iterator
{
    template<typename C, typename T, typename U>
    friend bool operator==(const segmented_iterator<C,T>& i, const segmented_iterator<C,U>& j);

    template<typename C, typename T, typename U>
    friend bool operator!=(const segmented_iterator<C,T>& i, const segmented_iterator<C,U>& j);

    template<typename C, typename U>
    friend class segmented_iterator;

public:
    segmented_iterator() {my_segcont = nullptr;}

    segmented_iterator( const SegmentedContainer& _segmented_container ) :
        my_segcont(const_cast<SegmentedContainer*>(&_segmented_container)),
        outer_iter(my_segcont->end()) { }

    ~segmented_iterator() {}

    using InnerContainer = typename SegmentedContainer::value_type;
    using inner_iterator = typename InnerContainer::iterator;
    using outer_iterator = typename SegmentedContainer::iterator;

    // STL support
    // TODO: inherit all types from segmented container?
    using difference_type = std::ptrdiff_t;
    using value_type = Value;
    using size_type = typename SegmentedContainer::size_type;
    using pointer = Value*;
    using reference = Value&;
    using iterator_category = std::input_iterator_tag;

    // Copy Constructor
    template<typename U>
    segmented_iterator(const segmented_iterator<SegmentedContainer, U>& other) :
        my_segcont(other.my_segcont),
        outer_iter(other.outer_iter),
        // can we assign a default-constructed iterator to inner if we're at the end?
        inner_iter(other.inner_iter)
    {}

    // assignment
    template<typename U>
    segmented_iterator& operator=( const segmented_iterator<SegmentedContainer, U>& other) {
        my_segcont = other.my_segcont;
        outer_iter = other.outer_iter;
        if(outer_iter != my_segcont->end()) inner_iter = other.inner_iter;
        return *this;
    }

    // allow assignment of outer iterator to segmented iterator.  Once it is
    // assigned, move forward until a non-empty inner container is found or
    // the end of the outer container is reached.
    segmented_iterator& operator=(const outer_iterator& new_outer_iter) {
        __TBB_ASSERT(my_segcont != nullptr, nullptr);
        // check that this iterator points to something inside the segmented container
        for(outer_iter = new_outer_iter ;outer_iter!=my_segcont->end(); ++outer_iter) {
            if( !outer_iter->empty() ) {
                inner_iter = outer_iter->begin();
                break;
            }
        }
        return *this;
    }

    // pre-increment
    segmented_iterator& operator++() {
        advance_me();
        return *this;
    }

    // post-increment
    segmented_iterator operator++(int) {
        segmented_iterator tmp = *this;
        operator++();
        return tmp;
    }

    bool operator==(const outer_iterator& other_outer) const {
        __TBB_ASSERT(my_segcont != nullptr, nullptr);
        return (outer_iter == other_outer &&
                (outer_iter == my_segcont->end() || inner_iter == outer_iter->begin()));
    }

    bool operator!=(const outer_iterator& other_outer) const {
        return !operator==(other_outer);
    }

    // (i)* RHS
    reference operator*() const {
        __TBB_ASSERT(my_segcont != nullptr, nullptr);
        __TBB_ASSERT(outer_iter != my_segcont->end(), "Dereferencing a pointer at end of container");
        __TBB_ASSERT(inner_iter != outer_iter->end(), nullptr); // should never happen
        return *inner_iter;
    }

    // i->
    pointer operator->() const { return &operator*();}

private:
    SegmentedContainer* my_segcont;
    outer_iterator outer_iter;
    inner_iterator inner_iter;

    void advance_me() {
        __TBB_ASSERT(my_segcont != nullptr, nullptr);
        __TBB_ASSERT(outer_iter != my_segcont->end(), nullptr); // not true if there are no inner containers
        __TBB_ASSERT(inner_iter != outer_iter->end(), nullptr); // not true if the inner containers are all empty.
        ++inner_iter;
        while(inner_iter == outer_iter->end() && ++outer_iter != my_segcont->end()) {
            inner_iter = outer_iter->begin();
        }
    }
};    // segmented_iterator

template<typename SegmentedContainer, typename T, typename U>
bool operator==( const segmented_iterator<SegmentedContainer,T>& i,
                 const segmented_iterator<SegmentedContainer,U>& j ) {
    if(i.my_segcont != j.my_segcont) return false;
    if(i.my_segcont == nullptr) return true;
    if(i.outer_iter != j.outer_iter) return false;
    if(i.outer_iter == i.my_segcont->end()) return true;
    return i.inner_iter == j.inner_iter;
}

// !=
template<typename SegmentedContainer, typename T, typename U>
bool operator!=( const segmented_iterator<SegmentedContainer,T>& i,
                 const segmented_iterator<SegmentedContainer,U>& j ) {
    return !(i==j);
}

template<typename T>
struct construct_by_default: no_assign {
    void construct(void*where) {new(where) T();} // C++ note: the () in T() ensures zero initialization.
    construct_by_default( int ) {}
};

template<typename T>
struct construct_by_exemplar: no_assign {
    const T exemplar;
    void construct(void*where) {new(where) T(exemplar);}
    construct_by_exemplar( const T& t ) : exemplar(t) {}
    construct_by_exemplar( T&& t ) : exemplar(std::move(t)) {}
};

template<typename T, typename Finit>
struct construct_by_finit: no_assign {
    Finit f;
    void construct(void* where) {new(where) T(f());}
    construct_by_finit( Finit&& f_ ) : f(std::move(f_)) {}
};

template<typename T, typename... P>
struct construct_by_args: no_assign {
    stored_pack<P...> pack;
    void construct(void* where) {
        call( [where](const typename std::decay<P>::type&... args ){
           new(where) T(args...);
        }, pack );
    }
    construct_by_args( P&& ... args ) : pack(std::forward<P>(args)...) {}
};
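
// For reference: each enumerable_thread_specific constructor below selects one
// of these policies -- construct_by_default for the default constructor,
// construct_by_finit for a callable initializer, construct_by_exemplar for a
// copied or moved exemplar, and construct_by_args for the variadic constructor.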

// storage for initialization function pointer
// TODO: consider removing the template parameter T here and in callback_leaf
class callback_base {
public:
    // Clone *this
    virtual callback_base* clone() const = 0;
    // Destruct and free *this
    virtual void destroy() = 0;
    // Need virtual destructor to satisfy GCC compiler warning
    virtual ~callback_base() { }
    // Construct T at where
    virtual void construct(void* where) = 0;
};

template <typename Constructor>
class callback_leaf: public callback_base, Constructor {
    template<typename... P> callback_leaf( P&& ... params ) : Constructor(std::forward<P>(params)...) {}
    // TODO: make the construction/destruction consistent (use allocator.construct/destroy)
    using my_allocator_type = typename tbb::tbb_allocator<callback_leaf>;

    callback_base* clone() const override {
        return make(*this);
    }

    void destroy() override {
        my_allocator_type alloc;
        tbb::detail::allocator_traits<my_allocator_type>::destroy(alloc, this);
        tbb::detail::allocator_traits<my_allocator_type>::deallocate(alloc, this, 1);
    }

    void construct(void* where) override {
        Constructor::construct(where);
    }

public:
    template<typename... P>
    static callback_base* make( P&& ... params ) {
        void* where = my_allocator_type().allocate(1);
        return new(where) callback_leaf( std::forward<P>(params)... );
    }
};

//! Template for recording construction of objects in table
/** All maintenance of the space will be done explicitly on push_back,
    and all thread local copies must be destroyed before the concurrent
    vector is deleted.

    The flag is_built is initialized to false.  When the local is
    successfully constructed, set the flag to true or call value_committed().
    If the constructor throws, the flag will be false.
*/
template<typename U>
struct ets_element {
    detail::aligned_space<U> my_space;
    bool is_built;
    ets_element() { is_built = false; }  // not built yet
    U* value() { return my_space.begin(); }
    U* value_committed() { is_built = true; return my_space.begin(); }
    ~ets_element() {
        if(is_built) {
            my_space.begin()->~U();
            is_built = false;
        }
    }
};

// A predicate that can be used for a compile-time compatibility check of ETS instances
// Ideally, it should have been declared inside the ETS class, but unfortunately
// in that case VS2013 does not enable the variadic constructor.
template<typename T, typename ETS> struct is_compatible_ets : std::false_type {};
template<typename T, typename U, typename A, ets_key_usage_type C>
struct is_compatible_ets< T, enumerable_thread_specific<U,A,C> > : std::is_same<T, U> {};

// A predicate that checks whether, for a variable 'foo' of type T, foo() is a valid expression
template <typename T> using has_empty_braces_operator = decltype(std::declval<T>()());
template <typename T> using is_callable_no_args = supports<T, has_empty_braces_operator>;

//! The enumerable_thread_specific container
/** enumerable_thread_specific has the following properties:
    - thread-local copies are lazily created, with default, exemplar or function initialization.
    - thread-local copies do not move (during their lifetime, excepting clear()), so the address of a copy is invariant.
    - the contained objects need not have operator=() defined if combine is not used.
    - enumerable_thread_specific containers may be copy-constructed or assigned.
    - thread-local copies can be managed by hash-table, or can be accessed via TLS storage for speed.
    - outside of parallel contexts, the contents of all thread-local copies are accessible by iterator or via the combine or combine_each methods.

@par Segmented iterator
    When the thread-local objects are containers with input_iterators defined, a segmented iterator may
    be used to iterate over all the elements of all thread-local copies.

@par combine and combine_each
    - Both methods are defined for enumerable_thread_specific.
    - combine() requires the type T to have operator=() defined.
    - neither method modifies the contents of the object (though there is no guarantee that the applied methods do not modify the object.)
    - Both are evaluated in serial context (the methods are assumed to be non-benign.)

@ingroup containers */
template <typename T, typename Allocator=cache_aligned_allocator<T>,
          ets_key_usage_type ETS_key_type=ets_no_key >
class enumerable_thread_specific: ets_base<ETS_key_type> {

    template<typename U, typename A, ets_key_usage_type C> friend class enumerable_thread_specific;

    using padded_element = padded<ets_element<T>>;

    //! A generic range, used to create range objects from the iterators
    template<typename I>
    class generic_range_type: public blocked_range<I> {
    public:
        using value_type = T;
        using reference = T&;
        using const_reference = const T&;
        using iterator = I;
        using difference_type = std::ptrdiff_t;

        generic_range_type( I begin_, I end_, std::size_t grainsize_ = 1) : blocked_range<I>(begin_,end_,grainsize_) {}
        template<typename U>
        generic_range_type( const generic_range_type<U>& r) : blocked_range<I>(r.begin(),r.end(),r.grainsize()) {}
        generic_range_type( generic_range_type& r, split ) : blocked_range<I>(r,split()) {}
    };

    using allocator_traits_type = tbb::detail::allocator_traits<Allocator>;

    using padded_allocator_type = typename allocator_traits_type::template rebind_alloc<padded_element>;
    using internal_collection_type = tbb::concurrent_vector< padded_element, padded_allocator_type >;

    callback_base *my_construct_callback;

    internal_collection_type my_locals;

    // TODO: consider unifying the callback mechanism for all create_local* methods below
    //   (likely non-compatible and requires interface version increase)
    void* create_local() override {
        padded_element& lref = *my_locals.grow_by(1);
        my_construct_callback->construct(lref.value());
        return lref.value_committed();
    }

    static void* create_local_by_copy( ets_base<ETS_key_type>& base, void* p ) {
        enumerable_thread_specific& ets = static_cast<enumerable_thread_specific&>(base);
        padded_element& lref = *ets.my_locals.grow_by(1);
        new(lref.value()) T(*static_cast<T*>(p));
        return lref.value_committed();
    }

    static void* create_local_by_move( ets_base<ETS_key_type>& base, void* p ) {
        enumerable_thread_specific& ets = static_cast<enumerable_thread_specific&>(base);
        padded_element& lref = *ets.my_locals.grow_by(1);
        new(lref.value()) T(std::move(*static_cast<T*>(p)));
        return lref.value_committed();
    }

    using array_allocator_type = typename allocator_traits_type::template rebind_alloc<uintptr_t>;

    // _size is in bytes
    void* create_array(std::size_t _size) override {
        std::size_t nelements = (_size + sizeof(uintptr_t) -1) / sizeof(uintptr_t);
        return array_allocator_type().allocate(nelements);
    }

    void free_array( void* _ptr, std::size_t _size) override {
        std::size_t nelements = (_size + sizeof(uintptr_t) -1) / sizeof(uintptr_t);
        array_allocator_type().deallocate( reinterpret_cast<uintptr_t *>(_ptr),nelements);
    }

public:

    //! Basic types
    using value_type = T;
    using allocator_type = Allocator;
    using size_type = typename internal_collection_type::size_type;
    using difference_type = typename internal_collection_type::difference_type;
    using reference = value_type&;
    using const_reference = const value_type&;

    using pointer = typename allocator_traits_type::pointer;
    using const_pointer = typename allocator_traits_type::const_pointer;

    // Iterator types
    using iterator = enumerable_thread_specific_iterator<internal_collection_type, value_type>;
    using const_iterator = enumerable_thread_specific_iterator<internal_collection_type, const value_type>;

    // Parallel range types
    using range_type = generic_range_type<iterator>;
    using const_range_type = generic_range_type<const_iterator>;

    //! Default constructor.  Each local instance of T is default constructed.
    enumerable_thread_specific() : my_construct_callback(
        callback_leaf<construct_by_default<T> >::make(/*dummy argument*/0)
    ){}

    //! Constructor with initializer functor. Each local instance of T is constructed by T(finit()).
    template <typename Finit , typename = typename std::enable_if<is_callable_no_args<typename std::decay<Finit>::type>::value>::type>
    explicit enumerable_thread_specific( Finit finit ) : my_construct_callback(
        callback_leaf<construct_by_finit<T,Finit> >::make( std::move(finit) )
    ){}

    //! Constructor with exemplar. Each local instance of T is copy-constructed from the exemplar.
    explicit enumerable_thread_specific( const T& exemplar ) : my_construct_callback(
        callback_leaf<construct_by_exemplar<T> >::make( exemplar )
    ){}

    explicit enumerable_thread_specific( T&& exemplar ) : my_construct_callback(
        callback_leaf<construct_by_exemplar<T> >::make( std::move(exemplar) )
    ){}

    //! Variadic constructor with initializer arguments.  Each local instance of T is constructed by T(args...)
    template <typename P1, typename... P,
              typename = typename std::enable_if<!is_callable_no_args<typename std::decay<P1>::type>::value
                                                      && !is_compatible_ets<T, typename std::decay<P1>::type>::value
                                                      && !std::is_same<T, typename std::decay<P1>::type>::value
                                                     >::type>
    enumerable_thread_specific( P1&& arg1, P&& ... args ) : my_construct_callback(
        callback_leaf<construct_by_args<T,P1,P...> >::make( std::forward<P1>(arg1), std::forward<P>(args)... )
    ){}

    //! Destructor
    ~enumerable_thread_specific() {
        if(my_construct_callback) my_construct_callback->destroy();
        // Deallocate the hash table before overridden free_array() becomes inaccessible
        this->ets_base<ETS_key_type>::table_clear();
    }

    //! returns reference to local, discarding exists
    reference local() {
        bool exists;
        return local(exists);
    }

    //! Returns reference to calling thread's local copy, creating one if necessary
    reference local(bool& exists)  {
        void* ptr = this->table_lookup(exists);
        return *(T*)ptr;
    }

    //! Get the number of local copies
    size_type size() const { return my_locals.size(); }

    //! true if there have been no local copies created
    bool empty() const { return my_locals.empty(); }

    //! begin iterator
    iterator begin() { return iterator( my_locals, 0 ); }
    //! end iterator
    iterator end() { return iterator(my_locals, my_locals.size() ); }

    //! begin const iterator
    const_iterator begin() const { return const_iterator(my_locals, 0); }

    //! end const iterator
    const_iterator end() const { return const_iterator(my_locals, my_locals.size()); }

    //! Get range for parallel algorithms
    range_type range( std::size_t grainsize=1 ) { return range_type( begin(), end(), grainsize ); }

    //! Get const range for parallel algorithms
    const_range_type range( std::size_t grainsize=1 ) const { return const_range_type( begin(), end(), grainsize ); }

    //! Destroys local copies
    void clear() {
        my_locals.clear();
        this->table_clear();
        // callback is not destroyed
    }

private:
    template<typename A2, ets_key_usage_type C2>
    void internal_copy(const enumerable_thread_specific<T, A2, C2>& other) {
        // this tests is_compatible_ets
        static_assert( (is_compatible_ets<T, typename std::decay<decltype(other)>::type>::value), "is_compatible_ets fails" );
        // Initialize my_construct_callback first, so that it is valid even if rest of this routine throws an exception.
        my_construct_callback = other.my_construct_callback->clone();
        __TBB_ASSERT(my_locals.size()==0, nullptr);
        my_locals.reserve(other.size());
        this->table_elementwise_copy( other, create_local_by_copy );
    }

    void internal_swap(enumerable_thread_specific& other) {
        using std::swap;
        __TBB_ASSERT( this!=&other, nullptr);
        swap(my_construct_callback, other.my_construct_callback);
        // concurrent_vector::swap() preserves storage space,
        // so addresses to the vector kept in ETS hash table remain valid.
        swap(my_locals, other.my_locals);
        this->ets_base<ETS_key_type>::table_swap(other);
    }

    template<typename A2, ets_key_usage_type C2>
    void internal_move(enumerable_thread_specific<T, A2, C2>&& other) {
        static_assert( (is_compatible_ets<T, typename std::decay<decltype(other)>::type>::value), "is_compatible_ets fails" );
        my_construct_callback = other.my_construct_callback;
        other.my_construct_callback = nullptr;
        __TBB_ASSERT(my_locals.size()==0, nullptr);
        my_locals.reserve(other.size());
        this->table_elementwise_copy( other, create_local_by_move );
    }

public:
    enumerable_thread_specific( const enumerable_thread_specific& other )
    : ets_base<ETS_key_type>() /* prevents GCC warnings with -Wextra */
    {
        internal_copy(other);
    }

    template<typename Alloc, ets_key_usage_type Cachetype>
    enumerable_thread_specific( const enumerable_thread_specific<T, Alloc, Cachetype>& other )
    {
        internal_copy(other);
    }

    enumerable_thread_specific( enumerable_thread_specific&& other ) : my_construct_callback()
    {
        // TODO: use internal_move correctly here
        internal_swap(other);
    }

    template<typename Alloc, ets_key_usage_type Cachetype>
    enumerable_thread_specific( enumerable_thread_specific<T, Alloc, Cachetype>&& other ) : my_construct_callback()
    {
        internal_move(std::move(other));
    }

    enumerable_thread_specific& operator=( const enumerable_thread_specific& other )
    {
        if( this != &other ) {
            this->clear();
            my_construct_callback->destroy();
            internal_copy( other );
        }
        return *this;
    }

    template<typename Alloc, ets_key_usage_type Cachetype>
    enumerable_thread_specific& operator=( const enumerable_thread_specific<T, Alloc, Cachetype>& other )
    {
        __TBB_ASSERT( static_cast<void*>(this)!=static_cast<const void*>(&other), nullptr); // Objects of different types
        this->clear();
        my_construct_callback->destroy();
        internal_copy(other);
        return *this;
    }

    enumerable_thread_specific& operator=( enumerable_thread_specific&& other )
    {
        if( this != &other ) {
            // TODO: use internal_move correctly here
            internal_swap(other);
        }
        return *this;
    }

    template<typename Alloc, ets_key_usage_type Cachetype>
    enumerable_thread_specific& operator=( enumerable_thread_specific<T, Alloc, Cachetype>&& other )
    {
        __TBB_ASSERT( static_cast<void*>(this)!=static_cast<const void*>(&other), nullptr); // Objects of different types
        this->clear();
        my_construct_callback->destroy();
        internal_move(std::move(other));
        return *this;
    }

    // CombineFunc has signature T(T,T) or T(const T&, const T&)
    template <typename CombineFunc>
    T combine(CombineFunc f_combine) {
        if(begin() == end()) {
            ets_element<T> location;
            my_construct_callback->construct(location.value());
            return *location.value_committed();
        }
        const_iterator ci = begin();
        T my_result = *ci;
        while(++ci != end())
            my_result = f_combine( my_result, *ci );
        return my_result;
    }

    // combine_func_t takes T by value or by [const] reference, and returns nothing
    template <typename CombineFunc>
    void combine_each(CombineFunc f_combine) {
        for(iterator ci = begin(); ci != end(); ++ci) {
            f_combine( *ci );
        }
    }

}; // enumerable_thread_specific
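
// Usage sketch (an assumption for illustration only; tbb::parallel_for comes
// from "parallel_for.h" and std::plus from <functional>, neither of which this
// header includes):
//
//     tbb::enumerable_thread_specific<int> counters;  // one lazily created int per thread
//     tbb::parallel_for(0, 1000000, [&](int) { ++counters.local(); });
//     int total = counters.combine(std::plus<int>()); // serial reduction over all copies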

template< typename Container >
class flattened2d {
    // This intermediate typedef is to address issues with VC7.1 compilers
    using conval_type = typename Container::value_type;

public:
    //! Basic types
    using size_type = typename conval_type::size_type;
    using difference_type = typename conval_type::difference_type;
    using allocator_type = typename conval_type::allocator_type;
    using value_type = typename conval_type::value_type;
    using reference = typename conval_type::reference;
    using const_reference = typename conval_type::const_reference;
    using pointer = typename conval_type::pointer;
    using const_pointer = typename conval_type::const_pointer;

    using iterator = segmented_iterator<Container, value_type>;
    using const_iterator = segmented_iterator<Container, const value_type>;

    flattened2d( const Container &c, typename Container::const_iterator b, typename Container::const_iterator e ) :
        my_container(const_cast<Container*>(&c)), my_begin(b), my_end(e) { }

    explicit flattened2d( const Container &c ) :
        my_container(const_cast<Container*>(&c)), my_begin(c.begin()), my_end(c.end()) { }

    iterator begin() { return iterator(*my_container) = my_begin; }
    iterator end() { return iterator(*my_container) = my_end; }
    const_iterator begin() const { return const_iterator(*my_container) = my_begin; }
    const_iterator end() const { return const_iterator(*my_container) = my_end; }

    size_type size() const {
        size_type tot_size = 0;
        for(typename Container::const_iterator i = my_begin; i != my_end; ++i) {
            tot_size += i->size();
        }
        return tot_size;
    }

private:
    Container *my_container;
    typename Container::const_iterator my_begin;
    typename Container::const_iterator my_end;
};

template <typename Container>
flattened2d<Container> flatten2d(const Container &c, const typename Container::const_iterator b, const typename Container::const_iterator e) {
    return flattened2d<Container>(c, b, e);
}

template <typename Container>
flattened2d<Container> flatten2d(const Container &c) {
    return flattened2d<Container>(c);
}
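
// Usage sketch for flatten2d, assuming each thread-local copy is itself a
// container (the inner container must provide begin(), end(), and empty();
// size() is also needed if flattened2d::size() is used):
//
//     tbb::enumerable_thread_specific<std::vector<int>> bins;
//     // ... each thread appends to bins.local() in parallel ...
//     for (int v : tbb::flatten2d(bins)) { /* visits every element of every local vector */ }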

} // namespace d1
} // namespace detail

inline namespace v1 {
using detail::d1::enumerable_thread_specific;
using detail::d1::flattened2d;
using detail::d1::flatten2d;
// ets enum keys
using detail::d1::ets_key_usage_type;
using detail::d1::ets_key_per_instance;
using detail::d1::ets_no_key;
#if __TBB_RESUMABLE_TASKS
using detail::d1::ets_suspend_aware;
#endif
} // inline namespace v1

} // namespace tbb

#endif // __TBB_enumerable_thread_specific_H