Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-05-10 08:43:03

0001 //===- ConcurrentHashtable.h ------------------------------------*- C++ -*-===//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 
0009 #ifndef LLVM_ADT_CONCURRENTHASHTABLE_H
0010 #define LLVM_ADT_CONCURRENTHASHTABLE_H
0011 
0012 #include "llvm/ADT/DenseMap.h"
0013 #include "llvm/ADT/Hashing.h"
0014 #include "llvm/ADT/STLExtras.h"
0015 #include "llvm/ADT/SmallVector.h"
0016 #include "llvm/Config/llvm-config.h" // for LLVM_ENABLE_THREADS
0017 #include "llvm/Support/Allocator.h"
0018 #include "llvm/Support/Debug.h"
0019 #include "llvm/Support/Parallel.h"
0020 #include "llvm/Support/WithColor.h"
0021 #include "llvm/Support/xxhash.h"
0022 #include <atomic>
0023 #include <cstddef>
0024 #include <iomanip>
0025 #include <mutex>
0026 #include <sstream>
0027 #include <type_traits>
0028 
0029 namespace llvm {
0030 
0031 /// ConcurrentHashTable - is a resizeable concurrent hashtable.
0032 /// The number of resizings limited up to x2^31. This hashtable is
0033 /// useful to have efficient access to aggregate data(like strings,
0034 /// type descriptors...) and to keep only single copy of such
0035 /// an aggregate. The hashtable allows only concurrent insertions:
0036 ///
0037 /// KeyDataTy* = insert ( const KeyTy& );
0038 ///
0039 /// Data structure:
0040 ///
0041 /// Inserted value KeyTy is mapped to 64-bit hash value ->
0042 ///
0043 ///          [------- 64-bit Hash value --------]
0044 ///          [  StartEntryIndex ][ Bucket Index ]
0045 ///                    |                |
0046 ///              points to the     points to
0047 ///              first probe       the bucket.
0048 ///              position inside
0049 ///              bucket entries
0050 ///
0051 /// After initialization, all buckets have an initial size. During insertions,
0052 /// buckets might be extended to contain more entries. Each bucket can be
0053 /// independently resized and rehashed(no need to lock the whole table).
0054 /// Different buckets may have different sizes. If the single bucket is full
0055 /// then the bucket is resized.
0056 ///
0057 /// BucketsArray keeps all buckets. Each bucket keeps an array of Entries
0058 /// (pointers to KeyDataTy) and another array of entries hashes:
0059 ///
0060 /// BucketsArray[BucketIdx].Hashes[EntryIdx]:
0061 /// BucketsArray[BucketIdx].Entries[EntryIdx]:
0062 ///
0063 /// [Bucket 0].Hashes -> [uint32_t][uint32_t]
0064 /// [Bucket 0].Entries -> [KeyDataTy*][KeyDataTy*]
0065 ///
0066 /// [Bucket 1].Hashes -> [uint32_t][uint32_t][uint32_t][uint32_t]
0067 /// [Bucket 1].Entries -> [KeyDataTy*][KeyDataTy*][KeyDataTy*][KeyDataTy*]
0068 ///                      .........................
0069 /// [Bucket N].Hashes -> [uint32_t][uint32_t][uint32_t]
0070 /// [Bucket N].Entries -> [KeyDataTy*][KeyDataTy*][KeyDataTy*]
0071 ///
0072 /// ConcurrentHashTableByPtr uses an external thread-safe allocator to allocate
0073 /// KeyDataTy items.
0074 
0075 template <typename KeyTy, typename KeyDataTy, typename AllocatorTy>
0076 class ConcurrentHashTableInfoByPtr {
0077 public:
0078   /// \returns Hash value for the specified \p Key.
0079   static inline uint64_t getHashValue(const KeyTy &Key) {
0080     return xxh3_64bits(Key);
0081   }
0082 
0083   /// \returns true if both \p LHS and \p RHS are equal.
0084   static inline bool isEqual(const KeyTy &LHS, const KeyTy &RHS) {
0085     return LHS == RHS;
0086   }
0087 
0088   /// \returns key for the specified \p KeyData.
0089   static inline const KeyTy &getKey(const KeyDataTy &KeyData) {
0090     return KeyData.getKey();
0091   }
0092 
0093   /// \returns newly created object of KeyDataTy type.
0094   static inline KeyDataTy *create(const KeyTy &Key, AllocatorTy &Allocator) {
0095     return KeyDataTy::create(Key, Allocator);
0096   }
0097 };
0098 
0099 template <typename KeyTy, typename KeyDataTy, typename AllocatorTy,
0100           typename Info =
0101               ConcurrentHashTableInfoByPtr<KeyTy, KeyDataTy, AllocatorTy>>
0102 class ConcurrentHashTableByPtr {
0103 public:
0104   ConcurrentHashTableByPtr(
0105       AllocatorTy &Allocator, uint64_t EstimatedSize = 100000,
0106       size_t ThreadsNum = parallel::strategy.compute_thread_count(),
0107       size_t InitialNumberOfBuckets = 128)
0108       : MultiThreadAllocator(Allocator) {
0109     assert((ThreadsNum > 0) && "ThreadsNum must be greater than 0");
0110     assert((InitialNumberOfBuckets > 0) &&
0111            "InitialNumberOfBuckets must be greater than 0");
0112 
0113     // Calculate number of buckets.
0114     uint64_t EstimatedNumberOfBuckets = ThreadsNum;
0115     if (ThreadsNum > 1) {
0116       EstimatedNumberOfBuckets *= InitialNumberOfBuckets;
0117       EstimatedNumberOfBuckets *= std::max(
0118           1,
0119           countr_zero(PowerOf2Ceil(EstimatedSize / InitialNumberOfBuckets)) >>
0120               2);
0121     }
0122     EstimatedNumberOfBuckets = PowerOf2Ceil(EstimatedNumberOfBuckets);
0123     NumberOfBuckets =
0124         std::min(EstimatedNumberOfBuckets, (uint64_t)(1Ull << 31));
0125 
0126     // Allocate buckets.
0127     BucketsArray = std::make_unique<Bucket[]>(NumberOfBuckets);
0128 
0129     InitialBucketSize = EstimatedSize / NumberOfBuckets;
0130     InitialBucketSize = std::max((uint32_t)1, InitialBucketSize);
0131     InitialBucketSize = PowerOf2Ceil(InitialBucketSize);
0132 
0133     // Initialize each bucket.
0134     for (uint32_t Idx = 0; Idx < NumberOfBuckets; Idx++) {
0135       HashesPtr Hashes = new ExtHashBitsTy[InitialBucketSize];
0136       memset(Hashes, 0, sizeof(ExtHashBitsTy) * InitialBucketSize);
0137 
0138       DataPtr Entries = new EntryDataTy[InitialBucketSize];
0139       memset(Entries, 0, sizeof(EntryDataTy) * InitialBucketSize);
0140 
0141       BucketsArray[Idx].Size = InitialBucketSize;
0142       BucketsArray[Idx].Hashes = Hashes;
0143       BucketsArray[Idx].Entries = Entries;
0144     }
0145 
0146     // Calculate masks.
0147     HashMask = NumberOfBuckets - 1;
0148 
0149     size_t LeadingZerosNumber = countl_zero(HashMask);
0150     HashBitsNum = 64 - LeadingZerosNumber;
0151 
0152     // We keep only high 32-bits of hash value. So bucket size cannot
0153     // exceed 2^31. Bucket size is always power of two.
0154     MaxBucketSize = 1Ull << (std::min((size_t)31, LeadingZerosNumber));
0155 
0156     // Calculate mask for extended hash bits.
0157     ExtHashMask = (uint64_t)NumberOfBuckets * MaxBucketSize - 1;
0158   }
0159 
0160   virtual ~ConcurrentHashTableByPtr() {
0161     // Deallocate buckets.
0162     for (uint32_t Idx = 0; Idx < NumberOfBuckets; Idx++) {
0163       delete[] BucketsArray[Idx].Hashes;
0164       delete[] BucketsArray[Idx].Entries;
0165     }
0166   }
0167 
0168   /// Insert new value \p NewValue or return already existing entry.
0169   ///
0170   /// \returns entry and "true" if an entry is just inserted or
0171   /// "false" if an entry already exists.
0172   std::pair<KeyDataTy *, bool> insert(const KeyTy &NewValue) {
0173     // Calculate bucket index.
0174     uint64_t Hash = Info::getHashValue(NewValue);
0175     Bucket &CurBucket = BucketsArray[getBucketIdx(Hash)];
0176     uint32_t ExtHashBits = getExtHashBits(Hash);
0177 
0178 #if LLVM_ENABLE_THREADS
0179     // Lock bucket.
0180     CurBucket.Guard.lock();
0181 #endif
0182 
0183     HashesPtr BucketHashes = CurBucket.Hashes;
0184     DataPtr BucketEntries = CurBucket.Entries;
0185     uint32_t CurEntryIdx = getStartIdx(ExtHashBits, CurBucket.Size);
0186 
0187     while (true) {
0188       uint32_t CurEntryHashBits = BucketHashes[CurEntryIdx];
0189 
0190       if (CurEntryHashBits == 0 && BucketEntries[CurEntryIdx] == nullptr) {
0191         // Found empty slot. Insert data.
0192         KeyDataTy *NewData = Info::create(NewValue, MultiThreadAllocator);
0193         BucketEntries[CurEntryIdx] = NewData;
0194         BucketHashes[CurEntryIdx] = ExtHashBits;
0195 
0196         CurBucket.NumberOfEntries++;
0197         RehashBucket(CurBucket);
0198 
0199 #if LLVM_ENABLE_THREADS
0200         CurBucket.Guard.unlock();
0201 #endif
0202 
0203         return {NewData, true};
0204       }
0205 
0206       if (CurEntryHashBits == ExtHashBits) {
0207         // Hash matched. Check value for equality.
0208         KeyDataTy *EntryData = BucketEntries[CurEntryIdx];
0209         if (Info::isEqual(Info::getKey(*EntryData), NewValue)) {
0210           // Already existed entry matched with inserted data is found.
0211 #if LLVM_ENABLE_THREADS
0212           CurBucket.Guard.unlock();
0213 #endif
0214 
0215           return {EntryData, false};
0216         }
0217       }
0218 
0219       CurEntryIdx++;
0220       CurEntryIdx &= (CurBucket.Size - 1);
0221     }
0222 
0223     llvm_unreachable("Insertion error.");
0224     return {};
0225   }
0226 
0227   /// Print information about current state of hash table structures.
0228   void printStatistic(raw_ostream &OS) {
0229     OS << "\n--- HashTable statistic:\n";
0230     OS << "\nNumber of buckets = " << NumberOfBuckets;
0231     OS << "\nInitial bucket size = " << InitialBucketSize;
0232 
0233     uint64_t NumberOfNonEmptyBuckets = 0;
0234     uint64_t NumberOfEntriesPlusEmpty = 0;
0235     uint64_t OverallNumberOfEntries = 0;
0236     uint64_t OverallSize = sizeof(*this) + NumberOfBuckets * sizeof(Bucket);
0237 
0238     DenseMap<uint32_t, uint32_t> BucketSizesMap;
0239 
0240     // For each bucket...
0241     for (uint32_t Idx = 0; Idx < NumberOfBuckets; Idx++) {
0242       Bucket &CurBucket = BucketsArray[Idx];
0243 
0244       BucketSizesMap[CurBucket.Size]++;
0245 
0246       if (CurBucket.NumberOfEntries != 0)
0247         NumberOfNonEmptyBuckets++;
0248       NumberOfEntriesPlusEmpty += CurBucket.Size;
0249       OverallNumberOfEntries += CurBucket.NumberOfEntries;
0250       OverallSize +=
0251           (sizeof(ExtHashBitsTy) + sizeof(EntryDataTy)) * CurBucket.Size;
0252     }
0253 
0254     OS << "\nOverall number of entries = " << OverallNumberOfEntries;
0255     OS << "\nOverall number of non empty buckets = " << NumberOfNonEmptyBuckets;
0256     for (auto &BucketSize : BucketSizesMap)
0257       OS << "\n Number of buckets with size " << BucketSize.first << ": "
0258          << BucketSize.second;
0259 
0260     std::stringstream stream;
0261     stream << std::fixed << std::setprecision(2)
0262            << ((float)OverallNumberOfEntries / (float)NumberOfEntriesPlusEmpty);
0263     std::string str = stream.str();
0264 
0265     OS << "\nLoad factor = " << str;
0266     OS << "\nOverall allocated size = " << OverallSize;
0267   }
0268 
0269 protected:
0270   using ExtHashBitsTy = uint32_t;
0271   using EntryDataTy = KeyDataTy *;
0272 
0273   using HashesPtr = ExtHashBitsTy *;
0274   using DataPtr = EntryDataTy *;
0275 
0276   // Bucket structure. Keeps bucket data.
0277   struct Bucket {
0278     Bucket() = default;
0279 
0280     // Size of bucket.
0281     uint32_t Size = 0;
0282 
0283     // Number of non-null entries.
0284     uint32_t NumberOfEntries = 0;
0285 
0286     // Hashes for [Size] entries.
0287     HashesPtr Hashes = nullptr;
0288 
0289     // [Size] entries.
0290     DataPtr Entries = nullptr;
0291 
0292 #if LLVM_ENABLE_THREADS
0293     // Mutex for this bucket.
0294     std::mutex Guard;
0295 #endif
0296   };
0297 
0298   // Reallocate and rehash bucket if this is full enough.
0299   void RehashBucket(Bucket &CurBucket) {
0300     assert((CurBucket.Size > 0) && "Uninitialised bucket");
0301     if (CurBucket.NumberOfEntries < CurBucket.Size * 0.9)
0302       return;
0303 
0304     if (CurBucket.Size >= MaxBucketSize)
0305       report_fatal_error("ConcurrentHashTable is full");
0306 
0307     uint32_t NewBucketSize = CurBucket.Size << 1;
0308     assert((NewBucketSize <= MaxBucketSize) && "New bucket size is too big");
0309     assert((CurBucket.Size < NewBucketSize) &&
0310            "New bucket size less than size of current bucket");
0311 
0312     // Store old entries & hashes arrays.
0313     HashesPtr SrcHashes = CurBucket.Hashes;
0314     DataPtr SrcEntries = CurBucket.Entries;
0315 
0316     // Allocate new entries&hashes arrays.
0317     HashesPtr DestHashes = new ExtHashBitsTy[NewBucketSize];
0318     memset(DestHashes, 0, sizeof(ExtHashBitsTy) * NewBucketSize);
0319 
0320     DataPtr DestEntries = new EntryDataTy[NewBucketSize];
0321     memset(DestEntries, 0, sizeof(EntryDataTy) * NewBucketSize);
0322 
0323     // For each entry in source arrays...
0324     for (uint32_t CurSrcEntryIdx = 0; CurSrcEntryIdx < CurBucket.Size;
0325          CurSrcEntryIdx++) {
0326       uint32_t CurSrcEntryHashBits = SrcHashes[CurSrcEntryIdx];
0327 
0328       // Check for null entry.
0329       if (CurSrcEntryHashBits == 0 && SrcEntries[CurSrcEntryIdx] == nullptr)
0330         continue;
0331 
0332       uint32_t StartDestIdx = getStartIdx(CurSrcEntryHashBits, NewBucketSize);
0333 
0334       // Insert non-null entry into the new arrays.
0335       while (true) {
0336         uint32_t CurDestEntryHashBits = DestHashes[StartDestIdx];
0337 
0338         if (CurDestEntryHashBits == 0 && DestEntries[StartDestIdx] == nullptr) {
0339           // Found empty slot. Insert data.
0340           DestHashes[StartDestIdx] = CurSrcEntryHashBits;
0341           DestEntries[StartDestIdx] = SrcEntries[CurSrcEntryIdx];
0342           break;
0343         }
0344 
0345         StartDestIdx++;
0346         StartDestIdx = StartDestIdx & (NewBucketSize - 1);
0347       }
0348     }
0349 
0350     // Update bucket fields.
0351     CurBucket.Hashes = DestHashes;
0352     CurBucket.Entries = DestEntries;
0353     CurBucket.Size = NewBucketSize;
0354 
0355     // Delete old bucket entries.
0356     if (SrcHashes != nullptr)
0357       delete[] SrcHashes;
0358     if (SrcEntries != nullptr)
0359       delete[] SrcEntries;
0360   }
0361 
0362   uint32_t getBucketIdx(hash_code Hash) { return Hash & HashMask; }
0363 
0364   uint32_t getExtHashBits(uint64_t Hash) {
0365     return (Hash & ExtHashMask) >> HashBitsNum;
0366   }
0367 
0368   uint32_t getStartIdx(uint32_t ExtHashBits, uint32_t BucketSize) {
0369     assert((BucketSize > 0) && "Empty bucket");
0370 
0371     return ExtHashBits & (BucketSize - 1);
0372   }
0373 
0374   // Number of bits in hash mask.
0375   uint64_t HashBitsNum = 0;
0376 
0377   // Hash mask.
0378   uint64_t HashMask = 0;
0379 
0380   // Hash mask for the extended hash bits.
0381   uint64_t ExtHashMask = 0;
0382 
0383   // The maximal bucket size.
0384   uint32_t MaxBucketSize = 0;
0385 
0386   // Initial size of bucket.
0387   uint32_t InitialBucketSize = 0;
0388 
0389   // The number of buckets.
0390   uint32_t NumberOfBuckets = 0;
0391 
0392   // Array of buckets.
0393   std::unique_ptr<Bucket[]> BucketsArray;
0394 
0395   // Used for allocating KeyDataTy values.
0396   AllocatorTy &MultiThreadAllocator;
0397 };
0398 
0399 } // end namespace llvm
0400 
0401 #endif // LLVM_ADT_CONCURRENTHASHTABLE_H