File indexing completed on 2026-05-10 08:43:03
0001
0002
0003
0004
0005
0006
0007
0008
0009 #ifndef LLVM_ADT_CONCURRENTHASHTABLE_H
0010 #define LLVM_ADT_CONCURRENTHASHTABLE_H
0011
0012 #include "llvm/ADT/DenseMap.h"
0013 #include "llvm/ADT/Hashing.h"
0014 #include "llvm/ADT/STLExtras.h"
0015 #include "llvm/ADT/SmallVector.h"
0016 #include "llvm/Config/llvm-config.h" // for LLVM_ENABLE_THREADS
0017 #include "llvm/Support/Allocator.h"
0018 #include "llvm/Support/Debug.h"
0019 #include "llvm/Support/Parallel.h"
0020 #include "llvm/Support/WithColor.h"
0021 #include "llvm/Support/xxhash.h"
0022 #include <atomic>
0023 #include <cstddef>
0024 #include <iomanip>
0025 #include <mutex>
0026 #include <sstream>
0027 #include <type_traits>
0028
0029 namespace llvm {
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055
0056
0057
0058
0059
0060
0061
0062
0063
0064
0065
0066
0067
0068
0069
0070
0071
0072
0073
0074
0075 template <typename KeyTy, typename KeyDataTy, typename AllocatorTy>
0076 class ConcurrentHashTableInfoByPtr {
0077 public:
0078
0079 static inline uint64_t getHashValue(const KeyTy &Key) {
0080 return xxh3_64bits(Key);
0081 }
0082
0083
0084 static inline bool isEqual(const KeyTy &LHS, const KeyTy &RHS) {
0085 return LHS == RHS;
0086 }
0087
0088
0089 static inline const KeyTy &getKey(const KeyDataTy &KeyData) {
0090 return KeyData.getKey();
0091 }
0092
0093
0094 static inline KeyDataTy *create(const KeyTy &Key, AllocatorTy &Allocator) {
0095 return KeyDataTy::create(Key, Allocator);
0096 }
0097 };
0098
0099 template <typename KeyTy, typename KeyDataTy, typename AllocatorTy,
0100 typename Info =
0101 ConcurrentHashTableInfoByPtr<KeyTy, KeyDataTy, AllocatorTy>>
0102 class ConcurrentHashTableByPtr {
0103 public:
0104 ConcurrentHashTableByPtr(
0105 AllocatorTy &Allocator, uint64_t EstimatedSize = 100000,
0106 size_t ThreadsNum = parallel::strategy.compute_thread_count(),
0107 size_t InitialNumberOfBuckets = 128)
0108 : MultiThreadAllocator(Allocator) {
0109 assert((ThreadsNum > 0) && "ThreadsNum must be greater than 0");
0110 assert((InitialNumberOfBuckets > 0) &&
0111 "InitialNumberOfBuckets must be greater than 0");
0112
0113
0114 uint64_t EstimatedNumberOfBuckets = ThreadsNum;
0115 if (ThreadsNum > 1) {
0116 EstimatedNumberOfBuckets *= InitialNumberOfBuckets;
0117 EstimatedNumberOfBuckets *= std::max(
0118 1,
0119 countr_zero(PowerOf2Ceil(EstimatedSize / InitialNumberOfBuckets)) >>
0120 2);
0121 }
0122 EstimatedNumberOfBuckets = PowerOf2Ceil(EstimatedNumberOfBuckets);
0123 NumberOfBuckets =
0124 std::min(EstimatedNumberOfBuckets, (uint64_t)(1Ull << 31));
0125
0126
0127 BucketsArray = std::make_unique<Bucket[]>(NumberOfBuckets);
0128
0129 InitialBucketSize = EstimatedSize / NumberOfBuckets;
0130 InitialBucketSize = std::max((uint32_t)1, InitialBucketSize);
0131 InitialBucketSize = PowerOf2Ceil(InitialBucketSize);
0132
0133
0134 for (uint32_t Idx = 0; Idx < NumberOfBuckets; Idx++) {
0135 HashesPtr Hashes = new ExtHashBitsTy[InitialBucketSize];
0136 memset(Hashes, 0, sizeof(ExtHashBitsTy) * InitialBucketSize);
0137
0138 DataPtr Entries = new EntryDataTy[InitialBucketSize];
0139 memset(Entries, 0, sizeof(EntryDataTy) * InitialBucketSize);
0140
0141 BucketsArray[Idx].Size = InitialBucketSize;
0142 BucketsArray[Idx].Hashes = Hashes;
0143 BucketsArray[Idx].Entries = Entries;
0144 }
0145
0146
0147 HashMask = NumberOfBuckets - 1;
0148
0149 size_t LeadingZerosNumber = countl_zero(HashMask);
0150 HashBitsNum = 64 - LeadingZerosNumber;
0151
0152
0153
0154 MaxBucketSize = 1Ull << (std::min((size_t)31, LeadingZerosNumber));
0155
0156
0157 ExtHashMask = (uint64_t)NumberOfBuckets * MaxBucketSize - 1;
0158 }
0159
0160 virtual ~ConcurrentHashTableByPtr() {
0161
0162 for (uint32_t Idx = 0; Idx < NumberOfBuckets; Idx++) {
0163 delete[] BucketsArray[Idx].Hashes;
0164 delete[] BucketsArray[Idx].Entries;
0165 }
0166 }
0167
0168
0169
0170
0171
0172 std::pair<KeyDataTy *, bool> insert(const KeyTy &NewValue) {
0173
0174 uint64_t Hash = Info::getHashValue(NewValue);
0175 Bucket &CurBucket = BucketsArray[getBucketIdx(Hash)];
0176 uint32_t ExtHashBits = getExtHashBits(Hash);
0177
0178 #if LLVM_ENABLE_THREADS
0179
0180 CurBucket.Guard.lock();
0181 #endif
0182
0183 HashesPtr BucketHashes = CurBucket.Hashes;
0184 DataPtr BucketEntries = CurBucket.Entries;
0185 uint32_t CurEntryIdx = getStartIdx(ExtHashBits, CurBucket.Size);
0186
0187 while (true) {
0188 uint32_t CurEntryHashBits = BucketHashes[CurEntryIdx];
0189
0190 if (CurEntryHashBits == 0 && BucketEntries[CurEntryIdx] == nullptr) {
0191
0192 KeyDataTy *NewData = Info::create(NewValue, MultiThreadAllocator);
0193 BucketEntries[CurEntryIdx] = NewData;
0194 BucketHashes[CurEntryIdx] = ExtHashBits;
0195
0196 CurBucket.NumberOfEntries++;
0197 RehashBucket(CurBucket);
0198
0199 #if LLVM_ENABLE_THREADS
0200 CurBucket.Guard.unlock();
0201 #endif
0202
0203 return {NewData, true};
0204 }
0205
0206 if (CurEntryHashBits == ExtHashBits) {
0207
0208 KeyDataTy *EntryData = BucketEntries[CurEntryIdx];
0209 if (Info::isEqual(Info::getKey(*EntryData), NewValue)) {
0210
0211 #if LLVM_ENABLE_THREADS
0212 CurBucket.Guard.unlock();
0213 #endif
0214
0215 return {EntryData, false};
0216 }
0217 }
0218
0219 CurEntryIdx++;
0220 CurEntryIdx &= (CurBucket.Size - 1);
0221 }
0222
0223 llvm_unreachable("Insertion error.");
0224 return {};
0225 }
0226
0227
0228 void printStatistic(raw_ostream &OS) {
0229 OS << "\n--- HashTable statistic:\n";
0230 OS << "\nNumber of buckets = " << NumberOfBuckets;
0231 OS << "\nInitial bucket size = " << InitialBucketSize;
0232
0233 uint64_t NumberOfNonEmptyBuckets = 0;
0234 uint64_t NumberOfEntriesPlusEmpty = 0;
0235 uint64_t OverallNumberOfEntries = 0;
0236 uint64_t OverallSize = sizeof(*this) + NumberOfBuckets * sizeof(Bucket);
0237
0238 DenseMap<uint32_t, uint32_t> BucketSizesMap;
0239
0240
0241 for (uint32_t Idx = 0; Idx < NumberOfBuckets; Idx++) {
0242 Bucket &CurBucket = BucketsArray[Idx];
0243
0244 BucketSizesMap[CurBucket.Size]++;
0245
0246 if (CurBucket.NumberOfEntries != 0)
0247 NumberOfNonEmptyBuckets++;
0248 NumberOfEntriesPlusEmpty += CurBucket.Size;
0249 OverallNumberOfEntries += CurBucket.NumberOfEntries;
0250 OverallSize +=
0251 (sizeof(ExtHashBitsTy) + sizeof(EntryDataTy)) * CurBucket.Size;
0252 }
0253
0254 OS << "\nOverall number of entries = " << OverallNumberOfEntries;
0255 OS << "\nOverall number of non empty buckets = " << NumberOfNonEmptyBuckets;
0256 for (auto &BucketSize : BucketSizesMap)
0257 OS << "\n Number of buckets with size " << BucketSize.first << ": "
0258 << BucketSize.second;
0259
0260 std::stringstream stream;
0261 stream << std::fixed << std::setprecision(2)
0262 << ((float)OverallNumberOfEntries / (float)NumberOfEntriesPlusEmpty);
0263 std::string str = stream.str();
0264
0265 OS << "\nLoad factor = " << str;
0266 OS << "\nOverall allocated size = " << OverallSize;
0267 }
0268
0269 protected:
0270 using ExtHashBitsTy = uint32_t;
0271 using EntryDataTy = KeyDataTy *;
0272
0273 using HashesPtr = ExtHashBitsTy *;
0274 using DataPtr = EntryDataTy *;
0275
0276
0277 struct Bucket {
0278 Bucket() = default;
0279
0280
0281 uint32_t Size = 0;
0282
0283
0284 uint32_t NumberOfEntries = 0;
0285
0286
0287 HashesPtr Hashes = nullptr;
0288
0289
0290 DataPtr Entries = nullptr;
0291
0292 #if LLVM_ENABLE_THREADS
0293
0294 std::mutex Guard;
0295 #endif
0296 };
0297
0298
0299 void RehashBucket(Bucket &CurBucket) {
0300 assert((CurBucket.Size > 0) && "Uninitialised bucket");
0301 if (CurBucket.NumberOfEntries < CurBucket.Size * 0.9)
0302 return;
0303
0304 if (CurBucket.Size >= MaxBucketSize)
0305 report_fatal_error("ConcurrentHashTable is full");
0306
0307 uint32_t NewBucketSize = CurBucket.Size << 1;
0308 assert((NewBucketSize <= MaxBucketSize) && "New bucket size is too big");
0309 assert((CurBucket.Size < NewBucketSize) &&
0310 "New bucket size less than size of current bucket");
0311
0312
0313 HashesPtr SrcHashes = CurBucket.Hashes;
0314 DataPtr SrcEntries = CurBucket.Entries;
0315
0316
0317 HashesPtr DestHashes = new ExtHashBitsTy[NewBucketSize];
0318 memset(DestHashes, 0, sizeof(ExtHashBitsTy) * NewBucketSize);
0319
0320 DataPtr DestEntries = new EntryDataTy[NewBucketSize];
0321 memset(DestEntries, 0, sizeof(EntryDataTy) * NewBucketSize);
0322
0323
0324 for (uint32_t CurSrcEntryIdx = 0; CurSrcEntryIdx < CurBucket.Size;
0325 CurSrcEntryIdx++) {
0326 uint32_t CurSrcEntryHashBits = SrcHashes[CurSrcEntryIdx];
0327
0328
0329 if (CurSrcEntryHashBits == 0 && SrcEntries[CurSrcEntryIdx] == nullptr)
0330 continue;
0331
0332 uint32_t StartDestIdx = getStartIdx(CurSrcEntryHashBits, NewBucketSize);
0333
0334
0335 while (true) {
0336 uint32_t CurDestEntryHashBits = DestHashes[StartDestIdx];
0337
0338 if (CurDestEntryHashBits == 0 && DestEntries[StartDestIdx] == nullptr) {
0339
0340 DestHashes[StartDestIdx] = CurSrcEntryHashBits;
0341 DestEntries[StartDestIdx] = SrcEntries[CurSrcEntryIdx];
0342 break;
0343 }
0344
0345 StartDestIdx++;
0346 StartDestIdx = StartDestIdx & (NewBucketSize - 1);
0347 }
0348 }
0349
0350
0351 CurBucket.Hashes = DestHashes;
0352 CurBucket.Entries = DestEntries;
0353 CurBucket.Size = NewBucketSize;
0354
0355
0356 if (SrcHashes != nullptr)
0357 delete[] SrcHashes;
0358 if (SrcEntries != nullptr)
0359 delete[] SrcEntries;
0360 }
0361
0362 uint32_t getBucketIdx(hash_code Hash) { return Hash & HashMask; }
0363
0364 uint32_t getExtHashBits(uint64_t Hash) {
0365 return (Hash & ExtHashMask) >> HashBitsNum;
0366 }
0367
0368 uint32_t getStartIdx(uint32_t ExtHashBits, uint32_t BucketSize) {
0369 assert((BucketSize > 0) && "Empty bucket");
0370
0371 return ExtHashBits & (BucketSize - 1);
0372 }
0373
0374
0375 uint64_t HashBitsNum = 0;
0376
0377
0378 uint64_t HashMask = 0;
0379
0380
0381 uint64_t ExtHashMask = 0;
0382
0383
0384 uint32_t MaxBucketSize = 0;
0385
0386
0387 uint32_t InitialBucketSize = 0;
0388
0389
0390 uint32_t NumberOfBuckets = 0;
0391
0392
0393 std::unique_ptr<Bucket[]> BucketsArray;
0394
0395
0396 AllocatorTy &MultiThreadAllocator;
0397 };
0398
0399 }
0400
0401 #endif