File indexing completed on 2026-05-10 08:44:33
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013 #ifndef LLVM_SUPPORT_ONDISKHASHTABLE_H
0014 #define LLVM_SUPPORT_ONDISKHASHTABLE_H
0015
0016 #include "llvm/Support/Alignment.h"
0017 #include "llvm/Support/Allocator.h"
0018 #include "llvm/Support/DataTypes.h"
0019 #include "llvm/Support/EndianStream.h"
0020 #include "llvm/Support/MathExtras.h"
0021 #include "llvm/Support/raw_ostream.h"
0022 #include <cassert>
0023 #include <cstdlib>
0024
0025 namespace llvm {
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055
0056
0057
0058 template <typename Info> class OnDiskChainedHashTableGenerator {
0059
0060 class Item {
0061 public:
0062 typename Info::key_type Key;
0063 typename Info::data_type Data;
0064 Item *Next;
0065 const typename Info::hash_value_type Hash;
0066
0067 Item(typename Info::key_type_ref Key, typename Info::data_type_ref Data,
0068 Info &InfoObj)
0069 : Key(Key), Data(Data), Next(nullptr), Hash(InfoObj.ComputeHash(Key)) {}
0070 };
0071
0072 typedef typename Info::offset_type offset_type;
0073 offset_type NumBuckets;
0074 offset_type NumEntries;
0075 llvm::SpecificBumpPtrAllocator<Item> BA;
0076
0077
0078 struct Bucket {
0079 offset_type Off;
0080 unsigned Length;
0081 Item *Head;
0082 };
0083
0084 Bucket *Buckets;
0085
0086 private:
0087
0088 void insert(Bucket *Buckets, size_t Size, Item *E) {
0089 Bucket &B = Buckets[E->Hash & (Size - 1)];
0090 E->Next = B.Head;
0091 ++B.Length;
0092 B.Head = E;
0093 }
0094
0095
0096 void resize(size_t NewSize) {
0097 Bucket *NewBuckets = static_cast<Bucket *>(
0098 safe_calloc(NewSize, sizeof(Bucket)));
0099
0100 for (size_t I = 0; I < NumBuckets; ++I)
0101 for (Item *E = Buckets[I].Head; E;) {
0102 Item *N = E->Next;
0103 E->Next = nullptr;
0104 insert(NewBuckets, NewSize, E);
0105 E = N;
0106 }
0107
0108 free(Buckets);
0109 NumBuckets = NewSize;
0110 Buckets = NewBuckets;
0111 }
0112
0113 public:
0114
0115 void insert(typename Info::key_type_ref Key,
0116 typename Info::data_type_ref Data) {
0117 Info InfoObj;
0118 insert(Key, Data, InfoObj);
0119 }
0120
0121
0122
0123
0124 void insert(typename Info::key_type_ref Key,
0125 typename Info::data_type_ref Data, Info &InfoObj) {
0126 ++NumEntries;
0127 if (4 * NumEntries >= 3 * NumBuckets)
0128 resize(NumBuckets * 2);
0129 insert(Buckets, NumBuckets, new (BA.Allocate()) Item(Key, Data, InfoObj));
0130 }
0131
0132
0133 bool contains(typename Info::key_type_ref Key, Info &InfoObj) {
0134 unsigned Hash = InfoObj.ComputeHash(Key);
0135 for (Item *I = Buckets[Hash & (NumBuckets - 1)].Head; I; I = I->Next)
0136 if (I->Hash == Hash && InfoObj.EqualKey(I->Key, Key))
0137 return true;
0138 return false;
0139 }
0140
0141
0142 offset_type Emit(raw_ostream &Out) {
0143 Info InfoObj;
0144 return Emit(Out, InfoObj);
0145 }
0146
0147
0148
0149
0150 offset_type Emit(raw_ostream &Out, Info &InfoObj) {
0151 using namespace llvm::support;
0152 endian::Writer LE(Out, llvm::endianness::little);
0153
0154
0155
0156
0157
0158
0159
0160
0161
0162
0163
0164
0165 unsigned TargetNumBuckets =
0166 NumEntries <= 2 ? 1 : llvm::bit_ceil(NumEntries * 4 / 3 + 1);
0167 if (TargetNumBuckets != NumBuckets)
0168 resize(TargetNumBuckets);
0169
0170
0171 for (offset_type I = 0; I < NumBuckets; ++I) {
0172 Bucket &B = Buckets[I];
0173 if (!B.Head)
0174 continue;
0175
0176
0177 B.Off = Out.tell();
0178 assert(B.Off && "Cannot write a bucket at offset 0. Please add padding.");
0179
0180
0181 LE.write<uint16_t>(B.Length);
0182 assert(B.Length != 0 && "Bucket has a head but zero length?");
0183
0184
0185 for (Item *I = B.Head; I; I = I->Next) {
0186 LE.write<typename Info::hash_value_type>(I->Hash);
0187 const std::pair<offset_type, offset_type> &Len =
0188 InfoObj.EmitKeyDataLength(Out, I->Key, I->Data);
0189 #ifdef NDEBUG
0190 InfoObj.EmitKey(Out, I->Key, Len.first);
0191 InfoObj.EmitData(Out, I->Key, I->Data, Len.second);
0192 #else
0193
0194
0195 uint64_t KeyStart = Out.tell();
0196 InfoObj.EmitKey(Out, I->Key, Len.first);
0197 uint64_t DataStart = Out.tell();
0198 InfoObj.EmitData(Out, I->Key, I->Data, Len.second);
0199 uint64_t End = Out.tell();
0200 assert(offset_type(DataStart - KeyStart) == Len.first &&
0201 "key length does not match bytes written");
0202 assert(offset_type(End - DataStart) == Len.second &&
0203 "data length does not match bytes written");
0204 #endif
0205 }
0206 }
0207
0208
0209 offset_type TableOff = Out.tell();
0210 uint64_t N = offsetToAlignment(TableOff, Align(alignof(offset_type)));
0211 TableOff += N;
0212 while (N--)
0213 LE.write<uint8_t>(0);
0214
0215
0216 LE.write<offset_type>(NumBuckets);
0217 LE.write<offset_type>(NumEntries);
0218 for (offset_type I = 0; I < NumBuckets; ++I)
0219 LE.write<offset_type>(Buckets[I].Off);
0220
0221 return TableOff;
0222 }
0223
0224 OnDiskChainedHashTableGenerator() {
0225 NumEntries = 0;
0226 NumBuckets = 64;
0227
0228
0229 Buckets = static_cast<Bucket *>(safe_calloc(NumBuckets, sizeof(Bucket)));
0230 }
0231
0232 ~OnDiskChainedHashTableGenerator() { std::free(Buckets); }
0233 };
0234
0235
0236
0237
0238
0239
0240
0241
0242
0243
0244
0245
0246
0247
0248
0249
0250
0251
0252
0253
0254
0255
0256
0257
0258
0259
0260
0261
0262
0263
0264
0265
0266
0267
0268
0269
0270
0271
0272
0273 template <typename Info> class OnDiskChainedHashTable {
0274 const typename Info::offset_type NumBuckets;
0275 const typename Info::offset_type NumEntries;
0276 const unsigned char *const Buckets;
0277 const unsigned char *const Base;
0278 Info InfoObj;
0279
0280 public:
0281 typedef Info InfoType;
0282 typedef typename Info::internal_key_type internal_key_type;
0283 typedef typename Info::external_key_type external_key_type;
0284 typedef typename Info::data_type data_type;
0285 typedef typename Info::hash_value_type hash_value_type;
0286 typedef typename Info::offset_type offset_type;
0287
0288 OnDiskChainedHashTable(offset_type NumBuckets, offset_type NumEntries,
0289 const unsigned char *Buckets,
0290 const unsigned char *Base,
0291 const Info &InfoObj = Info())
0292 : NumBuckets(NumBuckets), NumEntries(NumEntries), Buckets(Buckets),
0293 Base(Base), InfoObj(InfoObj) {
0294 assert((reinterpret_cast<uintptr_t>(Buckets) & 0x3) == 0 &&
0295 "'buckets' must have a 4-byte alignment");
0296 }
0297
0298
0299
0300
0301 static std::pair<offset_type, offset_type>
0302 readNumBucketsAndEntries(const unsigned char *&Buckets) {
0303 assert((reinterpret_cast<uintptr_t>(Buckets) & 0x3) == 0 &&
0304 "buckets should be 4-byte aligned.");
0305 using namespace llvm::support;
0306 offset_type NumBuckets =
0307 endian::readNext<offset_type, llvm::endianness::little, aligned>(
0308 Buckets);
0309 offset_type NumEntries =
0310 endian::readNext<offset_type, llvm::endianness::little, aligned>(
0311 Buckets);
0312 return std::make_pair(NumBuckets, NumEntries);
0313 }
0314
0315 offset_type getNumBuckets() const { return NumBuckets; }
0316 offset_type getNumEntries() const { return NumEntries; }
0317 const unsigned char *getBase() const { return Base; }
0318 const unsigned char *getBuckets() const { return Buckets; }
0319
0320 bool isEmpty() const { return NumEntries == 0; }
0321
0322 class iterator {
0323 internal_key_type Key;
0324 const unsigned char *const Data;
0325 const offset_type Len;
0326 Info *InfoObj;
0327
0328 public:
0329 iterator() : Key(), Data(nullptr), Len(0), InfoObj(nullptr) {}
0330 iterator(const internal_key_type K, const unsigned char *D, offset_type L,
0331 Info *InfoObj)
0332 : Key(K), Data(D), Len(L), InfoObj(InfoObj) {}
0333
0334 data_type operator*() const { return InfoObj->ReadData(Key, Data, Len); }
0335
0336 const unsigned char *getDataPtr() const { return Data; }
0337 offset_type getDataLen() const { return Len; }
0338
0339 bool operator==(const iterator &X) const { return X.Data == Data; }
0340 bool operator!=(const iterator &X) const { return X.Data != Data; }
0341 };
0342
0343
0344 iterator find(const external_key_type &EKey, Info *InfoPtr = nullptr) {
0345 const internal_key_type &IKey = InfoObj.GetInternalKey(EKey);
0346 hash_value_type KeyHash = InfoObj.ComputeHash(IKey);
0347 return find_hashed(IKey, KeyHash, InfoPtr);
0348 }
0349
0350
0351 iterator find_hashed(const internal_key_type &IKey, hash_value_type KeyHash,
0352 Info *InfoPtr = nullptr) {
0353 using namespace llvm::support;
0354
0355 if (!InfoPtr)
0356 InfoPtr = &InfoObj;
0357
0358
0359 offset_type Idx = KeyHash & (NumBuckets - 1);
0360 const unsigned char *Bucket = Buckets + sizeof(offset_type) * Idx;
0361
0362 offset_type Offset =
0363 endian::readNext<offset_type, llvm::endianness::little, aligned>(
0364 Bucket);
0365 if (Offset == 0)
0366 return iterator();
0367 const unsigned char *Items = Base + Offset;
0368
0369
0370
0371 unsigned Len = endian::readNext<uint16_t, llvm::endianness::little>(Items);
0372
0373 for (unsigned i = 0; i < Len; ++i) {
0374
0375 hash_value_type ItemHash =
0376 endian::readNext<hash_value_type, llvm::endianness::little>(Items);
0377
0378
0379 const std::pair<offset_type, offset_type> &L =
0380 Info::ReadKeyDataLength(Items);
0381 offset_type ItemLen = L.first + L.second;
0382
0383
0384 if (ItemHash != KeyHash) {
0385 Items += ItemLen;
0386 continue;
0387 }
0388
0389
0390 const internal_key_type &X =
0391 InfoPtr->ReadKey((const unsigned char *const)Items, L.first);
0392
0393
0394 if (!InfoPtr->EqualKey(X, IKey)) {
0395 Items += ItemLen;
0396 continue;
0397 }
0398
0399
0400 return iterator(X, Items + L.first, L.second, InfoPtr);
0401 }
0402
0403 return iterator();
0404 }
0405
0406 iterator end() const { return iterator(); }
0407
0408 Info &getInfoObj() { return InfoObj; }
0409
0410
0411
0412
0413
0414
0415
0416
0417
0418
0419 static OnDiskChainedHashTable *Create(const unsigned char *Buckets,
0420 const unsigned char *const Base,
0421 const Info &InfoObj = Info()) {
0422 assert(Buckets > Base);
0423 auto NumBucketsAndEntries = readNumBucketsAndEntries(Buckets);
0424 return new OnDiskChainedHashTable<Info>(NumBucketsAndEntries.first,
0425 NumBucketsAndEntries.second,
0426 Buckets, Base, InfoObj);
0427 }
0428 };
0429
0430
0431
0432
0433 template <typename Info>
0434 class OnDiskIterableChainedHashTable : public OnDiskChainedHashTable<Info> {
0435 const unsigned char *Payload;
0436
0437 public:
0438 typedef OnDiskChainedHashTable<Info> base_type;
0439 typedef typename base_type::internal_key_type internal_key_type;
0440 typedef typename base_type::external_key_type external_key_type;
0441 typedef typename base_type::data_type data_type;
0442 typedef typename base_type::hash_value_type hash_value_type;
0443 typedef typename base_type::offset_type offset_type;
0444
0445 private:
0446
0447 class iterator_base {
0448 const unsigned char *Ptr;
0449 offset_type NumItemsInBucketLeft;
0450 offset_type NumEntriesLeft;
0451
0452 public:
0453 typedef external_key_type value_type;
0454
0455 iterator_base(const unsigned char *const Ptr, offset_type NumEntries)
0456 : Ptr(Ptr), NumItemsInBucketLeft(0), NumEntriesLeft(NumEntries) {}
0457 iterator_base()
0458 : Ptr(nullptr), NumItemsInBucketLeft(0), NumEntriesLeft(0) {}
0459
0460 friend bool operator==(const iterator_base &X, const iterator_base &Y) {
0461 return X.NumEntriesLeft == Y.NumEntriesLeft;
0462 }
0463 friend bool operator!=(const iterator_base &X, const iterator_base &Y) {
0464 return X.NumEntriesLeft != Y.NumEntriesLeft;
0465 }
0466
0467
0468 void advance() {
0469 using namespace llvm::support;
0470 if (!NumItemsInBucketLeft) {
0471
0472
0473 NumItemsInBucketLeft =
0474 endian::readNext<uint16_t, llvm::endianness::little>(Ptr);
0475 }
0476 Ptr += sizeof(hash_value_type);
0477
0478 const std::pair<offset_type, offset_type> &L =
0479 Info::ReadKeyDataLength(Ptr);
0480 Ptr += L.first + L.second;
0481 assert(NumItemsInBucketLeft);
0482 --NumItemsInBucketLeft;
0483 assert(NumEntriesLeft);
0484 --NumEntriesLeft;
0485 }
0486
0487
0488
0489 const unsigned char *getItem() const {
0490 return Ptr + (NumItemsInBucketLeft ? 0 : 2) + sizeof(hash_value_type);
0491 }
0492 };
0493
0494 public:
0495 OnDiskIterableChainedHashTable(offset_type NumBuckets, offset_type NumEntries,
0496 const unsigned char *Buckets,
0497 const unsigned char *Payload,
0498 const unsigned char *Base,
0499 const Info &InfoObj = Info())
0500 : base_type(NumBuckets, NumEntries, Buckets, Base, InfoObj),
0501 Payload(Payload) {}
0502
0503
0504 class key_iterator : public iterator_base {
0505 Info *InfoObj;
0506
0507 public:
0508 typedef external_key_type value_type;
0509
0510 key_iterator(const unsigned char *const Ptr, offset_type NumEntries,
0511 Info *InfoObj)
0512 : iterator_base(Ptr, NumEntries), InfoObj(InfoObj) {}
0513 key_iterator() : iterator_base(), InfoObj() {}
0514
0515 key_iterator &operator++() {
0516 this->advance();
0517 return *this;
0518 }
0519 key_iterator operator++(int) {
0520 key_iterator tmp = *this;
0521 ++*this;
0522 return tmp;
0523 }
0524
0525 internal_key_type getInternalKey() const {
0526 auto *LocalPtr = this->getItem();
0527
0528
0529 auto L = Info::ReadKeyDataLength(LocalPtr);
0530
0531
0532 return InfoObj->ReadKey(LocalPtr, L.first);
0533 }
0534
0535 value_type operator*() const {
0536 return InfoObj->GetExternalKey(getInternalKey());
0537 }
0538 };
0539
0540 key_iterator key_begin() {
0541 return key_iterator(Payload, this->getNumEntries(), &this->getInfoObj());
0542 }
0543 key_iterator key_end() { return key_iterator(); }
0544
0545 iterator_range<key_iterator> keys() {
0546 return make_range(key_begin(), key_end());
0547 }
0548
0549
0550 class data_iterator : public iterator_base {
0551 Info *InfoObj;
0552
0553 public:
0554 typedef data_type value_type;
0555
0556 data_iterator(const unsigned char *const Ptr, offset_type NumEntries,
0557 Info *InfoObj)
0558 : iterator_base(Ptr, NumEntries), InfoObj(InfoObj) {}
0559 data_iterator() : iterator_base(), InfoObj() {}
0560
0561 data_iterator &operator++() {
0562 this->advance();
0563 return *this;
0564 }
0565 data_iterator operator++(int) {
0566 data_iterator tmp = *this;
0567 ++*this;
0568 return tmp;
0569 }
0570
0571 value_type operator*() const {
0572 auto *LocalPtr = this->getItem();
0573
0574
0575 auto L = Info::ReadKeyDataLength(LocalPtr);
0576
0577
0578 const internal_key_type &Key = InfoObj->ReadKey(LocalPtr, L.first);
0579 return InfoObj->ReadData(Key, LocalPtr + L.first, L.second);
0580 }
0581 };
0582
0583 data_iterator data_begin() {
0584 return data_iterator(Payload, this->getNumEntries(), &this->getInfoObj());
0585 }
0586 data_iterator data_end() { return data_iterator(); }
0587
0588 iterator_range<data_iterator> data() {
0589 return make_range(data_begin(), data_end());
0590 }
0591
0592
0593
0594
0595
0596
0597
0598
0599
0600
0601
0602
0603
0604
0605 static OnDiskIterableChainedHashTable *
0606 Create(const unsigned char *Buckets, const unsigned char *const Payload,
0607 const unsigned char *const Base, const Info &InfoObj = Info()) {
0608 assert(Buckets > Base);
0609 auto NumBucketsAndEntries =
0610 OnDiskIterableChainedHashTable<Info>::readNumBucketsAndEntries(Buckets);
0611 return new OnDiskIterableChainedHashTable<Info>(
0612 NumBucketsAndEntries.first, NumBucketsAndEntries.second,
0613 Buckets, Payload, Base, InfoObj);
0614 }
0615 };
0616
0617 }
0618
0619 #endif