PDB/Native/HashTable.h

0001 //===- HashTable.h - PDB Hash Table -----------------------------*- C++ -*-===//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008
0009 #ifndef LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
0010 #define LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
0011
0012 #include "llvm/ADT/SparseBitVector.h"
0013 #include "llvm/ADT/iterator.h"
0014 #include "llvm/DebugInfo/PDB/Native/RawError.h"
0015 #include "llvm/Support/BinaryStreamReader.h"
0016 #include "llvm/Support/BinaryStreamWriter.h"
0017 #include "llvm/Support/Endian.h"
0018 #include "llvm/Support/Error.h"
0019 #include <cstdint>
0020 #include <iterator>
0021 #include <utility>
0022 #include <vector>
0023
0024 namespace llvm {
0025
0026 namespace pdb {
0027
0028 Error readSparseBitVector(BinaryStreamReader &Stream, SparseBitVector<> &V);
0029 Error writeSparseBitVector(BinaryStreamWriter &Writer, SparseBitVector<> &Vec);
0030
0031 template <typename ValueT> class HashTable;
0032
0033 template <typename ValueT>
0034 class HashTableIterator
0035     : public iterator_facade_base<HashTableIterator<ValueT>,
0036                                   std::forward_iterator_tag,
0037                                   const std::pair<uint32_t, ValueT>> {
0038   using BaseT = typename HashTableIterator::iterator_facade_base;
0039   friend HashTable<ValueT>;
0040
0041   HashTableIterator(const HashTable<ValueT> &Map, uint32_t Index,
0042                     bool IsEnd)
0043       : Map(&Map), Index(Index), IsEnd(IsEnd) {}
0044
0045 public:
0046   HashTableIterator(const HashTable<ValueT> &Map) : Map(&Map) {
0047     int I = Map.Present.find_first();
0048     if (I == -1) {
0049       Index = 0;
0050       IsEnd = true;
0051     } else {
0052       Index = static_cast<uint32_t>(I);
0053       IsEnd = false;
0054     }
0055   }
0056
0057   HashTableIterator(const HashTableIterator &R) = default;
0058   HashTableIterator &operator=(const HashTableIterator &R) {
0059     Map = R.Map;
0060     return *this;
0061   }
0062   bool operator==(const HashTableIterator &R) const {
0063     if (IsEnd && R.IsEnd)
0064       return true;
0065     if (IsEnd != R.IsEnd)
0066       return false;
0067
0068     return (Map == R.Map) && (Index == R.Index);
0069   }
0070   const std::pair<uint32_t, ValueT> &operator*() const {
0071     assert(Map->Present.test(Index));
0072     return Map->Buckets[Index];
0073   }
0074
0075   // Implement postfix op++ in terms of prefix op++ by using the superclass
0076   // implementation.
0077   using BaseT::operator++;
0078   HashTableIterator &operator++() {
0079     while (Index < Map->Buckets.size()) {
0080       ++Index;
0081       if (Map->Present.test(Index))
0082         return *this;
0083     }
0084
0085     IsEnd = true;
0086     return *this;
0087   }
0088
0089 private:
0090   bool isEnd() const { return IsEnd; }
0091   uint32_t index() const { return Index; }
0092
0093   const HashTable<ValueT> *Map;
0094   uint32_t Index;
0095   bool IsEnd;
0096 };
0097
0098 template <typename ValueT>
0099 class HashTable {
0100   struct Header {
0101     support::ulittle32_t Size;
0102     support::ulittle32_t Capacity;
0103   };
0104
0105   using BucketList = std::vector<std::pair<uint32_t, ValueT>>;
0106
0107 public:
0108   using const_iterator = HashTableIterator<ValueT>;
0109   friend const_iterator;
0110
0111   HashTable() { Buckets.resize(8); }
0112   explicit HashTable(uint32_t Capacity) {
0113     Buckets.resize(Capacity);
0114   }
0115
0116   Error load(BinaryStreamReader &Stream) {
0117     const Header *H;
0118     if (auto EC = Stream.readObject(H))
0119       return EC;
0120     if (H->Capacity == 0)
0121       return make_error<RawError>(raw_error_code::corrupt_file,
0122                                   "Invalid Hash Table Capacity");
0123     if (H->Size > maxLoad(H->Capacity))
0124       return make_error<RawError>(raw_error_code::corrupt_file,
0125                                   "Invalid Hash Table Size");
0126
0127     Buckets.resize(H->Capacity);
0128
0129     if (auto EC = readSparseBitVector(Stream, Present))
0130       return EC;
0131     if (Present.count() != H->Size)
0132       return make_error<RawError>(raw_error_code::corrupt_file,
0133                                   "Present bit vector does not match size!");
0134
0135     if (auto EC = readSparseBitVector(Stream, Deleted))
0136       return EC;
0137     if (Present.intersects(Deleted))
0138       return make_error<RawError>(raw_error_code::corrupt_file,
0139                                   "Present bit vector intersects deleted!");
0140
0141     for (uint32_t P : Present) {
0142       if (auto EC = Stream.readInteger(Buckets[P].first))
0143         return EC;
0144       const ValueT *Value;
0145       if (auto EC = Stream.readObject(Value))
0146         return EC;
0147       Buckets[P].second = *Value;
0148     }
0149
0150     return Error::success();
0151   }
0152
0153   uint32_t calculateSerializedLength() const {
0154     uint32_t Size = sizeof(Header);
0155
0156     constexpr int BitsPerWord = 8 * sizeof(uint32_t);
0157
0158     int NumBitsP = Present.find_last() + 1;
0159     int NumBitsD = Deleted.find_last() + 1;
0160
0161     uint32_t NumWordsP = alignTo(NumBitsP, BitsPerWord) / BitsPerWord;
0162     uint32_t NumWordsD = alignTo(NumBitsD, BitsPerWord) / BitsPerWord;
0163
0164     // Present bit set number of words (4 bytes), followed by that many actual
0165     // words (4 bytes each).
0166     Size += sizeof(uint32_t);
0167     Size += NumWordsP * sizeof(uint32_t);
0168
0169     // Deleted bit set number of words (4 bytes), followed by that many actual
0170     // words (4 bytes each).
0171     Size += sizeof(uint32_t);
0172     Size += NumWordsD * sizeof(uint32_t);
0173
0174     // One (Key, ValueT) pair for each entry Present.
0175     Size += (sizeof(uint32_t) + sizeof(ValueT)) * size();
0176
0177     return Size;
0178   }
0179
0180   Error commit(BinaryStreamWriter &Writer) const {
0181     Header H;
0182     H.Size = size();
0183     H.Capacity = capacity();
0184     if (auto EC = Writer.writeObject(H))
0185       return EC;
0186
0187     if (auto EC = writeSparseBitVector(Writer, Present))
0188       return EC;
0189
0190     if (auto EC = writeSparseBitVector(Writer, Deleted))
0191       return EC;
0192
0193     for (const auto &Entry : *this) {
0194       if (auto EC = Writer.writeInteger(Entry.first))
0195         return EC;
0196       if (auto EC = Writer.writeObject(Entry.second))
0197         return EC;
0198     }
0199     return Error::success();
0200   }
0201
0202   void clear() {
0203     Buckets.resize(8);
0204     Present.clear();
0205     Deleted.clear();
0206   }
0207
0208   bool empty() const { return size() == 0; }
0209   uint32_t capacity() const { return Buckets.size(); }
0210   uint32_t size() const { return Present.count(); }
0211
0212   const_iterator begin() const { return const_iterator(*this); }
0213   const_iterator end() const { return const_iterator(*this, 0, true); }
0214
0215   /// Find the entry whose key has the specified hash value, using the specified
0216   /// traits defining hash function and equality.
0217   template <typename Key, typename TraitsT>
0218   const_iterator find_as(const Key &K, TraitsT &Traits) const {
0219     uint32_t H = Traits.hashLookupKey(K) % capacity();
0220     uint32_t I = H;
0221     std::optional<uint32_t> FirstUnused;
0222     do {
0223       if (isPresent(I)) {
0224         if (Traits.storageKeyToLookupKey(Buckets[I].first) == K)
0225           return const_iterator(*this, I, false);
0226       } else {
0227         if (!FirstUnused)
0228           FirstUnused = I;
0229         // Insertion occurs via linear probing from the slot hint, and will be
0230         // inserted at the first empty / deleted location.  Therefore, if we are
0231         // probing and find a location that is neither present nor deleted, then
0232         // nothing must have EVER been inserted at this location, and thus it is
0233         // not possible for a matching value to occur later.
0234         if (!isDeleted(I))
0235           break;
0236       }
0237       I = (I + 1) % capacity();
0238     } while (I != H);
0239
0240     // The only way FirstUnused would not be set is if every single entry in the
0241     // table were Present.  But this would violate the load factor constraints
0242     // that we impose, so it should never happen.
0243     assert(FirstUnused);
0244     return const_iterator(*this, *FirstUnused, true);
0245   }
0246
0247   /// Set the entry using a key type that the specified Traits can convert
0248   /// from a real key to an internal key.
0249   template <typename Key, typename TraitsT>
0250   bool set_as(const Key &K, ValueT V, TraitsT &Traits) {
0251     return set_as_internal(K, std::move(V), Traits, std::nullopt);
0252   }
0253
0254   template <typename Key, typename TraitsT>
0255   ValueT get(const Key &K, TraitsT &Traits) const {
0256     auto Iter = find_as(K, Traits);
0257     assert(Iter != end());
0258     return (*Iter).second;
0259   }
0260
0261 protected:
0262   bool isPresent(uint32_t K) const { return Present.test(K); }
0263   bool isDeleted(uint32_t K) const { return Deleted.test(K); }
0264
0265   BucketList Buckets;
0266   mutable SparseBitVector<> Present;
0267   mutable SparseBitVector<> Deleted;
0268
0269 private:
0270   /// Set the entry using a key type that the specified Traits can convert
0271   /// from a real key to an internal key.
0272   template <typename Key, typename TraitsT>
0273   bool set_as_internal(const Key &K, ValueT V, TraitsT &Traits,
0274                        std::optional<uint32_t> InternalKey) {
0275     auto Entry = find_as(K, Traits);
0276     if (Entry != end()) {
0277       assert(isPresent(Entry.index()));
0278       assert(Traits.storageKeyToLookupKey(Buckets[Entry.index()].first) == K);
0279       // We're updating, no need to do anything special.
0280       Buckets[Entry.index()].second = V;
0281       return false;
0282     }
0283
0284     auto &B = Buckets[Entry.index()];
0285     assert(!isPresent(Entry.index()));
0286     assert(Entry.isEnd());
0287     B.first = InternalKey ? *InternalKey : Traits.lookupKeyToStorageKey(K);
0288     B.second = V;
0289     Present.set(Entry.index());
0290     Deleted.reset(Entry.index());
0291
0292     grow(Traits);
0293
0294     assert((find_as(K, Traits)) != end());
0295     return true;
0296   }
0297
0298   static uint32_t maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; }
0299
0300   template <typename TraitsT>
0301   void grow(TraitsT &Traits) {
0302     uint32_t S = size();
0303     uint32_t MaxLoad = maxLoad(capacity());
0304     if (S < maxLoad(capacity()))
0305       return;
0306     assert(capacity() != UINT32_MAX && "Can't grow Hash table!");
0307
0308     uint32_t NewCapacity = (capacity() <= INT32_MAX) ? MaxLoad * 2 : UINT32_MAX;
0309
0310     // Growing requires rebuilding the table and re-hashing every item.  Make a
0311     // copy with a larger capacity, insert everything into the copy, then swap
0312     // it in.
0313     HashTable NewMap(NewCapacity);
0314     for (auto I : Present) {
0315       auto LookupKey = Traits.storageKeyToLookupKey(Buckets[I].first);
0316       NewMap.set_as_internal(LookupKey, Buckets[I].second, Traits,
0317                              Buckets[I].first);
0318     }
0319
0320     Buckets.swap(NewMap.Buckets);
0321     std::swap(Present, NewMap.Present);
0322     std::swap(Deleted, NewMap.Deleted);
0323     assert(capacity() == NewCapacity);
0324     assert(size() == S);
0325   }
0326 };
0327
0328 } // end namespace pdb
0329
0330 } // end namespace llvm
0331
0332 #endif // LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H