Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-05-10 08:43:10

0001 //===- TrieRawHashMap.h -----------------------------------------*- C++ -*-===//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 
0009 #ifndef LLVM_ADT_TRIERAWHASHMAP_H
0010 #define LLVM_ADT_TRIERAWHASHMAP_H
0011 
0012 #include "llvm/ADT/ArrayRef.h"
0013 #include <atomic>
0014 #include <optional>
0015 
0016 namespace llvm {
0017 
0018 class raw_ostream;
0019 
0020 /// TrieRawHashMap - is a lock-free thread-safe trie that is can be used to
0021 /// store/index data based on a hash value. It can be customized to work with
0022 /// any hash algorithm or store any data.
0023 ///
0024 /// Data structure:
0025 /// Data node stored in the Trie contains both hash and data:
0026 /// struct {
0027 ///    HashT Hash;
0028 ///    DataT Data;
0029 /// };
0030 ///
0031 /// Data is stored/indexed via a prefix tree, where each node in the tree can be
0032 /// either the root, a sub-trie or a data node. Assuming a 4-bit hash and two
0033 /// data objects {0001, A} and {0100, B}, it can be stored in a trie
0034 /// (assuming Root has 2 bits, SubTrie has 1 bit):
0035 ///  +--------+
0036 ///  |Root[00]| -> {0001, A}
0037 ///  |    [01]| -> {0100, B}
0038 ///  |    [10]| (empty)
0039 ///  |    [11]| (empty)
0040 ///  +--------+
0041 ///
0042 /// Inserting a new object {0010, C} will result in:
0043 ///  +--------+    +----------+
0044 ///  |Root[00]| -> |SubTrie[0]| -> {0001, A}
0045 ///  |        |    |       [1]| -> {0010, C}
0046 ///  |        |    +----------+
0047 ///  |    [01]| -> {0100, B}
0048 ///  |    [10]| (empty)
0049 ///  |    [11]| (empty)
0050 ///  +--------+
0051 /// Note object A is sunk down to a sub-trie during the insertion. All the
0052 /// nodes are inserted through compare-exchange to ensure thread-safe and
0053 /// lock-free.
0054 ///
0055 /// To find an object in the trie, walk the tree with prefix of the hash until
0056 /// the data node is found. Then the hash is compared with the hash stored in
0057 /// the data node to see if the is the same object.
0058 ///
0059 /// Hash collision is not allowed so it is recommended to use trie with a
0060 /// "strong" hashing algorithm. A well-distributed hash can also result in
0061 /// better performance and memory usage.
0062 ///
0063 /// It currently does not support iteration and deletion.
0064 
0065 /// Base class for a lock-free thread-safe hash-mapped trie.
0066 class ThreadSafeTrieRawHashMapBase {
0067 public:
0068   static constexpr size_t TrieContentBaseSize = 4;
0069   static constexpr size_t DefaultNumRootBits = 6;
0070   static constexpr size_t DefaultNumSubtrieBits = 4;
0071 
0072 private:
0073   template <class T> struct AllocValueType {
0074     char Base[TrieContentBaseSize];
0075     std::aligned_union_t<sizeof(T), T> Content;
0076   };
0077 
0078 protected:
0079   template <class T>
0080   static constexpr size_t DefaultContentAllocSize = sizeof(AllocValueType<T>);
0081 
0082   template <class T>
0083   static constexpr size_t DefaultContentAllocAlign = alignof(AllocValueType<T>);
0084 
0085   template <class T>
0086   static constexpr size_t DefaultContentOffset =
0087       offsetof(AllocValueType<T>, Content);
0088 
0089 public:
0090   static void *operator new(size_t Size) { return ::operator new(Size); }
0091   void operator delete(void *Ptr) { ::operator delete(Ptr); }
0092 
0093   LLVM_DUMP_METHOD void dump() const;
0094   void print(raw_ostream &OS) const;
0095 
0096 protected:
0097   /// Result of a lookup. Suitable for an insertion hint. Maybe could be
0098   /// expanded into an iterator of sorts, but likely not useful (visiting
0099   /// everything in the trie should probably be done some way other than
0100   /// through an iterator pattern).
0101   class PointerBase {
0102   protected:
0103     void *get() const { return I == -2u ? P : nullptr; }
0104 
0105   public:
0106     PointerBase() noexcept = default;
0107 
0108   private:
0109     friend class ThreadSafeTrieRawHashMapBase;
0110     explicit PointerBase(void *Content) : P(Content), I(-2u) {}
0111     PointerBase(void *P, unsigned I, unsigned B) : P(P), I(I), B(B) {}
0112 
0113     bool isHint() const { return I != -1u && I != -2u; }
0114 
0115     void *P = nullptr;
0116     unsigned I = -1u;
0117     unsigned B = 0;
0118   };
0119 
0120   /// Find the stored content with hash.
0121   PointerBase find(ArrayRef<uint8_t> Hash) const;
0122 
0123   /// Insert and return the stored content.
0124   PointerBase
0125   insert(PointerBase Hint, ArrayRef<uint8_t> Hash,
0126          function_ref<const uint8_t *(void *Mem, ArrayRef<uint8_t> Hash)>
0127              Constructor);
0128 
0129   ThreadSafeTrieRawHashMapBase() = delete;
0130 
0131   ThreadSafeTrieRawHashMapBase(
0132       size_t ContentAllocSize, size_t ContentAllocAlign, size_t ContentOffset,
0133       std::optional<size_t> NumRootBits = std::nullopt,
0134       std::optional<size_t> NumSubtrieBits = std::nullopt);
0135 
0136   /// Destructor, which asserts if there's anything to do. Subclasses should
0137   /// call \a destroyImpl().
0138   ///
0139   /// \pre \a destroyImpl() was already called.
0140   ~ThreadSafeTrieRawHashMapBase();
0141   void destroyImpl(function_ref<void(void *ValueMem)> Destructor);
0142 
0143   ThreadSafeTrieRawHashMapBase(ThreadSafeTrieRawHashMapBase &&RHS);
0144 
0145   // Move assignment is not supported as it is not thread-safe.
0146   ThreadSafeTrieRawHashMapBase &
0147   operator=(ThreadSafeTrieRawHashMapBase &&RHS) = delete;
0148 
0149   // No copy.
0150   ThreadSafeTrieRawHashMapBase(const ThreadSafeTrieRawHashMapBase &) = delete;
0151   ThreadSafeTrieRawHashMapBase &
0152   operator=(const ThreadSafeTrieRawHashMapBase &) = delete;
0153 
0154   // Debug functions. Implementation details and not guaranteed to be
0155   // thread-safe.
0156   PointerBase getRoot() const;
0157   unsigned getStartBit(PointerBase P) const;
0158   unsigned getNumBits(PointerBase P) const;
0159   unsigned getNumSlotUsed(PointerBase P) const;
0160   std::string getTriePrefixAsString(PointerBase P) const;
0161   unsigned getNumTries() const;
0162   // Visit next trie in the allocation chain.
0163   PointerBase getNextTrie(PointerBase P) const;
0164 
0165 private:
0166   friend class TrieRawHashMapTestHelper;
0167   const unsigned short ContentAllocSize;
0168   const unsigned short ContentAllocAlign;
0169   const unsigned short ContentOffset;
0170   unsigned short NumRootBits;
0171   unsigned short NumSubtrieBits;
0172   class ImplType;
0173   // ImplPtr is owned by ThreadSafeTrieRawHashMapBase and needs to be freed in
0174   // destroyImpl.
0175   std::atomic<ImplType *> ImplPtr;
0176   ImplType &getOrCreateImpl();
0177   ImplType *getImpl() const;
0178 };
0179 
0180 /// Lock-free thread-safe hash-mapped trie.
0181 template <class T, size_t NumHashBytes>
0182 class ThreadSafeTrieRawHashMap : public ThreadSafeTrieRawHashMapBase {
0183 public:
0184   using HashT = std::array<uint8_t, NumHashBytes>;
0185 
0186   class LazyValueConstructor;
0187   struct value_type {
0188     const HashT Hash;
0189     T Data;
0190 
0191     value_type(value_type &&) = default;
0192     value_type(const value_type &) = default;
0193 
0194     value_type(ArrayRef<uint8_t> Hash, const T &Data)
0195         : Hash(makeHash(Hash)), Data(Data) {}
0196     value_type(ArrayRef<uint8_t> Hash, T &&Data)
0197         : Hash(makeHash(Hash)), Data(std::move(Data)) {}
0198 
0199   private:
0200     friend class LazyValueConstructor;
0201 
0202     struct EmplaceTag {};
0203     template <class... ArgsT>
0204     value_type(ArrayRef<uint8_t> Hash, EmplaceTag, ArgsT &&...Args)
0205         : Hash(makeHash(Hash)), Data(std::forward<ArgsT>(Args)...) {}
0206 
0207     static HashT makeHash(ArrayRef<uint8_t> HashRef) {
0208       HashT Hash;
0209       std::copy(HashRef.begin(), HashRef.end(), Hash.data());
0210       return Hash;
0211     }
0212   };
0213 
0214   using ThreadSafeTrieRawHashMapBase::operator delete;
0215   using HashType = HashT;
0216 
0217   using ThreadSafeTrieRawHashMapBase::dump;
0218   using ThreadSafeTrieRawHashMapBase::print;
0219 
0220 private:
0221   template <class ValueT> class PointerImpl : PointerBase {
0222     friend class ThreadSafeTrieRawHashMap;
0223 
0224     ValueT *get() const {
0225       return reinterpret_cast<ValueT *>(PointerBase::get());
0226     }
0227 
0228   public:
0229     ValueT &operator*() const {
0230       assert(get());
0231       return *get();
0232     }
0233     ValueT *operator->() const {
0234       assert(get());
0235       return get();
0236     }
0237     explicit operator bool() const { return get(); }
0238 
0239     PointerImpl() = default;
0240 
0241   protected:
0242     PointerImpl(PointerBase Result) : PointerBase(Result) {}
0243   };
0244 
0245 public:
0246   class pointer;
0247   class const_pointer;
0248   class pointer : public PointerImpl<value_type> {
0249     friend class ThreadSafeTrieRawHashMap;
0250     friend class const_pointer;
0251 
0252   public:
0253     pointer() = default;
0254 
0255   private:
0256     pointer(PointerBase Result) : pointer::PointerImpl(Result) {}
0257   };
0258 
0259   class const_pointer : public PointerImpl<const value_type> {
0260     friend class ThreadSafeTrieRawHashMap;
0261 
0262   public:
0263     const_pointer() = default;
0264     const_pointer(const pointer &P) : const_pointer::PointerImpl(P) {}
0265 
0266   private:
0267     const_pointer(PointerBase Result) : const_pointer::PointerImpl(Result) {}
0268   };
0269 
0270   class LazyValueConstructor {
0271   public:
0272     value_type &operator()(T &&RHS) {
0273       assert(Mem && "Constructor already called, or moved away");
0274       return assign(::new (Mem) value_type(Hash, std::move(RHS)));
0275     }
0276     value_type &operator()(const T &RHS) {
0277       assert(Mem && "Constructor already called, or moved away");
0278       return assign(::new (Mem) value_type(Hash, RHS));
0279     }
0280     template <class... ArgsT> value_type &emplace(ArgsT &&...Args) {
0281       assert(Mem && "Constructor already called, or moved away");
0282       return assign(::new (Mem)
0283                         value_type(Hash, typename value_type::EmplaceTag{},
0284                                    std::forward<ArgsT>(Args)...));
0285     }
0286 
0287     LazyValueConstructor(LazyValueConstructor &&RHS)
0288         : Mem(RHS.Mem), Result(RHS.Result), Hash(RHS.Hash) {
0289       RHS.Mem = nullptr; // Moved away, cannot call.
0290     }
0291     ~LazyValueConstructor() { assert(!Mem && "Constructor never called!"); }
0292 
0293   private:
0294     value_type &assign(value_type *V) {
0295       Mem = nullptr;
0296       Result = V;
0297       return *V;
0298     }
0299     friend class ThreadSafeTrieRawHashMap;
0300     LazyValueConstructor() = delete;
0301     LazyValueConstructor(void *Mem, value_type *&Result, ArrayRef<uint8_t> Hash)
0302         : Mem(Mem), Result(Result), Hash(Hash) {
0303       assert(Hash.size() == sizeof(HashT) && "Invalid hash");
0304       assert(Mem && "Invalid memory for construction");
0305     }
0306     void *Mem;
0307     value_type *&Result;
0308     ArrayRef<uint8_t> Hash;
0309   };
0310 
0311   /// Insert with a hint. Default-constructed hint will work, but it's
0312   /// recommended to start with a lookup to avoid overhead in object creation
0313   /// if it already exists.
0314   pointer insertLazy(const_pointer Hint, ArrayRef<uint8_t> Hash,
0315                      function_ref<void(LazyValueConstructor)> OnConstruct) {
0316     return pointer(ThreadSafeTrieRawHashMapBase::insert(
0317         Hint, Hash, [&](void *Mem, ArrayRef<uint8_t> Hash) {
0318           value_type *Result = nullptr;
0319           OnConstruct(LazyValueConstructor(Mem, Result, Hash));
0320           return Result->Hash.data();
0321         }));
0322   }
0323 
0324   pointer insertLazy(ArrayRef<uint8_t> Hash,
0325                      function_ref<void(LazyValueConstructor)> OnConstruct) {
0326     return insertLazy(const_pointer(), Hash, OnConstruct);
0327   }
0328 
0329   pointer insert(const_pointer Hint, value_type &&HashedData) {
0330     return insertLazy(Hint, HashedData.Hash, [&](LazyValueConstructor C) {
0331       C(std::move(HashedData.Data));
0332     });
0333   }
0334 
0335   pointer insert(const_pointer Hint, const value_type &HashedData) {
0336     return insertLazy(Hint, HashedData.Hash,
0337                       [&](LazyValueConstructor C) { C(HashedData.Data); });
0338   }
0339 
0340   pointer find(ArrayRef<uint8_t> Hash) {
0341     assert(Hash.size() == std::tuple_size<HashT>::value);
0342     return ThreadSafeTrieRawHashMapBase::find(Hash);
0343   }
0344 
0345   const_pointer find(ArrayRef<uint8_t> Hash) const {
0346     assert(Hash.size() == std::tuple_size<HashT>::value);
0347     return ThreadSafeTrieRawHashMapBase::find(Hash);
0348   }
0349 
0350   ThreadSafeTrieRawHashMap(std::optional<size_t> NumRootBits = std::nullopt,
0351                            std::optional<size_t> NumSubtrieBits = std::nullopt)
0352       : ThreadSafeTrieRawHashMapBase(DefaultContentAllocSize<value_type>,
0353                                      DefaultContentAllocAlign<value_type>,
0354                                      DefaultContentOffset<value_type>,
0355                                      NumRootBits, NumSubtrieBits) {}
0356 
0357   ~ThreadSafeTrieRawHashMap() {
0358     if constexpr (std::is_trivially_destructible<value_type>::value)
0359       this->destroyImpl(nullptr);
0360     else
0361       this->destroyImpl(
0362           [](void *P) { static_cast<value_type *>(P)->~value_type(); });
0363   }
0364 
0365   // Move constructor okay.
0366   ThreadSafeTrieRawHashMap(ThreadSafeTrieRawHashMap &&) = default;
0367 
0368   // No move assignment or any copy.
0369   ThreadSafeTrieRawHashMap &operator=(ThreadSafeTrieRawHashMap &&) = delete;
0370   ThreadSafeTrieRawHashMap(const ThreadSafeTrieRawHashMap &) = delete;
0371   ThreadSafeTrieRawHashMap &
0372   operator=(const ThreadSafeTrieRawHashMap &) = delete;
0373 };
0374 
0375 } // namespace llvm
0376 
0377 #endif // LLVM_ADT_TRIERAWHASHMAP_H