Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-05-10 08:43:09

0001 //===- llvm/ADT/SparseSet.h - Sparse set ------------------------*- C++ -*-===//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 ///
0009 /// \file
0010 /// This file defines the SparseSet class derived from the version described in
0011 /// Briggs, Torczon, "An efficient representation for sparse sets", ACM Letters
0012 /// on Programming Languages and Systems, Volume 2 Issue 1-4, March-Dec.  1993.
0013 ///
0014 /// A sparse set holds a small number of objects identified by integer keys from
0015 /// a moderately sized universe. The sparse set uses more memory than other
0016 /// containers in order to provide faster operations.
0017 ///
0018 //===----------------------------------------------------------------------===//
0019 
0020 #ifndef LLVM_ADT_SPARSESET_H
0021 #define LLVM_ADT_SPARSESET_H
0022 
0023 #include "llvm/ADT/identity.h"
0024 #include "llvm/ADT/SmallVector.h"
0025 #include "llvm/Support/AllocatorBase.h"
0026 #include <cassert>
0027 #include <cstdint>
0028 #include <cstdlib>
0029 #include <limits>
0030 #include <utility>
0031 
0032 namespace llvm {
0033 
0034 /// SparseSetValTraits - Objects in a SparseSet are identified by keys that can
0035 /// be uniquely converted to a small integer less than the set's universe. This
0036 /// class allows the set to hold values that differ from the set's key type as
0037 /// long as an index can still be derived from the value. SparseSet never
0038 /// directly compares ValueT, only their indices, so it can map keys to
0039 /// arbitrary values. SparseSetValTraits computes the index from the value
0040 /// object. To compute the index from a key, SparseSet uses a separate
0041 /// KeyFunctorT template argument.
0042 ///
0043 /// A simple type declaration, SparseSet<Type>, handles these cases:
0044 /// - unsigned key, identity index, identity value
0045 /// - unsigned key, identity index, fat value providing getSparseSetIndex()
0046 ///
0047 /// The type declaration SparseSet<Type, UnaryFunction> handles:
0048 /// - unsigned key, remapped index, identity value (virtual registers)
0049 /// - pointer key, pointer-derived index, identity value (node+ID)
0050 /// - pointer key, pointer-derived index, fat value with getSparseSetIndex()
0051 ///
0052 /// Only other, unexpected cases require specializing SparseSetValTraits.
0053 ///
0054 /// For best results, ValueT should not require a destructor.
0055 ///
0056 template<typename ValueT>
0057 struct SparseSetValTraits {
0058   static unsigned getValIndex(const ValueT &Val) {
0059     return Val.getSparseSetIndex();
0060   }
0061 };
0062 
0063 /// SparseSetValFunctor - Helper class for selecting SparseSetValTraits. The
0064 /// generic implementation handles ValueT classes which either provide
0065 /// getSparseSetIndex() or specialize SparseSetValTraits<>.
0066 ///
0067 template<typename KeyT, typename ValueT, typename KeyFunctorT>
0068 struct SparseSetValFunctor {
0069   unsigned operator()(const ValueT &Val) const {
0070     return SparseSetValTraits<ValueT>::getValIndex(Val);
0071   }
0072 };
0073 
0074 /// SparseSetValFunctor<KeyT, KeyT> - Helper class for the common case of
0075 /// identity key/value sets.
0076 template<typename KeyT, typename KeyFunctorT>
0077 struct SparseSetValFunctor<KeyT, KeyT, KeyFunctorT> {
0078   unsigned operator()(const KeyT &Key) const {
0079     return KeyFunctorT()(Key);
0080   }
0081 };
0082 
0083 /// SparseSet - Fast set implementation for objects that can be identified by
0084 /// small unsigned keys.
0085 ///
0086 /// SparseSet allocates memory proportional to the size of the key universe, so
0087 /// it is not recommended for building composite data structures.  It is useful
0088 /// for algorithms that require a single set with fast operations.
0089 ///
0090 /// Compared to DenseSet and DenseMap, SparseSet provides constant-time fast
0091 /// clear() and iteration as fast as a vector.  The find(), insert(), and
0092 /// erase() operations are all constant time, and typically faster than a hash
0093 /// table.  The iteration order doesn't depend on numerical key values, it only
0094 /// depends on the order of insert() and erase() operations.  When no elements
0095 /// have been erased, the iteration order is the insertion order.
0096 ///
0097 /// Compared to BitVector, SparseSet<unsigned> uses 8x-40x more memory, but
0098 /// offers constant-time clear() and size() operations as well as fast
0099 /// iteration independent on the size of the universe.
0100 ///
0101 /// SparseSet contains a dense vector holding all the objects and a sparse
0102 /// array holding indexes into the dense vector.  Most of the memory is used by
0103 /// the sparse array which is the size of the key universe.  The SparseT
0104 /// template parameter provides a space/speed tradeoff for sets holding many
0105 /// elements.
0106 ///
0107 /// When SparseT is uint32_t, find() only touches 2 cache lines, but the sparse
0108 /// array uses 4 x Universe bytes.
0109 ///
0110 /// When SparseT is uint8_t (the default), find() touches up to 2+[N/256] cache
0111 /// lines, but the sparse array is 4x smaller.  N is the number of elements in
0112 /// the set.
0113 ///
0114 /// For sets that may grow to thousands of elements, SparseT should be set to
0115 /// uint16_t or uint32_t.
0116 ///
0117 /// @tparam ValueT      The type of objects in the set.
0118 /// @tparam KeyFunctorT A functor that computes an unsigned index from KeyT.
0119 /// @tparam SparseT     An unsigned integer type. See above.
0120 ///
0121 template<typename ValueT,
0122          typename KeyFunctorT = identity<unsigned>,
0123          typename SparseT = uint8_t>
0124 class SparseSet {
0125   static_assert(std::is_unsigned_v<SparseT>,
0126                 "SparseT must be an unsigned integer type");
0127 
0128   using KeyT = typename KeyFunctorT::argument_type;
0129   using DenseT = SmallVector<ValueT, 8>;
0130   using size_type = unsigned;
0131   DenseT Dense;
0132 
0133   struct Deleter {
0134     void operator()(SparseT *S) { free(S); }
0135   };
0136   std::unique_ptr<SparseT[], Deleter> Sparse;
0137 
0138   unsigned Universe = 0;
0139   KeyFunctorT KeyIndexOf;
0140   SparseSetValFunctor<KeyT, ValueT, KeyFunctorT> ValIndexOf;
0141 
0142 public:
0143   using value_type = ValueT;
0144   using reference = ValueT &;
0145   using const_reference = const ValueT &;
0146   using pointer = ValueT *;
0147   using const_pointer = const ValueT *;
0148 
0149   SparseSet() = default;
0150   SparseSet(const SparseSet &) = delete;
0151   SparseSet &operator=(const SparseSet &) = delete;
0152   SparseSet(SparseSet &&) = default;
0153 
0154   /// setUniverse - Set the universe size which determines the largest key the
0155   /// set can hold.  The universe must be sized before any elements can be
0156   /// added.
0157   ///
0158   /// @param U Universe size. All object keys must be less than U.
0159   ///
0160   void setUniverse(unsigned U) {
0161     // It's not hard to resize the universe on a non-empty set, but it doesn't
0162     // seem like a likely use case, so we can add that code when we need it.
0163     assert(empty() && "Can only resize universe on an empty map");
0164     // Hysteresis prevents needless reallocations.
0165     if (U >= Universe/4 && U <= Universe)
0166       return;
0167     // The Sparse array doesn't actually need to be initialized, so malloc
0168     // would be enough here, but that will cause tools like valgrind to
0169     // complain about branching on uninitialized data.
0170     Sparse.reset(static_cast<SparseT *>(safe_calloc(U, sizeof(SparseT))));
0171     Universe = U;
0172   }
0173 
0174   // Import trivial vector stuff from DenseT.
0175   using iterator = typename DenseT::iterator;
0176   using const_iterator = typename DenseT::const_iterator;
0177 
0178   const_iterator begin() const { return Dense.begin(); }
0179   const_iterator end() const { return Dense.end(); }
0180   iterator begin() { return Dense.begin(); }
0181   iterator end() { return Dense.end(); }
0182 
0183   /// empty - Returns true if the set is empty.
0184   ///
0185   /// This is not the same as BitVector::empty().
0186   ///
0187   bool empty() const { return Dense.empty(); }
0188 
0189   /// size - Returns the number of elements in the set.
0190   ///
0191   /// This is not the same as BitVector::size() which returns the size of the
0192   /// universe.
0193   ///
0194   size_type size() const { return Dense.size(); }
0195 
0196   /// clear - Clears the set.  This is a very fast constant time operation.
0197   ///
0198   void clear() {
0199     // Sparse does not need to be cleared, see find().
0200     Dense.clear();
0201   }
0202 
0203   /// findIndex - Find an element by its index.
0204   ///
0205   /// @param   Idx A valid index to find.
0206   /// @returns An iterator to the element identified by key, or end().
0207   ///
0208   iterator findIndex(unsigned Idx) {
0209     assert(Idx < Universe && "Key out of range");
0210     assert(Sparse != nullptr && "Invalid sparse type");
0211     const unsigned Stride = std::numeric_limits<SparseT>::max() + 1u;
0212     for (unsigned i = Sparse[Idx], e = size(); i < e; i += Stride) {
0213       const unsigned FoundIdx = ValIndexOf(Dense[i]);
0214       assert(FoundIdx < Universe && "Invalid key in set. Did object mutate?");
0215       if (Idx == FoundIdx)
0216         return begin() + i;
0217       // Stride is 0 when SparseT >= unsigned.  We don't need to loop.
0218       if (!Stride)
0219         break;
0220     }
0221     return end();
0222   }
0223 
0224   /// find - Find an element by its key.
0225   ///
0226   /// @param   Key A valid key to find.
0227   /// @returns An iterator to the element identified by key, or end().
0228   ///
0229   iterator find(const KeyT &Key) {
0230     return findIndex(KeyIndexOf(Key));
0231   }
0232 
0233   const_iterator find(const KeyT &Key) const {
0234     return const_cast<SparseSet*>(this)->findIndex(KeyIndexOf(Key));
0235   }
0236 
0237   /// Check if the set contains the given \c Key.
0238   ///
0239   /// @param Key A valid key to find.
0240   bool contains(const KeyT &Key) const { return find(Key) != end(); }
0241 
0242   /// count - Returns 1 if this set contains an element identified by Key,
0243   /// 0 otherwise.
0244   ///
0245   size_type count(const KeyT &Key) const { return contains(Key) ? 1 : 0; }
0246 
0247   /// insert - Attempts to insert a new element.
0248   ///
0249   /// If Val is successfully inserted, return (I, true), where I is an iterator
0250   /// pointing to the newly inserted element.
0251   ///
0252   /// If the set already contains an element with the same key as Val, return
0253   /// (I, false), where I is an iterator pointing to the existing element.
0254   ///
0255   /// Insertion invalidates all iterators.
0256   ///
0257   std::pair<iterator, bool> insert(const ValueT &Val) {
0258     unsigned Idx = ValIndexOf(Val);
0259     iterator I = findIndex(Idx);
0260     if (I != end())
0261       return std::make_pair(I, false);
0262     Sparse[Idx] = size();
0263     Dense.push_back(Val);
0264     return std::make_pair(end() - 1, true);
0265   }
0266 
0267   /// array subscript - If an element already exists with this key, return it.
0268   /// Otherwise, automatically construct a new value from Key, insert it,
0269   /// and return the newly inserted element.
0270   ValueT &operator[](const KeyT &Key) {
0271     return *insert(ValueT(Key)).first;
0272   }
0273 
0274   ValueT pop_back_val() {
0275     // Sparse does not need to be cleared, see find().
0276     return Dense.pop_back_val();
0277   }
0278 
0279   /// erase - Erases an existing element identified by a valid iterator.
0280   ///
0281   /// This invalidates all iterators, but erase() returns an iterator pointing
0282   /// to the next element.  This makes it possible to erase selected elements
0283   /// while iterating over the set:
0284   ///
0285   ///   for (SparseSet::iterator I = Set.begin(); I != Set.end();)
0286   ///     if (test(*I))
0287   ///       I = Set.erase(I);
0288   ///     else
0289   ///       ++I;
0290   ///
0291   /// Note that end() changes when elements are erased, unlike std::list.
0292   ///
0293   iterator erase(iterator I) {
0294     assert(unsigned(I - begin()) < size() && "Invalid iterator");
0295     if (I != end() - 1) {
0296       *I = Dense.back();
0297       unsigned BackIdx = ValIndexOf(Dense.back());
0298       assert(BackIdx < Universe && "Invalid key in set. Did object mutate?");
0299       Sparse[BackIdx] = I - begin();
0300     }
0301     // This depends on SmallVector::pop_back() not invalidating iterators.
0302     // std::vector::pop_back() doesn't give that guarantee.
0303     Dense.pop_back();
0304     return I;
0305   }
0306 
0307   /// erase - Erases an element identified by Key, if it exists.
0308   ///
0309   /// @param   Key The key identifying the element to erase.
0310   /// @returns True when an element was erased, false if no element was found.
0311   ///
0312   bool erase(const KeyT &Key) {
0313     iterator I = find(Key);
0314     if (I == end())
0315       return false;
0316     erase(I);
0317     return true;
0318   }
0319 };
0320 
0321 } // end namespace llvm
0322 
0323 #endif // LLVM_ADT_SPARSESET_H