Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-05-10 08:44:31

0001 //===- llvm/Support/HashBuilder.h - Convenient hashing interface-*- C++ -*-===//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 //
0009 // This file implements an interface allowing to conveniently build hashes of
0010 // various data types, without relying on the underlying hasher type to know
0011 // about hashed data types.
0012 //
0013 //===----------------------------------------------------------------------===//
0014 
0015 #ifndef LLVM_SUPPORT_HASHBUILDER_H
0016 #define LLVM_SUPPORT_HASHBUILDER_H
0017 
0018 #include "llvm/ADT/ArrayRef.h"
0019 #include "llvm/ADT/Hashing.h"
0020 #include "llvm/ADT/STLExtras.h"
0021 #include "llvm/ADT/StringRef.h"
0022 #include "llvm/Support/Endian.h"
0023 #include "llvm/Support/type_traits.h"
0024 
0025 #include <iterator>
0026 #include <optional>
0027 #include <utility>
0028 
0029 namespace llvm {
0030 
0031 namespace hashbuilder_detail {
0032 /// Trait to indicate whether a type's bits can be hashed directly (after
0033 /// endianness correction).
0034 template <typename U>
0035 struct IsHashableData
0036     : std::integral_constant<bool, is_integral_or_enum<U>::value> {};
0037 
0038 } // namespace hashbuilder_detail
0039 
0040 /// Declares the hasher member, and functions forwarding directly to the hasher.
0041 template <typename HasherT> class HashBuilderBase {
0042 public:
0043   template <typename HasherT_ = HasherT>
0044   using HashResultTy = decltype(std::declval<HasherT_ &>().final());
0045 
0046   HasherT &getHasher() { return Hasher; }
0047 
0048   /// Forward to `HasherT::update(ArrayRef<uint8_t>)`.
0049   ///
0050   /// This may not take the size of `Data` into account.
0051   /// Users of this function should pay attention to respect endianness
0052   /// contraints.
0053   void update(ArrayRef<uint8_t> Data) { this->getHasher().update(Data); }
0054 
0055   /// Forward to `HasherT::update(ArrayRef<uint8_t>)`.
0056   ///
0057   /// This may not take the size of `Data` into account.
0058   /// Users of this function should pay attention to respect endianness
0059   /// contraints.
0060   void update(StringRef Data) {
0061     update(
0062         ArrayRef(reinterpret_cast<const uint8_t *>(Data.data()), Data.size()));
0063   }
0064 
0065   /// Forward to `HasherT::final()` if available.
0066   template <typename HasherT_ = HasherT> HashResultTy<HasherT_> final() {
0067     return this->getHasher().final();
0068   }
0069 
0070   /// Forward to `HasherT::result()` if available.
0071   template <typename HasherT_ = HasherT> HashResultTy<HasherT_> result() {
0072     return this->getHasher().result();
0073   }
0074 
0075 protected:
0076   explicit HashBuilderBase(HasherT &Hasher) : Hasher(Hasher) {}
0077 
0078   template <typename... ArgTypes>
0079   explicit HashBuilderBase(ArgTypes &&...Args)
0080       : OptionalHasher(std::in_place, std::forward<ArgTypes>(Args)...),
0081         Hasher(*OptionalHasher) {}
0082 
0083 private:
0084   std::optional<HasherT> OptionalHasher;
0085   HasherT &Hasher;
0086 };
0087 
0088 /// Interface to help hash various types through a hasher type.
0089 ///
0090 /// Via provided specializations of `add`, `addRange`, and `addRangeElements`
0091 /// functions, various types (e.g. `ArrayRef`, `StringRef`, etc.) can be hashed
0092 /// without requiring any knowledge of hashed types from the hasher type.
0093 ///
0094 /// The only method expected from the templated hasher type `HasherT` is:
0095 /// * void update(ArrayRef<uint8_t> Data)
0096 ///
0097 /// Additionally, the following methods will be forwarded to the hasher type:
0098 /// * decltype(std::declval<HasherT &>().final()) final()
0099 /// * decltype(std::declval<HasherT &>().result()) result()
0100 ///
0101 /// From a user point of view, the interface provides the following:
0102 /// * `template<typename T> add(const T &Value)`
0103 ///   The `add` function implements hashing of various types.
0104 /// * `template <typename ItT> void addRange(ItT First, ItT Last)`
0105 ///   The `addRange` function is designed to aid hashing a range of values.
0106 ///   It explicitly adds the size of the range in the hash.
0107 /// * `template <typename ItT> void addRangeElements(ItT First, ItT Last)`
0108 ///   The `addRangeElements` function is also designed to aid hashing a range of
0109 ///   values. In contrast to `addRange`, it **ignores** the size of the range,
0110 ///   behaving as if elements were added one at a time with `add`.
0111 ///
0112 /// User-defined `struct` types can participate in this interface by providing
0113 /// an `addHash` templated function. See the associated template specialization
0114 /// for details.
0115 ///
0116 /// This interface does not impose requirements on the hasher
0117 /// `update(ArrayRef<uint8_t> Data)` method. We want to avoid collisions for
0118 /// variable-size types; for example for
0119 /// ```
0120 /// builder.add({1});
0121 /// builder.add({2, 3});
0122 /// ```
0123 /// and
0124 /// ```
0125 /// builder.add({1, 2});
0126 /// builder.add({3});
0127 /// ```
0128 /// . Thus, specializations of `add` and `addHash` for variable-size types must
0129 /// not assume that the hasher type considers the size as part of the hash; they
0130 /// must explicitly add the size to the hash. See for example specializations
0131 /// for `ArrayRef` and `StringRef`.
0132 ///
0133 /// Additionally, since types are eventually forwarded to the hasher's
0134 /// `void update(ArrayRef<uint8_t>)` method, endianness plays a role in the hash
0135 /// computation (for example when computing `add((int)123)`).
0136 /// Specifiying a non-`native` `Endianness` template parameter allows to compute
0137 /// stable hash across platforms with different endianness.
0138 template <typename HasherT, llvm::endianness Endianness>
0139 class HashBuilder : public HashBuilderBase<HasherT> {
0140 public:
0141   explicit HashBuilder(HasherT &Hasher) : HashBuilderBase<HasherT>(Hasher) {}
0142   template <typename... ArgTypes>
0143   explicit HashBuilder(ArgTypes &&...Args)
0144       : HashBuilderBase<HasherT>(Args...) {}
0145 
0146   /// Implement hashing for hashable data types, e.g. integral or enum values.
0147   template <typename T>
0148   std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value, HashBuilder &>
0149   add(T Value) {
0150     return adjustForEndiannessAndAdd(Value);
0151   }
0152 
0153   /// Support hashing `ArrayRef`.
0154   ///
0155   /// `Value.size()` is taken into account to ensure cases like
0156   /// ```
0157   /// builder.add({1});
0158   /// builder.add({2, 3});
0159   /// ```
0160   /// and
0161   /// ```
0162   /// builder.add({1, 2});
0163   /// builder.add({3});
0164   /// ```
0165   /// do not collide.
0166   template <typename T> HashBuilder &add(ArrayRef<T> Value) {
0167     // As of implementation time, simply calling `addRange(Value)` would also go
0168     // through the `update` fast path. But that would rely on the implementation
0169     // details of `ArrayRef::begin()` and `ArrayRef::end()`. Explicitly call
0170     // `update` to guarantee the fast path.
0171     add(Value.size());
0172     if (hashbuilder_detail::IsHashableData<T>::value &&
0173         Endianness == llvm::endianness::native) {
0174       this->update(ArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()),
0175                             Value.size() * sizeof(T)));
0176     } else {
0177       for (auto &V : Value)
0178         add(V);
0179     }
0180     return *this;
0181   }
0182 
0183   /// Support hashing `StringRef`.
0184   ///
0185   /// `Value.size()` is taken into account to ensure cases like
0186   /// ```
0187   /// builder.add("a");
0188   /// builder.add("bc");
0189   /// ```
0190   /// and
0191   /// ```
0192   /// builder.add("ab");
0193   /// builder.add("c");
0194   /// ```
0195   /// do not collide.
0196   HashBuilder &add(StringRef Value) {
0197     // As of implementation time, simply calling `addRange(Value)` would also go
0198     // through `update`. But that would rely on the implementation of
0199     // `StringRef::begin()` and `StringRef::end()`. Explicitly call `update` to
0200     // guarantee the fast path.
0201     add(Value.size());
0202     this->update(ArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()),
0203                           Value.size()));
0204     return *this;
0205   }
0206 
0207   template <typename T>
0208   using HasAddHashT =
0209       decltype(addHash(std::declval<HashBuilder &>(), std::declval<T &>()));
0210   /// Implement hashing for user-defined `struct`s.
0211   ///
0212   /// Any user-define `struct` can participate in hashing via `HashBuilder` by
0213   /// providing a `addHash` templated function.
0214   ///
0215   /// ```
0216   /// template <typename HasherT, llvm::endianness Endianness>
0217   /// void addHash(HashBuilder<HasherT, Endianness> &HBuilder,
0218   ///              const UserDefinedStruct &Value);
0219   /// ```
0220   ///
0221   /// For example:
0222   /// ```
0223   /// struct SimpleStruct {
0224   ///   char c;
0225   ///   int i;
0226   /// };
0227   ///
0228   /// template <typename HasherT, llvm::endianness Endianness>
0229   /// void addHash(HashBuilder<HasherT, Endianness> &HBuilder,
0230   ///              const SimpleStruct &Value) {
0231   ///   HBuilder.add(Value.c);
0232   ///   HBuilder.add(Value.i);
0233   /// }
0234   /// ```
0235   ///
0236   /// To avoid endianness issues, specializations of `addHash` should
0237   /// generally rely on exising `add`, `addRange`, and `addRangeElements`
0238   /// functions. If directly using `update`, an implementation must correctly
0239   /// handle endianness.
0240   ///
0241   /// ```
0242   /// struct __attribute__ ((packed)) StructWithFastHash {
0243   ///   int I;
0244   ///   char C;
0245   ///
0246   ///   // If possible, we want to hash both `I` and `C` in a single
0247   ///   // `update` call for performance concerns.
0248   ///   template <typename HasherT, llvm::endianness Endianness>
0249   ///   friend void addHash(HashBuilder<HasherT, Endianness> &HBuilder,
0250   ///                       const StructWithFastHash &Value) {
0251   ///     if (Endianness == llvm::endianness::native) {
0252   ///       HBuilder.update(ArrayRef(
0253   ///           reinterpret_cast<const uint8_t *>(&Value), sizeof(Value)));
0254   ///     } else {
0255   ///       // Rely on existing `add` methods to handle endianness.
0256   ///       HBuilder.add(Value.I);
0257   ///       HBuilder.add(Value.C);
0258   ///     }
0259   ///   }
0260   /// };
0261   /// ```
0262   ///
0263   /// To avoid collisions, specialization of `addHash` for variable-size
0264   /// types must take the size into account.
0265   ///
0266   /// For example:
0267   /// ```
0268   /// struct CustomContainer {
0269   /// private:
0270   ///   size_t Size;
0271   ///   int Elements[100];
0272   ///
0273   /// public:
0274   ///   CustomContainer(size_t Size) : Size(Size) {
0275   ///     for (size_t I = 0; I != Size; ++I)
0276   ///       Elements[I] = I;
0277   ///   }
0278   ///   template <typename HasherT, llvm::endianness Endianness>
0279   ///   friend void addHash(HashBuilder<HasherT, Endianness> &HBuilder,
0280   ///                       const CustomContainer &Value) {
0281   ///     if (Endianness == llvm::endianness::native) {
0282   ///       HBuilder.update(ArrayRef(
0283   ///           reinterpret_cast<const uint8_t *>(&Value.Size),
0284   ///           sizeof(Value.Size) + Value.Size * sizeof(Value.Elements[0])));
0285   ///     } else {
0286   ///       // `addRange` will take care of encoding the size.
0287   ///       HBuilder.addRange(&Value.Elements[0], &Value.Elements[0] +
0288   ///       Value.Size);
0289   ///     }
0290   ///   }
0291   /// };
0292   /// ```
0293   template <typename T>
0294   std::enable_if_t<is_detected<HasAddHashT, T>::value &&
0295                        !hashbuilder_detail::IsHashableData<T>::value,
0296                    HashBuilder &>
0297   add(const T &Value) {
0298     addHash(*this, Value);
0299     return *this;
0300   }
0301 
0302   template <typename T1, typename T2>
0303   HashBuilder &add(const std::pair<T1, T2> &Value) {
0304     return add(Value.first, Value.second);
0305   }
0306 
0307   template <typename... Ts> HashBuilder &add(const std::tuple<Ts...> &Arg) {
0308     std::apply([this](const auto &...Args) { this->add(Args...); }, Arg);
0309     return *this;
0310   }
0311 
0312   /// A convenenience variadic helper.
0313   /// It simply iterates over its arguments, in order.
0314   /// ```
0315   /// add(Arg1, Arg2);
0316   /// ```
0317   /// is equivalent to
0318   /// ```
0319   /// add(Arg1)
0320   /// add(Arg2)
0321   /// ```
0322   template <typename... Ts>
0323   std::enable_if_t<(sizeof...(Ts) > 1), HashBuilder &> add(const Ts &...Args) {
0324     return (add(Args), ...);
0325   }
0326 
0327   template <typename ForwardIteratorT>
0328   HashBuilder &addRange(ForwardIteratorT First, ForwardIteratorT Last) {
0329     add(std::distance(First, Last));
0330     return addRangeElements(First, Last);
0331   }
0332 
0333   template <typename RangeT> HashBuilder &addRange(const RangeT &Range) {
0334     return addRange(adl_begin(Range), adl_end(Range));
0335   }
0336 
0337   template <typename ForwardIteratorT>
0338   HashBuilder &addRangeElements(ForwardIteratorT First, ForwardIteratorT Last) {
0339     return addRangeElementsImpl(
0340         First, Last,
0341         typename std::iterator_traits<ForwardIteratorT>::iterator_category());
0342   }
0343 
0344   template <typename RangeT>
0345   HashBuilder &addRangeElements(const RangeT &Range) {
0346     return addRangeElements(adl_begin(Range), adl_end(Range));
0347   }
0348 
0349   template <typename T>
0350   using HasByteSwapT = decltype(support::endian::byte_swap(
0351       std::declval<T &>(), llvm::endianness::little));
0352   /// Adjust `Value` for the target endianness and add it to the hash.
0353   template <typename T>
0354   std::enable_if_t<is_detected<HasByteSwapT, T>::value, HashBuilder &>
0355   adjustForEndiannessAndAdd(const T &Value) {
0356     T SwappedValue = support::endian::byte_swap(Value, Endianness);
0357     this->update(ArrayRef(reinterpret_cast<const uint8_t *>(&SwappedValue),
0358                           sizeof(SwappedValue)));
0359     return *this;
0360   }
0361 
0362 private:
0363   // FIXME: Once available, specialize this function for `contiguous_iterator`s,
0364   // and use it for `ArrayRef` and `StringRef`.
0365   template <typename ForwardIteratorT>
0366   HashBuilder &addRangeElementsImpl(ForwardIteratorT First,
0367                                     ForwardIteratorT Last,
0368                                     std::forward_iterator_tag) {
0369     for (auto It = First; It != Last; ++It)
0370       add(*It);
0371     return *this;
0372   }
0373 
0374   template <typename T>
0375   std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value &&
0376                        Endianness == llvm::endianness::native,
0377                    HashBuilder &>
0378   addRangeElementsImpl(T *First, T *Last, std::forward_iterator_tag) {
0379     this->update(ArrayRef(reinterpret_cast<const uint8_t *>(First),
0380                           (Last - First) * sizeof(T)));
0381     return *this;
0382   }
0383 };
0384 
0385 namespace hashbuilder_detail {
0386 class HashCodeHasher {
0387 public:
0388   HashCodeHasher() : Code(0) {}
0389   void update(ArrayRef<uint8_t> Data) {
0390     hash_code DataCode = hash_value(Data);
0391     Code = hash_combine(Code, DataCode);
0392   }
0393   hash_code Code;
0394 };
0395 
0396 using HashCodeHashBuilder =
0397     HashBuilder<hashbuilder_detail::HashCodeHasher, llvm::endianness::native>;
0398 } // namespace hashbuilder_detail
0399 
0400 /// Provide a default implementation of `hash_value` when `addHash(const T &)`
0401 /// is supported.
0402 template <typename T>
0403 std::enable_if_t<
0404     is_detected<hashbuilder_detail::HashCodeHashBuilder::HasAddHashT, T>::value,
0405     hash_code>
0406 hash_value(const T &Value) {
0407   hashbuilder_detail::HashCodeHashBuilder HBuilder;
0408   HBuilder.add(Value);
0409   return HBuilder.getHasher().Code;
0410 }
0411 } // end namespace llvm
0412 
0413 #endif // LLVM_SUPPORT_HASHBUILDER_H