Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-05-10 08:43:40

0001 //===- TypeHashing.h ---------------------------------------------*- C++-*-===//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 
0009 #ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H
0010 #define LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H
0011 
0012 #include "llvm/ADT/ArrayRef.h"
0013 #include "llvm/ADT/Hashing.h"
0014 #include "llvm/ADT/StringRef.h"
0015 
0016 #include "llvm/DebugInfo/CodeView/CVRecord.h"
0017 #include "llvm/DebugInfo/CodeView/TypeCollection.h"
0018 #include "llvm/DebugInfo/CodeView/TypeIndex.h"
0019 
0020 #include "llvm/Support/FormatProviders.h"
0021 
0022 #include <type_traits>
0023 
0024 namespace llvm {
0025 class raw_ostream;
0026 namespace codeview {
0027 
0028 /// A locally hashed type represents a straightforward hash code of a serialized
0029 /// record.  The record is simply serialized, and then the bytes are hashed by
0030 /// a standard algorithm.  This is sufficient for the case of de-duplicating
0031 /// records within a single sequence of types, because if two records both have
0032 /// a back-reference to the same type in the same stream, they will both have
0033 /// the same numeric value for the TypeIndex of the back reference.
0034 struct LocallyHashedType {
0035   hash_code Hash;
0036   ArrayRef<uint8_t> RecordData;
0037 
0038   /// Given a type, compute its local hash.
0039   static LocallyHashedType hashType(ArrayRef<uint8_t> RecordData);
0040 
0041   /// Given a sequence of types, compute all of the local hashes.
0042   template <typename Range>
0043   static std::vector<LocallyHashedType> hashTypes(Range &&Records) {
0044     std::vector<LocallyHashedType> Hashes;
0045     Hashes.reserve(std::distance(std::begin(Records), std::end(Records)));
0046     for (const auto &R : Records)
0047       Hashes.push_back(hashType(R));
0048 
0049     return Hashes;
0050   }
0051 
0052   static std::vector<LocallyHashedType>
0053   hashTypeCollection(TypeCollection &Types) {
0054     std::vector<LocallyHashedType> Hashes;
0055     Types.ForEachRecord([&Hashes](TypeIndex TI, const CVType &Type) {
0056       Hashes.push_back(hashType(Type.RecordData));
0057     });
0058     return Hashes;
0059   }
0060 };
0061 
0062 enum class GlobalTypeHashAlg : uint16_t {
0063   SHA1 = 0, // standard 20-byte SHA1 hash
0064   SHA1_8,   // last 8-bytes of standard SHA1 hash
0065   BLAKE3,   // truncated 8-bytes BLAKE3
0066 };
0067 
0068 /// A globally hashed type represents a hash value that is sufficient to
0069 /// uniquely identify a record across multiple type streams or type sequences.
0070 /// This works by, for any given record A which references B, replacing the
0071 /// TypeIndex that refers to B with a previously-computed global hash for B.  As
0072 /// this is a recursive algorithm (e.g. the global hash of B also depends on the
0073 /// global hashes of the types that B refers to), a global hash can uniquely
0074 /// identify that A occurs in another stream that has a completely
0075 /// different graph structure.  Although the hash itself is slower to compute,
0076 /// probing is much faster with a globally hashed type, because the hash itself
0077 /// is considered "as good as" the original type.  Since type records can be
0078 /// quite large, this makes the equality comparison of the hash much faster than
0079 /// equality comparison of a full record.
0080 struct GloballyHashedType {
0081   GloballyHashedType() = default;
0082   GloballyHashedType(StringRef H)
0083       : GloballyHashedType(ArrayRef<uint8_t>(H.bytes_begin(), H.bytes_end())) {}
0084   GloballyHashedType(ArrayRef<uint8_t> H) {
0085     assert(H.size() == 8);
0086     ::memcpy(Hash.data(), H.data(), 8);
0087   }
0088   std::array<uint8_t, 8> Hash;
0089 
0090   bool empty() const { return *(const uint64_t*)Hash.data() == 0; }
0091 
0092   friend inline bool operator==(const GloballyHashedType &L,
0093                                 const GloballyHashedType &R) {
0094     return L.Hash == R.Hash;
0095   }
0096 
0097   friend inline bool operator!=(const GloballyHashedType &L,
0098                                 const GloballyHashedType &R) {
0099     return !(L.Hash == R.Hash);
0100   }
0101 
0102   /// Given a sequence of bytes representing a record, compute a global hash for
0103   /// this record.  Due to the nature of global hashes incorporating the hashes
0104   /// of referenced records, this function requires a list of types and ids
0105   /// that RecordData might reference, indexable by TypeIndex.
0106   static GloballyHashedType hashType(ArrayRef<uint8_t> RecordData,
0107                                      ArrayRef<GloballyHashedType> PreviousTypes,
0108                                      ArrayRef<GloballyHashedType> PreviousIds);
0109 
0110   /// Given a sequence of bytes representing a record, compute a global hash for
0111   /// this record.  Due to the nature of global hashes incorporating the hashes
0112   /// of referenced records, this function requires a list of types and ids
0113   /// that RecordData might reference, indexable by TypeIndex.
0114   static GloballyHashedType hashType(CVType Type,
0115                                      ArrayRef<GloballyHashedType> PreviousTypes,
0116                                      ArrayRef<GloballyHashedType> PreviousIds) {
0117     return hashType(Type.RecordData, PreviousTypes, PreviousIds);
0118   }
0119 
0120   /// Given a sequence of combined type and ID records, compute global hashes
0121   /// for each of them, returning the results in a vector of hashed types.
0122   template <typename Range>
0123   static std::vector<GloballyHashedType> hashTypes(Range &&Records) {
0124     std::vector<GloballyHashedType> Hashes;
0125     bool UnresolvedRecords = false;
0126     for (const auto &R : Records) {
0127       GloballyHashedType H = hashType(R, Hashes, Hashes);
0128       if (H.empty())
0129         UnresolvedRecords = true;
0130       Hashes.push_back(H);
0131     }
0132 
0133     // In some rare cases, there might be records with forward references in the
0134     // stream. Several passes might be needed to fully hash each record in the
0135     // Type stream. However this occurs on very small OBJs generated by MASM,
0136     // with a dozen records at most. Therefore this codepath isn't
0137     // time-critical, as it isn't taken in 99% of cases.
0138     while (UnresolvedRecords) {
0139       UnresolvedRecords = false;
0140       auto HashIt = Hashes.begin();
0141       for (const auto &R : Records) {
0142         if (HashIt->empty()) {
0143           GloballyHashedType H = hashType(R, Hashes, Hashes);
0144           if (H.empty())
0145             UnresolvedRecords = true;
0146           else
0147             *HashIt = H;
0148         }
0149         ++HashIt;
0150       }
0151     }
0152 
0153     return Hashes;
0154   }
0155 
0156   /// Given a sequence of combined type and ID records, compute global hashes
0157   /// for each of them, returning the results in a vector of hashed types.
0158   template <typename Range>
0159   static std::vector<GloballyHashedType>
0160   hashIds(Range &&Records, ArrayRef<GloballyHashedType> TypeHashes) {
0161     std::vector<GloballyHashedType> IdHashes;
0162     for (const auto &R : Records)
0163       IdHashes.push_back(hashType(R, TypeHashes, IdHashes));
0164 
0165     return IdHashes;
0166   }
0167 
0168   static std::vector<GloballyHashedType>
0169   hashTypeCollection(TypeCollection &Types) {
0170     std::vector<GloballyHashedType> Hashes;
0171     Types.ForEachRecord([&Hashes](TypeIndex TI, const CVType &Type) {
0172       Hashes.push_back(hashType(Type.RecordData, Hashes, Hashes));
0173     });
0174     return Hashes;
0175   }
0176 };
0177 static_assert(std::is_trivially_copyable<GloballyHashedType>::value,
0178               "GloballyHashedType must be trivially copyable so that we can "
0179               "reinterpret_cast arrays of hash data to arrays of "
0180               "GloballyHashedType");
0181 } // namespace codeview
0182 
0183 template <> struct DenseMapInfo<codeview::LocallyHashedType> {
0184   static codeview::LocallyHashedType Empty;
0185   static codeview::LocallyHashedType Tombstone;
0186 
0187   static codeview::LocallyHashedType getEmptyKey() { return Empty; }
0188 
0189   static codeview::LocallyHashedType getTombstoneKey() { return Tombstone; }
0190 
0191   static unsigned getHashValue(codeview::LocallyHashedType Val) {
0192     return Val.Hash;
0193   }
0194 
0195   static bool isEqual(codeview::LocallyHashedType LHS,
0196                       codeview::LocallyHashedType RHS) {
0197     if (LHS.Hash != RHS.Hash)
0198       return false;
0199     return LHS.RecordData == RHS.RecordData;
0200   }
0201 };
0202 
0203 template <> struct DenseMapInfo<codeview::GloballyHashedType> {
0204   static codeview::GloballyHashedType Empty;
0205   static codeview::GloballyHashedType Tombstone;
0206 
0207   static codeview::GloballyHashedType getEmptyKey() { return Empty; }
0208 
0209   static codeview::GloballyHashedType getTombstoneKey() { return Tombstone; }
0210 
0211   static unsigned getHashValue(codeview::GloballyHashedType Val) {
0212     return *reinterpret_cast<const unsigned *>(Val.Hash.data());
0213   }
0214 
0215   static bool isEqual(codeview::GloballyHashedType LHS,
0216                       codeview::GloballyHashedType RHS) {
0217     return LHS == RHS;
0218   }
0219 };
0220 
0221 template <> struct format_provider<codeview::LocallyHashedType> {
0222 public:
0223   static void format(const codeview::LocallyHashedType &V,
0224                      llvm::raw_ostream &Stream, StringRef Style) {
0225     write_hex(Stream, V.Hash, HexPrintStyle::Upper, 8);
0226   }
0227 };
0228 
0229 template <> struct format_provider<codeview::GloballyHashedType> {
0230 public:
0231   static void format(const codeview::GloballyHashedType &V,
0232                      llvm::raw_ostream &Stream, StringRef Style) {
0233     for (uint8_t B : V.Hash) {
0234       write_hex(Stream, B, HexPrintStyle::Upper, 2);
0235     }
0236   }
0237 };
0238 
0239 } // namespace llvm
0240 
0241 #endif