Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-05-10 08:43:42

0001 //===- FunctionInfo.h -------------------------------------------*- C++ -*-===//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 
0009 #ifndef LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
0010 #define LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
0011 
0012 #include "llvm/ADT/SmallString.h"
0013 #include "llvm/DebugInfo/GSYM/CallSiteInfo.h"
0014 #include "llvm/DebugInfo/GSYM/ExtractRanges.h"
0015 #include "llvm/DebugInfo/GSYM/InlineInfo.h"
0016 #include "llvm/DebugInfo/GSYM/LineTable.h"
0017 #include "llvm/DebugInfo/GSYM/LookupResult.h"
0018 #include "llvm/DebugInfo/GSYM/MergedFunctionsInfo.h"
0019 #include "llvm/DebugInfo/GSYM/StringTable.h"
0020 #include <cstdint>
0021 
0022 namespace llvm {
0023 class raw_ostream;
0024 
0025 namespace gsym {
0026 
0027 class GsymReader;
0028 /// Function information in GSYM files encodes information for one contiguous
0029 /// address range. If a function has discontiguous address ranges, they will
0030 /// need to be encoded using multiple FunctionInfo objects.
0031 ///
0032 /// ENCODING
0033 ///
0034 /// The function information gets the function start address as an argument
0035 /// to the FunctionInfo::decode(...) function. This information is calculated
0036 /// from the GSYM header and an address offset from the GSYM address offsets
0037 /// table. The encoded FunctionInfo information must be aligned to a 4 byte
0038 /// boundary.
0039 ///
0040 /// The encoded data for a FunctionInfo starts with fixed data that all
0041 /// function info objects have:
0042 ///
0043 /// ENCODING  NAME        DESCRIPTION
0044 /// ========= =========== ====================================================
0045 /// uint32_t  Size        The size in bytes of this function.
0046 /// uint32_t  Name        The string table offset of the function name.
0047 ///
0048 /// The optional data in a FunctionInfo object follows this fixed information
0049 /// and consists of a stream of tuples that consist of:
0050 ///
0051 /// ENCODING  NAME        DESCRIPTION
0052 /// ========= =========== ====================================================
0053 /// uint32_t  InfoType    An "InfoType" enumeration that describes the type
0054 ///                       of optional data that is encoded.
0055 /// uint32_t  InfoLength  The size in bytes of the encoded data that
0056 ///                       immediately follows this length if this value is
0057 ///                       greater than zero.
0058 /// uint8_t[] InfoData    Encoded bytes that represent the data for the
0059 ///                       "InfoType". These bytes are only present if
0060 ///                       "InfoLength" is greater than zero.
0061 ///
0062 /// The "InfoType" is an enumeration:
0063 ///
0064 ///   enum InfoType {
0065 ///     EndOfList = 0u,
0066 ///     LineTableInfo = 1u,
0067 ///     InlineInfo = 2u,
0068 ///     MergedFunctionsInfo = 3u,
0069 ///     CallSiteInfo = 4u
0070 ///   };
0071 ///
0072 /// This stream of tuples is terminated by an "InfoType" whose value is
0073 /// InfoType::EndOfList and a zero for "InfoLength". This signifies the end of
0074 /// the optional information list. This format allows us to add new optional
0075 /// information data to a FunctionInfo object over time and allows older
0076 /// clients to still parse the format and skip over any data that they don't
0077 /// understand or want to parse.
0078 ///
0079 /// So the function information encoding essentially looks like:
0080 ///
0081 /// struct {
0082 ///   uint32_t Size;
0083 ///   uint32_t Name;
0084 ///   struct {
0085 ///     uint32_t InfoType;
0086 ///     uint32_t InfoLength;
0087 ///     uint8_t InfoData[InfoLength];
0088 ///   }[N];
0089 /// }
0090 ///
0091 /// Where "N" is the number of tuples.
0092 struct FunctionInfo {
0093   AddressRange Range;
0094   uint32_t Name; ///< String table offset in the string table.
0095   std::optional<LineTable> OptLineTable;
0096   std::optional<InlineInfo> Inline;
0097   std::optional<MergedFunctionsInfo> MergedFunctions;
0098   std::optional<CallSiteInfoCollection> CallSites;
0099   /// If we encode a FunctionInfo during segmenting so we know its size, we can
0100   /// cache that encoding here so we don't need to re-encode it when saving the
0101   /// GSYM file.
0102   SmallString<32> EncodingCache;
0103 
0104   FunctionInfo(uint64_t Addr = 0, uint64_t Size = 0, uint32_t N = 0)
0105       : Range(Addr, Addr + Size), Name(N) {}
0106 
0107   /// Query if a FunctionInfo has rich debug info.
0108   ///
0109   /// \returns A bool that indicates if this object has something else than
0110   /// range and name. When converting information from a symbol table and from
0111   /// debug info, we might end up with multiple FunctionInfo objects for the
0112   /// same range and we need to be able to tell which one is the better object
0113   /// to use.
0114   bool hasRichInfo() const { return OptLineTable || Inline || CallSites; }
0115 
0116   /// Query if a FunctionInfo object is valid.
0117   ///
0118   /// Address and size can be zero and there can be no line entries for a
0119   /// symbol so the only indication this entry is valid is if the name is
0120   /// not zero. This can happen when extracting information from symbol
0121   /// tables that do not encode symbol sizes. In that case only the
0122   /// address and name will be filled in.
0123   ///
0124   /// \returns A boolean indicating if this FunctionInfo is valid.
0125   bool isValid() const {
0126     return Name != 0;
0127   }
0128 
0129   /// Decode an object from a binary data stream.
0130   ///
0131   /// \param Data The binary stream to read the data from. This object must
0132   /// have the data for the object starting at offset zero. The data
0133   /// can contain more data than needed.
0134   ///
0135   /// \param BaseAddr The FunctionInfo's start address and will be used as the
0136   /// base address when decoding any contained information like the line table
0137   /// and the inline info.
0138   ///
0139   /// \returns An FunctionInfo or an error describing the issue that was
0140   /// encountered during decoding.
0141   static llvm::Expected<FunctionInfo> decode(DataExtractor &Data,
0142                                              uint64_t BaseAddr);
0143 
0144   /// Encode this object into FileWriter stream.
0145   ///
0146   /// \param O The binary stream to write the data to at the current file
0147   /// position.
0148   ///
0149   /// \param NoPadding Directly write the FunctionInfo data, without any padding
0150   /// By default, FunctionInfo will be 4-byte aligned by padding with
0151   /// 0's at the start. This is OK since the function will return the offset of
0152   /// actual data in the stream. However when writing FunctionInfo's as a
0153   /// stream, the padding will break the decoding of the data - since the offset
0154   /// where the FunctionInfo starts is not kept in this scenario.
0155   ///
0156   /// \returns An error object that indicates failure or the offset of the
0157   /// function info that was successfully written into the stream.
0158   llvm::Expected<uint64_t> encode(FileWriter &O, bool NoPadding = false) const;
0159 
0160   /// Encode this function info into the internal byte cache and return the size
0161   /// in bytes.
0162   ///
0163   /// When segmenting GSYM files we need to know how big each FunctionInfo will
0164   /// encode into so we can generate segments of the right size. We don't want
0165   /// to have to encode a FunctionInfo twice, so we can cache the encoded bytes
0166   /// and re-use then when calling FunctionInfo::encode(...).
0167   ///
0168   /// \returns The size in bytes of the FunctionInfo if it were to be encoded
0169   /// into a byte stream.
0170   uint64_t cacheEncoding();
0171 
0172   /// Lookup an address within a FunctionInfo object's data stream.
0173   ///
0174   /// Instead of decoding an entire FunctionInfo object when doing lookups,
0175   /// we can decode only the information we need from the FunctionInfo's data
0176   /// for the specific address. The lookup result information is returned as
0177   /// a LookupResult.
0178   ///
0179   /// \param Data The binary stream to read the data from. This object must
0180   /// have the data for the object starting at offset zero. The data
0181   /// can contain more data than needed.
0182   ///
0183   /// \param GR The GSYM reader that contains the string and file table that
0184   /// will be used to fill in information in the returned result.
0185   ///
0186   /// \param FuncAddr The function start address decoded from the GsymReader.
0187   ///
0188   /// \param Addr The address to lookup.
0189   ///
0190   /// \param MergedFuncsData A pointer to an optional DataExtractor that, if
0191   /// non-null, will be set to the raw data of the MergedFunctionInfo, if
0192   /// present.
0193   ///
0194   /// \returns An LookupResult or an error describing the issue that was
0195   /// encountered during decoding. An error should only be returned if the
0196   /// address is not contained in the FunctionInfo or if the data is corrupted.
0197   static llvm::Expected<LookupResult>
0198   lookup(DataExtractor &Data, const GsymReader &GR, uint64_t FuncAddr,
0199          uint64_t Addr,
0200          std::optional<DataExtractor> *MergedFuncsData = nullptr);
0201 
0202   uint64_t startAddress() const { return Range.start(); }
0203   uint64_t endAddress() const { return Range.end(); }
0204   uint64_t size() const { return Range.size(); }
0205 
0206   void clear() {
0207     Range = {0, 0};
0208     Name = 0;
0209     OptLineTable = std::nullopt;
0210     Inline = std::nullopt;
0211   }
0212 };
0213 
0214 inline bool operator==(const FunctionInfo &LHS, const FunctionInfo &RHS) {
0215   return LHS.Range == RHS.Range && LHS.Name == RHS.Name &&
0216          LHS.OptLineTable == RHS.OptLineTable && LHS.Inline == RHS.Inline;
0217 }
0218 inline bool operator!=(const FunctionInfo &LHS, const FunctionInfo &RHS) {
0219   return !(LHS == RHS);
0220 }
0221 /// This sorting will order things consistently by address range first, but
0222 /// then followed by increasing levels of debug info like inline information
0223 /// and line tables. We might end up with a FunctionInfo from debug info that
0224 /// will have the same range as one from the symbol table, but we want to
0225 /// quickly be able to sort and use the best version when creating the final
0226 /// GSYM file. This function compares the inline information as we have seen
0227 /// cases where LTO can generate a wide array of differing inline information,
0228 /// mostly due to messing up the address ranges for inlined functions, so the
0229 /// inline information with the most entries will appeear last. If the inline
0230 /// information match, either by both function infos not having any or both
0231 /// being exactly the same, we will then compare line tables. Comparing line
0232 /// tables allows the entry with the most line entries to appear last. This
0233 /// ensures we are able to save the FunctionInfo with the most debug info into
0234 /// the GSYM file.
0235 inline bool operator<(const FunctionInfo &LHS, const FunctionInfo &RHS) {
0236   // First sort by address range
0237   if (LHS.Range != RHS.Range)
0238     return LHS.Range < RHS.Range;
0239   if (LHS.Inline == RHS.Inline)
0240     return LHS.OptLineTable < RHS.OptLineTable;
0241   return LHS.Inline < RHS.Inline;
0242 }
0243 
0244 raw_ostream &operator<<(raw_ostream &OS, const FunctionInfo &R);
0245 
0246 } // namespace gsym
0247 } // namespace llvm
0248 
0249 #endif // LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H