Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-05-10 08:43:42

0001 //===- GsymReader.h ---------------------------------------------*- C++ -*-===//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 
0009 #ifndef LLVM_DEBUGINFO_GSYM_GSYMREADER_H
0010 #define LLVM_DEBUGINFO_GSYM_GSYMREADER_H
0011 
0012 #include "llvm/ADT/ArrayRef.h"
0013 #include "llvm/DebugInfo/GSYM/FileEntry.h"
0014 #include "llvm/DebugInfo/GSYM/FunctionInfo.h"
0015 #include "llvm/DebugInfo/GSYM/Header.h"
0016 #include "llvm/DebugInfo/GSYM/LineEntry.h"
0017 #include "llvm/DebugInfo/GSYM/StringTable.h"
0018 #include "llvm/Support/DataExtractor.h"
0019 #include "llvm/Support/Endian.h"
0020 #include "llvm/Support/ErrorOr.h"
0021 #include <inttypes.h>
0022 #include <memory>
0023 #include <stdint.h>
0024 #include <vector>
0025 
0026 namespace llvm {
0027 class MemoryBuffer;
0028 class raw_ostream;
0029 
0030 namespace gsym {
0031 
0032 /// GsymReader is used to read GSYM data from a file or buffer.
0033 ///
0034 /// This class is optimized for very quick lookups when the endianness matches
0035 /// the host system. The Header, address table, address info offsets, and file
0036 /// table are designed to be mmap'ed as read only into memory and used without
0037 /// any parsing needed. If the endianness doesn't match, we swap these objects
0038 /// and tables into GsymReader::SwappedData and then point our header and
0039 /// ArrayRefs to this swapped internal data.
0040 ///
0041 /// GsymReader objects must use one of the static functions to create an
0042 /// instance: GsymReader::openFile(...) and GsymReader::copyBuffer(...).
0043 
0044 class GsymReader {
0045   GsymReader(std::unique_ptr<MemoryBuffer> Buffer);
0046   llvm::Error parse();
0047 
0048   std::unique_ptr<MemoryBuffer> MemBuffer;
0049   StringRef GsymBytes;
0050   llvm::endianness Endian;
0051   const Header *Hdr = nullptr;
0052   ArrayRef<uint8_t> AddrOffsets;
0053   ArrayRef<uint32_t> AddrInfoOffsets;
0054   ArrayRef<FileEntry> Files;
0055   StringTable StrTab;
0056   /// When the GSYM file's endianness doesn't match the host system then
0057   /// we must decode all data structures that need to be swapped into
0058   /// local storage and point the ArrayRef objects above to these swapped
0059   /// copies.
0060   struct SwappedData {
0061     Header Hdr;
0062     std::vector<uint8_t> AddrOffsets;
0063     std::vector<uint32_t> AddrInfoOffsets;
0064     std::vector<FileEntry> Files;
0065   };
0066   std::unique_ptr<SwappedData> Swap;
0067 
0068 public:
0069   GsymReader(GsymReader &&RHS);
0070   ~GsymReader();
0071 
0072   /// Construct a GsymReader from a file on disk.
0073   ///
0074   /// \param Path The file path of the GSYM file to read.
0075   /// \returns An expected GsymReader that contains the object or an error
0076   /// object that indicates reason for failing to read the GSYM.
0077   static llvm::Expected<GsymReader> openFile(StringRef Path);
0078 
0079   /// Construct a GsymReader from a buffer.
0080   ///
0081   /// \param Bytes A set of bytes that will be copied and owned by the
0082   /// returned object on success.
0083   /// \returns An expected GsymReader that contains the object or an error
0084   /// object that indicates reason for failing to read the GSYM.
0085   static llvm::Expected<GsymReader> copyBuffer(StringRef Bytes);
0086 
0087   /// Access the GSYM header.
0088   /// \returns A native endian version of the GSYM header.
0089   const Header &getHeader() const;
0090 
0091   /// Get the full function info for an address.
0092   ///
0093   /// This should be called when a client will store a copy of the complete
0094   /// FunctionInfo for a given address. For one off lookups, use the lookup()
0095   /// function below.
0096   ///
0097   /// Symbolication server processes might want to parse the entire function
0098   /// info for a given address and cache it if the process stays around to
0099   /// service many symbolication addresses, like for parsing profiling
0100   /// information.
0101   ///
0102   /// \param Addr A virtual address from the original object file to lookup.
0103   ///
0104   /// \returns An expected FunctionInfo that contains the function info object
0105   /// or an error object that indicates reason for failing to lookup the
0106   /// address.
0107   llvm::Expected<FunctionInfo> getFunctionInfo(uint64_t Addr) const;
0108 
0109   /// Get the full function info given an address index.
0110   ///
0111   /// \param AddrIdx An address index for an address in the address table.
0112   ///
0113   /// \returns An expected FunctionInfo that contains the function info object
0114   /// or an error object that indicates reason for failing to get the function
0115   /// info object.
0116   llvm::Expected<FunctionInfo> getFunctionInfoAtIndex(uint64_t AddrIdx) const;
0117 
0118   /// Look up an address in a GSYM.
0119   ///
0120   /// Look up just the information needed for a specific address \a Addr. This
0121   /// function is faster than calling getFunctionInfo() as it will only return
0122   /// information that pertains to \a Addr and allows the parsing to skip any
0123   /// extra information encoded for other addresses. For example the line table
0124   /// parsing can stop when a matching LineEntry has been found, and the
0125   /// InlineInfo can stop parsing early once a match has been found and also
0126   /// skip information that doesn't match. This avoids memory allocations and
0127   /// is much faster for lookups.
0128   ///
0129   /// \param Addr A virtual address from the original object file to lookup.
0130   ///
0131   /// \param MergedFuncsData A pointer to an optional DataExtractor that, if
0132   /// non-null, will be set to the raw data of the MergedFunctionInfo, if
0133   /// present.
0134   ///
0135   /// \returns An expected LookupResult that contains only the information
0136   /// needed for the current address, or an error object that indicates reason
0137   /// for failing to lookup the address.
0138   llvm::Expected<LookupResult>
0139   lookup(uint64_t Addr,
0140          std::optional<DataExtractor> *MergedFuncsData = nullptr) const;
0141 
0142   /// Lookup all merged functions for a given address.
0143   ///
0144   /// This function performs a lookup for the specified address and then
0145   /// retrieves additional LookupResults from any merged functions associated
0146   /// with the primary LookupResult.
0147   ///
0148   /// \param Addr The address to lookup.
0149   ///
0150   /// \returns A vector of LookupResult objects, where the first element is the
0151   /// primary result, followed by results for any merged functions
0152   llvm::Expected<std::vector<LookupResult>> lookupAll(uint64_t Addr) const;
0153 
0154   /// Get a string from the string table.
0155   ///
0156   /// \param Offset The string table offset for the string to retrieve.
0157   /// \returns The string from the string table.
0158   StringRef getString(uint32_t Offset) const { return StrTab[Offset]; }
0159 
0160   /// Get the file entry for the supplied file index.
0161   ///
0162   /// Converts a file index found in the FunctionInfo data back into its file
0163   /// entry. This function can be used for iteration, but is more commonly
0164   /// used for random access when doing lookups.
0165   ///
0166   /// \param Index An index into the file table.
0167   /// \returns A copy of the FileEntry at \a Index, or std::nullopt if the
0168   /// file index is out of bounds.
0169   std::optional<FileEntry> getFile(uint32_t Index) const {
0170     if (Index >= Files.size())
0171       return std::nullopt;
0172     return Files[Index];
0173   }
0174 
0175   /// Dump the entire Gsym data contained in this object.
0176   ///
0177   /// \param  OS The output stream to dump to.
0178   void dump(raw_ostream &OS);
0179 
0180   /// Dump a FunctionInfo object.
0181   ///
0182   /// This function will convert any string table indexes and file indexes
0183   /// into human readable format.
0184   ///
0185   /// \param  OS The output stream to dump to.
0186   ///
0187   /// \param FI The object to dump.
0188   ///
0189   /// \param Indent The indentation as number of spaces. Used when dumping as an
0190   /// item within MergedFunctionsInfo.
0191   void dump(raw_ostream &OS, const FunctionInfo &FI, uint32_t Indent = 0);
0192 
0193   /// Dump a MergedFunctionsInfo object.
0194   ///
0195   /// This function will dump a MergedFunctionsInfo object - basically by
0196   /// dumping the contained FunctionInfo objects with indentation.
0197   ///
0198   /// \param  OS The output stream to dump to.
0199   ///
0200   /// \param MFI The object to dump.
0201   void dump(raw_ostream &OS, const MergedFunctionsInfo &MFI);
0202 
0203   /// Dump a CallSiteInfo object.
0204   ///
0205   /// This function will output the details of a CallSiteInfo object in a
0206   /// human-readable format.
0207   ///
0208   /// \param OS The output stream to dump to.
0209   ///
0210   /// \param CSI The CallSiteInfo object to dump.
0211   void dump(raw_ostream &OS, const CallSiteInfo &CSI);
0212 
0213   /// Dump a CallSiteInfoCollection object.
0214   ///
0215   /// This function will iterate over a collection of CallSiteInfo objects and
0216   /// dump each one.
0217   ///
0218   /// \param OS The output stream to dump to.
0219   ///
0220   /// \param CSIC The CallSiteInfoCollection object to dump.
0221   ///
0222   /// \param Indent The indentation as number of spaces. Used when dumping as an
0223   /// item from within MergedFunctionsInfo.
0224   void dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC,
0225             uint32_t Indent = 0);
0226 
0227   /// Dump a LineTable object.
0228   ///
0229   /// This function will convert any string table indexes and file indexes
0230   /// into human readable format.
0231   ///
0232   ///
0233   /// \param  OS The output stream to dump to.
0234   ///
0235   /// \param LT The object to dump.
0236   ///
0237   /// \param Indent The indentation as number of spaces. Used when dumping as an
0238   /// item from within MergedFunctionsInfo.
0239   void dump(raw_ostream &OS, const LineTable &LT, uint32_t Indent = 0);
0240 
0241   /// Dump an InlineInfo object.
0242   ///
0243   /// This function will convert any string table indexes and file indexes
0244   /// into human readable format.
0245   ///
0246   /// \param  OS The output stream to dump to.
0247   ///
0248   /// \param II The object to dump.
0249   ///
0250   /// \param Indent The indentation as number of spaces. Used for recursive
0251   /// dumping.
0252   void dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent = 0);
0253 
0254   /// Dump a FileEntry object.
0255   ///
0256   /// This function will convert any string table indexes into human readable
0257   /// format.
0258   ///
0259   /// \param  OS The output stream to dump to.
0260   ///
0261   /// \param FE The object to dump.
0262   void dump(raw_ostream &OS, std::optional<FileEntry> FE);
0263 
0264   /// Get the number of addresses in this Gsym file, as recorded in the header.
0265   uint32_t getNumAddresses() const {
0266     return Hdr->NumAddresses;
0267   }
0268 
0269   /// Gets an address from the address table.
0270   ///
0271   /// Addresses are stored as offsets from the gsym::Header::BaseAddress.
0272   ///
0273   /// \param Index An index into the address table.
0274   /// \returns A resolved virtual address for the address in the address table
0275   /// or std::nullopt if Index is out of bounds.
0276   std::optional<uint64_t> getAddress(size_t Index) const;
0277 
0278 protected:
0279 
0280   /// View the address offsets table as an array of T.
0281   ///
0282   /// The address table in the GSYM file is stored as an array of 1, 2, 4 or 8
0283   /// byte offsets from the gsym::Header::BaseAddress. The table is stored
0284   /// internally as an array of bytes that are in the correct endianness. When
0285   /// we access this table we must get an array that matches those sizes. This
0286   /// templatized helper reinterprets the bytes of the AddrOffsets member
0287   /// variable as elements of type T; trailing bytes that do not fill a T are
0288   /// not included.
0289   /// \returns An ArrayRef of T covering AddrOffsets.size() / sizeof(T) items.
0290   template <class T> ArrayRef<T>
0291   getAddrOffsets() const {
0292     const T *First = reinterpret_cast<const T *>(AddrOffsets.data());
0293     return ArrayRef<T>(First, AddrOffsets.size() / sizeof(T));
0294   }
0295 
0296   /// Resolve an entry of the address table to a virtual address.
0297   ///
0298   /// The address table in the GSYM file is stored as an array of 1, 2, 4 or 8
0299   /// byte address offsets from the gsym::Header::BaseAddress. The table is
0300   /// kept internally as an array of bytes that are in the correct endianness.
0301   /// To extract an address from the address table we read the address offset
0302   /// using the element type T and then add it to the BaseAddress in the
0303   /// header.
0304   ///
0305   /// \param Index An index into the AddrOffsets array, viewed as elements of T.
0306   /// \returns A virtual address that matches the original object file for the
0307   /// address at the specified index, or std::nullopt if Index is out of bounds.
0308   template <class T>
0309   std::optional<uint64_t> addressForIndex(size_t Index) const {
0310     const ArrayRef<T> Offsets = getAddrOffsets<T>();
0311     if (Index >= Offsets.size())
0312       return std::nullopt;
0313     return Offsets[Index] + Hdr->BaseAddress;
0314   }
0315   /// Look up an address offset in the AddrOffsets table.
0316   ///
0317   /// Binary searches the AddrOffsets table, viewed as an array of T, for the
0318   /// last entry that is less than or equal to \a AddrOffset.
0319   ///
0320   /// \param AddrOffset An address offset that has already been computed by
0321   /// subtracting the gsym::Header::BaseAddress.
0322   /// \returns The index of the first entry holding the matched offset, or
0323   /// std::nullopt if the table is empty or \a AddrOffset precedes every entry.
0324   /// The index is used to extract the FunctionInfo data's offset from the
0325   /// AddrInfoOffsets array.
0326   template <class T>
0327   std::optional<uint64_t>
0328   getAddressOffsetIndex(const uint64_t AddrOffset) const {
0329     ArrayRef<T> AIO = getAddrOffsets<T>();
0330     const auto Begin = AIO.begin();
0331     const auto End = AIO.end();
0332     // An empty table has nothing to match, and *Begin must not be read.
0333     if (Begin == End)
0334       return std::nullopt;
0335     auto Iter = std::lower_bound(Begin, End, AddrOffset);
0336     // Watch for addresses that fall between the gsym::Header::BaseAddress and
0337     // the first address offset.
0338     if (Iter == Begin && AddrOffset < *Begin)
0339       return std::nullopt;
0340     if (Iter == End || AddrOffset < *Iter)
0341       --Iter;
0342 
0343     // GSYM files have sorted function infos with the most information (line
0344     // table and/or inline info) first in the array of function infos, so
0345     // always back up as much as possible as long as the address offset is the
0346     // same as the previous entry.
0347     while (Iter != Begin && *(Iter - 1) == *Iter)
0348       --Iter;
0349 
0350     return std::distance(Begin, Iter);
0351   }
0352 
0353   /// Create a GSYM from a memory buffer.
0354   ///
0355   /// Called by both openFile() and copyBuffer(), this function does all of the
0356   /// work of parsing the GSYM file and returning an error.
0357   ///
0358   /// \param MemBuffer A memory buffer that will transfer ownership into the
0359   /// GsymReader.
0360   /// \returns An expected GsymReader that contains the object or an error
0361   /// object that indicates reason for failing to read the GSYM.
0362   static llvm::Expected<llvm::gsym::GsymReader>
0363   create(std::unique_ptr<MemoryBuffer> &MemBuffer);
0364 
0365 
0366   /// Given an address, find the address index.
0367   ///
0368   /// Binary search the address table and find the matching address index.
0369   ///
0370   /// \param Addr A virtual address that matches the original object file
0371   /// to lookup.
0372   /// \returns An index into the address table. This index can be used to
0373   /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
0374   /// Returns an error if the address isn't in the GSYM with details of why.
0375   Expected<uint64_t> getAddressIndex(const uint64_t Addr) const;
0376 
0377   /// Given an address index, get the offset for the FunctionInfo.
0378   ///
0379   /// Looking up an address is done by finding the corresponding address
0380   /// index for the address. This index is then used to get the offset of the
0381   /// FunctionInfo data that we will decode using this function.
0382   ///
0383   /// \param Index An index into the address table.
0384   /// \returns An optional GSYM data offset for the offset of the FunctionInfo
0385   /// that needs to be decoded.
0386   std::optional<uint64_t> getAddressInfoOffset(size_t Index) const;
0387 
0388   /// Given an address, find the correct function info data and function
0389   /// address.
0390   ///
0391   /// Binary search the address table and find the matching address info
0392   /// and make sure that the function info contains the address. GSYM allows
0393   /// functions to overlap, and the most debug info is contained in the first
0394   /// entries due to the sorting when GSYM files are created. We can have
0395   /// multiple function info that start at the same address only if their
0396   /// address range doesn't match. So find the first entry that matches \a Addr
0397   /// and iterate forward until we find one that contains the address.
0398   ///
0399   /// \param[in] Addr A virtual address that matches the original object file
0400   /// to lookup.
0401   ///
0402   /// \param[out] FuncStartAddr A virtual address that is the base address of
0403   /// the function that is used for decoding the FunctionInfo.
0404   ///
0405   /// \returns A valid data extractor on success, or an error if we fail to
0406   /// find the address in a function info or correctly decode the data.
0407   llvm::Expected<llvm::DataExtractor>
0408   getFunctionInfoDataForAddress(uint64_t Addr, uint64_t &FuncStartAddr) const;
0409 
0410   /// Get the function data and address given an address index.
0411   ///
0412   /// \param AddrIdx An address index from the address table.
0413   ///
0414   /// \returns An expected DataExtractor for the function info data, or an
0415   /// error object that indicates the reason for failing to look up the
0416   /// address.
0417   llvm::Expected<llvm::DataExtractor>
0418   getFunctionInfoDataAtIndex(uint64_t AddrIdx, uint64_t &FuncStartAddr) const;
0419 };
0420 
0421 } // namespace gsym
0422 } // namespace llvm
0423 
0424 #endif // LLVM_DEBUGINFO_GSYM_GSYMREADER_H