Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-05-10 08:44:14

0001 //===- MCPseudoProbe.h - Pseudo probe encoding support ---------*- C++ -*-===//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 //
0009 // This file contains the declaration of the MCPseudoProbe to support the pseudo
0010 // probe encoding for AutoFDO. Pseudo probes together with their inline context
0011 // are encoded in a DFS recursive way in the .pseudoprobe sections. For each
0012 // .pseudoprobe section, the encoded binary data consist of a single or mutiple
0013 // function records each for one outlined function. A function record has the
0014 // following format :
0015 //
0016 // FUNCTION BODY (one for each outlined function present in the text section)
0017 //    GUID (uint64)
0018 //        GUID of the function's source name which may be different from the
0019 //        actual binary linkage name. This GUID will be used to decode and
0020 //        generate a profile against the source function name.
0021 //    NPROBES (ULEB128)
0022 //        Number of probes originating from this function.
0023 //    NUM_INLINED_FUNCTIONS (ULEB128)
0024 //        Number of callees inlined into this function, aka number of
0025 //        first-level inlinees
0026 //    PROBE RECORDS
0027 //        A list of NPROBES entries. Each entry contains:
0028 //          INDEX (ULEB128)
0029 //          TYPE (uint4)
0030 //            0 - block probe, 1 - indirect call, 2 - direct call
0031 //          ATTRIBUTE (uint3)
0032 //            1 - reserved
0033 //            2 - Sentinel
0034 //            4 - HasDiscriminator
0035 //          ADDRESS_TYPE (uint1)
0036 //            0 - code address for regular probes (for downwards compatibility)
0037 //              - GUID of linkage name for sentinel probes
0038 //            1 - address delta
0039 //          CODE_ADDRESS (uint64 or ULEB128)
0040 //            code address or address delta, depending on ADDRESS_TYPE
0041 //          DISCRIMINATOR (ULEB128) if HasDiscriminator
0042 //    INLINED FUNCTION RECORDS
0043 //        A list of NUM_INLINED_FUNCTIONS entries describing each of the inlined
0044 //        callees.  Each record contains:
0045 //          INLINE SITE
0046 //            ID of the callsite probe (ULEB128)
0047 //          FUNCTION BODY
0048 //            A FUNCTION BODY entry describing the inlined function.
0049 //
0050 // TODO: retire the ADDRESS_TYPE encoding for code addresses once compatibility
0051 // is no longer an issue.
0052 //===----------------------------------------------------------------------===//
0053 
0054 #ifndef LLVM_MC_MCPSEUDOPROBE_H
0055 #define LLVM_MC_MCPSEUDOPROBE_H
0056 
0057 #include "llvm/ADT/ArrayRef.h"
0058 #include "llvm/ADT/DenseMap.h"
0059 #include "llvm/ADT/DenseSet.h"
0060 #include "llvm/ADT/SmallVector.h"
0061 #include "llvm/ADT/StringRef.h"
0062 #include "llvm/ADT/iterator.h"
0063 #include "llvm/IR/PseudoProbe.h"
0064 #include "llvm/Support/Allocator.h"
0065 #include "llvm/Support/ErrorOr.h"
0066 #include <functional>
0067 #include <memory>
0068 #include <string>
0069 #include <tuple>
0070 #include <type_traits>
0071 #include <unordered_map>
0072 #include <vector>
0073 
0074 namespace llvm {
0075 
0076 class MCSymbol;
0077 class MCObjectStreamer;
0078 class raw_ostream;
0079 
0080 enum class MCPseudoProbeFlag {
0081   // If set, indicates that the probe is encoded as an address delta
0082   // instead of a real code address.
0083   AddressDelta = 0x1,
0084 };
0085 
0086 // Function descriptor decoded from .pseudo_probe_desc section
0087 struct MCPseudoProbeFuncDesc {
0088   uint64_t FuncGUID = 0;
0089   uint64_t FuncHash = 0;
0090   StringRef FuncName;
0091 
0092   MCPseudoProbeFuncDesc(uint64_t GUID, uint64_t Hash, StringRef Name)
0093       : FuncGUID(GUID), FuncHash(Hash), FuncName(Name){};
0094 
0095   void print(raw_ostream &OS);
0096 };
0097 
0098 class MCDecodedPseudoProbe;
0099 
0100 // An inline frame has the form <CalleeGuid, ProbeID>
0101 using InlineSite = std::tuple<uint64_t, uint32_t>;
0102 using MCPseudoProbeInlineStack = SmallVector<InlineSite, 8>;
0103 // GUID to PseudoProbeFuncDesc map
0104 class GUIDProbeFunctionMap : public std::vector<MCPseudoProbeFuncDesc> {
0105 public:
0106   auto find(uint64_t GUID) const {
0107     auto CompareDesc = [](const MCPseudoProbeFuncDesc &Desc, uint64_t GUID) {
0108       return Desc.FuncGUID < GUID;
0109     };
0110     auto It = llvm::lower_bound(*this, GUID, CompareDesc);
0111     if (It->FuncGUID != GUID)
0112       return end();
0113     return It;
0114   }
0115 };
0116 
0117 class MCDecodedPseudoProbeInlineTree;
0118 
0119 class MCPseudoProbeBase {
0120 protected:
0121   uint32_t Index;
0122   uint32_t Discriminator;
0123   uint8_t Attributes;
0124   uint8_t Type;
0125   // The value should be equal to PseudoProbeReservedId::Last + 1 which is
0126   // defined in SampleProfileProbe.h. The header file is not included here to
0127   // reduce the dependency from MC to IPO.
0128   const static uint32_t PseudoProbeFirstId = 1;
0129 
0130 public:
0131   MCPseudoProbeBase(uint64_t I, uint64_t At, uint8_t T, uint32_t D)
0132       : Index(I), Discriminator(D), Attributes(At), Type(T) {}
0133 
0134   bool isEntry() const { return Index == PseudoProbeFirstId; }
0135 
0136   uint32_t getIndex() const { return Index; }
0137 
0138   uint32_t getDiscriminator() const { return Discriminator; }
0139 
0140   uint8_t getAttributes() const { return Attributes; }
0141 
0142   uint8_t getType() const { return Type; }
0143 
0144   bool isBlock() const {
0145     return Type == static_cast<uint8_t>(PseudoProbeType::Block);
0146   }
0147 
0148   bool isIndirectCall() const {
0149     return Type == static_cast<uint8_t>(PseudoProbeType::IndirectCall);
0150   }
0151 
0152   bool isDirectCall() const {
0153     return Type == static_cast<uint8_t>(PseudoProbeType::DirectCall);
0154   }
0155 
0156   bool isCall() const { return isIndirectCall() || isDirectCall(); }
0157 
0158   void setAttributes(uint8_t Attr) { Attributes = Attr; }
0159 };
0160 
0161 /// Instances of this class represent a pseudo probe instance for a pseudo probe
0162 /// table entry, which is created during a machine instruction is assembled and
0163 /// uses an address from a temporary label created at the current address in the
0164 /// current section.
0165 class MCPseudoProbe : public MCPseudoProbeBase {
0166   uint64_t Guid;
0167   MCSymbol *Label;
0168 
0169 public:
0170   MCPseudoProbe(MCSymbol *Label, uint64_t Guid, uint64_t Index, uint64_t Type,
0171                 uint64_t Attributes, uint32_t Discriminator)
0172       : MCPseudoProbeBase(Index, Attributes, Type, Discriminator), Guid(Guid),
0173         Label(Label) {
0174     assert(Type <= 0xFF && "Probe type too big to encode, exceeding 2^8");
0175     assert(Attributes <= 0xFF &&
0176            "Probe attributes too big to encode, exceeding 2^16");
0177   }
0178 
0179   uint64_t getGuid() const { return Guid; };
0180   MCSymbol *getLabel() const { return Label; }
0181   void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *LastProbe) const;
0182 };
0183 
0184 // Represents a callsite with caller function name and probe id
0185 using MCPseudoProbeFrameLocation = std::pair<StringRef, uint32_t>;
0186 
0187 class MCDecodedPseudoProbe : public MCPseudoProbeBase {
0188   uint64_t Address;
0189   MCDecodedPseudoProbeInlineTree *InlineTree;
0190 
0191 public:
0192   MCDecodedPseudoProbe(uint64_t Ad, uint32_t I, PseudoProbeType K, uint8_t At,
0193                        uint32_t D, MCDecodedPseudoProbeInlineTree *Tree)
0194       : MCPseudoProbeBase(I, At, static_cast<uint8_t>(K), D), Address(Ad),
0195         InlineTree(Tree){};
0196   uint64_t getGuid() const;
0197 
0198   uint64_t getAddress() const { return Address; }
0199 
0200   void setAddress(uint64_t Addr) { Address = Addr; }
0201 
0202   MCDecodedPseudoProbeInlineTree *getInlineTreeNode() const {
0203     return InlineTree;
0204   }
0205 
0206   // Get the inlined context by traversing current inline tree backwards,
0207   // each tree node has its InlineSite which is taken as the context.
0208   // \p ContextStack is populated in root to leaf order
0209   void
0210   getInlineContext(SmallVectorImpl<MCPseudoProbeFrameLocation> &ContextStack,
0211                    const GUIDProbeFunctionMap &GUID2FuncMAP) const;
0212 
0213   // Helper function to get the string from context stack
0214   std::string
0215   getInlineContextStr(const GUIDProbeFunctionMap &GUID2FuncMAP) const;
0216 
0217   // Print pseudo probe while disassembling
0218   void print(raw_ostream &OS, const GUIDProbeFunctionMap &GUID2FuncMAP,
0219              bool ShowName) const;
0220 };
0221 
0222 // Address to pseudo probes map.
0223 class AddressProbesMap
0224     : public std::vector<std::reference_wrapper<MCDecodedPseudoProbe>> {
0225   auto getIt(uint64_t Addr) const {
0226     auto CompareProbe = [](const MCDecodedPseudoProbe &Probe, uint64_t Addr) {
0227       return Probe.getAddress() < Addr;
0228     };
0229     return llvm::lower_bound(*this, Addr, CompareProbe);
0230   }
0231 
0232 public:
0233   // Returns range of probes within [\p From, \p To) address range.
0234   auto find(uint64_t From, uint64_t To) const {
0235     return llvm::make_range(getIt(From), getIt(To));
0236   }
0237   // Returns range of probes with given \p Address.
0238   auto find(uint64_t Address) const {
0239     auto FromIt = getIt(Address);
0240     if (FromIt == end() || FromIt->get().getAddress() != Address)
0241       return llvm::make_range(end(), end());
0242     auto ToIt = getIt(Address + 1);
0243     return llvm::make_range(FromIt, ToIt);
0244   }
0245 };
0246 
0247 template <typename ProbesType, typename DerivedProbeInlineTreeType,
0248           typename InlinedProbeTreeMap>
0249 class MCPseudoProbeInlineTreeBase {
0250 protected:
0251   // Track children (e.g. inlinees) of current context
0252   InlinedProbeTreeMap Children;
0253   // Set of probes that come with the function.
0254   ProbesType Probes;
0255   MCPseudoProbeInlineTreeBase() {
0256     static_assert(std::is_base_of<MCPseudoProbeInlineTreeBase,
0257                                   DerivedProbeInlineTreeType>::value,
0258                   "DerivedProbeInlineTreeType must be subclass of "
0259                   "MCPseudoProbeInlineTreeBase");
0260   }
0261 
0262 public:
0263   uint64_t Guid = 0;
0264 
0265   // Root node has a GUID 0.
0266   bool isRoot() const { return Guid == 0; }
0267   InlinedProbeTreeMap &getChildren() { return Children; }
0268   const InlinedProbeTreeMap &getChildren() const { return Children; }
0269   const ProbesType &getProbes() const { return Probes; }
0270   // Caller node of the inline site
0271   MCPseudoProbeInlineTreeBase<ProbesType, DerivedProbeInlineTreeType,
0272                               InlinedProbeTreeMap> *Parent = nullptr;
0273   DerivedProbeInlineTreeType *getOrAddNode(const InlineSite &Site) {
0274     auto Ret = Children.emplace(
0275         Site, std::make_unique<DerivedProbeInlineTreeType>(Site));
0276     Ret.first->second->Parent = this;
0277     return Ret.first->second.get();
0278   };
0279 };
0280 
0281 // A Tri-tree based data structure to group probes by inline stack.
0282 // A tree is allocated for a standalone .text section. A fake
0283 // instance is created as the root of a tree.
0284 // A real instance of this class is created for each function, either a
0285 // not inlined function that has code in .text section or an inlined function.
0286 struct InlineSiteHash {
0287   uint64_t operator()(const InlineSite &Site) const {
0288     return std::get<0>(Site) ^ std::get<1>(Site);
0289   }
0290 };
0291 class MCPseudoProbeInlineTree
0292     : public MCPseudoProbeInlineTreeBase<
0293           std::vector<MCPseudoProbe>, MCPseudoProbeInlineTree,
0294           std::unordered_map<InlineSite,
0295                              std::unique_ptr<MCPseudoProbeInlineTree>,
0296                              InlineSiteHash>> {
0297 public:
0298   MCPseudoProbeInlineTree() = default;
0299   MCPseudoProbeInlineTree(uint64_t Guid) { this->Guid = Guid; }
0300   MCPseudoProbeInlineTree(const InlineSite &Site) {
0301     this->Guid = std::get<0>(Site);
0302   }
0303 
0304   // MCPseudoProbeInlineTree method based on Inlinees
0305   void addPseudoProbe(const MCPseudoProbe &Probe,
0306                       const MCPseudoProbeInlineStack &InlineStack);
0307   void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *&LastProbe);
0308 };
0309 
0310 // inline tree node for the decoded pseudo probe
0311 class MCDecodedPseudoProbeInlineTree
0312     : public MCPseudoProbeInlineTreeBase<
0313           MCDecodedPseudoProbe *, MCDecodedPseudoProbeInlineTree,
0314           MutableArrayRef<MCDecodedPseudoProbeInlineTree>> {
0315   uint32_t NumProbes = 0;
0316   uint32_t ProbeId = 0;
0317 
0318 public:
0319   MCDecodedPseudoProbeInlineTree() = default;
0320   MCDecodedPseudoProbeInlineTree(const InlineSite &Site,
0321                                  MCDecodedPseudoProbeInlineTree *Parent)
0322       : ProbeId(std::get<1>(Site)) {
0323     this->Guid = std::get<0>(Site);
0324     this->Parent = Parent;
0325   }
0326 
0327   // Return false if it's a dummy inline site
0328   bool hasInlineSite() const { return !isRoot() && !Parent->isRoot(); }
0329   InlineSite getInlineSite() const { return InlineSite(Guid, ProbeId); }
0330   void setProbes(MutableArrayRef<MCDecodedPseudoProbe> ProbesRef) {
0331     Probes = ProbesRef.data();
0332     NumProbes = ProbesRef.size();
0333   }
0334   auto getProbes() const {
0335     return MutableArrayRef<MCDecodedPseudoProbe>(Probes, NumProbes);
0336   }
0337 };
0338 
0339 /// Instances of this class represent the pseudo probes inserted into a compile
0340 /// unit.
0341 class MCPseudoProbeSections {
0342 public:
0343   void addPseudoProbe(MCSymbol *FuncSym, const MCPseudoProbe &Probe,
0344                       const MCPseudoProbeInlineStack &InlineStack) {
0345     MCProbeDivisions[FuncSym].addPseudoProbe(Probe, InlineStack);
0346   }
0347 
0348   // The addresses of MCPseudoProbeInlineTree are used by the tree structure and
0349   // need to be stable.
0350   using MCProbeDivisionMap = std::unordered_map<MCSymbol *, MCPseudoProbeInlineTree>;
0351 
0352 private:
0353   // A collection of MCPseudoProbe for each function. The MCPseudoProbes are
0354   // grouped by GUIDs due to inlining that can bring probes from different
0355   // functions into one function.
0356   MCProbeDivisionMap MCProbeDivisions;
0357 
0358 public:
0359   const MCProbeDivisionMap &getMCProbes() const { return MCProbeDivisions; }
0360 
0361   bool empty() const { return MCProbeDivisions.empty(); }
0362 
0363   void emit(MCObjectStreamer *MCOS);
0364 };
0365 
0366 class MCPseudoProbeTable {
0367   // A collection of MCPseudoProbe in the current module grouped by
0368   // functions. MCPseudoProbes will be encoded into a corresponding
0369   // .pseudoprobe section. With functions emitted as separate comdats,
0370   // a text section really only contains the code of a function solely, and the
0371   // probes associated with the text section will be emitted into a standalone
0372   // .pseudoprobe section that shares the same comdat group with the function.
0373   MCPseudoProbeSections MCProbeSections;
0374 
0375 public:
0376   static void emit(MCObjectStreamer *MCOS);
0377 
0378   MCPseudoProbeSections &getProbeSections() { return MCProbeSections; }
0379 
0380 #ifndef NDEBUG
0381   static int DdgPrintIndent;
0382 #endif
0383 };
0384 
0385 class MCPseudoProbeDecoder {
0386   // Decoded pseudo probes vector.
0387   std::vector<MCDecodedPseudoProbe> PseudoProbeVec;
0388   // Injected pseudo probes, identified by the containing inline tree node.
0389   // Need to keep injected probes separately for two reasons:
0390   // 1) Probes cannot be added to the PseudoProbeVec: appending may cause
0391   //    reallocation so that pointers to its elements will become invalid.
0392   // 2) Probes belonging to function record must be contiguous in PseudoProbeVec
0393   //    as owning InlineTree references them with an ArrayRef to save space.
0394   std::unordered_map<const MCDecodedPseudoProbeInlineTree *,
0395                      std::vector<MCDecodedPseudoProbe>>
0396       InjectedProbeMap;
0397   // Decoded inline records vector.
0398   std::vector<MCDecodedPseudoProbeInlineTree> InlineTreeVec;
0399 
0400   // GUID to PseudoProbeFuncDesc map.
0401   GUIDProbeFunctionMap GUID2FuncDescMap;
0402 
0403   BumpPtrAllocator FuncNameAllocator;
0404 
0405   // Address to probes map.
0406   AddressProbesMap Address2ProbesMap;
0407 
0408   // The dummy root of the inline trie, all the outlined function will directly
0409   // be the children of the dummy root, all the inlined function will be the
0410   // children of its inlineer. So the relation would be like:
0411   // DummyRoot --> OutlinedFunc --> InlinedFunc1 --> InlinedFunc2
0412   MCDecodedPseudoProbeInlineTree DummyInlineRoot;
0413 
0414   /// Points to the current location in the buffer.
0415   const uint8_t *Data = nullptr;
0416 
0417   /// Points to the end of the buffer.
0418   const uint8_t *End = nullptr;
0419 
0420   /// Whether encoding is based on a starting probe with absolute code address.
0421   bool EncodingIsAddrBased = false;
0422 
0423   // Decoding helper function
0424   template <typename T> ErrorOr<T> readUnencodedNumber();
0425   template <typename T> ErrorOr<T> readUnsignedNumber();
0426   template <typename T> ErrorOr<T> readSignedNumber();
0427   ErrorOr<StringRef> readString(uint32_t Size);
0428 
0429 public:
0430   using Uint64Set = DenseSet<uint64_t>;
0431   using Uint64Map = DenseMap<uint64_t, uint64_t>;
0432 
0433   // Decode pseudo_probe_desc section to build GUID to PseudoProbeFuncDesc map.
0434   // If pseudo_probe_desc section is mapped to memory and \p IsMMapped is true,
0435   // uses StringRefs pointing to the section.
0436   bool buildGUID2FuncDescMap(const uint8_t *Start, std::size_t Size,
0437                              bool IsMMapped = false);
0438 
0439   // Decode pseudo_probe section to count the number of probes and inlined
0440   // function records for each function record.
0441   template <bool IsTopLevelFunc>
0442   bool countRecords(bool &Discard, uint32_t &ProbeCount, uint32_t &InlinedCount,
0443                     const Uint64Set &GuidFilter);
0444 
0445   // Decode pseudo_probe section to build address to probes map for specifed
0446   // functions only.
0447   bool buildAddress2ProbeMap(const uint8_t *Start, std::size_t Size,
0448                              const Uint64Set &GuildFilter,
0449                              const Uint64Map &FuncStartAddrs);
0450 
0451   // Print pseudo_probe_desc section info
0452   void printGUID2FuncDescMap(raw_ostream &OS);
0453 
0454   // Print pseudo_probe section info, used along with show-disassembly
0455   void printProbeForAddress(raw_ostream &OS, uint64_t Address);
0456 
0457   // do printProbeForAddress for all addresses
0458   void printProbesForAllAddresses(raw_ostream &OS);
0459 
0460   // Look up the probe of a call for the input address
0461   const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const;
0462 
0463   const MCPseudoProbeFuncDesc *getFuncDescForGUID(uint64_t GUID) const;
0464 
0465   // Helper function to populate one probe's inline stack into
0466   // \p InlineContextStack.
0467   // Current leaf location info will be added if IncludeLeaf is true
0468   // Example:
0469   //  Current probe(bar:3) inlined at foo:2 then inlined at main:1
0470   //  IncludeLeaf = true,  Output: [main:1, foo:2, bar:3]
0471   //  IncludeLeaf = false, Output: [main:1, foo:2]
0472   void getInlineContextForProbe(
0473       const MCDecodedPseudoProbe *Probe,
0474       SmallVectorImpl<MCPseudoProbeFrameLocation> &InlineContextStack,
0475       bool IncludeLeaf) const;
0476 
0477   const AddressProbesMap &getAddress2ProbesMap() const {
0478     return Address2ProbesMap;
0479   }
0480 
0481   AddressProbesMap &getAddress2ProbesMap() { return Address2ProbesMap; }
0482 
0483   const GUIDProbeFunctionMap &getGUID2FuncDescMap() const {
0484     return GUID2FuncDescMap;
0485   }
0486 
0487   const MCPseudoProbeFuncDesc *
0488   getInlinerDescForProbe(const MCDecodedPseudoProbe *Probe) const;
0489 
0490   const MCDecodedPseudoProbeInlineTree &getDummyInlineRoot() const {
0491     return DummyInlineRoot;
0492   }
0493 
0494   void addInjectedProbe(const MCDecodedPseudoProbe &Probe, uint64_t Address) {
0495     const MCDecodedPseudoProbeInlineTree *Parent = Probe.getInlineTreeNode();
0496     InjectedProbeMap[Parent].emplace_back(Probe).setAddress(Address);
0497   }
0498 
0499   size_t
0500   getNumInjectedProbes(const MCDecodedPseudoProbeInlineTree *Parent) const {
0501     auto It = InjectedProbeMap.find(Parent);
0502     if (It == InjectedProbeMap.end())
0503       return 0;
0504     return It->second.size();
0505   }
0506 
0507   auto getInjectedProbes(MCDecodedPseudoProbeInlineTree *Parent) {
0508     auto It = InjectedProbeMap.find(Parent);
0509     assert(It != InjectedProbeMap.end());
0510     return iterator_range(It->second);
0511   }
0512 
0513   const ArrayRef<MCDecodedPseudoProbeInlineTree> getInlineTreeVec() const {
0514     return InlineTreeVec;
0515   }
0516 
0517 private:
0518   // Recursively parse an inlining tree encoded in pseudo_probe section. Returns
0519   // whether the the top-level node should be skipped.
0520   template <bool IsTopLevelFunc>
0521   bool buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur,
0522                              uint64_t &LastAddr, const Uint64Set &GuildFilter,
0523                              const Uint64Map &FuncStartAddrs,
0524                              const uint32_t CurChildIndex);
0525 };
0526 
0527 } // end namespace llvm
0528 
0529 #endif // LLVM_MC_MCPSEUDOPROBE_H