Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-05-10 08:44:11

0001 //===- llvm/MC/MCDisassembler.h - Disassembler interface --------*- C++ -*-===//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 
0009 #ifndef LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
0010 #define LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
0011 
0012 #include "llvm/ADT/StringRef.h"
0013 #include "llvm/BinaryFormat/XCOFF.h"
0014 #include "llvm/MC/MCDisassembler/MCSymbolizer.h"
0015 #include "llvm/Support/Error.h"
0016 #include <cstdint>
0017 #include <memory>
0018 #include <vector>
0019 
0020 namespace llvm {
0021 
0022 struct XCOFFSymbolInfoTy {
       // XCOFF-specific symbol attributes. SymbolInfoTy embeds one of these as
       // its XCOFFSymInfo member.

       // Storage mapping class of the symbol; empty when none was supplied.
0023   std::optional<XCOFF::StorageMappingClass> StorageMappingClass;
       // Optional 32-bit index. Presumably the symbol's index in the XCOFF symbol
       // table -- TODO confirm against the code that fills this struct.
0024   std::optional<uint32_t> Index;
       // Label flag; false unless set explicitly.
0025   bool IsLabel = false;
       // Ordering used when two XCOFFSymbolInfoTy values are compared, e.g. as
       // part of the std::tie comparison in SymbolInfoTy's operator<. Only
       // declared here; the definition is not part of this header.
0026   bool operator<(const XCOFFSymbolInfoTy &SymInfo) const;
0027 };
0028 
0029 struct SymbolInfoTy {
       // Describes one symbol: its address, its name and format-dependent
       // attributes. The constructors produce three flavours of object:
       //   - XCOFF without a type (IsXCOFF = true,  HasType = false),
       //   - XCOFF with a type    (IsXCOFF = true,  HasType = true),
       //   - non-XCOFF with a type (IsXCOFF = false, HasType = true).
       // operator< below may only compare two objects of the same flavour.
0030   uint64_t Addr;
0031   StringRef Name;
0032   // XCOFF uses XCOFFSymInfo. Other targets use Type.
0033   XCOFFSymbolInfoTy XCOFFSymInfo;
0034   uint8_t Type;
0035   // Used by ELF to describe a mapping symbol that is usually not displayed.
0036   bool IsMappingSymbol;
0037 
0038 private:
       // Fixed at construction; exposed read-only through isXCOFF().
0039   bool IsXCOFF;
       // True when the object was built by the constructor that takes a Type
       // argument, false when built by the XCOFFSymInfo-based constructor.
0040   bool HasType;
0041 
0042 public:
       // XCOFF constructor: fills XCOFFSymInfo from Smc / Idx / Label, sets Type
       // to 0, and marks the symbol as XCOFF, untyped, and not a mapping symbol.
0043   SymbolInfoTy(std::optional<XCOFF::StorageMappingClass> Smc, uint64_t Addr,
0044                StringRef Name, std::optional<uint32_t> Idx, bool Label)
0045       : Addr(Addr), Name(Name), XCOFFSymInfo{Smc, Idx, Label}, Type(0),
0046         IsMappingSymbol(false), IsXCOFF(true), HasType(false) {}
       // Type-based constructor: stores Type and marks the symbol as typed.
       // XCOFFSymInfo is not named in the initializer list, so it is
       // default-constructed (empty optionals, IsLabel = false). Passing
       // IsXCOFF = true yields an XCOFF symbol that is ordered by Type.
0047   SymbolInfoTy(uint64_t Addr, StringRef Name, uint8_t Type,
0048                bool IsMappingSymbol = false, bool IsXCOFF = false)
0049       : Addr(Addr), Name(Name), Type(Type), IsMappingSymbol(IsMappingSymbol),
0050         IsXCOFF(IsXCOFF), HasType(true) {}
       // Returns the IsXCOFF flag chosen at construction.
0051   bool isXCOFF() const { return IsXCOFF; }
0052 
0053 private:
       // Less-than ordering over symbols. Both operands must agree on IsXCOFF and
       // HasType (asserted). The comparison key depends on the flavour:
       //   - XCOFF with a type:    (Addr, Type, Name)
       //   - XCOFF without a type: (Addr, XCOFFSymInfo, Name)
       //   - otherwise:            (Addr, !IsMappingSymbol, Name, Type)
       // NOTE(review): assert, std::tie and std::optional are used in this struct
       // although <cassert>, <tuple> and <optional> are not in this header's
       // include list; presumably they arrive transitively -- confirm.
0054   friend bool operator<(const SymbolInfoTy &P1, const SymbolInfoTy &P2) {
0055     assert((P1.IsXCOFF == P2.IsXCOFF && P1.HasType == P2.HasType) &&
0056            "The value of IsXCOFF and HasType in P1 and P2 should be the same "
0057            "respectively.");
0058 
         // After the assert only P1's flags need to be inspected.
0059     if (P1.IsXCOFF && P1.HasType)
0060       return std::tie(P1.Addr, P1.Type, P1.Name) <
0061              std::tie(P2.Addr, P2.Type, P2.Name);
0062 
0063     if (P1.IsXCOFF)
0064       return std::tie(P1.Addr, P1.XCOFFSymInfo, P1.Name) <
0065              std::tie(P2.Addr, P2.XCOFFSymInfo, P2.Name);
0066 
0067     // With the same address, place mapping symbols first.
         // The flag is negated so that a mapping symbol yields false, which
         // compares less than the true produced by a non-mapping symbol.
0068     bool MS1 = !P1.IsMappingSymbol, MS2 = !P2.IsMappingSymbol;
0069     return std::tie(P1.Addr, MS1, P1.Name, P1.Type) <
0070            std::tie(P2.Addr, MS2, P2.Name, P2.Type);
0071   }
0072 };
0073 
     // A sequence of SymbolInfoTy entries. Presumably holds the symbols of one
     // section, as the name suggests -- TODO confirm at the use sites.
0074 using SectionSymbolsTy = std::vector<SymbolInfoTy>;
0075 
     // Forward declarations of types that this header only names in function and
     // member declarations (references, pointers and parameter types).
0076 template <typename T> class ArrayRef;
0077 class MCContext;
0078 class MCInst;
0079 class MCSubtargetInfo;
0080 class raw_ostream;
0081 
0082 /// Superclass for all disassemblers. Consumes a memory region and provides an
0083 /// array of assembly instructions.
0084 class MCDisassembler {
0085 public:
0086   /// Ternary decode status. Most backends will just use Fail and
0087   /// Success, however some have a concept of an instruction with
0088   /// understandable semantics but which is architecturally
0089   /// incorrect. An example of this is ARM UNPREDICTABLE instructions
0090   /// which are disassemblable but cause undefined behaviour.
0091   ///
0092   /// Because it makes sense to disassemble these instructions, there
0093   /// is a "soft fail" failure mode that indicates the MCInst& is
0094   /// valid but architecturally incorrect.
0095   ///
0096   /// The enum numbers are deliberately chosen such that reduction
0097   /// from Success->SoftFail ->Fail can be done with a simple
0098   /// bitwise-AND:
0099   ///
0100   ///   LEFT & TOP =  | Success       Unpredictable   Fail
0101   ///   --------------+-----------------------------------
0102   ///   Success       | Success       Unpredictable   Fail
0103   ///   Unpredictable | Unpredictable Unpredictable   Fail
0104   ///   Fail          | Fail          Fail            Fail
0105   ///
0106   /// An easy way of encoding this is as 0b11, 0b01, 0b00 for
0107   /// Success, SoftFail, Fail respectively.
       ///
       /// "Unpredictable" in the table above corresponds to the SoftFail
       /// enumerator below (value 1).
0108   enum DecodeStatus {
0109     Fail = 0,
0110     SoftFail = 1,
0111     Success = 3
0112   };
0113 
       /// Binds the disassembler to \p STI and \p Ctx. Both are stored as
       /// references (see the STI and Ctx members), so the referenced objects
       /// must stay alive for as long as this disassembler is used.
0114   MCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
0115     : Ctx(Ctx), STI(STI) {}
0116 
       /// Virtual destructor for this polymorphic base; declared here and
       /// defined out of line.
0117   virtual ~MCDisassembler();
0118 
0119   /// Returns the disassembly of a single instruction.
0120   ///
0121   /// \param Instr    - An MCInst to populate with the contents of the
0122   ///                   instruction.
0123   /// \param Size     - A value to populate with the size of the instruction, or
0124   ///                   the number of bytes consumed while attempting to decode
0125   ///                   an invalid instruction.
0126   /// \param Address  - The address, in the memory space of region, of the first
0127   ///                   byte of the instruction.
0128   /// \param Bytes    - A reference to the actual bytes of the instruction.
0129   /// \param CStream  - The stream to print comments and annotations on.
0130   /// \return         - MCDisassembler::Success if the instruction is valid,
0131   ///                   MCDisassembler::SoftFail if the instruction was
0132   ///                                            disassemblable but invalid,
0133   ///                   MCDisassembler::Fail if the instruction was invalid.
       ///
       /// Note that the parameter list above is not in signature order: in the
       /// declaration \p Bytes comes before \p Address.
0134   virtual DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
0135                                       ArrayRef<uint8_t> Bytes, uint64_t Address,
0136                                       raw_ostream &CStream) const = 0;
0137 
0138   /// Used to perform separate target specific disassembly for a particular
0139   /// symbol. May parse any prelude that precedes instructions after the
0140   /// start of a symbol, or the entire symbol.
0141   /// This is used for example by WebAssembly to decode preludes.
0142   ///
0143   /// Base implementation returns false. So all targets by default decline to
0144   /// treat symbols separately.
0145   ///
0146   /// \param Symbol   - The symbol.
0147   /// \param Size     - The number of bytes consumed.
0148   /// \param Address  - The address, in the memory space of region, of the first
0149   ///                   byte of the symbol.
0150   /// \param Bytes    - A reference to the actual bytes at the symbol location.
0151   /// \return         - True if this symbol triggered some target specific
0152   ///                   disassembly for this symbol. Size must be set with the
0153   ///                   number of bytes consumed.
0154   ///                 - Error if this symbol triggered some target specific
0155   ///                   disassembly for this symbol, but an error was found with
0156   ///                   it. Size must be set with the number of bytes consumed.
0157   ///                 - False if the target doesn't want to handle the symbol
0158   ///                   separately. The value of Size is ignored in this case,
0159   ///                   and Err must not be set.
       ///
       /// NOTE(review): this signature has no parameter named Err; "Err"
       /// presumably refers to the error state of the returned Expected<bool>
       /// -- confirm and reword.
0160   virtual Expected<bool> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
0161                                        ArrayRef<uint8_t> Bytes,
0162                                        uint64_t Address) const;
0163   // TODO:
0164   // Implement similar hooks that can be used at other points during
0165   // disassembly. Something along the following lines:
0166   // - onBeforeInstructionDecode()
0167   // - onAfterInstructionDecode()
0168   // - onSymbolEnd()
0169   // It should help move much of the target specific code from llvm-objdump to
0170   // respective target disassemblers.
0171 
0172   /// Suggest a distance to skip in a buffer of data to find the next
0173   /// place to look for the start of an instruction. For example, if
0174   /// all instructions have a fixed alignment, this might advance to
0175   /// the next multiple of that alignment.
0176   ///
0177   /// If not overridden, the default is 1.
0178   ///
0179   /// \param Address  - The address, in the memory space of region, of the
0180   ///                   starting point (typically the first byte of something
0181   ///                   that did not decode as a valid instruction at all).
0182   /// \param Bytes    - A reference to the actual bytes at Address. May be
0183   ///                   needed in order to determine the width of an
0184   ///                   unrecognized instruction (e.g. in Thumb this is a simple
0185   ///                   consistent criterion that doesn't require knowing the
0186   ///                   specific instruction). The caller can pass as much data
0187   ///                   as they have available, and the function is required to
0188   ///                   make a reasonable default choice if not enough data is
0189   ///                   available to make a better one.
0190   /// \return         - A number of bytes to skip. Must always be greater than
0191   ///                   zero. May be greater than the size of Bytes.
0192   virtual uint64_t suggestBytesToSkip(ArrayRef<uint8_t> Bytes,
0193                                       uint64_t Address) const;
0194 
0195 private:
       // Context passed to the constructor; held by reference and handed out
       // through getContext().
0196   MCContext &Ctx;
0197 
0198 protected:
0199   // Subtarget information, for instruction decoding predicates if required.
0200   const MCSubtargetInfo &STI;
       // Symbolizer owned by this disassembler; replaced via setSymbolizer().
0201   std::unique_ptr<MCSymbolizer> Symbolizer;
0202 
0203 public:
0204   // Helpers around MCSymbolizer
       // Both helpers are only declared here. Presumably they forward to
       // Symbolizer when one is set -- TODO confirm in the implementation file.
0205   bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address,
0206                                 bool IsBranch, uint64_t Offset, uint64_t OpSize,
0207                                 uint64_t InstSize) const;
0208 
0209   void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const;
0210 
0211   /// Set \p Symzer as the current symbolizer.
0212   /// This takes ownership of \p Symzer, and deletes the previously set one.
0213   void setSymbolizer(std::unique_ptr<MCSymbolizer> Symzer);
0214 
       /// Returns the MCContext reference supplied at construction.
0215   MCContext& getContext() const { return Ctx; }
0216 
       /// Returns the MCSubtargetInfo reference supplied at construction.
0217   const MCSubtargetInfo& getSubtargetInfo() const { return STI; }
0218 
0219   /// ELF-specific, set the ABI version from the object header.
       /// The default implementation ignores \p Version and does nothing.
0220   virtual void setABIVersion(unsigned Version) {}
0221 
0222   // Marked mutable because we cache it inside the disassembler, rather than
0223   // having to pass it around as an argument through all the autogenerated code.
       // Null until a stream is assigned.
0224   mutable raw_ostream *CommentStream = nullptr;
0225 };
0226 
0227 } // end namespace llvm
0228 
0229 #endif // LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H