|
|
|||
File indexing completed on 2026-05-10 08:44:11
0001 //===- llvm/MC/MCDisassembler.h - Disassembler interface --------*- C++ -*-===// 0002 // 0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 0004 // See https://llvm.org/LICENSE.txt for license information. 0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 0006 // 0007 //===----------------------------------------------------------------------===// 0008 0009 #ifndef LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H 0010 #define LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H 0011 0012 #include "llvm/ADT/StringRef.h" 0013 #include "llvm/BinaryFormat/XCOFF.h" 0014 #include "llvm/MC/MCDisassembler/MCSymbolizer.h" 0015 #include "llvm/Support/Error.h" 0016 #include <cstdint> 0017 #include <memory> 0018 #include <vector> 0019 0020 namespace llvm { 0021 0022 struct XCOFFSymbolInfoTy { 0023 std::optional<XCOFF::StorageMappingClass> StorageMappingClass; 0024 std::optional<uint32_t> Index; 0025 bool IsLabel = false; 0026 bool operator<(const XCOFFSymbolInfoTy &SymInfo) const; 0027 }; 0028 0029 struct SymbolInfoTy { 0030 uint64_t Addr; 0031 StringRef Name; 0032 // XCOFF uses XCOFFSymInfo. Other targets use Type. 0033 XCOFFSymbolInfoTy XCOFFSymInfo; 0034 uint8_t Type; 0035 // Used by ELF to describe a mapping symbol that is usually not displayed. 0036 bool IsMappingSymbol; 0037 0038 private: 0039 bool IsXCOFF; 0040 bool HasType; 0041 0042 public: 0043 SymbolInfoTy(std::optional<XCOFF::StorageMappingClass> Smc, uint64_t Addr, 0044 StringRef Name, std::optional<uint32_t> Idx, bool Label) 0045 : Addr(Addr), Name(Name), XCOFFSymInfo{Smc, Idx, Label}, Type(0), 0046 IsMappingSymbol(false), IsXCOFF(true), HasType(false) {} 0047 SymbolInfoTy(uint64_t Addr, StringRef Name, uint8_t Type, 0048 bool IsMappingSymbol = false, bool IsXCOFF = false) 0049 : Addr(Addr), Name(Name), Type(Type), IsMappingSymbol(IsMappingSymbol), 0050 IsXCOFF(IsXCOFF), HasType(true) {} 0051 bool isXCOFF() const { return IsXCOFF; } 0052 0053 private: 0054 friend bool operator<(const SymbolInfoTy &P1, const SymbolInfoTy &P2) { 0055 assert((P1.IsXCOFF == P2.IsXCOFF && P1.HasType == P2.HasType) && 0056 "The value of IsXCOFF and HasType in P1 and P2 should be the same " 0057 "respectively."); 0058 0059 if (P1.IsXCOFF && P1.HasType) 0060 return std::tie(P1.Addr, P1.Type, P1.Name) < 0061 std::tie(P2.Addr, P2.Type, P2.Name); 0062 0063 if (P1.IsXCOFF) 0064 return std::tie(P1.Addr, P1.XCOFFSymInfo, P1.Name) < 0065 std::tie(P2.Addr, P2.XCOFFSymInfo, P2.Name); 0066 0067 // With the same address, place mapping symbols first. 0068 bool MS1 = !P1.IsMappingSymbol, MS2 = !P2.IsMappingSymbol; 0069 return std::tie(P1.Addr, MS1, P1.Name, P1.Type) < 0070 std::tie(P2.Addr, MS2, P2.Name, P2.Type); 0071 } 0072 }; 0073 0074 using SectionSymbolsTy = std::vector<SymbolInfoTy>; 0075 0076 template <typename T> class ArrayRef; 0077 class MCContext; 0078 class MCInst; 0079 class MCSubtargetInfo; 0080 class raw_ostream; 0081 0082 /// Superclass for all disassemblers. Consumes a memory region and provides an 0083 /// array of assembly instructions. 0084 class MCDisassembler { 0085 public: 0086 /// Ternary decode status. Most backends will just use Fail and 0087 /// Success, however some have a concept of an instruction with 0088 /// understandable semantics but which is architecturally 0089 /// incorrect. An example of this is ARM UNPREDICTABLE instructions 0090 /// which are disassemblable but cause undefined behaviour. 0091 /// 0092 /// Because it makes sense to disassemble these instructions, there 0093 /// is a "soft fail" failure mode that indicates the MCInst& is 0094 /// valid but architecturally incorrect. 0095 /// 0096 /// The enum numbers are deliberately chosen such that reduction 0097 /// from Success->SoftFail ->Fail can be done with a simple 0098 /// bitwise-AND: 0099 /// 0100 /// LEFT & TOP = | Success Unpredictable Fail 0101 /// --------------+----------------------------------- 0102 /// Success | Success Unpredictable Fail 0103 /// Unpredictable | Unpredictable Unpredictable Fail 0104 /// Fail | Fail Fail Fail 0105 /// 0106 /// An easy way of encoding this is as 0b11, 0b01, 0b00 for 0107 /// Success, SoftFail, Fail respectively. 0108 enum DecodeStatus { 0109 Fail = 0, 0110 SoftFail = 1, 0111 Success = 3 0112 }; 0113 0114 MCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) 0115 : Ctx(Ctx), STI(STI) {} 0116 0117 virtual ~MCDisassembler(); 0118 0119 /// Returns the disassembly of a single instruction. 0120 /// 0121 /// \param Instr - An MCInst to populate with the contents of the 0122 /// instruction. 0123 /// \param Size - A value to populate with the size of the instruction, or 0124 /// the number of bytes consumed while attempting to decode 0125 /// an invalid instruction. 0126 /// \param Address - The address, in the memory space of region, of the first 0127 /// byte of the instruction. 0128 /// \param Bytes - A reference to the actual bytes of the instruction. 0129 /// \param CStream - The stream to print comments and annotations on. 0130 /// \return - MCDisassembler::Success if the instruction is valid, 0131 /// MCDisassembler::SoftFail if the instruction was 0132 /// disassemblable but invalid, 0133 /// MCDisassembler::Fail if the instruction was invalid. 0134 virtual DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, 0135 ArrayRef<uint8_t> Bytes, uint64_t Address, 0136 raw_ostream &CStream) const = 0; 0137 0138 /// Used to perform separate target specific disassembly for a particular 0139 /// symbol. May parse any prelude that precedes instructions after the 0140 /// start of a symbol, or the entire symbol. 0141 /// This is used for example by WebAssembly to decode preludes. 0142 /// 0143 /// Base implementation returns false. So all targets by default decline to 0144 /// treat symbols separately. 0145 /// 0146 /// \param Symbol - The symbol. 0147 /// \param Size - The number of bytes consumed. 0148 /// \param Address - The address, in the memory space of region, of the first 0149 /// byte of the symbol. 0150 /// \param Bytes - A reference to the actual bytes at the symbol location. 0151 /// \return - True if this symbol triggered some target specific 0152 /// disassembly for this symbol. Size must be set with the 0153 /// number of bytes consumed. 0154 /// - Error if this symbol triggered some target specific 0155 /// disassembly for this symbol, but an error was found with 0156 /// it. Size must be set with the number of bytes consumed. 0157 /// - False if the target doesn't want to handle the symbol 0158 /// separately. The value of Size is ignored in this case, 0159 /// and Err must not be set. 0160 virtual Expected<bool> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, 0161 ArrayRef<uint8_t> Bytes, 0162 uint64_t Address) const; 0163 // TODO: 0164 // Implement similar hooks that can be used at other points during 0165 // disassembly. Something along the following lines: 0166 // - onBeforeInstructionDecode() 0167 // - onAfterInstructionDecode() 0168 // - onSymbolEnd() 0169 // It should help move much of the target specific code from llvm-objdump to 0170 // respective target disassemblers. 0171 0172 /// Suggest a distance to skip in a buffer of data to find the next 0173 /// place to look for the start of an instruction. For example, if 0174 /// all instructions have a fixed alignment, this might advance to 0175 /// the next multiple of that alignment. 0176 /// 0177 /// If not overridden, the default is 1. 0178 /// 0179 /// \param Address - The address, in the memory space of region, of the 0180 /// starting point (typically the first byte of something 0181 /// that did not decode as a valid instruction at all). 0182 /// \param Bytes - A reference to the actual bytes at Address. May be 0183 /// needed in order to determine the width of an 0184 /// unrecognized instruction (e.g. in Thumb this is a simple 0185 /// consistent criterion that doesn't require knowing the 0186 /// specific instruction). The caller can pass as much data 0187 /// as they have available, and the function is required to 0188 /// make a reasonable default choice if not enough data is 0189 /// available to make a better one. 0190 /// \return - A number of bytes to skip. Must always be greater than 0191 /// zero. May be greater than the size of Bytes. 0192 virtual uint64_t suggestBytesToSkip(ArrayRef<uint8_t> Bytes, 0193 uint64_t Address) const; 0194 0195 private: 0196 MCContext &Ctx; 0197 0198 protected: 0199 // Subtarget information, for instruction decoding predicates if required. 0200 const MCSubtargetInfo &STI; 0201 std::unique_ptr<MCSymbolizer> Symbolizer; 0202 0203 public: 0204 // Helpers around MCSymbolizer 0205 bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address, 0206 bool IsBranch, uint64_t Offset, uint64_t OpSize, 0207 uint64_t InstSize) const; 0208 0209 void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const; 0210 0211 /// Set \p Symzer as the current symbolizer. 0212 /// This takes ownership of \p Symzer, and deletes the previously set one. 0213 void setSymbolizer(std::unique_ptr<MCSymbolizer> Symzer); 0214 0215 MCContext& getContext() const { return Ctx; } 0216 0217 const MCSubtargetInfo& getSubtargetInfo() const { return STI; } 0218 0219 /// ELF-specific, set the ABI version from the object header. 0220 virtual void setABIVersion(unsigned Version) {} 0221 0222 // Marked mutable because we cache it inside the disassembler, rather than 0223 // having to pass it around as an argument through all the autogenerated code. 0224 mutable raw_ostream *CommentStream = nullptr; 0225 }; 0226 0227 } // end namespace llvm 0228 0229 #endif // LLVM_MC_MCDISASSEMBLER_MCDISASSEMBLER_H
| [ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
|
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
|