Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-05-10 08:42:44

0001 //===-- Disassembler.h ------------------------------------------*- C++ -*-===//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 
0009 #ifndef LLDB_CORE_DISASSEMBLER_H
0010 #define LLDB_CORE_DISASSEMBLER_H
0011 
0012 #include "lldb/Core/Address.h"
0013 #include "lldb/Core/EmulateInstruction.h"
0014 #include "lldb/Core/FormatEntity.h"
0015 #include "lldb/Core/Opcode.h"
0016 #include "lldb/Core/PluginInterface.h"
0017 #include "lldb/Interpreter/OptionValue.h"
0018 #include "lldb/Symbol/LineEntry.h"
0019 #include "lldb/Target/ExecutionContext.h"
0020 #include "lldb/Utility/ArchSpec.h"
0021 #include "lldb/Utility/ConstString.h"
0022 #include "lldb/Utility/FileSpec.h"
0023 #include "lldb/lldb-defines.h"
0024 #include "lldb/lldb-forward.h"
0025 #include "lldb/lldb-private-enumerations.h"
0026 #include "lldb/lldb-types.h"
0027 
0028 #include "llvm/ADT/StringRef.h"
0029 
0030 #include <functional>
0031 #include <map>
0032 #include <memory>
0033 #include <set>
0034 #include <string>
0035 #include <vector>
0036 
0037 #include <cstddef>
0038 #include <cstdint>
0039 #include <cstdio>
0040 
0041 namespace llvm {
0042 template <typename T> class SmallVectorImpl;
0043 }
0044 
0045 namespace lldb_private {
0046 class AddressRange;
0047 class DataExtractor;
0048 class Debugger;
0049 class Disassembler;
0050 class Module;
0051 class StackFrame;
0052 class Stream;
0053 class SymbolContext;
0054 class SymbolContextList;
0055 class Target;
0056 struct RegisterInfo;
0057 
0058 class Instruction {
0059 public:
0060   Instruction(const Address &address,
0061               AddressClass addr_class = AddressClass::eInvalid);
0062 
0063   virtual ~Instruction();
0064 
0065   const Address &GetAddress() const { return m_address; }
0066 
0067   const char *GetMnemonic(const ExecutionContext *exe_ctx,
0068                           bool markup = false) {
0069     CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx);
0070     return markup ? m_markup_opcode_name.c_str() : m_opcode_name.c_str();
0071   }
0072 
0073   const char *GetOperands(const ExecutionContext *exe_ctx,
0074                           bool markup = false) {
0075     CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx);
0076     return markup ? m_markup_mnemonics.c_str() : m_mnemonics.c_str();
0077   }
0078 
0079   const char *GetComment(const ExecutionContext *exe_ctx) {
0080     CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx);
0081     return m_comment.c_str();
0082   }
0083 
0084   /// \return
0085   ///    The control flow kind of this instruction, or
0086   ///    eInstructionControlFlowKindUnknown if the instruction
0087   ///    can't be classified.
0088   virtual lldb::InstructionControlFlowKind
0089   GetControlFlowKind(const ExecutionContext *exe_ctx) {
0090     return lldb::eInstructionControlFlowKindUnknown;
0091   }
0092 
0093   virtual void
0094   CalculateMnemonicOperandsAndComment(const ExecutionContext *exe_ctx) = 0;
0095 
0096   AddressClass GetAddressClass();
0097 
0098   void SetAddress(const Address &addr) {
0099     // Invalidate the address class to lazily discover it if we need to.
0100     m_address_class = AddressClass::eInvalid;
0101     m_address = addr;
0102   }
0103 
0104   /// Dump the text representation of this Instruction to a Stream
0105   ///
0106   /// Print the (optional) address, (optional) bytes, opcode,
0107   /// operands, and instruction comments to a stream.
0108   ///
0109   /// \param[in] s
0110   ///     The Stream to add the text to.
0111   ///
0112   /// \param[in] show_address
0113   ///     Whether the address (using disassembly_addr_format_spec formatting)
0114   ///     should be printed.
0115   ///
0116   /// \param[in] show_bytes
0117   ///     Whether the bytes of the assembly instruction should be printed.
0118   ///
0119   /// \param[in] show_control_flow_kind
0120   ///     Whether the control flow kind of the instruction should be printed.
0121   ///
0122   /// \param[in] max_opcode_byte_size
0123   ///     The size (in bytes) of the largest instruction in the list that
0124   ///     we are printing (for text justification/alignment purposes)
0125   ///     Only needed if show_bytes is true.
0126   ///
0127   /// \param[in] exe_ctx
0128   ///     The current execution context, if available.  May be used in
0129   ///     the assembling of the operands+comments for this instruction.
0130   ///     Pass NULL if not applicable.
0131   ///
0132   /// \param[in] sym_ctx
0133   ///     The SymbolContext for this instruction.
0134   ///     Pass NULL if not available/computed.
0135   ///     Only needed if show_address is true.
0136   ///
0137   /// \param[in] prev_sym_ctx
0138   ///     The SymbolContext for the previous instruction.  Depending on
0139   ///     the disassembly address format specification, a change in
0140   ///     Symbol / Function may mean that a line is printed with the new
0141   ///     symbol/function name.
0142   ///     Pass NULL if unavailable, or if this is the first instruction of
0143   ///     the InstructionList.
0144   ///     Only needed if show_address is true.
0145   ///
0146   /// \param[in] disassembly_addr_format
0147   ///     The format specification for how addresses are printed.
0148   ///     Only needed if show_address is true.
0149   ///
0150   /// \param[in] max_address_text_size
0151   ///     The length of the longest address string at the start of the
0152   ///     disassembly line that will be printed (the
0153   ///     Debugger::FormatDisassemblerAddress() string)
0154   ///     so this method can properly align the instruction opcodes.
0155   ///     May be 0 to indicate no indentation/alignment of the opcodes.
0156   virtual void Dump(Stream *s, uint32_t max_opcode_byte_size, bool show_address,
0157                     bool show_bytes, bool show_control_flow_kind,
0158                     const ExecutionContext *exe_ctx,
0159                     const SymbolContext *sym_ctx,
0160                     const SymbolContext *prev_sym_ctx,
0161                     const FormatEntity::Entry *disassembly_addr_format,
0162                     size_t max_address_text_size);
0163 
0164   virtual bool DoesBranch() = 0;
0165 
0166   virtual bool HasDelaySlot();
0167 
0168   virtual bool IsLoad() = 0;
0169 
0170   virtual bool IsAuthenticated() = 0;
0171 
0172   bool CanSetBreakpoint ();
0173 
0174   virtual size_t Decode(const Disassembler &disassembler,
0175                         const DataExtractor &data,
0176                         lldb::offset_t data_offset) = 0;
0177 
0178   virtual void SetDescription(llvm::StringRef) {
0179   } // May be overridden in sub-classes that have descriptions.
0180 
0181   lldb::OptionValueSP ReadArray(FILE *in_file, Stream &out_stream,
0182                                 OptionValue::Type data_type);
0183 
0184   lldb::OptionValueSP ReadDictionary(FILE *in_file, Stream &out_stream);
0185 
0186   bool DumpEmulation(const ArchSpec &arch);
0187 
0188   virtual bool TestEmulation(Stream &stream, const char *test_file_name);
0189 
0190   bool Emulate(const ArchSpec &arch, uint32_t evaluate_options, void *baton,
0191                EmulateInstruction::ReadMemoryCallback read_mem_callback,
0192                EmulateInstruction::WriteMemoryCallback write_mem_calback,
0193                EmulateInstruction::ReadRegisterCallback read_reg_callback,
0194                EmulateInstruction::WriteRegisterCallback write_reg_callback);
0195 
0196   const Opcode &GetOpcode() const { return m_opcode; }
0197 
0198   uint32_t GetData(DataExtractor &data);
0199 
0200   struct Operand {
0201     enum class Type {
0202       Invalid = 0,
0203       Register,
0204       Immediate,
0205       Dereference,
0206       Sum,
0207       Product
0208     } m_type = Type::Invalid;
0209     std::vector<Operand> m_children;
0210     lldb::addr_t m_immediate = 0;
0211     ConstString m_register;
0212     bool m_negative = false;
0213     bool m_clobbered = false;
0214 
0215     bool IsValid() { return m_type != Type::Invalid; }
0216 
0217     static Operand BuildRegister(ConstString &r);
0218     static Operand BuildImmediate(lldb::addr_t imm, bool neg);
0219     static Operand BuildImmediate(int64_t imm);
0220     static Operand BuildDereference(const Operand &ref);
0221     static Operand BuildSum(const Operand &lhs, const Operand &rhs);
0222     static Operand BuildProduct(const Operand &lhs, const Operand &rhs);
0223   };
0224 
0225   virtual bool ParseOperands(llvm::SmallVectorImpl<Operand> &operands) {
0226     return false;
0227   }
0228 
0229   virtual bool IsCall() { return false; }
0230 
0231   static const char *GetNameForInstructionControlFlowKind(
0232       lldb::InstructionControlFlowKind instruction_control_flow_kind);
0233 
0234 protected:
0235   Address m_address; // The section offset address of this instruction
0236                      // We include an address class in the Instruction class to
0237                      // allow the instruction specify the
0238                      // AddressClass::eCodeAlternateISA (currently used for
0239                      // thumb), and also to specify data (AddressClass::eData).
0240                      // The usual value will be AddressClass::eCode, but often
0241                      // when disassembling memory, you might run into data.
0242                      // This can help us to disassemble appropriately.
0243 private:
0244   AddressClass m_address_class; // Use GetAddressClass () accessor function!
0245 
0246 protected:
0247   Opcode m_opcode; // The opcode for this instruction
0248   std::string m_opcode_name;
0249   std::string m_markup_opcode_name;
0250   std::string m_mnemonics;
0251   std::string m_markup_mnemonics;
0252   std::string m_comment;
0253   bool m_calculated_strings;
0254 
0255   void
0256   CalculateMnemonicOperandsAndCommentIfNeeded(const ExecutionContext *exe_ctx) {
0257     if (!m_calculated_strings) {
0258       m_calculated_strings = true;
0259       CalculateMnemonicOperandsAndComment(exe_ctx);
0260     }
0261   }
0262 };
0263 /// Functions that build std::function predicates taking a const Instruction::Operand &.
0264 namespace OperandMatchers {
0265 std::function<bool(const Instruction::Operand &)>
0266 MatchBinaryOp(std::function<bool(const Instruction::Operand &)> base,
0267               std::function<bool(const Instruction::Operand &)> left,
0268               std::function<bool(const Instruction::Operand &)> right);
0269 
0270 std::function<bool(const Instruction::Operand &)>
0271 MatchUnaryOp(std::function<bool(const Instruction::Operand &)> base,
0272              std::function<bool(const Instruction::Operand &)> child);
0273 
0274 std::function<bool(const Instruction::Operand &)>
0275 MatchRegOp(const RegisterInfo &info);
0276 
0277 std::function<bool(const Instruction::Operand &)> FetchRegOp(ConstString &reg);
0278 
0279 std::function<bool(const Instruction::Operand &)> MatchImmOp(int64_t imm);
0280 
0281 std::function<bool(const Instruction::Operand &)> FetchImmOp(int64_t &imm);
0282 
0283 std::function<bool(const Instruction::Operand &)>
0284 MatchOpType(Instruction::Operand::Type type);
0285 } // namespace OperandMatchers
0286 /// Ordered list of lldb::InstructionSP entries backed by a std::vector.
0287 class InstructionList {
0288 public:
0289   InstructionList();
0290   ~InstructionList();
0291 
0292   size_t GetSize() const;
0293   // NOTE(review): "Opcocde" is misspelled, but this is the public name callers use.
0294   uint32_t GetMaxOpcocdeByteSize() const;
0295 
0296   lldb::InstructionSP GetInstructionAtIndex(size_t idx) const;
0297 
0298   /// Get the instruction at the given address.
0299   ///
0300   /// \return
0301   ///    A valid \a InstructionSP if the address could be found, or null
0302   ///    otherwise.
0303   lldb::InstructionSP GetInstructionAtAddress(const Address &addr);
0304 
0305   //------------------------------------------------------------------
0306   /// Get the index of the next branch instruction.
0307   ///
0308   /// Given a list of instructions, find the next branch instruction
0309   /// in the list by returning an index.
0310   ///
0311   /// \param[in] start
0312   ///     The instruction index of the first instruction to check.
0313   ///
0314   /// \param[in] ignore_calls
0315   ///     If true, then find the first branch instruction that isn't
0316   ///     a function call (a branch that calls and returns to the next
0317   ///     instruction). If false, find the instruction index of any
0318   ///     branch in the list.
0319   ///
0320   /// \param[out] found_calls
0321   ///     If non-null, this will be set to true if any calls were found in
0322   ///     extending the range.
0323   ///
0324   /// \return
0325   ///     The instruction index of the first branch that is at or past
0326   ///     \a start. Returns UINT32_MAX if no matching branches are
0327   ///     found.
0328   //------------------------------------------------------------------
0329   uint32_t GetIndexOfNextBranchInstruction(uint32_t start,
0330                                            bool ignore_calls,
0331                                            bool *found_calls) const;
0332 
0333   uint32_t GetIndexOfInstructionAtLoadAddress(lldb::addr_t load_addr,
0334                                               Target &target);
0335 
0336   uint32_t GetIndexOfInstructionAtAddress(const Address &addr);
0337 
0338   void Clear();
0339 
0340   void Append(lldb::InstructionSP &inst_sp);
0341 
0342   void Dump(Stream *s, bool show_address, bool show_bytes,
0343             bool show_control_flow_kind, const ExecutionContext *exe_ctx);
0344 
0345 private:
0346   typedef std::vector<lldb::InstructionSP> collection;
0347   typedef collection::iterator iterator;
0348   typedef collection::const_iterator const_iterator;
0349 
0350   collection m_instructions; // The stored instructions, in list order.
0351 };
0352 /// Instruction subclass that also stores a textual description (m_description).
0353 class PseudoInstruction : public Instruction {
0354 public:
0355   PseudoInstruction();
0356 
0357   ~PseudoInstruction() override;
0358 
0359   bool DoesBranch() override;
0360 
0361   bool HasDelaySlot() override;
0362 
0363   bool IsLoad() override;
0364 
0365   bool IsAuthenticated() override;
0366   // Currently a no-op: the cached opcode/operand/comment strings are left empty.
0367   void CalculateMnemonicOperandsAndComment(
0368       const ExecutionContext *exe_ctx) override {
0369     // TODO: fill this in and put opcode name into Instruction::m_opcode_name,
0370     // mnemonic into Instruction::m_mnemonics, and any comment into
0371     // Instruction::m_comment
0372   }
0373 
0374   size_t Decode(const Disassembler &disassembler, const DataExtractor &data,
0375                 lldb::offset_t data_offset) override;
0376 
0377   void SetOpcode(size_t opcode_size, void *opcode_data);
0378 
0379   void SetDescription(llvm::StringRef description) override;
0380 
0381 protected:
0382   std::string m_description;
0383   // Copy construction and copy assignment are disabled.
0384   PseudoInstruction(const PseudoInstruction &) = delete;
0385   const PseudoInstruction &operator=(const PseudoInstruction &) = delete;
0386 };
0387 /// Plugin base class that decodes bytes into an InstructionList for one ArchSpec and flavor.
0388 class Disassembler : public std::enable_shared_from_this<Disassembler>,
0389                      public PluginInterface {
0390 public:
0391   enum {
0392     eOptionNone = 0u,
0393     eOptionShowBytes = (1u << 0),
0394     eOptionRawOuput = (1u << 1),
0395     eOptionMarkPCSourceLine = (1u << 2), // Mark the source line that contains
0396                                          // the current PC (mixed mode only)
0397     eOptionMarkPCAddress =
0398         (1u << 3), // Mark the disassembly line that contains the PC
0399     eOptionShowControlFlowKind = (1u << 4),
0400   };
0401 
0402   enum HexImmediateStyle {
0403     eHexStyleC,
0404     eHexStyleAsm,
0405   };
0406 
0407   // FindPlugin should be lax about the flavor string (it is too annoying to
0408   // have various internal uses of the disassembler fail because the global
0409   // flavor string gets set wrong). Instead, if you get a flavor string you
0410   // don't understand, use the default.  Folks who care to check can use the
0411   // FlavorValidForArchSpec method on the disassembler they got back.
0412   static lldb::DisassemblerSP FindPlugin(const ArchSpec &arch,
0413                                          const char *flavor, const char *cpu,
0414                                          const char *features,
0415                                          const char *plugin_name);
0416 
0417   // This version will use the value in the Target settings if flavor is NULL.
0418   static lldb::DisassemblerSP
0419   FindPluginForTarget(const Target &target, const ArchSpec &arch,
0420                       const char *flavor, const char *cpu, const char *features,
0421                       const char *plugin_name);
0422   /// A disassembly limit: \a value is interpreted according to \a kind (bytes or instructions).
0423   struct Limit {
0424     enum { Bytes, Instructions } kind;
0425     lldb::addr_t value;
0426   };
0427 
0428   static lldb::DisassemblerSP
0429   DisassembleRange(const ArchSpec &arch, const char *plugin_name,
0430                    const char *flavor, const char *cpu, const char *features,
0431                    Target &target, llvm::ArrayRef<AddressRange> disasm_ranges,
0432                    bool force_live_memory = false);
0433 
0434   static lldb::DisassemblerSP
0435   DisassembleBytes(const ArchSpec &arch, const char *plugin_name,
0436                    const char *flavor, const char *cpu, const char *features,
0437                    const Address &start, const void *bytes, size_t length,
0438                    uint32_t max_num_instructions, bool data_from_file);
0439 
0440   static bool Disassemble(Debugger &debugger, const ArchSpec &arch,
0441                           const char *plugin_name, const char *flavor,
0442                           const char *cpu, const char *features,
0443                           const ExecutionContext &exe_ctx, const Address &start,
0444                           Limit limit, bool mixed_source_and_assembly,
0445                           uint32_t num_mixed_context_lines, uint32_t options,
0446                           Stream &strm);
0447 
0448   static bool Disassemble(Debugger &debugger, const ArchSpec &arch,
0449                           StackFrame &frame, Stream &strm);
0450 
0451   // Constructors and Destructors
0452   Disassembler(const ArchSpec &arch, const char *flavor);
0453   ~Disassembler() override;
0454 
0455   void PrintInstructions(Debugger &debugger, const ArchSpec &arch,
0456                          const ExecutionContext &exe_ctx,
0457                          bool mixed_source_and_assembly,
0458                          uint32_t num_mixed_context_lines, uint32_t options,
0459                          Stream &strm);
0460   /// Clears the current instruction list, then forwards to AppendInstructions().
0461   size_t ParseInstructions(Target &target, Address address, Limit limit,
0462                            Stream *error_strm_ptr,
0463                            bool force_live_memory = false) {
0464     m_instruction_list.Clear();
0465     return AppendInstructions(target, address, limit, error_strm_ptr,
0466                               force_live_memory);
0467   }
0468 
0469   virtual size_t DecodeInstructions(const Address &base_addr,
0470                                     const DataExtractor &data,
0471                                     lldb::offset_t data_offset,
0472                                     size_t num_instructions, bool append,
0473                                     bool data_from_file) = 0;
0474 
0475   InstructionList &GetInstructionList();
0476 
0477   const InstructionList &GetInstructionList() const;
0478 
0479   const ArchSpec &GetArchitecture() const { return m_arch; }
0480 
0481   const char *GetFlavor() const { return m_flavor.c_str(); }
0482 
0483   virtual bool FlavorValidForArchSpec(const lldb_private::ArchSpec &arch,
0484                                       const char *flavor) = 0;
0485 
0486 protected:
0487   size_t AppendInstructions(Target &target, Address address, Limit limit,
0488                             Stream *error_strm_ptr, bool force_live_memory);
0489 
0490   // SourceLine and SourceLinesToDisplay structures are only used in the mixed
0491   // source and assembly display methods internal to this class.
0492 
0493   struct SourceLine {
0494     FileSpec file;
0495     uint32_t line = LLDB_INVALID_LINE_NUMBER;
0496     uint32_t column = 0;
0497 
0498     SourceLine() = default;
0499 
0500     bool operator==(const SourceLine &rhs) const {
0501       return file == rhs.file && line == rhs.line && rhs.column == column;
0502     }
0503 
0504     bool operator!=(const SourceLine &rhs) const {
0505       return file != rhs.file || line != rhs.line || column != rhs.column;
0506     }
0507 
0508     bool IsValid() const { return line != LLDB_INVALID_LINE_NUMBER; }
0509   };
0510 
0511   struct SourceLinesToDisplay {
0512     std::vector<SourceLine> lines;
0513 
0514     // index of the "current" source line, if we want to highlight that when
0515     // displaying the source lines.  (as opposed to the surrounding source
0516     // lines provided to give context).  Defaults to the maximum size_t value;
0517     // the conversion from -1 is spelled out so it is visibly intentional.
0518     size_t current_source_line = static_cast<size_t>(-1);
0518 
0519     // Whether to print a blank line at the end of the source lines.
0520     bool print_source_context_end_eol = true;
0521 
0522     SourceLinesToDisplay() = default;
0523   };
0524 
0525   // Get the function's declaration line number, hopefully a line number
0526   // earlier than the opening curly brace at the start of the function body.
0527   static SourceLine GetFunctionDeclLineEntry(const SymbolContext &sc);
0528 
0529   // Add the provided SourceLine to the map of filenames-to-source-lines-seen.
0530   static void AddLineToSourceLineTables(
0531       SourceLine &line,
0532       std::map<FileSpec, std::set<uint32_t>> &source_lines_seen);
0533 
0534   // Given a source line, determine if we should print it when we're doing
0535   // mixed source & assembly output. We're currently using the
0536   // target.process.thread.step-avoid-regexp setting (which is used for
0537   // stepping over inlined STL functions by default) to determine what source
0538   // lines to avoid showing.
0539   //
0540   // Returns true if this source line should be elided (if the source line
0541   // should not be displayed).
0542   static bool
0543   ElideMixedSourceAndDisassemblyLine(const ExecutionContext &exe_ctx,
0544                                      const SymbolContext &sc, SourceLine &line);
0545   // Overload: copies file/line/column from a LineEntry and forwards to the SourceLine overload.
0546   static bool
0547   ElideMixedSourceAndDisassemblyLine(const ExecutionContext &exe_ctx,
0548                                      const SymbolContext &sc, LineEntry &line) {
0549     SourceLine sl;
0550     sl.file = line.GetFile();
0551     sl.line = line.line;
0552     sl.column = line.column;
0553     return ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, sl);
0554   }
0555 
0556   // Classes that inherit from Disassembler can see and modify these
0557   ArchSpec m_arch;
0558   InstructionList m_instruction_list;
0559   std::string m_flavor;
0560 
0561 private:
0562   // For Disassembler only
0563   Disassembler(const Disassembler &) = delete;
0564   const Disassembler &operator=(const Disassembler &) = delete;
0565 };
0566 
0567 } // namespace lldb_private
0568 
0569 #endif // LLDB_CORE_DISASSEMBLER_H