Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-05-10 08:44:12

0001 //===- llvm/MC/MCParser/MCTargetAsmParser.h - Target Asm Parser -*- C++ -*-===//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 
0009 #ifndef LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
0010 #define LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
0011 
0012 #include "llvm/ADT/StringRef.h"
0013 #include "llvm/MC/MCExpr.h"
0014 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
0015 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
0016 #include "llvm/MC/MCRegister.h"
0017 #include "llvm/MC/MCTargetOptions.h"
0018 #include "llvm/Support/SMLoc.h"
0019 #include "llvm/TargetParser/SubtargetFeature.h"
0020 #include <cstdint>
0021 #include <memory>
0022 
0023 namespace llvm {
0024 
0025 class MCContext;
0026 class MCInst;
0027 class MCInstrInfo;
0028 class MCStreamer;
0029 class MCSubtargetInfo;
0030 class MCSymbol;
0031 template <typename T> class SmallVectorImpl;
0032 
0033 using OperandVector = SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>>;
0034 
/// Kinds of rewrite that can be recorded for a piece of assembly text.
///
/// The enumerator values are used as indices into AsmRewritePrecedence, so
/// the two declarations must list the kinds in the same order.
enum AsmRewriteKind {
  AOK_Align,          // Rewrite align as .align.
  AOK_EVEN,           // Rewrite even as .even.
  AOK_Emit,           // Rewrite _emit as .byte.
  AOK_CallInput,      // Rewrite in terms of ${N:P}.
  AOK_Input,          // Rewrite in terms of $N.
  AOK_Output,         // Rewrite in terms of $N.
  AOK_SizeDirective,  // Add a sizing directive (e.g., dword ptr).
  AOK_Label,          // Rewrite local labels.
  AOK_EndOfStatement, // Add EndOfStatement (e.g., "\n\t").
  AOK_Skip,           // Skip emission (e.g., offset/type operators).
  AOK_IntelExpr       // SizeDirective SymDisp [BaseReg + IndexReg * Scale + ImmDisp]
};

/// Precedence value for each AsmRewriteKind, indexed by the enumerator value.
/// Entries appear in exactly the order of the enumerators above.
const char AsmRewritePrecedence [] = {
  2, // AOK_Align
  2, // AOK_EVEN
  2, // AOK_Emit
  3, // AOK_CallInput
  3, // AOK_Input
  3, // AOK_Output
  5, // AOK_SizeDirective
  1, // AOK_Label
  5, // AOK_EndOfStatement
  2, // AOK_Skip
  2  // AOK_IntelExpr
};

// Catch a kind being added to the enum without a matching table entry.
static_assert(sizeof(AsmRewritePrecedence) / sizeof(AsmRewritePrecedence[0]) ==
                  static_cast<unsigned>(AOK_IntelExpr) + 1u,
              "AsmRewritePrecedence needs exactly one entry per AsmRewriteKind");
0062 
0063 // Represent the various parts which make up an intel expression,
0064 // used for emitting compound intel expressions
0065 struct IntelExpr {
0066   bool NeedBracs = false;
0067   int64_t Imm = 0;
0068   StringRef BaseReg;
0069   StringRef IndexReg;
0070   StringRef OffsetName;
0071   unsigned Scale = 1;
0072 
0073   IntelExpr() = default;
0074   // [BaseReg + IndexReg * ScaleExpression + OFFSET name + ImmediateExpression]
0075   IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale,
0076             StringRef offsetName, int64_t imm, bool needBracs)
0077       : NeedBracs(needBracs), Imm(imm), BaseReg(baseReg), IndexReg(indexReg),
0078         OffsetName(offsetName), Scale(1) {
0079     if (scale)
0080       Scale = scale;
0081   }
0082   bool hasBaseReg() const { return !BaseReg.empty(); }
0083   bool hasIndexReg() const { return !IndexReg.empty(); }
0084   bool hasRegs() const { return hasBaseReg() || hasIndexReg(); }
0085   bool hasOffset() const { return !OffsetName.empty(); }
0086   // Normally we won't emit immediates unconditionally,
0087   // unless we've got no other components
0088   bool emitImm() const { return !(hasRegs() || hasOffset()); }
0089   bool isValid() const {
0090     return (Scale == 1) ||
0091            (hasIndexReg() && (Scale == 2 || Scale == 4 || Scale == 8));
0092   }
0093 };
0094 
0095 struct AsmRewrite {
0096   AsmRewriteKind Kind;
0097   SMLoc Loc;
0098   unsigned Len;
0099   bool Done;
0100   int64_t Val;
0101   StringRef Label;
0102   IntelExpr IntelExp;
0103   bool IntelExpRestricted;
0104 
0105 public:
0106   AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len = 0, int64_t val = 0,
0107              bool Restricted = false)
0108       : Kind(kind), Loc(loc), Len(len), Done(false), Val(val) {
0109     IntelExpRestricted = Restricted;
0110   }
0111   AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len, StringRef label)
0112     : AsmRewrite(kind, loc, len) { Label = label; }
0113   AsmRewrite(SMLoc loc, unsigned len, IntelExpr exp)
0114     : AsmRewrite(AOK_IntelExpr, loc, len) { IntelExp = exp; }
0115 };
0116 
0117 struct ParseInstructionInfo {
0118   SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr;
0119 
0120   ParseInstructionInfo() = default;
0121   ParseInstructionInfo(SmallVectorImpl<AsmRewrite> *rewrites)
0122     : AsmRewrites(rewrites) {}
0123 };
0124 
/// Outcome of trying to match a single operand.
enum OperandMatchResultTy {
  /// The operand matched successfully.
  MatchOperand_Success,
  /// The operand did not match.
  MatchOperand_NoMatch,
  /// The operand matched but had errors.
  MatchOperand_ParseFail
};
0130 
0131 /// Ternary parse status returned by various parse* methods.
0132 class ParseStatus {
0133   enum class StatusTy { Success, Failure, NoMatch } Status;
0134 
0135 public:
0136 #if __cplusplus >= 202002L
0137   using enum StatusTy;
0138 #else
0139   static constexpr StatusTy Success = StatusTy::Success;
0140   static constexpr StatusTy Failure = StatusTy::Failure;
0141   static constexpr StatusTy NoMatch = StatusTy::NoMatch;
0142 #endif
0143 
0144   constexpr ParseStatus() : Status(NoMatch) {}
0145 
0146   constexpr ParseStatus(StatusTy Status) : Status(Status) {}
0147 
0148   constexpr ParseStatus(bool Error) : Status(Error ? Failure : Success) {}
0149 
0150   template <typename T> constexpr ParseStatus(T) = delete;
0151 
0152   constexpr bool isSuccess() const { return Status == StatusTy::Success; }
0153   constexpr bool isFailure() const { return Status == StatusTy::Failure; }
0154   constexpr bool isNoMatch() const { return Status == StatusTy::NoMatch; }
0155 
0156   // Allow implicit conversions to / from OperandMatchResultTy.
0157   LLVM_DEPRECATED("Migrate to ParseStatus", "")
0158   constexpr ParseStatus(OperandMatchResultTy R)
0159       : Status(R == MatchOperand_Success     ? Success
0160                : R == MatchOperand_ParseFail ? Failure
0161                                              : NoMatch) {}
0162   LLVM_DEPRECATED("Migrate to ParseStatus", "")
0163   constexpr operator OperandMatchResultTy() const {
0164     return isSuccess()   ? MatchOperand_Success
0165            : isFailure() ? MatchOperand_ParseFail
0166                          : MatchOperand_NoMatch;
0167   }
0168 };
0169 
/// The three possible outcomes of a diagnostic-aware operand predicate.
enum class DiagnosticPredicateTy {
  Match,
  NearMatch,
  NoMatch,
};

// While parsing an operand, the assembler walks the set of operand classes
// the operand might belong to and calls each class's PredicateMethod to find
// out whether it matches.
//
// When two AsmOperands would each produce a specific diagnostic on a failed
// match, nothing else tells the assembler which of them is the closer match.
// DiagnosticPredicate separates 'completely no match' from 'near match', so
// the assembler can choose between reporting the specific diagnostic and
// falling back to 'InvalidOperand' while it keeps looking for a 'better
// matching' diagnostic.
//
// For example:
//    opcode opnd0, opnd1, opnd2
//
// where:
//    opnd2 could be an 'immediate of range [-8, 7]'
//    opnd2 could be a  'register + shift/extend'.
//
// If opnd2 is a valid register with a wrong shift/extend suffix, reporting
// that the operand should be an immediate in range [-8, 7] makes little sense.
//
// This is a light-weight alternative to the 'NearMissInfo' approach below,
// which collects *all* possible diagnostics. It is optional and fully
// backward compatible with existing PredicateMethods that return a 'bool'
// (match or no match).
struct DiagnosticPredicate {
  DiagnosticPredicateTy Type;

  /// Wraps an explicit outcome.
  DiagnosticPredicate(DiagnosticPredicateTy T) : Type(T) {}

  /// Adapts a plain bool predicate result: true is Match, false is NearMatch.
  explicit DiagnosticPredicate(bool Match)
      : DiagnosticPredicate(Match ? DiagnosticPredicateTy::Match
                                  : DiagnosticPredicateTy::NearMatch) {}

  DiagnosticPredicate(const DiagnosticPredicate &) = default;
  DiagnosticPredicate& operator=(const DiagnosticPredicate &) = default;

  bool isMatch() const { return Type == DiagnosticPredicateTy::Match; }
  bool isNearMatch() const { return Type == DiagnosticPredicateTy::NearMatch; }
  bool isNoMatch() const { return Type == DiagnosticPredicateTy::NoMatch; }

  /// True only for a full Match.
  operator bool() const { return isMatch(); }
};
0217 
0218 // When matching of an assembly instruction fails, there may be multiple
0219 // encodings that are close to being a match. It's often ambiguous which one
0220 // the programmer intended to use, so we want to report an error which mentions
0221 // each of these "near-miss" encodings. This struct contains information about
0222 // one such encoding, and why it did not match the parsed instruction.
0223 class NearMissInfo {
0224 public:
0225   enum NearMissKind {
0226     NoNearMiss,
0227     NearMissOperand,
0228     NearMissFeature,
0229     NearMissPredicate,
0230     NearMissTooFewOperands,
0231   };
0232 
0233   // The encoding is valid for the parsed assembly string. This is only used
0234   // internally to the table-generated assembly matcher.
0235   static NearMissInfo getSuccess() { return NearMissInfo(); }
0236 
0237   // The instruction encoding is not valid because it requires some target
0238   // features that are not currently enabled. MissingFeatures has a bit set for
0239   // each feature that the encoding needs but which is not enabled.
0240   static NearMissInfo getMissedFeature(const FeatureBitset &MissingFeatures) {
0241     NearMissInfo Result;
0242     Result.Kind = NearMissFeature;
0243     Result.Features = MissingFeatures;
0244     return Result;
0245   }
0246 
0247   // The instruction encoding is not valid because the target-specific
0248   // predicate function returned an error code. FailureCode is the
0249   // target-specific error code returned by the predicate.
0250   static NearMissInfo getMissedPredicate(unsigned FailureCode) {
0251     NearMissInfo Result;
0252     Result.Kind = NearMissPredicate;
0253     Result.PredicateError = FailureCode;
0254     return Result;
0255   }
0256 
0257   // The instruction encoding is not valid because one (and only one) parsed
0258   // operand is not of the correct type. OperandError is the error code
0259   // relating to the operand class expected by the encoding. OperandClass is
0260   // the type of the expected operand. Opcode is the opcode of the encoding.
0261   // OperandIndex is the index into the parsed operand list.
0262   static NearMissInfo getMissedOperand(unsigned OperandError,
0263                                        unsigned OperandClass, unsigned Opcode,
0264                                        unsigned OperandIndex) {
0265     NearMissInfo Result;
0266     Result.Kind = NearMissOperand;
0267     Result.MissedOperand.Error = OperandError;
0268     Result.MissedOperand.Class = OperandClass;
0269     Result.MissedOperand.Opcode = Opcode;
0270     Result.MissedOperand.Index = OperandIndex;
0271     return Result;
0272   }
0273 
0274   // The instruction encoding is not valid because it expects more operands
0275   // than were parsed. OperandClass is the class of the expected operand that
0276   // was not provided. Opcode is the instruction encoding.
0277   static NearMissInfo getTooFewOperands(unsigned OperandClass,
0278                                         unsigned Opcode) {
0279     NearMissInfo Result;
0280     Result.Kind = NearMissTooFewOperands;
0281     Result.TooFewOperands.Class = OperandClass;
0282     Result.TooFewOperands.Opcode = Opcode;
0283     return Result;
0284   }
0285 
0286   operator bool() const { return Kind != NoNearMiss; }
0287 
0288   NearMissKind getKind() const { return Kind; }
0289 
0290   // Feature flags required by the instruction, that the current target does
0291   // not have.
0292   const FeatureBitset& getFeatures() const {
0293     assert(Kind == NearMissFeature);
0294     return Features;
0295   }
0296   // Error code returned by the target predicate when validating this
0297   // instruction encoding.
0298   unsigned getPredicateError() const {
0299     assert(Kind == NearMissPredicate);
0300     return PredicateError;
0301   }
0302   // MatchClassKind of the operand that we expected to see.
0303   unsigned getOperandClass() const {
0304     assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands);
0305     return MissedOperand.Class;
0306   }
0307   // Opcode of the encoding we were trying to match.
0308   unsigned getOpcode() const {
0309     assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands);
0310     return MissedOperand.Opcode;
0311   }
0312   // Error code returned when validating the operand.
0313   unsigned getOperandError() const {
0314     assert(Kind == NearMissOperand);
0315     return MissedOperand.Error;
0316   }
0317   // Index of the actual operand we were trying to match in the list of parsed
0318   // operands.
0319   unsigned getOperandIndex() const {
0320     assert(Kind == NearMissOperand);
0321     return MissedOperand.Index;
0322   }
0323 
0324 private:
0325   NearMissKind Kind;
0326 
0327   // These two structs share a common prefix, so we can safely rely on the fact
0328   // that they overlap in the union.
0329   struct MissedOpInfo {
0330     unsigned Class;
0331     unsigned Opcode;
0332     unsigned Error;
0333     unsigned Index;
0334   };
0335 
0336   struct TooFewOperandsInfo {
0337     unsigned Class;
0338     unsigned Opcode;
0339   };
0340 
0341   union {
0342     FeatureBitset Features;
0343     unsigned PredicateError;
0344     MissedOpInfo MissedOperand;
0345     TooFewOperandsInfo TooFewOperands;
0346   };
0347 
0348   NearMissInfo() : Kind(NoNearMiss) {}
0349 };
0350 
0351 /// MCTargetAsmParser - Generic interface to target specific assembly parsers.
0352 class MCTargetAsmParser : public MCAsmParserExtension {
0353 public:
0354   enum MatchResultTy {
0355     Match_InvalidOperand,
0356     Match_InvalidTiedOperand,
0357     Match_MissingFeature,
0358     Match_MnemonicFail,
0359     Match_Success,
0360     Match_NearMisses,
0361     FIRST_TARGET_MATCH_RESULT_TY
0362   };
0363 
0364 protected: // Can only create subclasses.
0365   MCTargetAsmParser(MCTargetOptions const &, const MCSubtargetInfo &STI,
0366                     const MCInstrInfo &MII);
0367 
0368   /// Create a copy of STI and return a non-const reference to it.
0369   MCSubtargetInfo &copySTI();
0370 
0371   /// AvailableFeatures - The current set of available features.
0372   FeatureBitset AvailableFeatures;
0373 
0374   /// ParsingMSInlineAsm - Are we parsing ms-style inline assembly?
0375   bool ParsingMSInlineAsm = false;
0376 
0377   /// SemaCallback - The Sema callback implementation.  Must be set when parsing
0378   /// ms-style inline assembly.
0379   MCAsmParserSemaCallback *SemaCallback = nullptr;
0380 
0381   /// Set of options which affects instrumentation of inline assembly.
0382   MCTargetOptions MCOptions;
0383 
0384   /// Current STI.
0385   const MCSubtargetInfo *STI;
0386 
0387   const MCInstrInfo &MII;
0388 
0389 public:
0390   MCTargetAsmParser(const MCTargetAsmParser &) = delete;
0391   MCTargetAsmParser &operator=(const MCTargetAsmParser &) = delete;
0392 
0393   ~MCTargetAsmParser() override;
0394 
0395   const MCSubtargetInfo &getSTI() const;
0396 
0397   const FeatureBitset& getAvailableFeatures() const {
0398     return AvailableFeatures;
0399   }
0400   void setAvailableFeatures(const FeatureBitset& Value) {
0401     AvailableFeatures = Value;
0402   }
0403 
0404   bool isParsingMSInlineAsm () { return ParsingMSInlineAsm; }
0405   void setParsingMSInlineAsm (bool Value) { ParsingMSInlineAsm = Value; }
0406 
0407   MCTargetOptions getTargetOptions() const { return MCOptions; }
0408 
0409   void setSemaCallback(MCAsmParserSemaCallback *Callback) {
0410     SemaCallback = Callback;
0411   }
0412 
0413   // Target-specific parsing of expression.
0414   virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
0415     return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
0416   }
0417 
0418   virtual bool parseRegister(MCRegister &Reg, SMLoc &StartLoc,
0419                              SMLoc &EndLoc) = 0;
0420 
0421   /// tryParseRegister - parse one register if possible
0422   ///
0423   /// Check whether a register specification can be parsed at the current
0424   /// location, without failing the entire parse if it can't. Must not consume
0425   /// tokens if the parse fails.
0426   virtual ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
0427                                        SMLoc &EndLoc) = 0;
0428 
0429   /// Parse one assembly instruction.
0430   ///
0431   /// The parser is positioned following the instruction name. The target
0432   /// specific instruction parser should parse the entire instruction and
0433   /// construct the appropriate MCInst, or emit an error. On success, the entire
0434   /// line should be parsed up to and including the end-of-statement token. On
0435   /// failure, the parser is not required to read to the end of the line.
0436   //
0437   /// \param Name - The instruction name.
0438   /// \param NameLoc - The source location of the name.
0439   /// \param Operands [out] - The list of parsed operands, this returns
0440   ///        ownership of them to the caller.
0441   /// \return True on failure.
0442   virtual bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
0443                                 SMLoc NameLoc, OperandVector &Operands) = 0;
0444   virtual bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
0445                                 AsmToken Token, OperandVector &Operands) {
0446     return parseInstruction(Info, Name, Token.getLoc(), Operands);
0447   }
0448 
0449   /// ParseDirective - Parse a target specific assembler directive
0450   /// This method is deprecated, use 'parseDirective' instead.
0451   ///
0452   /// The parser is positioned following the directive name.  The target
0453   /// specific directive parser should parse the entire directive doing or
0454   /// recording any target specific work, or return true and do nothing if the
0455   /// directive is not target specific. If the directive is specific for
0456   /// the target, the entire line is parsed up to and including the
0457   /// end-of-statement token and false is returned.
0458   ///
0459   /// \param DirectiveID - the identifier token of the directive.
0460   virtual bool ParseDirective(AsmToken DirectiveID) { return true; }
0461 
0462   /// Parses a target-specific assembler directive.
0463   ///
0464   /// The parser is positioned following the directive name. The target-specific
0465   /// directive parser should parse the entire directive doing or recording any
0466   /// target-specific work, or emit an error. On success, the entire line should
0467   /// be parsed up to and including the end-of-statement token. On failure, the
0468   /// parser is not required to read to the end of the line. If the directive is
0469   /// not target-specific, no tokens should be consumed and NoMatch is returned.
0470   ///
0471   /// \param DirectiveID - The token identifying the directive.
0472   virtual ParseStatus parseDirective(AsmToken DirectiveID);
0473 
0474   /// Recognize a series of operands of a parsed
0475   /// instruction as an actual MCInst and emit it to the specified MCStreamer.
0476   /// This returns false on success and returns true on failure to match.
0477   ///
0478   /// On failure, the target parser is responsible for emitting a diagnostic
0479   /// explaining the match failure.
0480   virtual bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
0481                                        OperandVector &Operands, MCStreamer &Out,
0482                                        uint64_t &ErrorInfo,
0483                                        bool MatchingInlineAsm) = 0;
0484 
0485   /// Allows targets to let registers opt out of clobber lists.
0486   virtual bool omitRegisterFromClobberLists(MCRegister Reg) { return false; }
0487 
0488   /// Allow a target to add special case operand matching for things that
0489   /// tblgen doesn't/can't handle effectively. For example, literal
0490   /// immediates on ARM. TableGen expects a token operand, but the parser
0491   /// will recognize them as immediates.
0492   virtual unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
0493                                               unsigned Kind) {
0494     return Match_InvalidOperand;
0495   }
0496 
0497   /// Validate the instruction match against any complex target predicates
0498   /// before rendering any operands to it.
0499   virtual unsigned
0500   checkEarlyTargetMatchPredicate(MCInst &Inst, const OperandVector &Operands) {
0501     return Match_Success;
0502   }
0503 
0504   /// checkTargetMatchPredicate - Validate the instruction match against
0505   /// any complex target predicates not expressible via match classes.
0506   virtual unsigned checkTargetMatchPredicate(MCInst &Inst) {
0507     return Match_Success;
0508   }
0509 
0510   virtual void convertToMapAndConstraints(unsigned Kind,
0511                                           const OperandVector &Operands) = 0;
0512 
0513   /// Returns whether two operands are registers and are equal. This is used
0514   /// by the tied-operands checks in the AsmMatcher. This method can be
0515   /// overridden to allow e.g. a sub- or super-register as the tied operand.
0516   virtual bool areEqualRegs(const MCParsedAsmOperand &Op1,
0517                             const MCParsedAsmOperand &Op2) const;
0518 
0519   // Return whether this parser uses assignment statements with equals tokens
0520   virtual bool equalIsAsmAssignment() { return true; };
0521   // Return whether this start of statement identifier is a label
0522   virtual bool isLabel(AsmToken &Token) { return true; };
0523   // Return whether this parser accept star as start of statement
0524   virtual bool starIsStartOfStatement() { return false; };
0525 
0526   virtual MCSymbolRefExpr::VariantKind
0527   getVariantKindForName(StringRef Name) const {
0528     return MCSymbolRefExpr::getVariantKindForName(Name);
0529   }
0530   virtual const MCExpr *applyModifierToExpr(const MCExpr *E,
0531                                             MCSymbolRefExpr::VariantKind,
0532                                             MCContext &Ctx) {
0533     return nullptr;
0534   }
0535 
0536   // For actions that have to be performed before a label is emitted
0537   virtual void doBeforeLabelEmit(MCSymbol *Symbol, SMLoc IDLoc) {}
0538   
0539   virtual void onLabelParsed(MCSymbol *Symbol) {}
0540 
0541   /// Ensure that all previously parsed instructions have been emitted to the
0542   /// output streamer, if the target does not emit them immediately.
0543   virtual void flushPendingInstructions(MCStreamer &Out) {}
0544 
0545   virtual const MCExpr *createTargetUnaryExpr(const MCExpr *E,
0546                                               AsmToken::TokenKind OperatorToken,
0547                                               MCContext &Ctx) {
0548     return nullptr;
0549   }
0550 
0551   // For any initialization at the beginning of parsing.
0552   virtual void onBeginOfFile() {}
0553 
0554   // For any checks or cleanups at the end of parsing.
0555   virtual void onEndOfFile() {}
0556 };
0557 
0558 } // end namespace llvm
0559 
0560 #endif // LLVM_MC_MCPARSER_MCTARGETASMPARSER_H