File indexing completed on 2026-05-10 08:36:30
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013 #ifndef LLVM_CLANG_AST_COMMENTLEXER_H
0014 #define LLVM_CLANG_AST_COMMENTLEXER_H
0015
0016 #include "clang/Basic/Diagnostic.h"
0017 #include "clang/Basic/SourceManager.h"
0018 #include "llvm/ADT/SmallString.h"
0019 #include "llvm/ADT/StringRef.h"
0020 #include "llvm/Support/Allocator.h"
0021 #include "llvm/Support/raw_ostream.h"
0022
0023 namespace clang {
0024 namespace comments {
0025
0026 class Lexer;
0027 class TextTokenRetokenizer;
0028 struct CommandInfo;
0029 class CommandTraits;
0030
0031 namespace tok { // Kinds that a comments::Token can have.
0032 enum TokenKind { // The kind selects which payload accessor of Token may be called (each accessor asserts its kind).
0033 eof, // No payload accessor in Token. NOTE(review): presumably marks the end of the input - confirm.
0034 newline, // No payload accessor in Token. NOTE(review): presumably a line break inside the comment - confirm.
0035 text, // String payload, see Token::getText().
0036 unknown_command, // String payload, see Token::getUnknownCommandName().
0037 backslash_command, // ID payload, see Token::getCommandID(). Presumably a command spelled with '\' - confirm.
0038 at_command, // ID payload, see Token::getCommandID(). Presumably a command spelled with '@' - confirm.
0039 verbatim_block_begin, // ID payload, see Token::getVerbatimBlockID().
0040 verbatim_block_line, // String payload, see Token::getVerbatimBlockText().
0041 verbatim_block_end, // ID payload, see Token::getVerbatimBlockID().
0042 verbatim_line_name, // ID payload, see Token::getVerbatimLineID().
0043 verbatim_line_text, // String payload, see Token::getVerbatimLineText().
0044 html_start_tag, // String payload (tag name), see Token::getHTMLTagStartName().
0045 html_ident, // String payload, see Token::getHTMLIdent().
0046 html_equals, // No payload accessor in Token. Presumably '=' inside an HTML tag - confirm.
0047 html_quoted_string, // String payload, see Token::getHTMLQuotedString().
0048 html_greater, // No payload accessor in Token. Presumably '>' - confirm.
0049 html_slash_greater, // No payload accessor in Token. Presumably '/>' - confirm.
0050 html_end_tag // String payload (tag name), see Token::getHTMLTagEndName().
0051 };
0052 } // end namespace tok
0053
0054
0055 class Token {
0056 friend class Lexer;
0057 friend class TextTokenRetokenizer;
0058
0059
0060 SourceLocation Loc;
0061
0062
0063 tok::TokenKind Kind;
0064
0065
0066
0067
0068
0069
0070 unsigned IntVal;
0071
0072
0073
0074 unsigned Length;
0075
0076
0077 const char *TextPtr;
0078
0079 public:
0080 SourceLocation getLocation() const LLVM_READONLY { return Loc; }
0081 void setLocation(SourceLocation SL) { Loc = SL; }
0082
0083 SourceLocation getEndLocation() const LLVM_READONLY {
0084 if (Length == 0 || Length == 1)
0085 return Loc;
0086 return Loc.getLocWithOffset(Length - 1);
0087 }
0088
0089 tok::TokenKind getKind() const LLVM_READONLY { return Kind; }
0090 void setKind(tok::TokenKind K) { Kind = K; }
0091
0092 bool is(tok::TokenKind K) const LLVM_READONLY { return Kind == K; }
0093 bool isNot(tok::TokenKind K) const LLVM_READONLY { return Kind != K; }
0094
0095 unsigned getLength() const LLVM_READONLY { return Length; }
0096 void setLength(unsigned L) { Length = L; }
0097
0098 StringRef getText() const LLVM_READONLY {
0099 assert(is(tok::text));
0100 return StringRef(TextPtr, IntVal);
0101 }
0102
0103 void setText(StringRef Text) {
0104 assert(is(tok::text));
0105 TextPtr = Text.data();
0106 IntVal = Text.size();
0107 }
0108
0109 StringRef getUnknownCommandName() const LLVM_READONLY {
0110 assert(is(tok::unknown_command));
0111 return StringRef(TextPtr, IntVal);
0112 }
0113
0114 void setUnknownCommandName(StringRef Name) {
0115 assert(is(tok::unknown_command));
0116 TextPtr = Name.data();
0117 IntVal = Name.size();
0118 }
0119
0120 unsigned getCommandID() const LLVM_READONLY {
0121 assert(is(tok::backslash_command) || is(tok::at_command));
0122 return IntVal;
0123 }
0124
0125 void setCommandID(unsigned ID) {
0126 assert(is(tok::backslash_command) || is(tok::at_command));
0127 IntVal = ID;
0128 }
0129
0130 unsigned getVerbatimBlockID() const LLVM_READONLY {
0131 assert(is(tok::verbatim_block_begin) || is(tok::verbatim_block_end));
0132 return IntVal;
0133 }
0134
0135 void setVerbatimBlockID(unsigned ID) {
0136 assert(is(tok::verbatim_block_begin) || is(tok::verbatim_block_end));
0137 IntVal = ID;
0138 }
0139
0140 StringRef getVerbatimBlockText() const LLVM_READONLY {
0141 assert(is(tok::verbatim_block_line));
0142 return StringRef(TextPtr, IntVal);
0143 }
0144
0145 void setVerbatimBlockText(StringRef Text) {
0146 assert(is(tok::verbatim_block_line));
0147 TextPtr = Text.data();
0148 IntVal = Text.size();
0149 }
0150
0151 unsigned getVerbatimLineID() const LLVM_READONLY {
0152 assert(is(tok::verbatim_line_name));
0153 return IntVal;
0154 }
0155
0156 void setVerbatimLineID(unsigned ID) {
0157 assert(is(tok::verbatim_line_name));
0158 IntVal = ID;
0159 }
0160
0161 StringRef getVerbatimLineText() const LLVM_READONLY {
0162 assert(is(tok::verbatim_line_text));
0163 return StringRef(TextPtr, IntVal);
0164 }
0165
0166 void setVerbatimLineText(StringRef Text) {
0167 assert(is(tok::verbatim_line_text));
0168 TextPtr = Text.data();
0169 IntVal = Text.size();
0170 }
0171
0172 StringRef getHTMLTagStartName() const LLVM_READONLY {
0173 assert(is(tok::html_start_tag));
0174 return StringRef(TextPtr, IntVal);
0175 }
0176
0177 void setHTMLTagStartName(StringRef Name) {
0178 assert(is(tok::html_start_tag));
0179 TextPtr = Name.data();
0180 IntVal = Name.size();
0181 }
0182
0183 StringRef getHTMLIdent() const LLVM_READONLY {
0184 assert(is(tok::html_ident));
0185 return StringRef(TextPtr, IntVal);
0186 }
0187
0188 void setHTMLIdent(StringRef Name) {
0189 assert(is(tok::html_ident));
0190 TextPtr = Name.data();
0191 IntVal = Name.size();
0192 }
0193
0194 StringRef getHTMLQuotedString() const LLVM_READONLY {
0195 assert(is(tok::html_quoted_string));
0196 return StringRef(TextPtr, IntVal);
0197 }
0198
0199 void setHTMLQuotedString(StringRef Str) {
0200 assert(is(tok::html_quoted_string));
0201 TextPtr = Str.data();
0202 IntVal = Str.size();
0203 }
0204
0205 StringRef getHTMLTagEndName() const LLVM_READONLY {
0206 assert(is(tok::html_end_tag));
0207 return StringRef(TextPtr, IntVal);
0208 }
0209
0210 void setHTMLTagEndName(StringRef Name) {
0211 assert(is(tok::html_end_tag));
0212 TextPtr = Name.data();
0213 IntVal = Name.size();
0214 }
0215
0216 void dump(const Lexer &L, const SourceManager &SM) const;
0217 };
0218
0219
0220 class Lexer {
0221 private:
0222 Lexer(const Lexer &) = delete;
0223 void operator=(const Lexer &) = delete;
0224
0225
0226
0227 llvm::BumpPtrAllocator &Allocator;
0228
0229 DiagnosticsEngine &Diags;
0230
0231 const CommandTraits &Traits;
0232
0233 const char *const BufferStart;
0234 const char *const BufferEnd;
0235
0236 const char *BufferPtr;
0237
0238
0239
0240 const char *CommentEnd;
0241
0242 SourceLocation FileLoc;
0243
0244
0245
0246
0247 bool ParseCommands;
0248
0249 enum LexerCommentState : uint8_t {
0250 LCS_BeforeComment,
0251 LCS_InsideBCPLComment,
0252 LCS_InsideCComment,
0253 LCS_BetweenComments
0254 };
0255
0256
0257 LexerCommentState CommentState;
0258
0259 enum LexerState : uint8_t {
0260
0261 LS_Normal,
0262
0263
0264
0265 LS_VerbatimBlockFirstLine,
0266
0267
0268
0269 LS_VerbatimBlockBody,
0270
0271
0272
0273 LS_VerbatimLineText,
0274
0275
0276 LS_HTMLStartTag,
0277
0278
0279 LS_HTMLEndTag
0280 };
0281
0282
0283 LexerState State;
0284
0285
0286
0287 SmallString<16> VerbatimBlockEndCommandName;
0288
0289
0290
0291 StringRef resolveHTMLNamedCharacterReference(StringRef Name) const;
0292
0293
0294 StringRef resolveHTMLDecimalCharacterReference(StringRef Name) const;
0295
0296
0297 StringRef resolveHTMLHexCharacterReference(StringRef Name) const;
0298
0299 void formTokenWithChars(Token &Result, const char *TokEnd,
0300 tok::TokenKind Kind);
0301
0302 void formTextToken(Token &Result, const char *TokEnd) {
0303 StringRef Text(BufferPtr, TokEnd - BufferPtr);
0304 formTokenWithChars(Result, TokEnd, tok::text);
0305 Result.setText(Text);
0306 }
0307
0308 SourceLocation getSourceLocation(const char *Loc) const {
0309 assert(Loc >= BufferStart && Loc <= BufferEnd &&
0310 "Location out of range for this buffer!");
0311
0312 const unsigned CharNo = Loc - BufferStart;
0313 return FileLoc.getLocWithOffset(CharNo);
0314 }
0315
0316 DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) {
0317 return Diags.Report(Loc, DiagID);
0318 }
0319
0320
0321 void skipLineStartingDecorations();
0322
0323
0324 const char *skipTextToken();
0325
0326
0327 void lexCommentText(Token &T);
0328
0329 void setupAndLexVerbatimBlock(Token &T, const char *TextBegin, char Marker,
0330 const CommandInfo *Info);
0331
0332 void lexVerbatimBlockFirstLine(Token &T);
0333
0334 void lexVerbatimBlockBody(Token &T);
0335
0336 void setupAndLexVerbatimLine(Token &T, const char *TextBegin,
0337 const CommandInfo *Info);
0338
0339 void lexVerbatimLineText(Token &T);
0340
0341 void lexHTMLCharacterReference(Token &T);
0342
0343 void setupAndLexHTMLStartTag(Token &T);
0344
0345 void lexHTMLStartTag(Token &T);
0346
0347 void setupAndLexHTMLEndTag(Token &T);
0348
0349 void lexHTMLEndTag(Token &T);
0350
0351 public:
0352 Lexer(llvm::BumpPtrAllocator &Allocator, DiagnosticsEngine &Diags,
0353 const CommandTraits &Traits, SourceLocation FileLoc,
0354 const char *BufferStart, const char *BufferEnd,
0355 bool ParseCommands = true);
0356
0357 void lex(Token &T);
0358
0359 StringRef getSpelling(const Token &Tok, const SourceManager &SourceMgr) const;
0360 };
0361
0362 }
0363 }
0364
0365 #endif
0366