File indexing completed on 2026-05-10 08:36:57
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014 #ifndef LLVM_CLANG_LEX_LITERALSUPPORT_H
0015 #define LLVM_CLANG_LEX_LITERALSUPPORT_H
0016
0017 #include "clang/Basic/CharInfo.h"
0018 #include "clang/Basic/LLVM.h"
0019 #include "clang/Basic/TokenKinds.h"
0020 #include "llvm/ADT/APFloat.h"
0021 #include "llvm/ADT/ArrayRef.h"
0022 #include "llvm/ADT/SmallString.h"
0023 #include "llvm/ADT/StringRef.h"
0024 #include "llvm/Support/DataTypes.h"
0025
0026 namespace clang {
0027
0028 class DiagnosticsEngine;
0029 class Preprocessor;
0030 class Token;
0031 class SourceLocation;
0032 class TargetInfo;
0033 class SourceManager;
0034 class LangOptions;
0035
0036
0037 void expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input);
0038
0039
0040
0041
0042 bool isFunctionLocalStringLiteralMacro(tok::TokenKind K, const LangOptions &LO);
0043
0044
0045
0046 bool tokenIsLikeStringLiteral(const Token &Tok, const LangOptions &LO);
0047
0048
0049
0050
0051 class NumericLiteralParser {
0052 const SourceManager &SM;
0053 const LangOptions &LangOpts;
0054 DiagnosticsEngine &Diags;
0055
0056 const char *const ThisTokBegin;
0057 const char *const ThisTokEnd;
0058 const char *DigitsBegin, *SuffixBegin;
0059 const char *s;
0060
0061 unsigned radix;
0062
0063 bool saw_exponent, saw_period, saw_ud_suffix, saw_fixed_point_suffix;
0064
0065 SmallString<32> UDSuffixBuf;
0066
0067 public:
0068 NumericLiteralParser(StringRef TokSpelling, SourceLocation TokLoc,
0069 const SourceManager &SM, const LangOptions &LangOpts,
0070 const TargetInfo &Target, DiagnosticsEngine &Diags);
0071 bool hadError : 1;
0072 bool isUnsigned : 1;
0073 bool isLong : 1;
0074 bool isLongLong : 1;
0075 bool isSizeT : 1;
0076 bool isHalf : 1;
0077 bool isFloat : 1;
0078 bool isImaginary : 1;
0079 bool isFloat16 : 1;
0080 bool isFloat128 : 1;
0081 bool isFract : 1;
0082 bool isAccum : 1;
0083 bool isBitInt : 1;
0084
0085 uint8_t MicrosoftInteger;
0086
0087
0088 bool isFixedPointLiteral() const {
0089 return (saw_period || saw_exponent) && saw_fixed_point_suffix;
0090 }
0091
0092 bool isIntegerLiteral() const {
0093 return !saw_period && !saw_exponent && !isFixedPointLiteral();
0094 }
0095 bool isFloatingLiteral() const {
0096 return (saw_period || saw_exponent) && !isFixedPointLiteral();
0097 }
0098
0099 bool hasUDSuffix() const {
0100 return saw_ud_suffix;
0101 }
0102 StringRef getUDSuffix() const {
0103 assert(saw_ud_suffix);
0104 return UDSuffixBuf;
0105 }
0106 unsigned getUDSuffixOffset() const {
0107 assert(saw_ud_suffix);
0108 return SuffixBegin - ThisTokBegin;
0109 }
0110
0111 static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix);
0112
0113 unsigned getRadix() const { return radix; }
0114
0115
0116
0117
0118
0119 bool GetIntegerValue(llvm::APInt &Val);
0120
0121
0122
0123 llvm::APFloat::opStatus GetFloatValue(llvm::APFloat &Result,
0124 llvm::RoundingMode RM);
0125
0126
0127
0128
0129
0130 bool GetFixedPointValue(llvm::APInt &StoreVal, unsigned Scale);
0131
0132
0133
0134 StringRef getLiteralDigits() const {
0135 assert(!hadError && "cannot reliably get the literal digits with an error");
0136 return StringRef(DigitsBegin, SuffixBegin - DigitsBegin);
0137 }
0138
0139 private:
0140
0141 void ParseNumberStartingWithZero(SourceLocation TokLoc);
0142 void ParseDecimalOrOctalCommon(SourceLocation TokLoc);
0143
0144 static bool isDigitSeparator(char C) { return C == '\''; }
0145
0146
0147
0148 bool containsDigits(const char *Start, const char *End) {
0149 return Start != End && (Start + 1 != End || !isDigitSeparator(Start[0]));
0150 }
0151
0152 enum CheckSeparatorKind { CSK_BeforeDigits, CSK_AfterDigits };
0153
0154
0155 void checkSeparator(SourceLocation TokLoc, const char *Pos,
0156 CheckSeparatorKind IsAfterDigits);
0157
0158
0159
0160 const char *SkipHexDigits(const char *ptr) {
0161 while (ptr != ThisTokEnd && (isHexDigit(*ptr) || isDigitSeparator(*ptr)))
0162 ptr++;
0163 return ptr;
0164 }
0165
0166
0167
0168 const char *SkipOctalDigits(const char *ptr) {
0169 while (ptr != ThisTokEnd &&
0170 ((*ptr >= '0' && *ptr <= '7') || isDigitSeparator(*ptr)))
0171 ptr++;
0172 return ptr;
0173 }
0174
0175
0176
0177 const char *SkipDigits(const char *ptr) {
0178 while (ptr != ThisTokEnd && (isDigit(*ptr) || isDigitSeparator(*ptr)))
0179 ptr++;
0180 return ptr;
0181 }
0182
0183
0184
0185 const char *SkipBinaryDigits(const char *ptr) {
0186 while (ptr != ThisTokEnd &&
0187 (*ptr == '0' || *ptr == '1' || isDigitSeparator(*ptr)))
0188 ptr++;
0189 return ptr;
0190 }
0191
0192 };
0193
0194
0195
0196 class CharLiteralParser {
0197 uint64_t Value;
0198 tok::TokenKind Kind;
0199 bool IsMultiChar;
0200 bool HadError;
0201 SmallString<32> UDSuffixBuf;
0202 unsigned UDSuffixOffset;
0203 public:
0204 CharLiteralParser(const char *begin, const char *end,
0205 SourceLocation Loc, Preprocessor &PP,
0206 tok::TokenKind kind);
0207
0208 bool hadError() const { return HadError; }
0209 bool isOrdinary() const { return Kind == tok::char_constant; }
0210 bool isWide() const { return Kind == tok::wide_char_constant; }
0211 bool isUTF8() const { return Kind == tok::utf8_char_constant; }
0212 bool isUTF16() const { return Kind == tok::utf16_char_constant; }
0213 bool isUTF32() const { return Kind == tok::utf32_char_constant; }
0214 bool isMultiChar() const { return IsMultiChar; }
0215 uint64_t getValue() const { return Value; }
0216 StringRef getUDSuffix() const { return UDSuffixBuf; }
0217 unsigned getUDSuffixOffset() const {
0218 assert(!UDSuffixBuf.empty() && "no ud-suffix");
0219 return UDSuffixOffset;
0220 }
0221 };
0222
/// Mode selector passed to StringLiteralParser; the parser's isUnevaluated()
/// accessor reports whether Unevaluated was chosen.
enum class StringLiteralEvalMethod {
  /// The mode StringLiteralParser's constructors use by default.
  Evaluated,
  /// The mode for which StringLiteralParser::isUnevaluated() returns true.
  Unevaluated,
};
0227
0228
0229
0230
0231 class StringLiteralParser {
0232 const SourceManager &SM;
0233 const LangOptions &Features;
0234 const TargetInfo &Target;
0235 DiagnosticsEngine *Diags;
0236
0237 unsigned MaxTokenLength;
0238 unsigned SizeBound;
0239 unsigned CharByteWidth;
0240 tok::TokenKind Kind;
0241 SmallString<512> ResultBuf;
0242 char *ResultPtr;
0243 SmallString<32> UDSuffixBuf;
0244 unsigned UDSuffixToken;
0245 unsigned UDSuffixOffset;
0246 StringLiteralEvalMethod EvalMethod;
0247
0248 public:
0249 StringLiteralParser(ArrayRef<Token> StringToks, Preprocessor &PP,
0250 StringLiteralEvalMethod StringMethod =
0251 StringLiteralEvalMethod::Evaluated);
0252 StringLiteralParser(ArrayRef<Token> StringToks, const SourceManager &sm,
0253 const LangOptions &features, const TargetInfo &target,
0254 DiagnosticsEngine *diags = nullptr)
0255 : SM(sm), Features(features), Target(target), Diags(diags),
0256 MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
0257 ResultPtr(ResultBuf.data()),
0258 EvalMethod(StringLiteralEvalMethod::Evaluated), hadError(false),
0259 Pascal(false) {
0260 init(StringToks);
0261 }
0262
0263 bool hadError;
0264 bool Pascal;
0265
0266 StringRef GetString() const {
0267 return StringRef(ResultBuf.data(), GetStringLength());
0268 }
0269 unsigned GetStringLength() const { return ResultPtr-ResultBuf.data(); }
0270
0271 unsigned GetNumStringChars() const {
0272 return GetStringLength() / CharByteWidth;
0273 }
0274
0275
0276
0277
0278
0279
0280 unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const;
0281
0282 bool isOrdinary() const { return Kind == tok::string_literal; }
0283 bool isWide() const { return Kind == tok::wide_string_literal; }
0284 bool isUTF8() const { return Kind == tok::utf8_string_literal; }
0285 bool isUTF16() const { return Kind == tok::utf16_string_literal; }
0286 bool isUTF32() const { return Kind == tok::utf32_string_literal; }
0287 bool isPascal() const { return Pascal; }
0288 bool isUnevaluated() const {
0289 return EvalMethod == StringLiteralEvalMethod::Unevaluated;
0290 }
0291
0292 StringRef getUDSuffix() const { return UDSuffixBuf; }
0293
0294
0295 unsigned getUDSuffixToken() const {
0296 assert(!UDSuffixBuf.empty() && "no ud-suffix");
0297 return UDSuffixToken;
0298 }
0299
0300 unsigned getUDSuffixOffset() const {
0301 assert(!UDSuffixBuf.empty() && "no ud-suffix");
0302 return UDSuffixOffset;
0303 }
0304
0305 static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix);
0306
0307 private:
0308 void init(ArrayRef<Token> StringToks);
0309 bool CopyStringFragment(const Token &Tok, const char *TokBegin,
0310 StringRef Fragment);
0311 void DiagnoseLexingError(SourceLocation Loc);
0312 };
0313
0314 }
0315
0316 #endif