File indexing completed on 2026-05-10 08:36:48
0001
0002
0003
0004
0005
0006
0007
0008
0009 #ifndef LLVM_CLANG_BASIC_CHARINFO_H
0010 #define LLVM_CLANG_BASIC_CHARINFO_H
0011
0012 #include "clang/Basic/LLVM.h"
0013 #include "llvm/ADT/StringRef.h"
0014 #include "llvm/Support/Compiler.h"
0015 #include "llvm/Support/DataTypes.h"
0016
0017 namespace clang {
namespace charinfo {
/// Classification table indexed by the unsigned value of a byte. Each entry
/// is a bitwise OR of the CHAR_* flags declared below. This is only a
/// declaration; the table contents are defined elsewhere.
extern const uint16_t InfoTable[256];

/// Primary classification bits. Each flag occupies its own bit so the
/// predicates in this header can test several classes with a single mask.
enum {
  CHAR_HORZ_WS = 1 << 0, // tested by isHorizontalWhitespace / isWhitespace
  CHAR_VERT_WS = 1 << 1, // tested by isVerticalWhitespace / isWhitespace
  CHAR_SPACE   = 1 << 2, // whitespace that isPrintable also accepts
  CHAR_DIGIT   = 1 << 3, // tested by isDigit
  CHAR_XLETTER = 1 << 4, // together with CHAR_DIGIT forms isHexDigit
  CHAR_UPPER   = 1 << 5, // tested by isUppercase
  CHAR_LOWER   = 1 << 6, // tested by isLowercase
  CHAR_UNDER   = 1 << 7, // accepted by identifier and punctuation checks
  CHAR_PERIOD  = 1 << 8, // accepted in preprocessing-number bodies
  CHAR_PUNCT   = 1 << 9, // remaining punctuation, see isPunctuation
};

/// Convenience combinations of a hex-letter bit with a case bit.
enum {
  CHAR_XUPPER = CHAR_UPPER | CHAR_XLETTER,
  CHAR_XLOWER = CHAR_LOWER | CHAR_XLETTER
};
} // namespace charinfo
0039
0040
0041 LLVM_READNONE inline bool isASCII(char c) {
0042 return static_cast<unsigned char>(c) <= 127;
0043 }
0044
0045 LLVM_READNONE inline bool isASCII(unsigned char c) { return c <= 127; }
0046
0047
0048 LLVM_READNONE inline bool isASCII(uint32_t c) { return c <= 127; }
0049 LLVM_READNONE inline bool isASCII(int64_t c) { return 0 <= c && c <= 127; }
0050
0051
0052
0053 LLVM_READONLY inline bool isAsciiIdentifierStart(unsigned char c,
0054 bool AllowDollar = false) {
0055 using namespace charinfo;
0056 if (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_UNDER))
0057 return true;
0058 return AllowDollar && c == '$';
0059 }
0060
0061 LLVM_READONLY inline bool isAsciiIdentifierContinue(unsigned char c) {
0062
0063 static constexpr unsigned char IDContinue[256] = {
0064 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0065 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0066 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
0067 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
0068 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0069 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0070 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0071 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0072 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0073 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0074 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
0075 return IDContinue[c];
0076 }
0077
0078
0079
0080 LLVM_READONLY inline bool isAsciiIdentifierContinue(unsigned char c,
0081 bool AllowDollar) {
0082 if (isAsciiIdentifierContinue(c))
0083 return true;
0084 return AllowDollar && c == '$';
0085 }
0086
0087
0088
0089
0090
0091 LLVM_READONLY inline bool isHorizontalWhitespace(unsigned char c) {
0092 using namespace charinfo;
0093 return (InfoTable[c] & (CHAR_HORZ_WS|CHAR_SPACE)) != 0;
0094 }
0095
0096
0097
0098
0099 LLVM_READONLY inline bool isVerticalWhitespace(unsigned char c) {
0100 using namespace charinfo;
0101 return (InfoTable[c] & CHAR_VERT_WS) != 0;
0102 }
0103
0104
0105
0106
0107
0108 LLVM_READONLY inline bool isWhitespace(unsigned char c) {
0109 using namespace charinfo;
0110 return (InfoTable[c] & (CHAR_HORZ_WS|CHAR_VERT_WS|CHAR_SPACE)) != 0;
0111 }
0112
0113
0114 LLVM_READONLY inline bool isDigit(unsigned char c) {
0115 using namespace charinfo;
0116 return (InfoTable[c] & CHAR_DIGIT) != 0;
0117 }
0118
0119
0120 LLVM_READONLY inline bool isLowercase(unsigned char c) {
0121 using namespace charinfo;
0122 return (InfoTable[c] & CHAR_LOWER) != 0;
0123 }
0124
0125
0126 LLVM_READONLY inline bool isUppercase(unsigned char c) {
0127 using namespace charinfo;
0128 return (InfoTable[c] & CHAR_UPPER) != 0;
0129 }
0130
0131
0132 LLVM_READONLY inline bool isLetter(unsigned char c) {
0133 using namespace charinfo;
0134 return (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER)) != 0;
0135 }
0136
0137
0138 LLVM_READONLY inline bool isAlphanumeric(unsigned char c) {
0139 using namespace charinfo;
0140 return (InfoTable[c] & (CHAR_DIGIT|CHAR_UPPER|CHAR_LOWER)) != 0;
0141 }
0142
0143
0144 LLVM_READONLY inline bool isHexDigit(unsigned char c) {
0145 using namespace charinfo;
0146 return (InfoTable[c] & (CHAR_DIGIT|CHAR_XLETTER)) != 0;
0147 }
0148
0149
0150
0151
0152 LLVM_READONLY inline bool isPunctuation(unsigned char c) {
0153 using namespace charinfo;
0154 return (InfoTable[c] & (CHAR_UNDER | CHAR_PERIOD | CHAR_PUNCT)) != 0;
0155 }
0156
0157
0158
0159
0160 LLVM_READONLY inline bool isPrintable(unsigned char c) {
0161 using namespace charinfo;
0162 return (InfoTable[c] & (CHAR_UPPER | CHAR_LOWER | CHAR_PERIOD | CHAR_PUNCT |
0163 CHAR_DIGIT | CHAR_UNDER | CHAR_SPACE)) != 0;
0164 }
0165
0166
0167
0168 LLVM_READONLY inline bool isPreprocessingNumberBody(unsigned char c) {
0169 using namespace charinfo;
0170 return (InfoTable[c] &
0171 (CHAR_UPPER|CHAR_LOWER|CHAR_DIGIT|CHAR_UNDER|CHAR_PERIOD)) != 0;
0172 }
0173
0174
0175 LLVM_READONLY inline bool isRawStringDelimBody(unsigned char c) {
0176 using namespace charinfo;
0177 return (InfoTable[c] & (CHAR_UPPER | CHAR_LOWER | CHAR_PERIOD | CHAR_DIGIT |
0178 CHAR_UNDER | CHAR_PUNCT)) != 0 &&
0179 c != '(' && c != ')' && c != '\\';
0180 }
0181
/// Bit flags selecting which quote characters escapeCStyle() rewrites:
/// Single covers the single quote, Double covers the double quote, and
/// SingleAndDouble sets both bits.
enum class EscapeChar {
  Single = 1 << 0,
  Double = 1 << 1,
  // Enumerators have the underlying type inside the definition, so the two
  // flags can be OR-ed directly here.
  SingleAndDouble = Single | Double,
};
0187
0188
0189
0190 template <EscapeChar Opt, class CharT>
0191 LLVM_READONLY inline auto escapeCStyle(CharT Ch) -> StringRef {
0192 switch (Ch) {
0193 case '\\':
0194 return "\\\\";
0195 case '\'':
0196 if ((static_cast<int>(Opt) & static_cast<int>(EscapeChar::Single)) == 0)
0197 break;
0198 return "\\'";
0199 case '"':
0200 if ((static_cast<int>(Opt) & static_cast<int>(EscapeChar::Double)) == 0)
0201 break;
0202 return "\\\"";
0203 case '\a':
0204 return "\\a";
0205 case '\b':
0206 return "\\b";
0207 case '\f':
0208 return "\\f";
0209 case '\n':
0210 return "\\n";
0211 case '\r':
0212 return "\\r";
0213 case '\t':
0214 return "\\t";
0215 case '\v':
0216 return "\\v";
0217 }
0218 return {};
0219 }
0220
0221
0222
0223
0224 LLVM_READONLY inline char toLowercase(char c) {
0225 if (isUppercase(c))
0226 return c + 'a' - 'A';
0227 return c;
0228 }
0229
0230
0231
0232
0233 LLVM_READONLY inline char toUppercase(char c) {
0234 if (isLowercase(c))
0235 return c + 'A' - 'a';
0236 return c;
0237 }
0238
0239
0240
0241
0242
0243
0244 LLVM_READONLY inline bool isValidAsciiIdentifier(StringRef S,
0245 bool AllowDollar = false) {
0246 if (S.empty() || !isAsciiIdentifierStart(S[0], AllowDollar))
0247 return false;
0248
0249 for (StringRef::iterator I = S.begin(), E = S.end(); I != E; ++I)
0250 if (!isAsciiIdentifierContinue(*I, AllowDollar))
0251 return false;
0252
0253 return true;
0254 }
0255
0256 }
0257
0258 #endif