Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-05-10 08:36:48

0001 //===--- clang/Basic/CharInfo.h - Classifying ASCII Characters --*- C++ -*-===//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 
0009 #ifndef LLVM_CLANG_BASIC_CHARINFO_H
0010 #define LLVM_CLANG_BASIC_CHARINFO_H
0011 
0012 #include "clang/Basic/LLVM.h"
0013 #include "llvm/ADT/StringRef.h"
0014 #include "llvm/Support/Compiler.h"
0015 #include "llvm/Support/DataTypes.h"
0016 
0017 namespace clang {
namespace charinfo {
  /// Classification table indexed by unsigned character value. Entries are
  /// tested against the CHAR_* flags declared below; the table itself is
  /// defined elsewhere.
  extern const uint16_t InfoTable[256];

  /// Character class flags, one bit each.
  enum {
    CHAR_HORZ_WS  = 1 << 0,  // '\t', '\f', '\v'.  Note, no '\0'
    CHAR_VERT_WS  = 1 << 1,  // '\r', '\n'
    CHAR_SPACE    = 1 << 2,  // ' '
    CHAR_DIGIT    = 1 << 3,  // 0-9
    CHAR_XLETTER  = 1 << 4,  // a-f,A-F
    CHAR_UPPER    = 1 << 5,  // A-Z
    CHAR_LOWER    = 1 << 6,  // a-z
    CHAR_UNDER    = 1 << 7,  // _
    CHAR_PERIOD   = 1 << 8,  // .
    CHAR_PUNCT    = 1 << 9,  // {}[]#<>%:;?*+-/^&|~!=,"'`$@()
  };

  /// Combined flags for letters that are also hexadecimal digits.
  enum {
    CHAR_XUPPER = CHAR_UPPER | CHAR_XLETTER,  // A-F
    CHAR_XLOWER = CHAR_LOWER | CHAR_XLETTER   // a-f
  };
} // end namespace charinfo
0039 
0040 /// Returns true if a byte is an ASCII character.
0041 LLVM_READNONE inline bool isASCII(char c) {
0042   return static_cast<unsigned char>(c) <= 127;
0043 }
0044 
0045 LLVM_READNONE inline bool isASCII(unsigned char c) { return c <= 127; }
0046 
0047 /// Returns true if a codepoint is an ASCII character.
0048 LLVM_READNONE inline bool isASCII(uint32_t c) { return c <= 127; }
0049 LLVM_READNONE inline bool isASCII(int64_t c) { return 0 <= c && c <= 127; }
0050 
0051 /// Returns true if this is a valid first character of a C identifier,
0052 /// which is [a-zA-Z_].
0053 LLVM_READONLY inline bool isAsciiIdentifierStart(unsigned char c,
0054                                                  bool AllowDollar = false) {
0055   using namespace charinfo;
0056   if (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_UNDER))
0057     return true;
0058   return AllowDollar && c == '$';
0059 }
0060 
0061 LLVM_READONLY inline bool isAsciiIdentifierContinue(unsigned char c) {
0062   // Precomputed CHAR_UPPER|CHAR_LOWER|CHAR_DIGIT|CHAR_UNDER
0063   static constexpr unsigned char IDContinue[256] = {
0064       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0065       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0066       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
0067       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
0068       0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0069       1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0070       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0071       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0072       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0073       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0074       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
0075   return IDContinue[c];
0076 }
0077 
0078 /// Returns true if this is a body character of a C identifier,
0079 /// which is [a-zA-Z0-9_].
0080 LLVM_READONLY inline bool isAsciiIdentifierContinue(unsigned char c,
0081                                                     bool AllowDollar) {
0082   if (isAsciiIdentifierContinue(c))
0083     return true;
0084   return AllowDollar && c == '$';
0085 }
0086 
0087 /// Returns true if this character is horizontal ASCII whitespace:
0088 /// ' ', '\\t', '\\f', '\\v'.
0089 ///
0090 /// Note that this returns false for '\\0'.
0091 LLVM_READONLY inline bool isHorizontalWhitespace(unsigned char c) {
0092   using namespace charinfo;
0093   return (InfoTable[c] & (CHAR_HORZ_WS|CHAR_SPACE)) != 0;
0094 }
0095 
0096 /// Returns true if this character is vertical ASCII whitespace: '\\n', '\\r'.
0097 ///
0098 /// Note that this returns false for '\\0'.
0099 LLVM_READONLY inline bool isVerticalWhitespace(unsigned char c) {
0100   using namespace charinfo;
0101   return (InfoTable[c] & CHAR_VERT_WS) != 0;
0102 }
0103 
0104 /// Return true if this character is horizontal or vertical ASCII whitespace:
0105 /// ' ', '\\t', '\\f', '\\v', '\\n', '\\r'.
0106 ///
0107 /// Note that this returns false for '\\0'.
0108 LLVM_READONLY inline bool isWhitespace(unsigned char c) {
0109   using namespace charinfo;
0110   return (InfoTable[c] & (CHAR_HORZ_WS|CHAR_VERT_WS|CHAR_SPACE)) != 0;
0111 }
0112 
0113 /// Return true if this character is an ASCII digit: [0-9]
0114 LLVM_READONLY inline bool isDigit(unsigned char c) {
0115   using namespace charinfo;
0116   return (InfoTable[c] & CHAR_DIGIT) != 0;
0117 }
0118 
0119 /// Return true if this character is a lowercase ASCII letter: [a-z]
0120 LLVM_READONLY inline bool isLowercase(unsigned char c) {
0121   using namespace charinfo;
0122   return (InfoTable[c] & CHAR_LOWER) != 0;
0123 }
0124 
0125 /// Return true if this character is an uppercase ASCII letter: [A-Z]
0126 LLVM_READONLY inline bool isUppercase(unsigned char c) {
0127   using namespace charinfo;
0128   return (InfoTable[c] & CHAR_UPPER) != 0;
0129 }
0130 
0131 /// Return true if this character is an ASCII letter: [a-zA-Z]
0132 LLVM_READONLY inline bool isLetter(unsigned char c) {
0133   using namespace charinfo;
0134   return (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER)) != 0;
0135 }
0136 
0137 /// Return true if this character is an ASCII letter or digit: [a-zA-Z0-9]
0138 LLVM_READONLY inline bool isAlphanumeric(unsigned char c) {
0139   using namespace charinfo;
0140   return (InfoTable[c] & (CHAR_DIGIT|CHAR_UPPER|CHAR_LOWER)) != 0;
0141 }
0142 
0143 /// Return true if this character is an ASCII hex digit: [0-9a-fA-F]
0144 LLVM_READONLY inline bool isHexDigit(unsigned char c) {
0145   using namespace charinfo;
0146   return (InfoTable[c] & (CHAR_DIGIT|CHAR_XLETTER)) != 0;
0147 }
0148 
0149 /// Return true if this character is an ASCII punctuation character.
0150 ///
0151 /// Note that '_' is both a punctuation character and an identifier character!
0152 LLVM_READONLY inline bool isPunctuation(unsigned char c) {
0153   using namespace charinfo;
0154   return (InfoTable[c] & (CHAR_UNDER | CHAR_PERIOD | CHAR_PUNCT)) != 0;
0155 }
0156 
0157 /// Return true if this character is an ASCII printable character; that is, a
0158 /// character that should take exactly one column to print in a fixed-width
0159 /// terminal.
0160 LLVM_READONLY inline bool isPrintable(unsigned char c) {
0161   using namespace charinfo;
0162   return (InfoTable[c] & (CHAR_UPPER | CHAR_LOWER | CHAR_PERIOD | CHAR_PUNCT |
0163                           CHAR_DIGIT | CHAR_UNDER | CHAR_SPACE)) != 0;
0164 }
0165 
0166 /// Return true if this is the body character of a C preprocessing number,
0167 /// which is [a-zA-Z0-9_.].
0168 LLVM_READONLY inline bool isPreprocessingNumberBody(unsigned char c) {
0169   using namespace charinfo;
0170   return (InfoTable[c] &
0171           (CHAR_UPPER|CHAR_LOWER|CHAR_DIGIT|CHAR_UNDER|CHAR_PERIOD)) != 0;
0172 }
0173 
0174 /// Return true if this is the body character of a C++ raw string delimiter.
0175 LLVM_READONLY inline bool isRawStringDelimBody(unsigned char c) {
0176   using namespace charinfo;
0177   return (InfoTable[c] & (CHAR_UPPER | CHAR_LOWER | CHAR_PERIOD | CHAR_DIGIT |
0178                           CHAR_UNDER | CHAR_PUNCT)) != 0 &&
0179          c != '(' && c != ')' && c != '\\';
0180 }
0181 
/// Bit flags selecting which quote characters escapeCStyle should escape.
enum class EscapeChar {
  Single = 1, ///< Escape the single quote.
  Double = 2, ///< Escape the double quote.
  // Inside the enumerator list the enumerators still have the underlying
  // type (int), so they can be combined without casts.
  SingleAndDouble = Single | Double,
};
0187 
0188 /// Return C-style escaped string for special characters, or an empty string if
0189 /// there is no such mapping.
0190 template <EscapeChar Opt, class CharT>
0191 LLVM_READONLY inline auto escapeCStyle(CharT Ch) -> StringRef {
0192   switch (Ch) {
0193   case '\\':
0194     return "\\\\";
0195   case '\'':
0196     if ((static_cast<int>(Opt) & static_cast<int>(EscapeChar::Single)) == 0)
0197       break;
0198     return "\\'";
0199   case '"':
0200     if ((static_cast<int>(Opt) & static_cast<int>(EscapeChar::Double)) == 0)
0201       break;
0202     return "\\\"";
0203   case '\a':
0204     return "\\a";
0205   case '\b':
0206     return "\\b";
0207   case '\f':
0208     return "\\f";
0209   case '\n':
0210     return "\\n";
0211   case '\r':
0212     return "\\r";
0213   case '\t':
0214     return "\\t";
0215   case '\v':
0216     return "\\v";
0217   }
0218   return {};
0219 }
0220 
0221 /// Converts the given ASCII character to its lowercase equivalent.
0222 ///
0223 /// If the character is not an uppercase character, it is returned as is.
0224 LLVM_READONLY inline char toLowercase(char c) {
0225   if (isUppercase(c))
0226     return c + 'a' - 'A';
0227   return c;
0228 }
0229 
0230 /// Converts the given ASCII character to its uppercase equivalent.
0231 ///
0232 /// If the character is not a lowercase character, it is returned as is.
0233 LLVM_READONLY inline char toUppercase(char c) {
0234   if (isLowercase(c))
0235     return c + 'A' - 'a';
0236   return c;
0237 }
0238 
0239 
0240 /// Return true if this is a valid ASCII identifier.
0241 ///
0242 /// Note that this is a very simple check; it does not accept UCNs as valid
0243 /// identifier characters.
0244 LLVM_READONLY inline bool isValidAsciiIdentifier(StringRef S,
0245                                                  bool AllowDollar = false) {
0246   if (S.empty() || !isAsciiIdentifierStart(S[0], AllowDollar))
0247     return false;
0248 
0249   for (StringRef::iterator I = S.begin(), E = S.end(); I != E; ++I)
0250     if (!isAsciiIdentifierContinue(*I, AllowDollar))
0251       return false;
0252 
0253   return true;
0254 }
0255 
0256 } // end namespace clang
0257 
0258 #endif