Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-12-16 09:41:01

0001 // Copyright 2022 The Abseil Authors.
0002 //
0003 // Licensed under the Apache License, Version 2.0 (the "License");
0004 // you may not use this file except in compliance with the License.
0005 // You may obtain a copy of the License at
0006 //
0007 //      https://www.apache.org/licenses/LICENSE-2.0
0008 //
0009 // Unless required by applicable law or agreed to in writing, software
0010 // distributed under the License is distributed on an "AS IS" BASIS,
0011 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0012 // See the License for the specific language governing permissions and
0013 // limitations under the License.
0014 //
0015 // -----------------------------------------------------------------------------
0016 // File: charset.h
0017 // -----------------------------------------------------------------------------
0018 //
0019 // This file contains absl::CharSet, a fast, bit-vector set of 8-bit unsigned
0020 // characters.
0021 //
0022 // Instances can be initialized as constexpr constants. For example:
0023 //
0024 //   constexpr absl::CharSet kJustX = absl::CharSet::Char('x');
0025 //   constexpr absl::CharSet kMySymbols = absl::CharSet("$@!");
0026 //   constexpr absl::CharSet kLetters = absl::CharSet::Range('a', 'z');
0027 //
0028 // Multiple instances can be combined, and the result is still a constexpr
0028 // expression.
0029 // For example:
0030 //
0031 //   constexpr absl::CharSet kLettersAndNumbers =
0032 //       absl::CharSet::Range('a', 'z') | absl::CharSet::Range('0', '9');
0033 //
0034 // Several pre-defined character classes are available that mirror the methods
0035 // from <cctype>. For example:
0036 //
0037 //   constexpr absl::CharSet kLettersAndWhitespace =
0038 //       absl::CharSet::AsciiAlphabet() | absl::CharSet::AsciiWhitespace();
0039 //
0040 // To check membership, use the .contains method, e.g.
0041 //
0042 //   absl::CharSet hex_letters("abcdef");
0043 //   hex_letters.contains('a');  // true
0044 //   hex_letters.contains('g');  // false
0045 
0046 #ifndef ABSL_STRINGS_CHARSET_H_
0047 #define ABSL_STRINGS_CHARSET_H_
0048 
0049 #include <cstddef>
0050 #include <cstdint>
0051 #include <cstring>
0052 
0053 #include "absl/base/macros.h"
0054 #include "absl/base/port.h"
0055 #include "absl/strings/string_view.h"
0056 
0057 namespace absl {
0058 
// A set of 8-bit characters kept as a 256-bit vector split across four 64-bit
// words: the character with unsigned value `v` is bit `v % 64` of word
// `v / 64`. Every member is constexpr.
class CharSet {
 public:
  // Constructs the empty set.
  constexpr CharSet() : m_() {}

  // Constructs the set holding each char that occurs in `str`.
  constexpr explicit CharSet(absl::string_view str) : m_() {
    for (const char ch : str) {
      SetChar(static_cast<unsigned char>(ch));
    }
  }

  // Returns true when `c` is a member of the set.
  constexpr bool contains(char c) const {
    const unsigned char index = static_cast<unsigned char>(c);
    const uint64_t bit = static_cast<uint64_t>(1) << (index % 64);
    return (m_[index / 64] & bit) != 0;
  }

  // Returns true when no character is a member, i.e. every word is zero.
  constexpr bool empty() const {
    return (m_[0] | m_[1] | m_[2] | m_[3]) == 0;
  }

  // The set whose only member is `x`.
  static constexpr CharSet Char(char x) {
    CharSet single;
    single.SetChar(static_cast<unsigned char>(x));
    return single;
  }

  // The set of all chars in the closed interval [lo,hi], compared by their
  // unsigned values. The result is empty when hi is below lo.
  static constexpr CharSet Range(char lo, char hi) {
    const uint64_t begin = static_cast<unsigned char>(lo);
    // One past `hi`; this can reach 256, hence the wider type.
    const uint64_t end =
        static_cast<uint64_t>(static_cast<unsigned char>(hi)) + 1;
    return CharSet(WordMaskBetween(begin, end, 0),
                   WordMaskBetween(begin, end, 1),
                   WordMaskBetween(begin, end, 2),
                   WordMaskBetween(begin, end, 3));
  }

  // Set intersection.
  friend constexpr CharSet operator&(const CharSet& a, const CharSet& b) {
    return CharSet(a.m_[0] & b.m_[0],  //
                   a.m_[1] & b.m_[1],  //
                   a.m_[2] & b.m_[2],  //
                   a.m_[3] & b.m_[3]);
  }

  // Set union.
  friend constexpr CharSet operator|(const CharSet& a, const CharSet& b) {
    return CharSet(a.m_[0] | b.m_[0],  //
                   a.m_[1] | b.m_[1],  //
                   a.m_[2] | b.m_[2],  //
                   a.m_[3] | b.m_[3]);
  }

  // Set complement over all 256 character values.
  friend constexpr CharSet operator~(const CharSet& a) {
    return CharSet(~a.m_[0], ~a.m_[1], ~a.m_[2], ~a.m_[3]);
  }

  // Character classes mirroring the char-classifying predicates in <cctype>.
  static constexpr CharSet AsciiUppercase() { return Range('A', 'Z'); }
  static constexpr CharSet AsciiLowercase() { return Range('a', 'z'); }
  static constexpr CharSet AsciiDigits() { return Range('0', '9'); }
  static constexpr CharSet AsciiAlphabet() {
    return AsciiUppercase() | AsciiLowercase();
  }
  static constexpr CharSet AsciiAlphanumerics() {
    return AsciiAlphabet() | AsciiDigits();
  }
  static constexpr CharSet AsciiHexDigits() {
    return Range('a', 'f') | Range('A', 'F') | AsciiDigits();
  }
  static constexpr CharSet AsciiPrintable() { return Range(0x20, 0x7e); }
  static constexpr CharSet AsciiWhitespace() { return CharSet("\t\n\v\f\r "); }
  // Printable characters that are neither whitespace nor alphanumeric.
  static constexpr CharSet AsciiPunctuation() {
    return AsciiPrintable() & ~(AsciiWhitespace() | AsciiAlphanumerics());
  }

 private:
  // Builds a set directly from its four words, lowest character values first.
  constexpr CharSet(uint64_t w0, uint64_t w1, uint64_t w2, uint64_t w3)
      : m_{w0, w1, w2, w3} {}

  // Bits of word number `word` that belong to the half-open range
  // [begin, end) of character values.
  static constexpr uint64_t WordMaskBetween(uint64_t begin, uint64_t end,
                                            uint64_t word) {
    return WordMaskBelow(end, word) & ~WordMaskBelow(begin, word);
  }

  // Bits of word number `word` that belong to the range [0, upper).
  static constexpr uint64_t WordMaskBelow(uint64_t upper, uint64_t word) {
    const uint64_t word_begin = 64 * word;
    if (upper <= word_begin) {
      return 0;  // The range ends before this word starts.
    }
    if (upper >= word_begin + 64) {
      return ~static_cast<uint64_t>(0);  // The range covers the whole word.
    }
    // `upper` falls strictly inside this word, so `upper % 64` is in [1, 63]
    // and the shift amount stays below the width of the type.
    return ~static_cast<uint64_t>(0) >> (64 - upper % 64);
  }

  // Adds `c` to the set.
  constexpr void SetChar(unsigned char c) {
    m_[c / 64] |= static_cast<uint64_t>(1) << (c % 64);
  }

  // Membership bits; m_[0] holds character values 0-63, m_[3] holds 192-255.
  uint64_t m_[4];
};
0161 
0162 }  // namespace absl
0163 
0164 #endif  // ABSL_STRINGS_CHARSET_H_