Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-09-15 08:53:57

0001 //
0002 // Copyright (c) 2021 Vinnie Falco (vinnie dot falco at gmail dot com)
0003 //
0004 // Distributed under the Boost Software License, Version 1.0. (See accompanying
0005 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
0006 //
0007 // Official repository: https://github.com/boostorg/url
0008 //
0009 
0010 #ifndef BOOST_URL_GRAMMAR_LUT_CHARS_HPP
0011 #define BOOST_URL_GRAMMAR_LUT_CHARS_HPP
0012 
0013 #include <boost/url/detail/config.hpp>
0014 #include <boost/url/grammar/detail/charset.hpp>
0015 #include <cstdint>
0016 #include <type_traits>
0017 
0018 // Credit to Peter Dimov for ideas regarding
0019 // SIMD constexpr, and character set masks.
0020 
0021 namespace boost {
0022 namespace urls {
0023 namespace grammar {
0024 
0025 #ifndef BOOST_URL_DOCS
0026 namespace detail {
     // Trait detecting a character predicate.
     //
     // Primary template: `is_pred<T>::value` is `false` unless the
     // partial specialization below is viable for `T`.
0027 template<class T, class = void>
0028 struct is_pred : std::false_type {};
0029 
     // Chosen when the expression
     //     bool& = (T const&).operator()(char)
     // is well-formed, i.e. `T` has a const-callable *member*
     // `operator()` accepting a `char` whose result can be assigned
     // to a `bool`. Because the member is named explicitly, types
     // without a member call operator (for example `char` or
     // `char const*`) never satisfy the trait.
     //
     // Used below to constrain the predicate constructor of `lut_chars`.
0030 template<class T>
0031 struct is_pred<T, void_t<
0032     decltype(
0033     std::declval<bool&>() =
0034         std::declval<T const&>().operator()(
0035             std::declval<char>())
0036             ) > > : std::true_type
0037 {
0038 };
0039 } // detail
0040 #endif
0041 
0042 /** A set of characters
0043 
0044     The characters defined by instances of
0045     this set are provided upon construction.
0046     The `constexpr` implementation allows
0047     these to become compile-time constants.
0048 
0049     @par Example
0050     Character sets are used with rules and the
0051     functions @ref find_if and @ref find_if_not.
0052     @code
0053     constexpr lut_chars vowel_chars = "AEIOU" "aeiou";
0054 
0055     system::result< core::string_view > rv = parse( "Aiea", token_rule( vowel_chars ) );
0056     @endcode
0057 
0058     @see
0059         @ref find_if,
0060         @ref find_if_not,
0061         @ref parse,
0062         @ref token_rule.
0063 */
0064 class lut_chars
0065 {
         // Membership bitmap: 4 words x 64 bits = 256 bits, one bit
         // for every possible `unsigned char` value. A character `c`
         // is stored in word `lo(c)` at the bit given by `hi(c)`.
         // Value-initialized, so a default-constructed set is empty.
0066     std::uint64_t mask_[4] = {};
0067 
         // Index of the word in `mask_` that holds `c`: the two
         // lowest bits of `c` taken as an unsigned char (0..3).
0068     constexpr
0069     static
0070     std::uint64_t
0071     lo(char c) noexcept
0072     {
0073         return static_cast<
0074             unsigned char>(c) & 3;
0075     }
0076 
         // Single-bit mask for `c` inside its word: the upper six
         // bits of `c` taken as an unsigned char (0..63) select
         // the bit position.
0077     constexpr
0078     static
0079     std::uint64_t
0080     hi(char c) noexcept
0081     {
0082         return 1ULL << (static_cast<
0083             unsigned char>(c) >> 2);
0084     }
0085 
         // Build the set of all characters in the null-terminated
         // string `s` by recursively taking the union of the
         // single-character set for `*s` and the set for the rest
         // of the string. The terminator yields the empty set.
         // NOTE(review): written as one return expression,
         // presumably to remain usable under C++11 constexpr rules.
0086     constexpr
0087     static
0088     lut_chars
0089     construct(
0090         char const* s) noexcept
0091     {
0092         return *s
0093             ? lut_chars(*s) +
0094                 construct(s+1)
0095             : lut_chars();
0096     }
0097 
         // Return the set containing only `ch` when `b` is true,
         // otherwise the empty set.
0098     constexpr
0099     static
0100     lut_chars
0101     construct(
0102         unsigned char ch,
0103         bool b) noexcept
0104     {
0105         return b
0106             ? lut_chars(ch)
0107             : lut_chars();
0108     }
0109 
         // Build the set of every character in the range
         // [ch, 255] for which `pred` returns true. Each step
         // evaluates `pred` on `ch` and recurses with `ch + 1`;
         // the recursion stops at 255 so the unsigned char
         // argument never wraps around.
0110     template<class Pred>
0111     constexpr
0112     static
0113     lut_chars
0114     construct(
0115         Pred pred,
0116         unsigned char ch) noexcept
0117     {
0118         return ch == 255
0119             ? construct(ch, pred(static_cast<char>(ch)))
0120             : construct(ch, pred(static_cast<char>(ch))) +
0121                 construct(pred, ch + 1);
0122     }
0123 
         // Private: construct the empty set.
0124     constexpr
0125     lut_chars() = default;
0126 
         // Private: construct directly from the four raw mask
         // words, in `mask_` index order.
0127     constexpr
0128     lut_chars(
0129         std::uint64_t m0,
0130         std::uint64_t m1,
0131         std::uint64_t m2,
0132         std::uint64_t m3) noexcept
0133         : mask_{ m0, m1, m2, m3 }
0134     {
0135     }
0136 
0137 public:
0138     /** Constructor
0139 
0140         This function constructs a character
0141         set which has as a single member,
0142         the character `ch`.
0143 
0144         @par Example
0145         @code
0146         constexpr lut_chars asterisk( '*' );
0147         @endcode
0148 
0149         @par Complexity
0150         Constant.
0151 
0152         @par Exception Safety
0153         Throws nothing.
0154 
0155         @param ch A character.
0156     */
0157     constexpr
0158     lut_chars(char ch) noexcept
             // Only the word selected by lo(ch) receives the
             // bit hi(ch); the other three words stay zero.
0159         : mask_ {
0160             lo(ch) == 0 ? hi(ch) : 0,
0161             lo(ch) == 1 ? hi(ch) : 0,
0162             lo(ch) == 2 ? hi(ch) : 0,
0163             lo(ch) == 3 ? hi(ch) : 0 }
0164     {
0165     }
0166 
0167     /** Constructor
0168 
0169         This function constructs a character
0170         set which has as members, all of the
0171         characters present in the null-terminated
0172         string `s`.
0173 
0174         @par Example
0175         @code
0176         constexpr lut_chars digits = "0123456789";
0177         @endcode
0178 
0179         @par Complexity
0180         Linear in `::strlen(s)`, or constant
0181         if `s` is a constant expression.
0182 
0183         @par Exception Safety
0184         Throws nothing.
0185 
0186         @param s A null-terminated string.
0187     */
0188     constexpr
0189     lut_chars(
0190         char const* s) noexcept
0191         : lut_chars(construct(s))
0192     {
0193     }
0194 
0195     /** Constructor.
0196 
0197         This function constructs a character
0198         set which has as members, every value
0199         of `char ch` for which the expression
0200         `pred(ch)` returns `true`.
0201 
0202         @par Example
0203         @code
0204         struct is_digit
0205         {
0206             constexpr bool
0207             operator()(char c ) const noexcept
0208             {
0209                 return c >= '0' && c <= '9';
0210             }
0211         };
0212 
0213         constexpr lut_chars digits( is_digit{} );
0214         @endcode
0215 
0216         @par Complexity
0217         Linear in `pred`, or constant if
0218         `pred(ch)` is a constant expression.
0219 
0220         @par Exception Safety
0221         Throws nothing.
0222 
0223         @param pred The function object to
0224         use for determining membership in
0225         the character set.
0226     */
         // The enable_if below admits only types satisfying
         // detail::is_pred and excludes lut_chars itself (and
         // anything derived from it), so this template does not
         // take part in overload resolution for copies.
0227     template<class Pred
0228 #ifndef BOOST_URL_DOCS
0229         ,class = typename std::enable_if<
0230             detail::is_pred<Pred>::value &&
0231         ! std::is_base_of<
0232             lut_chars, Pred>::value>::type
0233 #endif
0234     >
0235     constexpr
0236     lut_chars(Pred const& pred) noexcept
0237         : lut_chars(
0238             construct(pred, 0))
0239     {
0240     }
0241 
0242     /** Return true if ch is in the character set.
0243 
0244         This function returns true if the
0245         character `ch` is in the set, otherwise
0246         it returns false.
0247 
0248         @par Complexity
0249         Constant.
0250 
0251         @par Exception Safety
0252         Throws nothing.
0253 
0254         @param ch The character to test.
0255         @return `true` if `ch` is in the set.
0256     */
0257     constexpr
0258     bool
0259     operator()(
0260         unsigned char ch) const noexcept
0261     {
             // Forward to the `char` overload, which performs
             // the actual bitmap lookup.
0262         return operator()(static_cast<char>(ch));
0263     }
0264 
0265     /// @copydoc operator()(unsigned char) const
0266     constexpr
0267     bool
0268     operator()(char ch) const noexcept
0269     {
             // Test the bit for `ch`; a nonzero result converts
             // to `true`.
0270         return mask_[lo(ch)] & hi(ch);
0271     }
0272 
0273     /** Return the union of two character sets.
0274 
0275         This function returns a new character
0276         set which contains all of the characters
0277         in `cs0` as well as all of the characters
0278         in `cs1`.
0279 
0280         @par Example
0281         This creates a character set which
0282         includes all letters and numbers
0283         @code
0284         constexpr lut_chars alpha_chars(
0285             "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
0286             "abcdefghijklmnopqrstuvwxyz");
0287 
0288         constexpr lut_chars alnum_chars = alpha_chars + "0123456789";
0289         @endcode
0290 
0291         @par Complexity
0292         Constant.
0293 
0294         @return The new character set.
0295 
0296         @param cs0 A character set to join.
0297         
0298         @param cs1 A character set to join.
0299     */
0300     friend
0301     constexpr
0302     lut_chars
0303     operator+(
0304         lut_chars const& cs0,
0305         lut_chars const& cs1) noexcept
0306     {
             // Word-wise bitwise OR of the two bitmaps.
0307         return lut_chars(
0308             cs0.mask_[0] | cs1.mask_[0],
0309             cs0.mask_[1] | cs1.mask_[1],
0310             cs0.mask_[2] | cs1.mask_[2],
0311             cs0.mask_[3] | cs1.mask_[3]);
0312     }
0313 
0314     /** Return a new character set by subtracting
0315 
0316         This function returns a new character
0317         set which is formed from all of the
0318         characters in `cs0` which are not in `cs1`.
0319 
0320         @par Example
0321         This statement declares a character set
0322         containing all the lowercase letters
0323         which are not vowels:
0324         @code
0325         constexpr lut_chars consonants = lut_chars("abcdefghijklmnopqrstuvwxyz") - "aeiou";
0326         @endcode
0327 
0328         @par Complexity
0329         Constant.
0330 
0331         @return The new character set.
0332 
0333         @param cs0 The character set to subtract from.
0334         
0335         @param cs1 The character set whose members are removed.
0336     */
0337     friend
0338     constexpr
0339     lut_chars
0340     operator-(
0341         lut_chars const& cs0,
0342         lut_chars const& cs1) noexcept
0343     {
             // Word-wise AND-NOT: keep the bits of cs0 that are
             // not set in cs1.
0344         return lut_chars(
0345             cs0.mask_[0] & ~cs1.mask_[0],
0346             cs0.mask_[1] & ~cs1.mask_[1],
0347             cs0.mask_[2] & ~cs1.mask_[2],
0348             cs0.mask_[3] & ~cs1.mask_[3]);
0349     }
0350 
0351     /** Return a new character set which is the complement of another character set.
0352 
0353         This function returns a new character
0354         set which contains all of the characters
0355         that are not in `*this`.
0356 
0357         @par Example
0358         This statement declares a character set
0359         containing everything but vowels:
0360         @code
0361         constexpr lut_chars not_vowels = ~lut_chars( "AEIOU" "aeiou" );
0362         @endcode
0363 
0364         @par Complexity
0365         Constant.
0366 
0367         @par Exception Safety
0368         Throws nothing.
0369 
0370         @return The new character set.
0371     */
0372     constexpr
0373     lut_chars
0374     operator~() const noexcept
0375     {
             // Invert every word of the bitmap.
0376         return lut_chars(
0377             ~mask_[0],
0378             ~mask_[1],
0379             ~mask_[2],
0380             ~mask_[3]
0381         );
0382     }
0383 
         // Member search functions, compiled only when
         // BOOST_URL_USE_SSE2 is defined (and hidden from the
         // documentation build). Each forwards `*this` and the
         // range [first, last) to a helper in namespace detail.
         // NOTE(review): find_if_pred / find_if_not_pred are not
         // declared in this file; presumably they come from
         // <boost/url/grammar/detail/charset.hpp> - confirm.
0384 #ifndef BOOST_URL_DOCS
0385 #ifdef BOOST_URL_USE_SSE2
0386     char const*
0387     find_if(
0388         char const* first,
0389         char const* last) const noexcept
0390     {
0391         return detail::find_if_pred(
0392             *this, first, last);
0393     }
0394 
0395     char const*
0396     find_if_not(
0397         char const* first,
0398         char const* last) const noexcept
0399     {
0400         return detail::find_if_not_pred(
0401             *this, first, last);
0402     }
0403 #endif
0404 #endif
0405 };
0406 
0407 } // grammar
0408 } // urls
0409 } // boost
0410 
0411 #endif