Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:53:27

0001 //
0002 // Copyright (c) 2021 Vinnie Falco (vinnie dot falco at gmail dot com)
0003 //
0004 // Distributed under the Boost Software License, Version 1.0. (See accompanying
0005 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
0006 //
0007 // Official repository: https://github.com/boostorg/url
0008 //
0009 
0010 #ifndef BOOST_URL_GRAMMAR_LUT_CHARS_HPP
0011 #define BOOST_URL_GRAMMAR_LUT_CHARS_HPP
0012 
0013 #include <boost/url/detail/config.hpp>
0014 #include <boost/url/grammar/detail/charset.hpp>
0015 #include <cstdint>
0016 #include <type_traits>
0017 
0018 // Credit to Peter Dimov for ideas regarding
0019 // SIMD constexpr, and character set masks.
0020 
0021 namespace boost {
0022 namespace urls {
0023 namespace grammar {
0024 
0025 #ifndef BOOST_URL_DOCS
0026 namespace detail {
// Detects whether a type can be used as a
// character predicate.
//
// Primary template: the answer is `false`
// unless the partial specialization below
// is viable for `Pred`.
template<class Pred, class = void>
struct is_pred
    : std::false_type
{
};

// Chosen when a `Pred const` object has a
// member `operator()` which can be called
// with a `char`, and whose result can be
// assigned to a `bool`. The member call is
// spelled out explicitly, so types without
// a member call operator do not match.
template<class Pred>
struct is_pred<
    Pred,
    void_t<decltype(
        std::declval<bool&>() =
            std::declval<Pred const&>().operator()(
                std::declval<char>()))>>
    : std::true_type
{
};
0039 } // detail
0040 #endif
0041 
0042 /** A set of characters
0043 
0044     The characters defined by instances of
0045     this set are provided upon construction.
0046     The `constexpr` implementation allows
0047     these to become compile-time constants.
0048 
0049     @par Example
0050     Character sets are used with rules and the
0051     functions @ref find_if and @ref find_if_not.
0052     @code
0053     constexpr lut_chars vowel_chars = "AEIOU" "aeiou";
0054 
0055     system::result< core::string_view > rv = parse( "Aiea", token_rule( vowel_chars ) );
0056     @endcode
0057 
0058     @see
0059         @ref find_if,
0060         @ref find_if_not,
0061         @ref parse,
0062         @ref token_rule.
0063 */
0064 class lut_chars
0065 {
0066     std::uint64_t mask_[4] = {};
0067 
0068     constexpr
0069     static
0070     std::uint64_t
0071     lo(char c) noexcept
0072     {
0073         return static_cast<
0074             unsigned char>(c) & 3;
0075     }
0076 
0077     constexpr
0078     static
0079     std::uint64_t
0080     hi(char c) noexcept
0081     {
0082         return 1ULL << (static_cast<
0083             unsigned char>(c) >> 2);
0084     }
0085 
0086     constexpr
0087     static
0088     lut_chars
0089     construct(
0090         char const* s) noexcept
0091     {
0092         return *s
0093             ? lut_chars(*s) +
0094                 construct(s+1)
0095             : lut_chars();
0096     }
0097 
0098     constexpr
0099     static
0100     lut_chars
0101     construct(
0102         unsigned char ch,
0103         bool b) noexcept
0104     {
0105         return b
0106             ? lut_chars(ch)
0107             : lut_chars();
0108     }
0109 
0110     template<class Pred>
0111     constexpr
0112     static
0113     lut_chars
0114     construct(
0115         Pred pred,
0116         unsigned char ch) noexcept
0117     {
0118         return ch == 255
0119             ? construct(ch, pred(ch))
0120             : construct(ch, pred(ch)) +
0121                 construct(pred, ch + 1);
0122     }
0123 
0124     constexpr
0125     lut_chars() = default;
0126 
0127     constexpr
0128     lut_chars(
0129         std::uint64_t m0,
0130         std::uint64_t m1,
0131         std::uint64_t m2,
0132         std::uint64_t m3) noexcept
0133         : mask_{ m0, m1, m2, m3 }
0134     {
0135     }
0136 
0137 public:
0138     /** Constructor
0139 
0140         This function constructs a character
0141         set which has as a single member,
0142         the character `ch`.
0143 
0144         @par Example
0145         @code
0146         constexpr lut_chars asterisk( '*' );
0147         @endcode
0148 
0149         @par Complexity
0150         Constant.
0151 
0152         @par Exception Safety
0153         Throws nothing.
0154 
0155         @param ch A character.
0156     */
0157     constexpr
0158     lut_chars(char ch) noexcept
0159         : mask_ {
0160             lo(ch) == 0 ? hi(ch) : 0,
0161             lo(ch) == 1 ? hi(ch) : 0,
0162             lo(ch) == 2 ? hi(ch) : 0,
0163             lo(ch) == 3 ? hi(ch) : 0 }
0164     {
0165     }
0166 
0167     /** Constructor
0168 
0169         This function constructs a character
0170         set which has as members, all of the
0171         characters present in the null-terminated
0172         string `s`.
0173 
0174         @par Example
0175         @code
0176         constexpr lut_chars digits = "0123456789";
0177         @endcode
0178 
0179         @par Complexity
0180         Linear in `::strlen(s)`, or constant
0181         if `s` is a constant expression.
0182 
0183         @par Exception Safety
0184         Throws nothing.
0185 
0186         @param s A null-terminated string.
0187     */
0188     constexpr
0189     lut_chars(
0190         char const* s) noexcept
0191         : lut_chars(construct(s))
0192     {
0193     }
0194 
0195     /** Constructor.
0196 
0197         This function constructs a character
0198         set which has as members, every value
0199         of `char ch` for which the expression
0200         `pred(ch)` returns `true`.
0201 
0202         @par Example
0203         @code
0204         struct is_digit
0205         {
0206             constexpr bool
0207             operator()(char c ) const noexcept
0208             {
0209                 return c >= '0' && c <= '9';
0210             }
0211         };
0212 
0213         constexpr lut_chars digits( is_digit{} );
0214         @endcode
0215 
0216         @par Complexity
0217         Linear in `pred`, or constant if
0218         `pred(ch)` is a constant expression.
0219 
0220         @par Exception Safety
0221         Throws nothing.
0222 
0223         @param pred The function object to
0224         use for determining membership in
0225         the character set.
0226     */
0227     template<class Pred
0228 #ifndef BOOST_URL_DOCS
0229         ,class = typename std::enable_if<
0230             detail::is_pred<Pred>::value &&
0231         ! std::is_base_of<
0232             lut_chars, Pred>::value>::type
0233 #endif
0234     >
0235     constexpr
0236     lut_chars(Pred const& pred) noexcept
0237         : lut_chars(
0238             construct(pred, 0))
0239     {
0240     }
0241 
0242     /** Return true if ch is in the character set.
0243 
0244         This function returns true if the
0245         character `ch` is in the set, otherwise
0246         it returns false.
0247 
0248         @par Complexity
0249         Constant.
0250 
0251         @par Exception Safety
0252         Throws nothing.
0253 
0254         @param ch The character to test.
0255     */
0256     constexpr
0257     bool
0258     operator()(
0259         unsigned char ch) const noexcept
0260     {
0261         return mask_[lo(ch)] & hi(ch);
0262     }
0263 
0264     /** Return the union of two character sets.
0265 
0266         This function returns a new character
0267         set which contains all of the characters
0268         in `cs0` as well as all of the characters
0269         in `cs`.
0270 
0271         @par Example
0272         This creates a character set which
0273         includes all letters and numbers
0274         @code
0275         constexpr lut_chars alpha_chars(
0276             "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
0277             "abcdefghijklmnopqrstuvwxyz");
0278 
0279         constexpr lut_chars alnum_chars = alpha_chars + "0123456789";
0280         @endcode
0281 
0282         @par Complexity
0283         Constant.
0284 
0285         @return The new character set.
0286 
0287         @param cs0 A character to join
0288         
0289         @param cs1 A character to join
0290     */
0291     friend
0292     constexpr
0293     lut_chars
0294     operator+(
0295         lut_chars const& cs0,
0296         lut_chars const& cs1) noexcept
0297     {
0298         return lut_chars(
0299             cs0.mask_[0] | cs1.mask_[0],
0300             cs0.mask_[1] | cs1.mask_[1],
0301             cs0.mask_[2] | cs1.mask_[2],
0302             cs0.mask_[3] | cs1.mask_[3]);
0303     }
0304 
0305     /** Return a new character set by subtracting
0306 
0307         This function returns a new character
0308         set which is formed from all of the
0309         characters in `cs0` which are not in `cs`.
0310 
0311         @par Example
0312         This statement declares a character set
0313         containing all the lowercase letters
0314         which are not vowels:
0315         @code
0316         constexpr lut_chars consonants = lut_chars("abcdefghijklmnopqrstuvwxyz") - "aeiou";
0317         @endcode
0318 
0319         @par Complexity
0320         Constant.
0321 
0322         @return The new character set.
0323 
0324         @param cs0 A character set to join.
0325         
0326         @param cs1 A character set to join.
0327     */
0328     friend
0329     constexpr
0330     lut_chars
0331     operator-(
0332         lut_chars const& cs0,
0333         lut_chars const& cs1) noexcept
0334     {
0335         return lut_chars(
0336             cs0.mask_[0] & ~cs1.mask_[0],
0337             cs0.mask_[1] & ~cs1.mask_[1],
0338             cs0.mask_[2] & ~cs1.mask_[2],
0339             cs0.mask_[3] & ~cs1.mask_[3]);
0340     }
0341 
0342     /** Return a new character set which is the complement of another character set.
0343 
0344         This function returns a new character
0345         set which contains all of the characters
0346         that are not in `*this`.
0347 
0348         @par Example
0349         This statement declares a character set
0350         containing everything but vowels:
0351         @code
0352         constexpr lut_chars not_vowels = ~lut_chars( "AEIOU" "aeiou" );
0353         @endcode
0354 
0355         @par Complexity
0356         Constant.
0357 
0358         @par Exception Safety
0359         Throws nothing.
0360 
0361         @return The new character set.
0362     */
0363     constexpr
0364     lut_chars
0365     operator~() const noexcept
0366     {
0367         return lut_chars(
0368             ~mask_[0],
0369             ~mask_[1],
0370             ~mask_[2],
0371             ~mask_[3]
0372         );
0373     }
0374 
0375 #ifndef BOOST_URL_DOCS
0376 #ifdef BOOST_URL_USE_SSE2
0377     char const*
0378     find_if(
0379         char const* first,
0380         char const* last) const noexcept
0381     {
0382         return detail::find_if_pred(
0383             *this, first, last);
0384     }
0385 
0386     char const*
0387     find_if_not(
0388         char const* first,
0389         char const* last) const noexcept
0390     {
0391         return detail::find_if_not_pred(
0392             *this, first, last);
0393     }
0394 #endif
0395 #endif
0396 };
0397 
0398 } // grammar
0399 } // urls
0400 } // boost
0401 
0402 #endif