Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-12-16 10:11:11

0001 ///////////////////////////////////////////////////////////////////////////////
0002 // detail/dynamic/parser_traits.hpp
0003 //
0004 //  Copyright 2008 Eric Niebler. Distributed under the Boost
0005 //  Software License, Version 1.0. (See accompanying file
0006 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
0007 
0008 #ifndef BOOST_XPRESSIVE_DETAIL_DYNAMIC_PARSER_TRAITS_HPP_EAN_10_04_2005
0009 #define BOOST_XPRESSIVE_DETAIL_DYNAMIC_PARSER_TRAITS_HPP_EAN_10_04_2005
0010 
0011 // MS compatible compilers support #pragma once
0012 #if defined(_MSC_VER)
0013 # pragma once
0014 #endif
0015 
#include <string>
#include <climits>
#include <limits>
#include <boost/config.hpp>
#include <boost/assert.hpp>
#include <boost/throw_exception.hpp>
#include <boost/xpressive/regex_error.hpp>
#include <boost/xpressive/regex_traits.hpp>
#include <boost/xpressive/detail/detail_fwd.hpp>
#include <boost/xpressive/detail/dynamic/matchable.hpp>
#include <boost/xpressive/detail/dynamic/parser_enum.hpp>
#include <boost/xpressive/detail/utility/literals.hpp>
#include <boost/xpressive/detail/utility/algorithm.hpp>
0028 
0029 namespace boost { namespace xpressive
0030 {
0031 
0032 ///////////////////////////////////////////////////////////////////////////////
0033 // compiler_traits
0034 //  this works for char and wchar_t. it must be specialized for anything else.
0035 //
0036 template<typename RegexTraits>
0037 struct compiler_traits
0038 {
0039     typedef RegexTraits regex_traits;
0040     typedef typename regex_traits::char_type char_type;
0041     typedef typename regex_traits::string_type string_type;
0042     typedef typename regex_traits::locale_type locale_type;
0043 
0044     ///////////////////////////////////////////////////////////////////////////////
0045     // constructor
0046     explicit compiler_traits(RegexTraits const &traits = RegexTraits())
0047       : traits_(traits)
0048       , flags_(regex_constants::ECMAScript)
0049       , space_(lookup_classname(traits_, "space"))
0050       , alnum_(lookup_classname(traits_, "alnum"))
0051     {
0052     }
0053 
0054     ///////////////////////////////////////////////////////////////////////////////
0055     // flags
0056     regex_constants::syntax_option_type flags() const
0057     {
0058         return this->flags_;
0059     }
0060 
0061     ///////////////////////////////////////////////////////////////////////////////
0062     // flags
0063     void flags(regex_constants::syntax_option_type flags)
0064     {
0065         this->flags_ = flags;
0066     }
0067 
0068     ///////////////////////////////////////////////////////////////////////////////
0069     // traits
0070     regex_traits &traits()
0071     {
0072         return this->traits_;
0073     }
0074 
0075     regex_traits const &traits() const
0076     {
0077         return this->traits_;
0078     }
0079 
0080     ///////////////////////////////////////////////////////////////////////////////
0081     // imbue
0082     locale_type imbue(locale_type const &loc)
0083     {
0084         locale_type oldloc = this->traits().imbue(loc);
0085         this->space_ = lookup_classname(this->traits(), "space");
0086         this->alnum_ = lookup_classname(this->traits(), "alnum");
0087         return oldloc;
0088     }
0089 
0090     ///////////////////////////////////////////////////////////////////////////////
0091     // getloc
0092     locale_type getloc() const
0093     {
0094         return this->traits().getloc();
0095     }
0096 
0097     ///////////////////////////////////////////////////////////////////////////////
0098     // get_token
0099     //  get a token and advance the iterator
0100     template<typename FwdIter>
0101     regex_constants::compiler_token_type get_token(FwdIter &begin, FwdIter end)
0102     {
0103         using namespace regex_constants;
0104         if(this->eat_ws_(begin, end) == end)
0105         {
0106             return regex_constants::token_end_of_pattern;
0107         }
0108 
0109         switch(*begin)
0110         {
0111         case BOOST_XPR_CHAR_(char_type, '\\'): return this->get_escape_token(++begin, end);
0112         case BOOST_XPR_CHAR_(char_type, '.'): ++begin; return token_any;
0113         case BOOST_XPR_CHAR_(char_type, '^'): ++begin; return token_assert_begin_line;
0114         case BOOST_XPR_CHAR_(char_type, '$'): ++begin; return token_assert_end_line;
0115         case BOOST_XPR_CHAR_(char_type, '('): ++begin; return token_group_begin;
0116         case BOOST_XPR_CHAR_(char_type, ')'): ++begin; return token_group_end;
0117         case BOOST_XPR_CHAR_(char_type, '|'): ++begin; return token_alternate;
0118         case BOOST_XPR_CHAR_(char_type, '['): ++begin; return token_charset_begin;
0119 
0120         case BOOST_XPR_CHAR_(char_type, '*'):
0121         case BOOST_XPR_CHAR_(char_type, '+'):
0122         case BOOST_XPR_CHAR_(char_type, '?'):
0123             return token_invalid_quantifier;
0124 
0125         case BOOST_XPR_CHAR_(char_type, ']'):
0126         case BOOST_XPR_CHAR_(char_type, '{'):
0127         default:
0128             return token_literal;
0129         }
0130     }
0131 
0132     ///////////////////////////////////////////////////////////////////////////////
0133     // get_quant_spec
0134     template<typename FwdIter>
0135     bool get_quant_spec(FwdIter &begin, FwdIter end, detail::quant_spec &spec)
0136     {
0137         using namespace regex_constants;
0138         FwdIter old_begin;
0139 
0140         if(this->eat_ws_(begin, end) == end)
0141         {
0142             return false;
0143         }
0144 
0145         switch(*begin)
0146         {
0147         case BOOST_XPR_CHAR_(char_type, '*'):
0148             spec.min_ = 0;
0149             spec.max_ = (std::numeric_limits<unsigned int>::max)();
0150             break;
0151 
0152         case BOOST_XPR_CHAR_(char_type, '+'):
0153             spec.min_ = 1;
0154             spec.max_ = (std::numeric_limits<unsigned int>::max)();
0155             break;
0156 
0157         case BOOST_XPR_CHAR_(char_type, '?'):
0158             spec.min_ = 0;
0159             spec.max_ = 1;
0160             break;
0161 
0162         case BOOST_XPR_CHAR_(char_type, '{'):
0163             old_begin = this->eat_ws_(++begin, end);
0164             spec.min_ = spec.max_ = detail::toi(begin, end, this->traits());
0165             BOOST_XPR_ENSURE_
0166             (
0167                 begin != old_begin && begin != end, error_brace, "invalid quantifier"
0168             );
0169 
0170             if(*begin == BOOST_XPR_CHAR_(char_type, ','))
0171             {
0172                 old_begin = this->eat_ws_(++begin, end);
0173                 spec.max_ = detail::toi(begin, end, this->traits());
0174                 BOOST_XPR_ENSURE_
0175                 (
0176                     begin != end && BOOST_XPR_CHAR_(char_type, '}') == *begin
0177                   , error_brace, "invalid quantifier"
0178                 );
0179 
0180                 if(begin == old_begin)
0181                 {
0182                     spec.max_ = (std::numeric_limits<unsigned int>::max)();
0183                 }
0184                 else
0185                 {
0186                     BOOST_XPR_ENSURE_
0187                     (
0188                         spec.min_ <= spec.max_, error_badbrace, "invalid quantification range"
0189                     );
0190                 }
0191             }
0192             else
0193             {
0194                 BOOST_XPR_ENSURE_
0195                 (
0196                     BOOST_XPR_CHAR_(char_type, '}') == *begin, error_brace, "invalid quantifier"
0197                 );
0198             }
0199             break;
0200 
0201         default:
0202             return false;
0203         }
0204 
0205         spec.greedy_ = true;
0206         if(this->eat_ws_(++begin, end) != end && BOOST_XPR_CHAR_(char_type, '?') == *begin)
0207         {
0208             ++begin;
0209             spec.greedy_ = false;
0210         }
0211 
0212         return true;
0213     }
0214 
0215     ///////////////////////////////////////////////////////////////////////////
0216     // get_group_type
0217     template<typename FwdIter>
0218     regex_constants::compiler_token_type get_group_type(FwdIter &begin, FwdIter end, string_type &name)
0219     {
0220         using namespace regex_constants;
0221         if(this->eat_ws_(begin, end) != end && BOOST_XPR_CHAR_(char_type, '?') == *begin)
0222         {
0223             this->eat_ws_(++begin, end);
0224             BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension");
0225 
0226             switch(*begin)
0227             {
0228             case BOOST_XPR_CHAR_(char_type, ':'): ++begin; return token_no_mark;
0229             case BOOST_XPR_CHAR_(char_type, '>'): ++begin; return token_independent_sub_expression;
0230             case BOOST_XPR_CHAR_(char_type, '#'): ++begin; return token_comment;
0231             case BOOST_XPR_CHAR_(char_type, '='): ++begin; return token_positive_lookahead;
0232             case BOOST_XPR_CHAR_(char_type, '!'): ++begin; return token_negative_lookahead;
0233             case BOOST_XPR_CHAR_(char_type, 'R'): ++begin; return token_recurse;
0234             case BOOST_XPR_CHAR_(char_type, '$'):
0235                 this->get_name_(++begin, end, name);
0236                 BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension");
0237                 if(BOOST_XPR_CHAR_(char_type, '=') == *begin)
0238                 {
0239                     ++begin;
0240                     return token_rule_assign;
0241                 }
0242                 return token_rule_ref;
0243 
0244             case BOOST_XPR_CHAR_(char_type, '<'):
0245                 this->eat_ws_(++begin, end);
0246                 BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension");
0247                 switch(*begin)
0248                 {
0249                 case BOOST_XPR_CHAR_(char_type, '='): ++begin; return token_positive_lookbehind;
0250                 case BOOST_XPR_CHAR_(char_type, '!'): ++begin; return token_negative_lookbehind;
0251                 default:
0252                     BOOST_THROW_EXCEPTION(regex_error(error_badbrace, "unrecognized extension"));
0253                 }
0254 
0255             case BOOST_XPR_CHAR_(char_type, 'P'):
0256                 this->eat_ws_(++begin, end);
0257                 BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension");
0258                 switch(*begin)
0259                 {
0260                 case BOOST_XPR_CHAR_(char_type, '<'):
0261                     this->get_name_(++begin, end, name);
0262                     BOOST_XPR_ENSURE_(begin != end && BOOST_XPR_CHAR_(char_type, '>') == *begin++, error_paren, "incomplete extension");
0263                     return token_named_mark;
0264                 case BOOST_XPR_CHAR_(char_type, '='):
0265                     this->get_name_(++begin, end, name);
0266                     BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension");
0267                     return token_named_mark_ref;
0268                 default:
0269                     BOOST_THROW_EXCEPTION(regex_error(error_badbrace, "unrecognized extension"));
0270                 }
0271 
0272             case BOOST_XPR_CHAR_(char_type, 'i'):
0273             case BOOST_XPR_CHAR_(char_type, 'm'):
0274             case BOOST_XPR_CHAR_(char_type, 's'):
0275             case BOOST_XPR_CHAR_(char_type, 'x'):
0276             case BOOST_XPR_CHAR_(char_type, '-'):
0277                 return this->parse_mods_(begin, end);
0278 
0279             default:
0280                 BOOST_THROW_EXCEPTION(regex_error(error_badbrace, "unrecognized extension"));
0281             }
0282         }
0283 
0284         return token_literal;
0285     }
0286 
0287     //////////////////////////////////////////////////////////////////////////
0288     // get_charset_token
0289     //  NOTE: white-space is *never* ignored in a charset.
0290     template<typename FwdIter>
0291     regex_constants::compiler_token_type get_charset_token(FwdIter &begin, FwdIter end)
0292     {
0293         using namespace regex_constants;
0294         BOOST_ASSERT(begin != end);
0295         switch(*begin)
0296         {
0297         case BOOST_XPR_CHAR_(char_type, '^'): ++begin; return token_charset_invert;
0298         case BOOST_XPR_CHAR_(char_type, '-'): ++begin; return token_charset_hyphen;
0299         case BOOST_XPR_CHAR_(char_type, ']'): ++begin; return token_charset_end;
0300         case BOOST_XPR_CHAR_(char_type, '['):
0301             {
0302                 FwdIter next = begin; ++next;
0303                 if(next != end)
0304                 {
0305                     BOOST_XPR_ENSURE_(
0306                         *next != BOOST_XPR_CHAR_(char_type, '=')
0307                       , error_collate
0308                       , "equivalence classes are not yet supported"
0309                     );
0310 
0311                     BOOST_XPR_ENSURE_(
0312                         *next != BOOST_XPR_CHAR_(char_type, '.')
0313                       , error_collate
0314                       , "collation sequences are not yet supported"
0315                     );
0316 
0317                     if(*next == BOOST_XPR_CHAR_(char_type, ':'))
0318                     {
0319                         begin = ++next;
0320                         return token_posix_charset_begin;
0321                     }
0322                 }
0323             }
0324             break;
0325         case BOOST_XPR_CHAR_(char_type, ':'):
0326             {
0327                 FwdIter next = begin; ++next;
0328                 if(next != end && *next == BOOST_XPR_CHAR_(char_type, ']'))
0329                 {
0330                     begin = ++next;
0331                     return token_posix_charset_end;
0332                 }
0333             }
0334             break;
0335         case BOOST_XPR_CHAR_(char_type, '\\'):
0336             if(++begin != end)
0337             {
0338                 switch(*begin)
0339                 {
0340                 case BOOST_XPR_CHAR_(char_type, 'b'): ++begin; return token_charset_backspace;
0341                 default:;
0342                 }
0343             }
0344             return token_escape;
0345         default:;
0346         }
0347         return token_literal;
0348     }
0349 
0350     //////////////////////////////////////////////////////////////////////////
0351     // get_escape_token
0352     template<typename FwdIter>
0353     regex_constants::compiler_token_type get_escape_token(FwdIter &begin, FwdIter end)
0354     {
0355         using namespace regex_constants;
0356         if(begin != end)
0357         {
0358             switch(*begin)
0359             {
0360             //case BOOST_XPR_CHAR_(char_type, 'a'): ++begin; return token_escape_bell;
0361             //case BOOST_XPR_CHAR_(char_type, 'c'): ++begin; return token_escape_control;
0362             //case BOOST_XPR_CHAR_(char_type, 'e'): ++begin; return token_escape_escape;
0363             //case BOOST_XPR_CHAR_(char_type, 'f'): ++begin; return token_escape_formfeed;
0364             //case BOOST_XPR_CHAR_(char_type, 'n'): ++begin; return token_escape_newline;
0365             //case BOOST_XPR_CHAR_(char_type, 't'): ++begin; return token_escape_horizontal_tab;
0366             //case BOOST_XPR_CHAR_(char_type, 'v'): ++begin; return token_escape_vertical_tab;
0367             case BOOST_XPR_CHAR_(char_type, 'A'): ++begin; return token_assert_begin_sequence;
0368             case BOOST_XPR_CHAR_(char_type, 'b'): ++begin; return token_assert_word_boundary;
0369             case BOOST_XPR_CHAR_(char_type, 'B'): ++begin; return token_assert_not_word_boundary;
0370             case BOOST_XPR_CHAR_(char_type, 'E'): ++begin; return token_quote_meta_end;
0371             case BOOST_XPR_CHAR_(char_type, 'Q'): ++begin; return token_quote_meta_begin;
0372             case BOOST_XPR_CHAR_(char_type, 'Z'): ++begin; return token_assert_end_sequence;
0373             // Non-standard extension to ECMAScript syntax
0374             case BOOST_XPR_CHAR_(char_type, '<'): ++begin; return token_assert_word_begin;
0375             case BOOST_XPR_CHAR_(char_type, '>'): ++begin; return token_assert_word_end;
0376             default:; // fall-through
0377             }
0378         }
0379 
0380         return token_escape;
0381     }
0382 
0383 private:
0384 
0385     //////////////////////////////////////////////////////////////////////////
0386     // parse_mods_
0387     template<typename FwdIter>
0388     regex_constants::compiler_token_type parse_mods_(FwdIter &begin, FwdIter end)
0389     {
0390         using namespace regex_constants;
0391         bool set = true;
0392         do switch(*begin)
0393         {
0394         case BOOST_XPR_CHAR_(char_type, 'i'): this->flag_(set, icase_); break;
0395         case BOOST_XPR_CHAR_(char_type, 'm'): this->flag_(!set, single_line); break;
0396         case BOOST_XPR_CHAR_(char_type, 's'): this->flag_(!set, not_dot_newline); break;
0397         case BOOST_XPR_CHAR_(char_type, 'x'): this->flag_(set, ignore_white_space); break;
0398         case BOOST_XPR_CHAR_(char_type, ':'): ++begin; BOOST_FALLTHROUGH;
0399         case BOOST_XPR_CHAR_(char_type, ')'): return token_no_mark;
0400         case BOOST_XPR_CHAR_(char_type, '-'): if(false == (set = !set)) break; BOOST_FALLTHROUGH;
0401         default: BOOST_THROW_EXCEPTION(regex_error(error_paren, "unknown pattern modifier"));
0402         }
0403         while(BOOST_XPR_ENSURE_(++begin != end, error_paren, "incomplete extension"));
0404         // this return is technically unreachable, but this must
0405         // be here to work around a bug in gcc 4.0
0406         return token_no_mark;
0407     }
0408 
0409     ///////////////////////////////////////////////////////////////////////////////
0410     // flag_
0411     void flag_(bool set, regex_constants::syntax_option_type flag)
0412     {
0413         this->flags_ = set ? (this->flags_ | flag) : (this->flags_ & ~flag);
0414     }
0415 
0416     ///////////////////////////////////////////////////////////////////////////
0417     // is_space_
0418     bool is_space_(char_type ch) const
0419     {
0420         return 0 != this->space_ && this->traits().isctype(ch, this->space_);
0421     }
0422 
0423     ///////////////////////////////////////////////////////////////////////////
0424     // is_alnum_
0425     bool is_alnum_(char_type ch) const
0426     {
0427         return 0 != this->alnum_ && this->traits().isctype(ch, this->alnum_);
0428     }
0429 
0430     ///////////////////////////////////////////////////////////////////////////
0431     // get_name_
0432     template<typename FwdIter>
0433     void get_name_(FwdIter &begin, FwdIter end, string_type &name)
0434     {
0435         this->eat_ws_(begin, end);
0436         for(name.clear(); begin != end && this->is_alnum_(*begin); ++begin)
0437         {
0438             name.push_back(*begin);
0439         }
0440         this->eat_ws_(begin, end);
0441         BOOST_XPR_ENSURE_(!name.empty(), regex_constants::error_paren, "incomplete extension");
0442     }
0443 
0444     ///////////////////////////////////////////////////////////////////////////////
0445     // eat_ws_
0446     template<typename FwdIter>
0447     FwdIter &eat_ws_(FwdIter &begin, FwdIter end)
0448     {
0449         if(0 != (regex_constants::ignore_white_space & this->flags()))
0450         {
0451             while(end != begin && (BOOST_XPR_CHAR_(char_type, '#') == *begin || this->is_space_(*begin)))
0452             {
0453                 if(BOOST_XPR_CHAR_(char_type, '#') == *begin++)
0454                 {
0455                     while(end != begin && BOOST_XPR_CHAR_(char_type, '\n') != *begin++) {}
0456                 }
0457                 else
0458                 {
0459                     for(; end != begin && this->is_space_(*begin); ++begin) {}
0460                 }
0461             }
0462         }
0463 
0464         return begin;
0465     }
0466 
0467     regex_traits traits_;
0468     regex_constants::syntax_option_type flags_;
0469     typename regex_traits::char_class_type space_;
0470     typename regex_traits::char_class_type alnum_;
0471 };
0472 
0473 }} // namespace boost::xpressive
0474 
0475 #endif