File indexing completed on 2025-12-16 10:11:11
0001
0002
0003
0004
0005
0006
0007
0008 #ifndef BOOST_XPRESSIVE_DETAIL_DYNAMIC_PARSER_TRAITS_HPP_EAN_10_04_2005
0009 #define BOOST_XPRESSIVE_DETAIL_DYNAMIC_PARSER_TRAITS_HPP_EAN_10_04_2005
0010
0011
0012 #if defined(_MSC_VER)
0013 # pragma once
0014 #endif
0015
#include <string>
#include <climits>
#include <limits>
#include <boost/config.hpp>
#include <boost/assert.hpp>
#include <boost/throw_exception.hpp>
#include <boost/xpressive/regex_error.hpp>
#include <boost/xpressive/regex_traits.hpp>
#include <boost/xpressive/detail/detail_fwd.hpp>
#include <boost/xpressive/detail/dynamic/matchable.hpp>
#include <boost/xpressive/detail/dynamic/parser_enum.hpp>
#include <boost/xpressive/detail/utility/literals.hpp>
#include <boost/xpressive/detail/utility/algorithm.hpp>
0028
0029 namespace boost { namespace xpressive
0030 {
0031
0032
0033
0034
0035
0036 template<typename RegexTraits>
0037 struct compiler_traits
0038 {
0039 typedef RegexTraits regex_traits;
0040 typedef typename regex_traits::char_type char_type;
0041 typedef typename regex_traits::string_type string_type;
0042 typedef typename regex_traits::locale_type locale_type;
0043
0044
0045
0046 explicit compiler_traits(RegexTraits const &traits = RegexTraits())
0047 : traits_(traits)
0048 , flags_(regex_constants::ECMAScript)
0049 , space_(lookup_classname(traits_, "space"))
0050 , alnum_(lookup_classname(traits_, "alnum"))
0051 {
0052 }
0053
0054
0055
0056 regex_constants::syntax_option_type flags() const
0057 {
0058 return this->flags_;
0059 }
0060
0061
0062
0063 void flags(regex_constants::syntax_option_type flags)
0064 {
0065 this->flags_ = flags;
0066 }
0067
0068
0069
0070 regex_traits &traits()
0071 {
0072 return this->traits_;
0073 }
0074
0075 regex_traits const &traits() const
0076 {
0077 return this->traits_;
0078 }
0079
0080
0081
0082 locale_type imbue(locale_type const &loc)
0083 {
0084 locale_type oldloc = this->traits().imbue(loc);
0085 this->space_ = lookup_classname(this->traits(), "space");
0086 this->alnum_ = lookup_classname(this->traits(), "alnum");
0087 return oldloc;
0088 }
0089
0090
0091
0092 locale_type getloc() const
0093 {
0094 return this->traits().getloc();
0095 }
0096
0097
0098
0099
0100 template<typename FwdIter>
0101 regex_constants::compiler_token_type get_token(FwdIter &begin, FwdIter end)
0102 {
0103 using namespace regex_constants;
0104 if(this->eat_ws_(begin, end) == end)
0105 {
0106 return regex_constants::token_end_of_pattern;
0107 }
0108
0109 switch(*begin)
0110 {
0111 case BOOST_XPR_CHAR_(char_type, '\\'): return this->get_escape_token(++begin, end);
0112 case BOOST_XPR_CHAR_(char_type, '.'): ++begin; return token_any;
0113 case BOOST_XPR_CHAR_(char_type, '^'): ++begin; return token_assert_begin_line;
0114 case BOOST_XPR_CHAR_(char_type, '$'): ++begin; return token_assert_end_line;
0115 case BOOST_XPR_CHAR_(char_type, '('): ++begin; return token_group_begin;
0116 case BOOST_XPR_CHAR_(char_type, ')'): ++begin; return token_group_end;
0117 case BOOST_XPR_CHAR_(char_type, '|'): ++begin; return token_alternate;
0118 case BOOST_XPR_CHAR_(char_type, '['): ++begin; return token_charset_begin;
0119
0120 case BOOST_XPR_CHAR_(char_type, '*'):
0121 case BOOST_XPR_CHAR_(char_type, '+'):
0122 case BOOST_XPR_CHAR_(char_type, '?'):
0123 return token_invalid_quantifier;
0124
0125 case BOOST_XPR_CHAR_(char_type, ']'):
0126 case BOOST_XPR_CHAR_(char_type, '{'):
0127 default:
0128 return token_literal;
0129 }
0130 }
0131
0132
0133
0134 template<typename FwdIter>
0135 bool get_quant_spec(FwdIter &begin, FwdIter end, detail::quant_spec &spec)
0136 {
0137 using namespace regex_constants;
0138 FwdIter old_begin;
0139
0140 if(this->eat_ws_(begin, end) == end)
0141 {
0142 return false;
0143 }
0144
0145 switch(*begin)
0146 {
0147 case BOOST_XPR_CHAR_(char_type, '*'):
0148 spec.min_ = 0;
0149 spec.max_ = (std::numeric_limits<unsigned int>::max)();
0150 break;
0151
0152 case BOOST_XPR_CHAR_(char_type, '+'):
0153 spec.min_ = 1;
0154 spec.max_ = (std::numeric_limits<unsigned int>::max)();
0155 break;
0156
0157 case BOOST_XPR_CHAR_(char_type, '?'):
0158 spec.min_ = 0;
0159 spec.max_ = 1;
0160 break;
0161
0162 case BOOST_XPR_CHAR_(char_type, '{'):
0163 old_begin = this->eat_ws_(++begin, end);
0164 spec.min_ = spec.max_ = detail::toi(begin, end, this->traits());
0165 BOOST_XPR_ENSURE_
0166 (
0167 begin != old_begin && begin != end, error_brace, "invalid quantifier"
0168 );
0169
0170 if(*begin == BOOST_XPR_CHAR_(char_type, ','))
0171 {
0172 old_begin = this->eat_ws_(++begin, end);
0173 spec.max_ = detail::toi(begin, end, this->traits());
0174 BOOST_XPR_ENSURE_
0175 (
0176 begin != end && BOOST_XPR_CHAR_(char_type, '}') == *begin
0177 , error_brace, "invalid quantifier"
0178 );
0179
0180 if(begin == old_begin)
0181 {
0182 spec.max_ = (std::numeric_limits<unsigned int>::max)();
0183 }
0184 else
0185 {
0186 BOOST_XPR_ENSURE_
0187 (
0188 spec.min_ <= spec.max_, error_badbrace, "invalid quantification range"
0189 );
0190 }
0191 }
0192 else
0193 {
0194 BOOST_XPR_ENSURE_
0195 (
0196 BOOST_XPR_CHAR_(char_type, '}') == *begin, error_brace, "invalid quantifier"
0197 );
0198 }
0199 break;
0200
0201 default:
0202 return false;
0203 }
0204
0205 spec.greedy_ = true;
0206 if(this->eat_ws_(++begin, end) != end && BOOST_XPR_CHAR_(char_type, '?') == *begin)
0207 {
0208 ++begin;
0209 spec.greedy_ = false;
0210 }
0211
0212 return true;
0213 }
0214
0215
0216
0217 template<typename FwdIter>
0218 regex_constants::compiler_token_type get_group_type(FwdIter &begin, FwdIter end, string_type &name)
0219 {
0220 using namespace regex_constants;
0221 if(this->eat_ws_(begin, end) != end && BOOST_XPR_CHAR_(char_type, '?') == *begin)
0222 {
0223 this->eat_ws_(++begin, end);
0224 BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension");
0225
0226 switch(*begin)
0227 {
0228 case BOOST_XPR_CHAR_(char_type, ':'): ++begin; return token_no_mark;
0229 case BOOST_XPR_CHAR_(char_type, '>'): ++begin; return token_independent_sub_expression;
0230 case BOOST_XPR_CHAR_(char_type, '#'): ++begin; return token_comment;
0231 case BOOST_XPR_CHAR_(char_type, '='): ++begin; return token_positive_lookahead;
0232 case BOOST_XPR_CHAR_(char_type, '!'): ++begin; return token_negative_lookahead;
0233 case BOOST_XPR_CHAR_(char_type, 'R'): ++begin; return token_recurse;
0234 case BOOST_XPR_CHAR_(char_type, '$'):
0235 this->get_name_(++begin, end, name);
0236 BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension");
0237 if(BOOST_XPR_CHAR_(char_type, '=') == *begin)
0238 {
0239 ++begin;
0240 return token_rule_assign;
0241 }
0242 return token_rule_ref;
0243
0244 case BOOST_XPR_CHAR_(char_type, '<'):
0245 this->eat_ws_(++begin, end);
0246 BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension");
0247 switch(*begin)
0248 {
0249 case BOOST_XPR_CHAR_(char_type, '='): ++begin; return token_positive_lookbehind;
0250 case BOOST_XPR_CHAR_(char_type, '!'): ++begin; return token_negative_lookbehind;
0251 default:
0252 BOOST_THROW_EXCEPTION(regex_error(error_badbrace, "unrecognized extension"));
0253 }
0254
0255 case BOOST_XPR_CHAR_(char_type, 'P'):
0256 this->eat_ws_(++begin, end);
0257 BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension");
0258 switch(*begin)
0259 {
0260 case BOOST_XPR_CHAR_(char_type, '<'):
0261 this->get_name_(++begin, end, name);
0262 BOOST_XPR_ENSURE_(begin != end && BOOST_XPR_CHAR_(char_type, '>') == *begin++, error_paren, "incomplete extension");
0263 return token_named_mark;
0264 case BOOST_XPR_CHAR_(char_type, '='):
0265 this->get_name_(++begin, end, name);
0266 BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension");
0267 return token_named_mark_ref;
0268 default:
0269 BOOST_THROW_EXCEPTION(regex_error(error_badbrace, "unrecognized extension"));
0270 }
0271
0272 case BOOST_XPR_CHAR_(char_type, 'i'):
0273 case BOOST_XPR_CHAR_(char_type, 'm'):
0274 case BOOST_XPR_CHAR_(char_type, 's'):
0275 case BOOST_XPR_CHAR_(char_type, 'x'):
0276 case BOOST_XPR_CHAR_(char_type, '-'):
0277 return this->parse_mods_(begin, end);
0278
0279 default:
0280 BOOST_THROW_EXCEPTION(regex_error(error_badbrace, "unrecognized extension"));
0281 }
0282 }
0283
0284 return token_literal;
0285 }
0286
0287
0288
0289
0290 template<typename FwdIter>
0291 regex_constants::compiler_token_type get_charset_token(FwdIter &begin, FwdIter end)
0292 {
0293 using namespace regex_constants;
0294 BOOST_ASSERT(begin != end);
0295 switch(*begin)
0296 {
0297 case BOOST_XPR_CHAR_(char_type, '^'): ++begin; return token_charset_invert;
0298 case BOOST_XPR_CHAR_(char_type, '-'): ++begin; return token_charset_hyphen;
0299 case BOOST_XPR_CHAR_(char_type, ']'): ++begin; return token_charset_end;
0300 case BOOST_XPR_CHAR_(char_type, '['):
0301 {
0302 FwdIter next = begin; ++next;
0303 if(next != end)
0304 {
0305 BOOST_XPR_ENSURE_(
0306 *next != BOOST_XPR_CHAR_(char_type, '=')
0307 , error_collate
0308 , "equivalence classes are not yet supported"
0309 );
0310
0311 BOOST_XPR_ENSURE_(
0312 *next != BOOST_XPR_CHAR_(char_type, '.')
0313 , error_collate
0314 , "collation sequences are not yet supported"
0315 );
0316
0317 if(*next == BOOST_XPR_CHAR_(char_type, ':'))
0318 {
0319 begin = ++next;
0320 return token_posix_charset_begin;
0321 }
0322 }
0323 }
0324 break;
0325 case BOOST_XPR_CHAR_(char_type, ':'):
0326 {
0327 FwdIter next = begin; ++next;
0328 if(next != end && *next == BOOST_XPR_CHAR_(char_type, ']'))
0329 {
0330 begin = ++next;
0331 return token_posix_charset_end;
0332 }
0333 }
0334 break;
0335 case BOOST_XPR_CHAR_(char_type, '\\'):
0336 if(++begin != end)
0337 {
0338 switch(*begin)
0339 {
0340 case BOOST_XPR_CHAR_(char_type, 'b'): ++begin; return token_charset_backspace;
0341 default:;
0342 }
0343 }
0344 return token_escape;
0345 default:;
0346 }
0347 return token_literal;
0348 }
0349
0350
0351
0352 template<typename FwdIter>
0353 regex_constants::compiler_token_type get_escape_token(FwdIter &begin, FwdIter end)
0354 {
0355 using namespace regex_constants;
0356 if(begin != end)
0357 {
0358 switch(*begin)
0359 {
0360
0361
0362
0363
0364
0365
0366
0367 case BOOST_XPR_CHAR_(char_type, 'A'): ++begin; return token_assert_begin_sequence;
0368 case BOOST_XPR_CHAR_(char_type, 'b'): ++begin; return token_assert_word_boundary;
0369 case BOOST_XPR_CHAR_(char_type, 'B'): ++begin; return token_assert_not_word_boundary;
0370 case BOOST_XPR_CHAR_(char_type, 'E'): ++begin; return token_quote_meta_end;
0371 case BOOST_XPR_CHAR_(char_type, 'Q'): ++begin; return token_quote_meta_begin;
0372 case BOOST_XPR_CHAR_(char_type, 'Z'): ++begin; return token_assert_end_sequence;
0373
0374 case BOOST_XPR_CHAR_(char_type, '<'): ++begin; return token_assert_word_begin;
0375 case BOOST_XPR_CHAR_(char_type, '>'): ++begin; return token_assert_word_end;
0376 default:;
0377 }
0378 }
0379
0380 return token_escape;
0381 }
0382
0383 private:
0384
0385
0386
0387 template<typename FwdIter>
0388 regex_constants::compiler_token_type parse_mods_(FwdIter &begin, FwdIter end)
0389 {
0390 using namespace regex_constants;
0391 bool set = true;
0392 do switch(*begin)
0393 {
0394 case BOOST_XPR_CHAR_(char_type, 'i'): this->flag_(set, icase_); break;
0395 case BOOST_XPR_CHAR_(char_type, 'm'): this->flag_(!set, single_line); break;
0396 case BOOST_XPR_CHAR_(char_type, 's'): this->flag_(!set, not_dot_newline); break;
0397 case BOOST_XPR_CHAR_(char_type, 'x'): this->flag_(set, ignore_white_space); break;
0398 case BOOST_XPR_CHAR_(char_type, ':'): ++begin; BOOST_FALLTHROUGH;
0399 case BOOST_XPR_CHAR_(char_type, ')'): return token_no_mark;
0400 case BOOST_XPR_CHAR_(char_type, '-'): if(false == (set = !set)) break; BOOST_FALLTHROUGH;
0401 default: BOOST_THROW_EXCEPTION(regex_error(error_paren, "unknown pattern modifier"));
0402 }
0403 while(BOOST_XPR_ENSURE_(++begin != end, error_paren, "incomplete extension"));
0404
0405
0406 return token_no_mark;
0407 }
0408
0409
0410
0411 void flag_(bool set, regex_constants::syntax_option_type flag)
0412 {
0413 this->flags_ = set ? (this->flags_ | flag) : (this->flags_ & ~flag);
0414 }
0415
0416
0417
0418 bool is_space_(char_type ch) const
0419 {
0420 return 0 != this->space_ && this->traits().isctype(ch, this->space_);
0421 }
0422
0423
0424
0425 bool is_alnum_(char_type ch) const
0426 {
0427 return 0 != this->alnum_ && this->traits().isctype(ch, this->alnum_);
0428 }
0429
0430
0431
0432 template<typename FwdIter>
0433 void get_name_(FwdIter &begin, FwdIter end, string_type &name)
0434 {
0435 this->eat_ws_(begin, end);
0436 for(name.clear(); begin != end && this->is_alnum_(*begin); ++begin)
0437 {
0438 name.push_back(*begin);
0439 }
0440 this->eat_ws_(begin, end);
0441 BOOST_XPR_ENSURE_(!name.empty(), regex_constants::error_paren, "incomplete extension");
0442 }
0443
0444
0445
0446 template<typename FwdIter>
0447 FwdIter &eat_ws_(FwdIter &begin, FwdIter end)
0448 {
0449 if(0 != (regex_constants::ignore_white_space & this->flags()))
0450 {
0451 while(end != begin && (BOOST_XPR_CHAR_(char_type, '#') == *begin || this->is_space_(*begin)))
0452 {
0453 if(BOOST_XPR_CHAR_(char_type, '#') == *begin++)
0454 {
0455 while(end != begin && BOOST_XPR_CHAR_(char_type, '\n') != *begin++) {}
0456 }
0457 else
0458 {
0459 for(; end != begin && this->is_space_(*begin); ++begin) {}
0460 }
0461 }
0462 }
0463
0464 return begin;
0465 }
0466
0467 regex_traits traits_;
0468 regex_constants::syntax_option_type flags_;
0469 typename regex_traits::char_class_type space_;
0470 typename regex_traits::char_class_type alnum_;
0471 };
0472
0473 }}
0474
0475 #endif