File indexing completed on 2025-01-18 09:51:22
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019 #ifndef BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
0020 #define BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
0021
0022 #ifdef BOOST_MSVC
0023 #pragma warning(push)
0024 #pragma warning(disable: 4103)
0025 #if BOOST_MSVC >= 1800
0026 #pragma warning(disable: 26812)
0027 #endif
0028 #endif
0029 #ifdef BOOST_HAS_ABI_HEADERS
0030 # include BOOST_ABI_PREFIX
0031 #endif
0032 #ifdef BOOST_MSVC
0033 #pragma warning(pop)
0034 #endif
0035
0036 namespace boost{
0037 namespace BOOST_REGEX_DETAIL_NS{
0038
0039 #ifdef BOOST_MSVC
0040 #pragma warning(push)
0041 #pragma warning(disable:4244)
0042 #if BOOST_MSVC < 1910
0043 #pragma warning(disable:4800)
0044 #endif
0045 #endif
0046
0047 inline boost::intmax_t umax(mpl::false_ const&)
0048 {
0049
0050 return std::numeric_limits<boost::intmax_t>::is_specialized ? (std::numeric_limits<boost::intmax_t>::max)() : INT_MAX;
0051 }
0052 inline boost::intmax_t umax(mpl::true_ const&)
0053 {
0054 return (std::numeric_limits<std::size_t>::max)();
0055 }
0056
0057 inline boost::intmax_t umax()
0058 {
0059 return umax(mpl::bool_<std::numeric_limits<boost::intmax_t>::digits >= std::numeric_limits<std::size_t>::digits>());
0060 }
0061 // Parser for regular-expression text: parse() walks the characters [p1, p2) and appends states through the basic_regex_creator base class.
0062 template <class charT, class traits>
0063 class basic_regex_parser : public basic_regex_creator<charT, traits>
0064 {
0065 public:
0066 basic_regex_parser(regex_data<charT, traits>* data);
0067 void parse(const charT* p1, const charT* p2, unsigned flags); // entry point: parse [p1, p2) using the given syntax flags
0068 void fail(regex_constants::error_type error_code, std::ptrdiff_t position); // uses the traits class' error_string() as the message
0069 void fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos); // records the error and, unless no_except is set, raises regex_error
0070 void fail(regex_constants::error_type error_code, std::ptrdiff_t position, const std::string& message)
0071 {
0072 fail(error_code, position, message, position); // start_pos == position: the 4-argument overload then picks its own context window
0073 }
0074 // Parsing routines.  parse_all() keeps calling m_parser_proc until it returns false or the input is exhausted.
0075 bool parse_all();
0076 bool parse_basic();
0077 bool parse_extended();
0078 bool parse_literal();
0079 bool parse_open_paren();
0080 bool parse_basic_escape();
0081 bool parse_extended_escape();
0082 bool parse_match_any();
0083 bool parse_repeat(std::size_t low = 0, std::size_t high = (std::numeric_limits<std::size_t>::max)()); // defaults describe a "zero or more" repeat
0084 bool parse_repeat_range(bool isbasic);
0085 bool parse_alt();
0086 bool parse_set();
0087 bool parse_backref();
0088 void parse_set_literal(basic_char_set<charT, traits>& char_set);
0089 bool parse_inner_set(basic_char_set<charT, traits>& char_set);
0090 bool parse_QE();
0091 bool parse_perl_extension();
0092 bool parse_perl_verb();
0093 bool match_verb(const char*);
0094 bool add_emacs_code(bool negate);
0095 bool unwind_alts(std::ptrdiff_t last_paren_start);
0096 digraph<charT> get_next_set_literal(basic_char_set<charT, traits>& char_set);
0097 charT unescape_character();
0098 regex_constants::syntax_option_type parse_options();
0099 // Parser state:
0100 private:
0101 typedef bool (basic_regex_parser::*parser_proc_type)();
0102 typedef typename traits::string_type string_type;
0103 typedef typename traits::char_class_type char_class_type;
0104 parser_proc_type m_parser_proc; // syntax-specific routine selected in parse()
0105 const charT* m_base; // start of the expression being parsed
0106 const charT* m_end; // one past the last character of the expression
0107 const charT* m_position; // current parse position; fail() moves it to m_end
0108 unsigned m_mark_count; // incremented for each marked sub-expression opened
0109 int m_mark_reset; // saved and set to -1 around each group in parse_open_paren(); NOTE(review): other uses are outside this view
0110 unsigned m_max_mark; // NOTE(review): not used in the visible part of the file - confirm purpose
0111 std::ptrdiff_t m_paren_start; // state offset of the start mark of the most recently closed group
0112 std::ptrdiff_t m_alt_insert_point; // size of the state data recorded when the current group was opened
0113 bool m_has_case_change; // when set inside a group, parse_open_paren() appends a toggle_case state at the group's end
0114 unsigned m_recursion_count; // nesting depth of parse_all(); more than 400 is reported as error_complexity
0115 #if defined(BOOST_MSVC) && defined(_M_IX86)
0116 // 32-bit MSVC builds store these values as long instead of std::ptrdiff_t;
0117 // the assert checks that long is at least as wide as a pointer.
0118 BOOST_STATIC_ASSERT(sizeof(long) >= sizeof(void*));
0119 std::vector<long> m_alt_jumps;
0120 #else
0121 std::vector<std::ptrdiff_t> m_alt_jumps; // NOTE(review): presumably offsets of pending alternative jumps - confirm in unwind_alts()
0122 #endif
0123 // Copy operations are private and only declared here (no definition is visible in this part of the file).
0124 basic_regex_parser& operator=(const basic_regex_parser&);
0125 basic_regex_parser(const basic_regex_parser&);
0126 };
0127
0128 template <class charT, class traits>
0129 basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data)
0130 : basic_regex_creator<charT, traits>(data), m_parser_proc(), m_base(0), m_end(0), m_position(0),
0131 m_mark_count(0), m_mark_reset(-1), m_max_mark(0), m_paren_start(0), m_alt_insert_point(0), m_has_case_change(false), m_recursion_count(0)
0132 {
0133 }
0134
0135 template <class charT, class traits>
0136 void basic_regex_parser<charT, traits>::parse(const charT* p1, const charT* p2, unsigned l_flags)
0137 {
0138
0139 this->init(l_flags);
0140
0141 m_position = m_base = p1;
0142 m_end = p2;
0143
0144 if((p1 == p2) &&
0145 (
0146 ((l_flags & regbase::main_option_type) != regbase::perl_syntax_group)
0147 || (l_flags & regbase::no_empty_expressions)
0148 )
0149 )
0150 {
0151 fail(regex_constants::error_empty, 0);
0152 return;
0153 }
0154
0155 switch(l_flags & regbase::main_option_type)
0156 {
0157 case regbase::perl_syntax_group:
0158 {
0159 m_parser_proc = &basic_regex_parser<charT, traits>::parse_extended;
0160
0161
0162
0163 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
0164 br->index = 0;
0165 br->icase = this->flags() & regbase::icase;
0166 break;
0167 }
0168 case regbase::basic_syntax_group:
0169 m_parser_proc = &basic_regex_parser<charT, traits>::parse_basic;
0170 break;
0171 case regbase::literal:
0172 m_parser_proc = &basic_regex_parser<charT, traits>::parse_literal;
0173 break;
0174 default:
0175
0176
0177 fail(regex_constants::error_unknown, 0, "An invalid combination of regular expression syntax flags was used.");
0178 return;
0179 }
0180
0181
0182 bool result = parse_all();
0183
0184
0185
0186 unwind_alts(-1);
0187
0188 this->flags(l_flags);
0189
0190
0191 if(!result)
0192 {
0193 fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_position), "Found a closing ) with no corresponding opening parenthesis.");
0194 return;
0195 }
0196
0197 if(this->m_pdata->m_status)
0198 return;
0199
0200 this->m_pdata->m_mark_count = 1u + (std::size_t)m_mark_count;
0201 this->finalize(p1, p2);
0202 }
0203
0204 template <class charT, class traits>
0205 void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position)
0206 {
0207
0208 std::string message = this->m_pdata->m_ptraits->error_string(error_code);
0209 fail(error_code, position, message);
0210 }
0211
0212 template <class charT, class traits>
0213 void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos)
0214 {
0215 if(0 == this->m_pdata->m_status)
0216 this->m_pdata->m_status = error_code;
0217 m_position = m_end;
0218
0219 #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
0220
0221
0222
0223 if(start_pos == position)
0224 start_pos = (std::max)(static_cast<std::ptrdiff_t>(0), position - static_cast<std::ptrdiff_t>(10));
0225 std::ptrdiff_t end_pos = (std::min)(position + static_cast<std::ptrdiff_t>(10), static_cast<std::ptrdiff_t>(m_end - m_base));
0226 if(error_code != regex_constants::error_empty)
0227 {
0228 if((start_pos != 0) || (end_pos != (m_end - m_base)))
0229 message += " The error occurred while parsing the regular expression fragment: '";
0230 else
0231 message += " The error occurred while parsing the regular expression: '";
0232 if(start_pos != end_pos)
0233 {
0234 message += std::string(m_base + start_pos, m_base + position);
0235 message += ">>>HERE>>>";
0236 message += std::string(m_base + position, m_base + end_pos);
0237 }
0238 message += "'.";
0239 }
0240 #endif
0241
0242 #ifndef BOOST_NO_EXCEPTIONS
0243 if(0 == (this->flags() & regex_constants::no_except))
0244 {
0245 boost::regex_error e(message, error_code, position);
0246 e.raise();
0247 }
0248 #else
0249 (void)position;
0250 #endif
0251 }
0252
0253 template <class charT, class traits>
0254 bool basic_regex_parser<charT, traits>::parse_all()
0255 {
0256 if (++m_recursion_count > 400)
0257 {
0258
0259 fail(boost::regex_constants::error_complexity, m_position - m_base, "Exceeded nested brace limit.");
0260 }
0261 bool result = true;
0262 while(result && (m_position != m_end))
0263 {
0264 result = (this->*m_parser_proc)();
0265 }
0266 --m_recursion_count;
0267 return result;
0268 }
0269
0270 #ifdef BOOST_MSVC
0271 #pragma warning(push)
0272 #pragma warning(disable:4702)
0273 #endif
0274 template <class charT, class traits>
0275 bool basic_regex_parser<charT, traits>::parse_basic()
0276 {
0277 switch(this->m_traits.syntax_type(*m_position))
0278 {
0279 case regex_constants::syntax_escape:
0280 return parse_basic_escape();
0281 case regex_constants::syntax_dot:
0282 return parse_match_any();
0283 case regex_constants::syntax_caret:
0284 ++m_position;
0285 this->append_state(syntax_element_start_line);
0286 break;
0287 case regex_constants::syntax_dollar:
0288 ++m_position;
0289 this->append_state(syntax_element_end_line);
0290 break;
0291 case regex_constants::syntax_star:
0292 if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line))
0293 return parse_literal();
0294 else
0295 {
0296 ++m_position;
0297 return parse_repeat();
0298 }
0299 case regex_constants::syntax_plus:
0300 if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
0301 return parse_literal();
0302 else
0303 {
0304 ++m_position;
0305 return parse_repeat(1);
0306 }
0307 case regex_constants::syntax_question:
0308 if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
0309 return parse_literal();
0310 else
0311 {
0312 ++m_position;
0313 return parse_repeat(0, 1);
0314 }
0315 case regex_constants::syntax_open_set:
0316 return parse_set();
0317 case regex_constants::syntax_newline:
0318 if(this->flags() & regbase::newline_alt)
0319 return parse_alt();
0320 else
0321 return parse_literal();
0322 default:
0323 return parse_literal();
0324 }
0325 return true;
0326 }
0327
0328 #ifdef BOOST_MSVC
0329 # pragma warning(push)
0330 #if BOOST_MSVC >= 1800
0331 #pragma warning(disable:26812)
0332 #endif
0333 #endif
0334 template <class charT, class traits>
0335 bool basic_regex_parser<charT, traits>::parse_extended()
0336 {
0337 bool result = true;
0338 switch(this->m_traits.syntax_type(*m_position))
0339 {
0340 case regex_constants::syntax_open_mark:
0341 return parse_open_paren();
0342 case regex_constants::syntax_close_mark:
0343 return false;
0344 case regex_constants::syntax_escape:
0345 return parse_extended_escape();
0346 case regex_constants::syntax_dot:
0347 return parse_match_any();
0348 case regex_constants::syntax_caret:
0349 ++m_position;
0350 this->append_state(
0351 (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_start : syntax_element_start_line));
0352 break;
0353 case regex_constants::syntax_dollar:
0354 ++m_position;
0355 this->append_state(
0356 (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_end : syntax_element_end_line));
0357 break;
0358 case regex_constants::syntax_star:
0359 if(m_position == this->m_base)
0360 {
0361 fail(regex_constants::error_badrepeat, 0, "The repeat operator \"*\" cannot start a regular expression.");
0362 return false;
0363 }
0364 ++m_position;
0365 return parse_repeat();
0366 case regex_constants::syntax_question:
0367 if(m_position == this->m_base)
0368 {
0369 fail(regex_constants::error_badrepeat, 0, "The repeat operator \"?\" cannot start a regular expression.");
0370 return false;
0371 }
0372 ++m_position;
0373 return parse_repeat(0,1);
0374 case regex_constants::syntax_plus:
0375 if(m_position == this->m_base)
0376 {
0377 fail(regex_constants::error_badrepeat, 0, "The repeat operator \"+\" cannot start a regular expression.");
0378 return false;
0379 }
0380 ++m_position;
0381 return parse_repeat(1);
0382 case regex_constants::syntax_open_brace:
0383 ++m_position;
0384 return parse_repeat_range(false);
0385 case regex_constants::syntax_close_brace:
0386 if((this->flags() & regbase::no_perl_ex) == regbase::no_perl_ex)
0387 {
0388 fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {.");
0389 return false;
0390 }
0391 result = parse_literal();
0392 break;
0393 case regex_constants::syntax_or:
0394 return parse_alt();
0395 case regex_constants::syntax_open_set:
0396 return parse_set();
0397 case regex_constants::syntax_newline:
0398 if(this->flags() & regbase::newline_alt)
0399 return parse_alt();
0400 else
0401 return parse_literal();
0402 case regex_constants::syntax_hash:
0403
0404
0405
0406
0407 if((this->flags()
0408 & (regbase::no_perl_ex|regbase::mod_x))
0409 == regbase::mod_x)
0410 {
0411 while((m_position != m_end) && !is_separator(*m_position++)){}
0412 return true;
0413 }
0414 BOOST_FALLTHROUGH;
0415 default:
0416 result = parse_literal();
0417 break;
0418 }
0419 return result;
0420 }
0421 #ifdef BOOST_MSVC
0422 # pragma warning(pop)
0423 #endif
0424 #ifdef BOOST_MSVC
0425 #pragma warning(pop)
0426 #endif
0427
0428 template <class charT, class traits>
0429 bool basic_regex_parser<charT, traits>::parse_literal()
0430 {
0431
0432
0433 if(
0434 ((this->flags()
0435 & (regbase::main_option_type|regbase::mod_x|regbase::no_perl_ex))
0436 != regbase::mod_x)
0437 || !this->m_traits.isctype(*m_position, this->m_mask_space))
0438 this->append_literal(*m_position);
0439 ++m_position;
0440 return true;
0441 }
0442
0443 template <class charT, class traits>
0444 bool basic_regex_parser<charT, traits>::parse_open_paren()
0445 {
0446
0447
0448
0449 if(++m_position == m_end)
0450 {
0451 fail(regex_constants::error_paren, m_position - m_base);
0452 return false;
0453 }
0454
0455
0456
0457 if(
0458 ((this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) == 0)
0459 || ((this->flags() & (regbase::main_option_type | regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
0460 )
0461 {
0462 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
0463 return parse_perl_extension();
0464 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_star)
0465 return parse_perl_verb();
0466 }
0467
0468
0469
0470 unsigned markid = 0;
0471 if(0 == (this->flags() & regbase::nosubs))
0472 {
0473 markid = ++m_mark_count;
0474 #ifndef BOOST_NO_STD_DISTANCE
0475 if(this->flags() & regbase::save_subexpression_location)
0476 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 1, 0));
0477 #else
0478 if(this->flags() & regbase::save_subexpression_location)
0479 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 1, 0));
0480 #endif
0481 }
0482 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
0483 pb->index = markid;
0484 pb->icase = this->flags() & regbase::icase;
0485 std::ptrdiff_t last_paren_start = this->getoffset(pb);
0486
0487 std::ptrdiff_t last_alt_point = m_alt_insert_point;
0488 this->m_pdata->m_data.align();
0489 m_alt_insert_point = this->m_pdata->m_data.size();
0490
0491
0492
0493 regex_constants::syntax_option_type opts = this->flags();
0494 bool old_case_change = m_has_case_change;
0495 m_has_case_change = false;
0496
0497
0498
0499 int mark_reset = m_mark_reset;
0500 m_mark_reset = -1;
0501
0502
0503
0504
0505 parse_all();
0506
0507
0508
0509 if(0 == unwind_alts(last_paren_start))
0510 return false;
0511
0512
0513
0514 if(m_has_case_change)
0515 {
0516
0517
0518
0519 static_cast<re_case*>(
0520 this->append_state(syntax_element_toggle_case, sizeof(re_case))
0521 )->icase = opts & regbase::icase;
0522 }
0523 this->flags(opts);
0524 m_has_case_change = old_case_change;
0525
0526
0527
0528 m_mark_reset = mark_reset;
0529
0530
0531
0532 if(m_position == m_end)
0533 {
0534 this->fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_end));
0535 return false;
0536 }
0537 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
0538 return false;
0539 #ifndef BOOST_NO_STD_DISTANCE
0540 if(markid && (this->flags() & regbase::save_subexpression_location))
0541 this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position);
0542 #else
0543 if(markid && (this->flags() & regbase::save_subexpression_location))
0544 this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base);
0545 #endif
0546 ++m_position;
0547
0548
0549
0550 pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
0551 pb->index = markid;
0552 pb->icase = this->flags() & regbase::icase;
0553 this->m_paren_start = last_paren_start;
0554
0555
0556
0557 this->m_alt_insert_point = last_alt_point;
0558
0559
0560
0561 if(markid > 0)
0562 this->m_backrefs.set(markid);
0563
0564 return true;
0565 }
0566
0567 template <class charT, class traits>
0568 bool basic_regex_parser<charT, traits>::parse_basic_escape()
0569 {
0570 if(++m_position == m_end)
0571 {
0572 fail(regex_constants::error_paren, m_position - m_base);
0573 return false;
0574 }
0575 bool result = true;
0576 switch(this->m_traits.escape_syntax_type(*m_position))
0577 {
0578 case regex_constants::syntax_open_mark:
0579 return parse_open_paren();
0580 case regex_constants::syntax_close_mark:
0581 return false;
0582 case regex_constants::syntax_plus:
0583 if(this->flags() & regex_constants::bk_plus_qm)
0584 {
0585 ++m_position;
0586 return parse_repeat(1);
0587 }
0588 else
0589 return parse_literal();
0590 case regex_constants::syntax_question:
0591 if(this->flags() & regex_constants::bk_plus_qm)
0592 {
0593 ++m_position;
0594 return parse_repeat(0, 1);
0595 }
0596 else
0597 return parse_literal();
0598 case regex_constants::syntax_open_brace:
0599 if(this->flags() & regbase::no_intervals)
0600 return parse_literal();
0601 ++m_position;
0602 return parse_repeat_range(true);
0603 case regex_constants::syntax_close_brace:
0604 if(this->flags() & regbase::no_intervals)
0605 return parse_literal();
0606 fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {.");
0607 return false;
0608 case regex_constants::syntax_or:
0609 if(this->flags() & regbase::bk_vbar)
0610 return parse_alt();
0611 else
0612 result = parse_literal();
0613 break;
0614 case regex_constants::syntax_digit:
0615 return parse_backref();
0616 case regex_constants::escape_type_start_buffer:
0617 if(this->flags() & regbase::emacs_ex)
0618 {
0619 ++m_position;
0620 this->append_state(syntax_element_buffer_start);
0621 }
0622 else
0623 result = parse_literal();
0624 break;
0625 case regex_constants::escape_type_end_buffer:
0626 if(this->flags() & regbase::emacs_ex)
0627 {
0628 ++m_position;
0629 this->append_state(syntax_element_buffer_end);
0630 }
0631 else
0632 result = parse_literal();
0633 break;
0634 case regex_constants::escape_type_word_assert:
0635 if(this->flags() & regbase::emacs_ex)
0636 {
0637 ++m_position;
0638 this->append_state(syntax_element_word_boundary);
0639 }
0640 else
0641 result = parse_literal();
0642 break;
0643 case regex_constants::escape_type_not_word_assert:
0644 if(this->flags() & regbase::emacs_ex)
0645 {
0646 ++m_position;
0647 this->append_state(syntax_element_within_word);
0648 }
0649 else
0650 result = parse_literal();
0651 break;
0652 case regex_constants::escape_type_left_word:
0653 if(this->flags() & regbase::emacs_ex)
0654 {
0655 ++m_position;
0656 this->append_state(syntax_element_word_start);
0657 }
0658 else
0659 result = parse_literal();
0660 break;
0661 case regex_constants::escape_type_right_word:
0662 if(this->flags() & regbase::emacs_ex)
0663 {
0664 ++m_position;
0665 this->append_state(syntax_element_word_end);
0666 }
0667 else
0668 result = parse_literal();
0669 break;
0670 default:
0671 if(this->flags() & regbase::emacs_ex)
0672 {
0673 bool negate = true;
0674 switch(*m_position)
0675 {
0676 case 'w':
0677 negate = false;
0678 BOOST_FALLTHROUGH;
0679 case 'W':
0680 {
0681 basic_char_set<charT, traits> char_set;
0682 if(negate)
0683 char_set.negate();
0684 char_set.add_class(this->m_word_mask);
0685 if(0 == this->append_set(char_set))
0686 {
0687 fail(regex_constants::error_ctype, m_position - m_base);
0688 return false;
0689 }
0690 ++m_position;
0691 return true;
0692 }
0693 case 's':
0694 negate = false;
0695 BOOST_FALLTHROUGH;
0696 case 'S':
0697 return add_emacs_code(negate);
0698 case 'c':
0699 case 'C':
0700
0701 fail(regex_constants::error_escape, m_position - m_base, "The \\c and \\C escape sequences are not supported by POSIX basic regular expressions: try the Perl syntax instead.");
0702 return false;
0703 default:
0704 break;
0705 }
0706 }
0707 result = parse_literal();
0708 break;
0709 }
0710 return result;
0711 }
0712 // Parses an escape sequence in an extended/Perl style expression (m_position is on the escape character on entry) and appends the matching state(s). Returns false after reporting an error through fail().
0713 template <class charT, class traits>
0714 bool basic_regex_parser<charT, traits>::parse_extended_escape()
0715 {
0716 ++m_position;
0717 if(m_position == m_end)
0718 {
0719 fail(regex_constants::error_escape, m_position - m_base, "Incomplete escape sequence found.");
0720 return false;
0721 }
0722 bool negate = false; // set for the "not" forms of class and property escapes
0723 switch(this->m_traits.escape_syntax_type(*m_position))
0724 {
0725 case regex_constants::escape_type_not_class:
0726 negate = true;
0727 BOOST_FALLTHROUGH;
0728 case regex_constants::escape_type_class:
0729 {
0730 escape_type_class_jump: // also the target of the gotos in the cases further down
0731 typedef typename traits::char_class_type m_type;
0732 m_type m = this->m_traits.lookup_classname(m_position, m_position+1); // look the single character up as a class name
0733 if(m != 0)
0734 {
0735 basic_char_set<charT, traits> char_set;
0736 if(negate)
0737 char_set.negate();
0738 char_set.add_class(m);
0739 if(0 == this->append_set(char_set))
0740 {
0741 fail(regex_constants::error_ctype, m_position - m_base);
0742 return false;
0743 }
0744 ++m_position;
0745 return true;
0746 }
0747 // lookup_classname() returned 0: the character does not name a class,
0748 // so treat the sequence as an ordinary escaped character instead.
0749 //
0750 this->append_literal(unescape_character());
0751 break;
0752 }
0753 case regex_constants::syntax_digit:
0754 return parse_backref();
0755 case regex_constants::escape_type_left_word:
0756 ++m_position;
0757 this->append_state(syntax_element_word_start);
0758 break;
0759 case regex_constants::escape_type_right_word:
0760 ++m_position;
0761 this->append_state(syntax_element_word_end);
0762 break;
0763 case regex_constants::escape_type_start_buffer:
0764 ++m_position;
0765 this->append_state(syntax_element_buffer_start);
0766 break;
0767 case regex_constants::escape_type_end_buffer:
0768 ++m_position;
0769 this->append_state(syntax_element_buffer_end);
0770 break;
0771 case regex_constants::escape_type_word_assert:
0772 ++m_position;
0773 this->append_state(syntax_element_word_boundary);
0774 break;
0775 case regex_constants::escape_type_not_word_assert:
0776 ++m_position;
0777 this->append_state(syntax_element_within_word);
0778 break;
0779 case regex_constants::escape_type_Z:
0780 ++m_position;
0781 this->append_state(syntax_element_soft_buffer_end);
0782 break;
0783 case regex_constants::escape_type_Q:
0784 return parse_QE();
0785 case regex_constants::escape_type_C:
0786 return parse_match_any();
0787 case regex_constants::escape_type_X:
0788 ++m_position;
0789 this->append_state(syntax_element_combining);
0790 break;
0791 case regex_constants::escape_type_G:
0792 ++m_position;
0793 this->append_state(syntax_element_restart_continue);
0794 break;
0795 case regex_constants::escape_type_not_property:
0796 negate = true;
0797 BOOST_FALLTHROUGH;
0798 case regex_constants::escape_type_property:
0799 {
0800 ++m_position;
0801 char_class_type m;
0802 if(m_position == m_end)
0803 {
0804 fail(regex_constants::error_escape, m_position - m_base, "Incomplete property escape found.");
0805 return false;
0806 }
0807 // the property name is either enclosed in braces or is a single character:
0808 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
0809 {
0810 const charT* base = m_position;
0811 // scan forward to the closing brace:
0812 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
0813 ++m_position;
0814 if(m_position == m_end)
0815 {
0816 fail(regex_constants::error_escape, m_position - m_base, "Closing } missing from property escape sequence.");
0817 return false;
0818 }
0819 m = this->m_traits.lookup_classname(++base, m_position++); // name is the text between the braces; m_position ends up past the '}'
0820 }
0821 else
0822 {
0823 m = this->m_traits.lookup_classname(m_position, m_position+1);
0824 ++m_position;
0825 }
0826 if(m != 0)
0827 {
0828 basic_char_set<charT, traits> char_set;
0829 if(negate)
0830 char_set.negate();
0831 char_set.add_class(m);
0832 if(0 == this->append_set(char_set))
0833 {
0834 fail(regex_constants::error_ctype, m_position - m_base);
0835 return false;
0836 }
0837 return true;
0838 }
0839 fail(regex_constants::error_ctype, m_position - m_base, "Escape sequence was neither a valid property nor a valid character class name.");
0840 return false;
0841 }
0842 case regex_constants::escape_type_reset_start_mark:
0843 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex))) // only with Perl syntax and Perl extensions enabled
0844 {
0845 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
0846 pb->index = -5; // NOTE(review): special start-mark index; its meaning is defined outside this view - confirm
0847 pb->icase = this->flags() & regbase::icase;
0848 this->m_pdata->m_data.align();
0849 ++m_position;
0850 return true;
0851 }
0852 goto escape_type_class_jump; // otherwise handle it like a character-class escape
0853 case regex_constants::escape_type_line_ending:
0854 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
0855 {
0856 const charT* e = get_escape_R_string<charT>(); // expression text that is parsed in place of this escape
0857 const charT* old_position = m_position;
0858 const charT* old_end = m_end;
0859 const charT* old_base = m_base;
0860 m_position = e; // temporarily redirect the parser to the replacement text
0861 m_base = e;
0862 m_end = e + traits::length(e);
0863 bool r = parse_all();
0864 m_position = ++old_position; // resume just after the escaped character
0865 m_end = old_end;
0866 m_base = old_base;
0867 return r;
0868 }
0869 goto escape_type_class_jump;
0870 case regex_constants::escape_type_extended_backref:
0871 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
0872 {
0873 bool have_brace = false; // an opening delimiter was seen and must be closed by syn_end
0874 bool negative = false; // the reference number was preceded by '-'
0875 static const char incomplete_message[] = "Incomplete \\g escape found.";
0876 if(++m_position == m_end)
0877 {
0878 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
0879 return false;
0880 }
0881 // an optional opening delimiter may precede the reference; pick the matching closing one:
0882 regex_constants::syntax_type syn = this->m_traits.syntax_type(*m_position);
0883 regex_constants::syntax_type syn_end = 0;
0884 if((syn == regex_constants::syntax_open_brace)
0885 || (syn == regex_constants::escape_type_left_word)
0886 || (syn == regex_constants::escape_type_end_buffer))
0887 {
0888 if(++m_position == m_end)
0889 {
0890 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
0891 return false;
0892 }
0893 have_brace = true;
0894 switch(syn)
0895 {
0896 case regex_constants::syntax_open_brace:
0897 syn_end = regex_constants::syntax_close_brace;
0898 break;
0899 case regex_constants::escape_type_left_word:
0900 syn_end = regex_constants::escape_type_right_word;
0901 break;
0902 default:
0903 syn_end = regex_constants::escape_type_end_buffer;
0904 break;
0905 }
0906 }
0907 negative = (*m_position == static_cast<charT>('-'));
0908 if((negative) && (++m_position == m_end))
0909 {
0910 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
0911 return false;
0912 }
0913 const charT* pc = m_position;
0914 boost::intmax_t i = this->m_traits.toi(pc, m_end, 10); // try to read a decimal number; pc is passed so toi() can advance it
0915 if((i < 0) && syn_end)
0916 {
0917 // no number was read but a delimiter is present: hash the text up to the closing delimiter as a capture name
0918 const charT* base = m_position;
0919 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != syn_end))
0920 {
0921 ++m_position;
0922 }
0923 i = hash_value_from_capture_name(base, m_position);
0924 pc = m_position;
0925 }
0926 if(negative)
0927 i = 1 + (static_cast<boost::intmax_t>(m_mark_count) - i); // convert a '-' reference into an index counted back from the current mark count
0928 if(((i < hash_value_mask) && (i > 0) && (this->m_backrefs.test(i))) || ((i >= hash_value_mask) && (this->m_pdata->get_id(i) > 0) && (this->m_backrefs.test(this->m_pdata->get_id(i))))) // the target (direct index, or id looked up from the hash) must already be set in m_backrefs
0929 {
0930 m_position = pc;
0931 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
0932 pb->index = i;
0933 pb->icase = this->flags() & regbase::icase;
0934 }
0935 else
0936 {
0937 fail(regex_constants::error_backref, m_position - m_base);
0938 return false;
0939 }
0940 m_position = pc;
0941 if(have_brace)
0942 {
0943 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != syn_end)) // the closing delimiter is mandatory once an opening one was seen
0944 {
0945 fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
0946 return false;
0947 }
0948 ++m_position;
0949 }
0950 return true;
0951 }
0952 goto escape_type_class_jump;
0953 case regex_constants::escape_type_control_v:
0954 if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
0955 goto escape_type_class_jump; // with Perl syntax and extensions enabled this is handled as a class escape
0956 BOOST_FALLTHROUGH;
0957 default:
0958 this->append_literal(unescape_character()); // any other escape is a single escaped character
0959 break;
0960 }
0961 return true;
0962 }
0963
0964 template <class charT, class traits>
0965 bool basic_regex_parser<charT, traits>::parse_match_any()
0966 {
0967
0968
0969
0970 ++m_position;
0971 static_cast<re_dot*>(
0972 this->append_state(syntax_element_wild, sizeof(re_dot))
0973 )->mask = static_cast<unsigned char>(this->flags() & regbase::no_mod_s
0974 ? BOOST_REGEX_DETAIL_NS::force_not_newline
0975 : this->flags() & regbase::mod_s ?
0976 BOOST_REGEX_DETAIL_NS::force_newline : BOOST_REGEX_DETAIL_NS::dont_care);
0977 return true;
0978 }
0979
0980 template <class charT, class traits>
0981 bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_t high)
0982 {
0983 bool greedy = true;
0984 bool possessive = false;
0985 std::size_t insert_point;
0986
0987
0988
0989 if((m_position != m_end)
0990 && (
0991 (0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
0992 || ((regbase::basic_syntax_group|regbase::emacs_ex) == (this->flags() & (regbase::main_option_type | regbase::emacs_ex)))
0993 )
0994 )
0995 {
0996
0997 if ((this->flags() & (regbase::main_option_type | regbase::mod_x | regbase::no_perl_ex)) == regbase::mod_x)
0998 {
0999
1000 while ((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1001 ++m_position;
1002 }
1003 if((m_position != m_end) && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question))
1004 {
1005 greedy = false;
1006 ++m_position;
1007 }
1008
1009 if((m_position != m_end)
1010 && (0 == (this->flags() & regbase::main_option_type))
1011 && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_plus))
1012 {
1013 possessive = true;
1014 ++m_position;
1015 }
1016 }
1017 if(0 == this->m_last_state)
1018 {
1019 fail(regex_constants::error_badrepeat, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_position), "Nothing to repeat.");
1020 return false;
1021 }
1022 if(this->m_last_state->type == syntax_element_endmark)
1023 {
1024
1025 insert_point = this->m_paren_start;
1026 }
1027 else if((this->m_last_state->type == syntax_element_literal) && (static_cast<re_literal*>(this->m_last_state)->length > 1))
1028 {
1029
1030 re_literal* lit = static_cast<re_literal*>(this->m_last_state);
1031 charT c = (static_cast<charT*>(static_cast<void*>(lit+1)))[lit->length - 1];
1032 lit->length -= 1;
1033
1034 lit = static_cast<re_literal*>(this->append_state(syntax_element_literal, sizeof(re_literal) + sizeof(charT)));
1035 lit->length = 1;
1036 (static_cast<charT*>(static_cast<void*>(lit+1)))[0] = c;
1037 insert_point = this->getoffset(this->m_last_state);
1038 }
1039 else
1040 {
1041
1042 switch(this->m_last_state->type)
1043 {
1044 case syntax_element_start_line:
1045 case syntax_element_end_line:
1046 case syntax_element_word_boundary:
1047 case syntax_element_within_word:
1048 case syntax_element_word_start:
1049 case syntax_element_word_end:
1050 case syntax_element_buffer_start:
1051 case syntax_element_buffer_end:
1052 case syntax_element_alt:
1053 case syntax_element_soft_buffer_end:
1054 case syntax_element_restart_continue:
1055 case syntax_element_jump:
1056 case syntax_element_startmark:
1057 case syntax_element_backstep:
1058 case syntax_element_toggle_case:
1059
1060 fail(regex_constants::error_badrepeat, m_position - m_base);
1061 return false;
1062 default:
1063
1064 break;
1065 }
1066 insert_point = this->getoffset(this->m_last_state);
1067 }
1068
1069
1070
1071 re_repeat* rep = static_cast<re_repeat*>(this->insert_state(insert_point, syntax_element_rep, re_repeater_size));
1072 rep->min = low;
1073 rep->max = high;
1074 rep->greedy = greedy;
1075 rep->leading = false;
1076
1077 std::ptrdiff_t rep_off = this->getoffset(rep);
1078
1079 re_jump* jmp = static_cast<re_jump*>(this->append_state(syntax_element_jump, sizeof(re_jump)));
1080 jmp->alt.i = rep_off - this->getoffset(jmp);
1081 this->m_pdata->m_data.align();
1082
1083 rep = static_cast<re_repeat*>(this->getaddress(rep_off));
1084 rep->alt.i = this->m_pdata->m_data.size() - rep_off;
1085
1086
1087
1088
1089 if(possessive)
1090 {
1091 if(m_position != m_end)
1092 {
1093
1094
1095
1096
1097 bool contin = false;
1098 do
1099 {
1100 if ((this->flags() & (regbase::main_option_type | regbase::mod_x | regbase::no_perl_ex)) == regbase::mod_x)
1101 {
1102
1103 while ((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1104 ++m_position;
1105 }
1106 if (m_position != m_end)
1107 {
1108 switch (this->m_traits.syntax_type(*m_position))
1109 {
1110 case regex_constants::syntax_star:
1111 case regex_constants::syntax_plus:
1112 case regex_constants::syntax_question:
1113 case regex_constants::syntax_open_brace:
1114 fail(regex_constants::error_badrepeat, m_position - m_base);
1115 return false;
1116 case regex_constants::syntax_open_mark:
1117
1118 if ((m_position + 2 < m_end) && this->m_traits.syntax_type(*(m_position + 1)) == regex_constants::syntax_question
1119 && this->m_traits.syntax_type(*(m_position + 2)) == regex_constants::syntax_hash)
1120 {
1121 while ((m_position != m_end)
1122 && (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark)) {
1123 }
1124 contin = true;
1125 }
1126 else
1127 contin = false;
1128 break;
1129 default:
1130 contin = false;
1131 }
1132 }
1133 else
1134 contin = false;
1135 } while (contin);
1136 }
1137 re_brace* pb = static_cast<re_brace*>(this->insert_state(insert_point, syntax_element_startmark, sizeof(re_brace)));
1138 pb->index = -3;
1139 pb->icase = this->flags() & regbase::icase;
1140 jmp = static_cast<re_jump*>(this->insert_state(insert_point + sizeof(re_brace), syntax_element_jump, sizeof(re_jump)));
1141 this->m_pdata->m_data.align();
1142 jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
1143 pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
1144 pb->index = -3;
1145 pb->icase = this->flags() & regbase::icase;
1146 }
1147 return true;
1148 }
1149
1150 template <class charT, class traits>
1151 bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic)
1152 {
1153 static const char incomplete_message[] = "Missing } in quantified repetition.";
1154 // Parse a bounded repeat of the form {min}, {min,} or {min,max}; the fallback paths below scan backwards for the opening brace, so m_position is presumably just past it on entry.
1155 // isbasic: when true, the closing brace must be preceded by an escape character.
1156 // Returns false after fail(); otherwise the result of parse_repeat(min, max), or of parse_literal() when the text is re-read as a literal.
1157 std::size_t min, max;
1158 boost::intmax_t v;
1159 // skip characters matching m_mask_space:
1160 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1161 ++m_position;
1162 if(this->m_position == this->m_end)
1163 {
1164 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1165 {
1166 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1167 return false;
1168 }
1169 // neither main_option_type bits nor no_perl_ex set: rewind to the opening brace and parse it as a literal
1170 --m_position;
1171 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1172 return parse_literal();
1173 }
1174 // read the minimum count (m_position is used as the resume point afterwards, so toi() evidently advances it):
1175 v = this->m_traits.toi(m_position, m_end, 10);
1176 // the minimum must be non-negative and no larger than umax():
1177 if((v < 0) || (v > umax()))
1178 {
1179 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1180 {
1181 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1182 return false;
1183 }
1184 // same literal fallback as above: rewind to the opening brace
1185 --m_position;
1186 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1187 return parse_literal();
1188 }
1189 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1190 ++m_position;
1191 if(this->m_position == this->m_end)
1192 {
1193 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1194 {
1195 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1196 return false;
1197 }
1198 // same literal fallback as above: rewind to the opening brace
1199 --m_position;
1200 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1201 return parse_literal();
1202 }
1203 min = static_cast<std::size_t>(v);
1204 // a comma introduces an explicit maximum:
1205 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_comma)
1206 {
1207 // step over the comma:
1208 ++m_position;
1209 // skip characters matching m_mask_space:
1210 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1211 ++m_position;
1212 if(this->m_position == this->m_end)
1213 {
1214 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1215 {
1216 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1217 return false;
1218 }
1219 // same literal fallback as above: rewind to the opening brace
1220 --m_position;
1221 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1222 return parse_literal();
1223 }
1224 // read the maximum; a negative or too-large value becomes the largest std::size_t:
1225 v = this->m_traits.toi(m_position, m_end, 10);
1226 max = ((v >= 0) && (v < umax())) ? (std::size_t)v : (std::numeric_limits<std::size_t>::max)();
1227 }
1228 else
1229 {
1230 // no comma: the repeat count is exact
1231 max = min;
1232 }
1233 // skip characters matching m_mask_space:
1234 while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1235 ++m_position;
1236 // now look for the closing brace:
1237 if(this->m_position == this->m_end)
1238 {
1239 if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1240 {
1241 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1242 return false;
1243 }
1244 // same literal fallback as above: rewind to the opening brace
1245 --m_position;
1246 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1247 return parse_literal();
1248 }
1249 if(isbasic) // the closing brace must be escaped; anything else is an error
1250 {
1251 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_escape)
1252 {
1253 ++m_position;
1254 if(this->m_position == this->m_end)
1255 {
1256 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1257 return false;
1258 }
1259 }
1260 else
1261 {
1262 fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1263 return false;
1264 }
1265 }
1266 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_brace)
1267 ++m_position;
1268 else
1269 {
1270 // no closing brace here: rewind to the opening brace and parse it as a literal (no flag check on this path)
1271 --m_position;
1272 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1273 return parse_literal();
1274 }
1275 // the bounds are complete: reject an inverted range,
1276 // otherwise hand them to parse_repeat()
1277
1278 if(min > max)
1279 {
1280 // back up over the characters matching m_word_mask that precede the closing brace so the error position is their first character
1281 m_position -= 2;
1282 while(this->m_traits.isctype(*m_position, this->m_word_mask)) --m_position;
1283 ++m_position;
1284 fail(regex_constants::error_badbrace, m_position - m_base);
1285 return false;
1286 }
1287 return parse_repeat(min, max);
1288 }
1289
1290 template <class charT, class traits>
1291 bool basic_regex_parser<charT, traits>::parse_alt()
1292 {
1293 // Handle the alternation operator at m_position: append a jump that ends the
1294 // alternative parsed so far and insert a re_alt state at m_alt_insert_point.
1295 // Returns false (after fail()) when nothing precedes the operator - no state yet, or the last state is a
1296 // startmark - unless the syntax is perl_syntax_group with no_empty_expressions clear. Returns true otherwise.
1297 if(
1298 ((this->m_last_state == 0) || (this->m_last_state->type == syntax_element_startmark))
1299 &&
1300 !(
1301 ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
1302 &&
1303 ((this->flags() & regbase::no_empty_expressions) == 0)
1304 )
1305 )
1306 {
1307 fail(regex_constants::error_empty, this->m_position - this->m_base, "A regular expression cannot start with the alternation operator |.");
1308 return false;
1309 }
1310 // Record the highest mark count reached so far and, when m_mark_reset is
1311 // non-negative, restart the mark count from it for the next alternative.
1312
1313 if(m_max_mark < m_mark_count)
1314 m_max_mark = m_mark_count;
1315 if(m_mark_reset >= 0)
1316 m_mark_count = m_mark_reset;
1317 // step over the alternation operator:
1318 ++m_position;
1319 // Append a jump that terminates the alternative just parsed; its offset is
1320 // remembered so it can be pushed onto m_alt_jumps below.
1321
1322 re_syntax_base* pj = this->append_state(BOOST_REGEX_DETAIL_NS::syntax_element_jump, sizeof(re_jump));
1323 std::ptrdiff_t jump_offset = this->getoffset(pj);
1324 // Insert the alt state at the current insertion point. jump_offset is bumped by
1325 // re_alt_size, presumably because the insertion shifts the jump appended above - confirm against insert_state().
1326 // The alt's own offset is set to the distance from it to the (aligned) end of the state data.
1327 re_alt* palt = static_cast<re_alt*>(this->insert_state(this->m_alt_insert_point, syntax_element_alt, re_alt_size));
1328 jump_offset += re_alt_size;
1329 this->m_pdata->m_data.align();
1330 palt->alt.i = this->m_pdata->m_data.size() - this->getoffset(palt);
1331 // The next alt state, if any, is inserted at the current end of the state
1332 // data, i.e. at the start of the alternative that is about to be parsed.
1333
1334
1335 this->m_alt_insert_point = this->m_pdata->m_data.size();
1336 // If a case change has been recorded (m_has_case_change), start the new
1337 // alternative with a toggle_case state carrying the current m_icase value.
1338
1339
1340 if(m_has_case_change)
1341 {
1342 static_cast<re_case*>(
1343 this->append_state(syntax_element_toggle_case, sizeof(re_case))
1344 )->icase = this->m_icase;
1345 }
1346 // Remember where the terminating jump lives. Its target is not set here;
1347 // presumably it is filled in when the alternatives are unwound (not visible in this function).
1348
1349
1350
1351
1352 m_alt_jumps.push_back(jump_offset);
1353 return true;
1354 }
1355 // Parse a bracketed character set. m_position is at the opening bracket on entry; when the closing bracket is found the set is appended with append_set() and m_position is left just past that bracket.
1356 template <class charT, class traits>
1357 bool basic_regex_parser<charT, traits>::parse_set()
1358 {
1359 static const char incomplete_message[] = "Character set declaration starting with [ terminated prematurely - either no ] was found or the set had no content.";
1360 ++m_position;
1361 if(m_position == m_end)
1362 {
1363 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1364 return false;
1365 }
1366 basic_char_set<charT, traits> char_set;
1367 // base: first character of the set (a caret is a negation only there). item_base: a closing bracket found exactly here is taken as a literal.
1368 const charT* base = m_position;
1369 const charT* item_base = m_position;
1370 // Returns true once the set is closed (or parse_inner_set() asked to stop); false after fail() or when the input runs out.
1371 while(m_position != m_end)
1372 {
1373 switch(this->m_traits.syntax_type(*m_position))
1374 {
1375 case regex_constants::syntax_caret:
1376 if(m_position == base)
1377 {
1378 char_set.negate();
1379 ++m_position;
1380 item_base = m_position;
1381 }
1382 else
1383 parse_set_literal(char_set);
1384 break;
1385 case regex_constants::syntax_close_set:
1386 if(m_position == item_base)
1387 {
1388 parse_set_literal(char_set);
1389 break;
1390 }
1391 else
1392 {
1393 ++m_position;
1394 if(0 == this->append_set(char_set))
1395 {
1396 fail(regex_constants::error_ctype, m_position - m_base);
1397 return false;
1398 }
1399 }
1400 return true;
1401 case regex_constants::syntax_open_set:
1402 if(parse_inner_set(char_set))
1403 break;
1404 return true;
1405 case regex_constants::syntax_escape:
1406 {
1407 // Look ahead one character to see whether this is a single-letter
1408 // class shortcut (escape_type_class / escape_type_not_class).
1409 // The look-ahead must not read *m_end: when the escape is the last character
1410 // both tests are skipped and the literal path below reports the error.
1411 ++m_position;
1412 if((m_position != m_end) && (this->m_traits.escape_syntax_type(*m_position)
1413 == regex_constants::escape_type_class))
1414 {
1415 char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
1416 if(m != 0)
1417 {
1418 char_set.add_class(m);
1419 ++m_position;
1420 break;
1421 }
1422 }
1423 else if((m_position != m_end) && (this->m_traits.escape_syntax_type(*m_position)
1424 == regex_constants::escape_type_not_class))
1425 {
1426 // negated character class shortcut:
1427 char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
1428 if(m != 0)
1429 {
1430 char_set.add_negated_class(m);
1431 ++m_position;
1432 break;
1433 }
1434 }
1435 // not a class shortcut: step back to the escape character and parse it as an ordinary set item
1436 --m_position;
1437 parse_set_literal(char_set);
1438 break;
1439 }
1440 default:
1441 parse_set_literal(char_set);
1442 break;
1443 }
1444 }
1445 return m_position != m_end;
1446 }
1447
1448 template <class charT, class traits>
1449 bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, traits>& char_set)
1450 {
1451 static const char incomplete_message[] = "Character class declaration starting with [ terminated prematurely - either no ] was found or the set had no content.";
1452 // Handle an opening bracket found inside a set; m_position is at that bracket on entry.
1453 // The following character selects a named class (colon) or an equivalence class (equal sign);
1454 // a dot or anything else is handed back to parse_set_literal() with the cursor on the bracket.
1455 // Returns true when the enclosing set should keep being parsed. Returns false after fail(), and also
1456 // after appending a word_start/word_end state; parse_set() reacts to false by stopping and returning true.
1457 if(m_end == ++m_position)
1458 {
1459 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1460 return false;
1461 }
1462 switch(this->m_traits.syntax_type(*m_position))
1463 {
1464 case regex_constants::syntax_dot:
1465 // bracket followed by a dot: step back onto the bracket and let
1466 // parse_set_literal() parse the whole item (get_next_set_literal()
1467 // handles the bracket-dot form).
1468 --m_position;
1469 parse_set_literal(char_set);
1470 return true;
1471 case regex_constants::syntax_colon:
1472 {
1473 // with the basic syntax group and no_char_classes set, the bracket is just a literal:
1474 if((this->flags() & (regbase::main_option_type | regbase::no_char_classes))
1475 == (regbase::basic_syntax_group | regbase::no_char_classes))
1476 {
1477 --m_position;
1478 parse_set_literal(char_set);
1479 return true;
1480 }
1481 // step over the colon:
1482 if(m_end == ++m_position)
1483 {
1484 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1485 return false;
1486 }
1487 const charT* name_first = m_position;
1488 // the name has at least one character; scan forward to the closing colon:
1489 if(m_end == ++m_position)
1490 {
1491 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1492 return false;
1493 }
1494 while((m_position != m_end)
1495 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_colon))
1496 ++m_position;
1497 const charT* name_last = m_position;
1498 if(m_end == m_position)
1499 {
1500 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1501 return false;
1502 }
1503 if((m_end == ++m_position)
1504 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1505 {
1506 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1507 return false;
1508 }
1509 // a caret at the start of the name negates the class;
1510 // it is not part of the name passed to lookup_classname()
1511
1512 bool negated = false;
1513 if(this->m_traits.syntax_type(*name_first) == regex_constants::syntax_caret)
1514 {
1515 ++name_first;
1516 negated = true;
1517 }
1518 typedef typename traits::char_class_type m_type;
1519 m_type m = this->m_traits.lookup_classname(name_first, name_last);
1520 if(m == 0)
1521 {
1522 if(char_set.empty() && (name_last - name_first == 1))
1523 {
1524 // unknown one-character name in an otherwise empty set: if a second closing bracket follows, the name may select a word-start or word-end assertion
1525 ++m_position;
1526 if( (m_position != m_end)
1527 && (this->m_traits.syntax_type(*m_position)
1528 == regex_constants::syntax_close_set))
1529 {
1530 if(this->m_traits.escape_syntax_type(*name_first)
1531 == regex_constants::escape_type_left_word)
1532 {
1533 ++m_position;
1534 this->append_state(syntax_element_word_start);
1535 return false;
1536 }
1537 if(this->m_traits.escape_syntax_type(*name_first)
1538 == regex_constants::escape_type_right_word)
1539 {
1540 ++m_position;
1541 this->append_state(syntax_element_word_end);
1542 return false;
1543 }
1544 }
1545 }
1546 fail(regex_constants::error_ctype, name_first - m_base);
1547 return false;
1548 }
1549 if(!negated)
1550 char_set.add_class(m);
1551 else
1552 char_set.add_negated_class(m);
1553 ++m_position;
1554 break;
1555 }
1556 case regex_constants::syntax_equal:
1557 {
1558 // step over the equal sign:
1559 if(m_end == ++m_position)
1560 {
1561 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1562 return false;
1563 }
1564 const charT* name_first = m_position;
1565 // the name has at least one character; scan forward to the closing equal sign:
1566 if(m_end == ++m_position)
1567 {
1568 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1569 return false;
1570 }
1571 while((m_position != m_end)
1572 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal))
1573 ++m_position;
1574 const charT* name_last = m_position;
1575 if(m_end == m_position)
1576 {
1577 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1578 return false;
1579 }
1580 if((m_end == ++m_position)
1581 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1582 {
1583 fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1584 return false;
1585 }
1586 string_type m = this->m_traits.lookup_collatename(name_first, name_last); // must yield one or two characters
1587 if(m.empty() || (m.size() > 2))
1588 {
1589 fail(regex_constants::error_collate, name_first - m_base);
1590 return false;
1591 }
1592 digraph<charT> d;
1593 d.first = m[0];
1594 if(m.size() > 1)
1595 d.second = m[1];
1596 else
1597 d.second = 0;
1598 char_set.add_equivalent(d);
1599 ++m_position;
1600 break;
1601 }
1602 default: // any other character: the bracket itself is an ordinary set item
1603 --m_position;
1604 parse_set_literal(char_set);
1605 break;
1606 }
1607 return true;
1608 }
1609 // Parse one item of a character set - a single literal or a range of the form first-dash-last - and add it to char_set. Errors are reported through fail(); nothing is returned.
1610 template <class charT, class traits>
1611 void basic_regex_parser<charT, traits>::parse_set_literal(basic_char_set<charT, traits>& char_set)
1612 {
1613 digraph<charT> start_range(get_next_set_literal(char_set));
1614 if(m_end == m_position)
1615 {
1616 fail(regex_constants::error_brack, m_position - m_base);
1617 return;
1618 }
1619 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
1620 {
1621 // a dash follows: either a range, or a dash sitting just before the closing bracket
1622 if(m_end == ++m_position)
1623 {
1624 fail(regex_constants::error_brack, m_position - m_base);
1625 return;
1626 }
1627 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)
1628 {
1629 digraph<charT> end_range = get_next_set_literal(char_set);
1630 char_set.add_range(start_range, end_range); // reading end_range may have left m_position at m_end, so check before peeking
1631 if((m_end != m_position) && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash))
1632 {
1633 if(m_end == ++m_position)
1634 {
1635 fail(regex_constants::error_brack, m_position - m_base);
1636 return;
1637 }
1638 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_set)
1639 {
1640 // a dash directly before the closing bracket: step back onto it and leave it for the caller
1641 --m_position;
1642 return;
1643 }
1644 fail(regex_constants::error_range, m_position - m_base);
1645 return;
1646 }
1647 return;
1648 }
1649 --m_position; // the dash is followed by the closing bracket: step back onto the dash and add only the first literal
1650 }
1651 char_set.add_single(start_range);
1652 }
1653 // Read the next literal of a character set starting at m_position (which must not be m_end, since it is dereferenced immediately) and return it as a digraph. After fail() the partially filled result is returned.
1654 template <class charT, class traits>
1655 digraph<charT> basic_regex_parser<charT, traits>::get_next_set_literal(basic_char_set<charT, traits>& char_set)
1656 {
1657 digraph<charT> result;
1658 switch(this->m_traits.syntax_type(*m_position))
1659 {
1660 case regex_constants::syntax_dash:
1661 if(!char_set.empty())
1662 {
1663 // in a non-empty set a dash is accepted as a literal only when the closing bracket follows immediately
1664 if((++m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1665 {
1666 fail(regex_constants::error_range, m_position - m_base);
1667 return result;
1668 }
1669 --m_position;
1670 }
1671 result.first = *m_position++;
1672 return result;
1673 case regex_constants::syntax_escape:
1674 // with no_escape_in_lists set, the escape character is an ordinary literal:
1675 if(this->flags() & regex_constants::no_escape_in_lists)
1676 {
1677 result = *m_position++;
1678 break;
1679 }
1680 ++m_position;
1681 result = unescape_character();
1682 break;
1683 case regex_constants::syntax_open_set:
1684 {
1685 if(m_end == ++m_position)
1686 {
1687 fail(regex_constants::error_collate, m_position - m_base);
1688 return result;
1689 }
1690 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot) // not the bracket-dot form: the bracket itself is the literal
1691 {
1692 --m_position;
1693 result.first = *m_position;
1694 ++m_position;
1695 return result;
1696 }
1697 if(m_end == ++m_position)
1698 {
1699 fail(regex_constants::error_collate, m_position - m_base);
1700 return result;
1701 }
1702 const charT* name_first = m_position;
1703 // the name has at least one character; scan forward to the closing dot:
1704 if(m_end == ++m_position)
1705 {
1706 fail(regex_constants::error_collate, name_first - m_base);
1707 return result;
1708 }
1709 while((m_position != m_end)
1710 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot))
1711 ++m_position;
1712 const charT* name_last = m_position;
1713 if(m_end == m_position)
1714 {
1715 fail(regex_constants::error_collate, name_first - m_base);
1716 return result;
1717 }
1718 if((m_end == ++m_position)
1719 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1720 {
1721 fail(regex_constants::error_collate, name_first - m_base);
1722 return result;
1723 }
1724 ++m_position;
1725 string_type s = this->m_traits.lookup_collatename(name_first, name_last); // must yield one or two characters
1726 if(s.empty() || (s.size() > 2))
1727 {
1728 fail(regex_constants::error_collate, name_first - m_base);
1729 return result;
1730 }
1731 result.first = s[0];
1732 if(s.size() > 1)
1733 result.second = s[1];
1734 else
1735 result.second = 0;
1736 return result;
1737 }
1738 default: // any other character stands for itself
1739 result = *m_position++;
1740 }
1741 return result;
1742 }
1743 // valid_value: does the numeric value v fit in a charT?
1744 // The two-argument overload picks one of the tagged overloads at compile time,
1745 // depending on whether charT is narrower than boost::intmax_t.
1746 // Narrower: v fits when no bits remain above charT's width. Otherwise: always true.
1747 template <class charT>
1748 bool valid_value(charT, boost::intmax_t v, const mpl::true_&)
1749 {
1750 return 0 == (v >> (CHAR_BIT * sizeof(charT)));
1751 }
1752 template <class charT>
1753 bool valid_value(charT, boost::intmax_t, const mpl::false_&)
1754 {
1755 return true;
1756 }
1757 template <class charT>
1758 bool valid_value(charT c, boost::intmax_t v)
1759 {
1760 typedef mpl::bool_<(sizeof(charT) < sizeof(boost::intmax_t))> is_narrower; return valid_value(c, v, is_narrower());
1761 }
1762 // Convert the escape sequence whose body starts at m_position into a single character (the error paths scan backwards for the escape character, so it presumably precedes m_position). On success m_position is left past the sequence; after fail() a zero-valued character is returned.
1763 template <class charT, class traits>
1764 charT basic_regex_parser<charT, traits>::unescape_character()
1765 {
1766 #ifdef BOOST_MSVC
1767 #pragma warning(push)
1768 #pragma warning(disable:4127)
1769 #endif
1770 charT result(0);
1771 if(m_position == m_end)
1772 {
1773 fail(regex_constants::error_escape, m_position - m_base, "Escape sequence terminated prematurely.");
1774 return false; // note: false converts to a zero-valued charT, the same value as result here
1775 }
1776 switch(this->m_traits.escape_syntax_type(*m_position))
1777 {
1778 case regex_constants::escape_type_control_a:
1779 result = charT('\a');
1780 break;
1781 case regex_constants::escape_type_e:
1782 result = charT(27);
1783 break;
1784 case regex_constants::escape_type_control_f:
1785 result = charT('\f');
1786 break;
1787 case regex_constants::escape_type_control_n:
1788 result = charT('\n');
1789 break;
1790 case regex_constants::escape_type_control_r:
1791 result = charT('\r');
1792 break;
1793 case regex_constants::escape_type_control_t:
1794 result = charT('\t');
1795 break;
1796 case regex_constants::escape_type_control_v:
1797 result = charT('\v');
1798 break;
1799 case regex_constants::escape_type_word_assert:
1800 result = charT('\b');
1801 break;
1802 case regex_constants::escape_type_ascii_control:
1803 ++m_position;
1804 if(m_position == m_end)
1805 {
1806 // rewind to the escape character so the error is reported at the start of the sequence:
1807 --m_position;
1808 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1809 fail(regex_constants::error_escape, m_position - m_base, "ASCII escape sequence terminated prematurely.");
1810 return result;
1811 }
1812 result = static_cast<charT>(*m_position % 32); // the control character is the following character's value modulo 32
1813 break;
1814 case regex_constants::escape_type_hex:
1815 ++m_position;
1816 if(m_position == m_end)
1817 {
1818 // rewind to the escape character:
1819 --m_position;
1820 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1821 fail(regex_constants::error_escape, m_position - m_base, "Hexadecimal escape sequence terminated prematurely.");
1822 return result;
1823 }
1824 // two forms: digits enclosed in braces, or at most two bare hex digits
1825 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
1826 {
1827 ++m_position;
1828 if(m_position == m_end)
1829 {
1830 // rewind to the escape character:
1831 --m_position;
1832 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1833 fail(regex_constants::error_escape, m_position - m_base, "Missing } in hexadecimal escape sequence.");
1834 return result;
1835 }
1836 boost::intmax_t i = this->m_traits.toi(m_position, m_end, 16);
1837 if((m_position == m_end)
1838 || (i < 0)
1839 || ((std::numeric_limits<charT>::is_specialized) && (i > (boost::intmax_t)(std::numeric_limits<charT>::max)()))
1840 || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
1841 {
1842 // no digits, value out of range for charT, or no closing brace: rewind to the escape character
1843 --m_position;
1844 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1845 fail(regex_constants::error_badbrace, m_position - m_base, "Hexadecimal escape sequence was invalid.");
1846 return result;
1847 }
1848 ++m_position;
1849 result = charT(i);
1850 }
1851 else
1852 {
1853 std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), static_cast<std::ptrdiff_t>(m_end - m_position));
1854 boost::intmax_t i = this->m_traits.toi(m_position, m_position + len, 16);
1855 if((i < 0)
1856 || !valid_value(charT(0), i))
1857 {
1858 // rewind to the escape character:
1859 --m_position;
1860 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1861 fail(regex_constants::error_escape, m_position - m_base, "Escape sequence did not encode a valid character.");
1862 return result;
1863 }
1864 result = charT(i);
1865 }
1866 return result;
1867 case regex_constants::syntax_digit: // NOTE(review): a syntax_* constant among escape_type_* labels - presumably digits get this code from escape_syntax_type() too; confirm against the traits
1868 {
1869 // octal escape: the first digit must be zero, and at most four
1870 // characters (including that zero) are converted
1871 std::ptrdiff_t len = (std::min)(::boost::BOOST_REGEX_DETAIL_NS::distance(m_position, m_end), static_cast<std::ptrdiff_t>(4));
1872 const charT* bp = m_position; // probe with a copy so m_position itself is not moved
1873 boost::intmax_t val = this->m_traits.toi(bp, bp + 1, 8);
1874 if(val != 0)
1875 {
1876 // rewind to the escape character:
1877 --m_position;
1878 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1879 // the sequence does not start with a zero digit:
1880 fail(regex_constants::error_escape, m_position - m_base, "Invalid octal escape sequence.");
1881 return result;
1882 }
1883 val = this->m_traits.toi(m_position, m_position + len, 8);
1884 if((val < 0) || (val > (boost::intmax_t)(std::numeric_limits<charT>::max)()))
1885 {
1886 // rewind to the escape character:
1887 --m_position;
1888 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1889 fail(regex_constants::error_escape, m_position - m_base, "Octal escape sequence is invalid.");
1890 return result;
1891 }
1892 return static_cast<charT>(val);
1893 }
1894 case regex_constants::escape_type_named_char:
1895 {
1896 ++m_position;
1897 if(m_position == m_end)
1898 {
1899 // rewind to the escape character:
1900 --m_position;
1901 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1902 fail(regex_constants::error_escape, m_position - m_base);
1903 return false;
1904 }
1905 // a named character is expected as a name enclosed in braces:
1906 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
1907 {
1908 const charT* base = m_position;
1909 // scan forward to the closing brace:
1910 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
1911 ++m_position;
1912 if(m_position == m_end)
1913 {
1914 // rewind to the escape character:
1915 --m_position;
1916 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1917 fail(regex_constants::error_escape, m_position - m_base);
1918 return false;
1919 }
1920 string_type s = this->m_traits.lookup_collatename(++base, m_position++); // name between the braces; m_position ends up past the closing brace
1921 if(s.empty())
1922 {
1923 // unknown name: rewind to the escape character
1924 --m_position;
1925 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1926 fail(regex_constants::error_collate, m_position - m_base);
1927 return false;
1928 }
1929 if(s.size() == 1)
1930 {
1931 return s[0];
1932 }
1933 }
1934 // reaching here is an error: either there was no opening brace, or the
1935 // name mapped to more than one character. Rewind to the escape character.
1936 --m_position;
1937 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1938 fail(regex_constants::error_escape, m_position - m_base);
1939 return false;
1940 }
1941 default: // any other escaped character stands for itself
1942 result = *m_position;
1943 break;
1944 }
1945 ++m_position;
1946 return result;
1947 #ifdef BOOST_MSVC
1948 #pragma warning(pop)
1949 #endif
1950 }
1951 // Parse an escaped digit at m_position as a back-reference, or fall back to a literal. Returns false after fail() when the digit does not name a group recorded in m_backrefs; true otherwise.
1952 template <class charT, class traits>
1953 bool basic_regex_parser<charT, traits>::parse_backref()
1954 {
1955 BOOST_REGEX_ASSERT(m_position != m_end);
1956 const charT* pc = m_position;
1957 boost::intmax_t i = this->m_traits.toi(pc, pc + 1, 10); // only a single digit is converted; pc is a copy, so m_position stays put
1958 if((i == 0) || (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs)))
1959 {
1960 // a zero digit, or back-references disabled under the perl syntax group: treat it as an escape sequence and append the resulting character
1961 charT c = unescape_character();
1962 this->append_literal(c);
1963 }
1964 else if((i > 0) && (this->m_backrefs.test(i)))
1965 {
1966 m_position = pc; // pc was moved by toi(); continue parsing from there
1967 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
1968 pb->index = i;
1969 pb->icase = this->flags() & regbase::icase;
1970 }
1971 else
1972 {
1973 // not a usable group number: rewind to the escape character and report the error there
1974 --m_position;
1975 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1976 fail(regex_constants::error_backref, m_position - m_base);
1977 return false;
1978 }
1979 return true;
1980 }
1981
1982 template <class charT, class traits>
1983 bool basic_regex_parser<charT, traits>::parse_QE()
1984 {
1985 #ifdef BOOST_MSVC
1986 #pragma warning(push)
1987 #pragma warning(disable:4127)
1988 #endif
1989 // Parse a quoted run of literal text: step over the character at m_position, then
1990 // collect everything up to an escape followed by an escape_type_E character, or up to the end of the expression.
1991 // Returns false (after fail()) only when the expression ends directly after an escape character; true otherwise.
1992 ++m_position;
1993 const charT* start = m_position;
1994 const charT* end;
1995 do
1996 {
1997 while((m_position != m_end)
1998 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape))
1999 ++m_position;
2000 if(m_position == m_end)
2001 {
2002 // no terminator: the quoted run extends to the end of the expression
2003 end = m_position;
2004 break;
2005 }
2006 if(++m_position == m_end) // an escape with nothing after it
2007 {
2008 fail(regex_constants::error_escape, m_position - m_base, "Unterminated \\Q...\\E sequence.");
2009 return false;
2010 }
2011 // is the escaped character the terminator?
2012 if(this->m_traits.escape_syntax_type(*m_position) == regex_constants::escape_type_E)
2013 {
2014 ++m_position;
2015 end = m_position - 2; // points at the escape character, so the two-character terminator is excluded
2016 break;
2017 }
2018 // otherwise keep scanning; the escape pair stays inside the quoted run
2019 }while(true);
2020 // append every character in [start, end) as a literal
2021
2022
2023 while(start != end)
2024 {
2025 this->append_literal(*start);
2026 ++start;
2027 }
2028 return true;
2029 #ifdef BOOST_MSVC
2030 #pragma warning(pop)
2031 #endif
2032 }
2033
2034 template <class charT, class traits>
2035 bool basic_regex_parser<charT, traits>::parse_perl_extension()
2036 {
2037 if(++m_position == m_end)
2038 {
2039
2040 --m_position;
2041 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2042 fail(regex_constants::error_perl_extension, m_position - m_base);
2043 return false;
2044 }
2045
2046
2047
2048
2049
2050 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_hash)
2051 {
2052 while((m_position != m_end)
2053 && (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark))
2054 {}
2055 return true;
2056 }
2057
2058
2059
2060 int markid = 0;
2061 std::ptrdiff_t jump_offset = 0;
2062 re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
2063 pb->icase = this->flags() & regbase::icase;
2064 std::ptrdiff_t last_paren_start = this->getoffset(pb);
2065
2066 std::ptrdiff_t last_alt_point = m_alt_insert_point;
2067 this->m_pdata->m_data.align();
2068 m_alt_insert_point = this->m_pdata->m_data.size();
2069 std::ptrdiff_t expected_alt_point = m_alt_insert_point;
2070 bool restore_flags = true;
2071 regex_constants::syntax_option_type old_flags = this->flags();
2072 bool old_case_change = m_has_case_change;
2073 m_has_case_change = false;
2074 charT name_delim;
2075 int mark_reset = m_mark_reset;
2076 int max_mark = m_max_mark;
2077 m_mark_reset = -1;
2078 m_max_mark = m_mark_count;
2079 boost::intmax_t v;
2080
2081
2082
2083 switch(this->m_traits.syntax_type(*m_position))
2084 {
2085 case regex_constants::syntax_or:
2086 m_mark_reset = m_mark_count;
2087 BOOST_FALLTHROUGH;
2088 case regex_constants::syntax_colon:
2089
2090
2091
2092 pb->index = markid = 0;
2093 ++m_position;
2094 break;
2095 case regex_constants::syntax_digit:
2096 {
2097
2098
2099
2100 v = this->m_traits.toi(m_position, m_end, 10);
2101 if((v < 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2102 {
2103
2104 --m_position;
2105 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2106 fail(regex_constants::error_perl_extension, m_position - m_base, "The recursive sub-expression refers to an invalid marking group, or is unterminated.");
2107 return false;
2108 }
2109 insert_recursion:
2110 pb->index = markid = 0;
2111 re_recurse* pr = static_cast<re_recurse*>(this->append_state(syntax_element_recurse, sizeof(re_recurse)));
2112 pr->alt.i = v;
2113 pr->state_id = 0;
2114 static_cast<re_case*>(
2115 this->append_state(syntax_element_toggle_case, sizeof(re_case))
2116 )->icase = this->flags() & regbase::icase;
2117 break;
2118 }
2119 case regex_constants::syntax_plus:
2120
2121
2122
2123 ++m_position;
2124 v = this->m_traits.toi(m_position, m_end, 10);
2125 if((v <= 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2126 {
2127
2128 --m_position;
2129 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2130 fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
2131 return false;
2132 }
2133 if ((std::numeric_limits<boost::intmax_t>::max)() - m_mark_count < v)
2134 {
2135 fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
2136 return false;
2137 }
2138 v += m_mark_count;
2139 goto insert_recursion;
2140 case regex_constants::syntax_dash:
2141
2142
2143
2144 ++m_position;
2145 v = this->m_traits.toi(m_position, m_end, 10);
2146 if(v <= 0)
2147 {
2148 --m_position;
2149
2150 goto option_group_jump;
2151 }
2152 v = static_cast<boost::intmax_t>(m_mark_count) + 1 - v;
2153 if(v <= 0)
2154 {
2155
2156 --m_position;
2157 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2158 fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
2159 return false;
2160 }
2161 goto insert_recursion;
2162 case regex_constants::syntax_equal:
2163 pb->index = markid = -1;
2164 ++m_position;
2165 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2166 this->m_pdata->m_data.align();
2167 m_alt_insert_point = this->m_pdata->m_data.size();
2168 break;
2169 case regex_constants::syntax_not:
2170 pb->index = markid = -2;
2171 ++m_position;
2172 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2173 this->m_pdata->m_data.align();
2174 m_alt_insert_point = this->m_pdata->m_data.size();
2175 break;
2176 case regex_constants::escape_type_left_word:
2177 {
2178
2179 if(++m_position == m_end)
2180 {
2181
2182 --m_position;
2183 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2184 fail(regex_constants::error_perl_extension, m_position - m_base);
2185 return false;
2186 }
2187 regex_constants::syntax_type t = this->m_traits.syntax_type(*m_position);
2188 if(t == regex_constants::syntax_not)
2189 pb->index = markid = -2;
2190 else if(t == regex_constants::syntax_equal)
2191 pb->index = markid = -1;
2192 else
2193 {
2194
2195 name_delim = '>';
2196 --m_position;
2197 goto named_capture_jump;
2198 }
2199 ++m_position;
2200 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2201 this->append_state(syntax_element_backstep, sizeof(re_brace));
2202 this->m_pdata->m_data.align();
2203 m_alt_insert_point = this->m_pdata->m_data.size();
2204 break;
2205 }
2206 case regex_constants::escape_type_right_word:
2207
2208
2209
2210 pb->index = markid = -3;
2211 ++m_position;
2212 jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2213 this->m_pdata->m_data.align();
2214 m_alt_insert_point = this->m_pdata->m_data.size();
2215 break;
2216 case regex_constants::syntax_open_mark:
2217 {
2218
2219 pb->index = markid = -4;
2220 if(++m_position == m_end)
2221 {
2222
2223 --m_position;
2224 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2225 fail(regex_constants::error_perl_extension, m_position - m_base);
2226 return false;
2227 }
2228 v = this->m_traits.toi(m_position, m_end, 10);
2229 if(m_position == m_end)
2230 {
2231
2232 --m_position;
2233 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2234 fail(regex_constants::error_perl_extension, m_position - m_base);
2235 return false;
2236 }
2237 if(*m_position == charT('R'))
2238 {
2239 if(++m_position == m_end)
2240 {
2241
2242 --m_position;
2243 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2244 fail(regex_constants::error_perl_extension, m_position - m_base);
2245 return false;
2246 }
2247 if(*m_position == charT('&'))
2248 {
2249 const charT* base = ++m_position;
2250 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2251 ++m_position;
2252 if(m_position == m_end)
2253 {
2254
2255 --m_position;
2256 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2257 fail(regex_constants::error_perl_extension, m_position - m_base);
2258 return false;
2259 }
2260 v = -static_cast<int>(hash_value_from_capture_name(base, m_position));
2261 }
2262 else
2263 {
2264 v = -this->m_traits.toi(m_position, m_end, 10);
2265 }
2266 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2267 br->index = v < 0 ? (v - 1) : 0;
2268 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2269 {
2270
2271 --m_position;
2272 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2273 fail(regex_constants::error_perl_extension, m_position - m_base);
2274 return false;
2275 }
2276 if(++m_position == m_end)
2277 {
2278
2279 --m_position;
2280 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2281 fail(regex_constants::error_perl_extension, m_position - m_base);
2282 return false;
2283 }
2284 }
2285 else if((*m_position == charT('\'')) || (*m_position == charT('<')))
2286 {
2287 const charT* base = ++m_position;
2288 while((m_position != m_end) && (*m_position != charT('>')) && (*m_position != charT('\'')))
2289 ++m_position;
2290 if(m_position == m_end)
2291 {
2292
2293 --m_position;
2294 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2295 fail(regex_constants::error_perl_extension, m_position - m_base);
2296 return false;
2297 }
2298 v = static_cast<int>(hash_value_from_capture_name(base, m_position));
2299 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2300 br->index = v;
2301 if(((*m_position != charT('>')) && (*m_position != charT('\''))) || (++m_position == m_end))
2302 {
2303
2304 --m_position;
2305 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2306 fail(regex_constants::error_perl_extension, m_position - m_base, "Unterminated named capture.");
2307 return false;
2308 }
2309 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2310 {
2311
2312 --m_position;
2313 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2314 fail(regex_constants::error_perl_extension, m_position - m_base);
2315 return false;
2316 }
2317 if(++m_position == m_end)
2318 {
2319
2320 --m_position;
2321 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2322 fail(regex_constants::error_perl_extension, m_position - m_base);
2323 return false;
2324 }
2325 }
2326 else if(*m_position == charT('D'))
2327 {
2328 const char* def = "DEFINE";
2329 while(*def && (m_position != m_end) && (*m_position == charT(*def)))
2330 ++m_position, ++def;
2331 if((m_position == m_end) || *def)
2332 {
2333
2334 --m_position;
2335 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2336 fail(regex_constants::error_perl_extension, m_position - m_base);
2337 return false;
2338 }
2339 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2340 br->index = 9999;
2341 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2342 {
2343
2344 --m_position;
2345 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2346 fail(regex_constants::error_perl_extension, m_position - m_base);
2347 return false;
2348 }
2349 if(++m_position == m_end)
2350 {
2351
2352 --m_position;
2353 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2354 fail(regex_constants::error_perl_extension, m_position - m_base);
2355 return false;
2356 }
2357 }
2358 else if(v > 0)
2359 {
2360 re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2361 br->index = v;
2362 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2363 {
2364
2365 --m_position;
2366 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2367 fail(regex_constants::error_perl_extension, m_position - m_base);
2368 return false;
2369 }
2370 if(++m_position == m_end)
2371 {
2372
2373 --m_position;
2374 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2375 fail(regex_constants::error_perl_extension, m_position - m_base);
2376 return false;
2377 }
2378 }
2379 else
2380 {
2381
2382 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_question)
2383 {
2384
2385 --m_position;
2386 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2387 fail(regex_constants::error_perl_extension, m_position - m_base);
2388 return false;
2389 }
2390 if(++m_position == m_end)
2391 {
2392
2393 --m_position;
2394 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2395 fail(regex_constants::error_perl_extension, m_position - m_base);
2396 return false;
2397 }
2398 if(this->m_traits.syntax_type(*m_position) == regex_constants::escape_type_left_word)
2399 {
2400 if(++m_position == m_end)
2401 {
2402
2403 --m_position;
2404 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2405 fail(regex_constants::error_perl_extension, m_position - m_base);
2406 return false;
2407 }
2408 if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
2409 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
2410 {
2411
2412 --m_position;
2413 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2414 fail(regex_constants::error_perl_extension, m_position - m_base);
2415 return false;
2416 }
2417 m_position -= 3;
2418 }
2419 else
2420 {
2421 if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
2422 && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
2423 {
2424
2425 --m_position;
2426 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2427 fail(regex_constants::error_perl_extension, m_position - m_base);
2428 return false;
2429 }
2430 m_position -= 2;
2431 }
2432 }
2433 break;
2434 }
2435 case regex_constants::syntax_close_mark:
2436
2437 --m_position;
2438 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2439 fail(regex_constants::error_perl_extension, m_position - m_base);
2440 return false;
2441 case regex_constants::escape_type_end_buffer:
2442 {
2443 name_delim = *m_position;
2444 named_capture_jump:
2445 markid = 0;
2446 if(0 == (this->flags() & regbase::nosubs))
2447 {
2448 markid = ++m_mark_count;
2449 #ifndef BOOST_NO_STD_DISTANCE
2450 if(this->flags() & regbase::save_subexpression_location)
2451 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 2, 0));
2452 #else
2453 if(this->flags() & regbase::save_subexpression_location)
2454 this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 2, 0));
2455 #endif
2456 }
2457 pb->index = markid;
2458 const charT* base = ++m_position;
2459 if(m_position == m_end)
2460 {
2461
2462 --m_position;
2463 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2464 fail(regex_constants::error_perl_extension, m_position - m_base);
2465 return false;
2466 }
2467 while((m_position != m_end) && (*m_position != name_delim))
2468 ++m_position;
2469 if(m_position == m_end)
2470 {
2471
2472 --m_position;
2473 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2474 fail(regex_constants::error_perl_extension, m_position - m_base);
2475 return false;
2476 }
2477 this->m_pdata->set_name(base, m_position, markid);
2478 ++m_position;
2479 break;
2480 }
2481 default:
2482 if(*m_position == charT('R'))
2483 {
2484 ++m_position;
2485 v = 0;
2486 if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2487 {
2488
2489 --m_position;
2490 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2491 fail(regex_constants::error_perl_extension, m_position - m_base);
2492 return false;
2493 }
2494 goto insert_recursion;
2495 }
2496 if(*m_position == charT('&'))
2497 {
2498 ++m_position;
2499 const charT* base = m_position;
2500 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2501 ++m_position;
2502 if(m_position == m_end)
2503 {
2504
2505 --m_position;
2506 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2507 fail(regex_constants::error_perl_extension, m_position - m_base);
2508 return false;
2509 }
2510 v = static_cast<int>(hash_value_from_capture_name(base, m_position));
2511 goto insert_recursion;
2512 }
2513 if(*m_position == charT('P'))
2514 {
2515 ++m_position;
2516 if(m_position == m_end)
2517 {
2518
2519 --m_position;
2520 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2521 fail(regex_constants::error_perl_extension, m_position - m_base);
2522 return false;
2523 }
2524 if(*m_position == charT('>'))
2525 {
2526 ++m_position;
2527 const charT* base = m_position;
2528 while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2529 ++m_position;
2530 if(m_position == m_end)
2531 {
2532
2533 --m_position;
2534 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2535 fail(regex_constants::error_perl_extension, m_position - m_base);
2536 return false;
2537 }
2538 v = static_cast<int>(hash_value_from_capture_name(base, m_position));
2539 goto insert_recursion;
2540 }
2541 }
2542
2543
2544
2545 option_group_jump:
2546 regex_constants::syntax_option_type opts = parse_options();
2547 if(m_position == m_end)
2548 {
2549
2550 --m_position;
2551 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2552 fail(regex_constants::error_perl_extension, m_position - m_base);
2553 return false;
2554 }
2555
2556 m_has_case_change = ((opts & regbase::icase) != (this->flags() & regbase::icase));
2557 pb->index = markid = 0;
2558 if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark)
2559 {
2560
2561 this->flags(opts);
2562 restore_flags = false;
2563 old_case_change |= m_has_case_change;
2564 }
2565 else if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_colon)
2566 {
2567
2568 this->flags(opts);
2569 ++m_position;
2570 }
2571 else
2572 {
2573
2574 --m_position;
2575 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2576 fail(regex_constants::error_perl_extension, m_position - m_base);
2577 return false;
2578 }
2579
2580
2581 if(m_has_case_change)
2582 {
2583 static_cast<re_case*>(
2584 this->append_state(syntax_element_toggle_case, sizeof(re_case))
2585 )->icase = opts & regbase::icase;
2586 }
2587
2588 }
2589
2590
2591
2592
2593 parse_all();
2594
2595
2596
2597 if(0 == unwind_alts(last_paren_start))
2598 {
2599
2600 --m_position;
2601 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2602 fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid alternation operators within (?...) block.");
2603 return false;
2604 }
2605
2606
2607
2608 if(m_position == m_end)
2609 {
2610
2611 --m_position;
2612 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2613 this->fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_end));
2614 return false;
2615 }
2616 BOOST_REGEX_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
2617 ++m_position;
2618
2619
2620
2621 if(restore_flags)
2622 {
2623
2624 if(m_has_case_change)
2625 {
2626 static_cast<re_case*>(
2627 this->append_state(syntax_element_toggle_case, sizeof(re_case))
2628 )->icase = old_flags & regbase::icase;
2629 }
2630 this->flags(old_flags);
2631 }
2632
2633
2634
2635 if(jump_offset)
2636 {
2637 this->m_pdata->m_data.align();
2638 re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
2639 jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
2640 if((this->m_last_state == jmp) && (markid != -2))
2641 {
2642
2643
2644
2645
2646 --m_position;
2647 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2648 fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid or empty zero width assertion.");
2649 return false;
2650 }
2651 }
2652
2653
2654
2655
2656 if(markid == -4)
2657 {
2658 re_syntax_base* b = this->getaddress(expected_alt_point);
2659
2660 if(b->type != syntax_element_alt)
2661 {
2662 re_alt* alt = static_cast<re_alt*>(this->insert_state(expected_alt_point, syntax_element_alt, sizeof(re_alt)));
2663 alt->alt.i = this->m_pdata->m_data.size() - this->getoffset(alt);
2664 }
2665 else if(((std::ptrdiff_t)this->m_pdata->m_data.size() > (static_cast<re_alt*>(b)->alt.i + this->getoffset(b))) && (static_cast<re_alt*>(b)->alt.i > 0) && this->getaddress(static_cast<re_alt*>(b)->alt.i, b)->type == syntax_element_alt)
2666 {
2667
2668
2669 --m_position;
2670 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2671 fail(regex_constants::error_bad_pattern, m_position - m_base, "More than one alternation operator | was encountered inside a conditional expression.");
2672 return false;
2673 }
2674 else
2675 {
2676
2677 b = this->getaddress(b->next.i, b);
2678 if((b->type == syntax_element_assert_backref) && (static_cast<re_brace*>(b)->index == 9999))
2679 {
2680
2681 --m_position;
2682 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2683 fail(regex_constants::error_bad_pattern, m_position - m_base, "Alternation operators are not allowed inside a DEFINE block.");
2684 return false;
2685 }
2686 }
2687
2688 b = this->getaddress(expected_alt_point);
2689 b = this->getaddress(static_cast<re_alt*>(b)->next.i, b);
2690 if((b->type != syntax_element_assert_backref)
2691 && (b->type != syntax_element_startmark))
2692 {
2693
2694 --m_position;
2695 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2696 fail(regex_constants::error_badrepeat, m_position - m_base, "A repetition operator cannot be applied to a zero-width assertion.");
2697 return false;
2698 }
2699 }
2700
2701
2702
2703 pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
2704 pb->index = markid;
2705 pb->icase = this->flags() & regbase::icase;
2706 this->m_paren_start = last_paren_start;
2707
2708
2709
2710 this->m_alt_insert_point = last_alt_point;
2711
2712
2713
2714 m_has_case_change = old_case_change;
2715
2716
2717
2718 if(m_max_mark > m_mark_count)
2719 {
2720 m_mark_count = m_max_mark;
2721 }
2722 m_mark_reset = mark_reset;
2723 m_max_mark = max_mark;
2724
2725
2726 if(markid > 0)
2727 {
2728 #ifndef BOOST_NO_STD_DISTANCE
2729 if(this->flags() & regbase::save_subexpression_location)
2730 this->m_pdata->m_subs.at((std::size_t)markid - 1).second = std::distance(m_base, m_position) - 1;
2731 #else
2732 if(this->flags() & regbase::save_subexpression_location)
2733 this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base) - 1;
2734 #endif
2735
2736
2737
2738 this->m_backrefs.set(markid);
2739 }
2740 return true;
2741 }
2742
2743 template <class charT, class traits>
2744 bool basic_regex_parser<charT, traits>::match_verb(const char* verb)
2745 {
2746 while(*verb)
2747 {
2748 if(static_cast<charT>(*verb) != *m_position)
2749 {
2750 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2751 fail(regex_constants::error_perl_extension, m_position - m_base);
2752 return false;
2753 }
2754 if(++m_position == m_end)
2755 {
2756 --m_position;
2757 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2758 fail(regex_constants::error_perl_extension, m_position - m_base);
2759 return false;
2760 }
2761 ++verb;
2762 }
2763 return true;
2764 }
2765
2766 #ifdef BOOST_MSVC
2767 # pragma warning(push)
2768 #if BOOST_MSVC >= 1800
2769 #pragma warning(disable:26812)
2770 #endif
2771 #endif
2772 template <class charT, class traits>
2773 bool basic_regex_parser<charT, traits>::parse_perl_verb()
2774 {
2775 if(++m_position == m_end)
2776 {
2777
2778 --m_position;
2779 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2780 fail(regex_constants::error_perl_extension, m_position - m_base);
2781 return false;
2782 }
2783 switch(*m_position)
2784 {
2785 case 'F':
2786 if(++m_position == m_end)
2787 {
2788
2789 --m_position;
2790 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2791 fail(regex_constants::error_perl_extension, m_position - m_base);
2792 return false;
2793 }
2794 if((this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark) || match_verb("AIL"))
2795 {
2796 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2797 {
2798
2799 --m_position;
2800 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2801 fail(regex_constants::error_perl_extension, m_position - m_base);
2802 return false;
2803 }
2804 ++m_position;
2805 this->append_state(syntax_element_fail);
2806 return true;
2807 }
2808 break;
2809 case 'A':
2810 if(++m_position == m_end)
2811 {
2812
2813 --m_position;
2814 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2815 fail(regex_constants::error_perl_extension, m_position - m_base);
2816 return false;
2817 }
2818 if(match_verb("CCEPT"))
2819 {
2820 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2821 {
2822
2823 --m_position;
2824 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2825 fail(regex_constants::error_perl_extension, m_position - m_base);
2826 return false;
2827 }
2828 ++m_position;
2829 this->append_state(syntax_element_accept);
2830 return true;
2831 }
2832 break;
2833 case 'C':
2834 if(++m_position == m_end)
2835 {
2836
2837 --m_position;
2838 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2839 fail(regex_constants::error_perl_extension, m_position - m_base);
2840 return false;
2841 }
2842 if(match_verb("OMMIT"))
2843 {
2844 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2845 {
2846
2847 --m_position;
2848 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2849 fail(regex_constants::error_perl_extension, m_position - m_base);
2850 return false;
2851 }
2852 ++m_position;
2853 static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_commit;
2854 this->m_pdata->m_disable_match_any = true;
2855 return true;
2856 }
2857 break;
2858 case 'P':
2859 if(++m_position == m_end)
2860 {
2861
2862 --m_position;
2863 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2864 fail(regex_constants::error_perl_extension, m_position - m_base);
2865 return false;
2866 }
2867 if(match_verb("RUNE"))
2868 {
2869 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2870 {
2871
2872 --m_position;
2873 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2874 fail(regex_constants::error_perl_extension, m_position - m_base);
2875 return false;
2876 }
2877 ++m_position;
2878 static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_prune;
2879 this->m_pdata->m_disable_match_any = true;
2880 return true;
2881 }
2882 break;
2883 case 'S':
2884 if(++m_position == m_end)
2885 {
2886
2887 --m_position;
2888 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2889 fail(regex_constants::error_perl_extension, m_position - m_base);
2890 return false;
2891 }
2892 if(match_verb("KIP"))
2893 {
2894 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2895 {
2896
2897 --m_position;
2898 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2899 fail(regex_constants::error_perl_extension, m_position - m_base);
2900 return false;
2901 }
2902 ++m_position;
2903 static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_skip;
2904 this->m_pdata->m_disable_match_any = true;
2905 return true;
2906 }
2907 break;
2908 case 'T':
2909 if(++m_position == m_end)
2910 {
2911
2912 --m_position;
2913 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2914 fail(regex_constants::error_perl_extension, m_position - m_base);
2915 return false;
2916 }
2917 if(match_verb("HEN"))
2918 {
2919 if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2920 {
2921
2922 --m_position;
2923 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2924 fail(regex_constants::error_perl_extension, m_position - m_base);
2925 return false;
2926 }
2927 ++m_position;
2928 this->append_state(syntax_element_then);
2929 this->m_pdata->m_disable_match_any = true;
2930 return true;
2931 }
2932 break;
2933 }
2934
2935 --m_position;
2936 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2937 fail(regex_constants::error_perl_extension, m_position - m_base);
2938 return false;
2939 }
2940 #ifdef BOOST_MSVC
2941 # pragma warning(pop)
2942 #endif
2943
2944 template <class charT, class traits>
2945 bool basic_regex_parser<charT, traits>::add_emacs_code(bool negate)
2946 {
2947
2948
2949
2950 if(++m_position == m_end)
2951 {
2952
2953 --m_position;
2954 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
2955 fail(regex_constants::error_escape, m_position - m_base);
2956 return false;
2957 }
2958 basic_char_set<charT, traits> char_set;
2959 if(negate)
2960 char_set.negate();
2961
2962 static const charT s_punct[5] = { 'p', 'u', 'n', 'c', 't', };
2963
2964 switch(*m_position)
2965 {
2966 case 's':
2967 case ' ':
2968 char_set.add_class(this->m_mask_space);
2969 break;
2970 case 'w':
2971 char_set.add_class(this->m_word_mask);
2972 break;
2973 case '_':
2974 char_set.add_single(digraph<charT>(charT('$')));
2975 char_set.add_single(digraph<charT>(charT('&')));
2976 char_set.add_single(digraph<charT>(charT('*')));
2977 char_set.add_single(digraph<charT>(charT('+')));
2978 char_set.add_single(digraph<charT>(charT('-')));
2979 char_set.add_single(digraph<charT>(charT('_')));
2980 char_set.add_single(digraph<charT>(charT('<')));
2981 char_set.add_single(digraph<charT>(charT('>')));
2982 break;
2983 case '.':
2984 char_set.add_class(this->m_traits.lookup_classname(s_punct, s_punct+5));
2985 break;
2986 case '(':
2987 char_set.add_single(digraph<charT>(charT('(')));
2988 char_set.add_single(digraph<charT>(charT('[')));
2989 char_set.add_single(digraph<charT>(charT('{')));
2990 break;
2991 case ')':
2992 char_set.add_single(digraph<charT>(charT(')')));
2993 char_set.add_single(digraph<charT>(charT(']')));
2994 char_set.add_single(digraph<charT>(charT('}')));
2995 break;
2996 case '"':
2997 char_set.add_single(digraph<charT>(charT('"')));
2998 char_set.add_single(digraph<charT>(charT('\'')));
2999 char_set.add_single(digraph<charT>(charT('`')));
3000 break;
3001 case '\'':
3002 char_set.add_single(digraph<charT>(charT('\'')));
3003 char_set.add_single(digraph<charT>(charT(',')));
3004 char_set.add_single(digraph<charT>(charT('#')));
3005 break;
3006 case '<':
3007 char_set.add_single(digraph<charT>(charT(';')));
3008 break;
3009 case '>':
3010 char_set.add_single(digraph<charT>(charT('\n')));
3011 char_set.add_single(digraph<charT>(charT('\f')));
3012 break;
3013 default:
3014 fail(regex_constants::error_ctype, m_position - m_base);
3015 return false;
3016 }
3017 if(0 == this->append_set(char_set))
3018 {
3019 fail(regex_constants::error_ctype, m_position - m_base);
3020 return false;
3021 }
3022 ++m_position;
3023 return true;
3024 }
3025
3026 template <class charT, class traits>
3027 regex_constants::syntax_option_type basic_regex_parser<charT, traits>::parse_options()
3028 {
3029
3030 regex_constants::syntax_option_type f = this->flags();
3031 bool breakout = false;
3032 do
3033 {
3034 switch(*m_position)
3035 {
3036 case 's':
3037 f |= regex_constants::mod_s;
3038 f &= ~regex_constants::no_mod_s;
3039 break;
3040 case 'm':
3041 f &= ~regex_constants::no_mod_m;
3042 break;
3043 case 'i':
3044 f |= regex_constants::icase;
3045 break;
3046 case 'x':
3047 f |= regex_constants::mod_x;
3048 break;
3049 default:
3050 breakout = true;
3051 continue;
3052 }
3053 if(++m_position == m_end)
3054 {
3055
3056 --m_position;
3057 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
3058 fail(regex_constants::error_paren, m_position - m_base);
3059 return false;
3060 }
3061 }
3062 while(!breakout);
3063
3064 breakout = false;
3065
3066 if(*m_position == static_cast<charT>('-'))
3067 {
3068 if(++m_position == m_end)
3069 {
3070
3071 --m_position;
3072 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
3073 fail(regex_constants::error_paren, m_position - m_base);
3074 return false;
3075 }
3076 do
3077 {
3078 switch(*m_position)
3079 {
3080 case 's':
3081 f &= ~regex_constants::mod_s;
3082 f |= regex_constants::no_mod_s;
3083 break;
3084 case 'm':
3085 f |= regex_constants::no_mod_m;
3086 break;
3087 case 'i':
3088 f &= ~regex_constants::icase;
3089 break;
3090 case 'x':
3091 f &= ~regex_constants::mod_x;
3092 break;
3093 default:
3094 breakout = true;
3095 continue;
3096 }
3097 if(++m_position == m_end)
3098 {
3099
3100 --m_position;
3101 while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
3102 fail(regex_constants::error_paren, m_position - m_base);
3103 return false;
3104 }
3105 }
3106 while(!breakout);
3107 }
3108 return f;
3109 }
3110
3111 template <class charT, class traits>
3112 bool basic_regex_parser<charT, traits>::unwind_alts(std::ptrdiff_t last_paren_start)
3113 {
3114
3115
3116
3117
3118 if((this->m_alt_insert_point == static_cast<std::ptrdiff_t>(this->m_pdata->m_data.size()))
3119 && (!m_alt_jumps.empty()) && (m_alt_jumps.back() > last_paren_start)
3120 &&
3121 !(
3122 ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
3123 &&
3124 ((this->flags() & regbase::no_empty_expressions) == 0)
3125 )
3126 )
3127 {
3128 fail(regex_constants::error_empty, this->m_position - this->m_base, "Can't terminate a sub-expression with an alternation operator |.");
3129 return false;
3130 }
3131
3132
3133
3134 while((!m_alt_jumps.empty()) && (m_alt_jumps.back() > last_paren_start))
3135 {
3136
3137
3138
3139
3140 std::ptrdiff_t jump_offset = m_alt_jumps.back();
3141 m_alt_jumps.pop_back();
3142 this->m_pdata->m_data.align();
3143 re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
3144 if (jmp->type != syntax_element_jump)
3145 {
3146
3147
3148 fail(regex_constants::error_unknown, this->m_position - this->m_base, "Internal logic failed while compiling the expression, probably you added a repeat to something non-repeatable!");
3149 return false;
3150 }
3151 jmp->alt.i = this->m_pdata->m_data.size() - jump_offset;
3152 }
3153 return true;
3154 }
3155
3156 #ifdef BOOST_MSVC
3157 #pragma warning(pop)
3158 #endif
3159
3160 }
3161 }
3162
3163 #ifdef BOOST_MSVC
3164 #pragma warning(push)
3165 #pragma warning(disable: 4103)
3166 #endif
3167 #ifdef BOOST_HAS_ABI_HEADERS
3168 # include BOOST_ABI_SUFFIX
3169 #endif
3170 #ifdef BOOST_MSVC
3171 #pragma warning(pop)
3172 #endif
3173
3174 #endif