File indexing completed on 2025-01-18 09:51:24
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012 #ifndef BOOST_REGEX_MATCHER_HPP
0013 #define BOOST_REGEX_MATCHER_HPP
0014
0015 #include <boost/regex/v4/iterator_category.hpp>
0016
0017 #ifdef BOOST_MSVC
0018 #pragma warning(push)
0019 #pragma warning(disable: 4103)
0020 #endif
0021 #ifdef BOOST_HAS_ABI_HEADERS
0022 # include BOOST_ABI_PREFIX
0023 #endif
0024 #ifdef BOOST_MSVC
0025 #pragma warning(pop)
0026 #endif
0027
0028 #ifdef BOOST_MSVC
0029 # pragma warning(push)
0030 #pragma warning(disable : 4251)
0031 #if BOOST_MSVC < 1700
0032 # pragma warning(disable : 4231)
0033 #endif
0034 # if BOOST_MSVC < 1600
0035 # pragma warning(disable : 4660)
0036 # endif
0037 #if BOOST_MSVC < 1910
0038 #pragma warning(disable:4800)
0039 #endif
0040 #endif
0041
0042 namespace boost{
0043 namespace BOOST_REGEX_DETAIL_NS{
0044
0045
0046
0047
0048 inline void BOOST_REGEX_CALL verify_options(boost::regex_constants::syntax_option_type, match_flag_type mf)
0049 {
0050
0051
0052
0053 if ((mf & match_extra) && (mf & match_posix))
0054 {
0055 std::logic_error msg("Usage Error: Can't mix regular expression captures with POSIX matching rules");
0056 throw_exception(msg);
0057 }
0058 }
0059
0060
0061
//
// can_start (generic character type):
// Returns true when c may begin a match according to the lookup table.
// map is indexed by character value and only covers [0, 1 << CHAR_BIT);
// any character outside that range is reported as a possible start.
//
template <class charT>
inline bool can_start(charT c, const unsigned char* map, unsigned char mask)
{
   // negative values have no table entry:
   if(c < static_cast<charT>(0))
      return true;
   // neither do values beyond the last table slot:
   if(c >= static_cast<charT>(1 << CHAR_BIT))
      return true;
   return (map[c] & mask) != 0;
}
//
// can_start (char):
// Every char value has a table slot once it is viewed as unsigned char,
// so this is a plain table lookup.
//
inline bool can_start(char c, const unsigned char* map, unsigned char mask)
{
   const unsigned char slot = static_cast<unsigned char>(c);
   return (map[slot] & mask) != 0;
}
//
// can_start (signed char):
// The value is reinterpreted as unsigned char to index the table, so
// negative values select the upper half of the table.
//
inline bool can_start(signed char c, const unsigned char* map, unsigned char mask)
{
   const unsigned char slot = static_cast<unsigned char>(c);
   return (map[slot] & mask) != 0;
}
//
// can_start (unsigned char):
// The character value is used directly as the table index.
//
inline bool can_start(unsigned char c, const unsigned char* map, unsigned char mask)
{
   const unsigned char entry = map[c];
   return (entry & mask) != 0;
}
//
// can_start (unsigned short):
// Values at or above 1 << CHAR_BIT have no table slot and are always
// reported as a possible start; smaller values are looked up.
//
inline bool can_start(unsigned short c, const unsigned char* map, unsigned char mask)
{
   if(c >= (1 << CHAR_BIT))
      return true;
   return (map[c] & mask) != 0;
}
#if !defined(__hpux) && !defined(__WINSCW__)
#if defined(WCHAR_MIN) && (WCHAR_MIN == 0) && !defined(BOOST_NO_INTRINSIC_WCHAR_T)
//
// can_start (wchar_t, only compiled when WCHAR_MIN is 0):
// Values at or above 1 << CHAR_BIT have no table slot and are always
// reported as a possible start; smaller values are looked up.
//
inline bool can_start(wchar_t c, const unsigned char* map, unsigned char mask)
{
   if(c >= static_cast<wchar_t>(1u << CHAR_BIT))
      return true;
   return (map[c] & mask) != 0;
}
#endif
#endif
#if !defined(BOOST_NO_INTRINSIC_WCHAR_T)
//
// can_start (unsigned int):
// Values at or above 1 << CHAR_BIT have no table slot and are always
// reported as a possible start; smaller values are looked up.
//
inline bool can_start(unsigned int c, const unsigned char* map, unsigned char mask)
{
   if(c >= static_cast<unsigned int>(1u << CHAR_BIT))
      return true;
   return (map[c] & mask) != 0;
}
#endif
0097
0098
0099
0100
0101
0102
0103
0104
0105
0106
//
// string_compare (basic_string versus null terminated string):
// Behaves like s.compare(p), except that when p is the empty string, an s
// that is either empty or holds exactly one null character compares equal.
// The template is the same whether or not _RWSTD_VER is defined.
//
template <class C, class T, class A>
inline int string_compare(const std::basic_string<C,T,A>& s, const C* p)
{
   const bool pattern_is_empty = (0 == *p);
   if(pattern_is_empty && (s.empty() || ((s.size() == 1) && (s[0] == 0))))
      return 0;
   return s.compare(p);
}
#ifdef _RWSTD_VER
//
// When _RWSTD_VER is defined the narrow (and, unless BOOST_NO_WREGEX is
// defined, wide) string cases are handled by these non-template overloads,
// which compare the C strings directly.
//
inline int string_compare(const std::string& s, const char* p)
{
   return std::strcmp(s.c_str(), p);
}
#  ifndef BOOST_NO_WREGEX
inline int string_compare(const std::wstring& s, const wchar_t* p)
{
   return std::wcscmp(s.c_str(), p);
}
#  endif
#endif
//
// string_compare (any sequence with size() and operator[] versus a null
// terminated string):
// Walks both inputs while the elements are equal.  If the whole of s was
// consumed the result is minus the next element of p (zero when p ends
// there too); otherwise it is the difference of the first mismatching
// pair, both converted to int.
//
template <class Seq, class C>
inline int string_compare(const Seq& s, const C* p)
{
   std::size_t pos = 0;
   for(; pos < s.size(); ++pos)
   {
      if(!(p[pos] == s[pos]))
         break;
   }
   if(pos == s.size())
      return -static_cast<int>(p[pos]);
   return static_cast<int>(s[pos]) - static_cast<int>(p[pos]);
}
0146 # define STR_COMP(s,p) string_compare(s,p)
0147
//
// re_skip_past_null:
// Returns a pointer to the element immediately after the first null
// element found at or after p.
//
template<class charT>
inline const charT* re_skip_past_null(const charT* p)
{
   for(; *p != static_cast<charT>(0); ++p)
   {
      // scanning only
   }
   return p + 1;
}
0154
0155 template <class iterator, class charT, class traits_type, class char_classT>
0156 iterator BOOST_REGEX_CALL re_is_set_member(iterator next,
0157 iterator last,
0158 const re_set_long<char_classT>* set_,
0159 const regex_data<charT, traits_type>& e, bool icase)
0160 {
0161 const charT* p = reinterpret_cast<const charT*>(set_+1);
0162 iterator ptr;
0163 unsigned int i;
0164
0165
0166 if(next == last) return next;
0167
0168 typedef typename traits_type::string_type traits_string_type;
0169 const ::boost::regex_traits_wrapper<traits_type>& traits_inst = *(e.m_ptraits);
0170
0171
0172
0173 (void)traits_inst;
0174
0175
0176
0177 for(i = 0; i < set_->csingles; ++i)
0178 {
0179 ptr = next;
0180 if(*p == static_cast<charT>(0))
0181 {
0182
0183 if(traits_inst.translate(*ptr, icase))
0184 {
0185 ++p;
0186 continue;
0187 }
0188 return set_->isnot ? next : (ptr == next) ? ++next : ptr;
0189 }
0190 else
0191 {
0192 while(*p && (ptr != last))
0193 {
0194 if(traits_inst.translate(*ptr, icase) != *p)
0195 break;
0196 ++p;
0197 ++ptr;
0198 }
0199
0200 if(*p == static_cast<charT>(0))
0201 return set_->isnot ? next : (ptr == next) ? ++next : ptr;
0202
0203 p = re_skip_past_null(p);
0204 }
0205 }
0206
0207 charT col = traits_inst.translate(*next, icase);
0208
0209
0210 if(set_->cranges || set_->cequivalents)
0211 {
0212 traits_string_type s1;
0213
0214
0215 if(set_->cranges)
0216 {
0217 if((e.m_flags & regex_constants::collate) == 0)
0218 s1.assign(1, col);
0219 else
0220 {
0221 charT a[2] = { col, charT(0), };
0222 s1 = traits_inst.transform(a, a + 1);
0223 }
0224 for(i = 0; i < set_->cranges; ++i)
0225 {
0226 if(STR_COMP(s1, p) >= 0)
0227 {
0228 do{ ++p; }while(*p);
0229 ++p;
0230 if(STR_COMP(s1, p) <= 0)
0231 return set_->isnot ? next : ++next;
0232 }
0233 else
0234 {
0235
0236 do{ ++p; }while(*p);
0237 ++p;
0238 }
0239
0240 do{ ++p; }while(*p);
0241 ++p;
0242 }
0243 }
0244
0245
0246 if(set_->cequivalents)
0247 {
0248 charT a[2] = { col, charT(0), };
0249 s1 = traits_inst.transform_primary(a, a +1);
0250 for(i = 0; i < set_->cequivalents; ++i)
0251 {
0252 if(STR_COMP(s1, p) == 0)
0253 return set_->isnot ? next : ++next;
0254
0255 do{ ++p; }while(*p);
0256 ++p;
0257 }
0258 }
0259 }
0260 if(traits_inst.isctype(col, set_->cclasses) == true)
0261 return set_->isnot ? next : ++next;
0262 if((set_->cnclasses != 0) && (traits_inst.isctype(col, set_->cnclasses) == false))
0263 return set_->isnot ? next : ++next;
0264 return set_->isnot ? ++next : next;
0265 }
0266
//
// class repeater_count:
// A counter object that links itself into an intrusive stack on
// construction and unlinks itself on destruction.  *stack is the current
// top of that stack; next is the object that was on top when this one
// was created.  Each object carries a state id, a count and the position
// last recorded by check_null_repeat.
//
template <class BidiIterator>
class repeater_count
{
   repeater_count** stack;
   repeater_count* next;
   int state_id;
   std::size_t count;
   BidiIterator start_pos;

   //
   // Walk down the chain from p looking for an entry whose id is n.
   // Gives up (returns 0) on reaching an entry whose id equals
   // -2 - current_recursion_id, or on running off the end of the chain.
   // A negative id met on the way is itself searched for recursively, and
   // the walk then carries on from the entry below the one found.
   //
   repeater_count* unwind_until(int n, repeater_count* p, int current_recursion_id)
   {
      for(;;)
      {
         if(!p || (p->state_id == n))
            return p;
         if(-2 - current_recursion_id == p->state_id)
            return 0;
         p = p->next;
         if(p && (p->state_id < 0))
         {
            p = unwind_until(p->state_id, p, current_recursion_id);
            if(!p)
               return 0;
            p = p->next;
         }
      }
   }
public:
   // Creates a stand-alone object with id -1; it is not pushed on *s.
   repeater_count(repeater_count** s) : stack(s), next(0), state_id(-1), count(0), start_pos() {}

   //
   // Pushes a new counter with id i on top of *s (which must be non-null).
   // The count starts at zero when the previous top has a non-negative id
   // smaller than i; otherwise count and start position are copied from
   // the entry with the same id located by unwind_until, if there is one.
   //
   repeater_count(int i, repeater_count** s, BidiIterator start, int current_recursion_id)
      : stack(s), next(*s), state_id(i), count(0), start_pos(start)
   {
      *stack = this;
      const bool starts_fresh = (state_id > next->state_id) && (next->state_id >= 0);
      if(!starts_fresh)
      {
         repeater_count* previous = unwind_until(state_id, next, current_recursion_id);
         if(previous)
         {
            count = previous->count;
            start_pos = previous->start_pos;
         }
      }
   }
   // Pops this object by restoring the previous top, if there was one.
   ~repeater_count()
   {
      if(next)
         *stack = next;
   }
   std::size_t get_count() { return count; }
   int get_id() { return state_id; }
   std::size_t operator++() { return ++count; }
   //
   // Returns true, and forces the count to max, when the count is
   // non-zero and pos equals the recorded position.  Otherwise records
   // pos and returns false.
   //
   bool check_null_repeat(const BidiIterator& pos, std::size_t max)
   {
      if((count != 0) && (pos == start_pos))
      {
         count = max;
         return true;
      }
      start_pos = pos;
      return false;
   }
};
0339
0340 struct saved_state;
0341
//
// Tags for the different kinds of saved state.  The values are
// consecutive from zero, and saved_state_count equals the number of
// tags that precede it.
//
enum saved_state_type
{
   saved_type_end                     = 0,
   saved_type_paren                   = 1,
   saved_type_recurse                 = 2,
   saved_type_assertion               = 3,
   saved_state_alt                    = 4,
   saved_state_repeater_count         = 5,
   saved_state_extra_block            = 6,
   saved_state_greedy_single_repeat   = 7,
   saved_state_rep_slow_dot           = 8,
   saved_state_rep_fast_dot           = 9,
   saved_state_rep_char               = 10,
   saved_state_rep_short_set          = 11,
   saved_state_rep_long_set           = 12,
   saved_state_non_greedy_long_repeat = 13,
   saved_state_count                  = 14
};
0360
0361 #ifdef BOOST_MSVC
0362 # pragma warning(push)
0363 #if BOOST_MSVC >= 1800
0364 #pragma warning(disable:26495)
0365 #endif
0366 #endif
0367 template <class Results>
0368 struct recursion_info
0369 {
0370 typedef typename Results::value_type value_type;
0371 typedef typename value_type::iterator iterator;
0372 int idx;
0373 const re_syntax_base* preturn_address;
0374 Results results;
0375 repeater_count<iterator>* repeater_stack;
0376 iterator location_of_start;
0377 };
0378 #ifdef BOOST_MSVC
0379 # pragma warning(pop)
0380 #endif
0381
0382 template <class BidiIterator, class Allocator, class traits>
0383 class perl_matcher
0384 {
0385 public:
0386 typedef typename traits::char_type char_type;
0387 typedef perl_matcher<BidiIterator, Allocator, traits> self_type;
0388 typedef bool (self_type::*matcher_proc_type)();
0389 typedef std::size_t traits_size_type;
0390 typedef typename is_byte<char_type>::width_type width_type;
0391 typedef typename regex_iterator_traits<BidiIterator>::difference_type difference_type;
0392 typedef match_results<BidiIterator, Allocator> results_type;
0393
0394 perl_matcher(BidiIterator first, BidiIterator end,
0395 match_results<BidiIterator, Allocator>& what,
0396 const basic_regex<char_type, traits>& e,
0397 match_flag_type f,
0398 BidiIterator l_base)
0399 : m_result(what), base(first), last(end),
0400 position(first), backstop(l_base), re(e), traits_inst(e.get_traits()),
0401 m_independent(false), next_count(&rep_obj), rep_obj(&next_count)
0402 #ifdef BOOST_REGEX_NON_RECURSIVE
0403 , m_recursions(0)
0404 #endif
0405 {
0406 construct_init(e, f);
0407 }
0408
0409 bool match();
0410 bool find();
0411
0412 void setf(match_flag_type f)
0413 { m_match_flags |= f; }
0414 void unsetf(match_flag_type f)
0415 { m_match_flags &= ~f; }
0416
0417 private:
0418 void construct_init(const basic_regex<char_type, traits>& e, match_flag_type f);
0419
0420 bool find_imp();
0421 bool match_imp();
0422 #ifdef BOOST_REGEX_HAS_MS_STACK_GUARD
0423 typedef bool (perl_matcher::*protected_proc_type)();
0424 bool protected_call(protected_proc_type);
0425 #endif
0426 void estimate_max_state_count(std::random_access_iterator_tag*);
0427 void estimate_max_state_count(void*);
0428 bool match_prefix();
0429 bool match_all_states();
0430
0431
0432 bool match_startmark();
0433 bool match_endmark();
0434 bool match_literal();
0435 bool match_start_line();
0436 bool match_end_line();
0437 bool match_wild();
0438 bool match_match();
0439 bool match_word_boundary();
0440 bool match_within_word();
0441 bool match_word_start();
0442 bool match_word_end();
0443 bool match_buffer_start();
0444 bool match_buffer_end();
0445 bool match_backref();
0446 bool match_long_set();
0447 bool match_set();
0448 bool match_jump();
0449 bool match_alt();
0450 bool match_rep();
0451 bool match_combining();
0452 bool match_soft_buffer_end();
0453 bool match_restart_continue();
0454 bool match_long_set_repeat();
0455 bool match_set_repeat();
0456 bool match_char_repeat();
0457 bool match_dot_repeat_fast();
0458 bool match_dot_repeat_slow();
0459 bool match_dot_repeat_dispatch()
0460 {
0461 return ::boost::is_random_access_iterator<BidiIterator>::value ? match_dot_repeat_fast() : match_dot_repeat_slow();
0462 }
0463 bool match_backstep();
0464 bool match_assert_backref();
0465 bool match_toggle_case();
0466 #ifdef BOOST_REGEX_RECURSIVE
0467 bool backtrack_till_match(std::size_t count);
0468 #endif
0469 bool match_recursion();
0470 bool match_fail();
0471 bool match_accept();
0472 bool match_commit();
0473 bool match_then();
0474 bool skip_until_paren(int index, bool match = true);
0475
0476
0477 bool find_restart_any();
0478 bool find_restart_word();
0479 bool find_restart_line();
0480 bool find_restart_buf();
0481 bool find_restart_lit();
0482
0483 private:
0484
0485 match_results<BidiIterator, Allocator>& m_result;
0486
0487 scoped_ptr<match_results<BidiIterator, Allocator> > m_temp_match;
0488
0489 match_results<BidiIterator, Allocator>* m_presult;
0490
0491 BidiIterator base;
0492
0493 BidiIterator last;
0494
0495 BidiIterator position;
0496
0497 BidiIterator restart;
0498
0499 BidiIterator search_base;
0500
0501 BidiIterator backstop;
0502
0503 const basic_regex<char_type, traits>& re;
0504
0505 const ::boost::regex_traits_wrapper<traits>& traits_inst;
0506
0507 const re_syntax_base* pstate;
0508
0509 match_flag_type m_match_flags;
0510
0511 std::ptrdiff_t state_count;
0512
0513 std::ptrdiff_t max_state_count;
0514
0515 bool icase;
0516
0517 bool m_has_partial_match;
0518
0519 bool m_has_found_match;
0520
0521 bool m_independent;
0522
0523 repeater_count<BidiIterator>* next_count;
0524
0525 repeater_count<BidiIterator> rep_obj;
0526
0527 typename traits::char_class_type m_word_mask;
0528
0529 unsigned char match_any_mask;
0530
0531 std::vector<recursion_info<results_type> > recursion_stack;
0532 #ifdef BOOST_REGEX_RECURSIVE
0533
0534 bool m_can_backtrack;
0535 bool m_have_accept;
0536 bool m_have_then;
0537 #endif
0538 #ifdef BOOST_REGEX_NON_RECURSIVE
0539
0540
0541
0542 typedef bool (self_type::*unwind_proc_type)(bool);
0543
0544 void extend_stack();
0545 bool unwind(bool);
0546 bool unwind_end(bool);
0547 bool unwind_paren(bool);
0548 bool unwind_recursion_stopper(bool);
0549 bool unwind_assertion(bool);
0550 bool unwind_alt(bool);
0551 bool unwind_repeater_counter(bool);
0552 bool unwind_extra_block(bool);
0553 bool unwind_greedy_single_repeat(bool);
0554 bool unwind_slow_dot_repeat(bool);
0555 bool unwind_fast_dot_repeat(bool);
0556 bool unwind_char_repeat(bool);
0557 bool unwind_short_set_repeat(bool);
0558 bool unwind_long_set_repeat(bool);
0559 bool unwind_non_greedy_repeat(bool);
0560 bool unwind_recursion(bool);
0561 bool unwind_recursion_pop(bool);
0562 bool unwind_commit(bool);
0563 bool unwind_then(bool);
0564 bool unwind_case(bool);
0565 void destroy_single_repeat();
0566 void push_matched_paren(int index, const sub_match<BidiIterator>& sub);
0567 void push_recursion_stopper();
0568 void push_assertion(const re_syntax_base* ps, bool positive);
0569 void push_alt(const re_syntax_base* ps);
0570 void push_repeater_count(int i, repeater_count<BidiIterator>** s);
0571 void push_single_repeat(std::size_t c, const re_repeat* r, BidiIterator last_position, int state_id);
0572 void push_non_greedy_repeat(const re_syntax_base* ps);
0573 void push_recursion(int idx, const re_syntax_base* p, results_type* presults, results_type* presults2);
0574 void push_recursion_pop();
0575 void push_case_change(bool);
0576
0577
0578 saved_state* m_stack_base;
0579
0580 saved_state* m_backup_state;
0581
0582 unsigned used_block_count;
0583
0584
0585 bool m_recursive_result;
0586
0587 bool m_unwound_lookahead;
0588
0589 bool m_unwound_alt;
0590
0591
0592
0593 unsigned m_recursions;
0594 #endif
0595
0596 #ifdef BOOST_MSVC
0597 # pragma warning(push)
0598 #if BOOST_MSVC >= 1800
0599 #pragma warning(disable:26495)
0600 #endif
0601 #endif
0602
0603
0604 perl_matcher& operator=(const perl_matcher&)
0605 {
0606 return *this;
0607 }
0608 perl_matcher(const perl_matcher& that)
0609 : m_result(that.m_result), re(that.re), traits_inst(that.traits_inst), rep_obj(0) {}
0610 #ifdef BOOST_MSVC
0611 # pragma warning(pop)
0612 #endif
0613 };
0614
0615 }
0616
0617 #ifdef BOOST_MSVC
0618 # pragma warning(pop)
0619 #endif
0620
0621 #ifdef BOOST_MSVC
0622 #pragma warning(push)
0623 #pragma warning(disable: 4103)
0624 #endif
0625 #ifdef BOOST_HAS_ABI_HEADERS
0626 # include BOOST_ABI_SUFFIX
0627 #endif
0628 #ifdef BOOST_MSVC
0629 #pragma warning(pop)
0630 #endif
0631
0632 }
0633
0634
0635
0636
0637 #ifdef BOOST_REGEX_RECURSIVE
0638 #include <boost/regex/v4/perl_matcher_recursive.hpp>
0639 #else
0640 #include <boost/regex/v4/perl_matcher_non_recursive.hpp>
0641 #endif
0642
0643 #include <boost/regex/v4/perl_matcher_common.hpp>
0644
0645 #endif