Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:51:24

0001 /*
0002  *
0003  * Copyright (c) 2002
0004  * John Maddock
0005  *
0006  * Use, modification and distribution are subject to the 
0007  * Boost Software License, Version 1.0. (See accompanying file 
0008  * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
0009  *
0010  */
0011 
0012 #ifndef BOOST_REGEX_MATCHER_HPP
0013 #define BOOST_REGEX_MATCHER_HPP
0014 
0015 #include <boost/regex/v4/iterator_category.hpp>
0016 
0017 #ifdef BOOST_MSVC
0018 #pragma warning(push)
0019 #pragma warning(disable: 4103)
0020 #endif
0021 #ifdef BOOST_HAS_ABI_HEADERS
0022 #  include BOOST_ABI_PREFIX
0023 #endif
0024 #ifdef BOOST_MSVC
0025 #pragma warning(pop)
0026 #endif
0027 
0028 #ifdef BOOST_MSVC
0029 #  pragma warning(push)
0030 #pragma warning(disable : 4251)
0031 #if BOOST_MSVC < 1700
0032 #     pragma warning(disable : 4231)
0033 #endif
0034 #  if BOOST_MSVC < 1600
0035 #     pragma warning(disable : 4660)
0036 #  endif
0037 #if BOOST_MSVC < 1910
0038 #pragma warning(disable:4800)
0039 #endif
0040 #endif
0041 
0042 namespace boost{
0043 namespace BOOST_REGEX_DETAIL_NS{
0044 
0045 //
0046 // error checking API:
0047 //
0048 inline void BOOST_REGEX_CALL verify_options(boost::regex_constants::syntax_option_type, match_flag_type mf)
0049 {
0050    //
0051    // can't mix match_extra with POSIX matching rules:
0052    //
0053    if ((mf & match_extra) && (mf & match_posix))
0054    {
0055       std::logic_error msg("Usage Error: Can't mix regular expression captures with POSIX matching rules");
0056       throw_exception(msg);
0057    }
0058 }
0059 //
0060 // function can_start:
0061 //
//
// Generic version: any character outside the range [0, 1 << CHAR_BIT) is
// reported as able to start a match without consulting the map; characters
// inside that range are looked up in map and tested against mask.
//
template <class charT>
inline bool can_start(charT c, const unsigned char* map, unsigned char mask)
{
   if(c < static_cast<charT>(0))
      return true;
   if(c >= static_cast<charT>(1 << CHAR_BIT))
      return true;
   return (map[c] & mask) != 0;
}
// plain char: index the map by the character's unsigned value.
inline bool can_start(char c, const unsigned char* map, unsigned char mask)
{
   const unsigned char index = static_cast<unsigned char>(c);
   return (map[index] & mask) != 0;
}
// signed char: index the map by the character's unsigned value.
inline bool can_start(signed char c, const unsigned char* map, unsigned char mask)
{
   const unsigned char index = static_cast<unsigned char>(c);
   return (map[index] & mask) != 0;
}
// unsigned char: the character is used directly as the map index.
inline bool can_start(unsigned char c, const unsigned char* map, unsigned char mask)
{
   const unsigned char entry = map[c];
   return (entry & mask) != 0;
}
// unsigned short: values of 1 << CHAR_BIT and above always qualify; smaller
// values are looked up in the map.
inline bool can_start(unsigned short c, const unsigned char* map, unsigned char mask)
{
   if(c >= (1 << CHAR_BIT))
      return true;
   return (map[c] & mask) != 0;
}
0083 #if !defined(__hpux) && !defined(__WINSCW__)// WCHAR_MIN not usable in pp-directives.
0084 #if defined(WCHAR_MIN) && (WCHAR_MIN == 0) && !defined(BOOST_NO_INTRINSIC_WCHAR_T)
// wchar_t (only compiled where the enclosing conditions found WCHAR_MIN == 0):
// values of 1 << CHAR_BIT and above always qualify; smaller values are looked
// up in the map.
inline bool can_start(wchar_t c, const unsigned char* map, unsigned char mask)
{
   if(c >= static_cast<wchar_t>(1u << CHAR_BIT))
      return true;
   return (map[c] & mask) != 0;
}
0089 #endif
0090 #endif
0091 #if !defined(BOOST_NO_INTRINSIC_WCHAR_T)
// unsigned int: values of 1 << CHAR_BIT and above always qualify; smaller
// values are looked up in the map.
inline bool can_start(unsigned int c, const unsigned char* map, unsigned char mask)
{
   if(c >= static_cast<unsigned int>(1u << CHAR_BIT))
      return true;
   return (map[c] & mask) != 0;
}
0096 #endif
0097 
0098 
0099 //
0100 // Unfortunately Rogue Wave's standard library appears to have a bug
0101 // in std::basic_string::compare that results in erroneous answers
0102 // in some cases (tested with Borland C++ 5.1, Rogue Wave lib version
0103 // 0x020101) the test case was:
0104 // {39135,0} < {0xff,0}
0105 // which succeeds when it should not.
0106 //
0107 #ifndef _RWSTD_VER
//
// Three-way comparison of a std::basic_string against a null terminated
// string.  An empty p compares equal to an empty s and to an s that holds
// exactly one null character; every other case is delegated to
// basic_string::compare.
//
template <class C, class T, class A>
inline int string_compare(const std::basic_string<C,T,A>& s, const C* p)
{
   const bool pattern_is_empty = (0 == *p);
   if(pattern_is_empty && (s.empty() || ((s.size() == 1) && (s[0] == 0))))
      return 0;
   return s.compare(p);
}
0118 #else
//
// Three-way comparison of a std::basic_string against a null terminated
// string.  An empty p compares equal to an empty s and to an s that holds
// exactly one null character; every other case is delegated to
// basic_string::compare.
//
template <class C, class T, class A>
inline int string_compare(const std::basic_string<C,T,A>& s, const C* p)
{
   const bool pattern_is_empty = (0 == *p);
   if(pattern_is_empty && (s.empty() || ((s.size() == 1) && (s[0] == 0))))
      return 0;
   return s.compare(p);
}
// narrow strings: compare via std::strcmp on the string's C representation.
inline int string_compare(const std::string& s, const char* p)
{
   const char* const lhs = s.c_str();
   return std::strcmp(lhs, p);
}
0131 # ifndef BOOST_NO_WREGEX
// wide strings: compare via std::wcscmp on the string's C representation.
inline int string_compare(const std::wstring& s, const wchar_t* p)
{
   const wchar_t* const lhs = s.c_str();
   return std::wcscmp(lhs, p);
}
0134 #endif
0135 #endif
//
// Fallback for any sequence offering size() and operator[]: walks s and p in
// step until the first differing element or the end of s.  When all of s
// matched, the result is the negated value of the next element of p (zero if
// p ends there too); otherwise it is the difference of the two elements at
// the first mismatch.
//
template <class Seq, class C>
inline int string_compare(const Seq& s, const C* p)
{
   const std::size_t len = s.size();
   std::size_t i = 0;
   for(; i < len; ++i)
   {
      if(!(p[i] == s[i]))
         break;
   }
   if(i == len)
      return -static_cast<int>(p[i]);
   return static_cast<int>(s[i]) - static_cast<int>(p[i]);
}
0146 # define STR_COMP(s,p) string_compare(s,p)
0147 
// Returns a pointer to the element that follows the first null terminator
// found at or after p.
template<class charT>
inline const charT* re_skip_past_null(const charT* p)
{
  const charT* cursor = p;
  for(; *cursor != static_cast<charT>(0); ++cursor)
  {
  }
  return cursor + 1;
}
0154 
0155 template <class iterator, class charT, class traits_type, class char_classT>
0156 iterator BOOST_REGEX_CALL re_is_set_member(iterator next, 
0157                           iterator last, 
0158                           const re_set_long<char_classT>* set_, 
0159                           const regex_data<charT, traits_type>& e, bool icase)
0160 {   
0161    const charT* p = reinterpret_cast<const charT*>(set_+1);
0162    iterator ptr;
0163    unsigned int i;
0164    //bool icase = e.m_flags & regex_constants::icase;
0165 
0166    if(next == last) return next;
0167 
0168    typedef typename traits_type::string_type traits_string_type;
0169    const ::boost::regex_traits_wrapper<traits_type>& traits_inst = *(e.m_ptraits);
0170    
0171    // dwa 9/13/00 suppress incorrect MSVC warning - it claims this is never
0172    // referenced
0173    (void)traits_inst;
0174 
0175    // try and match a single character, could be a multi-character
0176    // collating element...
0177    for(i = 0; i < set_->csingles; ++i)
0178    {
0179       ptr = next;
0180       if(*p == static_cast<charT>(0))
0181       {
0182          // treat null string as special case:
0183          if(traits_inst.translate(*ptr, icase))
0184          {
0185             ++p;
0186             continue;
0187          }
0188          return set_->isnot ? next : (ptr == next) ? ++next : ptr;
0189       }
0190       else
0191       {
0192          while(*p && (ptr != last))
0193          {
0194             if(traits_inst.translate(*ptr, icase) != *p)
0195                break;
0196             ++p;
0197             ++ptr;
0198          }
0199 
0200          if(*p == static_cast<charT>(0)) // if null we've matched
0201             return set_->isnot ? next : (ptr == next) ? ++next : ptr;
0202 
0203          p = re_skip_past_null(p);     // skip null
0204       }
0205    }
0206 
0207    charT col = traits_inst.translate(*next, icase);
0208 
0209 
0210    if(set_->cranges || set_->cequivalents)
0211    {
0212       traits_string_type s1;
0213       //
0214       // try and match a range, NB only a single character can match
0215       if(set_->cranges)
0216       {
0217          if((e.m_flags & regex_constants::collate) == 0)
0218             s1.assign(1, col);
0219          else
0220          {
0221             charT a[2] = { col, charT(0), };
0222             s1 = traits_inst.transform(a, a + 1);
0223          }
0224          for(i = 0; i < set_->cranges; ++i)
0225          {
0226             if(STR_COMP(s1, p) >= 0)
0227             {
0228                do{ ++p; }while(*p);
0229                ++p;
0230                if(STR_COMP(s1, p) <= 0)
0231                   return set_->isnot ? next : ++next;
0232             }
0233             else
0234             {
0235                // skip first string
0236                do{ ++p; }while(*p);
0237                ++p;
0238             }
0239             // skip second string
0240             do{ ++p; }while(*p);
0241             ++p;
0242          }
0243       }
0244       //
0245       // try and match an equivalence class, NB only a single character can match
0246       if(set_->cequivalents)
0247       {
0248          charT a[2] = { col, charT(0), };
0249          s1 = traits_inst.transform_primary(a, a +1);
0250          for(i = 0; i < set_->cequivalents; ++i)
0251          {
0252             if(STR_COMP(s1, p) == 0)
0253                return set_->isnot ? next : ++next;
0254             // skip string
0255             do{ ++p; }while(*p);
0256             ++p;
0257          }
0258       }
0259    }
0260    if(traits_inst.isctype(col, set_->cclasses) == true)
0261       return set_->isnot ? next : ++next;
0262    if((set_->cnclasses != 0) && (traits_inst.isctype(col, set_->cnclasses) == false))
0263       return set_->isnot ? next : ++next;
0264    return set_->isnot ? ++next : next;
0265 }
0266 
//
// One record in a chain of repeat counters.
//
// Each record links itself in front of the record found in *stack when it is
// constructed, and restores that previous record into *stack when it is
// destroyed (provided it had one).  The single-argument constructor creates
// a record with id -1 and no predecessor, which does not touch *stack.
//
template <class BidiIterator>
class repeater_count
{
   repeater_count** stack;   // location holding the current head of the chain
   repeater_count* next;     // the head that was current when we were created
   int state_id;             // identifier of the state this counter belongs to
   std::size_t count;        // the number of iterations so far
   BidiIterator start_pos;   // where the last repeat started

   //
   // Walks the chain starting at p looking for a record whose id is n.
   // Returns that record, or null if the chain runs out or a record with
   // id (-2 - current_recursion_id) is reached first.
   //
   repeater_count* unwind_until(int n, repeater_count* p, int current_recursion_id)
   { 
      for(;;)
      {
         if(!p || (p->state_id == n))
            return p;
         if(p->state_id == -2 - current_recursion_id)
            return 0;
         p = p->next;
         if(p && (p->state_id < 0))
         {
            p = unwind_until(p->state_id, p, current_recursion_id);
            if(!p)
               return p;
            p = p->next;
         }
      }
   }
public:
   // Creates the base record of a chain: id -1, zero count, no predecessor.
   repeater_count(repeater_count** s) : stack(s), next(0), state_id(-1), count(0), start_pos() {}
   
   //
   // Creates a record for state i and makes it the new head of *s.
   // The count starts at zero when the previous head has a non-negative id
   // smaller than i; otherwise count and start position are copied from the
   // record located by unwind_until (zero count if there is none).
   //
   repeater_count(int i, repeater_count** s, BidiIterator start, int current_recursion_id)
      : stack(s), next(*s), state_id(i), count(0), start_pos(start)
   {
      *stack = this;
      const bool starts_afresh = (state_id > next->state_id) && (next->state_id >= 0);
      if(!starts_afresh)
      {
         repeater_count* const earlier = unwind_until(state_id, next, current_recursion_id);
         if(earlier)
         {
            count = earlier->count;
            start_pos = earlier->start_pos;
         }
      }
   }
   // Unlinks this record by restoring the previous head (if there was one).
   ~repeater_count()
   {
      if(next)
         *stack = next;
   }
   std::size_t get_count() { return count; }
   int get_id() { return state_id; }
   std::size_t operator++() { return ++count; }
   //
   // Called when a new repeat is about to start.  If at least one iteration
   // has been counted and pos equals the recorded start position, the count
   // is set to max and true is returned; otherwise pos is recorded as the
   // new start position and false is returned.
   //
   bool check_null_repeat(const BidiIterator& pos, std::size_t max)
   {
      if((count != 0) && (pos == start_pos))
      {
         count = max;
         return true;
      }
      start_pos = pos;
      return false;
   }
};
0339 
0340 struct saved_state;
0341 
//
// Numeric tags for the kinds of saved state.  The values are contiguous,
// starting at zero; saved_state_count is one past the last real tag and so
// equals the number of tags.
//
enum saved_state_type
{
   saved_type_end                     = 0,
   saved_type_paren                   = 1,
   saved_type_recurse                 = 2,
   saved_type_assertion               = 3,
   saved_state_alt                    = 4,
   saved_state_repeater_count         = 5,
   saved_state_extra_block            = 6,
   saved_state_greedy_single_repeat   = 7,
   saved_state_rep_slow_dot           = 8,
   saved_state_rep_fast_dot           = 9,
   saved_state_rep_char               = 10,
   saved_state_rep_short_set          = 11,
   saved_state_rep_long_set           = 12,
   saved_state_non_greedy_long_repeat = 13,
   saved_state_count                  = 14
};
0360 
0361 #ifdef BOOST_MSVC
0362 #  pragma warning(push)
0363 #if BOOST_MSVC >= 1800
0364 #pragma warning(disable:26495)
0365 #endif
0366 #endif
0367 template <class Results>
0368 struct recursion_info
0369 {
0370    typedef typename Results::value_type value_type;
0371    typedef typename value_type::iterator iterator;
0372    int idx;
0373    const re_syntax_base* preturn_address;
0374    Results results;
0375    repeater_count<iterator>* repeater_stack;
0376    iterator location_of_start;
0377 };
0378 #ifdef BOOST_MSVC
0379 #  pragma warning(pop)
0380 #endif
0381 
0382 template <class BidiIterator, class Allocator, class traits>
0383 class perl_matcher
0384 {
0385 public:
0386    typedef typename traits::char_type char_type;
0387    typedef perl_matcher<BidiIterator, Allocator, traits> self_type;
0388    typedef bool (self_type::*matcher_proc_type)();
0389    typedef std::size_t traits_size_type;
0390    typedef typename is_byte<char_type>::width_type width_type;
0391    typedef typename regex_iterator_traits<BidiIterator>::difference_type difference_type;
0392    typedef match_results<BidiIterator, Allocator> results_type;
0393 
0394    perl_matcher(BidiIterator first, BidiIterator end, 
0395       match_results<BidiIterator, Allocator>& what, 
0396       const basic_regex<char_type, traits>& e,
0397       match_flag_type f,
0398       BidiIterator l_base)
0399       :  m_result(what), base(first), last(end), 
0400          position(first), backstop(l_base), re(e), traits_inst(e.get_traits()), 
0401          m_independent(false), next_count(&rep_obj), rep_obj(&next_count)
0402 #ifdef BOOST_REGEX_NON_RECURSIVE
0403       , m_recursions(0)
0404 #endif
0405    {
0406       construct_init(e, f);
0407    }
0408 
0409    bool match();
0410    bool find();
0411 
0412    void setf(match_flag_type f)
0413    { m_match_flags |= f; }
0414    void unsetf(match_flag_type f)
0415    { m_match_flags &= ~f; }
0416 
0417 private:
0418    void construct_init(const basic_regex<char_type, traits>& e, match_flag_type f);
0419 
0420    bool find_imp();
0421    bool match_imp();
0422 #ifdef BOOST_REGEX_HAS_MS_STACK_GUARD
0423    typedef bool (perl_matcher::*protected_proc_type)();
0424    bool protected_call(protected_proc_type);
0425 #endif
0426    void estimate_max_state_count(std::random_access_iterator_tag*);
0427    void estimate_max_state_count(void*);
0428    bool match_prefix();
0429    bool match_all_states();
0430 
0431    // match procs, stored in s_match_vtable:
0432    bool match_startmark();
0433    bool match_endmark();
0434    bool match_literal();
0435    bool match_start_line();
0436    bool match_end_line();
0437    bool match_wild();
0438    bool match_match();
0439    bool match_word_boundary();
0440    bool match_within_word();
0441    bool match_word_start();
0442    bool match_word_end();
0443    bool match_buffer_start();
0444    bool match_buffer_end();
0445    bool match_backref();
0446    bool match_long_set();
0447    bool match_set();
0448    bool match_jump();
0449    bool match_alt();
0450    bool match_rep();
0451    bool match_combining();
0452    bool match_soft_buffer_end();
0453    bool match_restart_continue();
0454    bool match_long_set_repeat();
0455    bool match_set_repeat();
0456    bool match_char_repeat();
0457    bool match_dot_repeat_fast();
0458    bool match_dot_repeat_slow();
0459    bool match_dot_repeat_dispatch()
0460    {
0461       return ::boost::is_random_access_iterator<BidiIterator>::value ? match_dot_repeat_fast() : match_dot_repeat_slow();
0462    }
0463    bool match_backstep();
0464    bool match_assert_backref();
0465    bool match_toggle_case();
0466 #ifdef BOOST_REGEX_RECURSIVE
0467    bool backtrack_till_match(std::size_t count);
0468 #endif
0469    bool match_recursion();
0470    bool match_fail();
0471    bool match_accept();
0472    bool match_commit();
0473    bool match_then();
0474    bool skip_until_paren(int index, bool match = true);
0475 
0476    // find procs stored in s_find_vtable:
0477    bool find_restart_any();
0478    bool find_restart_word();
0479    bool find_restart_line();
0480    bool find_restart_buf();
0481    bool find_restart_lit();
0482 
0483 private:
0484    // final result structure to be filled in:
0485    match_results<BidiIterator, Allocator>& m_result;
0486    // temporary result for POSIX matches:
0487    scoped_ptr<match_results<BidiIterator, Allocator> > m_temp_match;
0488    // pointer to actual result structure to fill in:
0489    match_results<BidiIterator, Allocator>* m_presult;
0490    // start of sequence being searched:
0491    BidiIterator base;
0492    // end of sequence being searched:
0493    BidiIterator last; 
0494    // current character being examined:
0495    BidiIterator position;
0496    // where to restart next search after failed match attempt:
0497    BidiIterator restart;
0498    // where the current search started from, acts as base for $` during grep:
0499    BidiIterator search_base;
0500    // how far we can go back when matching lookbehind:
0501    BidiIterator backstop;
0502    // the expression being examined:
0503    const basic_regex<char_type, traits>& re;
0504    // the expression's traits class:
0505    const ::boost::regex_traits_wrapper<traits>& traits_inst;
0506    // the next state in the machine being matched:
0507    const re_syntax_base* pstate;
0508    // matching flags in use:
0509    match_flag_type m_match_flags;
0510    // how many states we have examined so far:
0511    std::ptrdiff_t state_count;
0512    // max number of states to examine before giving up:
0513    std::ptrdiff_t max_state_count;
0514    // whether we should ignore case or not:
0515    bool icase;
0516    // set to true when (position == last), indicates that we may have a partial match:
0517    bool m_has_partial_match;
0518    // set to true whenever we get a match:
0519    bool m_has_found_match;
0520    // set to true whenever we're inside an independent sub-expression:
0521    bool m_independent;
0522    // the current repeat being examined:
0523    repeater_count<BidiIterator>* next_count;
0524    // the first repeat being examined (top of linked list):
0525    repeater_count<BidiIterator> rep_obj;
0526    // the mask to pass when matching word boundaries:
0527    typename traits::char_class_type m_word_mask;
0528    // the bitmask to use when determining whether a match_any matches a newline or not:
0529    unsigned char match_any_mask;
0530    // recursion information:
0531    std::vector<recursion_info<results_type> > recursion_stack;
0532 #ifdef BOOST_REGEX_RECURSIVE
0533    // Set to false by a (*COMMIT):
0534    bool m_can_backtrack;
0535    bool m_have_accept;
0536    bool m_have_then;
0537 #endif
0538 #ifdef BOOST_REGEX_NON_RECURSIVE
0539    //
0540    // additional members for non-recursive version:
0541    //
0542    typedef bool (self_type::*unwind_proc_type)(bool);
0543 
0544    void extend_stack();
0545    bool unwind(bool);
0546    bool unwind_end(bool);
0547    bool unwind_paren(bool);
0548    bool unwind_recursion_stopper(bool);
0549    bool unwind_assertion(bool);
0550    bool unwind_alt(bool);
0551    bool unwind_repeater_counter(bool);
0552    bool unwind_extra_block(bool);
0553    bool unwind_greedy_single_repeat(bool);
0554    bool unwind_slow_dot_repeat(bool);
0555    bool unwind_fast_dot_repeat(bool);
0556    bool unwind_char_repeat(bool);
0557    bool unwind_short_set_repeat(bool);
0558    bool unwind_long_set_repeat(bool);
0559    bool unwind_non_greedy_repeat(bool);
0560    bool unwind_recursion(bool);
0561    bool unwind_recursion_pop(bool);
0562    bool unwind_commit(bool);
0563    bool unwind_then(bool);
0564    bool unwind_case(bool);
0565    void destroy_single_repeat();
0566    void push_matched_paren(int index, const sub_match<BidiIterator>& sub);
0567    void push_recursion_stopper();
0568    void push_assertion(const re_syntax_base* ps, bool positive);
0569    void push_alt(const re_syntax_base* ps);
0570    void push_repeater_count(int i, repeater_count<BidiIterator>** s);
0571    void push_single_repeat(std::size_t c, const re_repeat* r, BidiIterator last_position, int state_id);
0572    void push_non_greedy_repeat(const re_syntax_base* ps);
0573    void push_recursion(int idx, const re_syntax_base* p, results_type* presults, results_type* presults2);
0574    void push_recursion_pop();
0575    void push_case_change(bool);
0576 
0577    // pointer to base of stack:
0578    saved_state* m_stack_base;
0579    // pointer to current stack position:
0580    saved_state* m_backup_state;
0581    // how many memory blocks have we used up?:
0582    unsigned used_block_count;
0583    // determines what value to return when unwinding from recursion,
0584    // allows for mixed recursive/non-recursive algorithm:
0585    bool m_recursive_result;
0586    // We have unwound to a lookahead/lookbehind, used by COMMIT/PRUNE/SKIP:
0587    bool m_unwound_lookahead;
0588    // We have unwound to an alternative, used by THEN:
0589    bool m_unwound_alt;
0590    // We are unwinding a commit - used by independent subs to determine whether to stop there or carry on unwinding:
0591    //bool m_unwind_commit;
0592    // Recursion limit:
0593    unsigned m_recursions;
0594 #endif
0595 
0596 #ifdef BOOST_MSVC
0597 #  pragma warning(push)
0598 #if BOOST_MSVC >= 1800
0599 #pragma warning(disable:26495)
0600 #endif
0601 #endif
0602    // these operations aren't allowed, so are declared private,
0603    // bodies are provided to keep explicit-instantiation requests happy:
0604    perl_matcher& operator=(const perl_matcher&)
0605    {
0606       return *this;
0607    }
0608    perl_matcher(const perl_matcher& that)
0609       : m_result(that.m_result), re(that.re), traits_inst(that.traits_inst), rep_obj(0) {}
0610 #ifdef BOOST_MSVC
0611 #  pragma warning(pop)
0612 #endif
0613 };
0614 
0615 } // namespace BOOST_REGEX_DETAIL_NS
0616 
0617 #ifdef BOOST_MSVC
0618 #  pragma warning(pop)
0619 #endif
0620 
0621 #ifdef BOOST_MSVC
0622 #pragma warning(push)
0623 #pragma warning(disable: 4103)
0624 #endif
0625 #ifdef BOOST_HAS_ABI_HEADERS
0626 #  include BOOST_ABI_SUFFIX
0627 #endif
0628 #ifdef BOOST_MSVC
0629 #pragma warning(pop)
0630 #endif
0631 
0632 } // namespace boost
0633 
0634 //
0635 // include the implementation of perl_matcher:
0636 //
0637 #ifdef BOOST_REGEX_RECURSIVE
0638 #include <boost/regex/v4/perl_matcher_recursive.hpp>
0639 #else
0640 #include <boost/regex/v4/perl_matcher_non_recursive.hpp>
0641 #endif
0642 // this one has to be last:
0643 #include <boost/regex/v4/perl_matcher_common.hpp>
0644 
0645 #endif