Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:51:22

0001 /*
0002  *
0003  * Copyright (c) 2004
0004  * John Maddock
0005  *
0006  * Use, modification and distribution are subject to the 
0007  * Boost Software License, Version 1.0. (See accompanying file 
0008  * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
0009  *
0010  */
0011 
0012  /*
0013   *   LOCATION:    see http://www.boost.org for most recent version.
0014   *   FILE         basic_regex_parser.hpp
0015   *   VERSION      see <boost/version.hpp>
0016   *   DESCRIPTION: Declares template class basic_regex_parser.
0017   */
0018 
0019 #ifndef BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
0020 #define BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP
0021 
0022 #ifdef BOOST_MSVC
0023 #pragma warning(push)
0024 #pragma warning(disable: 4103)
0025 #if BOOST_MSVC >= 1800
0026 #pragma warning(disable: 26812)
0027 #endif
0028 #endif
0029 #ifdef BOOST_HAS_ABI_HEADERS
0030 #  include BOOST_ABI_PREFIX
0031 #endif
0032 #ifdef BOOST_MSVC
0033 #pragma warning(pop)
0034 #endif
0035 
0036 namespace boost{
0037 namespace BOOST_REGEX_DETAIL_NS{
0038 
0039 #ifdef BOOST_MSVC
0040 #pragma warning(push)
0041 #pragma warning(disable:4244)
0042 #if BOOST_MSVC < 1910
0043 #pragma warning(disable:4800)
0044 #endif
0045 #endif
0046 
0047 inline boost::intmax_t umax(mpl::false_ const&)
0048 {
0049    // Get out clause here, just in case numeric_limits is unspecialized:
0050    return std::numeric_limits<boost::intmax_t>::is_specialized ? (std::numeric_limits<boost::intmax_t>::max)() : INT_MAX;
0051 }
0052 inline boost::intmax_t umax(mpl::true_ const&)
0053 {
0054    return (std::numeric_limits<std::size_t>::max)();
0055 }
0056 
0057 inline boost::intmax_t umax()
0058 {
0059    return umax(mpl::bool_<std::numeric_limits<boost::intmax_t>::digits >= std::numeric_limits<std::size_t>::digits>());
0060 }
0061 
0062 template <class charT, class traits>
0063 class basic_regex_parser : public basic_regex_creator<charT, traits>
0064 {
0065 public:
0066    basic_regex_parser(regex_data<charT, traits>* data);
0067    void parse(const charT* p1, const charT* p2, unsigned flags);
0068    void fail(regex_constants::error_type error_code, std::ptrdiff_t position);
0069    void fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos);
0070    void fail(regex_constants::error_type error_code, std::ptrdiff_t position, const std::string& message)
0071    {
0072       fail(error_code, position, message, position);
0073    }
0074 
0075    bool parse_all();
0076    bool parse_basic();
0077    bool parse_extended();
0078    bool parse_literal();
0079    bool parse_open_paren();
0080    bool parse_basic_escape();
0081    bool parse_extended_escape();
0082    bool parse_match_any();
0083    bool parse_repeat(std::size_t low = 0, std::size_t high = (std::numeric_limits<std::size_t>::max)());
0084    bool parse_repeat_range(bool isbasic);
0085    bool parse_alt();
0086    bool parse_set();
0087    bool parse_backref();
0088    void parse_set_literal(basic_char_set<charT, traits>& char_set);
0089    bool parse_inner_set(basic_char_set<charT, traits>& char_set);
0090    bool parse_QE();
0091    bool parse_perl_extension();
0092    bool parse_perl_verb();
0093    bool match_verb(const char*);
0094    bool add_emacs_code(bool negate);
0095    bool unwind_alts(std::ptrdiff_t last_paren_start);
0096    digraph<charT> get_next_set_literal(basic_char_set<charT, traits>& char_set);
0097    charT unescape_character();
0098    regex_constants::syntax_option_type parse_options();
0099 
0100 private:
0101    typedef bool (basic_regex_parser::*parser_proc_type)();
0102    typedef typename traits::string_type string_type;
0103    typedef typename traits::char_class_type char_class_type;
0104    parser_proc_type           m_parser_proc;    // the main parser to use
0105    const charT*               m_base;           // the start of the string being parsed
0106    const charT*               m_end;            // the end of the string being parsed
0107    const charT*               m_position;       // our current parser position
0108    unsigned                   m_mark_count;     // how many sub-expressions we have
0109    int                        m_mark_reset;     // used to indicate that we're inside a (?|...) block.
0110    unsigned                   m_max_mark;       // largest mark count seen inside a (?|...) block.
0111    std::ptrdiff_t             m_paren_start;    // where the last seen ')' began (where repeats are inserted).
0112    std::ptrdiff_t             m_alt_insert_point; // where to insert the next alternative
0113    bool                       m_has_case_change; // true if somewhere in the current block the case has changed
0114    unsigned                   m_recursion_count; // How many times we've called parse_all.
0115 #if defined(BOOST_MSVC) && defined(_M_IX86)
0116    // This is an ugly warning suppression workaround (for warnings *inside* std::vector
0117    // that can not otherwise be suppressed)...
0118    BOOST_STATIC_ASSERT(sizeof(long) >= sizeof(void*));
0119    std::vector<long>           m_alt_jumps;      // list of alternative in the current scope.
0120 #else
0121    std::vector<std::ptrdiff_t> m_alt_jumps;      // list of alternative in the current scope.
0122 #endif
0123 
0124    basic_regex_parser& operator=(const basic_regex_parser&);
0125    basic_regex_parser(const basic_regex_parser&);
0126 };
0127 
0128 template <class charT, class traits>
0129 basic_regex_parser<charT, traits>::basic_regex_parser(regex_data<charT, traits>* data)
0130    : basic_regex_creator<charT, traits>(data), m_parser_proc(), m_base(0), m_end(0), m_position(0), 
0131    m_mark_count(0), m_mark_reset(-1), m_max_mark(0), m_paren_start(0), m_alt_insert_point(0), m_has_case_change(false), m_recursion_count(0)
0132 {
0133 }
0134 
0135 template <class charT, class traits>
0136 void basic_regex_parser<charT, traits>::parse(const charT* p1, const charT* p2, unsigned l_flags)
0137 {
0138    // pass l_flags on to base class:
0139    this->init(l_flags);
0140    // set up pointers:
0141    m_position = m_base = p1;
0142    m_end = p2;
0143    // empty strings are errors:
0144    if((p1 == p2) && 
0145       (
0146          ((l_flags & regbase::main_option_type) != regbase::perl_syntax_group)
0147          || (l_flags & regbase::no_empty_expressions)
0148       )
0149      )
0150    {
0151       fail(regex_constants::error_empty, 0);
0152       return;
0153    }
0154    // select which parser to use:
0155    switch(l_flags & regbase::main_option_type)
0156    {
0157    case regbase::perl_syntax_group:
0158       {
0159          m_parser_proc = &basic_regex_parser<charT, traits>::parse_extended;
0160          //
0161          // Add a leading paren with index zero to give recursions a target:
0162          //
0163          re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
0164          br->index = 0;
0165          br->icase = this->flags() & regbase::icase;
0166          break;
0167       }
0168    case regbase::basic_syntax_group:
0169       m_parser_proc = &basic_regex_parser<charT, traits>::parse_basic;
0170       break;
0171    case regbase::literal:
0172       m_parser_proc = &basic_regex_parser<charT, traits>::parse_literal;
0173       break;
0174    default:
0175       // Oops, someone has managed to set more than one of the main option flags, 
0176       // so this must be an error:
0177       fail(regex_constants::error_unknown, 0, "An invalid combination of regular expression syntax flags was used.");
0178       return;
0179    }
0180 
0181    // parse all our characters:
0182    bool result = parse_all();
0183    //
0184    // Unwind our alternatives:
0185    //
0186    unwind_alts(-1);
0187    // reset l_flags as a global scope (?imsx) may have altered them:
0188    this->flags(l_flags);
0189    // if we haven't gobbled up all the characters then we must
0190    // have had an unexpected ')' :
0191    if(!result)
0192    {
0193       fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_position), "Found a closing ) with no corresponding opening parenthesis.");
0194       return;
0195    }
0196    // if an error has been set then give up now:
0197    if(this->m_pdata->m_status)
0198       return;
0199    // fill in our sub-expression count:
0200    this->m_pdata->m_mark_count = 1u + (std::size_t)m_mark_count;
0201    this->finalize(p1, p2);
0202 }
0203 
0204 template <class charT, class traits>
0205 void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position)
0206 {
0207    // get the error message:
0208    std::string message = this->m_pdata->m_ptraits->error_string(error_code);
0209    fail(error_code, position, message);
0210 }
0211 
0212 template <class charT, class traits>
0213 void basic_regex_parser<charT, traits>::fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos)
0214 {
0215    if(0 == this->m_pdata->m_status) // update the error code if not already set
0216       this->m_pdata->m_status = error_code;
0217    m_position = m_end; // don't bother parsing anything else
0218 
0219 #ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS
0220    //
0221    // Augment error message with the regular expression text:
0222    //
0223    if(start_pos == position)
0224       start_pos = (std::max)(static_cast<std::ptrdiff_t>(0), position - static_cast<std::ptrdiff_t>(10));
0225    std::ptrdiff_t end_pos = (std::min)(position + static_cast<std::ptrdiff_t>(10), static_cast<std::ptrdiff_t>(m_end - m_base));
0226    if(error_code != regex_constants::error_empty)
0227    {
0228       if((start_pos != 0) || (end_pos != (m_end - m_base)))
0229          message += "  The error occurred while parsing the regular expression fragment: '";
0230       else
0231          message += "  The error occurred while parsing the regular expression: '";
0232       if(start_pos != end_pos)
0233       {
0234          message += std::string(m_base + start_pos, m_base + position);
0235          message += ">>>HERE>>>";
0236          message += std::string(m_base + position, m_base + end_pos);
0237       }
0238       message += "'.";
0239    }
0240 #endif
0241 
0242 #ifndef BOOST_NO_EXCEPTIONS
0243    if(0 == (this->flags() & regex_constants::no_except))
0244    {
0245       boost::regex_error e(message, error_code, position);
0246       e.raise();
0247    }
0248 #else
0249    (void)position; // suppress warnings.
0250 #endif
0251 }
0252 
0253 template <class charT, class traits>
0254 bool basic_regex_parser<charT, traits>::parse_all()
0255 {
0256    if (++m_recursion_count > 400)
0257    {
0258       // exceeded internal limits
0259       fail(boost::regex_constants::error_complexity, m_position - m_base, "Exceeded nested brace limit.");
0260    }
0261    bool result = true;
0262    while(result && (m_position != m_end))
0263    {
0264       result = (this->*m_parser_proc)();
0265    }
0266    --m_recursion_count;
0267    return result;
0268 }
0269 
0270 #ifdef BOOST_MSVC
0271 #pragma warning(push)
0272 #pragma warning(disable:4702)
0273 #endif
0274 template <class charT, class traits>
0275 bool basic_regex_parser<charT, traits>::parse_basic()
0276 {
0277    switch(this->m_traits.syntax_type(*m_position))
0278    {
0279    case regex_constants::syntax_escape:
0280       return parse_basic_escape();
0281    case regex_constants::syntax_dot:
0282       return parse_match_any();
0283    case regex_constants::syntax_caret:
0284       ++m_position;
0285       this->append_state(syntax_element_start_line);
0286       break;
0287    case regex_constants::syntax_dollar:
0288       ++m_position;
0289       this->append_state(syntax_element_end_line);
0290       break;
0291    case regex_constants::syntax_star:
0292       if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line))
0293          return parse_literal();
0294       else
0295       {
0296          ++m_position;
0297          return parse_repeat();
0298       }
0299    case regex_constants::syntax_plus:
0300       if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
0301          return parse_literal();
0302       else
0303       {
0304          ++m_position;
0305          return parse_repeat(1);
0306       }
0307    case regex_constants::syntax_question:
0308       if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex))
0309          return parse_literal();
0310       else
0311       {
0312          ++m_position;
0313          return parse_repeat(0, 1);
0314       }
0315    case regex_constants::syntax_open_set:
0316       return parse_set();
0317    case regex_constants::syntax_newline:
0318       if(this->flags() & regbase::newline_alt)
0319          return parse_alt();
0320       else
0321          return parse_literal();
0322    default:
0323       return parse_literal();
0324    }
0325    return true;
0326 }
0327 
0328 #ifdef BOOST_MSVC
0329 #  pragma warning(push)
0330 #if BOOST_MSVC >= 1800
0331 #pragma warning(disable:26812)
0332 #endif
0333 #endif
0334 template <class charT, class traits>
0335 bool basic_regex_parser<charT, traits>::parse_extended()
0336 {
0337    bool result = true;
0338    switch(this->m_traits.syntax_type(*m_position))
0339    {
0340    case regex_constants::syntax_open_mark:
0341       return parse_open_paren();
0342    case regex_constants::syntax_close_mark:
0343       return false;
0344    case regex_constants::syntax_escape:
0345       return parse_extended_escape();
0346    case regex_constants::syntax_dot:
0347       return parse_match_any();
0348    case regex_constants::syntax_caret:
0349       ++m_position;
0350       this->append_state(
0351          (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_start : syntax_element_start_line));
0352       break;
0353    case regex_constants::syntax_dollar:
0354       ++m_position;
0355       this->append_state(
0356          (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_end : syntax_element_end_line));
0357       break;
0358    case regex_constants::syntax_star:
0359       if(m_position == this->m_base)
0360       {
0361          fail(regex_constants::error_badrepeat, 0, "The repeat operator \"*\" cannot start a regular expression.");
0362          return false;
0363       }
0364       ++m_position;
0365       return parse_repeat();
0366    case regex_constants::syntax_question:
0367       if(m_position == this->m_base)
0368       {
0369          fail(regex_constants::error_badrepeat, 0, "The repeat operator \"?\" cannot start a regular expression.");
0370          return false;
0371       }
0372       ++m_position;
0373       return parse_repeat(0,1);
0374    case regex_constants::syntax_plus:
0375       if(m_position == this->m_base)
0376       {
0377          fail(regex_constants::error_badrepeat, 0, "The repeat operator \"+\" cannot start a regular expression.");
0378          return false;
0379       }
0380       ++m_position;
0381       return parse_repeat(1);
0382    case regex_constants::syntax_open_brace:
0383       ++m_position;
0384       return parse_repeat_range(false);
0385    case regex_constants::syntax_close_brace:
0386       if((this->flags() & regbase::no_perl_ex) == regbase::no_perl_ex)
0387       {
0388          fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {.");
0389          return false;
0390       }
0391       result = parse_literal();
0392       break;
0393    case regex_constants::syntax_or:
0394       return parse_alt();
0395    case regex_constants::syntax_open_set:
0396       return parse_set();
0397    case regex_constants::syntax_newline:
0398       if(this->flags() & regbase::newline_alt)
0399          return parse_alt();
0400       else
0401          return parse_literal();
0402    case regex_constants::syntax_hash:
0403       //
0404       // If we have a mod_x flag set, then skip until
0405       // we get to a newline character:
0406       //
0407       if((this->flags() 
0408          & (regbase::no_perl_ex|regbase::mod_x))
0409          == regbase::mod_x)
0410       {
0411          while((m_position != m_end) && !is_separator(*m_position++)){}
0412          return true;
0413       }
0414       BOOST_FALLTHROUGH;
0415    default:
0416       result = parse_literal();
0417       break;
0418    }
0419    return result;
0420 }
0421 #ifdef BOOST_MSVC
0422 #  pragma warning(pop)
0423 #endif
0424 #ifdef BOOST_MSVC
0425 #pragma warning(pop)
0426 #endif
0427 
0428 template <class charT, class traits>
0429 bool basic_regex_parser<charT, traits>::parse_literal()
0430 {
0431    // append this as a literal provided it's not a space character
0432    // or the perl option regbase::mod_x is not set:
0433    if(
0434       ((this->flags() 
0435          & (regbase::main_option_type|regbase::mod_x|regbase::no_perl_ex)) 
0436             != regbase::mod_x)
0437       || !this->m_traits.isctype(*m_position, this->m_mask_space))
0438          this->append_literal(*m_position);
0439    ++m_position;
0440    return true;
0441 }
0442 
0443 template <class charT, class traits>
0444 bool basic_regex_parser<charT, traits>::parse_open_paren()
0445 {
0446    //
0447    // skip the '(' and error check:
0448    //
0449    if(++m_position == m_end)
0450    {
0451       fail(regex_constants::error_paren, m_position - m_base);
0452       return false;
0453    }
0454    //
0455    // begin by checking for a perl-style (?...) extension:
0456    //
0457    if(
0458          ((this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) == 0)
0459          || ((this->flags() & (regbase::main_option_type | regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
0460      )
0461    {
0462       if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question)
0463          return parse_perl_extension();
0464       if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_star)
0465          return parse_perl_verb();
0466    }
0467    //
0468    // update our mark count, and append the required state:
0469    //
0470    unsigned markid = 0;
0471    if(0 == (this->flags() & regbase::nosubs))
0472    {
0473       markid = ++m_mark_count;
0474 #ifndef BOOST_NO_STD_DISTANCE
0475       if(this->flags() & regbase::save_subexpression_location)
0476          this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 1, 0));
0477 #else
0478       if(this->flags() & regbase::save_subexpression_location)
0479          this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 1, 0));
0480 #endif
0481    }
0482    re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
0483    pb->index = markid;
0484    pb->icase = this->flags() & regbase::icase;
0485    std::ptrdiff_t last_paren_start = this->getoffset(pb);
0486    // back up insertion point for alternations, and set new point:
0487    std::ptrdiff_t last_alt_point = m_alt_insert_point;
0488    this->m_pdata->m_data.align();
0489    m_alt_insert_point = this->m_pdata->m_data.size();
0490    //
0491    // back up the current flags in case we have a nested (?imsx) group:
0492    //
0493    regex_constants::syntax_option_type opts = this->flags();
0494    bool old_case_change = m_has_case_change;
0495    m_has_case_change = false; // no changes to this scope as yet...
0496    //
0497    // Back up branch reset data in case we have a nested (?|...)
0498    //
0499    int mark_reset = m_mark_reset;
0500    m_mark_reset = -1;
0501    //
0502    // now recursively add more states, this will terminate when we get to a
0503    // matching ')' :
0504    //
0505    parse_all();
0506    //
0507    // Unwind pushed alternatives:
0508    //
0509    if(0 == unwind_alts(last_paren_start))
0510       return false;
0511    //
0512    // restore flags:
0513    //
0514    if(m_has_case_change)
0515    {
0516       // the case has changed in one or more of the alternatives
0517       // within the scoped (...) block: we have to add a state
0518       // to reset the case sensitivity:
0519       static_cast<re_case*>(
0520          this->append_state(syntax_element_toggle_case, sizeof(re_case))
0521          )->icase = opts & regbase::icase;
0522    }
0523    this->flags(opts);
0524    m_has_case_change = old_case_change;
0525    //
0526    // restore branch reset:
0527    //
0528    m_mark_reset = mark_reset;
0529    //
0530    // we either have a ')' or we have run out of characters prematurely:
0531    //
0532    if(m_position == m_end)
0533    {
0534       this->fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_end));
0535       return false;
0536    }
0537    if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
0538       return false;
0539 #ifndef BOOST_NO_STD_DISTANCE
0540    if(markid && (this->flags() & regbase::save_subexpression_location))
0541       this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position);
0542 #else
0543    if(markid && (this->flags() & regbase::save_subexpression_location))
0544       this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base);
0545 #endif
0546    ++m_position;
0547    //
0548    // append closing parenthesis state:
0549    //
0550    pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
0551    pb->index = markid;
0552    pb->icase = this->flags() & regbase::icase;
0553    this->m_paren_start = last_paren_start;
0554    //
0555    // restore the alternate insertion point:
0556    //
0557    this->m_alt_insert_point = last_alt_point;
0558    //
0559    // allow backrefs to this mark:
0560    //
0561    if(markid > 0)
0562       this->m_backrefs.set(markid);
0563 
0564    return true;
0565 }
0566 
0567 template <class charT, class traits>
0568 bool basic_regex_parser<charT, traits>::parse_basic_escape()
0569 {
0570    if(++m_position == m_end)
0571    {
0572       fail(regex_constants::error_paren, m_position - m_base);
0573       return false;
0574    }
0575    bool result = true;
0576    switch(this->m_traits.escape_syntax_type(*m_position))
0577    {
0578    case regex_constants::syntax_open_mark:
0579       return parse_open_paren();
0580    case regex_constants::syntax_close_mark:
0581       return false;
0582    case regex_constants::syntax_plus:
0583       if(this->flags() & regex_constants::bk_plus_qm)
0584       {
0585          ++m_position;
0586          return parse_repeat(1);
0587       }
0588       else
0589          return parse_literal();
0590    case regex_constants::syntax_question:
0591       if(this->flags() & regex_constants::bk_plus_qm)
0592       {
0593          ++m_position;
0594          return parse_repeat(0, 1);
0595       }
0596       else
0597          return parse_literal();
0598    case regex_constants::syntax_open_brace:
0599       if(this->flags() & regbase::no_intervals)
0600          return parse_literal();
0601       ++m_position;
0602       return parse_repeat_range(true);
0603    case regex_constants::syntax_close_brace:
0604       if(this->flags() & regbase::no_intervals)
0605          return parse_literal();
0606       fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {.");
0607       return false;
0608    case regex_constants::syntax_or:
0609       if(this->flags() & regbase::bk_vbar)
0610          return parse_alt();
0611       else
0612          result = parse_literal();
0613       break;
0614    case regex_constants::syntax_digit:
0615       return parse_backref();
0616    case regex_constants::escape_type_start_buffer:
0617       if(this->flags() & regbase::emacs_ex)
0618       {
0619          ++m_position;
0620          this->append_state(syntax_element_buffer_start);
0621       }
0622       else
0623          result = parse_literal();
0624       break;
0625    case regex_constants::escape_type_end_buffer:
0626       if(this->flags() & regbase::emacs_ex)
0627       {
0628          ++m_position;
0629          this->append_state(syntax_element_buffer_end);
0630       }
0631       else
0632          result = parse_literal();
0633       break;
0634    case regex_constants::escape_type_word_assert:
0635       if(this->flags() & regbase::emacs_ex)
0636       {
0637          ++m_position;
0638          this->append_state(syntax_element_word_boundary);
0639       }
0640       else
0641          result = parse_literal();
0642       break;
0643    case regex_constants::escape_type_not_word_assert:
0644       if(this->flags() & regbase::emacs_ex)
0645       {
0646          ++m_position;
0647          this->append_state(syntax_element_within_word);
0648       }
0649       else
0650          result = parse_literal();
0651       break;
0652    case regex_constants::escape_type_left_word:
0653       if(this->flags() & regbase::emacs_ex)
0654       {
0655          ++m_position;
0656          this->append_state(syntax_element_word_start);
0657       }
0658       else
0659          result = parse_literal();
0660       break;
0661    case regex_constants::escape_type_right_word:
0662       if(this->flags() & regbase::emacs_ex)
0663       {
0664          ++m_position;
0665          this->append_state(syntax_element_word_end);
0666       }
0667       else
0668          result = parse_literal();
0669       break;
0670    default:
0671       if(this->flags() & regbase::emacs_ex)
0672       {
0673          bool negate = true;
0674          switch(*m_position)
0675          {
0676          case 'w':
0677             negate = false;
0678             BOOST_FALLTHROUGH;
0679          case 'W':
0680             {
0681             basic_char_set<charT, traits> char_set;
0682             if(negate)
0683                char_set.negate();
0684             char_set.add_class(this->m_word_mask);
0685             if(0 == this->append_set(char_set))
0686             {
0687                fail(regex_constants::error_ctype, m_position - m_base);
0688                return false;
0689             }
0690             ++m_position;
0691             return true;
0692             }
0693          case 's':
0694             negate = false;
0695             BOOST_FALLTHROUGH;
0696          case 'S':
0697             return add_emacs_code(negate);
0698          case 'c':
0699          case 'C':
0700             // not supported yet:
0701             fail(regex_constants::error_escape, m_position - m_base, "The \\c and \\C escape sequences are not supported by POSIX basic regular expressions: try the Perl syntax instead.");
0702             return false;
0703          default:
0704             break;
0705          }
0706       }
0707       result = parse_literal();
0708       break;
0709    }
0710    return result;
0711 }
0712 
0713 template <class charT, class traits>
0714 bool basic_regex_parser<charT, traits>::parse_extended_escape()
0715 {
0716    ++m_position;
0717    if(m_position == m_end)
0718    {
0719       fail(regex_constants::error_escape, m_position - m_base, "Incomplete escape sequence found.");
0720       return false;
0721    }
0722    bool negate = false; // in case this is a character class escape: \w \d etc
0723    switch(this->m_traits.escape_syntax_type(*m_position))
0724    {
0725    case regex_constants::escape_type_not_class:
0726       negate = true;
0727       BOOST_FALLTHROUGH;
0728    case regex_constants::escape_type_class:
0729       {
0730 escape_type_class_jump:
0731          typedef typename traits::char_class_type m_type;
0732          m_type m = this->m_traits.lookup_classname(m_position, m_position+1);
0733          if(m != 0)
0734          {
0735             basic_char_set<charT, traits> char_set;
0736             if(negate)
0737                char_set.negate();
0738             char_set.add_class(m);
0739             if(0 == this->append_set(char_set))
0740             {
0741                fail(regex_constants::error_ctype, m_position - m_base);
0742                return false;
0743             }
0744             ++m_position;
0745             return true;
0746          }
0747          //
0748          // not a class, just a regular unknown escape:
0749          //
0750          this->append_literal(unescape_character());
0751          break;
0752       }
0753    case regex_constants::syntax_digit:
0754       return parse_backref();
0755    case regex_constants::escape_type_left_word:
0756       ++m_position;
0757       this->append_state(syntax_element_word_start);
0758       break;
0759    case regex_constants::escape_type_right_word:
0760       ++m_position;
0761       this->append_state(syntax_element_word_end);
0762       break;
0763    case regex_constants::escape_type_start_buffer:
0764       ++m_position;
0765       this->append_state(syntax_element_buffer_start);
0766       break;
0767    case regex_constants::escape_type_end_buffer:
0768       ++m_position;
0769       this->append_state(syntax_element_buffer_end);
0770       break;
0771    case regex_constants::escape_type_word_assert:
0772       ++m_position;
0773       this->append_state(syntax_element_word_boundary);
0774       break;
0775    case regex_constants::escape_type_not_word_assert:
0776       ++m_position;
0777       this->append_state(syntax_element_within_word);
0778       break;
0779    case regex_constants::escape_type_Z:
0780       ++m_position;
0781       this->append_state(syntax_element_soft_buffer_end);
0782       break;
0783    case regex_constants::escape_type_Q:
0784       return parse_QE();
0785    case regex_constants::escape_type_C:
0786       return parse_match_any();
0787    case regex_constants::escape_type_X:
0788       ++m_position;
0789       this->append_state(syntax_element_combining);
0790       break;
0791    case regex_constants::escape_type_G:
0792       ++m_position;
0793       this->append_state(syntax_element_restart_continue);
0794       break;
0795    case regex_constants::escape_type_not_property:
0796       negate = true;
0797       BOOST_FALLTHROUGH;
0798    case regex_constants::escape_type_property:
0799       {
0800          ++m_position;
0801          char_class_type m;
0802          if(m_position == m_end)
0803          {
0804             fail(regex_constants::error_escape, m_position - m_base, "Incomplete property escape found.");
0805             return false;
0806          }
0807          // maybe have \p{ddd}
0808          if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
0809          {
0810             const charT* base = m_position;
0811             // skip forward until we find enclosing brace:
0812             while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
0813                ++m_position;
0814             if(m_position == m_end)
0815             {
0816                fail(regex_constants::error_escape, m_position - m_base, "Closing } missing from property escape sequence.");
0817                return false;
0818             }
0819             m = this->m_traits.lookup_classname(++base, m_position++);
0820          }
0821          else
0822          {
0823             m = this->m_traits.lookup_classname(m_position, m_position+1);
0824             ++m_position;
0825          }
0826          if(m != 0)
0827          {
0828             basic_char_set<charT, traits> char_set;
0829             if(negate)
0830                char_set.negate();
0831             char_set.add_class(m);
0832             if(0 == this->append_set(char_set))
0833             {
0834                fail(regex_constants::error_ctype, m_position - m_base);
0835                return false;
0836             }
0837             return true;
0838          }
0839          fail(regex_constants::error_ctype, m_position - m_base, "Escape sequence was neither a valid property nor a valid character class name.");
0840          return false;
0841       }
0842    case regex_constants::escape_type_reset_start_mark:
0843       if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
0844       {
0845          re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
0846          pb->index = -5;
0847          pb->icase = this->flags() & regbase::icase;
0848          this->m_pdata->m_data.align();
0849          ++m_position;
0850          return true;
0851       }
0852       goto escape_type_class_jump;
0853    case regex_constants::escape_type_line_ending:
0854       if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
0855       {
0856          const charT* e = get_escape_R_string<charT>();
0857          const charT* old_position = m_position;
0858          const charT* old_end = m_end;
0859          const charT* old_base = m_base;
0860          m_position = e;
0861          m_base = e;
0862          m_end = e + traits::length(e);
0863          bool r = parse_all();
0864          m_position = ++old_position;
0865          m_end = old_end;
0866          m_base = old_base;
0867          return r;
0868       }
0869       goto escape_type_class_jump;
0870    case regex_constants::escape_type_extended_backref:
0871       if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
0872       {
0873          bool have_brace = false;
0874          bool negative = false;
0875          static const char incomplete_message[] = "Incomplete \\g escape found.";
0876          if(++m_position == m_end)
0877          {
0878             fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
0879             return false;
0880          }
0881          // maybe have \g{ddd}
0882          regex_constants::syntax_type syn = this->m_traits.syntax_type(*m_position);
0883          regex_constants::syntax_type syn_end = 0;
0884          if((syn == regex_constants::syntax_open_brace) 
0885             || (syn == regex_constants::escape_type_left_word)
0886             || (syn == regex_constants::escape_type_end_buffer))
0887          {
0888             if(++m_position == m_end)
0889             {
0890                fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
0891                return false;
0892             }
0893             have_brace = true;
0894             switch(syn)
0895             {
0896             case regex_constants::syntax_open_brace:
0897                syn_end = regex_constants::syntax_close_brace;
0898                break;
0899             case regex_constants::escape_type_left_word:
0900                syn_end = regex_constants::escape_type_right_word;
0901                break;
0902             default:
0903                syn_end = regex_constants::escape_type_end_buffer;
0904                break;
0905             }
0906          }
0907          negative = (*m_position == static_cast<charT>('-'));
0908          if((negative) && (++m_position == m_end))
0909          {
0910             fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
0911             return false;
0912          }
0913          const charT* pc = m_position;
0914          boost::intmax_t i = this->m_traits.toi(pc, m_end, 10);
0915          if((i < 0) && syn_end)
0916          {
0917             // Check for a named capture, get the leftmost one if there is more than one:
0918             const charT* base = m_position;
0919             while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != syn_end))
0920             {
0921                ++m_position;
0922             }
0923             i = hash_value_from_capture_name(base, m_position);
0924             pc = m_position;
0925          }
0926          if(negative)
0927             i = 1 + (static_cast<boost::intmax_t>(m_mark_count) - i);
0928          if(((i < hash_value_mask) && (i > 0) && (this->m_backrefs.test(i))) || ((i >= hash_value_mask) && (this->m_pdata->get_id(i) > 0) && (this->m_backrefs.test(this->m_pdata->get_id(i)))))
0929          {
0930             m_position = pc;
0931             re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
0932             pb->index = i;
0933             pb->icase = this->flags() & regbase::icase;
0934          }
0935          else
0936          {
0937             fail(regex_constants::error_backref, m_position - m_base);
0938             return false;
0939          }
0940          m_position = pc;
0941          if(have_brace)
0942          {
0943             if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != syn_end))
0944             {
0945                fail(regex_constants::error_escape, m_position - m_base, incomplete_message);
0946                return false;
0947             }
0948             ++m_position;
0949          }
0950          return true;
0951       }
0952       goto escape_type_class_jump;
0953    case regex_constants::escape_type_control_v:
0954       if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
0955          goto escape_type_class_jump;
0956       BOOST_FALLTHROUGH;
0957    default:
0958       this->append_literal(unescape_character());
0959       break;
0960    }
0961    return true;
0962 }
0963 
0964 template <class charT, class traits>
0965 bool basic_regex_parser<charT, traits>::parse_match_any()
0966 {
0967    //
0968    // we have a '.' that can match any character:
0969    //
0970    ++m_position;
0971    static_cast<re_dot*>(
0972       this->append_state(syntax_element_wild, sizeof(re_dot))
0973       )->mask = static_cast<unsigned char>(this->flags() & regbase::no_mod_s 
0974       ? BOOST_REGEX_DETAIL_NS::force_not_newline 
0975          : this->flags() & regbase::mod_s ?
0976             BOOST_REGEX_DETAIL_NS::force_newline : BOOST_REGEX_DETAIL_NS::dont_care);
0977    return true;
0978 }
0979 
0980 template <class charT, class traits>
0981 bool basic_regex_parser<charT, traits>::parse_repeat(std::size_t low, std::size_t high)
0982 {
        //
        // Wraps the most recently parsed item (this->m_last_state) in a repeat
        // that runs between low and high times.  A trailing '?' (non-greedy)
        // and, for perl regexes, a trailing '+' (possessive) are consumed first.
        // Returns true on success; returns false after calling fail() when there
        // is nothing to repeat, when the last state cannot be repeated, or when
        // a possessive repeat is directly followed by another quantifier.
        //
0983    bool greedy = true;
0984    bool possessive = false;
0985    std::size_t insert_point;   // offset at which the repeat state gets inserted
0986    // 
0987    // when we get to here we may have a non-greedy ? mark still to come:
0988    //
0989    if((m_position != m_end) 
0990       && (
0991             (0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex)))
0992             || ((regbase::basic_syntax_group|regbase::emacs_ex) == (this->flags() & (regbase::main_option_type | regbase::emacs_ex)))
0993          )
0994       )
0995    {
0996       // OK we have a perl or emacs regex, check for a '?':
0997       if ((this->flags() & (regbase::main_option_type | regbase::mod_x | regbase::no_perl_ex)) == regbase::mod_x)
0998       {
0999          // whitespace skip (only when mod_x is the sole bit set among those tested above):
1000          while ((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1001             ++m_position;
1002       }
1003       if((m_position != m_end) && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question))
1004       {
1005          greedy = false;
1006          ++m_position;
1007       }
1008       // for perl regexes only check for possessive ++ repeats.
1009       if((m_position != m_end)
1010          && (0 == (this->flags() & regbase::main_option_type)) 
1011          && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_plus))
1012       {
1013          possessive = true;
1014          ++m_position;
1015       }
1016    }
        // note that the modifiers above have already been consumed by the time
        // the checks below can reject the repeat:
1017    if(0 == this->m_last_state)
1018    {
1019       fail(regex_constants::error_badrepeat, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_position), "Nothing to repeat.");
1020       return false;
1021    }
1022    if(this->m_last_state->type == syntax_element_endmark)
1023    {
1024       // insert a repeat before the '(' matching the last ')':
1025       insert_point = this->m_paren_start;
1026    }
1027    else if((this->m_last_state->type == syntax_element_literal) && (static_cast<re_literal*>(this->m_last_state)->length > 1))
1028    {
1029       // the last state was a literal with more than one character, split it in two:
1030       re_literal* lit = static_cast<re_literal*>(this->m_last_state);
           // the characters are read from the storage that directly follows the
           // re_literal header (lit+1); grab the final one and shorten the literal:
1031       charT c = (static_cast<charT*>(static_cast<void*>(lit+1)))[lit->length - 1];
1032       lit->length -= 1;
1033       // now append new state:
1034       lit = static_cast<re_literal*>(this->append_state(syntax_element_literal, sizeof(re_literal) + sizeof(charT)));
1035       lit->length = 1;
1036       (static_cast<charT*>(static_cast<void*>(lit+1)))[0] = c;
           // only the new one-character literal is repeated:
1037       insert_point = this->getoffset(this->m_last_state);
1038    }
1039    else
1040    {
1041       // repeat the last state whatever it was, need to add some error checking here:
1042       switch(this->m_last_state->type)
1043       {
1044       case syntax_element_start_line:
1045       case syntax_element_end_line:
1046       case syntax_element_word_boundary:
1047       case syntax_element_within_word:
1048       case syntax_element_word_start:
1049       case syntax_element_word_end:
1050       case syntax_element_buffer_start:
1051       case syntax_element_buffer_end:
1052       case syntax_element_alt:
1053       case syntax_element_soft_buffer_end:
1054       case syntax_element_restart_continue:
1055       case syntax_element_jump:
1056       case syntax_element_startmark:
1057       case syntax_element_backstep:
1058       case syntax_element_toggle_case:
1059          // can't legally repeat any of the above:
1060          fail(regex_constants::error_badrepeat, m_position - m_base);
1061          return false;
1062       default:
1063          // do nothing...
1064          break;
1065       }
1066       insert_point = this->getoffset(this->m_last_state);
1067    }
1068    //
1069    // OK we now know what to repeat, so insert the repeat around it:
1070    //
1071    re_repeat* rep = static_cast<re_repeat*>(this->insert_state(insert_point, syntax_element_rep, re_repeater_size));
1072    rep->min = low;
1073    rep->max = high;
1074    rep->greedy = greedy;
1075    rep->leading = false;
1076    // store our repeater position for later:
        // NOTE(review): rep is looked up again through this offset further down,
        // presumably because appending states can move the underlying storage - confirm.
1077    std::ptrdiff_t rep_off = this->getoffset(rep);
1078    // and append a back jump to the repeat:
1079    re_jump* jmp = static_cast<re_jump*>(this->append_state(syntax_element_jump, sizeof(re_jump)));
1080    jmp->alt.i = rep_off - this->getoffset(jmp);   // relative offset from the jump back to the repeat
1081    this->m_pdata->m_data.align();
1082    // now fill in the alt jump for the repeat:
1083    rep = static_cast<re_repeat*>(this->getaddress(rep_off));
1084    rep->alt.i = this->m_pdata->m_data.size() - rep_off;   // relative offset from the repeat to the current end of the data
1085    //
1086    // If the repeat is possessive then bracket the repeat with a (?>...)
1087    // independent sub-expression construct:
1088    //
1089    if(possessive)
1090    {
1091       if(m_position != m_end)
1092       {
1093          //
1094          // Check for illegal following quantifier, we have to do this here, because
1095          // the extra states we insert below circumvents our usual error checking :-(
1096          //
1097          bool contin = false;   // set when a (?#...) comment was skipped and we must look again
1098          do
1099          {
1100             if ((this->flags() & (regbase::main_option_type | regbase::mod_x | regbase::no_perl_ex)) == regbase::mod_x)
1101             {
1102                // whitespace skip:
1103                while ((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1104                   ++m_position;
1105             }
1106             if (m_position != m_end)
1107             {
1108                switch (this->m_traits.syntax_type(*m_position))
1109                {
1110                case regex_constants::syntax_star:
1111                case regex_constants::syntax_plus:
1112                case regex_constants::syntax_question:
1113                case regex_constants::syntax_open_brace:
                      // a second quantifier straight after a possessive repeat is an error:
1114                   fail(regex_constants::error_badrepeat, m_position - m_base);
1115                   return false;
1116                case regex_constants::syntax_open_mark:
1117                   // Do we have a comment?  If so we need to skip it here...
1118                   if ((m_position + 2 < m_end) && this->m_traits.syntax_type(*(m_position + 1)) == regex_constants::syntax_question
1119                      && this->m_traits.syntax_type(*(m_position + 2)) == regex_constants::syntax_hash)
1120                   {
                         // advance until the closing mark has been consumed (or the input ends):
1121                      while ((m_position != m_end)
1122                         && (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark)) {
1123                      }
1124                      contin = true;
1125                   }
1126                   else
1127                      contin = false;
1128                   break;
1129                default:
1130                   contin = false;
1131                }
1132             }
1133             else
1134                contin = false;
1135          } while (contin);
1136       }
           // open the wrapper in front of the repeat; the same index (-3) is used
           // for the matching end mark appended below:
1137       re_brace* pb = static_cast<re_brace*>(this->insert_state(insert_point, syntax_element_startmark, sizeof(re_brace)));
1138       pb->index = -3;
1139       pb->icase = this->flags() & regbase::icase;
           // a jump goes directly after the new start mark, targeting the current end of the data:
1140       jmp = static_cast<re_jump*>(this->insert_state(insert_point + sizeof(re_brace), syntax_element_jump, sizeof(re_jump)));
1141       this->m_pdata->m_data.align();
1142       jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
1143       pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
1144       pb->index = -3;
1145       pb->icase = this->flags() & regbase::icase;
1146    }
1147    return true;
1148 }
1149 
1150 template <class charT, class traits>
1151 bool basic_regex_parser<charT, traits>::parse_repeat_range(bool isbasic)
1152 {
        //
        // Parses the body of a bounded repeat - "min}", "min,}" or "min,max}" -
        // and hands the bounds to parse_repeat().  Whitespace is skipped around
        // each component.  When isbasic is true the closing brace must be
        // preceded by an escape character.
        //
        // Malformed input is handled in one of two ways: if any of the
        // main_option_type or no_perl_ex flag bits is set, fail() is called with
        // error_brace and false is returned; otherwise the position is rewound to
        // the preceding '{' and that character is parsed as a literal instead.
        // NOTE(review): the rewind loops assume a '{' precedes m_position, i.e.
        // that the caller has already stepped past it - confirm against the caller.
        //
1153    static const char incomplete_message[] = "Missing } in quantified repetition.";
1154    //
1155    // parse a repeat-range:
1156    //
1157    std::size_t min, max;
1158    boost::intmax_t v;   // raw value returned by toi() before range checking
1159    // skip whitespace:
1160    while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1161       ++m_position;
1162    if(this->m_position == this->m_end)
1163    {
1164       if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1165       {
1166          fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1167          return false;
1168       }
1169       // Treat the opening '{' as a literal character, rewind to start of error:
1170       --m_position;
1171       while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1172       return parse_literal();
1173    }
1174    // get min:
1175    v = this->m_traits.toi(m_position, m_end, 10);
1176    // reject a missing or out-of-range minimum:
        // NOTE(review): a negative result presumably means toi() found no number - confirm.
1177    if((v < 0) || (v > umax()))
1178    {
1179       if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1180       {
1181          fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1182          return false;
1183       }
1184       // Treat the opening '{' as a literal character, rewind to start of error:
1185       --m_position;
1186       while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1187       return parse_literal();
1188    }
        // skip whitespace:
1189    while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1190       ++m_position;
1191    if(this->m_position == this->m_end)
1192    {
1193       if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1194       {
1195          fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1196          return false;
1197       }
1198       // Treat the opening '{' as a literal character, rewind to start of error:
1199       --m_position;
1200       while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1201       return parse_literal();
1202    }
1203    min = static_cast<std::size_t>(v);
1204    // see if we have a comma:
1205    if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_comma)
1206    {
1207       // move on and error check:
1208       ++m_position;
1209       // skip whitespace:
1210       while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1211          ++m_position;
1212       if(this->m_position == this->m_end)
1213       {
1214          if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1215          {
1216             fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1217             return false;
1218          }
1219          // Treat the opening '{' as a literal character, rewind to start of error:
1220          --m_position;
1221          while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1222          return parse_literal();
1223       }
1224       // get the value if any:
1225       v = this->m_traits.toi(m_position, m_end, 10);
           // a negative or too-large value leaves the repeat without an upper bound
           // (max becomes the largest std::size_t):
1226       max = ((v >= 0) && (v < umax())) ? (std::size_t)v : (std::numeric_limits<std::size_t>::max)();
1227    }
1228    else
1229    {
1230       // no comma, max = min:
1231       max = min;
1232    }
1233    // skip whitespace:
1234    while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space))
1235       ++m_position;
1236    // OK now check trailing }:
1237    if(this->m_position == this->m_end)
1238    {
1239       if(this->flags() & (regbase::main_option_type | regbase::no_perl_ex))
1240       {
1241          fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1242          return false;
1243       }
1244       // Treat the opening '{' as a literal character, rewind to start of error:
1245       --m_position;
1246       while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1247       return parse_literal();
1248    }
1249    if(isbasic)
1250    {
           // in this mode an escape character has to come directly before the closing brace:
1251       if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_escape)
1252       {
1253          ++m_position;
1254          if(this->m_position == this->m_end)
1255          {
1256             fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1257             return false;
1258          }
1259       }
1260       else
1261       {
1262          fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message);
1263          return false;
1264       }
1265    }
1266    if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_brace)
1267       ++m_position;
1268    else
1269    {
           // note: unlike the earlier checks, this branch falls back to a literal
           // '{' regardless of the syntax flags.
1270       // Treat the opening '{' as a literal character, rewind to start of error:
1271       --m_position;
1272       while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_brace) --m_position;
1273       return parse_literal();
1274    }
1275    //
1276    // finally go and add the repeat, unless error:
1277    //
1278    if(min > max)
1279    {
1280       // Backtrack to error location:
           // step back over the closing brace, then back across any run of word
           // characters in front of it:
1281       m_position -= 2;
1282       while(this->m_traits.isctype(*m_position, this->m_word_mask)) --m_position;
1283          ++m_position;   // despite the indentation this is NOT part of the loop: step forward onto the first of those characters
1284       fail(regex_constants::error_badbrace, m_position - m_base);
1285       return false;
1286    }
1287    return parse_repeat(min, max);
1288 }
1289 
1290 template <class charT, class traits>
1291 bool basic_regex_parser<charT, traits>::parse_alt()
1292 {
1293    //
1294    // error check: if there have been no previous states,
1295    // or if the last state was a '(' then error:
1296    //
1297    if(
1298       ((this->m_last_state == 0) || (this->m_last_state->type == syntax_element_startmark))
1299       &&
1300       !(
1301          ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
1302            &&
1303          ((this->flags() & regbase::no_empty_expressions) == 0)
1304         )
1305       )
1306    {
1307       fail(regex_constants::error_empty, this->m_position - this->m_base, "A regular expression cannot start with the alternation operator |.");
1308       return false;
1309    }
1310    //
1311    // Reset mark count if required:
1312    //
1313    if(m_max_mark < m_mark_count)
1314       m_max_mark = m_mark_count;
1315    if(m_mark_reset >= 0)
1316       m_mark_count = m_mark_reset;
1317 
1318    ++m_position;
1319    //
1320    // we need to append a trailing jump: 
1321    //
1322    re_syntax_base* pj = this->append_state(BOOST_REGEX_DETAIL_NS::syntax_element_jump, sizeof(re_jump));
1323    std::ptrdiff_t jump_offset = this->getoffset(pj);
1324    //
1325    // now insert the alternative:
1326    //
1327    re_alt* palt = static_cast<re_alt*>(this->insert_state(this->m_alt_insert_point, syntax_element_alt, re_alt_size));
1328    jump_offset += re_alt_size;
1329    this->m_pdata->m_data.align();
1330    palt->alt.i = this->m_pdata->m_data.size() - this->getoffset(palt);
1331    //
1332    // update m_alt_insert_point so that the next alternate gets
1333    // inserted at the start of the second of the two we've just created:
1334    //
1335    this->m_alt_insert_point = this->m_pdata->m_data.size();
1336    //
1337    // the start of this alternative must have a case changes state
1338    // if the current block has messed around with case changes:
1339    //
1340    if(m_has_case_change)
1341    {
1342       static_cast<re_case*>(
1343          this->append_state(syntax_element_toggle_case, sizeof(re_case))
1344          )->icase = this->m_icase;
1345    }
1346    //
1347    // push the alternative onto our stack, a recursive
1348    // implementation here is easier to understand (and faster
1349    // as it happens), but causes all kinds of stack overflow problems
1350    // on programs with small stacks (COM+).
1351    //
1352    m_alt_jumps.push_back(jump_offset);
1353    return true;
1354 }
1355 
1356 template <class charT, class traits>
1357 bool basic_regex_parser<charT, traits>::parse_set()
1358 {
1359    static const char incomplete_message[] = "Character set declaration starting with [ terminated prematurely - either no ] was found or the set had no content.";
1360    ++m_position;
1361    if(m_position == m_end)
1362    {
1363       fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1364       return false;
1365    }
1366    basic_char_set<charT, traits> char_set;
1367 
1368    const charT* base = m_position;  // where the '[' was
1369    const charT* item_base = m_position;  // where the '[' or '^' was
1370 
1371    while(m_position != m_end)
1372    {
1373       switch(this->m_traits.syntax_type(*m_position))
1374       {
1375       case regex_constants::syntax_caret:
1376          if(m_position == base)
1377          {
1378             char_set.negate();
1379             ++m_position;
1380             item_base = m_position;
1381          }
1382          else
1383             parse_set_literal(char_set);
1384          break;
1385       case regex_constants::syntax_close_set:
1386          if(m_position == item_base)
1387          {
1388             parse_set_literal(char_set);
1389             break;
1390          }
1391          else
1392          {
1393             ++m_position;
1394             if(0 == this->append_set(char_set))
1395             {
1396                fail(regex_constants::error_ctype, m_position - m_base);
1397                return false;
1398             }
1399          }
1400          return true;
1401       case regex_constants::syntax_open_set:
1402          if(parse_inner_set(char_set))
1403             break;
1404          return true;
1405       case regex_constants::syntax_escape:
1406          {
1407             // 
1408             // look ahead and see if this is a character class shortcut
1409             // \d \w \s etc...
1410             //
1411             ++m_position;
1412             if(this->m_traits.escape_syntax_type(*m_position)
1413                == regex_constants::escape_type_class)
1414             {
1415                char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
1416                if(m != 0)
1417                {
1418                   char_set.add_class(m);
1419                   ++m_position;
1420                   break;
1421                }
1422             }
1423             else if(this->m_traits.escape_syntax_type(*m_position)
1424                == regex_constants::escape_type_not_class)
1425             {
1426                // negated character class:
1427                char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1);
1428                if(m != 0)
1429                {
1430                   char_set.add_negated_class(m);
1431                   ++m_position;
1432                   break;
1433                }
1434             }
1435             // not a character class, just a regular escape:
1436             --m_position;
1437             parse_set_literal(char_set);
1438             break;
1439          }
1440       default:
1441          parse_set_literal(char_set);
1442          break;
1443       }
1444    }
1445    return m_position != m_end;
1446 }
1447 
1448 template <class charT, class traits>
1449 bool basic_regex_parser<charT, traits>::parse_inner_set(basic_char_set<charT, traits>& char_set)
1450 {
        //
        // Handles a '[' met while already inside a bracketed set.  On entry
        // m_position refers to that inner '['.
        //
        // Returns true when something was added to char_set and parsing of the
        // enclosing set should carry on.  Returns false either after an error
        // has been reported through fail(), or after one of the special forms
        // handled in the ':' branch has appended a word_start / word_end state
        // in place of a set.
        //
1451    static const char incomplete_message[] = "Character class declaration starting with [ terminated prematurely - either no ] was found or the set had no content.";
1452    //
1453    // we have either a character class [:name:]
1454    // a collating element [.name.]
1455    // or an equivalence class [=name=]
1456    //
1457    if(m_end == ++m_position)
1458    {
1459       fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1460       return false;
1461    }
1462    switch(this->m_traits.syntax_type(*m_position))
1463    {
1464    case regex_constants::syntax_dot:
1465       //
1466       // a collating element is treated as a literal:
1467       //
           // step back onto the '[' and let the literal parser deal with it:
1468       --m_position;
1469       parse_set_literal(char_set);
1470       return true;
1471    case regex_constants::syntax_colon:
1472       {
1473       // check that character classes are actually enabled:
           // (when disabled for the basic syntax group the text is parsed as literals)
1474       if((this->flags() & (regbase::main_option_type | regbase::no_char_classes)) 
1475          == (regbase::basic_syntax_group  | regbase::no_char_classes))
1476       {
1477          --m_position;
1478          parse_set_literal(char_set);
1479          return true;
1480       }
1481       // skip the ':'
1482       if(m_end == ++m_position)
1483       {
1484          fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1485          return false;
1486       }
1487       const charT* name_first = m_position;   // start of the class name
1488       // skip at least one character, then find the matching ':]'
1489       if(m_end == ++m_position)
1490       {
1491          fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1492          return false;
1493       }
1494       while((m_position != m_end) 
1495          && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_colon)) 
1496          ++m_position;
1497       const charT* name_last = m_position;    // one past the end of the class name (the closing ':')
1498       if(m_end == m_position)
1499       {
1500          fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1501          return false;
1502       }
           // the closing ':' must be followed immediately by ']':
1503       if((m_end == ++m_position) 
1504          || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1505       {
1506          fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1507          return false;
1508       }
1509       //
1510       // check for negated class:
1511       //
1512       bool negated = false;
1513       if(this->m_traits.syntax_type(*name_first) == regex_constants::syntax_caret)
1514       {
1515          ++name_first;
1516          negated = true;
1517       }
1518       typedef typename traits::char_class_type m_type;
1519       m_type m = this->m_traits.lookup_classname(name_first, name_last);
1520       if(m == 0)
1521       {
              // the name is not a known class; a one-character name in an otherwise
              // empty set may still be one of two special forms:
1522          if(char_set.empty() && (name_last - name_first == 1))
1523          {
1524             // maybe a special case:
                 // (only if the enclosing set is closed straight after this item)
1525             ++m_position;
1526             if( (m_position != m_end) 
1527                && (this->m_traits.syntax_type(*m_position) 
1528                   == regex_constants::syntax_close_set))
1529             {
1530                if(this->m_traits.escape_syntax_type(*name_first) 
1531                   == regex_constants::escape_type_left_word)
1532                {
                       // consume the outer ']' too and emit a word-start assertion;
                       // false is returned because no set is to be appended:
1533                   ++m_position;
1534                   this->append_state(syntax_element_word_start);
1535                   return false;
1536                }
1537                if(this->m_traits.escape_syntax_type(*name_first) 
1538                   == regex_constants::escape_type_right_word)
1539                {
                       // as above, but for a word-end assertion:
1540                   ++m_position;
1541                   this->append_state(syntax_element_word_end);
1542                   return false;
1543                }
1544             }
1545          }
1546          fail(regex_constants::error_ctype, name_first - m_base);
1547          return false;
1548       }
1549       if(!negated)
1550          char_set.add_class(m);
1551       else
1552          char_set.add_negated_class(m);
           // step past the ']' that closes this item:
1553       ++m_position;
1554       break;
1555    }
1556    case regex_constants::syntax_equal:
1557       {
1558       // skip the '='
1559       if(m_end == ++m_position)
1560       {
1561          fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1562          return false;
1563       }
1564       const charT* name_first = m_position;   // start of the element name
1565       // skip at least one character, then find the matching '=]'
1566       if(m_end == ++m_position)
1567       {
1568          fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1569          return false;
1570       }
1571       while((m_position != m_end) 
1572          && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)) 
1573          ++m_position;
1574       const charT* name_last = m_position;    // one past the end of the element name (the closing '=')
1575       if(m_end == m_position)
1576       {
1577          fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1578          return false;
1579       }
           // the closing '=' must be followed immediately by ']':
1580       if((m_end == ++m_position) 
1581          || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1582       {
1583          fail(regex_constants::error_brack, m_position - m_base, incomplete_message);
1584          return false;
1585       }
1586       string_type m = this->m_traits.lookup_collatename(name_first, name_last);
           // only results of one or two characters can be stored in a digraph:
1587       if(m.empty() || (m.size() > 2))
1588       {
1589          fail(regex_constants::error_collate, name_first - m_base);
1590          return false;
1591       }
1592       digraph<charT> d;
1593       d.first = m[0];
1594       if(m.size() > 1)
1595          d.second = m[1];
1596       else
1597          d.second = 0;   // single character: second slot is zeroed
1598       char_set.add_equivalent(d);
           // step past the ']' that closes this item:
1599       ++m_position;
1600       break;
1601    }
1602    default:
           // not a special form: step back onto the '[' and parse it as a literal member:
1603       --m_position;
1604       parse_set_literal(char_set);
1605       break;
1606    }
1607    return true;
1608 }
1609 
1610 template <class charT, class traits>
1611 void basic_regex_parser<charT, traits>::parse_set_literal(basic_char_set<charT, traits>& char_set)
1612 {
1613    digraph<charT> start_range(get_next_set_literal(char_set));
1614    if(m_end == m_position)
1615    {
1616       fail(regex_constants::error_brack, m_position - m_base);
1617       return;
1618    }
1619    if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
1620    {
1621       // we have a range:
1622       if(m_end == ++m_position)
1623       {
1624          fail(regex_constants::error_brack, m_position - m_base);
1625          return;
1626       }
1627       if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)
1628       {
1629          digraph<charT> end_range = get_next_set_literal(char_set);
1630          char_set.add_range(start_range, end_range);
1631          if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash)
1632          {
1633             if(m_end == ++m_position)
1634             {
1635                fail(regex_constants::error_brack, m_position - m_base);
1636                return;
1637             }
1638             if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_set)
1639             {
1640                // trailing - :
1641                --m_position;
1642                return;
1643             }
1644             fail(regex_constants::error_range, m_position - m_base);
1645             return;
1646          }
1647          return;
1648       }
1649       --m_position;
1650    }
1651    char_set.add_single(start_range);
1652 }
1653 
1654 template <class charT, class traits>
1655 digraph<charT> basic_regex_parser<charT, traits>::get_next_set_literal(basic_char_set<charT, traits>& char_set)
1656 {
1657    digraph<charT> result;
1658    switch(this->m_traits.syntax_type(*m_position))
1659    {
1660    case regex_constants::syntax_dash:
1661       if(!char_set.empty())
1662       {
1663          // see if we are at the end of the set:
1664          if((++m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1665          {
1666             fail(regex_constants::error_range, m_position - m_base);
1667             return result;
1668          }
1669          --m_position;
1670       }
1671       result.first = *m_position++;
1672       return result;
1673    case regex_constants::syntax_escape:
1674       // check to see if escapes are supported first:
1675       if(this->flags() & regex_constants::no_escape_in_lists)
1676       {
1677          result = *m_position++;
1678          break;
1679       }
1680       ++m_position;
1681       result = unescape_character();
1682       break;
1683    case regex_constants::syntax_open_set:
1684    {
1685       if(m_end == ++m_position)
1686       {
1687          fail(regex_constants::error_collate, m_position - m_base);
1688          return result;
1689       }
1690       if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot)
1691       {
1692          --m_position;
1693          result.first = *m_position;
1694          ++m_position;
1695          return result;
1696       }
1697       if(m_end == ++m_position)
1698       {
1699          fail(regex_constants::error_collate, m_position - m_base);
1700          return result;
1701       }
1702       const charT* name_first = m_position;
1703       // skip at least one character, then find the matching ':]'
1704       if(m_end == ++m_position)
1705       {
1706          fail(regex_constants::error_collate, name_first - m_base);
1707          return result;
1708       }
1709       while((m_position != m_end) 
1710          && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot)) 
1711          ++m_position;
1712       const charT* name_last = m_position;
1713       if(m_end == m_position)
1714       {
1715          fail(regex_constants::error_collate, name_first - m_base);
1716          return result;
1717       }
1718       if((m_end == ++m_position) 
1719          || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set))
1720       {
1721          fail(regex_constants::error_collate, name_first - m_base);
1722          return result;
1723       }
1724       ++m_position;
1725       string_type s = this->m_traits.lookup_collatename(name_first, name_last);
1726       if(s.empty() || (s.size() > 2))
1727       {
1728          fail(regex_constants::error_collate, name_first - m_base);
1729          return result;
1730       }
1731       result.first = s[0];
1732       if(s.size() > 1)
1733          result.second = s[1];
1734       else
1735          result.second = 0;
1736       return result;
1737    }
1738    default:
1739       result = *m_position++;
1740    }
1741    return result;
1742 }
1743 
1744 //
1745 // does a value fit in the specified charT type?
1746 //
1747 template <class charT>
1748 bool valid_value(charT, boost::intmax_t v, const mpl::true_&)
1749 {
1750    return (v >> (sizeof(charT) * CHAR_BIT)) == 0;
1751 }
1752 template <class charT>
1753 bool valid_value(charT, boost::intmax_t, const mpl::false_&)
1754 {
1755    return true; // v will alsways fit in a charT
1756 }
1757 template <class charT>
1758 bool valid_value(charT c, boost::intmax_t v)
1759 {
1760    return valid_value(c, v, mpl::bool_<(sizeof(charT) < sizeof(boost::intmax_t))>());
1761 }
1762 
1763 template <class charT, class traits>
1764 charT basic_regex_parser<charT, traits>::unescape_character()
1765 {
1766 #ifdef BOOST_MSVC
1767 #pragma warning(push)
1768 #pragma warning(disable:4127)
1769 #endif
1770    charT result(0);
1771    if(m_position == m_end)
1772    {
1773       fail(regex_constants::error_escape, m_position - m_base, "Escape sequence terminated prematurely.");
1774       return false;
1775    }
1776    switch(this->m_traits.escape_syntax_type(*m_position))
1777    {
1778    case regex_constants::escape_type_control_a:
1779       result = charT('\a');
1780       break;
1781    case regex_constants::escape_type_e:
1782       result = charT(27);
1783       break;
1784    case regex_constants::escape_type_control_f:
1785       result = charT('\f');
1786       break;
1787    case regex_constants::escape_type_control_n:
1788       result = charT('\n');
1789       break;
1790    case regex_constants::escape_type_control_r:
1791       result = charT('\r');
1792       break;
1793    case regex_constants::escape_type_control_t:
1794       result = charT('\t');
1795       break;
1796    case regex_constants::escape_type_control_v:
1797       result = charT('\v');
1798       break;
1799    case regex_constants::escape_type_word_assert:
1800       result = charT('\b');
1801       break;
1802    case regex_constants::escape_type_ascii_control:
1803       ++m_position;
1804       if(m_position == m_end)
1805       {
1806          // Rewind to start of escape:
1807          --m_position;
1808          while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1809          fail(regex_constants::error_escape, m_position - m_base, "ASCII escape sequence terminated prematurely.");
1810          return result;
1811       }
1812       result = static_cast<charT>(*m_position % 32);
1813       break;
1814    case regex_constants::escape_type_hex:
1815       ++m_position;
1816       if(m_position == m_end)
1817       {
1818          // Rewind to start of escape:
1819          --m_position;
1820          while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1821          fail(regex_constants::error_escape, m_position - m_base, "Hexadecimal escape sequence terminated prematurely.");
1822          return result;
1823       }
1824       // maybe have \x{ddd}
1825       if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
1826       {
1827          ++m_position;
1828          if(m_position == m_end)
1829          {
1830             // Rewind to start of escape:
1831             --m_position;
1832             while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1833             fail(regex_constants::error_escape, m_position - m_base, "Missing } in hexadecimal escape sequence.");
1834             return result;
1835          }
1836          boost::intmax_t i = this->m_traits.toi(m_position, m_end, 16);
1837          if((m_position == m_end)
1838             || (i < 0)
1839             || ((std::numeric_limits<charT>::is_specialized) && (i > (boost::intmax_t)(std::numeric_limits<charT>::max)()))
1840             || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
1841          {
1842             // Rewind to start of escape:
1843             --m_position;
1844             while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1845             fail(regex_constants::error_badbrace, m_position - m_base, "Hexadecimal escape sequence was invalid.");
1846             return result;
1847          }
1848          ++m_position;
1849          result = charT(i);
1850       }
1851       else
1852       {
1853          std::ptrdiff_t len = (std::min)(static_cast<std::ptrdiff_t>(2), static_cast<std::ptrdiff_t>(m_end - m_position));
1854          boost::intmax_t i = this->m_traits.toi(m_position, m_position + len, 16);
1855          if((i < 0)
1856             || !valid_value(charT(0), i))
1857          {
1858             // Rewind to start of escape:
1859             --m_position;
1860             while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1861             fail(regex_constants::error_escape, m_position - m_base, "Escape sequence did not encode a valid character.");
1862             return result;
1863          }
1864          result = charT(i);
1865       }
1866       return result;
1867    case regex_constants::syntax_digit:
1868       {
1869       // an octal escape sequence, the first character must be a zero
1870       // followed by up to 3 octal digits:
1871       std::ptrdiff_t len = (std::min)(::boost::BOOST_REGEX_DETAIL_NS::distance(m_position, m_end), static_cast<std::ptrdiff_t>(4));
1872       const charT* bp = m_position;
1873       boost::intmax_t val = this->m_traits.toi(bp, bp + 1, 8);
1874       if(val != 0)
1875       {
1876          // Rewind to start of escape:
1877          --m_position;
1878          while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1879          // Oops not an octal escape after all:
1880          fail(regex_constants::error_escape, m_position - m_base, "Invalid octal escape sequence.");
1881          return result;
1882       }
1883       val = this->m_traits.toi(m_position, m_position + len, 8);
1884       if((val < 0) || (val > (boost::intmax_t)(std::numeric_limits<charT>::max)()))
1885       {
1886          // Rewind to start of escape:
1887          --m_position;
1888          while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1889          fail(regex_constants::error_escape, m_position - m_base, "Octal escape sequence is invalid.");
1890          return result;
1891       }
1892       return static_cast<charT>(val);
1893       }
1894    case regex_constants::escape_type_named_char:
1895       {
1896          ++m_position;
1897          if(m_position == m_end)
1898          {
1899             // Rewind to start of escape:
1900             --m_position;
1901             while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1902             fail(regex_constants::error_escape, m_position - m_base);
1903             return false;
1904          }
1905          // maybe have \N{name}
1906          if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace)
1907          {
1908             const charT* base = m_position;
1909             // skip forward until we find enclosing brace:
1910             while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace))
1911                ++m_position;
1912             if(m_position == m_end)
1913             {
1914                // Rewind to start of escape:
1915                --m_position;
1916                while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1917                fail(regex_constants::error_escape, m_position - m_base);
1918                return false;
1919             }
1920             string_type s = this->m_traits.lookup_collatename(++base, m_position++);
1921             if(s.empty())
1922             {
1923                // Rewind to start of escape:
1924                --m_position;
1925                while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1926                fail(regex_constants::error_collate, m_position - m_base);
1927                return false;
1928             }
1929             if(s.size() == 1)
1930             {
1931                return s[0];
1932             }
1933          }
1934          // fall through is a failure:
1935          // Rewind to start of escape:
1936          --m_position;
1937          while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1938          fail(regex_constants::error_escape, m_position - m_base);
1939          return false;
1940       }
1941    default:
1942       result = *m_position;
1943       break;
1944    }
1945    ++m_position;
1946    return result;
1947 #ifdef BOOST_MSVC
1948 #pragma warning(pop)
1949 #endif
1950 }
1951 
1952 template <class charT, class traits>
1953 bool basic_regex_parser<charT, traits>::parse_backref()
1954 {
1955    BOOST_REGEX_ASSERT(m_position != m_end);
1956    const charT* pc = m_position;
1957    boost::intmax_t i = this->m_traits.toi(pc, pc + 1, 10);
1958    if((i == 0) || (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs)))
1959    {
1960       // not a backref at all but an octal escape sequence:
1961       charT c = unescape_character();
1962       this->append_literal(c);
1963    }
1964    else if((i > 0) && (this->m_backrefs.test(i)))
1965    {
1966       m_position = pc;
1967       re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_backref, sizeof(re_brace)));
1968       pb->index = i;
1969       pb->icase = this->flags() & regbase::icase;
1970    }
1971    else
1972    {
1973       // Rewind to start of escape:
1974       --m_position;
1975       while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
1976       fail(regex_constants::error_backref, m_position - m_base);
1977       return false;
1978    }
1979    return true;
1980 }
1981 
1982 template <class charT, class traits>
1983 bool basic_regex_parser<charT, traits>::parse_QE()
1984 {
1985 #ifdef BOOST_MSVC
1986 #pragma warning(push)
1987 #pragma warning(disable:4127)
1988 #endif
1989    //
1990    // parse a \Q...\E sequence:
1991    //
1992    ++m_position; // skip the Q
1993    const charT* start = m_position;
1994    const charT* end;
1995    do
1996    {
1997       while((m_position != m_end) 
1998          && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape))
1999          ++m_position;
2000       if(m_position == m_end)
2001       {
2002          //  a \Q...\E sequence may terminate with the end of the expression:
2003          end = m_position;
2004          break;  
2005       }
2006       if(++m_position == m_end) // skip the escape
2007       {
2008          fail(regex_constants::error_escape, m_position - m_base, "Unterminated \\Q...\\E sequence.");
2009          return false;
2010       }
2011       // check to see if it's a \E:
2012       if(this->m_traits.escape_syntax_type(*m_position) == regex_constants::escape_type_E)
2013       {
2014          ++m_position;
2015          end = m_position - 2;
2016          break;
2017       }
2018       // otherwise go round again:
2019    }while(true);
2020    //
2021    // now add all the character between the two escapes as literals:
2022    //
2023    while(start != end)
2024    {
2025       this->append_literal(*start);
2026       ++start;
2027    }
2028    return true;
2029 #ifdef BOOST_MSVC
2030 #pragma warning(pop)
2031 #endif
2032 }
2033 
2034 template <class charT, class traits>
2035 bool basic_regex_parser<charT, traits>::parse_perl_extension()
2036 {
2037    if(++m_position == m_end)
2038    {
2039       // Rewind to start of (? sequence:
2040       --m_position;
2041       while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2042       fail(regex_constants::error_perl_extension, m_position - m_base);
2043       return false;
2044    }
2045    //
2046    // treat comments as a special case, as these
2047    // are the only ones that don't start with a leading
2048    // startmark state:
2049    //
2050    if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_hash)
2051    {
2052       while((m_position != m_end) 
2053          && (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark))
2054       {}
2055       return true;
2056    }
2057    //
2058    // backup some state, and prepare the way:
2059    //
2060    int markid = 0;
2061    std::ptrdiff_t jump_offset = 0;
2062    re_brace* pb = static_cast<re_brace*>(this->append_state(syntax_element_startmark, sizeof(re_brace)));
2063    pb->icase = this->flags() & regbase::icase;
2064    std::ptrdiff_t last_paren_start = this->getoffset(pb);
2065    // back up insertion point for alternations, and set new point:
2066    std::ptrdiff_t last_alt_point = m_alt_insert_point;
2067    this->m_pdata->m_data.align();
2068    m_alt_insert_point = this->m_pdata->m_data.size();
2069    std::ptrdiff_t expected_alt_point = m_alt_insert_point;
2070    bool restore_flags = true;
2071    regex_constants::syntax_option_type old_flags = this->flags();
2072    bool old_case_change = m_has_case_change;
2073    m_has_case_change = false;
2074    charT name_delim;
2075    int mark_reset = m_mark_reset;
2076    int max_mark = m_max_mark;
2077    m_mark_reset = -1;
2078    m_max_mark = m_mark_count;
2079    boost::intmax_t v;
2080    //
2081    // select the actual extension used:
2082    //
2083    switch(this->m_traits.syntax_type(*m_position))
2084    {
2085    case regex_constants::syntax_or:
2086       m_mark_reset = m_mark_count;
2087       BOOST_FALLTHROUGH;
2088    case regex_constants::syntax_colon:
2089       //
2090       // a non-capturing mark:
2091       //
2092       pb->index = markid = 0;
2093       ++m_position;
2094       break;
2095    case regex_constants::syntax_digit:
2096       {
2097       //
2098       // a recursive subexpression:
2099       //
2100       v = this->m_traits.toi(m_position, m_end, 10);
2101       if((v < 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2102       {
2103          // Rewind to start of (? sequence:
2104          --m_position;
2105          while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2106          fail(regex_constants::error_perl_extension, m_position - m_base, "The recursive sub-expression refers to an invalid marking group, or is unterminated.");
2107          return false;
2108       }
2109 insert_recursion:
2110       pb->index = markid = 0;
2111       re_recurse* pr = static_cast<re_recurse*>(this->append_state(syntax_element_recurse, sizeof(re_recurse)));
2112       pr->alt.i = v;
2113       pr->state_id = 0;
2114       static_cast<re_case*>(
2115             this->append_state(syntax_element_toggle_case, sizeof(re_case))
2116             )->icase = this->flags() & regbase::icase;
2117       break;
2118       }
2119    case regex_constants::syntax_plus:
2120       //
2121       // A forward-relative recursive subexpression:
2122       //
2123       ++m_position;
2124       v = this->m_traits.toi(m_position, m_end, 10);
2125       if((v <= 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2126       {
2127          // Rewind to start of (? sequence:
2128          --m_position;
2129          while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2130          fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
2131          return false;
2132       }
2133       if ((std::numeric_limits<boost::intmax_t>::max)() - m_mark_count < v)
2134       {
2135          fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
2136          return false;
2137       }
2138       v += m_mark_count;
2139       goto insert_recursion;
2140    case regex_constants::syntax_dash:
2141       //
2142       // Possibly a backward-relative recursive subexpression:
2143       //
2144       ++m_position;
2145       v = this->m_traits.toi(m_position, m_end, 10);
2146       if(v <= 0)
2147       {
2148          --m_position;
2149          // Oops not a relative recursion at all, but a (?-imsx) group:
2150          goto option_group_jump;
2151       }
2152       v = static_cast<boost::intmax_t>(m_mark_count) + 1 - v;
2153       if(v <= 0)
2154       {
2155          // Rewind to start of (? sequence:
2156          --m_position;
2157          while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2158          fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression.");
2159          return false;
2160       }
2161       goto insert_recursion;
2162    case regex_constants::syntax_equal:
2163       pb->index = markid = -1;
2164       ++m_position;
2165       jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2166       this->m_pdata->m_data.align();
2167       m_alt_insert_point = this->m_pdata->m_data.size();
2168       break;
2169    case regex_constants::syntax_not:
2170       pb->index = markid = -2;
2171       ++m_position;
2172       jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2173       this->m_pdata->m_data.align();
2174       m_alt_insert_point = this->m_pdata->m_data.size();
2175       break;
2176    case regex_constants::escape_type_left_word:
2177       {
2178          // a lookbehind assertion:
2179          if(++m_position == m_end)
2180          {
2181             // Rewind to start of (? sequence:
2182             --m_position;
2183             while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2184             fail(regex_constants::error_perl_extension, m_position - m_base);
2185             return false;
2186          }
2187          regex_constants::syntax_type t = this->m_traits.syntax_type(*m_position);
2188          if(t == regex_constants::syntax_not)
2189             pb->index = markid = -2;
2190          else if(t == regex_constants::syntax_equal)
2191             pb->index = markid = -1;
2192          else
2193          {
2194             // Probably a named capture which also starts (?< :
2195             name_delim = '>';
2196             --m_position;
2197             goto named_capture_jump;
2198          }
2199          ++m_position;
2200          jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2201          this->append_state(syntax_element_backstep, sizeof(re_brace));
2202          this->m_pdata->m_data.align();
2203          m_alt_insert_point = this->m_pdata->m_data.size();
2204          break;
2205       }
2206    case regex_constants::escape_type_right_word:
2207       //
2208       // an independent sub-expression:
2209       //
2210       pb->index = markid = -3;
2211       ++m_position;
2212       jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump)));
2213       this->m_pdata->m_data.align();
2214       m_alt_insert_point = this->m_pdata->m_data.size();
2215       break;
2216    case regex_constants::syntax_open_mark:
2217       {
2218       // a conditional expression:
2219       pb->index = markid = -4;
2220       if(++m_position == m_end)
2221       {
2222          // Rewind to start of (? sequence:
2223          --m_position;
2224          while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2225          fail(regex_constants::error_perl_extension, m_position - m_base);
2226          return false;
2227       }
2228       v = this->m_traits.toi(m_position, m_end, 10);
2229       if(m_position == m_end)
2230       {
2231          // Rewind to start of (? sequence:
2232          --m_position;
2233          while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2234          fail(regex_constants::error_perl_extension, m_position - m_base);
2235          return false;
2236       }
2237       if(*m_position == charT('R'))
2238       {
2239          if(++m_position == m_end)
2240          {
2241             // Rewind to start of (? sequence:
2242             --m_position;
2243             while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2244             fail(regex_constants::error_perl_extension, m_position - m_base);
2245             return false;
2246          }
2247          if(*m_position == charT('&'))
2248          {
2249             const charT* base = ++m_position;
2250             while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2251                ++m_position;
2252             if(m_position == m_end)
2253             {
2254                // Rewind to start of (? sequence:
2255                --m_position;
2256                while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2257                fail(regex_constants::error_perl_extension, m_position - m_base);
2258                return false;
2259             }
2260             v = -static_cast<int>(hash_value_from_capture_name(base, m_position));
2261          }
2262          else
2263          {
2264             v = -this->m_traits.toi(m_position, m_end, 10);
2265          }
2266          re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2267          br->index = v < 0 ? (v - 1) : 0;
2268          if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2269          {
2270             // Rewind to start of (? sequence:
2271             --m_position;
2272             while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2273             fail(regex_constants::error_perl_extension, m_position - m_base);
2274             return false;
2275          }
2276          if(++m_position == m_end)
2277          {
2278             // Rewind to start of (? sequence:
2279             --m_position;
2280             while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2281             fail(regex_constants::error_perl_extension, m_position - m_base);
2282             return false;
2283          }
2284       }
2285       else if((*m_position == charT('\'')) || (*m_position == charT('<')))
2286       {
2287          const charT* base = ++m_position;
2288          while((m_position != m_end) && (*m_position != charT('>')) && (*m_position != charT('\'')))
2289             ++m_position;
2290          if(m_position == m_end)
2291          {
2292             // Rewind to start of (? sequence:
2293             --m_position;
2294             while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2295             fail(regex_constants::error_perl_extension, m_position - m_base);
2296             return false;
2297          }
2298          v = static_cast<int>(hash_value_from_capture_name(base, m_position));
2299          re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2300          br->index = v;
2301          if(((*m_position != charT('>')) && (*m_position != charT('\''))) || (++m_position == m_end))
2302          {
2303             // Rewind to start of (? sequence:
2304             --m_position;
2305             while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2306             fail(regex_constants::error_perl_extension, m_position - m_base, "Unterminated named capture.");
2307             return false;
2308          }
2309          if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2310          {
2311             // Rewind to start of (? sequence:
2312             --m_position;
2313             while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2314             fail(regex_constants::error_perl_extension, m_position - m_base);
2315             return false;
2316          }
2317          if(++m_position == m_end)
2318          {
2319             // Rewind to start of (? sequence:
2320             --m_position;
2321             while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2322             fail(regex_constants::error_perl_extension, m_position - m_base);
2323             return false;
2324          }
2325       }
2326       else if(*m_position == charT('D'))
2327       {
2328          const char* def = "DEFINE";
2329          while(*def && (m_position != m_end) && (*m_position == charT(*def)))
2330             ++m_position, ++def;
2331          if((m_position == m_end) || *def)
2332          {
2333             // Rewind to start of (? sequence:
2334             --m_position;
2335             while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2336             fail(regex_constants::error_perl_extension, m_position - m_base);
2337             return false;
2338          }
2339          re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2340          br->index = 9999; // special magic value!
2341          if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2342          {
2343             // Rewind to start of (? sequence:
2344             --m_position;
2345             while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2346             fail(regex_constants::error_perl_extension, m_position - m_base);
2347             return false;
2348          }
2349          if(++m_position == m_end)
2350          {
2351             // Rewind to start of (? sequence:
2352             --m_position;
2353             while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2354             fail(regex_constants::error_perl_extension, m_position - m_base);
2355             return false;
2356          }
2357       }
2358       else if(v > 0)
2359       {
2360          re_brace* br = static_cast<re_brace*>(this->append_state(syntax_element_assert_backref, sizeof(re_brace)));
2361          br->index = v;
2362          if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2363          {
2364             // Rewind to start of (? sequence:
2365             --m_position;
2366             while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2367             fail(regex_constants::error_perl_extension, m_position - m_base);
2368             return false;
2369          }
2370          if(++m_position == m_end)
2371          {
2372             // Rewind to start of (? sequence:
2373             --m_position;
2374             while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2375             fail(regex_constants::error_perl_extension, m_position - m_base);
2376             return false;
2377          }
2378       }
2379       else
2380       {
2381          // verify that we have a lookahead or lookbehind assert:
2382          if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_question)
2383          {
2384             // Rewind to start of (? sequence:
2385             --m_position;
2386             while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2387             fail(regex_constants::error_perl_extension, m_position - m_base);
2388             return false;
2389          }
2390          if(++m_position == m_end)
2391          {
2392             // Rewind to start of (? sequence:
2393             --m_position;
2394             while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2395             fail(regex_constants::error_perl_extension, m_position - m_base);
2396             return false;
2397          }
2398          if(this->m_traits.syntax_type(*m_position) == regex_constants::escape_type_left_word)
2399          {
2400             if(++m_position == m_end)
2401             {
2402                // Rewind to start of (? sequence:
2403                --m_position;
2404                while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2405                fail(regex_constants::error_perl_extension, m_position - m_base);
2406                return false;
2407             }
2408             if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
2409                && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
2410             {
2411                // Rewind to start of (? sequence:
2412                --m_position;
2413                while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2414                fail(regex_constants::error_perl_extension, m_position - m_base);
2415                return false;
2416             }
2417             m_position -= 3;
2418          }
2419          else
2420          {
2421             if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)
2422                && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not))
2423             {
2424                // Rewind to start of (? sequence:
2425                --m_position;
2426                while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2427                fail(regex_constants::error_perl_extension, m_position - m_base);
2428                return false;
2429             }
2430             m_position -= 2;
2431          }
2432       }
2433       break;
2434       }
2435    case regex_constants::syntax_close_mark:
2436       // Rewind to start of (? sequence:
2437       --m_position;
2438       while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2439       fail(regex_constants::error_perl_extension, m_position - m_base);
2440       return false;
2441    case regex_constants::escape_type_end_buffer:
2442       {
2443       name_delim = *m_position;
2444 named_capture_jump:
2445       markid = 0;
2446       if(0 == (this->flags() & regbase::nosubs))
2447       {
2448          markid = ++m_mark_count;
2449    #ifndef BOOST_NO_STD_DISTANCE
2450          if(this->flags() & regbase::save_subexpression_location)
2451             this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>(std::distance(m_base, m_position) - 2, 0));
2452    #else
2453          if(this->flags() & regbase::save_subexpression_location)
2454             this->m_pdata->m_subs.push_back(std::pair<std::size_t, std::size_t>((m_position - m_base) - 2, 0));
2455    #endif
2456       }
2457       pb->index = markid;
2458       const charT* base = ++m_position;
2459       if(m_position == m_end)
2460       {
2461          // Rewind to start of (? sequence:
2462          --m_position;
2463          while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2464          fail(regex_constants::error_perl_extension, m_position - m_base);
2465          return false;
2466       }
2467       while((m_position != m_end) && (*m_position != name_delim))
2468          ++m_position;
2469       if(m_position == m_end)
2470       {
2471          // Rewind to start of (? sequence:
2472          --m_position;
2473          while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2474          fail(regex_constants::error_perl_extension, m_position - m_base);
2475          return false;
2476       }
2477       this->m_pdata->set_name(base, m_position, markid);
2478       ++m_position;
2479       break;
2480       }
2481    default:
2482       if(*m_position == charT('R'))
2483       {
2484          ++m_position;
2485          v = 0;
2486          if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)
2487          {
2488             // Rewind to start of (? sequence:
2489             --m_position;
2490             while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2491             fail(regex_constants::error_perl_extension, m_position - m_base);
2492             return false;
2493          }
2494          goto insert_recursion;
2495       }
2496       if(*m_position == charT('&'))
2497       {
2498          ++m_position;
2499          const charT* base = m_position;
2500          while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2501             ++m_position;
2502          if(m_position == m_end)
2503          {
2504             // Rewind to start of (? sequence:
2505             --m_position;
2506             while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2507             fail(regex_constants::error_perl_extension, m_position - m_base);
2508             return false;
2509          }
2510          v = static_cast<int>(hash_value_from_capture_name(base, m_position));
2511          goto insert_recursion;
2512       }
2513       if(*m_position == charT('P'))
2514       {
2515          ++m_position;
2516          if(m_position == m_end)
2517          {
2518             // Rewind to start of (? sequence:
2519             --m_position;
2520             while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2521             fail(regex_constants::error_perl_extension, m_position - m_base);
2522             return false;
2523          }
2524          if(*m_position == charT('>'))
2525          {
2526             ++m_position;
2527             const charT* base = m_position;
2528             while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2529                ++m_position;
2530             if(m_position == m_end)
2531             {
2532                // Rewind to start of (? sequence:
2533                --m_position;
2534                while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2535                fail(regex_constants::error_perl_extension, m_position - m_base);
2536                return false;
2537             }
2538             v = static_cast<int>(hash_value_from_capture_name(base, m_position));
2539             goto insert_recursion;
2540          }
2541       }
2542       //
2543       // lets assume that we have a (?imsx) group and try and parse it:
2544       //
2545 option_group_jump:
2546       regex_constants::syntax_option_type opts = parse_options();
2547       if(m_position == m_end)
2548       {
2549          // Rewind to start of (? sequence:
2550          --m_position;
2551          while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2552          fail(regex_constants::error_perl_extension, m_position - m_base);
2553          return false;
2554       }
2555       // make a note of whether we have a case change:
2556       m_has_case_change = ((opts & regbase::icase) != (this->flags() & regbase::icase));
2557       pb->index = markid = 0;
2558       if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark)
2559       {
2560          // update flags and carry on as normal:
2561          this->flags(opts);
2562          restore_flags = false;
2563          old_case_change |= m_has_case_change; // defer end of scope by one ')'
2564       }
2565       else if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_colon)
2566       {
2567          // update flags and carry on until the matching ')' is found:
2568          this->flags(opts);
2569          ++m_position;
2570       }
2571       else
2572       {
2573          // Rewind to start of (? sequence:
2574          --m_position;
2575          while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2576          fail(regex_constants::error_perl_extension, m_position - m_base);
2577          return false;
2578       }
2579 
2580       // finally append a case change state if we need it:
2581       if(m_has_case_change)
2582       {
2583          static_cast<re_case*>(
2584             this->append_state(syntax_element_toggle_case, sizeof(re_case))
2585             )->icase = opts & regbase::icase;
2586       }
2587 
2588    }
2589    //
2590    // now recursively add more states, this will terminate when we get to a
2591    // matching ')' :
2592    //
2593    parse_all();
2594    //
2595    // Unwind alternatives:
2596    //
2597    if(0 == unwind_alts(last_paren_start))
2598    {
2599       // Rewind to start of (? sequence:
2600       --m_position;
2601       while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2602       fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid alternation operators within (?...) block.");
2603       return false;
2604    }
2605    //
2606    // we either have a ')' or we have run out of characters prematurely:
2607    //
2608    if(m_position == m_end)
2609    {
2610       // Rewind to start of (? sequence:
2611       --m_position;
2612       while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2613       this->fail(regex_constants::error_paren, ::boost::BOOST_REGEX_DETAIL_NS::distance(m_base, m_end));
2614       return false;
2615    }
2616    BOOST_REGEX_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark);
2617    ++m_position;
2618    //
2619    // restore the flags:
2620    //
2621    if(restore_flags)
2622    {
2623       // append a case change state if we need it:
2624       if(m_has_case_change)
2625       {
2626          static_cast<re_case*>(
2627             this->append_state(syntax_element_toggle_case, sizeof(re_case))
2628             )->icase = old_flags & regbase::icase;
2629       }
2630       this->flags(old_flags);
2631    }
2632    //
2633    // set up the jump pointer if we have one:
2634    //
2635    if(jump_offset)
2636    {
2637       this->m_pdata->m_data.align();
2638       re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
2639       jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp);
2640       if((this->m_last_state == jmp) && (markid != -2))
2641       {
2642          // Oops... we didn't have anything inside the assertion.
2643          // Note we don't get here for negated forward lookahead as (?!)
2644          // does have some uses.
2645          // Rewind to start of (? sequence:
2646          --m_position;
2647          while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2648          fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid or empty zero width assertion.");
2649          return false;
2650       }
2651    }
2652    //
2653    // verify that if this is conditional expression, that we do have
2654    // an alternative, if not add one:
2655    //
2656    if(markid == -4)
2657    {
2658       re_syntax_base* b = this->getaddress(expected_alt_point);
2659       // Make sure we have exactly one alternative following this state:
2660       if(b->type != syntax_element_alt)
2661       {
2662          re_alt* alt = static_cast<re_alt*>(this->insert_state(expected_alt_point, syntax_element_alt, sizeof(re_alt)));
2663          alt->alt.i = this->m_pdata->m_data.size() - this->getoffset(alt);
2664       }
2665       else if(((std::ptrdiff_t)this->m_pdata->m_data.size() > (static_cast<re_alt*>(b)->alt.i + this->getoffset(b))) && (static_cast<re_alt*>(b)->alt.i > 0) && this->getaddress(static_cast<re_alt*>(b)->alt.i, b)->type == syntax_element_alt)
2666       {
2667          // Can't have seen more than one alternative:
2668          // Rewind to start of (? sequence:
2669          --m_position;
2670          while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2671          fail(regex_constants::error_bad_pattern, m_position - m_base, "More than one alternation operator | was encountered inside a conditional expression.");
2672          return false;
2673       }
2674       else
2675       {
2676          // We must *not* have seen an alternative inside a (DEFINE) block:
2677          b = this->getaddress(b->next.i, b);
2678          if((b->type == syntax_element_assert_backref) && (static_cast<re_brace*>(b)->index == 9999))
2679          {
2680             // Rewind to start of (? sequence:
2681             --m_position;
2682             while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2683             fail(regex_constants::error_bad_pattern, m_position - m_base, "Alternation operators are not allowed inside a DEFINE block.");
2684             return false;
2685          }
2686       }
2687       // check for invalid repetition of next state:
2688       b = this->getaddress(expected_alt_point);
2689       b = this->getaddress(static_cast<re_alt*>(b)->next.i, b);
2690       if((b->type != syntax_element_assert_backref)
2691          && (b->type != syntax_element_startmark))
2692       {
2693          // Rewind to start of (? sequence:
2694          --m_position;
2695          while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2696          fail(regex_constants::error_badrepeat, m_position - m_base, "A repetition operator cannot be applied to a zero-width assertion.");
2697          return false;
2698       }
2699    }
2700    //
2701    // append closing parenthesis state:
2702    //
2703    pb = static_cast<re_brace*>(this->append_state(syntax_element_endmark, sizeof(re_brace)));
2704    pb->index = markid;
2705    pb->icase = this->flags() & regbase::icase;
2706    this->m_paren_start = last_paren_start;
2707    //
2708    // restore the alternate insertion point:
2709    //
2710    this->m_alt_insert_point = last_alt_point;
2711    //
2712    // and the case change data:
2713    //
2714    m_has_case_change = old_case_change;
2715    //
2716    // And the mark_reset data:
2717    //
2718    if(m_max_mark > m_mark_count)
2719    {
2720       m_mark_count = m_max_mark;
2721    }
2722    m_mark_reset = mark_reset;
2723    m_max_mark = max_mark;
2724 
2725 
2726    if(markid > 0)
2727    {
2728 #ifndef BOOST_NO_STD_DISTANCE
2729       if(this->flags() & regbase::save_subexpression_location)
2730          this->m_pdata->m_subs.at((std::size_t)markid - 1).second = std::distance(m_base, m_position) - 1;
2731 #else
2732       if(this->flags() & regbase::save_subexpression_location)
2733          this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base) - 1;
2734 #endif
2735       //
2736       // allow backrefs to this mark:
2737       //
2738       this->m_backrefs.set(markid);
2739    }
2740    return true;
2741 }
2742 
2743 template <class charT, class traits>
2744 bool basic_regex_parser<charT, traits>::match_verb(const char* verb)
2745 {
2746    while(*verb)
2747    {
2748       if(static_cast<charT>(*verb) != *m_position)
2749       {
2750          while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2751          fail(regex_constants::error_perl_extension, m_position - m_base);
2752          return false;
2753       }
2754       if(++m_position == m_end)
2755       {
2756          --m_position;
2757          while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2758          fail(regex_constants::error_perl_extension, m_position - m_base);
2759          return false;
2760       }
2761       ++verb;
2762    }
2763    return true;
2764 }
2765 
2766 #ifdef BOOST_MSVC
2767 #  pragma warning(push)
2768 #if BOOST_MSVC >= 1800
2769 #pragma warning(disable:26812)
2770 #endif
2771 #endif
2772 template <class charT, class traits>
2773 bool basic_regex_parser<charT, traits>::parse_perl_verb()
2774 {
2775    if(++m_position == m_end)
2776    {
2777       // Rewind to start of (* sequence:
2778       --m_position;
2779       while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2780       fail(regex_constants::error_perl_extension, m_position - m_base);
2781       return false;
2782    }
2783    switch(*m_position)
2784    {
2785    case 'F':
2786       if(++m_position == m_end)
2787       {
2788          // Rewind to start of (* sequence:
2789          --m_position;
2790          while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2791          fail(regex_constants::error_perl_extension, m_position - m_base);
2792          return false;
2793       }
2794       if((this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark) || match_verb("AIL"))
2795       {
2796          if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2797          {
2798             // Rewind to start of (* sequence:
2799             --m_position;
2800             while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2801             fail(regex_constants::error_perl_extension, m_position - m_base);
2802             return false;
2803          }
2804          ++m_position;
2805          this->append_state(syntax_element_fail);
2806          return true;
2807       }
2808       break;
2809    case 'A':
2810       if(++m_position == m_end)
2811       {
2812          // Rewind to start of (* sequence:
2813          --m_position;
2814          while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2815          fail(regex_constants::error_perl_extension, m_position - m_base);
2816          return false;
2817       }
2818       if(match_verb("CCEPT"))
2819       {
2820          if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2821          {
2822             // Rewind to start of (* sequence:
2823             --m_position;
2824             while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2825             fail(regex_constants::error_perl_extension, m_position - m_base);
2826             return false;
2827          }
2828          ++m_position;
2829          this->append_state(syntax_element_accept);
2830          return true;
2831       }
2832       break;
2833    case 'C':
2834       if(++m_position == m_end)
2835       {
2836          // Rewind to start of (* sequence:
2837          --m_position;
2838          while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2839          fail(regex_constants::error_perl_extension, m_position - m_base);
2840          return false;
2841       }
2842       if(match_verb("OMMIT"))
2843       {
2844          if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2845          {
2846             // Rewind to start of (* sequence:
2847             --m_position;
2848             while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2849             fail(regex_constants::error_perl_extension, m_position - m_base);
2850             return false;
2851          }
2852          ++m_position;
2853          static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_commit;
2854          this->m_pdata->m_disable_match_any = true;
2855          return true;
2856       }
2857       break;
2858    case 'P':
2859       if(++m_position == m_end)
2860       {
2861          // Rewind to start of (* sequence:
2862          --m_position;
2863          while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2864          fail(regex_constants::error_perl_extension, m_position - m_base);
2865          return false;
2866       }
2867       if(match_verb("RUNE"))
2868       {
2869          if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2870          {
2871             // Rewind to start of (* sequence:
2872             --m_position;
2873             while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2874             fail(regex_constants::error_perl_extension, m_position - m_base);
2875             return false;
2876          }
2877          ++m_position;
2878          static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_prune;
2879          this->m_pdata->m_disable_match_any = true;
2880          return true;
2881       }
2882       break;
2883    case 'S':
2884       if(++m_position == m_end)
2885       {
2886          // Rewind to start of (* sequence:
2887          --m_position;
2888          while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2889          fail(regex_constants::error_perl_extension, m_position - m_base);
2890          return false;
2891       }
2892       if(match_verb("KIP"))
2893       {
2894          if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2895          {
2896             // Rewind to start of (* sequence:
2897             --m_position;
2898             while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2899             fail(regex_constants::error_perl_extension, m_position - m_base);
2900             return false;
2901          }
2902          ++m_position;
2903          static_cast<re_commit*>(this->append_state(syntax_element_commit, sizeof(re_commit)))->action = commit_skip;
2904          this->m_pdata->m_disable_match_any = true;
2905          return true;
2906       }
2907       break;
2908    case 'T':
2909       if(++m_position == m_end)
2910       {
2911          // Rewind to start of (* sequence:
2912          --m_position;
2913          while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2914          fail(regex_constants::error_perl_extension, m_position - m_base);
2915          return false;
2916       }
2917       if(match_verb("HEN"))
2918       {
2919          if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark))
2920          {
2921             // Rewind to start of (* sequence:
2922             --m_position;
2923             while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2924             fail(regex_constants::error_perl_extension, m_position - m_base);
2925             return false;
2926          }
2927          ++m_position;
2928          this->append_state(syntax_element_then);
2929          this->m_pdata->m_disable_match_any = true;
2930          return true;
2931       }
2932       break;
2933    }
2934    // Rewind to start of (* sequence:
2935    --m_position;
2936    while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
2937    fail(regex_constants::error_perl_extension, m_position - m_base);
2938    return false;
2939 }
2940 #ifdef BOOST_MSVC
2941 #  pragma warning(pop)
2942 #endif
2943 
2944 template <class charT, class traits>
2945 bool basic_regex_parser<charT, traits>::add_emacs_code(bool negate)
2946 {
2947    //
2948    // parses an emacs style \sx or \Sx construct.
2949    //
2950    if(++m_position == m_end)
2951    {
2952       // Rewind to start of sequence:
2953       --m_position;
2954       while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position;
2955       fail(regex_constants::error_escape, m_position - m_base);
2956       return false;
2957    }
2958    basic_char_set<charT, traits> char_set;
2959    if(negate)
2960       char_set.negate();
2961 
2962    static const charT s_punct[5] = { 'p', 'u', 'n', 'c', 't', };
2963 
2964    switch(*m_position)
2965    {
2966    case 's':
2967    case ' ':
2968       char_set.add_class(this->m_mask_space);
2969       break;
2970    case 'w':
2971       char_set.add_class(this->m_word_mask);
2972       break;
2973    case '_':
2974       char_set.add_single(digraph<charT>(charT('$'))); 
2975       char_set.add_single(digraph<charT>(charT('&'))); 
2976       char_set.add_single(digraph<charT>(charT('*'))); 
2977       char_set.add_single(digraph<charT>(charT('+'))); 
2978       char_set.add_single(digraph<charT>(charT('-'))); 
2979       char_set.add_single(digraph<charT>(charT('_'))); 
2980       char_set.add_single(digraph<charT>(charT('<'))); 
2981       char_set.add_single(digraph<charT>(charT('>'))); 
2982       break;
2983    case '.':
2984       char_set.add_class(this->m_traits.lookup_classname(s_punct, s_punct+5));
2985       break;
2986    case '(':
2987       char_set.add_single(digraph<charT>(charT('('))); 
2988       char_set.add_single(digraph<charT>(charT('['))); 
2989       char_set.add_single(digraph<charT>(charT('{'))); 
2990       break;
2991    case ')':
2992       char_set.add_single(digraph<charT>(charT(')'))); 
2993       char_set.add_single(digraph<charT>(charT(']'))); 
2994       char_set.add_single(digraph<charT>(charT('}'))); 
2995       break;
2996    case '"':
2997       char_set.add_single(digraph<charT>(charT('"'))); 
2998       char_set.add_single(digraph<charT>(charT('\''))); 
2999       char_set.add_single(digraph<charT>(charT('`'))); 
3000       break;
3001    case '\'':
3002       char_set.add_single(digraph<charT>(charT('\''))); 
3003       char_set.add_single(digraph<charT>(charT(','))); 
3004       char_set.add_single(digraph<charT>(charT('#'))); 
3005       break;
3006    case '<':
3007       char_set.add_single(digraph<charT>(charT(';'))); 
3008       break;
3009    case '>':
3010       char_set.add_single(digraph<charT>(charT('\n'))); 
3011       char_set.add_single(digraph<charT>(charT('\f'))); 
3012       break;
3013    default:
3014       fail(regex_constants::error_ctype, m_position - m_base);
3015       return false;
3016    }
3017    if(0 == this->append_set(char_set))
3018    {
3019       fail(regex_constants::error_ctype, m_position - m_base);
3020       return false;
3021    }
3022    ++m_position;
3023    return true;
3024 }
3025 
3026 template <class charT, class traits>
3027 regex_constants::syntax_option_type basic_regex_parser<charT, traits>::parse_options()
3028 {
3029    // we have a (?imsx-imsx) group, convert it into a set of flags:
3030    regex_constants::syntax_option_type f = this->flags();
3031    bool breakout = false;
3032    do
3033    {
3034       switch(*m_position)
3035       {
3036       case 's':
3037          f |= regex_constants::mod_s;
3038          f &= ~regex_constants::no_mod_s;
3039          break;
3040       case 'm':
3041          f &= ~regex_constants::no_mod_m;
3042          break;
3043       case 'i':
3044          f |= regex_constants::icase;
3045          break;
3046       case 'x':
3047          f |= regex_constants::mod_x;
3048          break;
3049       default:
3050          breakout = true;
3051          continue;
3052       }
3053       if(++m_position == m_end)
3054       {
3055          // Rewind to start of (? sequence:
3056          --m_position;
3057          while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
3058          fail(regex_constants::error_paren, m_position - m_base);
3059          return false;
3060       }
3061    }
3062    while(!breakout);
3063    
3064    breakout = false;
3065 
3066    if(*m_position == static_cast<charT>('-'))
3067    {
3068       if(++m_position == m_end)
3069       {
3070          // Rewind to start of (? sequence:
3071          --m_position;
3072          while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
3073          fail(regex_constants::error_paren, m_position - m_base);
3074          return false;
3075       }
3076       do
3077       {
3078          switch(*m_position)
3079          {
3080          case 's':
3081             f &= ~regex_constants::mod_s;
3082             f |= regex_constants::no_mod_s;
3083             break;
3084          case 'm':
3085             f |= regex_constants::no_mod_m;
3086             break;
3087          case 'i':
3088             f &= ~regex_constants::icase;
3089             break;
3090          case 'x':
3091             f &= ~regex_constants::mod_x;
3092             break;
3093          default:
3094             breakout = true;
3095             continue;
3096          }
3097          if(++m_position == m_end)
3098          {
3099             // Rewind to start of (? sequence:
3100             --m_position;
3101             while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position;
3102             fail(regex_constants::error_paren, m_position - m_base);
3103             return false;
3104          }
3105       }
3106       while(!breakout);
3107    }
3108    return f;
3109 }
3110 
3111 template <class charT, class traits>
3112 bool basic_regex_parser<charT, traits>::unwind_alts(std::ptrdiff_t last_paren_start)
3113 {
3114    //
3115    // If we didn't actually add any states after the last 
3116    // alternative then that's an error:
3117    //
3118    if((this->m_alt_insert_point == static_cast<std::ptrdiff_t>(this->m_pdata->m_data.size()))
3119       && (!m_alt_jumps.empty()) && (m_alt_jumps.back() > last_paren_start)
3120       &&
3121       !(
3122          ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group)
3123            &&
3124          ((this->flags() & regbase::no_empty_expressions) == 0)
3125         )
3126       )
3127    {
3128       fail(regex_constants::error_empty, this->m_position - this->m_base, "Can't terminate a sub-expression with an alternation operator |.");
3129       return false;
3130    }
3131    // 
3132    // Fix up our alternatives:
3133    //
3134    while((!m_alt_jumps.empty()) && (m_alt_jumps.back() > last_paren_start))
3135    {
3136       //
3137       // fix up the jump to point to the end of the states
3138       // that we've just added:
3139       //
3140       std::ptrdiff_t jump_offset = m_alt_jumps.back();
3141       m_alt_jumps.pop_back();
3142       this->m_pdata->m_data.align();
3143       re_jump* jmp = static_cast<re_jump*>(this->getaddress(jump_offset));
3144       if (jmp->type != syntax_element_jump)
3145       {
3146          // Something really bad happened, this used to be an assert, 
3147          // but we'll make it an error just in case we should ever get here.
3148          fail(regex_constants::error_unknown, this->m_position - this->m_base, "Internal logic failed while compiling the expression, probably you added a repeat to something non-repeatable!");
3149          return false;
3150       }
3151       jmp->alt.i = this->m_pdata->m_data.size() - jump_offset;
3152    }
3153    return true;
3154 }
3155 
3156 #ifdef BOOST_MSVC
3157 #pragma warning(pop)
3158 #endif
3159 
3160 } // namespace BOOST_REGEX_DETAIL_NS
3161 } // namespace boost
3162 
3163 #ifdef BOOST_MSVC
3164 #pragma warning(push)
3165 #pragma warning(disable: 4103)
3166 #endif
3167 #ifdef BOOST_HAS_ABI_HEADERS
3168 #  include BOOST_ABI_SUFFIX
3169 #endif
3170 #ifdef BOOST_MSVC
3171 #pragma warning(pop)
3172 #endif
3173 
3174 #endif