|
||||
File indexing completed on 2025-01-30 10:02:37
0001 /////////////////////////////////////////////////////////////////////////////// 0002 /// \file regex_primitives.hpp 0003 /// Contains the syntax elements for writing static regular expressions. 0004 // 0005 // Copyright 2008 Eric Niebler. Distributed under the Boost 0006 // Software License, Version 1.0. (See accompanying file 0007 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 0008 0009 #ifndef BOOST_XPRESSIVE_REGEX_PRIMITIVES_HPP_EAN_10_04_2005 0010 #define BOOST_XPRESSIVE_REGEX_PRIMITIVES_HPP_EAN_10_04_2005 0011 0012 #include <vector> 0013 #include <climits> 0014 #include <boost/config.hpp> 0015 #include <boost/assert.hpp> 0016 #include <boost/mpl/if.hpp> 0017 #include <boost/mpl/and.hpp> 0018 #include <boost/mpl/assert.hpp> 0019 #include <boost/detail/workaround.hpp> 0020 #include <boost/preprocessor/cat.hpp> 0021 #include <boost/xpressive/detail/detail_fwd.hpp> 0022 #include <boost/xpressive/detail/core/matchers.hpp> 0023 #include <boost/xpressive/detail/core/regex_domain.hpp> 0024 #include <boost/xpressive/detail/utility/ignore_unused.hpp> 0025 0026 // Doxygen can't handle proto :-( 0027 #ifndef BOOST_XPRESSIVE_DOXYGEN_INVOKED 0028 # include <boost/proto/core.hpp> 0029 # include <boost/proto/transform/arg.hpp> 0030 # include <boost/proto/transform/when.hpp> 0031 # include <boost/xpressive/detail/core/icase.hpp> 0032 # include <boost/xpressive/detail/static/compile.hpp> 0033 # include <boost/xpressive/detail/static/modifier.hpp> 0034 #endif 0035 0036 namespace boost { namespace xpressive { namespace detail 0037 { 0038 0039 typedef assert_word_placeholder<word_boundary<mpl::true_> > assert_word_boundary; 0040 typedef assert_word_placeholder<word_begin> assert_word_begin; 0041 typedef assert_word_placeholder<word_end> assert_word_end; 0042 0043 // workaround msvc-7.1 bug with function pointer types 0044 // within function types: 0045 #if BOOST_WORKAROUND(BOOST_MSVC, == 1310) 0046 #define mark_number(x) proto::call<mark_number(x)> 0047 #define minus_one() proto::make<minus_one()> 0048 #endif 0049 0050 struct push_back : proto::callable 0051 { 0052 typedef int result_type; 0053 0054 template<typename Subs> 0055 int operator ()(Subs &subs, int i) const 0056 { 0057 subs.push_back(i); 0058 return i; 0059 } 0060 }; 0061 0062 struct mark_number : proto::callable 0063 { 0064 typedef int result_type; 0065 0066 template<typename Expr> 0067 int operator ()(Expr const &expr) const 0068 { 0069 return expr.mark_number_; 0070 } 0071 }; 0072 0073 typedef mpl::int_<-1> minus_one; 0074 0075 // s1 or -s1 0076 struct SubMatch 0077 : proto::or_< 0078 proto::when<basic_mark_tag, push_back(proto::_data, mark_number(proto::_value)) > 0079 , proto::when<proto::negate<basic_mark_tag>, push_back(proto::_data, minus_one()) > 0080 > 0081 {}; 0082 0083 struct SubMatchList 0084 : proto::or_<SubMatch, proto::comma<SubMatchList, SubMatch> > 0085 {}; 0086 0087 template<typename Subs> 0088 typename enable_if< 0089 mpl::and_<proto::is_expr<Subs>, proto::matches<Subs, SubMatchList> > 0090 , std::vector<int> 0091 >::type 0092 to_vector(Subs const &subs) 0093 { 0094 std::vector<int> subs_; 0095 SubMatchList()(subs, 0, subs_); 0096 return subs_; 0097 } 0098 0099 #if BOOST_WORKAROUND(BOOST_MSVC, == 1310) 0100 #undef mark_number 0101 #undef minus_one 0102 #endif 0103 0104 // replace "Expr" with "keep(*State) >> Expr" 0105 struct skip_primitives : proto::transform<skip_primitives> 0106 { 0107 template<typename Expr, typename State, typename Data> 0108 struct impl : proto::transform_impl<Expr, State, Data> 0109 { 0110 typedef 0111 typename proto::shift_right< 0112 typename proto::unary_expr< 0113 keeper_tag 0114 , typename proto::dereference<State>::type 0115 >::type 0116 , Expr 0117 >::type 0118 result_type; 0119 0120 result_type operator ()( 0121 typename impl::expr_param expr 0122 , typename impl::state_param state 0123 , typename impl::data_param 0124 ) const 0125 { 0126 result_type that = {{{state}}, expr}; 0127 return that; 0128 } 0129 }; 0130 }; 0131 0132 struct Primitives 0133 : proto::or_< 0134 proto::terminal<proto::_> 0135 , proto::comma<proto::_, proto::_> 0136 , proto::subscript<proto::terminal<set_initializer>, proto::_> 0137 , proto::assign<proto::terminal<set_initializer>, proto::_> 0138 , proto::assign<proto::terminal<attribute_placeholder<proto::_> >, proto::_> 0139 , proto::complement<Primitives> 0140 > 0141 {}; 0142 0143 struct SkipGrammar 0144 : proto::or_< 0145 proto::when<Primitives, skip_primitives> 0146 , proto::assign<proto::terminal<mark_placeholder>, SkipGrammar> // don't "skip" mark tags 0147 , proto::subscript<SkipGrammar, proto::_> // don't put skips in actions 0148 , proto::binary_expr<modifier_tag, proto::_, SkipGrammar> // don't skip modifiers 0149 , proto::unary_expr<lookbehind_tag, proto::_> // don't skip lookbehinds 0150 , proto::nary_expr<proto::_, proto::vararg<SkipGrammar> > // everything else is fair game! 0151 > 0152 {}; 0153 0154 template<typename Skip> 0155 struct skip_directive 0156 { 0157 typedef typename proto::result_of::as_expr<Skip>::type skip_type; 0158 0159 skip_directive(Skip const &skip) 0160 : skip_(proto::as_expr(skip)) 0161 {} 0162 0163 template<typename Sig> 0164 struct result {}; 0165 0166 template<typename This, typename Expr> 0167 struct result<This(Expr)> 0168 { 0169 typedef 0170 SkipGrammar::impl< 0171 typename proto::result_of::as_expr<Expr>::type 0172 , skip_type const & 0173 , mpl::void_ & 0174 > 0175 skip_transform; 0176 0177 typedef 0178 typename proto::shift_right< 0179 typename skip_transform::result_type 0180 , typename proto::dereference<skip_type>::type 0181 >::type 0182 type; 0183 }; 0184 0185 template<typename Expr> 0186 typename result<skip_directive(Expr)>::type 0187 operator ()(Expr const &expr) const 0188 { 0189 mpl::void_ ignore; 0190 typedef result<skip_directive(Expr)> result_fun; 0191 typename result_fun::type that = { 0192 typename result_fun::skip_transform()(proto::as_expr(expr), this->skip_, ignore) 0193 , {skip_} 0194 }; 0195 return that; 0196 } 0197 0198 private: 0199 skip_type skip_; 0200 }; 0201 0202 /* 0203 /////////////////////////////////////////////////////////////////////////////// 0204 /// INTERNAL ONLY 0205 // BOOST_XPRESSIVE_GLOBAL 0206 // for defining globals that neither violate the One Definition Rule nor 0207 // lead to undefined behavior due to global object initialization order. 0208 //#define BOOST_XPRESSIVE_GLOBAL(type, name, init) \ 0209 // namespace detail \ 0210 // { \ 0211 // template<int Dummy> \ 0212 // struct BOOST_PP_CAT(global_pod_, name) \ 0213 // { \ 0214 // static type const value; \ 0215 // private: \ 0216 // union type_must_be_pod \ 0217 // { \ 0218 // type t; \ 0219 // char ch; \ 0220 // } u; \ 0221 // }; \ 0222 // template<int Dummy> \ 0223 // type const BOOST_PP_CAT(global_pod_, name)<Dummy>::value = init; \ 0224 // } \ 0225 // type const &name = detail::BOOST_PP_CAT(global_pod_, name)<0>::value 0226 */ 0227 0228 0229 } // namespace detail 0230 0231 /// INTERNAL ONLY (for backwards compatibility) 0232 unsigned int const repeat_max = UINT_MAX-1; 0233 0234 /////////////////////////////////////////////////////////////////////////////// 0235 /// \brief For infinite repetition of a sub-expression. 0236 /// 0237 /// Magic value used with the repeat\<\>() function template 0238 /// to specify an unbounded repeat. Use as: repeat<17, inf>('a'). 0239 /// The equivalent in perl is /a{17,}/. 0240 unsigned int const inf = UINT_MAX-1; 0241 0242 /// INTERNAL ONLY (for backwards compatibility) 0243 proto::terminal<detail::epsilon_matcher>::type const epsilon = {{}}; 0244 0245 /////////////////////////////////////////////////////////////////////////////// 0246 /// \brief Successfully matches nothing. 0247 /// 0248 /// Successfully matches a zero-width sequence. nil always succeeds and 0249 /// never consumes any characters. 0250 proto::terminal<detail::epsilon_matcher>::type const nil = {{}}; 0251 0252 /////////////////////////////////////////////////////////////////////////////// 0253 /// \brief Matches an alpha-numeric character. 0254 /// 0255 /// The regex traits are used to determine which characters are alpha-numeric. 0256 /// To match any character that is not alpha-numeric, use ~alnum. 0257 /// 0258 /// \attention alnum is equivalent to /[[:alnum:]]/ in perl. ~alnum is equivalent 0259 /// to /[[:^alnum:]]/ in perl. 0260 proto::terminal<detail::posix_charset_placeholder>::type const alnum = {{"alnum", false}}; 0261 0262 /////////////////////////////////////////////////////////////////////////////// 0263 /// \brief Matches an alphabetic character. 0264 /// 0265 /// The regex traits are used to determine which characters are alphabetic. 0266 /// To match any character that is not alphabetic, use ~alpha. 0267 /// 0268 /// \attention alpha is equivalent to /[[:alpha:]]/ in perl. ~alpha is equivalent 0269 /// to /[[:^alpha:]]/ in perl. 0270 proto::terminal<detail::posix_charset_placeholder>::type const alpha = {{"alpha", false}}; 0271 0272 /////////////////////////////////////////////////////////////////////////////// 0273 /// \brief Matches a blank (horizonal white-space) character. 0274 /// 0275 /// The regex traits are used to determine which characters are blank characters. 0276 /// To match any character that is not blank, use ~blank. 0277 /// 0278 /// \attention blank is equivalent to /[[:blank:]]/ in perl. ~blank is equivalent 0279 /// to /[[:^blank:]]/ in perl. 0280 proto::terminal<detail::posix_charset_placeholder>::type const blank = {{"blank", false}}; 0281 0282 /////////////////////////////////////////////////////////////////////////////// 0283 /// \brief Matches a control character. 0284 /// 0285 /// The regex traits are used to determine which characters are control characters. 0286 /// To match any character that is not a control character, use ~cntrl. 0287 /// 0288 /// \attention cntrl is equivalent to /[[:cntrl:]]/ in perl. ~cntrl is equivalent 0289 /// to /[[:^cntrl:]]/ in perl. 0290 proto::terminal<detail::posix_charset_placeholder>::type const cntrl = {{"cntrl", false}}; 0291 0292 /////////////////////////////////////////////////////////////////////////////// 0293 /// \brief Matches a digit character. 0294 /// 0295 /// The regex traits are used to determine which characters are digits. 0296 /// To match any character that is not a digit, use ~digit. 0297 /// 0298 /// \attention digit is equivalent to /[[:digit:]]/ in perl. ~digit is equivalent 0299 /// to /[[:^digit:]]/ in perl. 0300 proto::terminal<detail::posix_charset_placeholder>::type const digit = {{"digit", false}}; 0301 0302 /////////////////////////////////////////////////////////////////////////////// 0303 /// \brief Matches a graph character. 0304 /// 0305 /// The regex traits are used to determine which characters are graphable. 0306 /// To match any character that is not graphable, use ~graph. 0307 /// 0308 /// \attention graph is equivalent to /[[:graph:]]/ in perl. ~graph is equivalent 0309 /// to /[[:^graph:]]/ in perl. 0310 proto::terminal<detail::posix_charset_placeholder>::type const graph = {{"graph", false}}; 0311 0312 /////////////////////////////////////////////////////////////////////////////// 0313 /// \brief Matches a lower-case character. 0314 /// 0315 /// The regex traits are used to determine which characters are lower-case. 0316 /// To match any character that is not a lower-case character, use ~lower. 0317 /// 0318 /// \attention lower is equivalent to /[[:lower:]]/ in perl. ~lower is equivalent 0319 /// to /[[:^lower:]]/ in perl. 0320 proto::terminal<detail::posix_charset_placeholder>::type const lower = {{"lower", false}}; 0321 0322 /////////////////////////////////////////////////////////////////////////////// 0323 /// \brief Matches a printable character. 0324 /// 0325 /// The regex traits are used to determine which characters are printable. 0326 /// To match any character that is not printable, use ~print. 0327 /// 0328 /// \attention print is equivalent to /[[:print:]]/ in perl. ~print is equivalent 0329 /// to /[[:^print:]]/ in perl. 0330 proto::terminal<detail::posix_charset_placeholder>::type const print = {{"print", false}}; 0331 0332 /////////////////////////////////////////////////////////////////////////////// 0333 /// \brief Matches a punctuation character. 0334 /// 0335 /// The regex traits are used to determine which characters are punctuation. 0336 /// To match any character that is not punctuation, use ~punct. 0337 /// 0338 /// \attention punct is equivalent to /[[:punct:]]/ in perl. ~punct is equivalent 0339 /// to /[[:^punct:]]/ in perl. 0340 proto::terminal<detail::posix_charset_placeholder>::type const punct = {{"punct", false}}; 0341 0342 /////////////////////////////////////////////////////////////////////////////// 0343 /// \brief Matches a space character. 0344 /// 0345 /// The regex traits are used to determine which characters are space characters. 0346 /// To match any character that is not white-space, use ~space. 0347 /// 0348 /// \attention space is equivalent to /[[:space:]]/ in perl. ~space is equivalent 0349 /// to /[[:^space:]]/ in perl. 0350 proto::terminal<detail::posix_charset_placeholder>::type const space = {{"space", false}}; 0351 0352 /////////////////////////////////////////////////////////////////////////////// 0353 /// \brief Matches an upper-case character. 0354 /// 0355 /// The regex traits are used to determine which characters are upper-case. 0356 /// To match any character that is not upper-case, use ~upper. 0357 /// 0358 /// \attention upper is equivalent to /[[:upper:]]/ in perl. ~upper is equivalent 0359 /// to /[[:^upper:]]/ in perl. 0360 proto::terminal<detail::posix_charset_placeholder>::type const upper = {{"upper", false}}; 0361 0362 /////////////////////////////////////////////////////////////////////////////// 0363 /// \brief Matches a hexadecimal digit character. 0364 /// 0365 /// The regex traits are used to determine which characters are hex digits. 0366 /// To match any character that is not a hex digit, use ~xdigit. 0367 /// 0368 /// \attention xdigit is equivalent to /[[:xdigit:]]/ in perl. ~xdigit is equivalent 0369 /// to /[[:^xdigit:]]/ in perl. 0370 proto::terminal<detail::posix_charset_placeholder>::type const xdigit = {{"xdigit", false}}; 0371 0372 /////////////////////////////////////////////////////////////////////////////// 0373 /// \brief Beginning of sequence assertion. 0374 /// 0375 /// For the character sequence [begin, end), 'bos' matches the 0376 /// zero-width sub-sequence [begin, begin). 0377 proto::terminal<detail::assert_bos_matcher>::type const bos = {{}}; 0378 0379 /////////////////////////////////////////////////////////////////////////////// 0380 /// \brief End of sequence assertion. 0381 /// 0382 /// For the character sequence [begin, end), 0383 /// 'eos' matches the zero-width sub-sequence [end, end). 0384 /// 0385 /// \attention Unlike the perl end of sequence assertion \$, 'eos' will 0386 /// not match at the position [end-1, end-1) if *(end-1) is '\\n'. To 0387 /// get that behavior, use (!_n >> eos). 0388 proto::terminal<detail::assert_eos_matcher>::type const eos = {{}}; 0389 0390 /////////////////////////////////////////////////////////////////////////////// 0391 /// \brief Beginning of line assertion. 0392 /// 0393 /// 'bol' matches the zero-width sub-sequence 0394 /// immediately following a logical newline sequence. The regex traits 0395 /// is used to determine what constitutes a logical newline sequence. 0396 proto::terminal<detail::assert_bol_placeholder>::type const bol = {{}}; 0397 0398 /////////////////////////////////////////////////////////////////////////////// 0399 /// \brief End of line assertion. 0400 /// 0401 /// 'eol' matches the zero-width sub-sequence 0402 /// immediately preceeding a logical newline sequence. The regex traits 0403 /// is used to determine what constitutes a logical newline sequence. 0404 proto::terminal<detail::assert_eol_placeholder>::type const eol = {{}}; 0405 0406 /////////////////////////////////////////////////////////////////////////////// 0407 /// \brief Beginning of word assertion. 0408 /// 0409 /// 'bow' matches the zero-width sub-sequence 0410 /// immediately following a non-word character and preceeding a word character. 0411 /// The regex traits are used to determine what constitutes a word character. 0412 proto::terminal<detail::assert_word_begin>::type const bow = {{}}; 0413 0414 /////////////////////////////////////////////////////////////////////////////// 0415 /// \brief End of word assertion. 0416 /// 0417 /// 'eow' matches the zero-width sub-sequence 0418 /// immediately following a word character and preceeding a non-word character. 0419 /// The regex traits are used to determine what constitutes a word character. 0420 proto::terminal<detail::assert_word_end>::type const eow = {{}}; 0421 0422 /////////////////////////////////////////////////////////////////////////////// 0423 /// \brief Word boundary assertion. 0424 /// 0425 /// '_b' matches the zero-width sub-sequence at the beginning or the end of a word. 0426 /// It is equivalent to (bow | eow). The regex traits are used to determine what 0427 /// constitutes a word character. To match a non-word boundary, use ~_b. 0428 /// 0429 /// \attention _b is like \\b in perl. ~_b is like \\B in perl. 0430 proto::terminal<detail::assert_word_boundary>::type const _b = {{}}; 0431 0432 /////////////////////////////////////////////////////////////////////////////// 0433 /// \brief Matches a word character. 0434 /// 0435 /// '_w' matches a single word character. The regex traits are used to determine which 0436 /// characters are word characters. Use ~_w to match a character that is not a word 0437 /// character. 0438 /// 0439 /// \attention _w is like \\w in perl. ~_w is like \\W in perl. 0440 proto::terminal<detail::posix_charset_placeholder>::type const _w = {{"w", false}}; 0441 0442 /////////////////////////////////////////////////////////////////////////////// 0443 /// \brief Matches a digit character. 0444 /// 0445 /// '_d' matches a single digit character. The regex traits are used to determine which 0446 /// characters are digits. Use ~_d to match a character that is not a digit 0447 /// character. 0448 /// 0449 /// \attention _d is like \\d in perl. ~_d is like \\D in perl. 0450 proto::terminal<detail::posix_charset_placeholder>::type const _d = {{"d", false}}; 0451 0452 /////////////////////////////////////////////////////////////////////////////// 0453 /// \brief Matches a space character. 0454 /// 0455 /// '_s' matches a single space character. The regex traits are used to determine which 0456 /// characters are space characters. Use ~_s to match a character that is not a space 0457 /// character. 0458 /// 0459 /// \attention _s is like \\s in perl. ~_s is like \\S in perl. 0460 proto::terminal<detail::posix_charset_placeholder>::type const _s = {{"s", false}}; 0461 0462 /////////////////////////////////////////////////////////////////////////////// 0463 /// \brief Matches a literal newline character, '\\n'. 0464 /// 0465 /// '_n' matches a single newline character, '\\n'. Use ~_n to match a character 0466 /// that is not a newline. 0467 /// 0468 /// \attention ~_n is like '.' in perl without the /s modifier. 0469 proto::terminal<char>::type const _n = {'\n'}; 0470 0471 /////////////////////////////////////////////////////////////////////////////// 0472 /// \brief Matches a logical newline sequence. 0473 /// 0474 /// '_ln' matches a logical newline sequence. This can be any character in the 0475 /// line separator class, as determined by the regex traits, or the '\\r\\n' sequence. 0476 /// For the purpose of back-tracking, '\\r\\n' is treated as a unit. 0477 /// To match any one character that is not a logical newline, use ~_ln. 0478 detail::logical_newline_xpression const _ln = {{}}; 0479 0480 /////////////////////////////////////////////////////////////////////////////// 0481 /// \brief Matches any one character. 0482 /// 0483 /// Match any character, similar to '.' in perl syntax with the /s modifier. 0484 /// '_' matches any one character, including the newline. 0485 /// 0486 /// \attention To match any character except the newline, use ~_n 0487 proto::terminal<detail::any_matcher>::type const _ = {{}}; 0488 0489 /////////////////////////////////////////////////////////////////////////////// 0490 /// \brief Reference to the current regex object 0491 /// 0492 /// Useful when constructing recursive regular expression objects. The 'self' 0493 /// identifier is a short-hand for the current regex object. For instance, 0494 /// sregex rx = '(' >> (self | nil) >> ')'; will create a regex object that 0495 /// matches balanced parens such as "((()))". 0496 proto::terminal<detail::self_placeholder>::type const self = {{}}; 0497 0498 /////////////////////////////////////////////////////////////////////////////// 0499 /// \brief Used to create character sets. 0500 /// 0501 /// There are two ways to create character sets with the 'set' identifier. The 0502 /// easiest is to create a comma-separated list of the characters in the set, 0503 /// as in (set= 'a','b','c'). This set will match 'a', 'b', or 'c'. The other 0504 /// way is to define the set as an argument to the set subscript operator. 0505 /// For instance, set[ 'a' | range('b','c') | digit ] will match an 'a', 'b', 0506 /// 'c' or a digit character. 0507 /// 0508 /// To complement a set, apply the '~' operator. For instance, ~(set= 'a','b','c') 0509 /// will match any character that is not an 'a', 'b', or 'c'. 0510 /// 0511 /// Sets can be composed of other, possibly complemented, sets. For instance, 0512 /// set[ ~digit | ~(set= 'a','b','c') ]. 0513 detail::set_initializer_type const set = {{}}; 0514 0515 /////////////////////////////////////////////////////////////////////////////// 0516 /// \brief Sub-match placeholder type, used to create named captures in 0517 /// static regexes. 0518 /// 0519 /// \c mark_tag is the type of the global sub-match placeholders \c s0, \c s1, etc.. You 0520 /// can use the \c mark_tag type to create your own sub-match placeholders with 0521 /// more meaningful names. This is roughly equivalent to the "named capture" 0522 /// feature of dynamic regular expressions. 0523 /// 0524 /// To create a named sub-match placeholder, initialize it with a unique integer. 0525 /// The integer must only be unique within the regex in which the placeholder 0526 /// is used. Then you can use it within static regexes to created sub-matches 0527 /// by assigning a sub-expression to it, or to refer back to already created 0528 /// sub-matches. 0529 /// 0530 /// \code 0531 /// mark_tag number(1); // "number" is now equivalent to "s1" 0532 /// // Match a number, followed by a space and the same number again 0533 /// sregex rx = (number = +_d) >> ' ' >> number; 0534 /// \endcode 0535 /// 0536 /// After a successful \c regex_match() or \c regex_search(), the sub-match placeholder 0537 /// can be used to index into the <tt>match_results\<\></tt> object to retrieve the 0538 /// corresponding sub-match. 0539 struct mark_tag 0540 : proto::extends<detail::basic_mark_tag, mark_tag, detail::regex_domain> 0541 { 0542 private: 0543 typedef proto::extends<detail::basic_mark_tag, mark_tag, detail::regex_domain> base_type; 0544 0545 static detail::basic_mark_tag make_tag(int mark_nbr) 0546 { 0547 detail::basic_mark_tag mark = {{mark_nbr}}; 0548 return mark; 0549 } 0550 0551 public: 0552 /// \brief Initialize a mark_tag placeholder 0553 /// \param mark_nbr An integer that uniquely identifies this \c mark_tag 0554 /// within the static regexes in which this \c mark_tag will be used. 0555 /// \pre <tt>mark_nbr \> 0</tt> 0556 mark_tag(int mark_nbr) 0557 : base_type(mark_tag::make_tag(mark_nbr)) 0558 { 0559 // Marks numbers must be integers greater than 0. 0560 BOOST_ASSERT(mark_nbr > 0); 0561 } 0562 0563 /// INTERNAL ONLY 0564 operator detail::basic_mark_tag const &() const 0565 { 0566 return this->proto_base(); 0567 } 0568 0569 BOOST_PROTO_EXTENDS_USING_ASSIGN_NON_DEPENDENT(mark_tag) 0570 }; 0571 0572 // This macro is used when declaring mark_tags that are global because 0573 // it guarantees that they are statically initialized. That avoids 0574 // order-of-initialization bugs. In user code, the simpler: mark_tag s0(0); 0575 // would be preferable. 0576 /// INTERNAL ONLY 0577 #define BOOST_XPRESSIVE_GLOBAL_MARK_TAG(NAME, VALUE) \ 0578 boost::xpressive::mark_tag::proto_base_expr const NAME = {{VALUE}} \ 0579 /**/ 0580 0581 /////////////////////////////////////////////////////////////////////////////// 0582 /// \brief Sub-match placeholder, like $& in Perl 0583 BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s0, 0); 0584 0585 /////////////////////////////////////////////////////////////////////////////// 0586 /// \brief Sub-match placeholder, like $1 in perl. 0587 /// 0588 /// To create a sub-match, assign a sub-expression to the sub-match placeholder. 0589 /// For instance, (s1= _) will match any one character and remember which 0590 /// character was matched in the 1st sub-match. Later in the pattern, you can 0591 /// refer back to the sub-match. For instance, (s1= _) >> s1 will match any 0592 /// character, and then match the same character again. 0593 /// 0594 /// After a successful regex_match() or regex_search(), the sub-match placeholders 0595 /// can be used to index into the match_results\<\> object to retrieve the Nth 0596 /// sub-match. 0597 BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s1, 1); 0598 BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s2, 2); 0599 BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s3, 3); 0600 BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s4, 4); 0601 BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s5, 5); 0602 BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s6, 6); 0603 BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s7, 7); 0604 BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s8, 8); 0605 BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s9, 9); 0606 0607 // NOTE: For the purpose of xpressive's documentation, make icase() look like an 0608 // ordinary function. In reality, it is a function object defined in detail/icase.hpp 0609 // so that it can serve double-duty as regex_constants::icase, the syntax_option_type. 0610 #ifdef BOOST_XPRESSIVE_DOXYGEN_INVOKED 0611 /////////////////////////////////////////////////////////////////////////////// 0612 /// \brief Makes a sub-expression case-insensitive. 0613 /// 0614 /// Use icase() to make a sub-expression case-insensitive. For instance, 0615 /// "foo" >> icase(set['b'] >> "ar") will match "foo" exactly followed by 0616 /// "bar" irrespective of case. 0617 template<typename Expr> detail::unspecified icase(Expr const &expr) { return 0; } 0618 #endif 0619 0620 /////////////////////////////////////////////////////////////////////////////// 0621 /// \brief Makes a literal into a regular expression. 0622 /// 0623 /// Use as_xpr() to turn a literal into a regular expression. For instance, 0624 /// "foo" >> "bar" will not compile because both operands to the right-shift 0625 /// operator are const char*, and no such operator exists. Use as_xpr("foo") >> "bar" 0626 /// instead. 0627 /// 0628 /// You can use as_xpr() with character literals in addition to string literals. 0629 /// For instance, as_xpr('a') will match an 'a'. You can also complement a 0630 /// character literal, as with ~as_xpr('a'). This will match any one character 0631 /// that is not an 'a'. 0632 #ifdef BOOST_XPRESSIVE_DOXYGEN_INVOKED 0633 template<typename Literal> detail::unspecified as_xpr(Literal const &literal) { return 0; } 0634 #else 0635 proto::functional::as_expr<> const as_xpr = {}; 0636 #endif 0637 0638 /////////////////////////////////////////////////////////////////////////////// 0639 /// \brief Embed a regex object by reference. 0640 /// 0641 /// \param rex The basic_regex object to embed by reference. 0642 template<typename BidiIter> 0643 inline typename proto::terminal<reference_wrapper<basic_regex<BidiIter> const> >::type const 0644 by_ref(basic_regex<BidiIter> const &rex) 0645 { 0646 reference_wrapper<basic_regex<BidiIter> const> ref(rex); 0647 return proto::terminal<reference_wrapper<basic_regex<BidiIter> const> >::type::make(ref); 0648 } 0649 0650 /////////////////////////////////////////////////////////////////////////////// 0651 /// \brief Match a range of characters. 0652 /// 0653 /// Match any character in the range [ch_min, ch_max]. 0654 /// 0655 /// \param ch_min The lower end of the range to match. 0656 /// \param ch_max The upper end of the range to match. 0657 template<typename Char> 0658 inline typename proto::terminal<detail::range_placeholder<Char> >::type const 0659 range(Char ch_min, Char ch_max) 0660 { 0661 detail::range_placeholder<Char> that = {ch_min, ch_max, false}; 0662 return proto::terminal<detail::range_placeholder<Char> >::type::make(that); 0663 } 0664 0665 /////////////////////////////////////////////////////////////////////////////// 0666 /// \brief Make a sub-expression optional. Equivalent to !as_xpr(expr). 0667 /// 0668 /// \param expr The sub-expression to make optional. 0669 template<typename Expr> 0670 typename proto::result_of::make_expr< 0671 proto::tag::logical_not 0672 , proto::default_domain 0673 , Expr const & 0674 >::type const 0675 optional(Expr const &expr) 0676 { 0677 return proto::make_expr< 0678 proto::tag::logical_not 0679 , proto::default_domain 0680 >(boost::ref(expr)); 0681 } 0682 0683 /////////////////////////////////////////////////////////////////////////////// 0684 /// \brief Repeat a sub-expression multiple times. 0685 /// 0686 /// There are two forms of the repeat\<\>() function template. To match a 0687 /// sub-expression N times, use repeat\<N\>(expr). To match a sub-expression 0688 /// from M to N times, use repeat\<M,N\>(expr). 0689 /// 0690 /// The repeat\<\>() function creates a greedy quantifier. To make the quantifier 0691 /// non-greedy, apply the unary minus operator, as in -repeat\<M,N\>(expr). 0692 /// 0693 /// \param expr The sub-expression to repeat. 0694 template<unsigned int Min, unsigned int Max, typename Expr> 0695 typename proto::result_of::make_expr< 0696 detail::generic_quant_tag<Min, Max> 0697 , proto::default_domain 0698 , Expr const & 0699 >::type const 0700 repeat(Expr const &expr) 0701 { 0702 return proto::make_expr< 0703 detail::generic_quant_tag<Min, Max> 0704 , proto::default_domain 0705 >(boost::ref(expr)); 0706 } 0707 0708 /// \overload 0709 /// 0710 template<unsigned int Count, typename Expr2> 0711 typename proto::result_of::make_expr< 0712 detail::generic_quant_tag<Count, Count> 0713 , proto::default_domain 0714 , Expr2 const & 0715 >::type const 0716 repeat(Expr2 const &expr2) 0717 { 0718 return proto::make_expr< 0719 detail::generic_quant_tag<Count, Count> 0720 , proto::default_domain 0721 >(boost::ref(expr2)); 0722 } 0723 0724 /////////////////////////////////////////////////////////////////////////////// 0725 /// \brief Create an independent sub-expression. 0726 /// 0727 /// Turn off back-tracking for a sub-expression. Any branches or repeats within 0728 /// the sub-expression will match only one way, and no other alternatives are 0729 /// tried. 0730 /// 0731 /// \attention keep(expr) is equivalent to the perl (?>...) extension. 0732 /// 0733 /// \param expr The sub-expression to modify. 0734 template<typename Expr> 0735 typename proto::result_of::make_expr< 0736 detail::keeper_tag 0737 , proto::default_domain 0738 , Expr const & 0739 >::type const 0740 keep(Expr const &expr) 0741 { 0742 return proto::make_expr< 0743 detail::keeper_tag 0744 , proto::default_domain 0745 >(boost::ref(expr)); 0746 } 0747 0748 /////////////////////////////////////////////////////////////////////////////// 0749 /// \brief Look-ahead assertion. 0750 /// 0751 /// before(expr) succeeds if the expr sub-expression would match at the current 0752 /// position in the sequence, but expr is not included in the match. For instance, 0753 /// before("foo") succeeds if we are before a "foo". Look-ahead assertions can be 0754 /// negated with the bit-compliment operator. 0755 /// 0756 /// \attention before(expr) is equivalent to the perl (?=...) extension. 0757 /// ~before(expr) is a negative look-ahead assertion, equivalent to the 0758 /// perl (?!...) extension. 0759 /// 0760 /// \param expr The sub-expression to put in the look-ahead assertion. 0761 template<typename Expr> 0762 typename proto::result_of::make_expr< 0763 detail::lookahead_tag 0764 , proto::default_domain 0765 , Expr const & 0766 >::type const 0767 before(Expr const &expr) 0768 { 0769 return proto::make_expr< 0770 detail::lookahead_tag 0771 , proto::default_domain 0772 >(boost::ref(expr)); 0773 } 0774 0775 /////////////////////////////////////////////////////////////////////////////// 0776 /// \brief Look-behind assertion. 0777 /// 0778 /// after(expr) succeeds if the expr sub-expression would match at the current 0779 /// position minus N in the sequence, where N is the width of expr. expr is not included in 0780 /// the match. For instance, after("foo") succeeds if we are after a "foo". Look-behind 0781 /// assertions can be negated with the bit-complement operator. 0782 /// 0783 /// \attention after(expr) is equivalent to the perl (?<=...) extension. 0784 /// ~after(expr) is a negative look-behind assertion, equivalent to the 0785 /// perl (?<!...) extension. 0786 /// 0787 /// \param expr The sub-expression to put in the look-ahead assertion. 0788 /// 0789 /// \pre expr cannot match a variable number of characters. 0790 template<typename Expr> 0791 typename proto::result_of::make_expr< 0792 detail::lookbehind_tag 0793 , proto::default_domain 0794 , Expr const & 0795 >::type const 0796 after(Expr const &expr) 0797 { 0798 return proto::make_expr< 0799 detail::lookbehind_tag 0800 , proto::default_domain 0801 >(boost::ref(expr)); 0802 } 0803 0804 /////////////////////////////////////////////////////////////////////////////// 0805 /// \brief Specify a regex traits or a std::locale. 0806 /// 0807 /// imbue() instructs the regex engine to use the specified traits or locale 0808 /// when matching the regex. The entire expression must use the same traits/locale. 0809 /// For instance, the following specifies a locale for use with a regex: 0810 /// std::locale loc; 0811 /// sregex rx = imbue(loc)(+digit); 0812 /// 0813 /// \param loc The std::locale or regex traits object. 0814 template<typename Locale> 0815 inline detail::modifier_op<detail::locale_modifier<Locale> > const 0816 imbue(Locale const &loc) 0817 { 0818 detail::modifier_op<detail::locale_modifier<Locale> > mod = 0819 { 0820 detail::locale_modifier<Locale>(loc) 0821 , regex_constants::ECMAScript 0822 }; 0823 return mod; 0824 } 0825 0826 proto::terminal<detail::attribute_placeholder<mpl::int_<1> > >::type const a1 = {{}}; 0827 proto::terminal<detail::attribute_placeholder<mpl::int_<2> > >::type const a2 = {{}}; 0828 proto::terminal<detail::attribute_placeholder<mpl::int_<3> > >::type const a3 = {{}}; 0829 proto::terminal<detail::attribute_placeholder<mpl::int_<4> > >::type const a4 = {{}}; 0830 proto::terminal<detail::attribute_placeholder<mpl::int_<5> > >::type const a5 = {{}}; 0831 proto::terminal<detail::attribute_placeholder<mpl::int_<6> > >::type const a6 = {{}}; 0832 proto::terminal<detail::attribute_placeholder<mpl::int_<7> > >::type const a7 = {{}}; 0833 proto::terminal<detail::attribute_placeholder<mpl::int_<8> > >::type const a8 = {{}}; 0834 proto::terminal<detail::attribute_placeholder<mpl::int_<9> > >::type const a9 = {{}}; 0835 0836 /////////////////////////////////////////////////////////////////////////////// 0837 /// \brief Specify which characters to skip when matching a regex. 0838 /// 0839 /// <tt>skip()</tt> instructs the regex engine to skip certain characters when matching 0840 /// a regex. It is most useful for writing regexes that ignore whitespace. 0841 /// For instance, the following specifies a regex that skips whitespace and 0842 /// punctuation: 0843 /// 0844 /// \code 0845 /// // A sentence is one or more words separated by whitespace 0846 /// // and punctuation. 0847 /// sregex word = +alpha; 0848 /// sregex sentence = skip(set[_s | punct])( +word ); 0849 /// \endcode 0850 /// 0851 /// The way it works in the above example is to insert 0852 /// <tt>keep(*set[_s | punct])</tt> before each primitive within the regex. 0853 /// A "primitive" includes terminals like strings, character sets and nested 0854 /// regexes. A final <tt>*set[_s | punct]</tt> is added to the end of the 0855 /// regex. The regex <tt>sentence</tt> specified above is equivalent to 0856 /// the following: 0857 /// 0858 /// \code 0859 /// sregex sentence = +( keep(*set[_s | punct]) >> word ) 0860 /// >> *set[_s | punct]; 0861 /// \endcode 0862 /// 0863 /// \attention Skipping does not affect how nested regexes are handled because 0864 /// they are treated atomically. String literals are also treated 0865 /// atomically; that is, no skipping is done within a string literal. So 0866 /// <tt>skip(_s)("this that")</tt> is not the same as 0867 /// <tt>skip(_s)("this" >> as_xpr("that"))</tt>. The first will only match 0868 /// when there is only one space between "this" and "that". The second will 0869 /// skip any and all whitespace between "this" and "that". 0870 /// 0871 /// \param skip A regex that specifies which characters to skip. 0872 template<typename Skip> 0873 detail::skip_directive<Skip> skip(Skip const &skip) 0874 { 0875 return detail::skip_directive<Skip>(skip); 0876 } 0877 0878 namespace detail 0879 { 0880 inline void ignore_unused_regex_primitives() 0881 { 0882 detail::ignore_unused(repeat_max); 0883 detail::ignore_unused(inf); 0884 detail::ignore_unused(epsilon); 0885 detail::ignore_unused(nil); 0886 detail::ignore_unused(alnum); 0887 detail::ignore_unused(bos); 0888 detail::ignore_unused(eos); 0889 detail::ignore_unused(bol); 0890 detail::ignore_unused(eol); 0891 detail::ignore_unused(bow); 0892 detail::ignore_unused(eow); 0893 detail::ignore_unused(_b); 0894 detail::ignore_unused(_w); 0895 detail::ignore_unused(_d); 0896 detail::ignore_unused(_s); 0897 detail::ignore_unused(_n); 0898 detail::ignore_unused(_ln); 0899 detail::ignore_unused(_); 0900 detail::ignore_unused(self); 0901 detail::ignore_unused(set); 0902 detail::ignore_unused(s0); 0903 detail::ignore_unused(s1); 0904 detail::ignore_unused(s2); 0905 detail::ignore_unused(s3); 0906 detail::ignore_unused(s4); 0907 detail::ignore_unused(s5); 0908 detail::ignore_unused(s6); 0909 detail::ignore_unused(s7); 0910 detail::ignore_unused(s8); 0911 detail::ignore_unused(s9); 0912 detail::ignore_unused(a1); 0913 detail::ignore_unused(a2); 0914 detail::ignore_unused(a3); 0915 detail::ignore_unused(a4); 0916 detail::ignore_unused(a5); 0917 detail::ignore_unused(a6); 0918 detail::ignore_unused(a7); 0919 detail::ignore_unused(a8); 0920 detail::ignore_unused(a9); 0921 detail::ignore_unused(as_xpr); 0922 } 0923 } 0924 0925 }} // namespace boost::xpressive 0926 0927 #endif
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |