File indexing completed on 2025-12-16 10:11:10
0001
0002
0003
0004
0005
0006
0007
0008 #ifndef BOOST_XPRESSIVE_DETAIL_DYNAMIC_PARSE_CHARSET_HPP_EAN_10_04_2005
0009 #define BOOST_XPRESSIVE_DETAIL_DYNAMIC_PARSE_CHARSET_HPP_EAN_10_04_2005
0010
0011
0012 #if defined(_MSC_VER)
0013 # pragma once
0014 #endif
0015
0016 #include <boost/config.hpp>
0017 #include <boost/integer.hpp>
0018 #include <boost/mpl/bool.hpp>
0019 #include <boost/throw_exception.hpp>
0020 #include <boost/numeric/conversion/converter.hpp>
0021 #include <boost/xpressive/detail/detail_fwd.hpp>
0022 #include <boost/xpressive/detail/dynamic/parser_enum.hpp>
0023 #include <boost/xpressive/detail/utility/literals.hpp>
0024 #include <boost/xpressive/detail/utility/chset/chset.hpp>
0025 #include <boost/xpressive/regex_constants.hpp>
0026
0027 namespace boost { namespace xpressive { namespace detail
0028 {
0029
///////////////////////////////////////////////////////////////////////////////
// escape_type
//
/// Tag stored in escape_value::type_ telling which member of the
/// escape_value carries the result of parsing a backslash escape.
enum escape_type
{
    escape_char  = 0    ///< a single character; see escape_value::ch_
  , escape_mark  = 1    ///< presumably a numbered sub-match; see escape_value::mark_nbr_ (not produced in this header)
  , escape_class = 2    ///< a character class; see escape_value::class_
};
0036
0037
0038
0039
0040 template<typename Char, typename Class>
0041 struct escape_value
0042 {
0043 Char ch_;
0044 int mark_nbr_;
0045 Class class_;
0046 escape_type type_;
0047 };
0048
0049
0050
0051
0052 struct char_overflow_handler
0053 {
0054 void operator ()(numeric::range_check_result result) const
0055 {
0056 if(numeric::cInRange != result)
0057 {
0058 BOOST_THROW_EXCEPTION(
0059 regex_error(
0060 regex_constants::error_escape
0061 , "character escape too large to fit in target character type"
0062 )
0063 );
0064 }
0065 }
0066 };
0067
0068
0069
0070
0071 template<typename FwdIter, typename CompilerTraits>
0072 escape_value<typename iterator_value<FwdIter>::type, typename CompilerTraits::regex_traits::char_class_type>
0073 parse_escape(FwdIter &begin, FwdIter end, CompilerTraits &tr)
0074 {
0075 using namespace regex_constants;
0076 typedef typename iterator_value<FwdIter>::type char_type;
0077 typedef typename CompilerTraits::regex_traits regex_traits;
0078 typedef typename regex_traits::char_class_type char_class_type;
0079
0080
0081 typedef typename boost::uint_t<CHAR_BIT * sizeof(char_type)>::least uchar_t;
0082 BOOST_MPL_ASSERT_RELATION(sizeof(uchar_t), ==, sizeof(char_type));
0083 typedef numeric::conversion_traits<uchar_t, int> converstion_traits;
0084
0085 BOOST_XPR_ENSURE_(begin != end, error_escape, "unexpected end of pattern found");
0086 numeric::converter<int, uchar_t, converstion_traits, char_overflow_handler> converter;
0087 escape_value<char_type,char_class_type> esc = { 0, 0, 0, escape_char };
0088 bool const icase = (0 != (regex_constants::icase_ & tr.flags()));
0089 regex_traits const &rxtraits = tr.traits();
0090 FwdIter tmp;
0091
0092 esc.class_ = rxtraits.lookup_classname(begin, begin + 1, icase);
0093 if(0 != esc.class_)
0094 {
0095 esc.type_ = escape_class;
0096 return esc;
0097 }
0098
0099 if(-1 != rxtraits.value(*begin, 8))
0100 {
0101 esc.ch_ = converter(toi(begin, end, rxtraits, 8, 0777));
0102 return esc;
0103 }
0104
0105 switch(*begin)
0106 {
0107
0108 case BOOST_XPR_CHAR_(char_type, 'a'):
0109 esc.ch_ = BOOST_XPR_CHAR_(char_type, '\a');
0110 ++begin;
0111 break;
0112
0113 case BOOST_XPR_CHAR_(char_type, 'e'):
0114 esc.ch_ = converter(27);
0115 ++begin;
0116 break;
0117
0118 case BOOST_XPR_CHAR_(char_type, 'c'):
0119 BOOST_XPR_ENSURE_(++begin != end, error_escape, "unexpected end of pattern found");
0120 BOOST_XPR_ENSURE_
0121 (
0122 rxtraits.in_range(BOOST_XPR_CHAR_(char_type, 'a'), BOOST_XPR_CHAR_(char_type, 'z'), *begin)
0123 || rxtraits.in_range(BOOST_XPR_CHAR_(char_type, 'A'), BOOST_XPR_CHAR_(char_type, 'Z'), *begin)
0124 , error_escape
0125 , "invalid escape control letter; must be one of a-z or A-Z"
0126 );
0127
0128 esc.ch_ = converter(*begin % 32);
0129 ++begin;
0130 break;
0131
0132 case BOOST_XPR_CHAR_(char_type, 'f'):
0133 esc.ch_ = BOOST_XPR_CHAR_(char_type, '\f');
0134 ++begin;
0135 break;
0136
0137 case BOOST_XPR_CHAR_(char_type, 'n'):
0138 esc.ch_ = BOOST_XPR_CHAR_(char_type, '\n');
0139 ++begin;
0140 break;
0141
0142 case BOOST_XPR_CHAR_(char_type, 'r'):
0143 esc.ch_ = BOOST_XPR_CHAR_(char_type, '\r');
0144 ++begin;
0145 break;
0146
0147 case BOOST_XPR_CHAR_(char_type, 't'):
0148 esc.ch_ = BOOST_XPR_CHAR_(char_type, '\t');
0149 ++begin;
0150 break;
0151
0152 case BOOST_XPR_CHAR_(char_type, 'v'):
0153 esc.ch_ = BOOST_XPR_CHAR_(char_type, '\v');
0154 ++begin;
0155 break;
0156
0157 case BOOST_XPR_CHAR_(char_type, 'x'):
0158 BOOST_XPR_ENSURE_(++begin != end, error_escape, "unexpected end of pattern found");
0159 tmp = begin;
0160 esc.ch_ = converter(toi(begin, end, rxtraits, 16, 0xff));
0161 BOOST_XPR_ENSURE_(2 == std::distance(tmp, begin), error_escape, "invalid hex escape : "
0162 "must be \\x HexDigit HexDigit");
0163 break;
0164
0165 case BOOST_XPR_CHAR_(char_type, 'u'):
0166 BOOST_XPR_ENSURE_(++begin != end, error_escape, "unexpected end of pattern found");
0167 tmp = begin;
0168 esc.ch_ = converter(toi(begin, end, rxtraits, 16, 0xffff));
0169 BOOST_XPR_ENSURE_(4 == std::distance(tmp, begin), error_escape, "invalid Unicode escape : "
0170 "must be \\u HexDigit HexDigit HexDigit HexDigit");
0171 break;
0172
0173 case BOOST_XPR_CHAR_(char_type, '\\'):
0174
0175
0176
0177
0178 default:
0179 esc.ch_ = *begin;
0180 ++begin;
0181 break;
0182 }
0183
0184 return esc;
0185 }
0186
0187
0188
0189
0190 template<typename FwdIter, typename RegexTraits, typename CompilerTraits>
0191 inline void parse_charset
0192 (
0193 FwdIter &begin
0194 , FwdIter end
0195 , compound_charset<RegexTraits> &chset
0196 , CompilerTraits &tr
0197 )
0198 {
0199 using namespace regex_constants;
0200 typedef typename RegexTraits::char_type char_type;
0201 typedef typename RegexTraits::char_class_type char_class_type;
0202 BOOST_XPR_ENSURE_(begin != end, error_brack, "unexpected end of pattern found");
0203 RegexTraits const &rxtraits = tr.traits();
0204 bool const icase = (0 != (regex_constants::icase_ & tr.flags()));
0205 FwdIter iprev = FwdIter();
0206 escape_value<char_type, char_class_type> esc = {0, 0, 0, escape_char};
0207 bool invert = false;
0208
0209
0210 if(begin != end && token_charset_invert == tr.get_charset_token(iprev = begin, end))
0211 {
0212 begin = iprev;
0213 invert = true;
0214 }
0215
0216
0217 if(begin != end && token_charset_end == tr.get_charset_token(iprev = begin, end))
0218 {
0219 for(; begin != iprev; ++begin)
0220 {
0221 chset.set_char(*begin, rxtraits, icase);
0222 }
0223 }
0224
0225 compiler_token_type tok;
0226 char_type ch_prev = char_type(), ch_next = char_type();
0227 bool have_prev = false;
0228
0229 BOOST_XPR_ENSURE_(begin != end, error_brack, "unexpected end of pattern found");
0230
0231
0232 iprev = begin;
0233 tok = tr.get_charset_token(begin, end);
0234 do
0235 {
0236 BOOST_XPR_ENSURE_(begin != end, error_brack, "unexpected end of pattern found");
0237
0238 if(token_charset_hyphen == tok && have_prev)
0239 {
0240
0241 FwdIter iprev2 = begin;
0242 have_prev = false;
0243
0244
0245 switch(tr.get_charset_token(begin, end))
0246 {
0247 case token_charset_hyphen:
0248 case token_charset_invert:
0249 begin = iprev2;
0250 BOOST_FALLTHROUGH;
0251 case token_literal:
0252 ch_next = *begin++;
0253 BOOST_XPR_ENSURE_(ch_prev <= ch_next, error_range, "invalid charset range");
0254 chset.set_range(ch_prev, ch_next, rxtraits, icase);
0255 continue;
0256 case token_charset_backspace:
0257 ch_next = char_type(8);
0258 BOOST_XPR_ENSURE_(ch_prev <= ch_next, error_range, "invalid charset range");
0259 chset.set_range(ch_prev, ch_next, rxtraits, icase);
0260 continue;
0261 case token_escape:
0262 esc = parse_escape(begin, end, tr);
0263 if(escape_char == esc.type_)
0264 {
0265 BOOST_XPR_ENSURE_(ch_prev <= esc.ch_, error_range, "invalid charset range");
0266 chset.set_range(ch_prev, esc.ch_, rxtraits, icase);
0267 continue;
0268 }
0269 BOOST_FALLTHROUGH;
0270 case token_charset_end:
0271 default:
0272 begin = iprev;
0273 chset.set_char(ch_prev, rxtraits, icase);
0274 chset.set_char(*begin++, rxtraits, icase);
0275 continue;
0276 }
0277 }
0278
0279 if(have_prev)
0280 {
0281 chset.set_char(ch_prev, rxtraits, icase);
0282 have_prev = false;
0283 }
0284
0285 switch(tok)
0286 {
0287 case token_charset_hyphen:
0288 case token_charset_invert:
0289 case token_charset_end:
0290 case token_posix_charset_end:
0291 begin = iprev;
0292 ch_prev = *begin++;
0293 have_prev = true;
0294 continue;
0295
0296 case token_charset_backspace:
0297 ch_prev = char_type(8);
0298 have_prev = true;
0299 continue;
0300
0301 case token_posix_charset_begin:
0302 {
0303 FwdIter tmp = begin, start = begin;
0304 bool invert = (token_charset_invert == tr.get_charset_token(tmp, end));
0305 if(invert)
0306 {
0307 begin = start = tmp;
0308 }
0309 while(token_literal == (tok = tr.get_charset_token(begin, end)))
0310 {
0311 tmp = ++begin;
0312 BOOST_XPR_ENSURE_(begin != end, error_brack, "unexpected end of pattern found");
0313 }
0314 if(token_posix_charset_end == tok)
0315 {
0316 char_class_type chclass = rxtraits.lookup_classname(start, tmp, icase);
0317 BOOST_XPR_ENSURE_(0 != chclass, error_ctype, "unknown class name");
0318 chset.set_class(chclass, invert);
0319 continue;
0320 }
0321 begin = iprev;
0322 ch_prev = *begin++;
0323 have_prev = true;
0324 }
0325 continue;
0326
0327 case token_escape:
0328 esc = parse_escape(begin, end, tr);
0329 if(escape_char == esc.type_)
0330 {
0331 ch_prev = esc.ch_;
0332 have_prev = true;
0333 }
0334 else if(escape_class == esc.type_)
0335 {
0336 char_class_type upper_ = lookup_classname(rxtraits, "upper");
0337 BOOST_ASSERT(0 != upper_);
0338 chset.set_class(esc.class_, rxtraits.isctype(*begin++, upper_));
0339 }
0340 else
0341 {
0342 BOOST_ASSERT(false);
0343 }
0344 continue;
0345
0346 default:
0347 ch_prev = *begin++;
0348 have_prev = true;
0349 continue;
0350 }
0351 }
0352 while(BOOST_XPR_ENSURE_((iprev = begin) != end, error_brack, "unexpected end of pattern found"),
0353 token_charset_end != (tok = tr.get_charset_token(begin, end)));
0354
0355 if(have_prev)
0356 {
0357 chset.set_char(ch_prev, rxtraits, icase);
0358 }
0359
0360 if(invert)
0361 {
0362 chset.inverse();
0363 }
0364 }
0365
0366 }}}
0367
0368 #endif