File indexing completed on 2025-01-18 09:53:53
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011 #ifndef BOOST_XPRESSIVE_TRAITS_CPP_REGEX_TRAITS_HPP_EAN_10_04_2005
0012 #define BOOST_XPRESSIVE_TRAITS_CPP_REGEX_TRAITS_HPP_EAN_10_04_2005
0013
0014
0015 #if defined(_MSC_VER)
0016 # pragma once
0017 #endif
0018
0019 #include <ios>
0020 #include <string>
0021 #include <locale>
0022 #include <sstream>
0023 #include <climits>
0024 #include <boost/config.hpp>
0025 #include <boost/assert.hpp>
0026 #include <boost/integer.hpp>
0027 #include <boost/mpl/assert.hpp>
0028 #include <boost/static_assert.hpp>
0029 #include <boost/detail/workaround.hpp>
0030 #include <boost/type_traits/is_same.hpp>
0031 #include <boost/xpressive/detail/detail_fwd.hpp>
0032 #include <boost/xpressive/detail/utility/literals.hpp>
0033
0034 // BOOST_XPRESSIVE_BUGGY_CTYPE_FACET selects the fallback code paths in this
0035 // header: when it is defined, character classes use a fixed bit layout of
0036 // their own and std::ctype is queried one mask bit at a time instead of with
0037 // a combined mask. It is defined only when __GLIBCPP__ is non-zero
0038 // (presumably old libstdc++ releases that predate __GLIBCXX__ -- TODO confirm).
0039 #if BOOST_WORKAROUND(__GLIBCPP__, != 0)
0040 # define BOOST_XPRESSIVE_BUGGY_CTYPE_FACET
0041 #endif
0042
0043 namespace boost { namespace xpressive
0044 {
0045
0046 namespace detail
0047 {
0048 // Unsigned integer type that mirrors std::ctype_base::mask; the assertion requires both to have the same size.
0049 typedef boost::uint_t<sizeof(std::ctype_base::mask) * CHAR_BIT>::least umask_t;
0050 BOOST_MPL_ASSERT_RELATION(sizeof(std::ctype_base::mask), ==, sizeof(umask_t));
0051
0052 // Number of bits the extended mask type must provide: at least 14, which is
0053 // one bit for each of the 11 std_ctype_* categories plus the 3
0054 // non_std_ctype_* categories declared in this header, and never fewer bits
0055 // than umask_t has.
0056 int const umaskex_bits = (14 > (sizeof(umask_t) * CHAR_BIT)) ? 14 : sizeof(umask_t) * CHAR_BIT;
0057
0058 // Extended mask type with at least umaskex_bits bits; asserted to be no smaller than umask_t.
0059 typedef boost::uint_t<umaskex_bits>::fast umaskex_t;
0060 BOOST_MPL_ASSERT_RELATION(sizeof(umask_t), <=, sizeof(umaskex_t));
0061
0062 // Compile-time conversion of a std::ctype_base::mask constant to umaskex_t, going through umask_t.
0063 template<std::ctype_base::mask Mask>
0064 struct mask_cast
0065 {
0066 BOOST_STATIC_CONSTANT(umaskex_t, value = static_cast<umask_t>(Mask));
0067 };
0068
0069 #ifdef __CYGWIN__
0070 // Cygwin only: print is asserted to equal the char literal '\227' and is mapped to octal 0227 directly (presumably to avoid sign extension -- TODO confirm).
0071 template<>
0072 struct mask_cast<std::ctype_base::print>
0073 {
0074 BOOST_MPL_ASSERT_RELATION('\227', ==, std::ctype_base::print);
0075 BOOST_STATIC_CONSTANT(umaskex_t, value = 0227);
0076 };
0077 #endif
0078 // Out-of-class definition of the static constant, emitted unless BOOST_NO_INCLASS_MEMBER_INITIALIZATION is defined.
0079 #ifndef BOOST_NO_INCLASS_MEMBER_INITIALIZATION
0080 template<std::ctype_base::mask Mask>
0081 umaskex_t const mask_cast<Mask>::value;
0082 #endif
0083
0084 #ifndef BOOST_XPRESSIVE_BUGGY_CTYPE_FACET
0085 // Mask with only the most significant bit of umaskex_t set.
0086 umaskex_t const highest_bit = static_cast<umaskex_t>(1) << (sizeof(umaskex_t) * CHAR_BIT - 1);
0087 // Compile-time search for a bit that is not set in In. Starting at Out
0088 // (highest_bit by default), Out is shifted right by one position for as long
0089 // as it overlaps In. The static assertion stops compilation when Out has
0090 // reached bit 0 and that bit is still taken, i.e. when no free bit exists.
0091 template<umaskex_t In, umaskex_t Out = highest_bit, bool Done = (0 == (Out & In))>
0092 struct unused_mask
0093 {
0094 BOOST_STATIC_ASSERT(1 != Out);
0095 BOOST_STATIC_CONSTANT(umaskex_t, value = (unused_mask<In, (Out >> 1)>::value));
0096 };
0097 // Terminating case: Out shares no bit with In, so Out is the result.
0098 template<umaskex_t In, umaskex_t Out>
0099 struct unused_mask<In, Out, true>
0100 {
0101 BOOST_STATIC_CONSTANT(umaskex_t, value = Out);
0102 };
0103 // Out-of-class definition of the static constant, emitted unless BOOST_NO_INCLASS_MEMBER_INITIALIZATION is defined.
0104 #ifndef BOOST_NO_INCLASS_MEMBER_INITIALIZATION
0105 template<umaskex_t In, umaskex_t Out, bool Done>
0106 umaskex_t const unused_mask<In, Out, Done>::value;
0107 #endif
0108 // The standard std::ctype_base category masks, converted to umaskex_t.
0109 umaskex_t const std_ctype_alnum = mask_cast<std::ctype_base::alnum>::value;
0110 umaskex_t const std_ctype_alpha = mask_cast<std::ctype_base::alpha>::value;
0111 umaskex_t const std_ctype_cntrl = mask_cast<std::ctype_base::cntrl>::value;
0112 umaskex_t const std_ctype_digit = mask_cast<std::ctype_base::digit>::value;
0113 umaskex_t const std_ctype_graph = mask_cast<std::ctype_base::graph>::value;
0114 umaskex_t const std_ctype_lower = mask_cast<std::ctype_base::lower>::value;
0115 umaskex_t const std_ctype_print = mask_cast<std::ctype_base::print>::value;
0116 umaskex_t const std_ctype_punct = mask_cast<std::ctype_base::punct>::value;
0117 umaskex_t const std_ctype_space = mask_cast<std::ctype_base::space>::value;
0118 umaskex_t const std_ctype_upper = mask_cast<std::ctype_base::upper>::value;
0119 umaskex_t const std_ctype_xdigit = mask_cast<std::ctype_base::xdigit>::value;
0120 // Bits that are treated as taken on particular standard libraries so that they are never
0121 // picked for xpressive's own categories (presumably used internally by those libraries -- TODO confirm).
0122 #if defined(__GLIBCXX__)
0123 umaskex_t const std_ctype_reserved = 0x8000;
0124 #elif defined(_CPPLIB_VER) && defined(BOOST_WINDOWS)
0125 umaskex_t const std_ctype_reserved = 0x8200;
0126 #elif defined(_LIBCPP_VERSION)
0127 umaskex_t const std_ctype_reserved = 0x8000;
0128 #else
0129 umaskex_t const std_ctype_reserved = 0;
0130 #endif
0131
0132 // Every bit that is already spoken for: the reserved bits plus all standard categories.
0133 umaskex_t const all_ctype_masks = std_ctype_reserved
0134 | std_ctype_alnum | std_ctype_alpha | std_ctype_cntrl | std_ctype_digit
0135 | std_ctype_graph | std_ctype_lower | std_ctype_print | std_ctype_punct
0136 | std_ctype_space | std_ctype_upper | std_ctype_xdigit;
0137
0138 // Underscore category: the highest bit that is free in all_ctype_masks.
0139 umaskex_t const non_std_ctype_underscore = unused_mask<all_ctype_masks>::value;
0140
0141 // Blank category: the highest bit still free once the underscore bit is taken as well.
0142 umaskex_t const non_std_ctype_blank = unused_mask<all_ctype_masks | non_std_ctype_underscore>::value;
0143
0144 // Newline category: the highest bit still free once the underscore and blank bits are taken.
0145 umaskex_t const non_std_ctype_newline = unused_mask<all_ctype_masks | non_std_ctype_underscore | non_std_ctype_blank>::value;
0146
0147 #else
0148 // Fallback bit layout used when BOOST_XPRESSIVE_BUGGY_CTYPE_FACET is defined: one bit
0149 // per category. Bits 0-10 are the standard categories, bits 11-13 are xpressive's own.
0150 umaskex_t const std_ctype_alnum          = 0x0001;
0151 umaskex_t const std_ctype_alpha          = 0x0002;
0152 umaskex_t const std_ctype_cntrl          = 0x0004;
0153 umaskex_t const std_ctype_digit          = 0x0008;
0154 umaskex_t const std_ctype_graph          = 0x0010;
0155 umaskex_t const std_ctype_lower          = 0x0020;
0156 umaskex_t const std_ctype_print          = 0x0040;
0157 umaskex_t const std_ctype_punct          = 0x0080;
0158 umaskex_t const std_ctype_space          = 0x0100;
0159 umaskex_t const std_ctype_upper          = 0x0200;
0160 umaskex_t const std_ctype_xdigit         = 0x0400;
0161 umaskex_t const non_std_ctype_underscore = 0x0800;
0162 umaskex_t const non_std_ctype_blank      = 0x1000;
0163 umaskex_t const non_std_ctype_newline    = 0x2000;
0164 // The library's real ctype masks; entry N corresponds to the fallback category stored at bit N.
0165 static umaskex_t const std_masks[] =
0166 {
0167     mask_cast<std::ctype_base::alnum>::value,  // bit 0
0168     mask_cast<std::ctype_base::alpha>::value,  // bit 1
0169     mask_cast<std::ctype_base::cntrl>::value,  // bit 2
0170     mask_cast<std::ctype_base::digit>::value,  // bit 3
0171     mask_cast<std::ctype_base::graph>::value,  // bit 4
0172     mask_cast<std::ctype_base::lower>::value,  // bit 5
0173     mask_cast<std::ctype_base::print>::value,  // bit 6
0174     mask_cast<std::ctype_base::punct>::value,  // bit 7
0175     mask_cast<std::ctype_base::space>::value,  // bit 8
0176     mask_cast<std::ctype_base::upper>::value,  // bit 9
0177     mask_cast<std::ctype_base::xdigit>::value  // bit 10
0178 };
0179 // Position of the single bit set in i (bits 0-11 only); each nibble of i indexes its own lookup string and the results are summed.
0180 inline int mylog2(umaskex_t i)
0181 {
0182     char const *const by_nibble[3] = { "\0\0\1\0\2\0\0\0\3", "\0\4\5\0\6\0\0\0\7", "\0\10\11\0\12\0\0\0\13" };
0183     return by_nibble[0][i & 0xf] + by_nibble[1][(i >> 4) & 0xf]
0184         + by_nibble[2][(i >> 8) & 0xf];
0185 }
0186 #endif
0187
0188 // The three xpressive-specific category bits (underscore, blank, newline) combined.
0189 umaskex_t const non_std_ctype_masks = non_std_ctype_underscore | non_std_ctype_blank | non_std_ctype_newline;
0190
0191 // Classification base class in its general form, selected by the size of
0192 // Char (SizeOfChar defaults to sizeof(Char)). It holds no data: imbue() does
0193 // nothing, and is() queries the std::ctype facet passed to it on every call
0194 // before checking the xpressive-only categories (blank, underscore, newline).
0195 template<typename Char, std::size_t SizeOfChar = sizeof(Char)>
0196 struct cpp_regex_traits_base
0197 {
0198 protected:
0199 void imbue(std::locale const &)
0200 {
0201 }
0202 // Returns true if ch belongs to at least one of the categories selected by mask.
0203 static bool is(std::ctype<Char> const &ct, Char ch, umaskex_t mask)
0204 {
0205 #ifndef BOOST_XPRESSIVE_BUGGY_CTYPE_FACET
0206 // Pass the mask, converted to the width of std::ctype_base::mask, straight to the facet.
0207 if(ct.is((std::ctype_base::mask)(umask_t)mask, ch))
0208 {
0209 return true;
0210 }
0211 // Cygwin/MinGW only: when xdigit is requested, also accept characters whose
0212 // int_type value is at most UCHAR_MAX and that std::isxdigit recognizes.
0213 #if defined(__CYGWIN__) || defined(__MINGW32_VERSION)
0214 if (std::ctype_base::xdigit == ((std::ctype_base::mask)(umask_t)mask & std::ctype_base::xdigit))
0215 {
0216 typename std::char_traits<Char>::int_type i = std::char_traits<Char>::to_int_type(ch);
0217 if(UCHAR_MAX >= i && std::isxdigit(static_cast<int>(i)))
0218 return true;
0219 }
0220 #endif
0221
0222 #else
0223 // Drop the xpressive-only bits, then peel off the lowest set bit on each pass (tmp & (~tmp+1)) and test its real ctype mask from std_masks on its own.
0224 umaskex_t tmp = mask & ~non_std_ctype_masks;
0225 for(umaskex_t i; 0 != (i = (tmp & (~tmp+1))); tmp &= ~i)
0226 {
0227 std::ctype_base::mask m = (std::ctype_base::mask)(umask_t)std_masks[mylog2(i)];
0228 if(ct.is(m, ch))
0229 {
0230 return true;
0231 }
0232 }
0233
0234 #endif
0235 // The facet did not match: check whichever xpressive-only categories mask requests.
0236 return ((mask & non_std_ctype_blank) && cpp_regex_traits_base::is_blank(ch))
0237 || ((mask & non_std_ctype_underscore) && cpp_regex_traits_base::is_underscore(ch))
0238 || ((mask & non_std_ctype_newline) && cpp_regex_traits_base::is_newline(ch));
0239 }
0240 // Helpers for the xpressive-only categories. Each compares ch with wide literals; the static assertions require the narrow and wide literals to have equal values. is_blank: space or tab.
0241 private:
0242 static bool is_blank(Char ch)
0243 {
0244 BOOST_MPL_ASSERT_RELATION('\t', ==, L'\t');
0245 BOOST_MPL_ASSERT_RELATION(' ', ==, L' ');
0246 return L' ' == ch || L'\t' == ch;
0247 }
0248 // True for the underscore character.
0249 static bool is_underscore(Char ch)
0250 {
0251 BOOST_MPL_ASSERT_RELATION('_', ==, L'_');
0252 return L'_' == ch;
0253 }
0254 // True for '\r', '\n' and '\f'; when SizeOfChar exceeds 1, also for the values 0x2028, 0x2029 and 0x85.
0255 static bool is_newline(Char ch)
0256 {
0257 BOOST_MPL_ASSERT_RELATION('\r', ==, L'\r');
0258 BOOST_MPL_ASSERT_RELATION('\n', ==, L'\n');
0259 BOOST_MPL_ASSERT_RELATION('\f', ==, L'\f');
0260 return L'\r' == ch || L'\n' == ch || L'\f' == ch
0261 || (1 < SizeOfChar && (0x2028u == ch || 0x2029u == ch || 0x85u == ch));
0262 }
0263 };
0264
0265 #ifndef BOOST_XPRESSIVE_BUGGY_CTYPE_FACET
0266 // One-byte characters: imbue() classifies every possible value once and is() becomes a table lookup.
0267 template<typename Char>
0268 struct cpp_regex_traits_base<Char, 1>
0269 {
0270 protected:
0271     void imbue(std::locale const &loc)
0272     {
0273         int const count = static_cast<int>(UCHAR_MAX) + 1;
0274         Char every_char[UCHAR_MAX + 1];
0275         for(int c = 0; c < count; ++c)
0276             every_char[c] = static_cast<Char>(c);
0277         // Classify all values with a single call into the locale's ctype facet.
0278         std::ctype_base::mask classified[UCHAR_MAX + 1];
0279         std::ctype<Char> const &facet = BOOST_USE_FACET(std::ctype<Char>, loc);
0280         facet.is(every_char, every_char + count, classified);
0281         for(int c = 0; c < count; ++c)
0282         {
0283             this->masks_[c] = static_cast<umask_t>(classified[c]);
0284             BOOST_ASSERT(0 == (this->masks_[c] & non_std_ctype_masks));
0285         }
0286
0287         // Layer the xpressive-only categories on top of the facet's answers.
0288         umaskex_t *const table = this->masks_;
0289         table[static_cast<unsigned char>('_')]  |= non_std_ctype_underscore;
0290         table[static_cast<unsigned char>(' ')]  |= non_std_ctype_blank;
0291         table[static_cast<unsigned char>('\t')] |= non_std_ctype_blank;
0292         table[static_cast<unsigned char>('\n')] |= non_std_ctype_newline;
0293         table[static_cast<unsigned char>('\r')] |= non_std_ctype_newline;
0294         table[static_cast<unsigned char>('\f')] |= non_std_ctype_newline;
0295     }
0296
0297     // True if the cached mask of ch shares a bit with mask; the facet argument is unused.
0298     bool is(std::ctype<Char> const &, Char ch, umaskex_t mask) const
0299     {
0300         return (this->masks_[static_cast<unsigned char>(ch)] & mask) != 0;
0301     }
0302 private:
0303     umaskex_t masks_[UCHAR_MAX + 1];
0304 };
0305
0306 #endif
0307
0308 }
0309
0310 // cpp_regex_traits: regex traits class whose character classification and
0311 // case conversion come from the std::ctype facet of a std::locale.
0312 // Classification is delegated to detail::cpp_regex_traits_base<Char>, which
0313 // also provides the xpressive-only categories (underscore, blank, newline).
0314 //
0315 // Char is the character type the traits operate on.
0316 template<typename Char>
0317 struct cpp_regex_traits
0318 : detail::cpp_regex_traits_base<Char>
0319 {
0320 typedef Char char_type;
0321 typedef std::basic_string<char_type> string_type;
0322 typedef std::locale locale_type;
0323 typedef detail::umaskex_t char_class_type;
0324 typedef regex_traits_version_2_tag version_tag;
0325 typedef detail::cpp_regex_traits_base<Char> base_type;
0326
0327 // Constructs traits for loc (a default-constructed std::locale when omitted).
0328 // The base class and loc_ are default-initialized first; imbue() then stores
0329 // the locale, caches its ctype facet in ctype_ and initializes the base class.
0330 cpp_regex_traits(locale_type const &loc = locale_type())
0331 : base_type()
0332 , loc_()
0333 {
0334 this->imbue(loc);
0335 }
0336
0337 // Two traits objects are equal when the locales they hold compare equal.
0338 // that: the traits object to compare against.
0339 bool operator ==(cpp_regex_traits<char_type> const &that) const
0340 {
0341     locale_type const &mine = this->loc_;
0342     return mine == that.loc_;
0343 }
0344
0345 // Two traits objects differ when the locales they hold do not compare equal.
0346 // that: the traits object to compare against.
0347 bool operator !=(cpp_regex_traits<char_type> const &that) const
0348 {
0349     bool const same_locale = (this->loc_ == that.loc_);
0350     return !same_locale;
0351 }
0352
0353
0354 // Converts a narrow char to char_type with the cached ctype facet.
0355 // ch: the character to convert. Returns the facet's widen(ch).
0356 char_type widen(char ch) const
0357 {
0358     std::ctype<char_type> const &facet = *this->ctype_;
0359     return facet.widen(ch);
0360 }
0361
0362 // Reduces a character to a value in [0, UCHAR_MAX]: ch is converted with
0363 // std::char_traits<Char>::to_int_type and the result is cast to unsigned char.
0364 static unsigned char hash(char_type ch)
0365 {
0366     typedef std::char_traits<Char> traits_type;
0367     typename traits_type::int_type const code = traits_type::to_int_type(ch);
0368     return static_cast<unsigned char>(code);
0369 }
0370
0371
0372
0373 // Case-sensitive translation: returns ch unchanged.
0374 // ch: the character to translate.
0375 static char_type translate(char_type ch)
0376 {
0377 return ch;
0378 }
0379
0380
0381
0382 // Case-insensitive translation: returns the cached ctype facet's lower-case form of ch.
0383 char_type translate_nocase(char_type ch) const
0384 {
0385     std::ctype<char_type> const &facet = *this->ctype_;
0386     return facet.tolower(ch);
0387 }
0388
0389
0390
0391 // Returns the lower-case form of ch according to the cached ctype facet.
0392 char_type tolower(char_type ch) const
0393 {
0394     std::ctype<char_type> const &facet = *this->ctype_;
0395     return facet.tolower(ch);
0396 }
0397
0398
0399
0400 // Returns the upper-case form of ch according to the cached ctype facet.
0401 char_type toupper(char_type ch) const
0402 {
0403     std::ctype<char_type> const &facet = *this->ctype_;
0404     return facet.toupper(ch);
0405 }
0406
0407
0408
0409
0410 // Returns the case variants of ch: its lower-case form followed by its
0411 // upper-case form, or the single character when both are the same. Only
0412 // available for char (compile-time assertion). A NUL element ends the string.
0413 string_type fold_case(char_type ch) const
0414 {
0415     BOOST_MPL_ASSERT((is_same<char_type, char>));
0416     char_type const lower = this->ctype_->tolower(ch);
0417     char_type const upper = this->ctype_->toupper(ch);
0418     // Null-terminated buffer: the upper-case slot becomes the terminator
0419     // when both conversions agree.
0420     char_type const folded[] =
0421     {
0422         lower, (upper == lower) ? char_type(0) : upper, char_type(0)
0423     };
0424     return string_type(folded);
0425 }
0426
0427
0428
0429 // True if ch lies in the closed range [first, last].
0430 // first, last: the bounds of the range; ch: the character to test.
0431 static bool in_range(char_type first, char_type last, char_type ch)
0432 {
0433     if(!(first <= ch))
0434         return false;
0435     return ch <= last;
0436 }
0437
0438
0439
0440
0441
0442
0443
0444
0445 // Case-insensitive range test: true if ch itself, its upper-case form or
0446 // its lower-case form (per the cached ctype facet) lies in [first, last].
0447 // The three candidates are tried in that order and the first hit wins.
0448 // first, last: the bounds of the range; ch: the character to test.
0449 bool in_range_nocase(char_type first, char_type last, char_type ch) const
0450 {
0451     if(this->in_range(first, last, ch))
0452         return true;
0453     if(this->in_range(first, last, this->ctype_->toupper(ch)))
0454         return true;
0455     return this->in_range(first, last, this->ctype_->tolower(ch));
0456 }
0457
0458
0459
0460
0461
0462
0463
0464
0465
0466
0467 // Placeholder for producing a sort key for a character range; not implemented.
0468 // Both iterator arguments are ignored.
0469 template<typename FwdIter>
0470 string_type transform(FwdIter, FwdIter) const
0471 {
0472 // Not implemented: trips BOOST_ASSERT(false); if execution continues
0473 // (for example when the assertion is compiled out) an empty string is
0474 // returned.
0475 BOOST_ASSERT(false);
0476 return string_type();
0477 }
0478
0479
0480
0481
0482
0483 // Placeholder for producing a primary sort key for a character range; not implemented.
0484 // Ignores its arguments, trips BOOST_ASSERT(false) and otherwise returns an empty string.
0485 template<typename FwdIter>
0486 string_type transform_primary(FwdIter, FwdIter ) const
0487 {
0488 BOOST_ASSERT(false);
0489 return string_type();
0490 }
0491
0492
0493
0494
0495 // Placeholder for looking up a collating element by name; not implemented.
0496 // Ignores its arguments, trips BOOST_ASSERT(false) and otherwise returns an empty string.
0497 template<typename FwdIter>
0498 string_type lookup_collatename(FwdIter, FwdIter) const
0499 {
0500 BOOST_ASSERT(false);
0501 return string_type();
0502 }
0503
0504
0505
0506
0507
0508
0509 // Maps a character-class name in [begin, end) to its bitmask. The name is
0510 // tried as given and then again after translate_nocase() on every character.
0511 // With icase set, a class containing lower or upper is widened to both.
0512 // Returns 0 when the name is unknown; the range must not be empty (asserted).
0513 template<typename FwdIter>
0514 char_class_type lookup_classname(FwdIter begin, FwdIter end, bool icase) const
0515 {
0516     static detail::umaskex_t const icase_masks =
0517         detail::std_ctype_lower | detail::std_ctype_upper;
0518
0519     BOOST_ASSERT(begin != end);
0520     char_class_type result = this->lookup_classname_impl_(begin, end);
0521     if(0 == result)
0522     {
0523         // No exact match: retry with every character passed through translate_nocase().
0524         string_type lowered(begin, end);
0525         for(typename string_type::iterator it = lowered.begin(); it != lowered.end(); ++it)
0526         {
0527             *it = this->translate_nocase(*it);
0528         }
0529         result = this->lookup_classname_impl_(lowered.begin(), lowered.end());
0530     }
0531     bool const touches_case = 0 != (result & icase_masks);
0532     if(icase && touches_case)
0533     {
0534         result |= icase_masks;
0535     }
0536     return result;
0537 }
0538
0539
0540
0541
0542
0543
0544 // True if ch belongs to at least one of the character classes in mask.
0545 // The test is delegated to the base class, which receives the cached ctype facet.
0546 bool isctype(char_type ch, char_class_type mask) const
0547 {
0548     std::ctype<char_type> const &facet = *this->ctype_;
0549     return this->base_type::is(facet, ch, mask);
0550 }
0551
0552 // Converts a single digit character to its numeric value in base radix.
0553 // radix must be 8, 10 or 16 (asserted). ch is written to a string stream
0554 // imbued with this object's locale and read back as an int in that base.
0555 // Returns the parsed value, or -1 when the extraction fails.
0556 int value(char_type ch, int radix) const
0557 {
0558     BOOST_ASSERT(8 == radix || 10 == radix || 16 == radix);
0559     std::basic_stringstream<char_type> digits;
0560     digits.imbue(this->getloc());
0561     switch(radix)
0562     {
0563     case 8:  digits << std::oct; break;
0564     case 16: digits << std::hex; break;
0565     default: digits << std::dec; break;
0566     }
0567     digits.put(ch);
0568     int parsed = -1;
0569     digits >> parsed;
0570     return digits.fail() ? -1 : parsed;
0571 }
0572
0573
0574 // Replaces the locale used by this traits object. The ctype facet pointer
0575 // is refreshed from the stored copy of the locale and the base class is
0576 // re-initialized with it. Returns the locale that was in use before the call.
0577 locale_type imbue(locale_type loc)
0578 {
0579     locale_type previous(this->loc_);
0580     this->loc_ = loc;
0581     std::ctype<char_type> const &facet = BOOST_USE_FACET(std::ctype<char_type>, this->loc_);
0582     this->ctype_ = &facet;
0583     this->base_type::imbue(this->loc_);
0584     return previous;
0585 }
0586
0587
0588 // Returns a copy of the locale currently held by this traits object.
0589 locale_type getloc() const
0590 {
0591 return this->loc_;
0592 }
0593
0594 private:
0595
0596
0597 // One entry of the class-name table: a null-terminated class name and the
0598 // bitmask it stands for. An entry with a null class_name_ ends the table.
0599 struct char_class_pair
0600 {
0601 char_type const *class_name_;
0602 char_class_type class_type_;
0603 };
0604
0605
0606 // Returns entry j of the class-name table. The table is a function-local static and is
0607 // closed by a { 0, 0 } entry; j is not range-checked, so callers must stop at that entry.
0608 static char_class_pair const &char_class(std::size_t j)
0609 {
0610     static BOOST_CONSTEXPR_OR_CONST char_class_pair s_char_class_map[] =
0611     {
0612         { BOOST_XPR_CSTR_(char_type, "alnum"),   detail::std_ctype_alnum },
0613         { BOOST_XPR_CSTR_(char_type, "alpha"),   detail::std_ctype_alpha },
0614         { BOOST_XPR_CSTR_(char_type, "blank"),   detail::non_std_ctype_blank },
0615         { BOOST_XPR_CSTR_(char_type, "cntrl"),   detail::std_ctype_cntrl },
0616         { BOOST_XPR_CSTR_(char_type, "d"),       detail::std_ctype_digit },
0617         { BOOST_XPR_CSTR_(char_type, "digit"),   detail::std_ctype_digit },
0618         { BOOST_XPR_CSTR_(char_type, "graph"),   detail::std_ctype_graph },
0619         { BOOST_XPR_CSTR_(char_type, "lower"),   detail::std_ctype_lower },
0620         { BOOST_XPR_CSTR_(char_type, "newline"), detail::non_std_ctype_newline },
0621         { BOOST_XPR_CSTR_(char_type, "print"),   detail::std_ctype_print },
0622         { BOOST_XPR_CSTR_(char_type, "punct"),   detail::std_ctype_punct },
0623         { BOOST_XPR_CSTR_(char_type, "s"),       detail::std_ctype_space },
0624         { BOOST_XPR_CSTR_(char_type, "space"),   detail::std_ctype_space },
0625         { BOOST_XPR_CSTR_(char_type, "upper"),   detail::std_ctype_upper },
0626         { BOOST_XPR_CSTR_(char_type, "w"),       detail::std_ctype_alnum | detail::non_std_ctype_underscore },
0627         { BOOST_XPR_CSTR_(char_type, "xdigit"),  detail::std_ctype_xdigit },
0628         { 0, 0 }
0629     };
0630     return s_char_class_map[j];
0631 }
0632
0633
0634 // Linear search of the class-name table for an exact match of [begin, end).
0635 // Returns the matching entry's bitmask, or 0 when no entry has that name.
0636 template<typename FwdIter>
0637 static char_class_type lookup_classname_impl_(FwdIter begin, FwdIter end)
0638 {
0639     typedef cpp_regex_traits<Char> this_t;
0640     for(std::size_t index = 0; ; ++index)
0641     {
0642         char_class_pair const &entry = this_t::char_class(index);
0643         if(0 == entry.class_name_)
0644             break;
0645         if(this_t::compare_(entry.class_name_, begin, end))
0646             return entry.class_type_;
0647     }
0648     return 0;
0649 }
0650 // True if the null-terminated string name and the range [begin, end) hold
0651 // exactly the same characters, i.e. both are exhausted at the same time.
0652 template<typename FwdIter>
0653 static bool compare_(char_type const *name, FwdIter begin, FwdIter end)
0654 {
0655     while(*name && begin != end)
0656     {
0657         if(*name != *begin)
0658             return false;
0659         ++name;
0660         ++begin;
0661     }
0662     return !*name && begin == end;
0663 }
0664 // The imbued locale, and a pointer to that locale's std::ctype facet; imbue() sets both.
0665 locale_type loc_;
0666 std::ctype<char_type> const *ctype_;
0667
0668 };
0669
0670 // Explicit specializations of hash() for the built-in character types.
0671 // unsigned char: the character is returned as is.
0672 template<>
0673 inline unsigned char cpp_regex_traits<unsigned char>::hash(unsigned char ch)
0674 {
0675 return ch;
0676 }
0677 // char: converted directly with a cast to unsigned char.
0678 template<>
0679 inline unsigned char cpp_regex_traits<char>::hash(char ch)
0680 {
0681 return static_cast<unsigned char>(ch);
0682 }
0683 // signed char: converted directly with a cast to unsigned char.
0684 template<>
0685 inline unsigned char cpp_regex_traits<signed char>::hash(signed char ch)
0686 {
0687 return static_cast<unsigned char>(ch);
0688 }
0689 // wchar_t: cast to unsigned char; compiled out when BOOST_XPRESSIVE_NO_WREGEX is defined.
0690 #ifndef BOOST_XPRESSIVE_NO_WREGEX
0691 template<>
0692 inline unsigned char cpp_regex_traits<wchar_t>::hash(wchar_t ch)
0693 {
0694 return static_cast<unsigned char>(ch);
0695 }
0696 #endif
0697 // Flags cpp_regex_traits<char> as having fold_case(): the specialization derives from
0698 // mpl::true_. fold_case() itself statically asserts that char_type is char.
0699 template<>
0700 struct has_fold_case<cpp_regex_traits<char> >
0701 : mpl::true_
0702 {
0703 };
0704
0705
0706 }}
0707
0708 #endif