File indexing completed on 2024-11-16 09:32:52
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020 #ifndef BOOST_REGEX_V5_BASIC_REGEX_CREATOR_HPP
0021 #define BOOST_REGEX_V5_BASIC_REGEX_CREATOR_HPP
0022
0023 #ifdef BOOST_REGEX_MSVC
0024 # pragma warning(push)
0025 #pragma warning(disable:4459)
0026 #if BOOST_REGEX_MSVC < 1910
0027 #pragma warning(disable:4800)
0028 #endif
0029 #endif
0030
0031 #include <set>
0032
0033 namespace boost{
0034
0035 namespace BOOST_REGEX_DETAIL_NS{
0036
//
// digraph: a pair of characters.  When `second` is zero the object stands
// for the single character `first`; otherwise it holds a two character
// sequence.  All the std::pair comparison operators are inherited.
//
template <class charT>
struct digraph : public std::pair<charT, charT>
{
private:
   typedef std::pair<charT, charT> pair_type;

public:
   // An empty digraph: both characters are zero.
   digraph() : pair_type(charT(0), charT(0)) {}

   // A single character: the second slot is zero.
   digraph(charT c1) : pair_type(c1, charT(0)) {}

   // Two explicit characters.
   digraph(charT c1, charT c2) : pair_type(c1, c2) {}

   // Member-wise copy.
   digraph(const digraph<charT>& d) : pair_type(d.first, d.second) {}
   digraph<charT>& operator=(const digraph<charT>&) = default;

   // Build from any sequence offering size() and operator[]; the sequence
   // is asserted to hold one or two characters.
   template <class Seq>
   digraph(const Seq& s) : pair_type()
   {
      BOOST_REGEX_ASSERT(s.size() <= 2);
      BOOST_REGEX_ASSERT(s.size());
      this->first = s[0];
      if(s.size() > 1)
         this->second = s[1];
      else
         this->second = 0;
   }
};
0055
0056 template <class charT, class traits>
0057 class basic_char_set
0058 {
0059 public:
0060 typedef digraph<charT> digraph_type;
0061 typedef typename traits::string_type string_type;
0062 typedef typename traits::char_class_type m_type;
0063
0064 basic_char_set()
0065 {
0066 m_negate = false;
0067 m_has_digraphs = false;
0068 m_classes = 0;
0069 m_negated_classes = 0;
0070 m_empty = true;
0071 }
0072
0073 void add_single(const digraph_type& s)
0074 {
0075 m_singles.insert(s);
0076 if(s.second)
0077 m_has_digraphs = true;
0078 m_empty = false;
0079 }
0080 void add_range(const digraph_type& first, const digraph_type& end)
0081 {
0082 m_ranges.push_back(first);
0083 m_ranges.push_back(end);
0084 if(first.second)
0085 {
0086 m_has_digraphs = true;
0087 add_single(first);
0088 }
0089 if(end.second)
0090 {
0091 m_has_digraphs = true;
0092 add_single(end);
0093 }
0094 m_empty = false;
0095 }
0096 void add_class(m_type m)
0097 {
0098 m_classes |= m;
0099 m_empty = false;
0100 }
0101 void add_negated_class(m_type m)
0102 {
0103 m_negated_classes |= m;
0104 m_empty = false;
0105 }
0106 void add_equivalent(const digraph_type& s)
0107 {
0108 m_equivalents.insert(s);
0109 if(s.second)
0110 {
0111 m_has_digraphs = true;
0112 add_single(s);
0113 }
0114 m_empty = false;
0115 }
0116 void negate()
0117 {
0118 m_negate = true;
0119
0120 }
0121
0122
0123
0124
0125 bool has_digraphs()const
0126 {
0127 return m_has_digraphs;
0128 }
0129 bool is_negated()const
0130 {
0131 return m_negate;
0132 }
0133 typedef typename std::vector<digraph_type>::const_iterator list_iterator;
0134 typedef typename std::set<digraph_type>::const_iterator set_iterator;
0135 set_iterator singles_begin()const
0136 {
0137 return m_singles.begin();
0138 }
0139 set_iterator singles_end()const
0140 {
0141 return m_singles.end();
0142 }
0143 list_iterator ranges_begin()const
0144 {
0145 return m_ranges.begin();
0146 }
0147 list_iterator ranges_end()const
0148 {
0149 return m_ranges.end();
0150 }
0151 set_iterator equivalents_begin()const
0152 {
0153 return m_equivalents.begin();
0154 }
0155 set_iterator equivalents_end()const
0156 {
0157 return m_equivalents.end();
0158 }
0159 m_type classes()const
0160 {
0161 return m_classes;
0162 }
0163 m_type negated_classes()const
0164 {
0165 return m_negated_classes;
0166 }
0167 bool empty()const
0168 {
0169 return m_empty;
0170 }
0171 private:
0172 std::set<digraph_type> m_singles;
0173 std::vector<digraph_type> m_ranges;
0174 bool m_negate;
0175 bool m_has_digraphs;
0176 m_type m_classes;
0177 m_type m_negated_classes;
0178 bool m_empty;
0179 std::set<digraph_type> m_equivalents;
0180 };
0181
0182 template <class charT, class traits>
0183 class basic_regex_creator
0184 {
0185 public:
0186 basic_regex_creator(regex_data<charT, traits>* data);
0187 std::ptrdiff_t getoffset(void* addr)
0188 {
0189 return getoffset(addr, m_pdata->m_data.data());
0190 }
0191 std::ptrdiff_t getoffset(const void* addr, const void* base)
0192 {
0193 return static_cast<const char*>(addr) - static_cast<const char*>(base);
0194 }
0195 re_syntax_base* getaddress(std::ptrdiff_t off)
0196 {
0197 return getaddress(off, m_pdata->m_data.data());
0198 }
0199 re_syntax_base* getaddress(std::ptrdiff_t off, void* base)
0200 {
0201 return static_cast<re_syntax_base*>(static_cast<void*>(static_cast<char*>(base) + off));
0202 }
0203 void init(unsigned l_flags)
0204 {
0205 m_pdata->m_flags = l_flags;
0206 m_icase = l_flags & regex_constants::icase;
0207 }
0208 regbase::flag_type flags()
0209 {
0210 return m_pdata->m_flags;
0211 }
0212 void flags(regbase::flag_type f)
0213 {
0214 m_pdata->m_flags = f;
0215 if(m_icase != static_cast<bool>(f & regbase::icase))
0216 {
0217 m_icase = static_cast<bool>(f & regbase::icase);
0218 }
0219 }
0220 re_syntax_base* append_state(syntax_element_type t, std::size_t s = sizeof(re_syntax_base));
0221 re_syntax_base* insert_state(std::ptrdiff_t pos, syntax_element_type t, std::size_t s = sizeof(re_syntax_base));
0222 re_literal* append_literal(charT c);
0223 re_syntax_base* append_set(const basic_char_set<charT, traits>& char_set);
0224 re_syntax_base* append_set(const basic_char_set<charT, traits>& char_set, std::integral_constant<bool, false>*);
0225 re_syntax_base* append_set(const basic_char_set<charT, traits>& char_set, std::integral_constant<bool, true>*);
0226 void finalize(const charT* p1, const charT* p2);
0227 protected:
0228 regex_data<charT, traits>* m_pdata;
0229 const ::boost::regex_traits_wrapper<traits>&
0230 m_traits;
0231 re_syntax_base* m_last_state;
0232 bool m_icase;
0233 unsigned m_repeater_id;
0234 bool m_has_backrefs;
0235 std::uintmax_t m_bad_repeats;
0236 bool m_has_recursions;
0237 std::vector<unsigned char> m_recursion_checks;
0238 typename traits::char_class_type m_word_mask;
0239 typename traits::char_class_type m_mask_space;
0240 typename traits::char_class_type m_lower_mask;
0241 typename traits::char_class_type m_upper_mask;
0242 typename traits::char_class_type m_alpha_mask;
0243 private:
0244 basic_regex_creator& operator=(const basic_regex_creator&);
0245 basic_regex_creator(const basic_regex_creator&);
0246
0247 void fixup_pointers(re_syntax_base* state);
0248 void fixup_recursions(re_syntax_base* state);
0249 void create_startmaps(re_syntax_base* state);
0250 int calculate_backstep(re_syntax_base* state);
0251 void create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask);
0252 unsigned get_restart_type(re_syntax_base* state);
0253 void set_all_masks(unsigned char* bits, unsigned char);
0254 bool is_bad_repeat(re_syntax_base* pt);
0255 void set_bad_repeat(re_syntax_base* pt);
0256 syntax_element_type get_repeat_type(re_syntax_base* state);
0257 void probe_leading_repeat(re_syntax_base* state);
0258 };
0259
0260 template <class charT, class traits>
0261 basic_regex_creator<charT, traits>::basic_regex_creator(regex_data<charT, traits>* data)
0262 : m_pdata(data), m_traits(*(data->m_ptraits)), m_last_state(0), m_icase(false), m_repeater_id(0),
0263 m_has_backrefs(false), m_bad_repeats(0), m_has_recursions(false), m_word_mask(0), m_mask_space(0), m_lower_mask(0), m_upper_mask(0), m_alpha_mask(0)
0264 {
0265 m_pdata->m_data.clear();
0266 m_pdata->m_status = ::boost::regex_constants::error_ok;
0267 static const charT w = 'w';
0268 static const charT s = 's';
0269 static const charT l[5] = { 'l', 'o', 'w', 'e', 'r', };
0270 static const charT u[5] = { 'u', 'p', 'p', 'e', 'r', };
0271 static const charT a[5] = { 'a', 'l', 'p', 'h', 'a', };
0272 m_word_mask = m_traits.lookup_classname(&w, &w +1);
0273 m_mask_space = m_traits.lookup_classname(&s, &s +1);
0274 m_lower_mask = m_traits.lookup_classname(l, l + 5);
0275 m_upper_mask = m_traits.lookup_classname(u, u + 5);
0276 m_alpha_mask = m_traits.lookup_classname(a, a + 5);
0277 m_pdata->m_word_mask = m_word_mask;
0278 BOOST_REGEX_ASSERT(m_word_mask != 0);
0279 BOOST_REGEX_ASSERT(m_mask_space != 0);
0280 BOOST_REGEX_ASSERT(m_lower_mask != 0);
0281 BOOST_REGEX_ASSERT(m_upper_mask != 0);
0282 BOOST_REGEX_ASSERT(m_alpha_mask != 0);
0283 }
0284
0285 template <class charT, class traits>
0286 re_syntax_base* basic_regex_creator<charT, traits>::append_state(syntax_element_type t, std::size_t s)
0287 {
0288
0289 if(t == syntax_element_backref)
0290 this->m_has_backrefs = true;
0291
0292 m_pdata->m_data.align();
0293
0294 if(m_last_state)
0295 m_last_state->next.i = m_pdata->m_data.size() - getoffset(m_last_state);
0296
0297 m_last_state = static_cast<re_syntax_base*>(m_pdata->m_data.extend(s));
0298
0299 m_last_state->next.i = 0;
0300 m_last_state->type = t;
0301 return m_last_state;
0302 }
0303
0304 template <class charT, class traits>
0305 re_syntax_base* basic_regex_creator<charT, traits>::insert_state(std::ptrdiff_t pos, syntax_element_type t, std::size_t s)
0306 {
0307
0308 m_pdata->m_data.align();
0309
0310 if(m_last_state)
0311 m_last_state->next.i = m_pdata->m_data.size() - getoffset(m_last_state);
0312
0313 std::ptrdiff_t off = getoffset(m_last_state) + s;
0314
0315 re_syntax_base* new_state = static_cast<re_syntax_base*>(m_pdata->m_data.insert(pos, s));
0316
0317 new_state->next.i = s;
0318 new_state->type = t;
0319 m_last_state = getaddress(off);
0320 return new_state;
0321 }
0322
0323 template <class charT, class traits>
0324 re_literal* basic_regex_creator<charT, traits>::append_literal(charT c)
0325 {
0326 re_literal* result;
0327
0328 if((0 == m_last_state) || (m_last_state->type != syntax_element_literal))
0329 {
0330
0331 result = static_cast<re_literal*>(append_state(syntax_element_literal, sizeof(re_literal) + sizeof(charT)));
0332 result->length = 1;
0333 *static_cast<charT*>(static_cast<void*>(result+1)) = m_traits.translate(c, m_icase);
0334 }
0335 else
0336 {
0337
0338 std::ptrdiff_t off = getoffset(m_last_state);
0339 m_pdata->m_data.extend(sizeof(charT));
0340 m_last_state = result = static_cast<re_literal*>(getaddress(off));
0341 charT* characters = static_cast<charT*>(static_cast<void*>(result+1));
0342 characters[result->length] = m_traits.translate(c, m_icase);
0343 result->length += 1;
0344 }
0345 return result;
0346 }
0347
0348 template <class charT, class traits>
0349 inline re_syntax_base* basic_regex_creator<charT, traits>::append_set(
0350 const basic_char_set<charT, traits>& char_set)
0351 {
0352 typedef std::integral_constant<bool, (sizeof(charT) == 1) > truth_type;
0353 return char_set.has_digraphs()
0354 ? append_set(char_set, static_cast<std::integral_constant<bool, false>*>(0))
0355 : append_set(char_set, static_cast<truth_type*>(0));
0356 }
0357
0358 template <class charT, class traits>
0359 re_syntax_base* basic_regex_creator<charT, traits>::append_set(
0360 const basic_char_set<charT, traits>& char_set, std::integral_constant<bool, false>*)
0361 {
0362 typedef typename traits::string_type string_type;
0363 typedef typename basic_char_set<charT, traits>::list_iterator item_iterator;
0364 typedef typename basic_char_set<charT, traits>::set_iterator set_iterator;
0365 typedef typename traits::char_class_type m_type;
0366
0367 re_set_long<m_type>* result = static_cast<re_set_long<m_type>*>(append_state(syntax_element_long_set, sizeof(re_set_long<m_type>)));
0368
0369
0370
0371 result->csingles = static_cast<unsigned int>(std::distance(char_set.singles_begin(), char_set.singles_end()));
0372 result->cranges = static_cast<unsigned int>(std::distance(char_set.ranges_begin(), char_set.ranges_end())) / 2;
0373 result->cequivalents = static_cast<unsigned int>(std::distance(char_set.equivalents_begin(), char_set.equivalents_end()));
0374 result->cclasses = char_set.classes();
0375 result->cnclasses = char_set.negated_classes();
0376 if(flags() & regbase::icase)
0377 {
0378
0379 if(((result->cclasses & m_lower_mask) == m_lower_mask) || ((result->cclasses & m_upper_mask) == m_upper_mask))
0380 result->cclasses |= m_alpha_mask;
0381 if(((result->cnclasses & m_lower_mask) == m_lower_mask) || ((result->cnclasses & m_upper_mask) == m_upper_mask))
0382 result->cnclasses |= m_alpha_mask;
0383 }
0384
0385 result->isnot = char_set.is_negated();
0386 result->singleton = !char_set.has_digraphs();
0387
0388
0389
0390 std::ptrdiff_t offset = getoffset(result);
0391
0392
0393
0394 item_iterator first, last;
0395 set_iterator sfirst, slast;
0396 sfirst = char_set.singles_begin();
0397 slast = char_set.singles_end();
0398 while(sfirst != slast)
0399 {
0400 charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (sfirst->first == static_cast<charT>(0) ? 1 : sfirst->second ? 3 : 2)));
0401 p[0] = m_traits.translate(sfirst->first, m_icase);
0402 if(sfirst->first == static_cast<charT>(0))
0403 {
0404 p[0] = 0;
0405 }
0406 else if(sfirst->second)
0407 {
0408 p[1] = m_traits.translate(sfirst->second, m_icase);
0409 p[2] = 0;
0410 }
0411 else
0412 p[1] = 0;
0413 ++sfirst;
0414 }
0415
0416
0417
0418 first = char_set.ranges_begin();
0419 last = char_set.ranges_end();
0420 while(first != last)
0421 {
0422
0423 digraph<charT> c1 = *first;
0424 c1.first = this->m_traits.translate(c1.first, this->m_icase);
0425 c1.second = this->m_traits.translate(c1.second, this->m_icase);
0426 ++first;
0427 digraph<charT> c2 = *first;
0428 c2.first = this->m_traits.translate(c2.first, this->m_icase);
0429 c2.second = this->m_traits.translate(c2.second, this->m_icase);
0430 ++first;
0431 string_type s1, s2;
0432
0433 if(flags() & regex_constants::collate)
0434 {
0435
0436 charT a1[3] = { c1.first, c1.second, charT(0), };
0437 charT a2[3] = { c2.first, c2.second, charT(0), };
0438 s1 = this->m_traits.transform(a1, (a1[1] ? a1+2 : a1+1));
0439 s2 = this->m_traits.transform(a2, (a2[1] ? a2+2 : a2+1));
0440 if(s1.empty())
0441 s1 = string_type(1, charT(0));
0442 if(s2.empty())
0443 s2 = string_type(1, charT(0));
0444 }
0445 else
0446 {
0447 if(c1.second)
0448 {
0449 s1.insert(s1.end(), c1.first);
0450 s1.insert(s1.end(), c1.second);
0451 }
0452 else
0453 s1 = string_type(1, c1.first);
0454 if(c2.second)
0455 {
0456 s2.insert(s2.end(), c2.first);
0457 s2.insert(s2.end(), c2.second);
0458 }
0459 else
0460 s2.insert(s2.end(), c2.first);
0461 }
0462 if(s1 > s2)
0463 {
0464
0465 return 0;
0466 }
0467 charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (s1.size() + s2.size() + 2) ) );
0468 BOOST_REGEX_DETAIL_NS::copy(s1.begin(), s1.end(), p);
0469 p[s1.size()] = charT(0);
0470 p += s1.size() + 1;
0471 BOOST_REGEX_DETAIL_NS::copy(s2.begin(), s2.end(), p);
0472 p[s2.size()] = charT(0);
0473 }
0474
0475
0476
0477 sfirst = char_set.equivalents_begin();
0478 slast = char_set.equivalents_end();
0479 while(sfirst != slast)
0480 {
0481 string_type s;
0482 if(sfirst->second)
0483 {
0484 charT cs[3] = { sfirst->first, sfirst->second, charT(0), };
0485 s = m_traits.transform_primary(cs, cs+2);
0486 }
0487 else
0488 s = m_traits.transform_primary(&sfirst->first, &sfirst->first+1);
0489 if(s.empty())
0490 return 0;
0491 charT* p = static_cast<charT*>(this->m_pdata->m_data.extend(sizeof(charT) * (s.size()+1) ) );
0492 BOOST_REGEX_DETAIL_NS::copy(s.begin(), s.end(), p);
0493 p[s.size()] = charT(0);
0494 ++sfirst;
0495 }
0496
0497
0498
0499 m_last_state = result = static_cast<re_set_long<m_type>*>(getaddress(offset));
0500 return result;
0501 }
0502
//
// char_less: "less than" for characters.  The generic version uses
// operator<; the overloads for char and signed char compare the values
// after conversion to unsigned char.
//
template<class T>
inline bool char_less(T t1, T t2)
{
   return t1 < t2;
}
inline bool char_less(char t1, char t2)
{
   const unsigned char lhs = static_cast<unsigned char>(t1);
   const unsigned char rhs = static_cast<unsigned char>(t2);
   return lhs < rhs;
}
inline bool char_less(signed char t1, signed char t2)
{
   const unsigned char lhs = static_cast<unsigned char>(t1);
   const unsigned char rhs = static_cast<unsigned char>(t2);
   return lhs < rhs;
}
0516
0517 template <class charT, class traits>
0518 re_syntax_base* basic_regex_creator<charT, traits>::append_set(
0519 const basic_char_set<charT, traits>& char_set, std::integral_constant<bool, true>*)
0520 {
0521 typedef typename traits::string_type string_type;
0522 typedef typename basic_char_set<charT, traits>::list_iterator item_iterator;
0523 typedef typename basic_char_set<charT, traits>::set_iterator set_iterator;
0524
0525 re_set* result = static_cast<re_set*>(append_state(syntax_element_set, sizeof(re_set)));
0526 bool negate = char_set.is_negated();
0527 std::memset(result->_map, 0, sizeof(result->_map));
0528
0529
0530
0531 item_iterator first, last;
0532 set_iterator sfirst, slast;
0533 sfirst = char_set.singles_begin();
0534 slast = char_set.singles_end();
0535 while(sfirst != slast)
0536 {
0537 for(unsigned int i = 0; i < (1 << CHAR_BIT); ++i)
0538 {
0539 if(this->m_traits.translate(static_cast<charT>(i), this->m_icase)
0540 == this->m_traits.translate(sfirst->first, this->m_icase))
0541 result->_map[i] = true;
0542 }
0543 ++sfirst;
0544 }
0545
0546
0547
0548 first = char_set.ranges_begin();
0549 last = char_set.ranges_end();
0550 while(first != last)
0551 {
0552
0553 charT c1 = this->m_traits.translate(first->first, this->m_icase);
0554 ++first;
0555 charT c2 = this->m_traits.translate(first->first, this->m_icase);
0556 ++first;
0557
0558 if(flags() & regex_constants::collate)
0559 {
0560
0561 charT c3[2] = { c1, charT(0), };
0562 string_type s1 = this->m_traits.transform(c3, c3+1);
0563 c3[0] = c2;
0564 string_type s2 = this->m_traits.transform(c3, c3+1);
0565 if(s1 > s2)
0566 {
0567
0568 return 0;
0569 }
0570 BOOST_REGEX_ASSERT(c3[1] == charT(0));
0571 for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
0572 {
0573 c3[0] = static_cast<charT>(i);
0574 string_type s3 = this->m_traits.transform(c3, c3 +1);
0575 if((s1 <= s3) && (s3 <= s2))
0576 result->_map[i] = true;
0577 }
0578 }
0579 else
0580 {
0581 if(char_less(c2, c1))
0582 {
0583
0584 return 0;
0585 }
0586
0587 std::memset(result->_map + static_cast<unsigned char>(c1), true, static_cast<unsigned char>(1u) + static_cast<unsigned char>(static_cast<unsigned char>(c2) - static_cast<unsigned char>(c1)));
0588 }
0589 }
0590
0591
0592
0593 typedef typename traits::char_class_type m_type;
0594 m_type m = char_set.classes();
0595 if(flags() & regbase::icase)
0596 {
0597
0598 if(((m & m_lower_mask) == m_lower_mask) || ((m & m_upper_mask) == m_upper_mask))
0599 m |= m_alpha_mask;
0600 }
0601 if(m != 0)
0602 {
0603 for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
0604 {
0605 if(this->m_traits.isctype(static_cast<charT>(i), m))
0606 result->_map[i] = true;
0607 }
0608 }
0609
0610
0611
0612 m = char_set.negated_classes();
0613 if(flags() & regbase::icase)
0614 {
0615
0616 if(((m & m_lower_mask) == m_lower_mask) || ((m & m_upper_mask) == m_upper_mask))
0617 m |= m_alpha_mask;
0618 }
0619 if(m != 0)
0620 {
0621 for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
0622 {
0623 if(0 == this->m_traits.isctype(static_cast<charT>(i), m))
0624 result->_map[i] = true;
0625 }
0626 }
0627
0628
0629
0630 sfirst = char_set.equivalents_begin();
0631 slast = char_set.equivalents_end();
0632 while(sfirst != slast)
0633 {
0634 string_type s;
0635 BOOST_REGEX_ASSERT(static_cast<charT>(0) == sfirst->second);
0636 s = m_traits.transform_primary(&sfirst->first, &sfirst->first+1);
0637 if(s.empty())
0638 return 0;
0639 for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
0640 {
0641 charT c[2] = { (static_cast<charT>(i)), charT(0), };
0642 string_type s2 = this->m_traits.transform_primary(c, c+1);
0643 if(s == s2)
0644 result->_map[i] = true;
0645 }
0646 ++sfirst;
0647 }
0648 if(negate)
0649 {
0650 for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
0651 {
0652 result->_map[i] = !(result->_map[i]);
0653 }
0654 }
0655 return result;
0656 }
0657
0658 template <class charT, class traits>
0659 void basic_regex_creator<charT, traits>::finalize(const charT* p1, const charT* p2)
0660 {
0661 if(this->m_pdata->m_status)
0662 return;
0663
0664
0665 append_state(syntax_element_match);
0666
0667 std::ptrdiff_t len = p2 - p1;
0668 m_pdata->m_expression_len = len;
0669 charT* ps = static_cast<charT*>(m_pdata->m_data.extend(sizeof(charT) * (1 + (p2 - p1))));
0670 m_pdata->m_expression = ps;
0671 BOOST_REGEX_DETAIL_NS::copy(p1, p2, ps);
0672 ps[p2 - p1] = 0;
0673
0674
0675 m_pdata->m_status = 0;
0676
0677 m_pdata->m_first_state = static_cast<re_syntax_base*>(m_pdata->m_data.data());
0678
0679 fixup_pointers(m_pdata->m_first_state);
0680 if(m_has_recursions)
0681 {
0682 m_pdata->m_has_recursions = true;
0683 fixup_recursions(m_pdata->m_first_state);
0684 if(this->m_pdata->m_status)
0685 return;
0686 }
0687 else
0688 m_pdata->m_has_recursions = false;
0689
0690 create_startmaps(m_pdata->m_first_state);
0691
0692 std::memset(m_pdata->m_startmap, 0, sizeof(m_pdata->m_startmap));
0693 m_pdata->m_can_be_null = 0;
0694
0695 m_bad_repeats = 0;
0696 if(m_has_recursions)
0697 m_recursion_checks.assign(1 + m_pdata->m_mark_count, 0u);
0698 create_startmap(m_pdata->m_first_state, m_pdata->m_startmap, &(m_pdata->m_can_be_null), mask_all);
0699
0700 m_pdata->m_restart_type = get_restart_type(m_pdata->m_first_state);
0701
0702 probe_leading_repeat(m_pdata->m_first_state);
0703 }
0704
0705 template <class charT, class traits>
0706 void basic_regex_creator<charT, traits>::fixup_pointers(re_syntax_base* state)
0707 {
0708 while(state)
0709 {
0710 switch(state->type)
0711 {
0712 case syntax_element_recurse:
0713 m_has_recursions = true;
0714 if(state->next.i)
0715 state->next.p = getaddress(state->next.i, state);
0716 else
0717 state->next.p = 0;
0718 break;
0719 case syntax_element_rep:
0720 case syntax_element_dot_rep:
0721 case syntax_element_char_rep:
0722 case syntax_element_short_set_rep:
0723 case syntax_element_long_set_rep:
0724
0725 static_cast<re_repeat*>(state)->state_id = m_repeater_id++;
0726 BOOST_REGEX_FALLTHROUGH;
0727 case syntax_element_alt:
0728 std::memset(static_cast<re_alt*>(state)->_map, 0, sizeof(static_cast<re_alt*>(state)->_map));
0729 static_cast<re_alt*>(state)->can_be_null = 0;
0730 BOOST_REGEX_FALLTHROUGH;
0731 case syntax_element_jump:
0732 static_cast<re_jump*>(state)->alt.p = getaddress(static_cast<re_jump*>(state)->alt.i, state);
0733 BOOST_REGEX_FALLTHROUGH;
0734 default:
0735 if(state->next.i)
0736 state->next.p = getaddress(state->next.i, state);
0737 else
0738 state->next.p = 0;
0739 }
0740 state = state->next.p;
0741 }
0742 }
0743
0744 template <class charT, class traits>
0745 void basic_regex_creator<charT, traits>::fixup_recursions(re_syntax_base* state)
0746 {
0747 re_syntax_base* base = state;
0748 while(state)
0749 {
0750 switch(state->type)
0751 {
0752 case syntax_element_assert_backref:
0753 {
0754
0755 int idx = static_cast<const re_brace*>(state)->index;
0756 if(idx < 0)
0757 {
0758 idx = -idx-1;
0759 if(idx >= hash_value_mask)
0760 {
0761 idx = m_pdata->get_id(idx);
0762 if(idx <= 0)
0763 {
0764
0765 if(0 == this->m_pdata->m_status)
0766 this->m_pdata->m_status = boost::regex_constants::error_bad_pattern;
0767
0768
0769
0770 this->m_pdata->m_expression = 0;
0771 this->m_pdata->m_expression_len = 0;
0772
0773
0774
0775 if(0 == (this->flags() & regex_constants::no_except))
0776 {
0777 std::string message = "Encountered a forward reference to a marked sub-expression that does not exist.";
0778 boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0);
0779 e.raise();
0780 }
0781 }
0782 }
0783 }
0784 }
0785 break;
0786 case syntax_element_recurse:
0787 {
0788 bool ok = false;
0789 re_syntax_base* p = base;
0790 std::ptrdiff_t idx = static_cast<re_jump*>(state)->alt.i;
0791 if(idx >= hash_value_mask)
0792 {
0793
0794
0795
0796
0797 idx = m_pdata->get_id(static_cast<int>(idx));
0798 }
0799 if(idx < 0)
0800 {
0801 ok = false;
0802 }
0803 else
0804 {
0805 while(p)
0806 {
0807 if((p->type == syntax_element_startmark) && (static_cast<re_brace*>(p)->index == idx))
0808 {
0809
0810
0811
0812 static_cast<re_jump*>(state)->alt.p = p;
0813 ok = true;
0814
0815
0816
0817 p = p->next.p;
0818 int next_rep_id = 0;
0819 while(p)
0820 {
0821 switch(p->type)
0822 {
0823 case syntax_element_rep:
0824 case syntax_element_dot_rep:
0825 case syntax_element_char_rep:
0826 case syntax_element_short_set_rep:
0827 case syntax_element_long_set_rep:
0828 next_rep_id = static_cast<re_repeat*>(p)->state_id;
0829 break;
0830 case syntax_element_endmark:
0831 if(static_cast<const re_brace*>(p)->index == idx)
0832 next_rep_id = -1;
0833 break;
0834 default:
0835 break;
0836 }
0837 if(next_rep_id)
0838 break;
0839 p = p->next.p;
0840 }
0841 if(next_rep_id > 0)
0842 {
0843 static_cast<re_recurse*>(state)->state_id = next_rep_id - 1;
0844 }
0845
0846 break;
0847 }
0848 p = p->next.p;
0849 }
0850 }
0851 if(!ok)
0852 {
0853
0854 if(0 == this->m_pdata->m_status)
0855 this->m_pdata->m_status = boost::regex_constants::error_bad_pattern;
0856
0857
0858
0859 this->m_pdata->m_expression = 0;
0860 this->m_pdata->m_expression_len = 0;
0861
0862
0863
0864 if(0 == (this->flags() & regex_constants::no_except))
0865 {
0866 std::string message = "Encountered a forward reference to a recursive sub-expression that does not exist.";
0867 boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0);
0868 e.raise();
0869 }
0870 }
0871 }
0872 break;
0873 default:
0874 break;
0875 }
0876 state = state->next.p;
0877 }
0878 }
0879
0880 template <class charT, class traits>
0881 void basic_regex_creator<charT, traits>::create_startmaps(re_syntax_base* state)
0882 {
0883
0884
0885
0886
0887
0888
0889
0890
0891 bool l_icase = m_icase;
0892 std::vector<std::pair<bool, re_syntax_base*> > v;
0893
0894 while(state)
0895 {
0896 switch(state->type)
0897 {
0898 case syntax_element_toggle_case:
0899
0900 m_icase = static_cast<re_case*>(state)->icase;
0901 state = state->next.p;
0902 continue;
0903 case syntax_element_alt:
0904 case syntax_element_rep:
0905 case syntax_element_dot_rep:
0906 case syntax_element_char_rep:
0907 case syntax_element_short_set_rep:
0908 case syntax_element_long_set_rep:
0909
0910 v.push_back(std::pair<bool, re_syntax_base*>(m_icase, state));
0911 state = state->next.p;
0912 break;
0913 case syntax_element_backstep:
0914
0915 static_cast<re_brace*>(state)->index
0916 = this->calculate_backstep(state->next.p);
0917 if(static_cast<re_brace*>(state)->index < 0)
0918 {
0919
0920 if(0 == this->m_pdata->m_status)
0921 this->m_pdata->m_status = boost::regex_constants::error_bad_pattern;
0922
0923
0924
0925 this->m_pdata->m_expression = 0;
0926 this->m_pdata->m_expression_len = 0;
0927
0928
0929
0930 if(0 == (this->flags() & regex_constants::no_except))
0931 {
0932 std::string message = "Invalid lookbehind assertion encountered in the regular expression.";
0933 boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0);
0934 e.raise();
0935 }
0936 }
0937 BOOST_REGEX_FALLTHROUGH;
0938 default:
0939 state = state->next.p;
0940 }
0941 }
0942
0943
0944 while(!v.empty())
0945 {
0946
0947 if(m_has_recursions)
0948 m_recursion_checks.assign(1 + m_pdata->m_mark_count, 0u);
0949
0950 const std::pair<bool, re_syntax_base*>& p = v.back();
0951 m_icase = p.first;
0952 state = p.second;
0953 v.pop_back();
0954
0955
0956 m_bad_repeats = 0;
0957 create_startmap(state->next.p, static_cast<re_alt*>(state)->_map, &static_cast<re_alt*>(state)->can_be_null, mask_take);
0958 m_bad_repeats = 0;
0959
0960 if(m_has_recursions)
0961 m_recursion_checks.assign(1 + m_pdata->m_mark_count, 0u);
0962 create_startmap(static_cast<re_alt*>(state)->alt.p, static_cast<re_alt*>(state)->_map, &static_cast<re_alt*>(state)->can_be_null, mask_skip);
0963
0964 state->type = this->get_repeat_type(state);
0965 }
0966
0967 m_icase = l_icase;
0968 }
0969
0970 template <class charT, class traits>
0971 int basic_regex_creator<charT, traits>::calculate_backstep(re_syntax_base* state)
0972 {
0973 typedef typename traits::char_class_type m_type;
0974 int result = 0;
0975 while(state)
0976 {
0977 switch(state->type)
0978 {
0979 case syntax_element_startmark:
0980 if((static_cast<re_brace*>(state)->index == -1)
0981 || (static_cast<re_brace*>(state)->index == -2))
0982 {
0983 state = static_cast<re_jump*>(state->next.p)->alt.p->next.p;
0984 continue;
0985 }
0986 else if(static_cast<re_brace*>(state)->index == -3)
0987 {
0988 state = state->next.p->next.p;
0989 continue;
0990 }
0991 break;
0992 case syntax_element_endmark:
0993 if((static_cast<re_brace*>(state)->index == -1)
0994 || (static_cast<re_brace*>(state)->index == -2))
0995 return result;
0996 break;
0997 case syntax_element_literal:
0998 result += static_cast<re_literal*>(state)->length;
0999 break;
1000 case syntax_element_wild:
1001 case syntax_element_set:
1002 result += 1;
1003 break;
1004 case syntax_element_dot_rep:
1005 case syntax_element_char_rep:
1006 case syntax_element_short_set_rep:
1007 case syntax_element_backref:
1008 case syntax_element_rep:
1009 case syntax_element_combining:
1010 case syntax_element_long_set_rep:
1011 case syntax_element_backstep:
1012 {
1013 re_repeat* rep = static_cast<re_repeat *>(state);
1014
1015 state->type = this->get_repeat_type(state);
1016 if((state->type == syntax_element_dot_rep)
1017 || (state->type == syntax_element_char_rep)
1018 || (state->type == syntax_element_short_set_rep))
1019 {
1020 if(rep->max != rep->min)
1021 return -1;
1022 if (static_cast<std::size_t>((std::numeric_limits<int>::max)() - result) < rep->min)
1023 return -1;
1024 result += static_cast<int>(rep->min);
1025 state = rep->alt.p;
1026 continue;
1027 }
1028 else if(state->type == syntax_element_long_set_rep)
1029 {
1030 BOOST_REGEX_ASSERT(rep->next.p->type == syntax_element_long_set);
1031 if(static_cast<re_set_long<m_type>*>(rep->next.p)->singleton == 0)
1032 return -1;
1033 if(rep->max != rep->min)
1034 return -1;
1035 result += static_cast<int>(rep->min);
1036 state = rep->alt.p;
1037 continue;
1038 }
1039 }
1040 return -1;
1041 case syntax_element_long_set:
1042 if(static_cast<re_set_long<m_type>*>(state)->singleton == 0)
1043 return -1;
1044 result += 1;
1045 break;
1046 case syntax_element_jump:
1047 state = static_cast<re_jump*>(state)->alt.p;
1048 continue;
1049 case syntax_element_alt:
1050 {
1051 int r1 = calculate_backstep(state->next.p);
1052 int r2 = calculate_backstep(static_cast<re_alt*>(state)->alt.p);
1053 if((r1 < 0) || (r1 != r2))
1054 return -1;
1055 return result + r1;
1056 }
1057 default:
1058 break;
1059 }
1060 state = state->next.p;
1061 }
1062 return -1;
1063 }
1064
//
// recursion_saver: takes a copy of *p on construction and swaps that
// copy back into *p on destruction, so changes made to the vector while
// the saver is alive are undone when it goes out of scope.
//
struct recursion_saver
{
   std::vector<unsigned char> saved_state;  // snapshot taken at construction
   std::vector<unsigned char>* state;       // the vector being guarded

   // explicit: a vector pointer should never silently convert to a saver.
   explicit recursion_saver(std::vector<unsigned char>* p) : saved_state(*p), state(p) {}

   // a saver guards exactly one scope, so copying one is never meaningful:
   recursion_saver(const recursion_saver&) = delete;
   recursion_saver& operator=(const recursion_saver&) = delete;

   ~recursion_saver()
   {
      state->swap(saved_state);
   }
};
1075
// create_startmap: walks the state chain beginning at `state` and records which
// characters could come first, and whether nothing at all needs to be consumed.
//   state  - first state to examine; the walk ends at a null state or at the first case that returns.
//   l_map  - may be null. Table with (1u << CHAR_BIT) entries indexed by character value; `mask` is
//            OR-ed into each entry whose character can start a match from `state`. Entry 0 also
//            receives mask_init whenever the table is written.
//   pnull  - may be null. `mask` is OR-ed into *pnull by the cases that can succeed without reading a
//            character (NOTE(review): meaning inferred from which cases set it - confirm).
//   mask   - the bit(s) to set.
// m_recursion_checks is snapshotted on entry and restored on every exit by the recursion_saver guard.
1076 template <class charT, class traits>
1077 void basic_regex_creator<charT, traits>::create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask)
1078 {
1079 recursion_saver saved_recursions(&m_recursion_checks);
1080 int not_last_jump = 1;
1081 re_syntax_base* recursion_start = 0;
1082 int recursion_sub = 0;
1083 re_syntax_base* recursion_restart = 0;
1084 // not_last_jump is reset to -1 when a jump is followed and incremented at the bottom of each pass, so it is 0 only for the state reached directly by a jump.
1085 // l_icase tracks the case-sensitivity in force; it starts from m_icase and is updated by toggle_case states.
1086 bool l_icase = m_icase;
1087
1088 while(state)
1089 {
1090 switch(state->type)
1091 {
1092 case syntax_element_toggle_case:
1093 l_icase = static_cast<re_case*>(state)->icase;
1094 state = state->next.p;
1095 break;
1096 case syntax_element_literal:
1097 {
1098 // *pnull is left untouched here; only characters that translate to the
1099 // literal's first character (stored directly after the re_literal header) are flagged.
1100 if(l_map)
1101 {
1102 l_map[0] |= mask_init;
1103 charT first_char = *static_cast<charT*>(static_cast<void*>(static_cast<re_literal*>(state) + 1));
1104 for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
1105 {
1106 if(m_traits.translate(static_cast<charT>(i), l_icase) == first_char)
1107 l_map[i] |= mask;
1108 }
1109 }
1110 return;
1111 }
1112 case syntax_element_end_line:
1113 {
1114 // flag the four line-separator characters tested below: '\n', '\r', '\f' and 0x85.
1115 if(l_map)
1116 {
1117 l_map[0] |= mask_init;
1118 l_map[static_cast<unsigned>('\n')] |= mask;
1119 l_map[static_cast<unsigned>('\r')] |= mask;
1120 l_map[static_cast<unsigned>('\f')] |= mask;
1121 l_map[0x85] |= mask;
1122 }
1123 // recurse with a null map so that only *pnull is computed from what follows.
1124 if(pnull)
1125 create_startmap(state->next.p, 0, pnull, mask);
1126 return;
1127 }
1128 case syntax_element_recurse:
1129 {
1130 BOOST_REGEX_ASSERT(static_cast<const re_jump*>(state)->alt.p->type == syntax_element_startmark);
1131 recursion_sub = static_cast<re_brace*>(static_cast<const re_jump*>(state)->alt.p)->index;
1132 if(m_recursion_checks[recursion_sub] & 1u)
1133 {
1134 // bit 1 is already set for this sub-expression, i.e. it has been entered before on this walk: report infinite recursion.
1135 if(0 == this->m_pdata->m_status)
1136 this->m_pdata->m_status = boost::regex_constants::error_bad_pattern;
1137 //
1138 // the status is only overwritten when none was recorded; the stored expression is then reset:
1139 //
1140 this->m_pdata->m_expression = 0;
1141 this->m_pdata->m_expression_len = 0;
1142 //
1143 // raise a regex_error unless the no_except flag is set (in which case control continues below):
1144 //
1145 if(0 == (this->flags() & regex_constants::no_except))
1146 {
1147 std::string message = "Encountered an infinite recursion.";
1148 boost::regex_error e(message, boost::regex_constants::error_bad_pattern, 0);
1149 e.raise();
1150 }
1151 }
1152 else if(recursion_start == 0)
1153 {
1154 recursion_start = state;
1155 recursion_restart = state->next.p;
1156 state = static_cast<re_jump*>(state)->alt.p;
1157 m_recursion_checks[recursion_sub] |= 1u;
1158 break;
1159 }
1160 m_recursion_checks[recursion_sub] |= 1u;
1161 // a recursion is already being tracked (or the error was not raised): treat this state like a backref.
1162 BOOST_REGEX_FALLTHROUGH;
1163 }
1164 case syntax_element_backref:
1165 // sets *pnull, then falls through so that every character is flagged as well.
1166 if(pnull)
1167 *pnull |= mask;
1168 BOOST_REGEX_FALLTHROUGH;
1169 case syntax_element_wild:
1170 {
1171 // every character is flagged; *pnull is not touched by this case itself.
1172 set_all_masks(l_map, mask);
1173 return;
1174 }
1175 case syntax_element_accept:
1176 case syntax_element_match:
1177 {
1178 // every character is flagged and *pnull is set.
1179 set_all_masks(l_map, mask);
1180 if(pnull)
1181 *pnull |= mask;
1182 return;
1183 }
1184 case syntax_element_word_start:
1185 {
1186 // compute the map for what follows, then clear `mask` for every character that is NOT a word character.
1187 create_startmap(state->next.p, l_map, pnull, mask);
1188 if(l_map)
1189 {
1190 l_map[0] |= mask_init;
1191 for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
1192 {
1193 if(!m_traits.isctype(static_cast<charT>(i), m_word_mask))
1194 l_map[i] &= static_cast<unsigned char>(~mask);
1195 }
1196 }
1197 return;
1198 }
1199 case syntax_element_word_end:
1200 {
1201 // compute the map for what follows, then clear `mask` for every character that IS a word character.
1202 create_startmap(state->next.p, l_map, pnull, mask);
1203 if(l_map)
1204 {
1205 l_map[0] |= mask_init;
1206 for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
1207 {
1208 if(m_traits.isctype(static_cast<charT>(i), m_word_mask))
1209 l_map[i] &= static_cast<unsigned char>(~mask);
1210 }
1211 }
1212 return;
1213 }
1214 case syntax_element_buffer_end:
1215 {
1216 // only *pnull is set; no character is flagged.
1217 if(pnull)
1218 *pnull |= mask;
1219 return;
1220 }
1221 case syntax_element_long_set:
1222 if(l_map)
1223 {
1224 typedef typename traits::char_class_type m_type;
1225 if(static_cast<re_set_long<m_type>*>(state)->singleton)
1226 {
1227 l_map[0] |= mask_init;
1228 for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
1229 {
1230 charT c = static_cast<charT>(i);
1231 if(&c != re_is_set_member(&c, &c + 1, static_cast<re_set_long<m_type>*>(state), *m_pdata, l_icase))
1232 l_map[i] |= mask;
1233 }
1234 }
1235 else
1236 set_all_masks(l_map, mask);
1237 }
1238 return;
1239 case syntax_element_set:
1240 if(l_map)
1241 {
1242 l_map[0] |= mask_init;
1243 for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i)
1244 {
1245 if(static_cast<re_set*>(state)->_map[
1246 static_cast<unsigned char>(m_traits.translate(static_cast<charT>(i), l_icase))])
1247 l_map[i] |= mask;
1248 }
1249 }
1250 return;
1251 case syntax_element_jump:
1252 // follow the jump; not_last_jump becomes 0 after the increment at the bottom of the loop.
1253 state = static_cast<re_alt*>(state)->alt.p;
1254 not_last_jump = -1;
1255 break;
1256 case syntax_element_alt:
1257 case syntax_element_rep:
1258 case syntax_element_dot_rep:
1259 case syntax_element_char_rep:
1260 case syntax_element_short_set_rep:
1261 case syntax_element_long_set_rep:
1262 {
1263 re_alt* rep = static_cast<re_alt*>(state);
1264 if(rep->_map[0] & mask_init)
1265 {
1266 if(l_map)
1267 {
1268 // this state's own map already carries mask_init: translate its mask_any bits into `mask`.
1269 l_map[0] |= mask_init;
1270 for(unsigned int i = 0; i <= UCHAR_MAX; ++i)
1271 {
1272 if(rep->_map[i] & mask_any)
1273 l_map[i] |= mask;
1274 }
1275 }
1276 if(pnull)
1277 {
1278 if(rep->can_be_null & mask_any)
1279 *pnull |= mask;
1280 }
1281 }
1282 else
1283 {
1284 // no stored map yet: merge the results of both branches, unless is_bad_repeat()
1285 // reports this state, in which case everything is flagged and *pnull is set.
1286 if(is_bad_repeat(state))
1287 {
1288 set_all_masks(l_map, mask);
1289 if(pnull)
1290 *pnull |= mask;
1291 return;
1292 }
1293 set_bad_repeat(state);
1294 create_startmap(state->next.p, l_map, pnull, mask);
1295 if((state->type == syntax_element_alt)
1296 || (static_cast<re_repeat*>(state)->min == 0)
1297 || (not_last_jump == 0))
1298 create_startmap(rep->alt.p, l_map, pnull, mask);
1299 }
1300 }
1301 return;
1302 case syntax_element_soft_buffer_end:
1303 // flags '\n' and '\r' and sets *pnull.
1304 if(l_map)
1305 {
1306 l_map[0] |= mask_init;
1307 l_map[static_cast<unsigned>('\n')] |= mask;
1308 l_map[static_cast<unsigned>('\r')] |= mask;
1309 }
1310 if(pnull)
1311 *pnull |= mask;
1312 return;
1313 case syntax_element_endmark:
1314 // a negative index is handled as a special case: everything is flagged and the walk stops.
1315 if(static_cast<re_brace*>(state)->index < 0)
1316 {
1317 // every character is flagged and *pnull is set.
1318 set_all_masks(l_map, mask);
1319 if(pnull)
1320 *pnull |= mask;
1321 return;
1322 }
1323 else if(recursion_start && (recursion_sub != 0) && (recursion_sub == static_cast<re_brace*>(state)->index))
1324 {
1325 // end of the sub-expression entered from a recurse state: resume at the state after that recurse.
1326 recursion_start = 0;
1327 state = recursion_restart;
1328 break;
1329 }
1330 //
1331 // Otherwise the walk continues with the next state. When the expression has
1332 // recursions and this marker has a non-zero index, the chain from m_first_state
1333 // is first searched for a recurse state whose target startmark has the same index;
1334 // if one is found and bit 2 of its m_recursion_checks entry is clear, that bit is
1335 // set and the states after the recurse are folded into the result as well.
1336 //
1337 if(m_pdata->m_has_recursions && static_cast<re_brace*>(state)->index)
1338 {
1339 bool ok = false;
1340 re_syntax_base* p = m_pdata->m_first_state;
1341 while(p)
1342 {
1343 if(p->type == syntax_element_recurse)
1344 {
1345 re_brace* p2 = static_cast<re_brace*>(static_cast<re_jump*>(p)->alt.p);
1346 if((p2->type == syntax_element_startmark) && (p2->index == static_cast<re_brace*>(state)->index))
1347 {
1348 ok = true;
1349 break;
1350 }
1351 }
1352 p = p->next.p;
1353 }
1354 if(ok && ((m_recursion_checks[static_cast<re_brace*>(state)->index] & 2u) == 0))
1355 {
1356 m_recursion_checks[static_cast<re_brace*>(state)->index] |= 2u;
1357 create_startmap(p->next.p, l_map, pnull, mask);
1358 }
1359 }
1360 state = state->next.p;
1361 break;
1362
1363 case syntax_element_commit:
1364 set_all_masks(l_map, mask);
1365 // keep walking after flagging everything, so later states can still set *pnull.
1366 state = state->next.p;
1367 break;
1368 case syntax_element_startmark:
1369 // index -3 is handled as a special case: the state that follows the marker is skipped too.
1370 if(static_cast<re_brace*>(state)->index == -3)
1371 {
1372 state = state->next.p->next.p;
1373 break;
1374 }
1375 BOOST_REGEX_FALLTHROUGH;
1376 default:
1377 state = state->next.p;
1378 }
1379 ++not_last_jump;
1380 }
1381 }
1382
1383 template <class charT, class traits>
1384 unsigned basic_regex_creator<charT, traits>::get_restart_type(re_syntax_base* state)
1385 {
1386
1387
1388
1389 while(state)
1390 {
1391 switch(state->type)
1392 {
1393 case syntax_element_startmark:
1394 case syntax_element_endmark:
1395 state = state->next.p;
1396 continue;
1397 case syntax_element_start_line:
1398 return regbase::restart_line;
1399 case syntax_element_word_start:
1400 return regbase::restart_word;
1401 case syntax_element_buffer_start:
1402 return regbase::restart_buf;
1403 case syntax_element_restart_continue:
1404 return regbase::restart_continue;
1405 default:
1406 state = 0;
1407 continue;
1408 }
1409 }
1410 return regbase::restart_any;
1411 }
1412
1413 template <class charT, class traits>
1414 void basic_regex_creator<charT, traits>::set_all_masks(unsigned char* bits, unsigned char mask)
1415 {
1416
1417
1418
1419
1420
1421 if(bits)
1422 {
1423 if(bits[0] == 0)
1424 (std::memset)(bits, mask, 1u << CHAR_BIT);
1425 else
1426 {
1427 for(unsigned i = 0; i < (1u << CHAR_BIT); ++i)
1428 bits[i] |= mask;
1429 }
1430 bits[0] |= mask_init;
1431 }
1432 }
1433
1434 template <class charT, class traits>
1435 bool basic_regex_creator<charT, traits>::is_bad_repeat(re_syntax_base* pt)
1436 {
1437 switch(pt->type)
1438 {
1439 case syntax_element_rep:
1440 case syntax_element_dot_rep:
1441 case syntax_element_char_rep:
1442 case syntax_element_short_set_rep:
1443 case syntax_element_long_set_rep:
1444 {
1445 unsigned state_id = static_cast<re_repeat*>(pt)->state_id;
1446 if(state_id >= sizeof(m_bad_repeats) * CHAR_BIT)
1447 return true;
1448 static const std::uintmax_t one = 1uL;
1449 return m_bad_repeats & (one << state_id);
1450 }
1451 default:
1452 return false;
1453 }
1454 }
1455
1456 template <class charT, class traits>
1457 void basic_regex_creator<charT, traits>::set_bad_repeat(re_syntax_base* pt)
1458 {
1459 switch(pt->type)
1460 {
1461 case syntax_element_rep:
1462 case syntax_element_dot_rep:
1463 case syntax_element_char_rep:
1464 case syntax_element_short_set_rep:
1465 case syntax_element_long_set_rep:
1466 {
1467 unsigned state_id = static_cast<re_repeat*>(pt)->state_id;
1468 static const std::uintmax_t one = 1uL;
1469 if(state_id <= sizeof(m_bad_repeats) * CHAR_BIT)
1470 m_bad_repeats |= (one << state_id);
1471 }
1472 break;
1473 default:
1474 break;
1475 }
1476 }
1477
1478 template <class charT, class traits>
1479 syntax_element_type basic_regex_creator<charT, traits>::get_repeat_type(re_syntax_base* state)
1480 {
1481 typedef typename traits::char_class_type m_type;
1482 if(state->type == syntax_element_rep)
1483 {
1484
1485 if(state->next.p->next.p->next.p == static_cast<re_alt*>(state)->alt.p)
1486 {
1487 switch(state->next.p->type)
1488 {
1489 case BOOST_REGEX_DETAIL_NS::syntax_element_wild:
1490 return BOOST_REGEX_DETAIL_NS::syntax_element_dot_rep;
1491 case BOOST_REGEX_DETAIL_NS::syntax_element_literal:
1492 return BOOST_REGEX_DETAIL_NS::syntax_element_char_rep;
1493 case BOOST_REGEX_DETAIL_NS::syntax_element_set:
1494 return BOOST_REGEX_DETAIL_NS::syntax_element_short_set_rep;
1495 case BOOST_REGEX_DETAIL_NS::syntax_element_long_set:
1496 if(static_cast<BOOST_REGEX_DETAIL_NS::re_set_long<m_type>*>(state->next.p)->singleton)
1497 return BOOST_REGEX_DETAIL_NS::syntax_element_long_set_rep;
1498 break;
1499 default:
1500 break;
1501 }
1502 }
1503 }
1504 return state->type;
1505 }
1506
1507 template <class charT, class traits>
1508 void basic_regex_creator<charT, traits>::probe_leading_repeat(re_syntax_base* state)
1509 {
1510
1511
1512 do
1513 {
1514 switch(state->type)
1515 {
1516 case syntax_element_startmark:
1517 if(static_cast<re_brace*>(state)->index >= 0)
1518 {
1519 state = state->next.p;
1520 continue;
1521 }
1522 #ifdef BOOST_REGEX_MSVC
1523 # pragma warning(push)
1524 #pragma warning(disable:6011)
1525 #endif
1526 if((static_cast<re_brace*>(state)->index == -1)
1527 || (static_cast<re_brace*>(state)->index == -2))
1528 {
1529
1530 state = static_cast<const re_jump*>(state->next.p)->alt.p->next.p;
1531 continue;
1532 }
1533 #ifdef BOOST_REGEX_MSVC
1534 # pragma warning(pop)
1535 #endif
1536 if(static_cast<re_brace*>(state)->index == -3)
1537 {
1538
1539 state = state->next.p->next.p;
1540 continue;
1541 }
1542 return;
1543 case syntax_element_endmark:
1544 case syntax_element_start_line:
1545 case syntax_element_end_line:
1546 case syntax_element_word_boundary:
1547 case syntax_element_within_word:
1548 case syntax_element_word_start:
1549 case syntax_element_word_end:
1550 case syntax_element_buffer_start:
1551 case syntax_element_buffer_end:
1552 case syntax_element_restart_continue:
1553 state = state->next.p;
1554 break;
1555 case syntax_element_dot_rep:
1556 case syntax_element_char_rep:
1557 case syntax_element_short_set_rep:
1558 case syntax_element_long_set_rep:
1559 if(this->m_has_backrefs == 0)
1560 static_cast<re_repeat*>(state)->leading = true;
1561 BOOST_REGEX_FALLTHROUGH;
1562 default:
1563 return;
1564 }
1565 }while(state);
1566 }
1567
1568 }
1569
1570 }
1571
1572 #ifdef BOOST_REGEX_MSVC
1573 # pragma warning(pop)
1574 #endif
1575
1576 #endif