File indexing completed on 2025-01-19 09:47:50
0001
0002
0003
0004
0005
0006 #ifndef BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_GENERATOR_HPP
0007 #define BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_GENERATOR_HPP
0008
0009 #include "char_traits.hpp"
0010
0011 #include <cstring>
0012 #include "partition/charset.hpp"
0013 #include "partition/equivset.hpp"
0014 #include <memory>
0015 #include <limits>
0016 #include "parser/tree/node.hpp"
0017 #include "parser/parser.hpp"
0018 #include "containers/ptr_list.hpp"
0019 #include <boost/move/unique_ptr.hpp>
0020 #include "rules.hpp"
0021 #include "state_machine.hpp"
0022
0023 namespace boost
0024 {
0025 namespace lexer
0026 {
0027 template<typename CharT, typename Traits = char_traits<CharT> >
0028 class basic_generator
0029 {
0030 public:
0031 typedef typename detail::internals::size_t_vector size_t_vector;
0032 typedef basic_rules<CharT> rules;
0033
0034 static void build (const rules &rules_,
0035 basic_state_machine<CharT> &state_machine_)
0036 {
0037 std::size_t index_ = 0;
0038 std::size_t size_ = rules_.statemap ().size ();
0039 node_ptr_vector node_ptr_vector_;
0040 detail::internals &internals_ = const_cast<detail::internals &>
0041 (state_machine_.data ());
0042 bool seen_BOL_assertion_ = false;
0043 bool seen_EOL_assertion_ = false;
0044
0045 state_machine_.clear ();
0046
0047 for (; index_ < size_; ++index_)
0048 {
0049 internals_._lookup->push_back (static_cast<size_t_vector *>(0));
0050 internals_._lookup->back () = new size_t_vector;
0051 internals_._dfa_alphabet.push_back (0);
0052 internals_._dfa->push_back (static_cast<size_t_vector *>(0));
0053 internals_._dfa->back () = new size_t_vector;
0054 }
0055
0056 for (index_ = 0, size_ = internals_._lookup->size ();
0057 index_ < size_; ++index_)
0058 {
0059 internals_._lookup[index_]->resize (sizeof (CharT) == 1 ?
0060 num_chars : num_wchar_ts, dead_state_index);
0061
0062 if (!rules_.regexes ()[index_].empty ())
0063 {
0064
0065 index_set_vector set_mapping_;
0066
0067 detail::node *root_ = build_tree (rules_, index_,
0068 node_ptr_vector_, internals_, set_mapping_);
0069
0070 build_dfa (root_, set_mapping_,
0071 internals_._dfa_alphabet[index_],
0072 *internals_._dfa[index_]);
0073
0074 if (internals_._seen_BOL_assertion)
0075 {
0076 seen_BOL_assertion_ = true;
0077 }
0078
0079 if (internals_._seen_EOL_assertion)
0080 {
0081 seen_EOL_assertion_ = true;
0082 }
0083
0084 internals_._seen_BOL_assertion = false;
0085 internals_._seen_EOL_assertion = false;
0086 }
0087 }
0088
0089 internals_._seen_BOL_assertion = seen_BOL_assertion_;
0090 internals_._seen_EOL_assertion = seen_EOL_assertion_;
0091 }
0092
0093 static void minimise (basic_state_machine<CharT> &state_machine_)
0094 {
0095 detail::internals &internals_ = const_cast<detail::internals &>
0096 (state_machine_.data ());
0097 const std::size_t machines_ = internals_._dfa->size ();
0098
0099 for (std::size_t i_ = 0; i_ < machines_; ++i_)
0100 {
0101 const std::size_t dfa_alphabet_ = internals_._dfa_alphabet[i_];
0102 size_t_vector *dfa_ = internals_._dfa[i_];
0103
0104 if (dfa_alphabet_ != 0)
0105 {
0106 std::size_t size_ = 0;
0107
0108 do
0109 {
0110 size_ = dfa_->size ();
0111 minimise_dfa (dfa_alphabet_, *dfa_, size_);
0112 } while (dfa_->size () != size_);
0113 }
0114 }
0115 }
0116
0117 protected:
0118 typedef detail::basic_charset<CharT> charset;
0119 typedef detail::ptr_list<charset> charset_list;
0120 typedef boost::movelib::unique_ptr<charset> charset_ptr;
0121 typedef detail::equivset equivset;
0122 typedef detail::ptr_list<equivset> equivset_list;
0123 typedef boost::movelib::unique_ptr<equivset> equivset_ptr;
0124 typedef typename charset::index_set index_set;
0125 typedef std::vector<index_set> index_set_vector;
0126 typedef detail::basic_parser<CharT> parser;
0127 typedef typename parser::node_ptr_vector node_ptr_vector;
0128 typedef std::set<const detail::node *> node_set;
0129 typedef detail::ptr_vector<node_set> node_set_vector;
0130 typedef std::vector<const detail::node *> node_vector;
0131 typedef detail::ptr_vector<node_vector> node_vector_vector;
0132 typedef typename parser::string string;
0133 typedef std::pair<string, string> string_pair;
0134 typedef typename parser::tokeniser::string_token string_token;
0135 typedef std::deque<string_pair> macro_deque;
0136 typedef std::pair<string, const detail::node *> macro_pair;
0137 typedef typename parser::macro_map::iterator macro_iter;
0138 typedef std::pair<macro_iter, bool> macro_iter_pair;
0139 typedef typename parser::tokeniser::token_map token_map;
0140
0141 static detail::node *build_tree (const rules &rules_,
0142 const std::size_t state_, node_ptr_vector &node_ptr_vector_,
0143 detail::internals &internals_, index_set_vector &set_mapping_)
0144 {
0145 size_t_vector *lookup_ = internals_._lookup[state_];
0146 const typename rules::string_deque_deque ®exes_ =
0147 rules_.regexes ();
0148 const typename rules::id_vector_deque &ids_ = rules_.ids ();
0149 const typename rules::id_vector_deque &unique_ids_ =
0150 rules_.unique_ids ();
0151 const typename rules::id_vector_deque &states_ = rules_.states ();
0152 typename rules::string_deque::const_iterator regex_iter_ =
0153 regexes_[state_].begin ();
0154 typename rules::string_deque::const_iterator regex_iter_end_ =
0155 regexes_[state_].end ();
0156 typename rules::id_vector::const_iterator ids_iter_ =
0157 ids_[state_].begin ();
0158 typename rules::id_vector::const_iterator unique_ids_iter_ =
0159 unique_ids_[state_].begin ();
0160 typename rules::id_vector::const_iterator states_iter_ =
0161 states_[state_].begin ();
0162 const typename rules::string ®ex_ = *regex_iter_;
0163
0164 token_map token_map_;
0165 const typename rules::string_pair_deque ¯odeque_ =
0166 rules_.macrodeque ();
0167 typename parser::macro_map macromap_;
0168 typename detail::node::node_vector tree_vector_;
0169
0170 build_macros (token_map_, macrodeque_, macromap_,
0171 rules_.flags (), rules_.locale (), node_ptr_vector_,
0172 internals_._seen_BOL_assertion, internals_._seen_EOL_assertion);
0173
0174 detail::node *root_ = parser::parse (regex_.c_str (),
0175 regex_.c_str () + regex_.size (), *ids_iter_, *unique_ids_iter_,
0176 *states_iter_, rules_.flags (), rules_.locale (), node_ptr_vector_,
0177 macromap_, token_map_, internals_._seen_BOL_assertion,
0178 internals_._seen_EOL_assertion);
0179
0180 ++regex_iter_;
0181 ++ids_iter_;
0182 ++unique_ids_iter_;
0183 ++states_iter_;
0184 tree_vector_.push_back (root_);
0185
0186
0187 while (regex_iter_ != regex_iter_end_)
0188 {
0189
0190 const typename rules::string ®ex2_ = *regex_iter_;
0191
0192 root_ = parser::parse (regex2_.c_str (),
0193 regex2_.c_str () + regex2_.size (), *ids_iter_,
0194 *unique_ids_iter_, *states_iter_, rules_.flags (),
0195 rules_.locale (), node_ptr_vector_, macromap_, token_map_,
0196 internals_._seen_BOL_assertion,
0197 internals_._seen_EOL_assertion);
0198 tree_vector_.push_back (root_);
0199 ++regex_iter_;
0200 ++ids_iter_;
0201 ++unique_ids_iter_;
0202 ++states_iter_;
0203 }
0204
0205 if (internals_._seen_BOL_assertion)
0206 {
0207
0208 typename detail::node::node_vector::iterator iter_ =
0209 tree_vector_.begin ();
0210 typename detail::node::node_vector::iterator end_ =
0211 tree_vector_.end ();
0212
0213 for (; iter_ != end_; ++iter_)
0214 {
0215 fixup_bol (*iter_, node_ptr_vector_);
0216 }
0217 }
0218
0219
0220 {
0221 typename detail::node::node_vector::iterator iter_ =
0222 tree_vector_.begin ();
0223 typename detail::node::node_vector::iterator end_ =
0224 tree_vector_.end ();
0225
0226 if (iter_ != end_)
0227 {
0228 root_ = *iter_;
0229 ++iter_;
0230 }
0231
0232 for (; iter_ != end_; ++iter_)
0233 {
0234 node_ptr_vector_->push_back (static_cast<detail::selection_node *>(0));
0235 node_ptr_vector_->back () = new detail::selection_node
0236 (root_, *iter_);
0237 root_ = node_ptr_vector_->back ();
0238 }
0239 }
0240
0241
0242 charset_list token_list_;
0243
0244 set_mapping_.resize (token_map_.size ());
0245 partition_tokens (token_map_, token_list_);
0246
0247 typename charset_list::list::const_iterator iter_ =
0248 token_list_->begin ();
0249 typename charset_list::list::const_iterator end_ =
0250 token_list_->end ();
0251 std::size_t index_ = 0;
0252
0253 for (; iter_ != end_; ++iter_, ++index_)
0254 {
0255 const charset *cs_ = *iter_;
0256 typename charset::index_set::const_iterator set_iter_ =
0257 cs_->_index_set.begin ();
0258 typename charset::index_set::const_iterator set_end_ =
0259 cs_->_index_set.end ();
0260
0261 fill_lookup (cs_->_token, lookup_, index_);
0262
0263 for (; set_iter_ != set_end_; ++set_iter_)
0264 {
0265 set_mapping_[*set_iter_].insert (index_);
0266 }
0267 }
0268
0269 internals_._dfa_alphabet[state_] = token_list_->size () + dfa_offset;
0270 return root_;
0271 }
0272
0273 static void build_macros (token_map &token_map_,
0274 const macro_deque ¯odeque_,
0275 typename parser::macro_map ¯omap_, const regex_flags flags_,
0276 const std::locale &locale_, node_ptr_vector &node_ptr_vector_,
0277 bool &seen_BOL_assertion_, bool &seen_EOL_assertion_)
0278 {
0279 for (typename macro_deque::const_iterator iter_ =
0280 macrodeque_.begin (), end_ = macrodeque_.end ();
0281 iter_ != end_; ++iter_)
0282 {
0283 const typename rules::string &name_ = iter_->first;
0284 const typename rules::string ®ex_ = iter_->second;
0285 detail::node *node_ = parser::parse (regex_.c_str (),
0286 regex_.c_str () + regex_.size (), 0, 0, 0, flags_,
0287 locale_, node_ptr_vector_, macromap_, token_map_,
0288 seen_BOL_assertion_, seen_EOL_assertion_);
0289 macro_iter_pair map_iter_ = macromap_.
0290 insert (macro_pair (name_, static_cast<const detail::node *>
0291 (0)));
0292
0293 map_iter_.first->second = node_;
0294 }
0295 }
0296
0297 static void build_dfa (detail::node *root_,
0298 const index_set_vector &set_mapping_, const std::size_t dfa_alphabet_,
0299 size_t_vector &dfa_)
0300 {
0301 typename detail::node::node_vector *followpos_ =
0302 &root_->firstpos ();
0303 node_set_vector seen_sets_;
0304 node_vector_vector seen_vectors_;
0305 size_t_vector hash_vector_;
0306
0307
0308 dfa_.resize (dfa_alphabet_, 0);
0309 closure (followpos_, seen_sets_, seen_vectors_,
0310 hash_vector_, dfa_alphabet_, dfa_);
0311
0312 std::size_t *ptr_ = 0;
0313
0314 for (std::size_t index_ = 0; index_ < seen_vectors_->size (); ++index_)
0315 {
0316 equivset_list equiv_list_;
0317
0318 build_equiv_list (seen_vectors_[index_], set_mapping_, equiv_list_);
0319
0320 for (typename equivset_list::list::const_iterator iter_ =
0321 equiv_list_->begin (), end_ = equiv_list_->end ();
0322 iter_ != end_; ++iter_)
0323 {
0324 equivset *equivset_ = *iter_;
0325 const std::size_t transition_ = closure
0326 (&equivset_->_followpos, seen_sets_, seen_vectors_,
0327 hash_vector_, dfa_alphabet_, dfa_);
0328
0329 if (transition_ != npos)
0330 {
0331 ptr_ = &dfa_.front () + ((index_ + 1) * dfa_alphabet_);
0332
0333
0334 if (*ptr_ && !equivset_->_greedy) continue;
0335
0336 for (typename detail::equivset::index_vector::const_iterator
0337 equiv_iter_ = equivset_->_index_vector.begin (),
0338 equiv_end_ = equivset_->_index_vector.end ();
0339 equiv_iter_ != equiv_end_; ++equiv_iter_)
0340 {
0341 const std::size_t equiv_index_ = *equiv_iter_;
0342
0343 if (equiv_index_ == bol_token)
0344 {
0345 if (ptr_[eol_index] == 0)
0346 {
0347 ptr_[bol_index] = transition_;
0348 }
0349 }
0350 else if (equiv_index_ == eol_token)
0351 {
0352 if (ptr_[bol_index] == 0)
0353 {
0354 ptr_[eol_index] = transition_;
0355 }
0356 }
0357 else
0358 {
0359 ptr_[equiv_index_ + dfa_offset] = transition_;
0360 }
0361 }
0362 }
0363 }
0364 }
0365 }
0366
0367 static std::size_t closure (typename detail::node::node_vector *followpos_,
0368 node_set_vector &seen_sets_, node_vector_vector &seen_vectors_,
0369 size_t_vector &hash_vector_, const std::size_t size_,
0370 size_t_vector &dfa_)
0371 {
0372 bool end_state_ = false;
0373 std::size_t id_ = 0;
0374 std::size_t unique_id_ = npos;
0375 std::size_t state_ = 0;
0376 std::size_t hash_ = 0;
0377
0378 if (followpos_->empty ()) return npos;
0379
0380 std::size_t index_ = 0;
0381 boost::movelib::unique_ptr<node_set> set_ptr_ (new node_set);
0382 boost::movelib::unique_ptr<node_vector> vector_ptr_ (new node_vector);
0383
0384 for (typename detail::node::node_vector::const_iterator iter_ =
0385 followpos_->begin (), end_ = followpos_->end ();
0386 iter_ != end_; ++iter_)
0387 {
0388 closure_ex (*iter_, end_state_, id_, unique_id_, state_,
0389 set_ptr_.get (), vector_ptr_.get (), hash_);
0390 }
0391
0392 bool found_ = false;
0393 typename size_t_vector::const_iterator hash_iter_ =
0394 hash_vector_.begin ();
0395 typename size_t_vector::const_iterator hash_end_ =
0396 hash_vector_.end ();
0397 typename node_set_vector::vector::const_iterator set_iter_ =
0398 seen_sets_->begin ();
0399
0400 for (; hash_iter_ != hash_end_; ++hash_iter_, ++set_iter_)
0401 {
0402 found_ = *hash_iter_ == hash_ && *(*set_iter_) == *set_ptr_;
0403 ++index_;
0404
0405 if (found_) break;
0406 }
0407
0408 if (!found_)
0409 {
0410 seen_sets_->push_back (static_cast<node_set *>(0));
0411 seen_sets_->back () = set_ptr_.release ();
0412 seen_vectors_->push_back (static_cast<node_vector *>(0));
0413 seen_vectors_->back () = vector_ptr_.release ();
0414 hash_vector_.push_back (hash_);
0415
0416 index_ = seen_sets_->size ();
0417
0418 const std::size_t old_size_ = dfa_.size ();
0419
0420 dfa_.resize (old_size_ + size_, 0);
0421
0422 if (end_state_)
0423 {
0424 dfa_[old_size_] |= end_state;
0425 dfa_[old_size_ + id_index] = id_;
0426 dfa_[old_size_ + unique_id_index] = unique_id_;
0427 dfa_[old_size_ + state_index] = state_;
0428 }
0429 }
0430
0431 return index_;
0432 }
0433
0434 static void closure_ex (detail::node *node_, bool &end_state_,
0435 std::size_t &id_, std::size_t &unique_id_, std::size_t &state_,
0436 node_set *set_ptr_, node_vector *vector_ptr_, std::size_t &hash_)
0437 {
0438 const bool temp_end_state_ = node_->end_state ();
0439
0440 if (temp_end_state_)
0441 {
0442 if (!end_state_)
0443 {
0444 end_state_ = true;
0445 id_ = node_->id ();
0446 unique_id_ = node_->unique_id ();
0447 state_ = node_->lexer_state ();
0448 }
0449 }
0450
0451 if (set_ptr_->insert (node_).second)
0452 {
0453 vector_ptr_->push_back (node_);
0454 hash_ += reinterpret_cast<std::size_t> (node_);
0455 }
0456 }
0457
0458 static void partition_tokens (const token_map &map_,
0459 charset_list &lhs_)
0460 {
0461 charset_list rhs_;
0462
0463 fill_rhs_list (map_, rhs_);
0464
0465 if (!rhs_->empty ())
0466 {
0467 typename charset_list::list::iterator iter_;
0468 typename charset_list::list::iterator end_;
0469 charset_ptr overlap_ (new charset);
0470
0471 lhs_->push_back (static_cast<charset *>(0));
0472 lhs_->back () = rhs_->front ();
0473 rhs_->pop_front ();
0474
0475 while (!rhs_->empty ())
0476 {
0477 charset_ptr r_ (rhs_->front ());
0478
0479 rhs_->pop_front ();
0480 iter_ = lhs_->begin ();
0481 end_ = lhs_->end ();
0482
0483 while (!r_->empty () && iter_ != end_)
0484 {
0485 typename charset_list::list::iterator l_iter_ = iter_;
0486
0487 (*l_iter_)->intersect (*r_.get (), *overlap_.get ());
0488
0489 if (overlap_->empty ())
0490 {
0491 ++iter_;
0492 }
0493 else if ((*l_iter_)->empty ())
0494 {
0495 delete *l_iter_;
0496 *l_iter_ = overlap_.release ();
0497
0498 overlap_.reset (new charset);
0499 ++iter_;
0500 }
0501 else if (r_->empty ())
0502 {
0503 overlap_.swap (r_);
0504 overlap_.reset (new charset);
0505 break;
0506 }
0507 else
0508 {
0509 iter_ = lhs_->insert (++iter_,
0510 static_cast<charset *>(0));
0511 *iter_ = overlap_.release ();
0512
0513 overlap_.reset(new charset);
0514 ++iter_;
0515 end_ = lhs_->end ();
0516 }
0517 }
0518
0519 if (!r_->empty ())
0520 {
0521 lhs_->push_back (static_cast<charset *>(0));
0522 lhs_->back () = r_.release ();
0523 }
0524 }
0525 }
0526 }
0527
0528 static void fill_rhs_list (const token_map &map_,
0529 charset_list &list_)
0530 {
0531 typename parser::tokeniser::token_map::const_iterator iter_ =
0532 map_.begin ();
0533 typename parser::tokeniser::token_map::const_iterator end_ =
0534 map_.end ();
0535
0536 for (; iter_ != end_; ++iter_)
0537 {
0538 list_->push_back (static_cast<charset *>(0));
0539 list_->back () = new charset (iter_->first, iter_->second);
0540 }
0541 }
0542
0543 static void fill_lookup (const string_token &token_,
0544 size_t_vector *lookup_, const std::size_t index_)
0545 {
0546 const CharT *curr_ = token_._charset.c_str ();
0547 const CharT *chars_end_ = curr_ + token_._charset.size ();
0548 std::size_t *ptr_ = &lookup_->front ();
0549 const std::size_t max_ = sizeof (CharT) == 1 ?
0550 num_chars : num_wchar_ts;
0551
0552 if (token_._negated)
0553 {
0554
0555
0556
0557
0558
0559 CharT curr_char_ = 0;
0560 std::size_t i_ = 0;
0561
0562 while (curr_ < chars_end_)
0563 {
0564 while (*curr_ > curr_char_)
0565 {
0566 ptr_[static_cast<typename Traits::index_type>
0567 (curr_char_)] = index_ + dfa_offset;
0568 ++curr_char_;
0569 ++i_;
0570 }
0571
0572 ++curr_char_;
0573 ++curr_;
0574 ++i_;
0575 }
0576
0577 for (; i_ < max_; ++i_)
0578 {
0579 ptr_[static_cast<typename Traits::index_type>(curr_char_)] =
0580 index_ + dfa_offset;
0581 ++curr_char_;
0582 }
0583 }
0584 else
0585 {
0586 while (curr_ < chars_end_)
0587 {
0588 ptr_[static_cast<typename Traits::index_type>(*curr_)] =
0589 index_ + dfa_offset;
0590 ++curr_;
0591 }
0592 }
0593 }
0594
0595 static void build_equiv_list (const node_vector *vector_,
0596 const index_set_vector &set_mapping_, equivset_list &lhs_)
0597 {
0598 equivset_list rhs_;
0599
0600 fill_rhs_list (vector_, set_mapping_, rhs_);
0601
0602 if (!rhs_->empty ())
0603 {
0604 typename equivset_list::list::iterator iter_;
0605 typename equivset_list::list::iterator end_;
0606 equivset_ptr overlap_ (new equivset);
0607
0608 lhs_->push_back (static_cast<equivset *>(0));
0609 lhs_->back () = rhs_->front ();
0610 rhs_->pop_front ();
0611
0612 while (!rhs_->empty ())
0613 {
0614 equivset_ptr r_ (rhs_->front ());
0615
0616 rhs_->pop_front ();
0617 iter_ = lhs_->begin ();
0618 end_ = lhs_->end ();
0619
0620 while (!r_->empty () && iter_ != end_)
0621 {
0622 typename equivset_list::list::iterator l_iter_ = iter_;
0623
0624 (*l_iter_)->intersect (*r_.get (), *overlap_.get ());
0625
0626 if (overlap_->empty ())
0627 {
0628 ++iter_;
0629 }
0630 else if ((*l_iter_)->empty ())
0631 {
0632 delete *l_iter_;
0633 *l_iter_ = overlap_.release ();
0634
0635 overlap_.reset (new equivset);
0636 ++iter_;
0637 }
0638 else if (r_->empty ())
0639 {
0640 overlap_.swap (r_);
0641 overlap_.reset (new equivset);
0642 break;
0643 }
0644 else
0645 {
0646 iter_ = lhs_->insert (++iter_,
0647 static_cast<equivset *>(0));
0648 *iter_ = overlap_.release ();
0649
0650 overlap_.reset (new equivset);
0651 ++iter_;
0652 end_ = lhs_->end ();
0653 }
0654 }
0655
0656 if (!r_->empty ())
0657 {
0658 lhs_->push_back (static_cast<equivset *>(0));
0659 lhs_->back () = r_.release ();
0660 }
0661 }
0662 }
0663 }
0664
0665 static void fill_rhs_list (const node_vector *vector_,
0666 const index_set_vector &set_mapping_, equivset_list &list_)
0667 {
0668 typename node_vector::const_iterator iter_ =
0669 vector_->begin ();
0670 typename node_vector::const_iterator end_ =
0671 vector_->end ();
0672
0673 for (; iter_ != end_; ++iter_)
0674 {
0675 const detail::node *node_ = *iter_;
0676
0677 if (!node_->end_state ())
0678 {
0679 const std::size_t token_ = node_->token ();
0680
0681 if (token_ != null_token)
0682 {
0683 list_->push_back (static_cast<equivset *>(0));
0684
0685 if (token_ == bol_token || token_ == eol_token)
0686 {
0687 std::set<std::size_t> index_set_;
0688
0689 index_set_.insert (token_);
0690 list_->back () = new equivset (index_set_,
0691 node_->greedy (), token_, node_->followpos ());
0692 }
0693 else
0694 {
0695 list_->back () = new equivset (set_mapping_[token_],
0696 node_->greedy (), token_, node_->followpos ());
0697 }
0698 }
0699 }
0700 }
0701 }
0702
0703 static void fixup_bol (detail::node * &root_,
0704 node_ptr_vector &node_ptr_vector_)
0705 {
0706 typename detail::node::node_vector *first_ = &root_->firstpos ();
0707 bool found_ = false;
0708 typename detail::node::node_vector::const_iterator iter_ =
0709 first_->begin ();
0710 typename detail::node::node_vector::const_iterator end_ =
0711 first_->end ();
0712
0713 for (; iter_ != end_; ++iter_)
0714 {
0715 const detail::node *node_ = *iter_;
0716
0717 found_ = !node_->end_state () && node_->token () == bol_token;
0718
0719 if (found_) break;
0720 }
0721
0722 if (!found_)
0723 {
0724 node_ptr_vector_->push_back (static_cast<detail::leaf_node *>(0));
0725 node_ptr_vector_->back () = new detail::leaf_node
0726 (bol_token, true);
0727
0728 detail::node *lhs_ = node_ptr_vector_->back ();
0729
0730 node_ptr_vector_->push_back (static_cast<detail::leaf_node *>(0));
0731 node_ptr_vector_->back () = new detail::leaf_node
0732 (null_token, true);
0733
0734 detail::node *rhs_ = node_ptr_vector_->back ();
0735
0736 node_ptr_vector_->push_back
0737 (static_cast<detail::selection_node *>(0));
0738 node_ptr_vector_->back () =
0739 new detail::selection_node (lhs_, rhs_);
0740 lhs_ = node_ptr_vector_->back ();
0741
0742 node_ptr_vector_->push_back
0743 (static_cast<detail::sequence_node *>(0));
0744 node_ptr_vector_->back () =
0745 new detail::sequence_node (lhs_, root_);
0746 root_ = node_ptr_vector_->back ();
0747 }
0748 }
0749
0750 static void minimise_dfa (const std::size_t dfa_alphabet_,
0751 size_t_vector &dfa_, std::size_t size_)
0752 {
0753 const std::size_t *first_ = &dfa_.front ();
0754 const std::size_t *second_ = 0;
0755 const std::size_t *end_ = first_ + size_;
0756 std::size_t index_ = 1;
0757 std::size_t new_index_ = 1;
0758 std::size_t curr_index_ = 0;
0759 index_set index_set_;
0760 size_t_vector lookup_;
0761 std::size_t *lookup_ptr_ = 0;
0762
0763 lookup_.resize (size_ / dfa_alphabet_, null_token);
0764 lookup_ptr_ = &lookup_.front ();
0765 *lookup_ptr_ = 0;
0766
0767 first_ += dfa_alphabet_;
0768
0769 for (; first_ < end_; first_ += dfa_alphabet_, ++index_)
0770 {
0771 for (second_ = first_ + dfa_alphabet_, curr_index_ = index_ + 1;
0772 second_ < end_; second_ += dfa_alphabet_, ++curr_index_)
0773 {
0774 if (index_set_.find (curr_index_) != index_set_.end ())
0775 {
0776 continue;
0777 }
0778
0779
0780 using namespace std;
0781
0782 if (memcmp (first_, second_, sizeof (std::size_t) *
0783 dfa_alphabet_) == 0)
0784 {
0785 index_set_.insert (curr_index_);
0786 lookup_ptr_[curr_index_] = new_index_;
0787 }
0788 }
0789
0790 if (lookup_ptr_[index_] == null_token)
0791 {
0792 lookup_ptr_[index_] = new_index_;
0793 ++new_index_;
0794 }
0795 }
0796
0797 if (!index_set_.empty ())
0798 {
0799 const std::size_t *front_ = &dfa_.front ();
0800 size_t_vector new_dfa_ (front_, front_ + dfa_alphabet_);
0801 typename index_set::iterator set_end_ =
0802 index_set_.end ();
0803 const std::size_t *ptr_ = front_ + dfa_alphabet_;
0804 std::size_t *new_ptr_ = 0;
0805
0806 new_dfa_.resize (size_ - index_set_.size () * dfa_alphabet_, 0);
0807 new_ptr_ = &new_dfa_.front () + dfa_alphabet_;
0808 size_ /= dfa_alphabet_;
0809
0810 for (index_ = 1; index_ < size_; ++index_)
0811 {
0812 if (index_set_.find (index_) != set_end_)
0813 {
0814 ptr_ += dfa_alphabet_;
0815 continue;
0816 }
0817
0818 new_ptr_[end_state_index] = ptr_[end_state_index];
0819 new_ptr_[id_index] = ptr_[id_index];
0820 new_ptr_[unique_id_index] = ptr_[unique_id_index];
0821 new_ptr_[state_index] = ptr_[state_index];
0822 new_ptr_[bol_index] = lookup_ptr_[ptr_[bol_index]];
0823 new_ptr_[eol_index] = lookup_ptr_[ptr_[eol_index]];
0824 new_ptr_ += dfa_offset;
0825 ptr_ += dfa_offset;
0826
0827 for (std::size_t i_ = dfa_offset; i_ < dfa_alphabet_; ++i_)
0828 {
0829 *new_ptr_++ = lookup_ptr_[*ptr_++];
0830 }
0831 }
0832
0833 dfa_.swap (new_dfa_);
0834 }
0835 }
0836 };
0837
0838 typedef basic_generator<char> generator;
0839 typedef basic_generator<wchar_t> wgenerator;
0840 }
0841 }
0842
0843 #endif