Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-19 09:47:50

0001 // generator.hpp
0002 // Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/)
0003 //
0004 // Distributed under the Boost Software License, Version 1.0. (See accompanying
0005 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
0006 #ifndef BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_GENERATOR_HPP
0007 #define BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_GENERATOR_HPP
0008 
0009 #include "char_traits.hpp"
0010 // memcmp()
0011 #include <cstring>
0012 #include "partition/charset.hpp"
0013 #include "partition/equivset.hpp"
0014 #include <memory>
0015 #include <limits>
0016 #include "parser/tree/node.hpp"
0017 #include "parser/parser.hpp"
0018 #include "containers/ptr_list.hpp"
0019 #include <boost/move/unique_ptr.hpp>
0020 #include "rules.hpp"
0021 #include "state_machine.hpp"
0022 
0023 namespace boost
0024 {
0025 namespace lexer
0026 {
0027 template<typename CharT, typename Traits = char_traits<CharT> >
0028 class basic_generator
0029 {
0030 public:
0031     typedef typename detail::internals::size_t_vector size_t_vector;
0032     typedef basic_rules<CharT> rules;
0033 
0034     static void build (const rules &rules_,
0035         basic_state_machine<CharT> &state_machine_)
0036     {
0037         std::size_t index_ = 0;
0038         std::size_t size_ = rules_.statemap ().size ();
0039         node_ptr_vector node_ptr_vector_;
0040         detail::internals &internals_ = const_cast<detail::internals &>
0041             (state_machine_.data ());
0042         bool seen_BOL_assertion_ = false;
0043         bool seen_EOL_assertion_ = false;
0044 
0045         state_machine_.clear ();
0046 
0047         for (; index_ < size_; ++index_)
0048         {
0049             internals_._lookup->push_back (static_cast<size_t_vector *>(0));
0050             internals_._lookup->back () = new size_t_vector;
0051             internals_._dfa_alphabet.push_back (0);
0052             internals_._dfa->push_back (static_cast<size_t_vector *>(0));
0053             internals_._dfa->back () = new size_t_vector;
0054         }
0055 
0056         for (index_ = 0, size_ = internals_._lookup->size ();
0057             index_ < size_; ++index_)
0058         {
0059             internals_._lookup[index_]->resize (sizeof (CharT) == 1 ?
0060                 num_chars : num_wchar_ts, dead_state_index);
0061 
0062             if (!rules_.regexes ()[index_].empty ())
0063             {
0064                 // vector mapping token indexes to partitioned token index sets
0065                 index_set_vector set_mapping_;
0066                 // syntax tree
0067                 detail::node *root_ = build_tree (rules_, index_,
0068                     node_ptr_vector_, internals_, set_mapping_);
0069 
0070                 build_dfa (root_, set_mapping_,
0071                     internals_._dfa_alphabet[index_],
0072                     *internals_._dfa[index_]);
0073 
0074                 if (internals_._seen_BOL_assertion)
0075                 {
0076                     seen_BOL_assertion_ = true;
0077                 }
0078 
0079                 if (internals_._seen_EOL_assertion)
0080                 {
0081                     seen_EOL_assertion_ = true;
0082                 }
0083 
0084                 internals_._seen_BOL_assertion = false;
0085                 internals_._seen_EOL_assertion = false;
0086             }
0087         }
0088 
0089         internals_._seen_BOL_assertion = seen_BOL_assertion_;
0090         internals_._seen_EOL_assertion = seen_EOL_assertion_;
0091     }
0092 
0093     static void minimise (basic_state_machine<CharT> &state_machine_)
0094     {
0095         detail::internals &internals_ = const_cast<detail::internals &>
0096             (state_machine_.data ());
0097         const std::size_t machines_ = internals_._dfa->size ();
0098 
0099         for (std::size_t i_ = 0; i_ < machines_; ++i_)
0100         {
0101             const std::size_t dfa_alphabet_ = internals_._dfa_alphabet[i_];
0102             size_t_vector *dfa_ = internals_._dfa[i_];
0103 
0104             if (dfa_alphabet_ != 0)
0105             {
0106                 std::size_t size_ = 0;
0107 
0108                 do
0109                 {
0110                     size_ = dfa_->size ();
0111                     minimise_dfa (dfa_alphabet_, *dfa_, size_);
0112                 } while (dfa_->size () != size_);
0113             }
0114         }
0115     }
0116 
0117 protected:
0118     typedef detail::basic_charset<CharT> charset;
0119     typedef detail::ptr_list<charset> charset_list;
0120     typedef boost::movelib::unique_ptr<charset> charset_ptr;
0121     typedef detail::equivset equivset;
0122     typedef detail::ptr_list<equivset> equivset_list;
0123     typedef boost::movelib::unique_ptr<equivset> equivset_ptr;
0124     typedef typename charset::index_set index_set;
0125     typedef std::vector<index_set> index_set_vector;
0126     typedef detail::basic_parser<CharT> parser;
0127     typedef typename parser::node_ptr_vector node_ptr_vector;
0128     typedef std::set<const detail::node *> node_set;
0129     typedef detail::ptr_vector<node_set> node_set_vector;
0130     typedef std::vector<const detail::node *> node_vector;
0131     typedef detail::ptr_vector<node_vector> node_vector_vector;
0132     typedef typename parser::string string;
0133     typedef std::pair<string, string> string_pair;
0134     typedef typename parser::tokeniser::string_token string_token;
0135     typedef std::deque<string_pair> macro_deque;
0136     typedef std::pair<string, const detail::node *> macro_pair;
0137     typedef typename parser::macro_map::iterator macro_iter;
0138     typedef std::pair<macro_iter, bool> macro_iter_pair;
0139     typedef typename parser::tokeniser::token_map token_map;
0140 
0141     static detail::node *build_tree (const rules &rules_,
0142         const std::size_t state_, node_ptr_vector &node_ptr_vector_,
0143         detail::internals &internals_, index_set_vector &set_mapping_)
0144     {
0145         size_t_vector *lookup_ = internals_._lookup[state_];
0146         const typename rules::string_deque_deque &regexes_ =
0147             rules_.regexes ();
0148         const typename rules::id_vector_deque &ids_ = rules_.ids ();
0149         const typename rules::id_vector_deque &unique_ids_ =
0150             rules_.unique_ids ();
0151         const typename rules::id_vector_deque &states_ = rules_.states ();
0152         typename rules::string_deque::const_iterator regex_iter_ =
0153             regexes_[state_].begin ();
0154         typename rules::string_deque::const_iterator regex_iter_end_ =
0155             regexes_[state_].end ();
0156         typename rules::id_vector::const_iterator ids_iter_ =
0157             ids_[state_].begin ();
0158         typename rules::id_vector::const_iterator unique_ids_iter_ =
0159             unique_ids_[state_].begin ();
0160         typename rules::id_vector::const_iterator states_iter_ =
0161             states_[state_].begin ();
0162         const typename rules::string &regex_ = *regex_iter_;
0163         // map of regex charset tokens (strings) to index
0164         token_map token_map_;
0165         const typename rules::string_pair_deque &macrodeque_ =
0166             rules_.macrodeque ();
0167         typename parser::macro_map macromap_;
0168         typename detail::node::node_vector tree_vector_;
0169 
0170         build_macros (token_map_, macrodeque_, macromap_,
0171             rules_.flags (), rules_.locale (), node_ptr_vector_,
0172             internals_._seen_BOL_assertion, internals_._seen_EOL_assertion);
0173 
0174         detail::node *root_ = parser::parse (regex_.c_str (),
0175             regex_.c_str () + regex_.size (), *ids_iter_, *unique_ids_iter_,
0176             *states_iter_, rules_.flags (), rules_.locale (), node_ptr_vector_,
0177             macromap_, token_map_, internals_._seen_BOL_assertion,
0178             internals_._seen_EOL_assertion);
0179 
0180         ++regex_iter_;
0181         ++ids_iter_;
0182         ++unique_ids_iter_;
0183         ++states_iter_;
0184         tree_vector_.push_back (root_);
0185 
0186         // build syntax trees
0187         while (regex_iter_ != regex_iter_end_)
0188         {
0189             // re-declare var, otherwise we perform an assignment..!
0190             const typename rules::string &regex2_ = *regex_iter_;
0191 
0192             root_ = parser::parse (regex2_.c_str (),
0193                 regex2_.c_str () + regex2_.size (), *ids_iter_,
0194                 *unique_ids_iter_, *states_iter_, rules_.flags (),
0195                 rules_.locale (), node_ptr_vector_, macromap_, token_map_,
0196                 internals_._seen_BOL_assertion,
0197                 internals_._seen_EOL_assertion);
0198             tree_vector_.push_back (root_);
0199             ++regex_iter_;
0200             ++ids_iter_;
0201             ++unique_ids_iter_;
0202             ++states_iter_;
0203         }
0204 
0205         if (internals_._seen_BOL_assertion)
0206         {
0207             // Fixup BOLs
0208             typename detail::node::node_vector::iterator iter_ =
0209                 tree_vector_.begin ();
0210             typename detail::node::node_vector::iterator end_ =
0211                 tree_vector_.end ();
0212 
0213             for (; iter_ != end_; ++iter_)
0214             {
0215                 fixup_bol (*iter_, node_ptr_vector_);
0216             }
0217         }
0218 
0219         // join trees
0220         {
0221             typename detail::node::node_vector::iterator iter_ =
0222                 tree_vector_.begin ();
0223             typename detail::node::node_vector::iterator end_ =
0224                 tree_vector_.end ();
0225 
0226             if (iter_ != end_)
0227             {
0228                 root_ = *iter_;
0229                 ++iter_;
0230             }
0231 
0232             for (; iter_ != end_; ++iter_)
0233             {
0234                 node_ptr_vector_->push_back (static_cast<detail::selection_node *>(0));
0235                 node_ptr_vector_->back () = new detail::selection_node
0236                     (root_, *iter_);
0237                 root_ = node_ptr_vector_->back ();
0238             }
0239         }
0240 
0241         // partitioned token list
0242         charset_list token_list_;
0243 
0244         set_mapping_.resize (token_map_.size ());
0245         partition_tokens (token_map_, token_list_);
0246 
0247         typename charset_list::list::const_iterator iter_ =
0248             token_list_->begin ();
0249         typename charset_list::list::const_iterator end_ =
0250             token_list_->end ();
0251         std::size_t index_ = 0;
0252 
0253         for (; iter_ != end_; ++iter_, ++index_)
0254         {
0255             const charset *cs_ = *iter_;
0256             typename charset::index_set::const_iterator set_iter_ =
0257                 cs_->_index_set.begin ();
0258             typename charset::index_set::const_iterator set_end_ =
0259                 cs_->_index_set.end ();
0260 
0261             fill_lookup (cs_->_token, lookup_, index_);
0262 
0263             for (; set_iter_ != set_end_; ++set_iter_)
0264             {
0265                 set_mapping_[*set_iter_].insert (index_);
0266             }
0267         }
0268 
0269         internals_._dfa_alphabet[state_] = token_list_->size () + dfa_offset;
0270         return root_;
0271     }
0272 
0273     static void build_macros (token_map &token_map_,
0274         const macro_deque &macrodeque_,
0275         typename parser::macro_map &macromap_, const regex_flags flags_,
0276         const std::locale &locale_, node_ptr_vector &node_ptr_vector_,
0277         bool &seen_BOL_assertion_, bool &seen_EOL_assertion_)
0278     {
0279         for (typename macro_deque::const_iterator iter_ =
0280             macrodeque_.begin (), end_ = macrodeque_.end ();
0281             iter_ != end_; ++iter_)
0282         {
0283             const typename rules::string &name_ = iter_->first;
0284             const typename rules::string &regex_ = iter_->second;
0285             detail::node *node_ = parser::parse (regex_.c_str (),
0286                 regex_.c_str () + regex_.size (), 0, 0, 0, flags_,
0287                 locale_, node_ptr_vector_, macromap_, token_map_,
0288                 seen_BOL_assertion_, seen_EOL_assertion_);
0289             macro_iter_pair map_iter_ = macromap_.
0290                 insert (macro_pair (name_, static_cast<const detail::node *>
0291                 (0)));
0292 
0293             map_iter_.first->second = node_;
0294         }
0295     }
0296 
0297     static void build_dfa (detail::node *root_,
0298         const index_set_vector &set_mapping_, const std::size_t dfa_alphabet_,
0299         size_t_vector &dfa_)
0300     {
0301         typename detail::node::node_vector *followpos_ =
0302             &root_->firstpos ();
0303         node_set_vector seen_sets_;
0304         node_vector_vector seen_vectors_;
0305         size_t_vector hash_vector_;
0306 
0307         // 'jam' state
0308         dfa_.resize (dfa_alphabet_, 0);
0309         closure (followpos_, seen_sets_, seen_vectors_,
0310             hash_vector_, dfa_alphabet_, dfa_);
0311 
0312         std::size_t *ptr_ = 0;
0313 
0314         for (std::size_t index_ = 0; index_ < seen_vectors_->size (); ++index_)
0315         {
0316             equivset_list equiv_list_;
0317 
0318             build_equiv_list (seen_vectors_[index_], set_mapping_, equiv_list_);
0319 
0320             for (typename equivset_list::list::const_iterator iter_ =
0321                 equiv_list_->begin (), end_ = equiv_list_->end ();
0322                 iter_ != end_; ++iter_)
0323             {
0324                 equivset *equivset_ = *iter_;
0325                 const std::size_t transition_ = closure
0326                     (&equivset_->_followpos, seen_sets_, seen_vectors_,
0327                     hash_vector_, dfa_alphabet_, dfa_);
0328 
0329                 if (transition_ != npos)
0330                 {
0331                     ptr_ = &dfa_.front () + ((index_ + 1) * dfa_alphabet_);
0332 
0333                     // Prune abstemious transitions from end states.
0334                     if (*ptr_ && !equivset_->_greedy) continue;
0335 
0336                     for (typename detail::equivset::index_vector::const_iterator
0337                         equiv_iter_ = equivset_->_index_vector.begin (),
0338                         equiv_end_ = equivset_->_index_vector.end ();
0339                         equiv_iter_ != equiv_end_; ++equiv_iter_)
0340                     {
0341                         const std::size_t equiv_index_ = *equiv_iter_;
0342 
0343                         if (equiv_index_ == bol_token)
0344                         {
0345                             if (ptr_[eol_index] == 0)
0346                             {
0347                                 ptr_[bol_index] = transition_;
0348                             }
0349                         }
0350                         else if (equiv_index_ == eol_token)
0351                         {
0352                             if (ptr_[bol_index] == 0)
0353                             {
0354                                 ptr_[eol_index] = transition_;
0355                             }
0356                         }
0357                         else
0358                         {
0359                             ptr_[equiv_index_ + dfa_offset] = transition_;
0360                         }
0361                     }
0362                 }
0363             }
0364         }
0365     }
0366 
0367     static std::size_t closure (typename detail::node::node_vector *followpos_,
0368         node_set_vector &seen_sets_, node_vector_vector &seen_vectors_,
0369         size_t_vector &hash_vector_, const std::size_t size_,
0370         size_t_vector &dfa_)
0371     {
0372         bool end_state_ = false;
0373         std::size_t id_ = 0;
0374         std::size_t unique_id_ = npos;
0375         std::size_t state_ = 0;
0376         std::size_t hash_ = 0;
0377 
0378         if (followpos_->empty ()) return npos;
0379 
0380         std::size_t index_ = 0;
0381         boost::movelib::unique_ptr<node_set> set_ptr_ (new node_set);
0382         boost::movelib::unique_ptr<node_vector> vector_ptr_ (new node_vector);
0383 
0384         for (typename detail::node::node_vector::const_iterator iter_ =
0385             followpos_->begin (), end_ = followpos_->end ();
0386             iter_ != end_; ++iter_)
0387         {
0388             closure_ex (*iter_, end_state_, id_, unique_id_, state_,
0389                 set_ptr_.get (), vector_ptr_.get (), hash_);
0390         }
0391 
0392         bool found_ = false;
0393         typename size_t_vector::const_iterator hash_iter_ =
0394             hash_vector_.begin ();
0395         typename size_t_vector::const_iterator hash_end_ =
0396             hash_vector_.end ();
0397         typename node_set_vector::vector::const_iterator set_iter_ =
0398             seen_sets_->begin ();
0399 
0400         for (; hash_iter_ != hash_end_; ++hash_iter_, ++set_iter_)
0401         {
0402             found_ = *hash_iter_ == hash_ && *(*set_iter_) == *set_ptr_;
0403             ++index_;
0404 
0405             if (found_) break;
0406         }
0407 
0408         if (!found_)
0409         {
0410             seen_sets_->push_back (static_cast<node_set *>(0));
0411             seen_sets_->back () = set_ptr_.release ();
0412             seen_vectors_->push_back (static_cast<node_vector *>(0));
0413             seen_vectors_->back () = vector_ptr_.release ();
0414             hash_vector_.push_back (hash_);
0415             // State 0 is the jam state...
0416             index_ = seen_sets_->size ();
0417 
0418             const std::size_t old_size_ = dfa_.size ();
0419 
0420             dfa_.resize (old_size_ + size_, 0);
0421 
0422             if (end_state_)
0423             {
0424                 dfa_[old_size_] |= end_state;
0425                 dfa_[old_size_ + id_index] = id_;
0426                 dfa_[old_size_ + unique_id_index] = unique_id_;
0427                 dfa_[old_size_ + state_index] = state_;
0428             }
0429         }
0430 
0431         return index_;
0432     }
0433 
0434     static void closure_ex (detail::node *node_, bool &end_state_,
0435         std::size_t &id_, std::size_t &unique_id_, std::size_t &state_,
0436         node_set *set_ptr_, node_vector *vector_ptr_, std::size_t &hash_)
0437     {
0438         const bool temp_end_state_ = node_->end_state ();
0439 
0440         if (temp_end_state_)
0441         {
0442             if (!end_state_)
0443             {
0444                 end_state_ = true;
0445                 id_ = node_->id ();
0446                 unique_id_ = node_->unique_id ();
0447                 state_ = node_->lexer_state ();
0448             }
0449         }
0450 
0451         if (set_ptr_->insert (node_).second)
0452         {
0453             vector_ptr_->push_back (node_);
0454             hash_ += reinterpret_cast<std::size_t> (node_);
0455         }
0456     }
0457 
0458     static void partition_tokens (const token_map &map_,
0459         charset_list &lhs_)
0460     {
0461         charset_list rhs_;
0462 
0463         fill_rhs_list (map_, rhs_);
0464 
0465         if (!rhs_->empty ())
0466         {
0467             typename charset_list::list::iterator iter_;
0468             typename charset_list::list::iterator end_;
0469             charset_ptr overlap_ (new charset);
0470 
0471             lhs_->push_back (static_cast<charset *>(0));
0472             lhs_->back () = rhs_->front ();
0473             rhs_->pop_front ();
0474 
0475             while (!rhs_->empty ())
0476             {
0477                 charset_ptr r_ (rhs_->front ());
0478 
0479                 rhs_->pop_front ();
0480                 iter_ = lhs_->begin ();
0481                 end_ = lhs_->end ();
0482 
0483                 while (!r_->empty () && iter_ != end_)
0484                 {
0485                     typename charset_list::list::iterator l_iter_ = iter_;
0486 
0487                     (*l_iter_)->intersect (*r_.get (), *overlap_.get ());
0488 
0489                     if (overlap_->empty ())
0490                     {
0491                         ++iter_;
0492                     }
0493                     else if ((*l_iter_)->empty ())
0494                     {
0495                         delete *l_iter_;
0496                         *l_iter_ = overlap_.release ();
0497 
0498                         overlap_.reset (new charset);
0499                         ++iter_;
0500                     }
0501                     else if (r_->empty ())
0502                     {
0503                         overlap_.swap (r_);
0504                         overlap_.reset (new charset);
0505                         break;
0506                     }
0507                     else
0508                     {
0509                         iter_ = lhs_->insert (++iter_,
0510                             static_cast<charset *>(0));
0511                         *iter_ = overlap_.release ();
0512 
0513                         overlap_.reset(new charset);
0514                         ++iter_;
0515                         end_ = lhs_->end ();
0516                     }
0517                 }
0518 
0519                 if (!r_->empty ())
0520                 {
0521                     lhs_->push_back (static_cast<charset *>(0));
0522                     lhs_->back () = r_.release ();
0523                 }
0524             }
0525         }
0526     }
0527 
0528     static void fill_rhs_list (const token_map &map_,
0529         charset_list &list_)
0530     {
0531         typename parser::tokeniser::token_map::const_iterator iter_ =
0532             map_.begin ();
0533         typename parser::tokeniser::token_map::const_iterator end_ =
0534             map_.end ();
0535 
0536         for (; iter_ != end_; ++iter_)
0537         {
0538             list_->push_back (static_cast<charset *>(0));
0539             list_->back () = new charset (iter_->first, iter_->second);
0540         }
0541     }
0542 
0543     static void fill_lookup (const string_token &token_,
0544         size_t_vector *lookup_, const std::size_t index_)
0545     {
0546         const CharT *curr_ = token_._charset.c_str ();
0547         const CharT *chars_end_ = curr_ + token_._charset.size ();
0548         std::size_t *ptr_ = &lookup_->front ();
0549         const std::size_t max_ = sizeof (CharT) == 1 ?
0550             num_chars : num_wchar_ts;
0551 
0552         if (token_._negated)
0553         {
0554             // $$$ FIXME JDG July 2014 $$$
0555             // this code is problematic on platforms where wchar_t is signed
0556             // with min generating negative numbers. This crashes with BAD_ACCESS
0557             // because of the vector index below:
0558             //  ptr_[static_cast<typename Traits::index_type>(curr_char_)]
0559             CharT curr_char_ = 0; // (std::numeric_limits<CharT>::min)();
0560             std::size_t i_ = 0;
0561 
0562             while (curr_ < chars_end_)
0563             {
0564                 while (*curr_ > curr_char_)
0565                 {
0566                     ptr_[static_cast<typename Traits::index_type>
0567                         (curr_char_)] = index_ + dfa_offset;
0568                     ++curr_char_;
0569                     ++i_;
0570                 }
0571 
0572                 ++curr_char_;
0573                 ++curr_;
0574                 ++i_;
0575             }
0576 
0577             for (; i_ < max_; ++i_)
0578             {
0579                 ptr_[static_cast<typename Traits::index_type>(curr_char_)] =
0580                     index_ + dfa_offset;
0581                 ++curr_char_;
0582             }
0583         }
0584         else
0585         {
0586             while (curr_ < chars_end_)
0587             {
0588                 ptr_[static_cast<typename Traits::index_type>(*curr_)] =
0589                     index_ + dfa_offset;
0590                 ++curr_;
0591             }
0592         }
0593     }
0594 
0595     static void build_equiv_list (const node_vector *vector_,
0596         const index_set_vector &set_mapping_, equivset_list &lhs_)
0597     {
0598         equivset_list rhs_;
0599 
0600         fill_rhs_list (vector_, set_mapping_, rhs_);
0601 
0602         if (!rhs_->empty ())
0603         {
0604             typename equivset_list::list::iterator iter_;
0605             typename equivset_list::list::iterator end_;
0606             equivset_ptr overlap_ (new equivset);
0607 
0608             lhs_->push_back (static_cast<equivset *>(0));
0609             lhs_->back () = rhs_->front ();
0610             rhs_->pop_front ();
0611 
0612             while (!rhs_->empty ())
0613             {
0614                 equivset_ptr r_ (rhs_->front ());
0615 
0616                 rhs_->pop_front ();
0617                 iter_ = lhs_->begin ();
0618                 end_ = lhs_->end ();
0619 
0620                 while (!r_->empty () && iter_ != end_)
0621                 {
0622                     typename equivset_list::list::iterator l_iter_ = iter_;
0623 
0624                     (*l_iter_)->intersect (*r_.get (), *overlap_.get ());
0625 
0626                     if (overlap_->empty ())
0627                     {
0628                         ++iter_;
0629                     }
0630                     else if ((*l_iter_)->empty ())
0631                     {
0632                         delete *l_iter_;
0633                         *l_iter_ = overlap_.release ();
0634 
0635                         overlap_.reset (new equivset);
0636                         ++iter_;
0637                     }
0638                     else if (r_->empty ())
0639                     {
0640                         overlap_.swap (r_);
0641                         overlap_.reset (new equivset);
0642                         break;
0643                     }
0644                     else
0645                     {
0646                         iter_ = lhs_->insert (++iter_,
0647                             static_cast<equivset *>(0));
0648                         *iter_ = overlap_.release ();
0649 
0650                         overlap_.reset (new equivset);
0651                         ++iter_;
0652                         end_ = lhs_->end ();
0653                     }
0654                 }
0655 
0656                 if (!r_->empty ())
0657                 {
0658                     lhs_->push_back (static_cast<equivset *>(0));
0659                     lhs_->back () = r_.release ();
0660                 }
0661             }
0662         }
0663     }
0664 
0665     static void fill_rhs_list (const node_vector *vector_,
0666         const index_set_vector &set_mapping_, equivset_list &list_)
0667     {
0668         typename node_vector::const_iterator iter_ =
0669             vector_->begin ();
0670         typename node_vector::const_iterator end_ =
0671             vector_->end ();
0672 
0673         for (; iter_ != end_; ++iter_)
0674         {
0675             const detail::node *node_ = *iter_;
0676 
0677             if (!node_->end_state ())
0678             {
0679                 const std::size_t token_ = node_->token ();
0680 
0681                 if (token_ != null_token)
0682                 {
0683                     list_->push_back (static_cast<equivset *>(0));
0684 
0685                     if (token_ == bol_token || token_ == eol_token)
0686                     {
0687                         std::set<std::size_t> index_set_;
0688 
0689                         index_set_.insert (token_);
0690                         list_->back () = new equivset (index_set_,
0691                             node_->greedy (), token_, node_->followpos ());
0692                     }
0693                     else
0694                     {
0695                         list_->back () = new equivset (set_mapping_[token_],
0696                             node_->greedy (), token_, node_->followpos ());
0697                     }
0698                 }
0699             }
0700         }
0701     }
0702 
0703     static void fixup_bol (detail::node * &root_,
0704         node_ptr_vector &node_ptr_vector_)
0705     {
0706         typename detail::node::node_vector *first_ = &root_->firstpos ();
0707         bool found_ = false;
0708         typename detail::node::node_vector::const_iterator iter_ =
0709             first_->begin ();
0710         typename detail::node::node_vector::const_iterator end_ =
0711             first_->end ();
0712 
0713         for (; iter_ != end_; ++iter_)
0714         {
0715             const detail::node *node_ = *iter_;
0716 
0717             found_ = !node_->end_state () && node_->token () == bol_token;
0718 
0719             if (found_) break;
0720         }
0721 
0722         if (!found_)
0723         {
0724             node_ptr_vector_->push_back (static_cast<detail::leaf_node *>(0));
0725             node_ptr_vector_->back () = new detail::leaf_node
0726                 (bol_token, true);
0727 
0728             detail::node *lhs_ = node_ptr_vector_->back ();
0729 
0730             node_ptr_vector_->push_back (static_cast<detail::leaf_node *>(0));
0731             node_ptr_vector_->back () = new detail::leaf_node
0732                 (null_token, true);
0733 
0734             detail::node *rhs_ = node_ptr_vector_->back ();
0735 
0736             node_ptr_vector_->push_back
0737                 (static_cast<detail::selection_node *>(0));
0738             node_ptr_vector_->back () =
0739                 new detail::selection_node (lhs_, rhs_);
0740             lhs_ = node_ptr_vector_->back ();
0741 
0742             node_ptr_vector_->push_back
0743                 (static_cast<detail::sequence_node *>(0));
0744             node_ptr_vector_->back () =
0745                 new detail::sequence_node (lhs_, root_);
0746             root_ = node_ptr_vector_->back ();
0747         }
0748     }
0749 
0750     static void minimise_dfa (const std::size_t dfa_alphabet_,
0751         size_t_vector &dfa_, std::size_t size_)
0752     {
0753         const std::size_t *first_ = &dfa_.front ();
0754         const std::size_t *second_ = 0;
0755         const std::size_t *end_ = first_ + size_;
0756         std::size_t index_ = 1;
0757         std::size_t new_index_ = 1;
0758         std::size_t curr_index_ = 0;
0759         index_set index_set_;
0760         size_t_vector lookup_;
0761         std::size_t *lookup_ptr_ = 0;
0762 
0763         lookup_.resize (size_ / dfa_alphabet_, null_token);
0764         lookup_ptr_ = &lookup_.front ();
0765         *lookup_ptr_ = 0;
0766         // Only one 'jam' state, so skip it.
0767         first_ += dfa_alphabet_;
0768 
0769         for (; first_ < end_; first_ += dfa_alphabet_, ++index_)
0770         {
0771             for (second_ = first_ + dfa_alphabet_, curr_index_ = index_ + 1;
0772                 second_ < end_; second_ += dfa_alphabet_, ++curr_index_)
0773             {
0774                 if (index_set_.find (curr_index_) != index_set_.end ())
0775                 {
0776                     continue;
0777                 }
0778 
0779                 // Some systems have memcmp in namespace std.
0780                 using namespace std;
0781 
0782                 if (memcmp (first_, second_, sizeof (std::size_t) *
0783                     dfa_alphabet_) == 0)
0784                 {
0785                     index_set_.insert (curr_index_);
0786                     lookup_ptr_[curr_index_] = new_index_;
0787                 }
0788             }
0789 
0790             if (lookup_ptr_[index_] == null_token)
0791             {
0792                 lookup_ptr_[index_] = new_index_;
0793                 ++new_index_;
0794             }
0795         }
0796 
0797         if (!index_set_.empty ())
0798         {
0799             const std::size_t *front_ = &dfa_.front ();
0800             size_t_vector new_dfa_ (front_, front_ + dfa_alphabet_);
0801             typename index_set::iterator set_end_ =
0802                 index_set_.end ();
0803             const std::size_t *ptr_ = front_ + dfa_alphabet_;
0804             std::size_t *new_ptr_ = 0;
0805 
0806             new_dfa_.resize (size_ - index_set_.size () * dfa_alphabet_, 0);
0807             new_ptr_ = &new_dfa_.front () + dfa_alphabet_;
0808             size_ /= dfa_alphabet_;
0809 
0810             for (index_ = 1; index_ < size_; ++index_)
0811             {
0812                 if (index_set_.find (index_) != set_end_)
0813                 {
0814                     ptr_ += dfa_alphabet_;
0815                     continue;
0816                 }
0817 
0818                 new_ptr_[end_state_index] = ptr_[end_state_index];
0819                 new_ptr_[id_index] = ptr_[id_index];
0820                 new_ptr_[unique_id_index] = ptr_[unique_id_index];
0821                 new_ptr_[state_index] = ptr_[state_index];
0822                 new_ptr_[bol_index] = lookup_ptr_[ptr_[bol_index]];
0823                 new_ptr_[eol_index] = lookup_ptr_[ptr_[eol_index]];
0824                 new_ptr_ += dfa_offset;
0825                 ptr_ += dfa_offset;
0826 
0827                 for (std::size_t i_ = dfa_offset; i_ < dfa_alphabet_; ++i_)
0828                 {
0829                     *new_ptr_++ = lookup_ptr_[*ptr_++];
0830                 }
0831             }
0832 
0833             dfa_.swap (new_dfa_);
0834         }
0835     }
0836 };
0837 
0838 typedef basic_generator<char> generator;
0839 typedef basic_generator<wchar_t> wgenerator;
0840 }
0841 }
0842 
0843 #endif