Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-19 09:47:50

0001 // state_machine.hpp
0002 // Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/)
0003 //
0004 // Distributed under the Boost Software License, Version 1.0. (See accompanying
0005 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
0006 #ifndef BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_STATE_MACHINE_HPP
0007 #define BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_STATE_MACHINE_HPP
0008 
0009 #include <algorithm>
0010 #include "conversion/char_state_machine.hpp"
0011 #include "consts.hpp"
0012 #include <deque>
0013 #include "internals.hpp"
0014 #include <map>
0015 #include "containers/ptr_vector.hpp"
0016 #include "size_t.hpp"
0017 #include <string>
0018 
0019 namespace boost
0020 {
0021 namespace lexer
0022 {
0023 template<typename CharT>
0024 class basic_state_machine
0025 {
0026 public:
0027     typedef CharT char_type;
0028 
0029     class iterator
0030     {
0031     public:
0032         friend class basic_state_machine;
0033 
0034         struct data
0035         {
0036             // Current iterator info
0037             std::size_t dfa;
0038             std::size_t states;
0039             std::size_t state;
0040             std::size_t transitions;
0041             std::size_t transition;
0042 
0043             // Current state info
0044             bool end_state;
0045             std::size_t id;
0046             std::size_t unique_id;
0047             std::size_t goto_dfa;
0048             std::size_t bol_index;
0049             std::size_t eol_index;
0050 
0051             // Current transition info
0052             basic_string_token<CharT> token;
0053             std::size_t goto_state;
0054 
0055             data () :
0056                 dfa (npos),
0057                 states (0),
0058                 state (npos),
0059                 transitions (0),
0060                 transition (npos),
0061                 end_state (false),
0062                 id (npos),
0063                 unique_id (npos),
0064                 goto_dfa (npos),
0065                 bol_index (npos),
0066                 eol_index (npos),
0067                 goto_state (npos)
0068             {
0069             }
0070 
0071             bool operator == (const data &rhs_) const
0072             {
0073                 return dfa == rhs_.dfa &&
0074                     states == rhs_.states &&
0075                     state == rhs_.state &&
0076                     transitions == rhs_.transitions &&
0077                     transition == rhs_.transition &&
0078                     end_state == rhs_.end_state &&
0079                     id == rhs_.id &&
0080                     unique_id == rhs_.unique_id &&
0081                     goto_dfa == rhs_.goto_dfa &&
0082                     bol_index == rhs_.bol_index &&
0083                     eol_index == rhs_.eol_index &&
0084                     token == rhs_.token &&
0085                     goto_state == rhs_.goto_state;
0086             }
0087         };
0088 
0089         iterator () :
0090             _sm (0),
0091             _dfas (0),
0092             _dfa (npos),
0093             _states (0),
0094             _state (npos),
0095             _transitions (0),
0096             _transition (npos)
0097         {
0098         }
0099 
0100         bool operator == (const iterator &rhs_) const
0101         {
0102             return _dfas == rhs_._dfas && _dfa == rhs_._dfa &&
0103                 _states == rhs_._states && _state == rhs_._state &&
0104                 _transitions == rhs_._transitions &&
0105                 _transition == rhs_._transition;
0106         }
0107 
0108         bool operator != (const iterator &rhs_) const
0109         {
0110             return !(*this == rhs_);
0111         }
0112 
0113         data &operator * ()
0114         {
0115             return _data;
0116         }
0117 
0118         data *operator -> ()
0119         {
0120             return &_data;
0121         }
0122 
0123         // Let compiler generate operator = ().
0124 
0125         // prefix version
0126         iterator &operator ++ ()
0127         {
0128             next ();
0129             return *this;
0130         }
0131 
0132         // postfix version
0133         iterator operator ++ (int)
0134         {
0135             iterator iter_ = *this;
0136 
0137             next ();
0138             return iter_;
0139         }
0140 
0141         void clear ()
0142         {
0143             _dfas = _states = _transitions = 0;
0144             _dfa = _state = _transition = npos;
0145         }
0146 
0147     private:
0148         basic_state_machine *_sm;
0149         data _data;
0150         std::size_t _dfas;
0151         std::size_t _dfa;
0152         std::size_t _states;
0153         std::size_t _state;
0154         std::size_t _transitions;
0155         std::size_t _transition;
0156         typename detail::basic_char_state_machine<CharT>::state::
0157             size_t_string_token_map::const_iterator _token_iter;
0158         typename detail::basic_char_state_machine<CharT>::state::
0159             size_t_string_token_map::const_iterator _token_end;
0160 
0161         void next ()
0162         {
0163             bool reset_state_ = false;
0164 
0165             if (_transition >= _transitions)
0166             {
0167                 _transition = _data.transition = 0;
0168                 _data.state = ++_state;
0169                 reset_state_ = true;
0170 
0171                 if (_state >= _states)
0172                 {
0173                     ++_dfa;
0174 
0175                     if (_dfa >= _dfas)
0176                     {
0177                         clear ();
0178                         reset_state_ = false;
0179                     }
0180                     else
0181                     {
0182                         _states = _data.states =
0183                             _sm->_csm._sm_vector[_dfa].size ();
0184                         _state = _data.state = 0;
0185                     }
0186                 }
0187             }
0188             else
0189             {
0190                 _data.transition = _transition;
0191             }
0192 
0193             if (reset_state_)
0194             {
0195                 const typename detail::basic_char_state_machine<CharT>::
0196                     state *ptr_ = &_sm->_csm._sm_vector[_dfa][_state];
0197 
0198                 _transitions = _data.transitions = ptr_->_transitions.size ();
0199                 _data.end_state = ptr_->_end_state;
0200                 _data.id = ptr_->_id;
0201                 _data.unique_id = ptr_->_unique_id;
0202                 _data.goto_dfa = ptr_->_state;
0203                 _data.bol_index = ptr_->_bol_index;
0204                 _data.eol_index = ptr_->_eol_index;
0205                 _token_iter = ptr_->_transitions.begin ();
0206                 _token_end = ptr_->_transitions.end ();
0207             }
0208 
0209             if (_token_iter != _token_end)
0210             {
0211                 _data.token = _token_iter->second;
0212                 _data.goto_state = _token_iter->first;
0213                 ++_token_iter;
0214                 ++_transition;
0215             }
0216             else
0217             {
0218                 _data.token.clear ();
0219                 _data.goto_state = npos;
0220             }
0221         }
0222     };
0223 
0224     friend class iterator;
0225 
0226     basic_state_machine ()
0227     {
0228     }
0229 
0230     void clear ()
0231     {
0232         _internals.clear ();
0233         _csm.clear ();
0234     }
0235 
0236     bool empty () const
0237     {
0238         // Don't include _csm in this test, as irrelevant to state.
0239         return _internals._lookup->empty () &&
0240             _internals._dfa_alphabet.empty () &&
0241             _internals._dfa->empty ();
0242     }
0243 
0244     std::size_t size () const
0245     {
0246         return _internals._dfa->size ();
0247     }
0248 
0249     bool operator == (const basic_state_machine &rhs_) const
0250     {
0251         // Don't include _csm in this test, as irrelevant to state.
0252         return _internals._lookup == rhs_._internals._lookup &&
0253             _internals._dfa_alphabet == rhs_._internals._dfa_alphabet &&
0254             _internals._dfa == rhs_._internals._dfa &&
0255             _internals._seen_BOL_assertion ==
0256                 rhs_._internals._seen_BOL_assertion &&
0257             _internals._seen_EOL_assertion ==
0258                 rhs_._internals._seen_EOL_assertion;
0259     }
0260 
0261     iterator begin () const
0262     {
0263         iterator iter_;
0264 
0265         iter_._sm = const_cast<basic_state_machine *>(this);
0266         check_for_csm ();
0267 
0268         if (!_csm.empty ())
0269         {
0270             const typename detail::basic_char_state_machine<CharT>::
0271                 state_vector *ptr_ = &_csm._sm_vector.front ();
0272 
0273             iter_._dfas = _csm._sm_vector.size ();
0274             iter_._states = iter_._data.states = ptr_->size ();
0275             iter_._transitions = iter_._data.transitions =
0276                 ptr_->front ()._transitions.size ();
0277             iter_._dfa = iter_._data.dfa = 0;
0278             iter_._state = iter_._data.state = 0;
0279             iter_._transition = 0;
0280             iter_._data.end_state = ptr_->front ()._end_state;
0281             iter_._data.id = ptr_->front ()._id;
0282             iter_._data.unique_id = ptr_->front ()._unique_id;
0283             iter_._data.goto_dfa = ptr_->front ()._state;
0284             iter_._data.bol_index = ptr_->front ()._bol_index;
0285             iter_._data.eol_index = ptr_->front ()._eol_index;
0286             iter_._token_iter = ptr_->front ()._transitions.begin ();
0287             iter_._token_end = ptr_->front ()._transitions.end ();
0288 
0289             // Deal with case where there is only a bol or eol
0290             // but no other transitions.
0291             if (iter_._transitions)
0292             {
0293                 ++iter_;
0294             }
0295         }
0296 
0297         return iter_;
0298     }
0299 
0300     iterator end () const
0301     {
0302         iterator iter_;
0303 
0304         iter_._sm = const_cast<basic_state_machine *>(this);
0305         return iter_;
0306     }
0307 
0308     void swap (basic_state_machine &sm_)
0309     {
0310         _internals.swap (sm_._internals);
0311         _csm.swap (sm_._csm);
0312     }
0313 
0314     const detail::internals &data () const
0315     {
0316         return _internals;
0317     }
0318 
0319 private:
0320     detail::internals _internals;
0321     mutable detail::basic_char_state_machine<CharT> _csm;
0322 
0323     void check_for_csm () const
0324     {
0325         if (_csm.empty ())
0326         {
0327             human_readable (_csm);
0328         }
0329     }
0330 
0331     void human_readable (detail::basic_char_state_machine<CharT> &sm_) const
0332     {
0333         const std::size_t max_ = sizeof (CharT) == 1 ?
0334             num_chars : num_wchar_ts;
0335         const std::size_t start_states_ = _internals._dfa->size ();
0336 
0337         sm_.clear ();
0338         sm_._sm_vector.resize (start_states_);
0339 
0340         for (std::size_t start_state_index_ = 0;
0341             start_state_index_ < start_states_; ++start_state_index_)
0342         {
0343             const detail::internals::size_t_vector *lu_ =
0344                 _internals._lookup[start_state_index_];
0345             const std::size_t alphabet_ =
0346                 _internals._dfa_alphabet[start_state_index_] - dfa_offset;
0347             std::vector<std::basic_string<CharT> > chars_ (alphabet_);
0348             const std::size_t states_ = _internals._dfa[start_state_index_]->
0349                 size () / (alphabet_ + dfa_offset);
0350             const std::size_t *read_ptr_ = &_internals.
0351                 _dfa[start_state_index_]->front () + alphabet_ + dfa_offset;
0352 
0353             sm_._sm_vector[start_state_index_].resize (states_ - 1);
0354 
0355             for (std::size_t alpha_index_ = 0; alpha_index_ < max_;
0356                 ++alpha_index_)
0357             {
0358                 const std::size_t col_ = lu_->at (alpha_index_);
0359 
0360                 if (col_ != static_cast<std::size_t>(dead_state_index))
0361                 {
0362                     chars_[col_ - dfa_offset] += static_cast<CharT>
0363                         (alpha_index_);
0364                 }
0365             }
0366 
0367             for (std::size_t state_index_ = 1; state_index_ < states_;
0368                 ++state_index_)
0369             {
0370                 typename detail::basic_char_state_machine<CharT>::state
0371                     *state_ = &sm_._sm_vector[start_state_index_]
0372                     [state_index_ - 1];
0373 
0374                 state_->_end_state = *read_ptr_ != 0;
0375                 state_->_id = *(read_ptr_ + id_index);
0376                 state_->_unique_id = *(read_ptr_ + unique_id_index);
0377                 state_->_state = *(read_ptr_ + state_index);
0378                 state_->_bol_index = *(read_ptr_ + bol_index) - 1;
0379                 state_->_eol_index = *(read_ptr_ + eol_index) - 1;
0380                 read_ptr_ += dfa_offset;
0381 
0382                 for (std::size_t col_index_ = 0; col_index_ < alphabet_;
0383                     ++col_index_, ++read_ptr_)
0384                 {
0385                     const std::size_t transition_ = *read_ptr_;
0386 
0387                     if (transition_ != 0)
0388                     {
0389                         const std::size_t i_ = transition_ - 1;
0390                         typename detail::basic_char_state_machine<CharT>::
0391                             state::size_t_string_token_map::iterator iter_ =
0392                             state_->_transitions.find (i_);
0393 
0394                         if (iter_ == state_->_transitions.end ())
0395                         {
0396                             basic_string_token<CharT> token_
0397                                 (false, chars_[col_index_]);
0398                             typename detail::basic_char_state_machine<CharT>::
0399                                 state::size_t_string_token_pair pair_
0400                                 (i_, token_);
0401 
0402                             state_->_transitions.insert (pair_);
0403                         }
0404                         else
0405                         {
0406                             iter_->second._charset += chars_[col_index_];
0407                         }
0408                     }
0409                 }
0410 
0411                 for (typename detail::basic_char_state_machine<CharT>::state::
0412                     size_t_string_token_map::iterator iter_ =
0413                     state_->_transitions.begin (),
0414                     end_ = state_->_transitions.end ();
0415                     iter_ != end_; ++iter_)
0416                 {
0417                     std::sort (iter_->second._charset.begin (),
0418                         iter_->second._charset.end ());
0419                     iter_->second.normalise ();
0420                 }
0421             }
0422         }
0423     }
0424 };
0425 
0426 typedef basic_state_machine<char> state_machine;
0427 typedef basic_state_machine<wchar_t> wstate_machine;
0428 }
0429 }
0430 
0431 #endif