File indexing completed on 2025-01-19 09:47:50
0001
0002
0003
0004
0005
0006 #ifndef BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_STATE_MACHINE_HPP
0007 #define BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_STATE_MACHINE_HPP
0008
0009 #include <algorithm>
0010 #include "conversion/char_state_machine.hpp"
0011 #include "consts.hpp"
0012 #include <deque>
0013 #include "internals.hpp"
0014 #include <map>
0015 #include "containers/ptr_vector.hpp"
0016 #include "size_t.hpp"
0017 #include <string>
0018
0019 namespace boost
0020 {
0021 namespace lexer
0022 {
0023 template<typename CharT>
0024 class basic_state_machine
0025 {
0026 public:
0027 typedef CharT char_type;
0028
0029 class iterator
0030 {
0031 public:
0032 friend class basic_state_machine;
0033
0034 struct data
0035 {
0036
0037 std::size_t dfa;
0038 std::size_t states;
0039 std::size_t state;
0040 std::size_t transitions;
0041 std::size_t transition;
0042
0043
0044 bool end_state;
0045 std::size_t id;
0046 std::size_t unique_id;
0047 std::size_t goto_dfa;
0048 std::size_t bol_index;
0049 std::size_t eol_index;
0050
0051
0052 basic_string_token<CharT> token;
0053 std::size_t goto_state;
0054
0055 data () :
0056 dfa (npos),
0057 states (0),
0058 state (npos),
0059 transitions (0),
0060 transition (npos),
0061 end_state (false),
0062 id (npos),
0063 unique_id (npos),
0064 goto_dfa (npos),
0065 bol_index (npos),
0066 eol_index (npos),
0067 goto_state (npos)
0068 {
0069 }
0070
0071 bool operator == (const data &rhs_) const
0072 {
0073 return dfa == rhs_.dfa &&
0074 states == rhs_.states &&
0075 state == rhs_.state &&
0076 transitions == rhs_.transitions &&
0077 transition == rhs_.transition &&
0078 end_state == rhs_.end_state &&
0079 id == rhs_.id &&
0080 unique_id == rhs_.unique_id &&
0081 goto_dfa == rhs_.goto_dfa &&
0082 bol_index == rhs_.bol_index &&
0083 eol_index == rhs_.eol_index &&
0084 token == rhs_.token &&
0085 goto_state == rhs_.goto_state;
0086 }
0087 };
0088
0089 iterator () :
0090 _sm (0),
0091 _dfas (0),
0092 _dfa (npos),
0093 _states (0),
0094 _state (npos),
0095 _transitions (0),
0096 _transition (npos)
0097 {
0098 }
0099
0100 bool operator == (const iterator &rhs_) const
0101 {
0102 return _dfas == rhs_._dfas && _dfa == rhs_._dfa &&
0103 _states == rhs_._states && _state == rhs_._state &&
0104 _transitions == rhs_._transitions &&
0105 _transition == rhs_._transition;
0106 }
0107
0108 bool operator != (const iterator &rhs_) const
0109 {
0110 return !(*this == rhs_);
0111 }
0112
0113 data &operator * ()
0114 {
0115 return _data;
0116 }
0117
0118 data *operator -> ()
0119 {
0120 return &_data;
0121 }
0122
0123
0124
0125
0126 iterator &operator ++ ()
0127 {
0128 next ();
0129 return *this;
0130 }
0131
0132
0133 iterator operator ++ (int)
0134 {
0135 iterator iter_ = *this;
0136
0137 next ();
0138 return iter_;
0139 }
0140
0141 void clear ()
0142 {
0143 _dfas = _states = _transitions = 0;
0144 _dfa = _state = _transition = npos;
0145 }
0146
0147 private:
0148 basic_state_machine *_sm;
0149 data _data;
0150 std::size_t _dfas;
0151 std::size_t _dfa;
0152 std::size_t _states;
0153 std::size_t _state;
0154 std::size_t _transitions;
0155 std::size_t _transition;
0156 typename detail::basic_char_state_machine<CharT>::state::
0157 size_t_string_token_map::const_iterator _token_iter;
0158 typename detail::basic_char_state_machine<CharT>::state::
0159 size_t_string_token_map::const_iterator _token_end;
0160
0161 void next ()
0162 {
0163 bool reset_state_ = false;
0164
0165 if (_transition >= _transitions)
0166 {
0167 _transition = _data.transition = 0;
0168 _data.state = ++_state;
0169 reset_state_ = true;
0170
0171 if (_state >= _states)
0172 {
0173 ++_dfa;
0174
0175 if (_dfa >= _dfas)
0176 {
0177 clear ();
0178 reset_state_ = false;
0179 }
0180 else
0181 {
0182 _states = _data.states =
0183 _sm->_csm._sm_vector[_dfa].size ();
0184 _state = _data.state = 0;
0185 }
0186 }
0187 }
0188 else
0189 {
0190 _data.transition = _transition;
0191 }
0192
0193 if (reset_state_)
0194 {
0195 const typename detail::basic_char_state_machine<CharT>::
0196 state *ptr_ = &_sm->_csm._sm_vector[_dfa][_state];
0197
0198 _transitions = _data.transitions = ptr_->_transitions.size ();
0199 _data.end_state = ptr_->_end_state;
0200 _data.id = ptr_->_id;
0201 _data.unique_id = ptr_->_unique_id;
0202 _data.goto_dfa = ptr_->_state;
0203 _data.bol_index = ptr_->_bol_index;
0204 _data.eol_index = ptr_->_eol_index;
0205 _token_iter = ptr_->_transitions.begin ();
0206 _token_end = ptr_->_transitions.end ();
0207 }
0208
0209 if (_token_iter != _token_end)
0210 {
0211 _data.token = _token_iter->second;
0212 _data.goto_state = _token_iter->first;
0213 ++_token_iter;
0214 ++_transition;
0215 }
0216 else
0217 {
0218 _data.token.clear ();
0219 _data.goto_state = npos;
0220 }
0221 }
0222 };
0223
0224 friend class iterator;
0225
0226 basic_state_machine ()
0227 {
0228 }
0229
0230 void clear ()
0231 {
0232 _internals.clear ();
0233 _csm.clear ();
0234 }
0235
0236 bool empty () const
0237 {
0238
0239 return _internals._lookup->empty () &&
0240 _internals._dfa_alphabet.empty () &&
0241 _internals._dfa->empty ();
0242 }
0243
0244 std::size_t size () const
0245 {
0246 return _internals._dfa->size ();
0247 }
0248
0249 bool operator == (const basic_state_machine &rhs_) const
0250 {
0251
0252 return _internals._lookup == rhs_._internals._lookup &&
0253 _internals._dfa_alphabet == rhs_._internals._dfa_alphabet &&
0254 _internals._dfa == rhs_._internals._dfa &&
0255 _internals._seen_BOL_assertion ==
0256 rhs_._internals._seen_BOL_assertion &&
0257 _internals._seen_EOL_assertion ==
0258 rhs_._internals._seen_EOL_assertion;
0259 }
0260
0261 iterator begin () const
0262 {
0263 iterator iter_;
0264
0265 iter_._sm = const_cast<basic_state_machine *>(this);
0266 check_for_csm ();
0267
0268 if (!_csm.empty ())
0269 {
0270 const typename detail::basic_char_state_machine<CharT>::
0271 state_vector *ptr_ = &_csm._sm_vector.front ();
0272
0273 iter_._dfas = _csm._sm_vector.size ();
0274 iter_._states = iter_._data.states = ptr_->size ();
0275 iter_._transitions = iter_._data.transitions =
0276 ptr_->front ()._transitions.size ();
0277 iter_._dfa = iter_._data.dfa = 0;
0278 iter_._state = iter_._data.state = 0;
0279 iter_._transition = 0;
0280 iter_._data.end_state = ptr_->front ()._end_state;
0281 iter_._data.id = ptr_->front ()._id;
0282 iter_._data.unique_id = ptr_->front ()._unique_id;
0283 iter_._data.goto_dfa = ptr_->front ()._state;
0284 iter_._data.bol_index = ptr_->front ()._bol_index;
0285 iter_._data.eol_index = ptr_->front ()._eol_index;
0286 iter_._token_iter = ptr_->front ()._transitions.begin ();
0287 iter_._token_end = ptr_->front ()._transitions.end ();
0288
0289
0290
0291 if (iter_._transitions)
0292 {
0293 ++iter_;
0294 }
0295 }
0296
0297 return iter_;
0298 }
0299
0300 iterator end () const
0301 {
0302 iterator iter_;
0303
0304 iter_._sm = const_cast<basic_state_machine *>(this);
0305 return iter_;
0306 }
0307
0308 void swap (basic_state_machine &sm_)
0309 {
0310 _internals.swap (sm_._internals);
0311 _csm.swap (sm_._csm);
0312 }
0313
0314 const detail::internals &data () const
0315 {
0316 return _internals;
0317 }
0318
0319 private:
0320 detail::internals _internals;
0321 mutable detail::basic_char_state_machine<CharT> _csm;
0322
0323 void check_for_csm () const
0324 {
0325 if (_csm.empty ())
0326 {
0327 human_readable (_csm);
0328 }
0329 }
0330
0331 void human_readable (detail::basic_char_state_machine<CharT> &sm_) const
0332 {
0333 const std::size_t max_ = sizeof (CharT) == 1 ?
0334 num_chars : num_wchar_ts;
0335 const std::size_t start_states_ = _internals._dfa->size ();
0336
0337 sm_.clear ();
0338 sm_._sm_vector.resize (start_states_);
0339
0340 for (std::size_t start_state_index_ = 0;
0341 start_state_index_ < start_states_; ++start_state_index_)
0342 {
0343 const detail::internals::size_t_vector *lu_ =
0344 _internals._lookup[start_state_index_];
0345 const std::size_t alphabet_ =
0346 _internals._dfa_alphabet[start_state_index_] - dfa_offset;
0347 std::vector<std::basic_string<CharT> > chars_ (alphabet_);
0348 const std::size_t states_ = _internals._dfa[start_state_index_]->
0349 size () / (alphabet_ + dfa_offset);
0350 const std::size_t *read_ptr_ = &_internals.
0351 _dfa[start_state_index_]->front () + alphabet_ + dfa_offset;
0352
0353 sm_._sm_vector[start_state_index_].resize (states_ - 1);
0354
0355 for (std::size_t alpha_index_ = 0; alpha_index_ < max_;
0356 ++alpha_index_)
0357 {
0358 const std::size_t col_ = lu_->at (alpha_index_);
0359
0360 if (col_ != static_cast<std::size_t>(dead_state_index))
0361 {
0362 chars_[col_ - dfa_offset] += static_cast<CharT>
0363 (alpha_index_);
0364 }
0365 }
0366
0367 for (std::size_t state_index_ = 1; state_index_ < states_;
0368 ++state_index_)
0369 {
0370 typename detail::basic_char_state_machine<CharT>::state
0371 *state_ = &sm_._sm_vector[start_state_index_]
0372 [state_index_ - 1];
0373
0374 state_->_end_state = *read_ptr_ != 0;
0375 state_->_id = *(read_ptr_ + id_index);
0376 state_->_unique_id = *(read_ptr_ + unique_id_index);
0377 state_->_state = *(read_ptr_ + state_index);
0378 state_->_bol_index = *(read_ptr_ + bol_index) - 1;
0379 state_->_eol_index = *(read_ptr_ + eol_index) - 1;
0380 read_ptr_ += dfa_offset;
0381
0382 for (std::size_t col_index_ = 0; col_index_ < alphabet_;
0383 ++col_index_, ++read_ptr_)
0384 {
0385 const std::size_t transition_ = *read_ptr_;
0386
0387 if (transition_ != 0)
0388 {
0389 const std::size_t i_ = transition_ - 1;
0390 typename detail::basic_char_state_machine<CharT>::
0391 state::size_t_string_token_map::iterator iter_ =
0392 state_->_transitions.find (i_);
0393
0394 if (iter_ == state_->_transitions.end ())
0395 {
0396 basic_string_token<CharT> token_
0397 (false, chars_[col_index_]);
0398 typename detail::basic_char_state_machine<CharT>::
0399 state::size_t_string_token_pair pair_
0400 (i_, token_);
0401
0402 state_->_transitions.insert (pair_);
0403 }
0404 else
0405 {
0406 iter_->second._charset += chars_[col_index_];
0407 }
0408 }
0409 }
0410
0411 for (typename detail::basic_char_state_machine<CharT>::state::
0412 size_t_string_token_map::iterator iter_ =
0413 state_->_transitions.begin (),
0414 end_ = state_->_transitions.end ();
0415 iter_ != end_; ++iter_)
0416 {
0417 std::sort (iter_->second._charset.begin (),
0418 iter_->second._charset.end ());
0419 iter_->second.normalise ();
0420 }
0421 }
0422 }
0423 }
0424 };
0425
0426 typedef basic_state_machine<char> state_machine;
0427 typedef basic_state_machine<wchar_t> wstate_machine;
0428 }
0429 }
0430
0431 #endif