File indexing completed on 2025-01-19 09:47:50
0001
0002
0003
0004
0005
0006 #ifndef BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_INPUT_HPP
0007 #define BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_INPUT_HPP
0008
0009 #include "char_traits.hpp"
0010 #include "size_t.hpp"
0011 #include "state_machine.hpp"
0012 #include <iterator> // for std::iterator_traits
0013
0014 namespace boost
0015 {
0016 namespace lexer
0017 {
0018 template<typename FwdIter, typename Traits =
0019 char_traits<typename std::iterator_traits<FwdIter>::value_type> >
0020 class basic_input
0021 {
0022 public:
0023 class iterator
0024 {
0025 public:
0026 friend class basic_input;
0027
0028 struct data
0029 {
0030 std::size_t id;
0031 std::size_t unique_id;
0032 FwdIter start;
0033 FwdIter end;
0034 bool bol;
0035 std::size_t state;
0036
0037
0038 data () :
0039 id (0),
0040 unique_id (npos),
0041 bol (false),
0042 state (npos)
0043 {
0044 }
0045
0046 bool operator == (const data &rhs_) const
0047 {
0048 return id == rhs_.id && unique_id == rhs_.unique_id &&
0049 start == rhs_.start && end == rhs_.end &&
0050 bol == rhs_.bol && state == rhs_.state;
0051 }
0052 };
0053
0054 iterator () :
0055 _input (0)
0056 {
0057 }
0058
0059 bool operator == (const iterator &rhs_) const
0060 {
0061 return _data == rhs_._data;
0062 }
0063
0064 bool operator != (const iterator &rhs_) const
0065 {
0066 return !(*this == rhs_);
0067 }
0068
0069 data &operator * ()
0070 {
0071 return _data;
0072 }
0073
0074 data *operator -> ()
0075 {
0076 return &_data;
0077 }
0078
0079
0080
0081
0082 iterator &operator ++ ()
0083 {
0084 next_token ();
0085 return *this;
0086 }
0087
0088
0089 iterator operator ++ (int)
0090 {
0091 iterator iter_ = *this;
0092
0093 next_token ();
0094 return iter_;
0095 }
0096
0097 private:
0098
0099 const basic_input *_input;
0100 data _data;
0101
0102 void next_token ()
0103 {
0104 const detail::internals &internals_ =
0105 _input->_state_machine->data ();
0106
0107 _data.start = _data.end;
0108
0109 if (internals_._dfa->size () == 1)
0110 {
0111 if (internals_._seen_BOL_assertion ||
0112 internals_._seen_EOL_assertion)
0113 {
0114 _data.id = next
0115 (&internals_._lookup->front ()->front (),
0116 internals_._dfa_alphabet.front (),
0117 &internals_._dfa->front ()->front (),
0118 _data.bol, _data.end, _input->_end, _data.unique_id);
0119 }
0120 else
0121 {
0122 _data.id = next (&internals_._lookup->front ()->front (),
0123 internals_._dfa_alphabet.front (), &internals_.
0124 _dfa->front ()->front (), _data.end, _input->_end,
0125 _data.unique_id);
0126 }
0127 }
0128 else
0129 {
0130 if (internals_._seen_BOL_assertion ||
0131 internals_._seen_EOL_assertion)
0132 {
0133 _data.id = next (internals_, _data.state,
0134 _data.bol, _data.end, _input->_end, _data.unique_id);
0135 }
0136 else
0137 {
0138 _data.id = next (internals_, _data.state,
0139 _data.end, _input->_end, _data.unique_id);
0140 }
0141 }
0142
0143 if (_data.end == _input->_end && _data.start == _data.end)
0144 {
0145
0146 _data.state = npos;
0147 }
0148 }
0149
0150 std::size_t next (const detail::internals &internals_,
0151 std::size_t &start_state_, bool bol_,
0152 FwdIter &start_token_, const FwdIter &end_,
0153 std::size_t &unique_id_)
0154 {
0155 if (start_token_ == end_)
0156 {
0157 unique_id_ = npos;
0158 return 0;
0159 }
0160
0161 again:
0162 const std::size_t * lookup_ = &internals_._lookup[start_state_]->
0163 front ();
0164 std::size_t dfa_alphabet_ = internals_._dfa_alphabet[start_state_];
0165 const std::size_t *dfa_ = &internals_._dfa[start_state_]->front ();
0166 const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
0167 FwdIter curr_ = start_token_;
0168 bool end_state_ = *ptr_ != 0;
0169 std::size_t id_ = *(ptr_ + id_index);
0170 std::size_t uid_ = *(ptr_ + unique_id_index);
0171 std::size_t end_start_state_ = start_state_;
0172 bool end_bol_ = bol_;
0173 FwdIter end_token_ = start_token_;
0174
0175 while (curr_ != end_)
0176 {
0177 const std::size_t BOL_state_ = ptr_[bol_index];
0178 const std::size_t EOL_state_ = ptr_[eol_index];
0179
0180 if (BOL_state_ && bol_)
0181 {
0182 ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
0183 }
0184 else if (EOL_state_ && *curr_ == '\n')
0185 {
0186 ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
0187 }
0188 else
0189 {
0190 typename Traits::char_type prev_char_ = *curr_++;
0191
0192 bol_ = prev_char_ == '\n';
0193
0194 const std::size_t state_ =
0195 ptr_[lookup_[static_cast<typename Traits::index_type>
0196 (prev_char_)]];
0197
0198 if (state_ == 0)
0199 {
0200 break;
0201 }
0202
0203 ptr_ = &dfa_[state_ * dfa_alphabet_];
0204 }
0205
0206 if (*ptr_)
0207 {
0208 end_state_ = true;
0209 id_ = *(ptr_ + id_index);
0210 uid_ = *(ptr_ + unique_id_index);
0211 end_start_state_ = *(ptr_ + state_index);
0212 end_bol_ = bol_;
0213 end_token_ = curr_;
0214 }
0215 }
0216
0217 const std::size_t EOL_state_ = ptr_[eol_index];
0218
0219 if (EOL_state_ && curr_ == end_)
0220 {
0221 ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
0222
0223 if (*ptr_)
0224 {
0225 end_state_ = true;
0226 id_ = *(ptr_ + id_index);
0227 uid_ = *(ptr_ + unique_id_index);
0228 end_start_state_ = *(ptr_ + state_index);
0229 end_bol_ = bol_;
0230 end_token_ = curr_;
0231 }
0232 }
0233
0234 if (end_state_)
0235 {
0236
0237 start_state_ = end_start_state_;
0238 start_token_ = end_token_;
0239
0240 if (id_ == 0)
0241 {
0242 bol_ = end_bol_;
0243 goto again;
0244 }
0245 else
0246 {
0247 _data.bol = end_bol_;
0248 }
0249 }
0250 else
0251 {
0252
0253 _data.bol = *start_token_ == '\n';
0254 ++start_token_;
0255 id_ = npos;
0256 uid_ = npos;
0257 }
0258
0259 unique_id_ = uid_;
0260 return id_;
0261 }
0262
0263 std::size_t next (const detail::internals &internals_,
0264 std::size_t &start_state_, FwdIter &start_token_,
0265 FwdIter const &end_, std::size_t &unique_id_)
0266 {
0267 if (start_token_ == end_)
0268 {
0269 unique_id_ = npos;
0270 return 0;
0271 }
0272
0273 again:
0274 const std::size_t * lookup_ = &internals_._lookup[start_state_]->
0275 front ();
0276 std::size_t dfa_alphabet_ = internals_._dfa_alphabet[start_state_];
0277 const std::size_t *dfa_ = &internals_._dfa[start_state_]->front ();
0278 const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
0279 FwdIter curr_ = start_token_;
0280 bool end_state_ = *ptr_ != 0;
0281 std::size_t id_ = *(ptr_ + id_index);
0282 std::size_t uid_ = *(ptr_ + unique_id_index);
0283 std::size_t end_start_state_ = start_state_;
0284 FwdIter end_token_ = start_token_;
0285
0286 while (curr_ != end_)
0287 {
0288 const std::size_t state_ = ptr_[lookup_[static_cast
0289 <typename Traits::index_type>(*curr_++)]];
0290
0291 if (state_ == 0)
0292 {
0293 break;
0294 }
0295
0296 ptr_ = &dfa_[state_ * dfa_alphabet_];
0297
0298 if (*ptr_)
0299 {
0300 end_state_ = true;
0301 id_ = *(ptr_ + id_index);
0302 uid_ = *(ptr_ + unique_id_index);
0303 end_start_state_ = *(ptr_ + state_index);
0304 end_token_ = curr_;
0305 }
0306 }
0307
0308 if (end_state_)
0309 {
0310
0311 start_state_ = end_start_state_;
0312 start_token_ = end_token_;
0313
0314 if (id_ == 0) goto again;
0315 }
0316 else
0317 {
0318
0319 ++start_token_;
0320 id_ = npos;
0321 uid_ = npos;
0322 }
0323
0324 unique_id_ = uid_;
0325 return id_;
0326 }
0327
0328 std::size_t next (const std::size_t * const lookup_,
0329 const std::size_t dfa_alphabet_, const std::size_t * const dfa_,
0330 bool bol_, FwdIter &start_token_, FwdIter const &end_,
0331 std::size_t &unique_id_)
0332 {
0333 if (start_token_ == end_)
0334 {
0335 unique_id_ = npos;
0336 return 0;
0337 }
0338
0339 const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
0340 FwdIter curr_ = start_token_;
0341 bool end_state_ = *ptr_ != 0;
0342 std::size_t id_ = *(ptr_ + id_index);
0343 std::size_t uid_ = *(ptr_ + unique_id_index);
0344 bool end_bol_ = bol_;
0345 FwdIter end_token_ = start_token_;
0346
0347 while (curr_ != end_)
0348 {
0349 const std::size_t BOL_state_ = ptr_[bol_index];
0350 const std::size_t EOL_state_ = ptr_[eol_index];
0351
0352 if (BOL_state_ && bol_)
0353 {
0354 ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
0355 }
0356 else if (EOL_state_ && *curr_ == '\n')
0357 {
0358 ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
0359 }
0360 else
0361 {
0362 typename Traits::char_type prev_char_ = *curr_++;
0363
0364 bol_ = prev_char_ == '\n';
0365
0366 const std::size_t state_ =
0367 ptr_[lookup_[static_cast<typename Traits::index_type>
0368 (prev_char_)]];
0369
0370 if (state_ == 0)
0371 {
0372 break;
0373 }
0374
0375 ptr_ = &dfa_[state_ * dfa_alphabet_];
0376 }
0377
0378 if (*ptr_)
0379 {
0380 end_state_ = true;
0381 id_ = *(ptr_ + id_index);
0382 uid_ = *(ptr_ + unique_id_index);
0383 end_bol_ = bol_;
0384 end_token_ = curr_;
0385 }
0386 }
0387
0388 const std::size_t EOL_state_ = ptr_[eol_index];
0389
0390 if (EOL_state_ && curr_ == end_)
0391 {
0392 ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
0393
0394 if (*ptr_)
0395 {
0396 end_state_ = true;
0397 id_ = *(ptr_ + id_index);
0398 uid_ = *(ptr_ + unique_id_index);
0399 end_bol_ = bol_;
0400 end_token_ = curr_;
0401 }
0402 }
0403
0404 if (end_state_)
0405 {
0406
0407 _data.bol = end_bol_;
0408 start_token_ = end_token_;
0409 }
0410 else
0411 {
0412
0413 _data.bol = *start_token_ == '\n';
0414 ++start_token_;
0415 id_ = npos;
0416 uid_ = npos;
0417 }
0418
0419 unique_id_ = uid_;
0420 return id_;
0421 }
0422
0423 std::size_t next (const std::size_t * const lookup_,
0424 const std::size_t dfa_alphabet_, const std::size_t * const dfa_,
0425 FwdIter &start_token_, FwdIter const &end_,
0426 std::size_t &unique_id_)
0427 {
0428 if (start_token_ == end_)
0429 {
0430 unique_id_ = npos;
0431 return 0;
0432 }
0433
0434 const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
0435 FwdIter curr_ = start_token_;
0436 bool end_state_ = *ptr_ != 0;
0437 std::size_t id_ = *(ptr_ + id_index);
0438 std::size_t uid_ = *(ptr_ + unique_id_index);
0439 FwdIter end_token_ = start_token_;
0440
0441 while (curr_ != end_)
0442 {
0443 const std::size_t state_ = ptr_[lookup_[static_cast
0444 <typename Traits::index_type>(*curr_++)]];
0445
0446 if (state_ == 0)
0447 {
0448 break;
0449 }
0450
0451 ptr_ = &dfa_[state_ * dfa_alphabet_];
0452
0453 if (*ptr_)
0454 {
0455 end_state_ = true;
0456 id_ = *(ptr_ + id_index);
0457 uid_ = *(ptr_ + unique_id_index);
0458 end_token_ = curr_;
0459 }
0460 }
0461
0462 if (end_state_)
0463 {
0464
0465 start_token_ = end_token_;
0466 }
0467 else
0468 {
0469
0470 ++start_token_;
0471 id_ = npos;
0472 uid_ = npos;
0473 }
0474
0475 unique_id_ = uid_;
0476 return id_;
0477 }
0478 };
0479
0480 friend class iterator;
0481
0482
0483 basic_input (const basic_state_machine<typename Traits::char_type>
0484 *state_machine_, const FwdIter &begin_, const FwdIter &end_) :
0485 _state_machine (state_machine_),
0486 _begin (begin_),
0487 _end (end_)
0488 {
0489 }
0490
0491 iterator begin () const
0492 {
0493 iterator iter_;
0494
0495 iter_._input = this;
0496
0497 iter_._data.id = npos;
0498 iter_._data.start = _begin;
0499 iter_._data.end = _begin;
0500 iter_._data.bol = _state_machine->data ()._seen_BOL_assertion;
0501 iter_._data.state = 0;
0502 ++iter_;
0503 return iter_;
0504 }
0505
0506 iterator end () const
0507 {
0508 iterator iter_;
0509
0510 iter_._input = this;
0511 iter_._data.start = _end;
0512 iter_._data.end = _end;
0513 return iter_;
0514 }
0515
0516 private:
0517 const basic_state_machine<typename Traits::char_type> *_state_machine;
0518 FwdIter _begin;
0519 FwdIter _end;
0520 };
0521
0522 typedef basic_input<std::string::iterator> iter_input;
0523 typedef basic_input<std::basic_string<wchar_t>::iterator> iter_winput;
0524 typedef basic_input<const char *> ptr_input;
0525 typedef basic_input<const wchar_t *> ptr_winput;
0526 }
0527 }
0528
0529 #endif