File indexing completed on 2025-01-19 09:47:49
0001
0002
0003
0004
0005
0006 #ifndef BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_FILE_INPUT_HPP
0007 #define BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_FILE_INPUT_HPP
0008
0009 #include "char_traits.hpp"
0010
0011 #include <cstring>
0012 #include "size_t.hpp"
0013 #include "state_machine.hpp"
0014 #include <iosfwd>
0015
0016 namespace boost
0017 {
0018 namespace lexer
0019 {
0020 template<typename CharT, typename Traits = char_traits<CharT> >
0021 class basic_file_input
0022 {
0023 public:
0024 class iterator
0025 {
0026 public:
0027 friend class basic_file_input;
0028
0029 struct data
0030 {
0031 std::size_t id;
0032 std::size_t unique_id;
0033 const CharT *start;
0034 const CharT *end;
0035 std::size_t state;
0036
0037
0038 data () :
0039 id (0),
0040 unique_id (npos),
0041 state (npos)
0042 {
0043 }
0044
0045 bool operator == (const data &rhs_) const
0046 {
0047 return id == rhs_.id && unique_id == rhs_.unique_id &&
0048 start == rhs_.start && end == rhs_.end &&
0049 state == rhs_.state;
0050 }
0051 };
0052
0053 iterator () :
0054 _input (0)
0055 {
0056 }
0057
0058 bool operator == (const iterator &rhs_) const
0059 {
0060 return _data == rhs_._data;
0061 }
0062
0063 bool operator != (const iterator &rhs_) const
0064 {
0065 return !(*this == rhs_);
0066 }
0067
0068 data &operator * ()
0069 {
0070 return _data;
0071 }
0072
0073 data *operator -> ()
0074 {
0075 return &_data;
0076 }
0077
0078
0079
0080
0081 iterator &operator ++ ()
0082 {
0083 next_token ();
0084 return *this;
0085 }
0086
0087
0088 iterator operator ++ (int)
0089 {
0090 iterator iter_ = *this;
0091
0092 next_token ();
0093 return iter_;
0094 }
0095
0096 void next_token ()
0097 {
0098 const detail::internals &internals_ =
0099 _input->_state_machine->data ();
0100
0101 _data.start = _data.end;
0102
0103 if (internals_._dfa->size () == 1)
0104 {
0105 _data.id = _input->next (&internals_._lookup->front ()->
0106 front (), internals_._dfa_alphabet.front (),
0107 &internals_._dfa->front ()->front (), _data.start,
0108 _data.end, _data.unique_id);
0109 }
0110 else
0111 {
0112 _data.id = _input->next (internals_, _data.state, _data.start,
0113 _data.end, _data.unique_id);
0114 }
0115
0116 if (_data.id == 0)
0117 {
0118 _data.start = 0;
0119 _data.end = 0;
0120
0121 _data.state = npos;
0122 }
0123 }
0124
0125 private:
0126
0127 basic_file_input *_input;
0128 data _data;
0129 };
0130
0131 friend class iterator;
0132
0133
0134 basic_file_input (const basic_state_machine<CharT> *state_machine_,
0135 std::basic_ifstream<CharT> *is_,
0136 const std::streamsize buffer_size_ = 4096,
0137 const std::streamsize buffer_increment_ = 1024) :
0138 _state_machine (state_machine_),
0139 _stream (is_),
0140 _buffer_size (buffer_size_),
0141 _buffer_increment (buffer_increment_),
0142 _buffer (_buffer_size, '!')
0143 {
0144 _start_buffer = &_buffer.front ();
0145 _end_buffer = _start_buffer + _buffer.size ();
0146 _start_token = _end_buffer;
0147 _end_token = _end_buffer;
0148 }
0149
0150 iterator begin ()
0151 {
0152 iterator iter_;
0153
0154 iter_._input = this;
0155
0156 iter_._data.id = npos;
0157 iter_._data.start = 0;
0158 iter_._data.end = 0;
0159 iter_._data.state = 0;
0160 ++iter_;
0161 return iter_;
0162 }
0163
0164 iterator end ()
0165 {
0166 iterator iter_;
0167
0168 iter_._input = this;
0169 iter_._data.start = 0;
0170 iter_._data.end = 0;
0171 return iter_;
0172 }
0173
0174 void flush ()
0175 {
0176
0177
0178 const CharT *temp_ = _end_buffer;
0179
0180 _start_token = _end_token = _end_buffer;
0181 reload_buffer (temp_, true, _end_token);
0182 }
0183
0184 private:
0185 typedef std::basic_istream<CharT> istream;
0186 typedef std::vector<CharT> buffer;
0187
0188 const basic_state_machine<CharT> *_state_machine;
0189 const std::streamsize _buffer_size;
0190 const std::streamsize _buffer_increment;
0191
0192 buffer _buffer;
0193 CharT *_start_buffer;
0194 istream *_stream;
0195 const CharT *_start_token;
0196 const CharT *_end_token;
0197 CharT *_end_buffer;
0198
0199 std::size_t next (const detail::internals &internals_,
0200 std::size_t &start_state_, const CharT * &start_, const CharT * &end_,
0201 std::size_t &unique_id_)
0202 {
0203 _start_token = _end_token;
0204
0205 again:
0206 const std::size_t * lookup_ = &internals_._lookup[start_state_]->
0207 front ();
0208 std::size_t dfa_alphabet_ = internals_._dfa_alphabet[start_state_];
0209 const std::size_t *dfa_ = &internals_._dfa[start_state_]->front ();
0210 const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
0211 const CharT *curr_ = _start_token;
0212 bool end_state_ = *ptr_ != 0;
0213 std::size_t id_ = *(ptr_ + id_index);
0214 std::size_t uid_ = *(ptr_ + unique_id_index);
0215 const CharT *end_token_ = curr_;
0216
0217 for (;;)
0218 {
0219 if (curr_ >= _end_buffer)
0220 {
0221 if (!reload_buffer (curr_, end_state_, end_token_))
0222 {
0223
0224 break;
0225 }
0226 }
0227
0228 const std::size_t BOL_state_ = ptr_[bol_index];
0229 const std::size_t EOL_state_ = ptr_[eol_index];
0230
0231 if (BOL_state_ && (_start_token == _start_buffer ||
0232 *(_start_token - 1) == '\n'))
0233 {
0234 ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
0235 }
0236 else if (EOL_state_ && *curr_ == '\n')
0237 {
0238 ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
0239 }
0240 else
0241 {
0242 const std::size_t state_ =
0243 ptr_[lookup_[static_cast<typename Traits::index_type>
0244 (*curr_++)]];
0245
0246 if (state_ == 0)
0247 {
0248 break;
0249 }
0250
0251 ptr_ = &dfa_[state_ * dfa_alphabet_];
0252 }
0253
0254 if (*ptr_)
0255 {
0256 end_state_ = true;
0257 id_ = *(ptr_ + id_index);
0258 uid_ = *(ptr_ + unique_id_index);
0259 start_state_ = *(ptr_ + state_index);
0260 end_token_ = curr_;
0261 }
0262 }
0263
0264 if (_start_token >= _end_buffer)
0265 {
0266
0267 unique_id_ = npos;
0268 return 0;
0269 }
0270
0271 const std::size_t EOL_state_ = ptr_[eol_index];
0272
0273 if (EOL_state_ && curr_ == end_)
0274 {
0275 ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
0276
0277 if (*ptr_)
0278 {
0279 end_state_ = true;
0280 id_ = *(ptr_ + id_index);
0281 uid_ = *(ptr_ + unique_id_index);
0282 start_state_ = *(ptr_ + state_index);
0283 end_token_ = curr_;
0284 }
0285 }
0286
0287 if (end_state_)
0288 {
0289
0290 _end_token = end_token_;
0291
0292 if (id_ == 0) goto again;
0293 }
0294 else
0295 {
0296
0297 _end_token = _start_token + 1;
0298 id_ = npos;
0299 uid_ = npos;
0300 }
0301
0302 start_ = _start_token;
0303 end_ = _end_token;
0304 unique_id_ = uid_;
0305 return id_;
0306 }
0307
0308 std::size_t next (const std::size_t * const lookup_,
0309 const std::size_t dfa_alphabet_, const std::size_t * const dfa_,
0310 const CharT * &start_, const CharT * &end_, std::size_t &unique_id_)
0311 {
0312 _start_token = _end_token;
0313
0314 const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
0315 const CharT *curr_ = _start_token;
0316 bool end_state_ = *ptr_ != 0;
0317 std::size_t id_ = *(ptr_ + id_index);
0318 std::size_t uid_ = *(ptr_ + unique_id_index);
0319 const CharT *end_token_ = curr_;
0320
0321 for (;;)
0322 {
0323 if (curr_ >= _end_buffer)
0324 {
0325 if (!reload_buffer (curr_, end_state_, end_token_))
0326 {
0327
0328 break;
0329 }
0330 }
0331
0332 const std::size_t BOL_state_ = ptr_[bol_index];
0333 const std::size_t EOL_state_ = ptr_[eol_index];
0334
0335 if (BOL_state_ && (_start_token == _start_buffer ||
0336 *(_start_token - 1) == '\n'))
0337 {
0338 ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
0339 }
0340 else if (EOL_state_ && *curr_ == '\n')
0341 {
0342 ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
0343 }
0344 else
0345 {
0346 const std::size_t state_ =
0347 ptr_[lookup_[static_cast<typename Traits::index_type>
0348 (*curr_++)]];
0349
0350 if (state_ == 0)
0351 {
0352 break;
0353 }
0354
0355 ptr_ = &dfa_[state_ * dfa_alphabet_];
0356 }
0357
0358 if (*ptr_)
0359 {
0360 end_state_ = true;
0361 id_ = *(ptr_ + id_index);
0362 uid_ = *(ptr_ + unique_id_index);
0363 end_token_ = curr_;
0364 }
0365 }
0366
0367 if (_start_token >= _end_buffer)
0368 {
0369
0370 unique_id_ = npos;
0371 return 0;
0372 }
0373
0374 const std::size_t EOL_state_ = ptr_[eol_index];
0375
0376 if (EOL_state_ && curr_ == end_)
0377 {
0378 ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
0379
0380 if (*ptr_)
0381 {
0382 end_state_ = true;
0383 id_ = *(ptr_ + id_index);
0384 uid_ = *(ptr_ + unique_id_index);
0385 end_token_ = curr_;
0386 }
0387 }
0388
0389 if (end_state_)
0390 {
0391
0392 _end_token = end_token_;
0393 }
0394 else
0395 {
0396
0397 _end_token = _start_token + 1;
0398 id_ = npos;
0399 uid_ = npos;
0400 }
0401
0402 start_ = _start_token;
0403 end_ = _end_token;
0404 unique_id_ = uid_;
0405 return id_;
0406 }
0407
0408 bool reload_buffer (const CharT * &curr_, const bool end_state_,
0409 const CharT * &end_token_)
0410 {
0411 bool success_ = !_stream->eof ();
0412
0413 if (success_)
0414 {
0415 const CharT *old_start_token_ = _start_token;
0416 std::size_t old_size_ = _buffer.size ();
0417 std::size_t count_ = 0;
0418
0419 if (_start_token - 1 == _start_buffer)
0420 {
0421
0422 _buffer.resize (old_size_ + _buffer_increment, '!');
0423 _start_buffer = &_buffer.front ();
0424 _start_token = _start_buffer + 1;
0425 _stream->read (_start_buffer + old_size_,
0426 _buffer_increment);
0427 count_ = _stream->gcount ();
0428 _end_buffer = _start_buffer + old_size_ + count_;
0429 }
0430 else if (_start_token < _end_buffer)
0431 {
0432 const std::size_t len_ = _end_buffer - _start_token;
0433
0434 using namespace std;
0435
0436 memcpy (_start_buffer, _start_token - 1, (len_ + 1) *
0437 sizeof (CharT));
0438 _stream->read (_start_buffer + len_ + 1,
0439 static_cast<std::streamsize> (_buffer.size () - len_ - 1));
0440 count_ = _stream->gcount ();
0441 _start_token = _start_buffer + 1;
0442 _end_buffer = _start_buffer + len_ + 1 + count_;
0443 }
0444 else
0445 {
0446 _stream->read (_start_buffer, static_cast<std::streamsize>
0447 (_buffer.size ()));
0448 count_ = _stream->gcount ();
0449 _start_token = _start_buffer;
0450 _end_buffer = _start_buffer + count_;
0451 }
0452
0453 if (end_state_)
0454 {
0455 end_token_ = _start_token +
0456 (end_token_ - old_start_token_);
0457 }
0458
0459 curr_ = _start_token + (curr_ - old_start_token_);
0460 }
0461
0462 return success_;
0463 }
0464
0465
0466 basic_file_input (const basic_file_input &);
0467 const basic_file_input &operator = (const basic_file_input &);
0468 };
0469
0470 typedef basic_file_input<char> file_input;
0471 typedef basic_file_input<wchar_t> wfile_input;
0472 }
0473 }
0474
0475 #endif