Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-19 09:47:49

0001 // file_input.hpp
0002 // Copyright (c) 2008-2009 Ben Hanson (http://www.benhanson.net/)
0003 //
0004 // Distributed under the Boost Software License, Version 1.0. (See accompanying
0005 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
0006 #ifndef BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_FILE_INPUT_HPP
0007 #define BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_FILE_INPUT_HPP
0008 
0009 #include "char_traits.hpp"
0010 // memcpy
0011 #include <cstring>
0012 #include "size_t.hpp"
0013 #include "state_machine.hpp"
0014 #include <iosfwd>
0015 
0016 namespace boost
0017 {
0018 namespace lexer
0019 {
0020 template<typename CharT, typename Traits = char_traits<CharT> >
0021 class basic_file_input
0022 {
0023 public:
0024     class iterator
0025     {
0026     public:
0027         friend class basic_file_input;
0028 
0029         struct data
0030         {
0031             std::size_t id;
0032             std::size_t unique_id;
0033             const CharT *start;
0034             const CharT *end;
0035             std::size_t state;
0036 
0037             // Construct in end() state.
0038             data () :
0039                 id (0),
0040                 unique_id (npos),
0041                 state (npos)
0042             {
0043             }
0044 
0045             bool operator == (const data &rhs_) const
0046             {
0047                 return id == rhs_.id && unique_id == rhs_.unique_id &&
0048                     start == rhs_.start && end == rhs_.end &&
0049                     state == rhs_.state;
0050             }
0051         };
0052 
0053         iterator () :
0054             _input (0)
0055         {
0056         }
0057 
0058         bool operator == (const iterator &rhs_) const
0059         {
0060             return _data == rhs_._data;
0061         }
0062 
0063         bool operator != (const iterator &rhs_) const
0064         {
0065             return !(*this == rhs_);
0066         }
0067 
0068         data &operator * ()
0069         {
0070             return _data;
0071         }
0072 
0073         data *operator -> ()
0074         {
0075             return &_data;
0076         }
0077 
0078         // Let compiler generate operator = ().
0079 
0080         // prefix version
0081         iterator &operator ++ ()
0082         {
0083             next_token ();
0084             return *this;
0085         }
0086 
0087         // postfix version
0088         iterator operator ++ (int)
0089         {
0090             iterator iter_ = *this;
0091 
0092             next_token ();
0093             return iter_;
0094         }
0095 
0096         void next_token ()
0097         {
0098             const detail::internals &internals_ =
0099                 _input->_state_machine->data ();
0100 
0101             _data.start = _data.end;
0102 
0103             if (internals_._dfa->size () == 1)
0104             {
0105                 _data.id = _input->next (&internals_._lookup->front ()->
0106                     front (), internals_._dfa_alphabet.front (),
0107                     &internals_._dfa->front ()->front (), _data.start,
0108                     _data.end, _data.unique_id);
0109             }
0110             else
0111             {
0112                 _data.id = _input->next (internals_, _data.state, _data.start,
0113                     _data.end, _data.unique_id);
0114             }
0115 
0116             if (_data.id == 0)
0117             {
0118                 _data.start = 0;
0119                 _data.end = 0;
0120                 // Ensure current state matches that returned by end().
0121                 _data.state = npos;
0122             }
0123         }
0124 
0125     private:
0126         // Not owner (obviously!)
0127         basic_file_input *_input;
0128         data _data;
0129     };
0130 
0131     friend class iterator;
0132 
0133     // Make it explicit that we are NOT taking a copy of state_machine_!
0134     basic_file_input (const basic_state_machine<CharT> *state_machine_,
0135         std::basic_ifstream<CharT> *is_,
0136         const std::streamsize buffer_size_ = 4096,
0137         const std::streamsize buffer_increment_ = 1024) :
0138         _state_machine (state_machine_),
0139         _stream (is_),
0140         _buffer_size (buffer_size_),
0141         _buffer_increment (buffer_increment_),
0142         _buffer (_buffer_size, '!')
0143     {
0144         _start_buffer = &_buffer.front ();
0145         _end_buffer = _start_buffer + _buffer.size ();
0146         _start_token = _end_buffer;
0147         _end_token = _end_buffer;
0148     }
0149 
0150     iterator begin ()
0151     {
0152         iterator iter_;
0153 
0154         iter_._input = this;
0155         // Over-ride default of 0 (EOF)
0156         iter_._data.id = npos;
0157         iter_._data.start = 0;
0158         iter_._data.end = 0;
0159         iter_._data.state = 0;
0160         ++iter_;
0161         return iter_;
0162     }
0163 
0164     iterator end ()
0165     {
0166         iterator iter_;
0167 
0168         iter_._input = this;
0169         iter_._data.start = 0;
0170         iter_._data.end = 0;
0171         return iter_;
0172     }
0173 
0174     void flush ()
0175     {
0176         // This temporary is mandatory, otherwise the
0177         // pointer calculations won't work!
0178         const CharT *temp_ = _end_buffer;
0179 
0180         _start_token = _end_token = _end_buffer;
0181         reload_buffer (temp_, true, _end_token);
0182     }
0183 
0184 private:
0185     typedef std::basic_istream<CharT> istream;
0186     typedef std::vector<CharT> buffer;
0187 
0188     const basic_state_machine<CharT> *_state_machine;
0189     const std::streamsize _buffer_size;
0190     const std::streamsize _buffer_increment;
0191 
0192     buffer _buffer;
0193     CharT *_start_buffer;
0194     istream *_stream;
0195     const CharT *_start_token;
0196     const CharT *_end_token;
0197     CharT *_end_buffer;
0198 
0199     std::size_t next (const detail::internals &internals_,
0200         std::size_t &start_state_, const CharT * &start_, const CharT * &end_,
0201         std::size_t &unique_id_)
0202     {
0203         _start_token = _end_token;
0204 
0205 again:
0206         const std::size_t * lookup_ = &internals_._lookup[start_state_]->
0207             front ();
0208         std::size_t dfa_alphabet_ = internals_._dfa_alphabet[start_state_];
0209         const std::size_t *dfa_ = &internals_._dfa[start_state_]->front ();
0210         const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
0211         const CharT *curr_ = _start_token;
0212         bool end_state_ = *ptr_ != 0;
0213         std::size_t id_ = *(ptr_ + id_index);
0214         std::size_t uid_ = *(ptr_ + unique_id_index);
0215         const CharT *end_token_ = curr_;
0216 
0217         for (;;)
0218         {
0219             if (curr_ >= _end_buffer)
0220             {
0221                 if (!reload_buffer (curr_, end_state_, end_token_))
0222                 {
0223                     // EOF
0224                     break;
0225                 }
0226             }
0227 
0228             const std::size_t BOL_state_ = ptr_[bol_index];
0229             const std::size_t EOL_state_ = ptr_[eol_index];
0230 
0231             if (BOL_state_ && (_start_token == _start_buffer ||
0232                 *(_start_token - 1) == '\n'))
0233             {
0234                 ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
0235             }
0236             else if (EOL_state_ && *curr_ == '\n')
0237             {
0238                 ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
0239             }
0240             else
0241             {
0242                 const std::size_t state_ =
0243                     ptr_[lookup_[static_cast<typename Traits::index_type>
0244                         (*curr_++)]];
0245 
0246                 if (state_ == 0)
0247                 {
0248                     break;
0249                 }
0250 
0251                 ptr_ = &dfa_[state_ * dfa_alphabet_];
0252             }
0253 
0254             if (*ptr_)
0255             {
0256                 end_state_ = true;
0257                 id_ = *(ptr_ + id_index);
0258                 uid_ = *(ptr_ + unique_id_index);
0259                 start_state_ = *(ptr_ + state_index);
0260                 end_token_ = curr_;
0261             }
0262         }
0263 
0264         if (_start_token >= _end_buffer)
0265         {
0266             // No more tokens...
0267             unique_id_ = npos;
0268             return 0;
0269         }
0270 
0271         const std::size_t EOL_state_ = ptr_[eol_index];
0272 
0273         if (EOL_state_ && curr_ == end_)
0274         {
0275             ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
0276 
0277             if (*ptr_)
0278             {
0279                 end_state_ = true;
0280                 id_ = *(ptr_ + id_index);
0281                 uid_ = *(ptr_ + unique_id_index);
0282                 start_state_ = *(ptr_ + state_index);
0283                 end_token_ = curr_;
0284             }
0285         }
0286 
0287         if (end_state_)
0288         {
0289             // return longest match
0290             _end_token = end_token_;
0291 
0292             if (id_ == 0) goto again;
0293         }
0294         else
0295         {
0296             // No match causes char to be skipped
0297             _end_token = _start_token + 1;
0298             id_ = npos;
0299             uid_ = npos;
0300         }
0301 
0302         start_ = _start_token;
0303         end_ = _end_token;
0304         unique_id_ = uid_;
0305         return id_;
0306     }
0307 
0308     std::size_t next (const std::size_t * const lookup_,
0309         const std::size_t dfa_alphabet_, const std::size_t * const dfa_,
0310         const CharT * &start_, const CharT * &end_, std::size_t &unique_id_)
0311     {
0312         _start_token = _end_token;
0313 
0314         const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
0315         const CharT *curr_ = _start_token;
0316         bool end_state_ = *ptr_ != 0;
0317         std::size_t id_ = *(ptr_ + id_index);
0318         std::size_t uid_ = *(ptr_ + unique_id_index);
0319         const CharT *end_token_ = curr_;
0320 
0321         for (;;)
0322         {
0323             if (curr_ >= _end_buffer)
0324             {
0325                 if (!reload_buffer (curr_, end_state_, end_token_))
0326                 {
0327                     // EOF
0328                     break;
0329                 }
0330             }
0331 
0332             const std::size_t BOL_state_ = ptr_[bol_index];
0333             const std::size_t EOL_state_ = ptr_[eol_index];
0334 
0335             if (BOL_state_ && (_start_token == _start_buffer ||
0336                 *(_start_token - 1) == '\n'))
0337             {
0338                 ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
0339             }
0340             else if (EOL_state_ && *curr_ == '\n')
0341             {
0342                 ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
0343             }
0344             else
0345             {
0346                 const std::size_t state_ =
0347                     ptr_[lookup_[static_cast<typename Traits::index_type>
0348                         (*curr_++)]];
0349 
0350                 if (state_ == 0)
0351                 {
0352                     break;
0353                 }
0354 
0355                 ptr_ = &dfa_[state_ * dfa_alphabet_];
0356             }
0357 
0358             if (*ptr_)
0359             {
0360                 end_state_ = true;
0361                 id_ = *(ptr_ + id_index);
0362                 uid_ = *(ptr_ + unique_id_index);
0363                 end_token_ = curr_;
0364             }
0365         }
0366 
0367         if (_start_token >= _end_buffer)
0368         {
0369             // No more tokens...
0370             unique_id_ = npos;
0371             return 0;
0372         }
0373 
0374         const std::size_t EOL_state_ = ptr_[eol_index];
0375 
0376         if (EOL_state_ && curr_ == end_)
0377         {
0378             ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
0379 
0380             if (*ptr_)
0381             {
0382                 end_state_ = true;
0383                 id_ = *(ptr_ + id_index);
0384                 uid_ = *(ptr_ + unique_id_index);
0385                 end_token_ = curr_;
0386             }
0387         }
0388 
0389         if (end_state_)
0390         {
0391             // return longest match
0392             _end_token = end_token_;
0393         }
0394         else
0395         {
0396             // No match causes char to be skipped
0397             _end_token = _start_token + 1;
0398             id_ = npos;
0399             uid_ = npos;
0400         }
0401 
0402         start_ = _start_token;
0403         end_ = _end_token;
0404         unique_id_ = uid_;
0405         return id_;
0406     }
0407 
0408     bool reload_buffer (const CharT * &curr_, const bool end_state_,
0409         const CharT * &end_token_)
0410     {
0411         bool success_ = !_stream->eof ();
0412 
0413         if (success_)
0414         {
0415             const CharT *old_start_token_ = _start_token;
0416             std::size_t old_size_ = _buffer.size ();
0417             std::size_t count_ = 0;
0418 
0419             if (_start_token - 1 == _start_buffer)
0420             {
0421                 // Run out of buffer space, so increase.
0422                 _buffer.resize (old_size_ + _buffer_increment, '!');
0423                 _start_buffer = &_buffer.front ();
0424                 _start_token = _start_buffer + 1;
0425                 _stream->read (_start_buffer + old_size_,
0426                     _buffer_increment);
0427                 count_ = _stream->gcount ();
0428                 _end_buffer = _start_buffer + old_size_ + count_;
0429             }
0430             else if (_start_token < _end_buffer)
0431             {
0432                 const std::size_t len_ = _end_buffer - _start_token;
0433                 // Some systems have memcpy in namespace std.
0434                 using namespace std;
0435 
0436                 memcpy (_start_buffer, _start_token - 1, (len_ + 1) *
0437                     sizeof (CharT));
0438                 _stream->read (_start_buffer + len_ + 1,
0439                     static_cast<std::streamsize> (_buffer.size () - len_ - 1));
0440                 count_ = _stream->gcount ();
0441                 _start_token = _start_buffer + 1;
0442                 _end_buffer = _start_buffer + len_ + 1 + count_;
0443             }
0444             else
0445             {
0446                 _stream->read (_start_buffer, static_cast<std::streamsize>
0447                     (_buffer.size ()));
0448                 count_ = _stream->gcount ();
0449                 _start_token = _start_buffer;
0450                 _end_buffer = _start_buffer + count_;
0451             }
0452 
0453             if (end_state_)
0454             {
0455                 end_token_ = _start_token +
0456                     (end_token_ - old_start_token_);
0457             }
0458 
0459             curr_ = _start_token + (curr_ - old_start_token_);
0460         }
0461 
0462         return success_;
0463     }
0464 
0465     // Disallow copying of buffer
0466     basic_file_input (const basic_file_input &);
0467     const basic_file_input &operator = (const basic_file_input &);
0468 };
0469 
// Ready-made instantiations of basic_file_input for char and wchar_t
// input, both using the default Traits template argument.
0470 typedef basic_file_input<char> file_input;
0471 typedef basic_file_input<wchar_t> wfile_input;
0472 }
0473 }
0474 
0475 #endif