Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-19 09:47:50

0001 // input.hpp
0002 // Copyright (c) 2008-2009 Ben Hanson (http://www.benhanson.net/)
0003 //
0004 // Distributed under the Boost Software License, Version 1.0. (See accompanying
0005 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
0006 #ifndef BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_INPUT_HPP
0007 #define BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_INPUT_HPP
0008 
0009 #include "char_traits.hpp"
0010 #include "size_t.hpp"
0011 #include "state_machine.hpp"
0012 #include <iterator> // for std::iterator_traits
0013 
0014 namespace boost
0015 {
0016 namespace lexer
0017 {
0018 template<typename FwdIter, typename Traits =
0019     char_traits<typename std::iterator_traits<FwdIter>::value_type> >
0020 class basic_input
0021 {
0022 public:
0023     class iterator
0024     {
0025     public:
0026         friend class basic_input;
0027 
0028         struct data
0029         {
0030             std::size_t id;
0031             std::size_t unique_id;
0032             FwdIter start;
0033             FwdIter end;
0034             bool bol;
0035             std::size_t state;
0036 
0037             // Construct in end() state.
0038             data () :
0039                 id (0),
0040                 unique_id (npos),
0041                 bol (false),
0042                 state (npos)
0043             {
0044             }
0045 
0046             bool operator == (const data &rhs_) const
0047             {
0048                 return id == rhs_.id && unique_id == rhs_.unique_id &&
0049                     start == rhs_.start && end == rhs_.end &&
0050                     bol == rhs_.bol && state == rhs_.state;
0051             }
0052         };
0053 
0054         iterator () :
0055             _input (0)
0056         {
0057         }
0058 
0059         bool operator == (const iterator &rhs_) const
0060         {
0061             return _data == rhs_._data;
0062         }
0063 
0064         bool operator != (const iterator &rhs_) const
0065         {
0066             return !(*this == rhs_);
0067         }
0068 
0069         data &operator * ()
0070         {
0071             return _data;
0072         }
0073 
0074         data *operator -> ()
0075         {
0076             return &_data;
0077         }
0078 
0079         // Let compiler generate operator = ().
0080 
0081         // prefix version
0082         iterator &operator ++ ()
0083         {
0084             next_token ();
0085             return *this;
0086         }
0087 
0088         // postfix version
0089         iterator operator ++ (int)
0090         {
0091             iterator iter_ = *this;
0092 
0093             next_token ();
0094             return iter_;
0095         }
0096 
0097     private:
0098         // Not owner (obviously!)
0099         const basic_input *_input;
0100         data _data;
0101 
0102         void next_token ()
0103         {
0104             const detail::internals &internals_ =
0105                 _input->_state_machine->data ();
0106 
0107             _data.start = _data.end;
0108 
0109             if (internals_._dfa->size () == 1)
0110             {
0111                 if (internals_._seen_BOL_assertion ||
0112                     internals_._seen_EOL_assertion)
0113                 {
0114                     _data.id = next
0115                         (&internals_._lookup->front ()->front (),
0116                         internals_._dfa_alphabet.front (),
0117                         &internals_._dfa->front ()->front (),
0118                         _data.bol, _data.end, _input->_end, _data.unique_id);
0119                 }
0120                 else
0121                 {
0122                     _data.id = next (&internals_._lookup->front ()->front (),
0123                         internals_._dfa_alphabet.front (), &internals_.
0124                         _dfa->front ()->front (), _data.end, _input->_end,
0125                         _data.unique_id);
0126                 }
0127             }
0128             else
0129             {
0130                 if (internals_._seen_BOL_assertion ||
0131                     internals_._seen_EOL_assertion)
0132                 {
0133                     _data.id = next (internals_, _data.state,
0134                         _data.bol, _data.end, _input->_end, _data.unique_id);
0135                 }
0136                 else
0137                 {
0138                     _data.id = next (internals_, _data.state,
0139                         _data.end, _input->_end, _data.unique_id);
0140                 }
0141             }
0142 
0143             if (_data.end == _input->_end && _data.start == _data.end)
0144             {
0145                 // Ensure current state matches that returned by end().
0146                 _data.state = npos;
0147             }
0148         }
0149 
0150         std::size_t next (const detail::internals &internals_,
0151             std::size_t &start_state_, bool bol_,
0152             FwdIter &start_token_, const FwdIter &end_,
0153             std::size_t &unique_id_)
0154         {
0155             if (start_token_ == end_)
0156             {
0157                 unique_id_ = npos;
0158                 return 0;
0159             }
0160 
0161         again:
0162             const std::size_t * lookup_ = &internals_._lookup[start_state_]->
0163                 front ();
0164             std::size_t dfa_alphabet_ = internals_._dfa_alphabet[start_state_];
0165             const std::size_t *dfa_ = &internals_._dfa[start_state_]->front ();
0166             const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
0167             FwdIter curr_ = start_token_;
0168             bool end_state_ = *ptr_ != 0;
0169             std::size_t id_ = *(ptr_ + id_index);
0170             std::size_t uid_ = *(ptr_ + unique_id_index);
0171             std::size_t end_start_state_ = start_state_;
0172             bool end_bol_ = bol_;
0173             FwdIter end_token_ = start_token_;
0174 
0175             while (curr_ != end_)
0176             {
0177                 const std::size_t BOL_state_ = ptr_[bol_index];
0178                 const std::size_t EOL_state_ = ptr_[eol_index];
0179 
0180                 if (BOL_state_ && bol_)
0181                 {
0182                     ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
0183                 }
0184                 else if (EOL_state_ && *curr_ == '\n')
0185                 {
0186                     ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
0187                 }
0188                 else
0189                 {
0190                     typename Traits::char_type prev_char_ = *curr_++;
0191 
0192                     bol_ = prev_char_ == '\n';
0193 
0194                     const std::size_t state_ =
0195                         ptr_[lookup_[static_cast<typename Traits::index_type>
0196                         (prev_char_)]];
0197 
0198                     if (state_ == 0)
0199                     {
0200                         break;
0201                     }
0202 
0203                     ptr_ = &dfa_[state_ * dfa_alphabet_];
0204                 }
0205 
0206                 if (*ptr_)
0207                 {
0208                     end_state_ = true;
0209                     id_ = *(ptr_ + id_index);
0210                     uid_ = *(ptr_ + unique_id_index);
0211                     end_start_state_ = *(ptr_ + state_index);
0212                     end_bol_ = bol_;
0213                     end_token_ = curr_;
0214                 }
0215             }
0216 
0217             const std::size_t EOL_state_ = ptr_[eol_index];
0218 
0219             if (EOL_state_ && curr_ == end_)
0220             {
0221                 ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
0222 
0223                 if (*ptr_)
0224                 {
0225                     end_state_ = true;
0226                     id_ = *(ptr_ + id_index);
0227                     uid_ = *(ptr_ + unique_id_index);
0228                     end_start_state_ = *(ptr_ + state_index);
0229                     end_bol_ = bol_;
0230                     end_token_ = curr_;
0231                 }
0232             }
0233 
0234             if (end_state_)
0235             {
0236                 // return longest match
0237                 start_state_ = end_start_state_;
0238                 start_token_ = end_token_;
0239 
0240                 if (id_ == 0)
0241                 {
0242                     bol_ = end_bol_;
0243                     goto again;
0244                 }
0245                 else
0246                 {
0247                     _data.bol = end_bol_;
0248                 }
0249             }
0250             else
0251             {
0252                 // No match causes char to be skipped
0253                 _data.bol = *start_token_ == '\n';
0254                 ++start_token_;
0255                 id_ = npos;
0256                 uid_ = npos;
0257             }
0258 
0259             unique_id_ = uid_;
0260             return id_;
0261         }
0262 
0263         std::size_t next (const detail::internals &internals_,
0264             std::size_t &start_state_, FwdIter &start_token_,
0265             FwdIter const &end_, std::size_t &unique_id_)
0266         {
0267             if (start_token_ == end_)
0268             {
0269                 unique_id_ = npos;
0270                 return 0;
0271             }
0272 
0273         again:
0274             const std::size_t * lookup_ = &internals_._lookup[start_state_]->
0275                 front ();
0276             std::size_t dfa_alphabet_ = internals_._dfa_alphabet[start_state_];
0277             const std::size_t *dfa_ = &internals_._dfa[start_state_]->front ();
0278             const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
0279             FwdIter curr_ = start_token_;
0280             bool end_state_ = *ptr_ != 0;
0281             std::size_t id_ = *(ptr_ + id_index);
0282             std::size_t uid_ = *(ptr_ + unique_id_index);
0283             std::size_t end_start_state_ = start_state_;
0284             FwdIter end_token_ = start_token_;
0285 
0286             while (curr_ != end_)
0287             {
0288                 const std::size_t state_ = ptr_[lookup_[static_cast
0289                     <typename Traits::index_type>(*curr_++)]];
0290 
0291                 if (state_ == 0)
0292                 {
0293                     break;
0294                 }
0295 
0296                 ptr_ = &dfa_[state_ * dfa_alphabet_];
0297 
0298                 if (*ptr_)
0299                 {
0300                     end_state_ = true;
0301                     id_ = *(ptr_ + id_index);
0302                     uid_ = *(ptr_ + unique_id_index);
0303                     end_start_state_ = *(ptr_ + state_index);
0304                     end_token_ = curr_;
0305                 }
0306             }
0307 
0308             if (end_state_)
0309             {
0310                 // return longest match
0311                 start_state_ = end_start_state_;
0312                 start_token_ = end_token_;
0313 
0314                 if (id_ == 0) goto again;
0315             }
0316             else
0317             {
0318                 // No match causes char to be skipped
0319                 ++start_token_;
0320                 id_ = npos;
0321                 uid_ = npos;
0322             }
0323 
0324             unique_id_ = uid_;
0325             return id_;
0326         }
0327 
0328         std::size_t next (const std::size_t * const lookup_,
0329             const std::size_t dfa_alphabet_, const std::size_t * const dfa_,
0330             bool bol_, FwdIter &start_token_, FwdIter const &end_,
0331             std::size_t &unique_id_)
0332         {
0333             if (start_token_ == end_)
0334             {
0335                 unique_id_ = npos;
0336                 return 0;
0337             }
0338 
0339             const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
0340             FwdIter curr_ = start_token_;
0341             bool end_state_ = *ptr_ != 0;
0342             std::size_t id_ = *(ptr_ + id_index);
0343             std::size_t uid_ = *(ptr_ + unique_id_index);
0344             bool end_bol_ = bol_;
0345             FwdIter end_token_ = start_token_;
0346 
0347             while (curr_ != end_)
0348             {
0349                 const std::size_t BOL_state_ = ptr_[bol_index];
0350                 const std::size_t EOL_state_ = ptr_[eol_index];
0351 
0352                 if (BOL_state_ && bol_)
0353                 {
0354                     ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
0355                 }
0356                 else if (EOL_state_ && *curr_ == '\n')
0357                 {
0358                     ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
0359                 }
0360                 else
0361                 {
0362                     typename Traits::char_type prev_char_ = *curr_++;
0363 
0364                     bol_ = prev_char_ == '\n';
0365 
0366                     const std::size_t state_ =
0367                         ptr_[lookup_[static_cast<typename Traits::index_type>
0368                         (prev_char_)]];
0369 
0370                     if (state_ == 0)
0371                     {
0372                         break;
0373                     }
0374 
0375                     ptr_ = &dfa_[state_ * dfa_alphabet_];
0376                 }
0377 
0378                 if (*ptr_)
0379                 {
0380                     end_state_ = true;
0381                     id_ = *(ptr_ + id_index);
0382                     uid_ = *(ptr_ + unique_id_index);
0383                     end_bol_ = bol_;
0384                     end_token_ = curr_;
0385                 }
0386             }
0387 
0388             const std::size_t EOL_state_ = ptr_[eol_index];
0389 
0390             if (EOL_state_ && curr_ == end_)
0391             {
0392                 ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
0393 
0394                 if (*ptr_)
0395                 {
0396                     end_state_ = true;
0397                     id_ = *(ptr_ + id_index);
0398                     uid_ = *(ptr_ + unique_id_index);
0399                     end_bol_ = bol_;
0400                     end_token_ = curr_;
0401                 }
0402             }
0403 
0404             if (end_state_)
0405             {
0406                 // return longest match
0407                 _data.bol = end_bol_;
0408                 start_token_ = end_token_;
0409             }
0410             else
0411             {
0412                 // No match causes char to be skipped
0413                 _data.bol = *start_token_ == '\n';
0414                 ++start_token_;
0415                 id_ = npos;
0416                 uid_ = npos;
0417             }
0418 
0419             unique_id_ = uid_;
0420             return id_;
0421         }
0422 
0423         std::size_t next (const std::size_t * const lookup_,
0424             const std::size_t dfa_alphabet_, const std::size_t * const dfa_,
0425             FwdIter &start_token_, FwdIter const &end_,
0426             std::size_t &unique_id_)
0427         {
0428             if (start_token_ == end_)
0429             {
0430                 unique_id_ = npos;
0431                 return 0;
0432             }
0433 
0434             const std::size_t *ptr_ = dfa_ + dfa_alphabet_;
0435             FwdIter curr_ = start_token_;
0436             bool end_state_ = *ptr_ != 0;
0437             std::size_t id_ = *(ptr_ + id_index);
0438             std::size_t uid_ = *(ptr_ + unique_id_index);
0439             FwdIter end_token_ = start_token_;
0440 
0441             while (curr_ != end_)
0442             {
0443                 const std::size_t state_ = ptr_[lookup_[static_cast
0444                     <typename Traits::index_type>(*curr_++)]];
0445 
0446                 if (state_ == 0)
0447                 {
0448                     break;
0449                 }
0450 
0451                 ptr_ = &dfa_[state_ * dfa_alphabet_];
0452 
0453                 if (*ptr_)
0454                 {
0455                     end_state_ = true;
0456                     id_ = *(ptr_ + id_index);
0457                     uid_ = *(ptr_ + unique_id_index);
0458                     end_token_ = curr_;
0459                 }
0460             }
0461 
0462             if (end_state_)
0463             {
0464                 // return longest match
0465                 start_token_ = end_token_;
0466             }
0467             else
0468             {
0469                 // No match causes char to be skipped
0470                 ++start_token_;
0471                 id_ = npos;
0472                 uid_ = npos;
0473             }
0474 
0475             unique_id_ = uid_;
0476             return id_;
0477         }
0478     };
0479 
0480     friend class iterator;
0481 
0482     // Make it explicit that we are NOT taking a copy of state_machine_!
0483     basic_input (const basic_state_machine<typename Traits::char_type>
0484         *state_machine_, const FwdIter &begin_, const FwdIter &end_) :
0485         _state_machine (state_machine_),
0486         _begin (begin_),
0487         _end (end_)
0488     {
0489     }
0490 
0491     iterator begin () const
0492     {
0493         iterator iter_;
0494 
0495         iter_._input = this;
0496         // Over-ride default of 0 (EOI)
0497         iter_._data.id = npos;
0498         iter_._data.start = _begin;
0499         iter_._data.end = _begin;
0500         iter_._data.bol = _state_machine->data ()._seen_BOL_assertion;
0501         iter_._data.state = 0;
0502         ++iter_;
0503         return iter_;
0504     }
0505 
0506     iterator end () const
0507     {
0508         iterator iter_;
0509 
0510         iter_._input = this;
0511         iter_._data.start = _end;
0512         iter_._data.end = _end;
0513         return iter_;
0514     }
0515 
0516 private:
0517     const basic_state_machine<typename Traits::char_type> *_state_machine;
0518     FwdIter _begin;
0519     FwdIter _end;
0520 };
0521 
0522 typedef basic_input<std::string::iterator> iter_input;
0523 typedef basic_input<std::basic_string<wchar_t>::iterator> iter_winput;
0524 typedef basic_input<const char *> ptr_input;
0525 typedef basic_input<const wchar_t *> ptr_winput;
0526 }
0527 }
0528 
0529 #endif