Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-19 09:47:50

0001 // generate_re2c.hpp
0002 // Copyright (c) 2009 Ben Hanson (http://www.benhanson.net/)
0003 //
0004 // Distributed under the Boost Software License, Version 1.0. (See accompanying
0005 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
0006 #ifndef BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_GENERATE_RE2C_HPP
0007 #define BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_GENERATE_RE2C_HPP
0008 
0009 #include "char_traits.hpp"
0010 #include "consts.hpp"
0011 #include "internals.hpp"
0012 #include "runtime_error.hpp"
0013 #include "size_t.hpp"
0014 #include "state_machine.hpp"
0015 #include <iosfwd>
0016 #include <vector>
0017 
0018 namespace boost
0019 {
0020 namespace lexer
0021 {
0022 // check whether state0_0 is referenced from any of the other states
0023 template <typename Char>
0024 bool need_label0_0(boost::lexer::basic_state_machine<Char> const &sm_)
0025 {
0026     typedef typename boost::lexer::basic_state_machine<Char>::iterator
0027         iterator_type;
0028     iterator_type iter_ = sm_.begin();
0029     std::size_t states_ = iter_->states;
0030 
0031     for (std::size_t state_ = 0; state_ < states_; ++state_)
0032     {
0033         if (0 == iter_->bol_index || 0 == iter_->eol_index)
0034         {
0035             return true;
0036         }
0037 
0038         std::size_t const transitions_ = iter_->transitions;
0039         for (std::size_t t_ = 0; t_ < transitions_; ++t_)
0040         {
0041             if (0 == iter_->goto_state)
0042             {
0043                 return true;
0044             }
0045             ++iter_;
0046         }
0047         if (transitions_ == 0) ++iter_;
0048     }
0049     return false;
0050 }
0051 
0052 template<typename CharT>
0053 void generate_re2c (const basic_state_machine<CharT> &state_machine_,
0054     std::ostream &os_, const bool use_pointers_ = false,
0055     const bool skip_unknown_ = true, const bool optimise_parameters_ = true,
0056     const char *name_ = "next_token")
0057 {
0058     typedef typename boost::lexer::basic_string_token<CharT> string_token;
0059     const detail::internals &sm_ = state_machine_.data ();
0060 
0061     if (sm_._lookup->size () == 0)
0062     {
0063         throw runtime_error ("Cannot generate code from an empty "
0064             "state machine");
0065     }
0066 
0067     std::string upper_name_ (__DATE__);
0068     const std::size_t lookups_ = sm_._lookup->front ()->size ();
0069     typename boost::lexer::basic_state_machine<CharT>::iterator iter_ =
0070         state_machine_.begin();
0071     typename boost::lexer::basic_state_machine<CharT>::iterator end_ =
0072         state_machine_.end();
0073     const std::size_t dfas_ = sm_._dfa->size ();
0074     std::string::size_type pos_ = upper_name_.find (' ');
0075     const char *iterator_ = 0;
0076 
0077     if (use_pointers_)
0078     {
0079         if (lookups_ == 256)
0080         {
0081             iterator_ = "const char *";
0082         }
0083         else
0084         {
0085             iterator_ = "const wchar_t *";
0086         }
0087     }
0088     else
0089     {
0090         iterator_ = "Iterator &";
0091     }
0092 
0093     while (pos_ != std::string::npos)
0094     {
0095         upper_name_.replace (pos_, 1, "_");
0096         pos_ = upper_name_.find (' ', pos_);
0097     }
0098 
0099     upper_name_ += '_';
0100     upper_name_ +=  __TIME__;
0101 
0102     pos_ = upper_name_.find (':');
0103 
0104     while (pos_ != std::string::npos)
0105     {
0106         upper_name_.erase (pos_, 1);
0107         pos_ = upper_name_.find (':', pos_);
0108     }
0109 
0110     upper_name_ = '_' + upper_name_;
0111     upper_name_ = name_ + upper_name_;
0112     std::transform (upper_name_.begin (), upper_name_.end (),
0113         upper_name_.begin (), ::toupper);
0114     os_ << "#ifndef " << upper_name_ + '\n';
0115     os_ << "#define " << upper_name_ + '\n';
0116     os_ << "// Copyright (c) 2008-2009 Ben Hanson\n";
0117     os_ << "//\n";
0118     os_ << "// Distributed under the Boost Software License, "
0119         "Version 1.0. (See accompanying\n";
0120     os_ << "// file licence_1_0.txt or copy at "
0121         "http://www.boost.org/LICENSE_1_0.txt)\n\n";
0122     os_ << "// Auto-generated by boost::lexer\n";
0123     os_ << "template<typename Iterator>\n";
0124     os_ << "std::size_t " << name_  << " (";
0125 
0126     if (dfas_ > 1 || !optimise_parameters_)
0127     {
0128         os_ << "std::size_t &start_state_, ";
0129     }
0130 
0131     if (use_pointers_)
0132     {
0133         os_ << iterator_ << " &";
0134     }
0135     else
0136     {
0137         os_ << iterator_;
0138     }
0139 
0140     os_ << "start_token_, ";
0141 
0142     if (use_pointers_)
0143     {
0144         os_ << iterator_ << " const ";
0145     }
0146     else
0147     {
0148         os_ << "const " << iterator_;
0149     }
0150 
0151     os_ << "end_, \n";
0152     os_ << "    std::size_t &unique_id_";
0153 
0154     if (sm_._seen_BOL_assertion || !optimise_parameters_)
0155     {
0156         os_ << ", bool &beg_of_line_";
0157     }
0158 
0159     os_ << ")\n";
0160     os_ << "{\n";
0161     os_ << "    static const std::size_t npos = static_cast"
0162         "<std::size_t>(~0);\n";
0163     os_ << "\n    if (start_token_ == end_)\n";
0164     os_ << "    {\n";
0165     os_ << "        unique_id_ = npos;\n";
0166     os_ << "        return 0;\n";
0167     os_ << "    }\n\n";
0168 
0169     if (dfas_ > 1)
0170     {
0171         os_ << "again:\n";
0172     }
0173 
0174     os_ << "    Iterator curr_ = start_token_;\n";
0175     os_ << "    bool end_state_ = false;\n";
0176     os_ << "    std::size_t id_ = npos;\n";
0177     os_ << "    std::size_t uid_ = npos;\n";
0178 
0179     if (dfas_ > 1)
0180     {
0181         os_ << "    std::size_t end_start_state_ = start_state_;\n";
0182     }
0183 
0184     if (sm_._seen_BOL_assertion)
0185     {
0186         os_ << "    bool bol_ = beg_of_line_;\n";
0187         os_ << "    bool end_bol_ = bol_;\n";
0188     }
0189 
0190     os_ << "    Iterator end_token_ = start_token_;\n";
0191     os_ << '\n';
0192 
0193     if (dfas_ > 1)
0194     {
0195         os_ << "    switch (start_state_)\n";
0196         os_ << "    {\n";
0197 
0198         for (std::size_t i_ = 0; i_ < dfas_; ++i_)
0199         {
0200             os_ << "    case " << i_ << ":\n";
0201             os_ << "        goto " << i_ << "_0;\n";
0202             os_ << "        // Not needed, but to prevent warnings\n";
0203             os_ << "        break;\n";
0204         }
0205 
0206         os_ << "    default:\n";
0207         os_ << "        throw std::runtime_error (\"Invalid start state!\")\n";
0208         os_ << "        break;\n";
0209         os_ << "    }\n\n";
0210     }
0211 
0212     os_ << "    ";
0213 
0214     if (lookups_ == 256)
0215     {
0216         os_ << "char";
0217     }
0218     else
0219     {
0220         os_ << "wchar_t";
0221     }
0222 
0223     os_ << " ch_ = 0;\n\n";
0224 
0225     bool need_state0_0_label = need_label0_0(state_machine_);
0226 
0227     for (std::size_t dfa_ = 0; dfa_ < dfas_; ++dfa_)
0228     {
0229         const std::size_t states_ = iter_->states;
0230 
0231         for (std::size_t state_ = 0; state_ < states_; ++state_)
0232         {
0233             const std::size_t transitions_ = iter_->transitions;
0234             std::size_t t_ = 0;
0235 
0236             if (dfas_ > 1 || dfa_ != 0 || state_ != 0 || need_state0_0_label)
0237             {
0238                 os_ << "state" << dfa_ << '_' << state_ << ":\n";
0239             }
0240 
0241             if (iter_->end_state)
0242             {
0243                 os_ << "    end_state_ = true;\n";
0244                 os_ << "    id_ = " << iter_->id << ";\n";
0245                 os_ << "    uid_ = " << iter_->unique_id << ";\n";
0246                 os_ << "    end_token_ = curr_;\n";
0247 
0248                 if (dfas_ > 1)
0249                 {
0250                     os_ << "    end_start_state_ = " << iter_->goto_dfa <<
0251                         ";\n";
0252                 }
0253 
0254                 if (sm_._seen_BOL_assertion)
0255                 {
0256                     os_ << "    end_bol_ = bol_;\n";
0257                 }
0258 
0259                 if (transitions_) os_ << '\n';
0260             }
0261 
0262             if (t_ < transitions_ || iter_->bol_index != boost::lexer::npos ||
0263                 iter_->eol_index != boost::lexer::npos)
0264             {
0265                 os_ << "    if (curr_ == end_) goto end;\n\n";
0266                 os_ << "    ch_ = *curr_;\n";
0267 
0268                 if (iter_->bol_index != boost::lexer::npos)
0269                 {
0270                     os_ << "\n    if (bol_) goto state" << dfa_ << '_' <<
0271                         iter_->bol_index << ";\n\n";
0272                 }
0273 
0274                 if (iter_->eol_index != boost::lexer::npos)
0275                 {
0276                     os_ << "\n    if (ch_ == '\n') goto state" << dfa_ << '_' <<
0277                         iter_->eol_index << ";\n\n";
0278                 }
0279 
0280                 os_ << "    ++curr_;\n";
0281             }
0282 
0283             for (; t_ < transitions_; ++t_)
0284             {
0285                 const char *ptr_ = iter_->token._charset.c_str();
0286                 const char *end_ = ptr_ + iter_->token._charset.size();
0287                 char start_char_ = 0;
0288                 char curr_char_ = 0;
0289                 bool range_ = false;
0290                 bool first_char_ = true;
0291 
0292                 os_ << "\n    if (";
0293 
0294                 while (ptr_ != end_)
0295                 {
0296                     curr_char_ = *ptr_++;
0297 
0298                     if (*ptr_ == curr_char_ + 1)
0299                     {
0300                         if (!range_)
0301                         {
0302                             start_char_ = curr_char_;
0303                         }
0304 
0305                         range_ = true;
0306                     }
0307                     else
0308                     {
0309                         if (!first_char_)
0310                         {
0311                             if (iter_->token._negated)
0312                             {
0313                                 os_ << " && ";
0314                             }
0315                             else
0316                             {
0317                                 os_ << " || ";
0318                             }
0319                         }
0320 
0321                         first_char_ = false;
0322 
0323                         if (range_)
0324                         {
0325                             typename string_token::string temp_;
0326 
0327                             if (iter_->token._negated)
0328                             {
0329                                 os_ << "!";
0330                             }
0331 
0332                             string_token::escape_char (start_char_, temp_);
0333                             os_ << "(ch_ >= '" << temp_;
0334                             temp_.clear ();
0335                             string_token::escape_char (curr_char_, temp_);
0336                             os_ << "' && ch_ <= '" << temp_ << "')";
0337                             range_ = false;
0338                         }
0339                         else
0340                         {
0341                             typename string_token::string temp_;
0342 
0343                             os_ << "ch_ ";
0344 
0345                             if (iter_->token._negated)
0346                             {
0347                                 os_ << "!=";
0348                             }
0349                             else
0350                             {
0351                                 os_ << "==";
0352                             }
0353 
0354                             string_token::escape_char (curr_char_, temp_);
0355                             os_ << " '" << temp_ << "'";
0356                         }
0357                     }
0358                 }
0359 
0360                 os_ << ") goto state" << dfa_ << '_' << iter_->goto_state <<
0361                     ";\n\n";
0362                 ++iter_;
0363             }
0364 
0365             if (!(dfa_ == dfas_ - 1 && state_ == states_ - 1))
0366             {
0367                 os_ << "    goto end;\n";
0368             }
0369 
0370             if (transitions_ == 0) ++iter_;
0371         }
0372     }
0373 
0374     os_ << "end:\n";
0375     os_ << "    if (end_state_)\n";
0376     os_ << "    {\n";
0377     os_ << "        // return longest match\n";
0378 
0379     if (dfas_ > 1)
0380     {
0381         os_ << "        start_state_ = end_start_state_;\n";
0382     }
0383 
0384     if (sm_._seen_BOL_assertion && dfas_ < 2)
0385     {
0386         os_ << "        beg_of_line_ = end_bol_;\n";
0387     }
0388 
0389     os_ << "        start_token_ = end_token_;\n";
0390 
0391     if (dfas_ > 1)
0392     {
0393         os_ << '\n';
0394         os_ << "        if (id_ == 0)\n";
0395         os_ << "        {\n";
0396 
0397         if (sm_._seen_BOL_assertion)
0398         {
0399             os_ << "            bol_ = end_bol_;\n";
0400         }
0401 
0402         os_ << "            goto again;\n";
0403         os_ << "        }\n";
0404 
0405         if (sm_._seen_BOL_assertion)
0406         {
0407             os_ << "        else\n";
0408             os_ << "        {\n";
0409             os_ << "            beg_of_line_ = end_bol_;\n";
0410             os_ << "        }\n";
0411         }
0412     }
0413 
0414     os_ << "    }\n";
0415     os_ << "    else\n";
0416     os_ << "    {\n";
0417 
0418     if (sm_._seen_BOL_assertion)
0419     {
0420         os_ << "        beg_of_line_ = *start_token_ == '\\n';\n";
0421     }
0422 
0423     if (skip_unknown_)
0424     {
0425         os_ << "        // No match causes char to be skipped\n";
0426         os_ << "        ++start_token_;\n";
0427     }
0428 
0429     os_ << "        id_ = npos;\n";
0430     os_ << "        uid_ = npos;\n";
0431     os_ << "    }\n";
0432     os_ << '\n';
0433     os_ << "    unique_id_ = uid_;\n";
0434     os_ << "    return id_;\n";
0435     os_ << "}\n";
0436     os_ << "\n#endif\n";
0437 }
0438 }
0439 }
0440 #endif