File indexing completed on 2025-01-19 09:47:50
0001
0002
0003
0004
0005
0006 #ifndef BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_GENERATE_RE2C_HPP
0007 #define BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_GENERATE_RE2C_HPP
0008
0009 #include "char_traits.hpp"
0010 #include "consts.hpp"
0011 #include "internals.hpp"
0012 #include "runtime_error.hpp"
0013 #include "size_t.hpp"
0014 #include "state_machine.hpp"
0015 #include <iosfwd>
0016 #include <vector>
0017
0018 namespace boost
0019 {
0020 namespace lexer
0021 {
0022
0023 template <typename Char>
0024 bool need_label0_0(boost::lexer::basic_state_machine<Char> const &sm_)
0025 {
0026 typedef typename boost::lexer::basic_state_machine<Char>::iterator
0027 iterator_type;
0028 iterator_type iter_ = sm_.begin();
0029 std::size_t states_ = iter_->states;
0030
0031 for (std::size_t state_ = 0; state_ < states_; ++state_)
0032 {
0033 if (0 == iter_->bol_index || 0 == iter_->eol_index)
0034 {
0035 return true;
0036 }
0037
0038 std::size_t const transitions_ = iter_->transitions;
0039 for (std::size_t t_ = 0; t_ < transitions_; ++t_)
0040 {
0041 if (0 == iter_->goto_state)
0042 {
0043 return true;
0044 }
0045 ++iter_;
0046 }
0047 if (transitions_ == 0) ++iter_;
0048 }
0049 return false;
0050 }
0051
0052 template<typename CharT>
0053 void generate_re2c (const basic_state_machine<CharT> &state_machine_,
0054 std::ostream &os_, const bool use_pointers_ = false,
0055 const bool skip_unknown_ = true, const bool optimise_parameters_ = true,
0056 const char *name_ = "next_token")
0057 {
0058 typedef typename boost::lexer::basic_string_token<CharT> string_token;
0059 const detail::internals &sm_ = state_machine_.data ();
0060
0061 if (sm_._lookup->size () == 0)
0062 {
0063 throw runtime_error ("Cannot generate code from an empty "
0064 "state machine");
0065 }
0066
0067 std::string upper_name_ (__DATE__);
0068 const std::size_t lookups_ = sm_._lookup->front ()->size ();
0069 typename boost::lexer::basic_state_machine<CharT>::iterator iter_ =
0070 state_machine_.begin();
0071 typename boost::lexer::basic_state_machine<CharT>::iterator end_ =
0072 state_machine_.end();
0073 const std::size_t dfas_ = sm_._dfa->size ();
0074 std::string::size_type pos_ = upper_name_.find (' ');
0075 const char *iterator_ = 0;
0076
0077 if (use_pointers_)
0078 {
0079 if (lookups_ == 256)
0080 {
0081 iterator_ = "const char *";
0082 }
0083 else
0084 {
0085 iterator_ = "const wchar_t *";
0086 }
0087 }
0088 else
0089 {
0090 iterator_ = "Iterator &";
0091 }
0092
0093 while (pos_ != std::string::npos)
0094 {
0095 upper_name_.replace (pos_, 1, "_");
0096 pos_ = upper_name_.find (' ', pos_);
0097 }
0098
0099 upper_name_ += '_';
0100 upper_name_ += __TIME__;
0101
0102 pos_ = upper_name_.find (':');
0103
0104 while (pos_ != std::string::npos)
0105 {
0106 upper_name_.erase (pos_, 1);
0107 pos_ = upper_name_.find (':', pos_);
0108 }
0109
0110 upper_name_ = '_' + upper_name_;
0111 upper_name_ = name_ + upper_name_;
0112 std::transform (upper_name_.begin (), upper_name_.end (),
0113 upper_name_.begin (), ::toupper);
0114 os_ << "#ifndef " << upper_name_ + '\n';
0115 os_ << "#define " << upper_name_ + '\n';
0116 os_ << "// Copyright (c) 2008-2009 Ben Hanson\n";
0117 os_ << "//\n";
0118 os_ << "// Distributed under the Boost Software License, "
0119 "Version 1.0. (See accompanying\n";
0120 os_ << "// file licence_1_0.txt or copy at "
0121 "http://www.boost.org/LICENSE_1_0.txt)\n\n";
0122 os_ << "// Auto-generated by boost::lexer\n";
0123 os_ << "template<typename Iterator>\n";
0124 os_ << "std::size_t " << name_ << " (";
0125
0126 if (dfas_ > 1 || !optimise_parameters_)
0127 {
0128 os_ << "std::size_t &start_state_, ";
0129 }
0130
0131 if (use_pointers_)
0132 {
0133 os_ << iterator_ << " &";
0134 }
0135 else
0136 {
0137 os_ << iterator_;
0138 }
0139
0140 os_ << "start_token_, ";
0141
0142 if (use_pointers_)
0143 {
0144 os_ << iterator_ << " const ";
0145 }
0146 else
0147 {
0148 os_ << "const " << iterator_;
0149 }
0150
0151 os_ << "end_, \n";
0152 os_ << " std::size_t &unique_id_";
0153
0154 if (sm_._seen_BOL_assertion || !optimise_parameters_)
0155 {
0156 os_ << ", bool &beg_of_line_";
0157 }
0158
0159 os_ << ")\n";
0160 os_ << "{\n";
0161 os_ << " static const std::size_t npos = static_cast"
0162 "<std::size_t>(~0);\n";
0163 os_ << "\n if (start_token_ == end_)\n";
0164 os_ << " {\n";
0165 os_ << " unique_id_ = npos;\n";
0166 os_ << " return 0;\n";
0167 os_ << " }\n\n";
0168
0169 if (dfas_ > 1)
0170 {
0171 os_ << "again:\n";
0172 }
0173
0174 os_ << " Iterator curr_ = start_token_;\n";
0175 os_ << " bool end_state_ = false;\n";
0176 os_ << " std::size_t id_ = npos;\n";
0177 os_ << " std::size_t uid_ = npos;\n";
0178
0179 if (dfas_ > 1)
0180 {
0181 os_ << " std::size_t end_start_state_ = start_state_;\n";
0182 }
0183
0184 if (sm_._seen_BOL_assertion)
0185 {
0186 os_ << " bool bol_ = beg_of_line_;\n";
0187 os_ << " bool end_bol_ = bol_;\n";
0188 }
0189
0190 os_ << " Iterator end_token_ = start_token_;\n";
0191 os_ << '\n';
0192
0193 if (dfas_ > 1)
0194 {
0195 os_ << " switch (start_state_)\n";
0196 os_ << " {\n";
0197
0198 for (std::size_t i_ = 0; i_ < dfas_; ++i_)
0199 {
0200 os_ << " case " << i_ << ":\n";
0201 os_ << " goto " << i_ << "_0;\n";
0202 os_ << " // Not needed, but to prevent warnings\n";
0203 os_ << " break;\n";
0204 }
0205
0206 os_ << " default:\n";
0207 os_ << " throw std::runtime_error (\"Invalid start state!\")\n";
0208 os_ << " break;\n";
0209 os_ << " }\n\n";
0210 }
0211
0212 os_ << " ";
0213
0214 if (lookups_ == 256)
0215 {
0216 os_ << "char";
0217 }
0218 else
0219 {
0220 os_ << "wchar_t";
0221 }
0222
0223 os_ << " ch_ = 0;\n\n";
0224
0225 bool need_state0_0_label = need_label0_0(state_machine_);
0226
0227 for (std::size_t dfa_ = 0; dfa_ < dfas_; ++dfa_)
0228 {
0229 const std::size_t states_ = iter_->states;
0230
0231 for (std::size_t state_ = 0; state_ < states_; ++state_)
0232 {
0233 const std::size_t transitions_ = iter_->transitions;
0234 std::size_t t_ = 0;
0235
0236 if (dfas_ > 1 || dfa_ != 0 || state_ != 0 || need_state0_0_label)
0237 {
0238 os_ << "state" << dfa_ << '_' << state_ << ":\n";
0239 }
0240
0241 if (iter_->end_state)
0242 {
0243 os_ << " end_state_ = true;\n";
0244 os_ << " id_ = " << iter_->id << ";\n";
0245 os_ << " uid_ = " << iter_->unique_id << ";\n";
0246 os_ << " end_token_ = curr_;\n";
0247
0248 if (dfas_ > 1)
0249 {
0250 os_ << " end_start_state_ = " << iter_->goto_dfa <<
0251 ";\n";
0252 }
0253
0254 if (sm_._seen_BOL_assertion)
0255 {
0256 os_ << " end_bol_ = bol_;\n";
0257 }
0258
0259 if (transitions_) os_ << '\n';
0260 }
0261
0262 if (t_ < transitions_ || iter_->bol_index != boost::lexer::npos ||
0263 iter_->eol_index != boost::lexer::npos)
0264 {
0265 os_ << " if (curr_ == end_) goto end;\n\n";
0266 os_ << " ch_ = *curr_;\n";
0267
0268 if (iter_->bol_index != boost::lexer::npos)
0269 {
0270 os_ << "\n if (bol_) goto state" << dfa_ << '_' <<
0271 iter_->bol_index << ";\n\n";
0272 }
0273
0274 if (iter_->eol_index != boost::lexer::npos)
0275 {
0276 os_ << "\n if (ch_ == '\n') goto state" << dfa_ << '_' <<
0277 iter_->eol_index << ";\n\n";
0278 }
0279
0280 os_ << " ++curr_;\n";
0281 }
0282
0283 for (; t_ < transitions_; ++t_)
0284 {
0285 const char *ptr_ = iter_->token._charset.c_str();
0286 const char *end_ = ptr_ + iter_->token._charset.size();
0287 char start_char_ = 0;
0288 char curr_char_ = 0;
0289 bool range_ = false;
0290 bool first_char_ = true;
0291
0292 os_ << "\n if (";
0293
0294 while (ptr_ != end_)
0295 {
0296 curr_char_ = *ptr_++;
0297
0298 if (*ptr_ == curr_char_ + 1)
0299 {
0300 if (!range_)
0301 {
0302 start_char_ = curr_char_;
0303 }
0304
0305 range_ = true;
0306 }
0307 else
0308 {
0309 if (!first_char_)
0310 {
0311 if (iter_->token._negated)
0312 {
0313 os_ << " && ";
0314 }
0315 else
0316 {
0317 os_ << " || ";
0318 }
0319 }
0320
0321 first_char_ = false;
0322
0323 if (range_)
0324 {
0325 typename string_token::string temp_;
0326
0327 if (iter_->token._negated)
0328 {
0329 os_ << "!";
0330 }
0331
0332 string_token::escape_char (start_char_, temp_);
0333 os_ << "(ch_ >= '" << temp_;
0334 temp_.clear ();
0335 string_token::escape_char (curr_char_, temp_);
0336 os_ << "' && ch_ <= '" << temp_ << "')";
0337 range_ = false;
0338 }
0339 else
0340 {
0341 typename string_token::string temp_;
0342
0343 os_ << "ch_ ";
0344
0345 if (iter_->token._negated)
0346 {
0347 os_ << "!=";
0348 }
0349 else
0350 {
0351 os_ << "==";
0352 }
0353
0354 string_token::escape_char (curr_char_, temp_);
0355 os_ << " '" << temp_ << "'";
0356 }
0357 }
0358 }
0359
0360 os_ << ") goto state" << dfa_ << '_' << iter_->goto_state <<
0361 ";\n\n";
0362 ++iter_;
0363 }
0364
0365 if (!(dfa_ == dfas_ - 1 && state_ == states_ - 1))
0366 {
0367 os_ << " goto end;\n";
0368 }
0369
0370 if (transitions_ == 0) ++iter_;
0371 }
0372 }
0373
0374 os_ << "end:\n";
0375 os_ << " if (end_state_)\n";
0376 os_ << " {\n";
0377 os_ << " // return longest match\n";
0378
0379 if (dfas_ > 1)
0380 {
0381 os_ << " start_state_ = end_start_state_;\n";
0382 }
0383
0384 if (sm_._seen_BOL_assertion && dfas_ < 2)
0385 {
0386 os_ << " beg_of_line_ = end_bol_;\n";
0387 }
0388
0389 os_ << " start_token_ = end_token_;\n";
0390
0391 if (dfas_ > 1)
0392 {
0393 os_ << '\n';
0394 os_ << " if (id_ == 0)\n";
0395 os_ << " {\n";
0396
0397 if (sm_._seen_BOL_assertion)
0398 {
0399 os_ << " bol_ = end_bol_;\n";
0400 }
0401
0402 os_ << " goto again;\n";
0403 os_ << " }\n";
0404
0405 if (sm_._seen_BOL_assertion)
0406 {
0407 os_ << " else\n";
0408 os_ << " {\n";
0409 os_ << " beg_of_line_ = end_bol_;\n";
0410 os_ << " }\n";
0411 }
0412 }
0413
0414 os_ << " }\n";
0415 os_ << " else\n";
0416 os_ << " {\n";
0417
0418 if (sm_._seen_BOL_assertion)
0419 {
0420 os_ << " beg_of_line_ = *start_token_ == '\\n';\n";
0421 }
0422
0423 if (skip_unknown_)
0424 {
0425 os_ << " // No match causes char to be skipped\n";
0426 os_ << " ++start_token_;\n";
0427 }
0428
0429 os_ << " id_ = npos;\n";
0430 os_ << " uid_ = npos;\n";
0431 os_ << " }\n";
0432 os_ << '\n';
0433 os_ << " unique_id_ = uid_;\n";
0434 os_ << " return id_;\n";
0435 os_ << "}\n";
0436 os_ << "\n#endif\n";
0437 }
0438 }
0439 }
0440 #endif