File indexing completed on 2025-01-31 10:02:15
0001
0002
0003
0004
0005
0006
0007 #if !defined(BOOST_SPIRIT_LEX_LEXERTL_GENERATE_CPP_FEB_10_2008_0855PM)
0008 #define BOOST_SPIRIT_LEX_LEXERTL_GENERATE_CPP_FEB_10_2008_0855PM
0009
0010 #if defined(_MSC_VER)
0011 #pragma once
0012 #endif
0013
0014 #include <boost/spirit/home/support/detail/lexer/char_traits.hpp>
0015 #include <boost/spirit/home/support/detail/lexer/consts.hpp>
0016 #include <boost/spirit/home/support/detail/lexer/rules.hpp>
0017 #include <boost/spirit/home/support/detail/lexer/size_t.hpp>
0018 #include <boost/spirit/home/support/detail/lexer/state_machine.hpp>
0019 #include <boost/spirit/home/support/detail/lexer/debug.hpp>
0020 #include <boost/spirit/home/lex/lexer/lexertl/static_version.hpp>
0021 #include <boost/scoped_array.hpp>
0022 #include <cstring>
0023 #include <locale>
0024
0025
0026 namespace boost { namespace spirit { namespace lex { namespace lexertl
0027 {
0028 namespace detail
0029 {
0030
0031
// Maps narrow (char based) literals onto the character type `CharT`.
// Only the explicit specializations are defined.
template <typename CharT>
struct string_lit;

// Narrow specialization: characters and strings are passed through unchanged.
template <>
struct string_lit<char>
{
    // Returns the given character as-is.
    static char get(char c)
    {
        return c;
    }

    // Returns a std::string copy of the given C string (empty by default).
    static std::string get(char const* str = "")
    {
        return std::string(str);
    }
};
0041
0042 template <>
0043 struct string_lit<wchar_t>
0044 {
0045 static wchar_t get(char c)
0046 {
0047 typedef std::ctype<wchar_t> ctype_t;
0048 return std::use_facet<ctype_t>(std::locale()).widen(c);
0049 }
0050 static std::basic_string<wchar_t> get(char const* source = "")
0051 {
0052 using namespace std;
0053 size_t len = strlen(source);
0054 boost::scoped_array<wchar_t> result (new wchar_t[len+1]);
0055 result.get()[len] = '\0';
0056
0057
0058
0059 BOOST_ASSERT(std::has_facet<std::ctype<wchar_t> >(std::locale()));
0060
0061 std::use_facet<std::ctype<wchar_t> >(std::locale())
0062 .widen(source, source + len, result.get());
0063 return result.get();
0064 }
0065 };
0066
0067 template <typename Char>
0068 inline Char L(char c)
0069 {
0070 return string_lit<Char>::get(c);
0071 }
0072
0073 template <typename Char>
0074 inline std::basic_string<Char> L(char const* c = "")
0075 {
0076 return string_lit<Char>::get(c);
0077 }
0078
0079
// Writes a separator line consisting of 80 '/' characters followed by a
// newline. Returns whether the stream is still in a good state afterwards.
template <typename Char>
inline bool
generate_delimiter(std::basic_ostream<Char> &os_)
{
    std::basic_string<Char> const separator(80, '/');

    os_ << separator;
    os_ << "\n";
    return os_.good();
}
0087
0088
0089
0090
0091
0092 template <typename Char>
0093 inline bool
0094 generate_cpp_state_info (boost::lexer::basic_rules<Char> const& rules_
0095 , std::basic_ostream<Char> &os_, Char const* name_suffix)
0096 {
0097
0098
0099 typedef typename
0100 boost::lexer::basic_rules<Char>::string_size_t_map::const_iterator
0101 state_iterator;
0102 typedef std::map<std::size_t, Char const*> reverse_state_map_type;
0103
0104 reverse_state_map_type reverse_state_map;
0105 state_iterator send = rules_.statemap().end();
0106 for (state_iterator sit = rules_.statemap().begin(); sit != send; ++sit)
0107 {
0108 typedef typename reverse_state_map_type::value_type value_type;
0109 reverse_state_map.insert(value_type((*sit).second, (*sit).first.c_str()));
0110 }
0111
0112 generate_delimiter(os_);
0113 os_ << "// this table defines the names of the lexer states\n";
0114 os_ << boost::lexer::detail::strings<Char>::char_name()
0115 << " const* const lexer_state_names"
0116 << (name_suffix[0] ? "_" : "") << name_suffix
0117 << "[" << rules_.statemap().size() << "] = \n{\n";
0118
0119 typedef typename reverse_state_map_type::iterator iterator;
0120 iterator rend = reverse_state_map.end();
0121 std::size_t last_id = 0;
0122 for (iterator rit = reverse_state_map.begin(); rit != rend; ++last_id)
0123 {
0124 for (; last_id < (*rit).first; ++last_id)
0125 {
0126 os_ << " 0, // \"<undefined state>\"\n";
0127 }
0128 os_ << " "
0129 << boost::lexer::detail::strings<Char>::char_prefix()
0130 << "\"" << (*rit).second << "\"";
0131 if (++rit != rend)
0132 os_ << ",\n";
0133 else
0134 os_ << "\n";
0135 }
0136 os_ << "};\n\n";
0137
0138 generate_delimiter(os_);
0139 os_ << "// this variable defines the number of lexer states\n";
0140 os_ << "std::size_t const lexer_state_count"
0141 << (name_suffix[0] ? "_" : "") << name_suffix
0142 << " = " << rules_.statemap().size() << ";\n\n";
0143 return os_.good();
0144 }
0145
0146 template <typename Char>
0147 inline bool
0148 generate_cpp_state_table (std::basic_ostream<Char> &os_
0149 , Char const* name_suffix, bool bol, bool eol)
0150 {
0151 std::basic_string<Char> suffix(L<Char>(name_suffix[0] ? "_" : ""));
0152 suffix += name_suffix;
0153
0154 generate_delimiter(os_);
0155 os_ << "// this defines a generic accessors for the information above\n";
0156 os_ << "struct lexer" << suffix << "\n{\n";
0157 os_ << " // version number and feature-set of compatible static lexer engine\n";
0158 os_ << " enum\n";
0159 os_ << " {\n static_version = " << SPIRIT_STATIC_LEXER_VERSION << ",\n";
0160 os_ << " supports_bol = " << std::boolalpha << bol << ",\n";
0161 os_ << " supports_eol = " << std::boolalpha << eol << "\n";
0162 os_ << " };\n\n";
0163 os_ << " // return the number of lexer states\n";
0164 os_ << " static std::size_t state_count()\n";
0165 os_ << " {\n return lexer_state_count" << suffix << "; \n }\n\n";
0166 os_ << " // return the name of the lexer state as given by 'idx'\n";
0167 os_ << " static " << boost::lexer::detail::strings<Char>::char_name()
0168 << " const* state_name(std::size_t idx)\n";
0169 os_ << " {\n return lexer_state_names" << suffix << "[idx]; \n }\n\n";
0170 os_ << " // return the next matched token\n";
0171 os_ << " template<typename Iterator>\n";
0172 os_ << " static std::size_t next(std::size_t &start_state_, bool& bol_\n";
0173 os_ << " , Iterator &start_token_, Iterator const& end_, std::size_t& unique_id_)\n";
0174 os_ << " {\n return next_token" << suffix
0175 << "(start_state_, bol_, start_token_, end_, unique_id_);\n }\n";
0176 os_ << "};\n\n";
0177 return os_.good();
0178 }
0179
0180
0181
0182 template <typename Char>
0183 bool generate_function_body_dfa(std::basic_ostream<Char>& os_
0184 , boost::lexer::basic_state_machine<Char> const &sm_)
0185 {
0186 std::size_t const dfas_ = sm_.data()._dfa->size();
0187 std::size_t const lookups_ = sm_.data()._lookup->front()->size();
0188
0189 os_ << " enum {end_state_index, id_index, unique_id_index, "
0190 "state_index, bol_index,\n";
0191 os_ << " eol_index, dead_state_index, dfa_offset};\n\n";
0192 os_ << " static std::size_t const npos = "
0193 "static_cast<std::size_t>(~0);\n";
0194
0195 if (dfas_ > 1)
0196 {
0197 for (std::size_t state_ = 0; state_ < dfas_; ++state_)
0198 {
0199 std::size_t i_ = 0;
0200 std::size_t j_ = 1;
0201 std::size_t count_ = lookups_ / 8;
0202 std::size_t const* lookup_ = &sm_.data()._lookup[state_]->front();
0203 std::size_t const* dfa_ = &sm_.data()._dfa[state_]->front();
0204
0205 os_ << " static std::size_t const lookup" << state_
0206 << "_[" << lookups_ << "] = {\n ";
0207 for (; i_ < count_; ++i_)
0208 {
0209 std::size_t const index_ = i_ * 8;
0210 os_ << lookup_[index_];
0211 for (; j_ < 8; ++j_)
0212 {
0213 os_ << ", " << lookup_[index_ + j_];
0214 }
0215 if (i_ < count_ - 1)
0216 {
0217 os_ << ",\n ";
0218 }
0219 j_ = 1;
0220 }
0221 os_ << " };\n";
0222
0223 count_ = sm_.data()._dfa[state_]->size ();
0224 os_ << " static const std::size_t dfa" << state_ << "_["
0225 << count_ << "] = {\n ";
0226 count_ /= 8;
0227 for (i_ = 0; i_ < count_; ++i_)
0228 {
0229 std::size_t const index_ = i_ * 8;
0230 os_ << dfa_[index_];
0231 for (j_ = 1; j_ < 8; ++j_)
0232 {
0233 os_ << ", " << dfa_[index_ + j_];
0234 }
0235 if (i_ < count_ - 1)
0236 {
0237 os_ << ",\n ";
0238 }
0239 }
0240
0241 std::size_t const mod_ = sm_.data()._dfa[state_]->size () % 8;
0242 if (mod_)
0243 {
0244 std::size_t const index_ = count_ * 8;
0245 if (count_)
0246 {
0247 os_ << ",\n ";
0248 }
0249 os_ << dfa_[index_];
0250 for (j_ = 1; j_ < mod_; ++j_)
0251 {
0252 os_ << ", " << dfa_[index_ + j_];
0253 }
0254 }
0255 os_ << " };\n";
0256 }
0257
0258 std::size_t count_ = sm_.data()._dfa_alphabet.size();
0259 std::size_t i_ = 1;
0260
0261 os_ << " static std::size_t const* lookup_arr_[" << count_
0262 << "] = { lookup0_";
0263 for (i_ = 1; i_ < count_; ++i_)
0264 {
0265 os_ << ", " << "lookup" << i_ << "_";
0266 }
0267 os_ << " };\n";
0268
0269 os_ << " static std::size_t const dfa_alphabet_arr_["
0270 << count_ << "] = { ";
0271 os_ << sm_.data()._dfa_alphabet.front ();
0272 for (i_ = 1; i_ < count_; ++i_)
0273 {
0274 os_ << ", " << sm_.data()._dfa_alphabet[i_];
0275 }
0276 os_ << " };\n";
0277
0278 os_ << " static std::size_t const* dfa_arr_[" << count_
0279 << "] = { ";
0280 os_ << "dfa0_";
0281 for (i_ = 1; i_ < count_; ++i_)
0282 {
0283 os_ << ", " << "dfa" << i_ << "_";
0284 }
0285 os_ << " };\n";
0286 }
0287 else
0288 {
0289 std::size_t const* lookup_ = &sm_.data()._lookup[0]->front();
0290 std::size_t const* dfa_ = &sm_.data()._dfa[0]->front();
0291 std::size_t i_ = 0;
0292 std::size_t j_ = 1;
0293 std::size_t count_ = lookups_ / 8;
0294
0295 os_ << " static std::size_t const lookup_[";
0296 os_ << sm_.data()._lookup[0]->size() << "] = {\n ";
0297 for (; i_ < count_; ++i_)
0298 {
0299 const std::size_t index_ = i_ * 8;
0300 os_ << lookup_[index_];
0301 for (; j_ < 8; ++j_)
0302 {
0303 os_ << ", " << lookup_[index_ + j_];
0304 }
0305 if (i_ < count_ - 1)
0306 {
0307 os_ << ",\n ";
0308 }
0309 j_ = 1;
0310 }
0311 os_ << " };\n";
0312
0313 os_ << " static std::size_t const dfa_alphabet_ = "
0314 << sm_.data()._dfa_alphabet.front () << ";\n";
0315 os_ << " static std::size_t const dfa_["
0316 << sm_.data()._dfa[0]->size () << "] = {\n ";
0317 count_ = sm_.data()._dfa[0]->size () / 8;
0318 for (i_ = 0; i_ < count_; ++i_)
0319 {
0320 const std::size_t index_ = i_ * 8;
0321 os_ << dfa_[index_];
0322 for (j_ = 1; j_ < 8; ++j_)
0323 {
0324 os_ << ", " << dfa_[index_ + j_];
0325 }
0326 if (i_ < count_ - 1)
0327 {
0328 os_ << ",\n ";
0329 }
0330 }
0331
0332 const std::size_t mod_ = sm_.data()._dfa[0]->size () % 8;
0333 if (mod_)
0334 {
0335 const std::size_t index_ = count_ * 8;
0336 if (count_)
0337 {
0338 os_ << ",\n ";
0339 }
0340 os_ << dfa_[index_];
0341 for (j_ = 1; j_ < mod_; ++j_)
0342 {
0343 os_ << ", " << dfa_[index_ + j_];
0344 }
0345 }
0346 os_ << " };\n";
0347 }
0348
0349 os_ << "\n if (start_token_ == end_)\n";
0350 os_ << " {\n";
0351 os_ << " unique_id_ = npos;\n";
0352 os_ << " return 0;\n";
0353 os_ << " }\n\n";
0354 if (sm_.data()._seen_BOL_assertion)
0355 {
0356 os_ << " bool bol = bol_;\n\n";
0357 }
0358
0359 if (dfas_ > 1)
0360 {
0361 os_ << "again:\n";
0362 os_ << " std::size_t const* lookup_ = lookup_arr_[start_state_];\n";
0363 os_ << " std::size_t dfa_alphabet_ = dfa_alphabet_arr_[start_state_];\n";
0364 os_ << " std::size_t const*dfa_ = dfa_arr_[start_state_];\n";
0365 }
0366
0367 os_ << " std::size_t const* ptr_ = dfa_ + dfa_alphabet_;\n";
0368 os_ << " Iterator curr_ = start_token_;\n";
0369 os_ << " bool end_state_ = *ptr_ != 0;\n";
0370 os_ << " std::size_t id_ = *(ptr_ + id_index);\n";
0371 os_ << " std::size_t uid_ = *(ptr_ + unique_id_index);\n";
0372 if (dfas_ > 1)
0373 {
0374 os_ << " std::size_t end_start_state_ = start_state_;\n";
0375 }
0376 if (sm_.data()._seen_BOL_assertion)
0377 {
0378 os_ << " bool end_bol_ = bol_;\n";
0379 }
0380 os_ << " Iterator end_token_ = start_token_;\n\n";
0381
0382 os_ << " while (curr_ != end_)\n";
0383 os_ << " {\n";
0384
0385 if (sm_.data()._seen_BOL_assertion)
0386 {
0387 os_ << " std::size_t const BOL_state_ = ptr_[bol_index];\n\n";
0388 }
0389
0390 if (sm_.data()._seen_EOL_assertion)
0391 {
0392 os_ << " std::size_t const EOL_state_ = ptr_[eol_index];\n\n";
0393 }
0394
0395 if (sm_.data()._seen_BOL_assertion && sm_.data()._seen_EOL_assertion)
0396 {
0397 os_ << " if (BOL_state_ && bol)\n";
0398 os_ << " {\n";
0399 os_ << " ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
0400 os_ << " }\n";
0401 os_ << " else if (EOL_state_ && *curr_ == '\\n')\n";
0402 os_ << " {\n";
0403 os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
0404 os_ << " }\n";
0405 os_ << " else\n";
0406 os_ << " {\n";
0407 if (lookups_ == 256)
0408 {
0409 os_ << " unsigned char index = \n";
0410 os_ << " static_cast<unsigned char>(*curr_++);\n";
0411 }
0412 else
0413 {
0414 os_ << " std::size_t index = *curr_++\n";
0415 }
0416 os_ << " bol = (index == '\\n') ? true : false;\n";
0417 os_ << " std::size_t const state_ = ptr_[\n";
0418 os_ << " lookup_[static_cast<std::size_t>(index)]];\n";
0419
0420 os_ << '\n';
0421 os_ << " if (state_ == 0) break;\n";
0422 os_ << '\n';
0423 os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
0424 os_ << " }\n\n";
0425 }
0426 else if (sm_.data()._seen_BOL_assertion)
0427 {
0428 os_ << " if (BOL_state_ && bol)\n";
0429 os_ << " {\n";
0430 os_ << " ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
0431 os_ << " }\n";
0432 os_ << " else\n";
0433 os_ << " {\n";
0434 if (lookups_ == 256)
0435 {
0436 os_ << " unsigned char index = \n";
0437 os_ << " static_cast<unsigned char>(*curr_++);\n";
0438 }
0439 else
0440 {
0441 os_ << " std::size_t index = *curr_++\n";
0442 }
0443 os_ << " bol = (index == '\\n') ? true : false;\n";
0444 os_ << " std::size_t const state_ = ptr_[\n";
0445 os_ << " lookup_[static_cast<std::size_t>(index)]];\n";
0446
0447 os_ << '\n';
0448 os_ << " if (state_ == 0) break;\n";
0449 os_ << '\n';
0450 os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
0451 os_ << " }\n\n";
0452 }
0453 else if (sm_.data()._seen_EOL_assertion)
0454 {
0455 os_ << " if (EOL_state_ && *curr_ == '\\n')\n";
0456 os_ << " {\n";
0457 os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
0458 os_ << " }\n";
0459 os_ << " else\n";
0460 os_ << " {\n";
0461 if (lookups_ == 256)
0462 {
0463 os_ << " unsigned char index = \n";
0464 os_ << " static_cast<unsigned char>(*curr_++);\n";
0465 }
0466 else
0467 {
0468 os_ << " std::size_t index = *curr_++\n";
0469 }
0470 os_ << " bol = (index == '\\n') ? true : false;\n";
0471 os_ << " std::size_t const state_ = ptr_[\n";
0472 os_ << " lookup_[static_cast<std::size_t>(index)]];\n";
0473
0474 os_ << '\n';
0475 os_ << " if (state_ == 0) break;\n";
0476 os_ << '\n';
0477 os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
0478 os_ << " }\n\n";
0479 }
0480 else
0481 {
0482 os_ << " std::size_t const state_ =\n";
0483
0484 if (lookups_ == 256)
0485 {
0486 os_ << " ptr_[lookup_["
0487 "static_cast<unsigned char>(*curr_++)]];\n";
0488 }
0489 else
0490 {
0491 os_ << " ptr_[lookup_[*curr_++]];\n";
0492 }
0493
0494 os_ << '\n';
0495 os_ << " if (state_ == 0) break;\n";
0496 os_ << '\n';
0497 os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n\n";
0498 }
0499
0500 os_ << " if (*ptr_)\n";
0501 os_ << " {\n";
0502 os_ << " end_state_ = true;\n";
0503 os_ << " id_ = *(ptr_ + id_index);\n";
0504 os_ << " uid_ = *(ptr_ + unique_id_index);\n";
0505 if (dfas_ > 1)
0506 {
0507 os_ << " end_start_state_ = *(ptr_ + state_index);\n";
0508 }
0509 if (sm_.data()._seen_BOL_assertion)
0510 {
0511 os_ << " end_bol_ = bol;\n";
0512 }
0513 os_ << " end_token_ = curr_;\n";
0514 os_ << " }\n";
0515 os_ << " }\n\n";
0516
0517 if (sm_.data()._seen_EOL_assertion)
0518 {
0519 os_ << " std::size_t const EOL_state_ = ptr_[eol_index];\n\n";
0520
0521 os_ << " if (EOL_state_ && curr_ == end_)\n";
0522 os_ << " {\n";
0523 os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n\n";
0524
0525 os_ << " if (*ptr_)\n";
0526 os_ << " {\n";
0527 os_ << " end_state_ = true;\n";
0528 os_ << " id_ = *(ptr_ + id_index);\n";
0529 os_ << " uid_ = *(ptr_ + unique_id_index);\n";
0530 if (dfas_ > 1)
0531 {
0532 os_ << " end_start_state_ = *(ptr_ + state_index);\n";
0533 }
0534 if (sm_.data()._seen_BOL_assertion)
0535 {
0536 os_ << " end_bol_ = bol;\n";
0537 }
0538 os_ << " end_token_ = curr_;\n";
0539 os_ << " }\n";
0540 os_ << " }\n\n";
0541 }
0542
0543 os_ << " if (end_state_)\n";
0544 os_ << " {\n";
0545 os_ << " // return longest match\n";
0546 os_ << " start_token_ = end_token_;\n";
0547
0548 if (dfas_ > 1)
0549 {
0550 os_ << " start_state_ = end_start_state_;\n";
0551 os_ << " if (id_ == 0)\n";
0552 os_ << " {\n";
0553 if (sm_.data()._seen_BOL_assertion)
0554 {
0555 os_ << " bol = end_bol_;\n";
0556 }
0557 os_ << " goto again;\n";
0558 os_ << " }\n";
0559 if (sm_.data()._seen_BOL_assertion)
0560 {
0561 os_ << " else\n";
0562 os_ << " {\n";
0563 os_ << " bol_ = end_bol_;\n";
0564 os_ << " }\n";
0565 }
0566 }
0567 else if (sm_.data()._seen_BOL_assertion)
0568 {
0569 os_ << " bol_ = end_bol_;\n";
0570 }
0571
0572 os_ << " }\n";
0573 os_ << " else\n";
0574 os_ << " {\n";
0575
0576 if (sm_.data()._seen_BOL_assertion)
0577 {
0578 os_ << " bol_ = (*start_token_ == '\\n') ? true : false;\n";
0579 }
0580
0581 os_ << " id_ = npos;\n";
0582 os_ << " uid_ = npos;\n";
0583 os_ << " }\n\n";
0584
0585 os_ << " unique_id_ = uid_;\n";
0586 os_ << " return id_;\n";
0587 return os_.good();
0588 }
0589
0590
0591 template <typename Char>
0592 inline std::basic_string<Char> get_charlit(Char ch)
0593 {
0594 std::basic_string<Char> result;
0595 boost::lexer::basic_string_token<Char>::escape_char(ch, result);
0596 return result;
0597 }
0598
0599
0600 template <typename Char>
0601 bool need_label0_0(boost::lexer::basic_state_machine<Char> const &sm_)
0602 {
0603 typedef typename boost::lexer::basic_state_machine<Char>::iterator
0604 iterator_type;
0605 iterator_type iter_ = sm_.begin();
0606 std::size_t const states_ = iter_->states;
0607
0608 for (std::size_t state_ = 0; state_ < states_; ++state_)
0609 {
0610 if (0 == iter_->bol_index || 0 == iter_->eol_index)
0611 {
0612 return true;
0613 }
0614
0615 std::size_t const transitions_ = iter_->transitions;
0616 for (std::size_t t_ = 0; t_ < transitions_; ++t_)
0617 {
0618 if (0 == iter_->goto_state)
0619 {
0620 return true;
0621 }
0622 ++iter_;
0623 }
0624 if (transitions_ == 0) ++iter_;
0625 }
0626 return false;
0627 }
0628
0629
0630 template <typename Char>
0631 bool generate_function_body_switch(std::basic_ostream<Char> & os_
0632 , boost::lexer::basic_state_machine<Char> const &sm_)
0633 {
0634 typedef typename boost::lexer::basic_state_machine<Char>::iterator
0635 iterator_type;
0636
0637 std::size_t const lookups_ = sm_.data()._lookup->front ()->size ();
0638 iterator_type iter_ = sm_.begin();
0639 iterator_type labeliter_ = iter_;
0640 iterator_type end_ = sm_.end();
0641 std::size_t const dfas_ = sm_.data()._dfa->size ();
0642
0643 os_ << " static std::size_t const npos = "
0644 "static_cast<std::size_t>(~0);\n";
0645
0646 os_ << "\n if (start_token_ == end_)\n";
0647 os_ << " {\n";
0648 os_ << " unique_id_ = npos;\n";
0649 os_ << " return 0;\n";
0650 os_ << " }\n\n";
0651
0652 if (sm_.data()._seen_BOL_assertion)
0653 {
0654 os_ << " bool bol = bol_;\n";
0655 }
0656
0657 if (dfas_ > 1)
0658 {
0659 os_ << "again:\n";
0660 }
0661
0662 os_ << " Iterator curr_ = start_token_;\n";
0663 os_ << " bool end_state_ = false;\n";
0664 os_ << " std::size_t id_ = npos;\n";
0665 os_ << " std::size_t uid_ = npos;\n";
0666
0667 if (dfas_ > 1)
0668 {
0669 os_ << " std::size_t end_start_state_ = start_state_;\n";
0670 }
0671
0672 if (sm_.data()._seen_BOL_assertion)
0673 {
0674 os_ << " bool end_bol_ = bol_;\n";
0675 }
0676
0677 os_ << " Iterator end_token_ = start_token_;\n";
0678 os_ << '\n';
0679
0680 os_ << " " << ((lookups_ == 256) ? "char" : "wchar_t")
0681 << " ch_ = 0;\n\n";
0682
0683 if (dfas_ > 1)
0684 {
0685 os_ << " switch (start_state_)\n";
0686 os_ << " {\n";
0687
0688 for (std::size_t i_ = 0; i_ < dfas_; ++i_)
0689 {
0690 os_ << " case " << i_ << ":\n";
0691 os_ << " goto state" << i_ << "_0;\n";
0692 os_ << " break;\n";
0693 }
0694
0695 os_ << " default:\n";
0696 os_ << " goto end;\n";
0697 os_ << " break;\n";
0698 os_ << " }\n";
0699 }
0700
0701 bool need_state0_0_label = need_label0_0(sm_);
0702
0703 for (std::size_t dfa_ = 0; dfa_ < dfas_; ++dfa_)
0704 {
0705 std::size_t const states_ = iter_->states;
0706 for (std::size_t state_ = 0; state_ < states_; ++state_)
0707 {
0708 std::size_t const transitions_ = iter_->transitions;
0709 std::size_t t_ = 0;
0710
0711 if (dfas_ > 1 || dfa_ != 0 || state_ != 0 || need_state0_0_label)
0712 {
0713 os_ << "\nstate" << dfa_ << '_' << state_ << ":\n";
0714 }
0715
0716 if (iter_->end_state)
0717 {
0718 os_ << " end_state_ = true;\n";
0719 os_ << " id_ = " << iter_->id << ";\n";
0720 os_ << " uid_ = " << iter_->unique_id << ";\n";
0721 os_ << " end_token_ = curr_;\n";
0722
0723 if (dfas_ > 1)
0724 {
0725 os_ << " end_start_state_ = " << iter_->goto_dfa <<
0726 ";\n";
0727 }
0728
0729 if (sm_.data()._seen_BOL_assertion)
0730 {
0731 os_ << " end_bol_ = bol;\n";
0732 }
0733
0734 if (transitions_) os_ << '\n';
0735 }
0736
0737 if (t_ < transitions_ ||
0738 iter_->bol_index != boost::lexer::npos ||
0739 iter_->eol_index != boost::lexer::npos)
0740 {
0741 os_ << " if (curr_ == end_) goto end;\n";
0742 os_ << " ch_ = *curr_;\n";
0743 if (iter_->bol_index != boost::lexer::npos)
0744 {
0745 os_ << "\n if (bol) goto state" << dfa_ << '_'
0746 << iter_->bol_index << ";\n";
0747 }
0748 if (iter_->eol_index != boost::lexer::npos)
0749 {
0750 os_ << "\n if (ch_ == '\\n') goto state" << dfa_
0751 << '_' << iter_->eol_index << ";\n";
0752 }
0753 os_ << " ++curr_;\n";
0754 }
0755
0756 for (; t_ < transitions_; ++t_)
0757 {
0758 Char const *ptr_ = iter_->token._charset.c_str();
0759 Char const *end2_ = ptr_ + iter_->token._charset.size();
0760 Char start_char_ = 0;
0761 Char curr_char_ = 0;
0762 bool range_ = false;
0763 bool first_char_ = true;
0764
0765 os_ << "\n if (";
0766
0767 while (ptr_ != end2_)
0768 {
0769 curr_char_ = *ptr_++;
0770
0771 if (*ptr_ == curr_char_ + 1)
0772 {
0773 if (!range_)
0774 {
0775 start_char_ = curr_char_;
0776 }
0777 range_ = true;
0778 }
0779 else
0780 {
0781 if (!first_char_)
0782 {
0783 os_ << ((iter_->token._negated) ? " && " : " || ");
0784 }
0785 else
0786 {
0787 first_char_ = false;
0788 }
0789 if (range_)
0790 {
0791 if (iter_->token._negated)
0792 {
0793 os_ << "!";
0794 }
0795 os_ << "(ch_ >= '" << get_charlit(start_char_)
0796 << "' && ch_ <= '"
0797 << get_charlit(curr_char_) << "')";
0798 range_ = false;
0799 }
0800 else
0801 {
0802 os_ << "ch_ "
0803 << ((iter_->token._negated) ? "!=" : "==")
0804 << " '" << get_charlit(curr_char_) << "'";
0805 }
0806 }
0807 }
0808
0809 os_ << ") goto state" << dfa_ << '_' << iter_->goto_state
0810 << ";\n";
0811 ++iter_;
0812 }
0813
0814 if (!(dfa_ == dfas_ - 1 && state_ == states_ - 1))
0815 {
0816 os_ << " goto end;\n";
0817 }
0818
0819 if (transitions_ == 0) ++iter_;
0820 }
0821 }
0822
0823 os_ << "\nend:\n";
0824 os_ << " if (end_state_)\n";
0825 os_ << " {\n";
0826 os_ << " // return longest match\n";
0827 os_ << " start_token_ = end_token_;\n";
0828
0829 if (dfas_ > 1)
0830 {
0831 os_ << " start_state_ = end_start_state_;\n";
0832 os_ << "\n if (id_ == 0)\n";
0833 os_ << " {\n";
0834
0835 if (sm_.data()._seen_BOL_assertion)
0836 {
0837 os_ << " bol = end_bol_;\n";
0838 }
0839
0840 os_ << " goto again;\n";
0841 os_ << " }\n";
0842
0843 if (sm_.data()._seen_BOL_assertion)
0844 {
0845 os_ << " else\n";
0846 os_ << " {\n";
0847 os_ << " bol_ = end_bol_;\n";
0848 os_ << " }\n";
0849 }
0850 }
0851 else if (sm_.data()._seen_BOL_assertion)
0852 {
0853 os_ << " bol_ = end_bol_;\n";
0854 }
0855
0856 os_ << " }\n";
0857 os_ << " else\n";
0858 os_ << " {\n";
0859
0860 if (sm_.data()._seen_BOL_assertion)
0861 {
0862 os_ << " bol_ = (*start_token_ == '\\n') ? true : false;\n";
0863 }
0864 os_ << " id_ = npos;\n";
0865 os_ << " uid_ = npos;\n";
0866 os_ << " }\n\n";
0867
0868 os_ << " unique_id_ = uid_;\n";
0869 os_ << " return id_;\n";
0870 return os_.good();
0871 }
0872
0873
0874
0875 template <typename Char, typename F>
0876 inline bool
0877 generate_cpp (boost::lexer::basic_state_machine<Char> const& sm_
0878 , boost::lexer::basic_rules<Char> const& rules_
0879 , std::basic_ostream<Char> &os_, Char const* name_suffix
0880 , F generate_function_body)
0881 {
0882 if (sm_.data()._lookup->empty())
0883 return false;
0884
0885 std::size_t const dfas_ = sm_.data()._dfa->size();
0886
0887
0888 os_ << "// Copyright (c) 2008-2009 Ben Hanson\n";
0889 os_ << "// Copyright (c) 2008-2011 Hartmut Kaiser\n";
0890 os_ << "//\n";
0891 os_ << "// Distributed under the Boost Software License, "
0892 "Version 1.0. (See accompanying\n";
0893 os_ << "// file licence_1_0.txt or copy at "
0894 "http://www.boost.org/LICENSE_1_0.txt)\n\n";
0895 os_ << "// Auto-generated by boost::lexer, do not edit\n\n";
0896
0897 std::basic_string<Char> guard(name_suffix);
0898 guard += L<Char>(name_suffix[0] ? "_" : "");
0899 guard += L<Char>(__DATE__ "_" __TIME__);
0900 typename std::basic_string<Char>::size_type p =
0901 guard.find_first_of(L<Char>(": "));
0902 while (std::string::npos != p)
0903 {
0904 guard.replace(p, 1, L<Char>("_"));
0905 p = guard.find_first_of(L<Char>(": "), p);
0906 }
0907 {
0908 typedef std::ctype<Char> facet_t;
0909 facet_t const& facet = std::use_facet<facet_t>(std::locale());
0910 typedef typename std::basic_string<Char>::iterator iter_t;
0911 for (iter_t iter = guard.begin(),
0912 last = guard.end(); iter != last; ++iter)
0913 *iter = facet.toupper(*iter);
0914 }
0915
0916 os_ << "#if !defined(BOOST_SPIRIT_LEXER_NEXT_TOKEN_" << guard << ")\n";
0917 os_ << "#define BOOST_SPIRIT_LEXER_NEXT_TOKEN_" << guard << "\n\n";
0918
0919 os_ << "#include <boost/spirit/home/support/detail/lexer/char_traits.hpp>\n\n";
0920
0921 generate_delimiter(os_);
0922 os_ << "// the generated table of state names and the tokenizer have to be\n"
0923 "// defined in the boost::spirit::lex::lexertl::static_ namespace\n";
0924 os_ << "namespace boost { namespace spirit { namespace lex { "
0925 "namespace lexertl { namespace static_ {\n\n";
0926
0927
0928 if (!generate_cpp_state_info(rules_, os_, name_suffix))
0929 return false;
0930
0931 generate_delimiter(os_);
0932 os_ << "// this function returns the next matched token\n";
0933 os_ << "template<typename Iterator>\n";
0934 os_ << "std::size_t next_token" << (name_suffix[0] ? "_" : "")
0935 << name_suffix << " (";
0936
0937 if (dfas_ > 1)
0938 {
0939 os_ << "std::size_t& start_state_, ";
0940 }
0941 else
0942 {
0943 os_ << "std::size_t& /*start_state_*/, ";
0944 }
0945 if (sm_.data()._seen_BOL_assertion)
0946 {
0947 os_ << "bool& bol_, ";
0948 }
0949 else
0950 {
0951 os_ << "bool& /*bol_*/, ";
0952 }
0953 os_ << "\n ";
0954
0955 os_ << "Iterator &start_token_, Iterator const& end_, ";
0956 os_ << "std::size_t& unique_id_)\n";
0957 os_ << "{\n";
0958 if (!generate_function_body(os_, sm_))
0959 return false;
0960 os_ << "}\n\n";
0961
0962 if (!generate_cpp_state_table<Char>(os_, name_suffix
0963 , sm_.data()._seen_BOL_assertion, sm_.data()._seen_EOL_assertion))
0964 {
0965 return false;
0966 }
0967
0968 os_ << "}}}}} // namespace boost::spirit::lex::lexertl::static_\n\n";
0969
0970 os_ << "#endif\n";
0971
0972 return os_.good();
0973 }
0974
0975 }
0976
0977
0978 template <typename Lexer, typename F>
0979 inline bool
0980 generate_static(Lexer const& lexer
0981 , std::basic_ostream<typename Lexer::char_type>& os
0982 , typename Lexer::char_type const* name_suffix, F f)
0983 {
0984 if (!lexer.init_dfa(true))
0985 return false;
0986 return detail::generate_cpp(lexer.state_machine_, lexer.rules_, os
0987 , name_suffix, f);
0988 }
0989
0990
0991
0992
0993 template <typename Lexer>
0994 inline bool
0995 generate_static(Lexer const& lexer
0996 , std::basic_ostream<typename Lexer::char_type>& os
0997 , typename Lexer::char_type const* name_suffix =
0998 detail::L<typename Lexer::char_type>())
0999 {
1000 return generate_static(lexer, os, name_suffix
1001 , &detail::generate_function_body_dfa<typename Lexer::char_type>);
1002 }
1003
1004
1005 template <typename Lexer>
1006 inline bool
1007 generate_static_dfa(Lexer const& lexer
1008 , std::basic_ostream<typename Lexer::char_type>& os
1009 , typename Lexer::char_type const* name_suffix =
1010 detail::L<typename Lexer::char_type>())
1011 {
1012 return generate_static(lexer, os, name_suffix
1013 , &detail::generate_function_body_dfa<typename Lexer::char_type>);
1014 }
1015
1016
1017 template <typename Lexer>
1018 inline bool
1019 generate_static_switch(Lexer const& lexer
1020 , std::basic_ostream<typename Lexer::char_type>& os
1021 , typename Lexer::char_type const* name_suffix =
1022 detail::L<typename Lexer::char_type>())
1023 {
1024 return generate_static(lexer, os, name_suffix
1025 , &detail::generate_function_body_switch<typename Lexer::char_type>);
1026 }
1027
1028
1029 }}}}
1030
1031 #endif