File indexing completed on 2025-01-19 09:47:49
0001
0002
0003
0004
0005
0006 #ifndef BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_GENERATE_CPP_HPP
0007 #define BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_GENERATE_CPP_HPP
0008
#include "char_traits.hpp"
#include "consts.hpp"
#include "internals.hpp"
#include "runtime_error.hpp"
#include "size_t.hpp"
#include "state_machine.hpp"
#include <cctype>
#include <cstddef>
#include <iosfwd>
#include <ostream>
#include <string>
#include <vector>
0017
0018 namespace boost
0019 {
0020 namespace lexer
0021 {
0022 template<typename CharT>
0023 void generate_cpp (const basic_state_machine<CharT> &state_machine_,
0024 std::ostream &os_, const bool use_pointers_ = false,
0025 const bool skip_unknown_ = true, const bool optimise_parameters_ = true,
0026 const char *name_ = "next_token")
0027 {
0028 const detail::internals &sm_ = state_machine_.data ();
0029
0030 if (sm_._lookup->size () == 0)
0031 {
0032 throw runtime_error ("Cannot generate code from an empty "
0033 "state machine");
0034 }
0035
0036 std::string upper_name_ (__DATE__);
0037 const std::size_t lookups_ = sm_._lookup->front ()->size ();
0038 const std::size_t dfas_ = sm_._dfa->size ();
0039 std::string::size_type pos_ = upper_name_.find (' ');
0040 const char *iterator_ = 0;
0041
0042 if (use_pointers_)
0043 {
0044 if (lookups_ == 256)
0045 {
0046 iterator_ = "const char *";
0047 }
0048 else
0049 {
0050 iterator_ = "const wchar_t *";
0051 }
0052 }
0053 else
0054 {
0055 iterator_ = "Iterator &";
0056 }
0057
0058 while (pos_ != std::string::npos)
0059 {
0060 upper_name_.replace (pos_, 1, "_");
0061 pos_ = upper_name_.find (' ', pos_);
0062 }
0063
0064 upper_name_ += '_';
0065 upper_name_ += __TIME__;
0066
0067 pos_ = upper_name_.find (':');
0068
0069 while (pos_ != std::string::npos)
0070 {
0071 upper_name_.erase (pos_, 1);
0072 pos_ = upper_name_.find (':', pos_);
0073 }
0074
0075 upper_name_ = '_' + upper_name_;
0076 upper_name_ = name_ + upper_name_;
0077 std::transform (upper_name_.begin (), upper_name_.end (),
0078 upper_name_.begin (), ::toupper);
0079 os_ << "#ifndef " << upper_name_ + '\n';
0080 os_ << "#define " << upper_name_ + '\n';
0081 os_ << "// Copyright (c) 2008-2009 Ben Hanson\n";
0082 os_ << "//\n";
0083 os_ << "// Distributed under the Boost Software License, "
0084 "Version 1.0. (See accompanying\n";
0085 os_ << "// file licence_1_0.txt or copy at "
0086 "http://www.boost.org/LICENSE_1_0.txt)\n\n";
0087 os_ << "// Auto-generated by boost::lexer\n";
0088 os_ << "template<typename Iterator>\n";
0089 os_ << "std::size_t " << name_ << " (";
0090
0091 if (dfas_ > 1 || !optimise_parameters_)
0092 {
0093 os_ << "std::size_t &start_state_, ";
0094 }
0095
0096 if (use_pointers_)
0097 {
0098 os_ << iterator_ << " &";
0099 }
0100 else
0101 {
0102 os_ << iterator_;
0103 }
0104
0105 os_ << "start_token_, ";
0106
0107 if (use_pointers_)
0108 {
0109 os_ << iterator_ << " const ";
0110 }
0111 else
0112 {
0113 os_ << "const " << iterator_;
0114 }
0115
0116 os_ << "end_, \n";
0117 os_ << " std::size_t &unique_id_";
0118
0119 if (sm_._seen_BOL_assertion || !optimise_parameters_)
0120 {
0121 os_ << ", bool &beg_of_line_";
0122 }
0123
0124 os_ << ")\n";
0125 os_ << "{\n";
0126 os_ << " enum {end_state_index, id_index, unique_id_index, state_index, bol_index,\n";
0127 os_ << " eol_index, dead_state_index, dfa_offset};\n";
0128 os_ << " static const std::size_t npos = static_cast"
0129 "<std::size_t>(~0);\n";
0130
0131 if (dfas_ > 1)
0132 {
0133 std::size_t state_ = 0;
0134
0135 for (; state_ < dfas_; ++state_)
0136 {
0137 std::size_t i_ = 0;
0138 std::size_t j_ = 1;
0139 std::size_t count_ = lookups_ / 8;
0140 const std::size_t *lookup_ = &sm_._lookup[state_]->front ();
0141 const std::size_t *dfa_ = &sm_._dfa[state_]->front ();
0142
0143 os_ << " static const std::size_t lookup" << state_ << "_[" <<
0144 lookups_ << "] = {";
0145
0146 for (; i_ < count_; ++i_)
0147 {
0148 const std::size_t index_ = i_ * 8;
0149
0150 os_ << lookup_[index_];
0151
0152 for (; j_ < 8; ++j_)
0153 {
0154 os_ << ", " << lookup_[index_ + j_];
0155 }
0156
0157 if (i_ < count_ - 1)
0158 {
0159 os_ << "," << std::endl << " ";
0160 }
0161
0162 j_ = 1;
0163 }
0164
0165 os_ << "};\n";
0166 count_ = sm_._dfa[state_]->size ();
0167 os_ << " static const std::size_t dfa" << state_ << "_[" <<
0168 count_ << "] = {";
0169 count_ /= 8;
0170
0171 for (i_ = 0; i_ < count_; ++i_)
0172 {
0173 const std::size_t index_ = i_ * 8;
0174
0175 os_ << dfa_[index_];
0176
0177 for (j_ = 1; j_ < 8; ++j_)
0178 {
0179 os_ << ", " << dfa_[index_ + j_];
0180 }
0181
0182 if (i_ < count_ - 1)
0183 {
0184 os_ << "," << std::endl << " ";
0185 }
0186 }
0187
0188 const std::size_t mod_ = sm_._dfa[state_]->size () % 8;
0189
0190 if (mod_)
0191 {
0192 const std::size_t index_ = count_ * 8;
0193
0194 if (count_)
0195 {
0196 os_ << ",\n ";
0197 }
0198
0199 os_ << dfa_[index_];
0200
0201 for (j_ = 1; j_ < mod_; ++j_)
0202 {
0203 os_ << ", " << dfa_[index_ + j_];
0204 }
0205 }
0206
0207 os_ << "};\n";
0208 }
0209
0210 std::size_t count_ = sm_._dfa_alphabet.size ();
0211 std::size_t i_ = 1;
0212
0213 os_ << " static const std::size_t *lookup_arr_[" << count_ <<
0214 "] = {";
0215 os_ << "lookup0_";
0216
0217 for (i_ = 1; i_ < count_; ++i_)
0218 {
0219 os_ << ", " << "lookup" << i_ << "_";
0220 }
0221
0222 os_ << "};\n";
0223 os_ << " static const std::size_t dfa_alphabet_arr_[" << count_ <<
0224 "] = {";
0225 os_ << sm_._dfa_alphabet.front ();
0226
0227 for (i_ = 1; i_ < count_; ++i_)
0228 {
0229 os_ << ", " << sm_._dfa_alphabet[i_];
0230 }
0231
0232 os_ << "};\n";
0233 os_ << " static const std::size_t *dfa_arr_[" << count_ <<
0234 "] = {";
0235 os_ << "dfa0_";
0236
0237 for (i_ = 1; i_ < count_; ++i_)
0238 {
0239 os_ << ", " << "dfa" << i_ << "_";
0240 }
0241
0242 os_ << "};\n";
0243 }
0244 else
0245 {
0246 const std::size_t *lookup_ = &sm_._lookup->front ()->front ();
0247 const std::size_t *dfa_ = &sm_._dfa->front ()->front ();
0248 std::size_t i_ = 0;
0249 std::size_t j_ = 1;
0250 std::size_t count_ = lookups_ / 8;
0251
0252 os_ << " static const std::size_t lookup_[";
0253 os_ << sm_._lookup->front ()->size () << "] = {";
0254
0255 for (; i_ < count_; ++i_)
0256 {
0257 const std::size_t index_ = i_ * 8;
0258
0259 os_ << lookup_[index_];
0260
0261 for (; j_ < 8; ++j_)
0262 {
0263 os_ << ", " << lookup_[index_ + j_];
0264 }
0265
0266 if (i_ < count_ - 1)
0267 {
0268 os_ << "," << std::endl << " ";
0269 }
0270
0271 j_ = 1;
0272 }
0273
0274 os_ << "};\n";
0275 os_ << " static const std::size_t dfa_alphabet_ = " <<
0276 sm_._dfa_alphabet.front () << ";\n";
0277 os_ << " static const std::size_t dfa_[" <<
0278 sm_._dfa->front ()->size () << "] = {";
0279 count_ = sm_._dfa->front ()->size () / 8;
0280
0281 for (i_ = 0; i_ < count_; ++i_)
0282 {
0283 const std::size_t index_ = i_ * 8;
0284
0285 os_ << dfa_[index_];
0286
0287 for (j_ = 1; j_ < 8; ++j_)
0288 {
0289 os_ << ", " << dfa_[index_ + j_];
0290 }
0291
0292 if (i_ < count_ - 1)
0293 {
0294 os_ << "," << std::endl << " ";
0295 }
0296 }
0297
0298 const std::size_t mod_ = sm_._dfa->front ()->size () % 8;
0299
0300 if (mod_)
0301 {
0302 const std::size_t index_ = count_ * 8;
0303
0304 if (count_)
0305 {
0306 os_ << ",\n ";
0307 }
0308
0309 os_ << dfa_[index_];
0310
0311 for (j_ = 1; j_ < mod_; ++j_)
0312 {
0313 os_ << ", " << dfa_[index_ + j_];
0314 }
0315 }
0316
0317 os_ << "};\n";
0318 }
0319
0320 os_ << "\n if (start_token_ == end_)\n";
0321 os_ << " {\n";
0322 os_ << " unique_id_ = npos;\n";
0323 os_ << " return 0;\n";
0324 os_ << " }\n\n";
0325
0326 if (dfas_ > 1)
0327 {
0328 os_ << "again:\n";
0329 os_ << " const std::size_t * lookup_ = "
0330 "lookup_arr_[start_state_];\n";
0331 os_ << " std::size_t dfa_alphabet_ = "
0332 "dfa_alphabet_arr_[start_state_];\n";
0333 os_ << " const std::size_t *dfa_ = dfa_arr_[start_state_];\n";
0334 }
0335
0336 os_ << " const std::size_t *ptr_ = dfa_ + dfa_alphabet_;\n";
0337 os_ << " Iterator curr_ = start_token_;\n";
0338 os_ << " bool end_state_ = *ptr_ != 0;\n";
0339 os_ << " std::size_t id_ = *(ptr_ + id_index);\n";
0340 os_ << " std::size_t uid_ = *(ptr_ + unique_id_index);\n";
0341
0342 if (dfas_ > 1)
0343 {
0344 os_ << " std::size_t end_start_state_ = start_state_;\n";
0345 }
0346
0347 if (sm_._seen_BOL_assertion)
0348 {
0349 os_ << " bool bol_ = beg_of_line_;\n";
0350 os_ << " bool end_bol_ = bol_;\n";
0351 }
0352
0353 os_ << " Iterator end_token_ = start_token_;\n";
0354 os_ << '\n';
0355 os_ << " while (curr_ != end_)\n";
0356 os_ << " {\n";
0357
0358 if (sm_._seen_BOL_assertion)
0359 {
0360 os_ << " const std::size_t BOL_state_ = ptr_[bol_index];\n";
0361 }
0362
0363 if (sm_._seen_EOL_assertion)
0364 {
0365 os_ << " const std::size_t EOL_state_ = ptr_[eol_index];\n";
0366 }
0367
0368 if (sm_._seen_BOL_assertion || sm_._seen_EOL_assertion)
0369 {
0370 os_ << '\n';
0371 }
0372
0373 if (sm_._seen_BOL_assertion)
0374 {
0375 os_ << " if (BOL_state_ && bol_)\n";
0376 os_ << " {\n";
0377 os_ << " ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
0378 os_ << " }\n";
0379 }
0380
0381 if (sm_._seen_EOL_assertion)
0382 {
0383 os_ << " ";
0384
0385 if (sm_._seen_BOL_assertion)
0386 {
0387 os_ << "else ";
0388 }
0389
0390 os_ << "if (EOL_state_ && *curr_ == '\\n')\n";
0391 os_ << " {\n";
0392 os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
0393 os_ << " }\n";
0394 }
0395
0396 std::string tab_ (sm_._seen_BOL_assertion || sm_._seen_EOL_assertion ? " " : "");
0397
0398 if (sm_._seen_BOL_assertion || sm_._seen_EOL_assertion)
0399 {
0400 os_ << " else\n";
0401 os_ << " {\n";
0402 }
0403
0404 if (sm_._seen_BOL_assertion)
0405 {
0406 os_ << " ";
0407
0408 if (lookups_ == 256)
0409 {
0410 os_ << "char";
0411 }
0412 else
0413 {
0414 os_ << "wchar_t";
0415 }
0416
0417 os_ << " prev_char_ = *curr_++;\n\n";
0418 os_ << " bol_ = prev_char_ == '\\n';\n\n";
0419 }
0420
0421 os_ << tab_;
0422 os_ << " const std::size_t state_ =\n";
0423 os_ << tab_;
0424 os_ << " ptr_[lookup_[";
0425
0426 if (lookups_ == 256)
0427 {
0428 os_ << "static_cast<unsigned char>(";
0429 }
0430
0431 if (sm_._seen_BOL_assertion)
0432 {
0433 os_ << "prev_char";
0434 }
0435 else
0436 {
0437 os_ << "*curr_++";
0438 }
0439
0440
0441 if (lookups_ == 256)
0442 {
0443 os_ << ')';
0444 }
0445
0446 os_ << "]];\n\n";
0447
0448 os_ << tab_;
0449 os_ << " if (state_ == 0) break;\n\n";
0450 os_ << tab_;
0451 os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
0452
0453 if (sm_._seen_BOL_assertion || sm_._seen_EOL_assertion)
0454 {
0455 os_ << " }\n";
0456 }
0457
0458 os_ << '\n';
0459 os_ << " if (*ptr_)\n";
0460 os_ << " {\n";
0461 os_ << " end_state_ = true;\n";
0462 os_ << " id_ = *(ptr_ + id_index);\n";
0463 os_ << " uid_ = *(ptr_ + unique_id_index);\n";
0464
0465 if (dfas_ > 1)
0466 {
0467 os_ << " end_start_state_ = *(ptr_ + state_index);\n";
0468 }
0469
0470 if (sm_._seen_BOL_assertion)
0471 {
0472 os_ << " end_bol_ = bol_;\n";
0473 }
0474
0475 os_ << " end_token_ = curr_;\n";
0476 os_ << " }\n";
0477 os_ << " }\n";
0478 os_ << '\n';
0479
0480 if (sm_._seen_EOL_assertion)
0481 {
0482 os_ << " const std::size_t EOL_state_ = ptr_[eol_index];\n";
0483 os_ << '\n';
0484 os_ << " if (EOL_state_ && curr_ == end_)\n";
0485 os_ << " {\n";
0486 os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
0487 os_ << '\n';
0488 os_ << " if (*ptr_)\n";
0489 os_ << " {\n";
0490 os_ << " end_state_ = true;\n";
0491 os_ << " id_ = *(ptr_ + id_index);\n";
0492 os_ << " uid_ = *(ptr_ + unique_id_index);\n";
0493
0494 if (dfas_ > 1)
0495 {
0496 os_ << " end_start_state_ = *(ptr_ + state_index);\n";
0497 }
0498
0499 if (sm_._seen_BOL_assertion)
0500 {
0501 os_ << " end_bol_ = bol_;\n";
0502 }
0503
0504 os_ << " end_token_ = curr_;\n";
0505 os_ << " }\n";
0506 os_ << " }\n";
0507 os_ << '\n';
0508 }
0509
0510 os_ << " if (end_state_)\n";
0511 os_ << " {\n";
0512 os_ << " // return longest match\n";
0513
0514 if (dfas_ > 1)
0515 {
0516 os_ << " start_state_ = end_start_state_;\n";
0517 }
0518
0519 if (sm_._seen_BOL_assertion && dfas_ < 2)
0520 {
0521 os_ << " beg_of_line_ = end_bol_;\n";
0522 }
0523
0524 os_ << " start_token_ = end_token_;\n";
0525
0526 if (dfas_ > 1)
0527 {
0528 os_ << '\n';
0529 os_ << " if (id_ == 0)\n";
0530 os_ << " {\n";
0531
0532 if (sm_._seen_BOL_assertion)
0533 {
0534 os_ << " bol_ = end_bol_;\n";
0535 }
0536
0537 os_ << " goto again;\n";
0538 os_ << " }\n";
0539
0540 if (sm_._seen_BOL_assertion)
0541 {
0542 os_ << " else\n";
0543 os_ << " {\n";
0544 os_ << " beg_of_line_ = end_bol_;\n";
0545 os_ << " }\n";
0546 }
0547 }
0548
0549 os_ << " }\n";
0550 os_ << " else\n";
0551 os_ << " {\n";
0552
0553 if (sm_._seen_BOL_assertion)
0554 {
0555 os_ << " beg_of_line_ = *start_token_ == '\\n';\n";
0556 }
0557
0558 if (skip_unknown_)
0559 {
0560 os_ << " // No match causes char to be skipped\n";
0561 os_ << " ++start_token_;\n";
0562 }
0563
0564 os_ << " id_ = npos;\n";
0565 os_ << " uid_ = npos;\n";
0566 os_ << " }\n";
0567 os_ << '\n';
0568 os_ << " unique_id_ = uid_;\n";
0569 os_ << " return id_;\n";
0570 os_ << "}\n";
0571 os_ << "\n#endif\n";
0572 }
0573 }
0574 }
0575
0576 #endif