Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-19 09:47:49

0001 // generate_cpp.hpp
0002 // Copyright (c) 2008-2009 Ben Hanson (http://www.benhanson.net/)
0003 //
0004 // Distributed under the Boost Software License, Version 1.0. (See accompanying
0005 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
0006 #ifndef BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_GENERATE_CPP_HPP
0007 #define BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_GENERATE_CPP_HPP
0008 
0009 #include "char_traits.hpp"
0010 #include "consts.hpp"
0011 #include "internals.hpp"
0012 #include "runtime_error.hpp"
0013 #include "size_t.hpp"
0014 #include "state_machine.hpp"
0015 #include <iosfwd>
0016 #include <vector>
0017 
0018 namespace boost
0019 {
0020 namespace lexer
0021 {
0022 template<typename CharT>
0023 void generate_cpp (const basic_state_machine<CharT> &state_machine_,
0024     std::ostream &os_, const bool use_pointers_ = false,
0025     const bool skip_unknown_ = true, const bool optimise_parameters_ = true,
0026     const char *name_ = "next_token")
0027 {
0028     const detail::internals &sm_ = state_machine_.data ();
0029 
0030     if (sm_._lookup->size () == 0)
0031     {
0032         throw runtime_error ("Cannot generate code from an empty "
0033             "state machine");
0034     }
0035 
0036     std::string upper_name_ (__DATE__);
0037     const std::size_t lookups_ = sm_._lookup->front ()->size ();
0038     const std::size_t dfas_ = sm_._dfa->size ();
0039     std::string::size_type pos_ = upper_name_.find (' ');
0040     const char *iterator_ = 0;
0041 
0042     if (use_pointers_)
0043     {
0044         if (lookups_ == 256)
0045         {
0046             iterator_ = "const char *";
0047         }
0048         else
0049         {
0050             iterator_ = "const wchar_t *";
0051         }
0052     }
0053     else
0054     {
0055         iterator_ = "Iterator &";
0056     }
0057 
0058     while (pos_ != std::string::npos)
0059     {
0060         upper_name_.replace (pos_, 1, "_");
0061         pos_ = upper_name_.find (' ', pos_);
0062     }
0063 
0064     upper_name_ += '_';
0065     upper_name_ +=  __TIME__;
0066 
0067     pos_ = upper_name_.find (':');
0068 
0069     while (pos_ != std::string::npos)
0070     {
0071         upper_name_.erase (pos_, 1);
0072         pos_ = upper_name_.find (':', pos_);
0073     }
0074 
0075     upper_name_ = '_' + upper_name_;
0076     upper_name_ = name_ + upper_name_;
0077     std::transform (upper_name_.begin (), upper_name_.end (),
0078         upper_name_.begin (), ::toupper);
0079     os_ << "#ifndef " << upper_name_ + '\n';
0080     os_ << "#define " << upper_name_ + '\n';
0081     os_ << "// Copyright (c) 2008-2009 Ben Hanson\n";
0082     os_ << "//\n";
0083     os_ << "// Distributed under the Boost Software License, "
0084         "Version 1.0. (See accompanying\n";
0085     os_ << "// file licence_1_0.txt or copy at "
0086         "http://www.boost.org/LICENSE_1_0.txt)\n\n";
0087     os_ << "// Auto-generated by boost::lexer\n";
0088     os_ << "template<typename Iterator>\n";
0089     os_ << "std::size_t " << name_  << " (";
0090 
0091     if (dfas_ > 1 || !optimise_parameters_)
0092     {
0093         os_ << "std::size_t &start_state_, ";
0094     }
0095 
0096     if (use_pointers_)
0097     {
0098         os_ << iterator_ << " &";
0099     }
0100     else
0101     {
0102         os_ << iterator_;
0103     }
0104 
0105     os_ << "start_token_, ";
0106 
0107     if (use_pointers_)
0108     {
0109         os_ << iterator_ << " const ";
0110     }
0111     else
0112     {
0113         os_ << "const " << iterator_;
0114     }
0115 
0116     os_ << "end_, \n";
0117     os_ << "    std::size_t &unique_id_";
0118 
0119     if (sm_._seen_BOL_assertion || !optimise_parameters_)
0120     {
0121         os_ << ", bool &beg_of_line_";
0122     }
0123 
0124     os_ << ")\n";
0125     os_ << "{\n";
0126     os_ << "    enum {end_state_index, id_index, unique_id_index, state_index, bol_index,\n";
0127     os_ << "        eol_index, dead_state_index, dfa_offset};\n";
0128     os_ << "    static const std::size_t npos = static_cast"
0129         "<std::size_t>(~0);\n";
0130 
0131     if (dfas_ > 1)
0132     {
0133         std::size_t state_ = 0;
0134 
0135         for (; state_ < dfas_; ++state_)
0136         {
0137             std::size_t i_ = 0;
0138             std::size_t j_ = 1;
0139             std::size_t count_ = lookups_ / 8;
0140             const std::size_t *lookup_ = &sm_._lookup[state_]->front ();
0141             const std::size_t *dfa_ = &sm_._dfa[state_]->front ();
0142 
0143             os_ << "    static const std::size_t lookup" << state_ << "_[" <<
0144                 lookups_ << "] = {";
0145 
0146             for (; i_ < count_; ++i_)
0147             {
0148                 const std::size_t index_ = i_ * 8;
0149 
0150                 os_ << lookup_[index_];
0151 
0152                 for (; j_ < 8; ++j_)
0153                 {
0154                     os_ << ", " << lookup_[index_ + j_];
0155                 }
0156 
0157                 if (i_ < count_ - 1)
0158                 {
0159                     os_ << "," << std::endl << "        ";
0160                 }
0161 
0162                 j_ = 1;
0163             }
0164 
0165             os_ << "};\n";
0166             count_ = sm_._dfa[state_]->size ();
0167             os_ << "    static const std::size_t dfa" << state_ << "_[" <<
0168                 count_ << "] = {";
0169             count_ /= 8;
0170 
0171             for (i_ = 0; i_ < count_; ++i_)
0172             {
0173                 const std::size_t index_ = i_ * 8;
0174 
0175                 os_ << dfa_[index_];
0176 
0177                 for (j_ = 1; j_ < 8; ++j_)
0178                 {
0179                     os_ << ", " << dfa_[index_ + j_];
0180                 }
0181 
0182                 if (i_ < count_ - 1)
0183                 {
0184                     os_ << "," << std::endl << "        ";
0185                 }
0186             }
0187 
0188             const std::size_t mod_ = sm_._dfa[state_]->size () % 8;
0189 
0190             if (mod_)
0191             {
0192                 const std::size_t index_ = count_ * 8;
0193 
0194                 if (count_)
0195                 {
0196                     os_ << ",\n        ";
0197                 }
0198 
0199                 os_ << dfa_[index_];
0200 
0201                 for (j_ = 1; j_ < mod_; ++j_)
0202                 {
0203                     os_ << ", " << dfa_[index_ + j_];
0204                 }
0205             }
0206 
0207             os_ << "};\n";
0208         }
0209 
0210         std::size_t count_ = sm_._dfa_alphabet.size ();
0211         std::size_t i_ = 1;
0212 
0213         os_ << "    static const std::size_t *lookup_arr_[" << count_ <<
0214             "] = {";
0215         os_ << "lookup0_";
0216 
0217         for (i_ = 1; i_ < count_; ++i_)
0218         {
0219             os_ << ", " << "lookup" << i_ << "_";
0220         }
0221 
0222         os_ << "};\n";
0223         os_ << "    static const std::size_t dfa_alphabet_arr_[" << count_ <<
0224             "] = {";
0225         os_ << sm_._dfa_alphabet.front ();
0226 
0227         for (i_ = 1; i_ < count_; ++i_)
0228         {
0229             os_ << ", " << sm_._dfa_alphabet[i_];
0230         }
0231 
0232         os_ << "};\n";
0233         os_ << "    static const std::size_t *dfa_arr_[" << count_ <<
0234             "] = {";
0235         os_ << "dfa0_";
0236 
0237         for (i_ = 1; i_ < count_; ++i_)
0238         {
0239             os_ << ", " << "dfa" << i_ << "_";
0240         }
0241 
0242         os_ << "};\n";
0243     }
0244     else
0245     {
0246         const std::size_t *lookup_ = &sm_._lookup->front ()->front ();
0247         const std::size_t *dfa_ = &sm_._dfa->front ()->front ();
0248         std::size_t i_ = 0;
0249         std::size_t j_ = 1;
0250         std::size_t count_ = lookups_ / 8;
0251 
0252         os_ << "    static const std::size_t lookup_[";
0253         os_ << sm_._lookup->front ()->size () << "] = {";
0254 
0255         for (; i_ < count_; ++i_)
0256         {
0257             const std::size_t index_ = i_ * 8;
0258 
0259             os_ << lookup_[index_];
0260 
0261             for (; j_ < 8; ++j_)
0262             {
0263                 os_ << ", " << lookup_[index_ + j_];
0264             }
0265 
0266             if (i_ < count_ - 1)
0267             {
0268                 os_ << "," << std::endl << "        ";
0269             }
0270 
0271             j_ = 1;
0272         }
0273 
0274         os_ << "};\n";
0275         os_ << "    static const std::size_t dfa_alphabet_ = " <<
0276             sm_._dfa_alphabet.front () << ";\n";
0277         os_ << "    static const std::size_t dfa_[" <<
0278             sm_._dfa->front ()->size () << "] = {";
0279         count_ = sm_._dfa->front ()->size () / 8;
0280 
0281         for (i_ = 0; i_ < count_; ++i_)
0282         {
0283             const std::size_t index_ = i_ * 8;
0284 
0285             os_ << dfa_[index_];
0286 
0287             for (j_ = 1; j_ < 8; ++j_)
0288             {
0289                 os_ << ", " << dfa_[index_ + j_];
0290             }
0291 
0292             if (i_ < count_ - 1)
0293             {
0294                 os_ << "," << std::endl << "        ";
0295             }
0296         }
0297 
0298         const std::size_t mod_ = sm_._dfa->front ()->size () % 8;
0299 
0300         if (mod_)
0301         {
0302             const std::size_t index_ = count_ * 8;
0303 
0304             if (count_)
0305             {
0306                 os_ << ",\n        ";
0307             }
0308 
0309             os_ << dfa_[index_];
0310 
0311             for (j_ = 1; j_ < mod_; ++j_)
0312             {
0313                 os_ << ", " << dfa_[index_ + j_];
0314             }
0315         }
0316 
0317         os_ << "};\n";
0318     }
0319 
0320     os_ << "\n    if (start_token_ == end_)\n";
0321     os_ << "    {\n";
0322     os_ << "        unique_id_ = npos;\n";
0323     os_ << "        return 0;\n";
0324     os_ << "    }\n\n";
0325 
0326     if (dfas_ > 1)
0327     {
0328         os_ << "again:\n";
0329         os_ << "    const std::size_t * lookup_ = "
0330             "lookup_arr_[start_state_];\n";
0331         os_ << "    std::size_t dfa_alphabet_ = "
0332             "dfa_alphabet_arr_[start_state_];\n";
0333         os_ << "    const std::size_t *dfa_ = dfa_arr_[start_state_];\n";
0334     }
0335 
0336     os_ << "    const std::size_t *ptr_ = dfa_ + dfa_alphabet_;\n";
0337     os_ << "    Iterator curr_ = start_token_;\n";
0338     os_ << "    bool end_state_ = *ptr_ != 0;\n";
0339     os_ << "    std::size_t id_ = *(ptr_ + id_index);\n";
0340     os_ << "    std::size_t uid_ = *(ptr_ + unique_id_index);\n";
0341 
0342     if (dfas_ > 1)
0343     {
0344         os_ << "    std::size_t end_start_state_ = start_state_;\n";
0345     }
0346 
0347     if (sm_._seen_BOL_assertion)
0348     {
0349         os_ << "    bool bol_ = beg_of_line_;\n";
0350         os_ << "    bool end_bol_ = bol_;\n";
0351     }
0352 
0353     os_ << "    Iterator end_token_ = start_token_;\n";
0354     os_ << '\n';
0355     os_ << "    while (curr_ != end_)\n";
0356     os_ << "    {\n";
0357 
0358     if (sm_._seen_BOL_assertion)
0359     {
0360         os_ << "        const std::size_t BOL_state_ = ptr_[bol_index];\n";
0361     }
0362 
0363     if (sm_._seen_EOL_assertion)
0364     {
0365         os_ << "        const std::size_t EOL_state_ = ptr_[eol_index];\n";
0366     }
0367 
0368     if (sm_._seen_BOL_assertion || sm_._seen_EOL_assertion)
0369     {
0370         os_ << '\n';
0371     }
0372 
0373     if (sm_._seen_BOL_assertion)
0374     {
0375         os_ << "        if (BOL_state_ && bol_)\n";
0376         os_ << "        {\n";
0377         os_ << "            ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
0378         os_ << "        }\n";
0379     }
0380 
0381     if (sm_._seen_EOL_assertion)
0382     {
0383         os_ << "        ";
0384 
0385         if (sm_._seen_BOL_assertion)
0386         {
0387             os_ << "else ";
0388         }
0389 
0390         os_ << "if (EOL_state_ && *curr_ == '\\n')\n";
0391         os_ << "        {\n";
0392         os_ << "            ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
0393         os_ << "        }\n";
0394     }
0395 
0396     std::string tab_ (sm_._seen_BOL_assertion || sm_._seen_EOL_assertion ? "    " : "");
0397 
0398     if (sm_._seen_BOL_assertion || sm_._seen_EOL_assertion)
0399     {
0400         os_ << "        else\n";
0401         os_ << "        {\n";
0402     }
0403 
0404     if (sm_._seen_BOL_assertion)
0405     {
0406         os_ << "            ";
0407 
0408         if (lookups_ == 256)
0409         {
0410             os_ << "char";
0411         }
0412         else
0413         {
0414             os_ << "wchar_t";
0415         }
0416 
0417         os_ << " prev_char_ = *curr_++;\n\n";
0418         os_ << "            bol_ = prev_char_ == '\\n';\n\n";
0419     }
0420 
0421     os_ << tab_;
0422     os_ << "        const std::size_t state_ =\n";
0423     os_ << tab_;
0424     os_ << "            ptr_[lookup_[";
0425 
0426     if (lookups_ == 256)
0427     {
0428         os_ << "static_cast<unsigned char>(";
0429     }
0430 
0431     if (sm_._seen_BOL_assertion)
0432     {
0433         os_ << "prev_char";
0434     }
0435     else
0436     {
0437         os_ << "*curr_++";
0438     }
0439 
0440 
0441     if (lookups_ == 256)
0442     {
0443         os_ << ')';
0444     }
0445 
0446     os_ << "]];\n\n";
0447 
0448     os_ << tab_;
0449     os_ << "        if (state_ == 0) break;\n\n";
0450     os_ << tab_;
0451     os_ << "        ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
0452 
0453     if (sm_._seen_BOL_assertion || sm_._seen_EOL_assertion)
0454     {
0455         os_ << "        }\n";
0456     }
0457 
0458     os_ << '\n';
0459     os_ << "        if (*ptr_)\n";
0460     os_ << "        {\n";
0461     os_ << "            end_state_ = true;\n";
0462     os_ << "            id_ = *(ptr_ + id_index);\n";
0463     os_ << "            uid_ = *(ptr_ + unique_id_index);\n";
0464 
0465     if (dfas_ > 1)
0466     {
0467         os_ << "            end_start_state_ = *(ptr_ + state_index);\n";
0468     }
0469 
0470     if (sm_._seen_BOL_assertion)
0471     {
0472         os_ << "            end_bol_ = bol_;\n";
0473     }
0474 
0475     os_ << "            end_token_ = curr_;\n";
0476     os_ << "        }\n";
0477     os_ << "    }\n";
0478     os_ << '\n';
0479 
0480     if (sm_._seen_EOL_assertion)
0481     {
0482         os_ << "    const std::size_t EOL_state_ = ptr_[eol_index];\n";
0483         os_ << '\n';
0484         os_ << "    if (EOL_state_ && curr_ == end_)\n";
0485         os_ << "    {\n";
0486         os_ << "        ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
0487         os_ << '\n';
0488         os_ << "        if (*ptr_)\n";
0489         os_ << "        {\n";
0490         os_ << "            end_state_ = true;\n";
0491         os_ << "            id_ = *(ptr_ + id_index);\n";
0492         os_ << "            uid_ = *(ptr_ + unique_id_index);\n";
0493 
0494         if (dfas_ > 1)
0495         {
0496             os_ << "            end_start_state_ = *(ptr_ + state_index);\n";
0497         }
0498 
0499         if (sm_._seen_BOL_assertion)
0500         {
0501             os_ << "            end_bol_ = bol_;\n";
0502         }
0503 
0504         os_ << "            end_token_ = curr_;\n";
0505         os_ << "        }\n";
0506         os_ << "    }\n";
0507         os_ << '\n';
0508     }
0509 
0510     os_ << "    if (end_state_)\n";
0511     os_ << "    {\n";
0512     os_ << "        // return longest match\n";
0513 
0514     if (dfas_ > 1)
0515     {
0516         os_ << "        start_state_ = end_start_state_;\n";
0517     }
0518 
0519     if (sm_._seen_BOL_assertion && dfas_ < 2)
0520     {
0521         os_ << "        beg_of_line_ = end_bol_;\n";
0522     }
0523 
0524     os_ << "        start_token_ = end_token_;\n";
0525 
0526     if (dfas_ > 1)
0527     {
0528         os_ << '\n';
0529         os_ << "        if (id_ == 0)\n";
0530         os_ << "        {\n";
0531 
0532         if (sm_._seen_BOL_assertion)
0533         {
0534             os_ << "            bol_ = end_bol_;\n";
0535         }
0536 
0537         os_ << "            goto again;\n";
0538         os_ << "        }\n";
0539 
0540         if (sm_._seen_BOL_assertion)
0541         {
0542             os_ << "        else\n";
0543             os_ << "        {\n";
0544             os_ << "            beg_of_line_ = end_bol_;\n";
0545             os_ << "        }\n";
0546         }
0547     }
0548 
0549     os_ << "    }\n";
0550     os_ << "    else\n";
0551     os_ << "    {\n";
0552 
0553     if (sm_._seen_BOL_assertion)
0554     {
0555         os_ << "        beg_of_line_ = *start_token_ == '\\n';\n";
0556     }
0557 
0558     if (skip_unknown_)
0559     {
0560         os_ << "        // No match causes char to be skipped\n";
0561         os_ << "        ++start_token_;\n";
0562     }
0563 
0564     os_ << "        id_ = npos;\n";
0565     os_ << "        uid_ = npos;\n";
0566     os_ << "    }\n";
0567     os_ << '\n';
0568     os_ << "    unique_id_ = uid_;\n";
0569     os_ << "    return id_;\n";
0570     os_ << "}\n";
0571     os_ << "\n#endif\n";
0572 }
0573 }
0574 }
0575 
0576 #endif