Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-31 10:02:15

0001 //  Copyright (c) 2008-2009 Ben Hanson
0002 //  Copyright (c) 2008-2011 Hartmut Kaiser
0003 //
0004 //  Distributed under the Boost Software License, Version 1.0. (See accompanying
0005 //  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
0006 
0007 #if !defined(BOOST_SPIRIT_LEX_LEXERTL_GENERATE_CPP_FEB_10_2008_0855PM)
0008 #define BOOST_SPIRIT_LEX_LEXERTL_GENERATE_CPP_FEB_10_2008_0855PM
0009 
0010 #if defined(_MSC_VER)
0011 #pragma once
0012 #endif
0013 
0014 #include <boost/spirit/home/support/detail/lexer/char_traits.hpp>
0015 #include <boost/spirit/home/support/detail/lexer/consts.hpp>
0016 #include <boost/spirit/home/support/detail/lexer/rules.hpp>
0017 #include <boost/spirit/home/support/detail/lexer/size_t.hpp>
0018 #include <boost/spirit/home/support/detail/lexer/state_machine.hpp>
0019 #include <boost/spirit/home/support/detail/lexer/debug.hpp>
0020 #include <boost/spirit/home/lex/lexer/lexertl/static_version.hpp>
0021 #include <boost/scoped_array.hpp>
0022 #include <cstring>
0023 #include <locale>
0024 
0025 ///////////////////////////////////////////////////////////////////////////////
0026 namespace boost { namespace spirit { namespace lex { namespace lexertl
0027 {
0028     namespace detail
0029     {
0030 
0031     ///////////////////////////////////////////////////////////////////////////
0032     template <typename CharT>
0033     struct string_lit;
0034 
0035     template <>
0036     struct string_lit<char>
0037     {
0038         static char get(char c) { return c; }
0039         static std::string get(char const* str = "") { return str; }
0040     };
0041 
0042     template <>
0043     struct string_lit<wchar_t>
0044     {
0045         static wchar_t get(char c)
0046         {
0047             typedef std::ctype<wchar_t> ctype_t;
0048             return std::use_facet<ctype_t>(std::locale()).widen(c);
0049         }
0050         static std::basic_string<wchar_t> get(char const* source = "")
0051         {
0052             using namespace std;        // some systems have size_t in ns std
0053             size_t len = strlen(source);
0054             boost::scoped_array<wchar_t> result (new wchar_t[len+1]);
0055             result.get()[len] = '\0';
0056 
0057             // working with wide character streams is supported only if the
0058             // platform provides the std::ctype<wchar_t> facet
0059             BOOST_ASSERT(std::has_facet<std::ctype<wchar_t> >(std::locale()));
0060 
0061             std::use_facet<std::ctype<wchar_t> >(std::locale())
0062                 .widen(source, source + len, result.get());
0063             return result.get();
0064         }
0065     };
0066 
0067     template <typename Char>
0068     inline Char L(char c)
0069     {
0070         return string_lit<Char>::get(c);
0071     }
0072 
0073     template <typename Char>
0074     inline std::basic_string<Char> L(char const* c = "")
0075     {
0076         return string_lit<Char>::get(c);
0077     }
0078 
0079     ///////////////////////////////////////////////////////////////////////////
0080     template <typename Char>
0081     inline bool
0082     generate_delimiter(std::basic_ostream<Char> &os_)
0083     {
0084         os_ << std::basic_string<Char>(80, '/') << "\n";
0085         return os_.good();
0086     }
0087 
0088     ///////////////////////////////////////////////////////////////////////////
0089     // Generate a table of the names of the used lexer states, which is a bit
0090     // tricky, because the table stored with the rules is sorted based on the
0091     // names, but we need it sorted using the state ids.
0092     template <typename Char>
0093     inline bool
0094     generate_cpp_state_info (boost::lexer::basic_rules<Char> const& rules_
0095       , std::basic_ostream<Char> &os_, Char const* name_suffix)
0096     {
0097         // we need to re-sort the state names in ascending order of the state
0098         // ids, filling possible gaps in between later
0099         typedef typename
0100             boost::lexer::basic_rules<Char>::string_size_t_map::const_iterator
0101         state_iterator;
0102         typedef std::map<std::size_t, Char const*> reverse_state_map_type;
0103 
0104         reverse_state_map_type reverse_state_map;
0105         state_iterator send = rules_.statemap().end();
0106         for (state_iterator sit = rules_.statemap().begin(); sit != send; ++sit)
0107         {
0108             typedef typename reverse_state_map_type::value_type value_type;
0109             reverse_state_map.insert(value_type((*sit).second, (*sit).first.c_str()));
0110         }
0111 
0112         generate_delimiter(os_);
0113         os_ << "// this table defines the names of the lexer states\n";
0114         os_ << boost::lexer::detail::strings<Char>::char_name()
0115             << " const* const lexer_state_names"
0116             << (name_suffix[0] ? "_" : "") << name_suffix
0117             << "[" << rules_.statemap().size() << "] = \n{\n";
0118 
0119         typedef typename reverse_state_map_type::iterator iterator;
0120         iterator rend = reverse_state_map.end();
0121         std::size_t last_id = 0;
0122         for (iterator rit = reverse_state_map.begin(); rit != rend; ++last_id)
0123         {
0124             for (/**/; last_id < (*rit).first; ++last_id)
0125             {
0126                 os_ << "    0,  // \"<undefined state>\"\n";
0127             }
0128             os_ << "    "
0129                 << boost::lexer::detail::strings<Char>::char_prefix()
0130                 << "\"" << (*rit).second << "\"";
0131             if (++rit != rend)
0132                 os_ << ",\n";
0133             else
0134                 os_ << "\n";        // don't generate the final comma
0135         }
0136         os_ << "};\n\n";
0137 
0138         generate_delimiter(os_);
0139         os_ << "// this variable defines the number of lexer states\n";
0140         os_ << "std::size_t const lexer_state_count"
0141             << (name_suffix[0] ? "_" : "") << name_suffix
0142             << " = " << rules_.statemap().size() << ";\n\n";
0143         return os_.good();
0144     }
0145 
0146     template <typename Char>
0147     inline bool
0148     generate_cpp_state_table (std::basic_ostream<Char> &os_
0149       , Char const* name_suffix, bool bol, bool eol)
0150     {
0151         std::basic_string<Char> suffix(L<Char>(name_suffix[0] ? "_" : ""));
0152         suffix += name_suffix;
0153 
0154         generate_delimiter(os_);
0155         os_ << "// this defines a generic accessors for the information above\n";
0156         os_ << "struct lexer" << suffix << "\n{\n";
0157         os_ << "    // version number and feature-set of compatible static lexer engine\n";
0158         os_ << "    enum\n";
0159         os_ << "    {\n        static_version = " << SPIRIT_STATIC_LEXER_VERSION << ",\n";
0160         os_ << "        supports_bol = " << std::boolalpha << bol << ",\n";
0161         os_ << "        supports_eol = " << std::boolalpha << eol << "\n";
0162         os_ << "    };\n\n";
0163         os_ << "    // return the number of lexer states\n";
0164         os_ << "    static std::size_t state_count()\n";
0165         os_ << "    {\n        return lexer_state_count" << suffix << "; \n    }\n\n";
0166         os_ << "    // return the name of the lexer state as given by 'idx'\n";
0167         os_ << "    static " << boost::lexer::detail::strings<Char>::char_name()
0168             << " const* state_name(std::size_t idx)\n";
0169         os_ << "    {\n        return lexer_state_names" << suffix << "[idx]; \n    }\n\n";
0170         os_ << "    // return the next matched token\n";
0171         os_ << "    template<typename Iterator>\n";
0172         os_ << "    static std::size_t next(std::size_t &start_state_, bool& bol_\n";
0173         os_ << "      , Iterator &start_token_, Iterator const& end_, std::size_t& unique_id_)\n";
0174         os_ << "    {\n        return next_token" << suffix
0175             << "(start_state_, bol_, start_token_, end_, unique_id_);\n    }\n";
0176         os_ << "};\n\n";
0177         return os_.good();
0178     }
0179 
0180     ///////////////////////////////////////////////////////////////////////////
0181     // generate function body based on traversing the DFA tables
0182     template <typename Char>
0183     bool generate_function_body_dfa(std::basic_ostream<Char>& os_
0184       , boost::lexer::basic_state_machine<Char> const &sm_)
0185     {
0186         std::size_t const dfas_ = sm_.data()._dfa->size();
0187         std::size_t const lookups_ = sm_.data()._lookup->front()->size();
0188 
0189         os_ << "    enum {end_state_index, id_index, unique_id_index, "
0190                "state_index, bol_index,\n";
0191         os_ << "        eol_index, dead_state_index, dfa_offset};\n\n";
0192         os_ << "    static std::size_t const npos = "
0193                "static_cast<std::size_t>(~0);\n";
0194 
0195         if (dfas_ > 1)
0196         {
0197             for (std::size_t state_ = 0; state_ < dfas_; ++state_)
0198             {
0199                 std::size_t i_ = 0;
0200                 std::size_t j_ = 1;
0201                 std::size_t count_ = lookups_ / 8;
0202                 std::size_t const* lookup_ = &sm_.data()._lookup[state_]->front();
0203                 std::size_t const* dfa_ = &sm_.data()._dfa[state_]->front();
0204 
0205                 os_ << "    static std::size_t const lookup" << state_
0206                     << "_[" << lookups_ << "] = {\n        ";
0207                 for (/**/; i_ < count_; ++i_)
0208                 {
0209                     std::size_t const index_ = i_ * 8;
0210                     os_ << lookup_[index_];
0211                     for (/**/; j_ < 8; ++j_)
0212                     {
0213                         os_ << ", " << lookup_[index_ + j_];
0214                     }
0215                     if (i_ < count_ - 1)
0216                     {
0217                         os_ << ",\n        ";
0218                     }
0219                     j_ = 1;
0220                 }
0221                 os_ << " };\n";
0222 
0223                 count_ = sm_.data()._dfa[state_]->size ();
0224                 os_ << "    static const std::size_t dfa" << state_ << "_["
0225                     << count_ << "] = {\n        ";
0226                 count_ /= 8;
0227                 for (i_ = 0; i_ < count_; ++i_)
0228                 {
0229                     std::size_t const index_ = i_ * 8;
0230                     os_ << dfa_[index_];
0231                     for (j_ = 1; j_ < 8; ++j_)
0232                     {
0233                         os_ << ", " << dfa_[index_ + j_];
0234                     }
0235                     if (i_ < count_ - 1)
0236                     {
0237                         os_ << ",\n        ";
0238                     }
0239                 }
0240 
0241                 std::size_t const mod_ = sm_.data()._dfa[state_]->size () % 8;
0242                 if (mod_)
0243                 {
0244                     std::size_t const index_ = count_ * 8;
0245                     if (count_)
0246                     {
0247                         os_ << ",\n        ";
0248                     }
0249                     os_ << dfa_[index_];
0250                     for (j_ = 1; j_ < mod_; ++j_)
0251                     {
0252                         os_ << ", " << dfa_[index_ + j_];
0253                     }
0254                 }
0255                 os_ << " };\n";
0256             }
0257 
0258             std::size_t count_ = sm_.data()._dfa_alphabet.size();
0259             std::size_t i_ = 1;
0260 
0261             os_ << "    static std::size_t const* lookup_arr_[" << count_
0262                 << "] = { lookup0_";
0263             for (i_ = 1; i_ < count_; ++i_)
0264             {
0265                 os_ << ", " << "lookup" << i_ << "_";
0266             }
0267             os_ << " };\n";
0268 
0269             os_ << "    static std::size_t const dfa_alphabet_arr_["
0270                 << count_ << "] = { ";
0271             os_ << sm_.data()._dfa_alphabet.front ();
0272             for (i_ = 1; i_ < count_; ++i_)
0273             {
0274                 os_ << ", " << sm_.data()._dfa_alphabet[i_];
0275             }
0276             os_ << " };\n";
0277 
0278             os_ << "    static std::size_t const* dfa_arr_[" << count_
0279                 << "] = { ";
0280             os_ << "dfa0_";
0281             for (i_ = 1; i_ < count_; ++i_)
0282             {
0283                 os_ << ", " << "dfa" << i_ << "_";
0284             }
0285             os_ << " };\n";
0286         }
0287         else
0288         {
0289             std::size_t const* lookup_ = &sm_.data()._lookup[0]->front();
0290             std::size_t const* dfa_ = &sm_.data()._dfa[0]->front();
0291             std::size_t i_ = 0;
0292             std::size_t j_ = 1;
0293             std::size_t count_ = lookups_ / 8;
0294 
0295             os_ << "    static std::size_t const lookup_[";
0296             os_ << sm_.data()._lookup[0]->size() << "] = {\n        ";
0297             for (/**/; i_ < count_; ++i_)
0298             {
0299                 const std::size_t index_ = i_ * 8;
0300                 os_ << lookup_[index_];
0301                 for (/**/; j_ < 8; ++j_)
0302                 {
0303                     os_ << ", " << lookup_[index_ + j_];
0304                 }
0305                 if (i_ < count_ - 1)
0306                 {
0307                     os_ << ",\n        ";
0308                 }
0309                 j_ = 1;
0310             }
0311             os_ << " };\n";
0312 
0313             os_ << "    static std::size_t const dfa_alphabet_ = "
0314                 << sm_.data()._dfa_alphabet.front () << ";\n";
0315             os_ << "    static std::size_t const dfa_["
0316                 << sm_.data()._dfa[0]->size () << "] = {\n        ";
0317             count_ = sm_.data()._dfa[0]->size () / 8;
0318             for (i_ = 0; i_ < count_; ++i_)
0319             {
0320                 const std::size_t index_ = i_ * 8;
0321                 os_ << dfa_[index_];
0322                 for (j_ = 1; j_ < 8; ++j_)
0323                 {
0324                     os_ << ", " << dfa_[index_ + j_];
0325                 }
0326                 if (i_ < count_ - 1)
0327                 {
0328                     os_ << ",\n        ";
0329                 }
0330             }
0331 
0332             const std::size_t mod_ = sm_.data()._dfa[0]->size () % 8;
0333             if (mod_)
0334             {
0335                 const std::size_t index_ = count_ * 8;
0336                 if (count_)
0337                 {
0338                     os_ << ",\n        ";
0339                 }
0340                 os_ << dfa_[index_];
0341                 for (j_ = 1; j_ < mod_; ++j_)
0342                 {
0343                     os_ << ", " << dfa_[index_ + j_];
0344                 }
0345             }
0346             os_ << " };\n";
0347         }
0348 
0349         os_ << "\n    if (start_token_ == end_)\n";
0350         os_ << "    {\n";
0351         os_ << "        unique_id_ = npos;\n";
0352         os_ << "        return 0;\n";
0353         os_ << "    }\n\n";
0354         if (sm_.data()._seen_BOL_assertion)
0355         {
0356             os_ << "    bool bol = bol_;\n\n";
0357         }
0358 
0359         if (dfas_ > 1)
0360         {
0361             os_ << "again:\n";
0362             os_ << "    std::size_t const* lookup_ = lookup_arr_[start_state_];\n";
0363             os_ << "    std::size_t dfa_alphabet_ = dfa_alphabet_arr_[start_state_];\n";
0364             os_ << "    std::size_t const*dfa_ = dfa_arr_[start_state_];\n";
0365         }
0366 
0367         os_ << "    std::size_t const* ptr_ = dfa_ + dfa_alphabet_;\n";
0368         os_ << "    Iterator curr_ = start_token_;\n";
0369         os_ << "    bool end_state_ = *ptr_ != 0;\n";
0370         os_ << "    std::size_t id_ = *(ptr_ + id_index);\n";
0371         os_ << "    std::size_t uid_ = *(ptr_ + unique_id_index);\n";
0372         if (dfas_ > 1)
0373         {
0374             os_ << "    std::size_t end_start_state_ = start_state_;\n";
0375         }
0376         if (sm_.data()._seen_BOL_assertion)
0377         {
0378             os_ << "    bool end_bol_ = bol_;\n";
0379         }
0380         os_ << "    Iterator end_token_ = start_token_;\n\n";
0381 
0382         os_ << "    while (curr_ != end_)\n";
0383         os_ << "    {\n";
0384 
0385         if (sm_.data()._seen_BOL_assertion)
0386         {
0387             os_ << "        std::size_t const BOL_state_ = ptr_[bol_index];\n\n";
0388         }
0389 
0390         if (sm_.data()._seen_EOL_assertion)
0391         {
0392             os_ << "        std::size_t const EOL_state_ = ptr_[eol_index];\n\n";
0393         }
0394 
0395         if (sm_.data()._seen_BOL_assertion && sm_.data()._seen_EOL_assertion)
0396         {
0397             os_ << "        if (BOL_state_ && bol)\n";
0398             os_ << "        {\n";
0399             os_ << "            ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
0400             os_ << "        }\n";
0401             os_ << "        else if (EOL_state_ && *curr_ == '\\n')\n";
0402             os_ << "        {\n";
0403             os_ << "            ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
0404             os_ << "        }\n";
0405             os_ << "        else\n";
0406             os_ << "        {\n";
0407             if (lookups_ == 256)
0408             {
0409                 os_ << "            unsigned char index = \n";
0410                 os_ << "                static_cast<unsigned char>(*curr_++);\n";
0411             }
0412             else
0413             {
0414                 os_ << "            std::size_t index = *curr_++\n";
0415             }
0416             os_ << "            bol = (index == '\\n') ? true : false;\n";
0417             os_ << "            std::size_t const state_ = ptr_[\n";
0418             os_ << "                lookup_[static_cast<std::size_t>(index)]];\n";
0419 
0420             os_ << '\n';
0421             os_ << "            if (state_ == 0) break;\n";
0422             os_ << '\n';
0423             os_ << "            ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
0424             os_ << "        }\n\n";
0425         }
0426         else if (sm_.data()._seen_BOL_assertion)
0427         {
0428             os_ << "        if (BOL_state_ && bol)\n";
0429             os_ << "        {\n";
0430             os_ << "            ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
0431             os_ << "        }\n";
0432             os_ << "        else\n";
0433             os_ << "        {\n";
0434             if (lookups_ == 256)
0435             {
0436                 os_ << "            unsigned char index = \n";
0437                 os_ << "                static_cast<unsigned char>(*curr_++);\n";
0438             }
0439             else
0440             {
0441                 os_ << "            std::size_t index = *curr_++\n";
0442             }
0443             os_ << "            bol = (index == '\\n') ? true : false;\n";
0444             os_ << "            std::size_t const state_ = ptr_[\n";
0445             os_ << "                lookup_[static_cast<std::size_t>(index)]];\n";
0446 
0447             os_ << '\n';
0448             os_ << "            if (state_ == 0) break;\n";
0449             os_ << '\n';
0450             os_ << "            ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
0451             os_ << "        }\n\n";
0452         }
0453         else if (sm_.data()._seen_EOL_assertion)
0454         {
0455             os_ << "        if (EOL_state_ && *curr_ == '\\n')\n";
0456             os_ << "        {\n";
0457             os_ << "            ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
0458             os_ << "        }\n";
0459             os_ << "        else\n";
0460             os_ << "        {\n";
0461             if (lookups_ == 256)
0462             {
0463                 os_ << "            unsigned char index = \n";
0464                 os_ << "                static_cast<unsigned char>(*curr_++);\n";
0465             }
0466             else
0467             {
0468                 os_ << "            std::size_t index = *curr_++\n";
0469             }
0470             os_ << "            bol = (index == '\\n') ? true : false;\n";
0471             os_ << "            std::size_t const state_ = ptr_[\n";
0472             os_ << "                lookup_[static_cast<std::size_t>(index)]];\n";
0473 
0474             os_ << '\n';
0475             os_ << "            if (state_ == 0) break;\n";
0476             os_ << '\n';
0477             os_ << "            ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
0478             os_ << "        }\n\n";
0479         }
0480         else
0481         {
0482             os_ << "        std::size_t const state_ =\n";
0483 
0484             if (lookups_ == 256)
0485             {
0486                 os_ << "            ptr_[lookup_["
0487                        "static_cast<unsigned char>(*curr_++)]];\n";
0488             }
0489             else
0490             {
0491                 os_ << "            ptr_[lookup_[*curr_++]];\n";
0492             }
0493 
0494             os_ << '\n';
0495             os_ << "        if (state_ == 0) break;\n";
0496             os_ << '\n';
0497             os_ << "        ptr_ = &dfa_[state_ * dfa_alphabet_];\n\n";
0498         }
0499 
0500         os_ << "        if (*ptr_)\n";
0501         os_ << "        {\n";
0502         os_ << "            end_state_ = true;\n";
0503         os_ << "            id_ = *(ptr_ + id_index);\n";
0504         os_ << "            uid_ = *(ptr_ + unique_id_index);\n";
0505         if (dfas_ > 1)
0506         {
0507             os_ << "            end_start_state_ = *(ptr_ + state_index);\n";
0508         }
0509         if (sm_.data()._seen_BOL_assertion)
0510         {
0511             os_ << "            end_bol_ = bol;\n";
0512         }
0513         os_ << "            end_token_ = curr_;\n";
0514         os_ << "        }\n";
0515         os_ << "    }\n\n";
0516 
0517         if (sm_.data()._seen_EOL_assertion)
0518         {
0519             os_ << "    std::size_t const EOL_state_ = ptr_[eol_index];\n\n";
0520 
0521             os_ << "    if (EOL_state_ && curr_ == end_)\n";
0522             os_ << "    {\n";
0523             os_ << "        ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n\n";
0524 
0525             os_ << "        if (*ptr_)\n";
0526             os_ << "        {\n";
0527             os_ << "            end_state_ = true;\n";
0528             os_ << "            id_ = *(ptr_ + id_index);\n";
0529             os_ << "            uid_ = *(ptr_ + unique_id_index);\n";
0530             if (dfas_ > 1)
0531             {
0532                 os_ << "            end_start_state_ = *(ptr_ + state_index);\n";
0533             }
0534             if (sm_.data()._seen_BOL_assertion)
0535             {
0536                 os_ << "            end_bol_ = bol;\n";
0537             }
0538             os_ << "            end_token_ = curr_;\n";
0539             os_ << "        }\n";
0540             os_ << "    }\n\n";
0541         }
0542 
0543         os_ << "    if (end_state_)\n";
0544         os_ << "    {\n";
0545         os_ << "        // return longest match\n";
0546         os_ << "        start_token_ = end_token_;\n";
0547 
0548         if (dfas_ > 1)
0549         {
0550             os_ << "        start_state_ = end_start_state_;\n";
0551             os_ << "        if (id_ == 0)\n";
0552             os_ << "        {\n";
0553             if (sm_.data()._seen_BOL_assertion)
0554             {
0555                 os_ << "            bol = end_bol_;\n";
0556             }
0557             os_ << "            goto again;\n";
0558             os_ << "        }\n";
0559             if (sm_.data()._seen_BOL_assertion)
0560             {
0561                 os_ << "        else\n";
0562                 os_ << "        {\n";
0563                 os_ << "            bol_ = end_bol_;\n";
0564                 os_ << "        }\n";
0565             }
0566         }
0567         else if (sm_.data()._seen_BOL_assertion)
0568         {
0569             os_ << "        bol_ = end_bol_;\n";
0570         }
0571 
0572         os_ << "    }\n";
0573         os_ << "    else\n";
0574         os_ << "    {\n";
0575 
0576         if (sm_.data()._seen_BOL_assertion)
0577         {
0578             os_ << "        bol_ = (*start_token_ == '\\n') ? true : false;\n";
0579         }
0580 
0581         os_ << "        id_ = npos;\n";
0582         os_ << "        uid_ = npos;\n";
0583         os_ << "    }\n\n";
0584 
0585         os_ << "    unique_id_ = uid_;\n";
0586         os_ << "    return id_;\n";
0587         return os_.good();
0588     }
0589 
0590     ///////////////////////////////////////////////////////////////////////////
0591     template <typename Char>
0592     inline std::basic_string<Char> get_charlit(Char ch)
0593     {
0594         std::basic_string<Char> result;
0595         boost::lexer::basic_string_token<Char>::escape_char(ch, result);
0596         return result;
0597     }
0598 
0599     // check whether state0_0 is referenced from any of the other states
0600     template <typename Char>
0601     bool need_label0_0(boost::lexer::basic_state_machine<Char> const &sm_)
0602     {
0603         typedef typename boost::lexer::basic_state_machine<Char>::iterator
0604             iterator_type;
0605         iterator_type iter_ = sm_.begin();
0606         std::size_t const states_ = iter_->states;
0607 
0608         for (std::size_t state_ = 0; state_ < states_; ++state_)
0609         {
0610             if (0 == iter_->bol_index || 0 == iter_->eol_index)
0611             {
0612                 return true;
0613             }
0614 
0615             std::size_t const transitions_ = iter_->transitions;
0616             for (std::size_t t_ = 0; t_ < transitions_; ++t_)
0617             {
0618                 if (0 == iter_->goto_state)
0619                 {
0620                     return true;
0621                 }
0622                 ++iter_;
0623             }
0624             if (transitions_ == 0) ++iter_;
0625         }
0626         return false;
0627     }
0628 
0629     ///////////////////////////////////////////////////////////////////////////
0630     template <typename Char>
0631     bool generate_function_body_switch(std::basic_ostream<Char> & os_
0632       , boost::lexer::basic_state_machine<Char> const &sm_)
0633     {
0634         typedef typename boost::lexer::basic_state_machine<Char>::iterator
0635             iterator_type;
0636 
0637         std::size_t const lookups_ = sm_.data()._lookup->front ()->size ();
0638         iterator_type iter_ = sm_.begin();
0639         iterator_type labeliter_ = iter_;
0640         iterator_type end_ = sm_.end();
0641         std::size_t const dfas_ = sm_.data()._dfa->size ();
0642 
0643         os_ << "    static std::size_t const npos = "
0644                "static_cast<std::size_t>(~0);\n";
0645 
0646         os_ << "\n    if (start_token_ == end_)\n";
0647         os_ << "    {\n";
0648         os_ << "        unique_id_ = npos;\n";
0649         os_ << "        return 0;\n";
0650         os_ << "    }\n\n";
0651 
0652         if (sm_.data()._seen_BOL_assertion)
0653         {
0654             os_ << "    bool bol = bol_;\n";
0655         }
0656 
0657         if (dfas_ > 1)
0658         {
0659             os_ << "again:\n";
0660         }
0661 
0662         os_ << "    Iterator curr_ = start_token_;\n";
0663         os_ << "    bool end_state_ = false;\n";
0664         os_ << "    std::size_t id_ = npos;\n";
0665         os_ << "    std::size_t uid_ = npos;\n";
0666 
0667         if (dfas_ > 1)
0668         {
0669             os_ << "    std::size_t end_start_state_ = start_state_;\n";
0670         }
0671 
0672         if (sm_.data()._seen_BOL_assertion)
0673         {
0674             os_ << "    bool end_bol_ = bol_;\n";
0675         }
0676 
0677         os_ << "    Iterator end_token_ = start_token_;\n";
0678         os_ << '\n';
0679 
0680         os_ << "    " << ((lookups_ == 256) ? "char" : "wchar_t")
0681             << " ch_ = 0;\n\n";
0682 
0683         if (dfas_ > 1)
0684         {
0685             os_ << "    switch (start_state_)\n";
0686             os_ << "    {\n";
0687 
0688             for (std::size_t i_ = 0; i_ < dfas_; ++i_)
0689             {
0690                 os_ << "    case " << i_ << ":\n";
0691                 os_ << "        goto state" << i_ << "_0;\n";
0692                 os_ << "        break;\n";
0693             }
0694 
0695             os_ << "    default:\n";
0696             os_ << "        goto end;\n";
0697             os_ << "        break;\n";
0698             os_ << "    }\n";
0699         }
0700 
0701         bool need_state0_0_label = need_label0_0(sm_);
0702 
0703         for (std::size_t dfa_ = 0; dfa_ < dfas_; ++dfa_)
0704         {
0705             std::size_t const states_ = iter_->states;
0706             for (std::size_t state_ = 0; state_ < states_; ++state_)
0707             {
0708                 std::size_t const transitions_ = iter_->transitions;
0709                 std::size_t t_ = 0;
0710 
0711                 if (dfas_ > 1 || dfa_ != 0 || state_ != 0 || need_state0_0_label)
0712                 {
0713                     os_ << "\nstate" << dfa_ << '_' << state_ << ":\n";
0714                 }
0715 
0716                 if (iter_->end_state)
0717                 {
0718                     os_ << "    end_state_ = true;\n";
0719                     os_ << "    id_ = " << iter_->id << ";\n";
0720                     os_ << "    uid_ = " << iter_->unique_id << ";\n";
0721                     os_ << "    end_token_ = curr_;\n";
0722 
0723                     if (dfas_ > 1)
0724                     {
0725                         os_ << "    end_start_state_ = " << iter_->goto_dfa <<
0726                             ";\n";
0727                     }
0728 
0729                     if (sm_.data()._seen_BOL_assertion)
0730                     {
0731                         os_ << "    end_bol_ = bol;\n";
0732                     }
0733 
0734                     if (transitions_) os_ << '\n';
0735                 }
0736 
0737                 if (t_ < transitions_ ||
0738                     iter_->bol_index != boost::lexer::npos ||
0739                     iter_->eol_index != boost::lexer::npos)
0740                 {
0741                     os_ << "    if (curr_ == end_) goto end;\n";
0742                     os_ << "    ch_ = *curr_;\n";
0743                     if (iter_->bol_index != boost::lexer::npos)
0744                     {
0745                         os_ << "\n    if (bol) goto state" << dfa_ << '_'
0746                             << iter_->bol_index << ";\n";
0747                     }
0748                     if (iter_->eol_index != boost::lexer::npos)
0749                     {
0750                         os_ << "\n    if (ch_ == '\\n') goto state" << dfa_
0751                             << '_' << iter_->eol_index << ";\n";
0752                     }
0753                     os_ << "    ++curr_;\n";
0754                 }
0755 
0756                 for (/**/; t_ < transitions_; ++t_)
0757                 {
0758                     Char const *ptr_ = iter_->token._charset.c_str();
0759                     Char const *end2_ = ptr_ + iter_->token._charset.size();
0760                     Char start_char_ = 0;
0761                     Char curr_char_ = 0;
0762                     bool range_ = false;
0763                     bool first_char_ = true;
0764 
0765                     os_ << "\n    if (";
0766 
0767                     while (ptr_ != end2_)
0768                     {
0769                         curr_char_ = *ptr_++;
0770 
0771                         if (*ptr_ == curr_char_ + 1)
0772                         {
0773                             if (!range_)
0774                             {
0775                                 start_char_ = curr_char_;
0776                             }
0777                             range_ = true;
0778                         }
0779                         else
0780                         {
0781                             if (!first_char_)
0782                             {
0783                                 os_ << ((iter_->token._negated) ? " && " : " || ");
0784                             }
0785                             else
0786                             {
0787                                 first_char_ = false;
0788                             }
0789                             if (range_)
0790                             {
0791                                 if (iter_->token._negated)
0792                                 {
0793                                     os_ << "!";
0794                                 }
0795                                 os_ << "(ch_ >= '" << get_charlit(start_char_)
0796                                     << "' && ch_ <= '"
0797                                     << get_charlit(curr_char_) << "')";
0798                                 range_ = false;
0799                             }
0800                             else
0801                             {
0802                                 os_ << "ch_ "
0803                                     << ((iter_->token._negated) ? "!=" : "==")
0804                                     << " '" << get_charlit(curr_char_) << "'";
0805                             }
0806                         }
0807                     }
0808 
0809                     os_ << ") goto state" << dfa_ << '_' << iter_->goto_state
0810                         << ";\n";
0811                     ++iter_;
0812                 }
0813 
0814                 if (!(dfa_ == dfas_ - 1 && state_ == states_ - 1))
0815                 {
0816                     os_ << "    goto end;\n";
0817                 }
0818 
0819                 if (transitions_ == 0) ++iter_;
0820             }
0821         }
0822 
0823         os_ << "\nend:\n";
0824         os_ << "    if (end_state_)\n";
0825         os_ << "    {\n";
0826         os_ << "        // return longest match\n";
0827         os_ << "        start_token_ = end_token_;\n";
0828 
0829         if (dfas_ > 1)
0830         {
0831             os_ << "        start_state_ = end_start_state_;\n";
0832             os_ << "\n        if (id_ == 0)\n";
0833             os_ << "        {\n";
0834 
0835             if (sm_.data()._seen_BOL_assertion)
0836             {
0837                 os_ << "            bol = end_bol_;\n";
0838             }
0839 
0840             os_ << "            goto again;\n";
0841             os_ << "        }\n";
0842 
0843             if (sm_.data()._seen_BOL_assertion)
0844             {
0845                 os_ << "        else\n";
0846                 os_ << "        {\n";
0847                 os_ << "            bol_ = end_bol_;\n";
0848                 os_ << "        }\n";
0849             }
0850         }
0851         else if (sm_.data()._seen_BOL_assertion)
0852         {
0853             os_ << "        bol_ = end_bol_;\n";
0854         }
0855 
0856         os_ << "    }\n";
0857         os_ << "    else\n";
0858         os_ << "    {\n";
0859 
0860         if (sm_.data()._seen_BOL_assertion)
0861         {
0862             os_ << "        bol_ = (*start_token_ == '\\n') ? true : false;\n";
0863         }
0864         os_ << "        id_ = npos;\n";
0865         os_ << "        uid_ = npos;\n";
0866         os_ << "    }\n\n";
0867 
0868         os_ << "    unique_id_ = uid_;\n";
0869         os_ << "    return id_;\n";
0870         return os_.good();
0871     }
0872 
0873     ///////////////////////////////////////////////////////////////////////////
0874     // Generate a tokenizer for the given state machine.
0875     template <typename Char, typename F>
0876     inline bool
0877     generate_cpp (boost::lexer::basic_state_machine<Char> const& sm_
0878       , boost::lexer::basic_rules<Char> const& rules_
0879       , std::basic_ostream<Char> &os_, Char const* name_suffix
0880       , F generate_function_body)
0881     {
0882         if (sm_.data()._lookup->empty())
0883             return false;
0884 
0885         std::size_t const dfas_ = sm_.data()._dfa->size();
0886 //         std::size_t const lookups_ = sm_.data()._lookup->front()->size();
0887 
0888         os_ << "// Copyright (c) 2008-2009 Ben Hanson\n";
0889         os_ << "// Copyright (c) 2008-2011 Hartmut Kaiser\n";
0890         os_ << "//\n";
0891         os_ << "// Distributed under the Boost Software License, "
0892             "Version 1.0. (See accompanying\n";
0893         os_ << "// file licence_1_0.txt or copy at "
0894             "http://www.boost.org/LICENSE_1_0.txt)\n\n";
0895         os_ << "// Auto-generated by boost::lexer, do not edit\n\n";
0896 
0897         std::basic_string<Char> guard(name_suffix);
0898         guard += L<Char>(name_suffix[0] ? "_" : "");
0899         guard += L<Char>(__DATE__ "_" __TIME__);
0900         typename std::basic_string<Char>::size_type p = 
0901             guard.find_first_of(L<Char>(": "));
0902         while (std::string::npos != p)
0903         {
0904             guard.replace(p, 1, L<Char>("_"));
0905             p = guard.find_first_of(L<Char>(": "), p);
0906         }
0907         { // to_upper(guard)
0908             typedef std::ctype<Char> facet_t;
0909             facet_t const& facet = std::use_facet<facet_t>(std::locale());
0910             typedef typename std::basic_string<Char>::iterator iter_t;
0911             for (iter_t iter = guard.begin(),
0912                         last = guard.end(); iter != last; ++iter)
0913                 *iter = facet.toupper(*iter);
0914         }
0915 
0916         os_ << "#if !defined(BOOST_SPIRIT_LEXER_NEXT_TOKEN_" << guard << ")\n";
0917         os_ << "#define BOOST_SPIRIT_LEXER_NEXT_TOKEN_" << guard << "\n\n";
0918 
0919         os_ << "#include <boost/spirit/home/support/detail/lexer/char_traits.hpp>\n\n";
0920 
0921         generate_delimiter(os_);
0922         os_ << "// the generated table of state names and the tokenizer have to be\n"
0923                "// defined in the boost::spirit::lex::lexertl::static_ namespace\n";
0924         os_ << "namespace boost { namespace spirit { namespace lex { "
0925             "namespace lexertl { namespace static_ {\n\n";
0926 
0927         // generate the lexer state information variables
0928         if (!generate_cpp_state_info(rules_, os_, name_suffix))
0929             return false;
0930 
0931         generate_delimiter(os_);
0932         os_ << "// this function returns the next matched token\n";
0933         os_ << "template<typename Iterator>\n";
0934         os_ << "std::size_t next_token" << (name_suffix[0] ? "_" : "")
0935             << name_suffix  << " (";
0936 
0937         if (dfas_ > 1)
0938         {
0939             os_ << "std::size_t& start_state_, ";
0940         }
0941         else
0942         {
0943             os_ << "std::size_t& /*start_state_*/, ";
0944         }
0945         if (sm_.data()._seen_BOL_assertion)
0946         {
0947             os_ << "bool& bol_, ";
0948         }
0949         else
0950         {
0951             os_ << "bool& /*bol_*/, ";
0952         }
0953         os_ << "\n    ";
0954 
0955         os_ << "Iterator &start_token_, Iterator const& end_, ";
0956         os_ << "std::size_t& unique_id_)\n";
0957         os_ << "{\n";
0958         if (!generate_function_body(os_, sm_))
0959             return false;
0960         os_ << "}\n\n";
0961 
0962         if (!generate_cpp_state_table<Char>(os_, name_suffix
0963             , sm_.data()._seen_BOL_assertion, sm_.data()._seen_EOL_assertion))
0964         {
0965             return false;
0966         }
0967 
0968         os_ << "}}}}}  // namespace boost::spirit::lex::lexertl::static_\n\n";
0969 
0970         os_ << "#endif\n";
0971 
0972         return os_.good();
0973     }
0974 
0975     }   // namespace detail
0976 
0977     ///////////////////////////////////////////////////////////////////////////
0978     template <typename Lexer, typename F>
0979     inline bool
0980     generate_static(Lexer const& lexer
0981       , std::basic_ostream<typename Lexer::char_type>& os
0982       , typename Lexer::char_type const* name_suffix, F f)
0983     {
0984         if (!lexer.init_dfa(true))    // always minimize DFA for static lexers
0985             return false;
0986         return detail::generate_cpp(lexer.state_machine_, lexer.rules_, os
0987           , name_suffix, f);
0988     }
0989 
0990     ///////////////////////////////////////////////////////////////////////////
0991     // deprecated function, will be removed in the future (this has been
0992     // replaced by the function generate_static_dfa - see below).
0993     template <typename Lexer>
0994     inline bool
0995     generate_static(Lexer const& lexer
0996       , std::basic_ostream<typename Lexer::char_type>& os
0997       , typename Lexer::char_type const* name_suffix =
0998           detail::L<typename Lexer::char_type>())
0999     {
1000         return generate_static(lexer, os, name_suffix
1001           , &detail::generate_function_body_dfa<typename Lexer::char_type>);
1002     }
1003 
1004     ///////////////////////////////////////////////////////////////////////////
1005     template <typename Lexer>
1006     inline bool
1007     generate_static_dfa(Lexer const& lexer
1008       , std::basic_ostream<typename Lexer::char_type>& os
1009       , typename Lexer::char_type const* name_suffix =
1010           detail::L<typename Lexer::char_type>())
1011     {
1012         return generate_static(lexer, os, name_suffix
1013           , &detail::generate_function_body_dfa<typename Lexer::char_type>);
1014     }
1015 
1016     ///////////////////////////////////////////////////////////////////////////
1017     template <typename Lexer>
1018     inline bool
1019     generate_static_switch(Lexer const& lexer
1020       , std::basic_ostream<typename Lexer::char_type>& os
1021       , typename Lexer::char_type const* name_suffix =
1022           detail::L<typename Lexer::char_type>())
1023     {
1024         return generate_static(lexer, os, name_suffix
1025           , &detail::generate_function_body_switch<typename Lexer::char_type>);
1026     }
1027 
1028 ///////////////////////////////////////////////////////////////////////////////
1029 }}}}
1030 
1031 #endif