//  Copyright (c) 2001-2011 Hartmut Kaiser
//
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#if !defined(BOOST_SPIRIT_LEX_LEXER_MAR_17_2007_0139PM)
#define BOOST_SPIRIT_LEX_LEXER_MAR_17_2007_0139PM

#if defined(_MSC_VER)
#pragma once
#endif

#include <iosfwd>

#include <boost/spirit/home/support/detail/lexer/generator.hpp>
#include <boost/spirit/home/support/detail/lexer/rules.hpp>
#include <boost/spirit/home/support/detail/lexer/consts.hpp>
#include <boost/spirit/home/support/unused.hpp>

#include <boost/spirit/home/lex/lexer/lexertl/token.hpp>
#include <boost/spirit/home/lex/lexer/lexertl/functor.hpp>
#include <boost/spirit/home/lex/lexer/lexertl/functor_data.hpp>
#include <boost/spirit/home/lex/lexer/lexertl/iterator.hpp>
#if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
#include <boost/spirit/home/support/detail/lexer/debug.hpp>
#endif

#include <iterator> // for std::iterator_traits

namespace boost { namespace spirit { namespace lex { namespace lexertl
{
    ///////////////////////////////////////////////////////////////////////////
    namespace detail
    {
        ///////////////////////////////////////////////////////////////////////
        //  The must_escape function checks if the given character value needs
        //  to be preceded by a backslash character to disable its special
        //  meaning in the context of a regular expression.
        ///////////////////////////////////////////////////////////////////////
        template <typename Char>
        inline bool must_escape(Char c)
        {
            // FIXME: more needed?
            switch (c) {
            case '+': case '/': case '*': case '?':
            case '|':
            case '(': case ')':
            case '[': case ']':
            case '{': case '}':
            case '.':
            case '^': case '$':
            case '\\':
            case '"':
                return true;

            default:
                break;
            }
            return false;
        }
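
        // For instance, given the switch above, must_escape('+') and
        // must_escape('(') return true, while must_escape('a') returns false.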

        ///////////////////////////////////////////////////////////////////////
        //  The escape function returns the string representation of the given
        //  character value, possibly escaped with a backslash character, to
        //  allow it to be used safely in a regular expression definition.
        ///////////////////////////////////////////////////////////////////////
        template <typename Char>
        inline std::basic_string<Char> escape(Char ch)
        {
            std::basic_string<Char> result(1, ch);
            if (detail::must_escape(ch))
            {
                typedef typename std::basic_string<Char>::size_type size_type;
                result.insert((size_type)0, 1, '\\');
            }
            return result;
        }
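
        // Illustration of the helpers above: escape('*') returns the two
        // character string "\*", while escape('x') returns just "x", so a
        // single literal character can be embedded safely in a lexertl regex.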

        ///////////////////////////////////////////////////////////////////////
        //  The map_flags function maps the Spirit match_flags passed in to
        //  the corresponding lexertl regex_flags.
        ///////////////////////////////////////////////////////////////////////
        inline boost::lexer::regex_flags map_flags(unsigned int flags)
        {
            unsigned int retval = boost::lexer::none;
            if (flags & match_flags::match_not_dot_newline)
                retval |= boost::lexer::dot_not_newline;
            if (flags & match_flags::match_icase)
                retval |= boost::lexer::icase;

            return boost::lexer::regex_flags(retval);
        }
    }

    ///////////////////////////////////////////////////////////////////////////
    template <typename Lexer, typename F>
    bool generate_static(Lexer const&
      , std::basic_ostream<typename Lexer::char_type>&
      , typename Lexer::char_type const*, F);

    ///////////////////////////////////////////////////////////////////////////
    //
    //  Every lexer type to be used as a lexer for Spirit has to conform to
    //  the following public interface:
    //
    //    typedefs:
    //        iterator_type   The type of the iterator exposed by this lexer.
    //        token_type      The type of the tokens returned from the exposed
    //                        iterators.
    //
    //    functions:
    //        default constructor
    //                        Since lexers are instantiated as base classes
    //                        only, it might be a good idea to make this
    //                        constructor protected.
    //        begin, end      Return a pair of iterators which, when
    //                        dereferenced, return the sequence of tokens
    //                        recognized in the input stream passed as the
    //                        parameters to the begin() function.
    //        add_token       Should add the definition of a token to be
    //                        recognized by this lexer.
    //        clear           Should delete all current token definitions
    //                        associated with the given state of this lexer
    //                        object.
    //
    //    template parameters:
    //        Iterator        The type of the iterator used to access the
    //                        underlying character stream.
    //        Token           The type of the tokens to be returned from the
    //                        exposed token iterator.
    //        Functor         The type of the InputPolicy to use to instantiate
    //                        the multi_pass iterator type to be used as the
    //                        token iterator (returned from begin()/end()).
    //
    ///////////////////////////////////////////////////////////////////////////
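
    ///////////////////////////////////////////////////////////////////////////
    //  A rough usage sketch (illustration only, not part of this header):
    //  given some std::string input, a type my_lexer derived from one of the
    //  lexers below might be driven through the interface described above
    //  roughly like this (my_lexer, ID_WORD, and process_token() are made-up
    //  names):
    //
    //      my_lexer lxr;
    //      lxr.add_token(lxr.initial_state().c_str(), "[a-z]+", ID_WORD, 0);
    //
    //      char const* first = input.c_str();
    //      char const* last = first + input.size();
    //      for (my_lexer::iterator_type it = lxr.begin(first, last);
    //           it != lxr.end(); ++it)
    //      {
    //          process_token(*it);     // hypothetical token handler
    //      }
    ///////////////////////////////////////////////////////////////////////////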

    ///////////////////////////////////////////////////////////////////////////
    //
    //  The lexer class is an implementation of a Spirit.Lex lexer on
    //  top of Ben Hanson's lexertl library as outlined above (for more
    //  information about lexertl see http://www.benhanson.net/lexertl.html).
    //
    //  This class is supposed to be used as the first and only template
    //  parameter while instantiating instances of a lex::lexer class.
    //
    ///////////////////////////////////////////////////////////////////////////
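
    ///////////////////////////////////////////////////////////////////////////
    //  A minimal definition sketch (assumed typical client code following the
    //  Spirit.Lex word_count example, not part of this header; the token ids
    //  ID_WORD, ID_EOL, and ID_CHAR are user supplied):
    //
    //      template <typename Lexer>
    //      struct word_count_tokens : lex::lexer<Lexer>
    //      {
    //          word_count_tokens()
    //          {
    //              this->self.add
    //                  ("[^ \t\n]+", ID_WORD)
    //                  ("\n", ID_EOL)
    //                  (".", ID_CHAR);
    //          }
    //      };
    //
    //      // instantiated, for instance, as:
    //      //     word_count_tokens<lex::lexertl::lexer<> > tokens;
    ///////////////////////////////////////////////////////////////////////////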
    template <typename Token = token<>
      , typename Iterator = typename Token::iterator_type
      , typename Functor = functor<Token, lexertl::detail::data, Iterator> >
    class lexer
    {
    private:
        struct dummy { void true_() {} };
        typedef void (dummy::*safe_bool)();

        static std::size_t const all_states_id = static_cast<std::size_t>(-2);

    public:
        operator safe_bool() const
            { return initialized_dfa_ ? &dummy::true_ : 0; }

        typedef typename std::iterator_traits<Iterator>::value_type char_type;
        typedef std::basic_string<char_type> string_type;

        typedef boost::lexer::basic_rules<char_type> basic_rules_type;

        //  Every lexer type to be used as a lexer for Spirit has to conform to
        //  a public interface.
        typedef Token token_type;
        typedef typename Token::id_type id_type;
        typedef iterator<Functor> iterator_type;

    private:
#ifdef _MSC_VER
#  pragma warning(push)
#  pragma warning(disable: 4512) // assignment operator could not be generated.
#endif
        // this type is purely used for the iterator_type construction below
        struct iterator_data_type
        {
            typedef typename Functor::semantic_actions_type semantic_actions_type;

            iterator_data_type(
                    boost::lexer::basic_state_machine<char_type> const& sm
                  , boost::lexer::basic_rules<char_type> const& rules
                  , semantic_actions_type const& actions)
              : state_machine_(sm), rules_(rules), actions_(actions)
            {}

            boost::lexer::basic_state_machine<char_type> const& state_machine_;
            boost::lexer::basic_rules<char_type> const& rules_;
            semantic_actions_type const& actions_;
        };
#ifdef _MSC_VER
#  pragma warning(pop)
#endif

    public:
        //  Return the start iterator usable for iterating over the generated
        //  tokens.
        iterator_type begin(Iterator& first, Iterator const& last
          , char_type const* initial_state = 0) const
        {
            if (!init_dfa())    // never minimize DFA for dynamic lexers
                return iterator_type();

            iterator_data_type iterator_data(state_machine_, rules_, actions_);
            return iterator_type(iterator_data, first, last, initial_state);
        }

        //  Return the end iterator usable to stop iterating over the generated
        //  tokens.
        iterator_type end() const
        {
            return iterator_type();
        }

    protected:
        //  Lexer instances can be created by means of a derived class only.
        lexer(unsigned int flags)
          : flags_(detail::map_flags(flags))
          , rules_(flags_)
          , initialized_dfa_(false)
        {}

    public:
        // interface for token definition management
        std::size_t add_token(char_type const* state, char_type tokendef,
            std::size_t token_id, char_type const* targetstate)
        {
            add_state(state);
            initialized_dfa_ = false;
            if (state == all_states())
                return rules_.add(state, detail::escape(tokendef), token_id, rules_.dot());

            if (0 == targetstate)
                targetstate = state;
            else
                add_state(targetstate);
            return rules_.add(state, detail::escape(tokendef), token_id, targetstate);
        }
        std::size_t add_token(char_type const* state, string_type const& tokendef,
            std::size_t token_id, char_type const* targetstate)
        {
            add_state(state);
            initialized_dfa_ = false;
            if (state == all_states())
                return rules_.add(state, tokendef, token_id, rules_.dot());

            if (0 == targetstate)
                targetstate = state;
            else
                add_state(targetstate);
            return rules_.add(state, tokendef, token_id, targetstate);
        }

        // interface for pattern definition management
        void add_pattern (char_type const* state, string_type const& name,
            string_type const& patterndef)
        {
            add_state(state);
            rules_.add_macro(name.c_str(), patterndef);
            initialized_dfa_ = false;
        }

        boost::lexer::rules const& get_rules() const { return rules_; }

        void clear(char_type const* state)
        {
            std::size_t s = rules_.state(state);
            if (boost::lexer::npos != s)
                rules_.clear(state);
            initialized_dfa_ = false;
        }
        std::size_t add_state(char_type const* state)
        {
            if (state == all_states())
                return all_states_id;

            std::size_t stateid = rules_.state(state);
            if (boost::lexer::npos == stateid) {
                stateid = rules_.add_state(state);
                initialized_dfa_ = false;
            }
            return stateid;
        }
        string_type initial_state() const
        {
            return string_type(rules_.initial());
        }
        string_type all_states() const
        {
            return string_type(rules_.all_states());
        }

        //  Register a semantic action with the given id
        template <typename F>
        void add_action(std::size_t unique_id, std::size_t state, F act)
        {
            // If you see an error here stating that add_action is not a member
            // of fusion::unused_type, then you probably have semantic actions
            // attached to at least one token in the lexer definition without
            // using lex::lexertl::actor_lexer<> as its base class.
            typedef typename Functor::wrap_action_type wrapper_type;
            if (state == all_states_id) {
                // add the action to all known states
                typedef typename
                    basic_rules_type::string_size_t_map::const_iterator
                state_iterator;

                std::size_t states = rules_.statemap().size();
                for (state_iterator it = rules_.statemap().begin(),
                                    end = rules_.statemap().end(); it != end; ++it) {
                    for (std::size_t j = 0; j < states; ++j)
                        actions_.add_action(unique_id + j, it->second, wrapper_type::call(act));
                }
            }
            else {
                actions_.add_action(unique_id, state, wrapper_type::call(act));
            }
        }
//         template <typename F>
//         void add_action(std::size_t unique_id, char_type const* state, F act)
//         {
//             typedef typename Functor::wrap_action_type wrapper_type;
//             actions_.add_action(unique_id, add_state(state), wrapper_type::call(act));
//         }

        // We do not minimize the state machine by default anymore because
        // Ben said: "If you can afford to generate a lexer at runtime, there
        //            is little point in calling minimise."
        // Go figure.
        bool init_dfa(bool minimize = false) const
        {
            if (!initialized_dfa_) {
                state_machine_.clear();
                typedef boost::lexer::basic_generator<char_type> generator;
                generator::build (rules_, state_machine_);
                if (minimize)
                    generator::minimise (state_machine_);

#if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
                boost::lexer::debug::dump(state_machine_, std::cerr);
#endif
                initialized_dfa_ = true;

//                 // release memory held by rules description
//                 basic_rules_type rules;
//                 rules.init_state_info(rules_);        // preserve states
//                 std::swap(rules, rules_);
            }
            return true;
        }

    private:
        // lexertl specific data
        mutable boost::lexer::basic_state_machine<char_type> state_machine_;
        boost::lexer::regex_flags flags_;
        /*mutable*/ basic_rules_type rules_;

        typename Functor::semantic_actions_type actions_;
        mutable bool initialized_dfa_;

        // generator functions must be able to access members directly
        template <typename Lexer, typename F>
        friend bool generate_static(Lexer const&
          , std::basic_ostream<typename Lexer::char_type>&
          , typename Lexer::char_type const*, F);
    };

    ///////////////////////////////////////////////////////////////////////////
    //
    //  The actor_lexer class is another implementation of a Spirit.Lex
    //  lexer on top of Ben Hanson's lexertl library as outlined above (for
    //  more information about lexertl see
    //  http://www.benhanson.net/lexertl.html).
    //
    //  The only difference from the lexer class above is that token_def
    //  definitions may have semantic (lexer) actions attached while being
    //  defined:
    //
    //      int w;
    //      token_def word = "[^ \t\n]+";
    //      self = word[++ref(w)];        // see example: word_count_lexer
    //
    //  This class is supposed to be used as the first and only template
    //  parameter while instantiating instances of a lex::lexer class.
    //
    ///////////////////////////////////////////////////////////////////////////
    template <typename Token = token<>
      , typename Iterator = typename Token::iterator_type
      , typename Functor = functor<Token, lexertl::detail::data, Iterator, mpl::true_> >
    class actor_lexer : public lexer<Token, Iterator, Functor>
    {
    protected:
        //  Lexer instances can be created by means of a derived class only.
        actor_lexer(unsigned int flags)
          : lexer<Token, Iterator, Functor>(flags) {}
    };
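
    ///////////////////////////////////////////////////////////////////////////
    //  A slightly fuller sketch of the snippet quoted in the comment above
    //  (assumed client code along the lines of the word_count_lexer example,
    //  not part of this header):
    //
    //      template <typename Lexer>
    //      struct word_count_tokens : lex::lexer<Lexer>
    //      {
    //          word_count_tokens() : w(0), word("[^ \t\n]+")
    //          {
    //              using boost::phoenix::ref;
    //              this->self = word[++ref(w)];   // count words as they match
    //          }
    //          std::size_t w;
    //          lex::token_def<> word;
    //      };
    //
    //      // a semantic action on a token_def requires actor_lexer (or
    //      // another Functor supporting actions) as the wrapped lexer type:
    //      //     word_count_tokens<lex::lexertl::actor_lexer<> > wc;
    ///////////////////////////////////////////////////////////////////////////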

}}}}

#endif