File indexing completed on 2025-01-31 10:02:16
0001
0002
0003
0004
0005
0006 #if !defined(BOOST_SPIRIT_LEX_LEXER_MAR_17_2007_0139PM)
0007 #define BOOST_SPIRIT_LEX_LEXER_MAR_17_2007_0139PM
0008
0009 #if defined(_MSC_VER)
0010 #pragma once
0011 #endif
0012
0013 #include <iosfwd>
0014
0015 #include <boost/spirit/home/support/detail/lexer/generator.hpp>
0016 #include <boost/spirit/home/support/detail/lexer/rules.hpp>
0017 #include <boost/spirit/home/support/detail/lexer/consts.hpp>
0018 #include <boost/spirit/home/support/unused.hpp>
0019
0020 #include <boost/spirit/home/lex/lexer/lexertl/token.hpp>
0021 #include <boost/spirit/home/lex/lexer/lexertl/functor.hpp>
0022 #include <boost/spirit/home/lex/lexer/lexertl/functor_data.hpp>
0023 #include <boost/spirit/home/lex/lexer/lexertl/iterator.hpp>
0024 #if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
0025 #include <boost/spirit/home/support/detail/lexer/debug.hpp>
0026 #endif
0027
0028 #include <iterator> // for std::iterator_traits
0029
0030 namespace boost { namespace spirit { namespace lex { namespace lexertl
0031 {
0032
0033 namespace detail
0034 {
0035
0036
0037
0038
0039
// Report whether the character c is one of the characters this header
// treats as special inside a token definition, i.e. whether escape() has to
// put a backslash in front of it.
template <typename Char>
inline bool must_escape(Char c)
{
    // All characters that need escaping. The terminating '\0' of the
    // literal only ends the scan and is never compared against c.
    static char const specials[] = "+/*?|()[]{}.^$\\\"";

    for (char const* p = specials; *p != '\0'; ++p) {
        if (c == static_cast<Char>(*p))
            return true;
    }
    return false;
}
0061
0062
0063
0064
0065
0066
0067 template <typename Char>
0068 inline std::basic_string<Char> escape(Char ch)
0069 {
0070 std::basic_string<Char> result(1, ch);
0071 if (detail::must_escape(ch))
0072 {
0073 typedef typename std::basic_string<Char>::size_type size_type;
0074 result.insert((size_type)0, 1, '\\');
0075 }
0076 return result;
0077 }
0078
0079
0080
0081
0082 inline boost::lexer::regex_flags map_flags(unsigned int flags)
0083 {
0084 unsigned int retval = boost::lexer::none;
0085 if (flags & match_flags::match_not_dot_newline)
0086 retval |= boost::lexer::dot_not_newline;
0087 if (flags & match_flags::match_icase)
0088 retval |= boost::lexer::icase;
0089
0090 return boost::lexer::regex_flags(retval);
0091 }
0092 }
0093
0094
// Forward declaration only; this header contains no definition of
// generate_static. The lexer class template in this header names this
// function template as a friend.
template <typename Lexer, typename F>
bool generate_static(
    Lexer const&,
    std::basic_ostream<typename Lexer::char_type>&,
    typename Lexer::char_type const*,
    F);
0099
0100
0101
0102
0103
0104
0105
0106
0107
0108
0109
0110
0111
0112
0113
0114
0115
0116
0117
0118
0119
0120
0121
0122
0123
0124
0125
0126
0127
0128
0129
0130
0131
0132
0133
0134
0135
0136
0137
0138
0139
0140
0141
0142
0143
0144
0145
0146 template <typename Token = token<>
0147 , typename Iterator = typename Token::iterator_type
0148 , typename Functor = functor<Token, lexertl::detail::data, Iterator> >
0149 class lexer
0150 {
0151 private:
0152 struct dummy { void true_() {} };
0153 typedef void (dummy::*safe_bool)();
0154
0155 static std::size_t const all_states_id = static_cast<std::size_t>(-2);
0156
0157 public:
0158 operator safe_bool() const
0159 { return initialized_dfa_ ? &dummy::true_ : 0; }
0160
0161 typedef typename std::iterator_traits<Iterator>::value_type char_type;
0162 typedef std::basic_string<char_type> string_type;
0163
0164 typedef boost::lexer::basic_rules<char_type> basic_rules_type;
0165
0166
0167
0168 typedef Token token_type;
0169 typedef typename Token::id_type id_type;
0170 typedef iterator<Functor> iterator_type;
0171
0172 private:
0173 #ifdef _MSC_VER
0174 # pragma warning(push)
0175 # pragma warning(disable: 4512)
0176 #endif
0177
0178 struct iterator_data_type
0179 {
0180 typedef typename Functor::semantic_actions_type semantic_actions_type;
0181
0182 iterator_data_type(
0183 boost::lexer::basic_state_machine<char_type> const& sm
0184 , boost::lexer::basic_rules<char_type> const& rules
0185 , semantic_actions_type const& actions)
0186 : state_machine_(sm), rules_(rules), actions_(actions)
0187 {}
0188
0189 boost::lexer::basic_state_machine<char_type> const& state_machine_;
0190 boost::lexer::basic_rules<char_type> const& rules_;
0191 semantic_actions_type const& actions_;
0192 };
0193 #ifdef _MSC_VER
0194 # pragma warning(pop)
0195 #endif
0196
0197 public:
0198
0199
0200 iterator_type begin(Iterator& first, Iterator const& last
0201 , char_type const* initial_state = 0) const
0202 {
0203 if (!init_dfa())
0204 return iterator_type();
0205
0206 iterator_data_type iterator_data(state_machine_, rules_, actions_);
0207 return iterator_type(iterator_data, first, last, initial_state);
0208 }
0209
0210
0211
0212 iterator_type end() const
0213 {
0214 return iterator_type();
0215 }
0216
0217 protected:
0218
0219 lexer(unsigned int flags)
0220 : flags_(detail::map_flags(flags))
0221 , rules_(flags_)
0222 , initialized_dfa_(false)
0223 {}
0224
0225 public:
0226
0227 std::size_t add_token(char_type const* state, char_type tokendef,
0228 std::size_t token_id, char_type const* targetstate)
0229 {
0230 add_state(state);
0231 initialized_dfa_ = false;
0232 if (state == all_states())
0233 return rules_.add(state, detail::escape(tokendef), token_id, rules_.dot());
0234
0235 if (0 == targetstate)
0236 targetstate = state;
0237 else
0238 add_state(targetstate);
0239 return rules_.add(state, detail::escape(tokendef), token_id, targetstate);
0240 }
0241 std::size_t add_token(char_type const* state, string_type const& tokendef,
0242 std::size_t token_id, char_type const* targetstate)
0243 {
0244 add_state(state);
0245 initialized_dfa_ = false;
0246 if (state == all_states())
0247 return rules_.add(state, tokendef, token_id, rules_.dot());
0248
0249 if (0 == targetstate)
0250 targetstate = state;
0251 else
0252 add_state(targetstate);
0253 return rules_.add(state, tokendef, token_id, targetstate);
0254 }
0255
0256
0257 void add_pattern (char_type const* state, string_type const& name,
0258 string_type const& patterndef)
0259 {
0260 add_state(state);
0261 rules_.add_macro(name.c_str(), patterndef);
0262 initialized_dfa_ = false;
0263 }
0264
0265 boost::lexer::rules const& get_rules() const { return rules_; }
0266
0267 void clear(char_type const* state)
0268 {
0269 std::size_t s = rules_.state(state);
0270 if (boost::lexer::npos != s)
0271 rules_.clear(state);
0272 initialized_dfa_ = false;
0273 }
0274 std::size_t add_state(char_type const* state)
0275 {
0276 if (state == all_states())
0277 return all_states_id;
0278
0279 std::size_t stateid = rules_.state(state);
0280 if (boost::lexer::npos == stateid) {
0281 stateid = rules_.add_state(state);
0282 initialized_dfa_ = false;
0283 }
0284 return stateid;
0285 }
0286 string_type initial_state() const
0287 {
0288 return string_type(rules_.initial());
0289 }
0290 string_type all_states() const
0291 {
0292 return string_type(rules_.all_states());
0293 }
0294
0295
0296 template <typename F>
0297 void add_action(std::size_t unique_id, std::size_t state, F act)
0298 {
0299
0300
0301
0302
0303 typedef typename Functor::wrap_action_type wrapper_type;
0304 if (state == all_states_id) {
0305
0306 typedef typename
0307 basic_rules_type::string_size_t_map::const_iterator
0308 state_iterator;
0309
0310 std::size_t states = rules_.statemap().size();
0311 for (state_iterator it = rules_.statemap().begin(),
0312 end = rules_.statemap().end(); it != end; ++it) {
0313 for (std::size_t j = 0; j < states; ++j)
0314 actions_.add_action(unique_id + j, it->second, wrapper_type::call(act));
0315 }
0316 }
0317 else {
0318 actions_.add_action(unique_id, state, wrapper_type::call(act));
0319 }
0320 }
0321
0322
0323
0324
0325
0326
0327
0328
0329
0330
0331
0332 bool init_dfa(bool minimize = false) const
0333 {
0334 if (!initialized_dfa_) {
0335 state_machine_.clear();
0336 typedef boost::lexer::basic_generator<char_type> generator;
0337 generator::build (rules_, state_machine_);
0338 if (minimize)
0339 generator::minimise (state_machine_);
0340
0341 #if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
0342 boost::lexer::debug::dump(state_machine_, std::cerr);
0343 #endif
0344 initialized_dfa_ = true;
0345
0346
0347
0348
0349
0350 }
0351 return true;
0352 }
0353
0354 private:
0355
0356 mutable boost::lexer::basic_state_machine<char_type> state_machine_;
0357 boost::lexer::regex_flags flags_;
0358 basic_rules_type rules_;
0359
0360 typename Functor::semantic_actions_type actions_;
0361 mutable bool initialized_dfa_;
0362
0363
0364 template <typename Lexer, typename F>
0365 friend bool generate_static(Lexer const&
0366 , std::basic_ostream<typename Lexer::char_type>&
0367 , typename Lexer::char_type const*, F);
0368 };
0369
0370
0371
0372
0373
0374
0375
0376
0377
0378
0379
0380
0381
0382
0383
0384
0385
0386
0387
0388
0389 template <typename Token = token<>
0390 , typename Iterator = typename Token::iterator_type
0391 , typename Functor = functor<Token, lexertl::detail::data, Iterator, mpl::true_> >
0392 class actor_lexer : public lexer<Token, Iterator, Functor>
0393 {
0394 protected:
0395
0396 actor_lexer(unsigned int flags)
0397 : lexer<Token, Iterator, Functor>(flags) {}
0398 };
0399
0400 }}}}
0401
0402 #endif