File indexing completed on 2025-01-19 09:47:49
0001
0002
0003
0004
0005
0006 #ifndef BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_PARSER_PARSER_HPP
0007 #define BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_PARSER_PARSER_HPP
0008
0009 #include <boost/assert.hpp>
0010 #include "tree/end_node.hpp"
0011 #include "tree/iteration_node.hpp"
0012 #include "tree/leaf_node.hpp"
0013 #include "../runtime_error.hpp"
0014 #include "tree/selection_node.hpp"
0015 #include "tree/sequence_node.hpp"
0016 #include "../size_t.hpp"
0017 #include "tokeniser/re_tokeniser.hpp"
0018 #include <sstream>
0019
0020 namespace boost
0021 {
0022 namespace lexer
0023 {
0024 namespace detail
0025 {
0026 template<typename CharT>
0027 class basic_parser
0028 {
0029 public:
0030 typedef basic_re_tokeniser<CharT> tokeniser;
0031 typedef typename tokeniser::string string;
0032 typedef std::map<string, const node *> macro_map;
0033 typedef node::node_ptr_vector node_ptr_vector;
0034 typedef typename tokeniser::num_token token;
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054 static node *parse (const CharT *start_, const CharT * const end_,
0055 const std::size_t id_, const std::size_t unique_id_,
0056 const std::size_t dfa_state_, const regex_flags flags_,
0057 const std::locale &locale_, node_ptr_vector &node_ptr_vector_,
0058 const macro_map ¯omap_, typename tokeniser::token_map &map_,
0059 bool &seen_BOL_assertion_, bool &seen_EOL_assertion_)
0060 {
0061 node *root_ = 0;
0062 state state_ (start_, end_, flags_, locale_);
0063 token lhs_token_;
0064 token rhs_token_;
0065 token_stack token_stack_;
0066 tree_node_stack tree_node_stack_;
0067 char action_ = 0;
0068
0069 token_stack_.push (rhs_token_);
0070 tokeniser::next (state_, map_, rhs_token_);
0071
0072 do
0073 {
0074 lhs_token_ = token_stack_.top ();
0075 action_ = lhs_token_.precedence (rhs_token_._type);
0076
0077 switch (action_)
0078 {
0079 case '<':
0080 case '=':
0081 token_stack_.push (rhs_token_);
0082 tokeniser::next (state_, map_, rhs_token_);
0083 break;
0084 case '>':
0085 reduce (token_stack_, macromap_, node_ptr_vector_,
0086 tree_node_stack_);
0087 break;
0088 default:
0089 std::ostringstream ss_;
0090
0091 ss_ << "A syntax error occurred: '" <<
0092 lhs_token_.precedence_string () <<
0093 "' against '" << rhs_token_.precedence_string () <<
0094 "' at index " << state_.index () << ".";
0095 throw runtime_error (ss_.str ().c_str ());
0096 break;
0097 }
0098 } while (!token_stack_.empty ());
0099
0100 if (tree_node_stack_.empty ())
0101 {
0102 throw runtime_error ("Empty rules are not allowed.");
0103 }
0104
0105 BOOST_ASSERT(tree_node_stack_.size () == 1);
0106
0107 node *lhs_node_ = tree_node_stack_.top ();
0108
0109 tree_node_stack_.pop ();
0110
0111 if (id_ == 0)
0112 {
0113
0114 root_ = lhs_node_;
0115 }
0116 else
0117 {
0118 node_ptr_vector_->push_back (static_cast<end_node *>(0));
0119
0120 node *rhs_node_ = new end_node (id_, unique_id_, dfa_state_);
0121
0122 node_ptr_vector_->back () = rhs_node_;
0123 node_ptr_vector_->push_back (static_cast<sequence_node *>(0));
0124 node_ptr_vector_->back () = new sequence_node
0125 (lhs_node_, rhs_node_);
0126 root_ = node_ptr_vector_->back ();
0127 }
0128
0129
0130
0131 if (state_._seen_BOL_assertion) seen_BOL_assertion_ = true;
0132
0133 if (state_._seen_EOL_assertion) seen_EOL_assertion_ = true;
0134
0135 return root_;
0136 }
0137
0138 private:
0139 typedef typename tokeniser::state state;
0140 typedef std::stack<token> token_stack;
0141 typedef node::node_stack tree_node_stack;
0142
0143 static void reduce (token_stack &token_stack_,
0144 const macro_map ¯omap_, node_ptr_vector &node_vector_ptr_,
0145 tree_node_stack &tree_node_stack_)
0146 {
0147 typename tokeniser::num_token lhs_;
0148 typename tokeniser::num_token rhs_;
0149 token_stack handle_;
0150 char action_ = 0;
0151
0152 do
0153 {
0154 rhs_ = token_stack_.top ();
0155 token_stack_.pop ();
0156 handle_.push (rhs_);
0157
0158 if (!token_stack_.empty ())
0159 {
0160 lhs_ = token_stack_.top ();
0161 action_ = lhs_.precedence (rhs_._type);
0162 }
0163 } while (!token_stack_.empty () && action_ == '=');
0164
0165 BOOST_ASSERT(token_stack_.empty () || action_ == '<');
0166
0167 switch (rhs_._type)
0168 {
0169 case token::BEGIN:
0170
0171 break;
0172 case token::REGEX:
0173
0174 break;
0175 case token::OREXP:
0176 orexp (handle_, token_stack_, node_vector_ptr_, tree_node_stack_);
0177 break;
0178 case token::SEQUENCE:
0179 token_stack_.push (token::OREXP);
0180 break;
0181 case token::SUB:
0182 sub (handle_, token_stack_, node_vector_ptr_, tree_node_stack_);
0183 break;
0184 case token::EXPRESSION:
0185 token_stack_.push (token::SUB);
0186 break;
0187 case token::REPEAT:
0188 repeat (handle_, token_stack_);
0189 break;
0190 case token::CHARSET:
0191 charset (handle_, token_stack_, node_vector_ptr_,
0192 tree_node_stack_);
0193 break;
0194 case token::MACRO:
0195 macro (handle_, token_stack_, macromap_, node_vector_ptr_,
0196 tree_node_stack_);
0197 break;
0198 case token::OPENPAREN:
0199 openparen (handle_, token_stack_);
0200 break;
0201 case token::OPT:
0202 case token::AOPT:
0203 optional (rhs_._type == token::OPT, node_vector_ptr_,
0204 tree_node_stack_);
0205 token_stack_.push (token::DUP);
0206 break;
0207 case token::ZEROORMORE:
0208 case token::AZEROORMORE:
0209 zero_or_more (rhs_._type == token::ZEROORMORE, node_vector_ptr_,
0210 tree_node_stack_);
0211 token_stack_.push (token::DUP);
0212 break;
0213 case token::ONEORMORE:
0214 case token::AONEORMORE:
0215 one_or_more (rhs_._type == token::ONEORMORE, node_vector_ptr_,
0216 tree_node_stack_);
0217 token_stack_.push (token::DUP);
0218 break;
0219 case token::REPEATN:
0220 case token::AREPEATN:
0221 repeatn (rhs_._type == token::REPEATN, handle_.top (),
0222 node_vector_ptr_, tree_node_stack_);
0223 token_stack_.push (token::DUP);
0224 break;
0225 default:
0226 throw runtime_error
0227 ("Internal error regex_parser::reduce");
0228 break;
0229 }
0230 }
0231
0232 static void orexp (token_stack &handle_, token_stack &token_stack_,
0233 node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_)
0234 {
0235 BOOST_ASSERT(handle_.top ()._type == token::OREXP &&
0236 (handle_.size () == 1 || handle_.size () == 3));
0237
0238 if (handle_.size () == 1)
0239 {
0240 token_stack_.push (token::REGEX);
0241 }
0242 else
0243 {
0244 handle_.pop ();
0245 BOOST_ASSERT(handle_.top ()._type == token::OR);
0246 handle_.pop ();
0247 BOOST_ASSERT(handle_.top ()._type == token::SEQUENCE);
0248 perform_or (node_ptr_vector_, tree_node_stack_);
0249 token_stack_.push (token::OREXP);
0250 }
0251 }
0252
0253 static void sub (token_stack &handle_, token_stack &token_stack_,
0254 node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_)
0255 {
0256 BOOST_ASSERT(handle_.top ()._type == token::SUB &&
0257 (handle_.size () == 1 || handle_.size () == 2));
0258
0259 if (handle_.size () == 1)
0260 {
0261 token_stack_.push (token::SEQUENCE);
0262 }
0263 else
0264 {
0265 handle_.pop ();
0266 BOOST_ASSERT(handle_.top ()._type == token::EXPRESSION);
0267
0268 sequence (node_ptr_vector_, tree_node_stack_);
0269 token_stack_.push (token::SUB);
0270 }
0271 }
0272
0273 static void repeat (token_stack &handle_, token_stack &token_stack_)
0274 {
0275 BOOST_ASSERT(handle_.top ()._type == token::REPEAT &&
0276 handle_.size () >= 1 && handle_.size () <= 3);
0277
0278 if (handle_.size () == 1)
0279 {
0280 token_stack_.push (token::EXPRESSION);
0281 }
0282 else
0283 {
0284 handle_.pop ();
0285 BOOST_ASSERT(handle_.top ()._type == token::DUP);
0286 token_stack_.push (token::REPEAT);
0287 }
0288 }
0289
0290 static void charset (token_stack &handle_, token_stack &token_stack_,
0291 node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_)
0292 {
0293 BOOST_ASSERT(handle_.top ()._type == token::CHARSET &&
0294 handle_.size () == 1);
0295
0296 node_ptr_vector_->push_back (static_cast<leaf_node *>(0));
0297
0298 const size_t id_ = handle_.top ()._id;
0299
0300 node_ptr_vector_->back () = new leaf_node (id_, true);
0301 tree_node_stack_.push (node_ptr_vector_->back ());
0302 token_stack_.push (token::REPEAT);
0303 }
0304
0305 static void macro (token_stack &handle_, token_stack &token_stack_,
0306 const macro_map ¯omap_, node_ptr_vector &node_ptr_vector_,
0307 tree_node_stack &tree_node_stack_)
0308 {
0309 token &top_ = handle_.top ();
0310
0311 BOOST_ASSERT(top_._type == token::MACRO && handle_.size () == 1);
0312
0313 typename macro_map::const_iterator iter_ =
0314 macromap_.find (top_._macro);
0315
0316 if (iter_ == macromap_.end ())
0317 {
0318 const CharT *name_ = top_._macro;
0319 std::basic_stringstream<CharT> ss_;
0320 std::ostringstream os_;
0321
0322 os_ << "Unknown MACRO name '";
0323
0324 while (*name_)
0325 {
0326 os_ << ss_.narrow (*name_++, ' ');
0327 }
0328
0329 os_ << "'.";
0330 throw runtime_error (os_.str ());
0331 }
0332
0333 tree_node_stack_.push (iter_->second->copy (node_ptr_vector_));
0334 token_stack_.push (token::REPEAT);
0335 }
0336
0337 static void openparen (token_stack &handle_, token_stack &token_stack_)
0338 {
0339 BOOST_ASSERT(handle_.top ()._type == token::OPENPAREN &&
0340 handle_.size () == 3);
0341 handle_.pop ();
0342 BOOST_ASSERT(handle_.top ()._type == token::REGEX);
0343 handle_.pop ();
0344 BOOST_ASSERT(handle_.top ()._type == token::CLOSEPAREN);
0345 token_stack_.push (token::REPEAT);
0346 }
0347
0348 static void perform_or (node_ptr_vector &node_ptr_vector_,
0349 tree_node_stack &tree_node_stack_)
0350 {
0351
0352 node *rhs_ = tree_node_stack_.top ();
0353
0354 tree_node_stack_.pop ();
0355
0356 node *lhs_ = tree_node_stack_.top ();
0357
0358 node_ptr_vector_->push_back (static_cast<selection_node *>(0));
0359 node_ptr_vector_->back () = new selection_node (lhs_, rhs_);
0360 tree_node_stack_.top () = node_ptr_vector_->back ();
0361 }
0362
0363 static void sequence (node_ptr_vector &node_ptr_vector_,
0364 tree_node_stack &tree_node_stack_)
0365 {
0366 node *rhs_ = tree_node_stack_.top ();
0367
0368 tree_node_stack_.pop ();
0369
0370 node *lhs_ = tree_node_stack_.top ();
0371
0372 node_ptr_vector_->push_back (static_cast<sequence_node *>(0));
0373 node_ptr_vector_->back () = new sequence_node (lhs_, rhs_);
0374 tree_node_stack_.top () = node_ptr_vector_->back ();
0375 }
0376
0377 static void optional (const bool greedy_,
0378 node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_)
0379 {
0380
0381 node *lhs_ = tree_node_stack_.top ();
0382
0383 node::node_vector &firstpos_ = lhs_->firstpos ();
0384
0385 for (node::node_vector::iterator iter_ = firstpos_.begin (),
0386 end_ = firstpos_.end (); iter_ != end_; ++iter_)
0387 {
0388
0389 (*iter_)->greedy (greedy_);
0390 }
0391
0392 node_ptr_vector_->push_back (static_cast<leaf_node *>(0));
0393
0394 node *rhs_ = new leaf_node (null_token, greedy_);
0395
0396 node_ptr_vector_->back () = rhs_;
0397 node_ptr_vector_->push_back (static_cast<selection_node *>(0));
0398 node_ptr_vector_->back () = new selection_node (lhs_, rhs_);
0399 tree_node_stack_.top () = node_ptr_vector_->back ();
0400 }
0401
0402 static void zero_or_more (const bool greedy_,
0403 node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_)
0404 {
0405
0406 node *ptr_ = tree_node_stack_.top ();
0407
0408 node_ptr_vector_->push_back (static_cast<iteration_node *>(0));
0409 node_ptr_vector_->back () = new iteration_node (ptr_, greedy_);
0410 tree_node_stack_.top () = node_ptr_vector_->back ();
0411 }
0412
0413 static void one_or_more (const bool greedy_,
0414 node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_)
0415 {
0416
0417 node *lhs_ = tree_node_stack_.top ();
0418 node *copy_ = lhs_->copy (node_ptr_vector_);
0419
0420 node_ptr_vector_->push_back (static_cast<iteration_node *>(0));
0421
0422 node *rhs_ = new iteration_node (copy_, greedy_);
0423
0424 node_ptr_vector_->back () = rhs_;
0425 node_ptr_vector_->push_back (static_cast<sequence_node *>(0));
0426 node_ptr_vector_->back () = new sequence_node (lhs_, rhs_);
0427 tree_node_stack_.top () = node_ptr_vector_->back ();
0428 }
0429
0430
0431 static void repeatn (const bool greedy_, const token &token_,
0432 node_ptr_vector &node_ptr_vector_, tree_node_stack &tree_node_stack_)
0433 {
0434
0435
0436
0437
0438
0439
0440 if (!(token_._min == 1 && !token_._comma))
0441 {
0442 const std::size_t top_ = token_._min > 0 ?
0443 token_._min : token_._max;
0444
0445 if (token_._min == 0)
0446 {
0447 optional (greedy_, node_ptr_vector_, tree_node_stack_);
0448 }
0449
0450 node *prev_ = tree_node_stack_.top ()->copy (node_ptr_vector_);
0451 node *curr_ = 0;
0452
0453 for (std::size_t i_ = 2; i_ < top_; ++i_)
0454 {
0455 curr_ = prev_->copy (node_ptr_vector_);
0456 tree_node_stack_.push (static_cast<node *>(0));
0457 tree_node_stack_.top () = prev_;
0458 sequence (node_ptr_vector_, tree_node_stack_);
0459 prev_ = curr_;
0460 }
0461
0462 if (token_._comma && token_._min > 0)
0463 {
0464 if (token_._min > 1)
0465 {
0466 curr_ = prev_->copy (node_ptr_vector_);
0467 tree_node_stack_.push (static_cast<node *>(0));
0468 tree_node_stack_.top () = prev_;
0469 sequence (node_ptr_vector_, tree_node_stack_);
0470 prev_ = curr_;
0471 }
0472
0473 if (token_._comma && token_._max)
0474 {
0475 tree_node_stack_.push (static_cast<node *>(0));
0476 tree_node_stack_.top () = prev_;
0477 optional (greedy_, node_ptr_vector_, tree_node_stack_);
0478 prev_ = tree_node_stack_.top ();
0479 tree_node_stack_.pop ();
0480
0481 const std::size_t count_ = token_._max - token_._min;
0482
0483 for (std::size_t i_ = 1; i_ < count_; ++i_)
0484 {
0485 curr_ = prev_->copy (node_ptr_vector_);
0486 tree_node_stack_.push (static_cast<node *>(0));
0487 tree_node_stack_.top () = prev_;
0488 sequence (node_ptr_vector_, tree_node_stack_);
0489 prev_ = curr_;
0490 }
0491 }
0492 else
0493 {
0494 tree_node_stack_.push (static_cast<node *>(0));
0495 tree_node_stack_.top () = prev_;
0496 zero_or_more (greedy_, node_ptr_vector_, tree_node_stack_);
0497 prev_ = tree_node_stack_.top ();
0498 tree_node_stack_.pop ();
0499 }
0500 }
0501
0502 tree_node_stack_.push (static_cast<node *>(0));
0503 tree_node_stack_.top () = prev_;
0504 sequence (node_ptr_vector_, tree_node_stack_);
0505 }
0506 }
0507 };
0508 }
0509 }
0510 }
0511
0512 #endif