File indexing completed on 2025-01-19 09:47:50
0001
0002
0003
0004
0005
0006 #ifndef BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_RULES_HPP
0007 #define BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_RULES_HPP
0008
0009 #include "consts.hpp"
0010 #include <deque>
0011 #include <locale>
0012 #include <map>
0013 #include "runtime_error.hpp"
0014 #include <set>
0015 #include "size_t.hpp"
0016 #include <sstream>
0017 #include <string>
0018 #include <vector>
0019
0020 namespace boost
0021 {
0022 namespace lexer
0023 {
0024 namespace detail
0025 {
0026
// Per-character-type string constants used by basic_rules.
// Only the char and wchar_t specialisations below are defined.
template <typename CharT>
struct strings;

// Constants for narrow (char) rule sets.
template <>
struct strings<char>
{
    // Name of the state that every rule set starts with.
    static const char *initial () { return "INITIAL"; }

    // Placeholder state name; basic_rules treats it as "keep the state".
    static const char *dot () { return "."; }

    // Wildcard state name; basic_rules treats it as "every state".
    static const char *all_states () { return "*"; }

    // Spelling of the character type.
    static const char *char_name () { return "char"; }

    // Prefix string for this character type (empty for char) - presumably
    // the literal prefix for emitted source; usage is not visible here.
    static const char *char_prefix () { return ""; }
};

// Constants for wide (wchar_t) rule sets.
template <>
struct strings<wchar_t>
{
    // Name of the state that every rule set starts with.
    static const wchar_t *initial () { return L"INITIAL"; }

    // Placeholder state name; basic_rules treats it as "keep the state".
    static const wchar_t *dot () { return L"."; }

    // Wildcard state name; basic_rules treats it as "every state".
    static const wchar_t *all_states () { return L"*"; }

    // Spelling of the character type (always a narrow string).
    static const char *char_name () { return "wchar_t"; }

    // Prefix string for this character type ("L" for wchar_t) - presumably
    // the literal prefix for emitted source; usage is not visible here.
    static const char *char_prefix () { return "L"; }
};
0087 }
0088
0089 template<typename CharT>
0090 class basic_rules
0091 {
0092 public:
0093 typedef std::vector<std::size_t> id_vector;
0094 typedef std::deque<id_vector> id_vector_deque;
0095 typedef std::basic_string<CharT> string;
0096 typedef std::deque<string> string_deque;
0097 typedef std::deque<string_deque> string_deque_deque;
0098 typedef std::set<string> string_set;
0099 typedef std::pair<string, string> string_pair;
0100 typedef std::deque<string_pair> string_pair_deque;
0101 typedef std::map<string, std::size_t> string_size_t_map;
0102 typedef std::pair<string, std::size_t> string_size_t_pair;
0103
0104 basic_rules (const regex_flags flags_ = dot_not_newline,
0105 std::size_t (*counter_ptr_) () = 0) :
0106 _flags (flags_),
0107 _counter (0),
0108 _counter_ptr (counter_ptr_)
0109 {
0110 add_state (initial ());
0111 }
0112
0113 void clear ()
0114 {
0115 _statemap.clear ();
0116 _macrodeque.clear ();
0117 _macroset.clear ();
0118 _regexes.clear ();
0119 _ids.clear ();
0120 _unique_ids.clear ();
0121 _states.clear ();
0122 _flags = dot_not_newline;
0123 _locale = std::locale ();
0124 add_state (initial ());
0125 }
0126
0127 void clear (const CharT *state_name_)
0128 {
0129 std::size_t state_ = state (state_name_);
0130
0131 if (state_ != npos)
0132 {
0133 _regexes[state_].clear ();
0134 _ids[state_].clear ();
0135 _unique_ids[state_].clear ();
0136 _states[state_].clear ();
0137 }
0138 }
0139
0140 void flags (const regex_flags flags_)
0141 {
0142 _flags = flags_;
0143 }
0144
0145 regex_flags flags () const
0146 {
0147 return _flags;
0148 }
0149
0150 std::size_t next_unique_id ()
0151 {
0152 return _counter_ptr ? _counter_ptr () : _counter++;
0153 }
0154
0155 std::locale imbue (std::locale &locale_)
0156 {
0157 std::locale loc_ = _locale;
0158
0159 _locale = locale_;
0160 return loc_;
0161 }
0162
0163 const std::locale &locale () const
0164 {
0165 return _locale;
0166 }
0167
0168 std::size_t state (const CharT *name_) const
0169 {
0170 std::size_t state_ = npos;
0171 typename string_size_t_map::const_iterator iter_ =
0172 _statemap.find (name_);
0173
0174 if (iter_ != _statemap.end ())
0175 {
0176 state_ = iter_->second;
0177 }
0178
0179 return state_;
0180 }
0181
0182 const CharT *state (const std::size_t index_) const
0183 {
0184 if (index_ == 0)
0185 {
0186 return initial ();
0187 }
0188 else
0189 {
0190 const std::size_t vec_index_ = index_ - 1;
0191
0192 if (vec_index_ > _lexer_state_names.size () - 1)
0193 {
0194 return 0;
0195 }
0196 else
0197 {
0198 return _lexer_state_names[vec_index_].c_str ();
0199 }
0200 }
0201 }
0202
0203 std::size_t add_state (const CharT *name_)
0204 {
0205 validate (name_);
0206
0207 if (_statemap.insert (string_size_t_pair (name_,
0208 _statemap.size ())).second)
0209 {
0210 _regexes.push_back (string_deque ());
0211 _ids.push_back (id_vector ());
0212 _unique_ids.push_back (id_vector ());
0213 _states.push_back (id_vector ());
0214
0215 if (string (name_) != initial ())
0216 {
0217 _lexer_state_names.push_back (name_);
0218 }
0219 }
0220
0221
0222 return _lexer_state_names.size ();
0223 }
0224
0225 void add_macro (const CharT *name_, const CharT *regex_)
0226 {
0227 add_macro (name_, string (regex_));
0228 }
0229
0230 void add_macro (const CharT *name_, const CharT *regex_start_,
0231 const CharT *regex_end_)
0232 {
0233 add_macro (name_, string (regex_start_, regex_end_));
0234 }
0235
0236 void add_macro (const CharT *name_, const string ®ex_)
0237 {
0238 validate (name_);
0239
0240 typename string_set::const_iterator iter_ = _macroset.find (name_);
0241
0242 if (iter_ == _macroset.end ())
0243 {
0244 _macrodeque.push_back (string_pair (name_, regex_));
0245 _macroset.insert (name_);
0246 }
0247 else
0248 {
0249 std::basic_stringstream<CharT> ss_;
0250 std::ostringstream os_;
0251
0252 os_ << "Attempt to redefine MACRO '";
0253
0254 while (*name_)
0255 {
0256 os_ << ss_.narrow (*name_++, static_cast<CharT> (' '));
0257 }
0258
0259 os_ << "'.";
0260 throw runtime_error (os_.str ());
0261 }
0262 }
0263
0264 void add_macros (const basic_rules<CharT> &rules_)
0265 {
0266 const string_pair_deque ¯os_ = rules_.macrodeque ();
0267 typename string_pair_deque::const_iterator macro_iter_ =
0268 macros_.begin ();
0269 typename string_pair_deque::const_iterator macro_end_ =
0270 macros_.end ();
0271
0272 for (; macro_iter_ != macro_end_; ++macro_iter_)
0273 {
0274 add_macro (macro_iter_->first.c_str (),
0275 macro_iter_->second.c_str ());
0276 }
0277 }
0278
0279 void merge_macros (const basic_rules<CharT> &rules_)
0280 {
0281 const string_pair_deque ¯os_ = rules_.macrodeque ();
0282 typename string_pair_deque::const_iterator macro_iter_ =
0283 macros_.begin ();
0284 typename string_pair_deque::const_iterator macro_end_ =
0285 macros_.end ();
0286 typename string_set::const_iterator macro_dest_iter_;
0287 typename string_set::const_iterator macro_dest_end_ = _macroset.end ();
0288
0289 for (; macro_iter_ != macro_end_; ++macro_iter_)
0290 {
0291 macro_dest_iter_ = _macroset.find (macro_iter_->first);
0292
0293 if (macro_dest_iter_ == macro_dest_end_)
0294 {
0295 add_macro (macro_iter_->first.c_str (),
0296 macro_iter_->second.c_str ());
0297 }
0298 }
0299 }
0300
0301 std::size_t add (const CharT *regex_, const std::size_t id_)
0302 {
0303 return add (string (regex_), id_);
0304 }
0305
0306 std::size_t add (const CharT *regex_start_, const CharT *regex_end_,
0307 const std::size_t id_)
0308 {
0309 return add (string (regex_start_, regex_end_), id_);
0310 }
0311
0312 std::size_t add (const string ®ex_, const std::size_t id_)
0313 {
0314 const std::size_t counter_ = next_unique_id ();
0315
0316 check_for_invalid_id (id_);
0317 _regexes.front ().push_back (regex_);
0318 _ids.front ().push_back (id_);
0319 _unique_ids.front ().push_back (counter_);
0320 _states.front ().push_back (0);
0321 return counter_;
0322 }
0323
0324 std::size_t add (const CharT *curr_state_, const CharT *regex_,
0325 const CharT *new_state_)
0326 {
0327 return add (curr_state_, string (regex_), new_state_);
0328 }
0329
0330 std::size_t add (const CharT *curr_state_, const CharT *regex_start_,
0331 const CharT *regex_end_, const CharT *new_state_)
0332 {
0333 return add (curr_state_, string (regex_start_, regex_end_),
0334 new_state_);
0335 }
0336
0337 std::size_t add (const CharT *curr_state_, const string ®ex_,
0338 const CharT *new_state_)
0339 {
0340 return add (curr_state_, regex_, 0, new_state_, false);
0341 }
0342
0343 std::size_t add (const CharT *curr_state_, const CharT *regex_,
0344 const std::size_t id_, const CharT *new_state_)
0345 {
0346 return add (curr_state_, string (regex_), id_, new_state_);
0347 }
0348
0349 std::size_t add (const CharT *curr_state_, const CharT *regex_start_,
0350 const CharT *regex_end_, const std::size_t id_,
0351 const CharT *new_state_)
0352 {
0353 return add (curr_state_, string (regex_start_, regex_end_), id_,
0354 new_state_);
0355 }
0356
0357 std::size_t add (const CharT *curr_state_, const string ®ex_,
0358 const std::size_t id_, const CharT *new_state_)
0359 {
0360 return add (curr_state_, regex_, id_, new_state_, true);
0361 }
0362
0363 void add (const CharT *source_, const basic_rules<CharT> &rules_,
0364 const CharT *dest_, const CharT *to_ = detail::strings<CharT>::dot ())
0365 {
0366 const bool star_ = *source_ == '*' && *(source_ + 1) == 0;
0367 const bool dest_dot_ = *dest_ == '.' && *(dest_ + 1) == 0;
0368 const bool to_dot_ = *to_ == '.' && *(to_ + 1) == 0;
0369 std::size_t state_ = 0;
0370 const string_deque_deque &all_regexes_ = rules_.regexes ();
0371 const id_vector_deque &all_ids_ = rules_.ids ();
0372 const id_vector_deque &all_unique_ids_ = rules_.unique_ids ();
0373 const id_vector_deque &all_states_ = rules_.states ();
0374 typename string_deque::const_iterator regex_iter_;
0375 typename string_deque::const_iterator regex_end_;
0376 typename id_vector::const_iterator id_iter_;
0377 typename id_vector::const_iterator uid_iter_;
0378 typename id_vector::const_iterator state_iter_;
0379
0380 if (star_)
0381 {
0382 typename string_deque_deque::const_iterator all_regexes_iter_ =
0383 all_regexes_.begin ();
0384 typename string_deque_deque::const_iterator all_regexes_end_ =
0385 all_regexes_.end ();
0386 typename id_vector_deque::const_iterator all_ids_iter_ =
0387 all_ids_.begin ();
0388 typename id_vector_deque::const_iterator all_uids_iter_ =
0389 all_unique_ids_.begin ();
0390 typename id_vector_deque::const_iterator all_states_iter_ =
0391 all_states_.begin ();
0392
0393 for (; all_regexes_iter_ != all_regexes_end_;
0394 ++state_, ++all_regexes_iter_, ++all_ids_iter_,
0395 ++all_uids_iter_, ++all_states_iter_)
0396 {
0397 regex_iter_ = all_regexes_iter_->begin ();
0398 regex_end_ = all_regexes_iter_->end ();
0399 id_iter_ = all_ids_iter_->begin ();
0400 uid_iter_ = all_uids_iter_->begin ();
0401 state_iter_ = all_states_iter_->begin ();
0402
0403 for (; regex_iter_ != regex_end_; ++regex_iter_, ++id_iter_,
0404 ++uid_iter_, ++state_iter_)
0405 {
0406
0407
0408 add (dest_dot_ ? rules_.state (state_) : dest_, *regex_iter_,
0409 *id_iter_, to_dot_ ? rules_.state (*state_iter_) : to_, true,
0410 *uid_iter_);
0411 }
0412 }
0413 }
0414 else
0415 {
0416 const CharT *start_ = source_;
0417 string state_name_;
0418
0419 while (*source_)
0420 {
0421 while (*source_ && *source_ != ',')
0422 {
0423 ++source_;
0424 }
0425
0426 state_name_.assign (start_, source_);
0427
0428 if (*source_)
0429 {
0430 ++source_;
0431 start_ = source_;
0432 }
0433
0434 state_ = rules_.state (state_name_.c_str ());
0435
0436 if (state_ == npos)
0437 {
0438 std::basic_stringstream<CharT> ss_;
0439 std::ostringstream os_;
0440
0441 os_ << "Unknown state name '";
0442 source_ = state_name_.c_str ();
0443
0444 while (*source_)
0445 {
0446 os_ << ss_.narrow (*source_++, ' ');
0447 }
0448
0449 os_ << "'.";
0450 throw runtime_error (os_.str ());
0451 }
0452
0453 regex_iter_ = all_regexes_[state_].begin ();
0454 regex_end_ = all_regexes_[state_].end ();
0455 id_iter_ = all_ids_[state_].begin ();
0456 uid_iter_ = all_unique_ids_[state_].begin ();
0457 state_iter_ = all_states_[state_].begin ();
0458
0459 for (; regex_iter_ != regex_end_; ++regex_iter_, ++id_iter_,
0460 ++uid_iter_, ++state_iter_)
0461 {
0462
0463
0464 add (dest_dot_ ? state_name_.c_str () : dest_, *regex_iter_,
0465 *id_iter_, to_dot_ ? rules_.state (*state_iter_) : to_, true,
0466 *uid_iter_);
0467 }
0468 }
0469 }
0470 }
0471
0472
0473
0474
0475
0476
0477
0478
0479
0480
0481
0482
0483
0484
0485
0486
0487
0488
0489
0490
0491
0492
0493
0494
0495
0496
0497
0498
0499
0500
0501
0502
0503
0504
0505
0506 const string_size_t_map &statemap () const
0507 {
0508 return _statemap;
0509 }
0510
0511 const string_pair_deque ¯odeque () const
0512 {
0513 return _macrodeque;
0514 }
0515
0516 const string_deque_deque ®exes () const
0517 {
0518 return _regexes;
0519 }
0520
0521 const id_vector_deque &ids () const
0522 {
0523 return _ids;
0524 }
0525
0526 const id_vector_deque &unique_ids () const
0527 {
0528 return _unique_ids;
0529 }
0530
0531 const id_vector_deque &states () const
0532 {
0533 return _states;
0534 }
0535
0536 bool empty () const
0537 {
0538 typename string_deque_deque::const_iterator iter_ = _regexes.begin ();
0539 typename string_deque_deque::const_iterator end_ = _regexes.end ();
0540 bool empty_ = true;
0541
0542 for (; iter_ != end_; ++iter_)
0543 {
0544 if (!iter_->empty ())
0545 {
0546 empty_ = false;
0547 break;
0548 }
0549 }
0550
0551 return empty_;
0552 }
0553
0554 static const CharT *initial ()
0555 {
0556 return detail::strings<CharT>::initial ();
0557 }
0558
0559 static const CharT *all_states ()
0560 {
0561 return detail::strings<CharT>::all_states ();
0562 }
0563
0564 static const CharT *dot ()
0565 {
0566 return detail::strings<CharT>::dot ();
0567 }
0568
0569 private:
0570 string_size_t_map _statemap;
0571 string_pair_deque _macrodeque;
0572 string_set _macroset;
0573 string_deque_deque _regexes;
0574 id_vector_deque _ids;
0575 id_vector_deque _unique_ids;
0576 id_vector_deque _states;
0577 regex_flags _flags;
0578 std::size_t _counter;
0579 std::size_t (*_counter_ptr) ();
0580 std::locale _locale;
0581 string_deque _lexer_state_names;
0582
0583 std::size_t add (const CharT *curr_state_, const string ®ex_,
0584 const std::size_t id_, const CharT *new_state_, const bool check_,
0585 const std::size_t uid_ = npos)
0586 {
0587 const bool star_ = *curr_state_ == '*' && *(curr_state_ + 1) == 0;
0588 const bool dot_ = *new_state_ == '.' && *(new_state_ + 1) == 0;
0589
0590 if (check_)
0591 {
0592 check_for_invalid_id (id_);
0593 }
0594
0595 if (!dot_)
0596 {
0597 validate (new_state_);
0598 }
0599
0600 std::size_t new_ = string::npos;
0601 typename string_size_t_map::const_iterator iter_;
0602 typename string_size_t_map::const_iterator end_ = _statemap.end ();
0603 id_vector states_;
0604
0605 if (!dot_)
0606 {
0607 iter_ = _statemap.find (new_state_);
0608
0609 if (iter_ == end_)
0610 {
0611 std::basic_stringstream<CharT> ss_;
0612 std::ostringstream os_;
0613
0614 os_ << "Unknown state name '";
0615
0616 while (*new_state_)
0617 {
0618 os_ << ss_.narrow (*new_state_++, ' ');
0619 }
0620
0621 os_ << "'.";
0622 throw runtime_error (os_.str ());
0623 }
0624
0625 new_ = iter_->second;
0626 }
0627
0628 if (star_)
0629 {
0630 const std::size_t size_ = _statemap.size ();
0631
0632 for (std::size_t i_ = 0; i_ < size_; ++i_)
0633 {
0634 states_.push_back (i_);
0635 }
0636 }
0637 else
0638 {
0639 const CharT *start_ = curr_state_;
0640 string state_;
0641
0642 while (*curr_state_)
0643 {
0644 while (*curr_state_ && *curr_state_ != ',')
0645 {
0646 ++curr_state_;
0647 }
0648
0649 state_.assign (start_, curr_state_);
0650
0651 if (*curr_state_)
0652 {
0653 ++curr_state_;
0654 start_ = curr_state_;
0655 }
0656
0657 validate (state_.c_str ());
0658 iter_ = _statemap.find (state_.c_str ());
0659
0660 if (iter_ == end_)
0661 {
0662 std::basic_stringstream<CharT> ss_;
0663 std::ostringstream os_;
0664
0665 os_ << "Unknown state name '";
0666 curr_state_ = state_.c_str ();
0667
0668 while (*curr_state_)
0669 {
0670 os_ << ss_.narrow (*curr_state_++, ' ');
0671 }
0672
0673 os_ << "'.";
0674 throw runtime_error (os_.str ());
0675 }
0676
0677 states_.push_back (iter_->second);
0678 }
0679 }
0680
0681 std::size_t first_counter_ = npos;
0682
0683 for (std::size_t i_ = 0, size_ = states_.size (); i_ < size_; ++i_)
0684 {
0685 const std::size_t curr_ = states_[i_];
0686
0687 _regexes[curr_].push_back (regex_);
0688 _ids[curr_].push_back (id_);
0689
0690 if (uid_ == npos)
0691 {
0692 const std::size_t counter_ = next_unique_id ();
0693
0694 if (first_counter_ == npos)
0695 {
0696 first_counter_ = counter_;
0697 }
0698
0699 _unique_ids[curr_].push_back (counter_);
0700 }
0701 else
0702 {
0703 if (first_counter_ == npos)
0704 {
0705 first_counter_ = uid_;
0706 }
0707
0708 _unique_ids[curr_].push_back (uid_);
0709 }
0710
0711 _states[curr_].push_back (dot_ ? curr_ : new_);
0712 }
0713
0714 return first_counter_;
0715 }
0716
0717 void validate (const CharT *name_) const
0718 {
0719 const CharT *start_ = name_;
0720
0721 if (*name_ != '_' && !(*name_ >= 'A' && *name_ <= 'Z') &&
0722 !(*name_ >= 'a' && *name_ <= 'z'))
0723 {
0724 std::basic_stringstream<CharT> ss_;
0725 std::ostringstream os_;
0726
0727 os_ << "Invalid name '";
0728
0729 while (*name_)
0730 {
0731 os_ << ss_.narrow (*name_++, ' ');
0732 }
0733
0734 os_ << "'.";
0735 throw runtime_error (os_.str ());
0736 }
0737 else if (*name_)
0738 {
0739 ++name_;
0740 }
0741
0742 while (*name_)
0743 {
0744 if (*name_ != '_' && *name_ != '-' &&
0745 !(*name_ >= 'A' && *name_ <= 'Z') &&
0746 !(*name_ >= 'a' && *name_ <= 'z') &&
0747 !(*name_ >= '0' && *name_ <= '9'))
0748 {
0749 std::basic_stringstream<CharT> ss_;
0750 std::ostringstream os_;
0751
0752 os_ << "Invalid name '";
0753 name_ = start_;
0754
0755 while (*name_)
0756 {
0757 os_ << ss_.narrow (*name_++, ' ');
0758 }
0759
0760 os_ << "'.";
0761 throw runtime_error (os_.str ());
0762 }
0763
0764 ++name_;
0765 }
0766
0767 if (name_ - start_ > static_cast<std::ptrdiff_t>(max_macro_len))
0768 {
0769 std::basic_stringstream<CharT> ss_;
0770 std::ostringstream os_;
0771
0772 os_ << "Name '";
0773 name_ = start_;
0774
0775 while (*name_)
0776 {
0777 os_ << ss_.narrow (*name_++, ' ');
0778 }
0779
0780 os_ << "' too long.";
0781 throw runtime_error (os_.str ());
0782 }
0783 }
0784
0785 void check_for_invalid_id (const std::size_t id_) const
0786 {
0787 switch (id_)
0788 {
0789 case 0:
0790 throw runtime_error ("id 0 is reserved for EOF.");
0791 case npos:
0792 throw runtime_error ("id npos is reserved for the "
0793 "UNKNOWN token.");
0794 default:
0795
0796 break;
0797 }
0798 }
0799 };
0800
0801 typedef basic_rules<char> rules;
0802 typedef basic_rules<wchar_t> wrules;
0803 }
0804 }
0805
0806 #endif