File indexing completed on 2025-01-19 09:47:50
0001
0002
0003
0004
0005
0006 #ifndef BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_STRING_TOKEN_HPP
0007 #define BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_STRING_TOKEN_HPP
0008
0009 #include <algorithm>
0010 #include "size_t.hpp"
0011 #include "consts.hpp" // num_chars, num_wchar_ts
0012 #include <sstream>
0013 #include <string>
0014 #include <limits>
0015
0016 namespace boost
0017 {
0018 namespace lexer
0019 {
0020 template<typename CharT>
0021 struct basic_string_token
0022 {
0023 typedef std::basic_string<CharT> string;
0024
0025 bool _negated;
0026 string _charset;
0027
0028 basic_string_token () :
0029 _negated (false)
0030 {
0031 }
0032
0033 basic_string_token (const bool negated_, const string &charset_) :
0034 _negated (negated_),
0035 _charset (charset_)
0036 {
0037 }
0038
0039 void remove_duplicates ()
0040 {
0041 const CharT *start_ = _charset.c_str ();
0042 const CharT *end_ = start_ + _charset.size ();
0043
0044
0045
0046
0047 std::sort (const_cast<CharT *> (start_), const_cast<CharT *> (end_));
0048 _charset.erase (std::unique (_charset.begin (), _charset.end ()),
0049 _charset.end ());
0050 }
0051
0052 void normalise ()
0053 {
0054 const std::size_t max_chars_ = sizeof (CharT) == 1 ?
0055 num_chars : num_wchar_ts;
0056
0057 if (_charset.length () == max_chars_)
0058 {
0059 _negated = !_negated;
0060 _charset.clear ();
0061 }
0062 else if (_charset.length () > max_chars_ / 2)
0063 {
0064 negate ();
0065 }
0066 }
0067
0068 void negate ()
0069 {
0070 const std::size_t max_chars_ = sizeof (CharT) == 1 ?
0071 num_chars : num_wchar_ts;
0072 CharT curr_char_ = (std::numeric_limits<CharT>::min)();
0073 string temp_;
0074 const CharT *curr_ = _charset.c_str ();
0075 const CharT *chars_end_ = curr_ + _charset.size ();
0076
0077 _negated = !_negated;
0078 temp_.resize (max_chars_ - _charset.size ());
0079
0080 CharT *ptr_ = const_cast<CharT *> (temp_.c_str ());
0081 std::size_t i_ = 0;
0082
0083 while (curr_ < chars_end_)
0084 {
0085 while (*curr_ > curr_char_)
0086 {
0087 *ptr_ = curr_char_;
0088 ++ptr_;
0089 ++curr_char_;
0090 ++i_;
0091 }
0092
0093 ++curr_char_;
0094 ++curr_;
0095 ++i_;
0096 }
0097
0098 for (; i_ < max_chars_; ++i_)
0099 {
0100 *ptr_ = curr_char_;
0101 ++ptr_;
0102 ++curr_char_;
0103 }
0104
0105 _charset = temp_;
0106 }
0107
0108 bool operator < (const basic_string_token &rhs_) const
0109 {
0110 return _negated < rhs_._negated ||
0111 (_negated == rhs_._negated && _charset < rhs_._charset);
0112 }
0113
0114 bool empty () const
0115 {
0116 return _charset.empty () && !_negated;
0117 }
0118
0119 bool any () const
0120 {
0121 return _charset.empty () && _negated;
0122 }
0123
0124 void clear ()
0125 {
0126 _negated = false;
0127 _charset.clear ();
0128 }
0129
0130 void intersect (basic_string_token &rhs_, basic_string_token &overlap_)
0131 {
0132 if ((any () && rhs_.any ()) || (_negated == rhs_._negated &&
0133 !any () && !rhs_.any ()))
0134 {
0135 intersect_same_types (rhs_, overlap_);
0136 }
0137 else
0138 {
0139 intersect_diff_types (rhs_, overlap_);
0140 }
0141 }
0142
0143 static void escape_char (const CharT ch_, string &out_)
0144 {
0145 switch (ch_)
0146 {
0147 case '\0':
0148 out_ += '\\';
0149 out_ += '0';
0150 break;
0151 case '\a':
0152 out_ += '\\';
0153 out_ += 'a';
0154 break;
0155 case '\b':
0156 out_ += '\\';
0157 out_ += 'b';
0158 break;
0159 case 27:
0160 out_ += '\\';
0161 out_ += 'x';
0162 out_ += '1';
0163 out_ += 'b';
0164 break;
0165 case '\f':
0166 out_ += '\\';
0167 out_ += 'f';
0168 break;
0169 case '\n':
0170 out_ += '\\';
0171 out_ += 'n';
0172 break;
0173 case '\r':
0174 out_ += '\\';
0175 out_ += 'r';
0176 break;
0177 case '\t':
0178 out_ += '\\';
0179 out_ += 't';
0180 break;
0181 case '\v':
0182 out_ += '\\';
0183 out_ += 'v';
0184 break;
0185 case '\\':
0186 out_ += '\\';
0187 out_ += '\\';
0188 break;
0189 case '"':
0190 out_ += '\\';
0191 out_ += '"';
0192 break;
0193 case '\'':
0194 out_ += '\\';
0195 out_ += '\'';
0196 break;
0197 default:
0198 {
0199 if (ch_ < 32 && ch_ >= 0)
0200 {
0201 std::basic_stringstream<CharT> ss_;
0202
0203 out_ += '\\';
0204 out_ += 'x';
0205 ss_ << std::hex <<
0206 static_cast<std::size_t> (ch_);
0207 out_ += ss_.str ();
0208 }
0209 else
0210 {
0211 out_ += ch_;
0212 }
0213
0214 break;
0215 }
0216 }
0217 }
0218
0219 private:
0220 void intersect_same_types (basic_string_token &rhs_,
0221 basic_string_token &overlap_)
0222 {
0223 if (any ())
0224 {
0225 clear ();
0226 overlap_._negated = true;
0227 rhs_.clear ();
0228 }
0229 else
0230 {
0231 typename string::iterator iter_ = _charset.begin ();
0232 typename string::iterator end_ = _charset.end ();
0233 typename string::iterator rhs_iter_ = rhs_._charset.begin ();
0234 typename string::iterator rhs_end_ = rhs_._charset.end ();
0235
0236 overlap_._negated = _negated;
0237
0238 while (iter_ != end_ && rhs_iter_ != rhs_end_)
0239 {
0240 if (*iter_ < *rhs_iter_)
0241 {
0242 ++iter_;
0243 }
0244 else if (*iter_ > *rhs_iter_)
0245 {
0246 ++rhs_iter_;
0247 }
0248 else
0249 {
0250 overlap_._charset += *iter_;
0251 iter_ = _charset.erase (iter_);
0252 end_ = _charset.end ();
0253 rhs_iter_ = rhs_._charset.erase (rhs_iter_);
0254 rhs_end_ = rhs_._charset.end ();
0255 }
0256 }
0257
0258 if (_negated)
0259 {
0260
0261
0262
0263
0264 merge (_charset, overlap_._charset);
0265
0266
0267
0268
0269 merge (rhs_._charset, overlap_._charset);
0270 _negated = false;
0271 rhs_._negated = false;
0272 std::swap (_charset, rhs_._charset);
0273 normalise ();
0274 overlap_.normalise ();
0275 rhs_.normalise ();
0276 }
0277 else if (!overlap_._charset.empty ())
0278 {
0279 normalise ();
0280 overlap_.normalise ();
0281 rhs_.normalise ();
0282 }
0283 }
0284 }
0285
0286 void intersect_diff_types (basic_string_token &rhs_,
0287 basic_string_token &overlap_)
0288 {
0289 if (any ())
0290 {
0291 intersect_any (rhs_, overlap_);
0292 }
0293 else if (_negated)
0294 {
0295 intersect_negated (rhs_, overlap_);
0296 }
0297 else
0298 {
0299 intersect_charset (rhs_, overlap_);
0300 }
0301 }
0302
0303 void intersect_any (basic_string_token &rhs_, basic_string_token &overlap_)
0304 {
0305 if (rhs_._negated)
0306 {
0307 rhs_.intersect_negated (*this, overlap_);
0308 }
0309 else
0310 {
0311 rhs_.intersect_charset (*this, overlap_);
0312 }
0313 }
0314
0315 void intersect_negated (basic_string_token &rhs_,
0316 basic_string_token &overlap_)
0317 {
0318 if (rhs_.any ())
0319 {
0320 overlap_._negated = true;
0321 overlap_._charset = _charset;
0322 rhs_._negated = false;
0323 rhs_._charset = _charset;
0324 clear ();
0325 }
0326 else
0327 {
0328 rhs_.intersect_charset (*this, overlap_);
0329 }
0330 }
0331
0332 void intersect_charset (basic_string_token &rhs_,
0333 basic_string_token &overlap_)
0334 {
0335 if (rhs_.any ())
0336 {
0337 overlap_._charset = _charset;
0338 rhs_._negated = true;
0339 rhs_._charset = _charset;
0340 clear ();
0341 }
0342 else
0343 {
0344 typename string::iterator iter_ = _charset.begin ();
0345 typename string::iterator end_ = _charset.end ();
0346 typename string::iterator rhs_iter_ = rhs_._charset.begin ();
0347 typename string::iterator rhs_end_ = rhs_._charset.end ();
0348
0349 while (iter_ != end_ && rhs_iter_ != rhs_end_)
0350 {
0351 if (*iter_ < *rhs_iter_)
0352 {
0353 overlap_._charset += *iter_;
0354 rhs_iter_ = rhs_._charset.insert (rhs_iter_, *iter_);
0355 ++rhs_iter_;
0356 rhs_end_ = rhs_._charset.end ();
0357 iter_ = _charset.erase (iter_);
0358 end_ = _charset.end ();
0359 }
0360 else if (*iter_ > *rhs_iter_)
0361 {
0362 ++rhs_iter_;
0363 }
0364 else
0365 {
0366 ++iter_;
0367 ++rhs_iter_;
0368 }
0369 }
0370
0371 if (iter_ != end_)
0372 {
0373
0374
0375 string temp_ (iter_, end_);
0376
0377
0378 merge (temp_, overlap_._charset);
0379 _charset.erase (iter_, end_);
0380 }
0381
0382 if (!overlap_._charset.empty ())
0383 {
0384 merge (overlap_._charset, rhs_._charset);
0385
0386 rhs_._charset.erase (std::unique (rhs_._charset.begin (),
0387 rhs_._charset.end ()), rhs_._charset.end ());
0388 normalise ();
0389 overlap_.normalise ();
0390 rhs_.normalise ();
0391 }
0392 }
0393 }
0394
0395 void merge (string &src_, string &dest_)
0396 {
0397 string tmp_ (src_.size () + dest_.size (), 0);
0398
0399 std::merge (src_.begin (), src_.end (), dest_.begin (), dest_.end (),
0400 tmp_.begin ());
0401 dest_ = tmp_;
0402 }
0403 };
0404 }
0405 }
0406
0407 #endif