Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-19 09:47:50

0001 // string_token.hpp
0002 // Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/)
0003 //
0004 // Distributed under the Boost Software License, Version 1.0. (See accompanying
0005 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
0006 #ifndef BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_STRING_TOKEN_HPP
0007 #define BOOST_SPIRIT_SUPPORT_DETAIL_LEXER_STRING_TOKEN_HPP
0008 
0009 #include <algorithm>
0010 #include "size_t.hpp"
0011 #include "consts.hpp" // num_chars, num_wchar_ts
0012 #include <sstream>
0013 #include <string>
0014 #include <limits>
0015 
0016 namespace boost
0017 {
0018 namespace lexer
0019 {
0020 template<typename CharT>
0021 struct basic_string_token
0022 {
0023     typedef std::basic_string<CharT> string;
0024 
0025     bool _negated;
0026     string _charset;
0027 
0028     basic_string_token () :
0029         _negated (false)
0030     {
0031     }
0032 
0033     basic_string_token (const bool negated_, const string &charset_) :
0034         _negated (negated_),
0035         _charset (charset_)
0036     {
0037     }
0038 
0039     void remove_duplicates ()
0040     {
0041         const CharT *start_ = _charset.c_str ();
0042         const CharT *end_ = start_ + _charset.size ();
0043 
0044         // Optimisation for very large charsets:
0045         // sorting via pointers is much quicker than
0046         // via iterators...
0047         std::sort (const_cast<CharT *> (start_), const_cast<CharT *> (end_));
0048         _charset.erase (std::unique (_charset.begin (), _charset.end ()),
0049             _charset.end ());
0050     }
0051 
0052     void normalise ()
0053     {
0054         const std::size_t max_chars_ = sizeof (CharT) == 1 ?
0055             num_chars : num_wchar_ts;
0056 
0057         if (_charset.length () == max_chars_)
0058         {
0059             _negated = !_negated;
0060             _charset.clear ();
0061         }
0062         else if (_charset.length () > max_chars_ / 2)
0063         {
0064             negate ();
0065         }
0066     }
0067 
0068     void negate ()
0069     {
0070         const std::size_t max_chars_ = sizeof (CharT) == 1 ?
0071             num_chars : num_wchar_ts;
0072         CharT curr_char_ = (std::numeric_limits<CharT>::min)();
0073         string temp_;
0074         const CharT *curr_ = _charset.c_str ();
0075         const CharT *chars_end_ = curr_ + _charset.size ();
0076 
0077         _negated = !_negated;
0078         temp_.resize (max_chars_ - _charset.size ());
0079 
0080         CharT *ptr_ = const_cast<CharT *> (temp_.c_str ());
0081         std::size_t i_ = 0;
0082 
0083         while (curr_ < chars_end_)
0084         {
0085             while (*curr_ > curr_char_)
0086             {
0087                 *ptr_ = curr_char_;
0088                 ++ptr_;
0089                 ++curr_char_;
0090                 ++i_;
0091             }
0092 
0093             ++curr_char_;
0094             ++curr_;
0095             ++i_;
0096         }
0097 
0098         for (; i_ < max_chars_; ++i_)
0099         {
0100             *ptr_ = curr_char_;
0101             ++ptr_;
0102             ++curr_char_;
0103         }
0104 
0105         _charset = temp_;
0106     }
0107 
0108     bool operator < (const basic_string_token &rhs_) const
0109     {
0110         return _negated < rhs_._negated ||
0111             (_negated == rhs_._negated && _charset < rhs_._charset);
0112     }
0113 
0114     bool empty () const
0115     {
0116         return _charset.empty () && !_negated;
0117     }
0118 
0119     bool any () const
0120     {
0121         return _charset.empty () && _negated;
0122     }
0123 
0124     void clear ()
0125     {
0126         _negated = false;
0127         _charset.clear ();
0128     }
0129 
0130     void intersect (basic_string_token &rhs_, basic_string_token &overlap_)
0131     {
0132         if ((any () && rhs_.any ()) || (_negated == rhs_._negated &&
0133             !any () && !rhs_.any ()))
0134         {
0135             intersect_same_types (rhs_, overlap_);
0136         }
0137         else
0138         {
0139             intersect_diff_types (rhs_, overlap_);
0140         }
0141     }
0142 
0143     static void escape_char (const CharT ch_, string &out_)
0144     {
0145         switch (ch_)
0146         {
0147             case '\0':
0148                 out_ += '\\';
0149                 out_ += '0';
0150                 break;
0151             case '\a':
0152                 out_ += '\\';
0153                 out_ += 'a';
0154                 break;
0155             case '\b':
0156                 out_ += '\\';
0157                 out_ += 'b';
0158                 break;
0159             case 27:
0160                 out_ += '\\';
0161                 out_ += 'x';
0162                 out_ += '1';
0163                 out_ += 'b';
0164                 break;
0165             case '\f':
0166                 out_ += '\\';
0167                 out_ += 'f';
0168                 break;
0169             case '\n':
0170                 out_ += '\\';
0171                 out_ += 'n';
0172                 break;
0173             case '\r':
0174                 out_ += '\\';
0175                 out_ += 'r';
0176                 break;
0177             case '\t':
0178                 out_ += '\\';
0179                 out_ += 't';
0180                 break;
0181             case '\v':
0182                 out_ += '\\';
0183                 out_ += 'v';
0184                 break;
0185             case '\\':
0186                 out_ += '\\';
0187                 out_ += '\\';
0188                 break;
0189             case '"':
0190                 out_ += '\\';
0191                 out_ += '"';
0192                 break;
0193             case '\'':
0194                 out_ += '\\';
0195                 out_ += '\'';
0196                 break;
0197             default:
0198             {
0199                 if (ch_ < 32 && ch_ >= 0)
0200                 {
0201                     std::basic_stringstream<CharT> ss_;
0202 
0203                     out_ += '\\';
0204                     out_ += 'x';
0205                     ss_ << std::hex <<
0206                         static_cast<std::size_t> (ch_);
0207                     out_ += ss_.str ();
0208                 }
0209                 else
0210                 {
0211                     out_ += ch_;
0212                 }
0213 
0214                 break;
0215             }
0216         }
0217     }
0218 
0219 private:
0220     void intersect_same_types (basic_string_token &rhs_,
0221         basic_string_token &overlap_)
0222     {
0223         if (any ())
0224         {
0225             clear ();
0226             overlap_._negated = true;
0227             rhs_.clear ();
0228         }
0229         else
0230         {
0231             typename string::iterator iter_ = _charset.begin ();
0232             typename string::iterator end_ = _charset.end ();
0233             typename string::iterator rhs_iter_ = rhs_._charset.begin ();
0234             typename string::iterator rhs_end_ = rhs_._charset.end ();
0235 
0236             overlap_._negated = _negated;
0237 
0238             while (iter_ != end_ && rhs_iter_ != rhs_end_)
0239             {
0240                 if (*iter_ < *rhs_iter_)
0241                 {
0242                     ++iter_;
0243                 }
0244                 else if (*iter_ > *rhs_iter_)
0245                 {
0246                     ++rhs_iter_;
0247                 }
0248                 else
0249                 {
0250                     overlap_._charset += *iter_;
0251                     iter_ = _charset.erase (iter_);
0252                     end_ = _charset.end ();
0253                     rhs_iter_ = rhs_._charset.erase (rhs_iter_);
0254                     rhs_end_ = rhs_._charset.end ();
0255                 }
0256             }
0257 
0258             if (_negated)
0259             {
0260                 // duplicates already merged, so safe to merge
0261                 // using std lib.
0262 
0263                 // src, dest
0264                 merge (_charset, overlap_._charset);
0265                 // duplicates already merged, so safe to merge
0266                 // using std lib.
0267 
0268                 // src, dest
0269                 merge (rhs_._charset, overlap_._charset);
0270                 _negated = false;
0271                 rhs_._negated = false;
0272                 std::swap (_charset, rhs_._charset);
0273                 normalise ();
0274                 overlap_.normalise ();
0275                 rhs_.normalise ();
0276             }
0277             else if (!overlap_._charset.empty ())
0278             {
0279                 normalise ();
0280                 overlap_.normalise ();
0281                 rhs_.normalise ();
0282             }
0283         }
0284     }
0285 
0286     void intersect_diff_types (basic_string_token &rhs_,
0287         basic_string_token &overlap_)
0288     {
0289         if (any ())
0290         {
0291             intersect_any (rhs_, overlap_);
0292         }
0293         else if (_negated)
0294         {
0295             intersect_negated (rhs_, overlap_);
0296         }
0297         else // _negated == false
0298         {
0299             intersect_charset (rhs_, overlap_);
0300         }
0301     }
0302 
0303     void intersect_any (basic_string_token &rhs_, basic_string_token &overlap_)
0304     {
0305         if (rhs_._negated)
0306         {
0307             rhs_.intersect_negated (*this, overlap_);
0308         }
0309         else // rhs._negated == false
0310         {
0311             rhs_.intersect_charset (*this, overlap_);
0312         }
0313     }
0314 
0315     void intersect_negated (basic_string_token &rhs_,
0316         basic_string_token &overlap_)
0317     {
0318         if (rhs_.any ())
0319         {
0320             overlap_._negated = true;
0321             overlap_._charset = _charset;
0322             rhs_._negated = false;
0323             rhs_._charset = _charset;
0324             clear ();
0325         }
0326         else // rhs._negated == false
0327         {
0328             rhs_.intersect_charset (*this, overlap_);
0329         }
0330     }
0331 
0332     void intersect_charset (basic_string_token &rhs_,
0333         basic_string_token &overlap_)
0334     {
0335         if (rhs_.any ())
0336         {
0337             overlap_._charset = _charset;
0338             rhs_._negated = true;
0339             rhs_._charset = _charset;
0340             clear ();
0341         }
0342         else // rhs_._negated == true
0343         {
0344             typename string::iterator iter_ = _charset.begin ();
0345             typename string::iterator end_ = _charset.end ();
0346             typename string::iterator rhs_iter_ = rhs_._charset.begin ();
0347             typename string::iterator rhs_end_ = rhs_._charset.end ();
0348 
0349             while (iter_ != end_ && rhs_iter_ != rhs_end_)
0350             {
0351                 if (*iter_ < *rhs_iter_)
0352                 {
0353                     overlap_._charset += *iter_;
0354                     rhs_iter_ = rhs_._charset.insert (rhs_iter_, *iter_);
0355                     ++rhs_iter_;
0356                     rhs_end_ = rhs_._charset.end ();
0357                     iter_ = _charset.erase (iter_);
0358                     end_ = _charset.end ();
0359                 }
0360                 else if (*iter_ > *rhs_iter_)
0361                 {
0362                     ++rhs_iter_;
0363                 }
0364                 else
0365                 {
0366                     ++iter_;
0367                     ++rhs_iter_;
0368                 }
0369             }
0370 
0371             if (iter_ != end_)
0372             {
0373                 // nothing bigger in rhs_ than iter_,
0374                 // so safe to merge using std lib.
0375                 string temp_ (iter_, end_);
0376 
0377                 // src, dest
0378                 merge (temp_, overlap_._charset);
0379                 _charset.erase (iter_, end_);
0380             }
0381 
0382             if (!overlap_._charset.empty ())
0383             {
0384                 merge (overlap_._charset, rhs_._charset);
0385                 // possible duplicates, so check for any and erase.
0386                 rhs_._charset.erase (std::unique (rhs_._charset.begin (),
0387                     rhs_._charset.end ()), rhs_._charset.end ());
0388                 normalise ();
0389                 overlap_.normalise ();
0390                 rhs_.normalise ();
0391             }
0392         }
0393     }
0394 
0395     void merge (string &src_, string &dest_)
0396     {
0397         string tmp_ (src_.size () + dest_.size (), 0);
0398 
0399         std::merge (src_.begin (), src_.end (), dest_.begin (), dest_.end (),
0400             tmp_.begin ());
0401         dest_ = tmp_;
0402     }
0403 };
0404 }
0405 }
0406 
0407 #endif