Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:53:55

0001  ///////////////////////////////////////////////////////////////////////////////
0002 /// \file regex_token_iterator.hpp
0003 /// Contains the definition of regex_token_iterator, an STL-compatible iterator
0004 /// for tokenizing a string using a regular expression.
0005 //
0006 //  Copyright 2008 Eric Niebler. Distributed under the Boost
0007 //  Software License, Version 1.0. (See accompanying file
0008 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
0009 
0010 #ifndef BOOST_XPRESSIVE_REGEX_TOKEN_ITERATOR_HPP_EAN_10_04_2005
0011 #define BOOST_XPRESSIVE_REGEX_TOKEN_ITERATOR_HPP_EAN_10_04_2005
0012 
0013 // MS compatible compilers support #pragma once
0014 #if defined(_MSC_VER)
0015 # pragma once
0016 #endif
0017 
0018 #include <vector>
0019 #include <boost/assert.hpp>
0020 #include <boost/mpl/assert.hpp>
0021 #include <boost/type_traits/is_same.hpp>
0022 #include <boost/type_traits/is_convertible.hpp>
0023 #include <boost/xpressive/regex_iterator.hpp>
0024 
0025 namespace boost { namespace xpressive { namespace detail
0026 {
0027 
0028 //////////////////////////////////////////////////////////////////////////
0029 // regex_token_iterator_impl
0030 //
0031 template<typename BidiIter>
0032 struct regex_token_iterator_impl
0033   : counted_base<regex_token_iterator_impl<BidiIter> >
0034 {
0035     typedef sub_match<BidiIter> value_type;
0036 
0037     regex_token_iterator_impl
0038     (
0039         BidiIter begin
0040       , BidiIter cur
0041       , BidiIter end
0042       , BidiIter next_search
0043       , basic_regex<BidiIter> const &rex
0044       , regex_constants::match_flag_type flags = regex_constants::match_default
0045       , std::vector<int> subs = std::vector<int>(1, 0)
0046       , int n = -2
0047       , bool not_null = false
0048     )
0049       : iter_(begin, cur, end, next_search, rex, flags, not_null)
0050       , result_()
0051       , n_((-2 == n) ? (int)subs.size() - 1 : n)
0052       , subs_()
0053     {
0054         BOOST_ASSERT(0 != subs.size());
0055         this->subs_.swap(subs);
0056     }
0057 
0058     bool next()
0059     {
0060         if(-1 != this->n_)
0061         {
0062             BidiIter cur = this->iter_.state_.cur_;
0063             if(0 != (++this->n_ %= (int)this->subs_.size()) || this->iter_.next())
0064             {
0065                 this->result_ = (-1 == this->subs_[ this->n_ ])
0066                     ? this->iter_.what_.prefix()
0067                     : this->iter_.what_[ this->subs_[ this->n_ ] ];
0068                 return true;
0069             }
0070             else if(-1 == this->subs_[ this->n_-- ] && cur != this->iter_.state_.end_)
0071             {
0072                 this->result_ = value_type(cur, this->iter_.state_.end_, true);
0073                 return true;
0074             }
0075         }
0076 
0077         return false;
0078     }
0079 
0080     bool equal_to(regex_token_iterator_impl<BidiIter> const &that) const
0081     {
0082         return this->iter_.equal_to(that.iter_) && this->n_ == that.n_;
0083     }
0084 
0085     regex_iterator_impl<BidiIter> iter_;
0086     value_type result_;
0087     int n_;
0088     std::vector<int> subs_;
0089 };
0090 
/// Overload for plain integers: the sub-match number of an \c int is the
/// integer itself.
inline int get_mark_number(int i)
{
    int const mark_number = i;
    return mark_number;
}
0095 
/// Wraps a single sub-match index in a one-element vector.
inline std::vector<int> to_vector(int subs)
{
    std::vector<int> single_sub(1, subs);
    return single_sub;
}
0100 
/// A vector of \c int is already in the desired form; it is passed through
/// by reference without copying.
inline std::vector<int> const &to_vector(std::vector<int> const &subs)
{
    std::vector<int> const &unchanged = subs;
    return unchanged;
}
0105 
0106 template<typename Int, std::size_t Size>
0107 inline std::vector<int> to_vector(Int const (&sub_matches)[ Size ])
0108 {
0109     // so that people can specify sub-match indices inline with
0110     // string literals, like "\1\2\3", leave off the trailing '\0'
0111     std::size_t const size = Size - is_same<Int, char>::value;
0112     std::vector<int> vect(size);
0113     for(std::size_t i = 0; i < size; ++i)
0114     {
0115         vect[i] = get_mark_number(sub_matches[i]);
0116     }
0117     return vect;
0118 }
0119 
0120 template<typename Int>
0121 inline std::vector<int> to_vector(std::vector<Int> const &sub_matches)
0122 {
0123     BOOST_MPL_ASSERT((is_convertible<Int, int>));
0124     return std::vector<int>(sub_matches.begin(), sub_matches.end());
0125 }
0126 
0127 } // namespace detail
0128 
0129 //////////////////////////////////////////////////////////////////////////
0130 // regex_token_iterator
0131 //
0132 template<typename BidiIter>
0133 struct regex_token_iterator
0134 {
0135     typedef basic_regex<BidiIter> regex_type;
0136     typedef typename iterator_value<BidiIter>::type char_type;
0137     typedef sub_match<BidiIter> value_type;
0138     typedef std::ptrdiff_t difference_type;
0139     typedef value_type const *pointer;
0140     typedef value_type const &reference;
0141     typedef std::forward_iterator_tag iterator_category;
0142 
0143     /// INTERNAL ONLY
0144     typedef detail::regex_token_iterator_impl<BidiIter> impl_type_;
0145 
0146     /// \post \c *this is the end of sequence iterator.
0147     regex_token_iterator()
0148       : impl_()
0149     {
0150     }
0151 
0152     /// \param begin The beginning of the character range to search.
0153     /// \param end The end of the character range to search.
0154     /// \param rex The regex pattern to search for.
0155     /// \pre \c [begin,end) is a valid range.
0156     regex_token_iterator
0157     (
0158         BidiIter begin
0159       , BidiIter end
0160       , basic_regex<BidiIter> const &rex
0161     )
0162       : impl_()
0163     {
0164         if(0 != rex.regex_id())
0165         {
0166             this->impl_ = new impl_type_(begin, begin, end, begin, rex);
0167             this->next_();
0168         }
0169     }
0170 
0171     /// \param begin The beginning of the character range to search.
0172     /// \param end The end of the character range to search.
0173     /// \param rex The regex pattern to search for.
0174     /// \param args A let() expression with argument bindings for semantic actions.
0175     /// \pre \c [begin,end) is a valid range.
0176     template<typename LetExpr>
0177     regex_token_iterator
0178     (
0179         BidiIter begin
0180       , BidiIter end
0181       , basic_regex<BidiIter> const &rex
0182       , detail::let_<LetExpr> const &args
0183     )
0184       : impl_()
0185     {
0186         if(0 != rex.regex_id())
0187         {
0188             this->impl_ = new impl_type_(begin, begin, end, begin, rex);
0189             detail::bind_args(args, this->impl_->iter_.what_);
0190             this->next_();
0191         }
0192     }
0193 
0194     /// \param begin The beginning of the character range to search.
0195     /// \param end The end of the character range to search.
0196     /// \param rex The regex pattern to search for.
0197     /// \param subs A range of integers designating sub-matches to be treated as tokens.
0198     /// \param flags Optional match flags, used to control how the expression is matched against the sequence. (See match_flag_type.)
0199     /// \pre \c [begin,end) is a valid range.
0200     /// \pre \c subs is either an integer greater or equal to -1,
0201     ///     or else an array or non-empty \c std::vector\<\> of such integers.
0202     template<typename Subs>
0203     regex_token_iterator
0204     (
0205         BidiIter begin
0206       , BidiIter end
0207       , basic_regex<BidiIter> const &rex
0208       , Subs const &subs
0209       , regex_constants::match_flag_type flags = regex_constants::match_default
0210     )
0211       : impl_()
0212     {
0213         if(0 != rex.regex_id())
0214         {
0215             this->impl_ = new impl_type_(begin, begin, end, begin, rex, flags, detail::to_vector(subs));
0216             this->next_();
0217         }
0218     }
0219 
0220     /// \param begin The beginning of the character range to search.
0221     /// \param end The end of the character range to search.
0222     /// \param rex The regex pattern to search for.
0223     /// \param subs A range of integers designating sub-matches to be treated as tokens.
0224     /// \param args A let() expression with argument bindings for semantic actions.
0225     /// \param flags Optional match flags, used to control how the expression is matched against the sequence. (See match_flag_type.)
0226     /// \pre \c [begin,end) is a valid range.
0227     /// \pre \c subs is either an integer greater or equal to -1,
0228     ///     or else an array or non-empty \c std::vector\<\> of such integers.
0229     template<typename Subs, typename LetExpr>
0230     regex_token_iterator
0231     (
0232         BidiIter begin
0233       , BidiIter end
0234       , basic_regex<BidiIter> const &rex
0235       , Subs const &subs
0236       , detail::let_<LetExpr> const &args
0237       , regex_constants::match_flag_type flags = regex_constants::match_default
0238     )
0239       : impl_()
0240     {
0241         if(0 != rex.regex_id())
0242         {
0243             this->impl_ = new impl_type_(begin, begin, end, begin, rex, flags, detail::to_vector(subs));
0244             detail::bind_args(args, this->impl_->iter_.what_);
0245             this->next_();
0246         }
0247     }
0248 
0249     /// \post <tt>*this == that</tt>
0250     regex_token_iterator(regex_token_iterator<BidiIter> const &that)
0251       : impl_(that.impl_) // COW
0252     {
0253     }
0254 
0255     /// \post <tt>*this == that</tt>
0256     regex_token_iterator<BidiIter> &operator =(regex_token_iterator<BidiIter> const &that)
0257     {
0258         this->impl_ = that.impl_; // COW
0259         return *this;
0260     }
0261 
0262     friend bool operator ==(regex_token_iterator<BidiIter> const &left, regex_token_iterator<BidiIter> const &right)
0263     {
0264         if(!left.impl_ || !right.impl_)
0265         {
0266             return !left.impl_ && !right.impl_;
0267         }
0268 
0269         return left.impl_->equal_to(*right.impl_);
0270     }
0271 
0272     friend bool operator !=(regex_token_iterator<BidiIter> const &left, regex_token_iterator<BidiIter> const &right)
0273     {
0274         return !(left == right);
0275     }
0276 
0277     value_type const &operator *() const
0278     {
0279         return this->impl_->result_;
0280     }
0281 
0282     value_type const *operator ->() const
0283     {
0284         return &this->impl_->result_;
0285     }
0286 
0287     /// If N == -1 then sets *this equal to the end of sequence iterator.
0288     /// Otherwise if N+1 \< subs.size(), then increments N and sets result equal to
0289     /// ((subs[N] == -1) ? value_type(what.prefix().str()) : value_type(what[subs[N]].str())).
0290     /// Otherwise if what.prefix().first != what[0].second and if the element match_prev_avail is
0291     /// not set in flags then sets it. Then locates the next match as if by calling
0292     /// regex_search(what[0].second, end, what, *pre, flags), with the following variation:
0293     /// in the event that the previous match found was of zero length (what[0].length() == 0)
0294     /// then attempts to find a non-zero length match starting at what[0].second, only if that
0295     /// fails and provided what[0].second != suffix().second does it look for a (possibly zero
0296     /// length) match starting from what[0].second + 1.  If such a match is found then sets N
0297     /// equal to zero, and sets result equal to
0298     /// ((subs[N] == -1) ? value_type(what.prefix().str()) : value_type(what[subs[N]].str())).
0299     /// Otherwise if no further matches were found, then let last_end be the endpoint of the last
0300     /// match that was found. Then if last_end != end and subs[0] == -1 sets N equal to -1 and
0301     /// sets result equal to value_type(last_end, end). Otherwise sets *this equal to the end
0302     /// of sequence iterator.
0303     regex_token_iterator<BidiIter> &operator ++()
0304     {
0305         this->fork_(); // un-share the implementation
0306         this->next_();
0307         return *this;
0308     }
0309 
0310     regex_token_iterator<BidiIter> operator ++(int)
0311     {
0312         regex_token_iterator<BidiIter> tmp(*this);
0313         ++*this;
0314         return tmp;
0315     }
0316 
0317 private:
0318 
0319     /// INTERNAL ONLY
0320     void fork_()
0321     {
0322         if(1 != this->impl_->use_count())
0323         {
0324             intrusive_ptr<impl_type_> clone = new impl_type_
0325             (
0326                 this->impl_->iter_.state_.begin_
0327               , this->impl_->iter_.state_.cur_
0328               , this->impl_->iter_.state_.end_
0329               , this->impl_->iter_.state_.next_search_
0330               , this->impl_->iter_.rex_
0331               , this->impl_->iter_.flags_
0332               , this->impl_->subs_
0333               , this->impl_->n_
0334               , this->impl_->iter_.not_null_
0335             );
0336 
0337             // only copy the match_results struct if we have to. Note: if the next call
0338             // to impl_->next() will return false or call regex_search, we don't need to
0339             // copy the match_results struct.
0340             if(-1 != this->impl_->n_ && this->impl_->n_ + 1 != static_cast<int>(this->impl_->subs_.size()))
0341             {
0342                 // BUGBUG This is expensive -- it causes the sequence_stack to be cleared.
0343                 // Find a better way
0344                 clone->iter_.what_ = this->impl_->iter_.what_;
0345             }
0346             else
0347             {
0348                 // At the very least, copy the action args
0349                 detail::core_access<BidiIter>::get_action_args(clone->iter_.what_)
0350                     = detail::core_access<BidiIter>::get_action_args(this->impl_->iter_.what_);
0351             }
0352 
0353             this->impl_.swap(clone);
0354         }
0355     }
0356 
0357     /// INTERNAL ONLY
0358     void next_()
0359     {
0360         BOOST_ASSERT(this->impl_ && 1 == this->impl_->use_count());
0361         if(!this->impl_->next())
0362         {
0363             this->impl_ = 0;
0364         }
0365     }
0366 
0367     intrusive_ptr<impl_type_> impl_;
0368 };
0369 
0370 }} // namespace boost::xpressive
0371 
0372 #endif