Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:53:52

0001 ///////////////////////////////////////////////////////////////////////////////
0002 /// \file boyer_moore.hpp
0003 ///   Contains the boyer-moore implementation. Note: this is *not* a general-
0004 ///   purpose boyer-moore implementation. It truncates the search string at
0005 ///   256 characters, but it is sufficient for the needs of xpressive.
0006 //
0007 //  Copyright 2008 Eric Niebler. Distributed under the Boost
0008 //  Software License, Version 1.0. (See accompanying file
0009 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
0010 
0011 #ifndef BOOST_XPRESSIVE_DETAIL_BOYER_MOORE_HPP_EAN_10_04_2005
0012 #define BOOST_XPRESSIVE_DETAIL_BOYER_MOORE_HPP_EAN_10_04_2005
0013 
0014 // MS compatible compilers support #pragma once
0015 #if defined(_MSC_VER)
0016 # pragma once
0017 # pragma warning(push)
0018 # pragma warning(disable : 4100) // unreferenced formal parameter
0019 #endif
0020 
0021 #include <climits>  // for UCHAR_MAX
0022 #include <cstddef>  // for std::ptrdiff_t
0023 #include <utility>  // for std::max
0024 #include <vector>
0025 #include <boost/mpl/bool.hpp>
0026 #include <boost/noncopyable.hpp>
0027 #include <boost/iterator/iterator_traits.hpp>
0028 #include <boost/type_traits/is_convertible.hpp>
0029 #include <boost/xpressive/detail/detail_fwd.hpp>
0030 
0031 namespace boost { namespace xpressive { namespace detail
0032 {
0033 
0034 ///////////////////////////////////////////////////////////////////////////////
0035 // boyer_moore
0036 //
0037 template<typename BidiIter, typename Traits>
0038 struct boyer_moore
0039   : noncopyable
0040 {
0041     typedef typename iterator_value<BidiIter>::type char_type;
0042     typedef Traits traits_type;
0043     typedef has_fold_case<Traits> case_fold;
0044     typedef typename Traits::string_type string_type;
0045 
0046     // initialize the Boyer-Moore search data structure, using the
0047     // search sub-sequence to prime the pump.
0048     boyer_moore(char_type const *begin, char_type const *end, Traits const &tr, bool icase)
0049       : begin_(begin)
0050       , last_(begin)
0051       , fold_()
0052       , find_fun_(
0053               icase
0054             ? (case_fold() ? &boyer_moore::find_nocase_fold_ : &boyer_moore::find_nocase_)
0055             : &boyer_moore::find_
0056         )
0057     {
0058         std::ptrdiff_t const uchar_max = UCHAR_MAX;
0059         std::ptrdiff_t diff = std::distance(begin, end);
0060         this->length_  = static_cast<unsigned char>((std::min)(diff, uchar_max));
0061         std::fill_n(static_cast<unsigned char *>(this->offsets_), uchar_max + 1, this->length_);
0062         --this->length_;
0063 
0064         icase ? this->init_(tr, case_fold()) : this->init_(tr, mpl::false_());
0065     }
0066 
0067     BidiIter find(BidiIter begin, BidiIter end, Traits const &tr) const
0068     {
0069         return (this->*this->find_fun_)(begin, end, tr);
0070     }
0071 
0072 private:
0073 
0074     void init_(Traits const &tr, mpl::false_)
0075     {
0076         for(unsigned char offset = this->length_; offset; --offset, ++this->last_)
0077         {
0078             this->offsets_[tr.hash(*this->last_)] = offset;
0079         }
0080     }
0081 
0082     void init_(Traits const &tr, mpl::true_)
0083     {
0084         this->fold_.reserve(this->length_ + 1);
0085         for(unsigned char offset = this->length_; offset; --offset, ++this->last_)
0086         {
0087             this->fold_.push_back(tr.fold_case(*this->last_));
0088             for(typename string_type::const_iterator beg = this->fold_.back().begin(), end = this->fold_.back().end();
0089                 beg != end; ++beg)
0090             {
0091                 this->offsets_[tr.hash(*beg)] = offset;
0092             }
0093         }
0094         this->fold_.push_back(tr.fold_case(*this->last_));
0095     }
0096 
0097     // case-sensitive Boyer-Moore search
0098     BidiIter find_(BidiIter begin, BidiIter end, Traits const &tr) const
0099     {
0100         typedef typename boost::iterator_difference<BidiIter>::type diff_type;
0101         diff_type const endpos = std::distance(begin, end);
0102         diff_type offset = static_cast<diff_type>(this->length_);
0103 
0104         for(diff_type curpos = offset; curpos < endpos; curpos += offset)
0105         {
0106             std::advance(begin, offset);
0107 
0108             char_type const *pat_tmp = this->last_;
0109             BidiIter str_tmp = begin;
0110 
0111             for(; tr.translate(*str_tmp) == *pat_tmp; --pat_tmp, --str_tmp)
0112             {
0113                 if(pat_tmp == this->begin_)
0114                 {
0115                     return str_tmp;
0116                 }
0117             }
0118 
0119             offset = this->offsets_[tr.hash(tr.translate(*begin))];
0120         }
0121 
0122         return end;
0123     }
0124 
0125     // case-insensitive Boyer-Moore search
0126     BidiIter find_nocase_(BidiIter begin, BidiIter end, Traits const &tr) const
0127     {
0128         typedef typename boost::iterator_difference<BidiIter>::type diff_type;
0129         diff_type const endpos = std::distance(begin, end);
0130         diff_type offset = static_cast<diff_type>(this->length_);
0131 
0132         for(diff_type curpos = offset; curpos < endpos; curpos += offset)
0133         {
0134             std::advance(begin, offset);
0135 
0136             char_type const *pat_tmp = this->last_;
0137             BidiIter str_tmp = begin;
0138 
0139             for(; tr.translate_nocase(*str_tmp) == *pat_tmp; --pat_tmp, --str_tmp)
0140             {
0141                 if(pat_tmp == this->begin_)
0142                 {
0143                     return str_tmp;
0144                 }
0145             }
0146 
0147             offset = this->offsets_[tr.hash(tr.translate_nocase(*begin))];
0148         }
0149 
0150         return end;
0151     }
0152 
0153     // case-insensitive Boyer-Moore search with case-folding
0154     BidiIter find_nocase_fold_(BidiIter begin, BidiIter end, Traits const &tr) const
0155     {
0156         typedef typename boost::iterator_difference<BidiIter>::type diff_type;
0157         diff_type const endpos = std::distance(begin, end);
0158         diff_type offset = static_cast<diff_type>(this->length_);
0159 
0160         for(diff_type curpos = offset; curpos < endpos; curpos += offset)
0161         {
0162             std::advance(begin, offset);
0163 
0164             string_type const *pat_tmp = &this->fold_.back();
0165             BidiIter str_tmp = begin;
0166 
0167             for(; pat_tmp->end() != std::find(pat_tmp->begin(), pat_tmp->end(), *str_tmp);
0168                 --pat_tmp, --str_tmp)
0169             {
0170                 if(pat_tmp == &this->fold_.front())
0171                 {
0172                     return str_tmp;
0173                 }
0174             }
0175 
0176             offset = this->offsets_[tr.hash(*begin)];
0177         }
0178 
0179         return end;
0180     }
0181 
0182 private:
0183 
0184     char_type const *begin_;
0185     char_type const *last_;
0186     std::vector<string_type> fold_;
0187     BidiIter (boyer_moore::*const find_fun_)(BidiIter, BidiIter, Traits const &) const;
0188     unsigned char length_;
0189     unsigned char offsets_[UCHAR_MAX + 1];
0190 };
0191 
0192 }}} // namespace boost::xpressive::detail
0193 
0194 #if defined(_MSC_VER)
0195 # pragma warning(pop)
0196 #endif
0197 
0198 #endif