Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-05-12 09:08:10

0001 #ifndef REGEXIMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
0002 #define REGEXIMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66
0003 
0004 #if defined(_MSC_VER) ||                                            \
0005     (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
0006      (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
0007 #pragma once
0008 #endif
0009 
0010 #include "stream.h"
0011 #include "streamcharsource.h"
0012 #include "stringsource.h"
0013 
0014 namespace SHERPA_YAML {
0015 // query matches
0016 inline bool RegEx::Matches(char ch) const {
0017   std::string str;
0018   str += ch;
0019   return Matches(str);
0020 }
0021 
0022 inline bool RegEx::Matches(const std::string& str) const {
0023   return Match(str) >= 0;
0024 }
0025 
0026 inline bool RegEx::Matches(const Stream& in) const { return Match(in) >= 0; }
0027 
0028 template <typename Source>
0029 inline bool RegEx::Matches(const Source& source) const {
0030   return Match(source) >= 0;
0031 }
0032 
0033 // Match
0034 // . Matches the given string against this regular expression.
0035 // . Returns the number of characters matched.
0036 // . Returns -1 if no characters were matched (the reason for
0037 //   not returning zero is that we may have an empty regex
0038 //   which is ALWAYS successful at matching zero characters).
0039 // . REMEMBER that we only match from the start of the buffer!
0040 inline int RegEx::Match(const std::string& str) const {
0041   StringCharSource source(str.c_str(), str.size());
0042   return Match(source);
0043 }
0044 
0045 inline int RegEx::Match(const Stream& in) const {
0046   StreamCharSource source(in);
0047   return Match(source);
0048 }
0049 
0050 template <typename Source>
0051 inline bool RegEx::IsValidSource(const Source& source) const {
0052   return source;
0053 }
0054 
0055 template <>
0056 inline bool RegEx::IsValidSource<StringCharSource>(
0057     const StringCharSource& source) const {
0058   switch (m_op) {
0059     case REGEX_MATCH:
0060     case REGEX_RANGE:
0061       return source;
0062     default:
0063       return true;
0064   }
0065 }
0066 
0067 template <typename Source>
0068 inline int RegEx::Match(const Source& source) const {
0069   return IsValidSource(source) ? MatchUnchecked(source) : -1;
0070 }
0071 
0072 template <typename Source>
0073 inline int RegEx::MatchUnchecked(const Source& source) const {
0074   switch (m_op) {
0075     case REGEX_EMPTY:
0076       return MatchOpEmpty(source);
0077     case REGEX_MATCH:
0078       return MatchOpMatch(source);
0079     case REGEX_RANGE:
0080       return MatchOpRange(source);
0081     case REGEX_OR:
0082       return MatchOpOr(source);
0083     case REGEX_AND:
0084       return MatchOpAnd(source);
0085     case REGEX_NOT:
0086       return MatchOpNot(source);
0087     case REGEX_SEQ:
0088       return MatchOpSeq(source);
0089   }
0090 
0091   return -1;
0092 }
0093 
0094 //////////////////////////////////////////////////////////////////////////////
0095 // Operators
0096 // Note: the convention MatchOp*<Source> is that we can assume
0097 // IsValidSource(source).
0098 //       So we do all our checks *before* we call these functions
0099 
0100 // EmptyOperator
0101 template <typename Source>
0102 inline int RegEx::MatchOpEmpty(const Source& source) const {
0103   return source[0] == Stream::eof() ? 0 : -1;
0104 }
0105 
0106 template <>
0107 inline int RegEx::MatchOpEmpty<StringCharSource>(
0108     const StringCharSource& source) const {
0109   return !source ? 0 : -1;  // the empty regex only is successful on the empty
0110                             // string
0111 }
0112 
0113 // MatchOperator
0114 template <typename Source>
0115 inline int RegEx::MatchOpMatch(const Source& source) const {
0116   if (source[0] != m_a)
0117     return -1;
0118   return 1;
0119 }
0120 
0121 // RangeOperator
0122 template <typename Source>
0123 inline int RegEx::MatchOpRange(const Source& source) const {
0124   if (m_a > source[0] || m_z < source[0])
0125     return -1;
0126   return 1;
0127 }
0128 
0129 // OrOperator
0130 template <typename Source>
0131 inline int RegEx::MatchOpOr(const Source& source) const {
0132   for (const RegEx& param : m_params) {
0133     int n = param.MatchUnchecked(source);
0134     if (n >= 0)
0135       return n;
0136   }
0137   return -1;
0138 }
0139 
0140 // AndOperator
0141 // Note: 'AND' is a little funny, since we may be required to match things
0142 //       of different lengths. If we find a match, we return the length of
0143 //       the FIRST entry on the list.
0144 template <typename Source>
0145 inline int RegEx::MatchOpAnd(const Source& source) const {
0146   int first = -1;
0147   for (std::size_t i = 0; i < m_params.size(); i++) {
0148     int n = m_params[i].MatchUnchecked(source);
0149     if (n == -1)
0150       return -1;
0151     if (i == 0)
0152       first = n;
0153   }
0154   return first;
0155 }
0156 
0157 // NotOperator
0158 template <typename Source>
0159 inline int RegEx::MatchOpNot(const Source& source) const {
0160   if (m_params.empty())
0161     return -1;
0162   if (m_params[0].MatchUnchecked(source) >= 0)
0163     return -1;
0164   return 1;
0165 }
0166 
0167 // SeqOperator
0168 template <typename Source>
0169 inline int RegEx::MatchOpSeq(const Source& source) const {
0170   int offset = 0;
0171   for (const RegEx& param : m_params) {
0172     int n = param.Match(source + offset);  // note Match, not
0173                                            // MatchUnchecked because we
0174                                            // need to check validity after
0175                                            // the offset
0176     if (n == -1)
0177       return -1;
0178     offset += n;
0179   }
0180 
0181   return offset;
0182 }
0183 }  // namespace SHERPA_YAML
0184 
0185 #endif  // REGEXIMPL_H_62B23520_7C8E_11DE_8A39_0800200C9A66