Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-05-10 08:44:33

0001 //===-- Regex.h - Regular Expression matcher implementation -*- C++ -*-----===//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 //
0009 // This file implements a POSIX regular expression matcher.  Both Basic and
0010 // Extended POSIX regular expressions (ERE) are supported.  EREs were extended
0011 // to support backreferences in matches.
0012 // This implementation also supports matching strings with embedded NUL chars.
0013 //
0014 //===----------------------------------------------------------------------===//
0015 
0016 #ifndef LLVM_SUPPORT_REGEX_H
0017 #define LLVM_SUPPORT_REGEX_H
0018 
0019 #include "llvm/ADT/BitmaskEnum.h"
0020 #include <string>
0021 
0022 struct llvm_regex;
0023 
0024 namespace llvm {
0025   class StringRef;
0026   template<typename T> class SmallVectorImpl;
0027 
0028   class Regex {
0029   public:
0030     enum RegexFlags : unsigned {
0031       NoFlags = 0,
0032       /// Compile for matching that ignores upper/lower case distinctions.
0033       IgnoreCase = 1,
0034       /// Compile for newline-sensitive matching. With this flag '[^' bracket
0035       /// expressions and '.' never match newline. A ^ anchor matches the
0036       /// null string after any newline in the string in addition to its normal
0037       /// function, and the $ anchor matches the null string before any
0038       /// newline in the string in addition to its normal function.
0039       Newline = 2,
0040       /// By default, the POSIX extended regular expression (ERE) syntax is
0041       /// assumed. Pass this flag to turn on basic regular expressions (BRE)
0042       /// instead.
0043       BasicRegex = 4,
0044 
0045       LLVM_MARK_AS_BITMASK_ENUM(BasicRegex)
0046     };
0047 
0048     Regex();
0049     /// Compiles the given regular expression \p Regex.
0050     ///
0051     /// \param Regex - referenced string is no longer needed after this
0052     /// constructor does finish.  Only its compiled form is kept stored.
0053     Regex(StringRef Regex, RegexFlags Flags = NoFlags);
0054     Regex(StringRef Regex, unsigned Flags);
0055     Regex(const Regex &) = delete;
0056     Regex &operator=(Regex regex) {
0057       std::swap(preg, regex.preg);
0058       std::swap(error, regex.error);
0059       return *this;
0060     }
0061     Regex(Regex &&regex);
0062     ~Regex();
0063 
0064     /// isValid - returns the error encountered during regex compilation, if
0065     /// any.
0066     bool isValid(std::string &Error) const;
0067     bool isValid() const { return !error; }
0068 
0069     /// getNumMatches - In a valid regex, return the number of parenthesized
0070     /// matches it contains.  The number filled in by match will include this
0071     /// many entries plus one for the whole regex (as element 0).
0072     unsigned getNumMatches() const;
0073 
0074     /// matches - Match the regex against a given \p String.
0075     ///
0076     /// \param Matches - If given, on a successful match this will be filled in
0077     /// with references to the matched group expressions (inside \p String),
0078     /// the first group is always the entire pattern.
0079     ///
0080     /// \param Error - If non-null, any errors in the matching will be recorded
0081     /// as a non-empty string. If there is no error, it will be an empty string.
0082     ///
0083     /// This returns true on a successful match.
0084     bool match(StringRef String, SmallVectorImpl<StringRef> *Matches = nullptr,
0085                std::string *Error = nullptr) const;
0086 
0087     /// sub - Return the result of replacing the first match of the regex in
0088     /// \p String with the \p Repl string. Backreferences like "\0" and "\g<1>"
0089     /// in the replacement string are replaced with the appropriate match
0090     /// substring.
0091     ///
0092     /// Note that the replacement string has backslash escaping performed on
0093     /// it. Invalid backreferences are ignored (replaced by empty strings).
0094     ///
0095     /// \param Error If non-null, any errors in the substitution (invalid
0096     /// backreferences, trailing backslashes) will be recorded as a non-empty
0097     /// string. If there is no error, it will be an empty string.
0098     std::string sub(StringRef Repl, StringRef String,
0099                     std::string *Error = nullptr) const;
0100 
0101     /// If this function returns true, ^Str$ is an extended regular
0102     /// expression that matches Str and only Str.
0103     static bool isLiteralERE(StringRef Str);
0104 
0105     /// Turn String into a regex by escaping its special characters.
0106     static std::string escape(StringRef String);
0107 
0108   private:
0109     struct llvm_regex *preg;
0110     int error;
0111   };
0112 }
0113 
0114 #endif // LLVM_SUPPORT_REGEX_H