Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-05-10 08:44:31

0001 //===-- GlobPattern.h - glob pattern matcher implementation -*- C++ -*-----===//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 //
0009 // This file implements a glob pattern matcher.
0010 //
0011 //===----------------------------------------------------------------------===//
0012 
0013 #ifndef LLVM_SUPPORT_GLOBPATTERN_H
0014 #define LLVM_SUPPORT_GLOBPATTERN_H
0015 
0016 #include "llvm/ADT/BitVector.h"
0017 #include "llvm/ADT/SmallVector.h"
0018 #include "llvm/ADT/StringRef.h"
0019 #include "llvm/Support/Error.h"
0020 #include <optional>
0021 
0022 namespace llvm {
0023 
0024 /// This class implements a glob pattern matcher similar to the one found in
0025 /// bash, with one key difference: \p "*" matches all characters, including
0026 /// path separators.
0027 ///
0028 /// * \p "?" matches a single character.
0029 /// * \p "*" matches zero or more characters.
0030 /// * \p "[<chars>]" matches one character in the bracket. Character ranges,
0031 ///   e.g., \p "[a-z]", and negative sets via \p "[^ab]" or \p "[!ab]" are also
0032 ///   supported.
0033 /// * \p "{<glob>,...}" matches one of the globs in the list. Nested brace
0034 ///   expansions are not supported. If \p MaxSubPatterns is empty then
0035 ///   brace expansions are not supported and characters \p "{,}" are treated as
0036 ///   literals.
0037 /// * \p "\\" (a single backslash) escapes the next character so it is treated
0038 ///   as a literal.
0039 ///
0040 /// Some known edge cases are:
0041 /// * \p "]" is allowed as the first character in a character class, i.e.,
0042 ///   \p "[]]" is valid and matches the literal \p "]".
0043 /// * The empty character class, i.e., \p "[]", is invalid.
0044 /// * Empty or singleton brace expansions, e.g., \p "{}", \p "{a}", are invalid.
0045 /// * \p "}" and \p "," that are not inside a brace expansion are taken as
0046 ///   literals, e.g., \p ",}" is valid but \p "{" is not.
0047 ///
0048 /// For example, \p "*[/\\\\]foo.{c,cpp}" (with two backslashes) will match
0049 /// (unix or windows) paths to all files named \p "foo.c" or \p "foo.cpp".
0050 class GlobPattern {
0051 public:
0052   /// \param Pat the pattern to match against
0053   /// \param MaxSubPatterns if provided limit the number of allowed subpatterns
0054   ///                       created from expanding braces otherwise disable
0055   ///                       brace expansion
0056   static Expected<GlobPattern>
0057   create(StringRef Pat, std::optional<size_t> MaxSubPatterns = {});
0058   /// \returns \p true if \p S matches this glob pattern
0059   bool match(StringRef S) const;
0060 
0061   // Returns true for glob pattern "*". Can be used to avoid expensive
0062   // preparation/acquisition of the input for match().
0063   bool isTrivialMatchAll() const {
0064     if (!Prefix.empty())
0065       return false;
0066     if (SubGlobs.size() != 1)
0067       return false;
0068     return SubGlobs[0].getPat() == "*";
0069   }
0070 
0071 private:
0072   StringRef Prefix;
0073 
0074   struct SubGlobPattern {
0075     /// \param Pat the pattern to match against
0076     static Expected<SubGlobPattern> create(StringRef Pat);
0077     /// \returns \p true if \p S matches this glob pattern
0078     bool match(StringRef S) const;
0079     StringRef getPat() const { return StringRef(Pat.data(), Pat.size()); }
0080 
0081     // Brackets with their end position and matched bytes.
0082     struct Bracket {
0083       size_t NextOffset;
0084       BitVector Bytes;
0085     };
0086     SmallVector<Bracket, 0> Brackets;
0087     SmallVector<char, 0> Pat;
0088   };
0089   SmallVector<SubGlobPattern, 1> SubGlobs;
0090 };
0091 }
0092 
0093 #endif // LLVM_SUPPORT_GLOBPATTERN_H