Back to home page

EIC code displayed by LXR

 
 

    


Warning, file /include/re2/filtered_re2.h was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).

0001 // Copyright 2009 The RE2 Authors.  All Rights Reserved.
0002 // Use of this source code is governed by a BSD-style
0003 // license that can be found in the LICENSE file.
0004 
0005 #ifndef RE2_FILTERED_RE2_H_
0006 #define RE2_FILTERED_RE2_H_
0007 
0008 // The class FilteredRE2 is used as a wrapper to multiple RE2 regexps.
0009 // It provides a prefilter mechanism that helps in cutting down the
0010 // number of regexps that need to be actually searched.
0011 //
0012 // By design, it does not include a string matching engine. This is to
0013 // allow the user of the class to use their favorite string matching
0014 // engine. The overall flow is: Add all the regexps using Add, then
0015 // Compile the FilteredRE2. Compile returns strings that need to be
0016 // matched. Note that the returned strings are lowercased and distinct.
0017 // For applying regexps to a search text, the caller does the string
0018 // matching using the returned strings. When doing the string match,
0019 // note that the caller has to do that in a case-insensitive way or
0020 // on a lowercased version of the search text. Then call FirstMatch
0021 // or AllMatches with a vector of indices of strings that were found
0022 // in the text to get the actual regexp matches.
0023 
0024 #include <memory>
0025 #include <string>
0026 #include <vector>
0027 
0028 #include "absl/strings/string_view.h"
0029 #include "re2/re2.h"
0030 
0031 namespace re2 {
0032 
0033 class PrefilterTree;
0034 
0035 class FilteredRE2 {
       // Wrapper around multiple RE2 regexps that provides a prefilter
       // mechanism to cut down the number of regexps that must actually be
       // searched. Intended flow, per the file-level comment: Add() every
       // regexp, call Compile(), run the caller's own string matching over
       // the returned strings, then call FirstMatch() or AllMatches() with
       // the indices of the strings that were found.
0036  public:
0037   FilteredRE2();
       // NOTE(review): min_atom_len is not described in this header;
       // presumably the minimum length of a string atom used for
       // prefiltering -- confirm against the implementation.
0038   explicit FilteredRE2(int min_atom_len);
0039   ~FilteredRE2();
0040 
0041   // Not copyable.
0042   FilteredRE2(const FilteredRE2&) = delete;
0043   FilteredRE2& operator=(const FilteredRE2&) = delete;
0044   // Movable.
0045   FilteredRE2(FilteredRE2&& other);
0046   FilteredRE2& operator=(FilteredRE2&& other);
0047 
0048   // Uses RE2 constructor to create a RE2 object (re). Returns
0049   // re->error_code(). If error_code is other than NoError, then re is
0050   // deleted and not added to re2_vec_.
       // NOTE(review): the id out-parameter is not described here;
       // presumably *id receives the index assigned to the newly added
       // regexp on success -- confirm against the implementation.
0051   RE2::ErrorCode Add(absl::string_view pattern,
0052                      const RE2::Options& options,
0053                      int* id);
0054 
0055   // Prepares the regexps added by Add for filtering.  Returns a set
0056   // of strings that the caller should check for in candidate texts.
0057   // The returned strings are lowercased and distinct. When doing
0058   // string matching, it should be performed in a case-insensitive
0059   // way or the search text should be lowercased first.  Call after
0060   // all Add calls are done.
       // The strings are returned through the strings_to_match out-parameter.
0061   void Compile(std::vector<std::string>* strings_to_match);
0062 
0063   // Returns the index of the first matching regexp.
0064   // Returns -1 on no match. Can be called prior to Compile.
0065   // Does not do any filtering: simply tries to Match the
0066   // regexps in a loop.
0067   int SlowFirstMatch(absl::string_view text) const;
0068 
0069   // Returns the index of the first matching regexp.
0070   // Returns -1 on no match. Compile has to be called before
0071   // calling this.
       // atoms holds the indices of the strings returned by Compile that
       // the caller found in text.
0072   int FirstMatch(absl::string_view text,
0073                  const std::vector<int>& atoms) const;
0074 
0075   // Returns the indices of all matching regexps, after first clearing
0076   // matching_regexps.
       // atoms holds the indices of the strings returned by Compile that
       // the caller found in text.
       // NOTE(review): the meaning of the bool return value is not stated
       // in this header; presumably true when at least one regexp matched
       // -- confirm against the implementation.
0077   bool AllMatches(absl::string_view text,
0078                   const std::vector<int>& atoms,
0079                   std::vector<int>* matching_regexps) const;
0080 
0081   // Returns the indices of all potentially matching regexps after first
0082   // clearing potential_regexps.
0083   // A regexp is potentially matching if it passes the filter.
0084   // If a regexp passes the filter it may still not match.
0085   // A regexp that does not pass the filter is guaranteed to not match.
0086   void AllPotentials(const std::vector<int>& atoms,
0087                      std::vector<int>* potential_regexps) const;
0088 
0089   // The number of regexps added.
0090   int NumRegexps() const { return static_cast<int>(re2_vec_.size()); }
0091 
0092   // Get the individual RE2 objects.
       // regexpid is used directly as an index into re2_vec_; no bounds
       // check is performed here.
0093   const RE2& GetRE2(int regexpid) const { return *re2_vec_[regexpid]; }
0094 
0095  private:
0096   // Print prefilter.
0097   void PrintPrefilter(int regexpid);
0098 
0099   // Useful for testing and debugging.
0100   void RegexpsGivenStrings(const std::vector<int>& matched_atoms,
0101                            std::vector<int>* passed_regexps);
0102 
0103   // All the regexps in the FilteredRE2.
       // NOTE(review): stored as raw pointers; presumably owned by this
       // object and deleted in the destructor -- confirm against the
       // implementation.
0104   std::vector<RE2*> re2_vec_;
0105 
0106   // Has the FilteredRE2 been compiled using Compile()?
0107   bool compiled_;
0108 
0109   // An AND-OR tree of string atoms used for filtering regexps.
0110   std::unique_ptr<PrefilterTree> prefilter_tree_;
0111 };
0112 
0113 }  // namespace re2
0114 
0115 #endif  // RE2_FILTERED_RE2_H_