Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-05-10 08:36:58

0001 //===- Preprocessor.h - C Language Family Preprocessor ----------*- C++ -*-===//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 //
0009 /// \file
0010 /// Defines the clang::Preprocessor interface.
0011 //
0012 //===----------------------------------------------------------------------===//
0013 
0014 #ifndef LLVM_CLANG_LEX_PREPROCESSOR_H
0015 #define LLVM_CLANG_LEX_PREPROCESSOR_H
0016 
0017 #include "clang/Basic/Diagnostic.h"
0018 #include "clang/Basic/DiagnosticIDs.h"
0019 #include "clang/Basic/IdentifierTable.h"
0020 #include "clang/Basic/LLVM.h"
0021 #include "clang/Basic/LangOptions.h"
0022 #include "clang/Basic/Module.h"
0023 #include "clang/Basic/SourceLocation.h"
0024 #include "clang/Basic/SourceManager.h"
0025 #include "clang/Basic/TokenKinds.h"
0026 #include "clang/Lex/HeaderSearch.h"
0027 #include "clang/Lex/Lexer.h"
0028 #include "clang/Lex/MacroInfo.h"
0029 #include "clang/Lex/ModuleLoader.h"
0030 #include "clang/Lex/ModuleMap.h"
0031 #include "clang/Lex/PPCallbacks.h"
0032 #include "clang/Lex/PPEmbedParameters.h"
0033 #include "clang/Lex/Token.h"
0034 #include "clang/Lex/TokenLexer.h"
0035 #include "clang/Support/Compiler.h"
0036 #include "llvm/ADT/APSInt.h"
0037 #include "llvm/ADT/ArrayRef.h"
0038 #include "llvm/ADT/DenseMap.h"
0039 #include "llvm/ADT/FoldingSet.h"
0040 #include "llvm/ADT/FunctionExtras.h"
0041 #include "llvm/ADT/PointerUnion.h"
0042 #include "llvm/ADT/STLExtras.h"
0043 #include "llvm/ADT/SmallPtrSet.h"
0044 #include "llvm/ADT/SmallVector.h"
0045 #include "llvm/ADT/StringRef.h"
0046 #include "llvm/ADT/TinyPtrVector.h"
0047 #include "llvm/ADT/iterator_range.h"
0048 #include "llvm/Support/Allocator.h"
0049 #include "llvm/Support/Casting.h"
0050 #include "llvm/Support/Registry.h"
0051 #include <cassert>
0052 #include <cstddef>
0053 #include <cstdint>
0054 #include <map>
0055 #include <memory>
0056 #include <optional>
0057 #include <string>
0058 #include <utility>
0059 #include <vector>
0060 
0061 namespace llvm {
0062 
0063 template<unsigned InternalLen> class SmallString;
0064 
0065 } // namespace llvm
0066 
0067 namespace clang {
0068 
0069 class CodeCompletionHandler;
0070 class CommentHandler;
0071 class DirectoryEntry;
0072 class EmptylineHandler;
0073 class ExternalPreprocessorSource;
0074 class FileEntry;
0075 class FileManager;
0076 class HeaderSearch;
0077 class MacroArgs;
0078 class PragmaHandler;
0079 class PragmaNamespace;
0080 class PreprocessingRecord;
0081 class PreprocessorLexer;
0082 class PreprocessorOptions;
0083 class ScratchBuffer;
0084 class TargetInfo;
0085 
0086 namespace Builtin {
0087 class Context;
0088 }
0089 
0090 /// Stores token information for comparing actual tokens with
0091 /// predefined values.  Only handles simple tokens and identifiers.
0092 class TokenValue {
0093   tok::TokenKind Kind;
0094   IdentifierInfo *II;
0095 
0096 public:
0097   TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) {
0098     assert(Kind != tok::raw_identifier && "Raw identifiers are not supported.");
0099     assert(Kind != tok::identifier &&
0100            "Identifiers should be created by TokenValue(IdentifierInfo *)");
0101     assert(!tok::isLiteral(Kind) && "Literals are not supported.");
0102     assert(!tok::isAnnotation(Kind) && "Annotations are not supported.");
0103   }
0104 
0105   TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {}
0106 
0107   bool operator==(const Token &Tok) const {
0108     return Tok.getKind() == Kind &&
0109         (!II || II == Tok.getIdentifierInfo());
0110   }
0111 };
0112 
/// Context in which a macro name is used.
enum MacroUse {
  /// Any use other than in a \#define or \#undef.
  MU_Other = 0,

  /// The macro name is the one specified in a \#define.
  MU_Define = 1,

  /// The macro name is the one specified in an \#undef.
  MU_Undef = 2
};
0124 
/// Result codes for an embed query.  The non-negative values correspond to
/// the __STDC_EMBED_*__ macros named on each enumerator.
enum class EmbedResult {
  /// Parsing error occurred.
  Invalid = -1,
  /// Corresponds to __STDC_EMBED_NOT_FOUND__.
  NotFound = 0,
  /// Corresponds to __STDC_EMBED_FOUND__.
  Found = 1,
  /// Corresponds to __STDC_EMBED_EMPTY__.
  Empty = 2,
};
0131 
0132 /// Engages in a tight little dance with the lexer to efficiently
0133 /// preprocess tokens.
0134 ///
0135 /// Lexers know only about tokens within a single source file, and don't
0136 /// know anything about preprocessor-level issues like the \#include stack,
0137 /// token expansion, etc.
0138 class Preprocessor {
0139   friend class VAOptDefinitionContext;
0140   friend class VariadicMacroScopeGuard;
0141 
0142   llvm::unique_function<void(const clang::Token &)> OnToken;
0143   std::shared_ptr<PreprocessorOptions> PPOpts;
0144   DiagnosticsEngine        *Diags;
0145   const LangOptions &LangOpts;
0146   const TargetInfo *Target = nullptr;
0147   const TargetInfo *AuxTarget = nullptr;
0148   FileManager       &FileMgr;
0149   SourceManager     &SourceMgr;
0150   std::unique_ptr<ScratchBuffer> ScratchBuf;
0151   HeaderSearch      &HeaderInfo;
0152   ModuleLoader      &TheModuleLoader;
0153 
0154   /// External source of macros.
0155   ExternalPreprocessorSource *ExternalSource;
0156 
0157   /// A BumpPtrAllocator object used to quickly allocate and release
0158   /// objects internal to the Preprocessor.
0159   llvm::BumpPtrAllocator BP;
0160 
0161   /// Identifiers for builtin macros and other builtins.
0162   IdentifierInfo *Ident__LINE__, *Ident__FILE__;   // __LINE__, __FILE__
0163   IdentifierInfo *Ident__DATE__, *Ident__TIME__;   // __DATE__, __TIME__
0164   IdentifierInfo *Ident__INCLUDE_LEVEL__;          // __INCLUDE_LEVEL__
0165   IdentifierInfo *Ident__BASE_FILE__;              // __BASE_FILE__
0166   IdentifierInfo *Ident__FILE_NAME__;              // __FILE_NAME__
0167   IdentifierInfo *Ident__TIMESTAMP__;              // __TIMESTAMP__
0168   IdentifierInfo *Ident__COUNTER__;                // __COUNTER__
0169   IdentifierInfo *Ident_Pragma, *Ident__pragma;    // _Pragma, __pragma
0170   IdentifierInfo *Ident__identifier;               // __identifier
0171   IdentifierInfo *Ident__VA_ARGS__;                // __VA_ARGS__
0172   IdentifierInfo *Ident__VA_OPT__;                 // __VA_OPT__
0173   IdentifierInfo *Ident__has_feature;              // __has_feature
0174   IdentifierInfo *Ident__has_extension;            // __has_extension
0175   IdentifierInfo *Ident__has_builtin;              // __has_builtin
0176   IdentifierInfo *Ident__has_constexpr_builtin;    // __has_constexpr_builtin
0177   IdentifierInfo *Ident__has_attribute;            // __has_attribute
0178   IdentifierInfo *Ident__has_embed;                // __has_embed
0179   IdentifierInfo *Ident__has_include;              // __has_include
0180   IdentifierInfo *Ident__has_include_next;         // __has_include_next
0181   IdentifierInfo *Ident__has_warning;              // __has_warning
0182   IdentifierInfo *Ident__is_identifier;            // __is_identifier
0183   IdentifierInfo *Ident__building_module;          // __building_module
0184   IdentifierInfo *Ident__MODULE__;                 // __MODULE__
0185   IdentifierInfo *Ident__has_cpp_attribute;        // __has_cpp_attribute
0186   IdentifierInfo *Ident__has_c_attribute;          // __has_c_attribute
0187   IdentifierInfo *Ident__has_declspec;             // __has_declspec_attribute
0188   IdentifierInfo *Ident__is_target_arch;           // __is_target_arch
0189   IdentifierInfo *Ident__is_target_vendor;         // __is_target_vendor
0190   IdentifierInfo *Ident__is_target_os;             // __is_target_os
0191   IdentifierInfo *Ident__is_target_environment;    // __is_target_environment
0192   IdentifierInfo *Ident__is_target_variant_os;
0193   IdentifierInfo *Ident__is_target_variant_environment;
0194   IdentifierInfo *Ident__FLT_EVAL_METHOD__;        // __FLT_EVAL_METHOD
0195 
0196   // Weak, only valid (and set) while InMacroArgs is true.
0197   Token* ArgMacro;
0198 
0199   SourceLocation DATELoc, TIMELoc;
0200 
0201   // FEM_UnsetOnCommandLine means that an explicit evaluation method was
0202   // not specified on the command line. The target is queried to set the
0203   // default evaluation method.
0204   LangOptions::FPEvalMethodKind CurrentFPEvalMethod =
0205       LangOptions::FPEvalMethodKind::FEM_UnsetOnCommandLine;
0206 
0207   // The most recent pragma location where the floating point evaluation
0208   // method was modified. This is used to determine whether the
0209   // 'pragma clang fp eval_method' was used within the current scope.
0210   SourceLocation LastFPEvalPragmaLocation;
0211 
0212   LangOptions::FPEvalMethodKind TUFPEvalMethod =
0213       LangOptions::FPEvalMethodKind::FEM_UnsetOnCommandLine;
0214 
0215   // Next __COUNTER__ value, starts at 0.
0216   unsigned CounterValue = 0;
0217 
0218   enum {
0219     /// Maximum depth of \#includes.
0220     MaxAllowedIncludeStackDepth = 200
0221   };
0222 
0223   // State that is set before the preprocessor begins.
0224   bool KeepComments : 1;
0225   bool KeepMacroComments : 1;
0226   bool SuppressIncludeNotFoundError : 1;
0227 
0228   // State that changes while the preprocessor runs:
0229   bool InMacroArgs : 1;            // True if parsing fn macro invocation args.
0230 
0231   /// Whether the preprocessor owns the header search object.
0232   bool OwnsHeaderSearch : 1;
0233 
0234   /// True if macro expansion is disabled.
0235   bool DisableMacroExpansion : 1;
0236 
0237   /// Temporarily disables DisableMacroExpansion (i.e. enables expansion)
0238   /// when parsing preprocessor directives.
0239   bool MacroExpansionInDirectivesOverride : 1;
0240 
0241   class ResetMacroExpansionHelper;
0242 
0243   /// Whether we have already loaded macros from the external source.
0244   mutable bool ReadMacrosFromExternalSource : 1;
0245 
0246   /// True if pragmas are enabled.
0247   bool PragmasEnabled : 1;
0248 
0249   /// True if the current build action is a preprocessing action.
0250   bool PreprocessedOutput : 1;
0251 
0252   /// True if we are currently preprocessing a #if or #elif directive
0253   bool ParsingIfOrElifDirective;
0254 
0255   /// True if we are pre-expanding macro arguments.
0256   bool InMacroArgPreExpansion;
0257 
0258   /// Mapping/lookup information for all identifiers in
0259   /// the program, including program keywords.
0260   mutable IdentifierTable Identifiers;
0261 
0262   /// This table contains all the selectors in the program.
0263   ///
0264   /// Unlike IdentifierTable above, this table *isn't* populated by the
0265   /// preprocessor. It is declared/expanded here because its role/lifetime is
0266   /// conceptually similar to the IdentifierTable. In addition, the current
0267   /// control flow (in clang::ParseAST()) makes it convenient to put here.
0268   ///
0269   /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to
0270   /// the lifetime of the preprocessor.
0271   SelectorTable Selectors;
0272 
0273   /// Information about builtins.
0274   std::unique_ptr<Builtin::Context> BuiltinInfo;
0275 
0276   /// Tracks all of the pragmas that the client registered
0277   /// with this preprocessor.
0278   std::unique_ptr<PragmaNamespace> PragmaHandlers;
0279 
0280   /// Pragma handlers of the original source are stored here during the
0281   /// parsing of a model file.
0282   std::unique_ptr<PragmaNamespace> PragmaHandlersBackup;
0283 
0284   /// Tracks all of the comment handlers that the client registered
0285   /// with this preprocessor.
0286   std::vector<CommentHandler *> CommentHandlers;
0287 
0288   /// Empty line handler.
0289   EmptylineHandler *Emptyline = nullptr;
0290 
0291   /// True to avoid tearing down the lexer etc on EOF
0292   bool IncrementalProcessing = false;
0293 
0294 public:
0295   /// The kind of translation unit we are processing.
0296   const TranslationUnitKind TUKind;
0297 
0298   /// Returns a pointer into the given file's buffer that's guaranteed
0299   /// to be between tokens. The returned pointer is always before \p Start.
0300   /// The maximum distance between the returned pointer and \p Start is
0301   /// limited by a constant value, but also an implementation detail.
0302   /// If no such check point exists, \c nullptr is returned.
0303   const char *getCheckPoint(FileID FID, const char *Start) const;
0304 
0305 private:
0306   /// The code-completion handler.
0307   CodeCompletionHandler *CodeComplete = nullptr;
0308 
0309   /// The file that we're performing code-completion for, if any.
0310   const FileEntry *CodeCompletionFile = nullptr;
0311 
0312   /// The offset in file for the code-completion point.
0313   unsigned CodeCompletionOffset = 0;
0314 
0315   /// The location for the code-completion point. This gets instantiated
0316   /// when the CodeCompletionFile gets \#include'ed for preprocessing.
0317   SourceLocation CodeCompletionLoc;
0318 
0319   /// The start location for the file of the code-completion point.
0320   ///
0321   /// This gets instantiated when the CodeCompletionFile gets \#include'ed
0322   /// for preprocessing.
0323   SourceLocation CodeCompletionFileLoc;
0324 
0325   /// The source location of the \c import contextual keyword we just
0326   /// lexed, if any.
0327   SourceLocation ModuleImportLoc;
0328 
0329   /// The import path for named module that we're currently processing.
0330   SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> NamedModuleImportPath;
0331 
0332   llvm::DenseMap<FileID, SmallVector<const char *>> CheckPoints;
0333   unsigned CheckPointCounter = 0;
0334 
0335   /// Whether the import is an `@import` or a standard c++ modules import.
0336   bool IsAtImport = false;
0337 
0338   /// Whether the last token we lexed was an '@'.
0339   bool LastTokenWasAt = false;
0340 
0341   /// A position within a C++20 import-seq.
0342   class StdCXXImportSeq {
0343   public:
0344     enum State : int {
0345       // Positive values represent a number of unclosed brackets.
0346       AtTopLevel = 0,
0347       AfterTopLevelTokenSeq = -1,
0348       AfterExport = -2,
0349       AfterImportSeq = -3,
0350     };
0351 
0352     StdCXXImportSeq(State S) : S(S) {}
0353 
0354     /// Saw any kind of open bracket.
0355     void handleOpenBracket() {
0356       S = static_cast<State>(std::max<int>(S, 0) + 1);
0357     }
0358     /// Saw any kind of close bracket other than '}'.
0359     void handleCloseBracket() {
0360       S = static_cast<State>(std::max<int>(S, 1) - 1);
0361     }
0362     /// Saw a close brace.
0363     void handleCloseBrace() {
0364       handleCloseBracket();
0365       if (S == AtTopLevel && !AfterHeaderName)
0366         S = AfterTopLevelTokenSeq;
0367     }
0368     /// Saw a semicolon.
0369     void handleSemi() {
0370       if (atTopLevel()) {
0371         S = AfterTopLevelTokenSeq;
0372         AfterHeaderName = false;
0373       }
0374     }
0375 
0376     /// Saw an 'export' identifier.
0377     void handleExport() {
0378       if (S == AfterTopLevelTokenSeq)
0379         S = AfterExport;
0380       else if (S <= 0)
0381         S = AtTopLevel;
0382     }
0383     /// Saw an 'import' identifier.
0384     void handleImport() {
0385       if (S == AfterTopLevelTokenSeq || S == AfterExport)
0386         S = AfterImportSeq;
0387       else if (S <= 0)
0388         S = AtTopLevel;
0389     }
0390 
0391     /// Saw a 'header-name' token; do not recognize any more 'import' tokens
0392     /// until we reach a top-level semicolon.
0393     void handleHeaderName() {
0394       if (S == AfterImportSeq)
0395         AfterHeaderName = true;
0396       handleMisc();
0397     }
0398 
0399     /// Saw any other token.
0400     void handleMisc() {
0401       if (S <= 0)
0402         S = AtTopLevel;
0403     }
0404 
0405     bool atTopLevel() { return S <= 0; }
0406     bool afterImportSeq() { return S == AfterImportSeq; }
0407     bool afterTopLevelSeq() { return S == AfterTopLevelTokenSeq; }
0408 
0409   private:
0410     State S;
0411     /// Whether we're in the pp-import-suffix following the header-name in a
0412     /// pp-import. If so, a close-brace is not sufficient to end the
0413     /// top-level-token-seq of an import-seq.
0414     bool AfterHeaderName = false;
0415   };
0416 
0417   /// Our current position within a C++20 import-seq.
0418   StdCXXImportSeq StdCXXImportSeqState = StdCXXImportSeq::AfterTopLevelTokenSeq;
0419 
0420   /// Track whether we are in a Global Module Fragment
0421   class TrackGMF {
0422   public:
0423     enum GMFState : int {
0424       GMFActive = 1,
0425       MaybeGMF = 0,
0426       BeforeGMFIntroducer = -1,
0427       GMFAbsentOrEnded = -2,
0428     };
0429 
0430     TrackGMF(GMFState S) : S(S) {}
0431 
0432     /// Saw a semicolon.
0433     void handleSemi() {
0434       // If it is immediately after the first instance of the module keyword,
0435       // then that introduces the GMF.
0436       if (S == MaybeGMF)
0437         S = GMFActive;
0438     }
0439 
0440     /// Saw an 'export' identifier.
0441     void handleExport() {
0442       // The presence of an 'export' keyword always ends or excludes a GMF.
0443       S = GMFAbsentOrEnded;
0444     }
0445 
0446     /// Saw an 'import' identifier.
0447     void handleImport(bool AfterTopLevelTokenSeq) {
0448       // If we see this before any 'module' kw, then we have no GMF.
0449       if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer)
0450         S = GMFAbsentOrEnded;
0451     }
0452 
0453     /// Saw a 'module' identifier.
0454     void handleModule(bool AfterTopLevelTokenSeq) {
0455       // This was the first module identifier and not preceded by any token
0456       // that would exclude a GMF.  It could begin a GMF, but only if directly
0457       // followed by a semicolon.
0458       if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer)
0459         S = MaybeGMF;
0460       else
0461         S = GMFAbsentOrEnded;
0462     }
0463 
0464     /// Saw any other token.
0465     void handleMisc() {
0466       // We saw something other than ; after the 'module' kw, so not a GMF.
0467       if (S == MaybeGMF)
0468         S = GMFAbsentOrEnded;
0469     }
0470 
0471     bool inGMF() { return S == GMFActive; }
0472 
0473   private:
0474     /// Track the transitions into and out of a Global Module Fragment,
0475     /// if one is present.
0476     GMFState S;
0477   };
0478 
0479   TrackGMF TrackGMFState = TrackGMF::BeforeGMFIntroducer;
0480 
0481   /// Track the status of the c++20 module decl.
0482   ///
0483   ///   module-declaration:
0484   ///     'export'[opt] 'module' module-name module-partition[opt]
0485   ///     attribute-specifier-seq[opt] ';'
0486   ///
0487   ///   module-name:
0488   ///     module-name-qualifier[opt] identifier
0489   ///
0490   ///   module-partition:
0491   ///     ':' module-name-qualifier[opt] identifier
0492   ///
0493   ///   module-name-qualifier:
0494   ///     identifier '.'
0495   ///     module-name-qualifier identifier '.'
0496   ///
0497   /// Transition state:
0498   ///
0499   ///   NotAModuleDecl --- export ---> FoundExport
0500   ///   NotAModuleDecl --- module ---> ImplementationCandidate
0501   ///   FoundExport --- module ---> InterfaceCandidate
0502   ///   ImplementationCandidate --- Identifier ---> ImplementationCandidate
0503   ///   ImplementationCandidate --- period ---> ImplementationCandidate
0504   ///   ImplementationCandidate --- colon ---> ImplementationCandidate
0505   ///   InterfaceCandidate --- Identifier ---> InterfaceCandidate
0506   ///   InterfaceCandidate --- period ---> InterfaceCandidate
0507   ///   InterfaceCandidate --- colon ---> InterfaceCandidate
0508   ///   ImplementationCandidate --- Semi ---> NamedModuleImplementation
0509   ///   InterfaceCandidate --- Semi ---> NamedModuleInterface
0510   ///   NamedModuleImplementation --- Anything ---> NamedModuleImplementation
0511   ///   NamedModuleInterface --- Anything ---> NamedModuleInterface
0512   ///
0513   /// FIXME: We don't handle attribute-specifier-seq here yet. That is not a
0514   /// problem for now since we don't support any module attributes yet.
0515   class ModuleDeclSeq {
0516     enum ModuleDeclState : int {
0517       NotAModuleDecl,
0518       FoundExport,
0519       InterfaceCandidate,
0520       ImplementationCandidate,
0521       NamedModuleInterface,
0522       NamedModuleImplementation,
0523     };
0524 
0525   public:
0526     ModuleDeclSeq() = default;
0527 
0528     void handleExport() {
0529       if (State == NotAModuleDecl)
0530         State = FoundExport;
0531       else if (!isNamedModule())
0532         reset();
0533     }
0534 
0535     void handleModule() {
0536       if (State == FoundExport)
0537         State = InterfaceCandidate;
0538       else if (State == NotAModuleDecl)
0539         State = ImplementationCandidate;
0540       else if (!isNamedModule())
0541         reset();
0542     }
0543 
0544     void handleIdentifier(IdentifierInfo *Identifier) {
0545       if (isModuleCandidate() && Identifier)
0546         Name += Identifier->getName().str();
0547       else if (!isNamedModule())
0548         reset();
0549     }
0550 
0551     void handleColon() {
0552       if (isModuleCandidate())
0553         Name += ":";
0554       else if (!isNamedModule())
0555         reset();
0556     }
0557 
0558     void handlePeriod() {
0559       if (isModuleCandidate())
0560         Name += ".";
0561       else if (!isNamedModule())
0562         reset();
0563     }
0564 
0565     void handleSemi() {
0566       if (!Name.empty() && isModuleCandidate()) {
0567         if (State == InterfaceCandidate)
0568           State = NamedModuleInterface;
0569         else if (State == ImplementationCandidate)
0570           State = NamedModuleImplementation;
0571         else
0572           llvm_unreachable("Unimaged ModuleDeclState.");
0573       } else if (!isNamedModule())
0574         reset();
0575     }
0576 
0577     void handleMisc() {
0578       if (!isNamedModule())
0579         reset();
0580     }
0581 
0582     bool isModuleCandidate() const {
0583       return State == InterfaceCandidate || State == ImplementationCandidate;
0584     }
0585 
0586     bool isNamedModule() const {
0587       return State == NamedModuleInterface ||
0588              State == NamedModuleImplementation;
0589     }
0590 
0591     bool isNamedInterface() const { return State == NamedModuleInterface; }
0592 
0593     bool isImplementationUnit() const {
0594       return State == NamedModuleImplementation && !getName().contains(':');
0595     }
0596 
0597     StringRef getName() const {
0598       assert(isNamedModule() && "Can't get name from a non named module");
0599       return Name;
0600     }
0601 
0602     StringRef getPrimaryName() const {
0603       assert(isNamedModule() && "Can't get name from a non named module");
0604       return getName().split(':').first;
0605     }
0606 
0607     void reset() {
0608       Name.clear();
0609       State = NotAModuleDecl;
0610     }
0611 
0612   private:
0613     ModuleDeclState State = NotAModuleDecl;
0614     std::string Name;
0615   };
0616 
0617   ModuleDeclSeq ModuleDeclState;
0618 
0619   /// Whether the module import expects an identifier next. Otherwise,
0620   /// it expects a '.' or ';'.
0621   bool ModuleImportExpectsIdentifier = false;
0622 
0623   /// The identifier and source location of the currently-active
0624   /// \#pragma clang arc_cf_code_audited begin.
0625   std::pair<IdentifierInfo *, SourceLocation> PragmaARCCFCodeAuditedInfo;
0626 
0627   /// The source location of the currently-active
0628   /// \#pragma clang assume_nonnull begin.
0629   SourceLocation PragmaAssumeNonNullLoc;
0630 
0631   /// Set only for preambles which end with an active
0632   /// \#pragma clang assume_nonnull begin.
0633   ///
0634   /// When the preamble is loaded into the main file,
0635   /// `PragmaAssumeNonNullLoc` will be set to this to
0636   /// replay the unterminated assume_nonnull.
0637   SourceLocation PreambleRecordedPragmaAssumeNonNullLoc;
0638 
0639   /// True if we hit the code-completion point.
0640   bool CodeCompletionReached = false;
0641 
0642   /// The code completion token containing the information
0643   /// on the stem that is to be code completed.
0644   IdentifierInfo *CodeCompletionII = nullptr;
0645 
0646   /// Range for the code completion token.
0647   SourceRange CodeCompletionTokenRange;
0648 
0649   /// The directory that the main file should be considered to occupy,
0650   /// if it does not correspond to a real file (as happens when building a
0651   /// module).
0652   OptionalDirectoryEntryRef MainFileDir;
0653 
0654   /// The number of bytes that we will initially skip when entering the
0655   /// main file, along with a flag that indicates whether skipping this number
0656   /// of bytes will place the lexer at the start of a line.
0657   ///
0658   /// This is used when loading a precompiled preamble.
0659   std::pair<int, bool> SkipMainFilePreamble;
0660 
0661   /// Whether we hit an error due to reaching max allowed include depth. Allows
0662   /// to avoid hitting the same error over and over again.
0663   bool HasReachedMaxIncludeDepth = false;
0664 
0665   /// The number of currently-active calls to Lex.
0666   ///
0667   /// Lex is reentrant, and asking for an (end-of-phase-4) token can often
0668   /// require asking for multiple additional tokens. This counter makes it
0669   /// possible for Lex to detect whether it's producing a token for the end
0670   /// of phase 4 of translation or for some other situation.
0671   unsigned LexLevel = 0;
0672 
0673   /// The number of (LexLevel 0) preprocessor tokens.
0674   unsigned TokenCount = 0;
0675 
0676   /// Preprocess every token regardless of LexLevel.
0677   bool PreprocessToken = false;
0678 
0679   /// The maximum number of (LexLevel 0) tokens before issuing a -Wmax-tokens
0680   /// warning, or zero for unlimited.
0681   unsigned MaxTokens = 0;
0682   SourceLocation MaxTokensOverrideLoc;
0683 
0684 public:
0685   struct PreambleSkipInfo {
0686     SourceLocation HashTokenLoc;
0687     SourceLocation IfTokenLoc;
0688     bool FoundNonSkipPortion;
0689     bool FoundElse;
0690     SourceLocation ElseLoc;
0691 
0692     PreambleSkipInfo(SourceLocation HashTokenLoc, SourceLocation IfTokenLoc,
0693                      bool FoundNonSkipPortion, bool FoundElse,
0694                      SourceLocation ElseLoc)
0695         : HashTokenLoc(HashTokenLoc), IfTokenLoc(IfTokenLoc),
0696           FoundNonSkipPortion(FoundNonSkipPortion), FoundElse(FoundElse),
0697           ElseLoc(ElseLoc) {}
0698   };
0699 
0700   using IncludedFilesSet = llvm::DenseSet<const FileEntry *>;
0701 
0702 private:
0703   friend class ASTReader;
0704   friend class MacroArgs;
0705 
0706   class PreambleConditionalStackStore {
0707     enum State {
0708       Off = 0,
0709       Recording = 1,
0710       Replaying = 2,
0711     };
0712 
0713   public:
0714     PreambleConditionalStackStore() = default;
0715 
0716     void startRecording() { ConditionalStackState = Recording; }
0717     void startReplaying() { ConditionalStackState = Replaying; }
0718     bool isRecording() const { return ConditionalStackState == Recording; }
0719     bool isReplaying() const { return ConditionalStackState == Replaying; }
0720 
0721     ArrayRef<PPConditionalInfo> getStack() const {
0722       return ConditionalStack;
0723     }
0724 
0725     void doneReplaying() {
0726       ConditionalStack.clear();
0727       ConditionalStackState = Off;
0728     }
0729 
0730     void setStack(ArrayRef<PPConditionalInfo> s) {
0731       if (!isRecording() && !isReplaying())
0732         return;
0733       ConditionalStack.clear();
0734       ConditionalStack.append(s.begin(), s.end());
0735     }
0736 
0737     bool hasRecordedPreamble() const { return !ConditionalStack.empty(); }
0738 
0739     bool reachedEOFWhileSkipping() const { return SkipInfo.has_value(); }
0740 
0741     void clearSkipInfo() { SkipInfo.reset(); }
0742 
0743     std::optional<PreambleSkipInfo> SkipInfo;
0744 
0745   private:
0746     SmallVector<PPConditionalInfo, 4> ConditionalStack;
0747     State ConditionalStackState = Off;
0748   } PreambleConditionalStack;
0749 
0750   /// The current top of the stack that we're lexing from if
0751   /// not expanding a macro and we are lexing directly from source code.
0752   ///
0753   /// Only one of CurLexer or CurTokenLexer will be non-null.
0754   std::unique_ptr<Lexer> CurLexer;
0755 
0756   /// The current top of the stack that we're lexing from
0757   /// if not expanding a macro.
0758   ///
0759   /// This is an alias for CurLexer.
0760   PreprocessorLexer *CurPPLexer = nullptr;
0761 
0762   /// Used to find the current FileEntry, if CurLexer is non-null
0763   /// and if applicable.
0764   ///
0765   /// This allows us to implement \#include_next and find directory-specific
0766   /// properties.
0767   ConstSearchDirIterator CurDirLookup = nullptr;
0768 
0769   /// The current macro we are expanding, if we are expanding a macro.
0770   ///
0771   /// One of CurLexer and CurTokenLexer must be null.
0772   std::unique_ptr<TokenLexer> CurTokenLexer;
0773 
0774   /// The kind of lexer we're currently working with.
0775   typedef bool (*LexerCallback)(Preprocessor &, Token &);
0776   LexerCallback CurLexerCallback = &CLK_Lexer;
0777 
0778   /// If the current lexer is for a submodule that is being built, this
0779   /// is that submodule.
0780   Module *CurLexerSubmodule = nullptr;
0781 
0782   /// Keeps track of the stack of files currently
0783   /// \#included, and macros currently being expanded from, not counting
0784   /// CurLexer/CurTokenLexer.
0785   struct IncludeStackInfo {
0786     LexerCallback               CurLexerCallback;
0787     Module                     *TheSubmodule;
0788     std::unique_ptr<Lexer>      TheLexer;
0789     PreprocessorLexer          *ThePPLexer;
0790     std::unique_ptr<TokenLexer> TheTokenLexer;
0791     ConstSearchDirIterator      TheDirLookup;
0792 
0793     // The following constructors are completely useless copies of the default
0794     // versions, only needed to pacify MSVC.
0795     IncludeStackInfo(LexerCallback CurLexerCallback, Module *TheSubmodule,
0796                      std::unique_ptr<Lexer> &&TheLexer,
0797                      PreprocessorLexer *ThePPLexer,
0798                      std::unique_ptr<TokenLexer> &&TheTokenLexer,
0799                      ConstSearchDirIterator TheDirLookup)
0800         : CurLexerCallback(std::move(CurLexerCallback)),
0801           TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)),
0802           ThePPLexer(std::move(ThePPLexer)),
0803           TheTokenLexer(std::move(TheTokenLexer)),
0804           TheDirLookup(std::move(TheDirLookup)) {}
0805   };
0806   std::vector<IncludeStackInfo> IncludeMacroStack;
0807 
0808   /// Actions invoked when some preprocessor activity is
0809   /// encountered (e.g. a file is \#included, etc).
0810   std::unique_ptr<PPCallbacks> Callbacks;
0811 
0812   struct MacroExpandsInfo {
0813     Token Tok;
0814     MacroDefinition MD;
0815     SourceRange Range;
0816 
0817     MacroExpandsInfo(Token Tok, MacroDefinition MD, SourceRange Range)
0818         : Tok(Tok), MD(MD), Range(Range) {}
0819   };
0820   SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks;
0821 
0822   /// Information about a name that has been used to define a module macro.
0823   struct ModuleMacroInfo {
0824     /// The most recent macro directive for this identifier.
0825     MacroDirective *MD;
0826 
0827     /// The active module macros for this identifier.
0828     llvm::TinyPtrVector<ModuleMacro *> ActiveModuleMacros;
0829 
0830     /// The generation number at which we last updated ActiveModuleMacros.
0831     /// \see Preprocessor::VisibleModules.
0832     unsigned ActiveModuleMacrosGeneration = 0;
0833 
0834     /// Whether this macro name is ambiguous.
0835     bool IsAmbiguous = false;
0836 
0837     /// The module macros that are overridden by this macro.
0838     llvm::TinyPtrVector<ModuleMacro *> OverriddenMacros;
0839 
         /// Create the info for a name whose most recent directive is \p MD. All
         /// other fields keep their in-class defaults (empty lists, generation 0,
         /// not ambiguous).
0840     ModuleMacroInfo(MacroDirective *MD) : MD(MD) {}
0841   };
0842 
0843   /// The state of a macro for an identifier.
0844   class MacroState {
0845     mutable llvm::PointerUnion<MacroDirective *, ModuleMacroInfo *> State;
0846 
0847     ModuleMacroInfo *getModuleInfo(Preprocessor &PP,
0848                                    const IdentifierInfo *II) const {
0849       if (II->isOutOfDate())
0850         PP.updateOutOfDateIdentifier(*II);
0851       // FIXME: Find a spare bit on IdentifierInfo and store a
0852       //        HasModuleMacros flag.
0853       if (!II->hasMacroDefinition() ||
0854           (!PP.getLangOpts().Modules &&
0855            !PP.getLangOpts().ModulesLocalVisibility) ||
0856           !PP.CurSubmoduleState->VisibleModules.getGeneration())
0857         return nullptr;
0858 
0859       auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State);
0860       if (!Info) {
0861         Info = new (PP.getPreprocessorAllocator())
0862             ModuleMacroInfo(cast<MacroDirective *>(State));
0863         State = Info;
0864       }
0865 
0866       if (PP.CurSubmoduleState->VisibleModules.getGeneration() !=
0867           Info->ActiveModuleMacrosGeneration)
0868         PP.updateModuleMacroInfo(II, *Info);
0869       return Info;
0870     }
0871 
0872   public:
0873     MacroState() : MacroState(nullptr) {}
0874     MacroState(MacroDirective *MD) : State(MD) {}
0875 
0876     MacroState(MacroState &&O) noexcept : State(O.State) {
0877       O.State = (MacroDirective *)nullptr;
0878     }
0879 
0880     MacroState &operator=(MacroState &&O) noexcept {
0881       auto S = O.State;
0882       O.State = (MacroDirective *)nullptr;
0883       State = S;
0884       return *this;
0885     }
0886 
0887     ~MacroState() {
0888       if (auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State))
0889         Info->~ModuleMacroInfo();
0890     }
0891 
0892     MacroDirective *getLatest() const {
0893       if (auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State))
0894         return Info->MD;
0895       return cast<MacroDirective *>(State);
0896     }
0897 
0898     void setLatest(MacroDirective *MD) {
0899       if (auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State))
0900         Info->MD = MD;
0901       else
0902         State = MD;
0903     }
0904 
0905     bool isAmbiguous(Preprocessor &PP, const IdentifierInfo *II) const {
0906       auto *Info = getModuleInfo(PP, II);
0907       return Info ? Info->IsAmbiguous : false;
0908     }
0909 
0910     ArrayRef<ModuleMacro *>
0911     getActiveModuleMacros(Preprocessor &PP, const IdentifierInfo *II) const {
0912       if (auto *Info = getModuleInfo(PP, II))
0913         return Info->ActiveModuleMacros;
0914       return {};
0915     }
0916 
0917     MacroDirective::DefInfo findDirectiveAtLoc(SourceLocation Loc,
0918                                                SourceManager &SourceMgr) const {
0919       // FIXME: Incorporate module macros into the result of this.
0920       if (auto *Latest = getLatest())
0921         return Latest->findDirectiveAtLoc(Loc, SourceMgr);
0922       return {};
0923     }
0924 
0925     void overrideActiveModuleMacros(Preprocessor &PP, IdentifierInfo *II) {
0926       if (auto *Info = getModuleInfo(PP, II)) {
0927         Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
0928                                       Info->ActiveModuleMacros.begin(),
0929                                       Info->ActiveModuleMacros.end());
0930         Info->ActiveModuleMacros.clear();
0931         Info->IsAmbiguous = false;
0932       }
0933     }
0934 
0935     ArrayRef<ModuleMacro*> getOverriddenMacros() const {
0936       if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
0937         return Info->OverriddenMacros;
0938       return {};
0939     }
0940 
0941     void setOverriddenMacros(Preprocessor &PP,
0942                              ArrayRef<ModuleMacro *> Overrides) {
0943       auto *Info = dyn_cast_if_present<ModuleMacroInfo *>(State);
0944       if (!Info) {
0945         if (Overrides.empty())
0946           return;
0947         Info = new (PP.getPreprocessorAllocator())
0948             ModuleMacroInfo(cast<MacroDirective *>(State));
0949         State = Info;
0950       }
0951       Info->OverriddenMacros.clear();
0952       Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
0953                                     Overrides.begin(), Overrides.end());
0954       Info->ActiveModuleMacrosGeneration = 0;
0955     }
0956   };
0957 
0958   /// For each IdentifierInfo that was associated with a macro, we
0959   /// keep a mapping to the history of all macro definitions and \#undefs in
0960   /// reverse order (the latest one is at the head of the list).
0961   ///
0962   /// This mapping lives within the \p CurSubmoduleState.
0963   using MacroMap = llvm::DenseMap<const IdentifierInfo *, MacroState>;
0964 
       /// Forward declaration: BuildingSubmoduleInfo stores a pointer to a
       /// SubmoduleState before that struct is defined.
0965   struct SubmoduleState;
0966 
0967   /// Information about a submodule that we're currently building.
0968   struct BuildingSubmoduleInfo {
0969     /// The module that we are building.
0970     Module *M;
0971 
0972     /// The location at which the module was included.
0973     SourceLocation ImportLoc;
0974 
0975     /// Whether we entered this submodule via a pragma.
0976     bool IsPragma;
0977 
0978     /// The previous SubmoduleState.
0979     SubmoduleState *OuterSubmoduleState;
0980 
0981     /// The number of pending module macro names when we started building this.
0982     unsigned OuterPendingModuleMacroNames;
0983 
         /// Member-wise constructor; every field is taken from the argument of
         /// the same name.
0984     BuildingSubmoduleInfo(Module *M, SourceLocation ImportLoc, bool IsPragma,
0985                           SubmoduleState *OuterSubmoduleState,
0986                           unsigned OuterPendingModuleMacroNames)
0987         : M(M), ImportLoc(ImportLoc), IsPragma(IsPragma),
0988           OuterSubmoduleState(OuterSubmoduleState),
0989           OuterPendingModuleMacroNames(OuterPendingModuleMacroNames) {}
0990   };
       /// The submodules currently being built; markClangModuleAsAffecting()
       /// treats the back() entry as the current one. Exposed read-only through
       /// getBuildingSubmodules().
0991   SmallVector<BuildingSubmoduleInfo, 8> BuildingSubmoduleStack;
0992 
0993   /// Information about a submodule's preprocessor state.
0994   struct SubmoduleState {
0995     /// The macros for the submodule.
0996     MacroMap Macros;
0997 
0998     /// The set of modules that are visible within the submodule.
0999     VisibleModuleSet VisibleModules;
1000 
1001     // FIXME: CounterValue?
1002     // FIXME: PragmaPushMacroInfo?
1003   };
       /// The preprocessor state recorded per module, keyed by module pointer.
1004   std::map<Module *, SubmoduleState> Submodules;
1005 
1006   /// The preprocessor state for preprocessing outside of any submodule.
1007   SubmoduleState NullSubmoduleState;
1008 
1009   /// The current submodule state. Will be \p NullSubmoduleState if we're not
1010   /// in a submodule.
       // NOTE(review): no in-class initializer; presumably set by the
       // constructor -- confirm.
1011   SubmoduleState *CurSubmoduleState;
1012 
1013   /// The files that have been included. Updated by markIncluded() and queried
       /// by alreadyIncluded().
1014   IncludedFilesSet IncludedFiles;
1015 
1016   /// The set of top-level modules that affected preprocessing, but were not
1017   /// imported.
1018   llvm::SmallSetVector<Module *, 2> AffectingClangModules;
1019 
1020   /// The set of known macros exported from modules.
1021   llvm::FoldingSet<ModuleMacro> ModuleMacros;
1022 
1023   /// The names of potential module macros that we've not yet processed.
1024   llvm::SmallVector<IdentifierInfo *, 32> PendingModuleMacroNames;
1025 
1026   /// The list of module macros, for each identifier, that are not overridden by
1027   /// any other module macro. Queried through getLeafModuleMacros().
1028   llvm::DenseMap<const IdentifierInfo *, llvm::TinyPtrVector<ModuleMacro *>>
1029       LeafModuleMacros;
1030 
1031   /// Macros that we want to warn about because they are not used at the end
1032   /// of the translation unit.
1033   ///
1034   /// We store just their SourceLocations instead of
1035   /// something like MacroInfo*. The benefit of this is that when we are
1036   /// deserializing from PCH, we don't need to deserialize identifiers and
1037   /// macros just so that we can report that they are unused; we just warn using
1038   /// the SourceLocations of this set (that will be filled by the ASTReader).
1039   using WarnUnusedMacroLocsTy = llvm::SmallDenseSet<SourceLocation, 32>;
1040   WarnUnusedMacroLocsTy WarnUnusedMacroLocs;
1041 
1042   /// This is a pair of an optional message and source location used for pragmas
1043   /// that annotate macros like pragma clang restrict_expansion and pragma clang
1044   /// deprecated. This pair stores the optional message and the location of the
1045   /// annotation pragma for use producing diagnostics and notes.
1046   using MsgLocationPair = std::pair<std::string, SourceLocation>;
1047 
       /// The location and message text of a single macro annotation.
1048   struct MacroAnnotationInfo {
1049     SourceLocation Location;
1050     std::string Message;
1051   };
1052 
       /// The annotations recorded for one macro name; each entry is empty
       /// (std::nullopt) until it is set.
       // NOTE(review): judging by the field names these correspond to the
       // deprecation, restrict-expansion and "final" annotation pragmas --
       // confirm against the pragma handlers.
1053   struct MacroAnnotations {
1054     std::optional<MacroAnnotationInfo> DeprecationInfo;
1055     std::optional<MacroAnnotationInfo> RestrictExpansionInfo;
1056     std::optional<SourceLocation> FinalAnnotationLoc;
1057   };
1058 
1059   /// Warning information for macro annotations, keyed by macro identifier.
1060   llvm::DenseMap<const IdentifierInfo *, MacroAnnotations> AnnotationInfos;
1061 
1062   /// A "freelist" of MacroArgs objects that can be
1063   /// reused for quick allocation.
1064   MacroArgs *MacroArgCache = nullptr;
1065 
1066   /// For each IdentifierInfo used in a \#pragma push_macro directive,
1067   /// we keep a MacroInfo stack used to restore the previous macro value.
1068   llvm::DenseMap<IdentifierInfo *, std::vector<MacroInfo *>>
1069       PragmaPushMacroInfo;
1070 
1071   // Various statistics we track for performance analysis. Every counter
       // starts at zero; NumDirectives is exposed through getNumDirectives().
1072   unsigned NumDirectives = 0;
1073   unsigned NumDefined = 0;
1074   unsigned NumUndefined = 0;
1075   unsigned NumPragma = 0;
1076   unsigned NumIf = 0;
1077   unsigned NumElse = 0;
1078   unsigned NumEndif = 0;
1079   unsigned NumEnteredSourceFiles = 0;
1080   unsigned MaxIncludeStackDepth = 0;
1081   unsigned NumMacroExpanded = 0;
1082   unsigned NumFnMacroExpanded = 0;
1083   unsigned NumBuiltinMacroExpanded = 0;
1084   unsigned NumFastMacroExpanded = 0;
1085   unsigned NumTokenPaste = 0;
1086   unsigned NumFastTokenPaste = 0;
1087   unsigned NumSkipped = 0;
1088 
1089   /// The predefined macros that the preprocessor should use from the
1090   /// command line etc.
1091   std::string Predefines;
1092 
1093   /// The file ID for the preprocessor predefines.
1094   FileID PredefinesFileID;
1095 
1096   /// The file ID for the PCH through header.
1097   FileID PCHThroughHeaderFileID;
1098 
1099   /// Whether tokens are being skipped until a \#pragma hdrstop is seen.
1100   bool SkippingUntilPragmaHdrStop = false;
1101 
1102   /// Whether tokens are being skipped until the through header is seen.
1103   bool SkippingUntilPCHThroughHeader = false;
1104 
1105   /// \{
1106   /// Cache of macro expanders to reduce malloc traffic.
1107   enum { TokenLexerCacheSize = 8 };
       // NOTE(review): NumCachedTokenLexers has no in-class initializer;
       // presumably the constructor sets it -- confirm.
1108   unsigned NumCachedTokenLexers;
1109   std::unique_ptr<TokenLexer> TokenLexerCache[TokenLexerCacheSize];
1110   /// \}
1111 
1112   /// Keeps macro expanded tokens for TokenLexers.
1113   ///
1114   /// Works like a stack; a TokenLexer adds the macro expanded tokens that it is
1115   /// going to lex to the cache, and when it finishes, the tokens are removed
1116   /// from the end of the cache.
1117   SmallVector<Token, 16> MacroExpandedTokens;
1118   std::vector<std::pair<TokenLexer *, size_t>> MacroExpandingLexersStack;
1119 
1120   /// A record of the macro definitions and expansions that
1121   /// occurred during preprocessing.
1122   ///
1123   /// This is an optional side structure that can be enabled with
1124   /// \c createPreprocessingRecord() prior to preprocessing. Null until then.
1125   PreprocessingRecord *Record = nullptr;
1126 
1127   /// Cached tokens state.
1128   using CachedTokensTy = SmallVector<Token, 1>;
1129 
1130   /// Cached tokens are stored here when we do backtracking or
1131   /// lookahead. They are "lexed" by the CachingLex() method.
1132   CachedTokensTy CachedTokens;
1133 
1134   /// The position of the cached token that CachingLex() should
1135   /// "lex" next.
1136   ///
1137   /// If it points beyond the CachedTokens vector, it means that a normal
1138   /// Lex() should be invoked.
1139   CachedTokensTy::size_type CachedLexPos = 0;
1140 
1141   /// Stack of backtrack positions, allowing nested backtracks.
1142   ///
1143   /// The EnableBacktrackAtThisPos() method pushes a position to
1144   /// indicate where CachedLexPos should be set when the BackTrack() method is
1145   /// invoked (at which point the last position is popped).
1146   std::vector<CachedTokensTy::size_type> BacktrackPositions;
1147 
1148   /// Stack of cached tokens/initial number of cached tokens pairs, allowing
1149   /// nested unannotated backtracks.
1150   std::vector<std::pair<CachedTokensTy, CachedTokensTy::size_type>>
1151       UnannotatedBacktrackTokens;
1152 
1153   /// True if \p Preprocessor::SkipExcludedConditionalBlock() is running.
1154   /// This is used to guard against calling this function recursively.
1155   ///
1156   /// See comments at the use-site for more context about why it is needed.
1157   bool SkippingExcludedConditionalBlock = false;
1158 
1159   /// Keeps track of skipped range mappings that were recorded while skipping
1160   /// excluded conditional directives. It maps the source buffer pointer at
1161   /// the beginning of a skipped block, to the number of bytes that should be
1162   /// skipped.
1163   llvm::DenseMap<const char *, unsigned> RecordedSkippedRanges;
1164 
       /// Bring an out-of-date identifier up to date. Callers in this header
       /// invoke it only after II.isOutOfDate() returned true.
       // NOTE(review): presumably forwards to the external identifier lookup --
       // confirm in the implementation file.
1165   void updateOutOfDateIdentifier(const IdentifierInfo &II) const;
1166 
1167 public:
1168   Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
1169                DiagnosticsEngine &diags, const LangOptions &LangOpts,
1170                SourceManager &SM, HeaderSearch &Headers,
1171                ModuleLoader &TheModuleLoader,
1172                IdentifierInfoLookup *IILookup = nullptr,
1173                bool OwnsHeaderSearch = false,
1174                TranslationUnitKind TUKind = TU_Complete);
1175 
1176   ~Preprocessor();
1177 
1178   /// Initialize the preprocessor using information about the target.
1179   ///
1180   /// \param Target is owned by the caller and must remain valid for the
1181   /// lifetime of the preprocessor.
1182   /// \param AuxTarget is owned by the caller and must remain valid for
1183   /// the lifetime of the preprocessor.
1184   void Initialize(const TargetInfo &Target,
1185                   const TargetInfo *AuxTarget = nullptr);
1186 
1187   /// Initialize the preprocessor to parse a model file.
1188   ///
1189   /// To parse model files the preprocessor of the original source is reused to
1190   /// preserve the identifier table. However, to avoid some duplicate
1191   /// information in the preprocessor, some cleanup is needed before it is used
1192   /// to parse model files. This method does that cleanup.
1193   void InitializeForModelFile();
1194 
1195   /// Clean up after model file parsing.
1196   void FinalizeForModelFile();
1197 
1198   /// Retrieve the preprocessor options used to initialize this
1199   /// preprocessor.
1200   PreprocessorOptions &getPreprocessorOpts() const { return *PPOpts; }
1201 
       /// Get or replace the diagnostics engine this preprocessor reports to.
1202   DiagnosticsEngine &getDiagnostics() const { return *Diags; }
1203   void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; }
1204 
       /// Accessors for the language options, target information, and the file,
       /// source and header-search managers this preprocessor refers to.
       /// getTargetInfo() dereferences Target unconditionally, while
       /// getAuxTargetInfo() hands back the pointer as is.
       // NOTE(review): Target is presumably set by Initialize() -- confirm before
       // calling getTargetInfo() on a freshly constructed preprocessor.
1205   const LangOptions &getLangOpts() const { return LangOpts; }
1206   const TargetInfo &getTargetInfo() const { return *Target; }
1207   const TargetInfo *getAuxTargetInfo() const { return AuxTarget; }
1208   FileManager &getFileManager() const { return FileMgr; }
1209   SourceManager &getSourceManager() const { return SourceMgr; }
1210   HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; }
1211 
       /// Accessors for the identifier table, selector table, builtin info and
       /// the allocator that MacroState uses for its placement-new allocations.
1212   IdentifierTable &getIdentifierTable() { return Identifiers; }
1213   const IdentifierTable &getIdentifierTable() const { return Identifiers; }
1214   SelectorTable &getSelectorTable() { return Selectors; }
1215   Builtin::Context &getBuiltinInfo() { return *BuiltinInfo; }
1216   llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; }
1217 
       /// Set the external preprocessor source; the pointer is stored as is.
1218   void setExternalSource(ExternalPreprocessorSource *Source) {
1219     ExternalSource = Source;
1220   }
1221 
       /// Retrieve the external preprocessor source last set, as stored.
1222   ExternalPreprocessorSource *getExternalSource() const {
1223     return ExternalSource;
1224   }
1225 
1226   /// Retrieve the module loader associated with this preprocessor.
1227   ModuleLoader &getModuleLoader() const { return TheModuleLoader; }
1228 
       /// Report the module loader's HadFatalFailure flag.
1229   bool hadModuleLoaderFatalFailure() const {
1230     return TheModuleLoader.HadFatalFailure;
1231   }
1232 
1233   /// Retrieve the number of directives that have been processed by the
1234   /// Preprocessor.
1235   unsigned getNumDirectives() const {
1236     return NumDirectives;
1237   }
1238 
1239   /// True if we are currently preprocessing a \#if or \#elif directive.
1240   bool isParsingIfOrElifDirective() const {
1241     return ParsingIfOrElifDirective;
1242   }
1243 
1244   /// Configure whether the preprocessor retains comments in its output.
       ///
       /// Retaining macro comments implies retaining comments in general, so
       /// KeepComments is switched on whenever \p KeepMacroComments is.
1245   void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) {
1246     this->KeepMacroComments = KeepMacroComments;
1247     this->KeepComments = KeepMacroComments || KeepComments;
1248   }
1249 
       /// Report whether comments are currently retained.
1250   bool getCommentRetentionState() const { return KeepComments; }
1251 
       /// Enable or disable pragma handling, and query the current setting.
1252   void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; }
1253   bool getPragmasEnabled() const { return PragmasEnabled; }
1254 
       /// Set the flag that suppresses the "include not found" error.
1255   void SetSuppressIncludeNotFoundError(bool Suppress) {
1256     SuppressIncludeNotFoundError = Suppress;
1257   }
1258 
       /// Return the flag set by SetSuppressIncludeNotFoundError().
       // NOTE(review): this getter only reads a member but is not declared const.
1259   bool GetSuppressIncludeNotFoundError() {
1260     return SuppressIncludeNotFoundError;
1261   }
1262 
1263   /// Sets whether the preprocessor is responsible for producing output or if
1264   /// it is producing tokens to be consumed by Parse and Sema.
1265   void setPreprocessedOutput(bool IsPreprocessedOutput) {
1266     PreprocessedOutput = IsPreprocessedOutput;
1267   }
1268 
1269   /// Returns true if the preprocessor is responsible for generating output,
1270   /// false if it is producing tokens to be consumed by Parse and Sema.
1271   bool isPreprocessedOutput() const { return PreprocessedOutput; }
1272 
1273   /// Return true if we are lexing directly from the specified lexer, i.e. if
       /// \p L is the current preprocessor lexer.
1274   bool isCurrentLexer(const PreprocessorLexer *L) const {
1275     return CurPPLexer == L;
1276   }
1277 
1278   /// Return the current lexer being lexed from.
1279   ///
1280   /// Note that this ignores any potentially active macro expansions and _Pragma
1281   /// expansions going on at the time.
1282   PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; }
1283 
1284   /// Return the current file lexer being lexed from.
1285   ///
1286   /// Note that this ignores any potentially active macro expansions and _Pragma
1287   /// expansions going on at the time.
1288   PreprocessorLexer *getCurrentFileLexer() const;
1289 
1290   /// Return the submodule owning the file being lexed. This may not be
1291   /// the current module if we have changed modules since entering the file.
1292   Module *getCurrentLexerSubmodule() const { return CurLexerSubmodule; }
1293 
1294   /// Returns the FileID for the preprocessor predefines.
1295   FileID getPredefinesFileID() const { return PredefinesFileID; }
1296 
1297   /// \{
1298   /// Accessors for preprocessor callbacks.
1299   ///
1300   /// The preprocessor takes ownership of every PPCallbacks object handed to
1301   /// it. When callbacks are already installed, the new object and the existing
       /// ones are combined into a PPChainedCallbacks (new object first).
1302   PPCallbacks *getPPCallbacks() const { return Callbacks.get(); }
1303   void addPPCallbacks(std::unique_ptr<PPCallbacks> C) {
1304     if (!Callbacks) {
1305       Callbacks = std::move(C);
1306       return;
1307     }
         std::unique_ptr<PPCallbacks> Chained =
             std::make_unique<PPChainedCallbacks>(std::move(C),
                                                  std::move(Callbacks));
         Callbacks = std::move(Chained);
1308   }
1309   /// \}
1310 
1311   /// Get the number of tokens processed so far.
1312   unsigned getTokenCount() const { return TokenCount; }
1313 
1314   /// Get the max number of tokens before issuing a -Wmax-tokens warning.
1315   unsigned getMaxTokens() const { return MaxTokens; }
1316 
       /// Replace the maximum token count with \p Value and remember \p Loc as
       /// the location of the override.
1317   void overrideMaxTokens(unsigned Value, SourceLocation Loc) {
1318     MaxTokens = Value;
1319     MaxTokensOverrideLoc = Loc;
1320   }
1321 
       /// Get the location recorded by the last overrideMaxTokens() call.
1322   SourceLocation getMaxTokensOverrideLoc() const { return MaxTokensOverrideLoc; }
1323 
1324   /// Register a function that would be called on each token in the final
1325   /// expanded token stream.
1326   /// This also reports annotation tokens produced by the parser.
1327   void setTokenWatcher(llvm::unique_function<void(const clang::Token &)> F) {
1328     OnToken = std::move(F);
1329   }
1330 
       /// Set the PreprocessToken flag.
1331   void setPreprocessToken(bool Preprocess) { PreprocessToken = Preprocess; }
1332 
       /// Return true if a macro named \p Id is defined. The name is looked up
       /// with IdentifierTable::get() and passed to the overload below.
1333   bool isMacroDefined(StringRef Id) {
1334     return isMacroDefined(&Identifiers.get(Id));
1335   }
       /// Return true if \p II has a macro definition. With Modules enabled, the
       /// current macro definition must additionally convert to true.
1336   bool isMacroDefined(const IdentifierInfo *II) {
1337     return II->hasMacroDefinition() &&
1338            (!getLangOpts().Modules || (bool)getMacroDefinition(II));
1339   }
1340 
1341   /// Check whether \p II is defined as a macro within module \p M, provided
1342   /// \p M is a module that we have already preprocessed. Macros imported into
1343   /// \p M are not considered.
1344   bool isMacroDefinedInLocalModule(const IdentifierInfo *II, Module *M) {
1345     if (II->hasMacroDefinition()) {
1346       auto ModulePos = Submodules.find(M);
1347       if (ModulePos != Submodules.end()) {
1348         auto MacroPos = ModulePos->second.Macros.find(II);
1349         if (MacroPos != ModulePos->second.Macros.end())
1350           if (MacroDirective *Latest = MacroPos->second.getLatest())
1351             return Latest->isDefined();
1352       }
1353     }
1354     return false;
1355   }
1356 
       /// Return the current macro definition of \p II within the current
       /// submodule state, or an empty MacroDefinition if \p II has none.
       ///
       /// Visibility directives at the head of the history are skipped. The
       /// result carries a DefMacroDirective only when the first remaining
       /// directive is one, plus the active module macros and the ambiguity flag.
1357   MacroDefinition getMacroDefinition(const IdentifierInfo *II) {
1358     if (!II->hasMacroDefinition())
1359       return {};
1360 
         // operator[] creates a default MacroState entry if none exists yet.
1361     MacroState &S = CurSubmoduleState->Macros[II];
1362     auto *MD = S.getLatest();
1363     while (isa_and_nonnull<VisibilityMacroDirective>(MD))
1364       MD = MD->getPrevious();
1365     return MacroDefinition(dyn_cast_or_null<DefMacroDirective>(MD),
1366                            S.getActiveModuleMacros(*this, II),
1367                            S.isAmbiguous(*this, II));
1368   }
1369 
1370   MacroDefinition getMacroDefinitionAtLoc(const IdentifierInfo *II,
1371                                           SourceLocation Loc) {
1372     if (!II->hadMacroDefinition())
1373       return {};
1374 
1375     MacroState &S = CurSubmoduleState->Macros[II];
1376     MacroDirective::DefInfo DI;
1377     if (auto *MD = S.getLatest())
1378       DI = MD->findDirectiveAtLoc(Loc, getSourceManager());
1379     // FIXME: Compute the set of active module macros at the specified location.
1380     return MacroDefinition(DI.getDirective(),
1381                            S.getActiveModuleMacros(*this, II),
1382                            S.isAmbiguous(*this, II));
1383   }
1384 
1385   /// Given an identifier, return its latest non-imported MacroDirective
1386   /// if it is \#define'd and not \#undef'd, or null if it isn't \#define'd.
1387   MacroDirective *getLocalMacroDirective(const IdentifierInfo *II) const {
1388     if (II->hasMacroDefinition())
1389       if (MacroDirective *Latest = getLocalMacroDirectiveHistory(II))
1390         if (!Latest->getDefinition().isUndefined())
1391           return Latest;
1392 
1393     // No definition flag, no local history, or the history ends in an undef.
1394     return nullptr;
1395   }
1397 
1398   const MacroInfo *getMacroInfo(const IdentifierInfo *II) const {
1399     return const_cast<Preprocessor*>(this)->getMacroInfo(II);
1400   }
1401 
1402   MacroInfo *getMacroInfo(const IdentifierInfo *II) {
1403     if (!II->hasMacroDefinition())
1404       return nullptr;
1405     if (auto MD = getMacroDefinition(II))
1406       return MD.getMacroInfo();
1407     return nullptr;
1408   }
1409 
1410   /// Given an identifier, return the latest non-imported macro
1411   /// directive for that identifier.
1412   ///
1413   /// One can iterate over all previous macro directives from the most recent
1414   /// one.
1415   MacroDirective *getLocalMacroDirectiveHistory(const IdentifierInfo *II) const;
1416 
1417   /// Add a directive to the macro directive history for this identifier.
1418   void appendMacroDirective(IdentifierInfo *II, MacroDirective *MD);
       /// Allocate a DefMacroDirective for \p MI at \p Loc, append it to the
       /// history of \p II, and return it.
1419   DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI,
1420                                              SourceLocation Loc) {
1421     DefMacroDirective *MD = AllocateDefMacroDirective(MI, Loc);
1422     appendMacroDirective(II, MD);
1423     return MD;
1424   }
       /// As above, using the definition location of \p MI as the location.
1425   DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II,
1426                                              MacroInfo *MI) {
1427     return appendDefMacroDirective(II, MI, MI->getDefinitionLoc());
1428   }
1429 
1430   /// Set a MacroDirective that was loaded from a PCH file.
1431   void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *ED,
1432                                MacroDirective *MD);
1433 
1434   /// Register an exported macro for a module and identifier.
1435   ModuleMacro *addModuleMacro(Module *Mod, IdentifierInfo *II,
1436                               MacroInfo *Macro,
1437                               ArrayRef<ModuleMacro *> Overrides, bool &IsNew);
       /// Look up the module macro for module \p Mod and identifier \p II.
       // NOTE(review): presumably returns null when none is registered --
       // confirm in the implementation file.
1438   ModuleMacro *getModuleMacro(Module *Mod, const IdentifierInfo *II);
1439 
1440   /// Get the list of leaf (non-overridden) module macros for a name. The
       /// identifier is brought up to date first if it is marked out of date.
1441   ArrayRef<ModuleMacro*> getLeafModuleMacros(const IdentifierInfo *II) const {
1442     if (II->isOutOfDate())
1443       updateOutOfDateIdentifier(*II);
1444     auto Found = LeafModuleMacros.find(II);
1445     if (Found == LeafModuleMacros.end())
1446       return {};
1447     return Found->second;
1448   }
1449 
1450   /// Get the list of submodules that we're currently building.
1451   ArrayRef<BuildingSubmoduleInfo> getBuildingSubmodules() const {
1452     return BuildingSubmoduleStack;
1453   }
1454 
1455   /// \{
1456   /// Iterators for the macro history table. Currently defined macros have
1457   /// IdentifierInfo::hasMacroDefinition() set and an empty
1458   /// MacroInfo::getUndefLoc() at the head of the list.
1459   using macro_iterator = MacroMap::const_iterator;
1460 
1461   macro_iterator macro_begin(bool IncludeExternalMacros = true) const;
1462   macro_iterator macro_end(bool IncludeExternalMacros = true) const;
1463 
       /// Range form of macro_begin()/macro_end(), for use in range-based for.
1464   llvm::iterator_range<macro_iterator>
1465   macros(bool IncludeExternalMacros = true) const {
1466     macro_iterator begin = macro_begin(IncludeExternalMacros);
1467     macro_iterator end = macro_end(IncludeExternalMacros);
1468     return llvm::make_range(begin, end);
1469   }
1470 
1471   /// \}
1472 
1473   /// Mark the given clang module as affecting the current clang module or
       /// translation unit.
       ///
       /// While a submodule is being built, \p M is recorded on the module at the
       /// top of the building stack (unless that module is \p M itself);
       /// otherwise it is recorded on the preprocessor.
1474   void markClangModuleAsAffecting(Module *M) {
1475     assert(M->isModuleMapModule());
1476     if (BuildingSubmoduleStack.empty()) {
1477       AffectingClangModules.insert(M);
1478       return;
1479     }
1480     Module *Current = BuildingSubmoduleStack.back().M;
1481     if (Current != M)
           Current->AffectingClangModules.insert(M);
1482   }
1483 
1484   /// Get the set of top-level clang modules that affected preprocessing, but
1485   /// were not imported.
1486   const llvm::SmallSetVector<Module *, 2> &getAffectingClangModules() const {
1487     return AffectingClangModules;
1488   }
1489 
1490   /// Mark the file as included.
1491   /// Returns true if this is the first time the file was included.
1492   bool markIncluded(FileEntryRef File) {
         // Flag the header's file info before recording it in IncludedFiles.
1493     HeaderInfo.getFileInfo(File).IsLocallyIncluded = true;
1494     return IncludedFiles.insert(File).second;
1495   }
1496 
1497   /// Return true if this header has already been included.
1498   bool alreadyIncluded(FileEntryRef File) const {
         // NOTE(review): the result is discarded, so this call is presumably made
         // for a side effect of getFileInfo() -- confirm in HeaderSearch.
1499     HeaderInfo.getFileInfo(File);
1500     return IncludedFiles.count(File);
1501   }
1502 
1503   /// Get the set of included files.
1504   IncludedFilesSet &getIncludedFiles() { return IncludedFiles; }
1505   const IncludedFilesSet &getIncludedFiles() const { return IncludedFiles; }
1506 
1507   /// Return the name of the macro defined before \p Loc that has
1508   /// spelling \p Tokens.  If there are multiple macros with the same spelling,
1509   /// return the last one defined.
1510   StringRef getLastMacroWithSpelling(SourceLocation Loc,
1511                                      ArrayRef<TokenValue> Tokens) const;
1512 
1513   /// Get the predefines for this preprocessor.
1514   /// Used by some third-party tools to inspect and add predefines (see
1515   /// https://github.com/llvm/llvm-project/issues/57483).
1516   const std::string &getPredefines() const { return Predefines; }
1517 
1518   /// Set the predefines for this Preprocessor.
1519   ///
1520   /// These predefines are automatically injected when parsing the main file.
1521   void setPredefines(std::string P) { Predefines = std::move(P); }
1522 
1523   /// Return information about the specified preprocessor
1524   /// identifier token. The entry is obtained with IdentifierTable::get(), so
       /// the returned pointer is never null.
1525   IdentifierInfo *getIdentifierInfo(StringRef Name) const {
1526     return &Identifiers.get(Name);
1527   }
1528 
1529   /// Add the specified pragma handler to this preprocessor.
1530   ///
1531   /// If \p Namespace is non-null, then it is a token required to exist on the
1532   /// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
1533   void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler);
       /// Add \p Handler with an empty namespace.
1534   void AddPragmaHandler(PragmaHandler *Handler) {
1535     AddPragmaHandler(StringRef(), Handler);
1536   }
1537 
1538   /// Remove the specific pragma handler from this preprocessor.
1539   ///
1540   /// If \p Namespace is non-null, then it should be the namespace that
1541   /// \p Handler was added to. It is an error to remove a handler that
1542   /// has not been registered.
1543   void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler);
       /// Remove \p Handler from the empty namespace.
1544   void RemovePragmaHandler(PragmaHandler *Handler) {
1545     RemovePragmaHandler(StringRef(), Handler);
1546   }
1547 
1548   /// Install empty handlers for all pragmas (making them ignored).
1549   void IgnorePragmas();
1550 
1551   /// Set the empty-line handler. The pointer is stored as is.
1552   void setEmptylineHandler(EmptylineHandler *Handler) { Emptyline = Handler; }
1553 
       /// Get the empty-line handler last set.
1554   EmptylineHandler *getEmptylineHandler() const { return Emptyline; }
1555 
1556   /// Add the specified comment handler to the preprocessor.
1557   void addCommentHandler(CommentHandler *Handler);
1558 
1559   /// Remove the specified comment handler.
1560   ///
1561   /// It is an error to remove a handler that has not been registered.
1562   void removeCommentHandler(CommentHandler *Handler);
1563 
1564   /// Set the code completion handler to the given object.
1565   void setCodeCompletionHandler(CodeCompletionHandler &Handler) {
1566     CodeComplete = &Handler;
1567   }
1568 
1569   /// Retrieve the current code-completion handler.
1570   CodeCompletionHandler *getCodeCompletionHandler() const {
1571     return CodeComplete;
1572   }
1573 
1574   /// Clear out the code completion handler.
1575   void clearCodeCompletionHandler() {
1576     CodeComplete = nullptr;
1577   }
1578 
1579   /// Hook used by the lexer to invoke the "included file" code
1580   /// completion point.
1581   void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled);
1582 
1583   /// Hook used by the lexer to invoke the "natural language" code
1584   /// completion point.
1585   void CodeCompleteNaturalLanguage();
1586 
1587   /// Set the identifier used to filter code completion results.
1588   void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter) {
1589     CodeCompletionII = Filter; // Read back by getCodeCompletionFilter().
1590   }
1591 
1592   /// Record the code completion token range, from \p Start to \p End, for
1593   /// detecting the replacement range later on.
1594   void setCodeCompletionTokenRange(const SourceLocation Start,
1595                                    const SourceLocation End) {
1596     CodeCompletionTokenRange = {Start, End};
1597   }
1598   SourceRange getCodeCompletionTokenRange() const {
1599     return CodeCompletionTokenRange; // See setCodeCompletionTokenRange().
1600   }
1601 
1602   /// Get the code completion token for filtering purposes: the name of the
1603   /// identifier set via setCodeCompletionIdentifierInfo(), or an empty
1604   /// StringRef if none is set. Does not modify the preprocessor.
1605   StringRef getCodeCompletionFilter() const {
1606     return CodeCompletionII ? CodeCompletionII->getName() : StringRef();
1607   }
1608 
1609   /// Retrieve the preprocessing record, or null if there is no
1610   /// preprocessing record (see createPreprocessingRecord()).
1611   PreprocessingRecord *getPreprocessingRecord() const { return Record; }
1612 
1613   /// Create a new preprocessing record, which will keep track of
1614   /// all macro expansions, macro definitions, etc.
1615   void createPreprocessingRecord();
1616 
1617   /// Returns true if the FileEntry is the PCH through header.
1618   bool isPCHThroughHeader(const FileEntry *FE);
1619 
1620   /// True if creating a PCH with a through header.
1621   bool creatingPCHWithThroughHeader();
1622 
1623   /// True if using a PCH with a through header.
1624   bool usingPCHWithThroughHeader();
1625 
1626   /// True if creating a PCH with a #pragma hdrstop.
1627   bool creatingPCHWithPragmaHdrStop();
1628 
1629   /// True if using a PCH with a #pragma hdrstop.
1630   bool usingPCHWithPragmaHdrStop();
1631 
1632   /// Skip tokens until after the #include of the through header or
1633   /// until after a #pragma hdrstop.
1634   void SkipTokensWhileUsingPCH();
1635 
1636   /// Process directives while skipping until the through header or
1637   /// #pragma hdrstop is found.
1638   void HandleSkippedDirectiveWhileUsingPCH(Token &Result,
1639                                            SourceLocation HashLoc);
1640 
1641   /// Enter the specified FileID as the main source file,
1642   /// which implicitly adds the builtin defines etc.
1643   void EnterMainSourceFile();
1644 
1645   /// Inform the preprocessor callbacks that processing is complete.
1646   void EndSourceFile();
1647 
1648   /// Add a source file to the top of the include stack and
1649   /// start lexing tokens from it instead of the current buffer.
1650   ///
1651   /// Emits a diagnostic, doesn't enter the file, and returns true on error.
1652   bool EnterSourceFile(FileID FID, ConstSearchDirIterator Dir,
1653                        SourceLocation Loc, bool IsFirstIncludeOfFile = true);
1654 
1655   /// Add a Macro to the top of the include stack and start lexing
1656   /// tokens from it instead of the current buffer.
1657   ///
1658   /// \param Args specifies the tokens input to a function-like macro.
1659   /// \param ILEnd specifies the location of the ')' for a function-like macro
1660   /// or the identifier for an object-like macro.
1661   void EnterMacro(Token &Tok, SourceLocation ILEnd, MacroInfo *Macro,
1662                   MacroArgs *Args);
1663 
1664 private:
1665   /// Add a "macro" context to the top of the include stack,
1666   /// which will cause the lexer to start returning the specified tokens.
1667   ///
1668   /// If \p DisableMacroExpansion is true, tokens lexed from the token stream
1669   /// will not be subject to further macro expansion. Otherwise, these tokens
1670   /// will be re-macro-expanded when/if expansion is enabled.
1671   ///
1672   /// If \p OwnsTokens is false, this method assumes that the specified stream
1673   /// of tokens has a permanent owner somewhere, so they do not need to be
1674   /// copied. If it is true, it assumes the array of tokens is allocated with
1675   /// \c new[] and the Preprocessor will delete[] it.
1676   ///
1677   /// If \p IsReinject the resulting tokens will have Token::IsReinjected flag
1678   /// set, see the flag documentation for details.
1679   void EnterTokenStream(const Token *Toks, unsigned NumToks,
1680                         bool DisableMacroExpansion, bool OwnsTokens,
1681                         bool IsReinject);
1682 
1683 public:
1684   void EnterTokenStream(std::unique_ptr<Token[]> Toks, unsigned NumToks,
1685                         bool DisableMacroExpansion, bool IsReinject) {
         // Release the array to the private overload with OwnsTokens = true.
1686     EnterTokenStream(Toks.release(), NumToks, DisableMacroExpansion, true,
1687                      IsReinject);
1688   }
1689 
1690   void EnterTokenStream(ArrayRef<Token> Toks, bool DisableMacroExpansion,
1691                         bool IsReinject) {
         // OwnsTokens = false: the tokens are assumed to have a permanent owner.
1692     EnterTokenStream(Toks.data(), Toks.size(), DisableMacroExpansion, false,
1693                      IsReinject);
1694   }
1695 
1696   /// Pop the current lexer/macro exp off the top of the lexer stack.
1697   ///
1698   /// This should only be used in situations where the current state of the
1699   /// top-of-stack lexer is known.
1700   void RemoveTopOfLexerStack();
1701 
1702   /// From the point that this method is called, and until
1703   /// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor
1704   /// keeps track of the lexed tokens so that a subsequent Backtrack() call will
1705   /// make the Preprocessor re-lex the same tokens.
1706   ///
1707   /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
1708   /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
1709   /// be combined with the EnableBacktrackAtThisPos calls in reverse order.
1710   ///
1711   /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack
1712   /// at some point after EnableBacktrackAtThisPos. If you don't, caching of
1713   /// tokens will continue indefinitely.
1714   ///
1715   /// \param Unannotated Whether token annotations are reverted upon calling
1716   /// Backtrack().
1717   void EnableBacktrackAtThisPos(bool Unannotated = false);
1718 
1719 private:
1720   std::pair<CachedTokensTy::size_type, bool> LastBacktrackPos();
1721 
1722   CachedTokensTy PopUnannotatedBacktrackTokens();
1723 
1724 public:
1725   /// Disable the last EnableBacktrackAtThisPos call.
1726   void CommitBacktrackedTokens();
1727 
1728   /// Make Preprocessor re-lex the tokens that were lexed since
1729   /// EnableBacktrackAtThisPos() was previously called.
1730   void Backtrack();
1731 
1732   /// True if a backtrack position is recorded, i.e. EnableBacktrackAtThisPos()
1733   /// was called and caching of tokens is on.
1734   bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); }
1735 
1736   /// True if EnableBacktrackAtThisPos() was called and caching of
1737   /// unannotated tokens is on (UnannotatedBacktrackTokens is non-empty).
1738   bool isUnannotatedBacktrackEnabled() const {
1739     return !UnannotatedBacktrackTokens.empty();
1740   }
1741 
1742   /// Lex the next token for this preprocessor.
1743   void Lex(Token &Result);
1744 
1745   /// Lex all tokens for this preprocessor until (and excluding) end of file.
1746   void LexTokensUntilEOF(std::vector<Token> *Tokens = nullptr);
1747 
1748   /// Lex a token, forming a header-name token if possible.
1749   bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true);
1750 
1751   /// Lex the parameters for an #embed directive, returns nullopt on error.
1752   std::optional<LexEmbedParametersResult> LexEmbedParameters(Token &Current,
1753                                                              bool ForHasEmbed);
1754 
1755   bool LexAfterModuleImport(Token &Result);
1756   void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks);
1757 
1758   void makeModuleVisible(Module *M, SourceLocation Loc);
1759 
1760   SourceLocation getModuleImportLoc(Module *M) const {
         // Ask the current submodule state's visible-module set for M's import loc.
1761     return CurSubmoduleState->VisibleModules.getImportLoc(M);
1762   }
1763 
1764   /// Lex a string literal, which may be a concatenation of several string
1765   /// literals and may come from macro expansion (if \p AllowMacroExpansion).
1766   /// \returns true on success, false if an error diagnostic has been generated.
1767   bool LexStringLiteral(Token &Result, std::string &String,
1768                         const char *DiagnosticTag, bool AllowMacroExpansion) {
1769     if (!AllowMacroExpansion)
1770       LexUnexpandedToken(Result);
1771     else
1772       Lex(Result);
1773     return FinishLexStringLiteral(Result, String, DiagnosticTag,
1774                                   AllowMacroExpansion);
1775   }
1776 
1777   /// Complete the lexing of a string literal where the first token has
1778   /// already been lexed (see LexStringLiteral).
1779   bool FinishLexStringLiteral(Token &Result, std::string &String,
1780                               const char *DiagnosticTag,
1781                               bool AllowMacroExpansion);
1782 
1783   /// Lex tokens until one that is not a comment is produced, leaving it in
1784   /// \p Result.
1785   ///
1786   /// This is useful in -E -C mode where comments would foul up preprocessor
1787   /// directive handling.
1788   void LexNonComment(Token &Result) {
1789     Lex(Result);
1790     while (Result.is(tok::comment))
1791       Lex(Result);
1792   }
1793 
1794   /// Lex one token as Lex() does, but with macro expansion of identifier
1795   /// tokens turned off for the duration of the call.
1796   void LexUnexpandedToken(Token &Result) {
1797     // Save the current setting rather than assuming it was false.
1798     const bool SavedDisableMacroExpansion = DisableMacroExpansion;
1799     DisableMacroExpansion = true;
1800     Lex(Result);
1801 
1802     // Put the flag back exactly as we found it.
1803     DisableMacroExpansion = SavedDisableMacroExpansion;
1804   }
1805 
1806   /// Variant of LexNonComment that skips comments using LexUnexpandedToken,
1807   /// so identifier tokens are not macro expanded.
1808   void LexUnexpandedNonComment(Token &Result) {
1809     LexUnexpandedToken(Result);
1810     while (Result.is(tok::comment))
1811       LexUnexpandedToken(Result);
1812   }
1813 
1814   /// Parses a simple integer literal to get its numeric value.  Floating
1815   /// point literals and user defined literals are rejected.  Used primarily to
1816   /// handle pragmas that accept integer arguments.
1817   bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value);
1818 
1819   /// Turn macro expansion off everywhere except in preprocessor directives.
1820   void SetMacroExpansionOnlyInDirectives() {
1821     MacroExpansionInDirectivesOverride = true;
1822     DisableMacroExpansion = true;
1823   }
1824 
1825   /// Peek at the token \p N positions ahead without consuming any tokens.
1826   ///
1827   /// LookAhead(0) returns the next token that would be returned by Lex(),
1828   /// LookAhead(1) returns the token after it, and so on.  The result is a
1829   /// normal token after phase 5, i.e. what 'Lex' would produce, not what
1830   /// 'LexUnexpandedToken' would.  Must not be called in the middle of a
1831   /// lexing action (LexLevel must be 0).
1832   const Token &LookAhead(unsigned N) {
1833     assert(LexLevel == 0 && "cannot use lookahead while lexing");
1834     const auto Wanted = CachedLexPos + N;
1835     if (Wanted >= CachedTokens.size())
1836       return PeekAhead(N + 1); // Not in the cache yet.
1837     return CachedTokens[Wanted];
1838   }
1839 
1840   /// When backtracking is enabled and tokens are cached, this reverts
1841   /// \p N tokens by moving the cached lex position back.
1842   ///
1843   /// Note that the number of tokens being reverted should be up to the last
1844   /// backtrack position, not more.
1845   void RevertCachedTokens(unsigned N) {
1846     assert(isBacktrackEnabled() &&
1847            "Should only be called when tokens are cached for backtracking");
1848     assert(signed(CachedLexPos) - signed(N) >=
1849                signed(LastBacktrackPos().first) &&
1850            "Should revert tokens up to the last backtrack position, not more");
1851     assert(signed(CachedLexPos) - signed(N) >= 0 &&
1852            "Corrupted backtrack positions ?");
1853     CachedLexPos -= N;
1854   }
1855 
1856   /// Push \p Tok onto the token stream so that it is the next token lexed.
1857   ///
1858   /// If Backtrack() is called afterwards, the token will remain at the
1859   /// insertion point.
1860   /// If \p IsReinject is true, the resulting token will have the
1861   /// Token::IsReinjected flag set. See the flag documentation for details.
1862   void EnterToken(const Token &Tok, bool IsReinject) {
1863     if (!LexLevel) {
1864       EnterCachingLexMode();
1865       assert(IsReinject && "new tokens in the middle of cached stream");
1866       CachedTokens.insert(CachedTokens.begin() + CachedLexPos, Tok);
1867       return;
1868     }
1869     // It's not correct in general to enter caching lex mode while in the
1870     // middle of a nested lexing action, so enter a one-token stream instead.
1871     auto SingleTok = std::make_unique<Token[]>(1);
1872     SingleTok[0] = Tok;
1873     EnterTokenStream(std::move(SingleTok), 1, true, IsReinject);
1874   }
1875 
1876   /// Notify the Preprocessor that, if it is caching tokens (because
1877   /// backtracking is enabled), it should replace the most recent cached
1878   /// tokens with the given annotation token. This has no effect if
1879   /// backtracking is not enabled or the cached lex position is 0.
1880   ///
1881   /// This exists purely as an optimization, so that the cached tokens don't
1882   /// get re-parsed and re-resolved after a backtrack is invoked.
1883   void AnnotateCachedTokens(const Token &Tok) {
1884     assert(Tok.isAnnotation() && "Expected annotation token");
1885     if (CachedLexPos == 0 || !isBacktrackEnabled())
1886       return;
1887     AnnotatePreviousCachedTokens(Tok);
1888   }
1889 
1890   /// Get the last location of the token just before the cached lex position,
1891   /// suitable for setting the end location of an annotation token.
1892   SourceLocation getLastCachedTokenLocation() const {
1893     assert(CachedLexPos != 0);
1894     return CachedTokens[CachedLexPos-1].getLastLoc();
1895   }
1896 
1897   /// Whether \p Tok is the most recent token (`CachedLexPos - 1`) in
1898   /// CachedTokens.
1899   bool IsPreviousCachedToken(const Token &Tok) const;
1900 
1901   /// Replace token in `CachedLexPos - 1` in CachedTokens by the tokens
1902   /// in \p NewToks.
1903   ///
1904   /// Useful when a token needs to be split into smaller ones and the most
1905   /// recent token in CachedTokens must be updated to reflect that.
1906   void ReplacePreviousCachedToken(ArrayRef<Token> NewToks);
1907 
1908   /// Replace the last token with an annotation token.
1909   ///
1910   /// Like AnnotateCachedTokens(), this routine replaces an already-parsed
1911   /// (and resolved) token with an annotation token. However, only the last
1912   /// token is replaced; no other cached tokens are affected. This function
1913   /// has no effect if backtracking is not enabled.
1914   void ReplaceLastTokenWithAnnotation(const Token &Tok) {
1915     assert(Tok.isAnnotation() && "Expected annotation token");
1916     if (CachedLexPos == 0 || !isBacktrackEnabled())
1917       return;
1918     Token &Last = CachedTokens[CachedLexPos - 1];
1919     Last = Tok;
1920   }
1921 
1922   /// Enter an annotation token into the token stream.
1923   void EnterAnnotationToken(SourceRange Range, tok::TokenKind Kind,
1924                             void *AnnotationVal);
1925 
1926   /// Determine whether it's possible for a future call to Lex to produce an
1927   /// annotation token created by a previous call to EnterAnnotationToken.
1928   bool mightHavePendingAnnotationTokens() {
1929     return CurLexerCallback != CLK_Lexer; // Any callback but CLK_Lexer counts.
1930   }
1931 
1932   /// Replace the current cached token with typo-corrected identifier \p Tok.
1933   void TypoCorrectToken(const Token &Tok) {
1934     assert(Tok.getIdentifierInfo() && "Expected identifier token");
1935     if (CachedLexPos == 0 || !isBacktrackEnabled())
1936       return; // No-op in this case.
1937     CachedTokens[CachedLexPos - 1] = Tok;
1938   }
1939 
1940   /// Recompute the current lexer kind based on the CurLexer/
1941   /// CurTokenLexer pointers.
1942   void recomputeCurLexerKind();
1943 
1944   /// Returns true if incremental processing is currently enabled.
1945   bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; }
1946 
1947   /// Enables incremental processing, or disables it if \p value is false.
1948   void enableIncrementalProcessing(bool value = true) {
1949     IncrementalProcessing = value;
1950   }
1951 
1952   /// Specify the point at which code-completion will be performed.
1953   ///
1954   /// \param File the file in which code completion should occur. If
1955   /// this file is included multiple times, code-completion will
1956   /// perform completion the first time it is included. If NULL, this
1957   /// function clears out the code-completion point.
1958   ///
1959   /// \param Line the line at which code completion should occur
1960   /// (1-based).
1961   ///
1962   /// \param Column the column at which code completion should occur
1963   /// (1-based).
1964   ///
1965   /// \returns true if an error occurred, false otherwise.
1966   bool SetCodeCompletionPoint(FileEntryRef File, unsigned Line,
1967                               unsigned Column);
1968 
1969   /// Determine if code completion is active (a code-completion file is set).
1970   bool isCodeCompletionEnabled() const { return CodeCompletionFile != nullptr; }
1971 
1972   /// Returns the source location of the code-completion point.
1973   ///
1974   /// Returns an invalid location if code-completion is not enabled or the file
1975   /// containing the code-completion point has not been lexed yet.
1976   SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; }
1977 
1978   /// Returns the start location of the file containing the code-completion
         /// point.
1979   ///
1980   /// Returns an invalid location if code-completion is not enabled or the file
1981   /// containing the code-completion point has not been lexed yet.
1982   SourceLocation getCodeCompletionFileLoc() const {
1983     return CodeCompletionFileLoc;
1984   }
1985 
1986   /// Returns true if code-completion is enabled and we have hit the
1987   /// code-completion point (see setCodeCompletionReached()).
1988   bool isCodeCompletionReached() const { return CodeCompletionReached; }
1989 
1990   /// Record that the code-completion point has been hit.
1991   void setCodeCompletionReached() {
1992     assert(isCodeCompletionEnabled() && "Code-completion not enabled!");
1993     CodeCompletionReached = true;
1994     // Silence any diagnostics that occur after the code-completion point.
1995     getDiagnostics().setSuppressAllDiagnostics(true);
1996   }
1997 
1998   /// The identifier and location recorded for the currently-active
1999   /// \#pragma clang arc_cf_code_audited begin.
2000   ///
2001   /// The location is invalid if there is no such pragma active.
2002   std::pair<IdentifierInfo *, SourceLocation>
2003   getPragmaARCCFCodeAuditedInfo() const {
2004     return PragmaARCCFCodeAuditedInfo;
2005   }
2006 
2007   /// Set the identifier and location of the currently-active \#pragma clang
2008   /// arc_cf_code_audited begin.  An invalid location ends the pragma.
2009   void setPragmaARCCFCodeAuditedInfo(IdentifierInfo *Ident,
2010                                      SourceLocation Loc) {
2011     PragmaARCCFCodeAuditedInfo = {Ident, Loc};
2012   }
2013 
2014   /// Get the location of the currently-active \#pragma clang
2015   /// assume_nonnull begin, as stored by setPragmaAssumeNonNullLoc().
2016   ///
2017   /// Returns an invalid location if there is no such pragma active.
2018   SourceLocation getPragmaAssumeNonNullLoc() const {
2019     return PragmaAssumeNonNullLoc;
2020   }
2021 
2022   /// Record \p Loc as the location of the currently-active \#pragma clang
2023   /// assume_nonnull begin.  Passing an invalid location ends the pragma.
2024   void setPragmaAssumeNonNullLoc(SourceLocation Loc) {
2025     PragmaAssumeNonNullLoc = Loc;
2026   }
2027 
2028   /// Get the location of the recorded unterminated \#pragma clang
2029   /// assume_nonnull begin in the preamble, if one exists.
2030   ///
2031   /// Returns an invalid location if the preamble did not end with
2032   /// such a pragma active or if there is no recorded preamble.
2033   SourceLocation getPreambleRecordedPragmaAssumeNonNullLoc() const {
2034     return PreambleRecordedPragmaAssumeNonNullLoc;
2035   }
2036 
2037   /// Record \p Loc as the location of the unterminated \#pragma clang
2038   /// assume_nonnull begin in the preamble.
2039   void setPreambleRecordedPragmaAssumeNonNullLoc(SourceLocation Loc) {
2040     PreambleRecordedPragmaAssumeNonNullLoc = Loc;
2041   }
2042 
2043   /// Set \p Dir as the directory in which the main file should be considered
2044   /// to have been found, if it is not a real file.
2045   void setMainFileDir(DirectoryEntryRef Dir) { MainFileDir = Dir; }
2046 
2047   /// Instruct the preprocessor to skip the preamble of the main source file.
2048   ///
2049   /// \param Bytes The number of bytes in the preamble to skip.
2050   ///
2051   /// \param StartOfLine Whether skipping these bytes puts the lexer at the
2052   /// start of a line.
2053   void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) {
2054     SkipMainFilePreamble.first = Bytes;        // Byte count to skip.
2055     SkipMainFilePreamble.second = StartOfLine; // Start-of-line flag.
2056   }
2057 
2058   /// Forwarding function for diagnostics.  This reports diagnostic
2059   /// \p DiagID at location \p Loc through Diags and returns the resulting
2060   /// builder.
2061   DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const {
2062     return Diags->Report(Loc, DiagID);
2063   }
2064 
2065   DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const {
2066     return Diags->Report(Tok.getLocation(), DiagID); // At Tok's location.
2067   }
2068 
2069   /// Return the 'spelling' of the token at location \p loc; does not go up
2070   /// to the spelling location or down to the expansion location.  Forwards
2071   /// to Lexer::getSpelling with this preprocessor's SourceMgr and LangOpts.
2072   ///
2073   /// \param buffer A buffer which will be used only if the token requires
2074   ///   "cleaning", e.g. if it contains trigraphs or escaped newlines.
2075   /// \param invalid If non-null, will be set \c true if an error occurs.
2076   StringRef getSpelling(SourceLocation loc,
2077                         SmallVectorImpl<char> &buffer,
2078                         bool *invalid = nullptr) const {
2079     return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid);
2080   }
2081 
2082   /// Return the 'spelling' of the token \p Tok as a std::string.
2083   ///
2084   /// The spelling of a token is the characters used to represent the token in
2085   /// the source file after trigraph expansion and escaped-newline folding.  In
2086   /// particular, this wants to get the true, uncanonicalized, spelling of
2087   /// things like digraphs, UCNs, etc.
2088   ///
2089   /// \param Invalid If non-null, will be set \c true if an error occurs.
2090   std::string getSpelling(const Token &Tok, bool *Invalid = nullptr) const {
2091     return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid);
2092   }
2093 
2094   /// Get the spelling of a token into a preallocated buffer, instead
2095   /// of as a std::string.
2096   ///
2097   /// The caller is required to allocate enough space for the token, which is
2098   /// guaranteed to be at least Tok.getLength() bytes long. The length of the
2099   /// actual result is returned.
2100   ///
2101   /// Note that this method may do two possible things: it may either fill in
2102   /// the buffer specified with characters, or it may *change the input pointer*
2103   /// to point to a constant buffer with the data already in it (avoiding a
2104   /// copy).  The caller is not allowed to modify the returned buffer pointer
2105   /// if an internal buffer is returned.
2106   unsigned getSpelling(const Token &Tok, const char *&Buffer,
2107                        bool *Invalid = nullptr) const {
2108     return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid);
2109   }
2110 
2111   /// Get the spelling of a token into a SmallVector.
2112   ///
2113   /// Note that the returned StringRef may not point to the
2114   /// supplied buffer if a copy can be avoided.
2115   StringRef getSpelling(const Token &Tok,
2116                         SmallVectorImpl<char> &Buffer,
2117                         bool *Invalid = nullptr) const;
2118 
2119   /// Relex the token at location \p Loc, storing it in \p Result.
2120   /// \returns true if there was a failure, false on success.
2121   bool getRawToken(SourceLocation Loc, Token &Result,
2122                    bool IgnoreWhiteSpace = false) const {
2123     return Lexer::getRawToken(Loc, Result, SourceMgr, LangOpts, IgnoreWhiteSpace);
2124   }
2125 
2126   /// Given a one-character numeric constant (or binary data) token \p Tok,
2127   /// return its value as an unsigned 8-bit integer.
2128   uint8_t
2129   getSpellingOfSingleCharacterNumericConstant(const Token &Tok,
2130                                               bool *Invalid = nullptr) const {
2131     assert((Tok.is(tok::numeric_constant) || Tok.is(tok::binary_data)) &&
2132            Tok.getLength() == 1 && "Called on unsupported token");
2133     assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1");
2134 
2135     // Fast path: the token carries a pointer to its literal data.
2136     if (const char *Data = Tok.getLiteralData())
2137       return Tok.is(tok::binary_data) ? *Data : *Data - '0';
2138 
2139     assert(Tok.is(tok::numeric_constant) && "binary data with no data");
2140     // Slow path: fetch the character through the SourceManager, which always
2141     // works.
2142     return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid) - '0';
2143   }
2144 
2145   /// Retrieve the name of the immediate macro expansion.
2146   ///
2147   /// This routine starts from source location \p Loc, and finds the name of
2148   /// the macro responsible for its immediate expansion. It looks through any
2149   /// intervening macro argument expansions to compute this. It returns a
2150   /// StringRef that refers to the SourceManager-owned buffer of the source
2151   /// where that macro name is spelled. Thus, the result shouldn't out-live
2152   /// the SourceManager.
2153   StringRef getImmediateMacroName(SourceLocation Loc) const {
2154     return Lexer::getImmediateMacroName(Loc, SourceMgr, LangOpts);
2155   }
2156 
2157   /// Plop the specified string into a scratch buffer and set the
2158   /// specified token's location and length to it.
2159   ///
2160   /// If specified, the source location provides a location of the expansion
2161   /// point of the token.
2162   void CreateString(StringRef Str, Token &Tok,
2163                     SourceLocation ExpansionLocStart = SourceLocation(),
2164                     SourceLocation ExpansionLocEnd = SourceLocation());
2165 
2166   /// Split the first Length characters out of the token starting at TokLoc
2167   /// and return a location pointing to the split token. Re-lexing from the
2168   /// split token will return the split token rather than the original.
2169   SourceLocation SplitToken(SourceLocation TokLoc, unsigned Length);
2170 
2171   /// Computes the source location just past the end of the token at \p Loc.
2172   ///
2173   /// This routine can be used to produce a source location that
2174   /// points just past the end of the token referenced by \p Loc, and
2175   /// is generally used when a diagnostic needs to point just after a
2176   /// token where it expected something different than it received. If
2177   /// the returned source location would not be meaningful (e.g., if
2178   /// it points into a macro), this routine returns an invalid
2179   /// source location.
2180   ///
2181   /// \param Offset an offset from the end of the token, where the source
2182   /// location should refer to. The default offset (0) produces a source
2183   /// location pointing just past the end of the token; an offset of 1 produces
2184   /// a source location pointing to the last character in the token, etc.
2185   SourceLocation getLocForEndOfToken(SourceLocation Loc,
2186                                      unsigned Offset = 0) const {
2187     return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts);
2188   }
2189 
2190   /// Returns true if the given MacroID location \p loc points at the first
2191   /// token of the macro expansion.
2192   ///
2193   /// \param MacroBegin If non-null and function returns true, it is set to
2194   /// begin location of the macro.
2195   bool isAtStartOfMacroExpansion(SourceLocation loc,
2196                                  SourceLocation *MacroBegin = nullptr) const {
2197     return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts,
2198                                             MacroBegin);
2199   }
2200 
2201   /// Returns true if the given MacroID location \p loc points at the last
2202   /// token of the macro expansion.
2203   ///
2204   /// \param MacroEnd If non-null and function returns true, it is set to
2205   /// end location of the macro.
2206   bool isAtEndOfMacroExpansion(SourceLocation loc,
2207                                SourceLocation *MacroEnd = nullptr) const {
2208     return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd);
2209   }
2210 
2211   /// Print the token to stderr, used for debugging.
2212   void DumpToken(const Token &Tok, bool DumpFlags = false) const;
2213   void DumpLocation(SourceLocation Loc) const;
2214   void DumpMacro(const MacroInfo &MI) const;
2215   void dumpMacroInfo(const IdentifierInfo *II);
2216 
2217   /// Given a location \p TokStart that specifies the start of a token, return
2218   /// a new location that specifies character \p Char within the token.
2219   SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,
2220                                          unsigned Char) const {
2221     return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts);
2222   }
2223 
2224   /// Bump one of the token paste statistics counters.
2225   ///
2226   /// If \p isFast is true, this is a 'fast paste' case we handled and the fast
2227   /// paste counter is incremented; otherwise the regular paste counter is.
2228   void IncrementPasteCounter(bool isFast) {
2229     if (!isFast)
2230       ++NumTokenPaste;
2231     else
2232       ++NumFastTokenPaste;
2233   }
2234 
2235   void PrintStats();
2236 
2237   size_t getTotalMemory() const;
2238 
2239   /// When the macro expander pastes together a comment (/##/) in Microsoft
2240   /// mode, this method handles updating the current state, returning the
2241   /// token on the next source line.
2242   void HandleMicrosoftCommentPaste(Token &Tok);
2243 
2244   //===--------------------------------------------------------------------===//
2245   // Preprocessor callback methods.  These are invoked by a lexer as various
2246   // directives and events are found.
2247 
2248   /// Given a tok::raw_identifier token, look up the
2249   /// identifier information for the token and install it into the token,
2250   /// updating the token kind accordingly.
2251   IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const;
2252 
2253 private:
2254   llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons;
2255 
2256 public:
2257   /// Specifies the reason for poisoning an identifier.
2258   ///
2259   /// If that identifier is accessed while poisoned, then this reason will be
2260   /// used instead of the default "poisoned" diagnostic.
2261   void SetPoisonReason(IdentifierInfo *II, unsigned DiagID);
2262 
2263   /// Display reason for poisoned identifier.
2264   void HandlePoisonedIdentifier(Token & Identifier);
2265 
2266   void MaybeHandlePoisonedIdentifier(Token &Identifier) {
2267     // Nothing to do for tokens without identifier info or unpoisoned ones.
2268     IdentifierInfo *Info = Identifier.getIdentifierInfo();
2269     if (!Info || !Info->isPoisoned())
2270       return;
2271     HandlePoisonedIdentifier(Identifier);
2272   }
2273 
2274   /// Determine whether the next preprocessor token to be
2275   /// lexed is a '('.  If so, consume the token and return true, if not, this
2276   /// method should have no observable side-effect on the lexed tokens.
2277   bool isNextPPTokenLParen();
2278 
2279 private:
2280   /// Identifiers used for SEH handling in Borland. These are only
2281   /// allowed in particular circumstances
2282   // __except block
2283   IdentifierInfo *Ident__exception_code,
2284                  *Ident___exception_code,
2285                  *Ident_GetExceptionCode;
2286   // __except filter expression
2287   IdentifierInfo *Ident__exception_info,
2288                  *Ident___exception_info,
2289                  *Ident_GetExceptionInfo;
2290   // __finally
2291   IdentifierInfo *Ident__abnormal_termination,
2292                  *Ident___abnormal_termination,
2293                  *Ident_AbnormalTermination;
2294 
2295   const char *getCurLexerEndPos();
2296   void diagnoseMissingHeaderInUmbrellaDir(const Module &Mod);
2297 
2298 public:
2299   void PoisonSEHIdentifiers(bool Poison = true); // Borland
2300 
2301   /// Callback invoked when the lexer reads an identifier and has
2302   /// filled in the token's IdentifierInfo member.
2303   ///
2304   /// This callback potentially macro expands it or turns it into a named
2305   /// token (like 'for').
2306   ///
2307   /// \returns true if we actually computed a token, false if we need to
2308   /// lex again.
2309   bool HandleIdentifier(Token &Identifier);
2310 
2311   /// Callback invoked when the lexer hits the end of the current file.
2312   ///
2313   /// This either returns the EOF token and returns true, or
2314   /// pops a level off the include stack and returns false, at which point the
2315   /// client should call lex again.
2316   bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false);
2317 
2318   /// Callback invoked when the current TokenLexer hits the end of its
2319   /// token stream.
2320   bool HandleEndOfTokenLexer(Token &Result);
2321 
2322   /// Callback invoked when the lexer sees a # token at the start of a
2323   /// line.
2324   ///
2325   /// This consumes the directive, modifies the lexer/preprocessor state, and
2326   /// advances the lexer(s) so that the next token read is the correct one.
2327   void HandleDirective(Token &Result);
2328 
2329   /// Ensure that the next token is a tok::eod token.
2330   ///
2331   /// If not, emit a diagnostic and consume up until the eod.
2332   /// If \p EnableMacros is true, then we consider macros that expand to zero
2333   /// tokens as being ok.
2334   ///
2335   /// \return The location of the end of the directive (the terminating
2336   /// newline).
2337   SourceLocation CheckEndOfDirective(const char *DirType,
2338                                      bool EnableMacros = false);
2339 
2340   /// Read and discard all tokens remaining on the current line until
2341   /// the tok::eod token is found. Returns the range of the skipped tokens.
2342   SourceRange DiscardUntilEndOfDirective() {
2343     Token Scratch; // Required by the overload; its final value is unused.
2344     return DiscardUntilEndOfDirective(Scratch);
2345   }
2346 
2347   /// Same as above except retains the token that was found.
2348   SourceRange DiscardUntilEndOfDirective(Token &Tok);
2349 
2350   /// Returns true if the preprocessor has seen a use of
2351   /// __DATE__ or __TIME__ in the file so far.
2352   bool SawDateOrTime() const {
2353     // A location still equal to the default-constructed one was never set.
2354     const SourceLocation Unset;
         return DATELoc != Unset || TIMELoc != Unset;
       }
2355   unsigned getCounterValue() const {
         // Read-only access to the CounterValue member.
         return CounterValue;
       }
2356   void setCounterValue(unsigned V) {
         // Overwrite the CounterValue member with V.
         CounterValue = V;
       }
2357 
2358   LangOptions::FPEvalMethodKind getCurrentFPEvalMethod() const {
2359     const LangOptions::FPEvalMethodKind Method = CurrentFPEvalMethod;
2360     // The "unset on command line" sentinel must never be handed to callers.
2361     assert(Method != LangOptions::FEM_UnsetOnCommandLine &&
2362            "FPEvalMethod should be set either from command line or from the "
2363            "target info");
         return Method;
       }
2364 
2365   LangOptions::FPEvalMethodKind getTUFPEvalMethod() const {
         // Plain accessor: returns the TUFPEvalMethod member unchanged.
2366     return TUFPEvalMethod;
2367   }
2368 
2369   SourceLocation getLastFPEvalPragmaLocation() const {
         // Plain accessor: returns the location last stored by
         // setCurrentFPEvalMethod().
2370     return LastFPEvalPragmaLocation;
2371   }
2372 
2373   void setCurrentFPEvalMethod(SourceLocation PragmaLoc,
2374                               LangOptions::FPEvalMethodKind Val) {
2375     assert(Val != LangOptions::FEM_UnsetOnCommandLine &&
2376            "FPEvalMethod should never be set to FEM_UnsetOnCommandLine");
2377     // Record Val as both the current and the TU-level evaluation method.
2378     CurrentFPEvalMethod = TUFPEvalMethod = Val;
2379     // Remember the location of the '#pragma float_control' where the
2380     // execution state is modified.
2381     LastFPEvalPragmaLocation = PragmaLoc;
2382   }
2383 
2384   void setTUFPEvalMethod(LangOptions::FPEvalMethodKind Val) {
2385     // Val must be a concrete method, never the command-line "unset" sentinel.
2386     assert(Val != LangOptions::FEM_UnsetOnCommandLine &&
2387            "TUFPEvalMethod should never be set to FEM_UnsetOnCommandLine");
2388     TUFPEvalMethod = Val;
       }
2389 
2390   /// Retrieves the module that we're currently building, if any.
2391   Module *getCurrentModule();
2392 
2393   /// Retrieves the module whose implementation is being compiled, if any.
2394   Module *getCurrentModuleImplementation();
2395 
2396   /// Whether we are preprocessing a named module, as reported by
       /// ModuleDeclState.isNamedModule().
2397   bool isInNamedModule() const { return ModuleDeclState.isNamedModule(); }
2398 
2399   /// If we are preprocessing a named interface unit.
2400   /// Note that a module implementation partition is not considered a
2401   /// named interface unit here, although it is importable,
2402   /// to ease the parsing.
2403   bool isInNamedInterfaceUnit() const {
2404     return ModuleDeclState.isNamedInterface();
2405   }
2406 
2407   /// Get the name of the named module we're preprocessing.
2408   /// Requires that we're preprocessing a named module.
2409   StringRef getNamedModuleName() const { return ModuleDeclState.getName(); }
2410 
2411   /// If we are in a module implementation unit.
2412   /// Note that a module implementation partition is not considered an
2413   /// implementation unit.
2414   bool isInImplementationUnit() const {
2415     return ModuleDeclState.isImplementationUnit();
2416   }
2417 
2418   /// If we're importing a standard C++20 named module.
2419   bool isInImportingCXXNamedModules() const {
2420     // NamedModuleImportPath will be non-empty only if we're importing
2421     // Standard C++ named modules.
2422     if (NamedModuleImportPath.empty())
2423       return false;
2424     return getLangOpts().CPlusPlusModules && !IsAtImport;
       }
2425 
2426   /// Allocate a new MacroInfo object with the provided SourceLocation.
2427   MacroInfo *AllocateMacroInfo(SourceLocation L);
2428 
2429   /// Turn the specified lexer token into a fully checked and spelled
2430   /// filename, e.g. as an operand of \#include.
2431   ///
2432   /// The caller is expected to provide a buffer that is large enough to hold
2433   /// the spelling of the filename, but is also expected to handle the case
2434   /// when this method decides to use a different buffer.
2435   ///
2436   /// \returns true if the input filename was in <>'s or false if it was
2437   /// in ""'s.
2438   bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Buffer);
2439 
2440   /// Given a "foo" or \<foo> reference, look up the indicated file.
2441   ///
2442   /// Returns std::nullopt on failure.  \p isAngled indicates whether the file
2443   /// reference is for system \#include's or not (i.e. using <> instead of "").
2444   OptionalFileEntryRef
2445   LookupFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
2446              ConstSearchDirIterator FromDir, const FileEntry *FromFile,
2447              ConstSearchDirIterator *CurDir, SmallVectorImpl<char> *SearchPath,
2448              SmallVectorImpl<char> *RelativePath,
2449              ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped,
2450              bool *IsFrameworkFound, bool SkipCache = false,
2451              bool OpenFile = true, bool CacheFailures = true);
2452 
2453   /// Given a "Filename" or \<Filename> reference, look up the indicated embed
2454   /// resource. \p isAngled indicates whether the file reference is for
2455   /// system \#include's or not (i.e. using <> instead of ""). If \p OpenFile
2456   /// is true, the file looked up is opened for reading, otherwise it only
2457   /// validates that the file exists. Quoted filenames are looked up relative
2458   /// to \p LookupFromFile if it is nonnull.
2459   ///
2460   /// Returns std::nullopt on failure.
2461   OptionalFileEntryRef
2462   LookupEmbedFile(StringRef Filename, bool isAngled, bool OpenFile,
2463                   const FileEntry *LookupFromFile = nullptr);
2464 
2465   /// Return true if we're in the top-level file, not in a \#include.
2466   bool isInPrimaryFile() const;
2467 
2468   /// Lex an on-off-switch (C99 6.10.6p2) and verify that it is
2469   /// followed by EOD.  Return true if the token is not a valid on-off-switch.
2470   bool LexOnOffSwitch(tok::OnOffSwitch &Result);
2471 
2472   bool CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
2473                       bool *ShadowFlag = nullptr);
2474 
2475   void EnterSubmodule(Module *M, SourceLocation ImportLoc, bool ForPragma);
2476   Module *LeaveSubmodule(bool ForPragma);
2477 
2478 private:
2479   friend void TokenLexer::ExpandFunctionArguments();
2480 
2481   void PushIncludeMacroStack() {
2482     assert(CurLexerCallback != CLK_CachingLexer &&
2483            "cannot push a caching lexer");
         // Append a stack entry built from the current lexer state.  CurLexer and
         // CurTokenLexer are moved into the entry; the remaining values are
         // passed as-is.
2484     IncludeMacroStack.emplace_back(CurLexerCallback, CurLexerSubmodule,
2485                                    std::move(CurLexer), CurPPLexer,
2486                                    std::move(CurTokenLexer), CurDirLookup);
         // CurPPLexer is a plain pointer, so it has to be cleared explicitly.
2487     CurPPLexer = nullptr;
2488   }
2489 
2490   void PopIncludeMacroStack() {
2491     // Restore every piece of lexer state from the top entry, then drop it.
2492     IncludeStackInfo &Top = IncludeMacroStack.back();
2493     CurLexer = std::move(Top.TheLexer);
2494     CurPPLexer = Top.ThePPLexer;
2495     CurTokenLexer = std::move(Top.TheTokenLexer);
2496     CurDirLookup = Top.TheDirLookup;
2497     CurLexerSubmodule = Top.TheSubmodule;
2498     CurLexerCallback = Top.CurLexerCallback;
         IncludeMacroStack.pop_back();
       }
2499 
2500   void PropagateLineStartLeadingSpaceInfo(Token &Result);
2501 
2502   /// Determine whether we need to create module macros for #defines in the
2503   /// current context.
2504   bool needModuleMacros() const;
2505 
2506   /// Update the set of active module macros and ambiguity flag for a module
2507   /// macro name.
2508   void updateModuleMacroInfo(const IdentifierInfo *II, ModuleMacroInfo &Info);
2509 
2510   DefMacroDirective *AllocateDefMacroDirective(MacroInfo *MI,
2511                                                SourceLocation Loc);
2512   UndefMacroDirective *AllocateUndefMacroDirective(SourceLocation UndefLoc);
2513   VisibilityMacroDirective *AllocateVisibilityMacroDirective(SourceLocation Loc,
2514                                                              bool isPublic);
2515 
2516   /// Lex and validate a macro name, which occurs after a
2517   /// \#define or \#undef.
2518   ///
2519   /// \param MacroNameTok Token that represents the name defined or undefined.
2520   /// \param IsDefineUndef Kind of preprocessor directive.
2521   /// \param ShadowFlag Points to flag that is set if macro name shadows
2522   ///                   a keyword.
2523   ///
2524   /// This emits a diagnostic, sets the token kind to eod,
2525   /// and discards the rest of the macro line if the macro name is invalid.
2526   void ReadMacroName(Token &MacroNameTok, MacroUse IsDefineUndef = MU_Other,
2527                      bool *ShadowFlag = nullptr);
2528 
2529   /// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
2530   /// entire line) of the macro's tokens and adds them to MacroInfo, and while
2531   /// doing so performs certain validity checks including (but not limited to):
2532   ///   - # (stringization) is followed by a macro parameter
2533   /// \param MacroNameTok - Token that represents the macro name
2534   /// \param ImmediatelyAfterHeaderGuard - Macro follows an #ifdef header guard
2535   ///
2536   ///  Either returns a pointer to a MacroInfo object OR emits a diagnostic and
2537   ///  returns a nullptr if an invalid sequence of tokens is encountered.
2538   MacroInfo *ReadOptionalMacroParameterListAndBody(
2539       const Token &MacroNameTok, bool ImmediatelyAfterHeaderGuard);
2540 
2541   /// The ( starting an argument list of a macro definition has just been read.
2542   /// Lex the rest of the parameters and the closing ), updating \p MI with
2543   /// what we learn and saving in \p LastTok the last token read.
2544   /// Return true if an error occurs parsing the arg list.
2545   bool ReadMacroParameterList(MacroInfo *MI, Token& LastTok);
2546 
2547   /// Provide a suggestion for a typoed directive. If there is no typo, then
2548   /// just skip suggesting.
2549   ///
2550   /// \param Tok - Token that represents the directive
2551   /// \param Directive - String reference for the directive name
2552   void SuggestTypoedDirective(const Token &Tok, StringRef Directive) const;
2553 
2554   /// We just read a \#if or related directive and decided that the
2555   /// subsequent tokens are in the \#if'd out portion of the
2556   /// file.  Lex the rest of the file, until we see an \#endif.  If \p
2557   /// FoundNonSkipPortion is true, then we have already emitted code for part of
2558   /// this \#if directive, so \#else/\#elif blocks should never be entered. If
2559   /// \p FoundElse is false, then \#else directives are ok, if not, then we have
2560   /// already seen one so a \#else directive is a duplicate.  When this returns,
2561   /// the caller can lex the first valid token.
2562   void SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
2563                                     SourceLocation IfTokenLoc,
2564                                     bool FoundNonSkipPortion, bool FoundElse,
2565                                     SourceLocation ElseLoc = SourceLocation());
2566 
2567   /// Information about the result for evaluating an expression for a
2568   /// preprocessor directive.
2569   struct DirectiveEvalResult {
2570     /// The integral value of the expression.  Held in a std::optional, so it
         /// may be empty.
2571     std::optional<llvm::APSInt> Value;
2572 
2573     /// Whether the expression was evaluated as true or not.  Has no default
         /// member initializer.
2574     bool Conditional;
2575 
2576     /// True if the expression contained identifiers that were undefined.  Has
         /// no default member initializer.
2577     bool IncludedUndefinedIds;
2578 
2579     /// The source range for the expression.
2580     SourceRange ExprRange;
2581   };
2582 
2583   /// Evaluate an integer constant expression that may occur after a
2584   /// \#if or \#elif directive and return a \p DirectiveEvalResult object.
2585   ///
2586   /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
2587   DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
2588                                                   bool CheckForEoD = true);
2589 
2590   /// Evaluate an integer constant expression that may occur after a
2591   /// \#if or \#elif directive and return a \p DirectiveEvalResult object.
2592   ///
2593   /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
2594   /// \p EvaluatedDefined will contain the result of whether "defined" appeared
2595   /// in the evaluated expression or not.
2596   DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro,
2597                                                   Token &Tok,
2598                                                   bool &EvaluatedDefined,
2599                                                   bool CheckForEoD = true);
2600 
2601   /// Process a '__has_embed("path" [, ...])' expression.
2602   ///
2603   /// Returns predefined `__STDC_EMBED_*` macro values if
2604   /// successful.
2605   EmbedResult EvaluateHasEmbed(Token &Tok, IdentifierInfo *II);
2606 
2607   /// Process a '__has_include("path")' expression.
2608   ///
2609   /// Returns true if successful.
2610   bool EvaluateHasInclude(Token &Tok, IdentifierInfo *II);
2611 
2612   /// Process '__has_include_next("path")' expression.
2613   ///
2614   /// Returns true if successful.
2615   bool EvaluateHasIncludeNext(Token &Tok, IdentifierInfo *II);
2616 
2617   /// Get the directory and file from which to start \#include_next lookup.
2618   std::pair<ConstSearchDirIterator, const FileEntry *>
2619   getIncludeNextStart(const Token &IncludeNextTok) const;
2620 
2621   /// Install the standard preprocessor pragmas:
2622   /// \#pragma GCC poison/system_header/dependency and \#pragma once.
2623   void RegisterBuiltinPragmas();
2624 
2625   /// RegisterBuiltinMacro - Register the specified identifier in the identifier
2626   /// table and mark it as a builtin macro to be expanded.
2627   IdentifierInfo *RegisterBuiltinMacro(const char *Name) {
2628     // Resolve Name to its IdentifierInfo.
2629     IdentifierInfo *II = getIdentifierInfo(Name);
2630 
2631     // Build a MacroInfo with an empty location, flag it builtin, and attach it.
2632     MacroInfo *BuiltinMI = AllocateMacroInfo(SourceLocation());
2633     BuiltinMI->setIsBuiltinMacro();
2634     appendDefMacroDirective(II, BuiltinMI);
2635     return II;
2636   }
2637 
2638   /// Register builtin macros such as __LINE__ with the identifier table.
2639   void RegisterBuiltinMacros();
2640 
2641   /// If an identifier token is read that is to be expanded as a macro, handle
2642   /// it and return the next token as 'Tok'.  If we lexed a token, return true;
2643   /// otherwise the caller should lex again.
2644   bool HandleMacroExpandedIdentifier(Token &Identifier, const MacroDefinition &MD);
2645 
2646   /// Cache macro expanded tokens for TokenLexers.
2647   ///
2648   /// Works like a stack; a TokenLexer adds the macro expanded tokens that it
2649   /// is going to lex in the cache and when it finishes the tokens are removed
2650   /// from the end of the cache.
2651   Token *cacheMacroExpandedTokens(TokenLexer *tokLexer,
2652                                   ArrayRef<Token> tokens);
2653 
2654   void removeCachedMacroExpandedTokensOfLastLexer();
2655 
2656   /// After reading "MACRO(", this method is invoked to read all of the formal
2657   /// arguments specified for the macro invocation.  Returns null on error.
2658   MacroArgs *ReadMacroCallArgumentList(Token &MacroName, MacroInfo *MI,
2659                                        SourceLocation &MacroEnd);
2660 
2661   /// If an identifier token is read that is to be expanded
2662   /// as a builtin macro, handle it and return the next token as 'Tok'.
2663   void ExpandBuiltinMacro(Token &Tok);
2664 
2665   /// Read a \c _Pragma directive, slice it up, process it, then
2666   /// return the first token after the directive.
2667   /// This assumes that the \c _Pragma token has just been read into \p Tok.
2668   void Handle_Pragma(Token &Tok);
2669 
2670   /// Like Handle_Pragma except the pragma text is not enclosed within
2671   /// a string literal.
2672   void HandleMicrosoft__pragma(Token &Tok);
2673 
2674   /// Add a lexer to the top of the include stack and
2675   /// start lexing tokens from it instead of the current buffer.
2676   void EnterSourceFileWithLexer(Lexer *TheLexer, ConstSearchDirIterator Dir);
2677 
2678   /// Set the FileID for the preprocessor predefines.
2679   void setPredefinesFileID(FileID FID) {
         // Asserts that no valid FileID has been stored yet before overwriting.
2680     assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!");
2681     PredefinesFileID = FID;
2682   }
2683 
2684   /// Set the FileID for the PCH through header.
2685   void setPCHThroughHeaderFileID(FileID FID);
2686 
2687   /// Returns true if we are lexing from a file and not a
2688   /// pragma or a macro.
2689   static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) {
2690     // With a Lexer present, it decides; otherwise any PreprocessorLexer counts.
2691     if (L)
           return !L->isPragmaLexer();
         return P != nullptr;
       }
2692 
2693   static bool IsFileLexer(const IncludeStackInfo& I) {
         // Same test as the two-pointer overload, applied to a saved stack entry.
2694     return IsFileLexer(I.TheLexer.get(), I.ThePPLexer);
2695   }
2696 
2697   bool IsFileLexer() const {
         // Same test as the two-pointer overload, applied to the current lexers.
2698     return IsFileLexer(CurLexer.get(), CurPPLexer);
2699   }
2700 
2701   //===--------------------------------------------------------------------===//
2702   // Caching stuff.
2703   void CachingLex(Token &Result);
2704 
2705   bool InCachingLexMode() const {
2706     // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means
2707     // that we are past EOF, not that we are in CachingLex mode.
2708     return !(CurPPLexer || CurTokenLexer || IncludeMacroStack.empty());
2709   }
2710 
2711   void EnterCachingLexMode();
2712   void EnterCachingLexModeUnchecked();
2713 
2714   void ExitCachingLexMode() {
2715     if (!InCachingLexMode())
2716       return; // Not in caching-lex mode; nothing to remove.
2717     RemoveTopOfLexerStack();
       }
2718 
2719   const Token &PeekAhead(unsigned N);
2720   void AnnotatePreviousCachedTokens(const Token &Tok);
2721 
2722   //===--------------------------------------------------------------------===//
2723   /// Handle*Directive - implement the various preprocessor directives.  These
2724   /// should side-effect the current preprocessor object so that the next call
2725   /// to Lex() will return the appropriate token next.
2726   void HandleLineDirective();
2727   void HandleDigitDirective(Token &Tok);
2728   void HandleUserDiagnosticDirective(Token &Tok, bool isWarning);
2729   void HandleIdentSCCSDirective(Token &Tok);
2730   void HandleMacroPublicDirective(Token &Tok);
2731   void HandleMacroPrivateDirective();
2732 
2733   /// An additional notification that can be produced by a header inclusion or
2734   /// import to tell the parser what happened.
2735   struct ImportAction {
         /// The kind of notification being reported.
2736     enum ActionKind {
2737       None,
2738       ModuleBegin,
2739       ModuleImport,
2740       HeaderUnitImport,
2741       SkippedModuleImport,
2742       Failure,
2743     } Kind;
         /// Module associated with the action.  May be null only for the None and
         /// Failure kinds (checked by the constructor's assert).
2744     Module *ModuleForHeader = nullptr;
2745 
         /// Not explicit and \p Mod is defaulted, so a bare ActionKind converts
         /// implicitly to an ImportAction.
2746     ImportAction(ActionKind AK, Module *Mod = nullptr)
2747         : Kind(AK), ModuleForHeader(Mod) {
2748       assert((AK == None || Mod || AK == Failure) &&
2749              "no module for module action");
2750     }
2751   };
2752 
2753   OptionalFileEntryRef LookupHeaderIncludeOrImport(
2754       ConstSearchDirIterator *CurDir, StringRef &Filename,
2755       SourceLocation FilenameLoc, CharSourceRange FilenameRange,
2756       const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl,
2757       bool &IsMapped, ConstSearchDirIterator LookupFrom,
2758       const FileEntry *LookupFromFile, StringRef &LookupFilename,
2759       SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,
2760       ModuleMap::KnownHeader &SuggestedModule, bool isAngled);
2761   // Binary data inclusion
2762   void HandleEmbedDirective(SourceLocation HashLoc, Token &Tok,
2763                             const FileEntry *LookupFromFile = nullptr);
2764   void HandleEmbedDirectiveImpl(SourceLocation HashLoc,
2765                                 const LexEmbedParametersResult &Params,
2766                                 StringRef BinaryContents);
2767 
2768   // File inclusion.
2769   void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok,
2770                               ConstSearchDirIterator LookupFrom = nullptr,
2771                               const FileEntry *LookupFromFile = nullptr);
2772   ImportAction
2773   HandleHeaderIncludeOrImport(SourceLocation HashLoc, Token &IncludeTok,
2774                               Token &FilenameTok, SourceLocation EndLoc,
2775                               ConstSearchDirIterator LookupFrom = nullptr,
2776                               const FileEntry *LookupFromFile = nullptr);
2777   void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok);
2778   void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
2779   void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
2780   void HandleMicrosoftImportDirective(Token &Tok);
2781 
2782 public:
2783   /// Check that the given module is available, producing a diagnostic if not.
2784   /// \return \c true if the check failed (because the module is not available).
2785   ///         \c false if the module appears to be usable.
2786   static bool checkModuleIsAvailable(const LangOptions &LangOpts,
2787                                      const TargetInfo &TargetInfo,
2788                                      const Module &M, DiagnosticsEngine &Diags);
2789 
2790   // Module inclusion testing.
2791   /// Find the module that owns the source or header file that
2792   /// \p Loc points to. If the location is in a file that was included
2793   /// into a module, or is outside any module, returns nullptr.
2794   Module *getModuleForLocation(SourceLocation Loc, bool AllowTextual);
2795 
2796   /// We want to produce a diagnostic at location IncLoc concerning an
2797   /// unreachable effect at location MLoc (eg, where a desired entity was
2798   /// declared or defined). Determine whether the right way to make MLoc
2799   /// reachable is by #include, and if so, what header should be included.
2800   ///
2801   /// This is not necessarily fast, and might load unexpected module maps, so
2802   /// should only be called by code that intends to produce an error.
2803   ///
2804   /// \param IncLoc The location at which the missing effect was detected.
2805   /// \param MLoc A location within an unimported module at which the desired
2806   ///        effect occurred.
2807   /// \return A file that can be #included to provide the desired effect. Null
2808   ///         if no such file could be determined or if a #include is not
2809   ///         appropriate (eg, if a module should be imported instead).
2810   OptionalFileEntryRef getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
2811                                                         SourceLocation MLoc);
2812 
2813   bool isRecordingPreamble() const {
         // Forwards to PreambleConditionalStack.isRecording().
2814     return PreambleConditionalStack.isRecording();
2815   }
2816 
2817   bool hasRecordedPreamble() const {
         // Forwards to PreambleConditionalStack.hasRecordedPreamble().
2818     return PreambleConditionalStack.hasRecordedPreamble();
2819   }
2820 
2821   ArrayRef<PPConditionalInfo> getPreambleConditionalStack() const {
2822     // Expose the stack held by PreambleConditionalStack as an ArrayRef.
2823     return PreambleConditionalStack.getStack();
       }
2824 
2825   void setRecordedPreambleConditionalStack(ArrayRef<PPConditionalInfo> s) {
         // Forwards s to PreambleConditionalStack.setStack().
2826     PreambleConditionalStack.setStack(s);
2827   }
2828 
2829   void setReplayablePreambleConditionalStack(
2830       ArrayRef<PPConditionalInfo> s, std::optional<PreambleSkipInfo> SkipInfo) {
         // Call startReplaying() first, then install the stack and the skip info.
2831     PreambleConditionalStack.startReplaying();
2832     PreambleConditionalStack.setStack(s);
2833     PreambleConditionalStack.SkipInfo = SkipInfo;
2834   }
2835 
2836   std::optional<PreambleSkipInfo> getPreambleSkipInfo() const {
         // Returns a copy of the stored SkipInfo; it may be an empty optional.
2837     return PreambleConditionalStack.SkipInfo;
2838   }
2839 
2840 private:
2841   /// After processing predefined file, initialize the conditional stack from
2842   /// the preamble.
2843   void replayPreambleConditionalStack();
2844 
2845   // Macro handling.
2846   void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterHeaderGuard);
2847   void HandleUndefDirective();
2848 
2849   // Conditional Inclusion.
2850   void HandleIfdefDirective(Token &Result, const Token &HashToken,
2851                             bool isIfndef, bool ReadAnyTokensBeforeDirective);
2852   void HandleIfDirective(Token &IfToken, const Token &HashToken,
2853                          bool ReadAnyTokensBeforeDirective);
2854   void HandleEndifDirective(Token &EndifToken);
2855   void HandleElseDirective(Token &Result, const Token &HashToken);
2856   void HandleElifFamilyDirective(Token &ElifToken, const Token &HashToken,
2857                                  tok::PPKeywordKind Kind);
2858 
2859   // Pragmas.
2860   void HandlePragmaDirective(PragmaIntroducer Introducer);
2861 
2862 public:
2863   void HandlePragmaOnce(Token &OnceTok);
2864   void HandlePragmaMark(Token &MarkTok);
2865   void HandlePragmaPoison();
2866   void HandlePragmaSystemHeader(Token &SysHeaderTok);
2867   void HandlePragmaDependency(Token &DependencyTok);
2868   void HandlePragmaPushMacro(Token &Tok);
2869   void HandlePragmaPopMacro(Token &Tok);
2870   void HandlePragmaIncludeAlias(Token &Tok);
2871   void HandlePragmaModuleBuild(Token &Tok);
2872   void HandlePragmaHdrstop(Token &Tok);
2873   IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok);
2874 
2875   // Return true and store the first token only if any CommentHandler
2876   // has inserted some tokens and getCommentRetentionState() is false.
2877   bool HandleComment(Token &result, SourceRange Comment);
2878 
2879   /// A macro is used, update information about macros that need unused
2880   /// warnings.
2881   void markMacroAsUsed(MacroInfo *MI);
2882 
2883   void addMacroDeprecationMsg(const IdentifierInfo *II, std::string Msg,
2884                               SourceLocation AnnotationLoc) {
2885     // operator[] creates the entry for II when it does not exist yet.
2886     auto &Annotations = AnnotationInfos[II];
2887     Annotations.DeprecationInfo =
             MacroAnnotationInfo{AnnotationLoc, std::move(Msg)};
       }
2888 
2889   void addRestrictExpansionMsg(const IdentifierInfo *II, std::string Msg,
2890                                SourceLocation AnnotationLoc) {
2891     // operator[] creates the entry for II when it does not exist yet.
2892     auto &Annotations = AnnotationInfos[II];
2893     Annotations.RestrictExpansionInfo =
             MacroAnnotationInfo{AnnotationLoc, std::move(Msg)};
       }
2894 
2895   void addFinalLoc(const IdentifierInfo *II, SourceLocation AnnotationLoc) {
         // Stores AnnotationLoc as the FinalAnnotationLoc of II's annotation
         // entry; operator[] creates the entry when it does not exist yet.
2896     AnnotationInfos[II].FinalAnnotationLoc = AnnotationLoc;
2897   }
2898 
2899   const MacroAnnotations &getMacroAnnotations(const IdentifierInfo *II) const {
2900     // Precondition: an annotation entry has already been recorded for II.
2901     // Looking up an identifier with no entry would dereference end(), so trap
         // that in asserts-enabled builds.
         auto It = AnnotationInfos.find(II);
         assert(It != AnnotationInfos.end() && "no macro annotations for II");
         return It->second;
       }
2902 
2903   void emitMacroExpansionWarnings(const Token &Identifier,
2904                                   bool IsIfnDef = false) const {
2905     IdentifierInfo *MacroII = Identifier.getIdentifierInfo();
2906 
2907     if (MacroII->isDeprecatedMacro())
2908       emitMacroDeprecationWarning(Identifier);
2909 
2910     // Restricted-expansion macros are only reported outside the main file.
2911     const bool IsRestricted = MacroII->isRestrictExpansion();
2912     if (IsRestricted && !SourceMgr.isInMainFile(Identifier.getLocation()))
2913       emitRestrictExpansionWarning(Identifier);
2914 
2915     // The INFINITY/NAN checks below are skipped when IsIfnDef is set.
2916     if (IsIfnDef)
2917       return;
2918     if (MacroII->getName() == "INFINITY" && getLangOpts().NoHonorInfs)
2919       emitRestrictInfNaNWarning(Identifier, 0);
         if (MacroII->getName() == "NAN" && getLangOpts().NoHonorNaNs)
           emitRestrictInfNaNWarning(Identifier, 1);
       }
2920 
2921   static void processPathForFileMacro(SmallVectorImpl<char> &Path,
2922                                       const LangOptions &LangOpts,
2923                                       const TargetInfo &TI);
2924 
2925   static void processPathToFileName(SmallVectorImpl<char> &FileName,
2926                                     const PresumedLoc &PLoc,
2927                                     const LangOptions &LangOpts,
2928                                     const TargetInfo &TI);
2929 
2930 private:
2931   void emitMacroDeprecationWarning(const Token &Identifier) const;
2932   void emitRestrictExpansionWarning(const Token &Identifier) const;
2933   void emitFinalMacroWarning(const Token &Identifier, bool IsUndef) const;
2934   void emitRestrictInfNaNWarning(const Token &Identifier,
2935                                  unsigned DiagSelection) const;
2936 
2937   /// This boolean state keeps track if the current scanned token (by this PP)
2938   /// is in an "-Wunsafe-buffer-usage" opt-out region. Assuming PP scans a
2939   /// translation unit in a linear order.
2940   bool InSafeBufferOptOutRegion = false;
2941 
2942   /// Hold the start location of the current "-Wunsafe-buffer-usage" opt-out
2943   /// region if PP is currently in such a region.  Hold undefined value
2944   /// otherwise.  It is used to report the start location of a never-closed
       /// region.
2945   SourceLocation CurrentSafeBufferOptOutStart;
2946 
2947   using SafeBufferOptOutRegionsTy =
2948       SmallVector<std::pair<SourceLocation, SourceLocation>, 16>;
2949   // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in this
2950   // translation unit. Each region is represented by a pair of start and
2951   // end locations.
2952   SafeBufferOptOutRegionsTy SafeBufferOptOutMap;
2953 
2954   // "-Wunsafe-buffer-usage" opt-out regions that come from loaded ASTs, kept
2955   // in the structure below and grouped by the AST they were loaded from.
2956   struct {
2957     // Region lists keyed by FileID; the key for a location is obtained from
2958     // `SourceManager::getUniqueLoadedASTFileID`.
2959     llvm::DenseMap<FileID, SafeBufferOptOutRegionsTy> LoadedRegions;
2960 
2961     // Returns a reference to the region list of the loaded AST that `Loc`
2962     // belongs to.  An empty list is created first if none exists yet.
2963     SafeBufferOptOutRegionsTy &
2964     findAndConsLoadedOptOutMap(SourceLocation Loc, SourceManager &SrcMgr) {
2965       const FileID ASTFileID = SrcMgr.getUniqueLoadedASTFileID(Loc);
2966       return LoadedRegions[ASTFileID];
2967     }
2968 
2969     // Returns a pointer to the region list of the loaded AST that `Loc`
2970     // belongs to, or nullptr if no list exists for it.  Unlike the function
2971     // above, this const lookup never creates an entry.
2972     const SafeBufferOptOutRegionsTy *
2973     lookupLoadedOptOutMap(SourceLocation Loc,
2974                           const SourceManager &SrcMgr) const {
2975       const auto Found =
2976           LoadedRegions.find(SrcMgr.getUniqueLoadedASTFileID(Loc));
2977       if (Found != LoadedRegions.end())
2978         return &Found->getSecond();
2979       return nullptr;
2980     }
2981   } LoadedSafeBufferOptOutMap;
2982 
2983 public:
2984   /// \return true iff `Loc` lies inside a "-Wunsafe-buffer-usage" opt-out
2985   /// region.  `Loc` must be a source location that has been pre-processed.
2986   bool isSafeBufferOptOut(const SourceManager &SourceMgr,
                               const SourceLocation &Loc) const;
2987 
2988   /// Record that this PP enters or leaves a "-Wunsafe-buffer-usage" opt-out
2989   /// region.
2990   ///
2991   /// \param isEnter true if this PP is entering a region; false if it is
2992   /// exiting one.
2993   /// \param Loc the location at which the region is entered or exited.
2994   ///
2995   /// \return true iff the request is INVALID, i.e., it attempts to enter a
2996   /// region before the previous one was exited, or to exit a region that PP
2997   /// is not currently in.  Note that true signals failure here.
2998   bool enterOrExitSafeBufferOptOutRegion(bool isEnter,
2999                                          const SourceLocation &Loc);
3000 
3001   /// \return true iff this PP is currently inside a "-Wunsafe-buffer-usage"
3002   /// opt-out region.
3003   bool isPPInSafeBufferOptOutRegion();
3004 
3005   /// \param StartLoc output argument.  It is set to the start location of the
3006   /// current "-Wunsafe-buffer-usage" opt-out region iff this function
3007   /// returns true.
3008   /// \return true iff this PP is currently inside a "-Wunsafe-buffer-usage"
3009   /// opt-out region.
3010   bool isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc);
3011 
3012   /// \return a sequence of SourceLocations representing the ordered opt-out
3013   /// regions specified by the
3014   /// `\#pragma clang unsafe_buffer_usage begin/end`s of this translation unit.
3015   SmallVector<SourceLocation, 64> serializeSafeBufferOptOutMap() const;
3016 
3017   /// \param SrcLocSeqs a sequence of SourceLocations deserialized from a
3018   /// record of code `PP_UNSAFE_BUFFER_USAGE`.
3019   /// \return true iff the `Preprocessor` has been updated; false if the
3020   /// `Preprocessor` is left the same as it was before the call.
3021   bool setDeserializedSafeBufferOptOutMap(
3022       const SmallVectorImpl<SourceLocation> &SrcLocSeqs);
3023 
3024 private:
3025   /// Helper functions that forward lexing to the actual lexer.  They all share
3026   /// the same signature: (Preprocessor &P, Token &Result) -> bool.
       /// Each one returns whatever the forwarded call returns, except
       /// CLK_CachingLexer, which calls `CachingLex` and always returns true.
       /// NOTE(review): the shared signature presumably lets them be selected
       /// through a single function pointer -- confirm at the use site.
3027   static bool CLK_Lexer(Preprocessor &P, Token &Result) {
3028     return P.CurLexer->Lex(Result);
3029   }
3030   static bool CLK_TokenLexer(Preprocessor &P, Token &Result) {
3031     return P.CurTokenLexer->Lex(Result);
3032   }
3033   static bool CLK_CachingLexer(Preprocessor &P, Token &Result) {
3034     P.CachingLex(Result);
3035     return true;
3036   }
3037   static bool CLK_DependencyDirectivesLexer(Preprocessor &P, Token &Result) {
3038     return P.CurLexer->LexDependencyDirectiveToken(Result);
3039   }
3040   static bool CLK_LexAfterModuleImport(Preprocessor &P, Token &Result) {
3041     return P.LexAfterModuleImport(Result);
3042   }
3043 };
3044 
3045 /// Abstract base class that describes a handler that will receive
3046 /// source ranges for each of the comments encountered in the source file.
3047 class CommentHandler {
3048 public:
       // Virtual destructor; only declared here, so it is defined out of line.
3049   virtual ~CommentHandler();
3050 
3051   // Receives the preprocessor and the source range of a comment.  The
3052   // handler shall return true if it has pushed any tokens to be read, e.g.
       // using EnterToken or EnterTokenStream.
3053   virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0;
3054 };
3055 
3056 /// Abstract base class that describes a handler that will receive
3057 /// source ranges for empty lines encountered in the source file.
3058 class EmptylineHandler {
3059 public:
       // Virtual destructor; only declared here, so it is defined out of line.
3060   virtual ~EmptylineHandler();
3061 
3062   // Receives the source range of the empty line(s) encountered.
3063   virtual void HandleEmptyline(SourceRange Range) = 0;
3064 };
3065 
3066 /// Helper class to shuttle information about #embed directives from the
3067 /// preprocessor to the parser through an annotation token.
3068 struct EmbedAnnotationData {
       // Non-owning view (StringRef), so the referenced storage must outlive
       // this object.  NOTE(review): presumably the bytes of the embedded
       // resource -- confirm against the code that fills it in.
3069   StringRef BinaryData;
3070 };
3071 
3072 /// Registry of pragma handlers added by plugins.
3073 using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>;
3074 
3075 } // namespace clang
3076 
3077 namespace llvm {
     // Explicit instantiation declaration: translation units that include this
     // header do not implicitly instantiate Registry<clang::PragmaHandler>.
     // NOTE(review): the matching explicit instantiation definition is expected
     // in exactly one source file -- confirm where it lives.
3078 extern template class CLANG_TEMPLATE_ABI Registry<clang::PragmaHandler>;
3079 } // namespace llvm
3080 
3081 #endif // LLVM_CLANG_LEX_PREPROCESSOR_H