Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-05-10 08:37:11

0001 //===- DependencyScanningFilesystem.h - clang-scan-deps fs ===---*- C++ -*-===//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 
0009 #ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H
0010 #define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H
0011 
0012 #include "clang/Basic/LLVM.h"
0013 #include "clang/Lex/DependencyDirectivesScanner.h"
0014 #include "llvm/ADT/DenseMap.h"
0015 #include "llvm/ADT/StringMap.h"
0016 #include "llvm/Support/Allocator.h"
0017 #include "llvm/Support/ErrorOr.h"
0018 #include "llvm/Support/VirtualFileSystem.h"
0019 #include <mutex>
0020 #include <optional>
0021 
0022 namespace clang {
0023 namespace tooling {
0024 namespace dependencies {
0025 
/// Sequence of scanned dependency directives
/// (\c dependency_directives_scan::Directive), stored in a \c SmallVector with
/// an inline capacity of 20 elements.
using DependencyDirectivesTy =
    SmallVector<dependency_directives_scan::Directive, 20>;
0028 
0029 /// Contents and directive tokens of a cached file entry. Single instance can
0030 /// be shared between multiple entries.
0031 struct CachedFileContents {
0032   CachedFileContents(std::unique_ptr<llvm::MemoryBuffer> Contents)
0033       : Original(std::move(Contents)), DepDirectives(nullptr) {}
0034 
0035   /// Owning storage for the original contents.
0036   std::unique_ptr<llvm::MemoryBuffer> Original;
0037 
0038   /// The mutex that must be locked before mutating directive tokens.
0039   std::mutex ValueLock;
0040   SmallVector<dependency_directives_scan::Token, 10> DepDirectiveTokens;
0041   /// Accessor to the directive tokens that's atomic to avoid data races.
0042   /// \p CachedFileContents has ownership of the pointer.
0043   std::atomic<const std::optional<DependencyDirectivesTy> *> DepDirectives;
0044 
0045   ~CachedFileContents() { delete DepDirectives.load(); }
0046 };
0047 
0048 /// An in-memory representation of a file system entity that is of interest to
0049 /// the dependency scanning filesystem.
0050 ///
0051 /// It represents one of the following:
0052 /// - opened file with contents and a stat value,
0053 /// - opened file with contents, directive tokens and a stat value,
0054 /// - directory entry with its stat value,
0055 /// - filesystem error.
0056 ///
0057 /// Single instance of this class can be shared across different filenames (e.g.
0058 /// a regular file and a symlink). For this reason the status filename is empty
0059 /// and is only materialized by \c EntryRef that knows the requested filename.
0060 class CachedFileSystemEntry {
0061 public:
0062   /// Creates an entry without contents: either a filesystem error or
0063   /// a directory with stat value.
0064   CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat)
0065       : MaybeStat(std::move(Stat)), Contents(nullptr) {
0066     clearStatName();
0067   }
0068 
0069   /// Creates an entry representing a file with contents.
0070   CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat,
0071                         CachedFileContents *Contents)
0072       : MaybeStat(std::move(Stat)), Contents(std::move(Contents)) {
0073     clearStatName();
0074   }
0075 
0076   /// \returns True if the entry is a filesystem error.
0077   bool isError() const { return !MaybeStat; }
0078 
0079   /// \returns True if the current entry represents a directory.
0080   bool isDirectory() const { return !isError() && MaybeStat->isDirectory(); }
0081 
0082   /// \returns Original contents of the file.
0083   StringRef getOriginalContents() const {
0084     assert(!isError() && "error");
0085     assert(!MaybeStat->isDirectory() && "not a file");
0086     assert(Contents && "contents not initialized");
0087     return Contents->Original->getBuffer();
0088   }
0089 
0090   /// \returns The scanned preprocessor directive tokens of the file that are
0091   /// used to speed up preprocessing, if available.
0092   std::optional<ArrayRef<dependency_directives_scan::Directive>>
0093   getDirectiveTokens() const {
0094     assert(!isError() && "error");
0095     assert(!isDirectory() && "not a file");
0096     assert(Contents && "contents not initialized");
0097     if (auto *Directives = Contents->DepDirectives.load()) {
0098       if (Directives->has_value())
0099         return ArrayRef<dependency_directives_scan::Directive>(**Directives);
0100     }
0101     return std::nullopt;
0102   }
0103 
0104   /// \returns The error.
0105   std::error_code getError() const { return MaybeStat.getError(); }
0106 
0107   /// \returns The entry status with empty filename.
0108   llvm::vfs::Status getStatus() const {
0109     assert(!isError() && "error");
0110     assert(MaybeStat->getName().empty() && "stat name must be empty");
0111     return *MaybeStat;
0112   }
0113 
0114   /// \returns The unique ID of the entry.
0115   llvm::sys::fs::UniqueID getUniqueID() const {
0116     assert(!isError() && "error");
0117     return MaybeStat->getUniqueID();
0118   }
0119 
0120   /// \returns The data structure holding both contents and directive tokens.
0121   CachedFileContents *getCachedContents() const {
0122     assert(!isError() && "error");
0123     assert(!isDirectory() && "not a file");
0124     return Contents;
0125   }
0126 
0127 private:
0128   void clearStatName() {
0129     if (MaybeStat)
0130       MaybeStat = llvm::vfs::Status::copyWithNewName(*MaybeStat, "");
0131   }
0132 
0133   /// Either the filesystem error or status of the entry.
0134   /// The filename is empty and only materialized by \c EntryRef.
0135   llvm::ErrorOr<llvm::vfs::Status> MaybeStat;
0136 
0137   /// Non-owning pointer to the file contents.
0138   ///
0139   /// We're using pointer here to keep the size of this class small. Instances
0140   /// representing directories and filesystem errors don't hold any contents
0141   /// anyway.
0142   CachedFileContents *Contents;
0143 };
0144 
/// A cached real-path lookup result: either the real path string or the error
/// code stored in its place.
using CachedRealPath = llvm::ErrorOr<std::string>;
0146 
/// This class is a shared cache, that caches the 'stat' and 'open' calls to the
/// underlying real file system, and the scanned preprocessor directives of
/// files.
///
/// It is sharded based on the hash of the key to reduce the lock contention for
/// the worker threads.
class DependencyScanningFilesystemSharedCache {
public:
  /// One shard of the cache: its own lock, lookup maps and backing storage.
  struct CacheShard {
    /// The mutex that needs to be locked before mutation of any member.
    mutable std::mutex CacheLock;

    /// Map from filenames to cached entries and real paths.
    /// Each value is a pair of non-owning pointers (entry, real path).
    llvm::StringMap<
        std::pair<const CachedFileSystemEntry *, const CachedRealPath *>,
        llvm::BumpPtrAllocator>
        CacheByFilename;

    /// Map from unique IDs to cached entries.
    llvm::DenseMap<llvm::sys::fs::UniqueID, const CachedFileSystemEntry *>
        EntriesByUID;

    /// The backing storage for cached entries.
    llvm::SpecificBumpPtrAllocator<CachedFileSystemEntry> EntryStorage;

    /// The backing storage for cached contents.
    llvm::SpecificBumpPtrAllocator<CachedFileContents> ContentsStorage;

    /// The backing storage for cached real paths.
    llvm::SpecificBumpPtrAllocator<CachedRealPath> RealPathStorage;

    /// Returns entry associated with the filename or nullptr if none is found.
    const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const;

    /// Returns entry associated with the unique ID or nullptr if none is found.
    const CachedFileSystemEntry *
    findEntryByUID(llvm::sys::fs::UniqueID UID) const;

    /// Returns entry associated with the filename if there is some. Otherwise,
    /// constructs new one with the given status, associates it with the
    /// filename and returns the result.
    const CachedFileSystemEntry &
    getOrEmplaceEntryForFilename(StringRef Filename,
                                 llvm::ErrorOr<llvm::vfs::Status> Stat);

    /// Returns entry associated with the unique ID if there is some. Otherwise,
    /// constructs new one with the given status and contents, associates it
    /// with the unique ID and returns the result.
    const CachedFileSystemEntry &
    getOrEmplaceEntryForUID(llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat,
                            std::unique_ptr<llvm::MemoryBuffer> Contents);

    /// Returns entry associated with the filename if there is some. Otherwise,
    /// associates the given entry with the filename and returns it.
    const CachedFileSystemEntry &
    getOrInsertEntryForFilename(StringRef Filename,
                                const CachedFileSystemEntry &Entry);

    /// Returns the real path associated with the filename or nullptr if none is
    /// found.
    const CachedRealPath *findRealPathByFilename(StringRef Filename) const;

    /// Returns the real path associated with the filename if there is some.
    /// Otherwise, constructs new one with the given one, associates it with the
    /// filename and returns the result.
    const CachedRealPath &
    getOrEmplaceRealPathForFilename(StringRef Filename,
                                    llvm::ErrorOr<StringRef> RealPath);
  };

  /// Defined out of line. NOTE(review): presumably allocates \c CacheShards and
  /// sets \c NumShards -- confirm against the implementation file.
  DependencyScanningFilesystemSharedCache();

  /// Returns the shard for the given filename key.
  CacheShard &getShardForFilename(StringRef Filename) const;
  /// Returns the shard for the given unique ID key.
  CacheShard &getShardForUID(llvm::sys::fs::UniqueID UID) const;

private:
  /// Owning array of shards.
  std::unique_ptr<CacheShard[]> CacheShards;
  /// NOTE(review): presumably the number of elements in \c CacheShards --
  /// confirm against the constructor.
  unsigned NumShards;
};
0227 
0228 /// This class is a local cache, that caches the 'stat' and 'open' calls to the
0229 /// underlying real file system.
0230 class DependencyScanningFilesystemLocalCache {
0231   llvm::StringMap<
0232       std::pair<const CachedFileSystemEntry *, const CachedRealPath *>,
0233       llvm::BumpPtrAllocator>
0234       Cache;
0235 
0236 public:
0237   /// Returns entry associated with the filename or nullptr if none is found.
0238   const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const {
0239     assert(llvm::sys::path::is_absolute_gnu(Filename));
0240     auto It = Cache.find(Filename);
0241     return It == Cache.end() ? nullptr : It->getValue().first;
0242   }
0243 
0244   /// Associates the given entry with the filename and returns the given entry
0245   /// pointer (for convenience).
0246   const CachedFileSystemEntry &
0247   insertEntryForFilename(StringRef Filename,
0248                          const CachedFileSystemEntry &Entry) {
0249     assert(llvm::sys::path::is_absolute_gnu(Filename));
0250     auto [It, Inserted] = Cache.insert({Filename, {&Entry, nullptr}});
0251     auto &[CachedEntry, CachedRealPath] = It->getValue();
0252     if (!Inserted) {
0253       // The file is already present in the local cache. If we got here, it only
0254       // contains the real path. Let's make sure the entry is populated too.
0255       assert((!CachedEntry && CachedRealPath) && "entry already present");
0256       CachedEntry = &Entry;
0257     }
0258     return *CachedEntry;
0259   }
0260 
0261   /// Returns real path associated with the filename or nullptr if none is
0262   /// found.
0263   const CachedRealPath *findRealPathByFilename(StringRef Filename) const {
0264     assert(llvm::sys::path::is_absolute_gnu(Filename));
0265     auto It = Cache.find(Filename);
0266     return It == Cache.end() ? nullptr : It->getValue().second;
0267   }
0268 
0269   /// Associates the given real path with the filename and returns the given
0270   /// entry pointer (for convenience).
0271   const CachedRealPath &
0272   insertRealPathForFilename(StringRef Filename,
0273                             const CachedRealPath &RealPath) {
0274     assert(llvm::sys::path::is_absolute_gnu(Filename));
0275     auto [It, Inserted] = Cache.insert({Filename, {nullptr, &RealPath}});
0276     auto &[CachedEntry, CachedRealPath] = It->getValue();
0277     if (!Inserted) {
0278       // The file is already present in the local cache. If we got here, it only
0279       // contains the entry. Let's make sure the real path is populated too.
0280       assert((!CachedRealPath && CachedEntry) && "real path already present");
0281       CachedRealPath = &RealPath;
0282     }
0283     return *CachedRealPath;
0284   }
0285 };
0286 
0287 /// Reference to a CachedFileSystemEntry.
0288 /// If the underlying entry is an opened file, this wrapper returns the file
0289 /// contents and the scanned preprocessor directives.
0290 class EntryRef {
0291   /// The filename used to access this entry.
0292   std::string Filename;
0293 
0294   /// The underlying cached entry.
0295   const CachedFileSystemEntry &Entry;
0296 
0297   friend class DependencyScanningWorkerFilesystem;
0298 
0299 public:
0300   EntryRef(StringRef Name, const CachedFileSystemEntry &Entry)
0301       : Filename(Name), Entry(Entry) {}
0302 
0303   llvm::vfs::Status getStatus() const {
0304     llvm::vfs::Status Stat = Entry.getStatus();
0305     if (!Stat.isDirectory())
0306       Stat = llvm::vfs::Status::copyWithNewSize(Stat, getContents().size());
0307     return llvm::vfs::Status::copyWithNewName(Stat, Filename);
0308   }
0309 
0310   bool isError() const { return Entry.isError(); }
0311   bool isDirectory() const { return Entry.isDirectory(); }
0312 
0313   /// If the cached entry represents an error, promotes it into `ErrorOr`.
0314   llvm::ErrorOr<EntryRef> unwrapError() const {
0315     if (isError())
0316       return Entry.getError();
0317     return *this;
0318   }
0319 
0320   StringRef getContents() const { return Entry.getOriginalContents(); }
0321 
0322   std::optional<ArrayRef<dependency_directives_scan::Directive>>
0323   getDirectiveTokens() const {
0324     return Entry.getDirectiveTokens();
0325   }
0326 };
0327 
0328 /// A virtual file system optimized for the dependency discovery.
0329 ///
0330 /// It is primarily designed to work with source files whose contents was
0331 /// preprocessed to remove any tokens that are unlikely to affect the dependency
0332 /// computation.
0333 ///
0334 /// This is not a thread safe VFS. A single instance is meant to be used only in
0335 /// one thread. Multiple instances are allowed to service multiple threads
0336 /// running in parallel.
0337 class DependencyScanningWorkerFilesystem
0338     : public llvm::RTTIExtends<DependencyScanningWorkerFilesystem,
0339                                llvm::vfs::ProxyFileSystem> {
0340 public:
0341   static const char ID;
0342 
0343   DependencyScanningWorkerFilesystem(
0344       DependencyScanningFilesystemSharedCache &SharedCache,
0345       IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS);
0346 
0347   llvm::ErrorOr<llvm::vfs::Status> status(const Twine &Path) override;
0348   llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
0349   openFileForRead(const Twine &Path) override;
0350 
0351   std::error_code getRealPath(const Twine &Path,
0352                               SmallVectorImpl<char> &Output) override;
0353 
0354   std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
0355 
0356   /// Make it so that no paths bypass this VFS.
0357   void resetBypassedPathPrefix() { BypassedPathPrefix.reset(); }
0358   /// Set the prefix for paths that should bypass this VFS and go straight to
0359   /// the underlying VFS.
0360   void setBypassedPathPrefix(StringRef Prefix) { BypassedPathPrefix = Prefix; }
0361 
0362   /// Returns entry for the given filename.
0363   ///
0364   /// Attempts to use the local and shared caches first, then falls back to
0365   /// using the underlying filesystem.
0366   llvm::ErrorOr<EntryRef> getOrCreateFileSystemEntry(StringRef Filename);
0367 
0368   /// Ensure the directive tokens are populated for this file entry.
0369   ///
0370   /// Returns true if the directive tokens are populated for this file entry,
0371   /// false if not (i.e. this entry is not a file or its scan fails).
0372   bool ensureDirectiveTokensArePopulated(EntryRef Entry);
0373 
0374   /// Check whether \p Path exists. By default checks cached result of \c
0375   /// status(), and falls back on FS if unable to do so.
0376   bool exists(const Twine &Path) override;
0377 
0378 private:
0379   /// For a filename that's not yet associated with any entry in the caches,
0380   /// uses the underlying filesystem to either look up the entry based in the
0381   /// shared cache indexed by unique ID, or creates new entry from scratch.
0382   /// \p FilenameForLookup will always be an absolute path, and different than
0383   /// \p OriginalFilename if \p OriginalFilename is relative.
0384   llvm::ErrorOr<const CachedFileSystemEntry &>
0385   computeAndStoreResult(StringRef OriginalFilename,
0386                         StringRef FilenameForLookup);
0387 
0388   /// Represents a filesystem entry that has been stat-ed (and potentially read)
0389   /// and that's about to be inserted into the cache as `CachedFileSystemEntry`.
0390   struct TentativeEntry {
0391     llvm::vfs::Status Status;
0392     std::unique_ptr<llvm::MemoryBuffer> Contents;
0393 
0394     TentativeEntry(llvm::vfs::Status Status,
0395                    std::unique_ptr<llvm::MemoryBuffer> Contents = nullptr)
0396         : Status(std::move(Status)), Contents(std::move(Contents)) {}
0397   };
0398 
0399   /// Reads file at the given path. Enforces consistency between the file size
0400   /// in status and size of read contents.
0401   llvm::ErrorOr<TentativeEntry> readFile(StringRef Filename);
0402 
0403   /// Returns entry associated with the unique ID of the given tentative entry
0404   /// if there is some in the shared cache. Otherwise, constructs new one,
0405   /// associates it with the unique ID and returns the result.
0406   const CachedFileSystemEntry &
0407   getOrEmplaceSharedEntryForUID(TentativeEntry TEntry);
0408 
0409   /// Returns entry associated with the filename or nullptr if none is found.
0410   ///
0411   /// Returns entry from local cache if there is some. Otherwise, if the entry
0412   /// is found in the shared cache, writes it through the local cache and
0413   /// returns it. Otherwise returns nullptr.
0414   const CachedFileSystemEntry *
0415   findEntryByFilenameWithWriteThrough(StringRef Filename);
0416 
0417   /// Returns entry associated with the unique ID in the shared cache or nullptr
0418   /// if none is found.
0419   const CachedFileSystemEntry *
0420   findSharedEntryByUID(llvm::vfs::Status Stat) const {
0421     return SharedCache.getShardForUID(Stat.getUniqueID())
0422         .findEntryByUID(Stat.getUniqueID());
0423   }
0424 
0425   /// Associates the given entry with the filename in the local cache and
0426   /// returns it.
0427   const CachedFileSystemEntry &
0428   insertLocalEntryForFilename(StringRef Filename,
0429                               const CachedFileSystemEntry &Entry) {
0430     return LocalCache.insertEntryForFilename(Filename, Entry);
0431   }
0432 
0433   /// Returns entry associated with the filename in the shared cache if there is
0434   /// some. Otherwise, constructs new one with the given error code, associates
0435   /// it with the filename and returns the result.
0436   const CachedFileSystemEntry &
0437   getOrEmplaceSharedEntryForFilename(StringRef Filename, std::error_code EC) {
0438     return SharedCache.getShardForFilename(Filename)
0439         .getOrEmplaceEntryForFilename(Filename, EC);
0440   }
0441 
0442   /// Returns entry associated with the filename in the shared cache if there is
0443   /// some. Otherwise, associates the given entry with the filename and returns
0444   /// it.
0445   const CachedFileSystemEntry &
0446   getOrInsertSharedEntryForFilename(StringRef Filename,
0447                                     const CachedFileSystemEntry &Entry) {
0448     return SharedCache.getShardForFilename(Filename)
0449         .getOrInsertEntryForFilename(Filename, Entry);
0450   }
0451 
0452   void printImpl(raw_ostream &OS, PrintType Type,
0453                  unsigned IndentLevel) const override {
0454     printIndent(OS, IndentLevel);
0455     OS << "DependencyScanningFilesystem\n";
0456     getUnderlyingFS().print(OS, Type, IndentLevel + 1);
0457   }
0458 
0459   /// Whether this path should bypass this VFS and go straight to the underlying
0460   /// VFS.
0461   bool shouldBypass(StringRef Path) const;
0462 
0463   /// The global cache shared between worker threads.
0464   DependencyScanningFilesystemSharedCache &SharedCache;
0465   /// The local cache is used by the worker thread to cache file system queries
0466   /// locally instead of querying the global cache every time.
0467   DependencyScanningFilesystemLocalCache LocalCache;
0468 
0469   /// Prefix of paths that should go straight to the underlying VFS.
0470   std::optional<std::string> BypassedPathPrefix;
0471 
0472   /// The working directory to use for making relative paths absolute before
0473   /// using them for cache lookups.
0474   llvm::ErrorOr<std::string> WorkingDirForCacheLookup;
0475 
0476   void updateWorkingDirForCacheLookup();
0477 
0478   llvm::ErrorOr<StringRef>
0479   tryGetFilenameForLookup(StringRef OriginalFilename,
0480                           llvm::SmallVectorImpl<char> &PathBuf) const;
0481 };
0482 
0483 } // end namespace dependencies
0484 } // end namespace tooling
0485 } // end namespace clang
0486 
0487 #endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_DEPENDENCYSCANNINGFILESYSTEM_H