Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-05-10 08:43:23

0001 //===- CodeGenData.h --------------------------------------------*- C++ -*-===//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 //
0009 // This file contains support for codegen data that has stable summary which
0010 // can be used to optimize the code in the subsequent codegen.
0011 //
0012 //===----------------------------------------------------------------------===//
0013 
0014 #ifndef LLVM_CGDATA_CODEGENDATA_H
0015 #define LLVM_CGDATA_CODEGENDATA_H
0016 
0017 #include "llvm/ADT/BitmaskEnum.h"
0018 #include "llvm/ADT/StableHashing.h"
0019 #include "llvm/Bitcode/BitcodeReader.h"
0020 #include "llvm/CGData/OutlinedHashTree.h"
0021 #include "llvm/CGData/OutlinedHashTreeRecord.h"
0022 #include "llvm/CGData/StableFunctionMapRecord.h"
0023 #include "llvm/IR/Module.h"
0024 #include "llvm/Object/ObjectFile.h"
0025 #include "llvm/Support/Caching.h"
0026 #include "llvm/Support/ErrorHandling.h"
0027 #include "llvm/TargetParser/Triple.h"
0028 #include <mutex>
0029 
0030 namespace llvm {
0031 
/// Kinds of codegen data sections.
///
/// The enumerators are generated from the CG_DATA_SECT_ENTRY records in
/// CodeGenData.inc: the macro below keeps only each record's `Kind` field and
/// drops the section-name and prefix fields.
/// NOTE(review): CG_DATA_SECT_ENTRY is not #undef'd here; presumably
/// CodeGenData.inc undefines it after use -- confirm.
enum CGDataSectKind {
#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) Kind,
#include "llvm/CGData/CodeGenData.inc"
};
0036 
/// Going by its name and signature, returns the section name used for codegen
/// data of kind \p CGSK in object format \p OF.
/// NOTE(review): \p AddSegmentInfo (default: true) presumably controls whether
/// segment information is included in the returned name -- confirm against the
/// out-of-line definition.
std::string getCodeGenDataSectionName(CGDataSectKind CGSK,
                                      Triple::ObjectFormatType OF,
                                      bool AddSegmentInfo = true);
0040 
/// Kinds of codegen data. Each non-zero enumerator occupies its own bit and
/// the enum is registered with LLVM_MARK_AS_BITMASK_ENUM, with
/// StableFunctionMergingMap declared as the largest value.
enum class CGDataKind {
  // No kind.
  Unknown = 0x0,
  // A function outlining info.
  FunctionOutlinedHashTree = 0x1,
  // A function merging info.
  StableFunctionMergingMap = 0x2,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/StableFunctionMergingMap)
};
0049 
/// Returns the error category that make_error_code() pairs with cgdata_error
/// values. Defined out of line.
const std::error_category &cgdata_category();
0051 
/// Error codes for codegen data handling. Values are converted to
/// std::error_code by make_error_code() and carried by CGDataError.
/// `success` is 0 and must not be used to construct a CGDataError (its
/// constructor asserts on it).
enum class cgdata_error {
  success = 0,
  eof,
  bad_magic,
  bad_header,
  empty_cgdata,
  malformed,
  unsupported_version,
};
0061 
0062 inline std::error_code make_error_code(cgdata_error E) {
0063   return std::error_code(static_cast<int>(E), cgdata_category());
0064 }
0065 
0066 class CGDataError : public ErrorInfo<CGDataError> {
0067 public:
0068   CGDataError(cgdata_error Err, const Twine &ErrStr = Twine())
0069       : Err(Err), Msg(ErrStr.str()) {
0070     assert(Err != cgdata_error::success && "Not an error");
0071   }
0072 
0073   std::string message() const override;
0074 
0075   void log(raw_ostream &OS) const override { OS << message(); }
0076 
0077   std::error_code convertToErrorCode() const override {
0078     return make_error_code(Err);
0079   }
0080 
0081   cgdata_error get() const { return Err; }
0082   const std::string &getMessage() const { return Msg; }
0083 
0084   /// Consume an Error and return the raw enum value contained within it, and
0085   /// the optional error message. The Error must either be a success value, or
0086   /// contain a single CGDataError.
0087   static std::pair<cgdata_error, std::string> take(Error E) {
0088     auto Err = cgdata_error::success;
0089     std::string Msg;
0090     handleAllErrors(std::move(E), [&Err, &Msg](const CGDataError &IPE) {
0091       assert(Err == cgdata_error::success && "Multiple errors encountered");
0092       Err = IPE.get();
0093       Msg = IPE.getMessage();
0094     });
0095     return {Err, Msg};
0096   }
0097 
0098   static char ID;
0099 
0100 private:
0101   cgdata_error Err;
0102   std::string Msg;
0103 };
0104 
/// Codegen data mode.
/// NOTE(review): not referenced anywhere else in this header; by the names it
/// presumably distinguishes no codegen data use, reading it, and writing it --
/// confirm against the users of this enum.
enum CGDataMode {
  None,
  Read,
  Write,
};
0110 
0111 class CodeGenData {
0112   /// Global outlined hash tree that has oulined hash sequences across modules.
0113   std::unique_ptr<OutlinedHashTree> PublishedHashTree;
0114   /// Global stable function map that has stable function info across modules.
0115   std::unique_ptr<StableFunctionMap> PublishedStableFunctionMap;
0116 
0117   /// This flag is set when -fcodegen-data-generate is passed.
0118   /// Or, it can be mutated with -fcodegen-data-thinlto-two-rounds.
0119   bool EmitCGData;
0120 
0121   /// This is a singleton instance which is thread-safe. Unlike profile data
0122   /// which is largely function-based, codegen data describes the whole module.
0123   /// Therefore, this can be initialized once, and can be used across modules
0124   /// instead of constructing the same one for each codegen backend.
0125   static std::unique_ptr<CodeGenData> Instance;
0126   static std::once_flag OnceFlag;
0127 
0128   CodeGenData() = default;
0129 
0130 public:
0131   ~CodeGenData() = default;
0132 
0133   static CodeGenData &getInstance();
0134 
0135   /// Returns true if we have a valid outlined hash tree.
0136   bool hasOutlinedHashTree() {
0137     return PublishedHashTree && !PublishedHashTree->empty();
0138   }
0139   bool hasStableFunctionMap() {
0140     return PublishedStableFunctionMap && !PublishedStableFunctionMap->empty();
0141   }
0142 
0143   /// Returns the outlined hash tree. This can be globally used in a read-only
0144   /// manner.
0145   const OutlinedHashTree *getOutlinedHashTree() {
0146     return PublishedHashTree.get();
0147   }
0148   const StableFunctionMap *getStableFunctionMap() {
0149     return PublishedStableFunctionMap.get();
0150   }
0151 
0152   /// Returns true if we should write codegen data.
0153   bool emitCGData() { return EmitCGData; }
0154 
0155   /// Publish the (globally) merged or read outlined hash tree.
0156   void publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) {
0157     PublishedHashTree = std::move(HashTree);
0158     // Ensure we disable emitCGData as we do not want to read and write both.
0159     EmitCGData = false;
0160   }
0161   void
0162   publishStableFunctionMap(std::unique_ptr<StableFunctionMap> FunctionMap) {
0163     PublishedStableFunctionMap = std::move(FunctionMap);
0164     // Ensure we disable emitCGData as we do not want to read and write both.
0165     EmitCGData = false;
0166   }
0167 };
0168 
0169 namespace cgdata {
0170 
0171 inline bool hasOutlinedHashTree() {
0172   return CodeGenData::getInstance().hasOutlinedHashTree();
0173 }
0174 
0175 inline bool hasStableFunctionMap() {
0176   return CodeGenData::getInstance().hasStableFunctionMap();
0177 }
0178 
0179 inline const OutlinedHashTree *getOutlinedHashTree() {
0180   return CodeGenData::getInstance().getOutlinedHashTree();
0181 }
0182 
0183 inline const StableFunctionMap *getStableFunctionMap() {
0184   return CodeGenData::getInstance().getStableFunctionMap();
0185 }
0186 
0187 inline bool emitCGData() { return CodeGenData::getInstance().emitCGData(); }
0188 
0189 inline void
0190 publishOutlinedHashTree(std::unique_ptr<OutlinedHashTree> HashTree) {
0191   CodeGenData::getInstance().publishOutlinedHashTree(std::move(HashTree));
0192 }
0193 
0194 inline void
0195 publishStableFunctionMap(std::unique_ptr<StableFunctionMap> FunctionMap) {
0196   CodeGenData::getInstance().publishStableFunctionMap(std::move(FunctionMap));
0197 }
0198 
0199 struct StreamCacheData {
0200   /// Backing buffer for serialized data stream.
0201   SmallVector<SmallString<0>> Outputs;
0202   /// Callback function to add serialized data to the stream.
0203   AddStreamFn AddStream;
0204   /// Backing buffer for cached data.
0205   SmallVector<std::unique_ptr<MemoryBuffer>> Files;
0206   /// Cache mechanism for storing data.
0207   FileCache Cache;
0208 
0209   StreamCacheData(unsigned Size, const FileCache &OrigCache,
0210                   const Twine &CachePrefix)
0211       : Outputs(Size), Files(Size) {
0212     AddStream = [&](size_t Task, const Twine &ModuleName) {
0213       return std::make_unique<CachedFileStream>(
0214           std::make_unique<raw_svector_ostream>(Outputs[Task]));
0215     };
0216 
0217     if (OrigCache.isValid()) {
0218       auto CGCacheOrErr =
0219           localCache("ThinLTO", CachePrefix, OrigCache.getCacheDirectoryPath(),
0220                      [&](size_t Task, const Twine &ModuleName,
0221                          std::unique_ptr<MemoryBuffer> MB) {
0222                        Files[Task] = std::move(MB);
0223                      });
0224       if (Error Err = CGCacheOrErr.takeError())
0225         report_fatal_error(std::move(Err));
0226       Cache = std::move(*CGCacheOrErr);
0227     }
0228   }
0229   StreamCacheData() = delete;
0230 
0231   /// Retrieve results from either the cache or the stream.
0232   std::unique_ptr<SmallVector<StringRef>> getResult() {
0233     unsigned NumOutputs = Outputs.size();
0234     auto Result = std::make_unique<SmallVector<StringRef>>(NumOutputs);
0235     for (unsigned I = 0; I < NumOutputs; ++I)
0236       if (Files[I])
0237         (*Result)[I] = Files[I]->getBuffer();
0238       else
0239         (*Result)[I] = Outputs[I];
0240     return Result;
0241   }
0242 };
0243 
/// Save \p TheModule before the first codegen round.
/// \param TheModule the module to save; taken by const reference.
/// \param Task the partition number in the parallel code generation process.
/// \param AddStream the callback used to add the serialized module to the
/// stream.
void saveModuleForTwoRounds(const Module &TheModule, unsigned Task,
                            AddStreamFn AddStream);
0250 
/// Load the optimized bitcode module for the second codegen round.
/// \param OrigModule the original bitcode module.
/// \param Task the partition number in the parallel code generation process.
/// \param Context provides the environment settings for module operations.
/// \param IRFiles optimized bitcode module files needed for loading.
/// \return A unique_ptr to the loaded Module, or nullptr if loading fails.
std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule,
                                               unsigned Task,
                                               LLVMContext &Context,
                                               ArrayRef<StringRef> IRFiles);
0261 
/// Merge the codegen data from the scratch objects \p ObjectFiles from the
/// first codegen round.
/// \return the combined hash of the merged codegen data. The result is wrapped
/// in Expected, so callers must check for an error before using the hash.
Expected<stable_hash> mergeCodeGenData(ArrayRef<StringRef> ObjectFiles);
0266 
/// Warning helpers, defined out of line.
/// The first overload takes ownership of the Error \p E; the second takes a
/// \p Message. Both accept an optional \p Whence, and the second an optional
/// \p Hint; all of these default to empty.
/// NOTE(review): presumably \p Whence names where the problem originated and
/// \p Hint is a suggestion printed after the message -- confirm against the
/// definitions.
void warn(Error E, StringRef Whence = "");
void warn(Twine Message, std::string Whence = "", std::string Hint = "");
0269 
0270 } // end namespace cgdata
0271 
/// Constants and the header layout for indexed codegen data.
namespace IndexedCGData {

// A signature for data validation, representing "\xffcgdata\x81" in
// little-endian order, i.e. the least significant byte (0xff) comes first and
// the most significant byte (0x81) last.
const uint64_t Magic = 0x81617461646763ff;

/// Known versions of the indexed codegen data format.
enum CGDataVersion {
  // Version 1 is the first version. This version supports the outlined
  // hash tree.
  Version1 = 1,
  // Version 2 supports the stable function merging map.
  Version2 = 2,
  // NOTE(review): CG_DATA_INDEX_VERSION is not defined in this header;
  // presumably it comes from CodeGenData.inc -- confirm.
  CurrentVersion = CG_DATA_INDEX_VERSION
};
/// The current format version, widened to 64 bits.
const uint64_t Version = CGDataVersion::CurrentVersion;

/// Header of indexed codegen data.
/// NOTE(review): by their names, Magic and Version are presumably checked
/// against the constants above, DataKind presumably holds CGDataKind bits, and
/// the two offsets locate the respective payloads -- confirm against
/// readFromBuffer() and the writer.
struct Header {
  uint64_t Magic;
  uint32_t Version;
  uint32_t DataKind;
  uint64_t OutlinedHashTreeOffset;
  uint64_t StableFunctionMapOffset;

  // New fields should only be added at the end to ensure that the size
  // computation is correct. The methods below need to be updated to ensure that
  // the new field is read correctly.

  // Reads a header struct from the buffer starting at Curr. Returns the header
  // or an Error (the result is wrapped in Expected). Defined out of line.
  static Expected<Header> readFromBuffer(const unsigned char *Curr);
};

} // end namespace IndexedCGData
0304 
0305 } // end namespace llvm
0306 
0307 #endif // LLVM_CGDATA_CODEGENDATA_H