Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-05-10 08:44:41

0001 //===- Transforms/IPO/SampleProfileMatcher.h ----------*- C++ -*-===//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 //
0009 /// \file
0010 /// This file provides the interface for SampleProfileMatcher.
0011 //
0012 //===----------------------------------------------------------------------===//
0013 
0014 #ifndef LLVM_TRANSFORMS_IPO_SAMPLEPROFILEMATCHER_H
0015 #define LLVM_TRANSFORMS_IPO_SAMPLEPROFILEMATCHER_H
0016 
0017 #include "llvm/ADT/StringSet.h"
0018 #include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h"
0019 
0020 namespace llvm {
0021 
0022 using AnchorList = std::vector<std::pair<LineLocation, FunctionId>>;
0023 using AnchorMap = std::map<LineLocation, FunctionId>;
0024 
0025 // Sample profile matching - fuzzy match.
0026 class SampleProfileMatcher {
0027   Module &M;
0028   SampleProfileReader &Reader;
0029   LazyCallGraph &CG;
0030   const PseudoProbeManager *ProbeManager;
0031   const ThinOrFullLTOPhase LTOPhase;
0032   SampleProfileMap FlattenedProfiles;
0033   // For each function, the matcher generates a map, of which each entry is a
0034   // mapping from the source location of current build to the source location
0035   // in the profile.
0036   StringMap<LocToLocMap> FuncMappings;
0037 
0038   // Match state for an anchor/callsite.
0039   enum class MatchState {
0040     Unknown = 0,
0041     // Initial match between input profile and current IR.
0042     InitialMatch = 1,
0043     // Initial mismatch between input profile and current IR.
0044     InitialMismatch = 2,
0045     // InitialMatch stays matched after fuzzy profile matching.
0046     UnchangedMatch = 3,
0047     // InitialMismatch stays mismatched after fuzzy profile matching.
0048     UnchangedMismatch = 4,
0049     // InitialMismatch is recovered after fuzzy profile matching.
0050     RecoveredMismatch = 5,
0051     // InitialMatch is removed and becomes mismatched after fuzzy profile
0052     // matching.
0053     RemovedMatch = 6,
0054   };
0055 
0056   // For each function, store every callsite and its matching state into this
0057   // map, of which each entry is a pair of callsite location and MatchState.
0058   // This is used for profile staleness computation and report.
0059   StringMap<std::unordered_map<LineLocation, MatchState, LineLocationHash>>
0060       FuncCallsiteMatchStates;
0061 
0062   struct FuncToProfileNameMapHash {
0063     uint64_t
0064     operator()(const std::pair<const Function *, FunctionId> &P) const {
0065       return hash_combine(P.first, P.second);
0066     }
0067   };
0068   // A map from a pair of function and profile name to a boolean value
0069   // indicating whether they are matched. This is used as a cache for the
0070   // matching result.
0071   std::unordered_map<std::pair<const Function *, FunctionId>, bool,
0072                      FuncToProfileNameMapHash>
0073       FuncProfileMatchCache;
0074   // The new functions found by the call graph matching. The map's key is the
0075   // the new(renamed) function pointer and the value is old(unused) profile
0076   // name.
0077   std::unordered_map<Function *, FunctionId> FuncToProfileNameMap;
0078 
0079   // A map pointer to the FuncNameToProfNameMap in SampleProfileLoader,
0080   // which maps the function name to the matched profile name. This is used
0081   // for sample loader to look up profile using the new name.
0082   HashKeyMap<std::unordered_map, FunctionId, FunctionId> *FuncNameToProfNameMap;
0083 
0084   // A map pointer to the SymbolMap in SampleProfileLoader, which stores all
0085   // the original matched symbols before the matching. this is to determine if
0086   // the profile is unused(to be matched) or not.
0087   HashKeyMap<std::unordered_map, FunctionId, Function *> *SymbolMap;
0088 
0089   // The new functions from IR.
0090   HashKeyMap<std::unordered_map, FunctionId, Function *>
0091       FunctionsWithoutProfile;
0092 
0093   // Pointer to the Profile Symbol List in the reader.
0094   std::shared_ptr<ProfileSymbolList> PSL;
0095 
0096   // Profile mismatch statstics:
0097   uint64_t TotalProfiledFunc = 0;
0098   // Num of checksum-mismatched function.
0099   uint64_t NumStaleProfileFunc = 0;
0100   uint64_t TotalProfiledCallsites = 0;
0101   uint64_t NumMismatchedCallsites = 0;
0102   uint64_t NumRecoveredCallsites = 0;
0103   // Total samples for all profiled functions.
0104   uint64_t TotalFunctionSamples = 0;
0105   // Total samples for all checksum-mismatched functions.
0106   uint64_t MismatchedFunctionSamples = 0;
0107   uint64_t MismatchedCallsiteSamples = 0;
0108   uint64_t RecoveredCallsiteSamples = 0;
0109 
0110   // Profile call-graph matching statstics:
0111   uint64_t NumCallGraphRecoveredProfiledFunc = 0;
0112   uint64_t NumCallGraphRecoveredFuncSamples = 0;
0113 
0114   // A dummy name for unknown indirect callee, used to differentiate from a
0115   // non-call instruction that also has an empty callee name.
0116   static constexpr const char *UnknownIndirectCallee =
0117       "unknown.indirect.callee";
0118 
0119 public:
0120   SampleProfileMatcher(
0121       Module &M, SampleProfileReader &Reader, LazyCallGraph &CG,
0122       const PseudoProbeManager *ProbeManager, ThinOrFullLTOPhase LTOPhase,
0123       HashKeyMap<std::unordered_map, FunctionId, Function *> &SymMap,
0124       std::shared_ptr<ProfileSymbolList> PSL,
0125       HashKeyMap<std::unordered_map, FunctionId, FunctionId>
0126           &FuncNameToProfNameMap)
0127       : M(M), Reader(Reader), CG(CG), ProbeManager(ProbeManager),
0128         LTOPhase(LTOPhase), FuncNameToProfNameMap(&FuncNameToProfNameMap),
0129         SymbolMap(&SymMap), PSL(PSL) {};
0130   void runOnModule();
0131   void clearMatchingData() {
0132     // Do not clear FuncMappings, it stores IRLoc to ProfLoc remappings which
0133     // will be used for sample loader.
0134     // Do not clear FlattenedProfiles as it contains function names referenced
0135     // by FuncNameToProfNameMap. Clearing this memory could lead to a
0136     // use-after-free error.
0137     freeContainer(FuncCallsiteMatchStates);
0138     freeContainer(FunctionsWithoutProfile);
0139     freeContainer(FuncToProfileNameMap);
0140   }
0141 
0142 private:
0143   FunctionSamples *getFlattenedSamplesFor(const FunctionId &Fname) {
0144     auto It = FlattenedProfiles.find(Fname);
0145     if (It != FlattenedProfiles.end())
0146       return &It->second;
0147     return nullptr;
0148   }
0149   FunctionSamples *getFlattenedSamplesFor(const Function &F) {
0150     StringRef CanonFName = FunctionSamples::getCanonicalFnName(F);
0151     return getFlattenedSamplesFor(FunctionId(CanonFName));
0152   }
0153   template <typename T> inline void freeContainer(T &C) {
0154     T Empty;
0155     std::swap(C, Empty);
0156   }
0157   void getFilteredAnchorList(const AnchorMap &IRAnchors,
0158                              const AnchorMap &ProfileAnchors,
0159                              AnchorList &FilteredIRAnchorsList,
0160                              AnchorList &FilteredProfileAnchorList);
0161   void runOnFunction(Function &F);
0162   void findIRAnchors(const Function &F, AnchorMap &IRAnchors) const;
0163   void findProfileAnchors(const FunctionSamples &FS,
0164                           AnchorMap &ProfileAnchors) const;
0165   // Record the callsite match states for profile staleness report, the result
0166   // is saved in FuncCallsiteMatchStates.
0167   void recordCallsiteMatchStates(const Function &F, const AnchorMap &IRAnchors,
0168                                  const AnchorMap &ProfileAnchors,
0169                                  const LocToLocMap *IRToProfileLocationMap);
0170 
0171   bool isMismatchState(const enum MatchState &State) {
0172     return State == MatchState::InitialMismatch ||
0173            State == MatchState::UnchangedMismatch ||
0174            State == MatchState::RemovedMatch;
0175   };
0176 
0177   bool isInitialState(const enum MatchState &State) {
0178     return State == MatchState::InitialMatch ||
0179            State == MatchState::InitialMismatch;
0180   };
0181 
0182   bool isFinalState(const enum MatchState &State) {
0183     return State == MatchState::UnchangedMatch ||
0184            State == MatchState::UnchangedMismatch ||
0185            State == MatchState::RecoveredMismatch ||
0186            State == MatchState::RemovedMatch;
0187   };
0188 
0189   void countCallGraphRecoveredSamples(
0190       const FunctionSamples &FS,
0191       std::unordered_set<FunctionId> &MatchedUnusedProfile);
0192   // Count the samples of checksum mismatched function for the top-level
0193   // function and all inlinees.
0194   void countMismatchedFuncSamples(const FunctionSamples &FS, bool IsTopLevel);
0195   // Count the number of mismatched or recovered callsites.
0196   void countMismatchCallsites(const FunctionSamples &FS);
0197   // Count the samples of mismatched or recovered callsites for top-level
0198   // function and all inlinees.
0199   void countMismatchedCallsiteSamples(const FunctionSamples &FS);
0200   void computeAndReportProfileStaleness();
0201   void UpdateWithSalvagedProfiles();
0202 
0203   LocToLocMap &getIRToProfileLocationMap(const Function &F) {
0204     return FuncMappings[FunctionSamples::getCanonicalFnName(F.getName())];
0205   }
0206   void distributeIRToProfileLocationMap();
0207   void distributeIRToProfileLocationMap(FunctionSamples &FS);
0208   LocToLocMap longestCommonSequence(const AnchorList &IRCallsiteAnchors,
0209                                     const AnchorList &ProfileCallsiteAnchors,
0210                                     bool MatchUnusedFunction);
0211   void matchNonCallsiteLocs(const LocToLocMap &AnchorMatchings,
0212                             const AnchorMap &IRAnchors,
0213                             LocToLocMap &IRToProfileLocationMap);
0214   void runStaleProfileMatching(const Function &F, const AnchorMap &IRAnchors,
0215                                const AnchorMap &ProfileAnchors,
0216                                LocToLocMap &IRToProfileLocationMap,
0217                                bool RunCFGMatching, bool RunCGMatching);
0218   // If the function doesn't have profile, return the pointer to the function.
0219   bool functionHasProfile(const FunctionId &IRFuncName,
0220                           Function *&FuncWithoutProfile);
0221   bool isProfileUnused(const FunctionId &ProfileFuncName);
0222   bool functionMatchesProfileHelper(const Function &IRFunc,
0223                                     const FunctionId &ProfFunc);
0224   // Determine if the function matches profile. If FindMatchedProfileOnly is
0225   // set, only search the existing matched function. Otherwise, try matching the
0226   // two functions.
0227   bool functionMatchesProfile(const FunctionId &IRFuncName,
0228                               const FunctionId &ProfileFuncName,
0229                               bool FindMatchedProfileOnly);
0230   // Determine if the function matches profile by computing a similarity ratio
0231   // between two sequences of callsite anchors extracted from function and
0232   // profile. If it's above the threshold, the function matches the profile.
0233   bool functionMatchesProfile(Function &IRFunc, const FunctionId &ProfFunc,
0234                               bool FindMatchedProfileOnly);
0235   // Find functions that don't show in the profile or profile symbol list,
0236   // which are supposed to be new functions. We use them as the targets for
0237   // call graph matching.
0238   void findFunctionsWithoutProfile();
0239   void reportOrPersistProfileStats();
0240 };
0241 } // end namespace llvm
0242 #endif // LLVM_TRANSFORMS_IPO_SAMPLEPROFILEMATCHER_H