From 070702c9be2fb437b0765532c03e98c642951906 Mon Sep 17 00:00:00 2001 From: wlei Date: Mon, 29 Jul 2024 10:17:46 -0700 Subject: [PATCH 1/8] [SampleFDO] Read top-level functions recovered by call-graph matching --- .../llvm/ProfileData/SampleProfReader.h | 47 ++++ .../Transforms/IPO/SampleProfileMatcher.h | 1 + llvm/lib/ProfileData/SampleProfReader.cpp | 224 +++++++++------ .../Transforms/IPO/SampleProfileMatcher.cpp | 64 ++++- ...seudo-probe-stale-profile-toplev-func.prof | 23 ++ .../pseudo-probe-stale-profile-toplev-func.ll | 258 ++++++++++++++++++ 6 files changed, 521 insertions(+), 96 deletions(-) create mode 100644 llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-toplev-func.prof create mode 100644 llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h index f4bdc6525308d..b124233a02d11 100644 --- a/llvm/include/llvm/ProfileData/SampleProfReader.h +++ b/llvm/include/llvm/ProfileData/SampleProfReader.h @@ -392,6 +392,11 @@ class SampleProfileReader { /// which doesn't support loading function profiles on demand. virtual bool collectFuncsFromModule() { return false; } + virtual std::error_code readOnDemand(const DenseSet &FuncsToUse, + SampleProfileMap &Profiles) { + return sampleprof_error::not_implemented; + }; + /// Print all the profiles on stream \p OS. void dump(raw_ostream &OS = dbgs()); @@ -413,6 +418,16 @@ class SampleProfileReader { if (It != Profiles.end()) return &It->second; + if (FuncNameToProfNameMap && !FuncNameToProfNameMap->empty()) { + auto R = FuncNameToProfNameMap->find(FunctionId(Fname)); + if (R != FuncNameToProfNameMap->end()) { + Fname = R->second.stringRef(); + auto It = Profiles.find(FunctionId(Fname)); + if (It != Profiles.end()) + return &It->second; + } + } + if (Remapper) { if (auto NameInProfile = Remapper->lookUpNameInProfile(Fname)) { auto It = Profiles.find(FunctionId(*NameInProfile)); @@ -494,6 +509,11 @@ class SampleProfileReader { void setModule(const Module *Mod) { M = Mod; } + void setFuncNameToProfNameMap( + HashKeyMap *FPMap) { + FuncNameToProfNameMap = FPMap; + } + protected: /// Map every function to its associated profile. /// @@ -522,6 +542,21 @@ class SampleProfileReader { std::unique_ptr Remapper; + // A map pointer to the FuncNameToProfNameMap in SampleProfileLoader, + // which maps the function name to the matched profile name. This is used + // for sample loader to look up profile using the new name. + HashKeyMap + *FuncNameToProfNameMap = nullptr; + + // A map from a function's context hash to its meta data section range, used + // for on-demand read function profile metadata. + std::unordered_map> + FContextToMetaDataSecRange; + + std::pair LBRProfileSecRange; + + bool ProfileHasAttribute = false; + /// \brief Whether samples are collected based on pseudo probes. bool ProfileIsProbeBased = false; @@ -621,6 +656,8 @@ class SampleProfileReaderBinary : public SampleProfileReader { /// Read the next function profile instance. std::error_code readFuncProfile(const uint8_t *Start); + std::error_code readFuncProfile(const uint8_t *Start, + SampleProfileMap &Profiles); /// Read the contents of the given profile instance. std::error_code readProfile(FunctionSamples &FProfile); @@ -720,11 +757,15 @@ class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary { std::error_code readSecHdrTableEntry(uint64_t Idx); std::error_code readSecHdrTable(); + std::error_code readFuncMetadataOnDemand(bool ProfileHasAttribute, + SampleProfileMap &Profiles); std::error_code readFuncMetadata(bool ProfileHasAttribute); std::error_code readFuncMetadata(bool ProfileHasAttribute, FunctionSamples *FProfile); std::error_code readFuncOffsetTable(); std::error_code readFuncProfiles(); + std::error_code readFuncProfiles(const DenseSet &FuncsToUse, + SampleProfileMap &Profiles); std::error_code readNameTableSec(bool IsMD5, bool FixedLengthMD5); std::error_code readCSNameTableSec(); std::error_code readProfileSymbolList(); @@ -776,6 +817,12 @@ class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary { /// the reader has been given a module. bool collectFuncsFromModule() override; + /// Read the profiles on-demand for the given functions. This is used after + /// stale call graph matching finds new functions whose profiles aren't read + /// at the beginning and we need to re-read the profiles. + std::error_code readOnDemand(const DenseSet &FuncsToUse, + SampleProfileMap &Profiles) override; + std::unique_ptr getProfileSymbolList() override { return std::move(ProfSymList); }; diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h index a67f158433391..67edea42e2fe1 100644 --- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h +++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h @@ -198,6 +198,7 @@ class SampleProfileMatcher { // function and all inlinees. void countMismatchedCallsiteSamples(const FunctionSamples &FS); void computeAndReportProfileStaleness(); + void UpdateSampleLoaderWithRecoveredProfiles(); LocToLocMap &getIRToProfileLocationMap(const Function &F) { auto Ret = FuncMappings.try_emplace( diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp index 4752465fc072e..f555da866f36e 100644 --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -653,7 +653,8 @@ SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) { } std::error_code -SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) { +SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start, + SampleProfileMap &Profiles) { Data = Start; auto NumHeadSamples = readNumber(); if (std::error_code EC = NumHeadSamples.getError()) @@ -678,6 +679,11 @@ SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) { return sampleprof_error::success; } +std::error_code +SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) { + return readFuncProfile(Start, Profiles); +} + std::error_code SampleProfileReaderBinary::readImpl() { ProfileIsFS = ProfileIsFSDisciminator; FunctionSamples::ProfileIsFS = ProfileIsFS; @@ -725,6 +731,7 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection( break; } case SecLBRProfile: + LBRProfileSecRange = std::make_pair(Data, End); if (std::error_code EC = readFuncProfiles()) return EC; break; @@ -745,9 +752,9 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection( ProfileIsProbeBased = hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased); FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased; - bool HasAttribute = + ProfileHasAttribute = hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute); - if (std::error_code EC = readFuncMetadata(HasAttribute)) + if (std::error_code EC = readFuncMetadata(ProfileHasAttribute)) return EC; break; } @@ -791,6 +798,19 @@ bool SampleProfileReaderExtBinaryBase::useFuncOffsetList() const { return false; } +std::error_code SampleProfileReaderExtBinaryBase::readOnDemand( + const DenseSet &FuncsToUse, SampleProfileMap &Profiles) { + Data = LBRProfileSecRange.first; + End = LBRProfileSecRange.second; + if (std::error_code EC = readFuncProfiles(FuncsToUse, Profiles)) + return EC; + End = Data; + + if (std::error_code EC = + readFuncMetadataOnDemand(ProfileHasAttribute, Profiles)) + return EC; + return sampleprof_error::success; +} bool SampleProfileReaderExtBinaryBase::collectFuncsFromModule() { if (!M) @@ -838,6 +858,95 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() { return sampleprof_error::success; } +std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles( + const DenseSet &FuncsToUse, SampleProfileMap &Profiles) { + const uint8_t *Start = Data; + + if (Remapper) { + for (auto Name : FuncsToUse) { + Remapper->insert(Name); + } + } + + if (ProfileIsCS) { + assert(useFuncOffsetList()); + DenseSet FuncGuidsToUse; + if (useMD5()) { + for (auto Name : FuncsToUse) + FuncGuidsToUse.insert(Function::getGUID(Name)); + } + + // For each function in current module, load all context profiles for + // the function as well as their callee contexts which can help profile + // guided importing for ThinLTO. This can be achieved by walking + // through an ordered context container, where contexts are laid out + // as if they were walked in preorder of a context trie. While + // traversing the trie, a link to the highest common ancestor node is + // kept so that all of its decendants will be loaded. + const SampleContext *CommonContext = nullptr; + for (const auto &NameOffset : FuncOffsetList) { + const auto &FContext = NameOffset.first; + FunctionId FName = FContext.getFunction(); + StringRef FNameString; + if (!useMD5()) + FNameString = FName.stringRef(); + + // For function in the current module, keep its farthest ancestor + // context. This can be used to load itself and its child and + // sibling contexts. + if ((useMD5() && FuncGuidsToUse.count(FName.getHashCode())) || + (!useMD5() && (FuncsToUse.count(FNameString) || + (Remapper && Remapper->exist(FNameString))))) { + if (!CommonContext || !CommonContext->isPrefixOf(FContext)) + CommonContext = &FContext; + } + + if (CommonContext == &FContext || + (CommonContext && CommonContext->isPrefixOf(FContext))) { + // Load profile for the current context which originated from + // the common ancestor. + const uint8_t *FuncProfileAddr = Start + NameOffset.second; + if (std::error_code EC = readFuncProfile(FuncProfileAddr)) + return EC; + } + } + } else if (useMD5()) { + assert(!useFuncOffsetList()); + for (auto Name : FuncsToUse) { + auto GUID = MD5Hash(Name); + auto iter = FuncOffsetTable.find(GUID); + if (iter == FuncOffsetTable.end()) + continue; + const uint8_t *FuncProfileAddr = Start + iter->second; + if (std::error_code EC = readFuncProfile(FuncProfileAddr, Profiles)) + return EC; + } + } else if (Remapper) { + assert(useFuncOffsetList()); + for (auto NameOffset : FuncOffsetList) { + SampleContext FContext(NameOffset.first); + auto FuncName = FContext.getFunction(); + StringRef FuncNameStr = FuncName.stringRef(); + if (!FuncsToUse.count(FuncNameStr) && !Remapper->exist(FuncNameStr)) + continue; + const uint8_t *FuncProfileAddr = Start + NameOffset.second; + if (std::error_code EC = readFuncProfile(FuncProfileAddr, Profiles)) + return EC; + } + } else { + assert(!useFuncOffsetList()); + for (auto Name : FuncsToUse) { + + auto iter = FuncOffsetTable.find(MD5Hash(Name)); + if (iter == FuncOffsetTable.end()) + continue; + const uint8_t *FuncProfileAddr = Start + iter->second; + if (std::error_code EC = readFuncProfile(FuncProfileAddr, Profiles)) + return EC; + } + } +} + std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() { // Collect functions used by current module if the Reader has been // given a module. @@ -849,7 +958,6 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() { // When LoadFuncsToBeUsed is false, we are using LLVM tool, need to read all // profiles. - const uint8_t *Start = Data; if (!LoadFuncsToBeUsed) { while (Data < End) { if (std::error_code EC = readFuncProfile(Data)) @@ -858,88 +966,8 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() { assert(Data == End && "More data is read than expected"); } else { // Load function profiles on demand. - if (Remapper) { - for (auto Name : FuncsToUse) { - Remapper->insert(Name); - } - } - - if (ProfileIsCS) { - assert(useFuncOffsetList()); - DenseSet FuncGuidsToUse; - if (useMD5()) { - for (auto Name : FuncsToUse) - FuncGuidsToUse.insert(Function::getGUID(Name)); - } - - // For each function in current module, load all context profiles for - // the function as well as their callee contexts which can help profile - // guided importing for ThinLTO. This can be achieved by walking - // through an ordered context container, where contexts are laid out - // as if they were walked in preorder of a context trie. While - // traversing the trie, a link to the highest common ancestor node is - // kept so that all of its decendants will be loaded. - const SampleContext *CommonContext = nullptr; - for (const auto &NameOffset : FuncOffsetList) { - const auto &FContext = NameOffset.first; - FunctionId FName = FContext.getFunction(); - StringRef FNameString; - if (!useMD5()) - FNameString = FName.stringRef(); - - // For function in the current module, keep its farthest ancestor - // context. This can be used to load itself and its child and - // sibling contexts. - if ((useMD5() && FuncGuidsToUse.count(FName.getHashCode())) || - (!useMD5() && (FuncsToUse.count(FNameString) || - (Remapper && Remapper->exist(FNameString))))) { - if (!CommonContext || !CommonContext->isPrefixOf(FContext)) - CommonContext = &FContext; - } - - if (CommonContext == &FContext || - (CommonContext && CommonContext->isPrefixOf(FContext))) { - // Load profile for the current context which originated from - // the common ancestor. - const uint8_t *FuncProfileAddr = Start + NameOffset.second; - if (std::error_code EC = readFuncProfile(FuncProfileAddr)) - return EC; - } - } - } else if (useMD5()) { - assert(!useFuncOffsetList()); - for (auto Name : FuncsToUse) { - auto GUID = MD5Hash(Name); - auto iter = FuncOffsetTable.find(GUID); - if (iter == FuncOffsetTable.end()) - continue; - const uint8_t *FuncProfileAddr = Start + iter->second; - if (std::error_code EC = readFuncProfile(FuncProfileAddr)) - return EC; - } - } else if (Remapper) { - assert(useFuncOffsetList()); - for (auto NameOffset : FuncOffsetList) { - SampleContext FContext(NameOffset.first); - auto FuncName = FContext.getFunction(); - StringRef FuncNameStr = FuncName.stringRef(); - if (!FuncsToUse.count(FuncNameStr) && !Remapper->exist(FuncNameStr)) - continue; - const uint8_t *FuncProfileAddr = Start + NameOffset.second; - if (std::error_code EC = readFuncProfile(FuncProfileAddr)) - return EC; - } - } else { - assert(!useFuncOffsetList()); - for (auto Name : FuncsToUse) { - auto iter = FuncOffsetTable.find(MD5Hash(Name)); - if (iter == FuncOffsetTable.end()) - continue; - const uint8_t *FuncProfileAddr = Start + iter->second; - if (std::error_code EC = readFuncProfile(FuncProfileAddr)) - return EC; - } - } + if (std::error_code EC = readFuncProfiles(FuncsToUse, Profiles)) + return EC; Data = End; } assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) && @@ -1245,6 +1273,27 @@ SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute, return sampleprof_error::success; } +std::error_code SampleProfileReaderExtBinaryBase::readFuncMetadataOnDemand( + bool ProfileHasAttribute, SampleProfileMap &Profiles) { + if (FContextToMetaDataSecRange.empty()) + return sampleprof_error::success; + + for (auto &I : Profiles) { + FunctionSamples *FProfile = &I.second; + auto R = + FContextToMetaDataSecRange.find(FProfile->getContext().getHashCode()); + if (R == FContextToMetaDataSecRange.end()) + continue; + + Data = R->second.first; + End = R->second.second; + if (std::error_code EC = readFuncMetadata(ProfileHasAttribute, FProfile)) + return EC; + assert(Data == End && "More data is read than expected"); + } + return sampleprof_error::success; +} + std::error_code SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) { while (Data < End) { @@ -1257,8 +1306,11 @@ SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) { if (It != Profiles.end()) FProfile = &It->second; + const uint8_t *Start = Data; if (std::error_code EC = readFuncMetadata(ProfileHasAttribute, FProfile)) return EC; + + FContextToMetaDataSecRange[FContext.getHashCode()] = {Start, Data}; } assert(Data == End && "More data is read than expected"); diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp index 312672e56b017..b9adc6a0631b8 100644 --- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp @@ -782,6 +782,26 @@ bool SampleProfileMatcher::functionMatchesProfileHelper( float Similarity = 0.0; const auto *FSFlattened = getFlattenedSamplesFor(ProfFunc); + // Check if the function is top-level function. For extended profile format, + // if a function profile is unused and it's top-level, even if the profile is + // matched, it's not found in the profile. This is because sample reader only + // read the used profile at the beginning, we need to read the profile + // on-demand. Also save it into the FlattenedProfiles for future look-up. + if (!FSFlattened) { + DenseSet TopLevelFunc; + TopLevelFunc.insert(ProfFunc.stringRef()); + SampleProfileMap TopLevelProfile; + Reader.readOnDemand(TopLevelFunc, TopLevelProfile); + assert(TopLevelProfile.size() <= 1 && + "More than one profile is found for top-level function"); + if (!TopLevelProfile.empty()) { + LLVM_DEBUG(dbgs() << "Read top-level function " << ProfFunc + << " for call-graph matching\n"); + auto &FS = TopLevelProfile.begin()->second; + FSFlattened = + &(FlattenedProfiles.create(FS.getContext()) = std::move(FS)); + } + } if (!FSFlattened) return false; // The check for similarity or checksum may not be reliable if the function is @@ -863,6 +883,39 @@ bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc, return Matched; } +void SampleProfileMatcher::UpdateSampleLoaderWithRecoveredProfiles() { + DenseSet RecoveredFuncs; + // Update FuncNameToProfNameMap and SymbolMap. + for (auto &I : FuncToProfileNameMap) { + assert(I.first && "New function is null"); + FunctionId FuncName(I.first->getName()); + RecoveredFuncs.insert(I.second.stringRef()); + FuncNameToProfNameMap->emplace(FuncName, I.second); + + // We need to remove the old entry to avoid duplicating the function + // processing. + SymbolMap->erase(FuncName); + SymbolMap->emplace(I.second, I.first); + } + + // Read the top-level profiles for the recovered function profiles. This is + // because in extended binary format it only loads the top-level profile for + // the functions in the new build but not the recovered functions which is + // from the old build. + SampleProfileMap TopLevelRecoveredProfiles; + Reader.readOnDemand(RecoveredFuncs, TopLevelRecoveredProfiles); + auto &Profiles = Reader.getProfiles(); + for (auto &I : TopLevelRecoveredProfiles) { + LLVM_DEBUG(dbgs() << "Top-level function " << I.second.getFunction() + << " is recovered and re-read by the sample reader.\n"); + auto &Ctx = I.second.getContext(); + assert(Profiles.find(Ctx) == Profiles.end() && + "Top level profile is found for the unused profile"); + Profiles.create(Ctx) = std::move(I.second); + } + Reader.setFuncNameToProfNameMap(FuncNameToProfNameMap); +} + void SampleProfileMatcher::runOnModule() { ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles, FunctionSamples::ProfileIsCS); @@ -880,17 +933,8 @@ void SampleProfileMatcher::runOnModule() { runOnFunction(*F); } - // Update the data in SampleLoader. if (SalvageUnusedProfile) - for (auto &I : FuncToProfileNameMap) { - assert(I.first && "New function is null"); - FunctionId FuncName(I.first->getName()); - FuncNameToProfNameMap->emplace(FuncName, I.second); - // We need to remove the old entry to avoid duplicating the function - // processing. - SymbolMap->erase(FuncName); - SymbolMap->emplace(I.second, I.first); - } + UpdateSampleLoaderWithRecoveredProfiles(); if (SalvageStaleProfile) distributeIRToProfileLocationMap(); diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-toplev-func.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-toplev-func.prof new file mode 100644 index 0000000000000..a1bba5fc88de0 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-toplev-func.prof @@ -0,0 +1,23 @@ +foo:2724522:51 + 1: 51 + 2: 452674 + 3: 47 + 4: 497875 + 6: 415959 + 10: 452623 + 11: 452687 bar:452687 + 12: 452623 + 13: 47 + !CFGChecksum: 281718392333557 +bar:452687:452687 + 1: 452687 + !CFGChecksum: 4294967295 +main:204:0 + 1: 0 + 2: 51 + 3: 0 + 4: 51 + 5: 51 foo:51 + 6: 51 + 7: 0 + !CFGChecksum: 281582264815352 diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll new file mode 100644 index 0000000000000..f1f2506e08d2a --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll @@ -0,0 +1,258 @@ +; REQUIRES: x86_64-linux +; REQUIRES: asserts +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-toplev-func.prof --salvage-stale-profile --salvage-unused-profile -report-profile-staleness -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 2>&1 | FileCheck %s -check-prefix=CHECK-TEXT +; RUN: llvm-profdata merge --sample %S/Inputs/pseudo-probe-stale-profile-toplev-func.prof -extbinary -o %t.extbinary +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.extbinary --salvage-stale-profile --salvage-unused-profile -report-profile-staleness -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 2>&1 | FileCheck %s -check-prefix=CHECK-EXTBIN + +; CHECK-TEXT: Run stale profile matching for main +; CHECK-TEXT-NOT: Read top-level function foo for call-graph matching +; CHECK-TEXT: The checksums for foo_rename(IR) and foo(Profile) match. +; CHECK-TEXT: Function:foo_rename matches profile:foo +; CHECK-TEXT: Run stale profile matching for foo_rename +; CHECK-TEXT-NOT: Top-level function foo is recovered and re-read by the sample reader. +; CHECK-TEXT: (1/3) of functions' profile are matched and (2724522/3177413) of samples are reused by call graph matching. + +; CHECK-TEXT: Processing Function main +; CHECK-TEXT: 5: call void @foo_rename(), !dbg ![[#]] - weight: 51 +; CHECK-TEXT: Processing Function foo_rename +; CHECK-TEXT: 11: %call = call i32 @bar(i32 noundef %5), !dbg ![[#]] - weight: 452687 + + +; CHECK-EXTBIN: Run stale profile matching for main +; CHECK-EXTBIN: Read top-level function foo for call-graph matching +; CHECK-EXTBIN: The checksums for foo_rename(IR) and foo(Profile) match. +; CHECK-EXTBIN: Function:foo_rename matches profile:foo +; CHECK-EXTBIN: Run stale profile matching for foo_rename +; CHECK-EXTBIN: Top-level function foo is recovered and re-read by the sample reader. +; CHECK-EXTBIN: (1/3) of functions' profile are matched and (2724522/3177413) of samples are reused by call graph matching. + +; CHECK-EXTBIN: Processing Function main +; CHECK-EXTBIN: 5: call void @foo_rename(), !dbg ![[#]] - weight: 51 +; CHECK-EXTBIN: Processing Function foo_rename +; CHECK-EXTBIN: 11: %call = call i32 @bar(i32 noundef %5), !dbg ![[#]] - weight: 452687 + + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@x = dso_local global i32 0, align 4, !dbg !0 + +; Function Attrs: noinline nounwind uwtable +define dso_local i32 @bar(i32 noundef %x) #0 !dbg !18 { +entry: + #dbg_value(i32 %x, !22, !DIExpression(), !23) + call void @llvm.pseudoprobe(i64 -2012135647395072713, i64 1, i32 0, i64 -1), !dbg !24 + %add = add nsw i32 %x, 1, !dbg !25 + ret i32 %add, !dbg !26 +} + +; Function Attrs: noinline nounwind uwtable +define dso_local void @foo_rename() #0 !dbg !27 { +entry: + call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 1, i32 0, i64 -1), !dbg !33 + #dbg_value(i32 0, !31, !DIExpression(), !34) + br label %for.cond, !dbg !35 + +for.cond: ; preds = %if.end7, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc9, %if.end7 ], !dbg !36 + #dbg_value(i32 %i.0, !31, !DIExpression(), !34) + call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 2, i32 0, i64 -1), !dbg !37 + %cmp = icmp slt i32 %i.0, 10000, !dbg !39 + br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !40 + +for.cond.cleanup: ; preds = %for.cond + call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 3, i32 0, i64 -1), !dbg !41 + call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 13, i32 0, i64 -1), !dbg !42 + ret void, !dbg !42 + +for.body: ; preds = %for.cond + call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 4, i32 0, i64 -1), !dbg !43 + %0 = load volatile i32, ptr @x, align 4, !dbg !43, !tbaa !46 + %rem = srem i32 %0, 3, !dbg !50 + %cmp1 = icmp eq i32 %rem, 1, !dbg !51 + br i1 %cmp1, label %if.then, label %if.else, !dbg !52 + +if.then: ; preds = %for.body + call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 5, i32 0, i64 -1), !dbg !53 + %1 = load volatile i32, ptr @x, align 4, !dbg !53, !tbaa !46 + %add = add nsw i32 %1, 100, !dbg !53 + store volatile i32 %add, ptr @x, align 4, !dbg !53, !tbaa !46 + br label %if.end7, !dbg !54 + +if.else: ; preds = %for.body + call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 6, i32 0, i64 -1), !dbg !55 + %2 = load volatile i32, ptr @x, align 4, !dbg !55, !tbaa !46 + %rem2 = srem i32 %2, 2, !dbg !57 + %cmp3 = icmp eq i32 %rem2, 1, !dbg !58 + br i1 %cmp3, label %if.then4, label %if.else6, !dbg !59 + +if.then4: ; preds = %if.else + call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 7, i32 0, i64 -1), !dbg !60 + %3 = load volatile i32, ptr @x, align 4, !dbg !60, !tbaa !46 + %add5 = add nsw i32 %3, 10, !dbg !60 + store volatile i32 %add5, ptr @x, align 4, !dbg !60, !tbaa !46 + br label %if.end7, !dbg !61 + +if.else6: ; preds = %if.else + call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 8, i32 0, i64 -1), !dbg !62 + %4 = load volatile i32, ptr @x, align 4, !dbg !62, !tbaa !46 + %inc = add nsw i32 %4, 1, !dbg !62 + store volatile i32 %inc, ptr @x, align 4, !dbg !62, !tbaa !46 + br label %if.end7 + +if.end7: ; preds = %if.then4, %if.else6, %if.then + call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 10, i32 0, i64 -1), !dbg !63 + %5 = load volatile i32, ptr @x, align 4, !dbg !63, !tbaa !46 + %call = call i32 @bar(i32 noundef %5), !dbg !64 + %6 = load volatile i32, ptr @x, align 4, !dbg !66, !tbaa !46 + %add8 = add nsw i32 %6, %call, !dbg !66 + store volatile i32 %add8, ptr @x, align 4, !dbg !66, !tbaa !46 + call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 12, i32 0, i64 -1), !dbg !67 + %inc9 = add nsw i32 %i.0, 1, !dbg !67 + #dbg_value(i32 %inc9, !31, !DIExpression(), !34) + br label %for.cond, !dbg !68, !llvm.loop !69 +} + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1 + +; Function Attrs: nounwind uwtable +define dso_local i32 @main() #2 !dbg !72 { +entry: + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 1, i32 0, i64 -1), !dbg !78 + #dbg_value(i32 0, !76, !DIExpression(), !79) + br label %for.cond, !dbg !80 + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ], !dbg !81 + #dbg_value(i32 %i.0, !76, !DIExpression(), !79) + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 2, i32 0, i64 -1), !dbg !82 + %cmp = icmp slt i32 %i.0, 100000, !dbg !84 + br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !85 + +for.cond.cleanup: ; preds = %for.cond + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 3, i32 0, i64 -1), !dbg !86 + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 7, i32 0, i64 -1), !dbg !87 + ret i32 0, !dbg !87 + +for.body: ; preds = %for.cond + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 4, i32 0, i64 -1), !dbg !88 + call void @foo_rename(), !dbg !90 + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 6, i32 0, i64 -1), !dbg !92 + %inc = add nsw i32 %i.0, 1, !dbg !92 + #dbg_value(i32 %inc, !76, !DIExpression(), !79) + br label %for.cond, !dbg !93, !llvm.loop !94 +} + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) +declare void @llvm.pseudoprobe(i64, i64, i32, i64) #3 + +attributes #0 = { noinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" } +attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #2 = { nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" } +attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!7, !8, !9, !10, !11, !12, !13} +!llvm.ident = !{!14} +!llvm.pseudo_probe_desc = !{!15, !16, !17} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C11, file: !3, producer: "clang version 20.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "test_rename.c", directory: "/home", checksumkind: CSK_MD5, checksum: "5c9304100fda7763e5a474c768d3b005") +!4 = !{!0} +!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6) +!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!7 = !{i32 7, !"Dwarf Version", i32 5} +!8 = !{i32 2, !"Debug Info Version", i32 3} +!9 = !{i32 1, !"wchar_size", i32 4} +!10 = !{i32 8, !"PIC Level", i32 2} +!11 = !{i32 7, !"PIE Level", i32 2} +!12 = !{i32 7, !"uwtable", i32 2} +!13 = !{i32 7, !"debug-info-assignment-tracking", i1 true} +!14 = !{!"clang version 20.0.0"} +!15 = !{i64 -2012135647395072713, i64 4294967295, !"bar"} +!16 = !{i64 -2115950948644264162, i64 281718392333557, !"foo_rename"} +!17 = !{i64 -2624081020897602054, i64 281582264815352, !"main"} +!18 = distinct !DISubprogram(name: "bar", scope: !3, file: !3, line: 3, type: !19, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21) +!19 = !DISubroutineType(types: !20) +!20 = !{!6, !6} +!21 = !{!22} +!22 = !DILocalVariable(name: "x", arg: 1, scope: !18, file: !3, line: 3, type: !6) +!23 = !DILocation(line: 0, scope: !18) +!24 = !DILocation(line: 4, column: 10, scope: !18) +!25 = !DILocation(line: 4, column: 12, scope: !18) +!26 = !DILocation(line: 4, column: 3, scope: !18) +!27 = distinct !DISubprogram(name: "foo_rename", scope: !3, file: !3, line: 7, type: !28, scopeLine: 7, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !30) +!28 = !DISubroutineType(types: !29) +!29 = !{null} +!30 = !{!31} +!31 = !DILocalVariable(name: "i", scope: !32, file: !3, line: 8, type: !6) +!32 = distinct !DILexicalBlock(scope: !27, file: !3, line: 8, column: 3) +!33 = !DILocation(line: 8, column: 12, scope: !32) +!34 = !DILocation(line: 0, scope: !32) +!35 = !DILocation(line: 8, column: 8, scope: !32) +!36 = !DILocation(line: 8, scope: !32) +!37 = !DILocation(line: 8, column: 19, scope: !38) +!38 = distinct !DILexicalBlock(scope: !32, file: !3, line: 8, column: 3) +!39 = !DILocation(line: 8, column: 21, scope: !38) +!40 = !DILocation(line: 8, column: 3, scope: !32) +!41 = !DILocation(line: 0, scope: !27) +!42 = !DILocation(line: 17, column: 1, scope: !27) +!43 = !DILocation(line: 9, column: 10, scope: !44) +!44 = distinct !DILexicalBlock(scope: !45, file: !3, line: 9, column: 10) +!45 = distinct !DILexicalBlock(scope: !38, file: !3, line: 8, column: 39) +!46 = !{!47, !47, i64 0} +!47 = !{!"int", !48, i64 0} +!48 = !{!"omnipotent char", !49, i64 0} +!49 = !{!"Simple C/C++ TBAA"} +!50 = !DILocation(line: 9, column: 12, scope: !44) +!51 = !DILocation(line: 9, column: 16, scope: !44) +!52 = !DILocation(line: 9, column: 10, scope: !45) +!53 = !DILocation(line: 10, column: 10, scope: !44) +!54 = !DILocation(line: 10, column: 8, scope: !44) +!55 = !DILocation(line: 11, column: 16, scope: !56) +!56 = distinct !DILexicalBlock(scope: !44, file: !3, line: 11, column: 16) +!57 = !DILocation(line: 11, column: 18, scope: !56) +!58 = !DILocation(line: 11, column: 22, scope: !56) +!59 = !DILocation(line: 11, column: 16, scope: !44) +!60 = !DILocation(line: 12, column: 10, scope: !56) +!61 = !DILocation(line: 12, column: 8, scope: !56) +!62 = !DILocation(line: 14, column: 9, scope: !56) +!63 = !DILocation(line: 15, column: 15, scope: !45) +!64 = !DILocation(line: 15, column: 11, scope: !65) +!65 = !DILexicalBlockFile(scope: !45, file: !3, discriminator: 455082079) +!66 = !DILocation(line: 15, column: 8, scope: !45) +!67 = !DILocation(line: 8, column: 35, scope: !38) +!68 = !DILocation(line: 8, column: 3, scope: !38) +!69 = distinct !{!69, !40, !70, !71} +!70 = !DILocation(line: 16, column: 3, scope: !32) +!71 = !{!"llvm.loop.mustprogress"} +!72 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 19, type: !73, scopeLine: 19, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !75) +!73 = !DISubroutineType(types: !74) +!74 = !{!6} +!75 = !{!76} +!76 = !DILocalVariable(name: "i", scope: !77, file: !3, line: 20, type: !6) +!77 = distinct !DILexicalBlock(scope: !72, file: !3, line: 20, column: 3) +!78 = !DILocation(line: 20, column: 12, scope: !77) +!79 = !DILocation(line: 0, scope: !77) +!80 = !DILocation(line: 20, column: 8, scope: !77) +!81 = !DILocation(line: 20, scope: !77) +!82 = !DILocation(line: 20, column: 19, scope: !83) +!83 = distinct !DILexicalBlock(scope: !77, file: !3, line: 20, column: 3) +!84 = !DILocation(line: 20, column: 21, scope: !83) +!85 = !DILocation(line: 20, column: 3, scope: !77) +!86 = !DILocation(line: 0, scope: !72) +!87 = !DILocation(line: 23, column: 1, scope: !72) +!88 = !DILocation(line: 21, column: 7, scope: !89) +!89 = distinct !DILexicalBlock(scope: !83, file: !3, line: 20, column: 40) +!90 = !DILocation(line: 21, column: 7, scope: !91) +!91 = !DILexicalBlockFile(scope: !89, file: !3, discriminator: 455082031) +!92 = !DILocation(line: 20, column: 36, scope: !83) +!93 = !DILocation(line: 20, column: 3, scope: !83) +!94 = distinct !{!94, !85, !95, !71} +!95 = !DILocation(line: 22, column: 3, scope: !77) From 9a420e317601ab13d823c7b4be8fe93a5aaeee42 Mon Sep 17 00:00:00 2001 From: wlei Date: Tue, 13 Aug 2024 00:17:42 -0700 Subject: [PATCH 2/8] addressing comments --- .../llvm/ProfileData/SampleProfReader.h | 27 +- .../Transforms/IPO/SampleProfileMatcher.h | 2 +- llvm/lib/ProfileData/SampleProfReader.cpp | 24 +- .../Transforms/IPO/SampleProfileMatcher.cpp | 43 ++-- ...seudo-probe-stale-profile-toplev-func.prof | 2 +- ...eudo-probe-stale-profile-toplev-func-cp.ll | 147 +++++++++++ .../pseudo-probe-stale-profile-toplev-func.ll | 233 ++++++------------ 7 files changed, 265 insertions(+), 213 deletions(-) create mode 100644 llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func-cp.ll diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h index b124233a02d11..00e4e7096ab7b 100644 --- a/llvm/include/llvm/ProfileData/SampleProfReader.h +++ b/llvm/include/llvm/ProfileData/SampleProfReader.h @@ -380,6 +380,13 @@ class SampleProfileReader { return sampleprof_error::success; } + /// Read sample profiles for the given functions. Currently it's only used + /// for extended binary format to load the profiles on-demand. + virtual std::error_code read(const DenseSet &FuncsToUse, + SampleProfileMap &Profiles) { + return sampleprof_error::not_implemented; + }; + /// The implementaion to read sample profiles from the associated file. virtual std::error_code readImpl() = 0; @@ -392,11 +399,6 @@ class SampleProfileReader { /// which doesn't support loading function profiles on demand. virtual bool collectFuncsFromModule() { return false; } - virtual std::error_code readOnDemand(const DenseSet &FuncsToUse, - SampleProfileMap &Profiles) { - return sampleprof_error::not_implemented; - }; - /// Print all the profiles on stream \p OS. void dump(raw_ostream &OS = dbgs()); @@ -551,7 +553,7 @@ class SampleProfileReader { // A map from a function's context hash to its meta data section range, used // for on-demand read function profile metadata. std::unordered_map> - FContextToMetaDataSecRange; + FuncMetadataIndex; std::pair LBRProfileSecRange; @@ -757,8 +759,8 @@ class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary { std::error_code readSecHdrTableEntry(uint64_t Idx); std::error_code readSecHdrTable(); - std::error_code readFuncMetadataOnDemand(bool ProfileHasAttribute, - SampleProfileMap &Profiles); + std::error_code readFuncMetadata(bool ProfileHasAttribute, + SampleProfileMap &Profiles); std::error_code readFuncMetadata(bool ProfileHasAttribute); std::error_code readFuncMetadata(bool ProfileHasAttribute, FunctionSamples *FProfile); @@ -818,10 +820,11 @@ class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary { bool collectFuncsFromModule() override; /// Read the profiles on-demand for the given functions. This is used after - /// stale call graph matching finds new functions whose profiles aren't read - /// at the beginning and we need to re-read the profiles. - std::error_code readOnDemand(const DenseSet &FuncsToUse, - SampleProfileMap &Profiles) override; + /// stale call graph matching finds new functions whose profiles aren't loaded + /// at the beginning and we need to loaded the profiles explicitly for + /// potential matching. + std::error_code read(const DenseSet &FuncsToUse, + SampleProfileMap &Profiles) override; std::unique_ptr getProfileSymbolList() override { return std::move(ProfSymList); diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h index 67edea42e2fe1..076d91adfd1de 100644 --- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h +++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h @@ -198,7 +198,7 @@ class SampleProfileMatcher { // function and all inlinees. void countMismatchedCallsiteSamples(const FunctionSamples &FS); void computeAndReportProfileStaleness(); - void UpdateSampleLoaderWithRecoveredProfiles(); + void UpdateWithSalvagedProfiles(); LocToLocMap &getIRToProfileLocationMap(const Function &F) { auto Ret = FuncMappings.try_emplace( diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp index f555da866f36e..4c0a45bfb47cf 100644 --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -798,16 +798,16 @@ bool SampleProfileReaderExtBinaryBase::useFuncOffsetList() const { return false; } -std::error_code SampleProfileReaderExtBinaryBase::readOnDemand( - const DenseSet &FuncsToUse, SampleProfileMap &Profiles) { +std::error_code +SampleProfileReaderExtBinaryBase::read(const DenseSet &FuncsToUse, + SampleProfileMap &Profiles) { Data = LBRProfileSecRange.first; End = LBRProfileSecRange.second; if (std::error_code EC = readFuncProfiles(FuncsToUse, Profiles)) return EC; End = Data; - if (std::error_code EC = - readFuncMetadataOnDemand(ProfileHasAttribute, Profiles)) + if (std::error_code EC = readFuncMetadata(ProfileHasAttribute, Profiles)) return EC; return sampleprof_error::success; } @@ -945,6 +945,8 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles( return EC; } } + + return sampleprof_error::success; } std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() { @@ -1273,16 +1275,16 @@ SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute, return sampleprof_error::success; } -std::error_code SampleProfileReaderExtBinaryBase::readFuncMetadataOnDemand( - bool ProfileHasAttribute, SampleProfileMap &Profiles) { - if (FContextToMetaDataSecRange.empty()) +std::error_code +SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute, + SampleProfileMap &Profiles) { + if (FuncMetadataIndex.empty()) return sampleprof_error::success; for (auto &I : Profiles) { FunctionSamples *FProfile = &I.second; - auto R = - FContextToMetaDataSecRange.find(FProfile->getContext().getHashCode()); - if (R == FContextToMetaDataSecRange.end()) + auto R = FuncMetadataIndex.find(FProfile->getContext().getHashCode()); + if (R == FuncMetadataIndex.end()) continue; Data = R->second.first; @@ -1310,7 +1312,7 @@ SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) { if (std::error_code EC = readFuncMetadata(ProfileHasAttribute, FProfile)) return EC; - FContextToMetaDataSecRange[FContext.getHashCode()] = {Start, Data}; + FuncMetadataIndex[FContext.getHashCode()] = {Start, Data}; } assert(Data == End && "More data is read than expected"); diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp index b9adc6a0631b8..574a157c63683 100644 --- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp @@ -782,16 +782,15 @@ bool SampleProfileMatcher::functionMatchesProfileHelper( float Similarity = 0.0; const auto *FSFlattened = getFlattenedSamplesFor(ProfFunc); - // Check if the function is top-level function. For extended profile format, - // if a function profile is unused and it's top-level, even if the profile is - // matched, it's not found in the profile. This is because sample reader only - // read the used profile at the beginning, we need to read the profile - // on-demand. Also save it into the FlattenedProfiles for future look-up. + // With extbinary profile format, initial profile loading only reads profile + // based on current function names in the module. + // However, if a function is renamed, sample loader fails to load its original + // profile(which has a different name), we will miss this case. To address + // this, we load the top-level profile candidate explicitly for the matching. if (!FSFlattened) { - DenseSet TopLevelFunc; - TopLevelFunc.insert(ProfFunc.stringRef()); + DenseSet TopLevelFunc({ProfFunc.stringRef()}); SampleProfileMap TopLevelProfile; - Reader.readOnDemand(TopLevelFunc, TopLevelProfile); + Reader.read(TopLevelFunc, TopLevelProfile); assert(TopLevelProfile.size() <= 1 && "More than one profile is found for top-level function"); if (!TopLevelProfile.empty()) { @@ -883,13 +882,13 @@ bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc, return Matched; } -void SampleProfileMatcher::UpdateSampleLoaderWithRecoveredProfiles() { - DenseSet RecoveredFuncs; +void SampleProfileMatcher::UpdateWithSalvagedProfiles() { + DenseSet ProfileSalvagedFuncs; // Update FuncNameToProfNameMap and SymbolMap. for (auto &I : FuncToProfileNameMap) { assert(I.first && "New function is null"); FunctionId FuncName(I.first->getName()); - RecoveredFuncs.insert(I.second.stringRef()); + ProfileSalvagedFuncs.insert(I.second.stringRef()); FuncNameToProfNameMap->emplace(FuncName, I.second); // We need to remove the old entry to avoid duplicating the function @@ -898,21 +897,11 @@ void SampleProfileMatcher::UpdateSampleLoaderWithRecoveredProfiles() { SymbolMap->emplace(I.second, I.first); } - // Read the top-level profiles for the recovered function profiles. This is - // because in extended binary format it only loads the top-level profile for - // the functions in the new build but not the recovered functions which is - // from the old build. - SampleProfileMap TopLevelRecoveredProfiles; - Reader.readOnDemand(RecoveredFuncs, TopLevelRecoveredProfiles); - auto &Profiles = Reader.getProfiles(); - for (auto &I : TopLevelRecoveredProfiles) { - LLVM_DEBUG(dbgs() << "Top-level function " << I.second.getFunction() - << " is recovered and re-read by the sample reader.\n"); - auto &Ctx = I.second.getContext(); - assert(Profiles.find(Ctx) == Profiles.end() && - "Top level profile is found for the unused profile"); - Profiles.create(Ctx) = std::move(I.second); - } + // With extbinary profile format, initial profile loading only reads profile + // based on current function names in the module, so we need to load top-level + // profiles for functions with different profile name explicitly after + // function-profile name map is established with stale profile matching. + Reader.read(ProfileSalvagedFuncs, Reader.getProfiles()); Reader.setFuncNameToProfNameMap(FuncNameToProfNameMap); } @@ -934,7 +923,7 @@ void SampleProfileMatcher::runOnModule() { } if (SalvageUnusedProfile) - UpdateSampleLoaderWithRecoveredProfiles(); + UpdateWithSalvagedProfiles(); if (SalvageStaleProfile) distributeIRToProfileLocationMap(); diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-toplev-func.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-toplev-func.prof index a1bba5fc88de0..86c8cb3285afe 100644 --- a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-toplev-func.prof +++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-toplev-func.prof @@ -8,7 +8,7 @@ foo:2724522:51 11: 452687 bar:452687 12: 452623 13: 47 - !CFGChecksum: 281718392333557 + !CFGChecksum: 281479271677951 bar:452687:452687 1: 452687 !CFGChecksum: 4294967295 diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func-cp.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func-cp.ll new file mode 100644 index 0000000000000..750bf03fa2d93 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func-cp.ll @@ -0,0 +1,147 @@ +; *** IR Dump Before SampleProfileLoaderPass on [module] *** +; ModuleID = 'test_rename.c' +source_filename = "test_rename.c" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@x = dso_local global i32 0, align 4, !dbg !0 + +; Function Attrs: noinline nounwind uwtable +define dso_local i32 @bar(i32 noundef %x) #0 !dbg !18 { +entry: + #dbg_value(i32 %x, !22, !DIExpression(), !23) + call void @llvm.pseudoprobe(i64 -2012135647395072713, i64 1, i32 0, i64 -1), !dbg !24 + %add = add nsw i32 %x, 1, !dbg !25 + ret i32 %add, !dbg !26 +} + +; Function Attrs: noinline nounwind uwtable +define dso_local void @foo_rename() #0 !dbg !27 { +entry: + call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 1, i32 0, i64 -1), !dbg !30 + %0 = load volatile i32, ptr @x, align 4, !dbg !30, !tbaa !31 + %call = call i32 @bar(i32 noundef %0), !dbg !35 + %1 = load volatile i32, ptr @x, align 4, !dbg !37, !tbaa !31 + %add = add nsw i32 %1, %call, !dbg !37 + store volatile i32 %add, ptr @x, align 4, !dbg !37, !tbaa !31 + ret void, !dbg !38 +} + +; Function Attrs: nounwind uwtable +define dso_local i32 @main() #1 !dbg !39 { +entry: + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 1, i32 0, i64 -1), !dbg !45 + #dbg_value(i32 0, !43, !DIExpression(), !46) + br label %for.cond, !dbg !47 + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ], !dbg !48 + #dbg_value(i32 %i.0, !43, !DIExpression(), !46) + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 2, i32 0, i64 -1), !dbg !49 + %cmp = icmp slt i32 %i.0, 100000, !dbg !51 + br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !52 + +for.cond.cleanup: ; preds = %for.cond + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 3, i32 0, i64 -1), !dbg !53 + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 7, i32 0, i64 -1), !dbg !54 + ret i32 0, !dbg !54 + +for.body: ; preds = %for.cond + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 4, i32 0, i64 -1), !dbg !55 + call void @foo_rename(), !dbg !57 + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 6, i32 0, i64 -1), !dbg !59 + %inc = add nsw i32 %i.0, 1, !dbg !59 + #dbg_value(i32 %inc, !43, !DIExpression(), !46) + br label %for.cond, !dbg !60, !llvm.loop !61 +} + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #2 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #2 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) +declare void @llvm.pseudoprobe(i64, i64, i32, i64) #3 + +attributes #0 = { noinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" } +attributes #1 = { nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" } +attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!7, !8, !9, !10, !11, !12, !13} +!llvm.ident = !{!14} +!llvm.pseudo_probe_desc = !{!15, !16, !17} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C11, file: !3, producer: "clang version 20.0.0git (https://github.com/llvm/llvm-project.git 070702c9be2fb437b0765532c03e98c642951906)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "test_rename.c", directory: "/home/wlei/local/llvm_test/rename/extbinary", checksumkind: CSK_MD5, checksum: "11a33a83e4d190ebda0792d0610f0c67") +!4 = !{!0} +!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6) +!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!7 = !{i32 7, !"Dwarf Version", i32 5} +!8 = !{i32 2, !"Debug Info Version", i32 3} +!9 = !{i32 1, !"wchar_size", i32 4} +!10 = !{i32 8, !"PIC Level", i32 2} +!11 = !{i32 7, !"PIE Level", i32 2} +!12 = !{i32 7, !"uwtable", i32 2} +!13 = !{i32 7, !"debug-info-assignment-tracking", i1 true} +!14 = !{!"clang version 20.0.0git (https://github.com/llvm/llvm-project.git 070702c9be2fb437b0765532c03e98c642951906)"} +!15 = !{i64 -2012135647395072713, i64 4294967295, !"bar"} +!16 = !{i64 -2115950948644264162, i64 281479271677951, !"foo_rename"} +!17 = !{i64 -2624081020897602054, i64 281582264815352, !"main"} +!18 = distinct !DISubprogram(name: "bar", scope: !3, file: !3, line: 3, type: !19, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21) +!19 = !DISubroutineType(types: !20) +!20 = !{!6, !6} +!21 = !{!22} +!22 = !DILocalVariable(name: "x", arg: 1, scope: !18, file: !3, line: 3, type: !6) +!23 = !DILocation(line: 0, scope: !18) +!24 = !DILocation(line: 4, column: 10, scope: !18) +!25 = !DILocation(line: 4, column: 12, scope: !18) +!26 = !DILocation(line: 4, column: 3, scope: !18) +!27 = distinct !DISubprogram(name: "foo_rename", scope: !3, file: !3, line: 7, type: !28, scopeLine: 7, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!28 = !DISubroutineType(types: !29) +!29 = !{null} +!30 = !DILocation(line: 8, column: 15, scope: !27) +!31 = !{!32, !32, i64 0} +!32 = !{!"int", !33, i64 0} +!33 = !{!"omnipotent char", !34, i64 0} +!34 = !{!"Simple C/C++ TBAA"} +!35 = !DILocation(line: 8, column: 11, scope: !36) +!36 = !DILexicalBlockFile(scope: !27, file: !3, discriminator: 455082007) +!37 = !DILocation(line: 8, column: 8, scope: !27) +!38 = !DILocation(line: 9, column: 1, scope: !27) +!39 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 11, type: !40, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !42) +!40 = !DISubroutineType(types: !41) +!41 = !{!6} +!42 = !{!43} +!43 = !DILocalVariable(name: "i", scope: !44, file: !3, line: 12, type: !6) +!44 = distinct !DILexicalBlock(scope: !39, file: !3, line: 12, column: 3) +!45 = !DILocation(line: 12, column: 12, scope: !44) +!46 = !DILocation(line: 0, scope: !44) +!47 = !DILocation(line: 12, column: 8, scope: !44) +!48 = !DILocation(line: 12, scope: !44) +!49 = !DILocation(line: 12, column: 19, scope: !50) +!50 = distinct !DILexicalBlock(scope: !44, file: !3, line: 12, column: 3) +!51 = !DILocation(line: 12, column: 21, scope: !50) +!52 = !DILocation(line: 12, column: 3, scope: !44) +!53 = !DILocation(line: 0, scope: !39) +!54 = !DILocation(line: 15, column: 1, scope: !39) +!55 = !DILocation(line: 13, column: 7, scope: !56) +!56 = distinct !DILexicalBlock(scope: !50, file: !3, line: 12, column: 40) +!57 = !DILocation(line: 13, column: 7, scope: !58) +!58 = !DILexicalBlockFile(scope: !56, file: !3, discriminator: 455082031) +!59 = !DILocation(line: 12, column: 36, scope: !50) +!60 = !DILocation(line: 12, column: 3, scope: !50) +!61 = distinct !{!61, !52, !62, !63} +!62 = !DILocation(line: 14, column: 3, scope: !44) +!63 = !{!"llvm.loop.mustprogress"} +Function foo_rename is not in profile or profile symbol list. +Run stale profile matching for main +Run stale profile matching for bar +(0/2) of functions' profile are invalid and (0/452891) of samples are discarded due to function hash mismatch. +(0/2) of functions' profile are matched and (0/452891) of samples are reused by call graph matching. +(1/1) of callsites' profile are invalid and (51/452891) of samples are discarded due to callsite location mismatch. +(0/1) of callsites and (0/51) of samples are recovered by stale profile matching. diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll index f1f2506e08d2a..356b16ca6ad05 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll @@ -9,13 +9,12 @@ ; CHECK-TEXT: The checksums for foo_rename(IR) and foo(Profile) match. ; CHECK-TEXT: Function:foo_rename matches profile:foo ; CHECK-TEXT: Run stale profile matching for foo_rename -; CHECK-TEXT-NOT: Top-level function foo is recovered and re-read by the sample reader. ; CHECK-TEXT: (1/3) of functions' profile are matched and (2724522/3177413) of samples are reused by call graph matching. ; CHECK-TEXT: Processing Function main ; CHECK-TEXT: 5: call void @foo_rename(), !dbg ![[#]] - weight: 51 ; CHECK-TEXT: Processing Function foo_rename -; CHECK-TEXT: 11: %call = call i32 @bar(i32 noundef %5), !dbg ![[#]] - weight: 452687 +; CHECK-TEXT: 2: %call = call i32 @bar(i32 noundef %0), !dbg ![[#]] - weight: 452674 ; CHECK-EXTBIN: Run stale profile matching for main @@ -23,13 +22,12 @@ ; CHECK-EXTBIN: The checksums for foo_rename(IR) and foo(Profile) match. ; CHECK-EXTBIN: Function:foo_rename matches profile:foo ; CHECK-EXTBIN: Run stale profile matching for foo_rename -; CHECK-EXTBIN: Top-level function foo is recovered and re-read by the sample reader. ; CHECK-EXTBIN: (1/3) of functions' profile are matched and (2724522/3177413) of samples are reused by call graph matching. ; CHECK-EXTBIN: Processing Function main ; CHECK-EXTBIN: 5: call void @foo_rename(), !dbg ![[#]] - weight: 51 ; CHECK-EXTBIN: Processing Function foo_rename -; CHECK-EXTBIN: 11: %call = call i32 @bar(i32 noundef %5), !dbg ![[#]] - weight: 452687 +; CHECK-EXTBIN: 2: %call = call i32 @bar(i32 noundef %0), !dbg ![[#]] - weight: 452674 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" @@ -49,110 +47,55 @@ entry: ; Function Attrs: noinline nounwind uwtable define dso_local void @foo_rename() #0 !dbg !27 { entry: - call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 1, i32 0, i64 -1), !dbg !33 - #dbg_value(i32 0, !31, !DIExpression(), !34) - br label %for.cond, !dbg !35 - -for.cond: ; preds = %if.end7, %entry - %i.0 = phi i32 [ 0, %entry ], [ %inc9, %if.end7 ], !dbg !36 - #dbg_value(i32 %i.0, !31, !DIExpression(), !34) - call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 2, i32 0, i64 -1), !dbg !37 - %cmp = icmp slt i32 %i.0, 10000, !dbg !39 - br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !40 - -for.cond.cleanup: ; preds = %for.cond - call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 3, i32 0, i64 -1), !dbg !41 - call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 13, i32 0, i64 -1), !dbg !42 - ret void, !dbg !42 - -for.body: ; preds = %for.cond - call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 4, i32 0, i64 -1), !dbg !43 - %0 = load volatile i32, ptr @x, align 4, !dbg !43, !tbaa !46 - %rem = srem i32 %0, 3, !dbg !50 - %cmp1 = icmp eq i32 %rem, 1, !dbg !51 - br i1 %cmp1, label %if.then, label %if.else, !dbg !52 - -if.then: ; preds = %for.body - call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 5, i32 0, i64 -1), !dbg !53 - %1 = load volatile i32, ptr @x, align 4, !dbg !53, !tbaa !46 - %add = add nsw i32 %1, 100, !dbg !53 - store volatile i32 %add, ptr @x, align 4, !dbg !53, !tbaa !46 - br label %if.end7, !dbg !54 - -if.else: ; preds = %for.body - call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 6, i32 0, i64 -1), !dbg !55 - %2 = load volatile i32, ptr @x, align 4, !dbg !55, !tbaa !46 - %rem2 = srem i32 %2, 2, !dbg !57 - %cmp3 = icmp eq i32 %rem2, 1, !dbg !58 - br i1 %cmp3, label %if.then4, label %if.else6, !dbg !59 - -if.then4: ; preds = %if.else - call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 7, i32 0, i64 -1), !dbg !60 - %3 = load volatile i32, ptr @x, align 4, !dbg !60, !tbaa !46 - %add5 = add nsw i32 %3, 10, !dbg !60 - store volatile i32 %add5, ptr @x, align 4, !dbg !60, !tbaa !46 - br label %if.end7, !dbg !61 - -if.else6: ; preds = %if.else - call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 8, i32 0, i64 -1), !dbg !62 - %4 = load volatile i32, ptr @x, align 4, !dbg !62, !tbaa !46 - %inc = add nsw i32 %4, 1, !dbg !62 - store volatile i32 %inc, ptr @x, align 4, !dbg !62, !tbaa !46 - br label %if.end7 - -if.end7: ; preds = %if.then4, %if.else6, %if.then - call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 10, i32 0, i64 -1), !dbg !63 - %5 = load volatile i32, ptr @x, align 4, !dbg !63, !tbaa !46 - %call = call i32 @bar(i32 noundef %5), !dbg !64 - %6 = load volatile i32, ptr @x, align 4, !dbg !66, !tbaa !46 - %add8 = add nsw i32 %6, %call, !dbg !66 - store volatile i32 %add8, ptr @x, align 4, !dbg !66, !tbaa !46 - call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 12, i32 0, i64 -1), !dbg !67 - %inc9 = add nsw i32 %i.0, 1, !dbg !67 - #dbg_value(i32 %inc9, !31, !DIExpression(), !34) - br label %for.cond, !dbg !68, !llvm.loop !69 + call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 1, i32 0, i64 -1), !dbg !30 + %0 = load volatile i32, ptr @x, align 4, !dbg !30, !tbaa !31 + %call = call i32 @bar(i32 noundef %0), !dbg !35 + %1 = load volatile i32, ptr @x, align 4, !dbg !37, !tbaa !31 + %add = add nsw i32 %1, %call, !dbg !37 + store volatile i32 %add, ptr @x, align 4, !dbg !37, !tbaa !31 + ret void, !dbg !38 } -; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) -declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1 - -; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) -declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1 - ; Function Attrs: nounwind uwtable -define dso_local i32 @main() #2 !dbg !72 { +define dso_local i32 @main() #1 !dbg !39 { entry: - call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 1, i32 0, i64 -1), !dbg !78 - #dbg_value(i32 0, !76, !DIExpression(), !79) - br label %for.cond, !dbg !80 + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 1, i32 0, i64 -1), !dbg !45 + #dbg_value(i32 0, !43, !DIExpression(), !46) + br label %for.cond, !dbg !47 for.cond: ; preds = %for.body, %entry - %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ], !dbg !81 - #dbg_value(i32 %i.0, !76, !DIExpression(), !79) - call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 2, i32 0, i64 -1), !dbg !82 - %cmp = icmp slt i32 %i.0, 100000, !dbg !84 - br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !85 + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ], !dbg !48 + #dbg_value(i32 %i.0, !43, !DIExpression(), !46) + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 2, i32 0, i64 -1), !dbg !49 + %cmp = icmp slt i32 %i.0, 100000, !dbg !51 + br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !52 for.cond.cleanup: ; preds = %for.cond - call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 3, i32 0, i64 -1), !dbg !86 - call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 7, i32 0, i64 -1), !dbg !87 - ret i32 0, !dbg !87 + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 3, i32 0, i64 -1), !dbg !53 + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 7, i32 0, i64 -1), !dbg !54 + ret i32 0, !dbg !54 for.body: ; preds = %for.cond - call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 4, i32 0, i64 -1), !dbg !88 - call void @foo_rename(), !dbg !90 - call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 6, i32 0, i64 -1), !dbg !92 - %inc = add nsw i32 %i.0, 1, !dbg !92 - #dbg_value(i32 %inc, !76, !DIExpression(), !79) - br label %for.cond, !dbg !93, !llvm.loop !94 + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 4, i32 0, i64 -1), !dbg !55 + call void @foo_rename(), !dbg !57 + call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 6, i32 0, i64 -1), !dbg !59 + %inc = add nsw i32 %i.0, 1, !dbg !59 + #dbg_value(i32 %inc, !43, !DIExpression(), !46) + br label %for.cond, !dbg !60, !llvm.loop !61 } +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #2 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #2 + ; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) declare void @llvm.pseudoprobe(i64, i64, i32, i64) #3 attributes #0 = { noinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" } -attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } -attributes #2 = { nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" } +attributes #1 = { nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" } +attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } !llvm.dbg.cu = !{!2} @@ -163,7 +106,7 @@ attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memo !0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) !1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true) !2 = distinct !DICompileUnit(language: DW_LANG_C11, file: !3, producer: "clang version 20.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None) -!3 = !DIFile(filename: "test_rename.c", directory: "/home", checksumkind: CSK_MD5, checksum: "5c9304100fda7763e5a474c768d3b005") +!3 = !DIFile(filename: "test_rename.c", directory: "/home", checksumkind: CSK_MD5, checksum: "11a33a83e4d190ebda0792d0610f0c67") !4 = !{!0} !5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6) !6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) @@ -176,7 +119,7 @@ attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memo !13 = !{i32 7, !"debug-info-assignment-tracking", i1 true} !14 = !{!"clang version 20.0.0"} !15 = !{i64 -2012135647395072713, i64 4294967295, !"bar"} -!16 = !{i64 -2115950948644264162, i64 281718392333557, !"foo_rename"} +!16 = !{i64 -2115950948644264162, i64 281479271677951, !"foo_rename"} !17 = !{i64 -2624081020897602054, i64 281582264815352, !"main"} !18 = distinct !DISubprogram(name: "bar", scope: !3, file: !3, line: 3, type: !19, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21) !19 = !DISubroutineType(types: !20) @@ -187,72 +130,40 @@ attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memo !24 = !DILocation(line: 4, column: 10, scope: !18) !25 = !DILocation(line: 4, column: 12, scope: !18) !26 = !DILocation(line: 4, column: 3, scope: !18) -!27 = distinct !DISubprogram(name: "foo_rename", scope: !3, file: !3, line: 7, type: !28, scopeLine: 7, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !30) +!27 = distinct !DISubprogram(name: "foo_rename", scope: !3, file: !3, line: 7, type: !28, scopeLine: 7, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) !28 = !DISubroutineType(types: !29) !29 = !{null} -!30 = !{!31} -!31 = !DILocalVariable(name: "i", scope: !32, file: !3, line: 8, type: !6) -!32 = distinct !DILexicalBlock(scope: !27, file: !3, line: 8, column: 3) -!33 = !DILocation(line: 8, column: 12, scope: !32) -!34 = !DILocation(line: 0, scope: !32) -!35 = !DILocation(line: 8, column: 8, scope: !32) -!36 = !DILocation(line: 8, scope: !32) -!37 = !DILocation(line: 8, column: 19, scope: !38) -!38 = distinct !DILexicalBlock(scope: !32, file: !3, line: 8, column: 3) -!39 = !DILocation(line: 8, column: 21, scope: !38) -!40 = !DILocation(line: 8, column: 3, scope: !32) -!41 = !DILocation(line: 0, scope: !27) -!42 = !DILocation(line: 17, column: 1, scope: !27) -!43 = !DILocation(line: 9, column: 10, scope: !44) -!44 = distinct !DILexicalBlock(scope: !45, file: !3, line: 9, column: 10) -!45 = distinct !DILexicalBlock(scope: !38, file: !3, line: 8, column: 39) -!46 = !{!47, !47, i64 0} -!47 = !{!"int", !48, i64 0} -!48 = !{!"omnipotent char", !49, i64 0} -!49 = !{!"Simple C/C++ TBAA"} -!50 = !DILocation(line: 9, column: 12, scope: !44) -!51 = !DILocation(line: 9, column: 16, scope: !44) -!52 = !DILocation(line: 9, column: 10, scope: !45) -!53 = !DILocation(line: 10, column: 10, scope: !44) -!54 = !DILocation(line: 10, column: 8, scope: !44) -!55 = !DILocation(line: 11, column: 16, scope: !56) -!56 = distinct !DILexicalBlock(scope: !44, file: !3, line: 11, column: 16) -!57 = !DILocation(line: 11, column: 18, scope: !56) -!58 = !DILocation(line: 11, column: 22, scope: !56) -!59 = !DILocation(line: 11, column: 16, scope: !44) -!60 = !DILocation(line: 12, column: 10, scope: !56) -!61 = !DILocation(line: 12, column: 8, scope: !56) -!62 = !DILocation(line: 14, column: 9, scope: !56) -!63 = !DILocation(line: 15, column: 15, scope: !45) -!64 = !DILocation(line: 15, column: 11, scope: !65) -!65 = !DILexicalBlockFile(scope: !45, file: !3, discriminator: 455082079) -!66 = !DILocation(line: 15, column: 8, scope: !45) -!67 = !DILocation(line: 8, column: 35, scope: !38) -!68 = !DILocation(line: 8, column: 3, scope: !38) -!69 = distinct !{!69, !40, !70, !71} -!70 = !DILocation(line: 16, column: 3, scope: !32) -!71 = !{!"llvm.loop.mustprogress"} -!72 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 19, type: !73, scopeLine: 19, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !75) -!73 = !DISubroutineType(types: !74) -!74 = !{!6} -!75 = !{!76} -!76 = !DILocalVariable(name: "i", scope: !77, file: !3, line: 20, type: !6) -!77 = distinct !DILexicalBlock(scope: !72, file: !3, line: 20, column: 3) -!78 = !DILocation(line: 20, column: 12, scope: !77) -!79 = !DILocation(line: 0, scope: !77) -!80 = !DILocation(line: 20, column: 8, scope: !77) -!81 = !DILocation(line: 20, scope: !77) -!82 = !DILocation(line: 20, column: 19, scope: !83) -!83 = distinct !DILexicalBlock(scope: !77, file: !3, line: 20, column: 3) -!84 = !DILocation(line: 20, column: 21, scope: !83) -!85 = !DILocation(line: 20, column: 3, scope: !77) -!86 = !DILocation(line: 0, scope: !72) -!87 = !DILocation(line: 23, column: 1, scope: !72) -!88 = !DILocation(line: 21, column: 7, scope: !89) -!89 = distinct !DILexicalBlock(scope: !83, file: !3, line: 20, column: 40) -!90 = !DILocation(line: 21, column: 7, scope: !91) -!91 = !DILexicalBlockFile(scope: !89, file: !3, discriminator: 455082031) -!92 = !DILocation(line: 20, column: 36, scope: !83) -!93 = !DILocation(line: 20, column: 3, scope: !83) -!94 = distinct !{!94, !85, !95, !71} -!95 = !DILocation(line: 22, column: 3, scope: !77) +!30 = !DILocation(line: 8, column: 15, scope: !27) +!31 = !{!32, !32, i64 0} +!32 = !{!"int", !33, i64 0} +!33 = !{!"omnipotent char", !34, i64 0} +!34 = !{!"Simple C/C++ TBAA"} +!35 = !DILocation(line: 8, column: 11, scope: !36) +!36 = !DILexicalBlockFile(scope: !27, file: !3, discriminator: 455082007) +!37 = !DILocation(line: 8, column: 8, scope: !27) +!38 = !DILocation(line: 9, column: 1, scope: !27) +!39 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 11, type: !40, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !42) +!40 = !DISubroutineType(types: !41) +!41 = !{!6} +!42 = !{!43} +!43 = !DILocalVariable(name: "i", scope: !44, file: !3, line: 12, type: !6) +!44 = distinct !DILexicalBlock(scope: !39, file: !3, line: 12, column: 3) +!45 = !DILocation(line: 12, column: 12, scope: !44) +!46 = !DILocation(line: 0, scope: !44) +!47 = !DILocation(line: 12, column: 8, scope: !44) +!48 = !DILocation(line: 12, scope: !44) +!49 = !DILocation(line: 12, column: 19, scope: !50) +!50 = distinct !DILexicalBlock(scope: !44, file: !3, line: 12, column: 3) +!51 = !DILocation(line: 12, column: 21, scope: !50) +!52 = !DILocation(line: 12, column: 3, scope: !44) +!53 = !DILocation(line: 0, scope: !39) +!54 = !DILocation(line: 15, column: 1, scope: !39) +!55 = !DILocation(line: 13, column: 7, scope: !56) +!56 = distinct !DILexicalBlock(scope: !50, file: !3, line: 12, column: 40) +!57 = !DILocation(line: 13, column: 7, scope: !58) +!58 = !DILexicalBlockFile(scope: !56, file: !3, discriminator: 455082031) +!59 = !DILocation(line: 12, column: 36, scope: !50) +!60 = !DILocation(line: 12, column: 3, scope: !50) +!61 = distinct !{!61, !52, !62, !63} +!62 = !DILocation(line: 14, column: 3, scope: !44) +!63 = !{!"llvm.loop.mustprogress"} From 91ce2b23236bde42930a3dbb05fb2531c10d90a7 Mon Sep 17 00:00:00 2001 From: wlei Date: Tue, 13 Aug 2024 09:20:31 -0700 Subject: [PATCH 3/8] fix lint --- llvm/include/llvm/ProfileData/SampleProfReader.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h index 00e4e7096ab7b..907663fd50094 100644 --- a/llvm/include/llvm/ProfileData/SampleProfReader.h +++ b/llvm/include/llvm/ProfileData/SampleProfReader.h @@ -380,7 +380,7 @@ class SampleProfileReader { return sampleprof_error::success; } - /// Read sample profiles for the given functions. Currently it's only used + /// Read sample profiles for the given functions. Currently it's only used /// for extended binary format to load the profiles on-demand. virtual std::error_code read(const DenseSet &FuncsToUse, SampleProfileMap &Profiles) { @@ -821,7 +821,7 @@ class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary { /// Read the profiles on-demand for the given functions. This is used after /// stale call graph matching finds new functions whose profiles aren't loaded - /// at the beginning and we need to loaded the profiles explicitly for + /// at the beginning and we need to loaded the profiles explicitly for /// potential matching. std::error_code read(const DenseSet &FuncsToUse, SampleProfileMap &Profiles) override; From ab2f83da198013aa55e95c7312a65288dee4df18 Mon Sep 17 00:00:00 2001 From: wlei Date: Fri, 16 Aug 2024 16:54:15 -0700 Subject: [PATCH 4/8] addressing comments --- .../llvm/ProfileData/SampleProfReader.h | 20 ++- .../Transforms/IPO/SampleProfileMatcher.cpp | 33 ++-- ...eudo-probe-stale-profile-toplev-func-cp.ll | 147 ------------------ .../pseudo-probe-stale-profile-toplev-func.ll | 4 +- 4 files changed, 35 insertions(+), 169 deletions(-) delete mode 100644 llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func-cp.ll diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h index 907663fd50094..c86b97740e4f7 100644 --- a/llvm/include/llvm/ProfileData/SampleProfReader.h +++ b/llvm/include/llvm/ProfileData/SampleProfReader.h @@ -380,8 +380,17 @@ class SampleProfileReader { return sampleprof_error::success; } - /// Read sample profiles for the given functions. Currently it's only used - /// for extended binary format to load the profiles on-demand. + /// Read sample profiles for the given functions. Currently it's only used for + /// extended binary format to load the profiles on-demand. + std::error_code read(const DenseSet &FuncsToUse) { + if (std::error_code EC = read(FuncsToUse, Profiles)) + return EC; + return sampleprof_error::success; + }; + + /// Read sample profiles for the given functions and write them to the given + /// profile map. Currently it's only used for extended binary format to load + /// the profiles on-demand. virtual std::error_code read(const DenseSet &FuncsToUse, SampleProfileMap &Profiles) { return sampleprof_error::not_implemented; @@ -512,8 +521,8 @@ class SampleProfileReader { void setModule(const Module *Mod) { M = Mod; } void setFuncNameToProfNameMap( - HashKeyMap *FPMap) { - FuncNameToProfNameMap = FPMap; + const HashKeyMap &FPMap) { + FuncNameToProfNameMap = &FPMap; } protected: @@ -547,7 +556,7 @@ class SampleProfileReader { // A map pointer to the FuncNameToProfNameMap in SampleProfileLoader, // which maps the function name to the matched profile name. This is used // for sample loader to look up profile using the new name. - HashKeyMap + const HashKeyMap *FuncNameToProfNameMap = nullptr; // A map from a function's context hash to its meta data section range, used @@ -557,6 +566,7 @@ class SampleProfileReader { std::pair LBRProfileSecRange; + /// Whether the profile has attribute metadata. bool ProfileHasAttribute = false; /// \brief Whether samples are collected based on pseudo probes. diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp index 574a157c63683..77cede8744707 100644 --- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp @@ -36,6 +36,12 @@ static cl::opt MinCallCountForCGMatching( cl::desc("The minimum number of call anchors required for a function to " "run stale profile call graph matching.")); +static cl::opt ReadToplevProfileforCGMatching( + "read-toplev-profile-for-cg-matching", cl::Hidden, cl::init(false), + cl::desc( + "Read top-level profiles that the sample reader initially skips for " + "the call-graph matching(only meaningful for extended binary format)")); + extern cl::opt SalvageStaleProfile; extern cl::opt SalvageUnusedProfile; extern cl::opt PersistProfileStaleness; @@ -784,22 +790,19 @@ bool SampleProfileMatcher::functionMatchesProfileHelper( const auto *FSFlattened = getFlattenedSamplesFor(ProfFunc); // With extbinary profile format, initial profile loading only reads profile // based on current function names in the module. - // However, if a function is renamed, sample loader fails to load its original + // However, if a function is renamed, sample loader skips to load its original // profile(which has a different name), we will miss this case. To address // this, we load the top-level profile candidate explicitly for the matching. - if (!FSFlattened) { + if (!FSFlattened && ReadToplevProfileforCGMatching) { DenseSet TopLevelFunc({ProfFunc.stringRef()}); - SampleProfileMap TopLevelProfile; - Reader.read(TopLevelFunc, TopLevelProfile); - assert(TopLevelProfile.size() <= 1 && - "More than one profile is found for top-level function"); - if (!TopLevelProfile.empty()) { - LLVM_DEBUG(dbgs() << "Read top-level function " << ProfFunc - << " for call-graph matching\n"); - auto &FS = TopLevelProfile.begin()->second; - FSFlattened = - &(FlattenedProfiles.create(FS.getContext()) = std::move(FS)); - } + if (std::error_code EC = Reader.read(TopLevelFunc, FlattenedProfiles)) + return false; + FSFlattened = getFlattenedSamplesFor(ProfFunc); + LLVM_DEBUG({ + if (FSFlattened) + dbgs() << "Read top-level function " << ProfFunc + << " for call-graph matching\n"; + }); } if (!FSFlattened) return false; @@ -901,8 +904,8 @@ void SampleProfileMatcher::UpdateWithSalvagedProfiles() { // based on current function names in the module, so we need to load top-level // profiles for functions with different profile name explicitly after // function-profile name map is established with stale profile matching. - Reader.read(ProfileSalvagedFuncs, Reader.getProfiles()); - Reader.setFuncNameToProfNameMap(FuncNameToProfNameMap); + Reader.read(ProfileSalvagedFuncs); + Reader.setFuncNameToProfNameMap(*FuncNameToProfNameMap); } void SampleProfileMatcher::runOnModule() { diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func-cp.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func-cp.ll deleted file mode 100644 index 750bf03fa2d93..0000000000000 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func-cp.ll +++ /dev/null @@ -1,147 +0,0 @@ -; *** IR Dump Before SampleProfileLoaderPass on [module] *** -; ModuleID = 'test_rename.c' -source_filename = "test_rename.c" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -@x = dso_local global i32 0, align 4, !dbg !0 - -; Function Attrs: noinline nounwind uwtable -define dso_local i32 @bar(i32 noundef %x) #0 !dbg !18 { -entry: - #dbg_value(i32 %x, !22, !DIExpression(), !23) - call void @llvm.pseudoprobe(i64 -2012135647395072713, i64 1, i32 0, i64 -1), !dbg !24 - %add = add nsw i32 %x, 1, !dbg !25 - ret i32 %add, !dbg !26 -} - -; Function Attrs: noinline nounwind uwtable -define dso_local void @foo_rename() #0 !dbg !27 { -entry: - call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 1, i32 0, i64 -1), !dbg !30 - %0 = load volatile i32, ptr @x, align 4, !dbg !30, !tbaa !31 - %call = call i32 @bar(i32 noundef %0), !dbg !35 - %1 = load volatile i32, ptr @x, align 4, !dbg !37, !tbaa !31 - %add = add nsw i32 %1, %call, !dbg !37 - store volatile i32 %add, ptr @x, align 4, !dbg !37, !tbaa !31 - ret void, !dbg !38 -} - -; Function Attrs: nounwind uwtable -define dso_local i32 @main() #1 !dbg !39 { -entry: - call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 1, i32 0, i64 -1), !dbg !45 - #dbg_value(i32 0, !43, !DIExpression(), !46) - br label %for.cond, !dbg !47 - -for.cond: ; preds = %for.body, %entry - %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ], !dbg !48 - #dbg_value(i32 %i.0, !43, !DIExpression(), !46) - call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 2, i32 0, i64 -1), !dbg !49 - %cmp = icmp slt i32 %i.0, 100000, !dbg !51 - br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !52 - -for.cond.cleanup: ; preds = %for.cond - call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 3, i32 0, i64 -1), !dbg !53 - call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 7, i32 0, i64 -1), !dbg !54 - ret i32 0, !dbg !54 - -for.body: ; preds = %for.cond - call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 4, i32 0, i64 -1), !dbg !55 - call void @foo_rename(), !dbg !57 - call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 6, i32 0, i64 -1), !dbg !59 - %inc = add nsw i32 %i.0, 1, !dbg !59 - #dbg_value(i32 %inc, !43, !DIExpression(), !46) - br label %for.cond, !dbg !60, !llvm.loop !61 -} - -; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) -declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #2 - -; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) -declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #2 - -; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) -declare void @llvm.pseudoprobe(i64, i64, i32, i64) #3 - -attributes #0 = { noinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" } -attributes #1 = { nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" } -attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } -attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } - -!llvm.dbg.cu = !{!2} -!llvm.module.flags = !{!7, !8, !9, !10, !11, !12, !13} -!llvm.ident = !{!14} -!llvm.pseudo_probe_desc = !{!15, !16, !17} - -!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) -!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true) -!2 = distinct !DICompileUnit(language: DW_LANG_C11, file: !3, producer: "clang version 20.0.0git (https://github.com/llvm/llvm-project.git 070702c9be2fb437b0765532c03e98c642951906)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None) -!3 = !DIFile(filename: "test_rename.c", directory: "/home/wlei/local/llvm_test/rename/extbinary", checksumkind: CSK_MD5, checksum: "11a33a83e4d190ebda0792d0610f0c67") -!4 = !{!0} -!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6) -!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!7 = !{i32 7, !"Dwarf Version", i32 5} -!8 = !{i32 2, !"Debug Info Version", i32 3} -!9 = !{i32 1, !"wchar_size", i32 4} -!10 = !{i32 8, !"PIC Level", i32 2} -!11 = !{i32 7, !"PIE Level", i32 2} -!12 = !{i32 7, !"uwtable", i32 2} -!13 = !{i32 7, !"debug-info-assignment-tracking", i1 true} -!14 = !{!"clang version 20.0.0git (https://github.com/llvm/llvm-project.git 070702c9be2fb437b0765532c03e98c642951906)"} -!15 = !{i64 -2012135647395072713, i64 4294967295, !"bar"} -!16 = !{i64 -2115950948644264162, i64 281479271677951, !"foo_rename"} -!17 = !{i64 -2624081020897602054, i64 281582264815352, !"main"} -!18 = distinct !DISubprogram(name: "bar", scope: !3, file: !3, line: 3, type: !19, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21) -!19 = !DISubroutineType(types: !20) -!20 = !{!6, !6} -!21 = !{!22} -!22 = !DILocalVariable(name: "x", arg: 1, scope: !18, file: !3, line: 3, type: !6) -!23 = !DILocation(line: 0, scope: !18) -!24 = !DILocation(line: 4, column: 10, scope: !18) -!25 = !DILocation(line: 4, column: 12, scope: !18) -!26 = !DILocation(line: 4, column: 3, scope: !18) -!27 = distinct !DISubprogram(name: "foo_rename", scope: !3, file: !3, line: 7, type: !28, scopeLine: 7, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) -!28 = !DISubroutineType(types: !29) -!29 = !{null} -!30 = !DILocation(line: 8, column: 15, scope: !27) -!31 = !{!32, !32, i64 0} -!32 = !{!"int", !33, i64 0} -!33 = !{!"omnipotent char", !34, i64 0} -!34 = !{!"Simple C/C++ TBAA"} -!35 = !DILocation(line: 8, column: 11, scope: !36) -!36 = !DILexicalBlockFile(scope: !27, file: !3, discriminator: 455082007) -!37 = !DILocation(line: 8, column: 8, scope: !27) -!38 = !DILocation(line: 9, column: 1, scope: !27) -!39 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 11, type: !40, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !42) -!40 = !DISubroutineType(types: !41) -!41 = !{!6} -!42 = !{!43} -!43 = !DILocalVariable(name: "i", scope: !44, file: !3, line: 12, type: !6) -!44 = distinct !DILexicalBlock(scope: !39, file: !3, line: 12, column: 3) -!45 = !DILocation(line: 12, column: 12, scope: !44) -!46 = !DILocation(line: 0, scope: !44) -!47 = !DILocation(line: 12, column: 8, scope: !44) -!48 = !DILocation(line: 12, scope: !44) -!49 = !DILocation(line: 12, column: 19, scope: !50) -!50 = distinct !DILexicalBlock(scope: !44, file: !3, line: 12, column: 3) -!51 = !DILocation(line: 12, column: 21, scope: !50) -!52 = !DILocation(line: 12, column: 3, scope: !44) -!53 = !DILocation(line: 0, scope: !39) -!54 = !DILocation(line: 15, column: 1, scope: !39) -!55 = !DILocation(line: 13, column: 7, scope: !56) -!56 = distinct !DILexicalBlock(scope: !50, file: !3, line: 12, column: 40) -!57 = !DILocation(line: 13, column: 7, scope: !58) -!58 = !DILexicalBlockFile(scope: !56, file: !3, discriminator: 455082031) -!59 = !DILocation(line: 12, column: 36, scope: !50) -!60 = !DILocation(line: 12, column: 3, scope: !50) -!61 = distinct !{!61, !52, !62, !63} -!62 = !DILocation(line: 14, column: 3, scope: !44) -!63 = !{!"llvm.loop.mustprogress"} -Function foo_rename is not in profile or profile symbol list. -Run stale profile matching for main -Run stale profile matching for bar -(0/2) of functions' profile are invalid and (0/452891) of samples are discarded due to function hash mismatch. -(0/2) of functions' profile are matched and (0/452891) of samples are reused by call graph matching. -(1/1) of callsites' profile are invalid and (51/452891) of samples are discarded due to callsite location mismatch. -(0/1) of callsites and (0/51) of samples are recovered by stale profile matching. diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll index 356b16ca6ad05..7b3fe9e047bd2 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll @@ -1,8 +1,8 @@ ; REQUIRES: x86_64-linux ; REQUIRES: asserts -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-toplev-func.prof --salvage-stale-profile --salvage-unused-profile -report-profile-staleness -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 2>&1 | FileCheck %s -check-prefix=CHECK-TEXT +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-toplev-func.prof --salvage-stale-profile --salvage-unused-profile -report-profile-staleness -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 --read-toplev-profile-for-cg-matching 2>&1 | FileCheck %s -check-prefix=CHECK-TEXT ; RUN: llvm-profdata merge --sample %S/Inputs/pseudo-probe-stale-profile-toplev-func.prof -extbinary -o %t.extbinary -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.extbinary --salvage-stale-profile --salvage-unused-profile -report-profile-staleness -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 2>&1 | FileCheck %s -check-prefix=CHECK-EXTBIN +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.extbinary --salvage-stale-profile --salvage-unused-profile -report-profile-staleness -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 --read-toplev-profile-for-cg-matching 2>&1 | FileCheck %s -check-prefix=CHECK-EXTBIN ; CHECK-TEXT: Run stale profile matching for main ; CHECK-TEXT-NOT: Read top-level function foo for call-graph matching From 259ab87f5a708cf1ee50e4b83dceb771fee791af Mon Sep 17 00:00:00 2001 From: wlei Date: Mon, 19 Aug 2024 10:45:22 -0700 Subject: [PATCH 5/8] check whether a profile is already loaded --- llvm/include/llvm/ProfileData/SampleProfReader.h | 15 +++++++++------ llvm/lib/ProfileData/SampleProfReader.cpp | 6 +++--- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h index c86b97740e4f7..a93cf25e3f7f7 100644 --- a/llvm/include/llvm/ProfileData/SampleProfReader.h +++ b/llvm/include/llvm/ProfileData/SampleProfReader.h @@ -380,13 +380,16 @@ class SampleProfileReader { return sampleprof_error::success; } - /// Read sample profiles for the given functions. Currently it's only used for - /// extended binary format to load the profiles on-demand. + /// Read sample profiles for the given functions. std::error_code read(const DenseSet &FuncsToUse) { - if (std::error_code EC = read(FuncsToUse, Profiles)) + DenseSet S; + for (StringRef F : FuncsToUse) + if (Profiles.find(FunctionId(F)) == Profiles.end()) + S.insert(F); + if (std::error_code EC = read(S, Profiles)) return EC; return sampleprof_error::success; - }; + } /// Read sample profiles for the given functions and write them to the given /// profile map. Currently it's only used for extended binary format to load @@ -394,7 +397,7 @@ class SampleProfileReader { virtual std::error_code read(const DenseSet &FuncsToUse, SampleProfileMap &Profiles) { return sampleprof_error::not_implemented; - }; + } /// The implementaion to read sample profiles from the associated file. virtual std::error_code readImpl() = 0; @@ -564,7 +567,7 @@ class SampleProfileReader { std::unordered_map> FuncMetadataIndex; - std::pair LBRProfileSecRange; + std::pair ProfileSecRange; /// Whether the profile has attribute metadata. bool ProfileHasAttribute = false; diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp index 4c0a45bfb47cf..71464e8dae65c 100644 --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -731,7 +731,7 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection( break; } case SecLBRProfile: - LBRProfileSecRange = std::make_pair(Data, End); + ProfileSecRange = std::make_pair(Data, End); if (std::error_code EC = readFuncProfiles()) return EC; break; @@ -801,8 +801,8 @@ bool SampleProfileReaderExtBinaryBase::useFuncOffsetList() const { std::error_code SampleProfileReaderExtBinaryBase::read(const DenseSet &FuncsToUse, SampleProfileMap &Profiles) { - Data = LBRProfileSecRange.first; - End = LBRProfileSecRange.second; + Data = ProfileSecRange.first; + End = ProfileSecRange.second; if (std::error_code EC = readFuncProfiles(FuncsToUse, Profiles)) return EC; End = Data; From 11b5a6602f81942d59f4357b05a6a755a6212f33 Mon Sep 17 00:00:00 2001 From: wlei Date: Mon, 19 Aug 2024 11:29:54 -0700 Subject: [PATCH 6/8] load profiles into the sample reader's profile map --- .../Transforms/IPO/SampleProfileMatcher.cpp | 41 ++++++++++--------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp index 77cede8744707..afd5933e39eb4 100644 --- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp @@ -416,18 +416,19 @@ void SampleProfileMatcher::runOnFunction(Function &F) { // callsites in one context may differ from those in another context. To get // the maximum number of callsites, we merge the function profiles from all // contexts, aka, the flattened profile to find profile anchors. - const auto *FSFlattened = getFlattenedSamplesFor(F); - if (SalvageUnusedProfile && !FSFlattened) { + const auto *FSForMatching = getFlattenedSamplesFor(F); + if (SalvageUnusedProfile && !FSForMatching) { // Apply the matching in place to find the new function's matched profile. - // TODO: For extended profile format, if a function profile is unused and - // it's top-level, even if the profile is matched, it's not found in the - // profile. This is because sample reader only read the used profile at the - // beginning, we need to support loading the profile on-demand in future. auto R = FuncToProfileNameMap.find(&F); - if (R != FuncToProfileNameMap.end()) - FSFlattened = getFlattenedSamplesFor(R->second); + if (R != FuncToProfileNameMap.end()) { + FSForMatching = getFlattenedSamplesFor(R->second); + // Try to find the salvaged top-level profiles that are explicitly loaded + // for the matching, see "functionMatchesProfileHelper" for the details. + if (!FSForMatching) + FSForMatching = Reader.getSamplesFor(R->second.stringRef()); + } } - if (!FSFlattened) + if (!FSForMatching) return; // Anchors for IR. It's a map from IR location to callee name, callee name is @@ -438,7 +439,7 @@ void SampleProfileMatcher::runOnFunction(Function &F) { // Anchors for profile. It's a map from callsite location to a set of callee // name. AnchorMap ProfileAnchors; - findProfileAnchors(*FSFlattened, ProfileAnchors); + findProfileAnchors(*FSForMatching, ProfileAnchors); // Compute the callsite match states for profile staleness report. if (ReportProfileStaleness || PersistProfileStaleness) @@ -449,7 +450,7 @@ void SampleProfileMatcher::runOnFunction(Function &F) { // For probe-based profiles, run matching only when profile checksum is // mismatched. bool ChecksumMismatch = FunctionSamples::ProfileIsProbeBased && - !ProbeManager->profileIsValid(F, *FSFlattened); + !ProbeManager->profileIsValid(F, *FSForMatching); bool RunCFGMatching = !FunctionSamples::ProfileIsProbeBased || ChecksumMismatch; bool RunCGMatching = SalvageUnusedProfile; @@ -787,30 +788,30 @@ bool SampleProfileMatcher::functionMatchesProfileHelper( // two sequences are. float Similarity = 0.0; - const auto *FSFlattened = getFlattenedSamplesFor(ProfFunc); + const auto *FSForMatching = getFlattenedSamplesFor(ProfFunc); // With extbinary profile format, initial profile loading only reads profile // based on current function names in the module. // However, if a function is renamed, sample loader skips to load its original // profile(which has a different name), we will miss this case. To address // this, we load the top-level profile candidate explicitly for the matching. - if (!FSFlattened && ReadToplevProfileforCGMatching) { + if (!FSForMatching && ReadToplevProfileforCGMatching) { DenseSet TopLevelFunc({ProfFunc.stringRef()}); - if (std::error_code EC = Reader.read(TopLevelFunc, FlattenedProfiles)) + if (std::error_code EC = Reader.read(TopLevelFunc)) return false; - FSFlattened = getFlattenedSamplesFor(ProfFunc); + FSForMatching = Reader.getSamplesFor(ProfFunc.stringRef()); LLVM_DEBUG({ - if (FSFlattened) + if (FSForMatching) dbgs() << "Read top-level function " << ProfFunc << " for call-graph matching\n"; }); } - if (!FSFlattened) + if (!FSForMatching) return false; // The check for similarity or checksum may not be reliable if the function is // tiny, we use the number of basic block as a proxy for the function // complexity and skip the matching if it's too small. if (IRFunc.size() < MinFuncCountForCGMatching || - FSFlattened->getBodySamples().size() < MinFuncCountForCGMatching) + FSForMatching->getBodySamples().size() < MinFuncCountForCGMatching) return false; // For probe-based function, we first trust the checksum info. If the checksum @@ -818,7 +819,7 @@ bool SampleProfileMatcher::functionMatchesProfileHelper( if (FunctionSamples::ProfileIsProbeBased) { const auto *FuncDesc = ProbeManager->getDesc(IRFunc); if (FuncDesc && - !ProbeManager->profileIsHashMismatched(*FuncDesc, *FSFlattened)) { + !ProbeManager->profileIsHashMismatched(*FuncDesc, *FSForMatching)) { LLVM_DEBUG(dbgs() << "The checksums for " << IRFunc.getName() << "(IR) and " << ProfFunc << "(Profile) match.\n"); @@ -829,7 +830,7 @@ bool SampleProfileMatcher::functionMatchesProfileHelper( AnchorMap IRAnchors; findIRAnchors(IRFunc, IRAnchors); AnchorMap ProfileAnchors; - findProfileAnchors(*FSFlattened, ProfileAnchors); + findProfileAnchors(*FSForMatching, ProfileAnchors); AnchorList FilteredIRAnchorsList; AnchorList FilteredProfileAnchorList; From 60440849a6e1cdb5ff7ceae1aa5a32c068fae60d Mon Sep 17 00:00:00 2001 From: wlei Date: Mon, 19 Aug 2024 17:59:29 -0700 Subject: [PATCH 7/8] make read function private --- .../llvm/ProfileData/SampleProfReader.h | 29 ++++++++++--------- .../Transforms/IPO/SampleProfileMatcher.cpp | 10 +++---- .../pseudo-probe-stale-profile-toplev-func.ll | 4 +-- 3 files changed, 22 insertions(+), 21 deletions(-) diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h index a93cf25e3f7f7..6cab119593888 100644 --- a/llvm/include/llvm/ProfileData/SampleProfReader.h +++ b/llvm/include/llvm/ProfileData/SampleProfReader.h @@ -391,14 +391,6 @@ class SampleProfileReader { return sampleprof_error::success; } - /// Read sample profiles for the given functions and write them to the given - /// profile map. Currently it's only used for extended binary format to load - /// the profiles on-demand. - virtual std::error_code read(const DenseSet &FuncsToUse, - SampleProfileMap &Profiles) { - return sampleprof_error::not_implemented; - } - /// The implementaion to read sample profiles from the associated file. virtual std::error_code readImpl() = 0; @@ -554,6 +546,14 @@ class SampleProfileReader { /// Compute summary for this profile. void computeSummary(); + /// Read sample profiles for the given functions and write them to the given + /// profile map. Currently it's only used for extended binary format to load + /// the profiles on-demand. + virtual std::error_code read(const DenseSet &FuncsToUse, + SampleProfileMap &Profiles) { + return sampleprof_error::not_implemented; + } + std::unique_ptr Remapper; // A map pointer to the FuncNameToProfNameMap in SampleProfileLoader, @@ -832,18 +832,19 @@ class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary { /// the reader has been given a module. bool collectFuncsFromModule() override; + std::unique_ptr getProfileSymbolList() override { + return std::move(ProfSymList); + }; + + void setSkipFlatProf(bool Skip) override { SkipFlatProf = Skip; } + +private: /// Read the profiles on-demand for the given functions. This is used after /// stale call graph matching finds new functions whose profiles aren't loaded /// at the beginning and we need to loaded the profiles explicitly for /// potential matching. std::error_code read(const DenseSet &FuncsToUse, SampleProfileMap &Profiles) override; - - std::unique_ptr getProfileSymbolList() override { - return std::move(ProfSymList); - }; - - void setSkipFlatProf(bool Skip) override { SkipFlatProf = Skip; } }; class SampleProfileReaderExtBinary : public SampleProfileReaderExtBinaryBase { diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp index afd5933e39eb4..1c3d89bfc3b12 100644 --- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp @@ -36,10 +36,10 @@ static cl::opt MinCallCountForCGMatching( cl::desc("The minimum number of call anchors required for a function to " "run stale profile call graph matching.")); -static cl::opt ReadToplevProfileforCGMatching( - "read-toplev-profile-for-cg-matching", cl::Hidden, cl::init(false), +static cl::opt LoadFuncProfileforCGMatching( + "load-func-profile-for-cg-matching", cl::Hidden, cl::init(false), cl::desc( - "Read top-level profiles that the sample reader initially skips for " + "Load top-level profiles that the sample reader initially skipped for " "the call-graph matching(only meaningful for extended binary format)")); extern cl::opt SalvageStaleProfile; @@ -424,7 +424,7 @@ void SampleProfileMatcher::runOnFunction(Function &F) { FSForMatching = getFlattenedSamplesFor(R->second); // Try to find the salvaged top-level profiles that are explicitly loaded // for the matching, see "functionMatchesProfileHelper" for the details. - if (!FSForMatching) + if (!FSForMatching && LoadFuncProfileforCGMatching) FSForMatching = Reader.getSamplesFor(R->second.stringRef()); } } @@ -794,7 +794,7 @@ bool SampleProfileMatcher::functionMatchesProfileHelper( // However, if a function is renamed, sample loader skips to load its original // profile(which has a different name), we will miss this case. To address // this, we load the top-level profile candidate explicitly for the matching. - if (!FSForMatching && ReadToplevProfileforCGMatching) { + if (!FSForMatching && LoadFuncProfileforCGMatching) { DenseSet TopLevelFunc({ProfFunc.stringRef()}); if (std::error_code EC = Reader.read(TopLevelFunc)) return false; diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll index 7b3fe9e047bd2..c839364f23553 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll @@ -1,8 +1,8 @@ ; REQUIRES: x86_64-linux ; REQUIRES: asserts -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-toplev-func.prof --salvage-stale-profile --salvage-unused-profile -report-profile-staleness -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 --read-toplev-profile-for-cg-matching 2>&1 | FileCheck %s -check-prefix=CHECK-TEXT +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-toplev-func.prof --salvage-stale-profile --salvage-unused-profile -report-profile-staleness -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 --load-func-profile-for-cg-matching 2>&1 | FileCheck %s -check-prefix=CHECK-TEXT ; RUN: llvm-profdata merge --sample %S/Inputs/pseudo-probe-stale-profile-toplev-func.prof -extbinary -o %t.extbinary -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.extbinary --salvage-stale-profile --salvage-unused-profile -report-profile-staleness -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 --read-toplev-profile-for-cg-matching 2>&1 | FileCheck %s -check-prefix=CHECK-EXTBIN +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.extbinary --salvage-stale-profile --salvage-unused-profile -report-profile-staleness -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 --load-func-profile-for-cg-matching 2>&1 | FileCheck %s -check-prefix=CHECK-EXTBIN ; CHECK-TEXT: Run stale profile matching for main ; CHECK-TEXT-NOT: Read top-level function foo for call-graph matching From 6fbb401bb1546374eb2a3ffa6bca5df182f5fdbe Mon Sep 17 00:00:00 2001 From: wlei Date: Tue, 27 Aug 2024 11:58:44 -0700 Subject: [PATCH 8/8] add space before ( --- llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp index 1c3d89bfc3b12..0c676e8fb95fd 100644 --- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp @@ -40,7 +40,8 @@ static cl::opt LoadFuncProfileforCGMatching( "load-func-profile-for-cg-matching", cl::Hidden, cl::init(false), cl::desc( "Load top-level profiles that the sample reader initially skipped for " - "the call-graph matching(only meaningful for extended binary format)")); + "the call-graph matching (only meaningful for extended binary " + "format)")); extern cl::opt SalvageStaleProfile; extern cl::opt SalvageUnusedProfile;