Skip to content

[SampleFDO] Read call-graph matching recovered top-level function profile #101053

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Sep 4, 2024
21 changes: 21 additions & 0 deletions llvm/include/llvm/ProfileData/SampleProfReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,16 @@ class SampleProfileReader {
if (It != Profiles.end())
return &It->second;

if (FuncNameToProfNameMap && !FuncNameToProfNameMap->empty()) {
auto R = FuncNameToProfNameMap->find(FunctionId(Fname));
if (R != FuncNameToProfNameMap->end()) {
Fname = R->second.stringRef();
auto It = Profiles.find(FunctionId(Fname));
if (It != Profiles.end())
return &It->second;
}
}

if (Remapper) {
if (auto NameInProfile = Remapper->lookUpNameInProfile(Fname)) {
auto It = Profiles.find(FunctionId(*NameInProfile));
Expand Down Expand Up @@ -505,6 +515,11 @@ class SampleProfileReader {

/// Store \p Mod in M. Only the pointer is kept; no copy of the module is made.
void setModule(const Module *Mod) { M = Mod; }

/// Point the reader at \p FPMap, the map from a function name to the profile
/// name it was matched to.
///
/// Only the address of \p FPMap is stored (no copy is made), so the referenced
/// map has to stay alive for as long as the reader may consult it.
void setFuncNameToProfNameMap(
const HashKeyMap<std::unordered_map, FunctionId, FunctionId> &FPMap) {
FuncNameToProfNameMap = &FPMap;
}

protected:
/// Map every function to its associated profile.
///
Expand Down Expand Up @@ -541,6 +556,12 @@ class SampleProfileReader {

std::unique_ptr<SampleProfileReaderItaniumRemapper> Remapper;

// A map pointer to the FuncNameToProfNameMap in SampleProfileLoader,
// which maps the function name to the matched profile name. This is used
// for sample loader to look up profile using the new name.
const HashKeyMap<std::unordered_map, FunctionId, FunctionId>
*FuncNameToProfNameMap = nullptr;

// A map from a function's context hash to its meta data section range, used
// for on-demand read function profile metadata.
std::unordered_map<uint64_t, std::pair<const uint8_t *, const uint8_t *>>
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ class SampleProfileMatcher {
// function and all inlinees.
void countMismatchedCallsiteSamples(const FunctionSamples &FS);
void computeAndReportProfileStaleness();
void UpdateWithSalvagedProfiles();

LocToLocMap &getIRToProfileLocationMap(const Function &F) {
auto Ret = FuncMappings.try_emplace(
Expand Down
90 changes: 64 additions & 26 deletions llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,13 @@ static cl::opt<unsigned> MinCallCountForCGMatching(
cl::desc("The minimum number of call anchors required for a function to "
"run stale profile call graph matching."));

// Hidden, off-by-default switch. When enabled, the matcher explicitly asks the
// reader for top-level profiles that were not found among the flattened
// profiles, so that they can take part in call-graph matching.
static cl::opt<bool> LoadFuncProfileforCGMatching(
"load-func-profile-for-cg-matching", cl::Hidden, cl::init(false),
cl::desc(
"Load top-level profiles that the sample reader initially skipped for "
"the call-graph matching (only meaningful for extended binary "
"format)"));

extern cl::opt<bool> SalvageStaleProfile;
extern cl::opt<bool> SalvageUnusedProfile;
extern cl::opt<bool> PersistProfileStaleness;
Expand Down Expand Up @@ -410,18 +417,19 @@ void SampleProfileMatcher::runOnFunction(Function &F) {
// callsites in one context may differ from those in another context. To get
// the maximum number of callsites, we merge the function profiles from all
// contexts, aka, the flattened profile to find profile anchors.
const auto *FSFlattened = getFlattenedSamplesFor(F);
if (SalvageUnusedProfile && !FSFlattened) {
const auto *FSForMatching = getFlattenedSamplesFor(F);
if (SalvageUnusedProfile && !FSForMatching) {
// Apply the matching in place to find the new function's matched profile.
// TODO: For extended profile format, if a function profile is unused and
// it's top-level, even if the profile is matched, it's not found in the
// profile. This is because sample reader only read the used profile at the
// beginning, we need to support loading the profile on-demand in future.
auto R = FuncToProfileNameMap.find(&F);
if (R != FuncToProfileNameMap.end())
FSFlattened = getFlattenedSamplesFor(R->second);
if (R != FuncToProfileNameMap.end()) {
FSForMatching = getFlattenedSamplesFor(R->second);
// Try to find the salvaged top-level profiles that are explicitly loaded
// for the matching, see "functionMatchesProfileHelper" for the details.
if (!FSForMatching && LoadFuncProfileforCGMatching)
FSForMatching = Reader.getSamplesFor(R->second.stringRef());
}
}
if (!FSFlattened)
if (!FSForMatching)
return;

// Anchors for IR. It's a map from IR location to callee name, callee name is
Expand All @@ -432,7 +440,7 @@ void SampleProfileMatcher::runOnFunction(Function &F) {
// Anchors for profile. It's a map from callsite location to a set of callee
// name.
AnchorMap ProfileAnchors;
findProfileAnchors(*FSFlattened, ProfileAnchors);
findProfileAnchors(*FSForMatching, ProfileAnchors);

// Compute the callsite match states for profile staleness report.
if (ReportProfileStaleness || PersistProfileStaleness)
Expand All @@ -443,7 +451,7 @@ void SampleProfileMatcher::runOnFunction(Function &F) {
// For probe-based profiles, run matching only when profile checksum is
// mismatched.
bool ChecksumMismatch = FunctionSamples::ProfileIsProbeBased &&
!ProbeManager->profileIsValid(F, *FSFlattened);
!ProbeManager->profileIsValid(F, *FSForMatching);
bool RunCFGMatching =
!FunctionSamples::ProfileIsProbeBased || ChecksumMismatch;
bool RunCGMatching = SalvageUnusedProfile;
Expand Down Expand Up @@ -781,22 +789,38 @@ bool SampleProfileMatcher::functionMatchesProfileHelper(
// two sequences are.
float Similarity = 0.0;

const auto *FSFlattened = getFlattenedSamplesFor(ProfFunc);
if (!FSFlattened)
const auto *FSForMatching = getFlattenedSamplesFor(ProfFunc);
// With the extbinary profile format, the initial profile loading only reads
// profiles for the function names currently in the module.
// However, if a function is renamed, the sample loader skips loading its
// original profile (which has a different name), so we would miss this case.
// To address this, we explicitly load the top-level profile candidate for the
// matching.
if (!FSForMatching && LoadFuncProfileforCGMatching) {
DenseSet<StringRef> TopLevelFunc({ProfFunc.stringRef()});
if (std::error_code EC = Reader.read(TopLevelFunc))
return false;
FSForMatching = Reader.getSamplesFor(ProfFunc.stringRef());
LLVM_DEBUG({
if (FSForMatching)
dbgs() << "Read top-level function " << ProfFunc
<< " for call-graph matching\n";
});
}
if (!FSForMatching)
return false;
// The check for similarity or checksum may not be reliable if the function is
// tiny, we use the number of basic block as a proxy for the function
// complexity and skip the matching if it's too small.
if (IRFunc.size() < MinFuncCountForCGMatching ||
FSFlattened->getBodySamples().size() < MinFuncCountForCGMatching)
FSForMatching->getBodySamples().size() < MinFuncCountForCGMatching)
return false;

// For probe-based function, we first trust the checksum info. If the checksum
// doesn't match, we continue checking for similarity.
if (FunctionSamples::ProfileIsProbeBased) {
const auto *FuncDesc = ProbeManager->getDesc(IRFunc);
if (FuncDesc &&
!ProbeManager->profileIsHashMismatched(*FuncDesc, *FSFlattened)) {
!ProbeManager->profileIsHashMismatched(*FuncDesc, *FSForMatching)) {
LLVM_DEBUG(dbgs() << "The checksums for " << IRFunc.getName()
<< "(IR) and " << ProfFunc << "(Profile) match.\n");

Expand All @@ -807,7 +831,7 @@ bool SampleProfileMatcher::functionMatchesProfileHelper(
AnchorMap IRAnchors;
findIRAnchors(IRFunc, IRAnchors);
AnchorMap ProfileAnchors;
findProfileAnchors(*FSFlattened, ProfileAnchors);
findProfileAnchors(*FSForMatching, ProfileAnchors);

AnchorList FilteredIRAnchorsList;
AnchorList FilteredProfileAnchorList;
Expand Down Expand Up @@ -863,6 +887,29 @@ bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc,
return Matched;
}

void SampleProfileMatcher::UpdateWithSalvagedProfiles() {
DenseSet<StringRef> ProfileSalvagedFuncs;
// Update FuncNameToProfNameMap and SymbolMap.
for (auto &I : FuncToProfileNameMap) {
assert(I.first && "New function is null");
FunctionId FuncName(I.first->getName());
ProfileSalvagedFuncs.insert(I.second.stringRef());
FuncNameToProfNameMap->emplace(FuncName, I.second);

// We need to remove the old entry to avoid duplicating the function
// processing.
SymbolMap->erase(FuncName);
SymbolMap->emplace(I.second, I.first);
}

// With extbinary profile format, initial profile loading only reads profile
// based on current function names in the module, so we need to load top-level
// profiles for functions with different profile name explicitly after
// function-profile name map is established with stale profile matching.
Reader.read(ProfileSalvagedFuncs);
Reader.setFuncNameToProfNameMap(*FuncNameToProfNameMap);
}

void SampleProfileMatcher::runOnModule() {
ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
FunctionSamples::ProfileIsCS);
Expand All @@ -880,17 +927,8 @@ void SampleProfileMatcher::runOnModule() {
runOnFunction(*F);
}

// Update the data in SampleLoader.
if (SalvageUnusedProfile)
for (auto &I : FuncToProfileNameMap) {
assert(I.first && "New function is null");
FunctionId FuncName(I.first->getName());
FuncNameToProfNameMap->emplace(FuncName, I.second);
// We need to remove the old entry to avoid duplicating the function
// processing.
SymbolMap->erase(FuncName);
SymbolMap->emplace(I.second, I.first);
}
UpdateWithSalvagedProfiles();

if (SalvageStaleProfile)
distributeIRToProfileLocationMap();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
foo:2724522:51
1: 51
2: 452674
3: 47
4: 497875
6: 415959
10: 452623
11: 452687 bar:452687
12: 452623
13: 47
!CFGChecksum: 281479271677951
bar:452687:452687
1: 452687
!CFGChecksum: 4294967295
main:204:0
1: 0
2: 51
3: 0
4: 51
5: 51 foo:51
6: 51
7: 0
!CFGChecksum: 281582264815352
Loading