-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[ThinLTO] Shrink GlobalValueSummary by 8 bytes #107342
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ThinLTO] Shrink GlobalValueSummary by 8 bytes #107342
Conversation
During the ThinLTO indexing step for one of our large applications, we create 7.5 million instances of GlobalValueSummary. Changing: std::vector<ValueInfo> RefEdgeList; to: SmallVector<ValueInfo, 0> RefEdgeList; in GlobalValueSummary reduces the size of each instance by 8 bytes. The rest of the patch makes the same change to other places so that the types stay compatible across function boundaries.
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-llvm-ir Author: Kazu Hirata (kazutakahirata) ChangesDuring the ThinLTO indexing step for one of our large applications, we Changing: std::vector<ValueInfo> RefEdgeList; to: SmallVector<ValueInfo, 0> RefEdgeList; in GlobalValueSummary reduces the size of each instance by 8 bytes. Full diff: https://github.com/llvm/llvm-project/pull/107342.diff 6 Files Affected:
diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h
index 9e551d3ae2bbeb..2bbcda70938548 100644
--- a/llvm/include/llvm/AsmParser/LLParser.h
+++ b/llvm/include/llvm/AsmParser/LLParser.h
@@ -419,7 +419,7 @@ namespace llvm {
bool parseParamAccessCall(FunctionSummary::ParamAccess::Call &Call,
IdLocListType &IdLocList);
bool parseParamAccessOffset(ConstantRange &Range);
- bool parseOptionalRefs(std::vector<ValueInfo> &Refs);
+ bool parseOptionalRefs(SmallVectorImpl<ValueInfo> &Refs);
bool parseTypeIdEntry(unsigned ID);
bool parseTypeIdSummary(TypeIdSummary &TIS);
bool parseTypeIdCompatibleVtableEntry(unsigned ID);
diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h
index 6d39150a03bfd5..f4cf791e1bd13d 100644
--- a/llvm/include/llvm/IR/ModuleSummaryIndex.h
+++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h
@@ -538,10 +538,11 @@ class GlobalValueSummary {
/// (either by the initializer of a global variable, or referenced
/// from within a function). This does not include functions called, which
/// are listed in the derived FunctionSummary object.
- std::vector<ValueInfo> RefEdgeList;
+ SmallVector<ValueInfo, 0> RefEdgeList;
protected:
- GlobalValueSummary(SummaryKind K, GVFlags Flags, std::vector<ValueInfo> Refs)
+ GlobalValueSummary(SummaryKind K, GVFlags Flags,
+ SmallVector<ValueInfo, 0> Refs)
: Kind(K), Flags(Flags), RefEdgeList(std::move(Refs)) {
assert((K != AliasKind || Refs.empty()) &&
"Expect no references for AliasSummary");
@@ -641,7 +642,7 @@ class AliasSummary : public GlobalValueSummary {
public:
AliasSummary(GVFlags Flags)
- : GlobalValueSummary(AliasKind, Flags, ArrayRef<ValueInfo>{}),
+ : GlobalValueSummary(AliasKind, Flags, SmallVector<ValueInfo, 0>{}),
AliaseeSummary(nullptr) {}
/// Check if this is an alias summary.
@@ -857,7 +858,7 @@ class FunctionSummary : public GlobalValueSummary {
/*NotEligibleToImport=*/true, /*Live=*/true, /*IsLocal=*/false,
/*CanAutoHide=*/false, GlobalValueSummary::ImportKind::Definition),
/*NumInsts=*/0, FunctionSummary::FFlags{}, /*EntryCount=*/0,
- std::vector<ValueInfo>(), std::move(Edges),
+ SmallVector<ValueInfo, 0>(), std::move(Edges),
std::vector<GlobalValue::GUID>(),
std::vector<FunctionSummary::VFuncId>(),
std::vector<FunctionSummary::VFuncId>(),
@@ -913,7 +914,7 @@ class FunctionSummary : public GlobalValueSummary {
public:
FunctionSummary(GVFlags Flags, unsigned NumInsts, FFlags FunFlags,
- uint64_t EntryCount, std::vector<ValueInfo> Refs,
+ uint64_t EntryCount, SmallVector<ValueInfo, 0> Refs,
std::vector<EdgeTy> CGEdges,
std::vector<GlobalValue::GUID> TypeTests,
std::vector<VFuncId> TypeTestAssumeVCalls,
@@ -1166,7 +1167,7 @@ class GlobalVarSummary : public GlobalValueSummary {
} VarFlags;
GlobalVarSummary(GVFlags Flags, GVarFlags VarFlags,
- std::vector<ValueInfo> Refs)
+ SmallVector<ValueInfo, 0> Refs)
: GlobalValueSummary(GlobalVarKind, Flags, std::move(Refs)),
VarFlags(VarFlags) {}
diff --git a/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h b/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h
index 6cc533f043a517..338cd47c608042 100644
--- a/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h
+++ b/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h
@@ -216,7 +216,7 @@ template <> struct CustomMappingTraits<GlobalValueSummaryMapTy> {
}
auto &Elem = V.try_emplace(KeyInt, /*IsAnalysis=*/false).first->second;
for (auto &FSum : FSums) {
- std::vector<ValueInfo> Refs;
+ SmallVector<ValueInfo, 0> Refs;
Refs.reserve(FSum.Refs.size());
for (auto &RefGUID : FSum.Refs) {
auto It = V.try_emplace(RefGUID, /*IsAnalysis=*/false).first;
diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index e9490ccba82157..39e7926ea43fe0 100644
--- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -99,10 +99,11 @@ extern cl::opt<bool> MemProfReportHintedSizes;
// can only take an address of basic block located in the same function.
// Set `RefLocalLinkageIFunc` to true if the analyzed value references a
// local-linkage ifunc.
-static bool findRefEdges(ModuleSummaryIndex &Index, const User *CurUser,
- SetVector<ValueInfo, std::vector<ValueInfo>> &RefEdges,
- SmallPtrSet<const User *, 8> &Visited,
- bool &RefLocalLinkageIFunc) {
+static bool
+findRefEdges(ModuleSummaryIndex &Index, const User *CurUser,
+ SetVector<ValueInfo, SmallVector<ValueInfo, 0>> &RefEdges,
+ SmallPtrSet<const User *, 8> &Visited,
+ bool &RefLocalLinkageIFunc) {
bool HasBlockAddress = false;
SmallVector<const User *, 32> Worklist;
if (Visited.insert(CurUser).second)
@@ -310,7 +311,7 @@ static void computeFunctionSummary(
MapVector<ValueInfo, CalleeInfo, DenseMap<ValueInfo, unsigned>,
std::vector<std::pair<ValueInfo, CalleeInfo>>>
CallGraphEdges;
- SetVector<ValueInfo, std::vector<ValueInfo>> RefEdges, LoadRefEdges,
+ SetVector<ValueInfo, SmallVector<ValueInfo, 0>> RefEdges, LoadRefEdges,
StoreRefEdges;
SetVector<GlobalValue::GUID, std::vector<GlobalValue::GUID>> TypeTests;
SetVector<FunctionSummary::VFuncId, std::vector<FunctionSummary::VFuncId>>
@@ -568,16 +569,17 @@ static void computeFunctionSummary(
if (PSI->hasPartialSampleProfile() && ScalePartialSampleProfileWorkingSetSize)
Index.addBlockCount(F.size());
- std::vector<ValueInfo> Refs;
+ SmallVector<ValueInfo, 0> Refs;
if (IsThinLTO) {
- auto AddRefEdges = [&](const std::vector<const Instruction *> &Instrs,
- SetVector<ValueInfo, std::vector<ValueInfo>> &Edges,
- SmallPtrSet<const User *, 8> &Cache) {
- for (const auto *I : Instrs) {
- Cache.erase(I);
- findRefEdges(Index, I, Edges, Cache, HasLocalIFuncCallOrRef);
- }
- };
+ auto AddRefEdges =
+ [&](const std::vector<const Instruction *> &Instrs,
+ SetVector<ValueInfo, SmallVector<ValueInfo, 0>> &Edges,
+ SmallPtrSet<const User *, 8> &Cache) {
+ for (const auto *I : Instrs) {
+ Cache.erase(I);
+ findRefEdges(Index, I, Edges, Cache, HasLocalIFuncCallOrRef);
+ }
+ };
// By now we processed all instructions in a function, except
// non-volatile loads and non-volatile value stores. Let's find
@@ -805,7 +807,7 @@ static void computeVariableSummary(ModuleSummaryIndex &Index,
DenseSet<GlobalValue::GUID> &CantBePromoted,
const Module &M,
SmallVectorImpl<MDNode *> &Types) {
- SetVector<ValueInfo, std::vector<ValueInfo>> RefEdges;
+ SetVector<ValueInfo, SmallVector<ValueInfo, 0>> RefEdges;
SmallPtrSet<const User *, 8> Visited;
bool RefLocalIFunc = false;
bool HasBlockAddress =
@@ -961,7 +963,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
/* MayThrow */ true,
/* HasUnknownCall */ true,
/* MustBeUnreachable */ false},
- /*EntryCount=*/0, ArrayRef<ValueInfo>{},
+ /*EntryCount=*/0, SmallVector<ValueInfo, 0>{},
ArrayRef<FunctionSummary::EdgeTy>{},
ArrayRef<GlobalValue::GUID>{},
ArrayRef<FunctionSummary::VFuncId>{},
@@ -978,7 +980,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
GlobalVarSummary::GVarFlags(
false, false, cast<GlobalVariable>(GV)->isConstant(),
GlobalObject::VCallVisibilityPublic),
- ArrayRef<ValueInfo>{});
+ SmallVector<ValueInfo, 0>{});
Index.addGlobalValueSummary(*GV, std::move(Summary));
}
});
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index f41907f0351257..5964355fd26ccf 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -9388,7 +9388,7 @@ bool LLParser::parseFunctionSummary(std::string Name, GlobalValue::GUID GUID,
std::vector<FunctionSummary::EdgeTy> Calls;
FunctionSummary::TypeIdInfo TypeIdInfo;
std::vector<FunctionSummary::ParamAccess> ParamAccesses;
- std::vector<ValueInfo> Refs;
+ SmallVector<ValueInfo, 0> Refs;
std::vector<CallsiteInfo> Callsites;
std::vector<AllocInfo> Allocs;
// Default is all-zeros (conservative values).
@@ -9476,7 +9476,7 @@ bool LLParser::parseVariableSummary(std::string Name, GlobalValue::GUID GUID,
/* WriteOnly */ false,
/* Constant */ false,
GlobalObject::VCallVisibilityPublic);
- std::vector<ValueInfo> Refs;
+ SmallVector<ValueInfo, 0> Refs;
VTableFuncList VTableFuncs;
if (parseToken(lltok::colon, "expected ':' here") ||
parseToken(lltok::lparen, "expected '(' here") ||
@@ -9982,7 +9982,7 @@ bool LLParser::parseOptionalParamAccesses(
/// OptionalRefs
/// := 'refs' ':' '(' GVReference [',' GVReference]* ')'
-bool LLParser::parseOptionalRefs(std::vector<ValueInfo> &Refs) {
+bool LLParser::parseOptionalRefs(SmallVectorImpl<ValueInfo> &Refs) {
assert(Lex.getKind() == lltok::kw_refs);
Lex.Lex();
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 654be985a3229c..ddfc6b6ca77592 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -985,7 +985,7 @@ class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase {
Error parseValueSymbolTable(
uint64_t Offset,
DenseMap<unsigned, GlobalValue::LinkageTypes> &ValueIdToLinkageMap);
- std::vector<ValueInfo> makeRefList(ArrayRef<uint64_t> Record);
+ SmallVector<ValueInfo, 0> makeRefList(ArrayRef<uint64_t> Record);
std::vector<FunctionSummary::EdgeTy> makeCallList(ArrayRef<uint64_t> Record,
bool IsOldProfileFormat,
bool HasProfile,
@@ -7361,9 +7361,9 @@ Error ModuleSummaryIndexBitcodeReader::parseModule() {
}
}
-std::vector<ValueInfo>
+SmallVector<ValueInfo, 0>
ModuleSummaryIndexBitcodeReader::makeRefList(ArrayRef<uint64_t> Record) {
- std::vector<ValueInfo> Ret;
+ SmallVector<ValueInfo, 0> Ret;
Ret.reserve(Record.size());
for (uint64_t RefValueId : Record)
Ret.push_back(std::get<0>(getValueInfoFromValueId(RefValueId)));
@@ -7508,7 +7508,7 @@ void ModuleSummaryIndexBitcodeReader::parseTypeIdCompatibleVtableSummaryRecord(
parseTypeIdCompatibleVtableInfo(Record, Slot, TypeId);
}
-static void setSpecialRefs(std::vector<ValueInfo> &Refs, unsigned ROCnt,
+static void setSpecialRefs(SmallVector<ValueInfo, 0> &Refs, unsigned ROCnt,
unsigned WOCnt) {
// Readonly and writeonly refs are in the end of the refs list.
assert(ROCnt + WOCnt <= Refs.size());
@@ -7658,7 +7658,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
int CallGraphEdgeStartIndex = RefListStartIndex + NumRefs;
assert(Record.size() >= RefListStartIndex + NumRefs &&
"Record size inconsistent with number of references");
- std::vector<ValueInfo> Refs = makeRefList(
+ SmallVector<ValueInfo, 0> Refs = makeRefList(
ArrayRef<uint64_t>(Record).slice(RefListStartIndex, NumRefs));
bool HasProfile = (BitCode == bitc::FS_PERMODULE_PROFILE);
bool HasRelBF = (BitCode == bitc::FS_PERMODULE_RELBF);
@@ -7733,7 +7733,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
GVF = getDecodedGVarFlags(Record[2]);
RefArrayStart = 3;
}
- std::vector<ValueInfo> Refs =
+ SmallVector<ValueInfo, 0> Refs =
makeRefList(ArrayRef<uint64_t>(Record).slice(RefArrayStart));
auto FS =
std::make_unique<GlobalVarSummary>(Flags, GVF, std::move(Refs));
@@ -7754,7 +7754,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
unsigned RefListStartIndex = 4;
unsigned VTableListStartIndex = RefListStartIndex + NumRefs;
auto Flags = getDecodedGVSummaryFlags(RawFlags, Version);
- std::vector<ValueInfo> Refs = makeRefList(
+ SmallVector<ValueInfo, 0> Refs = makeRefList(
ArrayRef<uint64_t>(Record).slice(RefListStartIndex, NumRefs));
VTableFuncList VTableFuncs;
for (unsigned I = VTableListStartIndex, E = Record.size(); I != E; ++I) {
@@ -7815,7 +7815,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
int CallGraphEdgeStartIndex = RefListStartIndex + NumRefs;
assert(Record.size() >= RefListStartIndex + NumRefs &&
"Record size inconsistent with number of references");
- std::vector<ValueInfo> Refs = makeRefList(
+ SmallVector<ValueInfo, 0> Refs = makeRefList(
ArrayRef<uint64_t>(Record).slice(RefListStartIndex, NumRefs));
bool HasProfile = (BitCode == bitc::FS_COMBINED_PROFILE);
std::vector<FunctionSummary::EdgeTy> Edges = makeCallList(
@@ -7875,7 +7875,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
GVF = getDecodedGVarFlags(Record[3]);
RefArrayStart = 4;
}
- std::vector<ValueInfo> Refs =
+ SmallVector<ValueInfo, 0> Refs =
makeRefList(ArrayRef<uint64_t>(Record).slice(RefArrayStart));
auto FS =
std::make_unique<GlobalVarSummary>(Flags, GVF, std::move(Refs));
|
Please take a look. Thanks!
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm as long as interface discussion (https://github.com/llvm/llvm-project/pull/107342/files#r1745875758) is resolved
/// from within a function). This does not include functions called, which | ||
/// are listed in the derived FunctionSummary object. | ||
std::vector<ValueInfo> RefEdgeList; | ||
SmallVector<ValueInfo, 0> RefEdgeList; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: add a comment to explain the motivation to use SmallVector<T, 0>
for future readers / maintainers.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed in the latest iteration. Thanks!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm with mingming's comment suggestion
During the ThinLTO indexing step for one of our large applications, we
create 7.5 million instances of GlobalValueSummary.
Changing:
std::vector RefEdgeList;
to:
SmallVector<ValueInfo, 0> RefEdgeList;
in GlobalValueSummary reduces the size of each instance by 8 bytes.
The rest of the patch makes the same change to other places so that
the types stay compatible across function boundaries.