-
Notifications
You must be signed in to change notification settings - Fork 13.4k
[BOLT] Add pseudo probe inline tree to YAML profile #107137
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[BOLT] Add pseudo probe inline tree to YAML profile #107137
Conversation
Created using spr 1.3.4
Created using spr 1.3.4 [skip ci]
@llvm/pr-subscribers-bolt Author: Amir Ayupov (aaupov) Changes: To be used for pseudo probe function matching (#100446). Full diff: https://github.com/llvm/llvm-project/pull/107137.diff 3 Files Affected:
diff --git a/bolt/include/bolt/Profile/ProfileYAMLMapping.h b/bolt/include/bolt/Profile/ProfileYAMLMapping.h
index 2a0514d7d9304b..f0cc116ebc6cb0 100644
--- a/bolt/include/bolt/Profile/ProfileYAMLMapping.h
+++ b/bolt/include/bolt/Profile/ProfileYAMLMapping.h
@@ -95,24 +95,28 @@ template <> struct MappingTraits<bolt::SuccessorInfo> {
namespace bolt {
struct PseudoProbeInfo {
- llvm::yaml::Hex64 GUID;
uint64_t Index;
+ uint32_t InlineTreeIndex;
+ llvm::yaml::Hex32 Offset{0};
uint8_t Type;
bool operator==(const PseudoProbeInfo &Other) const {
- return GUID == Other.GUID && Index == Other.Index;
+ return InlineTreeIndex == Other.InlineTreeIndex && Index == Other.Index;
}
- bool operator!=(const PseudoProbeInfo &Other) const {
- return !(*this == Other);
+ bool operator<(const PseudoProbeInfo &Other) const {
+ if (InlineTreeIndex == Other.InlineTreeIndex)
+ return Index < Other.Index;
+ return InlineTreeIndex < Other.InlineTreeIndex;
}
};
} // end namespace bolt
template <> struct MappingTraits<bolt::PseudoProbeInfo> {
static void mapping(IO &YamlIO, bolt::PseudoProbeInfo &PI) {
- YamlIO.mapRequired("guid", PI.GUID);
YamlIO.mapRequired("id", PI.Index);
YamlIO.mapRequired("type", PI.Type);
+ YamlIO.mapOptional("inline_tree_id", PI.InlineTreeIndex, (uint32_t)0);
+ YamlIO.mapOptional("offset", PI.Offset, (uint32_t)0);
}
static const bool flow = true;
@@ -122,7 +126,7 @@ template <> struct MappingTraits<bolt::PseudoProbeInfo> {
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::bolt::CallSiteInfo)
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::bolt::SuccessorInfo)
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::bolt::PseudoProbeInfo)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::PseudoProbeInfo)
namespace llvm {
namespace yaml {
@@ -163,10 +167,35 @@ template <> struct MappingTraits<bolt::BinaryBasicBlockProfile> {
}
};
+namespace bolt {
+struct InlineTreeInfo {
+ uint32_t Index;
+ uint32_t ParentIndex;
+ uint32_t CallSiteProbe;
+ llvm::yaml::Hex64 GUID;
+ llvm::yaml::Hex64 Hash;
+ bool operator==(const InlineTreeInfo &Other) const {
+ return Index == Other.Index;
+ }
+};
+} // end namespace bolt
+
+template <> struct MappingTraits<bolt::InlineTreeInfo> {
+ static void mapping(IO &YamlIO, bolt::InlineTreeInfo &ITI) {
+ YamlIO.mapRequired("guid", ITI.GUID);
+ YamlIO.mapRequired("hash", ITI.Hash);
+ YamlIO.mapRequired("id", ITI.Index);
+ YamlIO.mapOptional("parent", ITI.ParentIndex, (uint32_t)0);
+ YamlIO.mapOptional("callsite", ITI.CallSiteProbe, 0);
+ }
+
+ static const bool flow = true;
+};
} // end namespace yaml
} // end namespace llvm
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::BinaryBasicBlockProfile)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::InlineTreeInfo)
namespace llvm {
namespace yaml {
@@ -179,8 +208,7 @@ struct BinaryFunctionProfile {
llvm::yaml::Hex64 Hash{0};
uint64_t ExecCount{0};
std::vector<BinaryBasicBlockProfile> Blocks;
- llvm::yaml::Hex64 GUID{0};
- llvm::yaml::Hex64 PseudoProbeDescHash{0};
+ std::vector<InlineTreeInfo> InlineTree;
bool Used{false};
};
} // end namespace bolt
@@ -194,9 +222,8 @@ template <> struct MappingTraits<bolt::BinaryFunctionProfile> {
YamlIO.mapRequired("nblocks", BFP.NumBasicBlocks);
YamlIO.mapOptional("blocks", BFP.Blocks,
std::vector<bolt::BinaryBasicBlockProfile>());
- YamlIO.mapOptional("guid", BFP.GUID, (uint64_t)0);
- YamlIO.mapOptional("pseudo_probe_desc_hash", BFP.PseudoProbeDescHash,
- (uint64_t)0);
+ YamlIO.mapOptional("inline_tree", BFP.InlineTree,
+ std::vector<bolt::InlineTreeInfo>());
}
};
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 10d745cc69824b..803cc4725b5702 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -34,6 +34,7 @@
#include "llvm/Support/raw_ostream.h"
#include <map>
#include <optional>
+#include <queue>
#include <unordered_map>
#include <utility>
@@ -2402,12 +2403,43 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
const unsigned BlockIndex = BlockMap.getBBIndex(BI.To.Offset);
YamlBF.Blocks[BlockIndex].ExecCount += BI.Branches;
}
- if (PseudoProbeDecoder) {
- if ((YamlBF.GUID = BF->getGUID())) {
- const MCPseudoProbeFuncDesc *FuncDesc =
- PseudoProbeDecoder->getFuncDescForGUID(YamlBF.GUID);
- YamlBF.PseudoProbeDescHash = FuncDesc->FuncHash;
+ DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t>
+ InlineTreeNodeId;
+ if (PseudoProbeDecoder && BF->getGUID()) {
+ std::queue<const MCDecodedPseudoProbeInlineTree *> ITWorklist;
+ // FIXME: faster inline tree lookup by top-level GUID
+ if (const MCDecodedPseudoProbeInlineTree *InlineTree = llvm::find_if(
+ PseudoProbeDecoder->getDummyInlineRoot().getChildren(),
+ [&](const auto &InlineTree) {
+ return InlineTree.Guid == BF->getGUID();
+ })) {
+ ITWorklist.push(InlineTree);
+ InlineTreeNodeId[InlineTree] = 0;
+ auto Hash =
+ PseudoProbeDecoder->getFuncDescForGUID(BF->getGUID())->FuncHash;
+ YamlBF.InlineTree.emplace_back(
+ yaml::bolt::InlineTreeInfo{0, 0, 0, BF->getGUID(), Hash});
+ }
+ uint32_t ParentId = 0;
+ uint32_t NodeId = 1;
+ while (!ITWorklist.empty()) {
+ const MCDecodedPseudoProbeInlineTree *Cur = ITWorklist.front();
+ for (const MCDecodedPseudoProbeInlineTree &Child :
+ Cur->getChildren()) {
+ InlineTreeNodeId[&Child] = NodeId;
+ auto Hash =
+ PseudoProbeDecoder->getFuncDescForGUID(Child.Guid)->FuncHash;
+ YamlBF.InlineTree.emplace_back(yaml::bolt::InlineTreeInfo{
+ NodeId++, ParentId, std::get<1>(Child.getInlineSite()),
+ Child.Guid, Hash});
+ ITWorklist.push(&Child);
+ }
+ ITWorklist.pop();
+ ++ParentId;
}
+ }
+
+ if (PseudoProbeDecoder) {
// Fetch probes belonging to all fragments
const AddressProbesMap &ProbeMap =
PseudoProbeDecoder->getAddress2ProbesMap();
@@ -2420,12 +2452,19 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
const uint32_t OutputAddress = Probe.getAddress();
const uint32_t InputOffset = BAT->translate(
FuncAddr, OutputAddress - FuncAddr, /*IsBranchSrc=*/true);
- const unsigned BlockIndex = getBlock(InputOffset).second;
+ const auto [BlockOffset, BlockIndex] = getBlock(InputOffset);
+ uint32_t NodeId = InlineTreeNodeId[Probe.getInlineTreeNode()];
+ uint32_t Offset = InputOffset - BlockOffset;
YamlBF.Blocks[BlockIndex].PseudoProbes.emplace_back(
- yaml::bolt::PseudoProbeInfo{Probe.getGuid(), Probe.getIndex(),
+ yaml::bolt::PseudoProbeInfo{Probe.getIndex(), NodeId, Offset,
Probe.getType()});
}
}
+ for (yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) {
+ llvm::sort(YamlBB.PseudoProbes);
+ YamlBB.PseudoProbes.erase(llvm::unique(YamlBB.PseudoProbes),
+ YamlBB.PseudoProbes.end());
+ }
}
// Drop blocks without a hash, won't be useful for stale matching.
llvm::erase_if(YamlBF.Blocks,
diff --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp
index ffbf2388e912fb..817689230e2a70 100644
--- a/bolt/lib/Profile/YAMLProfileWriter.cpp
+++ b/bolt/lib/Profile/YAMLProfileWriter.cpp
@@ -12,11 +12,15 @@
#include "bolt/Profile/BoltAddressTranslation.h"
#include "bolt/Profile/DataAggregator.h"
#include "bolt/Profile/ProfileReaderBase.h"
+#include "bolt/Profile/ProfileYAMLMapping.h"
#include "bolt/Rewrite/RewriteInstance.h"
#include "bolt/Utils/CommandLineOpts.h"
+#include "llvm/MC/MCPseudoProbe.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
+#include <deque>
+#include <queue>
#undef DEBUG_TYPE
#define DEBUG_TYPE "bolt-prof"
@@ -77,13 +81,6 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
YamlBF.Hash = BF.getHash();
YamlBF.NumBasicBlocks = BF.size();
YamlBF.ExecCount = BF.getKnownExecutionCount();
- if (PseudoProbeDecoder) {
- if ((YamlBF.GUID = BF.getGUID())) {
- const MCPseudoProbeFuncDesc *FuncDesc =
- PseudoProbeDecoder->getFuncDescForGUID(YamlBF.GUID);
- YamlBF.PseudoProbeDescHash = FuncDesc->FuncHash;
- }
- }
BinaryFunction::BasicBlockOrderType Order;
llvm::copy(UseDFS ? BF.dfs() : BF.getLayout().blocks(),
@@ -92,6 +89,40 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
const FunctionLayout Layout = BF.getLayout();
Layout.updateLayoutIndices(Order);
+ DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId;
+ if (PseudoProbeDecoder && BF.getGUID()) {
+ std::queue<const MCDecodedPseudoProbeInlineTree *> ITWorklist;
+ // FIXME: faster inline tree lookup by top-level GUID
+ if (const MCDecodedPseudoProbeInlineTree *InlineTree = llvm::find_if(
+ PseudoProbeDecoder->getDummyInlineRoot().getChildren(),
+ [&](const auto &InlineTree) {
+ return InlineTree.Guid == BF.getGUID();
+ })) {
+ ITWorklist.push(InlineTree);
+ InlineTreeNodeId[InlineTree] = 0;
+ auto Hash =
+ PseudoProbeDecoder->getFuncDescForGUID(BF.getGUID())->FuncHash;
+ YamlBF.InlineTree.emplace_back(
+ yaml::bolt::InlineTreeInfo{0, 0, 0, BF.getGUID(), Hash});
+ }
+ uint32_t ParentId = 0;
+ uint32_t NodeId = 1;
+ while (!ITWorklist.empty()) {
+ const MCDecodedPseudoProbeInlineTree *Cur = ITWorklist.front();
+ for (const MCDecodedPseudoProbeInlineTree &Child : Cur->getChildren()) {
+ InlineTreeNodeId[&Child] = NodeId;
+ auto Hash =
+ PseudoProbeDecoder->getFuncDescForGUID(Child.Guid)->FuncHash;
+ YamlBF.InlineTree.emplace_back(yaml::bolt::InlineTreeInfo{
+ NodeId++, ParentId, std::get<1>(Child.getInlineSite()), Child.Guid,
+ Hash});
+ ITWorklist.push(&Child);
+ }
+ ITWorklist.pop();
+ ++ParentId;
+ }
+ }
+
for (const BinaryBasicBlock *BB : Order) {
yaml::bolt::BinaryBasicBlockProfile YamlBB;
YamlBB.Index = BB->getLayoutIndex();
@@ -198,10 +229,18 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
const uint64_t FuncAddr = BF.getAddress();
const std::pair<uint64_t, uint64_t> &BlockRange =
BB->getInputAddressRange();
- for (const MCDecodedPseudoProbe &Probe : ProbeMap.find(
- FuncAddr + BlockRange.first, FuncAddr + BlockRange.second))
+ const std::pair<uint64_t, uint64_t> BlockAddrRange = {
+ FuncAddr + BlockRange.first, FuncAddr + BlockRange.second};
+ for (const MCDecodedPseudoProbe &Probe :
+ ProbeMap.find(BlockAddrRange.first, BlockAddrRange.second)) {
+ uint32_t NodeId = InlineTreeNodeId[Probe.getInlineTreeNode()];
+ uint32_t Offset = Probe.getAddress() - BlockAddrRange.first;
YamlBB.PseudoProbes.emplace_back(yaml::bolt::PseudoProbeInfo{
- Probe.getGuid(), Probe.getIndex(), Probe.getType()});
+ Probe.getIndex(), NodeId, Offset, Probe.getType()});
+ }
+ llvm::sort(YamlBB.PseudoProbes);
+ YamlBB.PseudoProbes.erase(llvm::unique(YamlBB.PseudoProbes),
+ YamlBB.PseudoProbes.end());
}
YamlBF.Blocks.emplace_back(YamlBB);
|
Created using spr 1.3.4
Created using spr 1.3.4 [skip ci]
Created using spr 1.3.4
Didn't realize yaml profile currently doesn't have probe inline tree encoded. This can increase profile size a bit, just making sure that's not a concern for yaml profile. |
Good call. Including probe inline tree does increase the size of yaml profile by about 2x pre- and post-compression (221M -> 404M, 20M -> 41M resp.) for a large binary, which also causes a 2x profile reading time increase. Let me experiment with reorganizing the data to reduce the size. |
Update on profile size reduction:
|
Created using spr 1.3.4 [skip ci]
Created using spr 1.3.4
✅ With the latest revision this PR passed the C/C++ code formatter. |
Created using spr 1.3.4
The latest version has even more compact encoding. On top of related change #107970 which drops blocks without counters from BAT YAML profile, this version achieves the following, for the same inputs as above:
The overhead seems reasonable. |
Created using spr 1.3.4
Created using spr 1.3.4
Created using spr 1.3.4
Created using spr 1.3.4
Created using spr 1.3.4 [skip ci]
Created using spr 1.3.4
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not an expert but looks good. Why is operator== in struct InlineTreeInfo always returning false? Is this intentional?
Sorry, didn't see lei was already reviewing this. Go ahead with his expert's opinion, please. |
It's a quirk of YAML: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks!
Thanks for the review, @wlei-llvm, @rafaelauler, @WenleiHe! |
Created using spr 1.3.4 [skip ci]
Add probe inline tree information to YAML profile, at function level:
This information is used for pseudo probe block matching (#99891).
The encoding adds/changes probe information in multiple levels of
YAML profile:
permits deduplication of data:
into multiple callers,
low block counts.
is included as root of function inline tree, which makes guid and
pseudo_probe_desc_hash fields redundant.
in uint64_t. If only block probe with id=1 is present, it's
encoded as implicit entry (id=0, omitted).
where node indices are combined into a list.
On top of #107970, profile with new probe encoding has the following
characteristics (profile for a large binary):
Profile processing time (YAML parsing, inference, attaching steps):
Test Plan: updated pseudoprobe-decoding-inline.test