diff --git a/bolt/docs/BAT.md b/bolt/docs/BAT.md index 7ffb5d7c00816..817ad288aa34b 100644 --- a/bolt/docs/BAT.md +++ b/bolt/docs/BAT.md @@ -106,9 +106,14 @@ equals output offset. `BRANCHENTRY` bit denotes whether a given offset pair is a control flow source (branch or call instruction). If not set, it signifies a control flow target (basic block offset). + `InputAddr` is omitted for equal offsets in input and output function. In this case, `BRANCHENTRY` bits are encoded separately in a `BranchEntries` bitvector. +Deleted basic blocks are emitted as having `OutputOffset` equal to the size of +the function. They don't affect address translation and only participate in +input basic block mapping. + ### Secondary Entry Points table The table is emitted for hot fragments only. It contains `NumSecEntryPoints` offsets denoting secondary entry points, delta encoded, implicitly starting at zero. diff --git a/bolt/include/bolt/Profile/BoltAddressTranslation.h b/bolt/include/bolt/Profile/BoltAddressTranslation.h index a249bc4498e46..65b9ba874368f 100644 --- a/bolt/include/bolt/Profile/BoltAddressTranslation.h +++ b/bolt/include/bolt/Profile/BoltAddressTranslation.h @@ -70,7 +70,7 @@ class BinaryFunction; class BoltAddressTranslation { public: // In-memory representation of the address translation table - using MapTy = std::map; + using MapTy = std::multimap; // List of taken fall-throughs using FallthroughListTy = SmallVector, 16>; @@ -218,6 +218,7 @@ class BoltAddressTranslation { auto begin() const { return Map.begin(); } auto end() const { return Map.end(); } auto upper_bound(uint32_t Offset) const { return Map.upper_bound(Offset); } + auto size() const { return Map.size(); } }; /// Map function output address to its hash and basic blocks hash map. diff --git a/bolt/lib/Profile/BoltAddressTranslation.cpp b/bolt/lib/Profile/BoltAddressTranslation.cpp index a9d56875f5a02..cdfca2b9871ac 100644 --- a/bolt/lib/Profile/BoltAddressTranslation.cpp +++ b/bolt/lib/Profile/BoltAddressTranslation.cpp @@ -54,7 +54,7 @@ void BoltAddressTranslation::writeEntriesForBB( // and this deleted block will both share the same output address (the same // key), and we need to map back. We choose here to privilege the successor by // allowing it to overwrite the previously inserted key in the map. - Map[BBOutputOffset] = BBInputOffset << 1; + Map.emplace(BBOutputOffset, BBInputOffset << 1); const auto &IOAddressMap = BB.getFunction()->getBinaryContext().getIOAddressMap(); @@ -71,8 +71,7 @@ void BoltAddressTranslation::writeEntriesForBB( LLVM_DEBUG(dbgs() << " Key: " << Twine::utohexstr(OutputOffset) << " Val: " << Twine::utohexstr(InputOffset) << " (branch)\n"); - Map.insert(std::pair(OutputOffset, - (InputOffset << 1) | BRANCHENTRY)); + Map.emplace(OutputOffset, (InputOffset << 1) | BRANCHENTRY); } } @@ -107,6 +106,19 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) { for (const BinaryBasicBlock *const BB : Function.getLayout().getMainFragment()) writeEntriesForBB(Map, *BB, InputAddress, OutputAddress); + // Add entries for deleted blocks. They are still required for correct BB + // mapping of branches modified by SCTC. By convention, they would have the + // end of the function as output address. + const BBHashMapTy &BBHashMap = getBBHashMap(InputAddress); + if (BBHashMap.size() != Function.size()) { + const uint64_t EndOffset = Function.getOutputSize(); + std::unordered_set MappedInputOffsets; + for (const BinaryBasicBlock &BB : Function) + MappedInputOffsets.emplace(BB.getInputOffset()); + for (const auto &[InputOffset, _] : BBHashMap) + if (!llvm::is_contained(MappedInputOffsets, InputOffset)) + Map.emplace(EndOffset, InputOffset << 1); + } Maps.emplace(Function.getOutputAddress(), std::move(Map)); ReverseMap.emplace(OutputAddress, InputAddress); diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index ea8e353a9f50c..ce6ec0a04ac16 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -2349,11 +2349,11 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC, BAT->getBBHashMap(FuncAddress); YamlBF.Blocks.resize(YamlBF.NumBasicBlocks); - for (auto &&[Idx, YamlBB] : llvm::enumerate(YamlBF.Blocks)) - YamlBB.Index = Idx; - - for (auto BI = BlockMap.begin(), BE = BlockMap.end(); BI != BE; ++BI) - YamlBF.Blocks[BI->second.Index].Hash = BI->second.Hash; + for (auto &&[Entry, YamlBB] : llvm::zip(BlockMap, YamlBF.Blocks)) { + const auto &Block = Entry.second; + YamlBB.Hash = Block.Hash; + YamlBB.Index = Block.Index; + } // Lookup containing basic block offset and index auto getBlock = [&BlockMap](uint32_t Offset) { diff --git a/bolt/test/X86/bb-with-two-tail-calls.s b/bolt/test/X86/bb-with-two-tail-calls.s index b6703e352ff4b..8bbecc498ed75 100644 --- a/bolt/test/X86/bb-with-two-tail-calls.s +++ b/bolt/test/X86/bb-with-two-tail-calls.s @@ -8,11 +8,21 @@ # RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib # RUN: llvm-bolt %t.exe -o %t.out --data %t.fdata --lite=0 --dyno-stats \ # RUN: --print-sctc --print-only=_start -enable-bat 2>&1 | FileCheck %s +# RUN: llvm-objdump --syms %t.out > %t.log +# RUN: llvm-bat-dump %t.out --dump-all >> %t.log +# RUN: FileCheck %s --input-file %t.log --check-prefix=CHECK-BAT + # CHECK-NOT: Assertion `BranchInfo.size() == 2 && "could only be called for blocks with 2 successors"' failed. # Two tail calls in the same basic block after SCTC: # CHECK: {{.*}}: ja {{.*}} # TAILCALL # Offset: 7 # CTCTakenCount: 4 # CHECK-NEXT: {{.*}}: jmp {{.*}} # TAILCALL # Offset: 13 +# Confirm that a deleted basic block is emitted at function end offset (0xe) +# CHECK-BAT: [[#%x,ADDR:]] g .text [[#%x,SIZE:]] _start +# CHECK-BAT: Function Address: 0x[[#%x,ADDR]] +# CHECK-BAT: 0x[[#%x,SIZE]] +# CHECK-BAT: NumBlocks: 5 + .globl _start _start: je x diff --git a/bolt/test/X86/bolt-address-translation-yaml.test b/bolt/test/X86/bolt-address-translation-yaml.test index 9f2c2ef3ab987..8f65eaba891ec 100644 --- a/bolt/test/X86/bolt-address-translation-yaml.test +++ b/bolt/test/X86/bolt-address-translation-yaml.test @@ -41,7 +41,7 @@ RUN: | FileCheck --check-prefix CHECK-BOLT-YAML %s WRITE-BAT-CHECK: BOLT-INFO: Wrote 5 BAT maps WRITE-BAT-CHECK: BOLT-INFO: Wrote 4 function and 22 basic block hashes -WRITE-BAT-CHECK: BOLT-INFO: BAT section size (bytes): 384 +WRITE-BAT-CHECK: BOLT-INFO: BAT section size (bytes): 404 READ-BAT-CHECK-NOT: BOLT-ERROR: unable to save profile in YAML format for input file processed by BOLT READ-BAT-CHECK: BOLT-INFO: Parsed 5 BAT entries diff --git a/bolt/test/X86/bolt-address-translation.test b/bolt/test/X86/bolt-address-translation.test index e6b21c14077b4..dfdd1eea32333 100644 --- a/bolt/test/X86/bolt-address-translation.test +++ b/bolt/test/X86/bolt-address-translation.test @@ -37,7 +37,7 @@ # CHECK: BOLT: 3 out of 7 functions were overwritten. # CHECK: BOLT-INFO: Wrote 6 BAT maps # CHECK: BOLT-INFO: Wrote 3 function and 58 basic block hashes -# CHECK: BOLT-INFO: BAT section size (bytes): 928 +# CHECK: BOLT-INFO: BAT section size (bytes): 940 # # usqrt mappings (hot part). We match against any key (left side containing # the bolted binary offsets) because BOLT may change where it puts instructions