Skip to content

Commit 9f75270

Browse files
authored
[IR] Add per-function numbers to basic blocks (#101052)
Every basic block that is linked into a function now has a unique number, which can be queried using getNumber(). Numbers are densely allocated, but not re-assigned on block removal for stability. Block numbers are intended to be fairly stable and should only be updated after removing several basic blocks, to make sure the numbering doesn't become too sparse. To reduce holes in the numbering, renumberBlocks() can be called to re-assign numbers in block order. Additionally, getMaxBlockNumber() returns a value larger than the largest block number, intended for pre-allocating/resizing vectors. Furthermore, this introduces the concept of a "block number epoch" -- an integer that changes after every renumbering. This is useful for identifying use of block numbers after renumbering: on initialization, the current epoch is stored, and on all subsequent accesses, equality with the current epoch can be asserted. I added a validate method to catch cases where something goes wrong, even if I can't really imagine how invalid numbers can occur. But I think it's better to be safe and rule out this potential source of bugs when more things depend on the numbering. Previous discussion in: https://discourse.llvm.org/t/rfc-add-auxiliary-field-for-per-pass-custom-data-to-basicblock/80229
1 parent d067062 commit 9f75270

File tree

5 files changed

+163
-0
lines changed

5 files changed

+163
-0
lines changed

llvm/include/llvm/IR/BasicBlock.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,11 @@ class BasicBlock final : public Value, // Basic blocks are data objects also
6767
bool IsNewDbgInfoFormat;
6868

6969
private:
70+
// Allow Function to renumber blocks.
71+
friend class Function;
72+
/// Per-function unique number.
73+
unsigned Number = -1u;
74+
7075
friend class BlockAddress;
7176
friend class SymbolTableListTraits<BasicBlock>;
7277

@@ -96,6 +101,11 @@ class BasicBlock final : public Value, // Basic blocks are data objects also
96101
void setIsNewDbgInfoFormat(bool NewFlag);
97102
void setNewDbgInfoFormatFlag(bool NewFlag);
98103

104+
unsigned getNumber() const {
105+
assert(getParent() && "only basic blocks in functions have valid numbers");
106+
return Number;
107+
}
108+
99109
/// Record that the collection of DbgRecords in \p M "trails" after the last
100110
/// instruction of this block. These are equivalent to dbg.value intrinsics
101111
/// that exist at the end of a basic block with no terminator (a transient

llvm/include/llvm/IR/Function.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,13 @@ class LLVM_EXTERNAL_VISIBILITY Function : public GlobalObject,
7575
private:
7676
// Important things that make up a function!
7777
BasicBlockListType BasicBlocks; ///< The basic blocks
78+
79+
// Basic blocks need to get their number when added to a function.
80+
friend void BasicBlock::setParent(Function *);
81+
unsigned NextBlockNum = 0;
82+
/// Epoch of block numbers. (Could be shrunk to uint8_t if required.)
83+
unsigned BlockNumEpoch = 0;
84+
7885
mutable Argument *Arguments = nullptr; ///< The formal arguments
7986
size_t NumArgs;
8087
std::unique_ptr<ValueSymbolTable>
@@ -810,6 +817,34 @@ class LLVM_EXTERNAL_VISIBILITY Function : public GlobalObject,
810817
return SymTab.get();
811818
}
812819

820+
//===--------------------------------------------------------------------===//
821+
// Block number functions
822+
823+
/// Return a value larger than the largest block number. Intended to allocate
824+
/// a vector that is sufficiently large to hold all blocks indexed by their
825+
/// number.
826+
unsigned getMaxBlockNumber() const { return NextBlockNum; }
827+
828+
/// Renumber basic blocks into a dense value range starting from 0. Be aware
829+
/// that other data structures and analyses (e.g., DominatorTree) may depend
830+
/// on the value numbers and need to be updated or invalidated.
831+
void renumberBlocks();
832+
833+
/// Return the "epoch" of current block numbers. This will return a different
834+
/// value after every renumbering. The intention is: if something (e.g., an
835+
/// analysis) uses block numbers, it also stores the number epoch and then
836+
/// can assert later on that the epoch didn't change (indicating that the
837+
/// numbering is still valid). If the epoch changed, blocks might have been
838+
/// assigned new numbers and previous uses of the numbers need to be
839+
/// invalidated. This is solely intended as a debugging feature.
840+
unsigned getBlockNumberEpoch() const { return BlockNumEpoch; }
841+
842+
private:
843+
/// Assert that all blocks have unique numbers within 0..NextBlockNum. This
844+
/// has O(n) runtime complexity.
845+
void validateBlockNumbers() const;
846+
847+
public:
813848
//===--------------------------------------------------------------------===//
814849
// BasicBlock iterator forwarding functions
815850
//

llvm/lib/IR/BasicBlock.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,8 @@ BasicBlock::~BasicBlock() {
240240

241241
void BasicBlock::setParent(Function *parent) {
242242
// Set Parent=parent, updating instruction symtab entries as appropriate.
243+
if (Parent != parent)
244+
Number = parent ? parent->NextBlockNum++ : -1u;
243245
InstList.setSymTabObject(&Parent, parent);
244246
}
245247

llvm/lib/IR/Function.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "llvm/IR/Function.h"
1414
#include "SymbolTableListTraitsImpl.h"
1515
#include "llvm/ADT/ArrayRef.h"
16+
#include "llvm/ADT/BitVector.h"
1617
#include "llvm/ADT/DenseSet.h"
1718
#include "llvm/ADT/STLExtras.h"
1819
#include "llvm/ADT/SmallString.h"
@@ -85,6 +86,27 @@ static cl::opt<int> NonGlobalValueMaxNameSize(
8586

8687
extern cl::opt<bool> UseNewDbgInfoFormat;
8788

89+
void Function::renumberBlocks() {
90+
validateBlockNumbers();
91+
92+
NextBlockNum = 0;
93+
for (auto &BB : *this)
94+
BB.Number = NextBlockNum++;
95+
BlockNumEpoch++;
96+
}
97+
98+
void Function::validateBlockNumbers() const {
99+
#ifndef NDEBUG
100+
BitVector Numbers(NextBlockNum);
101+
for (const auto &BB : *this) {
102+
unsigned Num = BB.getNumber();
103+
assert(Num < NextBlockNum && "out of range block number");
104+
assert(!Numbers[Num] && "duplicate block numbers");
105+
Numbers.set(Num);
106+
}
107+
#endif
108+
}
109+
88110
void Function::convertToNewDbgValues() {
89111
IsNewDbgInfoFormat = true;
90112
for (auto &BB : *this) {
@@ -509,6 +531,8 @@ Function::Function(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace,
509531
}
510532

511533
Function::~Function() {
534+
validateBlockNumbers();
535+
512536
dropAllReferences(); // After this it is safe to delete instructions.
513537

514538
// Delete all of the method arguments and unlink from symbol table...

llvm/unittests/IR/FunctionTest.cpp

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -487,6 +487,98 @@ TEST(FunctionTest, EraseBBs) {
487487
EXPECT_EQ(F->size(), 0u);
488488
}
489489

490+
TEST(FunctionTest, BasicBlockNumbers) {
491+
LLVMContext Context;
492+
Type *VoidType = Type::getVoidTy(Context);
493+
FunctionType *FuncType = FunctionType::get(VoidType, false);
494+
std::unique_ptr<Function> Func(
495+
Function::Create(FuncType, GlobalValue::ExternalLinkage));
496+
497+
EXPECT_EQ(Func->getBlockNumberEpoch(), 0u);
498+
EXPECT_EQ(Func->getMaxBlockNumber(), 0u);
499+
500+
BasicBlock *BB1 = BasicBlock::Create(Context, "bb1", Func.get());
501+
EXPECT_EQ(BB1->getNumber(), 0u);
502+
EXPECT_EQ(Func->getMaxBlockNumber(), 1u);
503+
BasicBlock *BB2 = BasicBlock::Create(Context, "bb2", Func.get());
504+
EXPECT_EQ(BB2->getNumber(), 1u);
505+
EXPECT_EQ(Func->getMaxBlockNumber(), 2u);
506+
BasicBlock *BB3 = BasicBlock::Create(Context, "bb3", Func.get());
507+
EXPECT_EQ(BB3->getNumber(), 2u);
508+
EXPECT_EQ(Func->getMaxBlockNumber(), 3u);
509+
510+
BB2->eraseFromParent();
511+
// Erasing doesn't trigger renumbering
512+
EXPECT_EQ(BB1->getNumber(), 0u);
513+
EXPECT_EQ(BB3->getNumber(), 2u);
514+
EXPECT_EQ(Func->getMaxBlockNumber(), 3u);
515+
// ... and numbers are assigned in monotonically increasing order
516+
BasicBlock *BB4 = BasicBlock::Create(Context, "bb4", Func.get());
517+
EXPECT_EQ(BB4->getNumber(), 3u);
518+
EXPECT_EQ(Func->getMaxBlockNumber(), 4u);
519+
// ... even if inserted not at the end
520+
BasicBlock *BB5 = BasicBlock::Create(Context, "bb5", Func.get(), BB1);
521+
EXPECT_EQ(BB5->getNumber(), 4u);
522+
EXPECT_EQ(Func->getMaxBlockNumber(), 5u);
523+
524+
// Func is now: bb5, bb1, bb3, bb4
525+
// Renumbering assigns numbers in their order in the function
526+
EXPECT_EQ(Func->getBlockNumberEpoch(), 0u);
527+
Func->renumberBlocks();
528+
EXPECT_EQ(Func->getBlockNumberEpoch(), 1u);
529+
EXPECT_EQ(BB5->getNumber(), 0u);
530+
EXPECT_EQ(BB1->getNumber(), 1u);
531+
EXPECT_EQ(BB3->getNumber(), 2u);
532+
EXPECT_EQ(BB4->getNumber(), 3u);
533+
EXPECT_EQ(Func->getMaxBlockNumber(), 4u);
534+
535+
// Moving a block inside the function doesn't change numbers
536+
BB1->moveBefore(BB5);
537+
EXPECT_EQ(BB5->getNumber(), 0u);
538+
EXPECT_EQ(BB1->getNumber(), 1u);
539+
EXPECT_EQ(BB3->getNumber(), 2u);
540+
EXPECT_EQ(BB4->getNumber(), 3u);
541+
EXPECT_EQ(Func->getMaxBlockNumber(), 4u);
542+
543+
// Removing a block and adding it back assigns a new number, because the
544+
// block was temporarily without a parent.
545+
BB4->removeFromParent();
546+
BB4->insertInto(Func.get());
547+
EXPECT_EQ(BB5->getNumber(), 0u);
548+
EXPECT_EQ(BB1->getNumber(), 1u);
549+
EXPECT_EQ(BB3->getNumber(), 2u);
550+
EXPECT_EQ(BB4->getNumber(), 4u);
551+
EXPECT_EQ(Func->getMaxBlockNumber(), 5u);
552+
553+
std::unique_ptr<Function> Func2(
554+
Function::Create(FuncType, GlobalValue::ExternalLinkage));
555+
BasicBlock *BB6 = BasicBlock::Create(Context, "bb6", Func2.get());
556+
EXPECT_EQ(BB6->getNumber(), 0u);
557+
EXPECT_EQ(Func2->getMaxBlockNumber(), 1u);
558+
// Moving a block to a different function assigns a new number
559+
BB3->removeFromParent();
560+
BB3->insertInto(Func2.get(), BB6);
561+
EXPECT_EQ(BB3->getParent(), Func2.get());
562+
EXPECT_EQ(BB3->getNumber(), 1u);
563+
EXPECT_EQ(Func2->getMaxBlockNumber(), 2u);
564+
565+
EXPECT_EQ(Func2->getBlockNumberEpoch(), 0u);
566+
Func2->renumberBlocks();
567+
EXPECT_EQ(Func2->getBlockNumberEpoch(), 1u);
568+
EXPECT_EQ(BB3->getNumber(), 0u);
569+
EXPECT_EQ(BB6->getNumber(), 1u);
570+
EXPECT_EQ(Func2->getMaxBlockNumber(), 2u);
571+
572+
// splice works as expected and assigns new numbers
573+
Func->splice(Func->end(), Func2.get());
574+
EXPECT_EQ(BB5->getNumber(), 0u);
575+
EXPECT_EQ(BB1->getNumber(), 1u);
576+
EXPECT_EQ(BB4->getNumber(), 4u);
577+
EXPECT_EQ(BB3->getNumber(), 5u);
578+
EXPECT_EQ(BB6->getNumber(), 6u);
579+
EXPECT_EQ(Func->getMaxBlockNumber(), 7u);
580+
}
581+
490582
TEST(FunctionTest, UWTable) {
491583
LLVMContext Ctx;
492584
std::unique_ptr<Module> M = parseIR(Ctx, R"(

0 commit comments

Comments
 (0)