Skip to content

[BOLT] Improve handling of relocations targeting specific instructions #66395

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions bolt/include/bolt/Core/MCPlus.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ class MCAnnotation {
kTailCall, /// Tail call.
kConditionalTailCall, /// CTC.
kOffset, /// Offset in the function.
kLabel, /// MCSymbol pointing to this instruction.
kGeneric /// First generic annotation.
};

Expand Down
7 changes: 7 additions & 0 deletions bolt/include/bolt/Core/MCPlusBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -1169,6 +1169,13 @@ class MCPlusBuilder {
/// Remove offset annotation.
bool clearOffset(MCInst &Inst);

/// Return the label of \p Inst, if available.
std::optional<MCSymbol *> getLabel(const MCInst &Inst) const;

/// Set the label of \p Inst. This label will be emitted right before \p Inst
/// is emitted to MCStreamer.
bool setLabel(MCInst &Inst, MCSymbol *Label);

/// Return MCSymbol that represents a target of this instruction at a given
/// operand number \p OpNum. If there's no symbol associated with
/// the operand - return nullptr.
Expand Down
4 changes: 4 additions & 0 deletions bolt/include/bolt/Core/Relocation.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,10 @@ struct Relocation {
/// Return true if relocation type is for thread local storage.
static bool isTLS(uint64_t Type);

/// Return true of relocation type is for referencing a specific instruction
/// (as opposed to a function, basic block, etc).
static bool isInstructionReference(uint64_t Type);

/// Return code for a NONE relocation
static uint64_t getNone();

Expand Down
2 changes: 2 additions & 0 deletions bolt/lib/Core/BinaryContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1863,6 +1863,8 @@ void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
}
if (std::optional<uint32_t> Offset = MIB->getOffset(Instruction))
OS << " # Offset: " << *Offset;
if (auto Label = MIB->getLabel(Instruction))
OS << " # Label: " << **Label;

MIB->printAnnotations(Instruction, OS);

Expand Down
3 changes: 3 additions & 0 deletions bolt/lib/Core/BinaryEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,9 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF,
BB->getLocSyms().emplace_back(Offset, LocSym);
}

if (auto Label = BC.MIB->getLabel(Instr))
Streamer.emitLabel(*Label);

Streamer.emitInstruction(Instr, *BC.STI);
LastIsPrefix = BC.MIB->isPrefix(Instr);
}
Expand Down
32 changes: 30 additions & 2 deletions bolt/lib/Core/BinaryFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1173,6 +1173,13 @@ bool BinaryFunction::disassemble() {
// basic block.
Labels[0] = Ctx->createNamedTempSymbol("BB0");

// Map offsets in the function to a label that should always point to the
// corresponding instruction. This is used for labels that shouldn't point to
// the start of a basic block but always to a specific instruction. This is
// used, for example, on RISC-V where %pcrel_lo relocations point to the
// corresponding %pcrel_hi.
LabelsMapType InstructionLabels;

uint64_t Size = 0; // instruction size
for (uint64_t Offset = 0; Offset < getSize(); Offset += Size) {
MCInst Instruction;
Expand Down Expand Up @@ -1329,9 +1336,23 @@ bool BinaryFunction::disassemble() {
ItrE = Relocations.lower_bound(Offset + Size);
Itr != ItrE; ++Itr) {
const Relocation &Relocation = Itr->second;
MCSymbol *Symbol = Relocation.Symbol;

if (Relocation::isInstructionReference(Relocation.Type)) {
uint64_t RefOffset = Relocation.Value - getAddress();
LabelsMapType::iterator LI = InstructionLabels.find(RefOffset);

if (LI == InstructionLabels.end()) {
Symbol = BC.Ctx->createNamedTempSymbol();
InstructionLabels.emplace(RefOffset, Symbol);
} else {
Symbol = LI->second;
}
}

int64_t Value = Relocation.Value;
const bool Result = BC.MIB->replaceImmWithSymbolRef(
Instruction, Relocation.Symbol, Relocation.Addend, Ctx.get(), Value,
Instruction, Symbol, Relocation.Addend, Ctx.get(), Value,
Relocation.Type);
(void)Result;
assert(Result && "cannot replace immediate with relocation");
Expand Down Expand Up @@ -1366,6 +1387,13 @@ bool BinaryFunction::disassemble() {
addInstruction(Offset, std::move(Instruction));
}

for (auto [Offset, Label] : InstructionLabels) {
InstrMapType::iterator II = Instructions.find(Offset);
assert(II != Instructions.end() && "reference to non-existing instruction");

BC.MIB->setLabel(II->second, Label);
}

// Reset symbolizer for the disassembler.
BC.SymbolicDisAsm->setSymbolizer(nullptr);

Expand Down Expand Up @@ -4484,7 +4512,7 @@ void BinaryFunction::addRelocation(uint64_t Address, MCSymbol *Symbol,
uint64_t Offset = Address - getAddress();
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: addRelocation in "
<< formatv("{0}@{1:x} against {2}\n", *this, Offset,
Symbol->getName()));
(Symbol ? Symbol->getName() : "<undef>")));
bool IsCI = BC.isAArch64() && isInConstantIsland(Address);
std::map<uint64_t, Relocation> &Rels =
IsCI ? Islands->Relocations : Relocations;
Expand Down
11 changes: 11 additions & 0 deletions bolt/lib/Core/MCPlusBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,17 @@ bool MCPlusBuilder::clearOffset(MCInst &Inst) {
return true;
}

std::optional<MCSymbol *> MCPlusBuilder::getLabel(const MCInst &Inst) const {
if (auto Label = tryGetAnnotationAs<MCSymbol *>(Inst, MCAnnotation::kLabel))
return *Label;
return std::nullopt;
}

bool MCPlusBuilder::setLabel(MCInst &Inst, MCSymbol *Label) {
getOrCreateAnnotationAs<MCSymbol *>(Inst, MCAnnotation::kLabel) = Label;
return true;
}

bool MCPlusBuilder::hasAnnotation(const MCInst &Inst, unsigned Index) const {
const MCInst *AnnotationInst = getAnnotationInst(Inst);
if (!AnnotationInst)
Expand Down
13 changes: 13 additions & 0 deletions bolt/lib/Core/Relocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -797,6 +797,19 @@ bool Relocation::isTLS(uint64_t Type) {
return isTLSX86(Type);
}

bool Relocation::isInstructionReference(uint64_t Type) {
if (Arch != Triple::riscv64)
return false;

switch (Type) {
default:
return false;
case ELF::R_RISCV_PCREL_LO12_I:
case ELF::R_RISCV_PCREL_LO12_S:
return true;
}
}

uint64_t Relocation::getNone() {
if (Arch == Triple::aarch64)
return ELF::R_AARCH64_NONE;
Expand Down
5 changes: 5 additions & 0 deletions bolt/lib/Passes/BinaryPasses.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -575,6 +575,7 @@ bool CheckLargeFunctions::shouldOptimize(const BinaryFunction &BF) const {

void LowerAnnotations::runOnFunctions(BinaryContext &BC) {
std::vector<std::pair<MCInst *, uint32_t>> PreservedOffsetAnnotations;
std::vector<std::pair<MCInst *, MCSymbol *>> PreservedLabelAnnotations;

for (auto &It : BC.getBinaryFunctions()) {
BinaryFunction &BF = It.second;
Expand Down Expand Up @@ -609,6 +610,8 @@ void LowerAnnotations::runOnFunctions(BinaryContext &BC) {
if (BF.requiresAddressTranslation() && BC.MIB->getOffset(*II))
PreservedOffsetAnnotations.emplace_back(&(*II),
*BC.MIB->getOffset(*II));
if (auto Label = BC.MIB->getLabel(*II))
PreservedLabelAnnotations.emplace_back(&*II, *Label);
BC.MIB->stripAnnotations(*II);
}
}
Expand All @@ -625,6 +628,8 @@ void LowerAnnotations::runOnFunctions(BinaryContext &BC) {
// Reinsert preserved annotations we need during code emission.
for (const std::pair<MCInst *, uint32_t> &Item : PreservedOffsetAnnotations)
BC.MIB->setOffset(*Item.first, Item.second);
for (auto [Instr, Label] : PreservedLabelAnnotations)
BC.MIB->setLabel(*Instr, Label);
}

// Check for dirty state in MCSymbol objects that might be a consequence
Expand Down
12 changes: 11 additions & 1 deletion bolt/lib/Rewrite/RewriteInstance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2545,7 +2545,17 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection,
// Adjust the point of reference to a code location inside a function.
if (ReferencedBF->containsAddress(Address, /*UseMaxSize = */ true)) {
RefFunctionOffset = Address - ReferencedBF->getAddress();
if (RefFunctionOffset) {
if (Relocation::isInstructionReference(RType)) {
// Instruction labels are created while disassembling so we just leave
// the symbol empty for now. Since the extracted value is typically
// unrelated to the referenced symbol (e.g., %pcrel_lo in RISC-V
// references an instruction but the patched value references the low
// bits of a data address), we set the extracted value to the symbol
// address in order to be able to correctly reconstruct the reference
// later.
ReferencedSymbol = nullptr;
ExtractedValue = Address;
} else if (RefFunctionOffset) {
if (ContainingBF && ContainingBF != ReferencedBF) {
ReferencedSymbol =
ReferencedBF->addEntryPointAtOffset(RefFunctionOffset);
Expand Down
3 changes: 1 addition & 2 deletions bolt/test/RISCV/reloc-abs.s
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@ _start:
.option push
.option norelax
1:
// CHECK: .Ltmp0
// CHECK: auipc gp, %pcrel_hi(__global_pointer$)
// CHECK: auipc gp, %pcrel_hi(__global_pointer$) # Label: .Ltmp0
// CHECK-NEXT: addi gp, gp, %pcrel_lo(.Ltmp0)
auipc gp, %pcrel_hi(__global_pointer$)
addi gp, gp, %pcrel_lo(1b)
Expand Down
42 changes: 42 additions & 0 deletions bolt/test/RISCV/reloc-bb-split.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// RUN: %clang %cflags -o %t %s
// RUN: llvm-bolt --print-cfg --print-only=_start -o /dev/null %t \
// RUN: | FileCheck %s

.data
.globl d
.p2align 3
d:
.dword 0

.text
.globl _start
.p2align 1
// CHECK-LABEL: Binary Function "_start" after building cfg {
_start:
/// The local label is used for %pcrel_lo as well as a jump target so a new
/// basic block should start there.
// CHECK-LABEL: {{^}}.LBB00
// CHECK: nop
// CHECK-LABEL: {{^}}.Ltmp0
// CHECK: auipc t0, %pcrel_hi(d) # Label: .Ltmp1
// CHECK-NEXT: ld t0, %pcrel_lo(.Ltmp1)(t0)
// CHECK-NEXT: j .Ltmp0
nop
1:
auipc t0, %pcrel_hi(d)
ld t0, %pcrel_lo(1b)(t0)
j 1b

/// The local label is used only for %pcrel_lo so no new basic block should
/// start there.
// CHECK-LABEL: {{^}}.LFT0
// CHECK: nop
// CHECK-NEXT: auipc t0, %pcrel_hi(d) # Label: .Ltmp2
// CHECK-NEXT: ld t0, %pcrel_lo(.Ltmp2)(t0)
// CHECK-NEXT: ret
nop
1:
auipc t0, %pcrel_hi(d)
ld t0, %pcrel_lo(1b)(t0)
ret
.size _start, .-_start
3 changes: 1 addition & 2 deletions bolt/test/RISCV/reloc-got.s
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,7 @@ d:
// CHECK: Binary Function "_start" after building cfg {
_start:
nop // Here to not make the _start and .Ltmp0 symbols coincide
// CHECK: .Ltmp0
// CHECK: auipc t0, %pcrel_hi(__BOLT_got_zero+{{[0-9]+}})
// CHECK: auipc t0, %pcrel_hi(__BOLT_got_zero+{{[0-9]+}}) # Label: .Ltmp0
// CHECK-NEXT: ld t0, %pcrel_lo(.Ltmp0)(t0)
1:
auipc t0, %got_pcrel_hi(d)
Expand Down
6 changes: 2 additions & 4 deletions bolt/test/RISCV/reloc-pcrel.s
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,10 @@ d:
// CHECK: Binary Function "_start" after building cfg {
_start:
nop // Here to not make the _start and .Ltmp0 symbols coincide
// CHECK: .Ltmp0
// CHECK: auipc t0, %pcrel_hi(d)
// CHECK: auipc t0, %pcrel_hi(d) # Label: .Ltmp0
// CHECK-NEXT: ld t0, %pcrel_lo(.Ltmp0)(t0)
ld t0, d
// CHECK: .Ltmp1
// CHECK: auipc t1, %pcrel_hi(d)
// CHECK-NEXT: auipc t1, %pcrel_hi(d) # Label: .Ltmp1
// CHECK-NEXT: sd t0, %pcrel_lo(.Ltmp1)(t1)
sd t0, d, t1
ret
Expand Down