Skip to content

[feature][riscv] handle target address calculation in llvm-objdump disassembly for riscv #144620

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions llvm/include/llvm/MC/MCInstrAnalysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Compiler.h"
#include <cstdint>
#include <vector>
Expand Down Expand Up @@ -182,6 +183,12 @@ class LLVM_ABI MCInstrAnalysis {
evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
uint64_t &Target) const;

/// Given an instruction that accesses a memory address, try to compute
/// the target address. Return true on success, and the address in \p Target.
virtual bool evaluateInstruction(const MCInst &Inst, uint64_t Addr,
uint64_t Size, uint64_t &Target,
const MCSubtargetInfo &STI) const;

/// Given an instruction tries to get the address of a memory operand. Returns
/// the address on success.
virtual std::optional<uint64_t>
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/MC/MCInstrAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@ bool MCInstrAnalysis::evaluateBranch(const MCInst & /*Inst*/, uint64_t /*Addr*/,
return false;
}

bool MCInstrAnalysis::evaluateInstruction(const MCInst &Inst, uint64_t Addr,
uint64_t Size, uint64_t &Target,
const MCSubtargetInfo &STI) const {
return false;
}

std::optional<uint64_t> MCInstrAnalysis::evaluateMemoryOperandAddress(
const MCInst &Inst, const MCSubtargetInfo *STI, uint64_t Addr,
uint64_t Size) const {
Expand Down
105 changes: 99 additions & 6 deletions llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <bitset>
#include <cstdint>

#define GET_INSTRINFO_MC_DESC
#define ENABLE_INSTR_PREDICATE_VERIFIER
Expand Down Expand Up @@ -182,6 +184,17 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
}

switch (Inst.getOpcode()) {
case RISCV::C_LUI:
case RISCV::LUI: {
setGPRState(Inst.getOperand(0).getReg(),
SignExtend64<32>(Inst.getOperand(1).getImm() << 12));
break;
}
case RISCV::AUIPC: {
setGPRState(Inst.getOperand(0).getReg(),
Addr + SignExtend64<32>(Inst.getOperand(1).getImm() << 12));
break;
}
default: {
// Clear the state of all defined registers for instructions that we don't
// explicitly support.
Expand All @@ -193,10 +206,6 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
}
break;
}
case RISCV::AUIPC:
setGPRState(Inst.getOperand(0).getReg(),
Addr + SignExtend64<32>(Inst.getOperand(1).getImm() << 12));
break;
}
}

Expand Down Expand Up @@ -234,6 +243,91 @@ class RISCVMCInstrAnalysis : public MCInstrAnalysis {
return false;
}

bool evaluateInstruction(const MCInst &Inst, uint64_t Addr, uint64_t Size,
uint64_t &Target,
const MCSubtargetInfo &STI) const override {
unsigned int ArchRegWidth = STI.getTargetTriple().getArchPointerBitWidth();
switch(Inst.getOpcode()) {
default:
return false;
case RISCV::C_ADDI:
case RISCV::ADDI: {
MCRegister Reg = Inst.getOperand(1).getReg();
auto TargetRegState = getGPRState(Reg);
if (TargetRegState && Reg != RISCV::X0) {
Target = *TargetRegState + Inst.getOperand(2).getImm();
Target &= maskTrailingOnes<uint64_t>(ArchRegWidth);
return true;
}
break;
}
case RISCV::C_ADDIW:
case RISCV::ADDIW: {
MCRegister Reg = Inst.getOperand(1).getReg();
auto TargetRegState = getGPRState(Reg);
if (TargetRegState && Reg != RISCV::X0) {
Target = *TargetRegState + Inst.getOperand(2).getImm();
Target = SignExtend64<32>(Target);
return true;
}
break;
}
case RISCV::LB:
case RISCV::LH:
case RISCV::LD:
case RISCV::LW:
case RISCV::LBU:
case RISCV::LHU:
case RISCV::LWU:
case RISCV::SB:
case RISCV::SH:
case RISCV::SW:
case RISCV::SD:
case RISCV::FLH:
case RISCV::FLW:
case RISCV::FLD:
case RISCV::FSH:
case RISCV::FSW:
case RISCV::FSD:
case RISCV::C_LD:
case RISCV::C_SD:
case RISCV::C_FLD:
case RISCV::C_FSD:
case RISCV::C_SW:
case RISCV::C_LW:
case RISCV::C_FSW:
case RISCV::C_FLW:
case RISCV::C_LBU:
case RISCV::C_LH:
case RISCV::C_LHU:
case RISCV::C_SB:
case RISCV::C_SH:
case RISCV::C_LWSP:
case RISCV::C_SWSP:
case RISCV::C_LDSP:
case RISCV::C_SDSP:
case RISCV::C_FLWSP:
case RISCV::C_FSWSP:
case RISCV::C_FLDSP:
case RISCV::C_FSDSP:
case RISCV::C_LD_RV32:
case RISCV::C_SD_RV32:
case RISCV::C_SDSP_RV32:
case RISCV::LD_RV32:
case RISCV::C_LDSP_RV32:
case RISCV::SD_RV32: {
MCRegister Reg = Inst.getOperand(1).getReg();
auto TargetRegState = getGPRState(Reg);
if (TargetRegState && Reg != RISCV::X0) {
Target = *TargetRegState + Inst.getOperand(2).getImm();
return true;
}
break;
}
}
return false;
}

bool isTerminator(const MCInst &Inst) const override {
if (MCInstrAnalysis::isTerminator(Inst))
return true;
Expand Down Expand Up @@ -344,12 +438,11 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTargetMC() {
TargetRegistry::RegisterELFStreamer(*T, createRISCVELFStreamer);
TargetRegistry::RegisterObjectTargetStreamer(
*T, createRISCVObjectTargetStreamer);
TargetRegistry::RegisterMCInstrAnalysis(*T, createRISCVInstrAnalysis);

// Register the asm target streamer.
TargetRegistry::RegisterAsmTargetStreamer(*T, createRISCVAsmTargetStreamer);
// Register the null target streamer.
TargetRegistry::RegisterNullTargetStreamer(*T,
createRISCVNullTargetStreamer);
TargetRegistry::RegisterMCInstrAnalysis(*T, createRISCVInstrAnalysis);
}
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We try hard not to introduce precanned binaries for testing. They are opaque, difficult to update, and typically increase the repo size more than text files.

If you want to test an executable, linked by lld, there is a recent example at
cross-project-tests/tools/llvm-objdump/ARM/

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can run clang as the first run line and pass the output to the run line that currently takes as input a precanned binary. My though process was that changes to clang may affect the binary that will be emitted and lead to test failures as clang evolves. Is this something I even worry about or should I instead be having the binary built at the time the test run instead?

Binary file not shown.
Binary file not shown.
Binary file not shown.
2 changes: 2 additions & 0 deletions llvm/test/tools/llvm-objdump/RISCV/lit.local.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
if not "RISCV" in config.root.targets:
config.unsupported = True
57 changes: 57 additions & 0 deletions llvm/test/tools/llvm-objdump/RISCV/riscv-ar.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# RUN: llvm-objdump -d %p/Inputs/riscv-ar | FileCheck %s

# CHECK: auipc a0, {{-?0x[0-9a-fA-F]+}}
# CHECK: ld a0, {{-?0x[0-9a-fA-F]+}}(a0)
# CHECK: auipc a0, {{-?0x[0-9a-fA-F]+}}
# CHECK: addi a0, a0, {{-?0x[0-9a-fA-F]+}} <gdata>
# CHECK: auipc a0, {{-?0x[0-9a-fA-F]+}}
# CHECK: addi a0, a0, {{-?0x[0-9a-fA-F]+}} <gdata>
# CHECK: auipc a0, {{-?0x[0-9a-fA-F]+}}
# CHECK: lw a0, {{-?0x[0-9a-fA-F]+}}(a0) <gdata>
# CHECK: auipc a0, {{-?0x[0-9a-fA-F]+}}
# CHECK: addi a0, a0, {{-?0x[0-9a-fA-F]+}} <ldata>
# CHECK: auipc ra, {{-?0x[0-9a-fA-F]+}}
# CHECK: jalr {{-?0x[0-9a-fA-F]+}}(ra) <func>
# CHECK: auipc t1, {{-?0x[0-9a-fA-F]+}}
# CHECK: jr {{-?0x[0-9a-fA-F]+}}(t1) <func>
# CHECK: lui a0, {{-?0x[0-9a-fA-F]+}}
# CHECK: addiw a0, a0, {{-?0x[0-9a-fA-F]+}} <gdata+0x12242678>
# CHECK: lui a0, {{-?0x[0-9a-fA-F]+}}
# CHECK: addiw a0, a0, {{-?0x[0-9a-fA-F]+}} <gdata+0x1438ad>
# CHECK: slli a0, a0, {{-?0x[0-9a-fA-F]+}}
# CHECK: addi a0, a0, {{-?0x[0-9a-fA-F]+}}
# CHECK: slli a0, a0, {{-?0x[0-9a-fA-F]+}}
# CHECK: addi a0, a0, {{-?0x[0-9a-fA-F]+}}
# CHECK: slli a0, a0, {{-?0x[0-9a-fA-F]+}}
# CHECK: addi a0, a0, {{-?0x[0-9a-fA-F]+}}
# CHECK: lui a0, {{-?0x[0-9a-fA-F]+}}
# CHECK: lui a0, {{-?0x[0-9a-fA-F]+}}
# CHECK: addiw a0, a0, {{-?0x[0-9a-fA-F]+}} <_start+0xfefff>

.global _start
.text
_start:
la a0, gdata
lla a0, gdata
lla a0, gdata
lw a0, gdata
lla a0, ldata

call func
tail func

li a0, 0x12345678
li a0, 0x1234567890abcdef
li a0, 0x10000
li a0, 0xfffff

.skip 0x100000
func:
ret

ldata:
.int 0

.data
gdata:
.int 0
30 changes: 30 additions & 0 deletions llvm/test/tools/llvm-objdump/RISCV/riscv32-ar-coverage.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# RUN: llvm-objdump -d %p/Inputs/riscv32-ar-coverage | FileCheck %s

# CHECK: 00001000 <_start>:
# CHECK-NEXT: 1000: 00000517 auipc a0, 0x0
# CHECK-NEXT: 1004: 0559 addi a0, a0, 0x16 <target>
# CHECK-NEXT: 1006: 00000517 auipc a0, 0x0
# CHECK-NEXT: 100a: 6910 ld a2, 0x10(a0) <target>
# CHECK-NEXT: 100c: 00000517 auipc a0, 0x0
# CHECK-NEXT: 1010: 00c53523 sd a2, 0xa(a0) <target>
# CHECK-NEXT: 1014: 0000 unimp

# the structure of this test file is similar to that of riscv64-ar-coverage
# with the major difference being that these tests are focused on instructions
# for 32 bit architecture

.global _start
.text
_start:
auipc a0, 0x0
addi a0, a0, 0x16 # addi -- behavior changes with differentr architectures

auipc a0, 0x0
c.ld a2, 0x10(a0) # zclsd instruction

auipc a0, 0x0
sd a2, 0xa(a0) # zilsd instruction

.skip 0x2
target:
ret:
106 changes: 106 additions & 0 deletions llvm/test/tools/llvm-objdump/RISCV/riscv64-ar-coverage.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
# RUN: llvm-objdump -d %p/Inputs/riscv64-ar-coverage | FileCheck %s

# CHECK: 0000000000001000 <_start>:
# CHECK-NEXT: 1000: 00001517 auipc a0, 0x1
# CHECK-NEXT: 1004: 00450513 addi a0, a0, 0x4 <target>
# CHECK-NEXT: 1008: 00001517 auipc a0, 0x1
# CHECK-NEXT: 100c: 1571 addi a0, a0, -0x4 <target>
# CHECK-NEXT: 100e: 6509 lui a0, 0x2
# CHECK-NEXT: 1010: 0045059b addiw a1, a0, 0x4 <target>
# CHECK-NEXT: 1014: 6509 lui a0, 0x2
# CHECK-NEXT: 1016: 2511 addiw a0, a0, 0x4 <target>
# CHECK-NEXT: 1018: 00102537 lui a0, 0x102
# CHECK-NEXT: 101c: c50c sw a1, 0x8(a0) <far_target>
# CHECK-NEXT: 101e: 00102537 lui a0, 0x102
# CHECK-NEXT: 1022: 4508 lw a0, 0x8(a0) <far_target>
# CHECK-NEXT: 1024: 6509 lui a0, 0x2
# CHECK-NEXT: 1026: 6585 lui a1, 0x1
# CHECK-NEXT: 1028: 0306 slli t1, t1, 0x1
# CHECK-NEXT: 102a: 0511 addi a0, a0, 0x4 <target>
# CHECK-NEXT: 102c: 0505 addi a0, a0, 0x1
# CHECK-NEXT: 102e: 00200037 lui zero, 0x200
# CHECK-NEXT: 1032: 00a02423 sw a0, 0x8(zero)
# CHECK-NEXT: 1036: 00101097 auipc ra, 0x101
# CHECK-NEXT: 103a: fd6080e7 jalr -0x2a(ra) <func>
# CHECK-NEXT: 103e: 640d lui s0, 0x3
# CHECK-NEXT: 1040: 8800 sb s0, 0x0(s0) <zcb>
# CHECK-NEXT: 1042: 4522 lw a0, 0x8(sp)


.global _start
.text

# The core of the feature being added was address resolution for instruction
# sequences where an register is populated by immediate values via two
# separate instructions. First by an instruction that provides the upper bits
# (auipc, lui ...) followed by another instruction for the lower bits (addi,
# jalr, ld ...).

_start:
# Test block 1-3 each focus on a certain starting instruction in a sequences,
# the ones that provide the upper bits. The other sequence is another
# instruction the provides the lower bits. The second instruction is
# arbitrarily chosen to increase code coverage

# test block #1
lla a0, target # addi
auipc a0, 0x1
c.addi a0, -0x4 # c.addi

# test block #2
c.lui a0, 0x2
addiw a1, a0, 0x4 # addiw
c.lui a0, 0x2
c.addiw a0, 0x4 # c.addiw

# test block #3
lui a0, 0x102
sw a1, 0x8(a0) # sw
lui a0, 0x102
c.lw a0, 0x8(a0) # lw

# Test block 4 tests instruction interleaving, essentially the code's
# ability to keep track of a valid sequence even if multiple other unrelated
# instructions separate the two

# test #4
lui a0, 0x2
lui a1, 0x1 # unrelated instruction
slli t1, t1, 0x1 # unrelated instruction
addi a0, a0, 0x4
addi a0, a0, 0x1 # verify register tracking terminates

# Test 5 ensures that an instruction writing into the zero register does
# not trigger resolution because that register's value cannot change and
# the sequence is equivalent to never running the first instruction

# test #5
lui x0, 0x200
sw a0, 0x8(x0)

# Test 6 ensures that the newly added functionality is compatible with
# code that already worked for branch instructions

# test #6
call func

# test #7 zcb extension
lui x8, 0x3
c.sb x8, 0(x8)

# test #8 stack based load/stores
c.lwsp a0, 0x8(sp)

# these are the labels that the instructions above are expecteed to resolve to
.section .data
.skip 0x4
target:
.word 1
.skip 0xff8
zcb:
.word 1
.skip 0xff004
far_target:
.word 2
func:
ret
Loading
Loading