Skip to content

Commit cbd7445

Browse files
committed
ZJIT: Keep a frame pointer and use it for memory params
Previously, ZJIT miscompiled the following because of native SP interference. def a(n1,n2,n3,n4,n5,n6,n7,n8) = [n8] a(0,0,0,0,0,0,0, :ok) Commented problematic disassembly: ; call rb_ary_new_capa mov x0, #1 mov x16, #0x1278 movk x16, #0x4bc, lsl #16 movk x16, #1, lsl #32 blr x16 ; call rb_ary_push mov x1, x0 str x1, [sp, #-0x10]! ; c_push() from alloc_regs() mov x0, x1 ; arg0, the array ldur x1, [sp] ; meant to be arg1=n8, but sp just moved! mov x16, #0x3968 movk x16, #0x4bc, lsl #16 movk x16, #1, lsl #32 blr x16 Since the frame pointer stays constant in the body of the function, static offsets based on it don't run the risk of being invalidated by SP movements. Pass the registers to preserve through Insn::FrameSetup. This allows ARM to use STP and waste no gaps between EC, SP, and CFP. x86 now preserves and restores RBP since we use it as the frame pointer. Since all arches now have a frame pointer, remove offset based SP movement in the epilogue and restore registers using the frame pointer.
1 parent abafb66 commit cbd7445

File tree

6 files changed

+220
-111
lines changed

6 files changed

+220
-111
lines changed

test/ruby/test_zjit.rb

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -819,6 +819,13 @@ def a(n1,n2,n3,n4,n5,n6,n7,n8) = self
819819
}
820820
end
821821

822+
def test_spilled_param_new_arary
823+
assert_compiles '[:ok]', %q{
824+
def a(n1,n2,n3,n4,n5,n6,n7,n8) = [n8]
825+
a(0,0,0,0,0,0,0, :ok)
826+
}
827+
end
828+
822829
def test_opt_aref_with
823830
assert_compiles ':ok', %q{
824831
def aref_with(hash) = hash["key"]

zjit/src/asm/arm64/opnd.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,9 @@ pub const X20_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 20 };
119119
pub const X21_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 21 };
120120
pub const X22_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 22 };
121121

122+
// frame pointer (base pointer)
123+
pub const X29_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 29 };
124+
122125
// link register
123126
pub const X30_REG: A64Reg = A64Reg { num_bits: 64, reg_no: 30 };
124127

zjit/src/backend/arm64/mod.rs

Lines changed: 108 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ pub const C_ARG_OPNDS: [Opnd; 6] = [
2929
pub const C_RET_REG: Reg = X0_REG;
3030
pub const C_RET_OPND: Opnd = Opnd::Reg(X0_REG);
3131
pub const NATIVE_STACK_PTR: Opnd = Opnd::Reg(XZR_REG);
32+
pub const NATIVE_BASE_PTR: Opnd = Opnd::Reg(X29_REG);
3233

3334
// These constants define the way we work with Arm64's stack pointer. The stack
3435
// pointer always needs to be aligned to a 16-byte boundary.
@@ -911,18 +912,54 @@ impl Assembler
911912
cb.write_byte(0);
912913
}
913914
},
914-
Insn::FrameSetup => {
915+
&Insn::FrameSetup { preserved, mut slot_count } => {
916+
const { assert!(SIZEOF_VALUE == 8, "alignment logic relies on SIZEOF_VALUE == 8"); }
917+
// Preserve X29 and set up frame record
915918
stp_pre(cb, X29, X30, A64Opnd::new_mem(128, C_SP_REG, -16));
916-
917-
// X29 (frame_pointer) = SP
918919
mov(cb, X29, C_SP_REG);
919-
},
920-
Insn::FrameTeardown => {
920+
921+
for regs in preserved.chunks(2) {
922+
// For the body, store pairs and move SP
923+
if let [reg0, reg1] = regs {
924+
stp_pre(cb, reg1.into(), reg0.into(), A64Opnd::new_mem(128, C_SP_REG, -16));
925+
} else if let [reg] = regs {
926+
// For overhang, store but don't move SP. Combine movement with
927+
// movement for slots below.
928+
stur(cb, reg.into(), A64Opnd::new_mem(64, C_SP_REG, -8));
929+
slot_count += 1;
930+
} else {
931+
unreachable!("chunks(2)");
932+
}
933+
}
934+
// Align slot_count
935+
if slot_count % 2 == 1 {
936+
slot_count += 1
937+
}
938+
if slot_count > 0 {
939+
let slot_offset = (slot_count * SIZEOF_VALUE) as u64;
940+
// Bail when asked to reserve too many slots in one instruction.
941+
ShiftedImmediate::try_from(slot_offset).ok()?;
942+
sub(cb, C_SP_REG, C_SP_REG, A64Opnd::new_uimm(slot_offset));
943+
}
944+
}
945+
Insn::FrameTeardown { preserved } => {
946+
// Restore preserved registers below frame pointer.
947+
let mut base_offset = 0;
948+
for regs in preserved.chunks(2) {
949+
if let [reg0, reg1] = regs {
950+
base_offset -= 16;
951+
ldp(cb, reg1.into(), reg0.into(), A64Opnd::new_mem(128, X29, base_offset));
952+
} else if let [reg] = regs {
953+
ldur(cb, reg.into(), A64Opnd::new_mem(64, X29, base_offset - 8));
954+
} else {
955+
unreachable!("chunks(2)");
956+
}
957+
}
958+
921959
// SP = X29 (frame pointer)
922960
mov(cb, C_SP_REG, X29);
923-
924961
ldp_post(cb, X29, X30, A64Opnd::new_mem(128, C_SP_REG, 16));
925-
},
962+
}
926963
Insn::Add { left, right, out } => {
927964
// Usually, we issue ADDS, so you could branch on overflow, but ADDS with
928965
// out=31 refers to out=XZR, which discards the sum. So, instead of ADDS
@@ -1482,11 +1519,73 @@ mod tests {
14821519
fn test_emit_frame() {
14831520
let (mut asm, mut cb) = setup_asm();
14841521

1485-
asm.frame_setup();
1486-
asm.frame_teardown();
1522+
asm.frame_setup(&[], 0);
1523+
asm.frame_teardown(&[]);
14871524
asm.compile_with_num_regs(&mut cb, 0);
14881525
}
14891526

1527+
#[test]
1528+
fn frame_setup_and_teardown() {
1529+
const THREE_REGS: &'static [Opnd] = &[Opnd::Reg(X19_REG), Opnd::Reg(X20_REG), Opnd::Reg(X21_REG)];
1530+
// Test 3 preserved regs (odd), odd slot_count
1531+
{
1532+
let (mut asm, mut cb) = setup_asm();
1533+
asm.frame_setup(THREE_REGS, 3);
1534+
asm.frame_teardown(THREE_REGS);
1535+
asm.compile_with_num_regs(&mut cb, 0);
1536+
assert_disasm!(cb, "fd7bbfa9fd030091f44fbfa9f5831ff8ff8300d1b44f7fa9b5835ef8bf030091fd7bc1a8", "
1537+
0x0: stp x29, x30, [sp, #-0x10]!
1538+
0x4: mov x29, sp
1539+
0x8: stp x20, x19, [sp, #-0x10]!
1540+
0xc: stur x21, [sp, #-8]
1541+
0x10: sub sp, sp, #0x20
1542+
0x14: ldp x20, x19, [x29, #-0x10]
1543+
0x18: ldur x21, [x29, #-0x18]
1544+
0x1c: mov sp, x29
1545+
0x20: ldp x29, x30, [sp], #0x10
1546+
");
1547+
}
1548+
1549+
// Test 3 preserved regs (odd), even slot_count
1550+
{
1551+
let (mut asm, mut cb) = setup_asm();
1552+
asm.frame_setup(THREE_REGS, 4);
1553+
asm.frame_teardown(THREE_REGS);
1554+
asm.compile_with_num_regs(&mut cb, 0);
1555+
assert_disasm!(cb, "fd7bbfa9fd030091f44fbfa9f5831ff8ffc300d1b44f7fa9b5835ef8bf030091fd7bc1a8", "
1556+
0x0: stp x29, x30, [sp, #-0x10]!
1557+
0x4: mov x29, sp
1558+
0x8: stp x20, x19, [sp, #-0x10]!
1559+
0xc: stur x21, [sp, #-8]
1560+
0x10: sub sp, sp, #0x30
1561+
0x14: ldp x20, x19, [x29, #-0x10]
1562+
0x18: ldur x21, [x29, #-0x18]
1563+
0x1c: mov sp, x29
1564+
0x20: ldp x29, x30, [sp], #0x10
1565+
");
1566+
}
1567+
1568+
// Test 4 preserved regs (even), odd slot_count
1569+
{
1570+
static FOUR_REGS: &'static [Opnd] = &[Opnd::Reg(X19_REG), Opnd::Reg(X20_REG), Opnd::Reg(X21_REG), Opnd::Reg(X22_REG)];
1571+
let (mut asm, mut cb) = setup_asm();
1572+
asm.frame_setup(FOUR_REGS, 3);
1573+
asm.frame_teardown(FOUR_REGS);
1574+
asm.compile_with_num_regs(&mut cb, 0);
1575+
assert_disasm!(cb, "fd7bbfa9fd030091f44fbfa9f657bfa9ff8300d1b44f7fa9b6577ea9bf030091fd7bc1a8", "
1576+
0x0: stp x29, x30, [sp, #-0x10]!
1577+
0x4: mov x29, sp
1578+
0x8: stp x20, x19, [sp, #-0x10]!
1579+
0xc: stp x22, x21, [sp, #-0x10]!
1580+
0x10: sub sp, sp, #0x20
1581+
0x14: ldp x20, x19, [x29, #-0x10]
1582+
0x18: ldp x22, x21, [x29, #-0x20]
1583+
0x1c: mov sp, x29
1584+
0x20: ldp x29, x30, [sp], #0x10
1585+
");
1586+
}
1587+
}
1588+
14901589
#[test]
14911590
fn test_emit_je_fits_into_bcond() {
14921591
let (mut asm, mut cb) = setup_asm();

zjit/src/backend/lir.rs

Lines changed: 19 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,12 @@ use crate::asm::{CodeBlock, Label};
1212
pub use crate::backend::current::{
1313
Reg,
1414
EC, CFP, SP,
15-
NATIVE_STACK_PTR,
15+
NATIVE_STACK_PTR, NATIVE_BASE_PTR,
1616
C_ARG_OPNDS, C_RET_REG, C_RET_OPND,
1717
};
1818

19+
pub static JIT_PRESERVED_REGS: &'static [Opnd] = &[CFP, SP, EC];
20+
1921
// Memory operand base
2022
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
2123
pub enum MemBase
@@ -291,8 +293,6 @@ pub enum Target
291293
context: Option<SideExitContext>,
292294
/// We use this to enrich asm comments.
293295
reason: SideExitReason,
294-
/// The number of bytes we need to adjust the C stack pointer by.
295-
c_stack_bytes: usize,
296296
/// Some if the side exit should write this label. We use it for patch points.
297297
label: Option<Label>,
298298
},
@@ -404,10 +404,10 @@ pub enum Insn {
404404
CSelZ { truthy: Opnd, falsy: Opnd, out: Opnd },
405405

406406
/// Set up the frame stack as necessary per the architecture.
407-
FrameSetup,
407+
FrameSetup { preserved: &'static [Opnd], slot_count: usize },
408408

409409
/// Tear down the frame stack as necessary per the architecture.
410-
FrameTeardown,
410+
FrameTeardown { preserved: &'static [Opnd], },
411411

412412
// Atomically increment a counter
413413
// Input: memory operand, increment value
@@ -598,8 +598,8 @@ impl Insn {
598598
Insn::CSelNE { .. } => "CSelNE",
599599
Insn::CSelNZ { .. } => "CSelNZ",
600600
Insn::CSelZ { .. } => "CSelZ",
601-
Insn::FrameSetup => "FrameSetup",
602-
Insn::FrameTeardown => "FrameTeardown",
601+
Insn::FrameSetup { .. } => "FrameSetup",
602+
Insn::FrameTeardown { .. } => "FrameTeardown",
603603
Insn::IncrCounter { .. } => "IncrCounter",
604604
Insn::Jbe(_) => "Jbe",
605605
Insn::Jb(_) => "Jb",
@@ -823,8 +823,8 @@ impl<'a> Iterator for InsnOpndIterator<'a> {
823823
Insn::CPop { .. } |
824824
Insn::CPopAll |
825825
Insn::CPushAll |
826-
Insn::FrameSetup |
827-
Insn::FrameTeardown |
826+
Insn::FrameSetup { .. } |
827+
Insn::FrameTeardown { .. } |
828828
Insn::PadPatchPoint |
829829
Insn::PosMarker(_) => None,
830830

@@ -979,8 +979,8 @@ impl<'a> InsnOpndMutIterator<'a> {
979979
Insn::CPop { .. } |
980980
Insn::CPopAll |
981981
Insn::CPushAll |
982-
Insn::FrameSetup |
983-
Insn::FrameTeardown |
982+
Insn::FrameSetup { .. } |
983+
Insn::FrameTeardown { .. } |
984984
Insn::PadPatchPoint |
985985
Insn::PosMarker(_) => None,
986986

@@ -1813,7 +1813,7 @@ impl Assembler
18131813
for (idx, target) in targets {
18141814
// Compile a side exit. Note that this is past the split pass and alloc_regs(),
18151815
// so you can't use a VReg or an instruction that needs to be split.
1816-
if let Target::SideExit { context, reason, c_stack_bytes, label } = target {
1816+
if let Target::SideExit { context, reason, label } = target {
18171817
asm_comment!(self, "Exit: {reason}");
18181818
let side_exit_label = if let Some(label) = label {
18191819
Target::Label(label)
@@ -1858,13 +1858,8 @@ impl Assembler
18581858
self.store(cfp_sp, Opnd::Reg(Assembler::SCRATCH_REG));
18591859
}
18601860

1861-
if c_stack_bytes > 0 {
1862-
asm_comment!(self, "restore C stack pointer");
1863-
self.add_into(NATIVE_STACK_PTR, c_stack_bytes.into());
1864-
}
1865-
18661861
asm_comment!(self, "exit to the interpreter");
1867-
self.frame_teardown();
1862+
self.frame_teardown(&[]); // matching the setup in :bb0-prologue:
18681863
self.mov(C_RET_OPND, Opnd::UImm(Qundef.as_u64()));
18691864
self.cret(C_RET_OPND);
18701865

@@ -2065,12 +2060,14 @@ impl Assembler {
20652060
out
20662061
}
20672062

2068-
pub fn frame_setup(&mut self) {
2069-
self.push_insn(Insn::FrameSetup);
2063+
pub fn frame_setup(&mut self, preserved_regs: &'static [Opnd], slot_count: usize) {
2064+
self.push_insn(Insn::FrameSetup { preserved: preserved_regs, slot_count });
20702065
}
20712066

2072-
pub fn frame_teardown(&mut self) {
2073-
self.push_insn(Insn::FrameTeardown);
2067+
/// The inverse of [Self::frame_setup] used before return. `reserve_bytes`
2068+
/// not necessary since we use a base pointer register.
2069+
pub fn frame_teardown(&mut self, preserved_regs: &'static [Opnd]) {
2070+
self.push_insn(Insn::FrameTeardown { preserved: preserved_regs });
20742071
}
20752072

20762073
pub fn incr_counter(&mut self, mem: Opnd, value: Opnd) {

zjit/src/backend/x86_64/mod.rs

Lines changed: 61 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ pub const C_ARG_OPNDS: [Opnd; 6] = [
2929
pub const C_RET_REG: Reg = RAX_REG;
3030
pub const C_RET_OPND: Opnd = Opnd::Reg(RAX_REG);
3131
pub const NATIVE_STACK_PTR: Opnd = Opnd::Reg(RSP_REG);
32+
pub const NATIVE_BASE_PTR: Opnd = Opnd::Reg(RBP_REG);
3233

3334
impl CodeBlock {
3435
// The number of bytes that are generated by jmp_ptr
@@ -110,9 +111,9 @@ impl Assembler
110111
vec![RAX_REG, RCX_REG, RDX_REG, RSI_REG, RDI_REG, R8_REG, R9_REG, R10_REG, R11_REG]
111112
}
112113

113-
/// How many bytes a call and a [Self::frame_setup] would change native SP
114+
/// How many bytes a call and a bare bones [Self::frame_setup] would change native SP
114115
pub fn frame_size() -> i32 {
115-
0x8
116+
0x10
116117
}
117118

118119
// These are the callee-saved registers in the x86-64 SysV ABI
@@ -476,22 +477,34 @@ impl Assembler
476477
cb.write_byte(0);
477478
},
478479

479-
// Set up RBP to work with frame pointer unwinding
480+
// Set up RBP as frame pointer work with unwinding
480481
// (e.g. with Linux `perf record --call-graph fp`)
481-
Insn::FrameSetup => {
482-
if false { // We don't support --zjit-perf yet
483-
// TODO(alan): Change Assembler::frame_size() when adding --zjit-perf support
484-
push(cb, RBP);
485-
mov(cb, RBP, RSP);
486-
push(cb, RBP);
482+
// and to allow push and pops in the function.
483+
&Insn::FrameSetup { preserved, mut slot_count } => {
484+
// Bump slot_count for alignment if necessary
485+
const { assert!(SIZEOF_VALUE == 8, "alignment logic relies on SIZEOF_VALUE == 8"); }
486+
let total_slots = 2 /* rbp and return address*/ + slot_count + preserved.len();
487+
if total_slots % 2 == 1 {
488+
slot_count += 1;
487489
}
488-
},
489-
Insn::FrameTeardown => {
490-
if false { // We don't support --zjit-perf yet
491-
pop(cb, RBP);
492-
pop(cb, RBP);
490+
push(cb, RBP);
491+
mov(cb, RBP, RSP);
492+
for reg in preserved {
493+
push(cb, reg.into());
493494
}
494-
},
495+
if slot_count > 0 {
496+
sub(cb, RSP, uimm_opnd((slot_count * SIZEOF_VALUE) as u64));
497+
}
498+
}
499+
&Insn::FrameTeardown { preserved } => {
500+
let mut preserved_offset = -8;
501+
for reg in preserved {
502+
mov(cb, reg.into(), mem_opnd(64, RBP, preserved_offset));
503+
preserved_offset -= 8;
504+
}
505+
mov(cb, RSP, RBP);
506+
pop(cb, RBP);
507+
}
495508

496509
Insn::Add { left, right, .. } => {
497510
let opnd1 = emit_64bit_immediate(cb, right);
@@ -1306,4 +1319,37 @@ mod tests {
13061319
0x6: mov dword ptr [rax], 0x80000001
13071320
"});
13081321
}
1322+
1323+
#[test]
1324+
fn frame_setup_teardown() {
1325+
let (mut asm, mut cb) = setup_asm();
1326+
asm.frame_setup(JIT_PRESERVED_REGS, 0);
1327+
asm.frame_teardown(JIT_PRESERVED_REGS);
1328+
1329+
asm.cret(C_RET_OPND);
1330+
1331+
asm.frame_setup(&[], 5);
1332+
asm.frame_teardown(&[]);
1333+
1334+
asm.compile_with_num_regs(&mut cb, 0);
1335+
assert_disasm!(cb, "554889e541555341544883ec084c8b6df8488b5df04c8b65e84889ec5dc3554889e54883ec304889ec5d", {"
1336+
0x0: push rbp
1337+
0x1: mov rbp, rsp
1338+
0x4: push r13
1339+
0x6: push rbx
1340+
0x7: push r12
1341+
0x9: sub rsp, 8
1342+
0xd: mov r13, qword ptr [rbp - 8]
1343+
0x11: mov rbx, qword ptr [rbp - 0x10]
1344+
0x15: mov r12, qword ptr [rbp - 0x18]
1345+
0x19: mov rsp, rbp
1346+
0x1c: pop rbp
1347+
0x1d: ret
1348+
0x1e: push rbp
1349+
0x1f: mov rbp, rsp
1350+
0x22: sub rsp, 0x30
1351+
0x26: mov rsp, rbp
1352+
0x29: pop rbp
1353+
"});
1354+
}
13091355
}

0 commit comments

Comments
 (0)