Skip to content

Commit 33c27d8

Browse files
committed
x64: handle signed mul_with_overflow for non-pow-2 ints
1 parent 5b4de16 commit 33c27d8

File tree

2 files changed

+142
-43
lines changed

2 files changed

+142
-43
lines changed

src/arch/x86_64/CodeGen.zig

Lines changed: 141 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1086,29 +1086,7 @@ fn airTrunc(self: *Self, inst: Air.Inst.Index) !void {
10861086
// have to be removed. this only happens if the dst is not a power-of-two size.
10871087
const dst_bit_size = dst_ty.bitSize(self.target.*);
10881088
if (!math.isPowerOfTwo(dst_bit_size) or dst_bit_size < 8) {
1089-
const max_reg_bit_width = Register.rax.size();
1090-
const shift = @intCast(u6, max_reg_bit_width - dst_ty.bitSize(self.target.*));
1091-
const mask = (~@as(u64, 0)) >> shift;
1092-
try self.genBinMathOpMir(.@"and", Type.usize, .{ .register = reg }, .{ .immediate = mask });
1093-
1094-
if (src_ty.intInfo(self.target.*).signedness == .signed) {
1095-
_ = try self.addInst(.{
1096-
.tag = .sal,
1097-
.ops = (Mir.Ops{
1098-
.reg1 = reg,
1099-
.flags = 0b10,
1100-
}).encode(),
1101-
.data = .{ .imm = shift },
1102-
});
1103-
_ = try self.addInst(.{
1104-
.tag = .sar,
1105-
.ops = (Mir.Ops{
1106-
.reg1 = reg,
1107-
.flags = 0b10,
1108-
}).encode(),
1109-
.data = .{ .imm = shift },
1110-
});
1111-
}
1089+
try self.truncateRegister(dst_ty, reg);
11121090
}
11131091

11141092
return self.finishAir(inst, .{ .register = reg }, .{ ty_op.operand, .none, .none });
@@ -1478,31 +1456,119 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
14781456
switch (ty.zigTypeTag()) {
14791457
.Vector => return self.fail("TODO implement mul_with_overflow for Vector type", .{}),
14801458
.Int => {
1459+
try self.spillCompareFlagsIfOccupied();
1460+
self.compare_flags_inst = null;
1461+
14811462
const int_info = ty.intInfo(self.target.*);
14821463

14831464
if (int_info.bits > 64) {
14841465
return self.fail("TODO implement mul_with_overflow for Ints larger than 64bits", .{});
14851466
}
14861467

1487-
// Spill .rax and .rdx upfront to ensure we don't spill the operands too late.
1488-
try self.register_manager.getReg(.rax, inst);
1489-
try self.register_manager.getReg(.rdx, null);
1490-
self.register_manager.freezeRegs(&.{ .rax, .rdx });
1491-
defer self.register_manager.unfreezeRegs(&.{ .rax, .rdx });
1468+
if (math.isPowerOfTwo(int_info.bits)) {
1469+
// Spill .rax and .rdx upfront to ensure we don't spill the operands too late.
1470+
try self.register_manager.getReg(.rax, inst);
1471+
try self.register_manager.getReg(.rdx, null);
1472+
self.register_manager.freezeRegs(&.{ .rax, .rdx });
1473+
defer self.register_manager.unfreezeRegs(&.{ .rax, .rdx });
1474+
1475+
const lhs = try self.resolveInst(bin_op.lhs);
1476+
const rhs = try self.resolveInst(bin_op.rhs);
1477+
1478+
try self.genIntMulDivOpMir(switch (int_info.signedness) {
1479+
.signed => .imul,
1480+
.unsigned => .mul,
1481+
}, ty, int_info.signedness, lhs, rhs);
1482+
1483+
const result: MCValue = switch (int_info.signedness) {
1484+
.signed => .{ .register_overflow_signed = .rax },
1485+
.unsigned => .{ .register_overflow_unsigned = .rax },
1486+
};
1487+
break :result result;
1488+
}
14921489

14931490
const lhs = try self.resolveInst(bin_op.lhs);
14941491
const rhs = try self.resolveInst(bin_op.rhs);
14951492

1496-
try self.genIntMulDivOpMir(switch (int_info.signedness) {
1497-
.signed => .imul,
1498-
.unsigned => .mul,
1499-
}, ty, int_info.signedness, lhs, rhs);
1493+
rhs.freezeIfRegister(&self.register_manager);
1494+
defer rhs.unfreezeIfRegister(&self.register_manager);
15001495

1501-
const result: MCValue = switch (int_info.signedness) {
1502-
.signed => .{ .register_overflow_signed = .rax },
1503-
.unsigned => .{ .register_overflow_unsigned = .rax },
1496+
// TODO check if we could reuse rhs instead, and swap the values out.
1497+
const dst_mcv = blk: {
1498+
if (self.reuseOperand(inst, bin_op.lhs, 0, lhs)) {
1499+
if (lhs.isRegister()) break :blk lhs;
1500+
}
1501+
break :blk MCValue{ .register = try self.copyToTmpRegister(ty, lhs) };
15041502
};
1505-
break :result result;
1503+
dst_mcv.freezeIfRegister(&self.register_manager);
1504+
defer dst_mcv.unfreezeIfRegister(&self.register_manager);
1505+
1506+
const rhs_mcv = blk: {
1507+
if (rhs.isRegister() or rhs.isMemory()) break :blk rhs;
1508+
break :blk MCValue{ .register = try self.copyToTmpRegister(ty, rhs) };
1509+
};
1510+
rhs_mcv.freezeIfRegister(&self.register_manager);
1511+
defer rhs_mcv.unfreezeIfRegister(&self.register_manager);
1512+
1513+
const tuple_ty = self.air.typeOfIndex(inst);
1514+
const tuple_size = @intCast(u32, tuple_ty.abiSize(self.target.*));
1515+
const tuple_align = tuple_ty.abiAlignment(self.target.*);
1516+
const overflow_bit_offset = @intCast(i32, tuple_ty.structFieldOffset(1, self.target.*));
1517+
1518+
const stack_offset = @intCast(i32, try self.allocMem(inst, tuple_size, tuple_align));
1519+
const extended_ty = switch (int_info.signedness) {
1520+
.signed => Type.isize,
1521+
.unsigned => ty,
1522+
};
1523+
1524+
try self.genIntMulComplexOpMir(extended_ty, dst_mcv, rhs_mcv);
1525+
1526+
const temp_regs = try self.register_manager.allocRegs(3, .{ null, null, null });
1527+
self.register_manager.freezeRegs(&temp_regs);
1528+
defer self.register_manager.unfreezeRegs(&temp_regs);
1529+
1530+
const overflow_reg = temp_regs[0];
1531+
const flags: u2 = switch (int_info.signedness) {
1532+
.signed => 0b00,
1533+
.unsigned => 0b10,
1534+
};
1535+
_ = try self.addInst(.{
1536+
.tag = .cond_set_byte_overflow,
1537+
.ops = (Mir.Ops{
1538+
.reg1 = overflow_reg.to8(),
1539+
.flags = flags,
1540+
}).encode(),
1541+
.data = undefined,
1542+
});
1543+
1544+
const scratch_reg = temp_regs[1];
1545+
try self.genSetReg(extended_ty, scratch_reg, dst_mcv);
1546+
try self.truncateRegister(ty, scratch_reg);
1547+
try self.genBinMathOpMir(.cmp, extended_ty, dst_mcv, .{ .register = scratch_reg });
1548+
1549+
const eq_reg = temp_regs[2];
1550+
_ = try self.addInst(.{
1551+
.tag = .cond_set_byte_eq_ne,
1552+
.ops = (Mir.Ops{
1553+
.reg1 = eq_reg.to8(),
1554+
.flags = 0b00,
1555+
}).encode(),
1556+
.data = undefined,
1557+
});
1558+
1559+
try self.genBinMathOpMir(
1560+
.@"or",
1561+
Type.u8,
1562+
.{ .register = overflow_reg },
1563+
.{ .register = eq_reg },
1564+
);
1565+
1566+
try self.genSetStack(ty, stack_offset, .{ .register = scratch_reg }, .{});
1567+
try self.genSetStack(Type.initTag(.u1), stack_offset - overflow_bit_offset, .{
1568+
.register = overflow_reg.to8(),
1569+
}, .{});
1570+
1571+
break :result MCValue{ .stack_offset = stack_offset };
15061572
},
15071573
else => unreachable,
15081574
}
@@ -1648,7 +1714,7 @@ fn genInlineIntDivFloor(self: *Self, ty: Type, lhs: MCValue, rhs: MCValue) !MCVa
16481714
}).encode(),
16491715
.data = undefined,
16501716
});
1651-
try self.genBinMathOpMir(.add, Type.isize, .{ .register = divisor.to64() }, .{ .register = .rax });
1717+
try self.genBinMathOpMir(.add, Type.isize, .{ .register = divisor }, .{ .register = .rax });
16521718
return MCValue{ .register = divisor };
16531719
}
16541720

@@ -2222,8 +2288,8 @@ fn genSliceElemPtr(self: *Self, lhs: Air.Inst.Ref, rhs: Air.Inst.Ref) !MCValue {
22222288
}
22232289
// TODO we could allocate register here, but need to expect addr register and potentially
22242290
// offset register.
2225-
try self.genBinMathOpMir(.add, slice_ptr_field_type, .{ .register = addr_reg.to64() }, .{
2226-
.register = offset_reg.to64(),
2291+
try self.genBinMathOpMir(.add, slice_ptr_field_type, .{ .register = addr_reg }, .{
2292+
.register = offset_reg,
22272293
});
22282294
return MCValue{ .register = addr_reg.to64() };
22292295
}
@@ -2315,7 +2381,7 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void {
23152381
// TODO we could allocate register here, but need to expect addr register and potentially
23162382
// offset register.
23172383
const dst_mcv = try self.allocRegOrMem(inst, false);
2318-
try self.genBinMathOpMir(.add, array_ty, .{ .register = addr_reg.to64() }, .{ .register = offset_reg.to64() });
2384+
try self.genBinMathOpMir(.add, Type.usize, .{ .register = addr_reg }, .{ .register = offset_reg });
23192385
try self.load(dst_mcv, .{ .register = addr_reg.to64() }, array_ty);
23202386
break :result dst_mcv;
23212387
};
@@ -3178,8 +3244,8 @@ fn genBinMathOpMir(self: *Self, mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MC
31783244
_ = try self.addInst(.{
31793245
.tag = mir_tag,
31803246
.ops = (Mir.Ops{
3181-
.reg1 = registerAlias(dst_reg, @divExact(src_reg.size(), 8)),
3182-
.reg2 = src_reg,
3247+
.reg1 = registerAlias(dst_reg, abi_size),
3248+
.reg2 = registerAlias(src_reg, abi_size),
31833249
}).encode(),
31843250
.data = undefined,
31853251
});
@@ -6531,3 +6597,36 @@ fn shiftRegister(self: *Self, reg: Register, shift: u8) !void {
65316597
});
65326598
}
65336599
}
6600+
6601+
/// Truncates the value in the register in place.
6602+
/// Clobbers any remaining bits.
6603+
fn truncateRegister(self: *Self, ty: Type, reg: Register) !void {
6604+
const int_info = ty.intInfo(self.target.*);
6605+
const max_reg_bit_width = Register.rax.size();
6606+
switch (int_info.signedness) {
6607+
.signed => {
6608+
const shift = @intCast(u6, max_reg_bit_width - int_info.bits);
6609+
_ = try self.addInst(.{
6610+
.tag = .sal,
6611+
.ops = (Mir.Ops{
6612+
.reg1 = reg.to64(),
6613+
.flags = 0b10,
6614+
}).encode(),
6615+
.data = .{ .imm = shift },
6616+
});
6617+
_ = try self.addInst(.{
6618+
.tag = .sar,
6619+
.ops = (Mir.Ops{
6620+
.reg1 = reg.to64(),
6621+
.flags = 0b10,
6622+
}).encode(),
6623+
.data = .{ .imm = shift },
6624+
});
6625+
},
6626+
.unsigned => {
6627+
const shift = @intCast(u6, max_reg_bit_width - int_info.bits);
6628+
const mask = (~@as(u64, 0)) >> shift;
6629+
try self.genBinMathOpMir(.@"and", Type.usize, .{ .register = reg }, .{ .immediate = mask });
6630+
},
6631+
}
6632+
}

src/arch/x86_64/Emit.zig

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1383,7 +1383,7 @@ inline fn getOpCode(tag: Tag, enc: Encoding, is_one_byte: bool) ?OpCode {
13831383
.sub => OpCode.oneByte(if (is_one_byte) 0x2a else 0x2b),
13841384
.xor => OpCode.oneByte(if (is_one_byte) 0x32 else 0x33),
13851385
.@"and" => OpCode.oneByte(if (is_one_byte) 0x22 else 0x23),
1386-
.@"or" => OpCode.oneByte(if (is_one_byte) 0x0b else 0x0b),
1386+
.@"or" => OpCode.oneByte(if (is_one_byte) 0x0a else 0x0b),
13871387
.sbb => OpCode.oneByte(if (is_one_byte) 0x1a else 0x1b),
13881388
.cmp => OpCode.oneByte(if (is_one_byte) 0x3a else 0x3b),
13891389
.mov => OpCode.oneByte(if (is_one_byte) 0x8a else 0x8b),

0 commit comments

Comments
 (0)