diff --git a/doc/langref.html.in b/doc/langref.html.in index ac8671ebb7a0..7e1848668cfd 100644 --- a/doc/langref.html.in +++ b/doc/langref.html.in @@ -4742,6 +4742,25 @@ fn cmpxchgWeakButNotAtomic(comptime T: type, ptr: *T, expected_value: T, new_val {#see_also|@cVaArg|@cVaCopy|@cVaEnd#} {#header_close#} + {#header_open|@depositBits#} +
{#syntax#}@depositBits(source: T, mask: T) T{#endsyntax#}
+

+ {#syntax#}T{#endsyntax#} must be an unsigned integer type, or {#syntax#}comptime_int{#endsyntax#} (for which both parameters must be non-negative). {#syntax#}T{#endsyntax#} is determined by peer-type resolution. +

+

+ Transfers contiguous bits from the bottom of the {#syntax#}source{#endsyntax#} operand to positions in the destination corresponding to bits that are set in the {#syntax#}mask{#endsyntax#}. The remaining bits in the destination are zeroed. +

+

+ Where available, this builtin compiles down to a {#syntax#}pdep{#endsyntax#} instruction on x86 targets with BMI2 enabled. For x86-64, this will happen for types up to {#syntax#}u64{#endsyntax#}, and will happen for types up to {#syntax#}u32{#endsyntax#} for x86. +

+

+ Example: +

+ + {#code|test_depositbits_builtin.zig#} + {#see_also|@extractBits#} + {#header_close#} + {#header_open|@divExact#}
{#syntax#}@divExact(numerator: T, denominator: T) T{#endsyntax#}

@@ -4896,6 +4915,26 @@ fn cmpxchgWeakButNotAtomic(comptime T: type, ptr: *T, expected_value: T, new_val {#see_also|@export#} {#header_close#} + {#header_open|@extractBits#} +

{#syntax#}@extractBits(source: T, mask: T) T{#endsyntax#}
+

+ {#syntax#}T{#endsyntax#} must be an unsigned integer type, or {#syntax#}comptime_int{#endsyntax#} (for which both parameters must be non-negative). {#syntax#}T{#endsyntax#} is determined by peer-type resolution. +

+

+ Transfers bits in the {#syntax#}source{#endsyntax#} corresponding to bits set in the {#syntax#}mask{#endsyntax#} operand to the destination, writing them as contiguous lower bits. The remaining bits in the destination are zeroed. +

+

+ Where available, this builtin compiles down to a {#syntax#}pext{#endsyntax#} instruction on x86 targets with BMI2 enabled. For x86-64, this will happen for types up to {#syntax#}u64{#endsyntax#}, and will happen for types up to {#syntax#}u32{#endsyntax#} for x86. +

+

+ Example: +

+ + {#code|test_extractbits_builtin.zig#} + + {#see_also|@depositBits#} + {#header_close#} + {#header_open|@field#}
{#syntax#}@field(lhs: anytype, comptime field_name: []const u8) (field){#endsyntax#}

Performs field access by a compile-time string. Works on both fields and declarations. diff --git a/doc/langref/test_depositbits_builtin.zig b/doc/langref/test_depositbits_builtin.zig new file mode 100644 index 000000000000..dfe6b8882f43 --- /dev/null +++ b/doc/langref/test_depositbits_builtin.zig @@ -0,0 +1,7 @@ +const std = @import("std"); + +test "deposit bits" { + try std.testing.expectEqual(@depositBits(0x00001234, 0xf0f0f0f0), 0x10203040); +} + +// test diff --git a/doc/langref/test_extractbits_builtin.zig b/doc/langref/test_extractbits_builtin.zig new file mode 100644 index 000000000000..ca21a7f67873 --- /dev/null +++ b/doc/langref/test_extractbits_builtin.zig @@ -0,0 +1,7 @@ +const std = @import("std"); + +test "extract bits" { + try std.testing.expectEqual(@extractBits(0x12345678, 0xf0f0f0f0), 0x00001357); +} + +// test diff --git a/lib/compiler_rt.zig b/lib/compiler_rt.zig index c8da59eb5f0c..55d31a57489f 100644 --- a/lib/compiler_rt.zig +++ b/lib/compiler_rt.zig @@ -11,6 +11,7 @@ comptime { _ = @import("compiler_rt/bitreverse.zig"); _ = @import("compiler_rt/bswap.zig"); _ = @import("compiler_rt/cmp.zig"); + _ = @import("compiler_rt/pdeppext.zig"); _ = @import("compiler_rt/shift.zig"); _ = @import("compiler_rt/negXi2.zig"); diff --git a/lib/compiler_rt/pdeppext.zig b/lib/compiler_rt/pdeppext.zig new file mode 100644 index 000000000000..82e2957e3ff4 --- /dev/null +++ b/lib/compiler_rt/pdeppext.zig @@ -0,0 +1,312 @@ +const std = @import("std"); +const builtin = @import("builtin"); +const common = @import("common.zig"); + +const Limb = u32; +const Log2Limb = u5; + +comptime { + @export(&__pdep_bigint, .{ .name = "__pdep_bigint", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__pdep_u32, .{ .name = "__pdep_u32", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__pdep_u64, .{ .name = "__pdep_u64", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__pdep_u128, .{ .name = "__pdep_u128", 
.linkage = common.linkage, .visibility = common.visibility }); + + @export(&__pext_bigint, .{ .name = "__pext_bigint", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__pext_u32, .{ .name = "__pext_u32", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__pext_u64, .{ .name = "__pext_u64", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__pext_u128, .{ .name = "__pext_u128", .linkage = common.linkage, .visibility = common.visibility }); +} + +const endian = builtin.cpu.arch.endian(); + +inline fn limb(x: []const Limb, i: usize) Limb { + return if (endian == .little) x[i] else x[x.len - 1 - i]; +} + +inline fn limb_ptr(x: []Limb, i: usize) *Limb { + return if (endian == .little) &x[i] else &x[x.len - 1 - i]; +} + +inline fn limb_set(x: []Limb, i: usize, v: Limb) void { + if (endian == .little) { + x[i] = v; + } else { + x[x.len - 1 - i] = v; + } +} + +// Assumes that `result` is zeroed. +inline fn pdep_bigint(result: []Limb, source: []const Limb, mask: []const Limb) void { + var mask_limb: Limb = limb(mask, 0); + var mask_limb_index: usize = 0; + var i: usize = 0; + + outer: while (true) : (i += 1) { + // Find the lowest set bit in mask + const mask_limb_bit: Log2Limb = limb_bit: while (true) { + const mask_limb_tz = @ctz(mask_limb); + if (mask_limb_tz != @bitSizeOf(Limb)) { + const cast_limb_bit: Log2Limb = @intCast(mask_limb_tz); + mask_limb ^= @as(Limb, 1) << cast_limb_bit; + break :limb_bit cast_limb_bit; + } + + mask_limb_index += 1; + if (mask_limb_index >= mask.len) break :outer; + + mask_limb = limb(mask, mask_limb_index); + }; + + const i_limb_index = i / 32; + const i_limb_bit: Log2Limb = @truncate(i); + + if (i_limb_index >= source.len) break; + + const source_bit_set = limb(source, i_limb_index) & (@as(Limb, 1) << i_limb_bit) != 0; + + limb_ptr(result, mask_limb_index).* |= @as(Limb, @intFromBool(source_bit_set)) << mask_limb_bit; + } +} + +pub fn __pdep_bigint(r: [*]Limb, s: 
[*]const Limb, m: [*]const Limb, bits: usize) callconv(.c) void { + const result_full = r[0 .. std.math.divCeil(usize, @intCast(intAbiSize(@intCast(bits), builtin.target)), 4) catch unreachable]; + @memset(result_full, 0); + + const result = r[0 .. std.math.divCeil(usize, bits, 32) catch unreachable]; + const source = s[0 .. std.math.divCeil(usize, bits, 32) catch unreachable]; + const mask = m[0 .. std.math.divCeil(usize, bits, 32) catch unreachable]; + + pdep_bigint(result, source, mask); +} + +// Assumes that `result` is zeroed. +inline fn pext_bigint(result: []Limb, source: []const Limb, mask: []const Limb) void { + var mask_limb: Limb = limb(mask, 0); + var mask_limb_index: usize = 0; + var i: usize = 0; + + outer: while (true) : (i += 1) { + const mask_limb_bit: Log2Limb = limb_bit: while (true) { + const mask_limb_tz = @ctz(mask_limb); + if (mask_limb_tz != @bitSizeOf(Limb)) { + const cast_limb_bit: Log2Limb = @intCast(mask_limb_tz); + mask_limb ^= @as(Limb, 1) << cast_limb_bit; + break :limb_bit cast_limb_bit; + } + + mask_limb_index += 1; + if (mask_limb_index >= mask.len) break :outer; + + mask_limb = limb(mask, mask_limb_index); + }; + + const i_limb_index = i / 32; + const i_limb_bit: Log2Limb = @truncate(i); + + if (i_limb_index >= source.len) break; + + const source_bit_set = limb(source, mask_limb_index) & (@as(Limb, 1) << mask_limb_bit) != 0; + + limb_ptr(result, i_limb_index).* |= @as(Limb, @intFromBool(source_bit_set)) << i_limb_bit; + } +} + +pub fn __pext_bigint(r: [*]Limb, s: [*]const Limb, m: [*]const Limb, bits: usize) callconv(.c) void { + const result_full = r[0 .. std.math.divCeil(usize, @intCast(intAbiSize(@intCast(bits), builtin.target)), 4) catch unreachable]; + @memset(result_full, 0); + + const result = r[0 .. std.math.divCeil(usize, bits, 32) catch unreachable]; + const source = s[0 .. std.math.divCeil(usize, bits, 32) catch unreachable]; + const mask = m[0 .. 
std.math.divCeil(usize, bits, 32) catch unreachable]; + + pext_bigint(result, source, mask); +} + +inline fn pdep_uX(comptime T: type, source: T, mask_: T) T { + var bb: T = 1; + var result: T = 0; + var mask = mask_; + + while (mask != 0) { + const bit = mask & ~(mask - 1); + mask &= ~bit; + const source_bit = source & bb; + if (source_bit != 0) result |= bit; + bb +%= bb; + } + + return result; +} + +pub fn __pdep_u32(source: u32, mask: u32) callconv(.c) u32 { + return pdep_uX(u32, source, mask); +} + +pub fn __pdep_u64(source: u64, mask: u64) callconv(.c) u64 { + return pdep_uX(u64, source, mask); +} + +pub fn __pdep_u128(source: u128, mask: u128) callconv(.c) u128 { + return pdep_uX(u128, source, mask); +} + +inline fn pext_uX(comptime T: type, source: T, mask_: T) T { + var bb: T = 1; + var result: T = 0; + var mask = mask_; + + while (mask != 0) { + const bit = mask & ~(mask - 1); + mask &= ~bit; + const source_bit = source & bit; + if (source_bit != 0) result |= bb; + bb +%= bb; + } + + return result; +} + +pub fn __pext_u32(source: u32, mask: u32) callconv(.c) u32 { + return pext_uX(u32, source, mask); +} + +pub fn __pext_u64(source: u64, mask: u64) callconv(.c) u64 { + return pext_uX(u64, source, mask); +} + +pub fn __pext_u128(source: u128, mask: u128) callconv(.c) u128 { + return pext_uX(u128, source, mask); +} + +// BEGIN HACKY CODE COPY WAIT FOR ALEXRP PR + +const Target = std.Target; +const assert = std.debug.assert; + +pub const Alignment = enum(u6) { + @"1" = 0, + @"2" = 1, + @"4" = 2, + @"8" = 3, + @"16" = 4, + @"32" = 5, + @"64" = 6, + none = std.math.maxInt(u6), + _, + + pub fn fromByteUnits(n: u64) Alignment { + if (n == 0) return .none; + assert(std.math.isPowerOfTwo(n)); + return @enumFromInt(@ctz(n)); + } + + /// Align an address forwards to this alignment. 
+ pub fn forward(a: Alignment, addr: u64) u64 { + assert(a != .none); + const x = (@as(u64, 1) << @intFromEnum(a)) - 1; + return (addr + x) & ~x; + } +}; + +pub fn intAbiSize(bits: u16, target: Target) u64 { + return intAbiAlignment(bits, target).forward(@as(u16, @intCast((@as(u17, bits) + 7) / 8))); +} + +pub fn intAbiAlignment(bits: u16, target: Target) Alignment { + return switch (target.cpu.arch) { + .x86 => switch (bits) { + 0 => .none, + 1...8 => .@"1", + 9...16 => .@"2", + 17...32 => .@"4", + 33...64 => switch (target.os.tag) { + .uefi, .windows => .@"8", + else => .@"4", + }, + else => .@"16", + }, + .x86_64 => switch (bits) { + 0 => .none, + 1...8 => .@"1", + 9...16 => .@"2", + 17...32 => .@"4", + 33...64 => .@"8", + else => .@"16", + }, + else => return Alignment.fromByteUnits(@min( + std.math.ceilPowerOfTwoPromote(u16, @as(u16, @intCast((@as(u17, bits) + 7) / 8))), + maxIntAlignment(target), + )), + }; +} + +pub fn maxIntAlignment(target: std.Target) u16 { + return switch (target.cpu.arch) { + .avr => 1, + .msp430 => 2, + .xcore => 4, + .propeller => 4, + + .arm, + .armeb, + .thumb, + .thumbeb, + .hexagon, + .mips, + .mipsel, + .powerpc, + .powerpcle, + .amdgcn, + .riscv32, + .sparc, + .s390x, + .lanai, + .wasm32, + .wasm64, + => 8, + + // For these, LLVMABIAlignmentOfType(i128) reports 8. Note that 16 + // is a relevant number in three cases: + // 1. Different machine code instruction when loading into SIMD register. + // 2. The C ABI wants 16 for extern structs. + // 3. 16-byte cmpxchg needs 16-byte alignment. + // Same logic for powerpc64, mips64, sparc64. + .powerpc64, + .powerpc64le, + .mips64, + .mips64el, + .sparc64, + => switch (target.ofmt) { + .c => 16, + else => 8, + }, + + .x86_64 => 16, + + // Even LLVMABIAlignmentOfType(i128) agrees on these targets. 
+ .x86, + .aarch64, + .aarch64_be, + .riscv64, + .bpfel, + .bpfeb, + .nvptx, + .nvptx64, + => 16, + + // Below this comment are unverified but based on the fact that C requires + // int128_t to be 16 bytes aligned, it's a safe default. + .csky, + .arc, + .m68k, + .kalimba, + .spirv, + .spirv32, + .ve, + .spirv64, + .loongarch32, + .loongarch64, + .xtensa, + => 16, + }; +} diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig index f6de62550e11..92f5d1b44456 100644 --- a/lib/std/math/big/int.zig +++ b/lib/std/math/big/int.zig @@ -1760,6 +1760,73 @@ pub const Mutable = struct { y.shiftRight(y.toConst(), norm_shift); } + /// result = @depositBits(source, mask) + /// + /// Asserts that `source` and `mask` are positive + /// The value in `result` may use the same number of or less limbs than `mask`. + /// `result` is assumed to have sufficient length to store the result. + pub fn depositBits(result: *Mutable, source: Const, mask: Const) void { + assert(source.positive); + assert(mask.positive); + + result.positive = true; + @memset(result.limbs, 0); + + var shift: usize = 0; + for (mask.limbs, 0..) |mask_limb, i| { + const shift_bits: Log2Limb = @intCast(shift % limb_bits); + const shift_limbs = shift / limb_bits; + + if (shift_limbs >= source.limbs.len) break; + + const result_limb = &result.limbs[i]; + + var source_limb = source.limbs[shift_limbs] >> shift_bits; + if (shift_bits != 0 and shift_limbs + 1 < source.limbs.len) { + source_limb += source.limbs[shift_limbs + 1] << @intCast(limb_bits - shift_bits); + } + + const pdep_limb = @depositBits(source_limb, mask_limb); + + result_limb.* |= pdep_limb; + + shift += @intCast(@popCount(mask_limb)); + } + + result.normalize(result.limbs.len); + } + + /// result = @extractBits(source, mask) + /// + /// Asserts that `source` and `mask` are positive + /// The value in `result` may use the same number of or less limbs than `mask`. + /// `result` is assumed to have sufficient length to store the result. 
+ pub fn extractBits(result: *Mutable, source: Const, mask: Const) void { + assert(source.positive); + assert(mask.positive); + + result.positive = true; + @memset(result.limbs, 0); + + const len = @min(source.limbs.len, mask.limbs.len); + + var shift: usize = 0; + for (source.limbs[0..len], mask.limbs[0..len]) |source_limb, mask_limb| { + const pext_limb = @extractBits(source_limb, mask_limb); + const shift_bits: Log2Limb = @intCast(shift % limb_bits); + const shift_limbs = shift / limb_bits; + result.limbs[shift_limbs] |= pext_limb << shift_bits; + + if (shift_bits != 0) { + result.limbs[shift_limbs + 1] |= pext_limb >> @intCast(limb_bits - shift_bits); + } + + shift += @intCast(@popCount(mask_limb)); + } + + result.normalize(result.limbs.len); + } + /// Truncate an integer to a number of bits, following 2s-complement semantics. /// `r` may alias `a`. /// diff --git a/lib/std/math/big/int_test.zig b/lib/std/math/big/int_test.zig index 489adc12fbd0..c7e4bb70a0ea 100644 --- a/lib/std/math/big/int_test.zig +++ b/lib/std/math/big/int_test.zig @@ -2960,6 +2960,59 @@ fn popCountTest(val: *const Managed, bit_count: usize, expected: usize) !void { try testing.expectEqual(expected, val.toConst().popCount(bit_count)); } +test "big int extractBits" { + try extractBitsTest(0x12345678, 0x0, 0x0); + try extractBitsTest(0x12345678, 0xf0f0f0f0, 0x1357); + try extractBitsTest(0x12345678, 0xff00ff00, 0x1256); + try extractBitsTest(0x12345678, 0xffff, 0x5678); + + try extractBitsTest(0x12345678_90123456_78901234_56789012, 0xff << 64, 0x56); + try extractBitsTest(0x12345678_90123456_78901234_56789012, (0xff << 64) | 0xff00f, 0x56892); + + try extractBitsTest(0x12345678_90123456_78901234_56789012, 0xf0f0, 0x91); + try extractBitsTest(0x12345678_90123456, 0xffffffff_ffffffff, 0x12345678_90123456); +} + +fn extractBitsTest(comptime source: comptime_int, comptime mask: comptime_int, comptime expected: comptime_int) !void { + var source_bigint = try Managed.initSet(testing.allocator, 
source); + defer source_bigint.deinit(); + var mask_bigint = try Managed.initSet(testing.allocator, mask); + defer mask_bigint.deinit(); + const limbs = try testing.allocator.alloc(Limb, mask_bigint.limbs.len); + defer testing.allocator.free(limbs); + var result = Mutable{ .limbs = limbs, .positive = undefined, .len = undefined }; + + result.extractBits(source_bigint.toConst(), mask_bigint.toConst()); + + try testing.expectEqual(std.math.Order.eq, result.toConst().orderAgainstScalar(expected)); +} + +test "big int depositBits" { + try depositBitsTest(0x12345678, 0x0, 0x0); + try depositBitsTest(0x12345678, 0xf0f0f0f0, 0x50607080); + try depositBitsTest(0x12345678, 0xff00ff00, 0x56007800); + try depositBitsTest(0x12345678, 0xffff, 0x5678); + + try depositBitsTest(0x1234, 0xff << 64, 0x34_00000000_00000000); + try depositBitsTest(0x12345678, (0xff << 64) | 0xff00f, 0x45_00000000_00067008); + + try depositBitsTest(0x0, 0xff_ffffffff_ffffffff, 0x0); +} + +fn depositBitsTest(comptime source: comptime_int, comptime mask: comptime_int, comptime expected: comptime_int) !void { + var source_bigint = try Managed.initSet(testing.allocator, source); + defer source_bigint.deinit(); + var mask_bigint = try Managed.initSet(testing.allocator, mask); + defer mask_bigint.deinit(); + const limbs = try testing.allocator.alloc(Limb, mask_bigint.limbs.len); + defer testing.allocator.free(limbs); + var result = Mutable{ .limbs = limbs, .positive = undefined, .len = undefined }; + + result.depositBits(source_bigint.toConst(), mask_bigint.toConst()); + + try testing.expectEqual(std.math.Order.eq, result.toConst().orderAgainstScalar(expected)); +} + test "big int conversion read/write twos complement" { var a = try Managed.initSet(testing.allocator, (1 << 493) - 1); defer a.deinit(); diff --git a/lib/std/zig/AstGen.zig b/lib/std/zig/AstGen.zig index ccc870e36340..58c9df1a4515 100644 --- a/lib/std/zig/AstGen.zig +++ b/lib/std/zig/AstGen.zig @@ -9619,10 +9619,14 @@ fn builtinCall( return 
rvalue(gz, ri, result, node); }, - .add_with_overflow => return overflowArithmetic(gz, scope, ri, node, params, .add_with_overflow), - .sub_with_overflow => return overflowArithmetic(gz, scope, ri, node, params, .sub_with_overflow), - .mul_with_overflow => return overflowArithmetic(gz, scope, ri, node, params, .mul_with_overflow), - .shl_with_overflow => return overflowArithmetic(gz, scope, ri, node, params, .shl_with_overflow), + // zig fmt: off + .add_with_overflow => return extendedBinOp(gz, scope, ri, node, params, .add_with_overflow), + .sub_with_overflow => return extendedBinOp(gz, scope, ri, node, params, .sub_with_overflow), + .mul_with_overflow => return extendedBinOp(gz, scope, ri, node, params, .mul_with_overflow), + .shl_with_overflow => return extendedBinOp(gz, scope, ri, node, params, .shl_with_overflow), + .deposit_bits => return extendedBinOp(gz, scope, ri, node, params, .deposit_bits), + .extract_bits => return extendedBinOp(gz, scope, ri, node, params, .extract_bits), + // zig fmt: on .atomic_load => { const atomic_order_type = try gz.addBuiltinValue(node, .atomic_order); @@ -10077,7 +10081,7 @@ fn cImport( return block_inst.toRef(); } -fn overflowArithmetic( +fn extendedBinOp( gz: *GenZir, scope: *Scope, ri: ResultInfo, diff --git a/lib/std/zig/AstRlAnnotate.zig b/lib/std/zig/AstRlAnnotate.zig index d5fb0a8169cc..187ed8d76a17 100644 --- a/lib/std/zig/AstRlAnnotate.zig +++ b/lib/std/zig/AstRlAnnotate.zig @@ -1117,5 +1117,10 @@ fn builtinCall(astrl: *AstRlAnnotate, block: ?*Block, ri: ResultInfo, node: Ast. 
_ = try astrl.expr(args[4], block, ResultInfo.type_only); return false; }, + .deposit_bits, .extract_bits => { + _ = try astrl.expr(args[0], block, ResultInfo.none); + _ = try astrl.expr(args[1], block, ResultInfo.none); + return false; + }, } } diff --git a/lib/std/zig/BuiltinFn.zig b/lib/std/zig/BuiltinFn.zig index 1bf31cd165e9..e8cfbb39fd7e 100644 --- a/lib/std/zig/BuiltinFn.zig +++ b/lib/std/zig/BuiltinFn.zig @@ -36,6 +36,7 @@ pub const Tag = enum { c_va_copy, c_va_end, c_va_start, + deposit_bits, div_exact, div_floor, div_trunc, @@ -47,6 +48,7 @@ pub const Tag = enum { error_cast, @"export", @"extern", + extract_bits, field, field_parent_ptr, FieldType, @@ -412,6 +414,12 @@ pub const list = list: { .illegal_outside_function = true, }, }, + .{ + "@depositBits", .{ + .tag = .deposit_bits, + .param_count = 2, + }, + }, .{ "@divExact", .{ @@ -490,6 +498,13 @@ pub const list = list: { .param_count = 2, }, }, + .{ + "@extractBits", + .{ + .tag = .extract_bits, + .param_count = 2, + }, + }, .{ "@field", .{ diff --git a/lib/std/zig/Zir.zig b/lib/std/zig/Zir.zig index 089bc5e2aed8..57503e1742fa 100644 --- a/lib/std/zig/Zir.zig +++ b/lib/std/zig/Zir.zig @@ -2120,6 +2120,12 @@ pub const Inst = struct { /// This instruction is always `noreturn`, however, it is not considered as such by ZIR-level queries. This allows AstGen to assume that /// any code may have gone here, avoiding false-positive "unreachable code" errors. astgen_error, + /// Implements the `@depositBits` builtin. + /// `operand` is payload index to `BinNode`. + deposit_bits, + /// Implements the `@extractBits` builtin. + /// `operand` is payload index to `BinNode`. + extract_bits, pub const InstData = struct { opcode: Extended, @@ -4374,6 +4380,8 @@ fn findTrackableInner( .tuple_decl, .dbg_empty_stmt, .astgen_error, + .deposit_bits, + .extract_bits, => return, // `@TypeOf` has a body. 
diff --git a/lib/std/zig/llvm/Builder.zig b/lib/std/zig/llvm/Builder.zig index a71383016137..9a71522c60d6 100644 --- a/lib/std/zig/llvm/Builder.zig +++ b/lib/std/zig/llvm/Builder.zig @@ -2776,6 +2776,12 @@ pub const Intrinsic = enum { @"wasm.memory.size", @"wasm.memory.grow", + // x86 PDEP/PEXT + @"x86.bmi.pdep.32", + @"x86.bmi.pdep.64", + @"x86.bmi.pext.32", + @"x86.bmi.pext.64", + const Signature = struct { ret_len: u8, params: []const Parameter, @@ -4020,6 +4026,43 @@ pub const Intrinsic = enum { }, .attrs = &.{ .nocallback, .nofree, .nosync, .nounwind, .willreturn }, }, + + .@"x86.bmi.pext.32" = .{ + .ret_len = 1, + .params = &.{ + .{ .kind = .{ .type = .i32 } }, + .{ .kind = .{ .type = .i32 } }, + .{ .kind = .{ .type = .i32 } }, + }, + .attrs = &.{ .nocallback, .nofree, .nosync, .nounwind, .{ .memory = Attribute.Memory.all(.none) } }, + }, + .@"x86.bmi.pext.64" = .{ + .ret_len = 1, + .params = &.{ + .{ .kind = .{ .type = .i64 } }, + .{ .kind = .{ .type = .i64 } }, + .{ .kind = .{ .type = .i64 } }, + }, + .attrs = &.{ .nocallback, .nofree, .nosync, .nounwind, .{ .memory = Attribute.Memory.all(.none) } }, + }, + .@"x86.bmi.pdep.32" = .{ + .ret_len = 1, + .params = &.{ + .{ .kind = .{ .type = .i32 } }, + .{ .kind = .{ .type = .i32 } }, + .{ .kind = .{ .type = .i32 } }, + }, + .attrs = &.{ .nocallback, .nofree, .nosync, .nounwind, .{ .memory = Attribute.Memory.all(.none) } }, + }, + .@"x86.bmi.pdep.64" = .{ + .ret_len = 1, + .params = &.{ + .{ .kind = .{ .type = .i64 } }, + .{ .kind = .{ .type = .i64 } }, + .{ .kind = .{ .type = .i64 } }, + }, + .attrs = &.{ .nocallback, .nofree, .nosync, .nounwind, .{ .memory = Attribute.Memory.all(.none) } }, + }, }); }; diff --git a/lib/zig.h b/lib/zig.h index 2d9e7a562647..ec610ec58016 100644 --- a/lib/zig.h +++ b/lib/zig.h @@ -1381,6 +1381,92 @@ zig_builtin_clz(16) zig_builtin_clz(32) zig_builtin_clz(64) +#define zig_builtin_extract_bits(w) \ + static inline uint##w##_t zig_extract_bits_u##w(uint##w##_t source, uint##w##_t 
mask_) { \ + uint##w##_t bb = 1;\ + uint##w##_t result = 0;\ + uint##w##_t mask = mask_;\ + \ + while (mask != 0) {\ + uint##w##_t bit = mask & ~(mask - 1);\ + mask &= ~bit;\ + uint##w##_t source_bit = source & bit;\ + if (source_bit != 0) result |= bb;\ + bb += bb;\ + }\ + \ + return result;\ + } + +#if zig_has_builtin(ia32_pext_di) + static inline uint64_t zig_extract_bits_u64(uint64_t source, uint64_t mask) { + return __builtin_ia32_pext_di(source, mask); + } +#else +zig_builtin_extract_bits(64) +#endif + +#if zig_has_builtin(ia32_pext_si) + static inline uint32_t zig_extract_bits_u32(uint32_t source, uint32_t mask) { + return __builtin_ia32_pext_si(source, mask); + } + + static inline uint16_t zig_extract_bits_u16(uint16_t source, uint16_t mask) { + return (uint16_t)__builtin_ia32_pext_si(source, mask); + } + + static inline uint8_t zig_extract_bits_u8(uint8_t source, uint8_t mask) { + return (uint8_t)__builtin_ia32_pext_si(source, mask); + } +#else +zig_builtin_extract_bits(32) +zig_builtin_extract_bits(16) +zig_builtin_extract_bits(8) +#endif + +#define zig_builtin_deposit_bits(w) \ + static inline uint##w##_t zig_deposit_bits_u##w(uint##w##_t source, uint##w##_t mask_) { \ + uint##w##_t bb = 1;\ + uint##w##_t result = 0;\ + uint##w##_t mask = mask_;\ + \ + while (mask != 0) {\ + uint##w##_t bit = mask & ~(mask - 1);\ + mask &= ~bit;\ + uint##w##_t source_bit = source & bb;\ + if (source_bit != 0) result |= bit;\ + bb += bb;\ + }\ + \ + return result;\ + } + +#if zig_has_builtin(ia32_pdep_di) + static inline uint64_t zig_deposit_bits_u64(uint64_t source, uint64_t mask) { + return __builtin_ia32_pdep_di(source, mask); + } +#else +zig_builtin_deposit_bits(64) +#endif + +#if zig_has_builtin(ia32_pdep_si) + static inline uint32_t zig_deposit_bits_u32(uint32_t source, uint32_t mask) { + return __builtin_ia32_pdep_si(source, mask); + } + + static inline uint16_t zig_deposit_bits_u16(uint16_t source, uint16_t 
mask); + } + + static inline uint8_t zig_deposit_bits_u8(uint8_t source, uint8_t mask) { + return (uint8_t)__builtin_ia32_pdep_si(source, mask); + } +#else +zig_builtin_deposit_bits(32) +zig_builtin_deposit_bits(16) +zig_builtin_deposit_bits(8) +#endif + /* ======================== 128-bit Integer Support ========================= */ #if !defined(zig_has_int128) diff --git a/src/Air.zig b/src/Air.zig index 7afe2bfbb51f..d692e6fecc48 100644 --- a/src/Air.zig +++ b/src/Air.zig @@ -864,6 +864,13 @@ pub const Inst = struct { /// Operand is unused and set to Ref.none work_group_id, + /// Implements @depositBits builtin. + /// Uses the `bin_op` field. + deposit_bits, + /// Implements @extractBits builtin. + /// Uses the `bin_op` field. + extract_bits, + pub fn fromCmpOp(op: std.math.CompareOperator, optimized: bool) Tag { switch (op) { .lt => return if (optimized) .cmp_lt_optimized else .cmp_lt, @@ -1383,6 +1390,8 @@ pub fn typeOfIndex(air: *const Air, inst: Air.Inst.Index, ip: *const InternPool) .div_exact_optimized, .rem_optimized, .mod_optimized, + .deposit_bits, + .extract_bits, => return air.typeOf(datas[@intFromEnum(inst)].bin_op.lhs, ip), .sqrt, @@ -1863,6 +1872,8 @@ pub fn mustLower(air: Air, inst: Air.Inst.Index, ip: *const InternPool) bool { .work_item_id, .work_group_size, .work_group_id, + .deposit_bits, + .extract_bits, => false, .assembly => { diff --git a/src/Air/types_resolved.zig b/src/Air/types_resolved.zig index 79760ea24d30..0125824f85f9 100644 --- a/src/Air/types_resolved.zig +++ b/src/Air/types_resolved.zig @@ -87,6 +87,8 @@ fn checkBody(air: Air, body: []const Air.Inst.Index, zcu: *Zcu) bool { .atomic_store_monotonic, .atomic_store_release, .atomic_store_seq_cst, + .deposit_bits, + .extract_bits, => { if (!checkRef(data.bin_op.lhs, zcu)) return false; if (!checkRef(data.bin_op.rhs, zcu)) return false; diff --git a/src/Liveness.zig b/src/Liveness.zig index 199e81b86b63..f8a72a29b55d 100644 --- a/src/Liveness.zig +++ b/src/Liveness.zig @@ -283,6 
+283,8 @@ pub fn categorizeOperand( .cmp_gte_optimized, .cmp_gt_optimized, .cmp_neq_optimized, + .deposit_bits, + .extract_bits, => { const o = air_datas[@intFromEnum(inst)].bin_op; if (o.lhs == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none); @@ -936,6 +938,8 @@ fn analyzeInst( .memset, .memset_safe, .memcpy, + .deposit_bits, + .extract_bits, => { const o = inst_datas[@intFromEnum(inst)].bin_op; return analyzeOperands(a, pass, data, inst, .{ o.lhs, o.rhs, .none }); diff --git a/src/Liveness/Verify.zig b/src/Liveness/Verify.zig index a8cb81c51b9a..a2ca67f66c54 100644 --- a/src/Liveness/Verify.zig +++ b/src/Liveness/Verify.zig @@ -267,6 +267,8 @@ fn verifyBody(self: *Verify, body: []const Air.Inst.Index) Error!void { .memset, .memset_safe, .memcpy, + .deposit_bits, + .extract_bits, => { const bin_op = data[@intFromEnum(inst)].bin_op; try self.verifyInstOperands(inst, .{ bin_op.lhs, bin_op.rhs, .none }); diff --git a/src/Sema.zig b/src/Sema.zig index ad800c29a640..2f3c2f32937b 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -1391,6 +1391,8 @@ fn analyzeBodyInner( .work_group_id => try sema.zirWorkItem( block, extended, extended.opcode), .in_comptime => try sema.zirInComptime( block), .closure_get => try sema.zirClosureGet( block, extended), + .deposit_bits => try sema.zirDepositExtractBits(block, extended, .deposit_bits), + .extract_bits => try sema.zirDepositExtractBits(block, extended, .extract_bits), // zig fmt: on .set_float_mode => { @@ -26511,6 +26513,129 @@ fn zirBranchHint(sema: *Sema, block: *Block, extended: Zir.Inst.Extended.InstDat } } +fn zirDepositExtractBits( + sema: *Sema, + block: *Block, + extended: Zir.Inst.Extended.InstData, + air_tag: Air.Inst.Tag, +) CompileError!Air.Inst.Ref { + const pt = sema.pt; + const zcu = pt.zcu; + + const extra = sema.code.extraData(Zir.Inst.BinNode, extended.operand).data; + const src = block.nodeOffset(extra.node); + + const lhs_src = block.builtinCallArgSrc(extra.node, 0); + const rhs_src = 
block.builtinCallArgSrc(extra.node, 1); + + const uncasted_lhs = try sema.resolveInst(extra.lhs); + const uncasted_rhs = try sema.resolveInst(extra.rhs); + + const lhs_ty = sema.typeOf(uncasted_lhs); + const rhs_ty = sema.typeOf(uncasted_rhs); + + if (!lhs_ty.isUnsignedInt(zcu) and lhs_ty.zigTypeTag(zcu) != .comptime_int) { + return sema.fail(block, lhs_src, "expected unsigned integer or 'comptime_int', found '{}'", .{lhs_ty.fmt(pt)}); + } + + if (!rhs_ty.isUnsignedInt(zcu) and rhs_ty.zigTypeTag(zcu) != .comptime_int) { + return sema.fail(block, rhs_src, "expected unsigned integer or 'comptime_int', found '{}'", .{rhs_ty.fmt(pt)}); + } + + const instructions = &[_]Air.Inst.Ref{ uncasted_lhs, uncasted_rhs }; + const dest_ty = try sema.resolvePeerTypes(block, src, instructions, .{ + .override = &[_]?LazySrcLoc{ lhs_src, rhs_src }, + }); + + const builtin_name = switch (air_tag) { + .deposit_bits => "@depositBits", + .extract_bits => "@extractBits", + else => unreachable, + }; + + // Coercion errors are intercepted to add a note if the caller is attempting to pass a negative comptime_int + const lhs = sema.coerce(block, dest_ty, uncasted_lhs, lhs_src) catch |err| switch (err) { + error.AnalysisFail => { + const msg = sema.err orelse return err; + const val = (try sema.resolveValue(uncasted_lhs)).?; + if (val.orderAgainstZero(zcu) == .lt) { + try sema.errNote(src, msg, "parameters to {s} must be non-negative", .{builtin_name}); + } + return err; + }, + else => return err, + }; + + const rhs = sema.coerce(block, dest_ty, uncasted_rhs, rhs_src) catch |err| switch (err) { + error.AnalysisFail => { + const msg = sema.err orelse return err; + const val = (try sema.resolveValue(uncasted_rhs)).?; + if (val.orderAgainstZero(zcu) == .lt) { + try sema.errNote(src, msg, "parameters to {s} must be non-negative", .{builtin_name}); + } + return err; + }, + else => return err, + }; + + const maybe_lhs_val = try sema.resolveValue(lhs); + const maybe_rhs_val = try 
sema.resolveValue(rhs); + + // We check for negative values here only if the type is a comptime_int, as negative values + // would have otherwise been filtered out by coercion and the unsigned type restriction + if (dest_ty.zigTypeTag(zcu) == .comptime_int) { + if (maybe_lhs_val) |lhs_val| { + if (!lhs_val.isUndef(zcu) and lhs_val.orderAgainstZero(zcu) == .lt) { + const err = try sema.errMsg(lhs_src, "use of negative value '{}'", .{lhs_val.fmtValue(pt)}); + try sema.errNote(src, err, "parameters to {s} must be non-negative", .{builtin_name}); + return sema.failWithOwnedErrorMsg(block, err); + } + } + + if (maybe_rhs_val) |rhs_val| { + if (!rhs_val.isUndef(zcu) and rhs_val.orderAgainstZero(zcu) == .lt) { + const err = try sema.errMsg(rhs_src, "use of negative value '{}'", .{rhs_val.fmtValue(pt)}); + try sema.errNote(src, err, "parameters to {s} must be non-negative", .{builtin_name}); + return sema.failWithOwnedErrorMsg(block, err); + } + } + } + + // If either of the operands are zero, the result is zero + // If either of the operands are undefined, the result is undefined + if (maybe_lhs_val) |lhs_val| { + if (lhs_val.orderAgainstZero(zcu) == .eq) return Air.internedToRef((try pt.intValue(dest_ty, 0)).toIntern()); + if (lhs_val.isUndef(zcu)) return try pt.undefRef(dest_ty); + } + if (maybe_rhs_val) |rhs_val| { + if (rhs_val.orderAgainstZero(zcu) == .eq) return Air.internedToRef((try pt.intValue(dest_ty, 0)).toIntern()); + if (rhs_val.isUndef(zcu)) return try pt.undefRef(dest_ty); + } + + if (maybe_lhs_val) |lhs_val| { + if (maybe_rhs_val) |rhs_val| { + const dest_val = switch (air_tag) { + .deposit_bits => try sema.intDepositBits(lhs_val, rhs_val, dest_ty), + .extract_bits => try sema.intExtractBits(lhs_val, rhs_val, dest_ty), + else => unreachable, + }; + + return Air.internedToRef(dest_val.toIntern()); + } + } + + const runtime_src = if (maybe_lhs_val == null) lhs_src else rhs_src; + try sema.requireRuntimeBlock(block, src, runtime_src); + + return 
block.addInst(.{ + .tag = air_tag, + .data = .{ .bin_op = .{ + .lhs = lhs, + .rhs = rhs, + } }, + }); +} + fn requireRuntimeBlock(sema: *Sema, block: *Block, src: LazySrcLoc, runtime_src: ?LazySrcLoc) !void { if (block.isComptime()) { const msg, const fail_block = msg: { @@ -37105,6 +37230,64 @@ fn enumHasInt(sema: *Sema, ty: Type, int: Value) CompileError!bool { return enum_type.tagValueIndex(&zcu.intern_pool, int_coerced.toIntern()) != null; } +/// Asserts that the values are positive +fn intDepositBits( + sema: *Sema, + lhs: Value, + rhs: Value, + ty: Type, +) !Value { + // TODO is this a performance issue? maybe we should try the operation without + // resorting to BigInt first. + const pt = sema.pt; + const zcu = pt.zcu; + const arena = sema.arena; + + var lhs_space: Value.BigIntSpace = undefined; + var rhs_space: Value.BigIntSpace = undefined; + const source = lhs.toBigInt(&lhs_space, zcu); + const mask = rhs.toBigInt(&rhs_space, zcu); + + const result_limbs = try arena.alloc( + std.math.big.Limb, + mask.limbs.len, + ); + + var result = std.math.big.int.Mutable{ .limbs = result_limbs, .positive = undefined, .len = undefined }; + + result.depositBits(source, mask); + return try pt.intValue_big(ty, result.toConst()); +} + +/// Asserts that the values are positive +fn intExtractBits( + sema: *Sema, + lhs: Value, + rhs: Value, + ty: Type, +) !Value { + // TODO is this a performance issue? maybe we should try the operation without + // resorting to BigInt first. 
+ const pt = sema.pt; + const zcu = pt.zcu; + const arena = sema.arena; + + var lhs_space: Value.BigIntSpace = undefined; + var rhs_space: Value.BigIntSpace = undefined; + const source = lhs.toBigInt(&lhs_space, zcu); + const mask = rhs.toBigInt(&rhs_space, zcu); + + const result_limbs = try arena.alloc( + std.math.big.Limb, + mask.limbs.len, + ); + + var result = std.math.big.int.Mutable{ .limbs = result_limbs, .positive = undefined, .len = undefined }; + + result.extractBits(source, mask); + return try pt.intValue_big(ty, result.toConst()); +} + /// Asserts the values are comparable. Both operands have type `ty`. /// For vectors, returns true if the comparison is true for ALL elements. /// diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index f7ee7eb064a9..ddf19c746401 100644 --- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -887,6 +887,9 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .work_item_id => unreachable, .work_group_size => unreachable, .work_group_id => unreachable, + + .deposit_bits => return self.fail("TODO implement deposit_bits", .{}), + .extract_bits => return self.fail("TODO implement extract_bits", .{}), // zig fmt: on } diff --git a/src/arch/arm/CodeGen.zig b/src/arch/arm/CodeGen.zig index 8a1600f50ace..5cb7501772e9 100644 --- a/src/arch/arm/CodeGen.zig +++ b/src/arch/arm/CodeGen.zig @@ -876,6 +876,9 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .work_item_id => unreachable, .work_group_size => unreachable, .work_group_id => unreachable, + + .deposit_bits => return self.fail("TODO implement deposit_bits", .{}), + .extract_bits => return self.fail("TODO implement extract_bits", .{}), // zig fmt: on } diff --git a/src/arch/riscv64/CodeGen.zig b/src/arch/riscv64/CodeGen.zig index c66de9dd30af..7ab55ff1511d 100644 --- a/src/arch/riscv64/CodeGen.zig +++ b/src/arch/riscv64/CodeGen.zig @@ -1700,6 +1700,9 @@ fn genBody(func: *Func, body: []const 
Air.Inst.Index) InnerError!void { .work_item_id => unreachable, .work_group_size => unreachable, .work_group_id => unreachable, + + .deposit_bits => return func.fail("TODO implement deposit_bits", .{}), + .extract_bits => return func.fail("TODO implement extract_bits", .{}), // zig fmt: on } diff --git a/src/arch/sparc64/CodeGen.zig b/src/arch/sparc64/CodeGen.zig index 58cd78a7d2d9..d24b53bc944b 100644 --- a/src/arch/sparc64/CodeGen.zig +++ b/src/arch/sparc64/CodeGen.zig @@ -730,6 +730,9 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .work_item_id => unreachable, .work_group_size => unreachable, .work_group_id => unreachable, + + .deposit_bits => @panic("TODO implement deposit_bits"), + .extract_bits => @panic("TODO implement extract_bits"), // zig fmt: on } diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig index 641347bee150..6f96f6d5b840 100644 --- a/src/arch/wasm/CodeGen.zig +++ b/src/arch/wasm/CodeGen.zig @@ -2089,6 +2089,10 @@ fn genInst(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { .work_group_size, .work_group_id, => unreachable, + + .deposit_bits, + .extract_bits, + => |tag| return cg.fail("TODO implement {s}", .{@tagName(tag)}), }; } diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index e85af0d0eefd..99c298a9b86f 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -85897,6 +85897,346 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .c_va_end => try cg.airVaEnd(inst), .c_va_start => try cg.airVaStart(inst), .work_item_id, .work_group_size, .work_group_id => unreachable, + + .deposit_bits, + .extract_bits, + => |tag| { + const bin_op = air_datas[@intFromEnum(inst)].bin_op; + var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); + var res: [1]Temp = undefined; + cg.select(&res, &.{cg.typeOf(bin_op.lhs)}, &ops, switch (@as(Mir.Inst.Tag, switch (tag) { + .deposit_bits => .pdep, + .extract_bits => .pext, + else => 
unreachable, + })) { + inline .pdep, .pext => |mir_tag| comptime &.{ + .{ + .required_features = .{ .bmi2, null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .byte }, .{ .unsigned_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .mem, .none } }, + }, + .dst_temps = .{ .{ .mut_rc = .{ .rc = .general_purpose, .ref = .src1 } }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .dst0d, .src1b, ._, ._ }, + .{ ._, ._, mir_tag, .dst0d, .src0d, .dst0d, ._ }, + } }, + }, + .{ + .required_features = .{ .bmi2, null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .byte }, .{ .unsigned_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .none } }, + }, + .dst_temps = .{ .{ .mut_rc = .{ .rc = .general_purpose, .ref = .src1 } }, .unused }, + .each = .{ + .once = if (mir_tag == .pext) &.{ + .{ ._, ._, .movzx, .dst0d, .src1b, ._, ._ }, + .{ ._, ._, mir_tag, .dst0d, .src0d, .dst0d, ._ }, + } else &.{ + .{ ._, ._, mir_tag, .dst0d, .src0d, .src1d, ._ }, + }, + }, + }, + .{ + .required_features = .{ .bmi2, null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .word }, .{ .unsigned_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .mem, .none } }, + }, + .dst_temps = .{ .{ .mut_rc = .{ .rc = .general_purpose, .ref = .src1 } }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .dst0d, .src1w, ._, ._ }, + .{ ._, ._, mir_tag, .dst0d, .src0d, .dst0d, ._ }, + } }, + }, + .{ + .required_features = .{ .bmi2, null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .word }, .{ .unsigned_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .none } }, + }, + .dst_temps = .{ .{ .mut_rc = .{ .rc = .general_purpose, .ref = .src1 } }, .unused }, + .each = .{ + .once = if (mir_tag == .pext) &.{ + .{ ._, ._, .movzx, .dst0d, .src1w, ._, ._ }, + .{ ._, ._, mir_tag, .dst0d, .src0d, .dst0d, ._ }, + } else &.{ + .{ ._, ._, mir_tag, .dst0d, .src0d, .src1d, ._ }, + }, + }, + }, + .{ + 
.required_features = .{ .bmi2, null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .dword }, .{ .unsigned_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .mem, .none } }, + .{ .src = .{ .to_gpr, .to_gpr, .none } }, + }, + .dst_temps = .{ .{ .mut_rc = .{ .rc = .general_purpose, .ref = .src0 } }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, mir_tag, .dst0d, .src0d, .src1d, ._ }, + } }, + }, + .{ + .required_features = .{ .bmi2, .@"64bit", null, null }, + .src_constraints = .{ .{ .unsigned_int = .qword }, .{ .unsigned_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .mem, .none } }, + .{ .src = .{ .to_gpr, .to_gpr, .none } }, + }, + .dst_temps = .{ .{ .mut_rc = .{ .rc = .general_purpose, .ref = .src0 } }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, mir_tag, .dst0q, .src0q, .src1q, ._ }, + } }, + }, + .{ + .src_constraints = .{ .{ .unsigned_int = .byte }, .{ .unsigned_int = .byte }, .any }, + .patterns = &.{.{ .src = .{ .{ .to_param_gpr = .{ .cc = .ccc, .index = 0 } }, .mem, .none } }}, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "__" ++ @tagName(mir_tag) ++ "_" ++ "u32" } } }, + .{ .type = .u32, .kind = .{ .param_gpr = .{ .cc = .ccc, .index = 1 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_gpr = .{ .cc = .ccc, .index = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .tmp1d, .src1b, ._, ._ }, + .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, + } }, + }, + .{ + .src_constraints = .{ .{ .unsigned_int = .byte }, .{ .unsigned_int = .byte }, .any }, + .patterns = &.{.{ .src = .{ + .{ .to_param_gpr = .{ .cc = .ccc, .index = 0 } }, + .{ .to_param_gpr = .{ .cc = .ccc, .index = 1 } }, + .none, + } }}, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ 
.symbol = &.{ .name = "__" ++ @tagName(mir_tag) ++ "_" ++ "u32" } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_gpr = .{ .cc = .ccc, .index = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ + .once = if (mir_tag == .pext) &.{ + .{ ._, ._, .movzx, .src1d, .src1b, ._, ._ }, + .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, + } else &.{ + .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, + }, + }, + }, + .{ + .src_constraints = .{ .{ .unsigned_int = .word }, .{ .unsigned_int = .word }, .any }, + .patterns = &.{.{ .src = .{ .{ .to_param_gpr = .{ .cc = .ccc, .index = 0 } }, .mem, .none } }}, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "__" ++ @tagName(mir_tag) ++ "_" ++ "u32" } } }, + .{ .type = .u32, .kind = .{ .param_gpr = .{ .cc = .ccc, .index = 1 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_gpr = .{ .cc = .ccc, .index = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .tmp1d, .src1w, ._, ._ }, + .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, + } }, + }, + .{ + .src_constraints = .{ .{ .unsigned_int = .word }, .{ .unsigned_int = .word }, .any }, + .patterns = &.{.{ .src = .{ + .{ .to_param_gpr = .{ .cc = .ccc, .index = 0 } }, + .{ .to_param_gpr = .{ .cc = .ccc, .index = 1 } }, + .none, + } }}, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "__" ++ @tagName(mir_tag) ++ "_" ++ "u32" } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_gpr = .{ .cc = .ccc, .index = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ + 
.once = if (mir_tag == .pext) &.{ + .{ ._, ._, .movzx, .src1d, .src1w, ._, ._ }, + .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, + } else &.{ + .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, + }, + }, + }, + .{ + .src_constraints = .{ .{ .unsigned_int = .dword }, .{ .unsigned_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ + .{ .to_param_gpr = .{ .cc = .ccc, .index = 0 } }, + .{ .to_param_gpr = .{ .cc = .ccc, .index = 1 } }, + .none, + } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "__" ++ @tagName(mir_tag) ++ "_" ++ "u32" } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_gpr = .{ .cc = .ccc, .index = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, + } }, + }, + .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .qword }, .{ .unsigned_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ + .{ .to_param_gpr = .{ .cc = .ccc, .index = 0 } }, + .{ .to_param_gpr = .{ .cc = .ccc, .index = 1 } }, + .none, + } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "__" ++ @tagName(mir_tag) ++ "_" ++ "u64" } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_gpr = .{ .cc = .ccc, .index = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, + } }, + }, + .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .xword }, .{ .unsigned_int = .xword }, .any }, + .patterns = &.{ + .{ .src = .{ + .{ .to_param_gpr_pair = .{ .cc = .ccc, .index = 0 } }, + .{ 
.to_param_gpr_pair = .{ .cc = .ccc, .index = 2 } }, + .none, + } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "__" ++ @tagName(mir_tag) ++ "_" ++ "u128" } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_gpr_pair = .{ .cc = .ccc, .index = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, + } }, + }, + .{ + .src_constraints = .{ + .{ .remainder_unsigned_int = .{ .of = .dword, .is = .dword } }, + .{ .remainder_unsigned_int = .{ .of = .dword, .is = .dword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "__" ++ @tagName(mir_tag) ++ "_" ++ "bigint" } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .index = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .index = 1 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .index = 2 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .index = 3 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp1p, .mem(.dst0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .mem(.src0), ._, ._ }, + .{ ._, ._, .lea, .tmp3p, .mem(.src1), ._, ._ }, + .{ ._, ._, .mov, .tmp4d, .sa(.src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, + } }, + }, + }, + else => unreachable, + }) catch |err| switch (err) { + error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{ + @tagName(tag), + cg.typeOf(bin_op.lhs).fmt(pt), + ops[0].tracking(cg), + ops[1].tracking(cg), + }), + else 
=> |e| return e, + }; + try res[0].finish(inst, &.{ bin_op.lhs, bin_op.rhs }, &ops, cg); + }, } try cg.resetTemps(@enumFromInt(0)); cg.checkInvariantsAfterAirInst(); diff --git a/src/codegen/c.zig b/src/codegen/c.zig index 2aee078b110f..1025f3f23d1d 100644 --- a/src/codegen/c.zig +++ b/src/codegen/c.zig @@ -3496,6 +3496,8 @@ fn genBodyInner(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, .call_never_tail => try airCall(f, inst, .never_tail), .call_never_inline => try airCall(f, inst, .never_inline), + .deposit_bits => try airDepositExtractBits(f, inst, "deposit_bits"), + .extract_bits => try airDepositExtractBits(f, inst, "extract_bits"), // zig fmt: on }; if (result_value == .new_local) { @@ -7690,6 +7692,19 @@ fn airCVaCopy(f: *Function, inst: Air.Inst.Index) !CValue { return local; } +fn airDepositExtractBits(f: *Function, inst: Air.Inst.Index, operation: []const u8) !CValue { + const bin_op = f.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + const ty = f.typeOf(bin_op.lhs); + + const ctype = try f.ctypeFromType(ty, .complete); + const is_big = ctype.info(&f.object.dg.ctype_pool) == .array; + + if (is_big) return f.fail("TODO: C backend: implement @{{deposit,extract}}Bits for bigints", .{}); + if (f.byteSize(ctype) > 8) return f.fail("TODO: C backend: implement @{{deposit,extract}}Bits for u128", .{}); + + return try airBinBuiltinCall(f, inst, operation, .none); +} + fn toMemoryOrder(order: std.builtin.AtomicOrder) [:0]const u8 { return switch (order) { // Note: unordered is actually even less atomic than relaxed diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index 06df4232516c..98e03990b8d7 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -5053,6 +5053,8 @@ pub const FuncGen = struct { break :res res; }, + .deposit_bits, + .extract_bits => |tag| try self.airDepositExtractBits(inst, tag), // zig fmt: on }; if (val != .none) try self.func_inst_table.putNoClobber(self.gpa, inst.toRef(), val); @@ -10802,6 
+10804,166 @@ pub const FuncGen = struct { }; } + fn airDepositExtractBits(self: *FuncGen, inst: Air.Inst.Index, tag: Air.Inst.Tag) !Builder.Value { + if (self.liveness.isUnused(inst)) return .none; + + const o = self.ng.object; + + const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + const source = try self.resolveInst(bin_op.lhs); + const mask = try self.resolveInst(bin_op.rhs); + const inst_ty = self.typeOfIndex(inst); + + const target = o.pt.zcu.getTarget(); + + const llvm_ty = try o.lowerType(inst_ty); + const bits: u16 = @intCast(llvm_ty.scalarBits(&o.builder)); + + switch (target.cpu.arch) { + .x86, .x86_64 => |arch| blk: { + // Doesn't have pdep + if (!std.Target.x86.featureSetHas(target.cpu.features, .bmi2)) break :blk; + + const supports_64 = arch == .x86_64; + // Integer size doesn't match the available instruction(s) + if (!(bits <= 32 or (bits <= 64 and supports_64))) break :blk; + + const compiler_rt_bits = compilerRtIntBits(bits); + + var buf: ["x86.bmi.pdep.32".len]u8 = undefined; + const intrinsic = std.meta.stringToEnum(Builder.Intrinsic, std.fmt.bufPrint(&buf, "x86.bmi.{s}.{d}", .{ + switch (tag) { + .deposit_bits => "pdep", + .extract_bits => "pext", + else => unreachable, + }, + compiler_rt_bits, + }) catch unreachable).?; + + const needs_extend = bits != compiler_rt_bits; + const extended_ty = if (needs_extend) try o.builder.intType(compiler_rt_bits) else llvm_ty; + + const params = .{ + if (needs_extend) try self.wip.cast(.zext, source, extended_ty, "") else source, + if (needs_extend) try self.wip.cast(.zext, mask, extended_ty, "") else mask, + }; + + const result = try self.wip.callIntrinsic( + .normal, + .none, + intrinsic, + &.{}, + ¶ms, + "", + ); + + return if (needs_extend) try self.wip.cast(.trunc, result, llvm_ty, "") else result; + }, + else => {}, + } + + return try self.genDepositExtractBitsEmulated(tag, bits, source, mask, llvm_ty); + } + + fn genDepositExtractBitsEmulated(self: *FuncGen, tag: 
Air.Inst.Tag, bits: u16, source: Builder.Value, mask: Builder.Value, ty: Builder.Type) !Builder.Value { + const o = self.ng.object; + const zcu = o.pt.zcu; + + if (bits <= 128) { + const rt_int_bits = compilerRtIntBits(bits); + const needs_extend = bits != rt_int_bits; + const rt_int_ty = try o.builder.intType(rt_int_bits); + + const fn_name = try o.builder.strtabStringFmt("__{s}_u{d}", .{ + switch (tag) { + .deposit_bits => "pdep", + .extract_bits => "pext", + else => unreachable, + }, + rt_int_bits, + }); + + var extended_source = try self.wip.conv(.unsigned, source, rt_int_ty, ""); + var extended_mask = try self.wip.conv(.unsigned, mask, rt_int_ty, ""); + + var param_ty = rt_int_ty; + if (rt_int_bits == 128 and (o.target.os.tag == .windows and o.target.cpu.arch == .x86_64)) { + // On Windows x86_64, we expect i128 to be passed in an 2xi64 for both parameters and + // the return type. + param_ty = try o.builder.vectorType(.normal, 2, .i64); + extended_source = try self.wip.cast(.bitcast, extended_source, param_ty, ""); + extended_mask = try self.wip.cast(.bitcast, extended_mask, param_ty, ""); + } + + const libc_fn = try self.getLibcFunction(fn_name, &.{ param_ty, param_ty }, param_ty); + var result = try self.wip.call( + .normal, + .ccc, + .none, + libc_fn.typeOf(&o.builder), + libc_fn.toValue(&o.builder), + &.{ extended_source, extended_mask }, + "", + ); + + if (param_ty != rt_int_ty) result = try self.wip.cast(.bitcast, result, rt_int_ty, ""); + if (needs_extend) result = try self.wip.cast(.trunc, result, ty, ""); + return result; + } + + // Rounded bits to the nearest 32, as limb size is 32. 
+ const extended_bits = (((bits - 1) / 32) + 1) * 32; + const needs_extend = bits != extended_bits; + const extended_ty = if (needs_extend) try o.builder.intType(extended_bits) else ty; + + const source_extended = if (needs_extend) try self.wip.cast(.zext, source, extended_ty, "") else source; + const mask_extended = if (needs_extend) try self.wip.cast(.zext, mask, extended_ty, "") else mask; + const zeroes_extended = try o.builder.intValue(extended_ty, 0); + + const alignment = Type.u32.abiAlignment(zcu).toLlvm(); + + const source_pointer = try self.buildAlloca(extended_ty, alignment); + const mask_pointer = try self.buildAlloca(extended_ty, alignment); + const result_pointer = try self.buildAlloca(extended_ty, alignment); + + _ = try self.wip.store(.normal, source_extended, source_pointer, alignment); + _ = try self.wip.store(.normal, mask_extended, mask_pointer, alignment); + _ = try self.wip.store(.normal, zeroes_extended, result_pointer, alignment); + + const fn_name = try o.builder.strtabStringFmt("__{s}_bigint", .{switch (tag) { + .deposit_bits => "pdep", + .extract_bits => "pext", + else => unreachable, + }}); + + const pointer_ty = source_pointer.typeOfWip(&self.wip); + const usize_ty = try o.lowerType(Type.usize); + const void_ty = try o.lowerType(Type.void); + + const bits_value = try o.builder.intValue(usize_ty, bits); + + const params = .{ + result_pointer, + source_pointer, + mask_pointer, + bits_value, + }; + + const libc_fn = try self.getLibcFunction(fn_name, &.{ pointer_ty, pointer_ty, pointer_ty, usize_ty }, void_ty); + _ = try self.wip.call( + .normal, + .ccc, + .none, + libc_fn.typeOf(&o.builder), + libc_fn.toValue(&o.builder), + ¶ms, + "", + ); + + const result = try self.wip.load(.normal, extended_ty, result_pointer, alignment, ""); + return if (needs_extend) try self.wip.cast(.trunc, result, ty, "") else result; + } + fn getErrorNameTable(self: *FuncGen) Allocator.Error!Builder.Variable.Index { const o = self.ng.object; const pt = o.pt; diff 
--git a/src/print_air.zig b/src/print_air.zig index 73e075a0b216..949481654360 100644 --- a/src/print_air.zig +++ b/src/print_air.zig @@ -162,6 +162,8 @@ const Writer = struct { .memcpy, .memset, .memset_safe, + .deposit_bits, + .extract_bits, => try w.writeBinOp(s, inst), .is_null, diff --git a/src/print_zir.zig b/src/print_zir.zig index 08e1a30368ea..b56f63d9ecd4 100644 --- a/src/print_zir.zig +++ b/src/print_zir.zig @@ -600,6 +600,8 @@ const Writer = struct { .wasm_memory_grow, .prefetch, .c_va_arg, + .deposit_bits, + .extract_bits, => { const inst_data = self.code.extraData(Zir.Inst.BinNode, extended.operand).data; try self.writeInstRef(stream, inst_data.lhs); diff --git a/stage1/zig.h b/stage1/zig.h index 2d9e7a562647..ec610ec58016 100644 --- a/stage1/zig.h +++ b/stage1/zig.h @@ -1381,6 +1381,92 @@ zig_builtin_clz(16) zig_builtin_clz(32) zig_builtin_clz(64) +#define zig_builtin_extract_bits(w) \ + static inline uint##w##_t zig_extract_bits_u##w(uint##w##_t source, uint##w##_t mask_) { \ + uint##w##_t bb = 1;\ + uint##w##_t result = 0;\ + uint##w##_t mask = mask_;\ + \ + while (mask != 0) {\ + uint##w##_t bit = mask & ~(mask - 1);\ + mask &= ~bit;\ + uint##w##_t source_bit = source & bit;\ + if (source_bit != 0) result |= bb;\ + bb += bb;\ + }\ + \ + return result;\ + } + +#if zig_has_builtin(ia32_pext_di) + static inline uint64_t zig_extract_bits_u64(uint64_t source, uint64_t mask) { + return __builtin_ia32_pext_di(source, mask); + } +#else +zig_builtin_extract_bits(64) +#endif + +#if zig_has_builtin(ia32_pext_si) + static inline uint32_t zig_extract_bits_u32(uint32_t source, uint32_t mask) { + return __builtin_ia32_pext_si(source, mask); + } + + static inline uint16_t zig_extract_bits_u16(uint16_t source, uint16_t mask) { + return (uint16_t)__builtin_ia32_pext_si(source, mask); + } + + static inline uint8_t zig_extract_bits_u8(uint8_t source, uint8_t mask) { + return (uint8_t)__builtin_ia32_pext_si(source, mask); + } +#else +zig_builtin_extract_bits(32) 
+zig_builtin_extract_bits(16) +zig_builtin_extract_bits(8) +#endif + +#define zig_builtin_deposit_bits(w) \ + static inline uint##w##_t zig_deposit_bits_u##w(uint##w##_t source, uint##w##_t mask_) { \ + uint##w##_t bb = 1;\ + uint##w##_t result = 0;\ + uint##w##_t mask = mask_;\ + \ + while (mask != 0) {\ + uint##w##_t bit = mask & ~(mask - 1);\ + mask &= ~bit;\ + uint##w##_t source_bit = source & bb;\ + if (source_bit != 0) result |= bit;\ + bb += bb;\ + }\ + \ + return result;\ + } + +#if zig_has_builtin(ia32_pext_di) + static inline uint64_t zig_deposit_bits_u64(uint64_t source, uint64_t mask) { + return __builtin_ia32_pdep_di(source, mask); + } +#else +zig_builtin_deposit_bits(64) +#endif + +#if zig_has_builtin(ia32_pext_si) + static inline uint32_t zig_deposit_bits_u32(uint32_t source, uint32_t mask) { + return __builtin_ia32_pdep_si(source, mask); + } + + static inline uint16_t zig_deposit_bits_u16(uint16_t source, uint16_t mask) { + return (uint16_t)__builtin_ia32_pdep_si(source, mask); + } + + static inline uint8_t zig_deposit_bits_u8(uint8_t source, uint8_t mask) { + return (uint8_t)__builtin_ia32_pdep_si(source, mask); + } +#else +zig_builtin_deposit_bits(32) +zig_builtin_deposit_bits(16) +zig_builtin_deposit_bits(8) +#endif + /* ======================== 128-bit Integer Support ========================= */ #if !defined(zig_has_int128) diff --git a/stage1/zig1.wasm b/stage1/zig1.wasm index 2eb55170d047..9c39fe885e71 100644 Binary files a/stage1/zig1.wasm and b/stage1/zig1.wasm differ diff --git a/test/behavior.zig b/test/behavior.zig index 8006d8364d13..52ee5a2b47e4 100644 --- a/test/behavior.zig +++ b/test/behavior.zig @@ -23,9 +23,10 @@ test { _ = @import("behavior/const_slice_child.zig"); _ = @import("behavior/decl_literals.zig"); _ = @import("behavior/decltest.zig"); - _ = @import("behavior/duplicated_test_names.zig"); _ = @import("behavior/defer.zig"); + _ = @import("behavior/deposit_extract_bits.zig"); _ = @import("behavior/destructure.zig"); + _ = 
@import("behavior/duplicated_test_names.zig"); _ = @import("behavior/empty_union.zig"); _ = @import("behavior/enum.zig"); _ = @import("behavior/error.zig"); diff --git a/test/behavior/deposit_extract_bits.zig b/test/behavior/deposit_extract_bits.zig new file mode 100644 index 000000000000..b8791796a134 --- /dev/null +++ b/test/behavior/deposit_extract_bits.zig @@ -0,0 +1,211 @@ +const std = @import("std"); +const builtin = @import("builtin"); +const mem = std.mem; +const expect = std.testing.expect; +const expectEqual = std.testing.expectEqual; + +const supports_pext_pdep = switch (builtin.zig_backend) { + .stage2_llvm, .stage2_c => true, + .stage2_x86_64 => builtin.target.os.tag != .windows, + else => false, +}; + +test "@depositBits u64" { + if (!supports_pext_pdep) return error.SkipZigTest; // TODO + + const S = struct { + pub fn doTheTest() !void { + var a: u64 = 0; + var b: u64 = 0xFFFF_FFFF_FFFF_FFFF; + var c: u64 = 0x1234_5678_9012_3456; + var d: u64 = 0x00F0_FF00_F00F_00FF; + + _ = &a; + _ = &b; + _ = &c; + _ = &d; + + try expect(@depositBits(b, a) == 0); + try expect(@depositBits(a, b) == 0); + + try expect(@depositBits(b, c) == c); + try expect(@depositBits(b, d) == d); + + try expect(@depositBits(c, d) == 0x0000_1200_3004_0056); + } + }; + + try S.doTheTest(); + try comptime S.doTheTest(); +} + +test "@depositBits u128" { + if (!supports_pext_pdep) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO #19991 + + const S = struct { + pub fn doTheTest() !void { + var a: u64 = 0x1234_5678_9012_3456; + var b: u128 = 0x00F0_FF00_F00F_00FF << 64; + + _ = &a; + _ = &b; + + try expect(@depositBits(a, b) == 0x0000_1200_3004_0056 << 64); + } + }; + + try S.doTheTest(); + try comptime S.doTheTest(); +} + +test "@depositBits u256" { + if (!supports_pext_pdep) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO #19991 + + const S = struct { + pub fn 
doTheTest() !void { + var a: u64 = 0x1234_5678_9ABC_DEF0; + var b: u256 = 0x0F00_0FF0_0F0F_FF00 << 174; + + _ = &a; + _ = &b; + + try expect(@depositBits(a, b) == 0x0A00_0BC0_0D0E_F000 << 174); + } + }; + + try S.doTheTest(); + try comptime S.doTheTest(); +} + +test "@extractBits u64" { + if (!supports_pext_pdep) return error.SkipZigTest; // TODO + + const S = struct { + pub fn doTheTest() !void { + var a: u64 = 0; + var b: u64 = 0xFFFF_FFFF_FFFF_FFFF; + var c: u64 = 0x1234_5678_9012_3456; + var d: u64 = 0x00F0_FF00_F00F_00FF; + + _ = &a; + _ = &b; + _ = &c; + _ = &d; + + try expect(@extractBits(b, a) == 0); + try expect(@extractBits(a, b) == 0); + + try expect(@extractBits(c, b) == c); + try expect(@extractBits(d, b) == d); + + try expect(@extractBits(c, d) == 0x0356_9256); + } + }; + + try S.doTheTest(); + try comptime S.doTheTest(); +} + +test "@extractBits u128" { + if (!supports_pext_pdep) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO #19991 + + const S = struct { + pub fn doTheTest() !void { + var a: u128 = 0x1234_5678_9012_3456 << 64; + var b: u128 = 0x00F0_FF00_F00F_00FF << 64; + + _ = &a; + _ = &b; + + try expect(@extractBits(a, b) == 0x0356_9256); + } + }; + + try S.doTheTest(); + try comptime S.doTheTest(); +} + +test "@extractBits u256" { + if (!supports_pext_pdep) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO #19991 + + const S = struct { + pub fn doTheTest() !void { + var a: u256 = 0x1234_5678_9ABC_DEF0 << 96; + var b: u256 = 0x0F00_0FF0_0F0F_FF00 << 96; + + _ = &a; + _ = &b; + + try expect(@extractBits(a, b) == 0x0267_ACDE); + } + }; + + try S.doTheTest(); + try comptime S.doTheTest(); +} + +test "@depositBits" { + if (!supports_pext_pdep) return error.SkipZigTest; // TODO + + const S = struct { + pub fn doTheTest() !void { + try expectDepositBits(u5, 0xc, 0x0, 0x0); + try expectDepositBits(u8, 0x34, 0x3e, 0x28); + try 
expectDepositBits(u12, 0x8d1, 0x3ff, 0xd1); + try expectDepositBits(u16, 0x71bf, 0x3af1, 0x32f1); + try expectDepositBits(u32, 0x3bae5063, 0x7b17b132, 0x1200a012); + try expectDepositBits(u48, 0x434aa15ff2fa, 0xce370a6c311, 0xce34086c210); + try expectDepositBits(u64, 0x8361fc9b827793a6, 0xe67fcd567987eee6, 0x425c041639026a24); + + if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO #19991 + + try expectDepositBits(u97, 0x171f755a01d485c4c34c18c81, 0xac06c853b200585f371570eb, 0x80044800a200084030142001); + try expectDepositBits(u128, 0xb7be70a644ee77116f7265b2a4b95a8b, 0x6c3396ebe8de95f9eaf62d08b2c3cb56, 0x80292e38818856148442c0090c14046); + try expectDepositBits(u185, 0x4a0774246e045222bb0ed34d184b1bbde1fc99c9ca0e89, 0x1b91d49bb592ec503cce5e517e87137fff828329d15be8f, 0x11811410a0104c0018ca5a510687104ce4828021001a809); + try expectDepositBits(u256, 0x43837440edafe142bd5b2f022f8a05d596c98b3c4be1ba19f4df4f9cbaadbda2, 0x86942d4fa0882cfeea9b45ad11334e0877b81e6c3e9c8b01a38c673778c8a1d3, 0x8280214120800cc44a98018d0100480875180e640e908a008384223338888102); + try expectDepositBits(u479, 0x4b9850b7dacb9a133557b25750455b9aead11be92175443d26db30bdd39a81e5a9a3a106d679f35067f76e832f15e13af81b56400bbe0ac9dff4cb06, 0x2c318fa22f8ae1373baa74eed5b70b1c7b7ab0bd6ea4804f88f87b21464ad5ee017cacd69a8c82bdcb68fe0b71e787eeda6d770d3c80f03a5b805dcc, 0x203084000f026024320a500805320a0c6370a0900c24804e88b0720002409584013c0086888c8200c320b200000585e8084c23093c80e02242804048); + } + }; + + try S.doTheTest(); + try comptime S.doTheTest(); +} + +test "@extractBits" { + if (!supports_pext_pdep) return error.SkipZigTest; // TODO + + const S = struct { + pub fn doTheTest() !void { + try expectExtractBits(u5, 0x1c, 0xe, 0x6); + try expectExtractBits(u8, 0xc1, 0xbe, 0x20); + try expectExtractBits(u12, 0x8fd, 0x910, 0x5); + try expectExtractBits(u16, 0x694c, 0xaaea, 0xca); + try expectExtractBits(u32, 0xa9f97bcf, 0x64f207c2, 0x179f); + try expectExtractBits(u48, 0x32901c841c2a, 
0x3721b7ff376d, 0x6832118c); + try expectExtractBits(u64, 0xbc1ba402eaabd49b, 0x8324f9742e70d227, 0x21406ae3); + + if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO #19991 + + try expectExtractBits(u97, 0x12c7ffc54e5772313dae0aa7a, 0x144e7728badaa0c5edee2f2ab, 0x15fe0da89f7aae); + try expectExtractBits(u128, 0x18eb4eaa5e93441fa28d2860de22961b, 0x3b89eec7dd369bb8634b8da908272721, 0xcebc9e501b2a2699); + try expectExtractBits(u185, 0x1fad06e744cee4f42aa80057dd1fb8b86a2281d124e389e, 0x1a5f25ae5516369fd211e040df64b5fb97ca12d189474b8, 0x1cd5ba75a083f4f2e084f16b); + try expectExtractBits(u256, 0xb5db52469100b3796a6981ed441d685ede3c39e423d91ff5dc33d0ae3696067c, 0x2b03ad2a509cc14a8cfc71b9cfbadee93ab976d6335c3897d5188cec3c89081a, 0x4e0285c9a1da86514ee66af9f7d4bec6); + try expectExtractBits(u479, 0x7c44ec50c139a0fb34d51fa28a9f63f9940e578df33e21792c25b4a4e931df79bcbe45eb5cce05b0e73b5d01d0bc9bd4677e2217285c390012de90cf, 0x3facb493aa8150da7350b5f7ef349addba0fc293a258319cb61c1b224f07f0e096cf117bdb0e2338a7eae3e88e8e392161be97b90e23b879c8c51333, 0x3c1d484fb33d294c7da739eeb28c593afae77739df3a4239cef88b0380743); + } + }; + + try S.doTheTest(); + try comptime S.doTheTest(); +} + +fn expectDepositBits(comptime T: type, src: T, dst: T, exp: T) !void { + return expectEqual(@depositBits(src, dst), exp); +} + +fn expectExtractBits(comptime T: type, src: T, dst: T, exp: T) !void { + return expectEqual(@extractBits(src, dst), exp); +} diff --git a/test/cases/compile_errors/deposit_bits_negative_comptime_int.zig b/test/cases/compile_errors/deposit_bits_negative_comptime_int.zig new file mode 100644 index 000000000000..d8cb4cf9d356 --- /dev/null +++ b/test/cases/compile_errors/deposit_bits_negative_comptime_int.zig @@ -0,0 +1,12 @@ +export fn entry() void { + const a = 0; + const b = -1; + const res = @depositBits(a, b); + _ = res; +} + +// error +// is_test=true +// +// :4:33: error: use of negative value '-1' +// :4:17: note: parameters to @depositBits must be 
non-negative diff --git a/test/cases/compile_errors/deposit_bits_signed_type.zig b/test/cases/compile_errors/deposit_bits_signed_type.zig new file mode 100644 index 000000000000..c50f117773e4 --- /dev/null +++ b/test/cases/compile_errors/deposit_bits_signed_type.zig @@ -0,0 +1,13 @@ +export fn entry() void { + var a: i32 = 0; + var b: i32 = 0; + var res = @depositBits(a, b); + _ = &a; + _ = &b; + _ = &res; +} + +// error +// is_test=true +// +// :4:28: error: expected unsigned integer or 'comptime_int', found 'i32'