Skip to content

Implement @depositBits and @extractBits #23474

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 16 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions doc/langref.html.in
Original file line number Diff line number Diff line change
Expand Up @@ -4742,6 +4742,25 @@ fn cmpxchgWeakButNotAtomic(comptime T: type, ptr: *T, expected_value: T, new_val
{#see_also|@cVaArg|@cVaCopy|@cVaEnd#}
{#header_close#}

{#header_open|@depositBits#}
<pre>{#syntax#}@depositBits(source: T, mask: T) T{#endsyntax#}</pre>
<p>
{#syntax#}T{#endsyntax#} must be an unsigned integer type, or {#syntax#}comptime_int{#endsyntax#} (for which both parameters must be non-negative). {#syntax#}T{#endsyntax#} is determined by peer-type resolution.
</p>
<p>
Transfers contiguous bits from the bottom of the {#syntax#}source{#endsyntax#} operand to positions in the destination corresponding to bits that are set in the {#syntax#}mask{#endsyntax#}. The remaining bits in the destination are zeroed.
</p>
<p>
Where available, this builtin compiles down to a {#syntax#}pdep{#endsyntax#} instruction on x86 targets with BMI2 enabled. This applies to types up to {#syntax#}u64{#endsyntax#} on x86-64, and to types up to {#syntax#}u32{#endsyntax#} on x86.
</p>
<p>
Example:
</p>

{#code|test_depositbits_builtin.zig#}
{#see_also|@extractBits#}
{#header_close#}

{#header_open|@divExact#}
<pre>{#syntax#}@divExact(numerator: T, denominator: T) T{#endsyntax#}</pre>
<p>
Expand Down Expand Up @@ -4896,6 +4915,26 @@ fn cmpxchgWeakButNotAtomic(comptime T: type, ptr: *T, expected_value: T, new_val
{#see_also|@export#}
{#header_close#}

{#header_open|@extractBits#}
<pre>{#syntax#}@extractBits(source: T, mask: T) T{#endsyntax#}</pre>
<p>
{#syntax#}T{#endsyntax#} must be an unsigned integer type, or {#syntax#}comptime_int{#endsyntax#} (for which both parameters must be non-negative). {#syntax#}T{#endsyntax#} is determined by peer-type resolution.
</p>
<p>
Transfers bits in the {#syntax#}source{#endsyntax#} corresponding to bits set in the {#syntax#}mask{#endsyntax#} operand to the destination, writing them as contiguous lower bits. The remaining bits in the destination are zeroed.
</p>
<p>
Where available, this builtin compiles down to a {#syntax#}pext{#endsyntax#} instruction on x86 targets with BMI2 enabled. This applies to types up to {#syntax#}u64{#endsyntax#} on x86-64, and to types up to {#syntax#}u32{#endsyntax#} on x86.
</p>
<p>
Example:
</p>

{#code|test_extractbits_builtin.zig#}

{#see_also|@depositBits#}
{#header_close#}

{#header_open|@field#}
<pre>{#syntax#}@field(lhs: anytype, comptime field_name: []const u8) (field){#endsyntax#}</pre>
<p>Performs field access by a compile-time string. Works on both fields and declarations.
Expand Down
7 changes: 7 additions & 0 deletions doc/langref/test_depositbits_builtin.zig
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
const std = @import("std");

test "deposit bits" {
    // expectEqual takes (expected, actual) — expected value first.
    try std.testing.expectEqual(0x10203040, @depositBits(0x00001234, 0xf0f0f0f0));
}

// test
7 changes: 7 additions & 0 deletions doc/langref/test_extractbits_builtin.zig
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
const std = @import("std");

test "extract bits" {
    // expectEqual takes (expected, actual) — expected value first.
    try std.testing.expectEqual(0x00001357, @extractBits(0x12345678, 0xf0f0f0f0));
}

// test
1 change: 1 addition & 0 deletions lib/compiler_rt.zig
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ comptime {
_ = @import("compiler_rt/bitreverse.zig");
_ = @import("compiler_rt/bswap.zig");
_ = @import("compiler_rt/cmp.zig");
_ = @import("compiler_rt/pdeppext.zig");

_ = @import("compiler_rt/shift.zig");
_ = @import("compiler_rt/negXi2.zig");
Expand Down
312 changes: 312 additions & 0 deletions lib/compiler_rt/pdeppext.zig
Original file line number Diff line number Diff line change
@@ -0,0 +1,312 @@
const std = @import("std");
const builtin = @import("builtin");
const common = @import("common.zig");

// Big integers are handled as arrays of 32-bit limbs; `limb`/`limb_ptr`
// below map logical limb indices to array positions per target endianness.
const Limb = u32;
// Wide enough to index any bit within a single Limb (log2(32) = 5).
const Log2Limb = u5;

comptime {
    // Software fallbacks for @depositBits (pdep) and @extractBits (pext),
    // referenced when the target lacks a native instruction or the operand
    // type is too wide for one.
    @export(&__pdep_bigint, .{ .name = "__pdep_bigint", .linkage = common.linkage, .visibility = common.visibility });
    @export(&__pdep_u32, .{ .name = "__pdep_u32", .linkage = common.linkage, .visibility = common.visibility });
    @export(&__pdep_u64, .{ .name = "__pdep_u64", .linkage = common.linkage, .visibility = common.visibility });
    @export(&__pdep_u128, .{ .name = "__pdep_u128", .linkage = common.linkage, .visibility = common.visibility });

    @export(&__pext_bigint, .{ .name = "__pext_bigint", .linkage = common.linkage, .visibility = common.visibility });
    @export(&__pext_u32, .{ .name = "__pext_u32", .linkage = common.linkage, .visibility = common.visibility });
    @export(&__pext_u64, .{ .name = "__pext_u64", .linkage = common.linkage, .visibility = common.visibility });
    @export(&__pext_u128, .{ .name = "__pext_u128", .linkage = common.linkage, .visibility = common.visibility });
}

const endian = builtin.cpu.arch.endian();

/// Returns logical limb `i` (0 = least significant) of `x`, accounting for
/// the target's limb storage order.
inline fn limb(x: []const Limb, i: usize) Limb {
    return switch (endian) {
        .little => x[i],
        .big => x[x.len - 1 - i],
    };
}

/// Returns a pointer to logical limb `i` (0 = least significant) of `x`.
inline fn limb_ptr(x: []Limb, i: usize) *Limb {
    return switch (endian) {
        .little => &x[i],
        .big => &x[x.len - 1 - i],
    };
}

/// Stores `v` into logical limb `i` (0 = least significant) of `x`.
inline fn limb_set(x: []Limb, i: usize, v: Limb) void {
    limb_ptr(x, i).* = v;
}

// Software PDEP (parallel bit deposit) over limb arrays: bit `i` of `source`
// is written to the position of the i-th set bit of `mask`; `result` bits not
// covered by the mask are left untouched, which is why the caller must zero it.
// Assumes that `result` is zeroed.
inline fn pdep_bigint(result: []Limb, source: []const Limb, mask: []const Limb) void {
    // Current mask limb being scanned; consumed bits are cleared as we go.
    var mask_limb: Limb = limb(mask, 0);
    var mask_limb_index: usize = 0;
    // Number of mask bits consumed so far == index of the source bit to deposit.
    var i: usize = 0;

    outer: while (true) : (i += 1) {
        // Find the lowest set bit in mask
        const mask_limb_bit: Log2Limb = limb_bit: while (true) {
            const mask_limb_tz = @ctz(mask_limb);
            if (mask_limb_tz != @bitSizeOf(Limb)) {
                const cast_limb_bit: Log2Limb = @intCast(mask_limb_tz);
                // Clear the bit so the next scan finds the next set bit.
                mask_limb ^= @as(Limb, 1) << cast_limb_bit;
                break :limb_bit cast_limb_bit;
            }

            // Current mask limb is exhausted; move to the next (or finish).
            mask_limb_index += 1;
            if (mask_limb_index >= mask.len) break :outer;

            mask_limb = limb(mask, mask_limb_index);
        };

        // Locate source bit `i`. NOTE(review): 32 is @bitSizeOf(Limb) —
        // keep in sync if Limb ever changes.
        const i_limb_index = i / 32;
        const i_limb_bit: Log2Limb = @truncate(i);

        // No source bits remain to deposit.
        if (i_limb_index >= source.len) break;

        const source_bit_set = limb(source, i_limb_index) & (@as(Limb, 1) << i_limb_bit) != 0;

        // Deposit source bit `i` at the current mask bit's position.
        limb_ptr(result, mask_limb_index).* |= @as(Limb, @intFromBool(source_bit_set)) << mask_limb_bit;
    }
}

/// Exported big-integer PDEP entry point. `r`, `s` and `m` point to limb
/// arrays holding at least `bits` bits each; the result is written to `r`.
/// The full ABI-sized result storage is zeroed first because `pdep_bigint`
/// only ORs bits into it (ABI size may exceed ceil(bits/limb) limbs due to
/// alignment padding).
pub fn __pdep_bigint(r: [*]Limb, s: [*]const Limb, m: [*]const Limb, bits: usize) callconv(.c) void {
    // Use the Limb type's own size/width rather than hard-coded 4 and 32 so
    // this stays correct if Limb changes.
    const abi_size: usize = @intCast(intAbiSize(@intCast(bits), builtin.target));
    const result_full = r[0 .. std.math.divCeil(usize, abi_size, @sizeOf(Limb)) catch unreachable];
    @memset(result_full, 0);

    const limbs_len = std.math.divCeil(usize, bits, @bitSizeOf(Limb)) catch unreachable;
    pdep_bigint(r[0..limbs_len], s[0..limbs_len], m[0..limbs_len]);
}

// Software PEXT (parallel bit extract) over limb arrays: the bits of `source`
// at positions where `mask` is set are packed, in ascending order, into the
// low bits of `result`. Untouched result bits must already be zero.
// Assumes that `result` is zeroed.
inline fn pext_bigint(result: []Limb, source: []const Limb, mask: []const Limb) void {
    // Current mask limb being scanned; consumed bits are cleared as we go.
    var mask_limb: Limb = limb(mask, 0);
    var mask_limb_index: usize = 0;
    // Number of mask bits consumed so far == destination bit index in `result`.
    var i: usize = 0;

    outer: while (true) : (i += 1) {
        // Find the lowest remaining set bit of the mask (clearing it), or stop.
        const mask_limb_bit: Log2Limb = limb_bit: while (true) {
            const mask_limb_tz = @ctz(mask_limb);
            if (mask_limb_tz != @bitSizeOf(Limb)) {
                const cast_limb_bit: Log2Limb = @intCast(mask_limb_tz);
                mask_limb ^= @as(Limb, 1) << cast_limb_bit;
                break :limb_bit cast_limb_bit;
            }

            mask_limb_index += 1;
            if (mask_limb_index >= mask.len) break :outer;

            mask_limb = limb(mask, mask_limb_index);
        };

        // Destination position for this extracted bit. NOTE(review): 32 is
        // @bitSizeOf(Limb) — keep in sync if Limb ever changes.
        const i_limb_index = i / 32;
        const i_limb_bit: Log2Limb = @truncate(i);

        // Destination exhausted. NOTE(review): bounds-checks against
        // source.len; callers in this file pass equally-sized slices, so this
        // also bounds `result` — confirm if call sites change.
        if (i_limb_index >= source.len) break;

        // Read the source bit at the mask bit's position...
        const source_bit_set = limb(source, mask_limb_index) & (@as(Limb, 1) << mask_limb_bit) != 0;

        // ...and pack it at the next low destination position.
        limb_ptr(result, i_limb_index).* |= @as(Limb, @intFromBool(source_bit_set)) << i_limb_bit;
    }
}

/// Exported big-integer PEXT entry point. `r`, `s` and `m` point to limb
/// arrays holding at least `bits` bits each; the result is written to `r`.
/// The full ABI-sized result storage is zeroed first because `pext_bigint`
/// only ORs bits into it (ABI size may exceed ceil(bits/limb) limbs due to
/// alignment padding).
pub fn __pext_bigint(r: [*]Limb, s: [*]const Limb, m: [*]const Limb, bits: usize) callconv(.c) void {
    // Use the Limb type's own size/width rather than hard-coded 4 and 32 so
    // this stays correct if Limb changes.
    const abi_size: usize = @intCast(intAbiSize(@intCast(bits), builtin.target));
    const result_full = r[0 .. std.math.divCeil(usize, abi_size, @sizeOf(Limb)) catch unreachable];
    @memset(result_full, 0);

    const limbs_len = std.math.divCeil(usize, bits, @bitSizeOf(Limb)) catch unreachable;
    pext_bigint(r[0..limbs_len], s[0..limbs_len], m[0..limbs_len]);
}

/// Generic software fallback for PDEP (parallel bit deposit): the low bits of
/// `source` are scattered, in ascending order, to the positions of the set
/// bits of `mask_`; all other result bits are zero.
inline fn pdep_uX(comptime T: type, source: T, mask_: T) T {
    var bb: T = 1; // walking source bit: 1 << (number of mask bits consumed)
    var result: T = 0;
    var mask = mask_;

    while (mask != 0) {
        // Isolate the lowest set bit of the mask, then clear it.
        const bit = mask & ~(mask - 1);
        mask &= ~bit;
        const source_bit = source & bb;
        if (source_bit != 0) result |= bit;
        // Wrapping add: when `mask` has every bit of T set, the final doubling
        // of `bb` would overflow (trap in safe builds, UB in ReleaseFast) with
        // a plain `+=`. Wrapping to 0 is harmless — the loop exits right after.
        bb +%= bb;
    }

    return result;
}

/// Exported PDEP software fallback for 32-bit operands.
pub fn __pdep_u32(source: u32, mask: u32) callconv(.c) u32 {
    return pdep_uX(u32, source, mask);
}

/// Exported PDEP software fallback for 64-bit operands.
pub fn __pdep_u64(source: u64, mask: u64) callconv(.c) u64 {
    return pdep_uX(u64, source, mask);
}

/// Exported PDEP software fallback for 128-bit operands.
pub fn __pdep_u128(source: u128, mask: u128) callconv(.c) u128 {
    return pdep_uX(u128, source, mask);
}

/// Generic software fallback for PEXT (parallel bit extract): the bits of
/// `source` at positions where `mask_` is set are packed, in ascending order,
/// into the low bits of the result; all other result bits are zero.
inline fn pext_uX(comptime T: type, source: T, mask_: T) T {
    var bb: T = 1; // walking destination bit: 1 << (number of mask bits consumed)
    var result: T = 0;
    var mask = mask_;

    while (mask != 0) {
        // Isolate the lowest set bit of the mask, then clear it.
        const bit = mask & ~(mask - 1);
        mask &= ~bit;
        const source_bit = source & bit;
        if (source_bit != 0) result |= bb;
        // Wrapping add: when `mask` has every bit of T set, the final doubling
        // of `bb` would overflow (trap in safe builds, UB in ReleaseFast) with
        // a plain `+=`. Wrapping to 0 is harmless — the loop exits right after.
        bb +%= bb;
    }

    return result;
}

/// Exported PEXT software fallback for 32-bit operands.
pub fn __pext_u32(source: u32, mask: u32) callconv(.c) u32 {
    return pext_uX(u32, source, mask);
}

/// Exported PEXT software fallback for 64-bit operands.
pub fn __pext_u64(source: u64, mask: u64) callconv(.c) u64 {
    return pext_uX(u64, source, mask);
}

/// Exported PEXT software fallback for 128-bit operands.
pub fn __pext_u128(source: u128, mask: u128) callconv(.c) u128 {
    return pext_uX(u128, source, mask);
}

// BEGIN HACKY CODE COPY WAIT FOR ALEXRP PR
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


// Shorthand aliases into std for the inlined compiler-layout copies below.
const Target = std.Target;
const assert = std.debug.assert;

/// Byte alignment stored as log2 of the byte count (so `.@"8"` is stored as 3).
/// Temporary copy of compiler-internal code — see the HACK note above; avoid
/// diverging from the upstream version.
pub const Alignment = enum(u6) {
    @"1" = 0,
    @"2" = 1,
    @"4" = 2,
    @"8" = 3,
    @"16" = 4,
    @"32" = 5,
    @"64" = 6,
    none = std.math.maxInt(u6),
    _,

    /// Converts a byte count to an Alignment. `n` must be 0 (yielding `.none`)
    /// or a power of two (asserted).
    pub fn fromByteUnits(n: u64) Alignment {
        if (n == 0) return .none;
        assert(std.math.isPowerOfTwo(n));
        return @enumFromInt(@ctz(n));
    }

    /// Align an address forwards to this alignment.
    pub fn forward(a: Alignment, addr: u64) u64 {
        assert(a != .none);
        // Round up by adding (alignment - 1) and masking off the low bits.
        const x = (@as(u64, 1) << @intFromEnum(a)) - 1;
        return (addr + x) & ~x;
    }
};

/// ABI size in bytes of an integer with the given bit width on `target`:
/// the byte count rounded up to the type's ABI alignment.
pub fn intAbiSize(bits: u16, target: Target) u64 {
    // u17 avoids overflow of bits + 7 when bits is near maxInt(u16).
    const byte_count: u16 = @intCast((@as(u17, bits) + 7) / 8);
    return intAbiAlignment(bits, target).forward(byte_count);
}

/// ABI alignment of an integer with the given bit width on `target`.
/// Temporary copy of compiler-internal code — see the HACK note above; avoid
/// diverging from the upstream version.
pub fn intAbiAlignment(bits: u16, target: Target) Alignment {
    return switch (target.cpu.arch) {
        .x86 => switch (bits) {
            0 => .none,
            1...8 => .@"1",
            9...16 => .@"2",
            17...32 => .@"4",
            // 64-bit integer alignment on 32-bit x86 differs by OS ABI.
            33...64 => switch (target.os.tag) {
                .uefi, .windows => .@"8",
                else => .@"4",
            },
            else => .@"16",
        },
        .x86_64 => switch (bits) {
            0 => .none,
            1...8 => .@"1",
            9...16 => .@"2",
            17...32 => .@"4",
            33...64 => .@"8",
            else => .@"16",
        },
        // Other targets: next power of two of the byte count, capped at the
        // target's maximum integer alignment.
        else => return Alignment.fromByteUnits(@min(
            std.math.ceilPowerOfTwoPromote(u16, @as(u16, @intCast((@as(u17, bits) + 7) / 8))),
            maxIntAlignment(target),
        )),
    };
}

/// Maximum ABI alignment (in bytes) of any integer type on `target`.
/// Temporary copy of compiler-internal code — see the HACK note above; avoid
/// diverging from the upstream version.
pub fn maxIntAlignment(target: std.Target) u16 {
    return switch (target.cpu.arch) {
        .avr => 1,
        .msp430 => 2,
        .xcore => 4,
        .propeller => 4,

        .arm,
        .armeb,
        .thumb,
        .thumbeb,
        .hexagon,
        .mips,
        .mipsel,
        .powerpc,
        .powerpcle,
        .amdgcn,
        .riscv32,
        .sparc,
        .s390x,
        .lanai,
        .wasm32,
        .wasm64,
        => 8,

        // For these, LLVMABIAlignmentOfType(i128) reports 8. Note that 16
        // is a relevant number in three cases:
        // 1. Different machine code instruction when loading into SIMD register.
        // 2. The C ABI wants 16 for extern structs.
        // 3. 16-byte cmpxchg needs 16-byte alignment.
        // Same logic for powerpc64, mips64, sparc64.
        .powerpc64,
        .powerpc64le,
        .mips64,
        .mips64el,
        .sparc64,
        => switch (target.ofmt) {
            .c => 16,
            else => 8,
        },

        .x86_64 => 16,

        // Even LLVMABIAlignmentOfType(i128) agrees on these targets.
        .x86,
        .aarch64,
        .aarch64_be,
        .riscv64,
        .bpfel,
        .bpfeb,
        .nvptx,
        .nvptx64,
        => 16,

        // Below this comment are unverified but based on the fact that C requires
        // int128_t to be 16 bytes aligned, it's a safe default.
        .csky,
        .arc,
        .m68k,
        .kalimba,
        .spirv,
        .spirv32,
        .ve,
        .spirv64,
        .loongarch32,
        .loongarch64,
        .xtensa,
        => 16,
    };
}
Loading