diff --git a/doc/langref.html.in b/doc/langref.html.in index ac8671ebb7a0..7e1848668cfd 100644 --- a/doc/langref.html.in +++ b/doc/langref.html.in @@ -4742,6 +4742,25 @@ fn cmpxchgWeakButNotAtomic(comptime T: type, ptr: *T, expected_value: T, new_val {#see_also|@cVaArg|@cVaCopy|@cVaEnd#} {#header_close#} + {#header_open|@depositBits#} +
{#syntax#}@depositBits(source: T, mask: T) T{#endsyntax#}
+

+ {#syntax#}T{#endsyntax#} must be an unsigned integer type, or {#syntax#}comptime_int{#endsyntax#} (for which both parameters must be non-negative). {#syntax#}T{#endsyntax#} is determined by peer-type resolution. +

+

+ Transfers contiguous bits from the bottom of the {#syntax#}source{#endsyntax#} operand to positions in the destination corresponding to bits that are set in the {#syntax#}mask{#endsyntax#}. The remaining bits in the destination are zeroed. +

+

+ Where available, this builtin compiles down to a {#syntax#}pdep{#endsyntax#} instruction on x86 targets with BMI2 enabled. For x86-64, this will happen for types up to {#syntax#}u64{#endsyntax#}, and will happen for types up to {#syntax#}u32{#endsyntax#} for x86. +

+

+ Example: +

+ + {#code|test_depositbits_builtin.zig#} + {#see_also|@extractBits#} + {#header_close#} + {#header_open|@divExact#}
{#syntax#}@divExact(numerator: T, denominator: T) T{#endsyntax#}

@@ -4896,6 +4915,26 @@ fn cmpxchgWeakButNotAtomic(comptime T: type, ptr: *T, expected_value: T, new_val {#see_also|@export#} {#header_close#} + {#header_open|@extractBits#} +

{#syntax#}@extractBits(source: T, mask: T) T{#endsyntax#}
+

+ {#syntax#}T{#endsyntax#} must be an unsigned integer type, or {#syntax#}comptime_int{#endsyntax#} (for which both parameters must be non-negative). {#syntax#}T{#endsyntax#} is determined by peer-type resolution. +

+

+ Transfers bits in the {#syntax#}source{#endsyntax#} corresponding to bits set in the {#syntax#}mask{#endsyntax#} operand to the destination, writing them as contiguous lower bits. The remaining bits in the destination are zeroed. +

+

+ Where available, this builtin compiles down to a {#syntax#}pext{#endsyntax#} instruction on x86 targets with BMI2 enabled. For x86-64, this will happen for types up to {#syntax#}u64{#endsyntax#}, and will happen for types up to {#syntax#}u32{#endsyntax#} for x86. +

+

+ Example: +

+ + {#code|test_extractbits_builtin.zig#} + + {#see_also|@depositBits#} + {#header_close#} + {#header_open|@field#}
{#syntax#}@field(lhs: anytype, comptime field_name: []const u8) (field){#endsyntax#}

Performs field access by a compile-time string. Works on both fields and declarations. diff --git a/doc/langref/test_depositbits_builtin.zig b/doc/langref/test_depositbits_builtin.zig new file mode 100644 index 000000000000..dfe6b8882f43 --- /dev/null +++ b/doc/langref/test_depositbits_builtin.zig @@ -0,0 +1,7 @@ +const std = @import("std"); + +test "deposit bits" { + try std.testing.expectEqual(@depositBits(0x00001234, 0xf0f0f0f0), 0x10203040); +} + +// test diff --git a/doc/langref/test_extractbits_builtin.zig b/doc/langref/test_extractbits_builtin.zig new file mode 100644 index 000000000000..ca21a7f67873 --- /dev/null +++ b/doc/langref/test_extractbits_builtin.zig @@ -0,0 +1,7 @@ +const std = @import("std"); + +test "extract bits" { + try std.testing.expectEqual(@extractBits(0x12345678, 0xf0f0f0f0), 0x00001357); +} + +// test diff --git a/lib/compiler_rt.zig b/lib/compiler_rt.zig index c8da59eb5f0c..55d31a57489f 100644 --- a/lib/compiler_rt.zig +++ b/lib/compiler_rt.zig @@ -11,6 +11,7 @@ comptime { _ = @import("compiler_rt/bitreverse.zig"); _ = @import("compiler_rt/bswap.zig"); _ = @import("compiler_rt/cmp.zig"); + _ = @import("compiler_rt/pdeppext.zig"); _ = @import("compiler_rt/shift.zig"); _ = @import("compiler_rt/negXi2.zig"); diff --git a/lib/compiler_rt/pdeppext.zig b/lib/compiler_rt/pdeppext.zig new file mode 100644 index 000000000000..82e2957e3ff4 --- /dev/null +++ b/lib/compiler_rt/pdeppext.zig @@ -0,0 +1,312 @@ +const std = @import("std"); +const builtin = @import("builtin"); +const common = @import("common.zig"); + +const Limb = u32; +const Log2Limb = u5; + +comptime { + @export(&__pdep_bigint, .{ .name = "__pdep_bigint", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__pdep_u32, .{ .name = "__pdep_u32", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__pdep_u64, .{ .name = "__pdep_u64", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__pdep_u128, .{ .name = "__pdep_u128", 
.linkage = common.linkage, .visibility = common.visibility }); + + @export(&__pext_bigint, .{ .name = "__pext_bigint", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__pext_u32, .{ .name = "__pext_u32", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__pext_u64, .{ .name = "__pext_u64", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__pext_u128, .{ .name = "__pext_u128", .linkage = common.linkage, .visibility = common.visibility }); +} + +const endian = builtin.cpu.arch.endian(); + +inline fn limb(x: []const Limb, i: usize) Limb { + return if (endian == .little) x[i] else x[x.len - 1 - i]; +} + +inline fn limb_ptr(x: []Limb, i: usize) *Limb { + return if (endian == .little) &x[i] else &x[x.len - 1 - i]; +} + +inline fn limb_set(x: []Limb, i: usize, v: Limb) void { + if (endian == .little) { + x[i] = v; + } else { + x[x.len - 1 - i] = v; + } +} + +// Assumes that `result` is zeroed. +inline fn pdep_bigint(result: []Limb, source: []const Limb, mask: []const Limb) void { + var mask_limb: Limb = limb(mask, 0); + var mask_limb_index: usize = 0; + var i: usize = 0; + + outer: while (true) : (i += 1) { + // Find the lowest set bit in mask + const mask_limb_bit: Log2Limb = limb_bit: while (true) { + const mask_limb_tz = @ctz(mask_limb); + if (mask_limb_tz != @bitSizeOf(Limb)) { + const cast_limb_bit: Log2Limb = @intCast(mask_limb_tz); + mask_limb ^= @as(Limb, 1) << cast_limb_bit; + break :limb_bit cast_limb_bit; + } + + mask_limb_index += 1; + if (mask_limb_index >= mask.len) break :outer; + + mask_limb = limb(mask, mask_limb_index); + }; + + const i_limb_index = i / 32; + const i_limb_bit: Log2Limb = @truncate(i); + + if (i_limb_index >= source.len) break; + + const source_bit_set = limb(source, i_limb_index) & (@as(Limb, 1) << i_limb_bit) != 0; + + limb_ptr(result, mask_limb_index).* |= @as(Limb, @intFromBool(source_bit_set)) << mask_limb_bit; + } +} + +pub fn __pdep_bigint(r: [*]Limb, s: 
[*]const Limb, m: [*]const Limb, bits: usize) callconv(.c) void { + const result_full = r[0 .. std.math.divCeil(usize, @intCast(intAbiSize(@intCast(bits), builtin.target)), 4) catch unreachable]; + @memset(result_full, 0); + + const result = r[0 .. std.math.divCeil(usize, bits, 32) catch unreachable]; + const source = s[0 .. std.math.divCeil(usize, bits, 32) catch unreachable]; + const mask = m[0 .. std.math.divCeil(usize, bits, 32) catch unreachable]; + + pdep_bigint(result, source, mask); +} + +// Assumes that `result` is zeroed. +inline fn pext_bigint(result: []Limb, source: []const Limb, mask: []const Limb) void { + var mask_limb: Limb = limb(mask, 0); + var mask_limb_index: usize = 0; + var i: usize = 0; + + outer: while (true) : (i += 1) { + const mask_limb_bit: Log2Limb = limb_bit: while (true) { + const mask_limb_tz = @ctz(mask_limb); + if (mask_limb_tz != @bitSizeOf(Limb)) { + const cast_limb_bit: Log2Limb = @intCast(mask_limb_tz); + mask_limb ^= @as(Limb, 1) << cast_limb_bit; + break :limb_bit cast_limb_bit; + } + + mask_limb_index += 1; + if (mask_limb_index >= mask.len) break :outer; + + mask_limb = limb(mask, mask_limb_index); + }; + + const i_limb_index = i / 32; + const i_limb_bit: Log2Limb = @truncate(i); + + if (i_limb_index >= source.len) break; + + const source_bit_set = limb(source, mask_limb_index) & (@as(Limb, 1) << mask_limb_bit) != 0; + + limb_ptr(result, i_limb_index).* |= @as(Limb, @intFromBool(source_bit_set)) << i_limb_bit; + } +} + +pub fn __pext_bigint(r: [*]Limb, s: [*]const Limb, m: [*]const Limb, bits: usize) callconv(.c) void { + const result_full = r[0 .. std.math.divCeil(usize, @intCast(intAbiSize(@intCast(bits), builtin.target)), 4) catch unreachable]; + @memset(result_full, 0); + + const result = r[0 .. std.math.divCeil(usize, bits, 32) catch unreachable]; + const source = s[0 .. std.math.divCeil(usize, bits, 32) catch unreachable]; + const mask = m[0 .. 
std.math.divCeil(usize, bits, 32) catch unreachable]; + + pext_bigint(result, source, mask); +} + +inline fn pdep_uX(comptime T: type, source: T, mask_: T) T { + var bb: T = 1; + var result: T = 0; + var mask = mask_; + + while (mask != 0) { + const bit = mask & ~(mask - 1); + mask &= ~bit; + const source_bit = source & bb; + if (source_bit != 0) result |= bit; + bb +%= bb; + } + + return result; +} + +pub fn __pdep_u32(source: u32, mask: u32) callconv(.c) u32 { + return pdep_uX(u32, source, mask); +} + +pub fn __pdep_u64(source: u64, mask: u64) callconv(.c) u64 { + return pdep_uX(u64, source, mask); +} + +pub fn __pdep_u128(source: u128, mask: u128) callconv(.c) u128 { + return pdep_uX(u128, source, mask); +} + +inline fn pext_uX(comptime T: type, source: T, mask_: T) T { + var bb: T = 1; + var result: T = 0; + var mask = mask_; + + while (mask != 0) { + const bit = mask & ~(mask - 1); + mask &= ~bit; + const source_bit = source & bit; + if (source_bit != 0) result |= bb; + bb +%= bb; + } + + return result; +} + +pub fn __pext_u32(source: u32, mask: u32) callconv(.c) u32 { + return pext_uX(u32, source, mask); +} + +pub fn __pext_u64(source: u64, mask: u64) callconv(.c) u64 { + return pext_uX(u64, source, mask); +} + +pub fn __pext_u128(source: u128, mask: u128) callconv(.c) u128 { + return pext_uX(u128, source, mask); +} + +// BEGIN HACKY CODE COPY WAIT FOR ALEXRP PR + +const Target = std.Target; +const assert = std.debug.assert; + +pub const Alignment = enum(u6) { + @"1" = 0, + @"2" = 1, + @"4" = 2, + @"8" = 3, + @"16" = 4, + @"32" = 5, + @"64" = 6, + none = std.math.maxInt(u6), + _, + + pub fn fromByteUnits(n: u64) Alignment { + if (n == 0) return .none; + assert(std.math.isPowerOfTwo(n)); + return @enumFromInt(@ctz(n)); + } + + /// Align an address forwards to this alignment. 
+ pub fn forward(a: Alignment, addr: u64) u64 { + assert(a != .none); + const x = (@as(u64, 1) << @intFromEnum(a)) - 1; + return (addr + x) & ~x; + } +}; + +pub fn intAbiSize(bits: u16, target: Target) u64 { + return intAbiAlignment(bits, target).forward(@as(u16, @intCast((@as(u17, bits) + 7) / 8))); +} + +pub fn intAbiAlignment(bits: u16, target: Target) Alignment { + return switch (target.cpu.arch) { + .x86 => switch (bits) { + 0 => .none, + 1...8 => .@"1", + 9...16 => .@"2", + 17...32 => .@"4", + 33...64 => switch (target.os.tag) { + .uefi, .windows => .@"8", + else => .@"4", + }, + else => .@"16", + }, + .x86_64 => switch (bits) { + 0 => .none, + 1...8 => .@"1", + 9...16 => .@"2", + 17...32 => .@"4", + 33...64 => .@"8", + else => .@"16", + }, + else => return Alignment.fromByteUnits(@min( + std.math.ceilPowerOfTwoPromote(u16, @as(u16, @intCast((@as(u17, bits) + 7) / 8))), + maxIntAlignment(target), + )), + }; +} + +pub fn maxIntAlignment(target: std.Target) u16 { + return switch (target.cpu.arch) { + .avr => 1, + .msp430 => 2, + .xcore => 4, + .propeller => 4, + + .arm, + .armeb, + .thumb, + .thumbeb, + .hexagon, + .mips, + .mipsel, + .powerpc, + .powerpcle, + .amdgcn, + .riscv32, + .sparc, + .s390x, + .lanai, + .wasm32, + .wasm64, + => 8, + + // For these, LLVMABIAlignmentOfType(i128) reports 8. Note that 16 + // is a relevant number in three cases: + // 1. Different machine code instruction when loading into SIMD register. + // 2. The C ABI wants 16 for extern structs. + // 3. 16-byte cmpxchg needs 16-byte alignment. + // Same logic for powerpc64, mips64, sparc64. + .powerpc64, + .powerpc64le, + .mips64, + .mips64el, + .sparc64, + => switch (target.ofmt) { + .c => 16, + else => 8, + }, + + .x86_64 => 16, + + // Even LLVMABIAlignmentOfType(i128) agrees on these targets. 
+ .x86, + .aarch64, + .aarch64_be, + .riscv64, + .bpfel, + .bpfeb, + .nvptx, + .nvptx64, + => 16, + + // Below this comment are unverified but based on the fact that C requires + // int128_t to be 16 bytes aligned, it's a safe default. + .csky, + .arc, + .m68k, + .kalimba, + .spirv, + .spirv32, + .ve, + .spirv64, + .loongarch32, + .loongarch64, + .xtensa, + => 16, + }; +} diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig index f6de62550e11..92f5d1b44456 100644 --- a/lib/std/math/big/int.zig +++ b/lib/std/math/big/int.zig @@ -1760,6 +1760,73 @@ pub const Mutable = struct { y.shiftRight(y.toConst(), norm_shift); } + /// result = @depositBits(source, mask) + /// + /// Asserts that `source` and `mask` are positive + /// The value in `result` may use the same number of or less limbs than `mask`. + /// `result` is assumed to have sufficient length to store the result. + pub fn depositBits(result: *Mutable, source: Const, mask: Const) void { + assert(source.positive); + assert(mask.positive); + + result.positive = true; + @memset(result.limbs, 0); + + var shift: usize = 0; + for (mask.limbs, 0..) |mask_limb, i| { + const shift_bits: Log2Limb = @intCast(shift % limb_bits); + const shift_limbs = shift / limb_bits; + + if (shift_limbs >= source.limbs.len) break; + + const result_limb = &result.limbs[i]; + + var source_limb = source.limbs[shift_limbs] >> shift_bits; + if (shift_bits != 0 and shift_limbs + 1 < source.limbs.len) { + source_limb += source.limbs[shift_limbs + 1] << @intCast(limb_bits - shift_bits); + } + + const pdep_limb = @depositBits(source_limb, mask_limb); + + result_limb.* |= pdep_limb; + + shift += @intCast(@popCount(mask_limb)); + } + + result.normalize(result.limbs.len); + } + + /// result = @extractBits(source, mask) + /// + /// Asserts that `source` and `mask` are positive + /// The value in `result` may use the same number of or less limbs than `mask`. + /// `result` is assumed to have sufficient length to store the result. 
+ pub fn extractBits(result: *Mutable, source: Const, mask: Const) void { + assert(source.positive); + assert(mask.positive); + + result.positive = true; + @memset(result.limbs, 0); + + const len = @min(source.limbs.len, mask.limbs.len); + + var shift: usize = 0; + for (source.limbs[0..len], mask.limbs[0..len]) |source_limb, mask_limb| { + const pext_limb = @extractBits(source_limb, mask_limb); + const shift_bits: Log2Limb = @intCast(shift % limb_bits); + const shift_limbs = shift / limb_bits; + result.limbs[shift_limbs] |= pext_limb << shift_bits; + + if (shift_bits != 0) { + result.limbs[shift_limbs + 1] |= pext_limb >> @intCast(limb_bits - shift_bits); + } + + shift += @intCast(@popCount(mask_limb)); + } + + result.normalize(result.limbs.len); + } + /// Truncate an integer to a number of bits, following 2s-complement semantics. /// `r` may alias `a`. /// diff --git a/lib/std/math/big/int_test.zig b/lib/std/math/big/int_test.zig index 489adc12fbd0..c7e4bb70a0ea 100644 --- a/lib/std/math/big/int_test.zig +++ b/lib/std/math/big/int_test.zig @@ -2960,6 +2960,59 @@ fn popCountTest(val: *const Managed, bit_count: usize, expected: usize) !void { try testing.expectEqual(expected, val.toConst().popCount(bit_count)); } +test "big int extractBits" { + try extractBitsTest(0x12345678, 0x0, 0x0); + try extractBitsTest(0x12345678, 0xf0f0f0f0, 0x1357); + try extractBitsTest(0x12345678, 0xff00ff00, 0x1256); + try extractBitsTest(0x12345678, 0xffff, 0x5678); + + try extractBitsTest(0x12345678_90123456_78901234_56789012, 0xff << 64, 0x56); + try extractBitsTest(0x12345678_90123456_78901234_56789012, (0xff << 64) | 0xff00f, 0x56892); + + try extractBitsTest(0x12345678_90123456_78901234_56789012, 0xf0f0, 0x91); + try extractBitsTest(0x12345678_90123456, 0xffffffff_ffffffff, 0x12345678_90123456); +} + +fn extractBitsTest(comptime source: comptime_int, comptime mask: comptime_int, comptime expected: comptime_int) !void { + var source_bigint = try Managed.initSet(testing.allocator, 
source); + defer source_bigint.deinit(); + var mask_bigint = try Managed.initSet(testing.allocator, mask); + defer mask_bigint.deinit(); + const limbs = try testing.allocator.alloc(Limb, mask_bigint.limbs.len); + defer testing.allocator.free(limbs); + var result = Mutable{ .limbs = limbs, .positive = undefined, .len = undefined }; + + result.extractBits(source_bigint.toConst(), mask_bigint.toConst()); + + try testing.expectEqual(std.math.Order.eq, result.toConst().orderAgainstScalar(expected)); +} + +test "big int depositBits" { + try depositBitsTest(0x12345678, 0x0, 0x0); + try depositBitsTest(0x12345678, 0xf0f0f0f0, 0x50607080); + try depositBitsTest(0x12345678, 0xff00ff00, 0x56007800); + try depositBitsTest(0x12345678, 0xffff, 0x5678); + + try depositBitsTest(0x1234, 0xff << 64, 0x34_00000000_00000000); + try depositBitsTest(0x12345678, (0xff << 64) | 0xff00f, 0x45_00000000_00067008); + + try depositBitsTest(0x0, 0xff_ffffffff_ffffffff, 0x0); +} + +fn depositBitsTest(comptime source: comptime_int, comptime mask: comptime_int, comptime expected: comptime_int) !void { + var source_bigint = try Managed.initSet(testing.allocator, source); + defer source_bigint.deinit(); + var mask_bigint = try Managed.initSet(testing.allocator, mask); + defer mask_bigint.deinit(); + const limbs = try testing.allocator.alloc(Limb, mask_bigint.limbs.len); + defer testing.allocator.free(limbs); + var result = Mutable{ .limbs = limbs, .positive = undefined, .len = undefined }; + + result.depositBits(source_bigint.toConst(), mask_bigint.toConst()); + + try testing.expectEqual(std.math.Order.eq, result.toConst().orderAgainstScalar(expected)); +} + test "big int conversion read/write twos complement" { var a = try Managed.initSet(testing.allocator, (1 << 493) - 1); defer a.deinit(); diff --git a/lib/std/zig/AstGen.zig b/lib/std/zig/AstGen.zig index ccc870e36340..58c9df1a4515 100644 --- a/lib/std/zig/AstGen.zig +++ b/lib/std/zig/AstGen.zig @@ -9619,10 +9619,14 @@ fn builtinCall( return 
rvalue(gz, ri, result, node); }, - .add_with_overflow => return overflowArithmetic(gz, scope, ri, node, params, .add_with_overflow), - .sub_with_overflow => return overflowArithmetic(gz, scope, ri, node, params, .sub_with_overflow), - .mul_with_overflow => return overflowArithmetic(gz, scope, ri, node, params, .mul_with_overflow), - .shl_with_overflow => return overflowArithmetic(gz, scope, ri, node, params, .shl_with_overflow), + // zig fmt: off + .add_with_overflow => return extendedBinOp(gz, scope, ri, node, params, .add_with_overflow), + .sub_with_overflow => return extendedBinOp(gz, scope, ri, node, params, .sub_with_overflow), + .mul_with_overflow => return extendedBinOp(gz, scope, ri, node, params, .mul_with_overflow), + .shl_with_overflow => return extendedBinOp(gz, scope, ri, node, params, .shl_with_overflow), + .deposit_bits => return extendedBinOp(gz, scope, ri, node, params, .deposit_bits), + .extract_bits => return extendedBinOp(gz, scope, ri, node, params, .extract_bits), + // zig fmt: on .atomic_load => { const atomic_order_type = try gz.addBuiltinValue(node, .atomic_order); @@ -10077,7 +10081,7 @@ fn cImport( return block_inst.toRef(); } -fn overflowArithmetic( +fn extendedBinOp( gz: *GenZir, scope: *Scope, ri: ResultInfo, diff --git a/lib/std/zig/AstRlAnnotate.zig b/lib/std/zig/AstRlAnnotate.zig index d5fb0a8169cc..187ed8d76a17 100644 --- a/lib/std/zig/AstRlAnnotate.zig +++ b/lib/std/zig/AstRlAnnotate.zig @@ -1117,5 +1117,10 @@ fn builtinCall(astrl: *AstRlAnnotate, block: ?*Block, ri: ResultInfo, node: Ast. 
_ = try astrl.expr(args[4], block, ResultInfo.type_only); return false; }, + .deposit_bits, .extract_bits => { + _ = try astrl.expr(args[0], block, ResultInfo.none); + _ = try astrl.expr(args[1], block, ResultInfo.none); + return false; + }, } } diff --git a/lib/std/zig/BuiltinFn.zig b/lib/std/zig/BuiltinFn.zig index 1bf31cd165e9..e8cfbb39fd7e 100644 --- a/lib/std/zig/BuiltinFn.zig +++ b/lib/std/zig/BuiltinFn.zig @@ -36,6 +36,7 @@ pub const Tag = enum { c_va_copy, c_va_end, c_va_start, + deposit_bits, div_exact, div_floor, div_trunc, @@ -47,6 +48,7 @@ pub const Tag = enum { error_cast, @"export", @"extern", + extract_bits, field, field_parent_ptr, FieldType, @@ -412,6 +414,12 @@ pub const list = list: { .illegal_outside_function = true, }, }, + .{ + "@depositBits", .{ + .tag = .deposit_bits, + .param_count = 2, + }, + }, .{ "@divExact", .{ @@ -490,6 +498,13 @@ pub const list = list: { .param_count = 2, }, }, + .{ + "@extractBits", + .{ + .tag = .extract_bits, + .param_count = 2, + }, + }, .{ "@field", .{ diff --git a/lib/std/zig/Zir.zig b/lib/std/zig/Zir.zig index 089bc5e2aed8..57503e1742fa 100644 --- a/lib/std/zig/Zir.zig +++ b/lib/std/zig/Zir.zig @@ -2120,6 +2120,12 @@ pub const Inst = struct { /// This instruction is always `noreturn`, however, it is not considered as such by ZIR-level queries. This allows AstGen to assume that /// any code may have gone here, avoiding false-positive "unreachable code" errors. astgen_error, + /// Implements the `@depositBits` builtin. + /// `operand` is payload index to `BinNode`. + deposit_bits, + /// Implements the `@extractBits` builtin. + /// `operand` is payload index to `BinNode`. + extract_bits, pub const InstData = struct { opcode: Extended, @@ -4374,6 +4380,8 @@ fn findTrackableInner( .tuple_decl, .dbg_empty_stmt, .astgen_error, + .deposit_bits, + .extract_bits, => return, // `@TypeOf` has a body. 
diff --git a/lib/std/zig/llvm/Builder.zig b/lib/std/zig/llvm/Builder.zig index a71383016137..9a71522c60d6 100644 --- a/lib/std/zig/llvm/Builder.zig +++ b/lib/std/zig/llvm/Builder.zig @@ -2776,6 +2776,12 @@ pub const Intrinsic = enum { @"wasm.memory.size", @"wasm.memory.grow", + // x86 PDEP/PEXT + @"x86.bmi.pdep.32", + @"x86.bmi.pdep.64", + @"x86.bmi.pext.32", + @"x86.bmi.pext.64", + const Signature = struct { ret_len: u8, params: []const Parameter, @@ -4020,6 +4026,43 @@ pub const Intrinsic = enum { }, .attrs = &.{ .nocallback, .nofree, .nosync, .nounwind, .willreturn }, }, + + .@"x86.bmi.pext.32" = .{ + .ret_len = 1, + .params = &.{ + .{ .kind = .{ .type = .i32 } }, + .{ .kind = .{ .type = .i32 } }, + .{ .kind = .{ .type = .i32 } }, + }, + .attrs = &.{ .nocallback, .nofree, .nosync, .nounwind, .{ .memory = Attribute.Memory.all(.none) } }, + }, + .@"x86.bmi.pext.64" = .{ + .ret_len = 1, + .params = &.{ + .{ .kind = .{ .type = .i64 } }, + .{ .kind = .{ .type = .i64 } }, + .{ .kind = .{ .type = .i64 } }, + }, + .attrs = &.{ .nocallback, .nofree, .nosync, .nounwind, .{ .memory = Attribute.Memory.all(.none) } }, + }, + .@"x86.bmi.pdep.32" = .{ + .ret_len = 1, + .params = &.{ + .{ .kind = .{ .type = .i32 } }, + .{ .kind = .{ .type = .i32 } }, + .{ .kind = .{ .type = .i32 } }, + }, + .attrs = &.{ .nocallback, .nofree, .nosync, .nounwind, .{ .memory = Attribute.Memory.all(.none) } }, + }, + .@"x86.bmi.pdep.64" = .{ + .ret_len = 1, + .params = &.{ + .{ .kind = .{ .type = .i64 } }, + .{ .kind = .{ .type = .i64 } }, + .{ .kind = .{ .type = .i64 } }, + }, + .attrs = &.{ .nocallback, .nofree, .nosync, .nounwind, .{ .memory = Attribute.Memory.all(.none) } }, + }, }); }; diff --git a/lib/zig.h b/lib/zig.h index 2d9e7a562647..ec610ec58016 100644 --- a/lib/zig.h +++ b/lib/zig.h @@ -1381,6 +1381,92 @@ zig_builtin_clz(16) zig_builtin_clz(32) zig_builtin_clz(64) +#define zig_builtin_extract_bits(w) \ + static inline uint##w##_t zig_extract_bits_u##w(uint##w##_t source, uint##w##_t 
mask_) { \ + uint##w##_t bb = 1;\ + uint##w##_t result = 0;\ + uint##w##_t mask = mask_;\ + \ + while (mask != 0) {\ + uint##w##_t bit = mask & ~(mask - 1);\ + mask &= ~bit;\ + uint##w##_t source_bit = source & bit;\ + if (source_bit != 0) result |= bb;\ + bb += bb;\ + }\ + \ + return result;\ + } + +#if zig_has_builtin(ia32_pext_di) + static inline uint64_t zig_extract_bits_u64(uint64_t source, uint64_t mask) { + return __builtin_ia32_pext_di(source, mask); + } +#else +zig_builtin_extract_bits(64) +#endif + +#if zig_has_builtin(ia32_pext_si) + static inline uint32_t zig_extract_bits_u32(uint32_t source, uint32_t mask) { + return __builtin_ia32_pext_si(source, mask); + } + + static inline uint16_t zig_extract_bits_u16(uint16_t source, uint16_t mask) { + return (uint16_t)__builtin_ia32_pext_si(source, mask); + } + + static inline uint8_t zig_extract_bits_u8(uint8_t source, uint8_t mask) { + return (uint8_t)__builtin_ia32_pext_si(source, mask); + } +#else +zig_builtin_extract_bits(32) +zig_builtin_extract_bits(16) +zig_builtin_extract_bits(8) +#endif + +#define zig_builtin_deposit_bits(w) \ + static inline uint##w##_t zig_deposit_bits_u##w(uint##w##_t source, uint##w##_t mask_) { \ + uint##w##_t bb = 1;\ + uint##w##_t result = 0;\ + uint##w##_t mask = mask_;\ + \ + while (mask != 0) {\ + uint##w##_t bit = mask & ~(mask - 1);\ + mask &= ~bit;\ + uint##w##_t source_bit = source & bb;\ + if (source_bit != 0) result |= bit;\ + bb += bb;\ + }\ + \ + return result;\ + } + +#if zig_has_builtin(ia32_pdep_di) + static inline uint64_t zig_deposit_bits_u64(uint64_t source, uint64_t mask) { + return __builtin_ia32_pdep_di(source, mask); + } +#else +zig_builtin_deposit_bits(64) +#endif + +#if zig_has_builtin(ia32_pdep_si) + static inline uint32_t zig_deposit_bits_u32(uint32_t source, uint32_t mask) { + return __builtin_ia32_pdep_si(source, mask); + } + + static inline uint16_t zig_deposit_bits_u16(uint16_t source, uint16_t 
mask); + } + + static inline uint8_t zig_deposit_bits_u8(uint8_t source, uint8_t mask) { + return (uint8_t)__builtin_ia32_pdep_si(source, mask); + } +#else +zig_builtin_deposit_bits(32) +zig_builtin_deposit_bits(16) +zig_builtin_deposit_bits(8) +#endif + /* ======================== 128-bit Integer Support ========================= */ #if !defined(zig_has_int128) diff --git a/src/Air.zig b/src/Air.zig index 7afe2bfbb51f..d692e6fecc48 100644 --- a/src/Air.zig +++ b/src/Air.zig @@ -864,6 +864,13 @@ pub const Inst = struct { /// Operand is unused and set to Ref.none work_group_id, + /// Implements @depositBits builtin. + /// Uses the `bin_op` field. + deposit_bits, + /// Implements @extractBits builtin. + /// Uses the `bin_op` field. + extract_bits, + pub fn fromCmpOp(op: std.math.CompareOperator, optimized: bool) Tag { switch (op) { .lt => return if (optimized) .cmp_lt_optimized else .cmp_lt, @@ -1383,6 +1390,8 @@ pub fn typeOfIndex(air: *const Air, inst: Air.Inst.Index, ip: *const InternPool) .div_exact_optimized, .rem_optimized, .mod_optimized, + .deposit_bits, + .extract_bits, => return air.typeOf(datas[@intFromEnum(inst)].bin_op.lhs, ip), .sqrt, @@ -1863,6 +1872,8 @@ pub fn mustLower(air: Air, inst: Air.Inst.Index, ip: *const InternPool) bool { .work_item_id, .work_group_size, .work_group_id, + .deposit_bits, + .extract_bits, => false, .assembly => { diff --git a/src/Air/types_resolved.zig b/src/Air/types_resolved.zig index 79760ea24d30..0125824f85f9 100644 --- a/src/Air/types_resolved.zig +++ b/src/Air/types_resolved.zig @@ -87,6 +87,8 @@ fn checkBody(air: Air, body: []const Air.Inst.Index, zcu: *Zcu) bool { .atomic_store_monotonic, .atomic_store_release, .atomic_store_seq_cst, + .deposit_bits, + .extract_bits, => { if (!checkRef(data.bin_op.lhs, zcu)) return false; if (!checkRef(data.bin_op.rhs, zcu)) return false; diff --git a/src/Liveness.zig b/src/Liveness.zig index 199e81b86b63..f8a72a29b55d 100644 --- a/src/Liveness.zig +++ b/src/Liveness.zig @@ -283,6 
+283,8 @@ pub fn categorizeOperand( .cmp_gte_optimized, .cmp_gt_optimized, .cmp_neq_optimized, + .deposit_bits, + .extract_bits, => { const o = air_datas[@intFromEnum(inst)].bin_op; if (o.lhs == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none); @@ -936,6 +938,8 @@ fn analyzeInst( .memset, .memset_safe, .memcpy, + .deposit_bits, + .extract_bits, => { const o = inst_datas[@intFromEnum(inst)].bin_op; return analyzeOperands(a, pass, data, inst, .{ o.lhs, o.rhs, .none }); diff --git a/src/Liveness/Verify.zig b/src/Liveness/Verify.zig index a8cb81c51b9a..a2ca67f66c54 100644 --- a/src/Liveness/Verify.zig +++ b/src/Liveness/Verify.zig @@ -267,6 +267,8 @@ fn verifyBody(self: *Verify, body: []const Air.Inst.Index) Error!void { .memset, .memset_safe, .memcpy, + .deposit_bits, + .extract_bits, => { const bin_op = data[@intFromEnum(inst)].bin_op; try self.verifyInstOperands(inst, .{ bin_op.lhs, bin_op.rhs, .none }); diff --git a/src/Sema.zig b/src/Sema.zig index ad800c29a640..2f3c2f32937b 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -1391,6 +1391,8 @@ fn analyzeBodyInner( .work_group_id => try sema.zirWorkItem( block, extended, extended.opcode), .in_comptime => try sema.zirInComptime( block), .closure_get => try sema.zirClosureGet( block, extended), + .deposit_bits => try sema.zirDepositExtractBits(block, extended, .deposit_bits), + .extract_bits => try sema.zirDepositExtractBits(block, extended, .extract_bits), // zig fmt: on .set_float_mode => { @@ -26511,6 +26513,129 @@ fn zirBranchHint(sema: *Sema, block: *Block, extended: Zir.Inst.Extended.InstDat } } +fn zirDepositExtractBits( + sema: *Sema, + block: *Block, + extended: Zir.Inst.Extended.InstData, + air_tag: Air.Inst.Tag, +) CompileError!Air.Inst.Ref { + const pt = sema.pt; + const zcu = pt.zcu; + + const extra = sema.code.extraData(Zir.Inst.BinNode, extended.operand).data; + const src = block.nodeOffset(extra.node); + + const lhs_src = block.builtinCallArgSrc(extra.node, 0); + const rhs_src = 
block.builtinCallArgSrc(extra.node, 1); + + const uncasted_lhs = try sema.resolveInst(extra.lhs); + const uncasted_rhs = try sema.resolveInst(extra.rhs); + + const lhs_ty = sema.typeOf(uncasted_lhs); + const rhs_ty = sema.typeOf(uncasted_rhs); + + if (!lhs_ty.isUnsignedInt(zcu) and lhs_ty.zigTypeTag(zcu) != .comptime_int) { + return sema.fail(block, lhs_src, "expected unsigned integer or 'comptime_int', found '{}'", .{lhs_ty.fmt(pt)}); + } + + if (!rhs_ty.isUnsignedInt(zcu) and rhs_ty.zigTypeTag(zcu) != .comptime_int) { + return sema.fail(block, rhs_src, "expected unsigned integer or 'comptime_int', found '{}'", .{rhs_ty.fmt(pt)}); + } + + const instructions = &[_]Air.Inst.Ref{ uncasted_lhs, uncasted_rhs }; + const dest_ty = try sema.resolvePeerTypes(block, src, instructions, .{ + .override = &[_]?LazySrcLoc{ lhs_src, rhs_src }, + }); + + const builtin_name = switch (air_tag) { + .deposit_bits => "@depositBits", + .extract_bits => "@extractBits", + else => unreachable, + }; + + // Coercion errors are intercepted to add a note if the caller is attempting to pass a negative comptime_int + const lhs = sema.coerce(block, dest_ty, uncasted_lhs, lhs_src) catch |err| switch (err) { + error.AnalysisFail => { + const msg = sema.err orelse return err; + const val = (try sema.resolveValue(uncasted_lhs)).?; + if (val.orderAgainstZero(zcu) == .lt) { + try sema.errNote(src, msg, "parameters to {s} must be non-negative", .{builtin_name}); + } + return err; + }, + else => return err, + }; + + const rhs = sema.coerce(block, dest_ty, uncasted_rhs, rhs_src) catch |err| switch (err) { + error.AnalysisFail => { + const msg = sema.err orelse return err; + const val = (try sema.resolveValue(uncasted_rhs)).?; + if (val.orderAgainstZero(zcu) == .lt) { + try sema.errNote(src, msg, "parameters to {s} must be non-negative", .{builtin_name}); + } + return err; + }, + else => return err, + }; + + const maybe_lhs_val = try sema.resolveValue(lhs); + const maybe_rhs_val = try 
sema.resolveValue(rhs); + + // We check for negative values here only if the type is a comptime_int, as negative values + // would have otherwise been filtered out by coercion and the unsigned type restriction + if (dest_ty.zigTypeTag(zcu) == .comptime_int) { + if (maybe_lhs_val) |lhs_val| { + if (!lhs_val.isUndef(zcu) and lhs_val.orderAgainstZero(zcu) == .lt) { + const err = try sema.errMsg(lhs_src, "use of negative value '{}'", .{lhs_val.fmtValue(pt)}); + try sema.errNote(src, err, "parameters to {s} must be non-negative", .{builtin_name}); + return sema.failWithOwnedErrorMsg(block, err); + } + } + + if (maybe_rhs_val) |rhs_val| { + if (!rhs_val.isUndef(zcu) and rhs_val.orderAgainstZero(zcu) == .lt) { + const err = try sema.errMsg(rhs_src, "use of negative value '{}'", .{rhs_val.fmtValue(pt)}); + try sema.errNote(src, err, "parameters to {s} must be non-negative", .{builtin_name}); + return sema.failWithOwnedErrorMsg(block, err); + } + } + } + + // If either of the operands are zero, the result is zero + // If either of the operands are undefined, the result is undefined + if (maybe_lhs_val) |lhs_val| { + if (lhs_val.orderAgainstZero(zcu) == .eq) return Air.internedToRef((try pt.intValue(dest_ty, 0)).toIntern()); + if (lhs_val.isUndef(zcu)) return try pt.undefRef(dest_ty); + } + if (maybe_rhs_val) |rhs_val| { + if (rhs_val.orderAgainstZero(zcu) == .eq) return Air.internedToRef((try pt.intValue(dest_ty, 0)).toIntern()); + if (rhs_val.isUndef(zcu)) return try pt.undefRef(dest_ty); + } + + if (maybe_lhs_val) |lhs_val| { + if (maybe_rhs_val) |rhs_val| { + const dest_val = switch (air_tag) { + .deposit_bits => try sema.intDepositBits(lhs_val, rhs_val, dest_ty), + .extract_bits => try sema.intExtractBits(lhs_val, rhs_val, dest_ty), + else => unreachable, + }; + + return Air.internedToRef(dest_val.toIntern()); + } + } + + const runtime_src = if (maybe_lhs_val == null) lhs_src else rhs_src; + try sema.requireRuntimeBlock(block, src, runtime_src); + + return 
block.addInst(.{ + .tag = air_tag, + .data = .{ .bin_op = .{ + .lhs = lhs, + .rhs = rhs, + } }, + }); +} + fn requireRuntimeBlock(sema: *Sema, block: *Block, src: LazySrcLoc, runtime_src: ?LazySrcLoc) !void { if (block.isComptime()) { const msg, const fail_block = msg: { @@ -37105,6 +37230,64 @@ fn enumHasInt(sema: *Sema, ty: Type, int: Value) CompileError!bool { return enum_type.tagValueIndex(&zcu.intern_pool, int_coerced.toIntern()) != null; } +/// Asserts that the values are positive +fn intDepositBits( + sema: *Sema, + lhs: Value, + rhs: Value, + ty: Type, +) !Value { + // TODO is this a performance issue? maybe we should try the operation without + // resorting to BigInt first. + const pt = sema.pt; + const zcu = pt.zcu; + const arena = sema.arena; + + var lhs_space: Value.BigIntSpace = undefined; + var rhs_space: Value.BigIntSpace = undefined; + const source = lhs.toBigInt(&lhs_space, zcu); + const mask = rhs.toBigInt(&rhs_space, zcu); + + const result_limbs = try arena.alloc( + std.math.big.Limb, + mask.limbs.len, + ); + + var result = std.math.big.int.Mutable{ .limbs = result_limbs, .positive = undefined, .len = undefined }; + + result.depositBits(source, mask); + return try pt.intValue_big(ty, result.toConst()); +} + +/// Asserts that the values are positive +fn intExtractBits( + sema: *Sema, + lhs: Value, + rhs: Value, + ty: Type, +) !Value { + // TODO is this a performance issue? maybe we should try the operation without + // resorting to BigInt first. 
+ const pt = sema.pt; + const zcu = pt.zcu; + const arena = sema.arena; + + var lhs_space: Value.BigIntSpace = undefined; + var rhs_space: Value.BigIntSpace = undefined; + const source = lhs.toBigInt(&lhs_space, zcu); + const mask = rhs.toBigInt(&rhs_space, zcu); + + const result_limbs = try arena.alloc( + std.math.big.Limb, + mask.limbs.len, + ); + + var result = std.math.big.int.Mutable{ .limbs = result_limbs, .positive = undefined, .len = undefined }; + + result.extractBits(source, mask); + return try pt.intValue_big(ty, result.toConst()); +} + /// Asserts the values are comparable. Both operands have type `ty`. /// For vectors, returns true if the comparison is true for ALL elements. /// diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index f7ee7eb064a9..ddf19c746401 100644 --- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -887,6 +887,9 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .work_item_id => unreachable, .work_group_size => unreachable, .work_group_id => unreachable, + + .deposit_bits => return self.fail("TODO implement deposit_bits", .{}), + .extract_bits => return self.fail("TODO implement extract_bits", .{}), // zig fmt: on } diff --git a/src/arch/arm/CodeGen.zig b/src/arch/arm/CodeGen.zig index 8a1600f50ace..5cb7501772e9 100644 --- a/src/arch/arm/CodeGen.zig +++ b/src/arch/arm/CodeGen.zig @@ -876,6 +876,9 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .work_item_id => unreachable, .work_group_size => unreachable, .work_group_id => unreachable, + + .deposit_bits => return self.fail("TODO implement deposit_bits", .{}), + .extract_bits => return self.fail("TODO implement extract_bits", .{}), // zig fmt: on } diff --git a/src/arch/riscv64/CodeGen.zig b/src/arch/riscv64/CodeGen.zig index c66de9dd30af..7ab55ff1511d 100644 --- a/src/arch/riscv64/CodeGen.zig +++ b/src/arch/riscv64/CodeGen.zig @@ -1700,6 +1700,9 @@ fn genBody(func: *Func, body: []const 
Air.Inst.Index) InnerError!void { .work_item_id => unreachable, .work_group_size => unreachable, .work_group_id => unreachable, + + .deposit_bits => return func.fail("TODO implement deposit_bits", .{}), + .extract_bits => return func.fail("TODO implement extract_bits", .{}), // zig fmt: on } diff --git a/src/arch/sparc64/CodeGen.zig b/src/arch/sparc64/CodeGen.zig index 58cd78a7d2d9..d24b53bc944b 100644 --- a/src/arch/sparc64/CodeGen.zig +++ b/src/arch/sparc64/CodeGen.zig @@ -730,6 +730,9 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .work_item_id => unreachable, .work_group_size => unreachable, .work_group_id => unreachable, + + .deposit_bits => @panic("TODO implement deposit_bits"), + .extract_bits => @panic("TODO implement extract_bits"), // zig fmt: on } diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig index 641347bee150..6f96f6d5b840 100644 --- a/src/arch/wasm/CodeGen.zig +++ b/src/arch/wasm/CodeGen.zig @@ -2089,6 +2089,10 @@ fn genInst(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { .work_group_size, .work_group_id, => unreachable, + + .deposit_bits, + .extract_bits, + => |tag| return cg.fail("TODO implement {s}", .{@tagName(tag)}), }; } diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index e85af0d0eefd..99c298a9b86f 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -85897,6 +85897,346 @@ fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { .c_va_end => try cg.airVaEnd(inst), .c_va_start => try cg.airVaStart(inst), .work_item_id, .work_group_size, .work_group_id => unreachable, + + .deposit_bits, + .extract_bits, + => |tag| { + const bin_op = air_datas[@intFromEnum(inst)].bin_op; + var ops = try cg.tempsFromOperands(inst, .{ bin_op.lhs, bin_op.rhs }); + var res: [1]Temp = undefined; + cg.select(&res, &.{cg.typeOf(bin_op.lhs)}, &ops, switch (@as(Mir.Inst.Tag, switch (tag) { + .deposit_bits => .pdep, + .extract_bits => .pext, + else => 
unreachable, + })) { + inline .pdep, .pext => |mir_tag| comptime &.{ + .{ + .required_features = .{ .bmi2, null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .byte }, .{ .unsigned_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .mem, .none } }, + }, + .dst_temps = .{ .{ .mut_rc = .{ .rc = .general_purpose, .ref = .src1 } }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .dst0d, .src1b, ._, ._ }, + .{ ._, ._, mir_tag, .dst0d, .src0d, .dst0d, ._ }, + } }, + }, + .{ + .required_features = .{ .bmi2, null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .byte }, .{ .unsigned_int = .byte }, .any }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .none } }, + }, + .dst_temps = .{ .{ .mut_rc = .{ .rc = .general_purpose, .ref = .src1 } }, .unused }, + .each = .{ + .once = if (mir_tag == .pext) &.{ + .{ ._, ._, .movzx, .dst0d, .src1b, ._, ._ }, + .{ ._, ._, mir_tag, .dst0d, .src0d, .dst0d, ._ }, + } else &.{ + .{ ._, ._, mir_tag, .dst0d, .src0d, .src1d, ._ }, + }, + }, + }, + .{ + .required_features = .{ .bmi2, null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .word }, .{ .unsigned_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .mem, .none } }, + }, + .dst_temps = .{ .{ .mut_rc = .{ .rc = .general_purpose, .ref = .src1 } }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .dst0d, .src1w, ._, ._ }, + .{ ._, ._, mir_tag, .dst0d, .src0d, .dst0d, ._ }, + } }, + }, + .{ + .required_features = .{ .bmi2, null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .word }, .{ .unsigned_int = .word }, .any }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .to_gpr, .none } }, + }, + .dst_temps = .{ .{ .mut_rc = .{ .rc = .general_purpose, .ref = .src1 } }, .unused }, + .each = .{ + .once = if (mir_tag == .pext) &.{ + .{ ._, ._, .movzx, .dst0d, .src1w, ._, ._ }, + .{ ._, ._, mir_tag, .dst0d, .src0d, .dst0d, ._ }, + } else &.{ + .{ ._, ._, mir_tag, .dst0d, .src0d, .src1d, ._ }, + }, + }, + }, + .{ + 
.required_features = .{ .bmi2, null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .dword }, .{ .unsigned_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .mem, .none } }, + .{ .src = .{ .to_gpr, .to_gpr, .none } }, + }, + .dst_temps = .{ .{ .mut_rc = .{ .rc = .general_purpose, .ref = .src0 } }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, mir_tag, .dst0d, .src0d, .src1d, ._ }, + } }, + }, + .{ + .required_features = .{ .bmi2, .@"64bit", null, null }, + .src_constraints = .{ .{ .unsigned_int = .qword }, .{ .unsigned_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ .to_gpr, .mem, .none } }, + .{ .src = .{ .to_gpr, .to_gpr, .none } }, + }, + .dst_temps = .{ .{ .mut_rc = .{ .rc = .general_purpose, .ref = .src0 } }, .unused }, + .each = .{ .once = &.{ + .{ ._, ._, mir_tag, .dst0q, .src0q, .src1q, ._ }, + } }, + }, + .{ + .src_constraints = .{ .{ .unsigned_int = .byte }, .{ .unsigned_int = .byte }, .any }, + .patterns = &.{.{ .src = .{ .{ .to_param_gpr = .{ .cc = .ccc, .index = 0 } }, .mem, .none } }}, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "__" ++ @tagName(mir_tag) ++ "_" ++ "u32" } } }, + .{ .type = .u32, .kind = .{ .param_gpr = .{ .cc = .ccc, .index = 1 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_gpr = .{ .cc = .ccc, .index = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .tmp1d, .src1b, ._, ._ }, + .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, + } }, + }, + .{ + .src_constraints = .{ .{ .unsigned_int = .byte }, .{ .unsigned_int = .byte }, .any }, + .patterns = &.{.{ .src = .{ + .{ .to_param_gpr = .{ .cc = .ccc, .index = 0 } }, + .{ .to_param_gpr = .{ .cc = .ccc, .index = 1 } }, + .none, + } }}, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ 
.symbol = &.{ .name = "__" ++ @tagName(mir_tag) ++ "_" ++ "u32" } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_gpr = .{ .cc = .ccc, .index = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ + .once = if (mir_tag == .pext) &.{ + .{ ._, ._, .movzx, .src1d, .src1b, ._, ._ }, + .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, + } else &.{ + .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, + }, + }, + }, + .{ + .src_constraints = .{ .{ .unsigned_int = .word }, .{ .unsigned_int = .word }, .any }, + .patterns = &.{.{ .src = .{ .{ .to_param_gpr = .{ .cc = .ccc, .index = 0 } }, .mem, .none } }}, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "__" ++ @tagName(mir_tag) ++ "_" ++ "u32" } } }, + .{ .type = .u32, .kind = .{ .param_gpr = .{ .cc = .ccc, .index = 1 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_gpr = .{ .cc = .ccc, .index = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .movzx, .tmp1d, .src1w, ._, ._ }, + .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, + } }, + }, + .{ + .src_constraints = .{ .{ .unsigned_int = .word }, .{ .unsigned_int = .word }, .any }, + .patterns = &.{.{ .src = .{ + .{ .to_param_gpr = .{ .cc = .ccc, .index = 0 } }, + .{ .to_param_gpr = .{ .cc = .ccc, .index = 1 } }, + .none, + } }}, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "__" ++ @tagName(mir_tag) ++ "_" ++ "u32" } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_gpr = .{ .cc = .ccc, .index = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ + 
.once = if (mir_tag == .pext) &.{ + .{ ._, ._, .movzx, .src1d, .src1w, ._, ._ }, + .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, + } else &.{ + .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, + }, + }, + }, + .{ + .src_constraints = .{ .{ .unsigned_int = .dword }, .{ .unsigned_int = .dword }, .any }, + .patterns = &.{ + .{ .src = .{ + .{ .to_param_gpr = .{ .cc = .ccc, .index = 0 } }, + .{ .to_param_gpr = .{ .cc = .ccc, .index = 1 } }, + .none, + } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "__" ++ @tagName(mir_tag) ++ "_" ++ "u32" } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_gpr = .{ .cc = .ccc, .index = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, + } }, + }, + .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .qword }, .{ .unsigned_int = .qword }, .any }, + .patterns = &.{ + .{ .src = .{ + .{ .to_param_gpr = .{ .cc = .ccc, .index = 0 } }, + .{ .to_param_gpr = .{ .cc = .ccc, .index = 1 } }, + .none, + } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "__" ++ @tagName(mir_tag) ++ "_" ++ "u64" } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_gpr = .{ .cc = .ccc, .index = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, + } }, + }, + .{ + .required_features = .{ .@"64bit", null, null, null }, + .src_constraints = .{ .{ .unsigned_int = .xword }, .{ .unsigned_int = .xword }, .any }, + .patterns = &.{ + .{ .src = .{ + .{ .to_param_gpr_pair = .{ .cc = .ccc, .index = 0 } }, + .{ 
.to_param_gpr_pair = .{ .cc = .ccc, .index = 2 } }, + .none, + } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "__" ++ @tagName(mir_tag) ++ "_" ++ "u128" } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .{ .ret_gpr_pair = .{ .cc = .ccc, .index = 0 } }, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, + } }, + }, + .{ + .src_constraints = .{ + .{ .remainder_unsigned_int = .{ .of = .dword, .is = .dword } }, + .{ .remainder_unsigned_int = .{ .of = .dword, .is = .dword } }, + .any, + }, + .patterns = &.{ + .{ .src = .{ .to_mem, .to_mem, .none } }, + }, + .call_frame = .{ .alignment = .@"16" }, + .extra_temps = .{ + .{ .type = .usize, .kind = .{ .symbol = &.{ .name = "__" ++ @tagName(mir_tag) ++ "_" ++ "bigint" } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .index = 0 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .index = 1 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .index = 2 } } }, + .{ .type = .usize, .kind = .{ .param_gpr = .{ .cc = .ccc, .index = 3 } } }, + .unused, + .unused, + .unused, + .unused, + .unused, + .unused, + }, + .dst_temps = .{ .mem, .unused }, + .clobbers = .{ .eflags = true, .caller_preserved = .ccc }, + .each = .{ .once = &.{ + .{ ._, ._, .lea, .tmp1p, .mem(.dst0), ._, ._ }, + .{ ._, ._, .lea, .tmp2p, .mem(.src0), ._, ._ }, + .{ ._, ._, .lea, .tmp3p, .mem(.src1), ._, ._ }, + .{ ._, ._, .mov, .tmp4d, .sa(.src0, .add_bit_size), ._, ._ }, + .{ ._, ._, .call, .tmp0d, ._, ._, ._ }, + } }, + }, + }, + else => unreachable, + }) catch |err| switch (err) { + error.SelectFailed => return cg.fail("failed to select {s} {} {} {}", .{ + @tagName(tag), + cg.typeOf(bin_op.lhs).fmt(pt), + ops[0].tracking(cg), + ops[1].tracking(cg), + }), + else 
=> |e| return e, + }; + try res[0].finish(inst, &.{ bin_op.lhs, bin_op.rhs }, &ops, cg); + }, } try cg.resetTemps(@enumFromInt(0)); cg.checkInvariantsAfterAirInst(); diff --git a/src/codegen/c.zig b/src/codegen/c.zig index 2aee078b110f..1025f3f23d1d 100644 --- a/src/codegen/c.zig +++ b/src/codegen/c.zig @@ -3496,6 +3496,8 @@ fn genBodyInner(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, .call_never_tail => try airCall(f, inst, .never_tail), .call_never_inline => try airCall(f, inst, .never_inline), + .deposit_bits => try airDepositExtractBits(f, inst, "deposit_bits"), + .extract_bits => try airDepositExtractBits(f, inst, "extract_bits"), // zig fmt: on }; if (result_value == .new_local) { @@ -7690,6 +7692,19 @@ fn airCVaCopy(f: *Function, inst: Air.Inst.Index) !CValue { return local; } +fn airDepositExtractBits(f: *Function, inst: Air.Inst.Index, operation: []const u8) !CValue { + const bin_op = f.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + const ty = f.typeOf(bin_op.lhs); + + const ctype = try f.ctypeFromType(ty, .complete); + const is_big = ctype.info(&f.object.dg.ctype_pool) == .array; + + if (is_big) return f.fail("TODO: C backend: implement @{{deposit,extract}}Bits for bigints", .{}); + if (f.byteSize(ctype) > 8) return f.fail("TODO: C backend: implement @{{deposit,extract}}Bits for u128", .{}); + + return try airBinBuiltinCall(f, inst, operation, .none); +} + fn toMemoryOrder(order: std.builtin.AtomicOrder) [:0]const u8 { return switch (order) { // Note: unordered is actually even less atomic than relaxed diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index 06df4232516c..98e03990b8d7 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -5053,6 +5053,8 @@ pub const FuncGen = struct { break :res res; }, + .deposit_bits, + .extract_bits => |tag| try self.airDepositExtractBits(inst, tag), // zig fmt: on }; if (val != .none) try self.func_inst_table.putNoClobber(self.gpa, inst.toRef(), val); @@ -10802,6 
+10804,166 @@ pub const FuncGen = struct { }; } + fn airDepositExtractBits(self: *FuncGen, inst: Air.Inst.Index, tag: Air.Inst.Tag) !Builder.Value { + if (self.liveness.isUnused(inst)) return .none; + + const o = self.ng.object; + + const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + const source = try self.resolveInst(bin_op.lhs); + const mask = try self.resolveInst(bin_op.rhs); + const inst_ty = self.typeOfIndex(inst); + + const target = o.pt.zcu.getTarget(); + + const llvm_ty = try o.lowerType(inst_ty); + const bits: u16 = @intCast(llvm_ty.scalarBits(&o.builder)); + + switch (target.cpu.arch) { + .x86, .x86_64 => |arch| blk: { + // Doesn't have pdep + if (!std.Target.x86.featureSetHas(target.cpu.features, .bmi2)) break :blk; + + const supports_64 = arch == .x86_64; + // Integer size doesn't match the available instruction(s) + if (!(bits <= 32 or (bits <= 64 and supports_64))) break :blk; + + const compiler_rt_bits = compilerRtIntBits(bits); + + var buf: ["x86.bmi.pdep.32".len]u8 = undefined; + const intrinsic = std.meta.stringToEnum(Builder.Intrinsic, std.fmt.bufPrint(&buf, "x86.bmi.{s}.{d}", .{ + switch (tag) { + .deposit_bits => "pdep", + .extract_bits => "pext", + else => unreachable, + }, + compiler_rt_bits, + }) catch unreachable).?; + + const needs_extend = bits != compiler_rt_bits; + const extended_ty = if (needs_extend) try o.builder.intType(compiler_rt_bits) else llvm_ty; + + const params = .{ + if (needs_extend) try self.wip.cast(.zext, source, extended_ty, "") else source, + if (needs_extend) try self.wip.cast(.zext, mask, extended_ty, "") else mask, + }; + + const result = try self.wip.callIntrinsic( + .normal, + .none, + intrinsic, + &.{}, + ¶ms, + "", + ); + + return if (needs_extend) try self.wip.cast(.trunc, result, llvm_ty, "") else result; + }, + else => {}, + } + + return try self.genDepositExtractBitsEmulated(tag, bits, source, mask, llvm_ty); + } + + fn genDepositExtractBitsEmulated(self: *FuncGen, tag: 
Air.Inst.Tag, bits: u16, source: Builder.Value, mask: Builder.Value, ty: Builder.Type) !Builder.Value { + const o = self.ng.object; + const zcu = o.pt.zcu; + + if (bits <= 128) { + const rt_int_bits = compilerRtIntBits(bits); + const needs_extend = bits != rt_int_bits; + const rt_int_ty = try o.builder.intType(rt_int_bits); + + const fn_name = try o.builder.strtabStringFmt("__{s}_u{d}", .{ + switch (tag) { + .deposit_bits => "pdep", + .extract_bits => "pext", + else => unreachable, + }, + rt_int_bits, + }); + + var extended_source = try self.wip.conv(.unsigned, source, rt_int_ty, ""); + var extended_mask = try self.wip.conv(.unsigned, mask, rt_int_ty, ""); + + var param_ty = rt_int_ty; + if (rt_int_bits == 128 and (o.target.os.tag == .windows and o.target.cpu.arch == .x86_64)) { + // On Windows x86_64, we expect i128 to be passed in an 2xi64 for both parameters and + // the return type. + param_ty = try o.builder.vectorType(.normal, 2, .i64); + extended_source = try self.wip.cast(.bitcast, extended_source, param_ty, ""); + extended_mask = try self.wip.cast(.bitcast, extended_mask, param_ty, ""); + } + + const libc_fn = try self.getLibcFunction(fn_name, &.{ param_ty, param_ty }, param_ty); + var result = try self.wip.call( + .normal, + .ccc, + .none, + libc_fn.typeOf(&o.builder), + libc_fn.toValue(&o.builder), + &.{ extended_source, extended_mask }, + "", + ); + + if (param_ty != rt_int_ty) result = try self.wip.cast(.bitcast, result, rt_int_ty, ""); + if (needs_extend) result = try self.wip.cast(.trunc, result, ty, ""); + return result; + } + + // Rounded bits to the nearest 32, as limb size is 32. 
+ const extended_bits = (((bits - 1) / 32) + 1) * 32; + const needs_extend = bits != extended_bits; + const extended_ty = if (needs_extend) try o.builder.intType(extended_bits) else ty; + + const source_extended = if (needs_extend) try self.wip.cast(.zext, source, extended_ty, "") else source; + const mask_extended = if (needs_extend) try self.wip.cast(.zext, mask, extended_ty, "") else mask; + const zeroes_extended = try o.builder.intValue(extended_ty, 0); + + const alignment = Type.u32.abiAlignment(zcu).toLlvm(); + + const source_pointer = try self.buildAlloca(extended_ty, alignment); + const mask_pointer = try self.buildAlloca(extended_ty, alignment); + const result_pointer = try self.buildAlloca(extended_ty, alignment); + + _ = try self.wip.store(.normal, source_extended, source_pointer, alignment); + _ = try self.wip.store(.normal, mask_extended, mask_pointer, alignment); + _ = try self.wip.store(.normal, zeroes_extended, result_pointer, alignment); + + const fn_name = try o.builder.strtabStringFmt("__{s}_bigint", .{switch (tag) { + .deposit_bits => "pdep", + .extract_bits => "pext", + else => unreachable, + }}); + + const pointer_ty = source_pointer.typeOfWip(&self.wip); + const usize_ty = try o.lowerType(Type.usize); + const void_ty = try o.lowerType(Type.void); + + const bits_value = try o.builder.intValue(usize_ty, bits); + + const params = .{ + result_pointer, + source_pointer, + mask_pointer, + bits_value, + }; + + const libc_fn = try self.getLibcFunction(fn_name, &.{ pointer_ty, pointer_ty, pointer_ty, usize_ty }, void_ty); + _ = try self.wip.call( + .normal, + .ccc, + .none, + libc_fn.typeOf(&o.builder), + libc_fn.toValue(&o.builder), + ¶ms, + "", + ); + + const result = try self.wip.load(.normal, extended_ty, result_pointer, alignment, ""); + return if (needs_extend) try self.wip.cast(.trunc, result, ty, "") else result; + } + fn getErrorNameTable(self: *FuncGen) Allocator.Error!Builder.Variable.Index { const o = self.ng.object; const pt = o.pt; diff 
--git a/src/print_air.zig b/src/print_air.zig index 73e075a0b216..949481654360 100644 --- a/src/print_air.zig +++ b/src/print_air.zig @@ -162,6 +162,8 @@ const Writer = struct { .memcpy, .memset, .memset_safe, + .deposit_bits, + .extract_bits, => try w.writeBinOp(s, inst), .is_null, diff --git a/src/print_zir.zig b/src/print_zir.zig index 08e1a30368ea..b56f63d9ecd4 100644 --- a/src/print_zir.zig +++ b/src/print_zir.zig @@ -600,6 +600,8 @@ const Writer = struct { .wasm_memory_grow, .prefetch, .c_va_arg, + .deposit_bits, + .extract_bits, => { const inst_data = self.code.extraData(Zir.Inst.BinNode, extended.operand).data; try self.writeInstRef(stream, inst_data.lhs); diff --git a/stage1/zig.h b/stage1/zig.h index 2d9e7a562647..ec610ec58016 100644 --- a/stage1/zig.h +++ b/stage1/zig.h @@ -1381,6 +1381,92 @@ zig_builtin_clz(16) zig_builtin_clz(32) zig_builtin_clz(64) +#define zig_builtin_extract_bits(w) \ + static inline uint##w##_t zig_extract_bits_u##w(uint##w##_t source, uint##w##_t mask_) { \ + uint##w##_t bb = 1;\ + uint##w##_t result = 0;\ + uint##w##_t mask = mask_;\ + \ + while (mask != 0) {\ + uint##w##_t bit = mask & ~(mask - 1);\ + mask &= ~bit;\ + uint##w##_t source_bit = source & bit;\ + if (source_bit != 0) result |= bb;\ + bb += bb;\ + }\ + \ + return result;\ + } + +#if zig_has_builtin(ia32_pext_di) + static inline uint64_t zig_extract_bits_u64(uint64_t source, uint64_t mask) { + return __builtin_ia32_pext_di(source, mask); + } +#else +zig_builtin_extract_bits(64) +#endif + +#if zig_has_builtin(ia32_pext_si) + static inline uint32_t zig_extract_bits_u32(uint32_t source, uint32_t mask) { + return __builtin_ia32_pext_si(source, mask); + } + + static inline uint16_t zig_extract_bits_u16(uint16_t source, uint16_t mask) { + return (uint16_t)__builtin_ia32_pext_si(source, mask); + } + + static inline uint8_t zig_extract_bits_u8(uint8_t source, uint8_t mask) { + return (uint8_t)__builtin_ia32_pext_si(source, mask); + } +#else +zig_builtin_extract_bits(32) 
+zig_builtin_extract_bits(16) +zig_builtin_extract_bits(8) +#endif + +#define zig_builtin_deposit_bits(w) \ + static inline uint##w##_t zig_deposit_bits_u##w(uint##w##_t source, uint##w##_t mask_) { \ + uint##w##_t bb = 1;\ + uint##w##_t result = 0;\ + uint##w##_t mask = mask_;\ + \ + while (mask != 0) {\ + uint##w##_t bit = mask & ~(mask - 1);\ + mask &= ~bit;\ + uint##w##_t source_bit = source & bb;\ + if (source_bit != 0) result |= bit;\ + bb += bb;\ + }\ + \ + return result;\ + } + +#if zig_has_builtin(ia32_pext_di) + static inline uint64_t zig_deposit_bits_u64(uint64_t source, uint64_t mask) { + return __builtin_ia32_pdep_di(source, mask); + } +#else +zig_builtin_deposit_bits(64) +#endif + +#if zig_has_builtin(ia32_pext_si) + static inline uint32_t zig_deposit_bits_u32(uint32_t source, uint32_t mask) { + return __builtin_ia32_pdep_si(source, mask); + } + + static inline uint16_t zig_deposit_bits_u16(uint16_t source, uint16_t mask) { + return (uint16_t)__builtin_ia32_pdep_si(source, mask); + } + + static inline uint8_t zig_deposit_bits_u8(uint8_t source, uint8_t mask) { + return (uint8_t)__builtin_ia32_pdep_si(source, mask); + } +#else +zig_builtin_deposit_bits(32) +zig_builtin_deposit_bits(16) +zig_builtin_deposit_bits(8) +#endif + /* ======================== 128-bit Integer Support ========================= */ #if !defined(zig_has_int128) diff --git a/stage1/zig1.wasm b/stage1/zig1.wasm index 2eb55170d047..9c39fe885e71 100644 Binary files a/stage1/zig1.wasm and b/stage1/zig1.wasm differ diff --git a/test/behavior.zig b/test/behavior.zig index 8006d8364d13..52ee5a2b47e4 100644 --- a/test/behavior.zig +++ b/test/behavior.zig @@ -23,9 +23,10 @@ test { _ = @import("behavior/const_slice_child.zig"); _ = @import("behavior/decl_literals.zig"); _ = @import("behavior/decltest.zig"); - _ = @import("behavior/duplicated_test_names.zig"); _ = @import("behavior/defer.zig"); + _ = @import("behavior/deposit_extract_bits.zig"); _ = @import("behavior/destructure.zig"); + _ = 
@import("behavior/duplicated_test_names.zig"); _ = @import("behavior/empty_union.zig"); _ = @import("behavior/enum.zig"); _ = @import("behavior/error.zig"); diff --git a/test/behavior/deposit_extract_bits.zig b/test/behavior/deposit_extract_bits.zig new file mode 100644 index 000000000000..b8791796a134 --- /dev/null +++ b/test/behavior/deposit_extract_bits.zig @@ -0,0 +1,211 @@ +const std = @import("std"); +const builtin = @import("builtin"); +const mem = std.mem; +const expect = std.testing.expect; +const expectEqual = std.testing.expectEqual; + +const supports_pext_pdep = switch (builtin.zig_backend) { + .stage2_llvm, .stage2_c => true, + .stage2_x86_64 => builtin.target.os.tag != .windows, + else => false, +}; + +test "@depositBits u64" { + if (!supports_pext_pdep) return error.SkipZigTest; // TODO + + const S = struct { + pub fn doTheTest() !void { + var a: u64 = 0; + var b: u64 = 0xFFFF_FFFF_FFFF_FFFF; + var c: u64 = 0x1234_5678_9012_3456; + var d: u64 = 0x00F0_FF00_F00F_00FF; + + _ = &a; + _ = &b; + _ = &c; + _ = &d; + + try expect(@depositBits(b, a) == 0); + try expect(@depositBits(a, b) == 0); + + try expect(@depositBits(b, c) == c); + try expect(@depositBits(b, d) == d); + + try expect(@depositBits(c, d) == 0x0000_1200_3004_0056); + } + }; + + try S.doTheTest(); + try comptime S.doTheTest(); +} + +test "@depositBits u128" { + if (!supports_pext_pdep) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO #19991 + + const S = struct { + pub fn doTheTest() !void { + var a: u64 = 0x1234_5678_9012_3456; + var b: u128 = 0x00F0_FF00_F00F_00FF << 64; + + _ = &a; + _ = &b; + + try expect(@depositBits(a, b) == 0x0000_1200_3004_0056 << 64); + } + }; + + try S.doTheTest(); + try comptime S.doTheTest(); +} + +test "@depositBits u256" { + if (!supports_pext_pdep) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO #19991 + + const S = struct { + pub fn 
doTheTest() !void { + var a: u64 = 0x1234_5678_9ABC_DEF0; + var b: u256 = 0x0F00_0FF0_0F0F_FF00 << 174; + + _ = &a; + _ = &b; + + try expect(@depositBits(a, b) == 0x0A00_0BC0_0D0E_F000 << 174); + } + }; + + try S.doTheTest(); + try comptime S.doTheTest(); +} + +test "@extractBits u64" { + if (!supports_pext_pdep) return error.SkipZigTest; // TODO + + const S = struct { + pub fn doTheTest() !void { + var a: u64 = 0; + var b: u64 = 0xFFFF_FFFF_FFFF_FFFF; + var c: u64 = 0x1234_5678_9012_3456; + var d: u64 = 0x00F0_FF00_F00F_00FF; + + _ = &a; + _ = &b; + _ = &c; + _ = &d; + + try expect(@extractBits(b, a) == 0); + try expect(@extractBits(a, b) == 0); + + try expect(@extractBits(c, b) == c); + try expect(@extractBits(d, b) == d); + + try expect(@extractBits(c, d) == 0x0356_9256); + } + }; + + try S.doTheTest(); + try comptime S.doTheTest(); +} + +test "@extractBits u128" { + if (!supports_pext_pdep) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO #19991 + + const S = struct { + pub fn doTheTest() !void { + var a: u128 = 0x1234_5678_9012_3456 << 64; + var b: u128 = 0x00F0_FF00_F00F_00FF << 64; + + _ = &a; + _ = &b; + + try expect(@extractBits(a, b) == 0x0356_9256); + } + }; + + try S.doTheTest(); + try comptime S.doTheTest(); +} + +test "@extractBits u256" { + if (!supports_pext_pdep) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO #19991 + + const S = struct { + pub fn doTheTest() !void { + var a: u256 = 0x1234_5678_9ABC_DEF0 << 96; + var b: u256 = 0x0F00_0FF0_0F0F_FF00 << 96; + + _ = &a; + _ = &b; + + try expect(@extractBits(a, b) == 0x0267_ACDE); + } + }; + + try S.doTheTest(); + try comptime S.doTheTest(); +} + +test "@depositBits" { + if (!supports_pext_pdep) return error.SkipZigTest; // TODO + + const S = struct { + pub fn doTheTest() !void { + try expectDepositBits(u5, 0xc, 0x0, 0x0); + try expectDepositBits(u8, 0x34, 0x3e, 0x28); + try 
expectDepositBits(u12, 0x8d1, 0x3ff, 0xd1); + try expectDepositBits(u16, 0x71bf, 0x3af1, 0x32f1); + try expectDepositBits(u32, 0x3bae5063, 0x7b17b132, 0x1200a012); + try expectDepositBits(u48, 0x434aa15ff2fa, 0xce370a6c311, 0xce34086c210); + try expectDepositBits(u64, 0x8361fc9b827793a6, 0xe67fcd567987eee6, 0x425c041639026a24); + + if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO #19991 + + try expectDepositBits(u97, 0x171f755a01d485c4c34c18c81, 0xac06c853b200585f371570eb, 0x80044800a200084030142001); + try expectDepositBits(u128, 0xb7be70a644ee77116f7265b2a4b95a8b, 0x6c3396ebe8de95f9eaf62d08b2c3cb56, 0x80292e38818856148442c0090c14046); + try expectDepositBits(u185, 0x4a0774246e045222bb0ed34d184b1bbde1fc99c9ca0e89, 0x1b91d49bb592ec503cce5e517e87137fff828329d15be8f, 0x11811410a0104c0018ca5a510687104ce4828021001a809); + try expectDepositBits(u256, 0x43837440edafe142bd5b2f022f8a05d596c98b3c4be1ba19f4df4f9cbaadbda2, 0x86942d4fa0882cfeea9b45ad11334e0877b81e6c3e9c8b01a38c673778c8a1d3, 0x8280214120800cc44a98018d0100480875180e640e908a008384223338888102); + try expectDepositBits(u479, 0x4b9850b7dacb9a133557b25750455b9aead11be92175443d26db30bdd39a81e5a9a3a106d679f35067f76e832f15e13af81b56400bbe0ac9dff4cb06, 0x2c318fa22f8ae1373baa74eed5b70b1c7b7ab0bd6ea4804f88f87b21464ad5ee017cacd69a8c82bdcb68fe0b71e787eeda6d770d3c80f03a5b805dcc, 0x203084000f026024320a500805320a0c6370a0900c24804e88b0720002409584013c0086888c8200c320b200000585e8084c23093c80e02242804048); + } + }; + + try S.doTheTest(); + try comptime S.doTheTest(); +} + +test "@extractBits" { + if (!supports_pext_pdep) return error.SkipZigTest; // TODO + + const S = struct { + pub fn doTheTest() !void { + try expectExtractBits(u5, 0x1c, 0xe, 0x6); + try expectExtractBits(u8, 0xc1, 0xbe, 0x20); + try expectExtractBits(u12, 0x8fd, 0x910, 0x5); + try expectExtractBits(u16, 0x694c, 0xaaea, 0xca); + try expectExtractBits(u32, 0xa9f97bcf, 0x64f207c2, 0x179f); + try expectExtractBits(u48, 0x32901c841c2a, 
0x3721b7ff376d, 0x6832118c); + try expectExtractBits(u64, 0xbc1ba402eaabd49b, 0x8324f9742e70d227, 0x21406ae3); + + if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO #19991 + + try expectExtractBits(u97, 0x12c7ffc54e5772313dae0aa7a, 0x144e7728badaa0c5edee2f2ab, 0x15fe0da89f7aae); + try expectExtractBits(u128, 0x18eb4eaa5e93441fa28d2860de22961b, 0x3b89eec7dd369bb8634b8da908272721, 0xcebc9e501b2a2699); + try expectExtractBits(u185, 0x1fad06e744cee4f42aa80057dd1fb8b86a2281d124e389e, 0x1a5f25ae5516369fd211e040df64b5fb97ca12d189474b8, 0x1cd5ba75a083f4f2e084f16b); + try expectExtractBits(u256, 0xb5db52469100b3796a6981ed441d685ede3c39e423d91ff5dc33d0ae3696067c, 0x2b03ad2a509cc14a8cfc71b9cfbadee93ab976d6335c3897d5188cec3c89081a, 0x4e0285c9a1da86514ee66af9f7d4bec6); + try expectExtractBits(u479, 0x7c44ec50c139a0fb34d51fa28a9f63f9940e578df33e21792c25b4a4e931df79bcbe45eb5cce05b0e73b5d01d0bc9bd4677e2217285c390012de90cf, 0x3facb493aa8150da7350b5f7ef349addba0fc293a258319cb61c1b224f07f0e096cf117bdb0e2338a7eae3e88e8e392161be97b90e23b879c8c51333, 0x3c1d484fb33d294c7da739eeb28c593afae77739df3a4239cef88b0380743); + } + }; + + try S.doTheTest(); + try comptime S.doTheTest(); +} + +fn expectDepositBits(comptime T: type, src: T, dst: T, exp: T) !void { + return expectEqual(@depositBits(src, dst), exp); +} + +fn expectExtractBits(comptime T: type, src: T, dst: T, exp: T) !void { + return expectEqual(@extractBits(src, dst), exp); +} diff --git a/test/cases/compile_errors/deposit_bits_negative_comptime_int.zig b/test/cases/compile_errors/deposit_bits_negative_comptime_int.zig new file mode 100644 index 000000000000..d8cb4cf9d356 --- /dev/null +++ b/test/cases/compile_errors/deposit_bits_negative_comptime_int.zig @@ -0,0 +1,12 @@ +export fn entry() void { + const a = 0; + const b = -1; + const res = @depositBits(a, b); + _ = res; +} + +// error +// is_test=true +// +// :4:33: error: use of negative value '-1' +// :4:17: note: parameters to @depositBits must be 
non-negative diff --git a/test/cases/compile_errors/deposit_bits_signed_type.zig b/test/cases/compile_errors/deposit_bits_signed_type.zig new file mode 100644 index 000000000000..c50f117773e4 --- /dev/null +++ b/test/cases/compile_errors/deposit_bits_signed_type.zig @@ -0,0 +1,13 @@ +export fn entry() void { + var a: i32 = 0; + var b: i32 = 0; + var res = @depositBits(a, b); + _ = &a; + _ = &b; + _ = &res; +} + +// error +// is_test=true +// +// :4:28: error: expected unsigned integer or 'comptime_int', found 'i32'