diff --git a/doc/langref.html.in b/doc/langref.html.in index 6f3e9961c389..3728b9a71637 100644 --- a/doc/langref.html.in +++ b/doc/langref.html.in @@ -8558,6 +8558,33 @@ test "main" { {#see_also|@cVaArg|@cVaCopy|@cVaEnd#} {#header_close#} + {#header_open|@depositBits#} +
{#syntax#}@depositBits(source: T, mask: T) T{#endsyntax#}
+

+ {#syntax#}T{#endsyntax#} must be an unsigned integer type, or a {#syntax#}comptime_int{#endsyntax#} (for which both parameters must be positive). {#syntax#}T{#endsyntax#} is determined by peer-type resolution. +

+

+ Uses a mask to transfer contiguous lower bits of the {#syntax#}source{#endsyntax#} operand to the destination, depositing them at the bit positions that are set in the mask. All other bits in the destination are zeroed. +

+

+ Currently, only x86 processors with BMI2 enabled support this in hardware. On processors without support for the instruction, it will be emulated. AMD processors before Zen 3 implement the corresponding instruction (PDEP) in microcode. It may be faster to use an alternative method in both of these cases. +

+

+ Example: +

+ + {#code_begin|test|test_depositbits_builtin#} +const std = @import("std"); + +test "deposit bits" { + comptime { + try std.testing.expectEqual(@depositBits(0x00001234, 0xf0f0f0f0), 0x10203040); + } +} + {#code_end#} + {#see_also|@extractBits#} + {#header_close#} + {#header_open|@divExact#}
{#syntax#}@divExact(numerator: T, denominator: T) T{#endsyntax#}

@@ -8726,6 +8753,33 @@ export fn @"A function name that is a complete sentence."() void {} {#see_also|@export#} {#header_close#} + {#header_open|@extractBits#} +

{#syntax#}@extractBits(source: T, mask: T) T{#endsyntax#}
+

+ {#syntax#}T{#endsyntax#} must be an unsigned integer type, or a {#syntax#}comptime_int{#endsyntax#} (for which both parameters must be positive). {#syntax#}T{#endsyntax#} is determined by peer-type resolution. +

+

+ Uses a mask to extract the bits of the {#syntax#}source{#endsyntax#} operand at the bit positions that are set in the mask, packing them into the contiguous lower bits of the destination. The upper bits of the destination are zeroed. +

+

+ Currently, only x86 processors with BMI2 enabled support this in hardware. On processors without support for the instruction, it will be emulated. AMD processors before Zen 3 implement the corresponding instruction (PEXT) in microcode. It may be faster to use an alternative method in both of these cases. +

+

+ Example: +

+ + {#code_begin|test|test_extractbits_builtin#} +const std = @import("std"); + +test "extract bits" { + comptime { + try std.testing.expectEqual(@extractBits(0x12345678, 0xf0f0f0f0), 0x00001357); + } +} + {#code_end#} + {#see_also|@depositBits#} + {#header_close#} + {#header_open|@fence#}
{#syntax#}@fence(order: AtomicOrder) void{#endsyntax#}

diff --git a/lib/compiler_rt.zig b/lib/compiler_rt.zig index 173e6af85a5e..5e1cc86abebc 100644 --- a/lib/compiler_rt.zig +++ b/lib/compiler_rt.zig @@ -9,6 +9,7 @@ comptime { _ = @import("compiler_rt/popcount.zig"); _ = @import("compiler_rt/bswap.zig"); _ = @import("compiler_rt/cmp.zig"); + _ = @import("compiler_rt/pdeppext.zig"); _ = @import("compiler_rt/shift.zig"); _ = @import("compiler_rt/negXi2.zig"); diff --git a/lib/compiler_rt/pdeppext.zig b/lib/compiler_rt/pdeppext.zig new file mode 100644 index 000000000000..c9784f946b23 --- /dev/null +++ b/lib/compiler_rt/pdeppext.zig @@ -0,0 +1,177 @@ +const std = @import("std"); +const builtin = @import("builtin"); +const common = @import("common.zig"); + +const Limb = u32; +const Log2Limb = u5; + +comptime { + @export(__pdep_bigint, .{ .name = "__pdep_bigint", .linkage = common.linkage, .visibility = common.visibility }); + @export(__pdep_u32, .{ .name = "__pdep_u32", .linkage = common.linkage, .visibility = common.visibility }); + @export(__pdep_u64, .{ .name = "__pdep_u64", .linkage = common.linkage, .visibility = common.visibility }); + @export(__pdep_u128, .{ .name = "__pdep_u128", .linkage = common.linkage, .visibility = common.visibility }); + + @export(__pext_bigint, .{ .name = "__pext_bigint", .linkage = common.linkage, .visibility = common.visibility }); + @export(__pext_u32, .{ .name = "__pext_u32", .linkage = common.linkage, .visibility = common.visibility }); + @export(__pext_u64, .{ .name = "__pext_u64", .linkage = common.linkage, .visibility = common.visibility }); + @export(__pext_u128, .{ .name = "__pext_u128", .linkage = common.linkage, .visibility = common.visibility }); +} + +const endian = builtin.cpu.arch.endian(); + +inline fn limb(x: []const Limb, i: usize) Limb { + return if (endian == .little) x[i] else x[x.len - 1 - i]; +} + +inline fn limb_ptr(x: []Limb, i: usize) *Limb { + return if (endian == .little) &x[i] else &x[x.len - 1 - i]; +} + +inline fn limb_set(x: []Limb, i: usize, v: Limb) void 
{ + if (endian == .little) { + x[i] = v; + } else { + x[x.len - 1 - i] = v; + } +} + +// Code for bigint pdep and pext largely taken from std.math.big.int.depositBits and extractBits + +inline fn pdep_bigint(result: []Limb, source: []const Limb, mask: []const Limb) void { + @memset(result, 0); + + var mask_limb: Limb = limb(mask, 0); + var mask_limb_index: usize = 0; + var i: usize = 0; + + outer: while (true) : (i += 1) { + // Find the lowest set bit in mask + const mask_limb_bit: Log2Limb = limb_bit: while (true) { + const mask_limb_tz = @ctz(mask_limb); + if (mask_limb_tz != @bitSizeOf(Limb)) { + const cast_limb_bit: Log2Limb = @intCast(mask_limb_tz); + mask_limb ^= @as(Limb, 1) << cast_limb_bit; + break :limb_bit cast_limb_bit; + } + + mask_limb_index += 1; + if (mask_limb_index >= mask.len) break :outer; + + mask_limb = limb(mask, mask_limb_index); + }; + + const i_limb_index = i / 32; + const i_limb_bit: Log2Limb = @truncate(i); + + if (i_limb_index >= source.len) break; + + const source_bit_set = limb(source, i_limb_index) & (@as(Limb, 1) << i_limb_bit) != 0; + + limb_ptr(result, mask_limb_index).* |= @as(Limb, @intFromBool(source_bit_set)) << mask_limb_bit; + } +} + +pub fn __pdep_bigint(r: [*]Limb, s: [*]const Limb, m: [*]const Limb, bits: usize) callconv(.C) void { + const result = r[0 .. std.math.divCeil(usize, bits, 32) catch unreachable]; + const source = s[0 .. std.math.divCeil(usize, bits, 32) catch unreachable]; + const mask = m[0 .. 
std.math.divCeil(usize, bits, 32) catch unreachable]; + + pdep_bigint(result, source, mask); +} + +inline fn pext_bigint(result: []Limb, source: []const Limb, mask: []const Limb) void { + @memset(result, 0); + + var mask_limb: Limb = limb(mask, 0); + var mask_limb_index: usize = 0; + var i: usize = 0; + + outer: while (true) : (i += 1) { + const mask_limb_bit: Log2Limb = limb_bit: while (true) { + const mask_limb_tz = @ctz(mask_limb); + if (mask_limb_tz != @bitSizeOf(Limb)) { + const cast_limb_bit: Log2Limb = @intCast(mask_limb_tz); + mask_limb ^= @as(Limb, 1) << cast_limb_bit; + break :limb_bit cast_limb_bit; + } + + mask_limb_index += 1; + if (mask_limb_index >= mask.len) break :outer; + + mask_limb = limb(mask, mask_limb_index); + }; + + const i_limb_index = i / 32; + const i_limb_bit: Log2Limb = @truncate(i); + + if (mask_limb_index >= source.len) break; + + const source_bit_set = limb(source, mask_limb_index) & (@as(Limb, 1) << mask_limb_bit) != 0; + + limb_ptr(result, i_limb_index).* |= @as(Limb, @intFromBool(source_bit_set)) << i_limb_bit; + } +} + +pub fn __pext_bigint(r: [*]Limb, s: [*]const Limb, m: [*]const Limb, bits: usize) callconv(.C) void { + const result = r[0 .. std.math.divCeil(usize, bits, 32) catch unreachable]; + const source = s[0 .. std.math.divCeil(usize, bits, 32) catch unreachable]; + const mask = m[0 .. 
std.math.divCeil(usize, bits, 32) catch unreachable]; + + pext_bigint(result, source, mask); +} + +inline fn pdep_uX(comptime T: type, source: T, mask_: T) T { + var bb: T = 1; + var result: T = 0; + var mask = mask_; + + while (mask != 0) { + const bit = mask & ~(mask - 1); + mask &= ~bit; + const source_bit = source & bb; + if (source_bit != 0) result |= bit; + bb += bb; + } + + return result; +} + +pub fn __pdep_u32(source: u32, mask: u32) callconv(.C) u32 { + return pdep_uX(u32, source, mask); +} + +pub fn __pdep_u64(source: u64, mask: u64) callconv(.C) u64 { + return pdep_uX(u64, source, mask); +} + +pub fn __pdep_u128(source: u128, mask: u128) callconv(.C) u128 { + return pdep_uX(u128, source, mask); +} + +inline fn pext_uX(comptime T: type, source: T, mask_: T) T { + var bb: T = 1; + var result: T = 0; + var mask = mask_; + + while (mask != 0) { + const bit = mask & ~(mask - 1); + mask &= ~bit; + const source_bit = source & bit; + if (source_bit != 0) result |= bb; + bb += bb; + } + + return result; +} + +pub fn __pext_u32(source: u32, mask: u32) callconv(.C) u32 { + return pext_uX(u32, source, mask); +} + +pub fn __pext_u64(source: u64, mask: u64) callconv(.C) u64 { + return pext_uX(u64, source, mask); +} + +pub fn __pext_u128(source: u128, mask: u128) callconv(.C) u128 { + return pext_uX(u128, source, mask); +} diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig index 93ad1ccbe26a..357d2c93e785 100644 --- a/lib/std/math/big/int.zig +++ b/lib/std/math/big/int.zig @@ -1735,6 +1735,98 @@ pub const Mutable = struct { y.shiftRight(y.toConst(), norm_shift); } + // TODO this function is quite inefficient and could be optimised + /// r = @depositBits(source, mask) + /// + /// Asserts that `source` and `mask` are positive + pub fn depositBits(r: *Mutable, source: Const, mask: Const) void { + assert(source.positive); + assert(mask.positive); + + r.positive = true; + @memset(r.limbs, 0); + + var mask_limb: Limb = mask.limbs[0]; + var mask_limb_index: 
Limb = 0; + var i: usize = 0; + outer: while (true) : (i += 1) { + // Find next bit in mask + const mask_limb_bit: Log2Limb = limb_bit: while (true) { + const mask_limb_tz = @ctz(mask_limb); + if (mask_limb_tz != @sizeOf(Limb) * 8) { + const cast_limb_bit: Log2Limb = @intCast(mask_limb_tz); + mask_limb ^= @as(Limb, 1) << cast_limb_bit; + break :limb_bit cast_limb_bit; + } + + mask_limb_index += 1; + // No more limbs, we've finished iterating the mask + if (mask_limb_index >= mask.limbs.len) { + break :outer; + } + + mask_limb = mask.limbs[mask_limb_index]; + }; + + const i_limb_index = i / limb_bits; + const i_limb_bit: Log2Limb = @truncate(i); + + if (i_limb_index >= source.limbs.len) break; // Stop when we reach the end of `source` (we can treat the rest as zeroes) + + const source_bit_set = source.limbs[i_limb_index] & (@as(Limb, 1) << i_limb_bit) != 0; + + r.limbs[mask_limb_index] |= @as(Limb, @intFromBool(source_bit_set)) << mask_limb_bit; + } + + r.normalize(r.limbs.len); + } + + // TODO this function is quite inefficient and could be optimised + /// r = @extractBits(source, mask) + /// + /// Asserts that `source` and `mask` are positive + pub fn extractBits(r: *Mutable, source: Const, mask: Const) void { + assert(source.positive); + assert(mask.positive); + + r.positive = true; + @memset(r.limbs, 0); + + var mask_limb: Limb = mask.limbs[0]; + var mask_limb_index: Limb = 0; + var i: usize = 0; + outer: while (true) : (i += 1) { + // Find next bit in mask + const mask_limb_bit: Log2Limb = limb_bit: while (true) { + const mask_limb_tz = @ctz(mask_limb); + if (mask_limb_tz != @sizeOf(Limb) * 8) { + const cast_limb_bit: Log2Limb = @intCast(mask_limb_tz); + mask_limb ^= @as(Limb, 1) << cast_limb_bit; + break :limb_bit cast_limb_bit; + } + + mask_limb_index += 1; + // No more limbs, we've finished iterating the mask + if (mask_limb_index >= mask.limbs.len) { + break :outer; + } + + mask_limb = mask.limbs[mask_limb_index]; + }; + + const i_limb_index = i / 
limb_bits; + const i_limb_bit: Log2Limb = @truncate(i); + + if (mask_limb_index >= source.limbs.len) break; // Stop when we reach the end of `source` (we can treat the rest as zeroes) + + const source_bit_set = source.limbs[mask_limb_index] & (@as(Limb, 1) << mask_limb_bit) != 0; + + r.limbs[i_limb_index] |= @as(Limb, @intFromBool(source_bit_set)) << i_limb_bit; + } + + r.normalize(r.limbs.len); + } + /// If a is positive, this passes through to truncate. /// If a is negative, then r is set to positive with the bit pattern ~(a - 1). /// r may alias a. diff --git a/lib/std/math/big/int_test.zig b/lib/std/math/big/int_test.zig index 624bdc0b83af..c9dabaa31e30 100644 --- a/lib/std/math/big/int_test.zig +++ b/lib/std/math/big/int_test.zig @@ -2800,6 +2800,54 @@ fn popCountTest(val: *const Managed, bit_count: usize, expected: usize) !void { try testing.expectEqual(expected, val.toConst().popCount(bit_count)); } +test "big int extractBits" { + try extractBitsTest(0x12345678, 0x0, 0x0); + try extractBitsTest(0x12345678, 0xf0f0f0f0, 0x1357); + try extractBitsTest(0x12345678, 0xff00ff00, 0x1256); + try extractBitsTest(0x12345678, 0xffff, 0x5678); + + try extractBitsTest(0x12345678_90123456_78901234_56789012, 0xff << 64, 0x56); + try extractBitsTest(0x12345678_90123456_78901234_56789012, (0xff << 64) | 0xff00f, 0x56892); +} + +fn extractBitsTest(comptime source: comptime_int, comptime mask: comptime_int, comptime expected: comptime_int) !void { + var source_bigint = try Managed.initSet(testing.allocator, source); + defer source_bigint.deinit(); + var mask_bigint = try Managed.initSet(testing.allocator, mask); + defer mask_bigint.deinit(); + const limbs = try testing.allocator.alloc(Limb, mask_bigint.limbs.len); + defer testing.allocator.free(limbs); + var result = Mutable{ .limbs = limbs, .positive = undefined, .len = undefined }; + + result.extractBits(source_bigint.toConst(), mask_bigint.toConst()); + + try testing.expectEqual(std.math.Order.eq, 
result.toConst().orderAgainstScalar(expected)); +} + +test "big int depositBits" { + try depositBitsTest(0x12345678, 0x0, 0x0); + try depositBitsTest(0x12345678, 0xf0f0f0f0, 0x50607080); + try depositBitsTest(0x12345678, 0xff00ff00, 0x56007800); + try depositBitsTest(0x12345678, 0xffff, 0x5678); + + try depositBitsTest(0x1234, 0xff << 64, 0x34_00000000_00000000); + try depositBitsTest(0x12345678, (0xff << 64) | 0xff00f, 0x45_00000000_00067008); +} + +fn depositBitsTest(comptime source: comptime_int, comptime mask: comptime_int, comptime expected: comptime_int) !void { + var source_bigint = try Managed.initSet(testing.allocator, source); + defer source_bigint.deinit(); + var mask_bigint = try Managed.initSet(testing.allocator, mask); + defer mask_bigint.deinit(); + const limbs = try testing.allocator.alloc(Limb, mask_bigint.limbs.len); + defer testing.allocator.free(limbs); + var result = Mutable{ .limbs = limbs, .positive = undefined, .len = undefined }; + + result.depositBits(source_bigint.toConst(), mask_bigint.toConst()); + + try testing.expectEqual(std.math.Order.eq, result.toConst().orderAgainstScalar(expected)); +} + test "big int conversion read/write twos complement" { var a = try Managed.initSet(testing.allocator, (1 << 493) - 1); defer a.deinit(); diff --git a/lib/std/zig/AstGen.zig b/lib/std/zig/AstGen.zig index a52007eabf24..e483a9e890e6 100644 --- a/lib/std/zig/AstGen.zig +++ b/lib/std/zig/AstGen.zig @@ -9691,6 +9691,9 @@ fn builtinCall( }); return rvalue(gz, ri, result, node); }, + + .deposit_bits => return depositExtractBits(gz, scope, ri, node, params, .deposit_bits), + .extract_bits => return depositExtractBits(gz, scope, ri, node, params, .extract_bits), } } @@ -9958,6 +9961,24 @@ fn overflowArithmetic( return rvalue(gz, ri, result, node); } +fn depositExtractBits( + gz: *GenZir, + scope: *Scope, + ri: ResultInfo, + node: Ast.Node.Index, + params: []const Ast.Node.Index, + tag: Zir.Inst.Extended, +) InnerError!Zir.Inst.Ref { + const lhs = try 
expr(gz, scope, .{ .rl = .none }, params[0]); + const rhs = try expr(gz, scope, .{ .rl = .none }, params[1]); + const result = try gz.addExtendedPayload(tag, Zir.Inst.BinNode{ + .node = gz.nodeIndexToRelative(node), + .lhs = lhs, + .rhs = rhs, + }); + return rvalue(gz, ri, result, node); +} + fn callExpr( gz: *GenZir, scope: *Scope, diff --git a/lib/std/zig/AstRlAnnotate.zig b/lib/std/zig/AstRlAnnotate.zig index 4a1203ca09fc..edf221caa103 100644 --- a/lib/std/zig/AstRlAnnotate.zig +++ b/lib/std/zig/AstRlAnnotate.zig @@ -1100,5 +1100,10 @@ fn builtinCall(astrl: *AstRlAnnotate, block: ?*Block, ri: ResultInfo, node: Ast. _ = try astrl.expr(args[4], block, ResultInfo.type_only); return false; }, + .deposit_bits, .extract_bits => { + _ = try astrl.expr(args[0], block, ResultInfo.none); + _ = try astrl.expr(args[1], block, ResultInfo.none); + return false; + }, } } diff --git a/lib/std/zig/BuiltinFn.zig b/lib/std/zig/BuiltinFn.zig index 11d6a17303c8..37f648893da2 100644 --- a/lib/std/zig/BuiltinFn.zig +++ b/lib/std/zig/BuiltinFn.zig @@ -35,6 +35,7 @@ pub const Tag = enum { c_va_copy, c_va_end, c_va_start, + deposit_bits, div_exact, div_floor, div_trunc, @@ -46,6 +47,7 @@ pub const Tag = enum { error_cast, @"export", @"extern", + extract_bits, fence, field, field_parent_ptr, @@ -405,6 +407,12 @@ pub const list = list: { .illegal_outside_function = true, }, }, + .{ + "@depositBits", .{ + .tag = .deposit_bits, + .param_count = 2, + }, + }, .{ "@divExact", .{ @@ -483,6 +491,13 @@ pub const list = list: { .param_count = 2, }, }, + .{ + "@extractBits", + .{ + .tag = .extract_bits, + .param_count = 2, + }, + }, .{ "@fence", .{ diff --git a/lib/std/zig/Zir.zig b/lib/std/zig/Zir.zig index 64e8a1c8050f..b9f3e10dd007 100644 --- a/lib/std/zig/Zir.zig +++ b/lib/std/zig/Zir.zig @@ -2060,6 +2060,12 @@ pub const Inst = struct { /// Guaranteed to not have the `ptr_cast` flag. /// Uses the `pl_node` union field with payload `FieldParentPtr`. 
field_parent_ptr, + /// Implements the `@depositBits` builtin. + /// `operand` is payload index to `BinNode`. + deposit_bits, + /// Implements the `@extractBits` builtin. + /// `operand` is payload index to `BinNode`. + extract_bits, pub const InstData = struct { opcode: Extended, diff --git a/src/Air.zig b/src/Air.zig index 9554c55561a5..f607f3c29447 100644 --- a/src/Air.zig +++ b/src/Air.zig @@ -848,6 +848,13 @@ pub const Inst = struct { /// Operand is unused and set to Ref.none work_group_id, + /// Implements @depositBits builtin. + /// Uses the `bin_op` field. + deposit_bits, + /// Implements @extractBits builtin. + /// Uses the `bin_op` field. + extract_bits, + pub fn fromCmpOp(op: std.math.CompareOperator, optimized: bool) Tag { switch (op) { .lt => return if (optimized) .cmp_lt_optimized else .cmp_lt, @@ -1318,6 +1325,8 @@ pub fn typeOfIndex(air: *const Air, inst: Air.Inst.Index, ip: *const InternPool) .div_exact_optimized, .rem_optimized, .mod_optimized, + .deposit_bits, + .extract_bits, => return air.typeOf(datas[@intFromEnum(inst)].bin_op.lhs, ip), .sqrt, @@ -1790,6 +1799,8 @@ pub fn mustLower(air: Air, inst: Air.Inst.Index, ip: *const InternPool) bool { .work_item_id, .work_group_size, .work_group_id, + .deposit_bits, + .extract_bits, => false, .assembly => { diff --git a/src/Liveness.zig b/src/Liveness.zig index 4ca28758e222..dd9f4fe24776 100644 --- a/src/Liveness.zig +++ b/src/Liveness.zig @@ -286,6 +286,8 @@ pub fn categorizeOperand( .cmp_gte_optimized, .cmp_gt_optimized, .cmp_neq_optimized, + .deposit_bits, + .extract_bits, => { const o = air_datas[@intFromEnum(inst)].bin_op; if (o.lhs == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none); @@ -955,6 +957,8 @@ fn analyzeInst( .memset, .memset_safe, .memcpy, + .deposit_bits, + .extract_bits, => { const o = inst_datas[@intFromEnum(inst)].bin_op; return analyzeOperands(a, pass, data, inst, .{ o.lhs, o.rhs, .none }); diff --git a/src/Liveness/Verify.zig b/src/Liveness/Verify.zig index 
4392f25e101d..f43b498f3a46 100644 --- a/src/Liveness/Verify.zig +++ b/src/Liveness/Verify.zig @@ -257,6 +257,8 @@ fn verifyBody(self: *Verify, body: []const Air.Inst.Index) Error!void { .memset, .memset_safe, .memcpy, + .deposit_bits, + .extract_bits, => { const bin_op = data[@intFromEnum(inst)].bin_op; try self.verifyInstOperands(inst, .{ bin_op.lhs, bin_op.rhs, .none }); diff --git a/src/Sema.zig b/src/Sema.zig index d3989f630cb5..9a39773f0709 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -1260,6 +1260,8 @@ fn analyzeBodyInner( .work_group_id => try sema.zirWorkItem( block, extended, extended.opcode), .in_comptime => try sema.zirInComptime( block), .closure_get => try sema.zirClosureGet( block, extended), + .deposit_bits => try sema.zirDepositExtractBits(block, extended, .deposit_bits), + .extract_bits => try sema.zirDepositExtractBits(block, extended, .extract_bits), // zig fmt: on .fence => { @@ -26390,6 +26392,130 @@ fn zirInComptime( return if (block.is_comptime) .bool_true else .bool_false; } +fn zirDepositExtractBits( + sema: *Sema, + block: *Block, + extended: Zir.Inst.Extended.InstData, + air_tag: Air.Inst.Tag, +) CompileError!Air.Inst.Ref { + const mod = sema.mod; + + const target = sema.mod.getTarget(); + _ = target; + const extra = sema.code.extraData(Zir.Inst.BinNode, extended.operand).data; + const src = LazySrcLoc.nodeOffset(extra.node); + + const lhs_src: LazySrcLoc = .{ .node_offset_builtin_call_arg0 = extra.node }; + const rhs_src: LazySrcLoc = .{ .node_offset_builtin_call_arg1 = extra.node }; + + const uncasted_lhs = try sema.resolveInst(extra.lhs); + const uncasted_rhs = try sema.resolveInst(extra.rhs); + + const lhs_ty = sema.typeOf(uncasted_lhs); + const rhs_ty = sema.typeOf(uncasted_rhs); + + if (!lhs_ty.isUnsignedInt(mod) and lhs_ty.zigTypeTag(mod) != .ComptimeInt) { + return sema.fail(block, lhs_src, "expected unsigned integer or 'comptime_int', found '{}'", .{lhs_ty.fmt(sema.mod)}); + } + + if (!rhs_ty.isUnsignedInt(mod) and 
rhs_ty.zigTypeTag(mod) != .ComptimeInt) { + return sema.fail(block, rhs_src, "expected unsigned integer or 'comptime_int', found '{}'", .{rhs_ty.fmt(sema.mod)}); + } + + const instructions = &[_]Air.Inst.Ref{ uncasted_lhs, uncasted_rhs }; + const dest_ty = try sema.resolvePeerTypes(block, src, instructions, .{ + .override = &[_]?LazySrcLoc{ lhs_src, rhs_src }, + }); + + const builtin_name = switch (air_tag) { + .deposit_bits => "@depositBits", + .extract_bits => "@extractBits", + else => unreachable, + }; + + // Coercion errors are intercepted to add a note if the caller is attempting to pass a negative comptime_int + const lhs = sema.coerce(block, dest_ty, uncasted_lhs, lhs_src) catch |err| switch (err) { + error.AnalysisFail => { + const msg = sema.err orelse return err; + const val = (try sema.resolveValue(uncasted_lhs)).?; + if (val.orderAgainstZero(mod) == .lt) { + try sema.errNote(block, src, msg, "parameters to {s} must be positive", .{builtin_name}); + } + return err; + }, + else => return err, + }; + + const rhs = sema.coerce(block, dest_ty, uncasted_rhs, rhs_src) catch |err| switch (err) { + error.AnalysisFail => { + const msg = sema.err orelse return err; + const val = (try sema.resolveValue(uncasted_rhs)).?; + if (val.orderAgainstZero(mod) == .lt) { + try sema.errNote(block, src, msg, "parameters to {s} must be positive", .{builtin_name}); + } + return err; + }, + else => return err, + }; + + const maybe_lhs_val = try sema.resolveValue(lhs); + const maybe_rhs_val = try sema.resolveValue(rhs); + + // We check for negative values here only if the type is a comptime_int, as negative values + // would have otherwise been filtered out by coercion and the unsigned type restriction + if (dest_ty.zigTypeTag(mod) == .ComptimeInt) { + if (maybe_lhs_val) |lhs_val| { + if (!lhs_val.isUndef(mod) and lhs_val.orderAgainstZero(mod) == .lt) { + const err = try sema.errMsg(block, lhs_src, "use of negative value '{}'", .{lhs_val.fmtValue(sema.mod)}); + try 
sema.errNote(block, src, err, "parameters to {s} must be positive", .{builtin_name}); + return sema.failWithOwnedErrorMsg(block, err); + } + } + + if (maybe_rhs_val) |rhs_val| { + if (!rhs_val.isUndef(mod) and rhs_val.orderAgainstZero(mod) == .lt) { + const err = try sema.errMsg(block, rhs_src, "use of negative value '{}'", .{rhs_val.fmtValue(sema.mod)}); + try sema.errNote(block, src, err, "parameters to {s} must be positive", .{builtin_name}); + return sema.failWithOwnedErrorMsg(block, err); + } + } + } + + // If either of the operands are zero, the result is zero + // If either of the operands are undefined, the result is undefined + if (maybe_lhs_val) |lhs_val| { + if (lhs_val.orderAgainstZero(mod) == .eq) return Air.internedToRef((try mod.intValue(dest_ty, 0)).toIntern()); + if (lhs_val.isUndef(mod)) return try mod.undefRef(dest_ty); + } + if (maybe_rhs_val) |rhs_val| { + if (rhs_val.orderAgainstZero(mod) == .eq) return Air.internedToRef((try mod.intValue(dest_ty, 0)).toIntern()); + if (rhs_val.isUndef(mod)) return mod.undefRef(dest_ty); + } + + if (maybe_lhs_val) |lhs_val| { + if (maybe_rhs_val) |rhs_val| { + const dest_val = switch (air_tag) { + .deposit_bits => try sema.intDepositBits(lhs_val, rhs_val, dest_ty), + .extract_bits => try sema.intExtractBits(lhs_val, rhs_val, dest_ty), + else => unreachable, + }; + + return Air.internedToRef(dest_val.toIntern()); + } + } + + const runtime_src = if (maybe_lhs_val == null) lhs_src else rhs_src; + try sema.requireRuntimeBlock(block, src, runtime_src); + + return block.addInst(.{ + .tag = air_tag, + .data = .{ .bin_op = .{ + .lhs = lhs, + .rhs = rhs, + } }, + }); +} + fn requireRuntimeBlock(sema: *Sema, block: *Block, src: LazySrcLoc, runtime_src: ?LazySrcLoc) !void { if (block.is_comptime) { const msg = msg: { @@ -38992,6 +39118,62 @@ fn intAddWithOverflowScalar( }; } +/// Asserts that the values are positive +fn intDepositBits( + sema: *Sema, + lhs: Value, + rhs: Value, + ty: Type, +) !Value { + // TODO is this a 
performance issue? maybe we should try the operation without + // resorting to BigInt first. For non-bigints, @intDeposit could be used? + const mod = sema.mod; + const arena = sema.arena; + + var lhs_space: Value.BigIntSpace = undefined; + var rhs_space: Value.BigIntSpace = undefined; + const source = lhs.toBigInt(&lhs_space, mod); + const mask = rhs.toBigInt(&rhs_space, mod); + + const result_limbs = try arena.alloc( + std.math.big.Limb, + mask.limbs.len, + ); + + var result = std.math.big.int.Mutable{ .limbs = result_limbs, .positive = undefined, .len = undefined }; + + result.depositBits(source, mask); + return try mod.intValue_big(ty, result.toConst()); +} + +/// Asserts that the values are positive +fn intExtractBits( + sema: *Sema, + lhs: Value, + rhs: Value, + ty: Type, +) !Value { + // TODO is this a performance issue? maybe we should try the operation without + // resorting to BigInt first. For non-bigints, @intExtract could be used? + const mod = sema.mod; + const arena = sema.arena; + + var lhs_space: Value.BigIntSpace = undefined; + var rhs_space: Value.BigIntSpace = undefined; + const source = lhs.toBigInt(&lhs_space, mod); + const mask = rhs.toBigInt(&rhs_space, mod); + + const result_limbs = try arena.alloc( + std.math.big.Limb, + mask.limbs.len, + ); + + var result = std.math.big.int.Mutable{ .limbs = result_limbs, .positive = undefined, .len = undefined }; + + result.extractBits(source, mask); + return try mod.intValue_big(ty, result.toConst()); +} + /// Asserts the values are comparable. Both operands have type `ty`. /// For vectors, returns true if the comparison is true for ALL elements. 
/// diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index ddde72345efe..5a327f1a0ae6 100644 --- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -899,6 +899,9 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .work_item_id => unreachable, .work_group_size => unreachable, .work_group_id => unreachable, + + .deposit_bits => return self.fail("TODO implement deposit_bits", .{}), + .extract_bits => return self.fail("TODO implement extract_bits", .{}), // zig fmt: on } diff --git a/src/arch/arm/CodeGen.zig b/src/arch/arm/CodeGen.zig index 86d4e8f7fdd6..d55c69d48a7a 100644 --- a/src/arch/arm/CodeGen.zig +++ b/src/arch/arm/CodeGen.zig @@ -885,6 +885,9 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .work_item_id => unreachable, .work_group_size => unreachable, .work_group_id => unreachable, + + .deposit_bits => return self.fail("TODO implement deposit_bits", .{}), + .extract_bits => return self.fail("TODO implement extract_bits", .{}), // zig fmt: on } diff --git a/src/arch/riscv64/CodeGen.zig b/src/arch/riscv64/CodeGen.zig index 5abe3afcfd2a..d45904d7e98c 100644 --- a/src/arch/riscv64/CodeGen.zig +++ b/src/arch/riscv64/CodeGen.zig @@ -713,6 +713,9 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .work_item_id => unreachable, .work_group_size => unreachable, .work_group_id => unreachable, + + .deposit_bits => return self.fail("TODO implement deposit_bits", .{}), + .extract_bits => return self.fail("TODO implement extract_bits", .{}), // zig fmt: on } if (std.debug.runtime_safety) { diff --git a/src/arch/sparc64/CodeGen.zig b/src/arch/sparc64/CodeGen.zig index 19c18ec4a6b0..f2fbb813affd 100644 --- a/src/arch/sparc64/CodeGen.zig +++ b/src/arch/sparc64/CodeGen.zig @@ -732,6 +732,9 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .work_item_id => unreachable, .work_group_size => unreachable, .work_group_id => unreachable, + + 
.deposit_bits => @panic("TODO implement deposit_bits"), + .extract_bits => @panic("TODO implement extract_bits"), // zig fmt: on } diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig index 83159ec80e7d..fcf8bd362a0d 100644 --- a/src/arch/wasm/CodeGen.zig +++ b/src/arch/wasm/CodeGen.zig @@ -2058,6 +2058,10 @@ fn genInst(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { .work_group_size, .work_group_id, => unreachable, + + .deposit_bits, + .extract_bits, + => |tag| return func.fail("TODO implement {s}", .{@tagName(tag)}), }; } diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index c165baf7e885..2c217f3f4c33 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -2195,6 +2195,10 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .work_item_id => unreachable, .work_group_size => unreachable, .work_group_id => unreachable, + + .deposit_bits, + .extract_bits, + => |tag| try self.airDepositExtractBits(inst, tag), // zig fmt: on } @@ -5569,6 +5573,168 @@ fn airPtrSlicePtrPtr(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); } +fn airDepositExtractBits(self: *Self, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void { + const mod = self.bin_file.comp.module.?; + + const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + + const dest_ty = self.typeOfIndex(inst); + const abi_size: u32 = @intCast(@max(dest_ty.abiSize(mod), 4)); + + const result = if (!self.hasFeature(.bmi2) or abi_size > 8) + try genDepositExtractBitsEmulated(self, inst, tag, bin_op.lhs, bin_op.rhs, dest_ty, abi_size) + else + try genDepositExtractBitsNative(self, inst, tag, bin_op.lhs, bin_op.rhs, dest_ty, abi_size); + + return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); +} + +fn genDepositExtractBitsEmulated( + self: *Self, + inst: Air.Inst.Index, + tag: Air.Inst.Tag, + lhs: Air.Inst.Ref, + rhs: Air.Inst.Ref, + 
dest_ty: Type, + abi_size: u32, +) !MCValue { + const mod = self.bin_file.comp.module.?; + + var callee_buf: ["__pdep_bigint".len]u8 = undefined; + const callee = std.fmt.bufPrint(&callee_buf, "__{s}_{s}", .{ + switch (tag) { + .deposit_bits => "pdep", + .extract_bits => "pext", + else => unreachable, + }, + switch (abi_size) { + 0...4 => "u32", + 5...8 => "u64", + 9...16 => "u128", + else => "bigint", + }, + }) catch unreachable; + + if (abi_size <= 16) return try self.genCall(.{ .lib = .{ + .return_type = dest_ty.toIntern(), + .param_types = &.{ dest_ty.toIntern(), dest_ty.toIntern() }, + .callee = callee, + } }, &.{ dest_ty, dest_ty }, &.{ .{ .air_ref = lhs }, .{ .air_ref = rhs } }); + + const bit_count = dest_ty.intInfo(mod).bits; + + const dest_mcv = try self.allocRegOrMemAdvanced(dest_ty, inst, false); + const lhs_mcv = try self.resolveInst(lhs); + const rhs_mcv = try self.resolveInst(rhs); + + const manyptr_u32_ty = try mod.ptrType(.{ + .child = .u32_type, + .flags = .{ + .size = .Many, + }, + }); + const manyptr_const_u32_ty = try mod.ptrType(.{ + .child = .u32_type, + .flags = .{ + .size = .Many, + .is_const = true, + }, + }); + + _ = try self.genCall(.{ .lib = .{ + .return_type = .void_type, + .param_types = &.{ + manyptr_u32_ty.toIntern(), + manyptr_const_u32_ty.toIntern(), + manyptr_const_u32_ty.toIntern(), + .usize_type, + }, + .callee = callee, + } }, &.{ + manyptr_u32_ty, + manyptr_const_u32_ty, + manyptr_const_u32_ty, + Type.usize, + }, &.{ + dest_mcv.address(), + lhs_mcv.address(), + rhs_mcv.address(), + .{ .immediate = bit_count }, + }); + + return dest_mcv; +} + +fn genDepositExtractBitsNative( + self: *Self, + inst: Air.Inst.Index, + tag: Air.Inst.Tag, + lhs: Air.Inst.Ref, + rhs: Air.Inst.Ref, + dest_ty: Type, + abi_size: u32, +) !MCValue { + assert(self.hasFeature(.bmi2)); // BMI2 must be present for PEXT/PDEP instructions + assert(abi_size <= 8); // PEXT/PDEP only exist for 64-bit and below + + const lhs_mcv = try self.resolveInst(lhs); + 
const rhs_mcv = try self.resolveInst(rhs); + + const lhs_lock: ?RegisterLock = switch (lhs_mcv) { + .register => |reg| self.register_manager.lockRegAssumeUnused(reg), + else => null, + }; + defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock); + + const rhs_lock: ?RegisterLock = switch (rhs_mcv) { + .register => |reg| self.register_manager.lockReg(reg), + else => null, + }; + defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock); + + const dest_mcv: MCValue, const dest_is_lhs = dest: { + if (rhs_mcv.isRegister() and self.reuseOperand(inst, rhs, 1, rhs_mcv)) + break :dest .{ rhs_mcv, false }; + + if (lhs_mcv.isRegister() and self.reuseOperand(inst, lhs, 0, lhs_mcv)) + break :dest .{ lhs_mcv, false }; + + break :dest .{ try self.copyToRegisterWithInstTracking(inst, dest_ty, lhs_mcv), true }; + }; + + const dest_reg = dest_mcv.getReg().?; + const dest_lock = self.register_manager.lockReg(dest_reg); + defer if (dest_lock) |lock| self.register_manager.unlockReg(lock); + + const lhs_reg = if (dest_is_lhs) dest_reg else if (lhs_mcv.getReg()) |reg| reg else try self.copyToTmpRegister(dest_ty, lhs_mcv); + + const mir_tag = Mir.Inst.FixedTag{ ._, switch (tag) { + .deposit_bits => .pdep, + .extract_bits => .pext, + else => unreachable, + } }; + + if (rhs_mcv.isMemory()) { + try self.asmRegisterRegisterMemory( + mir_tag, + registerAlias(dest_reg, abi_size), + registerAlias(lhs_reg, abi_size), + try rhs_mcv.mem(self, Memory.Size.fromSize(abi_size)), + ); + } else { + const rhs_reg = if (rhs_mcv.getReg()) |reg| reg else try self.copyToTmpRegister(dest_ty, rhs_mcv); + + try self.asmRegisterRegisterRegister( + mir_tag, + registerAlias(dest_reg, abi_size), + registerAlias(lhs_reg, abi_size), + registerAlias(rhs_reg, abi_size), + ); + } + + return dest_mcv; +} + fn elemOffset(self: *Self, index_ty: Type, index: MCValue, elem_size: u64) !Register { const reg: Register = blk: { switch (index) { diff --git a/src/arch/x86_64/Encoding.zig 
b/src/arch/x86_64/Encoding.zig index e4c2a39d18c4..43a7fbbbfb94 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -245,6 +245,7 @@ pub const Mnemonic = enum { neg, nop, not, @"or", pause, pop, popcnt, popfq, push, pushfq, + pdep, pext, rcl, rcr, ret, rol, ror, sal, sar, sbb, scas, scasb, scasd, scasq, scasw, @@ -782,6 +783,7 @@ pub const Feature = enum { avx, avx2, bmi, + bmi2, f16c, fma, lzcnt, diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index d2dd6237a5e6..9b9126e26249 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -384,6 +384,10 @@ pub const Inst = struct { @"or", /// Spin loop hint pause, + /// Parallel bits deposit + pdep, + /// Parallel bits extract + pext, /// Pop pop, /// Return the count of number of bits set to 1 diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index d4a7dcafe7bd..dd36cf1e68ea 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ -486,6 +486,11 @@ pub const table = [_]Entry{ .{ .pause, .zo, &.{}, &.{ 0xf3, 0x90 }, 0, .none, .none }, + .{ .pdep, .rvm, &.{ .r32, .r32, .rm32 }, &.{ 0xf2, 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w0, .bmi2 }, + .{ .pdep, .rvm, &.{ .r64, .r64, .rm64 }, &.{ 0xf2, 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w1, .bmi2 }, + .{ .pext, .rvm, &.{ .r32, .r32, .rm32 }, &.{ 0xf3, 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w0, .bmi2 }, + .{ .pext, .rvm, &.{ .r64, .r64, .rm64 }, &.{ 0xf3, 0x0f, 0x38, 0xf5 }, 0, .vex_lz_w1, .bmi2 }, + .{ .pop, .o, &.{ .r16 }, &.{ 0x58 }, 0, .short, .none }, .{ .pop, .o, &.{ .r64 }, &.{ 0x58 }, 0, .none, .none }, .{ .pop, .m, &.{ .rm16 }, &.{ 0x8f }, 0, .short, .none }, diff --git a/src/codegen/c.zig b/src/codegen/c.zig index 818267a8b819..80da6ff96482 100644 --- a/src/codegen/c.zig +++ b/src/codegen/c.zig @@ -3466,6 +3466,9 @@ fn genBodyInner(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, .work_group_size, .work_group_id, => unreachable, + + .deposit_bits => return f.fail("TODO: C 
backend: implement deposit_bits", .{}), + .extract_bits => return f.fail("TODO: C backend: implement extract_bits", .{}), // zig fmt: on }; if (result_value == .new_local) { diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index db0eaa3ce5e6..da9da70faf40 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -5103,6 +5103,9 @@ pub const FuncGen = struct { .work_item_id => try self.airWorkItemId(inst), .work_group_size => try self.airWorkGroupSize(inst), .work_group_id => try self.airWorkGroupId(inst), + + .deposit_bits, + .extract_bits => |tag| try self.airDepositExtractBits(inst, tag), // zig fmt: on }; if (val != .none) try self.func_inst_table.putNoClobber(self.gpa, inst.toRef(), val); @@ -10295,6 +10298,157 @@ pub const FuncGen = struct { return self.amdgcnWorkIntrinsic(dimension, 0, "amdgcn.workgroup.id"); } + fn airDepositExtractBits(self: *FuncGen, inst: Air.Inst.Index, tag: Air.Inst.Tag) !Builder.Value { + if (self.liveness.isUnused(inst)) return .none; + + const o = self.dg.object; + + const bin_op = self.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + const source = try self.resolveInst(bin_op.lhs); + const mask = try self.resolveInst(bin_op.rhs); + const inst_ty = self.typeOfIndex(inst); + + const target = o.module.getTarget(); + + const llvm_ty = try o.lowerType(inst_ty); + const bits: u16 = @intCast(llvm_ty.scalarBits(&o.builder)); + + switch (target.cpu.arch) { + .x86, .x86_64 => |arch| blk: { + // Doesn't have pdep + if (!std.Target.x86.featureSetHas(target.cpu.features, .bmi2)) break :blk; + + const supports_64 = arch == .x86_64; + // Integer size doesn't match the available instruction(s) + if (!(bits <= 32 or (bits <= 64 and supports_64))) break :blk; + + const compiler_rt_bits = compilerRtIntBits(bits); + + var buf: ["x86.bmi.pdep.32".len]u8 = undefined; + const intrinsic = std.meta.stringToEnum(Builder.Intrinsic, std.fmt.bufPrint(&buf, "x86.bmi.{s}.{d}", .{ + switch (tag) { + .deposit_bits => "pdep", + .extract_bits 
=> "pext", + else => unreachable, + }, + compiler_rt_bits, + }) catch unreachable).?; + + const needs_extend = bits != compiler_rt_bits; + const extended_ty = if (needs_extend) try o.builder.intType(compiler_rt_bits) else llvm_ty; + + const params = .{ + if (needs_extend) try self.wip.cast(.zext, source, extended_ty, "") else source, + if (needs_extend) try self.wip.cast(.zext, mask, extended_ty, "") else mask, + }; + + const result = try self.wip.callIntrinsic( + .normal, + .none, + intrinsic, + &.{}, + ¶ms, + "", + ); + + return if (needs_extend) try self.wip.cast(.trunc, result, llvm_ty, "") else result; + }, + else => {}, + } + + return try self.genDepositExtractBitsEmulated(tag, bits, source, mask, llvm_ty); + } + + fn genDepositExtractBitsEmulated(self: *FuncGen, tag: Air.Inst.Tag, bits: u16, source: Builder.Value, mask: Builder.Value, ty: Builder.Type) !Builder.Value { + const o = self.dg.object; + const mod = o.module; + + if (bits <= 128) { + const compiler_rt_bits = compilerRtIntBits(bits); + const needs_extend = bits != compiler_rt_bits; + const extended_ty = if (needs_extend) try o.builder.intType(compiler_rt_bits) else ty; + + const fn_name = try o.builder.strtabStringFmt("__{s}_u{d}", .{ + switch (tag) { + .deposit_bits => "pdep", + .extract_bits => "pext", + else => unreachable, + }, + compiler_rt_bits, + }); + + const params = .{ + if (needs_extend) try self.wip.cast(.zext, source, extended_ty, "") else source, + if (needs_extend) try self.wip.cast(.zext, mask, extended_ty, "") else mask, + }; + + const libc_fn = try self.getLibcFunction(fn_name, &.{ extended_ty, extended_ty }, extended_ty); + const result = try self.wip.call( + .normal, + .ccc, + .none, + libc_fn.typeOf(&o.builder), + libc_fn.toValue(&o.builder), + ¶ms, + "", + ); + + return if (needs_extend) try self.wip.cast(.trunc, result, ty, "") else result; + } + + // Rounded bits to the nearest 32, as limb size is 32. 
+ const extended_bits = (((bits - 1) / 32) + 1) * 32; + const needs_extend = bits != extended_bits; + const extended_ty = if (needs_extend) try o.builder.intType(extended_bits) else ty; + + const source_extended = if (needs_extend) try self.wip.cast(.zext, source, extended_ty, "") else source; + const mask_extended = if (needs_extend) try self.wip.cast(.zext, mask, extended_ty, "") else mask; + const zeroes_extended = try o.builder.intValue(extended_ty, 0); + + const alignment = Type.u32.abiAlignment(mod).toLlvm(); + + const source_pointer = try self.buildAlloca(extended_ty, alignment); + const mask_pointer = try self.buildAlloca(extended_ty, alignment); + const result_pointer = try self.buildAlloca(extended_ty, alignment); + + _ = try self.wip.store(.normal, source_extended, source_pointer, alignment); + _ = try self.wip.store(.normal, mask_extended, mask_pointer, alignment); + _ = try self.wip.store(.normal, zeroes_extended, result_pointer, alignment); + + const fn_name = try o.builder.strtabStringFmt("__{s}_bigint", .{switch (tag) { + .deposit_bits => "pdep", + .extract_bits => "pext", + else => unreachable, + }}); + + const pointer_ty = source_pointer.typeOfWip(&self.wip); + const usize_ty = try o.lowerType(Type.usize); + const void_ty = try o.lowerType(Type.void); + + const bits_value = try o.builder.intValue(usize_ty, bits); + + const params = .{ + result_pointer, + source_pointer, + mask_pointer, + bits_value, + }; + + const libc_fn = try self.getLibcFunction(fn_name, &.{ pointer_ty, pointer_ty, pointer_ty, usize_ty }, void_ty); + _ = try self.wip.call( + .normal, + .ccc, + .none, + libc_fn.typeOf(&o.builder), + libc_fn.toValue(&o.builder), + ¶ms, + "", + ); + + const result = try self.wip.load(.normal, extended_ty, result_pointer, alignment, ""); + return if (needs_extend) try self.wip.cast(.trunc, result, ty, "") else result; + } + fn getErrorNameTable(self: *FuncGen) Allocator.Error!Builder.Variable.Index { const o = self.dg.object; const mod = o.module; 
diff --git a/src/codegen/llvm/Builder.zig b/src/codegen/llvm/Builder.zig index 000223499b6f..30cb86e69432 100644 --- a/src/codegen/llvm/Builder.zig +++ b/src/codegen/llvm/Builder.zig @@ -2733,6 +2733,12 @@ pub const Intrinsic = enum { @"wasm.memory.size", @"wasm.memory.grow", + // x86 PDEP/PEXT + @"x86.bmi.pdep.32", + @"x86.bmi.pdep.64", + @"x86.bmi.pext.32", + @"x86.bmi.pext.64", + const Signature = struct { ret_len: u8, params: []const Parameter, @@ -3903,6 +3909,43 @@ pub const Intrinsic = enum { }, .attrs = &.{ .nocallback, .nofree, .nosync, .nounwind, .willreturn }, }, + + .@"x86.bmi.pext.32" = .{ + .ret_len = 1, + .params = &.{ + .{ .kind = .{ .type = .i32 } }, + .{ .kind = .{ .type = .i32 } }, + .{ .kind = .{ .type = .i32 } }, + }, + .attrs = &.{ .nocallback, .nofree, .nosync, .nounwind, .{ .memory = Attribute.Memory.all(.none) } }, + }, + .@"x86.bmi.pext.64" = .{ + .ret_len = 1, + .params = &.{ + .{ .kind = .{ .type = .i64 } }, + .{ .kind = .{ .type = .i64 } }, + .{ .kind = .{ .type = .i64 } }, + }, + .attrs = &.{ .nocallback, .nofree, .nosync, .nounwind, .{ .memory = Attribute.Memory.all(.none) } }, + }, + .@"x86.bmi.pdep.32" = .{ + .ret_len = 1, + .params = &.{ + .{ .kind = .{ .type = .i32 } }, + .{ .kind = .{ .type = .i32 } }, + .{ .kind = .{ .type = .i32 } }, + }, + .attrs = &.{ .nocallback, .nofree, .nosync, .nounwind, .{ .memory = Attribute.Memory.all(.none) } }, + }, + .@"x86.bmi.pdep.64" = .{ + .ret_len = 1, + .params = &.{ + .{ .kind = .{ .type = .i64 } }, + .{ .kind = .{ .type = .i64 } }, + .{ .kind = .{ .type = .i64 } }, + }, + .attrs = &.{ .nocallback, .nofree, .nosync, .nounwind, .{ .memory = Attribute.Memory.all(.none) } }, + }, }); }; diff --git a/src/print_air.zig b/src/print_air.zig index 12e2825d4ef0..e1a8a4ceeeb7 100644 --- a/src/print_air.zig +++ b/src/print_air.zig @@ -162,6 +162,8 @@ const Writer = struct { .memcpy, .memset, .memset_safe, + .deposit_bits, + .extract_bits, => try w.writeBinOp(s, inst), .is_null, diff --git 
a/src/print_zir.zig b/src/print_zir.zig index dfe94d397097..311d1d1c2240 100644 --- a/src/print_zir.zig +++ b/src/print_zir.zig @@ -591,6 +591,8 @@ const Writer = struct { .wasm_memory_grow, .prefetch, .c_va_arg, + .deposit_bits, + .extract_bits, => { const inst_data = self.code.extraData(Zir.Inst.BinNode, extended.operand).data; const src = LazySrcLoc.nodeOffset(inst_data.node); diff --git a/test/behavior.zig b/test/behavior.zig index 3081f6c9f969..d131b498e9b0 100644 --- a/test/behavior.zig +++ b/test/behavior.zig @@ -21,9 +21,10 @@ test { _ = @import("behavior/comptime_memory.zig"); _ = @import("behavior/const_slice_child.zig"); _ = @import("behavior/decltest.zig"); - _ = @import("behavior/duplicated_test_names.zig"); _ = @import("behavior/defer.zig"); + _ = @import("behavior/deposit_extract_bits.zig"); _ = @import("behavior/destructure.zig"); + _ = @import("behavior/duplicated_test_names.zig"); _ = @import("behavior/empty_tuple_fields.zig"); _ = @import("behavior/empty_union.zig"); _ = @import("behavior/enum.zig"); diff --git a/test/behavior/deposit_extract_bits.zig b/test/behavior/deposit_extract_bits.zig new file mode 100644 index 000000000000..fb393866be08 --- /dev/null +++ b/test/behavior/deposit_extract_bits.zig @@ -0,0 +1,147 @@ +const std = @import("std"); +const builtin = @import("builtin"); +const mem = std.mem; +const expect = std.testing.expect; +const expectEqual = std.testing.expectEqual; + +const supports_pext_pdep = switch (builtin.zig_backend) { + .stage2_llvm => true, + .stage2_x86_64 => true, + else => false, +}; + +test "@depositBits" { + if (!supports_pext_pdep) return error.SkipZigTest; // TODO + + const S = struct { + pub fn doTheTest() !void { + var a: u64 = 0; + var b: u64 = 0xFFFF_FFFF_FFFF_FFFF; + var c: u64 = 0x1234_5678_9012_3456; + var d: u64 = 0x00F0_FF00_F00F_00FF; + + _ = &a; + _ = &b; + _ = &c; + _ = &d; + + try expect(@depositBits(b, a) == 0); + try expect(@depositBits(a, b) == 0); + + try expect(@depositBits(b, c) == c); + try 
expect(@depositBits(b, d) == d); + + try expect(@depositBits(c, d) == 0x0000_1200_3004_0056); + } + }; + + try S.doTheTest(); + try comptime S.doTheTest(); +} + +test "@depositBits u128" { + if (!supports_pext_pdep) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64 and builtin.target.os.tag == .windows) return error.SkipZigTest; // TODO #19498 + + const S = struct { + pub fn doTheTest() !void { + var a: u64 = 0x1234_5678_9012_3456; + var b: u128 = 0x00F0_FF00_F00F_00FF << 64; + + _ = &a; + _ = &b; + + try expect(@depositBits(a, b) == 0x0000_1200_3004_0056 << 64); + } + }; + + try S.doTheTest(); + try comptime S.doTheTest(); +} + +test "@depositBits u256" { + if (!supports_pext_pdep) return error.SkipZigTest; // TODO + + const S = struct { + pub fn doTheTest() !void { + var a: u64 = 0x1234_5678_9ABC_DEF0; + var b: u256 = 0x0F00_0FF0_0F0F_FF00 << 174; + + _ = &a; + _ = &b; + + try expect(@depositBits(a, b) == 0x0A00_0BC0_0D0E_F000 << 174); + } + }; + + try S.doTheTest(); + try comptime S.doTheTest(); +} + +test "@extractBits" { + if (!supports_pext_pdep) return error.SkipZigTest; // TODO + + const S = struct { + pub fn doTheTest() !void { + var a: u64 = 0; + var b: u64 = 0xFFFF_FFFF_FFFF_FFFF; + var c: u64 = 0x1234_5678_9012_3456; + var d: u64 = 0x00F0_FF00_F00F_00FF; + + _ = &a; + _ = &b; + _ = &c; + _ = &d; + + try expect(@extractBits(b, a) == 0); + try expect(@extractBits(a, b) == 0); + + try expect(@extractBits(c, b) == c); + try expect(@extractBits(d, b) == d); + + try expect(@extractBits(c, d) == 0x0356_9256); + } + }; + + try S.doTheTest(); + try comptime S.doTheTest(); +} + +test "@extractBits u128" { + if (!supports_pext_pdep) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64 and builtin.target.os.tag == .windows) return error.SkipZigTest; // TODO #19498 + + const S = struct { + pub fn doTheTest() !void { + var a: u128 = 0x1234_5678_9012_3456 << 64; + var b: u128 = 0x00F0_FF00_F00F_00FF << 64; + + _ = 
&a; + _ = &b; + + try expect(@extractBits(a, b) == 0x0356_9256); + } + }; + + try S.doTheTest(); + try comptime S.doTheTest(); +} + +test "@extractBits u256" { + if (!supports_pext_pdep) return error.SkipZigTest; // TODO + + const S = struct { + pub fn doTheTest() !void { + var a: u256 = 0x1234_5678_9ABC_DEF0 << 96; + var b: u256 = 0x0F00_0FF0_0F0F_FF00 << 96; + + _ = &a; + _ = &b; + + try expect(@extractBits(a, b) == 0x0267_ACDE); + } + }; + + try S.doTheTest(); + try comptime S.doTheTest(); +}