diff --git a/doc/langref.html.in b/doc/langref.html.in index 7c184c8f3688..e1c1aa268c43 100644 --- a/doc/langref.html.in +++ b/doc/langref.html.in @@ -8292,6 +8292,33 @@ test "main" { {#see_also|@cVaArg|@cVaCopy|@cVaEnd#} {#header_close#} + {#header_open|@depositBits#} +
{#syntax#}@depositBits(source: T, mask: T) T{#endsyntax#}
+

+ {#syntax#}T{#endsyntax#} must be an unsigned integer type or {#syntax#}comptime_int{#endsyntax#} (in which case both operands must be non-negative). {#syntax#}T{#endsyntax#} is determined by peer type resolution of the two operands. +

+

+ Copies contiguous low-order bits of the {#syntax#}source{#endsyntax#} operand into the result at the bit positions that are set in {#syntax#}mask{#endsyntax#}, from least to most significant; one source bit is consumed per set mask bit. All other bits of the result are zeroed. +

+

+ Currently, only x86 processors with the BMI2 feature support this operation in hardware; on all other processors it is emulated in software. AMD processors before Zen 3 implement the corresponding instruction (PDEP) in microcode, so in both of these cases an alternative algorithm may be faster. +

+

+ Example: +

+ + {#code_begin|test|test_depositbits_builtin#} +const std = @import("std"); + +test "deposit bits" { + comptime { + try std.testing.expectEqual(@depositBits(0x00001234, 0xf0f0f0f0), 0x10203040); + } +} + {#code_end#} + {#see_also|@extractBits#} + {#header_close#} + {#header_open|@divExact#}
{#syntax#}@divExact(numerator: T, denominator: T) T{#endsyntax#}

@@ -8462,6 +8489,33 @@ export fn @"A function name that is a complete sentence."() void {} {#see_also|@export#} {#header_close#} + {#header_open|@extractBits#} +

{#syntax#}@extractBits(source: T, mask: T) T{#endsyntax#}
+

+ {#syntax#}T{#endsyntax#} must be an unsigned integer type or {#syntax#}comptime_int{#endsyntax#} (in which case both operands must be non-negative). {#syntax#}T{#endsyntax#} is determined by peer type resolution of the two operands. +

+

+ Copies the bits of the {#syntax#}source{#endsyntax#} operand selected by {#syntax#}mask{#endsyntax#} into the contiguous low-order bits of the result, preserving their order; one result bit is produced per set mask bit. The remaining upper bits of the result are zeroed. +

+

+ Currently, only x86 processors with the BMI2 feature support this operation in hardware; on all other processors it is emulated in software. AMD processors before Zen 3 implement the corresponding instruction (PEXT) in microcode, so in both of these cases an alternative algorithm may be faster. +

+

+ Example: +

+ + {#code_begin|test|test_extractbits_builtin#} +const std = @import("std"); + +test "extract bits" { + comptime { + try std.testing.expectEqual(@extractBits(0x12345678, 0xf0f0f0f0), 0x00001357); + } +} + {#code_end#} + {#see_also|@depositBits#} + {#header_close#} + + {#header_open|@fence#}
{#syntax#}@fence(order: AtomicOrder) void{#endsyntax#}

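For intuition, the semantics documented above can be written as plain Zig over a fixed-width unsigned integer. This is only an illustrative sketch (the function names are invented for this note and are not part of the change); it mirrors the software-fallback loop that the LLVM backend changes further down implement:

const std = @import("std");

// Reference semantics of the two builtins for a concrete unsigned type.
fn depositBitsRef(source: u64, mask: u64) u64 {
    var m = mask;
    var bb: u64 = 1; // walks the contiguous low-order bits of `source`
    var result: u64 = 0;
    while (m != 0) {
        const bit = m & (~m +% 1); // lowest set bit of the mask
        m &= ~bit;
        if (source & bb != 0) result |= bit;
        bb +%= bb;
    }
    return result;
}

fn extractBitsRef(source: u64, mask: u64) u64 {
    var m = mask;
    var bb: u64 = 1; // next contiguous low-order bit of the result
    var result: u64 = 0;
    while (m != 0) {
        const bit = m & (~m +% 1); // lowest set bit of the mask
        m &= ~bit;
        if (source & bit != 0) result |= bb;
        bb +%= bb;
    }
    return result;
}

test "reference semantics match the langref examples" {
    try std.testing.expectEqual(@as(u64, 0x10203040), depositBitsRef(0x0000_1234, 0xf0f0_f0f0));
    try std.testing.expectEqual(@as(u64, 0x0000_1357), extractBitsRef(0x1234_5678, 0xf0f0_f0f0));
}

The real builtins additionally accept comptime_int operands, which is why the Sema changes below evaluate them with std.math.big rather than with a fixed-width loop.
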
diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig index 846a809e0565..2e973f42be4e 100644 --- a/lib/std/math/big/int.zig +++ b/lib/std/math/big/int.zig @@ -1732,6 +1732,98 @@ pub const Mutable = struct { y.shiftRight(y.toConst(), norm_shift); } + // TODO this function is quite inefficient and could be optimised + /// r = @depositBits(source, mask) + /// + /// Asserts that `source` and `mask` are positive + pub fn depositBits(r: *Mutable, source: Const, mask: Const) void { + assert(source.positive); + assert(mask.positive); + + r.positive = true; + @memset(r.limbs, 0); + + var mask_limb: Limb = mask.limbs[0]; + var mask_limb_index: Limb = 0; + var i: usize = 0; + outer: while (true) : (i += 1) { + // Find next bit in mask + const mask_limb_bit: Log2Limb = limb_bit: while (true) { + const mask_limb_tz = @ctz(mask_limb); + if (mask_limb_tz != @sizeOf(Limb) * 8) { + const cast_limb_bit = @intCast(Log2Limb, mask_limb_tz); + mask_limb ^= @as(Limb, 1) << cast_limb_bit; + break :limb_bit cast_limb_bit; + } + + mask_limb_index += 1; + // No more limbs, we've finished iterating the mask + if (mask_limb_index >= mask.limbs.len) { + break :outer; + } + + mask_limb = mask.limbs[mask_limb_index]; + }; + + const i_limb_index = i / limb_bits; + const i_limb_bit = @truncate(Log2Limb, i); + + if (i_limb_index >= source.limbs.len) break; // Stop when we reach the end of `source` (we can treat the rest as zeroes) + + const source_bit_set = source.limbs[i_limb_index] & (@as(Limb, 1) << i_limb_bit) != 0; + + r.limbs[mask_limb_index] |= @as(Limb, @intFromBool(source_bit_set)) << mask_limb_bit; + } + + r.normalize(r.limbs.len); + } + + // TODO this function is quite inefficient and could be optimised + /// r = @extractBits(source, mask) + /// + /// Asserts that `source` and `mask` are positive + pub fn extractBits(r: *Mutable, source: Const, mask: Const) void { + assert(source.positive); + assert(mask.positive); + + r.positive = true; + @memset(r.limbs, 0); + + var mask_limb: Limb = mask.limbs[0]; + var mask_limb_index: Limb = 0; + var i: usize = 0; + outer: while (true) : (i += 1) { + // Find next bit in mask + const mask_limb_bit: Log2Limb = limb_bit: while (true) { + const mask_limb_tz = @ctz(mask_limb); + if (mask_limb_tz != @sizeOf(Limb) * 8) { + const cast_limb_bit = @intCast(Log2Limb, mask_limb_tz); + mask_limb ^= @as(Limb, 1) << cast_limb_bit; + break :limb_bit cast_limb_bit; + } + + mask_limb_index += 1; + // No more limbs, we've finished iterating the mask + if (mask_limb_index >= mask.limbs.len) { + break :outer; + } + + mask_limb = mask.limbs[mask_limb_index]; + }; + + const i_limb_index = i / limb_bits; + const i_limb_bit = @truncate(Log2Limb, i); + + if (mask_limb_index >= source.limbs.len) break; // Stop when we reach the end of `source` (we can treat the rest as zeroes) + + const source_bit_set = source.limbs[mask_limb_index] & (@as(Limb, 1) << mask_limb_bit) != 0; + + r.limbs[i_limb_index] |= @as(Limb, @intFromBool(source_bit_set)) << i_limb_bit; + } + + r.normalize(r.limbs.len); + } + /// If a is positive, this passes through to truncate. /// If a is negative, then r is set to positive with the bit pattern ~(a - 1). /// r may alias a. 
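A usage sketch for the new big-int methods (illustrative only, not part of the change): the caller supplies the result limbs, and allocating mask.limbs.len limbs is always sufficient because bits can only be written at positions covered by the mask. This is the sizing used both by the test helpers below and by Sema's intDepositBits/intExtractBits.

const std = @import("std");
const big = std.math.big;

fn depositBitsBigSketch(allocator: std.mem.Allocator) !void {
    var source = try big.int.Managed.initSet(allocator, 0x12345678);
    defer source.deinit();
    var mask = try big.int.Managed.initSet(allocator, 0xf0f0f0f0);
    defer mask.deinit();

    // Result storage: `mask.limbs.len` limbs are always enough.
    const limbs = try allocator.alloc(big.Limb, mask.limbs.len);
    defer allocator.free(limbs);
    var result = big.int.Mutable{ .limbs = limbs, .positive = undefined, .len = undefined };

    result.depositBits(source.toConst(), mask.toConst());
    // The low 16 source bits 0x5678 are scattered into the high nibble of each
    // mask byte, giving 0x50607080.
    std.debug.assert(result.toConst().orderAgainstScalar(0x50607080) == .eq);
}

As the TODO above notes, the loop runs once per set bit in the mask, so it is linear in @popCount(mask); a limb-at-a-time implementation could be faster.
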
diff --git a/lib/std/math/big/int_test.zig b/lib/std/math/big/int_test.zig index 9c3c1b68815f..25705bdcf985 100644 --- a/lib/std/math/big/int_test.zig +++ b/lib/std/math/big/int_test.zig @@ -2762,6 +2762,54 @@ fn popCountTest(val: *const Managed, bit_count: usize, expected: usize) !void { try testing.expectEqual(expected, val.toConst().popCount(bit_count)); } +test "big int extractBits" { + try extractBitsTest(0x12345678, 0x0, 0x0); + try extractBitsTest(0x12345678, 0xf0f0f0f0, 0x1357); + try extractBitsTest(0x12345678, 0xff00ff00, 0x1256); + try extractBitsTest(0x12345678, 0xffff, 0x5678); + + try extractBitsTest(0x12345678_90123456_78901234_56789012, 0xff << 64, 0x56); + try extractBitsTest(0x12345678_90123456_78901234_56789012, (0xff << 64) | 0xff00f, 0x56892); +} + +fn extractBitsTest(comptime source: comptime_int, comptime mask: comptime_int, comptime expected: comptime_int) !void { + var source_bigint = try Managed.initSet(testing.allocator, source); + defer source_bigint.deinit(); + var mask_bigint = try Managed.initSet(testing.allocator, mask); + defer mask_bigint.deinit(); + const limbs = try testing.allocator.alloc(Limb, mask_bigint.limbs.len); + defer testing.allocator.free(limbs); + var result = Mutable{ .limbs = limbs, .positive = undefined, .len = undefined }; + + result.extractBits(source_bigint.toConst(), mask_bigint.toConst()); + + try testing.expectEqual(std.math.Order.eq, result.toConst().orderAgainstScalar(expected)); +} + +test "big int depositBits" { + try depositBitsTest(0x12345678, 0x0, 0x0); + try depositBitsTest(0x12345678, 0xf0f0f0f0, 0x50607080); + try depositBitsTest(0x12345678, 0xff00ff00, 0x56007800); + try depositBitsTest(0x12345678, 0xffff, 0x5678); + + try depositBitsTest(0x1234, 0xff << 64, 0x34_00000000_00000000); + try depositBitsTest(0x12345678, (0xff << 64) | 0xff00f, 0x45_00000000_00067008); +} + +fn depositBitsTest(comptime source: comptime_int, comptime mask: comptime_int, comptime expected: comptime_int) !void { + var source_bigint = try Managed.initSet(testing.allocator, source); + defer source_bigint.deinit(); + var mask_bigint = try Managed.initSet(testing.allocator, mask); + defer mask_bigint.deinit(); + const limbs = try testing.allocator.alloc(Limb, mask_bigint.limbs.len); + defer testing.allocator.free(limbs); + var result = Mutable{ .limbs = limbs, .positive = undefined, .len = undefined }; + + result.depositBits(source_bigint.toConst(), mask_bigint.toConst()); + + try testing.expectEqual(std.math.Order.eq, result.toConst().orderAgainstScalar(expected)); +} + test "big int conversion read/write twos complement" { var a = try Managed.initSet(testing.allocator, (1 << 493) - 1); defer a.deinit(); diff --git a/src/Air.zig b/src/Air.zig index d4d4de07f28c..8a080efb3576 100644 --- a/src/Air.zig +++ b/src/Air.zig @@ -822,6 +822,13 @@ pub const Inst = struct { /// Operand is unused and set to Ref.none work_group_id, + /// Implements @depositBits builtin. + /// Uses the `bin_op` field. + deposit_bits, + /// Implements @extractBits builtin. + /// Uses the `bin_op` field. 
+ extract_bits, + pub fn fromCmpOp(op: std.math.CompareOperator, optimized: bool) Tag { switch (op) { .lt => return if (optimized) .cmp_lt_optimized else .cmp_lt, @@ -1232,6 +1239,8 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index, ip: *const InternPool) Type { .div_exact_optimized, .rem_optimized, .mod_optimized, + .deposit_bits, + .extract_bits, => return air.typeOf(datas[inst].bin_op.lhs, ip), .sqrt, @@ -1742,6 +1751,8 @@ pub fn mustLower(air: Air, inst: Air.Inst.Index, ip: *const InternPool) bool { .work_item_id, .work_group_size, .work_group_id, + .deposit_bits, + .extract_bits, => false, .assembly => @truncate(u1, air.extraData(Air.Asm, data.ty_pl.payload).data.flags >> 31) != 0, diff --git a/src/AstGen.zig b/src/AstGen.zig index f1acd7e3e3c2..3c81526dac8b 100644 --- a/src/AstGen.zig +++ b/src/AstGen.zig @@ -8699,6 +8699,9 @@ fn builtinCall( }); return rvalue(gz, ri, result, node); }, + + .deposit_bits => return depositExtractBits(gz, scope, ri, node, params, .deposit_bits), + .extract_bits => return depositExtractBits(gz, scope, ri, node, params, .extract_bits), } } @@ -8966,6 +8969,24 @@ fn overflowArithmetic( return rvalue(gz, ri, result, node); } +fn depositExtractBits( + gz: *GenZir, + scope: *Scope, + ri: ResultInfo, + node: Ast.Node.Index, + params: []const Ast.Node.Index, + tag: Zir.Inst.Extended, +) InnerError!Zir.Inst.Ref { + const lhs = try expr(gz, scope, .{ .rl = .none }, params[0]); + const rhs = try expr(gz, scope, .{ .rl = .none }, params[1]); + const result = try gz.addExtendedPayload(tag, Zir.Inst.BinNode{ + .node = gz.nodeIndexToRelative(node), + .lhs = lhs, + .rhs = rhs, + }); + return rvalue(gz, ri, result, node); +} + fn callExpr( gz: *GenZir, scope: *Scope, diff --git a/src/BuiltinFn.zig b/src/BuiltinFn.zig index 27b963f52871..769f191c78c7 100644 --- a/src/BuiltinFn.zig +++ b/src/BuiltinFn.zig @@ -35,6 +35,7 @@ pub const Tag = enum { c_va_copy, c_va_end, c_va_start, + deposit_bits, div_exact, div_floor, div_trunc, @@ -46,6 +47,7 @@ pub const Tag = enum { err_set_cast, @"export", @"extern", + extract_bits, fence, field, field_parent_ptr, @@ -396,6 +398,12 @@ pub const list = list: { .param_count = 0, }, }, + .{ + "@depositBits", .{ + .tag = .deposit_bits, + .param_count = 2, + }, + }, .{ "@divExact", .{ @@ -474,6 +482,13 @@ pub const list = list: { .param_count = 2, }, }, + .{ + "@extractBits", + .{ + .tag = .extract_bits, + .param_count = 2, + }, + }, .{ "@fence", .{ diff --git a/src/Liveness.zig b/src/Liveness.zig index 2ba029136406..5ae53c575bdb 100644 --- a/src/Liveness.zig +++ b/src/Liveness.zig @@ -286,6 +286,8 @@ pub fn categorizeOperand( .cmp_gte_optimized, .cmp_gt_optimized, .cmp_neq_optimized, + .deposit_bits, + .extract_bits, => { const o = air_datas[inst].bin_op; if (o.lhs == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none); @@ -942,6 +944,8 @@ fn analyzeInst( .memset, .memset_safe, .memcpy, + .deposit_bits, + .extract_bits, => { const o = inst_datas[inst].bin_op; return analyzeOperands(a, pass, data, inst, .{ o.lhs, o.rhs, .none }); diff --git a/src/Liveness/Verify.zig b/src/Liveness/Verify.zig index 904e38007353..f81f62ad0a3d 100644 --- a/src/Liveness/Verify.zig +++ b/src/Liveness/Verify.zig @@ -261,6 +261,8 @@ fn verifyBody(self: *Verify, body: []const Air.Inst.Index) Error!void { .memset, .memset_safe, .memcpy, + .deposit_bits, + .extract_bits, => { const bin_op = data[inst].bin_op; try self.verifyInstOperands(inst, .{ bin_op.lhs, bin_op.rhs, .none }); diff --git a/src/Sema.zig b/src/Sema.zig index bb2ef22ca560..a7baab078f63 
100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -1183,6 +1183,8 @@ fn analyzeBodyInner( .work_group_size => try sema.zirWorkItem( block, extended, extended.opcode), .work_group_id => try sema.zirWorkItem( block, extended, extended.opcode), .in_comptime => try sema.zirInComptime( block), + .deposit_bits => try sema.zirDepositExtractBits(block, extended, .deposit_bits), + .extract_bits => try sema.zirDepositExtractBits(block, extended, .extract_bits), // zig fmt: on .fence => { @@ -24145,6 +24147,127 @@ fn zirInComptime( } } +fn zirDepositExtractBits( + sema: *Sema, + block: *Block, + extended: Zir.Inst.Extended.InstData, + air_tag: Air.Inst.Tag, +) CompileError!Air.Inst.Ref { + const mod = sema.mod; + const extra = sema.code.extraData(Zir.Inst.BinNode, extended.operand).data; + const src = LazySrcLoc.nodeOffset(extra.node); + + const lhs_src: LazySrcLoc = .{ .node_offset_builtin_call_arg0 = extra.node }; + const rhs_src: LazySrcLoc = .{ .node_offset_builtin_call_arg1 = extra.node }; + + const uncasted_lhs = try sema.resolveInst(extra.lhs); + const uncasted_rhs = try sema.resolveInst(extra.rhs); + + const lhs_ty = sema.typeOf(uncasted_lhs); + const rhs_ty = sema.typeOf(uncasted_rhs); + + if (!lhs_ty.isUnsignedInt(mod) and lhs_ty.zigTypeTag(mod) != .ComptimeInt) { + return sema.fail(block, lhs_src, "expected unsigned integer or 'comptime_int', found '{}'", .{lhs_ty.fmt(mod)}); + } + + if (!rhs_ty.isUnsignedInt(mod) and rhs_ty.zigTypeTag(mod) != .ComptimeInt) { + return sema.fail(block, rhs_src, "expected unsigned integer or 'comptime_int', found '{}'", .{rhs_ty.fmt(mod)}); + } + + const instructions = &[_]Air.Inst.Ref{ uncasted_lhs, uncasted_rhs }; + const dest_ty = try sema.resolvePeerTypes(block, src, instructions, .{ + .override = &[_]?LazySrcLoc{ lhs_src, rhs_src }, + }); + + const builtin_name = switch (air_tag) { + .deposit_bits => "@depositBits", + .extract_bits => "@extractBits", + else => unreachable, + }; + + // Coercion errors are intercepted to add a note if the caller is attempting to pass a negative comptime_int + const lhs = sema.coerce(block, dest_ty, uncasted_lhs, lhs_src) catch |err| switch (err) { + error.AnalysisFail => { + const msg = sema.err orelse return err; + const val = (try sema.resolveMaybeUndefVal(uncasted_lhs)).?; + if (val.orderAgainstZero(mod) == .lt) { + try sema.errNote(block, src, msg, "parameters to {s} must be positive", .{builtin_name}); + } + return err; + }, + else => return err, + }; + + const rhs = sema.coerce(block, dest_ty, uncasted_rhs, rhs_src) catch |err| switch (err) { + error.AnalysisFail => { + const msg = sema.err orelse return err; + const val = (try sema.resolveMaybeUndefVal(uncasted_rhs)).?; + if (val.orderAgainstZero(mod) == .lt) { + try sema.errNote(block, src, msg, "parameters to {s} must be positive", .{builtin_name}); + } + return err; + }, + else => return err, + }; + + const maybe_lhs_val = try sema.resolveMaybeUndefVal(lhs); + const maybe_rhs_val = try sema.resolveMaybeUndefVal(rhs); + + // We check for negative values here only if the type is a comptime_int, as negative values + // would have otherwise been filtered out by coercion and the unsigned type restriction + if (dest_ty.zigTypeTag(mod) == .ComptimeInt) { + if (maybe_lhs_val) |lhs_val| { + if (!lhs_val.isUndef(mod) and lhs_val.orderAgainstZero(mod) == .lt) { + const err = try sema.errMsg(block, lhs_src, "use of negative value '{}'", .{lhs_val.fmtValue(lhs_ty, mod)}); + try sema.errNote(block, src, err, "parameters to {s} must be positive", .{builtin_name}); + return 
sema.failWithOwnedErrorMsg(err); + } + } + + if (maybe_rhs_val) |rhs_val| { + if (!rhs_val.isUndef(mod) and rhs_val.orderAgainstZero(mod) == .lt) { + const err = try sema.errMsg(block, rhs_src, "use of negative value '{}'", .{rhs_val.fmtValue(rhs_ty, mod)}); + try sema.errNote(block, src, err, "parameters to {s} must be positive", .{builtin_name}); + return sema.failWithOwnedErrorMsg(err); + } + } + } + + // If either of the operands are zero, the result is zero + // If either of the operands are undefined, the result is undefined + if (maybe_lhs_val) |lhs_val| { + if (lhs_val.orderAgainstZero(mod) == .eq) return sema.addConstant(dest_ty, try mod.intValue(dest_ty, 0)); + if (lhs_val.isUndef(mod)) return sema.addConstUndef(dest_ty); + } + if (maybe_rhs_val) |rhs_val| { + if (rhs_val.orderAgainstZero(mod) == .lt) return sema.addConstant(dest_ty, try mod.intValue(dest_ty, 0)); + if (rhs_val.isUndef(mod)) return sema.addConstUndef(dest_ty); + } + + if (maybe_lhs_val) |lhs_val| { + if (maybe_rhs_val) |rhs_val| { + const dest_val = switch (air_tag) { + .deposit_bits => try sema.intDepositBits(lhs_val, rhs_val, dest_ty), + .extract_bits => try sema.intExtractBits(lhs_val, rhs_val, dest_ty), + else => unreachable, + }; + + return sema.addConstant(dest_ty, dest_val); + } + } + + const runtime_src = if (maybe_lhs_val == null) lhs_src else rhs_src; + try sema.requireRuntimeBlock(block, src, runtime_src); + + return block.addInst(.{ + .tag = air_tag, + .data = .{ .bin_op = .{ + .lhs = lhs, + .rhs = rhs, + } }, + }); +} + fn requireRuntimeBlock(sema: *Sema, block: *Block, src: LazySrcLoc, runtime_src: ?LazySrcLoc) !void { if (block.is_comptime) { const msg = msg: { @@ -36247,6 +36370,62 @@ fn intAddWithOverflowScalar( }; } +/// Asserts that the values are positive +fn intDepositBits( + sema: *Sema, + lhs: Value, + rhs: Value, + ty: Type, +) !Value { + // TODO is this a performance issue? maybe we should try the operation without + // resorting to BigInt first. For non-bigints, @intDeposit could be used? + const mod = sema.mod; + const arena = sema.arena; + + var lhs_space: Value.BigIntSpace = undefined; + var rhs_space: Value.BigIntSpace = undefined; + const source = lhs.toBigInt(&lhs_space, mod); + const mask = rhs.toBigInt(&rhs_space, mod); + + const result_limbs = try arena.alloc( + std.math.big.Limb, + mask.limbs.len, + ); + + var result = std.math.big.int.Mutable{ .limbs = result_limbs, .positive = undefined, .len = undefined }; + + result.depositBits(source, mask); + return mod.intValue_big(ty, result.toConst()); +} + +/// Asserts that the values are positive +fn intExtractBits( + sema: *Sema, + lhs: Value, + rhs: Value, + ty: Type, +) !Value { + // TODO is this a performance issue? maybe we should try the operation without + // resorting to BigInt first. For non-bigints, @intExtract could be used? + const mod = sema.mod; + const arena = sema.arena; + + var lhs_space: Value.BigIntSpace = undefined; + var rhs_space: Value.BigIntSpace = undefined; + const source = lhs.toBigInt(&lhs_space, mod); + const mask = rhs.toBigInt(&rhs_space, mod); + + const result_limbs = try arena.alloc( + std.math.big.Limb, + mask.limbs.len, + ); + + var result = std.math.big.int.Mutable{ .limbs = result_limbs, .positive = undefined, .len = undefined }; + + result.extractBits(source, mask); + return mod.intValue_big(ty, result.toConst()); +} + /// Asserts the values are comparable. Both operands have type `ty`. /// For vectors, returns true if the comparison is true for ALL elements. 
/// diff --git a/src/Zir.zig b/src/Zir.zig index 4a0fdde24f95..7b6284d26412 100644 --- a/src/Zir.zig +++ b/src/Zir.zig @@ -1985,6 +1985,12 @@ pub const Inst = struct { /// with a specific value. For instance, this is used for the capture of an `errdefer`. /// This should never appear in a body. value_placeholder, + /// Implements the `@depositBits` builtin. + /// `operand` is payload index to `BinNode`. + deposit_bits, + /// Implements the `@extractBits` builtin. + /// `operand` is payload index to `BinNode`. + extract_bits, pub const InstData = struct { opcode: Extended, diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index 5080a0451a7c..076ce8764b89 100644 --- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -903,6 +903,9 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .work_item_id => unreachable, .work_group_size => unreachable, .work_group_id => unreachable, + + .deposit_bits => return self.fail("TODO implement deposit_bits", .{}), + .extract_bits => return self.fail("TODO implement extract_bits", .{}), // zig fmt: on } diff --git a/src/arch/arm/CodeGen.zig b/src/arch/arm/CodeGen.zig index 7ece4ba2e3a3..bcc651e99481 100644 --- a/src/arch/arm/CodeGen.zig +++ b/src/arch/arm/CodeGen.zig @@ -887,6 +887,9 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .work_item_id => unreachable, .work_group_size => unreachable, .work_group_id => unreachable, + + .deposit_bits => return self.fail("TODO implement deposit_bits", .{}), + .extract_bits => return self.fail("TODO implement extract_bits", .{}), // zig fmt: on } diff --git a/src/arch/riscv64/CodeGen.zig b/src/arch/riscv64/CodeGen.zig index cba1de92c1ed..65b1cfe6b0f4 100644 --- a/src/arch/riscv64/CodeGen.zig +++ b/src/arch/riscv64/CodeGen.zig @@ -717,6 +717,9 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .work_item_id => unreachable, .work_group_size => unreachable, .work_group_id => unreachable, + + .deposit_bits => return self.fail("TODO implement deposit_bits", .{}), + .extract_bits => return self.fail("TODO implement extract_bits", .{}), // zig fmt: on } if (std.debug.runtime_safety) { diff --git a/src/arch/sparc64/CodeGen.zig b/src/arch/sparc64/CodeGen.zig index f210f8e14461..4aa5e87311dd 100644 --- a/src/arch/sparc64/CodeGen.zig +++ b/src/arch/sparc64/CodeGen.zig @@ -735,6 +735,9 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .work_item_id => unreachable, .work_group_size => unreachable, .work_group_id => unreachable, + + .deposit_bits => @panic("TODO implement deposit_bits"), + .extract_bits => @panic("TODO implement extract_bits"), // zig fmt: on } diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig index efd5ea6642f9..d9be132d4fa4 100644 --- a/src/arch/wasm/CodeGen.zig +++ b/src/arch/wasm/CodeGen.zig @@ -2068,6 +2068,10 @@ fn genInst(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { .work_group_size, .work_group_id, => unreachable, + + .deposit_bits, + .extract_bits, + => |tag| return func.fail("TODO implement {s}", .{@tagName(tag)}), }; } diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index b4ef42b953c5..fabfc1877666 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -1985,6 +1985,9 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .work_item_id => unreachable, .work_group_size => unreachable, .work_group_id => unreachable, + + .deposit_bits => return self.fail("TODO implement 
deposit_bits", .{}), + .extract_bits => return self.fail("TODO implement extract_bits", .{}), // zig fmt: on } diff --git a/src/codegen/c.zig b/src/codegen/c.zig index 498eca4ce28b..a5c824669ed9 100644 --- a/src/codegen/c.zig +++ b/src/codegen/c.zig @@ -3084,6 +3084,9 @@ fn genBodyInner(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, .work_group_size, .work_group_id, => unreachable, + + .deposit_bits => return f.fail("TODO: C backend: implement deposit_bits", .{}), + .extract_bits => return f.fail("TODO: C backend: implement extract_bits", .{}), // zig fmt: on }; if (result_value == .new_local) { diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index 1f390edc0210..cdd5277570e9 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -4550,6 +4550,9 @@ pub const FuncGen = struct { .work_item_id => try self.airWorkItemId(inst), .work_group_size => try self.airWorkGroupSize(inst), .work_group_id => try self.airWorkGroupId(inst), + + .deposit_bits => try self.airDepositBits(inst), + .extract_bits => try self.airExtractBits(inst), // zig fmt: on }; if (opt_value) |val| { @@ -9447,6 +9450,302 @@ pub const FuncGen = struct { return self.amdgcnWorkIntrinsic(dimension, 0, "llvm.amdgcn.workgroup.id"); } + fn airDepositBits(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value { + if (self.liveness.isUnused(inst)) return null; + + const bin_op = self.air.instructions.items(.data)[inst].bin_op; + const lhs = try self.resolveInst(bin_op.lhs); + const rhs = try self.resolveInst(bin_op.rhs); + const inst_ty = self.typeOfIndex(inst); + + const target = self.dg.module.getTarget(); + const params = [2]*llvm.Value{ lhs, rhs }; + switch (target.cpu.arch) { + .x86, .x86_64 => |tag| blk: { + // Doesn't have pdep + if (!std.Target.x86.featureSetHas(target.cpu.features, .bmi2)) break :blk; + + const bits = inst_ty.intInfo(self.dg.module).bits; + const supports_64 = tag == .x86_64; + // Integer size doesn't match the available instruction(s) + if (!(bits <= 32 or (bits <= 64 and supports_64))) break :blk; + + return self.buildDepositBitsNative(inst_ty, params); + }, + else => {}, + } + + return self.buildDepositBitsEmulated(inst_ty, params); + } + + fn buildDepositBitsNative( + self: *FuncGen, + ty: Type, + params: [2]*llvm.Value, + ) !*llvm.Value { + const target = self.dg.module.getTarget(); + + assert(target.cpu.arch.isX86()); + assert(std.Target.x86.featureSetHas(target.cpu.features, .bmi2)); + + const bits = ty.intInfo(self.dg.module).bits; + const intrinsic_name = switch (bits) { + 1...32 => "llvm.x86.bmi.pdep.32", + 33...64 => "llvm.x86.bmi.pdep.64", + else => unreachable, + }; + const needs_extend = bits != 32 and bits != 64; + + var params_cast = params; + + // Cast to either a 32 or 64-bit integer + if (needs_extend) { + const llvm_extend_ty = self.context.intType(if (bits <= 32) 32 else 64); + params_cast = .{ + self.builder.buildZExt(params[0], llvm_extend_ty, ""), + self.builder.buildZExt(params[1], llvm_extend_ty, ""), + }; + } + + const llvm_fn = self.getIntrinsic(intrinsic_name, &.{}); + const result = self.builder.buildCall(llvm_fn.globalGetValueType(), llvm_fn, ¶ms_cast, 2, .Fast, .Auto, ""); + + // No cast needed! + if (!needs_extend) return result; + + // Cast back to the original integer size + const llvm_trunc_ty = try self.dg.lowerType(ty); + return self.builder.buildTrunc(result, llvm_trunc_ty, ""); + } + + // TODO Move this to compiler-rt (see #14609) + // + // Implements @depositBits(source, mask) in software + // (i.e. 
without platform-specific instructions) + // + // var bb = 1; + // var result = 0; + // do { + // const bit = mask & -mask; + // mask &= ~bit; + // const source_bit = source & bb; + // if (source_bit) result |= bit; + // bb += bb; + // } while (mask) + // + // return result; + fn buildDepositBitsEmulated( + self: *FuncGen, + ty: Type, + params: [2]*llvm.Value, + ) !*llvm.Value { + const llvm_ty = try self.dg.lowerType(ty); + + const source = params[0]; + const mask_start = params[1]; + const zero = llvm_ty.constNull(); + const one = llvm_ty.constInt(1, .False); + const minus_one = llvm_ty.constInt(@bitCast(c_ulonglong, @as(c_longlong, -1)), .True); + + const prev_block = self.builder.getInsertBlock(); + const loop_block = self.context.appendBasicBlock(self.llvm_func, "Loop"); + const after_block = self.context.appendBasicBlock(self.llvm_func, "After"); + + _ = self.builder.buildBr(loop_block); + self.builder.positionBuilderAtEnd(loop_block); + const mask_phi = self.builder.buildPhi(llvm_ty, ""); + const result_phi = self.builder.buildPhi(llvm_ty, ""); + const bb_phi = self.builder.buildPhi(llvm_ty, ""); + const minus_mask = self.builder.buildSub(zero, mask_phi, ""); + const bit = self.builder.buildAnd(mask_phi, minus_mask, ""); + const not_bit = self.builder.buildXor(bit, minus_one, ""); + const new_mask = self.builder.buildAnd(mask_phi, not_bit, ""); + const source_bit = self.builder.buildAnd(source, bb_phi, ""); + const source_bit_set = self.builder.buildICmp(.NE, source_bit, zero, ""); + const bit_or_zero = self.builder.buildSelect(source_bit_set, bit, zero, ""); // avoid using control flow + const new_result = self.builder.buildOr(result_phi, bit_or_zero, ""); + const new_bb = self.builder.buildAdd(bb_phi, bb_phi, ""); + const while_cond = self.builder.buildICmp(.NE, new_mask, zero, ""); + _ = self.builder.buildCondBr(while_cond, loop_block, after_block); + + mask_phi.addIncoming( + &[2]*llvm.Value{ mask_start, new_mask }, + &[2]*llvm.BasicBlock{ prev_block, loop_block }, + 2, + ); + + result_phi.addIncoming( + &[2]*llvm.Value{ zero, new_result }, + &[2]*llvm.BasicBlock{ prev_block, loop_block }, + 2, + ); + + bb_phi.addIncoming( + &[2]*llvm.Value{ one, new_bb }, + &[2]*llvm.BasicBlock{ prev_block, loop_block }, + 2, + ); + + self.builder.positionBuilderAtEnd(after_block); + const final_result = self.builder.buildPhi(llvm_ty, ""); + final_result.addIncoming( + &[1]*llvm.Value{new_result}, + &[1]*llvm.BasicBlock{loop_block}, + 1, + ); + + return final_result; + } + + fn airExtractBits(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value { + if (self.liveness.isUnused(inst)) return null; + + const bin_op = self.air.instructions.items(.data)[inst].bin_op; + const lhs = try self.resolveInst(bin_op.lhs); + const rhs = try self.resolveInst(bin_op.rhs); + const inst_ty = self.typeOfIndex(inst); + + const target = self.dg.module.getTarget(); + const params = [2]*llvm.Value{ lhs, rhs }; + switch (target.cpu.arch) { + .x86, .x86_64 => |tag| blk: { + // Doesn't have pext + if (!std.Target.x86.featureSetHas(target.cpu.features, .bmi2)) break :blk; + + const bits = inst_ty.intInfo(self.dg.module).bits; + const supports_64 = tag == .x86_64; + // Integer size doesn't match the available instruction(s) + if (!(bits <= 32 or (bits <= 64 and supports_64))) break :blk; + + return self.buildExtractBitsNative(inst_ty, params); + }, + else => {}, + } + + return self.buildExtractBitsEmulated(inst_ty, params); + } + + fn buildExtractBitsNative( + self: *FuncGen, + ty: Type, + params: [2]*llvm.Value, + ) 
!*llvm.Value { + const target = self.dg.module.getTarget(); + + assert(target.cpu.arch.isX86()); + assert(std.Target.x86.featureSetHas(target.cpu.features, .bmi2)); + + const bits = ty.intInfo(self.dg.module).bits; + const intrinsic_name = switch (bits) { + 1...32 => "llvm.x86.bmi.pext.32", + 33...64 => "llvm.x86.bmi.pext.64", + else => unreachable, + }; + const needs_extend = bits != 32 and bits != 64; + + var params_cast = params; + + // Cast to either a 32 or 64-bit integer + if (needs_extend) { + const llvm_extend_ty = self.context.intType(if (bits <= 32) 32 else 64); + params_cast = .{ + self.builder.buildZExt(params[0], llvm_extend_ty, ""), + self.builder.buildZExt(params[1], llvm_extend_ty, ""), + }; + } + + const llvm_fn = self.getIntrinsic(intrinsic_name, &.{}); + const result = self.builder.buildCall(llvm_fn.globalGetValueType(), llvm_fn, ¶ms_cast, 2, .Fast, .Auto, ""); + + // No cast needed! + if (!needs_extend) return result; + + // Cast back to the original integer size + const llvm_trunc_ty = try self.dg.lowerType(ty); + return self.builder.buildTrunc(result, llvm_trunc_ty, ""); + } + + // TODO Move this to compiler-rt (see #14609) + // + // Implements @extractBits(source, mask) in software + // (i.e. without platform-specific instructions) + // + // var bb = 1; + // var result = 0; + // do { + // const bit = mask & -mask; + // mask &= ~bit; + // const source_bit = source & bit; + // if (source_bit != 0) result |= bb; + // bb += bb; + // } while (mask) + // + // return result; + fn buildExtractBitsEmulated( + self: *FuncGen, + ty: Type, + params: [2]*llvm.Value, + ) !*llvm.Value { + const llvm_ty = try self.dg.lowerType(ty); + + const zero = llvm_ty.constNull(); + const one = llvm_ty.constInt(1, .False); + const minus_one = llvm_ty.constInt(@bitCast(c_ulonglong, @as(c_longlong, -1)), .True); + const source = params[0]; + const start_mask = params[1]; + const start_result = zero; + const start_bb = one; + + const prev_block = self.builder.getInsertBlock(); + const loop_block = self.context.appendBasicBlock(self.llvm_func, "Loop"); + const after_block = self.context.appendBasicBlock(self.llvm_func, "After"); + + _ = self.builder.buildBr(loop_block); + self.builder.positionBuilderAtEnd(loop_block); + const mask_phi = self.builder.buildPhi(llvm_ty, ""); + const result_phi = self.builder.buildPhi(llvm_ty, ""); + const bb_phi = self.builder.buildPhi(llvm_ty, ""); + const minus_mask = self.builder.buildSub(zero, mask_phi, ""); + const bit = self.builder.buildAnd(mask_phi, minus_mask, ""); + const not_bit = self.builder.buildXor(bit, minus_one, ""); + const new_mask = self.builder.buildAnd(mask_phi, not_bit, ""); + const source_bit = self.builder.buildAnd(source, bit, ""); + const source_bit_set = self.builder.buildICmp(.NE, source_bit, zero, ""); + const bb_or_zero = self.builder.buildSelect(source_bit_set, bb_phi, zero, ""); // avoid using control flow + const new_result = self.builder.buildOr(result_phi, bb_or_zero, ""); + const new_bb = self.builder.buildAdd(bb_phi, bb_phi, ""); + const while_cond = self.builder.buildICmp(.NE, new_mask, zero, ""); + _ = self.builder.buildCondBr(while_cond, loop_block, after_block); + + mask_phi.addIncoming( + &[2]*llvm.Value{ start_mask, new_mask }, + &[2]*llvm.BasicBlock{ prev_block, loop_block }, + 2, + ); + + result_phi.addIncoming( + &[2]*llvm.Value{ start_result, new_result }, + &[2]*llvm.BasicBlock{ prev_block, loop_block }, + 2, + ); + + bb_phi.addIncoming( + &[2]*llvm.Value{ start_bb, new_bb }, + &[2]*llvm.BasicBlock{ prev_block, 
loop_block }, + 2, + ); + + self.builder.positionBuilderAtEnd(after_block); + const final_result = self.builder.buildPhi(llvm_ty, ""); + final_result.addIncoming( + &[1]*llvm.Value{new_result}, + &[1]*llvm.BasicBlock{loop_block}, + 1, + ); + + return final_result; + } + fn getErrorNameTable(self: *FuncGen) !*llvm.Value { if (self.dg.object.error_name_table) |table| { return table; diff --git a/src/print_air.zig b/src/print_air.zig index d73ec308917f..700fdbffadef 100644 --- a/src/print_air.zig +++ b/src/print_air.zig @@ -173,6 +173,8 @@ const Writer = struct { .memcpy, .memset, .memset_safe, + .deposit_bits, + .extract_bits, => try w.writeBinOp(s, inst), .is_null, diff --git a/src/print_zir.zig b/src/print_zir.zig index 029157818957..b85cf3e2eff7 100644 --- a/src/print_zir.zig +++ b/src/print_zir.zig @@ -527,6 +527,8 @@ const Writer = struct { .prefetch, .addrspace_cast, .c_va_arg, + .deposit_bits, + .extract_bits, => { const inst_data = self.code.extraData(Zir.Inst.BinNode, extended.operand).data; const src = LazySrcLoc.nodeOffset(inst_data.node); diff --git a/test/behavior.zig b/test/behavior.zig index 6e9435c49ef4..3006a7602a3c 100644 --- a/test/behavior.zig +++ b/test/behavior.zig @@ -152,6 +152,7 @@ test { _ = @import("behavior/const_slice_child.zig"); _ = @import("behavior/decltest.zig"); _ = @import("behavior/duplicated_test_names.zig"); + _ = @import("behavior/deposit_extract_bits.zig"); _ = @import("behavior/defer.zig"); _ = @import("behavior/empty_tuple_fields.zig"); _ = @import("behavior/empty_union.zig"); diff --git a/test/behavior/deposit_extract_bits.zig b/test/behavior/deposit_extract_bits.zig new file mode 100644 index 000000000000..9f2bafe22560 --- /dev/null +++ b/test/behavior/deposit_extract_bits.zig @@ -0,0 +1,58 @@ +const std = @import("std"); +const builtin = @import("builtin"); +const mem = std.mem; +const expect = std.testing.expect; +const expectEqual = std.testing.expectEqual; + +test "@depositBits" { + if (builtin.zig_backend != .stage2_llvm) return error.SkipZigTest; // TODO + + const S = struct { + pub fn doTheTest() !void { + var a: u64 = 0; + var b: u64 = 0xFFFF_FFFF_FFFF_FFFF; + var c: u64 = 0x1234_5678_9012_3456; + var d: u64 = 0x00F0_FF00_F00F_00FF; + var e: u128 = @as(u128, d) << 64; + + try expect(@depositBits(b, a) == 0); + try expect(@depositBits(a, b) == 0); + + try expect(@depositBits(b, c) == c); + try expect(@depositBits(b, d) == d); + + try expect(@depositBits(c, d) == 0x0000_1200_3004_0056); + try expect(@depositBits(c, e) == 0x0000_1200_3004_0056 << 64); + } + }; + + try S.doTheTest(); + try comptime S.doTheTest(); +} + +test "@extractBits" { + if (builtin.zig_backend != .stage2_llvm) return error.SkipZigTest; // TODO + + const S = struct { + pub fn doTheTest() !void { + var a: u64 = 0; + var b: u64 = 0xFFFF_FFFF_FFFF_FFFF; + var c: u64 = 0x1234_5678_9012_3456; + var d: u64 = 0x00F0_FF00_F00F_00FF; + var e: u128 = @as(u128, c) << 64; + var f: u128 = @as(u128, d) << 64; + + try expect(@extractBits(b, a) == 0); + try expect(@extractBits(a, b) == 0); + + try expect(@extractBits(c, b) == c); + try expect(@extractBits(d, b) == d); + + try expect(@extractBits(c, d) == 0x0356_9256); + try expect(@extractBits(e, f) == 0x0356_9256); + } + }; + + try S.doTheTest(); + try comptime S.doTheTest(); +}
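
A property-style check that could complement the behavior tests above (hypothetical, not included in this change): extracting with a mask and then depositing with the same mask must recover exactly the masked bits.

const std = @import("std");

test "@extractBits then @depositBits round-trips the masked bits" {
    comptime {
        const x: u64 = 0x1234_5678_9012_3456;
        const mask: u64 = 0x00F0_FF00_F00F_00FF;
        // pdep(pext(x, m), m) == x & m for any x and m.
        try std.testing.expect(@depositBits(@extractBits(x, mask), mask) == x & mask);
    }
}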