diff --git a/lib/std/builtin.zig b/lib/std/builtin.zig index 53786c86616e..d352ac29dca5 100644 --- a/lib/std/builtin.zig +++ b/lib/std/builtin.zig @@ -767,8 +767,7 @@ pub fn default_panic(msg: []const u8, error_return_trace: ?*StackTrace) noreturn // Until self-hosted catches up with stage1 language features, we have a simpler // default panic function: - if ((builtin.zig_backend == .stage2_llvm and builtin.link_libc) or - builtin.zig_backend == .stage2_c or + if (builtin.zig_backend == .stage2_c or builtin.zig_backend == .stage2_wasm or builtin.zig_backend == .stage2_arm or builtin.zig_backend == .stage2_aarch64 or diff --git a/src/Sema.zig b/src/Sema.zig index 2cc9b824101a..471639ba963b 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -1574,6 +1574,12 @@ fn failWithErrorSetCodeMissing( }); } +fn failWithIntegerOverflow(sema: *Sema, block: *Block, src: LazySrcLoc, int_ty: Type, val: Value) CompileError { + return sema.fail(block, src, "overflow of integer type '{}' with value '{}'", .{ + int_ty.fmt(sema.mod), val.fmtValue(Type.@"comptime_int", sema.mod), + }); +} + /// We don't return a pointer to the new error note because the pointer /// becomes invalid when you add another one. fn errNote( @@ -8820,8 +8826,6 @@ fn zirShl( return sema.addConstant(lhs_ty, val); } else lhs_src; - // TODO: insert runtime safety check for shl_exact - const new_rhs = if (air_tag == .shl_sat) rhs: { // Limit the RHS type for saturating shl to be an integer as small as the LHS. if (rhs_is_comptime_int or @@ -8839,6 +8843,41 @@ fn zirShl( } else rhs; try sema.requireRuntimeBlock(block, runtime_src); + if (block.wantSafety()) { + const maybe_op_ov: ?Air.Inst.Tag = switch (air_tag) { + .shl_exact => .shl_with_overflow, + else => null, + }; + if (maybe_op_ov) |op_ov_tag| { + const op_ov_tuple_ty = try sema.overflowArithmeticTupleType(lhs_ty); + const op_ov = try block.addInst(.{ + .tag = op_ov_tag, + .data = .{ .ty_pl = .{ + .ty = try sema.addType(op_ov_tuple_ty), + .payload = try sema.addExtra(Air.Bin{ + .lhs = lhs, + .rhs = rhs, + }), + } }, + }); + const ov_bit = try sema.tupleFieldValByIndex(block, src, op_ov, 1, op_ov_tuple_ty); + const any_ov_bit = if (lhs_ty.zigTypeTag() == .Vector) + try block.addInst(.{ + .tag = .reduce, + .data = .{ .reduce = .{ + .operand = ov_bit, + .operation = .Or, + } }, + }) + else + ov_bit; + const zero_ov = try sema.addConstant(Type.@"u1", Value.zero); + const no_ov = try block.addBinOp(.cmp_eq, any_ov_bit, zero_ov); + + try sema.addSafetyCheck(block, no_ov, .shl_overflow); + return sema.tupleFieldValByIndex(block, src, op_ov, 0, op_ov_tuple_ty); + } + } return block.addBinOp(air_tag, lhs, new_rhs); } @@ -9417,32 +9456,29 @@ fn zirOverflowArithmetic( const ptr = sema.resolveInst(extra.ptr); const lhs_ty = sema.typeOf(lhs); + const rhs_ty = sema.typeOf(rhs); const mod = sema.mod; const target = mod.getTarget(); // Note, the types of lhs/rhs (also for shifting)/ptr are already correct as ensured by astgen. 
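// Illustration only (not part of the patch): the user-facing effect of the
// shl_exact safety check added above, assuming a safety-checked
// (Debug/ReleaseSafe) build:
//
//     var lhs: u8 = 0b1100_0000;
//     var amt: u3 = 2;
//     _ = @shlExact(lhs, amt); // runtime panic: "left shift overflowed bits"
//
// The lowering computes shl_with_overflow, OR-reduces the overflow bits for
// vector operands, and branches to the new .shl_overflow panic when the
// reduced bit is nonzero.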
+ try sema.checkVectorizableBinaryOperands(block, src, lhs_ty, rhs_ty, lhs_src, rhs_src); const dest_ty = lhs_ty; - if (dest_ty.zigTypeTag() != .Int) { - return sema.fail(block, src, "expected integer type, found '{}'", .{dest_ty.fmt(mod)}); + if (dest_ty.scalarType().zigTypeTag() != .Int) { + return sema.fail(block, src, "expected vector of integers or integer type, found '{}'", .{dest_ty.fmt(mod)}); } const maybe_lhs_val = try sema.resolveMaybeUndefVal(block, lhs_src, lhs); const maybe_rhs_val = try sema.resolveMaybeUndefVal(block, rhs_src, rhs); - const types = try sema.arena.alloc(Type, 2); - const values = try sema.arena.alloc(Value, 2); - const tuple_ty = try Type.Tag.tuple.create(sema.arena, .{ - .types = types, - .values = values, - }); - - types[0] = dest_ty; - types[1] = Type.initTag(.u1); - values[0] = Value.initTag(.unreachable_value); - values[1] = Value.initTag(.unreachable_value); + const tuple_ty = try sema.overflowArithmeticTupleType(dest_ty); + const ov_ty = tuple_ty.tupleFields().types[1]; + // TODO: Remove and use `ov_ty` instead. + // This is a temporary type used until overflow arithmetic properly returns `u1` instead of `bool`. + const overflowed_ty = if (dest_ty.zigTypeTag() == .Vector) try Type.vector(sema.arena, dest_ty.vectorLen(), Type.@"bool") else Type.@"bool"; const result: struct { - overflowed: enum { yes, no, undef }, + /// TODO: Rename to `overflow_bit` and make of type `u1`. + overflowed: Air.Inst.Ref, wrapped: Air.Inst.Ref, } = result: { switch (zir_tag) { @@ -9452,23 +9488,24 @@ fn zirOverflowArithmetic( // Otherwise, if either of the argument is undefined, undefined is returned. if (maybe_lhs_val) |lhs_val| { if (!lhs_val.isUndef() and lhs_val.compareWithZero(.eq)) { - break :result .{ .overflowed = .no, .wrapped = rhs }; + break :result .{ .overflowed = try sema.addBool(overflowed_ty, false), .wrapped = rhs }; } } if (maybe_rhs_val) |rhs_val| { if (!rhs_val.isUndef() and rhs_val.compareWithZero(.eq)) { - break :result .{ .overflowed = .no, .wrapped = lhs }; + break :result .{ .overflowed = try sema.addBool(overflowed_ty, false), .wrapped = lhs }; } } if (maybe_lhs_val) |lhs_val| { if (maybe_rhs_val) |rhs_val| { if (lhs_val.isUndef() or rhs_val.isUndef()) { - break :result .{ .overflowed = .undef, .wrapped = try sema.addConstUndef(dest_ty) }; + break :result .{ .overflowed = try sema.addConstUndef(overflowed_ty), .wrapped = try sema.addConstUndef(dest_ty) }; } const result = try lhs_val.intAddWithOverflow(rhs_val, dest_ty, sema.arena, target); - const inst = try sema.addConstant(dest_ty, result.wrapped_result); - break :result .{ .overflowed = if (result.overflowed) .yes else .no, .wrapped = inst }; + const overflowed = try sema.addConstant(overflowed_ty, result.overflowed); + const wrapped = try sema.addConstant(dest_ty, result.wrapped_result); + break :result .{ .overflowed = overflowed, .wrapped = wrapped }; } } }, @@ -9477,17 +9514,18 @@ fn zirOverflowArithmetic( // Otherwise, if either result is undefined, both results are undefined. 
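// For orientation (illustration only): the tuple built by
// overflowArithmeticTupleType, called above and defined later in this diff,
// has the shape
//
//     struct { u32, u1 }                          // scalar operands
//     struct { @Vector(4, u32), @Vector(4, u1) }  // vector operands
//
// where field 0 is the wrapped result and field 1 the (per-element) overflow
// bit; the bool-typed `overflowed_ty` above is the stopgap described by the
// TODO comments until the builtins return `u1`.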
if (maybe_rhs_val) |rhs_val| { if (rhs_val.isUndef()) { - break :result .{ .overflowed = .undef, .wrapped = try sema.addConstUndef(dest_ty) }; + break :result .{ .overflowed = try sema.addConstUndef(overflowed_ty), .wrapped = try sema.addConstUndef(dest_ty) }; } else if (rhs_val.compareWithZero(.eq)) { - break :result .{ .overflowed = .no, .wrapped = lhs }; + break :result .{ .overflowed = try sema.addBool(overflowed_ty, false), .wrapped = lhs }; } else if (maybe_lhs_val) |lhs_val| { if (lhs_val.isUndef()) { - break :result .{ .overflowed = .undef, .wrapped = try sema.addConstUndef(dest_ty) }; + break :result .{ .overflowed = try sema.addConstUndef(overflowed_ty), .wrapped = try sema.addConstUndef(dest_ty) }; } const result = try lhs_val.intSubWithOverflow(rhs_val, dest_ty, sema.arena, target); - const inst = try sema.addConstant(dest_ty, result.wrapped_result); - break :result .{ .overflowed = if (result.overflowed) .yes else .no, .wrapped = inst }; + const overflowed = try sema.addConstant(overflowed_ty, result.overflowed); + const wrapped = try sema.addConstant(dest_ty, result.wrapped_result); + break :result .{ .overflowed = overflowed, .wrapped = wrapped }; } } }, @@ -9498,9 +9536,9 @@ fn zirOverflowArithmetic( if (maybe_lhs_val) |lhs_val| { if (!lhs_val.isUndef()) { if (lhs_val.compareWithZero(.eq)) { - break :result .{ .overflowed = .no, .wrapped = lhs }; + break :result .{ .overflowed = try sema.addBool(overflowed_ty, false), .wrapped = lhs }; } else if (lhs_val.compare(.eq, Value.one, dest_ty, mod)) { - break :result .{ .overflowed = .no, .wrapped = rhs }; + break :result .{ .overflowed = try sema.addBool(overflowed_ty, false), .wrapped = rhs }; } } } @@ -9508,9 +9546,9 @@ fn zirOverflowArithmetic( if (maybe_rhs_val) |rhs_val| { if (!rhs_val.isUndef()) { if (rhs_val.compareWithZero(.eq)) { - break :result .{ .overflowed = .no, .wrapped = rhs }; + break :result .{ .overflowed = try sema.addBool(overflowed_ty, false), .wrapped = rhs }; } else if (rhs_val.compare(.eq, Value.one, dest_ty, mod)) { - break :result .{ .overflowed = .no, .wrapped = lhs }; + break :result .{ .overflowed = try sema.addBool(overflowed_ty, false), .wrapped = lhs }; } } } @@ -9518,12 +9556,13 @@ fn zirOverflowArithmetic( if (maybe_lhs_val) |lhs_val| { if (maybe_rhs_val) |rhs_val| { if (lhs_val.isUndef() or rhs_val.isUndef()) { - break :result .{ .overflowed = .undef, .wrapped = try sema.addConstUndef(dest_ty) }; + break :result .{ .overflowed = try sema.addConstUndef(overflowed_ty), .wrapped = try sema.addConstUndef(dest_ty) }; } const result = try lhs_val.intMulWithOverflow(rhs_val, dest_ty, sema.arena, target); - const inst = try sema.addConstant(dest_ty, result.wrapped_result); - break :result .{ .overflowed = if (result.overflowed) .yes else .no, .wrapped = inst }; + const overflowed = try sema.addConstant(overflowed_ty, result.overflowed); + const wrapped = try sema.addConstant(dest_ty, result.wrapped_result); + break :result .{ .overflowed = overflowed, .wrapped = wrapped }; } } }, @@ -9533,23 +9572,24 @@ fn zirOverflowArithmetic( // Otherwise, if either of the arguments is undefined, both results are undefined.
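// Sketch of the constant folding above (illustration only, using the
// @mulWithOverflow(T, a, b, &result) builtin signature of this era): a
// comptime-known operand of 0 or 1 folds the call even when the other
// operand is only runtime-known:
//
//     var r: u8 = undefined;
//     const ov = @mulWithOverflow(u8, 1, x, &r); // folds to ov = false, r = x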
if (maybe_lhs_val) |lhs_val| { if (!lhs_val.isUndef() and lhs_val.compareWithZero(.eq)) { - break :result .{ .overflowed = .no, .wrapped = lhs }; + break :result .{ .overflowed = try sema.addBool(overflowed_ty, false), .wrapped = lhs }; } } if (maybe_rhs_val) |rhs_val| { if (!rhs_val.isUndef() and rhs_val.compareWithZero(.eq)) { - break :result .{ .overflowed = .no, .wrapped = lhs }; + break :result .{ .overflowed = try sema.addBool(overflowed_ty, false), .wrapped = lhs }; } } if (maybe_lhs_val) |lhs_val| { if (maybe_rhs_val) |rhs_val| { if (lhs_val.isUndef() or rhs_val.isUndef()) { - break :result .{ .overflowed = .undef, .wrapped = try sema.addConstUndef(dest_ty) }; + break :result .{ .overflowed = try sema.addConstUndef(overflowed_ty), .wrapped = try sema.addConstUndef(dest_ty) }; } const result = try lhs_val.shlWithOverflow(rhs_val, dest_ty, sema.arena, target); - const inst = try sema.addConstant(dest_ty, result.wrapped_result); - break :result .{ .overflowed = if (result.overflowed) .yes else .no, .wrapped = inst }; + const overflowed = try sema.addConstant(overflowed_ty, result.overflowed); + const wrapped = try sema.addConstant(dest_ty, result.wrapped_result); + break :result .{ .overflowed = overflowed, .wrapped = wrapped }; } } }, @@ -9577,21 +9617,40 @@ fn zirOverflowArithmetic( } }, }); - const wrapped = try block.addStructFieldVal(tuple, 0, dest_ty); + const wrapped = try sema.tupleFieldValByIndex(block, src, tuple, 0, tuple_ty); try sema.storePtr2(block, src, ptr, ptr_src, wrapped, src, .store); - const overflow_bit = try block.addStructFieldVal(tuple, 1, Type.initTag(.u1)); - const zero_u1 = try sema.addConstant(Type.initTag(.u1), Value.zero); - return try block.addBinOp(.cmp_neq, overflow_bit, zero_u1); + const overflow_bit = try sema.tupleFieldValByIndex(block, src, tuple, 1, tuple_ty); + const zero_ov_val = if (dest_ty.zigTypeTag() == .Vector) try Value.Tag.repeated.create(sema.arena, Value.zero) else Value.zero; + const zero_ov = try sema.addConstant(ov_ty, zero_ov_val); + + const overflowed_inst = if (dest_ty.zigTypeTag() == .Vector) + block.addCmpVector(overflow_bit, .zero, .neq, try sema.addType(ov_ty)) + else + block.addBinOp(.cmp_neq, overflow_bit, zero_ov); + return overflowed_inst; }; try sema.storePtr2(block, src, ptr, ptr_src, result.wrapped, src, .store); + return result.overflowed; +} - return switch (result.overflowed) { - .yes => Air.Inst.Ref.bool_true, - .no => Air.Inst.Ref.bool_false, - .undef => try sema.addConstUndef(Type.bool), - }; +fn overflowArithmeticTupleType(sema: *Sema, ty: Type) !Type { + const ov_ty = if (ty.zigTypeTag() == .Vector) try Type.vector(sema.arena, ty.vectorLen(), Type.@"u1") else Type.@"u1"; + + const types = try sema.arena.alloc(Type, 2); + const values = try sema.arena.alloc(Value, 2); + const tuple_ty = try Type.Tag.tuple.create(sema.arena, .{ + .types = types, + .values = values, + }); + + types[0] = ty; + types[1] = ov_ty; + values[0] = Value.initTag(.unreachable_value); + values[1] = Value.initTag(.unreachable_value); + + return tuple_ty; } fn analyzeArithmetic( @@ -9691,10 +9750,11 @@ fn analyzeArithmetic( } if (maybe_rhs_val) |rhs_val| { if (is_int) { - return sema.addConstant( - resolved_type, - try lhs_val.intAdd(rhs_val, resolved_type, sema.arena, target), - ); + const sum = try lhs_val.intAdd(rhs_val, resolved_type, sema.arena, target); + if (!sum.intFitsInType(resolved_type, target)) { + return sema.failWithIntegerOverflow(block, src, resolved_type, sum); + } + return sema.addConstant(resolved_type, sum); } else { 
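// Illustration only: the comptime diagnostic produced by
// failWithIntegerOverflow through the integer branch above:
//
//     comptime {
//         var a: u8 = 255;
//         a += 1; // error: overflow of integer type 'u8' with value '256'
//     }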
return sema.addConstant( resolved_type, @@ -9784,10 +9844,11 @@ fn analyzeArithmetic( } if (maybe_rhs_val) |rhs_val| { if (is_int) { - return sema.addConstant( - resolved_type, - try lhs_val.intSub(rhs_val, resolved_type, sema.arena, target), - ); + const diff = try lhs_val.intSub(rhs_val, resolved_type, sema.arena, target); + if (!diff.intFitsInType(resolved_type, target)) { + return sema.failWithIntegerOverflow(block, src, resolved_type, diff); + } + return sema.addConstant(resolved_type, diff); } else { return sema.addConstant( resolved_type, @@ -10157,10 +10218,11 @@ fn analyzeArithmetic( } } if (is_int) { - return sema.addConstant( - resolved_type, - try lhs_val.intMul(rhs_val, resolved_type, sema.arena, target), - ); + const product = try lhs_val.intMul(rhs_val, resolved_type, sema.arena, target); + if (!product.intFitsInType(resolved_type, target)) { + return sema.failWithIntegerOverflow(block, src, resolved_type, product); + } + return sema.addConstant(resolved_type, product); } else { return sema.addConstant( resolved_type, @@ -10448,6 +10510,45 @@ fn analyzeArithmetic( }; try sema.requireRuntimeBlock(block, rs.src); + if (block.wantSafety()) { + if (scalar_tag == .Int) { + const maybe_op_ov: ?Air.Inst.Tag = switch (rs.air_tag) { + .add => .add_with_overflow, + .sub => .sub_with_overflow, + .mul => .mul_with_overflow, + else => null, + }; + if (maybe_op_ov) |op_ov_tag| { + const op_ov_tuple_ty = try sema.overflowArithmeticTupleType(resolved_type); + const op_ov = try block.addInst(.{ + .tag = op_ov_tag, + .data = .{ .ty_pl = .{ + .ty = try sema.addType(op_ov_tuple_ty), + .payload = try sema.addExtra(Air.Bin{ + .lhs = casted_lhs, + .rhs = casted_rhs, + }), + } }, + }); + const ov_bit = try sema.tupleFieldValByIndex(block, src, op_ov, 1, op_ov_tuple_ty); + const any_ov_bit = if (resolved_type.zigTypeTag() == .Vector) + try block.addInst(.{ + .tag = .reduce, + .data = .{ .reduce = .{ + .operand = ov_bit, + .operation = .Or, + } }, + }) + else + ov_bit; + const zero_ov = try sema.addConstant(Type.@"u1", Value.zero); + const no_ov = try block.addBinOp(.cmp_eq, any_ov_bit, zero_ov); + + try sema.addSafetyCheck(block, no_ov, .integer_overflow); + return sema.tupleFieldValByIndex(block, src, op_ov, 0, op_ov_tuple_ty); + } + } + } return block.addBinOp(rs.air_tag, casted_lhs, casted_rhs); } @@ -16682,6 +16783,8 @@ pub const PanicId = enum { invalid_error_code, index_out_of_bounds, cast_truncated_data, + integer_overflow, + shl_overflow, }; fn addSafetyCheck( @@ -16805,6 +16908,8 @@ fn safetyPanic( .invalid_error_code => "invalid error code", .index_out_of_bounds => "attempt to index out of bounds", .cast_truncated_data => "integer cast truncated bits", + .integer_overflow => "integer overflow", + .shl_overflow => "left shift overflowed bits", }; const msg_inst = msg_inst: { @@ -23093,6 +23198,14 @@ fn addIntUnsigned(sema: *Sema, ty: Type, int: u64) CompileError!Air.Inst.Ref { return sema.addConstant(ty, try Value.Tag.int_u64.create(sema.arena, int)); } +fn addBool(sema: *Sema, ty: Type, boolean: bool) CompileError!Air.Inst.Ref { + return switch (ty.zigTypeTag()) { + .Vector => sema.addConstant(ty, try Value.Tag.repeated.create(sema.arena, Value.makeBool(boolean))), + .Bool => sema.resolveInst(if (boolean) .bool_true else .bool_false), + else => unreachable, + }; +} + fn addConstUndef(sema: *Sema, ty: Type) CompileError!Air.Inst.Ref { return sema.addConstant(ty, Value.undef); } diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index e43cbca1c7f7..10730c446f35 100644 
--- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -1901,6 +1901,10 @@ fn airOverflow(self: *Self, inst: Air.Inst.Index) !void { } }; + if (tag == .sub_with_overflow) { + break :result MCValue{ .register_v_flag = dest.register }; + } + switch (int_info.signedness) { .unsigned => break :result MCValue{ .register_c_flag = dest.register }, .signed => break :result MCValue{ .register_v_flag = dest.register }, diff --git a/src/arch/arm/CodeGen.zig b/src/arch/arm/CodeGen.zig index 02ca66f29791..93e6e95ba9ae 100644 --- a/src/arch/arm/CodeGen.zig +++ b/src/arch/arm/CodeGen.zig @@ -1455,6 +1455,10 @@ fn airOverflow(self: *Self, inst: Air.Inst.Index) !void { } }; + if (tag == .sub_with_overflow) { + break :result MCValue{ .register_v_flag = dest.register }; + } + switch (int_info.signedness) { .unsigned => break :result MCValue{ .register_c_flag = dest.register }, .signed => break :result MCValue{ .register_v_flag = dest.register }, diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig index 947174aaed14..8e84b7d1fe3d 100644 --- a/src/arch/wasm/CodeGen.zig +++ b/src/arch/wasm/CodeGen.zig @@ -1450,9 +1450,9 @@ fn genInst(self: *Self, inst: Air.Inst.Index) !WValue { .min => self.airMaxMin(inst, .min), .mul_add => self.airMulAdd(inst), - .add_with_overflow => self.airBinOpOverflow(inst, .add), - .sub_with_overflow => self.airBinOpOverflow(inst, .sub), - .shl_with_overflow => self.airBinOpOverflow(inst, .shl), + .add_with_overflow => self.airAddSubWithOverflow(inst, .add), + .sub_with_overflow => self.airAddSubWithOverflow(inst, .sub), + .shl_with_overflow => self.airShlWithOverflow(inst), .mul_with_overflow => self.airMulWithOverflow(inst), .clz => self.airClz(inst), @@ -3941,25 +3941,22 @@ fn airPtrSliceFieldPtr(self: *Self, inst: Air.Inst.Index, offset: u32) InnerErro return self.buildPointerOffset(slice_ptr, offset, .new); } -fn airBinOpOverflow(self: *Self, inst: Air.Inst.Index, op: Op) InnerError!WValue { - if (self.liveness.isUnused(inst)) return WValue{ .none = {} }; - +fn airAddSubWithOverflow(self: *Self, inst: Air.Inst.Index, op: Op) InnerError!WValue { + assert(op == .add or op == .sub); const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const extra = self.air.extraData(Air.Bin, ty_pl.payload).data; - const lhs = try self.resolveInst(extra.lhs); - const rhs = try self.resolveInst(extra.rhs); + const lhs_op = try self.resolveInst(extra.lhs); + const rhs_op = try self.resolveInst(extra.rhs); const lhs_ty = self.air.typeOf(extra.lhs); if (lhs_ty.zigTypeTag() == .Vector) { return self.fail("TODO: Implement overflow arithmetic for vectors", .{}); } - // We store the bit if it's overflowed or not in this. As it's zero-initialized - // we only need to update it if an overflow (or underflow) occured. 
- const overflow_bit = try self.allocLocal(Type.initTag(.u1)); const int_info = lhs_ty.intInfo(self.target); + const is_signed = int_info.signedness == .signed; const wasm_bits = toWasmBits(int_info.bits) orelse { - return self.fail("TODO: Implement overflow arithmetic for integer bitsize: {d}", .{int_info.bits}); + return self.fail("TODO: Implement {{add/sub}}_with_overflow for integer bitsize: {d}", .{int_info.bits}); }; const zero = switch (wasm_bits) { @@ -3967,83 +3964,93 @@ fn airBinOpOverflow(self: *Self, inst: Air.Inst.Index, op: Op) InnerError!WValue 64 => WValue{ .imm64 = 0 }, else => unreachable, }; - const int_max = (@as(u65, 1) << @intCast(u7, int_info.bits - @boolToInt(int_info.signedness == .signed))) - 1; - const int_max_wvalue = switch (wasm_bits) { - 32 => WValue{ .imm32 = @intCast(u32, int_max) }, - 64 => WValue{ .imm64 = @intCast(u64, int_max) }, - else => unreachable, - }; - const int_min = if (int_info.signedness == .unsigned) - @as(i64, 0) - else - -@as(i64, 1) << @intCast(u6, int_info.bits - 1); - const int_min_wvalue = switch (wasm_bits) { - 32 => WValue{ .imm32 = @bitCast(u32, @intCast(i32, int_min)) }, - 64 => WValue{ .imm64 = @bitCast(u64, int_min) }, + const shift_amt = wasm_bits - int_info.bits; + const shift_val = switch (wasm_bits) { + 32 => WValue{ .imm32 = shift_amt }, + 64 => WValue{ .imm64 = shift_amt }, else => unreachable, }; - if (int_info.signedness == .unsigned and op == .add) { - const diff = try self.binOp(int_max_wvalue, lhs, lhs_ty, .sub); - const cmp_res = try self.cmp(rhs, diff, lhs_ty, .gt); - try self.emitWValue(cmp_res); - try self.addLabel(.local_set, overflow_bit.local); - } else if (int_info.signedness == .unsigned and op == .sub) { - const cmp_res = try self.cmp(lhs, rhs, lhs_ty, .lt); - try self.emitWValue(cmp_res); - try self.addLabel(.local_set, overflow_bit.local); - } else if (int_info.signedness == .signed and op != .shl) { - // for overflow, we first check if lhs is > 0 (or lhs < 0 in case of subtraction). If not, we will not overflow. - // We first create an outer block, where we handle overflow. - // Then we create an inner block, where underflow is handled. - try self.startBlock(.block, wasm.block_empty); - try self.startBlock(.block, wasm.block_empty); - { - try self.emitWValue(lhs); - const cmp_result = try self.cmp(lhs, zero, lhs_ty, .lt); - try self.emitWValue(cmp_result); + // for signed integers, we first apply signed shifts by the difference in bits + // to get the signed value, as we store it internally as 2's complement. 
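// Worked example of the shift trick described above (illustration only): an
// i5 value lives in the low 5 bits of an i32 local, so shift_amt = 32 - 5 = 27
// and, with an arithmetic right shift,
//
//     0b11111 (i5 -1) << 27  ==  0xF800_0000
//     0xF800_0000     >> 27  ==  0xFFFF_FFFF (i32 -1)
//
// i.e. `(x << 27) >> 27` replicates the sign bit, so the full-width wasm
// add/sub below sees a correctly sign-extended operand.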
+ const lhs = if (wasm_bits != int_info.bits and is_signed) blk: { + const shl = try self.binOp(lhs_op, shift_val, lhs_ty, .shl); + break :blk try self.binOp(shl, shift_val, lhs_ty, .shr); + } else lhs_op; + const rhs = if (wasm_bits != int_info.bits and is_signed) blk: { + const shl = try self.binOp(rhs_op, shift_val, lhs_ty, .shl); + break :blk try self.binOp(shl, shift_val, lhs_ty, .shr); + } else rhs_op; + + const bin_op = try self.binOp(lhs, rhs, lhs_ty, op); + const result = if (wasm_bits != int_info.bits) blk: { + break :blk try self.wrapOperand(bin_op, lhs_ty); + } else bin_op; + + const cmp_op: std.math.CompareOperator = if (op == .sub) .gt else .lt; + const overflow_bit: WValue = if (is_signed) blk: { + if (wasm_bits == int_info.bits) { + const cmp_zero = try self.cmp(rhs, zero, lhs_ty, cmp_op); + const lt = try self.cmp(bin_op, lhs, lhs_ty, .lt); + break :blk try self.binOp(cmp_zero, lt, Type.u32, .xor); // result of cmp_zero and lt is always 32bit } - try self.addLabel(.br_if, 0); // break to outer block, and handle underflow + const shl = try self.binOp(bin_op, shift_val, lhs_ty, .shl); + const shr = try self.binOp(shl, shift_val, lhs_ty, .shr); + break :blk try self.cmp(shr, bin_op, lhs_ty, .neq); + } else if (wasm_bits == int_info.bits) + try self.cmp(bin_op, lhs, lhs_ty, cmp_op) + else + try self.cmp(bin_op, result, lhs_ty, .neq); - // handle overflow - { - const diff = try self.binOp(int_max_wvalue, lhs, lhs_ty, .sub); - const cmp_res = try self.cmp(rhs, diff, lhs_ty, if (op == .add) .gt else .lt); - try self.emitWValue(cmp_res); - try self.addLabel(.local_set, overflow_bit.local); - } - try self.addLabel(.br, 1); // break from blocks, and continue regular flow. - try self.endBlock(); + const result_ptr = try self.allocStack(self.air.typeOfIndex(inst)); + try self.store(result_ptr, result, lhs_ty, 0); + const offset = @intCast(u32, lhs_ty.abiSize(self.target)); + try self.store(result_ptr, overflow_bit, Type.initTag(.u1), offset); - // handle underflow - { - const diff = try self.binOp(int_min_wvalue, lhs, lhs_ty, .sub); - const cmp_res = try self.cmp(rhs, diff, lhs_ty, if (op == .add) .lt else .gt); - try self.emitWValue(cmp_res); - try self.addLabel(.local_set, overflow_bit.local); - } - try self.endBlock(); + return result_ptr; +} + +fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) InnerError!WValue { + const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; + const extra = self.air.extraData(Air.Bin, ty_pl.payload).data; + const lhs = try self.resolveInst(extra.lhs); + const rhs = try self.resolveInst(extra.rhs); + const lhs_ty = self.air.typeOf(extra.lhs); + + if (lhs_ty.zigTypeTag() == .Vector) { + return self.fail("TODO: Implement overflow arithmetic for vectors", .{}); } - const bin_op = if (op == .shl) blk: { - const tmp_val = try self.binOp(lhs, rhs, lhs_ty, op); - const cmp_res = try self.cmp(tmp_val, int_max_wvalue, lhs_ty, .gt); - try self.emitWValue(cmp_res); - try self.addLabel(.local_set, overflow_bit.local); + const int_info = lhs_ty.intInfo(self.target); + const is_signed = int_info.signedness == .signed; + const wasm_bits = toWasmBits(int_info.bits) orelse { + return self.fail("TODO: Implement shl_with_overflow for integer bitsize: {d}", .{int_info.bits}); + }; - try self.emitWValue(tmp_val); - try self.emitWValue(int_max_wvalue); - switch (wasm_bits) { - 32 => try self.addTag(.i32_and), - 64 => try self.addTag(.i64_and), + const shl = try self.binOp(lhs, rhs, lhs_ty, .shl); + const result = if (wasm_bits != int_info.bits) blk: { + break 
:blk try self.wrapOperand(shl, lhs_ty); + } else shl; + + const overflow_bit = if (wasm_bits != int_info.bits and is_signed) blk: { + const shift_amt = wasm_bits - int_info.bits; + const shift_val = switch (wasm_bits) { + 32 => WValue{ .imm32 = shift_amt }, + 64 => WValue{ .imm64 = shift_amt }, else => unreachable, - } - try self.addLabel(.local_set, tmp_val.local); - break :blk tmp_val; - } else try self.wrapBinOp(lhs, rhs, lhs_ty, op); + }; + + const secondary_shl = try self.binOp(shl, shift_val, lhs_ty, .shl); + const initial_shr = try self.binOp(secondary_shl, shift_val, lhs_ty, .shr); + const shr = try self.wrapBinOp(initial_shr, rhs, lhs_ty, .shr); + break :blk try self.cmp(lhs, shr, lhs_ty, .neq); + } else blk: { + const shr = try self.binOp(result, rhs, lhs_ty, .shr); + break :blk try self.cmp(lhs, shr, lhs_ty, .neq); + }; const result_ptr = try self.allocStack(self.air.typeOfIndex(inst)); - try self.store(result_ptr, bin_op, lhs_ty, 0); + try self.store(result_ptr, result, lhs_ty, 0); const offset = @intCast(u32, lhs_ty.abiSize(self.target)); try self.store(result_ptr, overflow_bit, Type.initTag(.u1), offset); diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index 518635b8060b..57100abc0f75 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -1896,7 +1896,7 @@ fn lowerToMrEnc( const opc = getOpCode(tag, .mr, reg.size() == 8 or reg_or_mem.size() == 8).?; switch (reg_or_mem) { .register => |dst_reg| { - const encoder = try Encoder.init(code, 3); + const encoder = try Encoder.init(code, 4); if (dst_reg.size() == 16) { encoder.prefix16BitMode(); } diff --git a/src/codegen/c.zig b/src/codegen/c.zig index 44b616c493dc..998271cd7f31 100644 --- a/src/codegen/c.zig +++ b/src/codegen/c.zig @@ -1766,10 +1766,10 @@ fn genBody(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, OutO .mul_add => try airMulAdd(f, inst), - .add_with_overflow => try airAddWithOverflow(f, inst), - .sub_with_overflow => try airSubWithOverflow(f, inst), - .mul_with_overflow => try airMulWithOverflow(f, inst), - .shl_with_overflow => try airShlWithOverflow(f, inst), + .add_with_overflow => try airOverflow(f, inst, "addo_"), + .sub_with_overflow => try airOverflow(f, inst, "subo_"), + .mul_with_overflow => try airOverflow(f, inst, "mulo_"), + .shl_with_overflow => try airOverflow(f, inst, "shlo_"), .min => try airMinMax(f, inst, "<"), .max => try airMinMax(f, inst, ">"), @@ -2295,7 +2295,8 @@ fn airWrapOp( const bin_op = f.air.instructions.items(.data)[inst].bin_op; const inst_ty = f.air.typeOfIndex(inst); - const int_info = inst_ty.intInfo(f.object.dg.module.getTarget()); + const target = f.object.dg.module.getTarget(); + const int_info = inst_ty.intInfo(target); const bits = int_info.bits; // if it's an unsigned int with non-arbitrary bit size then we can just add @@ -2313,47 +2314,8 @@ fn airWrapOp( return f.fail("TODO: C backend: airWrapOp for large integers", .{}); } - var min_buf: [80]u8 = undefined; - const min = switch (int_info.signedness) { - .unsigned => "0", - else => switch (inst_ty.tag()) { - .c_short => "SHRT_MIN", - .c_int => "INT_MIN", - .c_long => "LONG_MIN", - .c_longlong => "LLONG_MIN", - .isize => "INTPTR_MIN", - else => blk: { - const val = -1 * std.math.pow(i64, 2, @intCast(i64, bits - 1)); - break :blk std.fmt.bufPrint(&min_buf, "{d}", .{val}) catch |err| switch (err) { - error.NoSpaceLeft => unreachable, - }; - }, - }, - }; - var max_buf: [80]u8 = undefined; - const max = switch (inst_ty.tag()) { - .c_short => "SHRT_MAX", - .c_ushort => 
"USHRT_MAX", - .c_int => "INT_MAX", - .c_uint => "UINT_MAX", - .c_long => "LONG_MAX", - .c_ulong => "ULONG_MAX", - .c_longlong => "LLONG_MAX", - .c_ulonglong => "ULLONG_MAX", - .isize => "INTPTR_MAX", - .usize => "UINTPTR_MAX", - else => blk: { - const pow_bits = switch (int_info.signedness) { - .signed => bits - 1, - .unsigned => bits, - }; - const val = std.math.pow(u64, 2, pow_bits) - 1; - break :blk std.fmt.bufPrint(&max_buf, "{}", .{val}) catch |err| switch (err) { - error.NoSpaceLeft => unreachable, - }; - }, - }; + const max = intMax(inst_ty, target, &max_buf); const lhs = try f.resolveInst(bin_op.lhs); const rhs = try f.resolveInst(bin_op.rhs); @@ -2369,10 +2331,7 @@ fn airWrapOp( .c_long => try w.writeAll("long"), .c_longlong => try w.writeAll("longlong"), else => { - const prefix_byte: u8 = switch (int_info.signedness) { - .signed => 'i', - .unsigned => 'u', - }; + const prefix_byte: u8 = signAbbrev(int_info.signedness); for ([_]u8{ 8, 16, 32, 64 }) |nbits| { if (bits <= nbits) { try w.print("{c}{d}", .{ prefix_byte, nbits }); @@ -2390,6 +2349,9 @@ fn airWrapOp( try f.writeCValue(w, rhs); if (int_info.signedness == .signed) { + var min_buf: [80]u8 = undefined; + const min = intMin(inst_ty, target, &min_buf); + try w.print(", {s}", .{min}); } @@ -2475,10 +2437,7 @@ fn airSatOp(f: *Function, inst: Air.Inst.Index, fn_op: [*:0]const u8) !CValue { .c_long => try w.writeAll("long"), .c_longlong => try w.writeAll("longlong"), else => { - const prefix_byte: u8 = switch (int_info.signedness) { - .signed => 'i', - .unsigned => 'u', - }; + const prefix_byte: u8 = signAbbrev(int_info.signedness); for ([_]u8{ 8, 16, 32, 64 }) |nbits| { if (bits <= nbits) { try w.print("{c}{d}", .{ prefix_byte, nbits }); @@ -2505,28 +2464,63 @@ fn airSatOp(f: *Function, inst: Air.Inst.Index, fn_op: [*:0]const u8) !CValue { return ret; } -fn airAddWithOverflow(f: *Function, inst: Air.Inst.Index) !CValue { - _ = f; - _ = inst; - return f.fail("TODO add with overflow", .{}); -} +fn airOverflow(f: *Function, inst: Air.Inst.Index, op_abbrev: [*:0]const u8) !CValue { + if (f.liveness.isUnused(inst)) + return CValue.none; -fn airSubWithOverflow(f: *Function, inst: Air.Inst.Index) !CValue { - _ = f; - _ = inst; - return f.fail("TODO sub with overflow", .{}); -} + const ty_pl = f.air.instructions.items(.data)[inst].ty_pl; + const bin_op = f.air.extraData(Air.Bin, ty_pl.payload).data; -fn airMulWithOverflow(f: *Function, inst: Air.Inst.Index) !CValue { - _ = f; - _ = inst; - return f.fail("TODO mul with overflow", .{}); -} + const lhs = try f.resolveInst(bin_op.lhs); + const rhs = try f.resolveInst(bin_op.rhs); -fn airShlWithOverflow(f: *Function, inst: Air.Inst.Index) !CValue { - _ = f; - _ = inst; - return f.fail("TODO shl with overflow", .{}); + const inst_ty = f.air.typeOfIndex(inst); + const scalar_ty = f.air.typeOf(bin_op.lhs).scalarType(); + const target = f.object.dg.module.getTarget(); + const int_info = scalar_ty.intInfo(target); + const w = f.object.writer(); + const c_bits = toCIntBits(int_info.bits) orelse + return f.fail("TODO: C backend: implement integer arithmetic larger than 128 bits", .{}); + + var max_buf: [80]u8 = undefined; + const max = intMax(scalar_ty, target, &max_buf); + + const ret = try f.allocLocal(inst_ty, .Mut); + try w.writeAll(";"); + try f.object.indent_writer.insertNewline(); + try f.writeCValue(w, ret); + + switch (int_info.signedness) { + .unsigned => { + try w.print(".field_1 = zig_{s}u{d}(", .{ + op_abbrev, c_bits, + }); + try f.writeCValue(w, lhs); + try w.writeAll(", "); + try 
f.writeCValue(w, rhs); + try w.writeAll(", &"); + try f.writeCValue(w, ret); + try w.print(".field_0, {s}", .{max}); + }, + .signed => { + var min_buf: [80]u8 = undefined; + const min = intMin(scalar_ty, target, &min_buf); + + try w.print(".field_1 = zig_{s}i{d}(", .{ + op_abbrev, c_bits, + }); + try f.writeCValue(w, lhs); + try w.writeAll(", "); + try f.writeCValue(w, rhs); + try w.writeAll(", &"); + try f.writeCValue(w, ret); + try w.print(".field_0, {s}, {s}", .{ min, max }); + }, + } + + try w.writeAll(");"); + try f.object.indent_writer.insertNewline(); + return ret; } fn airNot(f: *Function, inst: Air.Inst.Index) !CValue { @@ -3571,11 +3565,7 @@ fn airBuiltinCall(f: *Function, inst: Air.Inst.Index, fn_name: [*:0]const u8) !C return f.fail("TODO: C backend: implement integer types larger than 128 bits", .{}); try writer.print(" = zig_{s}_", .{fn_name}); - const prefix_byte: u8 = switch (int_info.signedness) { - .signed => 'i', - .unsigned => 'u', - }; - try writer.print("{c}{d}(", .{ prefix_byte, c_bits }); + try writer.print("{c}{d}(", .{ signAbbrev(int_info.signedness), c_bits }); try f.writeCValue(writer, try f.resolveInst(operand)); try writer.print(", {d});\n", .{int_info.bits}); return local; @@ -3596,11 +3586,7 @@ fn airBinOpBuiltinCall(f: *Function, inst: Air.Inst.Index, fn_name: [*:0]const u const int_info = lhs_ty.intInfo(target); const c_bits = toCIntBits(int_info.bits) orelse return f.fail("TODO: C backend: implement integer types larger than 128 bits", .{}); - const prefix_byte: u8 = switch (int_info.signedness) { - .signed => 'i', - .unsigned => 'u', - }; - try writer.print(" = zig_{s}_{c}{d}", .{ fn_name, prefix_byte, c_bits }); + try writer.print(" = zig_{s}_{c}{d}", .{ fn_name, signAbbrev(int_info.signedness), c_bits }); } else if (lhs_ty.isRuntimeFloat()) { const c_bits = lhs_ty.floatBits(target); try writer.print(" = zig_{s}_f{d}", .{ fn_name, c_bits }); @@ -4085,3 +4071,53 @@ fn toCIntBits(zig_bits: u32) ?u32 { } return null; } + +fn signAbbrev(signedness: std.builtin.Signedness) u8 { + return switch (signedness) { + .signed => 'i', + .unsigned => 'u', + }; +} + +fn intMax(ty: Type, target: std.Target, buf: []u8) []const u8 { + switch (ty.tag()) { + .c_short => return "SHRT_MAX", + .c_ushort => return "USHRT_MAX", + .c_int => return "INT_MAX", + .c_uint => return "UINT_MAX", + .c_long => return "LONG_MAX", + .c_ulong => return "ULONG_MAX", + .c_longlong => return "LLONG_MAX", + .c_ulonglong => return "ULLONG_MAX", + else => { + const int_info = ty.intInfo(target); + const rhs = @intCast(u7, int_info.bits - @boolToInt(int_info.signedness == .signed)); + const val = (@as(u128, 1) << rhs) - 1; + // TODO make this integer literal have a suffix if necessary (such as "ull") + return std.fmt.bufPrint(buf, "{}", .{val}) catch |err| switch (err) { + error.NoSpaceLeft => unreachable, + }; + }, + } +} + +fn intMin(ty: Type, target: std.Target, buf: []u8) []const u8 { + switch (ty.tag()) { + .c_short => return "SHRT_MIN", + .c_int => return "INT_MIN", + .c_long => return "LONG_MIN", + .c_longlong => return "LLONG_MIN", + else => { + const int_info = ty.intInfo(target); + assert(int_info.signedness == .signed); + const val = v: { + if (int_info.bits == 0) break :v 0; + const rhs = @intCast(u7, (int_info.bits - 1)); + break :v -(@as(i128, 1) << rhs); + }; + return std.fmt.bufPrint(buf, "{d}", .{val}) catch |err| switch (err) { + error.NoSpaceLeft => unreachable, + }; + }, + } +} diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index 5a8516b363f9..7392b2068b7c 100644 --- 
a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -5604,14 +5604,25 @@ pub const FuncGen = struct { const rhs = try self.resolveInst(extra.rhs); const lhs_ty = self.air.typeOf(extra.lhs); + const scalar_ty = lhs_ty.scalarType(); + const dest_ty = self.air.typeOfIndex(inst); - const intrinsic_name = if (lhs_ty.isSignedInt()) signed_intrinsic else unsigned_intrinsic; + const intrinsic_name = if (scalar_ty.isSignedInt()) signed_intrinsic else unsigned_intrinsic; const llvm_lhs_ty = try self.dg.llvmType(lhs_ty); + const llvm_dest_ty = try self.dg.llvmType(dest_ty); + + const tg = self.dg.module.getTarget(); const llvm_fn = self.getIntrinsic(intrinsic_name, &.{llvm_lhs_ty}); const result_struct = self.builder.buildCall(llvm_fn, &[_]*const llvm.Value{ lhs, rhs }, 2, .Fast, .Auto, ""); - return result_struct; + + const result = self.builder.buildExtractValue(result_struct, 0, ""); + const overflow_bit = self.builder.buildExtractValue(result_struct, 1, ""); + + var ty_buf: Type.Payload.Pointer = undefined; + const partial = self.builder.buildInsertValue(llvm_dest_ty.getUndef(), result, llvmFieldIndex(dest_ty, 0, tg, &ty_buf).?, ""); + return self.builder.buildInsertValue(partial, overflow_bit, llvmFieldIndex(dest_ty, 1, tg, &ty_buf).?, ""); } fn buildElementwiseCall( @@ -5898,26 +5909,30 @@ pub const FuncGen = struct { const lhs_ty = self.air.typeOf(extra.lhs); const rhs_ty = self.air.typeOf(extra.rhs); + const lhs_scalar_ty = lhs_ty.scalarType(); + const rhs_scalar_ty = rhs_ty.scalarType(); + const dest_ty = self.air.typeOfIndex(inst); const llvm_dest_ty = try self.dg.llvmType(dest_ty); const tg = self.dg.module.getTarget(); - const casted_rhs = if (rhs_ty.bitSize(tg) < lhs_ty.bitSize(tg)) + const casted_rhs = if (rhs_scalar_ty.bitSize(tg) < lhs_scalar_ty.bitSize(tg)) self.builder.buildZExt(rhs, try self.dg.llvmType(lhs_ty), "") else rhs; const result = self.builder.buildShl(lhs, casted_rhs, ""); - const reconstructed = if (lhs_ty.isSignedInt()) + const reconstructed = if (lhs_scalar_ty.isSignedInt()) self.builder.buildAShr(result, casted_rhs, "") else self.builder.buildLShr(result, casted_rhs, ""); const overflow_bit = self.builder.buildICmp(.NE, lhs, reconstructed, ""); - const partial = self.builder.buildInsertValue(llvm_dest_ty.getUndef(), result, 0, ""); - return self.builder.buildInsertValue(partial, overflow_bit, 1, ""); + var ty_buf: Type.Payload.Pointer = undefined; + const partial = self.builder.buildInsertValue(llvm_dest_ty.getUndef(), result, llvmFieldIndex(dest_ty, 0, tg, &ty_buf).?, ""); + return self.builder.buildInsertValue(partial, overflow_bit, llvmFieldIndex(dest_ty, 1, tg, &ty_buf).?, ""); } fn airAnd(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { diff --git a/src/link/C/zig.h b/src/link/C/zig.h index 85c7856d2bde..43d9913039c2 100644 --- a/src/link/C/zig.h +++ b/src/link/C/zig.h @@ -165,8 +165,24 @@ #define int128_t __int128 #define uint128_t unsigned __int128 +#define UINT128_MAX ((uint128_t)(0xffffffffffffffffull) << 64 | 0xffffffffffffffffull) ZIG_EXTERN_C void *memcpy (void *ZIG_RESTRICT, const void *ZIG_RESTRICT, size_t); ZIG_EXTERN_C void *memset (void *, int, size_t); +ZIG_EXTERN_C int64_t __addodi4(int64_t lhs, int64_t rhs, int *overflow); +ZIG_EXTERN_C int128_t __addoti4(int128_t lhs, int128_t rhs, int *overflow); +ZIG_EXTERN_C uint64_t __uaddodi4(uint64_t lhs, uint64_t rhs, int *overflow); +ZIG_EXTERN_C uint128_t __uaddoti4(uint128_t lhs, uint128_t rhs, int *overflow); +ZIG_EXTERN_C int32_t __subosi4(int32_t lhs, int32_t rhs, int *overflow); +ZIG_EXTERN_C
int64_t __subodi4(int64_t lhs, int64_t rhs, int *overflow); +ZIG_EXTERN_C int128_t __suboti4(int128_t lhs, int128_t rhs, int *overflow); +ZIG_EXTERN_C uint32_t __usubosi4(uint32_t lhs, uint32_t rhs, int *overflow); +ZIG_EXTERN_C uint64_t __usubodi4(uint64_t lhs, uint64_t rhs, int *overflow); +ZIG_EXTERN_C uint128_t __usuboti4(uint128_t lhs, uint128_t rhs, int *overflow); +ZIG_EXTERN_C int64_t __mulodi4(int64_t lhs, int64_t rhs, int *overflow); +ZIG_EXTERN_C int128_t __muloti4(int128_t lhs, int128_t rhs, int *overflow); +ZIG_EXTERN_C uint64_t __umulodi4(uint64_t lhs, uint64_t rhs, int *overflow); +ZIG_EXTERN_C uint128_t __umuloti4(uint128_t lhs, uint128_t rhs, int *overflow); + static inline uint8_t zig_addw_u8(uint8_t lhs, uint8_t rhs, uint8_t max) { uint8_t thresh = max - rhs; @@ -396,6 +412,689 @@ static inline long long zig_subw_longlong(long long lhs, long long rhs, long lon return (long long)(((unsigned long long)lhs) - ((unsigned long long)rhs)); } +static inline bool zig_addo_i8(int8_t lhs, int8_t rhs, int8_t *res, int8_t min, int8_t max) { +#if defined(__GNUC__) && INT8_MAX == INT_MAX + if (min == INT8_MIN && max == INT8_MAX) { + return __builtin_sadd_overflow(lhs, rhs, (int*)res); + } +#elif defined(__GNUC__) && INT8_MAX == LONG_MAX + if (min == INT8_MIN && max == INT8_MAX) { + return __builtin_saddl_overflow(lhs, rhs, (long*)res); + } +#elif defined(__GNUC__) && INT8_MAX == LLONG_MAX + if (min == INT8_MIN && max == INT8_MAX) { + return __builtin_saddll_overflow(lhs, rhs, (long long*)res); + } +#endif + int16_t big_result = (int16_t)lhs + (int16_t)rhs; + if (big_result > max) { + *res = big_result - ((int16_t)max - (int16_t)min); + return true; + } + if (big_result < min) { + *res = big_result + ((int16_t)max - (int16_t)min); + return true; + } + *res = big_result; + return false; +} + +static inline bool zig_addo_i16(int16_t lhs, int16_t rhs, int16_t *res, int16_t min, int16_t max) { +#if defined(__GNUC__) && INT16_MAX == INT_MAX + if (min == INT16_MIN && max == INT16_MAX) { + return __builtin_sadd_overflow(lhs, rhs, (int*)res); + } +#elif defined(__GNUC__) && INT16_MAX == LONG_MAX + if (min == INT16_MIN && max == INT16_MAX) { + return __builtin_saddl_overflow(lhs, rhs, (long*)res); + } +#elif defined(__GNUC__) && INT16_MAX == LLONG_MAX + if (min == INT16_MIN && max == INT16_MAX) { + return __builtin_saddll_overflow(lhs, rhs, (long long*)res); + } +#endif + int32_t big_result = (int32_t)lhs + (int32_t)rhs; + if (big_result > max) { + *res = big_result - ((int32_t)max - (int32_t)min); + return true; + } + if (big_result < min) { + *res = big_result + ((int32_t)max - (int32_t)min); + return true; + } + *res = big_result; + return false; +} + +static inline bool zig_addo_i32(int32_t lhs, int32_t rhs, int32_t *res, int32_t min, int32_t max) { +#if defined(__GNUC__) && INT32_MAX == INT_MAX + if (min == INT32_MIN && max == INT32_MAX) { + return __builtin_sadd_overflow(lhs, rhs, (int*)res); + } +#elif defined(__GNUC__) && INT32_MAX == LONG_MAX + if (min == INT32_MIN && max == INT32_MAX) { + return __builtin_saddl_overflow(lhs, rhs, (long*)res); + } +#elif defined(__GNUC__) && INT32_MAX == LLONG_MAX + if (min == INT32_MIN && max == INT32_MAX) { + return __builtin_saddll_overflow(lhs, rhs, (long long*)res); + } +#endif + int64_t big_result = (int64_t)lhs + (int64_t)rhs; + if (big_result > max) { + *res = big_result - ((int64_t)max - (int64_t)min); + return true; + } + if (big_result < min) { + *res = big_result + ((int64_t)max - (int64_t)min); + return true; + } + *res = big_result; + 
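/* Sketch of the pattern shared by the zig_{add,sub,mul}o_* helpers
 * (illustration only): when the Zig type spans the full range of a
 * builtin-supported C type, defer to the GCC/Clang checked-arithmetic
 * builtin; otherwise compute in twice the width and range-check, e.g. for a
 * hypothetical i7 (min = -64, max = 63):
 *
 *     int16_t big = (int16_t)lhs + (int16_t)rhs; // exact, cannot overflow
 *     bool overflow = big > 63 || big < -64;     // range check against i7
 *
 * with the result wrapped back into the type's range on overflow. */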
return false; +} + +static inline bool zig_addo_i64(int64_t lhs, int64_t rhs, int64_t *res, int64_t min, int64_t max) { + bool overflow; +#if defined(__GNUC__) && INT64_MAX == INT_MAX + overflow = __builtin_sadd_overflow(lhs, rhs, (int*)res); +#elif defined(__GNUC__) && INT64_MAX == LONG_MAX + overflow = __builtin_saddl_overflow(lhs, rhs, (long*)res); +#elif defined(__GNUC__) && INT64_MAX == LLONG_MAX + overflow = __builtin_saddll_overflow(lhs, rhs, (long long*)res); +#else + int int_overflow; + *res = __addodi4(lhs, rhs, &int_overflow); + overflow = int_overflow != 0; +#endif + if (!overflow) { + if (*res > max) { + // TODO adjust the result to be the truncated bits + return true; + } else if (*res < min) { + // TODO adjust the result to be the truncated bits + return true; + } + } + return overflow; +} + +static inline bool zig_addo_i128(int128_t lhs, int128_t rhs, int128_t *res, int128_t min, int128_t max) { + bool overflow; +#if defined(__GNUC__) && INT128_MAX == INT_MAX + overflow = __builtin_sadd_overflow(lhs, rhs, (int*)res); +#elif defined(__GNUC__) && INT128_MAX == LONG_MAX + overflow = __builtin_saddl_overflow(lhs, rhs, (long*)res); +#elif defined(__GNUC__) && INT128_MAX == LLONG_MAX + overflow = __builtin_saddll_overflow(lhs, rhs, (long long*)res); +#else + int int_overflow; + *res = __addoti4(lhs, rhs, &int_overflow); + overflow = int_overflow != 0; +#endif + if (!overflow) { + if (*res > max) { + // TODO adjust the result to be the truncated bits + return true; + } else if (*res < min) { + // TODO adjust the result to be the truncated bits + return true; + } + } + return overflow; +} + +static inline bool zig_addo_u8(uint8_t lhs, uint8_t rhs, uint8_t *res, uint8_t max) { +#if defined(__GNUC__) && UINT8_MAX == UINT_MAX + if (max == UINT8_MAX) { + return __builtin_uadd_overflow(lhs, rhs, (unsigned int*)res); + } +#elif defined(__GNUC__) && UINT8_MAX == ULONG_MAX + if (max == UINT8_MAX) { + return __builtin_uaddl_overflow(lhs, rhs, (unsigned long*)res); + } +#elif defined(__GNUC__) && UINT8_MAX == ULLONG_MAX + if (max == UINT8_MAX) { + return __builtin_uaddll_overflow(lhs, rhs, (unsigned long long*)res); + } +#endif + uint16_t big_result = (uint16_t)lhs + (uint16_t)rhs; + if (big_result > max) { + *res = big_result - max - 1; + return true; + } + *res = big_result; + return false; +} + +static inline uint16_t zig_addo_u16(uint16_t lhs, uint16_t rhs, uint16_t *res, uint16_t max) { +#if defined(__GNUC__) && UINT16_MAX == UINT_MAX + if (max == UINT16_MAX) { + return __builtin_uadd_overflow(lhs, rhs, (unsigned int*)res); + } +#elif defined(__GNUC__) && UINT16_MAX == ULONG_MAX + if (max == UINT16_MAX) { + return __builtin_uaddl_overflow(lhs, rhs, (unsigned long*)res); + } +#elif defined(__GNUC__) && UINT16_MAX == ULLONG_MAX + if (max == UINT16_MAX) { + return __builtin_uaddll_overflow(lhs, rhs, (unsigned long long*)res); + } +#endif + uint32_t big_result = (uint32_t)lhs + (uint32_t)rhs; + if (big_result > max) { + *res = big_result - max - 1; + return true; + } + *res = big_result; + return false; +} + +static inline uint32_t zig_addo_u32(uint32_t lhs, uint32_t rhs, uint32_t *res, uint32_t max) { +#if defined(__GNUC__) && UINT32_MAX == UINT_MAX + if (max == UINT32_MAX) { + return __builtin_uadd_overflow(lhs, rhs, (unsigned int*)res); + } +#elif defined(__GNUC__) && UINT32_MAX == ULONG_MAX + if (max == UINT32_MAX) { + return __builtin_uaddl_overflow(lhs, rhs, (unsigned long*)res); + } +#elif defined(__GNUC__) && UINT32_MAX == ULLONG_MAX + if (max == UINT32_MAX) { + return 
__builtin_uaddll_overflow(lhs, rhs, (unsigned long long*)res); + } +#endif + uint64_t big_result = (uint64_t)lhs + (uint64_t)rhs; + if (big_result > max) { + *res = big_result - max - 1; + return true; + } + *res = big_result; + return false; +} + +static inline uint64_t zig_addo_u64(uint64_t lhs, uint64_t rhs, uint64_t *res, uint64_t max) { + bool overflow; +#if defined(__GNUC__) && UINT64_MAX == UINT_MAX + overflow = __builtin_uadd_overflow(lhs, rhs, (unsigned int*)res); +#elif defined(__GNUC__) && UINT64_MAX == ULONG_MAX + overflow = __builtin_uaddl_overflow(lhs, rhs, (unsigned long*)res); +#elif defined(__GNUC__) && UINT64_MAX == ULLONG_MAX + overflow = __builtin_uaddll_overflow(lhs, rhs, (unsigned long long*)res); +#else + int int_overflow; + *res = __uaddodi4(lhs, rhs, &int_overflow); + overflow = int_overflow != 0; +#endif + if (*res > max && !overflow) { + *res -= max - 1; + return true; + } + return overflow; +} + +static inline uint128_t zig_addo_u128(uint128_t lhs, uint128_t rhs, uint128_t *res, uint128_t max) { + int overflow; + *res = __uaddoti4(lhs, rhs, &overflow); + if (*res > max && overflow == 0) { + *res -= max - 1; + return true; + } + return overflow != 0; +} + +static inline bool zig_subo_i8(int8_t lhs, int8_t rhs, int8_t *res, int8_t min, int8_t max) { +#if defined(__GNUC__) && INT8_MAX == INT_MAX + if (min == INT8_MIN && max == INT8_MAX) { + return __builtin_ssub_overflow(lhs, rhs, (int*)res); + } +#elif defined(__GNUC__) && INT8_MAX == LONG_MAX + if (min == INT8_MIN && max == INT8_MAX) { + return __builtin_ssubl_overflow(lhs, rhs, (long*)res); + } +#elif defined(__GNUC__) && INT8_MAX == LLONG_MAX + if (min == INT8_MIN && max == INT8_MAX) { + return __builtin_ssubll_overflow(lhs, rhs, (long long*)res); + } +#endif + int16_t big_result = (int16_t)lhs - (int16_t)rhs; + if (big_result > max) { + *res = big_result - ((int16_t)max - (int16_t)min); + return true; + } + if (big_result < min) { + *res = big_result + ((int16_t)max - (int16_t)min); + return true; + } + *res = big_result; + return false; +} + +static inline bool zig_subo_i16(int16_t lhs, int16_t rhs, int16_t *res, int16_t min, int16_t max) { +#if defined(__GNUC__) && INT16_MAX == INT_MAX + if (min == INT16_MIN && max == INT16_MAX) { + return __builtin_ssub_overflow(lhs, rhs, (int*)res); + } +#elif defined(__GNUC__) && INT16_MAX == LONG_MAX + if (min == INT16_MIN && max == INT16_MAX) { + return __builtin_ssubl_overflow(lhs, rhs, (long*)res); + } +#elif defined(__GNUC__) && INT16_MAX == LLONG_MAX + if (min == INT16_MIN && max == INT16_MAX) { + return __builtin_ssubll_overflow(lhs, rhs, (long long*)res); + } +#endif + int32_t big_result = (int32_t)lhs - (int32_t)rhs; + if (big_result > max) { + *res = big_result - ((int32_t)max - (int32_t)min); + return true; + } + if (big_result < min) { + *res = big_result + ((int32_t)max - (int32_t)min); + return true; + } + *res = big_result; + return false; +} + +static inline bool zig_subo_i32(int32_t lhs, int32_t rhs, int32_t *res, int32_t min, int32_t max) { +#if defined(__GNUC__) && INT32_MAX == INT_MAX + if (min == INT32_MIN && max == INT32_MAX) { + return __builtin_ssub_overflow(lhs, rhs, (int*)res); + } +#elif defined(__GNUC__) && INT32_MAX == LONG_MAX + if (min == INT32_MIN && max == INT32_MAX) { + return __builtin_ssubl_overflow(lhs, rhs, (long*)res); + } +#elif defined(__GNUC__) && INT32_MAX == LLONG_MAX + if (min == INT32_MIN && max == INT32_MAX) { + return __builtin_ssubll_overflow(lhs, rhs, (long long*)res); + } +#endif + int64_t big_result = (int64_t)lhs - 
(int64_t)rhs; + if (big_result > max) { + *res = big_result - ((int64_t)max - (int64_t)min); + return true; + } + if (big_result < min) { + *res = big_result + ((int64_t)max - (int64_t)min); + return true; + } + *res = big_result; + return false; +} + +static inline bool zig_subo_i64(int64_t lhs, int64_t rhs, int64_t *res, int64_t min, int64_t max) { + bool overflow; +#if defined(__GNUC__) && INT64_MAX == INT_MAX + overflow = __builtin_ssub_overflow(lhs, rhs, (int*)res); +#elif defined(__GNUC__) && INT64_MAX == LONG_MAX + overflow = __builtin_ssubl_overflow(lhs, rhs, (long*)res); +#elif defined(__GNUC__) && INT64_MAX == LLONG_MAX + overflow = __builtin_ssubll_overflow(lhs, rhs, (long long*)res); +#else + int int_overflow; + *res = __subodi4(lhs, rhs, &int_overflow); + overflow = int_overflow != 0; +#endif + if (!overflow) { + if (*res > max) { + // TODO adjust the result to be the truncated bits + return true; + } else if (*res < min) { + // TODO adjust the result to be the truncated bits + return true; + } + } + return overflow; +} + +static inline bool zig_subo_i128(int128_t lhs, int128_t rhs, int128_t *res, int128_t min, int128_t max) { + bool overflow; +#if defined(__GNUC__) && INT128_MAX == INT_MAX + overflow = __builtin_ssub_overflow(lhs, rhs, (int*)res); +#elif defined(__GNUC__) && INT128_MAX == LONG_MAX + overflow = __builtin_ssubl_overflow(lhs, rhs, (long*)res); +#elif defined(__GNUC__) && INT128_MAX == LLONG_MAX + overflow = __builtin_ssubll_overflow(lhs, rhs, (long long*)res); +#else + int int_overflow; + *res = __suboti4(lhs, rhs, &int_overflow); + overflow = int_overflow != 0; +#endif + if (!overflow) { + if (*res > max) { + // TODO adjust the result to be the truncated bits + return true; + } else if (*res < min) { + // TODO adjust the result to be the truncated bits + return true; + } + } + return overflow; +} + +static inline bool zig_subo_u8(uint8_t lhs, uint8_t rhs, uint8_t *res, uint8_t max) { +#if defined(__GNUC__) && UINT8_MAX == UINT_MAX + return __builtin_usub_overflow(lhs, rhs, (unsigned int*)res); +#elif defined(__GNUC__) && UINT8_MAX == ULONG_MAX + return __builtin_usubl_overflow(lhs, rhs, (unsigned long*)res); +#elif defined(__GNUC__) && UINT8_MAX == ULLONG_MAX + return __builtin_usubll_overflow(lhs, rhs, (unsigned long long*)res); +#endif + if (rhs > lhs) { + *res = max - (rhs - lhs - 1); + return true; + } + *res = lhs - rhs; + return false; +} + +static inline uint16_t zig_subo_u16(uint16_t lhs, uint16_t rhs, uint16_t *res, uint16_t max) { +#if defined(__GNUC__) && UINT16_MAX == UINT_MAX + return __builtin_usub_overflow(lhs, rhs, (unsigned int*)res); +#elif defined(__GNUC__) && UINT16_MAX == ULONG_MAX + return __builtin_usubl_overflow(lhs, rhs, (unsigned long*)res); +#elif defined(__GNUC__) && UINT16_MAX == ULLONG_MAX + return __builtin_usubll_overflow(lhs, rhs, (unsigned long long*)res); +#endif + if (rhs > lhs) { + *res = max - (rhs - lhs - 1); + return true; + } + *res = lhs - rhs; + return false; +} + +static inline uint32_t zig_subo_u32(uint32_t lhs, uint32_t rhs, uint32_t *res, uint32_t max) { + if (max == UINT32_MAX) { +#if defined(__GNUC__) && UINT32_MAX == UINT_MAX + return __builtin_usub_overflow(lhs, rhs, (unsigned int*)res); +#elif defined(__GNUC__) && UINT32_MAX == ULONG_MAX + return __builtin_usubl_overflow(lhs, rhs, (unsigned long*)res); +#elif defined(__GNUC__) && UINT32_MAX == ULLONG_MAX + return __builtin_usubll_overflow(lhs, rhs, (unsigned long long*)res); +#endif + int int_overflow; + *res = __usubosi4(lhs, rhs, &int_overflow); + return 
int_overflow != 0; + } else { + if (rhs > lhs) { + *res = max - (rhs - lhs - 1); + return true; + } + *res = lhs - rhs; + return false; + } +} + +static inline uint64_t zig_subo_u64(uint64_t lhs, uint64_t rhs, uint64_t *res, uint64_t max) { + if (max == UINT64_MAX) { +#if defined(__GNUC__) && UINT64_MAX == UINT_MAX + return __builtin_usub_overflow(lhs, rhs, (unsigned int*)res); +#elif defined(__GNUC__) && UINT64_MAX == ULONG_MAX + return __builtin_usubl_overflow(lhs, rhs, (unsigned long*)res); +#elif defined(__GNUC__) && UINT64_MAX == ULLONG_MAX + return __builtin_usubll_overflow(lhs, rhs, (unsigned long long*)res); +#else + int int_overflow; + *res = __usubodi4(lhs, rhs, &int_overflow); + return int_overflow != 0; +#endif + } else { + if (rhs > lhs) { + *res = max - (rhs - lhs - 1); + return true; + } + *res = lhs - rhs; + return false; + } +} + +static inline uint128_t zig_subo_u128(uint128_t lhs, uint128_t rhs, uint128_t *res, uint128_t max) { + if (max == UINT128_MAX) { + int int_overflow; + *res = __usuboti4(lhs, rhs, &int_overflow); + return int_overflow != 0; + } else { + if (rhs > lhs) { + *res = max - (rhs - lhs - 1); + return true; + } + *res = lhs - rhs; + return false; + } +} + +static inline bool zig_mulo_i8(int8_t lhs, int8_t rhs, int8_t *res, int8_t min, int8_t max) { +#if defined(__GNUC__) && INT8_MAX == INT_MAX + if (min == INT8_MIN && max == INT8_MAX) { + return __builtin_smul_overflow(lhs, rhs, (int*)res); + } +#elif defined(__GNUC__) && INT8_MAX == LONG_MAX + if (min == INT8_MIN && max == INT8_MAX) { + return __builtin_smull_overflow(lhs, rhs, (long*)res); + } +#elif defined(__GNUC__) && INT8_MAX == LLONG_MAX + if (min == INT8_MIN && max == INT8_MAX) { + return __builtin_smulll_overflow(lhs, rhs, (long long*)res); + } +#endif + int16_t big_result = (int16_t)lhs * (int16_t)rhs; + if (big_result > max) { + *res = big_result - ((int16_t)max - (int16_t)min); + return true; + } + if (big_result < min) { + *res = big_result + ((int16_t)max - (int16_t)min); + return true; + } + *res = big_result; + return false; +} + +static inline bool zig_mulo_i16(int16_t lhs, int16_t rhs, int16_t *res, int16_t min, int16_t max) { +#if defined(__GNUC__) && INT16_MAX == INT_MAX + if (min == INT16_MIN && max == INT16_MAX) { + return __builtin_smul_overflow(lhs, rhs, (int*)res); + } +#elif defined(__GNUC__) && INT16_MAX == LONG_MAX + if (min == INT16_MIN && max == INT16_MAX) { + return __builtin_smull_overflow(lhs, rhs, (long*)res); + } +#elif defined(__GNUC__) && INT16_MAX == LLONG_MAX + if (min == INT16_MIN && max == INT16_MAX) { + return __builtin_smulll_overflow(lhs, rhs, (long long*)res); + } +#endif + int32_t big_result = (int32_t)lhs * (int32_t)rhs; + if (big_result > max) { + *res = big_result - ((int32_t)max - (int32_t)min); + return true; + } + if (big_result < min) { + *res = big_result + ((int32_t)max - (int32_t)min); + return true; + } + *res = big_result; + return false; +} + +static inline bool zig_mulo_i32(int32_t lhs, int32_t rhs, int32_t *res, int32_t min, int32_t max) { +#if defined(__GNUC__) && INT32_MAX == INT_MAX + if (min == INT32_MIN && max == INT32_MAX) { + return __builtin_smul_overflow(lhs, rhs, (int*)res); + } +#elif defined(__GNUC__) && INT32_MAX == LONG_MAX + if (min == INT32_MIN && max == INT32_MAX) { + return __builtin_smull_overflow(lhs, rhs, (long*)res); + } +#elif defined(__GNUC__) && INT32_MAX == LLONG_MAX + if (min == INT32_MIN && max == INT32_MAX) { + return __builtin_smulll_overflow(lhs, rhs, (long long*)res); + } +#endif + int64_t big_result = 
+
+static inline bool zig_mulo_i8(int8_t lhs, int8_t rhs, int8_t *res, int8_t min, int8_t max) {
+#if defined(__GNUC__) && INT8_MAX == INT_MAX
+    if (min == INT8_MIN && max == INT8_MAX) {
+        return __builtin_smul_overflow(lhs, rhs, (int*)res);
+    }
+#elif defined(__GNUC__) && INT8_MAX == LONG_MAX
+    if (min == INT8_MIN && max == INT8_MAX) {
+        return __builtin_smull_overflow(lhs, rhs, (long*)res);
+    }
+#elif defined(__GNUC__) && INT8_MAX == LLONG_MAX
+    if (min == INT8_MIN && max == INT8_MAX) {
+        return __builtin_smulll_overflow(lhs, rhs, (long long*)res);
+    }
+#endif
+    int16_t big_result = (int16_t)lhs * (int16_t)rhs;
+    if (big_result > max) {
+        *res = big_result - ((int16_t)max - (int16_t)min + 1);
+        return true;
+    }
+    if (big_result < min) {
+        *res = big_result + ((int16_t)max - (int16_t)min + 1);
+        return true;
+    }
+    *res = big_result;
+    return false;
+}
+
+static inline bool zig_mulo_i16(int16_t lhs, int16_t rhs, int16_t *res, int16_t min, int16_t max) {
+#if defined(__GNUC__) && INT16_MAX == INT_MAX
+    if (min == INT16_MIN && max == INT16_MAX) {
+        return __builtin_smul_overflow(lhs, rhs, (int*)res);
+    }
+#elif defined(__GNUC__) && INT16_MAX == LONG_MAX
+    if (min == INT16_MIN && max == INT16_MAX) {
+        return __builtin_smull_overflow(lhs, rhs, (long*)res);
+    }
+#elif defined(__GNUC__) && INT16_MAX == LLONG_MAX
+    if (min == INT16_MIN && max == INT16_MAX) {
+        return __builtin_smulll_overflow(lhs, rhs, (long long*)res);
+    }
+#endif
+    int32_t big_result = (int32_t)lhs * (int32_t)rhs;
+    if (big_result > max) {
+        *res = big_result - ((int32_t)max - (int32_t)min + 1);
+        return true;
+    }
+    if (big_result < min) {
+        *res = big_result + ((int32_t)max - (int32_t)min + 1);
+        return true;
+    }
+    *res = big_result;
+    return false;
+}
+
+static inline bool zig_mulo_i32(int32_t lhs, int32_t rhs, int32_t *res, int32_t min, int32_t max) {
+#if defined(__GNUC__) && INT32_MAX == INT_MAX
+    if (min == INT32_MIN && max == INT32_MAX) {
+        return __builtin_smul_overflow(lhs, rhs, (int*)res);
+    }
+#elif defined(__GNUC__) && INT32_MAX == LONG_MAX
+    if (min == INT32_MIN && max == INT32_MAX) {
+        return __builtin_smull_overflow(lhs, rhs, (long*)res);
+    }
+#elif defined(__GNUC__) && INT32_MAX == LLONG_MAX
+    if (min == INT32_MIN && max == INT32_MAX) {
+        return __builtin_smulll_overflow(lhs, rhs, (long long*)res);
+    }
+#endif
+    int64_t big_result = (int64_t)lhs * (int64_t)rhs;
+    if (big_result > max) {
+        *res = big_result - ((int64_t)max - (int64_t)min + 1);
+        return true;
+    }
+    if (big_result < min) {
+        *res = big_result + ((int64_t)max - (int64_t)min + 1);
+        return true;
+    }
+    *res = big_result;
+    return false;
+}
+
+static inline bool zig_mulo_i64(int64_t lhs, int64_t rhs, int64_t *res, int64_t min, int64_t max) {
+    bool overflow;
+#if defined(__GNUC__) && INT64_MAX == INT_MAX
+    overflow = __builtin_smul_overflow(lhs, rhs, (int*)res);
+#elif defined(__GNUC__) && INT64_MAX == LONG_MAX
+    overflow = __builtin_smull_overflow(lhs, rhs, (long*)res);
+#elif defined(__GNUC__) && INT64_MAX == LLONG_MAX
+    overflow = __builtin_smulll_overflow(lhs, rhs, (long long*)res);
+#else
+    int int_overflow;
+    *res = __mulodi4(lhs, rhs, &int_overflow);
+    overflow = int_overflow != 0;
+#endif
+    if (!overflow) {
+        if (*res > max) {
+            // TODO adjust the result to be the truncated bits
+            return true;
+        } else if (*res < min) {
+            // TODO adjust the result to be the truncated bits
+            return true;
+        }
+    }
+    return overflow;
+}
+
+static inline bool zig_mulo_i128(int128_t lhs, int128_t rhs, int128_t *res, int128_t min, int128_t max) {
+    bool overflow;
+#if defined(__GNUC__) && INT128_MAX == INT_MAX
+    overflow = __builtin_smul_overflow(lhs, rhs, (int*)res);
+#elif defined(__GNUC__) && INT128_MAX == LONG_MAX
+    overflow = __builtin_smull_overflow(lhs, rhs, (long*)res);
+#elif defined(__GNUC__) && INT128_MAX == LLONG_MAX
+    overflow = __builtin_smulll_overflow(lhs, rhs, (long long*)res);
+#else
+    int int_overflow;
+    *res = __muloti4(lhs, rhs, &int_overflow);
+    overflow = int_overflow != 0;
+#endif
+    if (!overflow) {
+        if (*res > max) {
+            // TODO adjust the result to be the truncated bits
+            return true;
+        } else if (*res < min) {
+            // TODO adjust the result to be the truncated bits
+            return true;
+        }
+    }
+    return overflow;
+}
+
+static inline bool zig_mulo_u8(uint8_t lhs, uint8_t rhs, uint8_t *res, uint8_t max) {
+#if defined(__GNUC__) && UINT8_MAX == UINT_MAX
+    if (max == UINT8_MAX) {
+        return __builtin_umul_overflow(lhs, rhs, (unsigned int*)res);
+    }
+#elif defined(__GNUC__) && UINT8_MAX == ULONG_MAX
+    if (max == UINT8_MAX) {
+        return __builtin_umull_overflow(lhs, rhs, (unsigned long*)res);
+    }
+#elif defined(__GNUC__) && UINT8_MAX == ULLONG_MAX
+    if (max == UINT8_MAX) {
+        return __builtin_umulll_overflow(lhs, rhs, (unsigned long long*)res);
+    }
+#endif
+    uint16_t big_result = (uint16_t)lhs * (uint16_t)rhs;
+    if (big_result > max) {
+        *res = big_result - max - 1;
+        return true;
+    }
+    *res = big_result;
+    return false;
+}
+
+static inline bool zig_mulo_u16(uint16_t lhs, uint16_t rhs, uint16_t *res, uint16_t max) {
+#if defined(__GNUC__) && UINT16_MAX == UINT_MAX
+    if (max == UINT16_MAX) {
+        return __builtin_umul_overflow(lhs, rhs, (unsigned int*)res);
+    }
+#elif defined(__GNUC__) && UINT16_MAX == ULONG_MAX
+    if (max == UINT16_MAX) {
+        return __builtin_umull_overflow(lhs, rhs, (unsigned long*)res);
+    }
+#elif defined(__GNUC__) && UINT16_MAX == ULLONG_MAX
+    if (max == UINT16_MAX) {
+        return __builtin_umulll_overflow(lhs, rhs, (unsigned long long*)res);
+    }
+#endif
+    uint32_t big_result = (uint32_t)lhs * (uint32_t)rhs;
+    if (big_result > max) {
+        *res = big_result - max - 1;
+        return true;
+    }
+    *res = big_result;
+    return false;
+}
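+
+/* Widened unsigned multiply, worked example for u8 (max == UINT8_MAX):
+ * 10 * 25 == 250 fits and does not overflow, while 10 * 26 == 260 > 255
+ * wraps to 260 - 255 - 1 == 4 and reports overflow, matching the
+ * @mulWithOverflow vector test below. */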
+
+static inline bool zig_mulo_u32(uint32_t lhs, uint32_t rhs, uint32_t *res, uint32_t max) {
+#if defined(__GNUC__) && UINT32_MAX == UINT_MAX
+    if (max == UINT32_MAX) {
+        return __builtin_umul_overflow(lhs, rhs, (unsigned int*)res);
+    }
+#elif defined(__GNUC__) && UINT32_MAX == ULONG_MAX
+    if (max == UINT32_MAX) {
+        return __builtin_umull_overflow(lhs, rhs, (unsigned long*)res);
+    }
+#elif defined(__GNUC__) && UINT32_MAX == ULLONG_MAX
+    if (max == UINT32_MAX) {
+        return __builtin_umulll_overflow(lhs, rhs, (unsigned long long*)res);
+    }
+#endif
+    uint64_t big_result = (uint64_t)lhs * (uint64_t)rhs;
+    if (big_result > max) {
+        *res = big_result - max - 1;
+        return true;
+    }
+    *res = big_result;
+    return false;
+}
+
+static inline bool zig_mulo_u64(uint64_t lhs, uint64_t rhs, uint64_t *res, uint64_t max) {
+    bool overflow;
+#if defined(__GNUC__) && UINT64_MAX == UINT_MAX
+    overflow = __builtin_umul_overflow(lhs, rhs, (unsigned int*)res);
+#elif defined(__GNUC__) && UINT64_MAX == ULONG_MAX
+    overflow = __builtin_umull_overflow(lhs, rhs, (unsigned long*)res);
+#elif defined(__GNUC__) && UINT64_MAX == ULLONG_MAX
+    overflow = __builtin_umulll_overflow(lhs, rhs, (unsigned long long*)res);
+#else
+    int int_overflow;
+    *res = __umulodi4(lhs, rhs, &int_overflow);
+    overflow = int_overflow != 0;
+#endif
+    if (*res > max && !overflow) {
+        *res -= max + 1;
+        return true;
+    }
+    return overflow;
+}
+
+static inline bool zig_mulo_u128(uint128_t lhs, uint128_t rhs, uint128_t *res, uint128_t max) {
+    int overflow;
+    *res = __umuloti4(lhs, rhs, &overflow);
+    if (*res > max && overflow == 0) {
+        *res -= max + 1;
+        return true;
+    }
+    return overflow != 0;
+}
+
 static inline float zig_bitcast_f32_u32(uint32_t arg) {
     float dest;
     memcpy(&dest, &arg, sizeof dest);
@@ -608,6 +1307,76 @@ static inline int zig_popcount_u128(uint128_t value, uint8_t zig_type_bit_width)
 
 #define zig_popcount_i128 zig_popcount_u128
 
+static inline bool zig_shlo_i8(int8_t lhs, int8_t rhs, int8_t *res, uint8_t bits) {
+    *res = lhs << rhs;
+    if (zig_clz_i8(lhs, bits) >= rhs) return false;
+    *res &= UINT8_MAX >> (8 - bits);
+    return true;
+}
+
+static inline bool zig_shlo_i16(int16_t lhs, int16_t rhs, int16_t *res, uint8_t bits) {
+    *res = lhs << rhs;
+    if (zig_clz_i16(lhs, bits) >= rhs) return false;
+    *res &= UINT16_MAX >> (16 - bits);
+    return true;
+}
+
+static inline bool zig_shlo_i32(int32_t lhs, int32_t rhs, int32_t *res, uint8_t bits) {
+    *res = lhs << rhs;
+    if (zig_clz_i32(lhs, bits) >= rhs) return false;
+    *res &= UINT32_MAX >> (32 - bits);
+    return true;
+}
+
+static inline bool zig_shlo_i64(int64_t lhs, int64_t rhs, int64_t *res, uint8_t bits) {
+    *res = lhs << rhs;
+    if (zig_clz_i64(lhs, bits) >= rhs) return false;
+    *res &= UINT64_MAX >> (64 - bits);
+    return true;
+}
+
+static inline bool zig_shlo_i128(int128_t lhs, int128_t rhs, int128_t *res, uint8_t bits) {
+    *res = lhs << rhs;
+    if (zig_clz_i128(lhs, bits) >= rhs) return false;
+    *res &= UINT128_MAX >> (128 - bits);
+    return true;
+}
+
+static inline bool zig_shlo_u8(uint8_t lhs, uint8_t rhs, uint8_t *res, uint8_t bits) {
+    *res = lhs << rhs;
+    if (zig_clz_u8(lhs, bits) >= rhs) return false;
+    *res &= UINT8_MAX >> (8 - bits);
+    return true;
+}
+
+static inline bool zig_shlo_u16(uint16_t lhs, uint16_t rhs, uint16_t *res, uint8_t bits) {
+    *res = lhs << rhs;
+    if (zig_clz_u16(lhs, bits) >= rhs) return false;
+    *res &= UINT16_MAX >> (16 - bits);
+    return true;
+}
+
+static inline bool zig_shlo_u32(uint32_t lhs, uint32_t rhs, uint32_t *res, uint8_t bits) {
+    *res = lhs << rhs;
+    if (zig_clz_u32(lhs, bits) >= rhs) return false;
+    *res &= UINT32_MAX >> (32 - bits);
+    return true;
+}
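+
+/* The shift overflows exactly when set bits are shed, i.e. when the count
+ * of leading zeros within `bits` is smaller than rhs. For u8 with
+ * bits == 8: 1 << 7 == 128 still fits (clz == 7 >= 7), while 8 << 7 sheds
+ * bits (clz == 4 < 7) and reports overflow. */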
+
+static inline bool zig_shlo_u64(uint64_t lhs, uint64_t rhs, uint64_t *res, uint8_t bits) {
+    *res = lhs << rhs;
+    if (zig_clz_u64(lhs, bits) >= rhs) return false;
+    *res &= UINT64_MAX >> (64 - bits);
+    return true;
+}
+
+static inline bool zig_shlo_u128(uint128_t lhs, uint128_t rhs, uint128_t *res, uint8_t bits) {
+    *res = lhs << rhs;
+    if (zig_clz_u128(lhs, bits) >= rhs) return false;
+    *res &= UINT128_MAX >> (128 - bits);
+    return true;
+}
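+
+/* Both the signed and unsigned variants mask the shifted value back down to
+ * `bits` wide, so for sub-word types only the low `bits` of the wrapped
+ * result survive. */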
+
 #define zig_sign_extend(T) \
     static inline T zig_sign_extend_##T(T value, uint8_t zig_type_bit_width) { \
         const T m = (T)1 << (T)(zig_type_bit_width - 1); \
diff --git a/src/type.zig b/src/type.zig
index 2c3ce0d90008..54b7d44a3d6e 100644
--- a/src/type.zig
+++ b/src/type.zig
@@ -5999,6 +5999,7 @@ pub const Type = extern union {
         };
     };
 
+    pub const @"u1" = initTag(.u1);
     pub const @"u8" = initTag(.u8);
     pub const @"u16" = initTag(.u16);
     pub const @"u32" = initTag(.u32);
diff --git a/src/value.zig b/src/value.zig
index 1e70ad0c5448..b7764327c099 100644
--- a/src/value.zig
+++ b/src/value.zig
@@ -1671,6 +1671,7 @@ pub const Value = extern union {
     }
 
     /// Asserts the value is an integer, and the destination type is ComptimeInt or Int.
+    /// Vectors are also accepted. Vector results are reduced with AND.
    pub fn intFitsInType(self: Value, ty: Type, target: Target) bool {
        switch (self.tag()) {
            .zero,
@@ -1767,6 +1768,16 @@
                 else => unreachable,
             },
 
+            .aggregate => {
+                assert(ty.zigTypeTag() == .Vector);
+                for (self.castTag(.aggregate).?.data) |elem| {
+                    if (!elem.intFitsInType(ty.scalarType(), target)) {
+                        return false;
+                    }
+                }
+                return true;
+            },
+
             else => unreachable,
         }
     }
@@ -2015,7 +2026,7 @@
             const result_data = try allocator.alloc(Value, ty.vectorLen());
             for (result_data) |*scalar, i| {
                 const res_bool = compareScalar(lhs.indexVectorlike(i), op, rhs.indexVectorlike(i), ty.scalarType(), mod);
-                scalar.* = if (res_bool) Value.@"true" else Value.@"false";
+                scalar.* = makeBool(res_bool);
             }
             return Value.Tag.aggregate.create(allocator, result_data);
         }
@@ -2950,7 +2961,8 @@
     }
 
     pub const OverflowArithmeticResult = struct {
-        overflowed: bool,
+        /// TODO: Rename to `overflow_bit` and make of type `u1`.
+        overflowed: Value,
         wrapped_result: Value,
     };
 
@@ -2960,6 +2972,29 @@
         ty: Type,
         arena: Allocator,
         target: Target,
+    ) !OverflowArithmeticResult {
+        if (ty.zigTypeTag() == .Vector) {
+            const overflowed_data = try arena.alloc(Value, ty.vectorLen());
+            const result_data = try arena.alloc(Value, ty.vectorLen());
+            for (result_data) |*scalar, i| {
+                const of_math_result = try intAddWithOverflowScalar(lhs.indexVectorlike(i), rhs.indexVectorlike(i), ty.scalarType(), arena, target);
+                overflowed_data[i] = of_math_result.overflowed;
+                scalar.* = of_math_result.wrapped_result;
+            }
+            return OverflowArithmeticResult{
+                .overflowed = try Value.Tag.aggregate.create(arena, overflowed_data),
+                .wrapped_result = try Value.Tag.aggregate.create(arena, result_data),
+            };
+        }
+        return intAddWithOverflowScalar(lhs, rhs, ty, arena, target);
+    }
+
+    pub fn intAddWithOverflowScalar(
+        lhs: Value,
+        rhs: Value,
+        ty: Type,
+        arena: Allocator,
+        target: Target,
     ) !OverflowArithmeticResult {
         const info = ty.intInfo(target);
 
@@ -2975,7 +3010,7 @@
         const overflowed = result_bigint.addWrap(lhs_bigint, rhs_bigint, info.signedness, info.bits);
         const result = try fromBigInt(arena, result_bigint.toConst());
         return OverflowArithmeticResult{
-            .overflowed = overflowed,
+            .overflowed = makeBool(overflowed),
             .wrapped_result = result,
         };
     }
@@ -3086,6 +3121,29 @@
         ty: Type,
         arena: Allocator,
         target: Target,
+    ) !OverflowArithmeticResult {
+        if (ty.zigTypeTag() == .Vector) {
+            const overflowed_data = try arena.alloc(Value, ty.vectorLen());
+            const result_data = try arena.alloc(Value, ty.vectorLen());
+            for (result_data) |*scalar, i| {
+                const of_math_result = try intSubWithOverflowScalar(lhs.indexVectorlike(i), rhs.indexVectorlike(i), ty.scalarType(), arena, target);
+                overflowed_data[i] = of_math_result.overflowed;
+                scalar.* = of_math_result.wrapped_result;
+            }
+            return OverflowArithmeticResult{
+                .overflowed = try Value.Tag.aggregate.create(arena, overflowed_data),
+                .wrapped_result = try Value.Tag.aggregate.create(arena, result_data),
+            };
+        }
+        return intSubWithOverflowScalar(lhs, rhs, ty, arena, target);
+    }
+
+    pub fn intSubWithOverflowScalar(
+        lhs: Value,
+        rhs: Value,
+        ty: Type,
+        arena: Allocator,
+        target: Target,
     ) !OverflowArithmeticResult {
         const info = ty.intInfo(target);
 
@@ -3101,7 +3159,7 @@
         const overflowed = result_bigint.subWrap(lhs_bigint, rhs_bigint, info.signedness, info.bits);
         const wrapped_result = try fromBigInt(arena, result_bigint.toConst());
         return OverflowArithmeticResult{
-            .overflowed = overflowed,
+            .overflowed = makeBool(overflowed),
             .wrapped_result = wrapped_result,
         };
     }
@@ -3196,6 +3254,29 @@
         ty: Type,
         arena: Allocator,
         target: Target,
+    ) !OverflowArithmeticResult {
+        if (ty.zigTypeTag() == .Vector) {
+            const overflowed_data = try arena.alloc(Value, ty.vectorLen());
+            const result_data = try arena.alloc(Value, ty.vectorLen());
+            for (result_data) |*scalar, i| {
+                const of_math_result = try intMulWithOverflowScalar(lhs.indexVectorlike(i), rhs.indexVectorlike(i), ty.scalarType(), arena, target);
+                overflowed_data[i] = of_math_result.overflowed;
+                scalar.* = of_math_result.wrapped_result;
+            }
+            return OverflowArithmeticResult{
+                .overflowed = try Value.Tag.aggregate.create(arena, overflowed_data),
+                .wrapped_result = try Value.Tag.aggregate.create(arena, result_data),
+            };
+        }
+        return intMulWithOverflowScalar(lhs, rhs, ty, arena, target);
+    }
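+
+    /// Scalar case of `intMulWithOverflow` above; e.g. for `u8`,
+    /// 10 * 26 == 260 wraps to 4 and the returned `overflowed` Value is true.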
+    pub fn intMulWithOverflowScalar(
+        lhs: Value,
+        rhs: Value,
+        ty: Type,
+        arena: Allocator,
+        target: Target,
     ) !OverflowArithmeticResult {
         const info = ty.intInfo(target);
 
@@ -3220,7 +3301,7 @@
         }
 
         return OverflowArithmeticResult{
-            .overflowed = overflowed,
+            .overflowed = makeBool(overflowed),
             .wrapped_result = try fromBigInt(arena, result_bigint.toConst()),
         };
     }
@@ -3910,6 +3991,29 @@
         ty: Type,
         allocator: Allocator,
         target: Target,
+    ) !OverflowArithmeticResult {
+        if (ty.zigTypeTag() == .Vector) {
+            const overflowed_data = try allocator.alloc(Value, ty.vectorLen());
+            const result_data = try allocator.alloc(Value, ty.vectorLen());
+            for (result_data) |*scalar, i| {
+                const of_math_result = try shlWithOverflowScalar(lhs.indexVectorlike(i), rhs.indexVectorlike(i), ty.scalarType(), allocator, target);
+                overflowed_data[i] = of_math_result.overflowed;
+                scalar.* = of_math_result.wrapped_result;
+            }
+            return OverflowArithmeticResult{
+                .overflowed = try Value.Tag.aggregate.create(allocator, overflowed_data),
+                .wrapped_result = try Value.Tag.aggregate.create(allocator, result_data),
+            };
+        }
+        return shlWithOverflowScalar(lhs, rhs, ty, allocator, target);
+    }
+
+    pub fn shlWithOverflowScalar(
+        lhs: Value,
+        rhs: Value,
+        ty: Type,
+        allocator: Allocator,
+        target: Target,
     ) !OverflowArithmeticResult {
         const info = ty.intInfo(target);
         var lhs_space: Value.BigIntSpace = undefined;
@@ -3930,7 +4034,7 @@
             result_bigint.truncate(result_bigint.toConst(), info.signedness, info.bits);
         }
         return OverflowArithmeticResult{
-            .overflowed = overflowed,
+            .overflowed = makeBool(overflowed),
             .wrapped_result = try fromBigInt(allocator, result_bigint.toConst()),
         };
     }
diff --git a/test/behavior/math.zig b/test/behavior/math.zig
index 011c714935a3..2f8cf06ee739 100644
--- a/test/behavior/math.zig
+++ b/test/behavior/math.zig
@@ -621,24 +621,41 @@ test "128-bit multiplication" {
 test "@addWithOverflow" {
     if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
 
-    var result: u8 = undefined;
-    try expect(@addWithOverflow(u8, 250, 100, &result));
-    try expect(result == 94);
-    try expect(!@addWithOverflow(u8, 100, 150, &result));
-    try expect(result == 250);
-
-    var a: u8 = 200;
-    var b: u8 = 99;
-    try expect(@addWithOverflow(u8, a, b, &result));
-    try expect(result == 43);
-    b = 55;
-    try expect(!@addWithOverflow(u8, a, b, &result));
-    try expect(result == 255);
+    {
+        var result: u8 = undefined;
+        try expect(@addWithOverflow(u8, 250, 100, &result));
+        try expect(result == 94);
+        try expect(!@addWithOverflow(u8, 100, 150, &result));
+        try expect(result == 250);
+
+        var a: u8 = 200;
+        var b: u8 = 99;
+        try expect(@addWithOverflow(u8, a, b, &result));
+        try expect(result == 43);
+        b = 55;
+        try expect(!@addWithOverflow(u8, a, b, &result));
+        try expect(result == 255);
+    }
+
+    {
+        var a: usize = 6;
+        var b: usize = 6;
+        var res: usize = undefined;
+        try expect(!@addWithOverflow(usize, a, b, &res));
+        try expect(res == 12);
+    }
+
+    {
+        var a: isize = -6;
+        var b: isize = -6;
+        var res: isize = undefined;
+        try expect(!@addWithOverflow(isize, a, b, &res));
+        try expect(res == -12);
+    }
 }
 
 test "small int addition" {
     if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
-    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
     if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
 
     var x: u2 = 0;
"@mulWithOverflow bitsize > 32" { test "@subWithOverflow" { if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO - var result: u8 = undefined; - try expect(@subWithOverflow(u8, 1, 2, &result)); - try expect(result == 255); - try expect(!@subWithOverflow(u8, 1, 1, &result)); - try expect(result == 0); + { + var result: u8 = undefined; + try expect(@subWithOverflow(u8, 1, 2, &result)); + try expect(result == 255); + try expect(!@subWithOverflow(u8, 1, 1, &result)); + try expect(result == 0); - var a: u8 = 1; - var b: u8 = 2; - try expect(@subWithOverflow(u8, a, b, &result)); - try expect(result == 255); - b = 1; - try expect(!@subWithOverflow(u8, a, b, &result)); - try expect(result == 0); + var a: u8 = 1; + var b: u8 = 2; + try expect(@subWithOverflow(u8, a, b, &result)); + try expect(result == 255); + b = 1; + try expect(!@subWithOverflow(u8, a, b, &result)); + try expect(result == 0); + } + + { + var a: usize = 6; + var b: usize = 6; + var res: usize = undefined; + try expect(!@subWithOverflow(usize, a, b, &res)); + try expect(res == 0); + } + + { + var a: isize = -6; + var b: isize = -6; + var res: isize = undefined; + try expect(!@subWithOverflow(isize, a, b, &res)); + try expect(res == 0); + } } test "@shlWithOverflow" { diff --git a/test/behavior/vector.zig b/test/behavior/vector.zig index 9847054692c8..cbd878770100 100644 --- a/test/behavior/vector.zig +++ b/test/behavior/vector.zig @@ -903,3 +903,123 @@ test "multiplication-assignment operator with an array operand" { try S.doTheTest(); comptime try S.doTheTest(); } + +test "@addWithOverflow" { + if (builtin.zig_backend == .stage1) { + // stage1 doesn't support vector args + return error.SkipZigTest; + } + if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO + if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO + + const S = struct { + fn doTheTest() !void { + { + var result: @Vector(4, u8) = undefined; + var overflow = @addWithOverflow(@Vector(4, u8), @Vector(4, u8){ 250, 250, 250, 250 }, @Vector(4, u8){ 0, 5, 6, 10 }, &result); + var expected: @Vector(4, bool) = .{ false, false, true, true }; + try expect(mem.eql(bool, &@as([4]bool, overflow), &@as([4]bool, expected))); + } + { + var result: @Vector(4, i8) = undefined; + var overflow = @addWithOverflow(@Vector(4, i8), @Vector(4, i8){ -125, -125, 125, 125 }, @Vector(4, i8){ -3, -4, 2, 3 }, &result); + var expected: @Vector(4, bool) = .{ false, true, false, true }; + try expect(mem.eql(bool, &@as([4]bool, overflow), &@as([4]bool, expected))); + } + { + var result: @Vector(4, u1) = undefined; + var overflow = @addWithOverflow(@Vector(4, u1), @Vector(4, u1){ 0, 0, 1, 1 }, @Vector(4, u1){ 0, 1, 0, 1 }, &result); + var expected: @Vector(4, bool) = .{ false, false, false, true }; + try expect(mem.eql(bool, &@as([4]bool, overflow), &@as([4]bool, expected))); + } + { + var result: @Vector(4, u0) = undefined; + var overflow = @addWithOverflow(@Vector(4, u0), @Vector(4, u0){ 0, 0, 0, 0 }, @Vector(4, u0){ 0, 0, 0, 0 }, &result); + var expected: @Vector(4, bool) = .{ false, false, false, false }; + try expect(mem.eql(bool, &@as([4]bool, overflow), &@as([4]bool, expected))); + } + } + }; + try S.doTheTest(); + comptime try S.doTheTest(); +} + +test "@subWithOverflow" { + if (builtin.zig_backend == 
+
+test "@subWithOverflow" {
+    if (builtin.zig_backend == .stage1) {
+        // stage1 doesn't support vector args
+        return error.SkipZigTest;
+    }
+    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
+
+    const S = struct {
+        fn doTheTest() !void {
+            {
+                var result: @Vector(2, u8) = undefined;
+                var overflow = @subWithOverflow(@Vector(2, u8), @Vector(2, u8){ 5, 5 }, @Vector(2, u8){ 5, 6 }, &result);
+                var expected: @Vector(2, bool) = .{ false, true };
+                try expect(mem.eql(bool, &@as([2]bool, overflow), &@as([2]bool, expected)));
+            }
+            {
+                var result: @Vector(4, i8) = undefined;
+                var overflow = @subWithOverflow(@Vector(4, i8), @Vector(4, i8){ -120, -120, 120, 120 }, @Vector(4, i8){ 8, 9, -7, -8 }, &result);
+                var expected: @Vector(4, bool) = .{ false, true, false, true };
+                try expect(mem.eql(bool, &@as([4]bool, overflow), &@as([4]bool, expected)));
+            }
+        }
+    };
+    try S.doTheTest();
+    comptime try S.doTheTest();
+}
+
+test "@mulWithOverflow" {
+    if (builtin.zig_backend == .stage1) {
+        // stage1 doesn't support vector args
+        return error.SkipZigTest;
+    }
+    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
+
+    const S = struct {
+        fn doTheTest() !void {
+            var result: @Vector(4, u8) = undefined;
+            var overflow = @mulWithOverflow(@Vector(4, u8), @Vector(4, u8){ 10, 10, 10, 10 }, @Vector(4, u8){ 25, 26, 0, 30 }, &result);
+            var expected: @Vector(4, bool) = .{ false, true, false, true };
+            try expect(mem.eql(bool, &@as([4]bool, overflow), &@as([4]bool, expected)));
+        }
+    };
+    try S.doTheTest();
+    comptime try S.doTheTest();
+}
+
+test "@shlWithOverflow" {
+    if (builtin.zig_backend == .stage1) {
+        // stage1 doesn't support vector args
+        return error.SkipZigTest;
+    }
+    if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_c) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
+    if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
+
+    const S = struct {
+        fn doTheTest() !void {
+            var result: @Vector(4, u8) = undefined;
+            var overflow = @shlWithOverflow(@Vector(4, u8), @Vector(4, u8){ 0, 1, 8, 255 }, @Vector(4, u3){ 7, 7, 7, 7 }, &result);
+            var expected: @Vector(4, bool) = .{ false, false, true, true };
+            try expect(mem.eql(bool, &@as([4]bool, overflow), &@as([4]bool, expected)));
+        }
+    };
+    try S.doTheTest();
+    comptime try S.doTheTest();
+}
diff --git a/test/cases/recursive_fibonacci.zig b/test/cases/recursive_fibonacci.zig
index 4e284e3fc180..a2b8436dd79b 100644
--- a/test/cases/recursive_fibonacci.zig
+++ b/test/cases/recursive_fibonacci.zig
@@ -20,5 +20,5 @@ fn assert(ok: bool) void {
 }
 
 // run
-// target=arm-linux,x86_64-linux,x86_64-macos,wasm32-wasi
+// target=x86_64-linux,x86_64-macos,wasm32-wasi
 //