diff --git a/compiler/rustc_ty_utils/src/abi.rs b/compiler/rustc_ty_utils/src/abi.rs
index f23c2cf2c07a7..536bcc0cd9e4e 100644
--- a/compiler/rustc_ty_utils/src/abi.rs
+++ b/compiler/rustc_ty_utils/src/abi.rs
@@ -726,6 +726,49 @@ fn fn_abi_adjust_for_abi<'tcx>(
                 };
             }
 
+            if arg_idx.is_none() && arg.layout.size > Pointer(AddressSpace::DATA).size(cx) * 2 {
+                // Pass return values larger than 2 registers through a
+                // return area pointer. LLVM and Cranelift disagree about how
+                // to return values that don't fit in the registers designated
+                // for return values. LLVM will force the entire return value
+                // to be passed by return area pointer, while Cranelift will
+                // look at each IR-level return value independently and decide
+                // whether to pass it in a register, which would result in the
+                // return value being passed partially in registers and
+                // partially through a return area pointer.
+                //
+                // While Cranelift may need to be fixed, as the LLVM behavior
+                // is generally more correct with respect to the surface
+                // language, forcing this behavior in rustc itself makes it
+                // easier for other backends to conform to the Rust ABI; for
+                // the C ABI, rustc already handles this anyway.
+                //
+                // In addition, LLVM's decision to pass the return value in
+                // registers or through a return area pointer depends on how
+                // exactly the return type is lowered to an LLVM IR type. For
+                // example `Option<u128>` can be lowered as `{ i128, i128 }`,
+                // in which case the x86_64 backend would use a return area
+                // pointer, or it could be passed as `{ i32, i128 }`, in which
+                // case the x86_64 backend would pass it in registers, taking
+                // advantage of an LLVM ABI extension that allows using 3
+                // registers for the x86_64 SysV calling convention rather
+                // than the officially specified 2 registers.
+                //
+                // FIXME: Technically we should look at the number of available
+                // return registers rather than guessing that there are 2
+                // registers for return values. In practice only a couple of
+                // architectures have fewer than 2 return registers, none of
+                // which are supported by Cranelift.
+                //
+                // NOTE: This adjustment is only necessary for the Rust ABI,
+                // as for other ABIs the calling convention implementations in
+                // rustc_target already ensure that any return value which
+                // doesn't fit in the available return registers is passed in
+                // the right way for the current target.
+                arg.make_indirect();
+                return;
+            }
+
             match arg.layout.abi {
                 Abi::Aggregate { .. } => {}
 
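The threshold above is expressed in pointer widths rather than a fixed byte count, so the same rule makes `u128`/`f128` returns indirect on 32-bit targets while leaving them in registers on 64-bit ones. A minimal sketch of the arithmetic, with illustrative names (the real check compares `arg.layout.size` against `Pointer(AddressSpace::DATA).size(cx) * 2`):

```rust
// Illustrative sketch only; the real check lives in
// fn_abi_adjust_for_abi above.
fn needs_return_area(ret_size_bytes: u64, pointer_bytes: u64) -> bool {
    // Anything larger than two pointer-sized return registers is
    // forced through a return area pointer under the Rust ABI.
    ret_size_bytes > pointer_bytes * 2
}

fn main() {
    // u128/f128 are 16 bytes: exactly two registers on 64-bit targets...
    assert!(!needs_return_area(16, 8));
    // ...but more than two registers on 32-bit targets (16 > 2 * 4),
    // hence the new `sret` expectations in the 32-bit test revisions below.
    assert!(needs_return_area(16, 4));
    // A `(u32, u128)` scalar pair is 32 bytes and becomes indirect even
    // on x86_64 (32 > 2 * 8); see i128-x86-align.rs below.
    assert!(needs_return_area(32, 8));
}
```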
diff --git a/tests/assembly/x86-return-float.rs b/tests/assembly/x86-return-float.rs
index f9ebf53dddca9..30c5e86963393 100644
--- a/tests/assembly/x86-return-float.rs
+++ b/tests/assembly/x86-return-float.rs
@@ -315,10 +315,10 @@ pub fn return_f16(x: f16) -> f16 {
 #[no_mangle]
 pub fn return_f128(x: f128) -> f128 {
     // CHECK: movl [[#%d,OFFSET:]](%ebp), %[[PTR:.*]]
-    // CHECK-NEXT: movl [[#%d,OFFSET+16]](%ebp), %[[VAL4:.*]]
     // CHECK-NEXT: movl [[#%d,OFFSET+4]](%ebp), %[[VAL1:.*]]
     // CHECK-NEXT: movl [[#%d,OFFSET+8]](%ebp), %[[VAL2:.*]]
     // CHECK-NEXT: movl [[#%d,OFFSET+12]](%ebp), %[[VAL3:.*]]
+    // CHECK-NEXT: movl [[#%d,OFFSET+16]](%ebp), %[[VAL4:.*]]
     // CHECK-NEXT: movl %[[VAL4:.*]] 12(%[[PTR]])
     // CHECK-NEXT: movl %[[VAL3:.*]] 8(%[[PTR]])
     // CHECK-NEXT: movl %[[VAL2:.*]] 4(%[[PTR]])
diff --git a/tests/codegen/float/f128.rs b/tests/codegen/float/f128.rs
index 80b572fbbc9f7..4af264101deed 100644
--- a/tests/codegen/float/f128.rs
+++ b/tests/codegen/float/f128.rs
@@ -1,7 +1,11 @@
 // 32-bit x86 returns `f32` and `f64` differently to avoid the x87 stack.
-//@ revisions: x86 other
+// 32-bit systems will return 128-bit values using a return area pointer.
+//@ revisions: x86 bit32 bit64
 //@[x86] only-x86
-//@[other] ignore-x86
+//@[bit32] ignore-x86
+//@[bit32] only-32bit
+//@[bit64] ignore-x86
+//@[bit64] only-64bit
 
 // Verify that our intrinsics generate the correct LLVM calls for f128
 
@@ -52,42 +56,54 @@ pub fn f128_le(a: f128, b: f128) -> bool {
     a <= b
 }
 
-// CHECK-LABEL: fp128 @f128_neg(
+// x86-LABEL: void @f128_neg({{.*}}sret([16 x i8])
+// bit32-LABEL: void @f128_neg({{.*}}sret([16 x i8])
+// bit64-LABEL: fp128 @f128_neg(
 #[no_mangle]
 pub fn f128_neg(a: f128) -> f128 {
     // CHECK: fneg fp128
     -a
 }
 
-// CHECK-LABEL: fp128 @f128_add(
+// x86-LABEL: void @f128_add({{.*}}sret([16 x i8])
+// bit32-LABEL: void @f128_add({{.*}}sret([16 x i8])
+// bit64-LABEL: fp128 @f128_add(
 #[no_mangle]
 pub fn f128_add(a: f128, b: f128) -> f128 {
     // CHECK: fadd fp128 %{{.+}}, %{{.+}}
     a + b
 }
 
-// CHECK-LABEL: fp128 @f128_sub(
+// x86-LABEL: void @f128_sub({{.*}}sret([16 x i8])
+// bit32-LABEL: void @f128_sub({{.*}}sret([16 x i8])
+// bit64-LABEL: fp128 @f128_sub(
 #[no_mangle]
 pub fn f128_sub(a: f128, b: f128) -> f128 {
     // CHECK: fsub fp128 %{{.+}}, %{{.+}}
     a - b
 }
 
-// CHECK-LABEL: fp128 @f128_mul(
+// x86-LABEL: void @f128_mul({{.*}}sret([16 x i8])
+// bit32-LABEL: void @f128_mul({{.*}}sret([16 x i8])
+// bit64-LABEL: fp128 @f128_mul(
 #[no_mangle]
 pub fn f128_mul(a: f128, b: f128) -> f128 {
     // CHECK: fmul fp128 %{{.+}}, %{{.+}}
     a * b
 }
 
-// CHECK-LABEL: fp128 @f128_div(
+// x86-LABEL: void @f128_div({{.*}}sret([16 x i8])
+// bit32-LABEL: void @f128_div({{.*}}sret([16 x i8])
+// bit64-LABEL: fp128 @f128_div(
 #[no_mangle]
 pub fn f128_div(a: f128, b: f128) -> f128 {
     // CHECK: fdiv fp128 %{{.+}}, %{{.+}}
     a / b
 }
 
-// CHECK-LABEL: fp128 @f128_rem(
+// x86-LABEL: void @f128_rem({{.*}}sret([16 x i8])
+// bit32-LABEL: void @f128_rem({{.*}}sret([16 x i8])
+// bit64-LABEL: fp128 @f128_rem(
 #[no_mangle]
 pub fn f128_rem(a: f128, b: f128) -> f128 {
     // CHECK: frem fp128 %{{.+}}, %{{.+}}
@@ -143,44 +159,56 @@ pub fn f128_as_f16(a: f128) -> f16 {
     a as f16
 }
 
-// other-LABEL: float @f128_as_f32(
 // x86-LABEL: i32 @f128_as_f32(
+// bit32-LABEL: float @f128_as_f32(
+// bit64-LABEL: float @f128_as_f32(
 #[no_mangle]
 pub fn f128_as_f32(a: f128) -> f32 {
     // CHECK: fptrunc fp128 %{{.+}} to float
     a as f32
 }
 
-// other-LABEL: double @f128_as_f64(
 // x86-LABEL: void @f128_as_f64(
+// bit32-LABEL: double @f128_as_f64(
+// bit64-LABEL: double @f128_as_f64(
 #[no_mangle]
 pub fn f128_as_f64(a: f128) -> f64 {
     // CHECK: fptrunc fp128 %{{.+}} to double
     a as f64
 }
 
-// CHECK-LABEL: fp128 @f128_as_self(
+// x86-LABEL: void @f128_as_self({{.*}}sret([16 x i8])
+// bit32-LABEL: void @f128_as_self({{.*}}sret([16 x i8])
+// bit64-LABEL: fp128 @f128_as_self(
 #[no_mangle]
 pub fn f128_as_self(a: f128) -> f128 {
-    // CHECK: ret fp128 %{{.+}}
+    // x86: store fp128 %a, ptr %_0, align 16
+    // bit32: store fp128 %a, ptr %_0, align 16
+    // bit64: ret fp128 %{{.+}}
     a as f128
 }
 
-// CHECK-LABEL: fp128 @f16_as_f128(
+// x86-LABEL: void @f16_as_f128({{.*}}sret([16 x i8])
+// bit32-LABEL: void @f16_as_f128({{.*}}sret([16 x i8])
+// bit64-LABEL: fp128 @f16_as_f128(
 #[no_mangle]
 pub fn f16_as_f128(a: f16) -> f128 {
     // CHECK: fpext half %{{.+}} to fp128
     a as f128
 }
 
-// CHECK-LABEL: fp128 @f32_as_f128(
+// x86-LABEL: void @f32_as_f128({{.*}}sret([16 x i8])
+// bit32-LABEL: void @f32_as_f128({{.*}}sret([16 x i8])
+// bit64-LABEL: fp128 @f32_as_f128(
 #[no_mangle]
 pub fn f32_as_f128(a: f32) -> f128 {
     // CHECK: fpext float %{{.+}} to fp128
     a as f128
 }
 
-// CHECK-LABEL: fp128 @f64_as_f128(
+// x86-LABEL: void @f64_as_f128({{.*}}sret([16 x i8])
+// bit32-LABEL: void @f64_as_f128({{.*}}sret([16 x i8])
+// bit64-LABEL: fp128 @f64_as_f128(
 #[no_mangle]
 pub fn f64_as_f128(a: f64) -> f128 {
     // CHECK: fpext double %{{.+}} to fp128
@@ -216,7 +244,9 @@ pub fn f128_as_u64(a: f128) -> u64 {
     a as u64
 }
 
-// CHECK-LABEL: i128 @f128_as_u128(
+// x86-LABEL: void @f128_as_u128({{.*}}sret([16 x i8])
+// bit32-LABEL: void @f128_as_u128({{.*}}sret([16 x i8])
+// bit64-LABEL: i128 @f128_as_u128(
 #[no_mangle]
 pub fn f128_as_u128(a: f128) -> u128 {
     // CHECK: call i128 @llvm.fptoui.sat.i128.f128(fp128 %{{.+}})
@@ -250,7 +280,9 @@ pub fn f128_as_i64(a: f128) -> i64 {
     a as i64
 }
 
-// CHECK-LABEL: i128 @f128_as_i128(
+// x86-LABEL: void @f128_as_i128({{.*}}sret([16 x i8])
+// bit32-LABEL: void @f128_as_i128({{.*}}sret([16 x i8])
+// bit64-LABEL: i128 @f128_as_i128(
 #[no_mangle]
 pub fn f128_as_i128(a: f128) -> i128 {
     // CHECK: call i128 @llvm.fptosi.sat.i128.f128(fp128 %{{.+}})
@@ -259,70 +291,90 @@ pub fn f128_as_i128(a: f128) -> i128 {
 
 /* int to float conversions */
 
-// CHECK-LABEL: fp128 @u8_as_f128(
+// x86-LABEL: void @u8_as_f128({{.*}}sret([16 x i8])
+// bit32-LABEL: void @u8_as_f128({{.*}}sret([16 x i8])
+// bit64-LABEL: fp128 @u8_as_f128(
 #[no_mangle]
 pub fn u8_as_f128(a: u8) -> f128 {
     // CHECK: uitofp i8 %{{.+}} to fp128
     a as f128
 }
 
-// CHECK-LABEL: fp128 @u16_as_f128(
+// x86-LABEL: void @u16_as_f128({{.*}}sret([16 x i8])
+// bit32-LABEL: void @u16_as_f128({{.*}}sret([16 x i8])
+// bit64-LABEL: fp128 @u16_as_f128(
 #[no_mangle]
 pub fn u16_as_f128(a: u16) -> f128 {
     // CHECK: uitofp i16 %{{.+}} to fp128
     a as f128
 }
 
-// CHECK-LABEL: fp128 @u32_as_f128(
+// x86-LABEL: void @u32_as_f128({{.*}}sret([16 x i8])
+// bit32-LABEL: void @u32_as_f128({{.*}}sret([16 x i8])
+// bit64-LABEL: fp128 @u32_as_f128(
 #[no_mangle]
 pub fn u32_as_f128(a: u32) -> f128 {
     // CHECK: uitofp i32 %{{.+}} to fp128
     a as f128
 }
 
-// CHECK-LABEL: fp128 @u64_as_f128(
+// x86-LABEL: void @u64_as_f128({{.*}}sret([16 x i8])
+// bit32-LABEL: void @u64_as_f128({{.*}}sret([16 x i8])
+// bit64-LABEL: fp128 @u64_as_f128(
 #[no_mangle]
 pub fn u64_as_f128(a: u64) -> f128 {
     // CHECK: uitofp i64 %{{.+}} to fp128
     a as f128
 }
 
-// CHECK-LABEL: fp128 @u128_as_f128(
+// x86-LABEL: void @u128_as_f128({{.*}}sret([16 x i8])
+// bit32-LABEL: void @u128_as_f128({{.*}}sret([16 x i8])
+// bit64-LABEL: fp128 @u128_as_f128(
 #[no_mangle]
 pub fn u128_as_f128(a: u128) -> f128 {
     // CHECK: uitofp i128 %{{.+}} to fp128
     a as f128
 }
 
-// CHECK-LABEL: fp128 @i8_as_f128(
+// x86-LABEL: void @i8_as_f128({{.*}}sret([16 x i8])
+// bit32-LABEL: void @i8_as_f128({{.*}}sret([16 x i8])
+// bit64-LABEL: fp128 @i8_as_f128(
 #[no_mangle]
 pub fn i8_as_f128(a: i8) -> f128 {
     // CHECK: sitofp i8 %{{.+}} to fp128
     a as f128
 }
 
-// CHECK-LABEL: fp128 @i16_as_f128(
+// x86-LABEL: void @i16_as_f128({{.*}}sret([16 x i8])
+// bit32-LABEL: void @i16_as_f128({{.*}}sret([16 x i8])
+// bit64-LABEL: fp128 @i16_as_f128(
 #[no_mangle]
 pub fn i16_as_f128(a: i16) -> f128 {
     // CHECK: sitofp i16 %{{.+}} to fp128
     a as f128
 }
 
-// CHECK-LABEL: fp128 @i32_as_f128(
+// x86-LABEL: void @i32_as_f128({{.*}}sret([16 x i8])
+// bit32-LABEL: void @i32_as_f128({{.*}}sret([16 x i8])
+// bit64-LABEL: fp128 @i32_as_f128(
 #[no_mangle]
 pub fn i32_as_f128(a: i32) -> f128 {
     // CHECK: sitofp i32 %{{.+}} to fp128
     a as f128
 }
 
-// CHECK-LABEL: fp128 @i64_as_f128(
+// x86-LABEL: void @i64_as_f128({{.*}}sret([16 x i8])
+// bit32-LABEL: void @i64_as_f128({{.*}}sret([16 x i8])
+// bit64-LABEL: fp128 @i64_as_f128(
 #[no_mangle]
 pub fn i64_as_f128(a: i64) -> f128 {
     // CHECK: sitofp i64 %{{.+}} to fp128
     a as f128
 }
 
-// CHECK-LABEL: fp128 @i128_as_f128(
+// x86-LABEL: void @i128_as_f128({{.*}}sret([16 x i8])
+// bit32-LABEL: void @i128_as_f128({{.*}}sret([16 x i8])
+// bit64-LABEL: fp128 @i128_as_f128(
 #[no_mangle]
 pub fn i128_as_f128(a: i128) -> f128 {
     // CHECK: sitofp i128 %{{.+}} to fp128
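The `Option<u128>` scenario from the compiler comment above is exactly why these labels are now split per revision instead of relying on a single lowering. As a hypothetical example (not part of this test suite), under the old behavior the following function could end up either in registers or behind a return area pointer on x86_64, depending on which LLVM IR type rustc chose; with this change it is always returned through a return area pointer there:

```rust
// Hypothetical example, not taken from the test suite: `Option<u128>`
// is 32 bytes on x86_64 (a 16-byte payload plus a discriminant, padded
// to the 16-byte alignment), which exceeds two 8-byte return
// registers, so the Rust ABI now always returns it indirectly.
#[no_mangle]
pub fn checked_square(x: u128) -> Option<u128> {
    x.checked_mul(x)
}
```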
diff --git a/tests/codegen/float/f16.rs b/tests/codegen/float/f16.rs
index 2910d7d3e928d..80931051f1885 100644
--- a/tests/codegen/float/f16.rs
+++ b/tests/codegen/float/f16.rs
@@ -1,7 +1,11 @@
 // 32-bit x86 returns `f32` and `f64` differently to avoid the x87 stack.
-//@ revisions: x86 other
+// 32-bit systems will return 128-bit values using a return area pointer.
+//@ revisions: x86 bit32 bit64
 //@[x86] only-x86
-//@[other] ignore-x86
+//@[bit32] ignore-x86
+//@[bit32] only-32bit
+//@[bit64] ignore-x86
+//@[bit64] only-64bit
 
 // Verify that our intrinsics generate the correct LLVM calls for f16
 
@@ -145,23 +149,27 @@ pub fn f16_as_self(a: f16) -> f16 {
     a as f16
 }
 
-// other-LABEL: float @f16_as_f32(
 // x86-LABEL: i32 @f16_as_f32(
+// bit32-LABEL: float @f16_as_f32(
+// bit64-LABEL: float @f16_as_f32(
 #[no_mangle]
 pub fn f16_as_f32(a: f16) -> f32 {
     // CHECK: fpext half %{{.+}} to float
     a as f32
 }
 
-// other-LABEL: double @f16_as_f64(
 // x86-LABEL: void @f16_as_f64(
+// bit32-LABEL: double @f16_as_f64(
+// bit64-LABEL: double @f16_as_f64(
 #[no_mangle]
 pub fn f16_as_f64(a: f16) -> f64 {
     // CHECK: fpext half %{{.+}} to double
     a as f64
 }
 
-// CHECK-LABEL: fp128 @f16_as_f128(
+// x86-LABEL: void @f16_as_f128({{.*}}sret([16 x i8])
+// bit32-LABEL: void @f16_as_f128({{.*}}sret([16 x i8])
+// bit64-LABEL: fp128 @f16_as_f128(
 #[no_mangle]
 pub fn f16_as_f128(a: f16) -> f128 {
     // CHECK: fpext half %{{.+}} to fp128
@@ -218,7 +226,9 @@ pub fn f16_as_u64(a: f16) -> u64 {
     a as u64
 }
 
-// CHECK-LABEL: i128 @f16_as_u128(
+// x86-LABEL: void @f16_as_u128({{.*}}sret([16 x i8])
+// bit32-LABEL: void @f16_as_u128({{.*}}sret([16 x i8])
+// bit64-LABEL: i128 @f16_as_u128(
 #[no_mangle]
 pub fn f16_as_u128(a: f16) -> u128 {
     // CHECK: call i128 @llvm.fptoui.sat.i128.f16(half %{{.+}})
@@ -252,7 +262,9 @@ pub fn f16_as_i64(a: f16) -> i64 {
     a as i64
 }
 
-// CHECK-LABEL: i128 @f16_as_i128(
+// x86-LABEL: void @f16_as_i128({{.*}}sret([16 x i8])
+// bit32-LABEL: void @f16_as_i128({{.*}}sret([16 x i8])
+// bit64-LABEL: i128 @f16_as_i128(
 #[no_mangle]
 pub fn f16_as_i128(a: f16) -> i128 {
     // CHECK: call i128 @llvm.fptosi.sat.i128.f16(half %{{.+}})
diff --git a/tests/codegen/i128-x86-align.rs b/tests/codegen/i128-x86-align.rs
index 3e6ed2b8e16a2..6b1da445c40ca 100644
--- a/tests/codegen/i128-x86-align.rs
+++ b/tests/codegen/i128-x86-align.rs
@@ -19,13 +19,15 @@ pub struct ScalarPair {
 #[no_mangle]
 pub fn load(x: &ScalarPair) -> ScalarPair {
     // CHECK-LABEL: @load(
+    // CHECK-SAME: sret([32 x i8]) align 16 dereferenceable(32) %_0,
     // CHECK-SAME: align 16 dereferenceable(32) %x
     // CHECK:      [[A:%.*]] = load i32, ptr %x, align 16
     // CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr %x, i64 16
     // CHECK-NEXT: [[B:%.*]] = load i128, ptr [[GEP]], align 16
-    // CHECK-NEXT: [[IV1:%.*]] = insertvalue { i32, i128 } poison, i32 [[A]], 0
-    // CHECK-NEXT: [[IV2:%.*]] = insertvalue { i32, i128 } [[IV1]], i128 [[B]], 1
-    // CHECK-NEXT: ret { i32, i128 } [[IV2]]
+    // CHECK-NEXT: store i32 [[A]], ptr %_0, align 16
+    // CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr %_0, i64 16
+    // CHECK-NEXT: store i128 [[B]], ptr [[GEP]], align 16
+    // CHECK-NEXT: ret void
     *x
 }
 
@@ -53,29 +55,23 @@ pub fn alloca() {
 #[no_mangle]
 pub fn load_volatile(x: &ScalarPair) -> ScalarPair {
     // CHECK-LABEL: @load_volatile(
+    // CHECK-SAME: sret([32 x i8]) align 16 dereferenceable(32) %_0,
     // CHECK-SAME: align 16 dereferenceable(32) %x
-    // CHECK:      [[TMP:%.*]] = alloca [32 x i8], align 16
     // CHECK:      [[LOAD:%.*]] = load volatile %ScalarPair, ptr %x, align 16
-    // CHECK-NEXT: store %ScalarPair [[LOAD]], ptr [[TMP]], align 16
-    // CHECK-NEXT: [[A:%.*]] = load i32, ptr [[TMP]], align 16
-    // CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 16
-    // CHECK-NEXT: [[B:%.*]] = load i128, ptr [[GEP]], align 16
+    // CHECK-NEXT: store %ScalarPair [[LOAD]], ptr %_0, align 16
+    // CHECK-NEXT: ret void
     unsafe { std::intrinsics::volatile_load(x) }
 }
 
 #[no_mangle]
 pub fn transmute(x: ScalarPair) -> (std::mem::MaybeUninit<i128>, i128) {
-    // CHECK-LABEL: define { i128, i128 } @transmute(i32 noundef %x.0, i128 noundef %x.1)
-    // CHECK:       [[TMP:%.*]] = alloca [32 x i8], align 16
-    // CHECK-NEXT:  store i32 %x.0, ptr [[TMP]], align 16
-    // CHECK-NEXT:  [[GEP:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 16
+    // CHECK-LABEL: @transmute(
+    // CHECK-SAME:  sret([32 x i8]) align 16 dereferenceable(32) %_0,
+    // CHECK-SAME:  i32 noundef %x.0, i128 noundef %x.1
+    // CHECK:       store i32 %x.0, ptr %_0, align 16
+    // CHECK-NEXT:  [[GEP:%.*]] = getelementptr inbounds i8, ptr %_0, i64 16
     // CHECK-NEXT:  store i128 %x.1, ptr [[GEP]], align 16
-    // CHECK-NEXT:  [[LOAD1:%.*]] = load i128, ptr %_0, align 16
-    // CHECK-NEXT:  [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 16
-    // CHECK-NEXT:  [[LOAD2:%.*]] = load i128, ptr [[GEP2]], align 16
-    // CHECK-NEXT:  [[IV1:%.*]] = insertvalue { i128, i128 } poison, i128 [[LOAD1]], 0
-    // CHECK-NEXT:  [[IV2:%.*]] = insertvalue { i128, i128 } [[IV1]], i128 [[LOAD2]], 1
-    // CHECK-NEXT:  ret { i128, i128 } [[IV2]]
+    // CHECK-NEXT:  ret void
     unsafe { std::mem::transmute(x) }
 }
 
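For reference, the shape of the `load` change above, with before/after signatures reconstructed from the CHECK lines (a sketch; attribute details are approximate and the field names are illustrative):

```rust
// Sketch reconstructed from the CHECK lines above.
//
//   before: define { i32, i128 } @load(ptr align 16 dereferenceable(32) %x)
//   after:  define void @load(ptr sret([32 x i8]) align 16 dereferenceable(32) %_0,
//                             ptr align 16 dereferenceable(32) %x)
pub struct ScalarPair {
    pub a: u32,
    pub b: u128,
}

#[no_mangle]
pub fn load(x: &ScalarPair) -> ScalarPair {
    // The 32-byte pair no longer fits in two 8-byte return registers,
    // so the copy is written straight into the caller-provided return
    // area instead of being rebuilt with insertvalue.
    *x
}
```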
diff --git a/tests/codegen/mir-aggregate-no-alloca.rs b/tests/codegen/mir-aggregate-no-alloca.rs
index c0e7e1a05e390..04ffb0755382b 100644
--- a/tests/codegen/mir-aggregate-no-alloca.rs
+++ b/tests/codegen/mir-aggregate-no-alloca.rs
@@ -1,3 +1,7 @@
+// 32-bit systems will return 128-bit values using a return area pointer.
+//@ revisions: bit32 bit64
+//@[bit32] only-32bit
+//@[bit64] only-64bit
 //@ compile-flags: -O -C no-prepopulate-passes -Z randomize-layout=no
 
 #![crate_type = "lib"]
@@ -98,26 +102,36 @@ pub fn make_struct_1(a: i32) -> Struct1 {
 
 pub struct Struct2Asc(i16, i64);
 
-// CHECK-LABEL: { i64, i16 } @make_struct_2_asc(i16 noundef %a, i64 noundef %b)
+// bit32-LABEL: void @make_struct_2_asc({{.*}} sret({{[^,]*}}) {{.*}} %s,
+// bit64-LABEL: { i64, i16 } @make_struct_2_asc(
+// CHECK-SAME: i16 noundef %a, i64 noundef %b)
 #[no_mangle]
 pub fn make_struct_2_asc(a: i16, b: i64) -> Struct2Asc {
     // CHECK-NOT: alloca
-    // CHECK: %[[TEMP0:.+]] = insertvalue { i64, i16 } poison, i64 %b, 0
-    // CHECK: %[[TEMP1:.+]] = insertvalue { i64, i16 } %[[TEMP0]], i16 %a, 1
-    // CHECK: ret { i64, i16 } %[[TEMP1]]
+    // bit32: %[[GEP:.+]] = getelementptr inbounds i8, ptr %s, i32 8
+    // bit32: store i16 %a, ptr %[[GEP]]
+    // bit32: store i64 %b, ptr %s
+    // bit64: %[[TEMP0:.+]] = insertvalue { i64, i16 } poison, i64 %b, 0
+    // bit64: %[[TEMP1:.+]] = insertvalue { i64, i16 } %[[TEMP0]], i16 %a, 1
+    // bit64: ret { i64, i16 } %[[TEMP1]]
     let s = Struct2Asc(a, b);
     s
 }
 
 pub struct Struct2Desc(i64, i16);
 
-// CHECK-LABEL: { i64, i16 } @make_struct_2_desc(i64 noundef %a, i16 noundef %b)
+// bit32-LABEL: void @make_struct_2_desc({{.*}} sret({{[^,]*}}) {{.*}} %s,
+// bit64-LABEL: { i64, i16 } @make_struct_2_desc(
+// CHECK-SAME: i64 noundef %a, i16 noundef %b)
 #[no_mangle]
 pub fn make_struct_2_desc(a: i64, b: i16) -> Struct2Desc {
     // CHECK-NOT: alloca
-    // CHECK: %[[TEMP0:.+]] = insertvalue { i64, i16 } poison, i64 %a, 0
-    // CHECK: %[[TEMP1:.+]] = insertvalue { i64, i16 } %[[TEMP0]], i16 %b, 1
-    // CHECK: ret { i64, i16 } %[[TEMP1]]
+    // bit32: store i64 %a, ptr %s
+    // bit32: %[[GEP:.+]] = getelementptr inbounds i8, ptr %s, i32 8
+    // bit32: store i16 %b, ptr %[[GEP]]
+    // bit64: %[[TEMP0:.+]] = insertvalue { i64, i16 } poison, i64 %a, 0
+    // bit64: %[[TEMP1:.+]] = insertvalue { i64, i16 } %[[TEMP0]], i16 %b, 1
+    // bit64: ret { i64, i16 } %[[TEMP1]]
     let s = Struct2Desc(a, b);
     s
 }
diff --git a/tests/codegen/range-attribute.rs b/tests/codegen/range-attribute.rs
index bb19bec0fb932..8972fc76ca28b 100644
--- a/tests/codegen/range-attribute.rs
+++ b/tests/codegen/range-attribute.rs
@@ -1,6 +1,10 @@
 // Checks that range metadata gets emitted on functions result and arguments
 // with scalar value.
 
+// 32-bit systems will return 128-bit values using a return area pointer.
+//@ revisions: bit32 bit64
+//@[bit32] only-32bit
+//@[bit64] only-64bit
 //@ compile-flags: -O -C no-prepopulate-passes
 //@ min-llvm-version: 19
 
@@ -13,7 +17,8 @@ use std::num::NonZero;
 #[no_mangle]
 pub fn helper(_: usize) {}
 
-// CHECK: noundef range(i128 1, 0) i128 @nonzero_int(i128 noundef range(i128 1, 0) %x)
+// bit32: void @nonzero_int({{.*}} sret([16 x i8]) {{.*}}, i128 noundef range(i128 1, 0) %x)
+// bit64: noundef range(i128 1, 0) i128 @nonzero_int(i128 noundef range(i128 1, 0) %x)
 #[no_mangle]
 pub fn nonzero_int(x: NonZero<u128>) -> NonZero<u128> {
     x
@@ -43,7 +48,9 @@ pub enum Enum1 {
     C(u64),
 }
 
-// CHECK: { [[ENUM1_TYP:i[0-9]+]], i64 } @enum1_value([[ENUM1_TYP]] noundef range([[ENUM1_TYP]] 0, 3) %x.0, i64 noundef %x.1)
+// bit32: void @enum1_value({{.*}} sret({{[^,]*}}) {{[^,]*}}, [[ENUM1_TYP:i[0-9]+]]
+// bit64: { [[ENUM1_TYP:i[0-9]+]], i64 } @enum1_value([[ENUM1_TYP]]
+// CHECK-SAME: noundef range([[ENUM1_TYP]] 0, 3) %x.0, i64 noundef %x.1)
 #[no_mangle]
 pub fn enum1_value(x: Enum1) -> Enum1 {
     x
diff --git a/tests/codegen/tuple-layout-opt.rs b/tests/codegen/tuple-layout-opt.rs
index 601563bc0613a..5b2f65e7aa716 100644
--- a/tests/codegen/tuple-layout-opt.rs
+++ b/tests/codegen/tuple-layout-opt.rs
@@ -1,3 +1,7 @@
+// 32-bit systems will return 128-bit values using a return area pointer.
+//@ revisions: bit32 bit64
+//@[bit32] only-32bit
+//@[bit64] only-64bit
 //@ compile-flags: -C no-prepopulate-passes -Copt-level=0
 
 // Test that tuples get optimized layout, in particular with a ZST in the last field (#63244)
@@ -5,42 +9,48 @@
 #![crate_type = "lib"]
 
 type ScalarZstLast = (u128, ());
-// CHECK: define {{(dso_local )?}}i128 @test_ScalarZstLast(i128 %_1)
+// bit32: define {{(dso_local )?}}void @test_ScalarZstLast({{.*}} sret([16 x i8]) {{.*}}, i128 %_1)
+// bit64: define {{(dso_local )?}}i128 @test_ScalarZstLast(i128 %_1)
 #[no_mangle]
 pub fn test_ScalarZstLast(_: ScalarZstLast) -> ScalarZstLast {
     loop {}
 }
 
 type ScalarZstFirst = ((), u128);
-// CHECK: define {{(dso_local )?}}i128 @test_ScalarZstFirst(i128 %_1)
+// bit32: define {{(dso_local )?}}void @test_ScalarZstFirst({{.*}} sret([16 x i8]) {{.*}}, i128 %_1)
+// bit64: define {{(dso_local )?}}i128 @test_ScalarZstFirst(i128 %_1)
 #[no_mangle]
 pub fn test_ScalarZstFirst(_: ScalarZstFirst) -> ScalarZstFirst {
     loop {}
 }
 
 type ScalarPairZstLast = (u8, u128, ());
-// CHECK: define {{(dso_local )?}}{ i128, i8 } @test_ScalarPairZstLast(i128 %_1.0, i8 %_1.1)
+// CHECK: define {{(dso_local )?}}void @test_ScalarPairZstLast(ptr sret({{[^,]*}})
+// CHECK-SAME: %_0, i128 %_1.0, i8 %_1.1)
 #[no_mangle]
 pub fn test_ScalarPairZstLast(_: ScalarPairZstLast) -> ScalarPairZstLast {
     loop {}
 }
 
 type ScalarPairZstFirst = ((), u8, u128);
-// CHECK: define {{(dso_local )?}}{ i8, i128 } @test_ScalarPairZstFirst(i8 %_1.0, i128 %_1.1)
+// CHECK: define {{(dso_local )?}}void @test_ScalarPairZstFirst(ptr sret({{[^,]*}})
+// CHECK-SAME: %_0, i8 %_1.0, i128 %_1.1)
 #[no_mangle]
 pub fn test_ScalarPairZstFirst(_: ScalarPairZstFirst) -> ScalarPairZstFirst {
     loop {}
 }
 
 type ScalarPairLotsOfZsts = ((), u8, (), u128, ());
-// CHECK: define {{(dso_local )?}}{ i128, i8 } @test_ScalarPairLotsOfZsts(i128 %_1.0, i8 %_1.1)
+// CHECK: define {{(dso_local )?}}void @test_ScalarPairLotsOfZsts(ptr sret({{[^,]*}})
+// CHECK-SAME: %_0, i128 %_1.0, i8 %_1.1)
 #[no_mangle]
 pub fn test_ScalarPairLotsOfZsts(_: ScalarPairLotsOfZsts) -> ScalarPairLotsOfZsts {
     loop {}
 }
 
 type ScalarPairLottaNesting = (((), ((), u8, (), u128, ())), ());
-// CHECK: define {{(dso_local )?}}{ i128, i8 } @test_ScalarPairLottaNesting(i128 %_1.0, i8 %_1.1)
+// CHECK: define {{(dso_local )?}}void @test_ScalarPairLottaNesting(ptr sret({{[^,]*}})
+// CHECK-SAME: %_0, i128 %_1.0, i8 %_1.1)
 #[no_mangle]
 pub fn test_ScalarPairLottaNesting(_: ScalarPairLottaNesting) -> ScalarPairLottaNesting {
     loop {}
diff --git a/tests/codegen/union-abi.rs b/tests/codegen/union-abi.rs
index a1c081d7d61c2..b3c67a59730d9 100644
--- a/tests/codegen/union-abi.rs
+++ b/tests/codegen/union-abi.rs
@@ -1,9 +1,13 @@
 //@ ignore-emscripten vectors passed directly
 //@ compile-flags: -O -C no-prepopulate-passes
 // 32-bit x86 returns `f32` differently to avoid the x87 stack.
-//@ revisions: x86 other
+// 32-bit systems will return 128-bit values using a return area pointer.
+//@ revisions: x86 bit32 bit64
 //@[x86] only-x86
-//@[other] ignore-x86
+//@[bit32] ignore-x86
+//@[bit32] only-32bit
+//@[bit64] ignore-x86
+//@[bit64] only-64bit
 
 // This test that using union forward the abi of the inner type, as
 // discussed in #54668
@@ -71,8 +75,9 @@ pub union UnionF32 {
     a: f32,
 }
 
-// other: define {{(dso_local )?}}float @test_UnionF32(float %_1)
 // x86: define {{(dso_local )?}}i32 @test_UnionF32(float %_1)
+// bit32: define {{(dso_local )?}}float @test_UnionF32(float %_1)
+// bit64: define {{(dso_local )?}}float @test_UnionF32(float %_1)
 #[no_mangle]
 pub fn test_UnionF32(_: UnionF32) -> UnionF32 {
     loop {}
@@ -83,8 +88,9 @@ pub union UnionF32F32 {
     b: f32,
 }
 
-// other: define {{(dso_local )?}}float @test_UnionF32F32(float %_1)
 // x86: define {{(dso_local )?}}i32 @test_UnionF32F32(float %_1)
+// bit32: define {{(dso_local )?}}float @test_UnionF32F32(float %_1)
+// bit64: define {{(dso_local )?}}float @test_UnionF32F32(float %_1)
 #[no_mangle]
 pub fn test_UnionF32F32(_: UnionF32F32) -> UnionF32F32 {
     loop {}
@@ -104,7 +110,9 @@ pub fn test_UnionF32U32(_: UnionF32U32) -> UnionF32U32 {
 pub union UnionU128 {
     a: u128,
 }
-// CHECK: define {{(dso_local )?}}i128 @test_UnionU128(i128 %_1)
+// x86: define {{(dso_local )?}}void @test_UnionU128({{.*}}sret([16 x i8]){{.*}}, i128 %_1)
+// bit32: define {{(dso_local )?}}void @test_UnionU128({{.*}}sret([16 x i8]){{.*}}, i128 %_1)
+// bit64: define {{(dso_local )?}}i128 @test_UnionU128(i128 %_1)
 #[no_mangle]
 pub fn test_UnionU128(_: UnionU128) -> UnionU128 {
     loop {}