From eb22d70aed39614dd9318e3fa1bf91846d74b8aa Mon Sep 17 00:00:00 2001
From: Ulrich Weigand <ulrich.weigand@de.ibm.com>
Date: Tue, 6 Dec 2022 17:36:09 +0100
Subject: [PATCH] Implement va_list and va_arg for s390x FFI

Following the s390x ELF ABI and based on the clang implementation,
provide appropriate definitions of va_list in library/core/src/ffi/mod.rs
and va_arg handling in compiler/rustc_codegen_llvm/src/va_arg.rs.

Fixes the following test cases on s390x:
src/test/run-make-fulldeps/c-link-to-rust-va-list-fn
src/test/ui/abi/variadic-ffi.rs

Fixes https://github.com/rust-lang/rust/issues/84628.
---
 compiler/rustc_codegen_llvm/src/va_arg.rs | 84 +++++++++++++++++++++++
 library/core/src/ffi/mod.rs               | 55 +++++++++++++--
 2 files changed, 134 insertions(+), 5 deletions(-)

diff --git a/compiler/rustc_codegen_llvm/src/va_arg.rs b/compiler/rustc_codegen_llvm/src/va_arg.rs
index ceb3d5a84abf3..b19398e68c260 100644
--- a/compiler/rustc_codegen_llvm/src/va_arg.rs
+++ b/compiler/rustc_codegen_llvm/src/va_arg.rs
@@ -175,6 +175,89 @@ fn emit_aapcs_va_arg<'ll, 'tcx>(
     val
 }
 
+fn emit_s390x_va_arg<'ll, 'tcx>(
+    bx: &mut Builder<'_, 'll, 'tcx>,
+    list: OperandRef<'tcx, &'ll Value>,
+    target_ty: Ty<'tcx>,
+) -> &'ll Value {
+    // Implementation of the s390x ELF ABI calling convention for va_args see
+    // https://github.com/IBM/s390x-abi (chapter 1.2.4)
+    let va_list_addr = list.immediate();
+    let va_list_layout = list.deref(bx.cx).layout;
+    let va_list_ty = va_list_layout.llvm_type(bx);
+    let layout = bx.cx.layout_of(target_ty);
+
+    let in_reg = bx.append_sibling_block("va_arg.in_reg");
+    let in_mem = bx.append_sibling_block("va_arg.in_mem");
+    let end = bx.append_sibling_block("va_arg.end");
+
+    // FIXME: vector ABI not yet supported.
+    let target_ty_size = bx.cx.size_of(target_ty).bytes();
+    let indirect: bool = target_ty_size > 8 || !target_ty_size.is_power_of_two();
+    let unpadded_size = if indirect { 8 } else { target_ty_size };
+    let padded_size = 8;
+    let padding = padded_size - unpadded_size;
+
+    let gpr_type = indirect || !layout.is_single_fp_element(bx.cx);
+    let (max_regs, reg_count_field, reg_save_index, reg_padding) =
+        if gpr_type { (5, 0, 2, padding) } else { (4, 1, 16, 0) };
+
+    // Check whether the value was passed in a register or in memory.
+    let reg_count = bx.struct_gep(
+        va_list_ty,
+        va_list_addr,
+        va_list_layout.llvm_field_index(bx.cx, reg_count_field),
+    );
+    let reg_count_v = bx.load(bx.type_i64(), reg_count, Align::from_bytes(8).unwrap());
+    let use_regs = bx.icmp(IntPredicate::IntULT, reg_count_v, bx.const_u64(max_regs));
+    bx.cond_br(use_regs, in_reg, in_mem);
+
+    // Emit code to load the value if it was passed in a register.
+    bx.switch_to_block(in_reg);
+
+    // Work out the address of the value in the register save area.
+    let reg_ptr =
+        bx.struct_gep(va_list_ty, va_list_addr, va_list_layout.llvm_field_index(bx.cx, 3));
+    let reg_ptr_v = bx.load(bx.type_i8p(), reg_ptr, bx.tcx().data_layout.pointer_align.abi);
+    let scaled_reg_count = bx.mul(reg_count_v, bx.const_u64(8));
+    let reg_off = bx.add(scaled_reg_count, bx.const_u64(reg_save_index * 8 + reg_padding));
+    let reg_addr = bx.gep(bx.type_i8(), reg_ptr_v, &[reg_off]);
+
+    // Update the register count.
+    let new_reg_count_v = bx.add(reg_count_v, bx.const_u64(1));
+    bx.store(new_reg_count_v, reg_count, Align::from_bytes(8).unwrap());
+    bx.br(end);
+
+    // Emit code to load the value if it was passed in memory.
+    bx.switch_to_block(in_mem);
+
+    // Work out the address of the value in the argument overflow area.
+    let arg_ptr =
+        bx.struct_gep(va_list_ty, va_list_addr, va_list_layout.llvm_field_index(bx.cx, 2));
+    let arg_ptr_v = bx.load(bx.type_i8p(), arg_ptr, bx.tcx().data_layout.pointer_align.abi);
+    let arg_off = bx.const_u64(padding);
+    let mem_addr = bx.gep(bx.type_i8(), arg_ptr_v, &[arg_off]);
+
+    // Update the argument overflow area pointer.
+    let arg_size = bx.cx().const_u64(padded_size);
+    let new_arg_ptr_v = bx.inbounds_gep(bx.type_i8(), arg_ptr_v, &[arg_size]);
+    bx.store(new_arg_ptr_v, arg_ptr, bx.tcx().data_layout.pointer_align.abi);
+    bx.br(end);
+
+    // Return the appropriate result.
+    bx.switch_to_block(end);
+    let val_addr = bx.phi(bx.type_i8p(), &[reg_addr, mem_addr], &[in_reg, in_mem]);
+    let val_type = layout.llvm_type(bx);
+    let val_addr = if indirect {
+        let ptr_type = bx.cx.type_ptr_to(val_type);
+        let ptr_addr = bx.bitcast(val_addr, bx.cx.type_ptr_to(ptr_type));
+        bx.load(ptr_type, ptr_addr, bx.tcx().data_layout.pointer_align.abi)
+    } else {
+        bx.bitcast(val_addr, bx.cx.type_ptr_to(val_type))
+    };
+    bx.load(val_type, val_addr, layout.align.abi)
+}
+
 pub(super) fn emit_va_arg<'ll, 'tcx>(
     bx: &mut Builder<'_, 'll, 'tcx>,
     addr: OperandRef<'tcx, &'ll Value>,
@@ -200,6 +283,7 @@ pub(super) fn emit_va_arg<'ll, 'tcx>(
             emit_ptr_va_arg(bx, addr, target_ty, false, Align::from_bytes(8).unwrap(), true)
         }
         "aarch64" => emit_aapcs_va_arg(bx, addr, target_ty),
+        "s390x" => emit_s390x_va_arg(bx, addr, target_ty),
         // Windows x86_64
         "x86_64" if target.is_like_windows => {
             let target_ty_size = bx.cx.size_of(target_ty).bytes();
diff --git a/library/core/src/ffi/mod.rs b/library/core/src/ffi/mod.rs
index ec1eaa99f0b8e..76daceecd7bef 100644
--- a/library/core/src/ffi/mod.rs
+++ b/library/core/src/ffi/mod.rs
@@ -227,7 +227,12 @@ impl fmt::Debug for c_void {
 /// Basic implementation of a `va_list`.
 // The name is WIP, using `VaListImpl` for now.
 #[cfg(any(
-    all(not(target_arch = "aarch64"), not(target_arch = "powerpc"), not(target_arch = "x86_64")),
+    all(
+        not(target_arch = "aarch64"),
+        not(target_arch = "powerpc"),
+        not(target_arch = "s390x"),
+        not(target_arch = "x86_64")
+    ),
     all(target_arch = "aarch64", any(target_os = "macos", target_os = "ios")),
     target_family = "wasm",
     target_arch = "asmjs",
@@ -251,7 +256,12 @@ pub struct VaListImpl<'f> {
 }
 
 #[cfg(any(
-    all(not(target_arch = "aarch64"), not(target_arch = "powerpc"), not(target_arch = "x86_64")),
+    all(
+        not(target_arch = "aarch64"),
+        not(target_arch = "powerpc"),
+        not(target_arch = "s390x"),
+        not(target_arch = "x86_64")
+    ),
     all(target_arch = "aarch64", any(target_os = "macos", target_os = "ios")),
     target_family = "wasm",
     target_arch = "asmjs",
@@ -319,6 +329,25 @@ pub struct VaListImpl<'f> {
     _marker: PhantomData<&'f mut &'f c_void>,
 }
 
+/// s390x ABI implementation of a `va_list`.
+#[cfg(target_arch = "s390x")]
+#[repr(C)]
+#[derive(Debug)]
+#[unstable(
+    feature = "c_variadic",
+    reason = "the `c_variadic` feature has not been properly tested on \
+              all supported platforms",
+    issue = "44930"
+)]
+#[lang = "va_list"]
+pub struct VaListImpl<'f> {
+    gpr: i64,
+    fpr: i64,
+    overflow_arg_area: *mut c_void,
+    reg_save_area: *mut c_void,
+    _marker: PhantomData<&'f mut &'f c_void>,
+}
+
 /// x86_64 ABI implementation of a `va_list`.
 #[cfg(all(target_arch = "x86_64", not(target_os = "uefi"), not(windows)))]
 #[repr(C)]
@@ -352,6 +381,7 @@ pub struct VaList<'a, 'f: 'a> {
         all(
             not(target_arch = "aarch64"),
             not(target_arch = "powerpc"),
+            not(target_arch = "s390x"),
             not(target_arch = "x86_64")
         ),
         all(target_arch = "aarch64", any(target_os = "macos", target_os = "ios")),
@@ -363,7 +393,12 @@ pub struct VaList<'a, 'f: 'a> {
     inner: VaListImpl<'f>,
 
     #[cfg(all(
-        any(target_arch = "aarch64", target_arch = "powerpc", target_arch = "x86_64"),
+        any(
+            target_arch = "aarch64",
+            target_arch = "powerpc",
+            target_arch = "s390x",
+            target_arch = "x86_64"
+        ),
         any(not(target_arch = "aarch64"), not(any(target_os = "macos", target_os = "ios"))),
         not(target_family = "wasm"),
         not(target_arch = "asmjs"),
@@ -376,7 +411,12 @@ pub struct VaList<'a, 'f: 'a> {
 }
 
 #[cfg(any(
-    all(not(target_arch = "aarch64"), not(target_arch = "powerpc"), not(target_arch = "x86_64")),
+    all(
+        not(target_arch = "aarch64"),
+        not(target_arch = "powerpc"),
+        not(target_arch = "s390x"),
+        not(target_arch = "x86_64")
+    ),
     all(target_arch = "aarch64", any(target_os = "macos", target_os = "ios")),
     target_family = "wasm",
     target_arch = "asmjs",
@@ -398,7 +438,12 @@ impl<'f> VaListImpl<'f> {
 }
 
 #[cfg(all(
-    any(target_arch = "aarch64", target_arch = "powerpc", target_arch = "x86_64"),
+    any(
+        target_arch = "aarch64",
+        target_arch = "powerpc",
+        target_arch = "s390x",
+        target_arch = "x86_64"
+    ),
     any(not(target_arch = "aarch64"), not(any(target_os = "macos", target_os = "ios"))),
     not(target_family = "wasm"),
     not(target_arch = "asmjs"),