From e4b600e16fb0864f8d350f5126e2cc3e81219257 Mon Sep 17 00:00:00 2001 From: Sander de Smalen Date: Fri, 13 Oct 2023 13:37:04 +0000 Subject: [PATCH 1/2] [SME][compiler-rt] Guard .variant_pcs under HAS_ASM_SME macro. This also adds the .variant_pcs directive to some functions from which it was previously missing. --- compiler-rt/lib/builtins/aarch64/sme-abi.S | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi.S b/compiler-rt/lib/builtins/aarch64/sme-abi.S index 207810b2e2521..fee9993151c9e 100644 --- a/compiler-rt/lib/builtins/aarch64/sme-abi.S +++ b/compiler-rt/lib/builtins/aarch64/sme-abi.S @@ -19,6 +19,7 @@ #define STR_ZA_W15_0_X16 str za[w15,0], [x16] #define CNTD_X0 cntd x0 #define CFI_OFFSET_VG_MINUS_16 .cfi_offset vg, -16 +#define VARIANT_PCS(sym) .variant_pcs sym #else #define ARCH armv8-a #define SMSTOP_SM .inst 0xd503427f @@ -30,6 +31,7 @@ #define STR_ZA_W15_0_X16 .inst 0xe1206200 #define CNTD_X0 .inst 0x04e0e3e0 #define CFI_OFFSET_VG_MINUS_16 .cfi_escape 0x10, 0x2e, 0x03, 0x11, 0x70, 0x22 // $vg @ cfa - 16 +#define VARIANT_PCS(sym) #endif #if !defined(__APPLE__) @@ -50,7 +52,7 @@ // because the function does not return. DEFINE_COMPILERRT_PRIVATE_FUNCTION(do_abort) .cfi_startproc - .variant_pcs SYMBOL_NAME(do_abort) + VARIANT_PCS(SYMBOL_NAME(do_abort)) stp x29, x30, [sp, #-32]! CNTD_X0 // Store VG to a stack location that we describe with .cfi_offset @@ -74,7 +76,7 @@ END_COMPILERRT_FUNCTION(do_abort) // that is set as part of the compiler-rt startup code. 
// __aarch64_has_sme_and_tpidr2_el0 DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sme_state) - .variant_pcs __arm_sme_state + VARIANT_PCS(__arm_sme_state) mov x0, xzr mov x1, xzr @@ -91,7 +93,7 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sme_state) END_COMPILERRT_OUTLINE_FUNCTION(__arm_sme_state) DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_restore) - .variant_pcs __arm_tpidr2_restore + VARIANT_PCS(__arm_tpidr2_restore) // If TPIDR2_EL0 is nonnull, the subroutine aborts in some platform-specific // manner. mrs x14, REG_TPIDR2_EL0 @@ -126,6 +128,7 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_restore) END_COMPILERRT_OUTLINE_FUNCTION(__arm_tpidr2_restore) DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_save) + VARIANT_PCS(__arm_tpidr2_restore) // If the current thread does not have access to TPIDR2_EL0, the subroutine // does nothing. adrp x14, TPIDR2_SYMBOL @@ -165,6 +168,7 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_save) END_COMPILERRT_OUTLINE_FUNCTION(__arm_tpidr2_save) DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_za_disable) + VARIANT_PCS(__arm_tpidr2_restore) // If the current thread does not have access to SME, the subroutine does // nothing. adrp x14, TPIDR2_SYMBOL From 60caf95964d6f416c1b40d16acb8d6bfd6b638b7 Mon Sep 17 00:00:00 2001 From: Sander de Smalen Date: Fri, 13 Oct 2023 15:55:09 +0000 Subject: [PATCH 2/2] Disable SME if asm support not available [to squash] This is mostly needed because the .variant_pcs directive is required to build the SME ABI routines, but takes it one step further by requiring SME assembler support from the compiler. This removes the need for the macros that use .inst otherwise. 
--- compiler-rt/lib/builtins/CMakeLists.txt | 9 ++-- compiler-rt/lib/builtins/aarch64/sme-abi.S | 63 +++++++--------------- 2 files changed, 25 insertions(+), 47 deletions(-) diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt index b1863746a57e7..753d08273ea54 100644 --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -551,10 +551,14 @@ set(aarch64_SOURCES ${GENERIC_SOURCES} cpu_model.c aarch64/fp_mode.c - aarch64/sme-abi.S - aarch64/sme-abi-init.c ) +if(COMPILER_RT_HAS_ASM_SME) + list(APPEND aarch64_SOURCES aarch64/sme-abi.S aarch64/sme-abi-init.c) +else() + message(STATUS "AArch64 SME ABI routines disabled") +endif() + # Generate outline atomics helpers from lse.S base set(OA_HELPERS_DIR "${CMAKE_CURRENT_BINARY_DIR}/outline_atomic_helpers.dir") file(MAKE_DIRECTORY "${OA_HELPERS_DIR}") @@ -782,7 +786,6 @@ else () endif() append_list_if(COMPILER_RT_HAS_ASM_LSE HAS_ASM_LSE BUILTIN_DEFS) - append_list_if(COMPILER_RT_HAS_ASM_SME HAS_ASM_SME BUILTIN_DEFS) foreach (arch ${BUILTIN_SUPPORTED_ARCH}) if (CAN_TARGET_${arch}) diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi.S b/compiler-rt/lib/builtins/aarch64/sme-abi.S index fee9993151c9e..b3612c68066f2 100644 --- a/compiler-rt/lib/builtins/aarch64/sme-abi.S +++ b/compiler-rt/lib/builtins/aarch64/sme-abi.S @@ -8,31 +8,6 @@ #include "../assembly.h" -#ifdef HAS_ASM_SME -#define ARCH armv9-a+sme -#define SMSTOP_SM smstop sm -#define SMSTOP_ZA smstop za -#define REG_TPIDR2_EL0 TPIDR2_EL0 -#define REG_SVCR SVCR -#define ADDSVL_X16_X16_1 addsvl x16, x16, #1 -#define LDR_ZA_W15_0_X16 ldr za[w15,0], [x16] -#define STR_ZA_W15_0_X16 str za[w15,0], [x16] -#define CNTD_X0 cntd x0 -#define CFI_OFFSET_VG_MINUS_16 .cfi_offset vg, -16 -#define VARIANT_PCS(sym) .variant_pcs sym -#else -#define ARCH armv8-a -#define SMSTOP_SM .inst 0xd503427f -#define SMSTOP_ZA .inst 0xd503447f -#define REG_TPIDR2_EL0 S3_3_C13_C0_5 -#define REG_SVCR S3_3_C4_C2_2 
-#define ADDSVL_X16_X16_1 .inst 0x04305830 -#define LDR_ZA_W15_0_X16 .inst 0xe1006200 -#define STR_ZA_W15_0_X16 .inst 0xe1206200 -#define CNTD_X0 .inst 0x04e0e3e0 -#define CFI_OFFSET_VG_MINUS_16 .cfi_escape 0x10, 0x2e, 0x03, 0x11, 0x70, 0x22 // $vg @ cfa - 16 -#define VARIANT_PCS(sym) -#endif #if !defined(__APPLE__) #define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0) @@ -44,7 +19,7 @@ #define TPIDR2_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@pageoff #endif -.arch ARCH +.arch armv9-a+sme // Utility function which calls a system's abort() routine. Because the function // is streaming-compatible it should disable streaming-SVE mode before calling @@ -52,19 +27,19 @@ // because the function does not return. DEFINE_COMPILERRT_PRIVATE_FUNCTION(do_abort) .cfi_startproc - VARIANT_PCS(SYMBOL_NAME(do_abort)) + .variant_pcs SYMBOL_NAME(do_abort) stp x29, x30, [sp, #-32]! - CNTD_X0 + cntd x0 // Store VG to a stack location that we describe with .cfi_offset str x0, [sp, #16] .cfi_def_cfa_offset 32 .cfi_offset w30, -24 .cfi_offset w29, -32 - CFI_OFFSET_VG_MINUS_16 + .cfi_offset vg, -16 bl __arm_sme_state tbz x0, #0, 2f 1: - SMSTOP_SM + smstop sm 2: // We can't make this into a tail-call because the unwinder would // need to restore the value of VG. @@ -76,7 +51,7 @@ END_COMPILERRT_FUNCTION(do_abort) // that is set as part of the compiler-rt startup code. 
// __aarch64_has_sme_and_tpidr2_el0 DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sme_state) - VARIANT_PCS(__arm_sme_state) + .variant_pcs __arm_sme_state mov x0, xzr mov x1, xzr @@ -85,18 +60,18 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sme_state) cbz w16, 1f 0: orr x0, x0, #0xC000000000000000 - mrs x16, REG_SVCR + mrs x16, SVCR bfxil x0, x16, #0, #2 - mrs x1, REG_TPIDR2_EL0 + mrs x1, TPIDR2_EL0 1: ret END_COMPILERRT_OUTLINE_FUNCTION(__arm_sme_state) DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_restore) - VARIANT_PCS(__arm_tpidr2_restore) + .variant_pcs __arm_tpidr2_restore // If TPIDR2_EL0 is nonnull, the subroutine aborts in some platform-specific // manner. - mrs x14, REG_TPIDR2_EL0 + mrs x14, TPIDR2_EL0 cbnz x14, 2f // If any of the reserved bytes in the first 16 bytes of BLK are nonzero, @@ -116,8 +91,8 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_restore) mov x15, xzr 0: - LDR_ZA_W15_0_X16 - ADDSVL_X16_X16_1 + ldr za[w15,0], [x16] + addsvl x16, x16, #1 add x15, x15, #1 cmp x14, x15 b.ne 0b @@ -128,7 +103,7 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_restore) END_COMPILERRT_OUTLINE_FUNCTION(__arm_tpidr2_restore) DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_save) - VARIANT_PCS(__arm_tpidr2_restore) + .variant_pcs __arm_tpidr2_save // If the current thread does not have access to TPIDR2_EL0, the subroutine // does nothing. adrp x14, TPIDR2_SYMBOL @@ -136,7 +111,7 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_save) cbz w14, 1f // If TPIDR2_EL0 is null, the subroutine does nothing. 
- mrs x16, REG_TPIDR2_EL0 + mrs x16, TPIDR2_EL0 cbz x16, 1f // If any of the reserved bytes in the first 16 bytes of the TPIDR2 block are @@ -156,8 +131,8 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_save) mov x15, xzr 0: - STR_ZA_W15_0_X16 - ADDSVL_X16_X16_1 + str za[w15,0], [x16] + addsvl x16, x16, #1 add x15, x15, #1 cmp x14, x15 b.ne 0b @@ -168,7 +143,7 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_save) END_COMPILERRT_OUTLINE_FUNCTION(__arm_tpidr2_save) DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_za_disable) - VARIANT_PCS(__arm_tpidr2_restore) + .variant_pcs __arm_za_disable // If the current thread does not have access to SME, the subroutine does // nothing. adrp x14, TPIDR2_SYMBOL @@ -186,10 +161,10 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_za_disable) bl __arm_tpidr2_save // * Set TPIDR2_EL0 to null. - msr REG_TPIDR2_EL0, xzr + msr TPIDR2_EL0, xzr // * Set PSTATE.ZA to 0. - SMSTOP_ZA + smstop za .cfi_def_cfa wsp, 16 ldp x29, x30, [sp], #16