Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions compiler-rt/lib/builtins/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -551,10 +551,14 @@ set(aarch64_SOURCES
${GENERIC_SOURCES}
cpu_model.c
aarch64/fp_mode.c
aarch64/sme-abi.S
aarch64/sme-abi-init.c
)

# Build the AArch64 SME ABI routines only when COMPILER_RT_HAS_ASM_SME is set;
# otherwise report at configure time that they are left out.
if(NOT COMPILER_RT_HAS_ASM_SME)
  message(STATUS "AArch64 SME ABI routines disabled")
else()
  list(APPEND aarch64_SOURCES
    aarch64/sme-abi.S
    aarch64/sme-abi-init.c
  )
endif()

# Generate outline atomics helpers from lse.S base
set(OA_HELPERS_DIR "${CMAKE_CURRENT_BINARY_DIR}/outline_atomic_helpers.dir")
file(MAKE_DIRECTORY "${OA_HELPERS_DIR}")
Expand Down Expand Up @@ -782,7 +786,6 @@ else ()
endif()

append_list_if(COMPILER_RT_HAS_ASM_LSE HAS_ASM_LSE BUILTIN_DEFS)
append_list_if(COMPILER_RT_HAS_ASM_SME HAS_ASM_SME BUILTIN_DEFS)

foreach (arch ${BUILTIN_SUPPORTED_ARCH})
if (CAN_TARGET_${arch})
Expand Down
59 changes: 19 additions & 40 deletions compiler-rt/lib/builtins/aarch64/sme-abi.S
Original file line number Diff line number Diff line change
Expand Up @@ -8,29 +8,6 @@

#include "../assembly.h"

// Select how the SME-specific instructions, system registers and CFI for VG
// are spelled in the routines below.
//
// With HAS_ASM_SME defined, each macro expands to the SME mnemonic or register
// name and ARCH is armv9-a+sme.
#ifdef HAS_ASM_SME
#define ARCH armv9-a+sme
#define SMSTOP_SM smstop sm
#define SMSTOP_ZA smstop za
#define REG_TPIDR2_EL0 TPIDR2_EL0
#define REG_SVCR SVCR
#define ADDSVL_X16_X16_1 addsvl x16, x16, #1
#define LDR_ZA_W15_0_X16 ldr za[w15,0], [x16]
#define STR_ZA_W15_0_X16 str za[w15,0], [x16]
#define CNTD_X0 cntd x0
#define CFI_OFFSET_VG_MINUS_16 .cfi_offset vg, -16
#else
// Without HAS_ASM_SME, ARCH is armv8-a and the same macro names expand to
// hard-coded .inst words (presumably the encodings of the instructions named
// above -- verify against the Arm ARM), generic S3_* system-register names,
// and a .cfi_escape byte sequence in place of ".cfi_offset vg".
#define ARCH armv8-a
#define SMSTOP_SM .inst 0xd503427f
#define SMSTOP_ZA .inst 0xd503447f
#define REG_TPIDR2_EL0 S3_3_C13_C0_5
#define REG_SVCR S3_3_C4_C2_2
#define ADDSVL_X16_X16_1 .inst 0x04305830
#define LDR_ZA_W15_0_X16 .inst 0xe1006200
#define STR_ZA_W15_0_X16 .inst 0xe1206200
#define CNTD_X0 .inst 0x04e0e3e0
#define CFI_OFFSET_VG_MINUS_16 .cfi_escape 0x10, 0x2e, 0x03, 0x11, 0x70, 0x22 // $vg @ cfa - 16
#endif

#if !defined(__APPLE__)
#define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)
Expand All @@ -42,27 +19,27 @@
#define TPIDR2_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@pageoff
#endif

.arch ARCH
.arch armv9-a+sme

// Utility function which calls a system's abort() routine. Because the function
// is streaming-compatible it should disable streaming-SVE mode before calling
// abort(). Note that there is no need to preserve any state before the call,
// because the function does not return.
DEFINE_COMPILERRT_PRIVATE_FUNCTION(do_abort)
.cfi_startproc
.variant_pcs SYMBOL_NAME(do_abort)
.variant_pcs SYMBOL_NAME(do_abort)
stp x29, x30, [sp, #-32]!
CNTD_X0
cntd x0
// Store VG to a stack location that we describe with .cfi_offset
str x0, [sp, #16]
.cfi_def_cfa_offset 32
.cfi_offset w30, -24
.cfi_offset w29, -32
CFI_OFFSET_VG_MINUS_16
.cfi_offset vg, -16
bl __arm_sme_state
tbz x0, #0, 2f
1:
SMSTOP_SM
smstop sm
2:
// We can't make this into a tail-call because the unwinder would
// need to restore the value of VG.
Expand All @@ -74,7 +51,7 @@ END_COMPILERRT_FUNCTION(do_abort)
// that is set as part of the compiler-rt startup code.
// __aarch64_has_sme_and_tpidr2_el0
DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sme_state)
.variant_pcs __arm_sme_state
.variant_pcs __arm_sme_state
mov x0, xzr
mov x1, xzr

Expand All @@ -83,18 +60,18 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sme_state)
cbz w16, 1f
0:
orr x0, x0, #0xC000000000000000
mrs x16, REG_SVCR
mrs x16, SVCR
bfxil x0, x16, #0, #2
mrs x1, REG_TPIDR2_EL0
mrs x1, TPIDR2_EL0
1:
ret
END_COMPILERRT_OUTLINE_FUNCTION(__arm_sme_state)

DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_restore)
.variant_pcs __arm_tpidr2_restore
.variant_pcs __arm_tpidr2_restore
// If TPIDR2_EL0 is nonnull, the subroutine aborts in some platform-specific
// manner.
mrs x14, REG_TPIDR2_EL0
mrs x14, TPIDR2_EL0
cbnz x14, 2f

// If any of the reserved bytes in the first 16 bytes of BLK are nonzero,
Expand All @@ -114,8 +91,8 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_restore)

mov x15, xzr
0:
LDR_ZA_W15_0_X16
ADDSVL_X16_X16_1
ldr za[w15,0], [x16]
addsvl x16, x16, #1
add x15, x15, #1
cmp x14, x15
b.ne 0b
Expand All @@ -126,14 +103,15 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_restore)
END_COMPILERRT_OUTLINE_FUNCTION(__arm_tpidr2_restore)

DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_save)
// Mark this routine's own symbol as following a variant procedure-call
// standard. The directive has to name the function being defined here;
// naming __arm_tpidr2_restore would leave __arm_tpidr2_save unmarked.
.variant_pcs __arm_tpidr2_save
// If the current thread does not have access to TPIDR2_EL0, the subroutine
// does nothing.
adrp x14, TPIDR2_SYMBOL
ldrb w14, [x14, TPIDR2_SYMBOL_OFFSET]
cbz w14, 1f

// If TPIDR2_EL0 is null, the subroutine does nothing.
mrs x16, REG_TPIDR2_EL0
mrs x16, TPIDR2_EL0
cbz x16, 1f

// If any of the reserved bytes in the first 16 bytes of the TPIDR2 block are
Expand All @@ -153,8 +131,8 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_save)

mov x15, xzr
0:
STR_ZA_W15_0_X16
ADDSVL_X16_X16_1
str za[w15,0], [x16]
addsvl x16, x16, #1
add x15, x15, #1
cmp x14, x15
b.ne 0b
Expand All @@ -165,6 +143,7 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_save)
END_COMPILERRT_OUTLINE_FUNCTION(__arm_tpidr2_save)

DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_za_disable)
// Mark this routine's own symbol as following a variant procedure-call
// standard. The directive has to name the function being defined here;
// naming __arm_tpidr2_restore would leave __arm_za_disable unmarked.
.variant_pcs __arm_za_disable
// If the current thread does not have access to SME, the subroutine does
// nothing.
adrp x14, TPIDR2_SYMBOL
Expand All @@ -182,10 +161,10 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_za_disable)
bl __arm_tpidr2_save

// * Set TPIDR2_EL0 to null.
msr REG_TPIDR2_EL0, xzr
msr TPIDR2_EL0, xzr

// * Set PSTATE.ZA to 0.
SMSTOP_ZA
smstop za

.cfi_def_cfa wsp, 16
ldp x29, x30, [sp], #16
Expand Down