Skip to content

Commit 3a14ffb

Browse files
[AArch64] Implement GCS ACLE intrinsics (#96903)
This adds the intrinsics defined in ARM-software/acle#260. Doing this requires some changes to the GCS instruction definitions: these intrinsics rely on the fact that some instructions don't modify the input register when GCS is disabled, and the instructions need to be correctly marked with mayLoad/mayStore/hasSideEffects for instruction selection to work.
1 parent 81cdf94 commit 3a14ffb

File tree

7 files changed

+185
-4
lines changed

7 files changed

+185
-4
lines changed

clang/include/clang/Basic/BuiltinsAArch64.def

+5
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ BUILTIN(__builtin_arm_wfe, "v", "")
4949
BUILTIN(__builtin_arm_wfi, "v", "")
5050
BUILTIN(__builtin_arm_sev, "v", "")
5151
BUILTIN(__builtin_arm_sevl, "v", "")
52+
BUILTIN(__builtin_arm_chkfeat, "WUiWUi", "")
5253

5354
// Like __builtin_trap but provide a 16-bit immediate reason code (which goes into `brk #N`).
5455
BUILTIN(__builtin_arm_trap, "vUIs", "nr")
@@ -136,6 +137,10 @@ TARGET_BUILTIN(__builtin_arm_st64b, "vv*WUiC*", "n", "ls64")
136137
TARGET_BUILTIN(__builtin_arm_st64bv, "WUiv*WUiC*", "n", "ls64")
137138
TARGET_BUILTIN(__builtin_arm_st64bv0, "WUiv*WUiC*", "n", "ls64")
138139

140+
// Armv9.3-A Guarded Control Stack
141+
TARGET_BUILTIN(__builtin_arm_gcspopm, "WUiWUi", "n", "gcs")
142+
TARGET_BUILTIN(__builtin_arm_gcsss, "vC*vC*", "n", "gcs")
143+
139144
TARGET_HEADER_BUILTIN(_BitScanForward, "UcUNi*UNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
140145
TARGET_HEADER_BUILTIN(_BitScanReverse, "UcUNi*UNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
141146
TARGET_HEADER_BUILTIN(_BitScanForward64, "UcUNi*ULLi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")

clang/lib/Headers/arm_acle.h

+26
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,14 @@ static __inline__ void __attribute__((__always_inline__, __nodebug__)) __yield(v
7575
#define __dbg(t) __builtin_arm_dbg(t)
7676
#endif
7777

78+
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
79+
#define _CHKFEAT_GCS 1
80+
/* Queries the features selected by the bits of __features (for example
 * _CHKFEAT_GCS). The value returned by the chkfeat builtin is XORed with the
 * original mask before being handed back to the caller.
 * NOTE(review): presumably the underlying instruction clears the bit of each
 * enabled feature, so the XOR leaves a set bit per enabled feature - confirm
 * against the ACLE specification. */
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
80+
__chkfeat(uint64_t __features) {
81+
return __builtin_arm_chkfeat(__features) ^ __features;
82+
}
84+
#endif
85+
7886
/* 7.5 Swap */
7987
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
8088
__swp(uint32_t __x, volatile uint32_t *__p) {
@@ -855,6 +863,24 @@ __rndrrs(uint64_t *__p) {
855863
}
856864
#endif
857865

866+
/* 11.2 Guarded Control Stack intrinsics */
867+
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
868+
/* Reads the gcspr_el0 system register and returns its value as a pointer.
 * Declared with an explicit (void) parameter list so that the definition is a
 * proper prototype in C as well as in C++. */
static __inline__ void * __attribute__((__always_inline__, __nodebug__))
869+
__gcspr(void) {
870+
return (void *)__builtin_arm_rsr64("gcspr_el0");
871+
}
872+
873+
/* Issues the gcspopm builtin with a zero input operand and returns its
 * result. The instruction leaves its input register unchanged when GCS is
 * disabled, so the zero input is what comes back in that case. Requires the
 * "gcs" target feature. Declared with an explicit (void) parameter list so
 * that the definition is a proper prototype in C as well as in C++. */
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__, target("gcs")))
874+
__gcspopm(void) {
875+
return __builtin_arm_gcspopm(0);
876+
}
877+
878+
/* Forwards __stack to the gcsss builtin and returns the pointer the builtin
 * produces. Requires the "gcs" target feature.
 * NOTE(review): presumably the result is the previous stack's pointer as
 * described by the ACLE stack-switch intrinsic - confirm against the spec. */
static __inline__ const void * __attribute__((__always_inline__, __nodebug__, target("gcs")))
879+
__gcsss(const void *__stack) {
880+
return __builtin_arm_gcsss(__stack);
881+
}
882+
#endif
883+
858884
#if defined(__cplusplus)
859885
}
860886
#endif

clang/test/CodeGen/aarch64-gcs.c

+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
2+
// RUN: %clang_cc1 -triple aarch64-eabi -target-feature +gcs -emit-llvm %s -o - | FileCheck %s
3+
4+
#include <arm_acle.h>
5+
6+
// CHECK-LABEL: define dso_local i64 @test_chkfeat
7+
// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
8+
// CHECK-NEXT: entry:
9+
// CHECK-NEXT: [[__FEATURES_ADDR_I:%.*]] = alloca i64, align 8
10+
// CHECK-NEXT: store i64 1, ptr [[__FEATURES_ADDR_I]], align 8
11+
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[__FEATURES_ADDR_I]], align 8
12+
// CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.aarch64.chkfeat(i64 [[TMP0]])
13+
// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[__FEATURES_ADDR_I]], align 8
14+
// CHECK-NEXT: [[XOR_I:%.*]] = xor i64 [[TMP1]], [[TMP2]]
15+
// CHECK-NEXT: ret i64 [[XOR_I]]
16+
//
17+
// Verifies that __chkfeat(_CHKFEAT_GCS) is emitted as a call to
// @llvm.aarch64.chkfeat whose result is xor'ed with the original argument.
uint64_t test_chkfeat() {
18+
return __chkfeat(_CHKFEAT_GCS);
19+
}
20+
21+
// CHECK-LABEL: define dso_local ptr @test_gcspr
22+
// CHECK-SAME: () #[[ATTR0]] {
23+
// CHECK-NEXT: entry:
24+
// CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.read_volatile_register.i64(metadata [[META2:![0-9]+]])
25+
// CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr
26+
// CHECK-NEXT: ret ptr [[TMP1]]
27+
//
28+
// Verifies that __gcspr() is emitted as a volatile named-register read
// (@llvm.read_volatile_register.i64) converted to a pointer with inttoptr.
void *test_gcspr() {
29+
return __gcspr();
30+
}
31+
32+
// CHECK-LABEL: define dso_local i64 @test_gcspopm
33+
// CHECK-SAME: () #[[ATTR0]] {
34+
// CHECK-NEXT: entry:
35+
// CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.aarch64.gcspopm(i64 0)
36+
// CHECK-NEXT: ret i64 [[TMP0]]
37+
//
38+
// Verifies that __gcspopm() is emitted as a call to @llvm.aarch64.gcspopm
// with a constant zero argument.
uint64_t test_gcspopm() {
39+
return __gcspopm();
40+
}
41+
42+
// CHECK-LABEL: define dso_local ptr @test_gcsss
43+
// CHECK-SAME: (ptr noundef [[P:%.*]]) #[[ATTR0]] {
44+
// CHECK-NEXT: entry:
45+
// CHECK-NEXT: [[__STACK_ADDR_I:%.*]] = alloca ptr, align 8
46+
// CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8
47+
// CHECK-NEXT: store ptr [[P]], ptr [[P_ADDR]], align 8
48+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P_ADDR]], align 8
49+
// CHECK-NEXT: store ptr [[TMP0]], ptr [[__STACK_ADDR_I]], align 8
50+
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__STACK_ADDR_I]], align 8
51+
// CHECK-NEXT: [[TMP2:%.*]] = call ptr @llvm.aarch64.gcsss(ptr [[TMP1]])
52+
// CHECK-NEXT: ret ptr [[TMP2]]
53+
//
54+
// Verifies that __gcsss(p) passes p through to @llvm.aarch64.gcsss and
// returns the pointer produced by the intrinsic.
const void *test_gcsss(const void *p) {
55+
return __gcsss(p);
56+
}

llvm/include/llvm/IR/IntrinsicsAArch64.td

+17
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,23 @@ def int_aarch64_isb : ClangBuiltin<"__builtin_arm_isb">, MSBuiltin<"__isb">,
9292
// ordering during ISel.
9393
def int_aarch64_space : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i64_ty], []>;
9494

95+
//===----------------------------------------------------------------------===//
96+
// Guarded Control Stack
97+
98+
// chkfeat: takes and returns an i64; exposed to Clang as
// __builtin_arm_chkfeat. Marked IntrNoMem, so a call whose result is unused
// may be removed.
def int_aarch64_chkfeat : ClangBuiltin<"__builtin_arm_chkfeat">,
99+
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty],
100+
[IntrNoMem]>;
101+
102+
// FIXME: This should be marked as [IntrReadMem, IntrHasSideEffects], as it has
103+
// the side-effect of updating gcspr, but this combination doesn't work
104+
// correctly.
105+
// gcspopm: takes and returns an i64; exposed to Clang as
// __builtin_arm_gcspopm. The property list is deliberately left empty.
def int_aarch64_gcspopm : ClangBuiltin<"__builtin_arm_gcspopm">,
106+
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty],
107+
[]>;
108+
109+
// gcsss: takes and returns a pointer; exposed to Clang as
// __builtin_arm_gcsss. No intrinsic properties are listed, and no selection
// pattern is attached here.
def int_aarch64_gcsss : ClangBuiltin<"__builtin_arm_gcsss">,
110+
DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>;
111+
95112
}
96113

97114
//===----------------------------------------------------------------------===//

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

+12
Original file line numberDiff line numberDiff line change
@@ -4587,6 +4587,18 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
45874587
switch (IntNo) {
45884588
default:
45894589
break;
4590+
// Select the gcsss intrinsic manually: it expands to a GCSSS1 instruction
// followed by a GCSSS2 instruction, and the node is replaced by the GCSSS2.
case Intrinsic::aarch64_gcsss: {
4591+
SDLoc DL(Node);
4592+
// Operand 0 is the incoming chain and operand 2 is the pointer argument.
// NOTE(review): operand 1 is presumably the intrinsic ID - confirm.
SDValue Chain = Node->getOperand(0);
4593+
SDValue Val = Node->getOperand(2);
4594+
// GCSSS2 takes a register input as well as producing a result, so it is
// given a copy of the zero register as its input value.
SDValue Zero = CurDAG->getCopyFromReg(Chain, DL, AArch64::XZR, MVT::i64);
4595+
// GCSSS1 consumes the pointer and yields only a chain result.
SDNode *SS1 =
4596+
CurDAG->getMachineNode(AArch64::GCSSS1, DL, MVT::Other, Val, Chain);
4597+
// GCSSS2 is chained after GCSSS1 and yields the i64 result plus a chain.
SDNode *SS2 = CurDAG->getMachineNode(AArch64::GCSSS2, DL, MVT::i64,
4598+
MVT::Other, Zero, SDValue(SS1, 0));
4599+
ReplaceNode(Node, SS2);
4600+
return;
4601+
}
45904602
case Intrinsic::aarch64_ldaxp:
45914603
case Intrinsic::aarch64_ldxp: {
45924604
unsigned Op =

llvm/lib/Target/AArch64/AArch64InstrInfo.td

+15-4
Original file line numberDiff line numberDiff line change
@@ -1267,23 +1267,34 @@ class GCSRtIn<bits<3> op1, bits<3> op2, string mnemonic,
12671267
let Inst{15-8} = 0b01110111;
12681268
let Inst{7-5} = op2;
12691269
let Predicates = [HasGCS];
1270+
let hasSideEffects = 1;
12701271
}
12711272

1273+
let mayStore = 1, mayLoad = 1 in
12721274
def GCSSS1 : GCSRtIn<0b011, 0b010, "gcsss1">;
1275+
let mayStore = 1 in
12731276
def GCSPUSHM : GCSRtIn<0b011, 0b000, "gcspushm">;
12741277

12751278
class GCSRtOut<bits<3> op1, bits<3> op2, string mnemonic,
12761279
list<dag> pattern = []>
1277-
: RtSystemI<1, (outs GPR64:$Rt), (ins), mnemonic, "\t$Rt", pattern> {
1280+
: RtSystemI<1, (outs GPR64:$Rt), (ins GPR64:$src), mnemonic, "\t$Rt", pattern> {
12781281
let Inst{20-19} = 0b01;
12791282
let Inst{18-16} = op1;
12801283
let Inst{15-8} = 0b01110111;
12811284
let Inst{7-5} = op2;
12821285
let Predicates = [HasGCS];
1286+
let hasSideEffects = 1;
1287+
// The input register is unchanged when GCS is disabled, so we need it as
1288+
// both an input and output operand.
1289+
let Constraints = "$src = $Rt";
12831290
}
12841291

1292+
let mayStore = 1, mayLoad = 1 in
12851293
def GCSSS2 : GCSRtOut<0b011, 0b011, "gcsss2">;
1286-
def GCSPOPM : GCSRtOut<0b011, 0b001, "gcspopm">;
1294+
// FIXME: mayStore = 1 only needed to match the intrinsic definition
1295+
let mayStore = 1, mayLoad = 1 in
1296+
def GCSPOPM : GCSRtOut<0b011, 0b001, "gcspopm",
1297+
[(set GPR64:$Rt, (int_aarch64_gcspopm GPR64:$src))]>;
12871298
def GCSPOPM_NoOp : InstAlias<"gcspopm", (GCSPOPM XZR)>, Requires<[HasGCS]>; // Rt defaults to XZR if absent
12881299

12891300
def GCSB_DSYNC_disable : InstAlias<"gcsb\tdsync", (HINT 19), 0>;
@@ -1292,7 +1303,8 @@ def GCSB_DSYNC : InstAlias<"gcsb\tdsync", (HINT 19), 1>, Requires<[HasGC
12921303
def : TokenAlias<"DSYNC", "dsync">;
12931304

12941305
let Uses = [X16], Defs = [X16], CRm = 0b0101 in {
1295-
def CHKFEAT : SystemNoOperands<0b000, "hint\t#40">;
1306+
def CHKFEAT : SystemNoOperands<0b000, "hint\t#40",
1307+
[(set X16, (int_aarch64_chkfeat X16))]>;
12961308
}
12971309
def : InstAlias<"chkfeat\tx16", (CHKFEAT), 0>;
12981310
def : InstAlias<"chkfeat\tx16", (CHKFEAT), 1>, Requires<[HasCHK]>;
@@ -1311,7 +1323,6 @@ class GCSSt<string mnemonic, bits<3> op>
13111323
def GCSSTR : GCSSt<"gcsstr", 0b000>;
13121324
def GCSSTTR : GCSSt<"gcssttr", 0b001>;
13131325

1314-
13151326
// ARMv8.2-A Dot Product
13161327
let Predicates = [HasDotProd] in {
13171328
defm SDOT : SIMDThreeSameVectorDot<0, 0, "sdot", AArch64sdot>;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=aarch64 -mattr=+gcs -verify-machineinstrs -o - %s | FileCheck %s
3+
4+
; We call each intrinsic twice, once with the result being unused and once with
5+
; it being used, to check that dead code elimination is being done correctly.
6+
; chkfeat does not have side effects so can be eliminated, but the others do and
7+
; can't be eliminated.
8+
9+
; Two chkfeat calls, only the second result is used: a single chkfeat
; instruction operating on x16 is expected in the output.
define i64 @test_chkfeat(i64 %arg) {
10+
; CHECK-LABEL: test_chkfeat:
11+
; CHECK: // %bb.0: // %entry
12+
; CHECK-NEXT: mov x16, x0
13+
; CHECK-NEXT: chkfeat x16
14+
; CHECK-NEXT: mov x0, x16
15+
; CHECK-NEXT: ret
16+
entry:
17+
%0 = call i64 @llvm.aarch64.chkfeat(i64 %arg)
18+
%1 = call i64 @llvm.aarch64.chkfeat(i64 %arg)
19+
ret i64 %1
20+
}
21+
22+
; Two gcspopm calls, only the second result is used: both gcspopm
; instructions are expected to remain in the output.
define i64 @test_gcspopm(i64 %arg) {
23+
; CHECK-LABEL: test_gcspopm:
24+
; CHECK: // %bb.0: // %entry
25+
; CHECK-NEXT: mov x8, x0
26+
; CHECK-NEXT: gcspopm x8
27+
; CHECK-NEXT: gcspopm x0
28+
; CHECK-NEXT: ret
29+
entry:
30+
%0 = call i64 @llvm.aarch64.gcspopm(i64 %arg)
31+
%1 = call i64 @llvm.aarch64.gcspopm(i64 %arg)
32+
ret i64 %1
33+
}
34+
35+
; Two gcsss calls, only the second result is used: each call is expected to
; produce a gcsss1/gcsss2 pair, with gcsss2 fed from a register set to xzr.
define ptr @test_gcsss(ptr %p) {
36+
; CHECK-LABEL: test_gcsss:
37+
; CHECK: // %bb.0: // %entry
38+
; CHECK-NEXT: mov x9, xzr
39+
; CHECK-NEXT: gcsss1 x0
40+
; CHECK-NEXT: mov x8, xzr
41+
; CHECK-NEXT: gcsss2 x9
42+
; CHECK-NEXT: gcsss1 x0
43+
; CHECK-NEXT: gcsss2 x8
44+
; CHECK-NEXT: mov x0, x8
45+
; CHECK-NEXT: ret
46+
entry:
47+
%0 = call ptr @llvm.aarch64.gcsss(ptr %p)
48+
%1 = call ptr @llvm.aarch64.gcsss(ptr %p)
49+
ret ptr %1
50+
}
51+
52+
declare i64 @llvm.aarch64.chkfeat(i64)
53+
declare i64 @llvm.aarch64.gcspopm(i64)
54+
declare ptr @llvm.aarch64.gcsss(ptr)

0 commit comments

Comments
 (0)