Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit dd1897c

Browse files
authored Feb 14, 2024
[AArch64] Initial Ampere1B scheduling model (#81341)
The Ampere1B core is enabled with a new scheduling/pipeline model, as it provides significant updates over the Ampere1 core; it reduces latencies on many instructions, has some micro-ops reassigned between the XY and X units, and provides modelling for the instructions added since Ampere1 and Ampere1A. As this is the first model implementing the CSSC instructions, we update the UnsupportedFeatures on all other models (that have CompleteModel set). Testcases are added under llvm-mca: these showed the FullFP16 feature missing, so we are adding it in as part of this commit. This *adds tests and additional fixes* compared to the reverted #81338.
1 parent 43c7eb5 commit dd1897c

24 files changed

+8587
-19
lines changed
 

‎llvm/lib/Target/AArch64/AArch64.td

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -837,6 +837,7 @@ include "AArch64SchedA64FX.td"
837837
include "AArch64SchedThunderX3T110.td"
838838
include "AArch64SchedTSV110.td"
839839
include "AArch64SchedAmpere1.td"
840+
include "AArch64SchedAmpere1B.td"
840841
include "AArch64SchedNeoverseN1.td"
841842
include "AArch64SchedNeoverseN2.td"
842843
include "AArch64SchedNeoverseV1.td"
@@ -1555,7 +1556,7 @@ def ProcessorFeatures {
15551556
FeatureMTE, FeatureSSBS, FeatureRandGen,
15561557
FeatureSB, FeatureSM4, FeatureSHA2,
15571558
FeatureSHA3, FeatureAES, FeatureCSSC,
1558-
FeatureWFxT];
1559+
FeatureWFxT, FeatureFullFP16];
15591560

15601561
// ETE and TRBE are future architecture extensions. We temporarily enable them
15611562
// by default for users targeting generic AArch64. The extensions do not
@@ -1723,7 +1724,7 @@ def : ProcessorModel<"ampere1", Ampere1Model, ProcessorFeatures.Ampere1,
17231724
def : ProcessorModel<"ampere1a", Ampere1Model, ProcessorFeatures.Ampere1A,
17241725
[TuneAmpere1A]>;
17251726

1726-
def : ProcessorModel<"ampere1b", Ampere1Model, ProcessorFeatures.Ampere1B,
1727+
def : ProcessorModel<"ampere1b", Ampere1BModel, ProcessorFeatures.Ampere1B,
17271728
[TuneAmpere1B]>;
17281729

17291730
//===----------------------------------------------------------------------===//

‎llvm/lib/Target/AArch64/AArch64SchedA53.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ def CortexA53Model : SchedMachineModel {
2929
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
3030
PAUnsupported.F,
3131
SMEUnsupported.F,
32-
[HasMTE]);
32+
[HasMTE, HasCSSC]);
3333
}
3434

3535

‎llvm/lib/Target/AArch64/AArch64SchedA57.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ def CortexA57Model : SchedMachineModel {
3434
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
3535
PAUnsupported.F,
3636
SMEUnsupported.F,
37-
[HasMTE]);
37+
[HasMTE, HasCSSC]);
3838
}
3939

4040
//===----------------------------------------------------------------------===//

‎llvm/lib/Target/AArch64/AArch64SchedA64FX.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ def A64FXModel : SchedMachineModel {
2222

2323
list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F, SVEUnsupported.F,
2424
[HasMTE, HasMatMulInt8, HasBF16,
25-
HasPAuth, HasPAuthLR, HasCPA]);
25+
HasPAuth, HasPAuthLR, HasCPA,
26+
HasCSSC]);
2627
let FullInstRWOverlapCheck = 0;
2728
}
2829

‎llvm/lib/Target/AArch64/AArch64SchedAmpere1B.td

Lines changed: 1149 additions & 0 deletions
Large diffs are not rendered by default.

‎llvm/lib/Target/AArch64/AArch64SchedCyclone.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def CycloneModel : SchedMachineModel {
2121
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
2222
PAUnsupported.F,
2323
SMEUnsupported.F,
24-
[HasMTE]);
24+
[HasMTE, HasCSSC]);
2525
}
2626

2727
//===----------------------------------------------------------------------===//

‎llvm/lib/Target/AArch64/AArch64SchedExynosM3.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def ExynosM3Model : SchedMachineModel {
2727
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
2828
PAUnsupported.F,
2929
SMEUnsupported.F,
30-
[HasMTE]);
30+
[HasMTE, HasCSSC]);
3131
}
3232

3333
//===----------------------------------------------------------------------===//

‎llvm/lib/Target/AArch64/AArch64SchedExynosM4.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def ExynosM4Model : SchedMachineModel {
2727
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
2828
PAUnsupported.F,
2929
SMEUnsupported.F,
30-
[HasMTE]);
30+
[HasMTE, HasCSSC]);
3131
}
3232

3333
//===----------------------------------------------------------------------===//

‎llvm/lib/Target/AArch64/AArch64SchedExynosM5.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def ExynosM5Model : SchedMachineModel {
2727
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
2828
PAUnsupported.F,
2929
SMEUnsupported.F,
30-
[HasMTE]);
30+
[HasMTE, HasCSSC]);
3131
}
3232

3333
//===----------------------------------------------------------------------===//

‎llvm/lib/Target/AArch64/AArch64SchedFalkor.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ def FalkorModel : SchedMachineModel {
2626
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
2727
PAUnsupported.F,
2828
SMEUnsupported.F,
29-
[HasMTE]);
29+
[HasMTE, HasCSSC]);
3030
// FIXME: Remove when all errors have been fixed.
3131
let FullInstRWOverlapCheck = 0;
3232
}

‎llvm/lib/Target/AArch64/AArch64SchedKryo.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ def KryoModel : SchedMachineModel {
3030
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
3131
PAUnsupported.F,
3232
SMEUnsupported.F,
33-
[HasMTE]);
33+
[HasMTE, HasCSSC]);
3434
// FIXME: Remove when all errors have been fixed.
3535
let FullInstRWOverlapCheck = 0;
3636
}

‎llvm/lib/Target/AArch64/AArch64SchedNeoverseN1.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ def NeoverseN1Model : SchedMachineModel {
2525
list<Predicate> UnsupportedFeatures = !listconcat(PAUnsupported.F,
2626
SMEUnsupported.F,
2727
SVEUnsupported.F,
28-
[HasMTE]);
28+
[HasMTE, HasCSSC]);
2929
}
3030

3131
//===----------------------------------------------------------------------===//

‎llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ def NeoverseN2Model : SchedMachineModel {
1919
let CompleteModel = 1;
2020

2121
list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F,
22-
[HasSVE2p1, HasPAuthLR, HasCPA]);
22+
[HasSVE2p1, HasPAuthLR, HasCPA, HasCSSC]);
2323
}
2424

2525
//===----------------------------------------------------------------------===//

‎llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@ def NeoverseV1Model : SchedMachineModel {
2828

2929
list<Predicate> UnsupportedFeatures = !listconcat(SVE2Unsupported.F,
3030
SMEUnsupported.F,
31-
[HasMTE, HasCPA]);
31+
[HasMTE, HasCPA,
32+
HasCSSC]);
3233
}
3334

3435
//===----------------------------------------------------------------------===//

‎llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ def NeoverseV2Model : SchedMachineModel {
2222
let CompleteModel = 1;
2323

2424
list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F,
25-
[HasSVE2p1, HasCPA]);
25+
[HasSVE2p1, HasCPA,
26+
HasCSSC]);
2627
}
2728

2829
//===----------------------------------------------------------------------===//

‎llvm/lib/Target/AArch64/AArch64SchedTSV110.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def TSV110Model : SchedMachineModel {
2727
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
2828
PAUnsupported.F,
2929
SMEUnsupported.F,
30-
[HasMTE]);
30+
[HasMTE, HasCSSC]);
3131
}
3232

3333
// Define each kind of processor resource and number available on the TSV110,

‎llvm/lib/Target/AArch64/AArch64SchedThunderX.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def ThunderXT8XModel : SchedMachineModel {
2828
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
2929
PAUnsupported.F,
3030
SMEUnsupported.F,
31-
[HasMTE]);
31+
[HasMTE, HasCSSC]);
3232
// FIXME: Remove when all errors have been fixed.
3333
let FullInstRWOverlapCheck = 0;
3434
}

‎llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def ThunderX2T99Model : SchedMachineModel {
2828
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
2929
PAUnsupported.F,
3030
SMEUnsupported.F,
31-
[HasMTE]);
31+
[HasMTE, HasCSSC]);
3232
// FIXME: Remove when all errors have been fixed.
3333
let FullInstRWOverlapCheck = 0;
3434
}

‎llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def ThunderX3T110Model : SchedMachineModel {
2727
list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
2828
PAUnsupported.F,
2929
SMEUnsupported.F,
30-
[HasMTE]);
30+
[HasMTE, HasCSSC]);
3131
// FIXME: Remove when all errors have been fixed.
3232
let FullInstRWOverlapCheck = 0;
3333
}

‎llvm/test/tools/llvm-mca/AArch64/Ampere/Ampere1B/basic-instructions.s

Lines changed: 3724 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
2+
# RUN: llvm-mca -mtriple=aarch64 -mcpu=ampere1b -instruction-tables < %s | FileCheck %s
3+
4+
abs w1, w2
5+
abs x2, x3
6+
cnt w3, w4
7+
cnt x4, x5
8+
ctz w5, w6
9+
ctz x6, x7
10+
smax w7, w8, w9
11+
smax x8, x9, x10
12+
umax w9, w10, w11
13+
umax x10, x11, x12
14+
smin w11, w12, w13
15+
smin w12, w13, w14
16+
umin w13, w14, w15
17+
umin x14, x15, x16
18+
19+
# CHECK: Instruction Info:
20+
# CHECK-NEXT: [1]: #uOps
21+
# CHECK-NEXT: [2]: Latency
22+
# CHECK-NEXT: [3]: RThroughput
23+
# CHECK-NEXT: [4]: MayLoad
24+
# CHECK-NEXT: [5]: MayStore
25+
# CHECK-NEXT: [6]: HasSideEffects (U)
26+
27+
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
28+
# CHECK-NEXT: 1 1 0.25 abs w1, w2
29+
# CHECK-NEXT: 1 1 0.25 abs x2, x3
30+
# CHECK-NEXT: 1 3 1.00 cnt w3, w4
31+
# CHECK-NEXT: 1 3 1.00 cnt x4, x5
32+
# CHECK-NEXT: 1 1 0.50 ctz w5, w6
33+
# CHECK-NEXT: 1 1 0.50 ctz x6, x7
34+
# CHECK-NEXT: 2 1 0.50 smax w7, w8, w9
35+
# CHECK-NEXT: 2 1 0.50 smax x8, x9, x10
36+
# CHECK-NEXT: 2 1 0.50 umax w9, w10, w11
37+
# CHECK-NEXT: 2 1 0.50 umax x10, x11, x12
38+
# CHECK-NEXT: 2 1 0.50 smin w11, w12, w13
39+
# CHECK-NEXT: 2 1 0.50 smin w12, w13, w14
40+
# CHECK-NEXT: 2 1 0.50 umin w13, w14, w15
41+
# CHECK-NEXT: 2 1 0.50 umin x14, x15, x16
42+
43+
# CHECK: Resources:
44+
# CHECK-NEXT: [0.0] - Ampere1BUnitA
45+
# CHECK-NEXT: [0.1] - Ampere1BUnitA
46+
# CHECK-NEXT: [1.0] - Ampere1BUnitB
47+
# CHECK-NEXT: [1.1] - Ampere1BUnitB
48+
# CHECK-NEXT: [2] - Ampere1BUnitBS
49+
# CHECK-NEXT: [3.0] - Ampere1BUnitL
50+
# CHECK-NEXT: [3.1] - Ampere1BUnitL
51+
# CHECK-NEXT: [4.0] - Ampere1BUnitS
52+
# CHECK-NEXT: [4.1] - Ampere1BUnitS
53+
# CHECK-NEXT: [5] - Ampere1BUnitX
54+
# CHECK-NEXT: [6] - Ampere1BUnitY
55+
# CHECK-NEXT: [7] - Ampere1BUnitZ
56+
57+
# CHECK: Resource pressure per iteration:
58+
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4.0] [4.1] [5] [6] [7]
59+
# CHECK-NEXT: 6.50 6.50 3.50 3.50 2.00 - - - - - - -
60+
61+
# CHECK: Resource pressure by instruction:
62+
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4.0] [4.1] [5] [6] [7] Instructions:
63+
# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - abs w1, w2
64+
# CHECK-NEXT: 0.25 0.25 0.25 0.25 - - - - - - - - abs x2, x3
65+
# CHECK-NEXT: - - - - 1.00 - - - - - - - cnt w3, w4
66+
# CHECK-NEXT: - - - - 1.00 - - - - - - - cnt x4, x5
67+
# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - ctz w5, w6
68+
# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - ctz x6, x7
69+
# CHECK-NEXT: 0.75 0.75 0.25 0.25 - - - - - - - - smax w7, w8, w9
70+
# CHECK-NEXT: 0.75 0.75 0.25 0.25 - - - - - - - - smax x8, x9, x10
71+
# CHECK-NEXT: 0.75 0.75 0.25 0.25 - - - - - - - - umax w9, w10, w11
72+
# CHECK-NEXT: 0.75 0.75 0.25 0.25 - - - - - - - - umax x10, x11, x12
73+
# CHECK-NEXT: 0.75 0.75 0.25 0.25 - - - - - - - - smin w11, w12, w13
74+
# CHECK-NEXT: 0.75 0.75 0.25 0.25 - - - - - - - - smin w12, w13, w14
75+
# CHECK-NEXT: 0.75 0.75 0.25 0.25 - - - - - - - - umin w13, w14, w15
76+
# CHECK-NEXT: 0.75 0.75 0.25 0.25 - - - - - - - - umin x14, x15, x16

‎llvm/test/tools/llvm-mca/AArch64/Ampere/Ampere1B/mte-instructions.s

Lines changed: 349 additions & 0 deletions
Large diffs are not rendered by default.

‎llvm/test/tools/llvm-mca/AArch64/Ampere/Ampere1B/neon-instructions.s

Lines changed: 3235 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
2+
# RUN: llvm-mca -march=aarch64 -mcpu=ampere1b -resource-pressure=false < %s | FileCheck %s
3+
4+
add w0, w1, w2, lsl #0
5+
sub x3, x4, x5, lsl #1
6+
adds x6, x7, x8, lsr #2
7+
subs x9, x10, x11, asr #3
8+
9+
# CHECK: Iterations: 100
10+
# CHECK-NEXT: Instructions: 400
11+
# CHECK-NEXT: Total Cycles: 156
12+
# CHECK-NEXT: Total uOps: 600
13+
14+
# CHECK: Dispatch Width: 12
15+
# CHECK-NEXT: uOps Per Cycle: 3.85
16+
# CHECK-NEXT: IPC: 2.56
17+
# CHECK-NEXT: Block RThroughput: 1.0
18+
19+
# CHECK: Instruction Info:
20+
# CHECK-NEXT: [1]: #uOps
21+
# CHECK-NEXT: [2]: Latency
22+
# CHECK-NEXT: [3]: RThroughput
23+
# CHECK-NEXT: [4]: MayLoad
24+
# CHECK-NEXT: [5]: MayStore
25+
# CHECK-NEXT: [6]: HasSideEffects (U)
26+
27+
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
28+
# CHECK-NEXT: 1 1 0.25 add w0, w1, w2
29+
# CHECK-NEXT: 1 1 0.25 sub x3, x4, x5, lsl #1
30+
# CHECK-NEXT: 2 2 0.50 adds x6, x7, x8, lsr #2
31+
# CHECK-NEXT: 2 2 0.50 subs x9, x10, x11, asr #3

0 commit comments

Comments
 (0)
Please sign in to comment.