-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[AArch64][SME] Tile slices to lazy-save/restore should be RDSVL. #68403
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
sdesmalen-arm
merged 1 commit into
llvm:main
from
sdesmalen-arm:fix-lazy-save-tile-slices
Oct 6, 2023
Merged
[AArch64][SME] Tile slices to lazy-save/restore should be RDSVL. #68403
sdesmalen-arm
merged 1 commit into
llvm:main
from
sdesmalen-arm:fix-lazy-save-tile-slices
Oct 6, 2023
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
The number of tile slices to lazy-save/restore should be RDSVL, instead of RDSVL * RDSVL.
@llvm/pr-subscribers-backend-aarch64 Changes: Instead of RDSVL * RDSVL. Full diff: https://github.com/llvm/llvm-project/pull/68403.diff 4 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 3ae7a893ca4e9e3..e667d0cca19f795 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -7367,10 +7367,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
SDValue NumZaSaveSlices;
if (!CalleeAttrs.preservesZA()) {
// Set up a lazy save mechanism by storing the runtime live slices
- // (worst-case SVL*SVL) to the TPIDR2 stack object.
- SDValue SVL = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64,
- DAG.getConstant(1, DL, MVT::i32));
- NumZaSaveSlices = DAG.getNode(ISD::MUL, DL, MVT::i64, SVL, SVL);
+ // (worst-case SVL) to the TPIDR2 stack object.
+ NumZaSaveSlices = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64,
+ DAG.getConstant(1, DL, MVT::i32));
} else if (CalleeAttrs.preservesZA()) {
NumZaSaveSlices = DAG.getConstant(0, DL, MVT::i64);
}
diff --git a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
index 98a8769afea8513..a831cee09619c83 100644
--- a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
+++ b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
@@ -251,9 +251,8 @@ define double @za_shared_caller_to_za_none_callee(double %x) nounwind noinline
; CHECK-COMMON-NEXT: mov x29, sp
; CHECK-COMMON-NEXT: sub sp, sp, #16
; CHECK-COMMON-NEXT: rdsvl x8, #1
-; CHECK-COMMON-NEXT: mul x8, x8, x8
; CHECK-COMMON-NEXT: mov x9, sp
-; CHECK-COMMON-NEXT: subs x9, x9, x8
+; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
; CHECK-COMMON-NEXT: mov sp, x9
; CHECK-COMMON-NEXT: stur x9, [x29, #-16]
; CHECK-COMMON-NEXT: sturh w8, [x29, #-8]
@@ -291,8 +290,7 @@ define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_pstate_za_shared" nounwi
; CHECK-COMMON-NEXT: sub sp, sp, #16
; CHECK-COMMON-NEXT: rdsvl x8, #1
; CHECK-COMMON-NEXT: mov x9, sp
-; CHECK-COMMON-NEXT: mul x8, x8, x8
-; CHECK-COMMON-NEXT: sub x9, x9, x8
+; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
; CHECK-COMMON-NEXT: mov sp, x9
; CHECK-COMMON-NEXT: stur x9, [x29, #-16]
; CHECK-COMMON-NEXT: sub x9, x29, #16
@@ -352,8 +350,7 @@ define double @frem_call_za(double %a, double %b) "aarch64_pstate_za_shared" nou
; CHECK-COMMON-NEXT: sub sp, sp, #16
; CHECK-COMMON-NEXT: rdsvl x8, #1
; CHECK-COMMON-NEXT: mov x9, sp
-; CHECK-COMMON-NEXT: mul x8, x8, x8
-; CHECK-COMMON-NEXT: sub x9, x9, x8
+; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
; CHECK-COMMON-NEXT: mov sp, x9
; CHECK-COMMON-NEXT: stur x9, [x29, #-16]
; CHECK-COMMON-NEXT: sub x9, x29, #16
diff --git a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
index ad16402a18f8b92..7944c7f94c7018b 100644
--- a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
+++ b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
@@ -14,8 +14,7 @@ define void @test_lazy_save_1_callee() nounwind "aarch64_pstate_za_shared" {
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
-; CHECK-NEXT: mul x8, x8, x8
-; CHECK-NEXT: sub x9, x9, x8
+; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: stur x9, [x29, #-16]
; CHECK-NEXT: sub x9, x29, #16
@@ -45,10 +44,9 @@ define void @test_lazy_save_2_callees() nounwind "aarch64_pstate_za_shared" {
; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: rdsvl x8, #1
-; CHECK-NEXT: mul x19, x8, x8
+; CHECK-NEXT: rdsvl x19, #1
; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: sub x8, x8, x19
+; CHECK-NEXT: msub x8, x19, x19, x8
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: sub x20, x29, #16
; CHECK-NEXT: stur x8, [x29, #-16]
@@ -92,8 +90,7 @@ define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_psta
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
-; CHECK-NEXT: mul x8, x8, x8
-; CHECK-NEXT: sub x9, x9, x8
+; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: stur x9, [x29, #-16]
; CHECK-NEXT: sub x9, x29, #16
@@ -129,8 +126,7 @@ define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_pstate_z
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
-; CHECK-NEXT: mul x8, x8, x8
-; CHECK-NEXT: sub x9, x9, x8
+; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: stur x9, [x29, #-80]
; CHECK-NEXT: sub x9, x29, #80
diff --git a/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll b/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll
index de7df1c9831908f..0ac2b21c6aba360 100644
--- a/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll
+++ b/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll
@@ -12,8 +12,7 @@ define void @disable_tailcallopt() "aarch64_pstate_za_shared" nounwind {
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
-; CHECK-NEXT: mul x8, x8, x8
-; CHECK-NEXT: sub x9, x9, x8
+; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: stur x9, [x29, #-16]
; CHECK-NEXT: sub x9, x29, #16
@@ -44,8 +43,7 @@ define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_pstate_za_shared" nounwi
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
-; CHECK-NEXT: mul x8, x8, x8
-; CHECK-NEXT: sub x9, x9, x8
+; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: stur x9, [x29, #-16]
; CHECK-NEXT: sub x9, x29, #16
|
Is SVL x SVL not the correct size for ZA?
That's indeed the correct size of ZA, but the number of slices to save/restore is simply SVL (because each slice itself is SVL bytes wide).
aemerson
approved these changes
Oct 6, 2023
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Instead of RDSVL * RDSVL.