-
Notifications
You must be signed in to change notification settings - Fork 13.4k
release/18.x: [AArch64][GISEL] Consider fcmp true and fcmp false in cond code selection (#86972) #91126
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
…tion (llvm#86972) Fixes llvm#86917 `FCMP_TRUE` and `FCMP_FALSE` were previously not considered and we ended up in an llvm_unreachable assertion. (cherry picked from commit c482fad)
@marcauberer What do you think about merging this PR to the release branch? |
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-llvm-globalisel Author: None (llvmbot) ChangesBackport c482fad Requested by: @marcauberer Full diff: https://github.com/llvm/llvm-project/pull/91126.diff 3 Files Affected:
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
index 92db89cc0915b8..80fe4bcb8b58f7 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
@@ -147,6 +147,12 @@ void AArch64GISelUtils::changeFCMPPredToAArch64CC(
case CmpInst::FCMP_UNE:
CondCode = AArch64CC::NE;
break;
+ case CmpInst::FCMP_TRUE:
+ CondCode = AArch64CC::AL;
+ break;
+ case CmpInst::FCMP_FALSE:
+ CondCode = AArch64CC::NV;
+ break;
}
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select.mir
index 60cddbf794bc7a..ae78d4be0f88af 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select.mir
@@ -183,6 +183,14 @@ registers:
- { id: 5, class: gpr }
- { id: 6, class: gpr }
- { id: 7, class: gpr }
+ - { id: 8, class: fpr }
+ - { id: 9, class: gpr }
+ - { id: 10, class: fpr }
+ - { id: 11, class: gpr }
+ - { id: 12, class: gpr }
+ - { id: 13, class: gpr }
+ - { id: 14, class: gpr }
+ - { id: 15, class: gpr }
# CHECK: body:
# CHECK: nofpexcept FCMPSrr %0, %0, implicit-def $nzcv
@@ -209,6 +217,18 @@ body: |
%7(s32) = G_ANYEXT %5
$w0 = COPY %7(s32)
+ %8(s32) = COPY $s0
+ %9(s32) = G_FCMP floatpred(true), %8, %8
+ %12(s8) = G_TRUNC %9(s32)
+ %14(s32) = G_ANYEXT %12
+ $w0 = COPY %14(s32)
+
+ %10(s64) = COPY $d0
+ %11(s32) = G_FCMP floatpred(false), %10, %10
+ %13(s8) = G_TRUNC %11(s32)
+ %15(s32) = G_ANYEXT %13
+ $w0 = COPY %15(s32)
+
...
---
diff --git a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
index 765c81e26e13ca..c4c00f8e97942e 100644
--- a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
@@ -2870,6 +2870,107 @@ define <2 x i64> @fcmune2xdouble(<2 x double> %A, <2 x double> %B) {
ret <2 x i64> %tmp4
}
+define <2 x i32> @fcmal2xfloat(<2 x float> %A, <2 x float> %B) {
+; CHECK-SD-LABEL: fcmal2xfloat:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.2d, #0xffffffffffffffff
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcmal2xfloat:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v0.2s, #1
+; CHECK-GI-NEXT: shl v0.2s, v0.2s, #31
+; CHECK-GI-NEXT: sshr v0.2s, v0.2s, #31
+; CHECK-GI-NEXT: ret
+ %tmp3 = fcmp true <2 x float> %A, %B
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmal4xfloat(<4 x float> %A, <4 x float> %B) {
+; CHECK-SD-LABEL: fcmal4xfloat:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.2d, #0xffffffffffffffff
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcmal4xfloat:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #1 // =0x1
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: mov v1.16b, v0.16b
+; CHECK-GI-NEXT: mov v1.h[1], v0.h[0]
+; CHECK-GI-NEXT: mov v0.h[1], v0.h[0]
+; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: mov v1.d[1], v0.d[0]
+; CHECK-GI-NEXT: shl v0.4s, v1.4s, #31
+; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-GI-NEXT: ret
+ %tmp3 = fcmp true <4 x float> %A, %B
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+define <2 x i64> @fcmal2xdouble(<2 x double> %A, <2 x double> %B) {
+; CHECK-SD-LABEL: fcmal2xdouble:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.2d, #0xffffffffffffffff
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcmal2xdouble:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI221_0
+; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI221_0]
+; CHECK-GI-NEXT: shl v0.2d, v0.2d, #63
+; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #63
+; CHECK-GI-NEXT: ret
+ %tmp3 = fcmp true <2 x double> %A, %B
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+ ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmnv2xfloat(<2 x float> %A, <2 x float> %B) {
+; CHECK-LABEL: fcmnv2xfloat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: ret
+ %tmp3 = fcmp false <2 x float> %A, %B
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmnv4xfloat(<4 x float> %A, <4 x float> %B) {
+; CHECK-SD-LABEL: fcmnv4xfloat:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcmnv4xfloat:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #0 // =0x0
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: mov v1.16b, v0.16b
+; CHECK-GI-NEXT: mov v1.h[1], v0.h[0]
+; CHECK-GI-NEXT: mov v0.h[1], v0.h[0]
+; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: mov v1.d[1], v0.d[0]
+; CHECK-GI-NEXT: shl v0.4s, v1.4s, #31
+; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-GI-NEXT: ret
+ %tmp3 = fcmp false <4 x float> %A, %B
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+define <2 x i64> @fcmnv2xdouble(<2 x double> %A, <2 x double> %B) {
+; CHECK-LABEL: fcmnv2xdouble:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: ret
+ %tmp3 = fcmp false <2 x double> %A, %B
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+ ret <2 x i64> %tmp4
+}
+
define <2 x i32> @fcmoeqz2xfloat(<2 x float> %A) {
; CHECK-LABEL: fcmoeqz2xfloat:
; CHECK: // %bb.0:
|
@arsenm How are lit test failures handled in case of cherry-picks? Seems like GISEL behaves a bit different on |
@marcauberer You can just create manually create a pull request against the release/18.x branch with the fixes. |
@marcauberer Has this fix been submitted in another PR? |
Backport c482fad
Requested by: @marcauberer