-
Notifications
You must be signed in to change notification settings - Fork 13.4k
[AArch64][GISEL] Consider fcmp true and fcmp false in cond code selection #86972
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AArch64][GISEL] Consider fcmp true and fcmp false in cond code selection #86972
Conversation
@llvm/pr-subscribers-backend-aarch64 Author: Marc Auberer (marcauberer) ChangesFixes #86917
Full diff: https://github.com/llvm/llvm-project/pull/86972.diff 3 Files Affected:
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
index 92db89cc0915b8..80fe4bcb8b58f7 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
@@ -147,6 +147,12 @@ void AArch64GISelUtils::changeFCMPPredToAArch64CC(
case CmpInst::FCMP_UNE:
CondCode = AArch64CC::NE;
break;
+ case CmpInst::FCMP_TRUE:
+ CondCode = AArch64CC::AL;
+ break;
+ case CmpInst::FCMP_FALSE:
+ CondCode = AArch64CC::NV;
+ break;
}
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select.mir
index bcdd77ac57570d..e207a31063bacf 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select.mir
@@ -183,6 +183,14 @@ registers:
- { id: 5, class: gpr }
- { id: 6, class: gpr }
- { id: 7, class: gpr }
+ - { id: 8, class: fpr }
+ - { id: 9, class: gpr }
+ - { id: 10, class: fpr }
+ - { id: 11, class: gpr }
+ - { id: 12, class: gpr }
+ - { id: 13, class: gpr }
+ - { id: 14, class: gpr }
+ - { id: 15, class: gpr }
# CHECK: body:
# CHECK: nofpexcept FCMPSrr %0, %0, implicit-def $nzcv
@@ -209,6 +217,18 @@ body: |
%7(s32) = G_ANYEXT %5
$w0 = COPY %7(s32)
+ %8(s32) = COPY $s0
+ %9(s32) = G_FCMP floatpred(true), %8, %8
+ %12(s8) = G_TRUNC %9(s32)
+ %14(s32) = G_ANYEXT %12
+ $w0 = COPY %14(s32)
+
+ %10(s64) = COPY $d0
+ %11(s32) = G_FCMP floatpred(false), %10, %10
+ %13(s8) = G_TRUNC %11(s32)
+ %15(s32) = G_ANYEXT %13
+ $w0 = COPY %15(s32)
+
...
---
diff --git a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
index 632b6b32625022..dbb5dfebd44abc 100644
--- a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
@@ -2870,6 +2870,107 @@ define <2 x i64> @fcmune2xdouble(<2 x double> %A, <2 x double> %B) {
ret <2 x i64> %tmp4
}
+define <2 x i32> @fcmal2xfloat(<2 x float> %A, <2 x float> %B) {
+; CHECK-SD-LABEL: fcmal2xfloat:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.2d, #0xffffffffffffffff
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcmal2xfloat:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v0.2s, #1
+; CHECK-GI-NEXT: shl v0.2s, v0.2s, #31
+; CHECK-GI-NEXT: sshr v0.2s, v0.2s, #31
+; CHECK-GI-NEXT: ret
+ %tmp3 = fcmp true <2 x float> %A, %B
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmal4xfloat(<4 x float> %A, <4 x float> %B) {
+; CHECK-SD-LABEL: fcmal4xfloat:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.2d, #0xffffffffffffffff
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcmal4xfloat:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #1 // =0x1
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: mov v1.16b, v0.16b
+; CHECK-GI-NEXT: mov v1.h[1], v0.h[0]
+; CHECK-GI-NEXT: mov v0.h[1], v0.h[0]
+; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: mov v1.d[1], v0.d[0]
+; CHECK-GI-NEXT: shl v0.4s, v1.4s, #31
+; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-GI-NEXT: ret
+ %tmp3 = fcmp true <4 x float> %A, %B
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+define <2 x i64> @fcmal2xdouble(<2 x double> %A, <2 x double> %B) {
+; CHECK-SD-LABEL: fcmal2xdouble:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.2d, #0xffffffffffffffff
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcmal2xdouble:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI221_0
+; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI221_0]
+; CHECK-GI-NEXT: shl v0.2d, v0.2d, #63
+; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #63
+; CHECK-GI-NEXT: ret
+ %tmp3 = fcmp true <2 x double> %A, %B
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+ ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmnv2xfloat(<2 x float> %A, <2 x float> %B) {
+; CHECK-LABEL: fcmnv2xfloat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: ret
+ %tmp3 = fcmp false <2 x float> %A, %B
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmnv4xfloat(<4 x float> %A, <4 x float> %B) {
+; CHECK-SD-LABEL: fcmnv4xfloat:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcmnv4xfloat:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #0 // =0x0
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: mov v1.16b, v0.16b
+; CHECK-GI-NEXT: mov v1.h[1], v0.h[0]
+; CHECK-GI-NEXT: mov v0.h[1], v0.h[0]
+; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: mov v1.d[1], v0.d[0]
+; CHECK-GI-NEXT: shl v0.4s, v1.4s, #31
+; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-GI-NEXT: ret
+ %tmp3 = fcmp false <4 x float> %A, %B
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+define <2 x i64> @fcmnv2xdouble(<2 x double> %A, <2 x double> %B) {
+; CHECK-LABEL: fcmnv2xdouble:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: ret
+ %tmp3 = fcmp false <2 x double> %A, %B
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+ ret <2 x i64> %tmp4
+}
+
define <2 x i32> @fcmoeqz2xfloat(<2 x float> %A) {
; CHECK-LABEL: fcmoeqz2xfloat:
; CHECK: // %bb.0:
|
@llvm/pr-subscribers-llvm-globalisel Author: Marc Auberer (marcauberer) ChangesFixes #86917
Full diff: https://github.com/llvm/llvm-project/pull/86972.diff 3 Files Affected:
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
index 92db89cc0915b8..80fe4bcb8b58f7 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
@@ -147,6 +147,12 @@ void AArch64GISelUtils::changeFCMPPredToAArch64CC(
case CmpInst::FCMP_UNE:
CondCode = AArch64CC::NE;
break;
+ case CmpInst::FCMP_TRUE:
+ CondCode = AArch64CC::AL;
+ break;
+ case CmpInst::FCMP_FALSE:
+ CondCode = AArch64CC::NV;
+ break;
}
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select.mir
index bcdd77ac57570d..e207a31063bacf 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select.mir
@@ -183,6 +183,14 @@ registers:
- { id: 5, class: gpr }
- { id: 6, class: gpr }
- { id: 7, class: gpr }
+ - { id: 8, class: fpr }
+ - { id: 9, class: gpr }
+ - { id: 10, class: fpr }
+ - { id: 11, class: gpr }
+ - { id: 12, class: gpr }
+ - { id: 13, class: gpr }
+ - { id: 14, class: gpr }
+ - { id: 15, class: gpr }
# CHECK: body:
# CHECK: nofpexcept FCMPSrr %0, %0, implicit-def $nzcv
@@ -209,6 +217,18 @@ body: |
%7(s32) = G_ANYEXT %5
$w0 = COPY %7(s32)
+ %8(s32) = COPY $s0
+ %9(s32) = G_FCMP floatpred(true), %8, %8
+ %12(s8) = G_TRUNC %9(s32)
+ %14(s32) = G_ANYEXT %12
+ $w0 = COPY %14(s32)
+
+ %10(s64) = COPY $d0
+ %11(s32) = G_FCMP floatpred(false), %10, %10
+ %13(s8) = G_TRUNC %11(s32)
+ %15(s32) = G_ANYEXT %13
+ $w0 = COPY %15(s32)
+
...
---
diff --git a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
index 632b6b32625022..dbb5dfebd44abc 100644
--- a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
@@ -2870,6 +2870,107 @@ define <2 x i64> @fcmune2xdouble(<2 x double> %A, <2 x double> %B) {
ret <2 x i64> %tmp4
}
+define <2 x i32> @fcmal2xfloat(<2 x float> %A, <2 x float> %B) {
+; CHECK-SD-LABEL: fcmal2xfloat:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.2d, #0xffffffffffffffff
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcmal2xfloat:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v0.2s, #1
+; CHECK-GI-NEXT: shl v0.2s, v0.2s, #31
+; CHECK-GI-NEXT: sshr v0.2s, v0.2s, #31
+; CHECK-GI-NEXT: ret
+ %tmp3 = fcmp true <2 x float> %A, %B
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmal4xfloat(<4 x float> %A, <4 x float> %B) {
+; CHECK-SD-LABEL: fcmal4xfloat:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.2d, #0xffffffffffffffff
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcmal4xfloat:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #1 // =0x1
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: mov v1.16b, v0.16b
+; CHECK-GI-NEXT: mov v1.h[1], v0.h[0]
+; CHECK-GI-NEXT: mov v0.h[1], v0.h[0]
+; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: mov v1.d[1], v0.d[0]
+; CHECK-GI-NEXT: shl v0.4s, v1.4s, #31
+; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-GI-NEXT: ret
+ %tmp3 = fcmp true <4 x float> %A, %B
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+define <2 x i64> @fcmal2xdouble(<2 x double> %A, <2 x double> %B) {
+; CHECK-SD-LABEL: fcmal2xdouble:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.2d, #0xffffffffffffffff
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcmal2xdouble:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI221_0
+; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI221_0]
+; CHECK-GI-NEXT: shl v0.2d, v0.2d, #63
+; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #63
+; CHECK-GI-NEXT: ret
+ %tmp3 = fcmp true <2 x double> %A, %B
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+ ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmnv2xfloat(<2 x float> %A, <2 x float> %B) {
+; CHECK-LABEL: fcmnv2xfloat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: ret
+ %tmp3 = fcmp false <2 x float> %A, %B
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+ ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmnv4xfloat(<4 x float> %A, <4 x float> %B) {
+; CHECK-SD-LABEL: fcmnv4xfloat:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fcmnv4xfloat:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #0 // =0x0
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: mov v1.16b, v0.16b
+; CHECK-GI-NEXT: mov v1.h[1], v0.h[0]
+; CHECK-GI-NEXT: mov v0.h[1], v0.h[0]
+; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: mov v1.d[1], v0.d[0]
+; CHECK-GI-NEXT: shl v0.4s, v1.4s, #31
+; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #31
+; CHECK-GI-NEXT: ret
+ %tmp3 = fcmp false <4 x float> %A, %B
+ %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
+}
+define <2 x i64> @fcmnv2xdouble(<2 x double> %A, <2 x double> %B) {
+; CHECK-LABEL: fcmnv2xdouble:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: ret
+ %tmp3 = fcmp false <2 x double> %A, %B
+ %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+ ret <2 x i64> %tmp4
+}
+
define <2 x i32> @fcmoeqz2xfloat(<2 x float> %A) {
; CHECK-LABEL: fcmoeqz2xfloat:
; CHECK: // %bb.0:
|
@arsenm How is the procedure for such crash fixes. I am unsure, because this is my first one. Do we need to downport this to release/18.x? |
/cherry-pick c482fad |
Error: Command failed due to missing milestone. |
/cherry-pick c482fad |
…tion (llvm#86972) Fixes llvm#86917 `FCMP_TRUE` and `FCMP_FALSE` were previously not considered and we ended up in an llvm_unreachable assertion. (cherry picked from commit c482fad)
/pull-request #91126 |
…tion (llvm#86972) Fixes llvm#86917 `FCMP_TRUE` and `FCMP_FALSE` were previously not considered and we ended up in an llvm_unreachable assertion.
…tion (llvm#86972) Fixes llvm#86917 `FCMP_TRUE` and `FCMP_FALSE` were previously not considered and we ended up in an llvm_unreachable assertion.
Fixes #86917
FCMP_TRUE
andFCMP_FALSE
were previously not considered and we ended up in an llvm_unreachable assertion.