-
Notifications
You must be signed in to change notification settings - Fork 13.7k
[SDAG] Intersect poison-generating flags after CSE #97434
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-llvm-selectiondag Author: Yingwei Zheng (dtcxzyw) ChangesThis patch fixes a miscompilation when
An alternative is to block CSE by taking SDFlags into account in Fixes #96366. Full diff: https://github.com/llvm/llvm-project/pull/97434.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index bc16f885f6a04..96242305e9eab 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1239,6 +1239,7 @@ SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N) {
// If there was already an existing matching node, use ReplaceAllUsesWith
// to replace the dead one with the existing one. This can cause
// recursive merging of other unrelated nodes down the line.
+ Existing->intersectFlagsWith(N->getFlags());
ReplaceAllUsesWith(N, Existing);
// N is now dead. Inform the listeners and delete it.
diff --git a/llvm/test/CodeGen/AArch64/pr96366.ll b/llvm/test/CodeGen/AArch64/pr96366.ll
new file mode 100644
index 0000000000000..392b7b66b0bb1
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/pr96366.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
+
+declare void @mumble(i32)
+
+define i32 @f(i32 %0) nounwind {
+; CHECK-LABEL: f:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT: mov w19, w0
+; CHECK-NEXT: neg w0, w0
+; CHECK-NEXT: bl mumble
+; CHECK-NEXT: mov w8, #4 // =0x4
+; CHECK-NEXT: sub w0, w8, w19
+; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+ %2 = sub nuw i32 0, %0
+ call void @mumble(i32 %2)
+ %3 = sub i32 1, %0
+ %4 = sub i32 3, %0
+ %5 = mul i32 %0, 1
+ %6 = add i32 %3, %5
+ %7 = add i32 %6, %4
+ ret i32 %7
+}
diff --git a/llvm/test/CodeGen/RISCV/pr96366.ll b/llvm/test/CodeGen/RISCV/pr96366.ll
new file mode 100644
index 0000000000000..05563d7bc14af
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/pr96366.ll
@@ -0,0 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv64 | FileCheck %s
+
+declare void @mumble(i32)
+
+define i32 @f(i32 %0) nounwind {
+; CHECK-LABEL: f:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT: mv s0, a0
+; CHECK-NEXT: negw a0, a0
+; CHECK-NEXT: call mumble
+; CHECK-NEXT: li a0, 4
+; CHECK-NEXT: subw a0, a0, s0
+; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %2 = sub nuw i32 0, %0
+ call void @mumble(i32 %2)
+ %3 = sub i32 1, %0
+ %4 = sub i32 3, %0
+ %5 = mul i32 %0, 1
+ %6 = add i32 %3, %5
+ %7 = add i32 %6, %4
+ ret i32 %7
+}
|
|
This patch fixes a miscompilation when `N` gets CSEed to `Existing`: ``` Existing: t5: i32 = sub nuw Constant:i32<0>, t3 N: t30: i32 = sub Constant:i32<0>, t3 ``` Fixes llvm#96366.
This patch fixes a miscompilation when `N` gets CSEed to `Existing`: ``` Existing: t5: i32 = sub nuw Constant:i32<0>, t3 N: t30: i32 = sub Constant:i32<0>, t3 ``` Fixes llvm#96366.
This patch fixes a miscompilation when
N
gets CSEed toExisting
:An alternative is to block CSE by taking SDFlags into account in
AddNodeIDNode
. It will enable more folds (e.g.,sub nuw 0, X -> 0
in this case).Fixes #96366.