From 377c85908b8e0709c60d378eb3849f7c8bb0eb46 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 13 Feb 2024 16:41:00 +0100 Subject: [PATCH] [DAGCombine] Fix multi-use miscompile in load combine (#81586) The load combine replaces a number of original loads with one new loads and also replaces the output chains of the original loads with the output chain of the new load. This is incorrect if the original load is retained (due to multi-use), as it may get incorrectly reordered. Fix this by using makeEquivalentMemoryOrdering() instead, which will create a TokenFactor with both chains. Fixes https://github.com/llvm/llvm-project/issues/80911. (cherry picked from commit 25b9ed6e4964344e3710359bec4c831e5a8448b9) --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +- llvm/test/CodeGen/X86/load-combine.ll | 32 +++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 98d8a6d9409f2..3135ec73a99e7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -9253,7 +9253,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { // Transfer chain users from old loads to the new load. for (LoadSDNode *L : Loads) - DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1)); + DAG.makeEquivalentMemoryOrdering(L, NewLoad); if (!NeedsBswap) return NewLoad; diff --git a/llvm/test/CodeGen/X86/load-combine.ll b/llvm/test/CodeGen/X86/load-combine.ll index 7f8115dc1ce38..b5f3e78991881 100644 --- a/llvm/test/CodeGen/X86/load-combine.ll +++ b/llvm/test/CodeGen/X86/load-combine.ll @@ -1282,3 +1282,35 @@ define i32 @zext_load_i32_by_i8_bswap_shl_16(ptr %arg) { %tmp8 = or i32 %tmp7, %tmp30 ret i32 %tmp8 } + +define i32 @pr80911_vector_load_multiuse(ptr %ptr, ptr %clobber) nounwind { +; CHECK-LABEL: pr80911_vector_load_multiuse: +; CHECK: # %bb.0: +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl (%edx), %esi +; CHECK-NEXT: movzwl (%edx), %eax +; CHECK-NEXT: movl $0, (%ecx) +; CHECK-NEXT: movl %esi, (%edx) +; CHECK-NEXT: popl %esi +; CHECK-NEXT: retl +; +; CHECK64-LABEL: pr80911_vector_load_multiuse: +; CHECK64: # %bb.0: +; CHECK64-NEXT: movl (%rdi), %ecx +; CHECK64-NEXT: movzwl (%rdi), %eax +; CHECK64-NEXT: movl $0, (%rsi) +; CHECK64-NEXT: movl %ecx, (%rdi) +; CHECK64-NEXT: retq + %load = load <4 x i8>, ptr %ptr, align 16 + store i32 0, ptr %clobber + store <4 x i8> %load, ptr %ptr, align 16 + %e1 = extractelement <4 x i8> %load, i64 1 + %e1.ext = zext i8 %e1 to i32 + %e1.ext.shift = shl nuw nsw i32 %e1.ext, 8 + %e0 = extractelement <4 x i8> %load, i64 0 + %e0.ext = zext i8 %e0 to i32 + %res = or i32 %e1.ext.shift, %e0.ext + ret i32 %res +}