Skip to content

Commit 735d721

Browse files
w2yehia and Wael Yehia authored
[PowerPC] Fix handling of undefs in the PPC::isSplatShuffleMask query (#145149)
Currently, the query assumes that an undef byte at the start of an element implies that the remaining `EltSize - 1` bytes of that element are also undef, but that is not always true. For example, `isSplatShuffleMask(<0,1,2,3,4,5,6,7,undef,undef,undef,undef,0,1,2,3>, 8)` should return false.

---------

Co-authored-by: Wael Yehia <[email protected]>
1 parent 319a51a commit 735d721

File tree

2 files changed

+22
-4
lines changed

2 files changed

+22
-4
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2242,10 +2242,15 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
22422242
return false;
22432243

22442244
for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2245-
if (N->getMaskElt(i) < 0) continue;
2246-
for (unsigned j = 0; j != EltSize; ++j)
2247-
if (N->getMaskElt(i+j) != N->getMaskElt(j))
2248-
return false;
2245+
// An UNDEF element is a sequence of UNDEF bytes.
2246+
if (N->getMaskElt(i) < 0) {
2247+
for (unsigned j = 1; j != EltSize; ++j)
2248+
if (N->getMaskElt(i + j) >= 0)
2249+
return false;
2250+
} else
2251+
for (unsigned j = 0; j != EltSize; ++j)
2252+
if (N->getMaskElt(i + j) != N->getMaskElt(j))
2253+
return false;
22492254
}
22502255
return true;
22512256
}

llvm/test/CodeGen/PowerPC/pr141642.ll

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -O0 -debug-only=selectiondag -o - < %s 2>&1 | \
2+
; RUN: FileCheck %s
3+
; CHECK-NOT: lxvdsx
4+
; CHECK-NOT: LD_SPLAT
5+
6+
define weak_odr dso_local void @unpack(ptr noalias noundef %packed_in) local_unnamed_addr {
7+
entry:
8+
%ld = load <2 x i32>, ptr %packed_in, align 2
9+
%shuf = shufflevector <2 x i32> %ld, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 0>
10+
%ie = insertelement <4 x i32> %shuf, i32 7, i32 2
11+
store <4 x i32> %shuf, ptr %packed_in, align 2
12+
ret void
13+
}

0 commit comments

Comments
 (0)