Skip to content

Commit 6e4c580

Browse files
authored
[AArch64] Guard against non-vector abd long nodes. (#102026)
This fixes a problem that shows up when abd nodes are generated more readily (#92576). The fold of abd nodes into abdl must check that the inputs are vectors of the correct form. The added test relies on vector legalization occurring so that the combine is reached at the wrong time, on a non-vector abd node.
1 parent 37d7b06 commit 6e4c580

File tree

3 files changed

+49
-0
lines changed

3 files changed

+49
-0
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21769,6 +21769,7 @@ static SDValue performExtendCombine(SDNode *N,
2176921769
// helps the backend to decide that an sabdl2 would be useful, saving a real
2177021770
// extract_high operation.
2177121771
if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND &&
21772+
N->getOperand(0).getValueType().is64BitVector() &&
2177221773
(N->getOperand(0).getOpcode() == ISD::ABDU ||
2177321774
N->getOperand(0).getOpcode() == ISD::ABDS)) {
2177421775
SDNode *ABDNode = N->getOperand(0).getNode();

llvm/test/CodeGen/AArch64/abds.ll

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -571,6 +571,28 @@ define i32 @abd_sub_i32(i32 %a, i32 %b) nounwind {
571571
ret i32 %abs
572572
}
573573

574+
; Signed variant: abs(sext(a) - sext(b)) is computed on scalar i32 and then
; zero-extended to i64, so the operand of the zext is a scalar, not a vector.
; The result is added to an add-reduction of an all-zero <32 x i64> vector.
; NOTE(review): the <32 x i64> reduction presumably exists only to force vector
; legalization, so that the zext combine runs late on the scalar node - confirm.
define i64 @vector_legalized(i16 %a, i16 %b) {
575+
; CHECK-LABEL: vector_legalized:
576+
; CHECK: // %bb.0:
577+
; CHECK-NEXT: movi v0.2d, #0000000000000000
578+
; CHECK-NEXT: sxth w8, w0
579+
; CHECK-NEXT: sub w8, w8, w1, sxth
580+
; CHECK-NEXT: addp d0, v0.2d
581+
; CHECK-NEXT: cmp w8, #0
582+
; CHECK-NEXT: cneg w8, w8, mi
583+
; CHECK-NEXT: fmov x9, d0
584+
; CHECK-NEXT: add x0, x9, x8
585+
; CHECK-NEXT: ret
586+
%ea = sext i16 %a to i32
587+
%eb = sext i16 %b to i32
588+
%s = sub i32 %ea, %eb
589+
%ab = call i32 @llvm.abs.i32(i32 %s, i1 false)
590+
%e = zext i32 %ab to i64
591+
%red = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> zeroinitializer)
592+
%z = add i64 %red, %e
593+
ret i64 %z
594+
}
595+
574596

575597
declare i8 @llvm.abs.i8(i8, i1)
576598
declare i16 @llvm.abs.i16(i16, i1)

llvm/test/CodeGen/AArch64/abdu.ll

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -409,6 +409,32 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
409409
ret i128 %sel
410410
}
411411

412+
;
413+
; negative tests
414+
;
415+
416+
; Unsigned variant: abs(zext(a) - zext(b)) is computed on scalar i32 and then
; zero-extended to i64, so the operand of the outer zext is a scalar, not a
; vector. The result is added to an add-reduction of an all-zero <32 x i64>
; vector.
; NOTE(review): the <32 x i64> reduction presumably exists only to force vector
; legalization, so that the zext combine runs late on the scalar node - confirm.
define i64 @vector_legalized(i16 %a, i16 %b) {
417+
; CHECK-LABEL: vector_legalized:
418+
; CHECK: // %bb.0:
419+
; CHECK-NEXT: movi v0.2d, #0000000000000000
420+
; CHECK-NEXT: and w8, w0, #0xffff
421+
; CHECK-NEXT: sub w8, w8, w1, uxth
422+
; CHECK-NEXT: cmp w8, #0
423+
; CHECK-NEXT: addp d0, v0.2d
424+
; CHECK-NEXT: cneg w8, w8, mi
425+
; CHECK-NEXT: fmov x9, d0
426+
; CHECK-NEXT: add x0, x9, x8
427+
; CHECK-NEXT: ret
428+
%ea = zext i16 %a to i32
429+
%eb = zext i16 %b to i32
430+
%s = sub i32 %ea, %eb
431+
%ab = call i32 @llvm.abs.i32(i32 %s, i1 false)
432+
%e = zext i32 %ab to i64
433+
%red = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> zeroinitializer)
434+
%z = add i64 %red, %e
435+
ret i64 %z
436+
}
437+
412438
declare i8 @llvm.abs.i8(i8, i1)
413439
declare i16 @llvm.abs.i16(i16, i1)
414440
declare i32 @llvm.abs.i32(i32, i1)

0 commit comments

Comments
 (0)