From 9f70feba847f91a9aea74525979b0bf80c246067 Mon Sep 17 00:00:00 2001 From: Matthew Devereau Date: Thu, 13 Feb 2025 12:47:32 +0000 Subject: [PATCH 1/3] [InstCombine] Fold cttz with power of 2 operands (#121386) Introduced cttz intrinsics which caused a regression where vscale/vscale divisions could no longer be constant folded. This fold was suggested as a fix in (#126411) --- .../InstCombine/InstCombineShifts.cpp | 16 +++++++++ .../Transforms/InstCombine/shift-cttz-ctlz.ll | 34 +++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index 7ef95800975db..ac0f9b005f317 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -1613,6 +1613,22 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) { if (Instruction *Overflow = foldLShrOverflowBit(I)) return Overflow; + // Transform ((pow2 << x) >> cttz(pow2 << y)) -> ((1 << x) >> y) + Value *Shl0_Op0, *Shl0_Op1, *Shl1_Op0, *Shl1_Op1; + BinaryOperator *Shl1; + if (match(Op0, m_Shl(m_Value(Shl0_Op0), m_Value(Shl0_Op1))) && + match(Op1, m_Intrinsic<Intrinsic::cttz>(m_BinOp(Shl1))) && + match(Shl1, m_Shl(m_Value(Shl1_Op0), m_Value(Shl1_Op1))) && + isKnownToBeAPowerOfTwo(Shl1, false, 0, SQ.getWithInstruction(&I).CxtI) && + Shl0_Op0 == Shl1_Op0) { + auto *Shl0 = cast<BinaryOperator>(Op0); + if ((Shl0->hasNoUnsignedWrap() && Shl1->hasNoUnsignedWrap()) || + (Shl0->hasNoSignedWrap() && Shl1->hasNoSignedWrap())) { + Value *NewShl = + Builder.CreateShl(ConstantInt::get(Shl1->getType(), 1), Shl0_Op1); + return BinaryOperator::CreateLShr(NewShl, Shl1_Op1); + } + } return nullptr; } diff --git a/llvm/test/Transforms/InstCombine/shift-cttz-ctlz.ll b/llvm/test/Transforms/InstCombine/shift-cttz-ctlz.ll index 63caec9501325..6269f29c880e3 100644 --- a/llvm/test/Transforms/InstCombine/shift-cttz-ctlz.ll +++ b/llvm/test/Transforms/InstCombine/shift-cttz-ctlz.ll @@ -103,4 
+103,38 @@ entry: ret i32 %res } +define i64 @fold_cttz_64() vscale_range(1,16) { +; CHECK-LABEL: define i64 @fold_cttz_64( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i64 4 +; +entry: + %0 = tail call i64 @llvm.vscale.i64() + %1 = shl nuw nsw i64 %0, 4 + %2 = shl nuw nsw i64 %0, 2 + %3 = tail call range(i64 2, 65) i64 @llvm.cttz.i64(i64 %2, i1 true) + %div1 = lshr i64 %1, %3 + ret i64 %div1 +} + +define i32 @fold_cttz_32() vscale_range(1,16) { +; CHECK-LABEL: define i32 @fold_cttz_32( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i32 4 +; +entry: + %0 = tail call i32 @llvm.vscale.i32() + %1 = shl nuw nsw i32 %0, 4 + %2 = shl nuw nsw i32 %0, 2 + %3 = tail call range(i32 2, 65) i32 @llvm.cttz.i32(i32 %2, i1 true) + %div1 = lshr i32 %1, %3 + ret i32 %div1 +} + +declare i64 @llvm.vscale.i64() +declare i64 @llvm.cttz.i64(i64, i1 immarg) +declare i32 @llvm.vscale.i32() +declare i32 @llvm.cttz.i32(i32, i1 immarg) declare void @use(i32) From c26fff81334e978a5def0b53d37555da90af459c Mon Sep 17 00:00:00 2001 From: Matthew Devereau Date: Thu, 13 Feb 2025 16:43:18 +0000 Subject: [PATCH 2/3] Add fold for zero Propagate nowrap flags to new shift Use named values in tests Remove intrinsic declarations --- .../InstCombine/InstCombineShifts.cpp | 17 ++++++------- .../Transforms/InstCombine/shift-cttz-ctlz.ll | 24 ++++++++----------- 2 files changed, 19 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index ac0f9b005f317..6c3efb41c6a17 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -1614,18 +1614,19 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) { return Overflow; // Transform ((pow2 << x) >> cttz(pow2 << y)) -> ((1 << x) >> y) - Value *Shl0_Op0, *Shl0_Op1, *Shl1_Op0, *Shl1_Op1; + Value *Shl0_Op0, *Shl0_Op1, *Shl1_Op1; 
BinaryOperator *Shl1; if (match(Op0, m_Shl(m_Value(Shl0_Op0), m_Value(Shl0_Op1))) && match(Op1, m_Intrinsic<Intrinsic::cttz>(m_BinOp(Shl1))) && - match(Shl1, m_Shl(m_Value(Shl1_Op0), m_Value(Shl1_Op1))) && - isKnownToBeAPowerOfTwo(Shl1, false, 0, SQ.getWithInstruction(&I).CxtI) && - Shl0_Op0 == Shl1_Op0) { + match(Shl1, m_Shl(m_Specific(Shl0_Op0), m_Value(Shl1_Op1))) && + isKnownToBeAPowerOfTwo(Shl0_Op0, /*OrZero=*/true, 0, &I)) { auto *Shl0 = cast<BinaryOperator>(Op0); - if ((Shl0->hasNoUnsignedWrap() && Shl1->hasNoUnsignedWrap()) || - (Shl0->hasNoSignedWrap() && Shl1->hasNoSignedWrap())) { - Value *NewShl = - Builder.CreateShl(ConstantInt::get(Shl1->getType(), 1), Shl0_Op1); + bool HasNUW = Shl0->hasNoUnsignedWrap() && Shl1->hasNoUnsignedWrap(); + bool HasNSW = Shl0->hasNoSignedWrap() && Shl1->hasNoSignedWrap(); + if (HasNUW || HasNSW) { + Value *NewShl = Builder.CreateShl(ConstantInt::get(Shl1->getType(), 1), + Shl0_Op1, "", HasNUW, HasNSW); + Builder.CreateShl(ConstantInt::get(Shl1->getType(), 1), Shl0_Op1); return BinaryOperator::CreateLShr(NewShl, Shl1_Op1); } } diff --git a/llvm/test/Transforms/InstCombine/shift-cttz-ctlz.ll b/llvm/test/Transforms/InstCombine/shift-cttz-ctlz.ll index 6269f29c880e3..e82e33e9d7f04 100644 --- a/llvm/test/Transforms/InstCombine/shift-cttz-ctlz.ll +++ b/llvm/test/Transforms/InstCombine/shift-cttz-ctlz.ll @@ -110,11 +110,11 @@ define i64 @fold_cttz_64() vscale_range(1,16) { ; CHECK-NEXT: ret i64 4 ; entry: - %0 = tail call i64 @llvm.vscale.i64() - %1 = shl nuw nsw i64 %0, 4 - %2 = shl nuw nsw i64 %0, 2 - %3 = tail call range(i64 2, 65) i64 @llvm.cttz.i64(i64 %2, i1 true) - %div1 = lshr i64 %1, %3 + %vscale = tail call i64 @llvm.vscale.i64() + %shl0 = shl nuw nsw i64 %vscale, 4 + %shl1 = shl nuw nsw i64 %vscale, 2 + %cttz = tail call range(i64 2, 65) i64 @llvm.cttz.i64(i64 %shl1, i1 true) + %div1 = lshr i64 %shl0, %cttz ret i64 %div1 } @@ -125,16 +125,12 @@ define i32 @fold_cttz_32() vscale_range(1,16) { ; CHECK-NEXT: ret i32 4 ; entry: - %0 = tail call i32 
@llvm.vscale.i32() - %1 = shl nuw nsw i32 %0, 4 - %2 = shl nuw nsw i32 %0, 2 - %3 = tail call range(i32 2, 65) i32 @llvm.cttz.i32(i32 %2, i1 true) - %div1 = lshr i32 %1, %3 + %vscale = tail call i32 @llvm.vscale.i32() + %shl0 = shl nuw nsw i32 %vscale, 4 + %shl1 = shl nuw nsw i32 %vscale, 2 + %cttz = tail call range(i32 2, 65) i32 @llvm.cttz.i32(i32 %shl1, i1 true) + %div1 = lshr i32 %shl0, %cttz ret i32 %div1 } -declare i64 @llvm.vscale.i64() -declare i64 @llvm.cttz.i64(i64, i1 immarg) -declare i32 @llvm.vscale.i32() -declare i32 @llvm.cttz.i32(i32, i1 immarg) declare void @use(i32) From 4c2004ae5f2cb0b3900bbd91be0d642012223c3d Mon Sep 17 00:00:00 2001 From: Matthew Devereau Date: Mon, 17 Feb 2025 10:39:48 +0000 Subject: [PATCH 3/3] Remove unnecessary line --- llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index 6c3efb41c6a17..90cd279e8a457 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -1626,7 +1626,6 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) { if (HasNUW || HasNSW) { Value *NewShl = Builder.CreateShl(ConstantInt::get(Shl1->getType(), 1), Shl0_Op1, "", HasNUW, HasNSW); - Builder.CreateShl(ConstantInt::get(Shl1->getType(), 1), Shl0_Op1); return BinaryOperator::CreateLShr(NewShl, Shl1_Op1); } }