Skip to content

Conversation

dtcxzyw
Copy link
Member

@dtcxzyw dtcxzyw commented Oct 25, 2024

This patch is inspired by #113686. I found that it removes a lot of unnecessary "and X, 1" in some applications that represent boolean values with int.

@dtcxzyw dtcxzyw requested a review from nikic as a code owner October 25, 2024 16:26
@llvmbot llvmbot added llvm:analysis Includes value tracking, cost tables and constant folding llvm:transforms labels Oct 25, 2024
@llvmbot
Copy link
Member

llvmbot commented Oct 25, 2024

@llvm/pr-subscribers-backend-x86

@llvm/pr-subscribers-llvm-analysis

Author: Yingwei Zheng (dtcxzyw)

Changes

This patch is inspired by #113686. I found that it removes a lot of unnecessary "and X, 1" in some applications that represent boolean values with int.


Full diff: https://github.com/llvm/llvm-project/pull/113707.diff

2 Files Affected:

  • (modified) llvm/lib/Analysis/ValueTracking.cpp (+7)
  • (modified) llvm/test/Transforms/InstCombine/known-phi-recurse.ll (+115)
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index e9ed8b3c862b55..e443488a56b3cb 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1566,6 +1566,13 @@ static void computeKnownBitsFromOperator(const Operator *I,
         // Skip direct self references.
         if (IncValue == P) continue;
 
+        // If the Use is a select of this phi, use the knownbit of the other
+        // operand to break the recursion.
+        Value *V;
+        if (match(IncValue, m_Select(m_Value(), m_Specific(P), m_Value(V))) ||
+            match(IncValue, m_Select(m_Value(), m_Value(V), m_Specific(P))))
+          IncValue = V;
+
         // Change the context instruction to the "edge" that flows into the
         // phi. This is important because that is where the value is actually
         // "evaluated" even though it is used later somewhere else. (see also
diff --git a/llvm/test/Transforms/InstCombine/known-phi-recurse.ll b/llvm/test/Transforms/InstCombine/known-phi-recurse.ll
index c2007d16ae93be..fd3728324b8ea8 100644
--- a/llvm/test/Transforms/InstCombine/known-phi-recurse.ll
+++ b/llvm/test/Transforms/InstCombine/known-phi-recurse.ll
@@ -142,3 +142,118 @@ end:
   ret i32 %res
 }
 
+define i32 @knownbits_phi_select_test1(ptr %p1, ptr %p2, i8 %x) {
+; CHECK-LABEL: @knownbits_phi_select_test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDVAR1:%.*]] = phi i8 [ [[LOAD2:%.*]], [[BB2:%.*]] ], [ [[X:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[INDVAR3:%.*]] = phi ptr [ [[INDVAR3_NEXT:%.*]], [[BB2]] ], [ [[P1:%.*]], [[ENTRY]] ]
+; CHECK-NEXT:    [[INDVAR4:%.*]] = phi i32 [ [[INDVAR4_NEXT:%.*]], [[BB2]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT:    [[INDVAR5:%.*]] = phi i32 [ [[INDVAR5_NEXT:%.*]], [[BB2]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT:    switch i8 [[INDVAR1]], label [[DEFAULT:%.*]] [
+; CHECK-NEXT:      i8 0, label [[EXIT:%.*]]
+; CHECK-NEXT:      i8 59, label [[BB1:%.*]]
+; CHECK-NEXT:      i8 35, label [[BB1]]
+; CHECK-NEXT:    ]
+; CHECK:       default:
+; CHECK-NEXT:    [[EXT:%.*]] = sext i8 [[INDVAR1]] to i64
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i16, ptr [[P2:%.*]], i64 [[EXT]]
+; CHECK-NEXT:    [[LOAD1:%.*]] = load i16, ptr [[GEP1]], align 2
+; CHECK-NEXT:    [[MASK:%.*]] = and i16 [[LOAD1]], 8192
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i16 [[MASK]], 0
+; CHECK-NEXT:    br i1 [[CMP1]], label [[BB2]], label [[BB1]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ne i32 [[INDVAR4]], 0
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp ne i32 [[INDVAR5]], 0
+; CHECK-NEXT:    [[OR_COND:%.*]] = select i1 [[CMP2]], i1 true, i1 [[CMP3]]
+; CHECK-NEXT:    br i1 [[OR_COND]], label [[BB2]], label [[EXIT]]
+; CHECK:       bb2:
+; CHECK-NEXT:    [[CMP4:%.*]] = icmp eq i8 [[INDVAR1]], 39
+; CHECK-NEXT:    [[EXT2:%.*]] = zext i1 [[CMP4]] to i32
+; CHECK-NEXT:    [[INDVAR4_NEXT]] = xor i32 [[INDVAR4]], [[EXT2]]
+; CHECK-NEXT:    [[CMP6:%.*]] = icmp eq i8 [[INDVAR1]], 34
+; CHECK-NEXT:    [[EXT3:%.*]] = zext i1 [[CMP6]] to i32
+; CHECK-NEXT:    [[INDVAR5_NEXT]] = xor i32 [[INDVAR5]], [[EXT3]]
+; CHECK-NEXT:    [[INDVAR3_NEXT]] = getelementptr inbounds i8, ptr [[INDVAR3]], i64 1
+; CHECK-NEXT:    [[LOAD2]] = load i8, ptr [[INDVAR3_NEXT]], align 1
+; CHECK-NEXT:    br label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 [[INDVAR5]]
+;
+entry:
+  br label %loop
+
+loop:
+  %indvar1 = phi i8 [ %load2, %bb2 ], [ %x, %entry ]
+  %indvar2 = phi i64 [ %indvar2.next, %bb2 ], [ 0, %entry ]
+  %indvar3 = phi ptr [ %indvar3.next, %bb2 ], [ %p1, %entry ]
+  %indvar4 = phi i32 [ %indvar4.next, %bb2 ], [ 0, %entry ]
+  %indvar5 = phi i32 [ %indvar5.next, %bb2 ], [ 0, %entry ]
+  switch i8 %indvar1, label %default [
+  i8 0, label %exit
+  i8 59, label %bb1
+  i8 35, label %bb1
+  ]
+
+default:
+  %ext = sext i8 %indvar1 to i64
+  %gep1 = getelementptr inbounds i16, ptr %p2, i64 %ext
+  %load1 = load i16, ptr %gep1, align 2
+  %mask = and i16 %load1, 8192
+  %cmp1 = icmp eq i16 %mask, 0
+  br i1 %cmp1, label %bb2, label %bb1
+
+bb1:
+  %cmp2 = icmp ne i32 %indvar4, 0
+  %cmp3 = icmp ne i32 %indvar5, 0
+  %or.cond = select i1 %cmp2, i1 true, i1 %cmp3
+  br i1 %or.cond, label %bb2, label %exit
+
+bb2:
+  %cmp4 = icmp eq i8 %indvar1, 39
+  %cmp5 = icmp eq i32 %indvar4, 0
+  %ext2 = zext i1 %cmp5 to i32
+  %indvar4.next = select i1 %cmp4, i32 %ext2, i32 %indvar4
+  %cmp6 = icmp eq i8 %indvar1, 34
+  %cmp7 = icmp eq i32 %indvar5, 0
+  %ext3 = zext i1 %cmp7 to i32
+  %indvar5.next = select i1 %cmp6, i32 %ext3, i32 %indvar5
+  %indvar3.next = getelementptr inbounds i8, ptr %indvar3, i64 1
+  %indvar2.next = add i64 %indvar2, 1
+  %load2 = load i8, ptr %indvar3.next, align 1
+  br label %loop
+
+exit:
+  ret i32 %indvar5
+}
+
+define i8 @knownbits_phi_select_test2() {
+; CHECK-LABEL: @knownbits_phi_select_test2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDVAR:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[CONTAIN:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[COND0:%.*]] = call i1 @cond()
+; CHECK-NEXT:    [[CONTAIN]] = select i1 [[COND0]], i8 1, i8 [[INDVAR]]
+; CHECK-NEXT:    [[COND1:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[COND1]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i8 [[CONTAIN]]
+;
+entry:
+  br label %loop
+
+loop:
+  %indvar = phi i8 [ 0, %entry ], [ %contain, %loop ]
+  %cond0 = call i1 @cond()
+  %contain = select i1 %cond0, i8 1, i8 %indvar
+  %cond1 = call i1 @cond()
+  br i1 %cond1, label %exit, label %loop
+
+exit:
+  %bool = and i8 %contain, 1
+  ret i8 %bool
+}
+
+declare i1 @cond()

@llvmbot
Copy link
Member

llvmbot commented Oct 25, 2024

@llvm/pr-subscribers-llvm-transforms

Author: Yingwei Zheng (dtcxzyw)

Changes

This patch is inspired by #113686. I found that it removes a lot of unnecessary "and X, 1" in some applications that represent boolean values with int.


Full diff: https://github.com/llvm/llvm-project/pull/113707.diff

2 Files Affected:

  • (modified) llvm/lib/Analysis/ValueTracking.cpp (+7)
  • (modified) llvm/test/Transforms/InstCombine/known-phi-recurse.ll (+115)
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index e9ed8b3c862b55..e443488a56b3cb 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1566,6 +1566,13 @@ static void computeKnownBitsFromOperator(const Operator *I,
         // Skip direct self references.
         if (IncValue == P) continue;
 
+        // If the Use is a select of this phi, use the knownbit of the other
+        // operand to break the recursion.
+        Value *V;
+        if (match(IncValue, m_Select(m_Value(), m_Specific(P), m_Value(V))) ||
+            match(IncValue, m_Select(m_Value(), m_Value(V), m_Specific(P))))
+          IncValue = V;
+
         // Change the context instruction to the "edge" that flows into the
         // phi. This is important because that is where the value is actually
         // "evaluated" even though it is used later somewhere else. (see also
diff --git a/llvm/test/Transforms/InstCombine/known-phi-recurse.ll b/llvm/test/Transforms/InstCombine/known-phi-recurse.ll
index c2007d16ae93be..fd3728324b8ea8 100644
--- a/llvm/test/Transforms/InstCombine/known-phi-recurse.ll
+++ b/llvm/test/Transforms/InstCombine/known-phi-recurse.ll
@@ -142,3 +142,118 @@ end:
   ret i32 %res
 }
 
+define i32 @knownbits_phi_select_test1(ptr %p1, ptr %p2, i8 %x) {
+; CHECK-LABEL: @knownbits_phi_select_test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDVAR1:%.*]] = phi i8 [ [[LOAD2:%.*]], [[BB2:%.*]] ], [ [[X:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[INDVAR3:%.*]] = phi ptr [ [[INDVAR3_NEXT:%.*]], [[BB2]] ], [ [[P1:%.*]], [[ENTRY]] ]
+; CHECK-NEXT:    [[INDVAR4:%.*]] = phi i32 [ [[INDVAR4_NEXT:%.*]], [[BB2]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT:    [[INDVAR5:%.*]] = phi i32 [ [[INDVAR5_NEXT:%.*]], [[BB2]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT:    switch i8 [[INDVAR1]], label [[DEFAULT:%.*]] [
+; CHECK-NEXT:      i8 0, label [[EXIT:%.*]]
+; CHECK-NEXT:      i8 59, label [[BB1:%.*]]
+; CHECK-NEXT:      i8 35, label [[BB1]]
+; CHECK-NEXT:    ]
+; CHECK:       default:
+; CHECK-NEXT:    [[EXT:%.*]] = sext i8 [[INDVAR1]] to i64
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds i16, ptr [[P2:%.*]], i64 [[EXT]]
+; CHECK-NEXT:    [[LOAD1:%.*]] = load i16, ptr [[GEP1]], align 2
+; CHECK-NEXT:    [[MASK:%.*]] = and i16 [[LOAD1]], 8192
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i16 [[MASK]], 0
+; CHECK-NEXT:    br i1 [[CMP1]], label [[BB2]], label [[BB1]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ne i32 [[INDVAR4]], 0
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp ne i32 [[INDVAR5]], 0
+; CHECK-NEXT:    [[OR_COND:%.*]] = select i1 [[CMP2]], i1 true, i1 [[CMP3]]
+; CHECK-NEXT:    br i1 [[OR_COND]], label [[BB2]], label [[EXIT]]
+; CHECK:       bb2:
+; CHECK-NEXT:    [[CMP4:%.*]] = icmp eq i8 [[INDVAR1]], 39
+; CHECK-NEXT:    [[EXT2:%.*]] = zext i1 [[CMP4]] to i32
+; CHECK-NEXT:    [[INDVAR4_NEXT]] = xor i32 [[INDVAR4]], [[EXT2]]
+; CHECK-NEXT:    [[CMP6:%.*]] = icmp eq i8 [[INDVAR1]], 34
+; CHECK-NEXT:    [[EXT3:%.*]] = zext i1 [[CMP6]] to i32
+; CHECK-NEXT:    [[INDVAR5_NEXT]] = xor i32 [[INDVAR5]], [[EXT3]]
+; CHECK-NEXT:    [[INDVAR3_NEXT]] = getelementptr inbounds i8, ptr [[INDVAR3]], i64 1
+; CHECK-NEXT:    [[LOAD2]] = load i8, ptr [[INDVAR3_NEXT]], align 1
+; CHECK-NEXT:    br label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 [[INDVAR5]]
+;
+entry:
+  br label %loop
+
+loop:
+  %indvar1 = phi i8 [ %load2, %bb2 ], [ %x, %entry ]
+  %indvar2 = phi i64 [ %indvar2.next, %bb2 ], [ 0, %entry ]
+  %indvar3 = phi ptr [ %indvar3.next, %bb2 ], [ %p1, %entry ]
+  %indvar4 = phi i32 [ %indvar4.next, %bb2 ], [ 0, %entry ]
+  %indvar5 = phi i32 [ %indvar5.next, %bb2 ], [ 0, %entry ]
+  switch i8 %indvar1, label %default [
+  i8 0, label %exit
+  i8 59, label %bb1
+  i8 35, label %bb1
+  ]
+
+default:
+  %ext = sext i8 %indvar1 to i64
+  %gep1 = getelementptr inbounds i16, ptr %p2, i64 %ext
+  %load1 = load i16, ptr %gep1, align 2
+  %mask = and i16 %load1, 8192
+  %cmp1 = icmp eq i16 %mask, 0
+  br i1 %cmp1, label %bb2, label %bb1
+
+bb1:
+  %cmp2 = icmp ne i32 %indvar4, 0
+  %cmp3 = icmp ne i32 %indvar5, 0
+  %or.cond = select i1 %cmp2, i1 true, i1 %cmp3
+  br i1 %or.cond, label %bb2, label %exit
+
+bb2:
+  %cmp4 = icmp eq i8 %indvar1, 39
+  %cmp5 = icmp eq i32 %indvar4, 0
+  %ext2 = zext i1 %cmp5 to i32
+  %indvar4.next = select i1 %cmp4, i32 %ext2, i32 %indvar4
+  %cmp6 = icmp eq i8 %indvar1, 34
+  %cmp7 = icmp eq i32 %indvar5, 0
+  %ext3 = zext i1 %cmp7 to i32
+  %indvar5.next = select i1 %cmp6, i32 %ext3, i32 %indvar5
+  %indvar3.next = getelementptr inbounds i8, ptr %indvar3, i64 1
+  %indvar2.next = add i64 %indvar2, 1
+  %load2 = load i8, ptr %indvar3.next, align 1
+  br label %loop
+
+exit:
+  ret i32 %indvar5
+}
+
+define i8 @knownbits_phi_select_test2() {
+; CHECK-LABEL: @knownbits_phi_select_test2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDVAR:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[CONTAIN:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[COND0:%.*]] = call i1 @cond()
+; CHECK-NEXT:    [[CONTAIN]] = select i1 [[COND0]], i8 1, i8 [[INDVAR]]
+; CHECK-NEXT:    [[COND1:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[COND1]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i8 [[CONTAIN]]
+;
+entry:
+  br label %loop
+
+loop:
+  %indvar = phi i8 [ 0, %entry ], [ %contain, %loop ]
+  %cond0 = call i1 @cond()
+  %contain = select i1 %cond0, i8 1, i8 %indvar
+  %cond1 = call i1 @cond()
+  br i1 %cond1, label %exit, label %loop
+
+exit:
+  %bool = and i8 %contain, 1
+  ret i8 %bool
+}
+
+declare i1 @cond()

@goldsteinn
Copy link
Contributor

This looks fine although I have two notes.

  1. since we are sure to be breaking the recursion here we can probably do a full depth computeKnownBits below.

  2. it would be nice if we could also use the selects compare context.

@dtcxzyw
Copy link
Member Author

dtcxzyw commented Oct 26, 2024

it would be nice if we could also use the selects compare context.

Looks like it breaks the stage2 build.

@dtcxzyw
Copy link
Member Author

dtcxzyw commented Oct 26, 2024

it would be nice if we could also use the selects compare context.

It doesn't improve anything (with +0.01% compile-time overhead). Should I drop it?

@goldsteinn
Copy link
Contributor

goldsteinn commented Oct 26, 2024

it would be nice if we could also use the selects compare context.

It doesn't improve anything (with +0.01% compile-time overhead). Should I drop it?

If it doesn't improve anything yeah up to you, although IMO leave a comment explaining as much.

Edit: Typically I prefer completeness, although I can't really argue on behalf of the change if there are
no changes from it except a minor compile time disadvantage.

Copy link
Contributor

@goldsteinn goldsteinn left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM like this or if you want to revert to prior version.

@goldsteinn
Copy link
Contributor

Are you going to update to handle #114008.
If not I can post a patch after you merge this.

@dtcxzyw
Copy link
Member Author

dtcxzyw commented Oct 30, 2024

Are you going to update to handle #114008. If not I can post a patch after you merge this.

No, feel free to do this :)

@dtcxzyw dtcxzyw merged commit f1e1055 into llvm:main Nov 2, 2024
8 checks passed
@dtcxzyw dtcxzyw deleted the perf/knownbits-phi-select branch November 2, 2024 07:45
goldsteinn added a commit to goldsteinn/llvm-project that referenced this pull request Nov 3, 2024
goldsteinn added a commit to goldsteinn/llvm-project that referenced this pull request Nov 3, 2024
smallp-o-p pushed a commit to smallp-o-p/llvm-project that referenced this pull request Nov 3, 2024
…3707)

This patch is inspired by
llvm#113686. I found that it
removes a lot of unnecessary "and X, 1" in some applications that
represent boolean values with int.
goldsteinn added a commit to goldsteinn/llvm-project that referenced this pull request Nov 3, 2024
if (SI->getTrueValue() == P || SI->getFalseValue() == P) {
IncValue = SI->getTrueValue() == P ? SI->getFalseValue()
: SI->getTrueValue();
IncDepth = Depth + 1;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Forwarding the question from #114689 (review) because the issue was introduced here: Why do we use the full depth for the select case?

As far as I can tell, a) there may be multiple incoming values in this form, resulting in a fan-out problem and b) we are not actually guaranteed that this breaks recursion (because the other select operand may also be based on the phi indirectly) so the fan-out may be recursive as well.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Answered in #114689 (comment), lets keep the conversion in one place.

NoumanAmir657 pushed a commit to NoumanAmir657/llvm-project that referenced this pull request Nov 4, 2024
…3707)

This patch is inspired by
llvm#113686. I found that it
removes a lot of unnecessary "and X, 1" in some applications that
represent boolean values with int.
nikic added a commit to nikic/llvm-project that referenced this pull request Nov 5, 2024
As discussed on llvm#114689 (review) and following, there is no principled reason why
the phi of select case should have a different recursion limit
than the general case. There may still be fan-out, and there may
still be indirect recursion. Revert that part of llvm#113707.
nikic added a commit that referenced this pull request Nov 7, 2024
As discussed on
#114689 (review)
and following, there is no principled reason why the phi of select case
should have a different recursion limit than the general case. There may
still be fan-out, and there may still be indirect recursion. Revert that
part of #113707.
banach-space pushed a commit to banach-space/llvm-project that referenced this pull request Nov 7, 2024
As discussed on
llvm#114689 (review)
and following, there is no principled reason why the phi of select case
should have a different recursion limit than the general case. There may
still be fan-out, and there may still be indirect recursion. Revert that
part of llvm#113707.
goldsteinn added a commit to goldsteinn/llvm-project that referenced this pull request Jan 11, 2025
goldsteinn added a commit that referenced this pull request Jan 22, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:X86 llvm:analysis Includes value tracking, cost tables and constant folding llvm:transforms
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants