[AMDGPU] Emit S_CBRANCH_SCC for floating-point conditions. #120588

kmitropoulou · 2024-12-19T15:06:32Z

[AMDGPU] Add new test.
[AMDGPU] Emit S_CBRANCH_SCC for floating-point conditions.

llvmbot · 2024-12-19T15:07:13Z

@llvm/pr-subscribers-backend-amdgpu

Author: Konstantina Mitropoulou (kmitropoulou)

Changes

[AMDGPU] Add new test.
[AMDGPU] Emit S_CBRANCH_SCC for floating-point conditions.

Full diff: https://github.com/llvm/llvm-project/pull/120588.diff

3 Files Affected:

(modified) llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp (+4-2)
(modified) llvm/test/CodeGen/AMDGPU/branch-relaxation.ll (+1-4)
(added) llvm/test/CodeGen/AMDGPU/uniform_branch_with_floating_point_cond.ll (+97)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index c0e01a020e0eb9..97d21fb80d3dac 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -2389,13 +2389,15 @@ bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
   if (VT == MVT::i32)
     return true;
 
+  const auto *ST = static_cast<const GCNSubtarget *>(Subtarget);
   if (VT == MVT::i64) {
-    const auto *ST = static_cast<const GCNSubtarget *>(Subtarget);
-
     ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
     return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
   }
 
+  if ((VT == MVT::f32 || VT == MVT::f64) && ST->hasSALUFloatInsts())
+    return true;
+
   return false;
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
index 1d984bd49756e0..ff47c865c67e65 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
@@ -297,10 +297,7 @@ define amdgpu_kernel void @uniform_conditional_min_long_forward_vcnd_branch(ptr
 ; GFX12-NEXT:    s_load_b32 s0, s[4:5], 0x2c
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-NEXT:    s_cmp_eq_f32 s0, 0
-; GFX12-NEXT:    s_cselect_b32 s1, -1, 0
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12-NEXT:    s_and_b32 vcc_lo, exec_lo, s1
-; GFX12-NEXT:    s_cbranch_vccz .LBB2_1
+; GFX12-NEXT:    s_cbranch_scc0 .LBB2_1
 ; GFX12-NEXT:  ; %bb.3: ; %bb0
 ; GFX12-NEXT:    s_getpc_b64 s[2:3]
 ; GFX12-NEXT:  .Lpost_getpc2:
diff --git a/llvm/test/CodeGen/AMDGPU/uniform_branch_with_floating_point_cond.ll b/llvm/test/CodeGen/AMDGPU/uniform_branch_with_floating_point_cond.ll
new file mode 100644
index 00000000000000..4cf1c2af55b7e9
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/uniform_branch_with_floating_point_cond.ll
@@ -0,0 +1,97 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs  -stop-after=amdgpu-isel < %s 2>&1 | FileCheck %s
+
+@external_constant = external addrspace(4) constant i32, align 4
+@const.ptr = external addrspace(4) constant ptr, align 4
+
+define void @test() {
+  ; CHECK-LABEL: name: test
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   successors: %bb.1(0x30000000), %bb.3(0x50000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant, target-flags(amdgpu-gotprel32-hi) @external_constant, implicit-def dead $scc
+  ; CHECK-NEXT:   [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[SI_PC_ADD_REL_OFFSET]], 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
+  ; CHECK-NEXT:   [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM killed [[S_LOAD_DWORDX2_IMM]], 0, 0 :: (dereferenceable invariant load (s32) from @external_constant, addrspace 4)
+  ; CHECK-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+  ; CHECK-NEXT:   nofpexcept S_CMP_LG_F32 killed [[S_LOAD_DWORD_IMM]], killed [[S_MOV_B32_]], implicit-def $scc, implicit $mode
+  ; CHECK-NEXT:   S_CBRANCH_SCC1 %bb.3, implicit $scc
+  ; CHECK-NEXT:   S_BRANCH %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1.bb1:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.4(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[SI_PC_ADD_REL_OFFSET1:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @const.ptr, target-flags(amdgpu-gotprel32-hi) @const.ptr, implicit-def dead $scc
+  ; CHECK-NEXT:   [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[SI_PC_ADD_REL_OFFSET1]], 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
+  ; CHECK-NEXT:   [[S_LOAD_DWORDX2_IMM2:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM killed [[S_LOAD_DWORDX2_IMM1]], 0, 0 :: (invariant load (s64) from @const.ptr, addrspace 4)
+  ; CHECK-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK-NEXT:   [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR killed [[S_LOAD_DWORDX2_IMM2]], killed [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32) from %ir.0, addrspace 1)
+  ; CHECK-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1092616192
+  ; CHECK-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 1065353216
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sreg_32 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
+  ; CHECK-NEXT:   nofpexcept S_CMP_LT_F32 killed [[COPY]], killed [[S_MOV_B32_2]], implicit-def $scc, implicit $mode
+  ; CHECK-NEXT:   S_CBRANCH_SCC1 %bb.4, implicit $scc
+  ; CHECK-NEXT:   S_BRANCH %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.bb2:
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+  ; CHECK-NEXT:   S_BRANCH %bb.4
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3.Flow1:
+  ; CHECK-NEXT:   successors: %bb.7(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_BRANCH %bb.7
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4.bb3:
+  ; CHECK-NEXT:   successors: %bb.5(0x50000000), %bb.6(0x30000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:sgpr_32 = PHI [[S_MOV_B32_1]], %bb.1, [[S_MOV_B32_3]], %bb.2
+  ; CHECK-NEXT:   [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
+  ; CHECK-NEXT:   nofpexcept S_CMP_NEQ_F32 [[PHI]], killed [[S_MOV_B32_4]], implicit-def $scc, implicit $mode
+  ; CHECK-NEXT:   S_CBRANCH_SCC1 %bb.6, implicit $scc
+  ; CHECK-NEXT:   S_BRANCH %bb.5
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.5.bb4:
+  ; CHECK-NEXT:   successors: %bb.6(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1082130432, implicit $exec
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:sreg_32_xexec_hi = IMPLICIT_DEF
+  ; CHECK-NEXT:   SCRATCH_STORE_DWORD_SADDR killed [[V_MOV_B32_e32_1]], killed [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(5) undef`, addrspace 5)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.6.Flow:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_BRANCH %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.7.bb5:
+  ; CHECK-NEXT:   SI_RETURN
+entry:
+  %ld1 = load float, ptr addrspace(4) @external_constant
+  %cmp1 = fcmp one float %ld1, 0.0
+  br i1 %cmp1, label %bb5, label %bb1, !amdgpu.uniform !0
+
+bb1:
+  %ptr = load ptr, ptr addrspace(4) @const.ptr
+  %ld2 = load float, ptr %ptr, align 4
+  %cmp2 = fcmp olt float %ld2, 1.0
+  %or = or i1 %cmp2, false
+  br i1 %or, label %bb3, label %bb2, !amdgpu.uniform !0
+
+bb2:
+  br label %bb3
+
+bb3:
+  %phi = phi float [ 10.0, %bb1 ], [ 0.0, %bb2 ]
+  %cmp3 = fcmp oeq float %phi, 0.0
+  br i1 %cmp3, label %bb4, label %bb5, !amdgpu.uniform !0
+
+bb4:
+  store float 4.0, ptr addrspace(5) undef, align 4
+  br label %bb5
+
+bb5:
+  ret void
+}
+
+!0 = !{}

github-actions · 2024-12-19T15:09:59Z

✅ With the latest revision this PR passed the undef deprecator.

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

jayfoad · 2024-12-19T16:43:19Z

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

  }

+  if ((VT == MVT::f16 || VT == MVT::f32) && ST->hasSALUFloatInsts())


Suggested change

if ((VT == MVT::f16 || VT == MVT::f32) && ST->hasSALUFloatInsts())

if ((VT == MVT::f16 || VT == MVT::f32) && Subtarget->hasSALUFloatInsts())

Thank you Jay :) I forgot to rebase the patch. I just updated a new version.

jayfoad · 2024-12-19T16:46:09Z

llvm/test/CodeGen/AMDGPU/uniform_branch_with_floating_point_cond.ll

@@ -0,0 +1,100 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs  -stop-after=amdgpu-isel < %s 2>&1 | FileCheck %s


Suggested change

; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -stop-after=amdgpu-isel < %s 2>&1 | FileCheck %s

; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -stop-after=amdgpu-isel < %s 2>&1 | FileCheck %s

jayfoad · 2024-12-19T16:46:23Z

llvm/test/CodeGen/AMDGPU/uniform_branch_with_floating_point_cond.ll

@@ -0,0 +1,100 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs  -stop-after=amdgpu-isel < %s 2>&1 | FileCheck %s


Why do you need 2>&1?

I removed it. Thank you :)

jayfoad

LGTM, thanks.

Revert "[AMDGPU] Emit S_CBRANCH_SCC for floating-point conditions. (llvm#120588)" This reverts commit d3508cc. Change-Id: Idc3b9497c81779055fe226a2705bcbe25cd70889

Local branch amd-gfx d71fa76 Revert "[AMDGPU] Emit S_CBRANCH_SCC for floating-point conditions. (llvm#120588)" Remote branch main 0fa59c6 [llvm][Docs] Update supported hardware (llvm#121743) Change-Id: Ic39049333a827f4a1840c385d1d6ce004af4bd64

llvmbot added the backend:AMDGPU label Dec 19, 2024

kmitropoulou requested a review from dstutt December 19, 2024 15:07

kmitropoulou changed the title ~~uniform branch floating point condition~~ [AMDGPU] Emit S_CBRANCH_SCC for floating-point conditions. Dec 19, 2024

kmitropoulou requested review from jayfoad and perlfu December 19, 2024 15:07

[AMDGPU] Add new test.

cb0b753

jayfoad reviewed Dec 19, 2024

View reviewed changes

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp Outdated Show resolved Hide resolved

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp Outdated Show resolved Hide resolved

kmitropoulou force-pushed the uniform_branch_floating_point_condition branch 2 times, most recently from 8d3d8d0 to 2995c8c Compare December 19, 2024 16:34

kmitropoulou requested a review from jayfoad December 19, 2024 16:36

jayfoad reviewed Dec 19, 2024

View reviewed changes

kmitropoulou force-pushed the uniform_branch_floating_point_condition branch from 2995c8c to 699bc18 Compare December 19, 2024 16:43

kmitropoulou requested a review from jayfoad December 19, 2024 16:45

jayfoad reviewed Dec 19, 2024

View reviewed changes

[AMDGPU] Emit S_CBRANCH_SCC for floating-point conditions.

0dc2a0a

kmitropoulou force-pushed the uniform_branch_floating_point_condition branch from 699bc18 to 0dc2a0a Compare December 19, 2024 16:51

kmitropoulou requested a review from jayfoad December 19, 2024 16:52

jayfoad approved these changes Dec 19, 2024

View reviewed changes

kmitropoulou merged commit d3508cc into llvm:main Dec 19, 2024
8 checks passed

qiaojbao pushed a commit to GPUOpen-Drivers/llvm-project that referenced this pull request Feb 7, 2025

Revert "[AMDGPU] Emit S_CBRANCH_SCC for floating-point conditions. (llvm#120588)"

d71fa76

This reverts commit d3508cc. Change-Id: Idc3b9497c81779055fe226a2705bcbe25cd70889

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[AMDGPU] Emit S_CBRANCH_SCC for floating-point conditions. #120588

[AMDGPU] Emit S_CBRANCH_SCC for floating-point conditions. #120588

Uh oh!

kmitropoulou commented Dec 19, 2024

Uh oh!

llvmbot commented Dec 19, 2024

Uh oh!

github-actions bot commented Dec 19, 2024 •

edited

Loading

Uh oh!

Uh oh!

Uh oh!

jayfoad Dec 19, 2024

Uh oh!

kmitropoulou Dec 19, 2024

Uh oh!

jayfoad Dec 19, 2024

Uh oh!

kmitropoulou Dec 19, 2024

Uh oh!

jayfoad Dec 19, 2024

Uh oh!

kmitropoulou Dec 19, 2024

Uh oh!

jayfoad left a comment

Uh oh!

Uh oh!

Uh oh!

		}

		if ((VT == MVT::f16 || VT == MVT::f32) && ST->hasSALUFloatInsts())

		@@ -0,0 +1,100 @@
		; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
		; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -stop-after=amdgpu-isel < %s 2>&1 | FileCheck %s

[AMDGPU] Emit S_CBRANCH_SCC for floating-point conditions. #120588

[AMDGPU] Emit S_CBRANCH_SCC for floating-point conditions. #120588

Uh oh!

Conversation

kmitropoulou commented Dec 19, 2024

Uh oh!

llvmbot commented Dec 19, 2024

Uh oh!

github-actions bot commented Dec 19, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Uh oh!

Uh oh!

jayfoad Dec 19, 2024

Choose a reason for hiding this comment

Uh oh!

kmitropoulou Dec 19, 2024

Choose a reason for hiding this comment

Uh oh!

jayfoad Dec 19, 2024

Choose a reason for hiding this comment

Uh oh!

kmitropoulou Dec 19, 2024

Choose a reason for hiding this comment

Uh oh!

jayfoad Dec 19, 2024

Choose a reason for hiding this comment

Uh oh!

kmitropoulou Dec 19, 2024

Choose a reason for hiding this comment

Uh oh!

jayfoad left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

github-actions bot commented Dec 19, 2024 •

edited

Loading