Skip to content

Commit d648e11

Browse files
committed
Revert "[AMDGPU] Try to fix the block prologs broken by RA inserted instructions (llvm#69924)"
This reverts commit a0eb6b8.

Caused CTS failures:
- ubuntu_20-04_navi21_vk-llvm-test / CTS.dEQP-VK.ssbo.phys.layout.unsized_nested_struct_array.single_buffer.scalar_instance_array_comp_access
- ubuntu_20-04_navi21_vk-llvm-test / CTS.dEQP-VK.ssbo.phys.layout.unsized_nested_struct_array.single_buffer.scalar_instance_array_comp_access_store_cols
- ubuntu_22-04_navi31_vk-llvm-test / CTS.dEQP-VK.ssbo.phys.layout.unsized_nested_struct_array.single_buffer.scalar_instance_array_comp_access
- ubuntu_22-04_navi31_vk-llvm-test / CTS.dEQP-VK.ssbo.phys.layout.unsized_nested_struct_array.single_buffer.scalar_instance_array_comp_access_store_cols

Change-Id: Id804440b442cae543c2dd0f5c2ba1bcb6b805d1e
1 parent c294719 commit d648e11

16 files changed

+523
-629
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8397,16 +8397,8 @@ unsigned SIInstrInfo::getLiveRangeSplitOpcode(Register SrcReg,
83978397
}
83988398

83998399
bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const {
8400-
// We need to handle instructions which may be inserted during register
8401-
// allocation to handle the prolog. The initial prolog instruction may have
8402-
// been separated from the start of the block by spills and copies inserted
8403-
// needed by the prolog.
8404-
uint16_t Opc = MI.getOpcode();
8405-
8406-
// FIXME: Copies inserted in the block prolog for live-range split should also
8407-
// be included.
8408-
return (isSpillOpcode(Opc) || (!MI.isTerminator() && Opc != AMDGPU::COPY &&
8409-
MI.modifiesRegister(AMDGPU::EXEC, &RI)));
8400+
return !MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY &&
8401+
MI.modifiesRegister(AMDGPU::EXEC, &RI);
84108402
}
84118403

84128404
MachineInstrBuilder

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -675,11 +675,6 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
675675
return get(Opcode).TSFlags & SIInstrFlags::SGPRSpill;
676676
}
677677

678-
bool isSpillOpcode(uint16_t Opcode) const {
679-
return get(Opcode).TSFlags &
680-
(SIInstrFlags::SGPRSpill | SIInstrFlags::VGPRSpill);
681-
}
682-
683678
static bool isWWMRegSpillOpcode(uint16_t Opcode) {
684679
return Opcode == AMDGPU::SI_SPILL_WWM_V32_SAVE ||
685680
Opcode == AMDGPU::SI_SPILL_WWM_AV32_SAVE ||

llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,8 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) {
144144
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
145145
; CHECK-NEXT: s_mov_b32 exec_lo, s21
146146
; CHECK-NEXT: ; %bb.2: ; in Loop: Header=BB0_1 Depth=1
147+
; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
148+
; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
147149
; CHECK-NEXT: s_or_saveexec_b32 s21, -1
148150
; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 ; 4-byte Folded Reload
149151
; CHECK-NEXT: s_mov_b32 exec_lo, s21
@@ -161,9 +163,6 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) {
161163
; CHECK-NEXT: v_readlane_b32 s17, v2, 1
162164
; CHECK-NEXT: v_readlane_b32 s18, v2, 2
163165
; CHECK-NEXT: v_readlane_b32 s19, v2, 3
164-
; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
165-
; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
166-
; CHECK-NEXT: s_waitcnt vmcnt(0)
167166
; CHECK-NEXT: image_sample v0, v[0:1], s[8:15], s[16:19] dmask:0x1 dim:SQ_RSRC_IMG_2D
168167
; CHECK-NEXT: s_waitcnt vmcnt(0)
169168
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill

llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
22
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -O0 -verify-machineinstrs --stop-after=regallocfast,1 -o - %s | FileCheck -check-prefix=REGALLOC %s
33

4-
; Test to check if the bb prolog spills are inserted correctly during regalloc.
4+
; FIXME: There are two spill codes inserted wrongly in this test.
5+
; They are inserted during regalloc for the BBLiveIns - the spill restores for vgpr1 in the Flow block (bb.1) and for vgpr0 in the return block (bb.4).
56
define i32 @prolog_spill(i32 %arg0, i32 %arg1, i32 %arg2) {
67
; REGALLOC-LABEL: name: prolog_spill
78
; REGALLOC: bb.0.bb.0:
@@ -32,10 +33,10 @@ define i32 @prolog_spill(i32 %arg0, i32 %arg1, i32 %arg2) {
3233
; REGALLOC-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
3334
; REGALLOC-NEXT: {{ $}}
3435
; REGALLOC-NEXT: $vgpr0 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
36+
; REGALLOC-NEXT: $vgpr1 = SI_SPILL_V32_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
3537
; REGALLOC-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr4_sgpr5
3638
; REGALLOC-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
3739
; REGALLOC-NEXT: renamable $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 killed renamable $sgpr4_sgpr5, implicit-def $exec, implicit-def dead $scc, implicit $exec
38-
; REGALLOC-NEXT: $vgpr1 = SI_SPILL_V32_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
3940
; REGALLOC-NEXT: SI_SPILL_V32_SAVE killed $vgpr1, %stack.6, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
4041
; REGALLOC-NEXT: renamable $sgpr4_sgpr5 = S_AND_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
4142
; REGALLOC-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr4, 2, $vgpr0, implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
@@ -65,10 +66,10 @@ define i32 @prolog_spill(i32 %arg0, i32 %arg1, i32 %arg2) {
6566
; REGALLOC-NEXT: {{ $}}
6667
; REGALLOC-NEXT: bb.4.bb.3:
6768
; REGALLOC-NEXT: $vgpr1 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
69+
; REGALLOC-NEXT: $vgpr0 = SI_SPILL_V32_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
6870
; REGALLOC-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 2, implicit-def $sgpr4_sgpr5
6971
; REGALLOC-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 3
7072
; REGALLOC-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
71-
; REGALLOC-NEXT: $vgpr0 = SI_SPILL_V32_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
7273
; REGALLOC-NEXT: renamable $sgpr4 = S_MOV_B32 5
7374
; REGALLOC-NEXT: renamable $vgpr0 = V_MUL_LO_U32_e64 killed $vgpr0, killed $sgpr4, implicit $exec
7475
; REGALLOC-NEXT: KILL killed renamable $vgpr1

0 commit comments

Comments
 (0)