From f901e1c9b4879d836fa12bbf9b1985398507d48e Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 27 Jan 2025 19:31:41 +0700 Subject: [PATCH 1/2] PeepholeOpt: Do not skip reg_sequence sources with subregs Contrary to the comment, this particular code is not responsible for handling any composes that may be required, and unhandled cases are already rejected later. Lift this restriction to permit composes and reg_sequence subregisters later. --- llvm/lib/CodeGen/PeepholeOptimizer.cpp | 4 +--- llvm/test/CodeGen/AMDGPU/peephole-opt-regseq-removal.mir | 6 +++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp index cced54fa72819..0e1c65c98544e 100644 --- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -406,9 +406,7 @@ class RegSequenceRewriter : public Rewriter { const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx); Src.Reg = MOInsertedReg.getReg(); - // If we have to compose sub-register indices, bail out. - if ((Src.SubReg = MOInsertedReg.getSubReg())) - return false; + Src.SubReg = MOInsertedReg.getSubReg(); // We want to track something that is compatible with the related // partial definition. diff --git a/llvm/test/CodeGen/AMDGPU/peephole-opt-regseq-removal.mir b/llvm/test/CodeGen/AMDGPU/peephole-opt-regseq-removal.mir index e1ff42125ce9a..333f7de921c24 100644 --- a/llvm/test/CodeGen/AMDGPU/peephole-opt-regseq-removal.mir +++ b/llvm/test/CodeGen/AMDGPU/peephole-opt-regseq-removal.mir @@ -22,9 +22,9 @@ body: | ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[REG_SEQUENCE]].sub1, %subreg.sub0, [[REG_SEQUENCE]].sub0, %subreg.sub1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; GCN-NEXT: KILL [[COPY3]], implicit [[COPY2]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = COPY $vgpr1 From d535a0ada218690b39811bcde51c86604f50d845 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 6 Mar 2025 17:45:09 +0700 Subject: [PATCH 2/2] Add some more tests --- .../AMDGPU/peephole-opt-regseq-removal.mir | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/llvm/test/CodeGen/AMDGPU/peephole-opt-regseq-removal.mir b/llvm/test/CodeGen/AMDGPU/peephole-opt-regseq-removal.mir index 333f7de921c24..f1f2eb6baf008 100644 --- a/llvm/test/CodeGen/AMDGPU/peephole-opt-regseq-removal.mir +++ b/llvm/test/CodeGen/AMDGPU/peephole-opt-regseq-removal.mir @@ -34,3 +34,49 @@ body: | %5:vgpr_32 = COPY %3.sub1 KILL implicit %4, %5 ... + +--- +name: reg_sequence_removal_2 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 + + ; GCN-LABEL: name: reg_sequence_removal_2 + ; GCN: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]].sub0_sub1, %subreg.sub2_sub3, [[COPY1]].sub2_sub3, %subreg.sub0_sub1 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]].sub1_sub2_sub3, %subreg.sub0_sub1_sub2, [[COPY1]].sub0, %subreg.sub3 + ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]].sub1, %subreg.sub0, [[REG_SEQUENCE]].sub2, %subreg.sub1, [[REG_SEQUENCE]].sub3, %subreg.sub2, [[COPY1]].sub0, %subreg.sub3 + ; GCN-NEXT: KILL implicit [[REG_SEQUENCE2]] + %0:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:vreg_128 = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:vreg_128 = REG_SEQUENCE %0.sub0_sub1, %subreg.sub2_sub3, %1.sub2_sub3, %subreg.sub0_sub1 + %3:vreg_128 = REG_SEQUENCE %2.sub1_sub2_sub3, %subreg.sub0_sub1_sub2, %1.sub0, %subreg.sub3 + %4:vreg_128 = REG_SEQUENCE %3.sub0, %subreg.sub0, %3.sub1, %subreg.sub1, %3.sub2, %subreg.sub2, %3.sub3, %subreg.sub3 + KILL implicit %4 +... + +--- +name: reg_sequence_removal_3 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 + + ; GCN-LABEL: name: reg_sequence_removal_3 + ; GCN: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]].sub0_sub1, %subreg.sub2_sub3, [[COPY1]].sub2_sub3, %subreg.sub0_sub1 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]].sub2_sub3, %subreg.sub2_sub3, [[COPY]].sub0, %subreg.sub1, [[COPY]].sub1, %subreg.sub0 + ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]].sub1, %subreg.sub0, [[COPY]].sub0, %subreg.sub1, [[COPY1]].sub2, %subreg.sub2, [[COPY1]].sub3, %subreg.sub3 + ; GCN-NEXT: KILL implicit [[REG_SEQUENCE2]] + %0:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:vreg_128 = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:vreg_128 = REG_SEQUENCE %0.sub0_sub1, %subreg.sub2_sub3, %1.sub2_sub3, %subreg.sub0_sub1 + %3:vreg_128 = REG_SEQUENCE %2.sub0_sub1, %subreg.sub2_sub3, %2.sub2, %subreg.sub1, %2.sub3, %subreg.sub0 + %4:vreg_128 = REG_SEQUENCE %3.sub0, %subreg.sub0, %3.sub1, %subreg.sub1, %3.sub2, %subreg.sub2, %3.sub3, %subreg.sub3 + KILL implicit %4 +...