Skip to content

Commit 6bb7d51

Browse files
authored
[AMDGPU] Properly check op_sel in GCNDPPCombine (#79122)
1 parent 632f44e commit 6bb7d51

File tree

2 files changed

+100
-12
lines changed

2 files changed

+100
-12
lines changed

llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -274,8 +274,8 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
274274
break;
275275
}
276276

277-
if (auto *Mod0 = TII->getNamedOperand(OrigMI,
278-
AMDGPU::OpName::src0_modifiers)) {
277+
auto *Mod0 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0_modifiers);
278+
if (Mod0) {
279279
assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
280280
AMDGPU::OpName::src0_modifiers));
281281
assert(HasVOP3DPP ||
@@ -298,8 +298,8 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
298298
DPPInst->getOperand(NumOperands).setIsKill(false);
299299
++NumOperands;
300300

301-
if (auto *Mod1 = TII->getNamedOperand(OrigMI,
302-
AMDGPU::OpName::src1_modifiers)) {
301+
auto *Mod1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1_modifiers);
302+
if (Mod1) {
303303
assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
304304
AMDGPU::OpName::src1_modifiers));
305305
assert(HasVOP3DPP ||
@@ -330,8 +330,9 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
330330
DPPInst.add(*Src1);
331331
++NumOperands;
332332
}
333-
if (auto *Mod2 =
334-
TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2_modifiers)) {
333+
334+
auto *Mod2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2_modifiers);
335+
if (Mod2) {
335336
assert(NumOperands ==
336337
AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::src2_modifiers));
337338
assert(HasVOP3DPP ||
@@ -350,6 +351,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
350351
DPPInst.add(*Src2);
351352
++NumOperands;
352353
}
354+
353355
if (HasVOP3DPP) {
354356
auto *ClampOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::clamp);
355357
if (ClampOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::clamp)) {
@@ -368,7 +370,13 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
368370
// all 1.
369371
if (auto *OpSelOpr =
370372
TII->getNamedOperand(OrigMI, AMDGPU::OpName::op_sel)) {
371-
auto OpSel = OpSelOpr->getImm();
373+
int64_t OpSel = 0;
374+
OpSel |= (Mod0 ? (!!(Mod0->getImm() & SISrcMods::OP_SEL_0) << 0) : 0);
375+
OpSel |= (Mod1 ? (!!(Mod1->getImm() & SISrcMods::OP_SEL_0) << 1) : 0);
376+
OpSel |= (Mod2 ? (!!(Mod2->getImm() & SISrcMods::OP_SEL_0) << 2) : 0);
377+
if (Mod0 && TII->isVOP3(OrigMI) && !TII->isVOP3P(OrigMI))
378+
OpSel |= !!(Mod0->getImm() & SISrcMods::DST_OP_SEL) << 3;
379+
372380
if (OpSel != 0) {
373381
LLVM_DEBUG(dbgs() << " failed: op_sel must be zero\n");
374382
Fail = true;
@@ -379,7 +387,11 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
379387
}
380388
if (auto *OpSelHiOpr =
381389
TII->getNamedOperand(OrigMI, AMDGPU::OpName::op_sel_hi)) {
382-
auto OpSelHi = OpSelHiOpr->getImm();
390+
int64_t OpSelHi = 0;
391+
OpSelHi |= (Mod0 ? (!!(Mod0->getImm() & SISrcMods::OP_SEL_1) << 0) : 0);
392+
OpSelHi |= (Mod1 ? (!!(Mod1->getImm() & SISrcMods::OP_SEL_1) << 1) : 0);
393+
OpSelHi |= (Mod2 ? (!!(Mod2->getImm() & SISrcMods::OP_SEL_1) << 2) : 0);
394+
383395
// Only vop3p has op_sel_hi, and all vop3p have 3 operands, so check
384396
// the bitmask for 3 op_sel_hi bits set
385397
assert(Src2 && "Expected vop3p with 3 operands");

llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir

Lines changed: 80 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,8 @@ body: |
8383
# Regression test for src_modifiers on base u16 opcode
8484
# GCN-LABEL: name: vop3_u16
8585
# GCN: %5:vgpr_32 = V_ADD_NC_U16_e64_dpp %3, 0, %1, 0, %3, 0, 0, 1, 15, 15, 1, implicit $exec
86-
# GCN: %7:vgpr_32 = V_ADD_NC_U16_e64_dpp %3, 4, %5, 8, %5, 0, 0, 1, 15, 15, 1, implicit $exec
86+
# GCN: %7:vgpr_32 = V_ADD_NC_U16_e64_dpp %3, 1, %5, 2, %5, 0, 0, 1, 15, 15, 1, implicit $exec
87+
# GCN: %9:vgpr_32 = V_ADD_NC_U16_e64 4, %8, 8, %7, 0, 0, implicit $exec
8788
name: vop3_u16
8889
tracksRegLiveness: true
8990
body: |
@@ -97,7 +98,9 @@ body: |
9798
%4:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
9899
%5:vgpr_32 = V_ADD_NC_U16_e64 0, %4, 0, %3, 0, 0, implicit $exec
99100
%6:vgpr_32 = V_MOV_B32_dpp %3, %5, 1, 15, 15, 1, implicit $exec
100-
%7:vgpr_32 = V_ADD_NC_U16_e64 4, %6, 8, %5, 0, 0, implicit $exec
101+
%7:vgpr_32 = V_ADD_NC_U16_e64 1, %6, 2, %5, 0, 0, implicit $exec
102+
%8:vgpr_32 = V_MOV_B32_dpp %3, %7, 1, 15, 15, 1, implicit $exec
103+
%9:vgpr_32 = V_ADD_NC_U16_e64 4, %8, 8, %7, 0, 0, implicit $exec
101104
...
102105

103106
name: vop3p
@@ -116,7 +119,7 @@ body: |
116119
; GCN: [[V_DOT2_F32_F16_:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16 0, [[V_MOV_B32_dpp]], 0, [[COPY]], 0, [[COPY2]], 0, 5, 0, 0, 0, implicit $mode, implicit $exec
117120
; GCN: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
118121
; GCN: [[V_DOT2_F32_F16_1:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16 0, [[V_MOV_B32_dpp1]], 0, [[COPY]], 0, [[COPY2]], 0, 0, 4, 0, 0, implicit $mode, implicit $exec
119-
; GCN: [[V_DOT2_F32_F16_dpp:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16_dpp [[DEF]], 10, [[COPY1]], 8, [[COPY]], 13, [[COPY2]], 1, 0, 7, 4, 5, 1, 15, 15, 1, implicit $mode, implicit $exec
122+
; GCN: [[V_DOT2_F32_F16_dpp:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16_dpp [[DEF]], 10, [[COPY1]], 8, [[COPY]], 9, [[COPY2]], 1, 0, 7, 4, 5, 1, 15, 15, 1, implicit $mode, implicit $exec
120123
; GCN: [[V_FMA_MIX_F32_dpp:%[0-9]+]]:vgpr_32 = V_FMA_MIX_F32_dpp [[DEF]], 8, [[COPY1]], 8, [[COPY]], 8, [[COPY2]], 1, 0, 7, 1, 15, 15, 1, implicit $mode, implicit $exec
121124
; GCN: [[V_FMA_MIXLO_F16_dpp:%[0-9]+]]:vgpr_32 = V_FMA_MIXLO_F16_dpp [[DEF]], 8, [[COPY1]], 8, [[COPY]], 8, [[COPY2]], 0, [[COPY2]], 0, 7, 1, 15, 15, 1, implicit $mode, implicit $exec
122125
; GCN: [[V_FMA_MIXHI_F16_dpp:%[0-9]+]]:vgpr_32 = V_FMA_MIXHI_F16_dpp [[DEF]], 8, [[COPY1]], 8, [[COPY]], 8, [[COPY2]], 1, [[COPY]], 0, 7, 1, 15, 15, 1, implicit $mode, implicit $exec
@@ -134,7 +137,7 @@ body: |
134137
%7:vgpr_32 = V_DOT2_F32_F16 0, %6, 0, %0, 0, %2, 0, 0, 4, 0, 0, implicit $mode, implicit $exec
135138
136139
%8:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
137-
%9:vgpr_32 = V_DOT2_F32_F16 10, %8, 8, %0, 13, %2, 1, 0, 7, 4, 5, implicit $mode, implicit $exec
140+
%9:vgpr_32 = V_DOT2_F32_F16 10, %8, 8, %0, 9, %2, 1, 0, 7, 4, 5, implicit $mode, implicit $exec
138141
139142
%10:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
140143
%11:vgpr_32 = V_FMA_MIX_F32 8, %10, 8, %0, 8, %2, 1, 0, 7, implicit $mode, implicit $exec
@@ -871,3 +874,76 @@ body: |
871874
%5:vgpr_32 = V_ADD_U32_e32 %4.sub0, %4.sub0, implicit $exec
872875
%6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc, implicit $vcc, implicit $exec
873876
...
877+
878+
# Check op_sel is all 0s when combining
879+
# GCN-LABEL: name: opsel_vop3
880+
# GCN: %4:vgpr_32 = V_ADD_I16_e64_dpp %2, 0, %0, 0, %1, 0, 0, 1, 15, 15, 1, implicit $exec
881+
# GCN: %6:vgpr_32 = V_ADD_I16_e64 4, %5, 0, %1, 0, 0, implicit $exec
882+
# GCN: %8:vgpr_32 = V_ADD_I16_e64 0, %7, 4, %1, 0, 0, implicit $exec
883+
# GCN: %10:vgpr_32 = V_ADD_I16_e64 4, %9, 4, %1, 0, 0, implicit $exec
884+
# GCN: %12:vgpr_32 = V_ADD_I16_e64 8, %11, 0, %1, 0, 0, implicit $exec
885+
name: opsel_vop3
886+
tracksRegLiveness: true
887+
body: |
888+
bb.0:
889+
liveins: $vgpr0, $vgpr1
890+
891+
%0:vgpr_32 = COPY $vgpr0
892+
%1:vgpr_32 = COPY $vgpr1
893+
%2:vgpr_32 = IMPLICIT_DEF
894+
895+
; Combine for op_sel:[0,0,0]
896+
%3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
897+
%4:vgpr_32 = V_ADD_I16_e64 0, %3, 0, %1, 0, 0, implicit $exec
898+
899+
; Do not combine for op_sel:[1,0,0]
900+
%5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
901+
%6:vgpr_32 = V_ADD_I16_e64 4, %5, 0, %1, 0, 0, implicit $exec
902+
903+
; Do not combine for op_sel:[0,1,0]
904+
%7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
905+
%8:vgpr_32 = V_ADD_I16_e64 0, %7, 4, %1, 0, 0, implicit $exec
906+
907+
; Do not combine for op_sel:[1,1,0]
908+
%9:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
909+
%10:vgpr_32 = V_ADD_I16_e64 4, %9, 4, %1, 0, 0, implicit $exec
910+
911+
; Do not combine for op_sel:[0,0,1] (dst_op_sel only)
912+
%11:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
913+
%12:vgpr_32 = V_ADD_I16_e64 8, %11, 0, %1, 0, 0, implicit $exec
914+
...
915+
916+
# Check op_sel is all 0s and op_sel_hi is all 1s when combining
917+
# GCN-LABEL: name: opsel_vop3p
918+
# GCN: %5:vgpr_32 = V_FMA_MIX_F32 0, %4, 0, %1, 0, %2, 0, 0, 0, implicit $mode, implicit $exec
919+
# GCN: %7:vgpr_32 = V_FMA_MIX_F32 4, %6, 4, %1, 4, %2, 0, 0, 0, implicit $mode, implicit $exec
920+
# GCN: %9:vgpr_32 = V_FMA_MIX_F32_dpp %3, 8, %0, 8, %1, 8, %2, 0, 0, 7, 1, 15, 15, 1, implicit $mode, implicit $exec
921+
# GCN: %11:vgpr_32 = V_FMA_MIX_F32 12, %10, 12, %1, 12, %2, 0, 0, 0, implicit $mode, implicit $exec
922+
923+
name: opsel_vop3p
924+
tracksRegLiveness: true
925+
body: |
926+
bb.0:
927+
liveins: $vgpr0, $vgpr1, $vgpr2
928+
929+
%0:vgpr_32 = COPY $vgpr0
930+
%1:vgpr_32 = COPY $vgpr1
931+
%2:vgpr_32 = COPY $vgpr2
932+
%3:vgpr_32 = IMPLICIT_DEF
933+
934+
; Do not combine for op_sel:[0,0,0] op_sel_hi:[0,0,0]
935+
%4:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
936+
%5:vgpr_32 = V_FMA_MIX_F32 0, %4, 0, %1, 0, %2, 0, 0, 0, implicit $mode, implicit $exec
937+
938+
; Do not combine for op_sel:[1,1,1] op_sel_hi:[0,0,0]
939+
%6:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
940+
%7:vgpr_32 = V_FMA_MIX_F32 4, %6, 4, %1, 4, %2, 0, 0, 0, implicit $mode, implicit $exec
941+
942+
; Combine for op_sel:[0,0,0] op_sel_hi:[1,1,1]
943+
%8:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
944+
%9:vgpr_32 = V_FMA_MIX_F32 8, %8, 8, %1, 8, %2, 0, 0, 0, implicit $mode, implicit $exec
945+
946+
; Do not combine for op_sel:[1,1,1] op_sel_hi:[1,1,1]
947+
%10:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
948+
%11:vgpr_32 = V_FMA_MIX_F32 12, %10, 12, %1, 12, %2, 0, 0, 0, implicit $mode, implicit $exec
949+
...

0 commit comments

Comments
 (0)