Skip to content

Commit f7fd88a

Browse files
AMDGPU: update GFX11 wmma hazards
One V_NOP or unrelated VALU instruction in between is required for correctness when matrix A or B of the current WMMA instruction overlaps with matrix D of the previous WMMA instruction. The remaining cases of WMMA operand overlap are handled by the hardware and do not require handling in the hazard recognizer. Hardware may stall in cases where:
- matrix C of the current WMMA instruction overlaps with matrix D of the previous WMMA instruction
- a VALU instruction reads matrix D of the previous WMMA instruction
- matrix A, B or C of a WMMA instruction reads the result of a previous VALU instruction
1 parent 1fbf533 commit f7fd88a

File tree

2 files changed

+35
-36
lines changed

2 files changed

+35
-36
lines changed

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

Lines changed: 2 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1713,8 +1713,8 @@ bool GCNHazardRecognizer::fixWMMAHazards(MachineInstr *MI) {
17131713
if (!SIInstrInfo::isWMMA(I))
17141714
return false;
17151715

1716-
// Src0 or Src1 of the current wmma instruction overlaps with the dest of
1717-
// the previous wmma.
1716+
// Src0(matrix A) or Src1(matrix B) of the current wmma instruction overlaps
1717+
// with the dest(matrix D) of the previous wmma.
17181718
const Register CurSrc0Reg =
17191719
TII->getNamedOperand(*MI, AMDGPU::OpName::src0)->getReg();
17201720
const Register CurSrc1Reg =
@@ -1728,25 +1728,6 @@ bool GCNHazardRecognizer::fixWMMAHazards(MachineInstr *MI) {
17281728
return true;
17291729
}
17301730

1731-
// Src2 of the current wmma instruction overlaps with the dest of the
1732-
// previous wmma.
1733-
const MachineOperand *Src2 =
1734-
TII->getNamedOperand(*MI, AMDGPU::OpName::src2);
1735-
const Register CurSrc2Reg = Src2->isReg() ? Src2->getReg() : Register();
1736-
1737-
if (CurSrc2Reg != AMDGPU::NoRegister &&
1738-
TRI->regsOverlap(PrevDstReg, CurSrc2Reg)) {
1739-
1740-
const MachineOperand *Src2Mods =
1741-
TII->getNamedOperand(*MI, AMDGPU::OpName::src2_modifiers);
1742-
const bool NoSrc2Mods =
1743-
(Src2Mods->getImm() & (SISrcMods::NEG | SISrcMods::NEG_HI)) == 0;
1744-
// Exception: there is no hazard if the wmma instructions are of the same
1745-
// type and there is no input modifier on src2 of the current instruction.
1746-
return !(NoSrc2Mods && (TII->pseudoToMCOpcode(I.getOpcode()) ==
1747-
TII->pseudoToMCOpcode(MI->getOpcode())));
1748-
}
1749-
17501731
return false;
17511732
};
17521733

0 commit comments

Comments
 (0)