Skip to content

[TableGen] Add MacroFusions to SchedMachineModel #72223

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 11 additions & 9 deletions llvm/include/llvm/CodeGen/MacroFusion.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
#ifndef LLVM_CODEGEN_MACROFUSION_H
#define LLVM_CODEGEN_MACROFUSION_H

#include <functional>
#include <memory>
#include <vector>

namespace llvm {

Expand All @@ -29,10 +29,10 @@ class SUnit;
/// Check if the instr pair, FirstMI and SecondMI, should be fused
/// together. Given SecondMI, when FirstMI is unspecified, then check if
/// SecondMI may be part of a fused pair at all.
using ShouldSchedulePredTy = std::function<bool(const TargetInstrInfo &TII,
const TargetSubtargetInfo &TSI,
const MachineInstr *FirstMI,
const MachineInstr &SecondMI)>;
using MacroFusionPredTy = bool (*)(const TargetInstrInfo &TII,
const TargetSubtargetInfo &STI,
const MachineInstr *FirstMI,
const MachineInstr &SecondMI);

/// Checks if the number of cluster edges between SU and its predecessors is
/// less than FuseLimit
Expand All @@ -48,15 +48,17 @@ bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU,

/// Create a DAG scheduling mutation to pair instructions back to back
/// for instructions that benefit according to the target-specific
/// shouldScheduleAdjacent predicate function.
/// predicate functions. shouldScheduleAdjacent will be true if any of the
/// provided predicates are true.
std::unique_ptr<ScheduleDAGMutation>
createMacroFusionDAGMutation(ShouldSchedulePredTy shouldScheduleAdjacent);
createMacroFusionDAGMutation(std::vector<MacroFusionPredTy> Predicates);

/// Create a DAG scheduling mutation to pair branch instructions with one
/// of their predecessors back to back for instructions that benefit according
/// to the target-specific shouldScheduleAdjacent predicate function.
/// to the target-specific predicate functions. shouldScheduleAdjacent will be
/// true if any of the provided predicates are true.
std::unique_ptr<ScheduleDAGMutation>
createBranchMacroFusionDAGMutation(ShouldSchedulePredTy shouldScheduleAdjacent);
createBranchMacroFusionDAGMutation(std::vector<MacroFusionPredTy> Predicates);

} // end namespace llvm

Expand Down
4 changes: 4 additions & 0 deletions llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MacroFusion.h"
#include "llvm/CodeGen/PBQPRAConstraint.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/IR/GlobalValue.h"
Expand Down Expand Up @@ -323,6 +324,9 @@ class TargetSubtargetInfo : public MCSubtargetInfo {
/// helps removing redundant copies generated by register allocator when
/// handling complex eviction chains.
virtual bool enableSpillageCopyElimination() const { return false; }

/// Return the MacroFusion predicates of this subtarget. This default
/// implementation returns an empty list; being virtual, it can be overridden.
virtual std::vector<MacroFusionPredTy> getMacroFusions() const {
  return std::vector<MacroFusionPredTy>();
}
};

} // end namespace llvm
Expand Down
9 changes: 9 additions & 0 deletions llvm/include/llvm/MC/MCSchedule.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#ifndef LLVM_MC_MCSCHEDULE_H
#define LLVM_MC_MCSCHEDULE_H

#include "llvm/ADT/Bitset.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/DataTypes.h"
#include <cassert>
Expand Down Expand Up @@ -196,6 +197,9 @@ struct MCExtraProcessorInfo {
unsigned StoreQueueID;
};

/// Size parameter used to instantiate MacroFusionBitset below.
const unsigned MaxMacroFusions = 256;
/// Bitset type sized by MaxMacroFusions; MCSchedModel exposes a pointer to one
/// of these through MacroFusionBits.
using MacroFusionBitset = Bitset<MaxMacroFusions>;

/// Machine model for scheduling, bundling, and heuristics.
///
/// The machine model directly provides basic information about the
Expand Down Expand Up @@ -325,9 +329,14 @@ struct MCSchedModel {
const InstrItinerary *InstrItineraries;

const MCExtraProcessorInfo *ExtraProcessorInfo;
const MacroFusionBitset *MacroFusionBits;

bool hasExtraProcessorInfo() const { return ExtraProcessorInfo; }

/// Return the MacroFusionBits pointer stored in this model. The pointer may
/// be null, so callers must check it before dereferencing.
const MacroFusionBitset *getMacroFusionBits() const {
return MacroFusionBits;
}

unsigned getProcessorID() const { return ProcID; }

/// Does this machine model include instruction-level scheduling.
Expand Down
13 changes: 13 additions & 0 deletions llvm/include/llvm/MC/MCSubtargetInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,12 @@ class MCSubtargetInfo {
return FeatureBits[Feature];
}

bool hasMacroFusion(unsigned MacroFusion) const {
const MacroFusionBitset *MacroFusionBits =
CPUSchedModel->getMacroFusionBits();
return MacroFusionBits && MacroFusionBits->test(MacroFusion);
}

protected:
/// Initialize the scheduling model and feature bits.
///
Expand Down Expand Up @@ -295,6 +301,13 @@ class MCSubtargetInfo {

/// \return if target want to issue a prefetch in address space \p AS.
virtual bool shouldPrefetchAddressSpace(unsigned AS) const;

/// Enable macro fusion for this subtarget.
virtual bool enableMacroFusion() const {
const MacroFusionBitset *MacroFusionBits =
CPUSchedModel->getMacroFusionBits();
return MacroFusionBits && MacroFusionBits->any();
}
};

} // end namespace llvm
Expand Down
6 changes: 6 additions & 0 deletions llvm/include/llvm/Target/TargetInstrPredicate.td
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,12 @@ class MCOperandPredicate<int Index> : MCInstPredicate {
// Return true if machine operand at position `Index` is a register operand.
class CheckIsRegOperand<int Index> : MCOperandPredicate<Index>;

// Return true if machine operand at position `Index` is a virtual register
// operand.
class CheckIsVRegOperand<int Index> : MCOperandPredicate<Index>;

// Return true if machine operand at position `Index` is not a virtual
// register operand. Defined as the negation (CheckNot) of CheckIsVRegOperand.
class CheckIsNotVRegOperand<int Index> : CheckNot<CheckIsVRegOperand<Index>>;

// Return true if machine operand at position `Index` is an immediate operand.
class CheckIsImmOperand<int Index> : MCOperandPredicate<Index>;

Expand Down
117 changes: 117 additions & 0 deletions llvm/include/llvm/Target/TargetSchedule.td
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
include "llvm/Target/TargetItinerary.td"

class Predicate; // Forward def
class Fusion;

// DAG operator that interprets the DAG args as Instruction defs.
def instrs;
Expand Down Expand Up @@ -122,6 +123,9 @@ class SchedMachineModel {
// using intervals via ResourceSegments (see
// llvm/include/llvm/CodeGen/MachineScheduler.h).
bit EnableIntervals = false;

// List of Fusion.
list<Fusion> MacroFusions = [];
}

def NoSchedModel : SchedMachineModel {
Expand Down Expand Up @@ -584,3 +588,116 @@ class MemoryQueue<ProcResourceKind PR> {

class LoadQueue<ProcResourceKind LDQueue> : MemoryQueue<LDQueue>;
class StoreQueue<ProcResourceKind STQueue> : MemoryQueue<STQueue>;

// The target instruction that FusionPredicate will be evaluated on.
// Three choices are defined: `first`, `second` and `both` (presumably
// FirstMI, SecondMI, or both instructions of the candidate pair).
class FusionTarget;
def first : FusionTarget;
def second : FusionTarget;
def both : FusionTarget;

// Base class of FusionPredicate, etc. The available variables are:
// * const TargetInstrInfo &TII
// * const TargetSubtargetInfo &STI
// * const MachineRegisterInfo &MRI
// * const MachineInstr *FirstMI
// * const MachineInstr &SecondMI
// The `Target` field records which FusionTarget the predicate is bound to.
class FusionPredicate<FusionTarget target> {
FusionTarget Target = target;
}
// Convenience bases that fix the target to `first`, `second` and `both`.
class FirstFusionPredicate: FusionPredicate<first>;
class SecondFusionPredicate: FusionPredicate<second>;
class BothFusionPredicate: FusionPredicate<both>;

// FusionPredicate with raw code predicate. The target is always `both`, and
// the code fragment is stored in the `Predicate` field.
class FusionPredicateWithCode<code pred> : FusionPredicate<both> {
code Predicate = pred;
}

// FusionPredicate with MCInstPredicate. `target` selects the FusionTarget and
// `pred` is stored in the `Predicate` field.
class FusionPredicateWithMCInstPredicate<FusionTarget target, MCInstPredicate pred>
: FusionPredicate<target> {
MCInstPredicate Predicate = pred;
}
// Variant bound to the `first` target.
class FirstFusionPredicateWithMCInstPredicate<MCInstPredicate pred>
: FusionPredicateWithMCInstPredicate<first, pred>;
// Variant bound to the `second` target.
class SecondFusionPredicateWithMCInstPredicate<MCInstPredicate pred>
: FusionPredicateWithMCInstPredicate<second, pred>;
// The pred will be applied on both FirstMI and SecondMI, so this variant is
// bound to the `both` target (binding it to `second` would contradict the
// class name and restrict the predicate to SecondMI only).
class BothFusionPredicateWithMCInstPredicate<MCInstPredicate pred>
  : FusionPredicateWithMCInstPredicate<both, pred>;

// Tie firstOpIdx and secondOpIdx. The operand of `FirstMI` at position
// `firstOpIdx` should be the same as the operand of `SecondMI` at position
// `secondOpIdx`. The two indices are stored in `FirstOpIdx` and `SecondOpIdx`;
// the predicate is bound to the `both` target via BothFusionPredicate.
class TieReg<int firstOpIdx, int secondOpIdx> : BothFusionPredicate {
int FirstOpIdx = firstOpIdx;
int SecondOpIdx = secondOpIdx;
}

// A predicate for wildcard, i.e. the case where `FirstMI` is null. `ret` is
// stored in `ReturnValue`. The generated code will be like:
// ```
// if (!FirstMI)
// return ReturnValue;
// ```
class WildcardPred<bit ret> : FirstFusionPredicate {
bit ReturnValue = ret;
}
// Ready-made wildcards returning false (0) and true (1) respectively.
def WildcardFalse : WildcardPred<0>;
def WildcardTrue : WildcardPred<1>;

// Indicates that the destination register of `FirstMI` should have one use if
// it is a virtual register. Bound to the `first` target via
// FirstFusionPredicate; `OneUse` is the ready-made instance.
class OneUsePred : FirstFusionPredicate;
def OneUse : OneUsePred;

// A macro fusion described as a list of FusionPredicates, stored in the
// `Predicates` field.
// Handled by MacroFusionPredicatorEmitter backend.
// The generated predicator will be like:
// ```
// bool isNAME(const TargetInstrInfo &TII,
// const TargetSubtargetInfo &STI,
// const MachineInstr *FirstMI,
// const MachineInstr &SecondMI) {
// auto &MRI = SecondMI.getMF()->getRegInfo();
// /* Predicates */
// return true;
// }
// ```
class Fusion<list<FusionPredicate> predicates> {
list<FusionPredicate> Predicates = predicates;
}

// A Fusion built from one predicate for `FirstMI` (`firstPred`) and one for
// `SecondMI` (`secondPred`), surrounded by optional `prolog` and `epilog`
// predicate lists (both default to empty). The fixed middle part is, in order:
// the `SecondMI` predicate, WildcardTrue, the `FirstMI` predicate, a check on
// `SecondMI` built from CheckIsVRegOperand<0> and CheckSameRegOperand<0, 1>,
// OneUse, and TieReg<0, 1>.
// The generated predicator will be like:
// ```
// bool isNAME(const TargetInstrInfo &TII,
// const TargetSubtargetInfo &STI,
// const MachineInstr *FirstMI,
// const MachineInstr &SecondMI) {
// auto &MRI = SecondMI.getMF()->getRegInfo();
// /* Prolog */
// /* Predicate for `SecondMI` */
// /* Wildcard */
// /* Predicate for `FirstMI` */
// /* Check operands 0 and 1 of `SecondMI` (virtual register or same register) */
// /* Check One Use */
// /* Tie registers */
// /* Epilog */
// return true;
// }
// ```
class SimpleFusion<MCInstPredicate firstPred, MCInstPredicate secondPred,
list<FusionPredicate> prolog = [],
list<FusionPredicate> epilog = []>
: Fusion<!listconcat(
prolog,
[
SecondFusionPredicateWithMCInstPredicate<secondPred>,
WildcardTrue,
FirstFusionPredicateWithMCInstPredicate<firstPred>,
SecondFusionPredicateWithMCInstPredicate<
CheckAny<[
CheckIsVRegOperand<0>,
CheckSameRegOperand<0, 1>
]>>,
OneUse,
TieReg<0, 1>,
],
epilog)>;
36 changes: 24 additions & 12 deletions llvm/lib/CodeGen/MacroFusion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,19 +137,33 @@ namespace {
/// Post-process the DAG to create cluster edges between instrs that may
/// be fused by the processor into a single operation.
class MacroFusion : public ScheduleDAGMutation {
ShouldSchedulePredTy shouldScheduleAdjacent;
std::vector<MacroFusionPredTy> Predicates;
bool FuseBlock;
bool scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU);

public:
MacroFusion(ShouldSchedulePredTy shouldScheduleAdjacent, bool FuseBlock)
: shouldScheduleAdjacent(shouldScheduleAdjacent), FuseBlock(FuseBlock) {}
MacroFusion(std::vector<MacroFusionPredTy> Predicates, bool FuseBlock)
: Predicates(std::move(Predicates)), FuseBlock(FuseBlock) {}

void apply(ScheduleDAGInstrs *DAGInstrs) override;

bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
const TargetSubtargetInfo &STI,
const MachineInstr *FirstMI,
const MachineInstr &SecondMI);
};

} // end anonymous namespace

bool MacroFusion::shouldScheduleAdjacent(const TargetInstrInfo &TII,
const TargetSubtargetInfo &STI,
const MachineInstr *FirstMI,
const MachineInstr &SecondMI) {
return llvm::any_of(Predicates, [&](MacroFusionPredTy Predicate) {
return Predicate(TII, STI, FirstMI, SecondMI);
});
}

void MacroFusion::apply(ScheduleDAGInstrs *DAG) {
if (FuseBlock)
// For each of the SUnits in the scheduling block, try to fuse the instr in
Expand Down Expand Up @@ -197,17 +211,15 @@ bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU)
}

std::unique_ptr<ScheduleDAGMutation>
llvm::createMacroFusionDAGMutation(
ShouldSchedulePredTy shouldScheduleAdjacent) {
if(EnableMacroFusion)
return std::make_unique<MacroFusion>(shouldScheduleAdjacent, true);
llvm::createMacroFusionDAGMutation(std::vector<MacroFusionPredTy> Predicates) {
if (EnableMacroFusion)
return std::make_unique<MacroFusion>(std::move(Predicates), true);
return nullptr;
}

std::unique_ptr<ScheduleDAGMutation>
llvm::createBranchMacroFusionDAGMutation(
ShouldSchedulePredTy shouldScheduleAdjacent) {
if(EnableMacroFusion)
return std::make_unique<MacroFusion>(shouldScheduleAdjacent, false);
std::unique_ptr<ScheduleDAGMutation> llvm::createBranchMacroFusionDAGMutation(
std::vector<MacroFusionPredTy> Predicates) {
if (EnableMacroFusion)
return std::make_unique<MacroFusion>(std::move(Predicates), false);
return nullptr;
}
1 change: 1 addition & 0 deletions llvm/lib/MC/MCSchedule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ const MCSchedModel MCSchedModel::Default = {DefaultIssueWidth,
0,
0,
nullptr,
nullptr,
nullptr};

int MCSchedModel::computeInstrLatency(const MCSubtargetInfo &STI,
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -478,5 +478,5 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,

std::unique_ptr<ScheduleDAGMutation>
llvm::createAArch64MacroFusionDAGMutation() {
return createMacroFusionDAGMutation(shouldScheduleAdjacent);
return createMacroFusionDAGMutation({shouldScheduleAdjacent});
}
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII_,
namespace llvm {

std::unique_ptr<ScheduleDAGMutation> createAMDGPUMacroFusionDAGMutation() {
return createMacroFusionDAGMutation(shouldScheduleAdjacent);
return createMacroFusionDAGMutation({shouldScheduleAdjacent});
}

} // end namespace llvm
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,10 +142,10 @@ namespace {
/// be turned into VOPD instructions
/// Greedily pairs instruction candidates. O(n^2) algorithm.
struct VOPDPairingMutation : ScheduleDAGMutation {
ShouldSchedulePredTy shouldScheduleAdjacent; // NOLINT: function pointer
MacroFusionPredTy shouldScheduleAdjacent; // NOLINT: function pointer

VOPDPairingMutation(
ShouldSchedulePredTy shouldScheduleAdjacent) // NOLINT: function pointer
MacroFusionPredTy shouldScheduleAdjacent) // NOLINT: function pointer
: shouldScheduleAdjacent(shouldScheduleAdjacent) {}

void apply(ScheduleDAGInstrs *DAG) override {
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/ARM/ARMMacroFusion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
}

std::unique_ptr<ScheduleDAGMutation> createARMMacroFusionDAGMutation() {
return createMacroFusionDAGMutation(shouldScheduleAdjacent);
return createMacroFusionDAGMutation({shouldScheduleAdjacent});
}

} // end namespace llvm
2 changes: 1 addition & 1 deletion llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
namespace llvm {

std::unique_ptr<ScheduleDAGMutation> createPowerPCMacroFusionDAGMutation() {
return createMacroFusionDAGMutation(shouldScheduleAdjacent);
return createMacroFusionDAGMutation({shouldScheduleAdjacent});
}

} // end namespace llvm
2 changes: 1 addition & 1 deletion llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,5 +65,5 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
}

std::unique_ptr<ScheduleDAGMutation> llvm::createRISCVMacroFusionDAGMutation() {
return createMacroFusionDAGMutation(shouldScheduleAdjacent);
return createMacroFusionDAGMutation({shouldScheduleAdjacent});
}
Loading