[Xtensa] Implement Xtensa S32C1I Option and atomics lowering. #137134
Conversation
Implement the Xtensa S32C1I option and use the s32c1i instruction to implement atomic operations.
@llvm/pr-subscribers-backend-xtensa

Author: Andrei Safronov (andreisfr)

Changes: Implement the Xtensa S32C1I option and use the s32c1i instruction to implement atomic operations.

Patch is 279.18 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/137134.diff

13 Files Affected:
diff --git a/llvm/lib/Target/Xtensa/Disassembler/XtensaDisassembler.cpp b/llvm/lib/Target/Xtensa/Disassembler/XtensaDisassembler.cpp
index 6b355e6363b22..f3873ca4dbbe2 100644
--- a/llvm/lib/Target/Xtensa/Disassembler/XtensaDisassembler.cpp
+++ b/llvm/lib/Target/Xtensa/Disassembler/XtensaDisassembler.cpp
@@ -114,9 +114,10 @@ static DecodeStatus DecodeMR23RegisterClass(MCInst &Inst, uint64_t RegNo,
}
const MCPhysReg SRDecoderTable[] = {
- Xtensa::SAR, 3, Xtensa::ACCLO, 16, Xtensa::ACCHI, 17,
- Xtensa::M0, 32, Xtensa::M1, 33, Xtensa::M2, 34,
- Xtensa::M3, 35, Xtensa::WINDOWBASE, 72, Xtensa::WINDOWSTART, 73};
+ Xtensa::SAR, 3, Xtensa::ACCLO, 16, Xtensa::ACCHI, 17,
+ Xtensa::SCOMPARE1, 12, Xtensa::M0, 32, Xtensa::M1, 33,
+ Xtensa::M2, 34, Xtensa::M3, 35, Xtensa::WINDOWBASE, 72,
+ Xtensa::WINDOWSTART, 73, Xtensa::ATOMCTL, 99};
static DecodeStatus DecodeSRRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
diff --git a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp
index 792faf811aca9..59b7582c0268f 100644
--- a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp
+++ b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp
@@ -86,6 +86,9 @@ bool Xtensa::checkRegister(MCRegister RegNo, const FeatureBitset &FeatureBits) {
case Xtensa::WINDOWBASE:
case Xtensa::WINDOWSTART:
return FeatureBits[Xtensa::FeatureWindowed];
+ case Xtensa::ATOMCTL:
+ case Xtensa::SCOMPARE1:
+ return FeatureBits[Xtensa::FeatureS32C1I];
case Xtensa::NoRegister:
return false;
}
diff --git a/llvm/lib/Target/Xtensa/XtensaFeatures.td b/llvm/lib/Target/Xtensa/XtensaFeatures.td
index 2a47214946401..623573840953b 100644
--- a/llvm/lib/Target/Xtensa/XtensaFeatures.td
+++ b/llvm/lib/Target/Xtensa/XtensaFeatures.td
@@ -67,3 +67,19 @@ def FeatureDiv32 : SubtargetFeature<"div32", "HasDiv32", "true",
"Enable Xtensa Div32 option">;
def HasDiv32 : Predicate<"Subtarget->hasDiv32()">,
AssemblerPredicate<(all_of FeatureDiv32)>;
+
+def FeatureS32C1I : SubtargetFeature<"s32c1i", "HasS32C1I", "true",
+ "Enable Xtensa S32C1I option">;
+def HasS32C1I : Predicate<"Subtarget->hasS32C1I()">,
+ AssemblerPredicate<(all_of FeatureS32C1I)>;
+
+// Assume that lock-free native-width atomics are available, even if the target
+// and operating system combination would not usually provide them. The user
+// is responsible for providing any necessary __sync implementations. Code
+// built with this feature is not ABI-compatible with code built without this
+// feature, if atomic variables are exposed across the ABI boundary.
+def FeatureForcedAtomics : SubtargetFeature<"forced-atomics", "HasForcedAtomics", "true",
+ "Assume that lock-free native-width atomics are available">;
+def HasForcedAtomics : Predicate<"Subtarget->hasForcedAtomics()">,
+ AssemblerPredicate<(all_of FeatureForcedAtomics)>;
+def HasAtomicLdSt : Predicate<"Subtarget->hasS32C1I() || Subtarget->hasForcedAtomics()">;
diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
index b17840aad9b4d..e74c5c1e61b5d 100644
--- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
@@ -175,6 +175,40 @@ XtensaTargetLowering::XtensaTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VACOPY, MVT::Other, Custom);
setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ // to have the best chance and doing something good with fences custom lower
+ // them
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
+ if (!Subtarget.hasS32C1I()) {
+ for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
+ I <= MVT::LAST_INTEGER_VALUETYPE; ++I) {
+ MVT VT = MVT::SimpleValueType(I);
+ if (isTypeLegal(VT)) {
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Expand);
+ }
+ }
+ }
+
+ if (Subtarget.hasS32C1I()) {
+ setMaxAtomicSizeInBitsSupported(32);
+ setMinCmpXchgSizeInBits(32);
+ } else if (Subtarget.hasForcedAtomics()) {
+ setMaxAtomicSizeInBitsSupported(32);
+ } else {
+ setMaxAtomicSizeInBitsSupported(0);
+ }
+
// Compute derived properties from the register classes
computeRegisterProperties(STI.getRegisterInfo());
}
@@ -1241,6 +1275,13 @@ bool XtensaTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
return false;
}
+SDValue XtensaTargetLowering::LowerATOMIC_FENCE(SDValue Op,
+ SelectionDAG &DAG) const {
+SDLoc DL(Op);
+SDValue Chain = Op.getOperand(0);
+return DAG.getNode(XtensaISD::MEMW, DL, MVT::Other, Chain);
+}
+
SDValue XtensaTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
@@ -1282,6 +1323,8 @@ SDValue XtensaTargetLowering::LowerOperation(SDValue Op,
return LowerShiftRightParts(Op, DAG, true);
case ISD::SRL_PARTS:
return LowerShiftRightParts(Op, DAG, false);
+ case ISD::ATOMIC_FENCE:
+ return LowerATOMIC_FENCE(Op, DAG);
default:
report_fatal_error("Unexpected node to lower");
}
@@ -1383,6 +1426,731 @@ XtensaTargetLowering::emitSelectCC(MachineInstr &MI,
return SinkMBB;
}
+// Emit instructions for atomic_cmp_swap node for 8/16 bit operands
+MachineBasicBlock *
+XtensaTargetLowering::emitAtomicCmpSwap(MachineInstr &MI, MachineBasicBlock *BB,
+ int isByteOperand) const {
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ DebugLoc DL = MI.getDebugLoc();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = ++BB->getIterator();
+
+ MachineBasicBlock *thisBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *BBLoop = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *BBExit = F->CreateMachineBasicBlock(LLVM_BB);
+
+ F->insert(It, BBLoop);
+ F->insert(It, BBExit);
+
+ // Transfer the remainder of BB and its successor edges to BBExit.
+ BBExit->splice(BBExit->begin(), BB,
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ BBExit->transferSuccessorsAndUpdatePHIs(BB);
+
+ BB->addSuccessor(BBLoop);
+
+ MachineOperand &Res = MI.getOperand(0);
+ MachineOperand &AtomValAddr = MI.getOperand(1);
+ MachineOperand &CmpVal = MI.getOperand(2);
+ MachineOperand &SwpVal = MI.getOperand(3);
+
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+
+ unsigned R1 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), R1).addImm(3);
+
+ unsigned ByteOffs = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::AND), ByteOffs)
+ .addReg(R1)
+ .addReg(AtomValAddr.getReg());
+
+ unsigned AddrAlign = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SUB), AddrAlign)
+ .addReg(AtomValAddr.getReg())
+ .addReg(ByteOffs);
+
+ unsigned BitOffs = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SLLI), BitOffs)
+ .addReg(ByteOffs)
+ .addImm(3);
+
+ unsigned Mask1 = MRI.createVirtualRegister(RC);
+ if (isByteOperand) {
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), Mask1).addImm(0xff);
+ } else {
+ unsigned R2 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), R2).addImm(1);
+ unsigned R3 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SLLI), R3).addReg(R2).addImm(16);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::ADDI), Mask1).addReg(R3).addImm(-1);
+ }
+
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SSL)).addReg(BitOffs);
+
+ unsigned R2 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), R2).addImm(-1);
+
+ unsigned Mask2 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SLL), Mask2).addReg(Mask1);
+
+ unsigned Mask3 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::XOR), Mask3).addReg(Mask2).addReg(R2);
+
+ unsigned R3 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::L32I), R3).addReg(AddrAlign).addImm(0);
+
+ unsigned R4 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::AND), R4).addReg(R3).addReg(Mask3);
+
+ unsigned Cmp1 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SLL), Cmp1).addReg(CmpVal.getReg());
+
+ unsigned Swp1 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SLL), Swp1).addReg(SwpVal.getReg());
+
+ BB = BBLoop;
+
+ unsigned MaskPhi = MRI.createVirtualRegister(RC);
+ unsigned MaskLoop = MRI.createVirtualRegister(RC);
+
+ BuildMI(*BB, BB->begin(), DL, TII.get(Xtensa::PHI), MaskPhi)
+ .addReg(MaskLoop)
+ .addMBB(BBLoop)
+ .addReg(R4)
+ .addMBB(thisBB);
+
+ unsigned Cmp2 = MRI.createVirtualRegister(RC);
+ BuildMI(BB, DL, TII.get(Xtensa::OR), Cmp2).addReg(Cmp1).addReg(MaskPhi);
+
+ unsigned Swp2 = MRI.createVirtualRegister(RC);
+ BuildMI(BB, DL, TII.get(Xtensa::OR), Swp2).addReg(Swp1).addReg(MaskPhi);
+
+ BuildMI(BB, DL, TII.get(Xtensa::WSR), Xtensa::SCOMPARE1).addReg(Cmp2);
+
+ unsigned Swp3 = MRI.createVirtualRegister(RC);
+ BuildMI(BB, DL, TII.get(Xtensa::S32C1I), Swp3)
+ .addReg(Swp2)
+ .addReg(AddrAlign)
+ .addImm(0);
+
+ BuildMI(BB, DL, TII.get(Xtensa::AND), MaskLoop).addReg(Swp3).addReg(Mask3);
+
+ BuildMI(BB, DL, TII.get(Xtensa::BNE))
+ .addReg(MaskLoop)
+ .addReg(MaskPhi)
+ .addMBB(BBLoop);
+
+ BB->addSuccessor(BBLoop);
+ BB->addSuccessor(BBExit);
+
+ BB = BBExit;
+ auto St = BBExit->begin();
+
+ unsigned R5 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, St, DL, TII.get(Xtensa::SSR)).addReg(BitOffs);
+
+ BuildMI(*BB, St, DL, TII.get(Xtensa::SRL), R5).addReg(Swp3);
+
+ BuildMI(*BB, St, DL, TII.get(Xtensa::AND), Res.getReg())
+ .addReg(R5)
+ .addReg(Mask1);
+
+ MI.eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+// Emit instructions for atomic_swap node for 8/16 bit operands
+MachineBasicBlock *
+XtensaTargetLowering::emitAtomicSwap(MachineInstr &MI, MachineBasicBlock *BB,
+ int isByteOperand) const {
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ DebugLoc DL = MI.getDebugLoc();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = ++BB->getIterator();
+
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *BBLoop1 = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *BBLoop2 = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *BBLoop3 = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *BBLoop4 = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *BBExit = F->CreateMachineBasicBlock(LLVM_BB);
+
+ F->insert(It, BBLoop1);
+ F->insert(It, BBLoop2);
+ F->insert(It, BBLoop3);
+ F->insert(It, BBLoop4);
+ F->insert(It, BBExit);
+
+ // Transfer the remainder of BB and its successor edges to BBExit.
+ BBExit->splice(BBExit->begin(), BB,
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ BBExit->transferSuccessorsAndUpdatePHIs(BB);
+
+ BB->addSuccessor(BBLoop1);
+ BBLoop1->addSuccessor(BBLoop2);
+ BBLoop2->addSuccessor(BBLoop3);
+ BBLoop2->addSuccessor(BBLoop4);
+ BBLoop3->addSuccessor(BBLoop2);
+ BBLoop3->addSuccessor(BBLoop4);
+ BBLoop4->addSuccessor(BBLoop1);
+ BBLoop4->addSuccessor(BBExit);
+
+ MachineOperand &Res = MI.getOperand(0);
+ MachineOperand &AtomValAddr = MI.getOperand(1);
+ MachineOperand &SwpVal = MI.getOperand(2);
+
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+
+ unsigned R1 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), R1).addImm(3);
+
+ unsigned ByteOffs = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::AND), ByteOffs)
+ .addReg(R1)
+ .addReg(AtomValAddr.getReg());
+
+ unsigned AddrAlign = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SUB), AddrAlign)
+ .addReg(AtomValAddr.getReg())
+ .addReg(ByteOffs);
+
+ unsigned BitOffs = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SLLI), BitOffs)
+ .addReg(ByteOffs)
+ .addImm(3);
+
+ unsigned Mask1 = MRI.createVirtualRegister(RC);
+ if (isByteOperand) {
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), Mask1).addImm(0xff);
+ } else {
+ unsigned R2 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), R2).addImm(1);
+ unsigned R3 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SLLI), R3).addReg(R2).addImm(16);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::ADDI), Mask1).addReg(R3).addImm(-1);
+ }
+
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SSL)).addReg(BitOffs);
+
+ unsigned R2 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), R2).addImm(-1);
+
+ unsigned Mask2 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SLL), Mask2).addReg(Mask1);
+
+ unsigned Mask3 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::XOR), Mask3).addReg(Mask2).addReg(R2);
+
+ unsigned R3 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::L32I), R3).addReg(AddrAlign).addImm(0);
+
+ unsigned R4 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::AND), R4).addReg(R3).addReg(Mask3);
+
+ unsigned SwpValShifted = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::SLL), SwpValShifted)
+ .addReg(SwpVal.getReg());
+
+ unsigned R5 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::L32I), R5).addReg(AddrAlign).addImm(0);
+
+ unsigned AtomVal = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::AND), AtomVal).addReg(R5).addReg(Mask2);
+
+ unsigned AtomValPhi = MRI.createVirtualRegister(RC);
+ unsigned AtomValLoop = MRI.createVirtualRegister(RC);
+
+ BuildMI(*BBLoop1, BBLoop1->begin(), DL, TII.get(Xtensa::PHI), AtomValPhi)
+ .addReg(AtomValLoop)
+ .addMBB(BBLoop4)
+ .addReg(AtomVal)
+ .addMBB(BB);
+
+ BB = BBLoop1;
+
+ BuildMI(BB, DL, TII.get(Xtensa::MEMW));
+
+ unsigned R6 = MRI.createVirtualRegister(RC);
+ BuildMI(BB, DL, TII.get(Xtensa::L32I), R6).addReg(AddrAlign).addImm(0);
+
+ unsigned R7 = MRI.createVirtualRegister(RC);
+ BuildMI(BB, DL, TII.get(Xtensa::AND), R7).addReg(R6).addReg(Mask3);
+
+ unsigned MaskPhi = MRI.createVirtualRegister(RC);
+ unsigned MaskLoop = MRI.createVirtualRegister(RC);
+
+ BuildMI(*BBLoop2, BBLoop2->begin(), DL, TII.get(Xtensa::PHI), MaskPhi)
+ .addReg(MaskLoop)
+ .addMBB(BBLoop3)
+ .addReg(R7)
+ .addMBB(BBLoop1);
+
+ BB = BBLoop2;
+
+ unsigned Swp1 = MRI.createVirtualRegister(RC);
+ BuildMI(BB, DL, TII.get(Xtensa::OR), Swp1)
+ .addReg(SwpValShifted)
+ .addReg(MaskPhi);
+
+ unsigned AtomVal1 = MRI.createVirtualRegister(RC);
+ BuildMI(BB, DL, TII.get(Xtensa::OR), AtomVal1)
+ .addReg(AtomValPhi)
+ .addReg(MaskPhi);
+
+ BuildMI(BB, DL, TII.get(Xtensa::WSR), Xtensa::SCOMPARE1).addReg(AtomVal1);
+
+ unsigned Swp2 = MRI.createVirtualRegister(RC);
+ BuildMI(BB, DL, TII.get(Xtensa::S32C1I), Swp2)
+ .addReg(Swp1)
+ .addReg(AddrAlign)
+ .addImm(0);
+
+ BuildMI(BB, DL, TII.get(Xtensa::BEQ))
+ .addReg(AtomVal1)
+ .addReg(Swp2)
+ .addMBB(BBLoop4);
+
+ BB = BBLoop3;
+
+ BuildMI(BB, DL, TII.get(Xtensa::AND), MaskLoop).addReg(Swp2).addReg(Mask3);
+
+ BuildMI(BB, DL, TII.get(Xtensa::BNE))
+ .addReg(MaskLoop)
+ .addReg(MaskPhi)
+ .addMBB(BBLoop2);
+
+ BB = BBLoop4;
+
+ BuildMI(BB, DL, TII.get(Xtensa::AND), AtomValLoop).addReg(Swp2).addReg(Mask2);
+
+ BuildMI(BB, DL, TII.get(Xtensa::BNE))
+ .addReg(AtomValLoop)
+ .addReg(AtomValPhi)
+ .addMBB(BBLoop1);
+
+ BB = BBExit;
+
+ auto St = BB->begin();
+
+ unsigned R8 = MRI.createVirtualRegister(RC);
+
+ BuildMI(*BB, St, DL, TII.get(Xtensa::SSR)).addReg(BitOffs);
+ BuildMI(*BB, St, DL, TII.get(Xtensa::SRL), R8).addReg(AtomValLoop);
+
+ if (isByteOperand) {
+ BuildMI(*BB, St, DL, TII.get(Xtensa::SEXT), Res.getReg())
+ .addReg(R8)
+ .addImm(7);
+ } else {
+ BuildMI(*BB, St, DL, TII.get(Xtensa::SEXT), Res.getReg())
+ .addReg(R8)
+ .addImm(15);
+ }
+
+ MI.eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+// Emit instructions for atomic_swap node for 32 bit operands
+MachineBasicBlock *
+XtensaTargetLowering::emitAtomicSwap(MachineInstr &MI,
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ DebugLoc DL = MI.getDebugLoc();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = ++BB->getIterator();
+
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *BBLoop = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *BBExit = F->CreateMachineBasicBlock(LLVM_BB);
+
+ F->insert(It, BBLoop);
+ F->insert(It, BBExit);
+
+ // Transfer the remainder of BB and its successor edges to BBExit.
+ BBExit->splice(BBExit->begin(), BB,
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ BBExit->transferSuccessorsAndUpdatePHIs(BB);
+
+ BB->addSuccessor(BBLoop);
+ BBLoop->addSuccessor(BBLoop);
+ BBLoop->addSuccessor(BBExit);
+
+ MachineOperand &Res = MI.getOperand(0);
+ MachineOperand &AtomValAddr = MI.getOperand(1);
+ MachineOperand &SwpVal = MI.getOperand(2);
+
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::MEMW));
+
+ unsigned AtomVal = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII.get(Xtensa::L32I), AtomVal)
+ .addReg(AtomValAddr.getReg())
+ .addImm(0);
+
+ unsigned AtomValLoop = MRI.createVirtualRegister(RC);
+
+ BuildMI(*BBLoop, BBLoop->begin(), DL, TII.get(Xtensa::PHI), Res.getReg())
+ .addReg(AtomValLoop)
+ .addMBB(BBLoop)
+ .addReg(AtomVal)
+ .addMBB(BB);
+
+ BB = BBLoop;
+
+ BuildMI(BB, DL, TII.get(Xtensa::WSR), Xtensa::SCOMPARE1).addReg(Res.getReg());
+
+ BuildMI(BB, DL, TII.get(Xtensa::S32C1I), AtomValLoop)
+ .addReg(SwpVal.getReg())
+ .addReg(AtomValAddr.getReg())
+ .addImm(0);
+
+ BuildMI(BB, DL, TII.get(Xtensa::BNE))
+ .addReg(AtomValLoop)
+ .addReg(Res.getReg())
+ .addMBB(BBLoop);
+
+ MI.eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *XtensaTargetLowering::emitAtomicRMW(MachineInstr &MI,
+ MachineBasicBlock *BB,
+ unsigned Opcode,
+ bool inv,
+ bool minmax) const {
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ DebugLoc DL = MI.getDebugLoc();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = ++BB->getIterator();
+
+ MachineBasicBlock *ThisBB = BB;
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *BBLoop = F->CreateMachineBasicBlock(LLV...
[truncated]
You can test this locally with the following command:

git-clang-format --diff HEAD~1 HEAD --extensions h,cpp -- llvm/lib/Target/Xtensa/Disassembler/XtensaDisassembler.cpp llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp llvm/lib/Target/Xtensa/XtensaISelLowering.cpp llvm/lib/Target/Xtensa/XtensaISelLowering.h llvm/lib/Target/Xtensa/XtensaSubtarget.h llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp

View the diff from clang-format here.

diff --git a/llvm/lib/Target/Xtensa/Disassembler/XtensaDisassembler.cpp b/llvm/lib/Target/Xtensa/Disassembler/XtensaDisassembler.cpp
index f3873ca4d..928a0c623 100644
--- a/llvm/lib/Target/Xtensa/Disassembler/XtensaDisassembler.cpp
+++ b/llvm/lib/Target/Xtensa/Disassembler/XtensaDisassembler.cpp
@@ -114,9 +114,9 @@ static DecodeStatus DecodeMR23RegisterClass(MCInst &Inst, uint64_t RegNo,
}
const MCPhysReg SRDecoderTable[] = {
- Xtensa::SAR, 3, Xtensa::ACCLO, 16, Xtensa::ACCHI, 17,
- Xtensa::SCOMPARE1, 12, Xtensa::M0, 32, Xtensa::M1, 33,
- Xtensa::M2, 34, Xtensa::M3, 35, Xtensa::WINDOWBASE, 72,
+ Xtensa::SAR, 3, Xtensa::ACCLO, 16, Xtensa::ACCHI, 17,
+ Xtensa::SCOMPARE1, 12, Xtensa::M0, 32, Xtensa::M1, 33,
+ Xtensa::M2, 34, Xtensa::M3, 35, Xtensa::WINDOWBASE, 72,
Xtensa::WINDOWSTART, 73, Xtensa::ATOMCTL, 99};
static DecodeStatus DecodeSRRegisterClass(MCInst &Inst, uint64_t RegNo,
diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
index e74c5c1e6..1dc8a2369 100644
--- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
@@ -175,7 +175,7 @@ XtensaTargetLowering::XtensaTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VACOPY, MVT::Other, Custom);
setOperationAction(ISD::VAEND, MVT::Other, Expand);
- // to have the best chance and doing something good with fences custom lower
+ // to have the best chance and doing something good with fences custom lower
// them
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
@@ -1276,10 +1276,10 @@ bool XtensaTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
}
SDValue XtensaTargetLowering::LowerATOMIC_FENCE(SDValue Op,
- SelectionDAG &DAG) const {
-SDLoc DL(Op);
-SDValue Chain = Op.getOperand(0);
-return DAG.getNode(XtensaISD::MEMW, DL, MVT::Other, Chain);
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ SDValue Chain = Op.getOperand(0);
+ return DAG.getNode(XtensaISD::MEMW, DL, MVT::Other, Chain);
}
SDValue XtensaTargetLowering::LowerOperation(SDValue Op,
diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.h b/llvm/lib/Target/Xtensa/XtensaISelLowering.h
index 0e5582526..6d610c99f 100644
--- a/llvm/lib/Target/Xtensa/XtensaISelLowering.h
+++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.h
@@ -178,7 +178,7 @@ private:
MachineBasicBlock *emitSelectCC(MachineInstr &MI,
MachineBasicBlock *BB) const;
- MachineBasicBlock *emitAtomicSwap(MachineInstr &MI, MachineBasicBlock *BB,
+ MachineBasicBlock *emitAtomicSwap(MachineInstr &MI, MachineBasicBlock *BB,
int isByteOperand) const;
MachineBasicBlock *emitAtomicCmpSwap(MachineInstr &MI, MachineBasicBlock *BB,
int isByteOperand) const;
    Xtensa::SAR, 3, Xtensa::ACCLO, 16, Xtensa::ACCHI, 17,
    Xtensa::SCOMPARE1, 12, Xtensa::M0, 32, Xtensa::M1, 33,
    Xtensa::M2, 34, Xtensa::M3, 35, Xtensa::WINDOWBASE, 72,
    Xtensa::WINDOWSTART, 73, Xtensa::ATOMCTL, 99};
I don't understand this table; it mixes an MCPhysReg and then a random number. Should this be some kind of pair or struct?
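A struct would make the pairing explicit. A rough sketch (the entry name is illustrative, not from the patch):

  struct SRDecoderEntry {
    MCPhysReg Reg;     // special register
    unsigned Encoding; // its number in the instruction encoding
  };

  const SRDecoderEntry SRDecoderTable[] = {
      {Xtensa::SAR, 3},         {Xtensa::SCOMPARE1, 12},
      {Xtensa::ACCLO, 16},      {Xtensa::ACCHI, 17},
      {Xtensa::M0, 32},         {Xtensa::M1, 33},
      {Xtensa::M2, 34},         {Xtensa::M3, 35},
      {Xtensa::WINDOWBASE, 72}, {Xtensa::WINDOWSTART, 73},
      {Xtensa::ATOMCTL, 99}};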
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_INTEGER_VALUETYPE; ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
You only make one type legal, so why not just setOperationAction({list_of_atomic_opcodes}, MVT::i32)? Alternatively, define a list of legal types.
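If the ArrayRef overload of setOperationAction is available here, the loop could collapse to something like this (sketch; opcode list copied from the patch):

  setOperationAction({ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP,
                      ISD::ATOMIC_LOAD_ADD, ISD::ATOMIC_LOAD_SUB,
                      ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR,
                      ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND,
                      ISD::ATOMIC_LOAD_MIN, ISD::ATOMIC_LOAD_MAX,
                      ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX},
                     MVT::i32, Expand);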
  // to have the best chance and doing something good with fences custom lower
  // them
Remove or replace comment with something more informative about why it needs custom lowering
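For example, something along these lines (suggested wording only; it assumes MEMW is the only fence-like instruction Xtensa provides):

  // Xtensa has no dedicated fence instruction; custom lower ATOMIC_FENCE
  // to a single MEMW barrier instead of the default expansion.
  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);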
  BB = BBExit;
  auto St = BBExit->begin();

  unsigned R5 = MRI.createVirtualRegister(RC);
Suggested change:
- unsigned R5 = MRI.createVirtualRegister(RC);
+ Register R5 = MRI.createVirtualRegister(RC);
Use Register instead of unsigned throughout
  MachineBasicBlock *BBLoop1 = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(++BB->getIterator(), BBLoop1);
  BB->addSuccessor(BBLoop1);
  MachineBasicBlock *BBLoop2 = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(++BB->getIterator(), BBLoop2);
  BB->addSuccessor(BBLoop2);
  BBLoop2->addSuccessor(BBLoop1);
Can possibly use MachineBasicBlock::splitAt?
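Untested sketch of that idea; splitAt keeps MI in the original block and moves everything after it, along with the successor edges, into the new block:

  // Let splitAt create the exit block instead of splice + transferSuccessors.
  MachineBasicBlock *BBExit = BB->splitAt(MI, /*UpdateLiveIns=*/false);
  MachineBasicBlock *BBLoop1 = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(BBExit->getIterator(), BBLoop1);
  BB->replaceSuccessor(BBExit, BBLoop1); // fall through into the loop instead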
  return BB;
}

MachineBasicBlock *XtensaTargetLowering::emitAtomicRMW(MachineInstr &MI,
Is there a reason you need this custom post-selection expansion instead of using the default in AtomicExpandPass?
MachineBasicBlock *
XtensaTargetLowering::emitAtomicSwap(MachineInstr &MI,
                                     MachineBasicBlock *BB) const {
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
Directly use the target-specific instance.
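i.e., roughly this (assuming XtensaSubtarget::getInstrInfo() returns the derived XtensaInstrInfo, as in other backends):

  const XtensaInstrInfo &TII = *Subtarget.getInstrInfo();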
        setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Expand);
        setOperationAction(ISD::ATOMIC_SWAP, VT, Expand);
        setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Expand);
        setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Expand);
        setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Expand);
        setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Expand);
        setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Expand);
        setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Expand);
        setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Expand);
        setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Expand);
        setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Expand);
        setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Expand);
I'm not sure any of this is doing anything. The default expansion only emits the libcall (really should migrate these to using the explicit libcall action..), and I'm not sure what the default is for the atomic runtime libcalls.
To expand atomics you need to override shouldExpandAtomicRMWInIR
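A minimal sketch of that override (the hasS32C1I() condition is an assumption, not something this patch adds):

  TargetLowering::AtomicExpansionKind
  XtensaTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
    // Without S32C1I, setMaxAtomicSizeInBitsSupported(0) already routes
    // atomics to __atomic_* libcalls, so no IR expansion is needed.
    if (!Subtarget.hasS32C1I())
      return AtomicExpansionKind::None;
    // With S32C1I, let AtomicExpandPass turn each RMW into a cmpxchg loop.
    return AtomicExpansionKind::CmpXChg;
  }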
SDValue XtensaTargetLowering::LowerATOMIC_FENCE(SDValue Op,
                                                SelectionDAG &DAG) const {
SDLoc DL(Op);
SDValue Chain = Op.getOperand(0);
return DAG.getNode(XtensaISD::MEMW, DL, MVT::Other, Chain);
}
Formatting. Can you make this directly legal and select to memw, or do you really need to do something in the combiner with memw?
@@ -117,6 +118,10 @@ class XtensaTargetLowering : public TargetLowering {
      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
      SelectionDAG &DAG) const override;

  bool shouldInsertFencesForAtomic(const Instruction *I) const override {
    return true;
Should get test coverage in test/Transforms/AtomicExpand