diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
index 8715403f3839a..6229029106ae2 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -52,6 +52,7 @@ add_llvm_target(RISCVCodeGen
   RISCVPushPopOptimizer.cpp
   RISCVRegisterInfo.cpp
+  RISCVSelectionDAGTargetInfo.cpp
   RISCVSubtarget.cpp
   RISCVTargetMachine.cpp
   RISCVTargetObjectFile.cpp
   RISCVTargetTransformInfo.cpp
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index e647f56416bfa..5ae7f536bfb96 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -17655,6 +17655,146 @@ static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
   return DoneMBB;
 }
 
+/// Expand PseudoSTRCMPI into an unrolled byte-by-byte string comparison.
+///
+/// Operand 1 holds the address of the non-constant string and operand 2 is the
+/// global whose initializer is the constant C string. Operand 3 is 1 if the
+/// constant was strcmp's second argument and 0 if it was the first, in which
+/// case the result is negated. The constant string must not be empty.
+///
+/// \returns the block that receives the instructions that followed \p MI.
+static MachineBasicBlock *emitSTRCMPI(MachineInstr &MI, MachineBasicBlock *MBB,
+                                      const RISCVSubtarget &Subtarget) {
+  const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
+  MachineFunction &MF = *MBB->getParent();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const DebugLoc DL = MI.getDebugLoc();
+
+  const auto *GV = cast<GlobalVariable>(MI.getOperand(2).getGlobal());
+  StringRef Str = cast<ConstantDataArray>(GV->getInitializer())->getAsCString();
+  const int NumOfBytes = static_cast<int>(Str.size());
+  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+
+  // Split MBB after MI: everything that followed MI moves into ExitMBB.
+  MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(LLVM_BB);
+  MF.insert(std::next(MBB->getIterator()), ExitMBB);
+  ExitMBB->splice(ExitMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
+  ExitMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+  // The code below computes strcmp(non-constant, constant). A PHI in ExitMBB
+  // merges the value produced by each exit; it is negated afterwards when the
+  // constant string was really the first argument.
+  const Register ResultReg = MI.getOperand(0).getReg();
+  const bool NeedToNegateResult = MI.getOperand(3).getImm() == 0;
+  const Register PHIReg = NeedToNegateResult
+                              ? MRI.createVirtualRegister(&RISCV::GPRRegClass)
+                              : ResultReg;
+  MachineInstrBuilder PHI_MIB =
+      BuildMI(*ExitMBB, ExitMBB->begin(), DL, TII.get(RISCV::PHI), PHIReg);
+  if (NeedToNegateResult)
+    BuildMI(*ExitMBB, std::next(ExitMBB->begin()), DL, TII.get(RISCV::SUB),
+            ResultReg)
+        .addReg(RISCV::X0)
+        .addReg(PHIReg);
+
+  // Exit taken when the non-constant string ends first: the result is -1.
+  // The block sits right before ExitMBB and falls through into it.
+  MachineBasicBlock *ReturnEarlyNullByteMBB =
+      MF.CreateMachineBasicBlock(LLVM_BB);
+  MF.insert(ExitMBB->getIterator(), ReturnEarlyNullByteMBB);
+  const Register NegReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+  BuildMI(*ReturnEarlyNullByteMBB, ReturnEarlyNullByteMBB->end(), DL,
+          TII.get(RISCV::ADDI), NegReg)
+      .addReg(RISCV::X0)
+      .addImm(-1);
+  ReturnEarlyNullByteMBB->addSuccessor(ExitMBB);
+  PHI_MIB.addReg(NegReg).addMBB(ReturnEarlyNullByteMBB);
+
+  const Register BaseReg = MI.getOperand(1).getReg();
+  const MachinePointerInfo PtrInfo = MI.memoperands()[0]->getPointerInfo();
+
+  // Emits `Dst = LBU Off(BaseReg)` before \p InsertPt and attaches a one-byte
+  // load memory operand for that offset. Returns Dst.
+  auto EmitByteLoad = [&](MachineBasicBlock &BB,
+                          MachineBasicBlock::iterator InsertPt, int64_t Off) {
+    const Register Dst = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+    MachineMemOperand *ByteMMO = MF.getMachineMemOperand(
+        PtrInfo.getWithOffset(Off), MachineMemOperand::MOLoad, LLT(MVT::i8),
+        Align(1));
+    BuildMI(BB, InsertPt, DL, TII.get(RISCV::LBU), Dst)
+        .addReg(BaseReg)
+        .addImm(Off)
+        .addMemOperand(ByteMMO);
+    return Dst;
+  };
+
+  // Block reached once every non-null byte of the constant has matched. The
+  // next byte of the constant is its terminator (0), so the result is simply
+  // the non-constant string's byte at that position: 0 if it ends there too,
+  // positive if it is longer. The byte must not be negated here.
+  MachineBasicBlock *CheckNullByteMBB = MF.CreateMachineBasicBlock(LLVM_BB);
+  MF.insert(ReturnEarlyNullByteMBB->getIterator(), CheckNullByteMBB);
+  const Register LastByteReg =
+      EmitByteLoad(*CheckNullByteMBB, CheckNullByteMBB->end(), NumOfBytes);
+  BuildMI(*CheckNullByteMBB, CheckNullByteMBB->end(), DL,
+          TII.get(RISCV::PseudoBR))
+      .addMBB(ExitMBB);
+  CheckNullByteMBB->addSuccessor(ExitMBB);
+  PHI_MIB.addReg(LastByteReg).addMBB(CheckNullByteMBB);
+
+  // Byte 0 is checked in the original MBB. Create one block for each remaining
+  // non-null byte, laid out in string order right before CheckNullByteMBB.
+  // The last slot is CheckNullByteMBB itself, so that it becomes the
+  // fall-through successor of the block checking the last non-null byte.
+  SmallVector<MachineBasicBlock *> LoadMBBs(NumOfBytes);
+  LoadMBBs[NumOfBytes - 1] = CheckNullByteMBB;
+  for (int I = NumOfBytes - 2; I >= 0; --I) {
+    LoadMBBs[I] = MF.CreateMachineBasicBlock(LLVM_BB);
+    MF.insert(LoadMBBs[I + 1]->getIterator(), LoadMBBs[I]);
+  }
+
+  // Emits the check for byte \p Idx of the constant. \p LoadMBB loads the
+  // matching byte of the other string and exits with -1 if it is the
+  // terminator. A new block placed before \p NextMBB then exits with the byte
+  // difference if the two bytes differ and otherwise falls through.
+  auto EmitByteCheck = [&](int Idx, MachineBasicBlock *LoadMBB,
+                           MachineBasicBlock *NextMBB) {
+    const Register LoadedByteReg = EmitByteLoad(*LoadMBB, LoadMBB->end(), Idx);
+    BuildMI(*LoadMBB, LoadMBB->end(), DL, TII.get(RISCV::BEQ))
+        .addReg(LoadedByteReg)
+        .addReg(RISCV::X0)
+        .addMBB(ReturnEarlyNullByteMBB);
+
+    MachineBasicBlock *CmpMBB = MF.CreateMachineBasicBlock(LLVM_BB);
+    MF.insert(NextMBB->getIterator(), CmpMBB);
+    LoadMBB->addSuccessor(ReturnEarlyNullByteMBB);
+    LoadMBB->addSuccessor(CmpMBB);
+
+    // strcmp compares bytes as unsigned char. Converting through unsigned char
+    // keeps the immediate right for bytes >= 0x80 where plain char is signed.
+    const int64_t ByteVal = static_cast<unsigned char>(Str[Idx]);
+    const Register DiffReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+    BuildMI(*CmpMBB, CmpMBB->end(), DL, TII.get(RISCV::ADDI), DiffReg)
+        .addReg(LoadedByteReg)
+        .addImm(-ByteVal);
+    BuildMI(*CmpMBB, CmpMBB->end(), DL, TII.get(RISCV::BNE))
+        .addReg(DiffReg)
+        .addReg(RISCV::X0)
+        .addMBB(ExitMBB);
+
+    CmpMBB->addSuccessor(ExitMBB);
+    PHI_MIB.addReg(DiffReg).addMBB(CmpMBB);
+    CmpMBB->addSuccessor(NextMBB);
+  };
+
+  EmitByteCheck(0, MBB, LoadMBBs[0]);
+  for (int I = 1; I < NumOfBytes; ++I)
+    EmitByteCheck(I, LoadMBBs[I - 1], LoadMBBs[I]);
+
+  MI.eraseFromParent();
+  return ExitMBB;
+}
+
 MachineBasicBlock *
 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                  MachineBasicBlock *BB) const {
@@ -17737,6 +17877,8 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
   case RISCV::PseudoFROUND_D_INX:
   case RISCV::PseudoFROUND_D_IN32X:
     return emitFROUND(MI, BB, Subtarget);
+  case RISCV::PseudoSTRCMPI:
+    return emitSTRCMPI(MI, BB, Subtarget);
   case TargetOpcode::STATEPOINT:
   case TargetOpcode::STACKMAP:
   case TargetOpcode::PATCHPOINT:
@@ -19512,6 +19654,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(SWAP_CSR)
   NODE_NAME_CASE(CZERO_EQZ)
   NODE_NAME_CASE(CZERO_NEZ)
+  NODE_NAME_CASE(STRCMP)
   NODE_NAME_CASE(SF_VC_XV_SE)
   NODE_NAME_CASE(SF_VC_IV_SE)
   NODE_NAME_CASE(SF_VC_VV_SE)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index a38463f810270..52dda10a56a66 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -456,6 +456,7 @@ enum NodeType : unsigned {
   TH_LDD,
   TH_SWD,
   TH_SDD,
+  STRCMP,
 };
 // clang-format on
 } // namespace RISCVISD
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index e753c1f1add0c..209a6380a8809 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1952,6 +1952,37 @@ def : Pat<(shl (zext GPR:$rs), uimm5:$shamt),
           (SRLI (i64 (SLLI GPR:$rs, 32)), (ImmSubFrom32 uimm5:$shamt))>;
 }
 
+// strcmp in which one argument is the address of a constant C string. Type
+// index 0 is the integer result; indices 1 and 2 are the two string pointers.
+def riscv_strcmp : SDNode<
+  "RISCVISD::STRCMP",
+  SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisPtrTy<2>]>,
+  [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]
+>;
+
+// Expanded by the custom inserter into an unrolled byte-wise comparison.
+// $str1 is the non-constant string and $str2 the global holding the constant
+// one. $constant_str_idx is 0 if the constant string was strcmp's first
+// argument and 1 if it was the second.
+let usesCustomInserter = 1, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
+def PseudoSTRCMPI : Pseudo<
+  (outs GPR:$rd),
+  (ins GPR:$str1, i64imm:$str2, i64imm:$constant_str_idx),
+  []
+>;
+
+// Constant string is the first strcmp argument.
+def : Pat<
+  (XLenVT (riscv_strcmp tglobaladdr:$str1, iPTR:$str2)),
+  (PseudoSTRCMPI GPR:$str2, tglobaladdr:$str1, 0)
+>;
+
+// Constant string is the second strcmp argument.
+def : Pat<
+  (XLenVT (riscv_strcmp iPTR:$str1, tglobaladdr:$str2)),
+  (PseudoSTRCMPI GPR:$str1, tglobaladdr:$str2, 1)
+>;
+
 //===----------------------------------------------------------------------===//
 // Standard extensions
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVSelectionDAGTargetInfo.cpp b/llvm/lib/Target/RISCV/RISCVSelectionDAGTargetInfo.cpp
new file mode 100644
index 0000000000000..12112103fadee
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVSelectionDAGTargetInfo.cpp
@@ -0,0 +1,126 @@
+//===-- RISCVSelectionDAGTargetInfo.cpp - RISCV SelectionDAG Info ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the RISCVSelectionDAGTargetInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCVSelectionDAGTargetInfo.h"
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MathExtras.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-selectiondag-target-info"
+
+static cl::opt<unsigned> MaxStrcmpSpecializeLength(
+    "riscv-max-strcmp-specialize-length", cl::Hidden,
+    cl::desc("Do not specialize strcmp if the length of constant string is "
+             "greater or equal to this parameter"),
+    cl::init(0));
+
+/// Returns true if \p GA is the start of a constant global whose initializer
+/// is a non-empty C string that is short enough to be compared inline.
+static bool canSpecializeStrcmp(const GlobalAddressSDNode *GA) {
+  // The expansion reads the initializer from its first byte, so an address
+  // with a non-zero offset into the global cannot be handled.
+  if (GA->getOffset() != 0)
+    return false;
+  // An interposable definition may be replaced, so require a definitive
+  // initializer before relying on its contents.
+  const auto *GV = dyn_cast<GlobalVariable>(GA->getGlobal());
+  if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
+    return false;
+  const auto *CDA = dyn_cast<ConstantDataArray>(GV->getInitializer());
+  if (!CDA || !CDA->isCString())
+    return false;
+
+  // The custom inserter always checks byte 0, so "" is rejected. Every offset
+  // up to the length is used as an LBU immediate and must fit in 12 bits.
+  const size_t Length = CDA->getAsCString().size();
+  return Length != 0 && Length < MaxStrcmpSpecializeLength &&
+         isInt<12>(static_cast<int64_t>(Length));
+}
+
+/// Replace strcmp by a RISCVISD::STRCMP node when one argument is a short
+/// constant C string that is not known to be XLen-aligned.
+///
+/// \p Op1PtrInfo and \p Op2PtrInfo describe the memory behind \p Src1 and
+/// \p Src2; the alignment of their IR pointers decides whether to specialize.
+/// \returns the node's value and output chain, or a null SDValue paired with
+/// the unmodified \p Chain when the call is not specialized.
+std::pair<SDValue, SDValue>
+RISCVSelectionDAGTargetInfo::EmitTargetCodeForStrcmp(
+    SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src1,
+    SDValue Src2, MachinePointerInfo Op1PtrInfo,
+    MachinePointerInfo Op2PtrInfo) const {
+  const std::pair<SDValue, SDValue> NotHandled{SDValue(), Chain};
+  // A threshold of 0 is the default and turns the optimization off.
+  if (MaxStrcmpSpecializeLength == 0)
+    return NotHandled;
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  const MVT XLenVT = MF.getSubtarget<RISCVSubtarget>().getXLenVT();
+  const DataLayout &DLayout = DAG.getDataLayout();
+
+  // Alignment provable from the IR pointer, or Align(1) if there is none.
+  auto KnownAlign = [&DLayout](const MachinePointerInfo &PtrInfo) {
+    const auto *V = dyn_cast_if_present<const Value *>(PtrInfo.V);
+    return V ? V->getPointerAlignment(DLayout) : Align(1);
+  };
+  const Align NeededAlignment(XLenVT.getSizeInBits() / 8);
+  const Align Src1Align = KnownAlign(Op1PtrInfo);
+  const Align Src2Align = KnownAlign(Op2PtrInfo);
+  // Nothing to do when both strings are known to be XLen-aligned.
+  if (Src1Align >= NeededAlignment && Src2Align >= NeededAlignment)
+    return NotHandled;
+
+  // Find the constant string; the first argument wins if both qualify.
+  const auto *GA1 = dyn_cast<GlobalAddressSDNode>(Src1);
+  const auto *GA2 = dyn_cast<GlobalAddressSDNode>(Src2);
+  const bool ConstantStringIsFirst = GA1 && canSpecializeStrcmp(GA1);
+  const bool ConstantStringIsSecond =
+      !ConstantStringIsFirst && GA2 && canSpecializeStrcmp(GA2);
+  if (!ConstantStringIsFirst && !ConstantStringIsSecond)
+    return NotHandled;
+  const GlobalAddressSDNode *CStringGA = ConstantStringIsSecond ? GA2 : GA1;
+  const SDValue Other = ConstantStringIsSecond ? Src1 : Src2;
+  const MachinePointerInfo &OtherPtrInfo =
+      ConstantStringIsSecond ? Op1PtrInfo : Op2PtrInfo;
+
+  // The non-constant string may be aligned even though that cannot be proven
+  // here, in which case getPointerAlignment reports Align(1). If the constant
+  // string is sufficiently aligned, assume that calling libc's strcmp is the
+  // better choice.
+  const Align ConstantStrAlignment =
+      ConstantStringIsSecond ? Src2Align : Src1Align;
+  if (ConstantStrAlignment >= NeededAlignment)
+    return NotHandled;
+
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  const SDValue TGA = DAG.getTargetGlobalAddress(
+      CStringGA->getGlobal(), DL, TLI.getPointerTy(DLayout), /*offset=*/0,
+      CStringGA->getTargetFlags());
+  // The operands keep strcmp's argument order.
+  const SDValue Str1 = ConstantStringIsSecond ? Other : TGA;
+  const SDValue Str2 = ConstantStringIsSecond ? TGA : Other;
+
+  // The memory operand describes byte loads from the non-constant string.
+  MachineMemOperand *MMO = MF.getMachineMemOperand(
+      OtherPtrInfo, MachineMemOperand::MOLoad, LLT(MVT::i8), Align(1));
+  // TODO: what should be the MemVT?
+  const SDValue STRCMPNode = DAG.getMemIntrinsicNode(
+      RISCVISD::STRCMP, DL, DAG.getVTList(XLenVT, MVT::Other),
+      {Chain, Str1, Str2}, MVT::i8, MMO);
+  return std::make_pair(STRCMPNode, STRCMPNode.getValue(1));
+}
diff --git a/llvm/lib/Target/RISCV/RISCVSelectionDAGTargetInfo.h b/llvm/lib/Target/RISCV/RISCVSelectionDAGTargetInfo.h
new file mode 100644
index 0000000000000..1b95ff0e81a5a
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVSelectionDAGTargetInfo.h
@@ -0,0 +1,32 @@
+//===-- RISCVSelectionDAGTargetInfo.h - RISCV SelectionDAG Info -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RISCV subclass for SelectionDAGTargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_RISCV_RISCVSELECTIONDAGTARGETINFO_H
+#define LLVM_LIB_TARGET_RISCV_RISCVSELECTIONDAGTARGETINFO_H
+
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+
+namespace llvm {
+
+/// RISC-V implementation of the SelectionDAGTargetInfo lowering hooks.
+class RISCVSelectionDAGTargetInfo : public SelectionDAGTargetInfo {
+public:
+  /// Lower strcmp against a short constant C string to RISCVISD::STRCMP.
+  std::pair<SDValue, SDValue> EmitTargetCodeForStrcmp(
+      SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src1,
+      SDValue Src2, MachinePointerInfo Op1PtrInfo,
+      MachinePointerInfo Op2PtrInfo) const override;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index d3236bb07d56d..00ec619b760fa 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -16,6 +16,7 @@
 #include "GISel/RISCVRegisterBankInfo.h"
 #include "RISCV.h"
 #include "RISCVFrameLowering.h"
+#include "RISCVSelectionDAGTargetInfo.h"
 #include "RISCVTargetMachine.h"
 #include "llvm/CodeGen/MacroFusion.h"
 #include "llvm/CodeGen/ScheduleDAGMutation.h"
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index ba108912d9340..e4ad26d70c933 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -17,6 +17,7 @@
 #include "RISCVFrameLowering.h"
 #include "RISCVISelLowering.h"
 #include "RISCVInstrInfo.h"
+#include "RISCVSelectionDAGTargetInfo.h"
 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
@@ -86,7 +87,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
   RISCVInstrInfo InstrInfo;
   RISCVRegisterInfo RegInfo;
   RISCVTargetLowering TLInfo;
-  SelectionDAGTargetInfo TSInfo;
+  RISCVSelectionDAGTargetInfo TSInfo;
 
   /// Initializes using the passed in CPU and feature strings so that we can
   /// use initializer
lists for subtarget initialization. diff --git a/llvm/test/CodeGen/RISCV/specialize-strcmp.ll b/llvm/test/CodeGen/RISCV/specialize-strcmp.ll new file mode 100644 index 0000000000000..8cbe641b1fe7e --- /dev/null +++ b/llvm/test/CodeGen/RISCV/specialize-strcmp.ll @@ -0,0 +1,371 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=riscv64 -riscv-max-strcmp-specialize-length=10 -verify-machineinstrs -stop-after=finalize-isel < %s | FileCheck %s + +target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" +target triple = "riscv64-unknown-linux-gnu" + +@str1 = private unnamed_addr constant [2 x i8] c"a\00", align 1 +@str2 = private unnamed_addr constant [3 x i8] c"ab\00", align 1 +@str3 = private unnamed_addr constant [4 x i8] c"abc\00", align 1 +@str4 = private unnamed_addr constant [2 x i8] c"a\00", align 8 + +define i32 @test_1(ptr %x) { + ; CHECK-LABEL: name: test_1 + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: [[LBU:%[0-9]+]]:gpr = LBU [[COPY]], 0 :: (load (s8) from %ir.x) + ; CHECK-NEXT: BEQ [[LBU]], $x0, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4.entry: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI [[LBU]], -97 + ; CHECK-NEXT: BNE [[ADDI]], $x0, %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.entry: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[LBU1:%[0-9]+]]:gpr = LBU [[COPY]], 1 :: (load (s8) from %ir.x + 1) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB $x0, [[LBU1]] + ; CHECK-NEXT: PseudoBR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.entry: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, -1 + ; CHECK-NEXT: {{ $}} + ; 
CHECK-NEXT: bb.1.entry: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr = PHI [[ADDI1]], %bb.2, [[SUB]], %bb.3, [[ADDI]], %bb.4 + ; CHECK-NEXT: [[SUB1:%[0-9]+]]:gpr = SUB $x0, [[PHI]] + ; CHECK-NEXT: $x10 = COPY [[SUB1]] + ; CHECK-NEXT: PseudoRET implicit $x10 +entry: + %call = call i32 @strcmp(ptr @str1, ptr %x) + ret i32 %call +} + +define i32 @test_2(ptr %x) { + ; CHECK-LABEL: name: test_2 + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: [[LBU:%[0-9]+]]:gpr = LBU [[COPY]], 0 :: (load (s8) from %ir.x) + ; CHECK-NEXT: BEQ [[LBU]], $x0, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4.entry: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI [[LBU]], -97 + ; CHECK-NEXT: BNE [[ADDI]], $x0, %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.entry: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[LBU1:%[0-9]+]]:gpr = LBU [[COPY]], 1 :: (load (s8) from %ir.x + 1) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB $x0, [[LBU1]] + ; CHECK-NEXT: PseudoBR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.entry: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, -1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.entry: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr = PHI [[ADDI1]], %bb.2, [[SUB]], %bb.3, [[ADDI]], %bb.4 + ; CHECK-NEXT: $x10 = COPY [[PHI]] + ; CHECK-NEXT: PseudoRET implicit $x10 +entry: + %call = call i32 @strcmp(ptr %x, ptr @str1) + ret i32 %call +} + +define i32 @test_3(ptr %x) { + ; CHECK-LABEL: name: test_3 + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.5(0x40000000) + ; CHECK-NEXT: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: [[LBU:%[0-9]+]]:gpr = LBU [[COPY]], 0 :: 
(load (s8) from %ir.x) + ; CHECK-NEXT: BEQ [[LBU]], $x0, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5.entry: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI [[LBU]], -97 + ; CHECK-NEXT: BNE [[ADDI]], $x0, %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4.entry: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[LBU1:%[0-9]+]]:gpr = LBU [[COPY]], 1 :: (load (s8) from %ir.x + 1) + ; CHECK-NEXT: BEQ [[LBU1]], $x0, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6.entry: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI [[LBU1]], -98 + ; CHECK-NEXT: BNE [[ADDI1]], $x0, %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.entry: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[LBU2:%[0-9]+]]:gpr = LBU [[COPY]], 2 :: (load (s8) from %ir.x + 2) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB $x0, [[LBU2]] + ; CHECK-NEXT: PseudoBR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.entry: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADDI2:%[0-9]+]]:gpr = ADDI $x0, -1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.entry: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr = PHI [[ADDI2]], %bb.2, [[SUB]], %bb.3, [[ADDI]], %bb.5, [[ADDI1]], %bb.6 + ; CHECK-NEXT: $x10 = COPY [[PHI]] + ; CHECK-NEXT: PseudoRET implicit $x10 +entry: + %call = call i32 @strcmp(ptr %x, ptr @str2) + ret i32 %call +} + +define i32 @test_4(ptr %x) { + ; CHECK-LABEL: name: test_4 + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.5(0x40000000) + ; CHECK-NEXT: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: [[LBU:%[0-9]+]]:gpr = LBU [[COPY]], 0 :: (load (s8) from %ir.x) + ; CHECK-NEXT: BEQ [[LBU]], $x0, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: 
bb.5.entry: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI [[LBU]], -97 + ; CHECK-NEXT: BNE [[ADDI]], $x0, %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4.entry: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[LBU1:%[0-9]+]]:gpr = LBU [[COPY]], 1 :: (load (s8) from %ir.x + 1) + ; CHECK-NEXT: BEQ [[LBU1]], $x0, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6.entry: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI [[LBU1]], -98 + ; CHECK-NEXT: BNE [[ADDI1]], $x0, %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.entry: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[LBU2:%[0-9]+]]:gpr = LBU [[COPY]], 2 :: (load (s8) from %ir.x + 2) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB $x0, [[LBU2]] + ; CHECK-NEXT: PseudoBR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.entry: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADDI2:%[0-9]+]]:gpr = ADDI $x0, -1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.entry: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr = PHI [[ADDI2]], %bb.2, [[SUB]], %bb.3, [[ADDI]], %bb.5, [[ADDI1]], %bb.6 + ; CHECK-NEXT: [[SUB1:%[0-9]+]]:gpr = SUB $x0, [[PHI]] + ; CHECK-NEXT: $x10 = COPY [[SUB1]] + ; CHECK-NEXT: PseudoRET implicit $x10 +entry: + %call = call i32 @strcmp(ptr @str2, ptr %x) + ret i32 %call +} + +define i32 @test_5(ptr %x) { + ; CHECK-LABEL: name: test_5 + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: [[LBU:%[0-9]+]]:gpr = LBU [[COPY]], 0 :: (load (s8) from %ir.x) + ; CHECK-NEXT: BEQ [[LBU]], $x0, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6.entry: + ; CHECK-NEXT: successors: 
%bb.1(0x40000000), %bb.5(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI [[LBU]], -97 + ; CHECK-NEXT: BNE [[ADDI]], $x0, %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5.entry: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.7(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[LBU1:%[0-9]+]]:gpr = LBU [[COPY]], 1 :: (load (s8) from %ir.x + 1) + ; CHECK-NEXT: BEQ [[LBU1]], $x0, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7.entry: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI [[LBU1]], -98 + ; CHECK-NEXT: BNE [[ADDI1]], $x0, %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4.entry: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.8(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[LBU2:%[0-9]+]]:gpr = LBU [[COPY]], 2 :: (load (s8) from %ir.x + 2) + ; CHECK-NEXT: BEQ [[LBU2]], $x0, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8.entry: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADDI2:%[0-9]+]]:gpr = ADDI [[LBU2]], -99 + ; CHECK-NEXT: BNE [[ADDI2]], $x0, %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.entry: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[LBU3:%[0-9]+]]:gpr = LBU [[COPY]], 3 :: (load (s8) from %ir.x + 3) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB $x0, [[LBU3]] + ; CHECK-NEXT: PseudoBR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.entry: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADDI3:%[0-9]+]]:gpr = ADDI $x0, -1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.entry: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr = PHI [[ADDI3]], %bb.2, [[SUB]], %bb.3, [[ADDI]], %bb.6, [[ADDI1]], %bb.7, [[ADDI2]], %bb.8 + ; CHECK-NEXT: [[SUB1:%[0-9]+]]:gpr = SUB $x0, [[PHI]] + ; CHECK-NEXT: $x10 = COPY [[SUB1]] + ; CHECK-NEXT: PseudoRET implicit $x10 +entry: + %call = call i32 @strcmp(ptr @str3, ptr %x) 
+ ret i32 %call +} + +define i32 @test_6(ptr %x) { + ; CHECK-LABEL: name: test_6 + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: [[LBU:%[0-9]+]]:gpr = LBU [[COPY]], 0 :: (load (s8) from %ir.x) + ; CHECK-NEXT: BEQ [[LBU]], $x0, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6.entry: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.5(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI [[LBU]], -97 + ; CHECK-NEXT: BNE [[ADDI]], $x0, %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5.entry: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.7(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[LBU1:%[0-9]+]]:gpr = LBU [[COPY]], 1 :: (load (s8) from %ir.x + 1) + ; CHECK-NEXT: BEQ [[LBU1]], $x0, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7.entry: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI [[LBU1]], -98 + ; CHECK-NEXT: BNE [[ADDI1]], $x0, %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4.entry: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.8(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[LBU2:%[0-9]+]]:gpr = LBU [[COPY]], 2 :: (load (s8) from %ir.x + 2) + ; CHECK-NEXT: BEQ [[LBU2]], $x0, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8.entry: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADDI2:%[0-9]+]]:gpr = ADDI [[LBU2]], -99 + ; CHECK-NEXT: BNE [[ADDI2]], $x0, %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.entry: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[LBU3:%[0-9]+]]:gpr = LBU [[COPY]], 3 :: (load (s8) from %ir.x + 3) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB $x0, [[LBU3]] + ; CHECK-NEXT: PseudoBR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.entry: + ; CHECK-NEXT: 
successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADDI3:%[0-9]+]]:gpr = ADDI $x0, -1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.entry: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr = PHI [[ADDI3]], %bb.2, [[SUB]], %bb.3, [[ADDI]], %bb.6, [[ADDI1]], %bb.7, [[ADDI2]], %bb.8 + ; CHECK-NEXT: $x10 = COPY [[PHI]] + ; CHECK-NEXT: PseudoRET implicit $x10 +entry: + %call = call i32 @strcmp(ptr %x, ptr @str3) + ret i32 %call +} + +define i32 @test_7(ptr align 8 %x) { + ; CHECK-LABEL: name: test_7 + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: [[LBU:%[0-9]+]]:gpr = LBU [[COPY]], 0 :: (load (s8) from %ir.x) + ; CHECK-NEXT: BEQ [[LBU]], $x0, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6.entry: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.5(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI [[LBU]], -97 + ; CHECK-NEXT: BNE [[ADDI]], $x0, %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5.entry: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.7(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[LBU1:%[0-9]+]]:gpr = LBU [[COPY]], 1 :: (load (s8) from %ir.x + 1) + ; CHECK-NEXT: BEQ [[LBU1]], $x0, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7.entry: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI [[LBU1]], -98 + ; CHECK-NEXT: BNE [[ADDI1]], $x0, %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4.entry: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.8(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[LBU2:%[0-9]+]]:gpr = LBU [[COPY]], 2 :: (load (s8) from %ir.x + 2) + ; CHECK-NEXT: BEQ [[LBU2]], $x0, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8.entry: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADDI2:%[0-9]+]]:gpr = 
ADDI [[LBU2]], -99 + ; CHECK-NEXT: BNE [[ADDI2]], $x0, %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.entry: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[LBU3:%[0-9]+]]:gpr = LBU [[COPY]], 3 :: (load (s8) from %ir.x + 3) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB $x0, [[LBU3]] + ; CHECK-NEXT: PseudoBR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.entry: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADDI3:%[0-9]+]]:gpr = ADDI $x0, -1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.entry: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr = PHI [[ADDI3]], %bb.2, [[SUB]], %bb.3, [[ADDI]], %bb.6, [[ADDI1]], %bb.7, [[ADDI2]], %bb.8 + ; CHECK-NEXT: [[SUB1:%[0-9]+]]:gpr = SUB $x0, [[PHI]] + ; CHECK-NEXT: $x10 = COPY [[SUB1]] + ; CHECK-NEXT: PseudoRET implicit $x10 +entry: + %call = call i32 @strcmp(ptr @str3, ptr %x) + ret i32 %call +} + +declare i32 @strcmp(ptr nocapture noundef, ptr nocapture noundef)