diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 43e5434ea2700..936a7e791093b 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1419,27 +1419,21 @@ let OtherPredicates = [HasPackedD16VMem] in { defm : MUBUF_LoadIntrinsicPat; } // End HasPackedD16VMem. -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; -defm : MUBUF_LoadIntrinsicPat; +foreach vt = Reg32Types.types in { +defm : MUBUF_LoadIntrinsicPat; +} + +foreach vt = Reg64Types.types in { +defm : MUBUF_LoadIntrinsicPat; +} + +foreach vt = Reg96Types.types in { +defm : MUBUF_LoadIntrinsicPat; +} + +foreach vt = Reg128Types.types in { +defm : MUBUF_LoadIntrinsicPat; +} defm : MUBUF_LoadIntrinsicPat; defm : MUBUF_LoadIntrinsicPat; @@ -1530,27 +1524,21 @@ let OtherPredicates = [HasPackedD16VMem] in { defm : MUBUF_StoreIntrinsicPat; } // End HasPackedD16VMem. -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; -defm : MUBUF_StoreIntrinsicPat; +foreach vt = Reg32Types.types in { +defm : MUBUF_StoreIntrinsicPat; +} + +foreach vt = Reg64Types.types in { +defm : MUBUF_StoreIntrinsicPat; +} + +foreach vt = Reg96Types.types in { +defm : MUBUF_StoreIntrinsicPat; +} + +foreach vt = Reg128Types.types in { +defm : MUBUF_StoreIntrinsicPat; +} defm : MUBUF_StoreIntrinsicPat; defm : MUBUF_StoreIntrinsicPat; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index d9a163ded6bab..1faccd7e061ce 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1112,29 +1112,33 @@ unsigned SITargetLowering::getVectorTypeBreakdownForCallingConv( Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT); } -static EVT memVTFromLoadIntrData(Type *Ty, unsigned MaxNumLanes) { +static EVT memVTFromLoadIntrData(const SITargetLowering &TLI, + const DataLayout &DL, Type *Ty, + unsigned MaxNumLanes) { assert(MaxNumLanes != 0); + LLVMContext &Ctx = Ty->getContext(); if (auto *VT = dyn_cast(Ty)) { unsigned NumElts = std::min(MaxNumLanes, VT->getNumElements()); - return EVT::getVectorVT(Ty->getContext(), - EVT::getEVT(VT->getElementType()), + return EVT::getVectorVT(Ctx, TLI.getValueType(DL, VT->getElementType()), NumElts); } - return EVT::getEVT(Ty); + return TLI.getValueType(DL, Ty); } // Peek through TFE struct returns to only use the data size. -static EVT memVTFromLoadIntrReturn(Type *Ty, unsigned MaxNumLanes) { +static EVT memVTFromLoadIntrReturn(const SITargetLowering &TLI, + const DataLayout &DL, Type *Ty, + unsigned MaxNumLanes) { auto *ST = dyn_cast(Ty); if (!ST) - return memVTFromLoadIntrData(Ty, MaxNumLanes); + return memVTFromLoadIntrData(TLI, DL, Ty, MaxNumLanes); // TFE intrinsics return an aggregate type. assert(ST->getNumContainedTypes() == 2 && ST->getContainedType(1)->isIntegerTy(32)); - return memVTFromLoadIntrData(ST->getContainedType(0), MaxNumLanes); + return memVTFromLoadIntrData(TLI, DL, ST->getContainedType(0), MaxNumLanes); } /// Map address space 7 to MVT::v5i32 because that's its in-memory @@ -1219,10 +1223,12 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, MaxNumLanes = DMask == 0 ? 1 : llvm::popcount(DMask); } - Info.memVT = memVTFromLoadIntrReturn(CI.getType(), MaxNumLanes); + Info.memVT = memVTFromLoadIntrReturn(*this, MF.getDataLayout(), + CI.getType(), MaxNumLanes); } else { - Info.memVT = memVTFromLoadIntrReturn( - CI.getType(), std::numeric_limits::max()); + Info.memVT = + memVTFromLoadIntrReturn(*this, MF.getDataLayout(), CI.getType(), + std::numeric_limits::max()); } // FIXME: What does alignment mean for an image? @@ -1235,9 +1241,10 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, if (RsrcIntr->IsImage) { unsigned DMask = cast(CI.getArgOperand(1))->getZExtValue(); unsigned DMaskLanes = DMask == 0 ? 1 : llvm::popcount(DMask); - Info.memVT = memVTFromLoadIntrData(DataTy, DMaskLanes); + Info.memVT = memVTFromLoadIntrData(*this, MF.getDataLayout(), DataTy, + DMaskLanes); } else - Info.memVT = EVT::getEVT(DataTy); + Info.memVT = getValueType(MF.getDataLayout(), DataTy); Info.flags |= MachineMemOperand::MOStore; } else { diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index 3666976cf82f8..a8efe2b2ba35e 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -586,7 +586,9 @@ class RegisterTypes reg_types> { def Reg16Types : RegisterTypes<[i16, f16, bf16]>; def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16, v2bf16, p2, p3, p5, p6]>; -def Reg64Types : RegisterTypes<[i64, f64, v2i32, v2f32, v4i16, v4f16, v4bf16, p0]>; +def Reg64Types : RegisterTypes<[i64, f64, v2i32, v2f32, p0, v4i16, v4f16, v4bf16]>; +def Reg96Types : RegisterTypes<[v3i32, v3f32]>; +def Reg128Types : RegisterTypes<[v4i32, v4f32, v2i64, v2f64, v8i16, v8f16, v8bf16]>; let HasVGPR = 1 in { // VOP3 and VINTERP can access 256 lo and 256 hi registers. @@ -744,7 +746,7 @@ def Pseudo_SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, let BaseClassOrder = 10000; } -def Pseudo_SReg_128 : SIRegisterClass<"AMDGPU", [v4i32, v2i64, v2f64, v8i16, v8f16, v8bf16], 32, +def Pseudo_SReg_128 : SIRegisterClass<"AMDGPU", Reg128Types.types, 32, (add PRIVATE_RSRC_REG)> { let isAllocatable = 0; let CopyCost = -1; @@ -815,7 +817,7 @@ def SRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v let HasSGPR = 1; } -def SGPR_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16, v4bf16], 32, +def SGPR_64 : SIRegisterClass<"AMDGPU", Reg64Types.types, 32, (add SGPR_64Regs)> { let CopyCost = 1; let AllocationPriority = 1; @@ -905,8 +907,8 @@ multiclass SRegClass; -defm "" : SRegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16, v8bf16], SGPR_128Regs, TTMP_128Regs>; +defm "" : SRegClass<3, Reg96Types.types, SGPR_96Regs, TTMP_96Regs>; +defm "" : SRegClass<4, Reg128Types.types, SGPR_128Regs, TTMP_128Regs>; defm "" : SRegClass<5, [v5i32, v5f32], SGPR_160Regs, TTMP_160Regs>; defm "" : SRegClass<6, [v6i32, v6f32, v3i64, v3f64], SGPR_192Regs, TTMP_192Regs>; defm "" : SRegClass<7, [v7i32, v7f32], SGPR_224Regs, TTMP_224Regs>; @@ -958,8 +960,8 @@ multiclass VRegClass regTypes, dag regList> { defm VReg_64 : VRegClass<2, [i64, f64, v2i32, v2f32, v4f16, v4bf16, v4i16, p0, p1, p4], (add VGPR_64)>; -defm VReg_96 : VRegClass<3, [v3i32, v3f32], (add VGPR_96)>; -defm VReg_128 : VRegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16, v8bf16], (add VGPR_128)>; +defm VReg_96 : VRegClass<3, Reg96Types.types, (add VGPR_96)>; +defm VReg_128 : VRegClass<4, Reg128Types.types, (add VGPR_128)>; defm VReg_160 : VRegClass<5, [v5i32, v5f32], (add VGPR_160)>; defm VReg_192 : VRegClass<6, [v6i32, v6f32, v3i64, v3f64], (add VGPR_192)>; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.ll index 3e3371091ef72..4d557c76dc4d0 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.ll @@ -1280,6 +1280,602 @@ define <2 x i64> @buffer_load_v2i64__voffset_add(ptr addrspace(8) inreg %rsrc, i ret <2 x i64> %data } +define ptr @buffer_load_p0__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { +; PREGFX10-LABEL: buffer_load_p0__voffset_add: +; PREGFX10: ; %bb.0: +; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60 +; PREGFX10-NEXT: s_waitcnt vmcnt(0) +; PREGFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: buffer_load_p0__voffset_add: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: buffer_load_p0__voffset_add: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + %data = call ptr @llvm.amdgcn.raw.ptr.buffer.load.p0(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret ptr %data +} + +define <2 x ptr> @buffer_load_v2p0__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { +; PREGFX10-LABEL: buffer_load_v2p0__voffset_add: +; PREGFX10: ; %bb.0: +; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60 +; PREGFX10-NEXT: s_waitcnt vmcnt(0) +; PREGFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: buffer_load_v2p0__voffset_add: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: buffer_load_v2p0__voffset_add: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + %data = call <2 x ptr> @llvm.amdgcn.raw.ptr.buffer.load.p0(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret <2 x ptr> %data +} + +define ptr addrspace(1) @buffer_load_p1__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { +; PREGFX10-LABEL: buffer_load_p1__voffset_add: +; PREGFX10: ; %bb.0: +; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60 +; PREGFX10-NEXT: s_waitcnt vmcnt(0) +; PREGFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: buffer_load_p1__voffset_add: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: buffer_load_p1__voffset_add: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + %data = call ptr addrspace(1) @llvm.amdgcn.raw.ptr.buffer.load.p1(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret ptr addrspace(1) %data +} + +define <2 x ptr addrspace(1)> @buffer_load_v2p1__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { +; PREGFX10-LABEL: buffer_load_v2p1__voffset_add: +; PREGFX10: ; %bb.0: +; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60 +; PREGFX10-NEXT: s_waitcnt vmcnt(0) +; PREGFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: buffer_load_v2p1__voffset_add: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: buffer_load_v2p1__voffset_add: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + %data = call <2 x ptr addrspace(1)> @llvm.amdgcn.raw.ptr.buffer.load.p1(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret <2 x ptr addrspace(1)> %data +} + +define ptr addrspace(4) @buffer_load_p4__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { +; PREGFX10-LABEL: buffer_load_p4__voffset_add: +; PREGFX10: ; %bb.0: +; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60 +; PREGFX10-NEXT: s_waitcnt vmcnt(0) +; PREGFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: buffer_load_p4__voffset_add: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: buffer_load_p4__voffset_add: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + %data = call ptr addrspace(4) @llvm.amdgcn.raw.ptr.buffer.load.p4(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret ptr addrspace(4) %data +} + +define <2 x ptr addrspace(4)> @buffer_load_v2p4__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { +; PREGFX10-LABEL: buffer_load_v2p4__voffset_add: +; PREGFX10: ; %bb.0: +; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60 +; PREGFX10-NEXT: s_waitcnt vmcnt(0) +; PREGFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: buffer_load_v2p4__voffset_add: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: buffer_load_v2p4__voffset_add: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + %data = call <2 x ptr addrspace(4)> @llvm.amdgcn.raw.ptr.buffer.load.p4(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret <2 x ptr addrspace(4)> %data +} + +define ptr addrspace(999) @buffer_load_p999__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { +; PREGFX10-LABEL: buffer_load_p999__voffset_add: +; PREGFX10: ; %bb.0: +; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60 +; PREGFX10-NEXT: s_waitcnt vmcnt(0) +; PREGFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: buffer_load_p999__voffset_add: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: buffer_load_p999__voffset_add: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + %data = call ptr addrspace(999) @llvm.amdgcn.raw.ptr.buffer.load.p999(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret ptr addrspace(999) %data +} + +define <2 x ptr addrspace(999)> @buffer_load_v2p999__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { +; PREGFX10-LABEL: buffer_load_v2p999__voffset_add: +; PREGFX10: ; %bb.0: +; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60 +; PREGFX10-NEXT: s_waitcnt vmcnt(0) +; PREGFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: buffer_load_v2p999__voffset_add: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: buffer_load_v2p999__voffset_add: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + %data = call <2 x ptr addrspace(999)> @llvm.amdgcn.raw.ptr.buffer.load.p999(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret <2 x ptr addrspace(999)> %data +} + +define ptr addrspace(2) @buffer_load_p2__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { +; PREGFX10-LABEL: buffer_load_p2__voffset_add: +; PREGFX10: ; %bb.0: +; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; PREGFX10-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen offset:60 +; PREGFX10-NEXT: s_waitcnt vmcnt(0) +; PREGFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: buffer_load_p2__voffset_add: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen offset:60 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: buffer_load_p2__voffset_add: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen offset:60 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + %data = call ptr addrspace(2) @llvm.amdgcn.raw.ptr.buffer.load.p2(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret ptr addrspace(2) %data +} + +define <2 x ptr addrspace(2)> @buffer_load_v2p2__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { +; PREGFX10-LABEL: buffer_load_v2p2__voffset_add: +; PREGFX10: ; %bb.0: +; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60 +; PREGFX10-NEXT: s_waitcnt vmcnt(0) +; PREGFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: buffer_load_v2p2__voffset_add: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: buffer_load_v2p2__voffset_add: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + %data = call <2 x ptr addrspace(2)> @llvm.amdgcn.raw.ptr.buffer.load.v2p2(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret <2 x ptr addrspace(2)> %data +} + +define <3 x ptr addrspace(2)> @buffer_load_v3p2__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { +; GFX10-LABEL: buffer_load_v3p2__voffset_add: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: buffer_load_dwordx3 v[0:2], v0, s[4:7], 0 offen offset:60 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: buffer_load_v3p2__voffset_add: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: buffer_load_b96 v[0:2], v0, s[0:3], 0 offen offset:60 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + %data = call <3 x ptr addrspace(2)> @llvm.amdgcn.raw.ptr.buffer.load.v3p2(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret <3 x ptr addrspace(2)> %data +} + +define <4 x ptr addrspace(2)> @buffer_load_v4p2__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { +; PREGFX10-LABEL: buffer_load_v4p2__voffset_add: +; PREGFX10: ; %bb.0: +; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60 +; PREGFX10-NEXT: s_waitcnt vmcnt(0) +; PREGFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: buffer_load_v4p2__voffset_add: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: buffer_load_v4p2__voffset_add: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + %data = call <4 x ptr addrspace(2)> @llvm.amdgcn.raw.ptr.buffer.load.v4p2(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret <4 x ptr addrspace(2)> %data +} + +define ptr addrspace(3) @buffer_load_p3__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { +; PREGFX10-LABEL: buffer_load_p3__voffset_add: +; PREGFX10: ; %bb.0: +; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; PREGFX10-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen offset:60 +; PREGFX10-NEXT: s_waitcnt vmcnt(0) +; PREGFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: buffer_load_p3__voffset_add: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen offset:60 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: buffer_load_p3__voffset_add: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen offset:60 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + %data = call ptr addrspace(3) @llvm.amdgcn.raw.ptr.buffer.load.p3(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret ptr addrspace(3) %data +} + +define <2 x ptr addrspace(3)> @buffer_load_v2p3__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { +; PREGFX10-LABEL: buffer_load_v2p3__voffset_add: +; PREGFX10: ; %bb.0: +; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60 +; PREGFX10-NEXT: s_waitcnt vmcnt(0) +; PREGFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: buffer_load_v2p3__voffset_add: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: buffer_load_v2p3__voffset_add: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + %data = call <2 x ptr addrspace(3)> @llvm.amdgcn.raw.ptr.buffer.load.v2p3(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret <2 x ptr addrspace(3)> %data +} + +define <3 x ptr addrspace(3)> @buffer_load_v3p3__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { +; GFX10-LABEL: buffer_load_v3p3__voffset_add: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: buffer_load_dwordx3 v[0:2], v0, s[4:7], 0 offen offset:60 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: buffer_load_v3p3__voffset_add: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: buffer_load_b96 v[0:2], v0, s[0:3], 0 offen offset:60 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + %data = call <3 x ptr addrspace(3)> @llvm.amdgcn.raw.ptr.buffer.load.v3p3(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret <3 x ptr addrspace(3)> %data +} + +define <4 x ptr addrspace(3)> @buffer_load_v4p3__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { +; PREGFX10-LABEL: buffer_load_v4p3__voffset_add: +; PREGFX10: ; %bb.0: +; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60 +; PREGFX10-NEXT: s_waitcnt vmcnt(0) +; PREGFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: buffer_load_v4p3__voffset_add: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: buffer_load_v4p3__voffset_add: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + %data = call <4 x ptr addrspace(3)> @llvm.amdgcn.raw.ptr.buffer.load.v4p3(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret <4 x ptr addrspace(3)> %data +} + +define ptr addrspace(5) @buffer_load_p5__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { +; PREGFX10-LABEL: buffer_load_p5__voffset_add: +; PREGFX10: ; %bb.0: +; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; PREGFX10-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen offset:60 +; PREGFX10-NEXT: s_waitcnt vmcnt(0) +; PREGFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: buffer_load_p5__voffset_add: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen offset:60 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: buffer_load_p5__voffset_add: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen offset:60 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + %data = call ptr addrspace(5) @llvm.amdgcn.raw.ptr.buffer.load.p5(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret ptr addrspace(5) %data +} + +define <2 x ptr addrspace(5)> @buffer_load_v2p5__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { +; PREGFX10-LABEL: buffer_load_v2p5__voffset_add: +; PREGFX10: ; %bb.0: +; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60 +; PREGFX10-NEXT: s_waitcnt vmcnt(0) +; PREGFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: buffer_load_v2p5__voffset_add: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: buffer_load_v2p5__voffset_add: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + %data = call <2 x ptr addrspace(5)> @llvm.amdgcn.raw.ptr.buffer.load.v2p5(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret <2 x ptr addrspace(5)> %data +} + +define <3 x ptr addrspace(5)> @buffer_load_v3p5__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { +; GFX10-LABEL: buffer_load_v3p5__voffset_add: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: buffer_load_dwordx3 v[0:2], v0, s[4:7], 0 offen offset:60 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: buffer_load_v3p5__voffset_add: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: buffer_load_b96 v[0:2], v0, s[0:3], 0 offen offset:60 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + %data = call <3 x ptr addrspace(5)> @llvm.amdgcn.raw.ptr.buffer.load.v3p5(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret <3 x ptr addrspace(5)> %data +} + +define <4 x ptr addrspace(5)> @buffer_load_v4p5__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { +; PREGFX10-LABEL: buffer_load_v4p5__voffset_add: +; PREGFX10: ; %bb.0: +; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60 +; PREGFX10-NEXT: s_waitcnt vmcnt(0) +; PREGFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: buffer_load_v4p5__voffset_add: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: buffer_load_v4p5__voffset_add: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + %data = call <4 x ptr addrspace(5)> @llvm.amdgcn.raw.ptr.buffer.load.v4p5(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret <4 x ptr addrspace(5)> %data +} + +define ptr addrspace(6) @buffer_load_p6__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { +; PREGFX10-LABEL: buffer_load_p6__voffset_add: +; PREGFX10: ; %bb.0: +; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; PREGFX10-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen offset:60 +; PREGFX10-NEXT: s_waitcnt vmcnt(0) +; PREGFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: buffer_load_p6__voffset_add: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen offset:60 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: buffer_load_p6__voffset_add: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen offset:60 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + %data = call ptr addrspace(6) @llvm.amdgcn.raw.ptr.buffer.load.p6(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret ptr addrspace(6) %data +} + +define <2 x ptr addrspace(6)> @buffer_load_v2p6__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { +; PREGFX10-LABEL: buffer_load_v2p6__voffset_add: +; PREGFX10: ; %bb.0: +; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60 +; PREGFX10-NEXT: s_waitcnt vmcnt(0) +; PREGFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: buffer_load_v2p6__voffset_add: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: buffer_load_v2p6__voffset_add: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + %data = call <2 x ptr addrspace(6)> @llvm.amdgcn.raw.ptr.buffer.load.v2p6(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret <2 x ptr addrspace(6)> %data +} + +define <3 x ptr addrspace(6)> @buffer_load_v3p6__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { +; GFX10-LABEL: buffer_load_v3p6__voffset_add: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: buffer_load_dwordx3 v[0:2], v0, s[4:7], 0 offen offset:60 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: buffer_load_v3p6__voffset_add: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: buffer_load_b96 v[0:2], v0, s[0:3], 0 offen offset:60 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + %data = call <3 x ptr addrspace(6)> @llvm.amdgcn.raw.ptr.buffer.load.v3p6(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret <3 x ptr addrspace(6)> %data +} + +define <4 x ptr addrspace(6)> @buffer_load_v4p6__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { +; PREGFX10-LABEL: buffer_load_v4p6__voffset_add: +; PREGFX10: ; %bb.0: +; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60 +; PREGFX10-NEXT: s_waitcnt vmcnt(0) +; PREGFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: buffer_load_v4p6__voffset_add: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: buffer_load_v4p6__voffset_add: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + %data = call <4 x ptr addrspace(6)> @llvm.amdgcn.raw.ptr.buffer.load.v4p6(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret <4 x ptr addrspace(6)> %data +} + declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32) #0 declare <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8), i32, i32, i32) #0 declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32) #0 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.ll index 8dd9b4ab61d4f..4fbb4ec342ff5 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.ll @@ -424,6 +424,462 @@ define void @buffer_store_v2i64__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ret void } +define void @buffer_store_p0__voffset_add(ptr addrspace(8) inreg %rsrc, ptr %data, i32 %voffset) #0 { +; VERDE-LABEL: buffer_store_p0__voffset_add: +; VERDE: ; %bb.0: +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VERDE-NEXT: buffer_store_dwordx2 v[0:1], v2, s[4:7], 0 offen offset:60 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: s_setpc_b64 s[30:31] +; +; CHECK-LABEL: buffer_store_p0__voffset_add: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: buffer_store_dwordx2 v[0:1], v2, s[4:7], 0 offen offset:60 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + call void @llvm.amdgcn.raw.ptr.buffer.store.p0(ptr %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret void +} + +define void @buffer_store_v2p0__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr> %data, i32 %voffset) #0 { +; VERDE-LABEL: buffer_store_v2p0__voffset_add: +; VERDE: ; %bb.0: +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VERDE-NEXT: buffer_store_dwordx4 v[0:3], v4, s[4:7], 0 offen offset:60 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: s_setpc_b64 s[30:31] +; +; CHECK-LABEL: buffer_store_v2p0__voffset_add: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: buffer_store_dwordx4 v[0:3], v4, s[4:7], 0 offen offset:60 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + call void @llvm.amdgcn.raw.ptr.buffer.store.v2p0(<2 x ptr> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret void +} + +define void @buffer_store_p1__voffset_add(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %data, i32 %voffset) #0 { +; VERDE-LABEL: buffer_store_p1__voffset_add: +; VERDE: ; %bb.0: +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VERDE-NEXT: buffer_store_dwordx2 v[0:1], v2, s[4:7], 0 offen offset:60 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: s_setpc_b64 s[30:31] +; +; CHECK-LABEL: buffer_store_p1__voffset_add: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: buffer_store_dwordx2 v[0:1], v2, s[4:7], 0 offen offset:60 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + call void @llvm.amdgcn.raw.ptr.buffer.store.p1(ptr addrspace(1) %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret void +} + +define void @buffer_store_v2p1__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr addrspace(1)> %data, i32 %voffset) #0 { +; VERDE-LABEL: buffer_store_v2p1__voffset_add: +; VERDE: ; %bb.0: +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VERDE-NEXT: buffer_store_dwordx4 v[0:3], v4, s[4:7], 0 offen offset:60 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: s_setpc_b64 s[30:31] +; +; CHECK-LABEL: buffer_store_v2p1__voffset_add: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: buffer_store_dwordx4 v[0:3], v4, s[4:7], 0 offen offset:60 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + call void @llvm.amdgcn.raw.ptr.buffer.store.v2p1(<2 x ptr addrspace(1)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret void +} + +define void @buffer_store_p4__voffset_add(ptr addrspace(8) inreg %rsrc, ptr addrspace(4) %data, i32 %voffset) #0 { +; VERDE-LABEL: buffer_store_p4__voffset_add: +; VERDE: ; %bb.0: +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VERDE-NEXT: buffer_store_dwordx2 v[0:1], v2, s[4:7], 0 offen offset:60 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: s_setpc_b64 s[30:31] +; +; CHECK-LABEL: buffer_store_p4__voffset_add: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: buffer_store_dwordx2 v[0:1], v2, s[4:7], 0 offen offset:60 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + call void @llvm.amdgcn.raw.ptr.buffer.store.p4(ptr addrspace(4) %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret void +} + +define void @buffer_store_v2p4__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr addrspace(4)> %data, i32 %voffset) #0 { +; VERDE-LABEL: buffer_store_v2p4__voffset_add: +; VERDE: ; %bb.0: +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VERDE-NEXT: buffer_store_dwordx4 v[0:3], v4, s[4:7], 0 offen offset:60 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: s_setpc_b64 s[30:31] +; +; CHECK-LABEL: buffer_store_v2p4__voffset_add: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: buffer_store_dwordx4 v[0:3], v4, s[4:7], 0 offen offset:60 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + call void @llvm.amdgcn.raw.ptr.buffer.store.v2p4(<2 x ptr addrspace(4)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret void +} + +define void @buffer_store_p999__voffset_add(ptr addrspace(8) inreg %rsrc, ptr addrspace(999) %data, i32 %voffset) #0 { +; VERDE-LABEL: buffer_store_p999__voffset_add: +; VERDE: ; %bb.0: +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VERDE-NEXT: buffer_store_dwordx2 v[0:1], v2, s[4:7], 0 offen offset:60 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: s_setpc_b64 s[30:31] +; +; CHECK-LABEL: buffer_store_p999__voffset_add: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: buffer_store_dwordx2 v[0:1], v2, s[4:7], 0 offen offset:60 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + call void @llvm.amdgcn.raw.ptr.buffer.store.p999(ptr addrspace(999) %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret void +} + +define void @buffer_store_v2p999__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr addrspace(999)> %data, i32 %voffset) #0 { +; VERDE-LABEL: buffer_store_v2p999__voffset_add: +; VERDE: ; %bb.0: +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VERDE-NEXT: buffer_store_dwordx4 v[0:3], v4, s[4:7], 0 offen offset:60 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: s_setpc_b64 s[30:31] +; +; CHECK-LABEL: buffer_store_v2p999__voffset_add: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: buffer_store_dwordx4 v[0:3], v4, s[4:7], 0 offen offset:60 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + call void @llvm.amdgcn.raw.ptr.buffer.store.v2p999(<2 x ptr addrspace(999)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret void +} + +define void @buffer_store_p2__voffset_add(ptr addrspace(8) inreg %rsrc, ptr addrspace(2) %data, i32 %voffset) #0 { +; VERDE-LABEL: buffer_store_p2__voffset_add: +; VERDE: ; %bb.0: +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VERDE-NEXT: buffer_store_dword v0, v1, s[4:7], 0 offen offset:60 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: s_setpc_b64 s[30:31] +; +; CHECK-LABEL: buffer_store_p2__voffset_add: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: buffer_store_dword v0, v1, s[4:7], 0 offen offset:60 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + call void @llvm.amdgcn.raw.ptr.buffer.store.p2(ptr addrspace(2) %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret void +} + +define void @buffer_store_v2p2__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr addrspace(2)> %data, i32 %voffset) #0 { +; VERDE-LABEL: buffer_store_v2p2__voffset_add: +; VERDE: ; %bb.0: +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VERDE-NEXT: buffer_store_dwordx2 v[0:1], v2, s[4:7], 0 offen offset:60 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: s_setpc_b64 s[30:31] +; +; CHECK-LABEL: buffer_store_v2p2__voffset_add: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: buffer_store_dwordx2 v[0:1], v2, s[4:7], 0 offen offset:60 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + call void @llvm.amdgcn.raw.ptr.buffer.store.v2p2(<2 x ptr addrspace(2)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret void +} + +define void @buffer_store_v3p2__voffset_add(ptr addrspace(8) inreg %rsrc, <3 x ptr addrspace(2)> %data, i32 %voffset) #0 { +; VERDE-LABEL: buffer_store_v3p2__voffset_add: +; VERDE: ; %bb.0: +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VERDE-NEXT: buffer_store_dwordx3 v[0:2], v3, s[4:7], 0 offen offset:60 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: s_setpc_b64 s[30:31] +; +; CHECK-LABEL: buffer_store_v3p2__voffset_add: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: buffer_store_dwordx3 v[0:2], v3, s[4:7], 0 offen offset:60 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + call void @llvm.amdgcn.raw.ptr.buffer.store.v3p2(<3 x ptr addrspace(2)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret void +} + +define void @buffer_store_v4p2__voffset_add(ptr addrspace(8) inreg %rsrc, <4 x ptr addrspace(2)> %data, i32 %voffset) #0 { +; VERDE-LABEL: buffer_store_v4p2__voffset_add: +; VERDE: ; %bb.0: +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VERDE-NEXT: buffer_store_dwordx4 v[0:3], v4, s[4:7], 0 offen offset:60 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: s_setpc_b64 s[30:31] +; +; CHECK-LABEL: buffer_store_v4p2__voffset_add: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: buffer_store_dwordx4 v[0:3], v4, s[4:7], 0 offen offset:60 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + call void @llvm.amdgcn.raw.ptr.buffer.store.v4p2(<4 x ptr addrspace(2)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret void +} + +define void @buffer_store_p3__voffset_add(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %data, i32 %voffset) #0 { +; VERDE-LABEL: buffer_store_p3__voffset_add: +; VERDE: ; %bb.0: +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VERDE-NEXT: buffer_store_dword v0, v1, s[4:7], 0 offen offset:60 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: s_setpc_b64 s[30:31] +; +; CHECK-LABEL: buffer_store_p3__voffset_add: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: buffer_store_dword v0, v1, s[4:7], 0 offen offset:60 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + call void @llvm.amdgcn.raw.ptr.buffer.store.p3(ptr addrspace(3) %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret void +} + +define void @buffer_store_v2p3__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr addrspace(3)> %data, i32 %voffset) #0 { +; VERDE-LABEL: buffer_store_v2p3__voffset_add: +; VERDE: ; %bb.0: +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VERDE-NEXT: buffer_store_dwordx2 v[0:1], v2, s[4:7], 0 offen offset:60 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: s_setpc_b64 s[30:31] +; +; CHECK-LABEL: buffer_store_v2p3__voffset_add: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: buffer_store_dwordx2 v[0:1], v2, s[4:7], 0 offen offset:60 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + call void @llvm.amdgcn.raw.ptr.buffer.store.v2p3(<2 x ptr addrspace(3)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret void +} + +define void @buffer_store_v3p3__voffset_add(ptr addrspace(8) inreg %rsrc, <3 x ptr addrspace(3)> %data, i32 %voffset) #0 { +; VERDE-LABEL: buffer_store_v3p3__voffset_add: +; VERDE: ; %bb.0: +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VERDE-NEXT: buffer_store_dwordx3 v[0:2], v3, s[4:7], 0 offen offset:60 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: s_setpc_b64 s[30:31] +; +; CHECK-LABEL: buffer_store_v3p3__voffset_add: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: buffer_store_dwordx3 v[0:2], v3, s[4:7], 0 offen offset:60 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + call void @llvm.amdgcn.raw.ptr.buffer.store.v3p3(<3 x ptr addrspace(3)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret void +} + +define void @buffer_store_v4p3__voffset_add(ptr addrspace(8) inreg %rsrc, <4 x ptr addrspace(3)> %data, i32 %voffset) #0 { +; VERDE-LABEL: buffer_store_v4p3__voffset_add: +; VERDE: ; %bb.0: +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VERDE-NEXT: buffer_store_dwordx4 v[0:3], v4, s[4:7], 0 offen offset:60 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: s_setpc_b64 s[30:31] +; +; CHECK-LABEL: buffer_store_v4p3__voffset_add: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: buffer_store_dwordx4 v[0:3], v4, s[4:7], 0 offen offset:60 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + call void @llvm.amdgcn.raw.ptr.buffer.store.v4p3(<4 x ptr addrspace(3)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret void +} + +define void @buffer_store_p5__voffset_add(ptr addrspace(8) inreg %rsrc, ptr addrspace(5) %data, i32 %voffset) #0 { +; VERDE-LABEL: buffer_store_p5__voffset_add: +; VERDE: ; %bb.0: +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VERDE-NEXT: buffer_store_dword v0, v1, s[4:7], 0 offen offset:60 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: s_setpc_b64 s[30:31] +; +; CHECK-LABEL: buffer_store_p5__voffset_add: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: buffer_store_dword v0, v1, s[4:7], 0 offen offset:60 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + call void @llvm.amdgcn.raw.ptr.buffer.store.p5(ptr addrspace(5) %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret void +} + +define void @buffer_store_v2p5__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr addrspace(5)> %data, i32 %voffset) #0 { +; VERDE-LABEL: buffer_store_v2p5__voffset_add: +; VERDE: ; %bb.0: +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VERDE-NEXT: buffer_store_dwordx2 v[0:1], v2, s[4:7], 0 offen offset:60 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: s_setpc_b64 s[30:31] +; +; CHECK-LABEL: buffer_store_v2p5__voffset_add: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: buffer_store_dwordx2 v[0:1], v2, s[4:7], 0 offen offset:60 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + call void @llvm.amdgcn.raw.ptr.buffer.store.v2p5(<2 x ptr addrspace(5)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret void +} + +define void @buffer_store_v3p5__voffset_add(ptr addrspace(8) inreg %rsrc, <3 x ptr addrspace(5)> %data, i32 %voffset) #0 { +; VERDE-LABEL: buffer_store_v3p5__voffset_add: +; VERDE: ; %bb.0: +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VERDE-NEXT: buffer_store_dwordx3 v[0:2], v3, s[4:7], 0 offen offset:60 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: s_setpc_b64 s[30:31] +; +; CHECK-LABEL: buffer_store_v3p5__voffset_add: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: buffer_store_dwordx3 v[0:2], v3, s[4:7], 0 offen offset:60 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + call void @llvm.amdgcn.raw.ptr.buffer.store.v3p5(<3 x ptr addrspace(5)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret void +} + +define void @buffer_store_v4p5__voffset_add(ptr addrspace(8) inreg %rsrc, <4 x ptr addrspace(5)> %data, i32 %voffset) #0 { +; VERDE-LABEL: buffer_store_v4p5__voffset_add: +; VERDE: ; %bb.0: +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VERDE-NEXT: buffer_store_dwordx4 v[0:3], v4, s[4:7], 0 offen offset:60 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: s_setpc_b64 s[30:31] +; +; CHECK-LABEL: buffer_store_v4p5__voffset_add: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: buffer_store_dwordx4 v[0:3], v4, s[4:7], 0 offen offset:60 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + call void @llvm.amdgcn.raw.ptr.buffer.store.v4p5(<4 x ptr addrspace(5)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret void +} + +define void @buffer_store_p6__voffset_add(ptr addrspace(8) inreg %rsrc, ptr addrspace(6) %data, i32 %voffset) #0 { +; VERDE-LABEL: buffer_store_p6__voffset_add: +; VERDE: ; %bb.0: +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VERDE-NEXT: buffer_store_dword v0, v1, s[4:7], 0 offen offset:60 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: s_setpc_b64 s[30:31] +; +; CHECK-LABEL: buffer_store_p6__voffset_add: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: buffer_store_dword v0, v1, s[4:7], 0 offen offset:60 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + call void @llvm.amdgcn.raw.ptr.buffer.store.p6(ptr addrspace(6) %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret void +} + +define void @buffer_store_v2p6__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr addrspace(6)> %data, i32 %voffset) #0 { +; VERDE-LABEL: buffer_store_v2p6__voffset_add: +; VERDE: ; %bb.0: +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VERDE-NEXT: buffer_store_dwordx2 v[0:1], v2, s[4:7], 0 offen offset:60 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: s_setpc_b64 s[30:31] +; +; CHECK-LABEL: buffer_store_v2p6__voffset_add: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: buffer_store_dwordx2 v[0:1], v2, s[4:7], 0 offen offset:60 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + call void @llvm.amdgcn.raw.ptr.buffer.store.v2p6(<2 x ptr addrspace(6)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret void +} + +define void @buffer_store_v3p6__voffset_add(ptr addrspace(8) inreg %rsrc, <3 x ptr addrspace(6)> %data, i32 %voffset) #0 { +; VERDE-LABEL: buffer_store_v3p6__voffset_add: +; VERDE: ; %bb.0: +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VERDE-NEXT: buffer_store_dwordx3 v[0:2], v3, s[4:7], 0 offen offset:60 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: s_setpc_b64 s[30:31] +; +; CHECK-LABEL: buffer_store_v3p6__voffset_add: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: buffer_store_dwordx3 v[0:2], v3, s[4:7], 0 offen offset:60 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + call void @llvm.amdgcn.raw.ptr.buffer.store.v3p6(<3 x ptr addrspace(6)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret void +} + +define void @buffer_store_v4p6__voffset_add(ptr addrspace(8) inreg %rsrc, <4 x ptr addrspace(6)> %data, i32 %voffset) #0 { +; VERDE-LABEL: buffer_store_v4p6__voffset_add: +; VERDE: ; %bb.0: +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VERDE-NEXT: buffer_store_dwordx4 v[0:3], v4, s[4:7], 0 offen offset:60 +; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VERDE-NEXT: s_setpc_b64 s[30:31] +; +; CHECK-LABEL: buffer_store_v4p6__voffset_add: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: buffer_store_dwordx4 v[0:3], v4, s[4:7], 0 offen offset:60 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] + %voffset.add = add i32 %voffset, 60 + call void @llvm.amdgcn.raw.ptr.buffer.store.v4p6(<4 x ptr addrspace(6)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) + ret void +} + declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32) #0 declare void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float>, ptr addrspace(8), i32, i32, i32) #0 declare void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32) #0