Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 4510619

Browse files
arsenm authored and AlexisPerry committed on Jun 27, 2024
AMDGPU: Fix buffer load/store of pointers (llvm#95379)
Make sure we test all the address spaces since this support isn't free in gisel.
1 parent 4134401 commit 4510619

File tree

3 files changed

+1071
-12
lines changed

3 files changed

+1071
-12
lines changed
 

‎llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 19 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1114,29 +1114,33 @@ unsigned SITargetLowering::getVectorTypeBreakdownForCallingConv(
11141114
Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
11151115
}
11161116

1117-
static EVT memVTFromLoadIntrData(Type *Ty, unsigned MaxNumLanes) {
1117+
static EVT memVTFromLoadIntrData(const SITargetLowering &TLI,
1118+
const DataLayout &DL, Type *Ty,
1119+
unsigned MaxNumLanes) {
11181120
assert(MaxNumLanes != 0);
11191121

1122+
LLVMContext &Ctx = Ty->getContext();
11201123
if (auto *VT = dyn_cast<FixedVectorType>(Ty)) {
11211124
unsigned NumElts = std::min(MaxNumLanes, VT->getNumElements());
1122-
return EVT::getVectorVT(Ty->getContext(),
1123-
EVT::getEVT(VT->getElementType()),
1125+
return EVT::getVectorVT(Ctx, TLI.getValueType(DL, VT->getElementType()),
11241126
NumElts);
11251127
}
11261128

1127-
return EVT::getEVT(Ty);
1129+
return TLI.getValueType(DL, Ty);
11281130
}
11291131

11301132
// Peek through TFE struct returns to only use the data size.
1131-
static EVT memVTFromLoadIntrReturn(Type *Ty, unsigned MaxNumLanes) {
1133+
static EVT memVTFromLoadIntrReturn(const SITargetLowering &TLI,
1134+
const DataLayout &DL, Type *Ty,
1135+
unsigned MaxNumLanes) {
11321136
auto *ST = dyn_cast<StructType>(Ty);
11331137
if (!ST)
1134-
return memVTFromLoadIntrData(Ty, MaxNumLanes);
1138+
return memVTFromLoadIntrData(TLI, DL, Ty, MaxNumLanes);
11351139

11361140
// TFE intrinsics return an aggregate type.
11371141
assert(ST->getNumContainedTypes() == 2 &&
11381142
ST->getContainedType(1)->isIntegerTy(32));
1139-
return memVTFromLoadIntrData(ST->getContainedType(0), MaxNumLanes);
1143+
return memVTFromLoadIntrData(TLI, DL, ST->getContainedType(0), MaxNumLanes);
11401144
}
11411145

11421146
/// Map address space 7 to MVT::v5i32 because that's its in-memory
@@ -1221,10 +1225,12 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
12211225
MaxNumLanes = DMask == 0 ? 1 : llvm::popcount(DMask);
12221226
}
12231227

1224-
Info.memVT = memVTFromLoadIntrReturn(CI.getType(), MaxNumLanes);
1228+
Info.memVT = memVTFromLoadIntrReturn(*this, MF.getDataLayout(),
1229+
CI.getType(), MaxNumLanes);
12251230
} else {
1226-
Info.memVT = memVTFromLoadIntrReturn(
1227-
CI.getType(), std::numeric_limits<unsigned>::max());
1231+
Info.memVT =
1232+
memVTFromLoadIntrReturn(*this, MF.getDataLayout(), CI.getType(),
1233+
std::numeric_limits<unsigned>::max());
12281234
}
12291235

12301236
// FIXME: What does alignment mean for an image?
@@ -1237,9 +1243,10 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
12371243
if (RsrcIntr->IsImage) {
12381244
unsigned DMask = cast<ConstantInt>(CI.getArgOperand(1))->getZExtValue();
12391245
unsigned DMaskLanes = DMask == 0 ? 1 : llvm::popcount(DMask);
1240-
Info.memVT = memVTFromLoadIntrData(DataTy, DMaskLanes);
1246+
Info.memVT = memVTFromLoadIntrData(*this, MF.getDataLayout(), DataTy,
1247+
DMaskLanes);
12411248
} else
1242-
Info.memVT = EVT::getEVT(DataTy);
1249+
Info.memVT = getValueType(MF.getDataLayout(), DataTy);
12431250

12441251
Info.flags |= MachineMemOperand::MOStore;
12451252
} else {

‎llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.ll

Lines changed: 596 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1280,6 +1280,602 @@ define <2 x i64> @buffer_load_v2i64__voffset_add(ptr addrspace(8) inreg %rsrc, i
12801280
ret <2 x i64> %data
12811281
}
12821282

1283+
define ptr @buffer_load_p0__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1284+
; PREGFX10-LABEL: buffer_load_p0__voffset_add:
1285+
; PREGFX10: ; %bb.0:
1286+
; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1287+
; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60
1288+
; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1289+
; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1290+
;
1291+
; GFX10-LABEL: buffer_load_p0__voffset_add:
1292+
; GFX10: ; %bb.0:
1293+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1294+
; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60
1295+
; GFX10-NEXT: s_waitcnt vmcnt(0)
1296+
; GFX10-NEXT: s_setpc_b64 s[30:31]
1297+
;
1298+
; GFX11-LABEL: buffer_load_p0__voffset_add:
1299+
; GFX11: ; %bb.0:
1300+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1301+
; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
1302+
; GFX11-NEXT: s_waitcnt vmcnt(0)
1303+
; GFX11-NEXT: s_setpc_b64 s[30:31]
1304+
%voffset.add = add i32 %voffset, 60
1305+
%data = call ptr @llvm.amdgcn.raw.ptr.buffer.load.p0(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1306+
ret ptr %data
1307+
}
1308+
1309+
define <2 x ptr> @buffer_load_v2p0__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1310+
; PREGFX10-LABEL: buffer_load_v2p0__voffset_add:
1311+
; PREGFX10: ; %bb.0:
1312+
; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1313+
; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60
1314+
; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1315+
; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1316+
;
1317+
; GFX10-LABEL: buffer_load_v2p0__voffset_add:
1318+
; GFX10: ; %bb.0:
1319+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1320+
; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60
1321+
; GFX10-NEXT: s_waitcnt vmcnt(0)
1322+
; GFX10-NEXT: s_setpc_b64 s[30:31]
1323+
;
1324+
; GFX11-LABEL: buffer_load_v2p0__voffset_add:
1325+
; GFX11: ; %bb.0:
1326+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1327+
; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
1328+
; GFX11-NEXT: s_waitcnt vmcnt(0)
1329+
; GFX11-NEXT: s_setpc_b64 s[30:31]
1330+
%voffset.add = add i32 %voffset, 60
1331+
%data = call <2 x ptr> @llvm.amdgcn.raw.ptr.buffer.load.p0(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1332+
ret <2 x ptr> %data
1333+
}
1334+
1335+
define ptr addrspace(1) @buffer_load_p1__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1336+
; PREGFX10-LABEL: buffer_load_p1__voffset_add:
1337+
; PREGFX10: ; %bb.0:
1338+
; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1339+
; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60
1340+
; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1341+
; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1342+
;
1343+
; GFX10-LABEL: buffer_load_p1__voffset_add:
1344+
; GFX10: ; %bb.0:
1345+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1346+
; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60
1347+
; GFX10-NEXT: s_waitcnt vmcnt(0)
1348+
; GFX10-NEXT: s_setpc_b64 s[30:31]
1349+
;
1350+
; GFX11-LABEL: buffer_load_p1__voffset_add:
1351+
; GFX11: ; %bb.0:
1352+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1353+
; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
1354+
; GFX11-NEXT: s_waitcnt vmcnt(0)
1355+
; GFX11-NEXT: s_setpc_b64 s[30:31]
1356+
%voffset.add = add i32 %voffset, 60
1357+
%data = call ptr addrspace(1) @llvm.amdgcn.raw.ptr.buffer.load.p1(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1358+
ret ptr addrspace(1) %data
1359+
}
1360+
1361+
define <2 x ptr addrspace(1)> @buffer_load_v2p1__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1362+
; PREGFX10-LABEL: buffer_load_v2p1__voffset_add:
1363+
; PREGFX10: ; %bb.0:
1364+
; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1365+
; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60
1366+
; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1367+
; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1368+
;
1369+
; GFX10-LABEL: buffer_load_v2p1__voffset_add:
1370+
; GFX10: ; %bb.0:
1371+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1372+
; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60
1373+
; GFX10-NEXT: s_waitcnt vmcnt(0)
1374+
; GFX10-NEXT: s_setpc_b64 s[30:31]
1375+
;
1376+
; GFX11-LABEL: buffer_load_v2p1__voffset_add:
1377+
; GFX11: ; %bb.0:
1378+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1379+
; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
1380+
; GFX11-NEXT: s_waitcnt vmcnt(0)
1381+
; GFX11-NEXT: s_setpc_b64 s[30:31]
1382+
%voffset.add = add i32 %voffset, 60
1383+
%data = call <2 x ptr addrspace(1)> @llvm.amdgcn.raw.ptr.buffer.load.p1(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1384+
ret <2 x ptr addrspace(1)> %data
1385+
}
1386+
1387+
define ptr addrspace(4) @buffer_load_p4__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1388+
; PREGFX10-LABEL: buffer_load_p4__voffset_add:
1389+
; PREGFX10: ; %bb.0:
1390+
; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1391+
; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60
1392+
; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1393+
; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1394+
;
1395+
; GFX10-LABEL: buffer_load_p4__voffset_add:
1396+
; GFX10: ; %bb.0:
1397+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1398+
; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60
1399+
; GFX10-NEXT: s_waitcnt vmcnt(0)
1400+
; GFX10-NEXT: s_setpc_b64 s[30:31]
1401+
;
1402+
; GFX11-LABEL: buffer_load_p4__voffset_add:
1403+
; GFX11: ; %bb.0:
1404+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1405+
; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
1406+
; GFX11-NEXT: s_waitcnt vmcnt(0)
1407+
; GFX11-NEXT: s_setpc_b64 s[30:31]
1408+
%voffset.add = add i32 %voffset, 60
1409+
%data = call ptr addrspace(4) @llvm.amdgcn.raw.ptr.buffer.load.p4(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1410+
ret ptr addrspace(4) %data
1411+
}
1412+
1413+
define <2 x ptr addrspace(4)> @buffer_load_v2p4__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1414+
; PREGFX10-LABEL: buffer_load_v2p4__voffset_add:
1415+
; PREGFX10: ; %bb.0:
1416+
; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1417+
; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60
1418+
; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1419+
; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1420+
;
1421+
; GFX10-LABEL: buffer_load_v2p4__voffset_add:
1422+
; GFX10: ; %bb.0:
1423+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1424+
; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60
1425+
; GFX10-NEXT: s_waitcnt vmcnt(0)
1426+
; GFX10-NEXT: s_setpc_b64 s[30:31]
1427+
;
1428+
; GFX11-LABEL: buffer_load_v2p4__voffset_add:
1429+
; GFX11: ; %bb.0:
1430+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1431+
; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
1432+
; GFX11-NEXT: s_waitcnt vmcnt(0)
1433+
; GFX11-NEXT: s_setpc_b64 s[30:31]
1434+
%voffset.add = add i32 %voffset, 60
1435+
%data = call <2 x ptr addrspace(4)> @llvm.amdgcn.raw.ptr.buffer.load.p4(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1436+
ret <2 x ptr addrspace(4)> %data
1437+
}
1438+
1439+
define ptr addrspace(999) @buffer_load_p999__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1440+
; PREGFX10-LABEL: buffer_load_p999__voffset_add:
1441+
; PREGFX10: ; %bb.0:
1442+
; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1443+
; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60
1444+
; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1445+
; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1446+
;
1447+
; GFX10-LABEL: buffer_load_p999__voffset_add:
1448+
; GFX10: ; %bb.0:
1449+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1450+
; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60
1451+
; GFX10-NEXT: s_waitcnt vmcnt(0)
1452+
; GFX10-NEXT: s_setpc_b64 s[30:31]
1453+
;
1454+
; GFX11-LABEL: buffer_load_p999__voffset_add:
1455+
; GFX11: ; %bb.0:
1456+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1457+
; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
1458+
; GFX11-NEXT: s_waitcnt vmcnt(0)
1459+
; GFX11-NEXT: s_setpc_b64 s[30:31]
1460+
%voffset.add = add i32 %voffset, 60
1461+
%data = call ptr addrspace(999) @llvm.amdgcn.raw.ptr.buffer.load.p999(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1462+
ret ptr addrspace(999) %data
1463+
}
1464+
1465+
define <2 x ptr addrspace(999)> @buffer_load_v2p999__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1466+
; PREGFX10-LABEL: buffer_load_v2p999__voffset_add:
1467+
; PREGFX10: ; %bb.0:
1468+
; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1469+
; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60
1470+
; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1471+
; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1472+
;
1473+
; GFX10-LABEL: buffer_load_v2p999__voffset_add:
1474+
; GFX10: ; %bb.0:
1475+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1476+
; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60
1477+
; GFX10-NEXT: s_waitcnt vmcnt(0)
1478+
; GFX10-NEXT: s_setpc_b64 s[30:31]
1479+
;
1480+
; GFX11-LABEL: buffer_load_v2p999__voffset_add:
1481+
; GFX11: ; %bb.0:
1482+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1483+
; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
1484+
; GFX11-NEXT: s_waitcnt vmcnt(0)
1485+
; GFX11-NEXT: s_setpc_b64 s[30:31]
1486+
%voffset.add = add i32 %voffset, 60
1487+
%data = call <2 x ptr addrspace(999)> @llvm.amdgcn.raw.ptr.buffer.load.p999(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1488+
ret <2 x ptr addrspace(999)> %data
1489+
}
1490+
1491+
define ptr addrspace(2) @buffer_load_p2__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1492+
; PREGFX10-LABEL: buffer_load_p2__voffset_add:
1493+
; PREGFX10: ; %bb.0:
1494+
; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1495+
; PREGFX10-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen offset:60
1496+
; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1497+
; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1498+
;
1499+
; GFX10-LABEL: buffer_load_p2__voffset_add:
1500+
; GFX10: ; %bb.0:
1501+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1502+
; GFX10-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen offset:60
1503+
; GFX10-NEXT: s_waitcnt vmcnt(0)
1504+
; GFX10-NEXT: s_setpc_b64 s[30:31]
1505+
;
1506+
; GFX11-LABEL: buffer_load_p2__voffset_add:
1507+
; GFX11: ; %bb.0:
1508+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1509+
; GFX11-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen offset:60
1510+
; GFX11-NEXT: s_waitcnt vmcnt(0)
1511+
; GFX11-NEXT: s_setpc_b64 s[30:31]
1512+
%voffset.add = add i32 %voffset, 60
1513+
%data = call ptr addrspace(2) @llvm.amdgcn.raw.ptr.buffer.load.p2(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1514+
ret ptr addrspace(2) %data
1515+
}
1516+
1517+
define <2 x ptr addrspace(2)> @buffer_load_v2p2__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1518+
; PREGFX10-LABEL: buffer_load_v2p2__voffset_add:
1519+
; PREGFX10: ; %bb.0:
1520+
; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1521+
; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60
1522+
; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1523+
; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1524+
;
1525+
; GFX10-LABEL: buffer_load_v2p2__voffset_add:
1526+
; GFX10: ; %bb.0:
1527+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1528+
; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60
1529+
; GFX10-NEXT: s_waitcnt vmcnt(0)
1530+
; GFX10-NEXT: s_setpc_b64 s[30:31]
1531+
;
1532+
; GFX11-LABEL: buffer_load_v2p2__voffset_add:
1533+
; GFX11: ; %bb.0:
1534+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1535+
; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
1536+
; GFX11-NEXT: s_waitcnt vmcnt(0)
1537+
; GFX11-NEXT: s_setpc_b64 s[30:31]
1538+
%voffset.add = add i32 %voffset, 60
1539+
%data = call <2 x ptr addrspace(2)> @llvm.amdgcn.raw.ptr.buffer.load.v2p2(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1540+
ret <2 x ptr addrspace(2)> %data
1541+
}
1542+
1543+
define <3 x ptr addrspace(2)> @buffer_load_v3p2__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1544+
; GFX10-LABEL: buffer_load_v3p2__voffset_add:
1545+
; GFX10: ; %bb.0:
1546+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1547+
; GFX10-NEXT: buffer_load_dwordx3 v[0:2], v0, s[4:7], 0 offen offset:60
1548+
; GFX10-NEXT: s_waitcnt vmcnt(0)
1549+
; GFX10-NEXT: s_setpc_b64 s[30:31]
1550+
;
1551+
; GFX11-LABEL: buffer_load_v3p2__voffset_add:
1552+
; GFX11: ; %bb.0:
1553+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1554+
; GFX11-NEXT: buffer_load_b96 v[0:2], v0, s[0:3], 0 offen offset:60
1555+
; GFX11-NEXT: s_waitcnt vmcnt(0)
1556+
; GFX11-NEXT: s_setpc_b64 s[30:31]
1557+
%voffset.add = add i32 %voffset, 60
1558+
%data = call <3 x ptr addrspace(2)> @llvm.amdgcn.raw.ptr.buffer.load.v3p2(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1559+
ret <3 x ptr addrspace(2)> %data
1560+
}
1561+
1562+
define <4 x ptr addrspace(2)> @buffer_load_v4p2__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1563+
; PREGFX10-LABEL: buffer_load_v4p2__voffset_add:
1564+
; PREGFX10: ; %bb.0:
1565+
; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1566+
; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60
1567+
; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1568+
; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1569+
;
1570+
; GFX10-LABEL: buffer_load_v4p2__voffset_add:
1571+
; GFX10: ; %bb.0:
1572+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1573+
; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60
1574+
; GFX10-NEXT: s_waitcnt vmcnt(0)
1575+
; GFX10-NEXT: s_setpc_b64 s[30:31]
1576+
;
1577+
; GFX11-LABEL: buffer_load_v4p2__voffset_add:
1578+
; GFX11: ; %bb.0:
1579+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1580+
; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
1581+
; GFX11-NEXT: s_waitcnt vmcnt(0)
1582+
; GFX11-NEXT: s_setpc_b64 s[30:31]
1583+
%voffset.add = add i32 %voffset, 60
1584+
%data = call <4 x ptr addrspace(2)> @llvm.amdgcn.raw.ptr.buffer.load.v4p2(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1585+
ret <4 x ptr addrspace(2)> %data
1586+
}
1587+
1588+
define ptr addrspace(3) @buffer_load_p3__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1589+
; PREGFX10-LABEL: buffer_load_p3__voffset_add:
1590+
; PREGFX10: ; %bb.0:
1591+
; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1592+
; PREGFX10-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen offset:60
1593+
; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1594+
; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1595+
;
1596+
; GFX10-LABEL: buffer_load_p3__voffset_add:
1597+
; GFX10: ; %bb.0:
1598+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1599+
; GFX10-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen offset:60
1600+
; GFX10-NEXT: s_waitcnt vmcnt(0)
1601+
; GFX10-NEXT: s_setpc_b64 s[30:31]
1602+
;
1603+
; GFX11-LABEL: buffer_load_p3__voffset_add:
1604+
; GFX11: ; %bb.0:
1605+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1606+
; GFX11-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen offset:60
1607+
; GFX11-NEXT: s_waitcnt vmcnt(0)
1608+
; GFX11-NEXT: s_setpc_b64 s[30:31]
1609+
%voffset.add = add i32 %voffset, 60
1610+
%data = call ptr addrspace(3) @llvm.amdgcn.raw.ptr.buffer.load.p3(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1611+
ret ptr addrspace(3) %data
1612+
}
1613+
1614+
define <2 x ptr addrspace(3)> @buffer_load_v2p3__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1615+
; PREGFX10-LABEL: buffer_load_v2p3__voffset_add:
1616+
; PREGFX10: ; %bb.0:
1617+
; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1618+
; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60
1619+
; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1620+
; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1621+
;
1622+
; GFX10-LABEL: buffer_load_v2p3__voffset_add:
1623+
; GFX10: ; %bb.0:
1624+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1625+
; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60
1626+
; GFX10-NEXT: s_waitcnt vmcnt(0)
1627+
; GFX10-NEXT: s_setpc_b64 s[30:31]
1628+
;
1629+
; GFX11-LABEL: buffer_load_v2p3__voffset_add:
1630+
; GFX11: ; %bb.0:
1631+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1632+
; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
1633+
; GFX11-NEXT: s_waitcnt vmcnt(0)
1634+
; GFX11-NEXT: s_setpc_b64 s[30:31]
1635+
%voffset.add = add i32 %voffset, 60
1636+
%data = call <2 x ptr addrspace(3)> @llvm.amdgcn.raw.ptr.buffer.load.v2p3(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1637+
ret <2 x ptr addrspace(3)> %data
1638+
}
1639+
1640+
define <3 x ptr addrspace(3)> @buffer_load_v3p3__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1641+
; GFX10-LABEL: buffer_load_v3p3__voffset_add:
1642+
; GFX10: ; %bb.0:
1643+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1644+
; GFX10-NEXT: buffer_load_dwordx3 v[0:2], v0, s[4:7], 0 offen offset:60
1645+
; GFX10-NEXT: s_waitcnt vmcnt(0)
1646+
; GFX10-NEXT: s_setpc_b64 s[30:31]
1647+
;
1648+
; GFX11-LABEL: buffer_load_v3p3__voffset_add:
1649+
; GFX11: ; %bb.0:
1650+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1651+
; GFX11-NEXT: buffer_load_b96 v[0:2], v0, s[0:3], 0 offen offset:60
1652+
; GFX11-NEXT: s_waitcnt vmcnt(0)
1653+
; GFX11-NEXT: s_setpc_b64 s[30:31]
1654+
%voffset.add = add i32 %voffset, 60
1655+
%data = call <3 x ptr addrspace(3)> @llvm.amdgcn.raw.ptr.buffer.load.v3p3(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1656+
ret <3 x ptr addrspace(3)> %data
1657+
}
1658+
1659+
define <4 x ptr addrspace(3)> @buffer_load_v4p3__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1660+
; PREGFX10-LABEL: buffer_load_v4p3__voffset_add:
1661+
; PREGFX10: ; %bb.0:
1662+
; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1663+
; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60
1664+
; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1665+
; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1666+
;
1667+
; GFX10-LABEL: buffer_load_v4p3__voffset_add:
1668+
; GFX10: ; %bb.0:
1669+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1670+
; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60
1671+
; GFX10-NEXT: s_waitcnt vmcnt(0)
1672+
; GFX10-NEXT: s_setpc_b64 s[30:31]
1673+
;
1674+
; GFX11-LABEL: buffer_load_v4p3__voffset_add:
1675+
; GFX11: ; %bb.0:
1676+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1677+
; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
1678+
; GFX11-NEXT: s_waitcnt vmcnt(0)
1679+
; GFX11-NEXT: s_setpc_b64 s[30:31]
1680+
%voffset.add = add i32 %voffset, 60
1681+
%data = call <4 x ptr addrspace(3)> @llvm.amdgcn.raw.ptr.buffer.load.v4p3(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1682+
ret <4 x ptr addrspace(3)> %data
1683+
}
1684+
1685+
define ptr addrspace(5) @buffer_load_p5__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1686+
; PREGFX10-LABEL: buffer_load_p5__voffset_add:
1687+
; PREGFX10: ; %bb.0:
1688+
; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1689+
; PREGFX10-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen offset:60
1690+
; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1691+
; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1692+
;
1693+
; GFX10-LABEL: buffer_load_p5__voffset_add:
1694+
; GFX10: ; %bb.0:
1695+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1696+
; GFX10-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen offset:60
1697+
; GFX10-NEXT: s_waitcnt vmcnt(0)
1698+
; GFX10-NEXT: s_setpc_b64 s[30:31]
1699+
;
1700+
; GFX11-LABEL: buffer_load_p5__voffset_add:
1701+
; GFX11: ; %bb.0:
1702+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1703+
; GFX11-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen offset:60
1704+
; GFX11-NEXT: s_waitcnt vmcnt(0)
1705+
; GFX11-NEXT: s_setpc_b64 s[30:31]
1706+
%voffset.add = add i32 %voffset, 60
1707+
%data = call ptr addrspace(5) @llvm.amdgcn.raw.ptr.buffer.load.p5(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1708+
ret ptr addrspace(5) %data
1709+
}
1710+
1711+
define <2 x ptr addrspace(5)> @buffer_load_v2p5__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1712+
; PREGFX10-LABEL: buffer_load_v2p5__voffset_add:
1713+
; PREGFX10: ; %bb.0:
1714+
; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1715+
; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60
1716+
; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1717+
; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1718+
;
1719+
; GFX10-LABEL: buffer_load_v2p5__voffset_add:
1720+
; GFX10: ; %bb.0:
1721+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1722+
; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60
1723+
; GFX10-NEXT: s_waitcnt vmcnt(0)
1724+
; GFX10-NEXT: s_setpc_b64 s[30:31]
1725+
;
1726+
; GFX11-LABEL: buffer_load_v2p5__voffset_add:
1727+
; GFX11: ; %bb.0:
1728+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1729+
; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
1730+
; GFX11-NEXT: s_waitcnt vmcnt(0)
1731+
; GFX11-NEXT: s_setpc_b64 s[30:31]
1732+
%voffset.add = add i32 %voffset, 60
1733+
%data = call <2 x ptr addrspace(5)> @llvm.amdgcn.raw.ptr.buffer.load.v2p5(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1734+
ret <2 x ptr addrspace(5)> %data
1735+
}
1736+
1737+
define <3 x ptr addrspace(5)> @buffer_load_v3p5__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1738+
; GFX10-LABEL: buffer_load_v3p5__voffset_add:
1739+
; GFX10: ; %bb.0:
1740+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1741+
; GFX10-NEXT: buffer_load_dwordx3 v[0:2], v0, s[4:7], 0 offen offset:60
1742+
; GFX10-NEXT: s_waitcnt vmcnt(0)
1743+
; GFX10-NEXT: s_setpc_b64 s[30:31]
1744+
;
1745+
; GFX11-LABEL: buffer_load_v3p5__voffset_add:
1746+
; GFX11: ; %bb.0:
1747+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1748+
; GFX11-NEXT: buffer_load_b96 v[0:2], v0, s[0:3], 0 offen offset:60
1749+
; GFX11-NEXT: s_waitcnt vmcnt(0)
1750+
; GFX11-NEXT: s_setpc_b64 s[30:31]
1751+
%voffset.add = add i32 %voffset, 60
1752+
%data = call <3 x ptr addrspace(5)> @llvm.amdgcn.raw.ptr.buffer.load.v3p5(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1753+
ret <3 x ptr addrspace(5)> %data
1754+
}
1755+
1756+
define <4 x ptr addrspace(5)> @buffer_load_v4p5__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1757+
; PREGFX10-LABEL: buffer_load_v4p5__voffset_add:
1758+
; PREGFX10: ; %bb.0:
1759+
; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1760+
; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60
1761+
; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1762+
; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1763+
;
1764+
; GFX10-LABEL: buffer_load_v4p5__voffset_add:
1765+
; GFX10: ; %bb.0:
1766+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1767+
; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60
1768+
; GFX10-NEXT: s_waitcnt vmcnt(0)
1769+
; GFX10-NEXT: s_setpc_b64 s[30:31]
1770+
;
1771+
; GFX11-LABEL: buffer_load_v4p5__voffset_add:
1772+
; GFX11: ; %bb.0:
1773+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1774+
; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
1775+
; GFX11-NEXT: s_waitcnt vmcnt(0)
1776+
; GFX11-NEXT: s_setpc_b64 s[30:31]
1777+
%voffset.add = add i32 %voffset, 60
1778+
%data = call <4 x ptr addrspace(5)> @llvm.amdgcn.raw.ptr.buffer.load.v4p5(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1779+
ret <4 x ptr addrspace(5)> %data
1780+
}
1781+
1782+
define ptr addrspace(6) @buffer_load_p6__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1783+
; PREGFX10-LABEL: buffer_load_p6__voffset_add:
1784+
; PREGFX10: ; %bb.0:
1785+
; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1786+
; PREGFX10-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen offset:60
1787+
; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1788+
; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1789+
;
1790+
; GFX10-LABEL: buffer_load_p6__voffset_add:
1791+
; GFX10: ; %bb.0:
1792+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1793+
; GFX10-NEXT: buffer_load_dword v0, v0, s[4:7], 0 offen offset:60
1794+
; GFX10-NEXT: s_waitcnt vmcnt(0)
1795+
; GFX10-NEXT: s_setpc_b64 s[30:31]
1796+
;
1797+
; GFX11-LABEL: buffer_load_p6__voffset_add:
1798+
; GFX11: ; %bb.0:
1799+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1800+
; GFX11-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen offset:60
1801+
; GFX11-NEXT: s_waitcnt vmcnt(0)
1802+
; GFX11-NEXT: s_setpc_b64 s[30:31]
1803+
%voffset.add = add i32 %voffset, 60
1804+
%data = call ptr addrspace(6) @llvm.amdgcn.raw.ptr.buffer.load.p6(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1805+
ret ptr addrspace(6) %data
1806+
}
1807+
1808+
define <2 x ptr addrspace(6)> @buffer_load_v2p6__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1809+
; PREGFX10-LABEL: buffer_load_v2p6__voffset_add:
1810+
; PREGFX10: ; %bb.0:
1811+
; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1812+
; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60
1813+
; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1814+
; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1815+
;
1816+
; GFX10-LABEL: buffer_load_v2p6__voffset_add:
1817+
; GFX10: ; %bb.0:
1818+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1819+
; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60
1820+
; GFX10-NEXT: s_waitcnt vmcnt(0)
1821+
; GFX10-NEXT: s_setpc_b64 s[30:31]
1822+
;
1823+
; GFX11-LABEL: buffer_load_v2p6__voffset_add:
1824+
; GFX11: ; %bb.0:
1825+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1826+
; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
1827+
; GFX11-NEXT: s_waitcnt vmcnt(0)
1828+
; GFX11-NEXT: s_setpc_b64 s[30:31]
1829+
%voffset.add = add i32 %voffset, 60
1830+
%data = call <2 x ptr addrspace(6)> @llvm.amdgcn.raw.ptr.buffer.load.v2p6(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1831+
ret <2 x ptr addrspace(6)> %data
1832+
}
1833+
1834+
define <3 x ptr addrspace(6)> @buffer_load_v3p6__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1835+
; GFX10-LABEL: buffer_load_v3p6__voffset_add:
1836+
; GFX10: ; %bb.0:
1837+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1838+
; GFX10-NEXT: buffer_load_dwordx3 v[0:2], v0, s[4:7], 0 offen offset:60
1839+
; GFX10-NEXT: s_waitcnt vmcnt(0)
1840+
; GFX10-NEXT: s_setpc_b64 s[30:31]
1841+
;
1842+
; GFX11-LABEL: buffer_load_v3p6__voffset_add:
1843+
; GFX11: ; %bb.0:
1844+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1845+
; GFX11-NEXT: buffer_load_b96 v[0:2], v0, s[0:3], 0 offen offset:60
1846+
; GFX11-NEXT: s_waitcnt vmcnt(0)
1847+
; GFX11-NEXT: s_setpc_b64 s[30:31]
1848+
%voffset.add = add i32 %voffset, 60
1849+
%data = call <3 x ptr addrspace(6)> @llvm.amdgcn.raw.ptr.buffer.load.v3p6(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1850+
ret <3 x ptr addrspace(6)> %data
1851+
}
1852+
1853+
define <4 x ptr addrspace(6)> @buffer_load_v4p6__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1854+
; PREGFX10-LABEL: buffer_load_v4p6__voffset_add:
1855+
; PREGFX10: ; %bb.0:
1856+
; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1857+
; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60
1858+
; PREGFX10-NEXT: s_waitcnt vmcnt(0)
1859+
; PREGFX10-NEXT: s_setpc_b64 s[30:31]
1860+
;
1861+
; GFX10-LABEL: buffer_load_v4p6__voffset_add:
1862+
; GFX10: ; %bb.0:
1863+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1864+
; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60
1865+
; GFX10-NEXT: s_waitcnt vmcnt(0)
1866+
; GFX10-NEXT: s_setpc_b64 s[30:31]
1867+
;
1868+
; GFX11-LABEL: buffer_load_v4p6__voffset_add:
1869+
; GFX11: ; %bb.0:
1870+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1871+
; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
1872+
; GFX11-NEXT: s_waitcnt vmcnt(0)
1873+
; GFX11-NEXT: s_setpc_b64 s[30:31]
1874+
%voffset.add = add i32 %voffset, 60
1875+
%data = call <4 x ptr addrspace(6)> @llvm.amdgcn.raw.ptr.buffer.load.v4p6(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1876+
ret <4 x ptr addrspace(6)> %data
1877+
}
1878+
12831879
declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32) #0
12841880
declare <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8), i32, i32, i32) #0
12851881
declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32) #0

‎llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.ll

Lines changed: 456 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -424,6 +424,462 @@ define void @buffer_store_v2i64__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x
424424
ret void
425425
}
426426

427+
; Stores one `ptr` (address space 0) value with the raw ptr buffer store
; intrinsic at voffset + 60. Both expected outputs use a single
; buffer_store_dwordx2 of v[0:1] with the add folded into the immediate
; offset; they differ only in the s_waitcnt that follows the store.
define void @buffer_store_p0__voffset_add(ptr addrspace(8) inreg %rsrc, ptr %data, i32 %voffset) #0 {
428+
; VERDE-LABEL: buffer_store_p0__voffset_add:
429+
; VERDE: ; %bb.0:
430+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
431+
; VERDE-NEXT: buffer_store_dwordx2 v[0:1], v2, s[4:7], 0 offen offset:60
432+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
433+
; VERDE-NEXT: s_setpc_b64 s[30:31]
434+
;
435+
; CHECK-LABEL: buffer_store_p0__voffset_add:
436+
; CHECK: ; %bb.0:
437+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
438+
; CHECK-NEXT: buffer_store_dwordx2 v[0:1], v2, s[4:7], 0 offen offset:60
439+
; CHECK-NEXT: s_waitcnt vmcnt(0)
440+
; CHECK-NEXT: s_setpc_b64 s[30:31]
441+
%voffset.add = add i32 %voffset, 60
442+
call void @llvm.amdgcn.raw.ptr.buffer.store.p0(ptr %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
443+
ret void
444+
}
445+
446+
; Stores a <2 x ptr> (address space 0) vector with the raw ptr buffer store
; intrinsic at voffset + 60. Both expected outputs use a single
; buffer_store_dwordx4 of v[0:3] with the add folded into the immediate
; offset; they differ only in the s_waitcnt that follows the store.
define void @buffer_store_v2p0__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr> %data, i32 %voffset) #0 {
447+
; VERDE-LABEL: buffer_store_v2p0__voffset_add:
448+
; VERDE: ; %bb.0:
449+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
450+
; VERDE-NEXT: buffer_store_dwordx4 v[0:3], v4, s[4:7], 0 offen offset:60
451+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
452+
; VERDE-NEXT: s_setpc_b64 s[30:31]
453+
;
454+
; CHECK-LABEL: buffer_store_v2p0__voffset_add:
455+
; CHECK: ; %bb.0:
456+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
457+
; CHECK-NEXT: buffer_store_dwordx4 v[0:3], v4, s[4:7], 0 offen offset:60
458+
; CHECK-NEXT: s_waitcnt vmcnt(0)
459+
; CHECK-NEXT: s_setpc_b64 s[30:31]
460+
%voffset.add = add i32 %voffset, 60
461+
call void @llvm.amdgcn.raw.ptr.buffer.store.v2p0(<2 x ptr> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
462+
ret void
463+
}
464+
465+
; Stores one `ptr addrspace(1)` value with the raw ptr buffer store intrinsic
; at voffset + 60. Both expected outputs use a single buffer_store_dwordx2 of
; v[0:1] with the add folded into the immediate offset; they differ only in
; the s_waitcnt that follows the store.
define void @buffer_store_p1__voffset_add(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %data, i32 %voffset) #0 {
466+
; VERDE-LABEL: buffer_store_p1__voffset_add:
467+
; VERDE: ; %bb.0:
468+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
469+
; VERDE-NEXT: buffer_store_dwordx2 v[0:1], v2, s[4:7], 0 offen offset:60
470+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
471+
; VERDE-NEXT: s_setpc_b64 s[30:31]
472+
;
473+
; CHECK-LABEL: buffer_store_p1__voffset_add:
474+
; CHECK: ; %bb.0:
475+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
476+
; CHECK-NEXT: buffer_store_dwordx2 v[0:1], v2, s[4:7], 0 offen offset:60
477+
; CHECK-NEXT: s_waitcnt vmcnt(0)
478+
; CHECK-NEXT: s_setpc_b64 s[30:31]
479+
%voffset.add = add i32 %voffset, 60
480+
call void @llvm.amdgcn.raw.ptr.buffer.store.p1(ptr addrspace(1) %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
481+
ret void
482+
}
483+
484+
; Stores a <2 x ptr addrspace(1)> vector with the raw ptr buffer store
; intrinsic at voffset + 60. Both expected outputs use a single
; buffer_store_dwordx4 of v[0:3] with the add folded into the immediate
; offset; they differ only in the s_waitcnt that follows the store.
define void @buffer_store_v2p1__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr addrspace(1)> %data, i32 %voffset) #0 {
485+
; VERDE-LABEL: buffer_store_v2p1__voffset_add:
486+
; VERDE: ; %bb.0:
487+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
488+
; VERDE-NEXT: buffer_store_dwordx4 v[0:3], v4, s[4:7], 0 offen offset:60
489+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
490+
; VERDE-NEXT: s_setpc_b64 s[30:31]
491+
;
492+
; CHECK-LABEL: buffer_store_v2p1__voffset_add:
493+
; CHECK: ; %bb.0:
494+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
495+
; CHECK-NEXT: buffer_store_dwordx4 v[0:3], v4, s[4:7], 0 offen offset:60
496+
; CHECK-NEXT: s_waitcnt vmcnt(0)
497+
; CHECK-NEXT: s_setpc_b64 s[30:31]
498+
%voffset.add = add i32 %voffset, 60
499+
call void @llvm.amdgcn.raw.ptr.buffer.store.v2p1(<2 x ptr addrspace(1)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
500+
ret void
501+
}
502+
503+
; Stores one `ptr addrspace(4)` value with the raw ptr buffer store intrinsic
; at voffset + 60. Both expected outputs use a single buffer_store_dwordx2 of
; v[0:1] with the add folded into the immediate offset; they differ only in
; the s_waitcnt that follows the store.
define void @buffer_store_p4__voffset_add(ptr addrspace(8) inreg %rsrc, ptr addrspace(4) %data, i32 %voffset) #0 {
504+
; VERDE-LABEL: buffer_store_p4__voffset_add:
505+
; VERDE: ; %bb.0:
506+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
507+
; VERDE-NEXT: buffer_store_dwordx2 v[0:1], v2, s[4:7], 0 offen offset:60
508+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
509+
; VERDE-NEXT: s_setpc_b64 s[30:31]
510+
;
511+
; CHECK-LABEL: buffer_store_p4__voffset_add:
512+
; CHECK: ; %bb.0:
513+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
514+
; CHECK-NEXT: buffer_store_dwordx2 v[0:1], v2, s[4:7], 0 offen offset:60
515+
; CHECK-NEXT: s_waitcnt vmcnt(0)
516+
; CHECK-NEXT: s_setpc_b64 s[30:31]
517+
%voffset.add = add i32 %voffset, 60
518+
call void @llvm.amdgcn.raw.ptr.buffer.store.p4(ptr addrspace(4) %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
519+
ret void
520+
}
521+
522+
; Stores a <2 x ptr addrspace(4)> vector with the raw ptr buffer store
; intrinsic at voffset + 60. Both expected outputs use a single
; buffer_store_dwordx4 of v[0:3] with the add folded into the immediate
; offset; they differ only in the s_waitcnt that follows the store.
define void @buffer_store_v2p4__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr addrspace(4)> %data, i32 %voffset) #0 {
523+
; VERDE-LABEL: buffer_store_v2p4__voffset_add:
524+
; VERDE: ; %bb.0:
525+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
526+
; VERDE-NEXT: buffer_store_dwordx4 v[0:3], v4, s[4:7], 0 offen offset:60
527+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
528+
; VERDE-NEXT: s_setpc_b64 s[30:31]
529+
;
530+
; CHECK-LABEL: buffer_store_v2p4__voffset_add:
531+
; CHECK: ; %bb.0:
532+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
533+
; CHECK-NEXT: buffer_store_dwordx4 v[0:3], v4, s[4:7], 0 offen offset:60
534+
; CHECK-NEXT: s_waitcnt vmcnt(0)
535+
; CHECK-NEXT: s_setpc_b64 s[30:31]
536+
%voffset.add = add i32 %voffset, 60
537+
call void @llvm.amdgcn.raw.ptr.buffer.store.v2p4(<2 x ptr addrspace(4)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
538+
ret void
539+
}
540+
541+
; Stores one `ptr addrspace(999)` value with the raw ptr buffer store
; intrinsic at voffset + 60. Both expected outputs treat the pointer as two
; dwords: a single buffer_store_dwordx2 of v[0:1] with the add folded into the
; immediate offset; they differ only in the s_waitcnt that follows the store.
define void @buffer_store_p999__voffset_add(ptr addrspace(8) inreg %rsrc, ptr addrspace(999) %data, i32 %voffset) #0 {
542+
; VERDE-LABEL: buffer_store_p999__voffset_add:
543+
; VERDE: ; %bb.0:
544+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
545+
; VERDE-NEXT: buffer_store_dwordx2 v[0:1], v2, s[4:7], 0 offen offset:60
546+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
547+
; VERDE-NEXT: s_setpc_b64 s[30:31]
548+
;
549+
; CHECK-LABEL: buffer_store_p999__voffset_add:
550+
; CHECK: ; %bb.0:
551+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
552+
; CHECK-NEXT: buffer_store_dwordx2 v[0:1], v2, s[4:7], 0 offen offset:60
553+
; CHECK-NEXT: s_waitcnt vmcnt(0)
554+
; CHECK-NEXT: s_setpc_b64 s[30:31]
555+
%voffset.add = add i32 %voffset, 60
556+
call void @llvm.amdgcn.raw.ptr.buffer.store.p999(ptr addrspace(999) %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
557+
ret void
558+
}
559+
560+
; Stores a <2 x ptr addrspace(999)> vector with the raw ptr buffer store
; intrinsic at voffset + 60. Both expected outputs use a single
; buffer_store_dwordx4 of v[0:3] with the add folded into the immediate
; offset; they differ only in the s_waitcnt that follows the store.
define void @buffer_store_v2p999__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr addrspace(999)> %data, i32 %voffset) #0 {
561+
; VERDE-LABEL: buffer_store_v2p999__voffset_add:
562+
; VERDE: ; %bb.0:
563+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
564+
; VERDE-NEXT: buffer_store_dwordx4 v[0:3], v4, s[4:7], 0 offen offset:60
565+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
566+
; VERDE-NEXT: s_setpc_b64 s[30:31]
567+
;
568+
; CHECK-LABEL: buffer_store_v2p999__voffset_add:
569+
; CHECK: ; %bb.0:
570+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
571+
; CHECK-NEXT: buffer_store_dwordx4 v[0:3], v4, s[4:7], 0 offen offset:60
572+
; CHECK-NEXT: s_waitcnt vmcnt(0)
573+
; CHECK-NEXT: s_setpc_b64 s[30:31]
574+
%voffset.add = add i32 %voffset, 60
575+
call void @llvm.amdgcn.raw.ptr.buffer.store.v2p999(<2 x ptr addrspace(999)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
576+
ret void
577+
}
578+
579+
; Stores one `ptr addrspace(2)` value with the raw ptr buffer store intrinsic
; at voffset + 60. Both expected outputs treat the pointer as one dword: a
; single buffer_store_dword of v0 with the add folded into the immediate
; offset; they differ only in the s_waitcnt that follows the store.
define void @buffer_store_p2__voffset_add(ptr addrspace(8) inreg %rsrc, ptr addrspace(2) %data, i32 %voffset) #0 {
580+
; VERDE-LABEL: buffer_store_p2__voffset_add:
581+
; VERDE: ; %bb.0:
582+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
583+
; VERDE-NEXT: buffer_store_dword v0, v1, s[4:7], 0 offen offset:60
584+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
585+
; VERDE-NEXT: s_setpc_b64 s[30:31]
586+
;
587+
; CHECK-LABEL: buffer_store_p2__voffset_add:
588+
; CHECK: ; %bb.0:
589+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
590+
; CHECK-NEXT: buffer_store_dword v0, v1, s[4:7], 0 offen offset:60
591+
; CHECK-NEXT: s_waitcnt vmcnt(0)
592+
; CHECK-NEXT: s_setpc_b64 s[30:31]
593+
%voffset.add = add i32 %voffset, 60
594+
call void @llvm.amdgcn.raw.ptr.buffer.store.p2(ptr addrspace(2) %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
595+
ret void
596+
}
597+
598+
; Stores a <2 x ptr addrspace(2)> vector with the raw ptr buffer store
; intrinsic at voffset + 60. Both expected outputs use a single
; buffer_store_dwordx2 of v[0:1] with the add folded into the immediate
; offset; they differ only in the s_waitcnt that follows the store.
define void @buffer_store_v2p2__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr addrspace(2)> %data, i32 %voffset) #0 {
599+
; VERDE-LABEL: buffer_store_v2p2__voffset_add:
600+
; VERDE: ; %bb.0:
601+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
602+
; VERDE-NEXT: buffer_store_dwordx2 v[0:1], v2, s[4:7], 0 offen offset:60
603+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
604+
; VERDE-NEXT: s_setpc_b64 s[30:31]
605+
;
606+
; CHECK-LABEL: buffer_store_v2p2__voffset_add:
607+
; CHECK: ; %bb.0:
608+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
609+
; CHECK-NEXT: buffer_store_dwordx2 v[0:1], v2, s[4:7], 0 offen offset:60
610+
; CHECK-NEXT: s_waitcnt vmcnt(0)
611+
; CHECK-NEXT: s_setpc_b64 s[30:31]
612+
%voffset.add = add i32 %voffset, 60
613+
call void @llvm.amdgcn.raw.ptr.buffer.store.v2p2(<2 x ptr addrspace(2)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
614+
ret void
615+
}
616+
617+
; Stores a <3 x ptr addrspace(2)> vector with the raw ptr buffer store
; intrinsic at voffset + 60. Both expected outputs use a single
; buffer_store_dwordx3 of v[0:2] with the add folded into the immediate
; offset; they differ only in the s_waitcnt that follows the store.
define void @buffer_store_v3p2__voffset_add(ptr addrspace(8) inreg %rsrc, <3 x ptr addrspace(2)> %data, i32 %voffset) #0 {
618+
; VERDE-LABEL: buffer_store_v3p2__voffset_add:
619+
; VERDE: ; %bb.0:
620+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
621+
; VERDE-NEXT: buffer_store_dwordx3 v[0:2], v3, s[4:7], 0 offen offset:60
622+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
623+
; VERDE-NEXT: s_setpc_b64 s[30:31]
624+
;
625+
; CHECK-LABEL: buffer_store_v3p2__voffset_add:
626+
; CHECK: ; %bb.0:
627+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
628+
; CHECK-NEXT: buffer_store_dwordx3 v[0:2], v3, s[4:7], 0 offen offset:60
629+
; CHECK-NEXT: s_waitcnt vmcnt(0)
630+
; CHECK-NEXT: s_setpc_b64 s[30:31]
631+
%voffset.add = add i32 %voffset, 60
632+
call void @llvm.amdgcn.raw.ptr.buffer.store.v3p2(<3 x ptr addrspace(2)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
633+
ret void
634+
}
635+
636+
; Stores a <4 x ptr addrspace(2)> vector with the raw ptr buffer store
; intrinsic at voffset + 60. Both expected outputs use a single
; buffer_store_dwordx4 of v[0:3] with the add folded into the immediate
; offset; they differ only in the s_waitcnt that follows the store.
define void @buffer_store_v4p2__voffset_add(ptr addrspace(8) inreg %rsrc, <4 x ptr addrspace(2)> %data, i32 %voffset) #0 {
637+
; VERDE-LABEL: buffer_store_v4p2__voffset_add:
638+
; VERDE: ; %bb.0:
639+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
640+
; VERDE-NEXT: buffer_store_dwordx4 v[0:3], v4, s[4:7], 0 offen offset:60
641+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
642+
; VERDE-NEXT: s_setpc_b64 s[30:31]
643+
;
644+
; CHECK-LABEL: buffer_store_v4p2__voffset_add:
645+
; CHECK: ; %bb.0:
646+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
647+
; CHECK-NEXT: buffer_store_dwordx4 v[0:3], v4, s[4:7], 0 offen offset:60
648+
; CHECK-NEXT: s_waitcnt vmcnt(0)
649+
; CHECK-NEXT: s_setpc_b64 s[30:31]
650+
%voffset.add = add i32 %voffset, 60
651+
call void @llvm.amdgcn.raw.ptr.buffer.store.v4p2(<4 x ptr addrspace(2)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
652+
ret void
653+
}
654+
655+
; Stores one `ptr addrspace(3)` value with the raw ptr buffer store intrinsic
; at voffset + 60. Both expected outputs treat the pointer as one dword: a
; single buffer_store_dword of v0 with the add folded into the immediate
; offset; they differ only in the s_waitcnt that follows the store.
define void @buffer_store_p3__voffset_add(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %data, i32 %voffset) #0 {
656+
; VERDE-LABEL: buffer_store_p3__voffset_add:
657+
; VERDE: ; %bb.0:
658+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
659+
; VERDE-NEXT: buffer_store_dword v0, v1, s[4:7], 0 offen offset:60
660+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
661+
; VERDE-NEXT: s_setpc_b64 s[30:31]
662+
;
663+
; CHECK-LABEL: buffer_store_p3__voffset_add:
664+
; CHECK: ; %bb.0:
665+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
666+
; CHECK-NEXT: buffer_store_dword v0, v1, s[4:7], 0 offen offset:60
667+
; CHECK-NEXT: s_waitcnt vmcnt(0)
668+
; CHECK-NEXT: s_setpc_b64 s[30:31]
669+
%voffset.add = add i32 %voffset, 60
670+
call void @llvm.amdgcn.raw.ptr.buffer.store.p3(ptr addrspace(3) %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
671+
ret void
672+
}
673+
674+
; Stores a <2 x ptr addrspace(3)> vector with the raw ptr buffer store
; intrinsic at voffset + 60. Both expected outputs use a single
; buffer_store_dwordx2 of v[0:1] with the add folded into the immediate
; offset; they differ only in the s_waitcnt that follows the store.
define void @buffer_store_v2p3__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr addrspace(3)> %data, i32 %voffset) #0 {
675+
; VERDE-LABEL: buffer_store_v2p3__voffset_add:
676+
; VERDE: ; %bb.0:
677+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
678+
; VERDE-NEXT: buffer_store_dwordx2 v[0:1], v2, s[4:7], 0 offen offset:60
679+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
680+
; VERDE-NEXT: s_setpc_b64 s[30:31]
681+
;
682+
; CHECK-LABEL: buffer_store_v2p3__voffset_add:
683+
; CHECK: ; %bb.0:
684+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
685+
; CHECK-NEXT: buffer_store_dwordx2 v[0:1], v2, s[4:7], 0 offen offset:60
686+
; CHECK-NEXT: s_waitcnt vmcnt(0)
687+
; CHECK-NEXT: s_setpc_b64 s[30:31]
688+
%voffset.add = add i32 %voffset, 60
689+
call void @llvm.amdgcn.raw.ptr.buffer.store.v2p3(<2 x ptr addrspace(3)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
690+
ret void
691+
}
692+
693+
; Stores a <3 x ptr addrspace(3)> vector with the raw ptr buffer store
; intrinsic at voffset + 60. Both expected outputs use a single
; buffer_store_dwordx3 of v[0:2] with the add folded into the immediate
; offset; they differ only in the s_waitcnt that follows the store.
define void @buffer_store_v3p3__voffset_add(ptr addrspace(8) inreg %rsrc, <3 x ptr addrspace(3)> %data, i32 %voffset) #0 {
694+
; VERDE-LABEL: buffer_store_v3p3__voffset_add:
695+
; VERDE: ; %bb.0:
696+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
697+
; VERDE-NEXT: buffer_store_dwordx3 v[0:2], v3, s[4:7], 0 offen offset:60
698+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
699+
; VERDE-NEXT: s_setpc_b64 s[30:31]
700+
;
701+
; CHECK-LABEL: buffer_store_v3p3__voffset_add:
702+
; CHECK: ; %bb.0:
703+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
704+
; CHECK-NEXT: buffer_store_dwordx3 v[0:2], v3, s[4:7], 0 offen offset:60
705+
; CHECK-NEXT: s_waitcnt vmcnt(0)
706+
; CHECK-NEXT: s_setpc_b64 s[30:31]
707+
%voffset.add = add i32 %voffset, 60
708+
call void @llvm.amdgcn.raw.ptr.buffer.store.v3p3(<3 x ptr addrspace(3)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
709+
ret void
710+
}
711+
712+
; Stores a <4 x ptr addrspace(3)> vector with the raw ptr buffer store
; intrinsic at voffset + 60. Both expected outputs use a single
; buffer_store_dwordx4 of v[0:3] with the add folded into the immediate
; offset; they differ only in the s_waitcnt that follows the store.
define void @buffer_store_v4p3__voffset_add(ptr addrspace(8) inreg %rsrc, <4 x ptr addrspace(3)> %data, i32 %voffset) #0 {
713+
; VERDE-LABEL: buffer_store_v4p3__voffset_add:
714+
; VERDE: ; %bb.0:
715+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
716+
; VERDE-NEXT: buffer_store_dwordx4 v[0:3], v4, s[4:7], 0 offen offset:60
717+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
718+
; VERDE-NEXT: s_setpc_b64 s[30:31]
719+
;
720+
; CHECK-LABEL: buffer_store_v4p3__voffset_add:
721+
; CHECK: ; %bb.0:
722+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
723+
; CHECK-NEXT: buffer_store_dwordx4 v[0:3], v4, s[4:7], 0 offen offset:60
724+
; CHECK-NEXT: s_waitcnt vmcnt(0)
725+
; CHECK-NEXT: s_setpc_b64 s[30:31]
726+
%voffset.add = add i32 %voffset, 60
727+
call void @llvm.amdgcn.raw.ptr.buffer.store.v4p3(<4 x ptr addrspace(3)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
728+
ret void
729+
}
730+
731+
; Stores one `ptr addrspace(5)` value with the raw ptr buffer store intrinsic
; at voffset + 60. Both expected outputs treat the pointer as one dword: a
; single buffer_store_dword of v0 with the add folded into the immediate
; offset; they differ only in the s_waitcnt that follows the store.
define void @buffer_store_p5__voffset_add(ptr addrspace(8) inreg %rsrc, ptr addrspace(5) %data, i32 %voffset) #0 {
732+
; VERDE-LABEL: buffer_store_p5__voffset_add:
733+
; VERDE: ; %bb.0:
734+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
735+
; VERDE-NEXT: buffer_store_dword v0, v1, s[4:7], 0 offen offset:60
736+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
737+
; VERDE-NEXT: s_setpc_b64 s[30:31]
738+
;
739+
; CHECK-LABEL: buffer_store_p5__voffset_add:
740+
; CHECK: ; %bb.0:
741+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
742+
; CHECK-NEXT: buffer_store_dword v0, v1, s[4:7], 0 offen offset:60
743+
; CHECK-NEXT: s_waitcnt vmcnt(0)
744+
; CHECK-NEXT: s_setpc_b64 s[30:31]
745+
%voffset.add = add i32 %voffset, 60
746+
call void @llvm.amdgcn.raw.ptr.buffer.store.p5(ptr addrspace(5) %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
747+
ret void
748+
}
749+
750+
; Stores a <2 x ptr addrspace(5)> vector with the raw ptr buffer store
; intrinsic at voffset + 60. Both expected outputs use a single
; buffer_store_dwordx2 of v[0:1] with the add folded into the immediate
; offset; they differ only in the s_waitcnt that follows the store.
define void @buffer_store_v2p5__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr addrspace(5)> %data, i32 %voffset) #0 {
751+
; VERDE-LABEL: buffer_store_v2p5__voffset_add:
752+
; VERDE: ; %bb.0:
753+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
754+
; VERDE-NEXT: buffer_store_dwordx2 v[0:1], v2, s[4:7], 0 offen offset:60
755+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
756+
; VERDE-NEXT: s_setpc_b64 s[30:31]
757+
;
758+
; CHECK-LABEL: buffer_store_v2p5__voffset_add:
759+
; CHECK: ; %bb.0:
760+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
761+
; CHECK-NEXT: buffer_store_dwordx2 v[0:1], v2, s[4:7], 0 offen offset:60
762+
; CHECK-NEXT: s_waitcnt vmcnt(0)
763+
; CHECK-NEXT: s_setpc_b64 s[30:31]
764+
%voffset.add = add i32 %voffset, 60
765+
call void @llvm.amdgcn.raw.ptr.buffer.store.v2p5(<2 x ptr addrspace(5)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
766+
ret void
767+
}
768+
769+
; Stores a <3 x ptr addrspace(5)> vector with the raw ptr buffer store
; intrinsic at voffset + 60. Both expected outputs use a single
; buffer_store_dwordx3 of v[0:2] with the add folded into the immediate
; offset; they differ only in the s_waitcnt that follows the store.
define void @buffer_store_v3p5__voffset_add(ptr addrspace(8) inreg %rsrc, <3 x ptr addrspace(5)> %data, i32 %voffset) #0 {
770+
; VERDE-LABEL: buffer_store_v3p5__voffset_add:
771+
; VERDE: ; %bb.0:
772+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
773+
; VERDE-NEXT: buffer_store_dwordx3 v[0:2], v3, s[4:7], 0 offen offset:60
774+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
775+
; VERDE-NEXT: s_setpc_b64 s[30:31]
776+
;
777+
; CHECK-LABEL: buffer_store_v3p5__voffset_add:
778+
; CHECK: ; %bb.0:
779+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
780+
; CHECK-NEXT: buffer_store_dwordx3 v[0:2], v3, s[4:7], 0 offen offset:60
781+
; CHECK-NEXT: s_waitcnt vmcnt(0)
782+
; CHECK-NEXT: s_setpc_b64 s[30:31]
783+
%voffset.add = add i32 %voffset, 60
784+
call void @llvm.amdgcn.raw.ptr.buffer.store.v3p5(<3 x ptr addrspace(5)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
785+
ret void
786+
}
787+
788+
; Stores a <4 x ptr addrspace(5)> vector with the raw ptr buffer store
; intrinsic at voffset + 60. Both expected outputs use a single
; buffer_store_dwordx4 of v[0:3] with the add folded into the immediate
; offset; they differ only in the s_waitcnt that follows the store.
define void @buffer_store_v4p5__voffset_add(ptr addrspace(8) inreg %rsrc, <4 x ptr addrspace(5)> %data, i32 %voffset) #0 {
789+
; VERDE-LABEL: buffer_store_v4p5__voffset_add:
790+
; VERDE: ; %bb.0:
791+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
792+
; VERDE-NEXT: buffer_store_dwordx4 v[0:3], v4, s[4:7], 0 offen offset:60
793+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
794+
; VERDE-NEXT: s_setpc_b64 s[30:31]
795+
;
796+
; CHECK-LABEL: buffer_store_v4p5__voffset_add:
797+
; CHECK: ; %bb.0:
798+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
799+
; CHECK-NEXT: buffer_store_dwordx4 v[0:3], v4, s[4:7], 0 offen offset:60
800+
; CHECK-NEXT: s_waitcnt vmcnt(0)
801+
; CHECK-NEXT: s_setpc_b64 s[30:31]
802+
%voffset.add = add i32 %voffset, 60
803+
call void @llvm.amdgcn.raw.ptr.buffer.store.v4p5(<4 x ptr addrspace(5)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
804+
ret void
805+
}
806+
807+
; Stores one `ptr addrspace(6)` value with the raw ptr buffer store intrinsic
; at voffset + 60. Both expected outputs treat the pointer as one dword: a
; single buffer_store_dword of v0 with the add folded into the immediate
; offset; they differ only in the s_waitcnt that follows the store.
define void @buffer_store_p6__voffset_add(ptr addrspace(8) inreg %rsrc, ptr addrspace(6) %data, i32 %voffset) #0 {
808+
; VERDE-LABEL: buffer_store_p6__voffset_add:
809+
; VERDE: ; %bb.0:
810+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
811+
; VERDE-NEXT: buffer_store_dword v0, v1, s[4:7], 0 offen offset:60
812+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
813+
; VERDE-NEXT: s_setpc_b64 s[30:31]
814+
;
815+
; CHECK-LABEL: buffer_store_p6__voffset_add:
816+
; CHECK: ; %bb.0:
817+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
818+
; CHECK-NEXT: buffer_store_dword v0, v1, s[4:7], 0 offen offset:60
819+
; CHECK-NEXT: s_waitcnt vmcnt(0)
820+
; CHECK-NEXT: s_setpc_b64 s[30:31]
821+
%voffset.add = add i32 %voffset, 60
822+
call void @llvm.amdgcn.raw.ptr.buffer.store.p6(ptr addrspace(6) %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
823+
ret void
824+
}
825+
826+
; Stores a <2 x ptr addrspace(6)> vector with the raw ptr buffer store
; intrinsic at voffset + 60. Both expected outputs use a single
; buffer_store_dwordx2 of v[0:1] with the add folded into the immediate
; offset; they differ only in the s_waitcnt that follows the store.
define void @buffer_store_v2p6__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr addrspace(6)> %data, i32 %voffset) #0 {
827+
; VERDE-LABEL: buffer_store_v2p6__voffset_add:
828+
; VERDE: ; %bb.0:
829+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
830+
; VERDE-NEXT: buffer_store_dwordx2 v[0:1], v2, s[4:7], 0 offen offset:60
831+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
832+
; VERDE-NEXT: s_setpc_b64 s[30:31]
833+
;
834+
; CHECK-LABEL: buffer_store_v2p6__voffset_add:
835+
; CHECK: ; %bb.0:
836+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
837+
; CHECK-NEXT: buffer_store_dwordx2 v[0:1], v2, s[4:7], 0 offen offset:60
838+
; CHECK-NEXT: s_waitcnt vmcnt(0)
839+
; CHECK-NEXT: s_setpc_b64 s[30:31]
840+
%voffset.add = add i32 %voffset, 60
841+
call void @llvm.amdgcn.raw.ptr.buffer.store.v2p6(<2 x ptr addrspace(6)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
842+
ret void
843+
}
844+
845+
; Stores a <3 x ptr addrspace(6)> vector with the raw ptr buffer store
; intrinsic at voffset + 60. Both expected outputs use a single
; buffer_store_dwordx3 of v[0:2] with the add folded into the immediate
; offset; they differ only in the s_waitcnt that follows the store.
define void @buffer_store_v3p6__voffset_add(ptr addrspace(8) inreg %rsrc, <3 x ptr addrspace(6)> %data, i32 %voffset) #0 {
846+
; VERDE-LABEL: buffer_store_v3p6__voffset_add:
847+
; VERDE: ; %bb.0:
848+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
849+
; VERDE-NEXT: buffer_store_dwordx3 v[0:2], v3, s[4:7], 0 offen offset:60
850+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
851+
; VERDE-NEXT: s_setpc_b64 s[30:31]
852+
;
853+
; CHECK-LABEL: buffer_store_v3p6__voffset_add:
854+
; CHECK: ; %bb.0:
855+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
856+
; CHECK-NEXT: buffer_store_dwordx3 v[0:2], v3, s[4:7], 0 offen offset:60
857+
; CHECK-NEXT: s_waitcnt vmcnt(0)
858+
; CHECK-NEXT: s_setpc_b64 s[30:31]
859+
%voffset.add = add i32 %voffset, 60
860+
call void @llvm.amdgcn.raw.ptr.buffer.store.v3p6(<3 x ptr addrspace(6)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
861+
ret void
862+
}
863+
864+
; Stores a <4 x ptr addrspace(6)> vector with the raw ptr buffer store
; intrinsic at voffset + 60. Both expected outputs use a single
; buffer_store_dwordx4 of v[0:3] with the add folded into the immediate
; offset; they differ only in the s_waitcnt that follows the store.
define void @buffer_store_v4p6__voffset_add(ptr addrspace(8) inreg %rsrc, <4 x ptr addrspace(6)> %data, i32 %voffset) #0 {
865+
; VERDE-LABEL: buffer_store_v4p6__voffset_add:
866+
; VERDE: ; %bb.0:
867+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
868+
; VERDE-NEXT: buffer_store_dwordx4 v[0:3], v4, s[4:7], 0 offen offset:60
869+
; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0)
870+
; VERDE-NEXT: s_setpc_b64 s[30:31]
871+
;
872+
; CHECK-LABEL: buffer_store_v4p6__voffset_add:
873+
; CHECK: ; %bb.0:
874+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
875+
; CHECK-NEXT: buffer_store_dwordx4 v[0:3], v4, s[4:7], 0 offen offset:60
876+
; CHECK-NEXT: s_waitcnt vmcnt(0)
877+
; CHECK-NEXT: s_setpc_b64 s[30:31]
878+
%voffset.add = add i32 %voffset, 60
879+
call void @llvm.amdgcn.raw.ptr.buffer.store.v4p6(<4 x ptr addrspace(6)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
880+
ret void
881+
}
882+
427883
; Declarations of the raw ptr buffer store intrinsic for float, <2 x float>
; and <4 x float> data. Each takes the data value, the ptr addrspace(8)
; resource, and three i32 operands.
declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32) #0
428884
declare void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float>, ptr addrspace(8), i32, i32, i32) #0
429885
declare void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32) #0

0 commit comments

Comments
 (0)
Please sign in to comment.