Skip to content

Commit b2937eb

Browse files
committed
AMDGPU/GlobalISel: Handle atomic sextload and zextload
In GlobalISel, atomic loads are handled differently than in SelectionDAG: they have separate opcodes and explicit control over the extension kind, like ordinary loads. Add new patterns for the sign- and zero-extending atomic loads. There is room for cleanup and improvement; d16 cases are not handled yet. Fixes #111645
1 parent b9754e9 commit b2937eb

File tree

9 files changed

+1416
-0
lines changed

9 files changed

+1416
-0
lines changed

llvm/lib/Target/AMDGPU/AMDGPUGISel.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,8 @@ def : GINodeEquiv<G_STORE, AMDGPUst_glue> {
207207

208208
def : GINodeEquiv<G_LOAD, AMDGPUatomic_ld_glue> {
209209
bit CheckMMOIsAtomic = 1;
210+
let IfSignExtend = G_SEXTLOAD;
211+
let IfZeroExtend = G_ZEXTLOAD;
210212
}
211213

212214
def : GINodeEquiv<G_STORE, AMDGPUatomic_st_glue> {

llvm/lib/Target/AMDGPU/AMDGPUInstructions.td

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -521,6 +521,27 @@ def atomic_load_64_#as : PatFrag<(ops node:$ptr), (atomic_load_64 node:$ptr)> {
521521
let IsAtomic = 1;
522522
let MemoryVT = i64;
523523
}
524+
525+
def atomic_load_zext_8_#as : PatFrag<(ops node:$ptr), (atomic_load_zext node:$ptr)> {
526+
let IsAtomic = 1;
527+
let MemoryVT = i8;
528+
}
529+
530+
def atomic_load_sext_8_#as : PatFrag<(ops node:$ptr), (atomic_load_sext node:$ptr)> {
531+
let IsAtomic = 1;
532+
let MemoryVT = i8;
533+
}
534+
535+
def atomic_load_zext_16_#as : PatFrag<(ops node:$ptr), (atomic_load_zext node:$ptr)> {
536+
let IsAtomic = 1;
537+
let MemoryVT = i16;
538+
}
539+
540+
def atomic_load_sext_16_#as : PatFrag<(ops node:$ptr), (atomic_load_sext node:$ptr)> {
541+
let IsAtomic = 1;
542+
let MemoryVT = i16;
543+
}
544+
524545
} // End let AddressSpaces
525546
} // End foreach as
526547

llvm/lib/Target/AMDGPU/BUFInstructions.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -983,15 +983,22 @@ defm BUFFER_LOAD_LDS_U16 : MUBUF_Pseudo_Loads_LDSOpc <
983983
>;
984984

985985
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, atomic_load_8_global>;
986+
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, atomic_load_zext_8_global>;
986987
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, atomic_load_16_global>;
988+
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, atomic_load_zext_16_global>;
987989
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i16, atomic_load_8_global>;
988990
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i16, atomic_load_16_global>;
991+
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i16, atomic_load_zext_8_global>;
992+
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i16, atomic_load_zext_16_global>;
989993
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, extloadi8_global>;
990994
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, zextloadi8_global>;
991995
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SBYTE", i32, sextloadi8_global>;
996+
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SBYTE", i32, atomic_load_sext_8_global>;
997+
// Sign-extending 8-bit atomic load with an i16 result. A 16-bit
// sign-extending load must not select BUFFER_LOAD_SBYTE (it would only read
// one byte); that case is covered by the BUFFER_LOAD_SSHORT pattern.
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SBYTE", i16, atomic_load_sext_8_global>;
992998
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, extloadi16_global>;
993999
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, zextloadi16_global>;
9941000
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SSHORT", i32, sextloadi16_global>;
1001+
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SSHORT", i32, atomic_load_sext_16_global>;
9951002

9961003
foreach vt = Reg32Types.types in {
9971004
defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORD", vt, load_global>;

llvm/lib/Target/AMDGPU/DSInstructions.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -795,12 +795,19 @@ defm : DSReadPat_mc <DS_READ_B32, vt, "load_local">;
795795

796796
defm : DSReadPat_mc <DS_READ_U8, i16, "atomic_load_8_local">;
797797
defm : DSReadPat_mc <DS_READ_U8, i32, "atomic_load_8_local">;
798+
defm : DSReadPat_mc <DS_READ_U8, i16, "atomic_load_zext_8_local">;
799+
defm : DSReadPat_mc <DS_READ_U8, i32, "atomic_load_zext_8_local">;
800+
defm : DSReadPat_mc <DS_READ_I8, i16, "atomic_load_sext_8_local">;
801+
defm : DSReadPat_mc <DS_READ_I8, i32, "atomic_load_sext_8_local">;
798802
defm : DSReadPat_mc <DS_READ_U16, i16, "atomic_load_16_local">;
799803
defm : DSReadPat_mc <DS_READ_U16, i32, "atomic_load_16_local">;
804+
defm : DSReadPat_mc <DS_READ_U16, i32, "atomic_load_zext_16_local">;
805+
defm : DSReadPat_mc <DS_READ_I16, i32, "atomic_load_sext_16_local">;
800806
defm : DSReadPat_mc <DS_READ_B32, i32, "atomic_load_32_local">;
801807
defm : DSReadPat_mc <DS_READ_B64, i64, "atomic_load_64_local">;
802808

803809
let OtherPredicates = [D16PreservesUnusedBits] in {
810+
// TODO: Handle atomic loads
804811
def : DSReadPat_D16<DS_READ_U16_D16_HI, load_d16_hi_local, v2i16>;
805812
def : DSReadPat_D16<DS_READ_U16_D16_HI, load_d16_hi_local, v2f16>;
806813
def : DSReadPat_D16<DS_READ_U8_D16_HI, az_extloadi8_d16_hi_local, v2i16>;

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1355,11 +1355,17 @@ let OtherPredicates = [HasFlatAddressSpace] in {
13551355

13561356
def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i32>;
13571357
def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i16>;
1358+
def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_zext_8_flat, i32>;
1359+
def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_zext_8_flat, i16>;
13581360
def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, i32>;
13591361
def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, i16>;
1362+
def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_zext_16_flat, i32>;
1363+
def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_zext_16_flat, i16>;
13601364
def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i32>;
13611365
def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i32>;
13621366
def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i32>;
1367+
def : FlatLoadPat <FLAT_LOAD_SBYTE, atomic_load_sext_8_flat, i32>;
1368+
def : FlatLoadPat <FLAT_LOAD_SBYTE, atomic_load_sext_8_flat, i16>;
13631369
def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i16>;
13641370
def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>;
13651371
def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>;
@@ -1456,6 +1462,7 @@ def : FlatStorePat <FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>;
14561462
}
14571463

14581464
let OtherPredicates = [D16PreservesUnusedBits] in {
1465+
// TODO: Handle atomic loads
14591466
def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2i16>;
14601467
def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2f16>;
14611468
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2i16>;
@@ -1477,8 +1484,14 @@ let OtherPredicates = [HasFlatGlobalInsts] in {
14771484

14781485
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_8_global, i32>;
14791486
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_8_global, i16>;
1487+
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_zext_8_global, i32>;
1488+
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_zext_8_global, i16>;
14801489
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_16_global, i32>;
14811490
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_16_global, i16>;
1491+
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_zext_16_global, i32>;
1492+
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_zext_16_global, i16>;
1493+
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, atomic_load_sext_8_global, i32>;
1494+
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, atomic_load_sext_8_global, i16>;
14821495
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i32>;
14831496
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i32>;
14841497
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i32>;
@@ -1488,6 +1501,8 @@ defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>;
14881501
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, extloadi16_global, i32>;
14891502
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>;
14901503
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>;
1504+
// Sign-extending 16-bit atomic load to i32. The matching zero-extending
// i32 pattern (GLOBAL_LOAD_USHORT, atomic_load_zext_16_global) is already
// defined with the other atomic zext patterns, so it is not repeated here.
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SSHORT, atomic_load_sext_16_global, i32>;
14911506
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, load_global, i16>;
14921507

14931508
foreach vt = Reg32Types.types in {
@@ -1525,6 +1540,7 @@ defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE_D16_HI, truncstorei8_hi16_global,
15251540
}
15261541

15271542
let OtherPredicates = [D16PreservesUnusedBits] in {
1543+
// TODO: Handle atomic loads
15281544
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2i16>;
15291545
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2f16>;
15301546
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2i16>;

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,18 @@ def load_glue : PatFrag <(ops node:$ptr), (unindexedload_glue node:$ptr)> {
348348
let IsNonExtLoad = 1;
349349
}
350350

351+
def atomic_load_zext_glue :
352+
PatFrag<(ops node:$ptr), (AMDGPUatomic_ld_glue node:$ptr)> {
353+
let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
354+
let IsZeroExtLoad = true;
355+
}
356+
357+
def atomic_load_sext_glue :
358+
PatFrag<(ops node:$ptr), (AMDGPUatomic_ld_glue node:$ptr)> {
359+
let IsAtomic = true; // FIXME: Should be IsLoad and/or IsAtomic?
360+
let IsSignExtLoad = true;
361+
}
362+
351363
def atomic_load_8_glue : PatFrag<(ops node:$ptr),
352364
(AMDGPUatomic_ld_glue node:$ptr)> {
353365
let IsAtomic = 1;
@@ -372,6 +384,30 @@ def atomic_load_64_glue : PatFrag<(ops node:$ptr),
372384
let MemoryVT = i64;
373385
}
374386

387+
def atomic_load_zext_8_glue : PatFrag<(ops node:$ptr),
388+
(atomic_load_zext_glue node:$ptr)> {
389+
let IsAtomic = 1;
390+
let MemoryVT = i8;
391+
}
392+
393+
def atomic_load_sext_8_glue : PatFrag<(ops node:$ptr),
394+
(atomic_load_sext_glue node:$ptr)> {
395+
let IsAtomic = 1;
396+
let MemoryVT = i8;
397+
}
398+
399+
def atomic_load_zext_16_glue : PatFrag<(ops node:$ptr),
400+
(atomic_load_zext_glue node:$ptr)> {
401+
let IsAtomic = 1;
402+
let MemoryVT = i16;
403+
}
404+
405+
def atomic_load_sext_16_glue : PatFrag<(ops node:$ptr),
406+
(atomic_load_sext_glue node:$ptr)> {
407+
let IsAtomic = 1;
408+
let MemoryVT = i16;
409+
}
410+
375411
def extload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
376412
let IsLoad = 1;
377413
let IsAnyExtLoad = 1;
@@ -453,6 +489,15 @@ def atomic_load_32_local_m0 : PatFrag<(ops node:$ptr),
453489
(atomic_load_32_glue node:$ptr)>;
454490
def atomic_load_64_local_m0 : PatFrag<(ops node:$ptr),
455491
(atomic_load_64_glue node:$ptr)>;
492+
493+
def atomic_load_zext_8_local_m0 : PatFrag<(ops node:$ptr),
494+
(atomic_load_zext_8_glue node:$ptr)>;
495+
def atomic_load_sext_8_local_m0 : PatFrag<(ops node:$ptr),
496+
(atomic_load_sext_8_glue node:$ptr)>;
497+
def atomic_load_zext_16_local_m0 : PatFrag<(ops node:$ptr),
498+
(atomic_load_zext_16_glue node:$ptr)>;
499+
def atomic_load_sext_16_local_m0 : PatFrag<(ops node:$ptr),
500+
(atomic_load_sext_16_glue node:$ptr)>;
456501
} // End let AddressSpaces = LoadAddress_local.AddrSpaces
457502

458503

0 commit comments

Comments
 (0)