Skip to content

Commit 720be6c

Browse files
authored
[AMDGPU] Add encoding/decoding support for non-result-returning ATOMIC_CSUB instructions (#68684)
The BUFFER_ATOMIC_CSUB and GLOBAL_ATOMIC_CSUB instructions have encodings for non-value-returning forms, although actually using them isn't supported by hardware. However, these encodings aren't supported by the backend, meaning that they can't even be assembled or disassembled. Add support for the non-returning encodings, but gate actually using them in instruction selection behind a new feature FeatureAtomicCSubNoRtnInsts, which no target uses. This still allows the non-returning instructions to be tested manually, and llvm.amdgcn.atomic.csub.ll is extended to cover them. The feature does not gate assembling or disassembling them: doing so is no longer an error, and the encoding and decoding tests have been adapted accordingly.
1 parent 37b93f0 commit 720be6c

16 files changed

+171
-36
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -693,6 +693,13 @@ def FeatureAtomicGlobalPkAddBF16Inst : SubtargetFeature<"atomic-global-pk-add-bf
693693
[FeatureFlatGlobalInsts]
694694
>;
695695

696+
def FeatureAtomicCSubNoRtnInsts : SubtargetFeature<"atomic-csub-no-rtn-insts",
697+
"HasAtomicCSubNoRtnInsts",
698+
"true",
699+
"Has buffer_atomic_csub and global_atomic_csub instructions that don't "
700+
"return original value"
701+
>;
702+
696703
def FeatureFlatAtomicFaddF32Inst
697704
: SubtargetFeature<"flat-atomic-fadd-f32-inst",
698705
"HasFlatAtomicFaddF32Inst",
@@ -1927,6 +1934,8 @@ def HasGWS : Predicate<"Subtarget->hasGWS()">;
19271934
def HasCvtFP8VOP1Bug : Predicate<"Subtarget->hasCvtFP8VOP1Bug()">;
19281935
def HasNoCvtFP8VOP1Bug : Predicate<"!Subtarget->hasCvtFP8VOP1Bug()">;
19291936

1937+
def HasAtomicCSubNoRtnInsts : Predicate<"Subtarget->hasAtomicCSubNoRtnInsts()">;
1938+
19301939
// Include AMDGPU TD files
19311940
include "SISchedule.td"
19321941
include "GCNProcessors.td"

llvm/lib/Target/AMDGPU/AMDGPUInstructions.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -621,6 +621,7 @@ defm int_amdgcn_flat_atomic_fadd : global_addr_space_atomic_op;
621621
defm int_amdgcn_global_atomic_fadd_v2bf16 : noret_op;
622622
defm int_amdgcn_global_atomic_fmin : noret_op;
623623
defm int_amdgcn_global_atomic_fmax : noret_op;
624+
defm int_amdgcn_global_atomic_csub : noret_op;
624625
defm int_amdgcn_flat_atomic_fadd : local_addr_space_atomic_op;
625626
defm int_amdgcn_ds_fadd_v2bf16 : noret_op;
626627

llvm/lib/Target/AMDGPU/BUFInstructions.td

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1046,10 +1046,16 @@ defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Pseudo_Atomics <
10461046
"buffer_atomic_dec_x2", VReg_64, i64
10471047
>;
10481048

1049-
let SubtargetPredicate = HasGFX10_BEncoding in
1050-
defm BUFFER_ATOMIC_CSUB : MUBUF_Pseudo_Atomics_RTN <
1051-
"buffer_atomic_csub", VGPR_32, i32, int_amdgcn_global_atomic_csub
1052-
>;
1049+
let SubtargetPredicate = HasGFX10_BEncoding in {
1050+
defm BUFFER_ATOMIC_CSUB : MUBUF_Pseudo_Atomics_RTN <
1051+
"buffer_atomic_csub", VGPR_32, i32, int_amdgcn_global_atomic_csub
1052+
>;
1053+
1054+
let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
1055+
defm BUFFER_ATOMIC_CSUB : MUBUF_Pseudo_Atomics_NO_RTN <
1056+
"buffer_atomic_csub", VGPR_32, i32
1057+
>;
1058+
}
10531059

10541060
let SubtargetPredicate = isGFX8GFX9 in {
10551061
def BUFFER_STORE_LDS_DWORD : MUBUF_Pseudo_Store_Lds <"buffer_store_lds_dword">;
@@ -1585,6 +1591,9 @@ defm : SIBufferAtomicPat<"SIbuffer_atomic_xor", i64, "BUFFER_ATOMIC_XOR_X2">;
15851591
defm : SIBufferAtomicPat<"SIbuffer_atomic_inc", i64, "BUFFER_ATOMIC_INC_X2">;
15861592
defm : SIBufferAtomicPat<"SIbuffer_atomic_dec", i64, "BUFFER_ATOMIC_DEC_X2">;
15871593

1594+
let SubtargetPredicate = HasAtomicCSubNoRtnInsts in
1595+
defm : SIBufferAtomicPat<"SIbuffer_atomic_csub", i32, "BUFFER_ATOMIC_CSUB", ["noret"]>;
1596+
15881597
let SubtargetPredicate = isGFX6GFX7GFX10Plus in {
15891598
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f32, "BUFFER_ATOMIC_FMIN">;
15901599
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f32, "BUFFER_ATOMIC_FMAX">;
@@ -2243,7 +2252,7 @@ defm BUFFER_ATOMIC_AND_X2 : MUBUF_Real_Atomic_gfx11_Renamed<0x049, "buff
22432252
defm BUFFER_ATOMIC_CMPSWAP : MUBUF_Real_Atomic_gfx11_Renamed<0x034, "buffer_atomic_cmpswap_b32">;
22442253
defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Real_Atomic_gfx11_Renamed<0x042, "buffer_atomic_cmpswap_b64">;
22452254
defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Real_Atomic_gfx11_Renamed<0x050, "buffer_atomic_cmpswap_f32">;
2246-
defm BUFFER_ATOMIC_CSUB : MUBUF_Real_Atomic_gfx11_Renamed_impl<0x037, 1, "buffer_atomic_csub_u32">;
2255+
defm BUFFER_ATOMIC_CSUB : MUBUF_Real_Atomic_gfx11_Renamed<0x037, "buffer_atomic_csub_u32">;
22472256
def : Pre_gfx11_MUBUF_Name<"buffer_atomic_csub", "buffer_atomic_csub_u32">;
22482257
defm BUFFER_ATOMIC_DEC : MUBUF_Real_Atomic_gfx11_Renamed<0x040, "buffer_atomic_dec_u32">;
22492258
defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Real_Atomic_gfx11_Renamed<0x04D, "buffer_atomic_dec_u64">;
@@ -2515,7 +2524,7 @@ defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05e>;
25152524
defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05f>;
25162525
defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x060>;
25172526

2518-
defm BUFFER_ATOMIC_CSUB : MUBUF_Real_Atomics_RTN_gfx10<0x034>;
2527+
defm BUFFER_ATOMIC_CSUB : MUBUF_Real_Atomics_gfx10<0x034>;
25192528

25202529
defm BUFFER_WBINVL1_SC : MUBUF_Real_gfx6<0x070>;
25212530
defm BUFFER_WBINVL1_VOL : MUBUF_Real_gfx7<0x070>;

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -870,9 +870,14 @@ defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_inc_x2",
870870
defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2",
871871
VReg_64, i64>;
872872

873-
let SubtargetPredicate = HasGFX10_BEncoding in
874-
defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo_RTN <"global_atomic_csub",
875-
VGPR_32, i32>;
873+
let SubtargetPredicate = HasGFX10_BEncoding in {
874+
defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo_RTN <"global_atomic_csub",
875+
VGPR_32, i32>;
876+
877+
let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
878+
defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo_NO_RTN <"global_atomic_csub",
879+
VGPR_32, i32>;
880+
}
876881

877882
defm GLOBAL_LOAD_LDS_UBYTE : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_ubyte">;
878883
defm GLOBAL_LOAD_LDS_SBYTE : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sbyte">;
@@ -1442,6 +1447,9 @@ defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_glo
14421447
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR", "atomic_load_xor_global", i32>;
14431448
defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_CSUB", "int_amdgcn_global_atomic_csub", i32, i32, /* isIntr */ 1>;
14441449

1450+
let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
1451+
defm : GlobalFLATAtomicPatsNoRtn <"GLOBAL_ATOMIC_CSUB", "int_amdgcn_global_atomic_csub", i32, i32, /* isIntr */ 1>;
1452+
14451453
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_X2", "atomic_load_add_global", i64>;
14461454
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB_X2", "atomic_load_sub_global", i64>;
14471455
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC_X2", "atomic_load_uinc_wrap_global", i64>;
@@ -2102,7 +2110,7 @@ defm GLOBAL_ATOMIC_SWAP : FLAT_Real_GlblAtomics_gfx10<0x030>;
21022110
defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Real_GlblAtomics_gfx10<0x031>;
21032111
defm GLOBAL_ATOMIC_ADD : FLAT_Real_GlblAtomics_gfx10<0x032>;
21042112
defm GLOBAL_ATOMIC_SUB : FLAT_Real_GlblAtomics_gfx10<0x033>;
2105-
defm GLOBAL_ATOMIC_CSUB : FLAT_Real_GlblAtomics_RTN_gfx10<0x034>;
2113+
defm GLOBAL_ATOMIC_CSUB : FLAT_Real_GlblAtomics_gfx10<0x034>;
21062114
defm GLOBAL_ATOMIC_SMIN : FLAT_Real_GlblAtomics_gfx10<0x035>;
21072115
defm GLOBAL_ATOMIC_UMIN : FLAT_Real_GlblAtomics_gfx10<0x036>;
21082116
defm GLOBAL_ATOMIC_SMAX : FLAT_Real_GlblAtomics_gfx10<0x037>;
@@ -2333,7 +2341,7 @@ defm GLOBAL_ATOMIC_SWAP_B32 : FLAT_Real_GlblAtomics_gfx11<0x033, "GLOBAL_ATO
23332341
defm GLOBAL_ATOMIC_CMPSWAP_B32 : FLAT_Real_GlblAtomics_gfx11<0x034, "GLOBAL_ATOMIC_CMPSWAP", "global_atomic_cmpswap_b32", true>;
23342342
defm GLOBAL_ATOMIC_ADD_U32 : FLAT_Real_GlblAtomics_gfx11<0x035, "GLOBAL_ATOMIC_ADD", "global_atomic_add_u32", true>;
23352343
defm GLOBAL_ATOMIC_SUB_U32 : FLAT_Real_GlblAtomics_gfx11<0x036, "GLOBAL_ATOMIC_SUB", "global_atomic_sub_u32", true>;
2336-
defm GLOBAL_ATOMIC_CSUB_U32 : FLAT_Real_GlblAtomics_RTN_gfx11<0x037, "GLOBAL_ATOMIC_CSUB", "global_atomic_csub_u32", true>;
2344+
defm GLOBAL_ATOMIC_CSUB_U32 : FLAT_Real_GlblAtomics_gfx11<0x037, "GLOBAL_ATOMIC_CSUB", "global_atomic_csub_u32", true>;
23372345
defm GLOBAL_ATOMIC_MIN_I32 : FLAT_Real_GlblAtomics_gfx11<0x038, "GLOBAL_ATOMIC_SMIN", "global_atomic_min_i32", true>;
23382346
defm GLOBAL_ATOMIC_MIN_U32 : FLAT_Real_GlblAtomics_gfx11<0x039, "GLOBAL_ATOMIC_UMIN", "global_atomic_min_u32", true>;
23392347
defm GLOBAL_ATOMIC_MAX_I32 : FLAT_Real_GlblAtomics_gfx11<0x03a, "GLOBAL_ATOMIC_SMAX", "global_atomic_max_i32", true>;

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
160160
bool HasAtomicFaddNoRtnInsts = false;
161161
bool HasAtomicBufferGlobalPkAddF16NoRtnInsts = false;
162162
bool HasAtomicBufferGlobalPkAddF16Insts = false;
163+
bool HasAtomicCSubNoRtnInsts = false;
163164
bool HasAtomicGlobalPkAddBF16Inst = false;
164165
bool HasFlatAtomicFaddF32Inst = false;
165166
bool SupportsSRAMECC = false;
@@ -1203,6 +1204,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
12031204
// \returns true if FP8/BF8 VOP1 form of conversion to F32 is unreliable.
12041205
bool hasCvtFP8VOP1Bug() const { return true; }
12051206

1207+
// \returns true if CSUB atomics support a no-return form.
1208+
bool hasAtomicCSubNoRtnInsts() const { return HasAtomicCSubNoRtnInsts; }
1209+
12061210
/// \returns SGPR allocation granularity supported by the subtarget.
12071211
unsigned getSGPRAllocGranule() const {
12081212
return AMDGPU::IsaInfo::getSGPRAllocGranule(this);

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,7 @@ defm SIbuffer_atomic_or : SDBufferAtomicNoRet;
210210
defm SIbuffer_atomic_xor : SDBufferAtomicNoRet;
211211
defm SIbuffer_atomic_inc : SDBufferAtomicNoRet;
212212
defm SIbuffer_atomic_dec : SDBufferAtomicNoRet;
213+
defm SIbuffer_atomic_csub : SDBufferAtomicNoRet;
213214
defm SIbuffer_atomic_fadd : SDBufferAtomicNoRet;
214215
defm SIbuffer_atomic_fmin : SDBufferAtomicNoRet;
215216
defm SIbuffer_atomic_fmax : SDBufferAtomicNoRet;

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.csub.ll

Lines changed: 43 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,35 +4,70 @@
44
declare i32 @llvm.amdgcn.buffer.atomic.csub(i32, <4 x i32>, i32, i32, i1)
55
declare i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1), i32)
66

7-
; GCN-LABEL: {{^}}buffer_atomic_csub:
7+
; GCN-LABEL: {{^}}buffer_atomic_csub_rtn:
88
; GCN: buffer_atomic_csub v0, v1, s[0:3], 0 idxen glc
9-
define amdgpu_ps void @buffer_atomic_csub(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex) {
9+
define amdgpu_ps void @buffer_atomic_csub_rtn(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex) {
1010
main_body:
1111
%ret = call i32 @llvm.amdgcn.buffer.atomic.csub(i32 %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
1212
ret void
1313
}
1414

15-
; GCN-LABEL: {{^}}buffer_atomic_csub_off4_slc:
15+
; GCN-LABEL: {{^}}buffer_atomic_csub_no_rtn:
16+
; GCN: buffer_atomic_csub v0, v1, s[0:3], 0 idxen
17+
define amdgpu_ps void @buffer_atomic_csub_no_rtn(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex) #0 {
18+
main_body:
19+
%ret = call i32 @llvm.amdgcn.buffer.atomic.csub(i32 %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
20+
ret void
21+
}
22+
23+
; GCN-LABEL: {{^}}buffer_atomic_csub_off4_slc_rtn:
1624
; GCN: buffer_atomic_csub v0, v1, s[0:3], 0 idxen offset:4 glc slc
17-
define amdgpu_ps void @buffer_atomic_csub_off4_slc(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex) {
25+
define amdgpu_ps void @buffer_atomic_csub_off4_slc_rtn(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex) {
1826
main_body:
1927
%ret = call i32 @llvm.amdgcn.buffer.atomic.csub(i32 %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i1 1)
2028
ret void
2129
}
2230

23-
; GCN-LABEL: {{^}}global_atomic_csub:
31+
; GCN-LABEL: {{^}}buffer_atomic_csub_off4_slc_no_rtn:
32+
; GCN: buffer_atomic_csub v0, v1, s[0:3], 0 idxen offset:4 slc
33+
define amdgpu_ps void @buffer_atomic_csub_off4_slc_no_rtn(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex) #0 {
34+
main_body:
35+
%ret = call i32 @llvm.amdgcn.buffer.atomic.csub(i32 %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i1 1)
36+
ret void
37+
}
38+
39+
; GCN-LABEL: {{^}}global_atomic_csub_rtn:
2440
; GCN: global_atomic_csub v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9:]+}}, s{{\[[0-9]+:[0-9]+\]}} glc
25-
define amdgpu_kernel void @global_atomic_csub(ptr addrspace(1) %ptr, i32 %data) {
41+
define amdgpu_kernel void @global_atomic_csub_rtn(ptr addrspace(1) %ptr, i32 %data) {
42+
main_body:
43+
%ret = call i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1) %ptr, i32 %data)
44+
ret void
45+
}
46+
47+
; GCN-LABEL: {{^}}global_atomic_csub_no_rtn:
48+
; GCN: global_atomic_csub v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}
49+
define amdgpu_kernel void @global_atomic_csub_no_rtn(ptr addrspace(1) %ptr, i32 %data) #0 {
2650
main_body:
2751
%ret = call i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1) %ptr, i32 %data)
2852
ret void
2953
}
3054

31-
; GCN-LABEL: {{^}}global_atomic_csub_off4:
55+
; GCN-LABEL: {{^}}global_atomic_csub_off4_rtn:
3256
; GCN: global_atomic_csub v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:4 glc
33-
define amdgpu_kernel void @global_atomic_csub_off4(ptr addrspace(1) %ptr, i32 %data) {
57+
define amdgpu_kernel void @global_atomic_csub_off4_rtn(ptr addrspace(1) %ptr, i32 %data) {
3458
main_body:
3559
%p = getelementptr i32, ptr addrspace(1) %ptr, i64 1
3660
%ret = call i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1) %p, i32 %data)
3761
ret void
3862
}
63+
64+
; GCN-LABEL: {{^}}global_atomic_csub_off4_no_rtn:
65+
; GCN: global_atomic_csub v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:4
66+
define amdgpu_kernel void @global_atomic_csub_off4_no_rtn(ptr addrspace(1) %ptr, i32 %data) #0 {
67+
main_body:
68+
%p = getelementptr i32, ptr addrspace(1) %ptr, i64 1
69+
%ret = call i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1) %p, i32 %data)
70+
ret void
71+
}
72+
73+
attributes #0 = { "target-features"="+atomic-csub-no-rtn-insts" }

llvm/test/MC/AMDGPU/gfx1030_err.s

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -141,12 +141,6 @@ ds_write_src2_b32 v1 offset:65535
141141
ds_write_src2_b64 v1 offset:65535
142142
// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
143143

144-
buffer_atomic_csub v5, off, s[8:11], s3 offset:4095
145-
// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction must use glc
146-
147-
global_atomic_csub v2, v[0:1], v2, off offset:100 slc
148-
// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction must use glc
149-
150144
image_msaa_load v[1:4], v5, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D
151145
// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: invalid dim; must be MSAA type
152146

llvm/test/MC/AMDGPU/gfx1030_new.s

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,18 +30,30 @@ global_store_dword v254, v1, s[2:3] offset:16
3030
global_atomic_csub v2, v[0:1], v2, off offset:100 glc slc
3131
// GFX10: encoding: [0x64,0x80,0xd3,0xdc,0x00,0x02,0x7d,0x02]
3232

33+
global_atomic_csub v[0:1], v2, off offset:100 slc
34+
// GFX10: encoding: [0x64,0x80,0xd2,0xdc,0x00,0x02,0x7d,0x00]
35+
3336
global_atomic_csub v2, v[0:1], v2, off glc
3437
// GFX10: encoding: [0x00,0x80,0xd1,0xdc,0x00,0x02,0x7d,0x02]
3538

39+
global_atomic_csub v[0:1], v2, off
40+
// GFX10: encoding: [0x00,0x80,0xd0,0xdc,0x00,0x02,0x7d,0x00]
41+
3642
global_atomic_csub v2, v0, v2, s[2:3] glc
3743
// GFX10: encoding: [0x00,0x80,0xd1,0xdc,0x00,0x02,0x02,0x02]
3844

45+
global_atomic_csub v0, v2, s[2:3]
46+
// GFX10: encoding: [0x00,0x80,0xd0,0xdc,0x00,0x02,0x02,0x00]
47+
3948
global_atomic_csub v2, v0, v2, s[2:3] offset:100 glc slc
4049
// GFX10: encoding: [0x64,0x80,0xd3,0xdc,0x00,0x02,0x02,0x02]
4150

4251
buffer_atomic_csub v5, off, s[8:11], s3 glc
4352
// GFX10: encoding: [0x00,0x40,0xd0,0xe0,0x00,0x05,0x02,0x03]
4453

54+
buffer_atomic_csub v5, off, s[8:11], s3
55+
// GFX10: encoding: [0x00,0x00,0xd0,0xe0,0x00,0x05,0x02,0x03]
56+
4557
buffer_atomic_csub v5, off, s[8:11], s3 offset:4095 glc
4658
// GFX10: encoding: [0xff,0x4f,0xd0,0xe0,0x00,0x05,0x02,0x03]
4759

@@ -51,12 +63,21 @@ buffer_atomic_csub v5, off, s[8:11], -1 offset:4095 glc
5163
buffer_atomic_csub v5, v0, s[8:11], s3 offen offset:4095 glc
5264
// GFX10: encoding: [0xff,0x5f,0xd0,0xe0,0x00,0x05,0x02,0x03]
5365

66+
buffer_atomic_csub v5, v0, s[8:11], s3 offen offset:4095
67+
// GFX10: encoding: [0xff,0x1f,0xd0,0xe0,0x00,0x05,0x02,0x03]
68+
5469
buffer_atomic_csub v5, v0, s[8:11], s3 idxen offset:4095 glc
5570
// GFX10: encoding: [0xff,0x6f,0xd0,0xe0,0x00,0x05,0x02,0x03]
5671

72+
buffer_atomic_csub v5, v0, s[8:11], s3 idxen offset:4095
73+
// GFX10: encoding: [0xff,0x2f,0xd0,0xe0,0x00,0x05,0x02,0x03]
74+
5775
buffer_atomic_csub v5, off, s[8:11], s3 glc slc
5876
// GFX10: encoding: [0x00,0x40,0xd0,0xe0,0x00,0x05,0x42,0x03]
5977

78+
buffer_atomic_csub v5, off, s[8:11], s3 slc
79+
// GFX10: encoding: [0x00,0x00,0xd0,0xe0,0x00,0x05,0x42,0x03]
80+
6081
s_getreg_b32 s2, hwreg(HW_REG_SHADER_CYCLES)
6182
// GFX10: encoding: [0x1d,0xf8,0x02,0xb9]
6283

llvm/test/MC/AMDGPU/gfx11_asm_flat.s

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1536,6 +1536,9 @@ global_atomic_cmpswap_x2 v[254:255], v255, v[252:255], ttmp[14:15] offset:-4096
15361536
global_atomic_csub v5, v[1:2], v2, off glc
15371537
// GFX11: [0x00,0x40,0xde,0xdc,0x01,0x02,0x7c,0x05]
15381538

1539+
global_atomic_csub v[1:2], v2, off
1540+
// GFX11: [0x00,0x00,0xde,0xdc,0x01,0x02,0x7c,0x00]
1541+
15391542
global_atomic_csub v5, v[254:255], v2, off glc
15401543
// GFX11: [0x00,0x40,0xde,0xdc,0xfe,0x02,0x7c,0x05]
15411544

@@ -1563,9 +1566,15 @@ global_atomic_csub v5, v255, v2, vcc offset:4095 glc
15631566
global_atomic_csub v255, v255, v255, ttmp[14:15] offset:-4096 glc slc dlc
15641567
// GFX11: [0x00,0xf0,0xde,0xdc,0xff,0xff,0x7a,0xff]
15651568

1569+
global_atomic_csub v255, v255, ttmp[14:15] offset:-4096 slc dlc
1570+
// GFX11: [0x00,0xb0,0xde,0xdc,0xff,0xff,0x7a,0x00]
1571+
15661572
global_atomic_csub_u32 v5, v[1:2], v2, off glc
15671573
// GFX11: [0x00,0x40,0xde,0xdc,0x01,0x02,0x7c,0x05]
15681574

1575+
global_atomic_csub_u32 v[1:2], v2, off
1576+
// GFX11: [0x00,0x00,0xde,0xdc,0x01,0x02,0x7c,0x00]
1577+
15691578
global_atomic_csub_u32 v5, v[254:255], v2, off glc
15701579
// GFX11: [0x00,0x40,0xde,0xdc,0xfe,0x02,0x7c,0x05]
15711580

@@ -1593,6 +1602,9 @@ global_atomic_csub_u32 v5, v255, v2, vcc offset:4095 glc
15931602
global_atomic_csub_u32 v255, v255, v255, ttmp[14:15] offset:-4096 glc slc dlc
15941603
// GFX11: [0x00,0xf0,0xde,0xdc,0xff,0xff,0x7a,0xff]
15951604

1605+
global_atomic_csub_u32 v255, v255, ttmp[14:15] offset:-4096 slc dlc
1606+
// GFX11: [0x00,0xb0,0xde,0xdc,0xff,0xff,0x7a,0x00]
1607+
15961608
global_atomic_dec v[1:2], v2, off
15971609
// GFX11: [0x00,0x00,0x02,0xdd,0x01,0x02,0x7c,0x00]
15981610

0 commit comments

Comments
 (0)