Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -693,6 +693,13 @@ def FeatureAtomicGlobalPkAddBF16Inst : SubtargetFeature<"atomic-global-pk-add-bf
[FeatureFlatGlobalInsts]
>;

// Subtarget feature for the no-return forms of the CSUB atomics. Enabling
// "atomic-csub-no-rtn-insts" sets the HasAtomicCSubNoRtnInsts subtarget field
// to "true".
def FeatureAtomicCSubNoRtnInsts : SubtargetFeature<"atomic-csub-no-rtn-insts",
"HasAtomicCSubNoRtnInsts",
"true",
"Has buffer_atomic_csub and global_atomic_csub instructions that don't "
"return original value"
>;

def FeatureFlatAtomicFaddF32Inst
: SubtargetFeature<"flat-atomic-fadd-f32-inst",
"HasFlatAtomicFaddF32Inst",
Expand Down Expand Up @@ -1927,6 +1934,8 @@ def HasGWS : Predicate<"Subtarget->hasGWS()">;
def HasCvtFP8VOP1Bug : Predicate<"Subtarget->hasCvtFP8VOP1Bug()">;
def HasNoCvtFP8VOP1Bug : Predicate<"!Subtarget->hasCvtFP8VOP1Bug()">;

// Predicate that holds when Subtarget->hasAtomicCSubNoRtnInsts() is true; it
// guards the no-return CSUB pseudo instructions and selection patterns.
def HasAtomicCSubNoRtnInsts : Predicate<"Subtarget->hasAtomicCSubNoRtnInsts()">;

// Include AMDGPU TD files
include "SISchedule.td"
include "GCNProcessors.td"
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -621,6 +621,7 @@ defm int_amdgcn_flat_atomic_fadd : global_addr_space_atomic_op;
defm int_amdgcn_global_atomic_fadd_v2bf16 : noret_op;
defm int_amdgcn_global_atomic_fmin : noret_op;
defm int_amdgcn_global_atomic_fmax : noret_op;
defm int_amdgcn_global_atomic_csub : noret_op;
defm int_amdgcn_flat_atomic_fadd : local_addr_space_atomic_op;
defm int_amdgcn_ds_fadd_v2bf16 : noret_op;

Expand Down
21 changes: 15 additions & 6 deletions llvm/lib/Target/AMDGPU/BUFInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1046,10 +1046,16 @@ defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Pseudo_Atomics <
"buffer_atomic_dec_x2", VReg_64, i64
>;

let SubtargetPredicate = HasGFX10_BEncoding in
defm BUFFER_ATOMIC_CSUB : MUBUF_Pseudo_Atomics_RTN <
"buffer_atomic_csub", VGPR_32, i32, int_amdgcn_global_atomic_csub
>;
// buffer_atomic_csub pseudos. Both forms are gated on HasGFX10_BEncoding.
let SubtargetPredicate = HasGFX10_BEncoding in {
// Returning form, selected for the int_amdgcn_global_atomic_csub intrinsic.
defm BUFFER_ATOMIC_CSUB : MUBUF_Pseudo_Atomics_RTN <
"buffer_atomic_csub", VGPR_32, i32, int_amdgcn_global_atomic_csub
>;

// No-return form: additionally requires HasAtomicCSubNoRtnInsts. No
// intrinsic is attached here.
let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
defm BUFFER_ATOMIC_CSUB : MUBUF_Pseudo_Atomics_NO_RTN <
"buffer_atomic_csub", VGPR_32, i32
>;
}

let SubtargetPredicate = isGFX8GFX9 in {
def BUFFER_STORE_LDS_DWORD : MUBUF_Pseudo_Store_Lds <"buffer_store_lds_dword">;
Expand Down Expand Up @@ -1585,6 +1591,9 @@ defm : SIBufferAtomicPat<"SIbuffer_atomic_xor", i64, "BUFFER_ATOMIC_XOR_X2">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_inc", i64, "BUFFER_ATOMIC_INC_X2">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_dec", i64, "BUFFER_ATOMIC_DEC_X2">;

// Buffer CSUB pattern restricted to the "noret" variant; only enabled on
// subtargets for which HasAtomicCSubNoRtnInsts holds.
let SubtargetPredicate = HasAtomicCSubNoRtnInsts in
defm : SIBufferAtomicPat<"SIbuffer_atomic_csub", i32, "BUFFER_ATOMIC_CSUB", ["noret"]>;

let SubtargetPredicate = isGFX6GFX7GFX10Plus in {
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f32, "BUFFER_ATOMIC_FMIN">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f32, "BUFFER_ATOMIC_FMAX">;
Expand Down Expand Up @@ -2243,7 +2252,7 @@ defm BUFFER_ATOMIC_AND_X2 : MUBUF_Real_Atomic_gfx11_Renamed<0x049, "buff
defm BUFFER_ATOMIC_CMPSWAP : MUBUF_Real_Atomic_gfx11_Renamed<0x034, "buffer_atomic_cmpswap_b32">;
defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Real_Atomic_gfx11_Renamed<0x042, "buffer_atomic_cmpswap_b64">;
defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Real_Atomic_gfx11_Renamed<0x050, "buffer_atomic_cmpswap_f32">;
defm BUFFER_ATOMIC_CSUB : MUBUF_Real_Atomic_gfx11_Renamed_impl<0x037, 1, "buffer_atomic_csub_u32">;
defm BUFFER_ATOMIC_CSUB : MUBUF_Real_Atomic_gfx11_Renamed<0x037, "buffer_atomic_csub_u32">;
def : Pre_gfx11_MUBUF_Name<"buffer_atomic_csub", "buffer_atomic_csub_u32">;
defm BUFFER_ATOMIC_DEC : MUBUF_Real_Atomic_gfx11_Renamed<0x040, "buffer_atomic_dec_u32">;
defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Real_Atomic_gfx11_Renamed<0x04D, "buffer_atomic_dec_u64">;
Expand Down Expand Up @@ -2515,7 +2524,7 @@ defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05e>;
defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x05f>;
defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Real_Atomics_gfx6_gfx7_gfx10<0x060>;

defm BUFFER_ATOMIC_CSUB : MUBUF_Real_Atomics_RTN_gfx10<0x034>;
defm BUFFER_ATOMIC_CSUB : MUBUF_Real_Atomics_gfx10<0x034>;

defm BUFFER_WBINVL1_SC : MUBUF_Real_gfx6<0x070>;
defm BUFFER_WBINVL1_VOL : MUBUF_Real_gfx7<0x070>;
Expand Down
18 changes: 13 additions & 5 deletions llvm/lib/Target/AMDGPU/FLATInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -870,9 +870,14 @@ defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_inc_x2",
defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2",
VReg_64, i64>;

let SubtargetPredicate = HasGFX10_BEncoding in
defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo_RTN <"global_atomic_csub",
VGPR_32, i32>;
// global_atomic_csub pseudos. Both forms are gated on HasGFX10_BEncoding.
let SubtargetPredicate = HasGFX10_BEncoding in {
// Returning form.
defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo_RTN <"global_atomic_csub",
VGPR_32, i32>;

// No-return form: additionally requires HasAtomicCSubNoRtnInsts.
let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo_NO_RTN <"global_atomic_csub",
VGPR_32, i32>;
}

defm GLOBAL_LOAD_LDS_UBYTE : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_ubyte">;
defm GLOBAL_LOAD_LDS_SBYTE : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sbyte">;
Expand Down Expand Up @@ -1442,6 +1447,9 @@ defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_glo
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR", "atomic_load_xor_global", i32>;
defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_CSUB", "int_amdgcn_global_atomic_csub", i32, i32, /* isIntr */ 1>;

// No-return selection patterns for the global CSUB intrinsic; only enabled
// when HasAtomicCSubNoRtnInsts holds.
let OtherPredicates = [HasAtomicCSubNoRtnInsts] in
defm : GlobalFLATAtomicPatsNoRtn <"GLOBAL_ATOMIC_CSUB", "int_amdgcn_global_atomic_csub", i32, i32, /* isIntr */ 1>;

defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_X2", "atomic_load_add_global", i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB_X2", "atomic_load_sub_global", i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC_X2", "atomic_load_uinc_wrap_global", i64>;
Expand Down Expand Up @@ -2102,7 +2110,7 @@ defm GLOBAL_ATOMIC_SWAP : FLAT_Real_GlblAtomics_gfx10<0x030>;
defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Real_GlblAtomics_gfx10<0x031>;
defm GLOBAL_ATOMIC_ADD : FLAT_Real_GlblAtomics_gfx10<0x032>;
defm GLOBAL_ATOMIC_SUB : FLAT_Real_GlblAtomics_gfx10<0x033>;
defm GLOBAL_ATOMIC_CSUB : FLAT_Real_GlblAtomics_RTN_gfx10<0x034>;
defm GLOBAL_ATOMIC_CSUB : FLAT_Real_GlblAtomics_gfx10<0x034>;
defm GLOBAL_ATOMIC_SMIN : FLAT_Real_GlblAtomics_gfx10<0x035>;
defm GLOBAL_ATOMIC_UMIN : FLAT_Real_GlblAtomics_gfx10<0x036>;
defm GLOBAL_ATOMIC_SMAX : FLAT_Real_GlblAtomics_gfx10<0x037>;
Expand Down Expand Up @@ -2333,7 +2341,7 @@ defm GLOBAL_ATOMIC_SWAP_B32 : FLAT_Real_GlblAtomics_gfx11<0x033, "GLOBAL_ATO
defm GLOBAL_ATOMIC_CMPSWAP_B32 : FLAT_Real_GlblAtomics_gfx11<0x034, "GLOBAL_ATOMIC_CMPSWAP", "global_atomic_cmpswap_b32", true>;
defm GLOBAL_ATOMIC_ADD_U32 : FLAT_Real_GlblAtomics_gfx11<0x035, "GLOBAL_ATOMIC_ADD", "global_atomic_add_u32", true>;
defm GLOBAL_ATOMIC_SUB_U32 : FLAT_Real_GlblAtomics_gfx11<0x036, "GLOBAL_ATOMIC_SUB", "global_atomic_sub_u32", true>;
defm GLOBAL_ATOMIC_CSUB_U32 : FLAT_Real_GlblAtomics_RTN_gfx11<0x037, "GLOBAL_ATOMIC_CSUB", "global_atomic_csub_u32", true>;
defm GLOBAL_ATOMIC_CSUB_U32 : FLAT_Real_GlblAtomics_gfx11<0x037, "GLOBAL_ATOMIC_CSUB", "global_atomic_csub_u32", true>;
defm GLOBAL_ATOMIC_MIN_I32 : FLAT_Real_GlblAtomics_gfx11<0x038, "GLOBAL_ATOMIC_SMIN", "global_atomic_min_i32", true>;
defm GLOBAL_ATOMIC_MIN_U32 : FLAT_Real_GlblAtomics_gfx11<0x039, "GLOBAL_ATOMIC_UMIN", "global_atomic_min_u32", true>;
defm GLOBAL_ATOMIC_MAX_I32 : FLAT_Real_GlblAtomics_gfx11<0x03a, "GLOBAL_ATOMIC_SMAX", "global_atomic_max_i32", true>;
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasAtomicFaddNoRtnInsts = false;
bool HasAtomicBufferGlobalPkAddF16NoRtnInsts = false;
bool HasAtomicBufferGlobalPkAddF16Insts = false;
bool HasAtomicCSubNoRtnInsts = false;
bool HasAtomicGlobalPkAddBF16Inst = false;
bool HasFlatAtomicFaddF32Inst = false;
bool SupportsSRAMECC = false;
Expand Down Expand Up @@ -1203,6 +1204,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
// \returns true if FP8/BF8 VOP1 form of conversion to F32 is unreliable.
bool hasCvtFP8VOP1Bug() const { return true; }

// \returns true if CSUB atomics support a no-return form.
bool hasAtomicCSubNoRtnInsts() const { return HasAtomicCSubNoRtnInsts; }

/// \returns SGPR allocation granularity supported by the subtarget.
unsigned getSGPRAllocGranule() const {
return AMDGPU::IsaInfo::getSGPRAllocGranule(this);
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ defm SIbuffer_atomic_or : SDBufferAtomicNoRet;
defm SIbuffer_atomic_xor : SDBufferAtomicNoRet;
defm SIbuffer_atomic_inc : SDBufferAtomicNoRet;
defm SIbuffer_atomic_dec : SDBufferAtomicNoRet;
defm SIbuffer_atomic_csub : SDBufferAtomicNoRet;
defm SIbuffer_atomic_fadd : SDBufferAtomicNoRet;
defm SIbuffer_atomic_fmin : SDBufferAtomicNoRet;
defm SIbuffer_atomic_fmax : SDBufferAtomicNoRet;
Expand Down
51 changes: 43 additions & 8 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.csub.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,35 +4,70 @@
declare i32 @llvm.amdgcn.buffer.atomic.csub(i32, <4 x i32>, i32, i32, i1)
declare i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1), i32)

; GCN-LABEL: {{^}}buffer_atomic_csub:
; GCN-LABEL: {{^}}buffer_atomic_csub_rtn:
; GCN: buffer_atomic_csub v0, v1, s[0:3], 0 idxen glc
define amdgpu_ps void @buffer_atomic_csub(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex) {
define amdgpu_ps void @buffer_atomic_csub_rtn(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex) {
main_body:
%ret = call i32 @llvm.amdgcn.buffer.atomic.csub(i32 %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
ret void
}

; GCN-LABEL: {{^}}buffer_atomic_csub_off4_slc:
; GCN-LABEL: {{^}}buffer_atomic_csub_no_rtn:
; GCN: buffer_atomic_csub v0, v1, s[0:3], 0 idxen
define amdgpu_ps void @buffer_atomic_csub_no_rtn(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex) #0 {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Personally I prefer to put the attributes inline here, rather than indirecting via #0.

Suggested change
define amdgpu_ps void @buffer_atomic_csub_no_rtn(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex) #0 {
define amdgpu_ps void @buffer_atomic_csub_no_rtn(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex) "target-features"="+atomic-csub-no-rtn-insts" {

main_body:
%ret = call i32 @llvm.amdgcn.buffer.atomic.csub(i32 %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
ret void
}

; GCN-LABEL: {{^}}buffer_atomic_csub_off4_slc_rtn:
; GCN: buffer_atomic_csub v0, v1, s[0:3], 0 idxen offset:4 glc slc
define amdgpu_ps void @buffer_atomic_csub_off4_slc(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex) {
define amdgpu_ps void @buffer_atomic_csub_off4_slc_rtn(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex) {
main_body:
%ret = call i32 @llvm.amdgcn.buffer.atomic.csub(i32 %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i1 1)
ret void
}

; GCN-LABEL: {{^}}global_atomic_csub:
; GCN-LABEL: {{^}}buffer_atomic_csub_off4_slc_no_rtn:
; GCN: buffer_atomic_csub v0, v1, s[0:3], 0 idxen offset:4 slc
define amdgpu_ps void @buffer_atomic_csub_off4_slc_no_rtn(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex) #0 {
main_body:
%ret = call i32 @llvm.amdgcn.buffer.atomic.csub(i32 %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i1 1)
ret void
}

; GCN-LABEL: {{^}}global_atomic_csub_rtn:
; GCN: global_atomic_csub v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9:]+}}, s{{\[[0-9]+:[0-9]+\]}} glc
define amdgpu_kernel void @global_atomic_csub(ptr addrspace(1) %ptr, i32 %data) {
define amdgpu_kernel void @global_atomic_csub_rtn(ptr addrspace(1) %ptr, i32 %data) {
main_body:
%ret = call i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1) %ptr, i32 %data)
ret void
}

; GCN-LABEL: {{^}}global_atomic_csub_no_rtn:
; GCN: global_atomic_csub v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @global_atomic_csub_no_rtn(ptr addrspace(1) %ptr, i32 %data) #0 {
main_body:
%ret = call i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1) %ptr, i32 %data)
ret void
}

; GCN-LABEL: {{^}}global_atomic_csub_off4:
; GCN-LABEL: {{^}}global_atomic_csub_off4_rtn:
; GCN: global_atomic_csub v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:4 glc
define amdgpu_kernel void @global_atomic_csub_off4(ptr addrspace(1) %ptr, i32 %data) {
define amdgpu_kernel void @global_atomic_csub_off4_rtn(ptr addrspace(1) %ptr, i32 %data) {
main_body:
%p = getelementptr i32, ptr addrspace(1) %ptr, i64 1
%ret = call i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1) %p, i32 %data)
ret void
}

; GCN-LABEL: {{^}}global_atomic_csub_off4_no_rtn:
; GCN: global_atomic_csub v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:4
define amdgpu_kernel void @global_atomic_csub_off4_no_rtn(ptr addrspace(1) %ptr, i32 %data) #0 {
main_body:
%p = getelementptr i32, ptr addrspace(1) %ptr, i64 1
%ret = call i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1) %p, i32 %data)
ret void
}

attributes #0 = { "target-features"="+atomic-csub-no-rtn-insts" }
6 changes: 0 additions & 6 deletions llvm/test/MC/AMDGPU/gfx1030_err.s
Original file line number Diff line number Diff line change
Expand Up @@ -141,12 +141,6 @@ ds_write_src2_b32 v1 offset:65535
ds_write_src2_b64 v1 offset:65535
// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU

buffer_atomic_csub v5, off, s[8:11], s3 offset:4095
// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction must use glc

global_atomic_csub v2, v[0:1], v2, off offset:100 slc
// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: instruction must use glc

image_msaa_load v[1:4], v5, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D
// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: invalid dim; must be MSAA type

Expand Down
21 changes: 21 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1030_new.s
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,30 @@ global_store_dword v254, v1, s[2:3] offset:16
global_atomic_csub v2, v[0:1], v2, off offset:100 glc slc
// GFX10: encoding: [0x64,0x80,0xd3,0xdc,0x00,0x02,0x7d,0x02]

global_atomic_csub v[0:1], v2, off offset:100 slc
// GFX10: encoding: [0x64,0x80,0xd2,0xdc,0x00,0x02,0x7d,0x00]

global_atomic_csub v2, v[0:1], v2, off glc
// GFX10: encoding: [0x00,0x80,0xd1,0xdc,0x00,0x02,0x7d,0x02]

global_atomic_csub v[0:1], v2, off
// GFX10: encoding: [0x00,0x80,0xd0,0xdc,0x00,0x02,0x7d,0x00]

global_atomic_csub v2, v0, v2, s[2:3] glc
// GFX10: encoding: [0x00,0x80,0xd1,0xdc,0x00,0x02,0x02,0x02]

global_atomic_csub v0, v2, s[2:3]
// GFX10: encoding: [0x00,0x80,0xd0,0xdc,0x00,0x02,0x02,0x00]

global_atomic_csub v2, v0, v2, s[2:3] offset:100 glc slc
// GFX10: encoding: [0x64,0x80,0xd3,0xdc,0x00,0x02,0x02,0x02]

buffer_atomic_csub v5, off, s[8:11], s3 glc
// GFX10: encoding: [0x00,0x40,0xd0,0xe0,0x00,0x05,0x02,0x03]

buffer_atomic_csub v5, off, s[8:11], s3
// GFX10: encoding: [0x00,0x00,0xd0,0xe0,0x00,0x05,0x02,0x03]

buffer_atomic_csub v5, off, s[8:11], s3 offset:4095 glc
// GFX10: encoding: [0xff,0x4f,0xd0,0xe0,0x00,0x05,0x02,0x03]

Expand All @@ -51,12 +63,21 @@ buffer_atomic_csub v5, off, s[8:11], -1 offset:4095 glc
buffer_atomic_csub v5, v0, s[8:11], s3 offen offset:4095 glc
// GFX10: encoding: [0xff,0x5f,0xd0,0xe0,0x00,0x05,0x02,0x03]

buffer_atomic_csub v5, v0, s[8:11], s3 offen offset:4095
// GFX10: encoding: [0xff,0x1f,0xd0,0xe0,0x00,0x05,0x02,0x03]

buffer_atomic_csub v5, v0, s[8:11], s3 idxen offset:4095 glc
// GFX10: encoding: [0xff,0x6f,0xd0,0xe0,0x00,0x05,0x02,0x03]

buffer_atomic_csub v5, v0, s[8:11], s3 idxen offset:4095
// GFX10: encoding: [0xff,0x2f,0xd0,0xe0,0x00,0x05,0x02,0x03]

buffer_atomic_csub v5, off, s[8:11], s3 glc slc
// GFX10: encoding: [0x00,0x40,0xd0,0xe0,0x00,0x05,0x42,0x03]

buffer_atomic_csub v5, off, s[8:11], s3 slc
// GFX10: encoding: [0x00,0x00,0xd0,0xe0,0x00,0x05,0x42,0x03]

s_getreg_b32 s2, hwreg(HW_REG_SHADER_CYCLES)
// GFX10: encoding: [0x1d,0xf8,0x02,0xb9]

Expand Down
12 changes: 12 additions & 0 deletions llvm/test/MC/AMDGPU/gfx11_asm_flat.s
Original file line number Diff line number Diff line change
Expand Up @@ -1536,6 +1536,9 @@ global_atomic_cmpswap_x2 v[254:255], v255, v[252:255], ttmp[14:15] offset:-4096
global_atomic_csub v5, v[1:2], v2, off glc
// GFX11: [0x00,0x40,0xde,0xdc,0x01,0x02,0x7c,0x05]

global_atomic_csub v[1:2], v2, off
// GFX11: [0x00,0x00,0xde,0xdc,0x01,0x02,0x7c,0x00]

global_atomic_csub v5, v[254:255], v2, off glc
// GFX11: [0x00,0x40,0xde,0xdc,0xfe,0x02,0x7c,0x05]

Expand Down Expand Up @@ -1563,9 +1566,15 @@ global_atomic_csub v5, v255, v2, vcc offset:4095 glc
global_atomic_csub v255, v255, v255, ttmp[14:15] offset:-4096 glc slc dlc
// GFX11: [0x00,0xf0,0xde,0xdc,0xff,0xff,0x7a,0xff]

global_atomic_csub v255, v255, ttmp[14:15] offset:-4096 slc dlc
// GFX11: [0x00,0xb0,0xde,0xdc,0xff,0xff,0x7a,0x00]

global_atomic_csub_u32 v5, v[1:2], v2, off glc
// GFX11: [0x00,0x40,0xde,0xdc,0x01,0x02,0x7c,0x05]

global_atomic_csub_u32 v[1:2], v2, off
// GFX11: [0x00,0x00,0xde,0xdc,0x01,0x02,0x7c,0x00]

global_atomic_csub_u32 v5, v[254:255], v2, off glc
// GFX11: [0x00,0x40,0xde,0xdc,0xfe,0x02,0x7c,0x05]

Expand Down Expand Up @@ -1593,6 +1602,9 @@ global_atomic_csub_u32 v5, v255, v2, vcc offset:4095 glc
global_atomic_csub_u32 v255, v255, v255, ttmp[14:15] offset:-4096 glc slc dlc
// GFX11: [0x00,0xf0,0xde,0xdc,0xff,0xff,0x7a,0xff]

global_atomic_csub_u32 v255, v255, ttmp[14:15] offset:-4096 slc dlc
// GFX11: [0x00,0xb0,0xde,0xdc,0xff,0xff,0x7a,0x00]

global_atomic_dec v[1:2], v2, off
// GFX11: [0x00,0x00,0x02,0xdd,0x01,0x02,0x7c,0x00]

Expand Down
15 changes: 12 additions & 3 deletions llvm/test/MC/AMDGPU/gfx11_asm_mubuf.s
Original file line number Diff line number Diff line change
Expand Up @@ -3091,12 +3091,12 @@ buffer_atomic_cmpswap_f32 v[5:6], off, s[8:11], s3 offset:4095 dlc
buffer_atomic_cmpswap_f32 v[5:6], off, s[8:11], s3 offset:4095 glc slc dlc
// GFX11: encoding: [0xff,0x7f,0x40,0xe1,0x00,0x05,0x02,0x03]

buffer_atomic_csub_u32 v5, off, s[8:11], s3 offset:4095
// GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction must use glc

buffer_atomic_csub_u32 v255, off, s[8:11], s3 offset:4095 glc
// GFX11: encoding: [0xff,0x4f,0xdc,0xe0,0x00,0xff,0x02,0x03]

buffer_atomic_csub_u32 v255, off, s[8:11], s3 offset:4095
// GFX11: encoding: [0xff,0x0f,0xdc,0xe0,0x00,0xff,0x02,0x03]

buffer_atomic_csub_u32 v5, off, s[12:15], s3 offset:4095 glc
// GFX11: encoding: [0xff,0x4f,0xdc,0xe0,0x00,0x05,0x03,0x03]

Expand Down Expand Up @@ -3142,12 +3142,21 @@ buffer_atomic_csub_u32 v5, off, s[8:11], s3 offset:4095 glc
buffer_atomic_csub_u32 v5, off, s[8:11], s3 offset:4095 glc slc
// GFX11: encoding: [0xff,0x5f,0xdc,0xe0,0x00,0x05,0x02,0x03]

buffer_atomic_csub_u32 v5, off, s[8:11], s3 offset:4095 slc
// GFX11: encoding: [0xff,0x1f,0xdc,0xe0,0x00,0x05,0x02,0x03]

buffer_atomic_csub_u32 v5, off, s[8:11], s3 offset:4095 glc dlc
// GFX11: encoding: [0xff,0x6f,0xdc,0xe0,0x00,0x05,0x02,0x03]

buffer_atomic_csub_u32 v5, off, s[8:11], s3 offset:4095 dlc
// GFX11: encoding: [0xff,0x2f,0xdc,0xe0,0x00,0x05,0x02,0x03]

buffer_atomic_csub_u32 v5, off, s[8:11], s3 offset:4095 glc slc dlc
// GFX11: encoding: [0xff,0x7f,0xdc,0xe0,0x00,0x05,0x02,0x03]

buffer_atomic_csub_u32 v5, off, s[8:11], s3 offset:4095 slc dlc
// GFX11: encoding: [0xff,0x3f,0xdc,0xe0,0x00,0x05,0x02,0x03]

buffer_atomic_dec_u32 v5, off, s[8:11], s3 offset:4095
// GFX11: encoding: [0xff,0x0f,0x00,0xe1,0x00,0x05,0x02,0x03]

Expand Down
9 changes: 6 additions & 3 deletions llvm/test/MC/AMDGPU/gfx11_asm_mubuf_alias.s
Original file line number Diff line number Diff line change
Expand Up @@ -2210,12 +2210,12 @@ buffer_atomic_fcmpswap v[5:6], off, s[8:11], s3 offset:4095 slc
buffer_atomic_fcmpswap v[5:6], off, s[8:11], s3 offset:4095 glc slc
// GFX11: encoding: [0xff,0x5f,0x40,0xe1,0x00,0x05,0x02,0x03]

buffer_atomic_csub v5, off, s[8:11], s3 offset:4095
// GFX11-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: instruction must use glc

buffer_atomic_csub v255, off, s[8:11], s3 offset:4095 glc
// GFX11: encoding: [0xff,0x4f,0xdc,0xe0,0x00,0xff,0x02,0x03]

buffer_atomic_csub v255, off, s[8:11], s3 offset:4095
// GFX11: encoding: [0xff,0x0f,0xdc,0xe0,0x00,0xff,0x02,0x03]

buffer_atomic_csub v5, off, s[12:15], s3 offset:4095 glc
// GFX11: encoding: [0xff,0x4f,0xdc,0xe0,0x00,0x05,0x03,0x03]

Expand Down Expand Up @@ -2261,6 +2261,9 @@ buffer_atomic_csub v5, off, s[8:11], s3 offset:4095 glc
buffer_atomic_csub v5, off, s[8:11], s3 offset:4095 glc slc
// GFX11: encoding: [0xff,0x5f,0xdc,0xe0,0x00,0x05,0x02,0x03]

buffer_atomic_csub v5, off, s[8:11], s3 offset:4095 slc
// GFX11: encoding: [0xff,0x1f,0xdc,0xe0,0x00,0x05,0x02,0x03]

buffer_atomic_dec v5, off, s[8:11], s3 offset:4095
// GFX11: encoding: [0xff,0x0f,0x00,0xe1,0x00,0x05,0x02,0x03]

Expand Down
Loading