-
Notifications
You must be signed in to change notification settings - Fork 13.4k
[AMDGPU][MC] Fix disassembler problem for image_atomic with TFE #112622
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU][MC] Fix disassembler problem for image_atomic with TFE #112622
Conversation
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-mc Author: Jun Wang (jwanggit86) ChangesFor image_atomic instructions with TFE, in some cases (e.g., when dmask=3) the disassembler produces dst register with wrong size (e.g., image_atomic_smin v5, v1, s[8:15] dmask:0x3 tfe). Moreover, another problem in MIMGInstructions.td is also fixed. Patch is 21.85 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/112622.diff 6 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 5c49a8116ae7fc..f52392ba1bd19c 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -1123,10 +1123,13 @@ multiclass MIMG_Atomic <mimgopc op, string asm, bit isCmpSwap = 0, bit isFP = 0,
defm _V1 : MIMG_Atomic_Addr_Helper_m <op, asm, !if(isCmpSwap, VReg_64, VGPR_32), 1, isFP, renamed>;
let VDataDwords = !if(isCmpSwap, 4, 2) in
defm _V2 : MIMG_Atomic_Addr_Helper_m <op, asm, !if(isCmpSwap, VReg_128, VReg_64), 0, isFP, renamed>;
- let VDataDwords = !if(isCmpSwap, 2, 2) in
+ let VDataDwords = !if(isCmpSwap, 3, 3) in
defm _V3 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_96, 0, isFP, renamed>;
- let VDataDwords = !if(isCmpSwap, 4, 4) in
- defm _V4 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_160, 0, isFP, renamed>;
+
+ if isCmpSwap then {
+ let VDataDwords = 5 in
+ defm _V4 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_160, 0, isFP, renamed>;
+ }
}
} // End IsAtomicRet = 1
}
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_mimg_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_mimg_features.txt
new file mode 100644
index 00000000000000..133af6b0c1da4d
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_mimg_features.txt
@@ -0,0 +1,84 @@
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1010 -disassemble -show-encoding < %s | FileCheck %s -check-prefix=GFX1010
+
+#===------------------------------------------------------------------------===#
+# Image atomics
+#===------------------------------------------------------------------------===#
+
+# GFX1010: image_atomic_add v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x45,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_add v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x45,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_and v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x61,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_and v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x61,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_cmpswap v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x41,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_cmpswap v[5:9], v1, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D tfe
+0x00,0x0f,0x41,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_dec v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x71,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_dec v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x71,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_inc v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x6d,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_inc v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x6d,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_or v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x65,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_or v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x65,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_smax v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x59,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_smax v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x59,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_smin v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x51,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_smin v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x51,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_sub v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x49,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_sub v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x49,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_swap v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x3d,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_swap v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x3d,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_umax v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x5d,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_umax v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x5d,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_umin v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x55,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_umin v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x55,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_xor v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x69,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_xor v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x69,0xf0,0x01,0x05,0x02,0x00
+
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_mimg_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_mimg_features.txt
index 46488a9aa4ec70..fdf376ed6d6932 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_mimg_features.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_mimg_features.txt
@@ -135,6 +135,89 @@
# GFX11: image_atomic_dec v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc ; encoding: [0x00,0x41,0x58,0xf0,0x20,0x04,0x18,0x00]
0x00,0x41,0x58,0xf0,0x20,0x04,0x18,0x00
+#===------------------------------------------------------------------------===#
+# TFE in image_atomic
+#===------------------------------------------------------------------------===#
+
+# GFX11: image_atomic_add v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x30,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_add v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x30,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_and v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x48,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_and v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x48,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_cmpswap v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x2c,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_cmpswap v[5:9], v1, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D tfe
+0x00,0x0f,0x2c,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_dec v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x58,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_dec v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x58,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_inc v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x54,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_inc v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x54,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_or v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x4c,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_or v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x4c,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_smax v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x40,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_smax v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x40,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_smin v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x38,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_smin v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x38,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_sub v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x34,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_sub v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x34,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_swap v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x28,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_swap v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x28,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_umax v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x44,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_umax v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x44,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_umin v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x3c,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_umin v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x3c,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_xor v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x50,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_xor v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x50,0xf0,0x01,0x05,0x22,0x00
+
+
# GFX11: image_sample v[64:66], v32, s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x07,0x6c,0xf0,0x20,0x40,0x01,0x64]
0x00,0x07,0x6c,0xf0,0x20,0x40,0x01,0x64
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage_features.txt
new file mode 100644
index 00000000000000..2e747adbd07205
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage_features.txt
@@ -0,0 +1,101 @@
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12 %s
+
+#===------------------------------------------------------------------------===#
+# TFE in image_atomic
+#===------------------------------------------------------------------------===#
+
+# GFX12: image_atomic_add_flt v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0xc0,0x60,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_add_flt v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0xc0,0xe0,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_add_uint v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x00,0x43,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_add_uint v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x00,0xc3,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_and v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x80,0x44,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_and v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x80,0xc4,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_cmpswap v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0xc0,0xc2,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_cmpswap v[5:9], v1, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D tfe
+0x00,0xc0,0xc2,0xd3,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_dec_uint v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x80,0x45,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_dec_uint v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x80,0xc5,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_inc_uint v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x40,0x45,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_inc_uint v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x40,0xc5,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_max_int v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x00,0x44,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_max_int v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x00,0xc4,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_max_uint v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x40,0x44,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_max_uint v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x40,0xc4,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_min_int v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x80,0x43,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_min_int v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x80,0xc3,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_min_uint v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0xc0,0x43,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_min_uint v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0xc0,0xc3,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_or v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0xc0,0x44,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_or v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0xc0,0xc4,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_sub_uint v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x40,0x43,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_sub_uint v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x40,0xc3,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_swap v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x80,0x42,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_swap v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x80,0xc2,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_max_uint v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x40,0x44,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_max_uint v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x40,0xc4,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_min_uint v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0xc0,0x43,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_min_uint v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0xc0,0xc3,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_xor v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x00,0x45,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_xor v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x00,0xc5,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx8_mimg_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx8_mimg_features.txt
index 0a5bafc55f4d43..1bb1014a431d37 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx8_mimg_features.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx8_mimg_features.txt
@@ -184,6 +184,85 @@
# VI: image_atomic_cmpswap v[5:8], v1, s[8:15] dmask:0xf unorm ; encoding: [0x00,0x1f,0x44,0xf0,0x01,0x05,0x02,0x00]
0x00,0x1f,0x44,0xf0,0x01,0x05,0x02,0x00
+# VI: image_atomic_add v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x49,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_add v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x49,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_and v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x61,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_and v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x61,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_cmpswap v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x45,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_cmpswap v[5:9], v1, s[8:15] dmask:0xf tfe
+0x00,0x0f,0x45,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_dec v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x71,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_dec v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x71,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_inc v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x6d,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_inc v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x6d,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_or v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x65,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_or v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x65,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_smax v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x59,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_smax v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x59,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_smin v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x51,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_smin v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x51,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_sub v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x4d,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_sub v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x4d,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_swap v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x41,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_swap v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x41,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_umax v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x5d,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_umax v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x5d,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_umin v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x55,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_umin v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x55,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_xor v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x69,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_xor v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x69,0xf0,0x01,0x05,0x02,0x00
+
+
#===------------------------------------------------------------------------===#
# Invalid image atomics (incorrect dmask value).
# Disassembler may produce a partially incorrect instruction but should not fail.
@@ -192,10 +271,10 @@
# VI: image_atomic_add v5, v1, s[8:15] dmask:0x2 unorm ; encoding: [0x00,0x12,0x48,0xf0,0x01,0x05,0x02,0x00]
0x00,0x12,0x48,0xf0,0x01,0x05,0x02,0x00
-# VI: image_atomic_add v5, v1, s[8:15] dmask:0x7 unorm ; encoding: [0x00,0x17,0x48,0xf0,0x01,0x05,0x02,0x00]
+# VI: image_atomic_add v[5:7], v1, s[8:15] dmask:0x7 unorm ; encoding: [0x00,0x17,0x48,0xf0,0x01,0x05,0x02,0x00]
0x00,0x17,0x48,0xf0,0x01,0x05,0x02,0x00
-# VI: image_atomic_add v[5:9], v1, s[8:15] dmask:0xf unorm ; encoding: [0x00,0x1f,0x48,0xf0,0x01,0x05,0x02,0x00]
+# VI: image_atomic_add v5, v1, s[8:15] dmask:0xf unorm ; encoding: [0x00,0x1f,0x48,0xf0,0x01,0x05,0x02,0x00]
0x00,0x1f,0x48,0xf0,0x01,0x05,0x02,0x00
# VI: image_atomic_cmpswap v[5:6], v1, s[8:15] unorm ; encoding: [0x00,0x10,0x44,0xf0,0x01,0x05,0x02,0x00]
@@ -204,7 +283,7 @@
# VI: image_atomic_cmpswap v[5:6], v1, s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x44,0xf0,0x01,0x05,0x02,0x00]
0x00,0x11,0x44,0xf0,0x01,0x05,0x02,0x00
-# VI: image_atomic_cmpswap v[5:6], v1, s[8:15] dmask:0xe unorm ; encoding: [0x00,0x1e,0x44,0xf0,0x01,0x05,0x02,0x00]
+# VI: image_atomic_cmpswap v[5:7], v1, s[8:15] dmask:0xe unorm ; encoding: [0x00,0x1e,0x44,0xf0,0x01,0x05,0x02,0x00]
0x00,0x1e,0x44,0xf0,0x01,0x05,0x02,0x00
#===------------------------------------------------------------------------===#
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_mimg_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_mimg_features.txt
new file mode 100644
index 00000000000000..8300fe4e9db50b
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_mimg_features.txt
@@ -0,0 +1,83 @@
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx900 -disassemble -show-encoding < %s | FileCheck %s -check-prefix=GFX900
+
+#===------------------------------------------------------------------------===#
+# Image atomics
+#===------------------------------------------------------------------------===#
+# GFX900: image_atomic_add v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x49,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_add v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x49,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_and v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x61,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_and v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x61,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_cmpswap v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x45,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_cmpswap v[5:9], v1, s[8:15] dmask:0xf tfe
+0x00,0x0f,0x45,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_dec v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x71,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_dec v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x71,0xf0,0x01,0x05,0x02,0x00
+...
[truncated]
|
This is vague. Please say what the problem is. |
9bd4bcc
to
1301661
Compare
Fixed. |
0x00,0x17,0x48,0xf0,0x01,0x05,0x02,0x00 | ||
|
||
# VI: image_atomic_add v[5:9], v1, s[8:15] dmask:0xf unorm ; encoding: [0x00,0x1f,0x48,0xf0,0x01,0x05,0x02,0x00] | ||
# VI: image_atomic_add v5, v1, s[8:15] dmask:0xf unorm ; encoding: [0x00,0x1f,0x48,0xf0,0x01,0x05,0x02,0x00] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There are 4 bits set in dmask so why isn't this v[5:8]
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Disassembler defaults to V1 variants. That would have to be IMAGE_ATOMIC_ADD_V4_V*
but there are not V4_V*
variants for IMAGE_ATOMIC
s, only V1
, V2
(64 or 32+tfe) and V3
(64+tfe).
Only cmpswap needs forth variant and this patch deletes V4
s for non cmpswap atomics.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Mirko is right. For non-cmpswap atomics, dmask can only be 0x1 or 0x3. So the destination reg can be 96b at most (dmask=0x3 + tfe). In the above example, the binary is incorrect to begin with (because dmask=0xf), the disassemble result is also incorrect.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the explanation. It would still be nice to handle this better, either by printing the v[5:8]
(maybe with a comment that something is wrong) or by diagnosing it properly as an error case.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'll look into this and maybe do a separate PR.
There is still this issue with inconsistent naming for Vx where x would match the size of vdata. Currently it is:
|
Are you saying for atomic_cmpswap, V2 should be VReg_96, and V3 VReg_128? |
I was thinking that it would make more sense if cmpswap skipped V1 and had variants V2, V3, V4, V5 with values VReg_64, VReg_96, VReg_128, VReg_160 respectively. That way name Vx matches size (32*x). (might need to update some .mir tests) |
This is done now. |
defm _V1 : MIMG_Atomic_Addr_Helper_m <op, asm, VGPR_32, 1, isFP, renamed>; | ||
|
||
let VDataDwords = 2 in | ||
defm _V2 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_64, !if(isCmpSwap, 1, 0), isFP, renamed>; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
defm _V2 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_64, !if(isCmpSwap, 1, 0), isFP, renamed>; | |
defm _V2 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_64, isCmpSwap, isFP, renamed>; |
if !not(isCmpSwap) then | ||
let VDataDwords = 1 in | ||
defm _V1 : MIMG_Atomic_Addr_Helper_m <op, asm, VGPR_32, 1, isFP, renamed>; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if !not(isCmpSwap) then | |
let VDataDwords = 1 in | |
defm _V1 : MIMG_Atomic_Addr_Helper_m <op, asm, VGPR_32, 1, isFP, renamed>; | |
if !not(isCmpSwap) then { | |
let VDataDwords = 1 in | |
defm _V1 : MIMG_Atomic_Addr_Helper_m <op, asm, VGPR_32, 1, isFP, renamed>; | |
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
brackets
LGTM with nits |
For image_atomic instructions with TFE, in some cases (e.g., when dmask=3) the disassembler produces dst register with wrong size (e.g., image_atomic_smin v5, v1, s[8:15] dmask:0x3 tfe, instead of v[5:7]).
cmpswap have V1-V3, while cmpswap has V2-V5.
0adf671
to
5e49be4
Compare
…#112622) For image_atomic instructions with TFE, in some cases (e.g., when dmask=3) the disassembler produces dst register with wrong size (e.g., image_atomic_smin v5, v1, s[8:15] dmask:0x3 tfe, instead of v[5:7]). This patch fixes the VDataDwords values for image atomic instructions.
For image_atomic instructions with TFE, in some cases (e.g., when dmask=3) the disassembler produces dst register with wrong size (e.g., image_atomic_smin v5, v1, s[8:15] dmask:0x3 tfe, instead of v[5:7]). Additionally, VDataDwords can only be 5 for cmpswap instructions.