[AMDGPU][MC] Fix disassembler problem for image_atomic with TFE #112622

jwanggit86 · 2024-10-16T21:55:09Z

For image_atomic instructions with TFE, in some cases (e.g., when dmask=3) the disassembler produces dst register with wrong size (e.g., image_atomic_smin v5, v1, s[8:15] dmask:0x3 tfe, instead of v[5:7]). Additionally, VDataDwords can only be 5 for cmpswap instructions.

llvmbot · 2024-10-16T21:55:30Z

@llvm/pr-subscribers-llvm-globalisel
@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-mc

Author: Jun Wang (jwanggit86)

Changes

For image_atomic instructions with TFE, in some cases (e.g., when dmask=3) the disassembler produces dst register with wrong size (e.g., image_atomic_smin v5, v1, s[8:15] dmask:0x3 tfe). Moreover, another problem in MIMGInstructions.td is also fixed.

Patch is 21.85 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/112622.diff

6 Files Affected:

(modified) llvm/lib/Target/AMDGPU/MIMGInstructions.td (+6-3)
(added) llvm/test/MC/Disassembler/AMDGPU/gfx10_mimg_features.txt (+84)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_mimg_features.txt (+83)
(added) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage_features.txt (+101)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx8_mimg_features.txt (+82-3)
(added) llvm/test/MC/Disassembler/AMDGPU/gfx9_mimg_features.txt (+83)

diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 5c49a8116ae7fc..f52392ba1bd19c 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -1123,10 +1123,13 @@ multiclass MIMG_Atomic <mimgopc op, string asm, bit isCmpSwap = 0, bit isFP = 0,
       defm _V1 : MIMG_Atomic_Addr_Helper_m <op, asm, !if(isCmpSwap, VReg_64, VGPR_32), 1, isFP, renamed>;
       let VDataDwords = !if(isCmpSwap, 4, 2) in
       defm _V2 : MIMG_Atomic_Addr_Helper_m <op, asm, !if(isCmpSwap, VReg_128, VReg_64), 0, isFP, renamed>;
-      let VDataDwords = !if(isCmpSwap, 2, 2) in
+      let VDataDwords = !if(isCmpSwap, 3, 3) in
       defm _V3 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_96, 0, isFP, renamed>;
-      let VDataDwords = !if(isCmpSwap, 4, 4) in
-      defm _V4 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_160, 0, isFP, renamed>;
+
+      if isCmpSwap then {
+        let VDataDwords = 5 in
+        defm _V4 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_160, 0, isFP, renamed>;
+      }
     }
   } // End IsAtomicRet = 1
 }
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_mimg_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_mimg_features.txt
new file mode 100644
index 00000000000000..133af6b0c1da4d
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_mimg_features.txt
@@ -0,0 +1,84 @@
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1010 -disassemble -show-encoding < %s | FileCheck %s -check-prefix=GFX1010
+
+#===------------------------------------------------------------------------===#
+# Image atomics
+#===------------------------------------------------------------------------===#
+
+# GFX1010: image_atomic_add v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x45,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_add v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x45,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_and v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x61,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_and v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x61,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_cmpswap v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x41,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_cmpswap v[5:9], v1, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D tfe
+0x00,0x0f,0x41,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_dec v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x71,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_dec v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x71,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_inc v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x6d,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_inc v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x6d,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_or v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x65,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_or v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x65,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_smax v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x59,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_smax v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x59,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_smin v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x51,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_smin v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x51,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_sub v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x49,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_sub v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x49,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_swap v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x3d,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_swap v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x3d,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_umax v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x5d,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_umax v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x5d,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_umin v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x55,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_umin v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x55,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_xor v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x69,0xf0,0x01,0x05,0x02,0x00
+
+# GFX1010: image_atomic_xor v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x69,0xf0,0x01,0x05,0x02,0x00
+
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_mimg_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_mimg_features.txt
index 46488a9aa4ec70..fdf376ed6d6932 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_mimg_features.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_mimg_features.txt
@@ -135,6 +135,89 @@
 # GFX11: image_atomic_dec v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc ; encoding: [0x00,0x41,0x58,0xf0,0x20,0x04,0x18,0x00]
 0x00,0x41,0x58,0xf0,0x20,0x04,0x18,0x00
 
+#===------------------------------------------------------------------------===#
+# TFE in image_atomic
+#===------------------------------------------------------------------------===#
+
+# GFX11: image_atomic_add v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x30,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_add v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x30,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_and v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x48,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_and v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x48,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_cmpswap v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x2c,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_cmpswap v[5:9], v1, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D tfe
+0x00,0x0f,0x2c,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_dec v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x58,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_dec v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x58,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_inc v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x54,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_inc v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x54,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_or v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x4c,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_or v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x4c,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_smax v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x40,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_smax v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x40,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_smin v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x38,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_smin v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x38,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_sub v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x34,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_sub v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x34,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_swap v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x28,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_swap v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x28,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_umax v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x44,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_umax v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x44,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_umin v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x3c,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_umin v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x3c,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_xor v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x01,0x50,0xf0,0x01,0x05,0x22,0x00
+
+# GFX11: image_atomic_xor v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x03,0x50,0xf0,0x01,0x05,0x22,0x00
+
+
 # GFX11: image_sample v[64:66], v32, s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x07,0x6c,0xf0,0x20,0x40,0x01,0x64]
 0x00,0x07,0x6c,0xf0,0x20,0x40,0x01,0x64
 
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage_features.txt
new file mode 100644
index 00000000000000..2e747adbd07205
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage_features.txt
@@ -0,0 +1,101 @@
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12 %s
+
+#===------------------------------------------------------------------------===#
+# TFE in image_atomic
+#===------------------------------------------------------------------------===#
+
+# GFX12: image_atomic_add_flt v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0xc0,0x60,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_add_flt v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0xc0,0xe0,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_add_uint v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x00,0x43,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_add_uint v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x00,0xc3,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_and v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x80,0x44,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_and v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x80,0xc4,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_cmpswap v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0xc0,0xc2,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_cmpswap v[5:9], v1, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D tfe
+0x00,0xc0,0xc2,0xd3,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_dec_uint v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x80,0x45,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_dec_uint v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x80,0xc5,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_inc_uint v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x40,0x45,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_inc_uint v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x40,0xc5,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_max_int v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x00,0x44,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_max_int v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x00,0xc4,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_max_uint v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x40,0x44,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_max_uint v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x40,0xc4,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_min_int v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x80,0x43,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_min_int v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x80,0xc3,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_min_uint v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0xc0,0x43,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_min_uint v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0xc0,0xc3,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_or v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0xc0,0x44,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_or v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0xc0,0xc4,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_sub_uint v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x40,0x43,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_sub_uint v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x40,0xc3,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_swap v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x80,0x42,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_swap v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x80,0xc2,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_max_uint v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x40,0x44,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_max_uint v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x40,0xc4,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_min_uint v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0xc0,0x43,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_min_uint v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0xc0,0xc3,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_xor v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x00,0x45,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_atomic_xor v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
+0x00,0x00,0xc5,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx8_mimg_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx8_mimg_features.txt
index 0a5bafc55f4d43..1bb1014a431d37 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx8_mimg_features.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx8_mimg_features.txt
@@ -184,6 +184,85 @@
 # VI: image_atomic_cmpswap v[5:8], v1, s[8:15] dmask:0xf unorm ; encoding: [0x00,0x1f,0x44,0xf0,0x01,0x05,0x02,0x00]
 0x00,0x1f,0x44,0xf0,0x01,0x05,0x02,0x00
 
+# VI: image_atomic_add v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x49,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_add v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x49,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_and v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x61,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_and v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x61,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_cmpswap v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x45,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_cmpswap v[5:9], v1, s[8:15] dmask:0xf tfe
+0x00,0x0f,0x45,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_dec v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x71,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_dec v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x71,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_inc v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x6d,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_inc v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x6d,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_or v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x65,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_or v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x65,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_smax v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x59,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_smax v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x59,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_smin v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x51,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_smin v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x51,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_sub v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x4d,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_sub v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x4d,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_swap v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x41,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_swap v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x41,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_umax v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x5d,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_umax v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x5d,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_umin v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x55,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_umin v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x55,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_xor v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x69,0xf0,0x01,0x05,0x02,0x00
+
+# VI: image_atomic_xor v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x69,0xf0,0x01,0x05,0x02,0x00
+
+
 #===------------------------------------------------------------------------===#
 # Invalid image atomics (incorrect dmask value).
 # Disassembler may produce a partially incorrect instruction but should not fail.
@@ -192,10 +271,10 @@
 # VI: image_atomic_add v5, v1, s[8:15] dmask:0x2 unorm ; encoding: [0x00,0x12,0x48,0xf0,0x01,0x05,0x02,0x00]
 0x00,0x12,0x48,0xf0,0x01,0x05,0x02,0x00
 
-# VI: image_atomic_add v5, v1, s[8:15] dmask:0x7 unorm ; encoding: [0x00,0x17,0x48,0xf0,0x01,0x05,0x02,0x00]
+# VI: image_atomic_add v[5:7], v1, s[8:15] dmask:0x7 unorm ; encoding: [0x00,0x17,0x48,0xf0,0x01,0x05,0x02,0x00]
 0x00,0x17,0x48,0xf0,0x01,0x05,0x02,0x00
 
-# VI: image_atomic_add v[5:9], v1, s[8:15] dmask:0xf unorm ; encoding: [0x00,0x1f,0x48,0xf0,0x01,0x05,0x02,0x00]
+# VI: image_atomic_add v5, v1, s[8:15] dmask:0xf unorm ; encoding: [0x00,0x1f,0x48,0xf0,0x01,0x05,0x02,0x00]
 0x00,0x1f,0x48,0xf0,0x01,0x05,0x02,0x00
 
 # VI: image_atomic_cmpswap v[5:6], v1, s[8:15] unorm ; encoding: [0x00,0x10,0x44,0xf0,0x01,0x05,0x02,0x00]
@@ -204,7 +283,7 @@
 # VI: image_atomic_cmpswap v[5:6], v1, s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x44,0xf0,0x01,0x05,0x02,0x00]
 0x00,0x11,0x44,0xf0,0x01,0x05,0x02,0x00
 
-# VI: image_atomic_cmpswap v[5:6], v1, s[8:15] dmask:0xe unorm ; encoding: [0x00,0x1e,0x44,0xf0,0x01,0x05,0x02,0x00]
+# VI: image_atomic_cmpswap v[5:7], v1, s[8:15] dmask:0xe unorm ; encoding: [0x00,0x1e,0x44,0xf0,0x01,0x05,0x02,0x00]
 0x00,0x1e,0x44,0xf0,0x01,0x05,0x02,0x00
 
 #===------------------------------------------------------------------------===#
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_mimg_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_mimg_features.txt
new file mode 100644
index 00000000000000..8300fe4e9db50b
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_mimg_features.txt
@@ -0,0 +1,83 @@
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx900 -disassemble -show-encoding < %s | FileCheck %s -check-prefix=GFX900
+
+#===------------------------------------------------------------------------===#
+# Image atomics
+#===------------------------------------------------------------------------===#
+# GFX900: image_atomic_add v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x49,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_add v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x49,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_and v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x61,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_and v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x61,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_cmpswap v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x45,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_cmpswap v[5:9], v1, s[8:15] dmask:0xf tfe
+0x00,0x0f,0x45,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_dec v[5:6], v1, s[8:15] dmask:0x1 tfe
+0x00,0x01,0x71,0xf0,0x01,0x05,0x02,0x00
+
+# GFX900: image_atomic_dec v[5:7], v1, s[8:15] dmask:0x3 tfe
+0x00,0x03,0x71,0xf0,0x01,0x05,0x02,0x00
+...
[truncated]

jayfoad · 2024-10-17T12:47:28Z

Moreover, another problem in MIMGInstructions.td is also fixed.

This is vague. Please say what the problem is.

jwanggit86 · 2024-10-21T19:16:34Z

Moreover, another problem in MIMGInstructions.td is also fixed.

This is vague. Please say what the problem is.

Fixed.

jayfoad · 2024-10-22T09:45:52Z

llvm/test/MC/Disassembler/AMDGPU/gfx8_mimg_features.txt

 0x00,0x17,0x48,0xf0,0x01,0x05,0x02,0x00

-# VI: image_atomic_add v[5:9], v1, s[8:15] dmask:0xf unorm ; encoding: [0x00,0x1f,0x48,0xf0,0x01,0x05,0x02,0x00]
+# VI: image_atomic_add v5, v1, s[8:15] dmask:0xf unorm ; encoding: [0x00,0x1f,0x48,0xf0,0x01,0x05,0x02,0x00]


There are 4 bits set in dmask so why isn't this v[5:8]?

Disassembler defaults to V1 variants. That would have to be IMAGE_ATOMIC_ADD_V4_V* but there are not V4_V* variants for IMAGE_ATOMICs, only V1, V2 (64 or 32+tfe) and V3 (64+tfe).

Only cmpswap needs forth variant and this patch deletes V4s for non cmpswap atomics.

Mirko is right. For non-cmpswap atomics, dmask can only be 0x1 or 0x3. So the destination reg can be 96b at most (dmask=0x3 + tfe). In the above example, the binary is incorrect to begin with (because dmask=0xf), the disassemble result is also incorrect.

Thanks for the explanation. It would still be nice to handle this better, either by printing the v[5:8] (maybe with a comment that something is wrong) or by diagnosing it properly as an error case.

I'll look into this and maybe do a separate PR.

mbrkusanin · 2024-10-22T16:07:14Z

There is still this issue with inconsistent naming for Vx where x would match the size of vdata. Currently it is:

IMAGE_ATOMIC_CMPSWAP_V1_* has VReg_64
IMAGE_ATOMIC_CMPSWAP_V2_* has VReg_128
IMAGE_ATOMIC_CMPSWAP_V3_* has VReg_96
IMAGE_ATOMIC_CMPSWAP_V4_* has VReg_160

jwanggit86 · 2024-10-23T00:22:29Z

There is still this issue with inconsistent naming for Vx where x would match the size of vdata. Currently it is:

IMAGE_ATOMIC_CMPSWAP_V1_* has VReg_64 IMAGE_ATOMIC_CMPSWAP_V2_* has VReg_128 IMAGE_ATOMIC_CMPSWAP_V3_* has VReg_96 IMAGE_ATOMIC_CMPSWAP_V4_* has VReg_160

Are you saying for atomic_cmpswap, V2 should be VReg_96, and V3 VReg_128?

mbrkusanin · 2024-10-23T08:58:58Z

There is still this issue with inconsistent naming for Vx where x would match the size of vdata. Currently it is:
IMAGE_ATOMIC_CMPSWAP_V1_* has VReg_64 IMAGE_ATOMIC_CMPSWAP_V2_* has VReg_128 IMAGE_ATOMIC_CMPSWAP_V3_* has VReg_96 IMAGE_ATOMIC_CMPSWAP_V4_* has VReg_160

Are you saying for atomic_cmpswap, V2 should be VReg_96, and V3 VReg_128?

I was thinking that it would make more sense if cmpswap skipped V1 and had variants V2, V3, V4, V5 with values VReg_64, VReg_96, VReg_128, VReg_160 respectively. That way name Vx matches size (32*x).

(might need to update some .mir tests)

jwanggit86 · 2024-10-23T22:02:07Z

There is still this issue with inconsistent naming for Vx where x would match the size of vdata. Currently it is:
IMAGE_ATOMIC_CMPSWAP_V1_* has VReg_64 IMAGE_ATOMIC_CMPSWAP_V2_* has VReg_128 IMAGE_ATOMIC_CMPSWAP_V3_* has VReg_96 IMAGE_ATOMIC_CMPSWAP_V4_* has VReg_160

Are you saying for atomic_cmpswap, V2 should be VReg_96, and V3 VReg_128?

I was thinking that it would make more sense if cmpswap skipped V1 and had variants V2, V3, V4, V5 with values VReg_64, VReg_96, VReg_128, VReg_160 respectively. That way name Vx matches size (32*x).

(might need to update some .mir tests)

This is done now.

mbrkusanin · 2024-10-24T07:59:20Z

llvm/lib/Target/AMDGPU/MIMGInstructions.td

+        defm _V1 : MIMG_Atomic_Addr_Helper_m <op, asm, VGPR_32, 1, isFP, renamed>;
+
+      let VDataDwords = 2 in
+      defm _V2 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_64, !if(isCmpSwap, 1, 0), isFP, renamed>;


Suggested change

defm _V2 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_64, !if(isCmpSwap, 1, 0), isFP, renamed>;

defm _V2 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_64, isCmpSwap, isFP, renamed>;

mbrkusanin · 2024-10-24T08:00:00Z

llvm/lib/Target/AMDGPU/MIMGInstructions.td

+      if !not(isCmpSwap) then
+        let VDataDwords = 1 in
+        defm _V1 : MIMG_Atomic_Addr_Helper_m <op, asm, VGPR_32, 1, isFP, renamed>;


Suggested change

if !not(isCmpSwap) then

let VDataDwords = 1 in

defm _V1 : MIMG_Atomic_Addr_Helper_m <op, asm, VGPR_32, 1, isFP, renamed>;

if !not(isCmpSwap) then {

let VDataDwords = 1 in

defm _V1 : MIMG_Atomic_Addr_Helper_m <op, asm, VGPR_32, 1, isFP, renamed>;

}

mbrkusanin · 2024-10-24T08:02:27Z

LGTM with nits

For image_atomic instructions with TFE, in some cases (e.g., when dmask=3) the disassembler produces dst register with wrong size (e.g., image_atomic_smin v5, v1, s[8:15] dmask:0x3 tfe, instead of v[5:7]).

cmpswap have V1-V3, while cmpswap has V2-V5.

…#112622) For image_atomic instructions with TFE, in some cases (e.g., when dmask=3) the disassembler produces dst register with wrong size (e.g., image_atomic_smin v5, v1, s[8:15] dmask:0x3 tfe, instead of v[5:7]). This patch fixes the VDataDwords values for image atomic instructions.

jwanggit86 added backend:AMDGPU mc Machine (object) code labels Oct 16, 2024

jwanggit86 requested a review from arsenm October 16, 2024 21:55

jwanggit86 linked an issue Oct 16, 2024 that may be closed by this pull request

[AMDGPU][MC] Invalid disassembly of image_atomic instructions with tfe #48191

Closed

arsenm requested review from kosarev, Flakebi, Sisyph, rovka, jayfoad and dstutt October 17, 2024 04:26

jwanggit86 force-pushed the fix-disassemble-of-image_atomic-with-tfe branch from 9bd4bcc to 1301661 Compare October 21, 2024 19:08

arsenm approved these changes Oct 21, 2024

View reviewed changes

jayfoad reviewed Oct 22, 2024

View reviewed changes

llvmbot added the llvm:globalisel label Oct 23, 2024

jwanggit86 requested review from jayfoad and mbrkusanin October 24, 2024 00:43

mbrkusanin reviewed Oct 24, 2024

View reviewed changes

mbrkusanin approved these changes Oct 24, 2024

View reviewed changes

jwanggit86 added 3 commits October 24, 2024 10:44

[AMDGPU][MC] Fix disassembler problem for image_atomic with TFE

bed2cde

For image_atomic instructions with TFE, in some cases (e.g., when dmask=3) the disassembler produces dst register with wrong size (e.g., image_atomic_smin v5, v1, s[8:15] dmask:0x3 tfe, instead of v[5:7]).

A minor code change.

e0bcc29

Make the _Vx suffix align with reg size. Atomics other than

e5cec28

cmpswap have V1-V3, while cmpswap has V2-V5.

Minor changes.

5e49be4

jwanggit86 force-pushed the fix-disassemble-of-image_atomic-with-tfe branch from 0adf671 to 5e49be4 Compare October 24, 2024 21:50

jwanggit86 merged commit 19b0453 into llvm:main Oct 24, 2024
8 checks passed

frobtech mentioned this pull request Oct 25, 2024

p/libc fexcept t #113664

Closed

jwanggit86 mentioned this pull request Nov 15, 2024

[AMDGPU][MC] Many MIMG opcodes do not accept tfe with dmask:0xf #38648

Closed

	defm _V2 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_64, !if(isCmpSwap, 1, 0), isFP, renamed>;
	defm _V2 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_64, isCmpSwap, isFP, renamed>;

[AMDGPU][MC] Fix disassembler problem for image_atomic with TFE #112622

[AMDGPU][MC] Fix disassembler problem for image_atomic with TFE #112622

Uh oh!

Conversation

jwanggit86 commented Oct 16, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Oct 16, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

jayfoad commented Oct 17, 2024

Uh oh!

jwanggit86 commented Oct 21, 2024

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

mbrkusanin commented Oct 22, 2024

Uh oh!

jwanggit86 commented Oct 23, 2024

Uh oh!

mbrkusanin commented Oct 23, 2024

Uh oh!

jwanggit86 commented Oct 23, 2024

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

mbrkusanin commented Oct 24, 2024

Uh oh!

Uh oh!

Uh oh!

jwanggit86 commented Oct 16, 2024 •

edited

Loading

llvmbot commented Oct 16, 2024 •

edited

Loading