diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td index 5c49a8116ae7f..e0d1cde284524 100644 --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -1119,14 +1119,22 @@ multiclass MIMG_Atomic ; - let VDataDwords = !if(isCmpSwap, 4, 2) in - defm _V2 : MIMG_Atomic_Addr_Helper_m ; - let VDataDwords = !if(isCmpSwap, 2, 2) in + if !not(isCmpSwap) then { + let VDataDwords = 1 in + defm _V1 : MIMG_Atomic_Addr_Helper_m ; + } + + let VDataDwords = 2 in + defm _V2 : MIMG_Atomic_Addr_Helper_m ; + let VDataDwords = 3 in defm _V3 : MIMG_Atomic_Addr_Helper_m ; - let VDataDwords = !if(isCmpSwap, 4, 4) in - defm _V4 : MIMG_Atomic_Addr_Helper_m ; + + if isCmpSwap then { + let VDataDwords = 4 in + defm _V4 : MIMG_Atomic_Addr_Helper_m ; + let VDataDwords = 5 in + defm _V5 : MIMG_Atomic_Addr_Helper_m ; + } } } // End IsAtomicRet = 1 } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir index fb8c76bb6f51d..292fa4be1ca1d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir @@ -21,8 +21,8 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_si]].sub0 + ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si]].sub0 ; GFX6-NEXT: $vgpr0 = COPY [[COPY3]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: atomic_cmpswap_i32_1d @@ -31,8 +31,8 @@ body: | ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi]].sub0 + ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi]].sub0 ; GFX8-NEXT: $vgpr0 = COPY [[COPY3]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10-LABEL: name: atomic_cmpswap_i32_1d @@ -41,8 +41,8 @@ body: | ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10_]].sub0 + ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_]].sub0 ; GFX10-NEXT: $vgpr0 = COPY [[COPY3]] ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX11-LABEL: name: atomic_cmpswap_i32_1d @@ -51,8 +51,8 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx11_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx11_]].sub0 + ; GFX11-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11_]].sub0 ; GFX11-NEXT: $vgpr0 = COPY [[COPY3]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX12-LABEL: name: atomic_cmpswap_i32_1d @@ -61,8 +61,8 @@ body: | ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx12_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx12_]].sub0 + ; GFX12-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12_]].sub0 ; GFX12-NEXT: $vgpr0 = COPY [[COPY3]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 @@ -89,7 +89,7 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) + ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) ; GFX6-NEXT: S_ENDPGM 0 ; GFX8-LABEL: name: atomic_cmpswap_i32_1d_no_return ; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2 @@ -97,7 +97,7 @@ body: | ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) + ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; GFX10-LABEL: name: atomic_cmpswap_i32_1d_no_return ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2 @@ -105,7 +105,7 @@ body: | ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) + ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) ; GFX10-NEXT: S_ENDPGM 0 ; GFX11-LABEL: name: atomic_cmpswap_i32_1d_no_return ; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2 @@ -113,7 +113,7 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx11_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) + ; GFX11-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) ; GFX11-NEXT: S_ENDPGM 0 ; GFX12-LABEL: name: atomic_cmpswap_i32_1d_no_return ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2 @@ -121,7 +121,7 @@ body: | ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX12-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx12_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) + ; GFX12-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 @@ -146,8 +146,8 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si]].sub0_sub1 + ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V4_V1_si]].sub0_sub1 ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1 ; GFX8-LABEL: name: atomic_cmpswap_i64_1d @@ -156,8 +156,8 @@ body: | ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) - ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi]].sub0_sub1 + ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V4_V1_vi]].sub0_sub1 ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1 ; GFX10-LABEL: name: atomic_cmpswap_i64_1d @@ -166,8 +166,8 @@ body: | ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_]].sub0_sub1 + ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10_]].sub0_sub1 ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1 ; GFX11-LABEL: name: atomic_cmpswap_i64_1d @@ -176,8 +176,8 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX11-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) - ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11_]].sub0_sub1 + ; GFX11-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11_]].sub0_sub1 ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1 ; GFX12-LABEL: name: atomic_cmpswap_i64_1d @@ -186,8 +186,8 @@ body: | ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX12-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) - ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12_]].sub0_sub1 + ; GFX12-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx12_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) + ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx12_]].sub0_sub1 ; GFX12-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1 %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 @@ -214,7 +214,7 @@ body: | ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) + ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) ; GFX6-NEXT: S_ENDPGM 0 ; GFX8-LABEL: name: atomic_cmpswap_i64_1d_no_return ; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 @@ -222,7 +222,7 @@ body: | ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) + ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) ; GFX8-NEXT: S_ENDPGM 0 ; GFX10-LABEL: name: atomic_cmpswap_i64_1d_no_return ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 @@ -230,7 +230,7 @@ body: | ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) + ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) ; GFX10-NEXT: S_ENDPGM 0 ; GFX11-LABEL: name: atomic_cmpswap_i64_1d_no_return ; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 @@ -238,7 +238,7 @@ body: | ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX11-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) + ; GFX11-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) ; GFX11-NEXT: S_ENDPGM 0 ; GFX12-LABEL: name: atomic_cmpswap_i64_1d_no_return ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 @@ -246,7 +246,7 @@ body: | ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX12-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) + ; GFX12-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx12_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8) ; GFX12-NEXT: S_ENDPGM 0 %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:vgpr(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/release-vgprs.mir b/llvm/test/CodeGen/AMDGPU/release-vgprs.mir index a9cb169d1a096..c845a4c82b9cc 100644 --- a/llvm/test/CodeGen/AMDGPU/release-vgprs.mir +++ b/llvm/test/CodeGen/AMDGPU/release-vgprs.mir @@ -548,9 +548,9 @@ machineFunctionInfo: body: | bb.0: ; CHECK-LABEL: name: image_atomic - ; CHECK: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7) + ; CHECK: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11 killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7) ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr97 - renamable $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7) + renamable $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11 killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7) S_ENDPGM 0, implicit $vgpr97 ... diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_mimg_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_mimg_features.txt new file mode 100644 index 0000000000000..133af6b0c1da4 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_mimg_features.txt @@ -0,0 +1,84 @@ +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1010 -disassemble -show-encoding < %s | FileCheck %s -check-prefix=GFX1010 + +#===------------------------------------------------------------------------===# +# Image atomics +#===------------------------------------------------------------------------===# + +# GFX1010: image_atomic_add v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0x01,0x45,0xf0,0x01,0x05,0x02,0x00 + +# GFX1010: image_atomic_add v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0x03,0x45,0xf0,0x01,0x05,0x02,0x00 + +# GFX1010: image_atomic_and v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0x01,0x61,0xf0,0x01,0x05,0x02,0x00 + +# GFX1010: image_atomic_and v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0x03,0x61,0xf0,0x01,0x05,0x02,0x00 + +# GFX1010: image_atomic_cmpswap v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0x03,0x41,0xf0,0x01,0x05,0x02,0x00 + +# GFX1010: image_atomic_cmpswap v[5:9], v1, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D tfe +0x00,0x0f,0x41,0xf0,0x01,0x05,0x02,0x00 + +# GFX1010: image_atomic_dec v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0x01,0x71,0xf0,0x01,0x05,0x02,0x00 + +# GFX1010: image_atomic_dec v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0x03,0x71,0xf0,0x01,0x05,0x02,0x00 + +# GFX1010: image_atomic_inc v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0x01,0x6d,0xf0,0x01,0x05,0x02,0x00 + +# GFX1010: image_atomic_inc v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0x03,0x6d,0xf0,0x01,0x05,0x02,0x00 + +# GFX1010: image_atomic_or v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0x01,0x65,0xf0,0x01,0x05,0x02,0x00 + +# GFX1010: image_atomic_or v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0x03,0x65,0xf0,0x01,0x05,0x02,0x00 + +# GFX1010: image_atomic_smax v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0x01,0x59,0xf0,0x01,0x05,0x02,0x00 + +# GFX1010: image_atomic_smax v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0x03,0x59,0xf0,0x01,0x05,0x02,0x00 + +# GFX1010: image_atomic_smin v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0x01,0x51,0xf0,0x01,0x05,0x02,0x00 + +# GFX1010: image_atomic_smin v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0x03,0x51,0xf0,0x01,0x05,0x02,0x00 + +# GFX1010: image_atomic_sub v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0x01,0x49,0xf0,0x01,0x05,0x02,0x00 + +# GFX1010: image_atomic_sub v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0x03,0x49,0xf0,0x01,0x05,0x02,0x00 + +# GFX1010: image_atomic_swap v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0x01,0x3d,0xf0,0x01,0x05,0x02,0x00 + +# GFX1010: image_atomic_swap v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0x03,0x3d,0xf0,0x01,0x05,0x02,0x00 + +# GFX1010: image_atomic_umax v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0x01,0x5d,0xf0,0x01,0x05,0x02,0x00 + +# GFX1010: image_atomic_umax v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0x03,0x5d,0xf0,0x01,0x05,0x02,0x00 + +# GFX1010: image_atomic_umin v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0x01,0x55,0xf0,0x01,0x05,0x02,0x00 + +# GFX1010: image_atomic_umin v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0x03,0x55,0xf0,0x01,0x05,0x02,0x00 + +# GFX1010: image_atomic_xor v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0x01,0x69,0xf0,0x01,0x05,0x02,0x00 + +# GFX1010: image_atomic_xor v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0x03,0x69,0xf0,0x01,0x05,0x02,0x00 + diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_mimg_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_mimg_features.txt index 46488a9aa4ec7..fdf376ed6d693 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_mimg_features.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_mimg_features.txt @@ -135,6 +135,89 @@ # GFX11: image_atomic_dec v4, v32, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D glc ; encoding: [0x00,0x41,0x58,0xf0,0x20,0x04,0x18,0x00] 0x00,0x41,0x58,0xf0,0x20,0x04,0x18,0x00 +#===------------------------------------------------------------------------===# +# TFE in image_atomic +#===------------------------------------------------------------------------===# + +# GFX11: image_atomic_add v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0x01,0x30,0xf0,0x01,0x05,0x22,0x00 + +# GFX11: image_atomic_add v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0x03,0x30,0xf0,0x01,0x05,0x22,0x00 + +# GFX11: image_atomic_and v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0x01,0x48,0xf0,0x01,0x05,0x22,0x00 + +# GFX11: image_atomic_and v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0x03,0x48,0xf0,0x01,0x05,0x22,0x00 + +# GFX11: image_atomic_cmpswap v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0x03,0x2c,0xf0,0x01,0x05,0x22,0x00 + +# GFX11: image_atomic_cmpswap v[5:9], v1, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D tfe +0x00,0x0f,0x2c,0xf0,0x01,0x05,0x22,0x00 + +# GFX11: image_atomic_dec v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0x01,0x58,0xf0,0x01,0x05,0x22,0x00 + +# GFX11: image_atomic_dec v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0x03,0x58,0xf0,0x01,0x05,0x22,0x00 + +# GFX11: image_atomic_inc v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0x01,0x54,0xf0,0x01,0x05,0x22,0x00 + +# GFX11: image_atomic_inc v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0x03,0x54,0xf0,0x01,0x05,0x22,0x00 + +# GFX11: image_atomic_or v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0x01,0x4c,0xf0,0x01,0x05,0x22,0x00 + +# GFX11: image_atomic_or v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0x03,0x4c,0xf0,0x01,0x05,0x22,0x00 + +# GFX11: image_atomic_smax v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0x01,0x40,0xf0,0x01,0x05,0x22,0x00 + +# GFX11: image_atomic_smax v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0x03,0x40,0xf0,0x01,0x05,0x22,0x00 + +# GFX11: image_atomic_smin v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0x01,0x38,0xf0,0x01,0x05,0x22,0x00 + +# GFX11: image_atomic_smin v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0x03,0x38,0xf0,0x01,0x05,0x22,0x00 + +# GFX11: image_atomic_sub v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0x01,0x34,0xf0,0x01,0x05,0x22,0x00 + +# GFX11: image_atomic_sub v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0x03,0x34,0xf0,0x01,0x05,0x22,0x00 + +# GFX11: image_atomic_swap v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0x01,0x28,0xf0,0x01,0x05,0x22,0x00 + +# GFX11: image_atomic_swap v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0x03,0x28,0xf0,0x01,0x05,0x22,0x00 + +# GFX11: image_atomic_umax v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0x01,0x44,0xf0,0x01,0x05,0x22,0x00 + +# GFX11: image_atomic_umax v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0x03,0x44,0xf0,0x01,0x05,0x22,0x00 + +# GFX11: image_atomic_umin v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0x01,0x3c,0xf0,0x01,0x05,0x22,0x00 + +# GFX11: image_atomic_umin v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0x03,0x3c,0xf0,0x01,0x05,0x22,0x00 + +# GFX11: image_atomic_xor v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0x01,0x50,0xf0,0x01,0x05,0x22,0x00 + +# GFX11: image_atomic_xor v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0x03,0x50,0xf0,0x01,0x05,0x22,0x00 + + # GFX11: image_sample v[64:66], v32, s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x07,0x6c,0xf0,0x20,0x40,0x01,0x64] 0x00,0x07,0x6c,0xf0,0x20,0x40,0x01,0x64 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage_features.txt new file mode 100644 index 0000000000000..2e747adbd0720 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage_features.txt @@ -0,0 +1,101 @@ +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12 %s + +#===------------------------------------------------------------------------===# +# TFE in image_atomic +#===------------------------------------------------------------------------===# + +# GFX12: image_atomic_add_flt v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0xc0,0x60,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00 + +# GFX12: image_atomic_add_flt v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0xc0,0xe0,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00 + +# GFX12: image_atomic_add_uint v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0x00,0x43,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00 + +# GFX12: image_atomic_add_uint v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0x00,0xc3,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00 + +# GFX12: image_atomic_and v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0x80,0x44,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00 + +# GFX12: image_atomic_and v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0x80,0xc4,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00 + +# GFX12: image_atomic_cmpswap v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0xc0,0xc2,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00 + +# GFX12: image_atomic_cmpswap v[5:9], v1, s[8:15] dmask:0xf dim:SQ_RSRC_IMG_1D tfe +0x00,0xc0,0xc2,0xd3,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00 + +# GFX12: image_atomic_dec_uint v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0x80,0x45,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00 + +# GFX12: image_atomic_dec_uint v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0x80,0xc5,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00 + +# GFX12: image_atomic_inc_uint v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0x40,0x45,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00 + +# GFX12: image_atomic_inc_uint v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0x40,0xc5,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00 + +# GFX12: image_atomic_max_int v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0x00,0x44,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00 + +# GFX12: image_atomic_max_int v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0x00,0xc4,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00 + +# GFX12: image_atomic_max_uint v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0x40,0x44,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00 + +# GFX12: image_atomic_max_uint v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0x40,0xc4,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00 + +# GFX12: image_atomic_min_int v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0x80,0x43,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00 + +# GFX12: image_atomic_min_int v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0x80,0xc3,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00 + +# GFX12: image_atomic_min_uint v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0xc0,0x43,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00 + +# GFX12: image_atomic_min_uint v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0xc0,0xc3,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00 + +# GFX12: image_atomic_or v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0xc0,0x44,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00 + +# GFX12: image_atomic_or v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0xc0,0xc4,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00 + +# GFX12: image_atomic_sub_uint v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0x40,0x43,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00 + +# GFX12: image_atomic_sub_uint v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0x40,0xc3,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00 + +# GFX12: image_atomic_swap v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0x80,0x42,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00 + +# GFX12: image_atomic_swap v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0x80,0xc2,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00 + +# GFX12: image_atomic_max_uint v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0x40,0x44,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00 + +# GFX12: image_atomic_max_uint v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0x40,0xc4,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00 + +# GFX12: image_atomic_min_uint v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0xc0,0x43,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00 + +# GFX12: image_atomic_min_uint v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0xc0,0xc3,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00 + +# GFX12: image_atomic_xor v[5:6], v1, s[8:15] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe +0x00,0x00,0x45,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00 + +# GFX12: image_atomic_xor v[5:7], v1, s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe +0x00,0x00,0xc5,0xd0,0x05,0x10,0x80,0x00,0x01,0x00,0x00,0x00 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx8_mimg_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx8_mimg_features.txt index 0a5bafc55f4d4..1bb1014a431d3 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx8_mimg_features.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx8_mimg_features.txt @@ -184,6 +184,85 @@ # VI: image_atomic_cmpswap v[5:8], v1, s[8:15] dmask:0xf unorm ; encoding: [0x00,0x1f,0x44,0xf0,0x01,0x05,0x02,0x00] 0x00,0x1f,0x44,0xf0,0x01,0x05,0x02,0x00 +# VI: image_atomic_add v[5:6], v1, s[8:15] dmask:0x1 tfe +0x00,0x01,0x49,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_add v[5:7], v1, s[8:15] dmask:0x3 tfe +0x00,0x03,0x49,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_and v[5:6], v1, s[8:15] dmask:0x1 tfe +0x00,0x01,0x61,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_and v[5:7], v1, s[8:15] dmask:0x3 tfe +0x00,0x03,0x61,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_cmpswap v[5:7], v1, s[8:15] dmask:0x3 tfe +0x00,0x03,0x45,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_cmpswap v[5:9], v1, s[8:15] dmask:0xf tfe +0x00,0x0f,0x45,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_dec v[5:6], v1, s[8:15] dmask:0x1 tfe +0x00,0x01,0x71,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_dec v[5:7], v1, s[8:15] dmask:0x3 tfe +0x00,0x03,0x71,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_inc v[5:6], v1, s[8:15] dmask:0x1 tfe +0x00,0x01,0x6d,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_inc v[5:7], v1, s[8:15] dmask:0x3 tfe +0x00,0x03,0x6d,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_or v[5:6], v1, s[8:15] dmask:0x1 tfe +0x00,0x01,0x65,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_or v[5:7], v1, s[8:15] dmask:0x3 tfe +0x00,0x03,0x65,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_smax v[5:6], v1, s[8:15] dmask:0x1 tfe +0x00,0x01,0x59,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_smax v[5:7], v1, s[8:15] dmask:0x3 tfe +0x00,0x03,0x59,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_smin v[5:6], v1, s[8:15] dmask:0x1 tfe +0x00,0x01,0x51,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_smin v[5:7], v1, s[8:15] dmask:0x3 tfe +0x00,0x03,0x51,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_sub v[5:6], v1, s[8:15] dmask:0x1 tfe +0x00,0x01,0x4d,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_sub v[5:7], v1, s[8:15] dmask:0x3 tfe +0x00,0x03,0x4d,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_swap v[5:6], v1, s[8:15] dmask:0x1 tfe +0x00,0x01,0x41,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_swap v[5:7], v1, s[8:15] dmask:0x3 tfe +0x00,0x03,0x41,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_umax v[5:6], v1, s[8:15] dmask:0x1 tfe +0x00,0x01,0x5d,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_umax v[5:7], v1, s[8:15] dmask:0x3 tfe +0x00,0x03,0x5d,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_umin v[5:6], v1, s[8:15] dmask:0x1 tfe +0x00,0x01,0x55,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_umin v[5:7], v1, s[8:15] dmask:0x3 tfe +0x00,0x03,0x55,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_xor v[5:6], v1, s[8:15] dmask:0x1 tfe +0x00,0x01,0x69,0xf0,0x01,0x05,0x02,0x00 + +# VI: image_atomic_xor v[5:7], v1, s[8:15] dmask:0x3 tfe +0x00,0x03,0x69,0xf0,0x01,0x05,0x02,0x00 + + #===------------------------------------------------------------------------===# # Invalid image atomics (incorrect dmask value). # Disassembler may produce a partially incorrect instruction but should not fail. @@ -192,10 +271,10 @@ # VI: image_atomic_add v5, v1, s[8:15] dmask:0x2 unorm ; encoding: [0x00,0x12,0x48,0xf0,0x01,0x05,0x02,0x00] 0x00,0x12,0x48,0xf0,0x01,0x05,0x02,0x00 -# VI: image_atomic_add v5, v1, s[8:15] dmask:0x7 unorm ; encoding: [0x00,0x17,0x48,0xf0,0x01,0x05,0x02,0x00] +# VI: image_atomic_add v[5:7], v1, s[8:15] dmask:0x7 unorm ; encoding: [0x00,0x17,0x48,0xf0,0x01,0x05,0x02,0x00] 0x00,0x17,0x48,0xf0,0x01,0x05,0x02,0x00 -# VI: image_atomic_add v[5:9], v1, s[8:15] dmask:0xf unorm ; encoding: [0x00,0x1f,0x48,0xf0,0x01,0x05,0x02,0x00] +# VI: image_atomic_add v5, v1, s[8:15] dmask:0xf unorm ; encoding: [0x00,0x1f,0x48,0xf0,0x01,0x05,0x02,0x00] 0x00,0x1f,0x48,0xf0,0x01,0x05,0x02,0x00 # VI: image_atomic_cmpswap v[5:6], v1, s[8:15] unorm ; encoding: [0x00,0x10,0x44,0xf0,0x01,0x05,0x02,0x00] @@ -204,7 +283,7 @@ # VI: image_atomic_cmpswap v[5:6], v1, s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x44,0xf0,0x01,0x05,0x02,0x00] 0x00,0x11,0x44,0xf0,0x01,0x05,0x02,0x00 -# VI: image_atomic_cmpswap v[5:6], v1, s[8:15] dmask:0xe unorm ; encoding: [0x00,0x1e,0x44,0xf0,0x01,0x05,0x02,0x00] +# VI: image_atomic_cmpswap v[5:7], v1, s[8:15] dmask:0xe unorm ; encoding: [0x00,0x1e,0x44,0xf0,0x01,0x05,0x02,0x00] 0x00,0x1e,0x44,0xf0,0x01,0x05,0x02,0x00 #===------------------------------------------------------------------------===# diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_mimg_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_mimg_features.txt new file mode 100644 index 0000000000000..8300fe4e9db50 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_mimg_features.txt @@ -0,0 +1,83 @@ +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx900 -disassemble -show-encoding < %s | FileCheck %s -check-prefix=GFX900 + +#===------------------------------------------------------------------------===# +# Image atomics +#===------------------------------------------------------------------------===# +# GFX900: image_atomic_add v[5:6], v1, s[8:15] dmask:0x1 tfe +0x00,0x01,0x49,0xf0,0x01,0x05,0x02,0x00 + +# GFX900: image_atomic_add v[5:7], v1, s[8:15] dmask:0x3 tfe +0x00,0x03,0x49,0xf0,0x01,0x05,0x02,0x00 + +# GFX900: image_atomic_and v[5:6], v1, s[8:15] dmask:0x1 tfe +0x00,0x01,0x61,0xf0,0x01,0x05,0x02,0x00 + +# GFX900: image_atomic_and v[5:7], v1, s[8:15] dmask:0x3 tfe +0x00,0x03,0x61,0xf0,0x01,0x05,0x02,0x00 + +# GFX900: image_atomic_cmpswap v[5:7], v1, s[8:15] dmask:0x3 tfe +0x00,0x03,0x45,0xf0,0x01,0x05,0x02,0x00 + +# GFX900: image_atomic_cmpswap v[5:9], v1, s[8:15] dmask:0xf tfe +0x00,0x0f,0x45,0xf0,0x01,0x05,0x02,0x00 + +# GFX900: image_atomic_dec v[5:6], v1, s[8:15] dmask:0x1 tfe +0x00,0x01,0x71,0xf0,0x01,0x05,0x02,0x00 + +# GFX900: image_atomic_dec v[5:7], v1, s[8:15] dmask:0x3 tfe +0x00,0x03,0x71,0xf0,0x01,0x05,0x02,0x00 + +# GFX900: image_atomic_inc v[5:6], v1, s[8:15] dmask:0x1 tfe +0x00,0x01,0x6d,0xf0,0x01,0x05,0x02,0x00 + +# GFX900: image_atomic_inc v[5:7], v1, s[8:15] dmask:0x3 tfe +0x00,0x03,0x6d,0xf0,0x01,0x05,0x02,0x00 + +# GFX900: image_atomic_or v[5:6], v1, s[8:15] dmask:0x1 tfe +0x00,0x01,0x65,0xf0,0x01,0x05,0x02,0x00 + +# GFX900: image_atomic_or v[5:7], v1, s[8:15] dmask:0x3 tfe +0x00,0x03,0x65,0xf0,0x01,0x05,0x02,0x00 + +# GFX900: image_atomic_smax v[5:6], v1, s[8:15] dmask:0x1 tfe +0x00,0x01,0x59,0xf0,0x01,0x05,0x02,0x00 + +# GFX900: image_atomic_smax v[5:7], v1, s[8:15] dmask:0x3 tfe +0x00,0x03,0x59,0xf0,0x01,0x05,0x02,0x00 + +# GFX900: image_atomic_smin v[5:6], v1, s[8:15] dmask:0x1 tfe +0x00,0x01,0x51,0xf0,0x01,0x05,0x02,0x00 + +# GFX900: image_atomic_smin v[5:7], v1, s[8:15] dmask:0x3 tfe +0x00,0x03,0x51,0xf0,0x01,0x05,0x02,0x00 + +# GFX900: image_atomic_sub v[5:6], v1, s[8:15] dmask:0x1 tfe +0x00,0x01,0x4d,0xf0,0x01,0x05,0x02,0x00 + +# GFX900: image_atomic_sub v[5:7], v1, s[8:15] dmask:0x3 tfe +0x00,0x03,0x4d,0xf0,0x01,0x05,0x02,0x00 + +# GFX900: image_atomic_swap v[5:6], v1, s[8:15] dmask:0x1 tfe +0x00,0x01,0x41,0xf0,0x01,0x05,0x02,0x00 + +# GFX900: image_atomic_swap v[5:7], v1, s[8:15] dmask:0x3 tfe +0x00,0x03,0x41,0xf0,0x01,0x05,0x02,0x00 + +# GFX900: image_atomic_umax v[5:6], v1, s[8:15] dmask:0x1 tfe +0x00,0x01,0x5d,0xf0,0x01,0x05,0x02,0x00 + +# GFX900: image_atomic_umax v[5:7], v1, s[8:15] dmask:0x3 tfe +0x00,0x03,0x5d,0xf0,0x01,0x05,0x02,0x00 + +# GFX900: image_atomic_umin v[5:6], v1, s[8:15] dmask:0x1 tfe +0x00,0x01,0x55,0xf0,0x01,0x05,0x02,0x00 + +# GFX900: image_atomic_umin v[5:7], v1, s[8:15] dmask:0x3 tfe +0x00,0x03,0x55,0xf0,0x01,0x05,0x02,0x00 + +# GFX900: image_atomic_xor v[5:6], v1, s[8:15] dmask:0x1 tfe +0x00,0x01,0x69,0xf0,0x01,0x05,0x02,0x00 + +# GFX900: image_atomic_xor v[5:7], v1, s[8:15] dmask:0x3 tfe +0x00,0x03,0x69,0xf0,0x01,0x05,0x02,0x00 +