diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 749b9efc81378..4b3dc371c65f0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -1415,6 +1415,7 @@ static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD, MD->setHwStage(CC, ".wgp_mode", (bool)CurrentProgramInfo.WgpMode); MD->setHwStage(CC, ".mem_ordered", (bool)CurrentProgramInfo.MemOrdered); + MD->setHwStage(CC, ".forward_progress", (bool)CurrentProgramInfo.FwdProgress); if (AMDGPU::isCompute(CC)) { MD->setHwStage(CC, ".trap_present", diff --git a/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp b/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp index 7093fe6405abb..5940f45e74bf2 100644 --- a/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp @@ -85,7 +85,8 @@ static uint64_t getComputePGMRSrc1Reg(const SIProgramInfo &ProgInfo, S_00B848_PRIV(ProgInfo.Priv) | S_00B848_DEBUG_MODE(ProgInfo.DebugMode) | S_00B848_WGP_MODE(ProgInfo.WgpMode) | - S_00B848_MEM_ORDERED(ProgInfo.MemOrdered); + S_00B848_MEM_ORDERED(ProgInfo.MemOrdered) | + S_00B848_FWD_PROGRESS(ProgInfo.FwdProgress); if (ST.hasDX10ClampMode()) Reg |= S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp); @@ -93,10 +94,6 @@ static uint64_t getComputePGMRSrc1Reg(const SIProgramInfo &ProgInfo, if (ST.hasIEEEMode()) Reg |= S_00B848_IEEE_MODE(ProgInfo.IEEEMode); - // TODO: in the long run we will want to enable this unconditionally. - if (ST.getTargetTriple().getOS() == Triple::OSType::AMDHSA) - Reg |= S_00B848_FWD_PROGRESS(ProgInfo.FwdProgress); - if (ST.hasRrWGMode()) Reg |= S_00B848_RR_WG_MODE(ProgInfo.RrWgMode); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll index 9b35920f8547a..fa4676e4befe4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll @@ -3211,7 +3211,7 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel ; GFX10-NEXT: enable_ieee_mode = 1 ; GFX10-NEXT: enable_wgp_mode = 1 ; GFX10-NEXT: enable_mem_ordered = 1 -; GFX10-NEXT: enable_fwd_progress = 0 +; GFX10-NEXT: enable_fwd_progress = 1 ; GFX10-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 ; GFX10-NEXT: user_sgpr_count = 14 ; GFX10-NEXT: enable_trap_handler = 0 @@ -3303,7 +3303,7 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel ; GFX11-NEXT: enable_ieee_mode = 1 ; GFX11-NEXT: enable_wgp_mode = 1 ; GFX11-NEXT: enable_mem_ordered = 1 -; GFX11-NEXT: enable_fwd_progress = 0 +; GFX11-NEXT: enable_fwd_progress = 1 ; GFX11-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 ; GFX11-NEXT: user_sgpr_count = 13 ; GFX11-NEXT: enable_trap_handler = 0 @@ -4215,7 +4215,7 @@ define amdgpu_kernel void @dyn_extract_v4f32_s_s_s(ptr addrspace(1) %out, i32 %s ; GFX10-NEXT: enable_ieee_mode = 1 ; GFX10-NEXT: enable_wgp_mode = 1 ; GFX10-NEXT: enable_mem_ordered = 1 -; GFX10-NEXT: enable_fwd_progress = 0 +; GFX10-NEXT: enable_fwd_progress = 1 ; GFX10-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 ; GFX10-NEXT: user_sgpr_count = 14 ; GFX10-NEXT: enable_trap_handler = 0 @@ -4300,7 +4300,7 @@ define amdgpu_kernel void @dyn_extract_v4f32_s_s_s(ptr addrspace(1) %out, i32 %s ; GFX11-NEXT: enable_ieee_mode = 1 ; GFX11-NEXT: enable_wgp_mode = 1 ; GFX11-NEXT: enable_mem_ordered = 1 -; GFX11-NEXT: enable_fwd_progress = 0 +; GFX11-NEXT: enable_fwd_progress = 1 ; GFX11-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 ; GFX11-NEXT: user_sgpr_count = 13 ; GFX11-NEXT: enable_trap_handler = 0 @@ -4569,7 +4569,7 @@ define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(ptr addrspace(1) %out, i32 %s ; GFX10-NEXT: enable_ieee_mode = 1 ; GFX10-NEXT: enable_wgp_mode = 1 ; GFX10-NEXT: enable_mem_ordered = 1 -; GFX10-NEXT: enable_fwd_progress = 0 +; GFX10-NEXT: enable_fwd_progress = 1 ; GFX10-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 ; GFX10-NEXT: user_sgpr_count = 14 ; GFX10-NEXT: enable_trap_handler = 0 @@ -4657,7 +4657,7 @@ define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(ptr addrspace(1) %out, i32 %s ; GFX11-NEXT: enable_ieee_mode = 1 ; GFX11-NEXT: enable_wgp_mode = 1 ; GFX11-NEXT: enable_mem_ordered = 1 -; GFX11-NEXT: enable_fwd_progress = 0 +; GFX11-NEXT: enable_fwd_progress = 1 ; GFX11-NEXT: enable_sgpr_private_segment_wave_byte_offset = 0 ; GFX11-NEXT: user_sgpr_count = 13 ; GFX11-NEXT: enable_trap_handler = 0 diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-ieee.ll b/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-ieee.ll index d4826a22db795..6044f6e354ee0 100644 --- a/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-ieee.ll +++ b/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-ieee.ll @@ -7,7 +7,7 @@ ; SI-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xaf0000{{$}} ; VI-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xaf02c0{{$}} ; GFX9-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xaf0000{{$}} -; GFX12-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0x600f0000{{$}} +; GFX12-DAG: '0x2e12 (COMPUTE_PGM_RSRC1)': 0xe00f0000{{$}} define amdgpu_cs half @cs_amdpal(half %arg0) #0 { %add = fadd half %arg0, 1.0 ret half %add diff --git a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable-dvgpr.ll b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable-dvgpr.ll index ae35d0dcb88f3..e6bc733775b17 100644 --- a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable-dvgpr.ll +++ b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable-dvgpr.ll @@ -17,6 +17,7 @@ ; CHECK-NEXT: .debug_mode: 0 ; CHECK-NEXT: .excp_en: 0 ; CHECK-NEXT: .float_mode: 0xc0 +; CHECK-NEXT: .forward_progress: true ; CHECK-NEXT: .image_op: false ; CHECK-NEXT: .lds_size: 0x200 ; CHECK-NEXT: .mem_ordered: true diff --git a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll index 638dc8965987e..310040d44bc34 100644 --- a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll +++ b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-callable.ll @@ -19,6 +19,7 @@ ; CHECK-NEXT: .debug_mode: 0 ; CHECK-NEXT: .excp_en: 0 ; CHECK-NEXT: .float_mode: 0xc0 +; CHECK-NEXT: .forward_progress: true ; GFX11-NEXT: .ieee_mode: true ; CHECK-NEXT: .image_op: false ; CHECK-NEXT: .lds_size: 0x200 diff --git a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-dvgpr.ll b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-dvgpr.ll index fb6ac2e8833be..c1846c0f2c23b 100644 --- a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-dvgpr.ll +++ b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0-dvgpr.ll @@ -59,6 +59,7 @@ ; CHECK-NEXT: .entry_point_symbol: _amdgpu_cs_main ; CHECK-NEXT: .excp_en: 0 ; CHECK-NEXT: .float_mode: 0xc0 +; CHECK-NEXT: .forward_progress: true ; CHECK-NEXT: .image_op: false ; CHECK-NEXT: .lds_size: 0 ; CHECK-NEXT: .mem_ordered: true @@ -113,6 +114,7 @@ ; CHECK-NEXT: .debug_mode: false ; CHECK-NEXT: .entry_point: _amdgpu_gs ; CHECK-NEXT: .entry_point_symbol: gs_shader +; CHECK-NEXT: .forward_progress: true ; CHECK-NEXT: .lds_size: 0x200 ; CHECK-NEXT: .mem_ordered: true ; CHECK-NEXT: .scratch_en: false @@ -124,6 +126,7 @@ ; CHECK-NEXT: .debug_mode: false ; CHECK-NEXT: .entry_point: _amdgpu_hs ; CHECK-NEXT: .entry_point_symbol: hs_shader +; CHECK-NEXT: .forward_progress: true ; CHECK-NEXT: .lds_size: 0x1000 ; CHECK-NEXT: .mem_ordered: true ; CHECK-NEXT: .scratch_en: false @@ -135,6 +138,7 @@ ; CHECK-NEXT: .debug_mode: false ; CHECK-NEXT: .entry_point: _amdgpu_ps ; CHECK-NEXT: .entry_point_symbol: ps_shader +; CHECK-NEXT: .forward_progress: true ; CHECK-NEXT: .lds_size: 0 ; CHECK-NEXT: .mem_ordered: true ; CHECK-NEXT: .scratch_en: false diff --git a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll index 15778c8861e83..5c0c366277829 100644 --- a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll +++ b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll @@ -62,6 +62,7 @@ ; CHECK-NEXT: .entry_point_symbol: _amdgpu_cs_main ; CHECK-NEXT: .excp_en: 0 ; CHECK-NEXT: .float_mode: 0xc0 +; CHECK-NEXT: .forward_progress: true ; GFX11-NEXT: .ieee_mode: false ; CHECK-NEXT: .image_op: false ; CHECK-NEXT: .lds_size: 0 @@ -118,6 +119,7 @@ ; CHECK-NEXT: .debug_mode: false ; CHECK-NEXT: .entry_point: _amdgpu_gs_main ; CHECK-NEXT: .entry_point_symbol: gs_shader +; CHECK-NEXT: .forward_progress: true ; GFX11-NEXT: .ieee_mode: false ; CHECK-NEXT: .lds_size: 0x200 ; CHECK-NEXT: .mem_ordered: true @@ -130,6 +132,7 @@ ; CHECK-NEXT: .debug_mode: false ; CHECK-NEXT: .entry_point: _amdgpu_hs_main ; CHECK-NEXT: .entry_point_symbol: hs_shader +; CHECK-NEXT: .forward_progress: true ; GFX11-NEXT: .ieee_mode: false ; CHECK-NEXT: .lds_size: 0x1000 ; CHECK-NEXT: .mem_ordered: true @@ -142,6 +145,7 @@ ; CHECK-NEXT: .debug_mode: false ; CHECK-NEXT: .entry_point: _amdgpu_ps_main ; CHECK-NEXT: .entry_point_symbol: ps_shader +; CHECK-NEXT: .forward_progress: true ; GFX11-NEXT: .ieee_mode: false ; CHECK-NEXT: .lds_size: 0 ; CHECK-NEXT: .mem_ordered: true diff --git a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.6.ll b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.6.ll index 644722bdd1273..830872a58f0b8 100644 --- a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.6.ll +++ b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.6.ll @@ -62,6 +62,7 @@ ; CHECK-NEXT: .entry_point_symbol: _amdgpu_cs_main ; CHECK-NEXT: .excp_en: 0 ; CHECK-NEXT: .float_mode: 0xc0 +; CHECK-NEXT: .forward_progress: true ; GFX11-NEXT: .ieee_mode: false ; CHECK-NEXT: .image_op: false ; CHECK-NEXT: .lds_size: 0 @@ -118,6 +119,7 @@ ; CHECK-NEXT: .debug_mode: false ; CHECK-NOT: .entry_point: _amdgpu_gs_main ; CHECK-NEXT: .entry_point_symbol: gs_shader +; CHECK-NEXT: .forward_progress: true ; GFX11-NEXT: .ieee_mode: false ; CHECK-NEXT: .lds_size: 0x200 ; CHECK-NEXT: .mem_ordered: true @@ -130,6 +132,7 @@ ; CHECK-NEXT: .debug_mode: false ; CHECK-NOT: .entry_point: _amdgpu_hs_main ; CHECK-NEXT: .entry_point_symbol: hs_shader +; CHECK-NEXT: .forward_progress: true ; GFX11-NEXT: .ieee_mode: false ; CHECK-NEXT: .lds_size: 0x1000 ; CHECK-NEXT: .mem_ordered: true @@ -142,6 +145,7 @@ ; CHECK-NEXT: .debug_mode: false ; CHECK-NOT: .entry_point: _amdgpu_ps_main ; CHECK-NEXT: .entry_point_symbol: ps_shader +; CHECK-NEXT: .forward_progress: true ; GFX11-NEXT: .ieee_mode: false ; CHECK-NEXT: .lds_size: 0 ; CHECK-NEXT: .mem_ordered: true