diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll
index 39423ea22cbb4..d317a3ef54162 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll
@@ -12,7 +12,7 @@ define void @value_finder_bug(ptr addrspace(5) %store_ptr, ptr addrspace(4) %ptr
 ; GFX10-NEXT: s_setpc_b64 s[30:31]
 %vec = load <4 x float>, ptr addrspace(4) %ptr, align 4
 %vec.3 = extractelement <4 x float> %vec, i32 3
- %shuffle = shufflevector <4 x float> %vec, <4 x float> undef, <2 x i32>
+ %shuffle = shufflevector <4 x float> %vec, <4 x float> poison, <2 x i32>
 %new_vec = insertelement <2 x float> %shuffle, float %vec.3, i32 1
 store <2 x float> %new_vec, ptr addrspace(5) %store_ptr, align 8
 ret void
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
index 298dfcf048fc4..ece08e40ae453 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
@@ -870,10 +870,10 @@ define void @dyn_insertelement_v8f64_const_s_v_v(double %val, i32 %idx) {
 ; GFX11-NEXT: s_setpc_b64 s[30:31]
 entry:
 %insert = insertelement <8 x double> , double %val, i32 %idx
- %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
+ %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
 store volatile <2 x double> %vec.0, ptr addrspace(1) undef
 store volatile <2 x double> %vec.1, ptr addrspace(1) undef
 store volatile <2 x double> %vec.2, ptr addrspace(1) undef
@@ -1081,10 +1081,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_s_s_v(<8 x double> inreg %vec, do
 ; GFX11-NEXT: s_endpgm
 entry:
 %insert = insertelement <8 x double> %vec, double %val, i32 %idx
- %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
+ %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
 store volatile <2 x double> %vec.0, ptr addrspace(1) undef
 store volatile <2 x double> %vec.1, ptr addrspace(1) undef
 store volatile <2 x double> %vec.2, ptr addrspace(1) undef
@@ -1229,10 +1229,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_s_v_s(<8 x double> inreg %vec, do
 ; GFX11-NEXT: s_endpgm
 entry:
 %insert = insertelement <8 x double> %vec, double %val, i32 %idx
- %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
+ %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
 store volatile <2 x double> %vec.0, ptr addrspace(1) undef
 store volatile <2 x double> %vec.1, ptr addrspace(1) undef
 store volatile <2 x double> %vec.2, ptr addrspace(1) undef
@@ -1289,10 +1289,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_s_s(<8 x double> %vec, double i
 ; GFX11-NEXT: s_endpgm
 entry:
 %insert = insertelement <8 x double> %vec, double %val, i32 %idx
- %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
+ %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
 store volatile <2 x double> %vec.0, ptr addrspace(1) undef
 store volatile <2 x double> %vec.1, ptr addrspace(1) undef
 store volatile <2 x double> %vec.2, ptr addrspace(1) undef
@@ -1494,10 +1494,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_s_v_v(<8 x double> inreg %vec, do
 ; GFX11-NEXT: s_endpgm
 entry:
 %insert = insertelement <8 x double> %vec, double %val, i32 %idx
- %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
+ %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
 store volatile <2 x double> %vec.0, ptr addrspace(1) undef
 store volatile <2 x double> %vec.1, ptr addrspace(1) undef
 store volatile <2 x double> %vec.2, ptr addrspace(1) undef
@@ -1617,10 +1617,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_s_v(<8 x double> %vec, double i
 ; GFX11-NEXT: s_endpgm
 entry:
 %insert = insertelement <8 x double> %vec, double %val, i32 %idx
- %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
+ %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
 store volatile <2 x double> %vec.0, ptr addrspace(1) undef
 store volatile <2 x double> %vec.1, ptr addrspace(1) undef
 store volatile <2 x double> %vec.2, ptr addrspace(1) undef
@@ -1677,10 +1677,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_v_s(<8 x double> %vec, double %
 ; GFX11-NEXT: s_endpgm
 entry:
 %insert = insertelement <8 x double> %vec, double %val, i32 %idx
- %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
+ %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
 store volatile <2 x double> %vec.0, ptr addrspace(1) undef
 store volatile <2 x double> %vec.1, ptr addrspace(1) undef
 store volatile <2 x double> %vec.2, ptr addrspace(1) undef
@@ -1794,10 +1794,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_v_v(<8 x double> %vec, double %
 ; GFX11-NEXT: s_endpgm
 entry:
 %insert = insertelement <8 x double> %vec, double %val, i32 %idx
- %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
+ %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
 store volatile <2 x double> %vec.0, ptr addrspace(1) undef
 store volatile <2 x double> %vec.1, ptr addrspace(1) undef
 store volatile <2 x double> %vec.2, ptr addrspace(1) undef
@@ -2401,10 +2401,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_s_s_s_add_1(<8 x double> inreg %v
 entry:
 %idx.add = add i32 %idx, 1
 %insert = insertelement <8 x double> %vec, double %val, i32 %idx.add
- %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
+ %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
 store volatile <2 x double> %vec.0, ptr addrspace(1) undef
 store volatile <2 x double> %vec.1, ptr addrspace(1) undef
 store volatile <2 x double> %vec.2, ptr addrspace(1) undef
@@ -2525,10 +2525,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_v_v_add_1(<8 x double> %vec, do
 entry:
 %idx.add = add i32 %idx, 1
 %insert = insertelement <8 x double> %vec, double %val, i32 %idx.add
- %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
+ %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
 store volatile <2 x double> %vec.0, ptr addrspace(1) undef
 store volatile <2 x double> %vec.1, ptr addrspace(1) undef
 store volatile <2 x double> %vec.2, ptr addrspace(1) undef
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
index 78f33a174980d..a7b063427bc8d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
@@ -1519,7 +1519,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
 ; CHECK-NEXT: S_ENDPGM 0
 %load = load <2 x i64>, ptr addrspace(1) null
- %val = shufflevector <2 x i64> %load, <2 x i64> , <3 x i32>
+ %val = shufflevector <2 x i64> %load, <2 x i64> , <3 x i32>
 call void @external_void_func_v3i64(<3 x i64> %val)
 ret void
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll
index e2dab03e410aa..744a5b7feb48d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll
@@ -322,7 +322,7 @@ define i32 @v_sdot2_shuffle10_a(<2 x i16> %a, <2 x i16> %b, i32 %c) {
 ; GFX10-NEXT: v_alignbit_b32 v0, v0, v0, 16
 ; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, v2
 ; GFX10-NEXT: s_setpc_b64 s[30:31]
- %shuf.a = shufflevector <2 x i16> %a, <2 x i16> undef, <2 x i32>
+ %shuf.a = shufflevector <2 x i16> %a, <2 x i16> poison, <2 x i32>
 %r = call i32 @llvm.amdgcn.sdot2(<2 x i16> %shuf.a, <2 x i16> %b, i32 %c, i1 false)
 ret i32 %r
 }
@@ -349,7 +349,7 @@ define i32 @v_sdot2_shuffle10_b(<2 x i16> %a, <2 x i16> %b, i32 %c) {
 ; GFX10-NEXT: v_alignbit_b32 v1, v1, v1, 16
 ; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, v2
 ; GFX10-NEXT: s_setpc_b64 s[30:31]
- %shuf.b = shufflevector <2 x i16> %b, <2 x i16> undef, <2 x i32>
+ %shuf.b = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32>
 %r = call i32 @llvm.amdgcn.sdot2(<2 x i16> %a, <2 x i16> %shuf.b, i32 %c, i1 false)
 ret i32 %r
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll
index 3acff52874dd9..9e623494a5a04 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll
@@ -306,7 +306,7 @@ define i32 @v_udot2_shuffle10_a(<2 x i16> %a, <2 x i16> %b, i32 %c) {
 ; GFX10-NEXT: v_alignbit_b32 v0, v0, v0, 16
 ; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, v2
 ; GFX10-NEXT: s_setpc_b64 s[30:31]
- %shuf.a = shufflevector <2 x i16> %a, <2 x i16> undef, <2 x i32>
+ %shuf.a = shufflevector <2 x i16> %a, <2 x i16> poison, <2 x i32>
 %r = call i32 @llvm.amdgcn.udot2(<2 x i16> %shuf.a, <2 x i16> %b, i32 %c, i1 false)
 ret i32 %r
 }
@@ -332,7 +332,7 @@ define i32 @v_udot2_shuffle10_b(<2 x i16> %a, <2 x i16> %b, i32 %c) {
 ; GFX10-NEXT: v_alignbit_b32 v1, v1, v1, 16
 ; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, v2
 ; GFX10-NEXT: s_setpc_b64 s[30:31]
- %shuf.b = shufflevector <2 x i16> %b, <2 x i16> undef, <2 x i32>
+ %shuf.b = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32>
 %r = call i32 @llvm.amdgcn.udot2(<2 x i16> %a, <2 x i16> %shuf.b, i32 %c, i1 false)
 ret i32 %r
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/trunc.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/trunc.ll
index 0cb346c35552c..569ed35b150d8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/trunc.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/trunc.ll
@@ -121,7 +121,7 @@ define amdgpu_ps i32 @s_trunc_v2i32_to_v2i16(<2 x i32> inreg %src) {
 ;
 ; FIXME: G_INSERT mishandled
 ; define <2 x i32> @v_trunc_v3i32_to_v3i16(<3 x i32> %src) {
 ; %trunc = trunc <3 x i32> %src to <3 x i16>
-; %ext = shufflevector <3 x i16> %trunc, <3 x i16> undef, <4 x i32>
+; %ext = shufflevector <3 x i16> %trunc, <3 x i16> poison, <4 x i32>
 ; %cast = bitcast <4 x i16> %ext to <2 x i32>
 ; ret <2 x i32> %cast
 ; }
@@ -129,7 +129,7 @@ define amdgpu_ps i32 @s_trunc_v2i32_to_v2i16(<2 x i32> inreg %src) {
 ;
 ; FIXME: G_INSERT mishandled
 ; define amdgpu_ps <2 x i32> @s_trunc_v3i32_to_v3i16(<3 x i32> inreg %src) {
 ; %trunc = trunc <3 x i32> %src to <3 x i16>
-; %ext = shufflevector <3 x i16> %trunc, <3 x i16> undef, <4 x i32>
+; %ext = shufflevector <3 x i16> %trunc, <3 x i16> poison, <4 x i32>
 ; %cast = bitcast <4 x i16> %ext to <2 x i32>
 ; ret <2 x i32> %cast
 ; }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll
index 9cf9839e69d5d..e12ac25867adb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll
@@ -426,7 +426,7 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negC_pack(<8 x half> %A, <8 x
 ; GFX12-NEXT: s_endpgm
 bb:
 %C = load <16 x half>, ptr %Caddr
- %C_shuffle = shufflevector <16 x half> %C, <16 x half> undef, <8 x i32>
+ %C_shuffle = shufflevector <16 x half> %C, <16 x half> poison, <8 x i32>
 %fneg.C_shuffle = fneg <8 x half> %C_shuffle
 %res = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<8 x half> %A, <8 x half> %B, <8 x half> %fneg.C_shuffle , i1 0)
 store <8 x half> %res, ptr addrspace(1) %out
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll
index be1761227f802..b871ac7976222 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll
@@ -381,7 +381,7 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negC_pack(<4 x half> %A, <4 x
 ; GFX12-NEXT: s_endpgm
 bb:
 %C = load <8 x half>, ptr %Caddr
- %C_shuffle = shufflevector <8 x half> %C, <8 x half> undef, <4 x i32>
+ %C_shuffle = shufflevector <8 x half> %C, <8 x half> poison, <4 x i32>
 %fneg.C_shuffle = fneg <4 x half> %C_shuffle
 %res = call <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<4 x half> %A, <4 x half> %B, <4 x half> %fneg.C_shuffle , i1 0)
 store <4 x half> %res, ptr addrspace(1) %out
diff --git a/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll b/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
index ec4e2dda10d3a..d0c7a803ed6b5 100644
--- a/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
@@ -9,7 +9,7 @@ define amdgpu_ps void @adjust_writemask_crash_0_nochain() #0 {
 main_body:
 %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
 %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
- %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32>
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32>
 %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
 %tmp4 = extractelement <4 x float> %tmp3, i32 0
 store volatile float %tmp4, ptr addrspace(1) undef
@@ -25,7 +25,7 @@ define amdgpu_ps void @adjust_writemask_crash_1_nochain() #0 {
 main_body:
 %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
 %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
- %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32>
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32>
 %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
 %tmp4 = extractelement <4 x float> %tmp3, i32 1
 store volatile float %tmp4, ptr addrspace(1) undef
@@ -41,7 +41,7 @@ define amdgpu_ps void @adjust_writemask_crash_0_chain() #0 {
 main_body:
 %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
 %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
- %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32>
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32>
 %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
 %tmp4 = extractelement <4 x float> %tmp3, i32 0
 store volatile float %tmp4, ptr addrspace(1) undef
@@ -57,7 +57,7 @@ define amdgpu_ps void @adjust_writemask_crash_1_chain() #0 {
 main_body:
 %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
 %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
- %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32>
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32>
 %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
 %tmp4 = extractelement <4 x float> %tmp3, i32 1
 store volatile float %tmp4, ptr addrspace(1) undef
@@ -68,7 +68,7 @@ define amdgpu_ps void @adjust_writemask_crash_0_v4() #0 {
 main_body:
 %tmp = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 5, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
 %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
- %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32>
+ %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> poison, <4 x i32>
 %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
 %tmp4 = extractelement <4 x float> %tmp3, i32 0
 store volatile float %tmp4, ptr addrspace(1) undef
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll
index 651b030bd3f79..c94b33334646d 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll
@@ -477,7 +477,7 @@ entry:
 then:
 %x.1 = insertelement <5 x double> , double %x, i32 %idx
- %0 = shufflevector <5 x double> %x.1, <5 x double> , <5 x i32>
+ %0 = shufflevector <5 x double> %x.1, <5 x double> , <5 x i32>
 %x.4 = insertelement <5 x double> %0, double %x, i64 2
 br label %finally
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal_scratch_mergedshader.ll b/llvm/test/CodeGen/AMDGPU/amdpal_scratch_mergedshader.ll
index b610fca02f92e..2ddb2fea5ddc6 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal_scratch_mergedshader.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal_scratch_mergedshader.ll
@@ -18,7 +18,7 @@ define amdgpu_hs void @_amdgpu_hs_main(i32 inreg %arg, i32 inreg %arg1, i32 inre
 .beginls: ; preds = %.entry
 %tmp15 = extractelement <6 x i32> %arg8, i32 3
 %.0.vec.insert.i = insertelement <2 x i32> poison, i32 %tmp15, i32 0
- %.4.vec.insert.i = shufflevector <2 x i32> %.0.vec.insert.i, <2 x i32> undef, <2 x i32>
+ %.4.vec.insert.i = shufflevector <2 x i32> %.0.vec.insert.i, <2 x i32> poison, <2 x i32>
 %tmp16 = bitcast <2 x i32> %.4.vec.insert.i to i64
 br label %.endls
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-rsrc-ptr-ops.ll b/llvm/test/CodeGen/AMDGPU/buffer-rsrc-ptr-ops.ll
index ce55e9171c818..84a4b579fef0c 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-rsrc-ptr-ops.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-rsrc-ptr-ops.ll
@@ -49,7 +49,7 @@ main_body:
 %buf1.int = ptrtoint ptr addrspace(8) %buf1 to i128
 %buf1.vec = bitcast i128 %buf1.int to <4 x i32>
 call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> %buf1.vec, ptr addrspace(8) %buf2, i32 0, i32 0, i32 0)
- %shuffled = shufflevector <2 x ptr addrspace(8)> %buffers, <2 x ptr addrspace(8)> undef, <2 x i32>
+ %shuffled = shufflevector <2 x ptr addrspace(8)> %buffers, <2 x ptr addrspace(8)> poison, <2 x i32>
 %somewhere.next = getelementptr <2 x ptr addrspace(8)>, ptr addrspace(1) %somewhere, i64 1
 store <2 x ptr addrspace(8)> %shuffled, ptr addrspace(1) %somewhere.next
 ret void
diff --git a/llvm/test/CodeGen/AMDGPU/bug-deadlanes.ll b/llvm/test/CodeGen/AMDGPU/bug-deadlanes.ll
index 8a24845980e1a..95f97adcada31 100644
--- a/llvm/test/CodeGen/AMDGPU/bug-deadlanes.ll
+++ b/llvm/test/CodeGen/AMDGPU/bug-deadlanes.ll
@@ -45,7 +45,7 @@ bb1789: ; preds = %bb1750
 %i1879 = bitcast <3 x i32> %i1878 to <3 x float>
 %i1881 = fmul reassoc nnan nsz arcp contract afn <3 x float> %i1540, %i1879
 %i1882 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> poison, i32 poison, i32 0)
- %i1883 = shufflevector <3 x i32> %i1882, <3 x i32> poison, <4 x i32>
+ %i1883 = shufflevector <3 x i32> %i1882, <3 x i32> poison, <4 x i32>
 %i1884 = bitcast <4 x i32> %i1883 to <4 x float>
 %i1885 = shufflevector <4 x float> %i1884, <4 x float> poison, <3 x i32>
 %i1886 = insertelement <3 x i32> poison, i32 %i1819, i64 0
@@ -57,7 +57,7 @@ bb1789: ; preds = %bb1750
 %i1892 = fmul reassoc nnan nsz arcp contract afn <3 x float> %i1885, %i1891
 %i1893 = fmul reassoc nnan nsz arcp contract afn <3 x float> %i1892, %i1881
 %i1894 = bitcast <3 x float> %i1893 to <3 x i32>
- %i1895 = shufflevector <3 x i32> %i1894, <3 x i32> poison, <4 x i32>
+ %i1895 = shufflevector <3 x i32> %i1894, <3 x i32> poison, <4 x i32>
 %i1896 = insertelement <4 x i32> %i1895, i32 %i1819, i64 3
 br label %bb1897
@@ -70,7 +70,7 @@ bb1897: ; preds = %bb1789, %bb1787
 %i1901 = bitcast <3 x i32> %i1900 to <3 x float>
 %i1902 = fadd reassoc nnan nsz arcp contract afn <3 x float> %i1901, %i1899
 %i1903 = bitcast <3 x float> %i1902 to <3 x i32>
- %i1907 = shufflevector <3 x i32> %i1903, <3 x i32> poison, <4 x i32>
+ %i1907 = shufflevector <3 x i32> %i1903, <3 x i32> poison, <4 x i32>
 %i1908 = shufflevector <4 x i32> %i1907, <4 x i32> %__llpc_global_proxy_r11.19, <4 x i32>
 %i1914 = shufflevector <4 x i32> %i1908, <4 x i32> poison, <4 x i32>
 %__llpc_global_proxy_r3.12.vec.extract2358 = extractelement <2 x i32> zeroinitializer, i64 1
diff --git a/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll b/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
index f369b24e0f1b3..5c7172ff8d047 100644
--- a/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
@@ -168,7 +168,7 @@ define void @undef_lo2_v4i16(<2 x i16> %arg0) {
 ; GFX8-NEXT: ; use v[0:1]
 ; GFX8-NEXT: ;;#ASMEND
 ; GFX8-NEXT: s_setpc_b64 s[30:31]
- %undef.lo = shufflevector <2 x i16> %arg0, <2 x i16> undef, <4 x i32>
+ %undef.lo = shufflevector <2 x i16> %arg0, <2 x i16> poison, <4 x i32>
 call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.lo);
 ret void
 }
@@ -193,7 +193,7 @@ define void @undef_lo2_v4f16(<2 x half> %arg0) {
 ; GFX8-NEXT: ; use v[0:1]
 ; GFX8-NEXT: ;;#ASMEND
 ; GFX8-NEXT: s_setpc_b64 s[30:31]
- %undef.lo = shufflevector <2 x half> %arg0, <2 x half> undef, <4 x i32>
+ %undef.lo = shufflevector <2 x half> %arg0, <2 x half> poison, <4 x i32>
 call void asm sideeffect "; use $0", "v"(<4 x half> %undef.lo);
 ret void
 }
@@ -348,7 +348,7 @@ define void @undef_hi2_v4i16(<2 x i16> %arg0) {
 ; GFX8-NEXT: ; use v[0:1]
 ; GFX8-NEXT: ;;#ASMEND
 ; GFX8-NEXT: s_setpc_b64 s[30:31]
- %undef.hi = shufflevector <2 x i16> %arg0, <2 x i16> undef, <4 x i32>
+ %undef.hi = shufflevector <2 x i16> %arg0, <2 x i16> poison, <4 x i32>
 call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.hi);
 ret void
 }
@@ -369,7 +369,7 @@ define void @undef_hi2_v4f16(<2 x half> %arg0) {
 ; GFX8-NEXT: ; use v[0:1]
 ; GFX8-NEXT: ;;#ASMEND
 ; GFX8-NEXT: s_setpc_b64 s[30:31]
- %undef.hi = shufflevector <2 x half> %arg0, <2 x half> undef, <4 x i32>
+ %undef.hi = shufflevector <2 x half> %arg0, <2 x half> poison, <4 x i32>
 call void asm sideeffect "; use $0", "v"(<4 x half> %undef.hi);
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
index 3451e389fef8b..775cad3f3cec1 100644
--- a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
@@ -1454,7 +1454,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT: s_endpgm
 %load = load <2 x i64>, ptr addrspace(1) null
- %val = shufflevector <2 x i64> %load, <2 x i64> , <3 x i32>
+ %val = shufflevector <2 x i64> %load, <2 x i64> , <3 x i32>
 call void @external_void_func_v3i64(<3 x i64> %val)
 ret void
diff --git a/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll b/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
index 6571d515cfef2..49d6254dcd51a 100644
--- a/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
+++ b/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
@@ -1293,7 +1293,7 @@ define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_shuf(ptr addrspace(1) %o
 %out.gep = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
 %a = load <2 x half>, ptr addrspace(1) %gep0
 %add = fadd <2 x half> %a,
- %shuf = shufflevector <2 x half> %add, <2 x half> undef, <2 x i32>
+ %shuf = shufflevector <2 x half> %add, <2 x half> poison, <2 x i32>
 %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %shuf, <2 x half> zeroinitializer)
 %clamp = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> )
diff --git a/llvm/test/CodeGen/AMDGPU/clamp.ll b/llvm/test/CodeGen/AMDGPU/clamp.ll
index 73ed23ab681f0..b3b7ce55a56e6 100644
--- a/llvm/test/CodeGen/AMDGPU/clamp.ll
+++ b/llvm/test/CodeGen/AMDGPU/clamp.ll
@@ -3728,7 +3728,7 @@ define amdgpu_kernel void @v_clamp_v2f16_shuffle(ptr addrspace(1) %out, ptr addr
 %gep0 = getelementptr <2 x half>, ptr addrspace(1) %aptr, i32 %tid
 %out.gep = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
 %a = load <2 x half>, ptr addrspace(1) %gep0
- %shuf = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32>
+ %shuf = shufflevector <2 x half> %a, <2 x half> poison, <2 x i32>
 %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %shuf, <2 x half> zeroinitializer)
 %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> )
diff --git a/llvm/test/CodeGen/AMDGPU/coalesce-vgpr-alignment.ll b/llvm/test/CodeGen/AMDGPU/coalesce-vgpr-alignment.ll
index dcd088e2bd988..380f14b125ead 100644
--- a/llvm/test/CodeGen/AMDGPU/coalesce-vgpr-alignment.ll
+++ b/llvm/test/CodeGen/AMDGPU/coalesce-vgpr-alignment.ll
@@ -14,7 +14,7 @@ bb:
 %lid = tail call i32 @llvm.amdgcn.workitem.id.x()
 %gep1 = getelementptr inbounds <4 x i32>, ptr addrspace(1) %arg, i32 %lid
 %load = load <4 x i32>, ptr addrspace(1) %gep1, align 16
- %shuffle = shufflevector <4 x i32> %load, <4 x i32> undef, <2 x i32>
+ %shuffle = shufflevector <4 x i32> %load, <4 x i32> poison, <2 x i32>
 %gep2 = getelementptr inbounds <2 x i32>, ptr addrspace(1) %arg1, i32 %lid
 store <2 x i32> %shuffle, ptr addrspace(1) %gep2, align 8
 ret void
@@ -27,9 +27,9 @@ define amdgpu_kernel void @test_vector_creation() #0 {
 entry:
 %tmp231 = load <4 x i16>, ptr addrspace(1) undef, align 2
- %vext466 = shufflevector <4 x i16> %tmp231, <4 x i16> undef, <8 x i32>
- %vecinit467 = shufflevector <8 x i16> undef, <8 x i16> %vext466, <8 x i32>
- %vecinit471 = shufflevector <8 x i16> %vecinit467, <8 x i16> undef, <8 x i32>
+ %vext466 = shufflevector <4 x i16> %tmp231, <4 x i16> poison, <8 x i32>
+ %vecinit467 = shufflevector <8 x i16> poison, <8 x i16> %vext466, <8 x i32>
+ %vecinit471 = shufflevector <8 x i16> %vecinit467, <8 x i16> poison, <8 x i32>
 store <8 x i16> %vecinit471, ptr addrspace(1) undef, align 16
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/combine_vloads.ll b/llvm/test/CodeGen/AMDGPU/combine_vloads.ll
index 42a9b80b134cc..e2e37047912a1 100644
--- a/llvm/test/CodeGen/AMDGPU/combine_vloads.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine_vloads.ll
@@ -23,12 +23,12 @@ for.body: ; preds = %for.body, %entry
 %i.01 = phi i32 [ 0, %entry ], [ %tmp19, %for.body ]
 %vecload2 = load <8 x i32>, ptr addrspace(1) %src, align 32
 %0 = bitcast <8 x i32> %vecload2 to <32 x i8>
- %tmp5 = shufflevector <32 x i8> %0, <32 x i8> undef, <8 x i32>
- %tmp8 = shufflevector <32 x i8> %0, <32 x i8> undef, <8 x i32>
+ %tmp5 = shufflevector <32 x i8> %0, <32 x i8> poison, <8 x i32>
+ %tmp8 = shufflevector <32 x i8> %0, <32 x i8> poison, <8 x i32>
 %tmp9 = add nsw <8 x i8> %tmp5, %tmp8
- %tmp12 = shufflevector <32 x i8> %0, <32 x i8> undef, <8 x i32>
+ %tmp12 = shufflevector <32 x i8> %0, <32 x i8> poison, <8 x i32>
 %tmp13 = add nsw <8 x i8> %tmp9, %tmp12
- %tmp16 = shufflevector <32 x i8> %0, <32 x i8> undef, <8 x i32>
+ %tmp16 = shufflevector <32 x i8> %0, <32 x i8> poison, <8 x i32>
 %tmp17 = add nsw <8 x i8> %tmp13, %tmp16
 %scevgep = getelementptr <8 x i8>, ptr addrspace(1) %result, i32 %i.01
 %1 = bitcast <8 x i8> %tmp17 to <2 x i32>
diff --git a/llvm/test/CodeGen/AMDGPU/computeKnownBits-scalar-to-vector-crash.ll b/llvm/test/CodeGen/AMDGPU/computeKnownBits-scalar-to-vector-crash.ll
index 350997c9d8674..e15e701f9e805 100644
--- a/llvm/test/CodeGen/AMDGPU/computeKnownBits-scalar-to-vector-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/computeKnownBits-scalar-to-vector-crash.ll
@@ -3,7 +3,7 @@
 ; CHECK: s_waitcnt
 define <2 x i16> @main(<2 x float>) #0 {
 %2 = bitcast <2 x float> %0 to <4 x i16>
- %3 = shufflevector <4 x i16> %2, <4 x i16> undef, <2 x i32>
+ %3 = shufflevector <4 x i16> %2, <4 x i16> poison, <2 x i32>
 %4 = extractelement <4 x i16> %2, i32 0
 %5 = insertelement <2 x i16> %3, i16 %4, i32 0
 ret <2 x i16> %5
diff --git a/llvm/test/CodeGen/AMDGPU/concat_vectors.ll b/llvm/test/CodeGen/AMDGPU/concat_vectors.ll
index 2998bd3475cbe..a9e4c8991bb2b 100644
--- a/llvm/test/CodeGen/AMDGPU/concat_vectors.ll
+++ b/llvm/test/CodeGen/AMDGPU/concat_vectors.ll
@@ -289,8 +289,8 @@ define amdgpu_kernel void @test_concat_v16i16(ptr addrspace(1) %out, <16 x i16>
 define amdgpu_kernel void @concat_vector_crash(ptr addrspace(1) %out, ptr addrspace(1) %in) {
 bb:
 %tmp = load <2 x float>, ptr addrspace(1) %in, align 4
- %tmp1 = shufflevector <2 x float> %tmp, <2 x float> undef, <8 x i32>
- %tmp2 = shufflevector <8 x float> undef, <8 x float> %tmp1, <8 x i32>
+ %tmp1 = shufflevector <2 x float> %tmp, <2 x float> poison, <8 x i32>
+ %tmp2 = shufflevector <8 x float> poison, <8 x float> %tmp1, <8 x i32>
 store <8 x float> %tmp2, ptr addrspace(1) %out, align 32
 ret void
 }
@@ -301,8 +301,8 @@ define amdgpu_kernel void @concat_vector_crash2(ptr addrspace(1) %out, ptr addrs
 %tmp = load i32, ptr addrspace(1) %in, align 1
 %tmp1 = trunc i32 %tmp to i24
 %tmp2 = bitcast i24 %tmp1 to <3 x i8>
- %tmp3 = shufflevector <3 x i8> %tmp2, <3 x i8> undef, <8 x i32>
- %tmp4 = shufflevector <8 x i8> %tmp3, <8 x i8> , <8 x i32>
+ %tmp3 = shufflevector <3 x i8> %tmp2, <3 x i8> poison, <8 x i32>
+ %tmp4 = shufflevector <8 x i8> %tmp3, <8 x i8> , <8 x i32>
 store <8 x i8> %tmp4, ptr addrspace(1) %out, align 8
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/dagcomb-shuffle-vecextend-non2.ll b/llvm/test/CodeGen/AMDGPU/dagcomb-shuffle-vecextend-non2.ll
index e66fdce283026..09607c982ed7f 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcomb-shuffle-vecextend-non2.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcomb-shuffle-vecextend-non2.ll
@@ -17,7 +17,7 @@ define amdgpu_ps void @main(i32 %in1, i32 inreg %arg) local_unnamed_addr {
 bb:
 %__llpc_global_proxy_r5.12.vec.insert = insertelement <4 x i32> poison, i32 %in1, i32 3
- %tmp3 = shufflevector <4 x i32> %__llpc_global_proxy_r5.12.vec.insert, <4 x i32> undef, <3 x i32>
+ %tmp3 = shufflevector <4 x i32> %__llpc_global_proxy_r5.12.vec.insert, <4 x i32> poison, <3 x i32>
 %tmp4 = bitcast <3 x i32> %tmp3 to <3 x float>
 %a2.i123 = extractelement <3 x float> %tmp4, i32 2
 %tmp5 = bitcast float %a2.i123 to i32
@@ -26,7 +26,7 @@ bb:
 bb12:
 %__llpc_global_proxy_r2.0 = phi <4 x i32> [ %__llpc_global_proxy_r2.0.vec.insert196, %bb ], [ poison, %.entry ]
- %tmp6 = shufflevector <4 x i32> %__llpc_global_proxy_r2.0, <4 x i32> undef, <3 x i32>
+ %tmp6 = shufflevector <4 x i32> %__llpc_global_proxy_r2.0, <4 x i32> poison, <3 x i32>
 %tmp7 = bitcast <3 x i32> %tmp6 to <3 x float>
 %a0.i = extractelement <3 x float> %tmp7, i32 0
 ret void
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
index f88d3f1261f0b..2464275a87992 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
@@ -146,7 +146,7 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
 %0 = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
 %.i2243 = extractelement <3 x float> %0, i32 2
 %1 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> undef, i32 0, i32 0)
- %2 = shufflevector <3 x i32> %1, <3 x i32> poison, <4 x i32>
+ %2 = shufflevector <3 x i32> %1, <3 x i32> poison, <4 x i32>
 %3 = bitcast <4 x i32> %2 to <4 x float>
 %.i2248 = extractelement <4 x float> %3, i32 2
 %.i2249 = fmul reassoc nnan nsz arcp contract afn float %.i2243, %.i2248
@@ -159,17 +159,17 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
 %.i0364 = extractelement <2 x float> %7, i32 0
 %8 = call float @llvm.amdgcn.image.sample.2d.f32.f32(i32 1, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
 %9 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> undef, i32 112, i32 0)
- %10 = shufflevector <3 x i32> %9, <3 x i32> poison, <4 x i32>
+ %10 = shufflevector <3 x i32> %9, <3 x i32> poison, <4 x i32>
 %11 = bitcast <4 x i32> %10 to <4 x float>
 %.i2360 = extractelement <4 x float> %11, i32 2
 %.i2363 = fmul reassoc nnan nsz arcp contract afn float %.i2360, %8
 %12 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> undef, i32 96, i32 0)
- %13 = shufflevector <3 x i32> %12, <3 x i32> poison, <4 x i32>
+ %13 = shufflevector <3 x i32> %12, <3 x i32> poison, <4 x i32>
 %14 = bitcast <4 x i32> %13 to <4 x float>
 %.i2367 = extractelement <4 x float> %14, i32 2
 %.i2370 = fmul reassoc nnan nsz arcp contract afn float %.i0364, %.i2367
 %15 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> undef, i32 32, i32 0)
- %16 = shufflevector <3 x i32> %15, <3 x i32> poison, <4 x i32>
+ %16 = shufflevector <3 x i32> %15, <3 x i32> poison, <4 x i32>
 %17 = bitcast <4 x i32> %16 to <4 x float>
 %.i2373 = extractelement <4 x float> %17, i32 2
 %.i2376 = fsub reassoc nnan nsz arcp contract afn float %.i2373, %.i2370
@@ -212,12 +212,12 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
 %.i2466 = fmul reassoc nnan nsz arcp contract afn float %.i2465, %43
 %.i2469 = fmul reassoc nnan nsz arcp contract afn float %.i2415, %.i2466
 %45 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> undef, i32 64, i32 0)
- %46 = shufflevector <3 x i32> %45, <3 x i32> poison, <4 x i32>
+ %46 = shufflevector <3 x i32> %45, <3 x i32> poison, <4 x i32>
 %47 = bitcast <4 x i32> %46 to <4 x float>
 %.i2476 = extractelement <4 x float> %47, i32 2
 %.i2479 = fmul reassoc nnan nsz arcp contract afn float %.i2476, %18
 %48 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> undef, i32 80, i32 0)
- %49 = shufflevector <3 x i32> %48, <3 x i32> poison, <4 x i32>
+ %49 = shufflevector <3 x i32> %48, <3 x i32> poison, <4 x i32>
 %50 = bitcast <4 x i32> %49 to <4 x float>
 %.i2482 = extractelement <4 x float> %50, i32 2
 %.i2485 = fsub reassoc nnan nsz arcp contract afn float %.i2482, %.i2479
@@ -230,7 +230,7 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
 %.i2522 = fadd reassoc nnan nsz arcp contract afn float %.i2521, %.i2516
 %.i2525 = fmul reassoc nnan nsz arcp contract afn float %.i2522, %43
 %52 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> undef, i32 16, i32 0)
- %53 = shufflevector <3 x i32> %52, <3 x i32> poison, <4 x i32>
+ %53 = shufflevector <3 x i32> %52, <3 x i32> poison, <4 x i32>
 %54 = bitcast <4 x i32> %53 to <4 x float>
 %.i2530 = extractelement <4 x float> %54, i32 2
 %.i2531 = fmul reassoc nnan nsz arcp contract afn float %.i2333, %.i2530
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombiner-bug-illegal-vec4-int-to-fp.ll b/llvm/test/CodeGen/AMDGPU/dagcombiner-bug-illegal-vec4-int-to-fp.ll
index ccd497670a3f0..f5bf1793b9f40 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombiner-bug-illegal-vec4-int-to-fp.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombiner-bug-illegal-vec4-int-to-fp.ll
@@ -16,7 +16,7 @@ entry:
 %sint = load i32, ptr addrspace(1) %in
 %conv = sitofp i32 %sint to float
 %0 = insertelement <4 x float> poison, float %conv, i32 0
- %splat = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer
+ %splat = shufflevector <4 x float> %0, <4 x float> poison, <4 x i32> zeroinitializer
 store <4 x float> %splat, ptr addrspace(1) %out
 ret void
 }
@@ -30,7 +30,7 @@ entry:
 %uint = load i32, ptr addrspace(1) %in
 %conv = uitofp i32 %uint to float
 %0 = insertelement <4 x float> poison, float %conv, i32 0
- %splat = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer
+ %splat = shufflevector <4 x float> %0, <4 x float> poison, <4 x i32> zeroinitializer
 store <4 x float> %splat, ptr addrspace(1) %out
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/debug-value.ll b/llvm/test/CodeGen/AMDGPU/debug-value.ll
index 755f517156a7a..e7b56cbe2e4d4 100644
--- a/llvm/test/CodeGen/AMDGPU/debug-value.ll
+++ b/llvm/test/CodeGen/AMDGPU/debug-value.ll
@@ -7,7 +7,7 @@ bb:
 %tmp = load i32, ptr addrspace(1) undef, align 4
 %tmp1 = load <4 x float>, ptr addrspace(1) undef, align 16
 %tmp2 = sext i32 %tmp to i64
- %tmp3 = shufflevector <4 x float> undef, <4 x float> %tmp1, <2 x i32>
+ %tmp3 = shufflevector <4 x float> poison, <4 x float> %tmp1, <2 x i32>
 %tmp4 = call float @barney() #2
 %tmp9 = getelementptr inbounds %struct.wombat, ptr addrspace(1) %arg, i64 %tmp2, i32 2, i64 0
 %tmp10 = load i32, ptr addrspace(1) %tmp9, align 4
@@ -53,7 +53,7 @@ bb28: ; preds = %bb25, %bb21
 %tmp45 = fadd float undef, undef
 %tmp46 = fdiv float %tmp44, %tmp45
 %tmp47 = insertelement <4 x float> poison, float %tmp46, i32 0
- %tmp48 = shufflevector <4 x float> %tmp47, <4 x float> undef, <4 x i32> zeroinitializer
+ %tmp48 = shufflevector <4 x float> %tmp47, <4 x float> poison, <4 x i32> zeroinitializer
 %tmp49 = fsub <4 x float> %tmp48, %tmp40
 %tmp50 = extractelement <4 x float> %tmp41, i32 1
 %tmp51 = extractelement <4 x float> %tmp42, i32 2
@@ -71,7 +71,7 @@ bb28: ; preds = %bb25, %bb21
 call void @llvm.dbg.value(metadata <4 x float> %tmp29, metadata !3, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)) #2, !dbg !5
 %tmp59 = bitcast i64 %tmp35 to <2 x float>
 %tmp60 = insertelement <2 x float> poison, float %tmp58, i32 0
- %tmp61 = shufflevector <2 x float> %tmp60, <2 x float> undef, <2 x i32> zeroinitializer
+ %tmp61 = shufflevector <2 x float> %tmp60, <2 x float> poison, <2 x i32> zeroinitializer
 %tmp62 = fmul <2 x float> %tmp61, undef
 %tmp63 = fsub <2 x float> %tmp62, %tmp59
 %tmp64 = extractelement <2 x float> %tmp63, i64 0
diff --git a/llvm/test/CodeGen/AMDGPU/debug-value2.ll b/llvm/test/CodeGen/AMDGPU/debug-value2.ll
index bd27100a27cfc..3a16476adf5c1 100644
--- a/llvm/test/CodeGen/AMDGPU/debug-value2.ll
+++ b/llvm/test/CodeGen/AMDGPU/debug-value2.ll
@@ -26,7 +26,7 @@ entry:
 %m_scaleMotion = getelementptr inbounds %struct.ShapeData, ptr addrspace(1) %call, i64 0, i32 4
 %tmp2 = load <4 x float>, ptr addrspace(1) %m_scaleMotion, align 16
 %splat.splatinsert = insertelement <4 x float> poison, float %time, i32 0
- %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
+ %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
 %tmp3 = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %tmp2, <4 x float> %splat.splat, <4 x float> )
 %tmp4 = load <4 x float>, ptr addrspace(1) %call, align 16
 %m_quaternion = getelementptr inbounds %struct.ShapeData, ptr addrspace(1) %call, i64 0, i32 1
@@ -61,8 +61,8 @@ entry:
 %tmp24 = insertelement <4 x float> %tmp23, float %tmp19, i32 1
 %tmp25 = insertelement <4 x float> %tmp24, float %tmp22, i32 2
 %tmp26 = extractelement <4 x float> %tmp5, i64 3
- %splat.splat.i8.i = shufflevector <4 x float> %tmp5, <4 x float> undef, <4 x i32>
- %splat.splat2.i9.i = shufflevector <4 x float> %tmp10, <4 x float> undef, <4 x i32>
+ %splat.splat.i8.i = shufflevector <4 x float> %tmp5, <4 x float> poison, <4 x i32>
+ %splat.splat2.i9.i = shufflevector <4 x float> %tmp10, <4 x float> poison, <4 x i32>
 %mul3.i10.i = fmul <4 x float> %tmp5, %splat.splat2.i9.i
 %tmp27 = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %splat.splat.i8.i, <4 x float> %tmp10, <4 x float> %mul3.i10.i)
 %add.i11.i = fadd <4 x float> %tmp27, %tmp25
@@ -94,7 +94,7 @@ entry:
 %tmp52 = insertelement <4 x float> , float %tmp44, i32 0
 %tmp53 = insertelement <4 x float> %tmp52, float %tmp48, i32 1
 %tmp54 = insertelement <4 x float> %tmp53, float %tmp51, i32 2
- %splat.splat.i.i = shufflevector <4 x float> %tmp39, <4 x float> undef, <4 x i32>
+ %splat.splat.i.i = shufflevector <4 x float> %tmp39, <4 x float> poison, <4 x i32>
 %tmp55 = extractelement <4 x float> %tmp5, i32 3
 %mul3.i.i = fmul <4 x float> %splat.splat.i8.i, %tmp39
 %tmp56 = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %splat.splat.i.i, <4 x float> %vecinit5.i.i, <4 x float> %mul3.i.i)
@@ -113,12 +113,12 @@ entry:
 %tmp66 = extractelement <4 x float> %tmp1, i64 3
 %mul3 = fmul float %tmp66, %time
 %tmp67 = insertelement <4 x float> %tmp1, float 0.000000e+00, i32 3
- %tmp68 = shufflevector <4 x float> %tmp67, <4 x float> %tmp1, <4 x i32>
+ %tmp68 = shufflevector <4 x float> %tmp67, <4 x float> %tmp1, <4 x i32>
 %vecinit3.i.i = shufflevector <4 x float> %tmp68, <4 x float> %tmp1, <4 x i32>
 %tmp69 = fcmp oeq <4 x float> %vecinit3.i.i, zeroinitializer
 %tmp70 = sext <4 x i1> %tmp69 to <4 x i32>
- %tmp71 = shufflevector <4 x i32> %tmp70, <4 x i32> undef, <2 x i32>
- %tmp72 = shufflevector <4 x i32> %tmp70, <4 x i32> undef, <2 x i32>
+ %tmp71 = shufflevector <4 x i32> %tmp70, <4 x i32> poison, <2 x i32>
+ %tmp72 = shufflevector <4 x i32> %tmp70, <4 x i32> poison, <2 x i32>
 %tmp73 = and <2 x i32> %tmp71, %tmp72
 %tmp74 = extractelement <2 x i32> %tmp73, i64 0
 %tmp75 = extractelement <2 x i32> %tmp73, i64 1
@@ -205,7 +205,7 @@ bb141: ; preds = %bb109, %bb98, %bb96
 %tmp143 = phi float [ %tmp95, %bb86 ], [ %tmp140, %bb109 ], [ %tmp107, %bb98 ], [ %tmp84, %bb96 ]
 %tmp144 = tail call float @llvm.amdgcn.rsq.f32(float %tmp143)
 %tmp145 = insertelement <4 x float> poison, float %tmp144, i32 0
- %tmp146 = shufflevector <4 x float> %tmp145, <4 x float> undef, <4 x i32> zeroinitializer
+ %tmp146 = shufflevector <4 x float> %tmp145, <4 x float> poison, <4 x i32> zeroinitializer
 %tmp147 = fmul <4 x float> %tmp142, %tmp146
 br label %qtSet.exit
diff --git a/llvm/test/CodeGen/AMDGPU/early-if-convert.ll b/llvm/test/CodeGen/AMDGPU/early-if-convert.ll
index 028cd9110eb2b..8be5d1a3fde7c 100644
--- a/llvm/test/CodeGen/AMDGPU/early-if-convert.ll
+++ b/llvm/test/CodeGen/AMDGPU/early-if-convert.ll
@@ -331,7 +331,7 @@ if:
 endif:
 %r = phi <3 x i32> [ %v, %entry ], [ %u, %if ]
- %r.ext = shufflevector <3 x i32> %r, <3 x i32> undef, <4 x i32>
+ %r.ext = shufflevector <3 x i32> %r, <3 x i32> poison, <4 x i32>
 call void asm sideeffect "; reg use $0", "s"(<4 x i32> %r.ext) #0
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll b/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll
index 6dabd8c0b83ea..24dc5b5bb3150 100644
--- a/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll
@@ -156,7 +156,7 @@ F:
 exit:
 %m = phi <8 x i16> [ %t, %T ], [ %f, %F ]
- %v2 = shufflevector <8 x i16> %m, <8 x i16> undef, <4 x i32>
+ %v2 = shufflevector <8 x i16> %m, <8 x i16> poison, <4 x i32>
 %b2 = icmp sgt <4 x i16> %v2,
 %r2 = select <4 x i1> %b2, <4 x i16> , <4 x i16>
 ret <4 x i16> %r2
@@ -317,7 +317,7 @@ F:
 exit:
 %m = phi <8 x i16> [ %t, %T ], [ %f, %F ]
- %v2 = shufflevector <8 x i16> %m, <8 x i16> undef, <4 x i32>
+ %v2 = shufflevector <8 x i16> %m, <8 x i16> poison, <4 x i32>
 %b2 = icmp sgt <4 x i16> %v2,
 %r2 = select <4 x i1> %b2, <4 x i16> , <4 x i16>
 ret <4 x i16> %r2
@@ -482,7 +482,7 @@ F:
 exit:
 %m = phi <8 x half> [ %t, %T ], [ %f, %F ]
- %v2 = shufflevector <8 x half> %m, <8 x half> undef, <4 x i32>
+ %v2 = shufflevector <8 x half> %m, <8 x half> poison, <4 x i32>
 %b2 = fcmp ugt <4 x half> %v2,
 %r2 = select <4 x i1> %b2, <4 x half> , <4 x half>
 ret <4 x half> %r2
@@ -685,7 +685,7 @@ F:
 exit:
 %m = phi <16 x i16> [ %t, %T ], [ %f, %F ]
- %v2 = shufflevector <16 x i16> %m, <16 x i16> undef, <4 x i32>
+ %v2 = shufflevector <16 x i16> %m, <16 x i16> poison, <4 x i32>
 %b2 = icmp sgt <4 x i16> %v2,
 %r2 = select <4 x i1> %b2, <4 x i16> , <4 x i16>
 ret <4 x i16> %r2
@@ -890,7 +890,7 @@ F:
 exit:
 %m = phi <16 x i16> [ %t, %T ], [ %f, %F ]
- %v2 = shufflevector <16 x i16> %m, <16 x i16> undef, <4 x i32>
+ %v2 = shufflevector <16 x i16> %m, <16 x i16> poison, <4 x i32>
 %b2 = icmp sgt <4 x i16> %v2,
 %r2 = select <4 x i1> %b2, <4 x i16> , <4 x i16>
 ret <4 x i16> %r2
@@ -1099,7 +1099,7 @@ F:
 exit:
 %m = phi <16 x half> [ %t, %T ], [ %f, %F ]
- %v2 = shufflevector <16 x half> %m, <16 x half> undef, <4 x i32>
+ %v2 = shufflevector <16 x half> %m, <16 x half> poison, <4 x i32>
 %b2 = fcmp ugt <4 x half> %v2,
 %r2 = select <4 x i1> %b2, <4 x half> , <4 x half>
 ret <4 x half> %r2
@@ -1184,8 +1184,8 @@ define <8 x i16> @large_vector(ptr addrspace(3) %p, i32 %idxp) {
 %x.7 = load i16, ptr addrspace(3) %p.7, align 2
 %v3 = insertelement <8 x i16> %v3p, i16 %x.7, i32 1
- %z.1 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32>
- %z.2 = shufflevector <8 x i16> %z.1, <8 x i16> %v2, <8 x i32>
+ %z.1 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32>
+ %z.2 = shufflevector <8 x i16> %z.1, <8 x i16> %v2, <8 x i32>
 %z.3 = shufflevector <8 x i16> %z.2, <8 x i16> %v3, <8 x i32>
 ret <8 x i16> %z.3
 }
@@ -1464,7 +1464,7 @@ F:
 exit:
 %m = phi <16 x i16> [ %t, %T ], [ %f, %F ]
- %v2 = shufflevector <16 x i16> %m, <16 x i16> undef, <8 x i32>
+ %v2 = shufflevector <16 x i16> %m, <16 x i16> poison, <8 x i32>
 %b2 = icmp ugt <8 x i16> %v2,
 %r2 = select <8 x i1> %b2, <8 x i16> , <8 x i16>
 ret <8 x i16> %r2
@@ -1755,7 +1755,7 @@ F:
 exit:
 %m = phi <16 x half> [ %t, %T ], [ %f, %F ]
- %v2 = shufflevector <16 x half> %m, <16 x half> undef, <8 x i32>
+ %v2 = shufflevector <16 x half> %m, <16 x half> poison, <8 x i32>
 %b2 = fcmp ugt <8 x half> %v2,
 %r2 = select <8 x i1> %b2, <8 x half> , <8 x half>
 ret <8 x half> %r2
diff --git a/llvm/test/CodeGen/AMDGPU/extract-subvector-equal-length.ll b/llvm/test/CodeGen/AMDGPU/extract-subvector-equal-length.ll
index bab78aee3ff18..4cd39590bfc34 100644
--- a/llvm/test/CodeGen/AMDGPU/extract-subvector-equal-length.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract-subvector-equal-length.ll
@@ -13,7 +13,7 @@ define <3 x i32> @quux() {
 ; CHECK-NEXT: v_mov_b32_e32 v2, 1
 ; CHECK-NEXT: s_setpc_b64 s[30:31]
 bb:
- %tmp = shufflevector <4 x i8> , <4 x i8> undef, <3 x i32>
+ %tmp = shufflevector <4 x i8> , <4 x i8> poison, <3 x i32>
 %tmp1 = extractelement <3 x i8> %tmp, i64 0
 %tmp2 = zext i8 %tmp1 to i32
 %tmp3 = insertelement <3 x i32> poison, i32 %tmp2, i32 0
diff --git a/llvm/test/CodeGen/AMDGPU/extract-subvector.ll b/llvm/test/CodeGen/AMDGPU/extract-subvector.ll
index 1e86842be4e5e..61c0b8b861d5b 100644
--- a/llvm/test/CodeGen/AMDGPU/extract-subvector.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract-subvector.ll
@@ -90,7 +90,7 @@ F:
 exit:
 %m = phi <8 x i16> [ %t, %T ], [ %f, %F ]
- %v2 = shufflevector <8 x i16> %m, <8 x i16> undef, <2 x i32>
+ %v2 = shufflevector <8 x i16> %m, <8 x i16> poison, <2 x i32>
 %b2 = icmp sgt <2 x i16> %v2,
 %r2 = select <2 x i1> %b2, <2 x i16> , <2 x i16>
 ret <2 x i16> %r2
@@ -161,7 +161,7 @@ F:
 exit:
 %m = phi <8 x i64> [ %t, %T ], [ %f, %F ]
- %v2 = shufflevector <8 x i64> %m, <8 x i64> undef, <2 x i32>
+ %v2 = shufflevector <8 x i64> %m, <8 x i64> poison, <2 x i32>
 %b2 = icmp sgt <2 x i64> %v2,
 %r2 = select <2 x i1> %b2, <2 x i64> , <2 x i64>
 ret <2 x i64> %r2
@@ -238,7 +238,7 @@ F:
 exit:
 %m = phi <8 x i64> [ %t, %T ], [ %f, %F ]
- %v2 = shufflevector <8 x i64> %m, <8 x i64> undef, <4 x i32>
+ %v2 = shufflevector <8 x i64> %m, <8 x i64> poison, <4 x i32>
 %b2 = icmp sgt <4 x i64> %v2,
 %r2 = select <4 x i1> %b2, <4 x i64> , <4 x i64>
 ret <4 x i64> %r2
@@ -342,7 +342,7 @@ F:
 exit:
 %m = phi <16 x i64> [ %t, %T ], [ %f, %F ]
- %v2 = shufflevector <16 x i64> %m, <16 x i64> undef, <8 x i32>
+ %v2 = shufflevector <16 x i64> %m, <16 x i64> poison, <8 x i32>
 %b2 = icmp sgt <8 x i64> %v2,
 %r2 = select <8 x i1> %b2, <8 x i64> , <8 x i64>
 ret <8 x i64> %r2
@@ -413,7 +413,7 @@ F:
 exit:
 %m = phi <8 x double> [ %t, %T ], [ %f, %F ]
- %v2 = shufflevector <8 x double> %m, <8 x double> undef, <2 x i32>
+ %v2 = shufflevector <8 x double> %m, <8 x double> poison, <2 x i32>
 %b2 = fcmp ogt <2 x double> %v2,
 %r2 = select <2 x i1> %b2, <2 x double> , <2 x double>
 ret <2 x double> %r2
@@ -490,7 +490,7 @@ F:
 exit:
 %m = phi <8 x double> [ %t, %T ], [ %f, %F ]
- %v2 = shufflevector <8 x double> %m, <8 x double> undef, <4 x i32>
+ %v2 = shufflevector <8 x double> %m, <8 x double> poison, <4 x i32>
 %b2 = fcmp ogt <4 x double> %v2,
 %r2 = select <4 x i1> %b2, <4 x double> , <4 x double>
 ret <4 x double> %r2
@@ -594,7 +594,7 @@ F:
 exit:
 %m = phi <16 x double> [ %t, %T ], [ %f, %F ]
- %v2 = shufflevector <16 x double> %m, <16 x double> undef, <8 x i32>
+ %v2 = shufflevector <16 x double> %m, <16 x double> poison, <8 x i32>
 %b2 = fcmp ogt <8 x double> %v2,
 %r2 = select <8 x i1> %b2, <8 x double> , <8 x double>
 ret <8 x double> %r2
diff --git a/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll b/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
index 1319c526868c3..a44231f39ae6f 100644
--- a/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
@@ -27,7 +27,7 @@ define amdgpu_hs void @main(ptr addrspace(6) inreg %arg) {
 main_body:
 %tmp25 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) undef, i32 undef, i32 0, i32 0)
 %tmp27 = bitcast <4 x float> %tmp25 to <16 x i8>
- %tmp28 = shufflevector <16 x i8> %tmp27, <16 x i8> undef, <12 x i32>
+ %tmp28 = shufflevector <16 x i8> %tmp27, <16 x i8> poison, <12 x i32>
 %tmp29 = bitcast <12 x i8> %tmp28 to <3 x i32>
 call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %tmp29, ptr addrspace(8) undef, i32 undef, i32 0, i32 0) #3
 ret void
diff --git a/llvm/test/CodeGen/AMDGPU/fmac.sdwa.ll b/llvm/test/CodeGen/AMDGPU/fmac.sdwa.ll
index dc60007cad977..827e5da15e50c 100644
--- a/llvm/test/CodeGen/AMDGPU/fmac.sdwa.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmac.sdwa.ll
@@ -39,7 +39,7 @@ bb14: ; preds = %bb14, %bb11
 %tmp25 = load float, ptr addrspace(4) %tmp24, align 4
 %tmp26 = fptrunc float %tmp25 to half
 %tmp27 = insertelement <4 x half> poison, half %tmp26, i32 0
- %tmp28 = shufflevector <4 x half> %tmp27, <4 x half> undef, <4 x i32> zeroinitializer
+ %tmp28 = shufflevector <4 x half> %tmp27, <4 x half> poison, <4 x i32> zeroinitializer
 %vec.A.0 = extractelement <4 x half> %tmp21, i32 0
 %vec.B.0 = extractelement <4 x half> %tmp28, i32 0
 %vec.C.0 = extractelement <4 x half> %tmp15, i32 0
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll b/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll
index e1791daa3aa0c..781a2ca3146f5 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll
@@ -1585,7 +1585,7 @@ define amdgpu_kernel void @fnge_select_f32_multi_use_regression(float %.i2369) {
 bb: ; preds = %.entry
 %i2 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> zeroinitializer, i32 1, i32 0)
- %i3 = shufflevector <2 x i32> %i2, <2 x i32> zeroinitializer, <4 x i32>
+ %i3 = shufflevector <2 x i32> %i2, <2 x i32> zeroinitializer, <4 x i32>
 %i4 = bitcast <4 x i32> %i3 to <4 x float>
 %.i0753 = extractelement <4 x float> %i4, i64 0
 br label %bb5
diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
index 2322b29abaa10..c4ea8fc67122c 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
@@ -1841,7 +1841,7 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 {
 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
 %load = load <2 x i64>, ptr addrspace(1) null
- %val = shufflevector <2 x i64> %load, <2 x i64> , <3 x i32>
+ %val = shufflevector <2 x i64> %load, <2 x i64> , <3 x i32>
 call amdgpu_gfx void @external_void_func_v3i64(<3 x i64> %val)
 ret void
@@ -10439,7 +10439,7 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 {
 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
 %load = load <2 x i64>, ptr addrspace(4) null
- %val = shufflevector <2 x i64> %load, <2 x i64> , <3 x i32>
+ %val = shufflevector <2 x i64> %load, <2 x i64> , <3 x i32>
 call amdgpu_gfx void @external_void_func_v3i64_inreg(<3 x i64> inreg %val)
 ret void
diff --git a/llvm/test/CodeGen/AMDGPU/greedy-reverse-local-assignment.ll b/llvm/test/CodeGen/AMDGPU/greedy-reverse-local-assignment.ll
index 6c921441c972d..6e82a294243d2 100644
--- a/llvm/test/CodeGen/AMDGPU/greedy-reverse-local-assignment.ll
+++ b/llvm/test/CodeGen/AMDGPU/greedy-reverse-local-assignment.ll
@@ -48,6 +48,6 @@ define <4 x half> @shuffle_v4f16_234u(ptr addrspace(1) %arg0, ptr addrspace(1) %
 ; NOXNACK-NEXT: s_setpc_b64 s[30:31]
 %val0 = load <4 x half>, ptr addrspace(1) %arg0
 %val1 = load <4 x half>, ptr addrspace(1) %arg1
- %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32>
+ %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32>
 ret <4 x half> %shuffle
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll
index b4f05bce37680..dbd324b0334ae 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll
@@ -36,7 +36,7 @@ main_body:
 define amdgpu_ps <2 x float> @image_load_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
 main_body:
 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
- %ext = shufflevector <3 x half> %tex, <3 x half> undef, <4 x i32>
+ %ext = shufflevector <3 x half> %tex, <3 x half> poison, <4 x i32>
 %r = bitcast <4 x half> %ext to <2 x float>
 ret <2 x float> %r
 }
@@ -90,7 +90,7 @@ main_body:
 define amdgpu_ps <2 x float> @image_load_3d_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
 main_body:
 %tex = call <3 x half> @llvm.amdgcn.image.load.3d.v3f16.i32(i32 7, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
- %ext = shufflevector <3 x half> %tex, <3 x half> undef, <4 x i32>
+ %ext = shufflevector <3 x half> %tex, <3 x half> poison, <4 x i32>
 %res = bitcast <4 x half> %ext to <2 x float>
 ret <2 x float> %res
 }
@@ -129,7 +129,7 @@ main_body:
 define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <2 x float> %in) {
 main_body:
 %r = bitcast <2 x float> %in to <4 x half>
- %data = shufflevector <4 x half> %r, <4 x half> undef, <3 x i32>
+ %data = shufflevector <4 x half> %r, <4 x half> poison, <3 x i32>
 call void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half> %data, i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll
index 389acd31a9d08..8861ff4c78137 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll
@@ -315,7 +315,7 @@ define amdgpu_ps <2 x float> @image_sample_b_2d_v3f16(<8 x i32> inreg %rsrc, <4
 ; GFX12-NEXT: ; return to shader part epilog
 main_body:
 %tex = call <3 x half> @llvm.amdgcn.image.sample.b.2d.v3f16.f32.f32(i32 7, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
- %tex_wide = shufflevector <3 x half> %tex, <3 x half> undef, <4 x i32>
+ %tex_wide = shufflevector <3 x half> %tex, <3 x half> poison, <4 x i32>
 %r = bitcast <4 x half> %tex_wide to <2 x float>
 ret <2 x float> %r
 }
@@ -410,7 +410,7 @@ define amdgpu_ps <4 x float> @image_sample_b_2d_v3f16_tfe(<8 x i32> inreg %rsrc,
 main_body:
 %tex = call {<3 x half>,i32} @llvm.amdgcn.image.sample.b.2d.v3f16i32.f32.f32(i32 7, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 0)
 %tex.vec = extractvalue {<3 x half>, i32} %tex, 0
- %tex.vec_wide = shufflevector <3 x half> %tex.vec, <3 x half> undef, <4 x i32>
+ %tex.vec_wide = shufflevector <3 x half> %tex.vec, <3 x half> poison, <4 x i32>
 %tex.err = extractvalue {<3 x half>, i32} %tex, 1
 %tex.vecf = bitcast <4 x half> %tex.vec_wide to <2 x float>
 %tex.vecf.0 = extractelement <2 x float> %tex.vecf, i32 0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
index 6d8ce071371c9..a713b1db119b8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
@@ -2394,7 +2394,7 @@ define amdgpu_ps <2 x float> @adjust_writemask_sample_01(<8 x i32> inreg %rsrc,
 ; GFX12-NEXT: ; return to shader part epilog
 main_body:
 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
- %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32>
+ %out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32>
 ret <2 x float> %out
 }
@@ -2436,7 +2436,7 @@ define amdgpu_ps <3 x float> @adjust_writemask_sample_012(<8 x i32> inreg %rsrc,
 ; GFX12-NEXT: ; return to shader part epilog
 main_body:
 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
- %out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32>
+ %out = shufflevector <4 x float> %r, <4 x float> poison, <3 x i32>
 ret <3 x float> %out
 }
@@ -2478,7 +2478,7 @@ define amdgpu_ps <2 x float> @adjust_writemask_sample_12(<8 x i32> inreg %rsrc,
 ; GFX12-NEXT: ; return to shader part epilog
 main_body:
 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
- %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32>
+ %out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32>
 ret <2 x float> %out
 }
@@ -2520,7 +2520,7 @@ define amdgpu_ps <2 x float> @adjust_writemask_sample_03(<8 x i32> inreg %rsrc,
 ; GFX12-NEXT: ; return to shader part epilog
 main_body:
 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
- %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32>
+ %out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32>
 ret <2 x float> %out
 }
@@ -2562,7 +2562,7 @@ define amdgpu_ps <2 x float> @adjust_writemask_sample_13(<8 x i32> inreg %rsrc,
 ; GFX12-NEXT: ; return to shader part epilog
 main_body:
 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
- %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32>
+ %out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32>
 ret <2 x float> %out
 }
@@ -2604,7 +2604,7 @@ define amdgpu_ps <3 x float> @adjust_writemask_sample_123(<8 x i32> inreg %rsrc,
 ; GFX12-NEXT: ; return to shader part epilog
 main_body:
 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
- %out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32>
+ %out = shufflevector <4 x float> %r, <4 x float> poison, <3 x i32>
 ret <3 x float> %out
 }
@@ -2667,7 +2667,7 @@ define amdgpu_ps <2 x float> @adjust_writemask_sample_123_to_12(<8 x i32> inreg
 ; GFX12-NEXT: ; return to shader part epilog
 main_body:
 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 14, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
- %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32>
+ %out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32>
 ret <2 x float> %out
 }
@@ -2709,7 +2709,7 @@ define amdgpu_ps <2 x float> @adjust_writemask_sample_013_to_13(<8 x i32> inreg
 ; GFX12-NEXT: ; return to shader part epilog
 main_body:
 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 11, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
- %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32>
+ %out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32>
 ret <2 x float> %out
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.format.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.format.d16.ll
index ca03d954d5b2a..5b4e2e4ce171b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.format.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.format.d16.ll
@@ -47,7 +47,7 @@ main_body:
 ; PACKED: buffer_store_format_d16_xyz v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
 define amdgpu_kernel void @buffer_store_format_d16_xyz(<4 x i32> %rsrc, <4 x half> %data, i32 %voffset) {
 main_body:
- %data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32>
+ %data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32>
 call void @llvm.amdgcn.raw.buffer.store.format.v3f16(<3 x half> %data_subvec, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.format.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.format.d16.ll
index 8e278c101057a..b1a2747cb5784 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.format.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.format.d16.ll
@@ -47,7 +47,7 @@ main_body:
 ; PACKED: buffer_store_format_d16_xyz v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
 define amdgpu_kernel void @buffer_store_format_d16_xyz(ptr addrspace(8) %rsrc, <4 x half> %data, i32 %voffset) {
 main_body:
- %data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32>
+ %data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32>
 call void @llvm.amdgcn.raw.ptr.buffer.store.format.v3f16(<3 x half> %data_subvec, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 0)
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.tbuffer.store.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.tbuffer.store.d16.ll
index 02fc82de5d7bc..9c576039ff6ac 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.tbuffer.store.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.tbuffer.store.d16.ll
@@ -144,7 +144,7 @@ define amdgpu_kernel void @tbuffer_store_d16_xyz(ptr addrspace(8) %rsrc, <4 x ha
 ; GFX11-PACKED-NEXT: tbuffer_store_d16_format_xyz v[0:1], off, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM]
 ; GFX11-PACKED-NEXT: s_endpgm
 main_body:
- %data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32>
+ %data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32>
 call void @llvm.amdgcn.raw.ptr.tbuffer.store.v3f16(<3 x half> %data_subvec, ptr addrspace(8) %rsrc, i32 0, i32 0, i32 33, i32 0)
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.d16.ll
index 63b139bb25e77..cde2627008e27 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.d16.ll
@@ -191,7 +191,7 @@ define amdgpu_kernel void @tbuffer_store_d16_xyz(<4 x i32> %rsrc, <4 x half> %da
 ; GFX12-PACKED-GISEL-NEXT: tbuffer_store_d16_format_xyzw v[0:1], off, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM]
 ; GFX12-PACKED-GISEL-NEXT: s_endpgm
 main_body:
- %data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32>
+ %data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32>
 call void @llvm.amdgcn.raw.tbuffer.store.v3f16(<3 x half> %data_subvec, <4 x i32> %rsrc, i32 0, i32 0, i32 33, i32 0)
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.format.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.format.d16.ll
index 73ac61a77a94e..aa7064dad9e95 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.format.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.format.d16.ll
@@ -47,7 +47,7 @@ main_body:
 ; PACKED: buffer_store_format_d16_xyz v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
 define amdgpu_kernel void @buffer_store_format_d16_xyz(<4 x i32> %rsrc, <4 x half> %data, i32 %index) {
main_body: - %data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> + %data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> call void @llvm.amdgcn.struct.buffer.store.format.v3f16(<3 x half> %data_subvec, <4 x i32> %rsrc, i32 %index, i32 0, i32 0, i32 0) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.store.format.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.store.format.d16.ll index 8a281376965d1..51d3687ac5185 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.store.format.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.store.format.d16.ll @@ -77,7 +77,7 @@ define amdgpu_kernel void @buffer_store_format_d16_xyz(ptr addrspace(8) %rsrc, < ; PACKED-NEXT: buffer_store_format_d16_xyz v[0:1], v2, s[0:3], 0 idxen ; PACKED-NEXT: s_endpgm main_body: - %data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> + %data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> call void @llvm.amdgcn.struct.ptr.buffer.store.format.v3f16(<3 x half> %data_subvec, ptr addrspace(8) %rsrc, i32 %index, i32 0, i32 0, i32 0) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.tbuffer.store.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.tbuffer.store.d16.ll index e5eae03bb5bde..7e92f0d2c5973 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.tbuffer.store.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.tbuffer.store.d16.ll @@ -160,7 +160,7 @@ define amdgpu_kernel void @tbuffer_store_d16_xyz(ptr addrspace(8) %rsrc, <4 x ha ; GFX11-PACKED-NEXT: tbuffer_store_d16_format_xyz v[0:1], v2, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] idxen ; GFX11-PACKED-NEXT: s_endpgm main_body: - %data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> + %data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> call void @llvm.amdgcn.struct.ptr.tbuffer.store.v3f16(<3 x half> %data_subvec, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 33, i32 0) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.d16.ll index 17ebb1a835462..4a02973941ec5 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.d16.ll @@ -211,7 +211,7 @@ define amdgpu_kernel void @tbuffer_store_d16_xyz(<4 x i32> %rsrc, <4 x half> %da ; GFX12-PACKED-GISEL-NEXT: tbuffer_store_d16_format_xyzw v[0:1], v2, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM] idxen ; GFX12-PACKED-GISEL-NEXT: s_endpgm main_body: - %data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> + %data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> call void @llvm.amdgcn.struct.tbuffer.store.v3f16(<3 x half> %data_subvec, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 33, i32 0) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/load-local-redundant-copies.ll b/llvm/test/CodeGen/AMDGPU/load-local-redundant-copies.ll index 01bab28a49858..2cee66f902b6d 100644 --- a/llvm/test/CodeGen/AMDGPU/load-local-redundant-copies.ll +++ b/llvm/test/CodeGen/AMDGPU/load-local-redundant-copies.ll @@ -22,7 +22,7 @@ define amdgpu_vs void @test(ptr addrspace(8) inreg %arg1, ptr addrspace(3) %arg2 ; CHECK-NEXT: s_endpgm call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float undef, float undef, float undef, float undef, i1 false, i1 false) %var1 = load <6 x 
float>, ptr addrspace(3) %arg2, align 4 - %var2 = shufflevector <6 x float> %var1, <6 x float> undef, <4 x i32> + %var2 = shufflevector <6 x float> %var1, <6 x float> poison, <4 x i32> call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4f32(<4 x float> %var2, ptr addrspace(8) %arg1, i32 0, i32 0, i32 0, i32 126, i32 0) ret void } @@ -52,9 +52,9 @@ define amdgpu_vs void @test_2(ptr addrspace(8) inreg %arg1, i32 %arg2, i32 inreg ; CHECK-NEXT: tbuffer_store_format_xyzw v[2:5], v0, s[0:3], s4 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen offset:16 glc slc ; CHECK-NEXT: s_endpgm %load = load <8 x float>, ptr addrspace(3) %arg4, align 4 - %vec1 = shufflevector <8 x float> %load, <8 x float> undef, <4 x i32> + %vec1 = shufflevector <8 x float> %load, <8 x float> poison, <4 x i32> call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4f32(<4 x float> %vec1, ptr addrspace(8) %arg1, i32 %arg2, i32 0, i32 %arg3, i32 77, i32 3) - %vec2 = shufflevector <8 x float> %load, <8 x float> undef, <4 x i32> + %vec2 = shufflevector <8 x float> %load, <8 x float> poison, <4 x i32> call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4f32(<4 x float> %vec2, ptr addrspace(8) %arg1, i32 %arg2, i32 16, i32 %arg3, i32 77, i32 3) ret void } @@ -102,17 +102,17 @@ define amdgpu_vs void @test_3(i32 inreg %arg1, i32 inreg %arg2, ptr addrspace(8) ; CHECK-NEXT: tbuffer_store_format_xy v[0:1], v9, s[4:7], s1 format:[BUF_DATA_FORMAT_INVALID,BUF_NUM_FORMAT_UINT] idxen offset:256 glc slc ; CHECK-NEXT: s_endpgm %load1 = load <6 x float>, ptr addrspace(3) %arg5, align 4 - %vec11 = shufflevector <6 x float> %load1, <6 x float> undef, <4 x i32> + %vec11 = shufflevector <6 x float> %load1, <6 x float> poison, <4 x i32> call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4f32(<4 x float> %vec11, ptr addrspace(8) %arg3, i32 %arg1, i32 264, i32 %arg2, i32 77, i32 3) - %vec12 = shufflevector <6 x float> %load1, <6 x float> undef, <2 x i32> + %vec12 = shufflevector <6 x float> %load1, <6 x float> poison, <2 x i32> call void @llvm.amdgcn.struct.ptr.tbuffer.store.v2f32(<2 x float> %vec12, ptr addrspace(8) %arg3, i32 %arg1, i32 280, i32 %arg2, i32 64, i32 3) call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float undef, float undef, float undef, float undef, i1 false, i1 false) %load2 = load <6 x float>, ptr addrspace(3) %arg6, align 4 - %vec21 = shufflevector <6 x float> %load2, <6 x float> undef, <4 x i32> + %vec21 = shufflevector <6 x float> %load2, <6 x float> poison, <4 x i32> call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4f32(<4 x float> %vec21, ptr addrspace(8) %arg3, i32 %arg1, i32 240, i32 %arg2, i32 77, i32 3) - %vec22 = shufflevector <6 x float> %load2, <6 x float> undef, <2 x i32> + %vec22 = shufflevector <6 x float> %load2, <6 x float> poison, <2 x i32> call void @llvm.amdgcn.struct.ptr.tbuffer.store.v2f32(<2 x float> %vec22, ptr addrspace(8) %arg3, i32 %arg1, i32 256, i32 %arg2, i32 64, i32 3) ret void diff --git a/llvm/test/CodeGen/AMDGPU/loop-live-out-copy-undef-subrange.ll b/llvm/test/CodeGen/AMDGPU/loop-live-out-copy-undef-subrange.ll index 9c8aad177db65..dec86d41769f6 100644 --- a/llvm/test/CodeGen/AMDGPU/loop-live-out-copy-undef-subrange.ll +++ b/llvm/test/CodeGen/AMDGPU/loop-live-out-copy-undef-subrange.ll @@ -36,7 +36,7 @@ bb1: ; preds = %bb3, %bb %i3 = fmul float %i2, 1.000000e+00 %i4 = fmul nsz <3 x float> %arg, %i5 = insertelement <3 x float> poison, float %i3, i32 0 - %i6 = shufflevector <3 x float> %i5, <3 x float> undef, <3 x i32> zeroinitializer + %i6 = shufflevector <3 x float> %i5, <3 x float> poison, <3 x i32> 
zeroinitializer %i7 = fmul <3 x float> %i4, %i6 %i8 = fcmp oeq float %i3, 0.000000e+00 br i1 %i8, label %bb3, label %bb2 diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix.ll b/llvm/test/CodeGen/AMDGPU/mad-mix.ll index 4c2a16c17b38a..a4568b58661db 100644 --- a/llvm/test/CodeGen/AMDGPU/mad-mix.ll +++ b/llvm/test/CodeGen/AMDGPU/mad-mix.ll @@ -456,9 +456,9 @@ define <2 x float> @v_mad_mix_v2f32_shuffle(<2 x half> %src0, <2 x half> %src1, ; GISEL-CI-NEXT: v_mad_f32 v0, v4, v0, v1 ; GISEL-CI-NEXT: v_mac_f32_e32 v1, v5, v2 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31] - %src0.shuf = shufflevector <2 x half> %src0, <2 x half> undef, <2 x i32> - %src1.shuf = shufflevector <2 x half> %src1, <2 x half> undef, <2 x i32> - %src2.shuf = shufflevector <2 x half> %src2, <2 x half> undef, <2 x i32> + %src0.shuf = shufflevector <2 x half> %src0, <2 x half> poison, <2 x i32> + %src1.shuf = shufflevector <2 x half> %src1, <2 x half> poison, <2 x i32> + %src2.shuf = shufflevector <2 x half> %src2, <2 x half> poison, <2 x i32> %src0.ext = fpext <2 x half> %src0.shuf to <2 x float> %src1.ext = fpext <2 x half> %src1.shuf to <2 x float> %src2.ext = fpext <2 x half> %src2.shuf to <2 x float> diff --git a/llvm/test/CodeGen/AMDGPU/mul_int24.ll b/llvm/test/CodeGen/AMDGPU/mul_int24.ll index ad3f36dc3f216..11cf129b1e479 100644 --- a/llvm/test/CodeGen/AMDGPU/mul_int24.ll +++ b/llvm/test/CodeGen/AMDGPU/mul_int24.ll @@ -795,8 +795,8 @@ bb: br i1 %cmp, label %bb11, label %bb7 bb11: - %tmp14 = shufflevector <2 x i32> %arg1, <2 x i32> undef, <2 x i32> zeroinitializer - %tmp16 = shufflevector <2 x i32> %arg2, <2 x i32> undef, <2 x i32> zeroinitializer + %tmp14 = shufflevector <2 x i32> %arg1, <2 x i32> poison, <2 x i32> zeroinitializer + %tmp16 = shufflevector <2 x i32> %arg2, <2 x i32> poison, <2 x i32> zeroinitializer %tmp17 = shl <2 x i32> %tmp14, %tmp18 = ashr <2 x i32> %tmp17, %tmp19 = shl <2 x i32> %tmp16, diff --git a/llvm/test/CodeGen/AMDGPU/packed-fp32.ll b/llvm/test/CodeGen/AMDGPU/packed-fp32.ll index a74db456ab6be..866abc10b5777 100644 --- a/llvm/test/CodeGen/AMDGPU/packed-fp32.ll +++ b/llvm/test/CodeGen/AMDGPU/packed-fp32.ll @@ -486,7 +486,7 @@ bb: %neg.scalar0 = fsub float -0.0, %scalar0 %neg.scalar0.vec = insertelement <2 x float> poison, float %neg.scalar0, i32 0 - %neg.scalar0.broadcast = shufflevector <2 x float> %neg.scalar0.vec, <2 x float> undef, <2 x i32> zeroinitializer + %neg.scalar0.broadcast = shufflevector <2 x float> %neg.scalar0.vec, <2 x float> poison, <2 x i32> zeroinitializer %result = fadd <2 x float> %vec0, %neg.scalar0.broadcast store <2 x float> %result, ptr addrspace(1) %out, align 4 @@ -526,7 +526,7 @@ bb: %vec0 = load volatile <2 x float>, ptr addrspace(3) %lds, align 8 %lds.gep1 = getelementptr inbounds <2 x float>, ptr addrspace(3) %lds, i32 1 %vec1 = load volatile <2 x float>, ptr addrspace(3) %lds.gep1, align 8 - %vec1.swap = shufflevector <2 x float> %vec1, <2 x float> undef, <2 x i32> + %vec1.swap = shufflevector <2 x float> %vec1, <2 x float> poison, <2 x i32> %result = fadd <2 x float> %vec0, %vec1.swap store <2 x float> %result, ptr addrspace(1) %out, align 8 ret void @@ -543,7 +543,7 @@ bb: %f32 = load volatile float, ptr addrspace(3) undef, align 8 %vec1 = load volatile <2 x float>, ptr addrspace(3) %lds.gep1, align 8 %vec1.neg = fsub <2 x float> , %vec1 - %vec1.neg.swap = shufflevector <2 x float> %vec1.neg, <2 x float> undef, <2 x i32> + %vec1.neg.swap = shufflevector <2 x float> %vec1.neg, <2 x float> poison, <2 x i32> %result = fadd <2 x float> %vec0, %vec1.neg.swap store <2 x float> 
%result, ptr addrspace(1) %out, align 8 ret void @@ -598,7 +598,7 @@ bb: %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %arg, i32 %tid %in.1 = load <4 x float>, ptr addrspace(1) %gep - %shuf = shufflevector <4 x float> %in.1, <4 x float> undef, <4 x i32> zeroinitializer + %shuf = shufflevector <4 x float> %in.1, <4 x float> poison, <4 x i32> zeroinitializer %add.1 = fadd <4 x float> %in.1, %shuf store <4 x float> %add.1, ptr addrspace(1) %gep ret void diff --git a/llvm/test/CodeGen/AMDGPU/packed-op-sel.ll b/llvm/test/CodeGen/AMDGPU/packed-op-sel.ll index 5514cd3e0b367..081756a0b7816 100644 --- a/llvm/test/CodeGen/AMDGPU/packed-op-sel.ll +++ b/llvm/test/CodeGen/AMDGPU/packed-op-sel.ll @@ -24,7 +24,7 @@ bb: %scalar0 = load volatile half, ptr addrspace(3) %arg2, align 2 %scalar0.vec = insertelement <2 x half> poison, half %scalar0, i32 0 - %scalar0.broadcast = shufflevector <2 x half> %scalar0.vec, <2 x half> undef, <2 x i32> zeroinitializer + %scalar0.broadcast = shufflevector <2 x half> %scalar0.vec, <2 x half> poison, <2 x i32> zeroinitializer %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %scalar0.broadcast) store <2 x half> %result, ptr addrspace(1) %out, align 4 @@ -55,7 +55,7 @@ bb: %scalar0 = load volatile half, ptr addrspace(3) %arg2, align 2 %scalar0.vec = insertelement <2 x half> poison, half %scalar0, i32 0 - %scalar0.broadcast = shufflevector <2 x half> %scalar0.vec, <2 x half> undef, <2 x i32> zeroinitializer + %scalar0.broadcast = shufflevector <2 x half> %scalar0.vec, <2 x half> poison, <2 x i32> zeroinitializer %neg.scalar0.broadcast = fsub <2 x half> , %scalar0.broadcast %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %neg.scalar0.broadcast) @@ -88,7 +88,7 @@ bb: %neg.scalar0 = fsub half -0.0, %scalar0 %neg.scalar0.vec = insertelement <2 x half> poison, half %neg.scalar0, i32 0 - %neg.scalar0.broadcast = shufflevector <2 x half> %neg.scalar0.vec, <2 x half> undef, <2 x i32> zeroinitializer + %neg.scalar0.broadcast = shufflevector <2 x half> %neg.scalar0.vec, <2 x half> poison, <2 x i32> zeroinitializer %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %neg.scalar0.broadcast) store <2 x half> %result, ptr addrspace(1) %out, align 4 @@ -120,7 +120,7 @@ bb: %neg.scalar0 = fsub half -0.0, %scalar0 %neg.scalar0.vec = insertelement <2 x half> poison, half %neg.scalar0, i32 0 - %neg.scalar0.broadcast = shufflevector <2 x half> %neg.scalar0.vec, <2 x half> undef, <2 x i32> zeroinitializer + %neg.scalar0.broadcast = shufflevector <2 x half> %neg.scalar0.vec, <2 x half> poison, <2 x i32> zeroinitializer %neg.neg.scalar0.broadcast = fsub <2 x half> , %neg.scalar0.broadcast %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %neg.neg.scalar0.broadcast) @@ -212,7 +212,7 @@ bb: %neg.scalar0.bc = bitcast half %neg.scalar0 to i16 %neg.scalar0.vec = insertelement <2 x i16> poison, i16 %neg.scalar0.bc, i32 0 - %neg.scalar0.broadcast = shufflevector <2 x i16> %neg.scalar0.vec, <2 x i16> undef, <2 x i32> zeroinitializer + %neg.scalar0.broadcast = shufflevector <2 x i16> %neg.scalar0.vec, <2 x i16> poison, <2 x i32> zeroinitializer %result = add <2 x i16> %vec0, %neg.scalar0.broadcast store <2 x i16> %result, ptr addrspace(1) %out, align 4 @@ -318,7 +318,7 @@ bb: %vec2 = load volatile <2 x half>, ptr addrspace(3) %lds.gep2, align 4 %vec2.fneg = fsub <2 x half> , 
%vec2 - %vec2.fneg.elt1.broadcast = shufflevector <2 x half> %vec2.fneg, <2 x half> undef, <2 x i32> + %vec2.fneg.elt1.broadcast = shufflevector <2 x half> %vec2.fneg, <2 x half> poison, <2 x i32> %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %vec2.fneg.elt1.broadcast) store <2 x half> %result, ptr addrspace(1) %out, align 4 @@ -377,7 +377,7 @@ bb: %vec0 = load volatile <2 x i16>, ptr addrspace(3) %lds, align 4 %vec1 = load volatile <2 x i16>, ptr addrspace(3) %lds.gep1, align 4 - %vec1.elt1.broadcast = shufflevector <2 x i16> %vec1, <2 x i16> undef, <2 x i32> + %vec1.elt1.broadcast = shufflevector <2 x i16> %vec1, <2 x i16> poison, <2 x i32> %result = add <2 x i16> %vec0, %vec1.elt1.broadcast store <2 x i16> %result, ptr addrspace(1) %out, align 4 @@ -407,7 +407,7 @@ bb: %vec1 = load volatile <2 x half>, ptr addrspace(3) %lds.gep1, align 4 %vec2 = load volatile <2 x half>, ptr addrspace(3) %lds.gep2, align 4 - %vec2.elt1.broadcast = shufflevector <2 x half> %vec2, <2 x half> undef, <2 x i32> + %vec2.elt1.broadcast = shufflevector <2 x half> %vec2, <2 x half> poison, <2 x i32> %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %vec2.elt1.broadcast) @@ -471,7 +471,7 @@ bb: %vec1 = load volatile <2 x half>, ptr addrspace(3) %lds.gep1, align 4 %vec2 = load volatile <2 x half>, ptr addrspace(3) %lds.gep2, align 4 - %vec2.swap = shufflevector <2 x half> %vec2, <2 x half> undef, <2 x i32> + %vec2.swap = shufflevector <2 x half> %vec2, <2 x half> poison, <2 x i32> %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %vec2.swap) store <2 x half> %result, ptr addrspace(1) %out, align 4 @@ -502,7 +502,7 @@ bb: %vec2 = load volatile <2 x half>, ptr addrspace(3) %lds.gep2, align 4 %neg.vec2 = fsub <2 x half> , %vec2 - %neg.vec2.swap = shufflevector <2 x half> %neg.vec2, <2 x half> undef, <2 x i32> + %neg.vec2.swap = shufflevector <2 x half> %neg.vec2, <2 x half> poison, <2 x i32> %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %neg.vec2.swap) store <2 x half> %result, ptr addrspace(1) %out, align 4 @@ -678,7 +678,7 @@ bb: %f32 = load volatile float, ptr addrspace(3) undef, align 4 %neg.f32 = fsub float -0.0, %f32 %bc = bitcast float %neg.f32 to <2 x half> - %shuf = shufflevector <2 x half> %bc, <2 x half> undef, <2 x i32> + %shuf = shufflevector <2 x half> %bc, <2 x half> poison, <2 x i32> %result = fadd <2 x half> %vec0, %shuf store <2 x half> %result, ptr addrspace(1) %out, align 4 ret void diff --git a/llvm/test/CodeGen/AMDGPU/permute_i8.ll b/llvm/test/CodeGen/AMDGPU/permute_i8.ll index 312dfa3717c77..5e867deb56ebe 100644 --- a/llvm/test/CodeGen/AMDGPU/permute_i8.ll +++ b/llvm/test/CodeGen/AMDGPU/permute_i8.ll @@ -342,7 +342,7 @@ define hidden void @shuffle7330ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] %vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4 - %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> + %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4 ret void } @@ -367,7 +367,7 @@ define hidden void @shuffle5341ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] %vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4 - %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x 
i32> + %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4 ret void } @@ -393,7 +393,7 @@ define hidden void @shuffle6106ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] %vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4 - %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> + %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4 ret void } @@ -420,7 +420,7 @@ define hidden void @shuffle4327ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] %vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4 - %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> + %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4 ret void } @@ -446,7 +446,7 @@ define hidden void @shuffle3263ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] %vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4 - %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> + %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4 ret void } @@ -472,7 +472,7 @@ define hidden void @shuffle2763ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] %vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4 - %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> + %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4 ret void } @@ -498,7 +498,7 @@ define hidden void @shuffle1327ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] %vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4 - %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> + %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4 ret void } @@ -524,7 +524,7 @@ define hidden void @shuffle0605ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] %vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4 - %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> + %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4 ret void } @@ -554,7 +554,7 @@ define hidden void @insertUsesOr(ptr addrspace(1) %in0, ptr addrspace(1) %in1, i ; GFX9-NEXT: s_setpc_b64 s[30:31] %vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4 %vec1 = load <4 x i8>, ptr addrspace(1) %in1, align 4 - %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> + %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> %vecins = insertelement <4 x i8> %shuffle0_0, i8 %elt, i32 1 store <4 x i8> %vecins, ptr addrspace(1) %out0 ret void @@ -598,7 +598,7 @@ define hidden void @addUsesOr(ptr addrspace(1) %in0, ptr addrspace(1) %in1, i8 % ; GFX9-NEXT: s_setpc_b64 s[30:31] %vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4 %vec1 = load <4 x i8>, ptr addrspace(1) %in1, align 4 - %shuffle0_0 = shufflevector <4 x 
i8> %vec0, <4 x i8> undef, <4 x i32> + %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> %added = add <4 x i8> %shuffle0_0, %vec1 store <4 x i8> %added, ptr addrspace(1) %out0 ret void @@ -783,7 +783,7 @@ define hidden void @add_div(ptr addrspace(1) %in0, ptr addrspace(1) %in1, i8 %el %gep1 = getelementptr <4 x i8>, ptr addrspace(1) %in1, i32 %tid %vec0 = load <4 x i8>, ptr addrspace(1) %gep0, align 4 %vec1 = load <4 x i8>, ptr addrspace(1) %gep1, align 4 - %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> + %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> %vecins = add <4 x i8> %shuffle0_0, %vec1 store <4 x i8> %vecins, ptr addrspace(1) %out0 ret void @@ -835,7 +835,7 @@ define hidden void @add_store(ptr addrspace(1) %in0, ptr addrspace(1) %in1, i8 % ; GFX9-NEXT: s_setpc_b64 s[30:31] %vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4 %vec1 = load <4 x i8>, ptr addrspace(1) %in1, align 4 - %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> + %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> %vecins = add <4 x i8> %shuffle0_0, %vec1 store <4 x i8> %vecins, ptr addrspace(1) %out0 store <4 x i8> %shuffle0_0, ptr addrspace(1) %out1 @@ -903,7 +903,7 @@ define hidden void @add_store_div_16(ptr addrspace(1) %in0, ptr addrspace(1) %in %gep1 = getelementptr <4 x i8>, ptr addrspace(1) %in1, i32 %tid %vec0 = load <4 x i8>, ptr addrspace(1) %gep0, align 4 %vec1 = load <4 x i8>, ptr addrspace(1) %gep1, align 4 - %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> + %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> %vecins = add <4 x i8> %shuffle0_0, %vec1 store <4 x i8> %vecins, ptr addrspace(1) %out0 store <4 x i8> %shuffle0_0, ptr addrspace(1) %out1 diff --git a/llvm/test/CodeGen/AMDGPU/reduction.ll b/llvm/test/CodeGen/AMDGPU/reduction.ll index 53a036b617725..cd4b3c1d1b546 100644 --- a/llvm/test/CodeGen/AMDGPU/reduction.ll +++ b/llvm/test/CodeGen/AMDGPU/reduction.ll @@ -10,9 +10,9 @@ ; VI-NEXT: v_add_f16_e32 define half @reduction_fadd_v4f16(<4 x half> %vec4) { entry: - %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> + %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> %bin.rdx = fadd <4 x half> %vec4, %rdx.shuf - %rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> undef, <4 x i32> + %rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> poison, <4 x i32> %bin.rdx2 = fadd <4 x half> %bin.rdx, %rdx.shuf1 %res = extractelement <4 x half> %bin.rdx2, i32 0 ret half %res @@ -30,9 +30,9 @@ entry: ; VI-NEXT: s_setpc_b64 define half @reduction_fsub_v4f16(<4 x half> %vec4) { entry: - %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> + %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> %bin.rdx = fsub <4 x half> %vec4, %rdx.shuf - %rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> undef, <4 x i32> + %rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> poison, <4 x i32> %bin.rdx2 = fsub <4 x half> %bin.rdx, %rdx.shuf1 %res = extractelement <4 x half> %bin.rdx2, i32 0 ret half %res @@ -52,9 +52,9 @@ entry: ; VI-NEXT: s_setpc_b64 define half @reduction_fsub_v4f16_preserve_fmf(<4 x half> %vec4) { entry: - %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> + %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> %bin.rdx = fsub nsz <4 x half> %vec4, %rdx.shuf - %rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> 
undef, <4 x i32> + %rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> poison, <4 x i32> %bin.rdx2 = fsub nsz <4 x half> %bin.rdx, %rdx.shuf1 %res = extractelement <4 x half> %bin.rdx2, i32 0 %neg.res = fsub half -0.0, %res @@ -70,9 +70,9 @@ entry: ; VI-NEXT: v_mul_f16_e32 define half @reduction_fmul_half4(<4 x half> %vec4) { entry: - %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> + %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> %bin.rdx = fmul <4 x half> %vec4, %rdx.shuf - %rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> undef, <4 x i32> + %rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> poison, <4 x i32> %bin.rdx2 = fmul <4 x half> %bin.rdx, %rdx.shuf1 %res = extractelement <4 x half> %bin.rdx2, i32 0 ret half %res @@ -87,9 +87,9 @@ entry: ; VI-NEXT: v_add_u16_e32 define i16 @reduction_v4i16(<4 x i16> %vec4) { entry: - %rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> undef, <4 x i32> + %rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> poison, <4 x i32> %bin.rdx = add <4 x i16> %vec4, %rdx.shuf - %rdx.shuf1 = shufflevector <4 x i16> %bin.rdx, <4 x i16> undef, <4 x i32> + %rdx.shuf1 = shufflevector <4 x i16> %bin.rdx, <4 x i16> poison, <4 x i32> %bin.rdx2 = add <4 x i16> %bin.rdx, %rdx.shuf1 %res = extractelement <4 x i16> %bin.rdx2, i32 0 ret i16 %res @@ -111,11 +111,11 @@ entry: define half @reduction_half8(<8 x half> %vec8) { entry: - %rdx.shuf = shufflevector <8 x half> %vec8, <8 x half> undef, <8 x i32> + %rdx.shuf = shufflevector <8 x half> %vec8, <8 x half> poison, <8 x i32> %bin.rdx = fadd <8 x half> %vec8, %rdx.shuf - %rdx.shuf1 = shufflevector <8 x half> %bin.rdx, <8 x half> undef, <8 x i32> + %rdx.shuf1 = shufflevector <8 x half> %bin.rdx, <8 x half> poison, <8 x i32> %bin.rdx2 = fadd <8 x half> %bin.rdx, %rdx.shuf1 - %rdx.shuf3 = shufflevector <8 x half> %bin.rdx2, <8 x half> undef, <8 x i32> + %rdx.shuf3 = shufflevector <8 x half> %bin.rdx2, <8 x half> poison, <8 x i32> %bin.rdx4 = fadd <8 x half> %bin.rdx2, %rdx.shuf3 %res = extractelement <8 x half> %bin.rdx4, i32 0 ret half %res @@ -137,11 +137,11 @@ entry: define i16 @reduction_v8i16(<8 x i16> %vec8) { entry: - %rdx.shuf = shufflevector <8 x i16> %vec8, <8 x i16> undef, <8 x i32> + %rdx.shuf = shufflevector <8 x i16> %vec8, <8 x i16> poison, <8 x i32> %bin.rdx = add <8 x i16> %vec8, %rdx.shuf - %rdx.shuf1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> + %rdx.shuf1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> poison, <8 x i32> %bin.rdx2 = add <8 x i16> %bin.rdx, %rdx.shuf1 - %rdx.shuf3 = shufflevector <8 x i16> %bin.rdx2, <8 x i16> undef, <8 x i32> + %rdx.shuf3 = shufflevector <8 x i16> %bin.rdx2, <8 x i16> poison, <8 x i32> %bin.rdx4 = add <8 x i16> %bin.rdx2, %rdx.shuf3 %res = extractelement <8 x i16> %bin.rdx4, i32 0 ret i16 %res @@ -175,13 +175,13 @@ entry: define half @reduction_half16(<16 x half> %vec16) { entry: - %rdx.shuf = shufflevector <16 x half> %vec16, <16 x half> undef, <16 x i32> + %rdx.shuf = shufflevector <16 x half> %vec16, <16 x half> poison, <16 x i32> %bin.rdx = fadd <16 x half> %vec16, %rdx.shuf - %rdx.shuf1 = shufflevector <16 x half> %bin.rdx, <16 x half> undef, <16 x i32> + %rdx.shuf1 = shufflevector <16 x half> %bin.rdx, <16 x half> poison, <16 x i32> %bin.rdx2 = fadd <16 x half> %bin.rdx, %rdx.shuf1 - %rdx.shuf3 = shufflevector <16 x half> %bin.rdx2, <16 x half> undef, <16 x i32> + %rdx.shuf3 = shufflevector <16 x half> %bin.rdx2, <16 x half> poison, <16 x i32> %bin.rdx4 = fadd <16 x half> 
%bin.rdx2, %rdx.shuf3 - %rdx.shuf5 = shufflevector <16 x half> %bin.rdx4, <16 x half> undef, <16 x i32> + %rdx.shuf5 = shufflevector <16 x half> %bin.rdx4, <16 x half> poison, <16 x i32> %bin.rdx6 = fadd <16 x half> %bin.rdx4, %rdx.shuf5 %res = extractelement <16 x half> %bin.rdx6, i32 0 ret half %res @@ -196,10 +196,10 @@ entry: ; VI-NEXT: v_min_u16_e32 define i16 @reduction_min_v4i16(<4 x i16> %vec4) { entry: - %rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> undef, <4 x i32> + %rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> poison, <4 x i32> %rdx.minmax.cmp = icmp ult <4 x i16> %vec4, %rdx.shuf %rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x i16> %vec4, <4 x i16> %rdx.shuf - %rdx.shuf1 = shufflevector <4 x i16> %rdx.minmax.select, <4 x i16> undef, <4 x i32> + %rdx.shuf1 = shufflevector <4 x i16> %rdx.minmax.select, <4 x i16> poison, <4 x i32> %rdx.minmax.cmp2 = icmp ult <4 x i16> %rdx.minmax.select, %rdx.shuf1 %rdx.minmax.select3 = select <4 x i1> %rdx.minmax.cmp2, <4 x i16> %rdx.minmax.select, <4 x i16> %rdx.shuf1 %res = extractelement <4 x i16> %rdx.minmax.select3, i32 0 @@ -221,13 +221,13 @@ entry: ; VI-NEXT: v_min_u16_e32 define i16 @reduction_umin_v8i16(<8 x i16> %vec8) { entry: - %rdx.shuf = shufflevector <8 x i16> %vec8, <8 x i16> undef, <8 x i32> + %rdx.shuf = shufflevector <8 x i16> %vec8, <8 x i16> poison, <8 x i32> %rdx.minmax.cmp = icmp ult <8 x i16> %vec8, %rdx.shuf %rdx.minmax.select = select <8 x i1> %rdx.minmax.cmp, <8 x i16> %vec8, <8 x i16> %rdx.shuf - %rdx.shuf1 = shufflevector <8 x i16> %rdx.minmax.select, <8 x i16> undef, <8 x i32> + %rdx.shuf1 = shufflevector <8 x i16> %rdx.minmax.select, <8 x i16> poison, <8 x i32> %rdx.minmax.cmp2 = icmp ult <8 x i16> %rdx.minmax.select, %rdx.shuf1 %rdx.minmax.select3 = select <8 x i1> %rdx.minmax.cmp2, <8 x i16> %rdx.minmax.select, <8 x i16> %rdx.shuf1 - %rdx.shuf4 = shufflevector <8 x i16> %rdx.minmax.select3, <8 x i16> undef, <8 x i32> + %rdx.shuf4 = shufflevector <8 x i16> %rdx.minmax.select3, <8 x i16> poison, <8 x i32> %rdx.minmax.cmp5 = icmp ult <8 x i16> %rdx.minmax.select3, %rdx.shuf4 %rdx.minmax.select6 = select <8 x i1> %rdx.minmax.cmp5, <8 x i16> %rdx.minmax.select3, <8 x i16> %rdx.shuf4 %res = extractelement <8 x i16> %rdx.minmax.select6, i32 0 @@ -301,16 +301,16 @@ entry: ; VI-NEXT: v_min_i16_e32 define i16 @reduction_smin_v16i16(<16 x i16> %vec16) { entry: - %rdx.shuf = shufflevector <16 x i16> %vec16, <16 x i16> undef, <16 x i32> + %rdx.shuf = shufflevector <16 x i16> %vec16, <16 x i16> poison, <16 x i32> %rdx.minmax.cmp = icmp slt <16 x i16> %vec16, %rdx.shuf %rdx.minmax.select = select <16 x i1> %rdx.minmax.cmp, <16 x i16> %vec16, <16 x i16> %rdx.shuf - %rdx.shuf1 = shufflevector <16 x i16> %rdx.minmax.select, <16 x i16> undef, <16 x i32> + %rdx.shuf1 = shufflevector <16 x i16> %rdx.minmax.select, <16 x i16> poison, <16 x i32> %rdx.minmax.cmp2 = icmp slt <16 x i16> %rdx.minmax.select, %rdx.shuf1 %rdx.minmax.select3 = select <16 x i1> %rdx.minmax.cmp2, <16 x i16> %rdx.minmax.select, <16 x i16> %rdx.shuf1 - %rdx.shuf4 = shufflevector <16 x i16> %rdx.minmax.select3, <16 x i16> undef, <16 x i32> + %rdx.shuf4 = shufflevector <16 x i16> %rdx.minmax.select3, <16 x i16> poison, <16 x i32> %rdx.minmax.cmp5 = icmp slt <16 x i16> %rdx.minmax.select3, %rdx.shuf4 %rdx.minmax.select6 = select <16 x i1> %rdx.minmax.cmp5, <16 x i16> %rdx.minmax.select3, <16 x i16> %rdx.shuf4 - %rdx.shuf7 = shufflevector <16 x i16> %rdx.minmax.select6, <16 x i16> undef, <16 x i32> + %rdx.shuf7 = shufflevector <16 x i16> 
%rdx.minmax.select6, <16 x i16> poison, <16 x i32> %rdx.minmax.cmp8 = icmp slt <16 x i16> %rdx.minmax.select6, %rdx.shuf7 %rdx.minmax.select9 = select <16 x i1> %rdx.minmax.cmp8, <16 x i16> %rdx.minmax.select6, <16 x i16> %rdx.shuf7 %res = extractelement <16 x i16> %rdx.minmax.select9, i32 0 @@ -404,10 +404,10 @@ entry: ; VI-NEXT: v_max_u16_e32 define i16 @reduction_umax_v4i16(<4 x i16> %vec4) { entry: - %rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> undef, <4 x i32> + %rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> poison, <4 x i32> %rdx.minmax.cmp = icmp ugt <4 x i16> %vec4, %rdx.shuf %rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x i16> %vec4, <4 x i16> %rdx.shuf - %rdx.shuf1 = shufflevector <4 x i16> %rdx.minmax.select, <4 x i16> undef, <4 x i32> + %rdx.shuf1 = shufflevector <4 x i16> %rdx.minmax.select, <4 x i16> poison, <4 x i32> %rdx.minmax.cmp2 = icmp ugt <4 x i16> %rdx.minmax.select, %rdx.shuf1 %rdx.minmax.select3 = select <4 x i1> %rdx.minmax.cmp2, <4 x i16> %rdx.minmax.select, <4 x i16> %rdx.shuf1 %res = extractelement <4 x i16> %rdx.minmax.select3, i32 0 @@ -423,10 +423,10 @@ entry: ; VI-NEXT: v_max_i16_e32 define i16 @reduction_smax_v4i16(<4 x i16> %vec4) #0 { entry: - %rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> undef, <4 x i32> + %rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> poison, <4 x i32> %rdx.minmax.cmp = icmp sgt <4 x i16> %vec4, %rdx.shuf %rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x i16> %vec4, <4 x i16> %rdx.shuf - %rdx.shuf1 = shufflevector <4 x i16> %rdx.minmax.select, <4 x i16> undef, <4 x i32> + %rdx.shuf1 = shufflevector <4 x i16> %rdx.minmax.select, <4 x i16> poison, <4 x i32> %rdx.minmax.cmp2 = icmp sgt <4 x i16> %rdx.minmax.select, %rdx.shuf1 %rdx.minmax.select3 = select <4 x i1> %rdx.minmax.cmp2, <4 x i16> %rdx.minmax.select, <4 x i16> %rdx.shuf1 %res = extractelement <4 x i16> %rdx.minmax.select3, i32 0 @@ -451,9 +451,9 @@ entry: ; VI: v_max_f16_e32 v0, [[MAX1]], [[MAX0]] define half @reduction_maxnum_v4f16(<4 x half> %vec4) { entry: - %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> + %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> %rdx.minmax = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %vec4, <4 x half> %rdx.shuf) - %rdx.shuf1 = shufflevector <4 x half> %rdx.minmax, <4 x half> undef, <4 x i32> + %rdx.shuf1 = shufflevector <4 x half> %rdx.minmax, <4 x half> poison, <4 x i32> %rdx.minmax3 = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %rdx.minmax, <4 x half> %rdx.shuf1) %res = extractelement <4 x half> %rdx.minmax3, i32 0 ret half %res @@ -476,9 +476,9 @@ entry: ; VI: v_min_f16_e32 v0, [[MAX1]], [[MAX0]] define half @reduction_minnum_v4f16(<4 x half> %vec4) { entry: - %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> + %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> %rdx.minmax = call <4 x half> @llvm.minnum.v4f16(<4 x half> %vec4, <4 x half> %rdx.shuf) - %rdx.shuf1 = shufflevector <4 x half> %rdx.minmax, <4 x half> undef, <4 x i32> + %rdx.shuf1 = shufflevector <4 x half> %rdx.minmax, <4 x half> poison, <4 x i32> %rdx.minmax3 = call <4 x half> @llvm.minnum.v4f16(<4 x half> %rdx.minmax, <4 x half> %rdx.shuf1) %res = extractelement <4 x half> %rdx.minmax3, i32 0 ret half %res @@ -513,10 +513,10 @@ entry: ; VI: v_max_f16_e32 v0, [[MAX1]], [[MAX0]] define half @reduction_fast_max_pattern_v4f16(<4 x half> %vec4) { entry: - %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> + %rdx.shuf = 
shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> %rdx.minmax.cmp = fcmp nnan nsz ogt <4 x half> %vec4, %rdx.shuf %rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x half> %vec4, <4 x half> %rdx.shuf - %rdx.shuf1 = shufflevector <4 x half> %rdx.minmax.select, <4 x half> undef, <4 x i32> + %rdx.shuf1 = shufflevector <4 x half> %rdx.minmax.select, <4 x half> poison, <4 x i32> %rdx.minmax.cmp2 = fcmp nnan nsz ogt <4 x half> %rdx.minmax.select, %rdx.shuf1 %rdx.minmax.select3 = select <4 x i1> %rdx.minmax.cmp2, <4 x half> %rdx.minmax.select, <4 x half> %rdx.shuf1 %res = extractelement <4 x half> %rdx.minmax.select3, i32 0 @@ -552,10 +552,10 @@ entry: ; VI: v_min_f16_e32 v0, [[MAX1]], [[MAX0]] define half @reduction_fast_min_pattern_v4f16(<4 x half> %vec4) { entry: - %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> + %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> %rdx.minmax.cmp = fcmp nnan nsz olt <4 x half> %vec4, %rdx.shuf %rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x half> %vec4, <4 x half> %rdx.shuf - %rdx.shuf1 = shufflevector <4 x half> %rdx.minmax.select, <4 x half> undef, <4 x i32> + %rdx.shuf1 = shufflevector <4 x half> %rdx.minmax.select, <4 x half> poison, <4 x i32> %rdx.minmax.cmp2 = fcmp nnan nsz olt <4 x half> %rdx.minmax.select, %rdx.shuf1 %rdx.minmax.select3 = select <4 x i1> %rdx.minmax.cmp2, <4 x half> %rdx.minmax.select, <4 x half> %rdx.shuf1 %res = extractelement <4 x half> %rdx.minmax.select3, i32 0 diff --git a/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll b/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll index b744d3357b8ab..90b1eb5500f4c 100644 --- a/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll +++ b/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll @@ -67,7 +67,7 @@ define amdgpu_kernel void @scalar_to_vector_v2i32(ptr addrspace(1) %out, ptr add ; GFX9-NEXT: s_endpgm %tmp1 = load i32, ptr addrspace(1) %in, align 4 %bc = bitcast i32 %tmp1 to <2 x i16> - %tmp2 = shufflevector <2 x i16> %bc, <2 x i16> undef, <4 x i32> + %tmp2 = shufflevector <2 x i16> %bc, <2 x i16> poison, <4 x i32> store <4 x i16> %tmp2, ptr addrspace(1) %out, align 8 ret void } @@ -135,7 +135,7 @@ define amdgpu_kernel void @scalar_to_vector_v2f32(ptr addrspace(1) %out, ptr add ; GFX9-NEXT: s_endpgm %tmp1 = load float, ptr addrspace(1) %in, align 4 %bc = bitcast float %tmp1 to <2 x i16> - %tmp2 = shufflevector <2 x i16> %bc, <2 x i16> undef, <4 x i32> + %tmp2 = shufflevector <2 x i16> %bc, <2 x i16> poison, <4 x i32> store <4 x i16> %tmp2, ptr addrspace(1) %out, align 8 ret void } @@ -193,7 +193,7 @@ define amdgpu_kernel void @scalar_to_vector_v4i16() { bb: %tmp = load <2 x i8>, ptr addrspace(1) undef, align 1 %tmp1 = shufflevector <2 x i8> %tmp, <2 x i8> zeroinitializer, <8 x i32> - %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> + %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> poison, <8 x i32> store <8 x i8> %tmp2, ptr addrspace(1) undef, align 8 ret void } @@ -262,7 +262,7 @@ bb: %load = load half, ptr addrspace(1) undef, align 1 %tmp = bitcast half %load to <2 x i8> %tmp1 = shufflevector <2 x i8> %tmp, <2 x i8> zeroinitializer, <8 x i32> - %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> + %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> poison, <8 x i32> store <8 x i8> %tmp2, ptr addrspace(1) undef, align 8 ret void } @@ -275,7 +275,7 @@ bb: ; %tmp1 = load i32, ptr addrspace(1) %in, align 4 ; %bc = bitcast i32 %tmp1 to <4 x i8> -; %tmp2 = shufflevector <4 x i8> %bc, <4 x i8> undef, <8 x i32> +; %tmp2 
= shufflevector <4 x i8> %bc, <4 x i8> poison, <8 x i32> ; store <8 x i8> %tmp2, ptr addrspace(1) %out, align 4 ; ret void ; } diff --git a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll index 4993df7e1ba48..6a88be6e55859 100644 --- a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll +++ b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll @@ -56,13 +56,13 @@ declare <2 x float> @_Z3cosDv2_f(<2 x float>) define amdgpu_kernel void @test_sincos_v3(ptr addrspace(1) nocapture %a) { entry: %loadVec4 = load <4 x float>, ptr addrspace(1) %a, align 16 - %extractVec4 = shufflevector <4 x float> %loadVec4, <4 x float> undef, <3 x i32> + %extractVec4 = shufflevector <4 x float> %loadVec4, <4 x float> poison, <3 x i32> %call = call fast <3 x float> @_Z3sinDv3_f(<3 x float> %extractVec4) - %extractVec6 = shufflevector <3 x float> %call, <3 x float> undef, <4 x i32> + %extractVec6 = shufflevector <3 x float> %call, <3 x float> poison, <4 x i32> store <4 x float> %extractVec6, ptr addrspace(1) %a, align 16 %call11 = call fast <3 x float> @_Z3cosDv3_f(<3 x float> %extractVec4) %arrayidx12 = getelementptr inbounds <3 x float>, ptr addrspace(1) %a, i64 1 - %extractVec13 = shufflevector <3 x float> %call11, <3 x float> undef, <4 x i32> + %extractVec13 = shufflevector <3 x float> %call11, <3 x float> poison, <4 x i32> store <4 x float> %extractVec13, ptr addrspace(1) %arrayidx12, align 16 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll b/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll index 6d91c33fd2876..109c7d638f924 100644 --- a/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll +++ b/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll @@ -52,7 +52,7 @@ define hidden <4 x float> @split_v4f32_multi_arg(<4 x float> %arg0, <2 x float> ; GCN: .cfi_endproc call void @llvm.dbg.value(metadata <4 x float> %arg0, metadata !29, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !31 call void @llvm.dbg.value(metadata <2 x float> %arg1, metadata !30, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !31 - %tmp = shufflevector <2 x float> %arg1, <2 x float> undef, <4 x i32> , !dbg !32 + %tmp = shufflevector <2 x float> %arg1, <2 x float> poison, <4 x i32> , !dbg !32 %add = fadd <4 x float> %tmp, %arg0, !dbg !33 ret <4 x float> %add, !dbg !34 } diff --git a/llvm/test/CodeGen/AMDGPU/trunc-store-vec-i16-to-i8.ll b/llvm/test/CodeGen/AMDGPU/trunc-store-vec-i16-to-i8.ll index 7cb30fc8247e9..18bcd6cf880a4 100644 --- a/llvm/test/CodeGen/AMDGPU/trunc-store-vec-i16-to-i8.ll +++ b/llvm/test/CodeGen/AMDGPU/trunc-store-vec-i16-to-i8.ll @@ -15,8 +15,8 @@ entry: define protected amdgpu_kernel void @short2_char4(ptr addrspace(1) %out) { entry: %tmp = load <2 x i16>, ptr addrspace(1) undef, align 4 - %vecinit = shufflevector <2 x i16> %tmp, <2 x i16> undef, <4 x i32> - %vecinit2 = shufflevector <4 x i16> %vecinit, <4 x i16> , <4 x i32> + %vecinit = shufflevector <2 x i16> %tmp, <2 x i16> poison, <4 x i32> + %vecinit2 = shufflevector <4 x i16> %vecinit, <4 x i16> , <4 x i32> %tmp1 = trunc <4 x i16> %vecinit2 to <4 x i8> store <4 x i8> %tmp1, ptr addrspace(1) %out, align 4 ret void @@ -27,8 +27,8 @@ entry: define protected amdgpu_kernel void @short4_char8(ptr addrspace(1) %out) { entry: %tmp = load <4 x i16>, ptr addrspace(1) undef, align 8 - %vecinit = shufflevector <4 x i16> %tmp, <4 x i16> undef, <8 x i32> - %vecinit2 = shufflevector <8 x i16> %vecinit, <8 x i16> , <8 x i32> + %vecinit = shufflevector <4 x i16> %tmp, <4 x i16> poison, 
<8 x i32> + %vecinit2 = shufflevector <8 x i16> %vecinit, <8 x i16> , <8 x i32> %tmp1 = trunc <8 x i16> %vecinit2 to <8 x i8> store <8 x i8> %tmp1, ptr addrspace(1) %out, align 8 ret void @@ -39,8 +39,8 @@ entry: define protected amdgpu_kernel void @short8_char16(ptr addrspace(1) %out) { entry: %tmp = load <8 x i16>, ptr addrspace(1) undef, align 16 - %vecinit = shufflevector <8 x i16> %tmp, <8 x i16> undef, <16 x i32> - %vecinit2 = shufflevector <16 x i16> %vecinit, <16 x i16> , <16 x i32> + %vecinit = shufflevector <8 x i16> %tmp, <8 x i16> poison, <16 x i32> + %vecinit2 = shufflevector <16 x i16> %vecinit, <16 x i16> , <16 x i32> %tmp1 = trunc <16 x i16> %vecinit2 to <16 x i8> store <16 x i8> %tmp1, ptr addrspace(1) %out, align 16 ret void @@ -52,8 +52,8 @@ entry: define protected amdgpu_kernel void @short16_char32(ptr addrspace(1) %out) { entry: %tmp = load <16 x i16>, ptr addrspace(1) undef, align 32 - %vecinit = shufflevector <16 x i16> %tmp, <16 x i16> undef, <32 x i32> - %vecinit2 = shufflevector <32 x i16> %vecinit, <32 x i16> , <32 x i32> + %vecinit = shufflevector <16 x i16> %tmp, <16 x i16> poison, <32 x i32> + %vecinit2 = shufflevector <32 x i16> %vecinit, <32 x i16> , <32 x i32> %tmp1 = trunc <32 x i16> %vecinit2 to <32 x i8> store <32 x i8> %tmp1, ptr addrspace(1) %out, align 32 ret void diff --git a/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll b/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll index 0ab09354ec06b..bc8d0d1f8cd8f 100644 --- a/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll +++ b/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll @@ -27,7 +27,7 @@ define <4 x half> @shuffle_v4f16_23uu(ptr addrspace(1) %arg0, ptr addrspace(1) % ; GFX11-NEXT: s_setpc_b64 s[30:31] %val0 = load <4 x half>, ptr addrspace(1) %arg0 %val1 = load <4 x half>, ptr addrspace(1) %arg1 - %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> + %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> ret <4 x half> %shuffle } @@ -74,7 +74,7 @@ define <4 x half> @shuffle_v4f16_234u(ptr addrspace(1) %arg0, ptr addrspace(1) % ; GFX11-NEXT: s_setpc_b64 s[30:31] %val0 = load <4 x half>, ptr addrspace(1) %arg0 %val1 = load <4 x half>, ptr addrspace(1) %arg1 - %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> + %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> ret <4 x half> %shuffle } @@ -101,7 +101,7 @@ define <4 x half> @shuffle_v4f16_u1u3(ptr addrspace(1) %arg0, ptr addrspace(1) % ; GFX11-NEXT: s_setpc_b64 s[30:31] %val0 = load <4 x half>, ptr addrspace(1) %arg0 %val1 = load <4 x half>, ptr addrspace(1) %arg1 - %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> + %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> ret <4 x half> %shuffle } @@ -140,7 +140,7 @@ define <4 x half> @shuffle_v4f16_u3u1(ptr addrspace(1) %arg0, ptr addrspace(1) % ; GFX11-NEXT: s_setpc_b64 s[30:31] %val0 = load <4 x half>, ptr addrspace(1) %arg0 %val1 = load <4 x half>, ptr addrspace(1) %arg1 - %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> + %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> ret <4 x half> %shuffle } @@ -167,7 +167,7 @@ define <4 x half> @shuffle_v4f16_u3uu(ptr addrspace(1) %arg0, ptr addrspace(1) % ; GFX11-NEXT: s_setpc_b64 s[30:31] %val0 = load <4 x half>, ptr addrspace(1) %arg0 %val1 = load <4 x half>, ptr addrspace(1) %arg1 - %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> + %shuffle = shufflevector <4 x half> %val0, 
<4 x half> %val1, <4 x i32>
ret <4 x half> %shuffle
}

@@ -216,7 +216,7 @@ define <4 x half> @shuffle_v4f16_3u6u(ptr addrspace(1) %arg0, ptr addrspace(1) %
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x half>, ptr addrspace(1) %arg0
%val1 = load <4 x half>, ptr addrspace(1) %arg1
- %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32>
+ %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32>
ret <4 x half> %shuffle
}

@@ -265,7 +265,7 @@ define <4 x half> @shuffle_v4f16_3uu7(ptr addrspace(1) %arg0, ptr addrspace(1) %
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x half>, ptr addrspace(1) %arg0
%val1 = load <4 x half>, ptr addrspace(1) %arg1
- %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32>
+ %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32>
ret <4 x half> %shuffle
}

@@ -312,7 +312,7 @@ define <4 x half> @shuffle_v4f16_35u5(ptr addrspace(1) %arg0, ptr addrspace(1) %
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x half>, ptr addrspace(1) %arg0
%val1 = load <4 x half>, ptr addrspace(1) %arg1
- %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32>
+ %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32>
ret <4 x half> %shuffle
}

@@ -364,7 +364,7 @@ define <4 x half> @shuffle_v4f16_357u(ptr addrspace(1) %arg0, ptr addrspace(1) %
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x half>, ptr addrspace(1) %arg0
%val1 = load <4 x half>, ptr addrspace(1) %arg1
- %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32>
+ %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32>
ret <4 x half> %shuffle
}

@@ -1881,21 +1881,21 @@ entry:
%tmp15 = load <4 x half>, ptr addrspace(1) %arrayidx1, align 8
%arrayidx2 = getelementptr inbounds <4 x half>, ptr addrspace(1) %C, i64 %tmp12
%tmp16 = load <4 x half>, ptr addrspace(1) %arrayidx2, align 8
- %tmp17 = shufflevector <4 x half> %tmp14, <4 x half> undef, <2 x i32> zeroinitializer
- %tmp18 = shufflevector <4 x half> %tmp15, <4 x half> undef, <2 x i32>
- %tmp19 = shufflevector <4 x half> %tmp16, <4 x half> undef, <2 x i32>
+ %tmp17 = shufflevector <4 x half> %tmp14, <4 x half> poison, <2 x i32> zeroinitializer
+ %tmp18 = shufflevector <4 x half> %tmp15, <4 x half> poison, <2 x i32>
+ %tmp19 = shufflevector <4 x half> %tmp16, <4 x half> poison, <2 x i32>
%tmp20 = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %tmp17, <2 x half> %tmp18, <2 x half> %tmp19)
- %tmp21 = shufflevector <4 x half> %tmp14, <4 x half> undef, <2 x i32>
- %tmp22 = shufflevector <4 x half> %tmp15, <4 x half> undef, <2 x i32>
+ %tmp21 = shufflevector <4 x half> %tmp14, <4 x half> poison, <2 x i32>
+ %tmp22 = shufflevector <4 x half> %tmp15, <4 x half> poison, <2 x i32>
%tmp23 = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %tmp21, <2 x half> %tmp22, <2 x half> %tmp20)
- %tmp24 = shufflevector <2 x half> %tmp23, <2 x half> undef, <4 x i32>
+ %tmp24 = shufflevector <2 x half> %tmp23, <2 x half> poison, <4 x i32>
%tmp25 = shufflevector <4 x half> %tmp24, <4 x half> %tmp16, <4 x i32>
- %tmp26 = shufflevector <4 x half> %tmp14, <4 x half> undef, <2 x i32>
- %tmp27 = shufflevector <4 x half> %tmp25, <4 x half> undef, <2 x i32>
+ %tmp26 = shufflevector <4 x half> %tmp14, <4 x half> poison, <2 x i32>
+ %tmp27 = shufflevector <4 x half> %tmp25, <4 x half> poison, <2 x i32>
%tmp28 = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %tmp26, <2 x half> %tmp18, <2 x half> %tmp27)
- %tmp29 = shufflevector <4 x half> %tmp14, <4 x half> undef, <2 x i32>
+ %tmp29 = shufflevector <4 x half> %tmp14, <4 x half> poison, <2 x i32>
%tmp30 = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %tmp29, <2 x half> %tmp22, <2 x half> %tmp28)
- %tmp31 = shufflevector <2 x half> %tmp30, <2 x half> undef, <4 x i32>
+ %tmp31 = shufflevector <2 x half> %tmp30, <2 x half> poison, <4 x i32>
%tmp32 = shufflevector <4 x half> %tmp25, <4 x half> %tmp31, <4 x i32>
store <4 x half> %tmp32, ptr addrspace(1) %arrayidx2, align 8
ret void
@@ -2006,7 +2006,7 @@ define amdgpu_kernel void @shuffle_scalar_load_v8i32_0123(ptr addrspace(4) %in,
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[2:3]
; GFX11-NEXT: s_endpgm
%ld8 = load <8 x i32>, ptr addrspace(4) %in, align 16
- %id = shufflevector <8 x i32> %ld8, <8 x i32> undef, <4 x i32>
+ %id = shufflevector <8 x i32> %ld8, <8 x i32> poison, <4 x i32>
store <4 x i32> %id, ptr addrspace(1) %out, align 8
ret void
}
@@ -2052,7 +2052,7 @@ define <2 x half> @low16bits_v2f16(ptr addrspace(1) %x0, ptr addrspace(1) %x1) {
entry:
%0 = load <2 x half>, ptr addrspace(1) %x0, align 4
%1 = load <2 x half>, ptr addrspace(1) %x1, align 4
- %vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32>
+ %vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32>
%vy1.2.vec.insert = shufflevector <2 x half> %vy1.0.vec.insert, <2 x half> %1, <2 x i32>
ret <2 x half> %vy1.2.vec.insert
}
@@ -2098,7 +2098,7 @@ define <2 x half> @hi16bits_v2f16(ptr addrspace(1) %x0, ptr addrspace(1) %x1) {
entry:
%0 = load <2 x half>, ptr addrspace(1) %x0, align 4
%1 = load <2 x half>, ptr addrspace(1) %x1, align 4
- %vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32>
+ %vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32>
%vy1.2.vec.insert = shufflevector <2 x half> %vy1.0.vec.insert, <2 x half> %1, <2 x i32>
ret <2 x half> %vy1.2.vec.insert
}
@@ -2144,7 +2144,7 @@ define <2 x half> @low16hi16bits_v2f16(ptr addrspace(1) %x0, ptr addrspace(1) %x
entry:
%0 = load <2 x half>, ptr addrspace(1) %x0, align 4
%1 = load <2 x half>, ptr addrspace(1) %x1, align 4
- %vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32>
+ %vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32>
%vy1.2.vec.insert = shufflevector <2 x half> %vy1.0.vec.insert, <2 x half> %1, <2 x i32>
ret <2 x half> %vy1.2.vec.insert
}
@@ -2179,7 +2179,7 @@ define <2 x half> @hi16low16bits_v2bf16(ptr addrspace(1) %x0, ptr addrspace(1) %
entry:
%0 = load <2 x half>, ptr addrspace(1) %x0, align 4
%1 = load <2 x half>, ptr addrspace(1) %x1, align 4
- %vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32>
+ %vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32>
%vy1.2.vec.insert = shufflevector <2 x half> %vy1.0.vec.insert, <2 x half> %1, <2 x i32>
ret <2 x half> %vy1.2.vec.insert
}
@@ -2225,7 +2225,7 @@ define <2 x i16> @i16_low16bits(ptr addrspace(1) %x0, ptr addrspace(1) %x1) {
entry:
%0 = load <2 x i16>, ptr addrspace(1) %x0, align 4
%1 = load <2 x i16>, ptr addrspace(1) %x1, align 4
- %vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32>
+ %vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32>
%vy1.2.vec.insert = shufflevector <2 x i16> %vy1.0.vec.insert, <2 x i16> %1, <2 x i32>
ret <2 x i16> %vy1.2.vec.insert
}
@@ -2271,7 +2271,7 @@ define <2 x i16> @i16_low16hi16bits(ptr addrspace(1) %x0, ptr addrspace(1) %x1)
entry:
%0 = load <2 x i16>, ptr addrspace(1) %x0, align 4
%1 = load <2 x i16>, ptr addrspace(1) %x1, align 4
- %vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32>
+ %vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32>
%vy1.2.vec.insert = shufflevector <2 x i16> %vy1.0.vec.insert, <2 x i16> %1, <2 x i32>
ret <2 x i16> %vy1.2.vec.insert
}
@@ -2306,7 +2306,7 @@ define <2 x i16> @i16_hi16low16bits(ptr addrspace(1) %x0, ptr addrspace(1) %x1)
entry:
%0 = load <2 x i16>, ptr addrspace(1) %x0, align 4
%1 = load <2 x i16>, ptr addrspace(1) %x1, align 4
- %vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32>
+ %vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32>
%vy1.2.vec.insert = shufflevector <2 x i16> %vy1.0.vec.insert, <2 x i16> %1, <2 x i32>
ret <2 x i16> %vy1.2.vec.insert
}
@@ -2352,7 +2352,7 @@ define <2 x i16> @i16_hi16bits(ptr addrspace(1) %x0, ptr addrspace(1) %x1) {
entry:
%0 = load <2 x i16>, ptr addrspace(1) %x0, align 4
%1 = load <2 x i16>, ptr addrspace(1) %x1, align 4
- %vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32>
+ %vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32>
%vy1.2.vec.insert = shufflevector <2 x i16> %vy1.0.vec.insert, <2 x i16> %1, <2 x i32>
ret <2 x i16> %vy1.2.vec.insert
}
@@ -3020,7 +3020,7 @@ define <4 x bfloat> @shuffle_v4bf16_23uu(ptr addrspace(1) %arg0, ptr addrspace(1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
%val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
- %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32>
+ %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32>
ret <4 x bfloat> %shuffle
}

@@ -3067,7 +3067,7 @@ define <4 x bfloat> @shuffle_v4bf16_234u(ptr addrspace(1) %arg0, ptr addrspace(1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
%val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
- %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32>
+ %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32>
ret <4 x bfloat> %shuffle
}

@@ -3094,7 +3094,7 @@ define <4 x bfloat> @shuffle_v4bf16_u1u3(ptr addrspace(1) %arg0, ptr addrspace(1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
%val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
- %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32>
+ %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32>
ret <4 x bfloat> %shuffle
}

@@ -3133,7 +3133,7 @@ define <4 x bfloat> @shuffle_v4bf16_u3u1(ptr addrspace(1) %arg0, ptr addrspace(1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
%val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
- %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32>
+ %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32>
ret <4 x bfloat> %shuffle
}

@@ -3160,7 +3160,7 @@ define <4 x bfloat> @shuffle_v4bf16_u3uu(ptr addrspace(1) %arg0, ptr addrspace(1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
%val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
- %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32>
+ %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32>
ret <4 x bfloat> %shuffle
}

@@ -3209,7 +3209,7 @@ define <4 x bfloat> @shuffle_v4bf16_3u6u(ptr addrspace(1) %arg0, ptr addrspace(1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
%val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
- %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32>
+ %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32>
ret <4 x bfloat> %shuffle
}

@@ -3258,7 +3258,7 @@ define <4 x bfloat> @shuffle_v4bf16_3uu7(ptr addrspace(1) %arg0, ptr addrspace(1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
%val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
- %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32>
+ %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32>
ret <4 x bfloat> %shuffle
}

@@ -3305,7 +3305,7 @@ define <4 x bfloat> @shuffle_v4bf16_35u5(ptr addrspace(1) %arg0, ptr addrspace(1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
%val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
- %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32>
+ %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32>
ret <4 x bfloat> %shuffle
}

@@ -3357,7 +3357,7 @@ define <4 x bfloat> @shuffle_v4bf16_357u(ptr addrspace(1) %arg0, ptr addrspace(1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
%val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
- %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32>
+ %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32>
ret <4 x bfloat> %shuffle
}

@@ -5059,21 +5059,21 @@ entry:
%tmp15 = load <4 x bfloat>, ptr addrspace(1) %arrayidx1, align 8
%arrayidx2 = getelementptr inbounds <4 x bfloat>, ptr addrspace(1) %C, i64 %tmp12
%tmp16 = load <4 x bfloat>, ptr addrspace(1) %arrayidx2, align 8
- %tmp17 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> undef, <2 x i32> zeroinitializer
- %tmp18 = shufflevector <4 x bfloat> %tmp15, <4 x bfloat> undef, <2 x i32>
- %tmp19 = shufflevector <4 x bfloat> %tmp16, <4 x bfloat> undef, <2 x i32>
+ %tmp17 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> poison, <2 x i32> zeroinitializer
+ %tmp18 = shufflevector <4 x bfloat> %tmp15, <4 x bfloat> poison, <2 x i32>
+ %tmp19 = shufflevector <4 x bfloat> %tmp16, <4 x bfloat> poison, <2 x i32>
%tmp20 = tail call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> %tmp17, <2 x bfloat> %tmp18, <2 x bfloat> %tmp19)
- %tmp21 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> undef, <2 x i32>
- %tmp22 = shufflevector <4 x bfloat> %tmp15, <4 x bfloat> undef, <2 x i32>
+ %tmp21 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> poison, <2 x i32>
+ %tmp22 = shufflevector <4 x bfloat> %tmp15, <4 x bfloat> poison, <2 x i32>
%tmp23 = tail call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> %tmp21, <2 x bfloat> %tmp22, <2 x bfloat> %tmp20)
- %tmp24 = shufflevector <2 x bfloat> %tmp23, <2 x bfloat> undef, <4 x i32>
+ %tmp24 = shufflevector <2 x bfloat> %tmp23, <2 x bfloat> poison, <4 x i32>
%tmp25 = shufflevector <4 x bfloat> %tmp24, <4 x bfloat> %tmp16, <4 x i32>
- %tmp26 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> undef, <2 x i32>
- %tmp27 = shufflevector <4 x bfloat> %tmp25, <4 x bfloat> undef, <2 x i32>
+ %tmp26 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> poison, <2 x i32>
+ %tmp27 = shufflevector <4 x bfloat> %tmp25, <4 x bfloat> poison, <2 x i32>
%tmp28 = tail call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> %tmp26, <2 x bfloat> %tmp18, <2 x bfloat> %tmp27)
- %tmp29 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> undef, <2 x i32>
+ %tmp29 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> poison, <2 x i32>
%tmp30 = tail call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> %tmp29, <2 x bfloat> %tmp22, <2 x bfloat> %tmp28)
- %tmp31 = shufflevector <2 x bfloat> %tmp30, <2 x bfloat> undef, <4 x i32>
+ %tmp31 = shufflevector <2 x bfloat> %tmp30, <2 x bfloat> poison, <4 x i32>
%tmp32 = shufflevector <4 x bfloat> %tmp25, <4 x bfloat> %tmp31, <4 x i32>
store <4 x bfloat> %tmp32, ptr addrspace(1) %arrayidx2, align 8
ret void
@@ -5172,7 +5172,7 @@ define <2 x bfloat> @low16bits(ptr addrspace(1) %x0, ptr addrspace(1) %x1) {
entry:
%0 = load <2 x bfloat>, ptr addrspace(1) %x0, align 4
%1 = load <2 x bfloat>, ptr addrspace(1) %x1, align 4
- %vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32>
+ %vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32>
%vy1.2.vec.insert = shufflevector <2 x bfloat> %vy1.0.vec.insert, <2 x bfloat> %1, <2 x i32>
ret <2 x bfloat> %vy1.2.vec.insert
}
@@ -5218,7 +5218,7 @@ define <2 x bfloat> @hi16bits_v2bf16(ptr addrspace(1) %x0, ptr addrspace(1) %x1)
entry:
%0 = load <2 x bfloat>, ptr addrspace(1) %x0, align 4
%1 = load <2 x bfloat>, ptr addrspace(1) %x1, align 4
- %vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32>
+ %vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32>
%vy1.2.vec.insert = shufflevector <2 x bfloat> %vy1.0.vec.insert, <2 x bfloat> %1, <2 x i32>
ret <2 x bfloat> %vy1.2.vec.insert
}
@@ -5264,7 +5264,7 @@ define <2 x bfloat> @low16hi16bits_v2bf16(ptr addrspace(1) %x0, ptr addrspace(1)
entry:
%0 = load <2 x bfloat>, ptr addrspace(1) %x0, align 4
%1 = load <2 x bfloat>, ptr addrspace(1) %x1, align 4
- %vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32>
+ %vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32>
%vy1.2.vec.insert = shufflevector <2 x bfloat> %vy1.0.vec.insert, <2 x bfloat> %1, <2 x i32>
ret <2 x bfloat> %vy1.2.vec.insert
}
@@ -5299,7 +5299,7 @@ define <2 x bfloat> @hi16low16bits(ptr addrspace(1) %x0, ptr addrspace(1) %x1) {
entry:
%0 = load <2 x bfloat>, ptr addrspace(1) %x0, align 4
%1 = load <2 x bfloat>, ptr addrspace(1) %x1, align 4
- %vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32>
+ %vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32>
%vy1.2.vec.insert = shufflevector <2 x bfloat> %vy1.0.vec.insert, <2 x bfloat> %1, <2 x i32>
ret <2 x bfloat> %vy1.2.vec.insert
}
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
index 2c6c9a50a72f9..30d9465c33e40 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
@@ -543,12 +543,12 @@ if.then9: ; preds = %entry
sw.bb: ; preds = %if.then9
%i17 = load i8, ptr addrspace(1) null, align 1
%i18 = insertelement <4 x i8> zeroinitializer, i8 %i17, i64 0
- %a.sroa.0.0.vecblend = shufflevector <4 x i8> %i18, <4 x i8> zeroinitializer, <4 x i32>
+ %a.sroa.0.0.vecblend = shufflevector <4 x i8> %i18, <4 x i8> zeroinitializer, <4 x i32>
br label %sw.bb18

sw.bb18: ; preds = %sw.bb, %if.then9
%a.sroa.0.0 = phi <4 x i8> [ %a.sroa.0.0.vecblend, %sw.bb ], [ poison, %if.then9 ]
- %a.sroa.0.0.vec.extract61 = shufflevector <4 x i8> %a.sroa.0.0, <4 x i8> zeroinitializer, <3 x i32>
+ %a.sroa.0.0.vec.extract61 = shufflevector <4 x i8> %a.sroa.0.0, <4 x i8> zeroinitializer, <3 x i32>
%i19 = insertelement <3 x i8> %a.sroa.0.0.vec.extract61, i8 0, i64 0
%i20 = select <3 x i1> zeroinitializer, <3 x i8> zeroinitializer, <3 x i8> %i19
%i21 = extractelement <3 x i8> %i20, i64 1
diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll
index 48ab512a7ae0d..2833237c1ea37 100644
--- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll
@@ -421,7 +421,7 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negC_pack(<8 x half> %A, <8 x
; GFX12-NEXT: s_endpgm
bb:
%C = load <16 x half>, ptr %Caddr
- %C_shuffle = shufflevector <16 x half> %C, <16 x half> undef, <8 x i32>
+ %C_shuffle = shufflevector <16 x half> %C, <16 x half> poison, <8 x i32>
%fneg.C_shuffle = fneg <8 x half> %C_shuffle
%res = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<8 x half> %A, <8 x half> %B, <8 x half> %fneg.C_shuffle , i1 0)
store <8 x half> %res, ptr addrspace(1) %out
diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll
index 8f7cd5cb2bb65..98da9ef2e8819 100644
--- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll
@@ -378,7 +378,7 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negC_pack(<4 x half> %A, <4 x
; GFX12-NEXT: s_endpgm
bb:
%C = load <8 x half>, ptr %Caddr
- %C_shuffle = shufflevector <8 x half> %C, <8 x half> undef, <4 x i32>
+ %C_shuffle = shufflevector <8 x half> %C, <8 x half> poison, <4 x i32>
%fneg.C_shuffle = fneg <4 x half> %C_shuffle
%res = call <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<4 x half> %A, <4 x half> %B, <4 x half> %fneg.C_shuffle , i1 0)
store <4 x half> %res, ptr addrspace(1) %out
diff --git a/llvm/test/CodeGen/AMDGPU/wmma_modifiers.ll b/llvm/test/CodeGen/AMDGPU/wmma_modifiers.ll
index 2efbd3277f209..692491457ae3d 100644
--- a/llvm/test/CodeGen/AMDGPU/wmma_modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/wmma_modifiers.ll
@@ -12,9 +12,9 @@ define amdgpu_cs void @xyz () {
br label %loop
loop:
%ld = load <8 x float>, ptr addrspace(5) null, align 32
- %in_shuffle = shufflevector <8 x float> %ld, <8 x float> undef, <4 x i32>
+ %in_shuffle = shufflevector <8 x float> %ld, <8 x float> poison, <4 x i32>
%wmma = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v16f16(<16 x half> undef, <16 x half> undef, <4 x float> %in_shuffle)
- %out_shuffle = shufflevector <4 x float> %wmma, <4 x float> undef, <8 x i32>
+ %out_shuffle = shufflevector <4 x float> %wmma, <4 x float> poison, <8 x i32>
store <8 x float> %out_shuffle, ptr addrspace(5) null, align 32
br i1 false, label %.exit, label %loop
.exit:
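For reference, the pattern these tests converge on can be sketched with a minimal standalone function (illustrative only; the function name is made up and does not appear in the patch). In current LLVM IR, poison is the preferred placeholder both for an unused shufflevector operand and for don't-care mask lanes, while undef is being phased out:

  define <2 x half> @example_low_lane(<4 x half> %v) {
    ; Result lane 0 takes lane 0 of %v; result lane 1 is a don't-care, so its
    ; mask element is poison, and the unused second vector operand is poison too.
    %s = shufflevector <4 x half> %v, <4 x half> poison, <2 x i32> <i32 0, i32 poison>
    ret <2 x half> %s
  }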