diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll
index 39423ea22cbb4..d317a3ef54162 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll
@@ -12,7 +12,7 @@ define void @value_finder_bug(ptr addrspace(5) %store_ptr, ptr addrspace(4) %ptr
 ; GFX10-NEXT: s_setpc_b64 s[30:31]
 %vec = load <4 x float>, ptr addrspace(4) %ptr, align 4
 %vec.3 = extractelement <4 x float> %vec, i32 3
- %shuffle = shufflevector <4 x float> %vec, <4 x float> undef, <2 x i32>
+ %shuffle = shufflevector <4 x float> %vec, <4 x float> poison, <2 x i32>
 %new_vec = insertelement <2 x float> %shuffle, float %vec.3, i32 1
 store <2 x float> %new_vec, ptr addrspace(5) %store_ptr, align 8
 ret void
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
index 298dfcf048fc4..ece08e40ae453 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
@@ -870,10 +870,10 @@ define void @dyn_insertelement_v8f64_const_s_v_v(double %val, i32 %idx) {
 ; GFX11-NEXT: s_setpc_b64 s[30:31]
 entry:
 %insert = insertelement <8 x double> , double %val, i32 %idx
- %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
+ %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
 store volatile <2 x double> %vec.0, ptr addrspace(1) undef
 store volatile <2 x double> %vec.1, ptr addrspace(1) undef
 store volatile <2 x double> %vec.2, ptr addrspace(1) undef
@@ -1081,10 +1081,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_s_s_v(<8 x double> inreg %vec, do
 ; GFX11-NEXT: s_endpgm
 entry:
 %insert = insertelement <8 x double> %vec, double %val, i32 %idx
- %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
+ %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
 store volatile <2 x double> %vec.0, ptr addrspace(1) undef
 store volatile <2 x double> %vec.1, ptr addrspace(1) undef
 store volatile <2 x double> %vec.2, ptr addrspace(1) undef
@@ -1229,10 +1229,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_s_v_s(<8 x double> inreg %vec, do
 ; GFX11-NEXT: s_endpgm
 entry:
 %insert = insertelement <8 x double> %vec, double %val, i32 %idx
- %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
+ %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
 store volatile <2 x double> %vec.0, ptr addrspace(1) undef
 store volatile <2 x double> %vec.1, ptr addrspace(1) undef
 store volatile <2 x double> %vec.2, ptr addrspace(1) undef
@@ -1289,10 +1289,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_s_s(<8 x double> %vec, double i
 ; GFX11-NEXT: s_endpgm
 entry:
 %insert = insertelement <8 x double> %vec, double %val, i32 %idx
- %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
+ %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
 store volatile <2 x double> %vec.0, ptr addrspace(1) undef
 store volatile <2 x double> %vec.1, ptr addrspace(1) undef
 store volatile <2 x double> %vec.2, ptr addrspace(1) undef
@@ -1494,10 +1494,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_s_v_v(<8 x double> inreg %vec, do
 ; GFX11-NEXT: s_endpgm
 entry:
 %insert = insertelement <8 x double> %vec, double %val, i32 %idx
- %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
+ %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
 store volatile <2 x double> %vec.0, ptr addrspace(1) undef
 store volatile <2 x double> %vec.1, ptr addrspace(1) undef
 store volatile <2 x double> %vec.2, ptr addrspace(1) undef
@@ -1617,10 +1617,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_s_v(<8 x double> %vec, double i
 ; GFX11-NEXT: s_endpgm
 entry:
 %insert = insertelement <8 x double> %vec, double %val, i32 %idx
- %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
+ %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
 store volatile <2 x double> %vec.0, ptr addrspace(1) undef
 store volatile <2 x double> %vec.1, ptr addrspace(1) undef
 store volatile <2 x double> %vec.2, ptr addrspace(1) undef
@@ -1677,10 +1677,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_v_s(<8 x double> %vec, double %
 ; GFX11-NEXT: s_endpgm
 entry:
 %insert = insertelement <8 x double> %vec, double %val, i32 %idx
- %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
+ %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
 store volatile <2 x double> %vec.0, ptr addrspace(1) undef
 store volatile <2 x double> %vec.1, ptr addrspace(1) undef
 store volatile <2 x double> %vec.2, ptr addrspace(1) undef
@@ -1794,10 +1794,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_v_v(<8 x double> %vec, double %
 ; GFX11-NEXT: s_endpgm
 entry:
 %insert = insertelement <8 x double> %vec, double %val, i32 %idx
- %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
+ %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
 store volatile <2 x double> %vec.0, ptr addrspace(1) undef
 store volatile <2 x double> %vec.1, ptr addrspace(1) undef
 store volatile <2 x double> %vec.2, ptr addrspace(1) undef
@@ -2401,10 +2401,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_s_s_s_add_1(<8 x double> inreg %v
 entry:
 %idx.add = add i32 %idx, 1
 %insert = insertelement <8 x double> %vec, double %val, i32 %idx.add
- %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
+ %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
 store volatile <2 x double> %vec.0, ptr addrspace(1) undef
 store volatile <2 x double> %vec.1, ptr addrspace(1) undef
 store volatile <2 x double> %vec.2, ptr addrspace(1) undef
@@ -2525,10 +2525,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_v_v_add_1(<8 x double> %vec, do
 entry:
 %idx.add = add i32 %idx, 1
 %insert = insertelement <8 x double> %vec, double %val, i32 %idx.add
- %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
- %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32>
+ %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
+ %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32>
 store volatile <2 x double> %vec.0, ptr addrspace(1) undef
 store volatile <2 x double> %vec.1, ptr addrspace(1) undef
 store volatile <2 x double> %vec.2, ptr addrspace(1) undef
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
index 78f33a174980d..a7b063427bc8d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
@@ -1519,7 +1519,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
 ; CHECK-NEXT: S_ENDPGM 0
 %load = load <2 x i64>, ptr addrspace(1) null
- %val = shufflevector <2 x i64> %load, <2 x i64> , <3 x i32>
+ %val = shufflevector <2 x i64> %load, <2 x i64> , <3 x i32>
 call void @external_void_func_v3i64(<3 x i64> %val)
 ret void
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll
index e2dab03e410aa..744a5b7feb48d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll
@@ -322,7 +322,7 @@ define i32 @v_sdot2_shuffle10_a(<2 x i16> %a, <2 x i16> %b, i32 %c) {
 ; GFX10-NEXT: v_alignbit_b32 v0, v0, v0, 16
 ; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, v2
 ; GFX10-NEXT: s_setpc_b64 s[30:31]
- %shuf.a = shufflevector <2 x i16> %a, <2 x i16> undef, <2 x i32>
+ %shuf.a = shufflevector <2 x i16> %a, <2 x i16> poison, <2 x i32>
 %r = call i32 @llvm.amdgcn.sdot2(<2 x i16> %shuf.a, <2 x i16> %b, i32 %c, i1 false)
 ret i32 %r
 }
@@ -349,7 +349,7 @@ define i32 @v_sdot2_shuffle10_b(<2 x i16> %a, <2 x i16> %b, i32 %c) {
 ; GFX10-NEXT: v_alignbit_b32 v1, v1, v1, 16
 ; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, v2
 ; GFX10-NEXT: s_setpc_b64 s[30:31]
- %shuf.b = shufflevector <2 x i16> %b, <2 x i16> undef, <2 x i32>
+ %shuf.b = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32>
 %r = call i32 @llvm.amdgcn.sdot2(<2 x i16> %a, <2 x i16> %shuf.b, i32 %c, i1 false)
 ret i32 %r
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll
index 3acff52874dd9..9e623494a5a04 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll
@@ -306,7 +306,7 @@ define i32 @v_udot2_shuffle10_a(<2 x i16> %a, <2 x i16> %b, i32 %c) {
 ; GFX10-NEXT: v_alignbit_b32 v0, v0, v0, 16
 ; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, v2
 ; GFX10-NEXT: s_setpc_b64 s[30:31]
- %shuf.a = shufflevector <2 x i16> %a, <2 x i16> undef, <2 x i32>
+ %shuf.a = shufflevector <2 x i16> %a, <2 x i16> poison, <2 x i32>
 %r = call i32 @llvm.amdgcn.udot2(<2 x i16> %shuf.a, <2 x i16> %b, i32 %c, i1 false)
 ret i32 %r
 }
@@ -332,7 +332,7 @@ define i32 @v_udot2_shuffle10_b(<2 x i16> %a, <2 x i16> %b, i32 %c) {
 ; GFX10-NEXT: v_alignbit_b32 v1, v1, v1, 16
 ; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, v2
 ; GFX10-NEXT: s_setpc_b64 s[30:31]
- %shuf.b = shufflevector <2 x i16> %b, <2 x i16> undef, <2 x i32>
+ %shuf.b = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32>
 %r = call i32 @llvm.amdgcn.udot2(<2 x i16> %a, <2 x i16> %shuf.b, i32 %c, i1 false)
 ret i32 %r
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/trunc.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/trunc.ll
index 0cb346c35552c..569ed35b150d8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/trunc.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/trunc.ll
@@ -121,7 +121,7 @@ define amdgpu_ps i32 @s_trunc_v2i32_to_v2i16(<2 x i32> inreg %src) {
 ;
 ; FIXME: G_INSERT mishandled
 ; define <2 x i32> @v_trunc_v3i32_to_v3i16(<3 x i32> %src) {
 ; %trunc = trunc <3 x i32> %src to <3 x i16>
-; %ext = shufflevector <3 x i16> %trunc, <3 x i16> undef, <4 x i32>
+; %ext = shufflevector <3 x i16> %trunc, <3 x i16> poison, <4 x i32>
 ; %cast = bitcast <4 x i16> %ext to <2 x i32>
 ; ret <2 x i32> %cast
 ; }
@@ -129,7 +129,7 @@ define amdgpu_ps i32 @s_trunc_v2i32_to_v2i16(<2 x i32> inreg %src) {
 ;
 ; FIXME: G_INSERT mishandled
 ; define amdgpu_ps <2 x i32> @s_trunc_v3i32_to_v3i16(<3 x i32> inreg %src) {
 ; %trunc = trunc <3 x i32> %src to <3 x i16>
-; %ext = shufflevector <3 x i16> %trunc, <3 x i16> undef, <4 x i32>
+; %ext = shufflevector <3 x i16> %trunc, <3 x i16> poison, <4 x i32>
 ; %cast = bitcast <4 x i16> %ext to <2 x i32>
 ; ret <2 x i32> %cast
 ; }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll
index 9cf9839e69d5d..e12ac25867adb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll
@@ -426,7 +426,7 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negC_pack(<8 x half> %A, <8 x
 ; GFX12-NEXT: s_endpgm
 bb:
 %C = load <16 x half>, ptr %Caddr
- %C_shuffle = shufflevector <16 x half> %C, <16 x half> undef, <8 x i32>
+ %C_shuffle = shufflevector <16 x half> %C, <16 x half> poison, <8 x i32>
 %fneg.C_shuffle = fneg <8 x half> %C_shuffle
 %res = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<8 x half> %A, <8 x half> %B, <8 x half> %fneg.C_shuffle , i1 0)
 store <8 x half> %res, ptr addrspace(1) %out
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll
index be1761227f802..b871ac7976222 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll
@@ -381,7 +381,7 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negC_pack(<4 x half> %A, <4 x
 ; GFX12-NEXT: s_endpgm
 bb:
 %C = load <8 x half>, ptr %Caddr
- %C_shuffle = shufflevector <8 x half> %C, <8 x half> undef, <4 x i32>
+ %C_shuffle = shufflevector <8 x half> %C, <8 x half> poison, <4 x i32>
 %fneg.C_shuffle = fneg <4 x half> %C_shuffle
 %res = call <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<4 x half> %A, <4 x half> %B, <4 x half> %fneg.C_shuffle , i1 0)
 store <4 x half> %res, ptr addrspace(1) %out
diff --git a/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll b/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
index ec4e2dda10d3a..d0c7a803ed6b5 100644
--- a/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
@@ -9,7 +9,7 @@ define amdgpu_ps void @adjust_writemask_crash_0_nochain() #0 {
 main_body:
 %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
 %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
- %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32>
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32>
 %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
 %tmp4 = extractelement <4 x float> %tmp3, i32 0
 store volatile float %tmp4, ptr addrspace(1) undef
@@ -25,7 +25,7 @@ define amdgpu_ps void @adjust_writemask_crash_1_nochain() #0 {
 main_body:
 %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
 %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
- %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32>
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32>
 %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
 %tmp4 = extractelement <4 x float> %tmp3, i32 1
 store volatile float %tmp4, ptr addrspace(1) undef
@@ -41,7 +41,7 @@ define amdgpu_ps void @adjust_writemask_crash_0_chain() #0 {
 main_body:
 %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
 %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
- %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32>
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32>
 %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
 %tmp4 = extractelement <4 x float> %tmp3, i32 0
 store volatile float %tmp4, ptr addrspace(1) undef
@@ -57,7 +57,7 @@ define amdgpu_ps void @adjust_writemask_crash_1_chain() #0 {
 main_body:
 %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
 %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
- %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32>
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32>
 %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
 %tmp4 = extractelement <4 x float> %tmp3, i32 1
 store volatile float %tmp4, ptr addrspace(1) undef
@@ -68,7 +68,7 @@ define amdgpu_ps void @adjust_writemask_crash_0_v4() #0 {
 main_body:
 %tmp = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 5, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
 %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
- %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32>
+ %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> poison, <4 x i32>
 %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
 %tmp4 = extractelement <4 x float> %tmp3, i32 0
 store volatile float %tmp4, ptr addrspace(1) undef
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll
index 651b030bd3f79..c94b33334646d 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll
@@ -477,7 +477,7 @@ entry:
 then:
 %x.1 = insertelement <5 x double> , double %x, i32 %idx
- %0 = shufflevector <5 x double> %x.1, <5 x double> , <5 x i32>
+ %0 = shufflevector <5 x double> %x.1, <5 x double> , <5 x i32>
 %x.4 = insertelement <5 x double> %0, double %x, i64 2
 br label %finally
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal_scratch_mergedshader.ll b/llvm/test/CodeGen/AMDGPU/amdpal_scratch_mergedshader.ll
index b610fca02f92e..2ddb2fea5ddc6 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal_scratch_mergedshader.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal_scratch_mergedshader.ll
@@ -18,7 +18,7 @@ define amdgpu_hs void @_amdgpu_hs_main(i32 inreg %arg, i32 inreg %arg1, i32 inre
 .beginls: ; preds = %.entry
 %tmp15 = extractelement <6 x i32> %arg8, i32 3
 %.0.vec.insert.i = insertelement <2 x i32> poison, i32 %tmp15, i32 0
- %.4.vec.insert.i = shufflevector <2 x i32> %.0.vec.insert.i, <2 x i32> undef, <2 x i32>
+ %.4.vec.insert.i = shufflevector <2 x i32> %.0.vec.insert.i, <2 x i32> poison, <2 x i32>
 %tmp16 = bitcast <2 x i32> %.4.vec.insert.i to i64
 br label %.endls
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-rsrc-ptr-ops.ll b/llvm/test/CodeGen/AMDGPU/buffer-rsrc-ptr-ops.ll
index ce55e9171c818..84a4b579fef0c 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-rsrc-ptr-ops.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-rsrc-ptr-ops.ll
@@ -49,7 +49,7 @@ main_body:
 %buf1.int = ptrtoint ptr addrspace(8) %buf1 to i128
 %buf1.vec = bitcast i128 %buf1.int to <4 x i32>
 call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> %buf1.vec, ptr addrspace(8) %buf2, i32 0, i32 0, i32 0)
- %shuffled = shufflevector <2 x ptr addrspace(8)> %buffers, <2 x ptr addrspace(8)> undef, <2 x i32>
+ %shuffled = shufflevector <2 x ptr addrspace(8)> %buffers, <2 x ptr addrspace(8)> poison, <2 x i32>
 %somewhere.next = getelementptr <2 x ptr addrspace(8)>, ptr addrspace(1) %somewhere, i64 1
 store <2 x ptr addrspace(8)> %shuffled, ptr addrspace(1) %somewhere.next
 ret void
diff --git a/llvm/test/CodeGen/AMDGPU/bug-deadlanes.ll b/llvm/test/CodeGen/AMDGPU/bug-deadlanes.ll
index 8a24845980e1a..95f97adcada31 100644
--- a/llvm/test/CodeGen/AMDGPU/bug-deadlanes.ll
+++ b/llvm/test/CodeGen/AMDGPU/bug-deadlanes.ll
@@ -45,7 +45,7 @@ bb1789: ; preds = %bb1750
 %i1879 = bitcast <3 x i32> %i1878 to <3 x float>
 %i1881 = fmul reassoc nnan nsz arcp contract afn <3 x float> %i1540, %i1879
 %i1882 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> poison, i32 poison, i32 0)
- %i1883 = shufflevector <3 x i32> %i1882, <3 x i32> poison, <4 x i32>
+ %i1883 = shufflevector <3 x i32> %i1882, <3 x i32> poison, <4 x i32>
 %i1884 = bitcast <4 x i32> %i1883 to <4 x float>
 %i1885 = shufflevector <4 x float> %i1884, <4 x float> poison, <3 x i32>
 %i1886 = insertelement <3 x i32> poison, i32 %i1819, i64 0
@@ -57,7 +57,7 @@ bb1789: ; preds = %bb1750
 %i1892 = fmul reassoc nnan nsz arcp contract afn <3 x float> %i1885, %i1891
 %i1893 = fmul reassoc nnan nsz arcp contract afn <3 x float> %i1892, %i1881
 %i1894 = bitcast <3 x float> %i1893 to <3 x i32>
- %i1895 = shufflevector <3 x i32> %i1894, <3 x i32> poison, <4 x i32>
+ %i1895 = shufflevector <3 x i32> %i1894, <3 x i32> poison, <4 x i32>
 %i1896 = insertelement <4 x i32> %i1895, i32 %i1819, i64 3
 br label %bb1897
@@ -70,7 +70,7 @@ bb1897: ; preds = %bb1789, %bb1787
 %i1901 = bitcast <3 x i32> %i1900 to <3 x float>
 %i1902 = fadd reassoc nnan nsz arcp contract afn <3 x float> %i1901, %i1899
 %i1903 = bitcast <3 x float> %i1902 to <3 x i32>
- %i1907 = shufflevector <3 x i32> %i1903, <3 x i32> poison, <4 x i32>
+ %i1907 = shufflevector <3 x i32> %i1903, <3 x i32> poison, <4 x i32>
 %i1908 = shufflevector <4 x i32> %i1907, <4 x i32> %__llpc_global_proxy_r11.19, <4 x i32>
 %i1914 = shufflevector <4 x i32> %i1908, <4 x i32> poison, <4 x i32>
 %__llpc_global_proxy_r3.12.vec.extract2358 = extractelement <2 x i32> zeroinitializer, i64 1
diff --git a/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll b/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
index f369b24e0f1b3..5c7172ff8d047 100644
--- a/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
@@ -168,7 +168,7 @@ define void @undef_lo2_v4i16(<2 x i16> %arg0) {
 ; GFX8-NEXT: ; use v[0:1]
 ; GFX8-NEXT: ;;#ASMEND
 ; GFX8-NEXT: s_setpc_b64 s[30:31]
- %undef.lo = shufflevector <2 x i16> %arg0, <2 x i16> undef, <4 x i32>
+ %undef.lo = shufflevector <2 x i16> %arg0, <2 x i16> poison, <4 x i32>
 call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.lo);
 ret void
 }
@@ -193,7 +193,7 @@ define void @undef_lo2_v4f16(<2 x half> %arg0) {
 ; GFX8-NEXT: ; use v[0:1]
 ; GFX8-NEXT: ;;#ASMEND
 ; GFX8-NEXT: s_setpc_b64 s[30:31]
- %undef.lo = shufflevector <2 x half> %arg0, <2 x half> undef, <4 x i32>
+ %undef.lo = shufflevector <2 x half> %arg0, <2 x half> poison, <4 x i32>
 call void asm sideeffect "; use $0", "v"(<4 x half> %undef.lo);
 ret void
 }
@@ -348,7 +348,7 @@ define void @undef_hi2_v4i16(<2 x i16> %arg0) {
 ; GFX8-NEXT: ; use v[0:1]
 ; GFX8-NEXT: ;;#ASMEND
 ; GFX8-NEXT: s_setpc_b64 s[30:31]
- %undef.hi = shufflevector <2 x i16> %arg0, <2 x i16> undef, <4 x i32>
+ %undef.hi = shufflevector <2 x i16> %arg0, <2 x i16> poison, <4 x i32>
 call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.hi);
 ret void
 }
@@ -369,7 +369,7 @@ define void @undef_hi2_v4f16(<2 x half> %arg0) {
 ; GFX8-NEXT: ; use v[0:1]
 ; GFX8-NEXT: ;;#ASMEND
 ; GFX8-NEXT: s_setpc_b64 s[30:31]
- %undef.hi = shufflevector <2 x half> %arg0, <2 x half> undef, <4 x i32>
+ %undef.hi = shufflevector <2 x half> %arg0, <2 x half> poison, <4 x i32>
 call void asm sideeffect "; use $0", "v"(<4 x half> %undef.hi);
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
index 3451e389fef8b..775cad3f3cec1 100644
--- a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
@@ -1454,7 +1454,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT: s_endpgm
 %load = load <2 x i64>, ptr addrspace(1) null
- %val = shufflevector <2 x i64> %load, <2 x i64> , <3 x i32>
+ %val = shufflevector <2 x i64> %load, <2 x i64> , <3 x i32>
 call void @external_void_func_v3i64(<3 x i64> %val)
 ret void
diff --git a/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll b/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
index 6571d515cfef2..49d6254dcd51a 100644
--- a/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
+++ b/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
@@ -1293,7 +1293,7 @@ define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_shuf(ptr addrspace(1) %o
 %out.gep = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
 %a = load <2 x half>, ptr addrspace(1) %gep0
 %add = fadd <2 x half> %a,
- %shuf = shufflevector <2 x half> %add, <2 x half> undef, <2 x i32>
+ %shuf = shufflevector <2 x half> %add, <2 x half> poison, <2 x i32>
 %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %shuf, <2 x half> zeroinitializer)
 %clamp = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> )
diff --git a/llvm/test/CodeGen/AMDGPU/clamp.ll b/llvm/test/CodeGen/AMDGPU/clamp.ll
index 73ed23ab681f0..b3b7ce55a56e6 100644
--- a/llvm/test/CodeGen/AMDGPU/clamp.ll
+++ b/llvm/test/CodeGen/AMDGPU/clamp.ll
@@ -3728,7 +3728,7 @@ define amdgpu_kernel void @v_clamp_v2f16_shuffle(ptr addrspace(1) %out, ptr addr
 %gep0 = getelementptr <2 x half>, ptr addrspace(1) %aptr, i32 %tid
 %out.gep = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
 %a = load <2 x half>, ptr addrspace(1) %gep0
- %shuf = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32>
+ %shuf = shufflevector <2 x half> %a, <2 x half> poison, <2 x i32>
 %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %shuf, <2 x half> zeroinitializer)
 %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> )
diff --git a/llvm/test/CodeGen/AMDGPU/coalesce-vgpr-alignment.ll b/llvm/test/CodeGen/AMDGPU/coalesce-vgpr-alignment.ll
index dcd088e2bd988..380f14b125ead 100644
--- a/llvm/test/CodeGen/AMDGPU/coalesce-vgpr-alignment.ll
+++ b/llvm/test/CodeGen/AMDGPU/coalesce-vgpr-alignment.ll
@@ -14,7 +14,7 @@ bb:
 %lid = tail call i32 @llvm.amdgcn.workitem.id.x()
 %gep1 = getelementptr inbounds <4 x i32>, ptr addrspace(1) %arg, i32 %lid
 %load = load <4 x i32>, ptr addrspace(1) %gep1, align 16
- %shuffle = shufflevector <4 x i32> %load, <4 x i32> undef, <2 x i32>
+ %shuffle = shufflevector <4 x i32> %load, <4 x i32> poison, <2 x i32>
 %gep2 = getelementptr inbounds <2 x i32>, ptr addrspace(1) %arg1, i32 %lid
 store <2 x i32> %shuffle, ptr addrspace(1) %gep2, align 8
 ret void
@@ -27,9 +27,9 @@ define amdgpu_kernel void @test_vector_creation() #0 {
 entry:
 %tmp231 = load <4 x i16>, ptr addrspace(1) undef, align 2
- %vext466 = shufflevector <4 x i16> %tmp231, <4 x i16> undef, <8 x i32>
- %vecinit467 = shufflevector <8 x i16> undef, <8 x i16> %vext466, <8 x i32>
- %vecinit471 = shufflevector <8 x i16> %vecinit467, <8 x i16> undef, <8 x i32>
+ %vext466 = shufflevector <4 x i16> %tmp231, <4 x i16> poison, <8 x i32>
+ %vecinit467 = shufflevector <8 x i16> poison, <8 x i16> %vext466, <8 x i32>
+ %vecinit471 = shufflevector <8 x i16> %vecinit467, <8 x i16> poison, <8 x i32>
 store <8 x i16> %vecinit471, ptr addrspace(1) undef, align 16
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/combine_vloads.ll b/llvm/test/CodeGen/AMDGPU/combine_vloads.ll
index 42a9b80b134cc..e2e37047912a1 100644
--- a/llvm/test/CodeGen/AMDGPU/combine_vloads.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine_vloads.ll
@@ -23,12 +23,12 @@ for.body: ; preds = %for.body, %entry
 %i.01 = phi i32 [ 0, %entry ], [ %tmp19, %for.body ]
 %vecload2 = load <8 x i32>, ptr addrspace(1) %src, align 32
 %0 = bitcast <8 x i32> %vecload2 to <32 x i8>
- %tmp5 = shufflevector <32 x i8> %0, <32 x i8> undef, <8 x i32>
- %tmp8 = shufflevector <32 x i8> %0, <32 x i8> undef, <8 x i32>
+ %tmp5 = shufflevector <32 x i8> %0, <32 x i8> poison, <8 x i32>
+ %tmp8 = shufflevector <32 x i8> %0, <32 x i8> poison, <8 x i32>
 %tmp9 = add nsw <8 x i8> %tmp5, %tmp8
- %tmp12 = shufflevector <32 x i8> %0, <32 x i8> undef, <8 x i32>
+ %tmp12 = shufflevector <32 x i8> %0, <32 x i8> poison, <8 x i32>
 %tmp13 = add nsw <8 x i8> %tmp9, %tmp12
- %tmp16 = shufflevector <32 x i8> %0, <32 x i8> undef, <8 x i32>
+ %tmp16 = shufflevector <32 x i8> %0, <32 x i8> poison, <8 x i32>
 %tmp17 = add nsw <8 x i8> %tmp13, %tmp16
 %scevgep = getelementptr <8 x i8>, ptr addrspace(1) %result, i32 %i.01
 %1 = bitcast <8 x i8> %tmp17 to <2 x i32>
diff --git a/llvm/test/CodeGen/AMDGPU/computeKnownBits-scalar-to-vector-crash.ll b/llvm/test/CodeGen/AMDGPU/computeKnownBits-scalar-to-vector-crash.ll
index 350997c9d8674..e15e701f9e805 100644
--- a/llvm/test/CodeGen/AMDGPU/computeKnownBits-scalar-to-vector-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/computeKnownBits-scalar-to-vector-crash.ll
@@ -3,7 +3,7 @@
 ; CHECK: s_waitcnt
 define <2 x i16> @main(<2 x float>) #0 {
 %2 = bitcast <2 x float> %0 to <4 x i16>
- %3 = shufflevector <4 x i16> %2, <4 x i16> undef, <2 x i32>
+ %3 = shufflevector <4 x i16> %2, <4 x i16> poison, <2 x i32>
 %4 = extractelement <4 x i16> %2, i32 0
 %5 = insertelement <2 x i16> %3, i16 %4, i32 0
 ret <2 x i16> %5
diff --git a/llvm/test/CodeGen/AMDGPU/concat_vectors.ll b/llvm/test/CodeGen/AMDGPU/concat_vectors.ll
index 2998bd3475cbe..a9e4c8991bb2b 100644
--- a/llvm/test/CodeGen/AMDGPU/concat_vectors.ll
+++ b/llvm/test/CodeGen/AMDGPU/concat_vectors.ll
@@ -289,8 +289,8 @@ define amdgpu_kernel void @test_concat_v16i16(ptr addrspace(1) %out, <16 x i16>
 define amdgpu_kernel void @concat_vector_crash(ptr addrspace(1) %out, ptr addrspace(1) %in) {
 bb:
 %tmp = load <2 x float>, ptr addrspace(1) %in, align 4
- %tmp1 = shufflevector <2 x float> %tmp, <2 x float> undef, <8 x i32>
- %tmp2 = shufflevector <8 x float> undef, <8 x float> %tmp1, <8 x i32>
+ %tmp1 = shufflevector <2 x float> %tmp, <2 x float> poison, <8 x i32>
+ %tmp2 = shufflevector <8 x float> poison, <8 x float> %tmp1, <8 x i32>
 store <8 x float> %tmp2, ptr addrspace(1) %out, align 32
 ret void
 }
@@ -301,8 +301,8 @@ define amdgpu_kernel void @concat_vector_crash2(ptr addrspace(1) %out, ptr addrs
 %tmp = load i32, ptr addrspace(1) %in, align 1
 %tmp1 = trunc i32 %tmp to i24
 %tmp2 = bitcast i24 %tmp1 to <3 x i8>
- %tmp3 = shufflevector <3 x i8> %tmp2, <3 x i8> undef, <8 x i32>
- %tmp4 = shufflevector <8 x i8> %tmp3, <8 x i8> , <8 x i32>
+ %tmp3 = shufflevector <3 x i8> %tmp2, <3 x i8> poison, <8 x i32>
+ %tmp4 = shufflevector <8 x i8> %tmp3, <8 x i8> , <8 x i32>
 store <8 x i8> %tmp4, ptr addrspace(1) %out, align 8
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/dagcomb-shuffle-vecextend-non2.ll b/llvm/test/CodeGen/AMDGPU/dagcomb-shuffle-vecextend-non2.ll
index e66fdce283026..09607c982ed7f 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcomb-shuffle-vecextend-non2.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcomb-shuffle-vecextend-non2.ll
@@ -17,7 +17,7 @@ define amdgpu_ps void @main(i32 %in1, i32 inreg %arg) local_unnamed_addr {
 bb:
 %__llpc_global_proxy_r5.12.vec.insert = insertelement <4 x i32> poison, i32 %in1, i32 3
- %tmp3 = shufflevector <4 x i32> %__llpc_global_proxy_r5.12.vec.insert, <4 x i32> undef, <3 x i32>
+ %tmp3 = shufflevector <4 x i32> %__llpc_global_proxy_r5.12.vec.insert, <4 x i32> poison, <3 x i32>
 %tmp4 = bitcast <3 x i32> %tmp3 to <3 x float>
 %a2.i123 = extractelement <3 x float> %tmp4, i32 2
 %tmp5 = bitcast float %a2.i123 to i32
@@ -26,7 +26,7 @@ bb:
 bb12:
 %__llpc_global_proxy_r2.0 = phi <4 x i32> [ %__llpc_global_proxy_r2.0.vec.insert196, %bb ], [ poison, %.entry ]
- %tmp6 = shufflevector <4 x i32> %__llpc_global_proxy_r2.0, <4 x i32> undef, <3 x i32>
+ %tmp6 = shufflevector <4 x i32> %__llpc_global_proxy_r2.0, <4 x i32> poison, <3 x i32>
 %tmp7 = bitcast <3 x i32> %tmp6 to <3 x float>
 %a0.i = extractelement <3 x float> %tmp7, i32 0
 ret void
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
index f88d3f1261f0b..2464275a87992 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
@@ -146,7 +146,7 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
 %0 = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
 %.i2243 = extractelement <3 x float> %0, i32 2
 %1 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> undef, i32 0, i32 0)
- %2 = shufflevector <3 x i32> %1, <3 x i32> poison, <4 x i32>
+ %2 = shufflevector <3 x i32> %1, <3 x i32> poison, <4 x i32>
 %3 = bitcast <4 x i32> %2 to <4 x float>
 %.i2248 = extractelement <4 x float> %3, i32 2
 %.i2249 = fmul reassoc nnan nsz arcp contract afn float %.i2243, %.i2248
@@ -159,17 +159,17 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
 %.i0364 = extractelement <2 x float> %7, i32 0
 %8 = call float @llvm.amdgcn.image.sample.2d.f32.f32(i32 1, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
 %9 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> undef, i32 112, i32 0)
- %10 = shufflevector <3 x i32> %9, <3 x i32> poison, <4 x i32>
+ %10 = shufflevector <3 x i32> %9, <3 x i32> poison, <4 x i32>
 %11 = bitcast <4 x i32> %10 to <4 x float>
 %.i2360 = extractelement <4 x float> %11, i32 2
 %.i2363 = fmul reassoc nnan nsz arcp contract afn float %.i2360, %8
 %12 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> undef, i32 96, i32 0)
- %13 = shufflevector <3 x i32> %12, <3 x i32> poison, <4 x i32>
+ %13 = shufflevector <3 x i32> %12, <3 x i32> poison, <4 x i32>
 %14 = bitcast <4 x i32> %13 to <4 x float>
 %.i2367 = extractelement <4 x float> %14, i32 2
 %.i2370 = fmul reassoc nnan nsz arcp contract afn float %.i0364, %.i2367
 %15 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> undef, i32 32, i32 0)
- %16 = shufflevector <3 x i32> %15, <3 x i32> poison, <4 x i32>
+ %16 = shufflevector <3 x i32> %15, <3 x i32> poison, <4 x i32>
 %17 = bitcast <4 x i32> %16 to <4 x float>
 %.i2373 = extractelement <4 x float> %17, i32 2
 %.i2376 = fsub reassoc nnan nsz arcp contract afn float %.i2373, %.i2370
@@ -212,12 +212,12 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
 %.i2466 = fmul reassoc nnan nsz arcp contract afn float %.i2465, %43
 %.i2469 = fmul reassoc nnan nsz arcp contract afn float %.i2415, %.i2466
 %45 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> undef, i32 64, i32 0)
- %46 = shufflevector <3 x i32> %45, <3 x i32> poison, <4 x i32>
+ %46 = shufflevector <3 x i32> %45, <3 x i32> poison, <4 x i32>
 %47 = bitcast <4 x i32> %46 to <4 x float>
 %.i2476 = extractelement <4 x float> %47, i32 2
 %.i2479 = fmul reassoc nnan nsz arcp contract afn float %.i2476, %18
 %48 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> undef, i32 80, i32 0)
- %49 = shufflevector <3 x i32> %48, <3 x i32> poison, <4 x i32>
+ %49 = shufflevector <3 x i32> %48, <3 x i32> poison, <4 x i32>
 %50 = bitcast <4 x i32> %49 to <4 x float>
 %.i2482 = extractelement <4 x float> %50, i32 2
 %.i2485 = fsub reassoc nnan nsz arcp contract afn float %.i2482, %.i2479
@@ -230,7 +230,7 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
 %.i2522 = fadd reassoc nnan nsz arcp contract afn float %.i2521, %.i2516
 %.i2525 = fmul reassoc nnan nsz arcp contract afn float %.i2522, %43
 %52 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> undef, i32 16, i32 0)
- %53 = shufflevector <3 x i32> %52, <3 x i32> poison, <4 x i32>
+ %53 = shufflevector <3 x i32> %52, <3 x i32> poison, <4 x i32>
 %54 = bitcast <4 x i32> %53 to <4 x float>
 %.i2530 = extractelement <4 x float> %54, i32 2
 %.i2531 = fmul reassoc nnan nsz arcp contract afn float %.i2333, %.i2530
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombiner-bug-illegal-vec4-int-to-fp.ll b/llvm/test/CodeGen/AMDGPU/dagcombiner-bug-illegal-vec4-int-to-fp.ll
index ccd497670a3f0..f5bf1793b9f40 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombiner-bug-illegal-vec4-int-to-fp.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombiner-bug-illegal-vec4-int-to-fp.ll
@@ -16,7 +16,7 @@ entry:
 %sint = load i32, ptr addrspace(1) %in
 %conv = sitofp i32 %sint to float
 %0 = insertelement <4 x float> poison, float %conv, i32 0
- %splat = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer
+ %splat = shufflevector <4 x float> %0, <4 x float> poison, <4 x i32> zeroinitializer
 store <4 x float> %splat, ptr addrspace(1) %out
 ret void
 }
@@ -30,7 +30,7 @@ entry:
 %uint = load i32, ptr addrspace(1) %in
 %conv = uitofp i32 %uint to float
 %0 = insertelement <4 x float> poison, float %conv, i32 0
- %splat = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer
+ %splat = shufflevector <4 x float> %0, <4 x float> poison, <4 x i32> zeroinitializer
 store <4 x float> %splat, ptr addrspace(1) %out
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/debug-value.ll b/llvm/test/CodeGen/AMDGPU/debug-value.ll
index 755f517156a7a..e7b56cbe2e4d4 100644
--- a/llvm/test/CodeGen/AMDGPU/debug-value.ll
+++ b/llvm/test/CodeGen/AMDGPU/debug-value.ll
@@ -7,7 +7,7 @@ bb:
 %tmp = load i32, ptr addrspace(1) undef, align 4
 %tmp1 = load <4 x float>, ptr addrspace(1) undef, align 16
 %tmp2 = sext i32 %tmp to i64
- %tmp3 = shufflevector <4 x float> undef, <4 x float> %tmp1, <2 x i32>
+ %tmp3 = shufflevector <4 x float> poison, <4 x float> %tmp1, <2 x i32>
 %tmp4 = call float @barney() #2
 %tmp9 = getelementptr inbounds %struct.wombat, ptr addrspace(1) %arg, i64 %tmp2, i32 2, i64 0
 %tmp10 = load i32, ptr addrspace(1) %tmp9, align 4
@@ -53,7 +53,7 @@ bb28: ; preds = %bb25, %bb21
 %tmp45 = fadd float undef, undef
 %tmp46 = fdiv float %tmp44, %tmp45
 %tmp47 = insertelement <4 x float> poison, float %tmp46, i32 0
- %tmp48 = shufflevector <4 x float> %tmp47, <4 x float> undef, <4 x i32> zeroinitializer
+ %tmp48 = shufflevector <4 x float> %tmp47, <4 x float> poison, <4 x i32> zeroinitializer
 %tmp49 = fsub <4 x float> %tmp48, %tmp40
 %tmp50 = extractelement <4 x float> %tmp41, i32 1
 %tmp51 = extractelement <4 x float> %tmp42, i32 2
@@ -71,7 +71,7 @@ bb28: ; preds = %bb25, %bb21
 call void @llvm.dbg.value(metadata <4 x float> %tmp29, metadata !3, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)) #2, !dbg !5
 %tmp59 = bitcast i64 %tmp35 to <2 x float>
 %tmp60 = insertelement <2 x float> poison, float %tmp58, i32 0
- %tmp61 = shufflevector <2 x float> %tmp60, <2 x float> undef, <2 x i32> zeroinitializer
+ %tmp61 = shufflevector <2 x float> %tmp60, <2 x float> poison, <2 x i32> zeroinitializer
 %tmp62 = fmul <2 x float> %tmp61, undef
 %tmp63 = fsub <2 x float> %tmp62, %tmp59
 %tmp64 = extractelement <2 x float> %tmp63, i64 0
diff --git a/llvm/test/CodeGen/AMDGPU/debug-value2.ll b/llvm/test/CodeGen/AMDGPU/debug-value2.ll
index bd27100a27cfc..3a16476adf5c1 100644
--- a/llvm/test/CodeGen/AMDGPU/debug-value2.ll
+++ b/llvm/test/CodeGen/AMDGPU/debug-value2.ll
@@ -26,7 +26,7 @@ entry:
 %m_scaleMotion = getelementptr inbounds %struct.ShapeData, ptr addrspace(1) %call, i64 0, i32 4
 %tmp2 = load <4 x float>, ptr addrspace(1) %m_scaleMotion, align 16
 %splat.splatinsert = insertelement <4 x float> poison, float %time, i32 0
- %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
+ %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
 %tmp3 = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %tmp2, <4 x float> %splat.splat, <4 x float> )
 %tmp4 = load <4 x float>, ptr addrspace(1) %call, align 16
 %m_quaternion = getelementptr inbounds %struct.ShapeData, ptr addrspace(1) %call, i64 0, i32 1
@@ -61,8 +61,8 @@ entry:
 %tmp24 = insertelement <4 x float> %tmp23, float %tmp19, i32 1
 %tmp25 = insertelement <4 x float> %tmp24, float %tmp22, i32 2
 %tmp26 = extractelement <4 x float> %tmp5, i64 3
- %splat.splat.i8.i = shufflevector <4 x float> %tmp5, <4 x float> undef, <4 x i32>
- %splat.splat2.i9.i = shufflevector <4 x float> %tmp10, <4 x float> undef, <4 x i32>
+ %splat.splat.i8.i = shufflevector <4 x float> %tmp5, <4 x float> poison, <4 x i32>
+ %splat.splat2.i9.i = shufflevector <4 x float> %tmp10, <4 x float> poison, <4 x i32>
 %mul3.i10.i = fmul <4 x float> %tmp5, %splat.splat2.i9.i
 %tmp27 = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %splat.splat.i8.i, <4 x float> %tmp10, <4 x float> %mul3.i10.i)
 %add.i11.i = fadd <4 x float> %tmp27, %tmp25
@@ -94,7 +94,7 @@ entry:
 %tmp52 = insertelement <4 x float> , float %tmp44, i32 0
 %tmp53 = insertelement <4 x float> %tmp52, float %tmp48, i32 1
 %tmp54 = insertelement <4 x float> %tmp53, float %tmp51, i32 2
- %splat.splat.i.i = shufflevector <4 x float> %tmp39, <4 x float> undef, <4 x i32>
+ %splat.splat.i.i = shufflevector <4 x float> %tmp39, <4 x float> poison, <4 x i32>
 %tmp55 = extractelement <4 x float> %tmp5, i32 3
 %mul3.i.i = fmul <4 x float> %splat.splat.i8.i, %tmp39
 %tmp56 = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %splat.splat.i.i, <4 x float> %vecinit5.i.i, <4 x float> %mul3.i.i)
@@ -113,12 +113,12 @@ entry:
 %tmp66 = extractelement <4 x float> %tmp1, i64 3
 %mul3 = fmul float %tmp66, %time
 %tmp67 = insertelement <4 x float> %tmp1, float 0.000000e+00, i32 3
- %tmp68 = shufflevector <4 x float> %tmp67, <4 x float> %tmp1, <4 x i32>
+ %tmp68 = shufflevector <4 x float> %tmp67, <4 x float> %tmp1, <4 x i32>
 %vecinit3.i.i = shufflevector <4 x float> %tmp68, <4 x float> %tmp1, <4 x i32>
 %tmp69 = fcmp oeq <4 x float> %vecinit3.i.i, zeroinitializer
 %tmp70 = sext <4 x i1> %tmp69 to <4 x i32>
- %tmp71 = shufflevector <4 x i32> %tmp70, <4 x i32> undef, <2 x i32>
- %tmp72 = shufflevector <4 x i32> %tmp70, <4 x i32> undef, <2 x i32>
+ %tmp71 = shufflevector <4 x i32> %tmp70, <4 x i32> poison, <2 x i32>
+ %tmp72 = shufflevector <4 x i32> %tmp70, <4 x i32> poison, <2 x i32>
 %tmp73 = and <2 x i32> %tmp71, %tmp72
 %tmp74 = extractelement <2 x i32> %tmp73, i64 0
 %tmp75 = extractelement <2 x i32> %tmp73, i64 1
@@ -205,7 +205,7 @@ bb141: ; preds = %bb109, %bb98, %bb96
 %tmp143 = phi float [ %tmp95, %bb86 ], [ %tmp140, %bb109 ], [ %tmp107, %bb98 ], [ %tmp84, %bb96 ]
 %tmp144 = tail call float @llvm.amdgcn.rsq.f32(float %tmp143)
 %tmp145 = insertelement <4 x float> poison, float %tmp144, i32 0
- %tmp146 = shufflevector <4 x float> %tmp145, <4 x float> undef, <4 x i32> zeroinitializer
+ %tmp146 = shufflevector <4 x float> %tmp145, <4 x float> poison, <4 x i32> zeroinitializer
 %tmp147 = fmul <4 x float> %tmp142, %tmp146
 br label %qtSet.exit
diff --git a/llvm/test/CodeGen/AMDGPU/early-if-convert.ll b/llvm/test/CodeGen/AMDGPU/early-if-convert.ll
index 028cd9110eb2b..8be5d1a3fde7c 100644
--- a/llvm/test/CodeGen/AMDGPU/early-if-convert.ll
+++ b/llvm/test/CodeGen/AMDGPU/early-if-convert.ll
@@ -331,7 +331,7 @@ if:
 endif:
 %r = phi <3 x i32> [ %v, %entry ], [ %u, %if ]
- %r.ext = shufflevector <3 x i32> %r, <3 x i32> undef, <4 x i32>
+ %r.ext = shufflevector <3 x i32> %r, <3 x i32> poison, <4 x i32>
 call void asm sideeffect "; reg use $0", "s"(<4 x i32> %r.ext) #0
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll b/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll
index 6dabd8c0b83ea..24dc5b5bb3150 100644
--- a/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll
@@ -156,7 +156,7 @@ F:
 exit:
 %m = phi <8 x i16> [ %t, %T ], [ %f, %F ]
- %v2 = shufflevector <8 x i16> %m, <8 x i16> undef, <4 x i32>
+ %v2 = shufflevector <8 x i16> %m, <8 x i16> poison, <4 x i32>
 %b2 = icmp sgt <4 x i16> %v2,
 %r2 = select <4 x i1> %b2, <4 x i16> , <4 x i16>
 ret <4 x i16> %r2
@@ -317,7 +317,7 @@ F:
 exit:
 %m = phi <8 x i16> [ %t, %T ], [ %f, %F ]
- %v2 = shufflevector <8 x i16> %m, <8 x i16> undef, <4 x i32>
+ %v2 = shufflevector <8 x i16> %m, <8 x i16> poison, <4 x i32>
 %b2 = icmp sgt <4 x i16> %v2,
 %r2 = select <4 x i1> %b2, <4 x i16> , <4 x i16>
 ret <4 x i16> %r2
@@ -482,7 +482,7 @@ F:
 exit:
 %m = phi <8 x half> [ %t, %T ], [ %f, %F ]
- %v2 = shufflevector <8 x half> %m, <8 x half> undef, <4 x i32>
+ %v2 = shufflevector <8 x half> %m, <8 x half> poison, <4 x i32>
 %b2 = fcmp ugt <4 x half> %v2,
 %r2 = select <4 x i1> %b2, <4 x half> , <4 x half>
 ret <4 x half> %r2
@@ -685,7 +685,7 @@ F:
 exit:
 %m = phi <16 x i16> [ %t, %T ], [ %f, %F ]
- %v2 = shufflevector <16 x i16> %m, <16 x i16> undef, <4 x i32>
+ %v2 = shufflevector <16 x i16> %m, <16 x i16> poison, <4 x i32>
 %b2 = icmp sgt <4 x i16> %v2,
 %r2 = select <4 x i1> %b2, <4 x i16> , <4 x i16>
 ret <4 x i16> %r2
@@ -890,7 +890,7 @@ F:
 exit:
 %m = phi <16 x i16> [ %t, %T ], [ %f, %F ]
- %v2 = shufflevector <16 x i16> %m, <16 x i16> undef, <4 x i32>
+ %v2 = shufflevector <16 x i16> %m, <16 x i16> poison, <4 x i32>
 %b2 = icmp sgt <4 x i16> %v2,
 %r2 = select <4 x i1> %b2, <4 x i16> , <4 x i16>
 ret <4 x i16> %r2
@@ -1099,7 +1099,7 @@ F:
 exit:
 %m = phi <16 x half> [ %t, %T ], [ %f, %F ]
- %v2 = shufflevector <16 x half> %m, <16 x half> undef, <4 x i32>
+ %v2 = shufflevector <16 x half> %m, <16 x half> poison, <4 x i32>
 %b2 = fcmp ugt <4 x half> %v2,
 %r2 = select <4 x i1> %b2, <4 x half> , <4 x half>
 ret <4 x half> %r2
@@ -1184,8 +1184,8 @@ define <8 x i16> @large_vector(ptr addrspace(3) %p, i32 %idxp) {
 %x.7 = load i16, ptr addrspace(3) %p.7, align 2
 %v3 = insertelement <8 x i16> %v3p, i16 %x.7, i32 1
- %z.1 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32>
- %z.2 = shufflevector <8 x i16> %z.1, <8 x i16> %v2, <8 x i32>
+ %z.1 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32>
+ %z.2 = shufflevector <8 x i16> %z.1, <8 x i16> %v2, <8 x i32>
 %z.3 = shufflevector <8 x i16> %z.2, <8 x i16> %v3, <8 x i32>
 ret <8 x i16> %z.3
 }
@@ -1464,7 +1464,7 @@ F:
 exit:
 %m = phi <16 x i16> [ %t, %T ], [ %f, %F ]
- %v2 = shufflevector <16 x i16> %m, <16 x i16> undef, <8 x i32>
+ %v2 = shufflevector <16 x i16> %m, <16 x i16> poison, <8 x i32>
 %b2 = icmp ugt <8 x i16> %v2,
 %r2 = select <8 x i1> %b2, <8 x i16> , <8 x i16>
 ret <8 x i16> %r2
@@ -1755,7 +1755,7 @@ F:
 exit:
 %m = phi <16 x half> [ %t, %T ], [ %f, %F ]
- %v2 = shufflevector <16 x half> %m, <16 x half> undef, <8 x i32>
+ %v2 = shufflevector <16 x half> %m, <16 x half> poison, <8 x i32>
 %b2 = fcmp ugt <8 x half> %v2,
 %r2 = select <8 x i1> %b2, <8 x half> , <8 x half>
 ret <8 x half> %r2
diff --git a/llvm/test/CodeGen/AMDGPU/extract-subvector-equal-length.ll b/llvm/test/CodeGen/AMDGPU/extract-subvector-equal-length.ll
index bab78aee3ff18..4cd39590bfc34 100644
--- a/llvm/test/CodeGen/AMDGPU/extract-subvector-equal-length.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract-subvector-equal-length.ll
@@ -13,7 +13,7 @@ define <3 x i32> @quux() {
 ; CHECK-NEXT: v_mov_b32_e32 v2, 1
 ; CHECK-NEXT: s_setpc_b64 s[30:31]
 bb:
- %tmp = shufflevector <4 x i8> , <4 x i8> undef, <3 x i32>
+ %tmp = shufflevector <4 x i8> , <4 x i8> poison, <3 x i32>
 %tmp1 = extractelement <3 x i8> %tmp, i64 0
 %tmp2 = zext i8 %tmp1 to i32
 %tmp3 = insertelement <3 x i32> poison, i32 %tmp2, i32 0
diff --git a/llvm/test/CodeGen/AMDGPU/extract-subvector.ll b/llvm/test/CodeGen/AMDGPU/extract-subvector.ll
index 1e86842be4e5e..61c0b8b861d5b 100644
--- a/llvm/test/CodeGen/AMDGPU/extract-subvector.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract-subvector.ll
@@ -90,7 +90,7 @@ F:
 exit:
 %m = phi <8 x i16> [ %t, %T ], [ %f, %F ]
- %v2 = shufflevector <8 x i16> %m, <8 x i16> undef, <2 x i32>
+ %v2 = shufflevector <8 x i16> %m, <8 x i16> poison, <2 x i32>
 %b2 = icmp sgt <2 x i16> %v2,
 %r2 = select <2 x i1> %b2, <2 x i16> , <2 x i16>
 ret <2 x i16> %r2
@@ -161,7 +161,7 @@ F:
 exit:
 %m = phi <8 x i64> [ %t, %T ], [ %f, %F ]
- %v2 = shufflevector <8 x i64> %m, <8 x i64> undef, <2 x i32>
+ %v2 = shufflevector <8 x i64> %m, <8 x i64> poison, <2 x i32>
 %b2 = icmp sgt <2 x i64> %v2,
 %r2 = select <2 x i1> %b2, <2 x i64> , <2 x i64>
 ret <2 x i64> %r2
@@ -238,7 +238,7 @@ F:
 exit:
 %m = phi <8 x i64> [ %t, %T ], [ %f, %F ]
- %v2 = shufflevector <8 x i64> %m, <8 x i64> undef, <4 x i32>
+ %v2 = shufflevector <8 x i64> %m, <8 x i64> poison, <4 x i32>
 %b2 = icmp sgt <4 x i64> %v2,
 %r2 = select <4 x i1> %b2, <4 x i64> , <4 x i64>
 ret <4 x i64> %r2
@@ -342,7 +342,7 @@ F:
 exit:
 %m = phi <16 x i64> [ %t, %T ], [ %f, %F ]
- %v2 = shufflevector <16 x i64> %m, <16 x i64> undef, <8 x i32>
+ %v2 = shufflevector <16 x i64> %m, <16 x i64> poison, <8 x i32>
 %b2 = icmp sgt <8 x i64> %v2,
 %r2 = select <8 x i1> %b2, <8 x i64> , <8 x i64>
 ret <8 x i64> %r2
@@ -413,7 +413,7 @@ F:
 exit:
 %m = phi <8 x double> [ %t, %T ], [ %f, %F ]
- %v2 = shufflevector <8 x double> %m, <8 x double> undef, <2 x i32>
+ %v2 = shufflevector <8 x double> %m, <8 x double> poison, <2 x i32>
 %b2 = fcmp ogt <2 x double> %v2,
 %r2 = select <2 x i1> %b2, <2 x double> , <2 x double>
 ret <2 x double> %r2
@@ -490,7 +490,7 @@ F:
 exit:
 %m = phi <8 x double> [ %t, %T ], [ %f, %F ]
- %v2 = shufflevector <8 x double> %m, <8 x double> undef, <4 x i32>
+ %v2 = shufflevector <8 x double> %m, <8 x double> poison, <4 x i32>
 %b2 = fcmp ogt <4 x double> %v2,
 %r2 = select <4 x i1> %b2, <4 x double> , <4 x double>
 ret <4 x double> %r2
@@ -594,7 +594,7 @@ F:
 exit:
 %m = phi <16 x double> [ %t, %T ], [ %f, %F ]
- %v2 = shufflevector <16 x double> %m, <16 x double> undef, <8 x i32>
+ %v2 = shufflevector <16 x double> %m, <16 x double> poison, <8 x i32>
 %b2 = fcmp ogt <8 x double> %v2,
 %r2 = select <8 x i1> %b2, <8 x double> , <8 x double>
 ret <8 x double> %r2
diff --git a/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll b/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
index 1319c526868c3..a44231f39ae6f 100644
--- a/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
@@ -27,7 +27,7 @@ define amdgpu_hs void @main(ptr addrspace(6) inreg %arg) {
 main_body:
 %tmp25 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) undef, i32 undef, i32 0, i32 0)
 %tmp27 = bitcast <4 x float> %tmp25 to <16 x i8>
- %tmp28 = shufflevector <16 x i8> %tmp27, <16 x i8> undef, <12 x i32>
+ %tmp28 = shufflevector <16 x i8> %tmp27, <16 x i8> poison, <12 x i32>
 %tmp29 = bitcast <12 x i8> %tmp28 to <3 x i32>
 call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %tmp29, ptr addrspace(8) undef, i32 undef, i32 0, i32 0) #3
 ret void
diff --git a/llvm/test/CodeGen/AMDGPU/fmac.sdwa.ll b/llvm/test/CodeGen/AMDGPU/fmac.sdwa.ll
index dc60007cad977..827e5da15e50c 100644
--- a/llvm/test/CodeGen/AMDGPU/fmac.sdwa.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmac.sdwa.ll
@@ -39,7 +39,7 @@ bb14: ; preds = %bb14, %bb11
 %tmp25 = load float, ptr addrspace(4) %tmp24, align 4
 %tmp26 = fptrunc float %tmp25 to half
 %tmp27 = insertelement <4 x half> poison, half %tmp26, i32 0
- %tmp28 = shufflevector <4 x half> %tmp27, <4 x half> undef, <4 x i32> zeroinitializer
+ %tmp28 = shufflevector <4 x half> %tmp27, <4 x half> poison, <4 x i32> zeroinitializer
 %vec.A.0 = extractelement <4 x half> %tmp21, i32 0
 %vec.B.0 = extractelement <4 x half> %tmp28, i32 0
 %vec.C.0 = extractelement <4 x half> %tmp15, i32 0
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll b/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll
index e1791daa3aa0c..781a2ca3146f5 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll
@@ -1585,7 +1585,7 @@ define amdgpu_kernel void @fnge_select_f32_multi_use_regression(float %.i2369) {
 bb: ; preds = %.entry
 %i2 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> zeroinitializer, i32 1, i32 0)
- %i3 = shufflevector <2 x i32> %i2, <2 x i32> zeroinitializer, <4 x i32>
+ %i3 = shufflevector <2 x i32> %i2, <2 x i32> zeroinitializer, <4 x i32>
 %i4 = bitcast <4 x i32> %i3 to <4 x float>
 %.i0753 = extractelement <4 x float> %i4, i64 0
 br label %bb5
diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
index 2322b29abaa10..c4ea8fc67122c 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
@@ -1841,7 +1841,7 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 {
 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
 %load = load <2 x i64>, ptr addrspace(1) null
- %val = shufflevector <2 x i64> %load, <2 x i64> , <3 x i32>
+ %val = shufflevector <2 x i64> %load, <2 x i64> , <3 x i32>
 call amdgpu_gfx void @external_void_func_v3i64(<3 x i64> %val)
 ret void
@@ -10439,7 +10439,7 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 {
 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
 %load = load <2 x i64>, ptr addrspace(4) null
- %val = shufflevector <2 x i64> %load, <2 x i64> , <3 x i32>
+ %val = shufflevector <2 x i64> %load, <2 x i64> , <3 x i32>
 call amdgpu_gfx void @external_void_func_v3i64_inreg(<3 x i64> inreg %val)
 ret void
diff --git a/llvm/test/CodeGen/AMDGPU/greedy-reverse-local-assignment.ll b/llvm/test/CodeGen/AMDGPU/greedy-reverse-local-assignment.ll
index 6c921441c972d..6e82a294243d2 100644
--- a/llvm/test/CodeGen/AMDGPU/greedy-reverse-local-assignment.ll
+++ b/llvm/test/CodeGen/AMDGPU/greedy-reverse-local-assignment.ll
@@ -48,6 +48,6 @@ define <4 x half> @shuffle_v4f16_234u(ptr addrspace(1) %arg0, ptr addrspace(1) %
 ; NOXNACK-NEXT: s_setpc_b64 s[30:31]
 %val0 = load <4 x half>, ptr addrspace(1) %arg0
 %val1 = load <4 x half>, ptr addrspace(1) %arg1
- %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32>
+ %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32>
 ret <4 x half> %shuffle
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll
index b4f05bce37680..dbd324b0334ae 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll
@@ -36,7 +36,7 @@ main_body:
 define amdgpu_ps <2 x float> @image_load_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
 main_body:
 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
- %ext = shufflevector <3 x half> %tex, <3 x half> undef, <4 x i32>
+ %ext = shufflevector <3 x half> %tex, <3 x half> poison, <4 x i32>
 %r = bitcast <4 x half> %ext to <2 x float>
 ret <2 x float> %r
 }
@@ -90,7 +90,7 @@ main_body:
 define amdgpu_ps <2 x float> @image_load_3d_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
 main_body:
 %tex = call <3 x half> @llvm.amdgcn.image.load.3d.v3f16.i32(i32 7, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
- %ext = shufflevector <3 x half> %tex, <3 x half> undef, <4 x i32>
+ %ext = shufflevector <3 x half> %tex, <3 x half> poison, <4 x i32>
 %res = bitcast <4 x half> %ext to <2 x float>
 ret <2 x float> %res
 }
@@ -129,7 +129,7 @@ main_body:
 define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <2 x float> %in) {
 main_body:
 %r = bitcast <2 x float> %in to <4 x half>
- %data = shufflevector <4 x half> %r, <4 x half> undef, <3 x i32>
+ %data = shufflevector <4 x half> %r, <4 x half> poison, <3 x i32>
 call void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half> %data, i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll
index 389acd31a9d08..8861ff4c78137 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll
@@ -315,7 +315,7 @@ define amdgpu_ps <2 x float> @image_sample_b_2d_v3f16(<8 x i32> inreg %rsrc, <4
 ; GFX12-NEXT: ; return to shader part epilog
 main_body:
 %tex = call <3 x half> @llvm.amdgcn.image.sample.b.2d.v3f16.f32.f32(i32 7, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
- %tex_wide = shufflevector <3 x half> %tex, <3 x half> undef, <4 x i32>
+ %tex_wide = shufflevector <3 x half> %tex, <3 x half> poison, <4 x i32>
 %r = bitcast <4 x half> %tex_wide to <2 x float>
 ret <2 x float> %r
 }
@@ -410,7 +410,7 @@ define amdgpu_ps <4 x float> @image_sample_b_2d_v3f16_tfe(<8 x i32> inreg %rsrc,
 main_body:
 %tex = call {<3 x half>,i32} @llvm.amdgcn.image.sample.b.2d.v3f16i32.f32.f32(i32 7, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 0)
 %tex.vec = extractvalue {<3 x half>, i32} %tex, 0
- %tex.vec_wide = shufflevector <3 x half> %tex.vec, <3 x half> undef, <4 x i32>
+ %tex.vec_wide = shufflevector <3 x half> %tex.vec, <3 x half> poison, <4 x i32>
 %tex.err = extractvalue {<3 x half>, i32} %tex, 1
 %tex.vecf = bitcast <4 x half> %tex.vec_wide to <2 x float>
 %tex.vecf.0 = extractelement <2 x float> %tex.vecf, i32 0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
index 6d8ce071371c9..a713b1db119b8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
@@ -2394,7 +2394,7 @@ define amdgpu_ps <2 x float> @adjust_writemask_sample_01(<8 x i32> inreg %rsrc,
 ; GFX12-NEXT: ; return to shader part epilog
 main_body:
 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
- %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32>
+ %out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32>
 ret <2 x float> %out
 }
@@ -2436,7 +2436,7 @@ define amdgpu_ps <3 x float> @adjust_writemask_sample_012(<8 x i32> inreg %rsrc,
 ; GFX12-NEXT: ; return to shader part epilog
 main_body:
 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
- %out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32>
+ %out = shufflevector <4 x float> %r, <4 x float> poison, <3 x i32>
 ret <3 x float> %out
 }
@@ -2478,7 +2478,7 @@ define amdgpu_ps <2 x float> @adjust_writemask_sample_12(<8 x i32> inreg %rsrc,
 ; GFX12-NEXT: ; return to shader part epilog
 main_body:
 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
- %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32>
+ %out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32>
 ret <2 x float> %out
 }
@@ -2520,7 +2520,7 @@ define amdgpu_ps <2 x float> @adjust_writemask_sample_03(<8 x i32> inreg %rsrc,
 ; GFX12-NEXT: ; return to shader part epilog
 main_body:
 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
- %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32>
+ %out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32>
 ret <2 x float> %out
 }
@@ -2562,7 +2562,7 @@ define amdgpu_ps <2 x float> @adjust_writemask_sample_13(<8 x i32> inreg %rsrc,
 ; GFX12-NEXT: ; return to shader part epilog
 main_body:
 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
- %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32>
+ %out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32>
 ret <2 x float> %out
 }
@@ -2604,7 +2604,7 @@ define amdgpu_ps <3 x float> @adjust_writemask_sample_123(<8 x i32> inreg %rsrc,
 ; GFX12-NEXT: ; return to shader part epilog
 main_body:
 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
- %out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32>
+ %out = shufflevector <4 x float> %r, <4 x float> poison, <3 x i32>
 ret <3 x float> %out
 }
@@ -2667,7 +2667,7 @@ define amdgpu_ps <2 x float> @adjust_writemask_sample_123_to_12(<8 x i32> inreg
 ; GFX12-NEXT: ; return to shader part epilog
 main_body:
 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 14, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
- %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32>
+ %out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32>
 ret <2 x float> %out
 }
@@ -2709,7 +2709,7 @@ define amdgpu_ps <2 x float> @adjust_writemask_sample_013_to_13(<8 x i32> inreg
 ; GFX12-NEXT: ; return to shader part epilog
 main_body:
 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 11, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
- %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32>
+ %out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32>
 ret <2 x float> %out
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.format.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.format.d16.ll
index ca03d954d5b2a..5b4e2e4ce171b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.format.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.format.d16.ll
@@ -47,7 +47,7 @@ main_body:
 ; PACKED: buffer_store_format_d16_xyz v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
 define amdgpu_kernel void @buffer_store_format_d16_xyz(<4 x i32> %rsrc, <4 x half> %data, i32 %voffset) {
 main_body:
- %data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32>
+ %data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32>
 call void @llvm.amdgcn.raw.buffer.store.format.v3f16(<3 x half> %data_subvec, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.format.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.format.d16.ll
index 8e278c101057a..b1a2747cb5784 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.format.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.format.d16.ll
@@ -47,7 +47,7 @@ main_body:
 ; PACKED: buffer_store_format_d16_xyz v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
 define amdgpu_kernel void @buffer_store_format_d16_xyz(ptr addrspace(8) %rsrc, <4 x half> %data, i32 %voffset) {
 main_body:
- %data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32>
+ %data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32>
 call void @llvm.amdgcn.raw.ptr.buffer.store.format.v3f16(<3 x half> %data_subvec, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 0)
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.tbuffer.store.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.tbuffer.store.d16.ll
index 02fc82de5d7bc..9c576039ff6ac 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.tbuffer.store.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.tbuffer.store.d16.ll
@@ -144,7 +144,7 @@ define amdgpu_kernel void @tbuffer_store_d16_xyz(ptr addrspace(8) %rsrc, <4 x ha
 ; GFX11-PACKED-NEXT: tbuffer_store_d16_format_xyz v[0:1], off, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM]
 ; GFX11-PACKED-NEXT: s_endpgm
 main_body:
- %data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32>
+ %data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32>
 call void @llvm.amdgcn.raw.ptr.tbuffer.store.v3f16(<3 x half> %data_subvec, ptr addrspace(8) %rsrc, i32 0, i32 0, i32 33, i32 0)
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.d16.ll
index 63b139bb25e77..cde2627008e27 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.d16.ll
@@ -191,7 +191,7 @@ define amdgpu_kernel void @tbuffer_store_d16_xyz(<4 x i32> %rsrc, <4 x half> %da
 ; GFX12-PACKED-GISEL-NEXT: tbuffer_store_d16_format_xyzw v[0:1], off, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM]
 ; GFX12-PACKED-GISEL-NEXT: s_endpgm
 main_body:
- %data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32>
+ %data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32>
 call void @llvm.amdgcn.raw.tbuffer.store.v3f16(<3 x half> %data_subvec, <4 x i32> %rsrc, i32 0, i32 0, i32 33, i32 0)
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.format.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.format.d16.ll
index 73ac61a77a94e..aa7064dad9e95 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.format.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.format.d16.ll
@@ -47,7 +47,7 @@ main_body:
 ; PACKED: buffer_store_format_d16_xyz v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
 define amdgpu_kernel void @buffer_store_format_d16_xyz(<4 x i32> %rsrc, <4 x half> %data, i32 %index) {
main_body: - %data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> + %data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> call void @llvm.amdgcn.struct.buffer.store.format.v3f16(<3 x half> %data_subvec, <4 x i32> %rsrc, i32 %index, i32 0, i32 0, i32 0) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.store.format.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.store.format.d16.ll index 8a281376965d1..51d3687ac5185 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.store.format.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.store.format.d16.ll @@ -77,7 +77,7 @@ define amdgpu_kernel void @buffer_store_format_d16_xyz(ptr addrspace(8) %rsrc, < ; PACKED-NEXT: buffer_store_format_d16_xyz v[0:1], v2, s[0:3], 0 idxen ; PACKED-NEXT: s_endpgm main_body: - %data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> + %data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> call void @llvm.amdgcn.struct.ptr.buffer.store.format.v3f16(<3 x half> %data_subvec, ptr addrspace(8) %rsrc, i32 %index, i32 0, i32 0, i32 0) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.tbuffer.store.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.tbuffer.store.d16.ll index e5eae03bb5bde..7e92f0d2c5973 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.tbuffer.store.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.tbuffer.store.d16.ll @@ -160,7 +160,7 @@ define amdgpu_kernel void @tbuffer_store_d16_xyz(ptr addrspace(8) %rsrc, <4 x ha ; GFX11-PACKED-NEXT: tbuffer_store_d16_format_xyz v[0:1], v2, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] idxen ; GFX11-PACKED-NEXT: s_endpgm main_body: - %data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> + %data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> call void @llvm.amdgcn.struct.ptr.tbuffer.store.v3f16(<3 x half> %data_subvec, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 33, i32 0) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.d16.ll index 17ebb1a835462..4a02973941ec5 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.d16.ll @@ -211,7 +211,7 @@ define amdgpu_kernel void @tbuffer_store_d16_xyz(<4 x i32> %rsrc, <4 x half> %da ; GFX12-PACKED-GISEL-NEXT: tbuffer_store_d16_format_xyzw v[0:1], v2, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM] idxen ; GFX12-PACKED-GISEL-NEXT: s_endpgm main_body: - %data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> + %data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> call void @llvm.amdgcn.struct.tbuffer.store.v3f16(<3 x half> %data_subvec, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 33, i32 0) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/load-local-redundant-copies.ll b/llvm/test/CodeGen/AMDGPU/load-local-redundant-copies.ll index 01bab28a49858..2cee66f902b6d 100644 --- a/llvm/test/CodeGen/AMDGPU/load-local-redundant-copies.ll +++ b/llvm/test/CodeGen/AMDGPU/load-local-redundant-copies.ll @@ -22,7 +22,7 @@ define amdgpu_vs void @test(ptr addrspace(8) inreg %arg1, ptr addrspace(3) %arg2 ; CHECK-NEXT: s_endpgm call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float undef, float undef, float undef, float undef, i1 false, i1 false) %var1 = load <6 x 
float>, ptr addrspace(3) %arg2, align 4 - %var2 = shufflevector <6 x float> %var1, <6 x float> undef, <4 x i32> + %var2 = shufflevector <6 x float> %var1, <6 x float> poison, <4 x i32> call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4f32(<4 x float> %var2, ptr addrspace(8) %arg1, i32 0, i32 0, i32 0, i32 126, i32 0) ret void } @@ -52,9 +52,9 @@ define amdgpu_vs void @test_2(ptr addrspace(8) inreg %arg1, i32 %arg2, i32 inreg ; CHECK-NEXT: tbuffer_store_format_xyzw v[2:5], v0, s[0:3], s4 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen offset:16 glc slc ; CHECK-NEXT: s_endpgm %load = load <8 x float>, ptr addrspace(3) %arg4, align 4 - %vec1 = shufflevector <8 x float> %load, <8 x float> undef, <4 x i32> + %vec1 = shufflevector <8 x float> %load, <8 x float> poison, <4 x i32> call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4f32(<4 x float> %vec1, ptr addrspace(8) %arg1, i32 %arg2, i32 0, i32 %arg3, i32 77, i32 3) - %vec2 = shufflevector <8 x float> %load, <8 x float> undef, <4 x i32> + %vec2 = shufflevector <8 x float> %load, <8 x float> poison, <4 x i32> call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4f32(<4 x float> %vec2, ptr addrspace(8) %arg1, i32 %arg2, i32 16, i32 %arg3, i32 77, i32 3) ret void } @@ -102,17 +102,17 @@ define amdgpu_vs void @test_3(i32 inreg %arg1, i32 inreg %arg2, ptr addrspace(8) ; CHECK-NEXT: tbuffer_store_format_xy v[0:1], v9, s[4:7], s1 format:[BUF_DATA_FORMAT_INVALID,BUF_NUM_FORMAT_UINT] idxen offset:256 glc slc ; CHECK-NEXT: s_endpgm %load1 = load <6 x float>, ptr addrspace(3) %arg5, align 4 - %vec11 = shufflevector <6 x float> %load1, <6 x float> undef, <4 x i32> + %vec11 = shufflevector <6 x float> %load1, <6 x float> poison, <4 x i32> call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4f32(<4 x float> %vec11, ptr addrspace(8) %arg3, i32 %arg1, i32 264, i32 %arg2, i32 77, i32 3) - %vec12 = shufflevector <6 x float> %load1, <6 x float> undef, <2 x i32> + %vec12 = shufflevector <6 x float> %load1, <6 x float> poison, <2 x i32> call void @llvm.amdgcn.struct.ptr.tbuffer.store.v2f32(<2 x float> %vec12, ptr addrspace(8) %arg3, i32 %arg1, i32 280, i32 %arg2, i32 64, i32 3) call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float undef, float undef, float undef, float undef, i1 false, i1 false) %load2 = load <6 x float>, ptr addrspace(3) %arg6, align 4 - %vec21 = shufflevector <6 x float> %load2, <6 x float> undef, <4 x i32> + %vec21 = shufflevector <6 x float> %load2, <6 x float> poison, <4 x i32> call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4f32(<4 x float> %vec21, ptr addrspace(8) %arg3, i32 %arg1, i32 240, i32 %arg2, i32 77, i32 3) - %vec22 = shufflevector <6 x float> %load2, <6 x float> undef, <2 x i32> + %vec22 = shufflevector <6 x float> %load2, <6 x float> poison, <2 x i32> call void @llvm.amdgcn.struct.ptr.tbuffer.store.v2f32(<2 x float> %vec22, ptr addrspace(8) %arg3, i32 %arg1, i32 256, i32 %arg2, i32 64, i32 3) ret void diff --git a/llvm/test/CodeGen/AMDGPU/loop-live-out-copy-undef-subrange.ll b/llvm/test/CodeGen/AMDGPU/loop-live-out-copy-undef-subrange.ll index 9c8aad177db65..dec86d41769f6 100644 --- a/llvm/test/CodeGen/AMDGPU/loop-live-out-copy-undef-subrange.ll +++ b/llvm/test/CodeGen/AMDGPU/loop-live-out-copy-undef-subrange.ll @@ -36,7 +36,7 @@ bb1: ; preds = %bb3, %bb %i3 = fmul float %i2, 1.000000e+00 %i4 = fmul nsz <3 x float> %arg, %i5 = insertelement <3 x float> poison, float %i3, i32 0 - %i6 = shufflevector <3 x float> %i5, <3 x float> undef, <3 x i32> zeroinitializer + %i6 = shufflevector <3 x float> %i5, <3 x float> poison, <3 x i32> 
zeroinitializer %i7 = fmul <3 x float> %i4, %i6 %i8 = fcmp oeq float %i3, 0.000000e+00 br i1 %i8, label %bb3, label %bb2 diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix.ll b/llvm/test/CodeGen/AMDGPU/mad-mix.ll index 4c2a16c17b38a..a4568b58661db 100644 --- a/llvm/test/CodeGen/AMDGPU/mad-mix.ll +++ b/llvm/test/CodeGen/AMDGPU/mad-mix.ll @@ -456,9 +456,9 @@ define <2 x float> @v_mad_mix_v2f32_shuffle(<2 x half> %src0, <2 x half> %src1, ; GISEL-CI-NEXT: v_mad_f32 v0, v4, v0, v1 ; GISEL-CI-NEXT: v_mac_f32_e32 v1, v5, v2 ; GISEL-CI-NEXT: s_setpc_b64 s[30:31] - %src0.shuf = shufflevector <2 x half> %src0, <2 x half> undef, <2 x i32> - %src1.shuf = shufflevector <2 x half> %src1, <2 x half> undef, <2 x i32> - %src2.shuf = shufflevector <2 x half> %src2, <2 x half> undef, <2 x i32> + %src0.shuf = shufflevector <2 x half> %src0, <2 x half> poison, <2 x i32> + %src1.shuf = shufflevector <2 x half> %src1, <2 x half> poison, <2 x i32> + %src2.shuf = shufflevector <2 x half> %src2, <2 x half> poison, <2 x i32> %src0.ext = fpext <2 x half> %src0.shuf to <2 x float> %src1.ext = fpext <2 x half> %src1.shuf to <2 x float> %src2.ext = fpext <2 x half> %src2.shuf to <2 x float> diff --git a/llvm/test/CodeGen/AMDGPU/mul_int24.ll b/llvm/test/CodeGen/AMDGPU/mul_int24.ll index ad3f36dc3f216..11cf129b1e479 100644 --- a/llvm/test/CodeGen/AMDGPU/mul_int24.ll +++ b/llvm/test/CodeGen/AMDGPU/mul_int24.ll @@ -795,8 +795,8 @@ bb: br i1 %cmp, label %bb11, label %bb7 bb11: - %tmp14 = shufflevector <2 x i32> %arg1, <2 x i32> undef, <2 x i32> zeroinitializer - %tmp16 = shufflevector <2 x i32> %arg2, <2 x i32> undef, <2 x i32> zeroinitializer + %tmp14 = shufflevector <2 x i32> %arg1, <2 x i32> poison, <2 x i32> zeroinitializer + %tmp16 = shufflevector <2 x i32> %arg2, <2 x i32> poison, <2 x i32> zeroinitializer %tmp17 = shl <2 x i32> %tmp14, %tmp18 = ashr <2 x i32> %tmp17, %tmp19 = shl <2 x i32> %tmp16, diff --git a/llvm/test/CodeGen/AMDGPU/packed-fp32.ll b/llvm/test/CodeGen/AMDGPU/packed-fp32.ll index a74db456ab6be..866abc10b5777 100644 --- a/llvm/test/CodeGen/AMDGPU/packed-fp32.ll +++ b/llvm/test/CodeGen/AMDGPU/packed-fp32.ll @@ -486,7 +486,7 @@ bb: %neg.scalar0 = fsub float -0.0, %scalar0 %neg.scalar0.vec = insertelement <2 x float> poison, float %neg.scalar0, i32 0 - %neg.scalar0.broadcast = shufflevector <2 x float> %neg.scalar0.vec, <2 x float> undef, <2 x i32> zeroinitializer + %neg.scalar0.broadcast = shufflevector <2 x float> %neg.scalar0.vec, <2 x float> poison, <2 x i32> zeroinitializer %result = fadd <2 x float> %vec0, %neg.scalar0.broadcast store <2 x float> %result, ptr addrspace(1) %out, align 4 @@ -526,7 +526,7 @@ bb: %vec0 = load volatile <2 x float>, ptr addrspace(3) %lds, align 8 %lds.gep1 = getelementptr inbounds <2 x float>, ptr addrspace(3) %lds, i32 1 %vec1 = load volatile <2 x float>, ptr addrspace(3) %lds.gep1, align 8 - %vec1.swap = shufflevector <2 x float> %vec1, <2 x float> undef, <2 x i32> + %vec1.swap = shufflevector <2 x float> %vec1, <2 x float> poison, <2 x i32> %result = fadd <2 x float> %vec0, %vec1.swap store <2 x float> %result, ptr addrspace(1) %out, align 8 ret void @@ -543,7 +543,7 @@ bb: %f32 = load volatile float, ptr addrspace(3) undef, align 8 %vec1 = load volatile <2 x float>, ptr addrspace(3) %lds.gep1, align 8 %vec1.neg = fsub <2 x float> , %vec1 - %vec1.neg.swap = shufflevector <2 x float> %vec1.neg, <2 x float> undef, <2 x i32> + %vec1.neg.swap = shufflevector <2 x float> %vec1.neg, <2 x float> poison, <2 x i32> %result = fadd <2 x float> %vec0, %vec1.neg.swap store <2 x float> 
%result, ptr addrspace(1) %out, align 8 ret void @@ -598,7 +598,7 @@ bb: %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %arg, i32 %tid %in.1 = load <4 x float>, ptr addrspace(1) %gep - %shuf = shufflevector <4 x float> %in.1, <4 x float> undef, <4 x i32> zeroinitializer + %shuf = shufflevector <4 x float> %in.1, <4 x float> poison, <4 x i32> zeroinitializer %add.1 = fadd <4 x float> %in.1, %shuf store <4 x float> %add.1, ptr addrspace(1) %gep ret void diff --git a/llvm/test/CodeGen/AMDGPU/packed-op-sel.ll b/llvm/test/CodeGen/AMDGPU/packed-op-sel.ll index 5514cd3e0b367..081756a0b7816 100644 --- a/llvm/test/CodeGen/AMDGPU/packed-op-sel.ll +++ b/llvm/test/CodeGen/AMDGPU/packed-op-sel.ll @@ -24,7 +24,7 @@ bb: %scalar0 = load volatile half, ptr addrspace(3) %arg2, align 2 %scalar0.vec = insertelement <2 x half> poison, half %scalar0, i32 0 - %scalar0.broadcast = shufflevector <2 x half> %scalar0.vec, <2 x half> undef, <2 x i32> zeroinitializer + %scalar0.broadcast = shufflevector <2 x half> %scalar0.vec, <2 x half> poison, <2 x i32> zeroinitializer %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %scalar0.broadcast) store <2 x half> %result, ptr addrspace(1) %out, align 4 @@ -55,7 +55,7 @@ bb: %scalar0 = load volatile half, ptr addrspace(3) %arg2, align 2 %scalar0.vec = insertelement <2 x half> poison, half %scalar0, i32 0 - %scalar0.broadcast = shufflevector <2 x half> %scalar0.vec, <2 x half> undef, <2 x i32> zeroinitializer + %scalar0.broadcast = shufflevector <2 x half> %scalar0.vec, <2 x half> poison, <2 x i32> zeroinitializer %neg.scalar0.broadcast = fsub <2 x half> , %scalar0.broadcast %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %neg.scalar0.broadcast) @@ -88,7 +88,7 @@ bb: %neg.scalar0 = fsub half -0.0, %scalar0 %neg.scalar0.vec = insertelement <2 x half> poison, half %neg.scalar0, i32 0 - %neg.scalar0.broadcast = shufflevector <2 x half> %neg.scalar0.vec, <2 x half> undef, <2 x i32> zeroinitializer + %neg.scalar0.broadcast = shufflevector <2 x half> %neg.scalar0.vec, <2 x half> poison, <2 x i32> zeroinitializer %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %neg.scalar0.broadcast) store <2 x half> %result, ptr addrspace(1) %out, align 4 @@ -120,7 +120,7 @@ bb: %neg.scalar0 = fsub half -0.0, %scalar0 %neg.scalar0.vec = insertelement <2 x half> poison, half %neg.scalar0, i32 0 - %neg.scalar0.broadcast = shufflevector <2 x half> %neg.scalar0.vec, <2 x half> undef, <2 x i32> zeroinitializer + %neg.scalar0.broadcast = shufflevector <2 x half> %neg.scalar0.vec, <2 x half> poison, <2 x i32> zeroinitializer %neg.neg.scalar0.broadcast = fsub <2 x half> , %neg.scalar0.broadcast %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %neg.neg.scalar0.broadcast) @@ -212,7 +212,7 @@ bb: %neg.scalar0.bc = bitcast half %neg.scalar0 to i16 %neg.scalar0.vec = insertelement <2 x i16> poison, i16 %neg.scalar0.bc, i32 0 - %neg.scalar0.broadcast = shufflevector <2 x i16> %neg.scalar0.vec, <2 x i16> undef, <2 x i32> zeroinitializer + %neg.scalar0.broadcast = shufflevector <2 x i16> %neg.scalar0.vec, <2 x i16> poison, <2 x i32> zeroinitializer %result = add <2 x i16> %vec0, %neg.scalar0.broadcast store <2 x i16> %result, ptr addrspace(1) %out, align 4 @@ -318,7 +318,7 @@ bb: %vec2 = load volatile <2 x half>, ptr addrspace(3) %lds.gep2, align 4 %vec2.fneg = fsub <2 x half> , 
%vec2 - %vec2.fneg.elt1.broadcast = shufflevector <2 x half> %vec2.fneg, <2 x half> undef, <2 x i32> + %vec2.fneg.elt1.broadcast = shufflevector <2 x half> %vec2.fneg, <2 x half> poison, <2 x i32> %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %vec2.fneg.elt1.broadcast) store <2 x half> %result, ptr addrspace(1) %out, align 4 @@ -377,7 +377,7 @@ bb: %vec0 = load volatile <2 x i16>, ptr addrspace(3) %lds, align 4 %vec1 = load volatile <2 x i16>, ptr addrspace(3) %lds.gep1, align 4 - %vec1.elt1.broadcast = shufflevector <2 x i16> %vec1, <2 x i16> undef, <2 x i32> + %vec1.elt1.broadcast = shufflevector <2 x i16> %vec1, <2 x i16> poison, <2 x i32> %result = add <2 x i16> %vec0, %vec1.elt1.broadcast store <2 x i16> %result, ptr addrspace(1) %out, align 4 @@ -407,7 +407,7 @@ bb: %vec1 = load volatile <2 x half>, ptr addrspace(3) %lds.gep1, align 4 %vec2 = load volatile <2 x half>, ptr addrspace(3) %lds.gep2, align 4 - %vec2.elt1.broadcast = shufflevector <2 x half> %vec2, <2 x half> undef, <2 x i32> + %vec2.elt1.broadcast = shufflevector <2 x half> %vec2, <2 x half> poison, <2 x i32> %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %vec2.elt1.broadcast) @@ -471,7 +471,7 @@ bb: %vec1 = load volatile <2 x half>, ptr addrspace(3) %lds.gep1, align 4 %vec2 = load volatile <2 x half>, ptr addrspace(3) %lds.gep2, align 4 - %vec2.swap = shufflevector <2 x half> %vec2, <2 x half> undef, <2 x i32> + %vec2.swap = shufflevector <2 x half> %vec2, <2 x half> poison, <2 x i32> %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %vec2.swap) store <2 x half> %result, ptr addrspace(1) %out, align 4 @@ -502,7 +502,7 @@ bb: %vec2 = load volatile <2 x half>, ptr addrspace(3) %lds.gep2, align 4 %neg.vec2 = fsub <2 x half> , %vec2 - %neg.vec2.swap = shufflevector <2 x half> %neg.vec2, <2 x half> undef, <2 x i32> + %neg.vec2.swap = shufflevector <2 x half> %neg.vec2, <2 x half> poison, <2 x i32> %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %neg.vec2.swap) store <2 x half> %result, ptr addrspace(1) %out, align 4 @@ -678,7 +678,7 @@ bb: %f32 = load volatile float, ptr addrspace(3) undef, align 4 %neg.f32 = fsub float -0.0, %f32 %bc = bitcast float %neg.f32 to <2 x half> - %shuf = shufflevector <2 x half> %bc, <2 x half> undef, <2 x i32> + %shuf = shufflevector <2 x half> %bc, <2 x half> poison, <2 x i32> %result = fadd <2 x half> %vec0, %shuf store <2 x half> %result, ptr addrspace(1) %out, align 4 ret void diff --git a/llvm/test/CodeGen/AMDGPU/permute_i8.ll b/llvm/test/CodeGen/AMDGPU/permute_i8.ll index 312dfa3717c77..5e867deb56ebe 100644 --- a/llvm/test/CodeGen/AMDGPU/permute_i8.ll +++ b/llvm/test/CodeGen/AMDGPU/permute_i8.ll @@ -342,7 +342,7 @@ define hidden void @shuffle7330ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] %vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4 - %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> + %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4 ret void } @@ -367,7 +367,7 @@ define hidden void @shuffle5341ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] %vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4 - %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x 
i32> + %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4 ret void } @@ -393,7 +393,7 @@ define hidden void @shuffle6106ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] %vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4 - %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> + %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4 ret void } @@ -420,7 +420,7 @@ define hidden void @shuffle4327ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] %vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4 - %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> + %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4 ret void } @@ -446,7 +446,7 @@ define hidden void @shuffle3263ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] %vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4 - %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> + %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4 ret void } @@ -472,7 +472,7 @@ define hidden void @shuffle2763ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] %vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4 - %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> + %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4 ret void } @@ -498,7 +498,7 @@ define hidden void @shuffle1327ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] %vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4 - %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> + %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4 ret void } @@ -524,7 +524,7 @@ define hidden void @shuffle0605ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] %vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4 - %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> + %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4 ret void } @@ -554,7 +554,7 @@ define hidden void @insertUsesOr(ptr addrspace(1) %in0, ptr addrspace(1) %in1, i ; GFX9-NEXT: s_setpc_b64 s[30:31] %vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4 %vec1 = load <4 x i8>, ptr addrspace(1) %in1, align 4 - %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> + %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> %vecins = insertelement <4 x i8> %shuffle0_0, i8 %elt, i32 1 store <4 x i8> %vecins, ptr addrspace(1) %out0 ret void @@ -598,7 +598,7 @@ define hidden void @addUsesOr(ptr addrspace(1) %in0, ptr addrspace(1) %in1, i8 % ; GFX9-NEXT: s_setpc_b64 s[30:31] %vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4 %vec1 = load <4 x i8>, ptr addrspace(1) %in1, align 4 - %shuffle0_0 = shufflevector <4 x 
i8> %vec0, <4 x i8> undef, <4 x i32> + %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> %added = add <4 x i8> %shuffle0_0, %vec1 store <4 x i8> %added, ptr addrspace(1) %out0 ret void @@ -783,7 +783,7 @@ define hidden void @add_div(ptr addrspace(1) %in0, ptr addrspace(1) %in1, i8 %el %gep1 = getelementptr <4 x i8>, ptr addrspace(1) %in1, i32 %tid %vec0 = load <4 x i8>, ptr addrspace(1) %gep0, align 4 %vec1 = load <4 x i8>, ptr addrspace(1) %gep1, align 4 - %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> + %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> %vecins = add <4 x i8> %shuffle0_0, %vec1 store <4 x i8> %vecins, ptr addrspace(1) %out0 ret void @@ -835,7 +835,7 @@ define hidden void @add_store(ptr addrspace(1) %in0, ptr addrspace(1) %in1, i8 % ; GFX9-NEXT: s_setpc_b64 s[30:31] %vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4 %vec1 = load <4 x i8>, ptr addrspace(1) %in1, align 4 - %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> + %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> %vecins = add <4 x i8> %shuffle0_0, %vec1 store <4 x i8> %vecins, ptr addrspace(1) %out0 store <4 x i8> %shuffle0_0, ptr addrspace(1) %out1 @@ -903,7 +903,7 @@ define hidden void @add_store_div_16(ptr addrspace(1) %in0, ptr addrspace(1) %in %gep1 = getelementptr <4 x i8>, ptr addrspace(1) %in1, i32 %tid %vec0 = load <4 x i8>, ptr addrspace(1) %gep0, align 4 %vec1 = load <4 x i8>, ptr addrspace(1) %gep1, align 4 - %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> + %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> %vecins = add <4 x i8> %shuffle0_0, %vec1 store <4 x i8> %vecins, ptr addrspace(1) %out0 store <4 x i8> %shuffle0_0, ptr addrspace(1) %out1 diff --git a/llvm/test/CodeGen/AMDGPU/reduction.ll b/llvm/test/CodeGen/AMDGPU/reduction.ll index 53a036b617725..cd4b3c1d1b546 100644 --- a/llvm/test/CodeGen/AMDGPU/reduction.ll +++ b/llvm/test/CodeGen/AMDGPU/reduction.ll @@ -10,9 +10,9 @@ ; VI-NEXT: v_add_f16_e32 define half @reduction_fadd_v4f16(<4 x half> %vec4) { entry: - %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> + %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> %bin.rdx = fadd <4 x half> %vec4, %rdx.shuf - %rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> undef, <4 x i32> + %rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> poison, <4 x i32> %bin.rdx2 = fadd <4 x half> %bin.rdx, %rdx.shuf1 %res = extractelement <4 x half> %bin.rdx2, i32 0 ret half %res @@ -30,9 +30,9 @@ entry: ; VI-NEXT: s_setpc_b64 define half @reduction_fsub_v4f16(<4 x half> %vec4) { entry: - %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> + %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> %bin.rdx = fsub <4 x half> %vec4, %rdx.shuf - %rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> undef, <4 x i32> + %rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> poison, <4 x i32> %bin.rdx2 = fsub <4 x half> %bin.rdx, %rdx.shuf1 %res = extractelement <4 x half> %bin.rdx2, i32 0 ret half %res @@ -52,9 +52,9 @@ entry: ; VI-NEXT: s_setpc_b64 define half @reduction_fsub_v4f16_preserve_fmf(<4 x half> %vec4) { entry: - %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> + %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> %bin.rdx = fsub nsz <4 x half> %vec4, %rdx.shuf - %rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> 
undef, <4 x i32> + %rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> poison, <4 x i32> %bin.rdx2 = fsub nsz <4 x half> %bin.rdx, %rdx.shuf1 %res = extractelement <4 x half> %bin.rdx2, i32 0 %neg.res = fsub half -0.0, %res @@ -70,9 +70,9 @@ entry: ; VI-NEXT: v_mul_f16_e32 define half @reduction_fmul_half4(<4 x half> %vec4) { entry: - %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> + %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> %bin.rdx = fmul <4 x half> %vec4, %rdx.shuf - %rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> undef, <4 x i32> + %rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> poison, <4 x i32> %bin.rdx2 = fmul <4 x half> %bin.rdx, %rdx.shuf1 %res = extractelement <4 x half> %bin.rdx2, i32 0 ret half %res @@ -87,9 +87,9 @@ entry: ; VI-NEXT: v_add_u16_e32 define i16 @reduction_v4i16(<4 x i16> %vec4) { entry: - %rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> undef, <4 x i32> + %rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> poison, <4 x i32> %bin.rdx = add <4 x i16> %vec4, %rdx.shuf - %rdx.shuf1 = shufflevector <4 x i16> %bin.rdx, <4 x i16> undef, <4 x i32> + %rdx.shuf1 = shufflevector <4 x i16> %bin.rdx, <4 x i16> poison, <4 x i32> %bin.rdx2 = add <4 x i16> %bin.rdx, %rdx.shuf1 %res = extractelement <4 x i16> %bin.rdx2, i32 0 ret i16 %res @@ -111,11 +111,11 @@ entry: define half @reduction_half8(<8 x half> %vec8) { entry: - %rdx.shuf = shufflevector <8 x half> %vec8, <8 x half> undef, <8 x i32> + %rdx.shuf = shufflevector <8 x half> %vec8, <8 x half> poison, <8 x i32> %bin.rdx = fadd <8 x half> %vec8, %rdx.shuf - %rdx.shuf1 = shufflevector <8 x half> %bin.rdx, <8 x half> undef, <8 x i32> + %rdx.shuf1 = shufflevector <8 x half> %bin.rdx, <8 x half> poison, <8 x i32> %bin.rdx2 = fadd <8 x half> %bin.rdx, %rdx.shuf1 - %rdx.shuf3 = shufflevector <8 x half> %bin.rdx2, <8 x half> undef, <8 x i32> + %rdx.shuf3 = shufflevector <8 x half> %bin.rdx2, <8 x half> poison, <8 x i32> %bin.rdx4 = fadd <8 x half> %bin.rdx2, %rdx.shuf3 %res = extractelement <8 x half> %bin.rdx4, i32 0 ret half %res @@ -137,11 +137,11 @@ entry: define i16 @reduction_v8i16(<8 x i16> %vec8) { entry: - %rdx.shuf = shufflevector <8 x i16> %vec8, <8 x i16> undef, <8 x i32> + %rdx.shuf = shufflevector <8 x i16> %vec8, <8 x i16> poison, <8 x i32> %bin.rdx = add <8 x i16> %vec8, %rdx.shuf - %rdx.shuf1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> + %rdx.shuf1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> poison, <8 x i32> %bin.rdx2 = add <8 x i16> %bin.rdx, %rdx.shuf1 - %rdx.shuf3 = shufflevector <8 x i16> %bin.rdx2, <8 x i16> undef, <8 x i32> + %rdx.shuf3 = shufflevector <8 x i16> %bin.rdx2, <8 x i16> poison, <8 x i32> %bin.rdx4 = add <8 x i16> %bin.rdx2, %rdx.shuf3 %res = extractelement <8 x i16> %bin.rdx4, i32 0 ret i16 %res @@ -175,13 +175,13 @@ entry: define half @reduction_half16(<16 x half> %vec16) { entry: - %rdx.shuf = shufflevector <16 x half> %vec16, <16 x half> undef, <16 x i32> + %rdx.shuf = shufflevector <16 x half> %vec16, <16 x half> poison, <16 x i32> %bin.rdx = fadd <16 x half> %vec16, %rdx.shuf - %rdx.shuf1 = shufflevector <16 x half> %bin.rdx, <16 x half> undef, <16 x i32> + %rdx.shuf1 = shufflevector <16 x half> %bin.rdx, <16 x half> poison, <16 x i32> %bin.rdx2 = fadd <16 x half> %bin.rdx, %rdx.shuf1 - %rdx.shuf3 = shufflevector <16 x half> %bin.rdx2, <16 x half> undef, <16 x i32> + %rdx.shuf3 = shufflevector <16 x half> %bin.rdx2, <16 x half> poison, <16 x i32> %bin.rdx4 = fadd <16 x half> 
%bin.rdx2, %rdx.shuf3 - %rdx.shuf5 = shufflevector <16 x half> %bin.rdx4, <16 x half> undef, <16 x i32> + %rdx.shuf5 = shufflevector <16 x half> %bin.rdx4, <16 x half> poison, <16 x i32> %bin.rdx6 = fadd <16 x half> %bin.rdx4, %rdx.shuf5 %res = extractelement <16 x half> %bin.rdx6, i32 0 ret half %res @@ -196,10 +196,10 @@ entry: ; VI-NEXT: v_min_u16_e32 define i16 @reduction_min_v4i16(<4 x i16> %vec4) { entry: - %rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> undef, <4 x i32> + %rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> poison, <4 x i32> %rdx.minmax.cmp = icmp ult <4 x i16> %vec4, %rdx.shuf %rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x i16> %vec4, <4 x i16> %rdx.shuf - %rdx.shuf1 = shufflevector <4 x i16> %rdx.minmax.select, <4 x i16> undef, <4 x i32> + %rdx.shuf1 = shufflevector <4 x i16> %rdx.minmax.select, <4 x i16> poison, <4 x i32> %rdx.minmax.cmp2 = icmp ult <4 x i16> %rdx.minmax.select, %rdx.shuf1 %rdx.minmax.select3 = select <4 x i1> %rdx.minmax.cmp2, <4 x i16> %rdx.minmax.select, <4 x i16> %rdx.shuf1 %res = extractelement <4 x i16> %rdx.minmax.select3, i32 0 @@ -221,13 +221,13 @@ entry: ; VI-NEXT: v_min_u16_e32 define i16 @reduction_umin_v8i16(<8 x i16> %vec8) { entry: - %rdx.shuf = shufflevector <8 x i16> %vec8, <8 x i16> undef, <8 x i32> + %rdx.shuf = shufflevector <8 x i16> %vec8, <8 x i16> poison, <8 x i32> %rdx.minmax.cmp = icmp ult <8 x i16> %vec8, %rdx.shuf %rdx.minmax.select = select <8 x i1> %rdx.minmax.cmp, <8 x i16> %vec8, <8 x i16> %rdx.shuf - %rdx.shuf1 = shufflevector <8 x i16> %rdx.minmax.select, <8 x i16> undef, <8 x i32> + %rdx.shuf1 = shufflevector <8 x i16> %rdx.minmax.select, <8 x i16> poison, <8 x i32> %rdx.minmax.cmp2 = icmp ult <8 x i16> %rdx.minmax.select, %rdx.shuf1 %rdx.minmax.select3 = select <8 x i1> %rdx.minmax.cmp2, <8 x i16> %rdx.minmax.select, <8 x i16> %rdx.shuf1 - %rdx.shuf4 = shufflevector <8 x i16> %rdx.minmax.select3, <8 x i16> undef, <8 x i32> + %rdx.shuf4 = shufflevector <8 x i16> %rdx.minmax.select3, <8 x i16> poison, <8 x i32> %rdx.minmax.cmp5 = icmp ult <8 x i16> %rdx.minmax.select3, %rdx.shuf4 %rdx.minmax.select6 = select <8 x i1> %rdx.minmax.cmp5, <8 x i16> %rdx.minmax.select3, <8 x i16> %rdx.shuf4 %res = extractelement <8 x i16> %rdx.minmax.select6, i32 0 @@ -301,16 +301,16 @@ entry: ; VI-NEXT: v_min_i16_e32 define i16 @reduction_smin_v16i16(<16 x i16> %vec16) { entry: - %rdx.shuf = shufflevector <16 x i16> %vec16, <16 x i16> undef, <16 x i32> + %rdx.shuf = shufflevector <16 x i16> %vec16, <16 x i16> poison, <16 x i32> %rdx.minmax.cmp = icmp slt <16 x i16> %vec16, %rdx.shuf %rdx.minmax.select = select <16 x i1> %rdx.minmax.cmp, <16 x i16> %vec16, <16 x i16> %rdx.shuf - %rdx.shuf1 = shufflevector <16 x i16> %rdx.minmax.select, <16 x i16> undef, <16 x i32> + %rdx.shuf1 = shufflevector <16 x i16> %rdx.minmax.select, <16 x i16> poison, <16 x i32> %rdx.minmax.cmp2 = icmp slt <16 x i16> %rdx.minmax.select, %rdx.shuf1 %rdx.minmax.select3 = select <16 x i1> %rdx.minmax.cmp2, <16 x i16> %rdx.minmax.select, <16 x i16> %rdx.shuf1 - %rdx.shuf4 = shufflevector <16 x i16> %rdx.minmax.select3, <16 x i16> undef, <16 x i32> + %rdx.shuf4 = shufflevector <16 x i16> %rdx.minmax.select3, <16 x i16> poison, <16 x i32> %rdx.minmax.cmp5 = icmp slt <16 x i16> %rdx.minmax.select3, %rdx.shuf4 %rdx.minmax.select6 = select <16 x i1> %rdx.minmax.cmp5, <16 x i16> %rdx.minmax.select3, <16 x i16> %rdx.shuf4 - %rdx.shuf7 = shufflevector <16 x i16> %rdx.minmax.select6, <16 x i16> undef, <16 x i32> + %rdx.shuf7 = shufflevector <16 x i16> 
%rdx.minmax.select6, <16 x i16> poison, <16 x i32> %rdx.minmax.cmp8 = icmp slt <16 x i16> %rdx.minmax.select6, %rdx.shuf7 %rdx.minmax.select9 = select <16 x i1> %rdx.minmax.cmp8, <16 x i16> %rdx.minmax.select6, <16 x i16> %rdx.shuf7 %res = extractelement <16 x i16> %rdx.minmax.select9, i32 0 @@ -404,10 +404,10 @@ entry: ; VI-NEXT: v_max_u16_e32 define i16 @reduction_umax_v4i16(<4 x i16> %vec4) { entry: - %rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> undef, <4 x i32> + %rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> poison, <4 x i32> %rdx.minmax.cmp = icmp ugt <4 x i16> %vec4, %rdx.shuf %rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x i16> %vec4, <4 x i16> %rdx.shuf - %rdx.shuf1 = shufflevector <4 x i16> %rdx.minmax.select, <4 x i16> undef, <4 x i32> + %rdx.shuf1 = shufflevector <4 x i16> %rdx.minmax.select, <4 x i16> poison, <4 x i32> %rdx.minmax.cmp2 = icmp ugt <4 x i16> %rdx.minmax.select, %rdx.shuf1 %rdx.minmax.select3 = select <4 x i1> %rdx.minmax.cmp2, <4 x i16> %rdx.minmax.select, <4 x i16> %rdx.shuf1 %res = extractelement <4 x i16> %rdx.minmax.select3, i32 0 @@ -423,10 +423,10 @@ entry: ; VI-NEXT: v_max_i16_e32 define i16 @reduction_smax_v4i16(<4 x i16> %vec4) #0 { entry: - %rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> undef, <4 x i32> + %rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> poison, <4 x i32> %rdx.minmax.cmp = icmp sgt <4 x i16> %vec4, %rdx.shuf %rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x i16> %vec4, <4 x i16> %rdx.shuf - %rdx.shuf1 = shufflevector <4 x i16> %rdx.minmax.select, <4 x i16> undef, <4 x i32> + %rdx.shuf1 = shufflevector <4 x i16> %rdx.minmax.select, <4 x i16> poison, <4 x i32> %rdx.minmax.cmp2 = icmp sgt <4 x i16> %rdx.minmax.select, %rdx.shuf1 %rdx.minmax.select3 = select <4 x i1> %rdx.minmax.cmp2, <4 x i16> %rdx.minmax.select, <4 x i16> %rdx.shuf1 %res = extractelement <4 x i16> %rdx.minmax.select3, i32 0 @@ -451,9 +451,9 @@ entry: ; VI: v_max_f16_e32 v0, [[MAX1]], [[MAX0]] define half @reduction_maxnum_v4f16(<4 x half> %vec4) { entry: - %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> + %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> %rdx.minmax = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %vec4, <4 x half> %rdx.shuf) - %rdx.shuf1 = shufflevector <4 x half> %rdx.minmax, <4 x half> undef, <4 x i32> + %rdx.shuf1 = shufflevector <4 x half> %rdx.minmax, <4 x half> poison, <4 x i32> %rdx.minmax3 = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %rdx.minmax, <4 x half> %rdx.shuf1) %res = extractelement <4 x half> %rdx.minmax3, i32 0 ret half %res @@ -476,9 +476,9 @@ entry: ; VI: v_min_f16_e32 v0, [[MAX1]], [[MAX0]] define half @reduction_minnum_v4f16(<4 x half> %vec4) { entry: - %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> + %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> %rdx.minmax = call <4 x half> @llvm.minnum.v4f16(<4 x half> %vec4, <4 x half> %rdx.shuf) - %rdx.shuf1 = shufflevector <4 x half> %rdx.minmax, <4 x half> undef, <4 x i32> + %rdx.shuf1 = shufflevector <4 x half> %rdx.minmax, <4 x half> poison, <4 x i32> %rdx.minmax3 = call <4 x half> @llvm.minnum.v4f16(<4 x half> %rdx.minmax, <4 x half> %rdx.shuf1) %res = extractelement <4 x half> %rdx.minmax3, i32 0 ret half %res @@ -513,10 +513,10 @@ entry: ; VI: v_max_f16_e32 v0, [[MAX1]], [[MAX0]] define half @reduction_fast_max_pattern_v4f16(<4 x half> %vec4) { entry: - %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> + %rdx.shuf = 
shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> %rdx.minmax.cmp = fcmp nnan nsz ogt <4 x half> %vec4, %rdx.shuf %rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x half> %vec4, <4 x half> %rdx.shuf - %rdx.shuf1 = shufflevector <4 x half> %rdx.minmax.select, <4 x half> undef, <4 x i32> + %rdx.shuf1 = shufflevector <4 x half> %rdx.minmax.select, <4 x half> poison, <4 x i32> %rdx.minmax.cmp2 = fcmp nnan nsz ogt <4 x half> %rdx.minmax.select, %rdx.shuf1 %rdx.minmax.select3 = select <4 x i1> %rdx.minmax.cmp2, <4 x half> %rdx.minmax.select, <4 x half> %rdx.shuf1 %res = extractelement <4 x half> %rdx.minmax.select3, i32 0 @@ -552,10 +552,10 @@ entry: ; VI: v_min_f16_e32 v0, [[MAX1]], [[MAX0]] define half @reduction_fast_min_pattern_v4f16(<4 x half> %vec4) { entry: - %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> + %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> %rdx.minmax.cmp = fcmp nnan nsz olt <4 x half> %vec4, %rdx.shuf %rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x half> %vec4, <4 x half> %rdx.shuf - %rdx.shuf1 = shufflevector <4 x half> %rdx.minmax.select, <4 x half> undef, <4 x i32> + %rdx.shuf1 = shufflevector <4 x half> %rdx.minmax.select, <4 x half> poison, <4 x i32> %rdx.minmax.cmp2 = fcmp nnan nsz olt <4 x half> %rdx.minmax.select, %rdx.shuf1 %rdx.minmax.select3 = select <4 x i1> %rdx.minmax.cmp2, <4 x half> %rdx.minmax.select, <4 x half> %rdx.shuf1 %res = extractelement <4 x half> %rdx.minmax.select3, i32 0 diff --git a/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll b/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll index b744d3357b8ab..90b1eb5500f4c 100644 --- a/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll +++ b/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll @@ -67,7 +67,7 @@ define amdgpu_kernel void @scalar_to_vector_v2i32(ptr addrspace(1) %out, ptr add ; GFX9-NEXT: s_endpgm %tmp1 = load i32, ptr addrspace(1) %in, align 4 %bc = bitcast i32 %tmp1 to <2 x i16> - %tmp2 = shufflevector <2 x i16> %bc, <2 x i16> undef, <4 x i32> + %tmp2 = shufflevector <2 x i16> %bc, <2 x i16> poison, <4 x i32> store <4 x i16> %tmp2, ptr addrspace(1) %out, align 8 ret void } @@ -135,7 +135,7 @@ define amdgpu_kernel void @scalar_to_vector_v2f32(ptr addrspace(1) %out, ptr add ; GFX9-NEXT: s_endpgm %tmp1 = load float, ptr addrspace(1) %in, align 4 %bc = bitcast float %tmp1 to <2 x i16> - %tmp2 = shufflevector <2 x i16> %bc, <2 x i16> undef, <4 x i32> + %tmp2 = shufflevector <2 x i16> %bc, <2 x i16> poison, <4 x i32> store <4 x i16> %tmp2, ptr addrspace(1) %out, align 8 ret void } @@ -193,7 +193,7 @@ define amdgpu_kernel void @scalar_to_vector_v4i16() { bb: %tmp = load <2 x i8>, ptr addrspace(1) undef, align 1 %tmp1 = shufflevector <2 x i8> %tmp, <2 x i8> zeroinitializer, <8 x i32> - %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> + %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> poison, <8 x i32> store <8 x i8> %tmp2, ptr addrspace(1) undef, align 8 ret void } @@ -262,7 +262,7 @@ bb: %load = load half, ptr addrspace(1) undef, align 1 %tmp = bitcast half %load to <2 x i8> %tmp1 = shufflevector <2 x i8> %tmp, <2 x i8> zeroinitializer, <8 x i32> - %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> + %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> poison, <8 x i32> store <8 x i8> %tmp2, ptr addrspace(1) undef, align 8 ret void } @@ -275,7 +275,7 @@ bb: ; %tmp1 = load i32, ptr addrspace(1) %in, align 4 ; %bc = bitcast i32 %tmp1 to <4 x i8> -; %tmp2 = shufflevector <4 x i8> %bc, <4 x i8> undef, <8 x i32> +; %tmp2 
= shufflevector <4 x i8> %bc, <4 x i8> poison, <8 x i32> ; store <8 x i8> %tmp2, ptr addrspace(1) %out, align 4 ; ret void ; } diff --git a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll index 4993df7e1ba48..6a88be6e55859 100644 --- a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll +++ b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll @@ -56,13 +56,13 @@ declare <2 x float> @_Z3cosDv2_f(<2 x float>) define amdgpu_kernel void @test_sincos_v3(ptr addrspace(1) nocapture %a) { entry: %loadVec4 = load <4 x float>, ptr addrspace(1) %a, align 16 - %extractVec4 = shufflevector <4 x float> %loadVec4, <4 x float> undef, <3 x i32> + %extractVec4 = shufflevector <4 x float> %loadVec4, <4 x float> poison, <3 x i32> %call = call fast <3 x float> @_Z3sinDv3_f(<3 x float> %extractVec4) - %extractVec6 = shufflevector <3 x float> %call, <3 x float> undef, <4 x i32> + %extractVec6 = shufflevector <3 x float> %call, <3 x float> poison, <4 x i32> store <4 x float> %extractVec6, ptr addrspace(1) %a, align 16 %call11 = call fast <3 x float> @_Z3cosDv3_f(<3 x float> %extractVec4) %arrayidx12 = getelementptr inbounds <3 x float>, ptr addrspace(1) %a, i64 1 - %extractVec13 = shufflevector <3 x float> %call11, <3 x float> undef, <4 x i32> + %extractVec13 = shufflevector <3 x float> %call11, <3 x float> poison, <4 x i32> store <4 x float> %extractVec13, ptr addrspace(1) %arrayidx12, align 16 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll b/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll index 6d91c33fd2876..109c7d638f924 100644 --- a/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll +++ b/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll @@ -52,7 +52,7 @@ define hidden <4 x float> @split_v4f32_multi_arg(<4 x float> %arg0, <2 x float> ; GCN: .cfi_endproc call void @llvm.dbg.value(metadata <4 x float> %arg0, metadata !29, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !31 call void @llvm.dbg.value(metadata <2 x float> %arg1, metadata !30, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !31 - %tmp = shufflevector <2 x float> %arg1, <2 x float> undef, <4 x i32> , !dbg !32 + %tmp = shufflevector <2 x float> %arg1, <2 x float> poison, <4 x i32> , !dbg !32 %add = fadd <4 x float> %tmp, %arg0, !dbg !33 ret <4 x float> %add, !dbg !34 } diff --git a/llvm/test/CodeGen/AMDGPU/trunc-store-vec-i16-to-i8.ll b/llvm/test/CodeGen/AMDGPU/trunc-store-vec-i16-to-i8.ll index 7cb30fc8247e9..18bcd6cf880a4 100644 --- a/llvm/test/CodeGen/AMDGPU/trunc-store-vec-i16-to-i8.ll +++ b/llvm/test/CodeGen/AMDGPU/trunc-store-vec-i16-to-i8.ll @@ -15,8 +15,8 @@ entry: define protected amdgpu_kernel void @short2_char4(ptr addrspace(1) %out) { entry: %tmp = load <2 x i16>, ptr addrspace(1) undef, align 4 - %vecinit = shufflevector <2 x i16> %tmp, <2 x i16> undef, <4 x i32> - %vecinit2 = shufflevector <4 x i16> %vecinit, <4 x i16> , <4 x i32> + %vecinit = shufflevector <2 x i16> %tmp, <2 x i16> poison, <4 x i32> + %vecinit2 = shufflevector <4 x i16> %vecinit, <4 x i16> , <4 x i32> %tmp1 = trunc <4 x i16> %vecinit2 to <4 x i8> store <4 x i8> %tmp1, ptr addrspace(1) %out, align 4 ret void @@ -27,8 +27,8 @@ entry: define protected amdgpu_kernel void @short4_char8(ptr addrspace(1) %out) { entry: %tmp = load <4 x i16>, ptr addrspace(1) undef, align 8 - %vecinit = shufflevector <4 x i16> %tmp, <4 x i16> undef, <8 x i32> - %vecinit2 = shufflevector <8 x i16> %vecinit, <8 x i16> , <8 x i32> + %vecinit = shufflevector <4 x i16> %tmp, <4 x i16> poison, 
<8 x i32> + %vecinit2 = shufflevector <8 x i16> %vecinit, <8 x i16> , <8 x i32> %tmp1 = trunc <8 x i16> %vecinit2 to <8 x i8> store <8 x i8> %tmp1, ptr addrspace(1) %out, align 8 ret void @@ -39,8 +39,8 @@ entry: define protected amdgpu_kernel void @short8_char16(ptr addrspace(1) %out) { entry: %tmp = load <8 x i16>, ptr addrspace(1) undef, align 16 - %vecinit = shufflevector <8 x i16> %tmp, <8 x i16> undef, <16 x i32> - %vecinit2 = shufflevector <16 x i16> %vecinit, <16 x i16> , <16 x i32> + %vecinit = shufflevector <8 x i16> %tmp, <8 x i16> poison, <16 x i32> + %vecinit2 = shufflevector <16 x i16> %vecinit, <16 x i16> , <16 x i32> %tmp1 = trunc <16 x i16> %vecinit2 to <16 x i8> store <16 x i8> %tmp1, ptr addrspace(1) %out, align 16 ret void @@ -52,8 +52,8 @@ entry: define protected amdgpu_kernel void @short16_char32(ptr addrspace(1) %out) { entry: %tmp = load <16 x i16>, ptr addrspace(1) undef, align 32 - %vecinit = shufflevector <16 x i16> %tmp, <16 x i16> undef, <32 x i32> - %vecinit2 = shufflevector <32 x i16> %vecinit, <32 x i16> , <32 x i32> + %vecinit = shufflevector <16 x i16> %tmp, <16 x i16> poison, <32 x i32> + %vecinit2 = shufflevector <32 x i16> %vecinit, <32 x i16> , <32 x i32> %tmp1 = trunc <32 x i16> %vecinit2 to <32 x i8> store <32 x i8> %tmp1, ptr addrspace(1) %out, align 32 ret void diff --git a/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll b/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll index 0ab09354ec06b..bc8d0d1f8cd8f 100644 --- a/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll +++ b/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll @@ -27,7 +27,7 @@ define <4 x half> @shuffle_v4f16_23uu(ptr addrspace(1) %arg0, ptr addrspace(1) % ; GFX11-NEXT: s_setpc_b64 s[30:31] %val0 = load <4 x half>, ptr addrspace(1) %arg0 %val1 = load <4 x half>, ptr addrspace(1) %arg1 - %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> + %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> ret <4 x half> %shuffle } @@ -74,7 +74,7 @@ define <4 x half> @shuffle_v4f16_234u(ptr addrspace(1) %arg0, ptr addrspace(1) % ; GFX11-NEXT: s_setpc_b64 s[30:31] %val0 = load <4 x half>, ptr addrspace(1) %arg0 %val1 = load <4 x half>, ptr addrspace(1) %arg1 - %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> + %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> ret <4 x half> %shuffle } @@ -101,7 +101,7 @@ define <4 x half> @shuffle_v4f16_u1u3(ptr addrspace(1) %arg0, ptr addrspace(1) % ; GFX11-NEXT: s_setpc_b64 s[30:31] %val0 = load <4 x half>, ptr addrspace(1) %arg0 %val1 = load <4 x half>, ptr addrspace(1) %arg1 - %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> + %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> ret <4 x half> %shuffle } @@ -140,7 +140,7 @@ define <4 x half> @shuffle_v4f16_u3u1(ptr addrspace(1) %arg0, ptr addrspace(1) % ; GFX11-NEXT: s_setpc_b64 s[30:31] %val0 = load <4 x half>, ptr addrspace(1) %arg0 %val1 = load <4 x half>, ptr addrspace(1) %arg1 - %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> + %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> ret <4 x half> %shuffle } @@ -167,7 +167,7 @@ define <4 x half> @shuffle_v4f16_u3uu(ptr addrspace(1) %arg0, ptr addrspace(1) % ; GFX11-NEXT: s_setpc_b64 s[30:31] %val0 = load <4 x half>, ptr addrspace(1) %arg0 %val1 = load <4 x half>, ptr addrspace(1) %arg1 - %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> + %shuffle = shufflevector <4 x half> %val0, 
<4 x half> %val1, <4 x i32>
ret <4 x half> %shuffle
}

@@ -216,7 +216,7 @@ define <4 x half> @shuffle_v4f16_3u6u(ptr addrspace(1) %arg0, ptr addrspace(1) %
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x half>, ptr addrspace(1) %arg0
%val1 = load <4 x half>, ptr addrspace(1) %arg1
- %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32>
+ %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32>
ret <4 x half> %shuffle
}

@@ -265,7 +265,7 @@ define <4 x half> @shuffle_v4f16_3uu7(ptr addrspace(1) %arg0, ptr addrspace(1) %
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x half>, ptr addrspace(1) %arg0
%val1 = load <4 x half>, ptr addrspace(1) %arg1
- %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32>
+ %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32>
ret <4 x half> %shuffle
}

@@ -312,7 +312,7 @@ define <4 x half> @shuffle_v4f16_35u5(ptr addrspace(1) %arg0, ptr addrspace(1) %
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x half>, ptr addrspace(1) %arg0
%val1 = load <4 x half>, ptr addrspace(1) %arg1
- %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32>
+ %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32>
ret <4 x half> %shuffle
}

@@ -364,7 +364,7 @@ define <4 x half> @shuffle_v4f16_357u(ptr addrspace(1) %arg0, ptr addrspace(1) %
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x half>, ptr addrspace(1) %arg0
%val1 = load <4 x half>, ptr addrspace(1) %arg1
- %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32>
+ %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32>
ret <4 x half> %shuffle
}

@@ -1881,21 +1881,21 @@ entry:
%tmp15 = load <4 x half>, ptr addrspace(1) %arrayidx1, align 8
%arrayidx2 = getelementptr inbounds <4 x half>, ptr addrspace(1) %C, i64 %tmp12
%tmp16 = load <4 x half>, ptr addrspace(1) %arrayidx2, align 8
- %tmp17 = shufflevector <4 x half> %tmp14, <4 x half> undef, <2 x i32> zeroinitializer
- %tmp18 = shufflevector <4 x half> %tmp15, <4 x half> undef, <2 x i32>
- %tmp19 = shufflevector <4 x half> %tmp16, <4 x half> undef, <2 x i32>
+ %tmp17 = shufflevector <4 x half> %tmp14, <4 x half> poison, <2 x i32> zeroinitializer
+ %tmp18 = shufflevector <4 x half> %tmp15, <4 x half> poison, <2 x i32>
+ %tmp19 = shufflevector <4 x half> %tmp16, <4 x half> poison, <2 x i32>
%tmp20 = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %tmp17, <2 x half> %tmp18, <2 x half> %tmp19)
- %tmp21 = shufflevector <4 x half> %tmp14, <4 x half> undef, <2 x i32>
- %tmp22 = shufflevector <4 x half> %tmp15, <4 x half> undef, <2 x i32>
+ %tmp21 = shufflevector <4 x half> %tmp14, <4 x half> poison, <2 x i32>
+ %tmp22 = shufflevector <4 x half> %tmp15, <4 x half> poison, <2 x i32>
%tmp23 = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %tmp21, <2 x half> %tmp22, <2 x half> %tmp20)
- %tmp24 = shufflevector <2 x half> %tmp23, <2 x half> undef, <4 x i32>
+ %tmp24 = shufflevector <2 x half> %tmp23, <2 x half> poison, <4 x i32>
%tmp25 = shufflevector <4 x half> %tmp24, <4 x half> %tmp16, <4 x i32>
- %tmp26 = shufflevector <4 x half> %tmp14, <4 x half> undef, <2 x i32>
- %tmp27 = shufflevector <4 x half> %tmp25, <4 x half> undef, <2 x i32>
+ %tmp26 = shufflevector <4 x half> %tmp14, <4 x half> poison, <2 x i32>
+ %tmp27 = shufflevector <4 x half> %tmp25, <4 x half> poison, <2 x i32>
%tmp28 = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %tmp26, <2 x half> %tmp18, <2 x half> %tmp27)
- %tmp29 = shufflevector <4 x half> %tmp14, <4 x half> undef, <2 x i32>
+ %tmp29 = shufflevector <4 x half> %tmp14, <4 x half> poison, <2 x i32>
%tmp30 = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %tmp29, <2 x half> %tmp22, <2 x half> %tmp28)
- %tmp31 = shufflevector <2 x half> %tmp30, <2 x half> undef, <4 x i32>
+ %tmp31 = shufflevector <2 x half> %tmp30, <2 x half> poison, <4 x i32>
%tmp32 = shufflevector <4 x half> %tmp25, <4 x half> %tmp31, <4 x i32>
store <4 x half> %tmp32, ptr addrspace(1) %arrayidx2, align 8
ret void
@@ -2006,7 +2006,7 @@ define amdgpu_kernel void @shuffle_scalar_load_v8i32_0123(ptr addrspace(4) %in,
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[2:3]
; GFX11-NEXT: s_endpgm
%ld8 = load <8 x i32>, ptr addrspace(4) %in, align 16
- %id = shufflevector <8 x i32> %ld8, <8 x i32> undef, <4 x i32>
+ %id = shufflevector <8 x i32> %ld8, <8 x i32> poison, <4 x i32>
store <4 x i32> %id, ptr addrspace(1) %out, align 8
ret void
}
@@ -2052,7 +2052,7 @@ define <2 x half> @low16bits_v2f16(ptr addrspace(1) %x0, ptr addrspace(1) %x1) {
entry:
%0 = load <2 x half>, ptr addrspace(1) %x0, align 4
%1 = load <2 x half>, ptr addrspace(1) %x1, align 4
- %vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32>
+ %vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32>
%vy1.2.vec.insert = shufflevector <2 x half> %vy1.0.vec.insert, <2 x half> %1, <2 x i32>
ret <2 x half> %vy1.2.vec.insert
}
@@ -2098,7 +2098,7 @@ define <2 x half> @hi16bits_v2f16(ptr addrspace(1) %x0, ptr addrspace(1) %x1) {
entry:
%0 = load <2 x half>, ptr addrspace(1) %x0, align 4
%1 = load <2 x half>, ptr addrspace(1) %x1, align 4
- %vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32>
+ %vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32>
%vy1.2.vec.insert = shufflevector <2 x half> %vy1.0.vec.insert, <2 x half> %1, <2 x i32>
ret <2 x half> %vy1.2.vec.insert
}
@@ -2144,7 +2144,7 @@ define <2 x half> @low16hi16bits_v2f16(ptr addrspace(1) %x0, ptr addrspace(1) %x
entry:
%0 = load <2 x half>, ptr addrspace(1) %x0, align 4
%1 = load <2 x half>, ptr addrspace(1) %x1, align 4
- %vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32>
+ %vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32>
%vy1.2.vec.insert = shufflevector <2 x half> %vy1.0.vec.insert, <2 x half> %1, <2 x i32>
ret <2 x half> %vy1.2.vec.insert
}
@@ -2179,7 +2179,7 @@ define <2 x half> @hi16low16bits_v2bf16(ptr addrspace(1) %x0, ptr addrspace(1) %
entry:
%0 = load <2 x half>, ptr addrspace(1) %x0, align 4
%1 = load <2 x half>, ptr addrspace(1) %x1, align 4
- %vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32>
+ %vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32>
%vy1.2.vec.insert = shufflevector <2 x half> %vy1.0.vec.insert, <2 x half> %1, <2 x i32>
ret <2 x half> %vy1.2.vec.insert
}
@@ -2225,7 +2225,7 @@ define <2 x i16> @i16_low16bits(ptr addrspace(1) %x0, ptr addrspace(1) %x1) {
entry:
%0 = load <2 x i16>, ptr addrspace(1) %x0, align 4
%1 = load <2 x i16>, ptr addrspace(1) %x1, align 4
- %vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32>
+ %vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32>
%vy1.2.vec.insert = shufflevector <2 x i16> %vy1.0.vec.insert, <2 x i16> %1, <2 x i32>
ret <2 x i16> %vy1.2.vec.insert
}
@@ -2271,7 +2271,7 @@ define <2 x i16> @i16_low16hi16bits(ptr addrspace(1) %x0, ptr addrspace(1) %x1)
entry:
%0 = load <2 x i16>, ptr addrspace(1) %x0, align 4
%1 = load <2 x i16>, ptr addrspace(1) %x1, align 4
- %vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32>
+ %vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32>
%vy1.2.vec.insert = shufflevector <2 x i16> %vy1.0.vec.insert, <2 x i16> %1, <2 x i32>
ret <2 x i16> %vy1.2.vec.insert
}
@@ -2306,7 +2306,7 @@ define <2 x i16> @i16_hi16low16bits(ptr addrspace(1) %x0, ptr addrspace(1) %x1)
entry:
%0 = load <2 x i16>, ptr addrspace(1) %x0, align 4
%1 = load <2 x i16>, ptr addrspace(1) %x1, align 4
- %vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32>
+ %vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32>
%vy1.2.vec.insert = shufflevector <2 x i16> %vy1.0.vec.insert, <2 x i16> %1, <2 x i32>
ret <2 x i16> %vy1.2.vec.insert
}
@@ -2352,7 +2352,7 @@ define <2 x i16> @i16_hi16bits(ptr addrspace(1) %x0, ptr addrspace(1) %x1) {
entry:
%0 = load <2 x i16>, ptr addrspace(1) %x0, align 4
%1 = load <2 x i16>, ptr addrspace(1) %x1, align 4
- %vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32>
+ %vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32>
%vy1.2.vec.insert = shufflevector <2 x i16> %vy1.0.vec.insert, <2 x i16> %1, <2 x i32>
ret <2 x i16> %vy1.2.vec.insert
}
@@ -3020,7 +3020,7 @@ define <4 x bfloat> @shuffle_v4bf16_23uu(ptr addrspace(1) %arg0, ptr addrspace(1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
%val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
- %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32>
+ %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32>
ret <4 x bfloat> %shuffle
}

@@ -3067,7 +3067,7 @@ define <4 x bfloat> @shuffle_v4bf16_234u(ptr addrspace(1) %arg0, ptr addrspace(1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
%val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
- %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32>
+ %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32>
ret <4 x bfloat> %shuffle
}

@@ -3094,7 +3094,7 @@ define <4 x bfloat> @shuffle_v4bf16_u1u3(ptr addrspace(1) %arg0, ptr addrspace(1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
%val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
- %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32>
+ %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32>
ret <4 x bfloat> %shuffle
}

@@ -3133,7 +3133,7 @@ define <4 x bfloat> @shuffle_v4bf16_u3u1(ptr addrspace(1) %arg0, ptr addrspace(1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
%val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
- %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32>
+ %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32>
ret <4 x bfloat> %shuffle
}

@@ -3160,7 +3160,7 @@ define <4 x bfloat> @shuffle_v4bf16_u3uu(ptr addrspace(1) %arg0, ptr addrspace(1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
%val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
- %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32>
+ %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32>
ret <4 x bfloat> %shuffle
}

@@ -3209,7 +3209,7 @@ define <4 x bfloat> @shuffle_v4bf16_3u6u(ptr addrspace(1) %arg0, ptr addrspace(1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
%val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
- %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32>
+ %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32>
ret <4 x bfloat> %shuffle
}

@@ -3258,7 +3258,7 @@ define <4 x bfloat> @shuffle_v4bf16_3uu7(ptr addrspace(1) %arg0, ptr addrspace(1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
%val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
- %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32>
+ %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32>
ret <4 x bfloat> %shuffle
}

@@ -3305,7 +3305,7 @@ define <4 x bfloat> @shuffle_v4bf16_35u5(ptr addrspace(1) %arg0, ptr addrspace(1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
%val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
- %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32>
+ %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32>
ret <4 x bfloat> %shuffle
}

@@ -3357,7 +3357,7 @@ define <4 x bfloat> @shuffle_v4bf16_357u(ptr addrspace(1) %arg0, ptr addrspace(1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
%val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
- %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32>
+ %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32>
ret <4 x bfloat> %shuffle
}

@@ -5059,21 +5059,21 @@ entry:
%tmp15 = load <4 x bfloat>, ptr addrspace(1) %arrayidx1, align 8
%arrayidx2 = getelementptr inbounds <4 x bfloat>, ptr addrspace(1) %C, i64 %tmp12
%tmp16 = load <4 x bfloat>, ptr addrspace(1) %arrayidx2, align 8
- %tmp17 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> undef, <2 x i32> zeroinitializer
- %tmp18 = shufflevector <4 x bfloat> %tmp15, <4 x bfloat> undef, <2 x i32>
- %tmp19 = shufflevector <4 x bfloat> %tmp16, <4 x bfloat> undef, <2 x i32>
+ %tmp17 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> poison, <2 x i32> zeroinitializer
+ %tmp18 = shufflevector <4 x bfloat> %tmp15, <4 x bfloat> poison, <2 x i32>
+ %tmp19 = shufflevector <4 x bfloat> %tmp16, <4 x bfloat> poison, <2 x i32>
%tmp20 = tail call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> %tmp17, <2 x bfloat> %tmp18, <2 x bfloat> %tmp19)
- %tmp21 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> undef, <2 x i32>
- %tmp22 = shufflevector <4 x bfloat> %tmp15, <4 x bfloat> undef, <2 x i32>
+ %tmp21 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> poison, <2 x i32>
+ %tmp22 = shufflevector <4 x bfloat> %tmp15, <4 x bfloat> poison, <2 x i32>
%tmp23 = tail call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> %tmp21, <2 x bfloat> %tmp22, <2 x bfloat> %tmp20)
- %tmp24 = shufflevector <2 x bfloat> %tmp23, <2 x bfloat> undef, <4 x i32>
+ %tmp24 = shufflevector <2 x bfloat> %tmp23, <2 x bfloat> poison, <4 x i32>
%tmp25 = shufflevector <4 x bfloat> %tmp24, <4 x bfloat> %tmp16, <4 x i32>
- %tmp26 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> undef, <2 x i32>
- %tmp27 = shufflevector <4 x bfloat> %tmp25, <4 x bfloat> undef, <2 x i32>
+ %tmp26 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> poison, <2 x i32>
+ %tmp27 = shufflevector <4 x bfloat> %tmp25, <4 x bfloat> poison, <2 x i32>
%tmp28 = tail call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> %tmp26, <2 x bfloat> %tmp18, <2 x bfloat> %tmp27)
- %tmp29 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> undef, <2 x i32>
+ %tmp29 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> poison, <2 x i32>
%tmp30 = tail call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> %tmp29, <2 x bfloat> %tmp22, <2 x bfloat> %tmp28)
- %tmp31 = shufflevector <2 x bfloat> %tmp30, <2 x bfloat> undef, <4 x i32>
+ %tmp31 = shufflevector <2 x bfloat> %tmp30, <2 x bfloat> poison, <4 x i32>
%tmp32 = shufflevector <4 x bfloat> %tmp25, <4 x bfloat> %tmp31, <4 x i32>
store <4 x bfloat> %tmp32, ptr addrspace(1) %arrayidx2, align 8
ret void
@@ -5172,7 +5172,7 @@ define <2 x bfloat> @low16bits(ptr addrspace(1) %x0, ptr addrspace(1) %x1) {
entry:
%0 = load <2 x bfloat>, ptr addrspace(1) %x0, align 4
%1 = load <2 x bfloat>, ptr addrspace(1) %x1, align 4
- %vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32>
+ %vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32>
%vy1.2.vec.insert = shufflevector <2 x bfloat> %vy1.0.vec.insert, <2 x bfloat> %1, <2 x i32>
ret <2 x bfloat> %vy1.2.vec.insert
}
@@ -5218,7 +5218,7 @@ define <2 x bfloat> @hi16bits_v2bf16(ptr addrspace(1) %x0, ptr addrspace(1) %x1)
entry:
%0 = load <2 x bfloat>, ptr addrspace(1) %x0, align 4
%1 = load <2 x bfloat>, ptr addrspace(1) %x1, align 4
- %vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32>
+ %vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32>
%vy1.2.vec.insert = shufflevector <2 x bfloat> %vy1.0.vec.insert, <2 x bfloat> %1, <2 x i32>
ret <2 x bfloat> %vy1.2.vec.insert
}
@@ -5264,7 +5264,7 @@ define <2 x bfloat> @low16hi16bits_v2bf16(ptr addrspace(1) %x0, ptr addrspace(1)
entry:
%0 = load <2 x bfloat>, ptr addrspace(1) %x0, align 4
%1 = load <2 x bfloat>, ptr addrspace(1) %x1, align 4
- %vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32>
+ %vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32>
%vy1.2.vec.insert = shufflevector <2 x bfloat> %vy1.0.vec.insert, <2 x bfloat> %1, <2 x i32>
ret <2 x bfloat> %vy1.2.vec.insert
}
@@ -5299,7 +5299,7 @@ define <2 x bfloat> @hi16low16bits(ptr addrspace(1) %x0, ptr addrspace(1) %x1) {
entry:
%0 = load <2 x bfloat>, ptr addrspace(1) %x0, align 4
%1 = load <2 x bfloat>, ptr addrspace(1) %x1, align 4
- %vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32>
+ %vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32>
%vy1.2.vec.insert = shufflevector <2 x bfloat> %vy1.0.vec.insert, <2 x bfloat> %1, <2 x i32>
ret <2 x bfloat> %vy1.2.vec.insert
}
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
index 2c6c9a50a72f9..30d9465c33e40 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
@@ -543,12 +543,12 @@ if.then9: ; preds = %entry
sw.bb: ; preds = %if.then9
%i17 = load i8, ptr addrspace(1) null, align 1
%i18 = insertelement <4 x i8> zeroinitializer, i8 %i17, i64 0
- %a.sroa.0.0.vecblend = shufflevector <4 x i8> %i18, <4 x i8> zeroinitializer, <4 x i32>
+ %a.sroa.0.0.vecblend = shufflevector <4 x i8> %i18, <4 x i8> zeroinitializer, <4 x i32>
br label %sw.bb18

sw.bb18: ; preds = %sw.bb, %if.then9
%a.sroa.0.0 = phi <4 x i8> [ %a.sroa.0.0.vecblend, %sw.bb ], [ poison, %if.then9 ]
- %a.sroa.0.0.vec.extract61 = shufflevector <4 x i8> %a.sroa.0.0, <4 x i8> zeroinitializer, <3 x i32>
+ %a.sroa.0.0.vec.extract61 = shufflevector <4 x i8> %a.sroa.0.0, <4 x i8> zeroinitializer, <3 x i32>
%i19 = insertelement <3 x i8> %a.sroa.0.0.vec.extract61, i8 0, i64 0
%i20 = select <3 x i1> zeroinitializer, <3 x i8> zeroinitializer, <3 x i8> %i19
%i21 = extractelement <3 x i8> %i20, i64 1
diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll
index 48ab512a7ae0d..2833237c1ea37 100644
--- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll
@@ -421,7 +421,7 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negC_pack(<8 x half> %A, <8 x
; GFX12-NEXT: s_endpgm
bb:
%C = load <16 x half>, ptr %Caddr
- %C_shuffle = shufflevector <16 x half> %C, <16 x half> undef, <8 x i32>
+ %C_shuffle = shufflevector <16 x half> %C, <16 x half> poison, <8 x i32>
%fneg.C_shuffle = fneg <8 x half> %C_shuffle
%res = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<8 x half> %A, <8 x half> %B, <8 x half> %fneg.C_shuffle , i1 0)
store <8 x half> %res, ptr addrspace(1) %out
diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll
index 8f7cd5cb2bb65..98da9ef2e8819 100644
--- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll
@@ -378,7 +378,7 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negC_pack(<4 x half> %A, <4 x
; GFX12-NEXT: s_endpgm
bb:
%C = load <8 x half>, ptr %Caddr
- %C_shuffle = shufflevector <8 x half> %C, <8 x half> undef, <4 x i32>
+ %C_shuffle = shufflevector <8 x half> %C, <8 x half> poison, <4 x i32>
%fneg.C_shuffle = fneg <4 x half> %C_shuffle
%res = call <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<4 x half> %A, <4 x half> %B, <4 x half> %fneg.C_shuffle , i1 0)
store <4 x half> %res, ptr addrspace(1) %out
diff --git a/llvm/test/CodeGen/AMDGPU/wmma_modifiers.ll b/llvm/test/CodeGen/AMDGPU/wmma_modifiers.ll
index 2efbd3277f209..692491457ae3d 100644
--- a/llvm/test/CodeGen/AMDGPU/wmma_modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/wmma_modifiers.ll
@@ -12,9 +12,9 @@ define amdgpu_cs void @xyz () {
br label %loop
loop:
%ld = load <8 x float>, ptr addrspace(5) null, align 32
- %in_shuffle = shufflevector <8 x float> %ld, <8 x float> undef, <4 x i32>
+ %in_shuffle = shufflevector <8 x float> %ld, <8 x float> poison, <4 x i32>
%wmma = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v16f16(<16 x half> undef, <16 x half> undef, <4 x float> %in_shuffle)
- %out_shuffle = shufflevector <4 x float> %wmma, <4 x float> undef, <8 x i32>
+ %out_shuffle = shufflevector <4 x float> %wmma, <4 x float> poison, <8 x i32>
store <8 x float> %out_shuffle, ptr addrspace(5) null, align 32
br i1 false, label %.exit, label %loop
.exit:
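For reference, the pattern these tests converge on can be sketched with a minimal standalone function (illustrative only; the function name is made up and does not appear in the patch). In current LLVM IR, poison is the preferred placeholder both for an unused shufflevector operand and for don't-care mask lanes, while undef is being phased out:

  define <2 x half> @example_low_lane(<4 x half> %v) {
    ; Result lane 0 takes lane 0 of %v; result lane 1 is a don't-care, so its
    ; mask element is poison, and the unused second vector operand is poison too.
    %s = shufflevector <4 x half> %v, <4 x half> poison, <2 x i32> <i32 0, i32 poison>
    ret <2 x half> %s
  }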