diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 8ffb471070d91..718d272dd0ac7 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -5042,14 +5042,6 @@ static Value *simplifyGEPInst(Type *SrcTy, Value *Ptr, if (Q.isUndefValue(Ptr)) return UndefValue::get(GEPTy); - // getelementptr inbounds null, idx -> null - if (NW.isInBounds() && Q.IIQ.UseInstrInfo && Q.CxtI) { - if (auto *BaseC = dyn_cast(Ptr)) - if (BaseC->isNullValue() && - !NullPointerIsDefined(Q.CxtI->getFunction(), AS)) - return Constant::getNullValue(GEPTy); - } - bool IsScalableVec = SrcTy->isScalableTy() || any_of(Indices, [](const Value *V) { return isa(V->getType()); diff --git a/llvm/test/CodeGen/AMDGPU/memcpy-crash-issue63986.ll b/llvm/test/CodeGen/AMDGPU/memcpy-crash-issue63986.ll index 94056913f3a33..c92c672dda2ad 100644 --- a/llvm/test/CodeGen/AMDGPU/memcpy-crash-issue63986.ll +++ b/llvm/test/CodeGen/AMDGPU/memcpy-crash-issue63986.ll @@ -7,141 +7,138 @@ define void @issue63986(i64 %0, i64 %idxprom, ptr inreg %ptr) { ; CHECK-LABEL: issue63986: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_lshlrev_b64 v[8:9], 6, v[2:3] -; CHECK-NEXT: v_mov_b32_e32 v4, s17 -; CHECK-NEXT: v_add_co_u32_e32 v10, vcc, s16, v8 -; CHECK-NEXT: v_addc_co_u32_e32 v11, vcc, v4, v9, vcc -; CHECK-NEXT: ; %bb.1: ; %entry.loop-memcpy-expansion_crit_edge -; CHECK-NEXT: v_mov_b32_e32 v4, 0 -; CHECK-NEXT: v_mov_b32_e32 v5, 0 -; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[4:5] +; CHECK-NEXT: v_lshlrev_b64 v[4:5], 6, v[2:3] +; CHECK-NEXT: v_mov_b32_e32 v6, s17 +; CHECK-NEXT: v_add_co_u32_e32 v8, vcc, s16, v4 +; CHECK-NEXT: v_addc_co_u32_e32 v9, vcc, v6, v5, vcc ; CHECK-NEXT: s_mov_b64 s[4:5], 0 -; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: .LBB0_2: ; %loop-memcpy-expansion +; CHECK-NEXT: .LBB0_1: ; %loop-memcpy-expansion ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: v_add_co_u32_e32 v12, vcc, s4, v10 +; CHECK-NEXT: v_mov_b32_e32 v7, s5 +; CHECK-NEXT: v_mov_b32_e32 v6, s4 +; CHECK-NEXT: flat_load_dwordx4 v[10:13], v[6:7] +; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, s4, v8 ; CHECK-NEXT: s_add_u32 s4, s4, 16 -; CHECK-NEXT: v_mov_b32_e32 v13, s5 ; CHECK-NEXT: s_addc_u32 s5, s5, 0 ; CHECK-NEXT: v_cmp_ge_u64_e64 s[6:7], s[4:5], 32 -; CHECK-NEXT: v_addc_co_u32_e32 v13, vcc, v11, v13, vcc +; CHECK-NEXT: v_addc_co_u32_e32 v7, vcc, v9, v7, vcc ; CHECK-NEXT: s_and_b64 vcc, exec, s[6:7] -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: flat_store_dwordx4 v[12:13], v[4:7] -; CHECK-NEXT: s_cbranch_vccz .LBB0_2 -; CHECK-NEXT: ; %bb.3: ; %loop-memcpy-residual-header +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_dwordx4 v[6:7], v[10:13] +; CHECK-NEXT: s_cbranch_vccz .LBB0_1 +; CHECK-NEXT: ; %bb.2: ; %loop-memcpy-residual-header +; CHECK-NEXT: s_branch .LBB0_4 +; CHECK-NEXT: ; %bb.3: +; CHECK-NEXT: ; implicit-def: $vgpr6_vgpr7 ; CHECK-NEXT: s_branch .LBB0_5 -; CHECK-NEXT: ; %bb.4: -; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3 -; CHECK-NEXT: s_branch .LBB0_6 -; CHECK-NEXT: .LBB0_5: ; %loop-memcpy-residual-header.post-loop-memcpy-expansion_crit_edge -; CHECK-NEXT: v_lshlrev_b64 v[2:3], 6, v[2:3] -; CHECK-NEXT: s_cbranch_execnz .LBB0_9 -; CHECK-NEXT: .LBB0_6: ; %loop-memcpy-residual-header.loop-memcpy-residual_crit_edge -; CHECK-NEXT: v_mov_b32_e32 v2, 0 -; CHECK-NEXT: v_mov_b32_e32 v3, 0 -; CHECK-NEXT: flat_load_ubyte v2, v[2:3] -; CHECK-NEXT: s_add_u32 s6, s16, 32 -; CHECK-NEXT: s_addc_u32 s4, s17, 0 -; CHECK-NEXT: v_mov_b32_e32 v4, s4 -; CHECK-NEXT: v_add_co_u32_e32 v3, vcc, s6, v8 +; CHECK-NEXT: .LBB0_4: ; %loop-memcpy-residual-header.post-loop-memcpy-expansion_crit_edge +; CHECK-NEXT: v_lshlrev_b64 v[6:7], 6, v[2:3] +; CHECK-NEXT: s_cbranch_execnz .LBB0_8 +; CHECK-NEXT: .LBB0_5: ; %loop-memcpy-residual.preheader +; CHECK-NEXT: s_add_u32 s4, s16, 32 +; CHECK-NEXT: s_addc_u32 s5, s17, 0 +; CHECK-NEXT: v_mov_b32_e32 v3, s5 +; CHECK-NEXT: v_add_co_u32_e32 v2, vcc, s4, v4 +; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc ; CHECK-NEXT: s_mov_b64 s[4:5], 0 -; CHECK-NEXT: v_addc_co_u32_e32 v4, vcc, v4, v9, vcc -; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: ; %bb.7: ; %loop-memcpy-residual -; CHECK-NEXT: v_mov_b32_e32 v6, s5 -; CHECK-NEXT: v_add_co_u32_e32 v5, vcc, s4, v3 +; CHECK-NEXT: ; %bb.6: ; %loop-memcpy-residual +; CHECK-NEXT: s_add_u32 s6, 32, s4 +; CHECK-NEXT: s_addc_u32 s7, 0, s5 +; CHECK-NEXT: v_mov_b32_e32 v6, s6 +; CHECK-NEXT: v_mov_b32_e32 v7, s7 +; CHECK-NEXT: flat_load_ubyte v10, v[6:7] +; CHECK-NEXT: v_mov_b32_e32 v7, s5 +; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, s4, v2 +; CHECK-NEXT: v_addc_co_u32_e32 v7, vcc, v3, v7, vcc ; CHECK-NEXT: s_add_u32 s4, s4, 1 -; CHECK-NEXT: v_addc_co_u32_e32 v6, vcc, v4, v6, vcc ; CHECK-NEXT: s_addc_u32 s5, s5, 0 -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: flat_store_byte v[5:6], v2 -; CHECK-NEXT: ; %bb.8: -; CHECK-NEXT: v_mov_b32_e32 v2, v8 -; CHECK-NEXT: v_mov_b32_e32 v3, v9 -; CHECK-NEXT: .LBB0_9: ; %post-loop-memcpy-expansion -; CHECK-NEXT: v_and_b32_e32 v6, 15, v0 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[6:7], v10 +; CHECK-NEXT: ; %bb.7: +; CHECK-NEXT: v_mov_b32_e32 v7, v5 +; CHECK-NEXT: v_mov_b32_e32 v6, v4 +; CHECK-NEXT: .LBB0_8: ; %post-loop-memcpy-expansion +; CHECK-NEXT: v_and_b32_e32 v2, 15, v0 ; CHECK-NEXT: v_and_b32_e32 v0, -16, v0 -; CHECK-NEXT: v_add_co_u32_e32 v2, vcc, v2, v0 -; CHECK-NEXT: v_mov_b32_e32 v7, 0 -; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v1, vcc +; CHECK-NEXT: v_add_co_u32_e32 v4, vcc, v6, v0 +; CHECK-NEXT: v_mov_b32_e32 v3, 0 +; CHECK-NEXT: v_addc_co_u32_e32 v5, vcc, v7, v1, vcc ; CHECK-NEXT: v_cmp_ne_u64_e64 s[4:5], 0, v[0:1] -; CHECK-NEXT: v_cmp_ne_u64_e64 s[6:7], 0, v[6:7] -; CHECK-NEXT: v_mov_b32_e32 v4, s17 -; CHECK-NEXT: v_mov_b32_e32 v8, 0 -; CHECK-NEXT: v_add_co_u32_e32 v12, vcc, s16, v2 -; CHECK-NEXT: v_mov_b32_e32 v9, 0 -; CHECK-NEXT: v_addc_co_u32_e32 v13, vcc, v4, v3, vcc -; CHECK-NEXT: s_branch .LBB0_12 -; CHECK-NEXT: .LBB0_10: ; %Flow14 -; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1 +; CHECK-NEXT: v_cmp_ne_u64_e64 s[6:7], 0, v[2:3] +; CHECK-NEXT: v_mov_b32_e32 v6, s17 +; CHECK-NEXT: v_add_co_u32_e32 v4, vcc, s16, v4 +; CHECK-NEXT: v_addc_co_u32_e32 v5, vcc, v6, v5, vcc +; CHECK-NEXT: s_branch .LBB0_11 +; CHECK-NEXT: .LBB0_9: ; %Flow14 +; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1 ; CHECK-NEXT: s_or_b64 exec, exec, s[10:11] ; CHECK-NEXT: s_mov_b64 s[8:9], 0 -; CHECK-NEXT: .LBB0_11: ; %Flow16 -; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1 +; CHECK-NEXT: .LBB0_10: ; %Flow16 +; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1 ; CHECK-NEXT: s_andn2_b64 vcc, exec, s[8:9] -; CHECK-NEXT: s_cbranch_vccz .LBB0_20 -; CHECK-NEXT: .LBB0_12: ; %while.cond +; CHECK-NEXT: s_cbranch_vccz .LBB0_19 +; CHECK-NEXT: .LBB0_11: ; %while.cond ; CHECK-NEXT: ; =>This Loop Header: Depth=1 -; CHECK-NEXT: ; Child Loop BB0_14 Depth 2 -; CHECK-NEXT: ; Child Loop BB0_18 Depth 2 +; CHECK-NEXT: ; Child Loop BB0_13 Depth 2 +; CHECK-NEXT: ; Child Loop BB0_17 Depth 2 ; CHECK-NEXT: s_and_saveexec_b64 s[8:9], s[4:5] -; CHECK-NEXT: s_cbranch_execz .LBB0_15 -; CHECK-NEXT: ; %bb.13: ; %while.cond.loop-memcpy-expansion2_crit_edge -; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1 -; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[8:9] +; CHECK-NEXT: s_cbranch_execz .LBB0_14 +; CHECK-NEXT: ; %bb.12: ; %loop-memcpy-expansion2.preheader +; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1 ; CHECK-NEXT: s_mov_b64 s[10:11], 0 ; CHECK-NEXT: s_mov_b64 s[12:13], 0 -; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: .LBB0_14: ; %loop-memcpy-expansion2 -; CHECK-NEXT: ; Parent Loop BB0_12 Depth=1 +; CHECK-NEXT: .LBB0_13: ; %loop-memcpy-expansion2 +; CHECK-NEXT: ; Parent Loop BB0_11 Depth=1 ; CHECK-NEXT: ; => This Inner Loop Header: Depth=2 -; CHECK-NEXT: v_mov_b32_e32 v15, s13 -; CHECK-NEXT: v_add_co_u32_e32 v14, vcc, s12, v10 +; CHECK-NEXT: v_mov_b32_e32 v6, s12 +; CHECK-NEXT: v_mov_b32_e32 v7, s13 +; CHECK-NEXT: flat_load_dwordx4 v[10:13], v[6:7] +; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, s12, v8 ; CHECK-NEXT: s_add_u32 s12, s12, 16 -; CHECK-NEXT: v_addc_co_u32_e32 v15, vcc, v11, v15, vcc +; CHECK-NEXT: v_addc_co_u32_e32 v7, vcc, v9, v7, vcc ; CHECK-NEXT: s_addc_u32 s13, s13, 0 ; CHECK-NEXT: v_cmp_ge_u64_e32 vcc, s[12:13], v[0:1] -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: flat_store_dwordx4 v[14:15], v[2:5] ; CHECK-NEXT: s_or_b64 s[10:11], vcc, s[10:11] +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_dwordx4 v[6:7], v[10:13] ; CHECK-NEXT: s_andn2_b64 exec, exec, s[10:11] -; CHECK-NEXT: s_cbranch_execnz .LBB0_14 -; CHECK-NEXT: .LBB0_15: ; %Flow15 -; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1 +; CHECK-NEXT: s_cbranch_execnz .LBB0_13 +; CHECK-NEXT: .LBB0_14: ; %Flow15 +; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1 ; CHECK-NEXT: s_or_b64 exec, exec, s[8:9] ; CHECK-NEXT: s_mov_b64 s[8:9], -1 -; CHECK-NEXT: s_cbranch_execz .LBB0_11 -; CHECK-NEXT: ; %bb.16: ; %loop-memcpy-residual-header5 -; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1 +; CHECK-NEXT: s_cbranch_execz .LBB0_10 +; CHECK-NEXT: ; %bb.15: ; %loop-memcpy-residual-header5 +; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1 ; CHECK-NEXT: s_and_saveexec_b64 s[8:9], s[6:7] ; CHECK-NEXT: s_xor_b64 s[10:11], exec, s[8:9] -; CHECK-NEXT: s_cbranch_execz .LBB0_10 -; CHECK-NEXT: ; %bb.17: ; %loop-memcpy-residual-header5.loop-memcpy-residual4_crit_edge -; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1 -; CHECK-NEXT: flat_load_ubyte v2, v[8:9] +; CHECK-NEXT: s_cbranch_execz .LBB0_9 +; CHECK-NEXT: ; %bb.16: ; %loop-memcpy-residual4.preheader +; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1 ; CHECK-NEXT: s_mov_b64 s[12:13], 0 ; CHECK-NEXT: s_mov_b64 s[14:15], 0 -; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: .LBB0_18: ; %loop-memcpy-residual4 -; CHECK-NEXT: ; Parent Loop BB0_12 Depth=1 +; CHECK-NEXT: .LBB0_17: ; %loop-memcpy-residual4 +; CHECK-NEXT: ; Parent Loop BB0_11 Depth=1 ; CHECK-NEXT: ; => This Inner Loop Header: Depth=2 -; CHECK-NEXT: v_add_co_u32_e32 v3, vcc, s14, v12 +; CHECK-NEXT: v_mov_b32_e32 v10, s15 +; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, s14, v0 +; CHECK-NEXT: v_addc_co_u32_e32 v7, vcc, v1, v10, vcc +; CHECK-NEXT: flat_load_ubyte v11, v[6:7] +; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, s14, v4 ; CHECK-NEXT: s_add_u32 s14, s14, 1 -; CHECK-NEXT: v_mov_b32_e32 v4, s15 ; CHECK-NEXT: s_addc_u32 s15, s15, 0 -; CHECK-NEXT: v_cmp_ge_u64_e64 s[8:9], s[14:15], v[6:7] -; CHECK-NEXT: v_addc_co_u32_e32 v4, vcc, v13, v4, vcc +; CHECK-NEXT: v_cmp_ge_u64_e64 s[8:9], s[14:15], v[2:3] +; CHECK-NEXT: v_addc_co_u32_e32 v7, vcc, v5, v10, vcc ; CHECK-NEXT: s_or_b64 s[12:13], s[8:9], s[12:13] -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: flat_store_byte v[3:4], v2 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[6:7], v11 ; CHECK-NEXT: s_andn2_b64 exec, exec, s[12:13] -; CHECK-NEXT: s_cbranch_execnz .LBB0_18 -; CHECK-NEXT: ; %bb.19: ; %Flow -; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1 +; CHECK-NEXT: s_cbranch_execnz .LBB0_17 +; CHECK-NEXT: ; %bb.18: ; %Flow +; CHECK-NEXT: ; in Loop: Header=BB0_11 Depth=1 ; CHECK-NEXT: s_or_b64 exec, exec, s[12:13] -; CHECK-NEXT: s_branch .LBB0_10 -; CHECK-NEXT: .LBB0_20: ; %DummyReturnBlock +; CHECK-NEXT: s_branch .LBB0_9 +; CHECK-NEXT: .LBB0_19: ; %DummyReturnBlock ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: diff --git a/llvm/test/Transforms/InstCombine/store.ll b/llvm/test/Transforms/InstCombine/store.ll index 48c63c6f24c72..daa40da1828b5 100644 --- a/llvm/test/Transforms/InstCombine/store.ll +++ b/llvm/test/Transforms/InstCombine/store.ll @@ -49,7 +49,8 @@ define void @test2(ptr %P) { define void @store_at_gep_off_null_inbounds(i64 %offset) { ; CHECK-LABEL: @store_at_gep_off_null_inbounds( -; CHECK-NEXT: store i32 poison, ptr null, align 4 +; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds i32, ptr null, i64 [[OFFSET:%.*]] +; CHECK-NEXT: store i32 poison, ptr [[PTR]], align 4 ; CHECK-NEXT: ret void ; %ptr = getelementptr inbounds i32, ptr null, i64 %offset diff --git a/llvm/test/Transforms/InstSimplify/gep.ll b/llvm/test/Transforms/InstSimplify/gep.ll index a73c902fac647..272067c66cf9f 100644 --- a/llvm/test/Transforms/InstSimplify/gep.ll +++ b/llvm/test/Transforms/InstSimplify/gep.ll @@ -389,7 +389,8 @@ define i64 @gep_array_of_scalable_vectors_ptrdiff(ptr %ptr) { define ptr @gep_null_inbounds(i64 %idx) { ; CHECK-LABEL: @gep_null_inbounds( -; CHECK-NEXT: ret ptr null +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr null, i64 [[IDX:%.*]] +; CHECK-NEXT: ret ptr [[GEP]] ; %gep = getelementptr inbounds i8, ptr null, i64 %idx ret ptr %gep @@ -415,7 +416,8 @@ define ptr @gep_null_defined(i64 %idx) null_pointer_is_valid { define ptr @gep_null_inbounds_different_type(i64 %idx1, i64 %idx2) { ; CHECK-LABEL: @gep_null_inbounds_different_type( -; CHECK-NEXT: ret ptr null +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [0 x i8], ptr null, i64 [[IDX1:%.*]], i64 [[IDX2:%.*]] +; CHECK-NEXT: ret ptr [[GEP]] ; %gep = getelementptr inbounds [0 x i8], ptr null, i64 %idx1, i64 %idx2 ret ptr %gep @@ -423,7 +425,8 @@ define ptr @gep_null_inbounds_different_type(i64 %idx1, i64 %idx2) { define <2 x ptr> @gep_inbounds_null_vec(i64 %idx) { ; CHECK-LABEL: @gep_inbounds_null_vec( -; CHECK-NEXT: ret <2 x ptr> zeroinitializer +; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds i8, <2 x ptr> zeroinitializer, i64 [[IDX:%.*]] +; CHECK-NEXT: ret <2 x ptr> [[P]] ; %p = getelementptr inbounds i8, <2 x ptr> zeroinitializer, i64 %idx ret <2 x ptr> %p @@ -431,7 +434,8 @@ define <2 x ptr> @gep_inbounds_null_vec(i64 %idx) { define <2 x ptr> @gep_inbounds_null_vec_broadcast(<2 x i64> %idx) { ; CHECK-LABEL: @gep_inbounds_null_vec_broadcast( -; CHECK-NEXT: ret <2 x ptr> zeroinitializer +; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds i8, ptr null, <2 x i64> [[IDX:%.*]] +; CHECK-NEXT: ret <2 x ptr> [[P]] ; %p = getelementptr inbounds i8, ptr null, <2 x i64> %idx ret <2 x ptr> %p