Skip to content

Commit 10f5e98

Browse files
authored
[DSE] Delay deleting non-memory-defs until end of DSE. (llvm#83411)
DSE uses BatchAA, which caches queries using pairs of MemoryLocations. At the moment, DSE may remove instructions that are used as pointers in cached MemoryLocations. If a new instruction used by a new MemoryLoation and this instruction gets allocated at the same address as a previosuly cached and then removed instruction, we may access an incorrect entry in the cache. To avoid this delay removing all instructions except MemoryDefs until the end of DSE. This should avoid removing any values used in BatchAA's cache. Test case by @vporpo from llvm#83181. (Test not precommitted because the results are non-determinstic - memset only sometimes gets removed) PR: llvm#83411
1 parent bf08d02 commit 10f5e98

File tree

2 files changed

+215
-5
lines changed

2 files changed

+215
-5
lines changed

llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp

+26-5
Original file line numberDiff line numberDiff line change
@@ -857,6 +857,9 @@ struct DSEState {
857857
// no longer be captured.
858858
bool ShouldIterateEndOfFunctionDSE;
859859

860+
/// Dead instructions to be removed at the end of DSE.
861+
SmallVector<Instruction *> ToRemove;
862+
860863
// Class contains self-reference, make sure it's not copied/moved.
861864
DSEState(const DSEState &) = delete;
862865
DSEState &operator=(const DSEState &) = delete;
@@ -1692,7 +1695,8 @@ struct DSEState {
16921695
return {MaybeDeadAccess};
16931696
}
16941697

1695-
// Delete dead memory defs
1698+
/// Delete dead memory defs and recursively add their operands to ToRemove if
1699+
/// they became dead.
16961700
void deleteDeadInstruction(Instruction *SI) {
16971701
MemorySSAUpdater Updater(&MSSA);
16981702
SmallVector<Instruction *, 32> NowDeadInsts;
@@ -1708,8 +1712,11 @@ struct DSEState {
17081712
salvageKnowledge(DeadInst);
17091713

17101714
// Remove the Instruction from MSSA.
1711-
if (MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst)) {
1712-
if (MemoryDef *MD = dyn_cast<MemoryDef>(MA)) {
1715+
MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst);
1716+
bool IsMemDef = MA && isa<MemoryDef>(MA);
1717+
if (MA) {
1718+
if (IsMemDef) {
1719+
auto *MD = cast<MemoryDef>(MA);
17131720
SkipStores.insert(MD);
17141721
if (auto *SI = dyn_cast<StoreInst>(MD->getMemoryInst())) {
17151722
if (SI->getValueOperand()->getType()->isPointerTy()) {
@@ -1730,13 +1737,21 @@ struct DSEState {
17301737
// Remove its operands
17311738
for (Use &O : DeadInst->operands())
17321739
if (Instruction *OpI = dyn_cast<Instruction>(O)) {
1733-
O = nullptr;
1740+
O.set(PoisonValue::get(O->getType()));
17341741
if (isInstructionTriviallyDead(OpI, &TLI))
17351742
NowDeadInsts.push_back(OpI);
17361743
}
17371744

17381745
EI.removeInstruction(DeadInst);
1739-
DeadInst->eraseFromParent();
1746+
// Remove memory defs directly if they don't produce results, but only
1747+
// queue other dead instructions for later removal. They may have been
1748+
// used as memory locations that have been cached by BatchAA. Removing
1749+
// them here may lead to newly created instructions to be allocated at the
1750+
// same address, yielding stale cache entries.
1751+
if (IsMemDef && DeadInst->getType()->isVoidTy())
1752+
DeadInst->eraseFromParent();
1753+
else
1754+
ToRemove.push_back(DeadInst);
17401755
}
17411756
}
17421757

@@ -2287,6 +2302,12 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
22872302

22882303
MadeChange |= State.eliminateRedundantStoresOfExistingValues();
22892304
MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
2305+
2306+
while (!State.ToRemove.empty()) {
2307+
Instruction *DeadInst = State.ToRemove.pop_back_val();
2308+
DeadInst->eraseFromParent();
2309+
}
2310+
22902311
return MadeChange;
22912312
}
22922313
} // end anonymous namespace
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt -S -passes=dse < %s | FileCheck %s
3+
;
4+
; DSE kills `store i32 44, ptr %struct.byte.4, align 4` but should not kill
5+
; `call void @llvm.memset.p0.i64(...)` because it has a clobber read:
6+
; `%ret = load ptr, ptr %struct.byte.8`
7+
8+
9+
%struct.type = type { ptr, ptr }
10+
11+
define ptr @foo(ptr noundef %ptr) {
12+
; CHECK-LABEL: define ptr @foo(
13+
; CHECK-SAME: ptr noundef [[PTR:%.*]]) {
14+
; CHECK-NEXT: [[STRUCT_ALLOCA:%.*]] = alloca [[STRUCT_TYPE:%.*]], align 8
15+
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6:[0-9]+]]
16+
; CHECK-NEXT: [[STRUCT_BYTE_8:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 8
17+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_BYTE_8]], i64 4
18+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 [[TMP1]], i8 42, i64 4, i1 false)
19+
; CHECK-NEXT: store i32 43, ptr [[STRUCT_BYTE_8]], align 4
20+
; CHECK-NEXT: [[RET:%.*]] = load ptr, ptr [[STRUCT_BYTE_8]], align 8
21+
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]]
22+
; CHECK-NEXT: ret ptr [[RET]]
23+
;
24+
%struct.alloca = alloca %struct.type, align 8
25+
call void @llvm.lifetime.start.p0(i64 56, ptr nonnull %struct.alloca) nounwind
26+
%struct.byte.8 = getelementptr inbounds i8, ptr %struct.alloca, i64 8
27+
; Set %struct.alloca[8, 16) to 42.
28+
call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %struct.byte.8, i8 42, i64 8, i1 false)
29+
; Set %struct.alloca[8, 12) to 43.
30+
store i32 43, ptr %struct.byte.8, align 4
31+
; Set %struct.alloca[4, 8) to 44.
32+
%struct.byte.4 = getelementptr inbounds i8, ptr %struct.alloca, i64 4
33+
store i32 44, ptr %struct.byte.4, align 4
34+
; Return %struct.alloca[8, 16).
35+
%ret = load ptr, ptr %struct.byte.8
36+
call void @llvm.lifetime.end.p0(i64 56, ptr nonnull %struct.alloca) nounwind
37+
ret ptr %ret
38+
}
39+
40+
; Set of tests based on @foo, but where the memset's operands cannot be erased
41+
; due to other uses. Instead, they contain a number of removable MemoryDefs;
42+
; with non-void types result types.
43+
44+
define ptr @foo_with_removable_malloc() {
45+
; CHECK-LABEL: define ptr @foo_with_removable_malloc() {
46+
; CHECK-NEXT: [[STRUCT_ALLOCA:%.*]] = alloca [[STRUCT_TYPE:%.*]], align 8
47+
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]]
48+
; CHECK-NEXT: [[STRUCT_BYTE_4:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 4
49+
; CHECK-NEXT: [[STRUCT_BYTE_8:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 8
50+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_BYTE_8]], i64 4
51+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 [[TMP1]], i8 42, i64 4, i1 false)
52+
; CHECK-NEXT: store i32 43, ptr [[STRUCT_BYTE_8]], align 4
53+
; CHECK-NEXT: [[RET:%.*]] = load ptr, ptr [[STRUCT_BYTE_8]], align 8
54+
; CHECK-NEXT: call void @readnone(ptr [[STRUCT_BYTE_4]])
55+
; CHECK-NEXT: call void @readnone(ptr [[STRUCT_BYTE_8]])
56+
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]]
57+
; CHECK-NEXT: ret ptr [[RET]]
58+
;
59+
%struct.alloca = alloca %struct.type, align 8
60+
call void @llvm.lifetime.start.p0(i64 56, ptr nonnull %struct.alloca) nounwind
61+
%struct.byte.4 = getelementptr inbounds i8, ptr %struct.alloca, i64 4
62+
%struct.byte.8 = getelementptr inbounds i8, ptr %struct.alloca, i64 8
63+
64+
; Set of removable memory deffs
65+
%m2 = tail call ptr @malloc(i64 4)
66+
%m1 = tail call ptr @malloc(i64 4)
67+
store i32 0, ptr %struct.byte.8
68+
store i32 0, ptr %struct.byte.8
69+
store i32 123, ptr %m1
70+
store i32 123, ptr %m2
71+
72+
; Set %struct.alloca[8, 16) to 42.
73+
call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %struct.byte.8, i8 42, i64 8, i1 false)
74+
; Set %struct.alloca[8, 12) to 43.
75+
store i32 43, ptr %struct.byte.8, align 4
76+
; Set %struct.alloca[4, 8) to 44.
77+
store i32 44, ptr %struct.byte.4, align 4
78+
; Return %struct.alloca[8, 16).
79+
%ret = load ptr, ptr %struct.byte.8
80+
call void @readnone(ptr %struct.byte.4);
81+
call void @readnone(ptr %struct.byte.8);
82+
call void @llvm.lifetime.end.p0(i64 56, ptr nonnull %struct.alloca) nounwind
83+
ret ptr %ret
84+
}
85+
86+
define ptr @foo_with_removable_malloc_free() {
87+
; CHECK-LABEL: define ptr @foo_with_removable_malloc_free() {
88+
; CHECK-NEXT: [[STRUCT_ALLOCA:%.*]] = alloca [[STRUCT_TYPE:%.*]], align 8
89+
; CHECK-NEXT: [[M1:%.*]] = tail call ptr @malloc(i64 4)
90+
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]]
91+
; CHECK-NEXT: [[STRUCT_BYTE_4:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 4
92+
; CHECK-NEXT: [[STRUCT_BYTE_8:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 8
93+
; CHECK-NEXT: [[M2:%.*]] = tail call ptr @malloc(i64 4)
94+
; CHECK-NEXT: call void @free(ptr [[M1]])
95+
; CHECK-NEXT: call void @free(ptr [[M2]])
96+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_BYTE_8]], i64 4
97+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 [[TMP1]], i8 42, i64 4, i1 false)
98+
; CHECK-NEXT: store i32 43, ptr [[STRUCT_BYTE_8]], align 4
99+
; CHECK-NEXT: [[RET:%.*]] = load ptr, ptr [[STRUCT_BYTE_8]], align 8
100+
; CHECK-NEXT: call void @readnone(ptr [[STRUCT_BYTE_4]])
101+
; CHECK-NEXT: call void @readnone(ptr [[STRUCT_BYTE_8]])
102+
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]]
103+
; CHECK-NEXT: ret ptr [[RET]]
104+
;
105+
%struct.alloca = alloca %struct.type, align 8
106+
%m1 = tail call ptr @malloc(i64 4)
107+
call void @llvm.lifetime.start.p0(i64 56, ptr nonnull %struct.alloca) nounwind
108+
%struct.byte.4 = getelementptr inbounds i8, ptr %struct.alloca, i64 4
109+
%struct.byte.8 = getelementptr inbounds i8, ptr %struct.alloca, i64 8
110+
111+
store i32 0, ptr %struct.byte.4
112+
store i32 0, ptr %struct.byte.8
113+
%m2 = tail call ptr @malloc(i64 4)
114+
store i32 123, ptr %m1
115+
call void @free(ptr %m1);
116+
store i32 123, ptr %m2
117+
call void @free(ptr %m2);
118+
119+
; Set %struct.alloca[8, 16) to 42.
120+
call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %struct.byte.8, i8 42, i64 8, i1 false)
121+
; Set %struct.alloca[8, 12) to 43.
122+
store i32 43, ptr %struct.byte.8, align 4
123+
; Set %struct.alloca[4, 8) to 44.
124+
store i32 44, ptr %struct.byte.4, align 4
125+
; Return %struct.alloca[8, 16).
126+
%ret = load ptr, ptr %struct.byte.8
127+
call void @readnone(ptr %struct.byte.4);
128+
call void @readnone(ptr %struct.byte.8);
129+
call void @llvm.lifetime.end.p0(i64 56, ptr nonnull %struct.alloca) nounwind
130+
ret ptr %ret
131+
}
132+
133+
define ptr @foo_with_malloc_to_calloc() {
134+
; CHECK-LABEL: define ptr @foo_with_malloc_to_calloc() {
135+
; CHECK-NEXT: [[STRUCT_ALLOCA:%.*]] = alloca [[STRUCT_TYPE:%.*]], align 8
136+
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]]
137+
; CHECK-NEXT: [[STRUCT_BYTE_8:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 8
138+
; CHECK-NEXT: [[STRUCT_BYTE_4:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 4
139+
; CHECK-NEXT: [[CALLOC1:%.*]] = call ptr @calloc(i64 1, i64 4)
140+
; CHECK-NEXT: [[CALLOC:%.*]] = call ptr @calloc(i64 1, i64 4)
141+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_BYTE_8]], i64 4
142+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 [[TMP1]], i8 42, i64 4, i1 false)
143+
; CHECK-NEXT: store i32 43, ptr [[STRUCT_BYTE_8]], align 4
144+
; CHECK-NEXT: [[RET:%.*]] = load ptr, ptr [[STRUCT_BYTE_8]], align 8
145+
; CHECK-NEXT: call void @readnone(ptr [[STRUCT_BYTE_4]])
146+
; CHECK-NEXT: call void @readnone(ptr [[STRUCT_BYTE_8]])
147+
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]]
148+
; CHECK-NEXT: call void @use(ptr [[CALLOC1]])
149+
; CHECK-NEXT: call void @use(ptr [[CALLOC]])
150+
; CHECK-NEXT: ret ptr [[RET]]
151+
;
152+
%struct.alloca = alloca %struct.type, align 8
153+
call void @llvm.lifetime.start.p0(i64 56, ptr nonnull %struct.alloca) nounwind
154+
%struct.byte.8 = getelementptr inbounds i8, ptr %struct.alloca, i64 8
155+
%struct.byte.4 = getelementptr inbounds i8, ptr %struct.alloca, i64 4
156+
157+
; Set of removable memory deffs
158+
%m1 = tail call ptr @malloc(i64 4)
159+
%m2 = tail call ptr @malloc(i64 4)
160+
store i32 0, ptr %struct.byte.4
161+
store i32 0, ptr %struct.byte.8
162+
call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %m2, i8 0, i64 4, i1 false)
163+
call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %m1, i8 0, i64 4, i1 false)
164+
165+
; Set %struct.alloca[8, 16) to 42.
166+
call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %struct.byte.8, i8 42, i64 8, i1 false)
167+
; Set %struct.alloca[8, 12) to 43.
168+
store i32 43, ptr %struct.byte.8, align 4
169+
; Set %struct.alloca[4, 8) to 44.
170+
store i32 44, ptr %struct.byte.4, align 4
171+
; Return %struct.alloca[8, 16).
172+
%ret = load ptr, ptr %struct.byte.8
173+
call void @readnone(ptr %struct.byte.4);
174+
call void @readnone(ptr %struct.byte.8);
175+
call void @llvm.lifetime.end.p0(i64 56, ptr nonnull %struct.alloca) nounwind
176+
call void @use(ptr %m1)
177+
call void @use(ptr %m2)
178+
ret ptr %ret
179+
}
180+
181+
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
182+
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
183+
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
184+
185+
declare noalias ptr @malloc(i64) willreturn allockind("alloc,uninitialized") "alloc-family"="malloc"
186+
declare void @readnone(ptr) readnone nounwind
187+
declare void @free(ptr nocapture) allockind("free") "alloc-family"="malloc"
188+
189+
declare void @use(ptr)

0 commit comments

Comments
 (0)