From f29e3f783df95027542a90f848c452dbc7243a81 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 29 Feb 2024 10:45:04 +0000 Subject: [PATCH 1/4] [DSE] Delay deleting non-memory-defs until end of DSE. DSE uses BatchAA, which caches queries using pairs of MemoryLocations. At the moment, DSE may remove instructions that are used as pointers in cached MemoryLocations. If a new instruction used by a new MemoryLoation and this instruction gets allocated at the same address as a previosuly cached and then removed instruction, we may access an incorrect entry in the cache. To avoid this delay removing all instructions except MemoryDefs until the end of DSE. This should avoid removing any values used in BatchAA's cache. Test case by @vporpo from https://github.com/llvm/llvm-project/pull/83181. (Test not precommitted because the results are non-determinstic - memset only sometimes gets removed) --- .../Scalar/DeadStoreElimination.cpp | 31 +++++++++++--- .../batchaa-caching-new-pointers.ll | 42 +++++++++++++++++++ 2 files changed, 68 insertions(+), 5 deletions(-) create mode 100644 llvm/test/Transforms/DeadStoreElimination/batchaa-caching-new-pointers.ll diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index d30c68a2f0871..a8eeef42f0f0d 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -857,6 +857,9 @@ struct DSEState { // no longer be captured. bool ShouldIterateEndOfFunctionDSE; + /// Dead instructions to be removed at the end of DSE. + SmallVector ToRemove; + // Class contains self-reference, make sure it's not copied/moved. DSEState(const DSEState &) = delete; DSEState &operator=(const DSEState &) = delete; @@ -1692,7 +1695,8 @@ struct DSEState { return {MaybeDeadAccess}; } - // Delete dead memory defs + /// Delete dead memory defs and recursively add their operands to ToRemove if + /// they became dead. void deleteDeadInstruction(Instruction *SI) { MemorySSAUpdater Updater(&MSSA); SmallVector NowDeadInsts; @@ -1708,8 +1712,10 @@ struct DSEState { salvageKnowledge(DeadInst); // Remove the Instruction from MSSA. - if (MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst)) { - if (MemoryDef *MD = dyn_cast(MA)) { + MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst); + bool IsMemDef = MA && isa(MA); + if (MA) { + if (IsMemDef) { SkipStores.insert(MD); if (auto *SI = dyn_cast(MD->getMemoryInst())) { if (SI->getValueOperand()->getType()->isPointerTy()) { @@ -1730,13 +1736,21 @@ struct DSEState { // Remove its operands for (Use &O : DeadInst->operands()) if (Instruction *OpI = dyn_cast(O)) { - O = nullptr; + O.set(PoisonValue::get(O->getType())); if (isInstructionTriviallyDead(OpI, &TLI)) NowDeadInsts.push_back(OpI); } EI.removeInstruction(DeadInst); - DeadInst->eraseFromParent(); + // Remove memory defs directly, but only queue other dead instructions for + // later removal. They may have been used as memory locations that have + // been cached by BatchAA. Removing them here may lead to newly created + // instructions to be allocated at the same address, yielding stale cache + // entries. + if (IsMemDef) + DeadInst->eraseFromParent(); + else + ToRemove.push_back(DeadInst); } } @@ -2287,6 +2301,13 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, MadeChange |= State.eliminateRedundantStoresOfExistingValues(); MadeChange |= State.eliminateDeadWritesAtEndOfFunction(); + + while (!State.ToRemove.empty()) { + Instruction *DeadInst = State.ToRemove.pop_back_val(); + assert(!MSSA.getMemoryAccess(DeadInst)); + DeadInst->eraseFromParent(); + } + return MadeChange; } } // end anonymous namespace diff --git a/llvm/test/Transforms/DeadStoreElimination/batchaa-caching-new-pointers.ll b/llvm/test/Transforms/DeadStoreElimination/batchaa-caching-new-pointers.ll new file mode 100644 index 0000000000000..0e6eb8e7ef8d5 --- /dev/null +++ b/llvm/test/Transforms/DeadStoreElimination/batchaa-caching-new-pointers.ll @@ -0,0 +1,42 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S -passes=dse < %s | FileCheck %s +; +; DSE kills `store i32 44, ptr %struct.byte.4, align 4` but should not kill +; `call void @llvm.memset.p0.i64(...)` because it has a clobber read: +; `%ret = load ptr, ptr %struct.byte.8` + + +%struct.type = type { ptr, ptr } + +define ptr @foo(ptr noundef %ptr) { +; CHECK-LABEL: define ptr @foo( +; CHECK-SAME: ptr noundef [[PTR:%.*]]) { +; CHECK-NEXT: [[STRUCT_ALLOCA:%.*]] = alloca [[STRUCT_TYPE:%.*]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR2:[0-9]+]] +; CHECK-NEXT: [[STRUCT_BYTE_8:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 8 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_BYTE_8]], i64 4 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 [[TMP1]], i8 42, i64 4, i1 false) +; CHECK-NEXT: store i32 43, ptr [[STRUCT_BYTE_8]], align 4 +; CHECK-NEXT: [[RET:%.*]] = load ptr, ptr [[STRUCT_BYTE_8]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR2]] +; CHECK-NEXT: ret ptr [[RET]] +; + %struct.alloca = alloca %struct.type, align 8 + call void @llvm.lifetime.start.p0(i64 56, ptr nonnull %struct.alloca) nounwind + %struct.byte.8 = getelementptr inbounds i8, ptr %struct.alloca, i64 8 + ; Set %struct.alloca[8, 16) to 42. + call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %struct.byte.8, i8 42, i64 8, i1 false) + ; Set %struct.alloca[8, 12) to 43. + store i32 43, ptr %struct.byte.8, align 4 + ; Set %struct.alloca[4, 8) to 44. + %struct.byte.4 = getelementptr inbounds i8, ptr %struct.alloca, i64 4 + store i32 44, ptr %struct.byte.4, align 4 + ; Return %struct.alloca[8, 16). + %ret = load ptr, ptr %struct.byte.8 + call void @llvm.lifetime.end.p0(i64 56, ptr nonnull %struct.alloca) nounwind + ret ptr %ret +} + +declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) From 33f9631c9c621cbac0e9882fde3ac1c6889b1abd Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 29 Feb 2024 12:49:32 +0000 Subject: [PATCH 2/4] !fixup fix build failure. --- llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index a8eeef42f0f0d..214c10c52be4a 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -1716,6 +1716,7 @@ struct DSEState { bool IsMemDef = MA && isa(MA); if (MA) { if (IsMemDef) { + auto *MD = cast(MA); SkipStores.insert(MD); if (auto *SI = dyn_cast(MD->getMemoryInst())) { if (SI->getValueOperand()->getType()->isPointerTy()) { From be3aaa17939c993a18edd8ebe319eb675c06ad2f Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 1 Mar 2024 17:30:01 +0000 Subject: [PATCH 3/4] !fixup remove assert, directly delete MemDefs with void ty. --- llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 214c10c52be4a..c2c63d100014f 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -1743,12 +1743,12 @@ struct DSEState { } EI.removeInstruction(DeadInst); - // Remove memory defs directly, but only queue other dead instructions for - // later removal. They may have been used as memory locations that have - // been cached by BatchAA. Removing them here may lead to newly created - // instructions to be allocated at the same address, yielding stale cache - // entries. - if (IsMemDef) + // Remove memory defs directly if they don't produce results, but only + // queue other dead instructions for later removal. They may have been + // used as memory locations that have been cached by BatchAA. Removing + // them here may lead to newly created instructions to be allocated at the + // same address, yielding stale cache entries. + if (IsMemDef && DeadInst->getType()->isVoidTy()) DeadInst->eraseFromParent(); else ToRemove.push_back(DeadInst); @@ -2305,7 +2305,6 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, while (!State.ToRemove.empty()) { Instruction *DeadInst = State.ToRemove.pop_back_val(); - assert(!MSSA.getMemoryAccess(DeadInst)); DeadInst->eraseFromParent(); } From c839e008e26fce23c594c6c8b4e55a6f3a5a1bef Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 2 Mar 2024 10:59:25 +0000 Subject: [PATCH 4/4] !fixup add extra tests. --- .../batchaa-caching-new-pointers.ll | 151 +++++++++++++++++- 1 file changed, 149 insertions(+), 2 deletions(-) diff --git a/llvm/test/Transforms/DeadStoreElimination/batchaa-caching-new-pointers.ll b/llvm/test/Transforms/DeadStoreElimination/batchaa-caching-new-pointers.ll index 0e6eb8e7ef8d5..ee9bd6912e2ae 100644 --- a/llvm/test/Transforms/DeadStoreElimination/batchaa-caching-new-pointers.ll +++ b/llvm/test/Transforms/DeadStoreElimination/batchaa-caching-new-pointers.ll @@ -12,13 +12,13 @@ define ptr @foo(ptr noundef %ptr) { ; CHECK-LABEL: define ptr @foo( ; CHECK-SAME: ptr noundef [[PTR:%.*]]) { ; CHECK-NEXT: [[STRUCT_ALLOCA:%.*]] = alloca [[STRUCT_TYPE:%.*]], align 8 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR2:[0-9]+]] +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6:[0-9]+]] ; CHECK-NEXT: [[STRUCT_BYTE_8:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 8 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_BYTE_8]], i64 4 ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 [[TMP1]], i8 42, i64 4, i1 false) ; CHECK-NEXT: store i32 43, ptr [[STRUCT_BYTE_8]], align 4 ; CHECK-NEXT: [[RET:%.*]] = load ptr, ptr [[STRUCT_BYTE_8]], align 8 -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR2]] +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]] ; CHECK-NEXT: ret ptr [[RET]] ; %struct.alloca = alloca %struct.type, align 8 @@ -37,6 +37,153 @@ define ptr @foo(ptr noundef %ptr) { ret ptr %ret } +; Set of tests based on @foo, but where the memset's operands cannot be erased +; due to other uses. Instead, they contain a number of removable MemoryDefs; +; with non-void types result types. + +define ptr @foo_with_removable_malloc() { +; CHECK-LABEL: define ptr @foo_with_removable_malloc() { +; CHECK-NEXT: [[STRUCT_ALLOCA:%.*]] = alloca [[STRUCT_TYPE:%.*]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]] +; CHECK-NEXT: [[STRUCT_BYTE_4:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 4 +; CHECK-NEXT: [[STRUCT_BYTE_8:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 8 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_BYTE_8]], i64 4 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 [[TMP1]], i8 42, i64 4, i1 false) +; CHECK-NEXT: store i32 43, ptr [[STRUCT_BYTE_8]], align 4 +; CHECK-NEXT: [[RET:%.*]] = load ptr, ptr [[STRUCT_BYTE_8]], align 8 +; CHECK-NEXT: call void @readnone(ptr [[STRUCT_BYTE_4]]) +; CHECK-NEXT: call void @readnone(ptr [[STRUCT_BYTE_8]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]] +; CHECK-NEXT: ret ptr [[RET]] +; + %struct.alloca = alloca %struct.type, align 8 + call void @llvm.lifetime.start.p0(i64 56, ptr nonnull %struct.alloca) nounwind + %struct.byte.4 = getelementptr inbounds i8, ptr %struct.alloca, i64 4 + %struct.byte.8 = getelementptr inbounds i8, ptr %struct.alloca, i64 8 + + ; Set of removable memory deffs + %m2 = tail call ptr @malloc(i64 4) + %m1 = tail call ptr @malloc(i64 4) + store i32 0, ptr %struct.byte.8 + store i32 0, ptr %struct.byte.8 + store i32 123, ptr %m1 + store i32 123, ptr %m2 + + ; Set %struct.alloca[8, 16) to 42. + call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %struct.byte.8, i8 42, i64 8, i1 false) + ; Set %struct.alloca[8, 12) to 43. + store i32 43, ptr %struct.byte.8, align 4 + ; Set %struct.alloca[4, 8) to 44. + store i32 44, ptr %struct.byte.4, align 4 + ; Return %struct.alloca[8, 16). + %ret = load ptr, ptr %struct.byte.8 + call void @readnone(ptr %struct.byte.4); + call void @readnone(ptr %struct.byte.8); + call void @llvm.lifetime.end.p0(i64 56, ptr nonnull %struct.alloca) nounwind + ret ptr %ret +} + +define ptr @foo_with_removable_malloc_free() { +; CHECK-LABEL: define ptr @foo_with_removable_malloc_free() { +; CHECK-NEXT: [[STRUCT_ALLOCA:%.*]] = alloca [[STRUCT_TYPE:%.*]], align 8 +; CHECK-NEXT: [[M1:%.*]] = tail call ptr @malloc(i64 4) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]] +; CHECK-NEXT: [[STRUCT_BYTE_4:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 4 +; CHECK-NEXT: [[STRUCT_BYTE_8:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 8 +; CHECK-NEXT: [[M2:%.*]] = tail call ptr @malloc(i64 4) +; CHECK-NEXT: call void @free(ptr [[M1]]) +; CHECK-NEXT: call void @free(ptr [[M2]]) +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_BYTE_8]], i64 4 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 [[TMP1]], i8 42, i64 4, i1 false) +; CHECK-NEXT: store i32 43, ptr [[STRUCT_BYTE_8]], align 4 +; CHECK-NEXT: [[RET:%.*]] = load ptr, ptr [[STRUCT_BYTE_8]], align 8 +; CHECK-NEXT: call void @readnone(ptr [[STRUCT_BYTE_4]]) +; CHECK-NEXT: call void @readnone(ptr [[STRUCT_BYTE_8]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]] +; CHECK-NEXT: ret ptr [[RET]] +; + %struct.alloca = alloca %struct.type, align 8 + %m1 = tail call ptr @malloc(i64 4) + call void @llvm.lifetime.start.p0(i64 56, ptr nonnull %struct.alloca) nounwind + %struct.byte.4 = getelementptr inbounds i8, ptr %struct.alloca, i64 4 + %struct.byte.8 = getelementptr inbounds i8, ptr %struct.alloca, i64 8 + + store i32 0, ptr %struct.byte.4 + store i32 0, ptr %struct.byte.8 + %m2 = tail call ptr @malloc(i64 4) + store i32 123, ptr %m1 + call void @free(ptr %m1); + store i32 123, ptr %m2 + call void @free(ptr %m2); + + ; Set %struct.alloca[8, 16) to 42. + call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %struct.byte.8, i8 42, i64 8, i1 false) + ; Set %struct.alloca[8, 12) to 43. + store i32 43, ptr %struct.byte.8, align 4 + ; Set %struct.alloca[4, 8) to 44. + store i32 44, ptr %struct.byte.4, align 4 + ; Return %struct.alloca[8, 16). + %ret = load ptr, ptr %struct.byte.8 + call void @readnone(ptr %struct.byte.4); + call void @readnone(ptr %struct.byte.8); + call void @llvm.lifetime.end.p0(i64 56, ptr nonnull %struct.alloca) nounwind + ret ptr %ret +} + +define ptr @foo_with_malloc_to_calloc() { +; CHECK-LABEL: define ptr @foo_with_malloc_to_calloc() { +; CHECK-NEXT: [[STRUCT_ALLOCA:%.*]] = alloca [[STRUCT_TYPE:%.*]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]] +; CHECK-NEXT: [[STRUCT_BYTE_8:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 8 +; CHECK-NEXT: [[STRUCT_BYTE_4:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 4 +; CHECK-NEXT: [[CALLOC1:%.*]] = call ptr @calloc(i64 1, i64 4) +; CHECK-NEXT: [[CALLOC:%.*]] = call ptr @calloc(i64 1, i64 4) +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_BYTE_8]], i64 4 +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 [[TMP1]], i8 42, i64 4, i1 false) +; CHECK-NEXT: store i32 43, ptr [[STRUCT_BYTE_8]], align 4 +; CHECK-NEXT: [[RET:%.*]] = load ptr, ptr [[STRUCT_BYTE_8]], align 8 +; CHECK-NEXT: call void @readnone(ptr [[STRUCT_BYTE_4]]) +; CHECK-NEXT: call void @readnone(ptr [[STRUCT_BYTE_8]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]] +; CHECK-NEXT: call void @use(ptr [[CALLOC1]]) +; CHECK-NEXT: call void @use(ptr [[CALLOC]]) +; CHECK-NEXT: ret ptr [[RET]] +; + %struct.alloca = alloca %struct.type, align 8 + call void @llvm.lifetime.start.p0(i64 56, ptr nonnull %struct.alloca) nounwind + %struct.byte.8 = getelementptr inbounds i8, ptr %struct.alloca, i64 8 + %struct.byte.4 = getelementptr inbounds i8, ptr %struct.alloca, i64 4 + + ; Set of removable memory deffs + %m1 = tail call ptr @malloc(i64 4) + %m2 = tail call ptr @malloc(i64 4) + store i32 0, ptr %struct.byte.4 + store i32 0, ptr %struct.byte.8 + call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %m2, i8 0, i64 4, i1 false) + call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %m1, i8 0, i64 4, i1 false) + + ; Set %struct.alloca[8, 16) to 42. + call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %struct.byte.8, i8 42, i64 8, i1 false) + ; Set %struct.alloca[8, 12) to 43. + store i32 43, ptr %struct.byte.8, align 4 + ; Set %struct.alloca[4, 8) to 44. + store i32 44, ptr %struct.byte.4, align 4 + ; Return %struct.alloca[8, 16). + %ret = load ptr, ptr %struct.byte.8 + call void @readnone(ptr %struct.byte.4); + call void @readnone(ptr %struct.byte.8); + call void @llvm.lifetime.end.p0(i64 56, ptr nonnull %struct.alloca) nounwind + call void @use(ptr %m1) + call void @use(ptr %m2) + ret ptr %ret +} + declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) + +declare noalias ptr @malloc(i64) willreturn allockind("alloc,uninitialized") "alloc-family"="malloc" +declare void @readnone(ptr) readnone nounwind +declare void @free(ptr nocapture) allockind("free") "alloc-family"="malloc" + +declare void @use(ptr)