diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 1d67773585d59..2f88b19a8d390 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -638,6 +638,7 @@ bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
   if (!LI->isSimple() || !LI->hasOneUse() || LI->getParent() != SI->getParent())
     return false;
 
+  BatchAAResults BAA(*AA);
   auto *T = LI->getType();
   // Don't introduce calls to memcpy/memmove intrinsics out of thin air if
   // the corresponding libcalls are not available.
@@ -647,19 +648,17 @@ bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
       (EnableMemCpyOptWithoutLibcalls ||
        (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
     MemoryLocation LoadLoc = MemoryLocation::get(LI);
-
-    // We use alias analysis to check if an instruction may store to
-    // the memory we load from in between the load and the store. If
-    // such an instruction is found, we try to promote there instead
-    // of at the store position.
-    // TODO: Can use MSSA for this.
-    Instruction *P = SI;
-    for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
-      if (isModSet(AA->getModRefInfo(&I, LoadLoc))) {
-        P = &I;
-        break;
-      }
-    }
+    MemoryUseOrDef *LoadAccess = MSSA->getMemoryAccess(LI),
+                   *StoreAccess = MSSA->getMemoryAccess(SI);
+
+    // We use MSSA to check if an instruction may store to the memory we load
+    // from in between the load and the store. If such an instruction is found,
+    // we try to promote there instead of at the store position.
+    auto *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
+        StoreAccess->getDefiningAccess(), LoadLoc, BAA);
+    Instruction *P = MSSA->dominates(LoadAccess, Clobber)
+                         ? cast<MemoryUseOrDef>(Clobber)->getMemoryInst()
+                         : SI;
 
     // If we found an instruction that may write to the loaded memory,
     // we can try to promote at this position instead of the store
@@ -707,7 +706,6 @@ bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
   // Detect cases where we're performing call slot forwarding, but
   // happen to be using a load-store pair to implement it, rather than
   // a memcpy.
-  BatchAAResults BAA(*AA);
   auto GetCall = [&]() -> CallInst * {
     // We defer this expensive clobber walk until the cheap checks
     // have been done on the source inside performCallSlotOptzn.
diff --git a/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll b/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll
index 51fad82050939..61e349e01ed91 100644
--- a/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll
+++ b/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll
@@ -141,4 +141,19 @@ define void @throwing_call(ptr noalias %src, ptr %dst) {
   ret void
 }
 
+define void @loop_memoryphi(ptr %a, ptr %b) {
+; CHECK-LABEL: @loop_memoryphi(
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    call void @llvm.memmove.p0.p0.i64(ptr align 8 [[B:%.*]], ptr align 8 [[A:%.*]], i64 16, i1 false)
+; CHECK-NEXT:    br label [[LOOP]]
+;
+  br label %loop
+
+loop:
+  %v = load { i64, i64 }, ptr %a
+  store { i64, i64 } %v, ptr %b
+  br label %loop
+}
+
 declare void @call()
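
Note (illustrative sketch, not part of the patch): the standalone helper below spells out the MemorySSA query that the new code in processStoreOfLoad performs, using the same getClobberingMemoryAccess overload the patch calls. The function name findPromotionPoint is invented here for illustration; everything else is the pattern from the diff above.

// Sketch only: mirrors the clobber walk introduced in processStoreOfLoad.
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Hypothetical helper: returns the position at which the store should be
// promoted. If the nearest access above SI that may write the memory LI reads
// lies between LI and SI, promote there; otherwise (e.g. the clobber is a
// loop-header MemoryPhi the load does not dominate) keep the store position.
static Instruction *findPromotionPoint(MemorySSA &MSSA, AAResults &AA,
                                       LoadInst *LI, StoreInst *SI) {
  BatchAAResults BAA(AA);
  MemoryLocation LoadLoc = MemoryLocation::get(LI);
  MemoryUseOrDef *LoadAccess = MSSA.getMemoryAccess(LI);
  MemoryUseOrDef *StoreAccess = MSSA.getMemoryAccess(SI);

  // Walk upwards from the store's defining access to the nearest access that
  // clobbers the loaded location.
  MemoryAccess *Clobber = MSSA.getWalker()->getClobberingMemoryAccess(
      StoreAccess->getDefiningAccess(), LoadLoc, BAA);

  // A clobber dominated by the load sits between the load and the store, so
  // promotion has to happen at that instruction instead of at the store.
  if (MSSA.dominates(LoadAccess, Clobber))
    return cast<MemoryUseOrDef>(Clobber)->getMemoryInst();
  return SI;
}

In the loop_memoryphi test the walk stops at the MemoryPhi of the loop header, which the load does not dominate, so the promotion point stays at the store and the pass emits the memmove the CHECK lines expect.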