Skip to content

Commit d5784d2

Browse files
wsmoses authored and KristofferC committed
optimizer: fix alloc opt on unknown offset with references (#47076)
Fixes the issue mentioned in #47075 (comment). (cherry picked from commit a68235c)
1 parent d1b2980 commit d5784d2

File tree

4 files changed

+59
-5
lines changed

4 files changed

+59
-5
lines changed

src/llvm-alloc-helpers.cpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,12 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg
161161
auto check_inst = [&] (Instruction *inst, Use *use) {
162162
if (isa<LoadInst>(inst)) {
163163
required.use_info.hasload = true;
164-
if (cur.offset == UINT32_MAX || !required.use_info.addMemOp(inst, 0, cur.offset,
164+
if (cur.offset == UINT32_MAX) {
165+
auto elty = inst->getType();
166+
required.use_info.has_unknown_objref |= hasObjref(elty);
167+
required.use_info.has_unknown_objrefaggr |= hasObjref(elty) && !isa<PointerType>(elty);
168+
required.use_info.hasunknownmem = true;
169+
} else if (!required.use_info.addMemOp(inst, 0, cur.offset,
165170
inst->getType(),
166171
false, required.DL))
167172
required.use_info.hasunknownmem = true;
@@ -229,7 +234,12 @@ void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArg
229234
return false;
230235
}
231236
auto storev = store->getValueOperand();
232-
if (cur.offset == UINT32_MAX || !required.use_info.addMemOp(inst, use->getOperandNo(),
237+
if (cur.offset == UINT32_MAX) {
238+
auto elty = storev->getType();
239+
required.use_info.has_unknown_objref |= hasObjref(elty);
240+
required.use_info.has_unknown_objrefaggr |= hasObjref(elty) && !isa<PointerType>(elty);
241+
required.use_info.hasunknownmem = true;
242+
} else if (!required.use_info.addMemOp(inst, use->getOperandNo(),
233243
cur.offset, storev->getType(),
234244
true, required.DL))
235245
required.use_info.hasunknownmem = true;

src/llvm-alloc-helpers.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,11 @@ namespace jl_alloc {
8787
// The object is used in an error function
8888
bool haserror:1;
8989

90+
// The alloc has a Julia object reference not in an explicit field.
91+
bool has_unknown_objref:1;
92+
// The alloc has an aggregate Julia object reference not in an explicit field.
93+
bool has_unknown_objrefaggr:1;
94+
9095
void reset()
9196
{
9297
escaped = false;
@@ -99,6 +104,8 @@ namespace jl_alloc {
99104
hasunknownmem = false;
100105
returned = false;
101106
haserror = false;
107+
has_unknown_objref = false;
108+
has_unknown_objrefaggr = false;
102109
uses.clear();
103110
preserves.clear();
104111
memops.clear();

src/llvm-alloc-opt.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -233,8 +233,8 @@ void Optimizer::optimizeAll()
233233
removeAlloc(orig);
234234
continue;
235235
}
236-
bool has_ref = false;
237-
bool has_refaggr = false;
236+
bool has_ref = use_info.has_unknown_objref;
237+
bool has_refaggr = use_info.has_unknown_objrefaggr;
238238
for (auto memop: use_info.memops) {
239239
auto &field = memop.second;
240240
if (field.hasobjref) {
@@ -577,7 +577,9 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref)
577577
// treat this as a non-mem2reg'd alloca
578578
// The ccall root and GC preserve handling below makes sure that
579579
// the alloca isn't optimized out.
580-
buff = prolog_builder.CreateAlloca(pass.T_prjlvalue);
580+
const DataLayout &DL = F.getParent()->getDataLayout();
581+
auto asize = ConstantInt::get(Type::getInt64Ty(prolog_builder.getContext()), sz / DL.getTypeAllocSize(pass.T_prjlvalue));
582+
buff = prolog_builder.CreateAlloca(pass.T_prjlvalue, asize);
581583
buff->setAlignment(Align(align));
582584
ptr = cast<Instruction>(prolog_builder.CreateBitCast(buff, pass.T_pint8));
583585
}
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -AllocOpt -S %s | FileCheck %s
2+
3+
source_filename = "text"
4+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
5+
target triple = "x86_64-linux-gnu"
6+
7+
declare {}*** @julia.get_pgcstack()
8+
9+
declare {} addrspace(10)* @julia.gc_alloc_obj({}**, i64, {} addrspace(10)*)
10+
11+
declare void @julia.write_barrier({} addrspace(10)*, ...)
12+
13+
define void @diffejulia_objective__1864_inner_1wrap({} addrspace(10)* %arg, i64 %iv.i) {
14+
entry:
15+
%i5 = call {}*** @julia.get_pgcstack()
16+
%i13 = bitcast {}*** %i5 to {}**
17+
%i14 = getelementptr inbounds {}*, {}** %i13, i64 -12
18+
%i18 = call noalias nonnull dereferenceable(8000) dereferenceable_or_null(8000) {} addrspace(10)* @julia.gc_alloc_obj({}** %i14, i64 8000, {} addrspace(10)* addrspacecast ({}* inttoptr (i64 139756155247504 to {}*) to {} addrspace(10)*))
19+
%_malloccache.i = bitcast {} addrspace(10)* %i18 to {} addrspace(10)* addrspace(10)*
20+
%i23 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %_malloccache.i, i64 %iv.i
21+
store {} addrspace(10)* %arg, {} addrspace(10)* addrspace(10)* %i23, align 8
22+
%i24 = bitcast {} addrspace(10)* addrspace(10)* %_malloccache.i to {} addrspace(10)*
23+
call void ({} addrspace(10)*, ...) @julia.write_barrier({} addrspace(10)* %i24, {} addrspace(10)* %arg)
24+
%l = load {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %i23
25+
ret void
26+
}
27+
28+
; CHECK: %[[i0:.+]] = alloca {} addrspace(10)*, i64 1000, align 16
29+
; CHECK: %[[i1:.+]] = bitcast {} addrspace(10)** %[[i0]] to i8*
30+
; CHECK: %i18 = bitcast i8* %[[i1]] to {}*
31+
; CHECK: %_malloccache.i = bitcast {}* %i18 to {} addrspace(10)**
32+
; CHECK: %i23 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %_malloccache.i, i64 %iv.i
33+
; CHECK: store {} addrspace(10)* %arg, {} addrspace(10)** %i23, align 8
34+
; CHECK: %i24 = bitcast {} addrspace(10)** %_malloccache.i to {}*
35+
; CHECK: %l = load {} addrspace(10)*, {} addrspace(10)** %i23, align 8

0 commit comments

Comments
 (0)