Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ RUNTIME_SRCS += jitlayers aotcompile debuginfo disasm llvm-simdloop llvm-muladd
llvm-final-gc-lowering llvm-pass-helpers llvm-late-gc-lowering \
llvm-lower-handlers llvm-gc-invariant-verifier llvm-propagate-addrspaces \
llvm-multiversioning llvm-alloc-opt cgmemmgr llvm-api llvm-remove-addrspaces \
llvm-remove-ni llvm-julia-licm llvm-demote-float16
llvm-remove-ni llvm-julia-licm llvm-demote-float16 llvm-ptls-reuse
FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --includedir)
LLVM_LIBS := all
ifeq ($(USE_POLLY),1)
Expand Down Expand Up @@ -246,6 +246,7 @@ $(BUILDDIR)/llvm-gc-invariant-verifier.o $(BUILDDIR)/llvm-gc-invariant-verifier.
$(BUILDDIR)/llvm-late-gc-lowering.o $(BUILDDIR)/llvm-late-gc-lowering.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h
$(BUILDDIR)/llvm-multiversioning.o $(BUILDDIR)/llvm-multiversioning.dbg.obj: $(SRCDIR)/codegen_shared.h
$(BUILDDIR)/llvm-pass-helpers.o $(BUILDDIR)/llvm-pass-helpers.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/codegen_shared.h
$(BUILDDIR)/llvm-ptls-reuse.o $(BUILDDIR)/llvm-ptls-reuse.dbg.obj: $(SRCDIR)/codegen_shared.h
$(BUILDDIR)/llvm-ptls.o $(BUILDDIR)/llvm-ptls.dbg.obj: $(SRCDIR)/codegen_shared.h
$(BUILDDIR)/processor.o $(BUILDDIR)/processor.dbg.obj: $(addprefix $(SRCDIR)/,processor_*.cpp processor.h features_*.h)
$(BUILDDIR)/signal-handling.o $(BUILDDIR)/signal-handling.dbg.obj: $(addprefix $(SRCDIR)/,signals-*.c)
Expand Down
12 changes: 12 additions & 0 deletions src/aotcompile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -628,6 +628,10 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level,
PM->add(createVerifierPass());
#endif

PM->add(createLowerPTLSReusePass());
#ifdef JL_DEBUG_BUILD
PM->add(createVerifierPass(false));
#endif
Comment on lines +631 to +634
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll remove the `PM->add(createVerifierPass(false));` calls before merge.

PM->add(createConstantMergePass());
if (opt_level < 2) {
PM->add(createCFGSimplificationPass());
Expand All @@ -648,6 +652,10 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level,
PM->add(createRemoveNIPass());
PM->add(createLateLowerGCFramePass());
PM->add(createFinalLowerGCPass());
PM->add(createLowerPTLSReusePass());
#ifdef JL_DEBUG_BUILD
PM->add(createVerifierPass(false));
#endif
PM->add(createLowerPTLSPass(dump_native));
}
else {
Expand Down Expand Up @@ -777,6 +785,10 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level,
PM->add(createRemoveNIPass());
PM->add(createLateLowerGCFramePass());
PM->add(createFinalLowerGCPass());
PM->add(createLowerPTLSReusePass());
#ifdef JL_DEBUG_BUILD
PM->add(createVerifierPass(false));
#endif
// We need these two passes and the instcombine below
// after GC lowering to let LLVM do some constant propagation on the tags.
// and remove some unnecessary write barrier checks.
Expand Down
12 changes: 6 additions & 6 deletions src/ccall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1458,7 +1458,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
JL_GC_POP();
ctx.builder.CreateCall(prepare_call(gcroot_flush_func));
emit_signal_fence(ctx);
ctx.builder.CreateLoad(T_size, ctx.signalPage, true);
ctx.builder.CreateLoad(T_size, current_signal_page(ctx), true);
emit_signal_fence(ctx);
return ghostValue(jl_nothing_type);
}
Expand All @@ -1467,14 +1467,14 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
assert(!isVa && !llvmcall && nccallargs == 0);
JL_GC_POP();
return mark_or_box_ccall_result(ctx,
ctx.builder.CreatePtrToInt(ctx.ptlsStates, lrt),
ctx.builder.CreatePtrToInt(current_ptls(ctx), lrt),
retboxed, rt, unionall, static_rt);
}
else if (is_libjulia_func(jl_threadid)) {
assert(lrt == T_int16);
assert(!isVa && !llvmcall && nccallargs == 0);
JL_GC_POP();
Value *ptls_i16 = emit_bitcast(ctx, ctx.ptlsStates, T_pint16);
Value *ptls_i16 = emit_bitcast(ctx, current_ptls(ctx), T_pint16);
const int tid_offset = offsetof(jl_tls_states_t, tid);
Value *ptid = ctx.builder.CreateInBoundsGEP(ptls_i16, ConstantInt::get(T_size, tid_offset / 2));
LoadInst *tid = ctx.builder.CreateAlignedLoad(ptid, Align(sizeof(int16_t)));
Expand All @@ -1485,7 +1485,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
assert(lrt == T_prjlvalue);
assert(!isVa && !llvmcall && nccallargs == 0);
JL_GC_POP();
Value *ptls_pv = emit_bitcast(ctx, ctx.ptlsStates, T_pprjlvalue);
Value *ptls_pv = emit_bitcast(ctx, current_ptls(ctx), T_pprjlvalue);
const int ct_offset = offsetof(jl_tls_states_t, current_task);
Value *pct = ctx.builder.CreateInBoundsGEP(ptls_pv, ConstantInt::get(T_size, ct_offset / sizeof(void*)));
LoadInst *ct = ctx.builder.CreateAlignedLoad(pct, Align(sizeof(void*)));
Expand All @@ -1496,7 +1496,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
assert(lrt == T_void);
assert(!isVa && !llvmcall && nccallargs == 1);
JL_GC_POP();
Value *ptls_pv = emit_bitcast(ctx, ctx.ptlsStates, T_ppjlvalue);
Value *ptls_pv = emit_bitcast(ctx, current_ptls(ctx), T_ppjlvalue);
const int nt_offset = offsetof(jl_tls_states_t, next_task);
Value *pnt = ctx.builder.CreateInBoundsGEP(ptls_pv, ConstantInt::get(T_size, nt_offset / sizeof(void*)));
ctx.builder.CreateStore(emit_pointer_from_objref(ctx, boxed(ctx, argv[0])), pnt);
Expand Down Expand Up @@ -1537,7 +1537,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
checkBB, contBB);
ctx.builder.SetInsertPoint(checkBB);
ctx.builder.CreateLoad(
ctx.builder.CreateConstInBoundsGEP1_32(T_size, ctx.signalPage, -1),
ctx.builder.CreateConstInBoundsGEP1_32(T_size, current_signal_page(ctx), -1),
true);
ctx.builder.CreateBr(contBB);
ctx.f->getBasicBlockList().push_back(contBB);
Expand Down
4 changes: 2 additions & 2 deletions src/cgutils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2761,7 +2761,7 @@ static void emit_cpointercheck(jl_codectx_t &ctx, const jl_cgval_t &x, const std
// allocation for known size object
static Value *emit_allocobj(jl_codectx_t &ctx, size_t static_size, Value *jt)
{
Value *ptls_ptr = emit_bitcast(ctx, ctx.ptlsStates, T_pint8);
Value *ptls_ptr = emit_bitcast(ctx, current_ptls(ctx), T_pint8);
Function *F = prepare_call(jl_alloc_obj_func);
auto call = ctx.builder.CreateCall(F, {ptls_ptr, ConstantInt::get(T_size, static_size), maybe_decay_untracked(ctx, jt)});
call->setAttributes(F->getAttributes());
Expand Down Expand Up @@ -3087,7 +3087,7 @@ static void emit_signal_fence(jl_codectx_t &ctx)

static Value *emit_defer_signal(jl_codectx_t &ctx)
{
Value *ptls = emit_bitcast(ctx, ctx.ptlsStates,
Value *ptls = emit_bitcast(ctx, current_ptls(ctx),
PointerType::get(T_sigatomic, 0));
Constant *offset = ConstantInt::getSigned(T_int32,
offsetof(jl_tls_states_t, defer_signal) / sizeof(sig_atomic_t));
Expand Down
72 changes: 52 additions & 20 deletions src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -795,6 +795,21 @@ BOX_FUNC(ssavalue, T_prjlvalue, T_size, get_func_attrs);


// placeholder functions
static const auto refetch_jltls_states_func = new JuliaFunction{
"julia.refetch_ptls_states",
[](LLVMContext &C) { return FunctionType::get(PointerType::get(T_ppjlvalue, 0), false); },
nullptr,
};
static const auto reuse_jltls_states_func = new JuliaFunction{
"julia.reuse_ptls_states",
[](LLVMContext &C) { return FunctionType::get(PointerType::get(T_ppjlvalue, 0), false); },
nullptr,
};
static const auto reuse_signal_page_func = new JuliaFunction{
"julia.reuse_signal_page",
[](LLVMContext &C) { return FunctionType::get(PointerType::get(T_psize, 0), false); },
nullptr,
};
static const auto gcroot_flush_func = new JuliaFunction{
"julia.gcroot_flush",
[](LLVMContext &C) { return FunctionType::get(T_void, false); },
Expand Down Expand Up @@ -1085,8 +1100,6 @@ class jl_codectx_t {
int nargs = 0;
int nvargs = -1;

CallInst *ptlsStates = NULL;
Value *signalPage = NULL;
Value *world_age_field = NULL;

bool debug_enabled = false;
Expand Down Expand Up @@ -1121,7 +1134,9 @@ static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t
static jl_cgval_t emit_checked_var(jl_codectx_t &ctx, Value *bp, jl_sym_t *name, bool isvol, MDNode *tbaa);
static jl_cgval_t emit_sparam(jl_codectx_t &ctx, size_t i);
static Value *emit_condition(jl_codectx_t &ctx, const jl_cgval_t &condV, const std::string &msg);
static void allocate_gc_frame(jl_codectx_t &ctx, BasicBlock *b0);
static Instruction *current_ptls(jl_codectx_t &ctx);
static void emit_refetch_ptls(jl_codectx_t &ctx);
static Value *current_signal_page(jl_codectx_t &ctx);
static void CreateTrap(IRBuilder<> &irbuilder);
static CallInst *emit_jlcall(jl_codectx_t &ctx, Function *theFptr, Value *theF,
jl_cgval_t *args, size_t nargs, CallingConv::ID cc);
Expand Down Expand Up @@ -1195,7 +1210,13 @@ static GlobalVariable *get_pointer_to_constant(jl_codegen_params_t &emission_con

static AllocaInst *emit_static_alloca(jl_codectx_t &ctx, Type *lty)
{
return new AllocaInst(lty, 0, "", /*InsertBefore=*/ctx.ptlsStates);
auto InsertBefore = ctx.f->getEntryBlock().getFirstNonPHI();
if (InsertBefore) {
return new AllocaInst(lty, 0, "", InsertBefore);
}
else {
return new AllocaInst(lty, 0, "", &ctx.f->getEntryBlock());
}
}

static void undef_derived_strct(IRBuilder<> &irbuilder, Value *ptr, jl_datatype_t *sty, MDNode *tbaa)
Expand Down Expand Up @@ -3475,6 +3496,9 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt)
Value *r = emit_jlcall(ctx, jlinvoke_func, boxed(ctx, lival), argv, nargs, JLCALL_F2_CC);
result = mark_julia_type(ctx, r, true, rt);
}
#ifdef MIGRATE_TASKS
emit_refetch_ptls(ctx);
#endif
if (result.typ == jl_bottom_type)
CreateTrap(ctx.builder);
return result;
Expand Down Expand Up @@ -4576,23 +4600,32 @@ JL_GCC_IGNORE_STOP

// --- generate function bodies ---

// gc frame emission
static void allocate_gc_frame(jl_codectx_t &ctx, BasicBlock *b0)
// Return the PTLS that is usable at the insertion point of `ctx.builder`.
// Every invocation emits a fresh call to the `reuse_jltls_states_func`
// placeholder at that insertion point and hands back the call instruction.
static Instruction *current_ptls(jl_codectx_t &ctx)
{
    // TODO: requires the runtime, but is generated unconditionally
    Function *reuse_ptls = prepare_call(reuse_jltls_states_func);
    return ctx.builder.CreateCall(reuse_ptls);
}

// Emit a call to the `refetch_jltls_states_func` placeholder at the insertion
// point of `ctx.builder`. The call's result is intentionally not used here.
static void emit_refetch_ptls(jl_codectx_t &ctx)
{
    Function *refetch_ptls = prepare_call(refetch_jltls_states_func);
    ctx.builder.CreateCall(refetch_ptls);
}

// allocate a placeholder gc instruction
ctx.ptlsStates = ctx.builder.CreateCall(prepare_call(jltls_states_func));
int nthfield = offsetof(jl_tls_states_t, safepoint) / sizeof(void*);
ctx.signalPage = emit_nthptr_recast(ctx, ctx.ptlsStates, nthfield, tbaa_const,
PointerType::get(T_psize, 0));
// Return the signal page that belongs to the PTLS currently usable at the
// insertion point of `ctx.builder`. The page pointer is read from the
// `safepoint` slot of `jl_tls_states_t` and recast to a pointer to `T_psize`.
static Value *current_signal_page(jl_codectx_t &ctx)
{
    // Disabled alternative that would go through the dedicated placeholder:
    // return ctx.builder.CreateCall(prepare_call(reuse_signal_page_func));

    // Index of the `safepoint` field, counted in pointer-sized slots.
    const int safepoint_slot = offsetof(jl_tls_states_t, safepoint) / sizeof(void *);
    Instruction *ptls = current_ptls(ctx);
    Type *page_ptr_ty = PointerType::get(T_psize, 0);
    return emit_nthptr_recast(ctx, ptls, safepoint_slot, tbaa_const, page_ptr_ty);
}

static void emit_last_age_field(jl_codectx_t &ctx)
{
ctx.world_age_field = ctx.builder.CreateInBoundsGEP(
T_size,
ctx.builder.CreateBitCast(ctx.ptlsStates, T_psize),
ctx.builder.CreateBitCast(current_ptls(ctx), T_psize),
ConstantInt::get(T_size, offsetof(jl_tls_states_t, world_age) / sizeof(size_t)));
}

Expand Down Expand Up @@ -4644,7 +4677,6 @@ static void emit_cfunc_invalidate(
ctx.builder.SetInsertPoint(b0);
DebugLoc noDbg;
ctx.builder.SetCurrentDebugLocation(noDbg);
allocate_gc_frame(ctx, b0);

Function::arg_iterator AI = gf_thunk->arg_begin();
jl_cgval_t *myargs = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * nargs);
Expand Down Expand Up @@ -4814,11 +4846,10 @@ static Function* gen_cfun_wrapper(
ctx.builder.SetInsertPoint(b0);
DebugLoc noDbg;
ctx.builder.SetCurrentDebugLocation(noDbg);
allocate_gc_frame(ctx, b0);
emit_last_age_field(ctx);

Value *dummy_world = ctx.builder.CreateAlloca(T_size);
Value *have_tls = ctx.builder.CreateIsNotNull(ctx.ptlsStates);
Value *have_tls = ctx.builder.CreateIsNotNull(current_ptls(ctx));
// TODO: in the future, try to initialize a full TLS context here
// for now, just use a dummy field to avoid a branch in this function
ctx.world_age_field = ctx.builder.CreateSelect(have_tls, ctx.world_age_field, dummy_world);
Expand Down Expand Up @@ -5471,7 +5502,6 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret
ctx.builder.SetInsertPoint(b0);
DebugLoc noDbg;
ctx.builder.SetCurrentDebugLocation(noDbg);
allocate_gc_frame(ctx, b0);

// TODO: replace this with emit_call_specfun_other?
FunctionType *ftype = f.decl->getFunctionType();
Expand Down Expand Up @@ -6097,7 +6127,6 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
*/

// step 7. set up GC frame
allocate_gc_frame(ctx, b0);
Value *last_age = NULL;
if (toplevel) {
emit_last_age_field(ctx);
Expand Down Expand Up @@ -6163,9 +6192,9 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
(va && (int)i == ctx.vaSlot) || // or it's the va arg tuple
i == 0) { // or it is the first argument (which isn't in `argArray`)
AllocaInst *av = new AllocaInst(T_prjlvalue, 0,
jl_symbol_name(s), /*InsertBefore*/ctx.ptlsStates);
jl_symbol_name(s), /*InsertBefore*/current_ptls(ctx));
StoreInst *SI = new StoreInst(V_rnull, av, false, Align(sizeof(void*)));
SI->insertAfter(ctx.ptlsStates);
SI->insertAfter(current_ptls(ctx));
varinfo.boxroot = av;
if (ctx.debug_enabled && varinfo.dinfo) {
DIExpression *expr;
Expand Down Expand Up @@ -7589,6 +7618,9 @@ static void init_jit_functions(void)
add_named_global(diff_gc_total_bytes_func, &jl_gc_diff_total_bytes);
add_named_global(sync_gc_total_bytes_func, &jl_gc_sync_total_bytes);
add_named_global(jlarray_data_owner_func, &jl_array_data_owner);
add_named_global(refetch_jltls_states_func, (void*)NULL);
add_named_global(reuse_jltls_states_func, (void*)NULL);
add_named_global(reuse_signal_page_func, (void*)NULL);
add_named_global(gcroot_flush_func, (void*)NULL);
add_named_global(gc_preserve_begin_func, (void*)NULL);
add_named_global(gc_preserve_end_func, (void*)NULL);
Expand Down
1 change: 1 addition & 0 deletions src/jitlayers.h
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,7 @@ class JuliaOJIT {
};
extern JuliaOJIT *jl_ExecutionEngine;

Pass *createLowerPTLSReusePass();
Pass *createLowerPTLSPass(bool imaging_mode);
Pass *createCombineMulAddPass();
Pass *createFinalLowerGCPass();
Expand Down
1 change: 1 addition & 0 deletions src/julia_threads.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#ifndef JL_THREADS_H
#define JL_THREADS_H

#include "options.h"
#include <atomics.h>
// threading ------------------------------------------------------------------

Expand Down
10 changes: 2 additions & 8 deletions src/llvm-final-gc-lowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ struct FinalLowerGC: public FunctionPass, private JuliaPassContext {
Function *queueRootFunc;
Function *poolAllocFunc;
Function *bigAllocFunc;
CallInst *ptlsStates;

bool doInitialization(Module &M) override;
bool doFinalization(Module &M) override;
Expand Down Expand Up @@ -111,6 +110,7 @@ void FinalLowerGC::lowerPushGCFrame(CallInst *target, Function &F)
T_size->getPointerTo()),
Align(sizeof(void*)));
inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
auto ptlsStates = ptlsBefore(*builder.GetInsertPoint());
Value *pgcstack = builder.Insert(getPgcstack(ptlsStates));
inst = builder.CreateAlignedStore(
builder.CreateAlignedLoad(pgcstack, Align(sizeof(void*))),
Expand All @@ -136,6 +136,7 @@ void FinalLowerGC::lowerPopGCFrame(CallInst *target, Function &F)
cast<Instruction>(builder.CreateConstInBoundsGEP1_32(T_prjlvalue, gcframe, 1));
Instruction *inst = builder.CreateAlignedLoad(gcpop, Align(sizeof(void*)));
inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
auto ptlsStates = ptlsBefore(*builder.GetInsertPoint());
inst = builder.CreateAlignedStore(
inst,
builder.CreateBitCast(
Expand Down Expand Up @@ -282,13 +283,6 @@ bool FinalLowerGC::runOnFunction(Function &F)
LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Processing function " << F.getName() << "\n");
// Check availability of functions again since they might have been deleted.
initFunctions(*F.getParent());
if (!ptls_getter)
return true;

// Look for a call to 'julia.ptls_states'.
ptlsStates = getPtls(F);
if (!ptlsStates)
return true;
Comment on lines -285 to -291
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Were these early returns some kind of optimization? If so, we can add something like

if (!usePtls(F))
    return true;


// Acquire intrinsic functions.
auto newGCFrameFunc = getOrNull(jl_intrinsics::newGCFrame);
Expand Down
Loading