Skip to content

Commit 2bd9348

Browse files
committed
codegen: optimize setfield/arrayset with inline isa test
1 parent 96e2400 commit 2bd9348

File tree

1 file changed

+101
-95
lines changed

1 file changed

+101
-95
lines changed

src/codegen.cpp

Lines changed: 101 additions & 95 deletions
Original file line number | Diff line number | Diff line change
@@ -2883,7 +2883,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
28832883

28842884
else if (f == jl_builtin_arrayset && nargs >= 4) {
28852885
const jl_cgval_t &ary = argv[2];
2886-
const jl_cgval_t &val = argv[3];
2886+
jl_cgval_t val = argv[3];
28872887
bool indices_ok = true;
28882888
for (size_t i = 4; i <= nargs; i++) {
28892889
if (argv[i].typ != (jl_value_t*)jl_long_type) {
@@ -2896,101 +2896,103 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
28962896
jl_value_t *ety = jl_tparam0(aty_dt);
28972897
jl_value_t *ndp = jl_tparam1(aty_dt);
28982898
if (!jl_has_free_typevars(ety) && (jl_is_long(ndp) || nargs == 4)) {
2899-
if (jl_subtype(val.typ, ety)) { // TODO: probably should just convert this to a type-assert
2900-
size_t elsz = 0, al = 0;
2901-
int union_max = jl_islayout_inline(ety, &elsz, &al);
2902-
bool isboxed = (union_max == 0);
2903-
if (isboxed)
2904-
ety = (jl_value_t*)jl_any_type;
2905-
jl_value_t *ary_ex = jl_exprarg(ex, 2);
2906-
ssize_t nd = jl_is_long(ndp) ? jl_unbox_long(ndp) : -1;
2907-
jl_value_t *boundscheck = argv[1].constant;
2908-
emit_typecheck(ctx, argv[1], (jl_value_t*)jl_bool_type, "arrayset");
2909-
Value *idx = emit_array_nd_index(ctx, ary, ary_ex, nd, &argv[4], nargs - 3, boundscheck);
2910-
if (!isboxed && jl_is_datatype(ety) && jl_datatype_size(ety) == 0) {
2911-
// no-op
2912-
}
2913-
else {
2914-
PHINode *data_owner = NULL; // owner object against which the write barrier must check
2915-
if (isboxed || (jl_is_datatype(ety) && ((jl_datatype_t*)ety)->layout->npointers > 0)) { // if elements are just bits, don't need a write barrier
2916-
Value *aryv = boxed(ctx, ary);
2917-
Value *flags = emit_arrayflags(ctx, ary);
2918-
// the owner of the data is ary itself except if ary->how == 3
2919-
flags = ctx.builder.CreateAnd(flags, 3);
2920-
Value *is_owned = ctx.builder.CreateICmpEQ(flags, ConstantInt::get(T_int16, 3));
2921-
BasicBlock *curBB = ctx.builder.GetInsertBlock();
2922-
BasicBlock *ownedBB = BasicBlock::Create(jl_LLVMContext, "array_owned", ctx.f);
2923-
BasicBlock *mergeBB = BasicBlock::Create(jl_LLVMContext, "merge_own", ctx.f);
2924-
ctx.builder.CreateCondBr(is_owned, ownedBB, mergeBB);
2925-
ctx.builder.SetInsertPoint(ownedBB);
2926-
// load owner pointer
2927-
Instruction *own_ptr;
2928-
if (jl_is_long(ndp)) {
2929-
own_ptr = ctx.builder.CreateAlignedLoad(T_prjlvalue,
2930-
ctx.builder.CreateConstInBoundsGEP1_32(T_prjlvalue,
2931-
emit_bitcast(ctx, decay_derived(ctx, aryv), T_pprjlvalue),
2932-
jl_array_data_owner_offset(nd) / sizeof(jl_value_t*)),
2933-
Align(sizeof(void*)));
2934-
tbaa_decorate(tbaa_const, maybe_mark_load_dereferenceable(own_ptr, false, (jl_value_t*)jl_array_any_type));
2935-
}
2936-
else {
2937-
own_ptr = ctx.builder.CreateCall(
2938-
prepare_call(jlarray_data_owner_func),
2939-
{aryv});
2940-
}
2941-
ctx.builder.CreateBr(mergeBB);
2942-
ctx.builder.SetInsertPoint(mergeBB);
2943-
data_owner = ctx.builder.CreatePHI(T_prjlvalue, 2);
2944-
data_owner->addIncoming(aryv, curBB);
2945-
data_owner->addIncoming(own_ptr, ownedBB);
2899+
if (!jl_subtype(val.typ, ety)) {
2900+
emit_typecheck(ctx, val, ety, "arrayset");
2901+
val = update_julia_type(ctx, val, ety);
2902+
}
2903+
size_t elsz = 0, al = 0;
2904+
int union_max = jl_islayout_inline(ety, &elsz, &al);
2905+
bool isboxed = (union_max == 0);
2906+
if (isboxed)
2907+
ety = (jl_value_t*)jl_any_type;
2908+
jl_value_t *ary_ex = jl_exprarg(ex, 2);
2909+
ssize_t nd = jl_is_long(ndp) ? jl_unbox_long(ndp) : -1;
2910+
jl_value_t *boundscheck = argv[1].constant;
2911+
emit_typecheck(ctx, argv[1], (jl_value_t*)jl_bool_type, "arrayset");
2912+
Value *idx = emit_array_nd_index(ctx, ary, ary_ex, nd, &argv[4], nargs - 3, boundscheck);
2913+
if (!isboxed && jl_is_datatype(ety) && jl_datatype_size(ety) == 0) {
2914+
// no-op
2915+
}
2916+
else {
2917+
PHINode *data_owner = NULL; // owner object against which the write barrier must check
2918+
if (isboxed || (jl_is_datatype(ety) && ((jl_datatype_t*)ety)->layout->npointers > 0)) { // if elements are just bits, don't need a write barrier
2919+
Value *aryv = boxed(ctx, ary);
2920+
Value *flags = emit_arrayflags(ctx, ary);
2921+
// the owner of the data is ary itself except if ary->how == 3
2922+
flags = ctx.builder.CreateAnd(flags, 3);
2923+
Value *is_owned = ctx.builder.CreateICmpEQ(flags, ConstantInt::get(T_int16, 3));
2924+
BasicBlock *curBB = ctx.builder.GetInsertBlock();
2925+
BasicBlock *ownedBB = BasicBlock::Create(jl_LLVMContext, "array_owned", ctx.f);
2926+
BasicBlock *mergeBB = BasicBlock::Create(jl_LLVMContext, "merge_own", ctx.f);
2927+
ctx.builder.CreateCondBr(is_owned, ownedBB, mergeBB);
2928+
ctx.builder.SetInsertPoint(ownedBB);
2929+
// load owner pointer
2930+
Instruction *own_ptr;
2931+
if (jl_is_long(ndp)) {
2932+
own_ptr = ctx.builder.CreateAlignedLoad(T_prjlvalue,
2933+
ctx.builder.CreateConstInBoundsGEP1_32(T_prjlvalue,
2934+
emit_bitcast(ctx, decay_derived(ctx, aryv), T_pprjlvalue),
2935+
jl_array_data_owner_offset(nd) / sizeof(jl_value_t*)),
2936+
Align(sizeof(void*)));
2937+
tbaa_decorate(tbaa_const, maybe_mark_load_dereferenceable(own_ptr, false, (jl_value_t*)jl_array_any_type));
29462938
}
2947-
if (!isboxed && jl_is_uniontype(ety)) {
2948-
Type *AT = ArrayType::get(IntegerType::get(jl_LLVMContext, 8 * al), (elsz + al - 1) / al);
2949-
Value *data = emit_bitcast(ctx, emit_arrayptr(ctx, ary, ary_ex), AT->getPointerTo());
2950-
// compute tindex from val
2951-
jl_cgval_t rhs_union = convert_julia_type(ctx, val, ety);
2952-
Value *tindex = compute_tindex_unboxed(ctx, rhs_union, ety);
2953-
tindex = ctx.builder.CreateNUWSub(tindex, ConstantInt::get(T_int8, 1));
2954-
Value *ndims = (nd == -1 ? emit_arrayndims(ctx, ary) : ConstantInt::get(T_int16, nd));
2955-
Value *is_vector = ctx.builder.CreateICmpEQ(ndims, ConstantInt::get(T_int16, 1));
2956-
Value *offset = emit_arrayoffset(ctx, ary, nd);
2957-
Value *selidx_v = ctx.builder.CreateSub(emit_vectormaxsize(ctx, ary), ctx.builder.CreateZExt(offset, T_size));
2958-
Value *selidx_m = emit_arraylen(ctx, ary);
2959-
Value *selidx = ctx.builder.CreateSelect(is_vector, selidx_v, selidx_m);
2960-
Value *ptindex = ctx.builder.CreateInBoundsGEP(AT, data, selidx);
2961-
ptindex = emit_bitcast(ctx, ptindex, T_pint8);
2962-
ptindex = ctx.builder.CreateInBoundsGEP(T_int8, ptindex, offset);
2963-
ptindex = ctx.builder.CreateInBoundsGEP(T_int8, ptindex, idx);
2964-
tbaa_decorate(tbaa_arrayselbyte, ctx.builder.CreateStore(tindex, ptindex));
2965-
if (jl_is_datatype(val.typ) && jl_datatype_size(val.typ) == 0) {
2966-
// no-op
2967-
}
2968-
else {
2969-
// copy data
2970-
Value *addr = ctx.builder.CreateInBoundsGEP(AT, data, idx);
2971-
emit_unionmove(ctx, addr, tbaa_arraybuf, val, nullptr);
2972-
}
2939+
else {
2940+
own_ptr = ctx.builder.CreateCall(
2941+
prepare_call(jlarray_data_owner_func),
2942+
{aryv});
2943+
}
2944+
ctx.builder.CreateBr(mergeBB);
2945+
ctx.builder.SetInsertPoint(mergeBB);
2946+
data_owner = ctx.builder.CreatePHI(T_prjlvalue, 2);
2947+
data_owner->addIncoming(aryv, curBB);
2948+
data_owner->addIncoming(own_ptr, ownedBB);
2949+
}
2950+
if (!isboxed && jl_is_uniontype(ety)) {
2951+
Type *AT = ArrayType::get(IntegerType::get(jl_LLVMContext, 8 * al), (elsz + al - 1) / al);
2952+
Value *data = emit_bitcast(ctx, emit_arrayptr(ctx, ary, ary_ex), AT->getPointerTo());
2953+
// compute tindex from val
2954+
jl_cgval_t rhs_union = convert_julia_type(ctx, val, ety);
2955+
Value *tindex = compute_tindex_unboxed(ctx, rhs_union, ety);
2956+
tindex = ctx.builder.CreateNUWSub(tindex, ConstantInt::get(T_int8, 1));
2957+
Value *ndims = (nd == -1 ? emit_arrayndims(ctx, ary) : ConstantInt::get(T_int16, nd));
2958+
Value *is_vector = ctx.builder.CreateICmpEQ(ndims, ConstantInt::get(T_int16, 1));
2959+
Value *offset = emit_arrayoffset(ctx, ary, nd);
2960+
Value *selidx_v = ctx.builder.CreateSub(emit_vectormaxsize(ctx, ary), ctx.builder.CreateZExt(offset, T_size));
2961+
Value *selidx_m = emit_arraylen(ctx, ary);
2962+
Value *selidx = ctx.builder.CreateSelect(is_vector, selidx_v, selidx_m);
2963+
Value *ptindex = ctx.builder.CreateInBoundsGEP(AT, data, selidx);
2964+
ptindex = emit_bitcast(ctx, ptindex, T_pint8);
2965+
ptindex = ctx.builder.CreateInBoundsGEP(T_int8, ptindex, offset);
2966+
ptindex = ctx.builder.CreateInBoundsGEP(T_int8, ptindex, idx);
2967+
tbaa_decorate(tbaa_arrayselbyte, ctx.builder.CreateStore(tindex, ptindex));
2968+
if (jl_is_datatype(val.typ) && jl_datatype_size(val.typ) == 0) {
2969+
// no-op
29732970
}
29742971
else {
2975-
typed_store(ctx,
2976-
emit_arrayptr(ctx, ary, ary_ex, isboxed),
2977-
idx, val, jl_cgval_t(), ety,
2978-
isboxed ? tbaa_ptrarraybuf : tbaa_arraybuf,
2979-
ctx.aliasscope,
2980-
data_owner,
2981-
isboxed,
2982-
isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic, // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0
2983-
isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic, // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0
2984-
0,
2985-
false,
2986-
true,
2987-
false,
2988-
false);
2972+
// copy data
2973+
Value *addr = ctx.builder.CreateInBoundsGEP(AT, data, idx);
2974+
emit_unionmove(ctx, addr, tbaa_arraybuf, val, nullptr);
29892975
}
29902976
}
2991-
*ret = ary;
2992-
return true;
2977+
else {
2978+
typed_store(ctx,
2979+
emit_arrayptr(ctx, ary, ary_ex, isboxed),
2980+
idx, val, jl_cgval_t(), ety,
2981+
isboxed ? tbaa_ptrarraybuf : tbaa_arraybuf,
2982+
ctx.aliasscope,
2983+
data_owner,
2984+
isboxed,
2985+
isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic, // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0
2986+
isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic, // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0
2987+
0,
2988+
false,
2989+
true,
2990+
false,
2991+
false);
2992+
}
29932993
}
2994+
*ret = ary;
2995+
return true;
29942996
}
29952997
}
29962998
}
@@ -3134,21 +3136,21 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
31343136
const jl_cgval_t undefval;
31353137
const jl_cgval_t &obj = argv[1];
31363138
const jl_cgval_t &fld = argv[2];
3137-
const jl_cgval_t &val = argv[isreplacefield ? 4 : 3];
3139+
jl_cgval_t val = argv[isreplacefield ? 4 : 3];
31383140
const jl_cgval_t &cmp = isreplacefield ? argv[3] : undefval;
31393141
enum jl_memory_order order = jl_memory_order_notatomic;
3142+
const std::string &fname = issetfield ? "setfield!" : isreplacefield ? "replacefield!" : "swapfield!";
31403143
if (nargs >= (isreplacefield ? 5 : 4)) {
31413144
const jl_cgval_t &ord = argv[isreplacefield ? 5 : 4];
3142-
emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type,
3143-
issetfield ? "setfield!" : isreplacefield ? "replacefield!" : "swapfield!");
3145+
emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type, fname);
31443146
if (!ord.constant)
31453147
return false;
31463148
order = jl_get_atomic_order((jl_sym_t*)ord.constant, !issetfield, true);
31473149
}
31483150
enum jl_memory_order fail_order = order;
31493151
if (isreplacefield && nargs == 6) {
31503152
const jl_cgval_t &ord = argv[6];
3151-
emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type, "replacefield!");
3153+
emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type, fname);
31523154
if (!ord.constant)
31533155
return false;
31543156
fail_order = jl_get_atomic_order((jl_sym_t*)ord.constant, true, false);
@@ -3172,7 +3174,11 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
31723174
}
31733175
if (idx != -1) {
31743176
jl_value_t *ft = jl_svecref(uty->types, idx);
3175-
if (!jl_has_free_typevars(ft) && jl_subtype(val.typ, ft)) {
3177+
if (!jl_has_free_typevars(ft)) {
3178+
if (!jl_subtype(val.typ, ft)) {
3179+
emit_typecheck(ctx, val, ft, fname);
3180+
val = update_julia_type(ctx, val, ft);
3181+
}
31763182
// TODO: attempt better codegen for approximate types
31773183
bool isboxed = jl_field_isptr(uty, idx);
31783184
bool isatomic = jl_field_isatomic(uty, idx);

0 commit comments

Comments
 (0)