Skip to content

Patch2 #4

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Jul 6, 2021
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 59 additions & 28 deletions base/abstractarray.jl
Original file line number Diff line number Diff line change
@@ -884,34 +884,6 @@ end

## from general iterable to any array

"""
copyto!(dest::AbstractArray, src) -> dest
Copy all elements from collection `src` to array `dest`, whose length must be greater than
or equal to the length `n` of `src`. The first `n` elements of `dest` are overwritten,
the other elements are left untouched.
See also [`copy!`](@ref Base.copy!), [`copy`](@ref).
# Examples
```jldoctest
julia> x = [1., 0., 3., 0., 5.];
julia> y = zeros(7);
julia> copyto!(y, x);
julia> y
7-element Vector{Float64}:
1.0
0.0
3.0
0.0
5.0
0.0
0.0
```
"""
function copyto!(dest::AbstractArray, src)
destiter = eachindex(dest)
y = iterate(destiter)
@@ -989,6 +961,65 @@ function copyto!(dest::AbstractArray, dstart::Integer, src, sstart::Integer, n::
return dest
end

## copy between abstract arrays - generally more efficient
## since a single index variable can be used.

"""
copyto!(dest::AbstractArray, src) -> dest
Copy all elements from collection `src` to array `dest`, whose length must be greater than
or equal to the length `n` of `src`. The first `n` elements of `dest` are overwritten,
the other elements are left untouched.
See also [`copy!`](@ref Base.copy!), [`copy`](@ref).
# Examples
```jldoctest
julia> x = [1., 0., 3., 0., 5.];
julia> y = zeros(7);
julia> copyto!(y, x);
julia> y
7-element Vector{Float64}:
1.0
0.0
3.0
0.0
5.0
0.0
0.0
```
"""
function copyto!(dest::AbstractArray, src::AbstractArray)
    # An empty source leaves `dest` as it is, so the aliasing check is skipped.
    if isempty(src)
        return dest
    end
    # `unalias` hands back a source that is safe to read while `dest` is written.
    source = unalias(dest, src)
    return copyto_unaliased!(IndexStyle(dest), dest, IndexStyle(source), source)
end

function copyto!(deststyle::IndexStyle, dest::AbstractArray,
                 srcstyle::IndexStyle, src::AbstractArray)
    # The caller-supplied index styles are forwarded unchanged; only the source
    # array itself is passed through `unalias` before copying.
    if !isempty(src)
        return copyto_unaliased!(deststyle, dest, srcstyle, unalias(dest, src))
    end
    return dest
end

function copyto_unaliased!(deststyle::IndexStyle, dest::AbstractArray,
                           srcstyle::IndexStyle, src::AbstractArray)
    if !isempty(src)
        # `dest` needs at least as many elements as `src` provides.
        if length(dest) < length(src)
            throw(BoundsError(dest, LinearIndices(src)))
        end
        # Dispatch on the pair of index styles to pick the copy kernel.
        _unaliased_copyto!(deststyle, dest, srcstyle, src)
    end
    return dest
end

# IndexCartesian and CartesianIndices have not been defined yet, so only the Linear-to-Linear case is implemented here.
function _unaliased_copyto!(::IndexLinear, dest::AbstractArray, ::IndexLinear, src::AbstractArray)
    @_inline_meta
    # Constant shift that maps a linear index of `src` onto the matching one of `dest`.
    offset = firstindex(dest) - firstindex(src)
    @inbounds for k in eachindex(src)
        dest[k + offset] = src[k]
    end
end

function copyto!(dest::AbstractArray, dstart::Integer, src::AbstractArray)
    # Copy the whole of `src`, beginning at its first linear index, into `dest`
    # starting at position `dstart`.
    sstart = first(LinearIndices(src))
    return copyto!(dest, dstart, src, sstart, length(src))
end
15 changes: 8 additions & 7 deletions base/array.jl
Original file line number Diff line number Diff line change
@@ -1354,14 +1354,15 @@ See also: [`push!`](@ref), [`replace`](@ref), [`popat!`](@ref), [`splice!`](@ref
# Examples
```jldoctest
julia> insert!([6, 5, 4, 2, 1], 4, 3)
6-element Vector{Int64}:
6
5
4
3
2
julia> insert!(Any[1:6;], 3, "here")
7-element Vector{Any}:
1
2
"here"
3
4
5
6
```
"""
function insert!(a::Array{T,1}, i::Integer, item) where T
6 changes: 6 additions & 0 deletions base/deprecated.jl
Original file line number Diff line number Diff line change
@@ -243,6 +243,12 @@ cat_shape(dims, shape::Tuple{}) = () # make sure `cat_shape(dims, ())` do not re
@deprecate unsafe_indices(A) axes(A) false
@deprecate unsafe_length(r) length(r) false

# These were internal type aliases, but some packages seem to be relying on them,
# so they are still defined here.
# `Any16{N}`: tuple type with 16 leading `Any` elements followed by `Vararg{Any,N}`.
const Any16{N} = Tuple{Any,Any,Any,Any,Any,Any,Any,Any,
Any,Any,Any,Any,Any,Any,Any,Any,Vararg{Any,N}}
# `All16{T,N}`: tuple type with 16 leading elements of type `T` followed by `Vararg{T,N}`.
const All16{T,N} = Tuple{T,T,T,T,T,T,T,T,
T,T,T,T,T,T,T,T,Vararg{T,N}}

# END 1.6 deprecations

# BEGIN 1.7 deprecations
34 changes: 34 additions & 0 deletions base/multidimensional.jl
Original file line number Diff line number Diff line change
@@ -1100,6 +1100,40 @@ in the range of `Rdest`. The sizes of the two regions must match.
"""
copyto!(::AbstractArray, ::CartesianIndices, ::AbstractArray, ::CartesianIndices)

# Cartesian to Linear unaliased copy
function _unaliased_copyto!(::IndexLinear, dest::AbstractArray, ::IndexCartesian, src::AbstractArray)
    @_inline_meta
    axs = axes(src)
    if isempty(axs)
        # Zero-dimensional source: `axes(src)` is `()`, so there is no leading axis
        # to split off. Its single element is read with an empty index.
        dest[firstindex(dest)] = src[]
        return
    end
    # `ax` is the leading axis of `src`; `iter` enumerates the remaining dimensions.
    ax, iter = axs[1], CartesianIndices(tail(axs))
    len, j = length(ax), firstindex(dest)
    @inbounds for I in iter
        # Copy one run along the leading axis into consecutive linear slots of `dest`.
        n = 0
        while n < len
            dest[j + n] = src[first(ax) + n, I.I...]
            n += 1
        end
        j += len  # the next run is written directly after this one
    end
end

# Linear to Cartesian unaliased copy
function _unaliased_copyto!(::IndexCartesian, dest::AbstractArray, ::IndexLinear, src::AbstractArray)
    @_inline_meta
    axs = axes(dest)
    if isempty(axs)
        # Zero-dimensional destination: `axes(dest)` is `()`, so there is no leading
        # axis to split off. Write its single slot directly; as in the general path,
        # an empty source copies nothing.
        isempty(src) || (dest[] = src[firstindex(src)])
        return
    end
    # `ax` is the leading axis of `dest`; `iter` enumerates the remaining dimensions.
    ax, iter = axs[1], CartesianIndices(tail(axs))
    len, i = length(ax), firstindex(src)
    final = lastindex(src) + 1  # one past the last linear index of `src`
    @inbounds for I in iter
        # The last run may be shorter than the leading axis when `src` runs out.
        len′ = min(final - i, len)
        n = 0
        while n < len′
            dest[first(ax) + n, I.I...] = src[i + n]
            n += 1
        end
        # Stop as soon as every element of `src` has been written.
        (i += len′) == final && break
    end
end

# circshift!
circshift!(dest::AbstractArray, src, ::Tuple{}) = copyto!(dest, src)
"""
4 changes: 2 additions & 2 deletions base/operators.jl
Original file line number Diff line number Diff line change
@@ -876,8 +876,8 @@ Modulus after flooring division, returning a value `r` such that `mod(r, y) == m
in the range ``(0, y]`` for positive `y` and in the range ``[y,0)`` for negative `y`.
With integer arguments and positive `y`, this is equal to `mod(x, 1:y)`, and hence natural
for 1-based indexing. By comparison, `mod(x, y) == mod(x, 0:y-1)` is natural for computations with
offsets or strides.
for 1-based indexing. By comparison, `mod(x, y) == mod(x, 0:y-1)` is natural for computations with
offsets or strides.
See also [`mod`](@ref), [`fld1`](@ref), [`fldmod1`](@ref).
6 changes: 3 additions & 3 deletions base/range.jl
Original file line number Diff line number Diff line change
@@ -465,17 +465,17 @@ julia> LinRange(1.5, 5.5, 9)
Compared to using [`range`](@ref), directly constructing a `LinRange` should
have less overhead but won't try to correct for floating point errors:
```julia
```jldoctest
julia> collect(range(-0.1, 0.3, length=5))
5-element Array{Float64,1}:
5-element Vector{Float64}:
-0.1
0.0
0.1
0.2
0.3
julia> collect(LinRange(-0.1, 0.3, 5))
5-element Array{Float64,1}:
5-element Vector{Float64}:
-0.1
-1.3877787807814457e-17
0.09999999999999999
5 changes: 2 additions & 3 deletions base/tuple.jl
Original file line number Diff line number Diff line change
@@ -55,11 +55,10 @@ function setindex(x::Tuple, v, i::Integer)
_setindex(v, i, x...)
end

function _setindex(v, i::Integer, first, tail...)
function _setindex(v, i::Integer, args...)
@_inline_meta
return (ifelse(i == 1, v, first), _setindex(v, i - 1, tail...)...)
return ntuple(j -> ifelse(j == i, v, args[j]), length(args))
end
_setindex(v, i::Integer) = ()


## iterating ##
15 changes: 0 additions & 15 deletions doc/build/build.md
Original file line number Diff line number Diff line change
@@ -250,21 +250,6 @@ Julia uses a custom fork of libuv. It is a small dependency, and can be safely b

As a high-performance numerical language, Julia should be linked to a multi-threaded BLAS and LAPACK, such as OpenBLAS or ATLAS, which will provide much better performance than the reference `libblas` implementations which may be default on some systems.

### Intel MKL

**Note:** If you are building Julia for the sole purpose of incorporating Intel MKL, it may be beneficial to first try [MKL.jl](https://github.com/JuliaComputing/MKL.jl). This package will automatically download MKL and rebuild Julia's system image against it, sidestepping the need to set up a working build environment just to add MKL functionality. MKL.jl replaces OpenBLAS with MKL for dense linear algebra functions called directly from Julia, but SuiteSparse and other C/Fortran libraries will continue to use the BLAS they were linked against at build time. If you want SuiteSparse to use MKL, you will need to build from source.

For a 64-bit architecture, the environment should be set up as follows:
```sh
# bash
source /path/to/intel/bin/compilervars.sh intel64
```
Add the following to the `Make.user` file:

USE_INTEL_MKL = 1

It is highly recommended to start with a fresh clone of the Julia repository.

## Source distributions of releases

Each pre-release and release of Julia has a "full" source distribution and a "light" source
94 changes: 63 additions & 31 deletions src/cgutils.cpp
Original file line number Diff line number Diff line change
@@ -1479,12 +1479,19 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j
if (type_is_ghost(elty))
return ghostValue(jltype);
AllocaInst *intcast = NULL;
if (!isboxed && Order != AtomicOrdering::NotAtomic && !elty->isIntOrPtrTy() && !elty->isFloatingPointTy()) {
if (!isboxed && Order != AtomicOrdering::NotAtomic && !elty->isIntOrPtrTy()) {
const DataLayout &DL = jl_data_layout;
unsigned nb = DL.getTypeSizeInBits(elty);
intcast = ctx.builder.CreateAlloca(elty);
elty = Type::getIntNTy(jl_LLVMContext, nb);
}
Type *realelty = elty;
if (Order != AtomicOrdering::NotAtomic && isa<IntegerType>(elty)) {
unsigned nb = cast<IntegerType>(elty)->getBitWidth();
unsigned nb2 = PowerOf2Ceil(nb);
if (nb != nb2)
elty = Type::getIntNTy(jl_LLVMContext, nb2);
}
Type *ptrty = PointerType::get(elty, ptr->getType()->getPointerAddressSpace());
Value *data;
if (ptr->getType() != ptrty)
@@ -1493,7 +1500,7 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j
data = ptr;
if (idx_0based)
data = ctx.builder.CreateInBoundsGEP(elty, data, idx_0based);
Instruction *load;
Value *instr;
// TODO: can only lazy load if we can create a gc root for ptr for the lifetime of elt
//if (elty->isAggregateType() && tbaa == tbaa_immut && !alignment) { // can lazy load on demand, no copy needed
// elt = data;
@@ -1503,20 +1510,23 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j
alignment = sizeof(void*);
else if (!alignment)
alignment = julia_alignment(jltype);
load = ctx.builder.CreateAlignedLoad(data, Align(alignment), false);
cast<LoadInst>(load)->setOrdering(Order);
LoadInst *load = ctx.builder.CreateAlignedLoad(data, Align(alignment), false);
load->setOrdering(Order);
if (aliasscope)
load->setMetadata("alias.scope", aliasscope);
if (isboxed)
load = maybe_mark_load_dereferenceable(load, true, jltype);
maybe_mark_load_dereferenceable(load, true, jltype);
if (tbaa)
load = tbaa_decorate(tbaa, load);
tbaa_decorate(tbaa, load);
instr = load;
if (elty != realelty)
instr = ctx.builder.CreateTrunc(instr, realelty);
if (intcast) {
ctx.builder.CreateStore(load, ctx.builder.CreateBitCast(intcast, load->getType()->getPointerTo()));
load = ctx.builder.CreateLoad(intcast);
ctx.builder.CreateStore(instr, ctx.builder.CreateBitCast(intcast, instr->getType()->getPointerTo()));
instr = ctx.builder.CreateLoad(intcast);
}
if (maybe_null_if_boxed) {
Value *first_ptr = isboxed ? load : extract_first_ptr(ctx, load);
Value *first_ptr = isboxed ? instr : extract_first_ptr(ctx, instr);
if (first_ptr)
null_pointer_check(ctx, first_ptr, nullcheck);
}
@@ -1526,9 +1536,9 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j
//load->setMetadata(LLVMContext::MD_range, MDNode::get(jl_LLVMContext, {
// ConstantAsMetadata::get(ConstantInt::get(T_int8, 0)),
// ConstantAsMetadata::get(ConstantInt::get(T_int8, 2)) }));
load = ctx.builder.Insert(CastInst::Create(Instruction::Trunc, load, T_int1));
instr = ctx.builder.CreateTrunc(instr, T_int1);
}
return mark_julia_type(ctx, load, isboxed, jltype);
return mark_julia_type(ctx, instr, isboxed, jltype);
}

static jl_cgval_t typed_store(jl_codectx_t &ctx,
@@ -1544,18 +1554,27 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
if (type_is_ghost(elty))
return oldval;
Value *intcast = nullptr;
if (!isboxed && Order != AtomicOrdering::NotAtomic && !elty->isIntOrPtrTy() && !elty->isFloatingPointTy()) {
if (!isboxed && Order != AtomicOrdering::NotAtomic && !elty->isIntOrPtrTy()) {
const DataLayout &DL = jl_data_layout;
unsigned nb = DL.getTypeSizeInBits(elty);
if (!issetfield)
intcast = ctx.builder.CreateAlloca(elty);
elty = Type::getIntNTy(jl_LLVMContext, nb);
}
Type *realelty = elty;
if (Order != AtomicOrdering::NotAtomic && isa<IntegerType>(elty)) {
unsigned nb = cast<IntegerType>(elty)->getBitWidth();
unsigned nb2 = PowerOf2Ceil(nb);
if (nb != nb2)
elty = Type::getIntNTy(jl_LLVMContext, nb2);
}
Value *r;
if (!isboxed)
r = emit_unbox(ctx, elty, rhs, jltype);
r = emit_unbox(ctx, realelty, rhs, jltype);
else
r = boxed(ctx, rhs);
if (realelty != elty)
r = ctx.builder.CreateZExt(r, elty);
Type *ptrty = PointerType::get(elty, ptr->getType()->getPointerAddressSpace());
if (ptr->getType() != ptrty)
ptr = ctx.builder.CreateBitCast(ptr, ptrty);
@@ -1578,18 +1597,19 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
instr->setMetadata("noalias", aliasscope);
if (tbaa)
tbaa_decorate(tbaa, instr);
}
if (isreplacefield) {
oldval = mark_julia_type(ctx, instr, isboxed, jltype);
Value *first_ptr = nullptr;
if (maybe_null_if_boxed)
first_ptr = isboxed ? instr : extract_first_ptr(ctx, instr);
Success = emit_nullcheck_guard(ctx, first_ptr, [&] {
return emit_f_is(ctx, oldval, cmp);
});
BasicBlock *BB = BasicBlock::Create(jl_LLVMContext, "xchg", ctx.f);
ctx.builder.CreateCondBr(Success, BB, DoneBB);
ctx.builder.SetInsertPoint(BB);
assert(realelty == elty);
if (isreplacefield) {
oldval = mark_julia_type(ctx, instr, isboxed, jltype);
Value *first_ptr = nullptr;
if (maybe_null_if_boxed)
first_ptr = isboxed ? instr : extract_first_ptr(ctx, instr);
Success = emit_nullcheck_guard(ctx, first_ptr, [&] {
return emit_f_is(ctx, oldval, cmp);
});
BasicBlock *BB = BasicBlock::Create(jl_LLVMContext, "xchg", ctx.f);
ctx.builder.CreateCondBr(Success, BB, DoneBB);
ctx.builder.SetInsertPoint(BB);
}
}
StoreInst *store = ctx.builder.CreateAlignedStore(r, ptr, Align(alignment));
store->setOrdering(Order);
@@ -1628,7 +1648,9 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
Current->addIncoming(instr, SkipBB);
ctx.builder.SetInsertPoint(BB);
}
Compare = emit_unbox(ctx, elty, cmp, jltype);
Compare = emit_unbox(ctx, realelty, cmp, jltype);
if (realelty != elty)
Compare = ctx.builder.CreateZExt(Compare, elty);
}
else if (cmp.isboxed) {
Compare = boxed(ctx, cmp);
@@ -1676,21 +1698,26 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
if (tbaa)
tbaa_decorate(tbaa, store);
instr = ctx.builder.Insert(ExtractValueInst::Create(store, 0));
Success = ctx.builder.CreateExtractValue(store, 1);
Success = ctx.builder.Insert(ExtractValueInst::Create(store, 1));
Value *Done = Success;
if (needloop) {
if (isreplacefield) {
Value *realinstr = instr;
if (realelty != elty)
realinstr = ctx.builder.CreateTrunc(instr, realelty);
if (intcast) {
ctx.builder.CreateStore(instr, ctx.builder.CreateBitCast(intcast, instr->getType()->getPointerTo()));
ctx.builder.CreateStore(realinstr, ctx.builder.CreateBitCast(intcast, realinstr->getType()->getPointerTo()));
oldval = mark_julia_slot(intcast, jltype, NULL, tbaa_stack);
if (maybe_null_if_boxed)
realinstr = ctx.builder.CreateLoad(intcast);
}
else {
oldval = mark_julia_type(ctx, instr, isboxed, jltype);
oldval = mark_julia_type(ctx, realinstr, isboxed, jltype);
}
Done = emit_guarded_test(ctx, ctx.builder.CreateNot(Success), false, [&] {
Value *first_ptr = nullptr;
if (maybe_null_if_boxed)
first_ptr = isboxed ? instr : extract_first_ptr(ctx, instr);
first_ptr = isboxed ? realinstr : extract_first_ptr(ctx, realinstr);
return emit_nullcheck_guard(ctx, first_ptr, [&] {
return emit_f_is(ctx, oldval, cmp);
});
@@ -1747,6 +1774,8 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
}
}
if (!issetfield) {
if (realelty != elty)
instr = ctx.builder.Insert(CastInst::Create(Instruction::Trunc, instr, realelty));
if (intcast) {
ctx.builder.CreateStore(instr, ctx.builder.CreateBitCast(intcast, instr->getType()->getPointerTo()));
instr = ctx.builder.CreateLoad(intcast);
@@ -2053,6 +2082,9 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st
emit_atomic_error(ctx, "getfield: atomic field cannot be accessed non-atomically");
return jl_cgval_t(); // unreachable
}
if (order == jl_memory_order_unspecified) {
order = isatomic ? jl_memory_order_unordered : jl_memory_order_notatomic;
}
if (jfty == jl_bottom_type) {
raise_exception(ctx, literal_pointer_val(ctx, jl_undefref_exception));
return jl_cgval_t(); // unreachable
@@ -2126,7 +2158,7 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st
if (needlock)
emit_lockstate_value(ctx, strct, true);
jl_cgval_t ret = typed_load(ctx, addr, NULL, jfty, tbaa, nullptr, false,
needlock || order <= jl_memory_order_notatomic ? AtomicOrdering::NotAtomic : get_llvm_atomic_order(order), // TODO: we should use unordered for anything with CountTrackedPointers(elty).count > 0
needlock ? AtomicOrdering::NotAtomic : get_llvm_atomic_order(order), // TODO: we should use unordered for anything with CountTrackedPointers(elty).count > 0
maybe_null, align, nullcheck);
if (needlock)
emit_lockstate_value(ctx, strct, false);
219 changes: 129 additions & 90 deletions src/datatype.c
Original file line number Diff line number Diff line change
@@ -730,7 +730,53 @@ JL_DLLEXPORT int jl_is_foreign_type(jl_datatype_t *dt)
#error MAX_POINTERATOMIC_SIZE too large
#endif
#if MAX_POINTERATOMIC_SIZE >= 16
#ifndef _P64
#error 12 byte GC pool size not implemented for 32-bit
#endif
typedef __uint128_t uint128_t;
typedef uint128_t jl_uatomicmax_t;
#else
typedef uint64_t jl_uatomicmax_t;
#endif

#if BYTE_ORDER != LITTLE_ENDIAN
#error using masks for atomics (instead of memcpy like nb == 16) assumes little endian
#endif

static inline uint32_t zext_read32(const jl_value_t *x, size_t nb) JL_NOTSAFEPOINT
{
    // Always load a full 32-bit word; when nb is not 4 (i.e. nb == 3) only the
    // low 24 bits are kept and the remaining bits are zeroed.
    const uint32_t word = *(uint32_t*)x;
    return (nb == 4) ? word : (word & 0xffffffu);
}

#if MAX_POINTERATOMIC_SIZE >= 8
static inline uint64_t zext_read64(const jl_value_t *x, size_t nb) JL_NOTSAFEPOINT
{
    // Always load a full 64-bit word, then zero every bit above the low nb bytes.
    const uint64_t word = *(uint64_t*)x;
    switch (nb) {
    case 8:
        return word;
    case 7:
        return word & 0xffffffffffffffu;
    case 6:
        return word & 0xffffffffffffu;
    default: // nb == 5
        return word & 0xffffffffffu;
    }
}
#endif

#if MAX_POINTERATOMIC_SIZE >= 16
static inline uint128_t zext_read128(const jl_value_t *x, size_t nb) JL_NOTSAFEPOINT
{
    // Exactly 16 bytes: a direct full-width load is enough.
    if (nb == 16)
        return *(uint128_t*)x;
    // Otherwise copy only nb bytes into a zero-initialized value, so the
    // remaining bytes of the result stay zero.
    uint128_t padded = 0;
    memcpy(&padded, x, nb);
    return padded;
}
#endif

JL_DLLEXPORT jl_value_t *jl_new_bits(jl_value_t *dt, const void *data)
@@ -755,16 +801,7 @@ JL_DLLEXPORT jl_value_t *jl_new_bits(jl_value_t *dt, const void *data)

jl_task_t *ct = jl_current_task;
jl_value_t *v = jl_gc_alloc(ct->ptls, nb, bt);
switch (nb) {
case 1: *(uint8_t*) v = *(uint8_t*)data; break;
case 2: *(uint16_t*)v = jl_load_unaligned_i16(data); break;
case 4: *(uint32_t*)v = jl_load_unaligned_i32(data); break;
case 8: *(uint64_t*)v = jl_load_unaligned_i64(data); break;
case 16:
memcpy(jl_assume_aligned(v, 16), data, 16);
break;
default: memcpy(v, data, nb);
}
memcpy(jl_assume_aligned(v, sizeof(void*)), data, nb);
return v;
}

@@ -789,40 +826,51 @@ JL_DLLEXPORT jl_value_t *jl_atomic_new_bits(jl_value_t *dt, const char *data)

jl_task_t *ct = jl_current_task;
jl_value_t *v = jl_gc_alloc(ct->ptls, nb, bt);
switch (nb) {
case 1: *(uint8_t*) v = jl_atomic_load((uint8_t*)data); break;
case 2: *(uint16_t*)v = jl_atomic_load((uint16_t*)data); break;
case 4: *(uint32_t*)v = jl_atomic_load((uint32_t*)data); break;
// data is aligned to the power of two,
// we will write too much of v, but the padding should exist
if (nb == 1)
*(uint8_t*) v = jl_atomic_load((uint8_t*)data);
else if (nb <= 2)
*(uint16_t*)v = jl_atomic_load((uint16_t*)data);
else if (nb <= 4)
*(uint32_t*)v = jl_atomic_load((uint32_t*)data);
#if MAX_POINTERATOMIC_SIZE >= 8
case 8: *(uint64_t*)v = jl_atomic_load((uint64_t*)data); break;
else if (nb <= 8)
*(uint64_t*)v = jl_atomic_load((uint64_t*)data);
#endif
#if MAX_POINTERATOMIC_SIZE >= 16
case 16: *(uint128_t*)v = jl_atomic_load((uint128_t*)data); break;
else if (nb <= 16)
*(uint128_t*)v = jl_atomic_load((uint128_t*)data);
#endif
default:
else
abort();
}
return v;
}

JL_DLLEXPORT void jl_atomic_store_bits(char *dst, const jl_value_t *src, int nb)
{
// dst must have the required alignment for an atomic of the given size
// src must be aligned by the GC
switch (nb) {
case 0: break;
case 1: jl_atomic_store((uint8_t*)dst, *(uint8_t*)src); break;
case 2: jl_atomic_store((uint16_t*)dst, *(uint16_t*)src); break;
case 4: jl_atomic_store((uint32_t*)dst, *(uint32_t*)src); break;
// we may therefore read too much from src, but will zero the excess bits
// before the store (so that we can get faster cmpswap later)
if (nb == 0)
;
else if (nb == 1)
jl_atomic_store((uint8_t*)dst, *(uint8_t*)src);
else if (nb == 2)
jl_atomic_store((uint16_t*)dst, *(uint16_t*)src);
else if (nb <= 4)
jl_atomic_store((uint32_t*)dst, zext_read32(src, nb));
#if MAX_POINTERATOMIC_SIZE >= 8
case 8: jl_atomic_store((uint64_t*)dst, *(uint64_t*)src); break;
else if (nb <= 8)
jl_atomic_store((uint64_t*)dst, zext_read64(src, nb));
#endif
#if MAX_POINTERATOMIC_SIZE >= 16
case 16: jl_atomic_store((uint128_t*)dst, *(uint128_t*)src); break;
else if (nb <= 16)
jl_atomic_store((uint128_t*)dst, zext_read128(src, nb));
#endif
default:
else
abort();
}
}

JL_DLLEXPORT jl_value_t *jl_atomic_swap_bits(jl_value_t *dt, char *dst, const jl_value_t *src, int nb)
@@ -845,19 +893,22 @@ JL_DLLEXPORT jl_value_t *jl_atomic_swap_bits(jl_value_t *dt, char *dst, const jl

jl_task_t *ct = jl_current_task;
jl_value_t *v = jl_gc_alloc(ct->ptls, jl_datatype_size(bt), bt);
switch (nb) {
case 1: *(uint8_t*) v = jl_atomic_exchange((uint8_t*)dst, *(uint8_t*)src); break;
case 2: *(uint16_t*)v = jl_atomic_exchange((uint16_t*)dst, *(uint16_t*)src); break;
case 4: *(uint32_t*)v = jl_atomic_exchange((uint32_t*)dst, *(uint32_t*)src); break;
if (nb == 1)
*(uint8_t*)v = jl_atomic_exchange((uint8_t*)dst, *(uint8_t*)src);
else if (nb == 2)
*(uint16_t*)v = jl_atomic_exchange((uint16_t*)dst, *(uint16_t*)src);
else if (nb <= 4)
*(uint32_t*)v = jl_atomic_exchange((uint32_t*)dst, zext_read32(src, nb));
#if MAX_POINTERATOMIC_SIZE >= 8
case 8: *(uint64_t*)v = jl_atomic_exchange((uint64_t*)dst, *(uint64_t*)src); break;
else if (nb <= 8)
*(uint64_t*)v = jl_atomic_exchange((uint64_t*)dst, zext_read64(src, nb));
#endif
#if MAX_POINTERATOMIC_SIZE >= 16
case 16: *(uint128_t*)v = jl_atomic_exchange((uint128_t*)dst, *(uint128_t*)src); break;
else if (nb <= 16)
*(uint128_t*)v = jl_atomic_exchange((uint128_t*)dst, zext_read128(src, nb));
#endif
default:
else
abort();
}
return v;
}

@@ -866,41 +917,37 @@ JL_DLLEXPORT int jl_atomic_bool_cmpswap_bits(char *dst, const jl_value_t *expect
// dst must have the required alignment for an atomic of the given size
// n.b.: this can spuriously fail if there are padding bits, the caller should deal with that
int success;
switch (nb) {
case 0: {
if (nb == 0) {
success = 1;
break;
}
case 1: {
else if (nb == 1) {
uint8_t y = *(uint8_t*)expected;
success = jl_atomic_cmpswap((uint8_t*)dst, &y, *(uint8_t*)src);
break;
}
case 2: {
else if (nb == 2) {
uint16_t y = *(uint16_t*)expected;
success = jl_atomic_cmpswap((uint16_t*)dst, &y, *(uint16_t*)src);
break;
}
case 4: {
uint32_t y = *(uint32_t*)expected;
success = jl_atomic_cmpswap((uint32_t*)dst, &y, *(uint32_t*)src);
break;
else if (nb <= 4) {
uint32_t y = zext_read32(expected, nb);
uint32_t z = zext_read32(src, nb);
success = jl_atomic_cmpswap((uint32_t*)dst, &y, z);
}
#if MAX_POINTERATOMIC_SIZE >= 8
case 8: {
uint64_t y = *(uint64_t*)expected;
success = jl_atomic_cmpswap((uint64_t*)dst, &y, *(uint64_t*)src);
break;
else if (nb <= 8) {
uint64_t y = zext_read64(expected, nb);
uint64_t z = zext_read64(src, nb);
success = jl_atomic_cmpswap((uint64_t*)dst, &y, z);
}
#endif
#if MAX_POINTERATOMIC_SIZE >= 16
case 16: {
uint128_t y = *(uint128_t*)expected;
success = jl_atomic_cmpswap((uint128_t*)dst, &y, *(uint128_t*)src);
break;
else if (nb <= 16) {
uint128_t y = zext_read128(expected, nb);
uint128_t z = zext_read128(src, nb);
success = jl_atomic_cmpswap((uint128_t*)dst, &y, z);
}
#endif
default:
else {
abort();
}
return success;
@@ -920,45 +967,42 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, char *dst, co
jl_value_t *y = jl_gc_alloc(ct->ptls, isptr ? nb : tuptyp->size, isptr ? dt : tuptyp);
int success;
jl_datatype_t *et = (jl_datatype_t*)jl_typeof(expected);
switch (nb) {
case 0: {
if (nb == 0) {
success = (dt == et);
break;
}
case 1: {
else if (nb == 1) {
uint8_t *y8 = (uint8_t*)y;
assert(!dt->layout->haspadding);
if (dt == et) {
*y8 = *(uint8_t*)expected;
success = jl_atomic_cmpswap((uint8_t*)dst, y8, *(uint8_t*)src);
uint8_t z8 = *(uint8_t*)src;
success = jl_atomic_cmpswap((uint8_t*)dst, y8, z8);
}
else {
*y8 = jl_atomic_load((uint8_t*)dst);
success = 0;
}
break;
}
case 2: {
else if (nb == 2) {
uint16_t *y16 = (uint16_t*)y;
assert(!dt->layout->haspadding);
if (dt == et) {
*y16 = *(uint16_t*)expected;
while (1) {
success = jl_atomic_cmpswap((uint16_t*)dst, y16, *(uint16_t*)src);
if (success || !dt->layout->haspadding || !jl_egal__bits(y, expected, dt))
break;
}
uint16_t z16 = *(uint16_t*)src;
success = jl_atomic_cmpswap((uint16_t*)dst, y16, z16);
}
else {
*y16 = jl_atomic_load((uint16_t*)dst);
success = 0;
}
break;
}
case 4: {
else if (nb <= 4) {
uint32_t *y32 = (uint32_t*)y;
if (dt == et) {
*y32 = *(uint32_t*)expected;
*y32 = zext_read32(expected, nb);
uint32_t z32 = zext_read32(src, nb);
while (1) {
success = jl_atomic_cmpswap((uint32_t*)dst, y32, *(uint32_t*)src);
success = jl_atomic_cmpswap((uint32_t*)dst, y32, z32);
if (success || !dt->layout->haspadding || !jl_egal__bits(y, expected, dt))
break;
}
@@ -967,15 +1011,15 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, char *dst, co
*y32 = jl_atomic_load((uint32_t*)dst);
success = 0;
}
break;
}
#if MAX_POINTERATOMIC_SIZE >= 8
case 8: {
else if (nb <= 8) {
uint64_t *y64 = (uint64_t*)y;
if (dt == et) {
*y64 = *(uint64_t*)expected;
*y64 = zext_read64(expected, nb);
uint64_t z64 = zext_read64(src, nb);
while (1) {
success = jl_atomic_cmpswap((uint64_t*)dst, y64, *(uint64_t*)src);
success = jl_atomic_cmpswap((uint64_t*)dst, y64, z64);
if (success || !dt->layout->haspadding || !jl_egal__bits(y, expected, dt))
break;
}
@@ -984,16 +1028,16 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, char *dst, co
*y64 = jl_atomic_load((uint64_t*)dst);
success = 0;
}
break;
}
#endif
#if MAX_POINTERATOMIC_SIZE >= 16
case 16: {
else if (nb <= 16) {
uint128_t *y128 = (uint128_t*)y;
if (dt == et) {
*y128 = *(uint128_t*)expected;
*y128 = zext_read128(expected, nb);
uint128_t z128 = zext_read128(src, nb);
while (1) {
success = jl_atomic_cmpswap((uint128_t*)dst, y128, *(uint128_t*)src);
success = jl_atomic_cmpswap((uint128_t*)dst, y128, z128);
if (success || !dt->layout->haspadding || !jl_egal__bits(y, expected, dt))
break;
}
@@ -1002,10 +1046,9 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, char *dst, co
*y128 = jl_atomic_load((uint128_t*)dst);
success = 0;
}
break;
}
#endif
default:
else {
abort();
}
if (isptr) {
@@ -1417,16 +1460,12 @@ static inline void memassign_safe(int hasptr, jl_value_t *parent, char *dst, con
else {
// src must be a heap box.
assert(nb == jl_datatype_size(jl_typeof(src)));
if (nb >= 16) {
memcpy(dst, jl_assume_aligned(src, 16), nb);
return;
}
}
switch (nb) {
case 0: break;
case 1: *(uint8_t*)dst = *(uint8_t*)src; break;
case 2: jl_store_unaligned_i16(dst, *(uint16_t*)src); break;
case 4: jl_store_unaligned_i32(dst, *(uint32_t*)src); break;
case 8: jl_store_unaligned_i64(dst, *(uint64_t*)src); break;
case 16: memcpy(dst, jl_assume_aligned(src, 16), 16); break;
default: memcpy(dst, src, nb); break;
}
memcpy(dst, jl_assume_aligned(src, sizeof(void*)), nb);
}

void set_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *rhs, int isatomic) JL_NOTSAFEPOINT
1 change: 1 addition & 0 deletions src/jl_exported_funcs.inc
Original file line number Diff line number Diff line change
@@ -149,6 +149,7 @@
XX(jl_expand_with_loc) \
XX(jl_expand_with_loc_warn) \
XX(jl_extern_c) \
XX(jl_field_index) \
XX(jl_gc_add_finalizer) \
XX(jl_gc_add_finalizer_th) \
XX(jl_gc_add_ptr_finalizer) \
2 changes: 2 additions & 0 deletions src/processor.cpp
Original file line number Diff line number Diff line change
@@ -401,6 +401,8 @@ static inline std::vector<uint8_t> serialize_target_data(llvm::StringRef name,
{
std::vector<uint8_t> res;
auto add_data = [&] (const void *data, size_t sz) {
if (sz == 0)
return;
size_t old_sz = res.size();
res.resize(old_sz + sz);
memcpy(&res[old_sz], data, sz);
67 changes: 47 additions & 20 deletions test/atomics.jl
Original file line number Diff line number Diff line change
@@ -63,34 +63,59 @@ let (x, y) = (Complex{Int128}(10, 30), Complex{Int128}(20, 40))
@test sizeof(r) == sizeof(ar) - Int(fieldoffset(typeof(ar), 1))
end

struct PadIntA <: Number # internal padding
    a::Int8
    b::Int16
    # `a` is always set to 82; only `b` carries the value passed by the caller.
    function PadIntA(x)
        return new(82, x)
    end
end
struct PadIntB <: Number # external padding
    a::UInt8
    b::UInt8
    c::UInt8
    # Split the low 24 bits of `x` into three bytes, least-significant byte first.
    function PadIntB(x)
        byte0 = x & 0xff
        byte1 = (x >> 8) & 0xff
        byte2 = (x >> 16) & 0xff
        return new(byte0, byte1, byte2)
    end
end
primitive type Int24 <: Signed 24 end # integral padding

# Build an `Int24` by truncating an `Int`.
function Int24(x::Int)
    return Core.Intrinsics.trunc_int(Int24, x)
end

# Reassemble the three bytes of a `PadIntB` into one `Int` (`a` is the lowest byte).
function Base.Int(x::PadIntB)
    return x.a + (Int(x.b) << 8) + (Int(x.c) << 16)
end

# Adding an `Int` yields a new value of the same wrapper type.
function Base.:(+)(x::PadIntA, b::Int)
    return PadIntA(x.b + b)
end
function Base.:(+)(x::PadIntB, b::Int)
    return PadIntB(Int(x) + b)
end
function Base.:(+)(x::Int24, b::Int)
    return Core.Intrinsics.add_int(x, Int24(b))
end

# Compact printing that shows each value as `TypeName(integer)`.
function Base.show(io::IO, x::PadIntA)
    return print(io, "PadIntA(", x.b, ")")
end
function Base.show(io::IO, x::PadIntB)
    return print(io, "PadIntB(", Int(x), ")")
end
function Base.show(io::IO, x::Int24)
    return print(io, "Int24(", Core.Intrinsics.zext_int(Int, x), ")")
end

@noinline function _test_field_operators(r)
r = r[]
T = typeof(getfield(r, :x))
@test getfield(r, :x, :sequentially_consistent) === T(12345_10)
@test setfield!(r, :x, T(12345_1), :sequentially_consistent) === T(12345_1)
@test getfield(r, :x, :sequentially_consistent) === T(12345_1)
@test replacefield!(r, :x, 12345_1 % UInt, T(12345_100), :sequentially_consistent, :sequentially_consistent) === (T(12345_1), false)
@test replacefield!(r, :x, T(12345_1), T(12345_100), :sequentially_consistent, :sequentially_consistent) === (T(12345_1), true)
@test getfield(r, :x, :sequentially_consistent) === T(12345_100)
@test replacefield!(r, :x, T(12345_1), T(12345_1), :sequentially_consistent, :sequentially_consistent) === (T(12345_100), false)
@test getfield(r, :x, :sequentially_consistent) === T(12345_100)
@test modifyfield!(r, :x, add, 1, :sequentially_consistent) === (T(12345_100), T(12345_101))
@test modifyfield!(r, :x, add, 1, :sequentially_consistent) === (T(12345_101), T(12345_102))
@test getfield(r, :x, :sequentially_consistent) === T(12345_102)
@test swapfield!(r, :x, T(12345_1), :sequentially_consistent) === T(12345_102)
@test getfield(r, :x, :sequentially_consistent) === T(12345_1)
@test getfield(r, :x, :sequentially_consistent) === T(123_10)
@test setfield!(r, :x, T(123_1), :sequentially_consistent) === T(123_1)
@test getfield(r, :x, :sequentially_consistent) === T(123_1)
@test replacefield!(r, :x, 123_1 % UInt, T(123_30), :sequentially_consistent, :sequentially_consistent) === (T(123_1), false)
@test replacefield!(r, :x, T(123_1), T(123_30), :sequentially_consistent, :sequentially_consistent) === (T(123_1), true)
@test getfield(r, :x, :sequentially_consistent) === T(123_30)
@test replacefield!(r, :x, T(123_1), T(123_1), :sequentially_consistent, :sequentially_consistent) === (T(123_30), false)
@test getfield(r, :x, :sequentially_consistent) === T(123_30)
@test modifyfield!(r, :x, add, 1, :sequentially_consistent) === (T(123_30), T(123_31))
@test modifyfield!(r, :x, add, 1, :sequentially_consistent) === (T(123_31), T(123_32))
@test getfield(r, :x, :sequentially_consistent) === T(123_32)
@test swapfield!(r, :x, T(123_1), :sequentially_consistent) === T(123_32)
@test getfield(r, :x, :sequentially_consistent) === T(123_1)
nothing
end
@noinline function test_field_operators(r)
    # Run the same checks twice on fresh copies of `r`: once through a `Ref` whose
    # element type is taken from the copy, once through a `Ref{Any}`.
    typed = Ref(copy(r))
    _test_field_operators(typed)
    erased = Ref{Any}(copy(r))
    _test_field_operators(erased)
    return nothing
end
test_field_operators(ARefxy{Int}(12345_10, 12345_20))
test_field_operators(ARefxy{Any}(12345_10, 12345_20))
test_field_operators(ARefxy{Union{Nothing,Int}}(12345_10, nothing))
test_field_operators(ARefxy{Complex{Int32}}(12345_10, 12345_20))
test_field_operators(ARefxy{Complex{Int128}}(12345_10, 12345_20))
test_field_operators(ARefxy{Int}(123_10, 123_20))
test_field_operators(ARefxy{Any}(123_10, 123_20))
test_field_operators(ARefxy{Union{Nothing,Int}}(123_10, nothing))
test_field_operators(ARefxy{Complex{Int32}}(123_10, 123_20))
test_field_operators(ARefxy{Complex{Int128}}(123_10, 123_20))
test_field_operators(ARefxy{PadIntA}(123_10, 123_20))
test_field_operators(ARefxy{PadIntB}(123_10, 123_20))
#FIXME: test_field_operators(ARefxy{Int24}(123_10, 123_20))
test_field_operators(ARefxy{Float64}(123_10, 123_20))

@noinline function _test_field_orderings(r, x, y)
@nospecialize x y
@@ -247,11 +272,13 @@ test_field_orderings(true, false)
test_field_orderings("hi", "bye")
test_field_orderings(:hi, :bye)
test_field_orderings(nothing, nothing)
test_field_orderings(ARefxy{Any}(12345_10, 12345_20), 12345_10, 12345_20)
test_field_orderings(ARefxy{Any}(123_10, 123_20), 123_10, 123_20)
test_field_orderings(ARefxy{Any}(true, false), true, false)
test_field_orderings(ARefxy{Union{Nothing,Missing}}(nothing, missing), nothing, missing)
test_field_orderings(ARefxy{Union{Nothing,Int}}(nothing, 12345_1), nothing, 12345_1)
test_field_orderings(ARefxy{Union{Nothing,Int}}(nothing, 123_1), nothing, 123_1)
test_field_orderings(Complex{Int128}(10, 30), Complex{Int128}(20, 40))
test_field_orderings(10.0, 20.0)
test_field_orderings(NaN, Inf)

struct UndefComplex{T}
re::T
3 changes: 2 additions & 1 deletion test/mpfr.jl
Original file line number Diff line number Diff line change
@@ -606,7 +606,8 @@ end
@test log(x) == log(42)
@test isinf(log(BigFloat(0)))
@test_throws DomainError log(BigFloat(-1))
@test log2(x) == log2(42)
# issue #41450
@test_skip log2(x) == log2(42)
@test isinf(log2(BigFloat(0)))
@test_throws DomainError log2(BigFloat(-1))
@test log10(x) == log10(42)
3 changes: 3 additions & 0 deletions test/tuple.jl
Original file line number Diff line number Diff line change
@@ -533,6 +533,9 @@ end

@test Base.setindex((1, 2, 4), 4, true) === (4, 2, 4)
@test_throws BoundsError Base.setindex((1, 2), 2, false)

f() = Base.setindex((1:1, 2:2, 3:3), 9, 1)
@test @inferred(f()) == (9, 2:2, 3:3)
end

@testset "inferrable range indexing with constant values" begin