Skip to content

Support memcpy/memmove with differing src/dst alignment #55633

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 9, 2018
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/librustc_codegen_llvm/abi.rs
Original file line number Diff line number Diff line change
@@ -225,9 +225,10 @@ impl ArgTypeExt<'ll, 'tcx> for ArgType<'tcx, Ty<'tcx>> {
// ...and then memcpy it to the intended destination.
base::call_memcpy(bx,
bx.pointercast(dst.llval, Type::i8p(cx)),
self.layout.align,
bx.pointercast(llscratch, Type::i8p(cx)),
scratch_align,
C_usize(cx, self.layout.size.bytes()),
self.layout.align.min(scratch_align),
MemFlags::empty());

bx.lifetime_end(llscratch, scratch_size);
27 changes: 12 additions & 15 deletions src/librustc_codegen_llvm/base.rs
Original file line number Diff line number Diff line change
@@ -53,7 +53,7 @@ use mir::place::PlaceRef;
use attributes;
use builder::{Builder, MemFlags};
use callee;
use common::{C_bool, C_bytes_in_context, C_i32, C_usize};
use common::{C_bool, C_bytes_in_context, C_usize};
use rustc_mir::monomorphize::item::DefPathBasedNames;
use common::{C_struct_in_context, C_array, val_ty};
use consts;
@@ -77,7 +77,6 @@ use rustc_data_structures::sync::Lrc;
use std::any::Any;
use std::cmp;
use std::ffi::CString;
use std::i32;
use std::ops::{Deref, DerefMut};
use std::sync::mpsc;
use std::time::{Instant, Duration};
@@ -319,8 +318,8 @@ pub fn coerce_unsized_into(
}

if src_f.layout.ty == dst_f.layout.ty {
memcpy_ty(bx, dst_f.llval, src_f.llval, src_f.layout,
src_f.align.min(dst_f.align), MemFlags::empty());
memcpy_ty(bx, dst_f.llval, dst_f.align, src_f.llval, src_f.align,
src_f.layout, MemFlags::empty());
} else {
coerce_unsized_into(bx, src_f, dst_f);
}
@@ -420,44 +419,42 @@ pub fn to_immediate_scalar(
pub fn call_memcpy(
bx: &Builder<'_, 'll, '_>,
dst: &'ll Value,
dst_align: Align,
src: &'ll Value,
src_align: Align,
n_bytes: &'ll Value,
align: Align,
flags: MemFlags,
) {
if flags.contains(MemFlags::NONTEMPORAL) {
// HACK(nox): This is inefficient but there is no nontemporal memcpy.
let val = bx.load(src, align);
let val = bx.load(src, src_align);
let ptr = bx.pointercast(dst, val_ty(val).ptr_to());
bx.store_with_flags(val, ptr, align, flags);
bx.store_with_flags(val, ptr, dst_align, flags);
return;
}
let cx = bx.cx;
let ptr_width = &cx.sess().target.target.target_pointer_width;
let key = format!("llvm.memcpy.p0i8.p0i8.i{}", ptr_width);
let memcpy = cx.get_intrinsic(&key);
let src_ptr = bx.pointercast(src, Type::i8p(cx));
let dst_ptr = bx.pointercast(dst, Type::i8p(cx));
let size = bx.intcast(n_bytes, cx.isize_ty, false);
let align = C_i32(cx, align.abi() as i32);
let volatile = C_bool(cx, flags.contains(MemFlags::VOLATILE));
bx.call(memcpy, &[dst_ptr, src_ptr, size, align, volatile], None);
let volatile = flags.contains(MemFlags::VOLATILE);
bx.memcpy(dst_ptr, dst_align.abi(), src_ptr, src_align.abi(), size, volatile);
}

pub fn memcpy_ty(
Copy link
Member

@nagisa nagisa Nov 3, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the benefit of allowing caller to specify different src layout and src alignment (contained within their respective layout) separately?

Seeing it is a convenience wrapper, it would make sense to simply take TyLayout for both dst and src and extract the necessary information from that.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think those alignments necessarily match. A type can be stored at a location with higher alignment than what the type layout alignment specifies. One example where this may happen is for field projections, where the alignment of the field is determined by the alignment of the containing structure and the field offset, not the layout alignment of the field type.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One example where this may happen is for field projections, where the alignment of the field is determined by the alignment of the containing structure and the field offset, not the layout alignment of the field type.

Fair. Although we don’t optimise this way currently, there were some discussion about doing so, so code might end up becoming what it looks like now anyway.

bx: &Builder<'_, 'll, 'tcx>,
dst: &'ll Value,
dst_align: Align,
src: &'ll Value,
src_align: Align,
layout: TyLayout<'tcx>,
align: Align,
flags: MemFlags,
) {
let size = layout.size.bytes();
if size == 0 {
return;
}

call_memcpy(bx, dst, src, C_usize(bx.cx, size), align, flags);
call_memcpy(bx, dst, dst_align, src, src_align, C_usize(bx.cx, size), flags);
}

pub fn call_memset(
18 changes: 18 additions & 0 deletions src/librustc_codegen_llvm/builder.rs
Original file line number Diff line number Diff line change
@@ -784,6 +784,24 @@ impl Builder<'a, 'll, 'tcx> {
}
}

pub fn memcpy(&self, dst: &'ll Value, dst_align: u64,
src: &'ll Value, src_align: u64,
size: &'ll Value, is_volatile: bool) -> &'ll Value {
unsafe {
llvm::LLVMRustBuildMemCpy(self.llbuilder, dst, dst_align as c_uint,
src, src_align as c_uint, size, is_volatile)
}
}

pub fn memmove(&self, dst: &'ll Value, dst_align: u64,
src: &'ll Value, src_align: u64,
size: &'ll Value, is_volatile: bool) -> &'ll Value {
unsafe {
llvm::LLVMRustBuildMemMove(self.llbuilder, dst, dst_align as c_uint,
src, src_align as c_uint, size, is_volatile)
}
}

pub fn minnum(&self, lhs: &'ll Value, rhs: &'ll Value) -> &'ll Value {
self.count_insn("minnum");
unsafe {
6 changes: 0 additions & 6 deletions src/librustc_codegen_llvm/context.rs
Original file line number Diff line number Diff line change
@@ -530,12 +530,6 @@ fn declare_intrinsic(cx: &CodegenCx<'ll, '_>, key: &str) -> Option<&'ll Value> {
let t_v4f64 = Type::vector(t_f64, 4);
let t_v8f64 = Type::vector(t_f64, 8);

ifn!("llvm.memcpy.p0i8.p0i8.i16", fn(i8p, i8p, t_i16, t_i32, i1) -> void);
ifn!("llvm.memcpy.p0i8.p0i8.i32", fn(i8p, i8p, t_i32, t_i32, i1) -> void);
ifn!("llvm.memcpy.p0i8.p0i8.i64", fn(i8p, i8p, t_i64, t_i32, i1) -> void);
ifn!("llvm.memmove.p0i8.p0i8.i16", fn(i8p, i8p, t_i16, t_i32, i1) -> void);
ifn!("llvm.memmove.p0i8.p0i8.i32", fn(i8p, i8p, t_i32, t_i32, i1) -> void);
ifn!("llvm.memmove.p0i8.p0i8.i64", fn(i8p, i8p, t_i64, t_i32, i1) -> void);
ifn!("llvm.memset.p0i8.i16", fn(i8p, t_i8, t_i16, t_i32, i1) -> void);
ifn!("llvm.memset.p0i8.i32", fn(i8p, t_i8, t_i32, t_i32, i1) -> void);
ifn!("llvm.memset.p0i8.i64", fn(i8p, t_i8, t_i64, t_i32, i1) -> void);
28 changes: 7 additions & 21 deletions src/librustc_codegen_llvm/intrinsic.rs
Original file line number Diff line number Diff line change
@@ -23,7 +23,7 @@ use glue;
use type_::Type;
use type_of::LayoutLlvmExt;
use rustc::ty::{self, Ty};
use rustc::ty::layout::{HasDataLayout, LayoutOf};
use rustc::ty::layout::LayoutOf;
use rustc::hir;
use syntax::ast;
use syntax::symbol::Symbol;
@@ -690,28 +690,14 @@ fn copy_intrinsic(
let cx = bx.cx;
let (size, align) = cx.size_and_align_of(ty);
let size = C_usize(cx, size.bytes());
let align = C_i32(cx, align.abi() as i32);

let operation = if allow_overlap {
"memmove"
} else {
"memcpy"
};

let name = format!("llvm.{}.p0i8.p0i8.i{}", operation,
cx.data_layout().pointer_size.bits());

let align = align.abi();
let dst_ptr = bx.pointercast(dst, Type::i8p(cx));
let src_ptr = bx.pointercast(src, Type::i8p(cx));
let llfn = cx.get_intrinsic(&name);

bx.call(llfn,
&[dst_ptr,
src_ptr,
bx.mul(size, count),
align,
C_bool(cx, volatile)],
None)
if allow_overlap {
bx.memmove(dst_ptr, align, src_ptr, align, bx.mul(size, count), volatile)
} else {
bx.memcpy(dst_ptr, align, src_ptr, align, bx.mul(size, count), volatile)
}
}

fn memset_intrinsic(
16 changes: 16 additions & 0 deletions src/librustc_codegen_llvm/llvm/ffi.rs
Original file line number Diff line number Diff line change
@@ -998,6 +998,22 @@ extern "C" {
Bundle: Option<&OperandBundleDef<'a>>,
Name: *const c_char)
-> &'a Value;
pub fn LLVMRustBuildMemCpy(B: &Builder<'a>,
Dst: &'a Value,
DstAlign: c_uint,
Src: &'a Value,
SrcAlign: c_uint,
Size: &'a Value,
IsVolatile: bool)
-> &'a Value;
pub fn LLVMRustBuildMemMove(B: &Builder<'a>,
Dst: &'a Value,
DstAlign: c_uint,
Src: &'a Value,
SrcAlign: c_uint,
Size: &'a Value,
IsVolatile: bool)
-> &'a Value;
pub fn LLVMBuildSelect(B: &Builder<'a>,
If: &'a Value,
Then: &'a Value,
3 changes: 2 additions & 1 deletion src/librustc_codegen_llvm/mir/block.rs
Original file line number Diff line number Diff line change
@@ -784,7 +784,8 @@ impl FunctionCx<'a, 'll, 'tcx> {
// have scary latent bugs around.

let scratch = PlaceRef::alloca(bx, arg.layout, "arg");
base::memcpy_ty(bx, scratch.llval, llval, op.layout, align, MemFlags::empty());
base::memcpy_ty(bx, scratch.llval, scratch.align, llval, align,
op.layout, MemFlags::empty());
(scratch.llval, scratch.align, true)
} else {
(llval, align, true)
6 changes: 3 additions & 3 deletions src/librustc_codegen_llvm/mir/operand.rs
Original file line number Diff line number Diff line change
@@ -282,8 +282,8 @@ impl OperandValue<'ll> {
}
match self {
OperandValue::Ref(r, None, source_align) => {
base::memcpy_ty(bx, dest.llval, r, dest.layout,
source_align.min(dest.align), flags)
base::memcpy_ty(bx, dest.llval, dest.align, r, source_align,
dest.layout, flags)
}
OperandValue::Ref(_, Some(_), _) => {
bug!("cannot directly store unsized values");
@@ -324,7 +324,7 @@ impl OperandValue<'ll> {
// Allocate an appropriate region on the stack, and copy the value into it
let (llsize, _) = glue::size_and_align_of_dst(&bx, unsized_ty, Some(llextra));
let lldst = bx.array_alloca(Type::i8(bx.cx), llsize, "unsized_tmp", max_align);
base::call_memcpy(&bx, lldst, llptr, llsize, min_align, flags);
base::call_memcpy(&bx, lldst, max_align, llptr, min_align, llsize, flags);

// Store the allocated region and the extra to the indirect place.
let indirect_operand = OperandValue::Pair(lldst, llextra);
34 changes: 34 additions & 0 deletions src/rustllvm/RustWrapper.cpp
Original file line number Diff line number Diff line change
@@ -1239,6 +1239,40 @@ extern "C" LLVMValueRef LLVMRustBuildCall(LLVMBuilderRef B, LLVMValueRef Fn,
unwrap(Fn), makeArrayRef(unwrap(Args), NumArgs), Bundles, Name));
}

extern "C" LLVMValueRef LLVMRustBuildMemCpy(LLVMBuilderRef B,
LLVMValueRef Dst, unsigned DstAlign,
LLVMValueRef Src, unsigned SrcAlign,
LLVMValueRef Size, bool IsVolatile) {
#if LLVM_VERSION_GE(7, 0)
return wrap(unwrap(B)->CreateMemCpy(
unwrap(Dst), DstAlign,
unwrap(Src), SrcAlign,
unwrap(Size), IsVolatile));
#else
unsigned Align = std::min(DstAlign, SrcAlign);
return wrap(unwrap(B)->CreateMemCpy(
unwrap(Dst), unwrap(Src),
unwrap(Size), Align, IsVolatile));
#endif
}

extern "C" LLVMValueRef LLVMRustBuildMemMove(LLVMBuilderRef B,
LLVMValueRef Dst, unsigned DstAlign,
LLVMValueRef Src, unsigned SrcAlign,
LLVMValueRef Size, bool IsVolatile) {
#if LLVM_VERSION_GE(7, 0)
return wrap(unwrap(B)->CreateMemMove(
unwrap(Dst), DstAlign,
unwrap(Src), SrcAlign,
unwrap(Size), IsVolatile));
#else
unsigned Align = std::min(DstAlign, SrcAlign);
return wrap(unwrap(B)->CreateMemMove(
unwrap(Dst), unwrap(Src),
unwrap(Size), Align, IsVolatile));
#endif
}

extern "C" LLVMValueRef
LLVMRustBuildInvoke(LLVMBuilderRef B, LLVMValueRef Fn, LLVMValueRef *Args,
unsigned NumArgs, LLVMBasicBlockRef Then,
4 changes: 2 additions & 2 deletions src/test/codegen/packed.rs
Original file line number Diff line number Diff line change
@@ -65,7 +65,7 @@ pub struct BigPacked2 {
pub fn call_pkd1(f: fn() -> Array) -> BigPacked1 {
// CHECK: [[ALLOCA:%[_a-z0-9]+]] = alloca %Array
// CHECK: call void %{{.*}}(%Array* noalias nocapture sret dereferenceable(32) [[ALLOCA]])
// CHECK: call void @llvm.memcpy.{{.*}}(i8* align 1 %{{.*}}, i8* align 1 %{{.*}}, i{{[0-9]+}} 32, i1 false)
// CHECK: call void @llvm.memcpy.{{.*}}(i8* align 1 %{{.*}}, i8* align 4 %{{.*}}, i{{[0-9]+}} 32, i1 false)
// check that calls whose destination is a field of a packed struct
// go through an alloca rather than calling the function with an
// unaligned destination.
@@ -77,7 +77,7 @@ pub fn call_pkd1(f: fn() -> Array) -> BigPacked1 {
pub fn call_pkd2(f: fn() -> Array) -> BigPacked2 {
// CHECK: [[ALLOCA:%[_a-z0-9]+]] = alloca %Array
// CHECK: call void %{{.*}}(%Array* noalias nocapture sret dereferenceable(32) [[ALLOCA]])
// CHECK: call void @llvm.memcpy.{{.*}}(i8* align 2 %{{.*}}, i8* align 2 %{{.*}}, i{{[0-9]+}} 32, i1 false)
// CHECK: call void @llvm.memcpy.{{.*}}(i8* align 2 %{{.*}}, i8* align 4 %{{.*}}, i{{[0-9]+}} 32, i1 false)
// check that calls whose destination is a field of a packed struct
// go through an alloca rather than calling the function with an
// unaligned destination.
4 changes: 2 additions & 2 deletions src/test/codegen/stores.rs
Original file line number Diff line number Diff line change
@@ -31,7 +31,7 @@ pub fn small_array_alignment(x: &mut [i8; 4], y: [i8; 4]) {
// CHECK: store i32 %0, i32* [[TMP]]
// CHECK: [[Y8:%[0-9]+]] = bitcast [4 x i8]* %y to i8*
// CHECK: [[TMP8:%[0-9]+]] = bitcast i32* [[TMP]] to i8*
// CHECK: call void @llvm.memcpy.{{.*}}(i8* align 1 [[Y8]], i8* align 1 [[TMP8]], i{{[0-9]+}} 4, i1 false)
// CHECK: call void @llvm.memcpy.{{.*}}(i8* align 1 [[Y8]], i8* align 4 [[TMP8]], i{{[0-9]+}} 4, i1 false)
*x = y;
}

@@ -45,6 +45,6 @@ pub fn small_struct_alignment(x: &mut Bytes, y: Bytes) {
// CHECK: store i32 %0, i32* [[TMP]]
// CHECK: [[Y8:%[0-9]+]] = bitcast %Bytes* %y to i8*
// CHECK: [[TMP8:%[0-9]+]] = bitcast i32* [[TMP]] to i8*
// CHECK: call void @llvm.memcpy.{{.*}}(i8* align 1 [[Y8]], i8* align 1 [[TMP8]], i{{[0-9]+}} 4, i1 false)
// CHECK: call void @llvm.memcpy.{{.*}}(i8* align 1 [[Y8]], i8* align 4 [[TMP8]], i{{[0-9]+}} 4, i1 false)
*x = y;
}