Skip to content

Add sendmmsg syscall wrapper #2326

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 4, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 27 additions & 5 deletions std/c/freebsd.zig
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,36 @@ pub const pthread_attr_t = extern struct {
};

pub const msghdr = extern struct {
msg_name: *u8,
/// optional address
msg_name: ?*sockaddr,
/// size of address
msg_namelen: socklen_t,
msg_iov: *iovec,
/// scatter/gather array
msg_iov: [*]iovec,
/// # elements in msg_iov
msg_iovlen: i32,
__pad1: i32,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you remove this padding field? Doesn't that break the ABI?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This padding field isn't present in the FreeBSD headers. It looks like someone just copy/pasted the Linux definition without checking.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Argh. That's a problem. Now the entire freebsd.zig file needs to be audited.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Likewise for netbsd.zig, which seems to have been copied from freebsd.zig.

msg_control: *u8,
/// ancillary data
msg_control: ?*c_void,
/// ancillary data buffer len
msg_controllen: socklen_t,
__pad2: socklen_t,
/// flags on received message
msg_flags: i32,
};

/// Variant of `msghdr` with const-qualified pointers: `msg_name` points to a
/// `const sockaddr` and `msg_iov` points to `iovec_const` entries.
/// `msg_control` is still a mutable `?*c_void`.
/// The field order is part of the C ABI (`extern struct`); do not reorder.
pub const msghdr_const = extern struct {
/// optional address
msg_name: ?*const sockaddr,
/// size of address
msg_namelen: socklen_t,
/// scatter/gather array
msg_iov: [*]iovec_const,
/// # elements in msg_iov
msg_iovlen: i32,
/// ancillary data
msg_control: ?*c_void,
/// ancillary data buffer len
msg_controllen: socklen_t,
/// flags on received message
msg_flags: i32,
};

Expand Down
32 changes: 27 additions & 5 deletions std/c/netbsd.zig
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,36 @@ pub const pthread_attr_t = extern struct {
};

/// Message header with mutable pointers; `msghdr_const` is the
/// const-pointer counterpart with the same field names.
/// The field order is part of the C ABI (`extern struct`); do not reorder.
pub const msghdr = extern struct {
msg_name: *u8,
/// optional address
msg_name: ?*sockaddr,
/// size of address
msg_namelen: socklen_t,
msg_iov: *iovec,
/// scatter/gather array
msg_iov: [*]iovec,
/// # elements in msg_iov
msg_iovlen: i32,
__pad1: i32,
msg_control: *u8,
/// ancillary data
msg_control: ?*c_void,
/// ancillary data buffer len
msg_controllen: socklen_t,
__pad2: socklen_t,
/// flags on received message
msg_flags: i32,
};

/// Variant of `msghdr` with const-qualified pointers: `msg_name` points to a
/// `const sockaddr` and `msg_iov` points to `iovec_const` entries.
/// `msg_control` is still a mutable `?*c_void`.
/// The field order is part of the C ABI (`extern struct`); do not reorder.
pub const msghdr_const = extern struct {
/// optional address
msg_name: ?*const sockaddr,
/// size of address
msg_namelen: socklen_t,
/// scatter/gather array
msg_iov: [*]iovec_const,
/// # elements in msg_iov
msg_iovlen: i32,
/// ancillary data
msg_control: ?*c_void,
/// ancillary data buffer len
msg_controllen: socklen_t,
/// flags on received message
msg_flags: i32,
};

Expand Down
53 changes: 52 additions & 1 deletion std/os/linux.zig
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ pub use switch (builtin.arch) {
pub use @import("linux/errno.zig");

pub const PATH_MAX = 4096;
pub const IOV_MAX = 1024;

pub const STDIN_FILENO = 0;
pub const STDOUT_FILENO = 1;
Expand Down Expand Up @@ -1193,6 +1194,16 @@ pub const iovec_const = extern struct {
iov_len: usize,
};

/// One entry of a multi-message vector: a message header paired with a
/// per-message length slot.
pub const mmsghdr = extern struct {
/// header describing this message
msg_hdr: msghdr,
/// per-message length; `sendmmsg` in this file stores the `sendmsg` result
/// here (on the `mmsghdr_const` variant) when it sends a message on its own
msg_len: u32,
};

/// Same layout as `mmsghdr`, but the embedded header is a `msghdr_const`.
/// This is the element type of the `msgvec` argument of `sendmmsg`.
pub const mmsghdr_const = extern struct {
/// header describing this message
msg_hdr: msghdr_const,
/// per-message length; `sendmmsg` stores the `sendmsg` result here when it
/// sends a message on its own
msg_len: u32,
};

/// Issues `SYS_getsockname` for `fd`, passing the addresses of `addr` and
/// `len` as the second and third syscall arguments.
/// Returns the raw `syscall3` result.
pub fn getsockname(fd: i32, noalias addr: *sockaddr, noalias len: *socklen_t) usize {
    // Widen the descriptor to isize, then reinterpret its bits as usize.
    const fd_arg = @bitCast(usize, isize(fd));
    const addr_arg = @ptrToInt(addr);
    const len_arg = @ptrToInt(len);
    return syscall3(SYS_getsockname, fd_arg, addr_arg, len_arg);
}
Expand All @@ -1213,10 +1224,50 @@ pub fn getsockopt(fd: i32, level: u32, optname: u32, noalias optval: [*]u8, noal
return syscall5(SYS_getsockopt, @bitCast(usize, isize(fd)), level, optname, @ptrToInt(optval), @ptrToInt(optlen));
}

/// Issues `SYS_sendmsg` for `fd` with the address of `msg` and `flags` as the
/// remaining syscall arguments. Returns the raw `syscall3` result.
pub fn sendmsg(fd: i32, msg: *const msghdr, flags: u32) usize {
pub fn sendmsg(fd: i32, msg: *msghdr_const, flags: u32) usize {
return syscall3(SYS_sendmsg, @bitCast(usize, isize(fd)), @ptrToInt(msg), flags);
}

/// Sends up to `vlen` messages from `msgvec` on `fd` via `SYS_sendmmsg`.
///
/// When `usize` has more bits than `mmsghdr.msg_len`, the messages are
/// scanned first: a message whose `iov_len` values cannot be summed in an
/// `i32` is sent on its own through `sendmsg`, after the messages queued
/// before it have been flushed with a separate `SYS_sendmmsg` call.
/// Otherwise the arguments are forwarded to `SYS_sendmmsg` unchanged.
///
/// Returns either a count of messages or a raw syscall return value,
/// depending on the path taken (see the notes on the individual returns).
pub fn sendmmsg(fd: i32, msgvec: [*]mmsghdr_const, vlen: u32, flags: u32) usize {
// Take the workaround path only when usize is wider than the msg_len field.
if (@typeInfo(usize).Int.bits > @typeInfo(@typeOf(mmsghdr(undefined).msg_len)).Int.bits) {
// workaround kernel brokenness:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I opened #2380 to address this. I'd like to resolve that issue before merging this pull request. It's not a particularly big time investment for me to do that, so I can prioritize it to unblock you.

Copy link
Contributor Author

@daurnimator daurnimator Apr 30, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure #2380 is related :P
sendmmsg is not a posix API (see my comment here)

This is an oversight in the linux kernel when introducing 64bit support to code originally written for 32bit that has now resulted in a broken kernel ABI forever.
But it does of course bring up the question of: where does this wrapper belong?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of std.os.posix mapping directly to OS-specific files, it will provide a "zig flavored POSIX" API on top of the native OS API. This is where kernel limitations will be worked around; fork might call clone on Linux, etc.

I think this applies to the sendmmsg workaround. std.os.linux.sendmmsg is meant to be completely raw. std.os.posix.sendmmsg will be a compile error on systems that don't have it, that's fine. Zig is using std.os.posix to mean a specific, slightly different thing than it means in C and that's OK.

// if adding up all iov_len overflows a i32 then split into multiple calls
// see https://www.openwall.com/lists/musl/2014/06/07/5
const kvlen = if (vlen > IOV_MAX) IOV_MAX else vlen; // matches kernel
// Index of the first message that has not been handed to the kernel yet.
var next_unsent: usize = 0;
for (msgvec[0..kvlen]) |*msg, i| {
// Running sum of this message's iov_len values, tracked as an i32.
var size: i32 = 0;
const msg_iovlen = @intCast(usize, msg.msg_hdr.msg_iovlen); // kernel side this is treated as unsigned
for (msg.msg_hdr.msg_iov[0..msg_iovlen]) |iov, j| {
// The `or` short-circuits, so the @intCast only runs when iov_len fits in an i32.
if (iov.iov_len > std.math.maxInt(i32) or @addWithOverflow(i32, size, @intCast(i32, iov.iov_len), &size)) {
// batch-send all messages up to the current message
if (next_unsent < i) {
const batch_size = i - next_unsent;
const r = syscall4(SYS_sendmmsg, @bitCast(usize, isize(fd)), @ptrToInt(&msgvec[next_unsent]), batch_size, flags);
// NOTE(review): the error value of the failed batch is dropped here; the caller
// only sees `next_unsent`, which is 0 when nothing was sent earlier — confirm intended.
if (getErrno(r) != 0) return next_unsent;
// Fewer messages sent than requested: report the total sent so far.
if (r < batch_size) return next_unsent + r;
}
// send current message as own packet
const r = sendmsg(fd, &msg.msg_hdr, flags);
// NOTE(review): this returns the raw error value even if earlier messages were
// already sent, unlike the batch path above which returns a count — confirm intended.
if (getErrno(r) != 0) return r;
// Linux limits the total bytes sent by sendmsg to INT_MAX, so this cast is safe.
msg.msg_len = @intCast(u32, r);
next_unsent = i + 1;
// This message is done; stop scanning its iovecs and move on to the next message.
break;
}
}
}
if (next_unsent < kvlen or next_unsent == 0) { // want to make sure at least one syscall occurs (e.g. to trigger MSG_EOR)
const batch_size = kvlen - next_unsent;
const r = syscall4(SYS_sendmmsg, @bitCast(usize, isize(fd)), @ptrToInt(&msgvec[next_unsent]), batch_size, flags);
// NOTE(review): as above, an error here is returned raw and hides any messages
// already sent (`next_unsent`) — confirm intended.
if (getErrno(r) != 0) return r;
return next_unsent + r;
}
// Every message up to kvlen was already sent inside the loop.
return kvlen;
}
// usize is not wider than msg_len: forward the call unchanged.
return syscall4(SYS_sendmmsg, @bitCast(usize, isize(fd)), @ptrToInt(msgvec), vlen, flags);
}

/// Issues `SYS_connect` for `fd`, passing the address of `addr` and the
/// length `len` as the second and third syscall arguments.
/// Returns the raw `syscall3` result.
pub fn connect(fd: i32, addr: *const c_void, len: socklen_t) usize {
    // Widen the descriptor to isize, then reinterpret its bits as usize.
    const fd_arg = @bitCast(usize, isize(fd));
    const addr_arg = @ptrToInt(addr);
    return syscall3(SYS_connect, fd_arg, addr_arg, len);
}
Expand Down
19 changes: 16 additions & 3 deletions std/os/linux/arm64.zig
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ const std = @import("../../std.zig");
const linux = std.os.linux;
const socklen_t = linux.socklen_t;
const iovec = linux.iovec;
const iovec_const = linux.iovec_const;

pub const SYS_io_setup = 0;
pub const SYS_io_destroy = 1;
Expand Down Expand Up @@ -415,12 +416,24 @@ pub fn syscall6(
pub extern fn clone(func: extern fn (arg: usize) u8, stack: usize, flags: u32, arg: usize, ptid: *i32, tls: usize, ctid: *i32) usize;

/// Message header with mutable pointers; `msghdr_const` is the
/// const-pointer counterpart. Field order is ABI (`extern struct`).
// NOTE(review): `sockaddr` is not among the imports visible at the top of this
// file's hunk (only `socklen_t`, `iovec`, `iovec_const`) — confirm it is in scope.
pub const msghdr = extern struct {
msg_name: *u8,
/// optional address
msg_name: ?*sockaddr,
/// size of address
msg_namelen: socklen_t,
msg_iov: *iovec,
/// scatter/gather array
msg_iov: [*]iovec,
/// # elements in msg_iov
msg_iovlen: i32,
// NOTE(review): explicit padding kept in this Linux definition; presumably covers
// the upper half of a wider kernel field — confirm against the kernel/libc headers.
__pad1: i32,
msg_control: *u8,
/// ancillary data
msg_control: ?*c_void,
/// ancillary data buffer len
msg_controllen: socklen_t,
// NOTE(review): explicit padding after msg_controllen; same caveat as __pad1.
__pad2: socklen_t,
/// flags on received message
msg_flags: i32,
};

pub const msghdr_const = extern struct {
msg_name: ?*const sockaddr,
msg_namelen: socklen_t,
msg_iov: [*]iovec_const,
msg_iovlen: i32,
__pad1: i32,
msg_control: ?*c_void,
msg_controllen: socklen_t,
__pad2: socklen_t,
msg_flags: i32,
Expand Down
20 changes: 17 additions & 3 deletions std/os/linux/x86_64.zig
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
const std = @import("../../std.zig");
const linux = std.os.linux;
const sockaddr = linux.sockaddr;
const socklen_t = linux.socklen_t;
const iovec = linux.iovec;
const iovec_const = linux.iovec_const;

pub const SYS_read = 0;
pub const SYS_write = 1;
Expand Down Expand Up @@ -483,12 +485,24 @@ pub nakedcc fn restore_rt() void {
}

/// Message header with mutable pointers; `msghdr_const` is the
/// const-pointer counterpart. Field order is ABI (`extern struct`).
pub const msghdr = extern struct {
msg_name: *u8,
/// optional address
msg_name: ?*sockaddr,
/// size of address
msg_namelen: socklen_t,
msg_iov: *iovec,
/// scatter/gather array
msg_iov: [*]iovec,
/// # elements in msg_iov
msg_iovlen: i32,
// NOTE(review): explicit padding kept in this Linux definition; presumably covers
// the upper half of a wider kernel field — confirm against the kernel/libc headers.
__pad1: i32,
msg_control: *u8,
/// ancillary data
msg_control: ?*c_void,
/// ancillary data buffer len
msg_controllen: socklen_t,
// NOTE(review): explicit padding after msg_controllen; same caveat as __pad1.
__pad2: socklen_t,
/// flags on received message
msg_flags: i32,
};

pub const msghdr_const = extern struct {
msg_name: ?*const sockaddr,
msg_namelen: socklen_t,
msg_iov: [*]iovec_const,
msg_iovlen: i32,
__pad1: i32,
msg_control: ?*c_void,
msg_controllen: socklen_t,
__pad2: socklen_t,
msg_flags: i32,
Expand Down