Skip to content

std: Windows Console IO Fix #12400

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 92 additions & 2 deletions lib/std/fs/file.zig
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,13 @@ const assert = std.debug.assert;
const windows = os.windows;
const Os = std.builtin.Os;
const maxInt = std.math.maxInt;
const FifoType = std.fifo.LinearFifo(u8, std.fifo.LinearFifoBufferType{ .Static = 4 });
const is_windows = builtin.os.tag == .windows;

threadlocal var win_surrogate: ?u16 = null;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm after thinking a bit more about this, I'm not sure this should be threadlocal after all. What I'd propose would be:

  • Leave the buffer as a static global (non-threadlocal)
  • Any use of stdin across threads on Windows requires you to synchronize I/O access using your own mutex or similar

Do you see any issues with that approach? cc @kprotty

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Its an odd restriction for only a single platform. If it does exist, I think it should extend to all platforms and even all tty Files

Copy link
Contributor Author

@Vulfox Vulfox Aug 24, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@topolarity I've been mulling over the implications of threadlocal as well. Your suggestion sounds like exactly what the proposal I mentioned before definitely wanted to maybe tackle.

@kprotty I can try to see what I can cook up to settle this in a platform agnostic way, while trying to dodge the bullet of changing any current public APIs/types.

edit: Y'all got any recommended material that might help me get started down the mutex buffer path for best practices?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My thought was to require the user to synchronize their use of stdout/stdin with their own mutex, not to wrap this in a mutex in the standard library

That would leave the default behavior fast for single-threaded usage, while still making it possible to perform multi-threaded access correctly (multi-threaded access is broken with threadlocal, since each thread will see a different buffer)

threadlocal var win_fifo_utf8: FifoType = FifoType.init();
const win_u16_buffer_size = 4096;

pub const File = struct {
/// The OS-specific file descriptor or file handle.
handle: Handle,
Expand Down Expand Up @@ -968,7 +973,11 @@ pub const File = struct {

pub fn read(self: File, buffer: []u8) ReadError!usize {
if (is_windows) {
return windows.ReadFile(self.handle, buffer, null, self.intended_io_mode);
if (!self.isTty()) {
return windows.ReadFile(self.handle, buffer, null, self.intended_io_mode);
}

return readWindowsConsole(self, buffer);
}

if (self.intended_io_mode == .blocking) {
Expand All @@ -978,6 +987,68 @@ pub const File = struct {
}
}

/// readWindowsConsole reads a Windows console (cmd, powershell, etc) handle in a wide char format (utf16le) and converts to utf8.
/// It relies on a set of threadlocal vars to convert utf16le chars to utf8 for when the u8 buffer is too small.
fn readWindowsConsole(self: File, buffer: []u8) ReadError!usize {
var dest_index: usize = 0;
while (dest_index < buffer.len) {
// Read from a utf8 buffer for any potential missing bytes needed for a codepoint
const written = win_fifo_utf8.read(buffer[dest_index..]);
if (written != 0) {
dest_index += written;
continue;
}
const delta_len = buffer.len - dest_index;
if (delta_len < 4) {
// Not enough room is left in the destination buffer to handle larger codepoint lengths
// Read a character at a time and place the encoding utf8 bytes into a fifo buffer
win_fifo_utf8.realign();
const n = try self.readWindowsConsoleToUtf8(win_fifo_utf8.writableSlice(0), 1);
if (n == 0) return dest_index;
win_fifo_utf8.update(n);
} else {
const n = try self.readWindowsConsoleToUtf8(buffer[delta_len..], win_u16_buffer_size);
if (n == 0) return dest_index;
dest_index += n;
}
}
return buffer.len;
}

fn readWindowsConsoleToUtf8(self: File, dest: []u8, comptime u16_buffer_size: usize) ReadError!usize {
var utf16_buffer: [u16_buffer_size]u16 = undefined;
const end = if (u16_buffer_size > 1) math.min(dest.len / 3, u16_buffer_size) else u16_buffer_size;

const n = try self.readWindowsConsoleW(utf16_buffer[0..end]);
if (n == 0) return n;

return std.unicode.utf16leToUtf8(dest, utf16_buffer[0..n]) catch return ReadError.Unexpected;
}

fn readWindowsConsoleW(self: File, buffer: []u16) ReadError!usize {
if (buffer.len == 0) return 0;
var start: usize = 0;

// If we stored a surrogate half, place it back at the beginning of this buffer
if (win_surrogate != null) {
buffer[0] = win_surrogate.?;
win_surrogate = null;
start = 1;
}

var n = try os.windows.ReadConsoleW(self.handle, buffer[start..], null);

// Remove and store incomplete surrogate at end of buffer
if (n > 0) {
const last_char = buffer[n - 1];
if (0xd800 <= last_char and last_char <= 0xdfff) {
win_surrogate = last_char;
n -= 1;
}
}
return n;
}

/// Returns the number of bytes read. If the number read is smaller than `buffer.len`, it
/// means the file reached the end. Reaching the end of a file is not an error condition.
pub fn readAll(self: File, buffer: []u8) ReadError!usize {
Expand Down Expand Up @@ -1113,7 +1184,11 @@ pub const File = struct {

pub fn write(self: File, bytes: []const u8) WriteError!usize {
if (is_windows) {
return windows.WriteFile(self.handle, bytes, null, self.intended_io_mode);
if (!self.isTty()) {
return windows.WriteFile(self.handle, bytes, null, self.intended_io_mode);
}

return writeWindowsConsole(self, bytes);
}

if (self.intended_io_mode == .blocking) {
Expand All @@ -1123,6 +1198,21 @@ pub const File = struct {
}
}

/// writeWindowsConsole writes to a Windows console (cmd, powershell, etc) handle with utf8 input to a wide char format (utf16le).
fn writeWindowsConsole(self: File, bytes: []const u8) WriteError!usize {
var utf16 = [_]u16{0} ** win_u16_buffer_size;
const max_data_len = std.unicode.getClampedUtf8SizeForUtf16LeSize(bytes, utf16.len) catch return WriteError.Unexpected;
const n = std.unicode.utf8ToUtf16Le(utf16[0..], bytes[0..max_data_len]) catch return WriteError.Unexpected;
const m = try os.windows.WriteConsoleW(self.handle, utf16[0..n]);

if (n != m) {
// If the number of u16s don't match between converted utf8 chars and what's written to console,
// we need to calculate the number of bytes written provided from Windows.
return std.unicode.getClampedUtf8SizeForUtf16LeSize(bytes, m) catch unreachable;
}
return max_data_len;
}

pub fn writeAll(self: File, bytes: []const u8) WriteError!void {
var index: usize = 0;
while (index < bytes.len) {
Expand Down
39 changes: 39 additions & 0 deletions lib/std/os/windows.zig
Original file line number Diff line number Diff line change
Expand Up @@ -512,6 +512,21 @@ pub fn ReadFile(in_hFile: HANDLE, buffer: []u8, offset: ?u64, io_mode: std.io.Mo
}
}

pub fn ReadConsoleW(in_hFile: HANDLE, buffer: []u16, control: ?*CONSOLE_READCONSOLE_CONTROL) ReadFileError!usize {
const want_read_count = @intCast(DWORD, math.min(@as(DWORD, maxInt(DWORD)), buffer.len));
var amt_read: DWORD = undefined;

if (kernel32.ReadConsoleW(in_hFile, buffer.ptr, want_read_count, &amt_read, control) == 0) {
switch (kernel32.GetLastError()) {
.BROKEN_PIPE => return 0,
.HANDLE_EOF => return 0,
else => |err| return unexpectedError(err),
}
}

return amt_read;
}

pub const WriteFileError = error{
SystemResources,
OperationAborted,
Expand Down Expand Up @@ -608,6 +623,23 @@ pub fn WriteFile(
}
}

pub fn WriteConsoleW(handle: HANDLE, buffer: []const u16) WriteFileError!usize {
var written: DWORD = undefined;
if (kernel32.WriteConsoleW(handle, buffer.ptr, @intCast(u32, buffer.len), &written, null) == 0) {
switch (kernel32.GetLastError()) {
.INVALID_USER_BUFFER => return error.SystemResources,
.NOT_ENOUGH_MEMORY => return error.SystemResources,
.OPERATION_ABORTED => return error.OperationAborted,
.NOT_ENOUGH_QUOTA => return error.SystemResources,
.IO_PENDING => unreachable,
.BROKEN_PIPE => return error.BrokenPipe,
.INVALID_HANDLE => return error.NotOpenForWriting,
else => |err| return unexpectedError(err),
}
}
return written;
}

pub const SetCurrentDirectoryError = error{
NameTooLong,
InvalidUtf8,
Expand Down Expand Up @@ -3726,3 +3758,10 @@ pub const HANDLER_ROUTINE = switch (builtin.zig_backend) {
.stage1 => fn (dwCtrlType: DWORD) callconv(.C) BOOL,
else => *const fn (dwCtrlType: DWORD) callconv(.C) BOOL,
};

pub const CONSOLE_READCONSOLE_CONTROL = extern struct {
nLength: ULONG,
nInitialChars: ULONG,
dwCtrlWakeupMask: ULONG,
dwControlKeyState: ULONG,
};
17 changes: 17 additions & 0 deletions lib/std/os/windows/kernel32.zig
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ const UCHAR = windows.UCHAR;
const FARPROC = windows.FARPROC;
const INIT_ONCE_FN = windows.INIT_ONCE_FN;
const PMEMORY_BASIC_INFORMATION = windows.PMEMORY_BASIC_INFORMATION;
const CONSOLE_READCONSOLE_CONTROL = windows.CONSOLE_READCONSOLE_CONTROL;

pub extern "kernel32" fn AddVectoredExceptionHandler(First: c_ulong, Handler: ?VECTORED_EXCEPTION_HANDLER) callconv(WINAPI) ?*anyopaque;
pub extern "kernel32" fn RemoveVectoredExceptionHandler(Handle: HANDLE) callconv(WINAPI) c_ulong;
Expand Down Expand Up @@ -284,6 +285,14 @@ pub extern "kernel32" fn ReadFile(
in_out_lpOverlapped: ?*OVERLAPPED,
) callconv(WINAPI) BOOL;

pub extern "kernel32" fn ReadConsoleW(
in_hFile: HANDLE,
out_lpBuffer: [*]u16,
in_nNumberOfBytesToRead: DWORD,
out_lpNumberOfBytesRead: ?*DWORD,
in_pInputControl: ?*CONSOLE_READCONSOLE_CONTROL,
) callconv(WINAPI) BOOL;

pub extern "kernel32" fn RemoveDirectoryW(lpPathName: [*:0]const u16) callconv(WINAPI) BOOL;

pub extern "kernel32" fn SetConsoleTextAttribute(hConsoleOutput: HANDLE, wAttributes: WORD) callconv(WINAPI) BOOL;
Expand Down Expand Up @@ -356,6 +365,14 @@ pub extern "kernel32" fn WriteFileEx(
lpCompletionRoutine: LPOVERLAPPED_COMPLETION_ROUTINE,
) callconv(WINAPI) BOOL;

pub extern "kernel32" fn WriteConsoleW(
in_hConsoleOutput: HANDLE,
in_lpBuffer: [*]const u16,
in_nNumberOfCharsToWrite: DWORD,
out_lpNumberOfCharsWritten: ?*DWORD,
reserved_lpReserved: ?LPVOID,
) callconv(WINAPI) BOOL;

pub extern "kernel32" fn LoadLibraryW(lpLibFileName: [*:0]const u16) callconv(WINAPI) ?HMODULE;

pub extern "kernel32" fn GetProcAddress(hModule: HMODULE, lpProcName: [*]const u8) callconv(WINAPI) ?FARPROC;
Expand Down
17 changes: 17 additions & 0 deletions lib/std/unicode.zig
Original file line number Diff line number Diff line change
Expand Up @@ -813,6 +813,23 @@ pub fn calcUtf16LeLen(utf8: []const u8) CalcUtf16LeLenError!usize {
return dest_len;
}

pub fn getClampedUtf8SizeForUtf16LeSize(utf8: []const u8, utf16le_size: usize) !usize {
var src_i: usize = 0;
var dest_len: usize = 0;
while (src_i < utf8.len) {
const n = try utf8ByteSequenceLength(utf8[src_i]);
const next_src_i = src_i + n;
const codepoint = try utf8Decode(utf8[src_i..next_src_i]);

const u16_len: usize = if (codepoint < 0x10000) 1 else 2;
if (dest_len + u16_len > utf16le_size) return src_i;
dest_len += u16_len;
src_i = next_src_i;
}

return src_i;
}

fn testCalcUtf16LeLen() !void {
try testing.expectEqual(@as(usize, 1), try calcUtf16LeLen("a"));
try testing.expectEqual(@as(usize, 10), try calcUtf16LeLen("abcdefghij"));
Expand Down