diff --git a/lib/std/fs/file.zig b/lib/std/fs/file.zig index 3792e1c1f29b..7ea0dcc39ab4 100644 --- a/lib/std/fs/file.zig +++ b/lib/std/fs/file.zig @@ -8,8 +8,13 @@ const assert = std.debug.assert; const windows = os.windows; const Os = std.builtin.Os; const maxInt = std.math.maxInt; +const FifoType = std.fifo.LinearFifo(u8, std.fifo.LinearFifoBufferType{ .Static = 4 }); const is_windows = builtin.os.tag == .windows; +threadlocal var win_surrogate: ?u16 = null; +threadlocal var win_fifo_utf8: FifoType = FifoType.init(); +const win_u16_buffer_size = 4096; + pub const File = struct { /// The OS-specific file descriptor or file handle. handle: Handle, @@ -968,7 +973,11 @@ pub const File = struct { pub fn read(self: File, buffer: []u8) ReadError!usize { if (is_windows) { - return windows.ReadFile(self.handle, buffer, null, self.intended_io_mode); + if (!self.isTty()) { + return windows.ReadFile(self.handle, buffer, null, self.intended_io_mode); + } + + return readWindowsConsole(self, buffer); } if (self.intended_io_mode == .blocking) { @@ -978,6 +987,68 @@ pub const File = struct { } } + /// readWindowsConsole reads a Windows console (cmd, powershell, etc) handle in a wide char format (utf16le) and converts to utf8. + /// It relies on a set of threadlocal vars to convert utf16le chars to utf8 for when the u8 buffer is too small. + fn readWindowsConsole(self: File, buffer: []u8) ReadError!usize { + var dest_index: usize = 0; + while (dest_index < buffer.len) { + // Read from a utf8 buffer for any potential missing bytes needed for a codepoint + const written = win_fifo_utf8.read(buffer[dest_index..]); + if (written != 0) { + dest_index += written; + continue; + } + const delta_len = buffer.len - dest_index; + if (delta_len < 4) { + // Not enough room is left in the destination buffer to handle larger codepoint lengths + // Read a character at a time and place the encoding utf8 bytes into a fifo buffer + win_fifo_utf8.realign(); + const n = try self.readWindowsConsoleToUtf8(win_fifo_utf8.writableSlice(0), 1); + if (n == 0) return dest_index; + win_fifo_utf8.update(n); + } else { + const n = try self.readWindowsConsoleToUtf8(buffer[delta_len..], win_u16_buffer_size); + if (n == 0) return dest_index; + dest_index += n; + } + } + return buffer.len; + } + + fn readWindowsConsoleToUtf8(self: File, dest: []u8, comptime u16_buffer_size: usize) ReadError!usize { + var utf16_buffer: [u16_buffer_size]u16 = undefined; + const end = if (u16_buffer_size > 1) math.min(dest.len / 3, u16_buffer_size) else u16_buffer_size; + + const n = try self.readWindowsConsoleW(utf16_buffer[0..end]); + if (n == 0) return n; + + return std.unicode.utf16leToUtf8(dest, utf16_buffer[0..n]) catch return ReadError.Unexpected; + } + + fn readWindowsConsoleW(self: File, buffer: []u16) ReadError!usize { + if (buffer.len == 0) return 0; + var start: usize = 0; + + // If we stored a surrogate half, place it back at the beginning of this buffer + if (win_surrogate != null) { + buffer[0] = win_surrogate.?; + win_surrogate = null; + start = 1; + } + + var n = try os.windows.ReadConsoleW(self.handle, buffer[start..], null); + + // Remove and store incomplete surrogate at end of buffer + if (n > 0) { + const last_char = buffer[n - 1]; + if (0xd800 <= last_char and last_char <= 0xdfff) { + win_surrogate = last_char; + n -= 1; + } + } + return n; + } + /// Returns the number of bytes read. If the number read is smaller than `buffer.len`, it /// means the file reached the end. Reaching the end of a file is not an error condition. pub fn readAll(self: File, buffer: []u8) ReadError!usize { @@ -1113,7 +1184,11 @@ pub const File = struct { pub fn write(self: File, bytes: []const u8) WriteError!usize { if (is_windows) { - return windows.WriteFile(self.handle, bytes, null, self.intended_io_mode); + if (!self.isTty()) { + return windows.WriteFile(self.handle, bytes, null, self.intended_io_mode); + } + + return writeWindowsConsole(self, bytes); } if (self.intended_io_mode == .blocking) { @@ -1123,6 +1198,21 @@ pub const File = struct { } } + /// writeWindowsConsole writes to a Windows console (cmd, powershell, etc) handle with utf8 input to a wide char format (utf16le). + fn writeWindowsConsole(self: File, bytes: []const u8) WriteError!usize { + var utf16 = [_]u16{0} ** win_u16_buffer_size; + const max_data_len = std.unicode.getClampedUtf8SizeForUtf16LeSize(bytes, utf16.len) catch return WriteError.Unexpected; + const n = std.unicode.utf8ToUtf16Le(utf16[0..], bytes[0..max_data_len]) catch return WriteError.Unexpected; + const m = try os.windows.WriteConsoleW(self.handle, utf16[0..n]); + + if (n != m) { + // If the number of u16s don't match between converted utf8 chars and what's written to console, + // we need to calculate the number of bytes written provided from Windows. + return std.unicode.getClampedUtf8SizeForUtf16LeSize(bytes, m) catch unreachable; + } + return max_data_len; + } + pub fn writeAll(self: File, bytes: []const u8) WriteError!void { var index: usize = 0; while (index < bytes.len) { diff --git a/lib/std/os/windows.zig b/lib/std/os/windows.zig index b0a0f6d40747..60266ad2db52 100644 --- a/lib/std/os/windows.zig +++ b/lib/std/os/windows.zig @@ -512,6 +512,21 @@ pub fn ReadFile(in_hFile: HANDLE, buffer: []u8, offset: ?u64, io_mode: std.io.Mo } } +pub fn ReadConsoleW(in_hFile: HANDLE, buffer: []u16, control: ?*CONSOLE_READCONSOLE_CONTROL) ReadFileError!usize { + const want_read_count = @intCast(DWORD, math.min(@as(DWORD, maxInt(DWORD)), buffer.len)); + var amt_read: DWORD = undefined; + + if (kernel32.ReadConsoleW(in_hFile, buffer.ptr, want_read_count, &amt_read, control) == 0) { + switch (kernel32.GetLastError()) { + .BROKEN_PIPE => return 0, + .HANDLE_EOF => return 0, + else => |err| return unexpectedError(err), + } + } + + return amt_read; +} + pub const WriteFileError = error{ SystemResources, OperationAborted, @@ -608,6 +623,23 @@ pub fn WriteFile( } } +pub fn WriteConsoleW(handle: HANDLE, buffer: []const u16) WriteFileError!usize { + var written: DWORD = undefined; + if (kernel32.WriteConsoleW(handle, buffer.ptr, @intCast(u32, buffer.len), &written, null) == 0) { + switch (kernel32.GetLastError()) { + .INVALID_USER_BUFFER => return error.SystemResources, + .NOT_ENOUGH_MEMORY => return error.SystemResources, + .OPERATION_ABORTED => return error.OperationAborted, + .NOT_ENOUGH_QUOTA => return error.SystemResources, + .IO_PENDING => unreachable, + .BROKEN_PIPE => return error.BrokenPipe, + .INVALID_HANDLE => return error.NotOpenForWriting, + else => |err| return unexpectedError(err), + } + } + return written; +} + pub const SetCurrentDirectoryError = error{ NameTooLong, InvalidUtf8, @@ -3726,3 +3758,10 @@ pub const HANDLER_ROUTINE = switch (builtin.zig_backend) { .stage1 => fn (dwCtrlType: DWORD) callconv(.C) BOOL, else => *const fn (dwCtrlType: DWORD) callconv(.C) BOOL, }; + +pub const CONSOLE_READCONSOLE_CONTROL = extern struct { + nLength: ULONG, + nInitialChars: ULONG, + dwCtrlWakeupMask: ULONG, + dwControlKeyState: ULONG, +}; diff --git a/lib/std/os/windows/kernel32.zig b/lib/std/os/windows/kernel32.zig index 8d146def7f2e..b51a8ca333ce 100644 --- a/lib/std/os/windows/kernel32.zig +++ b/lib/std/os/windows/kernel32.zig @@ -57,6 +57,7 @@ const UCHAR = windows.UCHAR; const FARPROC = windows.FARPROC; const INIT_ONCE_FN = windows.INIT_ONCE_FN; const PMEMORY_BASIC_INFORMATION = windows.PMEMORY_BASIC_INFORMATION; +const CONSOLE_READCONSOLE_CONTROL = windows.CONSOLE_READCONSOLE_CONTROL; pub extern "kernel32" fn AddVectoredExceptionHandler(First: c_ulong, Handler: ?VECTORED_EXCEPTION_HANDLER) callconv(WINAPI) ?*anyopaque; pub extern "kernel32" fn RemoveVectoredExceptionHandler(Handle: HANDLE) callconv(WINAPI) c_ulong; @@ -284,6 +285,14 @@ pub extern "kernel32" fn ReadFile( in_out_lpOverlapped: ?*OVERLAPPED, ) callconv(WINAPI) BOOL; +pub extern "kernel32" fn ReadConsoleW( + in_hFile: HANDLE, + out_lpBuffer: [*]u16, + in_nNumberOfBytesToRead: DWORD, + out_lpNumberOfBytesRead: ?*DWORD, + in_pInputControl: ?*CONSOLE_READCONSOLE_CONTROL, +) callconv(WINAPI) BOOL; + pub extern "kernel32" fn RemoveDirectoryW(lpPathName: [*:0]const u16) callconv(WINAPI) BOOL; pub extern "kernel32" fn SetConsoleTextAttribute(hConsoleOutput: HANDLE, wAttributes: WORD) callconv(WINAPI) BOOL; @@ -356,6 +365,14 @@ pub extern "kernel32" fn WriteFileEx( lpCompletionRoutine: LPOVERLAPPED_COMPLETION_ROUTINE, ) callconv(WINAPI) BOOL; +pub extern "kernel32" fn WriteConsoleW( + in_hConsoleOutput: HANDLE, + in_lpBuffer: [*]const u16, + in_nNumberOfCharsToWrite: DWORD, + out_lpNumberOfCharsWritten: ?*DWORD, + reserved_lpReserved: ?LPVOID, +) callconv(WINAPI) BOOL; + pub extern "kernel32" fn LoadLibraryW(lpLibFileName: [*:0]const u16) callconv(WINAPI) ?HMODULE; pub extern "kernel32" fn GetProcAddress(hModule: HMODULE, lpProcName: [*]const u8) callconv(WINAPI) ?FARPROC; diff --git a/lib/std/unicode.zig b/lib/std/unicode.zig index e8ad0d5e5bac..30d1206e207f 100644 --- a/lib/std/unicode.zig +++ b/lib/std/unicode.zig @@ -813,6 +813,23 @@ pub fn calcUtf16LeLen(utf8: []const u8) CalcUtf16LeLenError!usize { return dest_len; } +pub fn getClampedUtf8SizeForUtf16LeSize(utf8: []const u8, utf16le_size: usize) !usize { + var src_i: usize = 0; + var dest_len: usize = 0; + while (src_i < utf8.len) { + const n = try utf8ByteSequenceLength(utf8[src_i]); + const next_src_i = src_i + n; + const codepoint = try utf8Decode(utf8[src_i..next_src_i]); + + const u16_len: usize = if (codepoint < 0x10000) 1 else 2; + if (dest_len + u16_len > utf16le_size) return src_i; + dest_len += u16_len; + src_i = next_src_i; + } + + return src_i; +} + fn testCalcUtf16LeLen() !void { try testing.expectEqual(@as(usize, 1), try calcUtf16LeLen("a")); try testing.expectEqual(@as(usize, 10), try calcUtf16LeLen("abcdefghij"));