From 3549d0b0029f9d2cb0addd989dfb21b2d2b461ac Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Thu, 3 Nov 2022 23:52:07 -0400 Subject: [PATCH 1/3] Add peek, readLine untested. --- lib/std/compress/deflate/compressor_test.zig | 24 +- lib/std/compress/deflate/decompressor.zig | 2 +- lib/std/compress/gzip.zig | 27 +- lib/std/compress/zlib.zig | 2 +- lib/std/fifo.zig | 6 +- lib/std/fs/file.zig | 18 +- lib/std/io/bit_reader.zig | 2 +- lib/std/io/buffered_reader.zig | 60 +++-- lib/std/io/counting_reader.zig | 7 +- lib/std/io/fixed_buffer_stream.zig | 11 +- lib/std/io/limited_reader.zig | 8 +- lib/std/io/peek_stream.zig | 2 +- lib/std/io/reader.zig | 244 +++++++++++++++++++ lib/std/io/stream_source.zig | 10 +- lib/std/net.zig | 2 +- lib/std/os/uefi/protocols/file_protocol.zig | 2 +- lib/std/pdb.zig | 2 +- lib/std/x/net/tcp.zig | 2 +- 18 files changed, 397 insertions(+), 34 deletions(-) diff --git a/lib/std/compress/deflate/compressor_test.zig b/lib/std/compress/deflate/compressor_test.zig index 4f8efd0d6e79..8a91ce1f655c 100644 --- a/lib/std/compress/deflate/compressor_test.zig +++ b/lib/std/compress/deflate/compressor_test.zig @@ -188,7 +188,7 @@ test "very long sparse chunk" { const Self = @This(); const Error = error{}; - pub const Reader = io.Reader(*Self, Error, read); + pub const Reader = io.Reader(*Self, Error, read, peek); pub fn reader(self: *Self) Reader { return .{ .context = self }; @@ -216,6 +216,28 @@ test "very long sparse chunk" { s.cur = cur; return n; } + + fn peek(s: *Self, b: []u8) Error!usize { + var n: usize = 0; // amount read + + if (s.cur >= s.l) { + return 0; + } + n = b.len; + var cur = s.cur + n; + if (cur > s.l) { + n -= cur - s.l; + cur = s.l; + } + for (b[0..n]) |_, i| { + if (s.cur + i >= s.l -| (1 << 16)) { + b[i] = 1; + } else { + b[i] = 0; + } + } + return n; + } }; var comp = try compressor( diff --git a/lib/std/compress/deflate/decompressor.zig b/lib/std/compress/deflate/decompressor.zig index 37f45f1272c5..d4153b98d602 100644 --- 
a/lib/std/compress/deflate/decompressor.zig +++ b/lib/std/compress/deflate/decompressor.zig @@ -306,7 +306,7 @@ pub fn Decompressor(comptime ReaderType: type) type { error{EndOfStream} || InflateError || Allocator.Error; - pub const Reader = io.Reader(*Self, Error, read); + pub const Reader = io.Reader(*Self, Error, read, null); allocator: Allocator, diff --git a/lib/std/compress/gzip.zig b/lib/std/compress/gzip.zig index 2377e182c24e..a555f0161d3f 100644 --- a/lib/std/compress/gzip.zig +++ b/lib/std/compress/gzip.zig @@ -22,7 +22,7 @@ pub fn GzipStream(comptime ReaderType: type) type { pub const Error = ReaderType.Error || deflate.Decompressor(ReaderType).Error || error{ CorruptedData, WrongChecksum }; - pub const Reader = io.Reader(*Self, Error, read); + pub const Reader = io.Reader(*Self, Error, read, peek); allocator: mem.Allocator, inflater: deflate.Decompressor(ReaderType), @@ -140,6 +140,31 @@ pub fn GzipStream(comptime ReaderType: type) type { return 0; } + // Implements the io.Reader interface + pub fn peek(self: *Self, buffer: []u8) Error!usize { + if (buffer.len == 0) + return 0; + + // Read from the compressed stream and update the computed checksum + const r = try self.inflater.read(buffer); + if (r != 0) { + self.hasher.update(buffer[0..r]); + return r; + } + + // We've reached the end of stream, check if the checksum matches + const hash = try self.in_reader.readIntLittle(u32); + if (hash != self.hasher.final()) + return error.WrongChecksum; + + // The ISIZE field is the size of the uncompressed input modulo 2^32 + const input_size = try self.in_reader.readIntLittle(u32); + if (self.read_amt & 0xffffffff != input_size) + return error.CorruptedData; + + return 0; + } + pub fn reader(self: *Self) Reader { return .{ .context = self }; } diff --git a/lib/std/compress/zlib.zig b/lib/std/compress/zlib.zig index d7d33f9fa933..5de067e2a9db 100644 --- a/lib/std/compress/zlib.zig +++ b/lib/std/compress/zlib.zig @@ -15,7 +15,7 @@ pub fn ZlibStream(comptime 
ReaderType: type) type { pub const Error = ReaderType.Error || deflate.Decompressor(ReaderType).Error || error{ WrongChecksum, Unsupported }; - pub const Reader = io.Reader(*Self, Error, read); + pub const Reader = io.Reader(*Self, Error, read, null); allocator: mem.Allocator, inflater: deflate.Decompressor(ReaderType), diff --git a/lib/std/fifo.zig b/lib/std/fifo.zig index b7c8f761d3dd..dfafa3c97f47 100644 --- a/lib/std/fifo.zig +++ b/lib/std/fifo.zig @@ -39,7 +39,7 @@ pub fn LinearFifo( count: usize, const Self = @This(); - pub const Reader = std.io.Reader(*Self, error{}, readFn); + pub const Reader = std.io.Reader(*Self, error{}, readFn, peekFn); pub const Writer = std.io.Writer(*Self, error{OutOfMemory}, appendWrite); // Type of Self argument for slice operations. @@ -227,6 +227,10 @@ pub fn LinearFifo( return self.read(dest); } + fn peekFn(self: *Self, dest: []u8) error{}!usize { + return self.peek(dest); + } + pub fn reader(self: *Self) Reader { return .{ .context = self }; } diff --git a/lib/std/fs/file.zig b/lib/std/fs/file.zig index 0b5ba6de0bf2..41050389ae0b 100644 --- a/lib/std/fs/file.zig +++ b/lib/std/fs/file.zig @@ -977,6 +977,22 @@ pub const File = struct { } } + pub fn peek(self: File, buffer: []u8) !usize { + if (is_windows) { + var size = windows.ReadFile(self.handle, buffer, null, self.intended_io_mode) catch |e| { + return e; + }; + os.lseek_CUR(self.handle, -@bitCast(isize, size)) catch unreachable; + return size; + } + + var size = os.read(self.handle, buffer) catch |e| { + return e; + }; + os.lseek_CUR(self.handle, -@bitCast(isize, size)) catch unreachable; + return size; + } + /// Returns the number of bytes read. If the number read is smaller than `buffer.len`, it /// means the file reached the end. Reaching the end of a file is not an error condition. 
pub fn readAll(self: File, buffer: []u8) ReadError!usize { @@ -1374,7 +1390,7 @@ pub const File = struct { } } - pub const Reader = io.Reader(File, ReadError, read); + pub const Reader = io.Reader(File, ReadError, read, peek); pub fn reader(file: File) Reader { return .{ .context = file }; diff --git a/lib/std/io/bit_reader.zig b/lib/std/io/bit_reader.zig index e897850b83fd..3c6163465d20 100644 --- a/lib/std/io/bit_reader.zig +++ b/lib/std/io/bit_reader.zig @@ -14,7 +14,7 @@ pub fn BitReader(comptime endian: std.builtin.Endian, comptime ReaderType: type) bit_count: u3, pub const Error = ReaderType.Error; - pub const Reader = io.Reader(*Self, Error, read); + pub const Reader = io.Reader(*Self, Error, read, null); const Self = @This(); const u8_bit_count = @bitSizeOf(u8); diff --git a/lib/std/io/buffered_reader.zig b/lib/std/io/buffered_reader.zig index b803e37602e6..8f676e0b3072 100644 --- a/lib/std/io/buffered_reader.zig +++ b/lib/std/io/buffered_reader.zig @@ -9,30 +9,54 @@ pub fn BufferedReader(comptime buffer_size: usize, comptime ReaderType: type) ty fifo: FifoType = FifoType.init(), pub const Error = ReaderType.Error; - pub const Reader = io.Reader(*Self, Error, read); + pub const Reader = io.Reader(*Self, Error, read, peek); const Self = @This(); const FifoType = std.fifo.LinearFifo(u8, std.fifo.LinearFifoBufferType{ .Static = buffer_size }); - pub fn read(self: *Self, dest: []u8) Error!usize { - var dest_index: usize = 0; - while (dest_index < dest.len) { - const written = self.fifo.read(dest[dest_index..]); - if (written == 0) { - // fifo empty, fill it - const writable = self.fifo.writableSlice(0); - assert(writable.len > 0); - const n = try self.unbuffered_reader.read(writable); - if (n == 0) { - // reading from the unbuffered stream returned nothing - // so we have nothing left to read. 
- return dest_index; - } - self.fifo.update(n); + fn read(s: *Self, b: []u8) Error!usize { + var n: usize = 0; // amount read + + if (s.cur >= s.l) { + return 0; + } + n = b.len; + var cur = s.cur + n; + if (cur > s.l) { + n -= cur - s.l; + cur = s.l; + } + for (b[0..n]) |_, i| { + if (s.cur + i >= s.l -| (1 << 16)) { + b[i] = 1; + } else { + b[i] = 0; + } + } + s.cur = cur; + return n; + } + + fn peek(s: *Self, b: []u8) Error!usize { + var n: usize = 0; // amount read + + if (s.cur >= s.l) { + return 0; + } + n = b.len; + var cur = s.cur + n; + if (cur > s.l) { + n -= cur - s.l; + cur = s.l; + } + for (b[0..n]) |_, i| { + if (s.cur + i >= s.l -| (1 << 16)) { + b[i] = 1; + } else { + b[i] = 0; } - dest_index += written; } - return dest.len; + return n; } pub fn reader(self: *Self) Reader { diff --git a/lib/std/io/counting_reader.zig b/lib/std/io/counting_reader.zig index 54e8e6f531a3..f8056fd86418 100644 --- a/lib/std/io/counting_reader.zig +++ b/lib/std/io/counting_reader.zig @@ -9,7 +9,7 @@ pub fn CountingReader(comptime ReaderType: anytype) type { bytes_read: u64 = 0, pub const Error = ReaderType.Error; - pub const Reader = io.Reader(*@This(), Error, read); + pub const Reader = io.Reader(*@This(), Error, read, peek); pub fn read(self: *@This(), buf: []u8) Error!usize { const amt = try self.child_reader.read(buf); @@ -17,6 +17,11 @@ pub fn CountingReader(comptime ReaderType: anytype) type { return amt; } + pub fn peek(self: *@This(), buf: []u8) Error!usize { + const amt = try self.child_reader.peek(buf); + return amt; + } + pub fn reader(self: *@This()) Reader { return .{ .context = self }; } diff --git a/lib/std/io/fixed_buffer_stream.zig b/lib/std/io/fixed_buffer_stream.zig index b002bb47b83a..75288e53ee98 100644 --- a/lib/std/io/fixed_buffer_stream.zig +++ b/lib/std/io/fixed_buffer_stream.zig @@ -17,7 +17,7 @@ pub fn FixedBufferStream(comptime Buffer: type) type { pub const SeekError = error{}; pub const GetSeekPosError = error{}; - pub const Reader = 
io.Reader(*Self, ReadError, read); + pub const Reader = io.Reader(*Self, ReadError, read, peek); pub const Writer = io.Writer(*Self, WriteError, write); pub const SeekableStream = io.SeekableStream( @@ -54,6 +54,15 @@ pub fn FixedBufferStream(comptime Buffer: type) type { return size; } + pub fn peek(self: *Self, dest: []u8) ReadError!usize { + const size = std.math.min(dest.len, self.buffer.len - self.pos); + const end = self.pos + size; + + mem.copy(u8, dest[0..size], self.buffer[self.pos..end]); + + return size; + } + /// If the returned number of bytes written is less than requested, the /// buffer is full. Returns `error.NoSpaceLeft` when no bytes would be written. /// Note: `error.NoSpaceLeft` matches the corresponding error from diff --git a/lib/std/io/limited_reader.zig b/lib/std/io/limited_reader.zig index aa00af0d0925..9a5c2faeaba4 100644 --- a/lib/std/io/limited_reader.zig +++ b/lib/std/io/limited_reader.zig @@ -9,7 +9,7 @@ pub fn LimitedReader(comptime ReaderType: type) type { bytes_left: u64, pub const Error = ReaderType.Error; - pub const Reader = io.Reader(*Self, Error, read); + pub const Reader = io.Reader(*Self, Error, read, peek); const Self = @This(); @@ -20,6 +20,12 @@ pub fn LimitedReader(comptime ReaderType: type) type { return n; } + pub fn peek(self: *Self, dest: []u8) Error!usize { + const max_read = std.math.min(self.bytes_left, dest.len); + const n = try self.inner_reader.peek(dest[0..max_read]); + return n; + } + pub fn reader(self: *Self) Reader { return .{ .context = self }; } diff --git a/lib/std/io/peek_stream.zig b/lib/std/io/peek_stream.zig index 8779e22250eb..efb51e99cdc6 100644 --- a/lib/std/io/peek_stream.zig +++ b/lib/std/io/peek_stream.zig @@ -15,7 +15,7 @@ pub fn PeekStream( fifo: FifoType, pub const Error = ReaderType.Error; - pub const Reader = io.Reader(*Self, Error, read); + pub const Reader = io.Reader(*Self, Error, read, null); const Self = @This(); const FifoType = std.fifo.LinearFifo(u8, buffer_type); diff --git 
a/lib/std/io/reader.zig b/lib/std/io/reader.zig index 6acc00485130..444346963e13 100644 --- a/lib/std/io/reader.zig +++ b/lib/std/io/reader.zig @@ -11,6 +11,11 @@ pub fn Reader( /// If the number of bytes read is 0, it means end of stream. /// End of stream is not an error condition. comptime readFn: fn (context: Context, buffer: []u8) ReadError!usize, + /// Returns the number of bytes read. It may be less than buffer.len. + /// If the number of bytes read is 0, it means end of stream. + /// End of stream is not an error condition. + /// This function does not advance the reader position. + comptime peekFn: ?fn (context: Context, buffer: []u8) ReadError!usize, ) type { return struct { pub const Error = ReadError; @@ -26,6 +31,15 @@ pub fn Reader( return readFn(self.context, buffer); } + /// Returns the number of bytes read. It may be less than buffer.len. + /// If the number of bytes read is 0, it means end of stream. + /// End of stream is not an error condition. + /// If there is no peek function, the peek method returns 0 size. + /// This function does not advance the reader position. + pub fn peek(self: Self, buffer: []u8) Error!usize { + return if (peekFn) |f| f(self.context, buffer) else 0; + } + /// Returns the number of bytes read. If the number read is smaller than `buffer.len`, it /// means the stream reached the end. Reaching the end of a stream is not an error /// condition. @@ -39,12 +53,33 @@ pub fn Reader( return index; } + /// Returns the number of bytes read. If the number read is smaller than `buffer.len`, it + /// means the stream reached the end. Reaching the end of a stream is not an error + /// condition. + /// This function does not advance the reader position. 
+ pub fn peekAll(self: Self, buffer: []u8) Error!usize { + var index: usize = 0; + while (index != buffer.len) { + const amt = try self.peek(buffer[index..]); + if (amt == 0) return index; + index += amt; + } + return index; + } + /// If the number read would be smaller than `buf.len`, `error.EndOfStream` is returned instead. pub fn readNoEof(self: Self, buf: []u8) !void { const amt_read = try self.readAll(buf); if (amt_read < buf.len) return error.EndOfStream; } + /// If the number read would be smaller than `buf.len`, `error.EndOfStream` is returned instead. + /// This function does not advance the reader position. + pub fn peekNoEof(self: Self, buf: []u8) !void { + const amt_read = try self.peekAll(buf); + if (amt_read < buf.len) return error.EndOfStream; + } + /// Appends to the `std.ArrayList` contents by reading from the stream /// until end of stream is found. /// If the number of bytes appended would exceed `max_append_size`, @@ -95,6 +130,108 @@ pub fn Reader( return array_list.toOwnedSlice(); } + /// Determines whether or not a character is an endline terminatior. + /// The Unicode standard defines a number of characters that conforming applications + /// should recognize as line terminators: + /// LF: Line Feed, U+000A + /// VT: Vertical Tab, U+000B + /// FF: Form Feed, U+000C + /// CR: Carriage Return, U+000D + /// CR+LF: CR (U+000D) followed by LF (U+000A) + /// NEL: Next Line, U+0085 + /// LS: Line Separator, U+2028 + /// PS: Paragraph Separator, U+2029 + /// This method will recognize LF, VT, FF, CR and NEL + fn isEndline(char: u8) bool { + return char == 0x0A or char == 0x0B or char == 0x0C or char == 0x0D or char == 0x85; + } + + /// Allocates enough memory to read until endline characters are found. If the allocated + /// memory would be greater than `max_size`, returns `error.StreamTooLong`. + /// If end-of-stream is found, the function returns everything up until the end. + /// If the function is called after that point, it returns null. 
+ /// Caller owns returned memory. + /// If this function returns an error, the contents from the stream read so far are lost. + pub fn readLineAlloc( + self: Self, + allocator: mem.Allocator, + max_size: usize, + ) !?[]u8 { + var array_list = std.ArrayList(u8).init(allocator); + defer array_list.deinit(); + array_list.shrinkRetainingCapacity(0); + + while (true) { + if (array_list.items.len == max_size) { + return error.StreamTooLong; + } + + var byte: u8 = try self.readByte() catch |err| switch (err) { + error.EndOfStream => { + return if (array_list.items.len == 0) null else array_list.toOwnedSlice(); + }, + }; + + try array_list.append(byte); + + while (isEndline(byte)) { + byte = try self.peekByte() catch |err| switch (err) { + error.EndOfStream => { + return array_list.toOwnedSlice(); + }, + }; + + if (!isEndline(byte)) + return array_list.toOwnedSlice(); + + _ = try self.readByte(); + array_list.append(byte); + + if (array_list.items.len == max_size) return error.StreamTooLong; + } + } + } + + /// Reads from the stream until endline characters. If the buffer is not + /// large enough to hold the entire contents, `error.StreamTooLong` is returned. + /// If end-of-stream is found, the function returns everything up until the end. + /// If the function is called after that point, it returns null. + /// Returns a slice of the stream data, with ptr equal to `buf.ptr`. The + /// endline characters are written to the output buffer but is not included + /// in the returned slice. 
+ pub fn readLine(self: Self, buf: []u8) !?[]u8 { + var index: usize = 0; + while (true) { + if (index >= buf.len) return error.StreamTooLong; + + var byte = try self.readByte() catch |err| switch (err) { + error.EndOfStream => { + return if (index == 0) null else buf[0..index]; + }, + }; + buf[index] = byte; + + while (isEndline(byte)) { + byte = try self.peekByte() catch |err| switch (err) { + error.EndOfStream => { + return buf[0..index]; + }, + }; + + if (!isEndline(byte)) + return buf[0..index]; + + _ = try self.readByte(); + index += 1; + if (index >= buf.len) return error.StreamTooLong; + + buf[index] = byte; + } + + index += 1; + } + } + /// Replaces the `std.ArrayList` contents by reading from the stream until `delimiter` is found. /// Does not include the delimiter in the result. /// If the `std.ArrayList` length would exceed `max_size`, `error.StreamTooLong` is returned and the @@ -213,6 +350,29 @@ pub fn Reader( } } + /// Reads from the stream until specified byte is found, discarding all data, + /// including the endline characters. + /// If end-of-stream is found, this function succeeds. + pub fn skipLine(self: Self) !void { + while (true) { + var byte = try self.readByte() catch |err| switch (err) { + error.EndOfStream => return, + else => |e| return e, + }; + + while (isEndline(byte)) { + byte = try self.peekByte() catch |err| switch (err) { + error.EndOfStream => return, + }; + + if (!isEndline(byte)) + return; + + _ = try self.readByte(); + } + } + } + /// Reads from the stream until specified byte is found, discarding all data, /// including the delimiter. /// If end-of-stream is found, this function succeeds. @@ -234,11 +394,24 @@ pub fn Reader( return result[0]; } + /// Peeks 1 byte from the stream or returns `error.EndOfStream`. 
+ pub fn peekByte(self: Self) !u8 { + var result: [1]u8 = undefined; + const amt_read = try self.peek(result[0..]); + if (amt_read < 1) return error.EndOfStream; + return result[0]; + } + /// Same as `readByte` except the returned byte is signed. pub fn readByteSigned(self: Self) !i8 { return @bitCast(i8, try self.readByte()); } + /// Same as `peekByte` except the returned byte is signed. + pub fn peekByteSigned(self: Self) !i8 { + return @bitCast(i8, try self.peekByte()); + } + /// Reads exactly `num_bytes` bytes and returns as an array. /// `num_bytes` must be comptime-known pub fn readBytesNoEof(self: Self, comptime num_bytes: usize) ![num_bytes]u8 { @@ -247,6 +420,14 @@ pub fn Reader( return bytes; } + /// Peeks exactly `num_bytes` bytes and returns as an array. + /// `num_bytes` must be comptime-known + pub fn peekBytesNoEof(self: Self, comptime num_bytes: usize) ![num_bytes]u8 { + var bytes: [num_bytes]u8 = undefined; + try self.peekNoEof(&bytes); + return bytes; + } + /// Reads bytes into the bounded array, until /// the bounded array is full, or the stream ends. 
pub fn readIntoBoundedBytes( @@ -274,27 +455,54 @@ pub fn Reader( return mem.readIntNative(T, &bytes); } + /// Peeks a native-endian integer + pub fn peekIntNative(self: Self, comptime T: type) !T { + const bytes = try self.peekBytesNoEof((@typeInfo(T).Int.bits + 7) / 8); + return mem.readIntNative(T, &bytes); + } + /// Reads a foreign-endian integer pub fn readIntForeign(self: Self, comptime T: type) !T { const bytes = try self.readBytesNoEof((@typeInfo(T).Int.bits + 7) / 8); return mem.readIntForeign(T, &bytes); } + /// Peeks a foreign-endian integer + pub fn peeksIntForeign(self: Self, comptime T: type) !T { + const bytes = try self.peekBytesNoEof((@typeInfo(T).Int.bits + 7) / 8); + return mem.readIntForeign(T, &bytes); + } + pub fn readIntLittle(self: Self, comptime T: type) !T { const bytes = try self.readBytesNoEof((@typeInfo(T).Int.bits + 7) / 8); return mem.readIntLittle(T, &bytes); } + pub fn peekIntLittle(self: Self, comptime T: type) !T { + const bytes = try self.peekBytesNoEof((@typeInfo(T).Int.bits + 7) / 8); + return mem.readIntLittle(T, &bytes); + } + pub fn readIntBig(self: Self, comptime T: type) !T { const bytes = try self.readBytesNoEof((@typeInfo(T).Int.bits + 7) / 8); return mem.readIntBig(T, &bytes); } + pub fn peekIntBig(self: Self, comptime T: type) !T { + const bytes = try self.peekBytesNoEof((@typeInfo(T).Int.bits + 7) / 8); + return mem.readIntBig(T, &bytes); + } + pub fn readInt(self: Self, comptime T: type, endian: std.builtin.Endian) !T { const bytes = try self.readBytesNoEof((@typeInfo(T).Int.bits + 7) / 8); return mem.readInt(T, &bytes, endian); } + pub fn peekInt(self: Self, comptime T: type, endian: std.builtin.Endian) !T { + const bytes = try self.peekBytesNoEof((@typeInfo(T).Int.bits + 7) / 8); + return mem.readInt(T, &bytes, endian); + } + pub fn readVarInt(self: Self, comptime ReturnType: type, endian: std.builtin.Endian, size: usize) !ReturnType { assert(size <= @sizeOf(ReturnType)); var bytes_buf: [@sizeOf(ReturnType)]u8 = 
undefined; @@ -303,6 +511,14 @@ pub fn Reader( return mem.readVarInt(ReturnType, bytes, endian); } + pub fn peekVarInt(self: Self, comptime ReturnType: type, endian: std.builtin.Endian, size: usize) !ReturnType { + assert(size <= @sizeOf(ReturnType)); + var bytes_buf: [@sizeOf(ReturnType)]u8 = undefined; + const bytes = bytes_buf[0..size]; + try self.peekNoEof(bytes); + return mem.readVarInt(ReturnType, bytes, endian); + } + /// Optional parameters for `skipBytes` pub const SkipBytesOptions = struct { buf_size: usize = 512, @@ -341,6 +557,14 @@ pub fn Reader( return res[0]; } + pub fn peekStruct(self: Self, comptime T: type) !T { + // Only extern and packed structs have defined in-memory layout. + comptime assert(@typeInfo(T).Struct.layout != .Auto); + var res: [1]T = undefined; + try self.peekNoEof(mem.sliceAsBytes(res[0..])); + return res[0]; + } + /// Reads an integer with the same size as the given enum's tag type. If the integer matches /// an enum tag, casts the integer to the enum tag and returns it. Otherwise, returns an error. /// TODO optimization taking advantage of most fields being in order @@ -360,6 +584,26 @@ pub fn Reader( return E.InvalidValue; } + + /// Peeks an integer with the same size as the given enum's tag type. If the integer matches + /// an enum tag, casts the integer to the enum tag and returns it. Otherwise, returns an error. + /// TODO optimization taking advantage of most fields being in order + pub fn peekEnum(self: Self, comptime Enum: type, endian: std.builtin.Endian) !Enum { + const E = error{ + /// An integer was read, but it did not match any of the tags in the supplied enum. 
+ InvalidValue, + }; + const type_info = @typeInfo(Enum).Enum; + const tag = try self.peekInt(type_info.tag_type, endian); + + inline for (std.meta.fields(Enum)) |field| { + if (tag == field.value) { + return @field(Enum, field.name); + } + } + + return E.InvalidValue; + } }; } diff --git a/lib/std/io/stream_source.zig b/lib/std/io/stream_source.zig index ce5256028c04..8633f7a267e8 100644 --- a/lib/std/io/stream_source.zig +++ b/lib/std/io/stream_source.zig @@ -25,7 +25,7 @@ pub const StreamSource = union(enum) { pub const SeekError = io.FixedBufferStream([]u8).SeekError || (if (has_file) std.fs.File.SeekError else error{}); pub const GetSeekPosError = io.FixedBufferStream([]u8).GetSeekPosError || (if (has_file) std.fs.File.GetSeekPosError else error{}); - pub const Reader = io.Reader(*StreamSource, ReadError, read); + pub const Reader = io.Reader(*StreamSource, ReadError, read, peek); pub const Writer = io.Writer(*StreamSource, WriteError, write); pub const SeekableStream = io.SeekableStream( *StreamSource, @@ -45,6 +45,14 @@ pub const StreamSource = union(enum) { } } + pub fn peek(self: *StreamSource, dest: []u8) ReadError!usize { + switch (self.*) { + .buffer => |*x| return x.peek(dest), + .const_buffer => |*x| return x.peek(dest), + .file => |x| if (!has_file) unreachable else return x.peek(dest), + } + } + pub fn write(self: *StreamSource, bytes: []const u8) WriteError!usize { switch (self.*) { .buffer => |*x| return x.write(bytes), diff --git a/lib/std/net.zig b/lib/std/net.zig index 21fa36c4eb82..ed7d0f3aa6e6 100644 --- a/lib/std/net.zig +++ b/lib/std/net.zig @@ -1636,7 +1636,7 @@ pub const Stream = struct { pub const ReadError = os.ReadError; pub const WriteError = os.WriteError; - pub const Reader = io.Reader(Stream, ReadError, read); + pub const Reader = io.Reader(Stream, ReadError, read, null); pub const Writer = io.Writer(Stream, WriteError, write); pub fn reader(self: Stream) Reader { diff --git a/lib/std/os/uefi/protocols/file_protocol.zig 
b/lib/std/os/uefi/protocols/file_protocol.zig index aa419093bf92..7bcd96d115ca 100644 --- a/lib/std/os/uefi/protocols/file_protocol.zig +++ b/lib/std/os/uefi/protocols/file_protocol.zig @@ -24,7 +24,7 @@ pub const FileProtocol = extern struct { pub const WriteError = error{WriteError}; pub const SeekableStream = io.SeekableStream(*const FileProtocol, SeekError, GetSeekPosError, seekTo, seekBy, getPos, getEndPos); - pub const Reader = io.Reader(*const FileProtocol, ReadError, readFn); + pub const Reader = io.Reader(*const FileProtocol, ReadError, readFn, null); pub const Writer = io.Writer(*const FileProtocol, WriteError, writeFn); pub fn seekableStream(self: *FileProtocol) SeekableStream { diff --git a/lib/std/pdb.zig b/lib/std/pdb.zig index 00ce2cc5baab..bc144a00cb54 100644 --- a/lib/std/pdb.zig +++ b/lib/std/pdb.zig @@ -1092,7 +1092,7 @@ const MsfStream = struct { return block * self.block_size + offset; } - pub fn reader(self: *MsfStream) std.io.Reader(*MsfStream, Error, read) { + pub fn reader(self: *MsfStream) std.io.Reader(*MsfStream, Error, read, null) { return .{ .context = self }; } }; diff --git a/lib/std/x/net/tcp.zig b/lib/std/x/net/tcp.zig index a750e27fc9ed..168b3ee20e1b 100644 --- a/lib/std/x/net/tcp.zig +++ b/lib/std/x/net/tcp.zig @@ -116,7 +116,7 @@ pub const Client = struct { } /// Wrap `tcp.Client` into `std.io.Reader`. - pub fn reader(self: Client, flags: u32) io.Reader(Client.Reader, ErrorSetOf(Client.Reader.read), Client.Reader.read) { + pub fn reader(self: Client, flags: u32) io.Reader(Client.Reader, ErrorSetOf(Client.Reader.read), Client.Reader.read, Client.Reader.peek) { return .{ .context = .{ .client = self, .flags = flags } }; } From a0ed6b085bf630f71873f320820c4a893d885b33 Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Fri, 4 Nov 2022 23:26:26 -0400 Subject: [PATCH 2/3] Fix Tests Failing, Implement TCP, FIFO Peek(). Fix BufferedReader error & implement peek. 
--- lib/std/fifo.zig | 69 +++++++++++++++++++++++++++++++ lib/std/io/buffered_reader.zig | 73 ++++++++++++++++----------------- lib/std/x/net/tcp.zig | 11 +++++ lib/std/x/os/socket_posix.zig | 6 +++ lib/std/x/os/socket_windows.zig | 36 ++++++++++++++++ 5 files changed, 157 insertions(+), 38 deletions(-) diff --git a/lib/std/fifo.zig b/lib/std/fifo.zig index dfafa3c97f47..9f6c1b4ac9a9 100644 --- a/lib/std/fifo.zig +++ b/lib/std/fifo.zig @@ -204,6 +204,14 @@ pub fn LinearFifo( self.discard(1); return c; } + + /// Peek the next item from the fifo + pub fn peekItemNext(self: *Self) ?T { + if (self.count == 0) return null; + + const c = self.buf[self.head]; + return c; + } /// Read data from the fifo into `dst`, returns number of items copied. pub fn read(self: *Self, dst: []T) usize { @@ -220,6 +228,22 @@ pub fn LinearFifo( return dst.len - dst_left.len; } + + pub fn peek(self: *Self, dst: []T) usize { + var dst_left = dst; + + var off : usize = 0; + while (dst_left.len > 0) { + const slice = self.readableSlice(off); + if (slice.len == 0) break; + const n = math.min(slice.len, dst_left.len); + mem.copy(T, dst_left, slice[0..n]); + off += n; + dst_left = dst_left[n..]; + } + + return dst.len - dst_left.len; + } /// Same as `read` except it returns an error union /// The purpose of this function existing is to match `std.io.Reader` API. 
@@ -525,3 +549,48 @@ test "LinearFifo" { } } } + +test "LinearFifo Peek" { + inline for ([_]type{ u1, u8, u16, u64 }) |T| { + inline for ([_]LinearFifoBufferType{ LinearFifoBufferType{ .Static = 32 }, .Slice, .Dynamic }) |bt| { + const FifoType = LinearFifo(T, bt); + var buf: if (bt == .Slice) [32]T else void = undefined; + var fifo = switch (bt) { + .Static => FifoType.init(), + .Slice => FifoType.init(buf[0..]), + .Dynamic => FifoType.init(testing.allocator), + }; + defer fifo.deinit(); + + try fifo.write(&[_]T{ 0, 1, 1, 0, 1 }); + try testing.expectEqual(@as(usize, 5), fifo.readableLength()); + + { + var a = fifo.peekItemNext().?; + var b = fifo.readItem().?; + + try testing.expectEqual(a, b); + try testing.expectEqual(@as(usize, 4), fifo.readableLength()); + + var c = fifo.peekItemNext().?; + try testing.expectEqual(@as(T, 1), c); + try testing.expectEqual(@as(usize, 4), fifo.readableLength()); + + fifo.discard(fifo.readableLength()); + } + + { + try fifo.writeItem(1); + try fifo.writeItem(1); + try fifo.writeItem(1); + try testing.expectEqual(@as(usize, 3), fifo.readableLength()); + } + + { + var readBuf: [3]T = undefined; + const n = fifo.read(&readBuf); + try testing.expectEqual(@as(usize, 3), n); // NOTE: It should be the number of items. 
+ } + } + } +} \ No newline at end of file diff --git a/lib/std/io/buffered_reader.zig b/lib/std/io/buffered_reader.zig index 8f676e0b3072..726070e573c8 100644 --- a/lib/std/io/buffered_reader.zig +++ b/lib/std/io/buffered_reader.zig @@ -14,49 +14,46 @@ pub fn BufferedReader(comptime buffer_size: usize, comptime ReaderType: type) ty const Self = @This(); const FifoType = std.fifo.LinearFifo(u8, std.fifo.LinearFifoBufferType{ .Static = buffer_size }); - fn read(s: *Self, b: []u8) Error!usize { - var n: usize = 0; // amount read - - if (s.cur >= s.l) { - return 0; - } - n = b.len; - var cur = s.cur + n; - if (cur > s.l) { - n -= cur - s.l; - cur = s.l; - } - for (b[0..n]) |_, i| { - if (s.cur + i >= s.l -| (1 << 16)) { - b[i] = 1; - } else { - b[i] = 0; + pub fn read(self: *Self, dest: []u8) Error!usize { + var dest_index: usize = 0; + while (dest_index < dest.len) { + const written = self.fifo.read(dest[dest_index..]); + if (written == 0) { + // fifo empty, fill it + const writable = self.fifo.writableSlice(0); + assert(writable.len > 0); + const n = try self.unbuffered_reader.read(writable); + if (n == 0) { + // reading from the unbuffered stream returned nothing + // so we have nothing left to read. 
+ return dest_index; + } + self.fifo.update(n); } + dest_index += written; } - s.cur = cur; - return n; + return dest.len; } - fn peek(s: *Self, b: []u8) Error!usize { - var n: usize = 0; // amount read - - if (s.cur >= s.l) { - return 0; - } - n = b.len; - var cur = s.cur + n; - if (cur > s.l) { - n -= cur - s.l; - cur = s.l; - } - for (b[0..n]) |_, i| { - if (s.cur + i >= s.l -| (1 << 16)) { - b[i] = 1; - } else { - b[i] = 0; + fn peek(self: *Self, dest: []u8) Error!usize { + var dest_index: usize = 0; + while (dest_index < dest.len) { + const written = self.fifo.peek(dest[dest_index..]); + if (written == 0) { + // fifo empty, fill it + const writable = self.fifo.writableSlice(0); + assert(writable.len > 0); + const n = try self.unbuffered_reader.peek(writable); + if (n == 0) { + // reading from the unbuffered stream returned nothing + // so we have nothing left to read. + return dest_index; + } + self.fifo.update(n); } + dest_index += written; } - return n; + return dest.len; } pub fn reader(self: *Self) Reader { @@ -76,7 +73,7 @@ test "io.BufferedReader" { const Error = error{NoError}; const Self = @This(); - const Reader = io.Reader(*Self, Error, read); + const Reader = io.Reader(*Self, Error, read, null); fn init(str: []const u8) Self { return Self{ diff --git a/lib/std/x/net/tcp.zig b/lib/std/x/net/tcp.zig index 168b3ee20e1b..4d55425f4103 100644 --- a/lib/std/x/net/tcp.zig +++ b/lib/std/x/net/tcp.zig @@ -64,6 +64,11 @@ pub const Client = struct { pub fn read(self: Client.Reader, buffer: []u8) !usize { return self.client.read(buffer, self.flags); } + + /// Implements `peekFn` for `std.io.Reader`. + pub fn peek(self: Client.Reader, buffer: []u8) !usize { + return self.client.peek(buffer, self.flags); + } }; /// Implements `std.io.Writer`. @@ -131,6 +136,12 @@ pub const Client = struct { return self.socket.read(buf, flags); } + /// Peek data from the socket into the buffer provided with a set of flags + /// specified.
It returns the number of bytes read into the buffer provided. + pub fn peek(self: Client, buf: []u8, flags: u32) !usize { + return self.socket.peek(buf, flags); + } + /// Write a buffer of data provided to the socket with a set of flags specified. /// It returns the number of bytes that are written to the socket. pub fn write(self: Client, buf: []const u8, flags: u32) !usize { diff --git a/lib/std/x/os/socket_posix.zig b/lib/std/x/os/socket_posix.zig index 859075aa20bd..ea34bf05b119 100644 --- a/lib/std/x/os/socket_posix.zig +++ b/lib/std/x/os/socket_posix.zig @@ -62,6 +62,12 @@ pub fn Mixin(comptime Socket: type) type { pub fn read(self: Socket, buf: []u8, flags: u32) !usize { return os.recv(self.fd, buf, flags); } + + /// Peek data from the socket into the buffer provided with a set of flags + /// specified. It returns the number of bytes read into the buffer provided. + pub fn peek(self: Socket, buf: []u8, flags: u32) !usize { + return os.recv(self.fd, buf, flags | os.MSG.PEEK); + } /// Write a buffer of data provided to the socket with a set of flags specified. /// It returns the number of bytes that are written to the socket. diff --git a/lib/std/x/os/socket_windows.zig b/lib/std/x/os/socket_windows.zig index dc6d27c05016..892b99b30bdc 100644 --- a/lib/std/x/os/socket_windows.zig +++ b/lib/std/x/os/socket_windows.zig @@ -210,6 +210,42 @@ pub fn Mixin(comptime Socket: type) type { return @intCast(usize, num_bytes); } + /// Peek data from the socket into the buffer provided with a set of flags + /// specified. It returns the number of bytes read into the buffer provided. 
+ pub fn peek(self: Socket, buf: []u8, flags: u32) !usize { + var bufs = &[_]ws2_32.WSABUF{.{ .len = @intCast(u32, buf.len), .buf = buf.ptr }}; + var num_bytes: u32 = undefined; + var flags_ = flags | ws2_32.MSG.PEEK; + + const rc = ws2_32.WSARecv(self.fd, bufs, 1, &num_bytes, &flags_, null, null); + if (rc == ws2_32.SOCKET_ERROR) { + return switch (ws2_32.WSAGetLastError()) { + .WSAECONNABORTED => error.ConnectionAborted, + .WSAECONNRESET => error.ConnectionResetByPeer, + .WSAEDISCON => error.ConnectionClosedByPeer, + .WSAEFAULT => error.BadBuffer, + .WSAEINPROGRESS, + .WSAEWOULDBLOCK, + .WSA_IO_PENDING, + .WSAETIMEDOUT, + => error.WouldBlock, + .WSAEINTR => error.Cancelled, + .WSAEINVAL => error.SocketNotBound, + .WSAEMSGSIZE => error.MessageTooLarge, + .WSAENETDOWN => error.NetworkSubsystemFailed, + .WSAENETRESET => error.NetworkReset, + .WSAENOTCONN => error.SocketNotConnected, + .WSAENOTSOCK => error.FileDescriptorNotASocket, + .WSAEOPNOTSUPP => error.OperationNotSupported, + .WSAESHUTDOWN => error.AlreadyShutdown, + .WSA_OPERATION_ABORTED => error.OperationAborted, + else => |err| windows.unexpectedWSAError(err), + }; + } + + return @intCast(usize, num_bytes); + } + /// Write a buffer of data provided to the socket with a set of flags specified. /// It returns the number of bytes that are written to the socket. 
pub fn write(self: Socket, buf: []const u8, flags: u32) !usize { From c2fa2d5a13b80abcffe24f2886020e8b599d8f7e Mon Sep 17 00:00:00 2001 From: Nathan Bourgeois Date: Fri, 4 Nov 2022 23:58:15 -0400 Subject: [PATCH 3/3] Fix formatting error --- lib/std/fifo.zig | 8 ++++---- lib/std/x/os/socket_posix.zig | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/std/fifo.zig b/lib/std/fifo.zig index 9f6c1b4ac9a9..1174d46eadea 100644 --- a/lib/std/fifo.zig +++ b/lib/std/fifo.zig @@ -204,7 +204,7 @@ pub fn LinearFifo( self.discard(1); return c; } - + /// Peek the next item from the fifo pub fn peekItemNext(self: *Self) ?T { if (self.count == 0) return null; @@ -228,11 +228,11 @@ pub fn LinearFifo( return dst.len - dst_left.len; } - + pub fn peek(self: *Self, dst: []T) usize { var dst_left = dst; - var off : usize = 0; + var off: usize = 0; while (dst_left.len > 0) { const slice = self.readableSlice(off); if (slice.len == 0) break; @@ -593,4 +593,4 @@ test "LinearFifo Peek" { } } } -} \ No newline at end of file +} diff --git a/lib/std/x/os/socket_posix.zig b/lib/std/x/os/socket_posix.zig index ea34bf05b119..dee52fcdf8af 100644 --- a/lib/std/x/os/socket_posix.zig +++ b/lib/std/x/os/socket_posix.zig @@ -62,7 +62,7 @@ pub fn Mixin(comptime Socket: type) type { pub fn read(self: Socket, buf: []u8, flags: u32) !usize { return os.recv(self.fd, buf, flags); } - + /// Peek data from the socket into the buffer provided with a set of flags /// specified. It returns the number of bytes read into the buffer provided. pub fn peek(self: Socket, buf: []u8, flags: u32) !usize {