From ce71c29faf54c1b9949a43bfa4fa67f79baaeb0e Mon Sep 17 00:00:00 2001 From: Kendall Condon <goon.pri.low@gmail.com> Date: Sun, 4 May 2025 14:54:54 -0400 Subject: [PATCH 1/3] add fuzz test for zig fmt Adds a new fuzz test for zig fmt. This fuzz test checks that Ast.render succeeds for parsed inputs. Additionally, for inputs it knows that Ast.render cannot change the order of, it checks that they are not rewritten. This fuzz test has been very successful. Using #23416, it has found three bugs (one was a TODO); two of them I have fixed and the other is in #23754. I have run the test for 650,000,000 iterations (about 2 hours) and haven't found any more bugs. Some functions in the fuzz test have instrumentation disabled because their branches are not interesting to the fuzzer; doing this found a ~40% boost to the runs per second. Additionally, the fuzz test handles tokenization itself since so it can determine if the input can be rewritten. --- lib/std/zig/parser_test.zig | 131 ++++++++++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) diff --git a/lib/std/zig/parser_test.zig b/lib/std/zig/parser_test.zig index e5961621bdd5..848d42b6c36f 100644 --- a/lib/std/zig/parser_test.zig +++ b/lib/std/zig/parser_test.zig @@ -6528,3 +6528,134 @@ fn testError(source: [:0]const u8, expected_errors: []const Error) !void { try std.testing.expectEqual(expected, tree.errors[i].tag); } } + +test "fuzz zig fmt" { + try std.testing.fuzz({}, fuzzTestOneRender, .{}); +} + +fn parseTokens( + fba: std.mem.Allocator, + source: [:0]const u8, +) error{ Invalid, OutOfMemory }!struct { + toks: std.zig.Ast.TokenList, + maybe_rewriteable: bool, +} { + @disableInstrumentation(); + // Byte-order marker can be stripped + var maybe_rewriteable: bool = std.mem.startsWith(u8, source, "\xEF\xBB\xBF"); + + var tokens: std.zig.Ast.TokenList = .{}; + try tokens.ensureTotalCapacity(fba, source.len / 2); + var tokenizer: std.zig.Tokenizer = .init(source); + while (true) { + const tok = tokenizer.next(); + switch (tok.tag) { + .invalid, + .invalid_periodasterisks, + => return error.Invalid, + // Extra colons can be removed + .keyword_asm, + // Qualifiers can be reordered + // keyword_const is intentionally excluded since it is used in other contexts and + // having only one qualifier will never lead to reordering. + .keyword_addrspace, + .keyword_align, + .keyword_allowzero, + .keyword_callconv, + .keyword_linksection, + .keyword_volatile, + => maybe_rewriteable = true, + // Labeled statements can sometimes be (questionably) rewritten due to ambigous grammer + // ex: `O: for (x) |T| (break O: T)` -> `O: O: for (x) |T| (break :O T)` + .keyword_for, + .keyword_while, + .l_brace, + => { + const tags = tokens.items(.tag); + maybe_rewriteable = maybe_rewriteable or (tags.len >= 2 and + tags[tags.len - 2] == .identifier and tags[tags.len - 1] == .colon); + }, + // #23754 + .container_doc_comment => maybe_rewriteable = true, + // Quoted identifiers can be unquoted + .identifier => maybe_rewriteable = maybe_rewriteable or source[tok.loc.start] == '@', + else => {}, + } + try tokens.append(fba, .{ + .tag = tok.tag, + .start = @intCast(tok.loc.start), + }); + if (tok.tag == .eof) break; + } + return .{ + .toks = tokens, + .maybe_rewriteable = maybe_rewriteable, + }; +} + +fn parseAstFromTokens( + fba: std.mem.Allocator, + source: [:0]const u8, + toks: std.zig.Ast.TokenList, +) error{OutOfMemory}!std.zig.Ast { + var parser: @import("Parse.zig") = .{ + .source = source, + .gpa = fba, + .tokens = toks.slice(), + .errors = .{}, + .nodes = .{}, + .extra_data = .{}, + .scratch = .{}, + .tok_i = 0, + }; + try parser.nodes.ensureTotalCapacity(fba, 1 + toks.len / 2); + try parser.parseRoot(); + return .{ + .source = source, + .mode = .zig, + .tokens = parser.tokens, + .nodes = parser.nodes.slice(), + .extra_data = parser.extra_data.items, + .errors = parser.errors.items, + }; +} + +/// Checks equivelence of non-whitespace characters +/// If there are commas in `bytes`, then it is checked they are also present in `rendered`. Extra +/// commas in `rendered` are considered equivelent. +fn isRewritten(bytes: []const u8, rendered: []const u8) bool { + @disableInstrumentation(); + var i: usize = 0; + for (bytes) |c| switch (c) { + ' ', '\r', '\t', '\n' => {}, + else => while (true) { + if (i == rendered.len) return true; + defer i += 1; + switch (rendered[i]) { + ' ', '\r', '\n' => {}, + ',' => if (c == ',') break, + else => |n| if (c != n) return false else break, + } + }, + }; + for (rendered[i..]) |c| switch (c) { + ' ', '\n', ',' => {}, + else => return true, + }; + return false; +} + +fn fuzzTestOneRender(_: void, bytes: []const u8) anyerror!void { + if (bytes.len < 2) return; + const mem_limit: u16 = @bitCast(bytes[0..2].*); + + var fba_ctx = std.heap.FixedBufferAllocator.init(fixed_buffer_mem[0..mem_limit]); + const fba = fba_ctx.allocator(); + const source = fba.dupeZ(u8, bytes[2..]) catch return; + const toks = parseTokens(fba, source) catch return; + const tree = parseAstFromTokens(fba, source, toks.toks) catch return; + if (tree.errors.len != 0) return; + + const rendered = tree.render(fba) catch return; + if (!toks.maybe_rewriteable and isRewritten(source, rendered)) return error.TestFailed; +} From d36cbe3ac211a05fdae7270611f67f2cf7b2b943 Mon Sep 17 00:00:00 2001 From: Kendall Condon <goon.pri.low@gmail.com> Date: Sun, 4 May 2025 15:33:04 -0400 Subject: [PATCH 2/3] zig fmt: seperate errors in error sets with comments --- lib/std/zig/parser_test.zig | 31 ++++++++++++++++ lib/std/zig/render.zig | 73 ++++++++++++++++++++++--------------- 2 files changed, 74 insertions(+), 30 deletions(-) diff --git a/lib/std/zig/parser_test.zig b/lib/std/zig/parser_test.zig index 848d42b6c36f..807406adcede 100644 --- a/lib/std/zig/parser_test.zig +++ b/lib/std/zig/parser_test.zig @@ -6107,6 +6107,37 @@ test "zig fmt: indentation of comments within catch, else, orelse" { ); } +test "zig fmt: seperate errors in error sets with comments" { + try testTransform( + \\error{ + \\ /// This error is very bad! + \\ A, B} + \\ + , + \\error{ + \\ /// This error is very bad! + \\ A, + \\ B, + \\} + \\ + ); + + try testTransform( + \\error{ + \\ A, B + \\ // something important + \\} + \\ + , + \\error{ + \\ A, + \\ B, + \\ // something important + \\} + \\ + ); +} + test "recovery: top level" { try testError( \\test "" {inline} diff --git a/lib/std/zig/render.zig b/lib/std/zig/render.zig index 586bb9696b08..ee3315f4f185 100644 --- a/lib/std/zig/render.zig +++ b/lib/std/zig/render.zig @@ -753,39 +753,52 @@ fn renderExpression(r: *Render, node: Ast.Node.Index, space: Space) Error!void { try renderToken(r, lbrace, .none); try renderIdentifier(r, lbrace + 1, .none, .eagerly_unquote); // identifier return renderToken(r, rbrace, space); - } else if (tree.tokenTag(rbrace - 1) == .comma) { - // There is a trailing comma so render each member on a new line. - try ais.pushIndent(.normal); - try renderToken(r, lbrace, .newline); - var i = lbrace + 1; - while (i < rbrace) : (i += 1) { - if (i > lbrace + 1) try renderExtraNewlineToken(r, i); - switch (tree.tokenTag(i)) { - .doc_comment => try renderToken(r, i, .newline), - .identifier => { - try ais.pushSpace(.comma); - try renderIdentifier(r, i, .comma, .eagerly_unquote); - ais.popSpace(); - }, - .comma => {}, - else => unreachable, - } - } - ais.popIndent(); - return renderToken(r, rbrace, space); } else { - // There is no trailing comma so render everything on one line. - try renderToken(r, lbrace, .space); - var i = lbrace + 1; - while (i < rbrace) : (i += 1) { - switch (tree.tokenTag(i)) { - .doc_comment => unreachable, // TODO - .identifier => try renderIdentifier(r, i, .comma_space, .eagerly_unquote), - .comma => {}, - else => unreachable, + // If there is a trailing comma, comment, or document comment, then render each + // item on its own line. + const multi_line = tree.tokenTag(rbrace - 1) == .comma or + hasComment(tree, lbrace, rbrace) or + blk: { + var i = lbrace + 1; + break :blk while (i < rbrace) : (i += 1) { + if (tree.tokenTag(i) == .doc_comment) + break true; + } else false; + }; + + if (multi_line) { + // There is a trailing comma so render each member on a new line. + try ais.pushIndent(.normal); + try renderToken(r, lbrace, .newline); + var i = lbrace + 1; + while (i < rbrace) : (i += 1) { + if (i > lbrace + 1) try renderExtraNewlineToken(r, i); + switch (tree.tokenTag(i)) { + .doc_comment => try renderToken(r, i, .newline), + .identifier => { + try ais.pushSpace(.comma); + try renderIdentifier(r, i, .comma, .eagerly_unquote); + ais.popSpace(); + }, + .comma => {}, + else => unreachable, + } } + ais.popIndent(); + return renderToken(r, rbrace, space); + } else { + // There is no trailing comma so render everything on one line. + try renderToken(r, lbrace, .space); + var i = lbrace + 1; + while (i < rbrace) : (i += 1) { + switch (tree.tokenTag(i)) { + .identifier => try renderIdentifier(r, i, .comma_space, .eagerly_unquote), + .comma => {}, + else => unreachable, + } + } + return renderToken(r, rbrace, space); } - return renderToken(r, rbrace, space); } }, From 270f4ac9047ebf009522b843898de4191e072de5 Mon Sep 17 00:00:00 2001 From: Kendall Condon <goon.pri.low@gmail.com> Date: Sun, 4 May 2025 15:33:49 -0400 Subject: [PATCH 3/3] zig fmt: properly check escape sequences --- lib/std/zig/parser_test.zig | 10 ++++++++++ lib/std/zig/render.zig | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/lib/std/zig/parser_test.zig b/lib/std/zig/parser_test.zig index 807406adcede..3ea07efa1411 100644 --- a/lib/std/zig/parser_test.zig +++ b/lib/std/zig/parser_test.zig @@ -6138,6 +6138,16 @@ test "zig fmt: seperate errors in error sets with comments" { ); } +test "zig fmt: proper escape checks" { + try testTransform( + \\@"\x41\x42\!" + \\ + , + \\@"AB\!" + \\ + ); +} + test "recovery: top level" { try testError( \\test "" {inline} diff --git a/lib/std/zig/render.zig b/lib/std/zig/render.zig index ee3315f4f185..b46845cb02d4 100644 --- a/lib/std/zig/render.zig +++ b/lib/std/zig/render.zig @@ -2821,7 +2821,7 @@ fn renderIdentifier(r: *Render, token_index: Ast.TokenIndex, space: Space, quote }, .failure => return renderQuotedIdentifier(r, token_index, space, false), } - contents_i += esc_offset; + contents_i = esc_offset; continue; }, else => return renderQuotedIdentifier(r, token_index, space, false),