From d3d2127032357e2980bf781b5f5ab040b58c7f46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sta=C5=9B=20Ma=C5=82olepszy?= Date: Thu, 28 Jun 2018 17:02:42 +0200 Subject: [PATCH 1/2] Parse Comments as standalone if they're attached to Junk Syntax 0.6 changes the parsing logic of Comments. Comments attached to what ends up being Junk are not part of Junk anymore. Instead, they end up as standalone Comments in the final Resource. This PR also changes the behavior of FluentParser.parseEntry() to ignore all valid comments and only start parsing when a Message or a Term is encountered. Comments with syntax errors are not skipped, however. --- fluent-syntax/src/ftlstream.js | 8 +- fluent-syntax/src/parser.js | 76 ++++++--- fluent-syntax/test/entry_test.js | 157 +++++++++++++----- .../test/fixtures_structure/leading_dots.json | 70 ++++++-- 4 files changed, 235 insertions(+), 76 deletions(-) diff --git a/fluent-syntax/src/ftlstream.js b/fluent-syntax/src/ftlstream.js index 8786c8222..77c9e6113 100644 --- a/fluent-syntax/src/ftlstream.js +++ b/fluent-syntax/src/ftlstream.js @@ -28,15 +28,21 @@ export class FTLParserStream extends ParserStream { } skipBlankLines() { + // Many Parser methods leave the cursor at the line break + // without going into the next line. We want to count fully blank lines in + // this case. Starting the count at -1 will give the right number. + let lineCount = this.currentIs("\n") ? -1 : 0; + while (true) { this.peekInlineWS(); if (this.currentPeekIs("\n")) { this.skipToPeek(); this.next(); + lineCount++; } else { this.resetPeek(); - break; + return lineCount; } } } diff --git a/fluent-syntax/src/parser.js b/fluent-syntax/src/parser.js index 015a06fca..3077c719f 100644 --- a/fluent-syntax/src/parser.js +++ b/fluent-syntax/src/parser.js @@ -59,9 +59,32 @@ export default class FluentParser { while (ps.current()) { const entry = this.getEntryOrJunk(ps); + const blankLines = ps.skipBlankLines(); + + // Regular Comments require special logic. Comments may be attached to + // Messages or Terms if they are followed immediately by them. However + // they should parse as standalone when they're followed by Junk. + // Consequently, we only attach them once we know that the Message or the + // Term parsed successfully. + if (entry.type === "Comment" && blankLines === 0 && ps.current()) { + const next = this.getEntryOrJunk(ps); + ps.skipBlankLines(); + + if (next.type === "Message" || next.type === "Term") { + // Attach it. + next.comment = entry; + entries.push(next); + } else { + // entry is a standalone Comment and next is Junk, GroupComment or + // ResourceComment. Push both independently of each other. + entries.push(entry, next); + } + + continue; + } + // No special logic for other types of entries. entries.push(entry); - ps.skipBlankLines(); } const res = new AST.Resource(entries); @@ -73,9 +96,28 @@ export default class FluentParser { return res; } + /* + * Parse the first Message or Term in `source`. + * + * Skip all encountered comments and start parsing at the first Message or + * Term start. Return Junk if the parsing is not successful. + * + * Preceding comments are ignored unless they contain syntax errors + * themselves, in which case Junk for the invalid comment is returned. + */ parseEntry(source) { const ps = new FTLParserStream(source); ps.skipBlankLines(); + + while (ps.currentIs("#")) { + const skipped = this.getEntryOrJunk(ps); + if (skipped.type === "Junk") { + // Don't skip Junk comments. + return skipped; + } + ps.skipBlankLines(); + } + return this.getEntryOrJunk(ps); } @@ -107,34 +149,16 @@ export default class FluentParser { } getEntry(ps) { - let comment; - if (ps.currentIs("#")) { - comment = this.getComment(ps); - - // The Comment content doesn't include the trailing newline. Consume - // this newline here to be ready for the next entry. undefined stands - // for EOF. - ps.expectChar(ps.current() ? "\n" : undefined); - - if (comment.type === "GroupComment" - || comment.type === "ResourceComment") { - // Group and Resource comments are always standalone. - return comment; - } + return this.getComment(ps); } if (ps.currentIs("-")) { - return this.getTerm(ps, comment); + return this.getTerm(ps); } if (ps.isIdentifierStart()) { - return this.getMessage(ps, comment); - } - - if (comment) { - // It's a standalone Comment. - return comment; + return this.getMessage(ps); } throw new ParseError("E0002"); @@ -189,7 +213,7 @@ export default class FluentParser { return new Comment(content); } - getMessage(ps, comment) { + getMessage(ps) { const id = this.getIdentifier(ps); ps.skipInlineWS(); @@ -210,10 +234,10 @@ export default class FluentParser { throw new ParseError("E0005", id.name); } - return new AST.Message(id, pattern, attrs, comment); + return new AST.Message(id, pattern, attrs); } - getTerm(ps, comment) { + getTerm(ps) { const id = this.getTermIdentifier(ps); ps.skipInlineWS(); @@ -230,7 +254,7 @@ export default class FluentParser { var attrs = this.getAttributes(ps); } - return new AST.Term(id, pattern, attrs, comment); + return new AST.Term(id, pattern, attrs); } getAttribute(ps) { diff --git a/fluent-syntax/test/entry_test.js b/fluent-syntax/test/entry_test.js index dce200f6b..6f3f18a32 100644 --- a/fluent-syntax/test/entry_test.js +++ b/fluent-syntax/test/entry_test.js @@ -5,7 +5,7 @@ import { FluentParser, FluentSerializer } from '../src'; suite('Parse entry', function() { setup(function() { - this.parser = new FluentParser(); + this.parser = new FluentParser({withSpans: false}); }); test('simple message', function() { @@ -14,47 +14,141 @@ suite('Parse entry', function() { `; const output = { "comment": null, - "span": { - "start": 0, - "end": 9, - "type": "Span" + "value": { + "elements": [ + { + "type": "TextElement", + "value": "Foo" + } + ], + "type": "Pattern" }, + "annotations": [], + "attributes": [], + "type": "Message", + "id": { + "type": "Identifier", + "name": "foo" + } + }; + + const message = this.parser.parseEntry(input) + assert.deepEqual(message, output) + }); + + test('ignore attached comment', function() { + const input = ftl` + # Attached Comment + foo = Foo + `; + const output = { + "comment": null, "value": { "elements": [ { "type": "TextElement", - "value": "Foo", - "span": { - "start": 6, - "end": 9, - "type": "Span" - } + "value": "Foo" } ], - "type": "Pattern", - "span": { - "start": 6, - "end": 9, - "type": "Span" - } + "type": "Pattern" }, "annotations": [], "attributes": [], "type": "Message", "id": { "type": "Identifier", - "name": "foo", - "span": { - "start": 0, - "end": 3, - "type": "Span" + "name": "foo" + } + }; + + const message = this.parser.parseEntry(input) + assert.deepEqual(message, output) + }); + + test('return junk', function() { + const input = ftl` + # Attached Comment + junk + `; + const output = { + "content": "junk\n", + "annotations": [ + { + "args": ["="], + "code": "E0003", + "message": "Expected token: \"=\"", + "span": { + "end": 23, + "start": 23, + "type": "Span" + }, + "type": "Annotation" } + ], + "type": "Junk" + }; + + const message = this.parser.parseEntry(input) + assert.deepEqual(message, output) + }); + + test('ignore all valid comments', function() { + const input = ftl` + # Attached Comment + ## Group Comment + ### Resource Comment + foo = Foo + `; + const output = { + "comment": null, + "value": { + "elements": [ + { + "type": "TextElement", + "value": "Foo" + } + ], + "type": "Pattern" + }, + "annotations": [], + "attributes": [], + "type": "Message", + "id": { + "type": "Identifier", + "name": "foo" } }; const message = this.parser.parseEntry(input) assert.deepEqual(message, output) }); + + test('do not ignore invalid comments', function() { + const input = ftl` + # Attached Comment + ##Invalid Comment + `; + const output = { + "content": "##Invalid Comment\n", + "annotations": [ + { + "args": [" "], + "code": "E0003", + "message": "Expected token: \" \"", + "span": { + "end": 21, + "start": 21, + "type": "Span" + }, + "type": "Annotation" + } + ], + "type": "Junk" + }; + + const message = this.parser.parseEntry(input) + assert.deepEqual(message, output) + }); }); @@ -66,11 +160,6 @@ suite('Serialize entry', function() { test('simple message', function() { const input = { "comment": null, - "span": { - "start": 0, - "end": 9, - "type": "Span" - }, "value": { "elements": [ { @@ -78,24 +167,14 @@ suite('Serialize entry', function() { "value": "Foo" } ], - "type": "Pattern", - "span": { - "start": 6, - "end": 9, - "type": "Span" - } + "type": "Pattern" }, "annotations": [], "attributes": [], "type": "Message", "id": { "type": "Identifier", - "name": "foo", - "span": { - "start": 0, - "end": 3, - "type": "Span" - } + "name": "foo" } }; const output = ftl` diff --git a/fluent-syntax/test/fixtures_structure/leading_dots.json b/fluent-syntax/test/fixtures_structure/leading_dots.json index bc137038d..ce0078252 100644 --- a/fluent-syntax/test/fixtures_structure/leading_dots.json +++ b/fluent-syntax/test/fixtures_structure/leading_dots.json @@ -321,6 +321,16 @@ "end": 140 } }, + { + "type": "Comment", + "annotations": [], + "content": "ERROR (attr .Continued must have a value)", + "span": { + "type": "Span", + "start": 142, + "end": 185 + } + }, { "type": "Junk", "annotations": [ @@ -338,13 +348,23 @@ } } ], - "content": "# ERROR (attr .Continued must have a value)\nkey07 = Value\n .Continued\n\n", + "content": "key07 = Value\n .Continued\n\n", "span": { "type": "Span", - "start": 142, + "start": 186, "end": 216 } }, + { + "type": "Comment", + "annotations": [], + "content": "ERROR (attr .Value must have a value)", + "span": { + "type": "Span", + "start": 216, + "end": 255 + } + }, { "type": "Junk", "annotations": [ @@ -362,13 +382,23 @@ } } ], - "content": "# ERROR (attr .Value must have a value)\nkey08 =\n .Value\n\n", + "content": "key08 =\n .Value\n\n", "span": { "type": "Span", - "start": 216, + "start": 256, "end": 276 } }, + { + "type": "Comment", + "annotations": [], + "content": "ERROR (attr .Value must have a value)", + "span": { + "type": "Span", + "start": 276, + "end": 315 + } + }, { "type": "Junk", "annotations": [ @@ -386,10 +416,10 @@ } } ], - "content": "# ERROR (attr .Value must have a value)\nkey09 =\n .Value\n Continued\n\n", + "content": "key09 =\n .Value\n Continued\n\n", "span": { "type": "Span", - "start": 276, + "start": 316, "end": 350 } }, @@ -846,6 +876,16 @@ "end": 685 } }, + { + "type": "Comment", + "annotations": [], + "content": "ERROR (variant must have a value)", + "span": { + "type": "Span", + "start": 687, + "end": 722 + } + }, { "type": "Junk", "annotations": [ @@ -861,13 +901,23 @@ } } ], - "content": "# ERROR (variant must have a value)\nkey16 =\n { 1 ->\n *[one]\n .Value\n }\n\n", + "content": "key16 =\n { 1 ->\n *[one]\n .Value\n }\n\n", "span": { "type": "Span", - "start": 687, + "start": 723, "end": 781 } }, + { + "type": "Comment", + "annotations": [], + "content": "ERROR (unclosed placeable)", + "span": { + "type": "Span", + "start": 781, + "end": 809 + } + }, { "type": "Junk", "annotations": [ @@ -885,10 +935,10 @@ } } ], - "content": "# ERROR (unclosed placeable)\nkey17 =\n { 1 ->\n *[one] Value\n .Continued\n }\n", + "content": "key17 =\n { 1 ->\n *[one] Value\n .Continued\n }\n", "span": { "type": "Span", - "start": 781, + "start": 810, "end": 877 } } From 11b96ed78e9e2b855a42cc812c799818ae4431c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sta=C5=9B=20Ma=C5=82olepszy?= Date: Mon, 2 Jul 2018 16:12:08 +0200 Subject: [PATCH 2/2] Use lastComment to stash attached Comments --- fluent-syntax/src/parser.js | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/fluent-syntax/src/parser.js b/fluent-syntax/src/parser.js index 3077c719f..33c44f8be 100644 --- a/fluent-syntax/src/parser.js +++ b/fluent-syntax/src/parser.js @@ -56,6 +56,7 @@ export default class FluentParser { ps.skipBlankLines(); const entries = []; + let lastComment = null; while (ps.current()) { const entry = this.getEntryOrJunk(ps); @@ -64,23 +65,22 @@ export default class FluentParser { // Regular Comments require special logic. Comments may be attached to // Messages or Terms if they are followed immediately by them. However // they should parse as standalone when they're followed by Junk. - // Consequently, we only attach them once we know that the Message or the - // Term parsed successfully. + // Consequently, we only attach Comments once we know that the Message + // or the Term parsed successfully. if (entry.type === "Comment" && blankLines === 0 && ps.current()) { - const next = this.getEntryOrJunk(ps); - ps.skipBlankLines(); + // Stash the comment and decide what to do with it in the next pass. + lastComment = entry; + continue; + } - if (next.type === "Message" || next.type === "Term") { - // Attach it. - next.comment = entry; - entries.push(next); + if (lastComment) { + if (entry.type === "Message" || entry.type === "Term") { + entry.comment = lastComment; } else { - // entry is a standalone Comment and next is Junk, GroupComment or - // ResourceComment. Push both independently of each other. - entries.push(entry, next); + entries.push(lastComment); } - - continue; + // In either case, the stashed comment has been dealt with; clear it. + lastComment = null; } // No special logic for other types of entries.