projectfluent · stasm · Jul 2, 2018 · Jun 28, 2018 · Jul 2, 2018
diff --git a/fluent-syntax/src/ftlstream.js b/fluent-syntax/src/ftlstream.js
@@ -28,15 +28,21 @@ export class FTLParserStream extends ParserStream {
   }
 
   skipBlankLines() {
+    // Many Parser methods leave the cursor at the line break
+    // without going into the next line. We want to count fully blank lines in
+    // this case. Starting the count at -1 will give the right number.
+    let lineCount = this.currentIs("\n") ? -1 : 0;
+
     while (true) {
       this.peekInlineWS();
 
       if (this.currentPeekIs("\n")) {
         this.skipToPeek();
         this.next();
+        lineCount++;
       } else {
         this.resetPeek();
-        break;
+        return lineCount;
       }
     }
   }

diff --git a/fluent-syntax/src/parser.js b/fluent-syntax/src/parser.js
@@ -56,12 +56,35 @@ export default class FluentParser {
     ps.skipBlankLines();
 
     const entries = [];
+    let lastComment = null;
 
     while (ps.current()) {
       const entry = this.getEntryOrJunk(ps);
+      const blankLines = ps.skipBlankLines();
+
+      // Regular Comments require special logic. Comments may be attached to
+      // Messages or Terms if they are followed immediately by them. However
+      // they should parse as standalone when they're followed by Junk.
+      // Consequently, we only attach Comments once we know that the Message
+      // or the Term parsed successfully.
+      if (entry.type === "Comment" && blankLines === 0 && ps.current()) {
+        // Stash the comment and decide what to do with it in the next pass.
+        lastComment = entry;
+        continue;
+      }
 
+      if (lastComment) {
+        if (entry.type === "Message" || entry.type === "Term") {
+          entry.comment = lastComment;
+        } else {
+          entries.push(lastComment);
+        }
+        // In either case, the stashed comment has been dealt with; clear it.
+        lastComment = null;
+      }
+
+      // No special logic for other types of entries.
       entries.push(entry);
-      ps.skipBlankLines();
     }
 
     const res = new AST.Resource(entries);
@@ -73,9 +96,28 @@ export default class FluentParser {
     return res;
   }
 
+  /*
+   * Parse the first Message or Term in `source`.
+   *
+   * Skip all encountered comments and start parsing at the first Message or
+   * Term start. Return Junk if the parsing is not successful.
+   *
+   * Preceding comments are ignored unless they contain syntax errors
+   * themselves, in which case Junk for the invalid comment is returned.
+   */
   parseEntry(source) {
     const ps = new FTLParserStream(source);
     ps.skipBlankLines();
+
+    while (ps.currentIs("#")) {
+      const skipped = this.getEntryOrJunk(ps);
+      if (skipped.type === "Junk") {
+        // Don't skip Junk comments.
+        return skipped;
+      }
+      ps.skipBlankLines();
+    }
+
     return this.getEntryOrJunk(ps);
   }
 
@@ -107,34 +149,16 @@ export default class FluentParser {
   }
 
   getEntry(ps) {
-    let comment;
-
     if (ps.currentIs("#")) {
-      comment = this.getComment(ps);
-
-      // The Comment content doesn't include the trailing newline. Consume
-      // this newline here to be ready for the next entry.  undefined stands
-      // for EOF.
-      ps.expectChar(ps.current() ? "\n" : undefined);
-
-      if (comment.type === "GroupComment"
-        || comment.type === "ResourceComment") {
-        // Group and Resource comments are always standalone.
-        return comment;
-      }
+      return this.getComment(ps);
     }
 
     if (ps.currentIs("-")) {
-      return this.getTerm(ps, comment);
+      return this.getTerm(ps);
     }
 
     if (ps.isIdentifierStart()) {
-      return this.getMessage(ps, comment);
-    }
-
-    if (comment) {
-      // It's a standalone Comment.
-      return comment;
+      return this.getMessage(ps);
     }
 
     throw new ParseError("E0002");
@@ -189,7 +213,7 @@ export default class FluentParser {
     return new Comment(content);
   }
 
-  getMessage(ps, comment) {
+  getMessage(ps) {
     const id = this.getIdentifier(ps);
 
     ps.skipInlineWS();
@@ -210,10 +234,10 @@ export default class FluentParser {
       throw new ParseError("E0005", id.name);
     }
 
-    return new AST.Message(id, pattern, attrs, comment);
+    return new AST.Message(id, pattern, attrs);
   }
 
-  getTerm(ps, comment) {
+  getTerm(ps) {
     const id = this.getTermIdentifier(ps);
 
     ps.skipInlineWS();
@@ -230,7 +254,7 @@ export default class FluentParser {
       var attrs = this.getAttributes(ps);
     }
 
-    return new AST.Term(id, pattern, attrs, comment);
+    return new AST.Term(id, pattern, attrs);
   }
 
   getAttribute(ps) {

diff --git a/fluent-syntax/test/entry_test.js b/fluent-syntax/test/entry_test.js
@@ -5,7 +5,7 @@ import { FluentParser, FluentSerializer } from '../src';
 
 suite('Parse entry', function() {
   setup(function() {
-    this.parser = new FluentParser();
+    this.parser = new FluentParser({withSpans: false});
   });
 
   test('simple message', function() {
@@ -14,47 +14,141 @@ suite('Parse entry', function() {
     `;
     const output = {
       "comment": null,
-      "span": {
-        "start": 0,
-        "end": 9,
-        "type": "Span"
+      "value": {
+        "elements": [
+          {
+            "type": "TextElement",
+            "value": "Foo"
+          }
+        ],
+        "type": "Pattern"
       },
+      "annotations": [],
+      "attributes": [],
+      "type": "Message",
+      "id": {
+        "type": "Identifier",
+        "name": "foo"
+      }
+    };
+
+    const message = this.parser.parseEntry(input)
+    assert.deepEqual(message, output)
+  });
+
+  test('ignore attached comment', function() {
+    const input = ftl`
+      # Attached Comment
+      foo = Foo
+    `;
+    const output = {
+      "comment": null,
       "value": {
         "elements": [
           {
             "type": "TextElement",
-            "value": "Foo",
-            "span": {
-              "start": 6,
-              "end": 9,
-              "type": "Span"
-            }
+            "value": "Foo"
           }
         ],
-        "type": "Pattern",
-        "span": {
-          "start": 6,
-          "end": 9,
-          "type": "Span"
-        }
+        "type": "Pattern"
       },
       "annotations": [],
       "attributes": [],
       "type": "Message",
       "id": {
         "type": "Identifier",
-        "name": "foo",
-        "span": {
-          "start": 0,
-          "end": 3,
-          "type": "Span"
+        "name": "foo"
+      }
+    };
+
+    const message = this.parser.parseEntry(input)
+    assert.deepEqual(message, output)
+  });
+
+  test('return junk', function() {
+    const input = ftl`
+      # Attached Comment
+      junk
+    `;
+    const output = {
+      "content": "junk\n",
+      "annotations": [
+        {
+          "args": ["="],
+          "code": "E0003",
+          "message": "Expected token: \"=\"",
+          "span": {
+            "end": 23,
+            "start": 23,
+            "type": "Span"
+          },
+          "type": "Annotation"
         }
+      ],
+      "type": "Junk"
+    };
+
+    const message = this.parser.parseEntry(input)
+    assert.deepEqual(message, output)
+  });
+
+  test('ignore all valid comments', function() {
+    const input = ftl`
+      # Attached Comment
+      ## Group Comment
+      ### Resource Comment
+      foo = Foo
+    `;
+    const output = {
+      "comment": null,
+      "value": {
+        "elements": [
+          {
+            "type": "TextElement",
+            "value": "Foo"
+          }
+        ],
+        "type": "Pattern"
+      },
+      "annotations": [],
+      "attributes": [],
+      "type": "Message",
+      "id": {
+        "type": "Identifier",
+        "name": "foo"
       }
     };
 
     const message = this.parser.parseEntry(input)
     assert.deepEqual(message, output)
   });
+
+  test('do not ignore invalid comments', function() {
+    const input = ftl`
+      # Attached Comment
+      ##Invalid Comment
+    `;
+    const output = {
+      "content": "##Invalid Comment\n",
+      "annotations": [
+        {
+          "args": [" "],
+          "code": "E0003",
+          "message": "Expected token: \" \"",
+          "span": {
+            "end": 21,
+            "start": 21,
+            "type": "Span"
+          },
+          "type": "Annotation"
+        }
+      ],
+      "type": "Junk"
+    };
+
+    const message = this.parser.parseEntry(input)
+    assert.deepEqual(message, output)
+  });
 });
 
 
@@ -66,36 +160,21 @@ suite('Serialize entry', function() {
   test('simple message', function() {
     const input = {
       "comment": null,
-      "span": {
-        "start": 0,
-        "end": 9,
-        "type": "Span"
-      },
       "value": {
         "elements": [
           {
             "type": "TextElement",
             "value": "Foo"
           }
         ],
-        "type": "Pattern",
-        "span": {
-          "start": 6,
-          "end": 9,
-          "type": "Span"
-        }
+        "type": "Pattern"
       },
       "annotations": [],
       "attributes": [],
       "type": "Message",
       "id": {
         "type": "Identifier",
-        "name": "foo",
-        "span": {
-          "start": 0,
-          "end": 3,
-          "type": "Span"
-        }
+        "name": "foo"
       }
     };
     const output = ftl`