From 46c22e29e91e65af11a270235ea998de532562ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sta=C5=9B=20Ma=C5=82olepszy?= Date: Mon, 5 Nov 2018 19:00:59 +0100 Subject: [PATCH 1/2] Recognize \UHHHHHH as an escape sequence --- spec/fluent.ebnf | 3 +- syntax/grammar.mjs | 10 ++- test/fixtures/escaped_characters.ftl | 16 +++- test/fixtures/escaped_characters.json | 112 +++++++++++++++++++++++++- 4 files changed, 133 insertions(+), 8 deletions(-) diff --git a/spec/fluent.ebnf b/spec/fluent.ebnf index 13a16f2..2ae415c 100644 --- a/spec/fluent.ebnf +++ b/spec/fluent.ebnf @@ -119,7 +119,8 @@ indented_char ::= text_char - "[" - "*" - "." special_quoted_char ::= "\"" | "\\" special_escape ::= "\\" special_quoted_char -unicode_escape ::= "\\u" /[0-9a-fA-F]{4}/ +unicode_escape ::= ("\\u" /[0-9a-fA-F]{4}/) + | ("\\U" /[0-9a-fA-F]{6}/) quoted_char ::= (any_char - special_quoted_char) | special_escape | unicode_escape diff --git a/syntax/grammar.mjs b/syntax/grammar.mjs index e52f2f9..7f7f822 100644 --- a/syntax/grammar.mjs +++ b/syntax/grammar.mjs @@ -437,9 +437,13 @@ let special_escape = .map(join); let unicode_escape = - sequence( - string("\\u"), - regex(/[0-9a-fA-F]{4}/)) + either( + sequence( + string("\\u"), + regex(/[0-9a-fA-F]{4}/)), + sequence( + string("\\U"), + regex(/[0-9a-fA-F]{6}/))) .map(join); let quoted_char = diff --git a/test/fixtures/escaped_characters.ftl b/test/fixtures/escaped_characters.ftl index 5242a4b..b156c3b 100644 --- a/test/fixtures/escaped_characters.ftl +++ b/test/fixtures/escaped_characters.ftl @@ -14,8 +14,20 @@ mismatched-quote = {"\\""} unknown-escape = {"\x"} ## Unicode escapes -string-unicode-sequence = {"\u0041"} -string-escaped-unicode = {"\\u0041"} +string-unicode-4digits = {"\u0041"} +escape-unicode-4digits = {"\\u0041"} +string-unicode-6digits = {"\U01F602"} +escape-unicode-6digits = {"\\U01F602"} + +# OK The trailing "00" is part of the raw value. +string-too-many-4digits = {"\u004100"} +# OK The trailing "00" is part of the raw value. 
+string-too-many-6digits = {"\U01F60200"} + +# ERROR Too few hex digits after \u. +string-too-few-4digits = {"\u41"} +# ERROR Too few hex digits after \U. +string-too-few-6digits = {"\U1F602"} ## Literal braces brace-open = An opening {"{"} brace. diff --git a/test/fixtures/escaped_characters.json b/test/fixtures/escaped_characters.json index 26b1974..07a115e 100644 --- a/test/fixtures/escaped_characters.json +++ b/test/fixtures/escaped_characters.json @@ -177,7 +177,7 @@ "type": "Message", "id": { "type": "Identifier", - "name": "string-unicode-sequence" + "name": "string-unicode-4digits" }, "value": { "type": "Pattern", @@ -198,7 +198,7 @@ "type": "Message", "id": { "type": "Identifier", - "name": "string-escaped-unicode" + "name": "escape-unicode-4digits" }, "value": { "type": "Pattern", @@ -215,6 +215,114 @@ "attributes": [], "comment": null }, + { + "type": "Message", + "id": { + "type": "Identifier", + "name": "string-unicode-6digits" + }, + "value": { + "type": "Pattern", + "elements": [ + { + "type": "Placeable", + "expression": { + "type": "StringLiteral", + "value": "\\U01F602" + } + } + ] + }, + "attributes": [], + "comment": null + }, + { + "type": "Message", + "id": { + "type": "Identifier", + "name": "escape-unicode-6digits" + }, + "value": { + "type": "Pattern", + "elements": [ + { + "type": "Placeable", + "expression": { + "type": "StringLiteral", + "value": "\\\\U01F602" + } + } + ] + }, + "attributes": [], + "comment": null + }, + { + "type": "Message", + "id": { + "type": "Identifier", + "name": "string-too-many-4digits" + }, + "value": { + "type": "Pattern", + "elements": [ + { + "type": "Placeable", + "expression": { + "type": "StringLiteral", + "value": "\\u004100" + } + } + ] + }, + "attributes": [], + "comment": { + "type": "Comment", + "content": "OK The trailing \"00\" is part of the raw value." 
+ } + }, + { + "type": "Message", + "id": { + "type": "Identifier", + "name": "string-too-many-6digits" + }, + "value": { + "type": "Pattern", + "elements": [ + { + "type": "Placeable", + "expression": { + "type": "StringLiteral", + "value": "\\U01F60200" + } + } + ] + }, + "attributes": [], + "comment": { + "type": "Comment", + "content": "OK The trailing \"00\" is part of the raw value." + } + }, + { + "type": "Comment", + "content": "ERROR Too few hex digits after \\u." + }, + { + "type": "Junk", + "annotations": [], + "content": "string-too-few-4digits = {\"\\u41\"}\n" + }, + { + "type": "Comment", + "content": "ERROR Too few hex digits after \\U." + }, + { + "type": "Junk", + "annotations": [], + "content": "string-too-few-6digits = {\"\\U1F602\"}\n" + }, { "type": "GroupComment", "content": "Literal braces" From 58ab87835f4fac3360b79c654c23391c64db1fbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sta=C5=9B=20Ma=C5=82olepszy?= Date: Mon, 5 Nov 2018 19:46:35 +0100 Subject: [PATCH 2/2] =?UTF-8?q?Only=20allow=20the=20\u{=E2=80=A6}=20escape?= =?UTF-8?q?s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- spec/fluent.ebnf | 3 +- syntax/grammar.mjs | 11 +-- test/fixtures/astral.ftl | 4 +- test/fixtures/astral.json | 6 +- test/fixtures/escaped_characters.ftl | 33 ++++--- test/fixtures/escaped_characters.json | 120 ++++++++++++++++++++------ 6 files changed, 123 insertions(+), 54 deletions(-) diff --git a/spec/fluent.ebnf b/spec/fluent.ebnf index 2ae415c..8f04a84 100644 --- a/spec/fluent.ebnf +++ b/spec/fluent.ebnf @@ -119,8 +119,7 @@ indented_char ::= text_char - "[" - "*" - "." 
special_quoted_char ::= "\"" | "\\" special_escape ::= "\\" special_quoted_char -unicode_escape ::= ("\\u" /[0-9a-fA-F]{4}/) - | ("\\U" /[0-9a-fA-F]{6}/) +unicode_escape ::= "\\u{" /[0-9a-fA-F]{1,6}/ "}" quoted_char ::= (any_char - special_quoted_char) | special_escape | unicode_escape diff --git a/syntax/grammar.mjs b/syntax/grammar.mjs index 7f7f822..08ae138 100644 --- a/syntax/grammar.mjs +++ b/syntax/grammar.mjs @@ -437,13 +437,10 @@ let special_escape = .map(join); let unicode_escape = - either( - sequence( - string("\\u"), - regex(/[0-9a-fA-F]{4}/)), - sequence( - string("\\U"), - regex(/[0-9a-fA-F]{6}/))) + sequence( + string("\\u{"), + regex(/[0-9a-fA-F]{1,6}/), + string("}")) .map(join); let quoted_char = diff --git a/test/fixtures/astral.ftl b/test/fixtures/astral.ftl index b77e32e..7063c57 100644 --- a/test/fixtures/astral.ftl +++ b/test/fixtures/astral.ftl @@ -2,8 +2,8 @@ face-with-tears-of-joy = 😂 tetragram-for-centre = 𝌆 surrogates-in-text = \uD83D\uDE02 -surrogates-in-string = {"\uD83D\uDE02"} -surrogates-in-adjacent-strings = {"\uD83D"}{"\uDE02"} +surrogates-in-string = {"\u{D83D}\u{DE02}"} +surrogates-in-adjacent-strings = {"\u{D83D}"}{"\u{DE02}"} emoji-in-text = A face 😂 with tears of joy. 
emoji-in-string = {"A face 😂 with tears of joy."} diff --git a/test/fixtures/astral.json b/test/fixtures/astral.json index 6056fb7..422f5b8 100644 --- a/test/fixtures/astral.json +++ b/test/fixtures/astral.json @@ -68,7 +68,7 @@ "type": "Placeable", "expression": { "type": "StringLiteral", - "value": "\\uD83D\\uDE02" + "value": "\\u{D83D}\\u{DE02}" } } ] @@ -89,14 +89,14 @@ "type": "Placeable", "expression": { "type": "StringLiteral", - "value": "\\uD83D" + "value": "\\u{D83D}" } }, { "type": "Placeable", "expression": { "type": "StringLiteral", - "value": "\\uDE02" + "value": "\\u{DE02}" } } ] diff --git a/test/fixtures/escaped_characters.ftl b/test/fixtures/escaped_characters.ftl index b156c3b..589e5c6 100644 --- a/test/fixtures/escaped_characters.ftl +++ b/test/fixtures/escaped_characters.ftl @@ -2,8 +2,8 @@ text-backslash-one = Value with \ a backslash text-backslash-two = Value with \\ two backslashes text-backslash-brace = Value with \{placeable} -text-backslash-u = \u0041 -text-backslash-backslash-u = \\u0041 +text-backslash-u = \u{41} +text-backslash-backslash-u = \\u{41} ## String literals quote-in-string = {"\""} @@ -14,20 +14,25 @@ mismatched-quote = {"\\""} unknown-escape = {"\x"} ## Unicode escapes -string-unicode-4digits = {"\u0041"} -escape-unicode-4digits = {"\\u0041"} -string-unicode-6digits = {"\U01F602"} -escape-unicode-6digits = {"\\U01F602"} +string-unicode-1digit = {"\u{9}"} +string-unicode-2digits = {"\u{09}"} +string-unicode-3digits = {"\u{009}"} +string-unicode-4digits = {"\u{0009}"} +string-unicode-5digits = {"\u{00009}"} +string-unicode-6digits = {"\u{000009}"} -# OK The trailing "00" is part of the raw value. -string-too-many-4digits = {"\u004100"} -# OK The trailing "00" is part of the raw value. -string-too-many-6digits = {"\U01F60200"} +escape-unicode-4digits = {"\\u{41}"} +escape-unicode-6digits = {"\\u{01F602}"} -# ERROR Too few hex digits after \u. -string-too-few-4digits = {"\u41"} -# ERROR Too few hex digits after \U. 
-string-too-few-6digits = {"\U1F602"} +# ERROR Too few hex digits. +string-unicode-0digits = {"\u{}"} +# ERROR Too many hex digits. +string-unicode-7digits = {"\u{001F602}"} + +# ERROR Missing opening brace. +string-unicode-missing-open = {"\u9}"} +# ERROR Missing closing brace. +string-unicode-missing-close = {"\u{9"} ## Literal braces brace-open = An opening {"{"} brace. diff --git a/test/fixtures/escaped_characters.json b/test/fixtures/escaped_characters.json index 07a115e..2dab38d 100644 --- a/test/fixtures/escaped_characters.json +++ b/test/fixtures/escaped_characters.json @@ -80,7 +80,14 @@ "elements": [ { "type": "TextElement", - "value": "\\u0041" + "value": "\\u" + }, + { + "type": "Placeable", + "expression": { + "type": "NumberLiteral", + "value": "41" + } } ] }, @@ -98,7 +105,14 @@ "elements": [ { "type": "TextElement", - "value": "\\\\u0041" + "value": "\\\\u" + }, + { + "type": "Placeable", + "expression": { + "type": "NumberLiteral", + "value": "41" + } } ] }, @@ -177,7 +191,7 @@ "type": "Message", "id": { "type": "Identifier", - "name": "string-unicode-4digits" + "name": "string-unicode-1digit" }, "value": { "type": "Pattern", @@ -186,7 +200,7 @@ "type": "Placeable", "expression": { "type": "StringLiteral", - "value": "\\u0041" + "value": "\\u{9}" } } ] @@ -198,7 +212,7 @@ "type": "Message", "id": { "type": "Identifier", - "name": "escape-unicode-4digits" + "name": "string-unicode-2digits" }, "value": { "type": "Pattern", @@ -207,7 +221,7 @@ "type": "Placeable", "expression": { "type": "StringLiteral", - "value": "\\\\u0041" + "value": "\\u{09}" } } ] @@ -219,7 +233,7 @@ "type": "Message", "id": { "type": "Identifier", - "name": "string-unicode-6digits" + "name": "string-unicode-3digits" }, "value": { "type": "Pattern", @@ -228,7 +242,7 @@ "type": "Placeable", "expression": { "type": "StringLiteral", - "value": "\\U01F602" + "value": "\\u{009}" } } ] @@ -240,7 +254,49 @@ "type": "Message", "id": { "type": "Identifier", - "name": 
"escape-unicode-6digits" + "name": "string-unicode-4digits" + }, + "value": { + "type": "Pattern", + "elements": [ + { + "type": "Placeable", + "expression": { + "type": "StringLiteral", + "value": "\\u{0009}" + } + } + ] + }, + "attributes": [], + "comment": null + }, + { + "type": "Message", + "id": { + "type": "Identifier", + "name": "string-unicode-5digits" + }, + "value": { + "type": "Pattern", + "elements": [ + { + "type": "Placeable", + "expression": { + "type": "StringLiteral", + "value": "\\u{00009}" + } + } + ] + }, + "attributes": [], + "comment": null + }, + { + "type": "Message", + "id": { + "type": "Identifier", + "name": "string-unicode-6digits" }, "value": { "type": "Pattern", @@ -249,7 +305,7 @@ "type": "Placeable", "expression": { "type": "StringLiteral", - "value": "\\\\U01F602" + "value": "\\u{000009}" } } ] @@ -261,7 +317,7 @@ "type": "Message", "id": { "type": "Identifier", - "name": "string-too-many-4digits" + "name": "escape-unicode-4digits" }, "value": { "type": "Pattern", @@ -270,22 +326,19 @@ "type": "Placeable", "expression": { "type": "StringLiteral", - "value": "\\u004100" + "value": "\\\\u{41}" } } ] }, "attributes": [], - "comment": { - "type": "Comment", - "content": "OK The trailing \"00\" is part of the raw value." - } + "comment": null }, { "type": "Message", "id": { "type": "Identifier", - "name": "string-too-many-6digits" + "name": "escape-unicode-6digits" }, "value": { "type": "Pattern", @@ -294,34 +347,49 @@ "type": "Placeable", "expression": { "type": "StringLiteral", - "value": "\\U01F60200" + "value": "\\\\u{01F602}" } } ] }, "attributes": [], - "comment": { - "type": "Comment", - "content": "OK The trailing \"00\" is part of the raw value." - } + "comment": null + }, + { + "type": "Comment", + "content": "ERROR Too few hex digits." + }, + { + "type": "Junk", + "annotations": [], + "content": "string-unicode-0digits = {\"\\u{}\"}\n" + }, + { + "type": "Comment", + "content": "ERROR Too many hex digits." 
+ }, + { + "type": "Junk", + "annotations": [], + "content": "string-unicode-7digits = {\"\\u{001F602}\"}\n" }, { "type": "Comment", - "content": "ERROR Too few hex digits after \\u." + "content": "ERROR Missing opening brace." }, { "type": "Junk", "annotations": [], - "content": "string-too-few-4digits = {\"\\u41\"}\n" + "content": "string-unicode-missing-open = {\"\\u9}\"}\n" }, { "type": "Comment", - "content": "ERROR Too few hex digits after \\U." + "content": "ERROR Missing closing brace." }, { "type": "Junk", "annotations": [], - "content": "string-too-few-6digits = {\"\\U1F602\"}\n" + "content": "string-unicode-missing-close = {\"\\u{9\"}\n" }, { "type": "GroupComment",