Skip to content

Commit 08af020

Browse files
authored
Recognize \UHHHHHH as an escape sequence (#201)
1 parent 45b47d9 commit 08af020

File tree

5 files changed

+142
-11
lines changed

5 files changed

+142
-11
lines changed

spec/fluent.ebnf

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,8 @@ indented_char ::= text_char - "[" - "*" - "."
132132
special_quoted_char ::= "\""
133133
| "\\"
134134
special_escape ::= "\\" special_quoted_char
135-
unicode_escape ::= "\\u" /[0-9a-fA-F]{4}/
135+
unicode_escape ::= ("\\u" /[0-9a-fA-F]{4}/)
136+
| ("\\U" /[0-9a-fA-F]{6}/)
136137
quoted_char ::= (any_char - special_quoted_char - line_end)
137138
| special_escape
138139
| unicode_escape

syntax/abstract.mjs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -242,20 +242,22 @@ function remove_blank_lines(element) {
242242
return typeof(element) !== "string";
243243
}
244244

245-
const KNOWN_ESCAPES = /(?:\\\\|\\\"|\\u([0-9a-fA-F]{4}))/g;
245+
// Backslash backslash, backslash double quote, backslash uHHHH, backslash UHHHHHH.
246+
const KNOWN_ESCAPES =
247+
/(?:\\\\|\\\"|\\u([0-9a-fA-F]{4})|\\U([0-9a-fA-F]{6}))/g;
246248

247249
function unescape(raw) {
248250
return raw.replace(KNOWN_ESCAPES, from_escape_sequence);
249251
}
250252

251-
function from_escape_sequence(match, group1) {
253+
function from_escape_sequence(match, codepoint4, codepoint6) {
252254
switch (match) {
253255
case "\\\\":
254256
return "\\";
255257
case "\\\"":
256258
return "\"";
257259
default:
258-
let codepoint = parseInt(group1, 16);
260+
let codepoint = parseInt(codepoint4 || codepoint6, 16);
259261
if (codepoint <= 0xD7FF || 0xE000 <= codepoint) {
260262
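// It's not a surrogate code point.
// NOTE(review): the \UHHHHHH form admits values above 0x10FFFF, which are
// not Unicode scalar values; String.fromCodePoint throws a RangeError for
// them. Confirm whether an upper bound should be checked here.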
261263
return String.fromCodePoint(codepoint);

syntax/grammar.mjs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -457,9 +457,13 @@ let special_escape =
457457
.map(join);
458458

459459
let unicode_escape =
460-
sequence(
461-
string("\\u"),
462-
regex(/[0-9a-fA-F]{4}/))
460+
either(
461+
sequence(
462+
string("\\u"),
463+
regex(/[0-9a-fA-F]{4}/)),
464+
sequence(
465+
string("\\U"),
466+
regex(/[0-9a-fA-F]{6}/)))
463467
.map(join);
464468

465469
let quoted_char = defer(() =>

test/fixtures/escaped_characters.ftl

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,20 @@ mismatched-quote = {"\\""}
1414
unknown-escape = {"\x"}
1515
1616
## Unicode escapes
17-
string-unicode-sequence = {"\u0041"}
18-
string-escaped-unicode = {"\\u0041"}
17+
string-unicode-4digits = {"\u0041"}
18+
escape-unicode-4digits = {"\\u0041"}
19+
string-unicode-6digits = {"\U01F602"}
20+
escape-unicode-6digits = {"\\U01F602"}
21+
22+
# OK The trailing "00" is part of the literal value.
23+
string-too-many-4digits = {"\u004100"}
24+
# OK The trailing "00" is part of the literal value.
25+
string-too-many-6digits = {"\U01F60200"}
26+
27+
# ERROR Too few hex digits after \u.
28+
string-too-few-4digits = {"\u41"}
29+
# ERROR Too few hex digits after \U.
30+
string-too-few-6digits = {"\U1F602"}
1931
2032
## Literal braces
2133
brace-open = An opening {"{"} brace.

test/fixtures/escaped_characters.json

Lines changed: 114 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@
179179
"type": "Message",
180180
"id": {
181181
"type": "Identifier",
182-
"name": "string-unicode-sequence"
182+
"name": "string-unicode-4digits"
183183
},
184184
"value": {
185185
"type": "Pattern",
@@ -201,7 +201,7 @@
201201
"type": "Message",
202202
"id": {
203203
"type": "Identifier",
204-
"name": "string-escaped-unicode"
204+
"name": "escape-unicode-4digits"
205205
},
206206
"value": {
207207
"type": "Pattern",
@@ -219,6 +219,118 @@
219219
"attributes": [],
220220
"comment": null
221221
},
222+
{
223+
"type": "Message",
224+
"id": {
225+
"type": "Identifier",
226+
"name": "string-unicode-6digits"
227+
},
228+
"value": {
229+
"type": "Pattern",
230+
"elements": [
231+
{
232+
"type": "Placeable",
233+
"expression": {
234+
"type": "StringLiteral",
235+
"raw": "\\U01F602",
236+
"value": "😂"
237+
}
238+
}
239+
]
240+
},
241+
"attributes": [],
242+
"comment": null
243+
},
244+
{
245+
"type": "Message",
246+
"id": {
247+
"type": "Identifier",
248+
"name": "escape-unicode-6digits"
249+
},
250+
"value": {
251+
"type": "Pattern",
252+
"elements": [
253+
{
254+
"type": "Placeable",
255+
"expression": {
256+
"type": "StringLiteral",
257+
"raw": "\\\\U01F602",
258+
"value": "\\U01F602"
259+
}
260+
}
261+
]
262+
},
263+
"attributes": [],
264+
"comment": null
265+
},
266+
{
267+
"type": "Message",
268+
"id": {
269+
"type": "Identifier",
270+
"name": "string-too-many-4digits"
271+
},
272+
"value": {
273+
"type": "Pattern",
274+
"elements": [
275+
{
276+
"type": "Placeable",
277+
"expression": {
278+
"type": "StringLiteral",
279+
"raw": "\\u004100",
280+
"value": "A00"
281+
}
282+
}
283+
]
284+
},
285+
"attributes": [],
286+
"comment": {
287+
"type": "Comment",
288+
"content": "OK The trailing \"00\" is part of the literal value."
289+
}
290+
},
291+
{
292+
"type": "Message",
293+
"id": {
294+
"type": "Identifier",
295+
"name": "string-too-many-6digits"
296+
},
297+
"value": {
298+
"type": "Pattern",
299+
"elements": [
300+
{
301+
"type": "Placeable",
302+
"expression": {
303+
"type": "StringLiteral",
304+
"raw": "\\U01F60200",
305+
"value": "😂00"
306+
}
307+
}
308+
]
309+
},
310+
"attributes": [],
311+
"comment": {
312+
"type": "Comment",
313+
"content": "OK The trailing \"00\" is part of the literal value."
314+
}
315+
},
316+
{
317+
"type": "Comment",
318+
"content": "ERROR Too few hex digits after \\u."
319+
},
320+
{
321+
"type": "Junk",
322+
"annotations": [],
323+
"content": "string-too-few-4digits = {\"\\u41\"}\n"
324+
},
325+
{
326+
"type": "Comment",
327+
"content": "ERROR Too few hex digits after \\U."
328+
},
329+
{
330+
"type": "Junk",
331+
"annotations": [],
332+
"content": "string-too-few-6digits = {\"\\U1F602\"}\n\n"
333+
},
222334
{
223335
"type": "GroupComment",
224336
"content": "Literal braces"

0 commit comments

Comments
 (0)