Skip to content

Commit 0ea4bff

Browse files
stasm authored and Zibi Braniecki committed
Support astral Unicode characters in TextElements and StringLiteral (#179)
1 parent 011adc9 commit 0ea4bff

File tree

8 files changed

+212
-8
lines changed

8 files changed

+212
-8
lines changed

lib/combinators.mjs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ export function regex(re) {
3131
}
3232

3333
export function charset(range) {
34-
return regex(new RegExp(`[${range}]`));
34+
return regex(`[${range}]`);
3535
}
3636

3737
export function eof() {

lib/stream.mjs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,11 @@ export default class Stream {
2222

2323
// Execute a regex on the iterable.
2424
exec(re) {
25-
const sticky = new RegExp(re, "y");
25+
// The "u" flag is a feature of ES2015 which makes regexes Unicode-aware.
26+
// See https://mathiasbynens.be/notes/es6-unicode-regex.
27+
// The "y" flag makes the regex sticky. The match must start at the
28+
// offset specified by the regex's lastIndex property.
29+
let sticky = new RegExp(re, "uy");
2630
sticky.lastIndex = this.cursor;
2731
return sticky.exec(this.iterable);
2832
}

lib/visitor.mjs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,9 @@ export default {
7979

8080
function escape(str) {
8181
return str
82-
.replace("\\", "\\\\")
83-
.replace("\"", "\\\"")
82+
// Escape backslash and double quote, which are special in EBNF.
83+
.replace(/\\/g, "\\\\")
84+
.replace(/"/g, "\\\"")
8485
// Replace all Control and non-Basic Latin characters.
8586
.replace(/([^\u0021-\u007E])/g, unicode_sequence);
8687
}

spec/CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
# Changelog
22

3+
## Unreleased
4+
5+
- Support astral Unicode characters. (#179)
6+
7+
Unicode characters from outside of the Basic Multilingual Plane can now
8+
be used in values of `TextElements` and `StringLiterals`. This means all
9+
characters in the U+10000 to U+10FFFF range. 🎉
310

411
## 0.7.0 (October 15, 2018)
512

spec/fluent.ebnf

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,9 +92,8 @@ quote ::= "\""
9292
/* Any Unicode character (BMP or astral) excluding C0 control characters, space,
9393
* surrogate blocks and non-characters (U+FFFE, U+FFFF).
9494
* Cf. https://www.w3.org/TR/REC-xml/#NT-Char
95-
* TODO Add characters from other planes: U+10000 to U+10FFFF.
9695
*/
97-
regular_char ::= [!-\uD7FF\uE000-\uFFFD]
96+
regular_char ::= [\\u{21}-\\u{D7FF}\\u{E000}-\\u{FFFD}\\u{10000}-\\u{10FFFF}]
9897
text_char ::= blank_inline
9998
| "\u0009"
10099
| /\\u[0-9a-fA-F]{4}/

syntax/grammar.mjs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -397,10 +397,9 @@ let quote = string("\"");
397397
/* Any Unicode character (BMP or astral) excluding C0 control characters, space,
398398
* surrogate blocks and non-characters (U+FFFE, U+FFFF).
399399
* Cf. https://www.w3.org/TR/REC-xml/#NT-Char
400-
* TODO Add characters from other planes: U+10000 to U+10FFFF.
401400
*/
402401
let regular_char =
403-
charset("\u0021-\uD7FF\uE000-\uFFFD");
402+
charset("\\u{21}-\\u{D7FF}\\u{E000}-\\u{FFFD}\\u{10000}-\\u{10FFFF}");
404403

405404
let text_char = defer(() =>
406405
either(

test/fixtures/astral.ftl

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
face-with-tears-of-joy = 😂
2+
tetragram-for-centre = 𝌆
3+
4+
surrogates-in-text = \uD83D\uDE02
5+
surrogates-in-string = {"\uD83D\uDE02"}
6+
surrogates-in-adjacent-strings = {"\uD83D"}{"\uDE02"}
7+
8+
emoji-in-text = A face 😂 with tears of joy.
9+
emoji-in-string = {"A face 😂 with tears of joy."}
10+
11+
# ERROR Invalid identifier
12+
err-😂 = Value
13+
14+
# ERROR Invalid expression
15+
err-invalid-expression = { 😂 }
16+
17+
# ERROR Invalid variant key
18+
err-invalid-variant-key = { $sel ->
19+
*[😂] Value
20+
}

test/fixtures/astral.json

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
{
2+
"type": "Resource",
3+
"body": [
4+
{
5+
"type": "Message",
6+
"id": {
7+
"type": "Identifier",
8+
"name": "face-with-tears-of-joy"
9+
},
10+
"value": {
11+
"type": "Pattern",
12+
"elements": [
13+
{
14+
"type": "TextElement",
15+
"value": "😂"
16+
}
17+
]
18+
},
19+
"attributes": [],
20+
"comment": null
21+
},
22+
{
23+
"type": "Message",
24+
"id": {
25+
"type": "Identifier",
26+
"name": "tetragram-for-centre"
27+
},
28+
"value": {
29+
"type": "Pattern",
30+
"elements": [
31+
{
32+
"type": "TextElement",
33+
"value": "𝌆"
34+
}
35+
]
36+
},
37+
"attributes": [],
38+
"comment": null
39+
},
40+
{
41+
"type": "Message",
42+
"id": {
43+
"type": "Identifier",
44+
"name": "surrogates-in-text"
45+
},
46+
"value": {
47+
"type": "Pattern",
48+
"elements": [
49+
{
50+
"type": "TextElement",
51+
"value": "\\uD83D\\uDE02"
52+
}
53+
]
54+
},
55+
"attributes": [],
56+
"comment": null
57+
},
58+
{
59+
"type": "Message",
60+
"id": {
61+
"type": "Identifier",
62+
"name": "surrogates-in-string"
63+
},
64+
"value": {
65+
"type": "Pattern",
66+
"elements": [
67+
{
68+
"type": "Placeable",
69+
"expression": {
70+
"type": "StringLiteral",
71+
"value": "\\uD83D\\uDE02"
72+
}
73+
}
74+
]
75+
},
76+
"attributes": [],
77+
"comment": null
78+
},
79+
{
80+
"type": "Message",
81+
"id": {
82+
"type": "Identifier",
83+
"name": "surrogates-in-adjacent-strings"
84+
},
85+
"value": {
86+
"type": "Pattern",
87+
"elements": [
88+
{
89+
"type": "Placeable",
90+
"expression": {
91+
"type": "StringLiteral",
92+
"value": "\\uD83D"
93+
}
94+
},
95+
{
96+
"type": "Placeable",
97+
"expression": {
98+
"type": "StringLiteral",
99+
"value": "\\uDE02"
100+
}
101+
}
102+
]
103+
},
104+
"attributes": [],
105+
"comment": null
106+
},
107+
{
108+
"type": "Message",
109+
"id": {
110+
"type": "Identifier",
111+
"name": "emoji-in-text"
112+
},
113+
"value": {
114+
"type": "Pattern",
115+
"elements": [
116+
{
117+
"type": "TextElement",
118+
"value": "A face 😂 with tears of joy."
119+
}
120+
]
121+
},
122+
"attributes": [],
123+
"comment": null
124+
},
125+
{
126+
"type": "Message",
127+
"id": {
128+
"type": "Identifier",
129+
"name": "emoji-in-string"
130+
},
131+
"value": {
132+
"type": "Pattern",
133+
"elements": [
134+
{
135+
"type": "Placeable",
136+
"expression": {
137+
"type": "StringLiteral",
138+
"value": "A face 😂 with tears of joy."
139+
}
140+
}
141+
]
142+
},
143+
"attributes": [],
144+
"comment": null
145+
},
146+
{
147+
"type": "Comment",
148+
"content": "ERROR Invalid identifier"
149+
},
150+
{
151+
"type": "Junk",
152+
"annotations": [],
153+
"content": "err-😂 = Value\n"
154+
},
155+
{
156+
"type": "Comment",
157+
"content": "ERROR Invalid expression"
158+
},
159+
{
160+
"type": "Junk",
161+
"annotations": [],
162+
"content": "err-invalid-expression = { 😂 }\n"
163+
},
164+
{
165+
"type": "Comment",
166+
"content": "ERROR Invalid variant key"
167+
},
168+
{
169+
"type": "Junk",
170+
"annotations": [],
171+
"content": "err-invalid-variant-key = { $sel ->\n *[😂] Value\n}\n"
172+
}
173+
]
174+
}

0 commit comments

Comments
 (0)