File tree 5 files changed +143
-11
lines changed 5 files changed +143
-11
lines changed Original file line number Diff line number Diff line change @@ -131,7 +131,8 @@ indented_char ::= text_char - "[" - "*" - "."
131
131
special_quoted_char ::= " \""
132
132
| " \\"
133
133
special_escape ::= " \\" special_quoted_char
134
- unicode_escape ::= " \\u" /[0- 9a- fA- F]{4}/
134
+ unicode_escape ::= (" \\u" /[0- 9a- fA- F]{4}/)
135
+ | (" \\U" /[0- 9a- fA- F]{6}/)
135
136
quoted_char ::= (any_char - special_quoted_char - line_end )
136
137
| special_escape
137
138
| unicode_escape
Original file line number Diff line number Diff line change @@ -216,20 +216,23 @@ function remove_blank_lines(element) {
216
216
return typeof ( element ) !== "string" ;
217
217
}
218
218
219
- const KNOWN_ESCAPES = / (?: \\ \\ | \\ \" | \\ u ( [ 0 - 9 a - f A - F ] { 4 } ) ) / g;
219
+ // Backslash backslash, backslash double quote and the well-formed Unicode
220
+ // escape sequences: uHHHH, UHHHHHH.
221
+ const KNOWN_ESCAPES =
222
+ / (?: \\ \\ | \\ \" | \\ u ( [ 0 - 9 a - f A - F ] { 4 } ) | \\ U ( [ 0 - 9 a - f A - F ] { 6 } ) ) / g;
220
223
221
224
function unescape ( raw ) {
222
225
return raw . replace ( KNOWN_ESCAPES , from_escape_sequence ) ;
223
226
}
224
227
225
- function from_escape_sequence ( match , group1 ) {
228
+ function from_escape_sequence ( match , codepoint4 , codepoint6 ) {
226
229
switch ( match ) {
227
230
case "\\\\" :
228
231
return "\\" ;
229
232
case "\\\"" :
230
233
return "\"" ;
231
234
default :
232
- let codepoint = parseInt ( group1 , 16 ) ;
235
+ let codepoint = parseInt ( codepoint4 || codepoint6 , 16 ) ;
233
236
if ( codepoint <= 0xD7FF || 0xE000 <= codepoint ) {
234
237
// It's a Unicode scalar value.
235
238
return String . fromCodePoint ( codepoint ) ;
Original file line number Diff line number Diff line change @@ -456,9 +456,13 @@ let special_escape =
456
456
. map ( join ) ;
457
457
458
458
let unicode_escape =
459
- sequence (
460
- string ( "\\u" ) ,
461
- regex ( / [ 0 - 9 a - f A - F ] { 4 } / ) )
459
+ either (
460
+ sequence (
461
+ string ( "\\u" ) ,
462
+ regex ( / [ 0 - 9 a - f A - F ] { 4 } / ) ) ,
463
+ sequence (
464
+ string ( "\\U" ) ,
465
+ regex ( / [ 0 - 9 a - f A - F ] { 6 } / ) ) )
462
466
. map ( join ) ;
463
467
464
468
let quoted_char = defer ( ( ) =>
Original file line number Diff line number Diff line change @@ -14,8 +14,20 @@ mismatched-quote = {"\\""}
14
14
unknown-escape = {" \x " }
15
15
16
16
## Unicode escapes
17
- string-unicode-sequence = { " \u0041 " }
18
- string-escaped-unicode = { " \\u0041 " }
17
+ string-unicode-4digits = { " \u0041 " }
18
+ escape-unicode-4digits = { " \\u0041 " }
19
+ string-unicode-6digits = { " \U01F602 " }
20
+ escape-unicode-6digits = { " \\U01F602 " }
21
+
22
+ # OK The trailing "00" is part of the raw value.
23
+ string-too-many-4digits = { " \u004100 " }
24
+ # OK The trailing "00" is part of the raw value.
25
+ string-too-many-6digits = { " \U01F60200 " }
26
+
27
+ # ERROR Too few hex digits after \u.
28
+ string-too-few-4digits = { " \u41 " }
29
+ # ERROR Too few hex digits after \U.
30
+ string-too-few-6digits = { " \U1F602 " }
19
31
20
32
## Literal braces
21
33
brace-open = An opening { " { " } brace.
Original file line number Diff line number Diff line change 179
179
"type" : " Message" ,
180
180
"id" : {
181
181
"type" : " Identifier" ,
182
- "name" : " string-unicode-sequence "
182
+ "name" : " string-unicode-4digits "
183
183
},
184
184
"value" : {
185
185
"type" : " Pattern" ,
201
201
"type" : " Message" ,
202
202
"id" : {
203
203
"type" : " Identifier" ,
204
- "name" : " string-escaped- unicode"
204
+ "name" : " escape- unicode-4digits "
205
205
},
206
206
"value" : {
207
207
"type" : " Pattern" ,
219
219
"attributes" : [],
220
220
"comment" : null
221
221
},
222
+ {
223
+ "type" : " Message" ,
224
+ "id" : {
225
+ "type" : " Identifier" ,
226
+ "name" : " string-unicode-6digits"
227
+ },
228
+ "value" : {
229
+ "type" : " Pattern" ,
230
+ "elements" : [
231
+ {
232
+ "type" : " Placeable" ,
233
+ "expression" : {
234
+ "type" : " StringLiteral" ,
235
+ "raw" : " \\ U01F602" ,
236
+ "value" : " 😂"
237
+ }
238
+ }
239
+ ]
240
+ },
241
+ "attributes" : [],
242
+ "comment" : null
243
+ },
244
+ {
245
+ "type" : " Message" ,
246
+ "id" : {
247
+ "type" : " Identifier" ,
248
+ "name" : " escape-unicode-6digits"
249
+ },
250
+ "value" : {
251
+ "type" : " Pattern" ,
252
+ "elements" : [
253
+ {
254
+ "type" : " Placeable" ,
255
+ "expression" : {
256
+ "type" : " StringLiteral" ,
257
+ "raw" : " \\\\ U01F602" ,
258
+ "value" : " \\ U01F602"
259
+ }
260
+ }
261
+ ]
262
+ },
263
+ "attributes" : [],
264
+ "comment" : null
265
+ },
266
+ {
267
+ "type" : " Message" ,
268
+ "id" : {
269
+ "type" : " Identifier" ,
270
+ "name" : " string-too-many-4digits"
271
+ },
272
+ "value" : {
273
+ "type" : " Pattern" ,
274
+ "elements" : [
275
+ {
276
+ "type" : " Placeable" ,
277
+ "expression" : {
278
+ "type" : " StringLiteral" ,
279
+ "raw" : " \\ u004100" ,
280
+ "value" : " A00"
281
+ }
282
+ }
283
+ ]
284
+ },
285
+ "attributes" : [],
286
+ "comment" : {
287
+ "type" : " Comment" ,
288
+ "content" : " OK The trailing \" 00\" is part of the raw value."
289
+ }
290
+ },
291
+ {
292
+ "type" : " Message" ,
293
+ "id" : {
294
+ "type" : " Identifier" ,
295
+ "name" : " string-too-many-6digits"
296
+ },
297
+ "value" : {
298
+ "type" : " Pattern" ,
299
+ "elements" : [
300
+ {
301
+ "type" : " Placeable" ,
302
+ "expression" : {
303
+ "type" : " StringLiteral" ,
304
+ "raw" : " \\ U01F60200" ,
305
+ "value" : " 😂00"
306
+ }
307
+ }
308
+ ]
309
+ },
310
+ "attributes" : [],
311
+ "comment" : {
312
+ "type" : " Comment" ,
313
+ "content" : " OK The trailing \" 00\" is part of the raw value."
314
+ }
315
+ },
316
+ {
317
+ "type" : " Comment" ,
318
+ "content" : " ERROR Too few hex digits after \\ u."
319
+ },
320
+ {
321
+ "type" : " Junk" ,
322
+ "annotations" : [],
323
+ "content" : " string-too-few-4digits = {\"\\ u41\" }\n "
324
+ },
325
+ {
326
+ "type" : " Comment" ,
327
+ "content" : " ERROR Too few hex digits after \\ U."
328
+ },
329
+ {
330
+ "type" : " Junk" ,
331
+ "annotations" : [],
332
+ "content" : " string-too-few-6digits = {\"\\ U1F602\" }\n\n "
333
+ },
222
334
{
223
335
"type" : " GroupComment" ,
224
336
"content" : " Literal braces"
You can’t perform that action at this time.
0 commit comments