Skip to content

Commit b56cbb1

Browse files
authored
Implement extended named capture group identifiers (#90)
Perfectly reasonable and not at all uncommon regular expressions like /(?<𝑓𝑜𝑥>fox).*(?<𝓓𝓸𝓰>dog)/ are now accepted.
1 parent 20b3aca commit b56cbb1

File tree

2 files changed

+14
-11
lines changed

2 files changed

+14
-11
lines changed

libregexp.c

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1071,11 +1071,10 @@ static int re_is_simple_quantifier(const uint8_t *bc_buf, int bc_buf_len)
10711071
}
10721072

10731073
/* on entry, '*pp' points to the first char after '<' */
1074-
static int re_parse_group_name(char *buf, int buf_size,
1075-
const uint8_t **pp, BOOL is_utf16)
1074+
static int re_parse_group_name(char *buf, int buf_size, const uint8_t **pp)
10761075
{
1077-
const uint8_t *p;
1078-
uint32_t c;
1076+
const uint8_t *p, *p1;
1077+
uint32_t c, d;
10791078
char *q;
10801079

10811080
p = *pp;
@@ -1086,11 +1085,18 @@ static int re_parse_group_name(char *buf, int buf_size,
10861085
p++;
10871086
if (*p != 'u')
10881087
return -1;
1089-
c = lre_parse_escape(&p, is_utf16 * 2);
1088+
c = lre_parse_escape(&p, 2); // accept surrogate pairs
10901089
} else if (c == '>') {
10911090
break;
10921091
} else if (c >= 128) {
10931092
c = unicode_from_utf8(p, UTF8_CHAR_LEN_MAX, &p);
1093+
if (c >= 0xD800 && c <= 0xDBFF) {
1094+
d = unicode_from_utf8(p, UTF8_CHAR_LEN_MAX, &p1);
1095+
if (d >= 0xDC00 && d <= 0xDFFF) {
1096+
c = 0x10000 + 0x400 * (c - 0xD800) + (d - 0xDC00);
1097+
p = p1;
1098+
}
1099+
}
10941100
} else {
10951101
p++;
10961102
}
@@ -1140,8 +1146,7 @@ static int re_parse_captures(REParseState *s, int *phas_named_captures,
11401146
/* potential named capture */
11411147
if (capture_name) {
11421148
p += 3;
1143-
if (re_parse_group_name(name, sizeof(name), &p,
1144-
s->is_utf16) == 0) {
1149+
if (re_parse_group_name(name, sizeof(name), &p) == 0) {
11451150
if (!strcmp(name, capture_name))
11461151
return capture_index;
11471152
}
@@ -1314,7 +1319,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
13141319
} else if (p[2] == '<') {
13151320
p += 3;
13161321
if (re_parse_group_name(s->u.tmp_buf, sizeof(s->u.tmp_buf),
1317-
&p, s->is_utf16)) {
1322+
&p)) {
13181323
return re_parse_error(s, "invalid group name");
13191324
}
13201325
if (find_group_name(s, s->u.tmp_buf) > 0) {
@@ -1378,7 +1383,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
13781383
}
13791384
p1 += 3;
13801385
if (re_parse_group_name(s->u.tmp_buf, sizeof(s->u.tmp_buf),
1381-
&p1, s->is_utf16)) {
1386+
&p1)) {
13821387
if (s->is_utf16 || re_has_named_captures(s))
13831388
return re_parse_error(s, "invalid group name");
13841389
else

test262_errors.txt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,6 @@ test262/test/built-ins/Function/internals/Construct/derived-this-uninitialized-r
3737
test262/test/built-ins/Function/internals/Construct/derived-this-uninitialized-realm.js:20: strict mode: Test262Error: Expected a ReferenceError but got a different error constructor with the same name
3838
test262/test/built-ins/RegExp/lookahead-quantifier-match-groups.js:27: Test262Error: Expected [a, abc] and [a, undefined] to have the same contents. ? quantifier
3939
test262/test/built-ins/RegExp/lookahead-quantifier-match-groups.js:27: strict mode: Test262Error: Expected [a, abc] and [a, undefined] to have the same contents. ? quantifier
40-
test262/test/built-ins/RegExp/named-groups/non-unicode-property-names-valid.js:46: SyntaxError: invalid group name
41-
test262/test/built-ins/RegExp/named-groups/non-unicode-property-names-valid.js:46: strict mode: SyntaxError: invalid group name
4240
test262/test/built-ins/RegExp/prototype/Symbol.match/flags-tostring-error.js:22: Test262Error: Expected a CustomError but got a Test262Error
4341
test262/test/built-ins/RegExp/prototype/Symbol.match/flags-tostring-error.js:22: strict mode: Test262Error: Expected a CustomError but got a Test262Error
4442
test262/test/built-ins/RegExp/prototype/Symbol.match/get-flags-err.js:23: Test262Error: Expected a CustomError but got a Test262Error

0 commit comments

Comments
 (0)