Skip to content

Commit 9239e7e

Browse files
committed
data: update to UCD 16
1 parent 7691e49 commit 9239e7e

18 files changed

+2149
-625
lines changed

regex-automata/src/nfa/thompson/compiler.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -230,15 +230,15 @@ impl Config {
230230
/// # if cfg!(miri) { return Ok(()); } // miri takes too long
231231
/// use regex_automata::nfa::thompson::NFA;
232232
///
233-
/// // 300KB isn't enough!
233+
/// // 400KB isn't enough!
234234
/// NFA::compiler()
235-
/// .configure(NFA::config().nfa_size_limit(Some(300_000)))
235+
/// .configure(NFA::config().nfa_size_limit(Some(400_000)))
236236
/// .build(r"\w{20}")
237237
/// .unwrap_err();
238238
///
239-
/// // ... but 400KB probably is.
239+
/// // ... but 500KB probably is.
240240
/// let nfa = NFA::compiler()
241-
/// .configure(NFA::config().nfa_size_limit(Some(400_000)))
241+
/// .configure(NFA::config().nfa_size_limit(Some(500_000)))
242242
/// .build(r"\w{20}")?;
243243
///
244244
/// assert_eq!(nfa.pattern_len(), 1);

regex-automata/src/util/unicode_data/perl_word.rs

+45-20
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
22
//
3-
// ucd-generate perl-word tmp/ucd-15.0.0/ --chars
3+
// ucd-generate perl-word ucd-16.0.0 --chars
44
//
5-
// Unicode version: 15.0.0.
5+
// Unicode version: 16.0.0.
66
//
7-
// ucd-generate 0.2.15 is available on crates.io.
7+
// ucd-generate 0.3.1 is available on crates.io.
88

99
pub const PERL_WORD: &'static [(char, char)] = &[
1010
('0', '9'),
@@ -59,7 +59,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[
5959
('ࡠ', 'ࡪ'),
6060
('ࡰ', 'ࢇ'),
6161
('ࢉ', 'ࢎ'),
62-
('\u{898}', '\u{8e1}'),
62+
('\u{897}', '\u{8e1}'),
6363
('\u{8e3}', '\u{963}'),
6464
('०', '९'),
6565
('ॱ', 'ঃ'),
@@ -158,8 +158,8 @@ pub const PERL_WORD: &'static [(char, char)] = &[
158158
('ಪ', 'ಳ'),
159159
('ವ', 'ಹ'),
160160
('\u{cbc}', 'ೄ'),
161-
('\u{cc6}', 'ೈ'),
162-
('ೊ', '\u{ccd}'),
161+
('\u{cc6}', '\u{cc8}'),
162+
('\u{cca}', '\u{ccd}'),
163163
('\u{cd5}', '\u{cd6}'),
164164
('ೝ', 'ೞ'),
165165
('ೠ', '\u{ce3}'),
@@ -243,8 +243,8 @@ pub const PERL_WORD: &'static [(char, char)] = &[
243243
('ᚁ', 'ᚚ'),
244244
('ᚠ', 'ᛪ'),
245245
('ᛮ', 'ᛸ'),
246-
('ᜀ', '᜕'),
247-
('ᜟ', '᜴'),
246+
('ᜀ', '\u{1715}'),
247+
('ᜟ', '\u{1734}'),
248248
('ᝀ', '\u{1753}'),
249249
('ᝠ', 'ᝬ'),
250250
('ᝮ', 'ᝰ'),
@@ -276,11 +276,11 @@ pub const PERL_WORD: &'static [(char, char)] = &[
276276
('\u{1b00}', 'ᭌ'),
277277
('᭐', '᭙'),
278278
('\u{1b6b}', '\u{1b73}'),
279-
('\u{1b80}', '᯳'),
279+
('\u{1b80}', '\u{1bf3}'),
280280
('ᰀ', '\u{1c37}'),
281281
('᱀', '᱉'),
282282
('ᱍ', 'ᱽ'),
283-
('ᲀ', 'ᲈ'),
283+
('ᲀ', '\u{1c8a}'),
284284
('Ა', 'Ჺ'),
285285
('Ჽ', 'Ჿ'),
286286
('\u{1cd0}', '\u{1cd2}'),
@@ -367,10 +367,10 @@ pub const PERL_WORD: &'static [(char, char)] = &[
367367
('ꙿ', '\u{a6f1}'),
368368
('ꜗ', 'ꜟ'),
369369
('Ꜣ', 'ꞈ'),
370-
('Ꞌ', 'ꟊ'),
370+
('Ꞌ', '\u{a7cd}'),
371371
('Ꟑ', 'ꟑ'),
372372
('ꟓ', 'ꟓ'),
373-
('ꟕ', 'ꟙ'),
373+
('ꟕ', '\u{a7dc}'),
374374
('ꟲ', 'ꠧ'),
375375
('\u{a82c}', '\u{a82c}'),
376376
('ꡀ', 'ꡳ'),
@@ -379,9 +379,9 @@ pub const PERL_WORD: &'static [(char, char)] = &[
379379
('\u{a8e0}', 'ꣷ'),
380380
('ꣻ', 'ꣻ'),
381381
('ꣽ', '\u{a92d}'),
382-
('ꤰ', '꥓'),
382+
('ꤰ', '\u{a953}'),
383383
('ꥠ', 'ꥼ'),
384-
('\u{a980}', '꧀'),
384+
('\u{a980}', '\u{a9c0}'),
385385
('ꧏ', '꧙'),
386386
('ꧠ', 'ꧾ'),
387387
('ꨀ', '\u{aa36}'),
@@ -468,6 +468,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[
468468
('𐖣', '𐖱'),
469469
('𐖳', '𐖹'),
470470
('𐖻', '𐖼'),
471+
('𐗀', '𐗳'),
471472
('𐘀', '𐜶'),
472473
('𐝀', '𐝕'),
473474
('𐝠', '𐝧'),
@@ -508,10 +509,14 @@ pub const PERL_WORD: &'static [(char, char)] = &[
508509
('𐳀', '𐳲'),
509510
('𐴀', '\u{10d27}'),
510511
('𐴰', '𐴹'),
512+
('𐵀', '𐵥'),
513+
('\u{10d69}', '\u{10d6d}'),
514+
('𐵯', '𐶅'),
511515
('𐺀', '𐺩'),
512516
('\u{10eab}', '\u{10eac}'),
513517
('𐺰', '𐺱'),
514-
('\u{10efd}', '𐼜'),
518+
('𐻂', '𐻄'),
519+
('\u{10efc}', '𐼜'),
515520
('𐼧', '𐼧'),
516521
('𐼰', '\u{10f50}'),
517522
('𐽰', '\u{10f85}'),
@@ -551,12 +556,22 @@ pub const PERL_WORD: &'static [(char, char)] = &[
551556
('𑌵', '𑌹'),
552557
('\u{1133b}', '𑍄'),
553558
('𑍇', '𑍈'),
554-
('𑍋', '𑍍'),
559+
('𑍋', '\u{1134d}'),
555560
('𑍐', '𑍐'),
556561
('\u{11357}', '\u{11357}'),
557562
('𑍝', '𑍣'),
558563
('\u{11366}', '\u{1136c}'),
559564
('\u{11370}', '\u{11374}'),
565+
('𑎀', '𑎉'),
566+
('𑎋', '𑎋'),
567+
('𑎎', '𑎎'),
568+
('𑎐', '𑎵'),
569+
('𑎷', '\u{113c0}'),
570+
('\u{113c2}', '\u{113c2}'),
571+
('\u{113c5}', '\u{113c5}'),
572+
('\u{113c7}', '𑏊'),
573+
('𑏌', '𑏓'),
574+
('\u{113e1}', '\u{113e2}'),
560575
('𑐀', '𑑊'),
561576
('𑑐', '𑑙'),
562577
('\u{1145e}', '𑑡'),
@@ -571,6 +586,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[
571586
('𑙐', '𑙙'),
572587
('𑚀', '𑚸'),
573588
('𑛀', '𑛉'),
589+
('𑛐', '𑛣'),
574590
('𑜀', '𑜚'),
575591
('\u{1171d}', '\u{1172b}'),
576592
('𑜰', '𑜹'),
@@ -594,6 +610,8 @@ pub const PERL_WORD: &'static [(char, char)] = &[
594610
('𑩐', '\u{11a99}'),
595611
('𑪝', '𑪝'),
596612
('𑪰', '𑫸'),
613+
('𑯀', '𑯠'),
614+
('𑯰', '𑯹'),
597615
('𑰀', '𑰈'),
598616
('𑰊', '\u{11c36}'),
599617
('\u{11c38}', '𑱀'),
@@ -618,15 +636,17 @@ pub const PERL_WORD: &'static [(char, char)] = &[
618636
('\u{11f00}', '𑼐'),
619637
('𑼒', '\u{11f3a}'),
620638
('𑼾', '\u{11f42}'),
621-
('𑽐', '𑽙'),
639+
('𑽐', '\u{11f5a}'),
622640
('𑾰', '𑾰'),
623641
('𒀀', '𒎙'),
624642
('𒐀', '𒑮'),
625643
('𒒀', '𒕃'),
626644
('𒾐', '𒿰'),
627645
('𓀀', '𓐯'),
628646
('\u{13440}', '\u{13455}'),
647+
('𓑠', '𔏺'),
629648
('𔐀', '𔙆'),
649+
('𖄀', '𖄹'),
630650
('𖠀', '𖨸'),
631651
('𖩀', '𖩞'),
632652
('𖩠', '𖩩'),
@@ -639,16 +659,18 @@ pub const PERL_WORD: &'static [(char, char)] = &[
639659
('𖭐', '𖭙'),
640660
('𖭣', '𖭷'),
641661
('𖭽', '𖮏'),
662+
('𖵀', '𖵬'),
663+
('𖵰', '𖵹'),
642664
('𖹀', '𖹿'),
643665
('𖼀', '𖽊'),
644666
('\u{16f4f}', '𖾇'),
645667
('\u{16f8f}', '𖾟'),
646668
('𖿠', '𖿡'),
647669
('𖿣', '\u{16fe4}'),
648-
('𖿰', '𖿱'),
670+
('\u{16ff0}', '\u{16ff1}'),
649671
('𗀀', '𘟷'),
650672
('𘠀', '𘳕'),
651-
('𘴀', '𘴈'),
673+
('𘳿', '𘴈'),
652674
('𚿰', '𚿳'),
653675
('𚿵', '𚿻'),
654676
('𚿽', '𚿾'),
@@ -663,10 +685,11 @@ pub const PERL_WORD: &'static [(char, char)] = &[
663685
('𛲀', '𛲈'),
664686
('𛲐', '𛲙'),
665687
('\u{1bc9d}', '\u{1bc9e}'),
688+
('𜳰', '𜳹'),
666689
('\u{1cf00}', '\u{1cf2d}'),
667690
('\u{1cf30}', '\u{1cf46}'),
668691
('\u{1d165}', '\u{1d169}'),
669-
('𝅭', '\u{1d172}'),
692+
('\u{1d16d}', '\u{1d172}'),
670693
('\u{1d17b}', '\u{1d182}'),
671694
('\u{1d185}', '\u{1d18b}'),
672695
('\u{1d1aa}', '\u{1d1ad}'),
@@ -724,6 +747,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[
724747
('𞊐', '\u{1e2ae}'),
725748
('𞋀', '𞋹'),
726749
('𞓐', '𞓹'),
750+
('𞗐', '𞗺'),
727751
('𞟠', '𞟦'),
728752
('𞟨', '𞟫'),
729753
('𞟭', '𞟮'),
@@ -774,6 +798,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[
774798
('𫝀', '𫠝'),
775799
('𫠠', '𬺡'),
776800
('𬺰', '𮯠'),
801+
('𮯰', '𮹝'),
777802
('丽', '𪘀'),
778803
('𰀀', '𱍊'),
779804
('𱍐', '𲎯'),

regex-syntax/src/hir/translate.rs

+21
Original file line numberDiff line numberDiff line change
@@ -3143,10 +3143,31 @@ mod tests {
31433143
#[cfg(feature = "unicode-script")]
31443144
assert_eq!(
31453145
t(r"[\p{sc:Greek}~~\p{scx:Greek}]"),
3146+
// Class({
3147+
// '·'..='·',
3148+
// '\u{300}'..='\u{301}',
3149+
// '\u{304}'..='\u{304}',
3150+
// '\u{306}'..='\u{306}',
3151+
// '\u{308}'..='\u{308}',
3152+
// '\u{313}'..='\u{313}',
3153+
// '\u{342}'..='\u{342}',
3154+
// '\u{345}'..='\u{345}',
3155+
// 'ʹ'..='ʹ',
3156+
// '\u{1dc0}'..='\u{1dc1}',
3157+
// '⁝'..='⁝',
3158+
// })
31463159
hir_uclass(&[
3160+
('·', '·'),
3161+
('\u{0300}', '\u{0301}'),
3162+
('\u{0304}', '\u{0304}'),
3163+
('\u{0306}', '\u{0306}'),
3164+
('\u{0308}', '\u{0308}'),
3165+
('\u{0313}', '\u{0313}'),
31473166
('\u{0342}', '\u{0342}'),
31483167
('\u{0345}', '\u{0345}'),
3168+
('ʹ', 'ʹ'),
31493169
('\u{1DC0}', '\u{1DC1}'),
3170+
('⁝', '⁝'),
31503171
])
31513172
);
31523173
assert_eq!(t(r"[a-g~~c-j]"), hir_uclass(&[('a', 'b'), ('h', 'j')]));

regex-syntax/src/unicode.rs

+2
Original file line numberDiff line numberDiff line change
@@ -675,6 +675,8 @@ fn ages(canonical_age: &str) -> Result<impl Iterator<Item = Range>, Error> {
675675
("V13_0", age::V13_0),
676676
("V14_0", age::V14_0),
677677
("V15_0", age::V15_0),
678+
("V15_1", age::V15_1),
679+
("V16_0", age::V16_0),
678680
];
679681
assert_eq!(AGES.len(), age::BY_NAME.len(), "ages are out of sync");
680682

0 commit comments

Comments
 (0)