Skip to content

Commit a2a4db7

Browse files
committed
unicode: upgrade to version 9.0.0
Changes beyond generated tables:
- Now supports aliases to handle deprecated property classes.
- Some Mongolian letters are now modifiers.

Other changes:
- strconv: newly generated table to be in sync
- regexp/syntax: updated maxFold

Fixes #16191

Change-Id: I56bdf21ee2f775f2a82d0465b3772faf5c24cb61
Reviewed-on: https://go-review.googlesource.com/24496
Run-TryBot: Marcel van Lohuizen <[email protected]>
Reviewed-by: Ian Lance Taylor <[email protected]>
1 parent ed9362f commit a2a4db7

File tree

8 files changed

+473
-137
lines changed

8 files changed

+473
-137
lines changed

api/except.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,3 +329,4 @@ pkg syscall (netbsd-arm-cgo), const SizeofIfData = 132
329329
pkg syscall (netbsd-arm-cgo), type IfMsghdr struct, Pad_cgo_1 [4]uint8
330330
pkg unicode, const Version = "6.3.0"
331331
pkg unicode, const Version = "7.0.0"
332+
pkg unicode, const Version = "8.0.0"

api/go1.7.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,3 +274,12 @@ pkg syscall (linux-arm-cgo), type SysProcAttr struct, Unshareflags uintptr
274274
pkg testing, method (*B) Run(string, func(*B)) bool
275275
pkg testing, method (*T) Run(string, func(*T)) bool
276276
pkg testing, type InternalExample struct, Unordered bool
277+
pkg unicode, const Version = "9.0.0"
278+
pkg unicode, var Adlam *RangeTable
279+
pkg unicode, var Bhaiksuki *RangeTable
280+
pkg unicode, var Marchen *RangeTable
281+
pkg unicode, var Newa *RangeTable
282+
pkg unicode, var Osage *RangeTable
283+
pkg unicode, var Prepended_Concatenation_Mark *RangeTable
284+
pkg unicode, var Sentence_Terminal *RangeTable
285+
pkg unicode, var Tangut *RangeTable

src/regexp/syntax/parse.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1692,7 +1692,7 @@ const (
16921692
// minimum and maximum runes involved in folding.
16931693
// checked during test.
16941694
minFold = 0x0041
1695-
maxFold = 0x118df
1695+
maxFold = 0x1e943
16961696
)
16971697

16981698
// appendFoldedRange returns the result of appending the range lo-hi

src/strconv/isprint.go

Lines changed: 54 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
package strconv
99

10-
// (470+136+73)*2 + (342)*4 = 2726 bytes
10+
// (462+139+82)*2 + (378)*4 = 2878 bytes
1111

1212
var isPrint16 = []uint16{
1313
0x0020, 0x007e,
@@ -26,8 +26,8 @@ var isPrint16 = []uint16{
2626
0x0800, 0x082d,
2727
0x0830, 0x085b,
2828
0x085e, 0x085e,
29-
0x08a0, 0x08b4,
30-
0x08e3, 0x098c,
29+
0x08a0, 0x08bd,
30+
0x08d4, 0x098c,
3131
0x098f, 0x0990,
3232
0x0993, 0x09b2,
3333
0x09b6, 0x09b9,
@@ -83,11 +83,9 @@ var isPrint16 = []uint16{
8383
0x0cde, 0x0ce3,
8484
0x0ce6, 0x0cf2,
8585
0x0d01, 0x0d3a,
86-
0x0d3d, 0x0d4e,
87-
0x0d57, 0x0d57,
88-
0x0d5f, 0x0d63,
89-
0x0d66, 0x0d75,
90-
0x0d79, 0x0d7f,
86+
0x0d3d, 0x0d4f,
87+
0x0d54, 0x0d63,
88+
0x0d66, 0x0d7f,
9189
0x0d82, 0x0d96,
9290
0x0d9a, 0x0dbd,
9391
0x0dc0, 0x0dc6,
@@ -153,11 +151,11 @@ var isPrint16 = []uint16{
153151
0x1b80, 0x1bf3,
154152
0x1bfc, 0x1c37,
155153
0x1c3b, 0x1c49,
156-
0x1c4d, 0x1c7f,
154+
0x1c4d, 0x1c88,
157155
0x1cc0, 0x1cc7,
158156
0x1cd0, 0x1cf9,
159157
0x1d00, 0x1df5,
160-
0x1dfc, 0x1f15,
158+
0x1dfb, 0x1f15,
161159
0x1f18, 0x1f1d,
162160
0x1f20, 0x1f45,
163161
0x1f48, 0x1f4d,
@@ -172,8 +170,7 @@ var isPrint16 = []uint16{
172170
0x20a0, 0x20be,
173171
0x20d0, 0x20f0,
174172
0x2100, 0x218b,
175-
0x2190, 0x23fa,
176-
0x2400, 0x2426,
173+
0x2190, 0x2426,
177174
0x2440, 0x244a,
178175
0x2460, 0x2b73,
179176
0x2b76, 0x2b95,
@@ -186,7 +183,7 @@ var isPrint16 = []uint16{
186183
0x2d30, 0x2d67,
187184
0x2d6f, 0x2d70,
188185
0x2d7f, 0x2d96,
189-
0x2da0, 0x2e42,
186+
0x2da0, 0x2e44,
190187
0x2e80, 0x2ef3,
191188
0x2f00, 0x2fd5,
192189
0x2ff0, 0x2ffb,
@@ -201,12 +198,11 @@ var isPrint16 = []uint16{
201198
0xa490, 0xa4c6,
202199
0xa4d0, 0xa62b,
203200
0xa640, 0xa6f7,
204-
0xa700, 0xa7ad,
205-
0xa7b0, 0xa7b7,
201+
0xa700, 0xa7b7,
206202
0xa7f7, 0xa82b,
207203
0xa830, 0xa839,
208204
0xa840, 0xa877,
209-
0xa880, 0xa8c4,
205+
0xa880, 0xa8c5,
210206
0xa8ce, 0xa8d9,
211207
0xa8e0, 0xa8fd,
212208
0xa900, 0xa953,
@@ -258,6 +254,8 @@ var isNotPrint16 = []uint16{
258254
0x0590,
259255
0x06dd,
260256
0x083f,
257+
0x08b5,
258+
0x08e2,
261259
0x0984,
262260
0x09a9,
263261
0x09b1,
@@ -294,7 +292,6 @@ var isNotPrint16 = []uint16{
294292
0x0c45,
295293
0x0c49,
296294
0x0c57,
297-
0x0c80,
298295
0x0c84,
299296
0x0c8d,
300297
0x0c91,
@@ -354,6 +351,7 @@ var isNotPrint16 = []uint16{
354351
0x1fdc,
355352
0x1ff5,
356353
0x208f,
354+
0x23ff,
357355
0x2bc9,
358356
0x2c2f,
359357
0x2c5f,
@@ -371,6 +369,7 @@ var isNotPrint16 = []uint16{
371369
0x318f,
372370
0x321f,
373371
0x32ff,
372+
0xa7af,
374373
0xa9ce,
375374
0xa9ff,
376375
0xab27,
@@ -392,8 +391,7 @@ var isPrint32 = []uint32{
392391
0x010080, 0x0100fa,
393392
0x010100, 0x010102,
394393
0x010107, 0x010133,
395-
0x010137, 0x01018c,
396-
0x010190, 0x01019b,
394+
0x010137, 0x01019b,
397395
0x0101a0, 0x0101a0,
398396
0x0101d0, 0x0101fd,
399397
0x010280, 0x01029c,
@@ -406,6 +404,8 @@ var isPrint32 = []uint32{
406404
0x0103c8, 0x0103d5,
407405
0x010400, 0x01049d,
408406
0x0104a0, 0x0104a9,
407+
0x0104b0, 0x0104d3,
408+
0x0104d8, 0x0104fb,
409409
0x010500, 0x010527,
410410
0x010530, 0x010563,
411411
0x01056f, 0x01056f,
@@ -451,7 +451,7 @@ var isPrint32 = []uint32{
451451
0x011150, 0x011176,
452452
0x011180, 0x0111cd,
453453
0x0111d0, 0x0111f4,
454-
0x011200, 0x01123d,
454+
0x011200, 0x01123e,
455455
0x011280, 0x0112a9,
456456
0x0112b0, 0x0112ea,
457457
0x0112f0, 0x0112f9,
@@ -466,12 +466,14 @@ var isPrint32 = []uint32{
466466
0x01135d, 0x011363,
467467
0x011366, 0x01136c,
468468
0x011370, 0x011374,
469+
0x011400, 0x01145d,
469470
0x011480, 0x0114c7,
470471
0x0114d0, 0x0114d9,
471472
0x011580, 0x0115b5,
472473
0x0115b8, 0x0115dd,
473474
0x011600, 0x011644,
474475
0x011650, 0x011659,
476+
0x011660, 0x01166c,
475477
0x011680, 0x0116b7,
476478
0x0116c0, 0x0116c9,
477479
0x011700, 0x011719,
@@ -480,6 +482,10 @@ var isPrint32 = []uint32{
480482
0x0118a0, 0x0118f2,
481483
0x0118ff, 0x0118ff,
482484
0x011ac0, 0x011af8,
485+
0x011c00, 0x011c45,
486+
0x011c50, 0x011c6c,
487+
0x011c70, 0x011c8f,
488+
0x011c92, 0x011cb6,
483489
0x012000, 0x012399,
484490
0x012400, 0x012474,
485491
0x012480, 0x012543,
@@ -496,6 +502,9 @@ var isPrint32 = []uint32{
496502
0x016f00, 0x016f44,
497503
0x016f50, 0x016f7e,
498504
0x016f8f, 0x016f9f,
505+
0x016fe0, 0x016fe0,
506+
0x017000, 0x0187ec,
507+
0x018800, 0x018af2,
499508
0x01b000, 0x01b001,
500509
0x01bc00, 0x01bc6a,
501510
0x01bc70, 0x01bc7c,
@@ -518,8 +527,13 @@ var isPrint32 = []uint32{
518527
0x01d6a8, 0x01d7cb,
519528
0x01d7ce, 0x01da8b,
520529
0x01da9b, 0x01daaf,
530+
0x01e000, 0x01e018,
531+
0x01e01b, 0x01e02a,
521532
0x01e800, 0x01e8c4,
522533
0x01e8c7, 0x01e8d6,
534+
0x01e900, 0x01e94a,
535+
0x01e950, 0x01e959,
536+
0x01e95e, 0x01e95f,
523537
0x01ee00, 0x01ee24,
524538
0x01ee27, 0x01ee3b,
525539
0x01ee42, 0x01ee42,
@@ -534,23 +548,26 @@ var isPrint32 = []uint32{
534548
0x01f0b1, 0x01f0f5,
535549
0x01f100, 0x01f10c,
536550
0x01f110, 0x01f16b,
537-
0x01f170, 0x01f19a,
551+
0x01f170, 0x01f1ac,
538552
0x01f1e6, 0x01f202,
539-
0x01f210, 0x01f23a,
553+
0x01f210, 0x01f23b,
540554
0x01f240, 0x01f248,
541555
0x01f250, 0x01f251,
542-
0x01f300, 0x01f6d0,
556+
0x01f300, 0x01f6d2,
543557
0x01f6e0, 0x01f6ec,
544-
0x01f6f0, 0x01f6f3,
558+
0x01f6f0, 0x01f6f6,
545559
0x01f700, 0x01f773,
546560
0x01f780, 0x01f7d4,
547561
0x01f800, 0x01f80b,
548562
0x01f810, 0x01f847,
549563
0x01f850, 0x01f859,
550564
0x01f860, 0x01f887,
551565
0x01f890, 0x01f8ad,
552-
0x01f910, 0x01f918,
553-
0x01f980, 0x01f984,
566+
0x01f910, 0x01f927,
567+
0x01f930, 0x01f930,
568+
0x01f933, 0x01f94b,
569+
0x01f950, 0x01f95e,
570+
0x01f980, 0x01f991,
554571
0x01f9c0, 0x01f9c0,
555572
0x020000, 0x02a6d6,
556573
0x02a700, 0x02b734,
@@ -565,6 +582,7 @@ var isNotPrint32 = []uint16{ // add 0x10000 to each entry
565582
0x0027,
566583
0x003b,
567584
0x003e,
585+
0x018f,
568586
0x039e,
569587
0x0809,
570588
0x0836,
@@ -585,6 +603,11 @@ var isNotPrint32 = []uint16{ // add 0x10000 to each entry
585603
0x1329,
586604
0x1331,
587605
0x1334,
606+
0x145a,
607+
0x145c,
608+
0x1c09,
609+
0x1c37,
610+
0x1ca8,
588611
0x246f,
589612
0x6a5f,
590613
0x6b5a,
@@ -603,6 +626,9 @@ var isNotPrint32 = []uint16{ // add 0x10000 to each entry
603626
0xd545,
604627
0xd551,
605628
0xdaa0,
629+
0xe007,
630+
0xe022,
631+
0xe025,
606632
0xee04,
607633
0xee20,
608634
0xee23,
@@ -632,8 +658,8 @@ var isNotPrint32 = []uint16{ // add 0x10000 to each entry
632658
0xf0c0,
633659
0xf0d0,
634660
0xf12f,
635-
0xf57a,
636-
0xf5a4,
661+
0xf91f,
662+
0xf93f,
637663
}
638664

639665
// isGraphic lists the graphic runes not matched by IsPrint.

src/unicode/letter_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,6 @@ var letterTest = []rune{
7373
0x1200,
7474
0x1312,
7575
0x1401,
76-
0x1885,
7776
0x2c00,
7877
0xa800,
7978
0xf900,
@@ -94,6 +93,7 @@ var notletterTest = []rune{
9493
0x375,
9594
0x619,
9695
0x700,
96+
0x1885,
9797
0xfffe,
9898
0x1ffff,
9999
0x10ffff,

src/unicode/maketables.go

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ func main() {
4444
var dataURL = flag.String("data", "", "full URL for UnicodeData.txt; defaults to --url/UnicodeData.txt")
4545
var casefoldingURL = flag.String("casefolding", "", "full URL for CaseFolding.txt; defaults to --url/CaseFolding.txt")
4646
var url = flag.String("url",
47-
"http://www.unicode.org/Public/8.0.0/ucd/",
47+
"http://www.unicode.org/Public/9.0.0/ucd/",
4848
"URL of Unicode database directory")
4949
var tablelist = flag.String("tables",
5050
"all",
@@ -743,6 +743,10 @@ func fullScriptTest(list []string, installed map[string]*unicode.RangeTable, scr
743743
}
744744
}
745745

746+
var deprecatedAliases = map[string]string{
747+
"Sentence_Terminal": "STerm",
748+
}
749+
746750
// PropList.txt has the same format as Scripts.txt so we can share its parser.
747751
func printScriptOrProperty(doProps bool) {
748752
flag := "scripts"
@@ -797,11 +801,14 @@ func printScriptOrProperty(doProps bool) {
797801
}
798802
for _, k := range all(table) {
799803
printf("\t%q: %s,\n", k, k)
804+
if alias, ok := deprecatedAliases[k]; ok {
805+
printf("\t%q: %s,\n", alias, k)
806+
}
800807
}
801808
print("}\n\n")
802809
}
803810

804-
decl := make(sort.StringSlice, len(list))
811+
decl := make(sort.StringSlice, len(list)+len(deprecatedAliases))
805812
ndecl := 0
806813
for _, name := range list {
807814
if doProps {
@@ -814,6 +821,12 @@ func printScriptOrProperty(doProps bool) {
814821
name, name, name, name)
815822
}
816823
ndecl++
824+
if alias, ok := deprecatedAliases[name]; ok {
825+
decl[ndecl] = fmt.Sprintf(
826+
"\t%[1]s = _%[2]s;\t// %[1]s is an alias for %[2]s.\n",
827+
alias, name)
828+
ndecl++
829+
}
817830
printf("var _%s = &RangeTable {\n", name)
818831
ranges := foldAdjacent(table[name])
819832
print("\tR16: []Range16{\n")

0 commit comments

Comments
 (0)