Skip to content

Commit 58d0aad

Browse files
committed
mb_detect_encoding recognizes all letters in Hungarian alphabet
1 parent 6a4b6d2 commit 58d0aad

File tree

3 files changed

+6
-1
lines changed

3 files changed

+6
-1
lines changed

ext/mbstring/common_codepoints.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,12 @@
 8  8   0x0118 0x011B # Polish, Czech
 9  9   0x0141 0x0144 # Polish
10 10   0x0147 0x0148 # Czech
   11 + 0x0150 0x0151 # Hungarian
11 12   0x0158 0x015B # Czech, Polish
12 13   0x0160 0x0161 # Used in Slavic names
13 14   0x0164 0x0165 # Czech
14 15   0x016E 0x016F # Czech
   16 + 0x0170 0x0171 # Hungarian
15 17   0x0179 0x017E # Polish, Czech, other Slavic languages
16 18   0x0300 0x030A # Diacritical marks
17 19   0x0370 0x0377 # Greek

ext/mbstring/rare_cp_bitvec.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
11 11
12 12   static uint32_t rare_codepoint_bitvec[] = {
13 13   0xffffd9ff, 0x00000000, 0x00000000, 0x80000000, 0xffffffff, 0x00002001, 0x00000000, 0x00000000,
14    - 0xf0ff0f0f, 0xffffffff, 0xf0fffe61, 0x81ff3fcc, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
   14 + 0xf0ff0f0f, 0xffffffff, 0xf0fcfe61, 0x81fc3fcc, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
15 15   0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
16 16   0xfffff800, 0xffffffff, 0xffffffff, 0x0300ffff, 0x0000280f, 0x00000004, 0x00000000, 0x00000000,
17 17   0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,

ext/mbstring/tests/mb_detect_encoding.phpt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ $euc_jp = "\xC6\xFC\xCB\xDC\xB8\xEC\xA5\xC6\xA5\xAD\xA5\xB9\xA5\xC8\xA4\xC7\xA4\
15 15   // UTF-8
16 16   $polish1 = "Zażółć gęślą jaźń.";
17 17   $polish2 = "Wół poszedł spać bardzo wcześnie. A to zdanie bez ogonka.";
   18 + $hungarian = "Árvíztűrő tükörfúrógép";
18 19
19 20   echo "== BASIC TEST ==\n";
20 21
@@ -309,6 +310,8 @@ $czechEncodings = [
309 310   ];
310 311   test($czechStrings, $czechEncodings);
311 312
    313 + test([$hungarian], ['UTF-8', 'UTF-16', 'Windows-1252']);
    314 +
312 315   echo "Done!\n";
313 316
314 317   ?>

0 commit comments

Comments
 (0)