Skip to content

Commit 0da37c6

Browse files
committed
Code cleanup in mb_str_split
- Make everything a bit more concise
- Remove trivial comments
- Improve grammar of remaining comments
1 parent 212dc4b commit 0da37c6

File tree

1 file changed

+28
-51
lines changed

1 file changed

+28
-51
lines changed

ext/mbstring/mbstring.c

Lines changed: 28 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -1747,9 +1747,7 @@ PHP_FUNCTION(mb_str_split)
17471747
{
17481748
zend_string *str, *encoding = NULL;
17491749
size_t mb_len, chunks, chunk_len;
1750-
const char *p, *last; /* pointer for the string cursor and last string char */
17511750
mbfl_string string, result_string;
1752-
const mbfl_encoding *mbfl_encoding;
17531751
zend_long split_length = 1;
17541752

17551753
ZEND_PARSE_PARAMETERS_START(1, 3)
@@ -1764,20 +1762,17 @@ PHP_FUNCTION(mb_str_split)
17641762
RETURN_THROWS();
17651763
}
17661764

1767-
/* fill mbfl_string structure */
17681765
string.val = (unsigned char *) ZSTR_VAL(str);
17691766
string.len = ZSTR_LEN(str);
1770-
string.encoding = php_mb_get_encoding(encoding, 3);
1767+
const mbfl_encoding *mbfl_encoding = string.encoding = php_mb_get_encoding(encoding, 3);
17711768
if (!string.encoding) {
17721769
RETURN_THROWS();
17731770
}
17741771

1775-
p = ZSTR_VAL(str); /* string cursor pointer */
1776-
last = ZSTR_VAL(str) + ZSTR_LEN(str); /* last string char pointer */
1777-
1778-
mbfl_encoding = string.encoding;
1772+
const char *p = ZSTR_VAL(str);
1773+
const char *last = ZSTR_VAL(str) + ZSTR_LEN(str);
17791774

1780-
/* first scenario: 1,2,4-bytes fixed width encodings (head part) */
1775+
/* first scenario: 1/2/4-byte fixed width encoding */
17811776
if (mbfl_encoding->flag & MBFL_ENCTYPE_SBCS) { /* 1 byte */
17821777
mb_len = string.len;
17831778
chunk_len = (size_t)split_length; /* chunk length in bytes */
@@ -1787,46 +1782,37 @@ PHP_FUNCTION(mb_str_split)
17871782
} else if (mbfl_encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) { /* 4 bytes */
17881783
mb_len = string.len / 4;
17891784
chunk_len = split_length * 4;
1790-
} else if (mbfl_encoding->mblen_table != NULL) {
1791-
/* second scenario: variable width encodings with length table */
1792-
char unsigned const *mbtab = mbfl_encoding->mblen_table;
1785+
} else if (mbfl_encoding->mblen_table) {
1786+
/* second scenario: variable width encoding with length table */
1787+
const unsigned char *mbtab = mbfl_encoding->mblen_table;
17931788

1794-
/* assume that we have 1-bytes characters */
1789+
/* assume that we have 1-byte characters */
17951790
array_init_size(return_value, (string.len + split_length) / split_length); /* round up */
17961791

1797-
while (p < last) { /* split cycle work until the cursor has reached the last byte */
1798-
char const *chunk_p = p; /* chunk first byte pointer */
1799-
chunk_len = 0; /* chunk length in bytes */
1800-
zend_long char_count;
1792+
while (p < last) {
1793+
char *chunk_p = p; /* pointer to first byte in chunk */
18011794

1802-
for (char_count = 0; char_count < split_length && p < last; ++char_count) {
1803-
char unsigned const m = mbtab[*(const unsigned char *)p]; /* single character length table */
1804-
chunk_len += m;
1805-
p += m;
1795+
for (int char_count = 0; char_count < split_length && p < last; char_count++) {
1796+
p += mbtab[*(unsigned char*)p]; /* character byte length table */
18061797
}
1807-
if (p >= last) chunk_len -= p - last; /* check if chunk is in bounds */
1808-
add_next_index_stringl(return_value, chunk_p, chunk_len);
1798+
if (p > last) { /* check if chunk is in bounds */
1799+
p = last;
1800+
}
1801+
add_next_index_stringl(return_value, chunk_p, p - chunk_p);
18091802
}
18101803
return;
18111804
} else {
18121805
/* third scenario: other multibyte encodings */
1813-
mbfl_convert_filter *filter, *decoder;
1814-
1815-
/* assume that we have 1-bytes characters */
1806+
/* assume that we have 1-byte characters */
18161807
array_init_size(return_value, (string.len + split_length) / split_length); /* round up */
18171808

18181809
/* decoder filter to decode wchar to encoding */
18191810
mbfl_memory_device device;
18201811
mbfl_memory_device_init(&device, split_length + 1, 0);
18211812

1822-
decoder = mbfl_convert_filter_new(
1823-
&mbfl_encoding_wchar,
1824-
string.encoding,
1825-
mbfl_memory_device_output,
1826-
NULL,
1827-
&device);
1828-
/* assert that nothing is wrong with the decoder */
1829-
ZEND_ASSERT(decoder != NULL);
1813+
mbfl_convert_filter *decoder = mbfl_convert_filter_new(&mbfl_encoding_wchar, string.encoding,
1814+
mbfl_memory_device_output, NULL, &device);
1815+
ZEND_ASSERT(decoder);
18301816

18311817
/* wchar filter */
18321818
mbfl_string_init(&result_string); /* mbfl_string to store chunk in the callback */
@@ -1838,14 +1824,9 @@ PHP_FUNCTION(mb_str_split)
18381824
.next_filter = decoder,
18391825
};
18401826

1841-
filter = mbfl_convert_filter_new(
1842-
string.encoding,
1843-
&mbfl_encoding_wchar,
1844-
mbfl_split_output,
1845-
NULL,
1846-
&params);
1847-
/* assert that nothing is wrong with the filter */
1848-
ZEND_ASSERT(filter != NULL);
1827+
mbfl_convert_filter *filter = mbfl_convert_filter_new(string.encoding, &mbfl_encoding_wchar,
1828+
mbfl_split_output, NULL, &params);
1829+
ZEND_ASSERT(filter);
18491830

18501831
while (p < last - 1) { /* cycle each byte except last with callback function */
18511832
(*filter->filter_function)(*p++, filter);
@@ -1859,16 +1840,12 @@ PHP_FUNCTION(mb_str_split)
18591840
return;
18601841
}
18611842

1862-
/* first scenario: 1,2,4-bytes fixed width encodings (tail part) */
1863-
chunks = (mb_len + split_length - 1) / split_length; /* (round up idiom) */
1843+
/* first scenario: 1/2/4-byte fixed width encoding */
1844+
chunks = (mb_len + split_length - 1) / split_length; /* round up */
18641845
array_init_size(return_value, chunks);
1865-
if (chunks != 0) {
1866-
zend_long i;
1867-
1868-
for (i = 0; i < chunks - 1; p += chunk_len, ++i) {
1869-
add_next_index_stringl(return_value, p, chunk_len);
1870-
}
1871-
add_next_index_stringl(return_value, p, last - p);
1846+
while (chunks--) {
1847+
add_next_index_stringl(return_value, p, chunk_len);
1848+
p += chunk_len;
18721849
}
18731850
}
18741851
/* }}} */

0 commit comments

Comments
 (0)