Skip to content

Commit 3297f92

Browse files
committed
Code cleanup in mb_str_split
- Make everything a bit more concise
- Remove trivial comments
- Improve grammar of remaining comments
1 parent a46c8bc commit 3297f92

File tree

1 file changed

+28
-51
lines changed

1 file changed

+28
-51
lines changed

ext/mbstring/mbstring.c

Lines changed: 28 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -1746,9 +1746,7 @@ PHP_FUNCTION(mb_str_split)
17461746
{
17471747
zend_string *str, *encoding = NULL;
17481748
size_t mb_len, chunks, chunk_len;
1749-
const char *p, *last; /* pointer for the string cursor and last string char */
17501749
mbfl_string string, result_string;
1751-
const mbfl_encoding *mbfl_encoding;
17521750
zend_long split_length = 1;
17531751

17541752
ZEND_PARSE_PARAMETERS_START(1, 3)
@@ -1763,20 +1761,17 @@ PHP_FUNCTION(mb_str_split)
17631761
RETURN_THROWS();
17641762
}
17651763

1766-
/* fill mbfl_string structure */
17671764
string.val = (unsigned char *) ZSTR_VAL(str);
17681765
string.len = ZSTR_LEN(str);
1769-
string.encoding = php_mb_get_encoding(encoding, 3);
1766+
const mbfl_encoding *mbfl_encoding = string.encoding = php_mb_get_encoding(encoding, 3);
17701767
if (!string.encoding) {
17711768
RETURN_THROWS();
17721769
}
17731770

1774-
p = ZSTR_VAL(str); /* string cursor pointer */
1775-
last = ZSTR_VAL(str) + ZSTR_LEN(str); /* last string char pointer */
1776-
1777-
mbfl_encoding = string.encoding;
1771+
const char *p = ZSTR_VAL(str);
1772+
const char *last = ZSTR_VAL(str) + ZSTR_LEN(str);
17781773

1779-
/* first scenario: 1,2,4-bytes fixed width encodings (head part) */
1774+
/* first scenario: 1/2/4-byte fixed width encoding */
17801775
if (mbfl_encoding->flag & MBFL_ENCTYPE_SBCS) { /* 1 byte */
17811776
mb_len = string.len;
17821777
chunk_len = (size_t)split_length; /* chunk length in bytes */
@@ -1786,46 +1781,37 @@ PHP_FUNCTION(mb_str_split)
17861781
} else if (mbfl_encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) { /* 4 bytes */
17871782
mb_len = string.len / 4;
17881783
chunk_len = split_length * 4;
1789-
} else if (mbfl_encoding->mblen_table != NULL) {
1790-
/* second scenario: variable width encodings with length table */
1791-
char unsigned const *mbtab = mbfl_encoding->mblen_table;
1784+
} else if (mbfl_encoding->mblen_table) {
1785+
/* second scenario: variable width encoding with length table */
1786+
const unsigned char *mbtab = mbfl_encoding->mblen_table;
17921787

1793-
/* assume that we have 1-bytes characters */
1788+
/* assume that we have 1-byte characters */
17941789
array_init_size(return_value, (string.len + split_length) / split_length); /* round up */
17951790

1796-
while (p < last) { /* split cycle work until the cursor has reached the last byte */
1797-
char const *chunk_p = p; /* chunk first byte pointer */
1798-
chunk_len = 0; /* chunk length in bytes */
1799-
zend_long char_count;
1791+
while (p < last) {
1792+
char *chunk_p = p; /* pointer to first byte in chunk */
18001793

1801-
for (char_count = 0; char_count < split_length && p < last; ++char_count) {
1802-
char unsigned const m = mbtab[*(const unsigned char *)p]; /* single character length table */
1803-
chunk_len += m;
1804-
p += m;
1794+
for (int char_count = 0; char_count < split_length && p < last; char_count++) {
1795+
p += mbtab[*(unsigned char*)p]; /* character byte length table */
18051796
}
1806-
if (p >= last) chunk_len -= p - last; /* check if chunk is in bounds */
1807-
add_next_index_stringl(return_value, chunk_p, chunk_len);
1797+
if (p > last) { /* check if chunk is in bounds */
1798+
p = last;
1799+
}
1800+
add_next_index_stringl(return_value, chunk_p, p - chunk_p);
18081801
}
18091802
return;
18101803
} else {
18111804
/* third scenario: other multibyte encodings */
1812-
mbfl_convert_filter *filter, *decoder;
1813-
1814-
/* assume that we have 1-bytes characters */
1805+
/* assume that we have 1-byte characters */
18151806
array_init_size(return_value, (string.len + split_length) / split_length); /* round up */
18161807

18171808
/* decoder filter to decode wchar to encoding */
18181809
mbfl_memory_device device;
18191810
mbfl_memory_device_init(&device, split_length + 1, 0);
18201811

1821-
decoder = mbfl_convert_filter_new(
1822-
&mbfl_encoding_wchar,
1823-
string.encoding,
1824-
mbfl_memory_device_output,
1825-
NULL,
1826-
&device);
1827-
/* assert that nothing is wrong with the decoder */
1828-
ZEND_ASSERT(decoder != NULL);
1812+
mbfl_convert_filter *decoder = mbfl_convert_filter_new(&mbfl_encoding_wchar, string.encoding,
1813+
mbfl_memory_device_output, NULL, &device);
1814+
ZEND_ASSERT(decoder);
18291815

18301816
/* wchar filter */
18311817
mbfl_string_init(&result_string); /* mbfl_string to store chunk in the callback */
@@ -1837,14 +1823,9 @@ PHP_FUNCTION(mb_str_split)
18371823
.next_filter = decoder,
18381824
};
18391825

1840-
filter = mbfl_convert_filter_new(
1841-
string.encoding,
1842-
&mbfl_encoding_wchar,
1843-
mbfl_split_output,
1844-
NULL,
1845-
&params);
1846-
/* assert that nothing is wrong with the filter */
1847-
ZEND_ASSERT(filter != NULL);
1826+
mbfl_convert_filter *filter = mbfl_convert_filter_new(string.encoding, &mbfl_encoding_wchar,
1827+
mbfl_split_output, NULL, &params);
1828+
ZEND_ASSERT(filter);
18481829

18491830
while (p < last - 1) { /* cycle each byte except last with callback function */
18501831
(*filter->filter_function)(*p++, filter);
@@ -1858,16 +1839,12 @@ PHP_FUNCTION(mb_str_split)
18581839
return;
18591840
}
18601841

1861-
/* first scenario: 1,2,4-bytes fixed width encodings (tail part) */
1862-
chunks = (mb_len + split_length - 1) / split_length; /* (round up idiom) */
1842+
/* first scenario: 1/2/4-byte fixed width encoding */
1843+
chunks = (mb_len + split_length - 1) / split_length; /* round up */
18631844
array_init_size(return_value, chunks);
1864-
if (chunks != 0) {
1865-
zend_long i;
1866-
1867-
for (i = 0; i < chunks - 1; p += chunk_len, ++i) {
1868-
add_next_index_stringl(return_value, p, chunk_len);
1869-
}
1870-
add_next_index_stringl(return_value, p, last - p);
1845+
while (chunks--) {
1846+
add_next_index_stringl(return_value, p, chunk_len);
1847+
p += chunk_len;
18711848
}
18721849
}
18731850
/* }}} */

0 commit comments

Comments
 (0)