@@ -1746,9 +1746,7 @@ PHP_FUNCTION(mb_str_split)
1746
1746
{
1747
1747
zend_string * str , * encoding = NULL ;
1748
1748
size_t mb_len , chunks , chunk_len ;
1749
- const char * p , * last ; /* pointer for the string cursor and last string char */
1750
1749
mbfl_string string , result_string ;
1751
- const mbfl_encoding * mbfl_encoding ;
1752
1750
zend_long split_length = 1 ;
1753
1751
1754
1752
ZEND_PARSE_PARAMETERS_START (1 , 3 )
@@ -1763,20 +1761,17 @@ PHP_FUNCTION(mb_str_split)
1763
1761
RETURN_THROWS ();
1764
1762
}
1765
1763
1766
- /* fill mbfl_string structure */
1767
1764
string .val = (unsigned char * ) ZSTR_VAL (str );
1768
1765
string .len = ZSTR_LEN (str );
1769
- string .encoding = php_mb_get_encoding (encoding , 3 );
1766
+ const mbfl_encoding * mbfl_encoding = string .encoding = php_mb_get_encoding (encoding , 3 );
1770
1767
if (!string .encoding ) {
1771
1768
RETURN_THROWS ();
1772
1769
}
1773
1770
1774
- p = ZSTR_VAL (str ); /* string cursor pointer */
1775
- last = ZSTR_VAL (str ) + ZSTR_LEN (str ); /* last string char pointer */
1776
-
1777
- mbfl_encoding = string .encoding ;
1771
+ const char * p = ZSTR_VAL (str );
1772
+ const char * last = ZSTR_VAL (str ) + ZSTR_LEN (str );
1778
1773
1779
- /* first scenario: 1,2,4-bytes fixed width encodings (head part) */
1774
+ /* first scenario: 1/2/4-byte fixed width encoding */
1780
1775
if (mbfl_encoding -> flag & MBFL_ENCTYPE_SBCS ) { /* 1 byte */
1781
1776
mb_len = string .len ;
1782
1777
chunk_len = (size_t )split_length ; /* chunk length in bytes */
@@ -1786,46 +1781,37 @@ PHP_FUNCTION(mb_str_split)
1786
1781
} else if (mbfl_encoding -> flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE )) { /* 4 bytes */
1787
1782
mb_len = string .len / 4 ;
1788
1783
chunk_len = split_length * 4 ;
1789
- } else if (mbfl_encoding -> mblen_table != NULL ) {
1790
- /* second scenario: variable width encodings with length table */
1791
- char unsigned const * mbtab = mbfl_encoding -> mblen_table ;
1784
+ } else if (mbfl_encoding -> mblen_table ) {
1785
+ /* second scenario: variable width encoding with length table */
1786
+ const unsigned char * mbtab = mbfl_encoding -> mblen_table ;
1792
1787
1793
- /* assume that we have 1-bytes characters */
1788
+ /* assume that we have 1-byte characters */
1794
1789
array_init_size (return_value , (string .len + split_length ) / split_length ); /* round up */
1795
1790
1796
- while (p < last ) { /* split cycle work until the cursor has reached the last byte */
1797
- char const * chunk_p = p ; /* chunk first byte pointer */
1798
- chunk_len = 0 ; /* chunk length in bytes */
1799
- zend_long char_count ;
1791
+ while (p < last ) {
1792
+ const char * chunk_p = p ; /* first byte of this chunk; const because p points into the read-only input string */
1800
1793
1801
- for (char_count = 0 ; char_count < split_length && p < last ; ++ char_count ) {
1802
- char unsigned const m = mbtab [* (const unsigned char * )p ]; /* single character length table */
1803
- chunk_len += m ;
1804
- p += m ;
1794
+ for (zend_long char_count = 0 ; char_count < split_length && p < last ; char_count ++ ) { /* counter has the same type as split_length so it cannot overflow first */
1795
+ p += mbtab [* (const unsigned char * )p ]; /* advance by this character's byte length, looked up from its lead byte */
1805
1796
}
1806
- if (p >= last ) chunk_len -= p - last ; /* check if chunk is in bounds */
1807
- add_next_index_stringl (return_value , chunk_p , chunk_len );
1797
+ if (p > last ) { /* check if chunk is in bounds */
1798
+ p = last ;
1799
+ }
1800
+ add_next_index_stringl (return_value , chunk_p , p - chunk_p );
1808
1801
}
1809
1802
return ;
1810
1803
} else {
1811
1804
/* third scenario: other multibyte encodings */
1812
- mbfl_convert_filter * filter , * decoder ;
1813
-
1814
- /* assume that we have 1-bytes characters */
1805
+ /* assume that we have 1-byte characters */
1815
1806
array_init_size (return_value , (string .len + split_length ) / split_length ); /* round up */
1816
1807
1817
1808
/* decoder filter to decode wchar to encoding */
1818
1809
mbfl_memory_device device ;
1819
1810
mbfl_memory_device_init (& device , split_length + 1 , 0 );
1820
1811
1821
- decoder = mbfl_convert_filter_new (
1822
- & mbfl_encoding_wchar ,
1823
- string .encoding ,
1824
- mbfl_memory_device_output ,
1825
- NULL ,
1826
- & device );
1827
- /* assert that nothing is wrong with the decoder */
1828
- ZEND_ASSERT (decoder != NULL );
1812
+ mbfl_convert_filter * decoder = mbfl_convert_filter_new (& mbfl_encoding_wchar , string .encoding ,
1813
+ mbfl_memory_device_output , NULL , & device );
1814
+ ZEND_ASSERT (decoder );
1829
1815
1830
1816
/* wchar filter */
1831
1817
mbfl_string_init (& result_string ); /* mbfl_string to store chunk in the callback */
@@ -1837,14 +1823,9 @@ PHP_FUNCTION(mb_str_split)
1837
1823
.next_filter = decoder ,
1838
1824
};
1839
1825
1840
- filter = mbfl_convert_filter_new (
1841
- string .encoding ,
1842
- & mbfl_encoding_wchar ,
1843
- mbfl_split_output ,
1844
- NULL ,
1845
- & params );
1846
- /* assert that nothing is wrong with the filter */
1847
- ZEND_ASSERT (filter != NULL );
1826
+ mbfl_convert_filter * filter = mbfl_convert_filter_new (string .encoding , & mbfl_encoding_wchar ,
1827
+ mbfl_split_output , NULL , & params );
1828
+ ZEND_ASSERT (filter );
1848
1829
1849
1830
while (p < last - 1 ) { /* cycle each byte except last with callback function */
1850
1831
(* filter -> filter_function )(* p ++ , filter );
@@ -1858,16 +1839,12 @@ PHP_FUNCTION(mb_str_split)
1858
1839
return ;
1859
1840
}
1860
1841
1861
- /* first scenario: 1,2,4-bytes fixed width encodings (tail part) */
1862
- chunks = (mb_len + split_length - 1 ) / split_length ; /* ( round up idiom) */
1842
+ /* first scenario: 1/2/4-byte fixed width encoding */
1843
+ chunks = (mb_len + split_length - 1 ) / split_length ; /* round up */
1863
1844
array_init_size (return_value , chunks );
1864
- if (chunks != 0 ) {
1865
- zend_long i ;
1866
-
1867
- for (i = 0 ; i < chunks - 1 ; p += chunk_len , ++ i ) {
1868
- add_next_index_stringl (return_value , p , chunk_len );
1869
- }
1870
- add_next_index_stringl (return_value , p , last - p );
1845
+ while (chunks -- ) {
1846
+ add_next_index_stringl (return_value , p , chunks ? chunk_len : (size_t )(last - p )); /* chunks is already decremented: 0 means this is the final chunk, which takes only the bytes remaining before `last` so we never read past the input */
1847
+ p += chunk_len ;
1871
1848
}
1872
1849
}
1873
1850
/* }}} */
0 commit comments