@@ -1747,9 +1747,7 @@ PHP_FUNCTION(mb_str_split)
1747
1747
{
1748
1748
zend_string * str , * encoding = NULL ;
1749
1749
size_t mb_len , chunks , chunk_len ;
1750
- const char * p , * last ; /* pointer for the string cursor and last string char */
1751
1750
mbfl_string string , result_string ;
1752
- const mbfl_encoding * mbfl_encoding ;
1753
1751
zend_long split_length = 1 ;
1754
1752
1755
1753
ZEND_PARSE_PARAMETERS_START (1 , 3 )
@@ -1764,20 +1762,17 @@ PHP_FUNCTION(mb_str_split)
1764
1762
RETURN_THROWS ();
1765
1763
}
1766
1764
1767
- /* fill mbfl_string structure */
1768
1765
string .val = (unsigned char * ) ZSTR_VAL (str );
1769
1766
string .len = ZSTR_LEN (str );
1770
- string .encoding = php_mb_get_encoding (encoding , 3 );
1767
+ const mbfl_encoding * mbfl_encoding = string .encoding = php_mb_get_encoding (encoding , 3 );
1771
1768
if (!string .encoding ) {
1772
1769
RETURN_THROWS ();
1773
1770
}
1774
1771
1775
- p = ZSTR_VAL (str ); /* string cursor pointer */
1776
- last = ZSTR_VAL (str ) + ZSTR_LEN (str ); /* last string char pointer */
1777
-
1778
- mbfl_encoding = string .encoding ;
1772
+ const char * p = ZSTR_VAL (str );
1773
+ const char * last = ZSTR_VAL (str ) + ZSTR_LEN (str );
1779
1774
1780
- /* first scenario: 1,2,4-bytes fixed width encodings (head part) */
1775
+ /* first scenario: 1/2/4-byte fixed width encoding */
1781
1776
if (mbfl_encoding -> flag & MBFL_ENCTYPE_SBCS ) { /* 1 byte */
1782
1777
mb_len = string .len ;
1783
1778
chunk_len = (size_t )split_length ; /* chunk length in bytes */
@@ -1787,46 +1782,37 @@ PHP_FUNCTION(mb_str_split)
1787
1782
} else if (mbfl_encoding -> flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE )) { /* 4 bytes */
1788
1783
mb_len = string .len / 4 ;
1789
1784
chunk_len = split_length * 4 ;
1790
- } else if (mbfl_encoding -> mblen_table != NULL ) {
1791
- /* second scenario: variable width encodings with length table */
1792
- char unsigned const * mbtab = mbfl_encoding -> mblen_table ;
1785
+ } else if (mbfl_encoding -> mblen_table ) {
1786
+ /* second scenario: variable width encoding with length table */
1787
+ const unsigned char * mbtab = mbfl_encoding -> mblen_table ;
1793
1788
1794
- /* assume that we have 1-bytes characters */
1789
+ /* assume that we have 1-byte characters */
1795
1790
array_init_size (return_value , (string .len + split_length ) / split_length ); /* round up */
1796
1791
1797
- while (p < last ) { /* split cycle work until the cursor has reached the last byte */
1798
- char const * chunk_p = p ; /* chunk first byte pointer */
1799
- chunk_len = 0 ; /* chunk length in bytes */
1800
- zend_long char_count ;
1792
+ while (p < last ) {
1793
+ char * chunk_p = p ; /* pointer to first byte in chunk */
1801
1794
1802
- for (char_count = 0 ; char_count < split_length && p < last ; ++ char_count ) {
1803
- char unsigned const m = mbtab [* (const unsigned char * )p ]; /* single character length table */
1804
- chunk_len += m ;
1805
- p += m ;
1795
+ for (int char_count = 0 ; char_count < split_length && p < last ; char_count ++ ) {
1796
+ p += mbtab [* (unsigned char * )p ]; /* character byte length table */
1806
1797
}
1807
- if (p >= last ) chunk_len -= p - last ; /* check if chunk is in bounds */
1808
- add_next_index_stringl (return_value , chunk_p , chunk_len );
1798
+ if (p > last ) { /* check if chunk is in bounds */
1799
+ p = last ;
1800
+ }
1801
+ add_next_index_stringl (return_value , chunk_p , p - chunk_p );
1809
1802
}
1810
1803
return ;
1811
1804
} else {
1812
1805
/* third scenario: other multibyte encodings */
1813
- mbfl_convert_filter * filter , * decoder ;
1814
-
1815
- /* assume that we have 1-bytes characters */
1806
+ /* assume that we have 1-byte characters */
1816
1807
array_init_size (return_value , (string .len + split_length ) / split_length ); /* round up */
1817
1808
1818
1809
/* decoder filter to decode wchar to encoding */
1819
1810
mbfl_memory_device device ;
1820
1811
mbfl_memory_device_init (& device , split_length + 1 , 0 );
1821
1812
1822
- decoder = mbfl_convert_filter_new (
1823
- & mbfl_encoding_wchar ,
1824
- string .encoding ,
1825
- mbfl_memory_device_output ,
1826
- NULL ,
1827
- & device );
1828
- /* assert that nothing is wrong with the decoder */
1829
- ZEND_ASSERT (decoder != NULL );
1813
+ mbfl_convert_filter * decoder = mbfl_convert_filter_new (& mbfl_encoding_wchar , string .encoding ,
1814
+ mbfl_memory_device_output , NULL , & device );
1815
+ ZEND_ASSERT (decoder );
1830
1816
1831
1817
/* wchar filter */
1832
1818
mbfl_string_init (& result_string ); /* mbfl_string to store chunk in the callback */
@@ -1838,14 +1824,9 @@ PHP_FUNCTION(mb_str_split)
1838
1824
.next_filter = decoder ,
1839
1825
};
1840
1826
1841
- filter = mbfl_convert_filter_new (
1842
- string .encoding ,
1843
- & mbfl_encoding_wchar ,
1844
- mbfl_split_output ,
1845
- NULL ,
1846
- & params );
1847
- /* assert that nothing is wrong with the filter */
1848
- ZEND_ASSERT (filter != NULL );
1827
+ mbfl_convert_filter * filter = mbfl_convert_filter_new (string .encoding , & mbfl_encoding_wchar ,
1828
+ mbfl_split_output , NULL , & params );
1829
+ ZEND_ASSERT (filter );
1849
1830
1850
1831
while (p < last - 1 ) { /* cycle each byte except last with callback function */
1851
1832
(* filter -> filter_function )(* p ++ , filter );
@@ -1859,16 +1840,12 @@ PHP_FUNCTION(mb_str_split)
1859
1840
return ;
1860
1841
}
1861
1842
1862
- /* first scenario: 1,2,4-bytes fixed width encodings (tail part) */
1863
- chunks = (mb_len + split_length - 1 ) / split_length ; /* ( round up idiom) */
1843
+ /* first scenario: 1/2/4-byte fixed width encoding */
1844
+ chunks = (mb_len + split_length - 1 ) / split_length ; /* round up */
1864
1845
array_init_size (return_value , chunks );
1865
- if (chunks != 0 ) {
1866
- zend_long i ;
1867
-
1868
- for (i = 0 ; i < chunks - 1 ; p += chunk_len , ++ i ) {
1869
- add_next_index_stringl (return_value , p , chunk_len );
1870
- }
1871
- add_next_index_stringl (return_value , p , last - p );
1846
+ while (chunks -- ) {
1847
+ add_next_index_stringl (return_value , p , chunk_len );
1848
+ p += chunk_len ;
1872
1849
}
1873
1850
}
1874
1851
/* }}} */
0 commit comments