From 20977ada85185781cf41ca0645ca86cba66303ff Mon Sep 17 00:00:00 2001 From: Yihang Wang Date: Wed, 4 Sep 2024 02:03:53 +0800 Subject: [PATCH 1/6] Enhance `zend_dump_op_array` to Properly Represent Non-Printable Characters (GH-15680) This change enhances `zend_dump_op_array` to properly represent non-printable characters in strings. This is useful for debugging purposes, as it allows developers to see the actual content of strings that contain non-printable characters. --- Zend/Optimizer/zend_dump.c | 4 +- ext/opcache/tests/match/002.phpt | 6 +-- ext/opcache/tests/match/005.phpt | 36 ++++++++++++++++ ext/opcache/tests/opt/dce_009.phpt | 6 +-- ext/opcache/tests/opt/sccp_032.phpt | 3 +- ext/standard/php_string.h | 1 + ext/standard/string.c | 64 +++++++++++++++++++++++++++++ 7 files changed, 107 insertions(+), 13 deletions(-) create mode 100644 ext/opcache/tests/match/005.phpt diff --git a/Zend/Optimizer/zend_dump.c b/Zend/Optimizer/zend_dump.c index b788b652979de..4ed21150ff5f4 100644 --- a/Zend/Optimizer/zend_dump.c +++ b/Zend/Optimizer/zend_dump.c @@ -67,10 +67,8 @@ void zend_dump_const(const zval *zv) fprintf(stderr, " float(%g)", Z_DVAL_P(zv)); break; case IS_STRING:; - zend_string *escaped_string = php_addcslashes(Z_STR_P(zv), "\"\\", 2); - + zend_string *escaped_string = php_repr_str(Z_STR_P(zv)->val, Z_STR_P(zv)->len); fprintf(stderr, " string(\"%s\")", ZSTR_VAL(escaped_string)); - zend_string_release(escaped_string); break; case IS_ARRAY: diff --git a/ext/opcache/tests/match/002.phpt b/ext/opcache/tests/match/002.phpt index 8e7102d539120..005c2689d1b8a 100644 --- a/ext/opcache/tests/match/002.phpt +++ b/ext/opcache/tests/match/002.phpt @@ -44,16 +44,14 @@ test: ; (lines=2, args=0, vars=0, tmps=0) ; (after optimizer) ; %s -0000 ECHO string("No match -") +0000 ECHO string("No match\n") 0001 RETURN null test2: ; (lines=2, args=0, vars=0, tmps=0) ; (after optimizer) ; %s -0000 ECHO string("No match -") +0000 ECHO string("No match\n") 0001 RETURN null No match No 
match diff --git a/ext/opcache/tests/match/005.phpt b/ext/opcache/tests/match/005.phpt new file mode 100644 index 0000000000000..7f2f4e371a741 --- /dev/null +++ b/ext/opcache/tests/match/005.phpt @@ -0,0 +1,36 @@ +--TEST-- +Match expression mixed jump table +--INI-- +opcache.enable=1 +opcache.enable_cli=1 +opcache.opt_debug_level=0x20000 +zend_test.observer.enabled=0 +--EXTENSIONS-- +opcache +--FILE-- + +--EXPECTF-- +$_main: + ; (lines=1, args=0, vars=0, tmps=0) + ; (after optimizer) + ; %s.php:1-21 +0000 RETURN string("\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\n\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\n !\"#$%&'()*+,-./\n0123456789:;<=>?\n@ABCDEFGHIJKLMNO\nPQRSTUVWXYZ[\\]^_\n`abcdefghijklmno\npqrstuvwxyz{|}~\x7f\n\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\n\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\n\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\n\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\n\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\n\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\n\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\n\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff") diff --git a/ext/opcache/tests/opt/dce_009.phpt b/ext/opcache/tests/opt/dce_009.phpt index f9c10074fb66c..996825cf96e1a 100644 --- a/ext/opcache/tests/opt/dce_009.phpt +++ b/ext/opcache/tests/opt/dce_009.phpt @@ -50,10 +50,8 @@ Loop::test: ; (lines=3, args=0, vars=0, tmps=0) ; (after optimizer) ; %sdce_009.php:4-10 -0000 ECHO string("Start -") -0001 ECHO string("Done -") +0000 ECHO string("Start\n") +0001 ECHO string("Done\n") 0002 RETURN null Loop::test2: diff --git a/ext/opcache/tests/opt/sccp_032.phpt b/ext/opcache/tests/opt/sccp_032.phpt index dac09a7552745..6ee4a837ecd37 100644 --- a/ext/opcache/tests/opt/sccp_032.phpt +++ b/ext/opcache/tests/opt/sccp_032.phpt @@ -36,8 +36,7 @@ $_main: 0004 
INIT_FCALL 1 %d string("var_export") 0005 SEND_VAR CV0($x) 1 0006 DO_ICALL -0007 ECHO string(" -") +0007 ECHO string("\n") 0008 JMP 0003 0009 FE_FREE V1 0010 RETURN int(1) diff --git a/ext/standard/php_string.h b/ext/standard/php_string.h index 1be7d9b283772..86a4c060ed8ca 100644 --- a/ext/standard/php_string.h +++ b/ext/standard/php_string.h @@ -42,6 +42,7 @@ PHPAPI char *php_strtr(char *str, size_t len, const char *str_from, const char * PHPAPI zend_string *php_addslashes(zend_string *str); PHPAPI void php_stripslashes(zend_string *str); PHPAPI zend_string *php_addcslashes_str(const char *str, size_t len, const char *what, size_t what_len); +PHPAPI zend_string *php_repr_str(const char *str, size_t len); PHPAPI zend_string *php_addcslashes(zend_string *str, const char *what, size_t what_len); PHPAPI void php_stripcslashes(zend_string *str); PHPAPI zend_string *php_basename(const char *s, size_t len, const char *suffix, size_t sufflen); diff --git a/ext/standard/string.c b/ext/standard/string.c index 1082066dcec55..ce3ea518b7e94 100644 --- a/ext/standard/string.c +++ b/ext/standard/string.c @@ -32,6 +32,8 @@ #include "scanf.h" #include "zend_API.h" #include "zend_execute.h" +#include "zend_string.h" +#include "php_globals.h" #include "basic_functions.h" #include "zend_smart_str.h" #include @@ -50,6 +52,19 @@ #include "Zend/zend_bitset.h" #endif +typedef struct { + char c; + const char *repr; +} char_repr_t; + +static const char_repr_t char_reprs[] = { + {'\t', "\\t"}, + {'\n', "\\n"}, + {'\r', "\\r"}, + {'"', "\\\""}, + {'\\', "\\\\"}, +}; + /* this is read-only, so it's ok */ ZEND_SET_ALIGNED(16, static const char hexconvtab[]) = "0123456789abcdef"; @@ -3863,6 +3878,55 @@ PHPAPI zend_string *php_addcslashes_str(const char *str, size_t len, const char } /* }}} */ +static const char *get_char_repr(char c) { + for (size_t i = 0; i < sizeof(char_reprs) / sizeof(char_reprs[0]); i++) { + if (char_reprs[i].c == c) { + return char_reprs[i].repr; + } + } + return NULL; +} 
+ +/* {{{ php_repr_str */ +PHPAPI zend_string *php_repr_str(const char *str, size_t len) { + size_t new_len = 0; + char *repr_str; + + for (size_t i = 0; i < len; i++) { + const char *repr = get_char_repr(str[i]); + if (repr) { + new_len += strlen(repr); + } else if (isprint((unsigned char)str[i])) { + new_len += 1; + } else { + new_len += 4; + } + } + + repr_str = emalloc(new_len + 1); + + size_t pos = 0; + for (size_t i = 0; i < len; i++) { + const char *repr = get_char_repr(str[i]); + if (repr) { + size_t len_repr = strlen(repr); + memcpy(repr_str + pos, repr, len_repr); + pos += len_repr; + } else if (isprint((unsigned char)str[i])) { + repr_str[pos++] = str[i]; + } else { + pos += snprintf(repr_str + pos, 5, "\\x%02x", (unsigned char)str[i]); + } + } + + repr_str[pos] = '\0'; + zend_string *zend_str = zend_string_init(repr_str, pos, 0); + efree(repr_str); + + return zend_str; +} + + /* {{{ php_addcslashes */ PHPAPI zend_string *php_addcslashes(zend_string *str, const char *what, size_t wlength) { From 5aa31c8bec83056fbbea566a484a0af9505030f6 Mon Sep 17 00:00:00 2001 From: Yihang Wang Date: Wed, 4 Sep 2024 04:29:38 +0800 Subject: [PATCH 2/6] Improve performance of `php_repr_str` by 32.56% --- Zend/Optimizer/zend_dump.c | 2 +- ext/opcache/tests/match/005.phpt | 2 +- ext/standard/charrepr.h | 285 +++++++++++++++++++++++++++++++ ext/standard/string.c | 80 +++------ 4 files changed, 312 insertions(+), 57 deletions(-) create mode 100644 ext/standard/charrepr.h diff --git a/Zend/Optimizer/zend_dump.c b/Zend/Optimizer/zend_dump.c index 4ed21150ff5f4..375b17295ef19 100644 --- a/Zend/Optimizer/zend_dump.c +++ b/Zend/Optimizer/zend_dump.c @@ -68,7 +68,7 @@ void zend_dump_const(const zval *zv) break; case IS_STRING:; zend_string *escaped_string = php_repr_str(Z_STR_P(zv)->val, Z_STR_P(zv)->len); - fprintf(stderr, " string(\"%s\")", ZSTR_VAL(escaped_string)); + fprintf(stderr, " string(%s)", ZSTR_VAL(escaped_string)); zend_string_release(escaped_string); break; case 
IS_ARRAY: diff --git a/ext/opcache/tests/match/005.phpt b/ext/opcache/tests/match/005.phpt index 7f2f4e371a741..448bf20fd487e 100644 --- a/ext/opcache/tests/match/005.phpt +++ b/ext/opcache/tests/match/005.phpt @@ -33,4 +33,4 @@ $_main: ; (lines=1, args=0, vars=0, tmps=0) ; (after optimizer) ; %s.php:1-21 -0000 RETURN string("\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\n\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\n !\"#$%&'()*+,-./\n0123456789:;<=>?\n@ABCDEFGHIJKLMNO\nPQRSTUVWXYZ[\\]^_\n`abcdefghijklmno\npqrstuvwxyz{|}~\x7f\n\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\n\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\n\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\n\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\n\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\n\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\n\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\n\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff") +0000 RETURN string("\x00\x01\x02\x03\x04\x05\x06\x07\b\t\n\x0b\f\r\x0e\x0f\n\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\n !\"#$%&'()*+,-./\n0123456789:;<=>?\n@ABCDEFGHIJKLMNO\nPQRSTUVWXYZ[\\]^_\n`abcdefghijklmno\npqrstuvwxyz{|}~\x7f\n\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\n\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\n\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\n\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\n\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\n\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\n\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\n\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff") diff --git a/ext/standard/charrepr.h b/ext/standard/charrepr.h new file mode 100644 index 0000000000000..fb19aa4ae078a 
--- /dev/null +++ b/ext/standard/charrepr.h @@ -0,0 +1,285 @@ +/* + +----------------------------------------------------------------------+ + | Copyright (c) The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | https://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Author: Sascha Schumann | + +----------------------------------------------------------------------+ +*/ + +#ifndef STRING_H +#define STRING_H + +typedef struct { + unsigned char ch; + const char *repr; + const unsigned char len; +} char_repr_t; + +static const char_repr_t char_reprs[] = { + {'\x00', "\\x00", 4}, + {'\x01', "\\x01", 4}, + {'\x02', "\\x02", 4}, + {'\x03', "\\x03", 4}, + {'\x04', "\\x04", 4}, + {'\x05', "\\x05", 4}, + {'\x06', "\\x06", 4}, + {'\x07', "\\x07", 4}, + {'\b', "\\b", 2}, + {'\t', "\\t", 2}, + {'\n', "\\n", 2}, + {'\x0b', "\\x0b", 4}, + {'\f', "\\f", 2}, + {'\r', "\\r", 2}, + {'\x0e', "\\x0e", 4}, + {'\x0f', "\\x0f", 4}, + {'\x10', "\\x10", 4}, + {'\x11', "\\x11", 4}, + {'\x12', "\\x12", 4}, + {'\x13', "\\x13", 4}, + {'\x14', "\\x14", 4}, + {'\x15', "\\x15", 4}, + {'\x16', "\\x16", 4}, + {'\x17', "\\x17", 4}, + {'\x18', "\\x18", 4}, + {'\x19', "\\x19", 4}, + {'\x1a', "\\x1a", 4}, + {'\x1b', "\\x1b", 4}, + {'\x1c', "\\x1c", 4}, + {'\x1d', "\\x1d", 4}, + {'\x1e', "\\x1e", 4}, + {'\x1f', "\\x1f", 4}, + {' ', " ", 1}, + {'!', "!", 1}, + {'"', "\\\"", 2}, + {'#', "#", 1}, + {'$', "$", 1}, + {'%', "%%", 1}, + {'&', "&", 1}, + {'\'', "\'", 1}, + {'(', "(", 1}, + {')', ")", 1}, + {'*', 
"*", 1}, + {'+', "+", 1}, + {',', ",", 1}, + {'-', "-", 1}, + {'.', ".", 1}, + {'/', "/", 1}, + {'0', "0", 1}, + {'1', "1", 1}, + {'2', "2", 1}, + {'3', "3", 1}, + {'4', "4", 1}, + {'5', "5", 1}, + {'6', "6", 1}, + {'7', "7", 1}, + {'8', "8", 1}, + {'9', "9", 1}, + {':', ":", 1}, + {';', ";", 1}, + {'<', "<", 1}, + {'=', "=", 1}, + {'>', ">", 1}, + {'?', "?", 1}, + {'@', "@", 1}, + {'A', "A", 1}, + {'B', "B", 1}, + {'C', "C", 1}, + {'D', "D", 1}, + {'E', "E", 1}, + {'F', "F", 1}, + {'G', "G", 1}, + {'H', "H", 1}, + {'I', "I", 1}, + {'J', "J", 1}, + {'K', "K", 1}, + {'L', "L", 1}, + {'M', "M", 1}, + {'N', "N", 1}, + {'O', "O", 1}, + {'P', "P", 1}, + {'Q', "Q", 1}, + {'R', "R", 1}, + {'S', "S", 1}, + {'T', "T", 1}, + {'U', "U", 1}, + {'V', "V", 1}, + {'W', "W", 1}, + {'X', "X", 1}, + {'Y', "Y", 1}, + {'Z', "Z", 1}, + {'[', "[", 1}, + {'\\', "\\\\", 2}, + {']', "]", 1}, + {'^', "^", 1}, + {'_', "_", 1}, + {'`', "`", 1}, + {'a', "a", 1}, + {'b', "b", 1}, + {'c', "c", 1}, + {'d', "d", 1}, + {'e', "e", 1}, + {'f', "f", 1}, + {'g', "g", 1}, + {'h', "h", 1}, + {'i', "i", 1}, + {'j', "j", 1}, + {'k', "k", 1}, + {'l', "l", 1}, + {'m', "m", 1}, + {'n', "n", 1}, + {'o', "o", 1}, + {'p', "p", 1}, + {'q', "q", 1}, + {'r', "r", 1}, + {'s', "s", 1}, + {'t', "t", 1}, + {'u', "u", 1}, + {'v', "v", 1}, + {'w', "w", 1}, + {'x', "x", 1}, + {'y', "y", 1}, + {'z', "z", 1}, + {'{', "{", 1}, + {'|', "|", 1}, + {'}', "}", 1}, + {'~', "~", 1}, + {'\x7f', "\\x7f", 4}, + {'\x80', "\\x80", 4}, + {'\x81', "\\x81", 4}, + {'\x82', "\\x82", 4}, + {'\x83', "\\x83", 4}, + {'\x84', "\\x84", 4}, + {'\x85', "\\x85", 4}, + {'\x86', "\\x86", 4}, + {'\x87', "\\x87", 4}, + {'\x88', "\\x88", 4}, + {'\x89', "\\x89", 4}, + {'\x8a', "\\x8a", 4}, + {'\x8b', "\\x8b", 4}, + {'\x8c', "\\x8c", 4}, + {'\x8d', "\\x8d", 4}, + {'\x8e', "\\x8e", 4}, + {'\x8f', "\\x8f", 4}, + {'\x90', "\\x90", 4}, + {'\x91', "\\x91", 4}, + {'\x92', "\\x92", 4}, + {'\x93', "\\x93", 4}, + {'\x94', "\\x94", 4}, + {'\x95', "\\x95", 4}, + 
{'\x96', "\\x96", 4}, + {'\x97', "\\x97", 4}, + {'\x98', "\\x98", 4}, + {'\x99', "\\x99", 4}, + {'\x9a', "\\x9a", 4}, + {'\x9b', "\\x9b", 4}, + {'\x9c', "\\x9c", 4}, + {'\x9d', "\\x9d", 4}, + {'\x9e', "\\x9e", 4}, + {'\x9f', "\\x9f", 4}, + {'\xa0', "\\xa0", 4}, + {'\xa1', "\\xa1", 4}, + {'\xa2', "\\xa2", 4}, + {'\xa3', "\\xa3", 4}, + {'\xa4', "\\xa4", 4}, + {'\xa5', "\\xa5", 4}, + {'\xa6', "\\xa6", 4}, + {'\xa7', "\\xa7", 4}, + {'\xa8', "\\xa8", 4}, + {'\xa9', "\\xa9", 4}, + {'\xaa', "\\xaa", 4}, + {'\xab', "\\xab", 4}, + {'\xac', "\\xac", 4}, + {'\xad', "\\xad", 4}, + {'\xae', "\\xae", 4}, + {'\xaf', "\\xaf", 4}, + {'\xb0', "\\xb0", 4}, + {'\xb1', "\\xb1", 4}, + {'\xb2', "\\xb2", 4}, + {'\xb3', "\\xb3", 4}, + {'\xb4', "\\xb4", 4}, + {'\xb5', "\\xb5", 4}, + {'\xb6', "\\xb6", 4}, + {'\xb7', "\\xb7", 4}, + {'\xb8', "\\xb8", 4}, + {'\xb9', "\\xb9", 4}, + {'\xba', "\\xba", 4}, + {'\xbb', "\\xbb", 4}, + {'\xbc', "\\xbc", 4}, + {'\xbd', "\\xbd", 4}, + {'\xbe', "\\xbe", 4}, + {'\xbf', "\\xbf", 4}, + {'\xc0', "\\xc0", 4}, + {'\xc1', "\\xc1", 4}, + {'\xc2', "\\xc2", 4}, + {'\xc3', "\\xc3", 4}, + {'\xc4', "\\xc4", 4}, + {'\xc5', "\\xc5", 4}, + {'\xc6', "\\xc6", 4}, + {'\xc7', "\\xc7", 4}, + {'\xc8', "\\xc8", 4}, + {'\xc9', "\\xc9", 4}, + {'\xca', "\\xca", 4}, + {'\xcb', "\\xcb", 4}, + {'\xcc', "\\xcc", 4}, + {'\xcd', "\\xcd", 4}, + {'\xce', "\\xce", 4}, + {'\xcf', "\\xcf", 4}, + {'\xd0', "\\xd0", 4}, + {'\xd1', "\\xd1", 4}, + {'\xd2', "\\xd2", 4}, + {'\xd3', "\\xd3", 4}, + {'\xd4', "\\xd4", 4}, + {'\xd5', "\\xd5", 4}, + {'\xd6', "\\xd6", 4}, + {'\xd7', "\\xd7", 4}, + {'\xd8', "\\xd8", 4}, + {'\xd9', "\\xd9", 4}, + {'\xda', "\\xda", 4}, + {'\xdb', "\\xdb", 4}, + {'\xdc', "\\xdc", 4}, + {'\xdd', "\\xdd", 4}, + {'\xde', "\\xde", 4}, + {'\xdf', "\\xdf", 4}, + {'\xe0', "\\xe0", 4}, + {'\xe1', "\\xe1", 4}, + {'\xe2', "\\xe2", 4}, + {'\xe3', "\\xe3", 4}, + {'\xe4', "\\xe4", 4}, + {'\xe5', "\\xe5", 4}, + {'\xe6', "\\xe6", 4}, + {'\xe7', "\\xe7", 4}, + {'\xe8', "\\xe8", 4}, + 
{'\xe9', "\\xe9", 4}, + {'\xea', "\\xea", 4}, + {'\xeb', "\\xeb", 4}, + {'\xec', "\\xec", 4}, + {'\xed', "\\xed", 4}, + {'\xee', "\\xee", 4}, + {'\xef', "\\xef", 4}, + {'\xf0', "\\xf0", 4}, + {'\xf1', "\\xf1", 4}, + {'\xf2', "\\xf2", 4}, + {'\xf3', "\\xf3", 4}, + {'\xf4', "\\xf4", 4}, + {'\xf5', "\\xf5", 4}, + {'\xf6', "\\xf6", 4}, + {'\xf7', "\\xf7", 4}, + {'\xf8', "\\xf8", 4}, + {'\xf9', "\\xf9", 4}, + {'\xfa', "\\xfa", 4}, + {'\xfb', "\\xfb", 4}, + {'\xfc', "\\xfc", 4}, + {'\xfd', "\\xfd", 4}, + {'\xfe', "\\xfe", 4}, + {'\xff', "\\xff", 4}, +}; + +#endif diff --git a/ext/standard/string.c b/ext/standard/string.c index ce3ea518b7e94..7d79123611306 100644 --- a/ext/standard/string.c +++ b/ext/standard/string.c @@ -33,6 +33,7 @@ #include "zend_API.h" #include "zend_execute.h" #include "zend_string.h" +#include "charrepr.h" #include "php_globals.h" #include "basic_functions.h" #include "zend_smart_str.h" @@ -52,19 +53,6 @@ #include "Zend/zend_bitset.h" #endif -typedef struct { - char c; - const char *repr; -} char_repr_t; - -static const char_repr_t char_reprs[] = { - {'\t', "\\t"}, - {'\n', "\\n"}, - {'\r', "\\r"}, - {'"', "\\\""}, - {'\\', "\\\\"}, -}; - /* this is read-only, so it's ok */ ZEND_SET_ALIGNED(16, static const char hexconvtab[]) = "0123456789abcdef"; @@ -3878,52 +3866,34 @@ PHPAPI zend_string *php_addcslashes_str(const char *str, size_t len, const char } /* }}} */ -static const char *get_char_repr(char c) { - for (size_t i = 0; i < sizeof(char_reprs) / sizeof(char_reprs[0]); i++) { - if (char_reprs[i].c == c) { - return char_reprs[i].repr; - } - } - return NULL; -} - /* {{{ php_repr_str */ PHPAPI zend_string *php_repr_str(const char *str, size_t len) { - size_t new_len = 0; - char *repr_str; - - for (size_t i = 0; i < len; i++) { - const char *repr = get_char_repr(str[i]); - if (repr) { - new_len += strlen(repr); - } else if (isprint((unsigned char)str[i])) { - new_len += 1; - } else { - new_len += 4; - } - } - - repr_str = emalloc(new_len + 1); - - 
size_t pos = 0; + size_t newlen; + // allocate enough memory for the worst case scenario + // in which every character is a control character + // and we need to represent it as \x00 (4 bytes) + // so we need 4 bytes for each character + // plus 2 byte for the leading quote and the trailing quote + // plus 1 byte for the null terminator + zend_string *new_str = zend_string_alloc(len * 4 + 2 + 1, 0); + char *target = ZSTR_VAL(new_str); + // add the leading quote + *target++ = '"'; for (size_t i = 0; i < len; i++) { - const char *repr = get_char_repr(str[i]); - if (repr) { - size_t len_repr = strlen(repr); - memcpy(repr_str + pos, repr, len_repr); - pos += len_repr; - } else if (isprint((unsigned char)str[i])) { - repr_str[pos++] = str[i]; - } else { - pos += snprintf(repr_str + pos, 5, "\\x%02x", (unsigned char)str[i]); - } + const char *repr = char_reprs[(unsigned char)str[i]].repr; + size_t repr_len = char_reprs[(unsigned char)str[i]].len; + memcpy(target, repr, repr_len); + target += repr_len; } - - repr_str[pos] = '\0'; - zend_string *zend_str = zend_string_init(repr_str, pos, 0); - efree(repr_str); - - return zend_str; + // add the trailing quote + *target++ = '"'; + // add the null terminator + *target = '\0'; + newlen = target - ZSTR_VAL(new_str); + if (newlen < len * 4) { + new_str = zend_string_truncate(new_str, newlen, 0); + } + return new_str; } From 2dc8acc90ac3d732cd53c56c738ed25655d70108 Mon Sep 17 00:00:00 2001 From: Yihang Wang Date: Wed, 4 Sep 2024 04:44:13 +0800 Subject: [PATCH 3/6] Fix invalid condition for `zend_string_truncate` in `php_repr_str` --- ext/standard/string.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ext/standard/string.c b/ext/standard/string.c index 7d79123611306..b331b792b62f3 100644 --- a/ext/standard/string.c +++ b/ext/standard/string.c @@ -3875,7 +3875,8 @@ PHPAPI zend_string *php_repr_str(const char *str, size_t len) { // so we need 4 bytes for each character // plus 2 byte for the leading quote 
and the trailing quote // plus 1 byte for the null terminator - zend_string *new_str = zend_string_alloc(len * 4 + 2 + 1, 0); + int alloc_len = len * 4 + 2 + 1; + zend_string *new_str = zend_string_alloc(alloc_len, 0); char *target = ZSTR_VAL(new_str); // add the leading quote *target++ = '"'; @@ -3890,7 +3891,7 @@ PHPAPI zend_string *php_repr_str(const char *str, size_t len) { // add the null terminator *target = '\0'; newlen = target - ZSTR_VAL(new_str); - if (newlen < len * 4) { + if (newlen < alloc_len) { new_str = zend_string_truncate(new_str, newlen, 0); } return new_str; From a4f53c8b88347ebd4107393a2b45e8f15c59a485 Mon Sep 17 00:00:00 2001 From: Yihang Wang Date: Wed, 4 Sep 2024 04:47:53 +0800 Subject: [PATCH 4/6] Fix `char_reprs` array initialization in `charrepr.h` --- ext/standard/charrepr.h | 517 ++++++++++++++++++++-------------------- 1 file changed, 258 insertions(+), 259 deletions(-) diff --git a/ext/standard/charrepr.h b/ext/standard/charrepr.h index fb19aa4ae078a..7fe8d515326e6 100644 --- a/ext/standard/charrepr.h +++ b/ext/standard/charrepr.h @@ -10,7 +10,7 @@ | obtain it through the world-wide-web, please send a note to | | license@php.net so we can mail you a copy immediately. 
| +----------------------------------------------------------------------+ - | Author: Sascha Schumann | + | Author: Yihang Wang | +----------------------------------------------------------------------+ */ @@ -18,268 +18,267 @@ #define STRING_H typedef struct { - unsigned char ch; const char *repr; const unsigned char len; } char_repr_t; -static const char_repr_t char_reprs[] = { - {'\x00', "\\x00", 4}, - {'\x01', "\\x01", 4}, - {'\x02', "\\x02", 4}, - {'\x03', "\\x03", 4}, - {'\x04', "\\x04", 4}, - {'\x05', "\\x05", 4}, - {'\x06', "\\x06", 4}, - {'\x07', "\\x07", 4}, - {'\b', "\\b", 2}, - {'\t', "\\t", 2}, - {'\n', "\\n", 2}, - {'\x0b', "\\x0b", 4}, - {'\f', "\\f", 2}, - {'\r', "\\r", 2}, - {'\x0e', "\\x0e", 4}, - {'\x0f', "\\x0f", 4}, - {'\x10', "\\x10", 4}, - {'\x11', "\\x11", 4}, - {'\x12', "\\x12", 4}, - {'\x13', "\\x13", 4}, - {'\x14', "\\x14", 4}, - {'\x15', "\\x15", 4}, - {'\x16', "\\x16", 4}, - {'\x17', "\\x17", 4}, - {'\x18', "\\x18", 4}, - {'\x19', "\\x19", 4}, - {'\x1a', "\\x1a", 4}, - {'\x1b', "\\x1b", 4}, - {'\x1c', "\\x1c", 4}, - {'\x1d', "\\x1d", 4}, - {'\x1e', "\\x1e", 4}, - {'\x1f', "\\x1f", 4}, - {' ', " ", 1}, - {'!', "!", 1}, - {'"', "\\\"", 2}, - {'#', "#", 1}, - {'$', "$", 1}, - {'%', "%%", 1}, - {'&', "&", 1}, - {'\'', "\'", 1}, - {'(', "(", 1}, - {')', ")", 1}, - {'*', "*", 1}, - {'+', "+", 1}, - {',', ",", 1}, - {'-', "-", 1}, - {'.', ".", 1}, - {'/', "/", 1}, - {'0', "0", 1}, - {'1', "1", 1}, - {'2', "2", 1}, - {'3', "3", 1}, - {'4', "4", 1}, - {'5', "5", 1}, - {'6', "6", 1}, - {'7', "7", 1}, - {'8', "8", 1}, - {'9', "9", 1}, - {':', ":", 1}, - {';', ";", 1}, - {'<', "<", 1}, - {'=', "=", 1}, - {'>', ">", 1}, - {'?', "?", 1}, - {'@', "@", 1}, - {'A', "A", 1}, - {'B', "B", 1}, - {'C', "C", 1}, - {'D', "D", 1}, - {'E', "E", 1}, - {'F', "F", 1}, - {'G', "G", 1}, - {'H', "H", 1}, - {'I', "I", 1}, - {'J', "J", 1}, - {'K', "K", 1}, - {'L', "L", 1}, - {'M', "M", 1}, - {'N', "N", 1}, - {'O', "O", 1}, - {'P', "P", 1}, - {'Q', "Q", 1}, - {'R', 
"R", 1}, - {'S', "S", 1}, - {'T', "T", 1}, - {'U', "U", 1}, - {'V', "V", 1}, - {'W', "W", 1}, - {'X', "X", 1}, - {'Y', "Y", 1}, - {'Z', "Z", 1}, - {'[', "[", 1}, - {'\\', "\\\\", 2}, - {']', "]", 1}, - {'^', "^", 1}, - {'_', "_", 1}, - {'`', "`", 1}, - {'a', "a", 1}, - {'b', "b", 1}, - {'c', "c", 1}, - {'d', "d", 1}, - {'e', "e", 1}, - {'f', "f", 1}, - {'g', "g", 1}, - {'h', "h", 1}, - {'i', "i", 1}, - {'j', "j", 1}, - {'k', "k", 1}, - {'l', "l", 1}, - {'m', "m", 1}, - {'n', "n", 1}, - {'o', "o", 1}, - {'p', "p", 1}, - {'q', "q", 1}, - {'r', "r", 1}, - {'s', "s", 1}, - {'t', "t", 1}, - {'u', "u", 1}, - {'v', "v", 1}, - {'w', "w", 1}, - {'x', "x", 1}, - {'y', "y", 1}, - {'z', "z", 1}, - {'{', "{", 1}, - {'|', "|", 1}, - {'}', "}", 1}, - {'~', "~", 1}, - {'\x7f', "\\x7f", 4}, - {'\x80', "\\x80", 4}, - {'\x81', "\\x81", 4}, - {'\x82', "\\x82", 4}, - {'\x83', "\\x83", 4}, - {'\x84', "\\x84", 4}, - {'\x85', "\\x85", 4}, - {'\x86', "\\x86", 4}, - {'\x87', "\\x87", 4}, - {'\x88', "\\x88", 4}, - {'\x89', "\\x89", 4}, - {'\x8a', "\\x8a", 4}, - {'\x8b', "\\x8b", 4}, - {'\x8c', "\\x8c", 4}, - {'\x8d', "\\x8d", 4}, - {'\x8e', "\\x8e", 4}, - {'\x8f', "\\x8f", 4}, - {'\x90', "\\x90", 4}, - {'\x91', "\\x91", 4}, - {'\x92', "\\x92", 4}, - {'\x93', "\\x93", 4}, - {'\x94', "\\x94", 4}, - {'\x95', "\\x95", 4}, - {'\x96', "\\x96", 4}, - {'\x97', "\\x97", 4}, - {'\x98', "\\x98", 4}, - {'\x99', "\\x99", 4}, - {'\x9a', "\\x9a", 4}, - {'\x9b', "\\x9b", 4}, - {'\x9c', "\\x9c", 4}, - {'\x9d', "\\x9d", 4}, - {'\x9e', "\\x9e", 4}, - {'\x9f', "\\x9f", 4}, - {'\xa0', "\\xa0", 4}, - {'\xa1', "\\xa1", 4}, - {'\xa2', "\\xa2", 4}, - {'\xa3', "\\xa3", 4}, - {'\xa4', "\\xa4", 4}, - {'\xa5', "\\xa5", 4}, - {'\xa6', "\\xa6", 4}, - {'\xa7', "\\xa7", 4}, - {'\xa8', "\\xa8", 4}, - {'\xa9', "\\xa9", 4}, - {'\xaa', "\\xaa", 4}, - {'\xab', "\\xab", 4}, - {'\xac', "\\xac", 4}, - {'\xad', "\\xad", 4}, - {'\xae', "\\xae", 4}, - {'\xaf', "\\xaf", 4}, - {'\xb0', "\\xb0", 4}, - {'\xb1', "\\xb1", 4}, - {'\xb2', 
"\\xb2", 4}, - {'\xb3', "\\xb3", 4}, - {'\xb4', "\\xb4", 4}, - {'\xb5', "\\xb5", 4}, - {'\xb6', "\\xb6", 4}, - {'\xb7', "\\xb7", 4}, - {'\xb8', "\\xb8", 4}, - {'\xb9', "\\xb9", 4}, - {'\xba', "\\xba", 4}, - {'\xbb', "\\xbb", 4}, - {'\xbc', "\\xbc", 4}, - {'\xbd', "\\xbd", 4}, - {'\xbe', "\\xbe", 4}, - {'\xbf', "\\xbf", 4}, - {'\xc0', "\\xc0", 4}, - {'\xc1', "\\xc1", 4}, - {'\xc2', "\\xc2", 4}, - {'\xc3', "\\xc3", 4}, - {'\xc4', "\\xc4", 4}, - {'\xc5', "\\xc5", 4}, - {'\xc6', "\\xc6", 4}, - {'\xc7', "\\xc7", 4}, - {'\xc8', "\\xc8", 4}, - {'\xc9', "\\xc9", 4}, - {'\xca', "\\xca", 4}, - {'\xcb', "\\xcb", 4}, - {'\xcc', "\\xcc", 4}, - {'\xcd', "\\xcd", 4}, - {'\xce', "\\xce", 4}, - {'\xcf', "\\xcf", 4}, - {'\xd0', "\\xd0", 4}, - {'\xd1', "\\xd1", 4}, - {'\xd2', "\\xd2", 4}, - {'\xd3', "\\xd3", 4}, - {'\xd4', "\\xd4", 4}, - {'\xd5', "\\xd5", 4}, - {'\xd6', "\\xd6", 4}, - {'\xd7', "\\xd7", 4}, - {'\xd8', "\\xd8", 4}, - {'\xd9', "\\xd9", 4}, - {'\xda', "\\xda", 4}, - {'\xdb', "\\xdb", 4}, - {'\xdc', "\\xdc", 4}, - {'\xdd', "\\xdd", 4}, - {'\xde', "\\xde", 4}, - {'\xdf', "\\xdf", 4}, - {'\xe0', "\\xe0", 4}, - {'\xe1', "\\xe1", 4}, - {'\xe2', "\\xe2", 4}, - {'\xe3', "\\xe3", 4}, - {'\xe4', "\\xe4", 4}, - {'\xe5', "\\xe5", 4}, - {'\xe6', "\\xe6", 4}, - {'\xe7', "\\xe7", 4}, - {'\xe8', "\\xe8", 4}, - {'\xe9', "\\xe9", 4}, - {'\xea', "\\xea", 4}, - {'\xeb', "\\xeb", 4}, - {'\xec', "\\xec", 4}, - {'\xed', "\\xed", 4}, - {'\xee', "\\xee", 4}, - {'\xef', "\\xef", 4}, - {'\xf0', "\\xf0", 4}, - {'\xf1', "\\xf1", 4}, - {'\xf2', "\\xf2", 4}, - {'\xf3', "\\xf3", 4}, - {'\xf4', "\\xf4", 4}, - {'\xf5', "\\xf5", 4}, - {'\xf6', "\\xf6", 4}, - {'\xf7', "\\xf7", 4}, - {'\xf8', "\\xf8", 4}, - {'\xf9', "\\xf9", 4}, - {'\xfa', "\\xfa", 4}, - {'\xfb', "\\xfb", 4}, - {'\xfc', "\\xfc", 4}, - {'\xfd', "\\xfd", 4}, - {'\xfe', "\\xfe", 4}, - {'\xff', "\\xff", 4}, +static const char_repr_t char_reprs[256] = { + {"\\x00", 4}, + {"\\x01", 4}, + {"\\x02", 4}, + {"\\x03", 4}, + {"\\x04", 4}, + {"\\x05", 
4}, + {"\\x06", 4}, + {"\\x07", 4}, + {"\\b", 2}, + {"\\t", 2}, + {"\\n", 2}, + {"\\x0b", 4}, + {"\\f", 2}, + {"\\r", 2}, + {"\\x0e", 4}, + {"\\x0f", 4}, + {"\\x10", 4}, + {"\\x11", 4}, + {"\\x12", 4}, + {"\\x13", 4}, + {"\\x14", 4}, + {"\\x15", 4}, + {"\\x16", 4}, + {"\\x17", 4}, + {"\\x18", 4}, + {"\\x19", 4}, + {"\\x1a", 4}, + {"\\x1b", 4}, + {"\\x1c", 4}, + {"\\x1d", 4}, + {"\\x1e", 4}, + {"\\x1f", 4}, + {" ", 1}, + {"!", 1}, + {"\\\"", 2}, + {"#", 1}, + {"$", 1}, + {"%%", 1}, + {"&", 1}, + {"\'", 1}, + {"(", 1}, + {")", 1}, + {"*", 1}, + {"+", 1}, + {",", 1}, + {"-", 1}, + {".", 1}, + {"/", 1}, + {"0", 1}, + {"1", 1}, + {"2", 1}, + {"3", 1}, + {"4", 1}, + {"5", 1}, + {"6", 1}, + {"7", 1}, + {"8", 1}, + {"9", 1}, + {":", 1}, + {";", 1}, + {"<", 1}, + {"=", 1}, + {">", 1}, + {"?", 1}, + {"@", 1}, + {"A", 1}, + {"B", 1}, + {"C", 1}, + {"D", 1}, + {"E", 1}, + {"F", 1}, + {"G", 1}, + {"H", 1}, + {"I", 1}, + {"J", 1}, + {"K", 1}, + {"L", 1}, + {"M", 1}, + {"N", 1}, + {"O", 1}, + {"P", 1}, + {"Q", 1}, + {"R", 1}, + {"S", 1}, + {"T", 1}, + {"U", 1}, + {"V", 1}, + {"W", 1}, + {"X", 1}, + {"Y", 1}, + {"Z", 1}, + {"[", 1}, + {"\\\\", 2}, + {"]", 1}, + {"^", 1}, + {"_", 1}, + {"`", 1}, + {"a", 1}, + {"b", 1}, + {"c", 1}, + {"d", 1}, + {"e", 1}, + {"f", 1}, + {"g", 1}, + {"h", 1}, + {"i", 1}, + {"j", 1}, + {"k", 1}, + {"l", 1}, + {"m", 1}, + {"n", 1}, + {"o", 1}, + {"p", 1}, + {"q", 1}, + {"r", 1}, + {"s", 1}, + {"t", 1}, + {"u", 1}, + {"v", 1}, + {"w", 1}, + {"x", 1}, + {"y", 1}, + {"z", 1}, + {"{", 1}, + {"|", 1}, + {"}", 1}, + {"~", 1}, + {"\\x7f", 4}, + {"\\x80", 4}, + {"\\x81", 4}, + {"\\x82", 4}, + {"\\x83", 4}, + {"\\x84", 4}, + {"\\x85", 4}, + {"\\x86", 4}, + {"\\x87", 4}, + {"\\x88", 4}, + {"\\x89", 4}, + {"\\x8a", 4}, + {"\\x8b", 4}, + {"\\x8c", 4}, + {"\\x8d", 4}, + {"\\x8e", 4}, + {"\\x8f", 4}, + {"\\x90", 4}, + {"\\x91", 4}, + {"\\x92", 4}, + {"\\x93", 4}, + {"\\x94", 4}, + {"\\x95", 4}, + {"\\x96", 4}, + {"\\x97", 4}, + {"\\x98", 4}, + {"\\x99", 4}, + 
{"\\x9a", 4}, + {"\\x9b", 4}, + {"\\x9c", 4}, + {"\\x9d", 4}, + {"\\x9e", 4}, + {"\\x9f", 4}, + {"\\xa0", 4}, + {"\\xa1", 4}, + {"\\xa2", 4}, + {"\\xa3", 4}, + {"\\xa4", 4}, + {"\\xa5", 4}, + {"\\xa6", 4}, + {"\\xa7", 4}, + {"\\xa8", 4}, + {"\\xa9", 4}, + {"\\xaa", 4}, + {"\\xab", 4}, + {"\\xac", 4}, + {"\\xad", 4}, + {"\\xae", 4}, + {"\\xaf", 4}, + {"\\xb0", 4}, + {"\\xb1", 4}, + {"\\xb2", 4}, + {"\\xb3", 4}, + {"\\xb4", 4}, + {"\\xb5", 4}, + {"\\xb6", 4}, + {"\\xb7", 4}, + {"\\xb8", 4}, + {"\\xb9", 4}, + {"\\xba", 4}, + {"\\xbb", 4}, + {"\\xbc", 4}, + {"\\xbd", 4}, + {"\\xbe", 4}, + {"\\xbf", 4}, + {"\\xc0", 4}, + {"\\xc1", 4}, + {"\\xc2", 4}, + {"\\xc3", 4}, + {"\\xc4", 4}, + {"\\xc5", 4}, + {"\\xc6", 4}, + {"\\xc7", 4}, + {"\\xc8", 4}, + {"\\xc9", 4}, + {"\\xca", 4}, + {"\\xcb", 4}, + {"\\xcc", 4}, + {"\\xcd", 4}, + {"\\xce", 4}, + {"\\xcf", 4}, + {"\\xd0", 4}, + {"\\xd1", 4}, + {"\\xd2", 4}, + {"\\xd3", 4}, + {"\\xd4", 4}, + {"\\xd5", 4}, + {"\\xd6", 4}, + {"\\xd7", 4}, + {"\\xd8", 4}, + {"\\xd9", 4}, + {"\\xda", 4}, + {"\\xdb", 4}, + {"\\xdc", 4}, + {"\\xdd", 4}, + {"\\xde", 4}, + {"\\xdf", 4}, + {"\\xe0", 4}, + {"\\xe1", 4}, + {"\\xe2", 4}, + {"\\xe3", 4}, + {"\\xe4", 4}, + {"\\xe5", 4}, + {"\\xe6", 4}, + {"\\xe7", 4}, + {"\\xe8", 4}, + {"\\xe9", 4}, + {"\\xea", 4}, + {"\\xeb", 4}, + {"\\xec", 4}, + {"\\xed", 4}, + {"\\xee", 4}, + {"\\xef", 4}, + {"\\xf0", 4}, + {"\\xf1", 4}, + {"\\xf2", 4}, + {"\\xf3", 4}, + {"\\xf4", 4}, + {"\\xf5", 4}, + {"\\xf6", 4}, + {"\\xf7", 4}, + {"\\xf8", 4}, + {"\\xf9", 4}, + {"\\xfa", 4}, + {"\\xfb", 4}, + {"\\xfc", 4}, + {"\\xfd", 4}, + {"\\xfe", 4}, + {"\\xff", 4}, }; #endif From 90fe73f94929f9f96ed738fddf445f1febb54cf8 Mon Sep 17 00:00:00 2001 From: Yihang Wang Date: Wed, 4 Sep 2024 21:29:15 +0800 Subject: [PATCH 5/6] chore: Optimize string representation in `php_repr_str` --- ext/standard/string.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ext/standard/string.c b/ext/standard/string.c index 
b331b792b62f3..b188084d5f5fa 100644 --- a/ext/standard/string.c +++ b/ext/standard/string.c @@ -3876,16 +3876,16 @@ PHPAPI zend_string *php_repr_str(const char *str, size_t len) { // plus 2 byte for the leading quote and the trailing quote // plus 1 byte for the null terminator int alloc_len = len * 4 + 2 + 1; - zend_string *new_str = zend_string_alloc(alloc_len, 0); + zend_string *new_str = zend_string_alloc(alloc_len, 0); char *target = ZSTR_VAL(new_str); // add the leading quote *target++ = '"'; - for (size_t i = 0; i < len; i++) { - const char *repr = char_reprs[(unsigned char)str[i]].repr; + for (size_t i = 0; i < len; i++) { + const char *repr = char_reprs[(unsigned char)str[i]].repr; size_t repr_len = char_reprs[(unsigned char)str[i]].len; memcpy(target, repr, repr_len); target += repr_len; - } + } // add the trailing quote *target++ = '"'; // add the null terminator From 32340baf165f7ec8d0391fa6bdcb37eca3791403 Mon Sep 17 00:00:00 2001 From: Yihang Wang Date: Mon, 9 Sep 2024 18:30:13 +0800 Subject: [PATCH 6/6] Fix `%` entry in `char_reprs` and rename include guard in charrepr.h --- ext/standard/charrepr.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ext/standard/charrepr.h b/ext/standard/charrepr.h index 7fe8d515326e6..7fb31a20d43f2 100644 --- a/ext/standard/charrepr.h +++ b/ext/standard/charrepr.h @@ -14,8 +14,8 @@ +----------------------------------------------------------------------+ */ -#ifndef STRING_H -#define STRING_H +#ifndef CHARREPR_H +#define CHARREPR_H typedef struct { const char *repr; @@ -60,7 +60,7 @@ static const char_repr_t char_reprs[256] = { {"\\\"", 2}, {"#", 1}, {"$", 1}, - {"%%", 1}, + {"%", 1}, {"&", 1}, {"\'", 1}, {"(", 1},