From b41c5b0addf0a289c3062d2a843f5ff507cf70e7 Mon Sep 17 00:00:00 2001 From: Eddie Kohler Date: Mon, 28 Dec 2015 19:05:09 -0500 Subject: [PATCH] json_encode: Escape U+2028 and U+2029 more often. These characters are illegal in JavaScript string literals, so leaving them unescaped is risky. The default encoder ($flags = 0) is fine, but the encoder with the JSON_UNESCAPED_UNICODE flag is not. In case anyone wants the ability to leave these characters unescaped, provide JSON_UNESCAPED_LINE_TERMINATORS. --- ext/json/json.c | 1 + ext/json/json_encoder.c | 11 ++++++- ext/json/php_json.h | 1 + ext/json/tests/json_encode_u2028_u2029.phpt | 36 +++++++++++++++++++++ 4 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 ext/json/tests/json_encode_u2028_u2029.phpt diff --git a/ext/json/json.c b/ext/json/json.c index 11018c771f246..4305d8f7ede12 100644 --- a/ext/json/json.c +++ b/ext/json/json.c @@ -117,6 +117,7 @@ static PHP_MINIT_FUNCTION(json) PHP_JSON_REGISTER_CONSTANT("JSON_UNESCAPED_UNICODE", PHP_JSON_UNESCAPED_UNICODE); PHP_JSON_REGISTER_CONSTANT("JSON_PARTIAL_OUTPUT_ON_ERROR", PHP_JSON_PARTIAL_OUTPUT_ON_ERROR); PHP_JSON_REGISTER_CONSTANT("JSON_PRESERVE_ZERO_FRACTION", PHP_JSON_PRESERVE_ZERO_FRACTION); + PHP_JSON_REGISTER_CONSTANT("JSON_UNESCAPED_LINE_TERMINATORS", PHP_JSON_UNESCAPED_LINE_TERMINATORS); /* options for json_decode */ PHP_JSON_REGISTER_CONSTANT("JSON_OBJECT_AS_ARRAY", PHP_JSON_OBJECT_AS_ARRAY); diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index c3aa34862649b..667b0032ee444 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -321,7 +321,7 @@ static void php_json_escape_string(smart_str *buf, char *s, size_t len, int opti do { us = (unsigned char)s[pos]; - if (us >= 0x80 && !(options & PHP_JSON_UNESCAPED_UNICODE)) { + if (us >= 0x80 && (!(options & PHP_JSON_UNESCAPED_UNICODE) || us == 0xE2)) { /* UTF-8 character */ us = php_next_utf8_char((const unsigned char *)s, len, &pos, &status); if (status != SUCCESS) { @@ -332,6 +332,15 @@ 
static void php_json_escape_string(smart_str *buf, char *s, size_t len, int opti smart_str_appendl(buf, "null", 4); return; } + /* Escape U+2028/U+2029 line terminators, UNLESS both + JSON_UNESCAPED_UNICODE and + JSON_UNESCAPED_LINE_TERMINATORS were provided */ + if ((options & PHP_JSON_UNESCAPED_UNICODE) + && ((options & PHP_JSON_UNESCAPED_LINE_TERMINATORS) + || us < 0x2028 || us > 0x2029)) { + smart_str_appendl(buf, &s[pos - 3], 3); + continue; + } /* From http://en.wikipedia.org/wiki/UTF16 */ if (us >= 0x10000) { unsigned int next_us; diff --git a/ext/json/php_json.h b/ext/json/php_json.h index 4fb0b36881e71..85d76ef40a8de 100644 --- a/ext/json/php_json.h +++ b/ext/json/php_json.h @@ -67,6 +67,7 @@ typedef enum { #define PHP_JSON_UNESCAPED_UNICODE (1<<8) #define PHP_JSON_PARTIAL_OUTPUT_ON_ERROR (1<<9) #define PHP_JSON_PRESERVE_ZERO_FRACTION (1<<10) +#define PHP_JSON_UNESCAPED_LINE_TERMINATORS (1<<11) /* json_decode() options */ #define PHP_JSON_OBJECT_AS_ARRAY (1<<0) diff --git a/ext/json/tests/json_encode_u2028_u2029.phpt b/ext/json/tests/json_encode_u2028_u2029.phpt new file mode 100644 index 0000000000000..4b87e9b307c92 --- /dev/null +++ b/ext/json/tests/json_encode_u2028_u2029.phpt @@ -0,0 +1,36 @@ +--TEST-- +json_encode() tests for U+2028, U+2029 +--SKIPIF-- + +--FILE-- + +--EXPECT-- +string(12) "["a\u00e1b"]" +string(8) "["aáb"]" +string(10) ""a\u2027b"" +string(7) ""a‧b"" +string(10) ""a\u2028b"" +string(10) ""a\u2028b"" +string(10) ""a\u2028b"" +string(7) ""a
b"" +string(10) ""a\u2029b"" +string(10) ""a\u2029b"" +string(10) ""a\u2029b"" +string(7) ""a
b"" +string(10) ""a\u202ab"" +string(7) ""a‪b""