Skip to content

Commit 104876d

Browse files
kohlerbukka
authored and committed
json_encode: Escape U+2028 and U+2029 more often.
These characters are illegal inside JavaScript string literals, so leaving them unescaped is risky when JSON output is embedded in a script. The default encoder ($flags = 0) is fine, but the encoder with the JSON_UNESCAPED_UNICODE flag is not. In case anyone wants the ability to leave these characters unescaped, provide JSON_UNESCAPED_LINE_TERMINATORS, which only takes effect when combined with JSON_UNESCAPED_UNICODE.
1 parent 9b854eb commit 104876d

File tree

4 files changed

+48
-1
lines changed

4 files changed

+48
-1
lines changed

ext/json/json.c

+1
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ static PHP_MINIT_FUNCTION(json)
117117
PHP_JSON_REGISTER_CONSTANT("JSON_UNESCAPED_UNICODE", PHP_JSON_UNESCAPED_UNICODE);
118118
PHP_JSON_REGISTER_CONSTANT("JSON_PARTIAL_OUTPUT_ON_ERROR", PHP_JSON_PARTIAL_OUTPUT_ON_ERROR);
119119
PHP_JSON_REGISTER_CONSTANT("JSON_PRESERVE_ZERO_FRACTION", PHP_JSON_PRESERVE_ZERO_FRACTION);
120+
PHP_JSON_REGISTER_CONSTANT("JSON_UNESCAPED_LINE_TERMINATORS", PHP_JSON_UNESCAPED_LINE_TERMINATORS);
120121

121122
/* options for json_decode */
122123
PHP_JSON_REGISTER_CONSTANT("JSON_OBJECT_AS_ARRAY", PHP_JSON_OBJECT_AS_ARRAY);

ext/json/json_encoder.c

+10-1
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,7 @@ static void php_json_escape_string(smart_str *buf, char *s, size_t len, int opti
321321

322322
do {
323323
us = (unsigned char)s[pos];
324-
if (us >= 0x80 && !(options & PHP_JSON_UNESCAPED_UNICODE)) {
324+
if (us >= 0x80 && (!(options & PHP_JSON_UNESCAPED_UNICODE) || us == 0xE2)) {
325325
/* UTF-8 character */
326326
us = php_next_utf8_char((const unsigned char *)s, len, &pos, &status);
327327
if (status != SUCCESS) {
@@ -332,6 +332,15 @@ static void php_json_escape_string(smart_str *buf, char *s, size_t len, int opti
332332
smart_str_appendl(buf, "null", 4);
333333
return;
334334
}
335+
/* Escape U+2028/U+2029 line terminators, UNLESS both
336+
JSON_UNESCAPED_UNICODE and
337+
JSON_UNESCAPED_LINE_TERMINATORS were provided */
338+
if ((options & PHP_JSON_UNESCAPED_UNICODE)
339+
&& ((options & PHP_JSON_UNESCAPED_LINE_TERMINATORS)
340+
|| us < 0x2028 || us > 0x2029)) {
341+
smart_str_appendl(buf, &s[pos - 3], 3);
342+
continue;
343+
}
335344
/* From http://en.wikipedia.org/wiki/UTF16 */
336345
if (us >= 0x10000) {
337346
unsigned int next_us;

ext/json/php_json.h

+1
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ typedef enum {
6767
#define PHP_JSON_UNESCAPED_UNICODE (1<<8)
6868
#define PHP_JSON_PARTIAL_OUTPUT_ON_ERROR (1<<9)
6969
#define PHP_JSON_PRESERVE_ZERO_FRACTION (1<<10)
70+
#define PHP_JSON_UNESCAPED_LINE_TERMINATORS (1<<11)
7071

7172
/* json_decode() options */
7273
#define PHP_JSON_OBJECT_AS_ARRAY (1<<0)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
--TEST--
2+
json_encode() tests for U+2028, U+2029
3+
--SKIPIF--
4+
<?php if (!extension_loaded("json")) print "skip"; ?>
5+
--FILE--
6+
<?php
7+
var_dump(json_encode(array("a\xC3\xA1b")));
8+
var_dump(json_encode(array("a\xC3\xA1b"), JSON_UNESCAPED_UNICODE));
9+
var_dump(json_encode("a\xE2\x80\xA7b"));
10+
var_dump(json_encode("a\xE2\x80\xA7b", JSON_UNESCAPED_UNICODE));
11+
var_dump(json_encode("a\xE2\x80\xA8b"));
12+
var_dump(json_encode("a\xE2\x80\xA8b", JSON_UNESCAPED_UNICODE));
13+
var_dump(json_encode("a\xE2\x80\xA8b", JSON_UNESCAPED_LINE_TERMINATORS));
14+
var_dump(json_encode("a\xE2\x80\xA8b", JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_LINE_TERMINATORS));
15+
var_dump(json_encode("a\xE2\x80\xA9b"));
16+
var_dump(json_encode("a\xE2\x80\xA9b", JSON_UNESCAPED_UNICODE));
17+
var_dump(json_encode("a\xE2\x80\xA9b", JSON_UNESCAPED_LINE_TERMINATORS));
18+
var_dump(json_encode("a\xE2\x80\xA9b", JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_LINE_TERMINATORS));
19+
var_dump(json_encode("a\xE2\x80\xAAb"));
20+
var_dump(json_encode("a\xE2\x80\xAAb", JSON_UNESCAPED_UNICODE));
21+
?>
22+
--EXPECT--
23+
string(12) "["a\u00e1b"]"
24+
string(8) "["aáb"]"
25+
string(10) ""a\u2027b""
26+
string(7) ""a‧b""
27+
string(10) ""a\u2028b""
28+
string(10) ""a\u2028b""
29+
string(10) ""a\u2028b""
30+
string(7) ""a
b""
31+
string(10) ""a\u2029b""
32+
string(10) ""a\u2029b""
33+
string(10) ""a\u2029b""
34+
string(7) ""a
b""
35+
string(10) ""a\u202ab""
36+
string(7) ""a‪b""

0 commit comments

Comments
 (0)