Skip to content

Commit e2421a3

Browse files
[3.11] gh-111942: Fix crashes in TextIOWrapper.reconfigure() (GH-111976) (GH-112059)
* Fix crash when encoding is not string or None. * Fix crash when both line_buffering and write_through raise exception when converted to int. * Add a number of tests for constructor and reconfigure() method with invalid arguments. (cherry picked from commit ee06fff) Co-authored-by: Serhiy Storchaka <[email protected]>
1 parent a92b9e5 commit e2421a3

File tree

4 files changed

+132
-6
lines changed

4 files changed

+132
-6
lines changed

Lib/test/test_io.py

+82-2
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,10 @@ def _default_chunk_size():
8181
)
8282

8383

84+
class BadIndex:
85+
def __index__(self):
86+
1/0
87+
8488
class MockRawIOWithoutRead:
8589
"""A RawIO implementation without read(), so as to exercise the default
8690
RawIO.read() which calls readinto()."""
@@ -2613,8 +2617,29 @@ def test_constructor(self):
26132617
self.assertEqual(t.encoding, "utf-8")
26142618
self.assertEqual(t.line_buffering, True)
26152619
self.assertEqual("\xe9\n", t.readline())
2616-
self.assertRaises(TypeError, t.__init__, b, encoding="utf-8", newline=42)
2617-
self.assertRaises(ValueError, t.__init__, b, encoding="utf-8", newline='xyzzy')
2620+
invalid_type = TypeError if self.is_C else ValueError
2621+
with self.assertRaises(invalid_type):
2622+
t.__init__(b, encoding=42)
2623+
with self.assertRaises(UnicodeEncodeError):
2624+
t.__init__(b, encoding='\udcfe')
2625+
with self.assertRaises(ValueError):
2626+
t.__init__(b, encoding='utf-8\0')
2627+
with self.assertRaises(invalid_type):
2628+
t.__init__(b, encoding="utf-8", errors=42)
2629+
if support.Py_DEBUG or sys.flags.dev_mode or self.is_C:
2630+
with self.assertRaises(UnicodeEncodeError):
2631+
t.__init__(b, encoding="utf-8", errors='\udcfe')
2632+
if support.Py_DEBUG or sys.flags.dev_mode or self.is_C:
2633+
with self.assertRaises(ValueError):
2634+
t.__init__(b, encoding="utf-8", errors='replace\0')
2635+
with self.assertRaises(TypeError):
2636+
t.__init__(b, encoding="utf-8", newline=42)
2637+
with self.assertRaises(ValueError):
2638+
t.__init__(b, encoding="utf-8", newline='\udcfe')
2639+
with self.assertRaises(ValueError):
2640+
t.__init__(b, encoding="utf-8", newline='\n\0')
2641+
with self.assertRaises(ValueError):
2642+
t.__init__(b, encoding="utf-8", newline='xyzzy')
26182643

26192644
def test_uninitialized(self):
26202645
t = self.TextIOWrapper.__new__(self.TextIOWrapper)
@@ -3663,6 +3688,59 @@ def test_reconfigure_defaults(self):
36633688

36643689
self.assertEqual(txt.detach().getvalue(), b'LF\nCRLF\r\n')
36653690

3691+
def test_reconfigure_errors(self):
3692+
txt = self.TextIOWrapper(self.BytesIO(), 'ascii', 'replace', '\r')
3693+
with self.assertRaises(TypeError): # there was a crash
3694+
txt.reconfigure(encoding=42)
3695+
if self.is_C:
3696+
with self.assertRaises(UnicodeEncodeError):
3697+
txt.reconfigure(encoding='\udcfe')
3698+
with self.assertRaises(LookupError):
3699+
txt.reconfigure(encoding='locale\0')
3700+
# TODO: txt.reconfigure(encoding='utf-8\0')
3701+
# TODO: txt.reconfigure(encoding='nonexisting')
3702+
with self.assertRaises(TypeError):
3703+
txt.reconfigure(errors=42)
3704+
if self.is_C:
3705+
with self.assertRaises(UnicodeEncodeError):
3706+
txt.reconfigure(errors='\udcfe')
3707+
# TODO: txt.reconfigure(errors='ignore\0')
3708+
# TODO: txt.reconfigure(errors='nonexisting')
3709+
with self.assertRaises(TypeError):
3710+
txt.reconfigure(newline=42)
3711+
with self.assertRaises(ValueError):
3712+
txt.reconfigure(newline='\udcfe')
3713+
with self.assertRaises(ValueError):
3714+
txt.reconfigure(newline='xyz')
3715+
if not self.is_C:
3716+
# TODO: Should fail in C too.
3717+
with self.assertRaises(ValueError):
3718+
txt.reconfigure(newline='\n\0')
3719+
if self.is_C:
3720+
# TODO: Use __bool__(), not __index__().
3721+
with self.assertRaises(ZeroDivisionError):
3722+
txt.reconfigure(line_buffering=BadIndex())
3723+
with self.assertRaises(OverflowError):
3724+
txt.reconfigure(line_buffering=2**1000)
3725+
with self.assertRaises(ZeroDivisionError):
3726+
txt.reconfigure(write_through=BadIndex())
3727+
with self.assertRaises(OverflowError):
3728+
txt.reconfigure(write_through=2**1000)
3729+
with self.assertRaises(ZeroDivisionError): # there was a crash
3730+
txt.reconfigure(line_buffering=BadIndex(),
3731+
write_through=BadIndex())
3732+
self.assertEqual(txt.encoding, 'ascii')
3733+
self.assertEqual(txt.errors, 'replace')
3734+
self.assertIs(txt.line_buffering, False)
3735+
self.assertIs(txt.write_through, False)
3736+
3737+
txt.reconfigure(encoding='latin1', errors='ignore', newline='\r\n',
3738+
line_buffering=True, write_through=True)
3739+
self.assertEqual(txt.encoding, 'latin1')
3740+
self.assertEqual(txt.errors, 'ignore')
3741+
self.assertIs(txt.line_buffering, True)
3742+
self.assertIs(txt.write_through, True)
3743+
36663744
def test_reconfigure_newline(self):
36673745
raw = self.BytesIO(b'CR\rEOF')
36683746
txt = self.TextIOWrapper(raw, 'ascii', newline='\n')
@@ -4693,9 +4771,11 @@ def load_tests(loader, tests, pattern):
46934771
if test.__name__.startswith("C"):
46944772
for name, obj in c_io_ns.items():
46954773
setattr(test, name, obj)
4774+
test.is_C = True
46964775
elif test.__name__.startswith("Py"):
46974776
for name, obj in py_io_ns.items():
46984777
setattr(test, name, obj)
4778+
test.is_C = False
46994779

47004780
suite = loader.suiteClass()
47014781
for test in tests:
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix crashes in :meth:`io.TextIOWrapper.reconfigure` when passing invalid
2+
arguments, e.g. non-string encoding.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix SystemError in the TextIOWrapper constructor with non-encodable "errors"
2+
argument in non-debug mode.

Modules/_io/textio.c

+46-4
Original file line numberDiff line numberDiff line change
@@ -1099,6 +1099,15 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
10991099
else if (io_check_errors(errors)) {
11001100
return -1;
11011101
}
1102+
Py_ssize_t errors_len;
1103+
const char *errors_str = PyUnicode_AsUTF8AndSize(errors, &errors_len);
1104+
if (errors_str == NULL) {
1105+
return -1;
1106+
}
1107+
if (strlen(errors_str) != (size_t)errors_len) {
1108+
PyErr_SetString(PyExc_ValueError, "embedded null character");
1109+
return -1;
1110+
}
11021111

11031112
if (validate_newline(newline) < 0) {
11041113
return -1;
@@ -1171,11 +1180,11 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
11711180
Py_INCREF(buffer);
11721181

11731182
/* Build the decoder object */
1174-
if (_textiowrapper_set_decoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1183+
if (_textiowrapper_set_decoder(self, codec_info, errors_str) != 0)
11751184
goto error;
11761185

11771186
/* Build the encoder object */
1178-
if (_textiowrapper_set_encoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1187+
if (_textiowrapper_set_encoder(self, codec_info, errors_str) != 0)
11791188
goto error;
11801189

11811190
/* Finished sorting out the codec details */
@@ -1272,24 +1281,34 @@ textiowrapper_change_encoding(textio *self, PyObject *encoding,
12721281
errors = &_Py_ID(strict);
12731282
}
12741283
}
1284+
Py_INCREF(errors);
12751285

1286+
const char *c_encoding = PyUnicode_AsUTF8(encoding);
1287+
if (c_encoding == NULL) {
1288+
Py_DECREF(encoding);
1289+
Py_DECREF(errors);
1290+
return -1;
1291+
}
12761292
const char *c_errors = PyUnicode_AsUTF8(errors);
12771293
if (c_errors == NULL) {
12781294
Py_DECREF(encoding);
1295+
Py_DECREF(errors);
12791296
return -1;
12801297
}
12811298

12821299
// Create new encoder & decoder
12831300
PyObject *codec_info = _PyCodec_LookupTextEncoding(
1284-
PyUnicode_AsUTF8(encoding), "codecs.open()");
1301+
c_encoding, "codecs.open()");
12851302
if (codec_info == NULL) {
12861303
Py_DECREF(encoding);
1304+
Py_DECREF(errors);
12871305
return -1;
12881306
}
12891307
if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
12901308
_textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
12911309
Py_DECREF(codec_info);
12921310
Py_DECREF(encoding);
1311+
Py_DECREF(errors);
12931312
return -1;
12941313
}
12951314
Py_DECREF(codec_info);
@@ -1327,6 +1346,26 @@ _io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
13271346
int write_through;
13281347
const char *newline = NULL;
13291348

1349+
if (encoding != Py_None && !PyUnicode_Check(encoding)) {
1350+
PyErr_Format(PyExc_TypeError,
1351+
"reconfigure() argument 'encoding' must be str or None, not %s",
1352+
Py_TYPE(encoding)->tp_name);
1353+
return NULL;
1354+
}
1355+
if (errors != Py_None && !PyUnicode_Check(errors)) {
1356+
PyErr_Format(PyExc_TypeError,
1357+
"reconfigure() argument 'errors' must be str or None, not %s",
1358+
Py_TYPE(errors)->tp_name);
1359+
return NULL;
1360+
}
1361+
if (newline_obj != NULL && newline_obj != Py_None &&
1362+
!PyUnicode_Check(newline_obj))
1363+
{
1364+
PyErr_Format(PyExc_TypeError,
1365+
"reconfigure() argument 'newline' must be str or None, not %s",
1366+
Py_TYPE(newline_obj)->tp_name);
1367+
return NULL;
1368+
}
13301369
/* Check if something is in the read buffer */
13311370
if (self->decoded_chars != NULL) {
13321371
if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
@@ -1345,9 +1384,12 @@ _io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
13451384

13461385
line_buffering = convert_optional_bool(line_buffering_obj,
13471386
self->line_buffering);
1387+
if (line_buffering < 0) {
1388+
return NULL;
1389+
}
13481390
write_through = convert_optional_bool(write_through_obj,
13491391
self->write_through);
1350-
if (line_buffering < 0 || write_through < 0) {
1392+
if (write_through < 0) {
13511393
return NULL;
13521394
}
13531395

0 commit comments

Comments
 (0)