Skip to content

Commit efe3a38

Browse files
authored
[3.11] gh-101144: Allow zipfile.Path .open & .read_text encoding to be positional (#101179)
The zipfile.Path open() and read_text() encoding parameter can be supplied as a positional argument without causing a TypeError again. 3.10.0b1 included a regression that made it keyword-only. Documentation update included as users writing code to be compatible with a wide range of versions will need to consider this for some time. (cherry picked from commit 5927013) Co-authored-by: Gregory P. Smith <[email protected]> [Google]
1 parent 1998ea6 commit efe3a38

File tree

4 files changed

+91
-6
lines changed

4 files changed

+91
-6
lines changed

Doc/library/zipfile.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -551,6 +551,12 @@ Path objects are traversable using the ``/`` operator or ``joinpath``.
551551
Added support for text and binary modes for open. Default
552552
mode is now text.
553553

554+
.. versionchanged:: 3.11.2
555+
The ``encoding`` parameter can be supplied as a positional argument
556+
without causing a :exc:`TypeError`, as it could in 3.9. Code needing to
557+
be compatible with unpatched 3.10 and 3.11 versions must pass all
558+
:class:`io.TextIOWrapper` arguments, ``encoding`` included, as keywords.
559+
554560
.. method:: Path.iterdir()
555561

556562
Enumerate the children of the current directory.
@@ -596,6 +602,12 @@ Path objects are traversable using the ``/`` operator or ``joinpath``.
596602
:class:`io.TextIOWrapper` (except ``buffer``, which is
597603
implied by the context).
598604

605+
.. versionchanged:: 3.11.2
606+
The ``encoding`` parameter can be supplied as a positional argument
607+
without causing a :exc:`TypeError`, as it could in 3.9. Code needing to
608+
be compatible with unpatched 3.10 and 3.11 versions must pass all
609+
:class:`io.TextIOWrapper` arguments, ``encoding`` included, as keywords.
610+
599611
.. method:: Path.read_bytes()
600612

601613
Read the current file as bytes.

Lib/test/test_zipfile.py

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import struct
1111
import subprocess
1212
import sys
13+
from test.support.script_helper import assert_python_ok
1314
import time
1415
import unittest
1516
import unittest.mock as mock
@@ -3005,7 +3006,69 @@ def test_open(self, alpharep):
30053006
a, b, g = root.iterdir()
30063007
with a.open(encoding="utf-8") as strm:
30073008
data = strm.read()
3008-
assert data == "content of a"
3009+
self.assertEqual(data, "content of a")
3010+
with a.open('r', "utf-8") as strm: # not a kw, no gh-101144 TypeError
3011+
data = strm.read()
3012+
self.assertEqual(data, "content of a")
3013+
3014+
def test_open_encoding_utf16(self):
3015+
in_memory_file = io.BytesIO()
3016+
zf = zipfile.ZipFile(in_memory_file, "w")
3017+
zf.writestr("path/16.txt", "This was utf-16".encode("utf-16"))
3018+
zf.filename = "test_open_utf16.zip"
3019+
root = zipfile.Path(zf)
3020+
(path,) = root.iterdir()
3021+
u16 = path.joinpath("16.txt")
3022+
with u16.open('r', "utf-16") as strm:
3023+
data = strm.read()
3024+
self.assertEqual(data, "This was utf-16")
3025+
with u16.open(encoding="utf-16") as strm:
3026+
data = strm.read()
3027+
self.assertEqual(data, "This was utf-16")
3028+
3029+
def test_open_encoding_errors(self):
3030+
in_memory_file = io.BytesIO()
3031+
zf = zipfile.ZipFile(in_memory_file, "w")
3032+
zf.writestr("path/bad-utf8.bin", b"invalid utf-8: \xff\xff.")
3033+
zf.filename = "test_read_text_encoding_errors.zip"
3034+
root = zipfile.Path(zf)
3035+
(path,) = root.iterdir()
3036+
u16 = path.joinpath("bad-utf8.bin")
3037+
3038+
# encoding= as a positional argument for gh-101144.
3039+
data = u16.read_text("utf-8", errors="ignore")
3040+
self.assertEqual(data, "invalid utf-8: .")
3041+
with u16.open("r", "utf-8", errors="surrogateescape") as f:
3042+
self.assertEqual(f.read(), "invalid utf-8: \udcff\udcff.")
3043+
3044+
# encoding= both positional and keyword is an error; gh-101144.
3045+
with self.assertRaisesRegex(TypeError, "encoding"):
3046+
data = u16.read_text("utf-8", encoding="utf-8")
3047+
3048+
# both keyword arguments work.
3049+
with u16.open("r", encoding="utf-8", errors="strict") as f:
3050+
# error during decoding with wrong codec.
3051+
with self.assertRaises(UnicodeDecodeError):
3052+
f.read()
3053+
3054+
def test_encoding_warnings(self):
3055+
"""EncodingWarning must blame the read_text and open calls."""
3056+
code = '''\
3057+
import io, zipfile
3058+
with zipfile.ZipFile(io.BytesIO(), "w") as zf:
3059+
zf.filename = '<test_encoding_warnings in memory zip file>'
3060+
zf.writestr("path/file.txt", b"Spanish Inquisition")
3061+
root = zipfile.Path(zf)
3062+
(path,) = root.iterdir()
3063+
file_path = path.joinpath("file.txt")
3064+
unused = file_path.read_text() # should warn
3065+
file_path.open("r").close() # should warn
3066+
'''
3067+
proc = assert_python_ok('-X', 'warn_default_encoding', '-c', code)
3068+
warnings = proc.err.splitlines()
3069+
self.assertEqual(len(warnings), 2, proc.err)
3070+
self.assertRegex(warnings[0], rb"^<string>:8: EncodingWarning:")
3071+
self.assertRegex(warnings[1], rb"^<string>:9: EncodingWarning:")
30093072

30103073
def test_open_write(self):
30113074
"""
@@ -3047,6 +3110,7 @@ def test_read(self, alpharep):
30473110
root = zipfile.Path(alpharep)
30483111
a, b, g = root.iterdir()
30493112
assert a.read_text(encoding="utf-8") == "content of a"
3113+
a.read_text("utf-8") # No positional arg TypeError per gh-101144.
30503114
assert a.read_bytes() == b"content of a"
30513115

30523116
@pass_alpharep

Lib/zipfile.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2287,6 +2287,11 @@ def _name_set(self):
22872287
return self.__lookup
22882288

22892289

2290+
def _extract_text_encoding(encoding=None, *args, **kwargs):
2291+
# stacklevel=3 so that the caller of the caller sees any warning.
2292+
return io.text_encoding(encoding, 3), args, kwargs
2293+
2294+
22902295
class Path:
22912296
"""
22922297
A pathlib-compatible interface for zip files.
@@ -2396,9 +2401,9 @@ def open(self, mode='r', *args, pwd=None, **kwargs):
23962401
if args or kwargs:
23972402
raise ValueError("encoding args invalid for binary operation")
23982403
return stream
2399-
else:
2400-
kwargs["encoding"] = io.text_encoding(kwargs.get("encoding"))
2401-
return io.TextIOWrapper(stream, *args, **kwargs)
2404+
# Text mode:
2405+
encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
2406+
return io.TextIOWrapper(stream, encoding, *args, **kwargs)
24022407

24032408
@property
24042409
def name(self):
@@ -2421,8 +2426,8 @@ def filename(self):
24212426
return pathlib.Path(self.root.filename).joinpath(self.at)
24222427

24232428
def read_text(self, *args, **kwargs):
2424-
kwargs["encoding"] = io.text_encoding(kwargs.get("encoding"))
2425-
with self.open('r', *args, **kwargs) as strm:
2429+
encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
2430+
with self.open('r', encoding, *args, **kwargs) as strm:
24262431
return strm.read()
24272432

24282433
def read_bytes(self):
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Make :func:`zipfile.Path.open` and :func:`zipfile.Path.read_text` also accept
2+
``encoding`` as a positional argument. This was the behavior in Python 3.9 and
3+
earlier. 3.10 introduced a regression where supplying it as a positional
4+
argument would lead to a :exc:`TypeError`.

0 commit comments

Comments
 (0)