Skip to content

Commit b2a662f

Browse files
[3.10] gh-101144: Allow zipfile.Path .open & .read_text encoding to be positional (GH-101179) (GH-101182)
The zipfile.Path open() and read_text() encoding parameter can be supplied as a positional argument without causing a TypeError again. 3.10.0b1 included a regression that made it keyword only. Documentation update included as users writing code to be compatible with a wide range of versions will need to consider this for some time. (cherry picked from commit 5927013) (cherry picked from commit efe3a38) Co-authored-by: Gregory P. Smith <[email protected]> [Google] Automerge-Triggered-By: GH:gpshead
1 parent 10c6130 commit b2a662f

File tree

4 files changed

+91
-6
lines changed

4 files changed

+91
-6
lines changed

Doc/library/zipfile.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -509,6 +509,12 @@ Path objects are traversable using the ``/`` operator or ``joinpath``.
509509
Added support for text and binary modes for open. Default
510510
mode is now text.
511511

512+
.. versionchanged:: 3.10.10
513+
The ``encoding`` parameter can be supplied as a positional argument
514+
without causing a :exc:`TypeError`, as it could in 3.9. Code needing to
515+
be compatible with unpatched 3.10 and 3.11 versions must pass all
516+
:class:`io.TextIOWrapper` arguments, ``encoding`` included, as keywords.
517+
512518
.. method:: Path.iterdir()
513519

514520
Enumerate the children of the current directory.
@@ -533,6 +539,12 @@ Path objects are traversable using the ``/`` operator or ``joinpath``.
533539
:class:`io.TextIOWrapper` (except ``buffer``, which is
534540
implied by the context).
535541

542+
.. versionchanged:: 3.10.10
543+
The ``encoding`` parameter can be supplied as a positional argument
544+
without causing a :exc:`TypeError`, as it could in 3.9. Code needing to
545+
be compatible with unpatched 3.10 and 3.11 versions must pass all
546+
:class:`io.TextIOWrapper` arguments, ``encoding`` included, as keywords.
547+
536548
.. method:: Path.read_bytes()
537549

538550
Read the current file as bytes.

Lib/test/test_zipfile.py

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import struct
1111
import subprocess
1212
import sys
13+
from test.support.script_helper import assert_python_ok
1314
import time
1415
import unittest
1516
import unittest.mock as mock
@@ -2933,7 +2934,69 @@ def test_open(self, alpharep):
29332934
a, b, g = root.iterdir()
29342935
with a.open(encoding="utf-8") as strm:
29352936
data = strm.read()
2936-
assert data == "content of a"
2937+
self.assertEqual(data, "content of a")
2938+
with a.open('r', "utf-8") as strm: # not a kw, no gh-101144 TypeError
2939+
data = strm.read()
2940+
self.assertEqual(data, "content of a")
2941+
2942+
def test_open_encoding_utf16(self):
2943+
in_memory_file = io.BytesIO()
2944+
zf = zipfile.ZipFile(in_memory_file, "w")
2945+
zf.writestr("path/16.txt", "This was utf-16".encode("utf-16"))
2946+
zf.filename = "test_open_utf16.zip"
2947+
root = zipfile.Path(zf)
2948+
(path,) = root.iterdir()
2949+
u16 = path.joinpath("16.txt")
2950+
with u16.open('r', "utf-16") as strm:
2951+
data = strm.read()
2952+
self.assertEqual(data, "This was utf-16")
2953+
with u16.open(encoding="utf-16") as strm:
2954+
data = strm.read()
2955+
self.assertEqual(data, "This was utf-16")
2956+
2957+
def test_open_encoding_errors(self):
2958+
in_memory_file = io.BytesIO()
2959+
zf = zipfile.ZipFile(in_memory_file, "w")
2960+
zf.writestr("path/bad-utf8.bin", b"invalid utf-8: \xff\xff.")
2961+
zf.filename = "test_read_text_encoding_errors.zip"
2962+
root = zipfile.Path(zf)
2963+
(path,) = root.iterdir()
2964+
u16 = path.joinpath("bad-utf8.bin")
2965+
2966+
# encoding= as a positional argument for gh-101144.
2967+
data = u16.read_text("utf-8", errors="ignore")
2968+
self.assertEqual(data, "invalid utf-8: .")
2969+
with u16.open("r", "utf-8", errors="surrogateescape") as f:
2970+
self.assertEqual(f.read(), "invalid utf-8: \udcff\udcff.")
2971+
2972+
# encoding= both positional and keyword is an error; gh-101144.
2973+
with self.assertRaisesRegex(TypeError, "encoding"):
2974+
data = u16.read_text("utf-8", encoding="utf-8")
2975+
2976+
# both keyword arguments work.
2977+
with u16.open("r", encoding="utf-8", errors="strict") as f:
2978+
# error during decoding with wrong codec.
2979+
with self.assertRaises(UnicodeDecodeError):
2980+
f.read()
2981+
2982+
def test_encoding_warnings(self):
2983+
"""EncodingWarning must blame the read_text and open calls."""
2984+
code = '''\
2985+
import io, zipfile
2986+
with zipfile.ZipFile(io.BytesIO(), "w") as zf:
2987+
zf.filename = '<test_encoding_warnings in memory zip file>'
2988+
zf.writestr("path/file.txt", b"Spanish Inquisition")
2989+
root = zipfile.Path(zf)
2990+
(path,) = root.iterdir()
2991+
file_path = path.joinpath("file.txt")
2992+
unused = file_path.read_text() # should warn
2993+
file_path.open("r").close() # should warn
2994+
'''
2995+
proc = assert_python_ok('-X', 'warn_default_encoding', '-c', code)
2996+
warnings = proc.err.splitlines()
2997+
self.assertEqual(len(warnings), 2, proc.err)
2998+
self.assertRegex(warnings[0], rb"^<string>:8: EncodingWarning:")
2999+
self.assertRegex(warnings[1], rb"^<string>:9: EncodingWarning:")
29373000

29383001
def test_open_write(self):
29393002
"""
@@ -2975,6 +3038,7 @@ def test_read(self, alpharep):
29753038
root = zipfile.Path(alpharep)
29763039
a, b, g = root.iterdir()
29773040
assert a.read_text(encoding="utf-8") == "content of a"
3041+
a.read_text("utf-8") # No positional arg TypeError per gh-101144.
29783042
assert a.read_bytes() == b"content of a"
29793043

29803044
@pass_alpharep

Lib/zipfile.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2236,6 +2236,11 @@ def _name_set(self):
22362236
return self.__lookup
22372237

22382238

2239+
def _extract_text_encoding(encoding=None, *args, **kwargs):
2240+
# stacklevel=3 so that the caller of the caller sees any warning.
2241+
return io.text_encoding(encoding, 3), args, kwargs
2242+
2243+
22392244
class Path:
22402245
"""
22412246
A pathlib-compatible interface for zip files.
@@ -2345,9 +2350,9 @@ def open(self, mode='r', *args, pwd=None, **kwargs):
23452350
if args or kwargs:
23462351
raise ValueError("encoding args invalid for binary operation")
23472352
return stream
2348-
else:
2349-
kwargs["encoding"] = io.text_encoding(kwargs.get("encoding"))
2350-
return io.TextIOWrapper(stream, *args, **kwargs)
2353+
# Text mode:
2354+
encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
2355+
return io.TextIOWrapper(stream, encoding, *args, **kwargs)
23512356

23522357
@property
23532358
def name(self):
@@ -2358,8 +2363,8 @@ def filename(self):
23582363
return pathlib.Path(self.root.filename).joinpath(self.at)
23592364

23602365
def read_text(self, *args, **kwargs):
2361-
kwargs["encoding"] = io.text_encoding(kwargs.get("encoding"))
2362-
with self.open('r', *args, **kwargs) as strm:
2366+
encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
2367+
with self.open('r', encoding, *args, **kwargs) as strm:
23632368
return strm.read()
23642369

23652370
def read_bytes(self):
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Make :func:`zipfile.Path.open` and :func:`zipfile.Path.read_text` also accept
2+
``encoding`` as a positional argument. This was the behavior in Python 3.9 and
3+
earlier. Earlier 3.10 versions had a regression where supplying it as a positional
4+
argument would lead to a :exc:`TypeError`.

0 commit comments

Comments
 (0)