Skip to content

[3.10] gh-101144: Allow zipfile.Path .open & .read_text encoding to be positional (GH-101179) #101182

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions Doc/library/zipfile.rst
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,12 @@ Path objects are traversable using the ``/`` operator or ``joinpath``.
Added support for text and binary modes for open. Default
mode is now text.

.. versionchanged:: 3.10.10
The ``encoding`` parameter can be supplied as a positional argument
without causing a :exc:`TypeError`. As it could in 3.9. Code needing to
be compatible with unpatched 3.10 and 3.11 versions must pass all
:class:`io.TextIOWrapper` arguments, ``encoding`` included, as keywords.

.. method:: Path.iterdir()

Enumerate the children of the current directory.
Expand All @@ -533,6 +539,12 @@ Path objects are traversable using the ``/`` operator or ``joinpath``.
:class:`io.TextIOWrapper` (except ``buffer``, which is
implied by the context).

.. versionchanged:: 3.10.10
The ``encoding`` parameter can be supplied as a positional argument
without causing a :exc:`TypeError`. As it could in 3.9. Code needing to
be compatible with unpatched 3.10 and 3.11 versions must pass all
:class:`io.TextIOWrapper` arguments, ``encoding`` included, as keywords.

.. method:: Path.read_bytes()

Read the current file as bytes.
Expand Down
66 changes: 65 additions & 1 deletion Lib/test/test_zipfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import struct
import subprocess
import sys
from test.support.script_helper import assert_python_ok
import time
import unittest
import unittest.mock as mock
Expand Down Expand Up @@ -2933,7 +2934,69 @@ def test_open(self, alpharep):
a, b, g = root.iterdir()
with a.open(encoding="utf-8") as strm:
data = strm.read()
assert data == "content of a"
self.assertEqual(data, "content of a")
with a.open('r', "utf-8") as strm: # not a kw, no gh-101144 TypeError
data = strm.read()
self.assertEqual(data, "content of a")

def test_open_encoding_utf16(self):
in_memory_file = io.BytesIO()
zf = zipfile.ZipFile(in_memory_file, "w")
zf.writestr("path/16.txt", "This was utf-16".encode("utf-16"))
zf.filename = "test_open_utf16.zip"
root = zipfile.Path(zf)
(path,) = root.iterdir()
u16 = path.joinpath("16.txt")
with u16.open('r', "utf-16") as strm:
data = strm.read()
self.assertEqual(data, "This was utf-16")
with u16.open(encoding="utf-16") as strm:
data = strm.read()
self.assertEqual(data, "This was utf-16")

def test_open_encoding_errors(self):
in_memory_file = io.BytesIO()
zf = zipfile.ZipFile(in_memory_file, "w")
zf.writestr("path/bad-utf8.bin", b"invalid utf-8: \xff\xff.")
zf.filename = "test_read_text_encoding_errors.zip"
root = zipfile.Path(zf)
(path,) = root.iterdir()
u16 = path.joinpath("bad-utf8.bin")

# encoding= as a positional argument for gh-101144.
data = u16.read_text("utf-8", errors="ignore")
self.assertEqual(data, "invalid utf-8: .")
with u16.open("r", "utf-8", errors="surrogateescape") as f:
self.assertEqual(f.read(), "invalid utf-8: \udcff\udcff.")

# encoding= both positional and keyword is an error; gh-101144.
with self.assertRaisesRegex(TypeError, "encoding"):
data = u16.read_text("utf-8", encoding="utf-8")

# both keyword arguments work.
with u16.open("r", encoding="utf-8", errors="strict") as f:
# error during decoding with wrong codec.
with self.assertRaises(UnicodeDecodeError):
f.read()

def test_encoding_warnings(self):
"""EncodingWarning must blame the read_text and open calls."""
code = '''\
import io, zipfile
with zipfile.ZipFile(io.BytesIO(), "w") as zf:
zf.filename = '<test_encoding_warnings in memory zip file>'
zf.writestr("path/file.txt", b"Spanish Inquisition")
root = zipfile.Path(zf)
(path,) = root.iterdir()
file_path = path.joinpath("file.txt")
unused = file_path.read_text() # should warn
file_path.open("r").close() # should warn
'''
proc = assert_python_ok('-X', 'warn_default_encoding', '-c', code)
warnings = proc.err.splitlines()
self.assertEqual(len(warnings), 2, proc.err)
self.assertRegex(warnings[0], rb"^<string>:8: EncodingWarning:")
self.assertRegex(warnings[1], rb"^<string>:9: EncodingWarning:")

def test_open_write(self):
"""
Expand Down Expand Up @@ -2975,6 +3038,7 @@ def test_read(self, alpharep):
root = zipfile.Path(alpharep)
a, b, g = root.iterdir()
assert a.read_text(encoding="utf-8") == "content of a"
a.read_text("utf-8") # No positional arg TypeError per gh-101144.
assert a.read_bytes() == b"content of a"

@pass_alpharep
Expand Down
15 changes: 10 additions & 5 deletions Lib/zipfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -2236,6 +2236,11 @@ def _name_set(self):
return self.__lookup


def _extract_text_encoding(encoding=None, *args, **kwargs):
# stacklevel=3 so that the caller of the caller see any warning.
return io.text_encoding(encoding, 3), args, kwargs


class Path:
"""
A pathlib-compatible interface for zip files.
Expand Down Expand Up @@ -2345,9 +2350,9 @@ def open(self, mode='r', *args, pwd=None, **kwargs):
if args or kwargs:
raise ValueError("encoding args invalid for binary operation")
return stream
else:
kwargs["encoding"] = io.text_encoding(kwargs.get("encoding"))
return io.TextIOWrapper(stream, *args, **kwargs)
# Text mode:
encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
return io.TextIOWrapper(stream, encoding, *args, **kwargs)

@property
def name(self):
Expand All @@ -2358,8 +2363,8 @@ def filename(self):
return pathlib.Path(self.root.filename).joinpath(self.at)

def read_text(self, *args, **kwargs):
kwargs["encoding"] = io.text_encoding(kwargs.get("encoding"))
with self.open('r', *args, **kwargs) as strm:
encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
with self.open('r', encoding, *args, **kwargs) as strm:
return strm.read()

def read_bytes(self):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Make :func:`zipfile.Path.open` and :func:`zipfile.Path.read_text` also accept
``encoding`` as a positional argument. This was the behavior in Python 3.9 and
earlier. Earlier 3.10 versions had a regression where supplying it as a positional
argument would lead to a :exc:`TypeError`.