Skip to content

Commit 0aa1ee2

Browse files
authored
[3.10] gh-123270: Replaced SanitizedNames with a more surgical fix. (GH-123354) (#123426)
Applies changes from zipp 3.20.1 and jaraco/zippGH-124 (cherry picked from commit 2231286) (cherry picked from commit 17b77bb) Co-authored-by: Jason R. Coombs <[email protected]>
1 parent d3f39ce commit 0aa1ee2

File tree

3 files changed

+81
-67
lines changed

3 files changed

+81
-67
lines changed

Lib/test/test_zipfile.py

Lines changed: 70 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import itertools
66
import os
77
import pathlib
8+
import platform
89
import posixpath
910
import string
1011
import struct
@@ -3282,7 +3283,11 @@ def test_extract_orig_with_implied_dirs(self, alpharep):
32823283

32833284
def test_malformed_paths(self):
32843285
"""
3285-
Path should handle malformed paths.
3286+
Path should handle malformed paths gracefully.
3287+
3288+
Paths with leading slashes are not visible.
3289+
3290+
Paths with dots are treated like regular files.
32863291
"""
32873292
data = io.BytesIO()
32883293
zf = zipfile.ZipFile(data, "w")
@@ -3291,11 +3296,70 @@ def test_malformed_paths(self):
32913296
zf.writestr("../parent.txt", b"content")
32923297
zf.filename = ''
32933298
root = zipfile.Path(zf)
3294-
assert list(map(str, root.iterdir())) == [
3295-
'one-slash.txt',
3296-
'two-slash.txt',
3297-
'parent.txt',
3298-
]
3299+
assert list(map(str, root.iterdir())) == ['../']
3300+
assert root.joinpath('..').joinpath('parent.txt').read_bytes() == b'content'
3301+
3302+
@unittest.skipIf(platform.system() == "Windows", "GH-123693")
3303+
def test_unsupported_names(self):
3304+
"""
3305+
Path segments with special characters are readable.
3306+
3307+
On some platforms or file systems, characters like
3308+
``:`` and ``?`` are not allowed, but they are valid
3309+
in the zip file.
3310+
"""
3311+
data = io.BytesIO()
3312+
zf = zipfile.ZipFile(data, "w")
3313+
zf.writestr("path?", b"content")
3314+
zf.writestr("V: NMS.flac", b"fLaC...")
3315+
zf.filename = ''
3316+
root = zipfile.Path(zf)
3317+
contents = root.iterdir()
3318+
assert next(contents).name == 'path?'
3319+
item = next(contents)
3320+
assert item.name == 'V: NMS.flac', item.name
3321+
assert root.joinpath('V: NMS.flac').read_bytes() == b"fLaC..."
3322+
3323+
@unittest.skipIf(platform.system() == "Windows", "GH-123693")
3324+
def test_backslash_not_separator(self):
3325+
"""
3326+
In a zip file, backslashes are not separators.
3327+
"""
3328+
data = io.BytesIO()
3329+
zf = zipfile.ZipFile(data, "w")
3330+
zf.writestr(DirtyZipInfo.for_name("foo\\bar", zf), b"content")
3331+
zf.filename = ''
3332+
root = zipfile.Path(zf)
3333+
(first,) = root.iterdir()
3334+
assert not first.is_dir()
3335+
assert first.name == 'foo\\bar', first.name
3336+
3337+
3338+
class DirtyZipInfo(zipfile.ZipInfo):
3339+
"""
3340+
Bypass name sanitization.
3341+
"""
3342+
3343+
def __init__(self, filename, *args, **kwargs):
3344+
super().__init__(filename, *args, **kwargs)
3345+
self.filename = filename
3346+
3347+
@classmethod
3348+
def for_name(cls, name, archive):
3349+
"""
3350+
Construct the same way that ZipFile.writestr does.
3351+
3352+
TODO: extract this functionality and re-use
3353+
"""
3354+
self = cls(filename=name, date_time=time.localtime(time.time())[:6])
3355+
self.compress_type = archive.compression
3356+
self.compress_level = archive.compresslevel
3357+
if self.filename.endswith('/'): # pragma: no cover
3358+
self.external_attr = 0o40775 << 16 # drwxrwxr-x
3359+
self.external_attr |= 0x10 # MS-DOS directory flag
3360+
else:
3361+
self.external_attr = 0o600 << 16 # ?rw-------
3362+
return self
32993363

33003364

33013365
class StripExtraTests(unittest.TestCase):

Lib/zipfile.py

Lines changed: 8 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -2152,7 +2152,7 @@ def _parents(path):
21522152
def _ancestry(path):
21532153
"""
21542154
Given a path with elements separated by
2155-
posixpath.sep, generate all elements of that path
2155+
posixpath.sep, generate all elements of that path.
21562156
21572157
>>> list(_ancestry('b/d'))
21582158
['b/d', 'b']
@@ -2164,9 +2164,14 @@ def _ancestry(path):
21642164
['b']
21652165
>>> list(_ancestry(''))
21662166
[]
2167+
2168+
Multiple separators are treated like a single.
2169+
2170+
>>> list(_ancestry('//b//d///f//'))
2171+
['//b//d///f', '//b//d', '//b']
21672172
"""
21682173
path = path.rstrip(posixpath.sep)
2169-
while path and path != posixpath.sep:
2174+
while path.rstrip(posixpath.sep):
21702175
yield path
21712176
path, tail = posixpath.split(path)
21722177

@@ -2183,65 +2188,7 @@ def _difference(minuend, subtrahend):
21832188
return itertools.filterfalse(set(subtrahend).__contains__, minuend)
21842189

21852190

2186-
class SanitizedNames:
2187-
"""
2188-
ZipFile mix-in to ensure names are sanitized.
2189-
"""
2190-
2191-
def namelist(self):
2192-
return list(map(self._sanitize, super().namelist()))
2193-
2194-
@staticmethod
2195-
def _sanitize(name):
2196-
r"""
2197-
Ensure a relative path with posix separators and no dot names.
2198-
Modeled after
2199-
https://github.com/python/cpython/blob/bcc1be39cb1d04ad9fc0bd1b9193d3972835a57c/Lib/zipfile/__init__.py#L1799-L1813
2200-
but provides consistent cross-platform behavior.
2201-
>>> san = SanitizedNames._sanitize
2202-
>>> san('/foo/bar')
2203-
'foo/bar'
2204-
>>> san('//foo.txt')
2205-
'foo.txt'
2206-
>>> san('foo/.././bar.txt')
2207-
'foo/bar.txt'
2208-
>>> san('foo../.bar.txt')
2209-
'foo../.bar.txt'
2210-
>>> san('\\foo\\bar.txt')
2211-
'foo/bar.txt'
2212-
>>> san('D:\\foo.txt')
2213-
'D/foo.txt'
2214-
>>> san('\\\\server\\share\\file.txt')
2215-
'server/share/file.txt'
2216-
>>> san('\\\\?\\GLOBALROOT\\Volume3')
2217-
'?/GLOBALROOT/Volume3'
2218-
>>> san('\\\\.\\PhysicalDrive1\\root')
2219-
'PhysicalDrive1/root'
2220-
Retain any trailing slash.
2221-
>>> san('abc/')
2222-
'abc/'
2223-
Raises a ValueError if the result is empty.
2224-
>>> san('../..')
2225-
Traceback (most recent call last):
2226-
...
2227-
ValueError: Empty filename
2228-
"""
2229-
2230-
def allowed(part):
2231-
return part and part not in {'..', '.'}
2232-
2233-
# Remove the drive letter.
2234-
# Don't use ntpath.splitdrive, because that also strips UNC paths
2235-
bare = re.sub('^([A-Z]):', r'\1', name, flags=re.IGNORECASE)
2236-
clean = bare.replace('\\', '/')
2237-
parts = clean.split('/')
2238-
joined = '/'.join(filter(allowed, parts))
2239-
if not joined:
2240-
raise ValueError("Empty filename")
2241-
return joined + '/' * name.endswith('/')
2242-
2243-
2244-
class CompleteDirs(SanitizedNames, ZipFile):
2191+
class CompleteDirs(ZipFile):
22452192
"""
22462193
A ZipFile subclass that ensures that implied directories
22472194
are always included in the namelist.
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Applied a more surgical fix for malformed payloads in :class:`zipfile.Path`
2+
causing infinite loops (gh-122905) without breaking contents using
3+
legitimate characters.

0 commit comments

Comments
 (0)