From 469d7eceac15c54c2b278abf307c7a6a6b213b8b Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Mon, 26 Aug 2024 09:50:50 -0400 Subject: [PATCH 01/10] Add two tests capturing the expectation for unsupported names. --- tests/test_path.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tests/test_path.py b/tests/test_path.py index 62128e5..7694294 100644 --- a/tests/test_path.py +++ b/tests/test_path.py @@ -10,6 +10,7 @@ from .compat.py39.os_helper import temp_dir, FakePath # type: ignore[import-not-found] +import pytest import jaraco.itertools from jaraco.functools import compose @@ -601,6 +602,40 @@ def test_malformed_paths(self): 'parent.txt', ] + @pytest.mark.xfail(reason="python/cpython#123270") + def test_unsupported_names(self): + """ + Path segments with special characters are readable. + + On some platforms or file systems, characters like + ``:`` and ``?`` are not allowed, but they are valid + in the zip file. + """ + data = io.BytesIO() + zf = zipfile.ZipFile(data, "w") + zf.writestr("path?", b"content") + zf.writestr("V: NMS.flac", b"fLaC...") + zf.filename = '' + root = zipfile.Path(zf) + contents = root.iterdir() + assert next(contents).name == 'path?' + assert next(contents).name == 'V: NMS.flac' + assert root.joinpath('V: NMS.flac').read_bytes() == b"fLaC..." + + @pytest.mark.xfail(reason="python/cpython#123270") + def test_backslash_not_separator(self): + """ + In a zip file, backslashes are not separators. + """ + data = io.BytesIO() + zf = zipfile.ZipFile(data, "w") + zf.writestr("foo\\bar", b"content") + zf.filename = '' + root = zipfile.Path(zf) + (first,) = root.iterdir() + assert not first.is_dir() + assert first.name == 'foo\\bar' + @pass_alpharep def test_interface(self, alpharep): from .compat.py310 import Traversable From 447e7d07dbe2f5dee151b8c57db6892969ab4ca9 Mon Sep 17 00:00:00 2001 From: "Jason R. 
Coombs" Date: Mon, 26 Aug 2024 09:58:14 -0400 Subject: [PATCH 02/10] Adjust the expectation in test_malformed_paths to expect empty paths ignored and .. to be a path segment. --- tests/test_path.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_path.py b/tests/test_path.py index 7694294..273ede1 100644 --- a/tests/test_path.py +++ b/tests/test_path.py @@ -585,6 +585,7 @@ def test_getinfo_missing(self, alpharep): with self.assertRaises(KeyError): alpharep.getinfo('does-not-exist') + @pytest.mark.xfail(reason="python/cpython#123270") def test_malformed_paths(self): """ Path should handle malformed paths. @@ -599,8 +600,9 @@ def test_malformed_paths(self): assert list(map(str, root.iterdir())) == [ 'one-slash.txt', 'two-slash.txt', - 'parent.txt', + '..', ] + assert root.joinpath('..').joinpath('parent.txt').read_bytes() == b'content' @pytest.mark.xfail(reason="python/cpython#123270") def test_unsupported_names(self): From 3cb5609002263eb19f7b5efda82d96f1f57fe876 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Mon, 26 Aug 2024 10:03:05 -0400 Subject: [PATCH 03/10] Removed SanitizedNames. Restores expectations around special characters in zipfiles, but also restores the infinite loop. 
--- pyproject.toml | 1 + tests/test_path.py | 3 +-- zipp/__init__.py | 64 +--------------------------------------------- 3 files changed, 3 insertions(+), 65 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index db9823d..874996b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,7 @@ test = [ "pytest-ignore-flaky", "jaraco.test", "importlib_resources; python_version < '3.9'", + "pytest-timeout", ] doc = [ diff --git a/tests/test_path.py b/tests/test_path.py index 273ede1..4571c00 100644 --- a/tests/test_path.py +++ b/tests/test_path.py @@ -586,6 +586,7 @@ def test_getinfo_missing(self, alpharep): alpharep.getinfo('does-not-exist') @pytest.mark.xfail(reason="python/cpython#123270") + @pytest.mark.timeout(1) def test_malformed_paths(self): """ Path should handle malformed paths. @@ -604,7 +605,6 @@ def test_malformed_paths(self): ] assert root.joinpath('..').joinpath('parent.txt').read_bytes() == b'content' - @pytest.mark.xfail(reason="python/cpython#123270") def test_unsupported_names(self): """ Path segments with special characters are readable. @@ -624,7 +624,6 @@ def test_unsupported_names(self): assert next(contents).name == 'V: NMS.flac' assert root.joinpath('V: NMS.flac').read_bytes() == b"fLaC..." - @pytest.mark.xfail(reason="python/cpython#123270") def test_backslash_not_separator(self): """ In a zip file, backslashes are not separators. diff --git a/zipp/__init__.py b/zipp/__init__.py index 10e9540..051bfc9 100644 --- a/zipp/__init__.py +++ b/zipp/__init__.py @@ -95,69 +95,7 @@ def __setstate__(self, state): super().__init__(*args, **kwargs) -class SanitizedNames: - """ - ZipFile mix-in to ensure names are sanitized. - """ - - def namelist(self): - return list(map(self._sanitize, super().namelist())) - - @staticmethod - def _sanitize(name): - r""" - Ensure a relative path with posix separators and no dot names. 
- - Modeled after - https://github.com/python/cpython/blob/bcc1be39cb1d04ad9fc0bd1b9193d3972835a57c/Lib/zipfile/__init__.py#L1799-L1813 - but provides consistent cross-platform behavior. - - >>> san = SanitizedNames._sanitize - >>> san('/foo/bar') - 'foo/bar' - >>> san('//foo.txt') - 'foo.txt' - >>> san('foo/.././bar.txt') - 'foo/bar.txt' - >>> san('foo../.bar.txt') - 'foo../.bar.txt' - >>> san('\\foo\\bar.txt') - 'foo/bar.txt' - >>> san('D:\\foo.txt') - 'D/foo.txt' - >>> san('\\\\server\\share\\file.txt') - 'server/share/file.txt' - >>> san('\\\\?\\GLOBALROOT\\Volume3') - '?/GLOBALROOT/Volume3' - >>> san('\\\\.\\PhysicalDrive1\\root') - 'PhysicalDrive1/root' - - Retain any trailing slash. - >>> san('abc/') - 'abc/' - - Raises a ValueError if the result is empty. - >>> san('../..') - Traceback (most recent call last): - ... - ValueError: Empty filename - """ - - def allowed(part): - return part and part not in {'..', '.'} - - # Remove the drive letter. - # Don't use ntpath.splitdrive, because that also strips UNC paths - bare = re.sub('^([A-Z]):', r'\1', name, flags=re.IGNORECASE) - clean = bare.replace('\\', '/') - parts = clean.split('/') - joined = '/'.join(filter(allowed, parts)) - if not joined: - raise ValueError("Empty filename") - return joined + '/' * name.endswith('/') - - -class CompleteDirs(InitializedState, SanitizedNames, zipfile.ZipFile): +class CompleteDirs(InitializedState, zipfile.ZipFile): """ A ZipFile subclass that ensures that implied directories are always included in the namelist. From f89b93f0370dd85d23d243e25dfc1f99f4d8de48 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Mon, 26 Aug 2024 10:32:26 -0400 Subject: [PATCH 04/10] Address infinite loop when zipfile begins with more than one leading slash. Alternate and more surgical fix for jaraco/zipp#119. 
Ref python/cpython#123270 --- pyproject.toml | 1 - tests/test_path.py | 1 - zipp/__init__.py | 9 +++++++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 874996b..db9823d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,6 @@ test = [ "pytest-ignore-flaky", "jaraco.test", "importlib_resources; python_version < '3.9'", - "pytest-timeout", ] doc = [ diff --git a/tests/test_path.py b/tests/test_path.py index 4571c00..4884587 100644 --- a/tests/test_path.py +++ b/tests/test_path.py @@ -586,7 +586,6 @@ def test_getinfo_missing(self, alpharep): alpharep.getinfo('does-not-exist') @pytest.mark.xfail(reason="python/cpython#123270") - @pytest.mark.timeout(1) def test_malformed_paths(self): """ Path should handle malformed paths. diff --git a/zipp/__init__.py b/zipp/__init__.py index 051bfc9..0b7b443 100644 --- a/zipp/__init__.py +++ b/zipp/__init__.py @@ -46,7 +46,7 @@ def _parents(path): def _ancestry(path): """ Given a path with elements separated by - posixpath.sep, generate all elements of that path + posixpath.sep, generate all elements of that path. >>> list(_ancestry('b/d')) ['b/d', 'b'] @@ -58,9 +58,14 @@ def _ancestry(path): ['b'] >>> list(_ancestry('')) [] + + Multiple separators are treated like a single. + + >>> list(_ancestry('//b//d///f//')) + ['//b//d///f', '//b//d', '//b'] """ path = path.rstrip(posixpath.sep) - while path and path != posixpath.sep: + while path and not path.endswith(posixpath.sep): yield path path, tail = posixpath.split(path) From 0a3a7b4652e417f61de2458506d05570e22df018 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Mon, 26 Aug 2024 10:38:25 -0400 Subject: [PATCH 05/10] Refine expectation that paths with leading slashes are simply not visible. 
--- tests/test_path.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/tests/test_path.py b/tests/test_path.py index 4884587..15da1c0 100644 --- a/tests/test_path.py +++ b/tests/test_path.py @@ -10,7 +10,6 @@ from .compat.py39.os_helper import temp_dir, FakePath # type: ignore[import-not-found] -import pytest import jaraco.itertools from jaraco.functools import compose @@ -585,10 +584,13 @@ def test_getinfo_missing(self, alpharep): with self.assertRaises(KeyError): alpharep.getinfo('does-not-exist') - @pytest.mark.xfail(reason="python/cpython#123270") def test_malformed_paths(self): """ - Path should handle malformed paths. + Path should handle malformed paths gracefully. + + Paths with leading slashes are not visible. + + Paths with dots are treated like regular files. """ data = io.BytesIO() zf = zipfile.ZipFile(data, "w") @@ -597,11 +599,7 @@ def test_malformed_paths(self): zf.writestr("../parent.txt", b"content") zf.filename = '' root = zipfile.Path(zf) - assert list(map(str, root.iterdir())) == [ - 'one-slash.txt', - 'two-slash.txt', - '..', - ] + assert list(map(str, root.iterdir())) == ['../'] assert root.joinpath('..').joinpath('parent.txt').read_bytes() == b'content' def test_unsupported_names(self): From a421f7e38d88ca9b6b58c89c6b3f141c07fdc588 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Mon, 26 Aug 2024 11:12:40 -0400 Subject: [PATCH 06/10] Invent DirtyZipInfo to create an unsanitized zipfile with backslashes. 
--- tests/test_path.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/tests/test_path.py b/tests/test_path.py index 15da1c0..5183377 100644 --- a/tests/test_path.py +++ b/tests/test_path.py @@ -5,6 +5,7 @@ import pickle import stat import sys +import time import unittest from zipp.compat.overlay import zipfile @@ -627,7 +628,7 @@ def test_backslash_not_separator(self): """ data = io.BytesIO() zf = zipfile.ZipFile(data, "w") - zf.writestr("foo\\bar", b"content") + zf.writestr(DirtyZipInfo.for_name("foo\\bar", zf), b"content") zf.filename = '' root = zipfile.Path(zf) (first,) = root.iterdir() @@ -640,3 +641,28 @@ def test_interface(self, alpharep): zf = zipfile.Path(alpharep) assert isinstance(zf, Traversable) + + +class DirtyZipInfo(zipfile.ZipInfo): + """ + Bypass name sanitization. + """ + + def __init__(self, filename, *args, **kwargs): + super().__init__(filename, *args, **kwargs) + self.filename = filename + + @classmethod + def for_name(cls, name, archive): + """ + Construct the same way that ZipFile.writestr does. + """ + self = cls(filename=name, date_time=time.localtime(time.time())[:6]) + self.compress_type = archive.compression + self.compress_level = archive.compresslevel + if self.filename.endswith('/'): + self.external_attr = 0o40775 << 16 # drwxrwxr-x + self.external_attr |= 0x10 # MS-DOS directory flag + else: + self.external_attr = 0o600 << 16 # ?rw------- + return self From fde82dcfdea5722c5126e83921773c629a8ba400 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Mon, 26 Aug 2024 11:16:34 -0400 Subject: [PATCH 07/10] Add news fragment. 
--- newsfragments/+b2c63c6b.bugfix.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 newsfragments/+b2c63c6b.bugfix.rst diff --git a/newsfragments/+b2c63c6b.bugfix.rst b/newsfragments/+b2c63c6b.bugfix.rst new file mode 100644 index 0000000..7fc0256 --- /dev/null +++ b/newsfragments/+b2c63c6b.bugfix.rst @@ -0,0 +1 @@ +Replaced SanitizedNames with a more surgical fix for infinite loops, restoring support for names with special characters in the archive. (python/cpython#123270) From bec712f098666b1767502d793d36b51afd0d7e94 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Mon, 26 Aug 2024 11:17:18 -0400 Subject: [PATCH 08/10] Mark unused code as uncovered. --- tests/test_path.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_path.py b/tests/test_path.py index 5183377..3dfc711 100644 --- a/tests/test_path.py +++ b/tests/test_path.py @@ -660,7 +660,7 @@ def for_name(cls, name, archive): self = cls(filename=name, date_time=time.localtime(time.time())[:6]) self.compress_type = archive.compression self.compress_level = archive.compresslevel - if self.filename.endswith('/'): + if self.filename.endswith('/'): # pragma: no cover self.external_attr = 0o40775 << 16 # drwxrwxr-x self.external_attr |= 0x10 # MS-DOS directory flag else: From cc61e6140f0dfde2ff372db932442cf6df890f09 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Mon, 26 Aug 2024 11:46:25 -0400 Subject: [PATCH 09/10] Prefer simpler path.rstrip to consolidate checks for empty or separator-only paths.
--- zipp/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zipp/__init__.py b/zipp/__init__.py index 0b7b443..a3f0b1b 100644 --- a/zipp/__init__.py +++ b/zipp/__init__.py @@ -65,7 +65,7 @@ def _ancestry(path): ['//b//d///f', '//b//d', '//b'] """ path = path.rstrip(posixpath.sep) - while path and not path.endswith(posixpath.sep): + while path.rstrip(posixpath.sep): yield path path, tail = posixpath.split(path) From 774a3ac67f5b827684e8c3b2e03c5f8bbb440593 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Mon, 26 Aug 2024 11:50:07 -0400 Subject: [PATCH 10/10] Add TODO to consolidate this behavior in CPython. --- tests/test_path.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_path.py b/tests/test_path.py index 3dfc711..ebd23f6 100644 --- a/tests/test_path.py +++ b/tests/test_path.py @@ -656,6 +656,8 @@ def __init__(self, filename, *args, **kwargs): def for_name(cls, name, archive): """ Construct the same way that ZipFile.writestr does. + + TODO: extract this functionality and re-use """ self = cls(filename=name, date_time=time.localtime(time.time())[:6]) self.compress_type = archive.compression