Skip to content

Commit

Permalink
pythongh-123270: Replaced SanitizedNames with a more surgical fix.
Browse files Browse the repository at this point in the history
Applies changes from zipp 3.20.1 and jaraco/zipp#124
  • Loading branch information
jaraco committed Aug 26, 2024
1 parent 7bd6ebf commit 55c2795
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 71 deletions.
73 changes: 67 additions & 6 deletions Lib/test/test_zipfile/_path/test_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pickle
import stat
import sys
import time
import unittest
import zipfile
import zipfile._path
Expand Down Expand Up @@ -592,7 +593,11 @@ def test_getinfo_missing(self, alpharep):

def test_malformed_paths(self):
"""
Path should handle malformed paths.
Path should handle malformed paths gracefully.
Paths with leading slashes are not visible.
Paths with dots are treated like regular files.
"""
data = io.BytesIO()
zf = zipfile.ZipFile(data, "w")
Expand All @@ -601,15 +606,71 @@ def test_malformed_paths(self):
zf.writestr("../parent.txt", b"content")
zf.filename = ''
root = zipfile.Path(zf)
assert list(map(str, root.iterdir())) == [
'one-slash.txt',
'two-slash.txt',
'parent.txt',
]
assert list(map(str, root.iterdir())) == ['../']
assert root.joinpath('..').joinpath('parent.txt').read_bytes() == b'content'

def test_unsupported_names(self):
"""
Path segments with special characters are readable.
On some platforms or file systems, characters like
``:`` and ``?`` are not allowed, but they are valid
in the zip file.
"""
data = io.BytesIO()
zf = zipfile.ZipFile(data, "w")
zf.writestr("path?", b"content")
zf.writestr("V: NMS.flac", b"fLaC...")
zf.filename = ''
root = zipfile.Path(zf)
contents = root.iterdir()
assert next(contents).name == 'path?'
assert next(contents).name == 'V: NMS.flac'
assert root.joinpath('V: NMS.flac').read_bytes() == b"fLaC..."

def test_backslash_not_separator(self):
"""
In a zip file, backslashes are not separators.
"""
data = io.BytesIO()
zf = zipfile.ZipFile(data, "w")
zf.writestr(DirtyZipInfo.for_name("foo\\bar", zf), b"content")
zf.filename = ''
root = zipfile.Path(zf)
(first,) = root.iterdir()
assert not first.is_dir()
assert first.name == 'foo\\bar'

@pass_alpharep
def test_interface(self, alpharep):
from importlib.resources.abc import Traversable

zf = zipfile.Path(alpharep)
assert isinstance(zf, Traversable)


class DirtyZipInfo(zipfile.ZipInfo):
"""
Bypass name sanitization.
"""

def __init__(self, filename, *args, **kwargs):
super().__init__(filename, *args, **kwargs)
self.filename = filename

@classmethod
def for_name(cls, name, archive):
"""
Construct the same way that ZipFile.writestr does.
TODO: extract this functionality and re-use
"""
self = cls(filename=name, date_time=time.localtime(time.time())[:6])
self.compress_type = archive.compression
self.compress_level = archive.compresslevel
if self.filename.endswith('/'): # pragma: no cover
self.external_attr = 0o40775 << 16 # drwxrwxr-x
self.external_attr |= 0x10 # MS-DOS directory flag
else:
self.external_attr = 0o600 << 16 # ?rw-------
return self
82 changes: 17 additions & 65 deletions Lib/zipfile/_path/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
"""
A Path-like interface for zipfiles.
This codebase is shared between zipfile.Path in the stdlib
and zipp in PyPI. See
https://github.com/python/importlib_metadata/wiki/Development-Methodology
for more detail.
"""

import io
import posixpath
import zipfile
Expand Down Expand Up @@ -36,7 +45,7 @@ def _parents(path):
def _ancestry(path):
"""
Given a path with elements separated by
posixpath.sep, generate all elements of that path
posixpath.sep, generate all elements of that path.
>>> list(_ancestry('b/d'))
['b/d', 'b']
Expand All @@ -48,9 +57,14 @@ def _ancestry(path):
['b']
>>> list(_ancestry(''))
[]
Multiple separators are treated like a single.
>>> list(_ancestry('//b//d///f//'))
['//b//d///f', '//b//d', '//b']
"""
path = path.rstrip(posixpath.sep)
while path and path != posixpath.sep:
while path.rstrip(posixpath.sep):
yield path
path, tail = posixpath.split(path)

Expand Down Expand Up @@ -85,69 +99,7 @@ def __setstate__(self, state):
super().__init__(*args, **kwargs)


class SanitizedNames:
"""
ZipFile mix-in to ensure names are sanitized.
"""

def namelist(self):
return list(map(self._sanitize, super().namelist()))

@staticmethod
def _sanitize(name):
r"""
Ensure a relative path with posix separators and no dot names.
Modeled after
https://github.com/python/cpython/blob/bcc1be39cb1d04ad9fc0bd1b9193d3972835a57c/Lib/zipfile/__init__.py#L1799-L1813
but provides consistent cross-platform behavior.
>>> san = SanitizedNames._sanitize
>>> san('/foo/bar')
'foo/bar'
>>> san('//foo.txt')
'foo.txt'
>>> san('foo/.././bar.txt')
'foo/bar.txt'
>>> san('foo../.bar.txt')
'foo../.bar.txt'
>>> san('\\foo\\bar.txt')
'foo/bar.txt'
>>> san('D:\\foo.txt')
'D/foo.txt'
>>> san('\\\\server\\share\\file.txt')
'server/share/file.txt'
>>> san('\\\\?\\GLOBALROOT\\Volume3')
'?/GLOBALROOT/Volume3'
>>> san('\\\\.\\PhysicalDrive1\\root')
'PhysicalDrive1/root'
Retain any trailing slash.
>>> san('abc/')
'abc/'
Raises a ValueError if the result is empty.
>>> san('../..')
Traceback (most recent call last):
...
ValueError: Empty filename
"""

def allowed(part):
return part and part not in {'..', '.'}

# Remove the drive letter.
# Don't use ntpath.splitdrive, because that also strips UNC paths
bare = re.sub('^([A-Z]):', r'\1', name, flags=re.IGNORECASE)
clean = bare.replace('\\', '/')
parts = clean.split('/')
joined = '/'.join(filter(allowed, parts))
if not joined:
raise ValueError("Empty filename")
return joined + '/' * name.endswith('/')


class CompleteDirs(InitializedState, SanitizedNames, zipfile.ZipFile):
class CompleteDirs(InitializedState, zipfile.ZipFile):
"""
A ZipFile subclass that ensures that implied directories
are always included in the namelist.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Applied a more surgical fix for malformed payloads in :class:`zipfile.Path`
causing infinite loops (gh-122905) without breaking contents using
legitimate characters.

0 comments on commit 55c2795

Please sign in to comment.