From cca6d2d88e1482dfbabe3398fd1b963abac58435 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 27 Apr 2026 21:43:15 +0300 Subject: [PATCH] gh-146581: Fix vulnerability in shutil.unpack_archive() for ZIP files on Windows (GH-146591) Use ZipFile.extractall() to sanitize file names and extract files. Files with invalid names (e.g. absolute paths) are now skipped. Files containing ".." in the name are no longer skipped. (cherry picked from commit fc829e88753858c8ac669594bf0093f44948c0f4) --- Lib/shutil.py | 24 +------ Lib/test/test_shutil.py | 65 +++++++++++++++++++ Lib/zipfile.py | 19 ++++-- ...-03-29-12-51-33.gh-issue-146581.4vZfB0.rst | 5 ++ 4 files changed, 87 insertions(+), 26 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2026-03-29-12-51-33.gh-issue-146581.4vZfB0.rst diff --git a/Lib/shutil.py b/Lib/shutil.py index 0fad5eaea04f96..ef58240c03491c 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -1241,27 +1241,9 @@ def _unpack_zipfile(filename, extract_dir): if not zipfile.is_zipfile(filename): raise ReadError("%s is not a zip file" % filename) - zip = zipfile.ZipFile(filename) - try: - for info in zip.infolist(): - name = info.filename - - # don't extract absolute paths or ones with .. in them - if name.startswith('/') or '..' in name: - continue - - targetpath = os.path.join(extract_dir, *name.split('/')) - if not targetpath: - continue - - _ensure_directory(targetpath) - if not name.endswith('/'): - # file - with zip.open(name, 'r') as source, \ - open(targetpath, 'wb') as target: - copyfileobj(source, target) - finally: - zip.close() + with zipfile.ZipFile(filename) as zip: + zip._ignore_invalid_names = True + zip.extractall(extract_dir) def _unpack_tarfile(filename, extract_dir, *, filter=None): """Unpack tar/tar.gz/tar.bz2/tar.xz `filename` to `extract_dir` diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index 6728d30958964e..bd3f4e17f50c41 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -1799,6 +1799,71 @@ def test_unpack_archive_zip(self): with self.assertRaises(TypeError): self.check_unpack_archive('zip', filter='data') + def test_unpack_archive_zip_badpaths(self): + srcdir = self.mkdtemp() + zipname = os.path.join(srcdir, 'test.zip') + abspath = os.path.join(srcdir, 'abspath') + with zipfile.ZipFile(zipname, 'w') as zf: + zf.writestr(abspath, 'badfile') + zf.writestr(os.sep + abspath, 'badfile') + zf.writestr('/abspath', 'badfile') + zf.writestr('C:/abspath', 'badfile') + zf.writestr('D:\\abspath', 'badfile') + zf.writestr('E:abspath', 'badfile') + zf.writestr('F:/G:/abspath', 'badfile') + zf.writestr('//server/share/abspath', 'badfile') + zf.writestr('\\\\server2\\share\\abspath', 'badfile') + zf.writestr('../relpath', 'badfile') + zf.writestr(os.pardir + os.sep + 'relpath2', 'badfile') + zf.writestr('good/file', 'goodfile') + zf.writestr('good..file', 'goodfile') + + dstdir = os.path.join(self.mkdtemp(), 'dst') + unpack_archive(zipname, dstdir) + self.assertTrue(os.path.isfile(os.path.join(dstdir, 'good', 'file'))) + self.assertTrue(os.path.isfile(os.path.join(dstdir, 'good..file'))) + self.assertFalse(os.path.exists(abspath)) + self.assertFalse(os.path.exists(os.path.join(dstdir, 'abspath'))) + self.assertFalse(os.path.exists(os.path.join(dstdir, 'G_'))) + self.assertFalse(os.path.exists(os.path.join(dstdir, 'server'))) + if os.name != 'nt': + self.assertTrue(os.path.isfile(os.path.join(dstdir, 'C:', 'abspath'))) + self.assertTrue(os.path.isfile(os.path.join(dstdir, 'D:\\abspath'))) + self.assertTrue(os.path.isfile(os.path.join(dstdir, 'E:abspath'))) + self.assertTrue(os.path.isfile(os.path.join(dstdir, 'F:', 'G:', 'abspath'))) + self.assertTrue(os.path.isfile(os.path.join(dstdir, '\\\\server2\\share\\abspath'))) + if os.pardir == '..': + self.assertFalse(os.path.exists(os.path.join(dstdir, '..', 'relpath'))) + self.assertFalse(os.path.exists(os.path.join(dstdir, 'relpath'))) + else: + self.assertTrue(os.path.isfile(os.path.join(dstdir, '..', 'relpath'))) + self.assertFalse(os.path.exists(os.path.join(dstdir, os.pardir, 'relpath2'))) + self.assertFalse(os.path.exists(os.path.join(dstdir, 'relpath2'))) + + dstdir2 = os.path.join(self.mkdtemp(), 'dst') + os.mkdir(dstdir2) + with os_helper.change_cwd(dstdir2): + unpack_archive(zipname, '') + self.assertTrue(os.path.isfile(os.path.join('good', 'file'))) + self.assertTrue(os.path.isfile('good..file')) + self.assertFalse(os.path.exists(abspath)) + self.assertFalse(os.path.exists('abspath')) + self.assertFalse(os.path.exists('C_')) + self.assertFalse(os.path.exists('server')) + if os.name != 'nt': + self.assertTrue(os.path.isfile(os.path.join('C:', 'abspath'))) + self.assertTrue(os.path.isfile('D:\\abspath')) + self.assertTrue(os.path.isfile('E:abspath')) + self.assertTrue(os.path.isfile(os.path.join('F:', 'G:', 'abspath'))) + self.assertTrue(os.path.isfile('\\\\server2\\share\\abspath')) + if os.pardir == '..': + self.assertFalse(os.path.exists(os.path.join('..', 'relpath'))) + self.assertFalse(os.path.exists('relpath')) + else: + self.assertTrue(os.path.isfile(os.path.join('..', 'relpath'))) + self.assertFalse(os.path.exists(os.path.join(os.pardir, 'relpath2'))) + self.assertFalse(os.path.exists('relpath2')) + def test_unpack_registry(self): formats = get_unpack_formats() diff --git a/Lib/zipfile.py b/Lib/zipfile.py index 64289ebd674659..9789b06e021a65 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -1268,6 +1268,7 @@ class ZipFile: fp = None # Set here since __del__ checks it _windows_illegal_name_trans_table = None + _ignore_invalid_names = False def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True, compresslevel=None, *, strict_timestamps=True, metadata_encoding=None): @@ -1738,19 +1739,27 @@ def _extract_member(self, member, targetpath, pwd): # build the destination pathname, replacing # forward slashes to platform specific separators. - arcname = member.filename.replace('/', os.path.sep) - - if os.path.altsep: + arcname = member.filename + if os.path.sep != '/': + arcname = arcname.replace('/', os.path.sep) + if os.path.altsep and os.path.altsep != '/': arcname = arcname.replace(os.path.altsep, os.path.sep) # interpret absolute pathname as relative, remove drive letter or # UNC path, redundant separators, "." and ".." components. - arcname = os.path.splitdrive(arcname)[1] + drive, arcname = os.path.splitdrive(arcname) + if self._ignore_invalid_names and (drive or arcname.startswith(os.path.sep)): + return None + if self._ignore_invalid_names and os.path.pardir in arcname.split(os.path.sep): + return None invalid_path_parts = ('', os.path.curdir, os.path.pardir) arcname = os.path.sep.join(x for x in arcname.split(os.path.sep) if x not in invalid_path_parts) if os.path.sep == '\\': # filter illegal characters on Windows - arcname = self._sanitize_windows_name(arcname, os.path.sep) + arcname2 = self._sanitize_windows_name(arcname, os.path.sep) + if self._ignore_invalid_names and arcname2 != arcname: + return None + arcname = arcname2 targetpath = os.path.join(targetpath, arcname) targetpath = os.path.normpath(targetpath) diff --git a/Misc/NEWS.d/next/Security/2026-03-29-12-51-33.gh-issue-146581.4vZfB0.rst b/Misc/NEWS.d/next/Security/2026-03-29-12-51-33.gh-issue-146581.4vZfB0.rst new file mode 100644 index 00000000000000..98e65549d79016 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2026-03-29-12-51-33.gh-issue-146581.4vZfB0.rst @@ -0,0 +1,5 @@ +Fix vulnerability in :func:`shutil.unpack_archive` for ZIP files on Windows +which allowed to write files outside of the destination tree if the patch in +the archive contains a Windows drive prefix. Now such invalid paths will be +skipped. Files containing ".." in the name (like "foo..bar") are no longer +skipped.