Skip to content

Commit 52a4f9a

Browse files
serhiy-storchakamiss-islington
authored andcommitted
gh-146581: Fix vulnerability in shutil.unpack_archive() for ZIP files on Windows (GH-146591)
Use ZipFile.extractall() to sanitize file names and extract files. Files with invalid names (e.g. absolute paths) are now skipped. Files containing ".." in the name are no longer skipped. (cherry picked from commit fc829e8) Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
1 parent e378eda commit 52a4f9a

4 files changed

Lines changed: 89 additions & 28 deletions

File tree

Lib/shutil.py

Lines changed: 3 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1237,27 +1237,9 @@ def _unpack_zipfile(filename, extract_dir):
12371237
if not zipfile.is_zipfile(filename):
12381238
raise ReadError("%s is not a zip file" % filename)
12391239

1240-
zip = zipfile.ZipFile(filename)
1241-
try:
1242-
for info in zip.infolist():
1243-
name = info.filename
1244-
1245-
# don't extract absolute paths or ones with .. in them
1246-
if name.startswith('/') or '..' in name:
1247-
continue
1248-
1249-
targetpath = os.path.join(extract_dir, *name.split('/'))
1250-
if not targetpath:
1251-
continue
1252-
1253-
_ensure_directory(targetpath)
1254-
if not name.endswith('/'):
1255-
# file
1256-
with zip.open(name, 'r') as source, \
1257-
open(targetpath, 'wb') as target:
1258-
copyfileobj(source, target)
1259-
finally:
1260-
zip.close()
1240+
with zipfile.ZipFile(filename) as zip:
1241+
zip._ignore_invalid_names = True
1242+
zip.extractall(extract_dir)
12611243

12621244
def _unpack_tarfile(filename, extract_dir, *, filter=None):
12631245
"""Unpack tar/tar.gz/tar.bz2/tar.xz `filename` to `extract_dir`

Lib/test/test_shutil.py

Lines changed: 65 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2059,8 +2059,6 @@ def test_make_zipfile_rootdir_nodir(self):
20592059
def check_unpack_archive(self, format, **kwargs):
20602060
self.check_unpack_archive_with_converter(
20612061
format, lambda path: path, **kwargs)
2062-
self.check_unpack_archive_with_converter(
2063-
format, FakePath, **kwargs)
20642062
self.check_unpack_archive_with_converter(format, FakePath, **kwargs)
20652063

20662064
def check_unpack_archive_with_converter(self, format, converter, **kwargs):
@@ -2116,6 +2114,71 @@ def test_unpack_archive_zip(self):
21162114
with self.assertRaises(TypeError):
21172115
self.check_unpack_archive('zip', filter='data')
21182116

2117+
def test_unpack_archive_zip_badpaths(self):
2118+
srcdir = self.mkdtemp()
2119+
zipname = os.path.join(srcdir, 'test.zip')
2120+
abspath = os.path.join(srcdir, 'abspath')
2121+
with zipfile.ZipFile(zipname, 'w') as zf:
2122+
zf.writestr(abspath, 'badfile')
2123+
zf.writestr(os.sep + abspath, 'badfile')
2124+
zf.writestr('/abspath', 'badfile')
2125+
zf.writestr('C:/abspath', 'badfile')
2126+
zf.writestr('D:\\abspath', 'badfile')
2127+
zf.writestr('E:abspath', 'badfile')
2128+
zf.writestr('F:/G:/abspath', 'badfile')
2129+
zf.writestr('//server/share/abspath', 'badfile')
2130+
zf.writestr('\\\\server2\\share\\abspath', 'badfile')
2131+
zf.writestr('../relpath', 'badfile')
2132+
zf.writestr(os.pardir + os.sep + 'relpath2', 'badfile')
2133+
zf.writestr('good/file', 'goodfile')
2134+
zf.writestr('good..file', 'goodfile')
2135+
2136+
dstdir = os.path.join(self.mkdtemp(), 'dst')
2137+
unpack_archive(zipname, dstdir)
2138+
self.assertTrue(os.path.isfile(os.path.join(dstdir, 'good', 'file')))
2139+
self.assertTrue(os.path.isfile(os.path.join(dstdir, 'good..file')))
2140+
self.assertFalse(os.path.exists(abspath))
2141+
self.assertFalse(os.path.exists(os.path.join(dstdir, 'abspath')))
2142+
self.assertFalse(os.path.exists(os.path.join(dstdir, 'G_')))
2143+
self.assertFalse(os.path.exists(os.path.join(dstdir, 'server')))
2144+
if os.name != 'nt':
2145+
self.assertTrue(os.path.isfile(os.path.join(dstdir, 'C:', 'abspath')))
2146+
self.assertTrue(os.path.isfile(os.path.join(dstdir, 'D:\\abspath')))
2147+
self.assertTrue(os.path.isfile(os.path.join(dstdir, 'E:abspath')))
2148+
self.assertTrue(os.path.isfile(os.path.join(dstdir, 'F:', 'G:', 'abspath')))
2149+
self.assertTrue(os.path.isfile(os.path.join(dstdir, '\\\\server2\\share\\abspath')))
2150+
if os.pardir == '..':
2151+
self.assertFalse(os.path.exists(os.path.join(dstdir, '..', 'relpath')))
2152+
self.assertFalse(os.path.exists(os.path.join(dstdir, 'relpath')))
2153+
else:
2154+
self.assertTrue(os.path.isfile(os.path.join(dstdir, '..', 'relpath')))
2155+
self.assertFalse(os.path.exists(os.path.join(dstdir, os.pardir, 'relpath2')))
2156+
self.assertFalse(os.path.exists(os.path.join(dstdir, 'relpath2')))
2157+
2158+
dstdir2 = os.path.join(self.mkdtemp(), 'dst')
2159+
os.mkdir(dstdir2)
2160+
with os_helper.change_cwd(dstdir2):
2161+
unpack_archive(zipname, '')
2162+
self.assertTrue(os.path.isfile(os.path.join('good', 'file')))
2163+
self.assertTrue(os.path.isfile('good..file'))
2164+
self.assertFalse(os.path.exists(abspath))
2165+
self.assertFalse(os.path.exists('abspath'))
2166+
self.assertFalse(os.path.exists('C_'))
2167+
self.assertFalse(os.path.exists('server'))
2168+
if os.name != 'nt':
2169+
self.assertTrue(os.path.isfile(os.path.join('C:', 'abspath')))
2170+
self.assertTrue(os.path.isfile('D:\\abspath'))
2171+
self.assertTrue(os.path.isfile('E:abspath'))
2172+
self.assertTrue(os.path.isfile(os.path.join('F:', 'G:', 'abspath')))
2173+
self.assertTrue(os.path.isfile('\\\\server2\\share\\abspath'))
2174+
if os.pardir == '..':
2175+
self.assertFalse(os.path.exists(os.path.join('..', 'relpath')))
2176+
self.assertFalse(os.path.exists('relpath'))
2177+
else:
2178+
self.assertTrue(os.path.isfile(os.path.join('..', 'relpath')))
2179+
self.assertFalse(os.path.exists(os.path.join(os.pardir, 'relpath2')))
2180+
self.assertFalse(os.path.exists('relpath2'))
2181+
21192182
def test_unpack_registry(self):
21202183

21212184
formats = get_unpack_formats()

Lib/zipfile/__init__.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1309,6 +1309,7 @@ class ZipFile:
13091309

13101310
fp = None # Set here since __del__ checks it
13111311
_windows_illegal_name_trans_table = None
1312+
_ignore_invalid_names = False
13121313

13131314
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
13141315
compresslevel=None, *, strict_timestamps=True, metadata_encoding=None):
@@ -1793,21 +1794,31 @@ def _extract_member(self, member, targetpath, pwd):
17931794

17941795
# build the destination pathname, replacing
17951796
# forward slashes to platform specific separators.
1796-
arcname = member.filename.replace('/', os.path.sep)
1797-
1798-
if os.path.altsep:
1797+
arcname = member.filename
1798+
if os.path.sep != '/':
1799+
arcname = arcname.replace('/', os.path.sep)
1800+
if os.path.altsep and os.path.altsep != '/':
17991801
arcname = arcname.replace(os.path.altsep, os.path.sep)
18001802
# interpret absolute pathname as relative, remove drive letter or
18011803
# UNC path, redundant separators, "." and ".." components.
1802-
arcname = os.path.splitdrive(arcname)[1]
1804+
drive, root, arcname = os.path.splitroot(arcname)
1805+
if self._ignore_invalid_names and (drive or root):
1806+
return None
1807+
if self._ignore_invalid_names and os.path.pardir in arcname.split(os.path.sep):
1808+
return None
18031809
invalid_path_parts = ('', os.path.curdir, os.path.pardir)
18041810
arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
18051811
if x not in invalid_path_parts)
18061812
if os.path.sep == '\\':
18071813
# filter illegal characters on Windows
1808-
arcname = self._sanitize_windows_name(arcname, os.path.sep)
1814+
arcname2 = self._sanitize_windows_name(arcname, os.path.sep)
1815+
if self._ignore_invalid_names and arcname2 != arcname:
1816+
return None
1817+
arcname = arcname2
18091818

18101819
if not arcname and not member.is_dir():
1820+
if self._ignore_invalid_names:
1821+
return None
18111822
raise ValueError("Empty filename.")
18121823

18131824
targetpath = os.path.join(targetpath, arcname)
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Fix vulnerability in :func:`shutil.unpack_archive` for ZIP files on Windows
2+
which allowed to write files outside of the destination tree if the patch in
3+
the archive contains a Windows drive prefix. Now such invalid paths will be
4+
skipped. Files containing ".." in the name (like "foo..bar") are no longer
5+
skipped.

0 commit comments

Comments
 (0)