Skip to content

Commit cca6d2d

Browse files
gh-146581: Fix vulnerability in shutil.unpack_archive() for ZIP files on Windows (GH-146591)
Use ZipFile.extractall() to sanitize file names and extract files. Files with invalid names (e.g. absolute paths) are now skipped. Files containing ".." in the name are no longer skipped. (cherry picked from commit fc829e8)
1 parent e20c6c9 commit cca6d2d

4 files changed

Lines changed: 87 additions & 26 deletions

File tree

Lib/shutil.py

Lines changed: 3 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1241,27 +1241,9 @@ def _unpack_zipfile(filename, extract_dir):
12411241
if not zipfile.is_zipfile(filename):
12421242
raise ReadError("%s is not a zip file" % filename)
12431243

1244-
zip = zipfile.ZipFile(filename)
1245-
try:
1246-
for info in zip.infolist():
1247-
name = info.filename
1248-
1249-
# don't extract absolute paths or ones with .. in them
1250-
if name.startswith('/') or '..' in name:
1251-
continue
1252-
1253-
targetpath = os.path.join(extract_dir, *name.split('/'))
1254-
if not targetpath:
1255-
continue
1256-
1257-
_ensure_directory(targetpath)
1258-
if not name.endswith('/'):
1259-
# file
1260-
with zip.open(name, 'r') as source, \
1261-
open(targetpath, 'wb') as target:
1262-
copyfileobj(source, target)
1263-
finally:
1264-
zip.close()
1244+
with zipfile.ZipFile(filename) as zip:
1245+
zip._ignore_invalid_names = True
1246+
zip.extractall(extract_dir)
12651247

12661248
def _unpack_tarfile(filename, extract_dir, *, filter=None):
12671249
"""Unpack tar/tar.gz/tar.bz2/tar.xz `filename` to `extract_dir`

Lib/test/test_shutil.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1799,6 +1799,71 @@ def test_unpack_archive_zip(self):
17991799
with self.assertRaises(TypeError):
18001800
self.check_unpack_archive('zip', filter='data')
18011801

1802+
def test_unpack_archive_zip_badpaths(self):
1803+
srcdir = self.mkdtemp()
1804+
zipname = os.path.join(srcdir, 'test.zip')
1805+
abspath = os.path.join(srcdir, 'abspath')
1806+
with zipfile.ZipFile(zipname, 'w') as zf:
1807+
zf.writestr(abspath, 'badfile')
1808+
zf.writestr(os.sep + abspath, 'badfile')
1809+
zf.writestr('/abspath', 'badfile')
1810+
zf.writestr('C:/abspath', 'badfile')
1811+
zf.writestr('D:\\abspath', 'badfile')
1812+
zf.writestr('E:abspath', 'badfile')
1813+
zf.writestr('F:/G:/abspath', 'badfile')
1814+
zf.writestr('//server/share/abspath', 'badfile')
1815+
zf.writestr('\\\\server2\\share\\abspath', 'badfile')
1816+
zf.writestr('../relpath', 'badfile')
1817+
zf.writestr(os.pardir + os.sep + 'relpath2', 'badfile')
1818+
zf.writestr('good/file', 'goodfile')
1819+
zf.writestr('good..file', 'goodfile')
1820+
1821+
dstdir = os.path.join(self.mkdtemp(), 'dst')
1822+
unpack_archive(zipname, dstdir)
1823+
self.assertTrue(os.path.isfile(os.path.join(dstdir, 'good', 'file')))
1824+
self.assertTrue(os.path.isfile(os.path.join(dstdir, 'good..file')))
1825+
self.assertFalse(os.path.exists(abspath))
1826+
self.assertFalse(os.path.exists(os.path.join(dstdir, 'abspath')))
1827+
self.assertFalse(os.path.exists(os.path.join(dstdir, 'G_')))
1828+
self.assertFalse(os.path.exists(os.path.join(dstdir, 'server')))
1829+
if os.name != 'nt':
1830+
self.assertTrue(os.path.isfile(os.path.join(dstdir, 'C:', 'abspath')))
1831+
self.assertTrue(os.path.isfile(os.path.join(dstdir, 'D:\\abspath')))
1832+
self.assertTrue(os.path.isfile(os.path.join(dstdir, 'E:abspath')))
1833+
self.assertTrue(os.path.isfile(os.path.join(dstdir, 'F:', 'G:', 'abspath')))
1834+
self.assertTrue(os.path.isfile(os.path.join(dstdir, '\\\\server2\\share\\abspath')))
1835+
if os.pardir == '..':
1836+
self.assertFalse(os.path.exists(os.path.join(dstdir, '..', 'relpath')))
1837+
self.assertFalse(os.path.exists(os.path.join(dstdir, 'relpath')))
1838+
else:
1839+
self.assertTrue(os.path.isfile(os.path.join(dstdir, '..', 'relpath')))
1840+
self.assertFalse(os.path.exists(os.path.join(dstdir, os.pardir, 'relpath2')))
1841+
self.assertFalse(os.path.exists(os.path.join(dstdir, 'relpath2')))
1842+
1843+
dstdir2 = os.path.join(self.mkdtemp(), 'dst')
1844+
os.mkdir(dstdir2)
1845+
with os_helper.change_cwd(dstdir2):
1846+
unpack_archive(zipname, '')
1847+
self.assertTrue(os.path.isfile(os.path.join('good', 'file')))
1848+
self.assertTrue(os.path.isfile('good..file'))
1849+
self.assertFalse(os.path.exists(abspath))
1850+
self.assertFalse(os.path.exists('abspath'))
1851+
self.assertFalse(os.path.exists('C_'))
1852+
self.assertFalse(os.path.exists('server'))
1853+
if os.name != 'nt':
1854+
self.assertTrue(os.path.isfile(os.path.join('C:', 'abspath')))
1855+
self.assertTrue(os.path.isfile('D:\\abspath'))
1856+
self.assertTrue(os.path.isfile('E:abspath'))
1857+
self.assertTrue(os.path.isfile(os.path.join('F:', 'G:', 'abspath')))
1858+
self.assertTrue(os.path.isfile('\\\\server2\\share\\abspath'))
1859+
if os.pardir == '..':
1860+
self.assertFalse(os.path.exists(os.path.join('..', 'relpath')))
1861+
self.assertFalse(os.path.exists('relpath'))
1862+
else:
1863+
self.assertTrue(os.path.isfile(os.path.join('..', 'relpath')))
1864+
self.assertFalse(os.path.exists(os.path.join(os.pardir, 'relpath2')))
1865+
self.assertFalse(os.path.exists('relpath2'))
1866+
18021867
def test_unpack_registry(self):
18031868

18041869
formats = get_unpack_formats()

Lib/zipfile.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1268,6 +1268,7 @@ class ZipFile:
12681268

12691269
fp = None # Set here since __del__ checks it
12701270
_windows_illegal_name_trans_table = None
1271+
_ignore_invalid_names = False
12711272

12721273
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
12731274
compresslevel=None, *, strict_timestamps=True, metadata_encoding=None):
@@ -1738,19 +1739,27 @@ def _extract_member(self, member, targetpath, pwd):
17381739

17391740
# build the destination pathname, replacing
17401741
# forward slashes to platform specific separators.
1741-
arcname = member.filename.replace('/', os.path.sep)
1742-
1743-
if os.path.altsep:
1742+
arcname = member.filename
1743+
if os.path.sep != '/':
1744+
arcname = arcname.replace('/', os.path.sep)
1745+
if os.path.altsep and os.path.altsep != '/':
17441746
arcname = arcname.replace(os.path.altsep, os.path.sep)
17451747
# interpret absolute pathname as relative, remove drive letter or
17461748
# UNC path, redundant separators, "." and ".." components.
1747-
arcname = os.path.splitdrive(arcname)[1]
1749+
drive, arcname = os.path.splitdrive(arcname)
1750+
if self._ignore_invalid_names and (drive or arcname.startswith(os.path.sep)):
1751+
return None
1752+
if self._ignore_invalid_names and os.path.pardir in arcname.split(os.path.sep):
1753+
return None
17481754
invalid_path_parts = ('', os.path.curdir, os.path.pardir)
17491755
arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
17501756
if x not in invalid_path_parts)
17511757
if os.path.sep == '\\':
17521758
# filter illegal characters on Windows
1753-
arcname = self._sanitize_windows_name(arcname, os.path.sep)
1759+
arcname2 = self._sanitize_windows_name(arcname, os.path.sep)
1760+
if self._ignore_invalid_names and arcname2 != arcname:
1761+
return None
1762+
arcname = arcname2
17541763

17551764
targetpath = os.path.join(targetpath, arcname)
17561765
targetpath = os.path.normpath(targetpath)
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Fix vulnerability in :func:`shutil.unpack_archive` for ZIP files on Windows
2+
which allowed to write files outside of the destination tree if the patch in
3+
the archive contains a Windows drive prefix. Now such invalid paths will be
4+
skipped. Files containing ".." in the name (like "foo..bar") are no longer
5+
skipped.

0 commit comments

Comments
 (0)