diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py index 0d407371f40a0f..b03563153df578 100644 --- a/Lib/test/test_zipfile/test_core.py +++ b/Lib/test/test_zipfile/test_core.py @@ -3639,29 +3639,23 @@ def test_read_with_unsuitable_metadata_encoding(self): def test_read_after_append(self): newname = '\u56db' # Han 'four' - expected_names = [name.encode('shift_jis').decode('cp437') - for name in self.file_names[:2]] + self.file_names[2:] - expected_names.append(newname) - expected_content = (*self.file_content, b"newcontent") + newname2 = 'fünf' # encodeable in cp437 + expected_names = [*self.file_names, newname, newname2] + bad_expected_names = [name.encode('shift_jis').decode('cp437') + if i < 2 else name + for i, name in enumerate(expected_names)] + expected_content = (*self.file_content, b"newcontent", b"newcontent2") with zipfile.ZipFile(TESTFN, "a") as zipfp: zipfp.writestr(newname, "newcontent") - self.assertEqual(sorted(zipfp.namelist()), sorted(expected_names)) + zipfp.writestr(newname2, "newcontent2") + self.assertEqual(sorted(zipfp.namelist()), sorted(bad_expected_names)) with zipfile.ZipFile(TESTFN, "r") as zipfp: - self._test_read(zipfp, expected_names, expected_content) + self._test_read(zipfp, bad_expected_names, expected_content) with zipfile.ZipFile(TESTFN, "r", metadata_encoding='shift_jis') as zipfp: - self.assertEqual(sorted(zipfp.namelist()), sorted(expected_names)) - for i, (name, content) in enumerate(zip(expected_names, expected_content)): - info = zipfp.getinfo(name) - self.assertEqual(info.filename, name) - self.assertEqual(info.file_size, len(content)) - if i < 2: - with self.assertRaises(zipfile.BadZipFile): - zipfp.read(name) - else: - self.assertEqual(zipfp.read(name), content) + self._test_read(zipfp, expected_names, expected_content) def test_write_with_metadata_encoding(self): ZF = zipfile.ZipFile @@ -3670,6 +3664,20 @@ def test_write_with_metadata_encoding(self): "^metadata_encoding is only"): ZF("nonesuch.zip", mode, metadata_encoding="shift_jis") + def test_add_comment(self): + with zipfile.ZipFile(TESTFN, "r") as zipfp: + bad_expected_names = zipfp.namelist() + + with zipfile.ZipFile(TESTFN, "a") as zipfp: + zipfp.comment = b'comment' + self.assertEqual(zipfp.namelist(), bad_expected_names) + + with zipfile.ZipFile(TESTFN, "r") as zipfp: + self._test_read(zipfp, bad_expected_names, self.file_content) + + with zipfile.ZipFile(TESTFN, "r", metadata_encoding='shift_jis') as zipfp: + self._test_read(zipfp, self.file_names, self.file_content) + def test_cli_with_metadata_encoding(self): errmsg = "Non-conforming encodings not supported with -c." args = ["--metadata-encoding=shift_jis", "-c", "nonesuch", "nonesuch"] diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index 86c3bc36b695c7..7e6e94d82d11fd 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -566,8 +566,12 @@ def FileHeader(self, zip64=None): return header + filename + extra def _encodeFilenameFlags(self): + if self.flag_bits & _MASK_UTF_FILENAME: + encoding = 'ascii' + else: + encoding = 'cp437' try: - return self.filename.encode('ascii'), self.flag_bits + return self.filename.encode(encoding), self.flag_bits & ~_MASK_UTF_FILENAME except UnicodeEncodeError: return self.filename.encode('utf-8'), self.flag_bits | _MASK_UTF_FILENAME @@ -1808,7 +1812,7 @@ def _open_to_write(self, zinfo, force_zip64=False): zinfo.compress_size = 0 zinfo.CRC = 0 - zinfo.flag_bits = 0x00 + zinfo.flag_bits = _MASK_UTF_FILENAME if zinfo.compress_type == ZIP_LZMA: # Compressed data includes an end-of-stream (EOS) marker zinfo.flag_bits |= _MASK_COMPRESS_OPTION_1 diff --git a/Misc/NEWS.d/next/Library/2026-05-19-19-00-49.gh-issue-84353.ZU5zaQ.rst b/Misc/NEWS.d/next/Library/2026-05-19-19-00-49.gh-issue-84353.ZU5zaQ.rst new file mode 100644 index 00000000000000..198af61ba9dd1d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-05-19-19-00-49.gh-issue-84353.ZU5zaQ.rst @@ -0,0 +1,2 @@ +Preserve non-ASCII filenames encoded not in UTF-8 when appending to +:class:`zipfile.ZipFile`.