Skip to content

Commit 133b1a5

Browse files
committed
Add tests to validate the encoding constraints for .start and .pth files
1 parent 6e34cbc commit 133b1a5

1 file changed

Lines changed: 42 additions & 0 deletions

File tree

Lib/test/test_site.py

Lines changed: 42 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1048,6 +1048,31 @@ def test_read_start_file_duplicates_not_deduplicated(self):
10481048
self.assertEqual(site._pending_entrypoints[fullname],
10491049
['os.path:join', 'os.path:join'])
10501050

1051+
def test_read_start_file_accepts_utf8_bom(self):
    # PEP 829: .start files MUST be utf-8-sig (UTF-8 with optional BOM).
    # A leading BOM has to be consumed by the decoder, so the entry
    # point must be registered as plain 'os.path:join' with no U+FEFF
    # prefix.
    #
    # The fixture is written into self.sitedir -- the very directory
    # that _read_start_file() is asked to read -- so the file written
    # and the file parsed are the same path by construction.
    fullname = os.path.join(self.sitedir, 'foo.start')
    with open(fullname, 'wb') as f:
        # Binary mode: the BOM bytes must reach the disk untouched.
        f.write(b'\xef\xbb\xbf' + b'os.path:join\n')
    site._read_start_file(self.sitedir, 'foo.start')
    self.assertEqual(
        site._pending_entrypoints[fullname], ['os.path:join'])
1060+
1061+
def test_read_start_file_invalid_utf8_silently_skipped(self):
    # PEP 829: .start files MUST be utf-8-sig.  Unlike .pth, there is
    # no locale-encoding fallback -- a .start file that is not valid
    # UTF-8 is silently skipped, with no key registered in
    # _pending_entrypoints and no output to stderr (parsing errors
    # are reported only under -v).
    #
    # The fixture is written into self.sitedir, the directory that is
    # actually read below.  If it were written anywhere else, a
    # missing file would satisfy both assertions and the test would
    # pass without ever exercising the decode-failure path.
    fullname = os.path.join(self.sitedir, 'foo.start')
    with open(fullname, 'wb') as f:
        # Bare continuation bytes -- 0x80 is invalid as a UTF-8 start
        # byte, so decoding fails on the very first byte.
        f.write(b'\x80\x80\x80\n')
    with captured_stderr() as err:
        site._read_start_file(self.sitedir, 'foo.start')
    # Nothing may be registered for the undecodable file ...
    self.assertEqual(site._pending_entrypoints, {})
    # ... and the failure must not be reported on stderr.
    self.assertEqual(err.getvalue(), "")
1075+
10511076
def test_two_start_files_with_duplicates_not_deduplicated(self):
10521077
self._make_start("os.path:join", name="foo")
10531078
self._make_start("os.path:join", name="bar")
@@ -1102,6 +1127,23 @@ def test_read_pth_file_bad_line_continues(self):
11021127
fullname = os.path.join(self.sitedir, 'foo.pth')
11031128
self.assertIn(subdir, site._pending_syspaths.get(fullname, []))
11041129

1130+
def test_read_pth_file_locale_fallback(self):
    # PEP 829: .pth files that fail UTF-8 decoding fall back to the
    # locale encoding for backward compatibility (deprecated in
    # 3.15, to be removed in 3.20).  Mock locale.getencoding() so
    # the test does not depend on the host's actual locale.
    subdir = os.path.join(self.sitedir, 'mylib')
    os.mkdir(subdir)
    # Write the .pth file into self.sitedir as well: it is the
    # directory _read_pth_file() is asked to read, and the one the
    # 'mylib' subdirectory was just created in.
    fullname = os.path.join(self.sitedir, 'foo.pth')
    # b'\xe9' followed by an ASCII byte is invalid UTF-8 (a lead byte
    # with no continuation bytes) but decodes to 'é' in latin-1.
    with open(fullname, 'wb') as f:
        f.write(b'# caf\xe9 comment\nmylib\n')
    # stderr is captured only to keep the test output clean; its
    # content is deliberately not asserted on here.
    with mock.patch('locale.getencoding', return_value='latin-1'), \
         captured_stderr():
        site._read_pth_file(self.sitedir, 'foo.pth', set())
    # The 'mylib' line can only have been queued if the whole file was
    # successfully decoded via the (mocked) locale encoding.
    self.assertIn(subdir, site._pending_syspaths.get(fullname, []))
1146+
11051147
# --- _execute_start_entrypoints tests ---
11061148

11071149
def test_execute_entrypoints_with_callable(self):

0 commit comments

Comments
 (0)