@@ -1048,6 +1048,31 @@ def test_read_start_file_duplicates_not_deduplicated(self):
10481048 self .assertEqual (site ._pending_entrypoints [fullname ],
10491049 ['os.path:join' , 'os.path:join' ])
10501050
def test_read_start_file_accepts_utf8_bom(self):
    # PEP 829: .start files MUST be utf-8-sig (UTF-8 with optional BOM).
    # The expected entry below carries no BOM residue, so the reader has
    # to strip the signature before recording the entry point.
    utf8_bom = b'\xef\xbb\xbf'
    # NOTE(review): the file is written under self.tmpdir but read back
    # through self.sitedir -- presumably the fixture points both at the
    # same directory; confirm against setUp.
    start_path = os.path.join(self.tmpdir, 'foo.start')
    with open(start_path, 'wb') as stream:
        stream.write(utf8_bom + b'os.path:join\n')

    site._read_start_file(self.sitedir, 'foo.start')

    # Pending entry points are keyed by the full path of the .start file.
    key = os.path.join(self.sitedir, 'foo.start')
    recorded = site._pending_entrypoints[key]
    self.assertEqual(recorded, ['os.path:join'])
1060+
def test_read_start_file_invalid_utf8_silently_skipped(self):
    # PEP 829: .start files MUST be utf-8-sig.  Unlike .pth, there is
    # no locale-encoding fallback -- a .start file that is not valid
    # UTF-8 is silently skipped, with no key registered in
    # _pending_entrypoints and no output to stderr (parsing errors
    # are reported only under -v).
    # Bare continuation bytes -- 0x80 is invalid as a UTF-8 start byte.
    undecodable = b'\x80\x80\x80\n'
    start_path = os.path.join(self.tmpdir, 'foo.start')
    with open(start_path, 'wb') as stream:
        stream.write(undecodable)

    with captured_stderr() as stderr:
        site._read_start_file(self.sitedir, 'foo.start')

    # Nothing may be registered and nothing may be printed.
    self.assertEqual(site._pending_entrypoints, {})
    self.assertEqual(stderr.getvalue(), "")
1075+
10511076 def test_two_start_files_with_duplicates_not_deduplicated (self ):
10521077 self ._make_start ("os.path:join" , name = "foo" )
10531078 self ._make_start ("os.path:join" , name = "bar" )
@@ -1102,6 +1127,23 @@ def test_read_pth_file_bad_line_continues(self):
11021127 fullname = os .path .join (self .sitedir , 'foo.pth' )
11031128 self .assertIn (subdir , site ._pending_syspaths .get (fullname , []))
11041129
def test_read_pth_file_locale_fallback(self):
    # PEP 829: .pth files that fail UTF-8 decoding fall back to the
    # locale encoding for backward compatibility (deprecated in
    # 3.15, to be removed in 3.20).
    lib_dir = os.path.join(self.sitedir, 'mylib')
    os.mkdir(lib_dir)

    # \xe9 is invalid UTF-8 but valid in latin-1, so only the fallback
    # decoder can read the file and reach the 'mylib' line.
    pth_path = os.path.join(self.tmpdir, 'foo.pth')
    with open(pth_path, 'wb') as stream:
        stream.write(b'# caf\xe9 comment\nmylib\n')

    # Pin locale.getencoding() so the test does not depend on the
    # host's actual locale; stderr is captured and its content ignored.
    forced_latin1 = mock.patch('locale.getencoding',
                               return_value='latin-1')
    with forced_latin1, captured_stderr():
        site._read_pth_file(self.sitedir, 'foo.pth', set())

    # The directory named in the .pth file must be queued under the
    # full path of that .pth file.
    key = os.path.join(self.sitedir, 'foo.pth')
    queued = site._pending_syspaths.get(key, [])
    self.assertIn(lib_dir, queued)
1146+
11051147 # --- _execute_start_entrypoints tests ---
11061148
11071149 def test_execute_entrypoints_with_callable (self ):
0 commit comments