From cef74d0d051ded2e95e06f81c42b2809bf39d826 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Thu, 23 Apr 2026 14:46:35 +0300
Subject: [PATCH 1/9] gh-148821: Always reject known multi-byte encodings in
 pyexpat

The XML parser (pyexpat) now raises ValueError for known unsupported
multi-byte encodings such as "ISO-2022-JP", "utf8" (without hyphen) or
"raw-unicode-escape" instead of failing later, when it encounters
non-ASCII data.
---
 Include/codecs.h                              |  6 +++
 Include/internal/pycore_codecs.h              |  2 +-
 Lib/codecs.py                                 |  5 +-
 Lib/encodings/big5.py                         |  1 +
 Lib/encodings/big5hkscs.py                    |  1 +
 Lib/encodings/cp932.py                        |  1 +
 Lib/encodings/cp949.py                        |  1 +
 Lib/encodings/cp950.py                        |  1 +
 Lib/encodings/euc_jis_2004.py                 |  1 +
 Lib/encodings/euc_jisx0213.py                 |  1 +
 Lib/encodings/euc_jp.py                       |  1 +
 Lib/encodings/euc_kr.py                       |  1 +
 Lib/encodings/gb18030.py                      |  1 +
 Lib/encodings/gb2312.py                       |  1 +
 Lib/encodings/gbk.py                          |  1 +
 Lib/encodings/hz.py                           |  1 +
 Lib/encodings/idna.py                         |  1 +
 Lib/encodings/iso2022_jp.py                   |  1 +
 Lib/encodings/iso2022_jp_1.py                 |  1 +
 Lib/encodings/iso2022_jp_2.py                 |  1 +
 Lib/encodings/iso2022_jp_2004.py              |  1 +
 Lib/encodings/iso2022_jp_3.py                 |  1 +
 Lib/encodings/iso2022_jp_ext.py               |  1 +
 Lib/encodings/iso2022_kr.py                   |  1 +
 Lib/encodings/johab.py                        |  1 +
 Lib/encodings/punycode.py                     |  1 +
 Lib/encodings/raw_unicode_escape.py           |  1 +
 Lib/encodings/shift_jis.py                    |  1 +
 Lib/encodings/shift_jis_2004.py               |  1 +
 Lib/encodings/shift_jisx0213.py               |  1 +
 Lib/encodings/unicode_escape.py               |  1 +
 Lib/encodings/utf_16.py                       |  1 +
 Lib/encodings/utf_16_be.py                    |  1 +
 Lib/encodings/utf_16_le.py                    |  1 +
 Lib/encodings/utf_32.py                       |  1 +
 Lib/encodings/utf_32_be.py                    |  1 +
 Lib/encodings/utf_32_le.py                    |  1 +
 Lib/encodings/utf_7.py                        |  1 +
 Lib/encodings/utf_8.py                        |  1 +
 Lib/encodings/utf_8_sig.py                    |  1 +
 Lib/test/test_codecs.py                       |  3 ++
 Lib/test/test_pyexpat.py                      | 47 ++++++++++++++++++-
 ...-04-23-14-46-30.gh-issue-148821.cR4kMa.rst |  4 ++
 Modules/pyexpat.c                             | 26 ++++++++++
 Tools/unicode/gencjkcodecs.py                 |  1 +
 45 files changed, 128 insertions(+), 3 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Library/2026-04-23-14-46-30.gh-issue-148821.cR4kMa.rst

diff --git a/Include/codecs.h b/Include/codecs.h
index 512a3c723eca18..d14f527dee75da 100644
--- a/Include/codecs.h
+++ b/Include/codecs.h
@@ -170,6 +170,12 @@ PyAPI_FUNC(PyObject *) PyCodec_NameReplaceErrors(PyObject *exc);
 PyAPI_DATA(const char *) Py_hexdigits;
 #endif
 
+#ifndef Py_LIMITED_API
+PyAPI_FUNC(PyObject*) _PyCodec_LookupTextEncoding(
+   const char *encoding,
+   const char *alternate_command);
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/Include/internal/pycore_codecs.h b/Include/internal/pycore_codecs.h
index 52dca1362592d6..bfa10eadf73573 100644
--- a/Include/internal/pycore_codecs.h
+++ b/Include/internal/pycore_codecs.h
@@ -45,7 +45,7 @@ extern int _PyCodec_UnregisterError(const char *name);
    in Python 3.5+?
 
  */
-extern PyObject* _PyCodec_LookupTextEncoding(
+PyAPI_FUNC(PyObject*) _PyCodec_LookupTextEncoding(
    const char *encoding,
    const char *alternate_command);
 
diff --git a/Lib/codecs.py b/Lib/codecs.py
index e4a8010aba90a5..e99460a670a516 100644
--- a/Lib/codecs.py
+++ b/Lib/codecs.py
@@ -93,7 +93,8 @@ class CodecInfo(tuple):
 
     def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
         incrementalencoder=None, incrementaldecoder=None, name=None,
-        *, _is_text_encoding=None):
+        *, _is_text_encoding=None,
+        _is_single_byte=None):
         self = tuple.__new__(cls, (encode, decode, streamreader, streamwriter))
         self.name = name
         self.encode = encode
@@ -104,6 +105,8 @@ def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
         self.streamreader = streamreader
         if _is_text_encoding is not None:
             self._is_text_encoding = _is_text_encoding
+        if _is_single_byte is not None:
+            self._is_single_byte = _is_single_byte
         return self
 
     def __repr__(self):
diff --git a/Lib/encodings/big5.py b/Lib/encodings/big5.py
index 7adeb0e1605274..8bed14b35c5899 100644
--- a/Lib/encodings/big5.py
+++ b/Lib/encodings/big5.py
@@ -36,4 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
+        _is_single_byte=False,
     )
diff --git a/Lib/encodings/big5hkscs.py b/Lib/encodings/big5hkscs.py
index 350df37baaedaf..eeeb7865895190 100644
--- a/Lib/encodings/big5hkscs.py
+++ b/Lib/encodings/big5hkscs.py
@@ -36,4 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
+        _is_single_byte=False,
     )
diff --git a/Lib/encodings/cp932.py b/Lib/encodings/cp932.py
index e01f59b7190576..3671a4387f96b6 100644
--- a/Lib/encodings/cp932.py
+++ b/Lib/encodings/cp932.py
@@ -36,4 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
+        _is_single_byte=False,
     )
diff --git a/Lib/encodings/cp949.py b/Lib/encodings/cp949.py
index 627c87125e2aff..df998ba3bad75c 100644
--- a/Lib/encodings/cp949.py
+++ b/Lib/encodings/cp949.py
@@ -36,4 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
+        _is_single_byte=False,
     )
diff --git a/Lib/encodings/cp950.py b/Lib/encodings/cp950.py
index 39eec5ed0ddef9..12c7bbd8d226ad 100644
--- a/Lib/encodings/cp950.py
+++ b/Lib/encodings/cp950.py
@@ -36,4 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
+        _is_single_byte=False,
     )
diff --git a/Lib/encodings/euc_jis_2004.py b/Lib/encodings/euc_jis_2004.py
index 72b87aea68862f..68604db3c30b2d 100644
--- a/Lib/encodings/euc_jis_2004.py
+++ b/Lib/encodings/euc_jis_2004.py
@@ -36,4 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
+        _is_single_byte=False,
     )
diff --git a/Lib/encodings/euc_jisx0213.py b/Lib/encodings/euc_jisx0213.py
index cc47d04112a187..cd2808965a6edd 100644
--- a/Lib/encodings/euc_jisx0213.py
+++ b/Lib/encodings/euc_jisx0213.py
@@ -36,4 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
+        _is_single_byte=False,
     )
diff --git a/Lib/encodings/euc_jp.py b/Lib/encodings/euc_jp.py
index 7bcbe4147f2ad4..bcdd0582d71902 100644
--- a/Lib/encodings/euc_jp.py
+++ b/Lib/encodings/euc_jp.py
@@ -36,4 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
+        _is_single_byte=False,
     )
diff --git a/Lib/encodings/euc_kr.py b/Lib/encodings/euc_kr.py
index c1fb1260e879f0..8a81356d8f9980 100644
--- a/Lib/encodings/euc_kr.py
+++ b/Lib/encodings/euc_kr.py
@@ -36,4 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
+        _is_single_byte=False,
     )
diff --git a/Lib/encodings/gb18030.py b/Lib/encodings/gb18030.py
index 34fb6c366a7614..98df7d4cbeec3d 100644
--- a/Lib/encodings/gb18030.py
+++ b/Lib/encodings/gb18030.py
@@ -36,4 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
+        _is_single_byte=False,
     )
diff --git a/Lib/encodings/gb2312.py b/Lib/encodings/gb2312.py
index 3c3b837d618ecd..ba915a2500f21a 100644
--- a/Lib/encodings/gb2312.py
+++ b/Lib/encodings/gb2312.py
@@ -36,4 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
+        _is_single_byte=False,
     )
diff --git a/Lib/encodings/gbk.py b/Lib/encodings/gbk.py
index 1b45db89859cdf..d597c7bb77e93e 100644
--- a/Lib/encodings/gbk.py
+++ b/Lib/encodings/gbk.py
@@ -36,4 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
+        _is_single_byte=False,
     )
diff --git a/Lib/encodings/hz.py b/Lib/encodings/hz.py
index 383442a3c9ac9a..43ee36a9286426 100644
--- a/Lib/encodings/hz.py
+++ b/Lib/encodings/hz.py
@@ -36,4 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
+        _is_single_byte=False,
     )
diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py
index d31ee07ab45b76..98bf9462e36fbf 100644
--- a/Lib/encodings/idna.py
+++ b/Lib/encodings/idna.py
@@ -385,4 +385,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamwriter=StreamWriter,
         streamreader=StreamReader,
+        _is_single_byte=False,
     )
diff --git a/Lib/encodings/iso2022_jp.py b/Lib/encodings/iso2022_jp.py
index ab0406069356e4..27129ce67aa884 100644
--- a/Lib/encodings/iso2022_jp.py
+++ b/Lib/encodings/iso2022_jp.py
@@ -36,4 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
+        _is_single_byte=False,
     )
diff --git a/Lib/encodings/iso2022_jp_1.py b/Lib/encodings/iso2022_jp_1.py
index 997044dc378749..0f41dd95cd4332 100644
--- a/Lib/encodings/iso2022_jp_1.py
+++ b/Lib/encodings/iso2022_jp_1.py
@@ -36,4 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
+        _is_single_byte=False,
     )
diff --git a/Lib/encodings/iso2022_jp_2.py b/Lib/encodings/iso2022_jp_2.py
index 9106bf762512fd..25f625819f5ea0 100644
--- a/Lib/encodings/iso2022_jp_2.py
+++ b/Lib/encodings/iso2022_jp_2.py
@@ -36,4 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
+        _is_single_byte=False,
     )
diff --git a/Lib/encodings/iso2022_jp_2004.py b/Lib/encodings/iso2022_jp_2004.py
index 40198bf098570b..1f0bd1b7874472 100644
--- a/Lib/encodings/iso2022_jp_2004.py
+++ b/Lib/encodings/iso2022_jp_2004.py
@@ -36,4 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
+        _is_single_byte=False,
     )
diff --git a/Lib/encodings/iso2022_jp_3.py b/Lib/encodings/iso2022_jp_3.py
index 346e08beccbbaf..2acdb3a2cd9be3 100644
--- a/Lib/encodings/iso2022_jp_3.py
+++ b/Lib/encodings/iso2022_jp_3.py
@@ -36,4 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
+        _is_single_byte=False,
     )
diff --git a/Lib/encodings/iso2022_jp_ext.py b/Lib/encodings/iso2022_jp_ext.py
index 752bab9813a094..a32a533e8bdf00 100644
--- a/Lib/encodings/iso2022_jp_ext.py
+++ b/Lib/encodings/iso2022_jp_ext.py
@@ -36,4 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
+        _is_single_byte=False,
     )
diff --git a/Lib/encodings/iso2022_kr.py b/Lib/encodings/iso2022_kr.py
index bf7018763eae38..51dd4ab560422a 100644
--- a/Lib/encodings/iso2022_kr.py
+++ b/Lib/encodings/iso2022_kr.py
@@ -36,4 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
+        _is_single_byte=False,
     )
diff --git a/Lib/encodings/johab.py b/Lib/encodings/johab.py
index 512aeeb732b522..e58c50a06c4b96 100644
--- a/Lib/encodings/johab.py
+++ b/Lib/encodings/johab.py
@@ -36,4 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
+        _is_single_byte=False,
     )
diff --git a/Lib/encodings/punycode.py b/Lib/encodings/punycode.py
index 268fccbd53974e..335acb87cb9b28 100644
--- a/Lib/encodings/punycode.py
+++ b/Lib/encodings/punycode.py
@@ -250,4 +250,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamwriter=StreamWriter,
         streamreader=StreamReader,
+        _is_single_byte=False,
     )
diff --git a/Lib/encodings/raw_unicode_escape.py b/Lib/encodings/raw_unicode_escape.py
index 46c8e070dd192e..5c5b41437a84b4 100644
--- a/Lib/encodings/raw_unicode_escape.py
+++ b/Lib/encodings/raw_unicode_escape.py
@@ -43,4 +43,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamwriter=StreamWriter,
         streamreader=StreamReader,
+        _is_single_byte=False,
     )
diff --git a/Lib/encodings/shift_jis.py b/Lib/encodings/shift_jis.py
index 83381172764dea..bf7fded09468c8 100644
--- a/Lib/encodings/shift_jis.py
+++ b/Lib/encodings/shift_jis.py
@@ -36,4 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
+        _is_single_byte=False,
     )
diff --git a/Lib/encodings/shift_jis_2004.py b/Lib/encodings/shift_jis_2004.py
index 161b1e86f9918a..ae40b684a010f2 100644
--- a/Lib/encodings/shift_jis_2004.py
+++ b/Lib/encodings/shift_jis_2004.py
@@ -36,4 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
+        _is_single_byte=False,
     )
diff --git a/Lib/encodings/shift_jisx0213.py b/Lib/encodings/shift_jisx0213.py
index cb653f53055e67..5af8565618b40e 100644
--- a/Lib/encodings/shift_jisx0213.py
+++ b/Lib/encodings/shift_jisx0213.py
@@ -36,4 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
+        _is_single_byte=False,
     )
diff --git a/Lib/encodings/unicode_escape.py b/Lib/encodings/unicode_escape.py
index 9b1ce99b339ae0..d896cefc9596be 100644
--- a/Lib/encodings/unicode_escape.py
+++ b/Lib/encodings/unicode_escape.py
@@ -43,4 +43,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamwriter=StreamWriter,
         streamreader=StreamReader,
+        _is_single_byte=False,
     )
diff --git a/Lib/encodings/utf_16.py b/Lib/encodings/utf_16.py
index d3b9980026666f..eac93bd17d07d1 100644
--- a/Lib/encodings/utf_16.py
+++ b/Lib/encodings/utf_16.py
@@ -152,4 +152,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
+        _is_single_byte=False,
     )
diff --git a/Lib/encodings/utf_16_be.py b/Lib/encodings/utf_16_be.py
index 86b458eb9bcd96..d056cf9202a40f 100644
--- a/Lib/encodings/utf_16_be.py
+++ b/Lib/encodings/utf_16_be.py
@@ -39,4 +39,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
+        _is_single_byte=False,
     )
diff --git a/Lib/encodings/utf_16_le.py b/Lib/encodings/utf_16_le.py
index ec454142eedf25..2e07f76cc3f742 100644
--- a/Lib/encodings/utf_16_le.py
+++ b/Lib/encodings/utf_16_le.py
@@ -39,4 +39,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
+        _is_single_byte=False,
     )
diff --git a/Lib/encodings/utf_32.py b/Lib/encodings/utf_32.py
index 1924bedbb74c68..aebe145ec95e71 100644
--- a/Lib/encodings/utf_32.py
+++ b/Lib/encodings/utf_32.py
@@ -147,4 +147,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
+        _is_single_byte=False,
     )
diff --git a/Lib/encodings/utf_32_be.py b/Lib/encodings/utf_32_be.py
index fe272b5fafec69..ee1b41a11aa35f 100644
--- a/Lib/encodings/utf_32_be.py
+++ b/Lib/encodings/utf_32_be.py
@@ -34,4 +34,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
+        _is_single_byte=False,
     )
diff --git a/Lib/encodings/utf_32_le.py b/Lib/encodings/utf_32_le.py
index 9e48210928ee65..4ac786bb73349b 100644
--- a/Lib/encodings/utf_32_le.py
+++ b/Lib/encodings/utf_32_le.py
@@ -34,4 +34,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
+        _is_single_byte=False,
     )
diff --git a/Lib/encodings/utf_7.py b/Lib/encodings/utf_7.py
index 8e0567f2087d65..3127867fb5bff9 100644
--- a/Lib/encodings/utf_7.py
+++ b/Lib/encodings/utf_7.py
@@ -35,4 +35,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
+        _is_single_byte=False,
     )
diff --git a/Lib/encodings/utf_8.py b/Lib/encodings/utf_8.py
index 1bf6336571547b..3801615ce34001 100644
--- a/Lib/encodings/utf_8.py
+++ b/Lib/encodings/utf_8.py
@@ -39,4 +39,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
+        _is_single_byte=False,
     )
diff --git a/Lib/encodings/utf_8_sig.py b/Lib/encodings/utf_8_sig.py
index 1bb479203f365d..b5e5c89f80b9eb 100644
--- a/Lib/encodings/utf_8_sig.py
+++ b/Lib/encodings/utf_8_sig.py
@@ -127,4 +127,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
+        _is_single_byte=False,
     )
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 79c8a7ef886482..03dd61a76db154 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1892,6 +1892,7 @@ def test_copy(self):
         self.assertIsNot(dup, orig)
         self.assertEqual(dup, orig)
         self.assertTrue(orig._is_text_encoding)
+        self.assertFalse(orig._is_single_byte)
         self.assertEqual(dup.encode, orig.encode)
         self.assertEqual(dup.name, orig.name)
         self.assertEqual(dup.incrementalencoder, orig.incrementalencoder)
@@ -1912,6 +1913,7 @@ def test_deepcopy(self):
         self.assertIsNot(dup, orig)
         self.assertEqual(dup, orig)
         self.assertTrue(orig._is_text_encoding)
+        self.assertFalse(orig._is_single_byte)
         self.assertEqual(dup.encode, orig.encode)
         self.assertEqual(dup.name, orig.name)
         self.assertEqual(dup.incrementalencoder, orig.incrementalencoder)
@@ -1940,6 +1942,7 @@ def test_pickle(self):
                      unpickled_codec_info.incrementalencoder
                 )
                 self.assertTrue(unpickled_codec_info._is_text_encoding)
+                self.assertFalse(unpickled_codec_info._is_single_byte)
 
         # Test a CodecInfo with _is_text_encoding equal to false.
         codec_info = codecs.lookup('base64')
diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py
index aaa91aca36e3c4..0763bb19865167 100644
--- a/Lib/test/test_pyexpat.py
+++ b/Lib/test/test_pyexpat.py
@@ -227,7 +227,7 @@ def _verify_parse_output(self, operations):
             "Character data: '\xb5'",
             "End element: 'root'",
         ]
-        for operation, expected_operation in zip(operations, expected_operations):
+        for operation, expected_operation in zip(operations, expected_operations, strict=True):
             self.assertEqual(operation, expected_operation)
 
     def test_parse_bytes(self):
@@ -276,6 +276,51 @@ def test_parse_again(self):
         self.assertEqual(expat.ErrorString(cm.exception.code),
                           expat.errors.XML_ERROR_FINISHED)
 
+    @support.subTests('enc', ['UTF-8', 'utf-8', 'utf-16', 'koi8-u',
+                              'cp1125', 'cp1251', 'iso8859-5',
+                              'mac_cyrillic'])
+    def test_supportes_ecodings(self, enc):
+        out = self.Outputter()
+        parser = expat.ParserCreate()
+        self._hookup_callbacks(parser, out)
+        data = (f'<?xml version="1.0" encoding="{enc}"?>\n'
+                '<корінь атрибут="значення">зміст</корінь>').encode(enc)
+        parser.Parse(data, True)
+        self.assertEqual(out.out, [
+            ('XML declaration', ('1.0', enc, -1)),
+            "Start element: 'корінь' {'атрибут': 'значення'}",
+            "Character data: 'зміст'",
+            "End element: 'корінь'",
+        ])
+
+    @support.subTests('enc', [
+        'UTF8', 'UTF-7',
+        "unicode-escape", "raw-unicode-escape",
+        "Big5-HKSCS", "Big5",
+        "cp932", "cp949", "cp950",
+        "EUC_JIS-2004", "EUC_JISX0213", "EUC-JP", "EUC-KR",
+        "GB18030", "GB2312", "GBK",
+        "HZ-GB-2312",
+        "ISO-2022-JP", "ISO-2022-JP-1", "ISO-2022-JP-2004",
+        "ISO-2022-JP-2", "ISO-2022-JP-3", "ISO-2022-JP-EXT",
+        "ISO-2022-KR",
+        "johab",
+        "Shift_JIS", "Shift_JIS-2004", "Shift_JISX0213",
+    ])
+    def test_unsupportes_ecodings(self, enc):
+        parser = expat.ParserCreate()
+        data = (f'<?xml version="1.0" encoding="{enc}"?>\n'
+                '<root></root>').encode(enc)
+        with self.assertRaises(ValueError):
+            parser.Parse(data, True)
+
+    def test_unknown_ecoding(self):
+        parser = expat.ParserCreate()
+        data = b'<?xml version="1.0" encoding="xyz"?>\n<root></root>'
+        with self.assertRaises(LookupError):
+            parser.Parse(data, True)
+
+
 class NamespaceSeparatorTest(unittest.TestCase):
     def test_legal(self):
         # Tests that make sure we get errors when the namespace_separator value
diff --git a/Misc/NEWS.d/next/Library/2026-04-23-14-46-30.gh-issue-148821.cR4kMa.rst b/Misc/NEWS.d/next/Library/2026-04-23-14-46-30.gh-issue-148821.cR4kMa.rst
new file mode 100644
index 00000000000000..5dd95047178938
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-04-23-14-46-30.gh-issue-148821.cR4kMa.rst
@@ -0,0 +1,4 @@
+The :mod:`XML parser <pyexpat>` now raises :exc:`ValueError` for known
+unsupported multi-byte encodings such us "UTF8", "ISO-2022-JP" or
+"raw-unicode-escape" instead of failing later, when encounter non-ASCII
+data.
diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c
index 0f0afe17513ef1..68c8ac0e4accef 100644
--- a/Modules/pyexpat.c
+++ b/Modules/pyexpat.c
@@ -4,6 +4,7 @@
 
 #include "Python.h"
 #include "pycore_ceval.h"         // _Py_EnterRecursiveCall()
+#include "pycore_codecs.h"        // _PyCodec_LookupTextEncoding()
 #include "pycore_import.h"        // _PyImport_SetModule()
 #include "pycore_pyhash.h"        // _Py_HashSecret
 #include "pycore_traceback.h"     // _PyTraceback_Add()
@@ -1465,6 +1466,31 @@ PyUnknownEncodingHandler(void *encodingHandlerData,
     if (PyErr_Occurred())
         return XML_STATUS_ERROR;
 
+    PyObject *codec = _PyCodec_LookupTextEncoding(name, NULL);
+    if (codec == NULL) {
+        return XML_STATUS_ERROR;
+    }
+    // if (!PyTuple_CheckExact(codec)) {
+    //     PyObject *attr;
+    //     if (PyObject_GetOptionalAttrString(codec, "_is_single_byte", &attr) < 0) {
+    //         Py_DECREF(codec);
+    //         return XML_STATUS_ERROR;
+    //     }
+    //     if (attr != NULL) {
+    //         int is_single_byte = PyObject_IsTrue(attr);
+    //         Py_DECREF(attr);
+    //         if (is_single_byte <= 0) {
+    //             Py_DECREF(codec);
+    //             if (is_single_byte == 0) {
+    //                 PyErr_SetString(PyExc_ValueError,
+    //                                 "multi-byte encodings are not supported");
+    //             }
+    //             return XML_STATUS_ERROR;
+    //         }
+    //     }
+    // }
+    Py_DECREF(codec);
+
     u = PyUnicode_Decode((const char*) template_buffer, 256, name, "replace");
     if (u == NULL) {
         Py_XDECREF(u);
diff --git a/Tools/unicode/gencjkcodecs.py b/Tools/unicode/gencjkcodecs.py
index 45866bf2f61062..eb04f67f2077eb 100644
--- a/Tools/unicode/gencjkcodecs.py
+++ b/Tools/unicode/gencjkcodecs.py
@@ -51,6 +51,7 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
+        _is_single_byte=False,
     )
 """)
 

From 2e2df1ea095bf9263b3aedb6332a5a2ef6c6ed3f Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Thu, 23 Apr 2026 15:47:18 +0300
Subject: [PATCH 2/9] Uncomment temporarily commented-out code.

---
 Modules/pyexpat.c | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c
index 68c8ac0e4accef..e95dcb611a33e2 100644
--- a/Modules/pyexpat.c
+++ b/Modules/pyexpat.c
@@ -1470,25 +1470,25 @@ PyUnknownEncodingHandler(void *encodingHandlerData,
     if (codec == NULL) {
         return XML_STATUS_ERROR;
     }
-    // if (!PyTuple_CheckExact(codec)) {
-    //     PyObject *attr;
-    //     if (PyObject_GetOptionalAttrString(codec, "_is_single_byte", &attr) < 0) {
-    //         Py_DECREF(codec);
-    //         return XML_STATUS_ERROR;
-    //     }
-    //     if (attr != NULL) {
-    //         int is_single_byte = PyObject_IsTrue(attr);
-    //         Py_DECREF(attr);
-    //         if (is_single_byte <= 0) {
-    //             Py_DECREF(codec);
-    //             if (is_single_byte == 0) {
-    //                 PyErr_SetString(PyExc_ValueError,
-    //                                 "multi-byte encodings are not supported");
-    //             }
-    //             return XML_STATUS_ERROR;
-    //         }
-    //     }
-    // }
+    if (!PyTuple_CheckExact(codec)) {
+        PyObject *attr;
+        if (PyObject_GetOptionalAttrString(codec, "_is_single_byte", &attr) < 0) {
+            Py_DECREF(codec);
+            return XML_STATUS_ERROR;
+        }
+        if (attr != NULL) {
+            int is_single_byte = PyObject_IsTrue(attr);
+            Py_DECREF(attr);
+            if (is_single_byte <= 0) {
+                Py_DECREF(codec);
+                if (is_single_byte == 0) {
+                    PyErr_SetString(PyExc_ValueError,
+                                    "multi-byte encodings are not supported");
+                }
+                return XML_STATUS_ERROR;
+            }
+        }
+    }
     Py_DECREF(codec);
 
     u = PyUnicode_Decode((const char*) template_buffer, 256, name, "replace");

From 91ac15e21f2d81fc2803856f641d7ae5bbaba45a Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Sat, 25 Apr 2026 15:59:16 +0300
Subject: [PATCH 3/9] Fix the module reference.

---
 .../next/Library/2026-04-23-14-46-30.gh-issue-148821.cR4kMa.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Misc/NEWS.d/next/Library/2026-04-23-14-46-30.gh-issue-148821.cR4kMa.rst b/Misc/NEWS.d/next/Library/2026-04-23-14-46-30.gh-issue-148821.cR4kMa.rst
index 5dd95047178938..119a465fcb200a 100644
--- a/Misc/NEWS.d/next/Library/2026-04-23-14-46-30.gh-issue-148821.cR4kMa.rst
+++ b/Misc/NEWS.d/next/Library/2026-04-23-14-46-30.gh-issue-148821.cR4kMa.rst
@@ -1,4 +1,4 @@
-The :mod:`XML parser <pyexpat>` now raises :exc:`ValueError` for known
+The :mod:`XML parser <xml.parsers.expat>` now raises :exc:`ValueError` for known
 unsupported multi-byte encodings such us "UTF8", "ISO-2022-JP" or
 "raw-unicode-escape" instead of failing later, when encounter non-ASCII
 data.

From 2177825c7729d03c92b9618b0f98f2aca0abb3b9 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Sat, 25 Apr 2026 16:11:37 +0300
Subject: [PATCH 4/9] Fix ElementTree tests.

---
 Lib/test/test_xml_etree.py | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index 51af46f124cac6..730456e7582adc 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -1014,7 +1014,7 @@ def xml(encoding):
         def bxml(encoding):
             return xml(encoding).encode(encoding)
         supported_encodings = [
-            'ascii', 'utf-8', 'utf-8-sig', 'utf-16', 'utf-16be', 'utf-16le',
+            'ascii', 'utf-8', 'utf-16', 'utf-16be', 'utf-16le',
             'iso8859-1', 'iso8859-2', 'iso8859-3', 'iso8859-4', 'iso8859-5',
             'iso8859-6', 'iso8859-7', 'iso8859-8', 'iso8859-9', 'iso8859-10',
             'iso8859-13', 'iso8859-14', 'iso8859-15', 'iso8859-16',
@@ -1025,32 +1025,34 @@ def bxml(encoding):
             'cp1256', 'cp1257', 'cp1258',
             'mac-cyrillic', 'mac-greek', 'mac-iceland', 'mac-latin2',
             'mac-roman', 'mac-turkish',
-            'iso2022-jp', 'iso2022-jp-1', 'iso2022-jp-2', 'iso2022-jp-2004',
-            'iso2022-jp-3', 'iso2022-jp-ext',
-            'koi8-r', 'koi8-t', 'koi8-u', 'kz1048',
-            'hz', 'ptcp154',
+            'koi8-r', 'koi8-t', 'koi8-u', 'kz1048', 'ptcp154',
         ]
         for encoding in supported_encodings:
-            self.assertEqual(ET.tostring(ET.XML(bxml(encoding))), b'<xml />')
+            with self.subTest(encoding=encoding):
+                self.assertEqual(ET.tostring(ET.XML(bxml(encoding))), b'<xml />')
 
         unsupported_ascii_compatible_encodings = [
             'big5', 'big5hkscs',
             'cp932', 'cp949', 'cp950',
             'euc-jp', 'euc-jis-2004', 'euc-jisx0213', 'euc-kr',
             'gb2312', 'gbk', 'gb18030',
-            'iso2022-kr', 'johab',
+            'iso2022-jp', 'iso2022-jp-1', 'iso2022-jp-2', 'iso2022-jp-2004',
+            'iso2022-jp-3', 'iso2022-jp-ext',
+            'iso2022-kr', 'johab', 'hz',
             'shift-jis', 'shift-jis-2004', 'shift-jisx0213',
-            'utf-7',
+            'utf-7', 'utf-8-sig', 'utf8',
         ]
         for encoding in unsupported_ascii_compatible_encodings:
-            self.assertRaises(ValueError, ET.XML, bxml(encoding))
+            with self.subTest(encoding=encoding):
+                self.assertRaises(ValueError, ET.XML, bxml(encoding))
 
         unsupported_ascii_incompatible_encodings = [
             'cp037', 'cp424', 'cp500', 'cp864', 'cp875', 'cp1026', 'cp1140',
             'utf_32', 'utf_32_be', 'utf_32_le',
         ]
         for encoding in unsupported_ascii_incompatible_encodings:
-            self.assertRaises(ET.ParseError, ET.XML, bxml(encoding))
+            with self.subTest(encoding=encoding):
+                self.assertRaises(ET.ParseError, ET.XML, bxml(encoding))
 
         self.assertRaises(ValueError, ET.XML, xml('undefined').encode('ascii'))
         self.assertRaises(LookupError, ET.XML, xml('xxx').encode('ascii'))

From 6c9588fe822d5c38374b45f04b6060a61c324623 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Mon, 27 Apr 2026 18:47:31 +0300
Subject: [PATCH 5/9] Fix linking error for _PyCodec_LookupTextEncoding.

---
 Include/codecs.h | 6 ------
 Python/codecs.c  | 1 +
 2 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/Include/codecs.h b/Include/codecs.h
index d14f527dee75da..512a3c723eca18 100644
--- a/Include/codecs.h
+++ b/Include/codecs.h
@@ -170,12 +170,6 @@ PyAPI_FUNC(PyObject *) PyCodec_NameReplaceErrors(PyObject *exc);
 PyAPI_DATA(const char *) Py_hexdigits;
 #endif
 
-#ifndef Py_LIMITED_API
-PyAPI_FUNC(PyObject*) _PyCodec_LookupTextEncoding(
-   const char *encoding,
-   const char *alternate_command);
-#endif
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/Python/codecs.c b/Python/codecs.c
index 0bde56c0ac662e..a522e6b88068b3 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -10,6 +10,7 @@ Copyright (c) Corporation for National Research Initiatives.
 
 #include "Python.h"
 #include "pycore_call.h"          // _PyObject_CallNoArgs()
+#include "pycore_codecs.h"        // export _PyCodec_LookupTextEncoding()
 #include "pycore_interp.h"        // PyInterpreterState.codec_search_path
 #include "pycore_pyerrors.h"      // _PyErr_FormatNote()
 #include "pycore_pystate.h"       // _PyInterpreterState_GET()

From 02ecf828b36191f32dc466dd26ec948e472cf2b2 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Mon, 27 Apr 2026 19:08:18 +0300
Subject: [PATCH 6/9] Add stricter tests for supported encodings.

---
 Lib/test/test_pyexpat.py   | 54 ++++++++++++++++++++++++++++++--------
 Lib/test/test_xml_etree.py | 13 +++++----
 2 files changed, 51 insertions(+), 16 deletions(-)

diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py
index 0763bb19865167..4d3425cf867227 100644
--- a/Lib/test/test_pyexpat.py
+++ b/Lib/test/test_pyexpat.py
@@ -276,24 +276,56 @@ def test_parse_again(self):
         self.assertEqual(expat.ErrorString(cm.exception.code),
                           expat.errors.XML_ERROR_FINISHED)
 
-    @support.subTests('enc', ['UTF-8', 'utf-8', 'utf-16', 'koi8-u',
-                              'cp1125', 'cp1251', 'iso8859-5',
-                              'mac_cyrillic'])
-    def test_supportes_ecodings(self, enc):
+    @support.subTests('encoding', [
+        'utf-8', 'utf-16', 'utf-16be', 'utf-16le',
+        'iso8859-1', 'iso8859-2', 'iso8859-3', 'iso8859-4', 'iso8859-5',
+        'iso8859-6', 'iso8859-7', 'iso8859-8', 'iso8859-9', 'iso8859-10',
+        'iso8859-13', 'iso8859-14', 'iso8859-15', 'iso8859-16',
+        'cp437', 'cp720', 'cp737', 'cp775', 'cp850', 'cp852',
+        'cp855', 'cp856', 'cp857', 'cp858', 'cp860', 'cp861', 'cp862',
+        'cp863', 'cp865', 'cp866', 'cp869', 'cp874', 'cp1006', 'cp1125',
+        'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
+        'cp1256', 'cp1257', 'cp1258',
+        'mac-cyrillic', 'mac-greek', 'mac-iceland', 'mac-latin2',
+        'mac-roman', 'mac-turkish',
+        'koi8-r', 'koi8-t', 'koi8-u', 'kz1048', 'ptcp154',
+    ])
+    def test_supported_ecodings(self, encoding):
+        out = self.Outputter()
+        parser = expat.ParserCreate()
+        self._hookup_callbacks(parser, out)
+        c = 'éπя\u05d0\u060c€'.encode(encoding, 'ignore').decode(encoding)[0]
+        data = (f'<?xml version="1.0" encoding="{encoding}"?>\n'
+                f'<root>{c}</root>').encode(encoding)  # c is non-ASCII
+        parser.Parse(data, True)  # True marks the final chunk
+        self.assertEqual(out.out, [
+            ('XML declaration', ('1.0', encoding, -1)),  # -1: no standalone
+            "Start element: 'root' {}",
+            f'Character data: {c!r}',
+            "End element: 'root'",
+        ])
+        ])
+
+    @support.subTests('encoding', [
+        'UTF-8', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be',
+        'koi8-u', 'cp1125', 'cp1251', 'iso8859-5', 'mac-cyrillic',
+    ])
+    def test_supported_ecodings2(self, encoding):
         out = self.Outputter()
         parser = expat.ParserCreate()
         self._hookup_callbacks(parser, out)
-        data = (f'<?xml version="1.0" encoding="{enc}"?>\n'
-                '<корінь атрибут="значення">зміст</корінь>').encode(enc)
+        data = (f'<?xml version="1.0" encoding="{encoding}"?>\n'
+                '<!-- коментар -->'  # non-ASCII text inside a comment
+                '<корінь атрибут="значення">зміст</корінь>').encode(encoding)
         parser.Parse(data, True)
         self.assertEqual(out.out, [
-            ('XML declaration', ('1.0', enc, -1)),
+            ('XML declaration', ('1.0', encoding, -1)),
+            "Comment: ' коментар '",
             "Start element: 'корінь' {'атрибут': 'значення'}",
             "Character data: 'зміст'",
             "End element: 'корінь'",
         ])
         ])
 
-    @support.subTests('enc', [
+    @support.subTests('encoding', [
         'UTF8', 'UTF-7',
         "unicode-escape", "raw-unicode-escape",
         "Big5-HKSCS", "Big5",
@@ -307,10 +339,10 @@ def test_supportes_ecodings(self, enc):
         "johab",
         "Shift_JIS", "Shift_JIS-2004", "Shift_JISX0213",
     ])
-    def test_unsupportes_ecodings(self, enc):
+    def test_unsupportes_ecodings(self, encoding):
         parser = expat.ParserCreate()
-        data = (f'<?xml version="1.0" encoding="{enc}"?>\n'
-                '<root></root>').encode(enc)
+        data = (f'<?xml version="1.0" encoding="{encoding}"?>\n'
+                '<root></root>').encode(encoding)
         with self.assertRaises(ValueError):
             parser.Parse(data, True)
 
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index 730456e7582adc..71ebb7f3182b26 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -1009,12 +1009,12 @@ def check(encoding, body=''):
         check("cp437", '\u221a')
         check("mac-roman", '\u02da')
 
-        def xml(encoding):
-            return "<?xml version='1.0' encoding='%s'?><xml />" % encoding
-        def bxml(encoding):
-            return xml(encoding).encode(encoding)
+        def xml(encoding, body=''):
+            return "<?xml version='1.0' encoding='%s'?><xml>%s</xml>" % (encoding, body)
+        def bxml(encoding, body=''):
+            return xml(encoding, body).encode(encoding)
         supported_encodings = [
-            'ascii', 'utf-8', 'utf-16', 'utf-16be', 'utf-16le',
+            'utf-8', 'utf-16', 'utf-16be', 'utf-16le',
             'iso8859-1', 'iso8859-2', 'iso8859-3', 'iso8859-4', 'iso8859-5',
             'iso8859-6', 'iso8859-7', 'iso8859-8', 'iso8859-9', 'iso8859-10',
             'iso8859-13', 'iso8859-14', 'iso8859-15', 'iso8859-16',
@@ -1030,6 +1030,9 @@ def bxml(encoding):
         for encoding in supported_encodings:
             with self.subTest(encoding=encoding):
                 self.assertEqual(ET.tostring(ET.XML(bxml(encoding))), b'<xml />')
+                c = 'éπя\u05d0\u060c€'.encode(encoding, 'ignore').decode(encoding)[0]
+                self.assertEqual(ET.tostring(ET.XML(bxml(encoding, c))),
+                                 ('<xml>&#%d;</xml>' % ord(c)).encode())
 
         unsupported_ascii_compatible_encodings = [
             'big5', 'big5hkscs',

From 230fde155bc8b7dec70f922839790c851b80d350 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Wed, 13 May 2026 13:46:36 +0300
Subject: [PATCH 7/9] Add a comment.

---
 Modules/pyexpat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c
index 2fe07f3dda7065..09f9d40d4ec710 100644
--- a/Modules/pyexpat.c
+++ b/Modules/pyexpat.c
@@ -1479,7 +1479,7 @@ PyUnknownEncodingHandler(void *encodingHandlerData,
         if (attr != NULL) {
             int is_single_byte = PyObject_IsTrue(attr);
             Py_DECREF(attr);
-            if (is_single_byte <= 0) {
+            if (is_single_byte <= 0) {  // error or false
                 Py_DECREF(codec);
                 if (is_single_byte == 0) {
                     PyErr_SetString(PyExc_ValueError,

From ae909b268781c61061f2ce729f644ad1f53106d1 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Wed, 13 May 2026 16:31:59 +0300
Subject: [PATCH 8/9] Use _is_multibyte=True instead of _is_single_byte=False.

---
 Lib/codecs.py                       | 6 +++---
 Lib/encodings/big5.py               | 2 +-
 Lib/encodings/big5hkscs.py          | 2 +-
 Lib/encodings/cp932.py              | 2 +-
 Lib/encodings/cp949.py              | 2 +-
 Lib/encodings/cp950.py              | 2 +-
 Lib/encodings/euc_jis_2004.py       | 2 +-
 Lib/encodings/euc_jisx0213.py       | 2 +-
 Lib/encodings/euc_jp.py             | 2 +-
 Lib/encodings/euc_kr.py             | 2 +-
 Lib/encodings/gb18030.py            | 2 +-
 Lib/encodings/gb2312.py             | 2 +-
 Lib/encodings/gbk.py                | 2 +-
 Lib/encodings/hz.py                 | 2 +-
 Lib/encodings/idna.py               | 2 +-
 Lib/encodings/iso2022_jp.py         | 2 +-
 Lib/encodings/iso2022_jp_1.py       | 2 +-
 Lib/encodings/iso2022_jp_2.py       | 2 +-
 Lib/encodings/iso2022_jp_2004.py    | 2 +-
 Lib/encodings/iso2022_jp_3.py       | 2 +-
 Lib/encodings/iso2022_jp_ext.py     | 2 +-
 Lib/encodings/iso2022_kr.py         | 2 +-
 Lib/encodings/johab.py              | 2 +-
 Lib/encodings/punycode.py           | 2 +-
 Lib/encodings/raw_unicode_escape.py | 2 +-
 Lib/encodings/shift_jis.py          | 2 +-
 Lib/encodings/shift_jis_2004.py     | 2 +-
 Lib/encodings/shift_jisx0213.py     | 2 +-
 Lib/encodings/unicode_escape.py     | 2 +-
 Lib/encodings/utf_16.py             | 2 +-
 Lib/encodings/utf_16_be.py          | 2 +-
 Lib/encodings/utf_16_le.py          | 2 +-
 Lib/encodings/utf_32.py             | 2 +-
 Lib/encodings/utf_32_be.py          | 2 +-
 Lib/encodings/utf_32_le.py          | 2 +-
 Lib/encodings/utf_7.py              | 2 +-
 Lib/encodings/utf_8.py              | 2 +-
 Lib/encodings/utf_8_sig.py          | 2 +-
 Lib/test/test_codecs.py             | 6 +++---
 Modules/pyexpat.c                   | 8 ++++----
 Tools/unicode/gencjkcodecs.py       | 2 +-
 41 files changed, 48 insertions(+), 48 deletions(-)

diff --git a/Lib/codecs.py b/Lib/codecs.py
index e99460a670a516..411856b3738d61 100644
--- a/Lib/codecs.py
+++ b/Lib/codecs.py
@@ -94,7 +94,7 @@ class CodecInfo(tuple):
     def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
         incrementalencoder=None, incrementaldecoder=None, name=None,
         *, _is_text_encoding=None,
-        _is_single_byte=None):
+        _is_multibyte=None):
         self = tuple.__new__(cls, (encode, decode, streamreader, streamwriter))
         self.name = name
         self.encode = encode
@@ -105,8 +105,8 @@ def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
         self.streamreader = streamreader
         if _is_text_encoding is not None:
             self._is_text_encoding = _is_text_encoding
-        if _is_single_byte is not None:
-            self._is_single_byte = _is_single_byte
+        if _is_multibyte is not None:
+            self._is_multibyte = _is_multibyte
         return self
 
     def __repr__(self):
diff --git a/Lib/encodings/big5.py b/Lib/encodings/big5.py
index 8bed14b35c5899..0ffbf78f8c5f4d 100644
--- a/Lib/encodings/big5.py
+++ b/Lib/encodings/big5.py
@@ -36,5 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
-        _is_single_byte=False,
+        _is_multibyte=True,
     )
diff --git a/Lib/encodings/big5hkscs.py b/Lib/encodings/big5hkscs.py
index eeeb7865895190..c0c8960516469e 100644
--- a/Lib/encodings/big5hkscs.py
+++ b/Lib/encodings/big5hkscs.py
@@ -36,5 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
-        _is_single_byte=False,
+        _is_multibyte=True,
     )
diff --git a/Lib/encodings/cp932.py b/Lib/encodings/cp932.py
index 3671a4387f96b6..08213e7d8682ea 100644
--- a/Lib/encodings/cp932.py
+++ b/Lib/encodings/cp932.py
@@ -36,5 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
-        _is_single_byte=False,
+        _is_multibyte=True,
     )
diff --git a/Lib/encodings/cp949.py b/Lib/encodings/cp949.py
index df998ba3bad75c..4a0fb42579c4e6 100644
--- a/Lib/encodings/cp949.py
+++ b/Lib/encodings/cp949.py
@@ -36,5 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
-        _is_single_byte=False,
+        _is_multibyte=True,
     )
diff --git a/Lib/encodings/cp950.py b/Lib/encodings/cp950.py
index 12c7bbd8d226ad..a1e0196668a619 100644
--- a/Lib/encodings/cp950.py
+++ b/Lib/encodings/cp950.py
@@ -36,5 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
-        _is_single_byte=False,
+        _is_multibyte=True,
     )
diff --git a/Lib/encodings/euc_jis_2004.py b/Lib/encodings/euc_jis_2004.py
index 68604db3c30b2d..ede44475ae0891 100644
--- a/Lib/encodings/euc_jis_2004.py
+++ b/Lib/encodings/euc_jis_2004.py
@@ -36,5 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
-        _is_single_byte=False,
+        _is_multibyte=True,
     )
diff --git a/Lib/encodings/euc_jisx0213.py b/Lib/encodings/euc_jisx0213.py
index cd2808965a6edd..958240852519ce 100644
--- a/Lib/encodings/euc_jisx0213.py
+++ b/Lib/encodings/euc_jisx0213.py
@@ -36,5 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
-        _is_single_byte=False,
+        _is_multibyte=True,
     )
diff --git a/Lib/encodings/euc_jp.py b/Lib/encodings/euc_jp.py
index bcdd0582d71902..e1d4d25d6b417d 100644
--- a/Lib/encodings/euc_jp.py
+++ b/Lib/encodings/euc_jp.py
@@ -36,5 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
-        _is_single_byte=False,
+        _is_multibyte=True,
     )
diff --git a/Lib/encodings/euc_kr.py b/Lib/encodings/euc_kr.py
index 8a81356d8f9980..28f491e7367d6a 100644
--- a/Lib/encodings/euc_kr.py
+++ b/Lib/encodings/euc_kr.py
@@ -36,5 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
-        _is_single_byte=False,
+        _is_multibyte=True,
     )
diff --git a/Lib/encodings/gb18030.py b/Lib/encodings/gb18030.py
index 98df7d4cbeec3d..db8368747bad42 100644
--- a/Lib/encodings/gb18030.py
+++ b/Lib/encodings/gb18030.py
@@ -36,5 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
-        _is_single_byte=False,
+        _is_multibyte=True,
     )
diff --git a/Lib/encodings/gb2312.py b/Lib/encodings/gb2312.py
index ba915a2500f21a..cb230c22b948a7 100644
--- a/Lib/encodings/gb2312.py
+++ b/Lib/encodings/gb2312.py
@@ -36,5 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
-        _is_single_byte=False,
+        _is_multibyte=True,
     )
diff --git a/Lib/encodings/gbk.py b/Lib/encodings/gbk.py
index d597c7bb77e93e..7a6402036d39c1 100644
--- a/Lib/encodings/gbk.py
+++ b/Lib/encodings/gbk.py
@@ -36,5 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
-        _is_single_byte=False,
+        _is_multibyte=True,
     )
diff --git a/Lib/encodings/hz.py b/Lib/encodings/hz.py
index 43ee36a9286426..5d175cc18d80cd 100644
--- a/Lib/encodings/hz.py
+++ b/Lib/encodings/hz.py
@@ -36,5 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
-        _is_single_byte=False,
+        _is_multibyte=True,
     )
diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py
index 98bf9462e36fbf..a7934dd9880dd1 100644
--- a/Lib/encodings/idna.py
+++ b/Lib/encodings/idna.py
@@ -385,5 +385,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamwriter=StreamWriter,
         streamreader=StreamReader,
-        _is_single_byte=False,
+        _is_multibyte=True,
     )
diff --git a/Lib/encodings/iso2022_jp.py b/Lib/encodings/iso2022_jp.py
index 27129ce67aa884..ab2361562b1099 100644
--- a/Lib/encodings/iso2022_jp.py
+++ b/Lib/encodings/iso2022_jp.py
@@ -36,5 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
-        _is_single_byte=False,
+        _is_multibyte=True,
     )
diff --git a/Lib/encodings/iso2022_jp_1.py b/Lib/encodings/iso2022_jp_1.py
index 0f41dd95cd4332..8066806b212e74 100644
--- a/Lib/encodings/iso2022_jp_1.py
+++ b/Lib/encodings/iso2022_jp_1.py
@@ -36,5 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
-        _is_single_byte=False,
+        _is_multibyte=True,
     )
diff --git a/Lib/encodings/iso2022_jp_2.py b/Lib/encodings/iso2022_jp_2.py
index 25f625819f5ea0..0804129a08b9db 100644
--- a/Lib/encodings/iso2022_jp_2.py
+++ b/Lib/encodings/iso2022_jp_2.py
@@ -36,5 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
-        _is_single_byte=False,
+        _is_multibyte=True,
     )
diff --git a/Lib/encodings/iso2022_jp_2004.py b/Lib/encodings/iso2022_jp_2004.py
index 1f0bd1b7874472..292e3a7b95c0cc 100644
--- a/Lib/encodings/iso2022_jp_2004.py
+++ b/Lib/encodings/iso2022_jp_2004.py
@@ -36,5 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
-        _is_single_byte=False,
+        _is_multibyte=True,
     )
diff --git a/Lib/encodings/iso2022_jp_3.py b/Lib/encodings/iso2022_jp_3.py
index 2acdb3a2cd9be3..036312d202374a 100644
--- a/Lib/encodings/iso2022_jp_3.py
+++ b/Lib/encodings/iso2022_jp_3.py
@@ -36,5 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
-        _is_single_byte=False,
+        _is_multibyte=True,
     )
diff --git a/Lib/encodings/iso2022_jp_ext.py b/Lib/encodings/iso2022_jp_ext.py
index a32a533e8bdf00..e6a3f888f04516 100644
--- a/Lib/encodings/iso2022_jp_ext.py
+++ b/Lib/encodings/iso2022_jp_ext.py
@@ -36,5 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
-        _is_single_byte=False,
+        _is_multibyte=True,
     )
diff --git a/Lib/encodings/iso2022_kr.py b/Lib/encodings/iso2022_kr.py
index 51dd4ab560422a..56a6e1d3115f1c 100644
--- a/Lib/encodings/iso2022_kr.py
+++ b/Lib/encodings/iso2022_kr.py
@@ -36,5 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
-        _is_single_byte=False,
+        _is_multibyte=True,
     )
diff --git a/Lib/encodings/johab.py b/Lib/encodings/johab.py
index e58c50a06c4b96..a835154b552117 100644
--- a/Lib/encodings/johab.py
+++ b/Lib/encodings/johab.py
@@ -36,5 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
-        _is_single_byte=False,
+        _is_multibyte=True,
     )
diff --git a/Lib/encodings/punycode.py b/Lib/encodings/punycode.py
index 335acb87cb9b28..d274d642d020cd 100644
--- a/Lib/encodings/punycode.py
+++ b/Lib/encodings/punycode.py
@@ -250,5 +250,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamwriter=StreamWriter,
         streamreader=StreamReader,
-        _is_single_byte=False,
+        _is_multibyte=True,
     )
diff --git a/Lib/encodings/raw_unicode_escape.py b/Lib/encodings/raw_unicode_escape.py
index 5c5b41437a84b4..bb8bb15bd589be 100644
--- a/Lib/encodings/raw_unicode_escape.py
+++ b/Lib/encodings/raw_unicode_escape.py
@@ -43,5 +43,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamwriter=StreamWriter,
         streamreader=StreamReader,
-        _is_single_byte=False,
+        _is_multibyte=True,
     )
diff --git a/Lib/encodings/shift_jis.py b/Lib/encodings/shift_jis.py
index bf7fded09468c8..ee2300fb4ad001 100644
--- a/Lib/encodings/shift_jis.py
+++ b/Lib/encodings/shift_jis.py
@@ -36,5 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
-        _is_single_byte=False,
+        _is_multibyte=True,
     )
diff --git a/Lib/encodings/shift_jis_2004.py b/Lib/encodings/shift_jis_2004.py
index ae40b684a010f2..4d9c6fb8613cc7 100644
--- a/Lib/encodings/shift_jis_2004.py
+++ b/Lib/encodings/shift_jis_2004.py
@@ -36,5 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
-        _is_single_byte=False,
+        _is_multibyte=True,
     )
diff --git a/Lib/encodings/shift_jisx0213.py b/Lib/encodings/shift_jisx0213.py
index 5af8565618b40e..2b80a1f7b2c102 100644
--- a/Lib/encodings/shift_jisx0213.py
+++ b/Lib/encodings/shift_jisx0213.py
@@ -36,5 +36,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
-        _is_single_byte=False,
+        _is_multibyte=True,
     )
diff --git a/Lib/encodings/unicode_escape.py b/Lib/encodings/unicode_escape.py
index d896cefc9596be..65b10462228554 100644
--- a/Lib/encodings/unicode_escape.py
+++ b/Lib/encodings/unicode_escape.py
@@ -43,5 +43,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamwriter=StreamWriter,
         streamreader=StreamReader,
-        _is_single_byte=False,
+        _is_multibyte=True,
     )
diff --git a/Lib/encodings/utf_16.py b/Lib/encodings/utf_16.py
index eac93bd17d07d1..41c4f610532927 100644
--- a/Lib/encodings/utf_16.py
+++ b/Lib/encodings/utf_16.py
@@ -152,5 +152,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
-        _is_single_byte=False,
+        _is_multibyte=True,
     )
diff --git a/Lib/encodings/utf_16_be.py b/Lib/encodings/utf_16_be.py
index d056cf9202a40f..9dbb25ff018262 100644
--- a/Lib/encodings/utf_16_be.py
+++ b/Lib/encodings/utf_16_be.py
@@ -39,5 +39,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
-        _is_single_byte=False,
+        _is_multibyte=True,
     )
diff --git a/Lib/encodings/utf_16_le.py b/Lib/encodings/utf_16_le.py
index 2e07f76cc3f742..f9655609379e02 100644
--- a/Lib/encodings/utf_16_le.py
+++ b/Lib/encodings/utf_16_le.py
@@ -39,5 +39,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
-        _is_single_byte=False,
+        _is_multibyte=True,
     )
diff --git a/Lib/encodings/utf_32.py b/Lib/encodings/utf_32.py
index aebe145ec95e71..e5fd8175809be0 100644
--- a/Lib/encodings/utf_32.py
+++ b/Lib/encodings/utf_32.py
@@ -147,5 +147,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
-        _is_single_byte=False,
+        _is_multibyte=True,
     )
diff --git a/Lib/encodings/utf_32_be.py b/Lib/encodings/utf_32_be.py
index ee1b41a11aa35f..100a167a064473 100644
--- a/Lib/encodings/utf_32_be.py
+++ b/Lib/encodings/utf_32_be.py
@@ -34,5 +34,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
-        _is_single_byte=False,
+        _is_multibyte=True,
     )
diff --git a/Lib/encodings/utf_32_le.py b/Lib/encodings/utf_32_le.py
index 4ac786bb73349b..1395c51dcfeac7 100644
--- a/Lib/encodings/utf_32_le.py
+++ b/Lib/encodings/utf_32_le.py
@@ -34,5 +34,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
-        _is_single_byte=False,
+        _is_multibyte=True,
     )
diff --git a/Lib/encodings/utf_7.py b/Lib/encodings/utf_7.py
index 3127867fb5bff9..a273f0fa26c818 100644
--- a/Lib/encodings/utf_7.py
+++ b/Lib/encodings/utf_7.py
@@ -35,5 +35,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
-        _is_single_byte=False,
+        _is_multibyte=True,
     )
diff --git a/Lib/encodings/utf_8.py b/Lib/encodings/utf_8.py
index 3801615ce34001..d5544140451a95 100644
--- a/Lib/encodings/utf_8.py
+++ b/Lib/encodings/utf_8.py
@@ -39,5 +39,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
-        _is_single_byte=False,
+        _is_multibyte=True,
     )
diff --git a/Lib/encodings/utf_8_sig.py b/Lib/encodings/utf_8_sig.py
index b5e5c89f80b9eb..fab8aaf7ba2abb 100644
--- a/Lib/encodings/utf_8_sig.py
+++ b/Lib/encodings/utf_8_sig.py
@@ -127,5 +127,5 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
-        _is_single_byte=False,
+        _is_multibyte=True,
     )
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 03dd61a76db154..aada3752e318a0 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1892,7 +1892,7 @@ def test_copy(self):
         self.assertIsNot(dup, orig)
         self.assertEqual(dup, orig)
         self.assertTrue(orig._is_text_encoding)
-        self.assertFalse(orig._is_single_byte)
+        self.assertTrue(orig._is_multibyte)
         self.assertEqual(dup.encode, orig.encode)
         self.assertEqual(dup.name, orig.name)
         self.assertEqual(dup.incrementalencoder, orig.incrementalencoder)
@@ -1913,7 +1913,7 @@ def test_deepcopy(self):
         self.assertIsNot(dup, orig)
         self.assertEqual(dup, orig)
         self.assertTrue(orig._is_text_encoding)
-        self.assertFalse(orig._is_single_byte)
+        self.assertTrue(orig._is_multibyte)
         self.assertEqual(dup.encode, orig.encode)
         self.assertEqual(dup.name, orig.name)
         self.assertEqual(dup.incrementalencoder, orig.incrementalencoder)
@@ -1942,7 +1942,7 @@ def test_pickle(self):
                      unpickled_codec_info.incrementalencoder
                 )
                 self.assertTrue(unpickled_codec_info._is_text_encoding)
-                self.assertFalse(unpickled_codec_info._is_single_byte)
+                self.assertTrue(unpickled_codec_info._is_multibyte)
 
         # Test a CodecInfo with _is_text_encoding equal to false.
         codec_info = codecs.lookup('base64')
diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c
index 09f9d40d4ec710..81a71410c5de71 100644
--- a/Modules/pyexpat.c
+++ b/Modules/pyexpat.c
@@ -1472,16 +1472,16 @@ PyUnknownEncodingHandler(void *encodingHandlerData,
     }
     if (!PyTuple_CheckExact(codec)) {
         PyObject *attr;
-        if (PyObject_GetOptionalAttrString(codec, "_is_single_byte", &attr) < 0) {
+        if (PyObject_GetOptionalAttrString(codec, "_is_multibyte", &attr) < 0) {
             Py_DECREF(codec);
             return XML_STATUS_ERROR;
         }
         if (attr != NULL) {
-            int is_single_byte = PyObject_IsTrue(attr);
+            int is_multibyte = PyObject_IsTrue(attr);
             Py_DECREF(attr);
-            if (is_single_byte <= 0) {  // error or false
+            if (is_multibyte != 0) {  // true or error
                 Py_DECREF(codec);
-                if (is_single_byte == 0) {
+                if (is_multibyte > 0) {  // true
                     PyErr_SetString(PyExc_ValueError,
                                     "multi-byte encodings are not supported");
                 }
diff --git a/Tools/unicode/gencjkcodecs.py b/Tools/unicode/gencjkcodecs.py
index eb04f67f2077eb..71d19693eb6f7b 100644
--- a/Tools/unicode/gencjkcodecs.py
+++ b/Tools/unicode/gencjkcodecs.py
@@ -51,7 +51,7 @@ def getregentry():
         incrementaldecoder=IncrementalDecoder,
         streamreader=StreamReader,
         streamwriter=StreamWriter,
-        _is_single_byte=False,
+        _is_multibyte=True,
     )
 """)
 

From fb266e193481c320079bd158410b386255de4467 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Wed, 13 May 2026 19:01:26 +0300
Subject: [PATCH 9/9] Include the encoding name in the error message.

---
 Modules/pyexpat.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c
index 81a71410c5de71..b688cf6a16fc37 100644
--- a/Modules/pyexpat.c
+++ b/Modules/pyexpat.c
@@ -1482,8 +1482,9 @@ PyUnknownEncodingHandler(void *encodingHandlerData,
             if (is_multibyte != 0) {  // true or error
                 Py_DECREF(codec);
                 if (is_multibyte > 0) {  // true
-                    PyErr_SetString(PyExc_ValueError,
-                                    "multi-byte encodings are not supported");
+                    PyErr_Format(PyExc_ValueError,
+                                 "multi-byte encodings are not supported: '%s'",
+                                 name);
                 }
                 return XML_STATUS_ERROR;
             }
@@ -1499,8 +1500,9 @@ PyUnknownEncodingHandler(void *encodingHandlerData,
 
     if (PyUnicode_GET_LENGTH(u) != 256) {
         Py_DECREF(u);
-        PyErr_SetString(PyExc_ValueError,
-                        "multi-byte encodings are not supported");
+        PyErr_Format(PyExc_ValueError,
+                     "multi-byte encodings are not supported: '%s'",
+                     name);
         return XML_STATUS_ERROR;
     }