From 98b1e519273dd28ce73cc21a636e2f3a937e1f8c Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Fri, 27 Feb 2026 12:44:54 +0000 Subject: [PATCH 1/7] gh-145234: Normalize decoded CR in string tokenizer (#145281) --- Lib/test/test_py_compile.py | 8 ++++++++ .../2026-02-26-21-36-00.gh-issue-145234.w0mQ9n.rst | 5 +++++ Parser/tokenizer/string_tokenizer.c | 13 +++++++++++++ 3 files changed, 26 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-02-26-21-36-00.gh-issue-145234.w0mQ9n.rst diff --git a/Lib/test/test_py_compile.py b/Lib/test/test_py_compile.py index 66de61930968e4..da2d630d7ace7b 100644 --- a/Lib/test/test_py_compile.py +++ b/Lib/test/test_py_compile.py @@ -239,6 +239,14 @@ def test_quiet(self): with self.assertRaises(py_compile.PyCompileError): py_compile.compile(bad_coding, self.pyc_path, doraise=True, quiet=1) + def test_utf7_decoded_cr_compiles(self): + with open(self.source_path, 'wb') as file: + file.write(b"#coding=U7+AA0''\n") + + pyc_path = py_compile.compile(self.source_path, self.pyc_path, doraise=True) + self.assertEqual(pyc_path, self.pyc_path) + self.assertTrue(os.path.exists(self.pyc_path)) + class PyCompileTestsWithSourceEpoch(PyCompileTestsBase, unittest.TestCase, diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-02-26-21-36-00.gh-issue-145234.w0mQ9n.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-02-26-21-36-00.gh-issue-145234.w0mQ9n.rst new file mode 100644 index 00000000000000..caeffff0be8a85 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-02-26-21-36-00.gh-issue-145234.w0mQ9n.rst @@ -0,0 +1,5 @@ +Fixed a ``SystemError`` in the parser when an encoding cookie (for example, +UTF-7) decodes to carriage returns (``\r``). Newlines are now normalized after +decoding in the string tokenizer. + +Patch by Pablo Galindo. 
diff --git a/Parser/tokenizer/string_tokenizer.c b/Parser/tokenizer/string_tokenizer.c index 7299ecf483ccd9..7f07cca37ee019 100644 --- a/Parser/tokenizer/string_tokenizer.c +++ b/Parser/tokenizer/string_tokenizer.c @@ -108,6 +108,19 @@ decode_str(const char *input, int single, struct tok_state *tok, int preserve_cr else if (!_PyTokenizer_ensure_utf8(str, tok, 1)) { return _PyTokenizer_error_ret(tok); } + if (utf8 != NULL) { + char *translated = _PyTokenizer_translate_newlines( + str, single, preserve_crlf, tok); + if (translated == NULL) { + Py_DECREF(utf8); + return _PyTokenizer_error_ret(tok); + } + PyMem_Free(tok->input); + tok->input = translated; + str = translated; + Py_CLEAR(utf8); + } + tok->str = str; assert(tok->decoding_buffer == NULL); tok->decoding_buffer = utf8; /* CAUTION */ return str; From dc1b56aa03a1764e7c6bbcbf190b1c293eb5c462 Mon Sep 17 00:00:00 2001 From: Peter Bierma Date: Fri, 27 Feb 2026 10:11:52 -0500 Subject: [PATCH 2/7] gh-141004: Document missing type flags (GH-145127) --- Doc/c-api/typeobj.rst | 46 ++++++++++++++++++++++++ Tools/check-c-api-docs/ignored_c_api.txt | 3 -- 2 files changed, 46 insertions(+), 3 deletions(-) diff --git a/Doc/c-api/typeobj.rst b/Doc/c-api/typeobj.rst index bc134b5d00b4ad..87b488912653b9 100644 --- a/Doc/c-api/typeobj.rst +++ b/Doc/c-api/typeobj.rst @@ -1499,6 +1499,52 @@ and :c:data:`PyType_Type` effectively act as defaults.) It will be removed in a future version of CPython + .. c:macro:: Py_TPFLAGS_HAVE_VERSION_TAG + + This is a :term:`soft deprecated` macro that does nothing. + Historically, this would indicate that the + :c:member:`~PyTypeObject.tp_version_tag` field was available and + initialized. + + + .. c:macro:: Py_TPFLAGS_INLINE_VALUES + + This bit indicates that instances of this type will have an "inline values" + array (containing the object's attributes) placed directly after the end + of the object. + + This requires that :c:macro:`Py_TPFLAGS_HAVE_GC` is set. 
+ + **Inheritance:** + + This flag is not inherited. + + .. versionadded:: 3.13 + + + .. c:macro:: Py_TPFLAGS_IS_ABSTRACT + + This bit indicates that this is an abstract type and therefore cannot + be instantiated. + + **Inheritance:** + + This flag is not inherited. + + .. seealso:: + :mod:`abc` + + + .. c:macro:: Py_TPFLAGS_HAVE_STACKLESS_EXTENSION + + Internal. Do not set or unset this flag. + Historically, this was a reserved flag for use in Stackless Python. + + .. warning:: + This flag is present in header files, but is not used. + This may be removed in a future version of CPython. + + .. c:member:: const char* PyTypeObject.tp_doc .. corresponding-type-slot:: Py_tp_doc diff --git a/Tools/check-c-api-docs/ignored_c_api.txt b/Tools/check-c-api-docs/ignored_c_api.txt index 7bf79872bd4630..02a3031e52fb8b 100644 --- a/Tools/check-c-api-docs/ignored_c_api.txt +++ b/Tools/check-c-api-docs/ignored_c_api.txt @@ -22,9 +22,6 @@ Py_HASH_EXTERNAL PyABIInfo_FREETHREADING_AGNOSTIC # object.h Py_INVALID_SIZE -Py_TPFLAGS_HAVE_VERSION_TAG -Py_TPFLAGS_INLINE_VALUES -Py_TPFLAGS_IS_ABSTRACT # pyexpat.h PyExpat_CAPI_MAGIC PyExpat_CAPSULE_NAME From 8775f900179aa21e6e9ec318dbb5c7cfd3561b66 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Fri, 27 Feb 2026 16:25:46 +0100 Subject: [PATCH 3/7] gh-144693: Clarify that `PyFrame_GetBack` does not raise exceptions (GH-144824) Co-authored-by: Sergey Miryanov Co-authored-by: Peter Bierma --- Doc/c-api/frame.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Doc/c-api/frame.rst b/Doc/c-api/frame.rst index fb17cf7f1da6b2..967cfc727655ec 100644 --- a/Doc/c-api/frame.rst +++ b/Doc/c-api/frame.rst @@ -50,6 +50,7 @@ See also :ref:`Reflection `. Return a :term:`strong reference`, or ``NULL`` if *frame* has no outer frame. + This raises no exceptions. .. 
versionadded:: 3.9 From a2497955387bc463f05111b803599a92dcfcae29 Mon Sep 17 00:00:00 2001 From: VanshAgarwal24036 <148854295+VanshAgarwal24036@users.noreply.github.com> Date: Fri, 27 Feb 2026 21:38:15 +0530 Subject: [PATCH 4/7] gh-145142: Make str.maketrans safe under free-threading (gh-145157) --- Lib/test/test_free_threading/test_str.py | 16 ++++ ...-02-23-23-18-28.gh-issue-145142.T-XbVe.rst | 2 + Objects/unicodeobject.c | 76 +++++++++++-------- 3 files changed, 63 insertions(+), 31 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-02-23-23-18-28.gh-issue-145142.T-XbVe.rst diff --git a/Lib/test/test_free_threading/test_str.py b/Lib/test/test_free_threading/test_str.py index 72044e979b0f48..9a1ce3620ac4b2 100644 --- a/Lib/test/test_free_threading/test_str.py +++ b/Lib/test/test_free_threading/test_str.py @@ -69,6 +69,22 @@ def reader_func(): for reader in readers: reader.join() + def test_maketrans_dict_concurrent_modification(self): + for _ in range(5): + d = {2000: 'a'} + + def work(dct): + for i in range(100): + str.maketrans(dct) + dct[2000 + i] = chr(i % 16) + dct.pop(2000 + i, None) + + threading_helper.run_concurrently( + work, + nthreads=5, + args=(d,), + ) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-02-23-23-18-28.gh-issue-145142.T-XbVe.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-02-23-23-18-28.gh-issue-145142.T-XbVe.rst new file mode 100644 index 00000000000000..5f6043cc3d9660 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-02-23-23-18-28.gh-issue-145142.T-XbVe.rst @@ -0,0 +1,2 @@ +Fix a crash in the free-threaded build when the dictionary argument to +:meth:`str.maketrans` is concurrently modified. 
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 988e5f95573fe1..213bae5ca86cd4 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -13060,6 +13060,45 @@ unicode_swapcase_impl(PyObject *self) return case_operation(self, do_swapcase); } +static int +unicode_maketrans_from_dict(PyObject *x, PyObject *newdict) +{ + PyObject *key, *value; + Py_ssize_t i = 0; + int res; + while (PyDict_Next(x, &i, &key, &value)) { + if (PyUnicode_Check(key)) { + PyObject *newkey; + int kind; + const void *data; + if (PyUnicode_GET_LENGTH(key) != 1) { + PyErr_SetString(PyExc_ValueError, "string keys in translate " + "table must be of length 1"); + return -1; + } + kind = PyUnicode_KIND(key); + data = PyUnicode_DATA(key); + newkey = PyLong_FromLong(PyUnicode_READ(kind, data, 0)); + if (!newkey) + return -1; + res = PyDict_SetItem(newdict, newkey, value); + Py_DECREF(newkey); + if (res < 0) + return -1; + } + else if (PyLong_Check(key)) { + if (PyDict_SetItem(newdict, key, value) < 0) + return -1; + } + else { + PyErr_SetString(PyExc_TypeError, "keys in translate table must " + "be strings or integers"); + return -1; + } + } + return 0; +} + /*[clinic input] @staticmethod @@ -13145,9 +13184,6 @@ unicode_maketrans_impl(PyObject *x, PyObject *y, PyObject *z) } } } else { - int kind; - const void *data; - /* x must be a dict */ if (!PyAnyDict_CheckExact(x)) { PyErr_SetString(PyExc_TypeError, "if you give only one argument " @@ -13155,34 +13191,12 @@ unicode_maketrans_impl(PyObject *x, PyObject *y, PyObject *z) goto err; } /* copy entries into the new dict, converting string keys to int keys */ - while (PyDict_Next(x, &i, &key, &value)) { - if (PyUnicode_Check(key)) { - /* convert string keys to integer keys */ - PyObject *newkey; - if (PyUnicode_GET_LENGTH(key) != 1) { - PyErr_SetString(PyExc_ValueError, "string keys in translate " - "table must be of length 1"); - goto err; - } - kind = PyUnicode_KIND(key); - data = PyUnicode_DATA(key); - newkey = 
PyLong_FromLong(PyUnicode_READ(kind, data, 0)); - if (!newkey) - goto err; - res = PyDict_SetItem(new, newkey, value); - Py_DECREF(newkey); - if (res < 0) - goto err; - } else if (PyLong_Check(key)) { - /* just keep integer keys */ - if (PyDict_SetItem(new, key, value) < 0) - goto err; - } else { - PyErr_SetString(PyExc_TypeError, "keys in translate table must " - "be strings or integers"); - goto err; - } - } + int errcode; + Py_BEGIN_CRITICAL_SECTION(x); + errcode = unicode_maketrans_from_dict(x, new); + Py_END_CRITICAL_SECTION(); + if (errcode < 0) + goto err; } return new; err: From 11eec7a492670fff67fc083036d595f8498217db Mon Sep 17 00:00:00 2001 From: indoor47 Date: Fri, 27 Feb 2026 17:24:39 +0100 Subject: [PATCH 5/7] gh-145305: Update ocert.org URLs in docs from http to https (#145304) Co-authored-by: Adam (indoor47) --- Doc/reference/datamodel.rst | 2 +- Doc/using/cmdline.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst index 27aedfa878af9a..cf5a0e71a104eb 100644 --- a/Doc/reference/datamodel.rst +++ b/Doc/reference/datamodel.rst @@ -2256,7 +2256,7 @@ Basic customization This is intended to provide protection against a denial-of-service caused by carefully chosen inputs that exploit the worst case performance of a dict insertion, *O*\ (*n*\ :sup:`2`) complexity. See - http://ocert.org/advisories/ocert-2011-003.html for details. + https://ocert.org/advisories/ocert-2011-003.html for details. Changing hash values affects the iteration order of sets. 
Python has never made guarantees about this ordering diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index 2e7ea7b4fc4cba..93df4fcdc630a5 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -390,7 +390,7 @@ Miscellaneous options Hash randomization is intended to provide protection against a denial-of-service caused by carefully chosen inputs that exploit the worst case performance of a dict construction, *O*\ (*n*\ :sup:`2`) complexity. See - http://ocert.org/advisories/ocert-2011-003.html for details. + https://ocert.org/advisories/ocert-2011-003.html for details. :envvar:`PYTHONHASHSEED` allows you to set a fixed value for the hash seed secret. From 4d89056ed0f0975e786d859993786a33144cade5 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Fri, 27 Feb 2026 18:46:02 +0000 Subject: [PATCH 6/7] gh-76007: Deprecate `tarfile.version` (#145326) Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- Doc/deprecations/pending-removal-in-3.20.rst | 1 + Doc/whatsnew/3.15.rst | 1 + Lib/tarfile.py | 11 ++++++++++- Lib/test/test_tarfile.py | 10 ++++++++++ .../2026-02-27-18-04-51.gh-issue-76007.17idfK.rst | 2 ++ 5 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2026-02-27-18-04-51.gh-issue-76007.17idfK.rst diff --git a/Doc/deprecations/pending-removal-in-3.20.rst b/Doc/deprecations/pending-removal-in-3.20.rst index 4e4b2e1d5f8fff..8372432a34daa5 100644 --- a/Doc/deprecations/pending-removal-in-3.20.rst +++ b/Doc/deprecations/pending-removal-in-3.20.rst @@ -21,6 +21,7 @@ Pending removal in Python 3.20 - :mod:`re` - :mod:`socketserver` - :mod:`tabnanny` + - :mod:`tarfile` - :mod:`tkinter.font` - :mod:`tkinter.ttk` - :mod:`wsgiref.simple_server` diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 37ebdfee7915fe..163d50d7e20e20 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -1549,6 +1549,7 @@ New deprecations 
- :mod:`re` - :mod:`socketserver` - :mod:`tabnanny` + - :mod:`tarfile` - :mod:`tkinter.font` - :mod:`tkinter.ttk` - :mod:`wsgiref.simple_server` diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 7db3a40c9b33cf..75984bf8b262b9 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -28,7 +28,6 @@ """Read from and write to tar format archives. """ -version = "0.9.0" __author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)" __credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend." @@ -3137,5 +3136,15 @@ def main(): if args.verbose: print('{!r} file created.'.format(tar_name)) + +def __getattr__(name): + if name == "version": + from warnings import _deprecated + + _deprecated("version", remove=(3, 20)) + return "0.9.0" # Do not change + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + + if __name__ == '__main__': main() diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 9892005787c8a6..139840dd9c1f1b 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -4836,6 +4836,16 @@ def test_ignore_invalid_offset_headers(self): self.assertEqual(members[0].offset, expected_offset) +class TestModule(unittest.TestCase): + def test_deprecated_version(self): + with self.assertWarnsRegex( + DeprecationWarning, + "'version' is deprecated and slated for removal in Python 3.20", + ) as cm: + getattr(tarfile, "version") + self.assertEqual(cm.filename, __file__) + + def setUpModule(): os_helper.unlink(TEMPDIR) os.makedirs(TEMPDIR) diff --git a/Misc/NEWS.d/next/Library/2026-02-27-18-04-51.gh-issue-76007.17idfK.rst b/Misc/NEWS.d/next/Library/2026-02-27-18-04-51.gh-issue-76007.17idfK.rst new file mode 100644 index 00000000000000..4bb230dcb8473f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-02-27-18-04-51.gh-issue-76007.17idfK.rst @@ -0,0 +1,2 @@ +The ``version`` attribute of the :mod:`tarfile` module is deprecated and +slated for removal in Python 3.20. 
From 72eca2af59043c78647b0e6be3777a947ea9ef0f Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Fri, 27 Feb 2026 14:09:05 -0500 Subject: [PATCH 7/7] gh-145230: Update lockbench (gh-145231) Remove PyThread_type_lock (now uses PyMutex internally). Add new benchmark options: - work_inside/work_outside: control work inside and outside the critical section to vary contention levels - num_locks: use multiple independent locks with threads assigned round-robin - total_iters: fixed iteration count per thread instead of time-based, useful for measuring fairness - num_acquisitions: lock acquisitions per loop iteration - random_locks: acquire random lock each iteration Also return elapsed time from benchmark_locks() and switch lockbench.py to use argparse. --- .../_testinternalcapi/clinic/test_lock.c.h | 79 +++++++-- Modules/_testinternalcapi/test_lock.c | 159 +++++++++++------- Tools/lockbench/lockbench.py | 81 ++++++--- 3 files changed, 227 insertions(+), 92 deletions(-) diff --git a/Modules/_testinternalcapi/clinic/test_lock.c.h b/Modules/_testinternalcapi/clinic/test_lock.c.h index 86875767343cd2..6e989a777ac7f0 100644 --- a/Modules/_testinternalcapi/clinic/test_lock.c.h +++ b/Modules/_testinternalcapi/clinic/test_lock.c.h @@ -6,8 +6,9 @@ preserve #include "pycore_modsupport.h" // _PyArg_CheckPositional() PyDoc_STRVAR(_testinternalcapi_benchmark_locks__doc__, -"benchmark_locks($module, num_threads, use_pymutex=True,\n" -" critical_section_length=1, time_ms=1000, /)\n" +"benchmark_locks($module, num_threads, work_inside=1, work_outside=0,\n" +" time_ms=1000, num_acquisitions=1, total_iters=0,\n" +" num_locks=1, random_locks=False, /)\n" "--\n" "\n"); @@ -17,20 +18,26 @@ PyDoc_STRVAR(_testinternalcapi_benchmark_locks__doc__, static PyObject * _testinternalcapi_benchmark_locks_impl(PyObject *module, Py_ssize_t num_threads, - int use_pymutex, - int critical_section_length, - int time_ms); + int work_inside, int work_outside, + int time_ms, int num_acquisitions, + Py_ssize_t 
total_iters, + Py_ssize_t num_locks, + int random_locks); static PyObject * _testinternalcapi_benchmark_locks(PyObject *module, PyObject *const *args, Py_ssize_t nargs) { PyObject *return_value = NULL; Py_ssize_t num_threads; - int use_pymutex = 1; - int critical_section_length = 1; + int work_inside = 1; + int work_outside = 0; int time_ms = 1000; + int num_acquisitions = 1; + Py_ssize_t total_iters = 0; + Py_ssize_t num_locks = 1; + int random_locks = 0; - if (!_PyArg_CheckPositional("benchmark_locks", nargs, 1, 4)) { + if (!_PyArg_CheckPositional("benchmark_locks", nargs, 1, 8)) { goto exit; } { @@ -48,15 +55,15 @@ _testinternalcapi_benchmark_locks(PyObject *module, PyObject *const *args, Py_ss if (nargs < 2) { goto skip_optional; } - use_pymutex = PyObject_IsTrue(args[1]); - if (use_pymutex < 0) { + work_inside = PyLong_AsInt(args[1]); + if (work_inside == -1 && PyErr_Occurred()) { goto exit; } if (nargs < 3) { goto skip_optional; } - critical_section_length = PyLong_AsInt(args[2]); - if (critical_section_length == -1 && PyErr_Occurred()) { + work_outside = PyLong_AsInt(args[2]); + if (work_outside == -1 && PyErr_Occurred()) { goto exit; } if (nargs < 4) { @@ -66,10 +73,54 @@ _testinternalcapi_benchmark_locks(PyObject *module, PyObject *const *args, Py_ss if (time_ms == -1 && PyErr_Occurred()) { goto exit; } + if (nargs < 5) { + goto skip_optional; + } + num_acquisitions = PyLong_AsInt(args[4]); + if (num_acquisitions == -1 && PyErr_Occurred()) { + goto exit; + } + if (nargs < 6) { + goto skip_optional; + } + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[5]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + total_iters = ival; + } + if (nargs < 7) { + goto skip_optional; + } + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[6]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && 
PyErr_Occurred()) { + goto exit; + } + num_locks = ival; + } + if (nargs < 8) { + goto skip_optional; + } + random_locks = PyObject_IsTrue(args[7]); + if (random_locks < 0) { + goto exit; + } skip_optional: - return_value = _testinternalcapi_benchmark_locks_impl(module, num_threads, use_pymutex, critical_section_length, time_ms); + return_value = _testinternalcapi_benchmark_locks_impl(module, num_threads, work_inside, work_outside, time_ms, num_acquisitions, total_iters, num_locks, random_locks); exit: return return_value; } -/*[clinic end generated code: output=105105d759c0c271 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=6cfed9fc081313ef input=a9049054013a1b77]*/ diff --git a/Modules/_testinternalcapi/test_lock.c b/Modules/_testinternalcapi/test_lock.c index ded76ca9fe6819..596120ef275196 100644 --- a/Modules/_testinternalcapi/test_lock.c +++ b/Modules/_testinternalcapi/test_lock.c @@ -194,65 +194,101 @@ test_lock_counter_slow(PyObject *self, PyObject *obj) Py_RETURN_NONE; } -struct bench_data_locks { - int stop; - int use_pymutex; - int critical_section_length; +struct bench_lock { char padding[200]; - PyThread_type_lock lock; PyMutex m; double value; - Py_ssize_t total_iters; +}; + +struct bench_config { + int stop; + int work_inside; + int work_outside; + int num_acquisitions; + int random_locks; + Py_ssize_t target_iters; + Py_ssize_t num_locks; + struct bench_lock *locks; }; struct bench_thread_data { - struct bench_data_locks *bench_data; + struct bench_config *config; + struct bench_lock *lock; + uint64_t rng_state; Py_ssize_t iters; PyEvent done; }; +static uint64_t +splitmix64(uint64_t *state) +{ + uint64_t z = (*state += 0x9e3779b97f4a7c15); + z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9; + z = (z ^ (z >> 27)) * 0x94d049bb133111eb; + return z ^ (z >> 31); +} + static void thread_benchmark_locks(void *arg) { - struct bench_thread_data *thread_data = arg; - struct bench_data_locks *bench_data = thread_data->bench_data; - int use_pymutex = 
bench_data->use_pymutex; - int critical_section_length = bench_data->critical_section_length; - + struct bench_thread_data *td = arg; + struct bench_config *config = td->config; + int work_inside = config->work_inside; + int work_outside = config->work_outside; + int num_acquisitions = config->num_acquisitions; + Py_ssize_t target_iters = config->target_iters; + uint64_t rng_state = td->rng_state; + + double local_value = 0.0; double my_value = 1.0; Py_ssize_t iters = 0; - while (!_Py_atomic_load_int_relaxed(&bench_data->stop)) { - if (use_pymutex) { - PyMutex_Lock(&bench_data->m); - for (int i = 0; i < critical_section_length; i++) { - bench_data->value += my_value; - my_value = bench_data->value; + for (;;) { + if (target_iters > 0) { + if (iters >= target_iters) { + break; } - PyMutex_Unlock(&bench_data->m); } - else { - PyThread_acquire_lock(bench_data->lock, 1); - for (int i = 0; i < critical_section_length; i++) { - bench_data->value += my_value; - my_value = bench_data->value; + else if (_Py_atomic_load_int_relaxed(&config->stop)) { + break; + } + struct bench_lock *lock = td->lock; + if (config->random_locks) { + uint32_t r = (uint32_t)splitmix64(&rng_state); + // Fast modulo reduction to pick a random lock, adapted from: + // https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/ + Py_ssize_t idx = ((uint64_t)r * (uint32_t)config->num_locks) >> 32; + lock = &config->locks[idx]; + } + for (int acq = 0; acq < num_acquisitions; acq++) { + PyMutex_Lock(&lock->m); + for (int i = 0; i < work_inside; i++) { + lock->value += my_value; + my_value = lock->value; } - PyThread_release_lock(bench_data->lock); + PyMutex_Unlock(&lock->m); } - iters++; + for (int i = 0; i < work_outside; i++) { + local_value += my_value; + my_value = local_value; + } + iters += num_acquisitions; } - thread_data->iters = iters; - _Py_atomic_add_ssize(&bench_data->total_iters, iters); - _PyEvent_Notify(&thread_data->done); + td->iters = iters; + 
_PyEvent_Notify(&td->done); } /*[clinic input] _testinternalcapi.benchmark_locks num_threads: Py_ssize_t - use_pymutex: bool = True - critical_section_length: int = 1 + work_inside: int = 1 + work_outside: int = 0 time_ms: int = 1000 + num_acquisitions: int = 1 + total_iters: Py_ssize_t = 0 + num_locks: Py_ssize_t = 1 + random_locks: bool = False / [clinic start generated code]*/ @@ -260,10 +296,12 @@ _testinternalcapi.benchmark_locks static PyObject * _testinternalcapi_benchmark_locks_impl(PyObject *module, Py_ssize_t num_threads, - int use_pymutex, - int critical_section_length, - int time_ms) -/*[clinic end generated code: output=381df8d7e9a74f18 input=f3aeaf688738c121]*/ + int work_inside, int work_outside, + int time_ms, int num_acquisitions, + Py_ssize_t total_iters, + Py_ssize_t num_locks, + int random_locks) +/*[clinic end generated code: output=6258dc9de8cb9af1 input=d622cf4e1c4d008b]*/ { // Run from Tools/lockbench/lockbench.py // Based on the WebKit lock benchmarks: @@ -271,24 +309,28 @@ _testinternalcapi_benchmark_locks_impl(PyObject *module, // See also https://webkit.org/blog/6161/locking-in-webkit/ PyObject *thread_iters = NULL; PyObject *res = NULL; + struct bench_thread_data *thread_data = NULL; - struct bench_data_locks bench_data; - memset(&bench_data, 0, sizeof(bench_data)); - bench_data.use_pymutex = use_pymutex; - bench_data.critical_section_length = critical_section_length; - - bench_data.lock = PyThread_allocate_lock(); - if (bench_data.lock == NULL) { - return PyErr_NoMemory(); + struct bench_config config = { + .work_inside = work_inside, + .work_outside = work_outside, + .num_acquisitions = num_acquisitions, + .target_iters = total_iters, + .num_locks = num_locks, + .random_locks = random_locks, + }; + + config.locks = PyMem_Calloc(num_locks, sizeof(*config.locks)); + if (config.locks == NULL) { + PyErr_NoMemory(); + goto exit; } - struct bench_thread_data *thread_data = NULL; thread_data = PyMem_Calloc(num_threads, sizeof(*thread_data)); 
if (thread_data == NULL) { PyErr_NoMemory(); goto exit; } - thread_iters = PyList_New(num_threads); if (thread_iters == NULL) { goto exit; @@ -300,40 +342,43 @@ _testinternalcapi_benchmark_locks_impl(PyObject *module, } for (Py_ssize_t i = 0; i < num_threads; i++) { - thread_data[i].bench_data = &bench_data; + thread_data[i].config = &config; + thread_data[i].lock = &config.locks[i % num_locks]; + thread_data[i].rng_state = (uint64_t)i + 1; PyThread_start_new_thread(thread_benchmark_locks, &thread_data[i]); } - // Let the threads run for `time_ms` milliseconds - pysleep(time_ms); - _Py_atomic_store_int(&bench_data.stop, 1); + if (total_iters == 0) { + pysleep(time_ms); + _Py_atomic_store_int(&config.stop, 1); + } - // Wait for the threads to finish for (Py_ssize_t i = 0; i < num_threads; i++) { PyEvent_Wait(&thread_data[i].done); } - Py_ssize_t total_iters = bench_data.total_iters; if (PyTime_PerfCounter(&end) < 0) { goto exit; } - // Return the total number of acquisitions and the number of acquisitions - // for each thread. 
+ Py_ssize_t sum_iters = 0; for (Py_ssize_t i = 0; i < num_threads; i++) { PyObject *iter = PyLong_FromSsize_t(thread_data[i].iters); if (iter == NULL) { goto exit; } PyList_SET_ITEM(thread_iters, i, iter); + sum_iters += thread_data[i].iters; } assert(end != start); - double rate = total_iters * 1e9 / (end - start); - res = Py_BuildValue("(dO)", rate, thread_iters); + PyTime_t elapsed_ns = end - start; + double rate = sum_iters * 1e9 / elapsed_ns; + res = Py_BuildValue("(dOL)", rate, thread_iters, + (long long)elapsed_ns); exit: - PyThread_free_lock(bench_data.lock); + PyMem_Free(config.locks); PyMem_Free(thread_data); Py_XDECREF(thread_iters); return res; @@ -344,7 +389,7 @@ test_lock_benchmark(PyObject *module, PyObject *obj) { // Just make sure the benchmark runs without crashing PyObject *res = _testinternalcapi_benchmark_locks_impl( - module, 1, 1, 1, 100); + module, 1, 1, 0, 100, 1, 0, 1, 0); if (res == NULL) { return NULL; } diff --git a/Tools/lockbench/lockbench.py b/Tools/lockbench/lockbench.py index 9833d703e00cbb..d2608797f3a4d5 100644 --- a/Tools/lockbench/lockbench.py +++ b/Tools/lockbench/lockbench.py @@ -1,14 +1,28 @@ -# Measure the performance of PyMutex and PyThread_type_lock locks -# with short critical sections. +# Measure the performance of PyMutex locks with short critical sections. # -# Usage: python Tools/lockbench/lockbench.py [CRITICAL_SECTION_LENGTH] +# Usage: python Tools/lockbench/lockbench.py [options] +# +# Options: +# --work-inside N Units of work inside the critical section (default: 1). +# --work-outside N Units of work outside the critical section (default: 0). +# Each unit of work is a dependent floating-point +# addition, which takes about 0.4 ns on modern +# Intel / AMD processors. +# --num-locks N Number of independent locks (default: 1). Threads are +# assigned to locks round-robin. +# --random-locks Each thread picks a random lock per acquisition instead +# of using a fixed assignment. Requires --num-locks > 1. 
+# --acquisitions N Lock acquisitions per loop iteration (default: 1). +# --total-iters N Fixed iterations per thread (default: 0 = time-based). +# Useful for measuring fairness: the benchmark runs until +# the slowest thread finishes. # # How to interpret the results: # # Acquisitions (kHz): Reports the total number of lock acquisitions in # thousands of acquisitions per second. This is the most important metric, # particularly for the 1 thread case because even in multithreaded programs, -# most locks acquisitions are not contended. Values for 2+ threads are +# most lock acquisitions are not contended. Values for 2+ threads are # only meaningful for `--disable-gil` builds, because the GIL prevents most # situations where there is lock contention with short critical sections. # @@ -19,14 +33,15 @@ # See https://en.wikipedia.org/wiki/Fairness_measure#Jain's_fairness_index from _testinternalcapi import benchmark_locks -import sys - -# Max number of threads to test -MAX_THREADS = 10 +import argparse -# How much "work" to do while holding the lock -CRITICAL_SECTION_LENGTH = 1 +def parse_threads(value): + if '-' in value: + lo, hi = value.split('-', 1) + lo, hi = int(lo), int(hi) + return range(lo, hi + 1) + return range(int(value), int(value) + 1) def jains_fairness(values): # Jain's fairness index @@ -34,20 +49,44 @@ def jains_fairness(values): return (sum(values) ** 2) / (len(values) * sum(x ** 2 for x in values)) def main(): - print("Lock Type Threads Acquisitions (kHz) Fairness") - for lock_type in ["PyMutex", "PyThread_type_lock"]: - use_pymutex = (lock_type == "PyMutex") - for num_threads in range(1, MAX_THREADS + 1): - acquisitions, thread_iters = benchmark_locks( - num_threads, use_pymutex, CRITICAL_SECTION_LENGTH) + parser = argparse.ArgumentParser(description="Benchmark PyMutex locks") + parser.add_argument("--work-inside", type=int, default=1, + help="units of work inside the critical section") + parser.add_argument("--work-outside", type=int, default=0, + 
help="units of work outside the critical section") + parser.add_argument("--acquisitions", type=int, default=1, + help="lock acquisitions per loop iteration") + parser.add_argument("--total-iters", type=int, default=0, + help="fixed iterations per thread (0 = time-based)") + parser.add_argument("--num-locks", type=int, default=1, + help="number of independent locks (round-robin assignment)") + parser.add_argument("--random-locks", action="store_true", + help="pick a random lock per acquisition") + parser.add_argument("threads", type=parse_threads, nargs='?', + default=range(1, 11), + help="Number of threads: N or MIN-MAX (default: 1-10)") + args = parser.parse_args() + + header = f"{'Threads': <10}{'Acq (kHz)': >12}{'Fairness': >10}" + if args.total_iters: + header += f"{'Wall (ms)': >12}" + print(header) + for num_threads in args.threads: + acquisitions, thread_iters, elapsed_ns = \ + benchmark_locks( + num_threads, args.work_inside, args.work_outside, + 1000, args.acquisitions, args.total_iters, + args.num_locks, args.random_locks) - acquisitions /= 1000 # report in kHz for readability - fairness = jains_fairness(thread_iters) + wall_ms = elapsed_ns / 1e6 + acquisitions /= 1000 # report in kHz for readability + fairness = jains_fairness(thread_iters) - print(f"{lock_type: <20}{num_threads: <18}{acquisitions: >5.0f}{fairness: >20.2f}") + line = f"{num_threads: <10}{acquisitions: >12.0f}{fairness: >10.2f}" + if args.total_iters: + line += f"{wall_ms: >12.1f}" + print(line) if __name__ == "__main__": - if len(sys.argv) > 1: - CRITICAL_SECTION_LENGTH = int(sys.argv[1]) main()