Skip to content

Commit bba6c1d

Browse files
ashm-devkumaraditya303gpshead
authored
[3.14] gh-142831: Fix use-after-free in json encoder during re-entrant mutation (gh-142851) (#150078)
gh-142831: Fix use-after-free in json encoder during re-entrant mutation (gh-142851) User callbacks invoked during JSON encoding (e.g. the `default` callback or a custom string encoder) can mutate or clear the dict or sequence being encoded, invalidating borrowed references to items, keys, and values. Hold strong references unconditionally while iterating. (cherry picked from commit 235fa72) Co-authored-by: Kumar Aditya <kumaraditya@python.org> Co-authored-by: Gregory P. Smith <greg@krypto.org>
1 parent accfbb6 commit bba6c1d

3 files changed

Lines changed: 89 additions & 3 deletions

File tree

Lib/test/test_json/test_speedups.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from test.test_json import CTest
2+
from test.support import gc_collect
23

34

45
class BadBool:
@@ -111,3 +112,63 @@ def test_current_indent_level(self):
111112
self.assertEqual(enc(['spam', {'ham': 'eggs'}], 3)[0], expected2)
112113
self.assertRaises(TypeError, enc, ['spam', {'ham': 'eggs'}], 3.0)
113114
self.assertRaises(TypeError, enc, ['spam', {'ham': 'eggs'}])
115+
116+
def test_mutate_dict_items_during_encode(self):
117+
# gh-142831: Clearing the items list via a re-entrant key encoder
118+
# must not cause a use-after-free. BadDict.items() returns a
119+
# mutable list; encode_str clears it while iterating.
120+
items = None
121+
122+
class BadDict(dict):
123+
def items(self):
124+
nonlocal items
125+
items = [("boom", object())]
126+
return items
127+
128+
cleared = False
129+
def encode_str(obj):
130+
nonlocal items, cleared
131+
if items is not None:
132+
items.clear()
133+
items = None
134+
cleared = True
135+
gc_collect()
136+
return '"x"'
137+
138+
encoder = self.json.encoder.c_make_encoder(
139+
None, lambda o: "null",
140+
encode_str, None,
141+
": ", ", ", False,
142+
False, True
143+
)
144+
145+
# Must not crash (use-after-free under ASan before fix)
146+
encoder(BadDict(real=1), 0)
147+
self.assertTrue(cleared)
148+
149+
def test_mutate_list_during_encode(self):
150+
# gh-142831: Clearing a list mid-iteration via the default
151+
# callback must not cause a use-after-free.
152+
call_count = 0
153+
lst = [object() for _ in range(10)]
154+
155+
def default(obj):
156+
nonlocal call_count
157+
call_count += 1
158+
if call_count == 3:
159+
lst.clear()
160+
gc_collect()
161+
return None
162+
163+
encoder = self.json.encoder.c_make_encoder(
164+
None, default,
165+
self.json.encoder.c_encode_basestring, None,
166+
": ", ", ", False,
167+
False, True
168+
)
169+
170+
# Must not crash (use-after-free under ASan before fix)
171+
encoder(lst, 0)
172+
# Verify the mutation path was actually hit and the loop
173+
# stopped iterating after the list was cleared.
174+
self.assertEqual(call_count, 3)
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix a crash in the :mod:`json` module where a use-after-free could occur if
2+
the object being encoded is modified during serialization.

Modules/_json.c

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1702,28 +1702,44 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
17021702

17031703
for (Py_ssize_t i = 0; i < PyList_GET_SIZE(items); i++) {
17041704
PyObject *item = PyList_GET_ITEM(items, i);
1705+
// gh-142831: encoder_encode_key_value() can invoke user code
1706+
// that mutates the items list, invalidating this borrowed ref.
1707+
Py_INCREF(item);
17051708

17061709
if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
17071710
PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1711+
Py_DECREF(item);
17081712
goto bail;
17091713
}
17101714

17111715
key = PyTuple_GET_ITEM(item, 0);
17121716
value = PyTuple_GET_ITEM(item, 1);
17131717
if (encoder_encode_key_value(s, writer, &first, dct, key, value,
17141718
indent_level, indent_cache,
1715-
separator) < 0)
1719+
separator) < 0) {
1720+
Py_DECREF(item);
17161721
goto bail;
1722+
}
1723+
Py_DECREF(item);
17171724
}
17181725
Py_CLEAR(items);
17191726

17201727
} else {
17211728
Py_ssize_t pos = 0;
17221729
while (PyDict_Next(dct, &pos, &key, &value)) {
1730+
// gh-142831: encoder_encode_key_value() can invoke user code
1731+
// that mutates the dict, invalidating these borrowed refs.
1732+
Py_INCREF(key);
1733+
Py_INCREF(value);
17231734
if (encoder_encode_key_value(s, writer, &first, dct, key, value,
17241735
indent_level, indent_cache,
1725-
separator) < 0)
1736+
separator) < 0) {
1737+
Py_DECREF(key);
1738+
Py_DECREF(value);
17261739
goto bail;
1740+
}
1741+
Py_DECREF(key);
1742+
Py_DECREF(value);
17271743
}
17281744
}
17291745

@@ -1800,14 +1816,21 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer,
18001816
}
18011817
for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
18021818
PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
1819+
// gh-142831: encoder_listencode_obj() can invoke user code
1820+
// that mutates the sequence, invalidating this borrowed ref.
1821+
Py_INCREF(obj);
18031822
if (i) {
1804-
if (PyUnicodeWriter_WriteStr(writer, separator) < 0)
1823+
if (PyUnicodeWriter_WriteStr(writer, separator) < 0) {
1824+
Py_DECREF(obj);
18051825
goto bail;
1826+
}
18061827
}
18071828
if (encoder_listencode_obj(s, writer, obj, indent_level, indent_cache)) {
18081829
_PyErr_FormatNote("when serializing %T item %zd", seq, i);
1830+
Py_DECREF(obj);
18091831
goto bail;
18101832
}
1833+
Py_DECREF(obj);
18111834
}
18121835
if (ident != NULL) {
18131836
if (PyDict_DelItem(s->markers, ident))

0 commit comments

Comments
 (0)