Skip to content

Commit d5b9f8d

Browse files
ashm-devkumaraditya303gpshead
committed
gh-142831: Fix use-after-free in json encoder during re-entrant mutation (gh-142851)
User callbacks invoked during JSON encoding (e.g. the `default` callback or a custom string encoder) can mutate or clear the dict or sequence being encoded, invalidating borrowed references to items, keys, and values. Hold strong references unconditionally while iterating. Co-authored-by: Kumar Aditya <kumaraditya@python.org> Co-authored-by: Gregory P. Smith <greg@krypto.org> (cherry picked from commit 235fa72)
1 parent 8e13025 commit d5b9f8d

3 files changed

Lines changed: 89 additions & 3 deletions

File tree

Lib/test/test_json/test_speedups.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from test.test_json import CTest
2+
from test.support import gc_collect
23

34

45
class BadBool:
@@ -111,3 +112,63 @@ def test_current_indent_level(self):
111112
self.assertEqual(enc(['spam', {'ham': 'eggs'}], 3)[0], expected2)
112113
self.assertRaises(TypeError, enc, ['spam', {'ham': 'eggs'}], 3.0)
113114
self.assertRaises(TypeError, enc, ['spam', {'ham': 'eggs'}])
115+
116+
def test_mutate_dict_items_during_encode(self):
117+
# gh-142831: Clearing the items list via a re-entrant key encoder
118+
# must not cause a use-after-free. BadDict.items() returns a
119+
# mutable list; encode_str clears it while iterating.
120+
items = None
121+
122+
class BadDict(dict):
123+
def items(self):
124+
nonlocal items
125+
items = [("boom", object())]
126+
return items
127+
128+
cleared = False
129+
def encode_str(obj):
130+
nonlocal items, cleared
131+
if items is not None:
132+
items.clear()
133+
items = None
134+
cleared = True
135+
gc_collect()
136+
return '"x"'
137+
138+
encoder = self.json.encoder.c_make_encoder(
139+
None, lambda o: "null",
140+
encode_str, None,
141+
": ", ", ", False,
142+
False, True
143+
)
144+
145+
# Must not crash (use-after-free under ASan before fix)
146+
encoder(BadDict(real=1), 0)
147+
self.assertTrue(cleared)
148+
149+
def test_mutate_list_during_encode(self):
150+
# gh-142831: Clearing a list mid-iteration via the default
151+
# callback must not cause a use-after-free.
152+
call_count = 0
153+
lst = [object() for _ in range(10)]
154+
155+
def default(obj):
156+
nonlocal call_count
157+
call_count += 1
158+
if call_count == 3:
159+
lst.clear()
160+
gc_collect()
161+
return None
162+
163+
encoder = self.json.encoder.c_make_encoder(
164+
None, default,
165+
self.json.encoder.c_encode_basestring, None,
166+
": ", ", ", False,
167+
False, True
168+
)
169+
170+
# Must not crash (use-after-free under ASan before fix)
171+
encoder(lst, 0)
172+
# Verify the mutation path was actually hit and the loop
173+
# stopped iterating after the list was cleared.
174+
self.assertEqual(call_count, 3)
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix a crash in the :mod:`json` module where a use-after-free could occur if
2+
the object being encoded is modified during serialization.

Modules/_json.c

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1702,28 +1702,44 @@ encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer,
17021702

17031703
for (Py_ssize_t i = 0; i < PyList_GET_SIZE(items); i++) {
17041704
PyObject *item = PyList_GET_ITEM(items, i);
1705+
// gh-142831: encoder_encode_key_value() can invoke user code
1706+
// that mutates the items list, invalidating this borrowed ref.
1707+
Py_INCREF(item);
17051708

17061709
if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
17071710
PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1711+
Py_DECREF(item);
17081712
goto bail;
17091713
}
17101714

17111715
key = PyTuple_GET_ITEM(item, 0);
17121716
value = PyTuple_GET_ITEM(item, 1);
17131717
if (encoder_encode_key_value(s, writer, &first, dct, key, value,
17141718
indent_level, indent_cache,
1715-
separator) < 0)
1719+
separator) < 0) {
1720+
Py_DECREF(item);
17161721
goto bail;
1722+
}
1723+
Py_DECREF(item);
17171724
}
17181725
Py_CLEAR(items);
17191726

17201727
} else {
17211728
Py_ssize_t pos = 0;
17221729
while (PyDict_Next(dct, &pos, &key, &value)) {
1730+
// gh-142831: encoder_encode_key_value() can invoke user code
1731+
// that mutates the dict, invalidating these borrowed refs.
1732+
Py_INCREF(key);
1733+
Py_INCREF(value);
17231734
if (encoder_encode_key_value(s, writer, &first, dct, key, value,
17241735
indent_level, indent_cache,
1725-
separator) < 0)
1736+
separator) < 0) {
1737+
Py_DECREF(key);
1738+
Py_DECREF(value);
17261739
goto bail;
1740+
}
1741+
Py_DECREF(key);
1742+
Py_DECREF(value);
17271743
}
17281744
}
17291745

@@ -1800,14 +1816,21 @@ encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer,
18001816
}
18011817
for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
18021818
PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
1819+
// gh-142831: encoder_listencode_obj() can invoke user code
1820+
// that mutates the sequence, invalidating this borrowed ref.
1821+
Py_INCREF(obj);
18031822
if (i) {
1804-
if (PyUnicodeWriter_WriteStr(writer, separator) < 0)
1823+
if (PyUnicodeWriter_WriteStr(writer, separator) < 0) {
1824+
Py_DECREF(obj);
18051825
goto bail;
1826+
}
18061827
}
18071828
if (encoder_listencode_obj(s, writer, obj, indent_level, indent_cache)) {
18081829
_PyErr_FormatNote("when serializing %T item %zd", seq, i);
1830+
Py_DECREF(obj);
18091831
goto bail;
18101832
}
1833+
Py_DECREF(obj);
18111834
}
18121835
if (ident != NULL) {
18131836
if (PyDict_DelItem(s->markers, ident))

0 commit comments

Comments
 (0)