Skip to content

Commit f33ed21

Browse files
ashm-devkumaraditya303gpshead
committed
gh-142831: Fix use-after-free in json encoder during re-entrant mutation (gh-142851)
User callbacks invoked during JSON encoding (e.g. the `default` callback or a custom string encoder) can mutate or clear the dict or sequence being encoded, invalidating borrowed references to items, keys, and values. Hold strong references unconditionally while iterating. Co-authored-by: Kumar Aditya <kumaraditya@python.org> Co-authored-by: Gregory P. Smith <greg@krypto.org> (cherry picked from commit 235fa72)
1 parent 8ee6aff commit f33ed21

3 files changed

Lines changed: 91 additions & 4 deletions

File tree

Lib/test/test_json/test_speedups.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from test.test_json import CTest
2+
from test.support import gc_collect
23

34

45
class BadBool:
@@ -80,3 +81,63 @@ def test(name):
8081
def test_unsortable_keys(self):
8182
with self.assertRaises(TypeError):
8283
self.json.encoder.JSONEncoder(sort_keys=True).encode({'a': 1, 1: 'a'})
84+
85+
def test_mutate_dict_items_during_encode(self):
86+
# gh-142831: Clearing the items list via a re-entrant key encoder
87+
# must not cause a use-after-free. BadDict.items() returns a
88+
# mutable list; encode_str clears it while iterating.
89+
items = None
90+
91+
class BadDict(dict):
92+
def items(self):
93+
nonlocal items
94+
items = [("boom", object())]
95+
return items
96+
97+
cleared = False
98+
def encode_str(obj):
99+
nonlocal items, cleared
100+
if items is not None:
101+
items.clear()
102+
items = None
103+
cleared = True
104+
gc_collect()
105+
return '"x"'
106+
107+
encoder = self.json.encoder.c_make_encoder(
108+
None, lambda o: "null",
109+
encode_str, None,
110+
": ", ", ", False,
111+
False, True
112+
)
113+
114+
# Must not crash (use-after-free under ASan before fix)
115+
encoder(BadDict(real=1), 0)
116+
self.assertTrue(cleared)
117+
118+
def test_mutate_list_during_encode(self):
119+
# gh-142831: Clearing a list mid-iteration via the default
120+
# callback must not cause a use-after-free.
121+
call_count = 0
122+
lst = [object() for _ in range(10)]
123+
124+
def default(obj):
125+
nonlocal call_count
126+
call_count += 1
127+
if call_count == 3:
128+
lst.clear()
129+
gc_collect()
130+
return None
131+
132+
encoder = self.json.encoder.c_make_encoder(
133+
None, default,
134+
self.json.encoder.c_encode_basestring, None,
135+
": ", ", ", False,
136+
False, True
137+
)
138+
139+
# Must not crash (use-after-free under ASan before fix)
140+
encoder(lst, 0)
141+
# Verify the mutation path was actually hit and the loop
142+
# stopped iterating after the list was cleared.
143+
self.assertEqual(call_count, 3)
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix a crash in the :mod:`json` module where a use-after-free could occur if
2+
the object being encoded is modified during serialization.

Modules/_json.c

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1602,28 +1602,44 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
16021602

16031603
for (Py_ssize_t i = 0; i < PyList_GET_SIZE(items); i++) {
16041604
PyObject *item = PyList_GET_ITEM(items, i);
1605+
// gh-142831: encoder_encode_key_value() can invoke user code
1606+
// that mutates the items list, invalidating this borrowed ref.
1607+
Py_INCREF(item);
16051608

16061609
if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
16071610
PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1611+
Py_DECREF(item);
16081612
goto bail;
16091613
}
16101614

16111615
key = PyTuple_GET_ITEM(item, 0);
16121616
value = PyTuple_GET_ITEM(item, 1);
16131617
if (encoder_encode_key_value(s, writer, &first, key, value,
16141618
new_newline_indent,
1615-
current_item_separator) < 0)
1619+
current_item_separator) < 0) {
1620+
Py_DECREF(item);
16161621
goto bail;
1622+
}
1623+
Py_DECREF(item);
16171624
}
16181625
Py_CLEAR(items);
16191626

16201627
} else {
16211628
Py_ssize_t pos = 0;
16221629
while (PyDict_Next(dct, &pos, &key, &value)) {
1630+
// gh-142831: encoder_encode_key_value() can invoke user code
1631+
// that mutates the dict, invalidating these borrowed refs.
1632+
Py_INCREF(key);
1633+
Py_INCREF(value);
16231634
if (encoder_encode_key_value(s, writer, &first, key, value,
16241635
new_newline_indent,
1625-
current_item_separator) < 0)
1636+
current_item_separator) < 0) {
1637+
Py_DECREF(key);
1638+
Py_DECREF(value);
16261639
goto bail;
1640+
}
1641+
Py_DECREF(key);
1642+
Py_DECREF(value);
16271643
}
16281644
}
16291645

@@ -1712,12 +1728,20 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
17121728
}
17131729
for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
17141730
PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
1731+
// gh-142831: encoder_listencode_obj() can invoke user code
1732+
// that mutates the sequence, invalidating this borrowed ref.
1733+
Py_INCREF(obj);
17151734
if (i) {
1716-
if (_PyUnicodeWriter_WriteStr(writer, separator) < 0)
1735+
if (_PyUnicodeWriter_WriteStr(writer, separator) < 0) {
1736+
Py_DECREF(obj);
17171737
goto bail;
1738+
}
17181739
}
1719-
if (encoder_listencode_obj(s, writer, obj, new_newline_indent))
1740+
if (encoder_listencode_obj(s, writer, obj, new_newline_indent)) {
1741+
Py_DECREF(obj);
17201742
goto bail;
1743+
}
1744+
Py_DECREF(obj);
17211745
}
17221746
if (ident != NULL) {
17231747
if (PyDict_DelItem(s->markers, ident))

0 commit comments

Comments
 (0)