Skip to content

Commit e87baa8

Browse files
ashm-devkumaraditya303gpshead
authored
[3.13] gh-142831: Fix use-after-free in json encoder during re-entrant mutation (gh-142851) (#150079)
gh-142831: Fix use-after-free in json encoder during re-entrant mutation (gh-142851) User callbacks invoked during JSON encoding (e.g. the `default` callback or a custom string encoder) can mutate or clear the dict or sequence being encoded, invalidating borrowed references to items, keys, and values. Hold strong references unconditionally while iterating. (cherry picked from commit 235fa72) Co-authored-by: Kumar Aditya <kumaraditya@python.org> Co-authored-by: Gregory P. Smith <greg@krypto.org>
1 parent 792a552 commit e87baa8

3 files changed

Lines changed: 91 additions & 4 deletions

File tree

Lib/test/test_json/test_speedups.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from test.test_json import CTest
2+
from test.support import gc_collect
23

34

45
class BadBool:
@@ -80,3 +81,63 @@ def test(name):
8081
def test_unsortable_keys(self):
8182
with self.assertRaises(TypeError):
8283
self.json.encoder.JSONEncoder(sort_keys=True).encode({'a': 1, 1: 'a'})
84+
85+
def test_mutate_dict_items_during_encode(self):
86+
# gh-142831: Clearing the items list via a re-entrant key encoder
87+
# must not cause a use-after-free. BadDict.items() returns a
88+
# mutable list; encode_str clears it while iterating.
89+
items = None
90+
91+
class BadDict(dict):
92+
def items(self):
93+
nonlocal items
94+
items = [("boom", object())]
95+
return items
96+
97+
cleared = False
98+
def encode_str(obj):
99+
nonlocal items, cleared
100+
if items is not None:
101+
items.clear()
102+
items = None
103+
cleared = True
104+
gc_collect()
105+
return '"x"'
106+
107+
encoder = self.json.encoder.c_make_encoder(
108+
None, lambda o: "null",
109+
encode_str, None,
110+
": ", ", ", False,
111+
False, True
112+
)
113+
114+
# Must not crash (use-after-free under ASan before fix)
115+
encoder(BadDict(real=1), 0)
116+
self.assertTrue(cleared)
117+
118+
def test_mutate_list_during_encode(self):
119+
# gh-142831: Clearing a list mid-iteration via the default
120+
# callback must not cause a use-after-free.
121+
call_count = 0
122+
lst = [object() for _ in range(10)]
123+
124+
def default(obj):
125+
nonlocal call_count
126+
call_count += 1
127+
if call_count == 3:
128+
lst.clear()
129+
gc_collect()
130+
return None
131+
132+
encoder = self.json.encoder.c_make_encoder(
133+
None, default,
134+
self.json.encoder.c_encode_basestring, None,
135+
": ", ", ", False,
136+
False, True
137+
)
138+
139+
# Must not crash (use-after-free under ASan before fix)
140+
encoder(lst, 0)
141+
# Verify the mutation path was actually hit and the loop
142+
# stopped iterating after the list was cleared.
143+
self.assertEqual(call_count, 3)
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix a crash in the :mod:`json` module where a use-after-free could occur if
2+
the object being encoded is modified during serialization.

Modules/_json.c

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1602,28 +1602,44 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
16021602

16031603
for (Py_ssize_t i = 0; i < PyList_GET_SIZE(items); i++) {
16041604
PyObject *item = PyList_GET_ITEM(items, i);
1605+
// gh-142831: encoder_encode_key_value() can invoke user code
1606+
// that mutates the items list, invalidating this borrowed ref.
1607+
Py_INCREF(item);
16051608

16061609
if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
16071610
PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1611+
Py_DECREF(item);
16081612
goto bail;
16091613
}
16101614

16111615
key = PyTuple_GET_ITEM(item, 0);
16121616
value = PyTuple_GET_ITEM(item, 1);
16131617
if (encoder_encode_key_value(s, writer, &first, key, value,
16141618
new_newline_indent,
1615-
current_item_separator) < 0)
1619+
current_item_separator) < 0) {
1620+
Py_DECREF(item);
16161621
goto bail;
1622+
}
1623+
Py_DECREF(item);
16171624
}
16181625
Py_CLEAR(items);
16191626

16201627
} else {
16211628
Py_ssize_t pos = 0;
16221629
while (PyDict_Next(dct, &pos, &key, &value)) {
1630+
// gh-142831: encoder_encode_key_value() can invoke user code
1631+
// that mutates the dict, invalidating these borrowed refs.
1632+
Py_INCREF(key);
1633+
Py_INCREF(value);
16231634
if (encoder_encode_key_value(s, writer, &first, key, value,
16241635
new_newline_indent,
1625-
current_item_separator) < 0)
1636+
current_item_separator) < 0) {
1637+
Py_DECREF(key);
1638+
Py_DECREF(value);
16261639
goto bail;
1640+
}
1641+
Py_DECREF(key);
1642+
Py_DECREF(value);
16271643
}
16281644
}
16291645

@@ -1712,12 +1728,20 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
17121728
}
17131729
for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
17141730
PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
1731+
// gh-142831: encoder_listencode_obj() can invoke user code
1732+
// that mutates the sequence, invalidating this borrowed ref.
1733+
Py_INCREF(obj);
17151734
if (i) {
1716-
if (_PyUnicodeWriter_WriteStr(writer, separator) < 0)
1735+
if (_PyUnicodeWriter_WriteStr(writer, separator) < 0) {
1736+
Py_DECREF(obj);
17171737
goto bail;
1738+
}
17181739
}
1719-
if (encoder_listencode_obj(s, writer, obj, new_newline_indent))
1740+
if (encoder_listencode_obj(s, writer, obj, new_newline_indent)) {
1741+
Py_DECREF(obj);
17201742
goto bail;
1743+
}
1744+
Py_DECREF(obj);
17211745
}
17221746
if (ident != NULL) {
17231747
if (PyDict_DelItem(s->markers, ident))

0 commit comments

Comments
 (0)