diff --git a/Include/internal/pycore_tuple.h b/Include/internal/pycore_tuple.h index 4fa7a12206bcb2..42b43579e827b6 100644 --- a/Include/internal/pycore_tuple.h +++ b/Include/internal/pycore_tuple.h @@ -11,6 +11,11 @@ extern "C" { extern void _PyTuple_MaybeUntrack(PyObject *); extern void _PyTuple_DebugMallocStats(FILE *out); +// Similar to PyTuple_New() and _PyTuple_Resize(), but don't track the tuple +// by the GC. +extern PyObject* _PyTuple_NewNoTrack(Py_ssize_t size); +extern int _PyTuple_ResizeNoTrack(PyObject **pv, Py_ssize_t newsize); + /* runtime lifecycle */ extern PyStatus _PyTuple_InitGlobalObjects(PyInterpreterState *); diff --git a/Lib/test/test_tuple.py b/Lib/test/test_tuple.py index 9ce80c5e8ea009..657bb9c74aea13 100644 --- a/Lib/test/test_tuple.py +++ b/Lib/test/test_tuple.py @@ -416,6 +416,29 @@ def test_lexicographic_ordering(self): self.assertLess(a, b) self.assertLess(b, c) + def test_sequence_tuple(self): + # gh-39117, gh-59313, gh-107137: PySequence_Tuple() must not track + # the tuple by the GC before it's fully initialized. + + # TAG object used to find the tuple in objects tracked by the GC + TAG = object() + + def my_iter(): + yield TAG + + for ref in gc.get_referrers(TAG): + if isinstance(ref, tuple): + raise Exception("PySequence_Tuple() leaks the tuple") + + # Add 100 items to trigger _PyTuple_Resize() calls. + # PySequence_Tuple() started by allocating a tuple of 10 items. + for num in range(100): + yield num + + seq = tuple(my_iter()) + self.assertEqual(seq, (TAG, *range(100))) + + # Notes on testing hash codes. The primary thing is that Python doesn't # care about "random" hash codes. To the contrary, we like them to be # very regular when possible, so that the low-order bits are as evenly diff --git a/Misc/NEWS.d/next/C API/2023-07-24-18-51-27.gh-issue-107137.zk8it0.rst b/Misc/NEWS.d/next/C API/2023-07-24-18-51-27.gh-issue-107137.zk8it0.rst new file mode 100644 index 00000000000000..c4381ae090c1fc --- /dev/null +++ b/Misc/NEWS.d/next/C API/2023-07-24-18-51-27.gh-issue-107137.zk8it0.rst @@ -0,0 +1,4 @@ +Fix a crash in :c:func:`PySequence_Tuple` when the tuple which is being +initialized was accessed via GC introspection, like the +:func:`gc.get_referrers` function. Now the function only tracks the tuple by +the GC once it's fully initialized. Patch by Victor Stinner. diff --git a/Objects/abstract.c b/Objects/abstract.c index b4edcec6007710..a24e0552b7cee7 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -8,6 +8,7 @@ #include "pycore_long.h" // _Py_IsNegative #include "pycore_pyerrors.h" // _PyErr_Occurred() #include "pycore_pystate.h" // _PyThreadState_GET() +#include "pycore_tuple.h" // _PyTuple_NewNoTrack() #include "pycore_unionobject.h" // _PyUnion_Check() #include #include // offsetof() @@ -2074,11 +2075,6 @@ PySequence_DelSlice(PyObject *s, Py_ssize_t i1, Py_ssize_t i2) PyObject * PySequence_Tuple(PyObject *v) { - PyObject *it; /* iter(v) */ - Py_ssize_t n; /* guess for result tuple size */ - PyObject *result = NULL; - Py_ssize_t j; - if (v == NULL) { return null_error(); } @@ -2091,28 +2087,36 @@ PySequence_Tuple(PyObject *v) a copy, so there's no need for exactness below. */ return Py_NewRef(v); } - if (PyList_CheckExact(v)) + if (PyList_CheckExact(v)) { return PyList_AsTuple(v); + } /* Get iterator. */ - it = PyObject_GetIter(v); - if (it == NULL) + PyObject *it = PyObject_GetIter(v); // iter(v) + if (it == NULL) { return NULL; + } /* Guess result size and allocate space. */ - n = PyObject_LengthHint(v, 10); - if (n == -1) - goto Fail; - result = PyTuple_New(n); - if (result == NULL) + Py_ssize_t n = PyObject_LengthHint(v, 10); // guess for result tuple size + if (n == -1) { + Py_DECREF(it); + return NULL; + } + + PyObject *result = _PyTuple_NewNoTrack(n); + if (result == NULL) { goto Fail; + } /* Fill the tuple. */ - for (j = 0; ; ++j) { + Py_ssize_t j = 0; + for (; ; ++j) { PyObject *item = PyIter_Next(it); if (item == NULL) { - if (PyErr_Occurred()) + if (PyErr_Occurred()) { goto Fail; + } break; } if (j >= n) { @@ -2132,7 +2136,7 @@ PySequence_Tuple(PyObject *v) goto Fail; } n = (Py_ssize_t)newn; - if (_PyTuple_Resize(&result, n) != 0) { + if (_PyTuple_ResizeNoTrack(&result, n) != 0) { Py_DECREF(item); goto Fail; } @@ -2141,11 +2145,15 @@ PySequence_Tuple(PyObject *v) } /* Cut tuple back if guess was too large. */ - if (j < n && - _PyTuple_Resize(&result, j) != 0) + if (j < n && _PyTuple_ResizeNoTrack(&result, j) != 0) { goto Fail; - + } Py_DECREF(it); + + // No need to track the empty tuple singleton + if (j != 0) { + PyObject_GC_Track(result); + } return result; Fail: diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index c3ff40fdb14c60..7b3b8dd6aa32ea 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -65,24 +65,38 @@ tuple_get_empty(void) return Py_NewRef(&_Py_SINGLETON(tuple_empty)); } -PyObject * -PyTuple_New(Py_ssize_t size) + +static inline PyObject * +tuple_new_capi(Py_ssize_t size, int track) { - PyTupleObject *op; if (size == 0) { return tuple_get_empty(); } - op = tuple_alloc(size); + PyTupleObject *op = tuple_alloc(size); if (op == NULL) { return NULL; } for (Py_ssize_t i = 0; i < size; i++) { op->ob_item[i] = NULL; } - _PyObject_GC_TRACK(op); + if (track) { + _PyObject_GC_TRACK(op); + } return (PyObject *) op; } +PyObject * +_PyTuple_NewNoTrack(Py_ssize_t size) +{ + return tuple_new_capi(size, 0); +} + +PyObject * +PyTuple_New(Py_ssize_t size) +{ + return tuple_new_capi(size, 1); +} + Py_ssize_t PyTuple_Size(PyObject *op) { @@ -894,8 +908,8 @@ PyTypeObject PyTuple_Type = { efficiently. In any case, don't use this if the tuple may already be known to some other part of the code. */ -int -_PyTuple_Resize(PyObject **pv, Py_ssize_t newsize) +static int +tuple_resize(PyObject **pv, Py_ssize_t newsize, int track) { PyTupleObject *v; PyTupleObject *sv; @@ -927,7 +941,12 @@ _PyTuple_Resize(PyObject **pv, Py_ssize_t newsize) /* The empty tuple is statically allocated so we never resize it in-place. */ Py_DECREF(v); - *pv = PyTuple_New(newsize); + if (track) { + *pv = PyTuple_New(newsize); + } + else { + *pv = _PyTuple_NewNoTrack(newsize); + } return *pv == NULL ? -1 : 0; } @@ -952,14 +971,29 @@ _PyTuple_Resize(PyObject **pv, Py_ssize_t newsize) } _Py_NewReferenceNoTotal((PyObject *) sv); /* Zero out items added by growing */ - if (newsize > oldsize) + if (newsize > oldsize) { memset(&sv->ob_item[oldsize], 0, sizeof(*sv->ob_item) * (newsize - oldsize)); + } *pv = (PyObject *) sv; - _PyObject_GC_TRACK(sv); + if (track) { + _PyObject_GC_TRACK(*pv); + } return 0; } +int +_PyTuple_ResizeNoTrack(PyObject **pv, Py_ssize_t newsize) +{ + return tuple_resize(pv, newsize, 0); +} + +int +_PyTuple_Resize(PyObject **pv, Py_ssize_t newsize) +{ + return tuple_resize(pv, newsize, 1); +} + static void maybe_freelist_clear(PyInterpreterState *, int);