Skip to content

Commit

Permalink
pythongh-107137: Add _PyTuple_NewNoTrack() internal C API
Browse files Browse the repository at this point in the history
Fix a crash in PySequence_Tuple() when the tuple which is being
initialized was accessed via GC introspection, like the
gc.get_referrers() function. Now the function only tracks the tuple
by the GC once it's fully initialized.

Add _PyTuple_NewNoTrack() and _PyTuple_ResizeNoTrack() functions to
the internal C API: similar to PyTuple_New() and _PyTuple_Resize(),
but don't track the tuple by the GC.

Modify PySequence_Tuple() to use these functions.
  • Loading branch information
vstinner committed Jul 24, 2023
1 parent 0a9b339 commit 9c9f4a7
Show file tree
Hide file tree
Showing 5 changed files with 103 additions and 29 deletions.
5 changes: 5 additions & 0 deletions Include/internal/pycore_tuple.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ extern "C" {
extern void _PyTuple_MaybeUntrack(PyObject *);
extern void _PyTuple_DebugMallocStats(FILE *out);

// Similar to PyTuple_New() and _PyTuple_Resize(), but don't track the tuple
// by the GC.
extern PyObject* _PyTuple_NewNoTrack(Py_ssize_t size);
extern int _PyTuple_ResizeNoTrack(PyObject **pv, Py_ssize_t newsize);

/* runtime lifecycle */

extern PyStatus _PyTuple_InitGlobalObjects(PyInterpreterState *);
Expand Down
23 changes: 23 additions & 0 deletions Lib/test/test_tuple.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,29 @@ def test_lexicographic_ordering(self):
self.assertLess(a, b)
self.assertLess(b, c)

def test_sequence_tuple(self):
# gh-39117, gh-59313, gh-107137: PySequence_Tuple() must not track
# the tuple by the GC before it's fully initialized.

# TAG object used to find the tuple in objects tracked by the GC
TAG = object()

def my_iter():
yield TAG

for ref in gc.get_referrers(TAG):
if isinstance(ref, tuple):
raise Exception("PySequence_Tuple() leaks the tuple")

# Add 100 items to trigger _PyTuple_Resize() calls.
# PySequence_Tuple() started by allocating a tuple of 10 items.
for num in range(100):
yield num

seq = tuple(my_iter())
self.assertEqual(seq, (TAG, *range(100)))


# Notes on testing hash codes. The primary thing is that Python doesn't
# care about "random" hash codes. To the contrary, we like them to be
# very regular when possible, so that the low-order bits are as evenly
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Fix a crash in :c:func:`PySequence_Tuple` when the tuple which is being
initialized was accessed via GC introspection, like the
:func:`gc.get_referrers` function. Now the function only tracks the tuple by
the GC once it's fully initialized. Patch by Victor Stinner.
46 changes: 27 additions & 19 deletions Objects/abstract.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "pycore_long.h" // _Py_IsNegative
#include "pycore_pyerrors.h" // _PyErr_Occurred()
#include "pycore_pystate.h" // _PyThreadState_GET()
#include "pycore_tuple.h" // _PyTuple_NewNoTrack()
#include "pycore_unionobject.h" // _PyUnion_Check()
#include <ctype.h>
#include <stddef.h> // offsetof()
Expand Down Expand Up @@ -2074,11 +2075,6 @@ PySequence_DelSlice(PyObject *s, Py_ssize_t i1, Py_ssize_t i2)
PyObject *
PySequence_Tuple(PyObject *v)
{
PyObject *it; /* iter(v) */
Py_ssize_t n; /* guess for result tuple size */
PyObject *result = NULL;
Py_ssize_t j;

if (v == NULL) {
return null_error();
}
Expand All @@ -2091,28 +2087,36 @@ PySequence_Tuple(PyObject *v)
a copy, so there's no need for exactness below. */
return Py_NewRef(v);
}
if (PyList_CheckExact(v))
if (PyList_CheckExact(v)) {
return PyList_AsTuple(v);
}

/* Get iterator. */
it = PyObject_GetIter(v);
if (it == NULL)
PyObject *it = PyObject_GetIter(v); // iter(v)
if (it == NULL) {
return NULL;
}

/* Guess result size and allocate space. */
n = PyObject_LengthHint(v, 10);
if (n == -1)
goto Fail;
result = PyTuple_New(n);
if (result == NULL)
Py_ssize_t n = PyObject_LengthHint(v, 10); // guess for result tuple size
if (n == -1) {
Py_DECREF(it);
return NULL;
}

PyObject *result = _PyTuple_NewNoTrack(n);
if (result == NULL) {
goto Fail;
}

/* Fill the tuple. */
for (j = 0; ; ++j) {
Py_ssize_t j = 0;
for (; ; ++j) {
PyObject *item = PyIter_Next(it);
if (item == NULL) {
if (PyErr_Occurred())
if (PyErr_Occurred()) {
goto Fail;
}
break;
}
if (j >= n) {
Expand All @@ -2132,7 +2136,7 @@ PySequence_Tuple(PyObject *v)
goto Fail;
}
n = (Py_ssize_t)newn;
if (_PyTuple_Resize(&result, n) != 0) {
if (_PyTuple_ResizeNoTrack(&result, n) != 0) {
Py_DECREF(item);
goto Fail;
}
Expand All @@ -2141,11 +2145,15 @@ PySequence_Tuple(PyObject *v)
}

/* Cut tuple back if guess was too large. */
if (j < n &&
_PyTuple_Resize(&result, j) != 0)
if (j < n && _PyTuple_ResizeNoTrack(&result, j) != 0) {
goto Fail;

}
Py_DECREF(it);

// No need to track the empty tuple singleton
if (j != 0) {
PyObject_GC_Track(result);
}
return result;

Fail:
Expand Down
54 changes: 44 additions & 10 deletions Objects/tupleobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -65,24 +65,38 @@ tuple_get_empty(void)
return Py_NewRef(&_Py_SINGLETON(tuple_empty));
}

PyObject *
PyTuple_New(Py_ssize_t size)

static inline PyObject *
tuple_new_capi(Py_ssize_t size, int track)
{
PyTupleObject *op;
if (size == 0) {
return tuple_get_empty();
}
op = tuple_alloc(size);
PyTupleObject *op = tuple_alloc(size);
if (op == NULL) {
return NULL;
}
for (Py_ssize_t i = 0; i < size; i++) {
op->ob_item[i] = NULL;
}
_PyObject_GC_TRACK(op);
if (track) {
_PyObject_GC_TRACK(op);
}
return (PyObject *) op;
}

PyObject *
_PyTuple_NewNoTrack(Py_ssize_t size)
{
return tuple_new_capi(size, 0);
}

PyObject *
PyTuple_New(Py_ssize_t size)
{
return tuple_new_capi(size, 1);
}

Py_ssize_t
PyTuple_Size(PyObject *op)
{
Expand Down Expand Up @@ -894,8 +908,8 @@ PyTypeObject PyTuple_Type = {
efficiently. In any case, don't use this if the tuple may already be
known to some other part of the code. */

int
_PyTuple_Resize(PyObject **pv, Py_ssize_t newsize)
static int
tuple_resize(PyObject **pv, Py_ssize_t newsize, int track)
{
PyTupleObject *v;
PyTupleObject *sv;
Expand Down Expand Up @@ -927,7 +941,12 @@ _PyTuple_Resize(PyObject **pv, Py_ssize_t newsize)
/* The empty tuple is statically allocated so we never
resize it in-place. */
Py_DECREF(v);
*pv = PyTuple_New(newsize);
if (track) {
*pv = PyTuple_New(newsize);
}
else {
*pv = _PyTuple_NewNoTrack(newsize);
}
return *pv == NULL ? -1 : 0;
}

Expand All @@ -952,14 +971,29 @@ _PyTuple_Resize(PyObject **pv, Py_ssize_t newsize)
}
_Py_NewReferenceNoTotal((PyObject *) sv);
/* Zero out items added by growing */
if (newsize > oldsize)
if (newsize > oldsize) {
memset(&sv->ob_item[oldsize], 0,
sizeof(*sv->ob_item) * (newsize - oldsize));
}
*pv = (PyObject *) sv;
_PyObject_GC_TRACK(sv);
if (track) {
_PyObject_GC_TRACK(*pv);
}
return 0;
}

int
_PyTuple_ResizeNoTrack(PyObject **pv, Py_ssize_t newsize)
{
return tuple_resize(pv, newsize, 0);
}

int
_PyTuple_Resize(PyObject **pv, Py_ssize_t newsize)
{
return tuple_resize(pv, newsize, 1);
}


static void maybe_freelist_clear(PyInterpreterState *, int);

Expand Down

0 comments on commit 9c9f4a7

Please sign in to comment.