From 206571bf0b0d846e578c79bf1dac965c5ad0bd57 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Tue, 9 Apr 2024 18:53:23 +0000 Subject: [PATCH 1/4] gh-117376: Partial implementation of deferred reference counting This marks objects as using deferred reference counting using the `ob_gc_bits` field in the free-threaded build and collects those objects during GC. --- Include/internal/pycore_gc.h | 5 +++-- Include/internal/pycore_object.h | 15 +++++++++++++++ Lib/test/test_code.py | 2 ++ Objects/descrobject.c | 1 + Objects/funcobject.c | 9 +++++++++ Objects/moduleobject.c | 26 ++++++++++++++++++-------- Objects/object.c | 13 +++++++++++++ Objects/typeobject.c | 2 ++ Python/gc_free_threading.c | 29 ++++++++++++++++++++++++++++- 9 files changed, 91 insertions(+), 11 deletions(-) diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index c4482c4ffcfa60c..60020b5c01f8a6c 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -39,12 +39,13 @@ static inline PyObject* _Py_FROM_GC(PyGC_Head *gc) { /* Bit flags for ob_gc_bits (in Py_GIL_DISABLED builds) */ #ifdef Py_GIL_DISABLED -# define _PyGC_BITS_TRACKED (1) -# define _PyGC_BITS_FINALIZED (2) +# define _PyGC_BITS_TRACKED (1) // Tracked by the GC +# define _PyGC_BITS_FINALIZED (2) // tp_finalize was called # define _PyGC_BITS_UNREACHABLE (4) # define _PyGC_BITS_FROZEN (8) # define _PyGC_BITS_SHARED (16) # define _PyGC_BITS_SHARED_INLINE (32) +# define _PyGC_BITS_DEFERRED (64) // Use deferred reference counting #endif /* True if the object is currently tracked by the GC. */ diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 9aa2e5bf918a7b9..7b1c919e627dd4f 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -158,6 +158,21 @@ static inline void _Py_ClearImmortal(PyObject *op) op = NULL; \ } while (0) +// Mark an object as supporting deferred reference counting. This is a no-op +// in the default (with GIL) build. 
Objects that use deferred reference +// counting should be tracked by the GC so that they are eventually collected. +extern void _PyObject_SetDeferredRefcount(PyObject *op); + +static inline int +_PyObject_HasDeferredRefcount(PyObject *op) +{ +#ifdef Py_GIL_DISABLED + return (op->ob_gc_bits & _PyGC_BITS_DEFERRED) != 0; +#else + return 0; +#endif +} + #if !defined(Py_GIL_DISABLED) static inline void _Py_DECREF_SPECIALIZED(PyObject *op, const destructor destruct) diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py index ecd1e82a6dbef9e..5c0fdc8edc31b66 100644 --- a/Lib/test/test_code.py +++ b/Lib/test/test_code.py @@ -834,6 +834,7 @@ def test_free_called(self): SetExtra(f.__code__, FREE_INDEX, ctypes.c_voidp(100)) del f + gc_collect() # For free-threaded build self.assertEqual(LAST_FREED, 100) def test_get_set(self): @@ -872,6 +873,7 @@ def run(self): del f tt.start() tt.join() + gc_collect() # For free-threaded build self.assertEqual(LAST_FREED, 500) diff --git a/Objects/descrobject.c b/Objects/descrobject.c index 3423f152ce862dd..ec269cabceb0877 100644 --- a/Objects/descrobject.c +++ b/Objects/descrobject.c @@ -909,6 +909,7 @@ descr_new(PyTypeObject *descrtype, PyTypeObject *type, const char *name) descr = (PyDescrObject *)PyType_GenericAlloc(descrtype, 0); if (descr != NULL) { + _PyObject_SetDeferredRefcount((PyObject *)descr); descr->d_type = (PyTypeObject*)Py_XNewRef(type); descr->d_name = PyUnicode_InternFromString(name); if (descr->d_name == NULL) { diff --git a/Objects/funcobject.c b/Objects/funcobject.c index a3c0800e7891d38..276b3db29703711 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -127,6 +127,9 @@ _PyFunction_FromConstructor(PyFrameConstructor *constr) op->func_typeparams = NULL; op->vectorcall = _PyFunction_Vectorcall; op->func_version = 0; + // NOTE: functions created via FrameConstructor do not use deferred + // reference counting because they are typically not part of cycles + // nor accessed by multiple threads. 
_PyObject_GC_TRACK(op); handle_func_event(PyFunction_EVENT_CREATE, op, NULL); return op; @@ -202,6 +205,12 @@ PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname op->func_typeparams = NULL; op->vectorcall = _PyFunction_Vectorcall; op->func_version = 0; + if ((code_obj->co_flags & CO_NESTED) == 0) { + // Use deferred reference counting for top-level functions, but not + // nested functions because they are more likely to capture variables, + // which makes prompt deallocation more important. + _PyObject_SetDeferredRefcount((PyObject *)op); + } _PyObject_GC_TRACK(op); handle_func_event(PyFunction_EVENT_CREATE, op, NULL); return (PyObject *)op; diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c index 9cd98fb4345fdd6..1829cd2c6b4abb1 100644 --- a/Objects/moduleobject.c +++ b/Objects/moduleobject.c @@ -88,21 +88,31 @@ new_module_notrack(PyTypeObject *mt) m->md_weaklist = NULL; m->md_name = NULL; m->md_dict = PyDict_New(); - if (m->md_dict != NULL) { - return m; + if (m->md_dict == NULL) { + Py_DECREF(m); + return NULL; } - Py_DECREF(m); - return NULL; + return m; +} + +static void +track_module(PyModuleObject *m) +{ + _PyObject_SetDeferredRefcount(m->md_dict); + PyObject_GC_Track(m->md_dict); + + _PyObject_SetDeferredRefcount((PyObject *)m); + PyObject_GC_Track(m); } static PyObject * new_module(PyTypeObject *mt, PyObject *args, PyObject *kws) { - PyObject *m = (PyObject *)new_module_notrack(mt); + PyModuleObject *m = new_module_notrack(mt); if (m != NULL) { - PyObject_GC_Track(m); + track_module(m); } - return m; + return (PyObject *)m; } PyObject * @@ -113,7 +123,7 @@ PyModule_NewObject(PyObject *name) return NULL; if (module_init_dict(m, m->md_dict, name, NULL) != 0) goto fail; - PyObject_GC_Track(m); + track_module(m); return (PyObject *)m; fail: diff --git a/Objects/object.c b/Objects/object.c index c8e6f8fc1a2b40e..18215139ba87552 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2414,6 +2414,19 @@ 
_Py_SetImmortal(PyObject *op) _Py_SetImmortalUntracked(op); } +void +_PyObject_SetDeferredRefcount(PyObject *op) +{ +#ifdef Py_GIL_DISABLED + assert(PyType_IS_GC(Py_TYPE(op))); + assert(_Py_IsOwnedByCurrentThread(op)); + assert( op->ob_ref_shared == 0); + op->ob_gc_bits |= _PyGC_BITS_DEFERRED; + op->ob_ref_local += 1; + op->ob_ref_shared = _Py_REF_QUEUED; +#endif +} + void _Py_ResurrectReference(PyObject *op) { diff --git a/Objects/typeobject.c b/Objects/typeobject.c index e9f2d2577e9fabf..e8a1b4d5598b5a4 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -3581,6 +3581,8 @@ type_new_alloc(type_new_ctx *ctx) et->ht_module = NULL; et->_ht_tpname = NULL; + _PyObject_SetDeferredRefcount((PyObject *)et); + return type; } diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 111632ffb776410..e3793a6e52687bf 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -159,6 +159,25 @@ gc_decref(PyObject *op) op->ob_tid -= 1; } +static inline Py_ssize_t +gc_refcount(PyObject *op) +{ + Py_ssize_t refcount = Py_REFCNT(op); + if (_PyObject_HasDeferredRefcount(op)) { + refcount -= 1; + } + return refcount; +} + +static void +disable_deferred_refcounting(PyObject *op) +{ + if (_PyObject_HasDeferredRefcount(op)) { + op->ob_gc_bits &= ~_PyGC_BITS_DEFERRED; + op->ob_ref_shared -= (1 << _Py_REF_SHARED_SHIFT); + } +} + static Py_ssize_t merge_refcount(PyObject *op, Py_ssize_t extra) { @@ -375,9 +394,10 @@ update_refs(const mi_heap_t *heap, const mi_heap_area_t *area, } Py_ssize_t refcount = Py_REFCNT(op); + refcount -= _PyObject_HasDeferredRefcount(op); _PyObject_ASSERT(op, refcount >= 0); - if (refcount > 0) { + if (refcount > 0 && !_PyObject_HasDeferredRefcount(op)) { // Untrack tuples and dicts as necessary in this pass, but not objects // with zero refcount, which we will want to collect. 
if (PyTuple_CheckExact(op)) { @@ -466,6 +486,9 @@ mark_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area, return true; } + _PyObject_ASSERT_WITH_MSG(op, gc_get_refs(op) >= 0, + "refcount is too small"); + if (gc_is_unreachable(op) && gc_get_refs(op) != 0) { // Object is reachable but currently marked as unreachable. // Mark it as reachable and traverse its pointers to find @@ -499,6 +522,10 @@ scan_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area, struct collection_state *state = (struct collection_state *)args; if (gc_is_unreachable(op)) { + // Disable deferred refcounting for unreachable objects so that they + // are collected immediately after finalization. + disable_deferred_refcounting(op); + // Merge and add one to the refcount to prevent deallocation while we // are holding on to it in a worklist. merge_refcount(op, 1); From 30fc873f67b15573098c5eac2072979b849f9ee0 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Wed, 10 Apr 2024 17:11:24 -0400 Subject: [PATCH 2/4] Remove unused function --- Python/gc_free_threading.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index e3793a6e52687bf..9cf0e989d0993f5 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -159,16 +159,6 @@ gc_decref(PyObject *op) op->ob_tid -= 1; } -static inline Py_ssize_t -gc_refcount(PyObject *op) -{ - Py_ssize_t refcount = Py_REFCNT(op); - if (_PyObject_HasDeferredRefcount(op)) { - refcount -= 1; - } - return refcount; -} - static void disable_deferred_refcounting(PyObject *op) { From f8add0e925bc728ff1ca4cf043674075f390b133 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 11 Apr 2024 19:34:27 +0000 Subject: [PATCH 3/4] Simplify module___init___impl --- Objects/moduleobject.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c index 1829cd2c6b4abb1..da6a276c41be1fe 100644 --- 
a/Objects/moduleobject.c +++ b/Objects/moduleobject.c @@ -715,16 +715,7 @@ static int module___init___impl(PyModuleObject *self, PyObject *name, PyObject *doc) /*[clinic end generated code: output=e7e721c26ce7aad7 input=57f9e177401e5e1e]*/ { - PyObject *dict = self->md_dict; - if (dict == NULL) { - dict = PyDict_New(); - if (dict == NULL) - return -1; - self->md_dict = dict; - } - if (module_init_dict(self, dict, name, doc) < 0) - return -1; - return 0; + return module_init_dict(self, self->md_dict, name, doc); } static void From 21ca2bcaf3cd5059b0947ae905a05819e5d50e43 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Fri, 12 Apr 2024 17:06:38 +0000 Subject: [PATCH 4/4] WIP --- Include/internal/pycore_gc.h | 9 +++++++++ Objects/object.c | 8 +++++++- Python/gc_free_threading.c | 24 ++++++++++++++++++++++++ Python/pystate.c | 7 +++++++ 4 files changed, 47 insertions(+), 1 deletion(-) diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index 60020b5c01f8a6c..c8252c40332cf28 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -312,6 +312,10 @@ struct _gc_runtime_state { collections, and are awaiting to undergo a full collection for the first time. */ Py_ssize_t long_lived_pending; + + /* True to use immortalization in places where we would normally use + deferred reference counting. 
*/ + int immortalize_deferred; #endif }; @@ -343,6 +347,11 @@ extern void _PyGC_ClearAllFreeLists(PyInterpreterState *interp); extern void _Py_ScheduleGC(PyThreadState *tstate); extern void _Py_RunGC(PyThreadState *tstate); +#ifdef Py_GIL_DISABLED +// gh-117783: Immortalize objects that use deferred reference counting +extern void _PyGC_ImmortalizeDeferredObjects(PyInterpreterState *interp); +#endif + #ifdef __cplusplus } #endif diff --git a/Objects/object.c b/Objects/object.c index 4fcc04ea4c20313..943414d29094ccf 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2430,7 +2430,13 @@ _PyObject_SetDeferredRefcount(PyObject *op) #ifdef Py_GIL_DISABLED assert(PyType_IS_GC(Py_TYPE(op))); assert(_Py_IsOwnedByCurrentThread(op)); - assert( op->ob_ref_shared == 0); + assert(op->ob_ref_shared == 0); + PyInterpreterState *interp = _PyInterpreterState_GET(); + if (interp->gc.immortalize_deferred) { + // gh-117696: + _Py_SetImmortal(op); + return; + } op->ob_gc_bits |= _PyGC_BITS_DEFERRED; op->ob_ref_local += 1; op->ob_ref_shared = _Py_REF_QUEUED; diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 9cf0e989d0993f5..8e99947732d68aa 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1781,6 +1781,30 @@ custom_visitor_wrapper(const mi_heap_t *heap, const mi_heap_area_t *area, return true; } +// gh-117783: Immortalize objects that use deferred reference counting to +// temporarily work around scaling bottlenecks. 
+static bool +immortalize_visitor(const mi_heap_t *heap, const mi_heap_area_t *area, + void *block, size_t block_size, void *args) +{ + PyObject *op = op_from_block(block, args, false); + if (op != NULL && _PyObject_HasDeferredRefcount(op)) { + _Py_SetImmortal(op); + op->ob_gc_bits &= ~_PyGC_BITS_DEFERRED; + } + return true; +} + +void +_PyGC_ImmortalizeDeferredObjects(PyInterpreterState *interp) +{ + struct visitor_args args; + _PyEval_StopTheWorld(interp); + gc_visit_heaps(interp, &immortalize_visitor, &args); + interp->gc.immortalize_deferred = 1; + _PyEval_StartTheWorld(interp); +} + void PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg) { diff --git a/Python/pystate.c b/Python/pystate.c index acec905484c21f6..d29b345c526b0d3 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1565,6 +1565,13 @@ new_threadstate(PyInterpreterState *interp, int whence) // Must be called with lock unlocked to avoid re-entrancy deadlock. PyMem_RawFree(new_tstate); } + else { +#ifdef Py_GIL_DISABLED + if (!interp->gc.immortalize_deferred) { + _PyGC_ImmortalizeDeferredObjects(interp); + } +#endif + } #ifdef Py_GIL_DISABLED // Must be called with lock unlocked to avoid lock ordering deadlocks.