From 48c70b8f7dfd00a018abbac50ea987f54fa4db51 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Thu, 19 Dec 2024 11:08:17 +0900 Subject: [PATCH] gh-115999: Enable BINARY_SUBSCR_GETITEM for free-threaded build (gh-127737) --- Include/internal/pycore_opcode_metadata.h | 4 +-- Include/internal/pycore_typeobject.h | 1 + Include/internal/pycore_uop_metadata.h | 2 +- Lib/test/test_opcache.py | 19 +++++++++++- Objects/typeobject.c | 25 ++++++++++++++++ Programs/test_frozenmain.h | 2 +- Python/bytecodes.c | 22 +++++++------- Python/executor_cases.c.h | 32 ++++++++++++--------- Python/generated_cases.c.h | 19 ++++++------ Python/optimizer_bytecodes.c | 3 +- Python/optimizer_cases.c.h | 16 ++++++++--- Python/specialize.c | 35 ++++++++++++----------- 12 files changed, 118 insertions(+), 62 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 28aa1120414337..d2ae8928a8fe8f 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -994,7 +994,7 @@ int _PyOpcode_max_stack_effect(int opcode, int oparg, int *effect) { return 0; } case BINARY_SUBSCR: { - *effect = 0; + *effect = 1; return 0; } case BINARY_SUBSCR_DICT: { @@ -1002,7 +1002,7 @@ int _PyOpcode_max_stack_effect(int opcode, int oparg, int *effect) { return 0; } case BINARY_SUBSCR_GETITEM: { - *effect = 0; + *effect = 1; return 0; } case BINARY_SUBSCR_LIST_INT: { diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h index 7b39d07f976ee3..581153344a8e05 100644 --- a/Include/internal/pycore_typeobject.h +++ b/Include/internal/pycore_typeobject.h @@ -278,6 +278,7 @@ typedef int (*_py_validate_type)(PyTypeObject *); // and if the validation is passed, it will set the ``tp_version`` as valid // tp_version_tag from the ``ty``. extern int _PyType_Validate(PyTypeObject *ty, _py_validate_type validate, unsigned int *tp_version); +extern int _PyType_CacheGetItemForSpecialization(PyHeapTypeObject *ht, PyObject *descriptor, uint32_t tp_version); #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index dd775d3f7d3cdd..eadfda472a7270 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -722,7 +722,7 @@ int _PyUop_num_popped(int opcode, int oparg) case _BINARY_SUBSCR_CHECK_FUNC: return 0; case _BINARY_SUBSCR_INIT_CALL: - return 2; + return 3; case _LIST_APPEND: return 1; case _SET_ADD: diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index 0a7557adc4763b..94709e2022550a 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -1069,7 +1069,7 @@ def write(items): opname = "STORE_SUBSCR_LIST_INT" self.assert_races_do_not_crash(opname, get_items, read, write) - @requires_specialization + @requires_specialization_ft def test_unpack_sequence_list(self): def get_items(): items = [] @@ -1245,6 +1245,14 @@ def f(o, n): f(test_obj, 1) self.assertEqual(test_obj.b, 0) +# gh-127274: BINARY_SUBSCR_GETITEM will only cache __getitem__ methods that +# are deferred. We only defer functions defined at the top-level. +class CGetItem: + def __init__(self, val): + self.val = val + def __getitem__(self, item): + return self.val + class TestSpecializer(TestBase): @@ -1520,6 +1528,15 @@ def binary_subscr_str_int(): self.assert_specialized(binary_subscr_str_int, "BINARY_SUBSCR_STR_INT") self.assert_no_opcode(binary_subscr_str_int, "BINARY_SUBSCR") + def binary_subscr_getitems(): + items = [CGetItem(i) for i in range(100)] + for i in range(100): + self.assertEqual(items[i][i], i) + + binary_subscr_getitems() + self.assert_specialized(binary_subscr_getitems, "BINARY_SUBSCR_GETITEM") + self.assert_no_opcode(binary_subscr_getitems, "BINARY_SUBSCR") + if __name__ == "__main__": unittest.main() diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 2068d6aa9be52b..7f95b519561e68 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5679,6 +5679,31 @@ _PyType_CacheInitForSpecialization(PyHeapTypeObject *type, PyObject *init, return can_cache; } +int +_PyType_CacheGetItemForSpecialization(PyHeapTypeObject *ht, PyObject *descriptor, uint32_t tp_version) +{ + if (!descriptor || !tp_version) { + return 0; + } + int can_cache; + BEGIN_TYPE_LOCK(); + can_cache = ((PyTypeObject*)ht)->tp_version_tag == tp_version; + // This pointer is invalidated by PyType_Modified (see the comment on + // struct _specialization_cache): + PyFunctionObject *func = (PyFunctionObject *)descriptor; + uint32_t version = _PyFunction_GetVersionForCurrentState(func); + can_cache = can_cache && _PyFunction_IsVersionValid(version); +#ifdef Py_GIL_DISABLED + can_cache = can_cache && _PyObject_HasDeferredRefcount(descriptor); +#endif + if (can_cache) { + FT_ATOMIC_STORE_PTR_RELEASE(ht->_spec_cache.getitem, descriptor); + FT_ATOMIC_STORE_UINT32_RELAXED(ht->_spec_cache.getitem_version, version); + } + END_TYPE_LOCK(); + return can_cache; +} + static void set_flags(PyTypeObject *self, unsigned long mask, unsigned long flags) { diff --git a/Programs/test_frozenmain.h b/Programs/test_frozenmain.h index c936622c020e3c..99b0fa48e01c8b 100644 --- a/Programs/test_frozenmain.h +++ b/Programs/test_frozenmain.h @@ -1,6 +1,6 @@ // Auto-generated by Programs/freeze_test_frozenmain.py unsigned char M_test_frozenmain[] = { - 227,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0, + 227,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0, 0,0,0,0,0,243,168,0,0,0,149,0,89,0,79,0, 70,0,111,0,89,0,79,0,70,1,111,1,88,2,31,0, 79,1,49,1,0,0,0,0,0,0,29,0,88,2,31,0, diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 772b46d17ec198..b67264f0440869 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -865,26 +865,24 @@ dummy_func( res = PyStackRef_FromPyObjectSteal(res_o); } - op(_BINARY_SUBSCR_CHECK_FUNC, (container, unused -- container, unused)) { + op(_BINARY_SUBSCR_CHECK_FUNC, (container, unused -- container, unused, getitem)) { PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); DEOPT_IF(!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE)); PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; - PyObject *getitem = ht->_spec_cache.getitem; - DEOPT_IF(getitem == NULL); - assert(PyFunction_Check(getitem)); - uint32_t cached_version = ht->_spec_cache.getitem_version; - DEOPT_IF(((PyFunctionObject *)getitem)->func_version != cached_version); - PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem); + PyObject *getitem_o = FT_ATOMIC_LOAD_PTR_ACQUIRE(ht->_spec_cache.getitem); + DEOPT_IF(getitem_o == NULL); + assert(PyFunction_Check(getitem_o)); + uint32_t cached_version = FT_ATOMIC_LOAD_UINT32_RELAXED(ht->_spec_cache.getitem_version); + DEOPT_IF(((PyFunctionObject *)getitem_o)->func_version != cached_version); + PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem_o); assert(code->co_argcount == 2); DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize)); + getitem = PyStackRef_FromPyObjectNew(getitem_o); STAT_INC(BINARY_SUBSCR, hit); } - op(_BINARY_SUBSCR_INIT_CALL, (container, sub -- new_frame: _PyInterpreterFrame* )) { - PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); - PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; - PyObject *getitem = ht->_spec_cache.getitem; - new_frame = _PyFrame_PushUnchecked(tstate, PyStackRef_FromPyObjectNew(getitem), 2, frame); + op(_BINARY_SUBSCR_INIT_CALL, (container, sub, getitem -- new_frame: _PyInterpreterFrame* )) { + new_frame = _PyFrame_PushUnchecked(tstate, getitem, 2, frame); new_frame->localsplus[0] = container; new_frame->localsplus[1] = sub; INPUTS_DEAD(); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 55e9c3aa2db64d..de61a64a6e3374 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1125,6 +1125,7 @@ case _BINARY_SUBSCR_CHECK_FUNC: { _PyStackRef container; + _PyStackRef getitem; container = stack_pointer[-2]; PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); if (!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE)) { @@ -1132,42 +1133,45 @@ JUMP_TO_JUMP_TARGET(); } PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; - PyObject *getitem = ht->_spec_cache.getitem; - if (getitem == NULL) { + PyObject *getitem_o = FT_ATOMIC_LOAD_PTR_ACQUIRE(ht->_spec_cache.getitem); + if (getitem_o == NULL) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - assert(PyFunction_Check(getitem)); - uint32_t cached_version = ht->_spec_cache.getitem_version; - if (((PyFunctionObject *)getitem)->func_version != cached_version) { + assert(PyFunction_Check(getitem_o)); + uint32_t cached_version = FT_ATOMIC_LOAD_UINT32_RELAXED(ht->_spec_cache.getitem_version); + if (((PyFunctionObject *)getitem_o)->func_version != cached_version) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem); + PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem_o); assert(code->co_argcount == 2); if (!_PyThreadState_HasStackSpace(tstate, code->co_framesize)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } + getitem = PyStackRef_FromPyObjectNew(getitem_o); STAT_INC(BINARY_SUBSCR, hit); + stack_pointer[0] = getitem; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); break; } case _BINARY_SUBSCR_INIT_CALL: { + _PyStackRef getitem; _PyStackRef sub; _PyStackRef container; _PyInterpreterFrame *new_frame; - sub = stack_pointer[-1]; - container = stack_pointer[-2]; - PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); - PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; - PyObject *getitem = ht->_spec_cache.getitem; - new_frame = _PyFrame_PushUnchecked(tstate, PyStackRef_FromPyObjectNew(getitem), 2, frame); + getitem = stack_pointer[-1]; + sub = stack_pointer[-2]; + container = stack_pointer[-3]; + new_frame = _PyFrame_PushUnchecked(tstate, getitem, 2, frame); new_frame->localsplus[0] = container; new_frame->localsplus[1] = sub; frame->return_offset = 2 ; - stack_pointer[-2].bits = (uintptr_t)new_frame; - stack_pointer += -1; + stack_pointer[-3].bits = (uintptr_t)new_frame; + stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 94343f953221eb..8a89ba890fd9c9 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -505,6 +505,7 @@ INSTRUCTION_STATS(BINARY_SUBSCR_GETITEM); static_assert(INLINE_CACHE_ENTRIES_BINARY_SUBSCR == 1, "incorrect cache size"); _PyStackRef container; + _PyStackRef getitem; _PyStackRef sub; _PyInterpreterFrame *new_frame; /* Skip 1 cache entry */ @@ -518,23 +519,21 @@ PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); DEOPT_IF(!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE), BINARY_SUBSCR); PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; - PyObject *getitem = ht->_spec_cache.getitem; - DEOPT_IF(getitem == NULL, BINARY_SUBSCR); - assert(PyFunction_Check(getitem)); - uint32_t cached_version = ht->_spec_cache.getitem_version; - DEOPT_IF(((PyFunctionObject *)getitem)->func_version != cached_version, BINARY_SUBSCR); - PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem); + PyObject *getitem_o = FT_ATOMIC_LOAD_PTR_ACQUIRE(ht->_spec_cache.getitem); + DEOPT_IF(getitem_o == NULL, BINARY_SUBSCR); + assert(PyFunction_Check(getitem_o)); + uint32_t cached_version = FT_ATOMIC_LOAD_UINT32_RELAXED(ht->_spec_cache.getitem_version); + DEOPT_IF(((PyFunctionObject *)getitem_o)->func_version != cached_version, BINARY_SUBSCR); + PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem_o); assert(code->co_argcount == 2); DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), BINARY_SUBSCR); + getitem = PyStackRef_FromPyObjectNew(getitem_o); STAT_INC(BINARY_SUBSCR, hit); } // _BINARY_SUBSCR_INIT_CALL { sub = stack_pointer[-1]; - PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); - PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; - PyObject *getitem = ht->_spec_cache.getitem; - new_frame = _PyFrame_PushUnchecked(tstate, PyStackRef_FromPyObjectNew(getitem), 2, frame); + new_frame = _PyFrame_PushUnchecked(tstate, getitem, 2, frame); new_frame->localsplus[0] = container; new_frame->localsplus[1] = sub; frame->return_offset = 2 ; diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 0b8aff02367e31..e60c0d38425bfe 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -349,9 +349,10 @@ dummy_func(void) { GETLOCAL(this_instr->operand0) = res; } - op(_BINARY_SUBSCR_INIT_CALL, (container, sub -- new_frame: _Py_UOpsAbstractFrame *)) { + op(_BINARY_SUBSCR_INIT_CALL, (container, sub, getitem -- new_frame: _Py_UOpsAbstractFrame *)) { (void)container; (void)sub; + (void)getitem; new_frame = NULL; ctx->done = true; } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index f4fbe8c8aa0480..33b34d6fa0d3f9 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -592,21 +592,29 @@ } case _BINARY_SUBSCR_CHECK_FUNC: { + _Py_UopsSymbol *getitem; + getitem = sym_new_not_null(ctx); + stack_pointer[0] = getitem; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); break; } case _BINARY_SUBSCR_INIT_CALL: { + _Py_UopsSymbol *getitem; _Py_UopsSymbol *sub; _Py_UopsSymbol *container; _Py_UOpsAbstractFrame *new_frame; - sub = stack_pointer[-1]; - container = stack_pointer[-2]; + getitem = stack_pointer[-1]; + sub = stack_pointer[-2]; + container = stack_pointer[-3]; (void)container; (void)sub; + (void)getitem; new_frame = NULL; ctx->done = true; - stack_pointer[-2] = (_Py_UopsSymbol *)new_frame; - stack_pointer += -1; + stack_pointer[-3] = (_Py_UopsSymbol *)new_frame; + stack_pointer += -2; assert(WITHIN_STACK_BOUNDS()); break; } diff --git a/Python/specialize.c b/Python/specialize.c index 6eb298217ec2d3..6c45320f95db8e 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1096,6 +1096,7 @@ specialize_instance_load_attr(PyObject* owner, _Py_CODEUNIT* instr, PyObject* na SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_ATTR_METHOD); return -1; } + /* Don't specialize if PEP 523 is active */ if (_PyInterpreterState_GET()->eval_frame) { SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_OTHER); return -1; @@ -1165,6 +1166,7 @@ specialize_instance_load_attr(PyObject* owner, _Py_CODEUNIT* instr, PyObject* na if (version == 0) { return -1; } + /* Don't specialize if PEP 523 is active */ if (_PyInterpreterState_GET()->eval_frame) { SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_OTHER); return -1; @@ -1781,12 +1783,12 @@ _Py_Specialize_BinarySubscr( specialized_op = BINARY_SUBSCR_DICT; goto success; } -#ifndef Py_GIL_DISABLED - PyTypeObject *cls = Py_TYPE(container); - PyObject *descriptor = _PyType_Lookup(cls, &_Py_ID(__getitem__)); + unsigned int tp_version; + PyObject *descriptor = _PyType_LookupRefAndVersion(container_type, &_Py_ID(__getitem__), &tp_version); if (descriptor && Py_TYPE(descriptor) == &PyFunction_Type) { if (!(container_type->tp_flags & Py_TPFLAGS_HEAPTYPE)) { SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_SUBSCR_NOT_HEAP_TYPE); + Py_DECREF(descriptor); goto fail; } PyFunctionObject *func = (PyFunctionObject *)descriptor; @@ -1794,30 +1796,29 @@ _Py_Specialize_BinarySubscr( int kind = function_kind(fcode); if (kind != SIMPLE_FUNCTION) { SPECIALIZATION_FAIL(BINARY_SUBSCR, kind); + Py_DECREF(descriptor); goto fail; } if (fcode->co_argcount != 2) { SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS); + Py_DECREF(descriptor); goto fail; } - uint32_t version = _PyFunction_GetVersionForCurrentState(func); - if (!_PyFunction_IsVersionValid(version)) { - SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_VERSIONS); - goto fail; - } + + PyHeapTypeObject *ht = (PyHeapTypeObject *)container_type; + /* Don't specialize if PEP 523 is active */ if (_PyInterpreterState_GET()->eval_frame) { SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OTHER); + Py_DECREF(descriptor); goto fail; } - PyHeapTypeObject *ht = (PyHeapTypeObject *)container_type; - // This pointer is invalidated by PyType_Modified (see the comment on - // struct _specialization_cache): - ht->_spec_cache.getitem = descriptor; - ht->_spec_cache.getitem_version = version; - specialized_op = BINARY_SUBSCR_GETITEM; - goto success; + if (_PyType_CacheGetItemForSpecialization(ht, descriptor, (uint32_t)tp_version)) { + specialized_op = BINARY_SUBSCR_GETITEM; + Py_DECREF(descriptor); + goto success; + } } -#endif // Py_GIL_DISABLED + Py_XDECREF(descriptor); SPECIALIZATION_FAIL(BINARY_SUBSCR, binary_subscr_fail_kind(container_type, sub)); fail: @@ -2617,6 +2618,7 @@ _Py_Specialize_ForIter(_PyStackRef iter, _Py_CODEUNIT *instr, int oparg) assert(instr[oparg + INLINE_CACHE_ENTRIES_FOR_ITER + 1].op.code == END_FOR || instr[oparg + INLINE_CACHE_ENTRIES_FOR_ITER + 1].op.code == INSTRUMENTED_END_FOR ); + /* Don't specialize if PEP 523 is active */ if (_PyInterpreterState_GET()->eval_frame) { SPECIALIZATION_FAIL(FOR_ITER, SPEC_FAIL_OTHER); goto failure; @@ -2645,6 +2647,7 @@ _Py_Specialize_Send(_PyStackRef receiver_st, _Py_CODEUNIT *instr) assert(_PyOpcode_Caches[SEND] == INLINE_CACHE_ENTRIES_SEND); PyTypeObject *tp = Py_TYPE(receiver); if (tp == &PyGen_Type || tp == &PyCoro_Type) { + /* Don't specialize if PEP 523 is active */ if (_PyInterpreterState_GET()->eval_frame) { SPECIALIZATION_FAIL(SEND, SPEC_FAIL_OTHER); goto failure;