diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 54dc6dcf408116..3e4dd8b4009cd4 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -68,14 +68,15 @@ extern "C" { #define _CHECK_PEP_523 330 #define _CHECK_PERIODIC 331 #define _CHECK_STACK_SPACE 332 -#define _CHECK_VALIDITY 333 -#define _CHECK_VALIDITY_AND_SET_IP 334 -#define _COLD_EXIT 335 -#define _COMPARE_OP 336 -#define _COMPARE_OP_FLOAT 337 -#define _COMPARE_OP_INT 338 -#define _COMPARE_OP_STR 339 -#define _CONTAINS_OP 340 +#define _CHECK_STACK_SPACE_OPERAND 333 +#define _CHECK_VALIDITY 334 +#define _CHECK_VALIDITY_AND_SET_IP 335 +#define _COLD_EXIT 336 +#define _COMPARE_OP 337 +#define _COMPARE_OP_FLOAT 338 +#define _COMPARE_OP_INT 339 +#define _COMPARE_OP_STR 340 +#define _CONTAINS_OP 341 #define _CONTAINS_OP_DICT CONTAINS_OP_DICT #define _CONTAINS_OP_SET CONTAINS_OP_SET #define _CONVERT_VALUE CONVERT_VALUE @@ -87,47 +88,47 @@ extern "C" { #define _DELETE_GLOBAL DELETE_GLOBAL #define _DELETE_NAME DELETE_NAME #define _DELETE_SUBSCR DELETE_SUBSCR -#define _DEOPT 341 +#define _DEOPT 342 #define _DICT_MERGE DICT_MERGE #define _DICT_UPDATE DICT_UPDATE #define _END_SEND END_SEND -#define _ERROR_POP_N 342 +#define _ERROR_POP_N 343 #define _EXIT_INIT_CHECK EXIT_INIT_CHECK -#define _FATAL_ERROR 343 +#define _FATAL_ERROR 344 #define _FORMAT_SIMPLE FORMAT_SIMPLE #define _FORMAT_WITH_SPEC FORMAT_WITH_SPEC -#define _FOR_ITER 344 +#define _FOR_ITER 345 #define _FOR_ITER_GEN FOR_ITER_GEN -#define _FOR_ITER_TIER_TWO 345 +#define _FOR_ITER_TIER_TWO 346 #define _GET_AITER GET_AITER #define _GET_ANEXT GET_ANEXT #define _GET_AWAITABLE GET_AWAITABLE #define _GET_ITER GET_ITER #define _GET_LEN GET_LEN #define _GET_YIELD_FROM_ITER GET_YIELD_FROM_ITER -#define _GUARD_BOTH_FLOAT 346 -#define _GUARD_BOTH_INT 347 -#define _GUARD_BOTH_UNICODE 348 -#define _GUARD_BUILTINS_VERSION 349 -#define _GUARD_DORV_NO_DICT 350 -#define 
_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 351 -#define _GUARD_GLOBALS_VERSION 352 -#define _GUARD_IS_FALSE_POP 353 -#define _GUARD_IS_NONE_POP 354 -#define _GUARD_IS_NOT_NONE_POP 355 -#define _GUARD_IS_TRUE_POP 356 -#define _GUARD_KEYS_VERSION 357 -#define _GUARD_NOT_EXHAUSTED_LIST 358 -#define _GUARD_NOT_EXHAUSTED_RANGE 359 -#define _GUARD_NOT_EXHAUSTED_TUPLE 360 -#define _GUARD_TYPE_VERSION 361 -#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 362 -#define _INIT_CALL_PY_EXACT_ARGS 363 -#define _INIT_CALL_PY_EXACT_ARGS_0 364 -#define _INIT_CALL_PY_EXACT_ARGS_1 365 -#define _INIT_CALL_PY_EXACT_ARGS_2 366 -#define _INIT_CALL_PY_EXACT_ARGS_3 367 -#define _INIT_CALL_PY_EXACT_ARGS_4 368 +#define _GUARD_BOTH_FLOAT 347 +#define _GUARD_BOTH_INT 348 +#define _GUARD_BOTH_UNICODE 349 +#define _GUARD_BUILTINS_VERSION 350 +#define _GUARD_DORV_NO_DICT 351 +#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 352 +#define _GUARD_GLOBALS_VERSION 353 +#define _GUARD_IS_FALSE_POP 354 +#define _GUARD_IS_NONE_POP 355 +#define _GUARD_IS_NOT_NONE_POP 356 +#define _GUARD_IS_TRUE_POP 357 +#define _GUARD_KEYS_VERSION 358 +#define _GUARD_NOT_EXHAUSTED_LIST 359 +#define _GUARD_NOT_EXHAUSTED_RANGE 360 +#define _GUARD_NOT_EXHAUSTED_TUPLE 361 +#define _GUARD_TYPE_VERSION 362 +#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 363 +#define _INIT_CALL_PY_EXACT_ARGS 364 +#define _INIT_CALL_PY_EXACT_ARGS_0 365 +#define _INIT_CALL_PY_EXACT_ARGS_1 366 +#define _INIT_CALL_PY_EXACT_ARGS_2 367 +#define _INIT_CALL_PY_EXACT_ARGS_3 368 +#define _INIT_CALL_PY_EXACT_ARGS_4 369 #define _INSTRUMENTED_CALL INSTRUMENTED_CALL #define _INSTRUMENTED_CALL_FUNCTION_EX INSTRUMENTED_CALL_FUNCTION_EX #define _INSTRUMENTED_CALL_KW INSTRUMENTED_CALL_KW @@ -144,65 +145,65 @@ extern "C" { #define _INSTRUMENTED_RETURN_CONST INSTRUMENTED_RETURN_CONST #define _INSTRUMENTED_RETURN_VALUE INSTRUMENTED_RETURN_VALUE #define _INSTRUMENTED_YIELD_VALUE INSTRUMENTED_YIELD_VALUE -#define _INTERNAL_INCREMENT_OPT_COUNTER 369 -#define _IS_NONE 370 +#define 
_INTERNAL_INCREMENT_OPT_COUNTER 370 +#define _IS_NONE 371 #define _IS_OP IS_OP -#define _ITER_CHECK_LIST 371 -#define _ITER_CHECK_RANGE 372 -#define _ITER_CHECK_TUPLE 373 -#define _ITER_JUMP_LIST 374 -#define _ITER_JUMP_RANGE 375 -#define _ITER_JUMP_TUPLE 376 -#define _ITER_NEXT_LIST 377 -#define _ITER_NEXT_RANGE 378 -#define _ITER_NEXT_TUPLE 379 -#define _JUMP_TO_TOP 380 +#define _ITER_CHECK_LIST 372 +#define _ITER_CHECK_RANGE 373 +#define _ITER_CHECK_TUPLE 374 +#define _ITER_JUMP_LIST 375 +#define _ITER_JUMP_RANGE 376 +#define _ITER_JUMP_TUPLE 377 +#define _ITER_NEXT_LIST 378 +#define _ITER_NEXT_RANGE 379 +#define _ITER_NEXT_TUPLE 380 +#define _JUMP_TO_TOP 381 #define _LIST_APPEND LIST_APPEND #define _LIST_EXTEND LIST_EXTEND #define _LOAD_ASSERTION_ERROR LOAD_ASSERTION_ERROR -#define _LOAD_ATTR 381 -#define _LOAD_ATTR_CLASS 382 -#define _LOAD_ATTR_CLASS_0 383 -#define _LOAD_ATTR_CLASS_1 384 +#define _LOAD_ATTR 382 +#define _LOAD_ATTR_CLASS 383 +#define _LOAD_ATTR_CLASS_0 384 +#define _LOAD_ATTR_CLASS_1 385 #define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN -#define _LOAD_ATTR_INSTANCE_VALUE 385 -#define _LOAD_ATTR_INSTANCE_VALUE_0 386 -#define _LOAD_ATTR_INSTANCE_VALUE_1 387 -#define _LOAD_ATTR_METHOD_LAZY_DICT 388 -#define _LOAD_ATTR_METHOD_NO_DICT 389 -#define _LOAD_ATTR_METHOD_WITH_VALUES 390 -#define _LOAD_ATTR_MODULE 391 -#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 392 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 393 +#define _LOAD_ATTR_INSTANCE_VALUE 386 +#define _LOAD_ATTR_INSTANCE_VALUE_0 387 +#define _LOAD_ATTR_INSTANCE_VALUE_1 388 +#define _LOAD_ATTR_METHOD_LAZY_DICT 389 +#define _LOAD_ATTR_METHOD_NO_DICT 390 +#define _LOAD_ATTR_METHOD_WITH_VALUES 391 +#define _LOAD_ATTR_MODULE 392 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 393 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 394 #define _LOAD_ATTR_PROPERTY LOAD_ATTR_PROPERTY -#define _LOAD_ATTR_SLOT 394 -#define _LOAD_ATTR_SLOT_0 395 -#define _LOAD_ATTR_SLOT_1 396 -#define 
_LOAD_ATTR_WITH_HINT 397 +#define _LOAD_ATTR_SLOT 395 +#define _LOAD_ATTR_SLOT_0 396 +#define _LOAD_ATTR_SLOT_1 397 +#define _LOAD_ATTR_WITH_HINT 398 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS #define _LOAD_CONST LOAD_CONST -#define _LOAD_CONST_INLINE 398 -#define _LOAD_CONST_INLINE_BORROW 399 -#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 400 -#define _LOAD_CONST_INLINE_WITH_NULL 401 +#define _LOAD_CONST_INLINE 399 +#define _LOAD_CONST_INLINE_BORROW 400 +#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 401 +#define _LOAD_CONST_INLINE_WITH_NULL 402 #define _LOAD_DEREF LOAD_DEREF -#define _LOAD_FAST 402 -#define _LOAD_FAST_0 403 -#define _LOAD_FAST_1 404 -#define _LOAD_FAST_2 405 -#define _LOAD_FAST_3 406 -#define _LOAD_FAST_4 407 -#define _LOAD_FAST_5 408 -#define _LOAD_FAST_6 409 -#define _LOAD_FAST_7 410 +#define _LOAD_FAST 403 +#define _LOAD_FAST_0 404 +#define _LOAD_FAST_1 405 +#define _LOAD_FAST_2 406 +#define _LOAD_FAST_3 407 +#define _LOAD_FAST_4 408 +#define _LOAD_FAST_5 409 +#define _LOAD_FAST_6 410 +#define _LOAD_FAST_7 411 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 411 -#define _LOAD_GLOBAL_BUILTINS 412 -#define _LOAD_GLOBAL_MODULE 413 +#define _LOAD_GLOBAL 412 +#define _LOAD_GLOBAL_BUILTINS 413 +#define _LOAD_GLOBAL_MODULE 414 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR @@ -216,49 +217,49 @@ extern "C" { #define _MATCH_SEQUENCE MATCH_SEQUENCE #define _NOP NOP #define _POP_EXCEPT POP_EXCEPT -#define _POP_FRAME 414 -#define _POP_JUMP_IF_FALSE 415 -#define _POP_JUMP_IF_TRUE 416 +#define _POP_FRAME 415 +#define _POP_JUMP_IF_FALSE 416 +#define _POP_JUMP_IF_TRUE 417 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 417 +#define 
_POP_TOP_LOAD_CONST_INLINE_BORROW 418 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 418 +#define _PUSH_FRAME 419 #define _PUSH_NULL PUSH_NULL -#define _REPLACE_WITH_TRUE 419 +#define _REPLACE_WITH_TRUE 420 #define _RESUME_CHECK RESUME_CHECK -#define _SAVE_RETURN_OFFSET 420 -#define _SEND 421 +#define _SAVE_RETURN_OFFSET 421 +#define _SEND 422 #define _SEND_GEN SEND_GEN #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _SIDE_EXIT 422 -#define _START_EXECUTOR 423 -#define _STORE_ATTR 424 -#define _STORE_ATTR_INSTANCE_VALUE 425 -#define _STORE_ATTR_SLOT 426 +#define _SIDE_EXIT 423 +#define _START_EXECUTOR 424 +#define _STORE_ATTR 425 +#define _STORE_ATTR_INSTANCE_VALUE 426 +#define _STORE_ATTR_SLOT 427 #define _STORE_ATTR_WITH_HINT STORE_ATTR_WITH_HINT #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 427 -#define _STORE_FAST_0 428 -#define _STORE_FAST_1 429 -#define _STORE_FAST_2 430 -#define _STORE_FAST_3 431 -#define _STORE_FAST_4 432 -#define _STORE_FAST_5 433 -#define _STORE_FAST_6 434 -#define _STORE_FAST_7 435 +#define _STORE_FAST 428 +#define _STORE_FAST_0 429 +#define _STORE_FAST_1 430 +#define _STORE_FAST_2 431 +#define _STORE_FAST_3 432 +#define _STORE_FAST_4 433 +#define _STORE_FAST_5 434 +#define _STORE_FAST_6 435 +#define _STORE_FAST_7 436 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME #define _STORE_SLICE STORE_SLICE -#define _STORE_SUBSCR 436 +#define _STORE_SUBSCR 437 #define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT #define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT #define _SWAP SWAP -#define _TO_BOOL 437 +#define _TO_BOOL 438 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT #define _TO_BOOL_LIST TO_BOOL_LIST @@ -268,12 +269,12 @@ extern "C" { #define _UNARY_NEGATIVE UNARY_NEGATIVE 
#define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 438 +#define _UNPACK_SEQUENCE 439 #define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE #define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE #define _WITH_EXCEPT_START WITH_EXCEPT_START -#define MAX_UOP_ID 438 +#define MAX_UOP_ID 439 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 0f2046fb3d0c3d..111824a938f6cc 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -228,6 +228,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_GUARD_IS_NOT_NONE_POP] = HAS_EXIT_FLAG, [_JUMP_TO_TOP] = HAS_EVAL_BREAK_FLAG, [_SET_IP] = 0, + [_CHECK_STACK_SPACE_OPERAND] = HAS_DEOPT_FLAG, [_SAVE_RETURN_OFFSET] = HAS_ARG_FLAG, [_EXIT_TRACE] = HAS_EXIT_FLAG, [_CHECK_VALIDITY] = HAS_DEOPT_FLAG, @@ -302,6 +303,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_CHECK_PEP_523] = "_CHECK_PEP_523", [_CHECK_PERIODIC] = "_CHECK_PERIODIC", [_CHECK_STACK_SPACE] = "_CHECK_STACK_SPACE", + [_CHECK_STACK_SPACE_OPERAND] = "_CHECK_STACK_SPACE_OPERAND", [_CHECK_VALIDITY] = "_CHECK_VALIDITY", [_CHECK_VALIDITY_AND_SET_IP] = "_CHECK_VALIDITY_AND_SET_IP", [_COLD_EXIT] = "_COLD_EXIT", @@ -902,6 +904,8 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _SET_IP: return 0; + case _CHECK_STACK_SPACE_OPERAND: + return 0; case _SAVE_RETURN_OFFSET: return 0; case _EXIT_TRACE: diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index b59f4b74a8593e..ceb49c3c7129cb 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -952,6 +952,269 @@ def testfunc(n): _, ex = self._run_with_optimizer(testfunc, 16) self.assertIsNone(ex) + def test_combine_stack_space_checks_sequential(self): + def dummy12(x): + return x - 1 + def dummy13(y): + z = y + 2 + return y, z + def testfunc(n): + a = 0 + for _ 
in range(n): + b = dummy12(7) + c, d = dummy13(9) + a += b + c + d + return a + + res, ex = self._run_with_optimizer(testfunc, 32) + self.assertEqual(res, 832) + self.assertIsNotNone(ex) + + uops_and_operands = [(opcode, operand) for opcode, _, _, operand in ex] + uop_names = [uop[0] for uop in uops_and_operands] + self.assertEqual(uop_names.count("_PUSH_FRAME"), 2) + self.assertEqual(uop_names.count("_POP_FRAME"), 2) + self.assertEqual(uop_names.count("_CHECK_STACK_SPACE"), 0) + self.assertEqual(uop_names.count("_CHECK_STACK_SPACE_OPERAND"), 1) + # sequential calls: max(12, 13) == 13 + largest_stack = _testinternalcapi.get_co_framesize(dummy13.__code__) + self.assertIn(("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands) + + def test_combine_stack_space_checks_nested(self): + def dummy12(x): + return x + 3 + def dummy15(y): + z = dummy12(y) + return y, z + def testfunc(n): + a = 0 + for _ in range(n): + b, c = dummy15(2) + a += b + c + return a + + res, ex = self._run_with_optimizer(testfunc, 32) + self.assertEqual(res, 224) + self.assertIsNotNone(ex) + + uops_and_operands = [(opcode, operand) for opcode, _, _, operand in ex] + uop_names = [uop[0] for uop in uops_and_operands] + self.assertEqual(uop_names.count("_PUSH_FRAME"), 2) + self.assertEqual(uop_names.count("_POP_FRAME"), 2) + self.assertEqual(uop_names.count("_CHECK_STACK_SPACE"), 0) + self.assertEqual(uop_names.count("_CHECK_STACK_SPACE_OPERAND"), 1) + # nested calls: 15 + 12 == 27 + largest_stack = ( + _testinternalcapi.get_co_framesize(dummy15.__code__) + + _testinternalcapi.get_co_framesize(dummy12.__code__) + ) + self.assertIn(("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands) + + def test_combine_stack_space_checks_several_calls(self): + def dummy12(x): + return x + 3 + def dummy13(y): + z = y + 2 + return y, z + def dummy18(y): + z = dummy12(y) + x, w = dummy13(z) + return z, x, w + def testfunc(n): + a = 0 + for _ in range(n): + b = dummy12(5) + c, d, e = dummy18(2) + a 
+= b + c + d + e + return a + + res, ex = self._run_with_optimizer(testfunc, 32) + self.assertEqual(res, 800) + self.assertIsNotNone(ex) + + uops_and_operands = [(opcode, operand) for opcode, _, _, operand in ex] + uop_names = [uop[0] for uop in uops_and_operands] + self.assertEqual(uop_names.count("_PUSH_FRAME"), 4) + self.assertEqual(uop_names.count("_POP_FRAME"), 4) + self.assertEqual(uop_names.count("_CHECK_STACK_SPACE"), 0) + self.assertEqual(uop_names.count("_CHECK_STACK_SPACE_OPERAND"), 1) + # max(12, 18 + max(12, 13)) == 31 + largest_stack = ( + _testinternalcapi.get_co_framesize(dummy18.__code__) + + _testinternalcapi.get_co_framesize(dummy13.__code__) + ) + self.assertIn(("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands) + + def test_combine_stack_space_checks_several_calls_different_order(self): + # same as `several_calls` but with top-level calls reversed + def dummy12(x): + return x + 3 + def dummy13(y): + z = y + 2 + return y, z + def dummy18(y): + z = dummy12(y) + x, w = dummy13(z) + return z, x, w + def testfunc(n): + a = 0 + for _ in range(n): + c, d, e = dummy18(2) + b = dummy12(5) + a += b + c + d + e + return a + + res, ex = self._run_with_optimizer(testfunc, 32) + self.assertEqual(res, 800) + self.assertIsNotNone(ex) + + uops_and_operands = [(opcode, operand) for opcode, _, _, operand in ex] + uop_names = [uop[0] for uop in uops_and_operands] + self.assertEqual(uop_names.count("_PUSH_FRAME"), 4) + self.assertEqual(uop_names.count("_POP_FRAME"), 4) + self.assertEqual(uop_names.count("_CHECK_STACK_SPACE"), 0) + self.assertEqual(uop_names.count("_CHECK_STACK_SPACE_OPERAND"), 1) + # max(18 + max(12, 13), 12) == 31 + largest_stack = ( + _testinternalcapi.get_co_framesize(dummy18.__code__) + + _testinternalcapi.get_co_framesize(dummy13.__code__) + ) + self.assertIn(("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands) + + def test_combine_stack_space_complex(self): + def dummy0(x): + return x + def dummy1(x): + return
dummy0(x) + def dummy2(x): + return dummy1(x) + def dummy3(x): + return dummy0(x) + def dummy4(x): + y = dummy0(x) + return dummy3(y) + def dummy5(x): + return dummy2(x) + def dummy6(x): + y = dummy5(x) + z = dummy0(y) + return dummy4(z) + def testfunc(n): + a = 0 + for _ in range(n): + b = dummy5(1) + c = dummy0(1) + d = dummy6(1) + a += b + c + d + return a + + res, ex = self._run_with_optimizer(testfunc, 32) + self.assertEqual(res, 96) + self.assertIsNotNone(ex) + + uops_and_operands = [(opcode, operand) for opcode, _, _, operand in ex] + uop_names = [uop[0] for uop in uops_and_operands] + self.assertEqual(uop_names.count("_PUSH_FRAME"), 15) + self.assertEqual(uop_names.count("_POP_FRAME"), 15) + + self.assertEqual(uop_names.count("_CHECK_STACK_SPACE"), 0) + self.assertEqual(uop_names.count("_CHECK_STACK_SPACE_OPERAND"), 1) + largest_stack = ( + _testinternalcapi.get_co_framesize(dummy6.__code__) + + _testinternalcapi.get_co_framesize(dummy5.__code__) + + _testinternalcapi.get_co_framesize(dummy2.__code__) + + _testinternalcapi.get_co_framesize(dummy1.__code__) + + _testinternalcapi.get_co_framesize(dummy0.__code__) + ) + self.assertIn( + ("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands + ) + + def test_combine_stack_space_checks_large_framesize(self): + # Create a function with a large framesize. This ensures _CHECK_STACK_SPACE is + # actually doing its job. Note that the resulting trace hits + # UOP_MAX_TRACE_LENGTH, but since all _CHECK_STACK_SPACEs happen early, this + # test is still meaningful. + repetitions = 10000 + ns = {} + header = """ + def dummy_large(a0): + """ + body = "".join([f""" + a{n+1} = a{n} + 1 + """ for n in range(repetitions)]) + return_ = f""" + return a{repetitions-1} + """ + exec(textwrap.dedent(header + body + return_), ns, ns) + dummy_large = ns['dummy_large'] + + # this is something like: + # + # def dummy_large(a0): + # a1 = a0 + 1 + # a2 = a1 + 1 + # ....
+ # a9999 = a9998 + 1 + # return a9999 + + def dummy15(z): + y = dummy_large(z) + return y + 3 + + def testfunc(n): + b = 0 + for _ in range(n): + b += dummy15(7) + return b + + res, ex = self._run_with_optimizer(testfunc, 32) + self.assertEqual(res, 32 * (repetitions + 9)) + self.assertIsNotNone(ex) + + uops_and_operands = [(opcode, operand) for opcode, _, _, operand in ex] + uop_names = [uop[0] for uop in uops_and_operands] + self.assertEqual(uop_names.count("_PUSH_FRAME"), 2) + self.assertEqual(uop_names.count("_CHECK_STACK_SPACE_OPERAND"), 1) + + # this hits a different case during trace projection in refcount test runs only, + # so we need to account for both possibilities + self.assertIn(uop_names.count("_CHECK_STACK_SPACE"), [0, 1]) + if uop_names.count("_CHECK_STACK_SPACE") == 0: + largest_stack = ( + _testinternalcapi.get_co_framesize(dummy15.__code__) + + _testinternalcapi.get_co_framesize(dummy_large.__code__) + ) + else: + largest_stack = _testinternalcapi.get_co_framesize(dummy15.__code__) + self.assertIn( + ("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands + ) + + def test_combine_stack_space_checks_recursion(self): + def dummy15(x): + while x > 0: + return dummy15(x - 1) + return 42 + def testfunc(n): + a = 0 + for _ in range(n): + a += dummy15(n) + return a + + res, ex = self._run_with_optimizer(testfunc, 32) + self.assertEqual(res, 42 * 32) + self.assertIsNotNone(ex) + + uops_and_operands = [(opcode, operand) for opcode, _, _, operand in ex] + uop_names = [uop[0] for uop in uops_and_operands] + self.assertEqual(uop_names.count("_PUSH_FRAME"), 2) + self.assertEqual(uop_names.count("_POP_FRAME"), 0) + self.assertEqual(uop_names.count("_CHECK_STACK_SPACE"), 1) + self.assertEqual(uop_names.count("_CHECK_STACK_SPACE_OPERAND"), 1) + largest_stack = _testinternalcapi.get_co_framesize(dummy15.__code__) + self.assertIn(("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands) + def test_many_nested(self): # overflow the trace_stack 
def dummy_a(x): @@ -976,8 +1239,9 @@ def testfunc(n): a += dummy_h(n) return a - self._run_with_optimizer(testfunc, 32) - + res, ex = self._run_with_optimizer(testfunc, 32) + self.assertEqual(res, 32 * 32) + self.assertIsNone(ex) if __name__ == "__main__": unittest.main() diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index c5d65a373906f2..6b5d99f6ffac1f 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -959,6 +959,17 @@ iframe_getlasti(PyObject *self, PyObject *frame) return PyLong_FromLong(PyUnstable_InterpreterFrame_GetLasti(f)); } +static PyObject * +get_co_framesize(PyObject *self, PyObject *arg) +{ + if (!PyCode_Check(arg)) { + PyErr_SetString(PyExc_TypeError, "argument must be a code object"); + return NULL; + } + PyCodeObject *code = (PyCodeObject *)arg; + return PyLong_FromLong(code->co_framesize); +} + static PyObject * new_counter_optimizer(PyObject *self, PyObject *arg) { @@ -1715,6 +1726,7 @@ static PyMethodDef module_functions[] = { {"iframe_getcode", iframe_getcode, METH_O, NULL}, {"iframe_getline", iframe_getline, METH_O, NULL}, {"iframe_getlasti", iframe_getlasti, METH_O, NULL}, + {"get_co_framesize", get_co_framesize, METH_O, NULL}, {"get_optimizer", get_optimizer, METH_NOARGS, NULL}, {"set_optimizer", set_optimizer, METH_O, NULL}, {"new_counter_optimizer", new_counter_optimizer, METH_NOARGS, NULL}, diff --git a/Python/bytecodes.c b/Python/bytecodes.c index ce208aac9c7953..fa53c969fe361e 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4094,6 +4094,12 @@ dummy_func( frame->instr_ptr = (_Py_CODEUNIT *)instr_ptr; } + tier2 op(_CHECK_STACK_SPACE_OPERAND, (framesize/2 --)) { + assert(framesize <= INT_MAX); + DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, framesize)); + DEOPT_IF(tstate->py_recursion_remaining <= 1); + } + op(_SAVE_RETURN_OFFSET, (--)) { #if TIER_ONE frame->return_offset = (uint16_t)(next_instr - this_instr); diff --git a/Python/executor_cases.c.h 
b/Python/executor_cases.c.h index 82f2171f1ede83..98476798fbbbdf 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3592,6 +3592,14 @@ break; } + case _CHECK_STACK_SPACE_OPERAND: { + uint32_t framesize = (uint32_t)CURRENT_OPERAND(); + assert(framesize <= INT_MAX); + if (!_PyThreadState_HasStackSpace(tstate, framesize)) JUMP_TO_JUMP_TARGET(); + if (tstate->py_recursion_remaining <= 1) JUMP_TO_JUMP_TARGET(); + break; + } + case _SAVE_RETURN_OFFSET: { oparg = CURRENT_OPARG(); #if TIER_ONE diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 6f553f8ab8ad2e..a21679f366a74e 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -529,14 +529,41 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) } } } - Py_FatalError("No terminating instruction"); Py_UNREACHABLE(); } +/* _PUSH_FRAME/_POP_FRAME's operand can be 0, a PyFunctionObject *, or a + * PyCodeObject *. Retrieve the code object if possible. + */ +static PyCodeObject * +get_co(_PyUOpInstruction *op) +{ + assert(op->opcode == _PUSH_FRAME || op->opcode == _POP_FRAME); + PyCodeObject *co = NULL; + uint64_t operand = op->operand; + if (operand == 0) { + return NULL; + } + if (operand & 1) { + co = (PyCodeObject *)(operand & ~1); + } + else { + PyFunctionObject *func = (PyFunctionObject *)operand; + assert(PyFunction_Check(func)); + co = (PyCodeObject *)func->func_code; + } + assert(PyCode_Check(co)); + return co; +} + static void peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_size) { PyCodeObject *co = _PyFrame_GetCode(frame); + int curr_space = 0; + int max_space = 0; + _PyUOpInstruction *first_valid_check_stack = NULL; + _PyUOpInstruction *corresponding_check_stack = NULL; for (int pc = 0; pc < buffer_size; pc++) { int opcode = buffer[pc].opcode; switch(opcode) { @@ -547,8 +574,7 @@ peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_s buffer[pc].operand = (uintptr_t)val;
break; } - case _CHECK_PEP_523: - { + case _CHECK_PEP_523: { /* Setting the eval frame function invalidates * all executors, so no need to check dynamically */ if (_PyInterpreterState_GET()->eval_frame == NULL) { @@ -556,29 +582,72 @@ peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_s } break; } - case _PUSH_FRAME: - case _POP_FRAME: - { - uint64_t operand = buffer[pc].operand; - if (operand & 1) { - co = (PyCodeObject *)(operand & ~1); - assert(PyCode_Check(co)); - } - else if (operand == 0) { - co = NULL; + case _CHECK_STACK_SPACE: { + assert(corresponding_check_stack == NULL); + corresponding_check_stack = &buffer[pc]; + break; + } + case _PUSH_FRAME: { + assert(corresponding_check_stack != NULL); + co = get_co(&buffer[pc]); + if (co == NULL) { + // should be about to _EXIT_TRACE anyway + goto finish; + } + int framesize = co->co_framesize; + assert(framesize > 0); + if (curr_space > INT32_MAX - framesize) { + // total won't fit in signed 32-bit int; test before adding (signed overflow is UB) + goto finish; + } + curr_space += framesize; + max_space = curr_space > max_space ? curr_space : max_space; + if (first_valid_check_stack == NULL) { + first_valid_check_stack = corresponding_check_stack; } else { - PyFunctionObject *func = (PyFunctionObject *)operand; - assert(PyFunction_Check(func)); - co = (PyCodeObject *)func->func_code; + // delete all but the first valid _CHECK_STACK_SPACE + corresponding_check_stack->opcode = _NOP; + } + corresponding_check_stack = NULL; + break; + } + case _POP_FRAME: { + assert(corresponding_check_stack == NULL); + assert(co != NULL); + int framesize = co->co_framesize; + assert(framesize > 0); + assert(framesize <= curr_space); + curr_space -= framesize; + co = get_co(&buffer[pc]); + if (co == NULL) { + // might be impossible, but bailing is still safe + goto finish; } break; } case _JUMP_TO_TOP: case _EXIT_TRACE: - return; + goto finish; +#ifdef Py_DEBUG + case _CHECK_STACK_SPACE_OPERAND: { + /* We should never see _CHECK_STACK_SPACE_OPERANDs.
+ * They are only created at the end of this pass. */ + Py_UNREACHABLE(); + } +#endif } } + Py_UNREACHABLE(); +finish: + if (first_valid_check_stack != NULL) { + assert(first_valid_check_stack->opcode == _CHECK_STACK_SPACE); + assert(max_space > 0); + assert(max_space <= INT_MAX); + assert(max_space <= INT32_MAX); + first_valid_check_stack->opcode = _CHECK_STACK_SPACE_OPERAND; + first_valid_check_stack->operand = max_space; + } } // 0 - failure, no error raised, just fall back to Tier 1 diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index b4a1da8aec14af..209be370c4aa38 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -1906,6 +1906,10 @@ break; } + case _CHECK_STACK_SPACE_OPERAND: { + break; + } + case _SAVE_RETURN_OFFSET: { break; }