From 52b7d6030761832a3cd8bdd9e20a99cecdc439c8 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 3 Jun 2024 15:05:04 +0800 Subject: [PATCH] hopefully fix the templates and stuff --- Grammar/python.gram | 9 +- Include/cpython/compile.h | 4 + Parser/action_helpers.c | 59 + Parser/parser.c | 189 +- Parser/pegen.c | 12 + Parser/pegen.h | 5 + Python/ast.c | 37 +- Python/ast_opt.c | 2907 +++++++++++----------- Tools/peg_generator/pegen/c_generator.py | 55 +- 9 files changed, 1670 insertions(+), 1607 deletions(-) diff --git a/Grammar/python.gram b/Grammar/python.gram index 1cfba2a..f0ee59d 100644 --- a/Grammar/python.gram +++ b/Grammar/python.gram @@ -1252,14 +1252,11 @@ atom[expr_ty]: | 'True' { _PyAST_Constant(Py_True, NULL, EXTRA) } | 'False' { _PyAST_Constant(Py_False, NULL, EXTRA) } | 'None' { _PyAST_Constant(Py_None, NULL, EXTRA) } - | a='$' b[int]=[pos_c_int_opt] { - b == -1 ? NULL : - b < p->subn ? - _PyAST_Template(b, 0, EXTRA) : - RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "template index out of range") } + | t='$' lvl[int]=[pos_c_int_opt] { + lvl >= 0 ? _PyPegen_make_template(p, lvl, t, EXTRA) : NULL } | &(STRING|FSTRING_START) strings | NUMBER - | &'(' (compound_expr | tuplecomp | tuple | group | genexp) + | &'(' (compound_expr | tuplecomp | group | genexp | tuple) | &'[' (listcomp | list) | &'{' (dictcomp | setcomp | dict | set) | '...' { _PyAST_Constant(Py_Ellipsis, NULL, EXTRA) } diff --git a/Include/cpython/compile.h b/Include/cpython/compile.h index ae17cef..167ee94 100644 --- a/Include/cpython/compile.h +++ b/Include/cpython/compile.h @@ -2,6 +2,8 @@ # error "this header file must not be included directly" #endif +#define PY_MAX_TEMPLATE_SUBS 200 + /* Public interface */ #define PyCF_MASK (CO_FUTURE_DIVISION | CO_FUTURE_ABSOLUTE_IMPORT | \ CO_FUTURE_WITH_STATEMENT | CO_FUTURE_PRINT_FUNCTION | \ @@ -24,6 +26,8 @@ PyCF_TYPE_COMMENTS | PyCF_DONT_IMPLY_DEDENT | \ PyCF_ALLOW_INCOMPLETE_INPUT | PyCF_OPTIMIZED_AST) + + typedef struct { int cf_flags; /* bitmask of CO_xxx flags relevant to future */ int cf_feature_version; /* minor Python version (PyCF_ONLY_AST) */ diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c index db3ce60..56ed528 100644 --- a/Parser/action_helpers.c +++ b/Parser/action_helpers.c @@ -1749,3 +1749,62 @@ _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings, assert(current_pos == n_elements); return _PyAST_JoinedStr(values, lineno, col_offset, end_lineno, end_col_offset, p->arena); } + +expr_ty +_PyPegen_make_template(Parser *p, int level, Token *t, + int lineno, int col_offset, int end_lineno, + int end_col_offset, PyArena *arena) +{ + if (level >= p->subn) { + RAISE_SYNTAX_ERROR_KNOWN_LOCATION(t, "template index out of range"); + return NULL; + } + + expr_ty node = _PyAST_Template(level, 0, + lineno, col_offset, + end_lineno, end_col_offset, + p->arena); + p->template_subs[p->subn - level - 1] = node; + return node; +} + +int +_PyPegen_inc_subn(Parser *p) +{ + if (p->subn >= PY_MAX_TEMPLATE_SUBS) { + int lineno = p->tok->lineno; + int col_offset = CURRENT_POS; + RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, + lineno, col_offset, lineno, col_offset, + "composition/comprehension depth exceeded %d", + PY_MAX_TEMPLATE_SUBS); + return 0; + } + + /* Ensure that the data isn't garbage. */ + p->template_subs[p->subn++] = NULL; + return 1; +} + +int +_PyPegen_dec_subn(Parser *p, int success) +{ + if (p->subn <= 0) { + int lineno = p->tok->lineno; + int col_offset = CURRENT_POS; + RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, + lineno, col_offset, lineno, col_offset, + "composition/comprehension depth underflow", + PY_MAX_TEMPLATE_SUBS); + return -1; + } + + if (!success) { + p->template_subs[--p->subn] = NULL; + p->max_subn = p->subn; + } + else { + p->max_subn = p->subn--; + } + return success; +} diff --git a/Parser/parser.c b/Parser/parser.c index 092f295..475aa38 100644 --- a/Parser/parser.c +++ b/Parser/parser.c @@ -3607,7 +3607,6 @@ assignment_rule(Parser *p) D(fprintf(stderr, "%*c> assignment[%d-%d L%d]: %s\n", p->level, ' ', _mark, p->mark, p->tok->lineno, "single_target '|>=' ~ top_rhs")); int _cut_var = 0; Token * _literal; - int _templateuse_1; expr_ty a; expr_ty c; if ( @@ -3617,10 +3616,10 @@ assignment_rule(Parser *p) && (_cut_var = 1) && - (p->subn++, _templateuse_1 = + _PyPegen_inc_subn(p) && _PyPegen_dec_subn(p, ( !! (c = top_rhs_rule(p)) // top_rhs - , p->subn--, _templateuse_1) + )) ) { D(fprintf(stderr, "%*c+ assignment[%d-%d L%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, p->tok->lineno, "single_target '|>=' ~ top_rhs")); @@ -3657,7 +3656,6 @@ assignment_rule(Parser *p) D(fprintf(stderr, "%*c> assignment[%d-%d L%d]: %s\n", p->level, ' ', _mark, p->mark, p->tok->lineno, "aug_target '|>=' ~ top_rhs")); int _cut_var = 0; Token * _literal; - int _templateuse_1; expr_ty a; expr_ty c; if ( @@ -3667,10 +3665,10 @@ assignment_rule(Parser *p) && (_cut_var = 1) && - (p->subn++, _templateuse_1 = + _PyPegen_inc_subn(p) && _PyPegen_dec_subn(p, ( !! (c = top_rhs_rule(p)) // top_rhs - , p->subn--, _templateuse_1) + )) ) { D(fprintf(stderr, "%*c+ assignment[%d-%d L%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, p->tok->lineno, "aug_target '|>=' ~ top_rhs")); @@ -6121,7 +6119,6 @@ assignment_block_rule(Parser *p) D(fprintf(stderr, "%*c> assignment_block[%d-%d L%d]: %s\n", p->level, ' ', _mark, p->mark, p->tok->lineno, "single_target '|>=' ~ block_expr")); int _cut_var = 0; Token * _literal; - int _templateuse_1; expr_ty a; expr_ty c; if ( @@ -6131,10 +6128,10 @@ assignment_block_rule(Parser *p) && (_cut_var = 1) && - (p->subn++, _templateuse_1 = + _PyPegen_inc_subn(p) && _PyPegen_dec_subn(p, ( !! (c = block_expr_rule(p)) // block_expr - , p->subn--, _templateuse_1) + )) ) { D(fprintf(stderr, "%*c+ assignment_block[%d-%d L%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, p->tok->lineno, "single_target '|>=' ~ block_expr")); @@ -6171,7 +6168,6 @@ assignment_block_rule(Parser *p) D(fprintf(stderr, "%*c> assignment_block[%d-%d L%d]: %s\n", p->level, ' ', _mark, p->mark, p->tok->lineno, "aug_target '|>=' ~ block_expr")); int _cut_var = 0; Token * _literal; - int _templateuse_1; expr_ty a; expr_ty c; if ( @@ -6181,10 +6177,10 @@ assignment_block_rule(Parser *p) && (_cut_var = 1) && - (p->subn++, _templateuse_1 = + _PyPegen_inc_subn(p) && _PyPegen_dec_subn(p, ( !! (c = block_expr_rule(p)) // block_expr - , p->subn--, _templateuse_1) + )) ) { D(fprintf(stderr, "%*c+ assignment_block[%d-%d L%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, p->tok->lineno, "aug_target '|>=' ~ block_expr")); @@ -6460,7 +6456,6 @@ assignment_block_nonewline_rule(Parser *p) D(fprintf(stderr, "%*c> assignment_block_nonewline[%d-%d L%d]: %s\n", p->level, ' ', _mark, p->mark, p->tok->lineno, "single_target '|>=' ~ block_expr_nonewline")); int _cut_var = 0; Token * _literal; - int _templateuse_1; expr_ty a; expr_ty c; if ( @@ -6470,10 +6465,10 @@ assignment_block_nonewline_rule(Parser *p) && (_cut_var = 1) && - (p->subn++, _templateuse_1 = + _PyPegen_inc_subn(p) && _PyPegen_dec_subn(p, ( !! (c = block_expr_nonewline_rule(p)) // block_expr_nonewline - , p->subn--, _templateuse_1) + )) ) { D(fprintf(stderr, "%*c+ assignment_block_nonewline[%d-%d L%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, p->tok->lineno, "single_target '|>=' ~ block_expr_nonewline")); @@ -6510,7 +6505,6 @@ assignment_block_nonewline_rule(Parser *p) D(fprintf(stderr, "%*c> assignment_block_nonewline[%d-%d L%d]: %s\n", p->level, ' ', _mark, p->mark, p->tok->lineno, "aug_target '|>=' ~ block_expr_nonewline")); int _cut_var = 0; Token * _literal; - int _templateuse_1; expr_ty a; expr_ty c; if ( @@ -6520,10 +6514,10 @@ assignment_block_nonewline_rule(Parser *p) && (_cut_var = 1) && - (p->subn++, _templateuse_1 = + _PyPegen_inc_subn(p) && _PyPegen_dec_subn(p, ( !! (c = block_expr_nonewline_rule(p)) // block_expr_nonewline - , p->subn--, _templateuse_1) + )) ) { D(fprintf(stderr, "%*c+ assignment_block_nonewline[%d-%d L%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, p->tok->lineno, "aug_target '|>=' ~ block_expr_nonewline")); @@ -20853,7 +20847,6 @@ composition_raw(Parser *p) } D(fprintf(stderr, "%*c> composition[%d-%d L%d]: %s\n", p->level, ' ', _mark, p->mark, p->tok->lineno, "composition '|>' bitwise_or")); Token * _literal; - int _templateuse_1; expr_ty a; expr_ty b; if ( @@ -20861,10 +20854,10 @@ composition_raw(Parser *p) && (_literal = _PyPegen_expect_token(p, 55)) // token='|>' && - (p->subn++, _templateuse_1 = + _PyPegen_inc_subn(p) && _PyPegen_dec_subn(p, ( !! (b = bitwise_or_rule(p)) // bitwise_or - , p->subn--, _templateuse_1) + )) ) { D(fprintf(stderr, "%*c+ composition[%d-%d L%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, p->tok->lineno, "composition '|>' bitwise_or")); @@ -22848,7 +22841,7 @@ simple_slice_rule(Parser *p) // | '$' pos_c_int_opt? // | &(STRING | FSTRING_START) strings // | NUMBER -// | &'(' (compound_expr | tuplecomp | tuple | group | genexp) +// | &'(' (compound_expr | tuplecomp | group | genexp | tuple) // | &'[' (listcomp | list) // | &'{' (dictcomp | setcomp | dict | set) // | '...' @@ -22998,12 +22991,12 @@ atom_rule(Parser *p) return NULL; } D(fprintf(stderr, "%*c> atom[%d-%d L%d]: %s\n", p->level, ' ', _mark, p->mark, p->tok->lineno, "'$' pos_c_int_opt?")); - Token * a; - int b; + int lvl; + Token * t; if ( - (a = _PyPegen_expect_token(p, 56)) // token='$' + (t = _PyPegen_expect_token(p, 56)) // token='$' && - (b = (int)pos_c_int_opt_rule(p), !p->error_indicator) // pos_c_int_opt? + (lvl = (int)pos_c_int_opt_rule(p), !p->error_indicator) // pos_c_int_opt? ) { D(fprintf(stderr, "%*c+ atom[%d-%d L%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, p->tok->lineno, "'$' pos_c_int_opt?")); @@ -23016,7 +23009,7 @@ atom_rule(Parser *p) UNUSED(_end_lineno); // Only used by EXTRA macro int _end_col_offset = _token->end_col_offset; UNUSED(_end_col_offset); // Only used by EXTRA macro - _res = b == - 1 ? NULL : b < p -> subn ? _PyAST_Template ( b , 0 , EXTRA ) : RAISE_SYNTAX_ERROR_KNOWN_LOCATION ( a , "template index out of range" ); + _res = lvl >= 0 ? _PyPegen_make_template ( p , lvl , t , EXTRA ) : NULL; if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; p->level--; @@ -23068,26 +23061,26 @@ atom_rule(Parser *p) D(fprintf(stderr, "%*c%s atom[%d-%d L%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, p->tok->lineno, "NUMBER")); } - { // &'(' (compound_expr | tuplecomp | tuple | group | genexp) + { // &'(' (compound_expr | tuplecomp | group | genexp | tuple) if (p->error_indicator) { p->level--; return NULL; } - D(fprintf(stderr, "%*c> atom[%d-%d L%d]: %s\n", p->level, ' ', _mark, p->mark, p->tok->lineno, "&'(' (compound_expr | tuplecomp | tuple | group | genexp)")); + D(fprintf(stderr, "%*c> atom[%d-%d L%d]: %s\n", p->level, ' ', _mark, p->mark, p->tok->lineno, "&'(' (compound_expr | tuplecomp | group | genexp | tuple)")); void *_tmp_168_var; if ( _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 7) // token='(' && - (_tmp_168_var = _tmp_168_rule(p)) // compound_expr | tuplecomp | tuple | group | genexp + (_tmp_168_var = _tmp_168_rule(p)) // compound_expr | tuplecomp | group | genexp | tuple ) { - D(fprintf(stderr, "%*c+ atom[%d-%d L%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, p->tok->lineno, "&'(' (compound_expr | tuplecomp | tuple | group | genexp)")); + D(fprintf(stderr, "%*c+ atom[%d-%d L%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, p->tok->lineno, "&'(' (compound_expr | tuplecomp | group | genexp | tuple)")); _res = _tmp_168_var; goto done; } p->mark = _mark; D(fprintf(stderr, "%*c%s atom[%d-%d L%d]: %s failed!\n", p->level, ' ', - p->error_indicator ? "ERROR!" : "-", _mark, p->mark, p->tok->lineno, "&'(' (compound_expr | tuplecomp | tuple | group | genexp)")); + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, p->tok->lineno, "&'(' (compound_expr | tuplecomp | group | genexp | tuple)")); } { // &'[' (listcomp | list) if (p->error_indicator) { @@ -27537,13 +27530,16 @@ listcomp_rule(Parser *p) p->level--; return NULL; } - p->subn++; expr_ty _res = NULL; + if (!_PyPegen_inc_subn(p)) { + p->level--; + return NULL; + } int _mark = p->mark; if (p->mark == p->fill && _PyPegen_fill_token(p) < 0) { p->error_indicator = 1; p->level--; - p->subn--; + _PyPegen_dec_subn(p, 0); return NULL; } int _start_lineno = p->tokens[_mark]->lineno; @@ -27553,7 +27549,7 @@ listcomp_rule(Parser *p) { // '[' star_named_expression for_if_clauses ']' if (p->error_indicator) { p->level--; - p->subn--; + _PyPegen_dec_subn(p, 0); return NULL; } D(fprintf(stderr, "%*c> listcomp[%d-%d L%d]: %s\n", p->level, ' ', _mark, p->mark, p->tok->lineno, "'[' star_named_expression for_if_clauses ']'")); @@ -27575,7 +27571,7 @@ listcomp_rule(Parser *p) Token *_token = _PyPegen_get_last_nonnwhitespace_token(p); if (_token == NULL) { p->level--; - p->subn--; + _PyPegen_dec_subn(p, 0); return NULL; } int _end_lineno = _token->end_lineno; @@ -27586,7 +27582,7 @@ listcomp_rule(Parser *p) if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; p->level--; - p->subn--; + _PyPegen_dec_subn(p, 0); return NULL; } goto done; @@ -27598,7 +27594,7 @@ listcomp_rule(Parser *p) { // '[' for_if_prefix_clauses star_named_expression ']' if (p->error_indicator) { p->level--; - p->subn--; + _PyPegen_dec_subn(p, 0); return NULL; } D(fprintf(stderr, "%*c> listcomp[%d-%d L%d]: %s\n", p->level, ' ', _mark, p->mark, p->tok->lineno, "'[' for_if_prefix_clauses star_named_expression ']'")); @@ -27620,7 +27616,7 @@ listcomp_rule(Parser *p) Token *_token = _PyPegen_get_last_nonnwhitespace_token(p); if (_token == NULL) { p->level--; - p->subn--; + _PyPegen_dec_subn(p, 0); return NULL; } int _end_lineno = _token->end_lineno; @@ -27631,7 +27627,7 @@ listcomp_rule(Parser *p) if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; p->level--; - p->subn--; + _PyPegen_dec_subn(p, 0); return NULL; } goto done; @@ -27643,7 +27639,7 @@ listcomp_rule(Parser *p) if (p->call_invalid_rules) { // invalid_comprehension if (p->error_indicator) { p->level--; - p->subn--; + _PyPegen_dec_subn(p, 0); return NULL; } D(fprintf(stderr, "%*c> listcomp[%d-%d L%d]: %s\n", p->level, ' ', _mark, p->mark, p->tok->lineno, "invalid_comprehension")); @@ -27662,8 +27658,8 @@ listcomp_rule(Parser *p) } _res = NULL; done: - p->subn--; p->level--; + _PyPegen_dec_subn(p, !p->error_indicator && _res != NULL); return _res; } @@ -27682,13 +27678,16 @@ tuplecomp_rule(Parser *p) p->level--; return NULL; } - p->subn++; expr_ty _res = NULL; + if (!_PyPegen_inc_subn(p)) { + p->level--; + return NULL; + } int _mark = p->mark; if (p->mark == p->fill && _PyPegen_fill_token(p) < 0) { p->error_indicator = 1; p->level--; - p->subn--; + _PyPegen_dec_subn(p, 0); return NULL; } int _start_lineno = p->tokens[_mark]->lineno; @@ -27698,7 +27697,7 @@ tuplecomp_rule(Parser *p) { // '(' (star_assignment_expression | expression !':=') for_if_clauses ',' ')' if (p->error_indicator) { p->level--; - p->subn--; + _PyPegen_dec_subn(p, 0); return NULL; } D(fprintf(stderr, "%*c> tuplecomp[%d-%d L%d]: %s\n", p->level, ' ', _mark, p->mark, p->tok->lineno, "'(' (star_assignment_expression | expression !':=') for_if_clauses ',' ')'")); @@ -27723,7 +27722,7 @@ tuplecomp_rule(Parser *p) Token *_token = _PyPegen_get_last_nonnwhitespace_token(p); if (_token == NULL) { p->level--; - p->subn--; + _PyPegen_dec_subn(p, 0); return NULL; } int _end_lineno = _token->end_lineno; @@ -27734,7 +27733,7 @@ tuplecomp_rule(Parser *p) if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; p->level--; - p->subn--; + _PyPegen_dec_subn(p, 0); return NULL; } goto done; @@ -27746,7 +27745,7 @@ tuplecomp_rule(Parser *p) { // '(' for_if_prefix_clauses (star_assignment_expression | expression !':=') ',' ')' if (p->error_indicator) { p->level--; - p->subn--; + _PyPegen_dec_subn(p, 0); return NULL; } D(fprintf(stderr, "%*c> tuplecomp[%d-%d L%d]: %s\n", p->level, ' ', _mark, p->mark, p->tok->lineno, "'(' for_if_prefix_clauses (star_assignment_expression | expression !':=') ',' ')'")); @@ -27771,7 +27770,7 @@ tuplecomp_rule(Parser *p) Token *_token = _PyPegen_get_last_nonnwhitespace_token(p); if (_token == NULL) { p->level--; - p->subn--; + _PyPegen_dec_subn(p, 0); return NULL; } int _end_lineno = _token->end_lineno; @@ -27782,7 +27781,7 @@ tuplecomp_rule(Parser *p) if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; p->level--; - p->subn--; + _PyPegen_dec_subn(p, 0); return NULL; } goto done; @@ -27794,7 +27793,7 @@ tuplecomp_rule(Parser *p) if (p->call_invalid_rules) { // invalid_comprehension if (p->error_indicator) { p->level--; - p->subn--; + _PyPegen_dec_subn(p, 0); return NULL; } D(fprintf(stderr, "%*c> tuplecomp[%d-%d L%d]: %s\n", p->level, ' ', _mark, p->mark, p->tok->lineno, "invalid_comprehension")); @@ -27813,8 +27812,8 @@ tuplecomp_rule(Parser *p) } _res = NULL; done: - p->subn--; p->level--; + _PyPegen_dec_subn(p, !p->error_indicator && _res != NULL); return _res; } @@ -27833,13 +27832,16 @@ setcomp_rule(Parser *p) p->level--; return NULL; } - p->subn++; expr_ty _res = NULL; + if (!_PyPegen_inc_subn(p)) { + p->level--; + return NULL; + } int _mark = p->mark; if (p->mark == p->fill && _PyPegen_fill_token(p) < 0) { p->error_indicator = 1; p->level--; - p->subn--; + _PyPegen_dec_subn(p, 0); return NULL; } int _start_lineno = p->tokens[_mark]->lineno; @@ -27849,7 +27851,7 @@ setcomp_rule(Parser *p) { // '{' star_named_noslice_expression for_if_clauses '}' if (p->error_indicator) { p->level--; - p->subn--; + _PyPegen_dec_subn(p, 0); return NULL; } D(fprintf(stderr, "%*c> setcomp[%d-%d L%d]: %s\n", p->level, ' ', _mark, p->mark, p->tok->lineno, "'{' star_named_noslice_expression for_if_clauses '}'")); @@ -27871,7 +27873,7 @@ setcomp_rule(Parser *p) Token *_token = _PyPegen_get_last_nonnwhitespace_token(p); if (_token == NULL) { p->level--; - p->subn--; + _PyPegen_dec_subn(p, 0); return NULL; } int _end_lineno = _token->end_lineno; @@ -27882,7 +27884,7 @@ setcomp_rule(Parser *p) if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; p->level--; - p->subn--; + _PyPegen_dec_subn(p, 0); return NULL; } goto done; @@ -27894,7 +27896,7 @@ setcomp_rule(Parser *p) { // '{' for_if_prefix_clauses star_named_noslice_expression '}' if (p->error_indicator) { p->level--; - p->subn--; + _PyPegen_dec_subn(p, 0); return NULL; } D(fprintf(stderr, "%*c> setcomp[%d-%d L%d]: %s\n", p->level, ' ', _mark, p->mark, p->tok->lineno, "'{' for_if_prefix_clauses star_named_noslice_expression '}'")); @@ -27916,7 +27918,7 @@ setcomp_rule(Parser *p) Token *_token = _PyPegen_get_last_nonnwhitespace_token(p); if (_token == NULL) { p->level--; - p->subn--; + _PyPegen_dec_subn(p, 0); return NULL; } int _end_lineno = _token->end_lineno; @@ -27927,7 +27929,7 @@ setcomp_rule(Parser *p) if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; p->level--; - p->subn--; + _PyPegen_dec_subn(p, 0); return NULL; } goto done; @@ -27939,7 +27941,7 @@ setcomp_rule(Parser *p) if (p->call_invalid_rules) { // invalid_comprehension if (p->error_indicator) { p->level--; - p->subn--; + _PyPegen_dec_subn(p, 0); return NULL; } D(fprintf(stderr, "%*c> setcomp[%d-%d L%d]: %s\n", p->level, ' ', _mark, p->mark, p->tok->lineno, "invalid_comprehension")); @@ -27958,8 +27960,8 @@ setcomp_rule(Parser *p) } _res = NULL; done: - p->subn--; p->level--; + _PyPegen_dec_subn(p, !p->error_indicator && _res != NULL); return _res; } @@ -28113,13 +28115,16 @@ dictcomp_rule(Parser *p) p->level--; return NULL; } - p->subn++; expr_ty _res = NULL; + if (!_PyPegen_inc_subn(p)) { + p->level--; + return NULL; + } int _mark = p->mark; if (p->mark == p->fill && _PyPegen_fill_token(p) < 0) { p->error_indicator = 1; p->level--; - p->subn--; + _PyPegen_dec_subn(p, 0); return NULL; } int _start_lineno = p->tokens[_mark]->lineno; @@ -28129,7 +28134,7 @@ dictcomp_rule(Parser *p) { // '{' double_starred_kvpair for_if_clauses '}' if (p->error_indicator) { p->level--; - p->subn--; + _PyPegen_dec_subn(p, 0); return NULL; } D(fprintf(stderr, "%*c> dictcomp[%d-%d L%d]: %s\n", p->level, ' ', _mark, p->mark, p->tok->lineno, "'{' double_starred_kvpair for_if_clauses '}'")); @@ -28151,7 +28156,7 @@ dictcomp_rule(Parser *p) Token *_token = _PyPegen_get_last_nonnwhitespace_token(p); if (_token == NULL) { p->level--; - p->subn--; + _PyPegen_dec_subn(p, 0); return NULL; } int _end_lineno = _token->end_lineno; @@ -28162,7 +28167,7 @@ dictcomp_rule(Parser *p) if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; p->level--; - p->subn--; + _PyPegen_dec_subn(p, 0); return NULL; } goto done; @@ -28174,7 +28179,7 @@ dictcomp_rule(Parser *p) { // '{' for_if_prefix_clauses double_starred_kvpair '}' if (p->error_indicator) { p->level--; - p->subn--; + _PyPegen_dec_subn(p, 0); return NULL; } D(fprintf(stderr, "%*c> dictcomp[%d-%d L%d]: %s\n", p->level, ' ', _mark, p->mark, p->tok->lineno, "'{' for_if_prefix_clauses double_starred_kvpair '}'")); @@ -28196,7 +28201,7 @@ dictcomp_rule(Parser *p) Token *_token = _PyPegen_get_last_nonnwhitespace_token(p); if (_token == NULL) { p->level--; - p->subn--; + _PyPegen_dec_subn(p, 0); return NULL; } int _end_lineno = _token->end_lineno; @@ -28207,7 +28212,7 @@ dictcomp_rule(Parser *p) if (_res == NULL && PyErr_Occurred()) { p->error_indicator = 1; p->level--; - p->subn--; + _PyPegen_dec_subn(p, 0); return NULL; } goto done; @@ -28219,7 +28224,7 @@ dictcomp_rule(Parser *p) if (p->call_invalid_rules) { // invalid_dict_comprehension if (p->error_indicator) { p->level--; - p->subn--; + _PyPegen_dec_subn(p, 0); return NULL; } D(fprintf(stderr, "%*c> dictcomp[%d-%d L%d]: %s\n", p->level, ' ', _mark, p->mark, p->tok->lineno, "invalid_dict_comprehension")); @@ -28238,8 +28243,8 @@ dictcomp_rule(Parser *p) } _res = NULL; done: - p->subn--; p->level--; + _PyPegen_dec_subn(p, !p->error_indicator && _res != NULL); return _res; } @@ -47870,7 +47875,7 @@ _tmp_167_rule(Parser *p) return _res; } -// _tmp_168: compound_expr | tuplecomp | tuple | group | genexp +// _tmp_168: compound_expr | tuplecomp | group | genexp | tuple static void * _tmp_168_rule(Parser *p) { @@ -47922,25 +47927,6 @@ _tmp_168_rule(Parser *p) D(fprintf(stderr, "%*c%s _tmp_168[%d-%d L%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, p->tok->lineno, "tuplecomp")); } - { // tuple - if (p->error_indicator) { - p->level--; - return NULL; - } - D(fprintf(stderr, "%*c> _tmp_168[%d-%d L%d]: %s\n", p->level, ' ', _mark, p->mark, p->tok->lineno, "tuple")); - expr_ty tuple_var; - if ( - (tuple_var = tuple_rule(p)) // tuple - ) - { - D(fprintf(stderr, "%*c+ _tmp_168[%d-%d L%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, p->tok->lineno, "tuple")); - _res = tuple_var; - goto done; - } - p->mark = _mark; - D(fprintf(stderr, "%*c%s _tmp_168[%d-%d L%d]: %s failed!\n", p->level, ' ', - p->error_indicator ? "ERROR!" : "-", _mark, p->mark, p->tok->lineno, "tuple")); - } { // group if (p->error_indicator) { p->level--; @@ -47979,6 +47965,25 @@ _tmp_168_rule(Parser *p) D(fprintf(stderr, "%*c%s _tmp_168[%d-%d L%d]: %s failed!\n", p->level, ' ', p->error_indicator ? "ERROR!" : "-", _mark, p->mark, p->tok->lineno, "genexp")); } + { // tuple + if (p->error_indicator) { + p->level--; + return NULL; + } + D(fprintf(stderr, "%*c> _tmp_168[%d-%d L%d]: %s\n", p->level, ' ', _mark, p->mark, p->tok->lineno, "tuple")); + expr_ty tuple_var; + if ( + (tuple_var = tuple_rule(p)) // tuple + ) + { + D(fprintf(stderr, "%*c+ _tmp_168[%d-%d L%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, p->tok->lineno, "tuple")); + _res = tuple_var; + goto done; + } + p->mark = _mark; + D(fprintf(stderr, "%*c%s _tmp_168[%d-%d L%d]: %s failed!\n", p->level, ' ', + p->error_indicator ? "ERROR!" : "-", _mark, p->mark, p->tok->lineno, "tuple")); + } _res = NULL; done: p->level--; diff --git a/Parser/pegen.c b/Parser/pegen.c index 5db7235..28469a7 100644 --- a/Parser/pegen.c +++ b/Parser/pegen.c @@ -798,7 +798,9 @@ _PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags, p->feature_version = feature_version; p->known_err_token = NULL; p->level = 0; + memset(p->template_subs, 0, PY_MAX_TEMPLATE_SUBS * sizeof(expr_ty)); p->subn = 0; + p->max_subn = 0; p->call_invalid_rules = 0; if (restrici == NULL) { restrici = PyMem_Calloc(1, sizeof(int)); @@ -892,6 +894,15 @@ _PyPegen_run_parser(Parser *p) return RAISE_SYNTAX_ERROR("multiple statements found while compiling a single statement"); } + Py_ssize_t i; + for (i = 0; i < p->max_subn; i++) { + expr_ty last = p->template_subs[i]; + if (last) { + assert(last->kind == Template_kind); + last->v.Template.last = 1; + } + } + // test_peg_generator defines _Py_TEST_PEGEN to not call PyAST_Validate() #if defined(Py_DEBUG) && !defined(_Py_TEST_PEGEN) if (p->start_rule == Py_single_input || @@ -903,6 +914,7 @@ _PyPegen_run_parser(Parser *p) } } #endif + return res; } diff --git a/Parser/pegen.h b/Parser/pegen.h index 97ac5da..fb2ba16 100644 --- a/Parser/pegen.h +++ b/Parser/pegen.h @@ -79,7 +79,9 @@ typedef struct { growable_comment_array type_ignore_comments; Token *known_err_token; int level; + expr_ty template_subs[PY_MAX_TEMPLATE_SUBS]; int subn; + int max_subn; int call_invalid_rules; int edalloc; int *restricted; @@ -343,6 +345,9 @@ int _PyPegen_check_barry_as_flufl(Parser *, Token *); int _PyPegen_check_legacy_stmt(Parser *p, expr_ty t); int _PyPegen_add_restricted(Parser *p, Token *t); int _PyPegen_check_restricted(Parser *p, int type); +expr_ty _PyPegen_make_template(Parser *p, int level, Token *t, int, int, int, int, PyArena *); +int _PyPegen_inc_subn(Parser *p); +int _PyPegen_dec_subn(Parser *p, int success); ResultTokenWithMetadata *_PyPegen_check_fstring_conversion(Parser *p, Token *, expr_ty t); ResultTokenWithMetadata *_PyPegen_setup_full_format_spec(Parser *, Token *, asdl_expr_seq *, int, int, int, int, PyArena *); diff --git a/Python/ast.c b/Python/ast.c index 68413e0..b0f453a 100644 --- a/Python/ast.c +++ b/Python/ast.c @@ -12,6 +12,8 @@ struct validator { int recursion_depth; /* current recursion depth */ int recursion_limit; /* recursion limit */ + + int templatesub_depth; /* current depth for template substitution */ }; static int validate_stmts(struct validator *, asdl_stmt_seq *); @@ -304,20 +306,24 @@ validate_expr(struct validator *state, expr_ty exp, expr_context_ty ctx) case Set_kind: ret = validate_exprs(state, exp->v.Set.elts, Load, 0); break; -#define COMP(NAME) \ +#define COMP(NAME, DO_TEMPLATE) \ case NAME ## _kind: \ + if (DO_TEMPLATE) state->templatesub_depth++; \ ret = validate_comprehension(state, exp->v.NAME.generators) && \ validate_expr(state, exp->v.NAME.elt, Load); \ + if (DO_TEMPLATE) state->templatesub_depth--; \ break; - COMP(ListComp) - COMP(TupleComp) - COMP(SetComp) - COMP(GeneratorExp) + COMP(ListComp, 1) + COMP(TupleComp, 1) + COMP(SetComp, 1) + COMP(GeneratorExp, 0) #undef COMP case DictComp_kind: + state->templatesub_depth++; ret = validate_comprehension(state, exp->v.DictComp.generators) && (!exp->v.DictComp.key || validate_expr(state, exp->v.DictComp.key, Load)) && validate_expr(state, exp->v.DictComp.value, Load); + state->templatesub_depth--; break; case Yield_kind: ret = !exp->v.Yield.value || validate_expr(state, exp->v.Yield.value, Load); @@ -390,8 +396,13 @@ validate_expr(struct validator *state, expr_ty exp, expr_context_ty ctx) ret = validate_expr(state, exp->v.NamedExpr.value, Load); break; case Composition_kind: - ret = validate_expr(state, exp->v.Composition.arg, Load) && - validate_expr(state, exp->v.Composition.func, Load); + ret = validate_expr(state, exp->v.Composition.arg, Load); + + if (ret) { + state->templatesub_depth++; + ret = validate_expr(state, exp->v.Composition.func, Load); + state->templatesub_depth--; + } break; case CompoundExpr_kind: ret = validate_stmt(state, exp->v.CompoundExpr.value); @@ -402,8 +413,15 @@ validate_expr(struct validator *state, expr_ty exp, expr_context_ty ctx) case ExprTarget_kind: ret = validate_expr(state, exp->v.ExprTarget.value, Load); break; - /* This last case doesn't have any checking. */ case Template_kind: + if (exp->v.Template.level >= state->templatesub_depth) { + PyErr_SetString(PyExc_SyntaxError, + "template index out of range"); + return 0; + } + ret = 1; + break; + /* This last case doesn't have any checking. */ case Name_kind: ret = 1; break; @@ -1089,6 +1107,7 @@ _PyAST_Validate(mod_ty mod) starting_recursion_depth = recursion_depth * COMPILER_STACK_FRAME_SCALE; state.recursion_depth = starting_recursion_depth; state.recursion_limit = C_RECURSION_LIMIT * COMPILER_STACK_FRAME_SCALE; + state.templatesub_depth = 0; switch (mod->kind) { case Module_kind: @@ -1112,6 +1131,8 @@ _PyAST_Validate(mod_ty mod) return 0; } + assert(state.templatesub_depth == 0); + /* Check that the recursion depth counting balanced correctly */ if (res && state.recursion_depth != starting_recursion_depth) { PyErr_Format(PyExc_SystemError, diff --git a/Python/ast_opt.c b/Python/ast_opt.c index 039d6bb..38eef40 100644 --- a/Python/ast_opt.c +++ b/Python/ast_opt.c @@ -1,1472 +1,1435 @@ -/* AST Optimizer */ -#include "Python.h" -#include "pycore_ast.h" // _PyAST_GetDocString() -#include "pycore_format.h" // F_LJUST -#include "pycore_long.h" // _PyLong -#include "pycore_pystate.h" // _PyThreadState_GET() -#include "pycore_setobject.h" // _PySet_NextEntry() -#include "pycore_dict.h" // _PyFrozenDict_FromItems() - - -typedef struct { - expr_ty sub; - expr_ty last; -} _comp_entry; - -typedef struct { - _comp_entry *arr; - Py_ssize_t capacity; - Py_ssize_t n; -} _comp_list; - -typedef struct { - int optimize; - int ff_features; - - int recursion_depth; /* current recursion depth */ - int recursion_limit; /* recursion limit */ - - _comp_list *comp_ptr; -} _PyASTOptimizeState; - - -static int -make_const(expr_ty node, PyObject *val, PyArena *arena) -{ - // Even if no new value was calculated, make_const may still - // need to clear an error (e.g. for division by zero) - if (val == NULL) { - if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) { - return 0; - } - PyErr_Clear(); - return 1; - } - if (_PyArena_AddPyObject(arena, val) < 0) { - Py_DECREF(val); - return 0; - } - node->kind = Constant_kind; - node->v.Constant.kind = NULL; - node->v.Constant.value = val; - return 1; -} - -#define COPY_NODE(TO, FROM) (memcpy((TO), (FROM), sizeof(struct _expr))) - -static int -has_starred(asdl_expr_seq *elts) -{ - Py_ssize_t n = asdl_seq_LEN(elts); - for (Py_ssize_t i = 0; i < n; i++) { - expr_ty e = (expr_ty)asdl_seq_GET(elts, i); - if (e->kind == Starred_kind) { - return 1; - } - } - return 0; -} - - -static PyObject* -unary_not(PyObject *v) -{ - int r = PyObject_IsTrue(v); - if (r < 0) - return NULL; - return PyBool_FromLong(!r); -} - -static int -fold_unaryop(expr_ty node, PyArena *arena, _PyASTOptimizeState *state) -{ - expr_ty arg = node->v.UnaryOp.operand; - - if (arg->kind != Constant_kind) { - /* Fold not into comparison */ - if (node->v.UnaryOp.op == Not && arg->kind == Compare_kind && - asdl_seq_LEN(arg->v.Compare.ops) == 1) { - /* Eq and NotEq are often implemented in terms of one another, so - folding not (self == other) into self != other breaks implementation - of !=. Detecting such cases doesn't seem worthwhile. - Python uses for 'is subset'/'is superset' operations on sets. - They don't satisfy not folding laws. */ - cmpop_ty op = asdl_seq_GET(arg->v.Compare.ops, 0); - switch (op) { - case Is: - op = IsNot; - break; - case IsNot: - op = Is; - break; - case In: - op = NotIn; - break; - case NotIn: - op = In; - break; - case IsIn: - op = IsNotIn; - break; - case IsNotIn: - op = IsIn; - break; - // The remaining comparison operators can't be safely inverted - case Eq: - case NotEq: - case Lt: - case LtE: - case Gt: - case GtE: - op = 0; // The AST enums leave "0" free as an "unused" marker - break; - // No default case, so the compiler will emit a warning if new - // comparison operators are added without being handled here - } - if (op) { - asdl_seq_SET(arg->v.Compare.ops, 0, op); - COPY_NODE(node, arg); - return 1; - } - } - return 1; - } - - typedef PyObject *(*unary_op)(PyObject*); - static const unary_op ops[] = { - [Invert] = PyNumber_Invert, - [Not] = unary_not, - [UAdd] = PyNumber_Positive, - [USub] = PyNumber_Negative, - }; - PyObject *newval = ops[node->v.UnaryOp.op](arg->v.Constant.value); - return make_const(node, newval, arena); -} - -/* Check whether a collection doesn't containing too much items (including - subcollections). This protects from creating a constant that needs - too much time for calculating a hash. - "limit" is the maximal number of items. - Returns the negative number if the total number of items exceeds the - limit. Otherwise returns the limit minus the total number of items. -*/ - -static Py_ssize_t -check_complexity(PyObject *obj, Py_ssize_t limit) -{ - if (PyTuple_Check(obj)) { - Py_ssize_t i; - limit -= PyTuple_GET_SIZE(obj); - for (i = 0; limit >= 0 && i < PyTuple_GET_SIZE(obj); i++) { - limit = check_complexity(PyTuple_GET_ITEM(obj, i), limit); - } - return limit; - } - else if (PyFrozenSet_Check(obj)) { - Py_ssize_t i = 0; - PyObject *item; - Py_hash_t hash; - limit -= PySet_GET_SIZE(obj); - while (limit >= 0 && _PySet_NextEntry(obj, &i, &item, &hash)) { - limit = check_complexity(item, limit); - } - } - return limit; -} - -#define MAX_INT_SIZE 128 /* bits */ -#define MAX_COLLECTION_SIZE 256 /* items */ -#define MAX_STR_SIZE 4096 /* characters */ -#define MAX_TOTAL_ITEMS 1024 /* including nested collections */ - -static PyObject * -safe_multiply(PyObject *v, PyObject *w) -{ - if (PyLong_Check(v) && PyLong_Check(w) && - !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w) - ) { - size_t vbits = _PyLong_NumBits(v); - size_t wbits = _PyLong_NumBits(w); - if (vbits == (size_t)-1 || wbits == (size_t)-1) { - return NULL; - } - if (vbits + wbits > MAX_INT_SIZE) { - return NULL; - } - } - else if (PyLong_Check(v) && (PyTuple_Check(w) || PyFrozenSet_Check(w))) { - Py_ssize_t size = PyTuple_Check(w) ? PyTuple_GET_SIZE(w) : - PySet_GET_SIZE(w); - if (size) { - long n = PyLong_AsLong(v); - if (n < 0 || n > MAX_COLLECTION_SIZE / size) { - return NULL; - } - if (n && check_complexity(w, MAX_TOTAL_ITEMS / n) < 0) { - return NULL; - } - } - } - else if (PyLong_Check(v) && (PyUnicode_Check(w) || PyBytes_Check(w))) { - Py_ssize_t size = PyUnicode_Check(w) ? PyUnicode_GET_LENGTH(w) : - PyBytes_GET_SIZE(w); - if (size) { - long n = PyLong_AsLong(v); - if (n < 0 || n > MAX_STR_SIZE / size) { - return NULL; - } - } - } - else if (PyLong_Check(w) && - (PyTuple_Check(v) || PyFrozenSet_Check(v) || - PyUnicode_Check(v) || PyBytes_Check(v))) - { - return safe_multiply(w, v); - } - - return PyNumber_Multiply(v, w); -} - -static PyObject * -safe_power(PyObject *v, PyObject *w) -{ - if (PyLong_Check(v) && PyLong_Check(w) && - !_PyLong_IsZero((PyLongObject *)v) && _PyLong_IsPositive((PyLongObject *)w) - ) { - size_t vbits = _PyLong_NumBits(v); - size_t wbits = PyLong_AsSize_t(w); - if (vbits == (size_t)-1 || wbits == (size_t)-1) { - return NULL; - } - if (vbits > MAX_INT_SIZE / wbits) { - return NULL; - } - } - - return PyNumber_Power(v, w, Py_None); -} - -static PyObject * -safe_lshift(PyObject *v, PyObject *w) -{ - if (PyLong_Check(v) && PyLong_Check(w) && - !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w) - ) { - size_t vbits = _PyLong_NumBits(v); - size_t wbits = PyLong_AsSize_t(w); - if (vbits == (size_t)-1 || wbits == (size_t)-1) { - return NULL; - } - if (wbits > MAX_INT_SIZE || vbits > MAX_INT_SIZE - wbits) { - return NULL; - } - } - - return PyNumber_Lshift(v, w); -} - -static PyObject * -safe_mod(PyObject *v, PyObject *w) -{ - if (PyUnicode_Check(v) || PyBytes_Check(v)) { - return NULL; - } - - return PyNumber_Remainder(v, w); -} - - -static expr_ty -parse_literal(PyObject *fmt, Py_ssize_t *ppos, PyArena *arena) -{ - const void *data = PyUnicode_DATA(fmt); - int kind = PyUnicode_KIND(fmt); - Py_ssize_t size = PyUnicode_GET_LENGTH(fmt); - Py_ssize_t start, pos; - int has_percents = 0; - start = pos = *ppos; - while (pos < size) { - if (PyUnicode_READ(kind, data, pos) != '%') { - pos++; - } - else if (pos+1 < size && PyUnicode_READ(kind, data, pos+1) == '%') { - has_percents = 1; - pos += 2; - } - else { - break; - } - } - *ppos = pos; - if (pos == start) { - return NULL; - } - PyObject *str = PyUnicode_Substring(fmt, start, pos); - /* str = str.replace('%%', '%') */ - if (str && has_percents) { - _Py_DECLARE_STR(percent, "%"); - _Py_DECLARE_STR(dbl_percent, "%%"); - Py_SETREF(str, PyUnicode_Replace(str, &_Py_STR(dbl_percent), - &_Py_STR(percent), -1)); - } - if (!str) { - return NULL; - } - - if (_PyArena_AddPyObject(arena, str) < 0) { - Py_DECREF(str); - return NULL; - } - return _PyAST_Constant(str, NULL, -1, -1, -1, -1, arena); -} - -#define MAXDIGITS 3 - -static int -simple_format_arg_parse(PyObject *fmt, Py_ssize_t *ppos, - int *spec, int *flags, int *width, int *prec) -{ - Py_ssize_t pos = *ppos, len = PyUnicode_GET_LENGTH(fmt); - Py_UCS4 ch; - -#define NEXTC do { \ - if (pos >= len) { \ - return 0; \ - } \ - ch = PyUnicode_READ_CHAR(fmt, pos); \ - pos++; \ -} while (0) - - *flags = 0; - while (1) { - NEXTC; - switch (ch) { - case '-': *flags |= F_LJUST; continue; - case '+': *flags |= F_SIGN; continue; - case ' ': *flags |= F_BLANK; continue; - case '#': *flags |= F_ALT; continue; - case '0': *flags |= F_ZERO; continue; - } - break; - } - if ('0' <= ch && ch <= '9') { - *width = 0; - int digits = 0; - while ('0' <= ch && ch <= '9') { - *width = *width * 10 + (ch - '0'); - NEXTC; - if (++digits >= MAXDIGITS) { - return 0; - } - } - } - - if (ch == '.') { - NEXTC; - *prec = 0; - if ('0' <= ch && ch <= '9') { - int digits = 0; - while ('0' <= ch && ch <= '9') { - *prec = *prec * 10 + (ch - '0'); - NEXTC; - if (++digits >= MAXDIGITS) { - return 0; - } - } - } - } - *spec = ch; - *ppos = pos; - return 1; - -#undef NEXTC -} - -static expr_ty -parse_format(PyObject *fmt, Py_ssize_t *ppos, expr_ty arg, PyArena *arena) -{ - int spec, flags, width = -1, prec = -1; - if (!simple_format_arg_parse(fmt, ppos, &spec, &flags, &width, &prec)) { - // Unsupported format. - return NULL; - } - if (spec == 's' || spec == 'r' || spec == 'a') { - char buf[1 + MAXDIGITS + 1 + MAXDIGITS + 1], *p = buf; - if (!(flags & F_LJUST) && width > 0) { - *p++ = '>'; - } - if (width >= 0) { - p += snprintf(p, MAXDIGITS + 1, "%d", width); - } - if (prec >= 0) { - p += snprintf(p, MAXDIGITS + 2, ".%d", prec); - } - expr_ty format_spec = NULL; - if (p != buf) { - PyObject *str = PyUnicode_FromString(buf); - if (str == NULL) { - return NULL; - } - if (_PyArena_AddPyObject(arena, str) < 0) { - Py_DECREF(str); - return NULL; - } - format_spec = _PyAST_Constant(str, NULL, -1, -1, -1, -1, arena); - if (format_spec == NULL) { - return NULL; - } - } - return _PyAST_FormattedValue(arg, spec, format_spec, - arg->lineno, arg->col_offset, - arg->end_lineno, arg->end_col_offset, - arena); - } - // Unsupported format. - return NULL; -} - -static int -optimize_format(expr_ty node, PyObject *fmt, asdl_expr_seq *elts, PyArena *arena) -{ - Py_ssize_t pos = 0; - Py_ssize_t cnt = 0; - asdl_expr_seq *seq = _Py_asdl_expr_seq_new(asdl_seq_LEN(elts) * 2 + 1, arena); - if (!seq) { - return 0; - } - seq->size = 0; - - while (1) { - expr_ty lit = parse_literal(fmt, &pos, arena); - if (lit) { - asdl_seq_SET(seq, seq->size++, lit); - } - else if (PyErr_Occurred()) { - return 0; - } - - if (pos >= PyUnicode_GET_LENGTH(fmt)) { - break; - } - if (cnt >= asdl_seq_LEN(elts)) { - // More format units than items. - return 1; - } - assert(PyUnicode_READ_CHAR(fmt, pos) == '%'); - pos++; - expr_ty expr = parse_format(fmt, &pos, asdl_seq_GET(elts, cnt), arena); - cnt++; - if (!expr) { - return !PyErr_Occurred(); - } - asdl_seq_SET(seq, seq->size++, expr); - } - if (cnt < asdl_seq_LEN(elts)) { - // More items than format units. - return 1; - } - expr_ty res = _PyAST_JoinedStr(seq, - node->lineno, node->col_offset, - node->end_lineno, node->end_col_offset, - arena); - if (!res) { - return 0; - } - COPY_NODE(node, res); -// PySys_FormatStderr("format = %R\n", fmt); - return 1; -} - -static int -fold_binop(expr_ty node, PyArena *arena, _PyASTOptimizeState *state) -{ - expr_ty lhs, rhs; - lhs = node->v.BinOp.left; - rhs = node->v.BinOp.right; - if (lhs->kind != Constant_kind) { - return 1; - } - PyObject *lv = lhs->v.Constant.value; - - if (node->v.BinOp.op == Mod && - rhs->kind == Tuple_kind && - PyUnicode_Check(lv) && - !has_starred(rhs->v.Tuple.elts)) - { - return optimize_format(node, lv, rhs->v.Tuple.elts, arena); - } - - if (node->v.BinOp.op == Clsc && Py_Is(lv, Py_None)) - { - COPY_NODE(node, rhs); - return 1; - } - - if (rhs->kind != Constant_kind) { - return 1; - } - - PyObject *rv = rhs->v.Constant.value; - PyObject *newval = NULL; - - switch (node->v.BinOp.op) { - case Add: - newval = PyNumber_Add(lv, rv); - break; - case Sub: - newval = PyNumber_Subtract(lv, rv); - break; - case Mult: - newval = safe_multiply(lv, rv); - break; - case Div: - newval = PyNumber_TrueDivide(lv, rv); - break; - case FloorDiv: - newval = PyNumber_FloorDivide(lv, rv); - break; - case Mod: - newval = safe_mod(lv, rv); - break; - case Pow: - newval = safe_power(lv, rv); - break; - case LShift: - newval = safe_lshift(lv, rv); - break; - case RShift: - newval = PyNumber_Rshift(lv, rv); - break; - case BitOr: - newval = PyNumber_Or(lv, rv); - break; - case BitXor: - newval = PyNumber_Xor(lv, rv); - break; - case BitAnd: - newval = PyNumber_And(lv, rv); - break; - // No builtin constants implement the following operators - case MatMult: - return 1; - // No default case, so the compiler will emit a warning if new binary - // operators are added without being handled here - } - - return make_const(node, newval, arena); -} - -static PyObject* -make_const_tuple(asdl_expr_seq *elts) -{ - for (int i = 0; i < asdl_seq_LEN(elts); i++) { - expr_ty e = (expr_ty)asdl_seq_GET(elts, i); - if (e->kind != Constant_kind) { - return NULL; - } - } - - PyObject *newval = PyTuple_New(asdl_seq_LEN(elts)); - if (newval == NULL) { - return NULL; - } - - for (int i = 0; i < asdl_seq_LEN(elts); i++) { - expr_ty e = (expr_ty)asdl_seq_GET(elts, i); - PyObject *v = e->v.Constant.value; - PyTuple_SET_ITEM(newval, i, Py_NewRef(v)); - } - return newval; -} - -static int -fold_tuple(expr_ty node, PyArena *arena, _PyASTOptimizeState *state) -{ - PyObject *newval; - - if (node->v.Tuple.ctx != Load) - return 1; - - newval = make_const_tuple(node->v.Tuple.elts); - return make_const(node, newval, arena); -} - -static int -fold_subscr(expr_ty node, PyArena *arena, _PyASTOptimizeState *state) -{ - PyObject *newval; - expr_ty arg, idx; - - arg = node->v.Subscript.value; - idx = node->v.Subscript.slice; - if (node->v.Subscript.ctx != Load || - arg->kind != Constant_kind || - idx->kind != Constant_kind) - { - return 1; - } - - newval = PyObject_GetItem(arg->v.Constant.value, idx->v.Constant.value); - return make_const(node, newval, arena); -} - -/* Change literal list or set of constants into constant - tuple or frozenset respectively. Change literal list of - non-constants into tuple. - Used for right operand of "in" and "not in" tests and for iterable - in "for" loop and comprehensions. -*/ -static int -fold_iter(expr_ty arg, PyArena *arena, _PyASTOptimizeState *state) -{ - PyObject *newval; - if (arg->kind == List_kind) { - /* First change a list into tuple. */ - asdl_expr_seq *elts = arg->v.List.elts; - if (has_starred(elts)) { - return 1; - } - expr_context_ty ctx = arg->v.List.ctx; - arg->kind = Tuple_kind; - arg->v.Tuple.elts = elts; - arg->v.Tuple.ctx = ctx; - /* Try to create a constant tuple. */ - newval = make_const_tuple(elts); - } - else if (arg->kind == Set_kind) { - newval = make_const_tuple(arg->v.Set.elts); - if (newval) { - Py_SETREF(newval, PyFrozenSet_New(newval)); - } - } - else if (arg->kind == Dict_kind) { - PyObject *keys = make_const_tuple(arg->v.Dict.keys); - if (keys) { - if (PyTuple_GET_SIZE(keys) == 0) { - Py_DECREF(keys); - newval = PyFrozenDict_New(); - } - else { - PyObject *values = make_const_tuple(arg->v.Dict.values); - if (values) { - newval = _PyFrozenDict_FromItems( - &PyTuple_GET_ITEM(keys, 0), 1, - &PyTuple_GET_ITEM(values, 0), 1, - PyTuple_GET_SIZE(keys) - ); - Py_DECREF(keys); - Py_DECREF(values); - } - else { - PyErr_Clear(); - Py_DECREF(keys); - return 1; - } - } - } - } - else { - return 1; - } - return make_const(arg, newval, arena); -} - -static int -fold_compare(expr_ty node, PyArena *arena, _PyASTOptimizeState *state) -{ - asdl_int_seq *ops; - asdl_expr_seq *args; - Py_ssize_t i; - - ops = node->v.Compare.ops; - args = node->v.Compare.comparators; - /* Change literal list or set in 'in' or 'not in' into - tuple or frozenset respectively. */ - i = asdl_seq_LEN(ops) - 1; - int op = asdl_seq_GET(ops, i); - if (op == In || op == NotIn || op == IsIn || op == IsNotIn) { - if (!fold_iter((expr_ty)asdl_seq_GET(args, i), arena, state)) { - return 0; - } - } - return 1; -} - -static int -fold_slice(expr_ty node, PyArena *arena, _PyASTOptimizeState *state) -{ - PyObject *olower = NULL; - PyObject *oupper = NULL; - PyObject *ostep = NULL; - expr_ty lower = node->v.Slice.lower; - expr_ty upper = node->v.Slice.upper; - expr_ty step = node->v.Slice.step; - - if (lower && lower->kind != Constant_kind || - upper && upper->kind != Constant_kind || - step && step->kind != Constant_kind) - { - return 1; - } - - if (lower) { - olower = lower->v.Constant.value; - } - - if (upper) { - oupper = upper->v.Constant.value; - } - - if (step) { - ostep = step->v.Constant.value; - } - - PyObject *res = PySlice_New(olower, oupper, ostep); - if (res == NULL) { - if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) { - return 0; - } - PyErr_Clear(); - return 1; - } - - if (PyObject_Hash(res) == -1) { - PyErr_Clear(); - return 1; - } - - return make_const(node, res, arena); -} - -static int astfold_mod(mod_ty node_, PyArena *ctx_, _PyASTOptimizeState *state); -static int astfold_stmt(stmt_ty node_, PyArena *ctx_, _PyASTOptimizeState *state); -static int astfold_expr(expr_ty node_, PyArena *ctx_, _PyASTOptimizeState *state); -static int astfold_arguments(arguments_ty node_, PyArena *ctx_, _PyASTOptimizeState *state); -static int astfold_comprehension(comprehension_ty node_, PyArena *ctx_, _PyASTOptimizeState *state); -static int astfold_keyword(keyword_ty node_, PyArena *ctx_, _PyASTOptimizeState *state); -static int astfold_arg(arg_ty node_, PyArena *ctx_, _PyASTOptimizeState *state); -static int astfold_withitem(withitem_ty node_, PyArena *ctx_, _PyASTOptimizeState *state); -static int astfold_excepthandler(excepthandler_ty node_, PyArena *ctx_, _PyASTOptimizeState *state); -static int astfold_switch_case(switch_case_ty node_, PyArena *ctx_, _PyASTOptimizeState *state); -static int astfold_match_case(match_case_ty node_, PyArena *ctx_, _PyASTOptimizeState *state); -static int astfold_pattern(pattern_ty node_, PyArena *ctx_, _PyASTOptimizeState *state); -static int astfold_type_param(type_param_ty node_, PyArena *ctx_, _PyASTOptimizeState *state); - -#define CALL(FUNC, TYPE, ARG) \ - if (!FUNC((ARG), ctx_, state)) \ - return 0; - -#define CALL_OPT(FUNC, TYPE, ARG) \ - if ((ARG) != NULL && !FUNC((ARG), ctx_, state)) \ - return 0; - -#define CALL_SEQ(FUNC, TYPE, ARG) { \ - int i; \ - asdl_ ## TYPE ## _seq *seq = (ARG); /* avoid variable capture */ \ - for (i = 0; i < asdl_seq_LEN(seq); i++) { \ - TYPE ## _ty elt = (TYPE ## _ty)asdl_seq_GET(seq, i); \ - if (elt != NULL && !FUNC(elt, ctx_, state)) \ - return 0; \ - } \ -} - -static inline _comp_entry * -add_comp_entry(_comp_list *ptr, expr_ty sub) -{ - assert(ptr && ptr->arr); - if (ptr->n == ptr->capacity) { - _comp_entry *new_arr = PyMem_Realloc(ptr->arr, 2 * ptr->capacity); - if (new_arr == NULL) { - PyErr_NoMemory(); - return NULL; - } - ptr->arr = new_arr; - ptr->capacity *= 2; - } - - _comp_entry *entry = &ptr->arr[ptr->n++]; - entry->sub = sub; - entry->last = NULL; - return entry; -} - -static int -fold_comp(expr_ty node, PyArena *ctx_, _PyASTOptimizeState *state) -{ - expr_ty arg; - expr_ty func; - int constant = 1; - - assert(node->kind == Composition_kind); - - CALL(astfold_expr, expr_ty, node->v.Composition.arg); - - arg = node->v.Composition.arg; - if (arg->kind != Constant_kind) { - constant = 0; - } - - _comp_entry *entry = add_comp_entry(state->comp_ptr, constant ? arg : NULL); - if (entry == NULL) { - return 0; - } - - func = node->v.Composition.func; - CALL(astfold_expr, expr_ty, func); - - if (!entry->last) { - if (constant) { - asdl_expr_seq *seq = - (asdl_expr_seq*)_Py_asdl_generic_seq_new(1, ctx_); - if (!seq) { - return 0; - } - asdl_seq_SET(seq, 0, arg); - node->kind = Call_kind; - node->v.Call.func = func; - node->v.Call.args = seq; - node->v.Call.keywords = NULL; - node->v.Call.aware = 0; - } - } - else { - if (constant) { - COPY_NODE(node, func); - CALL(astfold_expr, expr_ty, node); /* second pass */ - } - else { - entry->last->v.Template.last = 1; - node->v.Composition.has_templates = 1; - } - } - - state->comp_ptr->n--; - return 1; -} - -static int -fold_compassign(stmt_ty node, PyArena *ctx_, _PyASTOptimizeState *state) -{ - assert(node->kind == AugAssign_kind); - assert(node->v.AugAssign.value); - - _comp_entry *entry = add_comp_entry(state->comp_ptr, NULL); - if (entry == NULL) { - return 0; - } - - CALL(astfold_expr, expr_ty, node->v.AugAssign.value); - - if (!entry->last) { - node->v.AugAssign.op = CompCall; - } - else { - entry->last->v.Template.last = 1; - } - - state->comp_ptr->n--; - return 1; -} - -static int -astfold_body(asdl_stmt_seq *stmts, PyArena *ctx_, _PyASTOptimizeState *state) -{ - int docstring = _PyAST_GetDocString(stmts) != NULL; - CALL_SEQ(astfold_stmt, stmt, stmts); - if (!docstring && _PyAST_GetDocString(stmts) != NULL) { - stmt_ty st = (stmt_ty)asdl_seq_GET(stmts, 0); - asdl_expr_seq *values = _Py_asdl_expr_seq_new(1, ctx_); - if (!values) { - return 0; - } - asdl_seq_SET(values, 0, st->v.Expr.value); - expr_ty expr = _PyAST_JoinedStr(values, st->lineno, st->col_offset, - st->end_lineno, st->end_col_offset, - ctx_); - if (!expr) { - return 0; - } - st->v.Expr.value = expr; - } - return 1; -} - -static int -astfold_mod(mod_ty node_, PyArena *ctx_, _PyASTOptimizeState *state) -{ - switch (node_->kind) { - case Module_kind: - CALL(astfold_body, asdl_seq, node_->v.Module.body); - break; - case Interactive_kind: - CALL_SEQ(astfold_stmt, stmt, node_->v.Interactive.body); - break; - case Expression_kind: - CALL(astfold_expr, expr_ty, node_->v.Expression.body); - break; - // The following top level nodes don't participate in constant folding - case FunctionType_kind: - break; - // No default case, so the compiler will emit a warning if new top level - // compilation nodes are added without being handled here - } - return 1; -} - -static int -astfold_expr(expr_ty node_, PyArena *ctx_, _PyASTOptimizeState *state) -{ - _comp_entry *entry; - - if (++state->recursion_depth > state->recursion_limit) { - PyErr_SetString(PyExc_RecursionError, - "maximum recursion depth exceeded during compilation"); - return 0; - } - switch (node_->kind) { - case BoolOp_kind: - CALL_SEQ(astfold_expr, expr, node_->v.BoolOp.values); - break; - case BinOp_kind: - CALL(astfold_expr, expr_ty, node_->v.BinOp.left); - CALL(astfold_expr, expr_ty, node_->v.BinOp.right); - CALL(fold_binop, expr_ty, node_); - break; - case UnaryOp_kind: - CALL(astfold_expr, expr_ty, node_->v.UnaryOp.operand); - CALL(fold_unaryop, expr_ty, node_); - break; - case Lambda_kind: - CALL(astfold_arguments, arguments_ty, node_->v.Lambda.args); - CALL(astfold_expr, expr_ty, node_->v.Lambda.body); - break; - case IfExp_kind: - CALL(astfold_expr, expr_ty, node_->v.IfExp.test); - CALL(astfold_expr, expr_ty, node_->v.IfExp.body); - CALL(astfold_expr, expr_ty, node_->v.IfExp.orelse); - break; - case Dict_kind: - CALL_SEQ(astfold_expr, expr, node_->v.Dict.keys); - CALL_SEQ(astfold_expr, expr, node_->v.Dict.values); - break; - case Set_kind: - CALL_SEQ(astfold_expr, expr, node_->v.Set.elts); - break; - case ListComp_kind: - entry = add_comp_entry(state->comp_ptr, NULL); - if (entry == NULL) { - return 0; - } - CALL(astfold_expr, expr_ty, node_->v.ListComp.elt); - CALL_SEQ(astfold_comprehension, comprehension, node_->v.ListComp.generators); - state->comp_ptr->n--; - break; - case TupleComp_kind: - entry = add_comp_entry(state->comp_ptr, NULL); - if (entry == NULL) { - return 0; - } - CALL(astfold_expr, expr_ty, node_->v.TupleComp.elt); - CALL_SEQ(astfold_comprehension, comprehension, node_->v.TupleComp.generators); - state->comp_ptr->n--; - break; - case SetComp_kind: - entry = add_comp_entry(state->comp_ptr, NULL); - if (entry == NULL) { - return 0; - } - CALL(astfold_expr, expr_ty, node_->v.SetComp.elt); - CALL_SEQ(astfold_comprehension, comprehension, node_->v.SetComp.generators); - state->comp_ptr->n--; - break; - case DictComp_kind: - entry = add_comp_entry(state->comp_ptr, NULL); - if (entry == NULL) { - return 0; - } - CALL_OPT(astfold_expr, expr_ty, node_->v.DictComp.key); - CALL(astfold_expr, expr_ty, node_->v.DictComp.value); - CALL_SEQ(astfold_comprehension, comprehension, node_->v.DictComp.generators); - state->comp_ptr->n--; - break; - case GeneratorExp_kind: - CALL(astfold_expr, expr_ty, node_->v.GeneratorExp.elt); - CALL_SEQ(astfold_comprehension, comprehension, node_->v.GeneratorExp.generators); - break; - case Await_kind: - CALL(astfold_expr, expr_ty, node_->v.Await.value); - break; - case Yield_kind: - CALL_OPT(astfold_expr, expr_ty, node_->v.Yield.value); - break; - case YieldFrom_kind: - CALL(astfold_expr, expr_ty, node_->v.YieldFrom.value); - break; - case Compare_kind: - CALL(astfold_expr, expr_ty, node_->v.Compare.left); - CALL_SEQ(astfold_expr, expr, node_->v.Compare.comparators); - CALL(fold_compare, expr_ty, node_); - break; -#define NONE_CHECK(K, F) \ - if (node_->v.K.aware) { \ - expr_ty e = node_->v.K.F; \ - if (e->kind == Constant_kind && \ - Py_Is(e->v.Constant.value, Py_None)) \ - { \ - COPY_NODE(node_, e); \ - break; \ - } \ - } - - case Call_kind: - CALL(astfold_expr, expr_ty, node_->v.Call.func); - NONE_CHECK(Call, func) - CALL_SEQ(astfold_expr, expr, node_->v.Call.args); - CALL_SEQ(astfold_keyword, keyword, node_->v.Call.keywords); - break; - case FormattedValue_kind: - CALL(astfold_expr, expr_ty, node_->v.FormattedValue.value); - CALL_OPT(astfold_expr, expr_ty, node_->v.FormattedValue.format_spec); - break; - case JoinedStr_kind: - CALL_SEQ(astfold_expr, expr, node_->v.JoinedStr.values); - break; - case Attribute_kind: - CALL(astfold_expr, expr_ty, node_->v.Attribute.value); - NONE_CHECK(Attribute, value) - break; - case Subscript_kind: - CALL(astfold_expr, expr_ty, node_->v.Subscript.value); - NONE_CHECK(Subscript, value) - CALL(astfold_expr, expr_ty, node_->v.Subscript.slice); - CALL(fold_subscr, expr_ty, node_); - break; - case Starred_kind: - CALL(astfold_expr, expr_ty, node_->v.Starred.value); - break; - case Slice_kind: - CALL_OPT(astfold_expr, expr_ty, node_->v.Slice.lower); - CALL_OPT(astfold_expr, expr_ty, node_->v.Slice.upper); - CALL_OPT(astfold_expr, expr_ty, node_->v.Slice.step); - CALL(fold_slice, expr_ty, node_); - break; - case List_kind: - CALL_SEQ(astfold_expr, expr, node_->v.List.elts); - break; - case Tuple_kind: - CALL_SEQ(astfold_expr, expr, node_->v.Tuple.elts); - CALL(fold_tuple, expr_ty, node_); - break; - case Name_kind: - if (node_->v.Name.ctx == Load && - _PyUnicode_EqualToASCIIString(node_->v.Name.id, "__debug__")) { - state->recursion_depth--; - return make_const(node_, PyBool_FromLong(!state->optimize), ctx_); - } - break; - case NamedExpr_kind: - CALL(astfold_expr, expr_ty, node_->v.NamedExpr.value); - break; - case Composition_kind: - CALL(fold_comp, expr_ty, node_); - break; - case CompoundExpr_kind: - CALL(astfold_stmt, stmt_ty, node_->v.CompoundExpr.value); - break; - case BlockExpr_kind: - CALL_SEQ(astfold_stmt, stmt, node_->v.BlockExpr.body); - break; - case ExprTarget_kind: - CALL(astfold_expr, expr_ty, node_->v.ExprTarget.value); - case Template_kind: - { - assert(node_->v.Template.level < state->comp_ptr->n); - - _comp_entry *entry = - &state->comp_ptr->arr[state->comp_ptr->n - node_->v.Template.level - 1]; - if (entry->sub) { - COPY_NODE(node_, entry->sub); - } - entry->last = node_; - break; - } - case Constant_kind: - // nothing further to do - break; - // No default case, so the compiler will emit a warning if new expression - // kinds are added without being handled here - } - state->recursion_depth--; - return 1; -} - -static int -astfold_keyword(keyword_ty node_, PyArena *ctx_, _PyASTOptimizeState *state) -{ - CALL_OPT(astfold_expr, expr_ty, node_->value); - return 1; -} - -static int -astfold_comprehension(comprehension_ty node_, PyArena *ctx_, _PyASTOptimizeState *state) -{ - CALL(astfold_expr, expr_ty, node_->target); - CALL(astfold_expr, expr_ty, node_->iter); - CALL_SEQ(astfold_expr, expr, node_->ifs); - - CALL(fold_iter, expr_ty, node_->iter); - return 1; -} - -static int -astfold_arguments(arguments_ty node_, PyArena *ctx_, _PyASTOptimizeState *state) -{ - CALL_SEQ(astfold_arg, arg, node_->posonlyargs); - CALL_SEQ(astfold_arg, arg, node_->args); - CALL_OPT(astfold_arg, arg_ty, node_->vararg); - CALL_SEQ(astfold_arg, arg, node_->kwonlyargs); - CALL_SEQ(astfold_expr, expr, node_->kw_defaults); - CALL_OPT(astfold_arg, arg_ty, node_->kwarg); - CALL_SEQ(astfold_expr, expr, node_->defaults); - return 1; -} - -static int -astfold_arg(arg_ty node_, PyArena *ctx_, _PyASTOptimizeState *state) -{ - if (!(state->ff_features & CO_FUTURE_ANNOTATIONS)) { - CALL_OPT(astfold_expr, expr_ty, node_->annotation); - } - return 1; -} - -static int -astfold_stmt(stmt_ty node_, PyArena *ctx_, _PyASTOptimizeState *state) -{ - if (++state->recursion_depth > state->recursion_limit) { - PyErr_SetString(PyExc_RecursionError, - "maximum recursion depth exceeded during compilation"); - return 0; - } - switch (node_->kind) { - case FunctionDef_kind: - CALL_SEQ(astfold_type_param, type_param, node_->v.FunctionDef.type_params); - CALL(astfold_arguments, arguments_ty, node_->v.FunctionDef.args); - CALL(astfold_body, asdl_seq, node_->v.FunctionDef.body); - CALL_SEQ(astfold_expr, expr, node_->v.FunctionDef.decorator_list); - if (!(state->ff_features & CO_FUTURE_ANNOTATIONS)) { - CALL_OPT(astfold_expr, expr_ty, node_->v.FunctionDef.returns); - } - break; - case AsyncFunctionDef_kind: - CALL_SEQ(astfold_type_param, type_param, node_->v.AsyncFunctionDef.type_params); - CALL(astfold_arguments, arguments_ty, node_->v.AsyncFunctionDef.args); - CALL(astfold_body, asdl_seq, node_->v.AsyncFunctionDef.body); - CALL_SEQ(astfold_expr, expr, node_->v.AsyncFunctionDef.decorator_list); - if (!(state->ff_features & CO_FUTURE_ANNOTATIONS)) { - CALL_OPT(astfold_expr, expr_ty, node_->v.AsyncFunctionDef.returns); - } - break; - case ClassDef_kind: - CALL_SEQ(astfold_type_param, type_param, node_->v.ClassDef.type_params); - CALL_SEQ(astfold_expr, expr, node_->v.ClassDef.bases); - CALL_SEQ(astfold_keyword, keyword, node_->v.ClassDef.keywords); - CALL(astfold_body, asdl_seq, node_->v.ClassDef.body); - CALL_SEQ(astfold_expr, expr, node_->v.ClassDef.decorator_list); - break; - case Return_kind: - CALL_OPT(astfold_expr, expr_ty, node_->v.Return.value); - break; - case Delete_kind: - CALL_SEQ(astfold_expr, expr, node_->v.Delete.targets); - break; - case Assign_kind: - CALL_SEQ(astfold_expr, expr, node_->v.Assign.targets); - CALL(astfold_expr, expr_ty, node_->v.Assign.value); - break; - case AugAssign_kind: - CALL(astfold_expr, expr_ty, node_->v.AugAssign.target); - CALL_OPT(astfold_expr, expr_ty, node_->v.AugAssign.value); - if (node_->v.AugAssign.op == Comp && node_->v.AugAssign.value) { - CALL(fold_compassign, expr_ty, node_); - } - break; - case AnnAssign_kind: - CALL(astfold_expr, expr_ty, node_->v.AnnAssign.target); - if (!(state->ff_features & CO_FUTURE_ANNOTATIONS)) { - CALL(astfold_expr, expr_ty, node_->v.AnnAssign.annotation); - } - CALL_OPT(astfold_expr, expr_ty, node_->v.AnnAssign.value); - break; - case TypeAlias_kind: - CALL(astfold_expr, expr_ty, node_->v.TypeAlias.name); - CALL_SEQ(astfold_type_param, type_param, node_->v.TypeAlias.type_params); - CALL(astfold_expr, expr_ty, node_->v.TypeAlias.value); - break; - case For_kind: - CALL(astfold_expr, expr_ty, node_->v.For.target); - CALL(astfold_expr, expr_ty, node_->v.For.iter); - CALL_SEQ(astfold_stmt, stmt, node_->v.For.body); - CALL_SEQ(astfold_stmt, stmt, node_->v.For.orelse); - - CALL(fold_iter, expr_ty, node_->v.For.iter); - break; - case AsyncFor_kind: - CALL(astfold_expr, expr_ty, node_->v.AsyncFor.target); - CALL(astfold_expr, expr_ty, node_->v.AsyncFor.iter); - CALL_SEQ(astfold_stmt, stmt, node_->v.AsyncFor.body); - CALL_SEQ(astfold_stmt, stmt, node_->v.AsyncFor.orelse); - break; - case While_kind: - CALL(astfold_expr, expr_ty, node_->v.While.test); - CALL_SEQ(astfold_stmt, stmt, node_->v.While.body); - CALL_SEQ(astfold_stmt, stmt, node_->v.While.orelse); - break; - case If_kind: - CALL(astfold_expr, expr_ty, node_->v.If.test); - CALL_SEQ(astfold_stmt, stmt, node_->v.If.body); - CALL_SEQ(astfold_stmt, stmt, node_->v.If.orelse); - break; - case With_kind: - CALL_SEQ(astfold_withitem, withitem, node_->v.With.items); - CALL_SEQ(astfold_stmt, stmt, node_->v.With.body); - break; - case AsyncWith_kind: - CALL_SEQ(astfold_withitem, withitem, node_->v.AsyncWith.items); - CALL_SEQ(astfold_stmt, stmt, node_->v.AsyncWith.body); - break; - case Raise_kind: - CALL_OPT(astfold_expr, expr_ty, node_->v.Raise.exc); - CALL_OPT(astfold_expr, expr_ty, node_->v.Raise.cause); - break; - case Try_kind: - CALL_SEQ(astfold_stmt, stmt, node_->v.Try.body); - CALL_SEQ(astfold_excepthandler, excepthandler, node_->v.Try.handlers); - CALL_SEQ(astfold_stmt, stmt, node_->v.Try.orelse); - CALL_SEQ(astfold_stmt, stmt, node_->v.Try.finalbody); - break; - case TryStar_kind: - CALL_SEQ(astfold_stmt, stmt, node_->v.TryStar.body); - CALL_SEQ(astfold_excepthandler, excepthandler, node_->v.TryStar.handlers); - CALL_SEQ(astfold_stmt, stmt, node_->v.TryStar.orelse); - CALL_SEQ(astfold_stmt, stmt, node_->v.TryStar.finalbody); - break; - case Assert_kind: - CALL(astfold_expr, expr_ty, node_->v.Assert.test); - CALL_OPT(astfold_expr, expr_ty, node_->v.Assert.msg); - break; - case Expr_kind: - CALL(astfold_expr, expr_ty, node_->v.Expr.value); - break; - case Switch_kind: - CALL(astfold_expr, expr_ty, node_->v.Switch.subject); - CALL_SEQ(astfold_switch_case, switch_case, node_->v.Switch.cases); - break; - case Match_kind: - CALL(astfold_expr, expr_ty, node_->v.Match.subject); - CALL_SEQ(astfold_match_case, match_case, node_->v.Match.cases); - break; - // The following statements don't contain any subexpressions to be folded - case Import_kind: - case ImportFrom_kind: - case Global_kind: - case Nonlocal_kind: - case Goto_kind: - case Label_kind: - case Pass_kind: - case Break_kind: - case Continue_kind: - break; - // No default case, so the compiler will emit a warning if new statement - // kinds are added without being handled here - } - state->recursion_depth--; - return 1; -} - -static int -astfold_excepthandler(excepthandler_ty node_, PyArena *ctx_, _PyASTOptimizeState *state) -{ - switch (node_->kind) { - case ExceptHandler_kind: - CALL_OPT(astfold_expr, expr_ty, node_->v.ExceptHandler.type); - CALL_SEQ(astfold_stmt, stmt, node_->v.ExceptHandler.body); - break; - // No default case, so the compiler will emit a warning if new handler - // kinds are added without being handled here - } - return 1; -} - -static int -astfold_withitem(withitem_ty node_, PyArena *ctx_, _PyASTOptimizeState *state) -{ - CALL(astfold_expr, expr_ty, node_->context_expr); - CALL_OPT(astfold_expr, expr_ty, node_->optional_vars); - return 1; -} - -static int -astfold_pattern(pattern_ty node_, PyArena *ctx_, _PyASTOptimizeState *state) -{ - // Currently, this is really only used to form complex/negative numeric - // constants in MatchValue and MatchMapping nodes - // We still recurse into all subexpressions and subpatterns anyway - if (++state->recursion_depth > state->recursion_limit) { - PyErr_SetString(PyExc_RecursionError, - "maximum recursion depth exceeded during compilation"); - return 0; - } - switch (node_->kind) { - case MatchValue_kind: - CALL(astfold_expr, expr_ty, node_->v.MatchValue.value); - break; - case MatchSingleton_kind: - break; - case MatchSequence_kind: - CALL_SEQ(astfold_pattern, pattern, node_->v.MatchSequence.patterns); - break; - case MatchMapping_kind: - CALL_SEQ(astfold_expr, expr, node_->v.MatchMapping.keys); - CALL_SEQ(astfold_pattern, pattern, node_->v.MatchMapping.patterns); - break; - case MatchClass_kind: - CALL(astfold_expr, expr_ty, node_->v.MatchClass.cls); - CALL_SEQ(astfold_pattern, pattern, node_->v.MatchClass.patterns); - CALL_SEQ(astfold_pattern, pattern, node_->v.MatchClass.kwd_patterns); - break; - case MatchStar_kind: - break; - case MatchAs_kind: - if (node_->v.MatchAs.pattern) { - CALL(astfold_pattern, pattern_ty, node_->v.MatchAs.pattern); - } - break; - case MatchOr_kind: - CALL_SEQ(astfold_pattern, pattern, node_->v.MatchOr.patterns); - break; - // No default case, so the compiler will emit a warning if new pattern - // kinds are added without being handled here - } - state->recursion_depth--; - return 1; -} - -static int -astfold_match_case(match_case_ty node_, PyArena *ctx_, _PyASTOptimizeState *state) -{ - CALL(astfold_pattern, expr_ty, node_->pattern); - CALL_OPT(astfold_expr, expr_ty, node_->guard); - CALL_SEQ(astfold_stmt, stmt, node_->body); - return 1; -} - -static int -astfold_switch_case(switch_case_ty node_, PyArena *ctx_, _PyASTOptimizeState *state) -{ - CALL_SEQ(astfold_expr, expr, node_->patterns); - CALL_SEQ(astfold_stmt, stmt, node_->body); - return 1; -} - -static int -astfold_switch(stmt_ty node_, PyArena *ctx_, _PyASTOptimizeState *state) -{ - Py_ssize_t i, j; - PyObject *subj; - expr_ty subject_expr; - assert(node_->kind == Switch_kind); - - subject_expr = node_->v.Switch.subject; - if (subject_expr->kind != Constant_kind) { - return 1; - } - subj = subject_expr->v.Constant.value; - - for (i = 0; i < asdl_seq_LEN(node_->v.Switch.cases); i++) { - switch_case_ty s = asdl_seq_GET(node_->v.Switch.cases, i); - for (j = 0; j < asdl_seq_LEN(s->patterns); j++) { - expr_ty e = asdl_seq_GET(s->patterns, j); - if (e->kind != Constant_kind) { - return 1; - } - if (!PyLong_CheckExact(e->v.Constant.value)) { - continue; - } - int res = PyObject_RichCompareBool(subj, e->v.Constant.value, Py_EQ); - if (res < 0) { - PyErr_Clear(); - continue; - } - if (res) { - COPY_NODE(node_, e); - } - } - } - - return 1; -} - -static int -astfold_type_param(type_param_ty node_, PyArena *ctx_, _PyASTOptimizeState *state) -{ - switch (node_->kind) { - case TypeVar_kind: - CALL_OPT(astfold_expr, expr_ty, node_->v.TypeVar.bound); - break; - case ParamSpec_kind: - break; - case TypeVarTuple_kind: - break; - } - return 1; -} - -#undef CALL -#undef CALL_OPT -#undef CALL_SEQ - -/* See comments in symtable.c. */ -#define COMPILER_STACK_FRAME_SCALE 2 - -int -_PyAST_Optimize(mod_ty mod, PyArena *arena, int optimize, int ff_features) -{ - PyThreadState *tstate; - int starting_recursion_depth; - - _PyASTOptimizeState state; - state.optimize = optimize; - state.ff_features = ff_features; - - /* Setup recursion depth check counters */ - tstate = _PyThreadState_GET(); - if (!tstate) { - return 0; - } - /* Be careful here to prevent overflow. */ - int recursion_depth = C_RECURSION_LIMIT - tstate->c_recursion_remaining; - starting_recursion_depth = recursion_depth * COMPILER_STACK_FRAME_SCALE; - state.recursion_depth = starting_recursion_depth; - state.recursion_limit = C_RECURSION_LIMIT * COMPILER_STACK_FRAME_SCALE; - state.comp_ptr = (_comp_list *)PyMem_Malloc(sizeof(_comp_list)); if (state.comp_ptr == NULL) { - PyErr_NoMemory(); return 0; } state.comp_ptr->arr = (_comp_entry *)PyMem_Malloc(2 * sizeof(_comp_entry)); if (state.comp_ptr->arr == NULL) { - PyErr_NoMemory(); return 0; } state.comp_ptr->capacity = 2; state.comp_ptr->n = 0; - - int ret = astfold_mod(mod, arena, &state); - assert(ret || PyErr_Occurred()); - - /* Check that the recursion depth counting balanced correctly */ - if (ret && state.recursion_depth != starting_recursion_depth) { - PyErr_Format(PyExc_SystemError, - "AST optimizer recursion depth mismatch (before=%d, after=%d)", - starting_recursion_depth, state.recursion_depth); - return 0; - } - - assert(state.comp_ptr->n == 0); - PyMem_Free(state.comp_ptr->arr); - PyMem_Free(state.comp_ptr); - - return ret; -} +/* AST Optimizer */ +#include "Python.h" +#include "pycore_ast.h" // _PyAST_GetDocString() +#include "pycore_format.h" // F_LJUST +#include "pycore_long.h" // _PyLong +#include "pycore_pystate.h" // _PyThreadState_GET() +#include "pycore_setobject.h" // _PySet_NextEntry() +#include "pycore_dict.h" // _PyFrozenDict_FromItems() + + +typedef struct { + expr_ty sub; + expr_ty last; +} _comp_entry; + +typedef struct { + int optimize; + int ff_features; + + int recursion_depth; /* current recursion depth */ + int recursion_limit; /* recursion limit */ + + struct { + _comp_entry arr[PY_MAX_TEMPLATE_SUBS]; + int n; + } comp_ptr; +} _PyASTOptimizeState; + + +static int +make_const(expr_ty node, PyObject *val, PyArena *arena) +{ + // Even if no new value was calculated, make_const may still + // need to clear an error (e.g. for division by zero) + if (val == NULL) { + if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) { + return 0; + } + PyErr_Clear(); + return 1; + } + if (_PyArena_AddPyObject(arena, val) < 0) { + Py_DECREF(val); + return 0; + } + node->kind = Constant_kind; + node->v.Constant.kind = NULL; + node->v.Constant.value = val; + return 1; +} + +#define COPY_NODE(TO, FROM) (memcpy((TO), (FROM), sizeof(struct _expr))) + +static int +has_starred(asdl_expr_seq *elts) +{ + Py_ssize_t n = asdl_seq_LEN(elts); + for (Py_ssize_t i = 0; i < n; i++) { + expr_ty e = (expr_ty)asdl_seq_GET(elts, i); + if (e->kind == Starred_kind) { + return 1; + } + } + return 0; +} + + +static PyObject* +unary_not(PyObject *v) +{ + int r = PyObject_IsTrue(v); + if (r < 0) + return NULL; + return PyBool_FromLong(!r); +} + +static int +fold_unaryop(expr_ty node, PyArena *arena, _PyASTOptimizeState *state) +{ + expr_ty arg = node->v.UnaryOp.operand; + + if (arg->kind != Constant_kind) { + /* Fold not into comparison */ + if (node->v.UnaryOp.op == Not && arg->kind == Compare_kind && + asdl_seq_LEN(arg->v.Compare.ops) == 1) { + /* Eq and NotEq are often implemented in terms of one another, so + folding not (self == other) into self != other breaks implementation + of !=. Detecting such cases doesn't seem worthwhile. + Python uses for 'is subset'/'is superset' operations on sets. + They don't satisfy not folding laws. */ + cmpop_ty op = asdl_seq_GET(arg->v.Compare.ops, 0); + switch (op) { + case Is: + op = IsNot; + break; + case IsNot: + op = Is; + break; + case In: + op = NotIn; + break; + case NotIn: + op = In; + break; + case IsIn: + op = IsNotIn; + break; + case IsNotIn: + op = IsIn; + break; + // The remaining comparison operators can't be safely inverted + case Eq: + case NotEq: + case Lt: + case LtE: + case Gt: + case GtE: + op = 0; // The AST enums leave "0" free as an "unused" marker + break; + // No default case, so the compiler will emit a warning if new + // comparison operators are added without being handled here + } + if (op) { + asdl_seq_SET(arg->v.Compare.ops, 0, op); + COPY_NODE(node, arg); + return 1; + } + } + return 1; + } + + typedef PyObject *(*unary_op)(PyObject*); + static const unary_op ops[] = { + [Invert] = PyNumber_Invert, + [Not] = unary_not, + [UAdd] = PyNumber_Positive, + [USub] = PyNumber_Negative, + }; + PyObject *newval = ops[node->v.UnaryOp.op](arg->v.Constant.value); + return make_const(node, newval, arena); +} + +/* Check whether a collection doesn't containing too much items (including + subcollections). This protects from creating a constant that needs + too much time for calculating a hash. + "limit" is the maximal number of items. + Returns the negative number if the total number of items exceeds the + limit. Otherwise returns the limit minus the total number of items. +*/ + +static Py_ssize_t +check_complexity(PyObject *obj, Py_ssize_t limit) +{ + if (PyTuple_Check(obj)) { + Py_ssize_t i; + limit -= PyTuple_GET_SIZE(obj); + for (i = 0; limit >= 0 && i < PyTuple_GET_SIZE(obj); i++) { + limit = check_complexity(PyTuple_GET_ITEM(obj, i), limit); + } + return limit; + } + else if (PyFrozenSet_Check(obj)) { + Py_ssize_t i = 0; + PyObject *item; + Py_hash_t hash; + limit -= PySet_GET_SIZE(obj); + while (limit >= 0 && _PySet_NextEntry(obj, &i, &item, &hash)) { + limit = check_complexity(item, limit); + } + } + return limit; +} + +#define MAX_INT_SIZE 128 /* bits */ +#define MAX_COLLECTION_SIZE 256 /* items */ +#define MAX_STR_SIZE 4096 /* characters */ +#define MAX_TOTAL_ITEMS 1024 /* including nested collections */ + +static PyObject * +safe_multiply(PyObject *v, PyObject *w) +{ + if (PyLong_Check(v) && PyLong_Check(w) && + !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w) + ) { + size_t vbits = _PyLong_NumBits(v); + size_t wbits = _PyLong_NumBits(w); + if (vbits == (size_t)-1 || wbits == (size_t)-1) { + return NULL; + } + if (vbits + wbits > MAX_INT_SIZE) { + return NULL; + } + } + else if (PyLong_Check(v) && (PyTuple_Check(w) || PyFrozenSet_Check(w))) { + Py_ssize_t size = PyTuple_Check(w) ? PyTuple_GET_SIZE(w) : + PySet_GET_SIZE(w); + if (size) { + long n = PyLong_AsLong(v); + if (n < 0 || n > MAX_COLLECTION_SIZE / size) { + return NULL; + } + if (n && check_complexity(w, MAX_TOTAL_ITEMS / n) < 0) { + return NULL; + } + } + } + else if (PyLong_Check(v) && (PyUnicode_Check(w) || PyBytes_Check(w))) { + Py_ssize_t size = PyUnicode_Check(w) ? PyUnicode_GET_LENGTH(w) : + PyBytes_GET_SIZE(w); + if (size) { + long n = PyLong_AsLong(v); + if (n < 0 || n > MAX_STR_SIZE / size) { + return NULL; + } + } + } + else if (PyLong_Check(w) && + (PyTuple_Check(v) || PyFrozenSet_Check(v) || + PyUnicode_Check(v) || PyBytes_Check(v))) + { + return safe_multiply(w, v); + } + + return PyNumber_Multiply(v, w); +} + +static PyObject * +safe_power(PyObject *v, PyObject *w) +{ + if (PyLong_Check(v) && PyLong_Check(w) && + !_PyLong_IsZero((PyLongObject *)v) && _PyLong_IsPositive((PyLongObject *)w) + ) { + size_t vbits = _PyLong_NumBits(v); + size_t wbits = PyLong_AsSize_t(w); + if (vbits == (size_t)-1 || wbits == (size_t)-1) { + return NULL; + } + if (vbits > MAX_INT_SIZE / wbits) { + return NULL; + } + } + + return PyNumber_Power(v, w, Py_None); +} + +static PyObject * +safe_lshift(PyObject *v, PyObject *w) +{ + if (PyLong_Check(v) && PyLong_Check(w) && + !_PyLong_IsZero((PyLongObject *)v) && !_PyLong_IsZero((PyLongObject *)w) + ) { + size_t vbits = _PyLong_NumBits(v); + size_t wbits = PyLong_AsSize_t(w); + if (vbits == (size_t)-1 || wbits == (size_t)-1) { + return NULL; + } + if (wbits > MAX_INT_SIZE || vbits > MAX_INT_SIZE - wbits) { + return NULL; + } + } + + return PyNumber_Lshift(v, w); +} + +static PyObject * +safe_mod(PyObject *v, PyObject *w) +{ + if (PyUnicode_Check(v) || PyBytes_Check(v)) { + return NULL; + } + + return PyNumber_Remainder(v, w); +} + + +static expr_ty +parse_literal(PyObject *fmt, Py_ssize_t *ppos, PyArena *arena) +{ + const void *data = PyUnicode_DATA(fmt); + int kind = PyUnicode_KIND(fmt); + Py_ssize_t size = PyUnicode_GET_LENGTH(fmt); + Py_ssize_t start, pos; + int has_percents = 0; + start = pos = *ppos; + while (pos < size) { + if (PyUnicode_READ(kind, data, pos) != '%') { + pos++; + } + else if (pos+1 < size && PyUnicode_READ(kind, data, pos+1) == '%') { + has_percents = 1; + pos += 2; + } + else { + break; + } + } + *ppos = pos; + if (pos == start) { + return NULL; + } + PyObject *str = PyUnicode_Substring(fmt, start, pos); + /* str = str.replace('%%', '%') */ + if (str && has_percents) { + _Py_DECLARE_STR(percent, "%"); + _Py_DECLARE_STR(dbl_percent, "%%"); + Py_SETREF(str, PyUnicode_Replace(str, &_Py_STR(dbl_percent), + &_Py_STR(percent), -1)); + } + if (!str) { + return NULL; + } + + if (_PyArena_AddPyObject(arena, str) < 0) { + Py_DECREF(str); + return NULL; + } + return _PyAST_Constant(str, NULL, -1, -1, -1, -1, arena); +} + +#define MAXDIGITS 3 + +static int +simple_format_arg_parse(PyObject *fmt, Py_ssize_t *ppos, + int *spec, int *flags, int *width, int *prec) +{ + Py_ssize_t pos = *ppos, len = PyUnicode_GET_LENGTH(fmt); + Py_UCS4 ch; + +#define NEXTC do { \ + if (pos >= len) { \ + return 0; \ + } \ + ch = PyUnicode_READ_CHAR(fmt, pos); \ + pos++; \ +} while (0) + + *flags = 0; + while (1) { + NEXTC; + switch (ch) { + case '-': *flags |= F_LJUST; continue; + case '+': *flags |= F_SIGN; continue; + case ' ': *flags |= F_BLANK; continue; + case '#': *flags |= F_ALT; continue; + case '0': *flags |= F_ZERO; continue; + } + break; + } + if ('0' <= ch && ch <= '9') { + *width = 0; + int digits = 0; + while ('0' <= ch && ch <= '9') { + *width = *width * 10 + (ch - '0'); + NEXTC; + if (++digits >= MAXDIGITS) { + return 0; + } + } + } + + if (ch == '.') { + NEXTC; + *prec = 0; + if ('0' <= ch && ch <= '9') { + int digits = 0; + while ('0' <= ch && ch <= '9') { + *prec = *prec * 10 + (ch - '0'); + NEXTC; + if (++digits >= MAXDIGITS) { + return 0; + } + } + } + } + *spec = ch; + *ppos = pos; + return 1; + +#undef NEXTC +} + +static expr_ty +parse_format(PyObject *fmt, Py_ssize_t *ppos, expr_ty arg, PyArena *arena) +{ + int spec, flags, width = -1, prec = -1; + if (!simple_format_arg_parse(fmt, ppos, &spec, &flags, &width, &prec)) { + // Unsupported format. + return NULL; + } + if (spec == 's' || spec == 'r' || spec == 'a') { + char buf[1 + MAXDIGITS + 1 + MAXDIGITS + 1], *p = buf; + if (!(flags & F_LJUST) && width > 0) { + *p++ = '>'; + } + if (width >= 0) { + p += snprintf(p, MAXDIGITS + 1, "%d", width); + } + if (prec >= 0) { + p += snprintf(p, MAXDIGITS + 2, ".%d", prec); + } + expr_ty format_spec = NULL; + if (p != buf) { + PyObject *str = PyUnicode_FromString(buf); + if (str == NULL) { + return NULL; + } + if (_PyArena_AddPyObject(arena, str) < 0) { + Py_DECREF(str); + return NULL; + } + format_spec = _PyAST_Constant(str, NULL, -1, -1, -1, -1, arena); + if (format_spec == NULL) { + return NULL; + } + } + return _PyAST_FormattedValue(arg, spec, format_spec, + arg->lineno, arg->col_offset, + arg->end_lineno, arg->end_col_offset, + arena); + } + // Unsupported format. + return NULL; +} + +static int +optimize_format(expr_ty node, PyObject *fmt, asdl_expr_seq *elts, PyArena *arena) +{ + Py_ssize_t pos = 0; + Py_ssize_t cnt = 0; + asdl_expr_seq *seq = _Py_asdl_expr_seq_new(asdl_seq_LEN(elts) * 2 + 1, arena); + if (!seq) { + return 0; + } + seq->size = 0; + + while (1) { + expr_ty lit = parse_literal(fmt, &pos, arena); + if (lit) { + asdl_seq_SET(seq, seq->size++, lit); + } + else if (PyErr_Occurred()) { + return 0; + } + + if (pos >= PyUnicode_GET_LENGTH(fmt)) { + break; + } + if (cnt >= asdl_seq_LEN(elts)) { + // More format units than items. + return 1; + } + assert(PyUnicode_READ_CHAR(fmt, pos) == '%'); + pos++; + expr_ty expr = parse_format(fmt, &pos, asdl_seq_GET(elts, cnt), arena); + cnt++; + if (!expr) { + return !PyErr_Occurred(); + } + asdl_seq_SET(seq, seq->size++, expr); + } + if (cnt < asdl_seq_LEN(elts)) { + // More items than format units. + return 1; + } + expr_ty res = _PyAST_JoinedStr(seq, + node->lineno, node->col_offset, + node->end_lineno, node->end_col_offset, + arena); + if (!res) { + return 0; + } + COPY_NODE(node, res); +// PySys_FormatStderr("format = %R\n", fmt); + return 1; +} + +static int +fold_binop(expr_ty node, PyArena *arena, _PyASTOptimizeState *state) +{ + expr_ty lhs, rhs; + lhs = node->v.BinOp.left; + rhs = node->v.BinOp.right; + if (lhs->kind != Constant_kind) { + return 1; + } + PyObject *lv = lhs->v.Constant.value; + + if (node->v.BinOp.op == Mod && + rhs->kind == Tuple_kind && + PyUnicode_Check(lv) && + !has_starred(rhs->v.Tuple.elts)) + { + return optimize_format(node, lv, rhs->v.Tuple.elts, arena); + } + + if (node->v.BinOp.op == Clsc && Py_Is(lv, Py_None)) + { + COPY_NODE(node, rhs); + return 1; + } + + if (rhs->kind != Constant_kind) { + return 1; + } + + PyObject *rv = rhs->v.Constant.value; + PyObject *newval = NULL; + + switch (node->v.BinOp.op) { + case Add: + newval = PyNumber_Add(lv, rv); + break; + case Sub: + newval = PyNumber_Subtract(lv, rv); + break; + case Mult: + newval = safe_multiply(lv, rv); + break; + case Div: + newval = PyNumber_TrueDivide(lv, rv); + break; + case FloorDiv: + newval = PyNumber_FloorDivide(lv, rv); + break; + case Mod: + newval = safe_mod(lv, rv); + break; + case Pow: + newval = safe_power(lv, rv); + break; + case LShift: + newval = safe_lshift(lv, rv); + break; + case RShift: + newval = PyNumber_Rshift(lv, rv); + break; + case BitOr: + newval = PyNumber_Or(lv, rv); + break; + case BitXor: + newval = PyNumber_Xor(lv, rv); + break; + case BitAnd: + newval = PyNumber_And(lv, rv); + break; + // No builtin constants implement the following operators + case MatMult: + return 1; + // No default case, so the compiler will emit a warning if new binary + // operators are added without being handled here + } + + return make_const(node, newval, arena); +} + +static PyObject* +make_const_tuple(asdl_expr_seq *elts) +{ + for (int i = 0; i < asdl_seq_LEN(elts); i++) { + expr_ty e = (expr_ty)asdl_seq_GET(elts, i); + if (e->kind != Constant_kind) { + return NULL; + } + } + + PyObject *newval = PyTuple_New(asdl_seq_LEN(elts)); + if (newval == NULL) { + return NULL; + } + + for (int i = 0; i < asdl_seq_LEN(elts); i++) { + expr_ty e = (expr_ty)asdl_seq_GET(elts, i); + PyObject *v = e->v.Constant.value; + PyTuple_SET_ITEM(newval, i, Py_NewRef(v)); + } + return newval; +} + +static int +fold_tuple(expr_ty node, PyArena *arena, _PyASTOptimizeState *state) +{ + PyObject *newval; + + if (node->v.Tuple.ctx != Load) + return 1; + + newval = make_const_tuple(node->v.Tuple.elts); + return make_const(node, newval, arena); +} + +static int +fold_subscr(expr_ty node, PyArena *arena, _PyASTOptimizeState *state) +{ + PyObject *newval; + expr_ty arg, idx; + + arg = node->v.Subscript.value; + idx = node->v.Subscript.slice; + if (node->v.Subscript.ctx != Load || + arg->kind != Constant_kind || + idx->kind != Constant_kind) + { + return 1; + } + + newval = PyObject_GetItem(arg->v.Constant.value, idx->v.Constant.value); + return make_const(node, newval, arena); +} + +/* Change literal list or set of constants into constant + tuple or frozenset respectively. Change literal list of + non-constants into tuple. + Used for right operand of "in" and "not in" tests and for iterable + in "for" loop and comprehensions. +*/ +static int +fold_iter(expr_ty arg, PyArena *arena, _PyASTOptimizeState *state) +{ + PyObject *newval; + if (arg->kind == List_kind) { + /* First change a list into tuple. */ + asdl_expr_seq *elts = arg->v.List.elts; + if (has_starred(elts)) { + return 1; + } + expr_context_ty ctx = arg->v.List.ctx; + arg->kind = Tuple_kind; + arg->v.Tuple.elts = elts; + arg->v.Tuple.ctx = ctx; + /* Try to create a constant tuple. */ + newval = make_const_tuple(elts); + } + else if (arg->kind == Set_kind) { + newval = make_const_tuple(arg->v.Set.elts); + if (newval) { + Py_SETREF(newval, PyFrozenSet_New(newval)); + } + } + else if (arg->kind == Dict_kind) { + PyObject *keys = make_const_tuple(arg->v.Dict.keys); + if (keys) { + if (PyTuple_GET_SIZE(keys) == 0) { + Py_DECREF(keys); + newval = PyFrozenDict_New(); + } + else { + PyObject *values = make_const_tuple(arg->v.Dict.values); + if (values) { + newval = _PyFrozenDict_FromItems( + &PyTuple_GET_ITEM(keys, 0), 1, + &PyTuple_GET_ITEM(values, 0), 1, + PyTuple_GET_SIZE(keys) + ); + Py_DECREF(keys); + Py_DECREF(values); + } + else { + PyErr_Clear(); + Py_DECREF(keys); + return 1; + } + } + } + } + else { + return 1; + } + return make_const(arg, newval, arena); +} + +static int +fold_compare(expr_ty node, PyArena *arena, _PyASTOptimizeState *state) +{ + asdl_int_seq *ops; + asdl_expr_seq *args; + Py_ssize_t i; + + ops = node->v.Compare.ops; + args = node->v.Compare.comparators; + /* Change literal list or set in 'in' or 'not in' into + tuple or frozenset respectively. */ + i = asdl_seq_LEN(ops) - 1; + int op = asdl_seq_GET(ops, i); + if (op == In || op == NotIn || op == IsIn || op == IsNotIn) { + if (!fold_iter((expr_ty)asdl_seq_GET(args, i), arena, state)) { + return 0; + } + } + return 1; +} + +static int +fold_slice(expr_ty node, PyArena *arena, _PyASTOptimizeState *state) +{ + PyObject *olower = NULL; + PyObject *oupper = NULL; + PyObject *ostep = NULL; + expr_ty lower = node->v.Slice.lower; + expr_ty upper = node->v.Slice.upper; + expr_ty step = node->v.Slice.step; + + if (lower && lower->kind != Constant_kind || + upper && upper->kind != Constant_kind || + step && step->kind != Constant_kind) + { + return 1; + } + + if (lower) { + olower = lower->v.Constant.value; + } + + if (upper) { + oupper = upper->v.Constant.value; + } + + if (step) { + ostep = step->v.Constant.value; + } + + PyObject *res = PySlice_New(olower, oupper, ostep); + if (res == NULL) { + if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt)) { + return 0; + } + PyErr_Clear(); + return 1; + } + + if (PyObject_Hash(res) == -1) { + PyErr_Clear(); + return 1; + } + + return make_const(node, res, arena); +} + +static int astfold_mod(mod_ty node_, PyArena *ctx_, _PyASTOptimizeState *state); +static int astfold_stmt(stmt_ty node_, PyArena *ctx_, _PyASTOptimizeState *state); +static int astfold_expr(expr_ty node_, PyArena *ctx_, _PyASTOptimizeState *state); +static int astfold_arguments(arguments_ty node_, PyArena *ctx_, _PyASTOptimizeState *state); +static int astfold_comprehension(comprehension_ty node_, PyArena *ctx_, _PyASTOptimizeState *state); +static int astfold_keyword(keyword_ty node_, PyArena *ctx_, _PyASTOptimizeState *state); +static int astfold_arg(arg_ty node_, PyArena *ctx_, _PyASTOptimizeState *state); +static int astfold_withitem(withitem_ty node_, PyArena *ctx_, _PyASTOptimizeState *state); +static int astfold_excepthandler(excepthandler_ty node_, PyArena *ctx_, _PyASTOptimizeState *state); +static int astfold_switch_case(switch_case_ty node_, PyArena *ctx_, _PyASTOptimizeState *state); +static int astfold_match_case(match_case_ty node_, PyArena *ctx_, _PyASTOptimizeState *state); +static int astfold_pattern(pattern_ty node_, PyArena *ctx_, _PyASTOptimizeState *state); +static int astfold_type_param(type_param_ty node_, PyArena *ctx_, _PyASTOptimizeState *state); + +#define CALL(FUNC, TYPE, ARG) \ + if (!FUNC((ARG), ctx_, state)) \ + return 0; + +#define CALL_OPT(FUNC, TYPE, ARG) \ + if ((ARG) != NULL && !FUNC((ARG), ctx_, state)) \ + return 0; + +#define CALL_SEQ(FUNC, TYPE, ARG) { \ + int i; \ + asdl_ ## TYPE ## _seq *seq = (ARG); /* avoid variable capture */ \ + for (i = 0; i < asdl_seq_LEN(seq); i++) { \ + TYPE ## _ty elt = (TYPE ## _ty)asdl_seq_GET(seq, i); \ + if (elt != NULL && !FUNC(elt, ctx_, state)) \ + return 0; \ + } \ +} + +static inline _comp_entry * +add_comp_entry(_PyASTOptimizeState *state, expr_ty sub) +{ + assert(0 <= state->comp_ptr.n && state->comp_ptr.n < PY_MAX_TEMPLATE_SUBS); + + _comp_entry *entry = &state->comp_ptr.arr[state->comp_ptr.n++]; + entry->sub = sub; + entry->last = NULL; + return entry; +} + +static int +fold_comp(expr_ty node, PyArena *ctx_, _PyASTOptimizeState *state) +{ + expr_ty arg; + expr_ty func; + int constant = 1; + + assert(node->kind == Composition_kind); + + CALL(astfold_expr, expr_ty, node->v.Composition.arg); + + arg = node->v.Composition.arg; + if (arg->kind != Constant_kind) { + constant = 0; + } + + _comp_entry *entry = add_comp_entry(state, constant ? arg : NULL); + + func = node->v.Composition.func; + CALL(astfold_expr, expr_ty, func); + + if (!entry->last) { + if (constant) { + asdl_expr_seq *seq = + (asdl_expr_seq*)_Py_asdl_generic_seq_new(1, ctx_); + if (!seq) { + return 0; + } + asdl_seq_SET(seq, 0, arg); + node->kind = Call_kind; + node->v.Call.func = func; + node->v.Call.args = seq; + node->v.Call.keywords = NULL; + node->v.Call.aware = 0; + } + } + else { + if (constant) { + COPY_NODE(node, func); + CALL(astfold_expr, expr_ty, node); /* second pass */ + } + else { + node->v.Composition.has_templates = 1; + } + } + + state->comp_ptr.n--; + return 1; +} + +static int +fold_compassign(stmt_ty node, PyArena *ctx_, _PyASTOptimizeState *state) +{ + assert(node->kind == AugAssign_kind); + assert(node->v.AugAssign.value); + + _comp_entry *entry = add_comp_entry(state, NULL); + + CALL(astfold_expr, expr_ty, node->v.AugAssign.value); + + if (!entry->last) { + node->v.AugAssign.op = CompCall; + } + + state->comp_ptr.n--; + return 1; +} + +static int +astfold_body(asdl_stmt_seq *stmts, PyArena *ctx_, _PyASTOptimizeState *state) +{ + int docstring = _PyAST_GetDocString(stmts) != NULL; + CALL_SEQ(astfold_stmt, stmt, stmts); + if (!docstring && _PyAST_GetDocString(stmts) != NULL) { + stmt_ty st = (stmt_ty)asdl_seq_GET(stmts, 0); + asdl_expr_seq *values = _Py_asdl_expr_seq_new(1, ctx_); + if (!values) { + return 0; + } + asdl_seq_SET(values, 0, st->v.Expr.value); + expr_ty expr = _PyAST_JoinedStr(values, st->lineno, st->col_offset, + st->end_lineno, st->end_col_offset, + ctx_); + if (!expr) { + return 0; + } + st->v.Expr.value = expr; + } + return 1; +} + +static int +astfold_mod(mod_ty node_, PyArena *ctx_, _PyASTOptimizeState *state) +{ + switch (node_->kind) { + case Module_kind: + CALL(astfold_body, asdl_seq, node_->v.Module.body); + break; + case Interactive_kind: + CALL_SEQ(astfold_stmt, stmt, node_->v.Interactive.body); + break; + case Expression_kind: + CALL(astfold_expr, expr_ty, node_->v.Expression.body); + break; + // The following top level nodes don't participate in constant folding + case FunctionType_kind: + break; + // No default case, so the compiler will emit a warning if new top level + // compilation nodes are added without being handled here + } + return 1; +} + +static int +astfold_expr(expr_ty node_, PyArena *ctx_, _PyASTOptimizeState *state) +{ + _comp_entry *entry; + + if (++state->recursion_depth > state->recursion_limit) { + PyErr_SetString(PyExc_RecursionError, + "maximum recursion depth exceeded during compilation"); + return 0; + } + switch (node_->kind) { + case BoolOp_kind: + CALL_SEQ(astfold_expr, expr, node_->v.BoolOp.values); + break; + case BinOp_kind: + CALL(astfold_expr, expr_ty, node_->v.BinOp.left); + CALL(astfold_expr, expr_ty, node_->v.BinOp.right); + CALL(fold_binop, expr_ty, node_); + break; + case UnaryOp_kind: + CALL(astfold_expr, expr_ty, node_->v.UnaryOp.operand); + CALL(fold_unaryop, expr_ty, node_); + break; + case Lambda_kind: + CALL(astfold_arguments, arguments_ty, node_->v.Lambda.args); + CALL(astfold_expr, expr_ty, node_->v.Lambda.body); + break; + case IfExp_kind: + CALL(astfold_expr, expr_ty, node_->v.IfExp.test); + CALL(astfold_expr, expr_ty, node_->v.IfExp.body); + CALL(astfold_expr, expr_ty, node_->v.IfExp.orelse); + break; + case Dict_kind: + CALL_SEQ(astfold_expr, expr, node_->v.Dict.keys); + CALL_SEQ(astfold_expr, expr, node_->v.Dict.values); + break; + case Set_kind: + CALL_SEQ(astfold_expr, expr, node_->v.Set.elts); + break; + case ListComp_kind: + entry = add_comp_entry(state, NULL); + CALL(astfold_expr, expr_ty, node_->v.ListComp.elt); + CALL_SEQ(astfold_comprehension, comprehension, node_->v.ListComp.generators); + state->comp_ptr.n--; + break; + case TupleComp_kind: + entry = add_comp_entry(state, NULL); + CALL(astfold_expr, expr_ty, node_->v.TupleComp.elt); + CALL_SEQ(astfold_comprehension, comprehension, node_->v.TupleComp.generators); + state->comp_ptr.n--; + break; + case SetComp_kind: + entry = add_comp_entry(state, NULL); + CALL(astfold_expr, expr_ty, node_->v.SetComp.elt); + CALL_SEQ(astfold_comprehension, comprehension, node_->v.SetComp.generators); + state->comp_ptr.n--; + break; + case DictComp_kind: + entry = add_comp_entry(state, NULL); + CALL_OPT(astfold_expr, expr_ty, node_->v.DictComp.key); + CALL(astfold_expr, expr_ty, node_->v.DictComp.value); + CALL_SEQ(astfold_comprehension, comprehension, node_->v.DictComp.generators); + state->comp_ptr.n--; + break; + case GeneratorExp_kind: + CALL(astfold_expr, expr_ty, node_->v.GeneratorExp.elt); + CALL_SEQ(astfold_comprehension, comprehension, node_->v.GeneratorExp.generators); + break; + case Await_kind: + CALL(astfold_expr, expr_ty, node_->v.Await.value); + break; + case Yield_kind: + CALL_OPT(astfold_expr, expr_ty, node_->v.Yield.value); + break; + case YieldFrom_kind: + CALL(astfold_expr, expr_ty, node_->v.YieldFrom.value); + break; + case Compare_kind: + CALL(astfold_expr, expr_ty, node_->v.Compare.left); + CALL_SEQ(astfold_expr, expr, node_->v.Compare.comparators); + CALL(fold_compare, expr_ty, node_); + break; +#define NONE_CHECK(K, F) \ + if (node_->v.K.aware) { \ + expr_ty e = node_->v.K.F; \ + if (e->kind == Constant_kind && \ + Py_Is(e->v.Constant.value, Py_None)) \ + { \ + COPY_NODE(node_, e); \ + break; \ + } \ + } + + case Call_kind: + CALL(astfold_expr, expr_ty, node_->v.Call.func); + NONE_CHECK(Call, func) + CALL_SEQ(astfold_expr, expr, node_->v.Call.args); + CALL_SEQ(astfold_keyword, keyword, node_->v.Call.keywords); + break; + case FormattedValue_kind: + CALL(astfold_expr, expr_ty, node_->v.FormattedValue.value); + CALL_OPT(astfold_expr, expr_ty, node_->v.FormattedValue.format_spec); + break; + case JoinedStr_kind: + CALL_SEQ(astfold_expr, expr, node_->v.JoinedStr.values); + break; + case Attribute_kind: + CALL(astfold_expr, expr_ty, node_->v.Attribute.value); + NONE_CHECK(Attribute, value) + break; + case Subscript_kind: + CALL(astfold_expr, expr_ty, node_->v.Subscript.value); + NONE_CHECK(Subscript, value) + CALL(astfold_expr, expr_ty, node_->v.Subscript.slice); + CALL(fold_subscr, expr_ty, node_); + break; + case Starred_kind: + CALL(astfold_expr, expr_ty, node_->v.Starred.value); + break; + case Slice_kind: + CALL_OPT(astfold_expr, expr_ty, node_->v.Slice.lower); + CALL_OPT(astfold_expr, expr_ty, node_->v.Slice.upper); + CALL_OPT(astfold_expr, expr_ty, node_->v.Slice.step); + CALL(fold_slice, expr_ty, node_); + break; + case List_kind: + CALL_SEQ(astfold_expr, expr, node_->v.List.elts); + break; + case Tuple_kind: + CALL_SEQ(astfold_expr, expr, node_->v.Tuple.elts); + CALL(fold_tuple, expr_ty, node_); + break; + case Name_kind: + if (node_->v.Name.ctx == Load && + _PyUnicode_EqualToASCIIString(node_->v.Name.id, "__debug__")) { + state->recursion_depth--; + return make_const(node_, PyBool_FromLong(!state->optimize), ctx_); + } + break; + case NamedExpr_kind: + CALL(astfold_expr, expr_ty, node_->v.NamedExpr.value); + break; + case Composition_kind: + CALL(fold_comp, expr_ty, node_); + break; + case CompoundExpr_kind: + CALL(astfold_stmt, stmt_ty, node_->v.CompoundExpr.value); + break; + case BlockExpr_kind: + CALL_SEQ(astfold_stmt, stmt, node_->v.BlockExpr.body); + break; + case ExprTarget_kind: + CALL(astfold_expr, expr_ty, node_->v.ExprTarget.value); + break; + case Template_kind: + { + assert(0 <= node_->v.Template.level && node_->v.Template.level < state->comp_ptr.n); + + _comp_entry *entry = + &state->comp_ptr.arr[state->comp_ptr.n - node_->v.Template.level - 1]; + if (entry->sub) { + COPY_NODE(node_, entry->sub); + } + entry->last = node_; + break; + } + case Constant_kind: + // nothing further to do + break; + // No default case, so the compiler will emit a warning if new expression + // kinds are added without being handled here + } + state->recursion_depth--; + return 1; +} + +static int +astfold_keyword(keyword_ty node_, PyArena *ctx_, _PyASTOptimizeState *state) +{ + CALL_OPT(astfold_expr, expr_ty, node_->value); + return 1; +} + +static int +astfold_comprehension(comprehension_ty node_, PyArena *ctx_, _PyASTOptimizeState *state) +{ + CALL(astfold_expr, expr_ty, node_->target); + CALL(astfold_expr, expr_ty, node_->iter); + CALL_SEQ(astfold_expr, expr, node_->ifs); + + CALL(fold_iter, expr_ty, node_->iter); + return 1; +} + +static int +astfold_arguments(arguments_ty node_, PyArena *ctx_, _PyASTOptimizeState *state) +{ + CALL_SEQ(astfold_arg, arg, node_->posonlyargs); + CALL_SEQ(astfold_arg, arg, node_->args); + CALL_OPT(astfold_arg, arg_ty, node_->vararg); + CALL_SEQ(astfold_arg, arg, node_->kwonlyargs); + CALL_SEQ(astfold_expr, expr, node_->kw_defaults); + CALL_OPT(astfold_arg, arg_ty, node_->kwarg); + CALL_SEQ(astfold_expr, expr, node_->defaults); + return 1; +} + +static int +astfold_arg(arg_ty node_, PyArena *ctx_, _PyASTOptimizeState *state) +{ + if (!(state->ff_features & CO_FUTURE_ANNOTATIONS)) { + CALL_OPT(astfold_expr, expr_ty, node_->annotation); + } + return 1; +} + +static int +astfold_stmt(stmt_ty node_, PyArena *ctx_, _PyASTOptimizeState *state) +{ + if (++state->recursion_depth > state->recursion_limit) { + PyErr_SetString(PyExc_RecursionError, + "maximum recursion depth exceeded during compilation"); + return 0; + } + switch (node_->kind) { + case FunctionDef_kind: + CALL_SEQ(astfold_type_param, type_param, node_->v.FunctionDef.type_params); + CALL(astfold_arguments, arguments_ty, node_->v.FunctionDef.args); + CALL(astfold_body, asdl_seq, node_->v.FunctionDef.body); + CALL_SEQ(astfold_expr, expr, node_->v.FunctionDef.decorator_list); + if (!(state->ff_features & CO_FUTURE_ANNOTATIONS)) { + CALL_OPT(astfold_expr, expr_ty, node_->v.FunctionDef.returns); + } + break; + case AsyncFunctionDef_kind: + CALL_SEQ(astfold_type_param, type_param, node_->v.AsyncFunctionDef.type_params); + CALL(astfold_arguments, arguments_ty, node_->v.AsyncFunctionDef.args); + CALL(astfold_body, asdl_seq, node_->v.AsyncFunctionDef.body); + CALL_SEQ(astfold_expr, expr, node_->v.AsyncFunctionDef.decorator_list); + if (!(state->ff_features & CO_FUTURE_ANNOTATIONS)) { + CALL_OPT(astfold_expr, expr_ty, node_->v.AsyncFunctionDef.returns); + } + break; + case ClassDef_kind: + CALL_SEQ(astfold_type_param, type_param, node_->v.ClassDef.type_params); + CALL_SEQ(astfold_expr, expr, node_->v.ClassDef.bases); + CALL_SEQ(astfold_keyword, keyword, node_->v.ClassDef.keywords); + CALL(astfold_body, asdl_seq, node_->v.ClassDef.body); + CALL_SEQ(astfold_expr, expr, node_->v.ClassDef.decorator_list); + break; + case Return_kind: + CALL_OPT(astfold_expr, expr_ty, node_->v.Return.value); + break; + case Delete_kind: + CALL_SEQ(astfold_expr, expr, node_->v.Delete.targets); + break; + case Assign_kind: + CALL_SEQ(astfold_expr, expr, node_->v.Assign.targets); + CALL(astfold_expr, expr_ty, node_->v.Assign.value); + break; + case AugAssign_kind: + CALL(astfold_expr, expr_ty, node_->v.AugAssign.target); + CALL_OPT(astfold_expr, expr_ty, node_->v.AugAssign.value); + if (node_->v.AugAssign.op == Comp && node_->v.AugAssign.value) { + CALL(fold_compassign, expr_ty, node_); + } + break; + case AnnAssign_kind: + CALL(astfold_expr, expr_ty, node_->v.AnnAssign.target); + if (!(state->ff_features & CO_FUTURE_ANNOTATIONS)) { + CALL(astfold_expr, expr_ty, node_->v.AnnAssign.annotation); + } + CALL_OPT(astfold_expr, expr_ty, node_->v.AnnAssign.value); + break; + case TypeAlias_kind: + CALL(astfold_expr, expr_ty, node_->v.TypeAlias.name); + CALL_SEQ(astfold_type_param, type_param, node_->v.TypeAlias.type_params); + CALL(astfold_expr, expr_ty, node_->v.TypeAlias.value); + break; + case For_kind: + CALL(astfold_expr, expr_ty, node_->v.For.target); + CALL(astfold_expr, expr_ty, node_->v.For.iter); + CALL_SEQ(astfold_stmt, stmt, node_->v.For.body); + CALL_SEQ(astfold_stmt, stmt, node_->v.For.orelse); + + CALL(fold_iter, expr_ty, node_->v.For.iter); + break; + case AsyncFor_kind: + CALL(astfold_expr, expr_ty, node_->v.AsyncFor.target); + CALL(astfold_expr, expr_ty, node_->v.AsyncFor.iter); + CALL_SEQ(astfold_stmt, stmt, node_->v.AsyncFor.body); + CALL_SEQ(astfold_stmt, stmt, node_->v.AsyncFor.orelse); + break; + case While_kind: + CALL(astfold_expr, expr_ty, node_->v.While.test); + CALL_SEQ(astfold_stmt, stmt, node_->v.While.body); + CALL_SEQ(astfold_stmt, stmt, node_->v.While.orelse); + break; + case If_kind: + CALL(astfold_expr, expr_ty, node_->v.If.test); + CALL_SEQ(astfold_stmt, stmt, node_->v.If.body); + CALL_SEQ(astfold_stmt, stmt, node_->v.If.orelse); + break; + case With_kind: + CALL_SEQ(astfold_withitem, withitem, node_->v.With.items); + CALL_SEQ(astfold_stmt, stmt, node_->v.With.body); + break; + case AsyncWith_kind: + CALL_SEQ(astfold_withitem, withitem, node_->v.AsyncWith.items); + CALL_SEQ(astfold_stmt, stmt, node_->v.AsyncWith.body); + break; + case Raise_kind: + CALL_OPT(astfold_expr, expr_ty, node_->v.Raise.exc); + CALL_OPT(astfold_expr, expr_ty, node_->v.Raise.cause); + break; + case Try_kind: + CALL_SEQ(astfold_stmt, stmt, node_->v.Try.body); + CALL_SEQ(astfold_excepthandler, excepthandler, node_->v.Try.handlers); + CALL_SEQ(astfold_stmt, stmt, node_->v.Try.orelse); + CALL_SEQ(astfold_stmt, stmt, node_->v.Try.finalbody); + break; + case TryStar_kind: + CALL_SEQ(astfold_stmt, stmt, node_->v.TryStar.body); + CALL_SEQ(astfold_excepthandler, excepthandler, node_->v.TryStar.handlers); + CALL_SEQ(astfold_stmt, stmt, node_->v.TryStar.orelse); + CALL_SEQ(astfold_stmt, stmt, node_->v.TryStar.finalbody); + break; + case Assert_kind: + CALL(astfold_expr, expr_ty, node_->v.Assert.test); + CALL_OPT(astfold_expr, expr_ty, node_->v.Assert.msg); + break; + case Expr_kind: + CALL(astfold_expr, expr_ty, node_->v.Expr.value); + break; + case Switch_kind: + CALL(astfold_expr, expr_ty, node_->v.Switch.subject); + CALL_SEQ(astfold_switch_case, switch_case, node_->v.Switch.cases); + break; + case Match_kind: + CALL(astfold_expr, expr_ty, node_->v.Match.subject); + CALL_SEQ(astfold_match_case, match_case, node_->v.Match.cases); + break; + // The following statements don't contain any subexpressions to be folded + case Import_kind: + case ImportFrom_kind: + case Global_kind: + case Nonlocal_kind: + case Goto_kind: + case Label_kind: + case Pass_kind: + case Break_kind: + case Continue_kind: + break; + // No default case, so the compiler will emit a warning if new statement + // kinds are added without being handled here + } + state->recursion_depth--; + return 1; +} + +static int +astfold_excepthandler(excepthandler_ty node_, PyArena *ctx_, _PyASTOptimizeState *state) +{ + switch (node_->kind) { + case ExceptHandler_kind: + CALL_OPT(astfold_expr, expr_ty, node_->v.ExceptHandler.type); + CALL_SEQ(astfold_stmt, stmt, node_->v.ExceptHandler.body); + break; + // No default case, so the compiler will emit a warning if new handler + // kinds are added without being handled here + } + return 1; +} + +static int +astfold_withitem(withitem_ty node_, PyArena *ctx_, _PyASTOptimizeState *state) +{ + CALL(astfold_expr, expr_ty, node_->context_expr); + CALL_OPT(astfold_expr, expr_ty, node_->optional_vars); + return 1; +} + +static int +astfold_pattern(pattern_ty node_, PyArena *ctx_, _PyASTOptimizeState *state) +{ + // Currently, this is really only used to form complex/negative numeric + // constants in MatchValue and MatchMapping nodes + // We still recurse into all subexpressions and subpatterns anyway + if (++state->recursion_depth > state->recursion_limit) { + PyErr_SetString(PyExc_RecursionError, + "maximum recursion depth exceeded during compilation"); + return 0; + } + switch (node_->kind) { + case MatchValue_kind: + CALL(astfold_expr, expr_ty, node_->v.MatchValue.value); + break; + case MatchSingleton_kind: + break; + case MatchSequence_kind: + CALL_SEQ(astfold_pattern, pattern, node_->v.MatchSequence.patterns); + break; + case MatchMapping_kind: + CALL_SEQ(astfold_expr, expr, node_->v.MatchMapping.keys); + CALL_SEQ(astfold_pattern, pattern, node_->v.MatchMapping.patterns); + break; + case MatchClass_kind: + CALL(astfold_expr, expr_ty, node_->v.MatchClass.cls); + CALL_SEQ(astfold_pattern, pattern, node_->v.MatchClass.patterns); + CALL_SEQ(astfold_pattern, pattern, node_->v.MatchClass.kwd_patterns); + break; + case MatchStar_kind: + break; + case MatchAs_kind: + if (node_->v.MatchAs.pattern) { + CALL(astfold_pattern, pattern_ty, node_->v.MatchAs.pattern); + } + break; + case MatchOr_kind: + CALL_SEQ(astfold_pattern, pattern, node_->v.MatchOr.patterns); + break; + // No default case, so the compiler will emit a warning if new pattern + // kinds are added without being handled here + } + state->recursion_depth--; + return 1; +} + +static int +astfold_match_case(match_case_ty node_, PyArena *ctx_, _PyASTOptimizeState *state) +{ + CALL(astfold_pattern, expr_ty, node_->pattern); + CALL_OPT(astfold_expr, expr_ty, node_->guard); + CALL_SEQ(astfold_stmt, stmt, node_->body); + return 1; +} + +static int +astfold_switch_case(switch_case_ty node_, PyArena *ctx_, _PyASTOptimizeState *state) +{ + CALL_SEQ(astfold_expr, expr, node_->patterns); + CALL_SEQ(astfold_stmt, stmt, node_->body); + return 1; +} + +static int +astfold_switch(stmt_ty node_, PyArena *ctx_, _PyASTOptimizeState *state) +{ + Py_ssize_t i, j; + PyObject *subj; + expr_ty subject_expr; + assert(node_->kind == Switch_kind); + + subject_expr = node_->v.Switch.subject; + if (subject_expr->kind != Constant_kind) { + return 1; + } + subj = subject_expr->v.Constant.value; + + for (i = 0; i < asdl_seq_LEN(node_->v.Switch.cases); i++) { + switch_case_ty s = asdl_seq_GET(node_->v.Switch.cases, i); + for (j = 0; j < asdl_seq_LEN(s->patterns); j++) { + expr_ty e = asdl_seq_GET(s->patterns, j); + if (e->kind != Constant_kind) { + return 1; + } + if (!PyLong_CheckExact(e->v.Constant.value)) { + continue; + } + int res = PyObject_RichCompareBool(subj, e->v.Constant.value, Py_EQ); + if (res < 0) { + PyErr_Clear(); + continue; + } + if (res) { + COPY_NODE(node_, e); + } + } + } + + return 1; +} + +static int +astfold_type_param(type_param_ty node_, PyArena *ctx_, _PyASTOptimizeState *state) +{ + switch (node_->kind) { + case TypeVar_kind: + CALL_OPT(astfold_expr, expr_ty, node_->v.TypeVar.bound); + break; + case ParamSpec_kind: + break; + case TypeVarTuple_kind: + break; + } + return 1; +} + +#undef CALL +#undef CALL_OPT +#undef CALL_SEQ + +/* See comments in symtable.c. */ +#define COMPILER_STACK_FRAME_SCALE 2 + +int +_PyAST_Optimize(mod_ty mod, PyArena *arena, int optimize, int ff_features) +{ + PyThreadState *tstate; + int starting_recursion_depth; + + _PyASTOptimizeState state; + state.optimize = optimize; + state.ff_features = ff_features; + + /* Setup recursion depth check counters */ + tstate = _PyThreadState_GET(); + if (!tstate) { + return 0; + } + /* Be careful here to prevent overflow. */ + int recursion_depth = C_RECURSION_LIMIT - tstate->c_recursion_remaining; + starting_recursion_depth = recursion_depth * COMPILER_STACK_FRAME_SCALE; + state.recursion_depth = starting_recursion_depth; + state.recursion_limit = C_RECURSION_LIMIT * COMPILER_STACK_FRAME_SCALE; + state.comp_ptr.n = 0; + + int ret = astfold_mod(mod, arena, &state); + assert(ret || PyErr_Occurred()); + + /* Check that the recursion depth counting balanced correctly */ + if (ret && state.recursion_depth != starting_recursion_depth) { + PyErr_Format(PyExc_SystemError, + "AST optimizer recursion depth mismatch (before=%d, after=%d)", + starting_recursion_depth, state.recursion_depth); + return 0; + } + + assert(state.comp_ptr.n == 0); + + return ret; +} diff --git a/Tools/peg_generator/pegen/c_generator.py b/Tools/peg_generator/pegen/c_generator.py index 5d4b383..620f433 100644 --- a/Tools/peg_generator/pegen/c_generator.py +++ b/Tools/peg_generator/pegen/c_generator.py @@ -370,7 +370,7 @@ def __init__( self, exact_tokens, non_exact_tokens ) self._varname_counter = 0 - self._template_sub_depth = 0 + self._subn_incs: List[bool] = [] self._error_return = "NULL" self.debug = debug self.skip_actions = skip_actions @@ -397,8 +397,11 @@ def add_return(self, ret_val: str) -> None: for stmt in self.cleanup_statements: self.print(stmt) self.remove_level() - if self._template_sub_depth > 0: - self.print("p->subn--;") + if self._subn_incs and self._subn_incs[-1]: + if ret_val == self._error_return: + self.print(f"_PyPegen_dec_subn(p, 0);") + else: + self.print(f"_PyPegen_dec_subn(p, !p->error_indicator && _res != {self._error_return});"); self.print(f"return {ret_val};") def unique_varname(self, name: str = "tmpvar") -> str: @@ -555,6 +558,7 @@ def _check_for_errors(self) -> None: def _set_up_rule_memoization(self, node: Rule, result_type: str) -> None: self.print("{") with self.indent(): + self._subn_incs.append(False) self.add_level() self.print(f"{result_type} _res = {self._error_return};") self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &_res)) {{") @@ -582,6 +586,7 @@ def _set_up_rule_memoization(self, node: Rule, result_type: str) -> None: self.print("}") self.print(f"p->mark = _resmark;") self.add_return("_res") + assert self._subn_incs.pop() is False, "subn mismatch" self.print("}") self.print(f"static {result_type}") self.print(f"{node.name}_raw(Parser *p)") @@ -595,15 +600,18 @@ def _handle_default_rule_body(self, node: Rule, rhs: Rhs, result_type: str) -> N with self.indent(), self.change_error_return("-1" if node.type == "int" else "NULL"): self.add_level() self._check_for_errors() - if template_sub := "$" in node.flags: - self._template_sub_depth += 1 - self.print("p->subn++;") self.print(f"{result_type} _res = {self._error_return};") if memoize: self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &_res)) {{") with self.indent(): self.add_return("_res") self.print("}") + if template_sub := "$" in node.flags: + self.print("if (!_PyPegen_inc_subn(p)) {") + with self.indent(): + self.add_return(self._error_return) + self.print("}") + self._subn_incs.append(template_sub) self.print("int _mark = p->mark;") if any(alt.action and "EXTRA" in alt.action for alt in rhs.alts): self._set_up_token_start_metadata_extraction() @@ -618,12 +626,10 @@ def _handle_default_rule_body(self, node: Rule, rhs: Rhs, result_type: str) -> N self.print(f"_res = {self._error_return};") self.print(" done:") with self.indent(): - if template_sub: - self._template_sub_depth -= 1 - self.print("p->subn--;") if memoize: self.print(f"_PyPegen_insert_memo(p, _mark, {node.name}_type, _res);") self.add_return("_res") + assert self._subn_incs.pop() is template_sub, "subn mismatch" def _handle_loop_rule_body(self, node: Rule, rhs: Rhs) -> None: memoize = self._should_memoize(node) @@ -632,15 +638,18 @@ def _handle_loop_rule_body(self, node: Rule, rhs: Rhs) -> None: with self.indent(): self.add_level() self._check_for_errors() - if template_sub := "$" in node.flags: - self._template_sub_depth += 1 - self.print("p->subn++;") self.print("void *_res = NULL;") if memoize: self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &_res)) {{") with self.indent(): self.add_return("_res") self.print("}") + if template_sub := "$" in node.flags: + self.print("if (!_PyPegen_inc_subn(p)) {") + with self.indent(): + self.add_return(self._error_return) + self.print("}") + self._subn_incs.append(template_sub) self.print("int _mark = p->mark;") if memoize: self.print("int _start_mark = p->mark;") @@ -666,12 +675,10 @@ def _handle_loop_rule_body(self, node: Rule, rhs: Rhs) -> None: self.out_of_memory_return(f"!_seq", cleanup_code="PyMem_Free(_children);") self.print("for (int i = 0; i < _n; i++) asdl_seq_SET_UNTYPED(_seq, i, _children[i]);") self.print("PyMem_Free(_children);") - if template_sub: - self._template_sub_depth -= 1 - self.print("p->subn--;") if memoize and node.name: self.print(f"_PyPegen_insert_memo(p, _start_mark, {node.name}_type, _seq);") self.add_return("_seq") + assert self._subn_incs.pop() is template_sub, "subn mismatch" def visit_Rule(self, node: Rule) -> None: is_loop = node.is_loop() @@ -728,9 +735,7 @@ def visit_Rhs( self.visit(alt, is_loop=is_loop, is_gather=is_gather, rulename=rulename) def visit_TemplateGroup(self, node: TemplateGroup) -> None: - self._template_sub_depth += 1 - temp_var = f"_templateuse_{self._template_sub_depth}" - self.print(f"(p->subn++, {temp_var} = ") + self.print(f"_PyPegen_inc_subn(p) && _PyPegen_dec_subn(p, (") with self.indent(): if len(node.items) == 1: item = node.items[0] @@ -744,8 +749,7 @@ def visit_TemplateGroup(self, node: TemplateGroup) -> None: else: self.print("&&") self.visit(item) - self.print(f", p->subn--, {temp_var})") - self._template_sub_depth -= 1 + self.print("))") def join_conditions(self, keyword: str, node: Any) -> None: self.print(f"{keyword} (") @@ -864,8 +868,6 @@ def visit_Alt( self.print(f"{{ // {node}") with self.indent(): self._check_for_errors() - if is_tmpl_grp := len(node.items) == 1 and isinstance(node.items[0], TemplateGroup): - self._template_sub_depth += 1 node_str = str(node).replace('"', '\\"') self.print( f'D(fprintf(stderr, "%*c> {rulename}[%d-%d L%d]: %s\\n", p->level, \' \', _mark, p->mark, p->tok->lineno, "{node_str}"));' @@ -877,8 +879,8 @@ def visit_Alt( var_type = "void *" else: var_type += " " - if v == "_cut_var": - v += " = 0" # cut_var must be initialized + if v == "_cut_var" or v.startswith("_templateuse_"): + v += " = 0" # cut_var and _templateuse_* must be initialized self.print(f"{var_type}{v};") if v and v.startswith("_opt_var"): self.print(f"UNUSED({v}); // Silence compiler warnings") @@ -895,8 +897,6 @@ def visit_Alt( f"D(fprintf(stderr, \"%*c%s {rulename}[%d-%d L%d]: %s failed!\\n\", p->level, ' ',\n" f' p->error_indicator ? "ERROR!" : "-", _mark, p->mark, p->tok->lineno, "{node_str}"));' ) - if is_tmpl_grp: - self._template_sub_depth -= 1 if "_cut_var" in vars: self.print("if (_cut_var) {") with self.indent(): @@ -907,15 +907,12 @@ def visit_Alt( def collect_template_vars( self, node: TemplateGroup, types: Dict[Optional[str], Optional[str]] ) -> None: - self._template_sub_depth += 1 - types[f"_templateuse_{self._template_sub_depth}"] = "int" for item in node.items: if isinstance(item, TemplateGroup): self.collect_template_vars(item, types) else: name, type = self.add_var(item) types[name] = type - self._template_sub_depth -= 1 def collect_vars(self, node: Alt) -> Dict[Optional[str], Optional[str]]: types = {}