Skip to content

Commit

Permalink
Actually correct the ASI generation procedures
Browse files Browse the repository at this point in the history
- Drop the extraneous auto_semi rule, as it turns out that rule is
  **never** hit, because it effectively replicates the unmatched
  production condition (through auto_semi then error).
- However, in the case where the SEMI is produced through the error
  handler after a condition that is a genuine error, if the production
  stack holds just that freshly generated SEMI, it will be immediately
  consumed by the empty_statement production rule, and the previously
  erroring token is then yielded again, restarting the cycle; this is
  the real cause behind the infinite loop reported in rspivak/slimit#29.
- However, if the auto-generated SEMI tokens are correctly marked (done
  here by creating a new AUTOSEMI token type) and the existing rules are
  updated to make use of that token, an empty statement no longer
  matches the auto-generated SEMI token, so the immediate consumption
  of that token -- the condition that results in the error token -- can
  no longer occur.
- Removed the seen-before-token hack.
- Moved all the conditions under which the AUTOSEMI token is generated
  into the lexer's auto_semi method.
- This also completely eliminates the incorrect production of statements
  that only have SEMIs but not their ASI equivalents, such as the case
  with ``if`` or ``for`` statements as per section 7.9.2 of the
  ECMA-262 5.1 specification, which is reported in rspivak/slimit#101.
  • Loading branch information
metatoaster committed Aug 7, 2018
1 parent 4110a41 commit 486626d
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 41 deletions.
6 changes: 4 additions & 2 deletions src/calmjs/parse/lexers/es5.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,8 @@ def token(self):
return self.cur_token

def auto_semi(self, token):
if token is None or token.type == 'RBRACE' or self._is_prev_token_lt():
if token is None or (token.type not in ('SEMI', 'AUTOSEMI') and (
token.type == 'RBRACE' or self._is_prev_token_lt())):
if token:
self.next_tokens.append(token)
return self._create_semi_token(token)
Expand Down Expand Up @@ -303,7 +304,7 @@ def lookup_colno(self, lineno, lexpos):

def _create_semi_token(self, orig_token):
token = AutoLexToken()
token.type = 'SEMI'
token.type = 'AUTOSEMI'
token.value = ';'
if orig_token is not None:
token.lineno = orig_token.lineno
Expand Down Expand Up @@ -352,6 +353,7 @@ def next(self):
tokens = (
# Punctuators
'PERIOD', 'COMMA', 'SEMI', 'COLON', # . , ; :
'AUTOSEMI', # autogenerated ;
'PLUS', 'MINUS', 'MULT', 'DIV', 'MOD', # + - * / %
'BAND', 'BOR', 'BXOR', 'BNOT', # & | ^ ~
'CONDOP', # conditional operator ?
Expand Down
53 changes: 15 additions & 38 deletions src/calmjs/parse/parsers/es5.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,18 +100,6 @@ def __init__(self, lex_optimize=True, lextab=lextab,
# over again.
self._error_tokens = {}

def _has_been_seen_before(self, token):
if token is None:
return False
key = token.type, token.value, token.lineno, token.lexpos
return key in self._error_tokens

def _mark_as_seen(self, token):
if token is None:
return
key = token.type, token.value, token.lineno, token.lexpos
self._error_tokens[key] = True

def _raise_syntax_error(self, token):
tokens = [format_lex_token(t) for t in [
self.lexer.valid_prev_token,
Expand Down Expand Up @@ -141,22 +129,11 @@ def parse(self, text, debug=False):
def p_empty(self, p):
"""empty :"""

def p_auto_semi(self, p):
"""auto_semi : error"""

def p_error(self, token):
# https://github.com/rspivak/slimit/issues/29
if self._has_been_seen_before(token):
self._raise_syntax_error(token)

if token is None or token.type != 'SEMI':
next_token = self.lexer.auto_semi(token)
if next_token is not None:
# https://github.com/rspivak/slimit/issues/29
self._mark_as_seen(token)
self.parser.errok()
return next_token

next_token = self.lexer.auto_semi(token)
if next_token is not None:
self.parser.errok()
return next_token
self._raise_syntax_error(token)

# Comment rules
Expand Down Expand Up @@ -1094,7 +1071,7 @@ def p_expr_nobf(self, p):
# 12.2 Variable Statement
def p_variable_statement(self, p):
"""variable_statement : VAR variable_declaration_list SEMI
| VAR variable_declaration_list auto_semi
| VAR variable_declaration_list AUTOSEMI
"""
p[0] = self.asttypes.VarStatement(p[2])
p[0].setpos(p)
Expand Down Expand Up @@ -1162,7 +1139,7 @@ def p_empty_statement(self, p):
# 12.4 Expression Statement
def p_expr_statement(self, p):
"""expr_statement : expr_nobf SEMI
| expr_nobf auto_semi
| expr_nobf AUTOSEMI
"""
# In 12.4, expression statements cannot start with either the
# 'function' keyword or '{'. However, the lexing and production
Expand Down Expand Up @@ -1200,7 +1177,7 @@ def p_iteration_statement_1(self, p):
"""
iteration_statement \
: DO statement WHILE LPAREN expr RPAREN SEMI
| DO statement WHILE LPAREN expr RPAREN auto_semi
| DO statement WHILE LPAREN expr RPAREN AUTOSEMI
"""
p[0] = self.asttypes.DoWhile(predicate=p[5], statement=p[2])
p[0].setpos(p)
Expand Down Expand Up @@ -1287,44 +1264,44 @@ def p_expr_noin_opt(self, p):
# 12.7 The continue Statement
def p_continue_statement_1(self, p):
"""continue_statement : CONTINUE SEMI
| CONTINUE auto_semi
| CONTINUE AUTOSEMI
"""
p[0] = self.asttypes.Continue()
p[0].setpos(p)

def p_continue_statement_2(self, p):
"""continue_statement : CONTINUE identifier SEMI
| CONTINUE identifier auto_semi
| CONTINUE identifier AUTOSEMI
"""
p[0] = self.asttypes.Continue(p[2])
p[0].setpos(p)

# 12.8 The break Statement
def p_break_statement_1(self, p):
"""break_statement : BREAK SEMI
| BREAK auto_semi
| BREAK AUTOSEMI
"""
p[0] = self.asttypes.Break()
p[0].setpos(p)

def p_break_statement_2(self, p):
"""break_statement : BREAK identifier SEMI
| BREAK identifier auto_semi
| BREAK identifier AUTOSEMI
"""
p[0] = self.asttypes.Break(p[2])
p[0].setpos(p)

# 12.9 The return Statement
def p_return_statement_1(self, p):
"""return_statement : RETURN SEMI
| RETURN auto_semi
| RETURN AUTOSEMI
"""
p[0] = self.asttypes.Return()
p[0].setpos(p)

def p_return_statement_2(self, p):
"""return_statement : RETURN expr SEMI
| RETURN expr auto_semi
| RETURN expr AUTOSEMI
"""
p[0] = self.asttypes.Return(expr=p[2])
p[0].setpos(p)
Expand Down Expand Up @@ -1396,7 +1373,7 @@ def p_labelled_statement(self, p):
# 12.13 The throw Statement
def p_throw_statement(self, p):
"""throw_statement : THROW expr SEMI
| THROW expr auto_semi
| THROW expr AUTOSEMI
"""
p[0] = self.asttypes.Throw(expr=p[2])
p[0].setpos(p)
Expand Down Expand Up @@ -1430,7 +1407,7 @@ def p_finally(self, p):
# 12.15 The debugger statement
def p_debugger_statement(self, p):
"""debugger_statement : DEBUGGER SEMI
| DEBUGGER auto_semi
| DEBUGGER AUTOSEMI
"""
p[0] = self.asttypes.Debugger(p[1])
p[0].setpos(p)
Expand Down
61 changes: 60 additions & 1 deletion src/calmjs/parse/tests/test_es5_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def test_that_parsing_eventually_stops(self):
parser.parse(text)
self.assertEqual(
str(e.exception),
"Unexpected ',' at 2:1 between '\\n' at 1:7 and 'b' at 2:3")
"Unexpected ',' at 2:1 after '\\n' at 1:7")

def test_bare_start(self):
text = textwrap.dedent("""
Expand Down Expand Up @@ -237,6 +237,65 @@ def test_read(self):
node = read(stream)
self.assertEqual(node.sourcepath, 'somefile.js')

# 7.9.2
def test_asi_empty_if_parse_fail(self):
text = "if (true)"
parser = Parser()
with self.assertRaises(ECMASyntaxError) as e:
parser.parse(text)
self.assertEqual(
str(e.exception),
"Unexpected end of input after ')' at 1:9")

def test_asi_empty_if_parse_fail_inside_block(self):
# https://github.com/rspivak/slimit/issues/101
text = textwrap.dedent("""
function foo(args) {
if (true)
}
""").strip()
parser = Parser()
with self.assertRaises(ECMASyntaxError) as e:
parser.parse(text)
self.assertEqual(
str(e.exception),
r"Unexpected '}' at 3:1 after '\n' at 2:14")

def test_asi_for_truncated_fail(self):
text = textwrap.dedent("""
for (a; b
)
""").strip()
parser = Parser()
with self.assertRaises(ECMASyntaxError) as e:
parser.parse(text)
self.assertEqual(
str(e.exception),
r"Unexpected ')' at 2:1 after '\n' at 1:10")

def test_asi_for_bare_fail(self):
text = textwrap.dedent("""
for (a; b; c)
""").strip()
parser = Parser()
with self.assertRaises(ECMASyntaxError) as e:
parser.parse(text)
self.assertEqual(
str(e.exception),
"Unexpected end of input after ')' at 1:13")

def test_asi_omitted_if_else_fail(self):
text = textwrap.dedent("""
if (a > b)
else c = d
""").strip()
parser = Parser()
with self.assertRaises(ECMASyntaxError) as e:
parser.parse(text)
self.assertEqual(
str(e.exception),
r"Unexpected 'else' at 2:1 after '\n' at 1:11")


repr_walker = ReprWalker()

Expand Down

0 comments on commit 486626d

Please sign in to comment.