Skip to content

Commit

Permalink
Actually correct the ASI generation procedures
Browse files Browse the repository at this point in the history
- Drop the extraneous auto_semi rule, as it turns out that rule is
  **never** hit, because it effectively replicates the unmatched
  production condition (through auto_semi then error).
- However, in the case where the SEMI is produced through the error
  handler after a condition that is a genuine error, if the production
  stack holds just that freshly generated SEMI, it will be immediately
  consumed by the empty_statement production rule, and the previously
  erroring token is then yielded again, restarting the cycle; this is
  the real cause behind the infinite loop reported in rspivak/slimit#29.
- However, if the auto-generated SEMI tokens are correctly marked (done
  here by creating a new AUTOSEMI token type) and the existing rules are
  updated to make use of that token, an empty statement no longer
  matches the auto-generated SEMI token, so the immediate consumption
  of that token -- the condition that results in the error token -- can
  no longer occur.
- Removed the seen-before-token hack.
- Moved all the conditions under which the AUTOSEMI token is generated
  into the lexer's auto_semi method.
- This also completely eliminates the incorrect production of statements
  that only have SEMIs but not their ASI equivalents, such as the case
  with ``if`` or ``for`` statements as per section 7.9.2 of the
  ECMA-262 5.1 specification, which is reported in rspivak/slimit#101.
  • Loading branch information
metatoaster committed Aug 7, 2018
1 parent 4110a41 commit 486626d
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 41 deletions.
6 changes: 4 additions & 2 deletions src/calmjs/parse/lexers/es5.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,8 @@ def token(self):
return self.cur_token

def auto_semi(self, token):
if token is None or token.type == 'RBRACE' or self._is_prev_token_lt():
if token is None or (token.type not in ('SEMI', 'AUTOSEMI') and (
token.type == 'RBRACE' or self._is_prev_token_lt())):
if token:
self.next_tokens.append(token)
return self._create_semi_token(token)
Expand Down Expand Up @@ -303,7 +304,7 @@ def lookup_colno(self, lineno, lexpos):

def _create_semi_token(self, orig_token):
token = AutoLexToken()
token.type = 'SEMI'
token.type = 'AUTOSEMI'
token.value = ';'
if orig_token is not None:
token.lineno = orig_token.lineno
Expand Down Expand Up @@ -352,6 +353,7 @@ def next(self):
tokens = (
# Punctuators
'PERIOD', 'COMMA', 'SEMI', 'COLON', # . , ; :
'AUTOSEMI', # autogenerated ;
'PLUS', 'MINUS', 'MULT', 'DIV', 'MOD', # + - * / %
'BAND', 'BOR', 'BXOR', 'BNOT', # & | ^ ~
'CONDOP', # conditional operator ?
Expand Down
53 changes: 15 additions & 38 deletions src/calmjs/parse/parsers/es5.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,18 +100,6 @@ def __init__(self, lex_optimize=True, lextab=lextab,
# over again.
self._error_tokens = {}

def _has_been_seen_before(self, token):
if token is None:
return False
key = token.type, token.value, token.lineno, token.lexpos
return key in self._error_tokens

def _mark_as_seen(self, token):
if token is None:
return
key = token.type, token.value, token.lineno, token.lexpos
self._error_tokens[key] = True

def _raise_syntax_error(self, token):
tokens = [format_lex_token(t) for t in [
self.lexer.valid_prev_token,
Expand Down Expand Up @@ -141,22 +129,11 @@ def parse(self, text, debug=False):
def p_empty(self, p):
"""empty :"""

def p_auto_semi(self, p):
"""auto_semi : error"""

def p_error(self, token):
# https://github.com/rspivak/slimit/issues/29
if self._has_been_seen_before(token):
self._raise_syntax_error(token)

if token is None or token.type != 'SEMI':
next_token = self.lexer.auto_semi(token)
if next_token is not None:
# https://github.com/rspivak/slimit/issues/29
self._mark_as_seen(token)
self.parser.errok()
return next_token

next_token = self.lexer.auto_semi(token)
if next_token is not None:
self.parser.errok()
return next_token
self._raise_syntax_error(token)

# Comment rules
Expand Down Expand Up @@ -1094,7 +1071,7 @@ def p_expr_nobf(self, p):
# 12.2 Variable Statement
def p_variable_statement(self, p):
"""variable_statement : VAR variable_declaration_list SEMI
| VAR variable_declaration_list auto_semi
| VAR variable_declaration_list AUTOSEMI
"""
p[0] = self.asttypes.VarStatement(p[2])
p[0].setpos(p)
Expand Down Expand Up @@ -1162,7 +1139,7 @@ def p_empty_statement(self, p):
# 12.4 Expression Statement
def p_expr_statement(self, p):
"""expr_statement : expr_nobf SEMI
| expr_nobf auto_semi
| expr_nobf AUTOSEMI
"""
# In 12.4, expression statements cannot start with either the
# 'function' keyword or '{'. However, the lexing and production
Expand Down Expand Up @@ -1200,7 +1177,7 @@ def p_iteration_statement_1(self, p):
"""
iteration_statement \
: DO statement WHILE LPAREN expr RPAREN SEMI
| DO statement WHILE LPAREN expr RPAREN auto_semi
| DO statement WHILE LPAREN expr RPAREN AUTOSEMI
"""
p[0] = self.asttypes.DoWhile(predicate=p[5], statement=p[2])
p[0].setpos(p)
Expand Down Expand Up @@ -1287,44 +1264,44 @@ def p_expr_noin_opt(self, p):
# 12.7 The continue Statement
def p_continue_statement_1(self, p):
"""continue_statement : CONTINUE SEMI
| CONTINUE auto_semi
| CONTINUE AUTOSEMI
"""
p[0] = self.asttypes.Continue()
p[0].setpos(p)

def p_continue_statement_2(self, p):
"""continue_statement : CONTINUE identifier SEMI
| CONTINUE identifier auto_semi
| CONTINUE identifier AUTOSEMI
"""
p[0] = self.asttypes.Continue(p[2])
p[0].setpos(p)

# 12.8 The break Statement
def p_break_statement_1(self, p):
"""break_statement : BREAK SEMI
| BREAK auto_semi
| BREAK AUTOSEMI
"""
p[0] = self.asttypes.Break()
p[0].setpos(p)

def p_break_statement_2(self, p):
"""break_statement : BREAK identifier SEMI
| BREAK identifier auto_semi
| BREAK identifier AUTOSEMI
"""
p[0] = self.asttypes.Break(p[2])
p[0].setpos(p)

# 12.9 The return Statement
def p_return_statement_1(self, p):
"""return_statement : RETURN SEMI
| RETURN auto_semi
| RETURN AUTOSEMI
"""
p[0] = self.asttypes.Return()
p[0].setpos(p)

def p_return_statement_2(self, p):
"""return_statement : RETURN expr SEMI
| RETURN expr auto_semi
| RETURN expr AUTOSEMI
"""
p[0] = self.asttypes.Return(expr=p[2])
p[0].setpos(p)
Expand Down Expand Up @@ -1396,7 +1373,7 @@ def p_labelled_statement(self, p):
# 12.13 The throw Statement
def p_throw_statement(self, p):
"""throw_statement : THROW expr SEMI
| THROW expr auto_semi
| THROW expr AUTOSEMI
"""
p[0] = self.asttypes.Throw(expr=p[2])
p[0].setpos(p)
Expand Down Expand Up @@ -1430,7 +1407,7 @@ def p_finally(self, p):
# 12.15 The debugger statement
def p_debugger_statement(self, p):
"""debugger_statement : DEBUGGER SEMI
| DEBUGGER auto_semi
| DEBUGGER AUTOSEMI
"""
p[0] = self.asttypes.Debugger(p[1])
p[0].setpos(p)
Expand Down
61 changes: 60 additions & 1 deletion src/calmjs/parse/tests/test_es5_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def test_that_parsing_eventually_stops(self):
parser.parse(text)
self.assertEqual(
str(e.exception),
"Unexpected ',' at 2:1 between '\\n' at 1:7 and 'b' at 2:3")
"Unexpected ',' at 2:1 after '\\n' at 1:7")

def test_bare_start(self):
text = textwrap.dedent("""
Expand Down Expand Up @@ -237,6 +237,65 @@ def test_read(self):
node = read(stream)
self.assertEqual(node.sourcepath, 'somefile.js')

# 7.9.2
def test_asi_empty_if_parse_fail(self):
text = "if (true)"
parser = Parser()
with self.assertRaises(ECMASyntaxError) as e:
parser.parse(text)
self.assertEqual(
str(e.exception),
"Unexpected end of input after ')' at 1:9")

def test_asi_empty_if_parse_fail_inside_block(self):
# https://github.com/rspivak/slimit/issues/101
text = textwrap.dedent("""
function foo(args) {
if (true)
}
""").strip()
parser = Parser()
with self.assertRaises(ECMASyntaxError) as e:
parser.parse(text)
self.assertEqual(
str(e.exception),
r"Unexpected '}' at 3:1 after '\n' at 2:14")

def test_asi_for_truncated_fail(self):
text = textwrap.dedent("""
for (a; b
)
""").strip()
parser = Parser()
with self.assertRaises(ECMASyntaxError) as e:
parser.parse(text)
self.assertEqual(
str(e.exception),
r"Unexpected ')' at 2:1 after '\n' at 1:10")

def test_asi_for_bare_fail(self):
text = textwrap.dedent("""
for (a; b; c)
""").strip()
parser = Parser()
with self.assertRaises(ECMASyntaxError) as e:
parser.parse(text)
self.assertEqual(
str(e.exception),
"Unexpected end of input after ')' at 1:13")

def test_asi_omitted_if_else_fail(self):
text = textwrap.dedent("""
if (a > b)
else c = d
""").strip()
parser = Parser()
with self.assertRaises(ECMASyntaxError) as e:
parser.parse(text)
self.assertEqual(
str(e.exception),
r"Unexpected 'else' at 2:1 after '\n' at 1:11")


repr_walker = ReprWalker()

Expand Down

0 comments on commit 486626d

Please sign in to comment.