From 486626d3db752129d37cb1af4ab06a96638797de Mon Sep 17 00:00:00 2001 From: Tommy Yu Date: Tue, 7 Aug 2018 18:17:34 +1200 Subject: [PATCH] Actually correct the ASI generation procedures - Drop the extraneous auto_semi rule, as it turns out that rule is **never** hit, because it effectively replicates the unmatched production condition (through auto_semi then error). - However, in the case where the SEMI is produced through the error handler after an error condition that is actually an error, if the production stack is just that freshly generated SEMI, it will be immediately consumed and used by the empty_statement production rule; the previously erroring token is then yielded again, restarting the cycle. This is the real cause of the infinite loop reported in rspivak/slimit#29. - However, if the auto-generated SEMI tokens are correctly marked and the existing rules are updated to make use of that token, the immediate consumption of the token cannot happen: since an empty statement does NOT include the auto-generated SEMI token (achieved by creating a new AUTOSEMI token type), the condition in which the immediate consumption of a token results in the error token can no longer occur. - Removed the seen-before-token hack. - Moved all the conditions under which the AUTOSEMI token is generated into the lexer's auto_semi method. - This also completely eliminates the incorrect production of statements that only have SEMIs but not the ASI equivalent, such as the case with ``if`` or ``for`` statements, as per 7.9.2 of the ECMA-262 5.1 spec, which is reported in rspivak/slimit#101. 
--- src/calmjs/parse/lexers/es5.py | 6 ++- src/calmjs/parse/parsers/es5.py | 53 ++++++-------------- src/calmjs/parse/tests/test_es5_parser.py | 61 ++++++++++++++++++++++- 3 files changed, 79 insertions(+), 41 deletions(-) diff --git a/src/calmjs/parse/lexers/es5.py b/src/calmjs/parse/lexers/es5.py index a5d0c8d..a20e334 100644 --- a/src/calmjs/parse/lexers/es5.py +++ b/src/calmjs/parse/lexers/es5.py @@ -220,7 +220,8 @@ def token(self): return self.cur_token def auto_semi(self, token): - if token is None or token.type == 'RBRACE' or self._is_prev_token_lt(): + if token is None or (token.type not in ('SEMI', 'AUTOSEMI') and ( + token.type == 'RBRACE' or self._is_prev_token_lt())): if token: self.next_tokens.append(token) return self._create_semi_token(token) @@ -303,7 +304,7 @@ def lookup_colno(self, lineno, lexpos): def _create_semi_token(self, orig_token): token = AutoLexToken() - token.type = 'SEMI' + token.type = 'AUTOSEMI' token.value = ';' if orig_token is not None: token.lineno = orig_token.lineno @@ -352,6 +353,7 @@ def next(self): tokens = ( # Punctuators 'PERIOD', 'COMMA', 'SEMI', 'COLON', # . , ; : + 'AUTOSEMI', # autogenerated ; 'PLUS', 'MINUS', 'MULT', 'DIV', 'MOD', # + - * / % 'BAND', 'BOR', 'BXOR', 'BNOT', # & | ^ ~ 'CONDOP', # conditional operator ? diff --git a/src/calmjs/parse/parsers/es5.py b/src/calmjs/parse/parsers/es5.py index dbe7f14..96a4280 100644 --- a/src/calmjs/parse/parsers/es5.py +++ b/src/calmjs/parse/parsers/es5.py @@ -100,18 +100,6 @@ def __init__(self, lex_optimize=True, lextab=lextab, # over again. 
self._error_tokens = {} - def _has_been_seen_before(self, token): - if token is None: - return False - key = token.type, token.value, token.lineno, token.lexpos - return key in self._error_tokens - - def _mark_as_seen(self, token): - if token is None: - return - key = token.type, token.value, token.lineno, token.lexpos - self._error_tokens[key] = True - def _raise_syntax_error(self, token): tokens = [format_lex_token(t) for t in [ self.lexer.valid_prev_token, @@ -141,22 +129,11 @@ def parse(self, text, debug=False): def p_empty(self, p): """empty :""" - def p_auto_semi(self, p): - """auto_semi : error""" - def p_error(self, token): - # https://github.com/rspivak/slimit/issues/29 - if self._has_been_seen_before(token): - self._raise_syntax_error(token) - - if token is None or token.type != 'SEMI': - next_token = self.lexer.auto_semi(token) - if next_token is not None: - # https://github.com/rspivak/slimit/issues/29 - self._mark_as_seen(token) - self.parser.errok() - return next_token - + next_token = self.lexer.auto_semi(token) + if next_token is not None: + self.parser.errok() + return next_token self._raise_syntax_error(token) # Comment rules @@ -1094,7 +1071,7 @@ def p_expr_nobf(self, p): # 12.2 Variable Statement def p_variable_statement(self, p): """variable_statement : VAR variable_declaration_list SEMI - | VAR variable_declaration_list auto_semi + | VAR variable_declaration_list AUTOSEMI """ p[0] = self.asttypes.VarStatement(p[2]) p[0].setpos(p) @@ -1162,7 +1139,7 @@ def p_empty_statement(self, p): # 12.4 Expression Statement def p_expr_statement(self, p): """expr_statement : expr_nobf SEMI - | expr_nobf auto_semi + | expr_nobf AUTOSEMI """ # In 12.4, expression statements cannot start with either the # 'function' keyword or '{'. 
However, the lexing and production @@ -1200,7 +1177,7 @@ def p_iteration_statement_1(self, p): """ iteration_statement \ : DO statement WHILE LPAREN expr RPAREN SEMI - | DO statement WHILE LPAREN expr RPAREN auto_semi + | DO statement WHILE LPAREN expr RPAREN AUTOSEMI """ p[0] = self.asttypes.DoWhile(predicate=p[5], statement=p[2]) p[0].setpos(p) @@ -1287,14 +1264,14 @@ def p_expr_noin_opt(self, p): # 12.7 The continue Statement def p_continue_statement_1(self, p): """continue_statement : CONTINUE SEMI - | CONTINUE auto_semi + | CONTINUE AUTOSEMI """ p[0] = self.asttypes.Continue() p[0].setpos(p) def p_continue_statement_2(self, p): """continue_statement : CONTINUE identifier SEMI - | CONTINUE identifier auto_semi + | CONTINUE identifier AUTOSEMI """ p[0] = self.asttypes.Continue(p[2]) p[0].setpos(p) @@ -1302,14 +1279,14 @@ def p_continue_statement_2(self, p): # 12.8 The break Statement def p_break_statement_1(self, p): """break_statement : BREAK SEMI - | BREAK auto_semi + | BREAK AUTOSEMI """ p[0] = self.asttypes.Break() p[0].setpos(p) def p_break_statement_2(self, p): """break_statement : BREAK identifier SEMI - | BREAK identifier auto_semi + | BREAK identifier AUTOSEMI """ p[0] = self.asttypes.Break(p[2]) p[0].setpos(p) @@ -1317,14 +1294,14 @@ def p_break_statement_2(self, p): # 12.9 The return Statement def p_return_statement_1(self, p): """return_statement : RETURN SEMI - | RETURN auto_semi + | RETURN AUTOSEMI """ p[0] = self.asttypes.Return() p[0].setpos(p) def p_return_statement_2(self, p): """return_statement : RETURN expr SEMI - | RETURN expr auto_semi + | RETURN expr AUTOSEMI """ p[0] = self.asttypes.Return(expr=p[2]) p[0].setpos(p) @@ -1396,7 +1373,7 @@ def p_labelled_statement(self, p): # 12.13 The throw Statement def p_throw_statement(self, p): """throw_statement : THROW expr SEMI - | THROW expr auto_semi + | THROW expr AUTOSEMI """ p[0] = self.asttypes.Throw(expr=p[2]) p[0].setpos(p) @@ -1430,7 +1407,7 @@ def p_finally(self, p): # 12.15 The debugger 
statement def p_debugger_statement(self, p): """debugger_statement : DEBUGGER SEMI - | DEBUGGER auto_semi + | DEBUGGER AUTOSEMI """ p[0] = self.asttypes.Debugger(p[1]) p[0].setpos(p) diff --git a/src/calmjs/parse/tests/test_es5_parser.py b/src/calmjs/parse/tests/test_es5_parser.py index 5215296..80616a8 100644 --- a/src/calmjs/parse/tests/test_es5_parser.py +++ b/src/calmjs/parse/tests/test_es5_parser.py @@ -114,7 +114,7 @@ def test_that_parsing_eventually_stops(self): parser.parse(text) self.assertEqual( str(e.exception), - "Unexpected ',' at 2:1 between '\\n' at 1:7 and 'b' at 2:3") + "Unexpected ',' at 2:1 after '\\n' at 1:7") def test_bare_start(self): text = textwrap.dedent(""" @@ -237,6 +237,65 @@ def test_read(self): node = read(stream) self.assertEqual(node.sourcepath, 'somefile.js') + # 7.9.2 + def test_asi_empty_if_parse_fail(self): + text = "if (true)" + parser = Parser() + with self.assertRaises(ECMASyntaxError) as e: + parser.parse(text) + self.assertEqual( + str(e.exception), + "Unexpected end of input after ')' at 1:9") + + def test_asi_empty_if_parse_fail_inside_block(self): + # https://github.com/rspivak/slimit/issues/101 + text = textwrap.dedent(""" + function foo(args) { + if (true) + } + """).strip() + parser = Parser() + with self.assertRaises(ECMASyntaxError) as e: + parser.parse(text) + self.assertEqual( + str(e.exception), + r"Unexpected '}' at 3:1 after '\n' at 2:14") + + def test_asi_for_truncated_fail(self): + text = textwrap.dedent(""" + for (a; b + ) + """).strip() + parser = Parser() + with self.assertRaises(ECMASyntaxError) as e: + parser.parse(text) + self.assertEqual( + str(e.exception), + r"Unexpected ')' at 2:1 after '\n' at 1:10") + + def test_asi_for_bare_fail(self): + text = textwrap.dedent(""" + for (a; b; c) + """).strip() + parser = Parser() + with self.assertRaises(ECMASyntaxError) as e: + parser.parse(text) + self.assertEqual( + str(e.exception), + "Unexpected end of input after ')' at 1:13") + + def 
test_asi_omitted_if_else_fail(self): + text = textwrap.dedent(""" + if (a > b) + else c = d + """).strip() + parser = Parser() + with self.assertRaises(ECMASyntaxError) as e: + parser.parse(text) + self.assertEqual( + str(e.exception), + r"Unexpected 'else' at 2:1 after '\n' at 1:11") + repr_walker = ReprWalker()