Correct parsing of white space

- Conform to ECMA 262, section 7.2, table 2.
- Test case provided by rspivak/slimit#84 on GitHub.
calmjs · Jun 8, 2017 · a5cea45 · a5cea45
1 parent 426f78f
commit a5cea45
Show file tree

Hide file tree

Showing 2 changed files with 24 additions and 1 deletion.
diff --git a/src/calmjs/parse/lexers/es5.py b/src/calmjs/parse/lexers/es5.py
@@ -346,7 +346,18 @@ def t_regex_error(self, token):
 
     t_LINE_TERMINATOR = r'[\n\r]+'
 
-    t_ignore = ' \t'
+    t_ignore = (
+        # space, tab, line tab, form feed, nbsp
+        u' \t\x0b\x0c\xa0'
+        # ogham space mark
+        u'\u1680'
+        # en quad .. hair space
+        u'\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A'
+        # line sep, paragraph sep, narrow nbsp, med math, ideographic space
+        u'\u2028\u2029\u202F\u205F\u3000'
+        # unicode bom
+        u'\uFEFF'
+    )
 
     t_NUMBER = r"""
     (?:

diff --git a/src/calmjs/parse/tests/test_es5_parser.py b/src/calmjs/parse/tests/test_es5_parser.py
@@ -109,6 +109,18 @@ def test_that_parsing_eventually_stops(self):
         parser = Parser()
         self.assertRaises(ECMASyntaxError, parser.parse, text)
 
+    def test_ecma_262_whitespace_slimt_issue_84(self):
+        text = u'''\uFEFF
+        var foo = function() {
+        // a salad of whitespaces
+        \x09\r\n\x0b\x0c\x20\xa0
+        \u1680\u2000\u2001\u2005\u200A
+        \u2028\u2029\u202F\u205F\u3000
+            return 1;
+        };
+        '''
+        self.assertTrue(bool(Parser().parse(text).children()))
+
 
 repr_visitor = generic.ReprVisitor()