From a5cea45142f1527328dead0deb49272d4f626f07 Mon Sep 17 00:00:00 2001
From: Tommy Yu
Date: Thu, 8 Jun 2017 16:06:28 +1200
Subject: [PATCH] Correct parsing of white space

- Conform to ECMA 262, section 7.2, table 2.
- Treat U+2028 (line separator) and U+2029 (paragraph separator) as
  line terminators per section 7.3, table 3, instead of discarding
  them as white space.
- Test case provided by rspivak/slimit#84 on github.
---
Review notes (this section is ignored by git-am):

- U+2028 and U+2029 are in Unicode categories Zl and Zp, not Zs, so they
  are not WhiteSpace under section 7.2; section 7.3 lists them as
  LineTerminators.  They were moved from t_ignore to t_LINE_TERMINATOR.
- The t_LINE_TERMINATOR pattern is a unicode literal with the two code
  points embedded literally, because the Python 2 "re" module does not
  understand \uXXXX escapes inside a pattern.
- The post-image blob hash on the es5.py "index" line predates this
  revision; regenerate the patch with git format-patch before relying
  on it.

 src/calmjs/parse/lexers/es5.py            | 17 +++++++++++++++--
 src/calmjs/parse/tests/test_es5_parser.py | 12 ++++++++++++
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/src/calmjs/parse/lexers/es5.py b/src/calmjs/parse/lexers/es5.py
index 8097d92..325d662 100644
--- a/src/calmjs/parse/lexers/es5.py
+++ b/src/calmjs/parse/lexers/es5.py
@@ -346,7 +346,20 @@ def t_regex_error(self, token):
 
-    t_LINE_TERMINATOR = r'[\n\r]+'
+    # ECMA 262, section 7.3, table 3: line feed, carriage return,
+    # line separator (U+2028) and paragraph separator (U+2029).
+    t_LINE_TERMINATOR = u'[\\n\\r\u2028\u2029]+'
 
-    t_ignore = ' \t'
+    t_ignore = (
+        # space, tab, line tab, form feed, nbsp
+        u' \t\x0b\x0c\xa0'
+        # ogham space mark
+        u'\u1680'
+        # en quad .. hair space
+        u'\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A'
+        # narrow nbsp, med math space, ideographic space
+        u'\u202F\u205F\u3000'
+        # unicode bom
+        u'\uFEFF'
+    )
 
     t_NUMBER = r"""
         (?:
diff --git a/src/calmjs/parse/tests/test_es5_parser.py b/src/calmjs/parse/tests/test_es5_parser.py
index 6dff014..5b3acb3 100644
--- a/src/calmjs/parse/tests/test_es5_parser.py
+++ b/src/calmjs/parse/tests/test_es5_parser.py
@@ -109,5 +109,17 @@ def test_that_parsing_eventually_stops(self):
         parser = Parser()
         self.assertRaises(ECMASyntaxError, parser.parse, text)
 
+    def test_ecma_262_whitespace_slimt_issue_84(self):
+        text = u'''\uFEFF
+        var foo = function() {
+          // a salad of whitespaces
+          \x09\r\n\x0b\x0c\x20\xa0
+          \u1680\u2000\u2001\u2005\u200A
+          \u2028\u2029\u202F\u205F\u3000
+          return 1;
+        };
+        '''
+        self.assertTrue(bool(Parser().parse(text).children()))
+
 
 repr_visitor = generic.ReprVisitor()