Skip to content

Commit

Permalink
Correct parsing of white space
Browse files Browse the repository at this point in the history
- Conform to ECMA 262, section 7.2, table 2.
- Test case provided by rspivak/slimit#84 on GitHub.
  • Loading branch information
metatoaster committed Jun 8, 2017
1 parent 426f78f commit a5cea45
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 1 deletion.
13 changes: 12 additions & 1 deletion src/calmjs/parse/lexers/es5.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,18 @@ def t_regex_error(self, token):

# Pattern for the LINE_TERMINATOR token: a run of one or more LF / CR
# characters.
# NOTE(review): ECMA-262 5.1 section 7.3 also lists U+2028 and U+2029 as
# line terminators; this pattern does not match them -- confirm intent.
t_LINE_TERMINATOR = r'[\n\r]+'

t_ignore = ' \t'
# White space characters for t_ignore, one code point per entry.
# NOTE(review): ECMA-262 5.1 section 7.3 lists U+2028 and U+2029 as line
# terminators rather than white space; both are kept in this set so the
# value is unchanged -- confirm whether that is intended.
t_ignore = u''.join((
    u' ',       # space
    u'\t',      # tab
    u'\x0b',    # line tab
    u'\x0c',    # form feed
    u'\xa0',    # nbsp
    u'\u1680',  # ogham space mark
    u'\u2000',  # en quad
    u'\u2001',  # em quad
    u'\u2002',  # en space
    u'\u2003',  # em space
    u'\u2004',  # three-per-em space
    u'\u2005',  # four-per-em space
    u'\u2006',  # six-per-em space
    u'\u2007',  # figure space
    u'\u2008',  # punctuation space
    u'\u2009',  # thin space
    u'\u200A',  # hair space
    u'\u2028',  # line sep
    u'\u2029',  # paragraph sep
    u'\u202F',  # narrow nbsp
    u'\u205F',  # med math space
    u'\u3000',  # ideographic space
    u'\uFEFF',  # unicode bom
))

t_NUMBER = r"""
(?:
Expand Down
12 changes: 12 additions & 0 deletions src/calmjs/parse/tests/test_es5_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,18 @@ def test_that_parsing_eventually_stops(self):
parser = Parser()
self.assertRaises(ECMASyntaxError, parser.parse, text)

def test_ecma_262_whitespace_slimt_issue_84(self):
    """
    A function body padded with a mix of escaped white space and line
    break characters should parse into a tree that has children.
    """

    # The escapes below are expanded by Python, so the parser receives
    # the actual characters rather than backslash sequences.
    source = u'''\uFEFF
var foo = function() {
// a salad of whitespaces
\x09\r\n\x0b\x0c\x20\xa0
\u1680\u2000\u2001\u2005\u200A
\u2028\u2029\u202F\u205F\u3000
return 1;
};
'''
    tree = Parser().parse(source)
    children = tree.children()
    self.assertTrue(bool(children))


# A ReprVisitor instance obtained from `generic`; its uses lie outside
# this excerpt.
repr_visitor = generic.ReprVisitor()

Expand Down

0 comments on commit a5cea45

Please sign in to comment.