-
Notifications
You must be signed in to change notification settings - Fork 0
/
Lexer.py
88 lines (83 loc) · 3 KB
/
Lexer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# -*- coding: utf-8 -*-
import re
def lexer(tl_file, tok_file):
    """Scan the source file *tl_file* and write its tokens to *tok_file*.

    The input is split on whitespace.  A chunk that is not a token by itself
    is split around the two-character operators (``:=``, ``!=``, ``<=``,
    ``>=``) and, if a piece still is not a token, around the one-character
    operators and punctuation.  Each recognised token is written on its own
    line (newline-separated, no trailing newline).

    A lexeme that cannot be recognised is reported on stdout with a
    ``SCANNER ERROR`` message; scanning then continues with the next lexeme,
    so *tok_file* holds every token that could be recognised.

    Args:
        tl_file: Path of the source file to read.
        tok_file: Path of the token file to create or overwrite.

    Returns:
        True if every lexeme was recognised, False if at least one scanner
        error was reported.
    """
    max_int = 2147483647  # largest integer literal accepted by the scanner

    # Fixed lexemes (operators, punctuation, keywords) and their token names.
    tl_dict = {
        '(': 'LP',
        ')': 'RP',
        ':=': 'ASGN',
        ';': 'SC',
        '*': 'MULTIPLICATIVE(*)',
        'div': 'MULTIPLICATIVE(div)',
        'mod': 'MULTIPLICATIVE(mod)',
        '+': 'ADDITIVE(+)',
        '-': 'ADDITIVE(-)',
        '=': 'COMPARE(=)',
        '!=': 'COMPARE(!=)',
        '<': 'COMPARE(<)',
        '>': 'COMPARE(>)',
        '<=': 'COMPARE(<=)',
        '>=': 'COMPARE(>=)',
        'if': 'IF',
        'then': 'THEN',
        'else': 'ELSE',
        'begin': 'BEGIN',
        'end': 'END',
        'while': 'WHILE',
        'do': 'DO',
        'program': 'PROGRAM',
        'var': 'VAR',
        'as': 'AS',
        'int': 'INT',
        'bool': 'BOOL',
        'writeInt': 'WRITEINT',
        'readInt': 'READINT',
    }
    re_num = re.compile(r'^([1-9][0-9]*|0)$')
    re_boollit = re.compile(r'^(false|true)$')
    re_ident = re.compile(r'^([a-z_A-Z][a-zA-Z0-9]*)$')
    re_opr2 = re.compile(r'(:=|!=|<=|>=)')
    re_opr1 = re.compile(r'(\(|\)|;|\*|\+|-|=|<|>)')

    tokens = []

    def to_token(lexeme):
        """Return the token text for *lexeme*, or None if it is not a token."""
        fixed = tl_dict.get(lexeme)
        if fixed:
            return fixed
        if re_num.match(lexeme):
            try:
                in_range = int(lexeme) <= max_int
            except ValueError:
                # int() can refuse extremely long digit strings; such a
                # literal is out of range either way.
                in_range = False
            return 'num(' + lexeme + ')' if in_range else None
        if re_boollit.match(lexeme):
            return 'boollit(' + lexeme + ')'
        if re_ident.match(lexeme):
            return 'ident(' + lexeme + ')'
        return None

    def accept(lexeme):
        """Record the token for *lexeme*; return False if it is not a token."""
        token = to_token(lexeme)
        if token is None:
            return False
        tokens.append(token)
        return True

    had_error = False
    with open(tl_file, 'r') as f_tl, open(tok_file, 'w') as f_tok:
        for chunk in f_tl.read().split():
            if accept(chunk):
                continue
            # Two-character operators are isolated first so that e.g. "<="
            # is not torn apart into "<" and "=".
            for part in re_opr2.sub(r' \1 ', chunk).split():
                if accept(part):
                    continue
                for atom in re_opr1.sub(r' \1 ', part).split():
                    if accept(atom):
                        continue
                    had_error = True
                    # A lexeme shaped like a number is only ever rejected
                    # for being out of range, so no shared flag is needed
                    # to tell the two error kinds apart.
                    if re_num.match(atom):
                        print('SCANNER ERROR due to illegal integer \"' + atom + '\"')
                    else:
                        print('SCANNER ERROR due to \"' + atom + '\"')
        f_tok.write('\n'.join(tokens))

    if had_error:
        return False
    print('Lexer done!')
    return True