#!/usr/bin/env python
#
# A derivative port of:
# https://github.com/oriansj/mescc-tools/M1-macro.c
#
# Copyright (C) 2016 Jeremiah Orians
# Copyright (C) 2017 Jan Nieuwenhuizen <janneke@gnu.org>
# Copyright (C) 2020 Mark Jenkins <mark@markjenkins.ca>
# This file is part of knightpies
#
# knightpies is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# knightpies is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with knightpies. If not, see <http://www.gnu.org/licenses/>.

from __future__ import generators  # for yield keyword in python 2.2

from pythoncompat import \
    open_ascii, print_func, COMPAT_TRUE, COMPAT_FALSE, int_as_hex

TOK_TYPE_MACRO, TOK_TYPE_ATOM, TOK_TYPE_STR, TOK_TYPE_DATA, \
    TOK_TYPE_COMMENT, TOK_TYPE_NEWLINE = range(6)
TOK_TYPE, TOK_EXPR, TOK_FILENAME, TOK_LINENUM = range(4)
MACRO_NAME, MACRO_VALUE = 0, 1


class MultipleDefinitionsException(Exception):
    pass


def read_atom(first_char, f):
    buf = first_char
    while COMPAT_TRUE:
        c = f.read(1)
        if c in ('', "\n", "\t", " "):
            break
        else:
            buf += c
    return buf, c
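
# A quick illustration (not part of the original source): with the file
# positioned just after an initial 'D' of "DEFINE foo", read_atom('D', f)
# consumes "EFINE" and returns ('DEFINE', ' ') -- the atom text plus the
# delimiter character that terminated it ('' at end of file).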


def read_until_newline_or_EOF(f):
    comment_buffer = ''
    while COMPAT_TRUE:
        c = f.read(1)
        if c == '' or c == '\n' or c == '\r':
            return c, comment_buffer
        else:
            comment_buffer += c


def tokenize_file(f):
    line_num = 1
    string_char, string_buf = None, None
    while COMPAT_TRUE:
        c = f.read(1)
        if c == '':
            if string_char is not None:
                raise Exception(
                    "unmatched %s quote in %s line %s"
                    % (string_char, f.name, line_num))
            break
        # look for being in string state first, as strings are not
        # interrupted by newlines or comments
        elif string_char is not None:
            if string_char == c:
                if string_char == '"':
                    yield (TOK_TYPE_STR, string_buf, f.name, line_num)
                elif string_char == "'":
                    yield (TOK_TYPE_DATA, string_buf, f.name, line_num)
                else:
                    assert COMPAT_FALSE  # we should never reach here
                string_char, string_buf = None, None
            else:
                string_buf += c
        elif c == '#' or c == ';':
            c, comment = read_until_newline_or_EOF(f)
            yield (TOK_TYPE_COMMENT, comment, f.name, line_num)
            if c != '':
                yield (TOK_TYPE_NEWLINE, '\n', f.name, line_num)
                line_num += 1
            else:
                break
        elif (string_char is None) and (c == '"' or c == "'"):
            string_char = c
            string_buf = ''
        elif c == '\n':
            yield (TOK_TYPE_NEWLINE, '\n', f.name, line_num)
            line_num += 1
        elif c == ' ' or c == '\t':
            pass
        else:
            atom, trailing_char = read_atom(c, f)
            yield (TOK_TYPE_ATOM, atom, f.name, line_num)
            if trailing_char == '':
                break
            elif trailing_char == '\n':
                yield (TOK_TYPE_NEWLINE, '\n', f.name, line_num)
                line_num += 1
    yield (TOK_TYPE_NEWLINE, '\n', f.name, line_num)
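
# A sketch of the token stream (illustrative, not from the original file):
# for an input line such as
#     DEFINE ADD 0x5 ; add instruction
# tokenize_file yields, in order, three TOK_TYPE_ATOM tokens ('DEFINE',
# 'ADD', '0x5'), one TOK_TYPE_COMMENT token (' add instruction'), and one
# TOK_TYPE_NEWLINE token, each tagged with f.name and the line number.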


def get_symbols_used(file_objs, symbols):
    symbols_used = {}
    for f in file_objs:
        for tok_type, tok_expr, tok_filename, tok_linenum in \
                upgrade_token_stream_to_include_macro(tokenize_file(f)):
            if tok_type == TOK_TYPE_ATOM and tok_expr in symbols:
                symbols_used[tok_expr] = None
    return list(symbols_used.keys())


def get_macros_defined_and_add_to_sym_table(f, symbols=None):
    # start a new dictionary if one wasn't provided; a mutable default
    # argument in the function definition would be created once at
    # definition time and shared between calls
    if symbols is None:
        symbols = {}
    for tok in upgrade_token_stream_to_include_macro(tokenize_file(f)):
        if tok[TOK_TYPE] == TOK_TYPE_MACRO:
            tok_type, tok_expr, tok_filename, tok_linenum = tok
            macro_name = tok_expr[MACRO_NAME][TOK_EXPR]
            if macro_name in symbols:
                raise MultipleDefinitionsException(
                    "DEFINE %s on line %s of %s is a duplicate definition"
                    % (macro_name, tok_linenum, tok_filename))
            symbols[macro_name] = tok_expr[MACRO_VALUE]
    return symbols
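
# Illustrative result (continuing the example line above): after
# processing "DEFINE ADD 0x5", symbols maps 'ADD' to the value token
# (TOK_TYPE_ATOM, '0x5', f.name, 1), i.e. the whole token tuple, not
# just the text.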


def upgrade_token_stream_to_include_macro(input_tokens):
    input_tokens_iter = iter(input_tokens)
    while COMPAT_TRUE:
        try:
            tok = next(input_tokens_iter)
        except StopIteration:
            break
        tok_type, tok_expr, tok_filename, tok_linenum = tok
        # if we have a DEFINE atom we're going to yield a TOK_TYPE_MACRO
        # token based on the next two tokens
        if tok_type == TOK_TYPE_ATOM and tok_expr == "DEFINE":
            # look ahead to the token after DEFINE
            try:
                macro_name_tok = next(input_tokens_iter)
            except StopIteration:
                raise Exception(
                    "%s ended with uncompleted DEFINE" % tok_filename)
            # enforce that the token after the DEFINE atom is itself an
            # atom, not a newline or a string
            if (macro_name_tok[TOK_TYPE] == TOK_TYPE_STR or
                    macro_name_tok[TOK_TYPE] == TOK_TYPE_DATA):
                raise Exception(
                    "Using a string for macro name %s not supported "
                    "line %s from %s" % (
                        tok_expr, tok_linenum, tok_filename))
            elif macro_name_tok[TOK_TYPE] == TOK_TYPE_NEWLINE:
                raise Exception(
                    "You cannot have a newline in a DEFINE "
                    "line %s from %s" % (
                        tok_expr, tok_linenum, tok_filename))
            assert macro_name_tok[TOK_TYPE] == TOK_TYPE_ATOM
            # look ahead to the second token after DEFINE
            try:
                macro_value_tok = next(input_tokens_iter)
            except StopIteration:
                raise Exception(
                    "%s ended with uncompleted DEFINE" % tok_filename)
            # enforce that the second token after the DEFINE atom is an
            # atom or a string, never a newline
            if macro_value_tok[TOK_TYPE] == TOK_TYPE_NEWLINE:
                raise Exception(
                    "You cannot have a newline in a DEFINE "
                    "line %s from %s" % (
                        tok_expr, tok_linenum, tok_filename))
            # make a macro type token with a two element tuple
            # of name token and value token as the TOK_EXPR component
            yield (
                TOK_TYPE_MACRO,
                (macro_name_tok, macro_value_tok),
                tok_filename, tok_linenum
            )
        # anything else (an atom that isn't DEFINE, or a string, data,
        # comment, or newline token) just passes through unchanged
        else:
            yield tok
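
# A sketch of the upgrade (illustrative): the three atoms of
# "DEFINE ADD 0x5" are collapsed into a single token
#     (TOK_TYPE_MACRO,
#      ((TOK_TYPE_ATOM, 'ADD', name, 1), (TOK_TYPE_ATOM, '0x5', name, 1)),
#      name, 1)
# whose TOK_EXPR component is the (name token, value token) pair indexed
# by MACRO_NAME and MACRO_VALUE.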


def output_string_as_hex(output_file, string_msg, pad_align=4):
    string_len_w_null = len(string_msg) + 1  # at least one null byte
    # number of null bytes required past the first one:
    # take how far the length overshoots the last pad_align boundary,
    # subtract that from pad_align to get a value of 1 through pad_align,
    # and use the final % pad_align to turn pad_align (already aligned)
    # back into 0
    extra_pad = (pad_align - (string_len_w_null % pad_align)) % pad_align
    for c in string_msg:
        output_file.write("%.2x" % ord(c))
    output_file.write('00')  # first null byte
    output_file.write('00' * extra_pad)  # extra null bytes for alignment
    assert 0 <= extra_pad < pad_align
    assert (len(string_msg) + 1 + extra_pad) % pad_align == 0
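
# Worked example (illustrative): with string_msg = "Hi" and pad_align = 4,
# string_len_w_null is 3, extra_pad is (4 - (3 % 4)) % 4 = 1, and the
# output is "4869" + "00" + "00", i.e. 4 bytes once decoded from hex.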


def output_regular_atom(output_file, atomstr, big_endian=COMPAT_TRUE):
    if atomstr[0:2] == '0x':  # atoms prefixed with 0x are hex
        try:
            hexatom_int = int(atomstr[2:], 16)
        except ValueError:
            raise Exception("%s can't be parsed to hex" % atomstr)
        output_file.write(
            int_as_hex(hexatom_int, 2,
                       big_endian=big_endian, signed=COMPAT_FALSE))
    elif atomstr[0] in "!@$~%&:^":
        if atomstr[0] != ':':
            output_file.write(' ')
        output_file.write(atomstr)
    else:
        # other regular atoms are treated as decimal values
        try:
            a = int(atomstr)
        except ValueError:
            raise Exception("%s can't be parsed to decimal" % atomstr)
        output_file.write(
            int_as_hex(a, 2, big_endian=big_endian, signed=COMPAT_TRUE))
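
# Illustrative encodings, assuming int_as_hex from pythoncompat renders a
# two-byte-wide hex string as its use here suggests: '0x5' would become
# the unsigned big-endian pair "0005" and the decimal atom '-1' the
# signed "ffff", while prefixed atoms such as ':start' or '@foo' pass
# through literally for a later pass to resolve (non-label prefixes get
# a leading space).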


def filter_line_pieces_to_empty_if_just_label(pieces):
    if len(pieces) == 1 and pieces[0].startswith(':'):
        return ()
    else:
        return pieces


def output_file_from_tokens_with_macros_sub_and_string_sub(
        input_tokens, output_file, symbols, big_endian=COMPAT_TRUE,
        comments=COMPAT_FALSE):
    pieces_seen_on_line = []
    first_macro_outputted = COMPAT_FALSE
    for tok_type, tok_expr, tok_filename, tok_linenum in input_tokens:
        if tok_type == TOK_TYPE_ATOM:
            pieces_seen_on_line.append(tok_expr)
            if tok_expr in symbols:  # exact match only
                macro_value_token = symbols[tok_expr]
                if (macro_value_token[TOK_TYPE] == TOK_TYPE_ATOM or
                        macro_value_token[TOK_TYPE] == TOK_TYPE_DATA):
                    output_file.write(macro_value_token[TOK_EXPR])
                elif macro_value_token[TOK_TYPE] == TOK_TYPE_STR:
                    output_string_as_hex(
                        output_file, macro_value_token[TOK_EXPR])
                else:
                    assert COMPAT_FALSE
            else:
                output_regular_atom(output_file, tok_expr, big_endian)
        elif tok_type == TOK_TYPE_NEWLINE:
            pieces_filtered = filter_line_pieces_to_empty_if_just_label(
                pieces_seen_on_line)
            if (not first_macro_outputted and
                    len(pieces_filtered) > 0 and
                    comments):
                output_file.write(' # ')
                output_file.write(' '.join(pieces_filtered))
            output_file.write('\n')
            first_macro_outputted = COMPAT_FALSE
            pieces_seen_on_line = []
        elif tok_type == TOK_TYPE_DATA:
            output_file.write(tok_expr)
        elif tok_type == TOK_TYPE_STR:
            if comments:
                output_file.write('# "')
                # [1:] to get rid of the leading quote from repr
                # [:-1] to get rid of the trailing quote from repr
                output_file.write(repr(tok_expr)[1:][:-1])
                output_file.write('"\n')
            output_string_as_hex(
                output_file, tok_expr)
        elif tok_type == TOK_TYPE_COMMENT:
            if comments:
                if len(pieces_seen_on_line) == 0:
                    output_file.write(';')
                else:
                    output_file.write(' # ')
                    output_file.write(' '.join(pieces_seen_on_line))
                    output_file.write(' ; ')
                output_file.write(tok_expr)
                first_macro_outputted = COMPAT_TRUE
        else:
            assert tok_type == TOK_TYPE_MACRO


def get_symbols_from_M1_file_objs(file_objs, rewind_after=COMPAT_TRUE):
    symbols = {}
    # first pass, get the symbols
    for f in file_objs:
        get_macros_defined_and_add_to_sym_table(f, symbols)
        if rewind_after:
            f.seek(0)  # return to start of file for next pass
    return symbols


def M1_file_objs_to_hex2_file(
        M1_file_objs, hex2_file_obj, symbols=None, big_endian=COMPAT_TRUE,
        comments=COMPAT_FALSE):
    if symbols is None:
        symbols = get_symbols_from_M1_file_objs(
            M1_file_objs, rewind_after=COMPAT_TRUE)
    for f in M1_file_objs:
        output_file_from_tokens_with_macros_sub_and_string_sub(
            upgrade_token_stream_to_include_macro(tokenize_file(f)),
            hex2_file_obj, symbols, big_endian, comments)


def main():
    from sys import argv, stdout
    dump_defs_used = COMPAT_FALSE
    output_filename = None  # default case will mean stdout
    arguments = []
    arg_iter = iter(argv[1:])
    big_endian = COMPAT_TRUE
    endian_flag_seen = COMPAT_FALSE
    comment_flag_seen = COMPAT_FALSE
    while COMPAT_TRUE:
        try:
            arg = next(arg_iter)
        except StopIteration:
            break  # break while COMPAT_TRUE
        if arg == '--dump-defs-used':
            dump_defs_used = COMPAT_TRUE
        elif arg == '-o' or arg == '--output':
            try:
                output_filename = next(arg_iter)
            except StopIteration:
                raise Exception(
                    '--output (-o) followed by end of arguments')
        elif arg == '--BigEndian':
            if endian_flag_seen and big_endian:
                raise Exception("--BigEndian flag seen more than once!")
            elif endian_flag_seen and not big_endian:
                raise Exception(
                    "--BigEndian flag seen after --LittleEndian, "
                    "Jonathan Swift says you can't have it both ways")
            endian_flag_seen = COMPAT_TRUE
            # big_endian is already COMPAT_TRUE
        elif arg == '--LittleEndian':
            if endian_flag_seen and not big_endian:
                raise Exception(
                    "--LittleEndian flag seen more than once!")
            elif endian_flag_seen and big_endian:
                raise Exception(
                    "--LittleEndian flag seen after --BigEndian, "
                    "Jonathan Swift says you can't have it both ways")
            endian_flag_seen = COMPAT_TRUE
            big_endian = COMPAT_FALSE
        elif arg == '--comment':
            comment_flag_seen = COMPAT_TRUE
        else:
            arguments.append(arg)
    M1_file_objs = [open_ascii(filename) for filename in arguments]
    symbols = get_symbols_from_M1_file_objs(
        M1_file_objs, rewind_after=COMPAT_TRUE)
    if dump_defs_used:
        # second pass, figure out which symbols are used
        symbols_used = get_symbols_used(M1_file_objs, symbols)
        symbols_used.sort()
        for symbol in symbols_used:
            print_func(symbol)
    # the default case, outputting a processed version of the input files
    else:
        if output_filename is None:
            output_file_obj = stdout
        else:
            output_file_obj = open(output_filename, 'w')
        M1_file_objs_to_hex2_file(M1_file_objs, output_file_obj,
                                  symbols=symbols, big_endian=big_endian,
                                  comments=comment_flag_seen)
        for f in M1_file_objs:
            f.close()
        if output_filename is not None:  # not stdout, but an open file
            output_file_obj.close()
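
# Example invocation (illustrative; the file names are hypothetical):
#     python M1.py --LittleEndian --comment -o out.hex2 defs.M1 prog.M1
# This reads the M1 sources, substitutes DEFINEd macros and strings, and
# writes annotated hex2 output to out.hex2; --dump-defs-used instead
# prints the sorted list of DEFINEd symbols the inputs actually use.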


if __name__ == '__main__':
    main()