From 4bf85883aa7018a72db7f73a4afbd97d05bee2d0 Mon Sep 17 00:00:00 2001 From: NeeEoo Date: Mon, 5 Apr 2021 15:55:12 +0200 Subject: [PATCH 1/5] Added Structs --- Compiler/Compiler.py | 90 ++++++++++++++++++- Compiler/FunctionCompiler.py | 167 +++++++++++++++++++++++++++++++++-- Compiler/General.py | 5 ++ Compiler/Globals.py | 46 +++++++--- Compiler/Lexical_analyzer.py | 3 + Compiler/Node.py | 119 +++++++++++++++++++++---- Compiler/Structs.py | 88 ++++++++++++++++++ Compiler/Token.py | 3 + 8 files changed, 484 insertions(+), 37 deletions(-) create mode 100644 Compiler/Structs.py diff --git a/Compiler/Compiler.py b/Compiler/Compiler.py index 4011bd7..7880a73 100644 --- a/Compiler/Compiler.py +++ b/Compiler/Compiler.py @@ -4,6 +4,7 @@ from .Functions import check_function_exists, get_function_object, insert_function_object from .General import get_NUM_token_value, get_set_cell_value_code, get_literal_token_code, unpack_literal_tokens_to_array_dimensions from .Globals import get_global_variables_size, get_variable_size, get_variable_dimensions, insert_global_variable, create_variable_from_definition +from .Structs import Struct, insert_struct_object, get_struct_object from .Lexical_analyzer import analyze from .LibraryFunctionCompiler import insert_library_functions from .Parser import Parser @@ -43,6 +44,90 @@ def create_function_object(self): function = FunctionCompiler(function_name, function_tokens) return function + def create_struct_object(self): + # struct syntax: STRUCT ID LBRACE ((INT | STRUCT ID) ID ((LBRACK NUM RBRACK)+)? SEMICOLON)+ RBRACE SEMICOLON + + self.parser.check_next_tokens_are([Token.ID, Token.LBRACE]) + self.parser.advance_token() # point to ID + struct_name_token = self.parser.current_token() + struct_name = struct_name_token.data + self.parser.advance_token(amount=2) # point to after LBRACE + + struct_object = Struct(struct_name, struct_name_token) + + defined_field_names = [] + + token = self.parser.current_token() + while token is not None and token.type in [Token.INT, Token.STRUCT]: + if token.type == Token.STRUCT: + self.parser.check_next_tokens_are([Token.ID, Token.ID]) + field_struct_id = self.parser.next_token().data + field_name = self.parser.next_token(2).data + + if self.parser.next_token(3).type == Token.LBRACK: + self.parser.advance_token(amount=3) # point to LBRACK + dimensions = [] # element[i] holds the size of dimension[i] + + while self.parser.current_token().type == Token.LBRACK: + self.parser.check_current_tokens_are([Token.LBRACK, Token.NUM, Token.RBRACK]) + dimensions.append(get_NUM_token_value(self.parser.next_token())) + + self.parser.advance_token(amount=3) # skip LBRACK NUM RBRACK + else: + dimensions = [1] + self.parser.advance_token(amount=3) # point to after ID + + self.parser.check_current_tokens_are([Token.SEMICOLON]) + self.parser.advance_token() # point to after SEMICOLON + + type_obj = { + "type": Token.STRUCT, + "size": get_struct_object(field_struct_id).size, + "id": field_struct_id, + "dimensions": dimensions + } + elif token.type == Token.INT: + self.parser.check_next_tokens_are([Token.ID]) + field_name_token = self.parser.next_token() + field_name = field_name_token.data + + if self.parser.next_token(2).type == Token.LBRACK: + self.parser.advance_token(amount=2) # point to LBRACK + dimensions = [] # element[i] holds the size of dimension[i] + + while self.parser.current_token().type == Token.LBRACK: + self.parser.check_current_tokens_are([Token.LBRACK, Token.NUM, Token.RBRACK]) + dimensions.append(get_NUM_token_value(self.parser.next_token())) + + self.parser.advance_token(amount=3) # skip LBRACK NUM RBRACK + else: + dimensions = [1] + self.parser.advance_token(amount=2) # point to after ID + + self.parser.check_current_tokens_are([Token.SEMICOLON]) + self.parser.advance_token() # point to after SEMICOLON + + type_obj = { + "type": Token.INT, + "size": 1, + "dimensions": dimensions + } + else: + raise BFSyntaxError("Data type %s is not supported in field" % self.parser.current_token()) + + if field_name in defined_field_names: + raise BFSemanticError("Member '%s' is already defined" % field_name_token) + defined_field_names += [field_name] + + struct_object.add_field(type_obj, field_name) + + token = self.parser.current_token() + + self.parser.check_current_tokens_are([Token.RBRACE, Token.SEMICOLON]) + self.parser.advance_token(amount=2) # point to after SEMICOLON + + return struct_object + def compile_global_variable_definition(self): # INT ID (ASSIGN NUM | (LBRACK NUM RBRACK)+ (ASSIGN LBRACE ... RBRACE)?)? SEMICOLON # returns code that initializes this variable, and advances pointer according to variable size @@ -109,7 +194,7 @@ def process_global_definitions(self): """ code = '' token = self.parser.current_token() - while token is not None and token.type in [Token.VOID, Token.INT, Token.SEMICOLON]: + while token is not None and token.type in [Token.VOID, Token.INT, Token.SEMICOLON, Token.STRUCT]: if token.type == Token.SEMICOLON: # can have random semicolons ;) self.parser.advance_token() token = self.parser.current_token() @@ -121,6 +206,9 @@ def process_global_definitions(self): insert_function_object(function) elif token.type is Token.INT and self.parser.next_token(next_amount=2).type in [Token.SEMICOLON, Token.ASSIGN, Token.LBRACK]: code += self.compile_global_variable_definition() + elif token.type == Token.STRUCT: + struct_object = self.create_struct_object() + insert_struct_object(struct_object) else: raise BFSyntaxError("Unexpected '%s' after '%s'. Expected '(' (function definition) or one of: '=', ';', '[' (global variable definition)" % (str(self.parser.next_token(next_amount=2)), str(self.parser.next_token()))) diff --git a/Compiler/FunctionCompiler.py b/Compiler/FunctionCompiler.py index 9469b24..4e54feb 100644 --- a/Compiler/FunctionCompiler.py +++ b/Compiler/FunctionCompiler.py @@ -3,9 +3,12 @@ from .Exceptions import BFSyntaxError, BFSemanticError from .Functions import check_function_exists, get_function_object from .General import get_variable_dimensions_from_token, get_move_to_return_value_cell_code, get_print_string_code, get_variable_from_ID_token +from .General import get_variable_size_from_token from .Globals import create_variable_from_definition, get_global_variables, get_variable_size, is_variable_array from .Node import NodeToken, NodeArraySetElement, NodeUnaryPrefix, NodeUnaryPostfix, NodeArrayGetElement, NodeFunctionCall, NodeArrayAssignment +from .Node import NodeStructGetField, NodeStructSetField from .Parser import Parser +from .Structs import get_struct_from_id_token, check_if_struct_name_exists, get_struct_object from .Token import Token """ @@ -95,7 +98,7 @@ def insert_global_variables_to_function_scope(self): for variable in get_global_variables(): self.insert_to_ids_map(variable) - def get_array_index_expression(self): + def get_array_index_expression(self, struct_object=None): """ the idea - address the multi-dimensional array as a one-dimensional array calculate the appropriate index in the one-dimensional array @@ -108,16 +111,22 @@ def get_array_index_expression(self): e.g if the array is: arr[10][5][2] and we want to get arr[4][3][1] then we want to calculate index = (4*(5*2) + 3*(2) + 1) """ + is_struct_field = struct_object is not None + ID_token = self.parser.current_token() self.parser.advance_token(2) # skip ID, LBRACK first_index_expression = index_expression = self.expression() # first dimension self.parser.check_current_tokens_are([Token.RBRACK]) self.parser.advance_token() # skip RBRACK + multiply_token = Token(Token.BINOP, ID_token.line, ID_token.column, data="*") + # now handle the next dimensions (if multi-dimensional array) - dimensions = get_variable_dimensions_from_token(self.ids_map_list, ID_token) + if is_struct_field: + dimensions = struct_object.get_field_dimensions(ID_token.data) + else: + dimensions = get_variable_dimensions_from_token(self.ids_map_list, ID_token) if len(dimensions) > 1: - multiply_token = Token(Token.BINOP, ID_token.line, ID_token.column, data="*") add_token = Token(Token.BINOP, ID_token.line, ID_token.column, data="+") # multiply by next dimensions sizes @@ -155,16 +164,31 @@ def get_array_index_expression(self): if self.parser.current_token().type == Token.LBRACK: # too many indexes given... raise BFSemanticError("%s is a %s-dimensional array. Unexpected %s" % (str(ID_token), len(dimensions), self.parser.current_token())) + + if is_struct_field: + size = struct_object.get_field_size(ID_token.data) + else: + size = get_variable_size_from_token(self.ids_map_list, ID_token) + + if size > 1: + node_token_multiply_size = NodeToken(self.ids_map_list[:], token=Token(Token.NUM, ID_token.line, ID_token.column, data=str(size))) + index_expression = NodeToken(self.ids_map_list[:], token=multiply_token, left=node_token_multiply_size, right=index_expression) + return index_expression - def get_token_after_array_access(self): + def get_index_after_array_access(self, offset=0): # in case we have: "ID[a][b][c]...[z] next_token", return "next_token" - self.parser.check_current_tokens_are([Token.ID, Token.LBRACK]) - idx = self.parser.current_token_index + 1 # point to LBRACK + idx = self.parser.current_token_index + offset + self.parser.check_next_tokens_are([Token.ID, Token.LBRACK], starting_index=idx - 1) + idx += 1 # point to LBRACK while self.parser.token_at_index(idx).type == Token.LBRACK: idx = self.parser.find_matching(idx) # point to RBRACK idx += 1 # advance to one after the RBRACK + return idx + + def get_token_after_array_access(self, offset=0): + idx = self.get_index_after_array_access(offset) return self.parser.token_at_index(idx) def compile_array_assignment(self, token_id): @@ -265,6 +289,10 @@ def insert_scope_variables_into_ids_map(self): variable = create_variable_from_definition(self.parser, index=i) self.insert_to_ids_map(variable) + elif token.type == Token.STRUCT: + variable = create_variable_from_definition(self.parser, index=i) + self.insert_to_ids_map(variable) + elif token.type == Token.LBRACE: i = self.parser.find_matching(starting_index=i) @@ -338,12 +366,55 @@ def literal(self): token = self.parser.current_token() + if token.type == Token.SIZEOF and self.parser.next_token().type == Token.LPAREN: + self.parser.check_next_tokens_are([Token.LPAREN]) + self.parser.advance_token(amount=2) # point to after LPAREN + param = self.parser.current_token() + if param.type == Token.ID: + if check_if_struct_name_exists(param.data): + size = get_struct_object(param.data).size + else: + variable = get_variable_from_ID_token(self.ids_map_list, param) + size = get_variable_size(variable) + elif param.type == Token.RPAREN: + raise BFSyntaxError("Expected ID in '%s', got nothing" % token) + else: + raise NotImplementedError() + + self.parser.check_next_tokens_are([Token.RPAREN]) + self.parser.advance_token(2) + + num = Token(Token.NUM, token.line, token.column, data=str(size)) + node = NodeToken(self.ids_map_list[:], token=num) + + return node + if token.type == Token.ID and self.parser.next_token().type == Token.LPAREN: return self.function_call() if token.type == Token.ID and self.parser.next_token().type == Token.LBRACK: # array - ID(LBRACK expression RBRACK)+ index_expression = self.get_array_index_expression() - return NodeArrayGetElement(self.ids_map_list[:], token, index_expression) + field_name = None + + if self.parser.current_token().type == Token.DOT: + field_name = self.parser.next_token().data + self.parser.advance_token(amount=2) + + return NodeArrayGetElement(self.ids_map_list[:], token, index_expression, struct_field=field_name) + + if token.type in Token.ID and self.parser.next_token().type == Token.DOT: + self.parser.check_next_tokens_are([Token.DOT, Token.ID]) + self.parser.advance_token(amount=2) # point to ID + field_name = self.parser.current_token().data + + if self.parser.next_token().type == Token.LBRACK: + struct_object = get_struct_from_id_token(self.ids_map_list, token) + index_expression = self.get_array_index_expression(struct_object) + return NodeArrayGetElement(self.ids_map_list[:], token, index_expression, struct_field=field_name) + + self.parser.advance_token() + + return NodeStructGetField(self.ids_map_list[:], token, field_name) if token.type in [Token.NUM, Token.CHAR, Token.ID, Token.TRUE, Token.FALSE]: self.parser.advance_token() @@ -551,7 +622,6 @@ def assignment(self): # assignment: ID ASSIGN expression | ID ASSIGN ARRAY_INITIALIZATION | ID (LBRACK expression RBRACK)+ ASSIGN expression | logical_or if self.parser.current_token().type == Token.ID and self.parser.next_token().type == Token.ASSIGN: - if self.parser.next_token(2).type == Token.LBRACE: # ID ASSIGN ARRAY_INITIALIZATION token_ID = self.parser.current_token() self.parser.advance_token() # skip ID @@ -570,6 +640,65 @@ def assignment(self): new_node = NodeToken(self.ids_map_list[:], left=NodeToken(self.ids_map_list[:], token=id_token), token=assign_token, right=expression_node) return new_node + elif self.parser.current_token().type == Token.ID and self.parser.next_token().type == Token.DOT and self.parser.next_token(2).type == Token.ID and self.parser.next_token(3).type == Token.ASSIGN: + id_token = self.parser.current_token() + field_token = self.parser.next_token(2) + field_name = field_token.data + assign_token = self.parser.next_token(3) + self.parser.advance_token(amount=3) # skip ID DOT ID ASSIGN + + if self.parser.next_token().type == Token.LBRACE: # ID DOT ID ASSIGN ARRAY_INITIALIZATION + raise NotImplementedError("Array Initialization is not currently implemented for fields") + # struct_object = get_struct_from_id_token(self.ids_map_list, id_token) + # if not struct_object.is_field_array(field_name): + # raise BFSemanticError("Trying to assign array to non-array field %s" % field_token) + # return self.compile_array_assignment(id_token) + + self.parser.advance_token() + + # ID DOT ID ASSIGN expression + + expression_node = self.expression() + return NodeStructSetField(self.ids_map_list[:], id_token, field_name, assign_token, expression_node) + + elif self.parser.current_token().type == Token.ID and self.parser.next_token().type == Token.DOT and self.parser.next_token(2).type == Token.ID and self.parser.next_token(3).type == Token.LBRACK and \ + self.get_token_after_array_access(offset=2).type == Token.ASSIGN: + # ID DOT ID (LBRACK expression RBRACK)+ ASSIGN value_expression + id_token = self.parser.current_token() + field_name = self.parser.next_token(2).data + self.parser.advance_token(amount=2) # point to ID (field_name) + + struct_object = get_struct_from_id_token(self.ids_map_list, id_token) + + index_expression = self.get_array_index_expression(struct_object) + self.parser.check_current_tokens_are([Token.ASSIGN]) + + assign_token = self.parser.current_token() + self.parser.advance_token() # skip ASSIGN + value_expression = self.expression() + + return NodeArraySetElement(self.ids_map_list[:], id_token, index_expression, assign_token, value_expression, struct_field=field_name) + + elif (self.parser.current_token().type == Token.ID and + self.parser.next_token().type == Token.LBRACK and + self.get_token_after_array_access().type == Token.DOT and + self.parser.token_at_index(self.get_index_after_array_access() + 1).type == Token.ID and + self.parser.token_at_index(self.get_index_after_array_access() + 2).type == Token.ASSIGN + ): + # ID (LBRACK expression RBRACK)+ DOT ID ASSIGN value_expression + id_token = self.parser.current_token() + index_expression = self.get_array_index_expression() + self.parser.check_current_tokens_are([Token.DOT]) + field_name = self.parser.next_token().data + + self.parser.advance_token(amount=2) + self.parser.check_current_tokens_are([Token.ASSIGN]) + assign_token = self.parser.current_token() + self.parser.advance_token() # skip ASSIGN + value_expression = self.expression() + + return NodeArraySetElement(self.ids_map_list[:], id_token, index_expression, assign_token, value_expression, struct_field=field_name) + elif self.parser.current_token().type == Token.ID and self.parser.next_token().type == Token.LBRACK and \ self.get_token_after_array_access().type == Token.ASSIGN: # ID (LBRACK expression RBRACK)+ ASSIGN value_expression @@ -952,13 +1081,33 @@ def compile_statement(self): return self.compile_expression_as_statement() elif token.type == Token.ID: - if self.parser.next_token().type in [Token.ASSIGN, Token.LBRACK, Token.INCREMENT, Token.DECREMENT, Token.UNARY_MULTIPLICATIVE]: + if self.parser.next_token().type == Token.DOT: + if self.parser.next_token(next_amount=3).type in [Token.ASSIGN, Token.LBRACK, Token.INCREMENT, Token.DECREMENT, Token.UNARY_MULTIPLICATIVE]: + # ID ASSIGN expression; or ID DOT ID([expression])+ ASSIGN expression; or ID DOT ID++; + return self.compile_expression_as_statement() + elif self.parser.next_token().type in [Token.ASSIGN, Token.LBRACK, Token.INCREMENT, Token.DECREMENT, Token.UNARY_MULTIPLICATIVE]: # ID ASSIGN expression; or ID([expression])+ ASSIGN expression; or ID++; return self.compile_expression_as_statement() elif self.parser.next_token().type == Token.LPAREN: # ID(...); (function call) return self.compile_function_call_statement() raise BFSyntaxError("Unexpected '%s' after '%s'. Expected '=|+=|-=|*=|/=|%%=|<<=|>>=|&=|(|=)|^=' (assignment), '++|--' (modification) or '(' (function call)" % (str(self.parser.next_token()), str(token))) + elif token.type == Token.STRUCT: + # STRUCT ID ID SEMICOLON + self.parser.check_next_tokens_are([Token.ID, Token.ID]) + struct_id_token = self.parser.next_token() + variable_name = self.parser.next_token(2) + self.parser.advance_token(amount=3) + + if self.parser.current_token().type == Token.LBRACK: + while self.parser.current_token().type == Token.LBRACK: # loop to skip to after last RBRACK ] + self.parser.check_current_tokens_are([Token.LBRACK, Token.NUM, Token.RBRACK]) + self.parser.advance_token(3) + + self.parser.check_current_tokens_are([Token.SEMICOLON]) + self.parser.advance_token() # point to after SEMICOLON + return '' # no code is generated here. code was generated for defining this variable when we entered the scope + elif token.type == Token.PRINT: # print(string); return self.compile_print_string() diff --git a/Compiler/General.py b/Compiler/General.py index dc0c81f..33e6a7d 100644 --- a/Compiler/General.py +++ b/Compiler/General.py @@ -1089,6 +1089,11 @@ def get_variable_dimensions_from_token(ids_map_list, ID_token): return variable.dimensions +def get_variable_size_from_token(ids_map_list, ID_token): + variable = get_variable_from_ID_token(ids_map_list, ID_token) + return variable.size + + def get_id_index(ids_map_list, ID_token): variable = get_variable_from_ID_token(ids_map_list, ID_token) return variable.cell_index diff --git a/Compiler/Globals.py b/Compiler/Globals.py index d347ba1..1ba0bd4 100644 --- a/Compiler/Globals.py +++ b/Compiler/Globals.py @@ -1,4 +1,5 @@ from collections import namedtuple +from .Structs import get_struct_object from .Token import Token from .General import dimensions_to_size @@ -24,21 +25,23 @@ def get_global_variables_size(): return sum(get_variable_size(variable) for variable in get_global_variables()) -def create_variable(name, type, dimensions): +def create_variable(name, type, dimensions, size=1, extra=None): # return variable named tuple - variable = namedtuple("variable", ["name", "type", "size", "cell_index"]) + variable = namedtuple("variable", ["name", "type", "size", "dimensions", "cell_index", "extra"]) variable.name = name variable.type = type + variable.size = size variable.dimensions = dimensions # list of array dimensions sizes (for non-arrays it will be [1]) variable.cell_index = None # will be updated when we insert this variable into an ids map + variable.extra = extra return variable def get_variable_size(variable): # return total variable size - return dimensions_to_size(variable.dimensions) + return dimensions_to_size(variable.dimensions) * variable.size def get_variable_dimensions(variable): @@ -59,19 +62,38 @@ def create_variable_from_definition(parser, index=None, advance_tokens=False): if index is None: index = parser.current_token_index - assert parser.tokens[index].type == Token.INT + variable_type = parser.tokens[index].type - parser.check_next_tokens_are([Token.ID], starting_index=index) - ID = parser.tokens[index + 1].data + assert variable_type in [Token.INT, Token.STRUCT] + + offset = 0 + extra = None + size = 1 + + if variable_type == Token.STRUCT: + parser.check_next_tokens_are([Token.ID, Token.ID], starting_index=index) + struct_id = parser.tokens[index + 1].data + ID = parser.tokens[index + 2].data + + struct_object = get_struct_object(struct_id) + size = struct_object.size + extra = struct_object + + offset += 2 + else: + parser.check_next_tokens_are([Token.ID], starting_index=index) + ID = parser.tokens[index + 1].data + + offset += 1 if advance_tokens: - parser.advance_token(amount=2) # skip INT ID + parser.advance_token(amount=offset + 1) # skip (INT ID | STRUCT ID ID) - if parser.tokens[index + 2].type == Token.LBRACK: # array (support multi-dimensional arrays) + if parser.tokens[index + offset + 1].type == Token.LBRACK: # array (support multi-dimensional arrays) dimensions = [] # element[i] holds the size of dimension[i] - while parser.tokens[index + 2].type == Token.LBRACK: - parser.check_next_tokens_are([Token.LBRACK, Token.NUM, Token.RBRACK], starting_index=index + 1) - dimensions.append(get_NUM_token_value(parser.tokens[index + 3])) + while parser.tokens[index + offset + 1].type == Token.LBRACK: + parser.check_next_tokens_are([Token.LBRACK, Token.NUM, Token.RBRACK], starting_index=index + offset) + dimensions.append(get_NUM_token_value(parser.tokens[index + offset + 2])) if advance_tokens: parser.advance_token(amount=3) # skip LBRACK NUM RBRACK @@ -79,4 +101,4 @@ def create_variable_from_definition(parser, index=None, advance_tokens=False): else: dimensions = [1] - return create_variable(ID, Token.INT, dimensions) + return create_variable(ID, variable_type, dimensions, size, extra) diff --git a/Compiler/Lexical_analyzer.py b/Compiler/Lexical_analyzer.py index 637c48b..b6d7aa7 100644 --- a/Compiler/Lexical_analyzer.py +++ b/Compiler/Lexical_analyzer.py @@ -18,6 +18,7 @@ def analyze(text): ('int', Token.INT), ('bool', Token.INT), # treat bool as int ('char', Token.INT), # treat char as int + ('struct', Token.STRUCT), ('true', Token.TRUE), ('false', Token.FALSE), @@ -31,10 +32,12 @@ def analyze(text): ('for', Token.FOR), ('do', Token.DO), ('print', Token.PRINT), + ('sizeof', Token.SIZEOF), ('break', Token.BREAK), # todo ('continue', Token.CONTINUE), # todo (';', Token.SEMICOLON), (',', Token.COMMA), + (r'\.', Token.DOT), ('\(', Token.LPAREN), ('\)', Token.RPAREN), diff --git a/Compiler/Node.py b/Compiler/Node.py index a0657ad..96413c2 100644 --- a/Compiler/Node.py +++ b/Compiler/Node.py @@ -5,6 +5,7 @@ from .General import get_op_between_literals_code, get_literal_token_code, get_token_ID_code from .General import get_unary_prefix_op_code, get_unary_postfix_op_code from .General import unpack_literal_tokens_to_array_dimensions +from .Structs import get_offset_to_field, get_struct_from_id_token from .Token import Token """ @@ -79,9 +80,13 @@ def get_code(self, current_pointer, *args, **kwargs): # id = expression code = self.right.get_code(current_pointer) + offset = 0 + if isinstance(self.left, NodeStructGetField): + offset = self.left.get_offset() + # create code to copy from evaluated expression to ID's cell code += "<" # point to evaluated expression cell - code += get_copy_to_variable_code(self.ids_map_list, self.left.token, current_pointer) + code += get_copy_to_variable_code(self.ids_map_list, self.left.token, current_pointer - offset) code += ">" # point to next available cell return code @@ -124,8 +129,9 @@ def get_code(self, current_pointer, *args, **kwargs): token_id, index_node = self.node_literal.token_id, self.node_literal.node_expression code = get_move_right_index_cells_code(current_pointer, index_node) - offset_to_array = get_offset_to_variable(self.ids_map_list, token_id, current_pointer + 2) - # it is +2 because in "get_move_right_index_cells_code", we moved 2 extra cells to the right, for retrieving the value + offset = self.node_literal.get_offset() + offset_to_array = get_offset_to_variable(self.ids_map_list, token_id, current_pointer + offset) + # it is offset by 2 because in "get_move_right_index_cells_code", we moved 2 extra cells to the right, for retrieving the value code += get_unary_prefix_op_code(self.token_operation, offset_to_array) @@ -135,6 +141,10 @@ def get_code(self, current_pointer, *args, **kwargs): code += get_move_left_index_cell_code() return code + elif isinstance(self.node_literal, NodeStructGetField): + offset = self.node_literal.get_offset() + offset_to_ID = get_offset_to_variable(self.ids_map_list, self.node_literal.token, current_pointer - offset) + return get_unary_prefix_op_code(self.token_operation, offset_to_ID) # the token to apply on must be an ID if isinstance(self.node_literal, NodeToken) is False: @@ -161,8 +171,9 @@ def get_code(self, current_pointer, *args, **kwargs): token_id, index_node = self.node_literal.token_id, self.node_literal.node_expression code = get_move_right_index_cells_code(current_pointer, index_node) - offset_to_array = get_offset_to_variable(self.ids_map_list, token_id, current_pointer + 2) - # it is +2 because in "get_move_right_index_cells_code", we moved 2 extra cells to the right, for retrieving the value + offset = self.node_literal.get_offset() + offset_to_array = get_offset_to_variable(self.ids_map_list, token_id, current_pointer + offset) + # it is offset by 2 because in "get_move_right_index_cells_code", we moved 2 extra cells to the right, for retrieving the value code += get_unary_postfix_op_code(self.token_operation, offset_to_array) @@ -172,6 +183,10 @@ def get_code(self, current_pointer, *args, **kwargs): code += get_move_left_index_cell_code() return code + elif isinstance(self.node_literal, NodeStructGetField): + offset = self.node_literal.get_offset() + offset_to_ID = get_offset_to_variable(self.ids_map_list, self.node_literal.token, current_pointer - offset) + return get_unary_postfix_op_code(self.token_operation, offset_to_ID) # the token to apply on must be an ID if isinstance(self.node_literal, NodeToken) is False: @@ -213,9 +228,21 @@ def get_code(self, current_pointer, *args, **kwargs): class NodeArrayElement(Node): - def __init__(self, ids_map_list): + def __init__(self, ids_map_list, token_id, struct_field): Node.__init__(self, ids_map_list) + self.struct_field = struct_field + self.struct_object = None + if struct_field is not None: + self.struct_object = get_struct_from_id_token(ids_map_list, token_id) + + def get_offset(self): + offset = 2 # it is +2 because that is where the value is + if self.struct_field is not None: + offset -= get_offset_to_field(self.struct_object, self.struct_field) + + return offset + """ the idea: 1. evaluate index. it is known only in run time, so we need to perform a little trick @@ -236,15 +263,16 @@ class for getting element of a one-dimensional array and returns a code that gets that element """ - def __init__(self, ids_map_list, token_id, node_expression): - Node.__init__(self, ids_map_list) + def __init__(self, ids_map_list, token_id, node_expression, struct_field=None): + NodeArrayElement.__init__(self, ids_map_list, token_id, struct_field) self.token_id = token_id self.node_expression = node_expression def get_code(self, current_pointer, *args, **kwargs): + offset = self.get_offset() + code = get_move_right_index_cells_code(current_pointer, self.node_expression) - code += get_copy_from_variable_code(self.ids_map_list, self.token_id, current_pointer + 2) - # it is +2 because in "get_move_right_index_cells_code", we moved 2 extra cells to the right, for retrieving the value + code += get_copy_from_variable_code(self.ids_map_list, self.token_id, current_pointer + offset) code += "<" # point to res code += "[<<+>>-]" # move res to old "index cell" @@ -264,8 +292,8 @@ class for setting element of a one-dimensional array and returns a code that gets that element """ - def __init__(self, ids_map_list, token_id, node_expression_index, assign_token, node_expression_value): - Node.__init__(self, ids_map_list) + def __init__(self, ids_map_list, token_id, node_expression_index, assign_token, node_expression_value, struct_field=None): + NodeArrayElement.__init__(self, ids_map_list, token_id, struct_field) self.token_id = token_id self.node_expression_index = node_expression_index @@ -283,7 +311,7 @@ def __init__(self, ids_map_list, token_id, node_expression_index, assign_token, # create a node for id[exp] + expression op_node = self.assign_token_to_op_token(assign_token) - op_node.left = NodeArrayGetElement(self.ids_map_list[:], token_id, node_expression_index) + op_node.left = NodeArrayGetElement(self.ids_map_list[:], token_id, node_expression_index, struct_field=self.struct_field) op_node.right = node_expression_value self.node_expression_value = op_node @@ -291,10 +319,12 @@ def __init__(self, ids_map_list, token_id, node_expression_index, assign_token, def get_code(self, current_pointer, *args, **kwargs): # index, steps_taken_counter, value + offset = self.get_offset() + code = self.node_expression_index.get_code(current_pointer) code += "[-]" # counter = 0 code += ">" # point to value cell - code += self.node_expression_value.get_code(current_pointer + 2) + code += self.node_expression_value.get_code(current_pointer + offset) code += "<<<" # point to index code += "[" # while index != 0 @@ -312,7 +342,7 @@ def get_code(self, current_pointer, *args, **kwargs): code += "]" # end while code += ">>" # point to value - code += get_copy_to_variable_code(self.ids_map_list, self.token_id, current_pointer + 2) + code += get_copy_to_variable_code(self.ids_map_list, self.token_id, current_pointer + offset) # it is +2 because we moved 2 extra cells to the right, for pointing to value # layout: 0, idx, value (pointing to value) @@ -356,3 +386,62 @@ def get_code(self, current_pointer, *args, **kwargs): code += ">" * (offset - len(unpacked_literals_list)) # move back to the original position code += ">" # point to the next cell return code + + +class NodeStruct(Node): + def __init__(self, ids_map_list, token_id): + Node.__init__(self, ids_map_list) + self.struct_object = get_struct_from_id_token(ids_map_list, token_id) + + +class NodeStructGetField(NodeStruct): + def __init__(self, ids_map_list, token, field_name): + NodeStruct.__init__(self, ids_map_list, token) + self.token = token + self.field_name = field_name + + def get_offset(self): + field_offset = get_offset_to_field(self.struct_object, self.field_name) + return field_offset + + def get_code(self, current_pointer, *args, **kwargs): + return get_copy_from_variable_code(self.ids_map_list, self.token, current_pointer - self.get_offset()) + + +class NodeStructSetField(NodeStruct): + def __init__(self, ids_map_list, token, field_name, assign_token, node_expression_value): + NodeStruct.__init__(self, ids_map_list, token) + self.token = token + self.field_name = field_name + self.left = NodeStructGetField(self.ids_map_list[:], token, field_name) + + self.assign_token = assign_token + self.node_expression_value = node_expression_value + + def get_code(self, current_pointer, *args, **kwargs): + cell_field_offset = self.left.get_offset() + + if self.assign_token.data == "=": + # id.id = expression + code = self.node_expression_value.get_code(current_pointer) + + # create code to copy from evaluated expression to the field + code += "<" # point to evaluated expression cell + code += get_copy_to_variable_code(self.ids_map_list, self.token, current_pointer - cell_field_offset) + code += ">" # point to next available cell + + return code + else: + assert self.assign_token.data in ["+=", "-=", "*=", "/=", "%=", "<<=", ">>=", "&=", "|=", "^="] + # id.id += expression + # create a node for id.id + expression + + op_node = self.assign_token_to_op_token(self.assign_token) + op_node.left = self.left + op_node.right = self.node_expression_value + + # create a node for id.id = id.id + expression + assign_token = Token(Token.ASSIGN, self.token.line, self.token.column, data="=") + assignment_node = NodeToken(self.ids_map_list, left=self.left, token=assign_token, right=op_node) + + return assignment_node.get_code(current_pointer) diff --git a/Compiler/Structs.py b/Compiler/Structs.py new file mode 100644 index 0000000..8b55427 --- /dev/null +++ b/Compiler/Structs.py @@ -0,0 +1,88 @@ +from .Exceptions import BFSemanticError +from .General import get_variable_from_ID_token, dimensions_to_size + +structs = dict() # Global dictionary of struct_name --> Struct object + + +class Struct: + def __init__(self, name, name_token): + self.name = name + self.fields = [] + self.name_token = name_token + self.size = 0 + + def add_field(self, field_type, field_name): + dimensions = dimensions_to_size(field_type["dimensions"]) + self.size += field_type["size"] * dimensions + self.fields += [( + field_type, + field_name + )] + + def get_field(self, name): + for field_type, field_name in self.fields: + if field_name == name: + return field_type + + return None + + def is_field_array(self, name): + dimensions = self.get_field_dimensions(name) + + return dimensions != [1] + + def get_field_dimensions(self, name): + field = self.get_field(name) + + if field is None: + return None + + return field["dimensions"] + + def get_field_size(self, name): + field = self.get_field(name) + + if field is None: + return None + + return field["size"] + + +def insert_struct_object(struct): + if check_if_struct_name_exists(struct.name): + raise BFSemanticError("Struct '%s' already exists" % struct.name_token) + + structs[struct.name] = struct + + +def get_struct_object(name): + return structs[name] + + +def check_if_struct_name_exists(struct_name): + return struct_name in structs + + +def get_offsets(struct_object): + offsets = {} + acu = 0 + for field_type, field_name in struct_object.fields: + size = field_type["size"] + offsets[field_name] = acu + acu += size + + return offsets + + +def get_offset_to_field(struct_object, field_name): + offsets = get_offsets(struct_object) + + if field_name not in offsets: + raise BFSemanticError("Field '%s' of struct '%s' doesn't exist" % (field_name, struct_object.name_token)) + + return offsets[field_name] + + +def get_struct_from_id_token(ids_map_list, id_token): + variable = get_variable_from_ID_token(ids_map_list, id_token) + return variable.extra diff --git a/Compiler/Token.py b/Compiler/Token.py index 4d10147..0081b95 100644 --- a/Compiler/Token.py +++ b/Compiler/Token.py @@ -2,6 +2,7 @@ class Token: INT = "INT" VOID = "VOID" + STRUCT = "STRUCT" TRUE = "TRUE" FALSE = "FALSE" AND = "AND" @@ -17,6 +18,7 @@ class Token: CONTINUE = "CONTINUE" SEMICOLON = "SEMICOLON" COMMA = "COMMA" + DOT = "DOT" LPAREN = "LPAREN" RPAREN = "RPAREN" @@ -45,6 +47,7 @@ class Token: CHAR = "CHAR" PRINT = "PRINT" + SIZEOF = "SIZEOF" COMMENT = "COMMENT" UNIDENTIFIED = "UNIDENTIFIED" From 201153472a501243e80987d2b7e73b31ed8845e8 Mon Sep 17 00:00:00 2001 From: NeeEoo Date: Wed, 7 Apr 2021 22:53:31 +0200 Subject: [PATCH 2/5] Made it so you can use dimensions on both the variable and field at the same time on struct variables --- Compiler/FunctionCompiler.py | 48 +++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/Compiler/FunctionCompiler.py b/Compiler/FunctionCompiler.py index 4e54feb..9a771ef 100644 --- a/Compiler/FunctionCompiler.py +++ b/Compiler/FunctionCompiler.py @@ -397,8 +397,21 @@ def literal(self): field_name = None if self.parser.current_token().type == Token.DOT: + # ID (LBRACK expression RBRACK)+ DOT ID + self.parser.check_next_tokens_are([Token.ID]) field_name = self.parser.next_token().data - self.parser.advance_token(amount=2) + self.parser.advance_token() # point to ID (field_name) + + if self.parser.next_token().type == Token.LBRACK: + # ID (LBRACK expression RBRACK)+ DOT ID (LBRACK expression RBRACK)+ + struct_object = get_struct_from_id_token(self.ids_map_list, token) + field_index_expression = self.get_array_index_expression(struct_object) + + # Adds the offset to the struct in the array then it adds the offset to the field + add_token = Token(Token.BINOP, token.line, token.column, data="+") + index_expression = NodeToken(self.ids_map_list[:], token=add_token, left=index_expression, right=field_index_expression) + else: + self.parser.advance_token() # point to after ID return NodeArrayGetElement(self.ids_map_list[:], token, index_expression, struct_field=field_name) @@ -408,6 +421,7 @@ def literal(self): field_name = self.parser.current_token().data if self.parser.next_token().type == Token.LBRACK: + # ID DOT ID (LBRACK expression RBRACK)+ struct_object = get_struct_from_id_token(self.ids_map_list, token) index_expression = self.get_array_index_expression(struct_object) return NodeArrayGetElement(self.ids_map_list[:], token, index_expression, struct_field=field_name) @@ -699,6 +713,38 @@ def assignment(self): return NodeArraySetElement(self.ids_map_list[:], id_token, index_expression, assign_token, value_expression, struct_field=field_name) + elif (self.parser.current_token().type == Token.ID and + self.parser.next_token().type == Token.LBRACK and + self.get_token_after_array_access().type == Token.DOT and + self.parser.token_at_index(self.get_index_after_array_access() + 1).type == Token.ID and + self.parser.token_at_index(self.get_index_after_array_access() + 2).type == Token.LBRACK and + self.parser.token_at_index( + self.get_index_after_array_access( + self.get_index_after_array_access() - self.parser.current_token_index + 1 + ) + ).type == Token.ASSIGN + ): + # ID (LBRACK expression RBRACK)+ DOT ID (LBRACK expression RBRACK)+ ASSIGN value_expression + id_token = self.parser.current_token() + index_expression = self.get_array_index_expression() + self.parser.check_current_tokens_are([Token.DOT]) + field_name = self.parser.next_token().data + struct_object = get_struct_from_id_token(self.ids_map_list, id_token) + + self.parser.advance_token() # point to ID (field_name) + field_index_expression = self.get_array_index_expression(struct_object) + + self.parser.check_current_tokens_are([Token.ASSIGN]) + assign_token = self.parser.current_token() + self.parser.advance_token() # skip ASSIGN + value_expression = self.expression() + + # Adds the offset to the struct in the array then it adds the offset to the field + add_token = Token(Token.BINOP, id_token.line, id_token.column, data="+") + index_expression = NodeToken(self.ids_map_list[:], token=add_token, left=index_expression, right=field_index_expression) + + return NodeArraySetElement(self.ids_map_list[:], id_token, index_expression, assign_token, value_expression, struct_field=field_name) + elif self.parser.current_token().type == Token.ID and self.parser.next_token().type == Token.LBRACK and \ self.get_token_after_array_access().type == Token.ASSIGN: # ID (LBRACK expression RBRACK)+ ASSIGN value_expression From 0dd3bcf2aa473917de2c7754470603036b9df422 Mon Sep 17 00:00:00 2001 From: NeeEoo Date: Thu, 8 Apr 2021 09:44:17 +0200 Subject: [PATCH 3/5] Refactored the assignment expression code --- Compiler/FunctionCompiler.py | 185 +++++++++++++++-------------------- 1 file changed, 80 insertions(+), 105 deletions(-) diff --git a/Compiler/FunctionCompiler.py b/Compiler/FunctionCompiler.py index 9a771ef..47f6ec4 100644 --- a/Compiler/FunctionCompiler.py +++ b/Compiler/FunctionCompiler.py @@ -635,130 +635,105 @@ def logical_or(self): def assignment(self): # assignment: ID ASSIGN expression | ID ASSIGN ARRAY_INITIALIZATION | ID (LBRACK expression RBRACK)+ ASSIGN expression | logical_or - if self.parser.current_token().type == Token.ID and self.parser.next_token().type == Token.ASSIGN: - if self.parser.next_token(2).type == Token.LBRACE: # ID ASSIGN ARRAY_INITIALIZATION - token_ID = self.parser.current_token() - self.parser.advance_token() # skip ID - variable_ID = get_variable_from_ID_token(self.ids_map_list, token_ID) - if not is_variable_array(variable_ID): - raise BFSemanticError("Trying to assign array to non-array variable %s" % token_ID) - return self.compile_array_assignment(token_ID) - - # ID ASSIGN expression + old_token_index = self.parser.current_token_index + + if self.parser.current_token().type == Token.ID: id_token = self.parser.current_token() - assign_token = self.parser.next_token() - self.parser.advance_token(amount=2) # skip ID ASSIGN - expression_node = self.expression() + if self.parser.next_token().type == Token.LBRACK: + index_expression = self.get_array_index_expression() - new_node = NodeToken(self.ids_map_list[:], left=NodeToken(self.ids_map_list[:], token=id_token), token=assign_token, right=expression_node) - return new_node + if self.parser.current_token().type == Token.DOT: + self.parser.check_next_tokens_are([Token.ID]) + field_name = self.parser.next_token().data + self.parser.advance_token() # point to after ID - elif self.parser.current_token().type == Token.ID and self.parser.next_token().type == Token.DOT and self.parser.next_token(2).type == Token.ID and self.parser.next_token(3).type == Token.ASSIGN: - id_token = self.parser.current_token() - field_token = self.parser.next_token(2) - field_name = field_token.data - assign_token = self.parser.next_token(3) - self.parser.advance_token(amount=3) # skip ID DOT ID ASSIGN - - if self.parser.next_token().type == Token.LBRACE: # ID DOT ID ASSIGN ARRAY_INITIALIZATION - raise NotImplementedError("Array Initialization is not currently implemented for fields") - # struct_object = get_struct_from_id_token(self.ids_map_list, id_token) - # if not struct_object.is_field_array(field_name): - # raise BFSemanticError("Trying to assign array to non-array field %s" % field_token) - # return self.compile_array_assignment(id_token) + if self.parser.next_token().type == Token.LBRACK: + struct_object = get_struct_from_id_token(self.ids_map_list, id_token) + field_index_expression = self.get_array_index_expression(struct_object) - self.parser.advance_token() + if self.parser.current_token().type == Token.ASSIGN: + # ID (LBRACK expression RBRACK)+ DOT ID (LBRACK expression RBRACK)+ ASSIGN value_expression + assign_token = self.parser.current_token() + self.parser.advance_token() # point to after ASSIGN + value_expression = self.expression() - # ID DOT ID ASSIGN expression + # Adds the offset to the struct in the array then it adds the offset to the field + add_token = Token(Token.BINOP, id_token.line, id_token.column, data="+") + index_expression = NodeToken(self.ids_map_list[:], token=add_token, left=index_expression, right=field_index_expression) - expression_node = self.expression() - return NodeStructSetField(self.ids_map_list[:], id_token, field_name, assign_token, expression_node) + return NodeArraySetElement(self.ids_map_list[:], id_token, index_expression, assign_token, value_expression, struct_field=field_name) - elif self.parser.current_token().type == Token.ID and self.parser.next_token().type == Token.DOT and self.parser.next_token(2).type == Token.ID and self.parser.next_token(3).type == Token.LBRACK and \ - self.get_token_after_array_access(offset=2).type == Token.ASSIGN: - # ID DOT ID (LBRACK expression RBRACK)+ ASSIGN value_expression - id_token = self.parser.current_token() - field_name = self.parser.next_token(2).data - self.parser.advance_token(amount=2) # point to ID (field_name) + elif self.parser.next_token().type == Token.ASSIGN: + # ID (LBRACK expression RBRACK)+ DOT ID ASSIGN value_expression + assign_token = self.parser.next_token() + self.parser.advance_token(amount=2) # point to after ASSIGN + value_expression = self.expression() - struct_object = get_struct_from_id_token(self.ids_map_list, id_token) + return NodeArraySetElement(self.ids_map_list[:], id_token, index_expression, assign_token, value_expression, struct_field=field_name) - index_expression = self.get_array_index_expression(struct_object) - self.parser.check_current_tokens_are([Token.ASSIGN]) + elif self.parser.current_token().type == Token.ASSIGN: + # ID (LBRACK expression RBRACK)+ ASSIGN value_expression + assign_token = self.parser.current_token() + self.parser.advance_token() # point to after ASSIGN + value_expression = self.expression() - assign_token = self.parser.current_token() - self.parser.advance_token() # skip ASSIGN - value_expression = self.expression() + return NodeArraySetElement(self.ids_map_list[:], id_token, index_expression, assign_token, value_expression) - return NodeArraySetElement(self.ids_map_list[:], id_token, index_expression, assign_token, value_expression, struct_field=field_name) + elif self.parser.next_token().type == Token.DOT: + self.parser.check_next_tokens_are([Token.DOT, Token.ID]) + self.parser.advance_token(amount=2) # point to ID (field_name) + field_token = self.parser.current_token() + field_name = field_token.data - elif (self.parser.current_token().type == Token.ID and - self.parser.next_token().type == Token.LBRACK and - self.get_token_after_array_access().type == Token.DOT and - self.parser.token_at_index(self.get_index_after_array_access() + 1).type == Token.ID and - self.parser.token_at_index(self.get_index_after_array_access() + 2).type == Token.ASSIGN - ): - # ID (LBRACK expression RBRACK)+ DOT ID ASSIGN value_expression - id_token = self.parser.current_token() - index_expression = self.get_array_index_expression() - self.parser.check_current_tokens_are([Token.DOT]) - field_name = self.parser.next_token().data - - self.parser.advance_token(amount=2) - self.parser.check_current_tokens_are([Token.ASSIGN]) - assign_token = self.parser.current_token() - self.parser.advance_token() # skip ASSIGN - value_expression = self.expression() - - return NodeArraySetElement(self.ids_map_list[:], id_token, index_expression, assign_token, value_expression, struct_field=field_name) - - elif (self.parser.current_token().type == Token.ID and - self.parser.next_token().type == Token.LBRACK and - self.get_token_after_array_access().type == Token.DOT and - self.parser.token_at_index(self.get_index_after_array_access() + 1).type == Token.ID and - self.parser.token_at_index(self.get_index_after_array_access() + 2).type == Token.LBRACK and - self.parser.token_at_index( - self.get_index_after_array_access( - self.get_index_after_array_access() - self.parser.current_token_index + 1 - ) - ).type == Token.ASSIGN - ): - # ID (LBRACK expression RBRACK)+ DOT ID (LBRACK expression RBRACK)+ ASSIGN value_expression - id_token = self.parser.current_token() - index_expression = self.get_array_index_expression() - self.parser.check_current_tokens_are([Token.DOT]) - field_name = self.parser.next_token().data - struct_object = get_struct_from_id_token(self.ids_map_list, id_token) + if self.parser.next_token().type == Token.LBRACK: + struct_object = get_struct_from_id_token(self.ids_map_list, id_token) + field_index_expression = self.get_array_index_expression(struct_object) - self.parser.advance_token() # point to ID (field_name) - field_index_expression = self.get_array_index_expression(struct_object) + if self.parser.current_token().type == Token.ASSIGN: + # ID DOT ID (LBRACK expression RBRACK)+ ASSIGN value_expression + assign_token = self.parser.current_token() + self.parser.advance_token() # point to after ASSIGN + value_expression = self.expression() - self.parser.check_current_tokens_are([Token.ASSIGN]) - assign_token = self.parser.current_token() - self.parser.advance_token() # skip ASSIGN - value_expression = self.expression() + return NodeArraySetElement(self.ids_map_list[:], id_token, field_index_expression, assign_token, value_expression, struct_field=field_name) - # Adds the offset to the struct in the array then it adds the offset to the field - add_token = Token(Token.BINOP, id_token.line, id_token.column, data="+") - index_expression = NodeToken(self.ids_map_list[:], token=add_token, left=index_expression, right=field_index_expression) + elif self.parser.next_token().type == Token.ASSIGN: + self.parser.advance_token() # point to ASSIGN - return NodeArraySetElement(self.ids_map_list[:], id_token, index_expression, assign_token, value_expression, struct_field=field_name) + if self.parser.next_token().type == Token.LBRACE: # ID DOT ID ASSIGN ARRAY_INITIALIZATION + raise NotImplementedError("Array Initialization is not currently implemented for fields") + # struct_object = get_struct_from_id_token(self.ids_map_list, id_token) + # if not struct_object.is_field_array(field_name): + # raise BFSemanticError("Trying to assign array to non-array field %s" % field_token) + # return self.compile_array_assignment(id_token) - elif self.parser.current_token().type == Token.ID and self.parser.next_token().type == Token.LBRACK and \ - self.get_token_after_array_access().type == Token.ASSIGN: - # ID (LBRACK expression RBRACK)+ ASSIGN value_expression - id_token = self.parser.current_token() - index_expression = self.get_array_index_expression() - self.parser.check_current_tokens_are([Token.ASSIGN]) - assign_token = self.parser.current_token() - self.parser.advance_token() # skip ASSIGN - value_expression = self.expression() + # ID DOT ID ASSIGN expression + assign_token = self.parser.current_token() + self.parser.advance_token() # point to after ASSIGN + value_expression = self.expression() - return NodeArraySetElement(self.ids_map_list[:], id_token, index_expression, assign_token, value_expression) - else: - # logical or - return self.logical_or() + return NodeStructSetField(self.ids_map_list[:], id_token, field_name, assign_token, value_expression) + + elif self.parser.next_token().type == Token.ASSIGN: + self.parser.advance_token() # point to after ID + if self.parser.next_token().type == Token.LBRACE: # ID ASSIGN ARRAY_INITIALIZATION + variable_ID = get_variable_from_ID_token(self.ids_map_list, id_token) + if not is_variable_array(variable_ID): + raise BFSemanticError("Trying to assign array to non-array variable %s" % id_token) + return self.compile_array_assignment(id_token) + + # ID ASSIGN expression + assign_token = self.parser.current_token() + self.parser.advance_token() # point to after ASSIGN + value_expression = self.expression() + + return NodeToken(self.ids_map_list[:], left=NodeToken(self.ids_map_list[:], token=id_token), token=assign_token, right=value_expression) + + self.parser.advance_to_token_at_index(old_token_index) + + # logical or + return self.logical_or() def expression(self): # expression: assignment From 6cec753008dbf3ff58252e4613e1b998278c776b Mon Sep 17 00:00:00 2001 From: NeeEoo Date: Thu, 8 Apr 2021 11:40:40 +0200 Subject: [PATCH 4/5] Started adding array assignment for struct fields --- Compiler/FunctionCompiler.py | 18 +++++++++--------- Compiler/Node.py | 29 ++++++++++++++++------------- 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/Compiler/FunctionCompiler.py b/Compiler/FunctionCompiler.py index 47f6ec4..2273f8f 100644 --- a/Compiler/FunctionCompiler.py +++ b/Compiler/FunctionCompiler.py @@ -191,7 +191,7 @@ def get_token_after_array_access(self, offset=0): idx = self.get_index_after_array_access(offset) return self.parser.token_at_index(idx) - def compile_array_assignment(self, token_id): + def compile_array_assignment(self, token_id, struct_field=None): # int id[a][b][c]... = {1, 2, 3, ...}; # or int id[a][b][c]... = {{1, 2}, {3, 4}, ...}; # or array assignment: id = {1, 2, 3, ...}; @@ -201,10 +201,10 @@ def compile_array_assignment(self, token_id): assert self.parser.current_token().type == Token.ASSIGN and self.parser.current_token().data == "=" self.parser.check_current_tokens_are([Token.ASSIGN, Token.LBRACE]) - self.parser.advance_token(1) # skip to LBRACE + self.parser.advance_token() # skip to LBRACE literal_tokens_list = self.parser.compile_array_initialization_list() - return NodeArrayAssignment(self.ids_map_list[:], token_id, literal_tokens_list) + return NodeArrayAssignment(self.ids_map_list[:], token_id, literal_tokens_list, struct_field) def add_ids_map(self): """ @@ -645,7 +645,8 @@ def assignment(self): if self.parser.current_token().type == Token.DOT: self.parser.check_next_tokens_are([Token.ID]) - field_name = self.parser.next_token().data + field_token = self.parser.next_token() + field_name = field_token.data self.parser.advance_token() # point to after ID if self.parser.next_token().type == Token.LBRACK: @@ -702,11 +703,10 @@ def assignment(self): self.parser.advance_token() # point to ASSIGN if self.parser.next_token().type == Token.LBRACE: # ID DOT ID ASSIGN ARRAY_INITIALIZATION - raise NotImplementedError("Array Initialization is not currently implemented for fields") - # struct_object = get_struct_from_id_token(self.ids_map_list, id_token) - # if not struct_object.is_field_array(field_name): - # raise BFSemanticError("Trying to assign array to non-array field %s" % field_token) - # return self.compile_array_assignment(id_token) + struct_object = get_struct_from_id_token(self.ids_map_list, id_token) + if not struct_object.is_field_array(field_name): + raise BFSemanticError("Trying to assign array to non-array field %s" % field_token) + return self.compile_array_assignment(id_token, field_name) # ID DOT ID ASSIGN expression assign_token = self.parser.current_token() diff --git a/Compiler/Node.py b/Compiler/Node.py index 96413c2..b7c34f1 100644 --- a/Compiler/Node.py +++ b/Compiler/Node.py @@ -129,9 +129,9 @@ def get_code(self, current_pointer, *args, **kwargs): token_id, index_node = self.node_literal.token_id, self.node_literal.node_expression code = get_move_right_index_cells_code(current_pointer, index_node) - offset = self.node_literal.get_offset() - offset_to_array = get_offset_to_variable(self.ids_map_list, token_id, current_pointer + offset) + offset = self.node_literal.get_offset() + 2 # it is offset by 2 because in "get_move_right_index_cells_code", we moved 2 extra cells to the right, for retrieving the value + offset_to_array = get_offset_to_variable(self.ids_map_list, token_id, current_pointer + offset) code += get_unary_prefix_op_code(self.token_operation, offset_to_array) @@ -171,9 +171,9 @@ def get_code(self, current_pointer, *args, **kwargs): token_id, index_node = self.node_literal.token_id, self.node_literal.node_expression code = get_move_right_index_cells_code(current_pointer, index_node) - offset = self.node_literal.get_offset() - offset_to_array = get_offset_to_variable(self.ids_map_list, token_id, current_pointer + offset) + offset = self.node_literal.get_offset() + 2 # it is offset by 2 because in "get_move_right_index_cells_code", we moved 2 extra cells to the right, for retrieving the value + offset_to_array = get_offset_to_variable(self.ids_map_list, token_id, current_pointer + offset) code += get_unary_postfix_op_code(self.token_operation, offset_to_array) @@ -236,8 +236,7 @@ def __init__(self, ids_map_list, token_id, struct_field): if struct_field is not None: self.struct_object = get_struct_from_id_token(ids_map_list, token_id) - def get_offset(self): - offset = 2 # it is +2 because that is where the value is + def get_offset(self, offset=0): if self.struct_field is not None: offset -= get_offset_to_field(self.struct_object, self.struct_field) @@ -269,7 +268,7 @@ def __init__(self, ids_map_list, token_id, node_expression, struct_field=None): self.node_expression = node_expression def get_code(self, current_pointer, *args, **kwargs): - offset = self.get_offset() + offset = self.get_offset() + 2 # it is +2 because that is where the value is code = get_move_right_index_cells_code(current_pointer, self.node_expression) code += get_copy_from_variable_code(self.ids_map_list, self.token_id, current_pointer + offset) @@ -319,7 +318,7 @@ def __init__(self, ids_map_list, token_id, node_expression_index, assign_token, def get_code(self, current_pointer, *args, **kwargs): # index, steps_taken_counter, value - offset = self.get_offset() + offset = self.get_offset() + 2 # it is +2 because that is where the value is code = self.node_expression_index.get_code(current_pointer) code += "[-]" # counter = 0 @@ -365,21 +364,25 @@ def get_code(self, current_pointer, *args, **kwargs): return code -class NodeArrayAssignment(Node): +class NodeArrayAssignment(NodeArrayElement): """ Used for array assignment E.g arr = = { 1, 2, 3... } """ - def __init__(self, ids_map_list, token_id, literal_tokens_list): - Node.__init__(self, ids_map_list) + def __init__(self, ids_map_list, token_id, literal_tokens_list, struct_field=None): + NodeArrayElement.__init__(self, ids_map_list, token_id, struct_field) self.token_id = token_id self.literal_tokens_list = literal_tokens_list def get_code(self, current_pointer, *args, **kwargs): - array_dimensions = get_variable_dimensions_from_token(self.ids_map_list, self.token_id) + if self.struct_field is not None: + array_dimensions = self.struct_object.get_field_dimensions(self.struct_field) + else: + array_dimensions = get_variable_dimensions_from_token(self.ids_map_list, self.token_id) unpacked_literals_list = unpack_literal_tokens_to_array_dimensions(self.token_id, array_dimensions, self.literal_tokens_list) - offset = get_offset_to_variable(self.ids_map_list, self.token_id, current_pointer) + field_offset = self.get_offset() + offset = get_offset_to_variable(self.ids_map_list, self.token_id, current_pointer + field_offset) code = "<" * offset # point to first array element for literal in unpacked_literals_list: code += get_literal_token_code(literal) # evaluate this literal and point to next array element From 5df41889aab8566c43051bc0162b2c4a6d8e0646 Mon Sep 17 00:00:00 2001 From: NeeEoo Date: Sat, 17 Apr 2021 19:55:17 +0200 Subject: [PATCH 5/5] Made the function return cell size dynamic --- Compiler/Compiler.py | 8 +++---- Compiler/FunctionCompiler.py | 34 ++++++++++++++++------------- Compiler/Globals.py | 9 ++++++++ Compiler/LibraryFunctionCompiler.py | 16 +++++++------- Compiler/Node.py | 10 +++++---- 5 files changed, 46 insertions(+), 31 deletions(-) diff --git a/Compiler/Compiler.py b/Compiler/Compiler.py index fd26f73..779754e 100644 --- a/Compiler/Compiler.py +++ b/Compiler/Compiler.py @@ -48,10 +48,9 @@ def create_struct_object(self): # struct syntax: STRUCT ID LBRACE ((INT | STRUCT ID) ID ((LBRACK NUM RBRACK)+)? SEMICOLON)+ RBRACE SEMICOLON self.parser.check_next_tokens_are([Token.ID, Token.LBRACE]) - self.parser.advance_token() # point to ID - struct_name_token = self.parser.current_token() + struct_name_token = self.parser.next_token() struct_name = struct_name_token.data - self.parser.advance_token(amount=2) # point to after LBRACE + self.parser.advance_token(amount=3) # point to after LBRACE struct_object = Struct(struct_name, struct_name_token) @@ -62,7 +61,8 @@ def create_struct_object(self): if token.type == Token.STRUCT: self.parser.check_next_tokens_are([Token.ID, Token.ID]) field_struct_id = self.parser.next_token().data - field_name = self.parser.next_token(2).data + field_name_token = self.parser.next_token(2) + field_name = field_name_token.data if self.parser.next_token(3).type == Token.LBRACK: self.parser.advance_token(amount=3) # point to LBRACK diff --git a/Compiler/FunctionCompiler.py b/Compiler/FunctionCompiler.py index c80ba06..5f71248 100644 --- a/Compiler/FunctionCompiler.py +++ b/Compiler/FunctionCompiler.py @@ -4,7 +4,7 @@ from .Functions import check_function_exists, get_function_object from .General import get_variable_dimensions_from_token, get_move_to_return_value_cell_code, get_print_string_code, get_variable_from_ID_token from .General import get_variable_size_from_token, get_literal_token_value, process_switch_cases, is_token_literal -from .Globals import create_variable_from_definition, get_global_variables, get_variable_size, is_variable_array +from .Globals import create_variable_from_definition, get_global_variables, get_variable_size, is_variable_array, get_data_type_size from .Node import NodeToken, NodeArraySetElement, NodeUnaryPrefix, NodeUnaryPostfix, NodeArrayGetElement, NodeFunctionCall, NodeArrayAssignment from .Node import NodeStructGetField, NodeStructSetField from .Parser import Parser @@ -33,10 +33,11 @@ def __init__(self, name, tokens): self.tokens = tokens self.parser = Parser(self.tokens) self.ids_map_list = list() - self.type = None + self.return_type = None + self.return_size = None + self.return_value_cell = None # will be set on every call to this function self.parameters = None self.process_function_definition() # sets type and parameters - self.return_value_cell = None # will be set on every call to this function """ ids_map_list is a list of named tuples. Each tuple represents a scope, and holds 2 items: @@ -63,7 +64,8 @@ def process_function_definition(self): parameters = self.get_function_parameters_declaration() # parser now points to LBRACE = beginning of function scope - self.type = function_return_type + self.return_type = function_return_type.type + self.return_size = get_data_type_size(self.return_type) self.parameters = parameters def get_code(self, current_stack_pointer): @@ -85,7 +87,7 @@ def get_code(self, current_stack_pointer): # new stack pointer should be at least that size assert self.current_stack_pointer() <= current_stack_pointer self.return_value_cell = current_stack_pointer - self.set_stack_pointer(current_stack_pointer+1) # make room for return_value cell. next available cell is the next one after it. + self.set_stack_pointer(current_stack_pointer + self.return_size) # make room for return_value cell. next available cell is the next one after it. function_code = self.compile_function_scope(self.parameters) self.remove_ids_map() # Global variables return function_code @@ -267,14 +269,6 @@ def insert_to_ids_map(self, variable): ids_map.next_available_cell += get_variable_size(variable) ids_map.IDs_dict[variable.name] = variable - def reserve_cell_in_ids_map(self): - """ - reserve cell by increasing the "pointer" of the next available cell - this is used for making room for return_value cell - """ - ids_map = self.ids_map_list[0] - ids_map.next_available_cell += 1 - def variables_dict_size(self, variables_dict_index): variables_dict = self.ids_map_list[variables_dict_index].IDs_dict @@ -359,7 +353,7 @@ def enter_function_scope(self, parameters): for parameter in parameters: self.insert_to_ids_map(parameter) - code = '>' # skip return_value_cell + code = ">" * self.return_size # skip return_value_cell code += self.insert_scope_variables_into_ids_map() # this inserts scope variables AND moves pointer right, with the amount of BOTH parameters and scope variables @@ -871,6 +865,9 @@ def compile_expression_list(self): def compile_return(self): # this assumes that the return is the last statement in the function + if self.return_type == Token.VOID: + raise BFSemanticError("Unable to return from a function has a return type of VOID at '%s'" % self.parser.current_token()) + self.parser.advance_token() # skip return if self.parser.current_token().type == Token.SEMICOLON: # return; @@ -1301,9 +1298,16 @@ def compile_function_scope(self, parameters): assert self.parser.current_token().type == Token.LBRACE + # print(self.return_type, self.return_size) + # print(self.current_stack_pointer()) + code = self.enter_function_scope(parameters) + # print(self.current_stack_pointer()) code += self.compile_scope_statements() + # print(self.current_stack_pointer()) code += self.exit_scope() - code += "<" # point to return_value_cell + # print(self.current_stack_pointer()) + # print() + code += "<" * self.return_size # point to return_value_cell return code diff --git a/Compiler/Globals.py b/Compiler/Globals.py index 1ba0bd4..91a6738 100644 --- a/Compiler/Globals.py +++ b/Compiler/Globals.py @@ -11,6 +11,11 @@ global_variables = list() # Global list of global variables +data_sizes = { + Token.INT: 1, + Token.VOID: 0 +} + # variables def get_global_variables(): @@ -52,6 +57,10 @@ def is_variable_array(variable): return variable.dimensions != [1] +def get_data_type_size(data_type): + return data_sizes[data_type] + + def create_variable_from_definition(parser, index=None, advance_tokens=False): """ processes the variable definition at index, and returns the variable named tuple diff --git a/Compiler/LibraryFunctionCompiler.py b/Compiler/LibraryFunctionCompiler.py index 5b6567d..fc9da60 100644 --- a/Compiler/LibraryFunctionCompiler.py +++ b/Compiler/LibraryFunctionCompiler.py @@ -1,11 +1,13 @@ from .Functions import insert_function_object +from .Globals import get_data_type_size from .Token import Token class LibraryFunctionCompiler: - def __init__(self, name, type, parameters, code): + def __init__(self, name, return_type, parameters, code): self.name = name - self.type = type + self.return_type = return_type + self.return_size = get_data_type_size(return_type) self.parameters = parameters self.code = code @@ -61,10 +63,9 @@ def get_readint_code(): def get_printint_code(): - # return_cell value_to_print_cell + # value_to_print_cell - code = ">" # point to value_to_print cell - code += ">[-]" * 8 + "<" * 8 # zero some cells + code = ">[-]" * 8 + "<" * 8 # zero some cells code += ">++++++++++<" # div amount code += "[->-[>+>>]>[+[<+>-]>+>>]<<<<<]" # value_to_print/10 @@ -97,7 +98,6 @@ def get_printint_code(): code += "[-]" # zero the third digit code += "<<" # point to value_to_print_cell which is 0 - code += "<" # point to return_cell return code @@ -108,8 +108,8 @@ def get_readchar_code(): def get_printchar_code(): - # point to parameter, output it, and then point back to "return value cell" - code = ">.<" + # output parameter + code = "." return code diff --git a/Compiler/Node.py b/Compiler/Node.py index 2717212..fb1a806 100644 --- a/Compiler/Node.py +++ b/Compiler/Node.py @@ -212,18 +212,20 @@ def __init__(self, ids_map_list, function_to_call, parameters): self.parameters = parameters def get_code(self, current_pointer, *args, **kwargs): - code = '[-]>' # return_value_cell=0 + return_size = self.function_to_call.return_size + + code = '[-]>' * return_size # return_value_cell=0 # evaluate parameters from left to right, and put them on the "stack" in that order # after each parameter code, the pointer points to the next available cell (one after the parameter) for i, parameter in enumerate(self.parameters): - code += parameter.get_code(current_pointer+1+i) # evaluate each parameter at its cell offset (starting at one after return_value_cell) + code += parameter.get_code(current_pointer + return_size + i) # evaluate each parameter at its cell offset (starting at one after return_value_cell) # at this point we point to one after the last parameter code += "<" * len(self.parameters) # point back to first parameter - code += "<" # point to return_value_cell + code += "<" * return_size # point to return_value_cell code += self.function_to_call.get_code(current_stack_pointer=current_pointer) # after this we point to return value cell - code += ">" # point to next available cell (one after return value) + code += ">" # point to next available cell return code