diff --git a/include/ast.hpp b/include/ast.hpp index c302544e..33131e33 100644 --- a/include/ast.hpp +++ b/include/ast.hpp @@ -71,6 +71,7 @@ struct DeclNode // NOLINT(cppcoreguidelines-special-member-functions) std::string id; std::unique_ptr type; + bool is_global{false}; }; /// @note This is an abstract class. @@ -86,6 +87,7 @@ struct ExprNode // NOLINT(cppcoreguidelines-special-member-functions) std::unique_ptr type = std::make_unique(PrimitiveType::kUnknown); + bool is_global{false}; }; /// @brief A designator node is used to explicitly reference a member for diff --git a/src/llvm_ir_generator.cpp b/src/llvm_ir_generator.cpp index 62eb4093..c66db671 100644 --- a/src/llvm_ir_generator.cpp +++ b/src/llvm_ir_generator.cpp @@ -163,39 +163,82 @@ void LLVMIRGenerator::Visit(const DeclStmtNode& decl_stmt) { void LLVMIRGenerator::Visit(const VarDeclNode& decl) { auto var_type = builder_helper_.GetLLVMType(*(decl.type)); - auto addr = builder_.CreateAlloca(var_type); - if (decl.init) { - decl.init->Accept(*this); - auto val = val_recorder.ValOfPrevExpr(); - builder_.CreateStore(val, addr); + if (decl.is_global) { + auto global = module_.getOrInsertGlobal(decl.id, var_type); + auto global_var = llvm::dyn_cast(global); + llvm::Constant* const_val = nullptr; + if (decl.init) { + decl.init->Accept(*this); + auto val = val_recorder.ValOfPrevExpr(); + const_val = llvm::dyn_cast(val); + assert(const_val); + } else { + const_val = llvm::ConstantInt::get(builder_.getInt32Ty(), 0, true); + } + global_var->setInitializer(const_val); + id_to_val[decl.id] = global_var; + } else { + auto addr = builder_.CreateAlloca(var_type); + if (decl.init) { + decl.init->Accept(*this); + auto val = val_recorder.ValOfPrevExpr(); + builder_.CreateStore(val, addr); + } + id_to_val[decl.id] = addr; } - id_to_val[decl.id] = addr; } void LLVMIRGenerator::Visit(const ArrDeclNode& arr_decl) { - auto arr_type = builder_helper_.GetLLVMType(*(arr_decl.type)); - auto base_addr = 
builder_.CreateAlloca(arr_type); - id_to_val[arr_decl.id] = base_addr; + auto type = builder_helper_.GetLLVMType(*(arr_decl.type)); + if (arr_decl.is_global) { + auto global_arr = module_.getOrInsertGlobal(arr_decl.id, type); + id_to_val[arr_decl.id] = global_arr; + } else { + auto addr = builder_.CreateAlloca(type); + id_to_val[arr_decl.id] = addr; + } auto arr_decl_type = dynamic_cast(arr_decl.type.get()); - for (auto i = std::size_t{0}, e = arr_decl_type->len(); i < e; ++i) { - if (i < arr_decl.init_list.size()) { + // This vector stores the initial values for a global array. + std::vector arr_elems{}; + for (auto i = std::size_t{0}, e = arr_decl_type->len(), + init_len = arr_decl.init_list.size(); + i < e; ++i) { + if (i < init_len) { auto& arr_init = arr_decl.init_list.at(i); arr_init->Accept(*this); } - auto res_addr = - builder_.CreateConstInBoundsGEP2_32(arr_type, base_addr, 0, i); - - if (i < arr_decl.init_list.size()) { + if (i < init_len) { auto init_val = val_recorder.ValOfPrevExpr(); - builder_.CreateStore(init_val, res_addr); + if (arr_decl.is_global) { + auto const_val = llvm::dyn_cast(init_val); + arr_elems.push_back(const_val); + } else { + auto res_addr = builder_.CreateConstInBoundsGEP2_32( + type, id_to_val.at(arr_decl.id), 0, i); + builder_.CreateStore(init_val, res_addr); + } } else { - // set remaining elements as 0 auto zero = llvm::ConstantInt::get(builder_.getInt32Ty(), 0, true); - builder_.CreateStore(zero, res_addr); + // A global array is always initialized to 0, + // but a local array remains uninitialized if no values are provided. 
+ if (arr_decl.is_global) { + arr_elems.push_back(zero); + } else if (!arr_decl.is_global && init_len != 0) { + auto res_addr = builder_.CreateConstInBoundsGEP2_32( + type, id_to_val.at(arr_decl.id), 0, i); + builder_.CreateStore(zero, res_addr); + } } } + + if (arr_decl.is_global) { + auto arr_type = llvm::dyn_cast(type); + auto arr_init = llvm::ConstantArray::get(arr_type, arr_elems); + auto global_arr = module_.getGlobalVariable(arr_decl.id); + global_arr->setInitializer(arr_init); + } } void LLVMIRGenerator::Visit(const RecordDeclNode& record_decl) { diff --git a/src/qbe_ir_generator.cpp b/src/qbe_ir_generator.cpp index 5baaa55e..36434878 100644 --- a/src/qbe_ir_generator.cpp +++ b/src/qbe_ir_generator.cpp @@ -94,6 +94,12 @@ auto // dependency. = std::map{}; +auto + reg_num_to_id // NOLINT(cppcoreguidelines-avoid-non-const-global-variables): + // Accessible only within this translation unit; declaring as + // a data member introduces unnecessary dependency. + = std::map{}; + /// @brief Every expression generates a temporary. The local number of such /// temporary should be stored, so can propagate to later uses. class PrevExprNumRecorder { @@ -133,6 +139,32 @@ auto label_views_of_jumpable_blocks // NOLINT(cppcoreguidelines-avoid-non-const-global-variables) = std::vector{}; +struct GlobalVarInitVal { + // TODO: support other types, such as float, long, short + std::variant value; + std::shared_ptr type; +}; + +/// @brief Stores values from bottom level nodes and passes them to upper level +/// nodes because global variables are required to initialize exact values at +/// declaration. +auto + global_var_init_vals // NOLINT(cppcoreguidelines-avoid-non-const-global-variables) + = std::vector{}; + +/// @brief Generate corresponding QBE string based on `type` and `value`. +/// @return formatted QBE string with type and value, e.g. 
w 4 (word of value 4) +std::string GenerateQBEInit(const GlobalVarInitVal& init) { + return std::visit( + [&init](auto&& arg) { + if (init.type->IsEqual(PrimitiveType::kInt)) { + return "w " + std::to_string(arg); + } + return std::string(""); + }, + init.value); +} + } // namespace void QbeIrGenerator::Visit(const DeclStmtNode& decl_stmt) { @@ -144,65 +176,113 @@ void QbeIrGenerator::Visit(const DeclStmtNode& decl_stmt) { } void QbeIrGenerator::Visit(const VarDeclNode& decl) { - int id_num = NextLocalNum(); - WriteInstr_("{} =l alloc{} {}", FuncScopeTemp{id_num}, decl.type->size(), - decl.type->size()); - if (decl.init) { - decl.init->Accept(*this); - int init_num = num_recorder.NumOfPrevExpr(); - // A pointer declaration may have two options for its right hand side: - if (decl.init->type->IsPtr() || decl.init->type->IsFunc()) { - // 1. int* a = &b; rhs is a reference of integer. We need to store b's - // address to a, where we need to map b's reg_num back to its id_num. - if (dynamic_cast((decl.init).get())) { - WriteInstr_("storel {}, {}", - FuncScopeTemp{reg_num_to_id_num.at(init_num)}, - FuncScopeTemp{id_num}); + if (decl.is_global) { + // TODO: support different data types. + if (decl.init) { + global_var_init_vals.clear(); + decl.init->Accept(*this); + Write_("export data {} = align {} {{ {} }}\n", + user_defined::GlobalPointer{decl.id}, decl.type->size(), + GenerateQBEInit(global_var_init_vals.at(0))); + } else { + // `z n` in QBE reserves n zero-initialized bytes. 
+ Write_("export data {0} = align {1} {{ z {1} }}\n", + user_defined::GlobalPointer{decl.id}, decl.type->size()); + } + } else { + int id_num = NextLocalNum(); + WriteInstr_("{} =l alloc{} {}", FuncScopeTemp{id_num}, decl.type->size(), + decl.type->size()); + if (decl.init) { + decl.init->Accept(*this); + int init_num = num_recorder.NumOfPrevExpr(); + // A pointer declaration may have two options for its right hand side: + if (decl.init->type->IsPtr() || decl.init->type->IsFunc()) { + // 1. int* a = &b; rhs is a reference of integer. We need to store b's + // address to a, where we need to map b's reg_num back to its id_num. + if (dynamic_cast((decl.init).get())) { + WriteInstr_("storel {}, {}", + FuncScopeTemp{reg_num_to_id_num.at(init_num)}, + FuncScopeTemp{id_num}); + } else { + // 2. int* a = c; c itself stores the address of another integer. We + // can directly use the address c currently holds. + WriteInstr_("storel {}, {}", FuncScopeTemp{init_num}, + FuncScopeTemp{id_num}); + } } else { - // 2. int* a = c; c itself stores the address of another integer. We can - // directly use the address c currently holds. - WriteInstr_("storel {}, {}", FuncScopeTemp{init_num}, + WriteInstr_("storew {}, {}", FuncScopeTemp{init_num}, FuncScopeTemp{id_num}); } - } else { - WriteInstr_("storew {}, {}", FuncScopeTemp{init_num}, - FuncScopeTemp{id_num}); } + // Set up the number of the id so we know where to load it back. + id_to_num[decl.id] = id_num; } - // Set up the number of the id so we know were to load it back. 
- id_to_num[decl.id] = id_num; } void QbeIrGenerator::Visit(const ArrDeclNode& arr_decl) { - int base_addr_num = NextLocalNum(); - assert(arr_decl.type->IsArr()); - const auto* arr_type = dynamic_cast((arr_decl.type).get()); - auto element_size = arr_type->element_type().size(); - WriteInstr_("{} =l alloc{} {}", FuncScopeTemp{base_addr_num}, element_size, - arr_decl.type->size()); - id_to_num[arr_decl.id] = base_addr_num; - - for (auto i = std::size_t{0}, e = arr_type->len(); i < e; ++i) { - if (i < arr_decl.init_list.size()) { + if (arr_decl.is_global) { + const auto* arr_type = dynamic_cast((arr_decl.type).get()); + assert(arr_type); + Write_("export data {} = align {} {{ ", + user_defined::GlobalPointer{arr_decl.id}, + arr_type->element_type().size()); + + global_var_init_vals.clear(); + auto arr_size = arr_type->len(); + auto init_len = arr_decl.init_list.size(); + assert(init_len <= arr_size); + for (auto i = std::size_t{0}; i < init_len; ++i) { auto& arr_init = arr_decl.init_list.at(i); arr_init->Accept(*this); + Write_("{}", GenerateQBEInit(global_var_init_vals.at(i))); + if (i != arr_size - 1) { + Write_(", "); + } } - const int offset = NextLocalNum(); - WriteInstr_("{} =l extsw {}", FuncScopeTemp{offset}, i * element_size); + // set remaining elements as 0 + if (init_len < arr_size) { + Write_("z {}", (arr_size - init_len) * arr_type->element_type().size()); + } + Write_(" }}\n"); + } else { + int base_addr_num = NextLocalNum(); + assert(arr_decl.type->IsArr()); + const auto* arr_type = dynamic_cast((arr_decl.type).get()); + auto element_size = arr_type->element_type().size(); + WriteInstr_("{} =l alloc{} {}", FuncScopeTemp{base_addr_num}, element_size, + arr_decl.type->size()); + id_to_num[arr_decl.id] = base_addr_num; + + // NOTE: Compiler will not set elements to 0 if `init_len` is 0. + // 6.7.9 Initialization + // 10. If an object that has automatic storage duration is not initialized + // explicitly, its value is indeterminate. 
+ for (auto i = std::size_t{0}, e = arr_type->len(), + init_len = arr_decl.init_list.size(); + i < e && init_len != 0; ++i) { + if (i < init_len) { + auto& arr_init = arr_decl.init_list.at(i); + arr_init->Accept(*this); + } + + const int offset = NextLocalNum(); + WriteInstr_("{} =l extsw {}", FuncScopeTemp{offset}, i * element_size); - // res_addr = base_addr + offset - const int res_addr_num = NextLocalNum(); - WriteInstr_("{} =l add {}, {}", FuncScopeTemp{res_addr_num}, - FuncScopeTemp{base_addr_num}, FuncScopeTemp{offset}); + // res_addr = base_addr + offset + const int res_addr_num = NextLocalNum(); + WriteInstr_("{} =l add {}, {}", FuncScopeTemp{res_addr_num}, + FuncScopeTemp{base_addr_num}, FuncScopeTemp{offset}); - if (i < arr_decl.init_list.size()) { - int init_val_num = num_recorder.NumOfPrevExpr(); - WriteInstr_("storew {}, {}", FuncScopeTemp{init_val_num}, - FuncScopeTemp{res_addr_num}); - } else { - // set remaining elements as 0 - WriteInstr_("storew 0, {}", FuncScopeTemp{res_addr_num}); + if (i < init_len) { + int init_val_num = num_recorder.NumOfPrevExpr(); + WriteInstr_("storew {}, {}", FuncScopeTemp{init_val_num}, + FuncScopeTemp{res_addr_num}); + } else { + // set remaining elements as 0 + WriteInstr_("storew 0, {}", FuncScopeTemp{res_addr_num}); + } } } } @@ -644,28 +724,41 @@ void QbeIrGenerator::Visit(const IdExprNode& id_expr) { num_recorder.Record(res_num); return; } - assert(id_to_num.count(id_expr.id) != 0); - /// @brief Plays the role of a "pointer". Its value has to be loaded to - /// the register before use. 
- int id_num = id_to_num.at(id_expr.id); - int reg_num = NextLocalNum(); - if (id_expr.type->IsPtr() || id_expr.type->IsFunc()) { - WriteInstr_("{} =l loadl {}", FuncScopeTemp{reg_num}, - FuncScopeTemp{id_num}); - } else { + if (id_expr.is_global) { + int reg_num = NextLocalNum(); WriteInstr_("{} =w loadw {}", FuncScopeTemp{reg_num}, - FuncScopeTemp{id_num}); + user_defined::GlobalPointer{id_expr.id}); + num_recorder.Record(reg_num); + reg_num_to_id[reg_num] = id_expr.id; + } else { + assert(id_to_num.count(id_expr.id) != 0); + /// @brief Plays the role of a "pointer". Its value has to be loaded to + /// the register before use. + int id_num = id_to_num.at(id_expr.id); + int reg_num = NextLocalNum(); + if (id_expr.type->IsPtr() || id_expr.type->IsFunc()) { + WriteInstr_("{} =l loadl {}", FuncScopeTemp{reg_num}, + FuncScopeTemp{id_num}); + } else { + WriteInstr_("{} =w loadw {}", FuncScopeTemp{reg_num}, + FuncScopeTemp{id_num}); + } + num_recorder.Record(reg_num); + // Map the temporary reg_num to id_num, so that upper level nodes can store + // value to id_num instead of reg_num. + reg_num_to_id_num[reg_num] = id_num; } - num_recorder.Record(reg_num); - // Map the temporary reg_num to id_num, so that upper level nodes can store - // value to id_num instead of reg_num. 
- reg_num_to_id_num[reg_num] = id_num; } void QbeIrGenerator::Visit(const IntConstExprNode& int_expr) { - int num = NextLocalNum(); - WriteInstr_("{} =w copy {}", FuncScopeTemp{num}, int_expr.val); - num_recorder.Record(num); + if (int_expr.is_global) { + global_var_init_vals.push_back( + {int_expr.val, std::make_shared(PrimitiveType::kInt)}); + } else { + int num = NextLocalNum(); + WriteInstr_("{} =w copy {}", FuncScopeTemp{num}, int_expr.val); + num_recorder.Record(num); + } } void QbeIrGenerator::Visit(const ArgExprNode& arg_expr) { @@ -675,8 +768,6 @@ void QbeIrGenerator::Visit(const ArgExprNode& arg_expr) { void QbeIrGenerator::Visit(const ArrSubExprNode& arr_sub_expr) { arr_sub_expr.arr->Accept(*this); const int reg_num = num_recorder.NumOfPrevExpr(); - // address of the first element - const int base_addr = reg_num_to_id_num.at(reg_num); arr_sub_expr.index->Accept(*this); const int index_num = num_recorder.NumOfPrevExpr(); @@ -696,8 +787,16 @@ void QbeIrGenerator::Visit(const ArrSubExprNode& arr_sub_expr) { // res_addr = base_addr + offset const int res_addr_num = NextLocalNum(); - WriteInstr_("{} =l add {}, {}", FuncScopeTemp{res_addr_num}, - FuncScopeTemp{base_addr}, FuncScopeTemp{offset}); + if (arr_sub_expr.is_global) { + const auto id = reg_num_to_id.at(reg_num); + WriteInstr_("{} =l add {}, {}", FuncScopeTemp{res_addr_num}, + user_defined::GlobalPointer{id}, FuncScopeTemp{offset}); + } else { + // address of the first element + const int base_addr = reg_num_to_id_num.at(reg_num); + WriteInstr_("{} =l add {}, {}", FuncScopeTemp{res_addr_num}, + FuncScopeTemp{base_addr}, FuncScopeTemp{offset}); + } // load value from res_addr const int res_num = NextLocalNum(); @@ -994,11 +1093,28 @@ void QbeIrGenerator::Visit(const SimpleAssignmentExprNode& assign_expr) { int rhs_num = num_recorder.NumOfPrevExpr(); if (assign_expr.lhs->type->IsPtr()) { // Assign pointer address to another pointer. 
- WriteInstr_("storel {}, {}", FuncScopeTemp{rhs_num}, - FuncScopeTemp{reg_num_to_id_num.at(lhs_num)}); + if (assign_expr.lhs->is_global) { + WriteInstr_("storel {}, {}", FuncScopeTemp{rhs_num}, + user_defined::GlobalPointer{reg_num_to_id.at(lhs_num)}); + } else { + WriteInstr_("storel {}, {}", FuncScopeTemp{rhs_num}, + FuncScopeTemp{reg_num_to_id_num.at(lhs_num)}); + } } else { - WriteInstr_("storew {}, {}", FuncScopeTemp{rhs_num}, - FuncScopeTemp{reg_num_to_id_num.at(lhs_num)}); + if (assign_expr.lhs->is_global) { + // Global array subscripting will return the target address instead of the + // address of `id`. + if (dynamic_cast(assign_expr.lhs.get())) { + WriteInstr_("storew {}, {}", FuncScopeTemp{rhs_num}, + FuncScopeTemp{reg_num_to_id_num.at(lhs_num)}); + } else { + WriteInstr_("storew {}, {}", FuncScopeTemp{rhs_num}, + user_defined::GlobalPointer{reg_num_to_id.at(lhs_num)}); + } + } else { + WriteInstr_("storew {}, {}", FuncScopeTemp{rhs_num}, + FuncScopeTemp{reg_num_to_id_num.at(lhs_num)}); + } } num_recorder.Record(rhs_num); } diff --git a/src/type_checker.cpp b/src/type_checker.cpp index b43d6813..5f55a220 100644 --- a/src/type_checker.cpp +++ b/src/type_checker.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -52,6 +53,13 @@ std::string MangleRecordTypeId(const std::string& id, } } +auto + id_is_global // NOLINT(cppcoreguidelines-avoid-non-const-global-variables): + // Accessible only within this translation unit; + // declaring as a data member introduces unnecessary + // dependency. 
+ = std::map{}; + } // namespace void TypeChecker::Visit(DeclStmtNode& decl_stmt) { @@ -62,6 +70,10 @@ void TypeChecker::Visit(DeclStmtNode& decl_stmt) { void TypeChecker::Visit(VarDeclNode& decl) { if (decl.init) { + if (env_.CurrentScopeKind() == ScopeKind::kFile) { + decl.init->is_global = true; + } + decl.init->Accept(*this); if (decl.init->type != decl.type) { // TODO: incompatible types when initializing type 'type' using type @@ -74,6 +86,11 @@ void TypeChecker::Visit(VarDeclNode& decl) { } else { auto symbol = std::make_unique(decl.id, decl.type->Clone()); env_.AddSymbol(std::move(symbol), env_.CurrentScopeKind()); + + if (env_.CurrentScopeKind() == ScopeKind::kFile) { + decl.is_global = true; + id_is_global[decl.id] = true; + } } } @@ -89,8 +106,17 @@ void TypeChecker::Visit(ArrDeclNode& arr_decl) { if (!init->type->IsEqual(*symbol->type)) { // TODO: element unmatches array element type } + + if (env_.CurrentScopeKind() == ScopeKind::kFile) { + init->is_global = true; + } } env_.AddSymbol(std::move(symbol), env_.CurrentScopeKind()); + + if (env_.CurrentScopeKind() == ScopeKind::kFile) { + arr_decl.is_global = true; + id_is_global[arr_decl.id] = true; + } } // TODO: Check initializer type @@ -142,8 +168,15 @@ void TypeChecker::Visit(RecordVarDeclNode& record_var_decl) { // TODO: type check between fields and initialized members. 
for (auto& init : record_var_decl.inits) { init->Accept(*this); + if (env_.CurrentScopeKind() == ScopeKind::kFile) { + init->is_global = true; + } } env_.AddSymbol(std::move(symbol), env_.CurrentScopeKind()); + if (env_.CurrentScopeKind() == ScopeKind::kFile) { + record_var_decl.is_global = true; + id_is_global[record_var_decl.id] = true; + } record_var_decl.type = record_type->type->Clone(); } @@ -384,6 +417,9 @@ void TypeChecker::Visit(InitExprNode& init_expr) { } init_expr.expr->Accept(*this); init_expr.type = init_expr.expr->type->Clone(); + if (env_.CurrentScopeKind() == ScopeKind::kFile) { + init_expr.expr->is_global = true; + } } void TypeChecker::Visit(ArrDesNode& arr_des) { @@ -404,6 +440,9 @@ void TypeChecker::Visit(NullExprNode&) { void TypeChecker::Visit(IdExprNode& id_expr) { if (auto symbol = env_.LookUpSymbol(id_expr.id)) { id_expr.type = symbol->type->Clone(); + if (id_is_global.count(id_expr.id) != 0) { + id_expr.is_global = true; + } } else { // TODO: 'id' undeclared assert(false); @@ -426,6 +465,7 @@ void TypeChecker::Visit(ArrSubExprNode& arr_sub_expr) { assert(arr_type); // arr_sub_expr should have the element type of the array. arr_sub_expr.type = arr_type->element_type().Clone(); + arr_sub_expr.is_global = arr_sub_expr.arr->is_global; } void TypeChecker::Visit(CondExprNode& cond_expr) { diff --git a/test/codegen/array.c b/test/codegen/array.c index 3e6b375a..9b4f0a0a 100644 --- a/test/codegen/array.c +++ b/test/codegen/array.c @@ -22,5 +22,7 @@ int main() { __builtin_print(e[1]); __builtin_print(e[2]); + // NOTE: Local scope array should not be 0-initialized, and the generated IR has to be checked manually. 
+ int f[2]; return 0; } diff --git a/test/codegen/global_decl.c b/test/codegen/global_decl.c new file mode 100644 index 00000000..8dc52d33 --- /dev/null +++ b/test/codegen/global_decl.c @@ -0,0 +1,26 @@ +int c; +int d = 6; + +int b[2]; +int a[3] = {6, 5, 3}; +int e[4] = {8, 9}; + +int main() { + __builtin_print(c); + d = 4; + __builtin_print(d); + + __builtin_print(b[0]); + + a[0] = 7; + a[2] = 4; + __builtin_print(a[0]); + __builtin_print(a[1]); + __builtin_print(a[2]); + + __builtin_print(e[0]); + __builtin_print(e[1]); + __builtin_print(e[2]); + __builtin_print(e[3]); + return 0; +} diff --git a/test/codegen/global_decl.exp b/test/codegen/global_decl.exp new file mode 100644 index 00000000..5f247f2f --- /dev/null +++ b/test/codegen/global_decl.exp @@ -0,0 +1,10 @@ +0 +4 +0 +7 +5 +4 +8 +9 +0 +0