From f9a0ea4469c63743a941e4de68bb475e2800d805 Mon Sep 17 00:00:00 2001 From: Harrand Date: Tue, 14 May 2024 03:29:06 +0100 Subject: [PATCH] [cpp] implementing lexing, parsing and semal for return statements. --- cpp/src/ast.hpp | 16 ++++++++++++++-- cpp/src/lex.cpp | 5 +++++ cpp/src/lex.hpp | 1 + cpp/src/parse.cpp | 39 ++++++++++++++++++++++++++++++++++++++- cpp/src/semal.cpp | 17 ++++++++++++++++- samples/scratchpad.psy | 6 +++++- 6 files changed, 79 insertions(+), 5 deletions(-) diff --git a/cpp/src/ast.hpp b/cpp/src/ast.hpp index 38ce901..9f8ee3f 100644 --- a/cpp/src/ast.hpp +++ b/cpp/src/ast.hpp @@ -94,6 +94,17 @@ struct ast bool operator==(const function_call& rhs) const = default; }; + struct return_statement + { + std::optional expr; + constexpr std::string to_string() const + { + return std::format("return({})", expr.has_value() ? expr.value()->to_string() : ""); + } + bool operator==(const return_statement& rhs) const = default; + }; + + struct expression { std::variant @@ -104,7 +115,8 @@ struct ast ast::decimal_literal, ast::identifier, ast::variable_declaration, - ast::function_call + ast::function_call, + ast::return_statement > expr; bool capped = false; constexpr std::string to_string() const @@ -160,7 +172,7 @@ struct ast struct node { - using payload_t = std::variant; + using payload_t = std::variant; payload_t payload = std::monostate{}; srcloc meta = {}; std::vector children = {}; diff --git a/cpp/src/lex.cpp b/cpp/src/lex.cpp index 9357df1..4d84c25 100644 --- a/cpp/src/lex.cpp +++ b/cpp/src/lex.cpp @@ -301,6 +301,11 @@ namespace lex { return token{.t = type::question_mark}; } + else if(data.starts_with("return")) + { + state.advance(5); + return token{.t = type::return_statement, .lexeme = "return"}; + } else if(data.starts_with("$")) { return token{.t = type::dollar_sign}; diff --git a/cpp/src/lex.hpp b/cpp/src/lex.hpp index cc2e04d..c7794c1 100644 --- a/cpp/src/lex.hpp +++ b/cpp/src/lex.hpp @@ -39,6 +39,7 @@ namespace lex operator_ref, operator_deref, question_mark, + return_statement, initialiser, dollar_sign, _count, diff --git a/cpp/src/parse.cpp b/cpp/src/parse.cpp index 65baf56..0746efa 100644 --- a/cpp/src/parse.cpp +++ b/cpp/src/parse.cpp @@ -61,6 +61,8 @@ namespace parse bool reduce_from_block(std::size_t offset); // given an ast::meta_region subtree at the offset, try to reduce its surrounding tokens/atoms into something bigger. returns true on success, false otherwise. bool reduce_from_meta_region(std::size_t offset); + // given an ast::return_statement subtree at the offset, try to reduce its surrounding tokens/atoms into something bigger. returns true on success, false otherwise. + bool reduce_from_return_statement(std::size_t offset); // given a non-ast token at the offset, try to reduce it and its surrounding tokens/atoms into something bigger. returns true on success, false otherwise. bool reduce_from_token(std::size_t offset); @@ -287,7 +289,6 @@ namespace parse retriever retr{*this, offset}; srcloc meta; auto value = retr.must_retrieve(&meta); - bool check = value.iden == "putchar" && (offset + 4 < this->subtrees.size()); if(!retr.avail()){return false;} auto colon1 = retr.retrieve(); @@ -664,6 +665,15 @@ namespace parse return false; } + bool parser_state::reduce_from_return_statement(std::size_t offset) + { + retriever retr{*this, offset}; + srcloc meta; + auto value = retr.must_retrieve(&meta); + retr.reduce_to(ast::expression{.expr = value, .capped = true}, meta); + return true; + } + bool parser_state::reduce_from_token(std::size_t offset) { retriever retr{*this, offset}; @@ -801,6 +811,29 @@ namespace parse return true; } break; + case lex::type::return_statement: + { + if(!retr.avail()){return false;} + auto expr = retr.retrieve(); + if(!expr.has_value()) + { + retr.undo(); + } + // if we dont have an expression, we must have a semicolon. + // if we do have an expression, we need a semicolon if its not already capped. + if(!expr.has_value() || (expr.has_value() && !expr->capped)) + { + if(!retr.avail()){return false;} + auto semicolon = retr.retrieve(); + if(!semicolon.has_value() || semicolon->t != lex::type::semicolon) + { + return false; + } + } + retr.reduce_to(ast::return_statement{.expr = expr}, meta); + return true; + } + break; default: break; } if(token_is_unary_operator(value)) @@ -876,6 +909,10 @@ namespace parse { ret = this->reduce_from_function_call(i); }, + [&](ast::return_statement arg) + { + ret = this->reduce_from_return_statement(i); + }, [&](ast::expression arg) { ret = this->reduce_from_expression(i); diff --git a/cpp/src/semal.cpp b/cpp/src/semal.cpp index 0cc4403..a89b1db 100644 --- a/cpp/src/semal.cpp +++ b/cpp/src/semal.cpp @@ -571,6 +571,21 @@ namespace semal return maybe_function->return_ty; } + type return_statement(const data& d, const ast::return_statement& payload) + { + const function_t* maybe_parent = d.state.try_find_parent_function(d.tree, d.path); + d.assert_that(maybe_parent != nullptr, "detected return statement outside of a function. return statements are only valid within function implementation blocks."); + if(payload.expr.has_value()) + { + type ret = expression(d, *payload.expr.value()); + d.assert_that(!maybe_parent->return_ty.is_void(), std::format("return statement was an expression of type \"{}\", but the enclosing function \"{}\" returns {}. do not return an expression.", ret.name(), maybe_parent->name, maybe_parent->return_ty.name())); + d.assert_that(ret == maybe_parent->return_ty, std::format("type of `return` statement \"{}\" does not match the return-type of the enclosing function \"{}\", which is a \"{}\"", ret.name(), maybe_parent->name, maybe_parent->return_ty.name())); + return ret; + } + d.assert_that(maybe_parent->return_ty.is_void(), std::format("detected empty `return` statement within function \"{}\" which doesn't return u0. `return;` is only valid in functions that return `u0`.", maybe_parent->name)); + return type::from_primitive(primitive_type::u0); + } + type variable_declaration(const data& d, const ast::variable_declaration& payload) { if(d.path.size() <= 1) @@ -696,11 +711,11 @@ namespace semal { ret = for_statement(d, forst); }, + */ [&](ast::return_statement returnst) { ret = return_statement(d, returnst); }, - */ [&](ast::variable_declaration decl) { ret = variable_declaration(d, decl); diff --git a/samples/scratchpad.psy b/samples/scratchpad.psy index 249d0a3..2ee172c 100644 --- a/samples/scratchpad.psy +++ b/samples/scratchpad.psy @@ -24,7 +24,7 @@ complicated :: (par1 : i64, par : i8, par3 : u32) -> i64 } poggers : i64; -morb :: (par0 : i64) -> u0 +morb :: (par0 : i64) -> f64 { poggers1 : i64; poggers2 : i64; @@ -32,6 +32,10 @@ morb :: (par0 : i64) -> u0 //morb2 :: (par0 : i64) -> u0 := extern; complicated(poggers, dub(5), 3); complicated(--complicated(1, 2, 3), dub(5), 690); + + retval : f64; + return retval; + //return morb(5 * complicated(dub(5))); } morb(65);