From 6f2be360d31fcf8162069ef273830e2679fc1c28 Mon Sep 17 00:00:00 2001 From: Nathan Seymour Date: Tue, 26 Nov 2024 08:50:59 -0600 Subject: [PATCH] add tests and token api; --- CMakeLists.txt | 1 - include/buffalo/buffalo.h | 119 +++++++++++++++++++------ test/buffalo.test.cpp | 106 +++++++++++++++++++++++ test/stmtlist.test.cpp | 177 -------------------------------------- 4 files changed, 200 insertions(+), 203 deletions(-) delete mode 100644 test/stmtlist.test.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 16cd786..b0c04ad 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -29,7 +29,6 @@ target_link_libraries(buffalo INTERFACE ctre::ctre) # Tests add_executable(buffalo-test test/buffalo.test.cpp - test/stmtlist.test.cpp ) target_link_libraries(buffalo-test PRIVATE buffalo GTest::gtest_main) target_include_directories(buffalo-test PRIVATE include) diff --git a/include/buffalo/buffalo.h b/include/buffalo/buffalo.h index c6de39e..e48fd9a 100644 --- a/include/buffalo/buffalo.h +++ b/include/buffalo/buffalo.h @@ -1,12 +1,9 @@ #ifndef BUFFALO2_H #define BUFFALO2_H -#include +#include #include -#include #include -#include -#include #include #include #include @@ -285,14 +282,19 @@ namespace bf }; } - constexpr DefineTerminal(Associativity associativity, typename Terminal::ReasonerType reasoner = nullptr, typename G::UserDataType user_data = {}) + constexpr DefineTerminal(Associativity assoc = bf::None, typename G::UserDataType user_data = {}, typename Terminal::ReasonerType reasoner = nullptr) { - this->associativity = associativity; + this->associativity = assoc; + this->user_data = user_data; this->reasoner_ = reasoner; - this->user_data = std::move(user_data); } - constexpr DefineTerminal(typename bf::Terminal::ReasonerType reasoner = nullptr, typename G::UserDataType user_data = {}) : DefineTerminal(bf::None, reasoner, std::move(user_data)) {} + constexpr DefineTerminal(Associativity assoc, typename Terminal::ReasonerType reasoner) : DefineTerminal(assoc, {}, reasoner) {} + + constexpr DefineTerminal(typename G::UserDataType user_data) : DefineTerminal(bf::None, user_data, nullptr) {} + constexpr DefineTerminal(typename G::UserDataType user_data, typename Terminal::ReasonerType reasoner) : DefineTerminal(bf::None, user_data, reasoner) {} + + constexpr DefineTerminal(typename Terminal::ReasonerType reasoner) : DefineTerminal(bf::None, {}, reasoner) {} }; /** @@ -596,6 +598,7 @@ namespace bf [&](Terminal *terminal) { last_terminal = terminal; + this->terminals_.insert(terminal); }, [&](NonTerminal *child_nonterminal) { @@ -617,6 +620,7 @@ namespace bf Grammar(NonTerminal &start) : root(start) { this->EOS = std::make_unique>(); + this->terminals_.insert(this->EOS.get()); this->RegisterSymbols(&start); @@ -812,7 +816,7 @@ namespace bf class Parser { public: - virtual std::expected Parse(std::string_view input) = 0; + virtual std::expected Parse(std::string_view input, std::vector> *tokens) = 0; virtual ~Parser() = default; }; @@ -838,6 +842,67 @@ namespace bf ParseStackItem(lrstate_id_t state, typename G::ValueType value) : state(state), value(std::move(value)) {} }; + struct Tokenizer + { + SLRParser const &parser; + std::string_view input; + std::size_t index = 0; + + std::vector> *tokens; + + std::optional> Peek(lrstate_id_t state = 0, bool permissive = false) + { + while(this->index < this->input.size() && std::isspace(this->input[this->index])) this->index++; + + // IMPORTANT: No need to check for EOF, because it is checked for by special EOF terminal! + + if(permissive) + { + for(auto terminal : this->parser.grammar_.terminals_) + { + auto token = terminal->Lex(this->input.substr(this->index)); + if(token) + { + token->location.begin += this->index; + token->location.end += this->index; + + return token; + } + } + + // No token was matched. So we increment the index to skip this character. + index++; + } + else + { + for(auto terminal : this->parser.action_.at(state) | std::views::keys) + { + auto token = terminal->Lex(this->input.substr(this->index)); + if(token) + { + token->location.begin += this->index; + token->location.end += this->index; + + return token; + } + } + } + + return std::nullopt; + } + + void Consume(Token const &token) + { + this->index += token.Size(); + if(tokens) + { + tokens->push_back(token); + } + } + + Tokenizer(SLRParser const &parser, std::string_view input, std::vector> *tokens = nullptr) : parser(parser), input(input), tokens(tokens) {} + }; + /** * Inserts state into list if does not exist, otherwise returns the index of existing equal state. * @param state_list @@ -988,31 +1053,35 @@ namespace bf return this->grammar_; } - std::expected Parse(std::string_view input) override + std::expected Parse(std::string_view input, std::vector> *tokens = nullptr) override { + Tokenizer tokenizer(*this, input, tokens); + std::stack parse_stack; parse_stack.emplace(0); - std::size_t index = 0; - while(true) { lrstate_id_t state = parse_stack.top().state; - while(index < input.size() && std::isspace(input[index])) - { - index++; - } - - std::optional> lookahead = std::nullopt; - for(auto &[terminal, action] : this->action_[state]) - { - lookahead = terminal->Lex(input.substr(index)); - if(lookahead) break; - } - + std::optional> lookahead = tokenizer.Peek(state); if(!lookahead) { + // Permissively consume rest of tokens + if(tokens) + { + while(true) + { + lookahead = tokenizer.Peek(0, true); + if(!lookahead) continue; + if(lookahead->terminal == this->grammar_.EOS.get()) + { + break; + } + tokenizer.Consume(*lookahead); + } + } + return std::unexpected(Error{"Unexpected Token!"}); } @@ -1037,7 +1106,7 @@ namespace bf parse_stack.emplace(action.state); } - index += lookahead->Size(); + tokenizer.Consume(*lookahead); break; } diff --git a/test/buffalo.test.cpp b/test/buffalo.test.cpp index 3c74edb..201cafe 100644 --- a/test/buffalo.test.cpp +++ b/test/buffalo.test.cpp @@ -1,2 +1,108 @@ #include #include + +/* + * Grammar Definition + */ +using G = bf::GrammarDefinition; + +/* + * Terminals + */ +bf::DefineTerminal NUMBER([](auto const &tok) { + return std::stod(std::string(tok.raw)); +}); + +bf::DefineTerminal OP_EXP(bf::Right); + +bf::DefineTerminal OP_MUL(bf::Left); +bf::DefineTerminal OP_DIV(bf::Left); +bf::DefineTerminal OP_ADD(bf::Left); +bf::DefineTerminal OP_SUB(bf::Left); + +bf::DefineTerminal PAR_OPEN; +bf::DefineTerminal PAR_CLOSE; + +/* + * Non-Terminals + */ +bf::DefineNonTerminal expression + = bf::PR(NUMBER)<=>[](auto &$) { return $[0]; } + | (PAR_OPEN + expression + PAR_CLOSE)<=>[](auto &$) { return $[1]; } + | (expression + OP_EXP + expression)<=>[](auto &$) { return std::pow($[0], $[2]); } + | (expression + OP_MUL + expression)<=>[](auto &$) { return $[0] * $[2]; } + | (expression + OP_DIV + expression)<=>[](auto &$) { return $[0] / $[2]; } + | (expression + OP_ADD + expression)<=>[](auto &$) { return $[0] + $[2]; } + | (expression + OP_SUB + expression)<=>[](auto &$) { return $[0] - $[2]; } + ; + +bf::DefineNonTerminal statement + = bf::PR(expression)<=>[](auto &$) + { + return $[0]; + } + ; + +TEST(Parser, Construction) +{ + auto parser = bf::SLRParser::Build(statement); + ASSERT_TRUE(parser.has_value()); +} + +TEST(Parser, Evaluation) +{ + auto parser = *bf::SLRParser::Build(statement); + + auto res = parser.Parse("3 * 3 + 4^2 - (9 / 3)"); + ASSERT_TRUE(res.has_value()); + + ASSERT_EQ(*res, 22.0); +} + +TEST(Tokenization, Strict) +{ + auto parser = *bf::SLRParser::Build(statement); + + std::vector> tokens; + auto result = parser.Parse("3 + 5 - 2", &tokens); + + ASSERT_TRUE(result.has_value()); + + ASSERT_EQ(tokens.size(), 5); + + ASSERT_EQ(tokens[0].terminal, &NUMBER); + ASSERT_EQ(tokens[0].location.begin, 0); + + ASSERT_EQ(tokens[1].terminal, &OP_ADD); + ASSERT_EQ(tokens[1].location.begin, 2); + + ASSERT_EQ(tokens[2].terminal, &NUMBER); + ASSERT_EQ(tokens[2].location.begin, 4); + + ASSERT_EQ(tokens[3].terminal, &OP_SUB); + ASSERT_EQ(tokens[3].location.begin, 6); + + ASSERT_EQ(tokens[4].terminal, &NUMBER); + ASSERT_EQ(tokens[4].location.begin, 8); +} + +TEST(Tokenization, Permissive) +{ + auto parser = *bf::SLRParser::Build(statement); + + std::vector> tokens; + auto result = parser.Parse("3[[[+]]]&0", &tokens); + + ASSERT_FALSE(result.has_value()); + + ASSERT_EQ(tokens.size(), 3); + + ASSERT_EQ(tokens[0].terminal, &NUMBER); + ASSERT_EQ(tokens[0].location.begin, 0); + + ASSERT_EQ(tokens[1].terminal, &OP_ADD); + ASSERT_EQ(tokens[1].location.begin, 4); + + ASSERT_EQ(tokens[2].terminal, &NUMBER); + ASSERT_EQ(tokens[2].location.begin, 9); +} diff --git a/test/stmtlist.test.cpp b/test/stmtlist.test.cpp deleted file mode 100644 index 23ba309..0000000 --- a/test/stmtlist.test.cpp +++ /dev/null @@ -1,177 +0,0 @@ -/** - * Tests a subset of the `unlogic` grammar for certain properties and consistency. - */ -#include -#include - -struct Node {}; - -using ValueType = std::variant< - double, - std::string, - std::vector, - std::unique_ptr, - std::vector> ->; -using G = bf::GrammarDefinition; - -static bf::DefineTerminal KW_GIVEN; -static bf::DefineTerminal KW_CALC; -static bf::DefineTerminal KW_PLOT; - -static bf::DefineTerminal KW_ON; -static bf::DefineTerminal KW_AS; - -static bf::DefineTerminal NUMBER([](auto const &tok) -> ValueType { - return std::stod(std::string(tok.raw)); -}); - -static bf::DefineTerminal IDENTIFIER([](auto const &tok) -> ValueType { - return std::string(tok.raw); -}); - -static bf::DefineTerminal OP_EXP(bf::Right); - -static bf::DefineTerminal OP_MUL(bf::Left); -static bf::DefineTerminal OP_DIV(bf::Left); -static bf::DefineTerminal OP_ADD(bf::Left); -static bf::DefineTerminal OP_SUB(bf::Left); - -static bf::DefineTerminal OP_ASN(bf::Left); - -static bf::DefineTerminal PAR_OPEN; -static bf::DefineTerminal PAR_CLOSE; - -static bf::DefineTerminal BRK_OPEN; -static bf::DefineTerminal BRK_CLOSE; - -static bf::DefineTerminal STMT_DELIMITER; - -static bf::DefineTerminal SEPARATOR; - -static bf::DefineNonTerminal> expression - = bf::PR(NUMBER)<=>[](auto &$) -> ValueType - { - return std::make_unique(); - } - | bf::PR(IDENTIFIER)<=>[](auto &$) -> ValueType - { - return std::make_unique(); - } - | (PAR_OPEN + expression + PAR_CLOSE)<=>[](auto &$) -> ValueType - { - return std::make_unique(); - } - | (expression + OP_EXP + expression)<=>[](auto &$) -> ValueType - { - return std::make_unique(); - } - | (expression + OP_MUL + expression)<=>[](auto &$) -> ValueType - { - return std::make_unique(); - } - | (expression + OP_DIV + expression)<=>[](auto &$) -> ValueType - { - return std::make_unique(); - } - | (expression + OP_ADD + expression)<=>[](auto &$) -> ValueType - { - return std::make_unique(); - } - | (expression + OP_SUB + expression)<=>[](auto &$) -> ValueType - { - return std::make_unique(); - } - ; - -static bf::DefineNonTerminal> identifier_list - = bf::PR(IDENTIFIER)<=>[](auto &$) -> ValueType - { - return std::vector{ IDENTIFIER($[0]) }; - } - | (identifier_list + SEPARATOR + IDENTIFIER)<=>[](auto &$) -> ValueType - { - auto list = identifier_list($[0]); - list.push_back(IDENTIFIER($[2])); - - return std::move(list); - } - ; - -static bf::DefineNonTerminal> function_definition - = (KW_GIVEN + IDENTIFIER + PAR_OPEN + identifier_list + PAR_CLOSE + OP_ASN + expression)<=>[](auto &$) -> ValueType - { - return std::make_unique(); - } - | (KW_GIVEN + IDENTIFIER + PAR_OPEN + PAR_CLOSE + OP_ASN + expression)<=>[](auto &$) -> ValueType - { - return std::make_unique(); - } - ; - -static bf::DefineNonTerminal> plot_command - = (KW_PLOT + IDENTIFIER)<=>[](auto &$) -> ValueType - { - return std::make_unique(); - } - ; - -static bf::DefineNonTerminal> statement - = (function_definition + STMT_DELIMITER)<=>[](auto &$) -> ValueType - { - return std::move($[0]); - } - | (plot_command + STMT_DELIMITER)<=>[](auto &$) -> ValueType - { - return std::move($[0]); - } - ; - -static bf::DefineNonTerminal>> statement_list - = bf::PR(statement)<=>[](auto &$) -> ValueType - { - std::vector> list; - list.push_back(std::move(statement($[0]))); - - return std::move(list); - } - | (statement_list + statement)<=>[](auto &$) -> ValueType - { - auto list = statement_list($[0]); - list.push_back(std::move(statement($[1]))); - - return std::move(list); - } - ; - -static bf::DefineNonTerminal> program - = bf::PR(statement_list)<=>[](auto &$) -> ValueType - { - return std::make_unique(); - } - ; - -TEST(StmtList, FollowSet) -{ - auto parser = bf::SLRParser::Build(program); - ASSERT_TRUE(parser); - - auto &grammar = parser->GetGrammar(); - ASSERT_TRUE(grammar.NonTerminalHasFollow(function_definition, STMT_DELIMITER)); - - ASSERT_TRUE(grammar.HasNonTerminal(function_definition)); - ASSERT_TRUE(grammar.HasNonTerminal(plot_command)); - ASSERT_TRUE(grammar.HasNonTerminal(statement)); - - ASSERT_TRUE(grammar.NonTerminalHasFirst(function_definition, KW_GIVEN)); - ASSERT_TRUE(grammar.NonTerminalHasFirst(plot_command, KW_PLOT)); - - ASSERT_TRUE(grammar.NonTerminalHasFirst(statement, KW_GIVEN)); - ASSERT_TRUE(grammar.NonTerminalHasFirst(statement, KW_PLOT)); - - ASSERT_TRUE(grammar.NonTerminalHasFollow(statement_list, KW_GIVEN)); - ASSERT_TRUE(grammar.NonTerminalHasFollow(statement_list, KW_PLOT)); - - auto result = parser->Parse("given f(x) := x^2;\nplot f;"); - ASSERT_TRUE(result); -} \ No newline at end of file