
Commit

add tests and token api;
NateSeymour committed Nov 26, 2024
1 parent 21d2958 commit 6f2be36
Showing 4 changed files with 200 additions and 203 deletions.
1 change: 0 additions & 1 deletion CMakeLists.txt
@@ -29,7 +29,6 @@ target_link_libraries(buffalo INTERFACE ctre::ctre)
# Tests
add_executable(buffalo-test
test/buffalo.test.cpp
test/stmtlist.test.cpp
)
target_link_libraries(buffalo-test PRIVATE buffalo GTest::gtest_main)
target_include_directories(buffalo-test PRIVATE include)
119 changes: 94 additions & 25 deletions include/buffalo/buffalo.h
@@ -1,12 +1,9 @@
#ifndef BUFFALO2_H
#define BUFFALO2_H

#include <memory>
#include <algorithm>
#include <cctype>
#include <exception>
#include <expected>
#include <format>
#include <functional>
#include <map>
#include <optional>
#include <ranges>
@@ -285,14 +282,19 @@ namespace bf
};
}

constexpr DefineTerminal(Associativity associativity, typename Terminal<G>::ReasonerType reasoner = nullptr, typename G::UserDataType user_data = {})
constexpr DefineTerminal(Associativity assoc = bf::None, typename G::UserDataType user_data = {}, typename Terminal<G>::ReasonerType reasoner = nullptr)
{
this->associativity = associativity;
this->associativity = assoc;
this->user_data = user_data;
this->reasoner_ = reasoner;
this->user_data = std::move(user_data);
}

constexpr DefineTerminal(typename bf::Terminal<G>::ReasonerType reasoner = nullptr, typename G::UserDataType user_data = {}) : DefineTerminal(bf::None, reasoner, std::move(user_data)) {}
constexpr DefineTerminal(Associativity assoc, typename Terminal<G>::ReasonerType reasoner) : DefineTerminal(assoc, {}, reasoner) {}

constexpr DefineTerminal(typename G::UserDataType user_data) : DefineTerminal(bf::None, user_data, nullptr) {}
constexpr DefineTerminal(typename G::UserDataType user_data, typename Terminal<G>::ReasonerType reasoner) : DefineTerminal(bf::None, user_data, reasoner) {}

constexpr DefineTerminal(typename Terminal<G>::ReasonerType reasoner) : DefineTerminal(bf::None, {}, reasoner) {}
};

/**
@@ -596,6 +598,7 @@ namespace bf
[&](Terminal<G> *terminal)
{
last_terminal = terminal;
this->terminals_.insert(terminal);
},
[&](NonTerminal<G> *child_nonterminal)
{
@@ -617,6 +620,7 @@ namespace bf
Grammar(NonTerminal<G> &start) : root(start)
{
this->EOS = std::make_unique<DefineTerminal<G, R"(\Z)">>();
this->terminals_.insert(this->EOS.get());

this->RegisterSymbols(&start);

@@ -812,7 +816,7 @@ namespace bf
class Parser
{
public:
virtual std::expected<typename G::ValueType, Error> Parse(std::string_view input) = 0;
virtual std::expected<typename G::ValueType, Error> Parse(std::string_view input, std::vector<Token<G>> *tokens) = 0;

virtual ~Parser() = default;
};
@@ -838,6 +842,67 @@ namespace bf
ParseStackItem(lrstate_id_t state, typename G::ValueType value) : state(state), value(std::move(value)) {}
};

struct Tokenizer
{
SLRParser<G> const &parser;
std::string_view input;
std::size_t index = 0;

std::vector<Token<G>> *tokens;

std::optional<Token<G>> Peek(lrstate_id_t state = 0, bool permissive = false)
{
while(this->index < this->input.size() && std::isspace(this->input[this->index])) this->index++;

// IMPORTANT: No need to check for EOF, because it is checked for by special EOF terminal!

if(permissive)
{
for(auto terminal : this->parser.grammar_.terminals_)
{
auto token = terminal->Lex(this->input.substr(this->index));
if(token)
{
token->location.begin += this->index;
token->location.end += this->index;

return token;
}
}

// No token was matched. So we increment the index to skip this character.
index++;
}
else
{
for(auto terminal : this->parser.action_.at(state) | std::views::keys)
{
auto token = terminal->Lex(this->input.substr(this->index));
if(token)
{
token->location.begin += this->index;
token->location.end += this->index;

return token;
}
}
}

return std::nullopt;
}

void Consume(Token<G> const &token)
{
this->index += token.Size();
if(tokens)
{
tokens->push_back(token);
}
}

Tokenizer(SLRParser<G> const &parser, std::string_view input, std::vector<Token<G>> *tokens = nullptr) : parser(parser), input(input), tokens(tokens) {}
};

/**
* Inserts state into list if does not exist, otherwise returns the index of existing equal state.
* @param state_list
@@ -988,31 +1053,35 @@
return this->grammar_;
}

std::expected<typename G::ValueType, Error> Parse(std::string_view input) override
std::expected<typename G::ValueType, Error> Parse(std::string_view input, std::vector<Token<G>> *tokens = nullptr) override
{
Tokenizer tokenizer(*this, input, tokens);

std::stack<ParseStackItem> parse_stack;
parse_stack.emplace(0);

std::size_t index = 0;

while(true)
{
lrstate_id_t state = parse_stack.top().state;

while(index < input.size() && std::isspace(input[index]))
{
index++;
}

std::optional<Token<G>> lookahead = std::nullopt;
for(auto &[terminal, action] : this->action_[state])
{
lookahead = terminal->Lex(input.substr(index));
if(lookahead) break;
}

std::optional<Token<G>> lookahead = tokenizer.Peek(state);
if(!lookahead)
{
// Permissively consume rest of tokens
if(tokens)
{
while(true)
{
lookahead = tokenizer.Peek(0, true);
if(!lookahead) continue;
if(lookahead->terminal == this->grammar_.EOS.get())
{
break;
}
tokenizer.Consume(*lookahead);
}
}

return std::unexpected(Error{"Unexpected Token!"});
}

@@ -1037,7 +1106,7 @@
parse_stack.emplace(action.state);
}

index += lookahead->Size();
tokenizer.Consume(*lookahead);
break;
}

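The headline API change above is that Parser::Parse now accepts an optional std::vector<Token<G>>* that the new Tokenizer fills as it consumes input; when a parse fails, a permissive pass still records every terminal it can recognize. Below is a minimal usage sketch, assuming the calculator grammar G and the SLR parser built in test/buffalo.test.cpp further down; the PrintTokenLocations helper is illustrative only and not part of this commit.

#include <buffalo/buffalo.h>
#include <iostream>
#include <string_view>
#include <vector>

// Sketch: collect tokens alongside a parse. `G` is the grammar definition
// and `parser` an SLRParser<G>, as set up in the test file below.
void PrintTokenLocations(bf::SLRParser<G> &parser, std::string_view input)
{
    std::vector<bf::Token<G>> tokens;
    auto result = parser.Parse(input, &tokens);

    // The token list is populated even when parsing fails, which is what
    // makes the permissive mode useful for editor tooling.
    if(!result)
    {
        std::cout << "parse failed, but " << tokens.size() << " tokens were recognized\n";
    }

    for(auto const &token : tokens)
    {
        // Each token records the matched terminal and its span in the input.
        std::cout << token.location.begin << ".." << token.location.end << '\n';
    }
}
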
106 changes: 106 additions & 0 deletions test/buffalo.test.cpp
@@ -1,2 +1,108 @@
#include <gtest/gtest.h>
#include <buffalo/buffalo.h>

/*
* Grammar Definition
*/
using G = bf::GrammarDefinition<double>;

/*
* Terminals
*/
bf::DefineTerminal<G, R"(\d+(\.\d+)?)", double> NUMBER([](auto const &tok) {
return std::stod(std::string(tok.raw));
});

bf::DefineTerminal<G, R"(\^)"> OP_EXP(bf::Right);

bf::DefineTerminal<G, R"(\*)"> OP_MUL(bf::Left);
bf::DefineTerminal<G, R"(\/)"> OP_DIV(bf::Left);
bf::DefineTerminal<G, R"(\+)"> OP_ADD(bf::Left);
bf::DefineTerminal<G, R"(\-)"> OP_SUB(bf::Left);

bf::DefineTerminal<G, R"(\()"> PAR_OPEN;
bf::DefineTerminal<G, R"(\))"> PAR_CLOSE;

/*
* Non-Terminals
*/
bf::DefineNonTerminal<G> expression
= bf::PR<G>(NUMBER)<=>[](auto &$) { return $[0]; }
| (PAR_OPEN + expression + PAR_CLOSE)<=>[](auto &$) { return $[1]; }
| (expression + OP_EXP + expression)<=>[](auto &$) { return std::pow($[0], $[2]); }
| (expression + OP_MUL + expression)<=>[](auto &$) { return $[0] * $[2]; }
| (expression + OP_DIV + expression)<=>[](auto &$) { return $[0] / $[2]; }
| (expression + OP_ADD + expression)<=>[](auto &$) { return $[0] + $[2]; }
| (expression + OP_SUB + expression)<=>[](auto &$) { return $[0] - $[2]; }
;

bf::DefineNonTerminal<G> statement
= bf::PR<G>(expression)<=>[](auto &$)
{
return $[0];
}
;

TEST(Parser, Construction)
{
auto parser = bf::SLRParser<G>::Build(statement);
ASSERT_TRUE(parser.has_value());
}

TEST(Parser, Evaluation)
{
auto parser = *bf::SLRParser<G>::Build(statement);

auto res = parser.Parse("3 * 3 + 4^2 - (9 / 3)");
ASSERT_TRUE(res.has_value());

ASSERT_EQ(*res, 22.0);
}

TEST(Tokenization, Strict)
{
auto parser = *bf::SLRParser<G>::Build(statement);

std::vector<bf::Token<G>> tokens;
auto result = parser.Parse("3 + 5 - 2", &tokens);

ASSERT_TRUE(result.has_value());

ASSERT_EQ(tokens.size(), 5);

ASSERT_EQ(tokens[0].terminal, &NUMBER);
ASSERT_EQ(tokens[0].location.begin, 0);

ASSERT_EQ(tokens[1].terminal, &OP_ADD);
ASSERT_EQ(tokens[1].location.begin, 2);

ASSERT_EQ(tokens[2].terminal, &NUMBER);
ASSERT_EQ(tokens[2].location.begin, 4);

ASSERT_EQ(tokens[3].terminal, &OP_SUB);
ASSERT_EQ(tokens[3].location.begin, 6);

ASSERT_EQ(tokens[4].terminal, &NUMBER);
ASSERT_EQ(tokens[4].location.begin, 8);
}

TEST(Tokenization, Permissive)
{
auto parser = *bf::SLRParser<G>::Build(statement);

std::vector<bf::Token<G>> tokens;
auto result = parser.Parse("3[[[+]]]&0", &tokens);

ASSERT_FALSE(result.has_value());

ASSERT_EQ(tokens.size(), 3);

ASSERT_EQ(tokens[0].terminal, &NUMBER);
ASSERT_EQ(tokens[0].location.begin, 0);

ASSERT_EQ(tokens[1].terminal, &OP_ADD);
ASSERT_EQ(tokens[1].location.begin, 4);

ASSERT_EQ(tokens[2].terminal, &NUMBER);
ASSERT_EQ(tokens[2].location.begin, 9);
}
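
For completeness, the DefineTerminal constructor overloads introduced in this commit let a terminal declare an associativity, a reasoner, or both in one place. The sketch below reuses the grammar definition G from this test file; the terminal names and patterns are illustrative and are not wired into the commit's grammar.

#include <string>

// Associativity only, as OP_EXP does above.
bf::DefineTerminal<G, R"(\^)"> POW_A(bf::Right);

// Reasoner only, as NUMBER does above.
bf::DefineTerminal<G, R"(\d+)", double> NUM_A([](auto const &tok) {
    return std::stod(std::string(tok.raw));
});

// Associativity and reasoner combined via the new (Associativity, ReasonerType) overload.
bf::DefineTerminal<G, R"(\d+)", double> NUM_B(bf::None, [](auto const &tok) {
    return std::stod(std::string(tok.raw));
});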