Skip to content

Commit

Permalink
make ParseError a proper type, now stores line and column numbers
Browse files Browse the repository at this point in the history
  • Loading branch information
matcool committed Nov 10, 2024
1 parent f6b07a2 commit f1ee1ee
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 23 deletions.
20 changes: 18 additions & 2 deletions include/matjson.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,24 @@ namespace matjson {

class Value;

/// Error produced when parsing fails. Carries a human-readable message together with the
/// offset, line and column values that were handed to the constructor.
struct ParseError {
    /// Description of what went wrong.
    std::string message;
    /// Location values supplied at construction; the in-class defaults are all zero.
    int offset = 0, line = 0, column = 0;

    /// Builds an error from a message and the location values to report.
    inline ParseError(std::string msg, int offset, int line, int column) :
        message(std::move(msg)), offset(offset), line(line), column(column) {}

    /// Returns a string representation of the error, useful for coercing into Result<T>
    /// methods, where the error type is a string. *Do not* rely on the format of this string,
    /// as it may change in the future. Instead, just access the fields directly.
    inline operator std::string() const {
        // A line of zero is treated as "no location", so only the bare message is returned.
        if (this->line == 0) {
            return this->message;
        }
        return this->message + " at line " + std::to_string(this->line) + ", column " +
            std::to_string(this->column);
    }
};

static constexpr int NO_INDENTATION = 0;
static constexpr int TAB_INDENTATION = -1;
Expand Down
62 changes: 44 additions & 18 deletions src/parser.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#include "impl.hpp"

#include <charconv>
#include <format>
#include <iostream>
#include <istream>
#include <matjson.hpp>
Expand All @@ -18,30 +17,51 @@ bool isWhitespace(char c) {

struct StringStream {
std::istream& stream;
int line = 1, column = 1;

auto error(std::string_view msg) const noexcept {
return Err(ParseError(std::string(msg), stream.tellg(), line, column));
}

/// Consumes one character from the stream and returns it.
/// A newline bumps `line` and resets `column` to 1; any other character bumps `column`.
/// When no character can be read, returns an "eof" error built by error(), so the
/// failure carries the current position.
Result<char, ParseError> take() noexcept {
    char ch;
    if (!stream.get(ch)) return this->error("eof");
    if (ch == '\n') {
        ++line;
        column = 1;
    }
    else {
        ++column;
    }
    return Ok(ch);
}

/// Consumes exactly `n` characters from the stream and returns them as a string.
/// Only `column` is advanced (by `n`); `line` is left untouched.
/// When the read fails, returns an "eof" error built by error().
Result<std::string, ParseError> take(size_t n) {
    // this is only used for constants, so it's fine to not count lines
    std::string buffer;
    buffer.resize(n);
    if (!stream.read(buffer.data(), n)) return this->error("eof");
    // column is an int, so make the size_t -> int conversion explicit
    column += static_cast<int>(n);
    return Ok(buffer);
}

/// Returns the next character in the stream without consuming it; the line and
/// column counters are not modified. When the stream reports EOF, returns an
/// "eof" error built by error().
Result<char, ParseError> peek() noexcept {
    auto ch = stream.peek();
    if (ch == EOF) return this->error("eof");
    return Ok(ch);
}

/// Consumes characters until the next char is not whitespace (or the stream stops
/// being good), updating the line/column counters for every character consumed.
void skipWhitespace() noexcept {
    while (stream.good() && isWhitespace(stream.peek())) {
        // Consume exactly the character that was just peeked, so nothing
        // unchecked is dropped and the position counters stay in step.
        char ch = stream.get();
        if (ch == '\n') {
            ++line;
            column = 1;
        }
        else {
            ++column;
        }
    }
}

Expand Down Expand Up @@ -71,7 +91,7 @@ Result<ValuePtr, ParseError> parseConstant(StringStream& stream) {
}
default: break;
}
return Err("invalid constant");
return stream.error("invalid constant");
}

void encodeUTF8(std::string& str, int32_t code_point) {
Expand Down Expand Up @@ -110,7 +130,7 @@ Result<std::string, ParseError> parseString(StringStream& stream) noexcept {
// char is signed, so utf8 high bit bytes will be interpreted as negative,
// could switch to unsigned char however just checking for c < 0 is easier
if (c >= 0 && c < 0x20) {
return Err("invalid string");
return stream.error("invalid string");
}
// FIXME: standard should also ignore > 0x10FFFF, however that would require decoding utf-8
if (c == '\\') {
Expand All @@ -132,7 +152,7 @@ Result<std::string, ParseError> parseString(StringStream& stream) noexcept {
return Ok(static_cast<uint32_t>(c - 'a' + 10));
else if (c >= 'A' && c <= 'F')
return Ok(static_cast<uint32_t>(c - 'A' + 10));
return Err("invalid hex");
return stream.error("invalid hex");
};
auto const takeUnicodeHex = [&]() -> Result<int32_t, ParseError> {
int32_t result = 0;
Expand All @@ -147,23 +167,23 @@ Result<std::string, ParseError> parseString(StringStream& stream) noexcept {
if (0xd800 <= value && value <= 0xdbff) {
GEODE_UNWRAP_INTO(char c, stream.take());
if (c != '\\') {
return Err("expected backslash");
return stream.error("expected backslash");
}
GEODE_UNWRAP_INTO(c, stream.take());
if (c != 'u') {
return Err("expected u");
return stream.error("expected u");
}
GEODE_UNWRAP_INTO(int32_t value2, takeUnicodeHex());
if (0xdc00 <= value2 && value2 <= 0xdfff) {
value = 0x10000 + ((value & 0x3ff) << 10) + (value2 & 0x3ff);
}
else {
return Err("invalid surrogate pair");
return stream.error("invalid surrogate pair");
}
}
encodeUTF8(str, value);
} break;
default: return Err("invalid backslash escape");
default: return stream.error("invalid backslash escape");
}
}
else {
Expand Down Expand Up @@ -205,7 +225,7 @@ Result<ValuePtr, ParseError> parseNumber(StringStream& stream) noexcept {
}
}
if (!once) {
return Err("expected digits");
return stream.error("expected digits");
}
return Ok();
};
Expand Down Expand Up @@ -245,7 +265,7 @@ Result<ValuePtr, ParseError> parseNumber(StringStream& stream) noexcept {
T value;
if (auto result = std::from_chars(buffer.data(), buffer.data() + buffer.size(), value);
result.ec != std::errc()) {
return Err("failed to parse number");
return stream.error("failed to parse number");
}
return Ok(std::make_unique<ValueImpl>(Type::Number, value));
};
Expand Down Expand Up @@ -276,12 +296,18 @@ Result<ValuePtr, ParseError> parseObject(StringStream& stream) noexcept {
if (p != '}') {
while (true) {
stream.skipWhitespace();
{
GEODE_UNWRAP_INTO(char c, stream.peek());
if (c != '"') {
return stream.error("expected string");
}
}
GEODE_UNWRAP_INTO(auto key, parseString(stream));
stream.skipWhitespace();

GEODE_UNWRAP_INTO(char s, stream.take());
if (s != ':') {
return Err("expected colon");
return stream.error("expected colon");
}

GEODE_UNWRAP_INTO(auto value, parseElement(stream));
Expand All @@ -296,7 +322,7 @@ Result<ValuePtr, ParseError> parseObject(StringStream& stream) noexcept {
break;
}
else {
return Err("expected member");
return stream.error("expected comma");
}
}
}
Expand All @@ -323,7 +349,7 @@ Result<ValuePtr, ParseError> parseArray(StringStream& stream) noexcept {
break;
}
else {
return Err("expected value");
return stream.error("expected value");
}
}
}
Expand Down Expand Up @@ -356,7 +382,7 @@ Result<ValuePtr, ParseError> parseValue(StringStream& stream) noexcept {
case '7':
case '8':
case '9': return parseNumber(stream);
default: return Err("invalid value");
default: return stream.error("invalid value");
}
}

Expand Down
2 changes: 1 addition & 1 deletion test/manual.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ struct Foo {
Bar bar;
};

Result<void, std::string> fancyMain(int argc, char const* argv[]) {
Result<void> fancyMain(int argc, char const* argv[]) {
{
Foo bar{"John", 25, 1.75};
matjson::Value json = bar;
Expand Down
15 changes: 13 additions & 2 deletions test/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -252,8 +252,8 @@ TEST_CASE("Invalid json") {

// Very invalid
using namespace std::string_view_literals;
REQUIRE(matjson::parse("[\"hi\x00the\"]"sv).isErrAnd([](auto err) {
return std::string(err) == "invalid string";
REQUIRE(matjson::parse("[\"hi\x00the\"]"sv).isErrAnd([](auto const& err) {
return err.message == "invalid string";
}));
}

Expand Down Expand Up @@ -359,3 +359,14 @@ TEST_CASE("Implicit ctors") {
CoolStruct b{};
value["a"] = b;
}

// Checks the line/column values reported on ParseError for truncated inputs.
TEST_CASE("ParseError line numbers") {
    {
        // Input that stops right after the opening brace, all on the first line.
        auto const err = matjson::parse("{").unwrapErr();
        REQUIRE(err.line == 1);
        REQUIRE(err.column == 2);
    }

    {
        // A newline followed by an unterminated string: the error is expected on the second line.
        auto const err = matjson::parse("{\n\"hello").unwrapErr();
        REQUIRE(err.line == 2);
        REQUIRE(err.column == 7);
    }
}

0 comments on commit f1ee1ee

Please sign in to comment.