Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

clp-s: Report exactly where parsing error occurs when parsing JSON (fixes #514). #503

Merged
merged 2 commits into from
Aug 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 32 additions & 10 deletions components/core/src/clp_s/JsonParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,7 @@ bool JsonParser::parse() {

if (simdjson::error_code::SUCCESS != json_file_iterator.get_error()) {
SPDLOG_ERROR(
"Encountered error - {} - while trying to parse {}",
"Encountered error - {} - while trying to parse {} after parsing 0 bytes",
simdjson::error_message(json_file_iterator.get_error()),
file_path
);
Expand All @@ -439,7 +439,8 @@ bool JsonParser::parse() {
simdjson::ondemand::document_stream::iterator json_it;

m_num_messages = 0;
size_t last_num_bytes_consumed = 0;
size_t bytes_consumed_up_to_prev_archive = 0;
size_t bytes_consumed_up_to_prev_record = 0;
while (json_file_iterator.get_json(json_it)) {
m_current_schema.clear();

Expand All @@ -450,39 +451,60 @@ bool JsonParser::parse() {
// that this isn't a valid JSON document but they get set in different situations so we
// need to check both here.
if (is_scalar_result.error() || true == is_scalar_result.value()) {
SPDLOG_ERROR("Encountered non-json-object while trying to parse {}", file_path);
SPDLOG_ERROR(
"Encountered non-json-object while trying to parse {} after parsing {} "
"bytes",
file_path,
bytes_consumed_up_to_prev_record
);
m_archive_writer->close();
return false;
}

// Some simdjson errors remain latent until an invalid JSON field is accessed. Rather than
// checking for an error every time we access a JSON field in parse_line, we catch
// simdjson_error here.
try {
parse_line(ref.value(), -1, "");
} catch (simdjson::simdjson_error& error) {
SPDLOG_ERROR(
"Encountered error - {} - while trying to parse {} after parsing {} bytes",
error.what(),
file_path,
bytes_consumed_up_to_prev_record
);
m_archive_writer->close();
return false;
}
parse_line(ref.value(), -1, "");
m_num_messages++;

int32_t current_schema_id = m_archive_writer->add_schema(m_current_schema);
m_current_parsed_message.set_id(current_schema_id);
m_archive_writer
->append_message(current_schema_id, m_current_schema, m_current_parsed_message);

bytes_consumed_up_to_prev_record = json_file_iterator.get_num_bytes_consumed();
if (m_archive_writer->get_data_size() >= m_target_encoded_size) {
size_t num_bytes_read = json_file_iterator.get_num_bytes_consumed();
m_archive_writer->increment_uncompressed_size(
num_bytes_read - last_num_bytes_consumed
bytes_consumed_up_to_prev_record - bytes_consumed_up_to_prev_archive
);
last_num_bytes_consumed = num_bytes_read;
bytes_consumed_up_to_prev_archive = bytes_consumed_up_to_prev_record;
split_archive();
}

m_current_parsed_message.clear();
}

m_archive_writer->increment_uncompressed_size(
json_file_iterator.get_num_bytes_read() - last_num_bytes_consumed
json_file_iterator.get_num_bytes_read() - bytes_consumed_up_to_prev_archive
);

if (simdjson::error_code::SUCCESS != json_file_iterator.get_error()) {
SPDLOG_ERROR(
"Encountered error - {} - while trying to parse {}",
"Encountered error - {} - while trying to parse {} after parsing {} bytes",
simdjson::error_message(json_file_iterator.get_error()),
file_path
file_path,
bytes_consumed_up_to_prev_record
);
m_archive_writer->close();
return false;
Expand Down
1 change: 1 addition & 0 deletions components/core/src/clp_s/JsonParser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ class JsonParser {
* @param line the JSON line
* @param parent_node_id the parent node id
* @param key the key of the node
* @throw simdjson::simdjson_error when encountering invalid fields while parsing line
*/
void parse_line(ondemand::value line, int32_t parent_node_id, std::string const& key);

Expand Down
Loading