Skip to content

Commit

Permalink
misc: Surface clear parsing error for invalid prestopage header (#11552)
Browse files Browse the repository at this point in the history
Summary:

During deserialization, PrestoPage header is parsed with the assumption of enough bytes in the provided input stream. This causes exceptions that are not easy to interpret when there is invalid data provided. This change validates existence of enough bytes before parsing the header and surfaces the parsing error with a clear exception message.

Reviewed By: kevinwilfong

Differential Revision: D66003722
  • Loading branch information
Abdullah Ozturk authored and facebook-github-bot committed Nov 18, 2024
1 parent aedf91c commit 07cfd2b
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 8 deletions.
28 changes: 23 additions & 5 deletions velox/serializers/PrestoSerializer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -220,17 +220,30 @@ struct PrestoHeader {
int32_t compressedSize;
int64_t checksum;

static PrestoHeader read(ByteInputStream* source) {
static Expected<PrestoHeader> read(ByteInputStream* source) {
if (source->remainingSize() < kHeaderSize) {
return folly::makeUnexpected(Status::Invalid(
fmt::format("{} bytes for header", source->remainingSize())));
}
PrestoHeader header;
header.numRows = source->read<int32_t>();
header.pageCodecMarker = source->read<int8_t>();
header.uncompressedSize = source->read<int32_t>();
header.compressedSize = source->read<int32_t>();
header.checksum = source->read<int64_t>();

VELOX_CHECK_GE(header.numRows, 0);
VELOX_CHECK_GE(header.uncompressedSize, 0);
VELOX_CHECK_GE(header.compressedSize, 0);
if (header.numRows < 0) {
return folly::makeUnexpected(
Status::Invalid(fmt::format("negative numRows: {}", header.numRows)));
}
if (header.uncompressedSize < 0) {
return folly::makeUnexpected(Status::Invalid(fmt::format(
"negative uncompressedSize: {}", header.uncompressedSize)));
}
if (header.compressedSize < 0) {
return folly::makeUnexpected(Status::Invalid(
fmt::format("negative compressedSize: {}", header.compressedSize)));
}

return header;
}
Expand Down Expand Up @@ -4193,7 +4206,12 @@ void PrestoVectorSerde::deserialize(
const auto prestoOptions = toPrestoOptions(options);
const auto codec =
common::compressionKindToCodec(prestoOptions.compressionKind);
auto const header = PrestoHeader::read(source);
auto maybeHeader = PrestoHeader::read(source);
VELOX_CHECK(
maybeHeader.hasValue(),
fmt::format(
"PrestoPage header is invalid: {}", maybeHeader.error().message()));
auto const header = std::move(maybeHeader.value());

int64_t actualCheckSum = 0;
if (isChecksumBitSet(header.pageCodecMarker)) {
Expand Down
21 changes: 18 additions & 3 deletions velox/serializers/tests/PrestoSerializerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,11 +201,13 @@ class PrestoSerializerTest
RowVectorPtr deserialize(
const RowTypePtr& rowType,
const std::string& input,
const serializer::presto::PrestoVectorSerde::PrestoOptions*
serdeOptions) {
const serializer::presto::PrestoVectorSerde::PrestoOptions* serdeOptions,
bool skipLexer = false) {
auto byteStream = toByteStream(input);
auto paramOptions = getParamSerdeOptions(serdeOptions);
validateLexer(input, paramOptions);
if (!skipLexer) {
validateLexer(input, paramOptions);
}
RowVectorPtr result;
serde_->deserialize(
byteStream.get(), pool_.get(), rowType, &result, 0, &paramOptions);
Expand Down Expand Up @@ -838,6 +840,19 @@ TEST_P(PrestoSerializerTest, emptyPage) {
assertEqualVectors(deserialized, rowVector);
}

TEST_P(PrestoSerializerTest, invalidPage) {
auto rowVector = makeEmptyTestVector();

std::ostringstream out;
serialize(rowVector, &out, nullptr);

auto invalidPage = ""; // empty string
auto rowType = asRowType(rowVector->type());
VELOX_ASSERT_THROW(
deserialize(rowType, invalidPage, nullptr, true /*skipLexer*/),
"PrestoPage header invalid: 0 bytes for header");
}

TEST_P(PrestoSerializerTest, initMemory) {
const auto numRows = 100;
auto testFunc = [&](TypePtr type, int64_t expectedBytes) {
Expand Down

0 comments on commit 07cfd2b

Please sign in to comment.