Skip to content

Commit

Permalink
chore: Update vendored sources to duckdb/duckdb@35dfcc0 (#504)
Browse files Browse the repository at this point in the history
allow external cardinality information (e.g. from iceberg) (duckdb/duckdb#14292)

Co-authored-by: krlmlr <krlmlr@users.noreply.github.com>
  • Loading branch information
github-actions[bot] and krlmlr authored Oct 17, 2024
1 parent 62ff37d commit 27f699f
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 7 deletions.
1 change: 1 addition & 0 deletions src/duckdb/extension/parquet/include/parquet_reader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ struct ParquetOptions {

MultiFileReaderOptions file_options;
vector<ParquetColumnDefinition> schema;
idx_t explicit_cardinality = 0;

public:
void Serialize(Serializer &serializer) const;
Expand Down
16 changes: 12 additions & 4 deletions src/duckdb/extension/parquet/parquet_extension.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ struct ParquetReadBindData : public TableFunctionData {
// These come from the initial_reader, but need to be stored in case the initial_reader is removed by a filter
idx_t initial_file_cardinality;
idx_t initial_file_row_groups;
idx_t explicit_cardinality = 0; // can be set to inject exterior cardinality knowledge (e.g. from a data lake)
ParquetOptions parquet_options;

MultiFileReaderBindData reader_bind;

void Initialize(shared_ptr<ParquetReader> reader) {
Expand Down Expand Up @@ -395,6 +395,7 @@ class ParquetScanFunction {
table_function.named_parameters["file_row_number"] = LogicalType::BOOLEAN;
table_function.named_parameters["debug_use_openssl"] = LogicalType::BOOLEAN;
table_function.named_parameters["compression"] = LogicalType::VARCHAR;
table_function.named_parameters["explicit_cardinality"] = LogicalType::UBIGINT;
table_function.named_parameters["schema"] =
LogicalType::MAP(LogicalType::INTEGER, LogicalType::STRUCT({{{"name", LogicalType::VARCHAR},
{"type", LogicalType::VARCHAR},
Expand Down Expand Up @@ -545,7 +546,11 @@ class ParquetScanFunction {
result->reader_bind = result->multi_file_reader->BindReader<ParquetReader>(
context, result->types, result->names, *result->file_list, *result, parquet_options);
}

if (parquet_options.explicit_cardinality) {
auto file_count = result->file_list->GetTotalFileCount();
result->explicit_cardinality = parquet_options.explicit_cardinality;
result->initial_file_cardinality = result->explicit_cardinality / (file_count ? file_count : 1);
}
if (return_types.empty()) {
// no expected types - just copy the types
return_types = result->types;
Expand Down Expand Up @@ -618,6 +623,8 @@ class ParquetScanFunction {

// cannot be combined with hive_partitioning=true, so we disable auto-detection
parquet_options.file_options.auto_detect_hive_partitioning = false;
} else if (loption == "explicit_cardinality") {
parquet_options.explicit_cardinality = UBigIntValue::Get(kv.second);
} else if (loption == "encryption_config") {
parquet_options.encryption_config = ParquetEncryptionConfig::Create(context, kv.second);
}
Expand Down Expand Up @@ -847,12 +854,13 @@ class ParquetScanFunction {

// Estimate the row count produced by this parquet scan.
// Preference order: an explicitly injected cardinality (e.g. supplied by a data
// lake catalog such as Iceberg), then the file list's own estimate, and finally
// a heuristic of (first file's cardinality) * (number of files).
static unique_ptr<NodeStatistics> ParquetCardinality(ClientContext &context, const FunctionData *bind_data) {
	auto &parquet_bind = bind_data->Cast<ParquetReadBindData>();
	// Externally supplied knowledge wins outright when present (non-zero).
	if (parquet_bind.explicit_cardinality != 0) {
		return make_uniq<NodeStatistics>(parquet_bind.explicit_cardinality);
	}
	// Otherwise, defer to the file list if it can estimate on its own.
	if (auto list_estimate = parquet_bind.file_list->GetCardinality(context)) {
		return list_estimate;
	}
	// Fall back: assume each file is roughly as large as the first one,
	// clamping to at least one row per file so the estimate is never zero.
	auto rows_per_file = MaxValue(parquet_bind.initial_file_cardinality, (idx_t)1);
	return make_uniq<NodeStatistics>(rows_per_file * parquet_bind.file_list->GetTotalFileCount());
}
Expand Down
6 changes: 3 additions & 3 deletions src/duckdb/src/function/table/version/pragma_version.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#ifndef DUCKDB_PATCH_VERSION
#define DUCKDB_PATCH_VERSION "2-dev186"
#define DUCKDB_PATCH_VERSION "2-dev192"
#endif
#ifndef DUCKDB_MINOR_VERSION
#define DUCKDB_MINOR_VERSION 1
Expand All @@ -8,10 +8,10 @@
#define DUCKDB_MAJOR_VERSION 1
#endif
#ifndef DUCKDB_VERSION
#define DUCKDB_VERSION "v1.1.2-dev186"
#define DUCKDB_VERSION "v1.1.2-dev192"
#endif
#ifndef DUCKDB_SOURCE_ID
#define DUCKDB_SOURCE_ID "92e0964376"
#define DUCKDB_SOURCE_ID "35dfcc06e6"
#endif
#include "duckdb/function/table/system_functions.hpp"
#include "duckdb/main/database.hpp"
Expand Down

0 comments on commit 27f699f

Please sign in to comment.