Skip to content

Commit

Permalink
Update vendored sources to duckdb/duckdb@c21a9cb
Browse files Browse the repository at this point in the history
  • Loading branch information
krlmlr committed Sep 5, 2023
1 parent 5d6d02a commit 8ecbee2
Show file tree
Hide file tree
Showing 166 changed files with 9,202 additions and 3,174 deletions.
126 changes: 119 additions & 7 deletions src/duckdb/src/common/enum_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,18 @@
#include "duckdb/common/types/column/column_data_scan_states.hpp"
#include "duckdb/common/types/column/partitioned_column_data.hpp"
#include "duckdb/common/types/conflict_manager.hpp"
#include "duckdb/common/types/hyperloglog.hpp"
#include "duckdb/common/types/row/partitioned_tuple_data.hpp"
#include "duckdb/common/types/row/tuple_data_states.hpp"
#include "duckdb/common/types/timestamp.hpp"
#include "duckdb/common/types/vector.hpp"
#include "duckdb/common/types/vector_buffer.hpp"
#include "duckdb/execution/index/art/art.hpp"
#include "duckdb/execution/index/art/node.hpp"
#include "duckdb/execution/operator/persistent/base_csv_reader.hpp"
#include "duckdb/execution/operator/persistent/csv_reader_options.hpp"
#include "duckdb/execution/operator/scan/csv/base_csv_reader.hpp"
#include "duckdb/execution/operator/scan/csv/csv_reader_options.hpp"
#include "duckdb/execution/operator/scan/csv/csv_state_machine.hpp"
#include "duckdb/execution/operator/scan/csv/quote_rules.hpp"
#include "duckdb/function/aggregate_state.hpp"
#include "duckdb/function/function.hpp"
#include "duckdb/function/macro_function.hpp"
Expand Down Expand Up @@ -639,6 +642,64 @@ CAPIResultSetType EnumUtil::FromString<CAPIResultSetType>(const char *value) {
throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
}

template<>
const char* EnumUtil::ToChars<CSVState>(CSVState value) {
	// Maps each CSV state-machine state to its canonical, serializable name.
	// Table-driven lookup: one entry per enum member, scanned linearly.
	struct CSVStateName {
		CSVState state;
		const char *name;
	};
	static constexpr CSVStateName kNames[] = {
	    {CSVState::STANDARD, "STANDARD"},
	    {CSVState::DELIMITER, "DELIMITER"},
	    {CSVState::RECORD_SEPARATOR, "RECORD_SEPARATOR"},
	    {CSVState::CARRIAGE_RETURN, "CARRIAGE_RETURN"},
	    {CSVState::QUOTED, "QUOTED"},
	    {CSVState::UNQUOTED, "UNQUOTED"},
	    {CSVState::ESCAPE, "ESCAPE"},
	    {CSVState::EMPTY_LINE, "EMPTY_LINE"},
	    {CSVState::INVALID, "INVALID"}};
	for (const auto &entry : kNames) {
		if (entry.state == value) {
			return entry.name;
		}
	}
	// Unknown enumerator: mirror the generated code's error contract.
	throw NotImplementedException(StringUtil::Format("Enum value: '%d' not implemented", value));
}

template<>
CSVState EnumUtil::FromString<CSVState>(const char *value) {
	// Parses the canonical name produced by ToChars<CSVState> back into the enum.
	// Throws NotImplementedException on any unrecognized input.
	struct CSVStateName {
		const char *name;
		CSVState state;
	};
	static constexpr CSVStateName kNames[] = {
	    {"STANDARD", CSVState::STANDARD},
	    {"DELIMITER", CSVState::DELIMITER},
	    {"RECORD_SEPARATOR", CSVState::RECORD_SEPARATOR},
	    {"CARRIAGE_RETURN", CSVState::CARRIAGE_RETURN},
	    {"QUOTED", CSVState::QUOTED},
	    {"UNQUOTED", CSVState::UNQUOTED},
	    {"ESCAPE", CSVState::ESCAPE},
	    {"EMPTY_LINE", CSVState::EMPTY_LINE},
	    {"INVALID", CSVState::INVALID}};
	for (const auto &entry : kNames) {
		if (StringUtil::Equals(value, entry.name)) {
			return entry.state;
		}
	}
	throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
}

template<>
const char* EnumUtil::ToChars<CTEMaterialize>(CTEMaterialize value) {
switch(value) {
Expand Down Expand Up @@ -2305,6 +2366,24 @@ FunctionSideEffects EnumUtil::FromString<FunctionSideEffects>(const char *value)
throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
}

template<>
const char* EnumUtil::ToChars<HLLStorageType>(HLLStorageType value) {
	// Only one HyperLogLog storage layout is currently defined, so a single
	// comparison suffices; anything else is an unimplemented enumerator.
	if (value == HLLStorageType::UNCOMPRESSED) {
		return "UNCOMPRESSED";
	}
	throw NotImplementedException(StringUtil::Format("Enum value: '%d' not implemented", value));
}

template<>
HLLStorageType EnumUtil::FromString<HLLStorageType>(const char *value) {
	// Guard clause: reject anything that is not the single known name.
	if (!StringUtil::Equals(value, "UNCOMPRESSED")) {
		throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
	}
	return HLLStorageType::UNCOMPRESSED;
}

template<>
const char* EnumUtil::ToChars<IndexConstraintType>(IndexConstraintType value) {
switch(value) {
Expand Down Expand Up @@ -3761,8 +3840,6 @@ const char* EnumUtil::ToChars<ParserMode>(ParserMode value) {
switch(value) {
case ParserMode::PARSING:
return "PARSING";
case ParserMode::SNIFFING_DIALECT:
return "SNIFFING_DIALECT";
case ParserMode::SNIFFING_DATATYPES:
return "SNIFFING_DATATYPES";
case ParserMode::PARSING_HEADER:
Expand All @@ -3777,9 +3854,6 @@ ParserMode EnumUtil::FromString<ParserMode>(const char *value) {
if (StringUtil::Equals(value, "PARSING")) {
return ParserMode::PARSING;
}
if (StringUtil::Equals(value, "SNIFFING_DIALECT")) {
return ParserMode::SNIFFING_DIALECT;
}
if (StringUtil::Equals(value, "SNIFFING_DATATYPES")) {
return ParserMode::SNIFFING_DATATYPES;
}
Expand All @@ -3794,6 +3868,8 @@ const char* EnumUtil::ToChars<PartitionSortStage>(PartitionSortStage value) {
switch(value) {
case PartitionSortStage::INIT:
return "INIT";
case PartitionSortStage::SCAN:
return "SCAN";
case PartitionSortStage::PREPARE:
return "PREPARE";
case PartitionSortStage::MERGE:
Expand All @@ -3810,6 +3886,9 @@ PartitionSortStage EnumUtil::FromString<PartitionSortStage>(const char *value) {
if (StringUtil::Equals(value, "INIT")) {
return PartitionSortStage::INIT;
}
if (StringUtil::Equals(value, "SCAN")) {
return PartitionSortStage::SCAN;
}
if (StringUtil::Equals(value, "PREPARE")) {
return PartitionSortStage::PREPARE;
}
Expand Down Expand Up @@ -4547,6 +4626,34 @@ QueryResultType EnumUtil::FromString<QueryResultType>(const char *value) {
throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
}

template<>
const char* EnumUtil::ToChars<QuoteRule>(QuoteRule value) {
	// Returns the serializable name of a CSV quote-handling rule.
	if (value == QuoteRule::QUOTES_RFC) {
		return "QUOTES_RFC";
	} else if (value == QuoteRule::QUOTES_OTHER) {
		return "QUOTES_OTHER";
	} else if (value == QuoteRule::NO_QUOTES) {
		return "NO_QUOTES";
	}
	// Unknown enumerator: keep the generated error contract intact.
	throw NotImplementedException(StringUtil::Format("Enum value: '%d' not implemented", value));
}

template<>
QuoteRule EnumUtil::FromString<QuoteRule>(const char *value) {
	// Inverse of ToChars<QuoteRule>; throws on any unrecognized name.
	if (StringUtil::Equals(value, "QUOTES_RFC")) {
		return QuoteRule::QUOTES_RFC;
	} else if (StringUtil::Equals(value, "QUOTES_OTHER")) {
		return QuoteRule::QUOTES_OTHER;
	} else if (StringUtil::Equals(value, "NO_QUOTES")) {
		return QuoteRule::NO_QUOTES;
	}
	throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
}

template<>
const char* EnumUtil::ToChars<RelationType>(RelationType value) {
switch(value) {
Expand Down Expand Up @@ -6009,6 +6116,8 @@ const char* EnumUtil::ToChars<VerificationType>(VerificationType value) {
return "DESERIALIZED";
case VerificationType::DESERIALIZED_V2:
return "DESERIALIZED_V2";
case VerificationType::DESERIALIZED_V2_NO_DEFAULT:
return "DESERIALIZED_V2_NO_DEFAULT";
case VerificationType::PARSED:
return "PARSED";
case VerificationType::UNOPTIMIZED:
Expand Down Expand Up @@ -6040,6 +6149,9 @@ VerificationType EnumUtil::FromString<VerificationType>(const char *value) {
if (StringUtil::Equals(value, "DESERIALIZED_V2")) {
return VerificationType::DESERIALIZED_V2;
}
if (StringUtil::Equals(value, "DESERIALIZED_V2_NO_DEFAULT")) {
return VerificationType::DESERIALIZED_V2_NO_DEFAULT;
}
if (StringUtil::Equals(value, "PARSED")) {
return VerificationType::PARSED;
}
Expand Down
10 changes: 7 additions & 3 deletions src/duckdb/src/common/extra_type_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -326,9 +326,11 @@ struct EnumTypeInfoTemplated : public EnumTypeInfo {
return make_shared<EnumTypeInfoTemplated>(values_insert_order, size);
}

static shared_ptr<EnumTypeInfoTemplated> FormatDeserialize(FormatDeserializer &source, uint32_t size) {
static shared_ptr<EnumTypeInfoTemplated> FormatDeserialize(FormatDeserializer &deserializer, uint32_t size) {
Vector values_insert_order(LogicalType::VARCHAR, size);
values_insert_order.FormatDeserialize(source, size);
deserializer.ReadObject(201, "values_insert_order", [&](FormatDeserializer &source) {
values_insert_order.FormatDeserialize(source, size);
});
return make_shared<EnumTypeInfoTemplated>(values_insert_order, size);
}

Expand Down Expand Up @@ -477,7 +479,9 @@ void EnumTypeInfo::Serialize(FieldWriter &writer) const {
void EnumTypeInfo::FormatSerialize(FormatSerializer &serializer) const {
ExtraTypeInfo::FormatSerialize(serializer);
serializer.WriteProperty(200, "dict_size", dict_size);
((Vector &)values_insert_order).FormatSerialize(serializer, dict_size); // NOLINT - FIXME
serializer.WriteObject(201, "values_insert_order", [&](FormatSerializer &serializer) {
((Vector &)GetValuesInsertOrder()).FormatSerialize(serializer, dict_size); // NOLINT - FIXME
});
}

} // namespace duckdb
39 changes: 8 additions & 31 deletions src/duckdb/src/common/radix_partitioning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,11 @@ struct RadixPartitioningConstants {
};

template <class OP, class RETURN_TYPE, typename... ARGS>
RETURN_TYPE RadixBitsSwitch(idx_t radix_bits, ARGS &&... args) {
RETURN_TYPE RadixBitsSwitch(idx_t radix_bits, ARGS &&...args) {
D_ASSERT(radix_bits <= RadixPartitioning::MAX_RADIX_BITS);
switch (radix_bits) {
case 0:
return OP::template Operation<0>(std::forward<ARGS>(args)...);
case 1:
return OP::template Operation<1>(std::forward<ARGS>(args)...);
case 2:
Expand Down Expand Up @@ -82,36 +84,6 @@ idx_t RadixPartitioning::Select(Vector &hashes, const SelectionVector *sel, idx_
return RadixBitsSwitch<SelectFunctor, idx_t>(radix_bits, hashes, sel, count, cutoff, true_sel, false_sel);
}

struct HashsToBinsFunctor {
template <idx_t radix_bits>
static void Operation(Vector &hashes, Vector &bins, idx_t count) {
using CONSTANTS = RadixPartitioningConstants<radix_bits>;
UnaryExecutor::Execute<hash_t, hash_t>(hashes, bins, count,
[&](hash_t hash) { return CONSTANTS::ApplyMask(hash); });
}
};

//===--------------------------------------------------------------------===//
// Row Data Partitioning
//===--------------------------------------------------------------------===//
template <idx_t radix_bits>
static void InitPartitions(BufferManager &buffer_manager, vector<unique_ptr<RowDataCollection>> &partition_collections,
RowDataBlock *partition_blocks[], vector<BufferHandle> &partition_handles,
data_ptr_t partition_ptrs[], idx_t block_capacity, idx_t row_width) {
using CONSTANTS = RadixPartitioningConstants<radix_bits>;

partition_collections.reserve(CONSTANTS::NUM_PARTITIONS);
partition_handles.reserve(CONSTANTS::NUM_PARTITIONS);
for (idx_t i = 0; i < CONSTANTS::NUM_PARTITIONS; i++) {
partition_collections.push_back(make_uniq<RowDataCollection>(buffer_manager, block_capacity, row_width));
partition_blocks[i] = &partition_collections[i]->CreateBlock();
partition_handles.push_back(buffer_manager.Pin(partition_blocks[i]->block));
if (partition_ptrs) {
partition_ptrs[i] = partition_handles[i].Ptr();
}
}
}

struct ComputePartitionIndicesFunctor {
template <idx_t radix_bits>
static void Operation(Vector &hashes, Vector &partition_indices, idx_t count) {
Expand All @@ -129,6 +101,7 @@ RadixPartitionedColumnData::RadixPartitionedColumnData(ClientContext &context_p,
idx_t radix_bits_p, idx_t hash_col_idx_p)
: PartitionedColumnData(PartitionedColumnDataType::RADIX, context_p, std::move(types_p)), radix_bits(radix_bits_p),
hash_col_idx(hash_col_idx_p) {
D_ASSERT(radix_bits <= RadixPartitioning::MAX_RADIX_BITS);
D_ASSERT(hash_col_idx < types.size());
const auto num_partitions = RadixPartitioning::NumberOfPartitions(radix_bits);
allocators->allocators.reserve(num_partitions);
Expand Down Expand Up @@ -173,6 +146,7 @@ RadixPartitionedTupleData::RadixPartitionedTupleData(BufferManager &buffer_manag
idx_t radix_bits_p, idx_t hash_col_idx_p)
: PartitionedTupleData(PartitionedTupleDataType::RADIX, buffer_manager, layout_p.Copy()), radix_bits(radix_bits_p),
hash_col_idx(hash_col_idx_p) {
D_ASSERT(radix_bits <= RadixPartitioning::MAX_RADIX_BITS);
D_ASSERT(hash_col_idx < layout.GetTypes().size());
const auto num_partitions = RadixPartitioning::NumberOfPartitions(radix_bits);
allocators->allocators.reserve(num_partitions);
Expand Down Expand Up @@ -215,6 +189,9 @@ void RadixPartitionedTupleData::InitializeAppendStateInternal(PartitionedTupleDa
column_ids.emplace_back(col_idx);
}
partitions[0]->InitializeAppend(state.chunk_state, std::move(column_ids));

// Initialize fixed-size map
state.fixed_partition_entries.resize(RadixPartitioning::NumberOfPartitions(radix_bits));
}

void RadixPartitionedTupleData::ComputePartitionIndices(PartitionedTupleDataAppendState &state, DataChunk &input) {
Expand Down
21 changes: 18 additions & 3 deletions src/duckdb/src/common/row_operations/row_aggregate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ void RowOperations::CombineStates(RowOperationsState &state, TupleDataLayout &la
// Move to the first aggregate states
VectorOperations::AddInPlace(sources, layout.GetAggrOffset(), count);
VectorOperations::AddInPlace(targets, layout.GetAggrOffset(), count);

// Keep track of the offset
idx_t offset = layout.GetAggrOffset();

for (auto &aggr : layout.GetAggregates()) {
D_ASSERT(aggr.function.combine);
AggregateInputData aggr_input_data(aggr.GetFunctionData(), state.allocator);
Expand All @@ -84,23 +88,34 @@ void RowOperations::CombineStates(RowOperationsState &state, TupleDataLayout &la
// Move to the next aggregate states
VectorOperations::AddInPlace(sources, aggr.payload_size, count);
VectorOperations::AddInPlace(targets, aggr.payload_size, count);

// Increment the offset
offset += aggr.payload_size;
}

// Now subtract the offset to get back to the original position
VectorOperations::AddInPlace(sources, -offset, count);
VectorOperations::AddInPlace(targets, -offset, count);
}

void RowOperations::FinalizeStates(RowOperationsState &state, TupleDataLayout &layout, Vector &addresses,
DataChunk &result, idx_t aggr_idx) {
// Copy the addresses
Vector addresses_copy(LogicalType::POINTER);
VectorOperations::Copy(addresses, addresses_copy, result.size(), 0, 0);

// Move to the first aggregate state
VectorOperations::AddInPlace(addresses, layout.GetAggrOffset(), result.size());
VectorOperations::AddInPlace(addresses_copy, layout.GetAggrOffset(), result.size());

auto &aggregates = layout.GetAggregates();
for (idx_t i = 0; i < aggregates.size(); i++) {
auto &target = result.data[aggr_idx + i];
auto &aggr = aggregates[i];
AggregateInputData aggr_input_data(aggr.GetFunctionData(), state.allocator);
aggr.function.finalize(addresses, aggr_input_data, target, result.size(), 0);
aggr.function.finalize(addresses_copy, aggr_input_data, target, result.size(), 0);

// Move to the next aggregate state
VectorOperations::AddInPlace(addresses, aggr.payload_size, result.size());
VectorOperations::AddInPlace(addresses_copy, aggr.payload_size, result.size());
}
}

Expand Down
Loading

0 comments on commit 8ecbee2

Please sign in to comment.