Skip to content

Commit

Permalink
chore: Update vendored sources to duckdb/duckdb@d8a69cc
Browse files Browse the repository at this point in the history
Merge pull request duckdb/duckdb#13372 from Mytherin/walwriteblocks
  • Loading branch information
krlmlr committed Aug 11, 2024
1 parent 54f6176 commit 8fce032
Show file tree
Hide file tree
Showing 50 changed files with 717 additions and 131 deletions.
5 changes: 5 additions & 0 deletions src/duckdb/src/common/enum_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7841,6 +7841,8 @@ const char* EnumUtil::ToChars<WALType>(WALType value) {
return "DELETE_TUPLE";
case WALType::UPDATE_TUPLE:
return "UPDATE_TUPLE";
case WALType::ROW_GROUP_DATA:
return "ROW_GROUP_DATA";
case WALType::WAL_VERSION:
return "WAL_VERSION";
case WALType::CHECKPOINT:
Expand Down Expand Up @@ -7923,6 +7925,9 @@ WALType EnumUtil::FromString<WALType>(const char *value) {
if (StringUtil::Equals(value, "UPDATE_TUPLE")) {
return WALType::UPDATE_TUPLE;
}
if (StringUtil::Equals(value, "ROW_GROUP_DATA")) {
return WALType::ROW_GROUP_DATA;
}
if (StringUtil::Equals(value, "WAL_VERSION")) {
return WALType::WAL_VERSION;
}
Expand Down
6 changes: 3 additions & 3 deletions src/duckdb/src/function/table/version/pragma_version.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#ifndef DUCKDB_PATCH_VERSION
#define DUCKDB_PATCH_VERSION "1-dev4052"
#define DUCKDB_PATCH_VERSION "1-dev4072"
#endif
#ifndef DUCKDB_MINOR_VERSION
#define DUCKDB_MINOR_VERSION 0
Expand All @@ -8,10 +8,10 @@
#define DUCKDB_MAJOR_VERSION 1
#endif
#ifndef DUCKDB_VERSION
#define DUCKDB_VERSION "v1.0.1-dev4052"
#define DUCKDB_VERSION "v1.0.1-dev4072"
#endif
#ifndef DUCKDB_SOURCE_ID
#define DUCKDB_SOURCE_ID "f5ab7c167e"
#define DUCKDB_SOURCE_ID "d8a69cc656"
#endif
#include "duckdb/function/table/system_functions.hpp"
#include "duckdb/main/database.hpp"
Expand Down
1 change: 1 addition & 0 deletions src/duckdb/src/include/duckdb/common/enums/wal_type.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ enum class WALType : uint8_t {
INSERT_TUPLE = 26,
DELETE_TUPLE = 27,
UPDATE_TUPLE = 28,
ROW_GROUP_DATA = 29,
// -----------------------------
// Flush
// -----------------------------
Expand Down
2 changes: 2 additions & 0 deletions src/duckdb/src/include/duckdb/main/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,8 @@ struct DBConfigOptions {
bool initialize_in_main_thread = false;
//! The maximum number of schemas we will look through for "did you mean..." style errors in the catalog
idx_t catalog_error_max_schemas = 100;
//! DEBUG SETTING: whether or not to skip checkpointing on commit
bool debug_skip_checkpoint_on_commit = false;

bool operator==(const DBConfigOptions &other) const;
};
Expand Down
9 changes: 9 additions & 0 deletions src/duckdb/src/include/duckdb/main/settings.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,15 @@ struct DebugForceNoCrossProduct {
static Value GetSetting(const ClientContext &context);
};

//! Setting descriptor for the boolean "debug_skip_checkpoint_on_commit" option.
//! Only declarations are visible here; the definitions of SetGlobal/ResetGlobal/GetSetting live elsewhere.
//! NOTE(review): presumably backed by DBConfigOptions::debug_skip_checkpoint_on_commit - confirm in the definitions.
struct DebugSkipCheckpointOnCommit {
//! Name under which the setting is exposed
static constexpr const char *Name = "debug_skip_checkpoint_on_commit";
//! Human-readable description of the setting
static constexpr const char *Description = "DEBUG SETTING: skip checkpointing on commit";
//! The setting takes a BOOLEAN value
static constexpr const LogicalTypeId InputType = LogicalTypeId::BOOLEAN;
//! Global-scope setter: receives the database instance, its config and the new value
static void SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &parameter);
//! Global-scope reset: receives the database instance and its config
static void ResetGlobal(DatabaseInstance *db, DBConfig &config);
//! Returns the setting as a Value for the given client context
static Value GetSetting(const ClientContext &context);
};

struct OrderedAggregateThreshold {
static constexpr const char *Name = "ordered_aggregate_threshold"; // NOLINT
static constexpr const char *Description = // NOLINT
Expand Down
2 changes: 2 additions & 0 deletions src/duckdb/src/include/duckdb/storage/block_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ class BlockManager {
virtual bool IsRootBlock(MetaBlockPointer root) = 0;
//! Mark a block as "free"; free blocks are immediately added to the free list and can be immediately overwritten
virtual void MarkBlockAsFree(block_id_t block_id) = 0;
//! Mark a block as "used"; either the block is removed from the free list, or the reference count is incremented
virtual void MarkBlockAsUsed(block_id_t block_id) = 0;
//! Mark a block as "modified"; modified blocks are added to the free list after a checkpoint (i.e. their data is
//! assumed to be rewritten)
virtual void MarkBlockAsModified(block_id_t block_id) = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,6 @@ class RowGroupWriter {

CompressionType GetColumnCompressionType(idx_t i);

virtual void WriteColumnDataPointers(ColumnCheckpointState &column_checkpoint_state, Serializer &serializer) = 0;

virtual CheckpointType GetCheckpointType() const = 0;
virtual MetadataWriter &GetPayloadWriter() = 0;

Expand All @@ -51,8 +49,6 @@ class SingleFileRowGroupWriter : public RowGroupWriter {
TableDataWriter &writer, MetadataWriter &table_data_writer);

public:
void WriteColumnDataPointers(ColumnCheckpointState &column_checkpoint_state, Serializer &serializer) override;

CheckpointType GetCheckpointType() const override;
MetadataWriter &GetPayloadWriter() override;

Expand Down
12 changes: 10 additions & 2 deletions src/duckdb/src/include/duckdb/storage/data_pointer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,19 @@ struct ColumnSegmentState {
DynamicCastCheck<TARGET>(this);
return reinterpret_cast<const TARGET &>(*this);
}

public:
vector<block_id_t> blocks;
};

struct DataPointer {
explicit DataPointer(BaseStatistics stats) : statistics(std::move(stats)) {
}
explicit DataPointer(BaseStatistics stats);
// disable copy constructors
DataPointer(const DataPointer &other) = delete;
DataPointer &operator=(const DataPointer &) = delete;
//! enable move constructors
DUCKDB_API DataPointer(DataPointer &&other) noexcept;
DUCKDB_API DataPointer &operator=(DataPointer &&) noexcept;

uint64_t row_start;
uint64_t tuple_count;
Expand Down
4 changes: 2 additions & 2 deletions src/duckdb/src/include/duckdb/storage/data_table.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ class DataTable {
//! Commit the append
void CommitAppend(transaction_t commit_id, idx_t row_start, idx_t count);
//! Write a segment of the table to the WAL
void WriteToLog(WriteAheadLog &log, idx_t row_start, idx_t count);
void WriteToLog(WriteAheadLog &log, idx_t row_start, idx_t count, optional_ptr<StorageCommitState> commit_state);
//! Revert a set of appends made by the given AppendState, used to revert appends in the event of an error during
//! commit (e.g. because of an I/O exception)
void RevertAppend(idx_t start_row, idx_t count);
Expand All @@ -158,7 +158,7 @@ class DataTable {
void ScanTableSegment(idx_t start_row, idx_t count, const std::function<void(DataChunk &chunk)> &function);

//! Merge a row group collection directly into this table - appending it to the end of the table without copying
void MergeStorage(RowGroupCollection &data, TableIndexList &indexes);
void MergeStorage(RowGroupCollection &data, TableIndexList &indexes, optional_ptr<StorageCommitState> commit_state);

//! Append a chunk with the row ids [row_start, ..., row_start + chunk.size()] to all indexes of the table, returns
//! whether or not the append succeeded
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ class InMemoryBlockManager : public BlockManager {
//! Not supported by the in-memory block manager: unconditionally throws an InternalException
//! (block_id is ignored)
void MarkBlockAsFree(block_id_t block_id) override {
throw InternalException("Cannot perform IO in in-memory database - MarkBlockAsFree!");
}
//! Not supported by the in-memory block manager: unconditionally throws an InternalException
//! (block_id is ignored)
void MarkBlockAsUsed(block_id_t block_id) override {
throw InternalException("Cannot perform IO in in-memory database - MarkBlockAsUsed!");
}
//! Not supported by the in-memory block manager: unconditionally throws an InternalException
//! (block_id is ignored)
void MarkBlockAsModified(block_id_t block_id) override {
throw InternalException("Cannot perform IO in in-memory database - MarkBlockAsModified!");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ class SingleFileBlockManager : public BlockManager {
bool IsRootBlock(MetaBlockPointer root) override;
//! Mark a block as free (immediately re-writeable)
void MarkBlockAsFree(block_id_t block_id) override;
//! Mark a block as used (no longer re-writeable)
void MarkBlockAsUsed(block_id_t block_id) override;
//! Mark a block as modified (re-writeable after a checkpoint)
void MarkBlockAsModified(block_id_t block_id) override;
//! Increase the reference count of a block. The block should hold at least one reference
Expand Down Expand Up @@ -98,6 +100,8 @@ class SingleFileBlockManager : public BlockManager {
vector<MetadataHandle> GetFreeListBlocks();
void TrimFreeBlocks();

void IncreaseBlockReferenceCountInternal(block_id_t block_id);

private:
AttachedDatabase &db;
//! The active DatabaseHeader, either 0 (h1) or 1 (h2)
Expand Down
8 changes: 8 additions & 0 deletions src/duckdb/src/include/duckdb/storage/storage_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ class CheckpointWriter;
class DatabaseInstance;
class TransactionManager;
class TableCatalogEntry;
struct PersistentCollectionData;

class StorageCommitState {
public:
Expand All @@ -35,6 +36,11 @@ class StorageCommitState {
virtual void RevertCommit() = 0;
// Make the commit persistent
virtual void FlushCommit() = 0;

virtual void AddRowGroupData(DataTable &table, idx_t start_index, idx_t count,
unique_ptr<PersistentCollectionData> row_group_data) = 0;
virtual optional_ptr<PersistentCollectionData> GetRowGroupData(DataTable &table, idx_t start_index,
idx_t &count) = 0;
};

struct CheckpointOptions {
Expand Down Expand Up @@ -94,6 +100,7 @@ class StorageManager {
virtual DatabaseSize GetDatabaseSize() = 0;
virtual vector<MetadataBlockInfo> GetMetadataInfo() = 0;
virtual shared_ptr<TableIOManager> GetTableIOManager(BoundCreateTableInfo *info) = 0;
virtual BlockManager &GetBlockManager() = 0;

protected:
virtual void LoadDatabase(const optional_idx block_alloc_size) = 0;
Expand Down Expand Up @@ -143,6 +150,7 @@ class SingleFileStorageManager : public StorageManager {
DatabaseSize GetDatabaseSize() override;
vector<MetadataBlockInfo> GetMetadataInfo() override;
shared_ptr<TableIOManager> GetTableIOManager(BoundCreateTableInfo *info) override;
BlockManager &GetBlockManager() override;

protected:
void LoadDatabase(const optional_idx block_alloc_size) override;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,9 @@ class ArrayColumnData : public ColumnData {
PartialBlockManager &partial_block_manager) override;
unique_ptr<ColumnCheckpointState> Checkpoint(RowGroup &row_group, ColumnCheckpointInfo &info) override;

void DeserializeColumn(Deserializer &source, BaseStatistics &target_stats) override;
bool IsPersistent() override;
PersistentColumnData Serialize() override;
void InitializeColumn(PersistentColumnData &column_data, BaseStatistics &target_stats) override;

void GetColumnSegmentInfo(duckdb::idx_t row_group_index, vector<duckdb::idx_t> col_path,
vector<duckdb::ColumnSegmentInfo> &result) override;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ struct ColumnCheckpointState {
virtual unique_ptr<BaseStatistics> GetStatistics();

virtual void FlushSegment(unique_ptr<ColumnSegment> segment, idx_t segment_size);
virtual void WriteDataPointers(RowGroupWriter &writer, Serializer &serializer);
virtual PersistentColumnData ToPersistentData();

public:
template <class TARGET>
Expand Down
66 changes: 65 additions & 1 deletion src/duckdb/src/include/duckdb/storage/table/column_data.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "duckdb/storage/table/column_segment_tree.hpp"
#include "duckdb/common/mutex.hpp"
#include "duckdb/common/enums/scan_vector_type.hpp"
#include "duckdb/common/serializer/serialization_traits.hpp"

namespace duckdb {
class ColumnData;
Expand All @@ -31,6 +32,7 @@ struct PrefetchState;
struct RowGroupWriteInfo;
struct TableScanOptions;
struct TransactionData;
struct PersistentColumnData;

struct ColumnCheckpointInfo {
ColumnCheckpointInfo(RowGroupWriteInfo &info, idx_t column_idx) : info(info), column_idx(column_idx) {
Expand Down Expand Up @@ -147,7 +149,12 @@ class ColumnData {
virtual void CheckpointScan(ColumnSegment &segment, ColumnScanState &state, idx_t row_group_start, idx_t count,
Vector &scan_vector);

virtual void DeserializeColumn(Deserializer &deserializer, BaseStatistics &target_stats);
virtual bool IsPersistent();
vector<DataPointer> GetDataPointers();

virtual PersistentColumnData Serialize();
void InitializeColumn(PersistentColumnData &column_data);
virtual void InitializeColumn(PersistentColumnData &column_data, BaseStatistics &target_stats);
static shared_ptr<ColumnData> Deserialize(BlockManager &block_manager, DataTableInfo &info, idx_t column_index,
idx_t start_row, ReadStream &source, const LogicalType &type);

Expand Down Expand Up @@ -203,4 +210,61 @@ class ColumnData {
idx_t allocation_size;
};

//! Serializable description of a single column: its physical type, a list of DataPointer entries, and the
//! PersistentColumnData of any child columns (the structure is recursive through child_columns).
//! The type is move-only: copy construction and copy assignment are deleted.
struct PersistentColumnData {
//! Construct with a physical type only (definition not visible here)
explicit PersistentColumnData(PhysicalType physical_type);
//! Construct with a physical type and a list of data pointers, taken by value
PersistentColumnData(PhysicalType physical_type, vector<DataPointer> pointers);
// disable copy constructor and copy assignment
PersistentColumnData(const PersistentColumnData &other) = delete;
PersistentColumnData &operator=(const PersistentColumnData &) = delete;
//! enable move constructor and move assignment (both noexcept)
PersistentColumnData(PersistentColumnData &&other) noexcept = default;
PersistentColumnData &operator=(PersistentColumnData &&) noexcept = default;
//! Destructor is declared here and defined out of line
~PersistentColumnData();

//! Physical type of the column
PhysicalType physical_type;
//! Data pointers belonging to this column
PhysicalType-independent list; NOTE(review): presumably one entry per column segment - confirm
vector<DataPointer> pointers;
//! Persistent data of nested (child) columns
vector<PersistentColumnData> child_columns;

//! Write this object to the given serializer
void Serialize(Serializer &serializer) const;
//! Read an object back from the given deserializer
static PersistentColumnData Deserialize(Deserializer &deserializer);
//! NOTE(review): presumably reads one field (identified by field_idx / field_name) whose layout depends on
//! `type` - confirm against the definition, which is not visible here
void DeserializeField(Deserializer &deserializer, field_id_t field_idx, const char *field_name,
const LogicalType &type);
};

//! Serializable description of a row group: the column types, the persistent data of each column, and the
//! start / count values of the row group. The type is move-only: copy operations are deleted.
struct PersistentRowGroupData {
//! Construct from a list of column types, taken by value (definition not visible here)
explicit PersistentRowGroupData(vector<LogicalType> types);
//! Default construction yields empty vectors and start == count == 0
PersistentRowGroupData() = default;
// disable copy constructor and copy assignment
PersistentRowGroupData(const PersistentRowGroupData &other) = delete;
PersistentRowGroupData &operator=(const PersistentRowGroupData &) = delete;
//! enable move constructor and move assignment (both noexcept)
PersistentRowGroupData(PersistentRowGroupData &&other) noexcept = default;
PersistentRowGroupData &operator=(PersistentRowGroupData &&) noexcept = default;
~PersistentRowGroupData() = default;

//! Logical types of the columns
vector<LogicalType> types;
//! Persistent column data; NOTE(review): presumably one entry per entry in `types` - confirm
vector<PersistentColumnData> column_data;
//! NOTE(review): presumably the index of the first row of the row group - confirm against RowGroup.
//! Zero-initialized so that a default-constructed object never holds an indeterminate value.
idx_t start = 0;
//! NOTE(review): presumably the number of rows in the row group - confirm against RowGroup.
//! Zero-initialized so that a default-constructed object never holds an indeterminate value.
idx_t count = 0;

//! Write this object to the given serializer
void Serialize(Serializer &serializer) const;
//! Read an object back from the given deserializer
static PersistentRowGroupData Deserialize(Deserializer &deserializer);
};

//! Serializable container holding a list of PersistentRowGroupData entries.
//! The type is move-only: copy construction and copy assignment are deleted.
struct PersistentCollectionData {
//! Default construction yields an empty row_group_data list
PersistentCollectionData() = default;
// disable copy constructor and copy assignment
PersistentCollectionData(const PersistentCollectionData &other) = delete;
PersistentCollectionData &operator=(const PersistentCollectionData &) = delete;
//! enable move constructor and move assignment (both noexcept)
PersistentCollectionData(PersistentCollectionData &&other) noexcept = default;
PersistentCollectionData &operator=(PersistentCollectionData &&) noexcept = default;
~PersistentCollectionData() = default;

//! Persistent data of the row groups in this collection
vector<PersistentRowGroupData> row_group_data;

//! Write this object to the given serializer
void Serialize(Serializer &serializer) const;
//! Read an object back from the given deserializer
static PersistentCollectionData Deserialize(Deserializer &deserializer);
};

} // namespace duckdb
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@ class ColumnSegment : public SegmentBase<ColumnSegment> {
//! Updates pointers to refer to the given block and offset. This is only used
//! when sharing a block among segments. This is invoked only AFTER the block is written.
void MarkAsPersistent(shared_ptr<BlockHandle> block, uint32_t offset_in_block);
//! Gets a data pointer from a persistent column segment
DataPointer GetDataPointer();

block_id_t GetBlockId() {
D_ASSERT(segment_type == ColumnSegmentType::PERSISTENT);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,9 @@ class ListColumnData : public ColumnData {
PartialBlockManager &partial_block_manager) override;
unique_ptr<ColumnCheckpointState> Checkpoint(RowGroup &row_group, ColumnCheckpointInfo &info) override;

void DeserializeColumn(Deserializer &deserializer, BaseStatistics &target_stats) override;
bool IsPersistent() override;
PersistentColumnData Serialize() override;
void InitializeColumn(PersistentColumnData &column_data, BaseStatistics &target_stats) override;

void GetColumnSegmentInfo(duckdb::idx_t row_group_index, vector<duckdb::idx_t> col_path,
vector<duckdb::ColumnSegmentInfo> &result) override;
Expand Down
7 changes: 7 additions & 0 deletions src/duckdb/src/include/duckdb/storage/table/row_group.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ class TableStatistics;
struct ColumnSegmentInfo;
class Vector;
struct ColumnCheckpointState;
struct PersistentColumnData;
struct PersistentRowGroupData;
struct RowGroupPointer;
struct TransactionData;
class CollectionScanState;
Expand All @@ -44,6 +46,7 @@ struct RowGroupAppendState;
class MetadataManager;
class RowVersionManager;
class ScanFilterInfo;
class StorageCommitState;

struct RowGroupWriteInfo {
RowGroupWriteInfo(PartialBlockManager &manager, const vector<CompressionType> &compression_types,
Expand All @@ -68,6 +71,7 @@ class RowGroup : public SegmentBase<RowGroup> {
public:
RowGroup(RowGroupCollection &collection, idx_t start, idx_t count);
RowGroup(RowGroupCollection &collection, RowGroupPointer pointer);
RowGroup(RowGroupCollection &collection, PersistentRowGroupData &data);
~RowGroup();

private:
Expand Down Expand Up @@ -139,6 +143,8 @@ class RowGroup : public SegmentBase<RowGroup> {
idx_t GetCommittedRowCount();
RowGroupWriteData WriteToDisk(RowGroupWriter &writer);
RowGroupPointer Checkpoint(RowGroupWriteData write_data, RowGroupWriter &writer, TableStatistics &global_stats);
bool IsPersistent() const;
PersistentRowGroupData SerializeRowGroupInfo() const;

void InitializeAppend(RowGroupAppendState &append_state);
void Append(RowGroupAppendState &append_state, DataChunk &chunk, idx_t append_count);
Expand All @@ -152,6 +158,7 @@ class RowGroup : public SegmentBase<RowGroup> {

void MergeStatistics(idx_t column_idx, const BaseStatistics &other);
void MergeIntoStatistics(idx_t column_idx, BaseStatistics &other);
void MergeIntoStatistics(TableStatistics &other);
unique_ptr<BaseStatistics> GetStatistics(idx_t column_idx);

void GetColumnSegmentInfo(idx_t row_group_index, vector<ColumnSegmentInfo> &result);
Expand Down
Loading

0 comments on commit 8fce032

Please sign in to comment.