Skip to content

Commit

Permalink
Add options and code to disable all preloading of .sst files on DB::O…
Browse files Browse the repository at this point in the history
…pen() (#33)
  • Loading branch information
matthewvon authored Aug 20, 2021
1 parent bad4c41 commit d583bfc
Show file tree
Hide file tree
Showing 6 changed files with 52 additions and 11 deletions.
16 changes: 11 additions & 5 deletions db/version_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,10 @@ class VersionBuilder::Rep {
// kLevel0 -- NewestFirstBySeqNo
// kLevelNon0 -- BySmallestKey
struct FileComparator {
enum SortMethod { kLevel0 = 0, kLevelNon0 = 1, } sort_method;
enum SortMethod {
kLevel0 = 0,
kLevelNon0 = 1,
} sort_method;
const InternalKeyComparator* internal_comparator;

FileComparator() : internal_comparator(nullptr) {}
Expand Down Expand Up @@ -880,7 +883,7 @@ class VersionBuilder::Rep {
auto added_end = added_files.end();
while (added_iter != added_end || base_iter != base_end) {
if (base_iter == base_end ||
(added_iter != added_end && cmp(*added_iter, *base_iter))) {
(added_iter != added_end && cmp(*added_iter, *base_iter))) {
MaybeAddFile(vstorage, level, *added_iter++);
} else {
MaybeAddFile(vstorage, level, *base_iter++);
Expand All @@ -901,11 +904,15 @@ class VersionBuilder::Rep {
size_t max_file_size_for_l0_meta_pin) {
assert(table_cache_ != nullptr);

if (kFilePreloadDisabled == ioptions_->file_preload) {
return Status::OK();
}
size_t table_cache_capacity = table_cache_->get_cache()->GetCapacity();
bool always_load = (table_cache_capacity == TableCache::kInfiniteCapacity);
size_t max_load = port::kMaxSizet;
#ifndef NDEBUG
bool debug_override = true; // to enable CompactedDB related tests and some property tests
bool debug_override =
true; // to enable CompactedDB related tests and some property tests
#else
bool debug_override = false;
#endif
Expand Down Expand Up @@ -989,9 +996,8 @@ class VersionBuilder::Rep {
} else {
table_cache_->ReleaseHandle(file_meta->table_reader_handle);
file_meta->table_reader_handle = nullptr;
} // else
} // else
}

}
});

Expand Down
6 changes: 4 additions & 2 deletions db/version_edit_handler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -477,7 +477,8 @@ Status VersionEditHandler::MaybeCreateVersion(const VersionEdit& /*edit*/,
// Install new version
v->PrepareApply(
*cfd->GetLatestMutableCFOptions(),
!(version_set_->db_options_->skip_stats_update_on_db_open));
!(version_set_->db_options_->skip_stats_update_on_db_open ||
kFilePreloadDisabled == cfd->ioptions()->file_preload));
version_set_->AppendVersion(cfd, v);
} else {
delete v;
Expand Down Expand Up @@ -667,7 +668,8 @@ Status VersionEditHandlerPointInTime::MaybeCreateVersion(
if (s.ok()) {
version->PrepareApply(
*cfd->GetLatestMutableCFOptions(),
!version_set_->db_options_->skip_stats_update_on_db_open);
!(version_set_->db_options_->skip_stats_update_on_db_open ||
kFilePreloadDisabled == cfd->ioptions()->file_preload));
auto v_iter = versions_.find(cfd->GetID());
if (v_iter != versions_.end()) {
delete v_iter->second;
Expand Down
26 changes: 26 additions & 0 deletions include/rocksdb/advanced_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,25 @@ enum CompactionPri : char {
kMinOverlappingRatio = 0x3,
};

// RocksDB uses the first 25% of num_open_files for precaching during
// start-up and after compactions. The files precached in this fashion
// provide faster access. However, these files are also never released.
// Scenarios with large bloom filters that are not cached, or scenarios
// where the user manually lowers num_open_files at runtime, might want
// to disable this behavior.
enum FilePreload : char {
  // Preload and pin: the first 25% of num_open_files is spent precaching
  // table files during start-up and after compactions. Files precached
  // this way are faster to access, but they are never released.
  kFilePreloadWithPinning = 0,
  // Preload without pinning: the first 25% of num_open_files is still
  // spent precaching during start-up and after compactions, but nothing
  // is pinned within the cache. Access has one additional layer of
  // indirection; in exchange, the cache space can be freed.
  kFilePreloadWithoutPinning = 1,
  // No preload: existing table files are not opened during start-up.
  kFilePreloadDisabled = 2,
};

struct CompactionOptionsFIFO {
// once the total sum of table files reaches this, we will delete the oldest
// table file
Expand Down Expand Up @@ -691,6 +710,13 @@ struct AdvancedColumnFamilyOptions {
// Default: true
bool force_consistency_checks = true;

// RocksDB can preload and optionally pin table files within the table
// cache at start-up and after compactions. The files precached in this
// fashion provide faster access. However, these files are also never
// released from the table cache.
// Default: kFilePreloadWithPinning
FilePreload file_preload = kFilePreloadWithPinning;

// Measure IO stats in compactions and flushes, if true.
//
// Default: false
Expand Down
2 changes: 1 addition & 1 deletion include/rocksdb/write_batch.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ class WriteBatch : public WriteBatchBase {
Status PopSavePoint() override;

// Overwrites this batch's content flags with theContentFlag.
//
// The value is written with one sequentially consistent store. The
// previous body issued the identical store twice; the second write was
// redundant and has been dropped.
// NOTE(review): content_flags_ is presumably a std::atomic<uint32_t>
// member -- confirm against its declaration.
void setContentFlag(uint32_t theContentFlag) {
  content_flags_.store(theContentFlag, std::memory_order_seq_cst);
}

// Support for iterating over the contents of a batch.
Expand Down
11 changes: 8 additions & 3 deletions options/cf_options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,10 @@ static std::unordered_map<std::string, OptionTypeInfo>
{"compaction_measure_io_stats",
{0, OptionType::kBoolean, OptionVerificationType::kDeprecated,
OptionTypeFlags::kNone}},
{"file_preload",
{offset_of(&ColumnFamilyOptions::file_preload),
OptionType::kFilePreload, OptionVerificationType::kNormal,
OptionTypeFlags::kNone}},
{"inplace_update_support",
{offset_of(&ColumnFamilyOptions::inplace_update_support),
OptionType::kBoolean, OptionVerificationType::kNormal,
Expand Down Expand Up @@ -841,6 +845,7 @@ ImmutableCFOptions::ImmutableCFOptions(const ImmutableDBOptions& db_options,
num_levels(cf_options.num_levels),
optimize_filters_for_hits(cf_options.optimize_filters_for_hits),
force_consistency_checks(cf_options.force_consistency_checks),
file_preload(cf_options.file_preload),
allow_ingest_behind(db_options.allow_ingest_behind),
preserve_deletes(db_options.preserve_deletes),
listeners(db_options.listeners),
Expand Down Expand Up @@ -869,9 +874,9 @@ uint64_t MultiplyCheckOverflow(uint64_t op1, double op2) {
// when level_compaction_dynamic_level_bytes is true and leveled compaction
// is used, the base level is not always L1, so precomputed max_file_size can
// no longer be used. Recompute file_size_for_level from base level.
uint64_t MaxFileSizeForLevel(const MutableCFOptions& cf_options,
int level, CompactionStyle compaction_style, int base_level,
bool level_compaction_dynamic_level_bytes) {
uint64_t MaxFileSizeForLevel(const MutableCFOptions& cf_options, int level,
CompactionStyle compaction_style, int base_level,
bool level_compaction_dynamic_level_bytes) {
if (!level_compaction_dynamic_level_bytes || level < base_level ||
compaction_style != kCompactionStyleLevel) {
assert(level >= 0);
Expand Down
2 changes: 2 additions & 0 deletions options/cf_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ struct ImmutableCFOptions {

bool force_consistency_checks;

FilePreload file_preload;

bool allow_ingest_behind;

bool preserve_deletes;
Expand Down

0 comments on commit d583bfc

Please sign in to comment.