Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

merge queue: embarking main (290ccf2) and #7731 together #7775

Closed
wants to merge 18 commits into from
125 changes: 82 additions & 43 deletions zebra-state/src/service/finalized_state/disk_db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use std::{
use itertools::Itertools;
use rlimit::increase_nofile_limit;

use rocksdb::ReadOptions;
use zebra_chain::parameters::Network;

use crate::{
Expand Down Expand Up @@ -194,7 +195,7 @@ pub trait ReadDisk {
fn zs_first_key_value<C, K, V>(&self, cf: &C) -> Option<(K, V)>
where
C: rocksdb::AsColumnFamilyRef,
K: FromDisk,
K: IntoDisk + FromDisk,
V: FromDisk;

/// Returns the highest key in `cf`, and the corresponding value.
Expand All @@ -203,7 +204,7 @@ pub trait ReadDisk {
fn zs_last_key_value<C, K, V>(&self, cf: &C) -> Option<(K, V)>
where
C: rocksdb::AsColumnFamilyRef,
K: FromDisk,
K: IntoDisk + FromDisk,
V: FromDisk;

/// Returns the first key greater than or equal to `lower_bound` in `cf`,
Expand Down Expand Up @@ -321,34 +322,22 @@ impl ReadDisk for DiskDb {
fn zs_first_key_value<C, K, V>(&self, cf: &C) -> Option<(K, V)>
where
C: rocksdb::AsColumnFamilyRef,
K: FromDisk,
K: IntoDisk + FromDisk,
V: FromDisk,
{
// Reading individual values from iterators does not seem to cause database hangs.
self.db
.iterator_cf(cf, rocksdb::IteratorMode::Start)
.next()?
.map(|(key_bytes, value_bytes)| {
Some((K::from_bytes(key_bytes), V::from_bytes(value_bytes)))
})
.expect("unexpected database failure")
self.zs_range_iter(cf, .., false).next()
}

#[allow(clippy::unwrap_in_result)]
fn zs_last_key_value<C, K, V>(&self, cf: &C) -> Option<(K, V)>
where
C: rocksdb::AsColumnFamilyRef,
K: FromDisk,
K: IntoDisk + FromDisk,
V: FromDisk,
{
// Reading individual values from iterators does not seem to cause database hangs.
self.db
.iterator_cf(cf, rocksdb::IteratorMode::End)
.next()?
.map(|(key_bytes, value_bytes)| {
Some((K::from_bytes(key_bytes), V::from_bytes(value_bytes)))
})
.expect("unexpected database failure")
self.zs_range_iter(cf, .., true).next()
}

#[allow(clippy::unwrap_in_result)]
Expand All @@ -358,17 +347,8 @@ impl ReadDisk for DiskDb {
K: IntoDisk + FromDisk,
V: FromDisk,
{
let lower_bound = lower_bound.as_bytes();
let from = rocksdb::IteratorMode::From(lower_bound.as_ref(), rocksdb::Direction::Forward);

// Reading individual values from iterators does not seem to cause database hangs.
self.db
.iterator_cf(cf, from)
.next()?
.map(|(key_bytes, value_bytes)| {
Some((K::from_bytes(key_bytes), V::from_bytes(value_bytes)))
})
.expect("unexpected database failure")
self.zs_range_iter(cf, lower_bound.., false).next()
}

#[allow(clippy::unwrap_in_result)]
Expand All @@ -378,17 +358,8 @@ impl ReadDisk for DiskDb {
K: IntoDisk + FromDisk,
V: FromDisk,
{
let upper_bound = upper_bound.as_bytes();
let from = rocksdb::IteratorMode::From(upper_bound.as_ref(), rocksdb::Direction::Reverse);

// Reading individual values from iterators does not seem to cause database hangs.
self.db
.iterator_cf(cf, from)
.next()?
.map(|(key_bytes, value_bytes)| {
Some((K::from_bytes(key_bytes), V::from_bytes(value_bytes)))
})
.expect("unexpected database failure")
self.zs_range_iter(cf, ..=upper_bound, true).next()
}

fn zs_items_in_range_ordered<C, K, V, R>(&self, cf: &C, range: R) -> BTreeMap<K, V>
Expand All @@ -398,7 +369,7 @@ impl ReadDisk for DiskDb {
V: FromDisk,
R: RangeBounds<K>,
{
self.zs_range_iter(cf, range).collect()
self.zs_range_iter(cf, range, false).collect()
}

fn zs_items_in_range_unordered<C, K, V, R>(&self, cf: &C, range: R) -> HashMap<K, V>
Expand All @@ -408,7 +379,7 @@ impl ReadDisk for DiskDb {
V: FromDisk,
R: RangeBounds<K>,
{
self.zs_range_iter(cf, range).collect()
self.zs_range_iter(cf, range, false).collect()
}
}

Expand All @@ -430,15 +401,24 @@ impl DiskWriteBatch {
impl DiskDb {
/// Returns an iterator over the items in `cf` in `range`.
///
/// Accepts a `reverse` argument. If it is `true`, creates the iterator with an
/// [`IteratorMode`](rocksdb::IteratorMode) of [`End`](rocksdb::IteratorMode::End), or
/// [`From`](rocksdb::IteratorMode::From) with [`Direction::Reverse`](rocksdb::Direction::Reverse).
///
/// Holding this iterator open might delay block commit transactions.
pub fn zs_range_iter<C, K, V, R>(&self, cf: &C, range: R) -> impl Iterator<Item = (K, V)> + '_
pub fn zs_range_iter<C, K, V, R>(
&self,
cf: &C,
range: R,
reverse: bool,
) -> impl Iterator<Item = (K, V)> + '_
where
C: rocksdb::AsColumnFamilyRef,
K: IntoDisk + FromDisk,
V: FromDisk,
R: RangeBounds<K>,
{
self.zs_range_iter_with_direction(cf, range, false)
self.zs_range_iter_with_direction(cf, range, reverse)
}

/// Returns a reverse iterator over the items in `cf` in `range`.
Expand Down Expand Up @@ -495,11 +475,12 @@ impl DiskDb {
let range = (start_bound, end_bound);

let mode = Self::zs_iter_mode(&range, reverse);
let opts = Self::zs_iter_opts(&range);

// Reading multiple items from iterators has caused database hangs,
// in previous RocksDB versions
self.db
.iterator_cf(cf, mode)
.iterator_cf_opt(cf, opts, mode)
.map(|result| result.expect("unexpected database failure"))
.map(|(key, value)| (key.to_vec(), value))
// Skip excluded "from" bound and empty ranges. The `mode` already skips keys
Expand All @@ -514,6 +495,64 @@ impl DiskDb {
.map(|(key, value)| (K::from_bytes(key), V::from_bytes(value)))
}

/// Returns the RocksDB ReadOptions with a lower and upper bound for a range.
fn zs_iter_opts<R>(range: &R) -> ReadOptions
where
R: RangeBounds<Vec<u8>>,
{
let mut opts = ReadOptions::default();
let (lower_bound, upper_bound) = Self::zs_iter_bounds(range);

if let Some(bound) = lower_bound {
opts.set_iterate_lower_bound(bound);
};

if let Some(bound) = upper_bound {
opts.set_iterate_upper_bound(bound);
};

opts
}

/// Returns a lower and upper iterate bounds for a range.
///
/// Note: Since upper iterate bounds are always exclusive in RocksDB, this method
/// will increment the upper bound by 1 if the end bound of the provided range
/// is inclusive.
fn zs_iter_bounds<R>(range: &R) -> (Option<Vec<u8>>, Option<Vec<u8>>)
where
R: RangeBounds<Vec<u8>>,
{
use std::ops::Bound::*;

let lower_bound = match range.start_bound() {
Included(bound) | Excluded(bound) => Some(bound.clone()),
Unbounded => None,
};

let upper_bound = match range.end_bound().cloned() {
Included(mut bound) => {
// Increment the last byte in the upper bound that is less than u8::MAX, and
// clear any bytes after it to increment the next key in lexicographic order
// (next big-endian number) this Vec represents to RocksDB.
let is_wrapped_overflow = bound.iter_mut().rev().all(|v| {
*v = v.wrapping_add(1);
v == &0
});

if is_wrapped_overflow {
bound.insert(0, 0x01)
}

Some(bound)
}
Excluded(bound) => Some(bound),
Unbounded => None,
};

(lower_bound, upper_bound)
}

/// Returns the RocksDB iterator "from" mode for `range`.
///
/// RocksDB iterators are ordered by increasing key bytes by default.
Expand Down
47 changes: 47 additions & 0 deletions zebra-state/src/service/finalized_state/disk_db/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,50 @@ impl DiskDb {
rocksdb::DB::list_cf(&opts, path)
}
}

/// Check that zs_iter_opts returns an upper bound one greater than provided inclusive end bounds.
#[test]
fn zs_iter_opts_increments_key_by_one() {
let _init_guard = zebra_test::init();

// TODO: add an empty key (`()` type or `[]` when serialized) test case
let keys: [u32; 14] = [
0,
1,
200,
255,
256,
257,
65535,
65536,
65537,
16777215,
16777216,
16777217,
16777218,
u32::MAX,
];

for key in keys {
let (_, bytes) = DiskDb::zs_iter_bounds(&..=key.to_be_bytes().to_vec());
let mut extra_bytes = bytes.expect("there should be an upper bound");
let bytes = extra_bytes.split_off(extra_bytes.len() - 4);
let upper_bound = u32::from_be_bytes(bytes.clone().try_into().expect("should be 4 bytes"));
let expected_upper_bound = key.wrapping_add(1);

assert_eq!(
expected_upper_bound, upper_bound,
"the upper bound should be 1 greater than the original key"
);

if expected_upper_bound == 0 {
assert_eq!(
extra_bytes,
vec![1],
"there should be an extra byte with a value of 1"
);
} else {
assert_eq!(extra_bytes.len(), 0, "there should be no extra bytes");
}
}
}
12 changes: 6 additions & 6 deletions zebra-state/src/service/finalized_state/zebra_db/shielded.rs
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ impl ZebraDb {
R: std::ops::RangeBounds<Height>,
{
let sapling_trees = self.db.cf_handle("sapling_note_commitment_tree").unwrap();
self.db.zs_range_iter(&sapling_trees, range)
self.db.zs_range_iter(&sapling_trees, range, false)
}

/// Returns the Sapling note commitment trees in the reversed range, in decreasing height order.
Expand Down Expand Up @@ -282,7 +282,7 @@ impl ZebraDb {
if let Some(exclusive_end_bound) = exclusive_end_bound {
list = self
.db
.zs_range_iter(&sapling_subtrees, start_index..exclusive_end_bound)
.zs_range_iter(&sapling_subtrees, start_index..exclusive_end_bound, false)
.collect();
} else {
// If there is no end bound, just return all the trees.
Expand All @@ -291,7 +291,7 @@ impl ZebraDb {
// the trees run out.)
list = self
.db
.zs_range_iter(&sapling_subtrees, start_index..)
.zs_range_iter(&sapling_subtrees, start_index.., false)
.collect();
}

Expand Down Expand Up @@ -382,7 +382,7 @@ impl ZebraDb {
R: std::ops::RangeBounds<Height>,
{
let orchard_trees = self.db.cf_handle("orchard_note_commitment_tree").unwrap();
self.db.zs_range_iter(&orchard_trees, range)
self.db.zs_range_iter(&orchard_trees, range, false)
}

/// Returns the Orchard note commitment trees in the reversed range, in decreasing height order.
Expand Down Expand Up @@ -455,7 +455,7 @@ impl ZebraDb {
if let Some(exclusive_end_bound) = exclusive_end_bound {
list = self
.db
.zs_range_iter(&orchard_subtrees, start_index..exclusive_end_bound)
.zs_range_iter(&orchard_subtrees, start_index..exclusive_end_bound, false)
.collect();
} else {
// If there is no end bound, just return all the trees.
Expand All @@ -464,7 +464,7 @@ impl ZebraDb {
// the trees run out.)
list = self
.db
.zs_range_iter(&orchard_subtrees, start_index..)
.zs_range_iter(&orchard_subtrees, start_index.., false)
.collect();
}

Expand Down
Loading