Skip to content

Commit

Permalink
test(scan): Add raw database format snapshots to the scanner (#8075)
Browse files Browse the repository at this point in the history
* Make some scanner storage methods more flexible

* Move tests to a submodule and expose test functions and constants

* Make scanner functions clearer and easier to use

* Simplify state snapshot test code

* Add raw data snapshot tests for the scanner

* Add snapshots

* Fix import path

* Fix import conditional compilation

* fix imports

* fix imports 2

* Put read and write db exports together

* Remove confusing IntoDisk/FromDisk impl

* Fix an incorrect unused method that could panic

* Delete a test that is no longer valid

---------

Co-authored-by: Alfredo Garcia <oxarbitrage@gmail.com>
  • Loading branch information
teor2345 and oxarbitrage authored Dec 12, 2023
1 parent 5bdad1b commit 3318eaa
Show file tree
Hide file tree
Showing 38 changed files with 703 additions and 253 deletions.
3 changes: 3 additions & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -5885,8 +5885,11 @@ dependencies = [
"ff",
"group",
"indexmap 2.1.0",
"insta",
"itertools 0.12.0",
"jubjub",
"proptest",
"proptest-derive",
"rand 0.8.5",
"semver 1.0.20",
"serde",
Expand Down
33 changes: 31 additions & 2 deletions zebra-scan/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,21 @@ categories = ["cryptography::cryptocurrencies"]

# Production features that activate extra dependencies, or extra features in dependencies

# Test features

proptest-impl = [
"proptest",
"proptest-derive",
"zebra-state/proptest-impl",
"zebra-chain/proptest-impl",
"bls12_381",
"ff",
"group",
"jubjub",
"rand",
"zcash_note_encryption",
]

[dependencies]

color-eyre = "0.6.2"
Expand All @@ -37,15 +52,29 @@ zebra-state = { path = "../zebra-state", version = "1.0.0-beta.31", features = [

chrono = { version = "0.4.31", default-features = false, features = ["clock", "std", "serde"] }

# test feature proptest-impl
proptest = { version = "1.4.0", optional = true }
proptest-derive = { version = "0.4.0", optional = true }

bls12_381 = { version = "0.8.0", optional = true }
ff = { version = "0.13.0", optional = true }
group = { version = "0.13.0", optional = true }
jubjub = { version = "0.10.0", optional = true }
rand = { version = "0.8.5", optional = true }
zcash_note_encryption = { version = "0.4.0", optional = true }

[dev-dependencies]

insta = { version = "1.33.0", features = ["ron", "redactions"] }
tokio = { version = "1.34.0", features = ["test-util"] }

proptest = "1.4.0"
proptest-derive = "0.4.0"
bls12_381 = "0.8.0"
ff = "0.13.0"
group = "0.13.0"
jubjub = "0.10.0"
rand = "0.8.5"
tokio = { version = "1.34.0", features = ["test-util"] }

zcash_note_encryption = "0.4.0"

zebra-state = { path = "../zebra-state", version = "1.0.0-beta.31", features = ["proptest-impl"] }
Expand Down
4 changes: 2 additions & 2 deletions zebra-scan/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ pub mod init;
pub mod scan;
pub mod storage;

#[cfg(test)]
mod tests;
#[cfg(any(test, feature = "proptest-impl"))]
pub mod tests;

pub use config::Config;
pub use init::{init, spawn_init};
6 changes: 3 additions & 3 deletions zebra-scan/src/scan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -211,8 +211,8 @@ pub async fn scan_height_and_store_results(
let dfvk_res = scanned_block_to_db_result(dfvk_res);
let ivk_res = scanned_block_to_db_result(ivk_res);

storage.add_sapling_results(sapling_key.clone(), height, dfvk_res);
storage.add_sapling_results(sapling_key, height, ivk_res);
storage.add_sapling_results(&sapling_key, height, dfvk_res);
storage.add_sapling_results(&sapling_key, height, ivk_res);

Ok::<_, Report>(())
})
Expand Down Expand Up @@ -398,7 +398,7 @@ fn scanned_block_to_db_result<Nf>(
.map(|tx| {
(
TransactionIndex::from_usize(tx.index),
SaplingScannedResult::from(tx.txid.as_ref()),
SaplingScannedResult::from_bytes_in_display_order(*tx.txid.as_ref()),
)
})
.collect()
Expand Down
13 changes: 10 additions & 3 deletions zebra-scan/src/storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,13 @@ impl Storage {
///
/// This method can block while writing database files, so it must be inside spawn_blocking()
/// in async code.
pub fn add_sapling_key(&mut self, sapling_key: &SaplingScanningKey, birthday: Option<Height>) {
pub fn add_sapling_key(
&mut self,
sapling_key: &SaplingScanningKey,
birthday: impl Into<Option<Height>>,
) {
let birthday = birthday.into();

// It's ok to write some keys and not others during shutdown, so each key can get its own
// batch. (They will be re-written on startup anyway.)
let mut batch = ScannerWriteBatch::default();
Expand All @@ -93,15 +99,16 @@ impl Storage {
self.sapling_keys_and_birthday_heights()
}

/// Add the sapling results for `height` to the storage.
/// Add the sapling results for `height` to the storage. The results can be any map of
/// [`TransactionIndex`] to [`SaplingScannedResult`].
///
/// # Performance / Hangs
///
/// This method can block while writing database files, so it must be inside spawn_blocking()
/// in async code.
pub fn add_sapling_results(
&mut self,
sapling_key: SaplingScanningKey,
sapling_key: &SaplingScanningKey,
height: Height,
sapling_results: BTreeMap<TransactionIndex, SaplingScannedResult>,
) {
Expand Down
3 changes: 3 additions & 0 deletions zebra-scan/src/storage/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ pub use zebra_state::{

pub mod sapling;

#[cfg(test)]
mod tests;

/// The directory name used to distinguish the scanner database from Zebra's other databases or
/// flat files.
///
Expand Down
4 changes: 3 additions & 1 deletion zebra-scan/src/storage/db/sapling.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,15 @@ impl Storage {
// Reading Sapling database entries

/// Returns the result for a specific database index (key, block height, transaction index).
/// Returns `None` if the result is missing or an empty marker for a birthday or progress
/// height.
//
// TODO: add tests for this method
pub fn sapling_result_for_index(
&self,
index: &SaplingScannedDatabaseIndex,
) -> Option<SaplingScannedResult> {
self.db.zs_get(&self.sapling_tx_ids_cf(), &index)
self.db.zs_get(&self.sapling_tx_ids_cf(), &index).flatten()
}

/// Returns the results for a specific key and block height.
Expand Down
3 changes: 3 additions & 0 deletions zebra-scan/src/storage/db/tests.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
//! General scanner database tests.
mod snapshot;
164 changes: 164 additions & 0 deletions zebra-scan/src/storage/db/tests/snapshot.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
//! Raw data snapshot tests for the scanner database format.
//!
//! These tests check:
//! - the name of each column family
//! - the number of key-value entries
//! - the bytes in each key and value
//!
//! These tests currently use fixed test vectors.
//!
//! # Fixing Test Failures
//!
//! If this test fails, run:
//! ```sh
//! cd zebra-scan
//! cargo insta test --review --features shielded-scan
//! ```
//! to update the test snapshots, then commit the `test_*.snap` files using git.
//!
//! # Snapshot Format
//!
//! These snapshots use [RON (Rusty Object Notation)](https://github.com/ron-rs/ron#readme),
//! a text format similar to Rust syntax. Raw byte data is encoded in hexadecimal.
//!
//! Due to `serde` limitations, some object types can't be represented exactly,
//! so RON uses the closest equivalent structure.
use std::{collections::BTreeMap, sync::Arc};

use zebra_chain::{
block::{Block, Height},
parameters::Network::{self, *},
serialization::ZcashDeserializeInto,
};
use zebra_state::{RawBytes, ReadDisk, TransactionIndex, KV};

use crate::{
storage::{db::ScannerDb, Storage},
tests::{FAKE_SAPLING_VIEWING_KEY, ZECPAGES_SAPLING_VIEWING_KEY},
Config,
};

/// Runs the raw RocksDB column family snapshot checks for Mainnet, then for Testnet.
///
/// The snapshots include the `default` column family, even though Zebra does not use it.
#[test]
fn test_raw_rocksdb_column_families() {
    let _init_guard = zebra_test::init();

    for network in [Mainnet, Testnet] {
        test_raw_rocksdb_column_families_with_network(network);
    }
}

/// Takes raw column family snapshots of a new ephemeral scanner database for `network`.
///
/// Snapshots are taken of the empty database, after two Sapling keys are added,
/// and after fake results for each of the first three blocks are stored.
///
/// See [`test_raw_rocksdb_column_families`].
fn test_raw_rocksdb_column_families_with_network(network: Network) {
    // Lowercase network name, used in the per-network snapshot suffixes.
    let net_suffix = network.to_string().to_ascii_lowercase();

    // Owned copies of the test keys, because the storage methods take `&String` keys.
    let zecpages_key = ZECPAGES_SAPLING_VIEWING_KEY.to_string();
    let fake_key = FAKE_SAPLING_VIEWING_KEY.to_string();

    let mut storage = Storage::new(&Config::ephemeral(), network);

    // RocksDB can return column families in any order, but we always look them up by name,
    // so the names are sorted before they are snapshotted.
    let mut cf_names = storage.db.list_cf().expect("empty database is valid");
    cf_names.sort();

    // The column family names must be identical on every network.
    // They are checked again at every block height by the data snapshots below.
    insta::assert_ron_snapshot!("column_family_names", cf_names);

    // Empty databases must also be identical on every network.
    let mut key_settings = insta::Settings::clone_current();
    key_settings.set_snapshot_suffix("empty");
    key_settings.bind(|| snapshot_raw_rocksdb_column_family_data(&storage.db, &cf_names));

    // A key with no birthday, which is automatically set to the activation height.
    storage.add_sapling_key(&zecpages_key, None);
    // A key with a birthday above the activation height.
    storage.add_sapling_key(&fake_key, Height(1_000_000));

    key_settings.set_snapshot_suffix(format!("{net_suffix}_keys"));
    key_settings.bind(|| snapshot_raw_rocksdb_column_family_data(&storage.db, &cf_names));

    // The raw database data is snapshotted for genesis, block 1, and block 2,
    // using the test vectors for the current network.
    let block_vectors = match network {
        Mainnet => &*zebra_test::vectors::CONTINUOUS_MAINNET_BLOCKS,
        Testnet => &*zebra_test::vectors::CONTINUOUS_TESTNET_BLOCKS,
    };

    // Only a few blocks are used, because each block adds a few kilobytes of serialized data.
    for height in 0..=2 {
        let block: Arc<Block> = block_vectors
            .get(&height)
            .expect("block height has test data")
            .zcash_deserialize_into()
            .expect("test data deserializes");

        // Store a fake result for every transaction in the block.
        storage.add_sapling_results(
            &zecpages_key,
            Height(height),
            block
                .transactions
                .iter()
                .enumerate()
                .map(|(tx_index, transaction)| {
                    (
                        TransactionIndex::from_usize(tx_index),
                        transaction.hash().into(),
                    )
                })
                .collect(),
        );

        let mut height_settings = insta::Settings::clone_current();
        height_settings.set_snapshot_suffix(format!("{net_suffix}_{height}"));
        height_settings.bind(|| snapshot_raw_rocksdb_column_family_data(&storage.db, &cf_names));
    }
}

/// Snapshot the data in each column family, using `cargo insta` and RON serialization.
///
/// `original_cf_names` is the sorted list of column family names the caller expects `db` to
/// contain. Each non-empty column family is snapshotted as `{cf_name}_raw_data`, and the
/// names of the empty column families are snapshotted together as `empty_column_families`.
///
/// # Panics
///
/// - if the sorted column family names in `db` differ from `original_cf_names`
/// - if the `default` column family contains any entries
/// - if a snapshot assertion fails
fn snapshot_raw_rocksdb_column_family_data(db: &ScannerDb, original_cf_names: &[String]) {
    // This function is also called after keys and results have been written,
    // so the database is not necessarily empty here.
    let mut new_cf_names = db
        .list_cf()
        .expect("database column families can be listed");
    new_cf_names.sort();

    // Assert that column family names are the same, regardless of the network or block height.
    // This catches missing or renamed column families, as well as extra ones.
    assert_eq!(
        original_cf_names, new_cf_names,
        "column family names changed: expected names (left) differ from database names (right)",
    );

    let mut empty_column_families = Vec::new();

    // Now run the data snapshots
    for cf_name in original_cf_names {
        let cf_handle = db
            .cf_handle(cf_name)
            .expect("RocksDB API provides correct names");

        // Correctness: Multi-key iteration causes hangs in concurrent code, but seems ok in tests.
        let cf_items: BTreeMap<RawBytes, RawBytes> = db.zs_items_in_range_ordered(&cf_handle, ..);

        // The default raw data serialization is very verbose, so we hex-encode the bytes.
        let cf_data: Vec<KV> = cf_items
            .iter()
            .map(|(key, value)| KV::new(key.raw_bytes(), value.raw_bytes()))
            .collect();

        if cf_name == "default" {
            assert_eq!(cf_data.len(), 0, "default column family is never used");
        } else if cf_data.is_empty() {
            // distinguish column family names from empty column families
            empty_column_families.push(format!("{cf_name}: no entries"));
        } else {
            // Each non-empty column family gets its own snapshot of raw keys and values.
            insta::assert_ron_snapshot!(format!("{cf_name}_raw_data"), cf_data);
        }
    }

    insta::assert_ron_snapshot!("empty_column_families", empty_column_families);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
source: zebra-scan/src/storage/db/tests/snapshot.rs
expression: cf_names
---
[
"default",
"sapling_tx_ids",
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
source: zebra-scan/src/storage/db/tests/snapshot.rs
expression: empty_column_families
---
[
"sapling_tx_ids: no entries",
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: zebra-scan/src/storage/db/tests/snapshot.rs
expression: empty_column_families
---
[]
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: zebra-scan/src/storage/db/tests/snapshot.rs
expression: empty_column_families
---
[]
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: zebra-scan/src/storage/db/tests/snapshot.rs
expression: empty_column_families
---
[]
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: zebra-scan/src/storage/db/tests/snapshot.rs
expression: empty_column_families
---
[]
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: zebra-scan/src/storage/db/tests/snapshot.rs
expression: empty_column_families
---
[]
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: zebra-scan/src/storage/db/tests/snapshot.rs
expression: empty_column_families
---
[]
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: zebra-scan/src/storage/db/tests/snapshot.rs
expression: empty_column_families
---
[]
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: zebra-scan/src/storage/db/tests/snapshot.rs
expression: empty_column_families
---
[]
Loading

0 comments on commit 3318eaa

Please sign in to comment.