From 30a034a89229fdd2d6a44795455bbdf8b5b390a2 Mon Sep 17 00:00:00 2001 From: Alfredo Garcia Date: Fri, 8 Dec 2023 16:38:45 -0300 Subject: [PATCH 01/21] start scanner where it was left --- zebra-scan/src/scan.rs | 40 ++++++------- zebra-scan/src/storage.rs | 24 +++++++- zebra-scan/src/storage/db.rs | 12 +++- zebra-scan/src/storage/db/sapling.rs | 59 +++++++++++++++++++ .../finalized_state/disk_format/scan.rs | 3 +- 5 files changed, 112 insertions(+), 26 deletions(-) diff --git a/zebra-scan/src/scan.rs b/zebra-scan/src/scan.rs index 0db045232be..8c5f7a02acc 100644 --- a/zebra-scan/src/scan.rs +++ b/zebra-scan/src/scan.rs @@ -54,7 +54,7 @@ const INITIAL_WAIT: Duration = Duration::from_secs(15); const CHECK_INTERVAL: Duration = Duration::from_secs(30); /// We log an info log with progress after this many blocks. -const INFO_LOG_INTERVAL: u32 = 100_000; +const INFO_LOG_INTERVAL: u32 = 1_000; /// Start a scan task that reads blocks from `state`, scans them with the configured keys in /// `storage`, and then writes the results to `storage`. @@ -68,17 +68,17 @@ pub async fn start( // Read keys from the storage on disk, which can block async execution. let key_storage = storage.clone(); - let key_birthdays = tokio::task::spawn_blocking(move || key_storage.sapling_keys()) + let key_heights = tokio::task::spawn_blocking(move || key_storage.sapling_keys_last_heights()) .wait_for_panics() .await; - let key_birthdays = Arc::new(key_birthdays); + let key_heights = Arc::new(key_heights); // Parse and convert keys once, then use them to scan all blocks. // There is some cryptography here, but it should be fast even with thousands of keys. let parsed_keys: HashMap< SaplingScanningKey, (Vec, Vec), - > = key_birthdays + > = key_heights .keys() .map(|key| { let parsed_keys = sapling_key_to_scan_block_keys(key, network)?; @@ -96,7 +96,7 @@ pub async fn start( state.clone(), chain_tip_change.clone(), storage.clone(), - key_birthdays.clone(), + key_heights.clone(), parsed_keys.clone(), ) .await?; @@ -125,7 +125,7 @@ pub async fn scan_height_and_store_results( mut state: State, chain_tip_change: ChainTipChange, storage: Storage, - key_birthdays: Arc>, + key_last_scanned_heights: Arc>, parsed_keys: Arc< HashMap, Vec)>, >, @@ -138,17 +138,6 @@ pub async fn scan_height_and_store_results( let is_info_log = height == storage.min_sapling_birthday_height() || height.0 % INFO_LOG_INTERVAL == 0; - // TODO: add debug logs? - if is_info_log { - info!( - "Scanning the blockchain: now at block {:?}, current tip {:?}", - height, - chain_tip_change - .latest_chain_tip() - .best_tip_height_and_hash(), - ); - } - // Get a block from the state. // We can't use ServiceExt::oneshot() here, because it causes lifetime errors in init(). let block = state @@ -168,24 +157,29 @@ pub async fn scan_height_and_store_results( // Scan it with all the keys. // // TODO: scan each key in parallel (after MVP?) - for (key_num, (sapling_key, birthday_height)) in key_birthdays.iter().enumerate() { + for (key_num, (sapling_key, last_scanned_height)) in key_last_scanned_heights.iter().enumerate() + { + // Only scan what was not scanned for each key + if height <= *last_scanned_height { + continue; + } + // # Security // // We can't log `sapling_key` here because it is a private viewing key. Anyone who reads // the logs could use the key to view those transactions. if is_info_log { info!( - "Scanning the blockchain for key {}, started at block {:?}", - key_num, birthday_height, + "Scanning the blockchain for key {}, started at block {:?}, now at block {:?}, current tip {:?}", + key_num, last_scanned_height, + height, + chain_tip_change.latest_chain_tip().best_tip_height_and_hash() ); } // Get the pre-parsed keys for this configured key. let (dfvks, ivks) = parsed_keys.get(sapling_key).cloned().unwrap_or_default(); - // Scan the block, which blocks async execution until the scan is complete. - // - // TODO: skip scanning before birthday height (#8022) let sapling_key = sapling_key.clone(); let block = block.clone(); let mut storage = storage.clone(); diff --git a/zebra-scan/src/storage.rs b/zebra-scan/src/storage.rs index c4324ceda45..02e3d09e92a 100644 --- a/zebra-scan/src/storage.rs +++ b/zebra-scan/src/storage.rs @@ -19,6 +19,10 @@ pub use db::{SaplingScannedResult, SaplingScanningKey}; use self::db::ScannerWriteBatch; +/// We insert an empty results entry to the database every 1000 blocks for each stored key, +/// so we can track progress. +const INSERT_CONTROL_INTERVAL: u32 = 1_000; + /// Store key info and results of the scan. /// /// `rocksdb` allows concurrent writes through a shared reference, @@ -89,10 +93,20 @@ impl Storage { /// /// This method can block while reading database files, so it must be inside spawn_blocking() /// in async code. - pub fn sapling_keys(&self) -> HashMap { + pub fn sapling_keys_birthdays(&self) -> HashMap { self.sapling_keys_and_birthday_heights() } + /// Returns all the keys and their last scanned heights. + /// + /// # Performance / Hangs + /// + /// This method can block while reading database files, so it must be inside spawn_blocking() + /// in async code. + pub fn sapling_keys_last_heights(&self) -> HashMap { + self.sapling_keys_and_last_scanned_heights() + } + /// Add the sapling results for `height` to the storage. /// /// # Performance / Hangs @@ -109,6 +123,10 @@ impl Storage { // in a single batch. let mut batch = ScannerWriteBatch::default(); + // Every `INSERT_CONTROL_INTERVAL` we add a new control result to the scanner database so we can track progress + // made in the last interval even if no result was found. + let is_control_time = height.0 % INSERT_CONTROL_INTERVAL == 0 && sapling_results.is_empty(); + for (index, sapling_result) in sapling_results { let index = SaplingScannedDatabaseIndex { sapling_key: sapling_key.clone(), @@ -123,6 +141,10 @@ impl Storage { batch.insert_sapling_result(self, entry); } + if is_control_time { + batch.insert_sapling_height(self, &sapling_key, height); + } + self.write_batch(batch); } diff --git a/zebra-scan/src/storage/db.rs b/zebra-scan/src/storage/db.rs index 00262b01437..580033e113e 100644 --- a/zebra-scan/src/storage/db.rs +++ b/zebra-scan/src/storage/db.rs @@ -78,7 +78,17 @@ impl Storage { let new_storage = Self { db }; - // TODO: report the last scanned height here? + // Report where we are for each key in the database. + let keys = new_storage.sapling_keys_last_heights(); + for (key_num, (_key, height)) in keys.iter().enumerate() { + tracing::info!( + "last scanned height for key number {} is {}, resuming at {}", + key_num, + height.0 - 1, + height.0 + ); + } + tracing::info!("loaded Zebra scanner cache"); new_storage diff --git a/zebra-scan/src/storage/db/sapling.rs b/zebra-scan/src/storage/db/sapling.rs index b47d344a9d9..21d95b1d3b6 100644 --- a/zebra-scan/src/storage/db/sapling.rs +++ b/zebra-scan/src/storage/db/sapling.rs @@ -141,6 +141,54 @@ impl Storage { keys } + /// Returns all the keys and their last scanned heights. + pub fn sapling_keys_and_last_scanned_heights(&self) -> HashMap { + let sapling_tx_ids = self.sapling_tx_ids_cf(); + let mut keys = HashMap::new(); + + let last_stored_record: Option<(SaplingScannedDatabaseIndex, SaplingScannedResult)> = + self.db.zs_last_key_value(&sapling_tx_ids); + if last_stored_record.is_none() { + return keys; + } + + let mut last_stored_record_index = last_stored_record + .expect("checked this is `Some` in the code branch above") + .0; + + loop { + // Find the previous key, and the last height we have for it. + let Some(entry) = self + .db + .zs_prev_key_value_back_from(&sapling_tx_ids, &last_stored_record_index) + else { + break; + }; + + let sapling_key = entry.0.sapling_key; + let mut height = entry.0.tx_loc.height; + let _last_result: Option = entry.1; + + let height_results = self.sapling_results_for_key_and_height(&sapling_key, height); + + // If there are no results for this block, then it's a "skip up to height" marker, and + // the birthday height is the next height. If there are some results, it's the actual + // birthday height. + if height_results.values().all(Option::is_none) { + height = height + .next() + .expect("results should only be stored for validated block heights"); + } + + keys.insert(sapling_key.clone(), height); + + // Skip all the results after the next key. + last_stored_record_index = SaplingScannedDatabaseIndex::min_for_key(&sapling_key); + } + + keys + } + /// Returns the Sapling indexes and results in the supplied range. /// /// Convenience method for accessing raw data with the correct types. @@ -214,4 +262,15 @@ impl ScannerWriteBatch { SaplingScannedDatabaseIndex::min_for_key_and_height(sapling_key, skip_up_to_height); self.zs_insert(&storage.sapling_tx_ids_cf(), index, None); } + + /// Insert sapling height with no results + pub(crate) fn insert_sapling_height( + &mut self, + storage: &Storage, + sapling_key: &SaplingScanningKey, + height: Height, + ) { + let index = SaplingScannedDatabaseIndex::min_for_key_and_height(sapling_key, height); + self.zs_insert(&storage.sapling_tx_ids_cf(), index, None); + } } diff --git a/zebra-state/src/service/finalized_state/disk_format/scan.rs b/zebra-state/src/service/finalized_state/disk_format/scan.rs index c6a47ebc712..da1e4d67d1b 100644 --- a/zebra-state/src/service/finalized_state/disk_format/scan.rs +++ b/zebra-state/src/service/finalized_state/disk_format/scan.rs @@ -163,7 +163,8 @@ impl IntoDisk for SaplingScannedResult { impl FromDisk for SaplingScannedResult { fn from_bytes(bytes: impl AsRef<[u8]>) -> Self { - SaplingScannedResult(bytes.as_ref().try_into().unwrap()) + // TODO: Change of confirm the `unwrap_or` is good enough. + SaplingScannedResult(bytes.as_ref().try_into().unwrap_or([0; 32])) } } From b5bd607087d061f3b85722798b53476c9e516fa7 Mon Sep 17 00:00:00 2001 From: Alfredo Garcia Date: Fri, 8 Dec 2023 17:44:50 -0300 Subject: [PATCH 02/21] fix tests --- zebra-scan/src/tests.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zebra-scan/src/tests.rs b/zebra-scan/src/tests.rs index 99b8c87d907..8adee53349f 100644 --- a/zebra-scan/src/tests.rs +++ b/zebra-scan/src/tests.rs @@ -192,9 +192,9 @@ fn scanning_fake_blocks_store_key_and_results() -> Result<()> { s.add_sapling_key(&key_to_be_stored, None); // Check key was added - assert_eq!(s.sapling_keys().len(), 1); + assert_eq!(s.sapling_keys_birthdays().len(), 1); assert_eq!( - s.sapling_keys().get(&key_to_be_stored), + s.sapling_keys_birthdays().get(&key_to_be_stored), Some(&s.min_sapling_birthday_height()) ); From db38a27d511803079ce4860370af5facef97dc7b Mon Sep 17 00:00:00 2001 From: Alfredo Garcia Date: Sat, 9 Dec 2023 11:36:24 -0300 Subject: [PATCH 03/21] add a `scan_start_where_left` test --- zebra-scan/src/config.rs | 5 ++ zebra-scan/src/scan.rs | 6 +-- zebra-scan/src/storage/db.rs | 2 +- zebrad/tests/acceptance.rs | 97 ++++++++++++++++++++++++++++++++++++ 4 files changed, 106 insertions(+), 4 deletions(-) diff --git a/zebra-scan/src/config.rs b/zebra-scan/src/config.rs index df217e90629..c51930168e5 100644 --- a/zebra-scan/src/config.rs +++ b/zebra-scan/src/config.rs @@ -47,4 +47,9 @@ impl Config { pub fn db_config(&self) -> &DbConfig { &self.db_config } + + /// Returns the database-specific config as mutable. + pub fn db_config_mut(&mut self) -> &mut DbConfig { + &mut self.db_config + } } diff --git a/zebra-scan/src/scan.rs b/zebra-scan/src/scan.rs index 8c5f7a02acc..e8b92d33d10 100644 --- a/zebra-scan/src/scan.rs +++ b/zebra-scan/src/scan.rs @@ -171,9 +171,9 @@ pub async fn scan_height_and_store_results( if is_info_log { info!( "Scanning the blockchain for key {}, started at block {:?}, now at block {:?}, current tip {:?}", - key_num, last_scanned_height, - height, - chain_tip_change.latest_chain_tip().best_tip_height_and_hash() + key_num, last_scanned_height.as_usize(), + height.as_usize(), + chain_tip_change.latest_chain_tip().best_tip_height().expect("we should have a tip to scan").as_usize(), ); } diff --git a/zebra-scan/src/storage/db.rs b/zebra-scan/src/storage/db.rs index 580033e113e..74ce036ccdc 100644 --- a/zebra-scan/src/storage/db.rs +++ b/zebra-scan/src/storage/db.rs @@ -82,7 +82,7 @@ impl Storage { let keys = new_storage.sapling_keys_last_heights(); for (key_num, (_key, height)) in keys.iter().enumerate() { tracing::info!( - "last scanned height for key number {} is {}, resuming at {}", + "Last scanned height for key number {} is {}, resuming at {}", key_num, height.0 - 1, height.0 diff --git a/zebrad/tests/acceptance.rs b/zebrad/tests/acceptance.rs index c2cb2ec9300..daf647fe403 100644 --- a/zebrad/tests/acceptance.rs +++ b/zebrad/tests/acceptance.rs @@ -2860,3 +2860,100 @@ fn scan_task_starts() -> Result<()> { Ok(()) } + +/// Test that the scanner can continue scanning where it was left when zebrad restarts. +/// +/// Needs a cache state close to the tip. A possible way to run it locally is: +/// +/// export ZEBRA_CACHED_STATE_DIR="/path/to/zebra/state" +/// cargo test scan_start_where_left --features="shielded-scan" -- --ignored --nocapture +/// +/// The test will run zebrad with a key to scan, scan the first few blocks after sapling and then stops. +/// Then it will restart zebrad and check that it resumes scanning where it was left. +/// +/// Note: This test will remove all the contents you may have in the ZEBRA_CACHED_STATE_DIR/private-scan directory +/// so it can start with an empty scanning state. +#[ignore] +#[test] +#[cfg(feature = "shielded-scan")] +fn scan_start_where_left() -> Result<()> { + use indexmap::IndexMap; + use zebra_scan::storage::db::SCANNER_DATABASE_KIND; + + let _init_guard = zebra_test::init(); + + // use `UpdateZebraCachedStateNoRpc` as the test type to make sure a zebrad cache state is available. + let test_type = TestType::UpdateZebraCachedStateNoRpc; + if let Some(cache_dir) = test_type.zebrad_state_path("scan test") { + // Add a key to the config + const ZECPAGES_VIEWING_KEY: &str = "zxviews1q0duytgcqqqqpqre26wkl45gvwwwd706xw608hucmvfalr759ejwf7qshjf5r9aa7323zulvz6plhttp5mltqcgs9t039cx2d09mgq05ts63n8u35hyv6h9nc9ctqqtue2u7cer2mqegunuulq2luhq3ywjcz35yyljewa4mgkgjzyfwh6fr6jd0dzd44ghk0nxdv2hnv4j5nxfwv24rwdmgllhe0p8568sgqt9ckt02v2kxf5ahtql6s0ltjpkckw8gtymxtxuu9gcr0swvz"; + let mut config = default_test_config(Mainnet)?; + let mut keys = IndexMap::new(); + keys.insert(ZECPAGES_VIEWING_KEY.to_string(), 1); + config.shielded_scan.sapling_keys_to_scan = keys; + + // Add the cache dir to shielded scan, make it the same as the zebrad cache state. + config.shielded_scan.db_config_mut().cache_dir = cache_dir.clone(); + config.shielded_scan.db_config_mut().ephemeral = false; + + // Add the cache dir to state. + config.state.cache_dir = cache_dir.clone(); + config.state.ephemeral = false; + + // Remove the scan directory before starting. + let scan_db_path = cache_dir.join(SCANNER_DATABASE_KIND); + fs::remove_dir_all(std::path::Path::new(&scan_db_path)).ok(); + + // Start zebra with the config. + let mut zebrad = testdir()? + .with_exact_config(&config)? + .spawn_child(args!["start"])? + .with_timeout(test_type.zebrad_timeout()); + + // Check scanner was started. + zebrad.expect_stdout_line_matches("loaded Zebra scanner cache")?; + + // The first time + zebrad.expect_stdout_line_matches( + r"Scanning the blockchain for key 0, started at block 419200, now at block 420000", + )?; + + // Make sure scanner scans a few blocks. + zebrad.expect_stdout_line_matches( + r"Scanning the blockchain for key 0, started at block 419200, now at block 421000", + )?; + zebrad.expect_stdout_line_matches( + r"Scanning the blockchain for key 0, started at block 419200, now at block 422000", + )?; + + // Kill the node. + zebrad.kill(false)?; + let output = zebrad.wait_with_output()?; + + // Make sure the command was killed + output.assert_was_killed()?; + output.assert_failure()?; + + // Start the node again. + let mut zebrad = testdir()? + .with_exact_config(&config)? + .spawn_child(args!["start"])? + .with_timeout(test_type.zebrad_timeout()); + + // Resuming message. + zebrad.expect_stdout_line_matches( + "Last scanned height for key number 0 is 421000, resuming at 421001", + )?; + zebrad.expect_stdout_line_matches("loaded Zebra scanner cache")?; + + // Start scanning where it was left. + zebrad.expect_stdout_line_matches( + r"Scanning the blockchain for key 0, started at block 421001, now at block 422000", + )?; + zebrad.expect_stdout_line_matches( + r"canning the blockchain for key 0, started at block 421001, now at block 423000", + )?; + } + + Ok(()) +} From 39a0bfeb0fffe4e332834c9c44d5830b9fde3dc4 Mon Sep 17 00:00:00 2001 From: Alfredo Garcia Date: Sat, 9 Dec 2023 11:44:16 -0300 Subject: [PATCH 04/21] refactor a log msg --- zebra-scan/src/storage/db.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zebra-scan/src/storage/db.rs b/zebra-scan/src/storage/db.rs index 74ce036ccdc..02b210127af 100644 --- a/zebra-scan/src/storage/db.rs +++ b/zebra-scan/src/storage/db.rs @@ -84,8 +84,8 @@ impl Storage { tracing::info!( "Last scanned height for key number {} is {}, resuming at {}", key_num, - height.0 - 1, - height.0 + height.previous().expect("height is not genesis").as_usize(), + height.as_usize(), ); } From 369ae38ee1283bbd9a101e83c67422712119ad9d Mon Sep 17 00:00:00 2001 From: Alfredo Garcia Date: Sat, 9 Dec 2023 14:25:42 -0300 Subject: [PATCH 05/21] fix some comments --- zebra-scan/src/storage.rs | 7 ++++--- zebra-scan/src/storage/db/sapling.rs | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/zebra-scan/src/storage.rs b/zebra-scan/src/storage.rs index 02e3d09e92a..5737e1254f5 100644 --- a/zebra-scan/src/storage.rs +++ b/zebra-scan/src/storage.rs @@ -19,7 +19,7 @@ pub use db::{SaplingScannedResult, SaplingScanningKey}; use self::db::ScannerWriteBatch; -/// We insert an empty results entry to the database every 1000 blocks for each stored key, +/// We insert an empty results entry to the database every this interval for each stored key, /// so we can track progress. const INSERT_CONTROL_INTERVAL: u32 = 1_000; @@ -123,8 +123,8 @@ impl Storage { // in a single batch. let mut batch = ScannerWriteBatch::default(); - // Every `INSERT_CONTROL_INTERVAL` we add a new control result to the scanner database so we can track progress - // made in the last interval even if no result was found. + // Every `INSERT_CONTROL_INTERVAL` we add a new entry to the scanner database for each key + // so we can track progress made in the last interval even if no transaction was yet found. let is_control_time = height.0 % INSERT_CONTROL_INTERVAL == 0 && sapling_results.is_empty(); for (index, sapling_result) in sapling_results { @@ -141,6 +141,7 @@ impl Storage { batch.insert_sapling_result(self, entry); } + // Add tracking entry for key. if is_control_time { batch.insert_sapling_height(self, &sapling_key, height); } diff --git a/zebra-scan/src/storage/db/sapling.rs b/zebra-scan/src/storage/db/sapling.rs index 21d95b1d3b6..c5ffbd35845 100644 --- a/zebra-scan/src/storage/db/sapling.rs +++ b/zebra-scan/src/storage/db/sapling.rs @@ -172,8 +172,8 @@ impl Storage { let height_results = self.sapling_results_for_key_and_height(&sapling_key, height); // If there are no results for this block, then it's a "skip up to height" marker, and - // the birthday height is the next height. If there are some results, it's the actual - // birthday height. + // the target height is the next height. If there are some results, it's the actual + // target height. if height_results.values().all(Option::is_none) { height = height .next() From 53dcb31882b12f954e8ee4a4fa742249f3dd1faa Mon Sep 17 00:00:00 2001 From: Alfredo Garcia Date: Mon, 11 Dec 2023 14:33:21 -0300 Subject: [PATCH 06/21] remove function --- zebra-scan/src/storage.rs | 12 -------- zebra-scan/src/storage/db/sapling.rs | 46 ---------------------------- zebra-scan/src/tests.rs | 4 +-- 3 files changed, 2 insertions(+), 60 deletions(-) diff --git a/zebra-scan/src/storage.rs b/zebra-scan/src/storage.rs index 5737e1254f5..74d366fdfd0 100644 --- a/zebra-scan/src/storage.rs +++ b/zebra-scan/src/storage.rs @@ -85,18 +85,6 @@ impl Storage { self.write_batch(batch); } - /// Returns all the keys and their birthdays. - /// - /// Birthdays are adjusted to sapling activation if they are too low or missing. - /// - /// # Performance / Hangs - /// - /// This method can block while reading database files, so it must be inside spawn_blocking() - /// in async code. - pub fn sapling_keys_birthdays(&self) -> HashMap { - self.sapling_keys_and_birthday_heights() - } - /// Returns all the keys and their last scanned heights. /// /// # Performance / Hangs diff --git a/zebra-scan/src/storage/db/sapling.rs b/zebra-scan/src/storage/db/sapling.rs index c5ffbd35845..e18babd7637 100644 --- a/zebra-scan/src/storage/db/sapling.rs +++ b/zebra-scan/src/storage/db/sapling.rs @@ -95,52 +95,6 @@ impl Storage { .collect() } - /// Returns all the keys and their birthday heights. - pub fn sapling_keys_and_birthday_heights(&self) -> HashMap { - // This code is a bit complex because we don't have a separate column family for keys - // and their birthday heights. - // - // TODO: make a separate column family after the MVP. - - let sapling_tx_ids = self.sapling_tx_ids_cf(); - let mut keys = HashMap::new(); - - // The minimum key is invalid or a dummy key, so we will never have an entry for it. - let mut find_next_key_index = SaplingScannedDatabaseIndex::min(); - - loop { - // Find the next key, and the first height we have for it. - let Some(entry) = self - .db - .zs_next_key_value_from(&sapling_tx_ids, &find_next_key_index) - else { - break; - }; - - let sapling_key = entry.0.sapling_key; - let mut height = entry.0.tx_loc.height; - let _first_result: Option = entry.1; - - let height_results = self.sapling_results_for_key_and_height(&sapling_key, height); - - // If there are no results for this block, then it's a "skip up to height" marker, and - // the birthday height is the next height. If there are some results, it's the actual - // birthday height. - if height_results.values().all(Option::is_none) { - height = height - .next() - .expect("results should only be stored for validated block heights"); - } - - keys.insert(sapling_key.clone(), height); - - // Skip all the results before the next key. - find_next_key_index = SaplingScannedDatabaseIndex::max_for_key(&sapling_key); - } - - keys - } - /// Returns all the keys and their last scanned heights. pub fn sapling_keys_and_last_scanned_heights(&self) -> HashMap { let sapling_tx_ids = self.sapling_tx_ids_cf(); diff --git a/zebra-scan/src/tests.rs b/zebra-scan/src/tests.rs index 8adee53349f..5d74d82814e 100644 --- a/zebra-scan/src/tests.rs +++ b/zebra-scan/src/tests.rs @@ -192,9 +192,9 @@ fn scanning_fake_blocks_store_key_and_results() -> Result<()> { s.add_sapling_key(&key_to_be_stored, None); // Check key was added - assert_eq!(s.sapling_keys_birthdays().len(), 1); + assert_eq!(s.sapling_keys_last_heights().len(), 1); assert_eq!( - s.sapling_keys_birthdays().get(&key_to_be_stored), + s.sapling_keys_last_heights().get(&key_to_be_stored), Some(&s.min_sapling_birthday_height()) ); From 68bd53f0a0e11168c4765bbc20d21e7b70a0a517 Mon Sep 17 00:00:00 2001 From: Alfredo Garcia Date: Mon, 11 Dec 2023 15:16:17 -0300 Subject: [PATCH 07/21] fix doc comment --- zebra-scan/src/storage/db/sapling.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zebra-scan/src/storage/db/sapling.rs b/zebra-scan/src/storage/db/sapling.rs index e18babd7637..b37119e3575 100644 --- a/zebra-scan/src/storage/db/sapling.rs +++ b/zebra-scan/src/storage/db/sapling.rs @@ -136,7 +136,7 @@ impl Storage { keys.insert(sapling_key.clone(), height); - // Skip all the results after the next key. + // Skip all the results until the next key. last_stored_record_index = SaplingScannedDatabaseIndex::min_for_key(&sapling_key); } From bf27cfdab28132771201082b2a1e929dfed3b0f7 Mon Sep 17 00:00:00 2001 From: Alfredo Garcia Date: Tue, 12 Dec 2023 10:43:35 -0300 Subject: [PATCH 08/21] clippy --- zebra-scan/src/storage.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zebra-scan/src/storage.rs b/zebra-scan/src/storage.rs index 42afba90e0a..664ba5f37b8 100644 --- a/zebra-scan/src/storage.rs +++ b/zebra-scan/src/storage.rs @@ -138,7 +138,7 @@ impl Storage { // Add tracking entry for key. if is_control_time { - batch.insert_sapling_height(self, &sapling_key, height); + batch.insert_sapling_height(self, sapling_key, height); } self.write_batch(batch); From b73a28aa160dd98c064ef14e1fb327445e9f7fbc Mon Sep 17 00:00:00 2001 From: Alfredo Garcia Date: Tue, 12 Dec 2023 11:16:54 -0300 Subject: [PATCH 09/21] fix `sapling_keys_and_last_scanned_heights()` --- zebra-scan/src/scan.rs | 2 +- zebra-scan/src/storage/db.rs | 2 +- zebra-scan/src/storage/db/sapling.rs | 27 ++++++++------------------- 3 files changed, 10 insertions(+), 21 deletions(-) diff --git a/zebra-scan/src/scan.rs b/zebra-scan/src/scan.rs index 5a006155ce0..9bb40dad8a8 100644 --- a/zebra-scan/src/scan.rs +++ b/zebra-scan/src/scan.rs @@ -171,7 +171,7 @@ pub async fn scan_height_and_store_results( if is_info_log { info!( "Scanning the blockchain for key {}, started at block {:?}, now at block {:?}, current tip {:?}", - key_num, last_scanned_height.as_usize(), + key_num, last_scanned_height.next().expect("height is not maximum").as_usize(), height.as_usize(), chain_tip_change.latest_chain_tip().best_tip_height().expect("we should have a tip to scan").as_usize(), ); diff --git a/zebra-scan/src/storage/db.rs b/zebra-scan/src/storage/db.rs index 96ace0cea4f..7ea9b876989 100644 --- a/zebra-scan/src/storage/db.rs +++ b/zebra-scan/src/storage/db.rs @@ -87,8 +87,8 @@ impl Storage { tracing::info!( "Last scanned height for key number {} is {}, resuming at {}", key_num, - height.previous().expect("height is not genesis").as_usize(), height.as_usize(), + height.next().expect("height is not maximum").as_usize(), ); } diff --git a/zebra-scan/src/storage/db/sapling.rs b/zebra-scan/src/storage/db/sapling.rs index 3ebd95e9db9..83896fd06ae 100644 --- a/zebra-scan/src/storage/db/sapling.rs +++ b/zebra-scan/src/storage/db/sapling.rs @@ -115,7 +115,13 @@ impl Storage { .0; loop { - // Find the previous key, and the last height we have for it. + let sapling_key = last_stored_record_index.sapling_key; + let height = last_stored_record_index.tx_loc.height; + + keys.insert(sapling_key.clone(), height); + + // Skip all the results until the next key. + last_stored_record_index = SaplingScannedDatabaseIndex::min_for_key(&sapling_key); let Some(entry) = self .db .zs_prev_key_value_back_from(&sapling_tx_ids, &last_stored_record_index) @@ -123,25 +129,8 @@ impl Storage { break; }; - let sapling_key = entry.0.sapling_key; - let mut height = entry.0.tx_loc.height; + // Needed annotation. let _last_result: Option = entry.1; - - let height_results = self.sapling_results_for_key_and_height(&sapling_key, height); - - // If there are no results for this block, then it's a "skip up to height" marker, and - // the target height is the next height. If there are some results, it's the actual - // target height. - if height_results.values().all(Option::is_none) { - height = height - .next() - .expect("results should only be stored for validated block heights"); - } - - keys.insert(sapling_key.clone(), height); - - // Skip all the results until the next key. - last_stored_record_index = SaplingScannedDatabaseIndex::min_for_key(&sapling_key); } keys From d7c3440994420ba1c91a4ebee1d5a9a8270a1df0 Mon Sep 17 00:00:00 2001 From: Alfredo Garcia Date: Tue, 12 Dec 2023 15:00:19 -0300 Subject: [PATCH 10/21] simplify start height --- zebra-scan/src/scan.rs | 2 +- zebra-scan/src/storage/db/sapling.rs | 4 +--- zebrad/tests/acceptance.rs | 4 ++-- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/zebra-scan/src/scan.rs b/zebra-scan/src/scan.rs index 9bb40dad8a8..5a006155ce0 100644 --- a/zebra-scan/src/scan.rs +++ b/zebra-scan/src/scan.rs @@ -171,7 +171,7 @@ pub async fn scan_height_and_store_results( if is_info_log { info!( "Scanning the blockchain for key {}, started at block {:?}, now at block {:?}, current tip {:?}", - key_num, last_scanned_height.next().expect("height is not maximum").as_usize(), + key_num, last_scanned_height.as_usize(), height.as_usize(), chain_tip_change.latest_chain_tip().best_tip_height().expect("we should have a tip to scan").as_usize(), ); diff --git a/zebra-scan/src/storage/db/sapling.rs b/zebra-scan/src/storage/db/sapling.rs index 83896fd06ae..10b56c3b5f3 100644 --- a/zebra-scan/src/storage/db/sapling.rs +++ b/zebra-scan/src/storage/db/sapling.rs @@ -202,11 +202,9 @@ impl ScannerWriteBatch { let birthday_height = birthday_height .unwrap_or(min_birthday_height) .max(min_birthday_height); - // And we want to skip up to the height before it. - let skip_up_to_height = birthday_height.previous().unwrap_or(Height::MIN); let index = - SaplingScannedDatabaseIndex::min_for_key_and_height(sapling_key, skip_up_to_height); + SaplingScannedDatabaseIndex::min_for_key_and_height(sapling_key, birthday_height); self.zs_insert(&storage.sapling_tx_ids_cf(), index, None); } diff --git a/zebrad/tests/acceptance.rs b/zebrad/tests/acceptance.rs index 51d4f329378..a9b6f3fe0d9 100644 --- a/zebrad/tests/acceptance.rs +++ b/zebrad/tests/acceptance.rs @@ -2948,10 +2948,10 @@ fn scan_start_where_left() -> Result<()> { // Start scanning where it was left. zebrad.expect_stdout_line_matches( - r"Scanning the blockchain for key 0, started at block 421001, now at block 422000", + r"Scanning the blockchain for key 0, started at block 421000, now at block 422000", )?; zebrad.expect_stdout_line_matches( - r"canning the blockchain for key 0, started at block 421001, now at block 423000", + r"canning the blockchain for key 0, started at block 421000, now at block 423000", )?; } From 024a1dd7d371524f3409b01d7d1725f54544ae9d Mon Sep 17 00:00:00 2001 From: Alfredo Garcia Date: Tue, 12 Dec 2023 18:56:52 -0300 Subject: [PATCH 11/21] i went too far, revert some changes back --- zebra-scan/src/scan.rs | 2 +- zebra-scan/src/storage/db/sapling.rs | 4 +++- zebra-scan/src/tests/vectors.rs | 8 ++++++-- zebrad/tests/acceptance.rs | 4 ++-- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/zebra-scan/src/scan.rs b/zebra-scan/src/scan.rs index 5a006155ce0..9bb40dad8a8 100644 --- a/zebra-scan/src/scan.rs +++ b/zebra-scan/src/scan.rs @@ -171,7 +171,7 @@ pub async fn scan_height_and_store_results( if is_info_log { info!( "Scanning the blockchain for key {}, started at block {:?}, now at block {:?}, current tip {:?}", - key_num, last_scanned_height.as_usize(), + key_num, last_scanned_height.next().expect("height is not maximum").as_usize(), height.as_usize(), chain_tip_change.latest_chain_tip().best_tip_height().expect("we should have a tip to scan").as_usize(), ); diff --git a/zebra-scan/src/storage/db/sapling.rs b/zebra-scan/src/storage/db/sapling.rs index 10b56c3b5f3..83896fd06ae 100644 --- a/zebra-scan/src/storage/db/sapling.rs +++ b/zebra-scan/src/storage/db/sapling.rs @@ -202,9 +202,11 @@ impl ScannerWriteBatch { let birthday_height = birthday_height .unwrap_or(min_birthday_height) .max(min_birthday_height); + // And we want to skip up to the height before it. + let skip_up_to_height = birthday_height.previous().unwrap_or(Height::MIN); let index = - SaplingScannedDatabaseIndex::min_for_key_and_height(sapling_key, birthday_height); + SaplingScannedDatabaseIndex::min_for_key_and_height(sapling_key, skip_up_to_height); self.zs_insert(&storage.sapling_tx_ids_cf(), index, None); } diff --git a/zebra-scan/src/tests/vectors.rs b/zebra-scan/src/tests/vectors.rs index 0461599b85c..1352a6bfd14 100644 --- a/zebra-scan/src/tests/vectors.rs +++ b/zebra-scan/src/tests/vectors.rs @@ -165,8 +165,12 @@ fn scanning_fake_blocks_store_key_and_results() -> Result<()> { // Check key was added assert_eq!(s.sapling_keys_last_heights().len(), 1); assert_eq!( - s.sapling_keys_last_heights().get(&key_to_be_stored), - Some(&s.min_sapling_birthday_height()) + s.sapling_keys_last_heights() + .get(&key_to_be_stored) + .expect("height is stored") + .next() + .expect("height is not maximum"), + s.min_sapling_birthday_height() ); let nf = Nullifier([7; 32]); diff --git a/zebrad/tests/acceptance.rs b/zebrad/tests/acceptance.rs index a9b6f3fe0d9..5b841439821 100644 --- a/zebrad/tests/acceptance.rs +++ b/zebrad/tests/acceptance.rs @@ -2948,10 +2948,10 @@ fn scan_start_where_left() -> Result<()> { // Start scanning where it was left. zebrad.expect_stdout_line_matches( - r"Scanning the blockchain for key 0, started at block 421000, now at block 422000", + r"Scanning the blockchain for key 0, started at block 421001, now at block 422000", )?; zebrad.expect_stdout_line_matches( - r"canning the blockchain for key 0, started at block 421000, now at block 423000", + r"Scanning the blockchain for key 0, started at block 421001, now at block 423000", )?; } From fea879c480d4ab1c46aabe4cd258e885b43c072c Mon Sep 17 00:00:00 2001 From: Alfredo Garcia Date: Tue, 12 Dec 2023 19:38:32 -0300 Subject: [PATCH 12/21] change log info to every 10k blocks --- zebra-scan/src/scan.rs | 5 ++--- zebrad/tests/acceptance.rs | 10 +++++----- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/zebra-scan/src/scan.rs b/zebra-scan/src/scan.rs index 9bb40dad8a8..3874d86d708 100644 --- a/zebra-scan/src/scan.rs +++ b/zebra-scan/src/scan.rs @@ -54,7 +54,7 @@ const INITIAL_WAIT: Duration = Duration::from_secs(15); const CHECK_INTERVAL: Duration = Duration::from_secs(30); /// We log an info log with progress after this many blocks. -const INFO_LOG_INTERVAL: u32 = 1_000; +const INFO_LOG_INTERVAL: u32 = 10_000; /// Start a scan task that reads blocks from `state`, scans them with the configured keys in /// `storage`, and then writes the results to `storage`. @@ -135,8 +135,7 @@ pub async fn scan_height_and_store_results( // Only log at info level every 100,000 blocks. // // TODO: also log progress every 5 minutes once we reach the tip? - let is_info_log = - height == storage.min_sapling_birthday_height() || height.0 % INFO_LOG_INTERVAL == 0; + let is_info_log = height.0 % INFO_LOG_INTERVAL == 0; // Get a block from the state. // We can't use ServiceExt::oneshot() here, because it causes lifetime errors in init(). diff --git a/zebrad/tests/acceptance.rs b/zebrad/tests/acceptance.rs index 5b841439821..787c8097f7a 100644 --- a/zebrad/tests/acceptance.rs +++ b/zebrad/tests/acceptance.rs @@ -2920,10 +2920,10 @@ fn scan_start_where_left() -> Result<()> { // Make sure scanner scans a few blocks. zebrad.expect_stdout_line_matches( - r"Scanning the blockchain for key 0, started at block 419200, now at block 421000", + r"Scanning the blockchain for key 0, started at block 419200, now at block 430000", )?; zebrad.expect_stdout_line_matches( - r"Scanning the blockchain for key 0, started at block 419200, now at block 422000", + r"Scanning the blockchain for key 0, started at block 419200, now at block 440000", )?; // Kill the node. @@ -2942,16 +2942,16 @@ fn scan_start_where_left() -> Result<()> { // Resuming message. zebrad.expect_stdout_line_matches( - "Last scanned height for key number 0 is 421000, resuming at 421001", + "Last scanned height for key number 0 is 439000, resuming at 439001", )?; zebrad.expect_stdout_line_matches("loaded Zebra scanner cache")?; // Start scanning where it was left. zebrad.expect_stdout_line_matches( - r"Scanning the blockchain for key 0, started at block 421001, now at block 422000", + r"Scanning the blockchain for key 0, started at block 439001, now at block 440000", )?; zebrad.expect_stdout_line_matches( - r"Scanning the blockchain for key 0, started at block 421001, now at block 423000", + r"Scanning the blockchain for key 0, started at block 439001, now at block 450000", )?; } From 6b04fb08bd1d046729b27ec2f1b50ee1c8c94abe Mon Sep 17 00:00:00 2001 From: Alfredo Garcia Date: Tue, 12 Dec 2023 20:33:41 -0300 Subject: [PATCH 13/21] fix build --- zebra-scan/src/storage/db/tests/snapshot.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zebra-scan/src/storage/db/tests/snapshot.rs b/zebra-scan/src/storage/db/tests/snapshot.rs index b5b933f2746..dd38a92031a 100644 --- a/zebra-scan/src/storage/db/tests/snapshot.rs +++ b/zebra-scan/src/storage/db/tests/snapshot.rs @@ -153,7 +153,7 @@ fn snapshot_typed_result_data(storage: &Storage) { // Make sure the typed key format doesn't accidentally change. // // TODO: update this after PR #8080 - let sapling_keys_and_birthday_heights = storage.sapling_keys(); + let sapling_keys_and_birthday_heights = storage.sapling_keys_last_heights(); // HashMap has an unstable order across Rust releases, so we need to sort it here. insta::assert_ron_snapshot!( "sapling_keys", From 9d3b21da0bc309ee3dca627b56f529d1fb568388 Mon Sep 17 00:00:00 2001 From: teor Date: Wed, 13 Dec 2023 10:12:55 +1000 Subject: [PATCH 14/21] Update height snapshot code and check last height is consistent --- zebra-scan/src/storage/db/tests/snapshot.rs | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/zebra-scan/src/storage/db/tests/snapshot.rs b/zebra-scan/src/storage/db/tests/snapshot.rs index dd38a92031a..a1d1a184fc1 100644 --- a/zebra-scan/src/storage/db/tests/snapshot.rs +++ b/zebra-scan/src/storage/db/tests/snapshot.rs @@ -147,30 +147,25 @@ fn snapshot_raw_rocksdb_column_family_data(db: &ScannerDb, original_cf_names: &[ /// Snapshot typed scanner result data using high-level storage methods, /// using `cargo insta` and RON serialization. fn snapshot_typed_result_data(storage: &Storage) { - // TODO: snapshot the latest scanned heights after PR #8080 merges - //insta::assert_ron_snapshot!("latest_heights", latest_scanned_heights); - // Make sure the typed key format doesn't accidentally change. - // - // TODO: update this after PR #8080 - let sapling_keys_and_birthday_heights = storage.sapling_keys_last_heights(); + let sapling_keys_last_heights = storage.sapling_keys_last_heights(); // HashMap has an unstable order across Rust releases, so we need to sort it here. insta::assert_ron_snapshot!( "sapling_keys", - sapling_keys_and_birthday_heights, + sapling_keys_last_heights, { "." => insta::sorted_redaction() } ); // HashMap has an unstable order across Rust releases, so we need to sort it here as well. - for (key_index, (sapling_key, _birthday_height)) in sapling_keys_and_birthday_heights - .iter() - .sorted() - .enumerate() + for (key_index, (sapling_key, last_height)) in + sapling_keys_last_heights.iter().sorted().enumerate() { let sapling_results = storage.sapling_results(sapling_key); + assert_eq!(sapling_results.keys().max(), Some(last_height)); + // Check internal database method consistency for (height, results) in sapling_results.iter() { let sapling_index_and_results = From 24ea29b46efe83926e043ca5feeb6a9f1ebef3de Mon Sep 17 00:00:00 2001 From: teor Date: Wed, 13 Dec 2023 10:52:01 +1000 Subject: [PATCH 15/21] Add strictly before and strictly after database key gets --- .../src/service/finalized_state/disk_db.rs | 48 ++++++++++++++++--- 1 file changed, 42 insertions(+), 6 deletions(-) diff --git a/zebra-state/src/service/finalized_state/disk_db.rs b/zebra-state/src/service/finalized_state/disk_db.rs index ab20a62faa3..a9bad0cb472 100644 --- a/zebra-state/src/service/finalized_state/disk_db.rs +++ b/zebra-state/src/service/finalized_state/disk_db.rs @@ -220,6 +220,16 @@ pub trait ReadDisk { K: IntoDisk + FromDisk, V: FromDisk; + /// Returns the first key strictly greater than `lower_bound` in `cf`, + /// and the corresponding value. + /// + /// Returns `None` if there are no keys greater than `lower_bound`. + fn zs_next_key_value_strictly_after(&self, cf: &C, lower_bound: &K) -> Option<(K, V)> + where + C: rocksdb::AsColumnFamilyRef, + K: IntoDisk + FromDisk, + V: FromDisk; + /// Returns the first key less than or equal to `upper_bound` in `cf`, /// and the corresponding value. /// @@ -230,6 +240,16 @@ pub trait ReadDisk { K: IntoDisk + FromDisk, V: FromDisk; + /// Returns the first key strictly less than `upper_bound` in `cf`, + /// and the corresponding value. + /// + /// Returns `None` if there are no keys less than `upper_bound`. + fn zs_prev_key_value_strictly_before(&self, cf: &C, upper_bound: &K) -> Option<(K, V)> + where + C: rocksdb::AsColumnFamilyRef, + K: IntoDisk + FromDisk, + V: FromDisk; + /// Returns the keys and values in `cf` in `range`, in an ordered `BTreeMap`. /// /// Holding this iterator open might delay block commit transactions. @@ -321,7 +341,6 @@ impl ReadDisk for DiskDb { .is_some() } - #[allow(clippy::unwrap_in_result)] fn zs_first_key_value(&self, cf: &C) -> Option<(K, V)> where C: rocksdb::AsColumnFamilyRef, @@ -332,7 +351,6 @@ impl ReadDisk for DiskDb { self.zs_forward_range_iter(cf, ..).next() } - #[allow(clippy::unwrap_in_result)] fn zs_last_key_value(&self, cf: &C) -> Option<(K, V)> where C: rocksdb::AsColumnFamilyRef, @@ -343,28 +361,46 @@ impl ReadDisk for DiskDb { self.zs_reverse_range_iter(cf, ..).next() } - #[allow(clippy::unwrap_in_result)] fn zs_next_key_value_from(&self, cf: &C, lower_bound: &K) -> Option<(K, V)> where C: rocksdb::AsColumnFamilyRef, K: IntoDisk + FromDisk, V: FromDisk, { - // Reading individual values from iterators does not seem to cause database hangs. self.zs_forward_range_iter(cf, lower_bound..).next() } - #[allow(clippy::unwrap_in_result)] + fn zs_next_key_value_strictly_after(&self, cf: &C, lower_bound: &K) -> Option<(K, V)> + where + C: rocksdb::AsColumnFamilyRef, + K: IntoDisk + FromDisk, + V: FromDisk, + { + use std::ops::Bound::*; + + // There is no standard syntax for an excluded start bound. + self.zs_forward_range_iter(cf, (Excluded(lower_bound), Unbounded)) + .next() + } + fn zs_prev_key_value_back_from(&self, cf: &C, upper_bound: &K) -> Option<(K, V)> where C: rocksdb::AsColumnFamilyRef, K: IntoDisk + FromDisk, V: FromDisk, { - // Reading individual values from iterators does not seem to cause database hangs. self.zs_reverse_range_iter(cf, ..=upper_bound).next() } + fn zs_prev_key_value_strictly_before(&self, cf: &C, upper_bound: &K) -> Option<(K, V)> + where + C: rocksdb::AsColumnFamilyRef, + K: IntoDisk + FromDisk, + V: FromDisk, + { + self.zs_reverse_range_iter(cf, ..upper_bound).next() + } + fn zs_items_in_range_ordered(&self, cf: &C, range: R) -> BTreeMap where C: rocksdb::AsColumnFamilyRef, From cf44fef9756abd5ce85379d5cd74104f95196602 Mon Sep 17 00:00:00 2001 From: teor Date: Wed, 13 Dec 2023 10:52:46 +1000 Subject: [PATCH 16/21] Move to the previous key using strictly before ops --- zebra-scan/src/storage/db/sapling.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zebra-scan/src/storage/db/sapling.rs b/zebra-scan/src/storage/db/sapling.rs index 83896fd06ae..78ee761ad57 100644 --- a/zebra-scan/src/storage/db/sapling.rs +++ b/zebra-scan/src/storage/db/sapling.rs @@ -124,7 +124,7 @@ impl Storage { last_stored_record_index = SaplingScannedDatabaseIndex::min_for_key(&sapling_key); let Some(entry) = self .db - .zs_prev_key_value_back_from(&sapling_tx_ids, &last_stored_record_index) + .zs_prev_key_value_strictly_before(&sapling_tx_ids, &last_stored_record_index) else { break; }; From c8ac3089704169ad8d70235ada7df427bb33f65f Mon Sep 17 00:00:00 2001 From: teor Date: Wed, 13 Dec 2023 11:01:36 +1000 Subject: [PATCH 17/21] Assert that keys are only inserted once --- zebra-scan/src/storage/db/sapling.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/zebra-scan/src/storage/db/sapling.rs b/zebra-scan/src/storage/db/sapling.rs index 78ee761ad57..17a14d3b593 100644 --- a/zebra-scan/src/storage/db/sapling.rs +++ b/zebra-scan/src/storage/db/sapling.rs @@ -115,10 +115,15 @@ impl Storage { .0; loop { - let sapling_key = last_stored_record_index.sapling_key; + let sapling_key = last_stored_record_index.sapling_key.clone(); let height = last_stored_record_index.tx_loc.height; - keys.insert(sapling_key.clone(), height); + let prev_height = keys.insert(sapling_key.clone(), height); + assert_eq!( + prev_height, None, + "unexpected duplicate key: keys must only be inserted once\ + last_stored_record_index: {last_stored_record_index:?}", + ); // Skip all the results until the next key. last_stored_record_index = SaplingScannedDatabaseIndex::min_for_key(&sapling_key); From 42bf26d0c7b04dda725f7903888024f0b7596060 Mon Sep 17 00:00:00 2001 From: teor Date: Wed, 13 Dec 2023 11:20:32 +1000 Subject: [PATCH 18/21] Update the index in each loop --- zebra-scan/src/storage/db/sapling.rs | 32 +++++++++------------ zebra-scan/src/storage/db/tests/snapshot.rs | 3 ++ 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/zebra-scan/src/storage/db/sapling.rs b/zebra-scan/src/storage/db/sapling.rs index 17a14d3b593..d95ee868ef3 100644 --- a/zebra-scan/src/storage/db/sapling.rs +++ b/zebra-scan/src/storage/db/sapling.rs @@ -102,22 +102,21 @@ impl Storage { let sapling_tx_ids = self.sapling_tx_ids_cf(); let mut keys = HashMap::new(); - let last_stored_record: Option<( + tracing::info!("searching backwards for last entry"); + let mut last_stored_record: Option<( SaplingScannedDatabaseIndex, Option, )> = self.db.zs_last_key_value(&sapling_tx_ids); - if last_stored_record.is_none() { - return keys; - } - - let mut last_stored_record_index = last_stored_record - .expect("checked this is `Some` in the code branch above") - .0; loop { + let Some((mut last_stored_record_index, _result)) = last_stored_record else { + return keys; + }; + let sapling_key = last_stored_record_index.sapling_key.clone(); let height = last_stored_record_index.tx_loc.height; + tracing::info!(?sapling_key, ?height, "inserting key and last height"); let prev_height = keys.insert(sapling_key.clone(), height); assert_eq!( prev_height, None, @@ -127,18 +126,15 @@ impl Storage { // Skip all the results until the next key. last_stored_record_index = SaplingScannedDatabaseIndex::min_for_key(&sapling_key); - let Some(entry) = self + tracing::info!( + ?last_stored_record_index, + "searching backwards strictly before" + ); + last_stored_record = self .db - .zs_prev_key_value_strictly_before(&sapling_tx_ids, &last_stored_record_index) - else { - break; - }; - - // Needed annotation. - let _last_result: Option = entry.1; + .zs_prev_key_value_strictly_before(&sapling_tx_ids, &last_stored_record_index); + tracing::info!(?last_stored_record_index, "found last storede"); } - - keys } /// Returns the Sapling indexes and results in the supplied range. diff --git a/zebra-scan/src/storage/db/tests/snapshot.rs b/zebra-scan/src/storage/db/tests/snapshot.rs index a1d1a184fc1..9645c64be0b 100644 --- a/zebra-scan/src/storage/db/tests/snapshot.rs +++ b/zebra-scan/src/storage/db/tests/snapshot.rs @@ -147,8 +147,11 @@ fn snapshot_raw_rocksdb_column_family_data(db: &ScannerDb, original_cf_names: &[ /// Snapshot typed scanner result data using high-level storage methods, /// using `cargo insta` and RON serialization. fn snapshot_typed_result_data(storage: &Storage) { + tracing::info!("about to call sapling_keys_last_heights()"); // Make sure the typed key format doesn't accidentally change. let sapling_keys_last_heights = storage.sapling_keys_last_heights(); + tracing::info!("called sapling_keys_last_heights()"); + // HashMap has an unstable order across Rust releases, so we need to sort it here. insta::assert_ron_snapshot!( "sapling_keys", From 454ca9a9db28ba92fced080277b1a930a7440ecf Mon Sep 17 00:00:00 2001 From: teor Date: Wed, 13 Dec 2023 11:20:45 +1000 Subject: [PATCH 19/21] Update snapshots --- .../storage/db/tests/snapshots/sapling_keys@mainnet_0.snap | 6 +++--- .../storage/db/tests/snapshots/sapling_keys@mainnet_1.snap | 6 +++--- .../storage/db/tests/snapshots/sapling_keys@mainnet_2.snap | 6 +++--- .../db/tests/snapshots/sapling_keys@mainnet_keys.snap | 6 +++--- .../storage/db/tests/snapshots/sapling_keys@testnet_0.snap | 6 +++--- .../storage/db/tests/snapshots/sapling_keys@testnet_1.snap | 6 +++--- .../storage/db/tests/snapshots/sapling_keys@testnet_2.snap | 6 +++--- .../db/tests/snapshots/sapling_keys@testnet_keys.snap | 6 +++--- 8 files changed, 24 insertions(+), 24 deletions(-) diff --git a/zebra-scan/src/storage/db/tests/snapshots/sapling_keys@mainnet_0.snap b/zebra-scan/src/storage/db/tests/snapshots/sapling_keys@mainnet_0.snap index dceb1749c54..1a7b8aefebb 100644 --- a/zebra-scan/src/storage/db/tests/snapshots/sapling_keys@mainnet_0.snap +++ b/zebra-scan/src/storage/db/tests/snapshots/sapling_keys@mainnet_0.snap @@ -1,8 +1,8 @@ --- source: zebra-scan/src/storage/db/tests/snapshot.rs -expression: sapling_keys_and_birthday_heights +expression: sapling_keys_last_heights --- { - "zxviews1q0duytgcqqqqpqre26wkl45gvwwwd706xw608hucmvfalr759ejwf7qshjf5r9aa7323zulvz6plhttp5mltqcgs9t039cx2d09mgq05ts63n8u35hyv6h9nc9ctqqtue2u7cer2mqegunuulq2luhq3ywjcz35yyljewa4mgkgjzyfwh6fr6jd0dzd44ghk0nxdv2hnv4j5nxfwv24rwdmgllhe0p8568sgqt9ckt02v2kxf5ahtql6s0ltjpkckw8gtymxtxuu9gcr0swvz": Height(0), - "zxviewsfake": Height(1000000), + "zxviews1q0duytgcqqqqpqre26wkl45gvwwwd706xw608hucmvfalr759ejwf7qshjf5r9aa7323zulvz6plhttp5mltqcgs9t039cx2d09mgq05ts63n8u35hyv6h9nc9ctqqtue2u7cer2mqegunuulq2luhq3ywjcz35yyljewa4mgkgjzyfwh6fr6jd0dzd44ghk0nxdv2hnv4j5nxfwv24rwdmgllhe0p8568sgqt9ckt02v2kxf5ahtql6s0ltjpkckw8gtymxtxuu9gcr0swvz": Height(419199), + "zxviewsfake": Height(999999), } diff --git a/zebra-scan/src/storage/db/tests/snapshots/sapling_keys@mainnet_1.snap b/zebra-scan/src/storage/db/tests/snapshots/sapling_keys@mainnet_1.snap index dceb1749c54..1a7b8aefebb 100644 --- a/zebra-scan/src/storage/db/tests/snapshots/sapling_keys@mainnet_1.snap +++ b/zebra-scan/src/storage/db/tests/snapshots/sapling_keys@mainnet_1.snap @@ -1,8 +1,8 @@ --- source: zebra-scan/src/storage/db/tests/snapshot.rs -expression: sapling_keys_and_birthday_heights +expression: sapling_keys_last_heights --- { - "zxviews1q0duytgcqqqqpqre26wkl45gvwwwd706xw608hucmvfalr759ejwf7qshjf5r9aa7323zulvz6plhttp5mltqcgs9t039cx2d09mgq05ts63n8u35hyv6h9nc9ctqqtue2u7cer2mqegunuulq2luhq3ywjcz35yyljewa4mgkgjzyfwh6fr6jd0dzd44ghk0nxdv2hnv4j5nxfwv24rwdmgllhe0p8568sgqt9ckt02v2kxf5ahtql6s0ltjpkckw8gtymxtxuu9gcr0swvz": Height(0), - "zxviewsfake": Height(1000000), + "zxviews1q0duytgcqqqqpqre26wkl45gvwwwd706xw608hucmvfalr759ejwf7qshjf5r9aa7323zulvz6plhttp5mltqcgs9t039cx2d09mgq05ts63n8u35hyv6h9nc9ctqqtue2u7cer2mqegunuulq2luhq3ywjcz35yyljewa4mgkgjzyfwh6fr6jd0dzd44ghk0nxdv2hnv4j5nxfwv24rwdmgllhe0p8568sgqt9ckt02v2kxf5ahtql6s0ltjpkckw8gtymxtxuu9gcr0swvz": Height(419199), + "zxviewsfake": Height(999999), } diff --git a/zebra-scan/src/storage/db/tests/snapshots/sapling_keys@mainnet_2.snap b/zebra-scan/src/storage/db/tests/snapshots/sapling_keys@mainnet_2.snap index dceb1749c54..1a7b8aefebb 100644 --- a/zebra-scan/src/storage/db/tests/snapshots/sapling_keys@mainnet_2.snap +++ b/zebra-scan/src/storage/db/tests/snapshots/sapling_keys@mainnet_2.snap @@ -1,8 +1,8 @@ --- source: zebra-scan/src/storage/db/tests/snapshot.rs -expression: sapling_keys_and_birthday_heights +expression: sapling_keys_last_heights --- { - "zxviews1q0duytgcqqqqpqre26wkl45gvwwwd706xw608hucmvfalr759ejwf7qshjf5r9aa7323zulvz6plhttp5mltqcgs9t039cx2d09mgq05ts63n8u35hyv6h9nc9ctqqtue2u7cer2mqegunuulq2luhq3ywjcz35yyljewa4mgkgjzyfwh6fr6jd0dzd44ghk0nxdv2hnv4j5nxfwv24rwdmgllhe0p8568sgqt9ckt02v2kxf5ahtql6s0ltjpkckw8gtymxtxuu9gcr0swvz": Height(0), - "zxviewsfake": Height(1000000), + "zxviews1q0duytgcqqqqpqre26wkl45gvwwwd706xw608hucmvfalr759ejwf7qshjf5r9aa7323zulvz6plhttp5mltqcgs9t039cx2d09mgq05ts63n8u35hyv6h9nc9ctqqtue2u7cer2mqegunuulq2luhq3ywjcz35yyljewa4mgkgjzyfwh6fr6jd0dzd44ghk0nxdv2hnv4j5nxfwv24rwdmgllhe0p8568sgqt9ckt02v2kxf5ahtql6s0ltjpkckw8gtymxtxuu9gcr0swvz": Height(419199), + "zxviewsfake": Height(999999), } diff --git a/zebra-scan/src/storage/db/tests/snapshots/sapling_keys@mainnet_keys.snap b/zebra-scan/src/storage/db/tests/snapshots/sapling_keys@mainnet_keys.snap index a3cee16c226..1a7b8aefebb 100644 --- a/zebra-scan/src/storage/db/tests/snapshots/sapling_keys@mainnet_keys.snap +++ b/zebra-scan/src/storage/db/tests/snapshots/sapling_keys@mainnet_keys.snap @@ -1,8 +1,8 @@ --- source: zebra-scan/src/storage/db/tests/snapshot.rs -expression: sapling_keys_and_birthday_heights +expression: sapling_keys_last_heights --- { - "zxviews1q0duytgcqqqqpqre26wkl45gvwwwd706xw608hucmvfalr759ejwf7qshjf5r9aa7323zulvz6plhttp5mltqcgs9t039cx2d09mgq05ts63n8u35hyv6h9nc9ctqqtue2u7cer2mqegunuulq2luhq3ywjcz35yyljewa4mgkgjzyfwh6fr6jd0dzd44ghk0nxdv2hnv4j5nxfwv24rwdmgllhe0p8568sgqt9ckt02v2kxf5ahtql6s0ltjpkckw8gtymxtxuu9gcr0swvz": Height(419200), - "zxviewsfake": Height(1000000), + "zxviews1q0duytgcqqqqpqre26wkl45gvwwwd706xw608hucmvfalr759ejwf7qshjf5r9aa7323zulvz6plhttp5mltqcgs9t039cx2d09mgq05ts63n8u35hyv6h9nc9ctqqtue2u7cer2mqegunuulq2luhq3ywjcz35yyljewa4mgkgjzyfwh6fr6jd0dzd44ghk0nxdv2hnv4j5nxfwv24rwdmgllhe0p8568sgqt9ckt02v2kxf5ahtql6s0ltjpkckw8gtymxtxuu9gcr0swvz": Height(419199), + "zxviewsfake": Height(999999), } diff --git a/zebra-scan/src/storage/db/tests/snapshots/sapling_keys@testnet_0.snap b/zebra-scan/src/storage/db/tests/snapshots/sapling_keys@testnet_0.snap index dceb1749c54..4fcc5b8d921 100644 --- a/zebra-scan/src/storage/db/tests/snapshots/sapling_keys@testnet_0.snap +++ b/zebra-scan/src/storage/db/tests/snapshots/sapling_keys@testnet_0.snap @@ -1,8 +1,8 @@ --- source: zebra-scan/src/storage/db/tests/snapshot.rs -expression: sapling_keys_and_birthday_heights +expression: sapling_keys_last_heights --- { - "zxviews1q0duytgcqqqqpqre26wkl45gvwwwd706xw608hucmvfalr759ejwf7qshjf5r9aa7323zulvz6plhttp5mltqcgs9t039cx2d09mgq05ts63n8u35hyv6h9nc9ctqqtue2u7cer2mqegunuulq2luhq3ywjcz35yyljewa4mgkgjzyfwh6fr6jd0dzd44ghk0nxdv2hnv4j5nxfwv24rwdmgllhe0p8568sgqt9ckt02v2kxf5ahtql6s0ltjpkckw8gtymxtxuu9gcr0swvz": Height(0), - "zxviewsfake": Height(1000000), + "zxviews1q0duytgcqqqqpqre26wkl45gvwwwd706xw608hucmvfalr759ejwf7qshjf5r9aa7323zulvz6plhttp5mltqcgs9t039cx2d09mgq05ts63n8u35hyv6h9nc9ctqqtue2u7cer2mqegunuulq2luhq3ywjcz35yyljewa4mgkgjzyfwh6fr6jd0dzd44ghk0nxdv2hnv4j5nxfwv24rwdmgllhe0p8568sgqt9ckt02v2kxf5ahtql6s0ltjpkckw8gtymxtxuu9gcr0swvz": Height(279999), + "zxviewsfake": Height(999999), } diff --git a/zebra-scan/src/storage/db/tests/snapshots/sapling_keys@testnet_1.snap b/zebra-scan/src/storage/db/tests/snapshots/sapling_keys@testnet_1.snap index dceb1749c54..4fcc5b8d921 100644 --- a/zebra-scan/src/storage/db/tests/snapshots/sapling_keys@testnet_1.snap +++ b/zebra-scan/src/storage/db/tests/snapshots/sapling_keys@testnet_1.snap @@ -1,8 +1,8 @@ --- source: zebra-scan/src/storage/db/tests/snapshot.rs -expression: sapling_keys_and_birthday_heights +expression: sapling_keys_last_heights --- { - "zxviews1q0duytgcqqqqpqre26wkl45gvwwwd706xw608hucmvfalr759ejwf7qshjf5r9aa7323zulvz6plhttp5mltqcgs9t039cx2d09mgq05ts63n8u35hyv6h9nc9ctqqtue2u7cer2mqegunuulq2luhq3ywjcz35yyljewa4mgkgjzyfwh6fr6jd0dzd44ghk0nxdv2hnv4j5nxfwv24rwdmgllhe0p8568sgqt9ckt02v2kxf5ahtql6s0ltjpkckw8gtymxtxuu9gcr0swvz": Height(0), - "zxviewsfake": Height(1000000), + "zxviews1q0duytgcqqqqpqre26wkl45gvwwwd706xw608hucmvfalr759ejwf7qshjf5r9aa7323zulvz6plhttp5mltqcgs9t039cx2d09mgq05ts63n8u35hyv6h9nc9ctqqtue2u7cer2mqegunuulq2luhq3ywjcz35yyljewa4mgkgjzyfwh6fr6jd0dzd44ghk0nxdv2hnv4j5nxfwv24rwdmgllhe0p8568sgqt9ckt02v2kxf5ahtql6s0ltjpkckw8gtymxtxuu9gcr0swvz": Height(279999), + "zxviewsfake": Height(999999), } diff --git a/zebra-scan/src/storage/db/tests/snapshots/sapling_keys@testnet_2.snap b/zebra-scan/src/storage/db/tests/snapshots/sapling_keys@testnet_2.snap index dceb1749c54..4fcc5b8d921 100644 --- a/zebra-scan/src/storage/db/tests/snapshots/sapling_keys@testnet_2.snap +++ b/zebra-scan/src/storage/db/tests/snapshots/sapling_keys@testnet_2.snap @@ -1,8 +1,8 @@ --- source: zebra-scan/src/storage/db/tests/snapshot.rs -expression: sapling_keys_and_birthday_heights +expression: sapling_keys_last_heights --- { - "zxviews1q0duytgcqqqqpqre26wkl45gvwwwd706xw608hucmvfalr759ejwf7qshjf5r9aa7323zulvz6plhttp5mltqcgs9t039cx2d09mgq05ts63n8u35hyv6h9nc9ctqqtue2u7cer2mqegunuulq2luhq3ywjcz35yyljewa4mgkgjzyfwh6fr6jd0dzd44ghk0nxdv2hnv4j5nxfwv24rwdmgllhe0p8568sgqt9ckt02v2kxf5ahtql6s0ltjpkckw8gtymxtxuu9gcr0swvz": Height(0), - "zxviewsfake": Height(1000000), + "zxviews1q0duytgcqqqqpqre26wkl45gvwwwd706xw608hucmvfalr759ejwf7qshjf5r9aa7323zulvz6plhttp5mltqcgs9t039cx2d09mgq05ts63n8u35hyv6h9nc9ctqqtue2u7cer2mqegunuulq2luhq3ywjcz35yyljewa4mgkgjzyfwh6fr6jd0dzd44ghk0nxdv2hnv4j5nxfwv24rwdmgllhe0p8568sgqt9ckt02v2kxf5ahtql6s0ltjpkckw8gtymxtxuu9gcr0swvz": Height(279999), + "zxviewsfake": Height(999999), } diff --git a/zebra-scan/src/storage/db/tests/snapshots/sapling_keys@testnet_keys.snap b/zebra-scan/src/storage/db/tests/snapshots/sapling_keys@testnet_keys.snap index 786580e52c0..4fcc5b8d921 100644 --- a/zebra-scan/src/storage/db/tests/snapshots/sapling_keys@testnet_keys.snap +++ b/zebra-scan/src/storage/db/tests/snapshots/sapling_keys@testnet_keys.snap @@ -1,8 +1,8 @@ --- source: zebra-scan/src/storage/db/tests/snapshot.rs -expression: sapling_keys_and_birthday_heights +expression: sapling_keys_last_heights --- { - "zxviews1q0duytgcqqqqpqre26wkl45gvwwwd706xw608hucmvfalr759ejwf7qshjf5r9aa7323zulvz6plhttp5mltqcgs9t039cx2d09mgq05ts63n8u35hyv6h9nc9ctqqtue2u7cer2mqegunuulq2luhq3ywjcz35yyljewa4mgkgjzyfwh6fr6jd0dzd44ghk0nxdv2hnv4j5nxfwv24rwdmgllhe0p8568sgqt9ckt02v2kxf5ahtql6s0ltjpkckw8gtymxtxuu9gcr0swvz": Height(280000), - "zxviewsfake": Height(1000000), + "zxviews1q0duytgcqqqqpqre26wkl45gvwwwd706xw608hucmvfalr759ejwf7qshjf5r9aa7323zulvz6plhttp5mltqcgs9t039cx2d09mgq05ts63n8u35hyv6h9nc9ctqqtue2u7cer2mqegunuulq2luhq3ywjcz35yyljewa4mgkgjzyfwh6fr6jd0dzd44ghk0nxdv2hnv4j5nxfwv24rwdmgllhe0p8568sgqt9ckt02v2kxf5ahtql6s0ltjpkckw8gtymxtxuu9gcr0swvz": Height(279999), + "zxviewsfake": Height(999999), } From 371b5581ab4442e2f97ae220f315b73da8b2e55f Mon Sep 17 00:00:00 2001 From: teor Date: Wed, 13 Dec 2023 11:21:36 +1000 Subject: [PATCH 20/21] Remove debugging code --- zebra-scan/src/storage/db/sapling.rs | 7 ------- zebra-scan/src/storage/db/tests/snapshot.rs | 2 -- 2 files changed, 9 deletions(-) diff --git a/zebra-scan/src/storage/db/sapling.rs b/zebra-scan/src/storage/db/sapling.rs index d95ee868ef3..c36d9ad004e 100644 --- a/zebra-scan/src/storage/db/sapling.rs +++ b/zebra-scan/src/storage/db/sapling.rs @@ -102,7 +102,6 @@ impl Storage { let sapling_tx_ids = self.sapling_tx_ids_cf(); let mut keys = HashMap::new(); - tracing::info!("searching backwards for last entry"); let mut last_stored_record: Option<( SaplingScannedDatabaseIndex, Option, @@ -116,7 +115,6 @@ impl Storage { let sapling_key = last_stored_record_index.sapling_key.clone(); let height = last_stored_record_index.tx_loc.height; - tracing::info!(?sapling_key, ?height, "inserting key and last height"); let prev_height = keys.insert(sapling_key.clone(), height); assert_eq!( prev_height, None, @@ -126,14 +124,9 @@ impl Storage { // Skip all the results until the next key. last_stored_record_index = SaplingScannedDatabaseIndex::min_for_key(&sapling_key); - tracing::info!( - ?last_stored_record_index, - "searching backwards strictly before" - ); last_stored_record = self .db .zs_prev_key_value_strictly_before(&sapling_tx_ids, &last_stored_record_index); - tracing::info!(?last_stored_record_index, "found last storede"); } } diff --git a/zebra-scan/src/storage/db/tests/snapshot.rs b/zebra-scan/src/storage/db/tests/snapshot.rs index 9645c64be0b..6abf955e896 100644 --- a/zebra-scan/src/storage/db/tests/snapshot.rs +++ b/zebra-scan/src/storage/db/tests/snapshot.rs @@ -147,10 +147,8 @@ fn snapshot_raw_rocksdb_column_family_data(db: &ScannerDb, original_cf_names: &[ /// Snapshot typed scanner result data using high-level storage methods, /// using `cargo insta` and RON serialization. fn snapshot_typed_result_data(storage: &Storage) { - tracing::info!("about to call sapling_keys_last_heights()"); // Make sure the typed key format doesn't accidentally change. let sapling_keys_last_heights = storage.sapling_keys_last_heights(); - tracing::info!("called sapling_keys_last_heights()"); // HashMap has an unstable order across Rust releases, so we need to sort it here. insta::assert_ron_snapshot!( From 0a5c08628abb412515acc4bd6c3b10ad3be3dc34 Mon Sep 17 00:00:00 2001 From: Alfredo Garcia Date: Wed, 13 Dec 2023 13:41:53 -0300 Subject: [PATCH 21/21] start scanning at min available height --- zebra-scan/src/scan.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/zebra-scan/src/scan.rs b/zebra-scan/src/scan.rs index 3874d86d708..7c4f96b73f9 100644 --- a/zebra-scan/src/scan.rs +++ b/zebra-scan/src/scan.rs @@ -64,8 +64,6 @@ pub async fn start( storage: Storage, ) -> Result<(), Report> { let network = storage.network(); - let mut height = storage.min_sapling_birthday_height(); - // Read keys from the storage on disk, which can block async execution. let key_storage = storage.clone(); let key_heights = tokio::task::spawn_blocking(move || key_storage.sapling_keys_last_heights()) @@ -73,6 +71,8 @@ pub async fn start( .await; let key_heights = Arc::new(key_heights); + let mut height = get_min_height(&key_heights).unwrap_or(storage.min_sapling_birthday_height()); + // Parse and convert keys once, then use them to scan all blocks. // There is some cryptography here, but it should be fast even with thousands of keys. let parsed_keys: HashMap< @@ -396,3 +396,8 @@ fn scanned_block_to_db_result( }) .collect() } + +/// Get the minimal height available in a key_heights map. +fn get_min_height(map: &HashMap) -> Option { + map.values().cloned().min() +}