From c35105d58803ca813c15f887d38b11a6beb1da6c Mon Sep 17 00:00:00 2001 From: bayk Date: Sat, 7 Dec 2024 00:39:46 -0800 Subject: [PATCH 1/6] Add retry logic for requests. Limit headers cache --- chain/src/pibd_params.rs | 18 +- chain/src/txhashset/headers_desegmenter.rs | 3 +- servers/src/mwc/sync/body_sync.rs | 69 ++++++-- servers/src/mwc/sync/header_sync.rs | 111 ++++++++---- servers/src/mwc/sync/state_sync.rs | 151 +++++++++------- servers/src/mwc/sync/sync_utils.rs | 190 +++++++++++++++------ 6 files changed, 372 insertions(+), 170 deletions(-) diff --git a/chain/src/pibd_params.rs b/chain/src/pibd_params.rs index 1dbf418d0..3c5126f37 100644 --- a/chain/src/pibd_params.rs +++ b/chain/src/pibd_params.rs @@ -37,6 +37,8 @@ pub const RANGEPROOF_SEGMENT_HEIGHT_RANGE: Range = 6..9; // ~ 675 b // Here are series for different available resources. Mem and CPU thresholds are allways the same. const HEADERS_HASH_BUFFER_LEN: [usize; 4] = [10, 20, 30, 60]; + +const HEADERS_BUFFER_LEN: [usize; 4] = [50, 100, 250, 400]; const BITMAPS_BUFFER_LEN: [usize; 4] = [10, 20, 30, 40]; const OUTPUTS_BUFFER_LEN: [usize; 4] = [7, 15, 30, 40]; @@ -52,6 +54,9 @@ const SEGMENTS_REQUEST_LIMIT: [usize; 4] = [20, 40, 80, 120]; /// deciding the segment isn't going to arrive. The syncer will then re-request the segment pub const SEGMENT_REQUEST_TIMEOUT_SECS: i64 = 60; +/// Default expected response time for a new peer. Units: ms +pub const SEGMENT_DEFAULT_RETRY_MS: i64 = 10000; // retry request after 10 seconds by default + struct SysMemoryInfo { available_memory_mb: u64, update_time: DateTime, @@ -152,6 +157,15 @@ impl PibdParams { ) } + /// Buffer size for headers + pub fn get_headers_buffer_len(&self) -> usize { + Self::calc_mem_adequate_val2( + &HEADERS_BUFFER_LEN, + self.get_available_memory_mb(), + self.cpu_num, + ) + } + /// Buffer size for output bitmaps pub fn get_bitmaps_buffer_len(&self) -> usize { Self::calc_mem_adequate_val2( @@ -220,8 +234,8 @@ impl PibdParams { pub fn get_segments_request_per_peer(&self) -> usize { match self.cpu_num { 1 => 2, - 2 => 4, - _ => 6, + 2 => 3, + _ => 4, } } diff --git a/chain/src/txhashset/headers_desegmenter.rs b/chain/src/txhashset/headers_desegmenter.rs index 7d9785b2a..a9441f9af 100644 --- a/chain/src/txhashset/headers_desegmenter.rs +++ b/chain/src/txhashset/headers_desegmenter.rs @@ -279,13 +279,14 @@ impl HeadersRecieveCache { headers: &HeaderHashesDesegmenter, elements: usize, requested_hashes: &HashMap, + headers_cache_size_limit: usize, ) -> Result, Error> { let mut return_vec = vec![]; let tip = self.chain.header_head()?; let base_hash_idx = tip.height / HEADERS_PER_BATCH as u64; // Still limiting by 1000 because of memory. Cache is limited, we better wait if theer are so many behind... 
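		// Worked example (HEADERS_PER_BATCH = 512 is assumed here for illustration): with the
		// tip at height 51_200, base_hash_idx is 100; a headers_cache_size_limit of 100 then
		// caps requests at batch index 200, unless the archive-height bound below is lower.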
let max_idx = cmp::min( - base_hash_idx + 1000, + base_hash_idx + headers_cache_size_limit as u64, self.archive_header_height / HEADERS_PER_BATCH as u64, ); diff --git a/servers/src/mwc/sync/body_sync.rs b/servers/src/mwc/sync/body_sync.rs index b48be602d..1db3dc626 100644 --- a/servers/src/mwc/sync/body_sync.rs +++ b/servers/src/mwc/sync/body_sync.rs @@ -30,7 +30,7 @@ use std::sync::Arc; pub struct BodySync { chain: Arc, required_capabilities: Capabilities, - request_tracker: RequestTracker, + request_tracker: RequestTracker, request_series: Vec<(Hash, u64)>, // Hash, height pibd_params: Arc, } @@ -53,7 +53,7 @@ impl BodySync { // Expected that it is called ONLY when state_sync is done pub fn request( &mut self, - peers: &Arc, + in_peers: &Arc, sync_state: &SyncState, sync_peers: &mut SyncPeers, best_height: u64, @@ -115,28 +115,58 @@ impl BodySync { }; self.required_capabilities = required_capabilities; - let (peers, excluded_requests) = sync_utils::get_sync_peers( - peers, + let (peers, excluded_requests, excluded_peers) = sync_utils::get_sync_peers( + in_peers, self.pibd_params.get_blocks_request_per_peer(), peer_capabilities, head.height, self.request_tracker.get_requests_num(), - &self.request_tracker.get_peers_queue_size(), + &self.request_tracker.get_peers_track_data(), ); if peers.is_empty() { - return Ok(SyncResponse::new( - SyncRequestResponses::WaitingForPeers, - self.get_peer_capabilities(), - format!( - "No available peers, waiting Q size: {}", - self.request_tracker.get_requests_num() - ), - )); + if excluded_peers == 0 { + return Ok(SyncResponse::new( + SyncRequestResponses::WaitingForPeers, + self.get_peer_capabilities(), + format!( + "No available peers, waiting Q size: {}", + self.request_tracker.get_requests_num() + ), + )); + } else { + return Ok(SyncResponse::new( + SyncRequestResponses::Syncing, + self.get_peer_capabilities(), + format!( + "Peers: {} Waiting Q size: {}", + peers.len() + excluded_peers as usize, + self.request_tracker.get_requests_num() + ), + )); + } } // requested_blocks, check for expiration - self.request_tracker - .retain_expired(pibd_params::SEGMENT_REQUEST_TIMEOUT_SECS, sync_peers); + self.request_tracker.retain_expired( + pibd_params::SEGMENT_REQUEST_TIMEOUT_SECS, + sync_peers, + |peer, request| { + debug!( + "Making retry send_block_request({}) call for peer {:?}", + request, peer + ); + if let Some(peer) = in_peers.get_connected_peer(peer) { + match peer.send_block_request(request.clone(), chain::Options::SYNC) { + Ok(_) => return true, + Err(e) => error!( + "Unable to retry send_block_request({}) for peer {:?}. Error: {}", + request, peer, e + ), + } + } + false + }, + ); sync_state.update(SyncStatus::BodySync { archive_height: if self.chain.archive_mode() { @@ -155,6 +185,7 @@ impl BodySync { let mut need_request = self.request_tracker.calculate_needed_requests( peers.len(), excluded_requests as usize, + excluded_peers as usize, self.pibd_params.get_blocks_request_per_peer(), self.pibd_params.get_blocks_request_limit(), ); @@ -227,7 +258,7 @@ impl BodySync { self.get_peer_capabilities(), format!( "Peers: {} Waiting Q size: {}", - peers.len(), + peers.len() + excluded_peers as usize, self.request_tracker.get_requests_num() ), )); @@ -259,19 +290,20 @@ impl BodySync { // let's request next package since we get this one... 
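		// Note: as used here, get_update_requests_to_next_ask() counts down to zero as
		// responses arrive, and calculate_needed_requests re-arms it at roughly a fifth of
		// the expected request budget, so the queue is topped up early rather than being
		// allowed to drain completely.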
if self.request_tracker.get_update_requests_to_next_ask() == 0 { if let Ok(head) = self.chain.head() { - let (peers, excluded_requests) = sync_utils::get_sync_peers( + let (peers, excluded_requests, excluded_peers) = sync_utils::get_sync_peers( peers, self.pibd_params.get_blocks_request_per_peer(), self.required_capabilities, head.height, self.request_tracker.get_requests_num(), - &self.request_tracker.get_peers_queue_size(), + &self.request_tracker.get_peers_track_data(), ); if !peers.is_empty() { // requested_blocks, check for expiration let mut need_request = self.request_tracker.calculate_needed_requests( peers.len(), excluded_requests as usize, + excluded_peers as usize, self.pibd_params.get_blocks_request_per_peer(), self.pibd_params.get_blocks_request_limit(), ); @@ -325,6 +357,7 @@ impl BodySync { hash.clone(), peer.info.addr.clone(), format!("Block {}, {}", hash, height), + hash.clone(), ); *need_request -= 1; if *need_request == 0 { diff --git a/servers/src/mwc/sync/header_sync.rs b/servers/src/mwc/sync/header_sync.rs index a09c0cbee..01446635d 100644 --- a/servers/src/mwc/sync/header_sync.rs +++ b/servers/src/mwc/sync/header_sync.rs @@ -40,7 +40,7 @@ pub struct HeaderSync { chain: Arc, received_cache: Option, // requested_heights is expected to be at response height, the next tothe requested - request_tracker: RequestTracker, + request_tracker: RequestTracker>, // Vec - locator data for headers request cached_response: Option>, headers_series_cache: HashMap<(PeerAddr, Hash), (Vec, DateTime)>, pibd_params: Arc, @@ -92,8 +92,26 @@ impl HeaderSync { return resp; } - self.request_tracker - .retain_expired(pibd_params::SEGMENT_REQUEST_TIMEOUT_SECS, sync_peers); + self.request_tracker.retain_expired( + pibd_params::SEGMENT_REQUEST_TIMEOUT_SECS, + sync_peers, + |peer, request| { + debug!( + "Making retry send_header_request({:?}) call for peer {:?}", + request, peer + ); + if let Some(peer) = peers.get_connected_peer(peer) { + match peer.send_header_request(request.clone()) { + Ok(_) => return true, + Err(e) => error!( + "Unable to retry send_header_request({:?}) for peer {:?}. 
Error: {}", + request, peer, e + ), + } + } + false + }, + ); // it is initial statis flag if !header_hashes.is_pibd_headers_are_loaded() { @@ -130,23 +148,35 @@ impl HeaderSync { "Chain is corrupted, please clean up the data manually and restart the node", ) { // Requesting multiple headers - let (peers, excluded_requests) = sync_utils::get_sync_peers( + let (peers, excluded_requests, excluded_peers) = sync_utils::get_sync_peers( peers, self.pibd_params.get_segments_request_per_peer(), Capabilities::HEADER_HIST, header_hashes.get_target_archive_height(), self.request_tracker.get_requests_num(), - &self.request_tracker.get_peers_queue_size(), + &self.request_tracker.get_peers_track_data(), ); if peers.is_empty() { - return SyncResponse::new( - SyncRequestResponses::WaitingForPeers, - Self::get_peer_capabilities(), - format!( - "No peers are available, requests waiting: {}", - self.request_tracker.get_requests_num() - ), - ); + if excluded_peers == 0 { + return SyncResponse::new( + SyncRequestResponses::WaitingForPeers, + Self::get_peer_capabilities(), + format!( + "No peers are available, requests waiting: {}", + self.request_tracker.get_requests_num() + ), + ); + } else { + return SyncResponse::new( + SyncRequestResponses::Syncing, + Self::get_peer_capabilities(), + format!( + "Has peers {}, requests waiting: {}", + excluded_peers, + self.request_tracker.get_requests_num() + ), + ); + } } sync_state.update(SyncStatus::HeaderSync { @@ -157,12 +187,14 @@ impl HeaderSync { let need_request = self.request_tracker.calculate_needed_requests( peers.len(), excluded_requests as usize, + excluded_peers as usize, self.pibd_params.get_segments_request_per_peer(), self.pibd_params.get_segments_requests_limit(), ); if need_request > 0 { let hashes = received_cache.next_desired_headers(headers_hash_desegmenter, - need_request, self.request_tracker.get_requested()) + need_request, self.request_tracker.get_requested(), + self.pibd_params.get_headers_buffer_len()) .expect("Chain is corrupted, please clean up the data manually and restart the node"); let mut rng = rand::thread_rng(); @@ -173,11 +205,12 @@ impl HeaderSync { .expect("Internal error. peers are empty"); match self.request_headers_for_hash(hash.clone(), height, peer.clone()) { - Ok(_) => { + Ok(locator) => { self.request_tracker.register_request( hash, peer.info.addr.clone(), format!("Header {}, {}", hash, height), + locator, ); } Err(e) => { @@ -193,7 +226,7 @@ impl HeaderSync { Self::get_peer_capabilities(), format!( "Loading headers below horizon. 
Has peers: {} Requests in waiting Q: {}", - peers.len(), + peers.len() + excluded_peers as usize, self.request_tracker.get_requests_num() ), ); @@ -249,11 +282,12 @@ impl HeaderSync { } match self.request_headers(header_head, sync_peer.clone()) { - Ok(_) => { + Ok(locator) => { self.request_tracker.register_request( header_head_hash, sync_peer.info.addr.clone(), format!("Tail header for {}", header_head.height), + locator, ); } Err(e) => { @@ -346,24 +380,26 @@ impl HeaderSync { if headers_hash_desegmenter.is_complete() { // Requesting multiple headers - let (peers, excluded_requests) = sync_utils::get_sync_peers( - peers, - self.pibd_params.get_segments_request_per_peer(), - Capabilities::HEADER_HIST, - headers_hash_desegmenter.get_target_height(), - self.request_tracker.get_requests_num(), - &self.request_tracker.get_peers_queue_size(), - ); + let (peers, excluded_requests, excluded_peers) = + sync_utils::get_sync_peers( + peers, + self.pibd_params.get_segments_request_per_peer(), + Capabilities::HEADER_HIST, + headers_hash_desegmenter.get_target_height(), + self.request_tracker.get_requests_num(), + &self.request_tracker.get_peers_track_data(), + ); if !peers.is_empty() { let need_request = self.request_tracker.calculate_needed_requests( peers.len(), excluded_requests as usize, + excluded_peers as usize, self.pibd_params.get_segments_request_per_peer(), self.pibd_params.get_segments_requests_limit(), ); if need_request > 0 { - let hashes = received_cache.next_desired_headers(headers_hash_desegmenter, need_request, self.request_tracker.get_requested()) + let hashes = received_cache.next_desired_headers(headers_hash_desegmenter, need_request, self.request_tracker.get_requested(), self.pibd_params.get_headers_buffer_len()) .expect("Chain is corrupted, please clean up the data manually and restart the node"); let mut rng = rand::thread_rng(); @@ -378,11 +414,12 @@ impl HeaderSync { height, peer.clone(), ) { - Ok(_) => { + Ok(locator) => { self.request_tracker.register_request( hash, peer.info.addr.clone(), format!("Header {}, {}", hash, height), + locator, ); } Err(e) => { @@ -463,11 +500,12 @@ impl HeaderSync { if !self.request_tracker.has_request(&sync_head.last_block_h) { if let Some(sync_peer) = Self::choose_sync_peer(peers) { match self.request_headers(sync_head, sync_peer.clone()) { - Ok(_) => { + Ok(locator) => { self.request_tracker.register_request( sync_head.last_block_h, sync_peer.info.addr.clone(), format!("Tail headers for {}", sync_head.height), + locator, ); sync_peers.report_ok_response(peer); } @@ -512,7 +550,11 @@ impl HeaderSync { } /// Request some block headers from a peer to advance us. 
- fn request_headers(&self, sync_head: chain::Tip, peer: Arc) -> Result<(), chain::Error> { + fn request_headers( + &self, + sync_head: chain::Tip, + peer: Arc, + ) -> Result, chain::Error> { let locator = self .get_locator(sync_head) .map_err(|e| chain::Error::Other(format!("{}", e)))?; @@ -520,9 +562,9 @@ impl HeaderSync { "sync: request_headers: asking {} for headers at {}", peer.info.addr, sync_head.height ); - peer.send_header_request(locator) + peer.send_header_request(locator.clone()) .map_err(|e| chain::Error::Other(format!("{}", e)))?; - Ok(()) + Ok(locator) } fn request_headers_for_hash( @@ -530,14 +572,15 @@ impl HeaderSync { header_hash: Hash, height: u64, peer: Arc, - ) -> Result<(), chain::Error> { + ) -> Result, chain::Error> { debug!( "sync: request_headers: asking {} for headers at hash {}, height {}", peer.info.addr, header_hash, height ); - peer.send_header_request(vec![header_hash]) + let locator: Vec = vec![header_hash]; + peer.send_header_request(locator.clone()) .map_err(|e| chain::Error::Other(format!("{}", e)))?; - Ok(()) + Ok(locator) } /// We build a locator based on sync_head. diff --git a/servers/src/mwc/sync/state_sync.rs b/servers/src/mwc/sync/state_sync.rs index d8a5c6a1c..cb66752e3 100644 --- a/servers/src/mwc/sync/state_sync.rs +++ b/servers/src/mwc/sync/state_sync.rs @@ -25,8 +25,8 @@ use mwc_chain::pibd_params::PibdParams; use mwc_chain::txhashset::{BitmapChunk, Desegmenter}; use mwc_chain::Chain; use mwc_core::core::hash::Hash; -use mwc_core::core::{OutputIdentifier, Segment, TxKernel}; -use mwc_p2p::PeerAddr; +use mwc_core::core::{OutputIdentifier, Segment, SegmentTypeIdentifier, TxKernel}; +use mwc_p2p::{Error, PeerAddr}; use mwc_util::secp::pedersen::RangeProof; use rand::seq::SliceRandom; use std::collections::{HashMap, HashSet}; @@ -48,7 +48,7 @@ pub struct StateSync { responded_root_hash: HashMap)>, responded_with_another_height: HashSet, // sync for segments - request_tracker: RequestTracker<(SegmentType, u64)>, + request_tracker: RequestTracker<(SegmentType, u64), (SegmentTypeIdentifier, Hash)>, is_complete: bool, pibd_params: Arc, } @@ -74,7 +74,7 @@ impl StateSync { pub fn request( &mut self, - peers: &Arc, + in_peers: &Arc, sync_state: Arc, sync_peers: &mut SyncPeers, stop_state: Arc, @@ -147,23 +147,35 @@ impl StateSync { }; // Requesting root_hash... - let (peers, excluded_requests) = sync_utils::get_sync_peers( - peers, + let (peers, excluded_requests, excluded_peers) = sync_utils::get_sync_peers( + in_peers, self.pibd_params.get_segments_request_per_peer(), Capabilities::PIBD_HIST, self.target_archive_height, self.request_tracker.get_requests_num(), - &self.request_tracker.get_peers_queue_size(), + &self.request_tracker.get_peers_track_data(), ); if peers.is_empty() { - return SyncResponse::new( - SyncRequestResponses::WaitingForPeers, - Self::get_peer_capabilities(), - format!( - "No peers to make requests. Waiting Q size: {}", - self.request_tracker.get_requests_num() - ), - ); + if excluded_peers == 0 { + return SyncResponse::new( + SyncRequestResponses::WaitingForPeers, + Self::get_peer_capabilities(), + format!( + "No peers to make requests. 
Waiting Q size: {}", + self.request_tracker.get_requests_num() + ), + ); + } else { + return SyncResponse::new( + SyncRequestResponses::Syncing, + Self::get_peer_capabilities(), + format!( + "Has peers: {} Requests in waiting Q: {}", + excluded_peers, + self.request_tracker.get_requests_num() + ), + ); + } } let now = Utc::now(); @@ -253,7 +265,7 @@ impl StateSync { Self::get_peer_capabilities(), format!( "Waiting for PIBD root. Hash peers: {} Get respoinses {} from {}", - peers.len(), + peers.len() + excluded_peers as usize, self.responded_root_hash.len(), self.requested_root_hash.len() ), @@ -295,7 +307,7 @@ impl StateSync { return SyncResponse::new( SyncRequestResponses::StatePibdReady, Capabilities::UNKNOWN, - "PIBD download and valiadion is done with success!".into(), + "PIBD download and validaion is done with success!".into(), ); } Err(e) => { @@ -318,8 +330,27 @@ impl StateSync { debug_assert!(!desegmenter.is_complete()); - self.request_tracker - .retain_expired(pibd_params::SEGMENT_REQUEST_TIMEOUT_SECS, sync_peers); + self.request_tracker.retain_expired( + pibd_params::SEGMENT_REQUEST_TIMEOUT_SECS, + sync_peers, + |peer, (segment, target_archive_hash)| { + debug!( + "Making request retry for segment {:?}, peer {:?}", + segment, peer + ); + if let Some(peer) = in_peers.get_connected_peer(peer) { + match Self::send_request(&peer, &segment, &target_archive_hash) { + Ok(_) => return true, + Err(e) => error!( + "Unable to retry request for segment {:?}, peer {:?}. Error: {}", + segment, peer, e + ), + } + } + false + }, + ); + sync_state.update(desegmenter.get_pibd_progress()); let mut rng = rand::thread_rng(); @@ -367,6 +398,7 @@ impl StateSync { let need_request = self.request_tracker.calculate_needed_requests( root_hash_peers.len(), excluded_requests as usize, + excluded_peers as usize, self.pibd_params.get_segments_request_per_peer(), self.pibd_params.get_segments_requests_limit(), ); @@ -383,24 +415,7 @@ impl StateSync { .choose(&mut rng) .expect("peers is not empty"); - let send_res = match seg.segment_type { - SegmentType::Bitmap => peer.send_bitmap_segment_request( - self.target_archive_hash.clone(), - seg.identifier, - ), - SegmentType::Output => peer.send_output_segment_request( - self.target_archive_hash.clone(), - seg.identifier, - ), - SegmentType::RangeProof => peer.send_rangeproof_segment_request( - self.target_archive_hash.clone(), - seg.identifier, - ), - SegmentType::Kernel => peer.send_kernel_segment_request( - self.target_archive_hash.clone(), - seg.identifier, - ), - }; + let send_res = Self::send_request(peer, &seg, &self.target_archive_hash); match send_res { Ok(_) => { let msg = format!("{:?}", key); @@ -408,6 +423,7 @@ impl StateSync { key, peer.info.addr.clone(), msg, + (seg.clone(), self.target_archive_hash.clone()), ); } Err(e) => { @@ -425,7 +441,7 @@ impl StateSync { Self::get_peer_capabilities(), format!( "Has peers: {} Requests in waiting Q: {}", - root_hash_peers.len(), + root_hash_peers.len() + excluded_peers as usize, self.request_tracker.get_requests_num() ), ); @@ -448,7 +464,7 @@ impl StateSync { Self::get_peer_capabilities(), format!( "Has peers {}, Requests in waiting Q: {}", - root_hash_peers.len(), + root_hash_peers.len() + excluded_peers as usize, self.request_tracker.get_requests_num() ), ); @@ -546,13 +562,13 @@ impl StateSync { if let Some(peer_addr) = self.request_tracker.remove_request(key) { if peer_addr == *peer { if self.request_tracker.get_update_requests_to_next_ask() == 0 { - let (peers, excluded_requests) = 
sync_utils::get_sync_peers( + let (peers, excluded_requests, excluded_peers) = sync_utils::get_sync_peers( peers, self.pibd_params.get_segments_request_per_peer(), Capabilities::PIBD_HIST, self.target_archive_height, self.request_tracker.get_requests_num(), - &self.request_tracker.get_peers_queue_size(), + &self.request_tracker.get_peers_track_data(), ); if peers.is_empty() { return; @@ -586,6 +602,7 @@ impl StateSync { let need_request = self.request_tracker.calculate_needed_requests( root_hash_peers.len(), excluded_requests as usize, + excluded_peers as usize, self.pibd_params.get_segments_request_per_peer(), self.pibd_params.get_segments_requests_limit(), ); @@ -606,28 +623,11 @@ impl StateSync { .choose(&mut rng) .expect("peers is not empty"); - let send_res = match seg.segment_type { - SegmentType::Bitmap => peer - .send_bitmap_segment_request( - self.target_archive_hash.clone(), - seg.identifier, - ), - SegmentType::Output => peer - .send_output_segment_request( - self.target_archive_hash.clone(), - seg.identifier, - ), - SegmentType::RangeProof => peer - .send_rangeproof_segment_request( - self.target_archive_hash.clone(), - seg.identifier, - ), - SegmentType::Kernel => peer - .send_kernel_segment_request( - self.target_archive_hash.clone(), - seg.identifier, - ), - }; + let send_res = Self::send_request( + peer, + &seg, + &self.target_archive_hash, + ); match send_res { Ok(_) => { let msg = format!("{:?}", key); @@ -635,6 +635,10 @@ impl StateSync { key, peer.info.addr.clone(), msg, + ( + seg.clone(), + self.target_archive_hash.clone(), + ), ); } Err(e) => { @@ -817,4 +821,25 @@ impl StateSync { self.track_and_request_more_segments(&key, peer, peers, sync_peers); } + + fn send_request( + peer: &Arc, + segment: &SegmentTypeIdentifier, + target_archive_hash: &Hash, + ) -> Result<(), Error> { + let send_res = match segment.segment_type { + SegmentType::Bitmap => { + peer.send_bitmap_segment_request(target_archive_hash.clone(), segment.identifier) + } + SegmentType::Output => { + peer.send_output_segment_request(target_archive_hash.clone(), segment.identifier) + } + SegmentType::RangeProof => peer + .send_rangeproof_segment_request(target_archive_hash.clone(), segment.identifier), + SegmentType::Kernel => { + peer.send_kernel_segment_request(target_archive_hash.clone(), segment.identifier) + } + }; + send_res + } } diff --git a/servers/src/mwc/sync/sync_utils.rs b/servers/src/mwc/sync/sync_utils.rs index 491df255f..20ef34c3b 100644 --- a/servers/src/mwc/sync/sync_utils.rs +++ b/servers/src/mwc/sync/sync_utils.rs @@ -17,10 +17,10 @@ use crate::mwc::sync::sync_peers::SyncPeers; use chrono::{DateTime, Duration, Utc}; -use mwc_chain::Chain; +use mwc_chain::{pibd_params, Chain}; use mwc_p2p::{Capabilities, Peer, PeerAddr, Peers}; use std::cmp; -use std::collections::HashMap; +use std::collections::{HashMap, VecDeque}; use std::sync::Arc; #[derive(Clone, Debug, PartialEq)] @@ -81,61 +81,145 @@ impl CachedResponse { } } +pub struct PeerTrackData { + requests: u32, + response_time: VecDeque, + response_time_sum: i64, +} + +impl PeerTrackData { + fn new(requests: u32) -> Self { + PeerTrackData { + requests, + response_time: VecDeque::new(), // units: ms + response_time_sum: 0, + } + } + + fn get_response_time(&self) -> i64 { + if self.response_time.is_empty() { + pibd_params::SEGMENT_DEFAULT_RETRY_MS + } else { + self.response_time_sum / self.response_time.len() as i64 + } + } + + fn report_response(&mut self, response_latency: Duration) { + self.requests = self.requests.saturating_sub(1); + 
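+		// Adaptive retry window: only the last 10 latencies are kept, so a single slow
+		// response ages out quickly; the running sum keeps the average returned by
+		// get_response_time() an O(1) read.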
let response_latency = response_latency.num_milliseconds();
+		self.response_time_sum += response_latency;
+		self.response_time.push_back(response_latency);
+		if self.response_time.len() > 10 {
+			self.response_time_sum -= self
+				.response_time
+				.pop_front()
+				.expect("response_time not empty");
+		}
+	}
+}
+
+pub struct RequestData<V> {
+	peer: PeerAddr,
+	request_time: DateTime<Utc>,
+	retry_time: DateTime<Utc>,
+	request_message: String, // for logging and debugging
+	request_data: V,         // data enough to retry the same request
+}
+
+impl<V> RequestData<V> {
+	fn new(peer: PeerAddr, request_message: String, request_data: V) -> Self {
+		let now = Utc::now();
+		RequestData {
+			peer,
+			request_time: now.clone(),
+			retry_time: now,
+			request_message,
+			request_data,
+		}
+	}
+}
+
 /// Utility class or tracking requests. Here we put common request related functionality
 /// Idea behind that is to make sync tolerate stale peer. We don't want to wait slow peer for full timeout,
 /// instead we want to utilize more faster peers. Also, we don't want superfast peer to take more
 /// traffic. In other words, we don't want peers be able to manipulate traffic shceduler.
-pub struct RequestTracker<K>
+pub struct RequestTracker<K, V>
 where
 	K: std::cmp::Eq + std::hash::Hash,
 {
-	requested_hashes: HashMap<K, (PeerAddr, DateTime<Utc>, String)>, // Values: peer, time, message
-	peers_queue_size: HashMap<PeerAddr, u32>, // there are so many peers and many requests, so we better to hande 'slow' peer cases
+	requested: HashMap<K, RequestData<V>>, // Values: peer, time, message
+	peers_stats: HashMap<PeerAddr, PeerTrackData>, // there are so many peers and many requests, so we had better handle 'slow' peer cases
 	requests_to_next_ask: usize,
 }
 
-impl<K> RequestTracker<K>
+impl<K, V> RequestTracker<K, V>
 where
 	K: std::cmp::Eq + std::hash::Hash,
 {
 	pub fn new() -> Self {
 		RequestTracker {
-			requested_hashes: HashMap::new(),
-			peers_queue_size: HashMap::new(),
+			requested: HashMap::new(),
+			peers_stats: HashMap::new(),
 			requests_to_next_ask: 0,
 		}
 	}
 
-	pub fn retain_expired(
+	pub fn retain_expired<F>(
 		&mut self,
 		expiration_time_interval_sec: i64,
 		sync_peers: &mut SyncPeers,
-	) {
-		let requested_hashes = &mut self.requested_hashes;
-		let peers_queue_size = &mut self.peers_queue_size;
+		retry_callback: F,
+	) where
+		// Callback function that is supposed to retry the request to the peer. Returns true if the peer was alive and the retry was sent.
+		F: Fn(&PeerAddr, &V) -> bool,
+	{
+		let requested = &mut self.requested;
+		let peers_stats = &mut self.peers_stats;
 		let now = Utc::now();
 		// first let's clean up stale requests...
-		requested_hashes.retain(|_, (peer, req_time, message)| {
-			if (now - *req_time).num_seconds() > expiration_time_interval_sec {
-				sync_peers.report_no_response(peer, message.clone());
-				if let Some(n) = peers_queue_size.get_mut(peer) {
-					*n = n.saturating_sub(1);
+		requested.retain(|_, request_data| {
+			let peer_stat = peers_stats.get_mut(&request_data.peer);
+			if (now - request_data.request_time).num_seconds() > expiration_time_interval_sec {
+				sync_peers
+					.report_no_response(&request_data.peer, request_data.request_message.clone());
+				if let Some(n) = peer_stat {
+					n.requests = n.requests.saturating_sub(1);
 				}
 				return false;
 			}
+			// check whether we want to retry
+			let retry_ms = match peer_stat.as_ref() {
+				Some(ps) => ps.get_response_time() * 2,
+				None => pibd_params::SEGMENT_DEFAULT_RETRY_MS * 2,
+			};
+			if (now - request_data.retry_time).num_milliseconds() > retry_ms {
+				if !retry_callback(&request_data.peer, &request_data.request_data) {
+					// retry failed, so the peer is offline.
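+					// A failed retry is treated like a timeout: the peer's score is
+					// penalized and the request is dropped so it can be re-issued elsewhere.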
+ sync_peers.report_no_response( + &request_data.peer, + request_data.request_message.clone(), + ); + if let Some(n) = peer_stat { + n.requests = n.requests.saturating_sub(1); + } + return false; + } + // retry was sent, we are good... + request_data.retry_time = now; + } true }); } pub fn clear(&mut self) { - self.requested_hashes.clear(); - self.peers_queue_size.clear(); + self.requested.clear(); + self.peers_stats.clear(); self.requests_to_next_ask = 0; } - pub fn get_requested(&self) -> &HashMap, String)> { - &self.requested_hashes + pub fn get_requested(&self) -> &HashMap> { + &self.requested } /// Calculate how many new requests we can make to the peers. This call updates requests_to_next_ask @@ -143,24 +227,22 @@ where &mut self, peer_num: usize, excluded_requests: usize, + _excluded_peers: usize, request_per_peer: usize, requests_limit: usize, ) -> usize { - let requests_in_queue = self - .requested_hashes - .len() - .saturating_sub(excluded_requests); + let requests_in_queue = self.requested.len().saturating_sub(excluded_requests); let expected_total_request = cmp::min(peer_num * request_per_peer, requests_limit); - self.requests_to_next_ask = expected_total_request / 5; + self.requests_to_next_ask = (expected_total_request + excluded_requests) / 5; expected_total_request.saturating_sub(requests_in_queue) } pub fn get_requests_num(&self) -> usize { - self.requested_hashes.len() + self.requested.len() } pub fn has_request(&self, req: &K) -> bool { - self.requested_hashes.contains_key(req) + self.requested.contains_key(req) } pub fn get_update_requests_to_next_ask(&mut self) -> usize { @@ -168,37 +250,37 @@ where self.requests_to_next_ask } - pub fn get_peers_queue_size(&self) -> &HashMap { - &self.peers_queue_size + pub fn get_peers_track_data(&self) -> &HashMap { + &self.peers_stats } - pub fn register_request(&mut self, key: K, peer: PeerAddr, message: String) { - match self.peers_queue_size.get_mut(&peer) { + pub fn register_request(&mut self, key: K, peer: PeerAddr, message: String, request_data: V) { + match self.peers_stats.get_mut(&peer) { Some(n) => { - *n = n.saturating_add(1); + n.requests += 1; } None => { - self.peers_queue_size.insert(peer.clone(), 1); + self.peers_stats.insert(peer.clone(), PeerTrackData::new(1)); } } - self.requested_hashes - .insert(key, (peer, Utc::now(), message)); + self.requested + .insert(key, RequestData::new(peer, message, request_data)); } pub fn remove_request(&mut self, key: &K) -> Option { - if let Some((peer, _time, _message)) = self.requested_hashes.remove(key) { - if let Some(n) = self.peers_queue_size.get_mut(&peer) { - *n = n.saturating_sub(1); + if let Some(request_data) = self.requested.remove(key) { + if let Some(n) = self.peers_stats.get_mut(&request_data.peer) { + n.report_response(Utc::now() - request_data.request_time); } - Some(peer) + Some(request_data.peer) } else { None } } pub fn get_expected_peer(&self, key: &K) -> Option { - if let Some((peer, _time, _message)) = self.requested_hashes.get(key) { - Some(peer.clone()) + if let Some(req_data) = self.requested.get(key) { + Some(req_data.peer.clone()) } else { None } @@ -231,13 +313,14 @@ pub fn get_sync_peers( capabilities: Capabilities, min_height: u64, total_queue_requests: usize, - peers_queue_size: &HashMap, -) -> (Vec>, u32) { + peers_queue_size: &HashMap, +) -> (Vec>, u32, u32) { // Excluding peers with totally full Q - let peer_requests_limit = (expected_requests_per_peer * 2) as u32; + let peer_requests_limit = expected_requests_per_peer as u32; let mut res: Vec> = 
Vec::new(); // for excluded we nned to cover offline prrs as well. That is why we are counting back
 	let mut excluded_requests: usize = total_queue_requests;
+	let mut excluded_peers = 0;
 	let mut found_outbound = false;
 	for peer in peers
 		.iter()
 		.outbound()
 		.connected()
 		.into_iter()
 		.with_capabilities(capabilities)
 		.with_min_height(min_height)
 	{
 		found_outbound = true;
-		if let Some(sz) = peers_queue_size.get(&peer.info.addr) {
-			if *sz < peer_requests_limit {
-				excluded_requests = excluded_requests.saturating_sub(*sz as usize);
+		if let Some(track_data) = peers_queue_size.get(&peer.info.addr) {
+			if track_data.requests < peer_requests_limit {
+				excluded_requests = excluded_requests.saturating_sub(track_data.requests as usize);
 			} else {
+				excluded_peers += 1;
 				continue;
 			}
 		}
 		res.push(peer);
 	}
@@ -265,15 +349,17 @@ pub fn get_sync_peers(
 			.inbound()
 			.with_min_height(min_height)
 		{
-			if let Some(sz) = peers_queue_size.get(&peer.info.addr) {
-				if *sz < peer_requests_limit {
-					excluded_requests = excluded_requests.saturating_sub(*sz as usize);
+			if let Some(track_data) = peers_queue_size.get(&peer.info.addr) {
+				if track_data.requests < peer_requests_limit {
+					excluded_requests =
+						excluded_requests.saturating_sub(track_data.requests as usize);
 				} else {
+					excluded_peers += 1;
 					continue;
 				}
 			}
 			res.push(peer);
 		}
 	}
-	(res, excluded_requests as u32)
+	(res, excluded_requests as u32, excluded_peers)
 }

From 599974804004a4600fafcccf696c345d885449b7 Mon Sep 17 00:00:00 2001
From: bayk
Date: Sat, 7 Dec 2024 00:51:39 -0800
Subject: [PATCH 2/6] Remove adding headers in large bulks.

---
 chain/src/txhashset/headers_desegmenter.rs | 34 +++------------------
 1 file changed, 4 insertions(+), 30 deletions(-)

diff --git a/chain/src/txhashset/headers_desegmenter.rs b/chain/src/txhashset/headers_desegmenter.rs
index a9441f9af..affb30ed6 100644
--- a/chain/src/txhashset/headers_desegmenter.rs
+++ b/chain/src/txhashset/headers_desegmenter.rs
@@ -379,8 +379,6 @@ impl HeadersRecieveCache {
 			.insert(first_header.height, (bhs, peer_info));
 
 		// Apply data from cache if possible
-		let mut headers_all: Vec<BlockHeader> = Vec::new();
-		let mut headers_by_peer: Vec<(Vec<BlockHeader>, T)> = Vec::new();
 		let tip = self
 			.chain
 			.header_head()
@@ -401,37 +399,13 @@ impl HeadersRecieveCache {
 			if *height > tip_height + 1 {
 				break;
 			}
-			let (_, (mut bhs, peer)) = self.main_headers_cache.pop_first().unwrap();
+			let (_, (bhs, peer)) = self.main_headers_cache.pop_first().unwrap();
 
 			tip_height = bhs.last().expect("bhs can't be empty").height;
-			headers_by_peer.push((bhs.clone(), peer));
-			headers_all.append(&mut bhs);
-		}
-
-		if !headers_all.is_empty() {
-			match self
-				.chain
-				.sync_block_headers(&headers_all, tip, Options::NONE)
-			{
+			// Adding headers into the blockchain. Adding by 512 is optimal; the DB is not designed to add a large number of headers at once
+			match self.chain.sync_block_headers(&bhs, tip, Options::NONE) {
 				Ok(_) => {}
-				Err(e) => {
-					warn!(
-						"add_headers in bulk is failed, will add one by one. 
Error: {}", - e - ); - // apply one by one - for (hdr, peer) in headers_by_peer { - let tip = self - .chain - .header_head() - .expect("Header head must be always defined"); - - match self.chain.sync_block_headers(&hdr, tip, Options::NONE) { - Ok(_) => {} - Err(e) => return Err((peer, e)), - } - } - } + Err(e) => return Err((peer, e)), } } From fd25d7ba4ec2436f099e9af0b6226552ad0df20a Mon Sep 17 00:00:00 2001 From: bayk Date: Sat, 7 Dec 2024 01:34:27 -0800 Subject: [PATCH 3/6] Updated Peers discovery --- chain/src/txhashset/headers_desegmenter.rs | 63 ++++++++++++++++++++-- p2p/src/store.rs | 7 ++- 2 files changed, 65 insertions(+), 5 deletions(-) diff --git a/chain/src/txhashset/headers_desegmenter.rs b/chain/src/txhashset/headers_desegmenter.rs index affb30ed6..fb8872866 100644 --- a/chain/src/txhashset/headers_desegmenter.rs +++ b/chain/src/txhashset/headers_desegmenter.rs @@ -379,6 +379,8 @@ impl HeadersRecieveCache { .insert(first_header.height, (bhs, peer_info)); // Apply data from cache if possible + let mut headers_all: Vec = Vec::new(); + let mut headers_by_peer: Vec<(Vec, T)> = Vec::new(); let tip = self .chain .header_head() @@ -399,13 +401,66 @@ impl HeadersRecieveCache { if *height > tip_height + 1 { break; } - let (_, (bhs, peer)) = self.main_headers_cache.pop_first().unwrap(); + let (_, (mut bhs, peer)) = self.main_headers_cache.pop_first().unwrap(); tip_height = bhs.last().expect("bhs can't be empty").height; - // Adding headers into the blockchian. Adding by 512 is optimal, DB not design to add large number of headers - match self.chain.sync_block_headers(&bhs, tip, Options::NONE) { + headers_by_peer.push((bhs.clone(), peer)); + headers_all.append(&mut bhs); + + if headers_all.len() > 5000 { + match self + .chain + .sync_block_headers(&headers_all, tip, Options::NONE) + { + Ok(_) => {} + Err(e) => { + warn!( + "add_headers in bulk is failed, will add one by one. Error: {}", + e + ); + // apply one by one + for (hdr, peer) in headers_by_peer { + let tip = self + .chain + .header_head() + .expect("Header head must be always defined"); + + match self.chain.sync_block_headers(&hdr, tip, Options::NONE) { + Ok(_) => {} + Err(e) => return Err((peer, e)), + } + } + } + } + headers_all = Vec::new(); + headers_by_peer = Vec::new(); + } + } + + if !headers_all.is_empty() { + match self + .chain + .sync_block_headers(&headers_all, tip, Options::NONE) + { Ok(_) => {} - Err(e) => return Err((peer, e)), + Err(e) => { + warn!( + "add_headers in bulk is failed, will add one by one. Error: {}", + e + ); + // apply one by one + for (hdr, peer) in headers_by_peer { + let tip = self + .chain + .header_head() + .expect("Header head must be always defined"); + + match self.chain.sync_block_headers(&hdr, tip, Options::NONE) { + Ok(_) => {} + Err(e) => return Err((peer, e)), + } + } + } } } diff --git a/p2p/src/store.rs b/p2p/src/store.rs index 603e38f3f..c3494570f 100644 --- a/p2p/src/store.rs +++ b/p2p/src/store.rs @@ -174,9 +174,14 @@ impl PeerStore { cap: Capabilities, count: usize, ) -> Result, Error> { + // All new peers has flags Capabilities::UNKNOWN, that is why we better to return themn as well. + // Node will try to connect to them and find the capability. let mut peers = self .peers_iter()? 
- .filter(|p| p.flags == state && p.capabilities.contains(cap)) + .filter(|p| { + p.flags == state + && (p.capabilities == Capabilities::UNKNOWN || p.capabilities.contains(cap)) + }) .collect::>(); peers[..].shuffle(&mut thread_rng()); Ok(peers.iter().take(count).cloned().collect()) From 3bbcb8d5f42dd173b444cc8bd53b3cbdfd3d29d0 Mon Sep 17 00:00:00 2001 From: bayk Date: Sat, 14 Dec 2024 20:38:18 -0800 Subject: [PATCH 4/6] Refactor Segmenter objects to allow parallel data access with minimal blocking. --- Cargo.toml | 5 +- chain/src/chain.rs | 155 ++--- chain/src/pibd_params.rs | 56 +- chain/src/pipe.rs | 130 +--- chain/src/txhashset.rs | 2 + chain/src/txhashset/bitmap_accumulator.rs | 5 + chain/src/txhashset/desegmenter.rs | 281 ++++---- chain/src/txhashset/headers_desegmenter.rs | 176 +++-- chain/src/txhashset/request_lookup.rs | 30 + chain/src/txhashset/segments_cache.rs | 70 +- chain/src/txhashset/txhashset.rs | 6 + chain/src/types.rs | 16 +- chain/tests/process_block_cut_through.rs | 3 +- chain/tests/test_pibd_copy.rs | 173 +++-- core/src/core/pmmr/vec_backend.rs | 9 + p2p/src/peers.rs | 28 +- p2p/src/serv.rs | 17 + p2p/src/types.rs | 2 +- servers/src/common/adapters.rs | 56 +- servers/src/mwc/seed.rs | 65 +- servers/src/mwc/server.rs | 4 +- servers/src/mwc/sync/body_sync.rs | 295 ++++++--- servers/src/mwc/sync/header_hashes_sync.rs | 75 ++- servers/src/mwc/sync/header_sync.rs | 482 ++++++++------ servers/src/mwc/sync/state_sync.rs | 728 ++++++++++++--------- servers/src/mwc/sync/sync_manager.rs | 123 ++-- servers/src/mwc/sync/sync_peers.rs | 31 +- servers/src/mwc/sync/sync_utils.rs | 243 +++---- servers/src/mwc/sync/syncer.rs | 18 +- src/bin/tui/status.rs | 25 +- 30 files changed, 1881 insertions(+), 1428 deletions(-) create mode 100644 chain/src/txhashset/request_lookup.rs diff --git a/Cargo.toml b/Cargo.toml index 82d3ebee9..c66d15763 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -63,4 +63,7 @@ debug = true #debug = false # Disable debug symbols (if not needed) #lto = true # Enable Link-Time Optimization #codegen-units = 1 # Optimize for size/speed -#overflow-checks = false \ No newline at end of file +#overflow-checks = false + +#[profile.release] +#debug = true \ No newline at end of file diff --git a/chain/src/chain.rs b/chain/src/chain.rs index a02435fd8..717d8fde0 100644 --- a/chain/src/chain.rs +++ b/chain/src/chain.rs @@ -184,8 +184,6 @@ pub struct Chain { txhashset: Arc>, // Lock order (with childrer): 2 header_pmmr: Arc>>, // Lock order (with childrer): 1 pibd_segmenter: Arc>>, - pibd_desegmenter: Arc>>, - reset_pibd_desegmenter: Arc>, // POW verification function pow_verifier: fn(&BlockHeader) -> Result<(), pow::Error>, denylist: Arc>>, @@ -247,8 +245,6 @@ impl Chain { txhashset: Arc::new(RwLock::new(txhashset)), header_pmmr: Arc::new(RwLock::new(header_pmmr)), pibd_segmenter: Arc::new(RwLock::new(None)), - pibd_desegmenter: Arc::new(RwLock::new(None)), - reset_pibd_desegmenter: Arc::new(RwLock::new(false)), pow_verifier, denylist: Arc::new(RwLock::new(vec![])), archive_mode, @@ -553,33 +549,39 @@ impl Chain { } break; } - if blocks.len() > 1 { - // good, we can process multiple blocks, it should be faster than one by one - let block_hashes: Vec<(u64, Hash)> = - blocks.iter().map(|b| (b.header.height, b.hash())).collect(); - match self.process_block_multiple(blocks, opts) { - Ok(tip) => { - // We are good, let's clean up the orphans - for (height, hash) in block_hashes { - let _ = self.orphans.remove_by_height_header_hash(height, &hash); - } - return Ok(tip); // Done with 
success + // good, we can process multiple blocks, it should be faster than one by one + let block_hashes: Vec<(u64, Hash)> = + blocks.iter().map(|b| (b.header.height, b.hash())).collect(); + match self.process_block_multiple(&blocks, opts) { + Ok(tip) => { + // We are good, let's clean up the orphans + for (height, hash) in block_hashes { + let _ = self.orphans.remove_by_height_header_hash(height, &hash); } - Err(e) => { - debug!("Failed process_block_multiple with error {}", e); - } // Continue processing one by one + return Ok(tip); // Done with success + } + Err(e) => { + info!( + "Failed to process multiple blocks, will try process one by one. {}", + e + ); } } } } - // Processing blocks one by one. It is slower, by eny possible error will be caught on block level. + // Processing blocks one by one. It is slower, but any possible error will be caught on block level. let height = b.header.height; - let res = self.process_block_single(b, opts); - if res.is_ok() { - self.check_orphans(height + 1); + match self.process_block_single(b, opts) { + Ok(tip) => { + self.check_orphans(height + 1); + return Ok(tip); + } + Err(e) => { + error!("process_block_single failed with error: {}", e); + return Err(e); + } } - res } /// We plan to support receiving blocks with CommitOnly inputs. @@ -717,20 +719,16 @@ impl Chain { /// Returns true if it has been added to the longest chain /// or false if it has added to a fork (or orphan?). fn process_block_single(&self, b: Block, opts: Options) -> Result, Error> { - // We can only reliably convert to "v2" if not an orphan (may spend output from previous block). - // We convert from "v3" to "v2" by looking up outputs to be spent. - // This conversion also ensures a block received in "v2" has valid input features (prevents malleability). - let b = self.convert_block_v2(b)?; - - let (head, fork_point, prev_head) = { + let (head, fork_point, prev_head, b) = { let mut header_pmmr = self.header_pmmr.write(); let mut txhashset = self.txhashset.write(); let batch = self.store.batch_write()?; let prev_head = batch.head()?; let mut ctx = self.new_ctx(opts, batch, &mut header_pmmr, &mut txhashset)?; - let (head, fork_point) = pipe::process_block( - &b, + let mut bv = vec![b.clone()]; + let (head, fork_point) = pipe::process_blocks_series( + &bv, &mut ctx, &mut *self.cache_header_difficulty.write(), self.secp(), @@ -739,7 +737,7 @@ impl Chain { ctx.batch.commit()?; // release the lock and let the batch go before post-processing - (head, fork_point, prev_head) + (head, fork_point, prev_head, bv.remove(0)) }; let prev = self.get_previous_header(&b.header)?; @@ -750,6 +748,11 @@ impl Chain { Tip::from_header(&fork_point), ); + info!( + "Accepted single block {} for height {}", + b.hash(), + b.header.height + ); // notifying other parts of the system of the update self.adapter.block_accepted(&b, status, opts); @@ -761,18 +764,9 @@ impl Chain { // Since they are orphans - check_block was called to them when they were added to orphan pool. fn process_block_multiple( &self, - blocks: Vec, + blocks: &Vec, opts: Options, ) -> Result, Error> { - // We can only reliably convert to "v2" if not an orphan (may spend output from previous block). - // We convert from "v3" to "v2" by looking up outputs to be spent. - // This conversion also ensures a block received in "v2" has valid input features (prevents malleability). 
- let mut blocks_v2 = Vec::new(); - for b in blocks { - blocks_v2.push(self.convert_block_v2(b)?); - } - debug_assert!(blocks_v2.len() > 1); - let (head, fork_point, prev_head) = { let mut header_pmmr = self.header_pmmr.write(); let mut txhashset = self.txhashset.write(); @@ -780,8 +774,12 @@ impl Chain { let prev_head = batch.head()?; let mut ctx = self.new_ctx(opts, batch, &mut header_pmmr, &mut txhashset)?; - let (head, fork_point) = - pipe::process_blocks_series(&blocks_v2, &mut ctx, self.secp())?; + let (head, fork_point) = pipe::process_blocks_series( + &blocks, + &mut ctx, + &mut *self.cache_header_difficulty.write(), + self.secp(), + )?; ctx.batch.commit()?; @@ -789,7 +787,7 @@ impl Chain { (head, fork_point, prev_head) }; - let last_block = blocks_v2.last().unwrap(); + let last_block = blocks.last().unwrap(); let prev = self.get_previous_header(&last_block.header)?; let status = self.determine_status( head, @@ -798,8 +796,15 @@ impl Chain { Tip::from_header(&fork_point), ); + debug!( + "Accepted multiple {} block from height {} to {}", + blocks.len(), + blocks.first().unwrap().header.height, + blocks.last().unwrap().header.height + ); + // notifying other parts of the system of the update - for b in &blocks_v2 { + for b in blocks { self.adapter.block_accepted(b, status, opts); } @@ -1304,6 +1309,26 @@ impl Chain { now.elapsed().as_millis() ); + // Let's check if mmr roots are matching the header + #[cfg(debug_assertions)] + { + use mwc_core::core::pmmr::ReadablePMMR; + + let txhashset = self.txhashset.read(); + + let output_pmmr = txhashset.output_pmmr_at(&header); + let output_pmmr_root = output_pmmr.root().unwrap(); + assert!(header.output_root == output_pmmr_root); + + let rangeproof_pmmr = txhashset.rangeproof_pmmr_at(&header); + let rangeproof_pmmr_root = rangeproof_pmmr.root().unwrap(); + assert!(header.range_proof_root == rangeproof_pmmr_root); + + let kernel_pmmr = txhashset.kernel_pmmr_at(&header); + let kernel_pmmr_root = kernel_pmmr.root().unwrap(); + assert!(header.kernel_root == kernel_pmmr_root); + } + Ok(Segmenter::new( Arc::new(RwLock::new(segm_header_pmmr_backend)), self.txhashset.clone(), @@ -1312,46 +1337,10 @@ impl Chain { )) } - /// instantiate desegmenter for this header. Expected that handshake is done and as a result, header with bitmap_root_hash is known - pub fn create_desegmenter( - &self, - archive_header_height: u64, - bitmap_root_hash: Hash, - ) -> Result<(), Error> { - let uploaded_height = self.head()?.height; - if uploaded_height >= archive_header_height { - return Err(Error::DesegmenterCreationError(format!("No need to create desegmenter, data is uploaded until height {}, archive height is {}", uploaded_height, archive_header_height))); - } - self.reset_pibd_chain()?; - let desegmenter = self.init_desegmenter(archive_header_height, bitmap_root_hash)?; - *self.pibd_desegmenter.write() = Some(desegmenter); - *self.reset_pibd_desegmenter.write() = false; - Ok(()) - } - - /// instantiate desegmenter (in same lazy fashion as segmenter, though this should not be as - /// expensive an operation) - pub fn get_desegmenter(&self) -> Arc>> { - // Use our cached desegmenter if we have one and the associated header matches. 
- let mut reset_pibd_desegmenter = self.reset_pibd_desegmenter.write(); - if *reset_pibd_desegmenter { - *self.pibd_desegmenter.write() = None; - *reset_pibd_desegmenter = false; - } - return self.pibd_desegmenter.clone(); - } - - /// Reset desegmenter associated with this seesion - pub fn reset_desegmenter(&self) { - // We can't modify desegmenter here, it is already locked. - //*self.pibd_desegmenter.write() = None - *self.reset_pibd_desegmenter.write() = true; - } - /// initialize a desegmenter, which is capable of extending the hashset by appending /// PIBD segments of the three PMMR trees + Bitmap PMMR /// header should be the same header as selected for the txhashset.zip archive - fn init_desegmenter( + pub fn init_desegmenter( &self, archive_header_hegiht: u64, bitmap_root_hash: Hash, diff --git a/chain/src/pibd_params.rs b/chain/src/pibd_params.rs index 3c5126f37..5979b20f5 100644 --- a/chain/src/pibd_params.rs +++ b/chain/src/pibd_params.rs @@ -35,27 +35,32 @@ pub const OUTPUT_SEGMENT_HEIGHT_RANGE: Range = 10..13; // ~ 33 b /// Segment heights for rangeproofs pub const RANGEPROOF_SEGMENT_HEIGHT_RANGE: Range = 6..9; // ~ 675 b +/// Retry request to the header if next 10 are already returned. +pub const HEADERS_RETRY_DELTA: u64 = 10; + +/// Retry request to the segments if next 5 are already returned. +pub const SEGMENTS_RETRY_DELTA: u64 = 5; + +/// Retry request to the blocks if next 10 are already returned. +pub const BLOCKS_RETRY_DELTA: u64 = 10; + // Here are series for different available resources. Mem and CPU thresholds are allways the same. const HEADERS_HASH_BUFFER_LEN: [usize; 4] = [10, 20, 30, 60]; const HEADERS_BUFFER_LEN: [usize; 4] = [50, 100, 250, 400]; const BITMAPS_BUFFER_LEN: [usize; 4] = [10, 20, 30, 40]; -const OUTPUTS_BUFFER_LEN: [usize; 4] = [7, 15, 30, 40]; -const KERNELS_BUFFER_LEN: [usize; 4] = [7, 15, 30, 40]; -const RANGEPROOFS_BUFFER_LEN: [usize; 4] = [7, 15, 30, 40]; +// segment size are from around 30-40 kB. Then double for every level +const SEGMENTS_BUFFER_LEN: [usize; 4] = [30, 40, 50, 60]; // One block can be up to 1.5Mb in size. We still need some to run the node const ORPHANS_BUFFER_LEN: [usize; 4] = [20, 100, 250, 500]; -const SEGMENTS_REQUEST_LIMIT: [usize; 4] = [20, 40, 80, 120]; +const SEGMENTS_REQUEST_LIMIT: [usize; 4] = [20, 30, 40, 40]; /// How long the state sync should wait after requesting a segment from a peer before /// deciding the segment isn't going to arrive. The syncer will then re-request the segment -pub const SEGMENT_REQUEST_TIMEOUT_SECS: i64 = 60; - -/// Default expected response time for a new peer. 
Units: ms -pub const SEGMENT_DEFAULT_RETRY_MS: i64 = 10000; // retry request after 10 seconds by default +pub const PIBD_REQUESTS_TIMEOUT_SECS: i64 = 30; struct SysMemoryInfo { available_memory_mb: u64, @@ -176,33 +181,12 @@ impl PibdParams { } /// Buffer size for outputs - pub fn get_outputs_buffer_len(&self, non_complete_num: usize) -> usize { - let k = if non_complete_num <= 1 { 2 } else { 1 }; - Self::calc_mem_adequate_val2( - &OUTPUTS_BUFFER_LEN, - self.get_available_memory_mb(), - self.cpu_num, - ) * k - } - - /// Buffer size for kernels - pub fn get_kernels_buffer_len(&self, non_complete_num: usize) -> usize { - let k = if non_complete_num <= 1 { 2 } else { 1 }; + pub fn get_segments_buffer_len(&self) -> usize { Self::calc_mem_adequate_val2( - &KERNELS_BUFFER_LEN, + &SEGMENTS_BUFFER_LEN, self.get_available_memory_mb(), self.cpu_num, - ) * k - } - - /// Buffer size for rangeproofs - pub fn get_rangeproofs_buffer_len(&self, non_complete_num: usize) -> usize { - let k = if non_complete_num <= 1 { 2 } else { 1 }; - Self::calc_mem_adequate_val2( - &RANGEPROOFS_BUFFER_LEN, - self.get_available_memory_mb(), - self.cpu_num, - ) * k + ) } /// Man number of orphans to keep @@ -217,9 +201,9 @@ impl PibdParams { /// Number of simultaneous requests for blocks we should make per available peer. pub fn get_blocks_request_per_peer(&self) -> usize { match self.cpu_num { - 1 => 3, - 2 => 6, - _ => 15, + 1 => 2, + 2 => 3, + _ => 5, } } @@ -235,7 +219,7 @@ impl PibdParams { match self.cpu_num { 1 => 2, 2 => 3, - _ => 4, + _ => 5, } } diff --git a/chain/src/pipe.rs b/chain/src/pipe.rs index e877e0054..5978a461b 100644 --- a/chain/src/pipe.rs +++ b/chain/src/pipe.rs @@ -168,6 +168,7 @@ fn validate_pow_only(header: &BlockHeader, ctx: &BlockContext<'_>) -> Result<(), pub fn process_blocks_series( blocks: &Vec, ctx: &mut BlockContext<'_>, + cache_values: &mut VecDeque, secp: &Secp256k1, ) -> Result<(Option, BlockHeader), Error> { debug_assert!(!blocks.is_empty()); @@ -206,11 +207,17 @@ pub fn process_blocks_series( // Check if we have already processed the first block previously. check_known(&first_block.header, &head, ctx)?; - // Quick pow validation. No point proceeding if this is invalid. - // We want to do this before we add the block to the orphan pool so we - // want to do this now and not later during header validation. for b in blocks { + // Quick pow validation. No point proceeding if this is invalid. + // We want to do this before we add the block to the orphan pool so we + // want to do this now and not later during header validation. validate_pow_only(&b.header, ctx)?; + + // Process the header for the block. + // Note: We still want to process the full block if we have seen this header before + // as we may have processed it "header first" and not yet processed the full block. + process_block_header(&b.header, ctx, cache_values)?; + // Validate the block itself, make sure it is internally consistent. // Use the verifier_cache for verifying rangeproofs and kernel signatures. validate_block(b, ctx, secp)?; @@ -294,115 +301,6 @@ pub fn process_blocks_series( res } -/// Runs the block processing pipeline, including validation and finding a -/// place for the new block in the chain. -/// Returns new head if chain head updated and the "fork point" rewound to when processing the new block. 
-pub fn process_block( - b: &Block, - ctx: &mut BlockContext<'_>, - cache_values: &mut VecDeque, - secp: &Secp256k1, -) -> Result<(Option, BlockHeader), Error> { - debug!( - "pipe: process_block {} at {} [in/out/kern: {}/{}/{}] ({})", - b.hash(), - b.header.height, - b.inputs().len(), - b.outputs().len(), - b.kernels().len(), - b.inputs().version_str(), - ); - - // Read current chain head from db via the batch. - // We use this for various operations later. - let head = ctx.batch.head()?; - - // Check if we have already processed this block previously. - check_known(&b.header, &head, ctx)?; - - // Quick pow validation. No point proceeding if this is invalid. - // We want to do this before we add the block to the orphan pool so we - // want to do this now and not later during header validation. - validate_pow_only(&b.header, ctx)?; - - // Get previous header from the db. - let prev = prev_header_store(&b.header, &mut ctx.batch)?; - - // Process the header for the block. - // Note: We still want to process the full block if we have seen this header before - // as we may have processed it "header first" and not yet processed the full block. - process_block_header(&b.header, ctx, cache_values)?; - - // Validate the block itself, make sure it is internally consistent. - // Use the verifier_cache for verifying rangeproofs and kernel signatures. - validate_block(b, ctx, secp)?; - - // Start a chain extension unit of work dependent on the success of the - // internal validation and saving operations - let header_pmmr = &mut ctx.header_pmmr; - let txhashset = &mut ctx.txhashset; - let batch = &mut ctx.batch; - let ctx_specific_validation = &ctx.header_allowed; - let fork_point = txhashset::extending(header_pmmr, txhashset, batch, |ext, batch| { - let fork_point_local_blocks = - rewind_and_apply_fork(&prev, ext, batch, ctx_specific_validation, secp)?; - let fork_point = fork_point_local_blocks.0; - let local_branch_blocks = fork_point_local_blocks.1; - - replay_attack_check(b, fork_point.height, &local_branch_blocks, ext, batch)?; - - // Check any coinbase being spent have matured sufficiently. - // This needs to be done within the context of a potentially - // rewound txhashset extension to reflect chain state prior - // to applying the new block. - verify_coinbase_maturity(b, ext, batch)?; - - // Validate the block against the UTXO set. - validate_utxo(b, ext, batch)?; - - // Using block_sums (utxo_sum, kernel_sum) for the previous block from the db - // we can verify_kernel_sums across the full UTXO sum and full kernel sum - // accounting for inputs/outputs/kernels in this new block. - // We know there are no double-spends etc. if this verifies successfully. - verify_block_sums(b, batch, secp)?; - - // Apply the block to the txhashset state. - // Validate the txhashset roots and sizes against the block header. - // Block is invalid if there are any discrepencies. - apply_block_to_txhashset(b, ext, batch)?; - - // If applying this block does not increase the work on the chain then - // we know we have not yet updated the chain to produce a new chain head. - // We discard the "child" batch used in this extension (original ctx batch still active). - // We discard any MMR modifications applied in this extension. - let head = batch.head()?; - if !has_more_work(&b.header, &head) { - ext.extension.force_rollback(); - } - - Ok(fork_point) - })?; - - // Add the validated block to the db. 
- // Note we do this in the outer batch, not the child batch from the extension - // as we only commit the child batch if the extension increases total work. - // We want to save the block to the db regardless. - add_block(b, &ctx.batch)?; - - // If we have no "tail" then set it now. - if ctx.batch.tail().is_err() { - update_body_tail(&b.header, &ctx.batch)?; - } - - if has_more_work(&b.header, &head) { - let head = Tip::from_header(&b.header); - update_head(&head, &mut ctx.batch)?; - Ok((Some(head), fork_point)) - } else { - Ok((None, fork_point)) - } -} - /// pub fn replay_attack_check( b: &Block, @@ -916,9 +814,13 @@ pub fn rewind_and_apply_fork( fork_hashes.reverse(); for h in &fork_hashes { - let fb = batch + let fb = match batch .get_block(&h) - .map_err(|e| Error::StoreErr(e, "getting forked blocks".to_string()))?; + .map_err(|e| Error::StoreErr(e, "getting forked blocks".to_string())) + { + Ok(fb) => fb, + Err(e) => return Err(e), + }; // Re-verify coinbase maturity along this fork. verify_coinbase_maturity(&fb, ext, batch)?; diff --git a/chain/src/txhashset.rs b/chain/src/txhashset.rs index 77ef9f921..f41b64b16 100644 --- a/chain/src/txhashset.rs +++ b/chain/src/txhashset.rs @@ -19,6 +19,8 @@ mod bitmap_accumulator; mod desegmenter; mod headers_desegmenter; +/// Requests lookup interface. +pub mod request_lookup; mod rewindable_kernel_view; mod segmenter; mod segments_cache; diff --git a/chain/src/txhashset/bitmap_accumulator.rs b/chain/src/txhashset/bitmap_accumulator.rs index d6c8ada36..a17d4e575 100644 --- a/chain/src/txhashset/bitmap_accumulator.rs +++ b/chain/src/txhashset/bitmap_accumulator.rs @@ -60,6 +60,11 @@ impl BitmapAccumulator { } } + /// Reset bitmap data + pub fn reset(&mut self) { + self.backend.reset(); + } + /// Initialize a bitmap accumulator given the provided idx iterator. 
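	/// `idx` is expected to enumerate the positions of set bits; e.g. (illustrative
	/// values) `vec![0u64, 3, 5].into_iter()` with `size = 8` would mark leaves 0, 3
	/// and 5 before the chunks are folded into the accumulator's PMMR.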
pub fn init>(&mut self, idx: T, size: u64) -> Result<(), Error> { self.apply_from(idx, 0, size) diff --git a/chain/src/txhashset/desegmenter.rs b/chain/src/txhashset/desegmenter.rs index 32843e867..e8dda3432 100644 --- a/chain/src/txhashset/desegmenter.rs +++ b/chain/src/txhashset/desegmenter.rs @@ -30,12 +30,11 @@ use crate::types::Tip; use crate::util::secp::pedersen::RangeProof; use crate::util::{RwLock, StopState}; use crate::{Chain, SyncState, SyncStatus}; -use std::cmp; -use std::collections::HashMap; use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Arc; use crate::pibd_params::PibdParams; +use crate::txhashset::request_lookup::RequestLookup; use crate::txhashset::segments_cache::SegmentsCache; use croaring::Bitmap; use log::Level; @@ -54,15 +53,15 @@ pub struct Desegmenter { genesis: BlockHeader, - outputs_bitmap_accumulator: Arc>, + outputs_bitmap_accumulator: RwLock, // Lock 1 outputs_bitmap_mmr_size: u64, /// In-memory 'raw' bitmap corresponding to contents of bitmap accumulator - outputs_bitmap: Option, + outputs_bitmap: RwLock>, - bitmap_segment_cache: SegmentsCache, - output_segment_cache: SegmentsCache, - rangeproof_segment_cache: SegmentsCache, - kernel_segment_cache: SegmentsCache, + bitmap_segment_cache: RwLock>, // Lock 1 + output_segment_cache: RwLock>, + rangeproof_segment_cache: RwLock>, + kernel_segment_cache: RwLock>, pibd_params: Arc, } @@ -112,26 +111,26 @@ impl Desegmenter { bitmap_root_hash, store, genesis, - outputs_bitmap_accumulator: Arc::new(RwLock::new(BitmapAccumulator::new())), + outputs_bitmap_accumulator: RwLock::new(BitmapAccumulator::new()), outputs_bitmap_mmr_size: bitmap_mmr_size, - bitmap_segment_cache: SegmentsCache::new( + bitmap_segment_cache: RwLock::new(SegmentsCache::new( SegmentType::Bitmap, total_bitmap_segment_count, - ), - output_segment_cache: SegmentsCache::new( + )), + output_segment_cache: RwLock::new(SegmentsCache::new( SegmentType::Output, total_outputs_segment_count, - ), - rangeproof_segment_cache: SegmentsCache::new( + )), + rangeproof_segment_cache: RwLock::new(SegmentsCache::new( SegmentType::RangeProof, total_rangeproof_segment_count, - ), - kernel_segment_cache: SegmentsCache::new( + )), + kernel_segment_cache: RwLock::new(SegmentsCache::new( SegmentType::Kernel, total_kernel_segment_count, - ), + )), - outputs_bitmap: None, + outputs_bitmap: RwLock::new(None), pibd_params, } } @@ -142,13 +141,13 @@ impl Desegmenter { } /// Reset all state - pub fn reset(&mut self) { - self.bitmap_segment_cache.reset(); - self.output_segment_cache.reset(); - self.rangeproof_segment_cache.reset(); - self.kernel_segment_cache.reset(); - self.outputs_bitmap = None; - self.outputs_bitmap_accumulator = Arc::new(RwLock::new(BitmapAccumulator::new())); + pub fn reset(&self) { + self.bitmap_segment_cache.write().reset(); + self.output_segment_cache.write().reset(); + self.rangeproof_segment_cache.write().reset(); + self.kernel_segment_cache.write().reset(); + *self.outputs_bitmap.write() = None; + self.outputs_bitmap_accumulator.write().reset(); } /// Return reference to the header used for validation @@ -158,23 +157,38 @@ impl Desegmenter { /// Whether we have all the segments we need pub fn is_complete(&self) -> bool { - self.output_segment_cache.is_complete() - && self.rangeproof_segment_cache.is_complete() - && self.kernel_segment_cache.is_complete() + let c1 = self.output_segment_cache.read().is_complete(); + let c2 = self.rangeproof_segment_cache.read().is_complete(); + let c3 = self.kernel_segment_cache.read().is_complete(); + c1 && 
c2 && c3
	}

	/// Check progress and update status if needed; reports how many of the
	/// required segments are in place
	pub fn get_pibd_progress(&self) -> SyncStatus {
-		let required = self.bitmap_segment_cache.get_required_segments()
-			+ self.output_segment_cache.get_required_segments()
-			+ self.rangeproof_segment_cache.get_required_segments()
-			+ self.kernel_segment_cache.get_required_segments();
+		let (req1, rec1) = {
+			let cache = self.bitmap_segment_cache.read();
+			(cache.get_required_segments(), cache.get_received_segments())
+		};

-		let received = self.bitmap_segment_cache.get_received_segments()
-			+ self.output_segment_cache.get_received_segments()
-			+ self.rangeproof_segment_cache.get_received_segments()
-			+ self.kernel_segment_cache.get_received_segments();
+		let (req2, rec2) = {
+			let cache = self.output_segment_cache.read();
+			(cache.get_required_segments(), cache.get_received_segments())
+		};
+
+		let (req3, rec3) = {
+			let cache = self.rangeproof_segment_cache.read();
+			(cache.get_required_segments(), cache.get_received_segments())
+		};
+
+		let (req4, rec4) = {
+			let cache = self.kernel_segment_cache.read();
+			(cache.get_required_segments(), cache.get_received_segments())
+		};
+
+		let required = req1 + req2 + req3 + req4;
+
+		let received = rec1 + rec2 + rec3 + rec4;

 		// Expected by QT wallet
 		info!("PIBD sync progress: {} from {}", received, required);
@@ -193,8 +207,9 @@ impl Desegmenter {
 		let mut _batch = self.store.batch_write()?;
 		txhashset::extending(&mut header_pmmr, &mut txhashset, &mut _batch, |ext, _| {
 			let extension = &mut ext.extension;
-			if let Some(b) = &self.outputs_bitmap {
-				extension.update_leaf_sets(&b)?;
+			let outputs_bitmap = self.outputs_bitmap.read();
+			if let Some(b) = outputs_bitmap.as_ref() {
+				extension.update_leaf_sets(b)?;
 			}
 			Ok(())
 		})?;
@@ -215,6 +230,8 @@ impl Desegmenter {
 			txhashset.roots()?.validate(&self.archive_header)?;
 		}

+		status.update(SyncStatus::ValidatingKernelsHistory);
+
 		// Validate kernel history
 		{
 			info!("desegmenter validation: rewinding and validating kernel history (readonly)");
@@ -391,103 +408,121 @@ impl Desegmenter {
 	}

 	/// Return list of the next preferred segments the desegmenter needs based on
-	/// the current real state of the underlying elements
-	pub fn next_desired_segments<V>(
-		&mut self,
+	/// the current real state of the underlying elements. The second vector lists delayed requests that are worth retrying
+	pub fn next_desired_segments(
+		&self,
 		need_requests: usize,
-		requested: &HashMap<(SegmentType, u64), V>,
-	) -> Result<Vec<SegmentTypeIdentifier>, Error> {
+		requested: &dyn RequestLookup<(SegmentType, u64)>,
+	) -> Result<(Vec<SegmentTypeIdentifier>, Vec<SegmentTypeIdentifier>), Error> {
 		// First check for required bitmap elements
-		if self.outputs_bitmap.is_none() {
-			debug_assert!(!self.bitmap_segment_cache.is_complete());
+		if self.outputs_bitmap.read().is_none() {
 			let mut bitmap_result: Vec<SegmentTypeIdentifier> = Vec::new();
-			for id in self.bitmap_segment_cache.next_desired_segments(
-				self.pibd_params.get_bitmap_segment_height(),
-				need_requests,
-				requested,
-				self.pibd_params.get_bitmaps_buffer_len(),
-			) {
+			// For bitmaps there are no duplicate requests; there is not much data.
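
The pair returned by `next_desired_segments` separates fresh requests from delayed ones that deserve a second ask. A minimal sketch of a caller honoring that contract; `send_request` and `resend_request` are hypothetical stand-ins for the real peer messaging, which is not part of this patch:

```rust
fn send_request(_id: &SegmentTypeIdentifier) -> Result<(), Error> {
	Ok(()) // placeholder: would send a segment request to a peer
}
fn resend_request(_id: &SegmentTypeIdentifier) -> Result<(), Error> {
	Ok(()) // placeholder: would re-send the request, ideally to another peer
}

fn issue_requests(
	d: &Desegmenter,
	tracker: &dyn RequestLookup<(SegmentType, u64)>,
	budget: usize,
) -> Result<(), Error> {
	let (fresh, retries) = d.next_desired_segments(budget, tracker)?;
	for id in &fresh {
		send_request(id)?; // first-time requests, capped by `budget`
	}
	for id in &retries {
		resend_request(id)?; // delayed requests worth retrying
	}
	Ok(())
}
```
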
+			for id in self
+				.bitmap_segment_cache
+				.read()
+				.next_desired_segments(
+					self.pibd_params.get_bitmap_segment_height(),
+					need_requests,
+					requested,
+					self.pibd_params.get_bitmaps_buffer_len(),
+				)
+				.0
+			{
 				bitmap_result.push(SegmentTypeIdentifier::new(SegmentType::Bitmap, id))
 			}
-			return Ok(bitmap_result);
+			return Ok((bitmap_result, Vec::new()));
 		} else {
 			// We have all required bitmap segments and have recreated our local
 			// bitmap, now continue with other segments, evenly spreading requests
 			// among MMRs
-			let mut result: Vec<SegmentTypeIdentifier> = Vec::new();
-
-			let mut non_complete_num = 0;
-			if !self.output_segment_cache.is_complete() {
-				non_complete_num += 1;
-			}
-			if !self.rangeproof_segment_cache.is_complete() {
-				non_complete_num += 1;
-			}
-			if !self.kernel_segment_cache.is_complete() {
-				non_complete_num += 1;
-			}
-			if non_complete_num == 0 {
-				return Ok(result); // All done, nothing is needed
-			}
-
-			let max_elements = need_requests / non_complete_num;
-			let mut extra_for_first = need_requests % non_complete_num;
-			debug_assert!(max_elements + extra_for_first > 0);
+			debug_assert!(need_requests > 0);
+			let mut need_requests = need_requests;

 			// Note: request the segments with the largest data items first. Since each item is large,
 			// the number of items per segment is low, so the number of segments is high.
-			if !self.rangeproof_segment_cache.is_complete() && max_elements + extra_for_first > 0 {
-				for id in self.rangeproof_segment_cache.next_desired_segments(
-					self.pibd_params.get_rangeproof_segment_height(),
-					max_elements + cmp::min(1, extra_for_first),
-					requested,
-					self.pibd_params
-						.get_rangeproofs_buffer_len(non_complete_num),
-				) {
-					result.push(SegmentTypeIdentifier::new(SegmentType::RangeProof, id))
-				}
-				extra_for_first = extra_for_first.saturating_sub(1);
-			}
-
-			if !self.kernel_segment_cache.is_complete() && max_elements + extra_for_first > 0 {
-				debug_assert!(extra_for_first <= 1);
-				for id in self.kernel_segment_cache.next_desired_segments(
-					self.pibd_params.get_kernel_segment_height(),
-					max_elements + extra_for_first,
-					requested,
-					self.pibd_params.get_kernels_buffer_len(non_complete_num),
-				) {
-					result.push(SegmentTypeIdentifier::new(SegmentType::Kernel, id))
-				}
-				extra_for_first = extra_for_first.saturating_sub(1);
-			}
-
-			if !self.output_segment_cache.is_complete() && max_elements + extra_for_first > 0 {
-				for id in self.output_segment_cache.next_desired_segments(
-					self.pibd_params.get_output_segment_height(),
-					max_elements + cmp::min(1, extra_for_first),
-					requested,
-					self.pibd_params.get_outputs_buffer_len(non_complete_num),
-				) {
-					result.push(SegmentTypeIdentifier::new(SegmentType::Output, id))
-				}
+			let mut res_req: Vec<SegmentTypeIdentifier> = Vec::new();
+			let mut res_dup_req: Vec<SegmentTypeIdentifier> = Vec::new();
+			if need_requests > 0 && !self.rangeproof_segment_cache.read().is_complete() {
+				let (requests, retry_requests) =
+					self.rangeproof_segment_cache.read().next_desired_segments(
+						self.pibd_params.get_rangeproof_segment_height(),
+						need_requests,
+						requested,
+						self.pibd_params.get_segments_buffer_len(),
+					);
+				debug_assert!(requests.len() <= need_requests);
+				// shrink the remaining budget by the number of requests just scheduled
+				need_requests = need_requests.saturating_sub(requests.len());
+				res_req.extend(
+					requests
+						.into_iter()
+						.map(|id| SegmentTypeIdentifier::new(SegmentType::RangeProof, id)),
+				);
+				res_dup_req.extend(
+					retry_requests
+						.into_iter()
+						.map(|id| SegmentTypeIdentifier::new(SegmentType::RangeProof, id)),
+				);
+			};
+
+			if need_requests > 0 && !self.kernel_segment_cache.read().is_complete() {
+				let (requests, retry_requests) =
+					self.kernel_segment_cache.read().next_desired_segments(
+						self.pibd_params.get_kernel_segment_height(),
+						need_requests,
+						requested,
+						self.pibd_params.get_segments_buffer_len(),
+					);
+				debug_assert!(requests.len() <= need_requests);
+				// shrink the remaining budget by the number of requests just scheduled
+				need_requests = need_requests.saturating_sub(requests.len());
+				res_req.extend(
+					requests
+						.into_iter()
+						.map(|id| SegmentTypeIdentifier::new(SegmentType::Kernel, id)),
+				);
+				res_dup_req.extend(
+					retry_requests
+						.into_iter()
+						.map(|id| SegmentTypeIdentifier::new(SegmentType::Kernel, id)),
+				);
+			};
+
+			if need_requests > 0 && !self.output_segment_cache.read().is_complete() {
+				let (requests, retry_requests) =
+					self.output_segment_cache.read().next_desired_segments(
+						self.pibd_params.get_output_segment_height(),
+						need_requests,
+						requested,
+						self.pibd_params.get_segments_buffer_len(),
+					);
+				debug_assert!(requests.len() <= need_requests);
+				// shrink the remaining budget by the number of requests just scheduled
+				need_requests = need_requests.saturating_sub(requests.len());
+				res_req.extend(
+					requests
+						.into_iter()
+						.map(|id| SegmentTypeIdentifier::new(SegmentType::Output, id)),
+				);
+				res_dup_req.extend(
+					retry_requests
+						.into_iter()
+						.map(|id| SegmentTypeIdentifier::new(SegmentType::Output, id)),
+				);
 			}
+			let _ = need_requests;

-			debug_assert!(result.len() <= need_requests);
-
-			return Ok(result);
+			return Ok((res_req, res_dup_req));
 		}
 	}

 	/// 'Finalize' the bitmap accumulator, storing an in-memory copy of the bitmap for
 	/// use in further validation and setting the accumulator on the underlying txhashset
-	fn finalize_bitmap(&mut self) -> Result<(), Error> {
+	fn finalize_bitmap(&self) -> Result<(), Error> {
 		trace!(
 			"pibd_desegmenter: finalizing and caching bitmap - accumulator root: {}",
 			self.outputs_bitmap_accumulator.read().root()
 		);
-		self.outputs_bitmap = Some(self.outputs_bitmap_accumulator.read().build_bitmap());
+		*self.outputs_bitmap.write() = Some(self.outputs_bitmap_accumulator.read().build_bitmap());
 		Ok(())
 	}

@@ -519,7 +554,7 @@ impl Desegmenter {

 	/// Adds and validates a bitmap chunk
 	pub fn add_bitmap_segment(
-		&mut self,
+		&self,
 		segment: Segment<BitmapChunk>,
 		bitmap_root_hash: &Hash,
 	) -> Result<(), Error> {
@@ -541,7 +576,7 @@ impl Desegmenter {

 		// All okay, add to our cached list of bitmap segments
 		{
-			let bitmap_segment_cache = &mut self.bitmap_segment_cache;
+			let mut bitmap_segment_cache = self.bitmap_segment_cache.write();
 			let mut bitmap_accumulator = self.outputs_bitmap_accumulator.write();

 			bitmap_segment_cache.apply_new_segment(segment, |segm_v| {
@@ -559,7 +594,7 @@ impl Desegmenter {
 			})?;
 		}

-		if self.bitmap_segment_cache.is_complete() {
+		if self.bitmap_segment_cache.read().is_complete() {
 			self.finalize_bitmap()?;
 		}

@@ -568,7 +603,7 @@ impl Desegmenter {

 	/// Adds an output segment
 	pub fn add_output_segment(
-		&mut self,
+		&self,
 		segment: Segment<OutputIdentifier>,
 		bitmap_root_hash: &Hash,
 	) -> Result<(), Error> {
@@ -580,7 +615,7 @@ impl Desegmenter {
 			return Err(Error::InvalidSegmentHeght);
 		}

-		match self.outputs_bitmap.as_ref() {
+		match self.outputs_bitmap.read().as_ref() {
 			Some(outputs_bitmap) => {
 				trace!("pibd_desegmenter: add output segment");
 				segment.validate(
@@ -589,7 +624,7 @@ impl Desegmenter {
 					&self.archive_header.output_root, // Output root we're checking for
 				)?;

-				let output_segment_cache = &mut self.output_segment_cache;
+				let mut output_segment_cache = self.output_segment_cache.write();
 				let mut header_pmmr = self.header_pmmr.write();
 				let mut txhashset = self.txhashset.write();
 				let mut batch = self.store.batch_write()?;
@@ -623,7 +658,7 @@ impl Desegmenter {

 	/// Adds a Rangeproof segment
 	pub fn add_rangeproof_segment(
-		&mut self,
+		&self,
 		segment: Segment<RangeProof>,
 		bitmap_root_hash: &Hash,
 	) -> Result<(), Error> {
@@ -635,16 +670,16 @@ impl Desegmenter {
 			return Err(Error::InvalidSegmentHeght);
 		}

-		match self.outputs_bitmap.as_ref() {
+		match self.outputs_bitmap.read().as_ref() {
 			Some(outputs_bitmap) => {
 				trace!("pibd_desegmenter: add rangeproof segment");
 				segment.validate(
 					self.archive_header.output_mmr_size, // Last MMR pos at the height being validated
-					self.outputs_bitmap.as_ref(),
+					Some(outputs_bitmap),
 					&self.archive_header.range_proof_root, // Range proof root we're checking for
 				)?;

-				let rangeproof_segment_cache = &mut self.rangeproof_segment_cache;
+				let mut rangeproof_segment_cache = self.rangeproof_segment_cache.write();
 				let mut header_pmmr = self.header_pmmr.write();
 				let mut txhashset = self.txhashset.write();
 				let mut batch = self.store.batch_write()?;
@@ -675,7 +710,7 @@ impl Desegmenter {

 	/// Adds a Kernel segment
 	pub fn add_kernel_segment(
-		&mut self,
+		&self,
 		segment: Segment<TxKernel>,
 		bitmap_root_hash: &Hash,
 	) -> Result<(), Error> {
@@ -693,7 +728,7 @@ impl Desegmenter {
 			&self.archive_header.kernel_root, // Kernel root we're checking for
 		)?;

-		let kernel_segment_cache = &mut self.kernel_segment_cache;
+		let mut kernel_segment_cache = self.kernel_segment_cache.write();
 		let mut header_pmmr = self.header_pmmr.write();
 		let mut txhashset = self.txhashset.write();
 		let mut batch = self.store.batch_write()?;
diff --git a/chain/src/txhashset/headers_desegmenter.rs b/chain/src/txhashset/headers_desegmenter.rs
index fb8872866..e98010f0e 100644
--- a/chain/src/txhashset/headers_desegmenter.rs
+++ b/chain/src/txhashset/headers_desegmenter.rs
@@ -20,14 +20,16 @@ use crate::core::core::pmmr;
 use crate::core::core::{BlockHeader, Segment};
 use crate::error::Error;
 use crate::pibd_params::PibdParams;
+use crate::txhashset::request_lookup::RequestLookup;
 use crate::txhashset::segments_cache::SegmentsCache;
 use crate::txhashset::{sort_pmmr_hashes_and_leaves, OrderedHashLeafNode};
 use crate::types::HEADERS_PER_BATCH;
-use crate::Options;
+use crate::{pibd_params, Options};
 use mwc_core::core::pmmr::{VecBackend, PMMR};
 use mwc_core::core::{SegmentIdentifier, SegmentType};
+use mwc_util::RwLock;
 use std::cmp;
-use std::collections::{BTreeMap, HashMap};
+use std::collections::BTreeMap;
 use std::sync::Arc;

 /// There is no reason to introduce a special type for that. For a place marker any type will work
@@ -102,18 +104,21 @@ impl HeaderHashesDesegmenter {

 	/// Return list of the next preferred segments the desegmenter needs based on
 	/// the current real state of the underlying elements
-	pub fn next_desired_segments<T>(
+	pub fn next_desired_segments(
 		&mut self,
 		max_elements: usize,
-		requested_segments: &HashMap<(SegmentType, u64), T>,
+		requested_segments: &dyn RequestLookup<(SegmentType, u64)>,
 		pibd_params: &PibdParams,
 	) -> Vec<SegmentIdentifier> {
-		self.header_segment_cache.next_desired_segments(
-			pibd_params.get_headers_segment_height(),
-			max_elements,
-			requested_segments,
-			self.pibd_params.get_headers_hash_buffer_len(),
-		)
+		// For headers hashes there are no duplicate requests; there is not much data...
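
Taking the tracker as `&dyn RequestLookup<(SegmentType, u64)>` rather than a generic `&HashMap<(SegmentType, u64), T>` keeps these methods non-generic, so any tracker type can be handed in without monomorphizing the desegmenter code. A minimal sketch of a custom implementor; `SimpleTracker` is hypothetical, only the trait comes from this patch:

```rust
use std::collections::HashSet;

// Hypothetical tracker type: anything that can answer
// "is this key already requested?" satisfies the trait.
struct SimpleTracker {
	in_flight: HashSet<(SegmentType, u64)>,
}

impl RequestLookup<(SegmentType, u64)> for SimpleTracker {
	fn contains_request(&self, key: &(SegmentType, u64)) -> bool {
		self.in_flight.contains(key)
	}
}
```
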
+		self
+			.header_segment_cache
+			.next_desired_segments(
+				pibd_params.get_headers_segment_height(),
+				max_elements,
+				requested_segments,
+				self.pibd_params.get_headers_hash_buffer_len(),
+			)
+			.0
 	}

 	/// Adds an output segment
@@ -198,7 +203,7 @@ pub struct HeadersRecieveCache {
 	// Archive header height used for the sync process
 	archive_header_height: u64,
 	// cache with received headers
-	main_headers_cache: BTreeMap<u64, (Vec<BlockHeader>, T)>,
+	main_headers_cache: RwLock<BTreeMap<u64, (Vec<BlockHeader>, T)>>,
 	// target chain to feed the data
 	chain: Arc<Chain>,
 }
@@ -211,7 +216,7 @@ impl HeadersRecieveCache {
 	) -> Self {
 		let mut res = HeadersRecieveCache {
 			archive_header_height: 0,
-			main_headers_cache: BTreeMap::new(),
+			main_headers_cache: RwLock::new(BTreeMap::new()),
 			chain: chain.clone(),
 		};
 		res.prepare_download_headers(header_desegmenter)
@@ -261,7 +266,7 @@ impl HeadersRecieveCache {

 	/// Reset all state
 	pub fn reset(&mut self) {
-		self.main_headers_cache.clear();
+		self.main_headers_cache.write().clear();
 		self.archive_header_height = 0;
 	}

@@ -273,14 +278,14 @@ impl HeadersRecieveCache {
 	}

 	/// Return list of the next preferred segments the desegmenter needs based on
-	/// the current real state of the underlying elements
-	pub fn next_desired_headers<V>(
-		&mut self,
+	/// the current real state of the underlying elements. The second vector lists delayed requests that are worth retrying
+	pub fn next_desired_headers(
+		&self,
 		headers: &HeaderHashesDesegmenter,
 		elements: usize,
-		requested_hashes: &HashMap<Hash, V>,
+		request_tracker: &dyn RequestLookup<Hash>,
 		headers_cache_size_limit: usize,
-	) -> Result<Vec<(Hash, u64)>, Error> {
+	) -> Result<(Vec<(Hash, u64)>, Vec<(Hash, u64)>), Error> {
 		let mut return_vec = vec![];
 		let tip = self.chain.header_head()?;
 		let base_hash_idx = tip.height / HEADERS_PER_BATCH as u64;
@@ -290,15 +295,36 @@ impl HeadersRecieveCache {
 			self.archive_header_height / HEADERS_PER_BATCH as u64,
 		);

+		let mut waiting_indexes: Vec<(u64, (Hash, u64))> = Vec::new();
+
+		let mut first_in_cache = 0;
+		let mut last_in_cache = 0;
+		let mut has10_idx = 0;
+
 		for hash_idx in base_hash_idx..=max_idx {
 			// let's check if the cache already has it
 			if self
 				.main_headers_cache
+				.read()
 				.contains_key(&(hash_idx * HEADERS_PER_BATCH as u64 + 1))
 			{
+				if hash_idx == last_in_cache + 1 {
+					last_in_cache = hash_idx;
+				} else {
+					first_in_cache = hash_idx;
+					last_in_cache = hash_idx;
+				}
 				continue;
 			}

+			if last_in_cache > 0 {
+				if last_in_cache - first_in_cache > pibd_params::HEADERS_RETRY_DELTA {
+					has10_idx = first_in_cache;
+				}
+				first_in_cache = 0;
+				last_in_cache = 0;
+			}
+
 			let hinfo: Option<&Hash> = headers
 				.header_pmmr
 				.data
@@ -307,23 +333,38 @@ impl HeadersRecieveCache {
 				.get(hash_idx as usize);
 			match hinfo {
 				Some(h) => {
+					let request = (h.clone(), hash_idx * HEADERS_PER_BATCH as u64);
 					// check if already requested first
-					if !requested_hashes.contains_key(h) {
-						return_vec.push((h.clone(), hash_idx * HEADERS_PER_BATCH as u64));
+					if !request_tracker.contains_request(h) {
+						return_vec.push(request);
 						if return_vec.len() >= elements {
 							break;
 						}
+					} else {
+						waiting_indexes.push((hash_idx, request));
 					}
 				}
 				None => break,
 			}
 		}
-		Ok(return_vec)
+
+		// Let's check if we want to retry something...
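
Taken together with `add_headers_to_cache` and `apply_cache` below, the intended header download loop looks roughly like this. The sketch mirrors the test code later in this patch; `request_from_peer`, `request_tracker`, `batch` and `peer_addr` are assumed placeholders:

```rust
// Pick up to 15 header batches to ask for; retry_hashes are stalled requests.
let (hashes, retry_hashes) = headers_cache
	.next_desired_headers(&header_desegmenter, 15, &request_tracker, 100)
	.unwrap();
for (hash, height) in hashes.into_iter().chain(retry_hashes) {
	request_from_peer(hash, height); // placeholder for the peer transport
}
// When a batch of headers arrives from a peer:
headers_cache
	.add_headers_to_cache(&header_desegmenter, batch, peer_addr)
	.unwrap();
// Drain the cache into the chain while contiguous batches remain:
while headers_cache.apply_cache().unwrap() {}
```
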
+		let mut retry_vec = vec![];
+		if has10_idx > 0 {
+			for (idx, req) in waiting_indexes {
+				if idx >= has10_idx {
+					break;
+				}
+				retry_vec.push(req);
+			}
+		}
+
+		Ok((return_vec, retry_vec))
 	}

 	/// Add received headers to the cache
-	pub fn add_headers(
-		&mut self,
+	pub fn add_headers_to_cache(
+		&self,
 		headers: &HeaderHashesDesegmenter,
 		bhs: Vec<BlockHeader>,
 		peer_info: T,
@@ -370,14 +411,19 @@ impl HeadersRecieveCache {
 			));
 		}

+		let mut main_headers_cache = self.main_headers_cache.write();
 		// duplicated data, skipping it
-		if self.main_headers_cache.contains_key(&first_header.height) {
+		if main_headers_cache.contains_key(&first_header.height) {
 			return Ok(());
 		}

-		self.main_headers_cache
-			.insert(first_header.height, (bhs, peer_info));
+		main_headers_cache.insert(first_header.height, (bhs, peer_info));
+
+		Ok(())
+	}

+	/// Apply the cache to the chain. Returns true if more data is available
+	pub fn apply_cache(&self) -> Result<bool, (T, Error)> {
 		// Apply data from cache if possible
 		let mut headers_all: Vec<BlockHeader> = Vec::new();
 		let mut headers_by_peer: Vec<(Vec<BlockHeader>, T)> = Vec::new();
@@ -388,52 +434,30 @@ impl HeadersRecieveCache {

 		let mut tip_height = tip.height;

-		while let Some((height, (headers, _))) = self.main_headers_cache.first_key_value() {
-			debug_assert!(!headers.is_empty());
-			debug_assert!(headers.len() == HEADERS_PER_BATCH as usize);
-			debug_assert!(headers.first().unwrap().height == *height);
-			let ending_height = headers.last().expect("headers can't empty").height;
-			if ending_height <= tip_height {
-				// duplicated data, skipping it...
-				let _ = self.main_headers_cache.pop_first();
-				continue;
-			}
-			if *height > tip_height + 1 {
-				break;
-			}
-			let (_, (mut bhs, peer)) = self.main_headers_cache.pop_first().unwrap();
-			tip_height = bhs.last().expect("bhs can't be empty").height;
+		{
+			let mut main_headers_cache = self.main_headers_cache.write();
+			while let Some((height, (headers, _))) = main_headers_cache.first_key_value() {
+				debug_assert!(!headers.is_empty());
+				debug_assert!(headers.len() == HEADERS_PER_BATCH as usize);
+				debug_assert!(headers.first().unwrap().height == *height);
+				let ending_height = headers.last().expect("headers can't be empty").height;
+				if ending_height <= tip_height {
+					// duplicated data, skipping it...
+					let _ = main_headers_cache.pop_first();
+					continue;
+				}
+				if *height > tip_height + 1 {
+					break;
+				}
+				let (_, (mut bhs, peer)) = main_headers_cache.pop_first().unwrap();
+				tip_height = bhs.last().expect("bhs can't be empty").height;

-			headers_by_peer.push((bhs.clone(), peer));
-			headers_all.append(&mut bhs);
+				headers_by_peer.push((bhs.clone(), peer));
+				headers_all.append(&mut bhs);

-			if headers_all.len() > 5000 {
-				match self
-					.chain
-					.sync_block_headers(&headers_all, tip, Options::NONE)
-				{
-					Ok(_) => {}
-					Err(e) => {
-						warn!(
-							"add_headers in bulk is failed, will add one by one. Error: {}",
-							e
-						);
-						// apply one by one
-						for (hdr, peer) in headers_by_peer {
-							let tip = self
-								.chain
-								.header_head()
-								.expect("Header head must be always defined");
-
-							match self.chain.sync_block_headers(&hdr, tip, Options::NONE) {
-								Ok(_) => {}
-								Err(e) => return Err((peer, e)),
-							}
-						}
-					}
+				if headers_all.len() > 2000 {
+					break; // we don't want to add too much in a single session.
} - headers_all = Vec::new(); - headers_by_peer = Vec::new(); } } @@ -462,8 +486,18 @@ impl HeadersRecieveCache { } } } - } - Ok(()) + let tip = self + .chain + .header_head() + .expect("Header head must be always defined"); + + match self.main_headers_cache.read().first_key_value() { + Some((height, _)) => Ok(*height <= tip.height + 1), + None => Ok(false), + } + } else { + Ok(false) + } } } diff --git a/chain/src/txhashset/request_lookup.rs b/chain/src/txhashset/request_lookup.rs new file mode 100644 index 000000000..e02162245 --- /dev/null +++ b/chain/src/txhashset/request_lookup.rs @@ -0,0 +1,30 @@ +// Copyright 2024 The MWC Developers +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::HashMap; + +/// Lookup for registered (requests) +pub trait RequestLookup { + /// return true if request is already registered + fn contains_request(&self, key: &K) -> bool; +} + +impl<'a, K, V> RequestLookup for &'a HashMap +where + K: std::cmp::Eq + std::hash::Hash, +{ + fn contains_request(&self, key: &K) -> bool { + self.contains_key(key) + } +} diff --git a/chain/src/txhashset/segments_cache.rs b/chain/src/txhashset/segments_cache.rs index 47eb6c8dc..88e754b51 100644 --- a/chain/src/txhashset/segments_cache.rs +++ b/chain/src/txhashset/segments_cache.rs @@ -15,6 +15,8 @@ //! 
Manages the segments caching use crate::error::Error; +use crate::pibd_params; +use crate::txhashset::request_lookup::RequestLookup; use mwc_core::core::{Segment, SegmentIdentifier, SegmentType}; use std::cmp; use std::collections::HashMap; @@ -65,13 +67,13 @@ impl SegmentsCache { /// Return list of the next preferred segments the desegmenter needs based on /// the current real state of the underlying elements - pub fn next_desired_segments( + pub fn next_desired_segments( &self, height: u8, max_elements: usize, - requested: &HashMap<(SegmentType, u64), V>, + requested: &dyn RequestLookup<(SegmentType, u64)>, cache_size_limit: usize, - ) -> Vec { + ) -> (Vec, Vec) { let mut result = vec![]; debug_assert!(max_elements > 0); debug_assert!(cache_size_limit > 0); @@ -80,20 +82,58 @@ impl SegmentsCache { self.received_segments + cache_size_limit as u64, self.required_segments, ); + + let mut waiting_indexes: Vec<(u64, SegmentIdentifier)> = Vec::new(); + let mut first_in_cache = 0; + let mut last_in_cache = 0; + let mut has_5_idx = 0; + for idx in self.received_segments..max_segm_idx { - if !self.segment_cache.contains_key(&idx) { - if !requested.contains_key(&(self.seg_type.clone(), idx)) { - result.push(SegmentIdentifier { - height: height, - idx: idx, - }); - if result.len() >= max_elements { - break; - } + if self.segment_cache.contains_key(&idx) { + if idx == last_in_cache + 1 { + last_in_cache = idx; + } else { + first_in_cache = idx; + last_in_cache = idx; + } + continue; + } + + if last_in_cache > 0 { + if last_in_cache - first_in_cache > pibd_params::SEGMENTS_RETRY_DELTA { + has_5_idx = first_in_cache; + } + first_in_cache = 0; + last_in_cache = 0; + } + + let request = SegmentIdentifier { + height: height, + idx: idx, + }; + + if !requested.contains_request(&(self.seg_type.clone(), idx)) { + result.push(request); + if result.len() >= max_elements { + break; + } + } else { + waiting_indexes.push((idx, request)); + } + } + + // Let's check if we want to retry something... 
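
Both the headers cache and this segments cache apply the same gap rule: if a long contiguous run of data has arrived *after* some still-missing indexes, the requests covering those older indexes are presumed stalled and get queued for retry. A standalone sketch of the rule with simplified types; `RETRY_DELTA` and `stalled_requests` are illustrative stand-ins (playing the roles of `SEGMENTS_RETRY_DELTA` / `HEADERS_RETRY_DELTA`), and `cached` is assumed sorted ascending:

```rust
const RETRY_DELTA: u64 = 5;

/// Return the in-flight indexes that look stalled: anything still missing
/// before a sufficiently long run of already-received data.
fn stalled_requests(cached: &[u64], in_flight: &[u64]) -> Vec<u64> {
	let (mut first_run, mut last_run, mut retry_before) = (0u64, 0u64, 0u64);
	for &idx in cached {
		if idx == last_run + 1 {
			last_run = idx; // the current run continues
		} else {
			first_run = idx; // a new run starts
			last_run = idx;
		}
		if last_run - first_run > RETRY_DELTA {
			retry_before = first_run; // long run => older holes look stalled
		}
	}
	in_flight.iter().copied().filter(|i| *i < retry_before).collect()
}

// With cached = 10..=20 and in_flight = [7, 8, 9], the run 10..=20 is longer
// than RETRY_DELTA, so 7, 8 and 9 are all flagged for retry.
```
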
+ let mut retry_vec = vec![]; + if has_5_idx > 0 { + for (idx, req) in waiting_indexes { + if idx >= has_5_idx { + break; } + retry_vec.push(req); } } - result + + (result, retry_vec) } pub fn is_duplicate_segment(&self, segment_idx: u64) -> bool { @@ -108,6 +148,10 @@ impl SegmentsCache { where F: FnMut(Vec>) -> Result<(), Error>, { + if segment.id().idx < self.received_segments { + return Ok(()); + } + self.segment_cache.insert(segment.id().idx, segment); // apply found data from the cache diff --git a/chain/src/txhashset/txhashset.rs b/chain/src/txhashset/txhashset.rs index 3bb7e2378..cdb736d86 100644 --- a/chain/src/txhashset/txhashset.rs +++ b/chain/src/txhashset/txhashset.rs @@ -454,8 +454,11 @@ impl TxHashSet { Ok(TxHashSetRoots { output_root: output_pmmr.root().map_err(|e| Error::InvalidRoot(e))?, + output_mmr_size: self.output_pmmr_h.size, rproof_root: rproof_pmmr.root().map_err(|e| Error::InvalidRoot(e))?, + rproof_mmr_size: self.rproof_pmmr_h.size, kernel_root: kernel_pmmr.root().map_err(|e| Error::InvalidRoot(e))?, + kernel_mmr_size: self.kernel_pmmr_h.size, }) } @@ -1647,8 +1650,11 @@ impl<'a> Extension<'a> { pub fn roots(&self) -> Result { Ok(TxHashSetRoots { output_root: self.output_pmmr.root().map_err(|e| Error::InvalidRoot(e))?, + output_mmr_size: self.output_pmmr.size, rproof_root: self.rproof_pmmr.root().map_err(|e| Error::InvalidRoot(e))?, + rproof_mmr_size: self.rproof_pmmr.size, kernel_root: self.kernel_pmmr.root().map_err(|e| Error::InvalidRoot(e))?, + kernel_mmr_size: self.kernel_pmmr.size, }) } diff --git a/chain/src/types.rs b/chain/src/types.rs index 602c536ca..69971bfca 100644 --- a/chain/src/types.rs +++ b/chain/src/types.rs @@ -74,6 +74,8 @@ pub enum SyncStatus { /// total number of segments required total_segments: u64, }, + /// Validating kernels history + ValidatingKernelsHistory, /// Setting up before validation TxHashsetHeadersValidation { /// number of 'headers' for which kernels have been checked @@ -221,10 +223,16 @@ impl SyncState { pub struct TxHashSetRoots { /// Output roots pub output_root: Hash, + /// Output mmr size + pub output_mmr_size: u64, /// Range Proof root pub rproof_root: Hash, + /// Range Proof mmr size + pub rproof_mmr_size: u64, /// Kernel root pub kernel_root: Hash, + /// Kernel mmr size + pub kernel_mmr_size: u64, } impl TxHashSetRoots { @@ -235,15 +243,21 @@ impl TxHashSetRoots { header.height, header.output_mmr_size, header.kernel_mmr_size ); debug!( - "validate roots: {} at {}, Outputs roots {} vs. {}, Range Proof roots {} vs {}, Kernel Roots {} vs {}", + "validate roots: {} at {}, Outputs roots {} vs. 
{}, sz {} vs {}, Range Proof roots {} vs {}, sz {} vs {}, Kernel Roots {} vs {}, sz {} vs {}", header.hash(), header.height, header.output_root, self.output_root, + header.output_mmr_size, + self.output_mmr_size, header.range_proof_root, self.rproof_root, + header.output_mmr_size, + self.rproof_mmr_size, header.kernel_root, self.kernel_root, + header.kernel_mmr_size, + self.kernel_mmr_size, ); if header.output_root != self.output_root { diff --git a/chain/tests/process_block_cut_through.rs b/chain/tests/process_block_cut_through.rs index 82aa23d1c..8a5f8ce8c 100644 --- a/chain/tests/process_block_cut_through.rs +++ b/chain/tests/process_block_cut_through.rs @@ -191,7 +191,8 @@ fn process_block_cut_through() -> Result<(), chain::Error> { let mut ctx = chain.new_ctx(Options::NONE, batch, &mut header_pmmr, &mut txhashset)?; let mut cache_values = VecDeque::new(); - let res = pipe::process_block(&block, &mut ctx, &mut cache_values, chain.secp()); + let res = + pipe::process_blocks_series(&vec![block], &mut ctx, &mut cache_values, chain.secp()); assert_eq!( res, Err(chain::Error::Block(block::Error::Transaction( diff --git a/chain/tests/test_pibd_copy.rs b/chain/tests/test_pibd_copy.rs index 7ffabaaa5..c514309b1 100644 --- a/chain/tests/test_pibd_copy.rs +++ b/chain/tests/test_pibd_copy.rs @@ -30,7 +30,7 @@ use crate::core::core::{ use crate::core::{genesis, global, pow}; use crate::util::secp::pedersen::RangeProof; use mwc_chain::pibd_params::PibdParams; -use mwc_chain::txhashset::{HeaderHashesDesegmenter, HeadersRecieveCache}; +use mwc_chain::txhashset::{Desegmenter, HeaderHashesDesegmenter, HeadersRecieveCache}; use mwc_chain::types::HEADERS_PER_BATCH; use mwc_chain::{Error, Options, SyncState}; use mwc_util::secp::rand::Rng; @@ -59,7 +59,7 @@ fn _copy_dir_all(src: impl AsRef, dst: impl AsRef) -> io::Result<()> Ok(()) } -// Canned segmenter responder, which will simulate feeding back segments as requested +// segmenter responder, which will simulate feeding back segments as requested // by the desegmenter struct SegmenterResponder { chain: Arc, @@ -162,10 +162,14 @@ impl DesegmenterRequestor { res } - pub fn init_desegmenter(&mut self, archive_header_height: u64, bitmap_root_hash: Hash) { + pub fn init_desegmenter( + &mut self, + archive_header_height: u64, + bitmap_root_hash: Hash, + ) -> Desegmenter { self.chain - .create_desegmenter(archive_header_height, bitmap_root_hash) - .unwrap(); + .init_desegmenter(archive_header_height, bitmap_root_hash) + .unwrap() } // return whether is complete @@ -175,7 +179,9 @@ impl DesegmenterRequestor { header_root_hash: &Hash, pibd_params: &PibdParams, ) -> bool { - let asks = header_desegmenter.next_desired_segments::(10, &HashMap::new(), pibd_params); + let empty_map: HashMap<(SegmentType, u64), u8> = HashMap::new(); + let empty_map = &empty_map; + let asks = header_desegmenter.next_desired_segments(10, &empty_map, pibd_params); debug!("Next segment IDS: {:?}", asks); @@ -207,9 +213,10 @@ impl DesegmenterRequestor { if headers_cache.is_complete().unwrap() { return true; } - - let hashes = headers_cache - .next_desired_headers::(header_desegmenter, 15, &HashMap::new()) + let empty_map: HashMap = HashMap::new(); + let empty_map = &empty_map; + let (hashes, _reply_hashes) = headers_cache + .next_desired_headers(header_desegmenter, 15, &empty_map, 100) .unwrap(); if hashes.is_empty() { assert!(false); @@ -253,10 +260,13 @@ impl DesegmenterRequestor { assert_eq!(headers.len(), HEADERS_PER_BATCH as usize); if let Err((peer, err)) = - 
headers_cache.add_headers(header_desegmenter, headers, "0".to_string()) + headers_cache.add_headers_to_cache(header_desegmenter, headers, "0".to_string()) { panic!("Error {}, for peer id {}", err, peer); } + while headers_cache.apply_cache().unwrap() { + debug!("Applying headers cache once more..."); + } } false @@ -264,21 +274,19 @@ impl DesegmenterRequestor { // Emulate `continue_pibd` function, which would be called from state sync // return whether is complete - pub fn continue_pibd(&mut self, bitmap_root_hash: Hash) -> bool { + pub fn continue_pibd(&mut self, bitmap_root_hash: Hash, desegmenter: &Desegmenter) -> bool { //let archive_header = self.chain.txhashset_archive_header_header_only().unwrap(); - let desegmenter = self.chain.get_desegmenter(); - - let mut next_segment_ids = vec![]; - let mut is_complete = false; - if let Some(d) = desegmenter.write().as_mut() { - // Figure out the next segments we need - // (12 is divisible by 3, to try and evenly spread the requests among the 3 - // main pmmrs. Bitmaps segments will always be requested first) - let now = Instant::now(); - next_segment_ids = d.next_desired_segments::(60, &HashMap::new()).unwrap(); - debug!("next_desired_segments took {}ms", now.elapsed().as_millis()); - is_complete = d.is_complete() - } + + // Figure out the next segments we need + // (12 is divisible by 3, to try and evenly spread the requests among the 3 + // main pmmrs. Bitmaps segments will always be requested first) + let now = Instant::now(); + let empty_map: HashMap<(SegmentType, u64), u8> = HashMap::new(); + let empty_map = &empty_map; + let (mut next_segment_ids, _retry_ids) = + desegmenter.next_desired_segments(60, &empty_map).unwrap(); + debug!("next_desired_segments took {}ms", now.elapsed().as_millis()); + let is_complete = desegmenter.is_complete(); debug!("Next segment IDS: {:?}", next_segment_ids); let mut rng = rand::thread_rng(); @@ -290,52 +298,52 @@ impl DesegmenterRequestor { match seg_id.segment_type { SegmentType::Bitmap => { let seg = self.responder.get_bitmap_segment(seg_id.identifier.clone()); - if let Some(d) = desegmenter.write().as_mut() { - let now = Instant::now(); - d.add_bitmap_segment(seg, &bitmap_root_hash).unwrap(); - debug!("next_desired_segments took {}ms", now.elapsed().as_millis()); - } + let now = Instant::now(); + desegmenter + .add_bitmap_segment(seg, &bitmap_root_hash) + .unwrap(); + debug!("next_desired_segments took {}ms", now.elapsed().as_millis()); } SegmentType::Output => { let seg = self.responder.get_output_segment(seg_id.identifier.clone()); - if let Some(d) = desegmenter.write().as_mut() { - let now = Instant::now(); - let id = seg.id().clone(); - d.add_output_segment(seg, &bitmap_root_hash).unwrap(); - debug!( - "Added output segment {}, took {}ms", - id, - now.elapsed().as_millis() - ); - } + let now = Instant::now(); + let id = seg.id().clone(); + desegmenter + .add_output_segment(seg, &bitmap_root_hash) + .unwrap(); + debug!( + "Added output segment {}, took {}ms", + id, + now.elapsed().as_millis() + ); } SegmentType::RangeProof => { let seg = self .responder .get_rangeproof_segment(seg_id.identifier.clone()); - if let Some(d) = desegmenter.write().as_mut() { - let now = Instant::now(); - let id = seg.id().clone(); - d.add_rangeproof_segment(seg, &bitmap_root_hash).unwrap(); - debug!( - "Added rangeproof segment {}, took {}ms", - id, - now.elapsed().as_millis() - ); - } + let now = Instant::now(); + let id = seg.id().clone(); + desegmenter + .add_rangeproof_segment(seg, &bitmap_root_hash) + .unwrap(); + 
debug!( + "Added rangeproof segment {}, took {}ms", + id, + now.elapsed().as_millis() + ); } SegmentType::Kernel => { let seg = self.responder.get_kernel_segment(seg_id.identifier.clone()); - if let Some(d) = desegmenter.write().as_mut() { - let now = Instant::now(); - let id = seg.id().clone(); - d.add_kernel_segment(seg, &bitmap_root_hash).unwrap(); - debug!( - "Added kernels segment {}, took {}ms", - id, - now.elapsed().as_millis() - ); - } + let now = Instant::now(); + let id = seg.id().clone(); + desegmenter + .add_kernel_segment(seg, &bitmap_root_hash) + .unwrap(); + debug!( + "Added kernels segment {}, took {}ms", + id, + now.elapsed().as_millis() + ); } }; } @@ -356,36 +364,21 @@ impl DesegmenterRequestor { assert_eq!(archive_header.output_root, roots.output_root); } - pub fn validate_complete_state(&self) { + pub fn validate_complete_state(&self, desegmenter: &Desegmenter) { let status = Arc::new(SyncState::new()); let stop_state = Arc::new(StopState::new()); let secp = self.chain.secp(); - self.chain - .get_desegmenter() - .read() - .as_ref() - .unwrap() - .check_update_leaf_set_state() - .unwrap(); + desegmenter.check_update_leaf_set_state().unwrap(); - self.chain - .get_desegmenter() - .read() - .as_ref() - .unwrap() + desegmenter .validate_complete_state(status, stop_state, secp) .unwrap(); } } -fn test_pibd_copy_impl(is_test_chain: bool, src_root_dir: &str, dest_root_dir: &str) { - global::set_global_chain_type(global::ChainTypes::Floonet); - let mut genesis = genesis::genesis_floo(); - - if is_test_chain { - global::set_global_chain_type(global::ChainTypes::AutomatedTesting); - genesis = pow::mine_genesis_block().unwrap(); - } +fn test_pibd_copy_impl(src_root_dir: &str, dest_root_dir: &str) { + global::set_global_chain_type(global::ChainTypes::Mainnet); + let genesis = genesis::genesis_main(); let src_responder = Arc::new(SegmenterResponder::new(src_root_dir, genesis.clone())); @@ -423,14 +416,14 @@ fn test_pibd_copy_impl(is_test_chain: bool, src_root_dir: &str, dest_root_dir: & while !dest_requestor.continue_copy_headers(&header_desegmenter, &mut headers_cache) {} - dest_requestor.init_desegmenter(archive_header_height, bitmap_root_hash); + let desegmenter = dest_requestor.init_desegmenter(archive_header_height, bitmap_root_hash); // Perform until desegmenter reports it's done - while !dest_requestor.continue_pibd(bitmap_root_hash) {} + while !dest_requestor.continue_pibd(bitmap_root_hash, &desegmenter) {} dest_requestor.check_roots(archive_header_height); - dest_requestor.validate_complete_state(); + dest_requestor.validate_complete_state(&desegmenter); } #[test] @@ -442,11 +435,11 @@ fn test_pibd_copy_real() { util::init_test_logger(); // if testing against a real chain, insert location here - let src_root_dir = format!("/Users/bay/.mwc/_floo/chain_data"); - let dest_root_dir = format!("/Users/bay/.mwc/_floo3/chain_data"); + let src_root_dir = format!("/Users/bay/.mwc/main_orig/chain_data"); + let dest_root_dir = format!("/Users/bay/.mwc/main_copy/chain_data"); //self::chain_test_helper::clean_output_dir(&dest_root_dir); - test_pibd_copy_impl(false, &src_root_dir, &dest_root_dir); + test_pibd_copy_impl(&src_root_dir, &dest_root_dir); //self::chain_test_helper::clean_output_dir(&dest_root_dir); } @@ -458,11 +451,11 @@ fn test_pibd_copy_real() { fn test_chain_validation() { util::init_test_logger(); - let src_root_dir = format!("/Users/bay/.mwc/_floo/chain_data"); - let dest_root_dir = format!("/Users/bay/.mwc/_floo3/chain_data"); + let src_root_dir = 
format!("/Users/bay/.mwc/main_orig/chain_data");
+	let dest_root_dir = format!("/Users/bay/.mwc/main_copy/chain_data");

-	global::set_global_chain_type(global::ChainTypes::Floonet);
-	let genesis = genesis::genesis_floo();
+	global::set_global_chain_type(global::ChainTypes::Mainnet);
+	let genesis = genesis::genesis_main();

 	let dummy_adapter = Arc::new(NoopAdapter {});
diff --git a/core/src/core/pmmr/vec_backend.rs b/core/src/core/pmmr/vec_backend.rs
index c73e600f0..6e87f2564 100644
--- a/core/src/core/pmmr/vec_backend.rs
+++ b/core/src/core/pmmr/vec_backend.rs
@@ -170,4 +170,13 @@ impl VecBackend {
 	pub fn size(&self) -> u64 {
 		self.hashes.len() as u64
 	}
+
+	/// Reset backend data
+	pub fn reset(&mut self) {
+		if let Some(data) = self.data.as_mut() {
+			data.clear();
+		}
+		self.hashes.clear();
+		self.removed.clear();
+	}
 }
diff --git a/p2p/src/peers.rs b/p2p/src/peers.rs
index e90661156..db5f26ca9 100644
--- a/p2p/src/peers.rs
+++ b/p2p/src/peers.rs
@@ -15,7 +15,7 @@

 use crate::util::RwLock;
 use std::cmp;
-use std::collections::HashMap;
+use std::collections::{HashMap, HashSet};
 use std::path::PathBuf;
 use std::sync::Arc;

@@ -55,6 +55,7 @@ pub struct Peers {
 	config: P2PConfig,
 	stop_state: Arc<StopState>,
 	boost_peers_capabilities: RwLock,
+	excluded_peers: Arc<RwLock<HashSet<PeerAddr>>>,
 }

 impl Peers {
@@ -74,6 +75,16 @@ impl Peers {
 				capabilities: Capabilities::UNKNOWN,
 				time: DateTime::default(),
 			}),
+			excluded_peers: Arc::new(RwLock::new(HashSet::new())),
 		}
 	}

+	/// Mark these peers as excluded, so they will never appear in the 'connected' list
+	pub fn set_excluded_peers(&self, peers: &Vec<PeerAddr>) {
+		let mut excluded_peers = self.excluded_peers.write();
+		excluded_peers.clear();
+		for p in peers {
+			excluded_peers.insert(p.clone());
+		}
+	}
+
@@ -105,6 +116,14 @@ impl Peers {
 		self.boost_peers_capabilities.read().capabilities.clone()
 	}

+	/// Number of peers that already have a connection. The total number of connections needs to be limited
+	pub fn get_number_connected_peers(&self) -> usize {
+		match self.peers.try_read_for(LOCK_TIMEOUT) {
+			Some(peers) => peers.len(),
+			None => 0,
+		}
+	}
+
 	/// Adds the peer to our internal peer mapping. Note that the peer is still
 	/// returned so the server can run it.
 	pub fn add_connected(&self, peer: Arc<Peer>) -> Result<(), Error> {
@@ -165,8 +184,13 @@ impl Peers {
 	/// This allows us to hide try_read_for() behind a cleaner interface.
 	/// PeersIter lets us chain various adaptors for convenience.
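
The exclusion set gives the sync layer a way to temporarily hide misbehaving peers without disconnecting them; `iter()` (below) silently filters them out. A hedged usage sketch, assuming the pre-existing `PeersIter` adaptors and two in-scope addresses `addr1` / `addr2`:

```rust
// Hide two peers from all iter()-based selection...
let bad_peers: Vec<PeerAddr> = vec![addr1.clone(), addr2.clone()];
peers.set_excluded_peers(&bad_peers);
assert!(peers
	.iter()
	.connected()
	.into_iter()
	.all(|p| !bad_peers.contains(&p.info.addr)));
// ...and re-admit everyone by clearing the list.
peers.set_excluded_peers(&Vec::new());
```
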
pub fn iter(&self) -> PeersIter>> { + let excluded_peers = self.excluded_peers.read(); let peers = match self.peers.try_read_for(LOCK_TIMEOUT) { - Some(peers) => peers.values().cloned().collect(), + Some(peers) => peers + .values() + .cloned() + .filter(|p| !excluded_peers.contains(&p.info.addr)) + .collect(), None => { if !self.stop_state.is_stopped() { // When stopped, peers access is locked by stopped thread diff --git a/p2p/src/serv.rs b/p2p/src/serv.rs index c9d7a73af..635a1d5ab 100644 --- a/p2p/src/serv.rs +++ b/p2p/src/serv.rs @@ -183,6 +183,14 @@ impl Server { ))); } + let max_allowed_connections = + self.config.peer_max_inbound_count() + self.config.peer_max_outbound_count(true) + 10; + if self.peers.get_number_connected_peers() > max_allowed_connections as usize { + return Err(Error::ConnectionClose(String::from( + "Too many established connections...", + ))); + } + if global::is_production_mode() { let hs = self.handshake.clone(); let addrs = hs.addrs.read(); @@ -310,6 +318,15 @@ impl Server { if self.stop_state.is_stopped() { return Err(Error::ConnectionClose(String::from("Server is stopping"))); } + + let max_allowed_connections = + self.config.peer_max_inbound_count() + self.config.peer_max_outbound_count(true) + 10; + if self.peers.get_number_connected_peers() > max_allowed_connections as usize { + return Err(Error::ConnectionClose(String::from( + "Too many established connections...", + ))); + } + let total_diff = self.peers.total_difficulty()?; // accept the peer and add it to the server map diff --git a/p2p/src/types.rs b/p2p/src/types.rs index 5ab32ca3e..cfad2a96a 100644 --- a/p2p/src/types.rs +++ b/p2p/src/types.rs @@ -68,7 +68,7 @@ const PEER_MAX_OUTBOUND_COUNT: u32 = 10; const PEER_MIN_PREFERRED_OUTBOUND_COUNT: u32 = 8; /// During sync process we want to boost peers discovery. -const PEER_BOOST_OUTBOUND_COUNT: u32 = 25; +const PEER_BOOST_OUTBOUND_COUNT: u32 = 20; /// The peer listener buffer count. Allows temporarily accepting more connections /// than allowed by PEER_MAX_INBOUND_COUNT to encourage network bootstrapping. diff --git a/servers/src/common/adapters.rs b/servers/src/common/adapters.rs index 9f020a360..d1cbcb378 100644 --- a/servers/src/common/adapters.rs +++ b/servers/src/common/adapters.rs @@ -100,7 +100,7 @@ where P: PoolAdapter, { sync_state: Arc, - sync_manager: Arc>, + sync_manager: Arc, chain: Weak, tx_pool: Arc>>, peers: OneTime>, @@ -412,7 +412,6 @@ where } self.sync_manager - .write() .receive_headers(&peer_info.addr, bhs, remaining, self.peers()); Ok(()) } @@ -568,12 +567,8 @@ where "Received PIBD handshake response from {}. 
Header {} at {}, root_hash {}", peer, header_hash, header_height, output_bitmap_root ); - self.sync_manager.write().recieve_pibd_status( - peer, - header_hash, - header_height, - output_bitmap_root, - ); + self.sync_manager + .recieve_pibd_status(peer, header_hash, header_height, output_bitmap_root); Ok(()) } @@ -588,7 +583,6 @@ where peer, header_hash, header_height ); self.sync_manager - .write() .recieve_another_archive_header(peer, header_hash, header_height); Ok(()) } @@ -603,11 +597,8 @@ where "Received headers hash response {}, {} from {}", archive_height, headers_hash_root, peer ); - self.sync_manager.write().receive_headers_hash_response( - peer, - archive_height, - headers_hash_root, - ); + self.sync_manager + .receive_headers_hash_response(peer, archive_height, headers_hash_root); Ok(()) } @@ -647,7 +638,6 @@ where peer ); self.sync_manager - .write() .receive_header_hashes_segment(peer, header_hashes_root, segment); Ok(()) } @@ -659,12 +649,13 @@ where segment: Segment, ) -> Result<(), chain::Error> { info!( - "Received bitmap segment {} for block_hash: {}", + "Received bitmap segment {} for block_hash: {} from {}", segment.identifier().idx, - archive_header_hash + archive_header_hash, + peer ); - self.sync_manager.write().receive_bitmap_segment( + self.sync_manager.receive_bitmap_segment( peer, &archive_header_hash, segment, @@ -680,12 +671,13 @@ where segment: Segment, ) -> Result<(), chain::Error> { info!( - "Received output segment {} for block_hash: {}", + "Received output segment {} for block_hash: {} from {}", segment.identifier().idx, archive_header_hash, + peer, ); - self.sync_manager.write().receive_output_segment( + self.sync_manager.receive_output_segment( peer, &archive_header_hash, segment, @@ -701,12 +693,13 @@ where segment: Segment, ) -> Result<(), chain::Error> { info!( - "Received proof segment {} for block_hash: {}", + "Received proof segment {} for block_hash: {} from {}", segment.identifier().idx, - archive_header_hash + archive_header_hash, + peer ); - self.sync_manager.write().receive_rangeproof_segment( + self.sync_manager.receive_rangeproof_segment( peer, &archive_header_hash, segment, @@ -722,12 +715,13 @@ where segment: Segment, ) -> Result<(), chain::Error> { info!( - "Received kernel segment {} for block_hash: {}", + "Received kernel segment {} for block_hash: {} from {}", segment.identifier().idx, - archive_header_hash + archive_header_hash, + peer ); - self.sync_manager.write().receive_kernel_segment( + self.sync_manager.receive_kernel_segment( peer, &archive_header_hash, segment, @@ -746,7 +740,7 @@ where pub fn new( sync_state: Arc, chain: Arc, - sync_manager: Arc>, + sync_manager: Arc, tx_pool: Arc>>, config: ServerConfig, hooks: Vec>, @@ -810,7 +804,7 @@ where Ok(_) => { self.validate_chain(&bhash); self.check_compact(); - self.sync_manager.write().recieve_block_reporting( + self.sync_manager.recieve_block_reporting( true, &peer_info.addr, &bhash, @@ -820,7 +814,7 @@ where } Err(ref e) if e.is_bad_data() => { self.validate_chain(&bhash); - self.sync_manager.write().recieve_block_reporting( + self.sync_manager.recieve_block_reporting( false, &peer_info.addr, &bhash, @@ -831,7 +825,7 @@ where Err(e) => { match e { chain::Error::Orphan(orph_msg) => { - self.sync_manager.write().recieve_block_reporting( + self.sync_manager.recieve_block_reporting( true, &peer_info.addr, &bhash, @@ -850,7 +844,7 @@ where } _ => { debug!("process_block: block {} refused by chain: {}", bhash, e); - self.sync_manager.write().recieve_block_reporting( + 
self.sync_manager.recieve_block_reporting(
 							false,
 							&peer_info.addr,
 							&bhash,
diff --git a/servers/src/mwc/seed.rs b/servers/src/mwc/seed.rs
index 0ecfaa7e5..516dcee69 100644
--- a/servers/src/mwc/seed.rs
+++ b/servers/src/mwc/seed.rs
@@ -18,7 +18,14 @@
 //! configurable with either no peers, a user-defined list or a preset
 //! list of DNS records (the default).

+use crate::core::global;
+use crate::core::global::{FLOONET_DNS_SEEDS, MAINNET_DNS_SEEDS};
 use crate::core::pow::Difficulty;
+use crate::p2p;
+use crate::p2p::libp2p_connection;
+use crate::p2p::types::PeerAddr;
+use crate::p2p::ChainAdapter;
+use crate::util::StopState;
 use chrono::prelude::{DateTime, Utc};
 use chrono::Duration;
 use mwc_p2p::PeerAddr::Onion;
@@ -26,25 +33,18 @@ use mwc_p2p::{msg::PeerAddrs, Capabilities, P2PConfig};
 use rand::prelude::*;
 use std::collections::HashMap;
 use std::net::ToSocketAddrs;
+use std::sync::atomic::{AtomicI32, Ordering};
 use std::sync::{mpsc, Arc};
 use std::{thread, time};

-use crate::core::global;
-use crate::core::global::{FLOONET_DNS_SEEDS, MAINNET_DNS_SEEDS};
-use crate::p2p;
-use crate::p2p::libp2p_connection;
-use crate::p2p::types::PeerAddr;
-use crate::p2p::ChainAdapter;
-use crate::util::StopState;
-
 const CONNECT_TO_SEED_INTERVAL: i64 = 15;
 const EXPIRE_INTERVAL: i64 = 3600;
 const PEERS_CHECK_TIME_FULL: i64 = 30;
 const PEERS_CHECK_TIME_BOOST: i64 = 3;
 const PEERS_MONITOR_INTERVAL: i64 = 60;
-const PEER_RECONNECT_INTERVAL: i64 = 60;
-const PEER_MAX_INITIATE_CONNECTIONS: usize = 2000;
+const PEER_RECONNECT_INTERVAL: i64 = 600;
+const PEER_MAX_INITIATE_CONNECTIONS: usize = 200;

 const PEER_PING_INTERVAL: i64 = 10;

@@ -85,6 +85,9 @@ pub fn connect_and_monitor(
 	libp2p_connection::set_seed_list(&seed_list, true);

 	let mut prev_ping = Utc::now();
+	let connections_in_action = Arc::new(AtomicI32::new(0));
+
+	let mut listen_q_addrs: Vec<PeerAddr> = Vec::new();

 	loop {
 		if stop_state.is_stopped() {
@@ -129,7 +132,12 @@ pub fn connect_and_monitor(
 				tx.clone(),
 				peers.is_boosting_mode(),
 			);
-			peer_monitor_time = now + Duration::seconds(PEERS_MONITOR_INTERVAL);
+
+			if peers.is_sync_mode() {
+				peer_monitor_time = now + Duration::seconds(PEERS_MONITOR_INTERVAL / 5); // while syncing, check every 12 seconds
+			} else {
+				peer_monitor_time = now + Duration::seconds(PEERS_MONITOR_INTERVAL); // otherwise, check once a minute
+			}
 		}

 		// make several attempts to get peers as quickly as possible
@@ -146,8 +154,10 @@ pub fn connect_and_monitor(
 				&rx,
 				&mut connecting_history,
 				use_tor_connection,
+				&connections_in_action,
+				&mut listen_q_addrs,
 			);
-			let duration = if is_boost {
+			let duration = if is_boost || !listen_q_addrs.is_empty() {
 				PEERS_CHECK_TIME_BOOST
 			} else {
 				PEERS_CHECK_TIME_FULL
@@ -372,21 +382,35 @@ fn listen_for_addrs(
 	rx: &mpsc::Receiver<PeerAddr>,
 	connecting_history: &mut HashMap<PeerAddr, DateTime<Utc>>,
 	use_tor_connection: bool,
+	connections_in_action: &Arc<AtomicI32>,
+	listen_q_addrs: &mut Vec<PeerAddr>,
 ) {
 	// Pull everything currently on the queue off the queue.
 	// Does not block so addrs may be empty.
 	// We will take(max_peers) from this later but we want to drain the rx queue
 	// here to prevent it backing up.
 	// It is expected that peers come with the expected capabilities
-	let addrs: Vec<PeerAddr> = rx.try_iter().collect();
+	{
+		let mut addrs: Vec<PeerAddr> = rx.try_iter().collect();
+		listen_q_addrs.append(&mut addrs);
+	}

 	// If we have a healthy number of outbound peers then we are done here.
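
The loop that follows caps concurrent dial-outs by combining the size of `connecting_history` (attempts already recorded) with `connections_in_action` (threads still mid-connect). The same pattern in isolation; `try_begin_attempt` is illustrative, not part of the patch:

```rust
use std::sync::atomic::{AtomicI32, Ordering};
use std::sync::Arc;

/// Returns true if a new dial attempt may start. The caller must
/// fetch_sub(1) on the counter once the attempt completes.
fn try_begin_attempt(pending: &Arc<AtomicI32>, recorded: usize, cap: i32) -> bool {
	if recorded as i32 + pending.load(Ordering::Relaxed) > cap {
		return false; // too many attempts in flight, back off
	}
	pending.fetch_add(1, Ordering::Relaxed);
	true
}
```
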
debug_assert!(!peers.enough_outbound_peers()); let now = Utc::now(); - for addr in addrs.into_iter().as_ref() { - // ignore the duplicate connecting to same peer within 30 seconds - if let Some(last_connect_time) = connecting_history.get(addr) { + while !listen_q_addrs.is_empty() { + debug_assert!(connections_in_action.load(Ordering::Relaxed) >= 0); + if connecting_history.len() as i32 + connections_in_action.load(Ordering::Relaxed) + > PEER_MAX_INITIATE_CONNECTIONS as i32 + { + break; + } + + let addr = listen_q_addrs.pop().expect("listen_q_addrs is not empty"); + + // listen_q_addrs can have duplicated requests or already processed, so still need to dedup + if let Some(last_connect_time) = connecting_history.get(&addr) { if *last_connect_time + Duration::seconds(PEER_RECONNECT_INTERVAL) > now { debug!( "peer_connect: ignore a duplicate request to {}. previous connecting time: {}", @@ -397,15 +421,12 @@ fn listen_for_addrs( } } - if connecting_history.len() > PEER_MAX_INITIATE_CONNECTIONS { - break; - } - connecting_history.insert(addr.clone(), now); let addr_c = addr.clone(); let peers_c = peers.clone(); let p2p_c = p2p.clone(); + let connections_in_action = connections_in_action.clone(); thread::Builder::new() .name("peer_connect".to_string()) .spawn(move || { @@ -421,6 +442,7 @@ fn listen_for_addrs( }; if update_possible { + let _ = connections_in_action.fetch_add(1, Ordering::Relaxed); match p2p_c.connect(&addr_c) { Ok(p) => { debug!( @@ -472,6 +494,7 @@ fn listen_for_addrs( let _ = peers_c.update_state(&addr_c, p2p::State::Defunct); } } + let _ = connections_in_action.fetch_sub(1, Ordering::Relaxed); } }) .expect("failed to launch peer_connect thread"); @@ -479,7 +502,7 @@ fn listen_for_addrs( // shrink the connecting history. // put a threshold here to avoid frequent shrinking in every call - if connecting_history.len() > PEER_MAX_INITIATE_CONNECTIONS { + if connecting_history.len() > PEER_MAX_INITIATE_CONNECTIONS * 10 { let now = Utc::now(); connecting_history .retain(|_, time| *time + Duration::seconds(PEER_RECONNECT_INTERVAL) > now); diff --git a/servers/src/mwc/server.rs b/servers/src/mwc/server.rs index dc7380466..e3bacccbc 100644 --- a/servers/src/mwc/server.rs +++ b/servers/src/mwc/server.rs @@ -271,11 +271,11 @@ impl Server { pool_adapter.set_chain(shared_chain.clone()); - let sync_manager: Arc> = Arc::new(RwLock::new(SyncManager::new( + let sync_manager: Arc = Arc::new(SyncManager::new( shared_chain.clone(), sync_state.clone(), stop_state.clone(), - ))); + )); let net_adapter = Arc::new(NetToChainAdapter::new( sync_state.clone(), diff --git a/servers/src/mwc/sync/body_sync.rs b/servers/src/mwc/sync/body_sync.rs index 1db3dc626..ce8bc17c5 100644 --- a/servers/src/mwc/sync/body_sync.rs +++ b/servers/src/mwc/sync/body_sync.rs @@ -19,20 +19,25 @@ use crate::mwc::sync::sync_peers::SyncPeers; use crate::mwc::sync::sync_utils; use crate::mwc::sync::sync_utils::{RequestTracker, SyncRequestResponses, SyncResponse}; use crate::p2p; +use chrono::{DateTime, Utc}; use mwc_chain::pibd_params::PibdParams; use mwc_chain::{pibd_params, Chain}; use mwc_p2p::{Peer, PeerAddr}; +use mwc_util::RwLock; use p2p::Capabilities; use rand::prelude::*; use std::cmp; +use std::collections::VecDeque; use std::sync::Arc; pub struct BodySync { chain: Arc, - required_capabilities: Capabilities, - request_tracker: RequestTracker, - request_series: Vec<(Hash, u64)>, // Hash, height + required_capabilities: RwLock, + request_tracker: RequestTracker, + request_series: RwLock>, // Hash, height pibd_params: Arc, 
+ last_retry_height: RwLock, + retry_expiration_times: RwLock>>, } impl BodySync { @@ -40,22 +45,24 @@ impl BodySync { BodySync { pibd_params: chain.get_pibd_params().clone(), chain, - required_capabilities: Capabilities::UNKNOWN, + required_capabilities: RwLock::new(Capabilities::UNKNOWN), request_tracker: RequestTracker::new(), - request_series: Vec::new(), + request_series: RwLock::new(Vec::new()), + last_retry_height: RwLock::new(0), + retry_expiration_times: RwLock::new(VecDeque::new()), } } pub fn get_peer_capabilities(&self) -> Capabilities { - self.required_capabilities + self.required_capabilities.read().clone() } // Expected that it is called ONLY when state_sync is done pub fn request( - &mut self, + &self, in_peers: &Arc, sync_state: &SyncState, - sync_peers: &mut SyncPeers, + sync_peers: &SyncPeers, best_height: u64, ) -> Result { // check if we need something @@ -88,7 +95,7 @@ impl BodySync { let archive_height = Chain::height_2_archive_height(best_height); let head = self.chain.head()?; - let fork_point = self.chain.fork_point()?; + let mut fork_point = self.chain.fork_point()?; if !self.chain.archive_mode() { if fork_point.height < archive_height { @@ -113,15 +120,14 @@ impl BodySync { } else { (Capabilities::UNKNOWN, Capabilities::HEADER_HIST) // needed for headers sync, that can go in parallel }; - self.required_capabilities = required_capabilities; + *self.required_capabilities.write() = required_capabilities; let (peers, excluded_requests, excluded_peers) = sync_utils::get_sync_peers( in_peers, self.pibd_params.get_blocks_request_per_peer(), peer_capabilities, head.height, - self.request_tracker.get_requests_num(), - &self.request_tracker.get_peers_track_data(), + &self.request_tracker, ); if peers.is_empty() { if excluded_peers == 0 { @@ -147,26 +153,8 @@ impl BodySync { } // requested_blocks, check for expiration - self.request_tracker.retain_expired( - pibd_params::SEGMENT_REQUEST_TIMEOUT_SECS, - sync_peers, - |peer, request| { - debug!( - "Making retry send_block_request({}) call for peer {:?}", - request, peer - ); - if let Some(peer) = in_peers.get_connected_peer(peer) { - match peer.send_block_request(request.clone(), chain::Options::SYNC) { - Ok(_) => return true, - Err(e) => error!( - "Unable to retry send_block_request({}) for peer {:?}. Error: {}", - request, peer, e - ), - } - } - false - }, - ); + self.request_tracker + .retain_expired(pibd_params::PIBD_REQUESTS_TIMEOUT_SECS, sync_peers); sync_state.update(SyncStatus::BodySync { archive_height: if self.chain.archive_mode() { @@ -178,6 +166,22 @@ impl BodySync { highest_height: best_height, }); + // Check for stuck orphan + if let Ok(next_block) = self.chain.get_header_by_height(fork_point.height + 1) { + let next_block_hash = next_block.hash(); + // Kick the stuck orphan + match self.chain.get_orphan(&next_block_hash) { + Some(orph) => { + debug!("There is stuck orphan is found, let's kick it..."); + if self.chain.process_block(orph.block, orph.opts).is_ok() { + debug!("push stuck orphan was successful. 
Should be able continue to go forward now"); + fork_point = self.chain.fork_point()?; + } + } + None => {} + } + } + // if we have 5 peers to sync from then ask for 50 blocks total (peer_count * // 10) max will be 80 if all 8 peers are advertising more work // also if the chain is already saturated with orphans, throttle @@ -191,18 +195,17 @@ impl BodySync { ); if need_request > 0 { - let mut rng = rand::thread_rng(); - - self.send_requests(&mut need_request, &peers, &mut rng, sync_peers)?; + self.send_requests(&mut need_request, &peers, sync_peers)?; // We can send more requests, let's check if we need to update request_series if need_request > 0 { let mut need_refresh_request_series = false; // If request_series first if processed, need to update - if let Some((hash, height)) = self.request_series.last() { + let last_request_series = self.request_series.read().last().cloned(); + if let Some((hash, height)) = last_request_series { debug!("Updating body request series for {} / {}", hash, height); - if !self.is_need_request_block(hash)? { + if self.chain.block_exists(&hash)? { // The tail is updated, so we can request more need_refresh_request_series = true; } @@ -210,24 +213,9 @@ impl BodySync { need_refresh_request_series = true; } - // Check for stuck orphan - if let Ok(next_block) = self.chain.get_header_by_height(fork_point.height + 1) { - let next_block_hash = next_block.hash(); - // Kick the stuck orphan - match self.chain.get_orphan(&next_block_hash) { - Some(orph) => { - debug!("There is stuck orphan is found, let's kick it..."); - if self.chain.process_block(orph.block, orph.opts).is_ok() { - debug!("push stuck orphan was successful. Should be able continue to go forward now"); - need_refresh_request_series = true; - } - } - None => {} - } - } - if need_refresh_request_series { - self.request_series.clear(); + let mut new_request_series: Vec<(Hash, u64)> = Vec::new(); + // Don't collect more than 500 blocks in the cache. The block size limit is 1.5MB, so total cache mem can be up to 750 Mb which is ok let max_height = cmp::min( fork_point.height + (self.pibd_params.get_orphans_num_limit() / 2) as u64, @@ -238,18 +226,22 @@ impl BodySync { while current.height > fork_point.height { let hash = current.hash(); if !self.chain.is_orphan(&hash) { - self.request_series.push((hash, current.height)); + new_request_series.push((hash, current.height)); } current = self.chain.get_previous_header(¤t)?; } - if let Some((hash, height)) = self.request_series.last() { - debug!("New body request series tail is {} / {}", hash, height); + if let Some((hash, height)) = new_request_series.last() { + debug!( + "New body request series starting from {} / {}", + hash, height + ); } + *self.request_series.write() = new_request_series; } // Now we can try to submit more requests... 
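For orientation, the request-series rebuild above can be condensed as the following sketch. Everything here is a hypothetical stand-in: `HeaderRec`, the slice-based height lookup, and integer hashes replace the node's real `BlockHeader` walk via `get_header_by_height`/`get_previous_header`.

    use std::collections::HashSet;

    // Hypothetical, simplified header record for this sketch only.
    struct HeaderRec {
        hash: u64,
        height: u64,
    }

    // Collect (hash, height) of blocks still missing, walking back from the
    // capped max height down to (not including) the fork point, newest first.
    // Orphans already held locally are skipped, as in the patch.
    fn rebuild_request_series(
        headers_by_height: &[HeaderRec],
        max_height: u64,
        fork_height: u64,
        orphans: &HashSet<u64>,
    ) -> Vec<(u64, u64)> {
        let mut series = Vec::new();
        let mut h = max_height;
        while h > fork_height {
            let hdr = &headers_by_height[h as usize];
            if !orphans.contains(&hdr.hash) {
                series.push((hdr.hash, hdr.height));
            }
            h -= 1;
        }
        series
    }

    fn main() {
        let headers: Vec<HeaderRec> = (0..6u64)
            .map(|h| HeaderRec { hash: 100 + h, height: h })
            .collect();
        let orphans: HashSet<u64> = [103u64].into_iter().collect();
        // fork point at 1, cap at 5: requests 105, 104, 102 (103 is an orphan)
        println!("{:?}", rebuild_request_series(&headers, 5, 1, &orphans));
    }

The tail of the resulting vector is the lowest missing height, which is why the code above checks the last entry to decide when the series needs refreshing.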
- self.send_requests(&mut need_request, &peers, &mut rng, sync_peers)?; + self.send_requests(&mut need_request, &peers, sync_peers)?; } } @@ -265,14 +257,14 @@ impl BodySync { } pub fn recieve_block_reporting( - &mut self, + &self, accepted: bool, // block accepted/rejected flag block_hash: &Hash, peer: &PeerAddr, peers: &Arc, - sync_peers: &mut SyncPeers, + sync_peers: &SyncPeers, ) { - if let Some(peer_adr) = self.request_tracker.remove_request(block_hash) { + if let Some(peer_adr) = self.request_tracker.remove_request(block_hash, peer) { if accepted { if peer_adr == *peer { sync_peers.report_ok_response(peer); @@ -293,10 +285,9 @@ impl BodySync { let (peers, excluded_requests, excluded_peers) = sync_utils::get_sync_peers( peers, self.pibd_params.get_blocks_request_per_peer(), - self.required_capabilities, + *self.required_capabilities.read(), head.height, - self.request_tracker.get_requests_num(), - &self.request_tracker.get_peers_track_data(), + &self.request_tracker, ); if !peers.is_empty() { // requested_blocks, check for expiration @@ -308,10 +299,7 @@ impl BodySync { self.pibd_params.get_blocks_request_limit(), ); if need_request > 0 { - let mut rng = rand::thread_rng(); - if let Err(e) = - self.send_requests(&mut need_request, &peers, &mut rng, sync_peers) - { + if let Err(e) = self.send_requests(&mut need_request, &peers, sync_peers) { error!("Unable to call send_requests, error: {}", e); } } @@ -320,49 +308,182 @@ impl BodySync { } } - fn is_need_request_block(&self, hash: &Hash) -> Result { - Ok(!(self.request_tracker.has_request(&hash) - || self.chain.is_orphan(&hash) - || self.chain.block_exists(&hash)?)) + fn is_block_recieved(&self, hash: &Hash) -> Result { + Ok(self.chain.is_orphan(&hash) || self.chain.block_exists(&hash)?) + } + + fn calc_retry_running_requests(&self) -> usize { + let now = Utc::now(); + let mut retry_expiration_times = self.retry_expiration_times.write(); + while !retry_expiration_times.is_empty() { + if retry_expiration_times[0] < now { + retry_expiration_times.pop_front(); + } else { + break; + } + } + retry_expiration_times.len() } fn send_requests( - &mut self, + &self, need_request: &mut usize, peers: &Vec>, - rng: &mut ThreadRng, - sync_peers: &mut SyncPeers, + sync_peers: &SyncPeers, ) -> Result<(), chain::Error> { // request_series naturally from head to tail, but requesting better to send from tail to the head.... let mut peers = peers.clone(); - for (hash, height) in self.request_series.iter().rev() { - if self.is_need_request_block(&hash)? { - // For tip we don't want request data from the peers that don't have anuthing. - peers.retain(|p| p.info.live_info.read().height >= *height); + // Requests wuth try write because otherwise somebody else is sending, it is mean we are good... + if let Some(request_series) = self.request_series.try_write() { + *need_request = need_request.saturating_sub(self.calc_retry_running_requests()); + if *need_request == 0 { + return Ok(()); + } + + let mut rng = rand::thread_rng(); + let now = Utc::now(); + + let mut new_requests: Vec<(u64, Hash)> = Vec::new(); + let mut waiting_heights: Vec<(u64, Hash)> = Vec::new(); + + let mut first_in_cache = 0; + let mut last_in_cache = 0; + let mut has10_idx = 0; + + for (hash, height) in request_series.iter().rev() { + if self.is_block_recieved(&hash)? 
{ + if *height == last_in_cache + 1 { + last_in_cache = *height; + } else { + first_in_cache = *height; + last_in_cache = *height; + } + continue; + } + + if last_in_cache > 0 { + if last_in_cache - first_in_cache > pibd_params::BLOCKS_RETRY_DELTA { + has10_idx = first_in_cache; + } + first_in_cache = 0; + last_in_cache = 0; + } + + if self.request_tracker.has_request(&hash) { + waiting_heights.push((height.clone(), hash.clone())); + } else { + new_requests.push((height.clone(), hash.clone())); + if new_requests.len() >= *need_request { + break; + } + } + } + + let mut retry_requests: Vec<(u64, Hash)> = Vec::new(); + if has10_idx > 0 { + for (height, req) in waiting_heights { + if height >= has10_idx { + break; + } + retry_requests.push((height, req)); + } + } + + // Now let's try to send retry requests first + if let Some(mut last_retry_height) = self.last_retry_height.try_write() { + for (height, hash) in retry_requests { + if height <= *last_retry_height { + continue; + } + + if *need_request == 0 { + break; + } + + // We don't want to send retry to the peer whom we already send the data + if let Some(requested_peer) = self.request_tracker.get_expected_peer(&hash) { + let dup_peers: Vec> = peers + .iter() + .filter(|p| { + p.info.addr != requested_peer + && p.info.live_info.read().height >= height + }) + .cloned() + .choose_multiple(&mut rng, 2); + + if dup_peers.len() == 0 { + break; + } + + if *need_request < dup_peers.len() { + *need_request = 0; + break; + } + *need_request = need_request.saturating_sub(dup_peers.len()); + + // we can do retry now + for p in dup_peers { + debug!( + "Processing duplicated request for the block {} at {}, peer {:?}", + hash, height, p.info.addr + ); + match p.send_block_request(hash, chain::Options::SYNC) { + Ok(_) => self + .retry_expiration_times + .write() + .push_back(now + self.request_tracker.get_average_latency()), + Err(e) => { + let msg = format!( + "Failed to send duplicate block request to peer {}, {}", + p.info.addr, e + ); + warn!("{}", msg); + sync_peers.report_no_response(&p.info.addr, msg); + break; + } + } + } + + *last_retry_height = height; + } + } + } + + // Now sending normal requests, no retry for now + for (height, hash) in new_requests { + if *need_request == 0 { + break; + } + *need_request = need_request.saturating_sub(1); + + peers.retain(|p| p.info.live_info.read().height >= height); if peers.is_empty() { *need_request = 0; return Ok(()); } - // can request a block... - let peer = peers.choose(rng).expect("Peers can't be empty"); + + // sending request + let peer = peers + .choose(&mut rng) + .expect("Internal error. 
peers are empty"); + + debug!( + "Processing request for the block {} at {}, peer {:?}", + hash, height, peer.info.addr + ); if let Err(e) = peer.send_block_request(hash.clone(), chain::Options::SYNC) { let msg = format!( "Failed to send block request to peer {}, {}", peer.info.addr, e ); warn!("{}", msg); - sync_peers.report_error_response(&peer.info.addr, msg); + sync_peers.report_no_response(&peer.info.addr, msg); } else { self.request_tracker.register_request( hash.clone(), peer.info.addr.clone(), format!("Block {}, {}", hash, height), - hash.clone(), ); - *need_request -= 1; - if *need_request == 0 { - break; - } } } } diff --git a/servers/src/mwc/sync/header_hashes_sync.rs b/servers/src/mwc/sync/header_hashes_sync.rs index 76ba26753..8e52927a2 100644 --- a/servers/src/mwc/sync/header_hashes_sync.rs +++ b/servers/src/mwc/sync/header_hashes_sync.rs @@ -26,6 +26,7 @@ use mwc_chain::Chain; use mwc_core::core::hash::Hash; use mwc_core::core::{Segment, SegmentType}; use mwc_p2p::{PeerAddr, ReasonForBan}; +use mwc_util::RwLock; use rand::seq::SliceRandom; use std::cmp; use std::collections::{HashMap, HashSet}; @@ -45,9 +46,9 @@ pub struct HeadersHashSync { // sync for segments requested_segments: HashMap<(SegmentType, u64), (PeerAddr, DateTime)>, // pibd ready flag for quick response during waiting time intervals - pibd_headers_are_loaded: bool, + pibd_headers_are_loaded: RwLock, - cached_response: Option>, + cached_response: RwLock>>, pibd_params: Arc, } @@ -62,13 +63,13 @@ impl HeadersHashSync { responded_headers_hash_from: HashMap::new(), responded_with_another_height: HashSet::new(), requested_segments: HashMap::new(), - pibd_headers_are_loaded: false, - cached_response: None, + pibd_headers_are_loaded: RwLock::new(false), + cached_response: RwLock::new(None), } } pub fn is_pibd_headers_are_loaded(&self) -> bool { - self.pibd_headers_are_loaded + *self.pibd_headers_are_loaded.read() } fn get_peer_capabilities() -> Capabilities { @@ -82,8 +83,8 @@ impl HeadersHashSync { self.responded_headers_hash_from.clear(); self.responded_with_another_height.clear(); self.requested_segments.clear(); - self.pibd_headers_are_loaded = false; - self.cached_response = None; + *self.pibd_headers_are_loaded.write() = false; + *self.cached_response.write() = None; } pub fn is_complete(&self) -> bool { @@ -111,11 +112,7 @@ impl HeadersHashSync { // At this point we found that all hash download process is failed. Now we need to ban peers that // was commited to headers hash roots. Other banned peers needs to be unbanned - pub fn reset_ban_commited_to_hash( - &mut self, - peers: &Arc, - sync_peers: &mut SyncPeers, - ) { + pub fn reset_ban_commited_to_hash(&mut self, peers: &Arc, sync_peers: &SyncPeers) { debug_assert!(self.headers_hash_desegmenter.is_some()); if let Some(headers_hash_desegmenter) = self.headers_hash_desegmenter.as_ref() { @@ -143,19 +140,15 @@ impl HeadersHashSync { self.reset(); } - pub fn request( - &mut self, - peers: &Arc, - sync_state: &SyncState, - sync_peers: &mut SyncPeers, - best_height: u64, - ) -> SyncResponse { + // Lightweight request processing for non active case. Immutable method + pub fn request_pre(&self, best_height: u64) -> Option { // Sending headers hash request to all peers that has the same archive height... 
- if let Some(cached_response) = &self.cached_response { + let cached_response = self.cached_response.read().clone(); + if let Some(cached_response) = cached_response { if !cached_response.is_expired() { - return cached_response.get_response().clone(); + return Some(cached_response.to_response()); } else { - self.cached_response = None; + *self.cached_response.write() = None; } } @@ -163,7 +156,7 @@ impl HeadersHashSync { if let Ok(tip) = self.chain.header_head() { if tip.height > target_archive_height { - self.pibd_headers_are_loaded = true; + *self.pibd_headers_are_loaded.write() = true; let resp = SyncResponse::new( SyncRequestResponses::HeadersPibdReady, Self::get_peer_capabilities(), @@ -172,11 +165,23 @@ impl HeadersHashSync { tip.height, target_archive_height ), ); - self.cached_response = + *self.cached_response.write() = Some(CachedResponse::new(resp.clone(), Duration::seconds(60))); - return resp; + return Some(resp); } } + None + } + + // Full processing, Mutable method + pub fn request_impl( + &mut self, + peers: &Arc, + sync_state: &SyncState, + sync_peers: &SyncPeers, + best_height: u64, + ) -> SyncResponse { + let target_archive_height = Chain::height_2_archive_height(best_height); if self.headers_hash_desegmenter.is_none() { let now = Utc::now(); @@ -189,7 +194,7 @@ impl HeadersHashSync { } self.requested_headers_hash_from.retain(|peer, req_time| { - if (now - *req_time).num_seconds() > pibd_params::SEGMENT_REQUEST_TIMEOUT_SECS { + if (now - *req_time).num_seconds() > pibd_params::PIBD_REQUESTS_TIMEOUT_SECS { sync_peers.report_no_response(peer, "header hashes".into()); return false; } @@ -205,10 +210,11 @@ impl HeadersHashSync { } if !self.responded_headers_hash_from.is_empty() - && (self.responded_headers_hash_from.len() + && ((self.responded_headers_hash_from.len() >= self.requested_headers_hash_from.len() + && self.responded_headers_hash_from.len() > 1) || (now - first_request).num_seconds() - > pibd_params::SEGMENT_REQUEST_TIMEOUT_SECS * 3) + > pibd_params::PIBD_REQUESTS_TIMEOUT_SECS / 2) { // We can elect the group with a most representative hash let mut hash_counts: HashMap = HashMap::new(); @@ -235,7 +241,7 @@ impl HeadersHashSync { total_blocks: segment_num, }); // Headers desegmenter is ready - let's retry and request some headers - return self.request(peers, sync_state, sync_peers, best_height); + return self.request_impl(peers, sync_state, sync_peers, best_height); } let headers_hash_peers = sync_utils::get_qualify_peers( @@ -306,7 +312,8 @@ impl HeadersHashSync { Self::get_peer_capabilities(), format!("headers_hash_desegmenter is complete"), ); - self.cached_response = Some(CachedResponse::new(resp.clone(), Duration::seconds(180))); + *self.cached_response.write() = + Some(CachedResponse::new(resp.clone(), Duration::seconds(180))); return resp; } @@ -361,7 +368,7 @@ impl HeadersHashSync { headers_hash_peers.len() * self.pibd_params.get_segments_request_per_peer(), self.pibd_params.get_segments_requests_limit(), ), - &self.requested_segments, + &&self.requested_segments, &*self.pibd_params, ) }; @@ -370,7 +377,7 @@ impl HeadersHashSync { // clean up expired let now = Utc::now(); self.requested_segments.retain(|_idx, (peer, time)| { - if (now - *time).num_seconds() > pibd_params::SEGMENT_REQUEST_TIMEOUT_SECS { + if (now - *time).num_seconds() > pibd_params::PIBD_REQUESTS_TIMEOUT_SECS { sync_peers.report_no_response(peer, "header hashes".into()); // it is expired return false; } @@ -442,7 +449,7 @@ impl HeadersHashSync { peer: &PeerAddr, archive_height: u64, 
headers_hash_root: Hash,
- sync_peers: &mut SyncPeers,
+ sync_peers: &SyncPeers,
) {
// Adding only once, so attacker will not be able to escape the ban
if archive_height == self.target_archive_height
@@ -472,7 +479,7 @@ impl HeadersHashSync {
peer: &PeerAddr,
header_hashes_root: Hash,
segment: Segment<Hash>,
- sync_peers: &mut SyncPeers,
+ sync_peers: &SyncPeers,
) {
if let Some(headers_hash_desegmenter) = self.headers_hash_desegmenter.as_mut() {
if *headers_hash_desegmenter.get_headers_root_hash() != header_hashes_root {
diff --git a/servers/src/mwc/sync/header_sync.rs b/servers/src/mwc/sync/header_sync.rs
index 01446635d..1c514836a 100644
--- a/servers/src/mwc/sync/header_sync.rs
+++ b/servers/src/mwc/sync/header_sync.rs
@@ -32,18 +32,23 @@
use mwc_chain::txhashset::{HeaderHashesDesegmenter, HeadersRecieveCache};
use mwc_core::core::hash::Hashed;
use mwc_core::core::BlockHeader;
use mwc_p2p::PeerAddr;
+use mwc_util::RwLock;
+use rand::seq::IteratorRandom;
use rand::seq::SliceRandom;
-use std::collections::HashMap;
+use std::collections::{HashMap, VecDeque};
use std::sync::Arc;

pub struct HeaderSync {
chain: Arc<Chain>,
- received_cache: Option<HeadersRecieveCache>,
+ received_cache: RwLock<Option<HeadersRecieveCache>>,
// requested_heights is expected to be at response height, the next tothe requested
- request_tracker: RequestTracker<Hash, Vec<Hash>>, // Vec<Hash> - locator data for headers request
- cached_response: Option<CachedResponse<SyncResponse>>,
- headers_series_cache: HashMap<(PeerAddr, Hash), (Vec<BlockHeader>, DateTime<Utc>)>,
+ request_tracker: RequestTracker<Hash>, // Vec<Hash> - locator data for headers request
+ cached_response: RwLock<Option<CachedResponse<SyncResponse>>>,
+ headers_series_cache: RwLock<HashMap<(PeerAddr, Hash), (Vec<BlockHeader>, DateTime<Utc>)>>,
pibd_params: Arc<PibdParams>,
+ last_retry_height: RwLock<u64>,
+ retry_expiration_times: RwLock<VecDeque<DateTime<Utc>>>,
+ send_requests_lock: RwLock<u32>,
}

impl HeaderSync {
@@ -51,10 +56,13 @@
HeaderSync {
pibd_params: chain.get_pibd_params().clone(),
chain: chain.clone(),
- received_cache: None,
+ received_cache: RwLock::new(None),
request_tracker: RequestTracker::new(),
- cached_response: None,
- headers_series_cache: HashMap::new(),
+ cached_response: RwLock::new(None),
+ headers_series_cache: RwLock::new(HashMap::new()),
+ last_retry_height: RwLock::new(0),
+ retry_expiration_times: RwLock::new(VecDeque::new()),
+ send_requests_lock: RwLock::new(0),
}
}

@@ -63,18 +71,19 @@
}

pub fn request(
- &mut self,
+ &self,
peers: &Arc<p2p::Peers>,
sync_state: &SyncState,
- sync_peers: &mut SyncPeers,
+ sync_peers: &SyncPeers,
header_hashes: &HeadersHashSync,
best_height: u64,
) -> SyncResponse {
- if let Some(cached_response) = &self.cached_response {
+ let cached_response = self.cached_response.read().clone();
+ if let Some(cached_response) = cached_response {
if !cached_response.is_expired() {
- return cached_response.get_response().clone();
+ return cached_response.to_response();
} else {
- self.cached_response = None;
+ *self.cached_response.write() = None;
}
}

@@ -88,30 +97,13 @@
Self::get_peer_capabilities(),
format!("Header head {} vs {}", header_head.height, best_height),
);
- self.cached_response = Some(CachedResponse::new(resp.clone(), Duration::seconds(60)));
+ *self.cached_response.write() =
+ Some(CachedResponse::new(resp.clone(), Duration::seconds(60)));
return resp;
}

- self.request_tracker.retain_expired(
- pibd_params::SEGMENT_REQUEST_TIMEOUT_SECS,
- sync_peers,
- |peer, request| {
- debug!(
- "Making retry send_header_request({:?}) call for peer {:?}",
- request, peer
- );
- if let Some(peer) = peers.get_connected_peer(peer) {
- match peer.send_header_request(request.clone()) {
- Ok(_) => return true,
- Err(e) =>
error!(
- "Unable to retry send_header_request({:?}) for peer {:?}. Error: {}",
- request, peer, e
- ),
- }
- }
- false
- },
- );
+ self.request_tracker
+ .retain_expired(pibd_params::PIBD_REQUESTS_TIMEOUT_SECS, sync_peers);

// it is initial statis flag
if !header_hashes.is_pibd_headers_are_loaded() {
@@ -125,21 +117,40 @@
);
} else {
// finally we have a hashes, on the first attempt we need to validate if what is already uploaded is good
- if self.received_cache.is_none() {
+ if self.received_cache.read().is_none() {
let header_hashes = header_hashes
.get_headers_hash_desegmenter()
.expect("header_hashes must be is_complete");
let received_cache = HeadersRecieveCache::new(self.chain.clone(), header_hashes);
- self.received_cache = Some(received_cache);
+ *self.received_cache.write() = Some(received_cache);
self.request_tracker.clear();
}
- let received_cache = self
- .received_cache
- .as_mut()
+ let received_cache = self.received_cache.read();
+ let received_cache = received_cache
+ .as_ref()
.expect("Internal error. Received_cache is not initialized.");

+ // first check if some headers need to be applied to the chain
+ match received_cache.apply_cache() {
+ Ok(has_more_data) => {
+ if has_more_data {
+ return SyncResponse::new(
+ SyncRequestResponses::HashMoreHeadersToApply,
+ Self::get_peer_capabilities(),
+ "Has more headers data to apply".into(),
+ );
+ }
+ }
+ Err((peer, err)) => {
+ let msg =
+ format!("Failed to process add_headers for {}. Error: {}", peer, err);
+ error!("{}", msg);
+ sync_peers.report_error_response_for_peerstr(peer, msg);
+ }
+ }
+
let headers_hash_desegmenter = header_hashes.get_headers_hash_desegmenter().expect(
"Internal error. header_hashes.get_headers_hash_desegmenter is not ready",
);
@@ -153,8 +164,7 @@
self.pibd_params.get_segments_request_per_peer(),
Capabilities::HEADER_HIST,
header_hashes.get_target_archive_height(),
- self.request_tracker.get_requests_num(),
- &self.request_tracker.get_peers_track_data(),
+ &self.request_tracker,
);
if peers.is_empty() {
if excluded_peers == 0 {
@@ -184,43 +194,14 @@
archive_height: received_cache.get_archive_header_height(),
});

- let need_request = self.request_tracker.calculate_needed_requests(
- peers.len(),
- excluded_requests as usize,
- excluded_peers as usize,
- self.pibd_params.get_segments_request_per_peer(),
- self.pibd_params.get_segments_requests_limit(),
+ self.send_requests(
+ &peers,
+ headers_hash_desegmenter,
+ sync_peers,
+ excluded_requests,
+ excluded_peers,
);
- if need_request > 0 {
- let hashes = received_cache.next_desired_headers(headers_hash_desegmenter,
- need_request, self.request_tracker.get_requested(),
- self.pibd_params.get_headers_buffer_len())
- .expect("Chain is corrupted, please clean up the data manually and restart the node");
-
- let mut rng = rand::thread_rng();
- for (hash, height) in hashes {
- // sending request
- let peer = peers
- .choose(&mut rng)
- .expect("Internal error.
peers are empty"); - match self.request_headers_for_hash(hash.clone(), height, peer.clone()) - { - Ok(locator) => { - self.request_tracker.register_request( - hash, - peer.info.addr.clone(), - format!("Header {}, {}", hash, height), - locator, - ); - } - Err(e) => { - let msg = format!("Failed to send headers request to {} for hash {}, Error: {}", peer.info.addr, hash, e); - error!("{}", msg); - sync_peers.report_error_response(&peer.info.addr, msg); - } - } - } - } + return SyncResponse::new( SyncRequestResponses::Syncing, Self::get_peer_capabilities(), @@ -257,10 +238,7 @@ impl HeaderSync { return SyncResponse::new( SyncRequestResponses::HeadersPibdReady, Self::get_peer_capabilities(), - format!( - "Loading headers above horizon, requests waiting: {}", - self.request_tracker.get_requests_num() - ), + "Loading headers above horizon".into(), ); } @@ -277,17 +255,17 @@ impl HeaderSync { Self::get_peer_capabilities(), format!("At height {} now", header_head.height), ); - self.cached_response = Some(CachedResponse::new(resp.clone(), Duration::seconds(60))); + *self.cached_response.write() = + Some(CachedResponse::new(resp.clone(), Duration::seconds(60))); return resp; } match self.request_headers(header_head, sync_peer.clone()) { - Ok(locator) => { + Ok(_) => { self.request_tracker.register_request( header_head_hash, sync_peer.info.addr.clone(), format!("Tail header for {}", header_head.height), - locator, ); } Err(e) => { @@ -296,7 +274,7 @@ impl HeaderSync { sync_peer.info.addr, header_head.height, e ); error!("{}", msg); - sync_peers.report_error_response(&sync_peer.info.addr, msg); + sync_peers.report_no_response(&sync_peer.info.addr, msg); } } @@ -309,11 +287,11 @@ impl HeaderSync { /// Recieved headers handler pub fn receive_headers( - &mut self, + &self, peer: &PeerAddr, bhs: &[BlockHeader], remaining: u64, - sync_peers: &mut SyncPeers, + sync_peers: &SyncPeers, header_hashes: Option<&HeaderHashesDesegmenter>, peers: &Arc, ) -> Result<(), mwc_chain::Error> { @@ -324,114 +302,77 @@ impl HeaderSync { bhs.first().expect("bhs can't be empty").prev_hash.clone(), ); - let bhs = match self.headers_series_cache.remove(&series_key) { - Some((mut peer_bhs, _)) => { - debug_assert!(!peer_bhs.is_empty()); - peer_bhs.extend_from_slice(bhs); - if remaining > 0 { - self.headers_series_cache.insert( - ( - series_key.0, - peer_bhs.last().expect("peer_bhs can't be empty").hash(), - ), - (peer_bhs, Utc::now()), - ); - return Ok(()); + let bhs = { + let mut headers_series_cache = self.headers_series_cache.write(); + let bhs = match headers_series_cache.remove(&series_key) { + Some((mut peer_bhs, _)) => { + debug_assert!(!peer_bhs.is_empty()); + peer_bhs.extend_from_slice(bhs); + if remaining > 0 { + headers_series_cache.insert( + ( + series_key.0, + peer_bhs.last().expect("peer_bhs can't be empty").hash(), + ), + (peer_bhs, Utc::now()), + ); + return Ok(()); + } + peer_bhs } - peer_bhs - } - None => { - if remaining == 0 { - // no need to combine anything - bhs.to_vec() - } else { - // putting into the cache and waiting for the rest - self.headers_series_cache.insert( - (series_key.0, bhs.last().expect("bhs can't be empty").hash()), - (bhs.to_vec(), Utc::now()), - ); - return Ok(()); + None => { + if remaining == 0 { + // no need to combine anything + bhs.to_vec() + } else { + // putting into the cache and waiting for the rest + headers_series_cache.insert( + (series_key.0, bhs.last().expect("bhs can't be empty").hash()), + (bhs.to_vec(), Utc::now()), + ); + return Ok(()); + } } + }; + + // some stale 
data that we should prune from time to time
+ if headers_series_cache.len() > 2000 {
+ let expiration_time =
+ Utc::now() - Duration::seconds(pibd_params::PIBD_REQUESTS_TIMEOUT_SECS * 2);
+ headers_series_cache.retain(|_, (_, time)| *time > expiration_time);
}
+ bhs
+ };
- // some stale data we better to retain sometimes
- if self.headers_series_cache.len() > 2000 {
- let expiration_time =
- Utc::now() - Duration::seconds(pibd_params::SEGMENT_REQUEST_TIMEOUT_SECS * 2);
- self.headers_series_cache
- .retain(|_, (_, time)| *time > expiration_time);
- }

let mut expected_peer = false;
- if let Some(peer_addr) = self.request_tracker.remove_request(&bhs[0].prev_hash) {
- if peer_addr == *peer {
- expected_peer = true;
- // let's request next package since we get this one...
- if self.request_tracker.get_update_requests_to_next_ask() == 0 {
- // it is initial statis flag
- if header_hashes.is_some() && self.received_cache.is_some() {
- let received_cache = self
- .received_cache
- .as_mut()
- .expect("Internal error. Received_cache is not initialized.");
-
- let headers_hash_desegmenter = header_hashes.unwrap();
- if headers_hash_desegmenter.is_complete() {
- // Requesting multiple headers
-
- let (peers, excluded_requests, excluded_peers) =
- sync_utils::get_sync_peers(
- peers,
- self.pibd_params.get_segments_request_per_peer(),
- Capabilities::HEADER_HIST,
- headers_hash_desegmenter.get_target_height(),
- self.request_tracker.get_requests_num(),
- &self.request_tracker.get_peers_track_data(),
- );
-
- if !peers.is_empty() {
- let need_request = self.request_tracker.calculate_needed_requests(
- peers.len(),
- excluded_requests as usize,
- excluded_peers as usize,
- self.pibd_params.get_segments_request_per_peer(),
- self.pibd_params.get_segments_requests_limit(),
- );
- if need_request > 0 {
- let hashes = received_cache.next_desired_headers(headers_hash_desegmenter, need_request, self.request_tracker.get_requested(), self.pibd_params.get_headers_buffer_len())
- .expect("Chain is corrupted, please clean up the data manually and restart the node");
-
- let mut rng = rand::thread_rng();
-
- for (hash, height) in hashes {
- // sending request
- let peer = peers
- .choose(&mut rng)
- .expect("Internal error. peers are empty");
- match self.request_headers_for_hash(
- hash.clone(),
- height,
- peer.clone(),
- ) {
- Ok(locator) => {
- self.request_tracker.register_request(
- hash,
- peer.info.addr.clone(),
- format!("Header {}, {}", hash, height),
- locator,
- );
- }
- Err(e) => {
- let msg = format!("Failed to send headers request to {} for hash {}, Error: {}", peer.info.addr, hash, e);
- error!("{}", msg);
- sync_peers
- .report_error_response(&peer.info.addr, msg);
- }
- }
- }
- }
- }
+ let peer_addr = self.request_tracker.remove_request(&bhs[0].prev_hash, peer);
+ if let Some(peer_addr) = peer_addr {
+ expected_peer = peer_addr == *peer;
+
+ // let's request next package since we get this one...
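The batch-stitching logic above keys a partial header series by (peer, prev_hash of its first header) and, while `remaining` is non-zero, re-files it under the hash of its last header so the next chunk chains onto it. A minimal sketch of that bookkeeping, with hashes reduced to integers and peers to strings (all names here are illustrative, not the node's types):

    use std::collections::HashMap;

    type Hash = u64;

    // A "header" is reduced to a (prev_hash, hash) pair for this sketch.
    struct SeriesCache {
        cache: HashMap<(String, Hash), Vec<(Hash, Hash)>>,
    }

    impl SeriesCache {
        // Returns the complete series once the final chunk (remaining == 0) lands.
        fn receive(
            &mut self,
            peer: &str,
            batch: Vec<(Hash, Hash)>,
            remaining: u64,
        ) -> Option<Vec<(Hash, Hash)>> {
            let first_prev = batch.first()?.0;
            let key = (peer.to_string(), first_prev);
            // Extend a pending series if this batch chains onto one.
            let mut series = self.cache.remove(&key).unwrap_or_default();
            series.extend(batch);
            if remaining > 0 {
                // Not complete yet: re-file under the new tail hash and wait.
                let tail = series.last().expect("series can't be empty").1;
                self.cache.insert((peer.to_string(), tail), series);
                None
            } else {
                Some(series)
            }
        }
    }

    fn main() {
        let mut cache = SeriesCache { cache: HashMap::new() };
        assert!(cache.receive("peer1", vec![(0, 1), (1, 2)], 1).is_none());
        let full = cache.receive("peer1", vec![(2, 3)], 0).unwrap();
        assert_eq!(full, vec![(0, 1), (1, 2), (2, 3)]);
        println!("stitched {} headers", full.len());
    }

Keying by peer as well as hash keeps series from different peers from being interleaved, which is also why the occasional size-capped retain above is enough to bound the cache.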
+ if self.request_tracker.get_update_requests_to_next_ask() == 0 {
+ // initial status flag
+ if header_hashes.is_some() {
+ let headers_hash_desegmenter = header_hashes.unwrap();
+ if headers_hash_desegmenter.is_complete() {
+ // Requesting multiple headers
+
+ let (peers, excluded_requests, excluded_peers) = sync_utils::get_sync_peers(
+ peers,
+ self.pibd_params.get_segments_request_per_peer(),
+ Capabilities::HEADER_HIST,
+ headers_hash_desegmenter.get_target_height(),
+ &self.request_tracker,
+ );
+
+ if !peers.is_empty() {
+ self.send_requests(
+ &peers,
+ headers_hash_desegmenter,
+ sync_peers,
+ excluded_requests,
+ excluded_peers,
+ );
}
}
}
@@ -460,9 +401,10 @@
// try to add headers to our header chain
if let Some(header_hashes) = header_hashes {
if bhs[0].height <= header_hashes.get_target_height() {
- if let Some(received_cache) = self.received_cache.as_mut() {
+ if let Some(received_cache) = self.received_cache.read().as_ref() {
// Processing with a cache
- match received_cache.add_headers(header_hashes, bhs, peer.to_string()) {
+ match received_cache.add_headers_to_cache(header_hashes, bhs, peer.to_string())
+ {
Ok(_) => {
// Reporting ok only for expected. We don't want attacker to make good points with not expected responses
if expected_peer {
@@ -478,6 +420,13 @@
sync_peers.report_error_response_for_peerstr(peer, msg);
}
}
+ // The cache needs to be applied once from here, even if the data already exists
+ if let Err((peer, err)) = received_cache.apply_cache() {
+ let msg =
+ format!("Failed to process add_headers for {}. Error: {}", peer, err);
+ error!("{}", msg);
+ sync_peers.report_error_response_for_peerstr(peer, msg);
+ }
return Ok(());
}
}
@@ -500,19 +449,18 @@
if !self.request_tracker.has_request(&sync_head.last_block_h) {
if let Some(sync_peer) = Self::choose_sync_peer(peers) {
match self.request_headers(sync_head, sync_peer.clone()) {
- Ok(locator) => {
+ Ok(_) => {
self.request_tracker.register_request(
sync_head.last_block_h,
sync_peer.info.addr.clone(),
format!("Tail headers for {}", sync_head.height),
- locator,
);
sync_peers.report_ok_response(peer);
}
Err(e) => {
let msg = format!("Failed to send headers request to {} for height {}, Error: {}", sync_peer.info.addr, sync_head.height, e);
error!("{}", msg);
- sync_peers.report_error_response(&sync_peer.info.addr, msg);
+ sync_peers.report_no_response(&sync_peer.info.addr, msg);
}
}
}
@@ -550,11 +498,7 @@
}

/// Request some block headers from a peer to advance us.
- fn request_headers( - &self, - sync_head: chain::Tip, - peer: Arc, - ) -> Result, chain::Error> { + fn request_headers(&self, sync_head: chain::Tip, peer: Arc) -> Result<(), chain::Error> { let locator = self .get_locator(sync_head) .map_err(|e| chain::Error::Other(format!("{}", e)))?; @@ -562,9 +506,9 @@ impl HeaderSync { "sync: request_headers: asking {} for headers at {}", peer.info.addr, sync_head.height ); - peer.send_header_request(locator.clone()) + peer.send_header_request(locator) .map_err(|e| chain::Error::Other(format!("{}", e)))?; - Ok(locator) + Ok(()) } fn request_headers_for_hash( @@ -572,15 +516,14 @@ impl HeaderSync { header_hash: Hash, height: u64, peer: Arc, - ) -> Result, chain::Error> { + ) -> Result<(), chain::Error> { debug!( "sync: request_headers: asking {} for headers at hash {}, height {}", peer.info.addr, header_hash, height ); - let locator: Vec = vec![header_hash]; - peer.send_header_request(locator.clone()) + peer.send_header_request(vec![header_hash]) .map_err(|e| chain::Error::Other(format!("{}", e)))?; - Ok(locator) + Ok(()) } /// We build a locator based on sync_head. @@ -591,6 +534,135 @@ impl HeaderSync { let locator = self.chain.get_locator_hashes(sync_head, &heights)?; Ok(locator) } + + fn calc_retry_running_requests(&self) -> usize { + let now = Utc::now(); + let mut retry_expiration_times = self.retry_expiration_times.write(); + while !retry_expiration_times.is_empty() { + if retry_expiration_times[0] < now { + retry_expiration_times.pop_front(); + } else { + break; + } + } + retry_expiration_times.len() + } + + fn send_requests( + &self, + peers: &Vec>, + headers_hash_desegmenter: &HeaderHashesDesegmenter, + sync_peers: &SyncPeers, + excluded_requests: u32, + excluded_peers: u32, + ) { + if let Some(_) = self.send_requests_lock.try_write() { + let mut need_request = self.request_tracker.calculate_needed_requests( + peers.len(), + excluded_requests as usize, + excluded_peers as usize, + self.pibd_params.get_segments_request_per_peer(), + self.pibd_params.get_segments_requests_limit(), + ); + need_request = need_request.saturating_sub(self.calc_retry_running_requests()); + if need_request > 0 { + let received_cache = self.received_cache.read(); + let received_cache = received_cache + .as_ref() + .expect("Internal error. Received_cache is not initialized."); + + let (hashes, retry_reqs) = received_cache.next_desired_headers(headers_hash_desegmenter, + need_request, &self.request_tracker, + self.pibd_params.get_headers_buffer_len()) + .expect("Chain is corrupted, please clean up the data manually and restart the node"); + + // let's do retry requests first. 
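The `calc_retry_running_requests` helper above (the same pattern recurs in body, header, and state sync) counts in-flight retries without tracking their keys: each duplicate request pushes `now + average latency` onto a queue, and anything past its expiration is popped from the front. A standalone sketch of the idea, using `std::time` instead of chrono (names are illustrative only):

    use std::collections::VecDeque;
    use std::time::{Duration, Instant};

    // Counts duplicate (retry) requests presumed still in flight. Push times
    // are monotonic, so expired entries can only sit at the front.
    struct RetryWindow {
        expirations: VecDeque<Instant>,
    }

    impl RetryWindow {
        fn new() -> Self {
            RetryWindow { expirations: VecDeque::new() }
        }

        // Called when a duplicate request is sent; after the expected latency
        // it is assumed answered (or lost) and stops counting against quota.
        fn register_retry(&mut self, expected_latency: Duration) {
            self.expirations.push_back(Instant::now() + expected_latency);
        }

        // Drop expired entries from the front, then report how many retries
        // are still considered running.
        fn running(&mut self) -> usize {
            let now = Instant::now();
            while self.expirations.front().map_or(false, |t| *t < now) {
                self.expirations.pop_front();
            }
            self.expirations.len()
        }
    }

    fn main() {
        let mut window = RetryWindow::new();
        window.register_retry(Duration::from_millis(10));
        window.register_retry(Duration::from_secs(10));
        std::thread::sleep(Duration::from_millis(20));
        assert_eq!(window.running(), 1); // the 10ms retry has expired
    }

Subtracting this count from `need_request` is what keeps retries from crowding out first-time requests in the per-peer quota.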
+ let mut rng = rand::thread_rng(); + let now = Utc::now(); + + // Whoever lock, can send duplicate requests + let last_retry_height = self.last_retry_height.try_write(); + if let Some(mut last_retry_height) = last_retry_height { + for (hash, height) in retry_reqs { + if height <= *last_retry_height { + continue; + } + + if need_request == 0 { + break; + } + + // We don't want to send retry to the peer whom we already send the data + if let Some(requested_peer) = self.request_tracker.get_expected_peer(&hash) + { + let dup_peers: Vec> = peers + .iter() + .filter(|p| p.info.addr != requested_peer) + .cloned() + .choose_multiple(&mut rng, 2); + + if dup_peers.len() == 0 { + break; + } + + if need_request < dup_peers.len() { + need_request = 0; + break; + } + need_request = need_request.saturating_sub(dup_peers.len()); + + // we can do retry now + for p in dup_peers { + debug!("Processing duplicated request for the headers {} at {}, peer {:?}", hash, height, p.info.addr); + match self.request_headers_for_hash(hash.clone(), height, p.clone()) + { + Ok(_) => self.retry_expiration_times.write().push_back( + now + self.request_tracker.get_average_latency(), + ), + Err(e) => { + let msg = format!("Failed to send duplicate headers request to {} for hash {}, Error: {}", p.info.addr, hash, e); + error!("{}", msg); + sync_peers.report_no_response(&p.info.addr, msg); + break; + } + } + } + } + + *last_retry_height = height; + } + } + + for (hash, height) in hashes { + if need_request == 0 { + break; + } + need_request = need_request.saturating_sub(1); + // sending request + let peer = peers + .choose(&mut rng) + .expect("Internal error. peers are empty"); + match self.request_headers_for_hash(hash.clone(), height, peer.clone()) { + Ok(_) => { + self.request_tracker.register_request( + hash, + peer.info.addr.clone(), + format!("Header {}, {}", hash, height), + ); + } + Err(e) => { + let msg = format!( + "Failed to send headers request to {} for hash {}, Error: {}", + peer.info.addr, hash, e + ); + error!("{}", msg); + sync_peers.report_no_response(&peer.info.addr, msg); + } + } + } + } + } + } } // current height back to 0 decreasing in powers of 2 diff --git a/servers/src/mwc/sync/state_sync.rs b/servers/src/mwc/sync/state_sync.rs index cb66752e3..7b0bbbc70 100644 --- a/servers/src/mwc/sync/state_sync.rs +++ b/servers/src/mwc/sync/state_sync.rs @@ -23,13 +23,16 @@ use crate::util::StopState; use chrono::prelude::{DateTime, Utc}; use mwc_chain::pibd_params::PibdParams; use mwc_chain::txhashset::{BitmapChunk, Desegmenter}; -use mwc_chain::Chain; +use mwc_chain::{Chain, SyncStatus}; use mwc_core::core::hash::Hash; use mwc_core::core::{OutputIdentifier, Segment, SegmentTypeIdentifier, TxKernel}; use mwc_p2p::{Error, PeerAddr}; use mwc_util::secp::pedersen::RangeProof; +use mwc_util::RwLock; +use rand::prelude::IteratorRandom; use rand::seq::SliceRandom; -use std::collections::{HashMap, HashSet}; +use std::collections::{HashMap, HashSet, VecDeque}; +use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; use std::sync::Arc; /// Fast sync has 3 "states": @@ -40,17 +43,24 @@ use std::sync::Arc; /// The StateSync struct implements and monitors the middle step. pub struct StateSync { chain: Arc, + desegmenter: RwLock>, // Expected to have write only for initializetion. Then always read. 
+ reset_desegmenter: AtomicBool,
// Target height needs to be calculated by the top peers, can be different from headers, it is no problem
- target_archive_height: u64,
- target_archive_hash: Hash,
- requested_root_hash: HashMap<PeerAddr, DateTime<Utc>>,
- responded_root_hash: HashMap<PeerAddr, (Hash, DateTime<Utc>)>,
- responded_with_another_height: HashSet<PeerAddr>,
+ target_archive_height: AtomicU64,
+ target_archive_hash: RwLock<Hash>,
+ requested_root_hash: RwLock<HashMap<PeerAddr, DateTime<Utc>>>, // Lock 1
+ responded_root_hash: RwLock<HashMap<PeerAddr, (Hash, DateTime<Utc>)>>, // Lock 2
+ responded_with_another_height: RwLock<HashSet<PeerAddr>>, // Lock 3
// sync for segments
- request_tracker: RequestTracker<(SegmentType, u64), (SegmentTypeIdentifier, Hash)>,
- is_complete: bool,
+ request_tracker: RequestTracker<(SegmentType, u64)>,
+ is_complete: AtomicBool,
pibd_params: Arc<PibdParams>,
+
+ last_retry_idx: RwLock<HashMap<SegmentType, u64>>,
+ retry_expiration_times: RwLock<VecDeque<DateTime<Utc>>>,
+
+ send_requests_lock: RwLock<u32>,
}

impl StateSync {
@@ -58,13 +68,18 @@
StateSync {
pibd_params: chain.get_pibd_params().clone(),
chain,
- target_archive_height: 0,
- target_archive_hash: Hash::default(),
- requested_root_hash: HashMap::new(),
- responded_root_hash: HashMap::new(),
- responded_with_another_height: HashSet::new(),
+ desegmenter: RwLock::new(None),
+ reset_desegmenter: AtomicBool::new(false),
+ target_archive_height: AtomicU64::new(0),
+ target_archive_hash: RwLock::new(Hash::default()),
+ requested_root_hash: RwLock::new(HashMap::new()),
+ responded_root_hash: RwLock::new(HashMap::new()),
+ responded_with_another_height: RwLock::new(HashSet::new()),
request_tracker: RequestTracker::new(),
- is_complete: false,
+ is_complete: AtomicBool::new(false),
+ last_retry_idx: RwLock::new(HashMap::new()),
+ retry_expiration_times: RwLock::new(VecDeque::new()),
+ send_requests_lock: RwLock::new(0),
}
}

@@ -73,37 +88,35 @@
}

pub fn request(
- &mut self,
+ &self,
in_peers: &Arc<p2p::Peers>,
sync_state: Arc<SyncState>,
- sync_peers: &mut SyncPeers,
+ sync_peers: &SyncPeers,
stop_state: Arc<StopState>,
best_height: u64,
) -> SyncResponse {
// In case of archive mode, this step is must be skipped. Body sync will catch up.
- if self.is_complete || self.chain.archive_mode() {
+ if self.is_complete.load(Ordering::Relaxed) || self.chain.archive_mode() {
return SyncResponse::new(
SyncRequestResponses::StatePibdReady,
Capabilities::UNKNOWN,
- format!(
- "is_complete={} archive_mode={}",
- self.is_complete,
- self.chain.archive_mode()
- ),
+ "".into(),
);
}

// Let's check if we need to calculate/update archive height.
let target_archive_height = Chain::height_2_archive_height(best_height);

- if self.target_archive_height != target_archive_height {
- // Resetting all internal state, starting from the scratch
- self.target_archive_height = target_archive_height;
+ // Even though this is not an atomic operation, it is safe because request() is called from a single thread
+ if self.target_archive_height.load(Ordering::Relaxed) != target_archive_height {
// total reset, nothing needs to be saved...
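The struct above shows the conversion pattern used throughout this patch: `&mut self` entry points become `&self` by giving each field its own atomic or small RwLock, which is also why the server no longer wraps the whole manager in `Arc<RwLock<SyncManager>>`. A reduced sketch with std types only (the node itself uses `mwc_util::RwLock`, whose guards are infallible, so the `unwrap()` below is an artifact of std's poison handling, and all names are stand-ins):

    use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
    use std::sync::RwLock;

    // Reduced shape of the conversion: flags become atomics, compound state
    // goes behind its own small lock, and methods take &self.
    struct SyncShape {
        is_complete: AtomicBool,
        target_height: AtomicU64,
        responded: RwLock<Vec<(String, u64)>>, // peer -> root hash it reported
    }

    impl SyncShape {
        fn mark_complete(&self) {
            // Relaxed suffices here: the flag carries no ordering dependency.
            self.is_complete.store(true, Ordering::Relaxed);
        }

        fn record_response(&self, peer: &str, root: u64) {
            self.responded.write().unwrap().push((peer.to_string(), root));
        }
    }

    fn main() {
        let s = SyncShape {
            is_complete: AtomicBool::new(false),
            target_height: AtomicU64::new(0),
            responded: RwLock::new(Vec::new()),
        };
        s.target_height.store(1_000, Ordering::Relaxed);
        s.record_response("peer1", 42);
        s.mark_complete();
        println!(
            "complete={} height={} responses={}",
            s.is_complete.load(Ordering::Relaxed),
            s.target_height.load(Ordering::Relaxed),
            s.responded.read().unwrap().len()
        );
    }

Splitting state across several small locks (the "Lock 1/2/3" fields above) keeps readers on different fields from contending, at the cost of having to take locks in a consistent order when more than one is held.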
self.reset_desegmenter_data(); + // Resetting all internal state, starting from the scratch + self.target_archive_height + .store(target_archive_height, Ordering::Relaxed); } - if self.target_archive_height == 0 { + if target_archive_height == 0 { return SyncResponse::new( SyncRequestResponses::WaitingForPeers, Self::get_peer_capabilities(), @@ -119,7 +132,7 @@ impl StateSync { if head.height >= target_archive_height { // We are good, no needs to PIBD sync info!("No needs to sync, data until archive is ready"); - self.is_complete = true; + self.is_complete.store(true, Ordering::Relaxed); return SyncResponse::new( SyncRequestResponses::StatePibdReady, Capabilities::UNKNOWN, @@ -132,16 +145,13 @@ impl StateSync { } // Checking if archive header is already in the chain - let archive_header = match self.chain.get_header_by_height(self.target_archive_height) { + let archive_header = match self.chain.get_header_by_height(target_archive_height) { Ok(archive_header) => archive_header, Err(_) => { return SyncResponse::new( SyncRequestResponses::WaitingForHeaders, Self::get_peer_capabilities(), - format!( - "Header at height {} doesn't exist", - self.target_archive_height - ), + format!("Header at height {} doesn't exist", target_archive_height), ); } }; @@ -151,9 +161,8 @@ impl StateSync { in_peers, self.pibd_params.get_segments_request_per_peer(), Capabilities::PIBD_HIST, - self.target_archive_height, - self.request_tracker.get_requests_num(), - &self.request_tracker.get_peers_track_data(), + target_archive_height, + &self.request_tracker, ); if peers.is_empty() { if excluded_peers == 0 { @@ -180,53 +189,70 @@ impl StateSync { let now = Utc::now(); - // checking to timeouts for handshakes... - self.requested_root_hash.retain(|peer, req_time| { - if (now - *req_time).num_seconds() > pibd_params::SEGMENT_REQUEST_TIMEOUT_SECS { - sync_peers.report_no_response(peer, "root hash".into()); - return false; - } - true - }); - - // request handshakes if needed - for peer in &peers { - if !(self.requested_root_hash.contains_key(&peer.info.addr) - || self.responded_root_hash.contains_key(&peer.info.addr) - || self.responded_with_another_height.contains(&peer.info.addr)) - { - // can request a handshake - match peer - .send_start_pibd_sync_request(archive_header.height, archive_header.hash()) + { + let mut requested_root_hash = self.requested_root_hash.write(); + let responded_root_hash = self.responded_root_hash.read(); + let responded_with_another_height = self.responded_with_another_height.read(); + + // checking to timeouts for handshakes... 
+ requested_root_hash.retain(|peer, req_time| { + if (now - *req_time).num_seconds() > pibd_params::PIBD_REQUESTS_TIMEOUT_SECS { + sync_peers.report_no_response(peer, "root hash".into()); + return false; + } + true + }); + + // request handshakes if needed + for peer in &peers { + if !(requested_root_hash.contains_key(&peer.info.addr) + || responded_root_hash.contains_key(&peer.info.addr) + || responded_with_another_height.contains(&peer.info.addr)) { - Ok(_) => { - self.requested_root_hash - .insert(peer.info.addr.clone(), now.clone()); - } - Err(e) => { - error!("send_start_pibd_sync_request failed with error: {}", e); + // can request a handshake + match peer + .send_start_pibd_sync_request(archive_header.height, archive_header.hash()) + { + Ok(_) => { + requested_root_hash.insert(peer.info.addr.clone(), now.clone()); + } + Err(e) => { + error!("send_start_pibd_sync_request failed with error: {}", e); + } } } } } + if self.reset_desegmenter.swap(false, Ordering::Relaxed) { + self.reset_desegmenter_data(); + } + // Checking if need to init desegmenter - if self.chain.get_desegmenter().read().is_none() { + if self.desegmenter.read().is_none() { + sync_state.update(SyncStatus::TxHashsetPibd { + recieved_segments: 0, + total_segments: 100, + }); let mut first_request = now; - for (_, (_, time)) in &self.responded_root_hash { + let requested_root_hash = self.requested_root_hash.read(); + let responded_root_hash = self.responded_root_hash.read(); + + for (_, (_, time)) in &*responded_root_hash { if *time < first_request { first_request = *time; } } - if !self.responded_root_hash.is_empty() - && (self.responded_root_hash.len() >= self.requested_root_hash.len() + if !responded_root_hash.is_empty() + && ((responded_root_hash.len() >= requested_root_hash.len() + && responded_root_hash.len() > 1) || (now - first_request).num_seconds() - > pibd_params::SEGMENT_REQUEST_TIMEOUT_SECS) + > pibd_params::PIBD_REQUESTS_TIMEOUT_SECS / 2) { // We can elect the group with a most representative hash let mut hash_counts: HashMap = HashMap::new(); - for (_, (hash, _)) in &self.responded_root_hash { + for (_, (hash, _)) in &*responded_root_hash { hash_counts.insert(hash.clone(), hash_counts.get(hash).unwrap_or(&0) + 1); } // selecting hash with max value @@ -236,12 +262,27 @@ impl StateSync { .max_by_key(|&(_, count)| count) .expect("hash_counts is empty?"); + info!("Creating desegmenter for root hash {}", best_root_hash); + + if let Err(e) = self.chain.reset_pibd_chain() { + let msg = format!( + "Failed to reset chain before start BIPD state sync. Error: {}", + e + ); + error!("{}", msg); + return SyncResponse::new( + SyncRequestResponses::Syncing, + Self::get_peer_capabilities(), + msg, + ); + } match self .chain - .create_desegmenter(archive_header.height, best_root_hash.clone()) + .init_desegmenter(archive_header.height, best_root_hash.clone()) { - Ok(_) => { - self.target_archive_hash = archive_header.hash(); + Ok(desegmenter) => { + *self.target_archive_hash.write() = archive_header.hash(); + *self.desegmenter.write() = Some(desegmenter); } Err(e) => { error!("Failed to create PIBD desgmenter, {}", e); @@ -266,18 +307,17 @@ impl StateSync { format!( "Waiting for PIBD root. 
Hash peers: {} Get respoinses {} from {}", peers.len() + excluded_peers as usize, - self.responded_root_hash.len(), - self.requested_root_hash.len() + responded_root_hash.len(), + requested_root_hash.len() ), ); } } - let desegmenter = self.chain.get_desegmenter(); - let mut desegmenter = desegmenter.write(); + let desegmenter = self.desegmenter.read(); debug_assert!(desegmenter.is_some()); let desegmenter = desegmenter - .as_mut() + .as_ref() .expect("Desegmenter must be created at this point"); if desegmenter.is_complete() { @@ -287,7 +327,7 @@ impl StateSync { "Restarting because check_update_leaf_set_state failed with error {}", e ); - self.ban_this_session(desegmenter, sync_peers); + self.ban_this_session(desegmenter.get_bitmap_root_hash(), sync_peers); return SyncResponse::new( SyncRequestResponses::Syncing, Self::get_peer_capabilities(), @@ -303,7 +343,7 @@ impl StateSync { match desegmenter.validate_complete_state(sync_state, stop_state, self.chain.secp()) { Ok(_) => { info!("PIBD download and valiadion is done with success!"); - self.is_complete = true; + self.is_complete.store(true, Ordering::Relaxed); return SyncResponse::new( SyncRequestResponses::StatePibdReady, Capabilities::UNKNOWN, @@ -315,7 +355,7 @@ impl StateSync { "Restarting because validate_complete_state failed with error {}", e ); - self.ban_this_session(desegmenter, sync_peers); + self.ban_this_session(desegmenter.get_bitmap_root_hash(), sync_peers); return SyncResponse::new( SyncRequestResponses::Syncing, Self::get_peer_capabilities(), @@ -330,41 +370,21 @@ impl StateSync { debug_assert!(!desegmenter.is_complete()); - self.request_tracker.retain_expired( - pibd_params::SEGMENT_REQUEST_TIMEOUT_SECS, - sync_peers, - |peer, (segment, target_archive_hash)| { - debug!( - "Making request retry for segment {:?}, peer {:?}", - segment, peer - ); - if let Some(peer) = in_peers.get_connected_peer(peer) { - match Self::send_request(&peer, &segment, &target_archive_hash) { - Ok(_) => return true, - Err(e) => error!( - "Unable to retry request for segment {:?}, peer {:?}. Error: {}", - segment, peer, e - ), - } - } - false - }, - ); + self.request_tracker + .retain_expired(pibd_params::PIBD_REQUESTS_TIMEOUT_SECS, sync_peers); sync_state.update(desegmenter.get_pibd_progress()); - let mut rng = rand::thread_rng(); - // let's check what peers with root hash are exist let root_hash = desegmenter.get_bitmap_root_hash(); let mut root_hash_peers: Vec> = Vec::new(); let mut other_hashes = 0; for p in peers { let addr = &p.info.addr; - if self.responded_with_another_height.contains(addr) { + if self.responded_with_another_height.read().contains(addr) { continue; } - if let Some((hash, _)) = self.responded_root_hash.get(addr) { + if let Some((hash, _)) = self.responded_root_hash.read().get(addr) { if hash == root_hash { root_hash_peers.push(p.clone()); } else { @@ -376,166 +396,112 @@ impl StateSync { if root_hash_peers.is_empty() { if other_hashes > 0 { // no peers commited to hash, resetting download process if we have alternatives. - self.chain.reset_desegmenter(); - // Sinse there are other groups, treating that as attack. Banning all supporters - self.ban_this_session(desegmenter, sync_peers); + self.ban_this_session(desegmenter.get_bitmap_root_hash(), sync_peers); return SyncResponse::new( SyncRequestResponses::Syncing, Self::get_peer_capabilities(), "Banning this PIBD session. Seems like that was a fraud".into(), ); } else { - // Since there are no alternatives, keep waiting... 
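The root-hash election a few lines up (the same scheme is used for the headers-hash roots) simply counts how many peers vouched for each hash and takes the most common one. Condensed, with peer addresses as strings and hashes as integers (stand-ins only, not the node's types):

    use std::collections::HashMap;

    // Majority election: count how many peers reported each root hash and
    // pick the most common. Ties resolve arbitrarily, as with max_by_key.
    fn elect_root_hash(responses: &HashMap<String, u64>) -> Option<u64> {
        let mut counts: HashMap<u64, u32> = HashMap::new();
        for hash in responses.values() {
            *counts.entry(*hash).or_insert(0) += 1;
        }
        counts.into_iter().max_by_key(|&(_, n)| n).map(|(h, _)| h)
    }

    fn main() {
        let responses: HashMap<String, u64> = [
            ("peer1".to_string(), 42),
            ("peer2".to_string(), 42),
            ("peer3".to_string(), 7), // minority view; banned later if fraudulent
        ]
        .into_iter()
        .collect();
        assert_eq!(elect_root_hash(&responses), Some(42));
    }

Peers that committed to a losing hash are remembered, which is what makes the session-wide ban below possible when the elected root later proves bad.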
- return SyncResponse::new( - SyncRequestResponses::WaitingForPeers, - Self::get_peer_capabilities(), - "No peers that support PIBD.".into(), - ); - } - } - - let need_request = self.request_tracker.calculate_needed_requests( - root_hash_peers.len(), - excluded_requests as usize, - excluded_peers as usize, - self.pibd_params.get_segments_request_per_peer(), - self.pibd_params.get_segments_requests_limit(), - ); - if need_request > 0 { - match desegmenter - .next_desired_segments(need_request, self.request_tracker.get_requested()) - { - Ok(segments) => { - for seg in segments { - let key = (seg.segment_type.clone(), seg.identifier.idx.clone()); - debug_assert!(!self.request_tracker.has_request(&key)); - debug_assert!(!root_hash_peers.is_empty()); - let peer = root_hash_peers - .choose(&mut rng) - .expect("peers is not empty"); - - let send_res = Self::send_request(peer, &seg, &self.target_archive_hash); - match send_res { - Ok(_) => { - let msg = format!("{:?}", key); - self.request_tracker.register_request( - key, - peer.info.addr.clone(), - msg, - (seg.clone(), self.target_archive_hash.clone()), - ); - } - Err(e) => { - let msg = format!( - "Error sending segment request to peer at {}, reason: {:?}", - peer.info.addr, e - ); - info!("{}", msg); - sync_peers.report_error_response(&peer.info.addr, msg); - } - } - } + if excluded_requests == 0 { + // Since there are no alternatives, keep waiting... return SyncResponse::new( - SyncRequestResponses::Syncing, + SyncRequestResponses::WaitingForPeers, Self::get_peer_capabilities(), - format!( - "Has peers: {} Requests in waiting Q: {}", - root_hash_peers.len() + excluded_peers as usize, - self.request_tracker.get_requests_num() - ), + "No peers that support PIBD.".into(), ); - } - Err(err) => { - error!("Failed to request more segments. Error: {}", err); - // let's reset everything and restart - self.ban_this_session(desegmenter, sync_peers); + } else { return SyncResponse::new( SyncRequestResponses::Syncing, Self::get_peer_capabilities(), - format!("Failed to request more segments. Error: {}", err), + format!( + "All PIBD peers are busy. Requests in waiting Q: {}", + self.request_tracker.get_requests_num() + ), ); } } } - // waiting for responses... - return SyncResponse::new( - SyncRequestResponses::Syncing, - Self::get_peer_capabilities(), - format!( - "Has peers {}, Requests in waiting Q: {}", - root_hash_peers.len() + excluded_peers as usize, - self.request_tracker.get_requests_num() - ), - ); + + self.send_requests( + &root_hash_peers, + &root_hash_peers, + excluded_requests, + excluded_peers, + desegmenter, + sync_peers, + ) } - fn ban_this_session(&mut self, desegmenter: &Desegmenter, sync_peers: &mut SyncPeers) { - let root_hash = desegmenter.get_bitmap_root_hash(); - error!( - "Banning all peers joind for root hash {}", - desegmenter.get_bitmap_root_hash() - ); + fn ban_this_session(&self, root_hash: &Hash, sync_peers: &SyncPeers) { + error!("Banning all peers joind for root hash {}", root_hash); // Banning all peers that was agree with that hash... 
- for (peer, (hash, _)) in &self.responded_root_hash {
- if *hash == *root_hash {
- sync_peers.ban_peer(peer, "bad root hash".into());
+ {
+ let responded_root_hash = self.responded_root_hash.read();
+ for (peer, (hash, _)) in &*responded_root_hash {
+ if *hash == *root_hash {
+ sync_peers.ban_peer(peer, "bad root hash".into());
+ }
}
}
- self.reset_desegmenter_data();
+ self.reset_desegmenter.store(true, Ordering::Relaxed);
}

- pub fn reset_desegmenter_data(&mut self) {
- self.chain.reset_desegmenter();
- self.requested_root_hash.clear();
- self.responded_root_hash.clear();
- self.responded_with_another_height.clear();
+ pub fn reset_desegmenter_data(&self) {
+ *self.desegmenter.write() = None;
+ self.requested_root_hash.write().clear();
+ self.responded_root_hash.write().clear();
+ self.responded_with_another_height.write().clear();
self.request_tracker.clear();
- self.is_complete = false;
+ self.is_complete.store(false, Ordering::Relaxed);
}

pub fn recieve_pibd_status(
- &mut self,
+ &self,
peer: &PeerAddr,
_header_hash: Hash,
header_height: u64,
output_bitmap_root: Hash,
) {
// Only one commitment allowed per peer.
- if self.responded_root_hash.contains_key(peer)
- || header_height != self.target_archive_height
+ if self.responded_root_hash.read().contains_key(peer)
+ || header_height != self.target_archive_height.load(Ordering::Relaxed)
{
return;
}
self.responded_root_hash
+ .write()
.insert(peer.clone(), (output_bitmap_root, Utc::now()));
}

pub fn recieve_another_archive_header(
- &mut self,
+ &self,
peer: &PeerAddr,
_header_hash: &Hash,
header_height: u64,
) {
- if header_height == self.target_archive_height {
+ if header_height == self.target_archive_height.load(Ordering::Relaxed) {
return;
}
- self.responded_with_another_height.insert(peer.clone());
+ self.responded_with_another_height
+ .write()
+ .insert(peer.clone());
}

// return Some root hash if validation was successfull
fn validate_root_hash(&self, peer: &PeerAddr, archive_header_hash: &Hash) -> Option<Hash> {
- let desegmenter = self.chain.get_desegmenter();
- let desegmenter = desegmenter.read();
- if desegmenter.is_none() || self.target_archive_hash != *archive_header_hash {
+ let desegmenter = self.desegmenter.read();
+ if desegmenter.is_none() || *self.target_archive_hash.read() != *archive_header_hash {
return None;
}
- match self.responded_root_hash.get(peer) {
+ let hash_for_peer = self.responded_root_hash.read().get(peer).cloned();
+ match hash_for_peer {
Some((hash, _)) => {
- if desegmenter.as_ref().unwrap().get_bitmap_root_hash() == hash {
- return Some(hash.clone());
+ if *desegmenter.as_ref().unwrap().get_bitmap_root_hash() == hash {
+ return Some(hash);
}
}
None => {}
@@ -553,133 +519,76 @@
// return true if peer matched registered, so we get response from whom it was requested
fn track_and_request_more_segments(
- &mut self,
+ &self,
key: &(SegmentType, u64),
peer: &PeerAddr,
peers: &Arc<p2p::Peers>,
- sync_peers: &mut SyncPeers,
+ sync_peers: &SyncPeers,
) {
- if let Some(peer_addr) = self.request_tracker.remove_request(key) {
- if peer_addr == *peer {
- if self.request_tracker.get_update_requests_to_next_ask() == 0 {
- let (peers, excluded_requests, excluded_peers) = sync_utils::get_sync_peers(
- peers,
- self.pibd_params.get_segments_request_per_peer(),
- Capabilities::PIBD_HIST,
- self.target_archive_height,
- self.request_tracker.get_requests_num(),
- &self.request_tracker.get_peers_track_data(),
- );
- if peers.is_empty() {
- return;
- }
-
- let desegmenter = self.chain.get_desegmenter();
- let mut
desegmenter = desegmenter.write(); - if let Some(desegmenter) = desegmenter.as_mut() { - if !desegmenter.is_complete() { - let mut rng = rand::thread_rng(); - - // let's check what peers with root hash are exist - let root_hash = desegmenter.get_bitmap_root_hash(); - let mut root_hash_peers: Vec> = Vec::new(); - for p in peers { - let addr = &p.info.addr; - if self.responded_with_another_height.contains(addr) { - continue; - } - if let Some((hash, _)) = self.responded_root_hash.get(addr) { - if hash == root_hash { - root_hash_peers.push(p.clone()); - } - } - } - - if root_hash_peers.is_empty() { - return; - } + let _ = self.request_tracker.remove_request(key, peer); + + if self.request_tracker.get_update_requests_to_next_ask() == 0 { + let (peers, excluded_requests, excluded_peers) = sync_utils::get_sync_peers( + peers, + self.pibd_params.get_segments_request_per_peer(), + Capabilities::PIBD_HIST, + self.target_archive_height.load(Ordering::Relaxed), + &self.request_tracker, + ); + if peers.is_empty() { + return; + } - let need_request = self.request_tracker.calculate_needed_requests( - root_hash_peers.len(), - excluded_requests as usize, - excluded_peers as usize, - self.pibd_params.get_segments_request_per_peer(), - self.pibd_params.get_segments_requests_limit(), - ); - if need_request > 0 { - match desegmenter.next_desired_segments( - need_request, - self.request_tracker.get_requested(), - ) { - Ok(segments) => { - for seg in segments { - let key = ( - seg.segment_type.clone(), - seg.identifier.idx.clone(), - ); - debug_assert!(!self.request_tracker.has_request(&key)); - debug_assert!(!root_hash_peers.is_empty()); - let peer = root_hash_peers - .choose(&mut rng) - .expect("peers is not empty"); - - let send_res = Self::send_request( - peer, - &seg, - &self.target_archive_hash, - ); - match send_res { - Ok(_) => { - let msg = format!("{:?}", key); - self.request_tracker.register_request( - key, - peer.info.addr.clone(), - msg, - ( - seg.clone(), - self.target_archive_hash.clone(), - ), - ); - } - Err(e) => { - let msg = format!("Error sending segment request to peer at {}, reason: {:?}",peer.info.addr, e); - info!("{}", msg); - sync_peers.report_error_response( - &peer.info.addr, - msg, - ); - } - } - } - } - Err(err) => { - error!("Failed to request more segments during update. 
Error: {}", err); - } - } + let desegmenter = self.desegmenter.read(); + if let Some(desegmenter) = desegmenter.as_ref() { + if !desegmenter.is_complete() { + // let's check what peers with root hash are exist + let root_hash = desegmenter.get_bitmap_root_hash(); + let mut root_hash_peers: Vec> = Vec::new(); + for p in peers { + let addr = &p.info.addr; + if self.responded_with_another_height.read().contains(addr) { + continue; + } + if let Some((hash, _)) = self.responded_root_hash.read().get(addr) { + if hash == root_hash { + root_hash_peers.push(p.clone()); } } } + + if root_hash_peers.is_empty() { + return; + } + + let _ = self.send_requests( + &root_hash_peers, + &root_hash_peers, + excluded_requests, + excluded_peers, + desegmenter, + sync_peers, + ); } } } } pub fn receive_bitmap_segment( - &mut self, + &self, peer: &PeerAddr, archive_header_hash: &Hash, segment: Segment, peers: &Arc, - sync_peers: &mut SyncPeers, + sync_peers: &SyncPeers, ) { let key = (SegmentType::Bitmap, segment.id().idx); let expected_peer = self.is_expected_peer(&key, peer); if let Some(root_hash) = self.validate_root_hash(peer, archive_header_hash) { - let desegmenter = self.chain.get_desegmenter(); - let mut desegmenter = desegmenter.write(); + let desegmenter = self.desegmenter.read(); let desegmenter = desegmenter - .as_mut() + .as_ref() .expect("Desegmenter must exist at this point"); match desegmenter.add_bitmap_segment(segment, &root_hash) { Ok(_) => { @@ -705,21 +614,20 @@ impl StateSync { } pub fn receive_output_segment( - &mut self, + &self, peer: &PeerAddr, archive_header_hash: &Hash, segment: Segment, peers: &Arc, - sync_peers: &mut SyncPeers, + sync_peers: &SyncPeers, ) { let key = (SegmentType::Output, segment.id().idx); let expected_peer = self.is_expected_peer(&key, peer); if let Some(root_hash) = self.validate_root_hash(peer, archive_header_hash) { - let desegmenter = self.chain.get_desegmenter(); - let mut desegmenter = desegmenter.write(); + let desegmenter = self.desegmenter.read(); let desegmenter = desegmenter - .as_mut() + .as_ref() .expect("Desegmenter must exist at this point"); match desegmenter.add_output_segment(segment, &root_hash) { Ok(_) => { @@ -744,22 +652,21 @@ impl StateSync { } pub fn receive_rangeproof_segment( - &mut self, + &self, peer: &PeerAddr, archive_header_hash: &Hash, segment: Segment, peers: &Arc, - sync_peers: &mut SyncPeers, + sync_peers: &SyncPeers, ) { let key = (SegmentType::RangeProof, segment.id().idx); let expected_peer = self.is_expected_peer(&key, peer); // Process first, unregister after. During unregister we might issue more requests. 
if let Some(root_hash) = self.validate_root_hash(peer, archive_header_hash) { - let desegmenter = self.chain.get_desegmenter(); - let mut desegmenter = desegmenter.write(); + let desegmenter = self.desegmenter.read(); let desegmenter = desegmenter - .as_mut() + .as_ref() .expect("Desegmenter must exist at this point"); match desegmenter.add_rangeproof_segment(segment, &root_hash) { Ok(_) => { @@ -784,21 +691,20 @@ impl StateSync { } pub fn receive_kernel_segment( - &mut self, + &self, peer: &PeerAddr, archive_header_hash: &Hash, segment: Segment, peers: &Arc, - sync_peers: &mut SyncPeers, + sync_peers: &SyncPeers, ) { let key = (SegmentType::Kernel, segment.id().idx); let expected_peer = self.is_expected_peer(&key, peer); if let Some(root_hash) = self.validate_root_hash(peer, archive_header_hash) { - let desegmenter = self.chain.get_desegmenter(); - let mut desegmenter = desegmenter.write(); + let desegmenter = self.desegmenter.read(); let desegmenter = desegmenter - .as_mut() + .as_ref() .expect("Desegmenter must exist at this point"); match desegmenter.add_kernel_segment(segment, &root_hash) { Ok(_) => { @@ -842,4 +748,182 @@ impl StateSync { }; send_res } + + fn calc_retry_running_requests(&self) -> usize { + let now = Utc::now(); + let mut retry_expiration_times = self.retry_expiration_times.write(); + while !retry_expiration_times.is_empty() { + if retry_expiration_times[0] < now { + retry_expiration_times.pop_front(); + } else { + break; + } + } + retry_expiration_times.len() + } + + fn send_requests( + &self, + peers: &Vec>, + root_hash_peers: &Vec>, + excluded_requests: u32, + excluded_peers: u32, + desegmenter: &Desegmenter, + sync_peers: &SyncPeers, + ) -> SyncResponse { + if let Some(_) = self.send_requests_lock.try_write() { + let mut need_request = self.request_tracker.calculate_needed_requests( + root_hash_peers.len(), + excluded_requests as usize, + excluded_peers as usize, + self.pibd_params.get_segments_request_per_peer(), + self.pibd_params.get_segments_requests_limit(), + ); + need_request = need_request.saturating_sub(self.calc_retry_running_requests()); + if need_request > 0 { + match desegmenter.next_desired_segments(need_request, &self.request_tracker) { + Ok((req_segments, retry_segments)) => { + let mut rng = rand::thread_rng(); + let now = Utc::now(); + let target_archive_hash = self.target_archive_hash.read().clone(); + + if !retry_segments.is_empty() { + let segm_type = &retry_segments[0].segment_type; + + let last_retry_idx = self.last_retry_idx.try_write(); + if let Some(mut last_retry_idx) = last_retry_idx { + let retry_idx = + last_retry_idx.get(&segm_type).cloned().unwrap_or(0); + + for segm in &retry_segments { + debug_assert!(*segm_type == segm.segment_type); + if segm.identifier.idx < retry_idx { + continue; + } + + if need_request == 0 { + break; + } + + // We don't want to send retry to the peer whom we already send the data + if let Some(requested_peer) = + self.request_tracker.get_expected_peer(&( + segm_type.clone(), + segm.identifier.idx, + )) { + let dup_peers: Vec> = peers + .iter() + .filter(|p| p.info.addr != requested_peer) + .cloned() + .choose_multiple(&mut rng, 2); + + if dup_peers.len() == 0 { + break; + } + + if need_request < dup_peers.len() { + need_request = 0; + break; + } + need_request = need_request.saturating_sub(dup_peers.len()); + + // we can do retry now + for p in dup_peers { + debug!("Processing duplicated request for the segment {:?} at {}, peer {:?}", segm_type, segm.identifier.idx, p.info.addr); + match Self::send_request( 
+ &p, + &segm, + &target_archive_hash, + ) { + Ok(_) => { + self.retry_expiration_times.write().push_back( + now + self + .request_tracker + .get_average_latency(), + ) + } + Err(e) => { + let msg = format!("Failed to send duplicate segment {:?} at {}, peer {:?}, Error: {}", segm_type, segm.identifier.idx, p.info.addr, e); + error!("{}", msg); + sync_peers + .report_no_response(&p.info.addr, msg); + break; + } + } + } + } + + (*last_retry_idx) + .insert(segm_type.clone(), segm.identifier.idx); + } + } + } + + for seg in req_segments { + if need_request == 0 { + break; + } + need_request = need_request.saturating_sub(1); + + let key = (seg.segment_type.clone(), seg.identifier.idx.clone()); + debug_assert!(!self.request_tracker.has_request(&key)); + debug_assert!(!root_hash_peers.is_empty()); + let peer = root_hash_peers + .choose(&mut rng) + .expect("peers is not empty"); + + let send_res = Self::send_request(peer, &seg, &target_archive_hash); + match send_res { + Ok(_) => { + let msg = format!("{:?}", key); + self.request_tracker.register_request( + key, + peer.info.addr.clone(), + msg, + ); + } + Err(e) => { + let msg = format!( + "Error sending segment request to peer at {}, reason: {:?}", + peer.info.addr, e + ); + info!("{}", msg); + sync_peers.report_no_response(&peer.info.addr, msg); + } + } + } + return SyncResponse::new( + SyncRequestResponses::Syncing, + Self::get_peer_capabilities(), + format!( + "Has peers: {} Requests in waiting Q: {}", + root_hash_peers.len() + excluded_peers as usize, + self.request_tracker.get_requests_num() + ), + ); + } + Err(err) => { + error!("Failed to request more segments. Error: {}", err); + // let's reset everything and restart + self.ban_this_session(desegmenter.get_bitmap_root_hash(), sync_peers); + return SyncResponse::new( + SyncRequestResponses::Syncing, + Self::get_peer_capabilities(), + format!("Failed to request more segments. Error: {}", err), + ); + } + } + } + } + // waiting for responses... 
+ return SyncResponse::new( + SyncRequestResponses::Syncing, + Self::get_peer_capabilities(), + format!( + "Has peers {}, Requests in waiting Q: {}", + root_hash_peers.len() + excluded_peers as usize, + self.request_tracker.get_requests_num() + ), + ); + } } diff --git a/servers/src/mwc/sync/sync_manager.rs b/servers/src/mwc/sync/sync_manager.rs index 81c9ddca0..447e079a9 100644 --- a/servers/src/mwc/sync/sync_manager.rs +++ b/servers/src/mwc/sync/sync_manager.rs @@ -28,12 +28,13 @@ use mwc_core::core::hash::Hash; use mwc_core::core::{OutputIdentifier, Segment, TxKernel}; use mwc_p2p::{Capabilities, PeerAddr, Peers}; use mwc_util::secp::pedersen::RangeProof; -use mwc_util::StopState; +use mwc_util::secp::rand::Rng; +use mwc_util::{RwLock, StopState}; use std::sync::Arc; /// Sync Manager is reponsible for coordination of all syncing process pub struct SyncManager { - headers_hashes: HeadersHashSync, + headers_hashes: RwLock, headers: HeaderSync, state: StateSync, body: BodySync, @@ -46,13 +47,13 @@ pub struct SyncManager { sync_state: Arc, stop_state: Arc, - cached_response: Option>, + cached_response: RwLock>>, } impl SyncManager { pub fn new(chain: Arc, sync_state: Arc, stop_state: Arc) -> Self { SyncManager { - headers_hashes: HeadersHashSync::new(chain.clone()), + headers_hashes: RwLock::new(HeadersHashSync::new(chain.clone())), headers: HeaderSync::new(chain.clone()), state: StateSync::new(chain.clone()), body: BodySync::new(chain), @@ -61,22 +62,28 @@ impl SyncManager { state_sync_peers: SyncPeers::new(), sync_state, stop_state, - cached_response: None, + cached_response: RwLock::new(None), } } - pub fn request(&mut self, peers: &Arc) -> SyncResponse { - if let Some(cached_response) = &self.cached_response { + pub fn request(&self, peers: &Arc) -> SyncResponse { + let cached_response = self.cached_response.read().clone(); + if let Some(cached_response) = cached_response { if !cached_response.is_expired() { - return cached_response.get_response().clone(); + return cached_response.to_response(); } else { - self.cached_response = None; + *self.cached_response.write() = None; } } // Apply peers status (ban if needed) - self.headers_sync_peers.apply_peers_status(peers); - self.state_sync_peers.apply_peers_status(peers); + let mut offline1 = self.headers_sync_peers.apply_peers_status(peers); + let mut offline2 = self.state_sync_peers.apply_peers_status(peers); + + offline1.append(&mut offline2); + let mut rng = rand::thread_rng(); + offline1.retain(|_| rng.gen_range(0, 10) != 7); // We want to exclude some, because peer might become online + peers.set_excluded_peers(&offline1); let mut best_height = peers .iter() @@ -118,12 +125,17 @@ impl SyncManager { ); } - let headers_hash_resp = self.headers_hashes.request( - peers, - &self.sync_state, - &mut self.headers_sync_peers, - best_height, - ); + let r = self.headers_hashes.read().request_pre(best_height); + let headers_hash_resp = match r { + Some(resp) => resp, + None => self.headers_hashes.write().request_impl( + peers, + &self.sync_state, + &self.headers_sync_peers, + best_height, + ), + }; + debug!("headers_hash_resp: {:?}", headers_hash_resp); match headers_hash_resp.response { SyncRequestResponses::WaitingForPeers => return headers_hash_resp, @@ -139,24 +151,26 @@ impl SyncManager { let headers_resp = self.headers.request( peers, &self.sync_state, - &mut self.headers_sync_peers, - &self.headers_hashes, + &self.headers_sync_peers, + &self.headers_hashes.read(), best_height, ); debug!("headers_resp: {:?}", headers_resp); match 
headers_resp.response { SyncRequestResponses::WaitingForPeers => { self.headers_hashes - .reset_ban_commited_to_hash(peers, &mut self.headers_sync_peers); + .write() + .reset_ban_commited_to_hash(peers, &self.headers_sync_peers); self.headers_sync_peers.reset(); return headers_resp; } SyncRequestResponses::Syncing => return headers_resp, + SyncRequestResponses::HashMoreHeadersToApply => return headers_resp, SyncRequestResponses::WaitingForHeadersHash => { debug_assert!(false); // should never happen, headers_hashes above must be in sync or wait for peers return headers_resp; } - SyncRequestResponses::HeadersPibdReady => self.headers_hashes.reset_hash_data(), + SyncRequestResponses::HeadersPibdReady => self.headers_hashes.write().reset_hash_data(), SyncRequestResponses::HeadersReady => headers_ready = true, _ => { debug_assert!(false); @@ -166,7 +180,7 @@ impl SyncManager { let state_resp = self.state.request( peers, self.sync_state.clone(), - &mut self.state_sync_peers, + &self.state_sync_peers, self.stop_state.clone(), best_height, ); @@ -181,12 +195,10 @@ impl SyncManager { } } - match self.body.request( - peers, - &self.sync_state, - &mut self.state_sync_peers, - best_height, - ) { + match self + .body + .request(peers, &self.sync_state, &self.state_sync_peers, best_height) + { Ok(body_resp) => { debug!("body_resp: {:?}", body_resp); match body_resp.response { @@ -198,7 +210,8 @@ impl SyncManager { Capabilities::UNKNOWN, "DONE!".into(), ); - self.cached_response = + peers.set_excluded_peers(&vec![]); + *self.cached_response.write() = Some(CachedResponse::new(resp.clone(), Duration::seconds(180))); return resp; } else { @@ -229,46 +242,49 @@ impl SyncManager { } pub fn receive_headers_hash_response( - &mut self, + &self, peer: &PeerAddr, archive_height: u64, headers_hash_root: Hash, ) { - self.headers_hashes.receive_headers_hash_response( + self.headers_hashes.write().receive_headers_hash_response( peer, archive_height, headers_hash_root, - &mut self.headers_sync_peers, + &self.headers_sync_peers, ); } pub fn receive_header_hashes_segment( - &mut self, + &self, peer: &PeerAddr, header_hashes_root: Hash, segment: Segment, ) { - self.headers_hashes.receive_header_hashes_segment( + self.headers_hashes.write().receive_header_hashes_segment( peer, header_hashes_root, segment, - &mut self.headers_sync_peers, + &self.headers_sync_peers, ); } pub fn receive_headers( - &mut self, + &self, peer: &PeerAddr, bhs: &[mwc_core::core::BlockHeader], remaining: u64, peers: Arc, ) { + // Note, becauce of hight throughput, it must be unblocking read, blocking write is not OK + let headers_hashes = self.headers_hashes.read(); + let headers_hash_desegmenter = headers_hashes.get_headers_hash_desegmenter(); if let Err(e) = self.headers.receive_headers( peer, bhs, remaining, - &mut self.headers_sync_peers, - self.headers_hashes.get_headers_hash_desegmenter(), + &self.headers_sync_peers, + headers_hash_desegmenter, &peers, ) { error!("receive_headers failed with error: {}", e); @@ -276,7 +292,7 @@ impl SyncManager { } pub fn recieve_pibd_status( - &mut self, + &self, peer: &PeerAddr, header_hash: Hash, header_height: u64, @@ -287,19 +303,22 @@ impl SyncManager { } pub fn recieve_another_archive_header( - &mut self, + &self, peer: &PeerAddr, header_hash: Hash, header_height: u64, ) { - self.headers_hashes - .recieve_another_archive_header(peer, &header_hash, header_height); + self.headers_hashes.write().recieve_another_archive_header( + peer, + &header_hash, + header_height, + ); self.state 
.recieve_another_archive_header(peer, &header_hash, header_height); } pub fn receive_bitmap_segment( - &mut self, + &self, peer: &PeerAddr, archive_header_hash: &Hash, segment: Segment, @@ -310,12 +329,12 @@ impl SyncManager { archive_header_hash, segment, peers, - &mut self.state_sync_peers, + &self.state_sync_peers, ); } pub fn receive_output_segment( - &mut self, + &self, peer: &PeerAddr, bitmap_root_hash: &Hash, segment: Segment, @@ -326,12 +345,12 @@ impl SyncManager { bitmap_root_hash, segment, peers, - &mut self.state_sync_peers, + &self.state_sync_peers, ); } pub fn receive_rangeproof_segment( - &mut self, + &self, peer: &PeerAddr, bitmap_root_hash: &Hash, segment: Segment, @@ -342,12 +361,12 @@ impl SyncManager { bitmap_root_hash, segment, peers, - &mut self.state_sync_peers, + &self.state_sync_peers, ); } pub fn receive_kernel_segment( - &mut self, + &self, peer: &PeerAddr, bitmap_root_hash: &Hash, segment: Segment, @@ -358,12 +377,12 @@ impl SyncManager { bitmap_root_hash, segment, peers, - &mut self.state_sync_peers, + &self.state_sync_peers, ); } pub fn recieve_block_reporting( - &mut self, + &self, accepted: bool, // block accepted/rejected flag peer: &PeerAddr, block_hash: &Hash, @@ -374,7 +393,7 @@ impl SyncManager { block_hash, peer, peers, - &mut self.state_sync_peers, + &self.state_sync_peers, ); } } diff --git a/servers/src/mwc/sync/sync_peers.rs b/servers/src/mwc/sync/sync_peers.rs index 3452280e6..fac3a93ec 100644 --- a/servers/src/mwc/sync/sync_peers.rs +++ b/servers/src/mwc/sync/sync_peers.rs @@ -48,7 +48,8 @@ impl PeerPibdStatus { /// Checking events log to decide if peer wasn't active enough /// Note, this method is expecting to truncate responses, so data will be managable /// during long run - fn check_for_ban(&mut self, peer: &String) -> (bool, String) { + /// Return: (ban, offline, comment) + fn check_for_ban(&mut self, peer: &String) -> (bool, bool, String) { let mut bans = 0; let mut errors = 0; let mut no_response = 0; @@ -84,20 +85,21 @@ impl PeerPibdStatus { } } - let res = bans > 0 - || errors > 1 - || (self.responses.len() >= MIN_RESPONSE_NUM && success <= self.responses.len() / 2); + let res_ban = bans > 0 || errors > 1; + + let res_network_issue = + self.responses.len() >= MIN_RESPONSE_NUM && success <= self.responses.len() / 2; debug!( - "Checking for Ban. Peer: {}, bans={} errors={} no_resp={} ok={} RES={}", - peer, bans, errors, no_response, success, res + "Checking for Ban. 
Peer: {}, bans={} errors={} no_resp={} ok={} RES={},{}", + peer, bans, errors, no_response, success, res_ban, res_network_issue ); while self.responses.len() > MIN_RESPONSE_NUM { self.responses.pop_front(); } - (res, comment) + (res_ban, res_network_issue, comment) } } @@ -116,7 +118,7 @@ impl SyncPeers { } } - pub fn reset(&mut self) { + pub fn reset(&self) { self.peers_status.write().clear(); self.banned_peers.write().clear(); self.new_events_peers.write().clear(); @@ -150,23 +152,28 @@ impl SyncPeers { self.add_event(peer.as_key(), PeerStatusEvent::Ban(message)); } - pub fn apply_peers_status(&self, peers: &Arc) { + pub fn apply_peers_status(&self, peers: &Arc) -> Vec { let mut peers_status = self.peers_status.write(); let mut check_peers = self.new_events_peers.write(); + let mut offline_peers: Vec = Vec::new(); for cp in check_peers.iter() { if let Some(status) = peers_status.get_mut(cp) { - let (ban, comment) = status.check_for_ban(cp); + let (ban, offline, comment) = status.check_for_ban(cp); + let peer_addr = PeerAddr::from_str(cp); if ban { - let peer_addr = PeerAddr::from_str(cp); if let Err(e) = peers.ban_peer(&peer_addr, ReasonForBan::PibdFailure, &comment) { warn!("ban_peer is failed with error: {}", e); } - self.banned_peers.write().insert(peer_addr); + self.banned_peers.write().insert(peer_addr.clone()); + } + if ban || offline { + offline_peers.push(peer_addr); } } } check_peers.clear(); + offline_peers } fn add_event(&self, peer: String, event: PeerStatusEvent) { diff --git a/servers/src/mwc/sync/sync_utils.rs b/servers/src/mwc/sync/sync_utils.rs index 20ef34c3b..6cbff71f4 100644 --- a/servers/src/mwc/sync/sync_utils.rs +++ b/servers/src/mwc/sync/sync_utils.rs @@ -17,10 +17,13 @@ use crate::mwc::sync::sync_peers::SyncPeers; use chrono::{DateTime, Duration, Utc}; +use mwc_chain::txhashset::request_lookup::RequestLookup; use mwc_chain::{pibd_params, Chain}; use mwc_p2p::{Capabilities, Peer, PeerAddr, Peers}; +use mwc_util::RwLock; use std::cmp; use std::collections::{HashMap, VecDeque}; +use std::sync::atomic::{AtomicI32, Ordering}; use std::sync::Arc; #[derive(Clone, Debug, PartialEq)] @@ -31,6 +34,7 @@ pub enum SyncRequestResponses { WaitingForHeadersHash, HeadersPibdReady, HeadersReady, + HashMoreHeadersToApply, WaitingForHeaders, StatePibdReady, BadState, // need update state, probably horizon was changed, need to retry @@ -59,6 +63,7 @@ impl SyncResponse { } } +#[derive(Clone)] pub struct CachedResponse { time: DateTime, response: T, @@ -76,105 +81,118 @@ impl CachedResponse { Utc::now() > self.time } - pub fn get_response(&self) -> &T { - &self.response + pub fn to_response(self) -> T { + self.response } } +#[derive(Clone)] pub struct PeerTrackData { requests: u32, - response_time: VecDeque, - response_time_sum: i64, } impl PeerTrackData { fn new(requests: u32) -> Self { - PeerTrackData { - requests, - response_time: VecDeque::new(), // units: ms - response_time_sum: 0, - } - } - - fn get_response_time(&self) -> i64 { - if self.response_time.is_empty() { - pibd_params::SEGMENT_DEFAULT_RETRY_MS - } else { - self.response_time_sum / self.response_time.len() as i64 - } - } - - fn report_response(&mut self, response_latency: Duration) { - self.requests = self.requests.saturating_sub(1); - let response_latency = response_latency.num_milliseconds(); - self.response_time_sum += response_latency; - self.response_time.push_back(response_latency); - if self.response_time.len() > 10 { - self.response_time_sum -= self - .response_time - .pop_front() - .expect("response_time not 
empty"); - } + PeerTrackData { requests } } } -pub struct RequestData { +pub struct RequestData { peer: PeerAddr, request_time: DateTime, - retry_time: DateTime, request_message: String, // for logging and debugging - request_data: V, // data enough to retry the same request } -impl RequestData { - fn new(peer: PeerAddr, request_message: String, request_data: V) -> Self { +impl RequestData { + fn new(peer: PeerAddr, request_message: String) -> Self { let now = Utc::now(); RequestData { peer, request_time: now.clone(), - retry_time: now, request_message, - request_data, } } } +struct LatencyTracker { + latency_history: VecDeque, + latency_sum: i64, +} + +impl LatencyTracker { + fn new() -> Self { + LatencyTracker { + latency_history: VecDeque::new(), + latency_sum: 0, + } + } + + fn clear(&mut self) { + self.latency_history.clear(); + self.latency_sum = 0; + } + + fn add_latency(&mut self, latency_ms: i64) { + self.latency_history.push_back(latency_ms); + self.latency_sum += latency_ms; + while self.latency_history.len() > 15 { + let lt = self.latency_history.pop_front().expect("non empty data"); + self.latency_sum -= lt; + } + } + + fn get_average_latency(&self) -> Duration { + let dur_ms = if self.latency_history.is_empty() { + pibd_params::PIBD_REQUESTS_TIMEOUT_SECS * 1000 + } else { + self.latency_sum / self.latency_history.len() as i64 + }; + Duration::microseconds(dur_ms) + } +} + /// Utility class or tracking requests. Here we put common request related functionality /// Idea behind that is to make sync tolerate stale peer. We don't want to wait slow peer for full timeout, /// instead we want to utilize more faster peers. Also, we don't want superfast peer to take more /// traffic. In other words, we don't want peers be able to manipulate traffic shceduler. -pub struct RequestTracker +pub struct RequestTracker +where + K: std::cmp::Eq + std::hash::Hash, +{ + // Values: peer, time, message. + requested: RwLock>, // Lock 1 + // there are so many peers and many requests, so we better to hande 'slow' peer cases + peers_stats: RwLock>, // Lock 2 + requests_to_next_ask: AtomicI32, + // latency in MS + latency_tracker: RwLock, +} + +impl RequestLookup for RequestTracker where K: std::cmp::Eq + std::hash::Hash, { - requested: HashMap>, // Values: peer, time, message - peers_stats: HashMap, // there are so many peers and many requests, so we better to hande 'slow' peer cases - requests_to_next_ask: usize, + fn contains_request(&self, key: &K) -> bool { + self.requested.read().contains_key(key) + } } -impl RequestTracker +impl RequestTracker where K: std::cmp::Eq + std::hash::Hash, { pub fn new() -> Self { RequestTracker { - requested: HashMap::new(), - peers_stats: HashMap::new(), - requests_to_next_ask: 0, + requested: RwLock::new(HashMap::new()), + peers_stats: RwLock::new(HashMap::new()), + requests_to_next_ask: AtomicI32::new(0), + latency_tracker: RwLock::new(LatencyTracker::new()), } } - pub fn retain_expired( - &mut self, - expiration_time_interval_sec: i64, - sync_peers: &mut SyncPeers, - retry_callback: F, - ) where - // Callback function that suppose to retry request to the peer. Return true if peer was alive and retry was sent. 
- F: Fn(&PeerAddr, &V) -> bool, - { - let requested = &mut self.requested; - let peers_stats = &mut self.peers_stats; + pub fn retain_expired(&self, expiration_time_interval_sec: i64, sync_peers: &SyncPeers) { + let mut requested = self.requested.write(); + let peers_stats = &mut self.peers_stats.write(); let now = Utc::now(); // first let's clean up stale requests... @@ -188,98 +206,102 @@ where } return false; } - // check we want to retry - let retry_ms = match peer_stat.as_ref() { - Some(ps) => ps.get_response_time() * 2, - None => pibd_params::SEGMENT_DEFAULT_RETRY_MS * 2, - }; - if (now - request_data.retry_time).num_milliseconds() > retry_ms { - if !retry_callback(&request_data.peer, &request_data.request_data) { - // retry failed, so the peer is offline. - sync_peers.report_no_response( - &request_data.peer, - request_data.request_message.clone(), - ); - if let Some(n) = peer_stat { - n.requests = n.requests.saturating_sub(1); - } - return false; - } - // retry was sent, we are good... - request_data.retry_time = now; - } true }); } - pub fn clear(&mut self) { - self.requested.clear(); - self.peers_stats.clear(); - self.requests_to_next_ask = 0; - } - - pub fn get_requested(&self) -> &HashMap> { - &self.requested + pub fn clear(&self) { + self.requested.write().clear(); + self.peers_stats.write().clear(); + self.requests_to_next_ask.store(0, Ordering::Relaxed); + self.latency_tracker.write().clear(); } /// Calculate how many new requests we can make to the peers. This call updates requests_to_next_ask pub fn calculate_needed_requests( - &mut self, + &self, peer_num: usize, excluded_requests: usize, _excluded_peers: usize, request_per_peer: usize, requests_limit: usize, ) -> usize { - let requests_in_queue = self.requested.len().saturating_sub(excluded_requests); + let requests_in_queue = self + .requested + .read() + .len() + .saturating_sub(excluded_requests); let expected_total_request = cmp::min(peer_num * request_per_peer, requests_limit); - self.requests_to_next_ask = (expected_total_request + excluded_requests) / 5; + self.requests_to_next_ask.store( + (expected_total_request + excluded_requests) as i32 / 5, + Ordering::Relaxed, + ); expected_total_request.saturating_sub(requests_in_queue) } pub fn get_requests_num(&self) -> usize { - self.requested.len() + self.requested.read().len() } pub fn has_request(&self, req: &K) -> bool { - self.requested.contains_key(req) + self.requested.read().contains_key(req) } - pub fn get_update_requests_to_next_ask(&mut self) -> usize { - self.requests_to_next_ask = self.requests_to_next_ask.saturating_sub(1); - self.requests_to_next_ask + pub fn get_update_requests_to_next_ask(&self) -> usize { + let res = self.requests_to_next_ask.fetch_sub(1, Ordering::Relaxed); + if res >= 0 { + res as usize + } else { + 0 + } } - pub fn get_peers_track_data(&self) -> &HashMap { - &self.peers_stats + pub fn get_peer_track_data(&self, peer: &PeerAddr) -> Option { + self.peers_stats.read().get(peer).cloned() } - pub fn register_request(&mut self, key: K, peer: PeerAddr, message: String, request_data: V) { - match self.peers_stats.get_mut(&peer) { + pub fn register_request(&self, key: K, peer: PeerAddr, message: String) { + let mut requested = self.requested.write(); + let peers_stats = &mut self.peers_stats.write(); + + match peers_stats.get_mut(&peer) { Some(n) => { n.requests += 1; } None => { - self.peers_stats.insert(peer.clone(), PeerTrackData::new(1)); + peers_stats.insert(peer.clone(), PeerTrackData::new(1)); } } - self.requested - .insert(key, 
RequestData::new(peer, message, request_data));
+		requested.insert(key, RequestData::new(peer, message));
 	}
 
-	pub fn remove_request(&mut self, key: &K) -> Option<PeerAddr> {
-		if let Some(request_data) = self.requested.remove(key) {
-			if let Some(n) = self.peers_stats.get_mut(&request_data.peer) {
-				n.report_response(Utc::now() - request_data.request_time);
+	pub fn remove_request(&self, key: &K, peer: &PeerAddr) -> Option<PeerAddr> {
+		let mut requested = self.requested.write();
+		let peers_stats = &mut self.peers_stats.write();
+
+		if let Some(request_data) = requested.get(key) {
+			let res_peer = request_data.peer.clone();
+			if request_data.peer == *peer {
+				if let Some(n) = peers_stats.get_mut(&request_data.peer) {
+					n.requests = n.requests.saturating_sub(1);
+				}
+				let latency_ms = (Utc::now() - request_data.request_time).num_milliseconds();
+				debug_assert!(latency_ms >= 0);
+				self.latency_tracker.write().add_latency(latency_ms);
+				requested.remove(key);
 			}
-			Some(request_data.peer)
+			Some(res_peer)
 		} else {
 			None
 		}
 	}
 
+	pub fn get_average_latency(&self) -> Duration {
+		self.latency_tracker.read().get_average_latency()
+	}
+
 	pub fn get_expected_peer(&self, key: &K) -> Option<PeerAddr> {
-		if let Some(req_data) = self.requested.get(key) {
+		if let Some(req_data) = self.requested.read().get(key) {
 			Some(req_data.peer.clone())
 		} else {
 			None
@@ -307,19 +329,18 @@ pub fn get_qualify_peers(
 }
 
 // return: (peers, number of excluded requests)
-pub fn get_sync_peers(
+pub fn get_sync_peers(
 	peers: &Arc<Peers>,
 	expected_requests_per_peer: usize,
 	capabilities: Capabilities,
 	min_height: u64,
-	total_queue_requests: usize,
-	peers_queue_size: &HashMap<PeerAddr, PeerTrackData>,
+	request_tracker: &RequestTracker,
 ) -> (Vec<Arc<Peer>>, u32, u32) {
 	// Excluding peers with a totally full Q
 	let peer_requests_limit = expected_requests_per_peer as u32;
 	let mut res: Vec<Arc<Peer>> = Vec::new();
 	// for excluded we need to cover offline peers as well.
That is why we are counting back - let mut excluded_requests: usize = total_queue_requests; + let mut excluded_requests: usize = request_tracker.get_requests_num(); let mut excluded_peers = 0; let mut found_outbound = false; for peer in peers @@ -330,7 +351,7 @@ pub fn get_sync_peers( .with_min_height(min_height) { found_outbound = true; - if let Some(track_data) = peers_queue_size.get(&peer.info.addr) { + if let Some(track_data) = request_tracker.get_peer_track_data(&peer.info.addr) { if track_data.requests < peer_requests_limit { excluded_requests = excluded_requests.saturating_sub(track_data.requests as usize); } else { @@ -349,7 +370,7 @@ pub fn get_sync_peers( .inbound() .with_min_height(min_height) { - if let Some(track_data) = peers_queue_size.get(&peer.info.addr) { + if let Some(track_data) = request_tracker.get_peer_track_data(&peer.info.addr) { if track_data.requests < peer_requests_limit { excluded_requests = excluded_requests.saturating_sub(track_data.requests as usize); diff --git a/servers/src/mwc/sync/syncer.rs b/servers/src/mwc/sync/syncer.rs index 8bdf51b58..2e013a216 100644 --- a/servers/src/mwc/sync/syncer.rs +++ b/servers/src/mwc/sync/syncer.rs @@ -19,7 +19,6 @@ use crate::mwc::sync::sync_utils::SyncRequestResponses; use crate::p2p; use crate::util::StopState; use mwc_p2p::Capabilities; -use mwc_util::RwLock; use std::sync::Arc; use std::thread; use std::time; @@ -29,7 +28,7 @@ pub fn run_sync( peers: Arc, chain: Arc, stop_state: Arc, - sync_manager: Arc>, + sync_manager: Arc, ) -> std::io::Result> { thread::Builder::new() .name("sync".to_string()) @@ -44,7 +43,7 @@ pub struct SyncRunner { peers: Arc, chain: Arc, stop_state: Arc, - sync_manager: Arc>, + sync_manager: Arc, } impl SyncRunner { @@ -53,7 +52,7 @@ impl SyncRunner { peers: Arc, chain: Arc, stop_state: Arc, - sync_manager: Arc>, + sync_manager: Arc, ) -> SyncRunner { SyncRunner { sync_state, @@ -115,17 +114,18 @@ impl SyncRunner { } // Main syncing loop + let mut sleep_time = 1000; loop { if self.stop_state.is_stopped() { break; } // Sync manager request might be relatevely heavy, it is expected that latency is higer then 1 second, so // waiting time for 1000ms is reasonable. 
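The main-loop rewrite that continues below replaces the fixed one-second sleep with an adaptive one: sleep_time resets to 1000 ms on every pass and drops to 100 ms when the manager answers HashMoreHeadersToApply, because in that case headers are already sitting in the local cache and only need to be applied. Reduced to its shape (a sketch with illustrative names; the variant spelling follows the patch):

```rust
use std::{thread, time};

enum Response {
	HashMoreHeadersToApply,
	Other,
}

fn sync_loop(mut next_response: impl FnMut() -> Response, stopped: impl Fn() -> bool) {
	let mut sleep_time = 1000;
	while !stopped() {
		thread::sleep(time::Duration::from_millis(sleep_time));
		sleep_time = 1000; // default cadence
		match next_response() {
			// Headers are queued locally; poll again quickly to drain them.
			Response::HashMoreHeadersToApply => sleep_time = 100,
			Response::Other => {}
		}
	}
}
```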
- thread::sleep(time::Duration::from_millis(1000)); + thread::sleep(time::Duration::from_millis(sleep_time)); // run each sync stage, each of them deciding whether they're needed // except for state sync that only runs if body sync return true (means txhashset is needed) - let sync_reponse = self.sync_manager.write().request(&self.peers); + let sync_reponse = self.sync_manager.request(&self.peers); if sync_reponse.response == SyncRequestResponses::SyncDone { debug!("sync_manager responsed with {:?}", sync_reponse); } else { @@ -133,7 +133,7 @@ impl SyncRunner { } let prev_state = self.sync_state.status(); - + sleep_time = 1000; match sync_reponse.response { SyncRequestResponses::WaitingForPeers => { info!("Waiting for the peers"); @@ -146,6 +146,10 @@ impl SyncRunner { self.peers .set_boost_peers_capabilities(sync_reponse.peers_capabilities); } + SyncRequestResponses::HashMoreHeadersToApply => { + debug!("Has more headers to apply, will continue soon"); + sleep_time = 100; + } SyncRequestResponses::SyncDone => { self.sync_state.update(SyncStatus::NoSync); // reset the boost mode diff --git a/src/bin/tui/status.rs b/src/bin/tui/status.rs index 5368b6b01..e9146de05 100644 --- a/src/bin/tui/status.rs +++ b/src/bin/tui/status.rs @@ -59,15 +59,24 @@ impl TUIStatusView { recieved_segments, total_segments, } => { - let percent = if total_segments == 0 { - 0 + if recieved_segments == 0 && total_segments == 100 { + Cow::Owned( + "Sync step 2/7: Selecting peers, waiting for PIBD root hash".to_string(), + ) } else { - recieved_segments * 100 / total_segments - }; - Cow::Owned(format!( - "Sync step 2/7: Downloading Tx state (PIBD) - {} / {} segments - {}%", - recieved_segments, total_segments, percent - )) + let percent = if total_segments == 0 { + 0 + } else { + recieved_segments * 100 / total_segments + }; + Cow::Owned(format!( + "Sync step 2/7: Downloading Tx state (PIBD) - {} / {} segments - {}%", + recieved_segments, total_segments, percent + )) + } + } + SyncStatus::ValidatingKernelsHistory => { + Cow::Owned("Sync step 3/7: Validating kernels history".to_string()) } SyncStatus::TxHashsetHeadersValidation { headers, From 71370eb594f1be9522f6a602b73869cde030a21f Mon Sep 17 00:00:00 2001 From: bayk Date: Sat, 14 Dec 2024 22:55:37 -0800 Subject: [PATCH 5/6] Retry for items at the end of the sync process, stuck moment --- chain/src/chain.rs | 15 ++++-- chain/src/pibd_params.rs | 4 +- chain/src/txhashset/desegmenter.rs | 36 ++++++++++--- chain/src/txhashset/headers_desegmenter.rs | 15 ++++-- chain/src/txhashset/segments_cache.rs | 20 ++++--- chain/src/types.rs | 63 +++++++++++----------- chain/tests/test_pibd_copy.rs | 4 +- servers/src/mwc/sync/body_sync.rs | 50 +++++++++++++++-- servers/src/mwc/sync/header_sync.rs | 46 +++++++++++++++- servers/src/mwc/sync/state_sync.rs | 44 ++++++++++++++- 10 files changed, 236 insertions(+), 61 deletions(-) diff --git a/chain/src/chain.rs b/chain/src/chain.rs index 717d8fde0..3fedcf63e 100644 --- a/chain/src/chain.rs +++ b/chain/src/chain.rs @@ -561,10 +561,11 @@ impl Chain { return Ok(tip); // Done with success } Err(e) => { - info!( - "Failed to process multiple blocks, will try process one by one. {}", - e - ); + if e.is_bad_data() { + info!("Failed to process multiple blocks, will try process one by one. {}",e); + } else { + debug!("Failed to process multiple blocks, will try process one by one. 
{}",e); + } } } } @@ -578,7 +579,11 @@ impl Chain { return Ok(tip); } Err(e) => { - error!("process_block_single failed with error: {}", e); + if e.is_bad_data() { + error!("process_block_single failed with error: {}", e); + } else { + debug!("process_block_single failed with error: {}", e); + } return Err(e); } } diff --git a/chain/src/pibd_params.rs b/chain/src/pibd_params.rs index 5979b20f5..bd13c4767 100644 --- a/chain/src/pibd_params.rs +++ b/chain/src/pibd_params.rs @@ -203,7 +203,7 @@ impl PibdParams { match self.cpu_num { 1 => 2, 2 => 3, - _ => 5, + _ => 4, } } @@ -219,7 +219,7 @@ impl PibdParams { match self.cpu_num { 1 => 2, 2 => 3, - _ => 5, + _ => 4, } } diff --git a/chain/src/txhashset/desegmenter.rs b/chain/src/txhashset/desegmenter.rs index e8dda3432..09a1eedad 100644 --- a/chain/src/txhashset/desegmenter.rs +++ b/chain/src/txhashset/desegmenter.rs @@ -409,11 +409,19 @@ impl Desegmenter { /// Return list of the next preferred segments the desegmenter needs based on /// the current real state of the underlying elements. Second array - list of delayed requests. We better to retry them + /// 3-rd array - the list of waiting requests pub fn next_desired_segments( &self, need_requests: usize, requested: &dyn RequestLookup<(SegmentType, u64)>, - ) -> Result<(Vec, Vec), Error> { + ) -> Result< + ( + Vec, + Vec, + Vec, + ), + Error, + > { // First check for required bitmap elements if self.outputs_bitmap.read().is_none() { let mut bitmap_result: Vec = Vec::new(); @@ -431,7 +439,7 @@ impl Desegmenter { { bitmap_result.push(SegmentTypeIdentifier::new(SegmentType::Bitmap, id)) } - return Ok((bitmap_result, Vec::new())); + return Ok((bitmap_result, Vec::new(), Vec::new())); } else { // We have all required bitmap segments and have recreated our local // bitmap, now continue with other segments, evenly spreading requests @@ -444,8 +452,9 @@ impl Desegmenter { // so the number of segments is high. 
let mut res_req: Vec = Vec::new(); let mut res_dup_req: Vec = Vec::new(); + let mut waiting_req: Vec = Vec::new(); if need_requests > 0 && !self.rangeproof_segment_cache.read().is_complete() { - let (requests, retry_requests) = + let (requests, retry_requests, waiting_requests) = self.rangeproof_segment_cache.read().next_desired_segments( self.pibd_params.get_rangeproof_segment_height(), need_requests, @@ -464,10 +473,15 @@ impl Desegmenter { .into_iter() .map(|id| SegmentTypeIdentifier::new(SegmentType::RangeProof, id)), ); + waiting_req.extend( + waiting_requests + .into_iter() + .map(|id| SegmentTypeIdentifier::new(SegmentType::RangeProof, id)), + ); }; if need_requests > 0 && !self.kernel_segment_cache.read().is_complete() { - let (requests, retry_requests) = + let (requests, retry_requests, waiting_requests) = self.kernel_segment_cache.read().next_desired_segments( self.pibd_params.get_kernel_segment_height(), need_requests, @@ -486,10 +500,15 @@ impl Desegmenter { .into_iter() .map(|id| SegmentTypeIdentifier::new(SegmentType::Kernel, id)), ); + waiting_req.extend( + waiting_requests + .into_iter() + .map(|id| SegmentTypeIdentifier::new(SegmentType::Kernel, id)), + ); }; if need_requests > 0 && !self.output_segment_cache.read().is_complete() { - let (requests, retry_requests) = + let (requests, retry_requests, waiting_requests) = self.output_segment_cache.read().next_desired_segments( self.pibd_params.get_output_segment_height(), need_requests, @@ -508,10 +527,15 @@ impl Desegmenter { .into_iter() .map(|id| SegmentTypeIdentifier::new(SegmentType::Output, id)), ); + waiting_req.extend( + waiting_requests + .into_iter() + .map(|id| SegmentTypeIdentifier::new(SegmentType::Output, id)), + ); } let _ = need_requests; - return Ok((res_req, res_dup_req)); + return Ok((res_req, res_dup_req, waiting_req)); } } diff --git a/chain/src/txhashset/headers_desegmenter.rs b/chain/src/txhashset/headers_desegmenter.rs index e98010f0e..5ea5faf01 100644 --- a/chain/src/txhashset/headers_desegmenter.rs +++ b/chain/src/txhashset/headers_desegmenter.rs @@ -279,13 +279,14 @@ impl HeadersRecieveCache { /// Return list of the next preferred segments the desegmenter needs based on /// the current real state of the underlying elements. Second array - list of delayed requests. We better to retry them + /// 3rd array - all requesrs that are expected. pub fn next_desired_headers( &self, headers: &HeaderHashesDesegmenter, elements: usize, request_tracker: &dyn RequestLookup, headers_cache_size_limit: usize, - ) -> Result<(Vec<(Hash, u64)>, Vec<(Hash, u64)>), Error> { + ) -> Result<(Vec<(Hash, u64)>, Vec<(Hash, u64)>, Vec<(Hash, u64)>), Error> { let mut return_vec = vec![]; let tip = self.chain.header_head()?; let base_hash_idx = tip.height / HEADERS_PER_BATCH as u64; @@ -351,15 +352,19 @@ impl HeadersRecieveCache { // Let's check if we want to retry something... 
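Both the headers cache and the segments cache use the watermark heuristic implemented just below: among the outstanding requests, the index of the Nth-smallest waiting entry (the 10th for headers, the 5th for segments) becomes a cutoff, and only entries below it are retried, since with that many requests queued behind them they are what keeps the cache from draining. The same idea as a standalone sketch (an illustrative function, not the patch's code):

```rust
// `waiting` is a list of (index, request id) pairs sorted by index ascending.
fn retry_candidates(waiting: &[(u64, u64)], watermark_pos: usize) -> Vec<u64> {
	match waiting.get(watermark_pos) {
		// Everything older than the watermark index is considered stuck.
		Some(&(watermark_idx, _)) => waiting
			.iter()
			.take_while(|(idx, _)| *idx < watermark_idx)
			.map(|&(_, req_id)| req_id)
			.collect(),
		// Fewer than N outstanding entries: nothing is blocking yet.
		None => Vec::new(),
	}
}
```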
let mut retry_vec = vec![]; if has10_idx > 0 { - for (idx, req) in waiting_indexes { - if idx >= has10_idx { + for (idx, req) in &waiting_indexes { + if *idx >= has10_idx { break; } - retry_vec.push(req); + retry_vec.push(req.clone()); } } - Ok((return_vec, retry_vec)) + Ok(( + return_vec, + retry_vec, + waiting_indexes.into_iter().map(|(_, v)| v).collect(), + )) } /// Adds a output segment diff --git a/chain/src/txhashset/segments_cache.rs b/chain/src/txhashset/segments_cache.rs index 88e754b51..c8eae8e91 100644 --- a/chain/src/txhashset/segments_cache.rs +++ b/chain/src/txhashset/segments_cache.rs @@ -66,14 +66,18 @@ impl SegmentsCache { } /// Return list of the next preferred segments the desegmenter needs based on - /// the current real state of the underlying elements + /// the current real state of the underlying elements, retry requests, all waiting requests pub fn next_desired_segments( &self, height: u8, max_elements: usize, requested: &dyn RequestLookup<(SegmentType, u64)>, cache_size_limit: usize, - ) -> (Vec, Vec) { + ) -> ( + Vec, + Vec, + Vec, + ) { let mut result = vec![]; debug_assert!(max_elements > 0); debug_assert!(cache_size_limit > 0); @@ -125,15 +129,19 @@ impl SegmentsCache { // Let's check if we want to retry something... let mut retry_vec = vec![]; if has_5_idx > 0 { - for (idx, req) in waiting_indexes { - if idx >= has_5_idx { + for (idx, req) in &waiting_indexes { + if *idx >= has_5_idx { break; } - retry_vec.push(req); + retry_vec.push(req.clone()); } } - (result, retry_vec) + ( + result, + retry_vec, + waiting_indexes.into_iter().map(|w| w.1).collect(), + ) } pub fn is_duplicate_segment(&self, segment_idx: u64) -> bool { diff --git a/chain/src/types.rs b/chain/src/types.rs index 69971bfca..78a9c69bb 100644 --- a/chain/src/types.rs +++ b/chain/src/types.rs @@ -238,44 +238,47 @@ pub struct TxHashSetRoots { impl TxHashSetRoots { /// Validate roots against the provided block header. pub fn validate(&self, header: &BlockHeader) -> Result<(), Error> { - debug!( - "Validating at height {}. Output MMR size: {} Kernel MMR size: {}", - header.height, header.output_mmr_size, header.kernel_mmr_size - ); - debug!( - "validate roots: {} at {}, Outputs roots {} vs. {}, sz {} vs {}, Range Proof roots {} vs {}, sz {} vs {}, Kernel Roots {} vs {}, sz {} vs {}", - header.hash(), - header.height, - header.output_root, - self.output_root, - header.output_mmr_size, - self.output_mmr_size, - header.range_proof_root, - self.rproof_root, - header.output_mmr_size, - self.rproof_mmr_size, - header.kernel_root, - self.kernel_root, - header.kernel_mmr_size, - self.kernel_mmr_size, - ); + debug!("{}", self.get_validate_info_str(header)); if header.output_root != self.output_root { - Err(Error::InvalidRoot( - "Failed Output root validation".to_string(), - )) + Err(Error::InvalidRoot(format!( + "Failed Output root validation. {}", + self.get_validate_info_str(header) + ))) } else if header.range_proof_root != self.rproof_root { - Err(Error::InvalidRoot( - "Failed Range Proof root validation".to_string(), - )) + Err(Error::InvalidRoot(format!( + "Failed Range Proof root validation. {}", + self.get_validate_info_str(header) + ))) } else if header.kernel_root != self.kernel_root { - Err(Error::InvalidRoot( - "Failed Kernel root validation".to_string(), - )) + Err(Error::InvalidRoot(format!( + "Failed Kernel root validation. {}", + self.get_validate_info_str(header) + ))) } else { Ok(()) } } + + fn get_validate_info_str(&self, header: &BlockHeader) -> String { + format!("Validating at height {}. 
Output MMR size: {} Kernel MMR size: {} .validate roots: {} at {}, Outputs roots {} vs. {}, sz {} vs {}, Range Proof roots {} vs {}, sz {} vs {}, Kernel Roots {} vs {}, sz {} vs {}", + header.height, header.output_mmr_size, header.kernel_mmr_size, + header.hash(), + header.height, + header.output_root, + self.output_root, + header.output_mmr_size, + self.output_mmr_size, + header.range_proof_root, + self.rproof_root, + header.output_mmr_size, + self.rproof_mmr_size, + header.kernel_root, + self.kernel_root, + header.kernel_mmr_size, + self.kernel_mmr_size, + ) + } } /// Minimal struct representing a known MMR position and associated block height. diff --git a/chain/tests/test_pibd_copy.rs b/chain/tests/test_pibd_copy.rs index c514309b1..f0dba1dd6 100644 --- a/chain/tests/test_pibd_copy.rs +++ b/chain/tests/test_pibd_copy.rs @@ -215,7 +215,7 @@ impl DesegmenterRequestor { } let empty_map: HashMap = HashMap::new(); let empty_map = &empty_map; - let (hashes, _reply_hashes) = headers_cache + let (hashes, _reply_hashes, _) = headers_cache .next_desired_headers(header_desegmenter, 15, &empty_map, 100) .unwrap(); if hashes.is_empty() { @@ -283,7 +283,7 @@ impl DesegmenterRequestor { let now = Instant::now(); let empty_map: HashMap<(SegmentType, u64), u8> = HashMap::new(); let empty_map = &empty_map; - let (mut next_segment_ids, _retry_ids) = + let (mut next_segment_ids, _retry_ids, _) = desegmenter.next_desired_segments(60, &empty_map).unwrap(); debug!("next_desired_segments took {}ms", now.elapsed().as_millis()); let is_complete = desegmenter.is_complete(); diff --git a/servers/src/mwc/sync/body_sync.rs b/servers/src/mwc/sync/body_sync.rs index ce8bc17c5..03508907e 100644 --- a/servers/src/mwc/sync/body_sync.rs +++ b/servers/src/mwc/sync/body_sync.rs @@ -381,11 +381,11 @@ impl BodySync { let mut retry_requests: Vec<(u64, Hash)> = Vec::new(); if has10_idx > 0 { - for (height, req) in waiting_heights { - if height >= has10_idx { + for (height, req) in &waiting_heights { + if *height >= has10_idx { break; } - retry_requests.push((height, req)); + retry_requests.push((height.clone(), req.clone())); } } @@ -486,6 +486,50 @@ impl BodySync { ); } } + + if *need_request > 0 { + // Free requests, lets duplicated some random from the expected buffer + let duplicate_reqs: Vec<(u64, Hash)> = waiting_heights + .choose_multiple(&mut rng, *need_request) + .cloned() + .collect(); + *need_request = 0; + + for (height, hash) in duplicate_reqs { + // We don't want to send retry to the peer whom we already send the data + if let Some(requested_peer) = self.request_tracker.get_expected_peer(&hash) { + let dup_peer = peers + .iter() + .filter(|p| p.info.addr != requested_peer) + .choose(&mut rng); + + if dup_peer.is_none() { + break; + } + let dup_peer = dup_peer.unwrap(); + + debug!( + "Processing duplicated request for the block {} at {}, peer {:?}", + hash, height, dup_peer.info.addr + ); + match dup_peer.send_block_request(hash, chain::Options::SYNC) { + Ok(_) => self + .retry_expiration_times + .write() + .push_back(now + self.request_tracker.get_average_latency()), + Err(e) => { + let msg = format!( + "Failed to send duplicate block request to peer {}, {}", + dup_peer.info.addr, e + ); + warn!("{}", msg); + sync_peers.report_no_response(&dup_peer.info.addr, msg); + break; + } + } + } + } + } } Ok(()) } diff --git a/servers/src/mwc/sync/header_sync.rs b/servers/src/mwc/sync/header_sync.rs index 1c514836a..0effe60f8 100644 --- a/servers/src/mwc/sync/header_sync.rs +++ b/servers/src/mwc/sync/header_sync.rs @@ 
-571,7 +571,7 @@ impl HeaderSync { .as_ref() .expect("Internal error. Received_cache is not initialized."); - let (hashes, retry_reqs) = received_cache.next_desired_headers(headers_hash_desegmenter, + let (hashes, retry_reqs, waiting_reqs) = received_cache.next_desired_headers(headers_hash_desegmenter, need_request, &self.request_tracker, self.pibd_params.get_headers_buffer_len()) .expect("Chain is corrupted, please clean up the data manually and restart the node"); @@ -660,6 +660,50 @@ impl HeaderSync { } } } + + if need_request > 0 { + // Free requests, lets duplicated some random from the expected buffer + let duplicate_reqs: Vec<(Hash, u64)> = waiting_reqs + .choose_multiple(&mut rng, need_request) + .cloned() + .collect(); + for (hash, height) in duplicate_reqs { + // We don't want to send retry to the peer whom we already send the data + if let Some(requested_peer) = self.request_tracker.get_expected_peer(&hash) + { + let dup_peer = peers + .iter() + .filter(|p| p.info.addr != requested_peer) + .choose(&mut rng); + + if dup_peer.is_none() { + break; + } + let dup_peer = dup_peer.unwrap(); + + debug!( + "Processing duplicated request for the headers {} at {}, peer {:?}", + hash, height, dup_peer.info.addr + ); + match self.request_headers_for_hash( + hash.clone(), + height, + dup_peer.clone(), + ) { + Ok(_) => self + .retry_expiration_times + .write() + .push_back(now + self.request_tracker.get_average_latency()), + Err(e) => { + let msg = format!("Failed to send duplicate headers request to {} for hash {}, Error: {}", dup_peer.info.addr, hash, e); + error!("{}", msg); + sync_peers.report_no_response(&dup_peer.info.addr, msg); + break; + } + } + } + } + } } } } diff --git a/servers/src/mwc/sync/state_sync.rs b/servers/src/mwc/sync/state_sync.rs index 7b0bbbc70..1092adc6a 100644 --- a/servers/src/mwc/sync/state_sync.rs +++ b/servers/src/mwc/sync/state_sync.rs @@ -782,7 +782,7 @@ impl StateSync { need_request = need_request.saturating_sub(self.calc_retry_running_requests()); if need_request > 0 { match desegmenter.next_desired_segments(need_request, &self.request_tracker) { - Ok((req_segments, retry_segments)) => { + Ok((req_segments, retry_segments, waiting_segments)) => { let mut rng = rand::thread_rng(); let now = Utc::now(); let target_archive_hash = self.target_archive_hash.read().clone(); @@ -892,6 +892,48 @@ impl StateSync { } } } + + if need_request > 0 { + // If nothing to do, there are some requests are available. 
We can use them for more duplicates + let duplicate_reqs: Vec = waiting_segments + .choose_multiple(&mut rng, need_request) + .cloned() + .collect(); + + for segm in &duplicate_reqs { + // We don't want to send retry to the peer whom we already send the data + if let Some(requested_peer) = + self.request_tracker.get_expected_peer(&( + segm.segment_type.clone(), + segm.identifier.idx, + )) { + let dup_peer = peers + .iter() + .filter(|p| p.info.addr != requested_peer) + .choose(&mut rng); + + if dup_peer.is_none() { + break; + } + let dup_peer = dup_peer.unwrap(); + + debug!("Processing duplicated request for the segment {:?} at {}, peer {:?}", segm.segment_type, segm.identifier.idx, dup_peer.info.addr); + match Self::send_request(&dup_peer, &segm, &target_archive_hash) + { + Ok(_) => self.retry_expiration_times.write().push_back( + now + self.request_tracker.get_average_latency(), + ), + Err(e) => { + let msg = format!("Failed to send duplicate segment {:?} at {}, peer {:?}, Error: {}", segm.segment_type, segm.identifier.idx, dup_peer.info.addr, e); + error!("{}", msg); + sync_peers.report_no_response(&dup_peer.info.addr, msg); + break; + } + } + } + } + } + return SyncResponse::new( SyncRequestResponses::Syncing, Self::get_peer_capabilities(), From 5e6fbb442e3e7791da3cf6354ef054343d16a2f2 Mon Sep 17 00:00:00 2001 From: bayk Date: Sun, 15 Dec 2024 18:17:56 -0800 Subject: [PATCH 6/6] PIBD fix that broke mmr root calculation --- chain/src/chain.rs | 10 +++ chain/src/txhashset/desegmenter.rs | 14 ++++- chain/src/txhashset/txhashset.rs | 58 ++++++++++------- core/src/core/pmmr/backend.rs | 5 +- core/src/core/pmmr/pmmr.rs | 7 ++- core/src/core/pmmr/segment.rs | 2 +- core/src/core/pmmr/vec_backend.rs | 6 +- servers/src/mwc/sync/body_sync.rs | 99 +++++++++++++++++------------- store/src/pmmr.rs | 7 ++- 9 files changed, 130 insertions(+), 78 deletions(-) diff --git a/chain/src/chain.rs b/chain/src/chain.rs index 3fedcf63e..f970e07b9 100644 --- a/chain/src/chain.rs +++ b/chain/src/chain.rs @@ -1334,6 +1334,16 @@ impl Chain { assert!(header.kernel_root == kernel_pmmr_root); } + /*{ + use mwc_core::core::pmmr::ReadablePMMR; + + let txhashset = self.txhashset.read(); + let rangeproof_pmmr = txhashset.rangeproof_pmmr_at(&header); + let rangeproof_pmmr_root = rangeproof_pmmr.root().unwrap(); + error!("rangeproof_pmmr_root: {} at height: {}, mmr size: {}", rangeproof_pmmr_root, header.height, header.output_mmr_size); + txhashset.dump_rproof_mmrs() + }*/ + Ok(Segmenter::new( Arc::new(RwLock::new(segm_header_pmmr_backend)), self.txhashset.clone(), diff --git a/chain/src/txhashset/desegmenter.rs b/chain/src/txhashset/desegmenter.rs index 09a1eedad..79e550d6e 100644 --- a/chain/src/txhashset/desegmenter.rs +++ b/chain/src/txhashset/desegmenter.rs @@ -228,6 +228,16 @@ impl Desegmenter { { let txhashset = self.txhashset.read(); txhashset.roots()?.validate(&self.archive_header)?; + /*match txhashset.roots()?.validate(&self.archive_header) { + Ok(_) => {} + Err(e) => { + error!("validate error: {}", e); + txhashset.dump_rproof_mmrs(); + error!("Dump is done. 
There was Validate error: {}", e);
+					panic!("Exiting...");
+					return Err(e);
+				}
+			}*/
 		}
 
 		status.update(SyncStatus::ValidatingKernelsHistory);
@@ -667,7 +677,7 @@ impl Desegmenter {
 			&mut batch,
 			|ext, _batch| {
 				let extension = &mut ext.extension;
-				extension.apply_output_segments(segm, outputs_bitmap)?;
+				extension.apply_output_segments(segm)?;
 				Ok(())
 			},
 		)?;
@@ -720,7 +730,7 @@ impl Desegmenter {
 			&mut batch,
 			|ext, _batch| {
 				let extension = &mut ext.extension;
-				extension.apply_rangeproof_segments(seg, outputs_bitmap)?;
+				extension.apply_rangeproof_segments(seg)?;
 				Ok(())
 			},
 		)?;
diff --git a/chain/src/txhashset/txhashset.rs b/chain/src/txhashset/txhashset.rs
index cdb736d86..40a8b866e 100644
--- a/chain/src/txhashset/txhashset.rs
+++ b/chain/src/txhashset/txhashset.rs
@@ -462,6 +462,32 @@ impl TxHashSet {
 		})
 	}
 
+	/// For debug only, dump the range proof data
+	pub fn dump_rproof_mmrs(&self) {
+		info!(
+			"Generating dump with MMR roots at sizes: Outputs: {} Rangeproofs: {} Kernels: {}",
+			self.output_pmmr_h.size, self.rproof_pmmr_h.size, self.kernel_pmmr_h.size
+		);
+
+		for i in 0..self.rproof_pmmr_h.size {
+			let mut s = format!("{} ", i);
+			if let Some(hash) = self.rproof_pmmr_h.backend.get_hash(i) {
+				s.push_str(&format!("Hash: {}", hash));
+			}
+
+			if let Some(rp) = self.rproof_pmmr_h.backend.get_data(i) {
+				s.push_str(&format!(" RP: {:?}", rp));
+			}
+
+			let root = ReadonlyPMMR::at(&self.rproof_pmmr_h.backend, i + 1);
+			if let Ok(root) = root.root() {
+				s.push_str(&format!(" ROOT: {}", root));
+			}
+
+			info!("{}", s);
+		}
+	}
+
 	/// Return Commit's MMR position
 	pub fn get_output_pos(&self, commit: &Commitment) -> Result<u64, Error> {
 		Ok(self.commit_index.get_output_pos(&commit)?)
@@ -1307,8 +1333,10 @@ impl<'a> Extension<'a> {
 		let flipped = bitmap.flip(0u32..bitmap.maximum().unwrap() + 1);
 		for spent_pmmr_index in flipped.iter() {
 			let pos0 = pmmr::insertion_to_pmmr_index(spent_pmmr_index.into());
-			self.output_pmmr.remove_from_leaf_set(pos0);
-			self.rproof_pmmr.remove_from_leaf_set(pos0);
+			// Note: remove_from_leaf_set can't be used here, because the root would be affected.
+			// Some segments might not be pruned; that is expected.
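The replacement lines just below route spent positions through prune() instead of remove_from_leaf_set(). Pruning folds the removal into the stored parent hashes, while dropping the position from the leaf set alone let the leaf set and the hash file disagree, which is what this patch ties to the broken MMR root calculation. A sketch of the fixed loop under stub types (Pmmr is a stand-in trait, not the crate's Backend; the index mapping is the standard PMMR formula):

```rust
trait Pmmr {
	// Ok(true) when the leaf existed and was pruned.
	fn prune(&mut self, pos0: u64) -> Result<bool, String>;
}

// 0-based insertion index of a leaf -> its 0-based PMMR position.
fn insertion_to_pmmr_index(i: u64) -> u64 {
	2 * i - i.count_ones() as u64
}

fn mark_spent(pmmr: &mut impl Pmmr, spent_insertion_indexes: &[u64]) {
	for &i in spent_insertion_indexes {
		let pos0 = insertion_to_pmmr_index(i);
		// Pruning may be a no-op for segments that were never fully
		// downloaded; that is expected, so the result is ignored.
		let _ = pmmr.prune(pos0);
	}
}
```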
+ let _ = self.output_pmmr.prune(pos0); + let _ = self.rproof_pmmr.prune(pos0); } Ok(()) } @@ -1322,7 +1350,6 @@ impl<'a> Extension<'a> { pub fn apply_output_segments( &mut self, segments: Vec>, - bitmap: &Bitmap, ) -> Result<(), Error> { for segm in segments { let (_sid, hash_pos, hashes, leaf_pos, leaf_data, _proof) = segm.parts(); @@ -1350,15 +1377,8 @@ impl<'a> Extension<'a> { .push(&leaf_data[idx]) .map_err(&Error::TxHashSetErr)?; } - let pmmr_index = pmmr::pmmr_leaf_to_insertion_index(pos0); - match pmmr_index { - Some(i) => { - if !bitmap.contains(i as u32) { - self.output_pmmr.remove_from_leaf_set(pos0); - } - } - None => {} - }; + // Note, extra unproned segments will be upadted later + // Prone will be due } } } @@ -1372,11 +1392,12 @@ impl<'a> Extension<'a> { pub fn apply_rangeproof_segments( &mut self, segments: Vec>, - bitmap: &Bitmap, ) -> Result<(), Error> { for segm in segments { let (_sid, hash_pos, hashes, leaf_pos, leaf_data, _proof) = segm.parts(); + //info!("Adding proof segment {}, from mmr pos: {} hashes sz: {} leaf_data sz: {} hash_pos: {:?} hashes: {:?} leaf_pos: {:?} leaf_data: {:?}", sid.idx, self.rproof_pmmr.size, hashes.len(), leaf_data.len(), hash_pos, hashes, leaf_pos, leaf_data ); + // insert either leaves or pruned subtrees as we go for insert in sort_pmmr_hashes_and_leaves(hash_pos, leaf_pos, Some(0)) { match insert { @@ -1400,15 +1421,8 @@ impl<'a> Extension<'a> { .push(&leaf_data[idx]) .map_err(&Error::TxHashSetErr)?; } - let pmmr_index = pmmr::pmmr_leaf_to_insertion_index(pos0); - match pmmr_index { - Some(i) => { - if !bitmap.contains(i as u32) { - self.rproof_pmmr.remove_from_leaf_set(pos0); - } - } - None => {} - }; + // Note, extra unproned segments will be upadted later + // Prone will be due } } } diff --git a/core/src/core/pmmr/backend.rs b/core/src/core/pmmr/backend.rs index c66677e2a..244287b1f 100644 --- a/core/src/core/pmmr/backend.rs +++ b/core/src/core/pmmr/backend.rs @@ -82,8 +82,9 @@ pub trait Backend { /// triggered removal). fn remove(&mut self, position: u64) -> Result<(), String>; - /// Remove a leaf from the leaf set - fn remove_from_leaf_set(&mut self, pos0: u64); + // Remove a leaf from the leaf set. + // DON'T USE IS, use prune instead + //fn remove_from_leaf_set(&mut self, pos0: u64); /// Release underlying datafiles and locks fn release_files(&mut self); diff --git a/core/src/core/pmmr/pmmr.rs b/core/src/core/pmmr/pmmr.rs index ea5c46383..199c0e8c9 100644 --- a/core/src/core/pmmr/pmmr.rs +++ b/core/src/core/pmmr/pmmr.rs @@ -284,10 +284,11 @@ where self.backend.reset_prune_list(); } - /// Remove the specified position from the leaf set - pub fn remove_from_leaf_set(&mut self, pos0: u64) { + // Remove the specified position from the leaf set + // DON'T USE IS, use prune instead + /*pub fn remove_from_leaf_set(&mut self, pos0: u64) { self.backend.remove_from_leaf_set(pos0); - } + }*/ /// Saves a snapshot of the MMR tagged with the block hash. 
 	/// Specifically - snapshots the utxo file as we need this rewound before
diff --git a/core/src/core/pmmr/segment.rs b/core/src/core/pmmr/segment.rs
index 3ca4aa412..731012751 100644
--- a/core/src/core/pmmr/segment.rs
+++ b/core/src/core/pmmr/segment.rs
@@ -384,7 +384,7 @@ where
 	T: PMMRIndexHashable,
 {
 	/// Calculate root hash of this segment
-	/// Returns `None` iff the segment is full and completely pruned
+	/// Returns `None` if the segment is full and completely pruned
 	pub fn root(
 		&self,
 		mmr_size: u64,
diff --git a/core/src/core/pmmr/vec_backend.rs b/core/src/core/pmmr/vec_backend.rs
index 6e87f2564..0113b5544 100644
--- a/core/src/core/pmmr/vec_backend.rs
+++ b/core/src/core/pmmr/vec_backend.rs
@@ -120,9 +120,9 @@ impl<T: PMMRable> Backend<T> for VecBackend<T> {
 		Ok(())
 	}

-	fn remove_from_leaf_set(&mut self, _pos0: u64) {
-		unimplemented!()
-	}
+	//fn remove_from_leaf_set(&mut self, _pos0: u64) {
+	//	unimplemented!()
+	//}

 	fn reset_prune_list(&mut self) {
 		unimplemented!()
diff --git a/servers/src/mwc/sync/body_sync.rs b/servers/src/mwc/sync/body_sync.rs
index 03508907e..4269d535e 100644
--- a/servers/src/mwc/sync/body_sync.rs
+++ b/servers/src/mwc/sync/body_sync.rs
@@ -195,7 +195,7 @@ impl BodySync {
 		);

 		if need_request > 0 {
-			self.send_requests(&mut need_request, &peers, sync_peers)?;
+			let mut waiting_requests = self.send_requests(&mut need_request, &peers, sync_peers)?;

 			// We can send more requests, let's check if we need to update request_series
 			if need_request > 0 {
@@ -241,7 +241,11 @@ impl BodySync {
 				}

 				// Now we can try to submit more requests...
-				self.send_requests(&mut need_request, &peers, sync_peers)?;
+				waiting_requests = self.send_requests(&mut need_request, &peers, sync_peers)?;
+			}
+
+			if need_request > 0 && !waiting_requests.is_empty() {
+				self.send_waiting_requests(waiting_requests, need_request, &peers, sync_peers)?;
+			}
 		}
@@ -325,26 +329,27 @@ impl BodySync {
 		}
 		retry_expiration_times.len()
 	}

+	// Returns the list of waiting requests
 	fn send_requests(
 		&self,
 		need_request: &mut usize,
 		peers: &Vec<Arc<Peer>>,
 		sync_peers: &SyncPeers,
-	) -> Result<(), chain::Error> {
+	) -> Result<Vec<(u64, Hash)>, chain::Error> {
 		// request_series is ordered from head to tail, but it is better to send requests from tail to head....
 		let mut peers = peers.clone();
+		let mut waiting_heights: Vec<(u64, Hash)> = Vec::new();

 		// Request with try_write: if somebody else holds the lock, they are already sending, which means we are good...
 		if let Some(request_series) = self.request_series.try_write() {
 			*need_request = need_request.saturating_sub(self.calc_retry_running_requests());
 			if *need_request == 0 {
-				return Ok(());
+				return Ok(waiting_heights);
 			}

 			let mut rng = rand::thread_rng();
 			let now = Utc::now();

 			let mut new_requests: Vec<(u64, Hash)> = Vec::new();
-			let mut waiting_heights: Vec<(u64, Hash)> = Vec::new();

 			let mut first_in_cache = 0;
 			let mut last_in_cache = 0;
@@ -459,7 +464,7 @@ impl BodySync {
 			peers.retain(|p| p.info.live_info.read().height >= height);
 			if peers.is_empty() {
 				*need_request = 0;
-				return Ok(());
+				return Ok(waiting_heights);
 			}

 			// sending request
@@ -486,47 +491,57 @@ impl BodySync {
 					);
 				}
 			}
+		}
+		Ok(waiting_heights)
+	}

-			if *need_request > 0 {
-				// Free requests, lets duplicated some random from the expected buffer
-				let duplicate_reqs: Vec<(u64, Hash)> = waiting_heights
-					.choose_multiple(&mut rng, *need_request)
-					.cloned()
-					.collect();
-				*need_request = 0;
+	fn send_waiting_requests(
+		&self,
+		waiting_heights: Vec<(u64, Hash)>,
+		need_request: usize,
+		peers: &Vec<Arc<Peer>>,
+		sync_peers: &SyncPeers,
+	) -> Result<(), chain::Error> {
+		debug_assert!(need_request > 0);

-				for (height, hash) in duplicate_reqs {
-					// We don't want to send retry to the peer whom we already send the data
-					if let Some(requested_peer) = self.request_tracker.get_expected_peer(&hash) {
-						let dup_peer = peers
-							.iter()
-							.filter(|p| p.info.addr != requested_peer)
-							.choose(&mut rng);
+		let mut rng = rand::thread_rng();
+		let now = Utc::now();

-						if dup_peer.is_none() {
-							break;
-						}
-						let dup_peer = dup_peer.unwrap();
+		// Free request slots: let's duplicate some random requests from the waiting buffer
+		let duplicate_reqs: Vec<(u64, Hash)> = waiting_heights
+			.into_iter()
+			.choose_multiple(&mut rng, need_request);

-						debug!(
-							"Processing duplicated request for the block {} at {}, peer {:?}",
-							hash, height, dup_peer.info.addr
+		for (height, hash) in duplicate_reqs {
+			// We don't want to send the retry to the peer we already sent the request to
+			if let Some(requested_peer) = self.request_tracker.get_expected_peer(&hash) {
+				let dup_peer = peers
+					.iter()
+					.filter(|p| p.info.addr != requested_peer)
+					.choose(&mut rng);
+
+				if dup_peer.is_none() {
+					break;
+				}
+				let dup_peer = dup_peer.unwrap();
+				debug!(
+					"Processing duplicated request for the block {} at {}, peer {:?}",
+					hash, height, dup_peer.info.addr
+				);
+
+				match dup_peer.send_block_request(hash, chain::Options::SYNC) {
+					Ok(_) => self
+						.retry_expiration_times
+						.write()
+						.push_back(now + self.request_tracker.get_average_latency()),
+					Err(e) => {
+						let msg = format!(
+							"Failed to send duplicate block request to peer {}, {}",
+							dup_peer.info.addr, e
 						);
-						match dup_peer.send_block_request(hash, chain::Options::SYNC) {
-							Ok(_) => self
-								.retry_expiration_times
-								.write()
-								.push_back(now + self.request_tracker.get_average_latency()),
-							Err(e) => {
-								let msg = format!(
-									"Failed to send duplicate block request to peer {}, {}",
-									dup_peer.info.addr, e
-								);
-								warn!("{}", msg);
-								sync_peers.report_no_response(&dup_peer.info.addr, msg);
-								break;
-							}
-						}
+						warn!("{}", msg);
+						sync_peers.report_no_response(&dup_peer.info.addr, msg);
+						break;
 					}
 				}
 			}
diff --git a/store/src/pmmr.rs b/store/src/pmmr.rs
index 66b466325..7e26b69e5 100644
--- a/store/src/pmmr.rs
+++ b/store/src/pmmr.rs
@@ -157,10 +157,11 @@ impl<T: PMMRable> Backend<T> for PMMRBackend<T> {
 		self.get_data_from_file(pos0)
 	}

-	/// Remove leaf from leaf set
-	fn remove_from_leaf_set(&mut self, pos0: u64) {
+	// Remove leaf from leaf set
+	// DON'T USE IT, use prune instead
+	/*fn remove_from_leaf_set(&mut self, pos0: u64) {
 		self.leaf_set.remove(pos0);
-	}
+	}*/

 	/// Returns an iterator over all the leaf positions.
 	/// for a prunable PMMR this is an iterator over the leaf_set bitmap.
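
Note on the duplicate-request path introduced above: when free request slots remain after the regular pass, send_waiting_requests picks a random subset of the waiting blocks and re-requests each one from a peer other than the one holding the original request. Below is a minimal, self-contained sketch of that selection logic, assuming only the rand crate; Hash, PeerAddr and the expected_peer map are simplified stand-ins for the real chain/p2p types and for RequestTracker::get_expected_peer.

// Illustrative sketch only; the types are simplified stand-ins, not the mwc APIs.
use rand::seq::IteratorRandom;
use std::collections::HashMap;

type Hash = u64;
type PeerAddr = String;

/// Pick up to `need_request` random waiting blocks; for each, choose a random
/// peer that is not the peer the original request was sent to.
fn pick_duplicates(
    waiting_heights: Vec<(u64, Hash)>,
    need_request: usize,
    peers: &[PeerAddr],
    expected_peer: &HashMap<Hash, PeerAddr>,
) -> Vec<(u64, Hash, PeerAddr)> {
    let mut rng = rand::thread_rng();
    let mut duplicates = Vec::new();
    // Same primitive the patch uses: IteratorRandom::choose_multiple.
    let duplicate_reqs: Vec<(u64, Hash)> = waiting_heights
        .into_iter()
        .choose_multiple(&mut rng, need_request);
    for (height, hash) in duplicate_reqs {
        if let Some(requested) = expected_peer.get(&hash) {
            // Never retry against the peer that already holds the request.
            if let Some(dup_peer) = peers.iter().filter(|p| *p != requested).choose(&mut rng) {
                duplicates.push((height, hash, dup_peer.clone()));
            }
        }
    }
    duplicates
}

The break-on-no-peer behavior of the real code is dropped here for brevity; the point is the two random choices and the exclusion of the peer that already has the request.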
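
The retry bookkeeping feeding into this is visible above as well: every duplicate send pushes now plus the tracker's average latency into retry_expiration_times, and calc_retry_running_requests (whose tail, returning retry_expiration_times.len(), appears in the body_sync.rs hunk) counts the entries still in the future so in-flight retries keep occupying request slots. A sketch of that mechanism under the same caveat, assuming chrono; RetryTracker and its method names are illustrative, not the real RequestTracker API.

// Illustrative sketch only; not the real RequestTracker API.
use chrono::{DateTime, Duration, Utc};
use std::collections::VecDeque;

struct RetryTracker {
    // Expiration timestamps, pushed in chronological order.
    retry_expiration_times: VecDeque<DateTime<Utc>>,
}

impl RetryTracker {
    /// A duplicate request is expected to be answered within the average latency.
    fn on_duplicate_sent(&mut self, average_latency: Duration) {
        self.retry_expiration_times
            .push_back(Utc::now() + average_latency);
    }

    /// Drop expired entries from the front and return how many retries are
    /// still in flight; the caller subtracts this from the free request slots.
    fn running_retries(&mut self) -> usize {
        let now = Utc::now();
        while let Some(exp) = self.retry_expiration_times.front() {
            if *exp < now {
                self.retry_expiration_times.pop_front();
            } else {
                break;
            }
        }
        self.retry_expiration_times.len()
    }
}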