Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix wait on metrics and path checking #248

Merged
merged 1 commit into from
Aug 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 11 additions & 5 deletions crates/orchestrator/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -462,16 +462,19 @@ fn validate_spec_with_provider_capabilities(

// now check the binaries
let path = std::env::var("PATH").unwrap_or_default(); // path should always be set
println!("PATH es: {path}");
trace!("current PATH: {path}");
let parts: Vec<_> = path.split(":").collect();
for cmd in cmds {
let missing = if cmd.contains('/') {
trace!("checking {cmd}");
std::fs::metadata(cmd).is_err()
} else {
// should be in the PATH
!parts
.iter()
.any(|part| std::fs::metadata(format!("{}/{}", part, cmd)).is_ok())
!parts.iter().any(|part| {
let path_to = format!("{}/{}", part, cmd);
trace!("checking {path_to}");
std::fs::metadata(path_to).is_ok()
})
};

if missing {
Expand All @@ -494,7 +497,10 @@ fn help_msg(cmd: &str) -> String {
format!("Missing binary {cmd}, compile by running: \n\tcargo build --package {cmd} --release")
},
"polkadot" => {
format!("Missing binary {cmd}, compile by running: \n\t cargo build --locked --release --features fast-runtime --bin {cmd} --bin polkadot-prepare-worker --bin polkadot-execute-worker")
format!("Missing binary {cmd}, compile by running (in the polkadot-sdk repo): \n\t cargo build --locked --release --features fast-runtime --bin {cmd} --bin polkadot-prepare-worker --bin polkadot-execute-worker")
},
"polkadot-parachain" => {
format!("Missing binary {cmd}, compile by running (in the polkadot-sdk repo): \n\t cargo build --release --locked -p {cmd}-bin --bin {cmd}")
},
_ => {
format!("Missing binary {cmd}, please compile it.")
Expand Down
39 changes: 17 additions & 22 deletions crates/orchestrator/src/network/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use provider::DynNode;
use regex::Regex;
use serde::Serialize;
use subxt::{backend::rpc::RpcClient, OnlineClient};
use support::net::wait_ws_ready;
use support::net::{skip_err_while_waiting, wait_ws_ready};
use thiserror::Error;
use tokio::sync::RwLock;
use tracing::{debug, trace};
Expand Down Expand Up @@ -206,27 +206,22 @@ impl NetworkNode {
return Ok(());
}
},
Err(e) => {
match e.downcast::<reqwest::Error>() {
Ok(io) => {
// if the error is connecting could be the case that the node
// is not listening yet, so we keep waiting
// Skipped err is: 'tcp connect error: Connection refused (os error 61)'
if !io.is_connect() {
return Err(io.into());
}
},
Err(other) => {
match other.downcast::<NetworkNodeError>() {
Ok(node_err) => {
if !matches!(node_err, NetworkNodeError::MetricNotFound(_)) {
return Err(node_err.into());
}
},
Err(other) => return Err(other),
};
},
}
Err(e) => match e.downcast::<reqwest::Error>() {
Ok(io_err) => {
if !skip_err_while_waiting(&io_err) {
return Err(io_err.into());
}
},
Err(other) => {
match other.downcast::<NetworkNodeError>() {
Ok(node_err) => {
if !matches!(node_err, NetworkNodeError::MetricNotFound(_)) {
return Err(node_err.into());
}
},
Err(other) => return Err(other),
};
},
},
}

Expand Down
18 changes: 11 additions & 7 deletions crates/support/src/net.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,7 @@ pub async fn wait_ws_ready(url: &str) -> Result<()> {
trace!("http_client status: {}, continuing...", res.status());
},
Err(e) => {
// if the error is connecting/request could be the case that the node
// is not listening yet, so we keep waiting
// Skipped errs like:
// 'tcp connect error: Connection refused (os error 61)'
// 'operation was canceled: connection closed before message completed'
// 'connection error: Connection reset by peer (os error 54)'
if !(e.is_connect() || e.is_request()) {
if !skip_err_while_waiting(&e) {
return Err(e.into());
}

Expand All @@ -54,3 +48,13 @@ pub async fn wait_ws_ready(url: &str) -> Result<()> {

Ok(())
}

/// Decides whether a `reqwest` error seen while waiting on a node can be ignored.
///
/// Returns `true` when the error is a connect error or a request error. Such
/// failures may just mean the node is not listening yet, so callers keep
/// waiting instead of returning the error. Examples of errors that are skipped:
///
/// - `tcp connect error: Connection refused (os error 61)`
/// - `operation was canceled: connection closed before message completed`
/// - `connection error: Connection reset by peer (os error 54)`
pub fn skip_err_while_waiting(e: &reqwest::Error) -> bool {
    // A connect failure is enough on its own to keep waiting.
    if e.is_connect() {
        return true;
    }

    // Otherwise only request-level failures are skippable.
    e.is_request()
}
Loading