Skip to content

Commit

Permalink
Merge pull request #15 from avnik/avnik/fix-restart
Browse files Browse the repository at this point in the history
fix restart crashed applications
  • Loading branch information
mbssrc authored Sep 11, 2024
2 parents d3291cc + 06f5849 commit 6a85012
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 12 deletions.
7 changes: 6 additions & 1 deletion common/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,12 @@ pub struct UnitStatus {

/// State predicates over the `active_state`, `load_state` and `sub_state`
/// string fields of a `UnitStatus`.
impl UnitStatus {
/// Reports whether the unit is considered to be running.
pub fn is_running(&self) -> bool {
// NOTE(review): this text is a rendered diff without +/- markers. The two
// expressions below appear to be the replaced and the replacement version
// of the same return value (the hunk header reports one deletion); only
// one of them can exist in the real file. The second, stricter form also
// requires `load_state == "loaded"` and `sub_state == "running"`.
self.active_state == "active"
self.active_state == "active" && self.load_state == "loaded" && self.sub_state == "running"
}
/// Reports whether the unit has exited: `active_state` is "inactive",
/// `load_state` is "not-found" and `sub_state` is "dead" (all three must hold).
///
/// The name is spelled `is_exitted` (double "t"); `handle_error` in
/// `src/admin/server.rs` calls it under that spelling, so a rename would
/// have to update callers too.
// NOTE(review): presumably these are systemd unit state strings. Confirm that
// a cleanly exited application unit really reports `load_state` "not-found"
// rather than "loaded", since deregistration is gated on this predicate.
pub fn is_exitted(&self) -> bool {
self.active_state == "inactive"
&& self.load_state == "not-found"
&& self.sub_state == "dead"
}
}

Expand Down
26 changes: 21 additions & 5 deletions nixos/tests/admin.nix
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,10 @@ in
admin = adminSettings;
tls = mkTls "chromium-vm";
applications = lib.mkForce (
builtins.toJSON { "foot" = "/run/current-system/sw/bin/run-waypipe ${pkgs.foot}/bin/foot"; }
builtins.toJSON {
"foot" = "/run/current-system/sw/bin/run-waypipe ${pkgs.foot}/bin/foot";
"clearexit" = "/run/current-system/sw/bin/sleep 5";
}
);
};
};
Expand Down Expand Up @@ -288,10 +291,23 @@ in
swaymsg("exec ssh -R /tmp/vsock:/tmp/vsock -f -N ${addrs.appvm}")
time.sleep(5) # Give ssh some time to setup remote socket
#swaymsg("exec run-waypipe foot")
print(hostvm.succeed("${cli} --addr ${nodes.adminvm.config.givc.admin.addr} --port ${nodes.adminvm.config.givc.admin.port} --cacert ${nodes.hostvm.givc.host.tls.caCertPath} --cert ${nodes.hostvm.givc.host.tls.certPath} --key ${nodes.hostvm.givc.host.tls.keyPath} ${if tls then "" else "--notls"} --name ${nodes.adminvm.config.givc.admin.name} start foot"))
time.sleep(10) # Give few seconds to application to spin up
wait_for_window("ghaf@appvm")
with subtest("Clean run"):
print(hostvm.succeed("${cli} --addr ${nodes.adminvm.config.givc.admin.addr} --port ${nodes.adminvm.config.givc.admin.port} --cacert ${nodes.hostvm.givc.host.tls.caCertPath} --cert ${nodes.hostvm.givc.host.tls.certPath} --key ${nodes.hostvm.givc.host.tls.keyPath} ${if tls then "" else "--notls"} --name ${nodes.adminvm.config.givc.admin.name} start foot"))
time.sleep(10) # Give few seconds to application to spin up
wait_for_window("ghaf@appvm")
with subtest("crash and restart"):
# Crash application
appvm.succeed("pkill foot")
time.sleep(10)
# .. then ask to restart
print(hostvm.succeed("${cli} --addr ${nodes.adminvm.config.givc.admin.addr} --port ${nodes.adminvm.config.givc.admin.port} --cacert ${nodes.hostvm.givc.host.tls.caCertPath} --cert ${nodes.hostvm.givc.host.tls.certPath} --key ${nodes.hostvm.givc.host.tls.keyPath} ${if tls then "" else "--notls"} --name ${nodes.adminvm.config.givc.admin.name} start foot"))
wait_for_window("ghaf@appvm")
with subtest("clear exit and restart"):
print(hostvm.succeed("${cli} --addr ${nodes.adminvm.config.givc.admin.addr} --port ${nodes.adminvm.config.givc.admin.port} --cacert ${nodes.hostvm.givc.host.tls.caCertPath} --cert ${nodes.hostvm.givc.host.tls.certPath} --key ${nodes.hostvm.givc.host.tls.keyPath} ${if tls then "" else "--notls"} --name ${nodes.adminvm.config.givc.admin.name} start --vm foot-vm clearexit"))
time.sleep(20) # Give few seconds to application to spin up, exit, then start it again
print(hostvm.succeed("${cli} --addr ${nodes.adminvm.config.givc.admin.addr} --port ${nodes.adminvm.config.givc.admin.port} --cacert ${nodes.hostvm.givc.host.tls.caCertPath} --cert ${nodes.hostvm.givc.host.tls.certPath} --key ${nodes.hostvm.givc.host.tls.keyPath} ${if tls then "" else "--notls"} --name ${nodes.adminvm.config.givc.admin.name} start --vm foot-vm clearexit"))
'';
};
};
Expand Down
10 changes: 4 additions & 6 deletions src/admin/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,9 @@ impl AdminServiceImpl {
pub async fn handle_error(&self, entry: RegistryEntry) -> anyhow::Result<()> {
match (entry.r#type.vm, entry.r#type.service) {
(VmType::AppVM, ServiceType::App) => {
self.registry.deregister(&entry.name)?;
if entry.status.is_exitted() {
self.registry.deregister(&entry.name)?;
}
Ok(())
}
(VmType::AppVM, ServiceType::Mgr) | (VmType::SysVM, ServiceType::Mgr) => {
Expand All @@ -184,11 +186,7 @@ impl AdminServiceImpl {
debug!("Monitoring {}...", &entry.name);
match self.get_remote_status(&entry).await {
Err(err) => {
error!(
"could not get status of unit {}: {}",
entry.name.clone(),
err
);
error!("could not get status of unit {}: {}", &entry.name, err);
self.handle_error(entry)
.await
.with_context(|| "during handle error")?
Expand Down

0 comments on commit 6a85012

Please sign in to comment.