Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

suspend + resume support #112

Merged
merged 2 commits into from
Jul 7, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions dapp_manager/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,61 @@ def start(
print(dapp.app_id)


# NOTE: the function docstring below doubles as the `resume --help` text rendered by click,
# so implementation notes live in comments rather than in the docstring.
@cli.command()
@_with_app_id
@click.option(
    "--config",
    "-c",
    required=True,
    type=Path,
    help="Path to the file containing yagna-specific config.",
)
@click.option(
    "--log-level",
    type=click.Choice(LOG_CHOICES, case_sensitive=False),
)
@click.option("--api-port", type=int, help="Enable the GAOM API on a given port.")
@click.option(
    "--api-host",
    type=str,
    # Adjacent string literals are concatenated verbatim, hence the explicit trailing space.
    help="Specify a host address for the GAOM API to bind to. (default: 127.0.0.1) "
    "Requires `--api-port` to also be specified.",
)
@click.option(
    "--skip-manifest-validation",
    is_flag=True,
    default=False,
)
@_capture_api_exceptions
def resume(
    app_id: str,
    *,
    config: Path,
    log_level: Optional[str],
    api_port: Optional[int],
    api_host: Optional[str],
    skip_manifest_validation: bool,
):
    """Resume an application from the saved state."""
    # The API is only enabled when a port is given; a host without a port is a usage error.
    if api_port:
        api_kwargs = {"api_host": api_host or "127.0.0.1", "api_port": api_port}
    elif api_host:
        raise DappManagerException("To enable the API, please specify the `--api-port` too.")
    else:
        api_kwargs = {}

    dapp = DappManager(app_id)

    # `DappManager.resume` returns a status message which is echoed to the user.
    print(
        dapp.resume(
            config=config,
            log_level=log_level,
            skip_manifest_validation=skip_manifest_validation,
            **api_kwargs,  # type: ignore [arg-type]  # noqa
        )
    )


@cli.command()
@_capture_api_exceptions
def list():
Expand Down Expand Up @@ -180,10 +235,20 @@ def exec(*, app_id, service, command, timeout):
@_with_app_id
@_capture_api_exceptions
def inspect(*, app_id):
"""Display detailed information about the running application's state."""
dapp = DappManager(app_id)
print(dapp.inspect())


# NOTE: the docstring below is also the `suspend --help` text rendered by click.
@cli.command()
@_with_app_id
@_capture_api_exceptions
def suspend(*, app_id):
    """Suspend a running application and save its state."""
    # The manager returns a status message; echo it to stdout.
    status = DappManager(app_id).suspend()
    print(status)


@cli.command()
@_with_app_id
@click.argument("file-type", type=click.Choice(["state", "data", "log", "stdout", "stderr"]))
Expand Down
56 changes: 54 additions & 2 deletions dapp_manager/dapp_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,10 @@

import appdirs
import psutil
import requests

from .dapp_starter import DappStarter
from .exceptions import AppNotRunning, GaomApiUnavailable
from .exceptions import AppNotRunning, AppRunning, GaomApiError, GaomApiUnavailable, NoGaomSaveFile
from .inspect import Inspect
from .storage import RunnerReadFileType, SimpleStorage

Expand Down Expand Up @@ -95,7 +96,7 @@ def prune(cls) -> List[str]:
pruned = []
for app_id in cls.list():
storage = cls._create_storage(app_id)
if not storage.alive:
if not storage.alive and not storage.gaom_saved:
storage.delete()
pruned.append(app_id)
return pruned
Expand Down Expand Up @@ -215,6 +216,53 @@ def inspect(self) -> str:
inspect = Inspect(api)
return inspect.display_app_structure()

def suspend(self) -> str:
    """Ask the runner to suspend the app and store the state it returns.

    Sends a POST request to the `/suspend` endpoint of the app's GAOM API and writes the
    response body to the `gaom_save` storage file.

    Raises `AppNotRunning` if the app is not alive, `GaomApiUnavailable` if no API address
    is stored for the app, and `GaomApiError` if the API responds with a non-OK status.
    """
    self._ensure_alive()
    api_url = self._ensure_api()

    response = requests.post(f"{api_url}/suspend")
    if response.status_code != requests.codes.ok:
        raise GaomApiError(self.app_id)

    # Keep the returned payload so that `resume` can pick it up later.
    self.storage.write_file("gaom_save", response.text)
    return "App suspended"

def resume(
    self,
    config: PathType,
    log_level: Optional[str] = None,
    api_host: Optional[str] = None,
    api_port: Optional[int] = None,
    skip_manifest_validation: bool = False,
    timeout: float = 1,
) -> str:
    """Resume the application from its saved state.

    The `gaom_save` file is renamed to `gaom_resume`, which is then handed to a
    `DappStarter` as the only descriptor, with `resume=True`.

    `config`, `log_level`, `api_host`, `api_port` and `skip_manifest_validation` are passed
    through to the `DappStarter`; `timeout` is passed to its `start` method.

    Raises `AppRunning` if the app is still alive and `NoGaomSaveFile` if there is no
    `gaom_save` file to resume from. Returns a status message on success.
    """

    self._ensure_stopped()

    gaom_save = self.storage.file_name("gaom_save")
    gaom_resume = self.storage.file_name("gaom_resume")
    try:
        # After the rename there is no `gaom_save` file left for this app.
        gaom_save.replace(gaom_resume)
    except FileNotFoundError as err:
        # Chain explicitly so the underlying OS error stays attached as the cause.
        raise NoGaomSaveFile(self.app_id) from err

    descriptor_paths = [Path(gaom_resume)]
    config_path = Path(config)

    starter = DappStarter(
        descriptor_paths,
        config_path,
        self.storage,
        log_level=log_level,
        api_host=api_host,
        api_port=api_port,
        skip_manifest_validation=skip_manifest_validation,
        resume=True,
    )
    starter.start(timeout=timeout)

    return "App resumed"

def stop(self, timeout: int) -> bool:
"""Stop the dapp gracefully (SIGINT), waiting at most `timeout` seconds.

Expand Down Expand Up @@ -277,6 +325,10 @@ def _ensure_alive(self) -> None:
if not self.alive:
raise AppNotRunning(self.app_id)

def _ensure_stopped(self) -> None:
    """Raise `AppRunning` if the app is alive; otherwise do nothing."""
    if not self.alive:
        return
    raise AppRunning(self.app_id)

def _ensure_api(self) -> str:
if not self.storage.api:
raise GaomApiUnavailable(self.app_id)
Expand Down
12 changes: 10 additions & 2 deletions dapp_manager/dapp_starter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from typing import Any, Dict, List, Optional, Tuple

from .exceptions import StartupFailed
from .storage import RunnerFileType, SimpleStorage
from .storage import PublicRunnerFileType, SimpleStorage

DEFAULT_EXEC_STR = "dapp-runner"

Expand All @@ -21,6 +21,7 @@ def __init__(
api_host: Optional[str] = None,
api_port: Optional[int] = None,
skip_manifest_validation: bool = False,
resume: bool = False,
):
self.descriptors = descriptors
self.config = config
Expand All @@ -29,6 +30,7 @@ def __init__(
self.api_host = api_host
self.api_port = api_port
self.skip_manifest_validation = skip_manifest_validation
self.resume = resume

def start(self, timeout: float) -> None:
"""Start a dapp. Wait TIMEOUT seconds. Raise StartupFailed if process is not running."""
Expand All @@ -48,6 +50,7 @@ def start(self, timeout: float) -> None:
proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, **kwargs)

success, stdout, stderr = self._check_succesful_startup(proc, timeout)

if not success:
try:
runner_stdout = self.storage.read_file("stdout")
Expand Down Expand Up @@ -102,11 +105,16 @@ def _cli_args(self) -> List[str]:
args = ["start"]
args += ["--config", str(self.config.resolve())]

if self.resume:
args += [
"--resume",
]

if self.log_level:
args += ["--log-level", self.log_level]

# TODO: is there a better way to iterate over elements of a Literal type?
for file_type in RunnerFileType.__args__: # type: ignore [attr-defined]
for file_type in PublicRunnerFileType.__args__: # type: ignore [attr-defined]
file_name = str(self.storage.file_name(file_type).resolve())
args += [f"--{file_type}", file_name]

Expand Down
31 changes: 31 additions & 0 deletions dapp_manager/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,34 @@ def __init__(self, app_id):
super().__init__(
f"GAOM API unavailable for {app_id}. Please start the app with the `--app-port` option."
)


class AppRunning(DappManagerException):
    """Exception raised when an app is running although it should have been stopped.

    Some operations require the app not to be running (e.g. after it has been suspended);
    this exception signals that it still is.
    """

    SHELL_EXIT_CODE = 8

    def __init__(self, app_id):
        message = f"App {app_id} is running."
        super().__init__(message)


class NoGaomSaveFile(DappManagerException):
    """Exception raised when an app is to be resumed but has no saved GAOM state."""

    SHELL_EXIT_CODE = 9

    def __init__(self, app_id):
        message = f"No saved GAOM state available for {app_id}."
        super().__init__(message)


class GaomApiError(DappManagerException):
    """Generic failure while talking to the GAOM API."""

    SHELL_EXIT_CODE = 10

    def __init__(self, app_id):
        message = f"GAOM API error in {app_id}. Check the logs for details"
        super().__init__(message)
2 changes: 1 addition & 1 deletion dapp_manager/inspect/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def _get_template(name: str) -> Template:
def fetch_gaom(self):
"""Retrieve the current Golem Application Object Model data."""

data = requests.get(f"{self.api_address}/gaom/")
data = requests.get(f"{self.api_address}/gaom")
self._gaom = data.json()

@property
Expand Down
9 changes: 8 additions & 1 deletion dapp_manager/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@

READ_FILE_ITER_CHUNK_SIZE = 1024

RunnerFileType = Literal["data", "state", "log", "stdout", "stderr", "commands"]
PublicRunnerFileType = Literal["data", "state", "log", "stdout", "stderr", "commands"]
RunnerFileType = Literal[
"data", "state", "log", "stdout", "stderr", "commands", "gaom_save", "gaom_resume"
]
RunnerReadFileType = Literal["data", "state", "log", "stdout", "stderr"]


Expand Down Expand Up @@ -60,6 +63,10 @@ def delete(self) -> None:
def alive(self) -> bool:
return os.path.isfile(self.pid_file)

@property
def gaom_saved(self) -> bool:
    """Tell whether the `gaom_save` file currently exists for this storage."""
    save_path = self.file_name("gaom_save")
    return os.path.isfile(save_path)

@contextmanager
def open(self, file_type: RunnerFileType, mode):
with self.file_name(file_type).open(mode) as f:
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ psutil = "^5.9"
appdirs = "^1.4"
click = "^7.0" # requires bump to goth's dependencies https://github.com/golemfactory/goth/issues/605
pydantic = "^1.9"
dapp-runner = { git = "https://github.com/golemfactory/dapp-runner.git", branch = "main" }
dapp-runner = { git = "https://github.com/golemfactory/dapp-runner.git", branch = "blue/suspend-resume" }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this to be merged?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no, but until the dapp-runner change is merged, this feature won't work on main

mako = "^1.2.4"
requests = "^2.31.0"

Expand Down