From 42bb2ee3d8e4744649466969eee582a3b5c0c84f Mon Sep 17 00:00:00 2001 From: Wyatt Rees Date: Tue, 2 Jul 2024 15:37:05 -0600 Subject: [PATCH 01/13] Set up juju HA when number of machines is odd and > 2 --- anvil-python/anvil/provider/local/commands.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/anvil-python/anvil/provider/local/commands.py b/anvil-python/anvil/provider/local/commands.py index 53997b3..52d86b7 100644 --- a/anvil-python/anvil/provider/local/commands.py +++ b/anvil-python/anvil/provider/local/commands.py @@ -12,7 +12,7 @@ # implied. # See the License for the specific language governing permissions and # limitations under the License. - +import asyncio import logging from pathlib import Path from typing import List @@ -44,6 +44,7 @@ RegisterJujuUserStep, RemoveJujuMachineStep, SaveJujuUserLocallyStep, + ScaleJujuStep, ) from sunbeam.jobs.checks import ( JujuSnapCheck, @@ -501,6 +502,19 @@ def join( ) run_plan(plan2, console) + machines = asyncio.run( + jhelper.get_machines(deployment.infrastructure_model) + ) + n_machines = len(machines) + LOG.debug(f"Juju machines: {machines}") + if n_machines > 2 and n_machines % 2 == 1: + plan2.append( + ScaleJujuStep( + controller, + n_machines, + ["--to", ",".join([str(x) for x in n_machines.keys()])], + ) + ) click.echo(f"Node joined cluster with roles: {pretty_roles}") From 4d834201d02c9a9acefac516f66a68dac4269cc7 Mon Sep 17 00:00:00 2001 From: Wyatt Rees Date: Wed, 3 Jul 2024 07:58:37 -0600 Subject: [PATCH 02/13] fix ScaleJujuStep args --- anvil-python/anvil/provider/local/commands.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/anvil-python/anvil/provider/local/commands.py b/anvil-python/anvil/provider/local/commands.py index 52d86b7..5c293dc 100644 --- a/anvil-python/anvil/provider/local/commands.py +++ b/anvil-python/anvil/provider/local/commands.py @@ -501,7 +501,6 @@ def join( ) ) - run_plan(plan2, console) machines = asyncio.run( jhelper.get_machines(deployment.infrastructure_model) ) @@ -512,9 +511,10 @@ def join( ScaleJujuStep( controller, n_machines, - ["--to", ",".join([str(x) for x in n_machines.keys()])], + ["--to", ",".join(machines.keys())], ) ) + run_plan(plan2, console) click.echo(f"Node joined cluster with roles: {pretty_roles}") From c0af08dea31edeca131a7fd327b19df5cbf3d9c2 Mon Sep 17 00:00:00 2001 From: Wyatt Rees Date: Wed, 3 Jul 2024 13:06:28 -0600 Subject: [PATCH 03/13] Use juju CLI to determine machine count and which machines need controllers --- anvil-python/anvil/provider/local/commands.py | 16 +++++++++++----- anvil-python/anvil/utils.py | 19 +++++++++++++++++++ 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/anvil-python/anvil/provider/local/commands.py b/anvil-python/anvil/provider/local/commands.py index 5c293dc..e1812fe 100644 --- a/anvil-python/anvil/provider/local/commands.py +++ b/anvil-python/anvil/provider/local/commands.py @@ -13,8 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. import asyncio +import json import logging from pathlib import Path +import subprocess from typing import List import click @@ -98,7 +100,7 @@ from anvil.jobs.juju import CONTROLLER from anvil.jobs.manifest import Manifest from anvil.provider.local.deployment import LocalDeployment -from anvil.utils import CatchGroup +from anvil.utils import CatchGroup, machines_missing_juju_controllers LOG = logging.getLogger(__name__) console = Console() @@ -501,17 +503,21 @@ def join( ) ) - machines = asyncio.run( - jhelper.get_machines(deployment.infrastructure_model) + machines_res = subprocess.run( + ["juju", "machines", "--format", "json"], capture_output=True ) + machines = json.loads(machines_res.stdout)["machines"] n_machines = len(machines) - LOG.debug(f"Juju machines: {machines}") if n_machines > 2 and n_machines % 2 == 1: + machines_to_join = machines_missing_juju_controllers() + LOG.debug( + f"Will enable Juju controller on machines {machines_to_join}" + ) plan2.append( ScaleJujuStep( controller, n_machines, - ["--to", ",".join(machines.keys())], + ["--to", ",".join(machines_to_join)], ) ) run_plan(plan2, console) diff --git a/anvil-python/anvil/utils.py b/anvil-python/anvil/utils.py index ce35682..3ab326a 100644 --- a/anvil-python/anvil/utils.py +++ b/anvil-python/anvil/utils.py @@ -13,7 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +import json import logging +import subprocess import sys import click @@ -43,3 +45,20 @@ def __call__(self, *args, **kwargs): # type: ignore[no-untyped-def] LOG.warn(message) LOG.error("Error: %s", e) sys.exit(1) + + +def machines_missing_juju_controllers() -> list[str]: + result = subprocess.run( + ["juju", "show-controller", "anvil-controller", "--format", "json"], + capture_output=True, + ) + controllers = json.loads(result.stdout) + controller_machines = set( + controllers["anvil-controller"]["controller-machines"].keys() + ) + + machines_res = subprocess.run( + ["juju", "machines", "--format", "json"], capture_output=True + ) + machines = set(json.loads(machines_res.stdout)["machines"].keys()) + return list(machines - controller_machines) From 0f876e42bdedba5c5085f290b595bb516cd1a203 Mon Sep 17 00:00:00 2001 From: Wyatt Rees Date: Tue, 9 Jul 2024 16:18:36 -0600 Subject: [PATCH 04/13] override ScaleJujuStep.run to use correct model name --- README.md | 3 ++ anvil-python/anvil/jobs/juju.py | 45 +++++++++++++++++++ anvil-python/anvil/provider/local/commands.py | 5 +-- 3 files changed, 50 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 7a7cf4e..f8e55e4 100644 --- a/README.md +++ b/README.md @@ -65,6 +65,9 @@ ubuntu@infra3:~$ maas-anvil cluster join \ --token eyJuYW1lIjoibWFhcy00Lm1hYXMiLCJzZWNyZXQiOiI3MmE512342abcdEASWWxOWNlYWNkYmJjMWRmMjk4OThkYWFkYzQzMDAzZjk4NmRkZDI2MWRhYWVkZTIxIiwiZmluZ2VycHJpbnQiOiJlODU5ZmY5NjAwMDU4OGFjZmQ5ZDM0NjFhMDk5NmU1YTU3YjhjN2Q2ZjE4M2NjZDRlOTg2NGRkZjQ3NWMwZWM1Iiwiam9pbl9hZGRyZXNzZXMiOlsiMTAuMjAuMC43OjcwMDAiLCIxMC4yMC4wLjg6NzAwMCJdfQ== ``` +#### Juju Controller HA +Starting with the third machine joining the cluster, `maas-anvil` will automatically start adding Juju controllers to machines that are missing them. This will occur at every other join (When machines 3, 5, 7, and so on join) + ### Confirm the cluster status ```bash diff --git a/anvil-python/anvil/jobs/juju.py b/anvil-python/anvil/jobs/juju.py index ba578c7..dec99b9 100644 --- a/anvil-python/anvil/jobs/juju.py +++ b/anvil-python/anvil/jobs/juju.py @@ -12,5 +12,50 @@ # implied. # See the License for the specific language governing permissions and # limitations under the License. +import logging +import subprocess + +from rich.status import Status +from sunbeam.commands.juju import ScaleJujuStep +from sunbeam.jobs.common import Result, ResultType CONTROLLER = "anvil-controller" +LOG = logging.getLogger(__name__) + + +class AnvilScaleJujuStep(ScaleJujuStep): + def run(self, status: Status | None = None) -> Result: + cmd = [ + self._get_juju_binary(), + "enable-ha", + "-n", + str(self.n), + *self.extra_args, + ] + LOG.debug(f'Running command {" ".join(cmd)}') + process = subprocess.run( + cmd, capture_output=True, text=True, check=True + ) + LOG.debug( + f"Command finished. stdout={process.stdout}, stderr={process.stderr}" + ) + cmd = [ + self._get_juju_binary(), + "wait-for", + "application", + "-m", + "admin/controller", + "controller", + "--timeout", + "15m", + ] + self.update_status(status, "scaling controller") + LOG.debug("Waiting for HA to be enabled") + LOG.debug(f'Running command {" ".join(cmd)}') + process = subprocess.run( + cmd, capture_output=True, text=True, check=True + ) + LOG.debug( + f"Command finished. stdout={process.stdout}, stderr={process.stderr}" + ) + return Result(ResultType.COMPLETED) diff --git a/anvil-python/anvil/provider/local/commands.py b/anvil-python/anvil/provider/local/commands.py index e1812fe..870ed1c 100644 --- a/anvil-python/anvil/provider/local/commands.py +++ b/anvil-python/anvil/provider/local/commands.py @@ -46,7 +46,6 @@ RegisterJujuUserStep, RemoveJujuMachineStep, SaveJujuUserLocallyStep, - ScaleJujuStep, ) from sunbeam.jobs.checks import ( JujuSnapCheck, @@ -97,7 +96,7 @@ roles_to_str_list, validate_roles, ) -from anvil.jobs.juju import CONTROLLER +from anvil.jobs.juju import CONTROLLER, AnvilScaleJujuStep from anvil.jobs.manifest import Manifest from anvil.provider.local.deployment import LocalDeployment from anvil.utils import CatchGroup, machines_missing_juju_controllers @@ -514,7 +513,7 @@ def join( f"Will enable Juju controller on machines {machines_to_join}" ) plan2.append( - ScaleJujuStep( + AnvilScaleJujuStep( controller, n_machines, ["--to", ",".join(machines_to_join)], From 30632bbb5eccd88774426351927eb4839729c252 Mon Sep 17 00:00:00 2001 From: Wyatt Rees Date: Tue, 9 Jul 2024 16:21:55 -0600 Subject: [PATCH 05/13] use set difference method instead of - operator --- anvil-python/anvil/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/anvil-python/anvil/utils.py b/anvil-python/anvil/utils.py index 3ab326a..c6c8a59 100644 --- a/anvil-python/anvil/utils.py +++ b/anvil-python/anvil/utils.py @@ -61,4 +61,4 @@ def machines_missing_juju_controllers() -> list[str]: ["juju", "machines", "--format", "json"], capture_output=True ) machines = set(json.loads(machines_res.stdout)["machines"].keys()) - return list(machines - controller_machines) + return list(machines.difference(controller_machines)) From 6809f165c37c143858704b20781a94a182d0f910 Mon Sep 17 00:00:00 2001 From: Wyatt Rees Date: Fri, 12 Jul 2024 14:39:35 -0600 Subject: [PATCH 06/13] Move AnvilScaleJujuStep to anvil.commands to avoid circular import. Add custom JujuManifest to remove scaling_args. Add is_skip override to determine whether it needs to be run and determine which machines need controllers --- anvil-python/anvil/commands/juju.py | 63 +++++++++++++++++++ anvil-python/anvil/jobs/juju.py | 45 ------------- anvil-python/anvil/jobs/manifest.py | 15 ++++- anvil-python/anvil/provider/local/commands.py | 28 ++------- anvil-python/anvil/utils.py | 6 +- 5 files changed, 85 insertions(+), 72 deletions(-) diff --git a/anvil-python/anvil/commands/juju.py b/anvil-python/anvil/commands/juju.py index 3fbaa3b..c8909d4 100644 --- a/anvil-python/anvil/commands/juju.py +++ b/anvil-python/anvil/commands/juju.py @@ -13,13 +13,20 @@ # See the License for the specific language governing permissions and # limitations under the License. +import json +import logging from os import environ import os.path import subprocess from rich.status import Status +from sunbeam.commands.juju import ScaleJujuStep from sunbeam.jobs.common import BaseStep, Result, ResultType +from anvil.utils import machines_missing_juju_controllers + +LOG = logging.getLogger(__name__) + class JujuAddSSHKeyStep(BaseStep): """Add this node's SSH key to the Juju model""" @@ -54,3 +61,59 @@ def run(self, status: Status | None) -> Result: message="Could not find public ssh key (~/.ssh/id_rsa.pub)", ) return Result(ResultType.COMPLETED) + + +class AnvilScaleJujuStep(ScaleJujuStep): + def run(self, status: Status | None = None) -> Result: + cmd = [ + self._get_juju_binary(), + "enable-ha", + "-n", + str(self.n), + *self.extra_args, + ] + LOG.debug(f'Running command {" ".join(cmd)}') + process = subprocess.run( + cmd, capture_output=True, text=True, check=True + ) + LOG.debug( + f"Command finished. stdout={process.stdout}, stderr={process.stderr}" + ) + cmd = [ + self._get_juju_binary(), + "wait-for", + "application", + "-m", + "admin/controller", + "controller", + "--timeout", + "15m", + ] + self.update_status(status, "scaling controller") + LOG.debug("Waiting for HA to be enabled") + LOG.debug(f'Running command {" ".join(cmd)}') + process = subprocess.run( + cmd, capture_output=True, text=True, check=True + ) + LOG.debug( + f"Command finished. stdout={process.stdout}, stderr={process.stderr}" + ) + return Result(ResultType.COMPLETED) + + def is_skip(self, status: Status | None = None) -> Result: + """Determines if the step should be skipped or not.""" + machines_res = subprocess.run( + ["juju", "machines", "--format", "json"], capture_output=True + ) + machines = json.loads(machines_res.stdout)["machines"] + n_machines = len(machines) + if n_machines > 2 and n_machines % 2 == 1: + machines_to_join = machines_missing_juju_controllers() + self.n = n_machines + self.extra_args.extend(("--to", ",".join(machines_to_join))) + LOG.debug( + f"Will enable Juju controller on machines {machines_to_join}" + ) + return Result(ResultType.COMPLETED) + LOG.debug("Wrong number of machines, skipping scaling Juju") + return Result(ResultType.SKIPPED) diff --git a/anvil-python/anvil/jobs/juju.py b/anvil-python/anvil/jobs/juju.py index dec99b9..ba578c7 100644 --- a/anvil-python/anvil/jobs/juju.py +++ b/anvil-python/anvil/jobs/juju.py @@ -12,50 +12,5 @@ # implied. # See the License for the specific language governing permissions and # limitations under the License. -import logging -import subprocess - -from rich.status import Status -from sunbeam.commands.juju import ScaleJujuStep -from sunbeam.jobs.common import Result, ResultType CONTROLLER = "anvil-controller" -LOG = logging.getLogger(__name__) - - -class AnvilScaleJujuStep(ScaleJujuStep): - def run(self, status: Status | None = None) -> Result: - cmd = [ - self._get_juju_binary(), - "enable-ha", - "-n", - str(self.n), - *self.extra_args, - ] - LOG.debug(f'Running command {" ".join(cmd)}') - process = subprocess.run( - cmd, capture_output=True, text=True, check=True - ) - LOG.debug( - f"Command finished. stdout={process.stdout}, stderr={process.stderr}" - ) - cmd = [ - self._get_juju_binary(), - "wait-for", - "application", - "-m", - "admin/controller", - "controller", - "--timeout", - "15m", - ] - self.update_status(status, "scaling controller") - LOG.debug("Waiting for HA to be enabled") - LOG.debug(f'Running command {" ".join(cmd)}') - process = subprocess.run( - cmd, capture_output=True, text=True, check=True - ) - LOG.debug( - f"Command finished. stdout={process.stdout}, stderr={process.stderr}" - ) - return Result(ResultType.COMPLETED) diff --git a/anvil-python/anvil/jobs/manifest.py b/anvil-python/anvil/jobs/manifest.py index 269a5e6..253b7db 100644 --- a/anvil-python/anvil/jobs/manifest.py +++ b/anvil-python/anvil/jobs/manifest.py @@ -20,6 +20,7 @@ import shutil from typing import Any, Dict, List +from pydantic import Field from pydantic.dataclasses import dataclass from snaphelpers import Snap from sunbeam import utils @@ -36,7 +37,6 @@ from sunbeam.jobs.deployment import Deployment from sunbeam.jobs.manifest import ( CharmsManifest, - JujuManifest, MissingTerraformInfoException, TerraformManifest, ) @@ -52,6 +52,19 @@ LOG = logging.getLogger(__name__) +@dataclass +class JujuManifest: + # Setting Field alias not supported in pydantic 1.10.0 + # Old version of pydantic is used due to dependencies + # with older version of paramiko from python-libjuju + # Newer version of pydantic can be used once the below + # PR is released + # https://github.com/juju/python-libjuju/pull/1005 + bootstrap_args: list[str] = Field( + default=[], description="Extra args for juju bootstrap" + ) + + @dataclass(config=dict(extra="allow")) # type: ignore[call-overload] class SoftwareConfig: deployment: InitVar[Deployment] diff --git a/anvil-python/anvil/provider/local/commands.py b/anvil-python/anvil/provider/local/commands.py index 870ed1c..829756e 100644 --- a/anvil-python/anvil/provider/local/commands.py +++ b/anvil-python/anvil/provider/local/commands.py @@ -12,11 +12,8 @@ # implied. # See the License for the specific language governing permissions and # limitations under the License. -import asyncio -import json import logging from pathlib import Path -import subprocess from typing import List import click @@ -76,7 +73,7 @@ RemoveHAProxyUnitStep, haproxy_install_steps, ) -from anvil.commands.juju import JujuAddSSHKeyStep +from anvil.commands.juju import AnvilScaleJujuStep, JujuAddSSHKeyStep from anvil.commands.maas_agent import ( RemoveMAASAgentUnitStep, maas_agent_install_steps, @@ -96,10 +93,10 @@ roles_to_str_list, validate_roles, ) -from anvil.jobs.juju import CONTROLLER, AnvilScaleJujuStep +from anvil.jobs.juju import CONTROLLER from anvil.jobs.manifest import Manifest from anvil.provider.local.deployment import LocalDeployment -from anvil.utils import CatchGroup, machines_missing_juju_controllers +from anvil.utils import CatchGroup LOG = logging.getLogger(__name__) console = Console() @@ -501,24 +498,7 @@ def join( name, ) ) - - machines_res = subprocess.run( - ["juju", "machines", "--format", "json"], capture_output=True - ) - machines = json.loads(machines_res.stdout)["machines"] - n_machines = len(machines) - if n_machines > 2 and n_machines % 2 == 1: - machines_to_join = machines_missing_juju_controllers() - LOG.debug( - f"Will enable Juju controller on machines {machines_to_join}" - ) - plan2.append( - AnvilScaleJujuStep( - controller, - n_machines, - ["--to", ",".join(machines_to_join)], - ) - ) + plan2.append(AnvilScaleJujuStep(controller)) run_plan(plan2, console) click.echo(f"Node joined cluster with roles: {pretty_roles}") diff --git a/anvil-python/anvil/utils.py b/anvil-python/anvil/utils.py index c6c8a59..1c20e9c 100644 --- a/anvil-python/anvil/utils.py +++ b/anvil-python/anvil/utils.py @@ -21,6 +21,8 @@ import click from sunbeam.plugins.interface.v1.base import PluginError +from anvil.jobs.juju import CONTROLLER + LOG = logging.getLogger(__name__) LOCAL_ACCESS = "local" REMOTE_ACCESS = "remote" @@ -49,12 +51,12 @@ def __call__(self, *args, **kwargs): # type: ignore[no-untyped-def] def machines_missing_juju_controllers() -> list[str]: result = subprocess.run( - ["juju", "show-controller", "anvil-controller", "--format", "json"], + ["juju", "show-controller", CONTROLLER, "--format", "json"], capture_output=True, ) controllers = json.loads(result.stdout) controller_machines = set( - controllers["anvil-controller"]["controller-machines"].keys() + controllers[CONTROLLER]["controller-machines"].keys() ) machines_res = subprocess.run( From b6a388585221133131661a1717d52cb5d061def5 Mon Sep 17 00:00:00 2001 From: Wyatt Rees Date: Tue, 16 Jul 2024 09:53:18 -0600 Subject: [PATCH 07/13] Add ScaleDownJujuStep (WIP), change inheritence to BaseStep, JujuStepHelper --- anvil-python/anvil/commands/juju.py | 28 +++++++++++++++++-- anvil-python/anvil/jobs/manifest.py | 8 +----- anvil-python/anvil/provider/local/commands.py | 4 +-- anvil-python/anvil/utils.py | 7 +++++ 4 files changed, 35 insertions(+), 12 deletions(-) diff --git a/anvil-python/anvil/commands/juju.py b/anvil-python/anvil/commands/juju.py index c8909d4..f7d47f8 100644 --- a/anvil-python/anvil/commands/juju.py +++ b/anvil-python/anvil/commands/juju.py @@ -20,7 +20,8 @@ import subprocess from rich.status import Status -from sunbeam.commands.juju import ScaleJujuStep + +from sunbeam.commands.juju import JujuStepHelper from sunbeam.jobs.common import BaseStep, Result, ResultType from anvil.utils import machines_missing_juju_controllers @@ -63,7 +64,28 @@ def run(self, status: Status | None) -> Result: return Result(ResultType.COMPLETED) -class AnvilScaleJujuStep(ScaleJujuStep): + +class ScaleDownJujuStep(BaseStep, JujuStepHelper): + """Remove Juju controller from the machine""" + + def __init__(self, controller: str): + super().__init__("Scale Down Juju Controller", "Remove Juju Controller from Machine") + + def is_skip(self, status: Status | None = None) -> Result: + return Result(ResultType.COMPLETED) + + +class ScaleUpJujuStep(BaseStep, JujuStepHelper): + """Enable Juju HA.""" + + def __init__( + self, controller: str, n: int = 3, extra_args: list[str] | None = None + ): + super().__init__("Juju HA", "Enable Juju High Availability") + self.controller = controller + self.n = n + self.extra_args = extra_args or [] + def run(self, status: Status | None = None) -> Result: cmd = [ self._get_juju_binary(), @@ -115,5 +137,5 @@ def is_skip(self, status: Status | None = None) -> Result: f"Will enable Juju controller on machines {machines_to_join}" ) return Result(ResultType.COMPLETED) - LOG.debug("Wrong number of machines, skipping scaling Juju") + LOG.debug("Number of machines must be odd and at least 3, skipping scaling Juju controllers") return Result(ResultType.SKIPPED) diff --git a/anvil-python/anvil/jobs/manifest.py b/anvil-python/anvil/jobs/manifest.py index 253b7db..fa2970c 100644 --- a/anvil-python/anvil/jobs/manifest.py +++ b/anvil-python/anvil/jobs/manifest.py @@ -54,14 +54,8 @@ @dataclass class JujuManifest: - # Setting Field alias not supported in pydantic 1.10.0 - # Old version of pydantic is used due to dependencies - # with older version of paramiko from python-libjuju - # Newer version of pydantic can be used once the below - # PR is released - # https://github.com/juju/python-libjuju/pull/1005 bootstrap_args: list[str] = Field( - default=[], description="Extra args for juju bootstrap" + default=[], description="Extra args for juju bootstrap", alias="bootstrap_args" ) diff --git a/anvil-python/anvil/provider/local/commands.py b/anvil-python/anvil/provider/local/commands.py index 829756e..915959c 100644 --- a/anvil-python/anvil/provider/local/commands.py +++ b/anvil-python/anvil/provider/local/commands.py @@ -73,7 +73,7 @@ RemoveHAProxyUnitStep, haproxy_install_steps, ) -from anvil.commands.juju import AnvilScaleJujuStep, JujuAddSSHKeyStep +from anvil.commands.juju import ScaleUpJujuStep, JujuAddSSHKeyStep from anvil.commands.maas_agent import ( RemoveMAASAgentUnitStep, maas_agent_install_steps, @@ -498,7 +498,7 @@ def join( name, ) ) - plan2.append(AnvilScaleJujuStep(controller)) + plan2.append(ScaleUpJujuStep(controller)) run_plan(plan2, console) click.echo(f"Node joined cluster with roles: {pretty_roles}") diff --git a/anvil-python/anvil/utils.py b/anvil-python/anvil/utils.py index 1c20e9c..6f11027 100644 --- a/anvil-python/anvil/utils.py +++ b/anvil-python/anvil/utils.py @@ -17,6 +17,7 @@ import logging import subprocess import sys +from typing import Any import click from sunbeam.plugins.interface.v1.base import PluginError @@ -48,6 +49,12 @@ def __call__(self, *args, **kwargs): # type: ignore[no-untyped-def] LOG.error("Error: %s", e) sys.exit(1) +def get_all_machines() -> dict[str: Any]: + machines_res = subprocess.run( + ["juju", "machines", "--format", "json"], capture_output=True + ) + return json.loads(machines_res.stdout)["machines"] + def machines_missing_juju_controllers() -> list[str]: result = subprocess.run( From 252c2f87dc5907a33a5f570a08fbb9bba338a60f Mon Sep 17 00:00:00 2001 From: Wyatt Rees Date: Tue, 16 Jul 2024 13:42:33 -0600 Subject: [PATCH 08/13] Remove ScaleDownJujuStep as removing controller units is not supported --- anvil-python/anvil/commands/juju.py | 15 +++------------ anvil-python/anvil/jobs/manifest.py | 4 +++- anvil-python/anvil/utils.py | 7 ------- 3 files changed, 6 insertions(+), 20 deletions(-) diff --git a/anvil-python/anvil/commands/juju.py b/anvil-python/anvil/commands/juju.py index f7d47f8..0f3fbad 100644 --- a/anvil-python/anvil/commands/juju.py +++ b/anvil-python/anvil/commands/juju.py @@ -64,17 +64,6 @@ def run(self, status: Status | None) -> Result: return Result(ResultType.COMPLETED) - -class ScaleDownJujuStep(BaseStep, JujuStepHelper): - """Remove Juju controller from the machine""" - - def __init__(self, controller: str): - super().__init__("Scale Down Juju Controller", "Remove Juju Controller from Machine") - - def is_skip(self, status: Status | None = None) -> Result: - return Result(ResultType.COMPLETED) - - class ScaleUpJujuStep(BaseStep, JujuStepHelper): """Enable Juju HA.""" @@ -137,5 +126,7 @@ def is_skip(self, status: Status | None = None) -> Result: f"Will enable Juju controller on machines {machines_to_join}" ) return Result(ResultType.COMPLETED) - LOG.debug("Number of machines must be odd and at least 3, skipping scaling Juju controllers") + LOG.debug( + "Number of machines must be odd and at least 3, skipping scaling Juju controllers" + ) return Result(ResultType.SKIPPED) diff --git a/anvil-python/anvil/jobs/manifest.py b/anvil-python/anvil/jobs/manifest.py index fa2970c..3048074 100644 --- a/anvil-python/anvil/jobs/manifest.py +++ b/anvil-python/anvil/jobs/manifest.py @@ -55,7 +55,9 @@ @dataclass class JujuManifest: bootstrap_args: list[str] = Field( - default=[], description="Extra args for juju bootstrap", alias="bootstrap_args" + default=[], + description="Extra args for juju bootstrap", + alias="bootstrap_args", ) diff --git a/anvil-python/anvil/utils.py b/anvil-python/anvil/utils.py index 6f11027..1c20e9c 100644 --- a/anvil-python/anvil/utils.py +++ b/anvil-python/anvil/utils.py @@ -17,7 +17,6 @@ import logging import subprocess import sys -from typing import Any import click from sunbeam.plugins.interface.v1.base import PluginError @@ -49,12 +48,6 @@ def __call__(self, *args, **kwargs): # type: ignore[no-untyped-def] LOG.error("Error: %s", e) sys.exit(1) -def get_all_machines() -> dict[str: Any]: - machines_res = subprocess.run( - ["juju", "machines", "--format", "json"], capture_output=True - ) - return json.loads(machines_res.stdout)["machines"] - def machines_missing_juju_controllers() -> list[str]: result = subprocess.run( From 80dea1b2d44f3d16dfbc1a8297fd71ef81702386 Mon Sep 17 00:00:00 2001 From: Wyatt Rees Date: Wed, 17 Jul 2024 10:16:40 -0600 Subject: [PATCH 09/13] Put upper limit of 7 on juju controllers, update debug stmt to indicate this --- anvil-python/anvil/commands/juju.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/anvil-python/anvil/commands/juju.py b/anvil-python/anvil/commands/juju.py index 0f3fbad..b19e057 100644 --- a/anvil-python/anvil/commands/juju.py +++ b/anvil-python/anvil/commands/juju.py @@ -118,7 +118,7 @@ def is_skip(self, status: Status | None = None) -> Result: ) machines = json.loads(machines_res.stdout)["machines"] n_machines = len(machines) - if n_machines > 2 and n_machines % 2 == 1: + if n_machines > 2 and n_machines <= 7 and n_machines % 2 == 1: machines_to_join = machines_missing_juju_controllers() self.n = n_machines self.extra_args.extend(("--to", ",".join(machines_to_join))) @@ -127,6 +127,7 @@ def is_skip(self, status: Status | None = None) -> Result: ) return Result(ResultType.COMPLETED) LOG.debug( - "Number of machines must be odd and at least 3, skipping scaling Juju controllers" + "Number of machines must be odd and between 3 and 7 (inclusive), " + "skipping scaling Juju controllers" ) return Result(ResultType.SKIPPED) From a348fbdcc65620586963d39fde38def2b0eadf99 Mon Sep 17 00:00:00 2001 From: Wyatt Rees Date: Fri, 19 Jul 2024 13:49:34 -0600 Subject: [PATCH 10/13] Set limit of juju controllers to 3. Add controllers if machines with controllers are removed --- anvil-python/anvil/commands/juju.py | 34 +++++++++++++++---- anvil-python/anvil/provider/local/commands.py | 3 +- 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/anvil-python/anvil/commands/juju.py b/anvil-python/anvil/commands/juju.py index b19e057..87686c6 100644 --- a/anvil-python/anvil/commands/juju.py +++ b/anvil-python/anvil/commands/juju.py @@ -27,6 +27,7 @@ from anvil.utils import machines_missing_juju_controllers LOG = logging.getLogger(__name__) +MAX_JUJU_CONTROLLERS = 3 class JujuAddSSHKeyStep(BaseStep): @@ -68,10 +69,15 @@ class ScaleUpJujuStep(BaseStep, JujuStepHelper): """Enable Juju HA.""" def __init__( - self, controller: str, n: int = 3, extra_args: list[str] | None = None + self, + controller: str, + joining: bool, + n: int = MAX_JUJU_CONTROLLERS, + extra_args: list[str] | None = None, ): super().__init__("Juju HA", "Enable Juju High Availability") self.controller = controller + self.joining = joining self.n = n self.extra_args = extra_args or [] @@ -118,16 +124,32 @@ def is_skip(self, status: Status | None = None) -> Result: ) machines = json.loads(machines_res.stdout)["machines"] n_machines = len(machines) - if n_machines > 2 and n_machines <= 7 and n_machines % 2 == 1: - machines_to_join = machines_missing_juju_controllers() - self.n = n_machines + machines_to_join = machines_missing_juju_controllers() + n_machines_no_controller = len(machines_to_join) + n_controller_machines = n_machines - n_machines_no_controller + if ( + self.joining + and n_controller_machines < MAX_JUJU_CONTROLLERS + and n_machines == 3 + ): self.extra_args.extend(("--to", ",".join(machines_to_join))) LOG.debug( f"Will enable Juju controller on machines {machines_to_join}" ) return Result(ResultType.COMPLETED) + elif ( + not self.joining + and n_controller_machines < MAX_JUJU_CONTROLLERS + and n_machines >= MAX_JUJU_CONTROLLERS + ): + # a controller machine has been removed, need to pick a new one + machines_to_join = machines_to_join[ + : (MAX_JUJU_CONTROLLERS - n_controller_machines) + ] + self.extra_args.extend(("--to", ",".join(machines_to_join))) + return Result(ResultType.COMPLETED) LOG.debug( - "Number of machines must be odd and between 3 and 7 (inclusive), " - "skipping scaling Juju controllers" + "Number of machines with controllers must not be greater than " + f"{MAX_JUJU_CONTROLLERS}, skipping scaling Juju controllers" ) return Result(ResultType.SKIPPED) diff --git a/anvil-python/anvil/provider/local/commands.py b/anvil-python/anvil/provider/local/commands.py index 915959c..368a349 100644 --- a/anvil-python/anvil/provider/local/commands.py +++ b/anvil-python/anvil/provider/local/commands.py @@ -498,7 +498,7 @@ def join( name, ) ) - plan2.append(ScaleUpJujuStep(controller)) + plan2.append(ScaleUpJujuStep(controller, True)) run_plan(plan2, console) click.echo(f"Node joined cluster with roles: {pretty_roles}") @@ -590,6 +590,7 @@ def remove(ctx: click.Context, name: str) -> None: # Cannot remove user as the same user name cannot be reused, # so commenting the RemoveJujuUserStep # RemoveJujuUserStep(name), + ScaleUpJujuStep(CONTROLLER, False), ClusterRemoveNodeStep(client, name), ] run_plan(plan, console) From 761e7c4ae2ef72e3da59b3d481d92adaa2c1422c Mon Sep 17 00:00:00 2001 From: Wyatt Rees Date: Mon, 22 Jul 2024 09:37:04 -0600 Subject: [PATCH 11/13] Use sunbeam for determining number of machines need controllers --- anvil-python/anvil/commands/juju.py | 91 +++++++++---------- anvil-python/anvil/provider/local/commands.py | 6 +- anvil-python/anvil/utils.py | 21 ----- 3 files changed, 48 insertions(+), 70 deletions(-) diff --git a/anvil-python/anvil/commands/juju.py b/anvil-python/anvil/commands/juju.py index 87686c6..a14e9d0 100644 --- a/anvil-python/anvil/commands/juju.py +++ b/anvil-python/anvil/commands/juju.py @@ -13,18 +13,18 @@ # See the License for the specific language governing permissions and # limitations under the License. -import json import logging from os import environ import os.path +import random import subprocess from rich.status import Status - from sunbeam.commands.juju import JujuStepHelper from sunbeam.jobs.common import BaseStep, Result, ResultType +from sunbeam.jobs.juju import JujuHelper, run_sync -from anvil.utils import machines_missing_juju_controllers +from anvil.jobs.juju import CONTROLLER LOG = logging.getLogger(__name__) MAX_JUJU_CONTROLLERS = 3 @@ -65,29 +65,43 @@ def run(self, status: Status | None) -> Result: return Result(ResultType.COMPLETED) -class ScaleUpJujuStep(BaseStep, JujuStepHelper): +class ScaleJujuStep(BaseStep, JujuStepHelper): """Enable Juju HA.""" def __init__( self, - controller: str, - joining: bool, - n: int = MAX_JUJU_CONTROLLERS, - extra_args: list[str] | None = None, + jhelper: JujuHelper, + model: str, ): super().__init__("Juju HA", "Enable Juju High Availability") - self.controller = controller - self.joining = joining - self.n = n - self.extra_args = extra_args or [] + + self.jhelper = jhelper + self.model = model + + self.controller_machines = self.get_controller(CONTROLLER)[ + "controller-machines" + ].keys() + self.machines = run_sync(self.jhelper.get_machines(self.model)).keys() def run(self, status: Status | None = None) -> Result: + """Run the step to completion.""" + + available_machines = list(self.machines ^ self.controller_machines) + n_machines_to_join = min( + len(available_machines), + MAX_JUJU_CONTROLLERS - len(self.controller_machines), + ) + cmd = [ self._get_juju_binary(), "enable-ha", "-n", - str(self.n), - *self.extra_args, + str(len(self.controller_machines) + n_machines_to_join), + "--to", + ",".join( + str(s) + for s in random.sample(available_machines, n_machines_to_join) + ), ] LOG.debug(f'Running command {" ".join(cmd)}') process = subprocess.run( @@ -119,37 +133,22 @@ def run(self, status: Status | None = None) -> Result: def is_skip(self, status: Status | None = None) -> Result: """Determines if the step should be skipped or not.""" - machines_res = subprocess.run( - ["juju", "machines", "--format", "json"], capture_output=True - ) - machines = json.loads(machines_res.stdout)["machines"] - n_machines = len(machines) - machines_to_join = machines_missing_juju_controllers() - n_machines_no_controller = len(machines_to_join) - n_controller_machines = n_machines - n_machines_no_controller - if ( - self.joining - and n_controller_machines < MAX_JUJU_CONTROLLERS - and n_machines == 3 - ): - self.extra_args.extend(("--to", ",".join(machines_to_join))) + + available_machines = self.machines ^ self.controller_machines + + if len(self.controller_machines) == MAX_JUJU_CONTROLLERS: LOG.debug( - f"Will enable Juju controller on machines {machines_to_join}" + "Number of machines with controllers must not be greater than " + f"{MAX_JUJU_CONTROLLERS}, skipping scaling Juju controllers" ) - return Result(ResultType.COMPLETED) - elif ( - not self.joining - and n_controller_machines < MAX_JUJU_CONTROLLERS - and n_machines >= MAX_JUJU_CONTROLLERS - ): - # a controller machine has been removed, need to pick a new one - machines_to_join = machines_to_join[ - : (MAX_JUJU_CONTROLLERS - n_controller_machines) - ] - self.extra_args.extend(("--to", ",".join(machines_to_join))) - return Result(ResultType.COMPLETED) - LOG.debug( - "Number of machines with controllers must not be greater than " - f"{MAX_JUJU_CONTROLLERS}, skipping scaling Juju controllers" - ) - return Result(ResultType.SKIPPED) + return Result(ResultType.SKIPPED) + if len(available_machines) == 0: + LOG.debug( + "No available machines, skipping scaling Juju controllers" + ) + return Result(ResultType.SKIPPED) + if len(self.machines) < 3: + LOG.debug("Number of machines must be at least 3") + return Result(ResultType.SKIPPED) + + return Result(ResultType.COMPLETED) diff --git a/anvil-python/anvil/provider/local/commands.py b/anvil-python/anvil/provider/local/commands.py index 368a349..961947c 100644 --- a/anvil-python/anvil/provider/local/commands.py +++ b/anvil-python/anvil/provider/local/commands.py @@ -73,7 +73,7 @@ RemoveHAProxyUnitStep, haproxy_install_steps, ) -from anvil.commands.juju import ScaleUpJujuStep, JujuAddSSHKeyStep +from anvil.commands.juju import JujuAddSSHKeyStep, ScaleJujuStep from anvil.commands.maas_agent import ( RemoveMAASAgentUnitStep, maas_agent_install_steps, @@ -498,7 +498,7 @@ def join( name, ) ) - plan2.append(ScaleUpJujuStep(controller, True)) + plan2.append(ScaleJujuStep(jhelper, deployment.infrastructure_model)) run_plan(plan2, console) click.echo(f"Node joined cluster with roles: {pretty_roles}") @@ -590,7 +590,7 @@ def remove(ctx: click.Context, name: str) -> None: # Cannot remove user as the same user name cannot be reused, # so commenting the RemoveJujuUserStep # RemoveJujuUserStep(name), - ScaleUpJujuStep(CONTROLLER, False), + ScaleJujuStep(jhelper, deployment.infrastructure_model), ClusterRemoveNodeStep(client, name), ] run_plan(plan, console) diff --git a/anvil-python/anvil/utils.py b/anvil-python/anvil/utils.py index 1c20e9c..ce35682 100644 --- a/anvil-python/anvil/utils.py +++ b/anvil-python/anvil/utils.py @@ -13,16 +13,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import json import logging -import subprocess import sys import click from sunbeam.plugins.interface.v1.base import PluginError -from anvil.jobs.juju import CONTROLLER - LOG = logging.getLogger(__name__) LOCAL_ACCESS = "local" REMOTE_ACCESS = "remote" @@ -47,20 +43,3 @@ def __call__(self, *args, **kwargs): # type: ignore[no-untyped-def] LOG.warn(message) LOG.error("Error: %s", e) sys.exit(1) - - -def machines_missing_juju_controllers() -> list[str]: - result = subprocess.run( - ["juju", "show-controller", CONTROLLER, "--format", "json"], - capture_output=True, - ) - controllers = json.loads(result.stdout) - controller_machines = set( - controllers[CONTROLLER]["controller-machines"].keys() - ) - - machines_res = subprocess.run( - ["juju", "machines", "--format", "json"], capture_output=True - ) - machines = set(json.loads(machines_res.stdout)["machines"].keys()) - return list(machines.difference(controller_machines)) From 810e964738e4f0dd34c73459d69d36cafc8a1519 Mon Sep 17 00:00:00 2001 From: Wyatt Rees Date: Mon, 22 Jul 2024 14:32:54 -0600 Subject: [PATCH 12/13] Wait to get machines list and controller machines list until ScaleJujuStep is about to run --- anvil-python/anvil/commands/juju.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/anvil-python/anvil/commands/juju.py b/anvil-python/anvil/commands/juju.py index a14e9d0..aab5bba 100644 --- a/anvil-python/anvil/commands/juju.py +++ b/anvil-python/anvil/commands/juju.py @@ -78,10 +78,8 @@ def __init__( self.jhelper = jhelper self.model = model - self.controller_machines = self.get_controller(CONTROLLER)[ - "controller-machines" - ].keys() - self.machines = run_sync(self.jhelper.get_machines(self.model)).keys() + self.controller_machines = None + self.machines = None def run(self, status: Status | None = None) -> Result: """Run the step to completion.""" @@ -134,6 +132,10 @@ def run(self, status: Status | None = None) -> Result: def is_skip(self, status: Status | None = None) -> Result: """Determines if the step should be skipped or not.""" + self.controller_machines = self.get_controller(CONTROLLER)[ + "controller-machines" + ].keys() + self.machines = run_sync(self.jhelper.get_machines(self.model)).keys() available_machines = self.machines ^ self.controller_machines if len(self.controller_machines) == MAX_JUJU_CONTROLLERS: @@ -151,4 +153,5 @@ def is_skip(self, status: Status | None = None) -> Result: LOG.debug("Number of machines must be at least 3") return Result(ResultType.SKIPPED) + return Result(ResultType.COMPLETED) From 647c17f3d4b47c8e11985e0776a329c89b713990 Mon Sep 17 00:00:00 2001 From: Wyatt Rees Date: Tue, 23 Jul 2024 08:32:47 -0600 Subject: [PATCH 13/13] lint --- anvil-python/anvil/commands/juju.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/anvil-python/anvil/commands/juju.py b/anvil-python/anvil/commands/juju.py index aab5bba..6242b04 100644 --- a/anvil-python/anvil/commands/juju.py +++ b/anvil-python/anvil/commands/juju.py @@ -78,8 +78,8 @@ def __init__( self.jhelper = jhelper self.model = model - self.controller_machines = None - self.machines = None + self.controller_machines: set[str] = set() + self.machines: set[str] = set() def run(self, status: Status | None = None) -> Result: """Run the step to completion.""" @@ -132,10 +132,12 @@ def run(self, status: Status | None = None) -> Result: def is_skip(self, status: Status | None = None) -> Result: """Determines if the step should be skipped or not.""" - self.controller_machines = self.get_controller(CONTROLLER)[ - "controller-machines" - ].keys() - self.machines = run_sync(self.jhelper.get_machines(self.model)).keys() + self.controller_machines = set( + self.get_controller(CONTROLLER)["controller-machines"].keys() + ) + self.machines = set( + run_sync(self.jhelper.get_machines(self.model)).keys() + ) available_machines = self.machines ^ self.controller_machines if len(self.controller_machines) == MAX_JUJU_CONTROLLERS: @@ -153,5 +155,4 @@ def is_skip(self, status: Status | None = None) -> Result: LOG.debug("Number of machines must be at least 3") return Result(ResultType.SKIPPED) - return Result(ResultType.COMPLETED)