diff --git a/README.md b/README.md index 7a7cf4e..f8e55e4 100644 --- a/README.md +++ b/README.md @@ -65,6 +65,9 @@ ubuntu@infra3:~$ maas-anvil cluster join \ --token eyJuYW1lIjoibWFhcy00Lm1hYXMiLCJzZWNyZXQiOiI3MmE512342abcdEASWWxOWNlYWNkYmJjMWRmMjk4OThkYWFkYzQzMDAzZjk4NmRkZDI2MWRhYWVkZTIxIiwiZmluZ2VycHJpbnQiOiJlODU5ZmY5NjAwMDU4OGFjZmQ5ZDM0NjFhMDk5NmU1YTU3YjhjN2Q2ZjE4M2NjZDRlOTg2NGRkZjQ3NWMwZWM1Iiwiam9pbl9hZGRyZXNzZXMiOlsiMTAuMjAuMC43OjcwMDAiLCIxMC4yMC4wLjg6NzAwMCJdfQ== ``` +#### Juju Controller HA +Starting with the third machine joining the cluster, `maas-anvil` will automatically start adding Juju controllers to machines that are missing them. This will occur at every other join (when machines 3, 5, 7, and so on join). + ### Confirm the cluster status ```bash diff --git a/anvil-python/anvil/commands/juju.py b/anvil-python/anvil/commands/juju.py index 3fbaa3b..6242b04 100644 --- a/anvil-python/anvil/commands/juju.py +++ b/anvil-python/anvil/commands/juju.py @@ -13,12 +13,21 @@ # See the License for the specific language governing permissions and # limitations under the License. 
class ScaleJujuStep(BaseStep, JujuStepHelper):
    """Scale out the Juju controller to provide high availability.

    ``is_skip`` records the current controller machines and the machines of
    ``model``. ``run`` then calls ``juju enable-ha`` to place controllers on
    machines that do not host one yet, up to ``MAX_JUJU_CONTROLLERS`` in
    total, and waits for the controller application.
    """

    def __init__(
        self,
        jhelper: JujuHelper,
        model: str,
    ):
        """Create the step.

        :param jhelper: helper used to list the machines of ``model``.
        :param model: name of the Juju model whose machines are candidates
            for hosting a controller.
        """
        super().__init__("Juju HA", "Enable Juju High Availability")

        self.jhelper = jhelper
        self.model = model

        # Filled in by is_skip(); run() works from whatever was recorded last.
        self.controller_machines: set[str] = set()
        self.machines: set[str] = set()

    def _available_machines(self) -> set[str]:
        """Return the machines that do not host a controller yet."""
        # Plain difference, not symmetric difference: a controller machine
        # that is missing from the model's machine list must never be offered
        # as a placement target.
        return self.machines - self.controller_machines

    def _run_juju(self, cmd: list[str]) -> None:
        """Run a juju command, logging the command line and its output.

        :param cmd: full argument vector, starting with the juju binary.
        :raises subprocess.CalledProcessError: if the command exits non-zero.
        """
        LOG.debug(f'Running command {" ".join(cmd)}')
        process = subprocess.run(
            cmd, capture_output=True, text=True, check=True
        )
        LOG.debug(
            f"Command finished. stdout={process.stdout}, stderr={process.stderr}"
        )

    def run(self, status: Status | None = None) -> Result:
        """Add controllers to available machines and wait for them.

        :param status: optional rich status used to report progress.
        :return: COMPLETED on success or when there is nothing to add,
            FAILED when one of the juju commands exits non-zero.
        """
        available_machines = list(self._available_machines())
        n_machines_to_join = min(
            len(available_machines),
            MAX_JUJU_CONTROLLERS - len(self.controller_machines),
        )
        if n_machines_to_join <= 0:
            # random.sample() rejects a negative sample size, and calling
            # `enable-ha` with an empty `--to` list makes no sense.
            LOG.debug("No Juju controllers to add")
            return Result(ResultType.COMPLETED)

        juju_binary = self._get_juju_binary()
        targets = random.sample(available_machines, n_machines_to_join)
        enable_ha_cmd = [
            juju_binary,
            "enable-ha",
            "-n",
            str(len(self.controller_machines) + n_machines_to_join),
            "--to",
            ",".join(str(machine) for machine in targets),
        ]
        wait_cmd = [
            juju_binary,
            "wait-for",
            "application",
            "-m",
            "admin/controller",
            "controller",
            "--timeout",
            "15m",
        ]

        try:
            self._run_juju(enable_ha_cmd)
            self.update_status(status, "scaling controller")
            LOG.debug("Waiting for HA to be enabled")
            self._run_juju(wait_cmd)
        except subprocess.CalledProcessError as e:
            # Report the failure through the step result instead of letting
            # the exception escape from the plan.
            LOG.exception("Error scaling Juju controllers")
            LOG.warning(e.stderr)
            return Result(ResultType.FAILED, message=str(e))

        return Result(ResultType.COMPLETED)

    def is_skip(self, status: Status | None = None) -> Result:
        """Determine whether the step should be skipped.

        Refreshes ``controller_machines`` and ``machines`` as a side effect.

        :param status: optional rich status (unused here).
        :return: SKIPPED when the controller limit is already reached, when
            no machine is available, or when the model has fewer than three
            machines; COMPLETED otherwise.
        """
        self.controller_machines = set(
            self.get_controller(CONTROLLER)["controller-machines"].keys()
        )
        self.machines = set(
            run_sync(self.jhelper.get_machines(self.model)).keys()
        )

        # `>=` rather than `==`: being over the limit must skip as well,
        # otherwise run() would compute a negative number of machines.
        if len(self.controller_machines) >= MAX_JUJU_CONTROLLERS:
            LOG.debug(
                "Number of machines with controllers must not be greater than "
                f"{MAX_JUJU_CONTROLLERS}, skipping scaling Juju controllers"
            )
            return Result(ResultType.SKIPPED)
        if not self._available_machines():
            LOG.debug(
                "No available machines, skipping scaling Juju controllers"
            )
            return Result(ResultType.SKIPPED)
        if len(self.machines) < 3:
            LOG.debug("Number of machines must be at least 3")
            return Result(ResultType.SKIPPED)

        return Result(ResultType.COMPLETED)
@dataclass
class JujuManifest:
    """Juju options of the manifest: extra arguments for ``juju bootstrap``."""

    # Additional command-line arguments handed to `juju bootstrap`;
    # defaults to no extra arguments.
    bootstrap_args: list[str] = Field(
        alias="bootstrap_args",
        description="Extra args for juju bootstrap",
        default=[],
    )