From 5fdb8fcb03fc040b995c6d4da7c4903db41bfd08 Mon Sep 17 00:00:00 2001 From: Stamatis Katsaounis Date: Fri, 5 Jul 2024 14:38:35 +0300 Subject: [PATCH] bug: configure maximum db connections --- README.md | 7 + anvil-python/anvil/commands/maas_agent.py | 13 +- anvil-python/anvil/commands/maas_region.py | 13 +- anvil-python/anvil/commands/postgresql.py | 162 +++++++++++++++--- anvil-python/anvil/provider/local/commands.py | 57 +++++- .../anvil/provider/local/deployment.py | 18 ++ anvil-python/anvil/versions.py | 2 +- cloud/etc/deploy-maas-region/main.tf | 4 +- cloud/etc/deploy-maas-region/variables.tf | 8 +- cloud/etc/deploy-postgresql/main.tf | 12 +- cloud/etc/deploy-postgresql/variables.tf | 18 ++ 11 files changed, 263 insertions(+), 51 deletions(-) diff --git a/README.md b/README.md index fa907c2..218f070 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,13 @@ ubuntu@infra1:~$ maas-anvil cluster bootstrap \ Note: You will be asked for a `virtual_ip` during installation of the HAProxy charm, if `accept-defaults` is omitted. Pass an empty value to disable it, or any valid IP to enable; the Keepalived charm will be installed to enable connecting to HA MAAS using the VIP. +#### PostgreSQL max_connections + +You will be asked for a `max_connections` during installation of the PostgreSQL charm, if `accept-defaults` is omitted. Use `default` if you need the default values of PostgreSQL to be applied to [max_connections](https://www.postgresql.org/docs/14/runtime-config-connection.html). If you are aiming for MAAS HA though you have to do one of the following: + +- If number of MAAS region nodes is known beforehand, you can calculate the desired max_connections and set them, based on the formula: `max_connections = max(100, 10 + 50 * number_of_region_nodes)`. +- If number of MAAS region nodes is not known, you can set `max_connections` to `dynamic` and let MAAS Anvil recalculate the appropriate PostgreSQL `max_connections` every time a region node is joining or leaving the Anvil cluster. **This options includes a database restart with every modification.** + ### Add new nodes to the MAAS cluster ```bash diff --git a/anvil-python/anvil/commands/maas_agent.py b/anvil-python/anvil/commands/maas_agent.py index 6b2b632..0b2934d 100644 --- a/anvil-python/anvil/commands/maas_agent.py +++ b/anvil-python/anvil/commands/maas_agent.py @@ -17,8 +17,8 @@ from sunbeam.clusterd.client import Client from sunbeam.commands.terraform import TerraformInitStep +from sunbeam.jobs.common import BaseStep from sunbeam.jobs.juju import JujuHelper -from sunbeam.jobs.manifest import BaseStep from sunbeam.jobs.steps import ( AddMachineUnitsStep, DeployMachineApplicationStep, @@ -26,7 +26,6 @@ ) from anvil.jobs.manifest import Manifest -from anvil.provider.local.deployment import LocalDeployment APPLICATION = "maas-agent" CONFIG_KEY = "TerraformVarsMaasagentPlan" @@ -116,15 +115,11 @@ def maas_agent_install_steps( client: Client, manifest: Manifest, jhelper: JujuHelper, - deployment: LocalDeployment, + model: str, fqdn: str, ) -> List[BaseStep]: return [ TerraformInitStep(manifest.get_tfhelper("maas-agent-plan")), - DeployMAASAgentApplicationStep( - client, manifest, jhelper, deployment.infrastructure_model - ), - AddMAASAgentUnitsStep( - client, fqdn, jhelper, deployment.infrastructure_model - ), + DeployMAASAgentApplicationStep(client, manifest, jhelper, model), + AddMAASAgentUnitsStep(client, fqdn, jhelper, model), ] diff --git a/anvil-python/anvil/commands/maas_region.py b/anvil-python/anvil/commands/maas_region.py index e955f33..cc2e9ad 100644 --- a/anvil-python/anvil/commands/maas_region.py +++ b/anvil-python/anvil/commands/maas_region.py @@ -17,8 +17,8 @@ from sunbeam.clusterd.client import Client from sunbeam.commands.terraform import TerraformInitStep +from sunbeam.jobs.common import BaseStep from sunbeam.jobs.juju import JujuHelper -from sunbeam.jobs.manifest import BaseStep from sunbeam.jobs.steps import ( AddMachineUnitsStep, DeployMachineApplicationStep, @@ -26,7 +26,6 @@ ) from anvil.jobs.manifest import Manifest -from anvil.provider.local.deployment import LocalDeployment APPLICATION = "maas-region" CONFIG_KEY = "TerraformVarsMaasregionPlan" @@ -124,15 +123,11 @@ def maas_region_install_steps( client: Client, manifest: Manifest, jhelper: JujuHelper, - deployment: LocalDeployment, + model: str, fqdn: str, ) -> List[BaseStep]: return [ TerraformInitStep(manifest.get_tfhelper("maas-region-plan")), - DeployMAASRegionApplicationStep( - client, manifest, jhelper, deployment.infrastructure_model - ), - AddMAASRegionUnitsStep( - client, fqdn, jhelper, deployment.infrastructure_model - ), + DeployMAASRegionApplicationStep(client, manifest, jhelper, model), + AddMAASRegionUnitsStep(client, fqdn, jhelper, model), ] diff --git a/anvil-python/anvil/commands/postgresql.py b/anvil-python/anvil/commands/postgresql.py index 5a3809f..d447835 100644 --- a/anvil-python/anvil/commands/postgresql.py +++ b/anvil-python/anvil/commands/postgresql.py @@ -13,12 +13,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import List +import logging +from typing import Any, List +from rich.status import Status from sunbeam.clusterd.client import Client from sunbeam.commands.terraform import TerraformInitStep +from sunbeam.jobs import questions +from sunbeam.jobs.common import BaseStep, Result, ResultType from sunbeam.jobs.juju import JujuHelper -from sunbeam.jobs.manifest import BaseStep from sunbeam.jobs.steps import ( AddMachineUnitsStep, DeployMachineApplicationStep, @@ -26,10 +29,11 @@ ) from anvil.jobs.manifest import Manifest -from anvil.provider.local.deployment import LocalDeployment +LOG = logging.getLogger(__name__) APPLICATION = "postgresql" CONFIG_KEY = "TerraformVarsPostgresqlPlan" +POSTGRESQL_CONFIG_KEY = "TerraformVarsPostgresql" POSTGRESQL_APP_TIMEOUT = ( 180 # 3 minutes, managing the application should be fast ) @@ -38,15 +42,66 @@ ) +def postgresql_install_steps( + client: Client, + manifest: Manifest, + jhelper: JujuHelper, + model: str, + fqdn: str, + accept_defaults: bool, + preseed: dict[Any, Any], +) -> List[BaseStep]: + return [ + TerraformInitStep(manifest.get_tfhelper("postgresql-plan")), + DeployPostgreSQLApplicationStep( + client, + manifest, + jhelper, + model, + accept_defaults=accept_defaults, + deployment_preseed=preseed, + ), + AddPostgreSQLUnitsStep(client, fqdn, jhelper, model), + ] + + +def postgresql_questions() -> dict[str, questions.PromptQuestion]: + return { + "max_connections": questions.PromptQuestion( + "Maximum number of concurrent connections to allow to the database server", + default_value="default", + validation_function=validate_max_connections, + ), + } + + +def validate_max_connections(value: str) -> str | ValueError: + if value in ["default", "dynamic"]: + return value + try: + if 100 <= int(value) <= 500: + return value + else: + raise ValueError + except ValueError: + raise ValueError( + "Please provide either a number between 100 and 500 or 'default' for system default or 'dynamic' for calculating max_connections relevant to maas regions" + ) + + class DeployPostgreSQLApplicationStep(DeployMachineApplicationStep): """Deploy PostgreSQL application using Terraform""" + _CONFIG = POSTGRESQL_CONFIG_KEY + def __init__( self, client: Client, manifest: Manifest, jhelper: JujuHelper, model: str, + deployment_preseed: dict[Any, Any] | None = None, + accept_defaults: bool = False, refresh: bool = False, ): super().__init__( @@ -62,9 +117,92 @@ def __init__( refresh, ) + self.preseed = deployment_preseed or {} + self.accept_defaults = accept_defaults + + def get_application_timeout(self) -> int: + return POSTGRESQL_APP_TIMEOUT + + def prompt(self, console: questions.Console | None = None) -> None: + variables = questions.load_answers(self.client, self._CONFIG) + variables.setdefault("max_connections", "default") + + # Set defaults + self.preseed.setdefault("max_connections", "default") + + postgresql_config_bank = questions.QuestionBank( + questions=postgresql_questions(), + console=console, + preseed=self.preseed.get("postgres"), + previous_answers=variables, + accept_defaults=self.accept_defaults, + ) + max_connections = postgresql_config_bank.max_connections.ask() + variables["max_connections"] = max_connections + + LOG.debug(variables) + questions.write_answers(self.client, self._CONFIG, variables) + + def extra_tfvars(self) -> dict[str, Any]: + variables: dict[str, Any] = questions.load_answers( + self.client, self._CONFIG + ) + variables["maas_region_nodes"] = len( + self.client.cluster.list_nodes_by_role("region") + ) + return variables + + def has_prompts(self) -> bool: + return True + + +class ReapplyPostgreSQLTerraformPlanStep(DeployMachineApplicationStep): + """Reapply PostgreSQL Terraform plan""" + + _CONFIG = POSTGRESQL_CONFIG_KEY + + def __init__( + self, + client: Client, + manifest: Manifest, + jhelper: JujuHelper, + model: str, + ): + super().__init__( + client, + manifest, + jhelper, + CONFIG_KEY, + APPLICATION, + model, + "postgresql-plan", + "Reapply PostgreSQL Terraform plan", + "Reapplying PostgreSQL Terraform plan", + True, + ) + def get_application_timeout(self) -> int: return POSTGRESQL_APP_TIMEOUT + def extra_tfvars(self) -> dict[str, Any]: + variables: dict[str, Any] = questions.load_answers( + self.client, self._CONFIG + ) + variables["maas_region_nodes"] = len( + self.client.cluster.list_nodes_by_role("region") + ) + return variables + + def is_skip(self, status: Status | None = None) -> Result: + variables: dict[str, Any] = questions.load_answers( + self.client, self._CONFIG + ) + variables.setdefault("max_connections", "default") + if variables["max_connections"] != "dynamic": + return Result(ResultType.SKIPPED) + else: + return super().is_skip(status) + class AddPostgreSQLUnitsStep(AddMachineUnitsStep): """Add PostgreSQL Unit.""" @@ -110,21 +248,3 @@ def __init__( def get_unit_timeout(self) -> int: return POSTGRESQL_UNIT_TIMEOUT - - -def postgresql_install_steps( - client: Client, - manifest: Manifest, - jhelper: JujuHelper, - deployment: LocalDeployment, - fqdn: str, -) -> List[BaseStep]: - return [ - TerraformInitStep(manifest.get_tfhelper("postgresql-plan")), - DeployPostgreSQLApplicationStep( - client, manifest, jhelper, deployment.infrastructure_model - ), - AddPostgreSQLUnitsStep( - client, fqdn, jhelper, deployment.infrastructure_model - ), - ] diff --git a/anvil-python/anvil/provider/local/commands.py b/anvil-python/anvil/provider/local/commands.py index dc07e2d..0de28c0 100644 --- a/anvil-python/anvil/provider/local/commands.py +++ b/anvil-python/anvil/provider/local/commands.py @@ -83,6 +83,7 @@ maas_region_install_steps, ) from anvil.commands.postgresql import ( + ReapplyPostgreSQLTerraformPlanStep, RemovePostgreSQLUnitStep, postgresql_install_steps, ) @@ -264,7 +265,13 @@ def bootstrap( jhelper = JujuHelper(deployment.get_connected_controller()) plan4 = postgresql_install_steps( - client, manifest_obj, jhelper, deployment, fqdn + client, + manifest_obj, + jhelper, + deployment.infrastructure_model, + fqdn, + accept_defaults, + preseed, ) if is_haproxy_node: plan4.extend( @@ -281,13 +288,21 @@ def bootstrap( if is_region_node: plan4.extend( maas_region_install_steps( - client, manifest_obj, jhelper, deployment, fqdn + client, + manifest_obj, + jhelper, + deployment.infrastructure_model, + fqdn, ) ) if is_agent_node: plan4.extend( maas_agent_install_steps( - client, manifest_obj, jhelper, deployment, fqdn + client, + manifest_obj, + jhelper, + deployment.infrastructure_model, + fqdn, ) ) run_plan(plan4, console) @@ -354,7 +369,9 @@ def _print_output(token: str) -> None: @click.command() -@click.option("-a", "--accept-defaults", help="Accept all defaults.", is_flag=True) +@click.option( + "-a", "--accept-defaults", help="Accept all defaults.", is_flag=True +) @click.option("--token", type=str, help="Join token") @click.option( "--role", @@ -434,7 +451,13 @@ def join( if is_database_node: plan2.extend( postgresql_install_steps( - client, manifest_obj, jhelper, deployment, name + client, + manifest_obj, + jhelper, + deployment.infrastructure_model, + name, + accept_defaults, + preseed, ) ) if is_haproxy_node: @@ -452,13 +475,26 @@ def join( if is_region_node: plan2.extend( maas_region_install_steps( - client, manifest_obj, jhelper, deployment, name + client, + manifest_obj, + jhelper, + deployment.infrastructure_model, + name, + ) + ) + plan2.append( + ReapplyPostgreSQLTerraformPlanStep( + client, manifest_obj, jhelper, deployment.infrastructure_model ) ) if is_agent_node: plan2.extend( maas_agent_install_steps( - client, manifest_obj, jhelper, deployment, name + client, + manifest_obj, + jhelper, + deployment.infrastructure_model, + name, ) ) @@ -528,6 +564,10 @@ def remove(ctx: click.Context, name: str) -> None: preflight_checks = [DaemonGroupCheck()] run_preflight_checks(preflight_checks, console) + manifest_obj = Manifest.load_latest_from_clusterdb( + deployment, include_defaults=True + ) + plan = [ JujuLoginStep(deployment.juju_account), RemovePostgreSQLUnitStep( @@ -539,6 +579,9 @@ def remove(ctx: click.Context, name: str) -> None: RemoveMAASRegionUnitStep( client, name, jhelper, deployment.infrastructure_model ), + ReapplyPostgreSQLTerraformPlanStep( + client, manifest_obj, jhelper, deployment.infrastructure_model + ), RemoveMAASAgentUnitStep( client, name, jhelper, deployment.infrastructure_model ), diff --git a/anvil-python/anvil/provider/local/deployment.py b/anvil-python/anvil/provider/local/deployment.py index 48028d1..9877c5e 100644 --- a/anvil-python/anvil/provider/local/deployment.py +++ b/anvil-python/anvil/provider/local/deployment.py @@ -27,6 +27,10 @@ ) from anvil.commands.haproxy import KEEPALIVED_CONFIG_KEY, keepalived_questions +from anvil.commands.postgresql import ( + POSTGRESQL_CONFIG_KEY, + postgresql_questions, +) LOG = logging.getLogger(__name__) LOCAL_TYPE = "local" @@ -53,6 +57,20 @@ def generate_preseed(self, console: Console) -> str: show_questions(bootstrap_bank, section="bootstrap") ) + # PostgreSQL questions + try: + variables = load_answers(client, POSTGRESQL_CONFIG_KEY) + except ClusterServiceUnavailableException: + variables = {} + postgresql_config_bank = QuestionBank( + questions=postgresql_questions(), + console=console, + previous_answers=variables, + ) + preseed_content.extend( + show_questions(postgresql_config_bank, section="postgres") + ) + # HAProxy questions try: variables = load_answers(client, KEEPALIVED_CONFIG_KEY) diff --git a/anvil-python/anvil/versions.py b/anvil-python/anvil/versions.py index de991d9..9688710 100644 --- a/anvil-python/anvil/versions.py +++ b/anvil-python/anvil/versions.py @@ -16,7 +16,7 @@ MAAS_REGION_CHANNEL = "latest/edge" MAAS_AGENT_CHANNEL = "latest/edge" POSTGRESQL_CHANNEL = "14/candidate" -PGBOUNCER_CHANNEL = "1/candidate" +PGBOUNCER_CHANNEL = "1/beta" HAPROXY_CHANNEL = "latest/stable" KEEPALIVED_CHANNEL = "latest/stable" diff --git a/cloud/etc/deploy-maas-region/main.tf b/cloud/etc/deploy-maas-region/main.tf index 6434fcf..c08dd5e 100644 --- a/cloud/etc/deploy-maas-region/main.tf +++ b/cloud/etc/deploy-maas-region/main.tf @@ -58,8 +58,8 @@ resource "juju_application" "pgbouncer" { } config = merge({ - pool_mode = "transaction" - max_db_connections = floor(90 / max(length(var.machine_ids), 1)) + pool_mode = "session" + max_db_connections = var.max_connections_per_region }, var.charm_pgbouncer_config) } diff --git a/cloud/etc/deploy-maas-region/variables.tf b/cloud/etc/deploy-maas-region/variables.tf index 9822e2f..4497396 100644 --- a/cloud/etc/deploy-maas-region/variables.tf +++ b/cloud/etc/deploy-maas-region/variables.tf @@ -51,7 +51,7 @@ variable "enable_haproxy" { variable "charm_pgbouncer_channel" { description = "Operator channel for PgBouncer deployment" type = string - default = "1/candidate" + default = "1/beta" } variable "charm_pgbouncer_revision" { @@ -65,3 +65,9 @@ variable "charm_pgbouncer_config" { type = map(string) default = {} } + +variable "max_connections_per_region" { + description = "Maximum number of concurrent connections to allow to the database server per region" + type = number + default = 50 +} diff --git a/cloud/etc/deploy-postgresql/main.tf b/cloud/etc/deploy-postgresql/main.tf index ec16ab0..9381718 100644 --- a/cloud/etc/deploy-postgresql/main.tf +++ b/cloud/etc/deploy-postgresql/main.tf @@ -30,6 +30,16 @@ data "juju_model" "machine_model" { name = var.machine_model } +locals { + max_connections = var.max_connections == "default" ? {} : ( + var.max_connections == "dynamic" ? { + experimental_max_connections = max(100, 10 + var.max_connections_per_region * var.maas_region_nodes) + } : { + experimental_max_connections = tonumber(var.max_connections) + } + ) +} + resource "juju_application" "postgresql" { name = "postgresql" model = data.juju_model.machine_model.name @@ -42,5 +52,5 @@ resource "juju_application" "postgresql" { base = "ubuntu@22.04" } - config = var.charm_postgresql_config + config = merge(local.max_connections, var.charm_postgresql_config) } diff --git a/cloud/etc/deploy-postgresql/variables.tf b/cloud/etc/deploy-postgresql/variables.tf index 8989b9a..b67ffc5 100644 --- a/cloud/etc/deploy-postgresql/variables.tf +++ b/cloud/etc/deploy-postgresql/variables.tf @@ -41,3 +41,21 @@ variable "machine_model" { description = "Model to deploy to" type = string } + +variable "max_connections" { + description = "Maximum number of concurrent connections to allow to the database server" + type = string + default = "default" +} + +variable "maas_region_nodes" { + description = "Total number of MAAS region nodes" + type = number + default = 0 +} + +variable "max_connections_per_region" { + description = "Maximum number of concurrent connections to allow to the database server per region" + type = number + default = 50 +}