Add option to run CRDs updates and DaemonSet deletion on user's behalf. (#2544)
marcelovilla authored Jun 26, 2024
2 parents c385172 + 88a1549 commit bdcd532
Showing 2 changed files with 126 additions and 34 deletions.
134 changes: 102 additions & 32 deletions src/_nebari/upgrade.py
@@ -14,6 +14,9 @@
 from pathlib import Path
 from typing import Any, ClassVar, Dict

+import kubernetes.client
+import kubernetes.config
+import requests
 import rich
 from packaging.version import Version
 from pydantic import ValidationError
@@ -995,52 +998,119 @@ def _version_specific_upgrade(
     ):
         # Prompt users to manually update kube-prometheus-stack CRDs if monitoring is enabled
         if config.get("monitoring", {}).get("enabled", True):
-            rich.print(
-                "\n ⚠️ Warning ⚠️"
-                "\n-> [red bold]Nebari version 2024.6.1 comes with a new version of Grafana. Any custom dashboards that you created will be deleted after upgrading Nebari. Make sure to [link=https://grafana.com/docs/grafana/latest/dashboards/share-dashboards-panels/#export-a-dashboard-as-json]export them as JSON[/link] so you can [link=https://grafana.com/docs/grafana/latest/dashboards/build-dashboards/import-dashboards/#import-a-dashboard]import them[/link] again afterwards.[/red bold]"
-                "\n-> [red bold]Before upgrading, you need to manually delete the prometheus-node-exporter daemonset and update the kube-prometheus-stack CRDs. To do that, please run the following commands.[/red bold]"
-            )

+            crd_urls = [
+                "https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.73.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml",
+                "https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.73.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml",
+                "https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.73.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml",
+                "https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.73.0/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml",
+                "https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.73.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusagents.yaml",
+                "https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.73.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml",
+                "https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.73.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml",
+                "https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.73.0/example/prometheus-operator-crd/monitoring.coreos.com_scrapeconfigs.yaml",
+                "https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.73.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml",
+                "https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.73.0/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml",
+            ]
+            daemonset_name = "prometheus-node-exporter"
+            namespace = config.get("namespace", "default")

             # We're upgrading from version 30.1.0 to 58.4.0. This is a major upgrade and requires manual intervention.
             # See https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-prometheus-stack/README.md#upgrading-chart
             # for more information on why the following commands are necessary.
-            commands = textwrap.dedent(
-                f"""
-            [cyan bold]
-            kubectl apply --server-side --force-conflicts -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.73.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagerconfigs.yaml
-            kubectl apply --server-side --force-conflicts -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.73.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml
-            kubectl apply --server-side --force-conflicts -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.73.0/example/prometheus-operator-crd/monitoring.coreos.com_podmonitors.yaml
-            kubectl apply --server-side --force-conflicts -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.73.0/example/prometheus-operator-crd/monitoring.coreos.com_probes.yaml
-            kubectl apply --server-side --force-conflicts -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.73.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusagents.yaml
-            kubectl apply --server-side --force-conflicts -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.73.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml
-            kubectl apply --server-side --force-conflicts -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.73.0/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml
-            kubectl apply --server-side --force-conflicts -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.73.0/example/prometheus-operator-crd/monitoring.coreos.com_scrapeconfigs.yaml
-            kubectl apply --server-side --force-conflicts -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.73.0/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml
-            kubectl apply --server-side --force-conflicts -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.73.0/example/prometheus-operator-crd/monitoring.coreos.com_thanosrulers.yaml
-            kubectl delete daemonset -l app=prometheus-node-exporter --namespace {config['namespace']}
-            [/cyan bold]
-            """
+            commands = "[cyan bold]"
+            for url in crd_urls:
+                commands += f"kubectl apply --server-side --force-conflicts -f {url}\n"
+            commands += f"kubectl delete daemonset -l app={daemonset_name} --namespace {namespace}\n"
+            commands += "[/cyan bold]"
+
+            rich.print(
+                "\n ⚠️ Warning ⚠️"
+                "\n-> [red bold]Nebari version 2024.6.1 comes with a new version of Grafana. Any custom dashboards that you created will be deleted after upgrading Nebari. Make sure to [link=https://grafana.com/docs/grafana/latest/dashboards/share-dashboards-panels/#export-a-dashboard-as-json]export them as JSON[/link] so you can [link=https://grafana.com/docs/grafana/latest/dashboards/build-dashboards/import-dashboards/#import-a-dashboard]import them[/link] again afterwards.[/red bold]"
+                f"\n-> [red bold]Before upgrading, kube-prometheus-stack CRDs need to be updated and the {daemonset_name} daemonset needs to be deleted.[/red bold]"
             )
+            run_commands = Prompt.ask(
+                "\nDo you want Nebari to update the kube-prometheus-stack CRDs and delete the prometheus-node-exporter for you? If not, you'll have to do it manually.",
+                choices=["y", "N"],
+                default="N",
+            )

             # By default, rich wraps lines by splitting them into multiple lines. This is
             # far from ideal, as users copy-pasting the commands will get errors when running them.
             # To avoid this, we use a rich console with a larger width to print the entire commands
             # and let the terminal wrap them if needed.
-            Prompt.ask("Hit enter to show the commands")
             console = rich.console.Console(width=220)
-            console.print(commands)
+            if run_commands == "y":
+                try:
+                    kubernetes.config.load_kube_config()
+                except kubernetes.config.config_exception.ConfigException:
+                    rich.print(
+                        "[red bold]No default kube configuration file was found. Make sure to [link=https://www.nebari.dev/docs/how-tos/debug-nebari#generating-the-kubeconfig]have one pointing to your Nebari cluster[/link] before upgrading.[/red bold]"
+                    )
+                    exit()
+                current_kube_context = kubernetes.config.list_kube_config_contexts()[1]
+                cluster_name = current_kube_context["context"]["cluster"]
+                rich.print(
+                    f"The following commands will be run for the [cyan bold]{cluster_name}[/cyan bold] cluster"
+                )
+                Prompt.ask("Hit enter to show the commands")
+                console.print(commands)
+
+                Prompt.ask("Hit enter to continue")
+                # We need to add a special constructor to the yaml loader to handle a specific
+                # tag as otherwise the kubernetes API will fail when updating the CRD.
+                yaml.constructor.add_constructor(
+                    "tag:yaml.org,2002:value", lambda loader, node: node.value
+                )
+                for url in crd_urls:
+                    response = requests.get(url)
+                    response.raise_for_status()
+                    crd = yaml.load(response.text)
+                    crd_name = crd["metadata"]["name"]
+                    api_instance = kubernetes.client.ApiextensionsV1Api()
+                    try:
+                        api_response = api_instance.read_custom_resource_definition(
+                            name=crd_name
+                        )
+                    except kubernetes.client.exceptions.ApiException:
+                        api_response = api_instance.create_custom_resource_definition(
+                            body=crd
+                        )
+                    else:
+                        api_response = api_instance.patch_custom_resource_definition(
+                            name=crd["metadata"]["name"], body=crd
+                        )
+
-            Prompt.ask("Hit enter to continue")
-            continue_ = Prompt.ask(
-                "Have you backed up your custom dashboards (if necessary), deleted the prometheus-node-exporter daemonset and updated the kube-prometheus-stack CRDs?",
-                choices=["y", "N"],
-                default="N",
-            )
-            if not continue_ == "y":
+                api_instance = kubernetes.client.AppsV1Api()
+                api_response = api_instance.list_namespaced_daemon_set(
+                    namespace=namespace, label_selector=f"app={daemonset_name}"
+                )
+                if api_response.items:
+                    api_instance.delete_namespaced_daemon_set(
+                        name=api_response.items[0].metadata.name,
+                        namespace=namespace,
+                    )
+
+                rich.print(
+                    f"The kube-prometheus-stack CRDs have been updated and the {daemonset_name} daemonset has been deleted."
+                )
+            else:
                 rich.print(
-                    f"[red bold]You must back up your custom dashboards (if necessary), delete the prometheus-node-exporter daemonset and update the kube-prometheus-stack CRDs before upgrading to [green]{self.version}[/green] (or later).[/bold red]"
+                    "[red bold]Before upgrading, you need to manually delete the prometheus-node-exporter daemonset and update the kube-prometheus-stack CRDs. To do that, please run the following commands.[/red bold]"
                 )
+                Prompt.ask("Hit enter to show the commands")
+                console.print(commands)
+
+                Prompt.ask("Hit enter to continue")
+                continue_ = Prompt.ask(
+                    f"Have you backed up your custom dashboards (if necessary), deleted the {daemonset_name} daemonset and updated the kube-prometheus-stack CRDs?",
+                    choices=["y", "N"],
+                    default="N",
+                )
-                exit()
+                if not continue_ == "y":
+                    rich.print(
+                        f"[red bold]You must back up your custom dashboards (if necessary), delete the {daemonset_name} daemonset and update the kube-prometheus-stack CRDs before upgrading to [green]{self.version}[/green] (or later).[/bold red]"
+                    )
+                    exit()

         # Prompt users to upgrade to the new default node groups for GCP
         if (provider := config.get("provider", "")) == ProviderEnum.gcp.value:
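For readers tracing what the new `run_commands == "y"` path does, here is a minimal standalone sketch of the same flow — fetch each CRD manifest, create or patch it through the Kubernetes API, then delete the node-exporter daemonset by label. The single URL, hard-coded namespace, and direct use of `ruamel.yaml` are assumptions for illustration; the real code iterates over all ten manifests, reads the namespace from the Nebari config, and reuses Nebari's own yaml loader:

```python
import kubernetes.client
import kubernetes.config
import requests
import ruamel.yaml

# Assumptions for illustration: one representative CRD URL (the diff above
# lists ten) and a hard-coded namespace.
CRD_URLS = [
    "https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.73.0/example/prometheus-operator-crd/monitoring.coreos.com_alertmanagers.yaml",
]
DAEMONSET_NAME = "prometheus-node-exporter"
NAMESPACE = "dev"

# Raises kubernetes.config.config_exception.ConfigException when no kubeconfig
# is available, which is why the upgrade step checks for that first.
kubernetes.config.load_kube_config()
_, active_context = kubernetes.config.list_kube_config_contexts()
print(f"Running against cluster: {active_context['context']['cluster']}")

yaml = ruamel.yaml.YAML()
# The upstream manifests contain a scalar that resolves to the special
# "tag:yaml.org,2002:value" tag; without a pass-through constructor the
# loader rejects it (the diff registers the same constructor).
yaml.constructor.add_constructor(
    "tag:yaml.org,2002:value", lambda loader, node: node.value
)

ext_api = kubernetes.client.ApiextensionsV1Api()
for url in CRD_URLS:
    response = requests.get(url)
    response.raise_for_status()
    crd = yaml.load(response.text)
    name = crd["metadata"]["name"]
    try:
        # Probe for an existing CRD of the same name.
        ext_api.read_custom_resource_definition(name=name)
    except kubernetes.client.exceptions.ApiException:
        # Not found: create it, as on a fresh cluster.
        ext_api.create_custom_resource_definition(body=crd)
    else:
        # Found: patch it in place, the API analogue of re-applying.
        ext_api.patch_custom_resource_definition(name=name, body=crd)

# Delete the daemonset by label selector rather than by a fixed name — the
# API analogue of `kubectl delete daemonset -l app=prometheus-node-exporter`.
apps_api = kubernetes.client.AppsV1Api()
matching = apps_api.list_namespaced_daemon_set(
    namespace=NAMESPACE, label_selector=f"app={DAEMONSET_NAME}"
)
if matching.items:
    apps_api.delete_namespaced_daemon_set(
        name=matching.items[0].metadata.name, namespace=NAMESPACE
    )
```

The read-then-create-or-patch sequence is a simple upsert that roughly mirrors what the `kubectl apply --server-side --force-conflicts` commands achieve, at the cost of a small race window between the read and the write — acceptable here, since the step runs interactively during an upgrade.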
26 changes: 24 additions & 2 deletions tests/tests_unit/test_upgrade.py
@@ -2,6 +2,7 @@
 from pathlib import Path

 import pytest
+from rich.prompt import Prompt

 from _nebari.upgrade import do_upgrade
 from _nebari.version import __version__, rounded_ver_parse
@@ -48,8 +49,29 @@ def test_upgrade_4_0(
     qhub_users_import_json,
     monkeypatch,
 ):
-    # Return "y" when asked if you've deleted the Argo CRDs
-    monkeypatch.setattr("builtins.input", lambda: "y")
+
+    def mock_input(prompt, **kwargs):
+        # Mock different upgrade steps prompt answers
+        if (
+            prompt
+            == "Have you deleted the Argo Workflows CRDs and service accounts? [y/N] "
+        ):
+            return "y"
+        elif (
+            prompt
+            == "\nDo you want Nebari to update the kube-prometheus-stack CRDs and delete the prometheus-node-exporter for you? If not, you'll have to do it manually."
+        ):
+            return "N"
+        elif (
+            prompt
+            == "Have you backed up your custom dashboards (if necessary), deleted the prometheus-node-exporter daemonset and updated the kube-prometheus-stack CRDs?"
+        ):
+            return "y"
+        # All other prompts will be answered with "y"
+        else:
+            return "y"
+
+    monkeypatch.setattr(Prompt, "ask", mock_input)

     old_qhub_config_path = Path(__file__).parent / old_qhub_config_path_str

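The test update swaps the old `builtins.input` patch for a patch of `Prompt.ask` itself, dispatching on the prompt text, since the upgrade flow now prompts through rich. A condensed, self-contained sketch of that pattern, using a hypothetical prompt rather than the real Nebari ones:

```python
from rich.prompt import Prompt


def run_step() -> bool:
    # Hypothetical stand-in for an upgrade step that asks for confirmation.
    answer = Prompt.ask("Proceed?", choices=["y", "N"], default="N")
    return answer == "y"


def test_run_step(monkeypatch):
    def mock_ask(prompt, **kwargs):
        # Dispatch on the prompt text; unknown prompts default to "y",
        # mirroring the catch-all branch in the diff above.
        return {"Proceed?": "y"}.get(prompt, "y")

    # Prompt.ask is looked up on the class at each call site, so patching
    # the class attribute intercepts every prompt in the code under test.
    monkeypatch.setattr(Prompt, "ask", mock_ask)

    assert run_step() is True
```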
