[DPE-2078] Fix scaled down for Juju 3.1.5 #176

Merged
merged 3 commits on Jul 11, 2023
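In outline, the change drops the `mongodb_storage_detaching` handler: a unit that sees its own `RelationDepartedEvent` now marks itself as departing in peer data, and its `stop` hook then blocks, up to `UNIT_REMOVAL_TIMEOUT` seconds, until the unit is no longer a member of the replica set. A minimal standalone sketch of that wait pattern (not the charm's code; `is_member` is a hypothetical stand-in for the charm's `is_unit_in_replica_set` helper):

```python
import time

UNIT_REMOVAL_TIMEOUT = 1000  # seconds; mirrors the constant introduced in the diff below


def wait_until_removed(is_member, timeout: int = UNIT_REMOVAL_TIMEOUT) -> None:
    """Block until the departing unit has left the replica set or the timeout expires.

    `is_member` is a hypothetical zero-argument callable that reports whether
    this unit still appears in the replica-set member list.
    """
    while is_member() and timeout > 0:
        time.sleep(1)
        timeout -= 1
    if is_member():
        raise TimeoutError("unit was not removed from the replica set in time")
```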
src/charm.py: 75 changes (35 additions, 40 deletions)
@@ -4,6 +4,7 @@
# See LICENSE file for licensing details.

import logging
import time
from typing import List, Optional, Set

from charms.grafana_k8s.v0.grafana_dashboard import GrafanaDashboardProvider
@@ -29,13 +30,7 @@
OperatorUser,
)
from charms.prometheus_k8s.v0.prometheus_scrape import MetricsEndpointProvider
from ops.charm import (
ActionEvent,
CharmBase,
RelationDepartedEvent,
StartEvent,
StorageDetachingEvent,
)
from ops.charm import ActionEvent, CharmBase, RelationDepartedEvent, StartEvent
from ops.main import main
from ops.model import (
ActiveStatus,
@@ -47,13 +42,15 @@
)
from ops.pebble import ExecError, Layer, PathError, ProtocolError
from pymongo.errors import PyMongoError
from tenacity import Retrying, before_log, retry, stop_after_attempt, wait_fixed
from tenacity import before_log, retry, stop_after_attempt, wait_fixed

from config import Config
from exceptions import AdminUserCreationError

logger = logging.getLogger(__name__)

UNIT_REMOVAL_TIMEOUT = 1000


class MongoDBCharm(CharmBase):
"""A Juju Charm to deploy MongoDB on Kubernetes."""
@@ -78,10 +75,10 @@ def __init__(self, *args):

# if a new leader has been elected update hosts of MongoDB
self.framework.observe(self.on.leader_elected, self._relation_changes_handler)
self.framework.observe(self.on.mongodb_storage_detaching, self._on_storage_detaching)

self.framework.observe(self.on.get_password_action, self._on_get_password)
self.framework.observe(self.on.set_password_action, self._on_set_password)
self.framework.observe(self.on.stop, self._on_stop)

self.client_relations = MongoDBProvider(self)
self.tls = MongoDBTLS(self, Config.Relations.PEERS, Config.SUBSTRATE)
@@ -295,6 +292,10 @@ def _relation_changes_handler(self, event) -> None:
"""Handles different relation events and updates MongoDB replica set."""
self._connect_mongodb_exporter()

if type(event) is RelationDepartedEvent:
if event.departing_unit.name == self.unit.name:
self.unit_peer_data.setdefault("unit_departed", "True")

if not self.unit.is_leader():
return

@@ -337,37 +338,19 @@ def _relation_changes_handler(self, event) -> None:
logger.info("Deferring reconfigure: error=%r", e)
event.defer()

def _on_storage_detaching(self, event: StorageDetachingEvent) -> None:
"""Before storage detaches, allow removing unit to remove itself from the set.

If the removing unit is primary also allow it to step down and elect another unit as
primary while it still has access to its storage.
"""
# if we are removing the last replica it will not be able to step down as primary and we
# cannot reconfigure the replica set to have 0 members. To prevent retrying for 10 minutes
# set this flag to True. please note that planned_units will always be >=1. When planned
# units is 1 that means there are no other peers expected.

if self.app.planned_units() == 1 and (not self._peers or len(self._peers.units)) == 0:
return

try:
logger.debug("Removing %s from replica set", self.get_hostname_for_unit(self.unit))
# retries over a period of 10 minutes in an attempt to resolve race conditions it is
# not possible to defer in storage detached.
retries = Retrying(stop=stop_after_attempt(10), wait=wait_fixed(1), reraise=True)
for attempt in retries:
with attempt:
# remove_replset_member retries for 60 seconds
with MongoDBConnection(self.mongodb_config) as mongo:
hostname = self.get_hostname_for_unit(self.unit)
mongo.remove_replset_member(hostname)
except NotReadyError:
logger.info(
"Failed to remove %s from replica set, another member is syncing", self.unit.name
)
except PyMongoError as e:
logger.error("Failed to remove %s from replica set, error=%r", self.unit.name, e)
def _on_stop(self, event) -> None:
if "True" == self.unit_peer_data.get("unit_departed", "False"):
logger.debug(f"{self.unit.name} blocking on_stop")
is_in_replica_set = True
timeout = UNIT_REMOVAL_TIMEOUT
while is_in_replica_set and timeout > 0:
is_in_replica_set = self.is_unit_in_replica_set()
time.sleep(1)
timeout -= 1
if timeout < 0:
raise Exception(f"{self.unit.name}.on_stop timeout exceeded")
logger.debug(f"{self.unit.name} releasing on_stop")
self.unit_peer_data["unit_departed"] = ""

# END: charm events

@@ -779,6 +762,18 @@ def _connect_mongodb_exporter(self) -> None:
# Restart changed services and start startup-enabled services.
container.replan()

def is_unit_in_replica_set(self) -> bool:
"""Check if the unit is in the replica set."""
with MongoDBConnection(self.mongodb_config) as mongo:
try:
replset_members = mongo.get_replset_members()
return self.get_hostname_for_unit(self.unit) in replset_members
except NotReadyError as e:
logger.error(f"{self.unit.name}.is_unit_in_replica_set NotReadyError={e}")
except PyMongoError as e:
logger.error(f"{self.unit.name}.is_unit_in_replica_set PyMongoError={e}")
return False

# END: helper functions

# BEGIN: static methods
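The new `is_unit_in_replica_set` helper compares this unit's hostname against the member list returned by the charm's `MongoDBConnection` wrapper. Assuming that wrapper ultimately issues the standard `replSetGetStatus` command, an equivalent check with plain pymongo looks roughly like this (the URI and host are placeholders, and the real wrapper may normalize member names differently):

```python
from pymongo import MongoClient


def unit_in_replica_set(uri: str, unit_host: str) -> bool:
    """Return True if `unit_host` ("host:port") is listed as a replica-set member.

    `uri` is a placeholder MongoDB connection string with admin credentials.
    """
    client = MongoClient(uri, serverSelectionTimeoutMS=5000)
    try:
        # replSetGetStatus reports every member of the replica set by name.
        status = client.admin.command("replSetGetStatus")
        members = {member["name"] for member in status["members"]}
        return unit_host in members
    finally:
        client.close()
```

In the charm itself this logic sits behind `mongo.get_replset_members()`, as the diff above shows, so `_on_stop` only has to poll `is_unit_in_replica_set` until the leader's reconfiguration removes the departing unit.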