Skip to content

Commit

Permalink
fix: stop_pods: ignore failed (like Evicted) Pods
Browse files Browse the repository at this point in the history
Also print what still needs to be stopped and simplify internal logic
  • Loading branch information
desaintmartin committed Oct 9, 2021
1 parent a9a361d commit 42bfb4d
Show file tree
Hide file tree
Showing 10 changed files with 143 additions and 122 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# wiremind-kubernetes

## v6.3.3 (2021-10-05)
### Fixes
- stop_pods: ignore failed (like Evicted) Pods, print what still needs to be stopped and simplify internal logic.

## v6.3.2 (2021-10-04)
### Fixes
- setup.py: require kubernetes>=18.
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
6.3.2
6.3.3
3 changes: 3 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,6 @@ python_version = 3.7
ignore_missing_imports = True
no_implicit_optional = True
strict_optional = True

[tool:pytest]
log_cli = True
68 changes: 36 additions & 32 deletions src/wiremind_kubernetes/kubernetes_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ class KubernetesHelper:
A simple helper for Kubernetes manipulation.
"""

SCALE_DOWN_MAX_WAIT_TIME = 3600

def __init__(
self,
use_kubeconfig: bool = False,
Expand Down Expand Up @@ -135,31 +137,41 @@ def is_statefulset_stopped(self, deployment_name: str) -> bool:
return self.is_deployment_stopped(deployment_name, statefulset=True)

@retry_kubernetes_request_no_ignore
def is_deployment_stopped(self, deployment_name: str, statefulset: bool = False) -> bool:
logger.debug("Asking if deployment %s is stopped", deployment_name)
labels: Dict[str, str]
def _get_pods_from_deployment(self, deployment_name: str, statefulset: bool = False) -> List:
    """
    Return the list of Pods selected by the given Deployment (or StatefulSet,
    when statefulset=True) in self.namespace.

    Returns an empty list when the controller does not exist (HTTP 404);
    any other API error is re-raised.
    """
    if statefulset:
        logger.debug("Asking if StatefulSet %s is stopped", deployment_name)
        labels = self.client_appsv1_api.read_namespaced_stateful_set(
            deployment_name, self.namespace
        ).spec.selector.match_labels
    else:
        logger.debug("Asking if Deployment %s is stopped", deployment_name)
        labels = self.client_appsv1_api.read_namespaced_deployment(
            deployment_name, self.namespace
        ).spec.selector.match_labels

    try:
        # Select Pods with the exact label selector of the controller.
        return self.client_corev1_api.list_namespaced_pod(
            namespace=self.namespace,
            label_selector=",".join("%s=%s" % kv for kv in labels.items()),
        ).items
    except kubernetes.client.rest.ApiException as e:
        if e.status == 404:
            # Controller (and thus its Pods) does not exist: nothing to list.
            return []
        raise

current_scale = len(found_pods)
def is_deployment_stopped(self, deployment_name: str, statefulset: bool = False) -> bool:
    """
    Return True when the Deployment (or StatefulSet, when statefulset=True)
    has no living Pod left.

    Failed Pods (e.g. Evicted ones) are ignored: they will never run again,
    so they do not count as living replicas.
    """
    pod_list: List = self._get_pods_from_deployment(deployment_name, statefulset)

    # Count only Pods that may still be (or become) alive.
    # Fix: the previous check used `phase not in ("Failed")`, which is a
    # substring test against the string "Failed" (missing tuple comma), not
    # a membership test; compare the phase explicitly instead.
    current_scale = sum(1 for pod in pod_list if pod.status.phase != "Failed")

    logger.debug("%s has %s replicas", deployment_name, current_scale)
    if current_scale > 0:
        kind = "StatefulSet" if statefulset else "Deployment"
        logger.info("%s %s still has %s living replicas", deployment_name, kind, current_scale)
        return False
    return True

def is_deployment_ready(self, deployment_name: str, statefulset: bool = False):
if statefulset:
Expand Down Expand Up @@ -265,7 +277,7 @@ def _get_expected_deployment_scale_dict(self) -> Dict[int, Dict[str, int]]:

eds_dict[priority][deployment_name] = expected_scale

logger.debug("Deployments are is %s", pprint.pformat(eds_dict))
logger.debug("Deployments are %s", pprint.pformat(eds_dict))
return eds_dict

def start_pods(self):
Expand Down Expand Up @@ -295,29 +307,27 @@ def start_pods(self):
else:
logger.info("No Deployments to scale up")

def _are_deployments_stopped(self, deployment_dict: Dict[str, int]) -> bool:
    """
    Return True when every Deployment named in deployment_dict (a mapping of
    deployment_name -> expected_scale) has no living Pod left.
    """
    # all() short-circuits on the first still-running Deployment, exactly
    # like the early `return False` of a manual loop would.
    return all(self.is_deployment_stopped(name) for name in deployment_dict)

def _stop_deployments(self, deployment_dict: Dict[str, int]):
    """
    Scale down (to zero) every Deployment named in deployment_dict, a mapping
    of deployment_name -> expected_scale.

    Fire-and-forget: does not wait for the Pods to terminate and returns
    nothing.
    """
    for deployment_name in deployment_dict:
        self.scale_down_deployment(deployment_name)

def _wait_for_deployments_stopped(self, deployment_dict: Dict[str, int]):
    """
    Block until every Deployment named in deployment_dict (a mapping of
    deployment_name -> expected_scale) has no living Pod left.

    The scale-down is re-issued on every iteration in case the Deployment
    definition changed meanwhile. Raises Exception after roughly
    SCALE_DOWN_MAX_WAIT_TIME seconds without success.
    """
    stopped = False
    for _ in range(self.SCALE_DOWN_MAX_WAIT_TIME):  # ~1 second per iteration
        # Retry stopping in case the deployment definition changed.
        self._stop_deployments(deployment_dict)
        stopped = self._are_deployments_stopped(deployment_dict)
        if stopped:
            break
        time.sleep(1)
    if not stopped:
        raise Exception("Timed out waiting for pods to be deleted: aborting.")

Expand All @@ -336,17 +346,11 @@ def stop_pods(self):

priority: int
priorities: List[int] = sorted(expected_deployment_scale_dict, reverse=True)
scaled: bool = False
for priority in priorities:
priority_dict: Dict[str, int] = expected_deployment_scale_dict[priority]
if len(priority_dict):
self._stop_deployments(priority_dict)
self._wait_for_deployments_stopped(priority_dict)
scaled = True
if scaled:
logger.info("Done scaling down application Deployments, all Pods have been deleted")
else:
logger.info("No Deployments to scale down")
logger.info("Done scaling down application Deployments.")

def generate_job(
self,
Expand Down
3 changes: 1 addition & 2 deletions src/wiremind_kubernetes/tests/e2e_tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
TEST_NAMESPACE = "wiremind-kube-e2e-test"


logging.getLogger("wiremind_kubernetes").setLevel(logging.DEBUG)
logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -64,9 +65,7 @@ def populate_cluster():
logger.info("Waiting for deployments to be started...")
if (
not concerned_dm.is_deployment_ready("concerned")
or not concerned_dm.is_deployment_ready("concerned-new-style")
or not unconcerned_dm.is_deployment_ready("unconcerned")
or not unconcerned_dm.is_deployment_ready("unconcerned-new-style")
):
logger.info("All Deployments not ready yet, waiting...")
run_command(f"kubectl get pods --namespace {TEST_NAMESPACE}")
Expand Down
54 changes: 15 additions & 39 deletions src/wiremind_kubernetes/tests/e2e_tests/manifests/1_deployments.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,88 +24,87 @@ spec:
apiVersion: apps/v1
kind: Deployment
metadata:
name: concerned-high-priority
name: concerned-2
spec:
replicas: 1
replicas: 2
selector:
matchLabels:
app: sleepy
component: concerned-high-priority
component: concerned-2
template:
metadata:
labels:
app: sleepy
component: concerned-high-priority
component: concerned-2
spec:
containers:
- name: sleep
image: gcr.io/google_containers/pause-amd64:3.1
terminationGracePeriodSeconds: 1

---

apiVersion: apps/v1
kind: Deployment
metadata:
name: concerned-very-high-priority
name: concerned-high-priority
spec:
replicas: 1
selector:
matchLabels:
app: sleepy
component: concerned-very-high-priority
component: concerned-high-priority
template:
metadata:
labels:
app: sleepy
component: concerned-very-high-priority
component: concerned-high-priority
spec:
containers:
- name: sleep
image: gcr.io/google_containers/pause-amd64:3.1
terminationGracePeriodSeconds: 1

---


apiVersion: apps/v1
kind: Deployment
metadata:
name: concerned-low-priority
name: concerned-very-high-priority
spec:
replicas: 1
selector:
matchLabels:
app: sleepy
component: concerned-low-priority
component: concerned-very-high-priority
template:
metadata:
labels:
app: sleepy
component: concerned-low-priority
component: concerned-very-high-priority
spec:
containers:
- name: sleep
image: gcr.io/google_containers/pause-amd64:3.1
terminationGracePeriodSeconds: 1


---


apiVersion: apps/v1
kind: Deployment
metadata:
name: concerned-new-style
name: concerned-low-priority
spec:
replicas: 1
selector:
matchLabels:
app: sleepy
component: concerned-new-style
component: concerned-low-priority
template:
metadata:
labels:
app: sleepy
component: concerned-new-style
component: concerned-low-priority
spec:
containers:
- name: sleep
Expand Down Expand Up @@ -134,26 +133,3 @@ spec:
- name: sleep
image: gcr.io/google_containers/pause-amd64:3.1
terminationGracePeriodSeconds: 1

---

apiVersion: apps/v1
kind: Deployment
metadata:
name: unconcerned-new-style
spec:
replicas: 1
selector:
matchLabels:
app: sleepy
component: unconcerned-new-style
template:
metadata:
labels:
app: sleepy
component: unconcerned-new-style
spec:
containers:
- name: sleep
image: gcr.io/google_containers/pause-amd64:3.1
terminationGracePeriodSeconds: 1
46 changes: 17 additions & 29 deletions src/wiremind_kubernetes/tests/e2e_tests/manifests/2_edss.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ kind: ExpectedDeploymentScale
metadata:
name: concerned
labels:
release: concerned
app.kubernetes.io/instance: concerned
spec:
deploymentName: concerned
expectedScale: 1
Expand All @@ -13,51 +13,51 @@ spec:
apiVersion: "wiremind.io/v1"
kind: ExpectedDeploymentScale
metadata:
name: concerned-very-high-priority
name: concerned-2
labels:
release: concerned
app.kubernetes.io/instance: concerned
spec:
deploymentName: concerned-very-high-priority
expectedScale: 1
priority: 100
deploymentName: concerned-2
expectedScale: 2

---

apiVersion: "wiremind.io/v1"
kind: ExpectedDeploymentScale
metadata:
name: concerned-low-priority
name: concerned-very-high-priority
labels:
release: concerned
app.kubernetes.io/instance: concerned
spec:
deploymentName: concerned-low-priority
deploymentName: concerned-very-high-priority
expectedScale: 1
priority: -10
priority: 100

---

apiVersion: "wiremind.io/v1"
kind: ExpectedDeploymentScale
metadata:
name: concerned-high-priority
name: concerned-low-priority
labels:
release: concerned
app.kubernetes.io/instance: concerned
spec:
deploymentName: concerned-high-priority
deploymentName: concerned-low-priority
expectedScale: 1
priority: 10
priority: -10

---

apiVersion: "wiremind.io/v1"
kind: ExpectedDeploymentScale
metadata:
name: concerned-new-style
name: concerned-high-priority
labels:
app.kubernetes.io/instance: concerned
spec:
deploymentName: concerned-new-style
deploymentName: concerned-high-priority
expectedScale: 1
priority: 10

---

Expand All @@ -66,19 +66,7 @@ kind: ExpectedDeploymentScale
metadata:
name: unconcerned
labels:
release: unconcerned
spec:
deploymentName: unconcerned
expectedScale: 1

---

apiVersion: "wiremind.io/v1"
kind: ExpectedDeploymentScale
metadata:
name: unconcerned-new-style
labels:
release: unconcerned
app.kubernetes.io/instance: unconcerned
spec:
deploymentName: unconcerned
expectedScale: 1
Loading

0 comments on commit 42bfb4d

Please sign in to comment.