From 8c86b9b4ebe7f06fc9f54522c589d5599ef313fb Mon Sep 17 00:00:00 2001
From: Shea Stewart
Date: Tue, 30 Jan 2024 13:31:53 -0500
Subject: [PATCH] Next steps tweaks (#315)

* update namespace health error messages
* add next step
* add more messages
* update
* update syntax issue
* fix var
---
Reviewer notes (this area is ignored when the patch is applied):

- Purpose: adds next-step recommendations to workload_next_steps.sh for
  container restart back-off, unschedulable pods (no preemption victims /
  insufficient cpu), node group size limits and failed health checks, plus
  a generic fallback when no other recommendation matched.
- Fix applied to this patch: the first "Not enough node resources ..."
  string in the "No preemption victims found" / "Insufficient cpu" branch
  ended with a trailing space and a line break inside the quotes. The list
  is printed with `printf "%s\n" ... | sort | uniq`, so that entry produced
  a stray extra line and could never be de-duplicated against the identical
  sentence in the "max node group size reached" branch. The string is now
  byte-identical in both branches. Line counts in the hunk headers and the
  diffstat are unchanged; the post-image hash on the `index` line was
  computed before this fix and will no longer match the resulting blob.
- NOTE(review): the unchanged context line "Check Resource Quota
  Utilization in Namepace ..." uses unescaped backticks inside double
  quotes, which bash treats as command substitution, and misspells
  "Namespace". Every other message escapes the backticks. It is left as-is
  here because context lines must match the target file; fix it in a
  follow-up change.
- NOTE(review): line structure was restored from a whitespace-collapsed
  copy; the 4-space indentation and the position of blank context lines are
  assumed - confirm against the target file before applying.

 .../workload_next_steps.sh | 25 +++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/codebundles/k8s-namespace-healthcheck/workload_next_steps.sh b/codebundles/k8s-namespace-healthcheck/workload_next_steps.sh
index f3698363..ddece481 100755
--- a/codebundles/k8s-namespace-healthcheck/workload_next_steps.sh
+++ b/codebundles/k8s-namespace-healthcheck/workload_next_steps.sh
@@ -48,6 +48,13 @@ if [[ $messages =~ "ImagePullBackOff" || $messages =~ "Back-off pulling image" |
     next_steps+=("List Images and Tags for Every Container in Failed Pods for Namespace \`$NAMESPACE\`")
 fi
 
+if [[ $messages =~ "Back-off restarting failed container" ]]; then
+    next_steps+=("Check Log for $owner_kind \`$owner_name\`")
+    next_steps+=("Troubleshoot Warning Events for $owner_kind \`$owner_name\`")
+
+fi
+
+
 if [[ $messages =~ "ImagePullBackOff" || $messages =~ "Back-off pulling image" || $messages =~ "ErrImagePull" ]]; then
     next_steps+=("List ImagePullBackoff Events and Test Path and Tags for Namespace \`$NAMESPACE\`")
     next_steps+=("List Images and Tags for Every Container in Failed Pods for Namespace \`$NAMESPACE\`")
@@ -57,7 +64,25 @@ if [[ $messages =~ "forbidden: failed quota" ]]; then
     next_steps+=("Check Resource Quota Utilization in Namepace `${NAMESPACE}`")
 fi
 
+if [[ $messages =~ "No preemption victims found for incoming pod" || $messages =~ "Insufficient cpu" ]]; then
+    next_steps+=("Not enough node resources available to schedule pods. Escalate this issue to your cluster owner.")
+    next_steps+=("Increase Node Count in Cluster")
+    next_steps+=("Check for Quota Errors")
+fi
+
+if [[ $messages =~ "max node group size reached" ]]; then
+    next_steps+=("Not enough node resources available to schedule pods. Escalate this issue to your cluster owner.")
+    next_steps+=("Increase node count in cluster.")
+    next_steps+=("Check for quota errors.")
+fi
 
+if [[ $messages =~ "Health check failed after" ]]; then
+    next_steps+=("Check $owner_kind \`$owner_name\` Health")
+fi
+
+if [[ ${#next_steps[@]} -eq 0 ]]; then
+    next_steps+=("Please review the report logs and escalate the issue if necessary.")
+fi
 
 # Display the list of recommendations
 printf "%s\n" "${next_steps[@]}" | sort | uniq