Fix runtime issues (#258)

* testing
* touch up error handling with owner discovery
* fix missed cli keyword update
* add next step to probe port type
* minor next step update
* small text update
* fix readiness probe strings
* version bump
runwhen-contrib · Nov 21, 2023 · 959f493 · 959f493
1 parent e6a9a39
commit 959f493
Show file tree

Hide file tree

Showing 7 changed files with 95 additions and 40 deletions.
diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-0.0.8
+0.0.9
diff --git a/codebundles/k8s-deployment-healthcheck/validate_probes.sh b/codebundles/k8s-deployment-healthcheck/validate_probes.sh
@@ -55,7 +55,8 @@ for ((i=0; i<NUM_CONTAINERS; i++)); do
         CONTAINER_PORTS=$(extract_data "$MANIFEST" ".spec.template.spec.containers[$i].ports[].containerPort")
 
         if [[ ! " $CONTAINER_PORTS " == *"$PROBE_PORT"* ]]; then
-            echo "Container \`$CONTAINER_NAME\`: Port $PROBE_PORT used in ${PROBE_TYPE} is not exposed by the container."
+            echo "Container \`$CONTAINER_NAME\`: Port $PROBE_PORT used in $PROBE_TYPE is not exposed by the container."
+            next_steps+=("Update $PROBE_TYPE For \`${DEPLOYMENT_NAME}\` to use one of the following ports: $CONTAINER_PORTS")
         else
             echo "Container \`$CONTAINER_NAME\`: ${PROBE_TYPE} port $PROBE_PORT is valid."
         fi

diff --git a/codebundles/k8s-deployment-healthcheck/workload_next_steps.sh b/codebundles/k8s-deployment-healthcheck/workload_next_steps.sh
@@ -36,11 +36,11 @@ if [[ $messages =~ "Misconfiguration" ]]; then
     next_steps+=("Check for Node Failures or Maintenance Activities in Cluster \`$CONTEXT\`")
 fi
 
-if [[ $messages =~ "Liveness probe failed" ]]; then
+if [[ $messages =~ "Liveness probe failed" || $messages =~ "Liveness probe errored" ]]; then
     next_steps+=("Check Liveliness Probe Configuration for Deployment \`${DEPLOYMENT_NAME}\`")
 fi
 
-if [[ $messages =~ "Readiness probe errored" ]]; then
+if [[ $messages =~ "Readiness probe errored" || $messages =~ "Readiness probe failed" ]]; then
     next_steps+=("Check Readiness Probe Configuration for Deployment \`${DEPLOYMENT_NAME}\`")
 fi
 

diff --git a/codebundles/k8s-namespace-healthcheck/find_resource_owners.sh b/codebundles/k8s-namespace-healthcheck/find_resource_owners.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-set -eo pipefail
+# set -eo pipefail
 
 # -----------------------------------------------------------------------------
 # Script Information and Metadata
@@ -27,7 +27,6 @@ KUBERNETES_DISTRIBUTION_BINARY="kubectl"
 get_owner() {
     local resource_name=$1
     local resource_kind=$2
-
     owner_kind=$(${KUBERNETES_DISTRIBUTION_BINARY} get $resource_kind $resource_name -n "${NAMESPACE}" --context="${CONTEXT}" -o=jsonpath="{.metadata.ownerReferences[0].kind}")
     if [ "$owner_kind" = "" ]; then
         # No owner reference means there is no parent object. Return the direct object.

diff --git a/codebundles/k8s-namespace-healthcheck/runbook.robot b/codebundles/k8s-namespace-healthcheck/runbook.robot
@@ -45,16 +45,17 @@ Troubleshoot Warning Events in Namespace `${NAMESPACE}`
             ...    env=${env}
             ...    secret_file__kubeconfig=${kubeconfig}
             ...    include_in_history=False
+            ${messages}=    Replace String    ${item["summary_messages"]}    "    ${EMPTY}
             ${item_owner_output}=    RW.CLI.Run Cli
             ...    cmd=echo "${item_owner.stdout}" | sed 's/ *$//' | tr -d '\n'
             ...    env=${env}
             ...    include_in_history=False
-            ${messages}=    Replace String    ${item["summary_messages"]}    "    ${EMPTY}
-            IF    len($item_owner_output.stdout) > 0
+            IF    len($item_owner_output.stdout) > 0 and ($item_owner_output.stdout) != "No resource found"
                 ${owner_kind}    ${owner_name}=    Split String    ${item_owner_output.stdout}    ${SPACE}
                 ${owner_name}=    Replace String    ${owner_name}    \n    ${EMPTY}
             ELSE
-                ${owner_kind}    ${owner_name}=    Set Variable    ""
+                ${owner_kind}=    Set Variable    "Unknown"
+                ${owner_name}=    Set Variable    "Unknown"
             END
             ${item_next_steps}=    RW.CLI.Run Bash File
             ...    bash_file=workload_next_steps.sh
@@ -148,11 +149,16 @@ Troubleshoot Pending Pods In Namespace `${NAMESPACE}`
                 ...    env=${env}
                 ...    secret_file__kubeconfig=${kubeconfig}
                 ...    include_in_history=False
-                IF    len($item_owner.stdout) > 0
-                    ${owner_kind}    ${owner_name}=    Split String    ${item_owner.stdout}    ${SPACE}
+                ${item_owner_output}=    RW.CLI.Run Cli
+                ...    cmd=echo "${item_owner.stdout}" | sed 's/ *$//' | tr -d '\n'
+                ...    env=${env}
+                ...    include_in_history=False
+                IF    len($item_owner_output.stdout) > 0 and ($item_owner_output.stdout) != "No resource found"
+                    ${owner_kind}    ${owner_name}=    Split String    ${item_owner_output.stdout}    ${SPACE}
                     ${owner_name}=    Replace String    ${owner_name}    \n    ${EMPTY}
                 ELSE
-                    ${owner_kind}    ${owner_name}=    Set Variable    ""
+                    ${owner_kind}=    Set Variable    "Unknown"
+                    ${owner_name}=    Set Variable    "Unknown"
                 END
                 ${item_next_steps}=    RW.CLI.Run Bash File
                 ...    bash_file=workload_next_steps.sh
@@ -211,11 +217,16 @@ Troubleshoot Failed Pods In Namespace `${NAMESPACE}`
                 ...    env=${env}
                 ...    secret_file__kubeconfig=${kubeconfig}
                 ...    include_in_history=False
-                IF    len($item_owner.stdout) > 0
-                    ${owner_kind}    ${owner_name}=    Split String    ${item_owner.stdout}    ${SPACE}
+                ${item_owner_output}=    RW.CLI.Run Cli
+                ...    cmd=echo "${item_owner.stdout}" | sed 's/ *$//' | tr -d '\n'
+                ...    env=${env}
+                ...    include_in_history=False
+                IF    len($item_owner_output.stdout) > 0 and ($item_owner_output.stdout) != "No resource found"
+                    ${owner_kind}    ${owner_name}=    Split String    ${item_owner_output.stdout}    ${SPACE}
                     ${owner_name}=    Replace String    ${owner_name}    \n    ${EMPTY}
                 ELSE
-                    ${owner_kind}    ${owner_name}=    Set Variable    ""
+                    ${owner_kind}=    Set Variable    "Unknown"
+                    ${owner_name}=    Set Variable    "Unknown"
                 END
                 ${item_next_steps}=    RW.CLI.Run Bash File
                 ...    bash_file=workload_next_steps.sh
@@ -288,7 +299,8 @@ Troubleshoot Workload Status Conditions In Namespace `${NAMESPACE}`
                 ${owner_kind}    ${owner_name}=    Split String    ${item_owner_output.stdout}    ${SPACE}
                 ${owner_name}=    Replace String    ${owner_name}    \n    ${EMPTY}
             ELSE
-                ${owner_kind}    ${owner_name}=    Set Variable    ""
+                ${owner_kind}=    Set Variable    "Unknown"
+                ${owner_name}=    Set Variable    "Unknown"
             END
             ${item_next_steps}=    RW.CLI.Run Bash File
             ...    bash_file=workload_next_steps.sh
@@ -319,6 +331,7 @@ Get Listing Of Resources In Namespace `${NAMESPACE}`
     ...    env=${env}
     ...    secret_file__kubeconfig=${kubeconfig}
     ...    render_in_commandlist=true
+    ...    timeout_seconds=180
     ${history}=    RW.CLI.Pop Shell History
     RW.Core.Add Pre To Report    Informational Get All for Namespace: ${NAMESPACE}
     RW.Core.Add Pre To Report    ${all_results.stdout}
@@ -351,11 +364,16 @@ Check Event Anomalies in Namespace `${NAMESPACE}`
             ...    secret_file__kubeconfig=${kubeconfig}
             ...    include_in_history=False
             ${messages}=    Replace String    ${item["summary_messages"]}    "    ${EMPTY}
-            IF    len($item_owner.stdout) > 0
-                ${owner_kind}    ${owner_name}=    Split String    ${item_owner.stdout}    ${SPACE}
+            ${item_owner_output}=    RW.CLI.Run Cli
+            ...    cmd=echo "${item_owner.stdout}" | sed 's/ *$//' | tr -d '\n'
+            ...    env=${env}
+            ...    include_in_history=False
+            IF    len($item_owner_output.stdout) > 0 and ($item_owner_output.stdout) != "No resource found"
+                ${owner_kind}    ${owner_name}=    Split String    ${item_owner_output.stdout}    ${SPACE}
                 ${owner_name}=    Replace String    ${owner_name}    \n    ${EMPTY}
             ELSE
-                ${owner_kind}    ${owner_name}=    Set Variable    ""
+                ${owner_kind}=    Set Variable    "Unknown"
+                ${owner_name}=    Set Variable    "Unknown"
             END
             ${item_next_steps}=    RW.CLI.Run Bash File
             ...    bash_file=anomaly_next_steps.sh

diff --git a/codebundles/k8s-namespace-healthcheck/workload_next_steps.sh b/codebundles/k8s-namespace-healthcheck/workload_next_steps.sh
@@ -31,12 +31,12 @@ if [[ $messages =~ "Misconfiguration" ]]; then
     next_steps+=("Check for Node Failures or Maintenance Activities in Cluster \`$CONTEXT\`")
 fi
 
-if [[ $messages =~ "Liveness probe failed" ]]; then
-    next_steps+=("Check Liveliness Probe Configuration for $owner_kind \`$owner_name\`")
+if [[ $messages =~ "Liveness probe failed" || $messages =~ "Liveness probe errored" ]]; then
+    next_steps+=("Check Liveliness Probe Configuration for Deployment \`${DEPLOYMENT_NAME}\`")
 fi
 
-if [[ $messages =~ "Readiness probe errored" ]]; then
-    next_steps+=("Check Readiness Probe Configuration for $owner_kind \`$owner_name\`")
+if [[ $messages =~ "Readiness probe errored" || $messages =~ "Readiness probe failed" ]]; then
+    next_steps+=("Check Readiness Probe Configuration for Deployment \`${DEPLOYMENT_NAME}\`")
 fi
 
 if [[ $messages =~ "PodFailed" ]]; then

diff --git a/libraries/RW/CLI/CLI.py b/libraries/RW/CLI/CLI.py
@@ -62,7 +62,11 @@ def execute_command(
     """
     if not service:
         return execute_local_command(
-            cmd=cmd, request_secrets=request_secrets, env=env, files=files, timeout_seconds=timeout_seconds
+            cmd=cmd,
+            request_secrets=request_secrets,
+            env=env,
+            files=files,
+            timeout_seconds=timeout_seconds,
         )
     else:
         return platform.execute_shell_command(
@@ -87,21 +91,26 @@ def _create_kubernetes_remote_exec(
     """**DEPRECATED**"""
     # if no specific workload name but labels provided, fetch the first running pod with labels
     if not workload_name and labels:
-        request_secrets: [platform.ShellServiceRequestSecret] = [] if len(kwargs.keys()) > 0 else None
+        request_secrets: [platform.ShellServiceRequestSecret] = (
+            [] if len(kwargs.keys()) > 0 else None
+        )
         request_secrets = _create_secrets_from_kwargs(**kwargs)
         pod_name_cmd = (
             f"kubectl get pods --field-selector=status.phase==Running -l {labels}"
             + " -o jsonpath='{.items[0].metadata.name}'"
             + f" -n {namespace} --context={context}"
         )
-        rsp = execute_command(cmd=pod_name_cmd, service=target_service, request_secrets=request_secrets, env=env)
+        rsp = execute_command(
+            cmd=pod_name_cmd,
+            service=target_service,
+            request_secrets=request_secrets,
+            env=env,
+        )
         SHELL_HISTORY.append(pod_name_cmd)
         cli_utils.verify_rsp(rsp)
         workload_name = rsp.stdout
     # use eval so that env variables are evaluated in the subprocess
-    cmd_template: str = (
-        f"eval $(echo \"kubectl exec -n {namespace} --context={context} {workload_name} -- /bin/bash -c '{cmd}'\")"
-    )
+    cmd_template: str = f"eval $(echo \"kubectl exec -n {namespace} --context={context} {workload_name} -- /bin/bash -c '{cmd}'\")"
     cmd = cmd_template
     logger.info(f"Templated remote exec: {cmd}")
     return cmd
@@ -115,17 +124,23 @@ def _create_secrets_from_kwargs(**kwargs) -> list[platform.ShellServiceRequestSe
     """
     global SECRET_PREFIX
     global SECRET_FILE_PREFIX
-    request_secrets: list[platform.ShellServiceRequestSecret] = [] if len(kwargs.keys()) > 0 else None
+    request_secrets: list[platform.ShellServiceRequestSecret] = (
+        [] if len(kwargs.keys()) > 0 else None
+    )
     for key, value in kwargs.items():
         if not key.startswith(SECRET_PREFIX) and not key.startswith(SECRET_FILE_PREFIX):
             continue
         if not isinstance(value, platform.Secret):
-            logger.warning(f"kwarg secret {value} in key {key} is the wrong type, should be platform.Secret")
+            logger.warning(
+                f"kwarg secret {value} in key {key} is the wrong type, should be platform.Secret"
+            )
             continue
         if key.startswith(SECRET_PREFIX):
             request_secrets.append(platform.ShellServiceRequestSecret(value))
         elif key.startswith(SECRET_FILE_PREFIX):
-            request_secrets.append(platform.ShellServiceRequestSecret(value, as_file=True))
+            request_secrets.append(
+                platform.ShellServiceRequestSecret(value, as_file=True)
+            )
     return request_secrets
 
 
@@ -167,17 +182,31 @@ def run_bash_file(
                         path, _ = rw_path_to_robot.split(pattern)
                         new_path = os.path.join("/collection", path)
                         # Modify the bash_file to point to the new directory
+                        local_bash_file = f"./{bash_file}"
                         bash_file = os.path.join(new_path, bash_file)
                         if os.path.exists(bash_file):
-                            logger.info(f"File '{bash_file}' found at derived path: {new_path}.")
-                            cmd_overide = f"{bash_file}"
+                            logger.info(
+                                f"File '{bash_file}' found at derived path: {new_path}."
+                            )
+                            if cmd_overide:
+                                cmd_overide = cmd_overide.replace(
+                                    f"{local_bash_file}", f"{bash_file}"
+                                )
+                            else:
+                                cmd_overide = f"{bash_file}"
                             break
                         else:
-                            logger.warning(f"File '{bash_file}' not found at derived path: {new_path}.")
+                            logger.warning(
+                                f"File '{bash_file}' not found at derived path: {new_path}."
+                            )
             else:
-                logger.warning("Current directory is root, but 'RW_PATH_TO_ROBOT' is not set.")
+                logger.warning(
+                    "Current directory is root, but 'RW_PATH_TO_ROBOT' is not set."
+                )
         else:
-            logger.warning(f"File '{bash_file}' not found in the current directory and current directory is not root.")
+            logger.warning(
+                f"File '{bash_file}' not found in the current directory and current directory is not root."
+            )
 
     if not cmd_overide:
         cmd_overide = f"./{bash_file}"
@@ -244,7 +273,9 @@ def run_cli(
     global SHELL_HISTORY
     looped_results = []
     rsp = None
-    logger.info(f"Requesting command: {cmd} with service: {target_service} - None indicates run local")
+    logger.info(
+        f"Requesting command: {cmd} with service: {target_service} - None indicates run local"
+    )
     if run_in_workload_with_labels or run_in_workload_with_name:
         cmd = _create_kubernetes_remote_exec(
             cmd=cmd,
@@ -256,7 +287,9 @@ def run_cli(
             context=optional_context,
             **kwargs,
         )
-    request_secrets: [platform.ShellServiceRequestSecret] = [] if len(kwargs.keys()) > 0 else None
+    request_secrets: [platform.ShellServiceRequestSecret] = (
+        [] if len(kwargs.keys()) > 0 else None
+    )
     logger.info(f"Received kwargs: {kwargs}")
     request_secrets = _create_secrets_from_kwargs(**kwargs)
     if loop_with_items and len(loop_with_items) > 0:
@@ -288,7 +321,11 @@ def run_cli(
         )
     else:
         rsp = execute_command(
-            cmd=cmd, service=target_service, request_secrets=request_secrets, env=env, timeout_seconds=timeout_seconds
+            cmd=cmd,
+            service=target_service,
+            request_secrets=request_secrets,
+            env=env,
+            timeout_seconds=timeout_seconds,
         )
         if include_in_history:
             SHELL_HISTORY.append(cmd)