Skip to content

Commit

Permalink
Fix runtime issues (#258)
Browse files (browse the repository at this point in the history)
* testing

* touch up error handling with owner discovery

* fix missed cli keyword update

* add next step to probe port type

* minor next step update

* small text update

* fix readiness probe strings

* version bump
  • Loading branch information
stewartshea authored Nov 21, 2023
1 parent e6a9a39 commit 959f493
Show file tree
Hide file tree
Showing 7 changed files with 95 additions and 40 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.0.8
0.0.9
3 changes: 2 additions & 1 deletion codebundles/k8s-deployment-healthcheck/validate_probes.sh
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ for ((i=0; i<NUM_CONTAINERS; i++)); do
CONTAINER_PORTS=$(extract_data "$MANIFEST" ".spec.template.spec.containers[$i].ports[].containerPort")

if [[ ! " $CONTAINER_PORTS " == *"$PROBE_PORT"* ]]; then
echo "Container \`$CONTAINER_NAME\`: Port $PROBE_PORT used in ${PROBE_TYPE} is not exposed by the container."
echo "Container \`$CONTAINER_NAME\`: Port $PROBE_PORT used in $PROBE_TYPE is not exposed by the container."
next_steps+=("Update $PROBE_TYPE For \`${DEPLOYMENT_NAME}\` to use one of the following ports: $CONTAINER_PORTS")
else
echo "Container \`$CONTAINER_NAME\`: ${PROBE_TYPE} port $PROBE_PORT is valid."
fi
Expand Down
4 changes: 2 additions & 2 deletions codebundles/k8s-deployment-healthcheck/workload_next_steps.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,11 @@ if [[ $messages =~ "Misconfiguration" ]]; then
next_steps+=("Check for Node Failures or Maintenance Activities in Cluster \`$CONTEXT\`")
fi

if [[ $messages =~ "Liveness probe failed" ]]; then
if [[ $messages =~ "Liveness probe failed" || $messages =~ "Liveness probe errored" ]]; then
next_steps+=("Check Liveliness Probe Configuration for Deployment \`${DEPLOYMENT_NAME}\`")
fi

if [[ $messages =~ "Readiness probe errored" ]]; then
if [[ $messages =~ "Readiness probe errored" || $messages =~ "Readiness probe failed" ]]; then
next_steps+=("Check Readiness Probe Configuration for Deployment \`${DEPLOYMENT_NAME}\`")
fi

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
set -eo pipefail
# set -eo pipefail

# -----------------------------------------------------------------------------
# Script Information and Metadata
Expand Down Expand Up @@ -27,7 +27,6 @@ KUBERNETES_DISTRIBUTION_BINARY="kubectl"
get_owner() {
local resource_name=$1
local resource_kind=$2

owner_kind=$(${KUBERNETES_DISTRIBUTION_BINARY} get $resource_kind $resource_name -n "${NAMESPACE}" --context="${CONTEXT}" -o=jsonpath="{.metadata.ownerReferences[0].kind}")
if [ "$owner_kind" = "" ]; then
# No owner reference means there is no parent object. Return the direct object.
Expand Down
44 changes: 31 additions & 13 deletions codebundles/k8s-namespace-healthcheck/runbook.robot
Original file line number Diff line number Diff line change
Expand Up @@ -45,16 +45,17 @@ Troubleshoot Warning Events in Namespace `${NAMESPACE}`
... env=${env}
... secret_file__kubeconfig=${kubeconfig}
... include_in_history=False
${messages}= Replace String ${item["summary_messages"]} " ${EMPTY}
${item_owner_output}= RW.CLI.Run Cli
... cmd=echo "${item_owner.stdout}" | sed 's/ *$//' | tr -d '\n'
... env=${env}
... include_in_history=False
${messages}= Replace String ${item["summary_messages"]} " ${EMPTY}
IF len($item_owner_output.stdout) > 0
IF len($item_owner_output.stdout) > 0 and ($item_owner_output.stdout) != "No resource found"
${owner_kind} ${owner_name}= Split String ${item_owner_output.stdout} ${SPACE}
${owner_name}= Replace String ${owner_name} \n ${EMPTY}
ELSE
${owner_kind} ${owner_name}= Set Variable ""
${owner_kind}= Set Variable "Unknown"
${owner_name}= Set Variable "Unknown"
END
${item_next_steps}= RW.CLI.Run Bash File
... bash_file=workload_next_steps.sh
Expand Down Expand Up @@ -148,11 +149,16 @@ Troubleshoot Pending Pods In Namespace `${NAMESPACE}`
... env=${env}
... secret_file__kubeconfig=${kubeconfig}
... include_in_history=False
IF len($item_owner.stdout) > 0
${owner_kind} ${owner_name}= Split String ${item_owner.stdout} ${SPACE}
${item_owner_output}= RW.CLI.Run Cli
... cmd=echo "${item_owner.stdout}" | sed 's/ *$//' | tr -d '\n'
... env=${env}
... include_in_history=False
IF len($item_owner_output.stdout) > 0 and ($item_owner_output.stdout) != "No resource found"
${owner_kind} ${owner_name}= Split String ${item_owner_output.stdout} ${SPACE}
${owner_name}= Replace String ${owner_name} \n ${EMPTY}
ELSE
${owner_kind} ${owner_name}= Set Variable ""
${owner_kind}= Set Variable "Unknown"
${owner_name}= Set Variable "Unknown"
END
${item_next_steps}= RW.CLI.Run Bash File
... bash_file=workload_next_steps.sh
Expand Down Expand Up @@ -211,11 +217,16 @@ Troubleshoot Failed Pods In Namespace `${NAMESPACE}`
... env=${env}
... secret_file__kubeconfig=${kubeconfig}
... include_in_history=False
IF len($item_owner.stdout) > 0
${owner_kind} ${owner_name}= Split String ${item_owner.stdout} ${SPACE}
${item_owner_output}= RW.CLI.Run Cli
... cmd=echo "${item_owner.stdout}" | sed 's/ *$//' | tr -d '\n'
... env=${env}
... include_in_history=False
IF len($item_owner_output.stdout) > 0 and ($item_owner_output.stdout) != "No resource found"
${owner_kind} ${owner_name}= Split String ${item_owner_output.stdout} ${SPACE}
${owner_name}= Replace String ${owner_name} \n ${EMPTY}
ELSE
${owner_kind} ${owner_name}= Set Variable ""
${owner_kind}= Set Variable "Unknown"
${owner_name}= Set Variable "Unknown"
END
${item_next_steps}= RW.CLI.Run Bash File
... bash_file=workload_next_steps.sh
Expand Down Expand Up @@ -288,7 +299,8 @@ Troubleshoot Workload Status Conditions In Namespace `${NAMESPACE}`
${owner_kind} ${owner_name}= Split String ${item_owner_output.stdout} ${SPACE}
${owner_name}= Replace String ${owner_name} \n ${EMPTY}
ELSE
${owner_kind} ${owner_name}= Set Variable ""
${owner_kind}= Set Variable "Unknown"
${owner_name}= Set Variable "Unknown"
END
${item_next_steps}= RW.CLI.Run Bash File
... bash_file=workload_next_steps.sh
Expand Down Expand Up @@ -319,6 +331,7 @@ Get Listing Of Resources In Namespace `${NAMESPACE}`
... env=${env}
... secret_file__kubeconfig=${kubeconfig}
... render_in_commandlist=true
... timeout_seconds=180
${history}= RW.CLI.Pop Shell History
RW.Core.Add Pre To Report Informational Get All for Namespace: ${NAMESPACE}
RW.Core.Add Pre To Report ${all_results.stdout}
Expand Down Expand Up @@ -351,11 +364,16 @@ Check Event Anomalies in Namespace `${NAMESPACE}`
... secret_file__kubeconfig=${kubeconfig}
... include_in_history=False
${messages}= Replace String ${item["summary_messages"]} " ${EMPTY}
IF len($item_owner.stdout) > 0
${owner_kind} ${owner_name}= Split String ${item_owner.stdout} ${SPACE}
${item_owner_output}= RW.CLI.Run Cli
... cmd=echo "${item_owner.stdout}" | sed 's/ *$//' | tr -d '\n'
... env=${env}
... include_in_history=False
IF len($item_owner_output.stdout) > 0 and ($item_owner_output.stdout) != "No resource found"
${owner_kind} ${owner_name}= Split String ${item_owner_output.stdout} ${SPACE}
${owner_name}= Replace String ${owner_name} \n ${EMPTY}
ELSE
${owner_kind} ${owner_name}= Set Variable ""
${owner_kind}= Set Variable "Unknown"
${owner_name}= Set Variable "Unknown"
END
${item_next_steps}= RW.CLI.Run Bash File
... bash_file=anomaly_next_steps.sh
Expand Down
8 changes: 4 additions & 4 deletions codebundles/k8s-namespace-healthcheck/workload_next_steps.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,12 @@ if [[ $messages =~ "Misconfiguration" ]]; then
next_steps+=("Check for Node Failures or Maintenance Activities in Cluster \`$CONTEXT\`")
fi

if [[ $messages =~ "Liveness probe failed" ]]; then
next_steps+=("Check Liveliness Probe Configuration for $owner_kind \`$owner_name\`")
if [[ $messages =~ "Liveness probe failed" || $messages =~ "Liveness probe errored" ]]; then
next_steps+=("Check Liveliness Probe Configuration for Deployment \`${DEPLOYMENT_NAME}\`")
fi

if [[ $messages =~ "Readiness probe errored" ]]; then
next_steps+=("Check Readiness Probe Configuration for $owner_kind \`$owner_name\`")
if [[ $messages =~ "Readiness probe errored" || $messages =~ "Readiness probe failed" ]]; then
next_steps+=("Check Readiness Probe Configuration for Deployment \`${DEPLOYMENT_NAME}\`")
fi

if [[ $messages =~ "PodFailed" ]]; then
Expand Down
71 changes: 54 additions & 17 deletions libraries/RW/CLI/CLI.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,11 @@ def execute_command(
"""
if not service:
return execute_local_command(
cmd=cmd, request_secrets=request_secrets, env=env, files=files, timeout_seconds=timeout_seconds
cmd=cmd,
request_secrets=request_secrets,
env=env,
files=files,
timeout_seconds=timeout_seconds,
)
else:
return platform.execute_shell_command(
Expand All @@ -87,21 +91,26 @@ def _create_kubernetes_remote_exec(
"""**DEPRECATED**"""
# if no specific workload name but labels provided, fetch the first running pod with labels
if not workload_name and labels:
request_secrets: [platform.ShellServiceRequestSecret] = [] if len(kwargs.keys()) > 0 else None
request_secrets: [platform.ShellServiceRequestSecret] = (
[] if len(kwargs.keys()) > 0 else None
)
request_secrets = _create_secrets_from_kwargs(**kwargs)
pod_name_cmd = (
f"kubectl get pods --field-selector=status.phase==Running -l {labels}"
+ " -o jsonpath='{.items[0].metadata.name}'"
+ f" -n {namespace} --context={context}"
)
rsp = execute_command(cmd=pod_name_cmd, service=target_service, request_secrets=request_secrets, env=env)
rsp = execute_command(
cmd=pod_name_cmd,
service=target_service,
request_secrets=request_secrets,
env=env,
)
SHELL_HISTORY.append(pod_name_cmd)
cli_utils.verify_rsp(rsp)
workload_name = rsp.stdout
# use eval so that env variables are evaluated in the subprocess
cmd_template: str = (
f"eval $(echo \"kubectl exec -n {namespace} --context={context} {workload_name} -- /bin/bash -c '{cmd}'\")"
)
cmd_template: str = f"eval $(echo \"kubectl exec -n {namespace} --context={context} {workload_name} -- /bin/bash -c '{cmd}'\")"
cmd = cmd_template
logger.info(f"Templated remote exec: {cmd}")
return cmd
Expand All @@ -115,17 +124,23 @@ def _create_secrets_from_kwargs(**kwargs) -> list[platform.ShellServiceRequestSe
"""
global SECRET_PREFIX
global SECRET_FILE_PREFIX
request_secrets: list[platform.ShellServiceRequestSecret] = [] if len(kwargs.keys()) > 0 else None
request_secrets: list[platform.ShellServiceRequestSecret] = (
[] if len(kwargs.keys()) > 0 else None
)
for key, value in kwargs.items():
if not key.startswith(SECRET_PREFIX) and not key.startswith(SECRET_FILE_PREFIX):
continue
if not isinstance(value, platform.Secret):
logger.warning(f"kwarg secret {value} in key {key} is the wrong type, should be platform.Secret")
logger.warning(
f"kwarg secret {value} in key {key} is the wrong type, should be platform.Secret"
)
continue
if key.startswith(SECRET_PREFIX):
request_secrets.append(platform.ShellServiceRequestSecret(value))
elif key.startswith(SECRET_FILE_PREFIX):
request_secrets.append(platform.ShellServiceRequestSecret(value, as_file=True))
request_secrets.append(
platform.ShellServiceRequestSecret(value, as_file=True)
)
return request_secrets


Expand Down Expand Up @@ -167,17 +182,31 @@ def run_bash_file(
path, _ = rw_path_to_robot.split(pattern)
new_path = os.path.join("/collection", path)
# Modify the bash_file to point to the new directory
local_bash_file = f"./{bash_file}"
bash_file = os.path.join(new_path, bash_file)
if os.path.exists(bash_file):
logger.info(f"File '{bash_file}' found at derived path: {new_path}.")
cmd_overide = f"{bash_file}"
logger.info(
f"File '{bash_file}' found at derived path: {new_path}."
)
if cmd_overide:
cmd_overide = cmd_overide.replace(
f"{local_bash_file}", f"{bash_file}"
)
else:
cmd_overide = f"{bash_file}"
break
else:
logger.warning(f"File '{bash_file}' not found at derived path: {new_path}.")
logger.warning(
f"File '{bash_file}' not found at derived path: {new_path}."
)
else:
logger.warning("Current directory is root, but 'RW_PATH_TO_ROBOT' is not set.")
logger.warning(
"Current directory is root, but 'RW_PATH_TO_ROBOT' is not set."
)
else:
logger.warning(f"File '{bash_file}' not found in the current directory and current directory is not root.")
logger.warning(
f"File '{bash_file}' not found in the current directory and current directory is not root."
)

if not cmd_overide:
cmd_overide = f"./{bash_file}"
Expand Down Expand Up @@ -244,7 +273,9 @@ def run_cli(
global SHELL_HISTORY
looped_results = []
rsp = None
logger.info(f"Requesting command: {cmd} with service: {target_service} - None indicates run local")
logger.info(
f"Requesting command: {cmd} with service: {target_service} - None indicates run local"
)
if run_in_workload_with_labels or run_in_workload_with_name:
cmd = _create_kubernetes_remote_exec(
cmd=cmd,
Expand All @@ -256,7 +287,9 @@ def run_cli(
context=optional_context,
**kwargs,
)
request_secrets: [platform.ShellServiceRequestSecret] = [] if len(kwargs.keys()) > 0 else None
request_secrets: [platform.ShellServiceRequestSecret] = (
[] if len(kwargs.keys()) > 0 else None
)
logger.info(f"Received kwargs: {kwargs}")
request_secrets = _create_secrets_from_kwargs(**kwargs)
if loop_with_items and len(loop_with_items) > 0:
Expand Down Expand Up @@ -288,7 +321,11 @@ def run_cli(
)
else:
rsp = execute_command(
cmd=cmd, service=target_service, request_secrets=request_secrets, env=env, timeout_seconds=timeout_seconds
cmd=cmd,
service=target_service,
request_secrets=request_secrets,
env=env,
timeout_seconds=timeout_seconds,
)
if include_in_history:
SHELL_HISTORY.append(cmd)
Expand Down

0 comments on commit 959f493

Please sign in to comment.