Skip to content

Commit

Permalink
updates resource task (#382)
Browse files Browse the repository at this point in the history
* switch order of add to report in case there are no issues and empty json loads fails

* add previous pods logs to log scan

* rewrite cpu resource recommendations

* add config provided to template

* change title

* change title

* remove ticks from resource change recommendation
  • Loading branch information
stewartshea authored Jun 1, 2024
1 parent b7d0bb7 commit e953c4a
Show file tree
Hide file tree
Showing 9 changed files with 291 additions and 221 deletions.
2 changes: 2 additions & 0 deletions codebundles/gcp-bucket-health/check_security.sh
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,8 @@ done
echo "Security Issues:"
if [ ${#ISSUES[@]} -eq 0 ]; then
echo "No security issues found."
# Write a JSON list holding a single empty object so that the downstream json load doesn't fail.
echo "[{}]" > $HOME/bucket_security_issues.json
else
echo "${ISSUES[@]}" | jq -s . > $HOME/bucket_security_issues.json
cat $HOME/bucket_security_issues.json
Expand Down
5 changes: 3 additions & 2 deletions codebundles/gcp-bucket-health/runbook.robot
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ Check GCP Bucket Security Configuration for `${PROJECT_IDS}`
... env=${env}
... secret_file__gcp_credentials_json=${gcp_credentials_json}
... show_in_rwl_cheatsheet=true
RW.Core.Add Pre To Report GCP Security Configuration Check:\n${bucket_security_configuration.stdout}
RW.Core.Add Pre To Report Commands Used:\n${bucket_security_configuration.cmd}

${bucket_security_output}= RW.CLI.Run Cli
... cmd=cat $HOME/bucket_security_issues.json | jq .
... env=${env}
Expand All @@ -82,8 +85,6 @@ Check GCP Bucket Security Configuration for `${PROJECT_IDS}`
... next_steps=Review IAM configuration for GCP storage bucket `${item["bucket"]}` in project `${item["project"]}`
END
END
RW.Core.Add Pre To Report GCP Security Configuration Check:\n${bucket_security_configuration.stdout}
RW.Core.Add Pre To Report Commands Used:\n${bucket_security_configuration.cmd}

*** Keywords ***
Suite Initialization
Expand Down
57 changes: 33 additions & 24 deletions codebundles/k8s-deployment-healthcheck/deployment_logs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -70,34 +70,43 @@ if [ -z "$SELECTOR" ]; then
exit 1
fi

#######################################
# Fetch the last 3h of logs (capped at 256000 bytes) for one container of a
# pod, optionally filter them, and save them to a file in the current
# directory.
# Globals:
#   KUBERNETES_DISTRIBUTION_BINARY, CONTEXT, NAMESPACE (read)
#   LOGS_ERROR_PATTERN   (read) case-insensitive ERE; keep matching lines only
#   LOGS_EXCLUDE_PATTERN (read) case-insensitive ERE; drop matching lines
#   LOG_FILES            (appended) name of the file that was written
#   LOGS, FIRST_LINE, EXT, FILENAME (written; left global on purpose in case
#   code further down the script reads them)
# Arguments:
#   $1 - pod name
#   $2 - container name
#   $3 - extra flag for the logs command: "" for the running container,
#        "-p" for the previously terminated one
# Outputs:
#   One progress line on stdout; writes <pod>_<container>_logs[_previous].<ext>
#######################################
fetch_logs() {
  local POD=$1
  local CONTAINER=$2
  local PREVIOUS_FLAG=$3

  # Build the command once as an array so every argument stays a single word
  # even if a context or namespace contains whitespace or glob characters.
  local -a log_cmd=("$KUBERNETES_DISTRIBUTION_BINARY" logs "$POD" -c "$CONTAINER")
  if [[ -n "$PREVIOUS_FLAG" ]]; then
    log_cmd+=("$PREVIOUS_FLAG")
  fi
  log_cmd+=(--limit-bytes=256000 --since=3h "--context=$CONTEXT" -n "$NAMESPACE")

  # "-e" keeps a pattern that starts with "-" from being parsed as an option.
  if [[ -n "$LOGS_ERROR_PATTERN" && -n "$LOGS_EXCLUDE_PATTERN" ]]; then
    # Both error and exclusion patterns provided
    LOGS=$("${log_cmd[@]}" | grep -Ei -e "$LOGS_ERROR_PATTERN" | grep -Eiv -e "$LOGS_EXCLUDE_PATTERN")
  elif [[ -n "$LOGS_ERROR_PATTERN" ]]; then
    # Only error pattern provided
    LOGS=$("${log_cmd[@]}" | grep -Ei -e "$LOGS_ERROR_PATTERN")
  elif [[ -n "$LOGS_EXCLUDE_PATTERN" ]]; then
    # Only exclusion pattern provided
    LOGS=$("${log_cmd[@]}" | grep -Eiv -e "$LOGS_EXCLUDE_PATTERN")
  else
    # Neither pattern provided
    LOGS=$("${log_cmd[@]}")
  fi

  # Pick the file extension from the first log line: "json" when jq parses it
  # to a truthy value, otherwise "txt".
  FIRST_LINE=${LOGS%%$'\n'*}
  if printf '%s\n' "$FIRST_LINE" | jq -e . &>/dev/null; then
    EXT="json"
  else
    EXT="txt"
  fi

  FILENAME="${POD}_${CONTAINER}_logs${PREVIOUS_FLAG:+_previous}.$EXT"
  LOG_FILES+=("$FILENAME")
  echo "Fetching logs for Pod: $POD, Container: $CONTAINER. Saving to $FILENAME."
  printf '%s\n' "$LOGS" > "$FILENAME"
}

# Iterate through the pods based on the selector and fetch logs
LOG_FILES=()
while read POD; do
CONTAINERS=$(${KUBERNETES_DISTRIBUTION_BINARY} get pod $POD -n ${NAMESPACE} --context=${CONTEXT} -o=jsonpath='{range .spec.containers[*]}{.name}{"\n"}{end}')
CONTAINERS=$($KUBERNETES_DISTRIBUTION_BINARY get pod $POD -n $NAMESPACE --context=$CONTEXT -o=jsonpath='{range .spec.containers[*]}{.name}{"\n"}{end}')
for CONTAINER in $CONTAINERS; do
if [ -n "$LOGS_ERROR_PATTERN" ] && [ -n "$LOGS_EXCLUDE_PATTERN" ]; then
# Both error and exclusion patterns provided
LOGS=$(${KUBERNETES_DISTRIBUTION_BINARY} logs $POD -c $CONTAINER --limit-bytes=256000 --since=3h --context=${CONTEXT} -n ${NAMESPACE} | grep -Ei "${LOGS_ERROR_PATTERN}" | grep -Eiv "${LOGS_EXCLUDE_PATTERN}")
elif [ -n "$LOGS_ERROR_PATTERN" ]; then
# Only error pattern provided
LOGS=$(${KUBERNETES_DISTRIBUTION_BINARY} logs $POD -c $CONTAINER --limit-bytes=256000 --since=3h --context=${CONTEXT} -n ${NAMESPACE} | grep -Ei "${LOGS_ERROR_PATTERN}")
elif [ -n "$LOGS_EXCLUDE_PATTERN" ]; then
# Only exclusion pattern provided
LOGS=$(${KUBERNETES_DISTRIBUTION_BINARY} logs $POD -c $CONTAINER --limit-bytes=256000 --since=3h --context=${CONTEXT} -n ${NAMESPACE} | grep -Eiv "${LOGS_EXCLUDE_PATTERN}")
else
# Neither pattern provided
LOGS=$(${KUBERNETES_DISTRIBUTION_BINARY} logs $POD -c $CONTAINER --limit-bytes=256000 --since=3h --context=${CONTEXT} -n ${NAMESPACE})
fi

# Check log format and store appropriately
FIRST_LINE=$(echo "$LOGS" | head -n 1)
EXT=$(echo "$FIRST_LINE" | jq -e . &>/dev/null && echo "json" || echo "txt")
FILENAME="${POD}_${CONTAINER}_logs.$EXT"
LOG_FILES+=("$FILENAME")
echo "Fetching logs for Pod: $POD, Container: $CONTAINER. Saving to $FILENAME."
echo "$LOGS" > $FILENAME
fetch_logs $POD $CONTAINER ""
fetch_logs $POD $CONTAINER "-p"
done
done < <(${KUBERNETES_DISTRIBUTION_BINARY} get pods --selector=$SELECTOR -n ${NAMESPACE} --context=${CONTEXT} -o=jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}')
done < <($KUBERNETES_DISTRIBUTION_BINARY get pods --selector=$SELECTOR -n $NAMESPACE --context=$CONTEXT -o=jsonpath='{range .items[*]}{.metadata.name}{"\n"}{end}')


# Initialize an issue description array
issue_descriptions=()
Expand Down
1 change: 0 additions & 1 deletion codebundles/k8s-deployment-healthcheck/runbook.robot
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ Check Deployment Log For Issues with `${DEPLOYMENT_NAME}`
... Recent logs from Deployment ${DEPLOYMENT_NAME} in Namespace ${NAMESPACE}:\n\n${logs.stdout}
RW.Core.Add Pre To Report Commands Used: ${history}

# Fetch Previous Logs for Deployment `${DEPLOYMENT_NAME}`

Check Liveness Probe Configuration for Deployment `${DEPLOYMENT_NAME}`
[Documentation] Validates if a Liveliness probe has possible misconfigurations
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ spec:
value: {{context}}
- name: LABELS
value: ''
- name: UTILIZATION_THRESHOLD
value: 95
- name: DEFAULT_INCREASE
value: 25
secretsProvided:
- name: kubeconfig
workspaceKey: {{custom.kubeconfig_secret_name}}
59 changes: 59 additions & 0 deletions codebundles/k8s-podresources-health/find_resource_owners.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/bin/bash
# set -eo pipefail

# -----------------------------------------------------------------------------
# Script Information and Metadata
# -----------------------------------------------------------------------------
# Author: @stewartshea
# Description: This script is designed to take in some information about a
# resource (typically a pod) and return its owner.
# Usage: find_resource_owners.sh <kind> <name-or-substring> <namespace> <context>
# Output: the owner of the first matching resource, printed on stdout as
# "<Kind> <name>" with no trailing newline, or "No resource found" when
# nothing of that kind matches the name.
# NOTES:
# Not sure if this is best served as a bash script or keyword
# This is quickly added and likely requires further expansion
# Not sure if it makes sense to keep this as a shared script, or
# packaged multiple times with each codebundle depending on cases
# -----------------------------------------------------------------------------

# Positional arguments: the kind of resource, its name (or part of it), the
# namespace to search, and the kubeconfig context to use.
RESOURCE_KIND="$1"
RESOURCE_NAME="$2"
NAMESPACE="$3"
CONTEXT="$4"

# CLI binary used for every cluster query. It is hard-coded here, so any
# KUBERNETES_DISTRIBUTION_BINARY value inherited from the environment is
# overwritten.
# NOTE(review): other scripts in this change read this variable from the
# environment - confirm whether this one should honour it too.
KUBERNETES_DISTRIBUTION_BINARY="kubectl"

#######################################
# Resolve the owner of a resource from its first ownerReference.
# Globals:
#   KUBERNETES_DISTRIBUTION_BINARY, NAMESPACE, CONTEXT (read)
# Arguments:
#   $1 - resource name
#   $2 - resource kind
# Outputs:
#   "<Kind> <name>" on stdout:
#     - the resource itself when it has no owner reference,
#     - "Deployment <name>" when it is owned by a ReplicaSet that is itself
#       owned (the ReplicaSet's owner is reported as a Deployment),
#     - otherwise the direct owner. This includes a ReplicaSet that has no
#       owner of its own, which previously produced "Deployment " with an
#       empty name.
#######################################
get_owner() {
  local resource_name=$1
  local resource_kind=$2
  local owner_kind owner_name deployment_name

  owner_kind=$("${KUBERNETES_DISTRIBUTION_BINARY}" get "$resource_kind" "$resource_name" -n "${NAMESPACE}" --context="${CONTEXT}" -o=jsonpath="{.metadata.ownerReferences[0].kind}")
  if [[ -z "$owner_kind" ]]; then
    # No owner reference means there is no parent object. Return the direct object.
    echo "$resource_kind $resource_name"
    return
  fi

  owner_name=$("${KUBERNETES_DISTRIBUTION_BINARY}" get "$resource_kind" "$resource_name" -n "${NAMESPACE}" --context="${CONTEXT}" -o=jsonpath="{.metadata.ownerReferences[0].name}")

  if [[ "$owner_kind" == "ReplicaSet" ]]; then
    # Walk one level further up: report whatever owns the ReplicaSet.
    deployment_name=$("${KUBERNETES_DISTRIBUTION_BINARY}" get replicaset "$owner_name" -n "${NAMESPACE}" --context="${CONTEXT}" -o=jsonpath="{.metadata.ownerReferences[0].name}")
    if [[ -n "$deployment_name" ]]; then
      echo "Deployment $deployment_name"
      return
    fi
    # Standalone ReplicaSet: fall through and report the ReplicaSet itself.
  fi

  echo "$owner_kind $owner_name"
}


# Collect the names of all resources of the requested kind whose NAME column
# matches RESOURCE_NAME (basic regex, so a substring works). Headers are
# suppressed and only the first column is searched, so the pattern cannot
# accidentally match the header row or the status/age columns.
# "|| true" keeps a no-match grep (exit 1) from being treated as a failure.
resources=$("${KUBERNETES_DISTRIBUTION_BINARY}" get "$RESOURCE_KIND" -n "${NAMESPACE}" --context="${CONTEXT}" --no-headers \
  | awk '{print $1}' \
  | grep -- "${RESOURCE_NAME}" || true)

if [[ -z "$resources" ]]; then
  echo "No resource found"
else
  # The here-string is quoted so that every bash version feeds the loop one
  # name per line; unquoted, bash older than 4.4 joins the names onto a
  # single line.
  while IFS= read -r resource_name; do
    if [[ -n "$resource_name" ]]; then
      owner=$(get_owner "$resource_name" "$RESOURCE_KIND")
      if [[ -n "$owner" ]]; then
        # Report the first owner found, without a trailing newline, and stop.
        echo "$owner" | tr -d '\n'
        exit 0
      fi
    fi
  done <<< "$resources"
fi
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
#!/bin/bash

# Refuse to run unless every setting the script depends on is present and
# non-empty; the first missing one aborts with exit status 1.
for required_var in KUBERNETES_DISTRIBUTION_BINARY CONTEXT NAMESPACE UTILIZATION_THRESHOLD DEFAULT_INCREASE; do
  if [[ -z "${!required_var}" ]]; then
    echo "KUBERNETES_DISTRIBUTION_BINARY, CONTEXT, NAMESPACE, UTILIZATION_THRESHOLD, and DEFAULT_INCREASE environment variables must be set."
    exit 1
  fi
done

#######################################
# Report whether any container of a pod is currently in a terminated state
# with reason OOMKilled or exit code 137.
# Globals:
#   KUBERNETES_DISTRIBUTION_BINARY, NAMESPACE, CONTEXT (read)
# Arguments:
#   $1 - pod name
# Outputs:
#   "true" or "false" on stdout
# NOTE(review): only .state.terminated is inspected; a container that was
# OOM-killed and has since restarted carries that information under
# .lastState instead - confirm whether that should be checked as well.
#######################################
check_pod_status() {
  local pod_name=$1
  local pod_status exit_codes code

  # The [*] jsonpath yields one space-separated value per terminated container.
  pod_status=$("${KUBERNETES_DISTRIBUTION_BINARY}" get pod "$pod_name" -n "${NAMESPACE}" --context "${CONTEXT}" -o jsonpath='{.status.containerStatuses[*].state.terminated.reason}')
  exit_codes=$("${KUBERNETES_DISTRIBUTION_BINARY}" get pod "$pod_name" -n "${NAMESPACE}" --context "${CONTEXT}" -o jsonpath='{.status.containerStatuses[*].state.terminated.exitCode}')

  if [[ $pod_status == *"OOMKilled"* ]]; then
    echo true
    return
  fi

  # Compare each exit code on its own: feeding the whole list ("1 137") to a
  # numeric test is an arithmetic syntax error and hides the 137.
  # Word splitting of $exit_codes is intentional here.
  for code in $exit_codes; do
    if [[ $code == 137 ]]; then
      echo true
      return
    fi
  done

  echo false
}

#######################################
# Sum a whitespace-separated list of Kubernetes quantities and print the total
# expressed in the requested unit, rounded to the nearest integer.
# Arguments:
#   $1 - quantities, e.g. "500m 1" or "512Mi 1Gi"; may be empty (prints 0)
#   $2 - target unit suffix, e.g. "m" (millicores) or "Mi" (mebibytes)
# Outputs:
#   the converted total on stdout, without a trailing newline
#######################################
convert_quantity() {
  local quantities=$1
  local unit=$2
  awk -v list="$quantities" -v unit="$unit" '
    BEGIN {
      mult["n"] = 1e-9; mult["u"] = 1e-6; mult["m"] = 1e-3
      mult["k"] = 1e3;  mult["M"] = 1e6;  mult["G"] = 1e9
      mult["T"] = 1e12; mult["P"] = 1e15; mult["E"] = 1e18
      mult["Ki"] = 1024;     mult["Mi"] = 1024 ^ 2; mult["Gi"] = 1024 ^ 3
      mult["Ti"] = 1024 ^ 4; mult["Pi"] = 1024 ^ 5; mult["Ei"] = 1024 ^ 6

      total = 0
      count = split(list, items, " ")
      for (i = 1; i <= count; i++) {
        value = items[i]
        factor = 1
        # Binary suffixes are tried first so "Mi" is not mistaken for "M".
        if (match(value, /[KMGTPE]i$/) || match(value, /[numkMGTPE]$/)) {
          factor = mult[substr(value, RSTART)]
          value = substr(value, 1, RSTART - 1)
        }
        total += value * factor
      }
      printf "%.0f", total / mult[unit]
    }'
}

# Initialize an empty array to store overutilized pods (one JSON object per entry)
overutilized_pods=()

# Walk the pods reported by the metrics API. A single "top pod" call supplies
# both the pod list and the usage figures, so every pod is judged against the
# same snapshot. The listing is read on fd 3 so that commands inside the loop
# cannot consume it through stdin.
while read -r -u 3 pod cpu_usage_raw mem_usage_raw _; do
  [[ -n "$pod" ]] || continue

  echo "---"
  echo "Processing Pod $pod"

  # Get pod resource limits. The [*] jsonpath returns one value per container
  # that declares a limit; the values are summed so they are comparable with
  # the pod-level usage from "top pod".
  # NOTE(review): containers without a limit contribute nothing to the sum -
  # confirm that is the desired treatment for partially limited pods.
  cpu_limit_raw=$("${KUBERNETES_DISTRIBUTION_BINARY}" get pod "$pod" -n "${NAMESPACE}" --context "${CONTEXT}" -o jsonpath='{.spec.containers[*].resources.limits.cpu}')
  mem_limit_raw=$("${KUBERNETES_DISTRIBUTION_BINARY}" get pod "$pod" -n "${NAMESPACE}" --context "${CONTEXT}" -o jsonpath='{.spec.containers[*].resources.limits.memory}')

  # Normalise everything to millicores / Mi. Missing limits become 0, which
  # disables the corresponding threshold check below.
  cpu_limit=$(convert_quantity "$cpu_limit_raw" m)
  mem_limit=$(convert_quantity "$mem_limit_raw" Mi)
  cpu_usage=$(convert_quantity "$cpu_usage_raw" m)
  mem_usage=$(convert_quantity "$mem_usage_raw" Mi)

  echo "CPU Limit: $cpu_limit (m)"
  echo "CPU Usage: $cpu_usage (m)"
  echo "Memory Limit: $mem_limit (Mi)"
  echo "Memory Usage: $mem_usage (Mi)"

  # Calculate threshold values (0 when no limit is set). Shell values are
  # handed to awk with -v instead of being spliced into the program text.
  cpu_threshold=$(awk -v limit="$cpu_limit" -v pct="$UTILIZATION_THRESHOLD" 'BEGIN {printf "%.0f", limit * pct / 100}')
  mem_threshold=$(awk -v limit="$mem_limit" -v pct="$UTILIZATION_THRESHOLD" 'BEGIN {printf "%.0f", limit * pct / 100}')

  echo "CPU Threshold: $cpu_threshold (m)"
  echo "Memory Threshold: $mem_threshold (Mi)"

  # Check if the pod is overutilized
  reason=""
  if (( cpu_limit != 0 && cpu_usage > cpu_threshold )); then
    reason="CPU usage exceeds threshold"
  fi
  if (( mem_limit != 0 && mem_usage > mem_threshold )); then
    if [[ -n $reason ]]; then
      reason="$reason and memory usage exceeds threshold"
    else
      reason="Memory usage exceeds threshold"
    fi
  fi

  if [[ -n $reason ]]; then
    # Recommend the current limit raised by DEFAULT_INCREASE percent.
    recommended_cpu_increase=$(awk -v limit="$cpu_limit" -v inc="$DEFAULT_INCREASE" 'BEGIN {printf "%.0f", limit * (1 + inc / 100)}')
    recommended_mem_increase=$(awk -v limit="$mem_limit" -v inc="$DEFAULT_INCREASE" 'BEGIN {printf "%.0f", limit * (1 + inc / 100)}')
    overutilized_pods+=("{\"namespace\":\"${NAMESPACE}\", \"pod\":\"$pod\", \"reason\":\"$reason\", \"cpu_usage\":\"$cpu_usage\", \"mem_usage\":\"$mem_usage\", \"cpu_limit\":\"$cpu_limit\", \"mem_limit\":\"$mem_limit\", \"cpu_threshold\":\"$cpu_threshold\", \"mem_threshold\":\"$mem_threshold\", \"recommended_cpu_increase\":\"$recommended_cpu_increase (m)\", \"recommended_mem_increase\":\"$recommended_mem_increase (Mi)\"}")
  fi

  # Check if the pod has an exit code of 137 or OOMKilled status
  if [[ $(check_pod_status "$pod") == "true" ]]; then
    overutilized_pods+=("{\"namespace\":\"${NAMESPACE}\", \"pod\":\"$pod\", \"reason\":\"OOMKilled or exit code 137\", \"cpu_usage\":\"$cpu_usage\", \"mem_usage\":\"$mem_usage\", \"cpu_limit\":\"$cpu_limit\", \"mem_limit\":\"$mem_limit\"}")
  fi
done 3< <("${KUBERNETES_DISTRIBUTION_BINARY}" top pod -n "${NAMESPACE}" --context "${CONTEXT}" --no-headers)

# Destination for the machine-readable report
output_file="$HOME/overutilized_pods.json"

# Slurp the collected per-pod JSON objects into a single JSON array
json_output=$(printf '%s\n' "${overutilized_pods[@]}" | jq -s '.')

# Persist the report; with nothing collected a literal empty array is written
case ${#overutilized_pods[@]} in
  0) echo "[]" > "$output_file" ;;
  *) echo "$json_output" > "$output_file" ;;
esac

# Echo the report on stdout as well
echo "$json_output"
Loading

0 comments on commit e953c4a

Please sign in to comment.