diff --git a/codebundles/k8s-chaos-namespace/oomkill_pod.sh b/codebundles/k8s-chaos-namespace/oomkill_pod.sh index 0a2419f3..d7812c6e 100755 --- a/codebundles/k8s-chaos-namespace/oomkill_pod.sh +++ b/codebundles/k8s-chaos-namespace/oomkill_pod.sh @@ -13,10 +13,10 @@ for pod_name in $POD_NAMES; do # Roll a 50/50 chance if (( RANDOM % 2 == 0 )); then echo "Creating background process to OOMkill pod $pod_name by applying pressure to $MEMORY_PRESSURE_AMOUNT of memory..." - kubectl exec --context $CONTEXT -n $NAMESPACE $pod_name -- /bin/sh -c 'for i in 1 2 3 4 5; do (while :; do dd if=/dev/zero of=/dev/null bs=10485760 count=100 & done) & done' + kubectl exec --context $CONTEXT -n $NAMESPACE $pod_name -- /bin/sh -c 'for i in 1 2 3 4 5; do (while :; do dd if=/dev/zero of=/dev/null bs=2147483648 count=100 & done) & done' echo "Checking on pod..." sleep 3 - pod_state=$(kubectl --context $CONTEXT describe $pod_name -n $NAMESPACE) + kubectl --context $CONTEXT describe $pod_name -n $NAMESPACE # Increment the killed count ((killed_count++)) fi diff --git a/codebundles/k8s-chaos-namespace/runbook.robot b/codebundles/k8s-chaos-namespace/runbook.robot index 37a44262..da65e3c7 100644 --- a/codebundles/k8s-chaos-namespace/runbook.robot +++ b/codebundles/k8s-chaos-namespace/runbook.robot @@ -16,7 +16,7 @@ Library Process Suite Setup Suite Initialization *** Tasks *** -Test Namespace ${NAMESPACE} Highly Available +Kill Random Pods In Namespace `${NAMESPACE}` [Documentation] Randomly selects up to 10 pods in a namespace to delete to test HA [Tags] Kubernetes Namespace Deployments Pods Highly Available ${process}= RW.CLI.Run Bash File @@ -25,7 +25,7 @@ Test Namespace ${NAMESPACE} Highly Available ... 
secret_file__kubeconfig=${kubeconfig} RW.Core.Add Pre To Report ${process.stdout} -OOMKill Pods in Namespace ${NAMESPACE} +OOMKill Pods In Namespace `${NAMESPACE}` [Documentation] Randomly selects n number of pods to oomkill [Tags] Kubernetes Namespace Deployments Pods Highly Available OOMkill Memory ${process}= RW.CLI.Run Bash File @@ -45,7 +45,7 @@ OOMKill Pods in Namespace ${NAMESPACE} # ... secret_file__kubeconfig=${kubeconfig} # RW.Core.Add Pre To Report ${process.stdout} -Mangle Service Selector In Namespace ${NAMESPACE} +Mangle Service Selector In Namespace `${NAMESPACE}` [Documentation] Breaks a service's label selector to cause a network disruption [Tags] Kubernetes networking Services Selector ${process}= RW.CLI.Run Bash File change_service_selector.sh @@ -54,7 +54,7 @@ Mangle Service Selector In Namespace ${NAMESPACE} ... secret_file__kubeconfig=${kubeconfig} RW.Core.Add Pre To Report ${process.stdout} -Mangle Service Port In Namespace ${NAMESPACE} +Mangle Service Port In Namespace `${NAMESPACE}` [Documentation] Changes a service's port to cause a network disruption [Tags] Kubernetes networking Services Port ${process}= RW.CLI.Run Bash File change_service_port.sh @@ -63,7 +63,7 @@ Mangle Service Port In Namespace ${NAMESPACE} ... secret_file__kubeconfig=${kubeconfig} RW.Core.Add Pre To Report ${process.stdout} -Fill Pod Tmp In Namespace ${NAMESPACE} +Fill Random Pod Tmp Directory In Namespace `${NAMESPACE}` [Documentation] Attaches to a pod and fills the /tmp directory with random data [Tags] Kubernetes pods volumes tmp ${process}= RW.CLI.Run Bash File expand_tmp.sh diff --git a/codebundles/k8s-chaos-workload/README.md b/codebundles/k8s-chaos-workload/README.md new file mode 100644 index 00000000..15f30549 --- /dev/null +++ b/codebundles/k8s-chaos-workload/README.md @@ -0,0 +1,19 @@ +# Kubernetes Workload Chaos Engineering + +This codebundle provides chaos injection for a specific workload within a Kubernetes namespace. 
+ +## Configuration +The TaskSet requires initialization to import necessary secrets, services, and user variables. The following variables should be set: + +- `KUBECONFIG`: The kubeconfig secret containing access info for the cluster. +- `CONTEXT`: The Kubernetes context to operate within. +- `NAMESPACE`: The namespace that contains the workload. The scripts pass it to `kubectl -n` and do not search across namespaces. +- `WORKLOAD_NAME`: The specific workload to inject chaos experiments into, including its kind, e.g. `deployment/my-app`. + + +## Requirements +- A kubeconfig with appropriate RBAC permissions to perform the desired command. + +## TODO +- [ ] Add additional documentation. + diff --git a/codebundles/k8s-chaos-workload/auth.sh b/codebundles/k8s-chaos-workload/auth.sh new file mode 100755 index 00000000..a195bf17 --- /dev/null +++ b/codebundles/k8s-chaos-workload/auth.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +# Check that the required kubectl environment variables are set +if [[ -z $CONTEXT || -z $KUBECONFIG ]]; then + echo "Missing required environment variables for kubectl: CONTEXT, KUBECONFIG" + exit 1 +fi +if [[ -f $KUBECONFIG ]]; then + cat "$KUBECONFIG" > /tmp/kubeconfig +else + echo "$KUBECONFIG" > /tmp/kubeconfig +fi +export KUBECONFIG="/tmp/kubeconfig" +kubectl config set-context "$CONTEXT" > /dev/null \ No newline at end of file diff --git a/codebundles/k8s-chaos-workload/change_service_port.sh b/codebundles/k8s-chaos-workload/change_service_port.sh new file mode 100755 index 00000000..88843d26 --- /dev/null +++ b/codebundles/k8s-chaos-workload/change_service_port.sh @@ -0,0 +1,55 @@ +#!/bin/bash + +# Environment Variables: +# NAMESPACE +# CONTEXT +# WORKLOAD_NAME + +CONFIGURED_PORT="9999" +service_to_mangle="" + +workload_selectors=$(kubectl get --context "$CONTEXT" -n "$NAMESPACE" "$WORKLOAD_NAME" -o jsonpath='{ .spec.template.metadata.labels }') +workload_selectors=$(echo $workload_selectors | jq -r 'to_entries | map("\(.key)=\(.value)") | join(",")') +if [[ -z $workload_selectors ]]; then + echo "No selectors 
found for workload $WORKLOAD_NAME, got '$workload_selectors'" + exit 1 +fi + +services=$(kubectl get --context "$CONTEXT" services -n "$NAMESPACE" -oname) +for service in $services; do + service_selectors=$(kubectl get --context "$CONTEXT" -n "$NAMESPACE" "$service" -o jsonpath='{ .spec.selector }') + service_selectors=$(echo $service_selectors | jq -r 'to_entries | map("\(.key)=\(.value)") | join(",")') + if [[ -z $service_selectors ]]; then + echo "No selectors found for service $service, got '$service_selectors'" + continue + fi + if [[ $workload_selectors == *"$service_selectors"* ]]; then + echo "Service $service selects workload $WORKLOAD_NAME with $service_selectors on $workload_selectors" + service_to_mangle=$service + fi +done + +if [[ -z $service_to_mangle ]]; then + echo "No service found selecting the workload: $WORKLOAD_NAME" + exit 1 +fi + +# Get the current port of the service +service_first_port=$(kubectl get --context $CONTEXT $service_to_mangle -n $NAMESPACE -o jsonpath='{.spec.ports[0]}') +port_name=$(echo "$service_first_port" | jq -r '.name') +port_protocol=$(echo "$service_first_port" | jq -r '.protocol') +port_target_port=$(echo "$service_first_port" | jq -r '.targetPort') +port_val=$(echo "$service_first_port" | jq -r '.port') +port_val=$(kubectl get --context $CONTEXT $service_to_mangle -n $NAMESPACE -o jsonpath='{.spec.ports[0].port}') +echo "Current port of service $service_to_mangle: $port_val" + +# Update the service's port to the configured value +kubectl patch --context $CONTEXT $service_to_mangle -n $NAMESPACE --type='json' -p '[{"op": "replace", "path": "/spec/ports/0", "value": {"name": "'$port_name'", "protocol": "'$port_protocol'", "targetPort": '$port_target_port', "port": '$CONFIGURED_PORT'}}]' + +echo "Service $service_to_mangle port changed to $CONFIGURED_PORT" + +# Echo all services with the configured port +echo "-----------------------------------" +echo "Current services with chaos port $CONFIGURED_PORT:" +kubectl get 
--context $CONTEXT services -n $NAMESPACE | grep "$CONFIGURED_PORT" +echo "-----------------------------------" diff --git a/codebundles/k8s-chaos-workload/change_service_selector.sh b/codebundles/k8s-chaos-workload/change_service_selector.sh new file mode 100755 index 00000000..2b66fd4e --- /dev/null +++ b/codebundles/k8s-chaos-workload/change_service_selector.sh @@ -0,0 +1,52 @@ +#!/bin/bash + +# Environment Variables: +# NAMESPACE +# CONTEXT +# WORKLOAD_NAME + +service_to_mangle="" +CHAOS_LABEL="chaos" + +workload_selectors=$(kubectl get --context "$CONTEXT" -n "$NAMESPACE" "$WORKLOAD_NAME" -o jsonpath='{ .spec.template.metadata.labels }') +workload_selectors=$(echo $workload_selectors | jq -r 'to_entries | map("\(.key)=\(.value)") | join(",")') +if [[ -z $workload_selectors ]]; then + echo "No selectors found for workload $WORKLOAD_NAME, got '$workload_selectors'" + exit 1 +fi + +services=$(kubectl get --context "$CONTEXT" services -n "$NAMESPACE" -oname) +for service in $services; do + service_selectors=$(kubectl get --context "$CONTEXT" -n "$NAMESPACE" "$service" -o jsonpath='{ .spec.selector }') + service_selectors=$(echo $service_selectors | jq -r 'to_entries | map("\(.key)=\(.value)") | join(",")') + if [[ -z $service_selectors ]]; then + echo "No selectors found for service $service, got '$service_selectors'" + continue + fi + if [[ $workload_selectors == *"$service_selectors"* ]]; then + echo "Service $service selects workload $WORKLOAD_NAME with $service_selectors on $workload_selectors" + service_to_mangle=$service + fi +done + +if [[ -z $service_to_mangle ]]; then + echo "No service found selecting the workload: $WORKLOAD_NAME" + exit 1 +fi + +# Get the current selector of the service +SELECTOR=$(kubectl get --context $CONTEXT $service_to_mangle -n $NAMESPACE -o jsonpath='{.spec.selector}') +KEY=$(echo $SELECTOR | jq -r 'keys[0]') +VALUE=$(echo $SELECTOR | jq -r '.[keys[0]]') +echo "Current selector of service $service_to_mangle: $SELECTOR" +echo 
"Patching with chaos label..." +# Overwrite the value of the first selector key with the chaos label so the service stops matching the workload +kubectl patch --context $CONTEXT $service_to_mangle -n $NAMESPACE -p '{"spec":{"selector":{"'$KEY'":"'$CHAOS_LABEL'"}}}' + +# Echo all services whose selector carries the chaos label (matched per key, so multi-key selectors are listed too) +echo "-----------------------------------" +echo "Current services with chaos selector $CHAOS_LABEL:" +SERVICE_JSON=$(kubectl get --context $CONTEXT services -n $NAMESPACE -ojson) +CHAOSED_SERVICES=$(echo "$SERVICE_JSON" | jq -r --arg key "$KEY" --arg label "$CHAOS_LABEL" '.items[] | select(.spec.selector[$key] == $label) | .metadata.name') +echo $CHAOSED_SERVICES +echo "-----------------------------------" diff --git a/codebundles/k8s-chaos-workload/expand_tmp.sh b/codebundles/k8s-chaos-workload/expand_tmp.sh new file mode 100755 index 00000000..97d3d83c --- /dev/null +++ b/codebundles/k8s-chaos-workload/expand_tmp.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +# Environment Variables: +# NAMESPACE +# CONTEXT +# WORKLOAD_NAME + +selectors=$(kubectl get --context "$CONTEXT" -n "$NAMESPACE" "$WORKLOAD_NAME" -o jsonpath='{ .spec.selector.matchLabels }') +selectors=$(echo "$selectors" | jq -r 'to_entries | map("\(.key)=\(.value)") | join(",")') +# Abort on an empty selector: kubectl -l with an empty value matches every pod in the namespace +if [[ -z $selectors ]]; then + echo "No selectors found for workload $WORKLOAD_NAME, got '$selectors'" + exit 1 +fi +echo "Fetching pods with label selector: $selectors" +pods=$(kubectl get --context "$CONTEXT" pods -n "$NAMESPACE" -l "$selectors" -o jsonpath='{.items[*].metadata.name}') + +# Pick one random pod from the pods matched by the workload selector +pod=$(echo $pods | tr ' ' '\n' | shuf -n 1) + +echo "Expanding /tmp of pod $pod in namespace $NAMESPACE" + +# Exec into the pod and create a file at /tmp/chaos +kubectl exec --context "$CONTEXT" -n "$NAMESPACE" "$pod" -- touch /tmp/chaos + +# Write 1024 blocks of 1M of zeros from /dev/zero into the file to consume space under /tmp +kubectl exec --context "$CONTEXT" -n "$NAMESPACE" "$pod" -- sh -c "dd if=/dev/zero of=/tmp/chaos bs=1M count=1024" diff --git a/codebundles/k8s-chaos-workload/kill_workload_pod.sh b/codebundles/k8s-chaos-workload/kill_workload_pod.sh new file mode 100755 index 00000000..86df5583 --- /dev/null 
+++ b/codebundles/k8s-chaos-workload/kill_workload_pod.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +# Environment Variables +# NAMESPACE +# CONTEXT +# WORKLOAD_NAME + +selectors=$(kubectl get --context "$CONTEXT" -n "$NAMESPACE" "$WORKLOAD_NAME" -o jsonpath='{ .spec.selector.matchLabels }') +selectors=$(echo "$selectors" | jq -r 'to_entries | map("\(.key)=\(.value)") | join(",")') +# Abort on an empty selector: kubectl -l with an empty value matches every pod in the namespace +if [[ -z $selectors ]]; then + echo "No selectors found for workload $WORKLOAD_NAME, got '$selectors'" + exit 1 +fi +echo "Fetching pods with label selector: $selectors" +pods=$(kubectl get --context "$CONTEXT" pods -n "$NAMESPACE" -l "$selectors" -o jsonpath='{.items[*].metadata.name}') + +MAX_DELETIONS=1 +echo "Killing a pod owned by $WORKLOAD_NAME in namespace $NAMESPACE" +deleted_count=0 +for pod_name in $pods; do + # Delete the pod + kubectl delete --context "$CONTEXT" pod "$pod_name" -n "$NAMESPACE" + # Increment the deleted count + ((deleted_count++)) + # Stop once MAX_DELETIONS pods have been deleted + if (( deleted_count >= MAX_DELETIONS )); then + break + fi +done + +echo "Deletions complete. Current Pod States:" +kubectl get --context "$CONTEXT" pods -n "$NAMESPACE" diff --git a/codebundles/k8s-chaos-workload/oomkill_workload_pod.sh b/codebundles/k8s-chaos-workload/oomkill_workload_pod.sh new file mode 100755 index 00000000..3a34730b --- /dev/null +++ b/codebundles/k8s-chaos-workload/oomkill_workload_pod.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Environment Variables +# NAMESPACE +# CONTEXT +# WORKLOAD_NAME + +selectors=$(kubectl get --context "$CONTEXT" -n "$NAMESPACE" "$WORKLOAD_NAME" -o jsonpath='{ .spec.selector.matchLabels }') +selectors=$(echo "$selectors" | jq -r 'to_entries | map("\(.key)=\(.value)") | join(",")') +# Abort on an empty selector: kubectl -l with an empty value matches every pod in the namespace +if [[ -z $selectors ]]; then + echo "No selectors found for workload $WORKLOAD_NAME, got '$selectors'" + exit 1 +fi +echo "Fetching pods with label selector: $selectors" +pods=$(kubectl get --context "$CONTEXT" pods -n "$NAMESPACE" -l "$selectors" --field-selector=status.phase=Running -o jsonpath='{.items[*].metadata.name}') + +MAX_KILL=1 +MEMORY_PRESSURE_AMOUNT=2147483648 # 2GB +echo "Starting $WORKLOAD_NAME random pod oomkill in namespace $NAMESPACE" +killed_count=0 +for pod_name in $pods; do + echo "Creating background 
process to OOMkill pod $pod_name by applying pressure to $MEMORY_PRESSURE_AMOUNT of memory..." + kubectl exec --context "$CONTEXT" -n "$NAMESPACE" "$pod_name" -- /bin/sh -c 'for i in 1 2 3 4 5; do (while :; do dd if=/dev/zero of=/dev/null bs=2147483648 count=100 & done) & done' + echo "Checking on pod..." + sleep 3 + kubectl --context "$CONTEXT" describe pod "$pod_name" -n "$NAMESPACE" + # Increment the killed count + ((killed_count++)) + # Stop once MAX_KILL pods have been targeted + if (( killed_count >= MAX_KILL )); then + break + fi +done +echo "Current Pod States:" +kubectl get --context "$CONTEXT" pods -n "$NAMESPACE" diff --git a/codebundles/k8s-chaos-workload/runbook.robot b/codebundles/k8s-chaos-workload/runbook.robot new file mode 100644 index 00000000..02f53cf9 --- /dev/null +++ b/codebundles/k8s-chaos-workload/runbook.robot @@ -0,0 +1,92 @@ +*** Settings *** +Documentation Provides chaos injection tasks for specific workloads like your apps in a Kubernetes namespace. These are destructive tasks and the expectation is that you can heal these changes by enabling your GitOps reconciliation. +Metadata Author jon-funk +Metadata Display Name Kubernetes Workload Chaos Engineering +Metadata Supports Kubernetes Chaos Engineering Workload Application Deployments StatefulSet +Metadata Builder + +Library BuiltIn +Library RW.Core +Library RW.CLI +Library RW.platform +Library OperatingSystem +Library String +Library Process + +Suite Setup Suite Initialization + +*** Tasks *** +Test `${WORKLOAD_NAME}` High Availability + [Documentation] Kills a pod under this workload to test high availability. + [Tags] Kubernetes StatefulSet Deployments Pods Highly Available + ${process}= RW.CLI.Run Bash File + ... bash_file=kill_workload_pod.sh + ... env=${env} + ... secret_file__kubeconfig=${kubeconfig} + RW.Core.Add Pre To Report ${process.stdout} + +OOMKill `${WORKLOAD_NAME}` Pod + [Documentation] Applies memory pressure inside one running pod of the configured workload to provoke an OOMKill. 
+ [Tags] Kubernetes StatefulSet Deployments Pods Highly Available OOMkill Memory + ${process}= RW.CLI.Run Bash File + ... bash_file=oomkill_workload_pod.sh + ... env=${env} + ... secret_file__kubeconfig=${kubeconfig} + RW.Core.Add Pre To Report ${process.stdout} + +Mangle Service Selector For `${WORKLOAD_NAME}` + [Documentation] Breaks a service's label selector to cause a network disruption + [Tags] Kubernetes networking Services Selector + ${process}= RW.CLI.Run Bash File + ... bash_file=change_service_selector.sh + ... env=${env} + ... secret_file__kubeconfig=${kubeconfig} + RW.Core.Add Pre To Report ${process.stdout} + +Mangle Service Port For `${WORKLOAD_NAME}` + [Documentation] Changes a service's port to cause a network disruption + [Tags] Kubernetes networking Services Port + ${process}= RW.CLI.Run Bash File + ... bash_file=change_service_port.sh + ... env=${env} + ... secret_file__kubeconfig=${kubeconfig} + RW.Core.Add Pre To Report ${process.stdout} + +Fill Tmp Directory Of Pod From `${WORKLOAD_NAME}` + [Documentation] Attaches to a pod and fills the /tmp directory with random data + [Tags] Kubernetes pods volumes tmp + ${process}= RW.CLI.Run Bash File expand_tmp.sh + ... cmd_override=./expand_tmp.sh + ... env=${env} + ... secret_file__kubeconfig=${kubeconfig} + RW.Core.Add Pre To Report ${process.stdout} + +*** Keywords *** +Suite Initialization + ${kubeconfig}= RW.Core.Import Secret kubeconfig + ... type=string + ... description=The kubeconfig secret to use for authenticating with the cluster. + ... pattern=\w* + ${CONTEXT}= RW.Core.Import User Variable CONTEXT + ... type=string + ... description=The kubernetes context to use in the kubeconfig provided. + ... pattern=\w* + ${NAMESPACE}= RW.Core.Import User Variable NAMESPACE + ... type=string + ... description=The namespace to target for scripts. + ... pattern=\w* + ${WORKLOAD_NAME}= RW.Core.Import User Variable WORKLOAD_NAME + ... type=string + ... 
description=The name of the workload to perform chaos testing on. Include the kind in the name, eg: deployment/my-app + ... pattern=\w* + + Set Suite Variable ${CONTEXT} ${CONTEXT} + Set Suite Variable ${WORKLOAD_NAME} ${WORKLOAD_NAME} + Set Suite Variable ${NAMESPACE} ${NAMESPACE} + Set Suite Variable ${kubeconfig} ${kubeconfig} + Set Suite Variable + ... &{env} + ... KUBECONFIG=${kubeconfig.key} + ... CONTEXT=${CONTEXT} + ... NAMESPACE=${NAMESPACE} + ... WORKLOAD_NAME=${WORKLOAD_NAME}