Skip to content

Commit

Permalink
CASMTRIAGE-7657 adjust k8s_verify_cluster_2 test to avoid some false (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
jacobsalmela authored Jan 10, 2025
1 parent 7c81bbb commit 88c4ea9
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 10 deletions.
74 changes: 74 additions & 0 deletions goss-testing/scripts/verify_kube_system_pods.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#!/bin/bash
#
# MIT License
#
# (C) Copyright 2022-2025 Hewlett Packard Enterprise Development LP
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
set -euo pipefail

# Set DEBUG=true in the environment to get progress messages on stdout.
DEBUG=${DEBUG:-false}

#######################################
# Reduce `kubectl get po --no-headers` output to the names of pods that are
# not in a good state.
# Pods whose STATUS is Running or Completed are dropped, as are etcdbackup
# job pods that are in one of the listed "normal" lifecycle states.
# Arguments: none
# Inputs:    pod listing on stdin (column 1 = NAME, column 3 = STATUS)
# Outputs:   one pod name per line on stdout
# Returns:   0, including when nothing is left after filtering
#######################################
filter_non_running_pods() {
  # grep exits 1 when it selects no lines; that is the healthy case here, so
  # only a real grep error (status > 1) is allowed to fail the pipeline.
  # The second pattern starts with '-', so it must be passed with -e;
  # otherwise grep parses it as a cluster of options.
  awk '{ print $1" "$3 }' \
    | { grep -Ev ' (Running|Completed)$' || [[ $? -eq 1 ]]; } \
    | { grep -Ev -e '-etcdbackup-.* (ContainerCreating|Init:[0-9]+/[0-9]+|NotReady|Pending|PodInitializing|Terminating)$' || [[ $? -eq 1 ]]; } \
    | awk '{ print $1 }'
}

# get an array of all non-running pods in the kube-system namespace
# (initialised explicitly so that an empty result is safe under `set -u`)
non_running_pods=()
while IFS= read -r line; do
  non_running_pods+=("$line")
done < <(kubectl get po -n kube-system --no-headers | filter_non_running_pods)


#######################################
# Print a message on stdout, but only when DEBUG is "true".
# Globals:   DEBUG (read)
# Arguments: the message text
#######################################
debug_log() {
  if [[ "${DEBUG:-false}" == "true" ]]; then
    echo "$*"
  fi
}

#######################################
# Find the most recently started pod among a set of candidates.
# Globals:   most_recent_pod (written; reset to "" on every call)
# Arguments: none
# Inputs:    stdin, one pod per line: "<name>\t<phase>\t<timestamp>"
# Outputs:   debug messages on stdout when DEBUG is "true"
# Returns:   0; a failing `date` aborts the script under `set -e`
#######################################
pick_most_recent_pod() {
  local same status start_time start_time_seconds
  local most_recent_time=0
  most_recent_pod=""
  while IFS=$'\t' read -r same status start_time; do
    [[ -n "$same" ]] || continue
    debug_log " found: $same $status"
    # NOTE(review): Kubernetes pod phases are Pending/Running/Succeeded/
    # Failed/Unknown, so this comparison with "Error" looks like it never
    # matches - confirm which check was intended before changing it.
    if [[ "$status" == "Error" ]]; then
      continue
    fi
    # without a timestamp there is nothing to compare against
    [[ -n "$start_time" ]] || continue
    # Calculate the start time in seconds since epoch
    start_time_seconds=$(date -u -d "$start_time" +%s)
    # remember this pod if it started later than anything seen so far
    if ((start_time_seconds > most_recent_time)); then
      debug_log " $same start_time is greater than most_recent_time: $start_time_seconds > $most_recent_time"
      most_recent_time=$start_time_seconds
      most_recent_pod=$same
    fi
  done
  return 0
}

for pod_name in "${non_running_pods[@]}"; do
  debug_log "checking non-running pod: $pod_name"
  # get the label name of the current pod
  label_name=$(kubectl get -n kube-system po "$pod_name" -o jsonpath='{.metadata.labels.app\.kubernetes\.io/name}')
  debug_log "checking for other pods with the label: $label_name"
  # check if there are any other pods with the same label name since they may be newer and have a different status.
  # The candidate set is rebuilt for every pod so that pods found for an earlier label never leak into this decision.
  most_recent_pod=""
  if [[ -n "$label_name" ]]; then
    # One query returns name, phase and start time for every pod with the label.
    # A pod that has not been scheduled yet has no .status.startTime, so its
    # creation timestamp is used instead of handing "null" to date(1).
    pick_most_recent_pod < <(kubectl get pods -l "app.kubernetes.io/name=${label_name}" -n kube-system -o json \
      | jq -r '.items[] | [.metadata.name, (.status.phase // "Unknown"), (.status.startTime // .metadata.creationTimestamp // "")] | @tsv')
  fi
  # if there is a most recent pod and it is not the current pod, go to the next non_running_pod
  # this is considered a success and prevents false positives since the newer pod has successfully started
  if [[ -n "$most_recent_pod" && "$most_recent_pod" != "$pod_name" ]]; then
    debug_log " a more recent pod is not in a fail state: $pod_name"
    continue
  fi
  # print the pod that is in a poor state to fail the test
  echo "$pod_name is not running or completed" >&2
done

# Always exit 0: pods in a bad state are reported on stderr above rather than through the exit status.
exit 0
18 changes: 8 additions & 10 deletions goss-testing/tests/ncn/goss-k8s-verify-cluster.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#
# MIT License
#
# (C) Copyright 2014-2022, 2024 Hewlett Packard Enterprise Development LP
# (C) Copyright 2014-2025 Hewlett Packard Enterprise Development LP
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
Expand All @@ -21,9 +21,10 @@
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#

{{ $kubectl := .Vars.kubectl }}
{{ $logrun := .Env.GOSS_BASE | printf "%s/scripts/log_run.sh" }}
{{ $verify_kube_system_pods := .Env.GOSS_BASE | printf "%s/scripts/verify_kube_system_pods.sh" }}
command:
{{ $testlabel_1 := "k8s_verify_cluster_1" }}
{{$testlabel_1}}:
Expand Down Expand Up @@ -56,16 +57,13 @@ command:
# Look for pods that are not Running or Completed
# However, we do allow etcdbackup job pods to be in other "normal" lifecycle states
exec: |-
"{{$logrun}}" -l "{{$testlabel_2}}" \
"{{$kubectl}}" get po -n kube-system --no-headers \
| awk '{ print $1" "$3 }' \
| grep -Ev ' (Running|Completed)$' \
| grep -Ev '-etcdbackup-.* (ContainerCreating|Init:[0-9]+/[0-9]+|NotReady|Pending|PodInitializing|Terminating)$'
# We expect no output and for the grep command to return non-0
"{{$logrun}}" -l "{{$testlabel_2}}" "{{$verify_kube_system_pods}}"
# We expect no output when all pods are in a good state
# or if a failed pod has a newer one that succeeded
stdout:
- "!/./"
stderr:
- "!/./"
exit-status:
gt: 0
# The script always exits 0 once it has checked every pod; failures are detected through the stdout/stderr checks above.
exit-status: 0
timeout: 20000

0 comments on commit 88c4ea9

Please sign in to comment.