From dd0272414969cbcac7fcc73cfd1853a6a3cd0498 Mon Sep 17 00:00:00 2001
From: Shea Stewart <shea.stewart@runwhen.com>
Date: Mon, 26 Feb 2024 16:16:09 -0500
Subject: [PATCH] Updates/rs and owner (#340)

* replica health script

* integrate task

* fix runwhen flag

* add util to find related resources for certain label / annotation patterns

* update default error codes

* switch to a script for future improvements

* add additional related objects

* simplify issue titles
---
 .../runbook.robot                             |  24 ++-
 .../check_replicaset.sh                       | 146 ++++++++++++++++++
 .../event_anomalies.sh                        |  51 ++++++
 .../k8s-deployment-healthcheck/runbook.robot  |  69 ++++++++-
 .../k8s-jaeger-http-query/runbook.robot       |   2 +-
 libraries/RW/K8sHelper/__init__.py            |   1 +
 libraries/RW/K8sHelper/k8s_helper.py          |  40 +++++
 7 files changed, 309 insertions(+), 24 deletions(-)
 create mode 100755 codebundles/k8s-deployment-healthcheck/check_replicaset.sh
 create mode 100755 codebundles/k8s-deployment-healthcheck/event_anomalies.sh
 create mode 100644 libraries/RW/K8sHelper/__init__.py
 create mode 100644 libraries/RW/K8sHelper/k8s_helper.py

diff --git a/codebundles/curl-gmp-nginx-ingress-inspection/runbook.robot b/codebundles/curl-gmp-nginx-ingress-inspection/runbook.robot
index 5f042cb6..28ff08b3 100644
--- a/codebundles/curl-gmp-nginx-ingress-inspection/runbook.robot
+++ b/codebundles/curl-gmp-nginx-ingress-inspection/runbook.robot
@@ -8,6 +8,7 @@ Metadata            Supports    GCP,GMP,Ingress,Nginx,Metrics
 Library             BuiltIn
 Library             RW.Core
 Library             RW.CLI
+Library             RW.K8sHelper
 Library             RW.platform
 Library             OperatingSystem
 
@@ -42,6 +43,12 @@ Fetch Nginx HTTP Errors From GMP for Ingress `${INGRESS_OBJECT_NAME}`
     ${owner_name}=    RW.CLI.Run Cli
     ...    cmd=echo "${k8s_ingress_details.stdout}" | grep 'Owner:[^ ]*' | awk -F': ' '{print $2}' |awk -F':' '{print $2}'| sed 's/ *$//' | tr -d '\n'
     ...    include_in_history=false
+   ${k8s_ingress_details}=    RW.CLI.Run Cli
+    ...    cmd=${KUBERNETES_DISTRIBUTION_BINARY} get ingress ${INGRESS_OBJECT_NAME} -n ${NAMESPACE} --context ${CONTEXT} -o json
+    ...    env=${env}
+    ...    secret_file__kubeconfig=${kubeconfig}
+    ${related_resource_recommendations}=    RW.K8sHelper.Get Related Resource Recommendations
+    ...    k8s_object=${k8s_ingress_details.stdout}
     RW.CLI.Parse Cli Output By Line
     ...    rsp=${gmp_rsp}
     ...    set_severity_level=2
@@ -49,7 +56,7 @@ Fetch Nginx HTTP Errors From GMP for Ingress `${INGRESS_OBJECT_NAME}`
     ...    set_issue_actual=We found the following HTTP error codes: ${ERROR_CODES} associated with the ingress in $_line
     ...    set_issue_title=Detected HTTP Error Codes for Ingress `${INGRESS_OBJECT_NAME}`
     ...    set_issue_details=HTTP error codes in ingress and service "$_line". Troubleshoot the application associated with ${owner_kind.stdout} `${owner_name.stdout}`
-    ...    set_issue_next_steps=Check Deployment Log For Issues with `${owner_name.stdout}`\nQuery Traces for HTTP Errors in Namespace `${NAMESPACE}`
+    ...    set_issue_next_steps=Check Deployment Log For Issues with `${owner_name.stdout}`\nQuery Traces for HTTP Errors in Namespace `${NAMESPACE}`\n${related_resource_recommendations}
     ...    _line__raise_issue_if_contains=Host
     ${ingress_info}=    Set Variable    ${gmp_rsp.stdout}
     IF    """${ingress_info}""" == "" or """${ingress_info}""".isspace()
@@ -75,7 +82,6 @@ Find Owner and Service Health for Ingress `${INGRESS_OBJECT_NAME}`
     RW.Core.Add Pre To Report    Commands Used: ${history}
     RW.Core.Add Pre To Report    Ingress Info:\n${k8s_ingress_details.stdout}
 
-
 *** Keywords ***
 Suite Initialization
     ${kubeconfig}=    RW.Core.Import Secret
@@ -84,10 +90,6 @@ Suite Initialization
     ...    description=The kubernetes kubeconfig yaml containing connection configuration used to connect to cluster(s).
     ...    pattern=\w*
     ...    example=For examples, start here https://kubernetes.io/docs/concepts/configuration/organize-cluster-access-kubeconfig/
-    ${kubectl}=    RW.Core.Import Service    kubectl
-    ...    description=The location service used to interpret shell commands.
-    ...    default=kubectl-service.shared
-    ...    example=kubectl-service.shared
     ${KUBERNETES_DISTRIBUTION_BINARY}=    RW.Core.Import User Variable    KUBERNETES_DISTRIBUTION_BINARY
     ...    type=string
     ...    description=Which binary to use for Kubernetes CLI commands.
@@ -105,12 +107,6 @@ Suite Initialization
     ...    pattern=\w*
     ...    example=otel-demo
     ...    default=
-    ${GCLOUD_SERVICE}=    RW.Core.Import Service    gcloud
-    ...    type=string
-    ...    description=The selected RunWhen Service to use for accessing services within a network.
-    ...    pattern=\w*
-    ...    example=gcloud-service.shared
-    ...    default=gcloud-service.shared
     ${gcp_credentials_json}=    RW.Core.Import Secret    gcp_credentials_json
     ...    type=string
     ...    description=GCP service account json used to authenticate with GCP APIs.
@@ -151,15 +147,13 @@ Suite Initialization
     ...    description=Which http status codes to look for and classify as errors.
     ...    pattern=\w*
     ...    example=500
-    ...    default=500|501|502
+    ...    default=500|501|502|503|504
     ${OS_PATH}=    Get Environment Variable    PATH
     Set Suite Variable    ${kubeconfig}    ${kubeconfig}
-    Set Suite Variable    ${kubectl}    ${kubectl}
     Set Suite Variable    ${KUBERNETES_DISTRIBUTION_BINARY}    ${KUBERNETES_DISTRIBUTION_BINARY}
     Set Suite Variable    ${CONTEXT}    ${CONTEXT}
     Set Suite Variable    ${NAMESPACE}    ${NAMESPACE}
     Set Suite Variable    ${ERROR_CODES}    ${ERROR_CODES}
-    Set Suite Variable    ${GCLOUD_SERVICE}    ${GCLOUD_SERVICE}
     Set Suite Variable    ${gcp_credentials_json}    ${gcp_credentials_json}
     Set Suite Variable    ${GCP_PROJECT_ID}    ${GCP_PROJECT_ID}
     Set Suite Variable    ${INGRESS_HOST}    ${INGRESS_HOST}
diff --git a/codebundles/k8s-deployment-healthcheck/check_replicaset.sh b/codebundles/k8s-deployment-healthcheck/check_replicaset.sh
new file mode 100755
index 00000000..7d9a38a4
--- /dev/null
+++ b/codebundles/k8s-deployment-healthcheck/check_replicaset.sh
@@ -0,0 +1,146 @@
+#!/bin/bash
+
+# Kubernetes Deployment ReplicaSet Management Script
+# This script checks Kubernetes deployments to ensure they are running the latest ReplicaSet. It is designed to manage
+# ReplicaSets during normal operations and rolling updates, checking for multiple ReplicaSets, verifying the active latest ReplicaSet, and providing actionable insights for any inactive or conflicting ReplicaSets.
+
+# Function to check for rolling update status
+check_rolling_update_status() {
+    # Sets ROLLING_UPDATE_STATUS from $DEPLOYMENT_JSON: 0 = rollout in progress or still stabilizing, 1 = stable.
+    # Variables are declared before assignment so `local` does not hide the exit status of the substitution.
+    local progressingCondition replicas updatedReplicas availableReplicas readyReplicas
+    progressingCondition=$(echo "$DEPLOYMENT_JSON" | jq '.status.conditions[]? | select(.type=="Progressing")')
+    replicas=$(echo "$DEPLOYMENT_JSON" | jq '.status.replicas // 0')
+    updatedReplicas=$(echo "$DEPLOYMENT_JSON" | jq '.status.updatedReplicas // 0')
+    availableReplicas=$(echo "$DEPLOYMENT_JSON" | jq '.status.availableReplicas // 0')
+    readyReplicas=$(echo "$DEPLOYMENT_JSON" | jq '.status.readyReplicas // 0')
+
+    # Interpret 'Progressing' condition more accurately
+    local progressingStatus progressingReason lastUpdateTime
+    progressingStatus=$(echo "$progressingCondition" | jq -r '.status')
+    progressingReason=$(echo "$progressingCondition" | jq -r '.reason')
+    lastUpdateTime=$(echo "$progressingCondition" | jq -r '.lastUpdateTime')
+
+    # Compare replica counts for a more accurate ongoing rollout check
+    if [[ "$progressingStatus" == "True" && "$progressingReason" == "NewReplicaSetAvailable" && "$updatedReplicas" == "$replicas" && "$availableReplicas" == "$updatedReplicas" && "$readyReplicas" == "$updatedReplicas" ]]; then
+        # Only call the rollout settled once the last update is older than a 2-minute stabilization buffer.
+        # Both sides are epoch seconds; the relative date string relies on GNU date.
+        if [[ $(date -u -d "$lastUpdateTime" +%s) -lt $(date -u -d '2 minutes ago' +%s) ]]; then
+            echo "Deployment $DEPLOYMENT_NAME is stable. No active rollout detected."
+            ROLLING_UPDATE_STATUS=1 # Indicates no update is in progress
+        else
+            echo "Deployment $DEPLOYMENT_NAME has recently updated and may still be stabilizing."
+            ROLLING_UPDATE_STATUS=0 # Indicates recent update, considering stabilization
+        fi
+    elif [[ "$updatedReplicas" -lt "$replicas" ]] || [[ "$availableReplicas" -lt "$updatedReplicas" ]] || [[ "$readyReplicas" -lt "$updatedReplicas" ]]; then
+        echo "Deployment $DEPLOYMENT_NAME is undergoing a rollout."
+        ROLLING_UPDATE_STATUS=0 # Indicates an update is in progress
+    else
+        echo "Deployment $DEPLOYMENT_NAME is stable. No active rollout detected."
+        ROLLING_UPDATE_STATUS=1 # Indicates no update is in progress
+    fi
+}
+
+
+
+verify_pods_association_with_latest_rs() {
+    # Select pods with the Deployment's own matchLabels selector; fall back to app=<name> when it cannot be read
+    local selector
+    selector=$(echo "$DEPLOYMENT_JSON" | jq -r '.spec.selector.matchLabels // {} | to_entries | map("\(.key)=\(.value)") | join(",")')
+    PODS_JSON=$(${KUBERNETES_DISTRIBUTION_BINARY} get pods -n "$NAMESPACE" --context "$CONTEXT" --selector="${selector:-app=$DEPLOYMENT_NAME}" -o json)
+    PODS_COUNT=$(echo "$PODS_JSON" | jq '.items | length')
+
+    # Count pods that are not owned by the latest ReplicaSet (pods without a ReplicaSet owner count as outdated)
+    OUTDATED_PODS_COUNT=$(echo "$PODS_JSON" | jq --arg rs "$LATEST_RS" \
+        '[.items[] | select(any(.metadata.ownerReferences[]?; .kind == "ReplicaSet" and .name == $rs) | not)] | length')
+
+    if [[ "$OUTDATED_PODS_COUNT" -eq 0 ]]; then
+        echo "All pods are correctly associated with the latest ReplicaSet."
+    else
+        echo "Warning: $OUTDATED_PODS_COUNT pod(s) are not associated with the latest ReplicaSet."
+        # Build the issue with jq so the embedded ReplicaSet JSON is escaped into a valid JSON string
+        issue_details=$(jq -cn --arg title "$OUTDATED_PODS_COUNT pod(s) are not running the latest version of Deployment \`$DEPLOYMENT_NAME\` in namespace \`${NAMESPACE}\`" --arg next_steps "Clean up stale ReplicaSet \`$RS\` for Deployment \`$DEPLOYMENT_NAME\` in namespace \`${NAMESPACE}\` " --arg details "$RS_DETAILS_JSON" \
+            '{severity: "2", title: $title, next_steps: $next_steps, details: $details}')
+    fi
+}
+
+# Get Deployment JSON
+DEPLOYMENT_JSON=$(${KUBERNETES_DISTRIBUTION_BINARY} get deployment "$DEPLOYMENT_NAME" -n "$NAMESPACE" --context "$CONTEXT" -o json)
+
+# Get the ReplicaSets in the namespace that are owned by the Deployment
+REPLICASETS_JSON=$(${KUBERNETES_DISTRIBUTION_BINARY} get rs -n "$NAMESPACE" --context "$CONTEXT" -o json | jq --arg DEPLOYMENT_NAME "$DEPLOYMENT_NAME" \
+    '[.items[] | select(.metadata.ownerReferences[]? | select(.kind == "Deployment" and .name == $DEPLOYMENT_NAME))]')
+
+# The latest ReplicaSet is the one with the highest deployment revision (a rollback re-activates an older
+# ReplicaSet under a new revision); creationTimestamp only breaks ties or covers a missing annotation
+LATEST_RS=$(echo "$REPLICASETS_JSON" | jq -r 'sort_by([((.metadata.annotations["deployment.kubernetes.io/revision"] // "0") | tonumber? // 0), .metadata.creationTimestamp]) | last(.[]).metadata.name')
+
+# Extract names of all ReplicaSets associated with the Deployment from the filtered JSON
+ALL_RS=$(echo "$REPLICASETS_JSON" | jq -r '.[].metadata.name' | tr '\n' ' ')
+readarray -t ALL_RS_NAMES < <(echo "$REPLICASETS_JSON" | jq -r '.[].metadata.name')
+
+echo "Latest ReplicaSet: $LATEST_RS"
+echo "All ReplicaSets for the deployment: $ALL_RS"
+
+ROLLING_UPDATE_STATUS=-1 # Default to -1; will be set to 0 or 1 by check_rolling_update_status
+check_rolling_update_status
+
+# Append the JSON object held in $issue_details (if any) to the $issues array, then clear it so it cannot be
+# reported twice. jq does the merge, so a malformed object is dropped instead of corrupting the array.
+append_issue() {
+    local updated
+    if [[ -n "$issue_details" ]] && updated=$(jq -c --argjson item "$issue_details" '. + [$item]' <<<"$issues"); then
+        issues="$updated"
+    fi
+    issue_details=""
+}
+
+# Check if there are multiple ReplicaSets and if the latest is active
+if [[ ${#ALL_RS_NAMES[@]} -gt 1 ]]; then
+    echo "Multiple ReplicaSets detected. Verifying..."
+    issues="[]"
+
+    # Loop through all ReplicaSets
+    for RS in "${ALL_RS_NAMES[@]}"; do
+        # Skip the latest ReplicaSet
+        if [[ "$RS" == "$LATEST_RS" ]]; then
+            continue
+        fi
+
+        # Start every ReplicaSet with a clean slate so a previous iteration's issue is never re-reported
+        issue_details=""
+
+        # Check the status of older ReplicaSets (replicas, availableReplicas, readyReplicas)
+        RS_DETAILS_JSON=$(echo "$REPLICASETS_JSON" | jq --arg RS "$RS" '.[] | select(.metadata.name==$RS)')
+        REPLICAS=$(echo "$RS_DETAILS_JSON" | jq '.status.replicas')
+        if [[ "$REPLICAS" == "0" ]]; then
+            echo "ReplicaSet $RS for Deployment $DEPLOYMENT_NAME is not active. Consider for cleanup..."
+        elif [[ $ROLLING_UPDATE_STATUS -eq 0 ]]; then
+            date
+            echo "Multiple ReplicaSets are active, which is expected due to the rolling update process."
+            issue_details=$(jq -cn --arg title "A rolling update is in progress for Deployment \`$DEPLOYMENT_NAME\` in namespace \`${NAMESPACE}\`" --arg details "$RS_DETAILS_JSON" \
+                '{severity: "4", title: $title, next_steps: "Wait for Rollout to Complete and Check Again.", details: $details}')
+        elif [[ $ROLLING_UPDATE_STATUS -eq 1 ]]; then
+            echo "Multiple ReplicaSets are active and no update appears to be in place. Investigation may be required to ensure they are not conflicting."
+            # Report outdated pods (if any) as their own issue before recording the ReplicaSet conflict
+            verify_pods_association_with_latest_rs
+            append_issue
+            issue_details=$(jq -cn --arg title "Conflicting versions detected for Deployment \`$DEPLOYMENT_NAME\` in namespace \`${NAMESPACE}\`" --arg next_steps "Clean up stale ReplicaSet \`$RS\` for Deployment \`$DEPLOYMENT_NAME\` in namespace \`${NAMESPACE}\` " --arg details "$RS_DETAILS_JSON" \
+                '{severity: "2", title: $title, next_steps: $next_steps, details: $details}')
+        else
+            echo "Multiple ReplicaSets are active and no update appears to be in place. Investigation may be required to ensure they are not conflicting."
+        fi
+
+        append_issue
+    done
+else
+    echo "Only one ReplicaSet is active. Deployment is up to date."
+fi
+
+
+# Display all unique recommendations that can be shown as Next Steps
+# (everything printed after the marker line must be a single valid JSON array of issue objects)
+if [ -n "$issues" ]; then
+    echo -e "\nRecommended Next Steps: \n"
+    echo "$issues"
+fi
\ No newline at end of file
diff --git a/codebundles/k8s-deployment-healthcheck/event_anomalies.sh b/codebundles/k8s-deployment-healthcheck/event_anomalies.sh
new file mode 100755
index 00000000..8b33133d
--- /dev/null
+++ b/codebundles/k8s-deployment-healthcheck/event_anomalies.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+#
+# Summarizes non-Warning events for a Deployment and its ReplicaSets/Pods as a JSON array on stdout.
+# Requires KUBERNETES_DISTRIBUTION_BINARY, CONTEXT, NAMESPACE and DEPLOYMENT_NAME in the environment.
+
+EVENTS_JSON=$(${KUBERNETES_DISTRIBUTION_BINARY} get events --context "${CONTEXT}" -n "${NAMESPACE}" -o json)
+
+# jq: kinds are matched exactly, events without valid timestamps are skipped, a missing count is treated as 1
+PROCESSED_EVENTS=$(echo "${EVENTS_JSON}" | jq --arg DEPLOYMENT_NAME "${DEPLOYMENT_NAME}" '
+  [ .items[]
+    | select(
+        .type != "Warning"
+        and (.involvedObject.kind | test("^(Deployment|ReplicaSet|Pod)$"))
+        and (.involvedObject.name | contains($DEPLOYMENT_NAME))
+        and (.firstTimestamp | fromdateiso8601? // empty) and (.lastTimestamp | fromdateiso8601? // empty)
+      )
+    | {
+        kind: .involvedObject.kind,
+        count: (.count // 1),
+        name: .involvedObject.name,
+        reason: .reason,
+        message: .message,
+        firstTimestamp: .firstTimestamp,
+        lastTimestamp: .lastTimestamp,
+        duration: (
+          if (((.lastTimestamp | fromdateiso8601) - (.firstTimestamp | fromdateiso8601)) == 0)
+          then 1
+          else (((.lastTimestamp | fromdateiso8601) - (.firstTimestamp | fromdateiso8601)) / 60)
+          end
+        )
+      }
+  ]
+  | group_by([.kind, .name])
+  | map({
+      kind: .[0].kind,
+      name: .[0].name,
+      count: (map(.count) | add),
+      reasons: (map(.reason) | unique),
+      messages: (map(.message) | unique),
+      average_events_per_minute: (
+        if .[0].duration == 1
+        then 1
+        else ((map(.count) | add) / .[0].duration)
+        end
+      ),
+      firstTimestamp: (map(.firstTimestamp | fromdateiso8601) | sort | .[0] | todateiso8601),
+      lastTimestamp: (map(.lastTimestamp | fromdateiso8601) | sort | reverse | .[0] | todateiso8601)
+    })
+')
+
+echo "${PROCESSED_EVENTS}"
diff --git a/codebundles/k8s-deployment-healthcheck/runbook.robot b/codebundles/k8s-deployment-healthcheck/runbook.robot
index f316e091..6f3dbb99 100644
--- a/codebundles/k8s-deployment-healthcheck/runbook.robot
+++ b/codebundles/k8s-deployment-healthcheck/runbook.robot
@@ -9,6 +9,7 @@ Library             RW.Core
 Library             RW.CLI
 Library             RW.platform
 Library             RW.NextSteps
+Library             RW.K8sHelper
 Library             OperatingSystem
 Library             String
 
@@ -145,6 +146,12 @@ Troubleshoot Deployment Warning Events for `${DEPLOYMENT_NAME}`
     ...    secret_file__kubeconfig=${kubeconfig}
     ...    show_in_rwl_cheatsheet=true
     ...    render_in_commandlist=true
+   ${k8s_deployment_details}=    RW.CLI.Run Cli
+    ...    cmd=${KUBERNETES_DISTRIBUTION_BINARY} get deployment ${DEPLOYMENT_NAME} -n ${NAMESPACE} --context ${CONTEXT} -o json
+    ...    env=${env}
+    ...    secret_file__kubeconfig=${kubeconfig}
+    ${related_resource_recommendations}=    RW.K8sHelper.Get Related Resource Recommendations
+    ...    k8s_object=${k8s_deployment_details.stdout}
     ${object_list}=    Evaluate    json.loads(r'''${events.stdout}''')    json
     IF    len(@{object_list}) > 0
         FOR    ${item}    IN    @{object_list}
@@ -163,7 +170,7 @@ Troubleshoot Deployment Warning Events for `${DEPLOYMENT_NAME}`
             ...    title= Deployment `${DEPLOYMENT_NAME}` generated warning events for ${item["kind"]} `${item["name"]}`.
             ...    reproduce_hint=View Commands Used in Report Output
             ...    details=${item["kind"]} `${item["name"]}` generated the following warning details:\n`${item}`
-            ...    next_steps=${item_next_steps.stdout}
+            ...    next_steps=${item_next_steps.stdout}\n${related_resource_recommendations}
         END
     END
     ${history}=    RW.CLI.Pop Shell History
@@ -216,7 +223,7 @@ Troubleshoot Deployment Replicas for `${DEPLOYMENT_NAME}`
         ...    severity=1
         ...    expected=Deployment `${DEPLOYMENT_NAME}` in namespace `${NAMESPACE}` should have minimum availability / pod.
         ...    actual=Deployment `${DEPLOYMENT_NAME}` in namespace `${NAMESPACE}` does not have minimum availability / pods.
-        ...    title= Deployment `${DEPLOYMENT_NAME}` has status: ${deployment_status["available_condition"]["message"]}
+        ...    title= Deployment `${DEPLOYMENT_NAME}` is unavailable. Status: `${deployment_status["available_condition"]["message"]}`
         ...    reproduce_hint=View Commands Used in Report Output
         ...    details=Deployment `${DEPLOYMENT_NAME}` has ${deployment_status["ready_replicas"]} pods and needs ${deployment_status["desired_replicas"]}:\n`${deployment_status}`
         ...    next_steps=${item_next_steps.stdout}
@@ -225,7 +232,7 @@ Troubleshoot Deployment Replicas for `${DEPLOYMENT_NAME}`
         ...    severity=3
         ...    expected=Deployment `${DEPLOYMENT_NAME}` in namespace `${NAMESPACE}` should have ${deployment_status["desired_replicas"]} pods.
         ...    actual=Deployment `${DEPLOYMENT_NAME}` in namespace `${NAMESPACE}` has ${deployment_status["ready_replicas"]} pods.
-        ...    title= Deployment `${DEPLOYMENT_NAME}` has ${deployment_status["unavailable_replicas"]} unavailable pods.
+        ...    title= Deployment `${DEPLOYMENT_NAME}` has ${deployment_status["unavailable_replicas"]} pods that are not running.
         ...    reproduce_hint=View Commands Used in Report Output
         ...    details=Deployment `${DEPLOYMENT_NAME}` has minimum availability, but has unready pods:\n`${deployment_status}`
         ...    next_steps=Troubleshoot Deployment Warning Events for `${DEPLOYMENT_NAME}`
@@ -256,12 +263,18 @@ Check Deployment Event Anomalies for `${DEPLOYMENT_NAME}`
     ...    occurences
     ...    connection error
     ...    ${DEPLOYMENT_NAME}
-    ${recent_anomalies}=    RW.CLI.Run Cli
-    ...    cmd=${KUBERNETES_DISTRIBUTION_BINARY} get events --context ${CONTEXT} -n ${NAMESPACE} -o json | jq '(now - (60*60)) as $time_limit | [ .items[] | select(.type != "Warning" and (.involvedObject.kind == "Deployment" or .involvedObject.kind == "ReplicaSet" or .involvedObject.kind == "Pod") and (.involvedObject.name | tostring | contains("${DEPLOYMENT_NAME}"))) | {kind: .involvedObject.kind, count: .count, name: .involvedObject.name, reason: .reason, message: .message, firstTimestamp: .firstTimestamp, lastTimestamp: .lastTimestamp, duration: (if (((.lastTimestamp | fromdateiso8601) - (.firstTimestamp | fromdateiso8601)) == 0) then 1 else (((.lastTimestamp | fromdateiso8601) - (.firstTimestamp | fromdateiso8601))/60) end) } ] | group_by([.kind, .name]) | map({kind: .[0].kind, name: .[0].name, count: (map(.count) | add), reasons: map(.reason) | unique, messages: map(.message) | unique, average_events_per_minute: (if .[0].duration == 1 then 1 else ((map(.count) | add)/.[0].duration ) end),firstTimestamp: map(.firstTimestamp | fromdateiso8601) | sort | .[0] | todateiso8601, lastTimestamp: map(.lastTimestamp | fromdateiso8601) | sort | reverse | .[0] | todateiso8601})'
+    ${recent_anomalies}=    RW.CLI.Run Bash File
+    ...    bash_file=event_anomalies.sh 
     ...    env=${env}
     ...    secret_file__kubeconfig=${kubeconfig}
+    ...    include_in_history=false
     ...    show_in_rwl_cheatsheet=true
-    ...    render_in_commandlist=true
+   ${k8s_deployment_details}=    RW.CLI.Run Cli
+    ...    cmd=${KUBERNETES_DISTRIBUTION_BINARY} get deployment ${DEPLOYMENT_NAME} -n ${NAMESPACE} --context ${CONTEXT} -o json
+    ...    env=${env}
+    ...    secret_file__kubeconfig=${kubeconfig}
+    ${related_resource_recommendations}=    RW.K8sHelper.Get Related Resource Recommendations
+    ...    k8s_object=${k8s_deployment_details.stdout}
     ${anomaly_list}=    Evaluate    json.loads(r'''${recent_anomalies.stdout}''')    json
     IF    len($anomaly_list) > 0
         FOR    ${item}    IN    @{anomaly_list}
@@ -279,7 +292,7 @@ Check Deployment Event Anomalies for `${DEPLOYMENT_NAME}`
                 ...    title= ${item["kind"]} `${item["name"]}` has an average of ${item["average_events_per_minute"]} events per minute (above the threshold of ${ANOMALY_THRESHOLD})
                 ...    reproduce_hint=View Commands Used in Report Output
                 ...    details=${item["kind"]} `${item["name"]}` has ${item["count"]} normal events that should be reviewed:\n`${item}`
-                ...    next_steps=${item_next_steps.stdout}
+                ...    next_steps=${item_next_steps.stdout}\n${related_resource_recommendations}
             END
         END
         ${anomalies_report_output}=    Set Variable    ${recent_anomalies.stdout}
@@ -291,6 +304,46 @@ Check Deployment Event Anomalies for `${DEPLOYMENT_NAME}`
     RW.Core.Add To Report    ${anomalies_report_output}\n
     RW.Core.Add Pre To Report    Commands Used:\n${history}
 
+Check ReplicaSet Health for Deployment `${DEPLOYMENT_NAME}`
+    [Documentation]    Fetches all replicasets related to deployment to ensure that conflicting versions don't exist.
+    [Tags]
+    ...    replica
+    ...    replicaset
+    ...    versions
+    ...    container
+    ...    pods
+    ...    deployment
+    ...    ${DEPLOYMENT_NAME}
+    ${check_replicaset}=    RW.CLI.Run Bash File
+    ...    bash_file=check_replicaset.sh
+    ...    cmd_override=./check_replicaset.sh | tee "${SCRIPT_TMP_DIR}/rs_analysis"
+    ...    env=${env}
+    ...    secret_file__kubeconfig=${kubeconfig}
+    ...    timeout_seconds=180
+    ...    include_in_history=false
+    ...    show_in_rwl_cheatsheet=true
+    ${recommendations}=    RW.CLI.Run Cli
+    ...    cmd=awk "/Recommended Next Steps:/ {start=1; getline} start" "${SCRIPT_TMP_DIR}/rs_analysis"
+    ...    env=${env}
+    ...    include_in_history=false
+    IF    $recommendations.stdout != ""
+        ${recommendation_list}=    Evaluate    json.loads($recommendations.stdout)    json    # pass the value itself; quotes in the output cannot break the expression
+        IF    len($recommendation_list) > 0
+            FOR    ${item}    IN    @{recommendation_list}
+                RW.Core.Add Issue
+                ...    severity=${item["severity"]}
+                ...    expected=Deployment `${DEPLOYMENT_NAME}` should only have one active replicaset in namespace `${NAMESPACE}`
+                ...    actual=Deployment `${DEPLOYMENT_NAME}` has more than one active replicaset in namespace `${NAMESPACE}`
+                ...    title=${item["title"]}
+                ...    reproduce_hint=${check_replicaset.cmd}
+                ...    details=${item["details"]}
+                ...    next_steps=${item["next_steps"]}
+            END
+        END
+    END
+    RW.Core.Add Pre To Report    ${check_replicaset.stdout}\n
+    ${history}=    RW.CLI.Pop Shell History
+    RW.Core.Add Pre To Report    Commands Used: ${history}
 
 *** Keywords ***
 Suite Initialization
@@ -339,7 +392,7 @@ Suite Initialization
     ...    description=Pattern used to exclude entries from log results when searching in log results.
     ...    pattern=\w*
     ...    example=(node_modules|opentelemetry)
-    ...    default=(node_modules|opentelemetry)
+    ...    default=("")
     ${KUBERNETES_DISTRIBUTION_BINARY}=    RW.Core.Import User Variable    KUBERNETES_DISTRIBUTION_BINARY
     ...    type=string
     ...    description=Which binary to use for Kubernetes CLI commands.
diff --git a/codebundles/k8s-jaeger-http-query/runbook.robot b/codebundles/k8s-jaeger-http-query/runbook.robot
index 23970327..95344588 100644
--- a/codebundles/k8s-jaeger-http-query/runbook.robot
+++ b/codebundles/k8s-jaeger-http-query/runbook.robot
@@ -22,7 +22,7 @@ Query Traces in Jaeger for Unhealthy HTTP Response Codes in Namespace `${NAMESPA
     ...    secret_file__kubeconfig=${kubeconfig}
     ...    timeout_seconds=180
     ...    include_in_history=false
-    ...    render_in_commandlist=true
+    ...    show_in_rwl_cheatsheet=true
     ${recommendations}=    RW.CLI.Run Cli
     ...    cmd=echo '${http_traces.stdout}' | awk '/Recommended Next Steps:/ {flag=1; next} flag'
     ...    env=${env}
diff --git a/libraries/RW/K8sHelper/__init__.py b/libraries/RW/K8sHelper/__init__.py
new file mode 100644
index 00000000..fac3a82e
--- /dev/null
+++ b/libraries/RW/K8sHelper/__init__.py
@@ -0,0 +1 @@
+from .k8s_helper import *
diff --git a/libraries/RW/K8sHelper/k8s_helper.py b/libraries/RW/K8sHelper/k8s_helper.py
new file mode 100644
index 00000000..f7d93706
--- /dev/null
+++ b/libraries/RW/K8sHelper/k8s_helper.py
@@ -0,0 +1,40 @@
+import json
+
+def get_related_resource_recommendations(k8s_object):
+    """
+    Parse a Kubernetes object JSON for specific annotations or labels and return recommendations.
+
+    Args:
+    k8s_object (str): The Kubernetes object as a JSON string (e.g. `kubectl get ... -o json` output).
+
+    Returns:
+    str: Newline-separated recommendations, a fallback message when none apply, or an error message.
+    """
+    # Convert the string representation of the JSON to a Python dictionary
+    try:
+        obj_json = json.loads(k8s_object)
+    except (TypeError, ValueError) as e:  # TypeError: non-string input; JSONDecodeError is a ValueError
+        return f"Error decoding JSON: {e}"
+
+    # Valid JSON that is not an object (list, null, ...) or has null metadata/labels carries nothing to inspect
+    metadata = (obj_json.get("metadata") if isinstance(obj_json, dict) else None) or {}
+    labels = metadata.get("labels") or {}
+    # Collect every match so that one GitOps tool's recommendation does not silently replace another's
+    recommendations = []
+
+    # Checking for an ArgoCD label
+    if 'argocd.argoproj.io/instance' in labels:
+        app_name = labels['argocd.argoproj.io/instance'].split('_')[0]
+        recommendations.append(f"Troubleshoot ArgoCD Application `{app_name.capitalize()}`")
+
+    # Check for Flux Resources; if the namespace label is absent, the object's own namespace is a best guess
+    if 'helm.toolkit.fluxcd.io/name' in labels:
+        fluxcd_helm_name = labels['helm.toolkit.fluxcd.io/name']
+        fluxcd_helm_namespace = labels.get('helm.toolkit.fluxcd.io/namespace') or metadata.get("namespace", "")
+        recommendations.append(f"Troubleshoot `{fluxcd_helm_name}` Helm Release Health in Namespace `{fluxcd_helm_namespace}`")
+
+    # Extend this function to check for other specific labels or annotations as needed
+
+    return "\n".join(recommendations) or "No recommendations available."
+
+