From 5ab13a58d17c66c6168448b1840e6c29c9cc75b2 Mon Sep 17 00:00:00 2001 From: Shea Stewart Date: Mon, 15 Jul 2024 13:48:59 -0400 Subject: [PATCH] Adjust template / match rules for tail-logs (#398) --- README.md | 1 + .../k8s-namespace-healthcheck.yaml | 1 + .../k8s-namespace-healthcheck-sli.yaml | 4 +-- .../k8s-namespace-healthcheck-taskset.yaml | 8 +---- .../k8s-namespace-healthcheck-workflow.yaml | 30 +++++++++++++++++++ .../k8s-namespace-healthcheck/runbook.robot | 7 ----- .../k8s-namespace-healthcheck/sli.robot | 4 +-- .../k8s-tail-logs-dynamic.yaml | 8 ++--- .../templates/k8s-tail-logs-dynamic-sli.yaml | 2 +- .../k8s-tail-logs-dynamic-taskset.yaml | 2 +- 10 files changed, 43 insertions(+), 24 deletions(-) create mode 100644 codebundles/k8s-namespace-healthcheck/.runwhen/templates/k8s-namespace-healthcheck-workflow.yaml diff --git a/README.md b/README.md index 4a8f0a5b..98f29d16 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ Troubleshooting Tasks in Codecollection: **178** Codebundles in Codecollection: **66** + ![](docs/GitHub_Banner.jpg)

diff --git a/codebundles/k8s-namespace-healthcheck/.runwhen/generation-rules/k8s-namespace-healthcheck.yaml b/codebundles/k8s-namespace-healthcheck/.runwhen/generation-rules/k8s-namespace-healthcheck.yaml index 7008f4d8..83b3e9c5 100644 --- a/codebundles/k8s-namespace-healthcheck/.runwhen/generation-rules/k8s-namespace-healthcheck.yaml +++ b/codebundles/k8s-namespace-healthcheck/.runwhen/generation-rules/k8s-namespace-healthcheck.yaml @@ -20,3 +20,4 @@ spec: - type: slo - type: runbook templateName: k8s-namespace-healthcheck-taskset.yaml + # - type: workflow diff --git a/codebundles/k8s-namespace-healthcheck/.runwhen/templates/k8s-namespace-healthcheck-sli.yaml b/codebundles/k8s-namespace-healthcheck/.runwhen/templates/k8s-namespace-healthcheck-sli.yaml index bde6cfd9..75fafb2b 100644 --- a/codebundles/k8s-namespace-healthcheck/.runwhen/templates/k8s-namespace-healthcheck-sli.yaml +++ b/codebundles/k8s-namespace-healthcheck/.runwhen/templates/k8s-namespace-healthcheck-sli.yaml @@ -40,9 +40,9 @@ spec: - name: DISTRIBUTION value: Kubernetes - name: EVENT_THRESHOLD - value: '0' + value: '3' - name: CONTAINER_RESTART_THRESHOLD - value: '0' + value: '2' secretsProvided: - name: kubeconfig workspaceKey: {{custom.kubeconfig_secret_name}} \ No newline at end of file diff --git a/codebundles/k8s-namespace-healthcheck/.runwhen/templates/k8s-namespace-healthcheck-taskset.yaml b/codebundles/k8s-namespace-healthcheck/.runwhen/templates/k8s-namespace-healthcheck-taskset.yaml index bd085f06..0c3e70fb 100644 --- a/codebundles/k8s-namespace-healthcheck/.runwhen/templates/k8s-namespace-healthcheck-taskset.yaml +++ b/codebundles/k8s-namespace-healthcheck/.runwhen/templates/k8s-namespace-healthcheck-taskset.yaml @@ -25,18 +25,12 @@ spec: value: {{custom.kubernetes_distribution_binary}} - name: NAMESPACE value: {{match_resource.resource.metadata.name}} - - name: ERROR_PATTERN - value: (Error|Exception) - name: CONTEXT value: {{context}} - - name: SERVICE_ERROR_PATTERN - value: (Error:) - - name: SERVICE_EXCLUDE_PATTERN - value: (node_modules|opentelemetry) - name: ANOMALY_THRESHOLD value: "3.0" - name: EVENT_AGE - value: "30" + value: "5m" secretsProvided: - name: kubeconfig workspaceKey: {{custom.kubeconfig_secret_name}} \ No newline at end of file diff --git a/codebundles/k8s-namespace-healthcheck/.runwhen/templates/k8s-namespace-healthcheck-workflow.yaml b/codebundles/k8s-namespace-healthcheck/.runwhen/templates/k8s-namespace-healthcheck-workflow.yaml new file mode 100644 index 00000000..c03e6d74 --- /dev/null +++ b/codebundles/k8s-namespace-healthcheck/.runwhen/templates/k8s-namespace-healthcheck-workflow.yaml @@ -0,0 +1,30 @@ +apiVersion: runwhen.com/v1 +kind: Workflow +metadata: + name: {{slx_name}}-ns-alert-workflow + labels: + {% include "common-labels.yaml" %} + annotations: + {% include "common-annotations.yaml" %} +spec: + fromActivities: + - displayName: {{namespace.name}} Namespace SLO Alert Workflow + description: Start RunSession with Eager Edgar when SLO is alerting for {{namespace.name}} namespace health + actions: + - tasks: + slx: {{slx_name.split('--')[1]}} + persona: eager-edgar + titles: + - Inspect Warning Events in Namespace `${NAMESPACE}` + - Inspect Container Restarts In Namespace `${NAMESPACE}` + - Inspect Pending Pods In Namespace `${NAMESPACE}` + - Inspect Failed Pods In Namespace `${NAMESPACE}` + - Inspect Workload Status Conditions In Namespace `${NAMESPACE}` + - Check Event Anomalies in Namespace `${NAMESPACE}` + - Check Resource Quota Utilization in Namespace `${NAMESPACE}` + match: + activityVerbs: + - ALERTS_STARTED + slxs: + - {{slx_name.split('--')[1]}} + name: {{slx_name.split('--')[1]}}-slo-alert-workflow \ No newline at end of file diff --git a/codebundles/k8s-namespace-healthcheck/runbook.robot b/codebundles/k8s-namespace-healthcheck/runbook.robot index fed2b97b..e44eccd8 100644 --- a/codebundles/k8s-namespace-healthcheck/runbook.robot +++ b/codebundles/k8s-namespace-healthcheck/runbook.robot @@ -497,12 +497,6 @@ Suite Initialization ... description=Which Kubernetes context to operate within. ... pattern=\w* ... example=my-main-cluster - ${ERROR_PATTERN}= RW.Core.Import User Variable ERROR_PATTERN - ... type=string - ... description=The error pattern to use when grep-ing logs. - ... pattern=\w* - ... example=(Error|Exception) - ... default=(Error|Exception) ${ANOMALY_THRESHOLD}= RW.Core.Import User Variable ... ANOMALY_THRESHOLD ... type=string @@ -533,7 +527,6 @@ Suite Initialization Set Suite Variable ${KUBERNETES_DISTRIBUTION_BINARY} ${KUBERNETES_DISTRIBUTION_BINARY} Set Suite Variable ${NAMESPACE} ${NAMESPACE} Set Suite Variable ${EVENT_AGE} ${EVENT_AGE} - Set Suite Variable ${ERROR_PATTERN} ${ERROR_PATTERN} Set Suite Variable ${ANOMALY_THRESHOLD} ${ANOMALY_THRESHOLD} Set Suite Variable ${HOME} ${HOME} Set Suite Variable diff --git a/codebundles/k8s-namespace-healthcheck/sli.robot b/codebundles/k8s-namespace-healthcheck/sli.robot index b4f5fe72..7059520f 100644 --- a/codebundles/k8s-namespace-healthcheck/sli.robot +++ b/codebundles/k8s-namespace-healthcheck/sli.robot @@ -38,7 +38,7 @@ Suite Initialization ... description=The maximum total events to be still considered healthy. ... pattern=^\d+$ ... example=2 - ... default=0 + ... default=2 ${CONTAINER_RESTART_AGE}= RW.Core.Import User Variable CONTAINER_RESTART_AGE ... type=string ... description=The time window in minutes as search for container restarts. @@ -50,7 +50,7 @@ Suite Initialization ... description=The maximum total container restarts to be still considered healthy. ... pattern=^\d+$ ... example=2 - ... default=0 + ... default=3 ${KUBERNETES_DISTRIBUTION_BINARY}= RW.Core.Import User Variable KUBERNETES_DISTRIBUTION_BINARY ... type=string ... description=Which binary to use for Kubernetes CLI commands. diff --git a/codebundles/k8s-tail-logs-dynamic/.runwhen/generation-rules/k8s-tail-logs-dynamic.yaml b/codebundles/k8s-tail-logs-dynamic/.runwhen/generation-rules/k8s-tail-logs-dynamic.yaml index f2a596c2..8688c588 100644 --- a/codebundles/k8s-tail-logs-dynamic/.runwhen/generation-rules/k8s-tail-logs-dynamic.yaml +++ b/codebundles/k8s-tail-logs-dynamic/.runwhen/generation-rules/k8s-tail-logs-dynamic.yaml @@ -8,12 +8,12 @@ spec: - type: and matches: - type: pattern - pattern: "kubectl.kubernetes.io//default-container" - properties: ["spec/template/metadata/annotations"] + pattern: ".+" + properties: ["spec/template/metadata/annotations/kubectl.kubernetes.io//default-container"] mode: substring - type: pattern - pattern: "codecollection.runwhen.com//app" - properties: [annotations] + pattern: "codecollection.runwhen.com/app" + properties: [labels] mode: substring slxs: - baseName: k8s-tail-logs-dynamic diff --git a/codebundles/k8s-tail-logs-dynamic/.runwhen/templates/k8s-tail-logs-dynamic-sli.yaml b/codebundles/k8s-tail-logs-dynamic/.runwhen/templates/k8s-tail-logs-dynamic-sli.yaml index 5e753fbf..0fe4331c 100644 --- a/codebundles/k8s-tail-logs-dynamic/.runwhen/templates/k8s-tail-logs-dynamic-sli.yaml +++ b/codebundles/k8s-tail-logs-dynamic/.runwhen/templates/k8s-tail-logs-dynamic-sli.yaml @@ -32,7 +32,7 @@ spec: - name: LOGS_SINCE value: 10m - name: LABELS - value: app={{match_resource.resource.metadata.labels.app}} + value: codecollection.runwhen.com/app={{match_resource.resource.metadata.labels.get('codecollection.runwhen.com/app')}} - name: EXCLUDE_PATTERN value: INFO - name: CONTAINER_NAME diff --git a/codebundles/k8s-tail-logs-dynamic/.runwhen/templates/k8s-tail-logs-dynamic-taskset.yaml b/codebundles/k8s-tail-logs-dynamic/.runwhen/templates/k8s-tail-logs-dynamic-taskset.yaml index 13832ee8..e6e3b3cc 100644 --- a/codebundles/k8s-tail-logs-dynamic/.runwhen/templates/k8s-tail-logs-dynamic-taskset.yaml +++ b/codebundles/k8s-tail-logs-dynamic/.runwhen/templates/k8s-tail-logs-dynamic-taskset.yaml @@ -26,7 +26,7 @@ spec: - name: LOGS_SINCE value: 10m - name: LABELS - value: app={{match_resource.resource.metadata.labels.app}} + value: codecollection.runwhen.com/app={{match_resource.resource.metadata.labels.get('codecollection.runwhen.com/app')}} - name: EXCLUDE_PATTERN value: INFO - name: CONTAINER_NAME