From 4a0151659654d60c3d2848b587d1eba34688a020 Mon Sep 17 00:00:00 2001
From: ido
Date: Wed, 25 Sep 2024 17:14:18 -0400
Subject: [PATCH] prometheus-rules, default values

Changes to the hermes chart:

- files/config.toml: the REST and telemetry servers now listen on 0.0.0.0
  (all interfaces) instead of 127.0.0.1.
- templates/_helpers.tpl: add the "hermes.labels" and
  "hermes.selectorLabels" helpers.
- templates/prometheusrule.yaml: new PrometheusRule template. It is rendered
  only when alerting.enabled and alerting.prometheusRule.enabled are both
  true and alerting.prometheusRule.rules is non-empty.
- templates/service.yaml: rename the telemetry service port from
  "telemetry-svc" to "telemetry".
- values.yaml: default image is now ghcr.io/astriaorg/hermes:sha-450f848;
  add an "alerting" section (disabled by default) with a sample
  Chain_Node_Down rule.

---
Review notes (ignored by git am): line breaks and indentation in this patch
were reconstructed, and the prometheusrule.yaml blob id on its index line
predates the render-guard change. Regenerate with git format-patch before
sending.

 charts/hermes/files/config.toml             |  4 ++--
 charts/hermes/templates/_helpers.tpl        | 14 ++++++++++++
 charts/hermes/templates/prometheusrule.yaml | 20 +++++++++++++++++
 charts/hermes/templates/service.yaml        |  2 +-
 charts/hermes/values.yaml                   | 25 ++++++++++++++++++++-
 5 files changed, 61 insertions(+), 4 deletions(-)
 create mode 100644 charts/hermes/templates/prometheusrule.yaml

diff --git a/charts/hermes/files/config.toml b/charts/hermes/files/config.toml
index cdc9a90ce2..ff2b396478 100644
--- a/charts/hermes/files/config.toml
+++ b/charts/hermes/files/config.toml
@@ -8,7 +8,7 @@ enabled = {{ .Values.rest.enabled }}
 
 # Specify the IPv4/6 host over which the built-in HTTP server will serve the RESTful
 # API requests. Default: 127.0.0.1
-host = '127.0.0.1'
+host = '0.0.0.0'
 
 # Specify the port over which the built-in HTTP server will serve the restful API
 # requests. Default: 3000
@@ -35,7 +35,7 @@ tx_confirmation = {{ .Values.mode.packets.txConfirmation }}
 
 [telemetry]
 enabled = {{ .Values.telemetry.enabled }}
-host = '127.0.0.1'
+host = '0.0.0.0'
 port = {{ .Values.ports.telemetry }}
 
 [telemetry.buckets]
diff --git a/charts/hermes/templates/_helpers.tpl b/charts/hermes/templates/_helpers.tpl
index 3b63b396d0..d00d784ce5 100644
--- a/charts/hermes/templates/_helpers.tpl
+++ b/charts/hermes/templates/_helpers.tpl
@@ -22,6 +22,20 @@ We truncate at 63 chars because some Kubernetes name fields are limited to this
 {{- end -}}
 {{- end -}}
 
+{{/*
+Common labels
+*/}}
+{{- define "hermes.labels" -}}
+{{ include "hermes.selectorLabels" . }}
+{{- end }}
+
+{{/*
+Selector labels
+*/}}
+{{- define "hermes.selectorLabels" -}}
+app: {{ include "hermes.fullname" . }}
+{{- end }}
+
 {{/*
 Return if ingress is stable.
 */}}
diff --git a/charts/hermes/templates/prometheusrule.yaml b/charts/hermes/templates/prometheusrule.yaml
new file mode 100644
index 0000000000..f07374c0f1
--- /dev/null
+++ b/charts/hermes/templates/prometheusrule.yaml
@@ -0,0 +1,20 @@
+{{/* Render only when alerting and its prometheusRule toggle are both enabled and
+at least one rule is configured, so an empty `spec` is never emitted. */}}
+{{- if and .Values.alerting.enabled .Values.alerting.prometheusRule.enabled .Values.alerting.prometheusRule.rules -}}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: {{ template "hermes.fullname" . }}
+{{- if .Values.alerting.prometheusRule.namespace }}
+  namespace: {{ .Values.alerting.prometheusRule.namespace | quote }}
+{{- end }}
+  labels:
+    {{- include "hermes.labels" . | nindent 4 }}
+    {{- if .Values.alerting.prometheusRule.additionalLabels }}
+    {{- toYaml .Values.alerting.prometheusRule.additionalLabels | nindent 4 }}
+    {{- end }}
+spec:
+  groups:
+  - name: {{ template "hermes.fullname" . }}
+    rules: {{- toYaml .Values.alerting.prometheusRule.rules | nindent 4 }}
+{{- end }}
diff --git a/charts/hermes/templates/service.yaml b/charts/hermes/templates/service.yaml
index 0eda70c8a7..d95aecf713 100644
--- a/charts/hermes/templates/service.yaml
+++ b/charts/hermes/templates/service.yaml
@@ -16,7 +16,7 @@ spec:
       targetPort: rest
     {{- end }}
     {{- if .Values.telemetry.enabled }}
-    - name: telemetry-svc
+    - name: telemetry
       port: {{ .Values.ports.telemetry }}
       targetPort: telemetry
     {{- end }}
diff --git a/charts/hermes/values.yaml b/charts/hermes/values.yaml
index 27a745fd00..cc878df14a 100644
--- a/charts/hermes/values.yaml
+++ b/charts/hermes/values.yaml
@@ -3,7 +3,7 @@ global:
   replicaCount: 1
   logLevel: debug
 
-image: ghcr.io/penumbra-zone/hermes:main
+image: ghcr.io/astriaorg/hermes:sha-450f848
 imagePullPolicy: IfNotPresent
 
 fullnameOverride: ""
@@ -195,3 +195,26 @@ ingress:
   #  - secretName: chart-example-tls
   #    hosts:
   #      - chart-example.local
+
+alerting:
+  enabled: false
+  interval: ""
+  additionalLabels:
+    release: kube-prometheus-stack
+  annotations: {}
+  # scrapeTimeout: 10s
+  # path: /metrics
+  prometheusRule:
+    enabled: true
+    additionalLabels:
+      release: kube-prometheus-stack
+    namespace: monitoring
+    rules:
+      - alert: Chain_Node_Down
+        expr: up{container="cometbft"} == 0 # Insert your query Expression
+        for: 1m # Rough number but should be enough to init warn
+        labels:
+          severity: critical
+        annotations:
+          summary: Chain Node is Down (instance {{ $labels.instance }})
+          description: "chain node '{{ $labels.namespace }}' has disappeared from Prometheus target discovery.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"