From 5aa823ec89789c944bf8bf60649f12c57d108ce4 Mon Sep 17 00:00:00 2001
From: Sebastian Widmer
Date: Fri, 21 Jun 2024 18:39:46 +0200
Subject: [PATCH 1/3] Add `/healthz` endpoint that checks Alertmanager connection

---
Review notes: the handler now encodes the response before writing it, so an
encoding failure can still be reported as a 500, and it sets the JSON
Content-Type. The blob ids on the `index` lines of the two new files predate
these changes and were not regenerated.

 internal/healthcheck/healthcheck.go      | 60 +++++++++++++++
 internal/healthcheck/healthcheck_test.go | 93 ++++++++++++++++++++++++
 main.go                                  |  2 +
 3 files changed, 155 insertions(+)
 create mode 100644 internal/healthcheck/healthcheck.go
 create mode 100644 internal/healthcheck/healthcheck_test.go

diff --git a/internal/healthcheck/healthcheck.go b/internal/healthcheck/healthcheck.go
new file mode 100644
index 0000000..6fdc007
--- /dev/null
+++ b/internal/healthcheck/healthcheck.go
@@ -0,0 +1,60 @@
+package healthcheck
+
+import (
+	"encoding/json"
+	"net/http"
+
+	"github.com/go-openapi/strfmt"
+	"github.com/prometheus/alertmanager/api/v2/client/general"
+	"github.com/prometheus/alertmanager/api/v2/models"
+)
+
+// HealthCheck is a health check handler for the Alertmanager API.
+type HealthCheck struct {
+	GeneralService general.ClientService
+}
+
+// HandleHealthz handles a health check request.
+// It asks GeneralService for the Alertmanager status using the request's
+// context and replies with a JSON document holding the Alertmanager cluster,
+// version and uptime. It replies with status 500 and a plain-text message if
+// the client returns an error, if the response or its payload is nil, or if
+// the response cannot be encoded.
+func (h HealthCheck) HandleHealthz(res http.ResponseWriter, req *http.Request) {
+	ams, err := h.GeneralService.GetStatus(general.NewGetStatusParamsWithContext(req.Context()))
+	if err != nil {
+		http.Error(res, err.Error(), http.StatusInternalServerError)
+		return
+	}
+	if ams == nil || ams.Payload == nil {
+		http.Error(res, "Nil response from Alertmanager", http.StatusInternalServerError)
+		return
+	}
+
+	// Encode before touching the ResponseWriter: once the first byte is
+	// written the 200 status is committed and http.Error can no longer
+	// report an encoding failure.
+	body, err := json.Marshal(response{
+		Status:              "connected",
+		AlertmanagerCluster: ams.Payload.Cluster,
+		AlertmanagerVersion: ams.Payload.VersionInfo,
+		AlertmanagerUptime:  ams.Payload.Uptime,
+	})
+	if err != nil {
+		http.Error(res, "Encoding error: "+err.Error(), http.StatusInternalServerError)
+		return
+	}
+
+	res.Header().Set("Content-Type", "application/json")
+	// A write error means the client went away; the status line has already
+	// been sent, so there is nothing useful left to do with it.
+	_, _ = res.Write(append(body, '\n'))
+}
+
+// response is the JSON document returned by HandleHealthz on success.
+type response struct {
+	Status              string                `json:"status"`
+	AlertmanagerCluster *models.ClusterStatus `json:"alertmanager_cluster"`
+	AlertmanagerVersion *models.VersionInfo   `json:"alertmanager_version"`
+	AlertmanagerUptime  *strfmt.DateTime      `json:"alertmanager_uptime"`
+}
diff --git a/internal/healthcheck/healthcheck_test.go b/internal/healthcheck/healthcheck_test.go
new file mode 100644
index 0000000..2cad88e
--- /dev/null
+++ b/internal/healthcheck/healthcheck_test.go
@@ -0,0 +1,93 @@
+package healthcheck_test
+
+import (
+	"errors"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+
+	"github.com/appuio/alerts_exporter/internal/healthcheck"
+	"github.com/go-openapi/runtime"
+	"github.com/prometheus/alertmanager/api/v2/client/general"
+	"github.com/prometheus/alertmanager/api/v2/models"
+	"github.com/stretchr/testify/require"
+)
+
+// TestOk checks that a successful status lookup yields a 200 JSON response.
+func TestOk(t *testing.T) {
+	t.Parallel()
+
+	hc := &healthcheck.HealthCheck{
+		GeneralService: &mockClientService{
+			OkResponse: &general.GetStatusOK{
+				Payload: &models.AlertmanagerStatus{
+					VersionInfo: &models.VersionInfo{
+						Version: ptr("v0.22.2"),
+					},
+				},
+			},
+		},
+	}
+
+	rec := httptest.NewRecorder()
+	hc.HandleHealthz(rec, httptest.NewRequest("GET", "/healthz", nil))
+	res := rec.Result()
+	require.Equal(t, http.StatusOK, res.StatusCode)
+	require.Equal(t, "application/json", res.Header.Get("Content-Type"))
+	require.Contains(t, rec.Body.String(), `"status":"connected"`)
+	require.Contains(t, rec.Body.String(), `v0.22.2`)
+}
+
+// TestErrResponse checks that a client error is reported as a 500.
+func TestErrResponse(t *testing.T) {
+	t.Parallel()
+
+	hc := &healthcheck.HealthCheck{
+		GeneralService: &mockClientService{
+			Err: errors.New("some error"),
+		},
+	}
+
+	rec := httptest.NewRecorder()
+	hc.HandleHealthz(rec, httptest.NewRequest("GET", "/healthz", nil))
+	res := rec.Result()
+	require.Equal(t, http.StatusInternalServerError, res.StatusCode)
+	require.Contains(t, rec.Body.String(), "some error")
+}
+
+// TestNilResponse checks that a nil client response is reported as a 500.
+func TestNilResponse(t *testing.T) {
+	t.Parallel()
+
+	hc := &healthcheck.HealthCheck{
+		GeneralService: &mockClientService{},
+	}
+
+	rec := httptest.NewRecorder()
+	hc.HandleHealthz(rec, httptest.NewRequest("GET", "/healthz", nil))
+	res := rec.Result()
+	require.Equal(t, http.StatusInternalServerError, res.StatusCode)
+	require.Contains(t, rec.Body.String(), "Nil response")
+}
+
+// mockClientService is a general.ClientService stub with a canned result.
+type mockClientService struct {
+	OkResponse *general.GetStatusOK
+	Err        error
+}
+
+var _ general.ClientService = (*mockClientService)(nil)
+
+// GetStatus returns Err if it is set and OkResponse otherwise.
+func (m *mockClientService) GetStatus(*general.GetStatusParams, ...general.ClientOption) (*general.GetStatusOK, error) {
+	if m.Err != nil {
+		return nil, m.Err
+	}
+	return m.OkResponse, nil
+}
+
+// SetTransport is a no-op; it exists to satisfy general.ClientService.
+func (m *mockClientService) SetTransport(runtime.ClientTransport) {}
+
+// ptr returns a pointer to a copy of t.
+func ptr[T any](t T) *T { return &t }
diff --git a/main.go b/main.go
index 9657b8c..5414952 100644
--- a/main.go
+++ b/main.go
@@ -7,6 +7,7 @@ import (
 	"net/http"
 
 	alertscollector "github.com/appuio/alerts_exporter/internal/alerts_collector"
+	"github.com/appuio/alerts_exporter/internal/healthcheck"
 	"github.com/appuio/alerts_exporter/internal/saauth"
 	openapiclient "github.com/go-openapi/runtime/client"
 	alertmanagerclient "github.com/prometheus/alertmanager/api/v2/client"
@@ -100,6 +101,7 @@ func main() {
 	// Expose metrics and custom registry via an HTTP server
 	// using the HandleFor function. "/metrics" is the usual endpoint for that.
 	http.Handle("/metrics", promhttp.HandlerFor(reg, promhttp.HandlerOpts{Registry: reg}))
+	http.HandleFunc("/healthz", healthcheck.HealthCheck{GeneralService: ac.General}.HandleHealthz)
 	log.Printf("Listening on `%s`", listenAddr)
 	log.Fatal(http.ListenAndServe(listenAddr, nil))
 }

From 5e0aefe6a1166d5a80215a3a6c51f80e56d1a67c Mon Sep 17 00:00:00 2001
From: Sebastian Widmer
Date: Fri, 21 Jun 2024 18:49:12 +0200
Subject: [PATCH 2/3] Fix flake by using EventuallyWithT correctly

---
 internal/saauth/saauth_test.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/internal/saauth/saauth_test.go b/internal/saauth/saauth_test.go
index c438d8d..15fc134 100644
--- a/internal/saauth/saauth_test.go
+++ b/internal/saauth/saauth_test.go
@@ -27,8 +27,8 @@ func Test_ServiceAccountAuthInfoWriter_AuthenticateRequest(t *testing.T) {
 	require.NoError(t, os.WriteFile(tokenFile, []byte("new-token"), 0644))
 	require.EventuallyWithT(t, func(t *assert.CollectT) {
 		r := new(runtime.TestClientRequest)
-		require.NoError(t, subject.AuthenticateRequest(r, nil))
-		require.Equal(t, "Bearer new-token", r.GetHeaderParams().Get("Authorization"))
+		assert.NoError(t, subject.AuthenticateRequest(r, nil))
+		assert.Equal(t, "Bearer new-token", r.GetHeaderParams().Get("Authorization"))
 	}, 5*time.Second, time.Millisecond)
 }
 

From e96bc8aae3ae83638671e0234d83007be3485767 Mon Sep 17 00:00:00 2001
From: Sebastian Widmer
Date: Fri, 21 Jun 2024 18:58:58 +0200
Subject: [PATCH 3/3] Add default `livenessProbe` to kustomize deployment

---
 config/exporter/exporter.yaml | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/config/exporter/exporter.yaml b/config/exporter/exporter.yaml
index c3528c9..0527cdd 100644
--- a/config/exporter/exporter.yaml
+++ b/config/exporter/exporter.yaml
@@ -54,5 +54,12 @@ spec:
           requests:
             cpu: 10m
             memory: 64Mi
+        livenessProbe:
+          httpGet:
+            path: /healthz
+            port: 8080
+          periodSeconds: 20
+          initialDelaySeconds: 15
+          timeoutSeconds: 3
       serviceAccountName: alerts-exporter
       terminationGracePeriodSeconds: 10