Skip to content

Commit

Permalink
Fix/remove labelled metrics (#180)
Browse files Browse the repository at this point in the history
* fix: removed targets do not produce metrics anymore

A check that gets updated with new targets will not continue exposing the last metric
for that target. The metric(s) with the target label will be deleted.

The responsibility for the metric cleanup must lie with the checks themselves,
as they are the only ones who have access to the metrics themselves.

Signed-off-by: Bruno Bressi <bruno.bressi@telekom.de>

* chore: removed deprecated latency metric

The metric was being already deleted. Instead of just removing the deletion, the metric itself can be finally removed for the next version.

* chore: clean up & renaming

Renamed the parameter of the metric cleanup to make the behavior clearer

Signed-off-by: Bruno Bressi <bruno.bressi@telekom.de>

* chore: clearer comment

The term Collector for the prometheus metrics is avoided, as it was only creating confusion

Signed-off-by: Bruno Bressi <bruno.bressi@telekom.de>

* fix: removed double increase of count

The metrics was being incremented twice for each check. Now it is not.

Signed-off-by: Bruno Bressi <bruno.bressi@telekom.de>

* chore: use quoted string for error

Signed-off-by: Bruno Bressi <bruno.bressi@telekom.de>

* chore: restructuring for consistency

---------

Signed-off-by: Bruno Bressi <bruno.bressi@telekom.de>
Co-authored-by: lvlcn-t <75443136+lvlcn-t@users.noreply.github.com>
  • Loading branch information
puffitos and lvlcn-t authored Sep 11, 2024
1 parent 1d11e6f commit fdb3ad9
Show file tree
Hide file tree
Showing 19 changed files with 355 additions and 168 deletions.
4 changes: 4 additions & 0 deletions .golangci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,10 @@ linters-settings:
- (github.com/golangci/golangci-lint/pkg/logutils.Log).Errorf
- (github.com/golangci/golangci-lint/pkg/logutils.Log).Fatalf

gosec:
excludes:
- "G115" # Excluded per default https://github.com/golangci/golangci-lint/pull/4941

lll:
line-length: 140
misspell:
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ repos:
# If you have installed it in a different way, you should use the binary of the
# go toolchain instead, because behavior may differ.
- repo: https://github.com/norwoodj/helm-docs
rev: v1.13.1
rev: v1.14.2
hooks:
- id: helm-docs
args:
Expand Down
7 changes: 5 additions & 2 deletions pkg/checks/base.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,10 @@ type Check interface {
Run(ctx context.Context, cResult chan ResultDTO) error
// Shutdown is called once when the check is unregistered or sparrow shuts down
Shutdown()
// SetConfig is called once when the check is registered
// UpdateConfig is called once when the check is registered
// This is also called while the check is running, if the remote config is updated
// This should return an error if the config is invalid
SetConfig(config Runtime) error
UpdateConfig(config Runtime) error
// GetConfig returns the current configuration of the check
GetConfig() Runtime
// Name returns the name of the check
Expand All @@ -56,6 +56,9 @@ type Check interface {
Schema() (*openapi3.SchemaRef, error)
// GetMetricCollectors allows the check to provide prometheus metric collectors
GetMetricCollectors() []prometheus.Collector
// RemoveLabelledMetrics allows the check to remove the prometheus metrics
// of the check whose `target` label matches the passed value
RemoveLabelledMetrics(target string) error
}

// CheckBase is a struct providing common fields used by implementations of the Check interface.
Expand Down
144 changes: 94 additions & 50 deletions pkg/checks/base_moq.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 18 additions & 1 deletion pkg/checks/dns/dns.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ package dns
import (
"context"
"net"
"slices"
"sync"
"time"

Expand Down Expand Up @@ -112,10 +113,20 @@ func (d *DNS) Shutdown() {
close(d.DoneChan)
}

func (d *DNS) SetConfig(cfg checks.Runtime) error {
func (d *DNS) UpdateConfig(cfg checks.Runtime) error {
if c, ok := cfg.(*Config); ok {
d.Mu.Lock()
defer d.Mu.Unlock()

for _, target := range d.config.Targets {
if !slices.Contains(c.Targets, target) {
err := d.metrics.Remove(target)
if err != nil {
return err
}
}
}

d.config = *c
return nil
}
Expand All @@ -137,6 +148,12 @@ func (d *DNS) GetMetricCollectors() []prometheus.Collector {
return d.metrics.GetCollectors()
}

// RemoveLabelledMetrics removes the metrics which have the passed
// target as a label
func (d *DNS) RemoveLabelledMetrics(target string) error {
return d.metrics.Remove(target)
}

// check performs DNS checks for all configured targets using a custom net.Resolver.
// Returns a map where each target is associated with its DNS check result.
func (d *DNS) check(ctx context.Context) map[string]result {
Expand Down
14 changes: 7 additions & 7 deletions pkg/checks/dns/dns_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,13 +168,13 @@ func TestDNS_Run(t *testing.T) {
cResult := make(chan checks.ResultDTO, 1)
defer close(cResult)

err := c.SetConfig(&Config{
err := c.UpdateConfig(&Config{
Targets: tt.targets,
Interval: 1 * time.Second,
Timeout: 5 * time.Millisecond,
})
if err != nil {
t.Fatalf("DNS.SetConfig() error = %v", err)
t.Fatalf("DNS.UpdateConfig() error = %v", err)
}

go func() {
Expand Down Expand Up @@ -218,11 +218,11 @@ func TestDNS_Run_Context_Done(t *testing.T) {
cResult := make(chan checks.ResultDTO, 1)
defer close(cResult)

err := c.SetConfig(&Config{
err := c.UpdateConfig(&Config{
Interval: time.Second,
})
if err != nil {
t.Fatalf("DNS.SetConfig() error = %v", err)
t.Fatalf("DNS.UpdateConfig() error = %v", err)
}

go func() {
Expand Down Expand Up @@ -256,7 +256,7 @@ func TestDNS_Shutdown(t *testing.T) {
}
}

func TestDNS_SetConfig(t *testing.T) {
func TestDNS_UpdateConfig(t *testing.T) {
tests := []struct {
name string
input checks.Runtime
Expand Down Expand Up @@ -301,8 +301,8 @@ func TestDNS_SetConfig(t *testing.T) {
t.Run(tt.name, func(t *testing.T) {
c := &DNS{}

if err := c.SetConfig(tt.input); (err != nil) != tt.wantErr {
t.Errorf("DNS.SetConfig() error = %v, wantErr %v", err, tt.wantErr)
if err := c.UpdateConfig(tt.input); (err != nil) != tt.wantErr {
t.Errorf("DNS.UpdateConfig() error = %v, wantErr %v", err, tt.wantErr)
}
assert.Equal(t, tt.want, c.config, "Config is not equal")
})
Expand Down
22 changes: 22 additions & 0 deletions pkg/checks/dns/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package dns

import (
"github.com/caas-team/sparrow/pkg/checks"
"github.com/prometheus/client_golang/prometheus"
)

Expand Down Expand Up @@ -81,3 +82,24 @@ func (m *metrics) Set(target string, results map[string]result, status float64)
m.status.WithLabelValues(target).Set(status)
m.count.WithLabelValues(target).Inc()
}

// Remove removes the metrics of one lookup target
func (m *metrics) Remove(target string) error {
if !m.status.DeleteLabelValues(target) {
return checks.ErrMetricNotFound{Label: target}
}

if !m.duration.DeleteLabelValues(target) {
return checks.ErrMetricNotFound{Label: target}
}

if !m.count.DeleteLabelValues(target) {
return checks.ErrMetricNotFound{Label: target}
}

if !m.histogram.DeleteLabelValues(target) {
return checks.ErrMetricNotFound{Label: target}
}

return nil
}
9 changes: 9 additions & 0 deletions pkg/checks/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,12 @@ type ErrInvalidConfig struct {
func (e ErrInvalidConfig) Error() string {
return fmt.Sprintf("invalid configuration field %q in check %q: %s", e.Field, e.CheckName, e.Reason)
}

// ErrMetricNotFound is returned when a metric is not found
type ErrMetricNotFound struct {
Label string
}

func (e ErrMetricNotFound) Error() string {
return fmt.Sprintf("metric %q not found", e.Label)
}
Loading

0 comments on commit fdb3ad9

Please sign in to comment.