Skip to content

Commit

Permalink
Ignore flapping state if flapping detection isn't enabled
Browse files Browse the repository at this point in the history
  • Loading branch information
yhabteab committed Jul 18, 2024
1 parent 9864d0e commit dd1c0db
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 10 deletions.
9 changes: 9 additions & 0 deletions internal/icinga2/api_responses.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ type HostServiceRuntimeAttributes struct {
Acknowledgement int `json:"acknowledgement"`
IsFlapping bool `json:"flapping"`
AcknowledgementLastChange UnixFloat `json:"acknowledgement_last_change"`
EnableFlapping bool `json:"enable_flapping"`
}

// MarshalLogObject implements the zapcore.ObjectMarshaler interface.
Expand Down Expand Up @@ -352,6 +353,14 @@ type ObjectCreatedDeleted struct {
EventType string `json:"type"`
}

// IcingaApplication holds the part of the IcingaApplication status that is decoded from an Icinga 2 API
// status endpoint query result.
// https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#status-and-statistics
type IcingaApplication struct {
	// App is decoded from the "app" object of the status payload.
	App struct {
		// EnableFlapping is decoded from the "enable_flapping" key of the "app" object.
		EnableFlapping bool `json:"enable_flapping"`
	} `json:"app"`
}

// UnmarshalEventStreamResponse unmarshal a JSON response line from the Icinga 2 API Event Stream.
//
// The function expects an Icinga 2 API Event Stream Response in its JSON form and tries to unmarshal it into one of the
Expand Down
12 changes: 9 additions & 3 deletions internal/icinga2/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,9 @@ func (client *Client) buildAcknowledgementEvent(ctx context.Context, ack *Acknow
if err != nil {
return nil, err
}
if !isMuted(queryResult) {
if muted, err := isMuted(ctx, client, queryResult); err != nil {
return nil, err
} else if !muted {
ev.Message = queryResult.Attrs.LastCheckResult.Output
ev.SetMute(false, "Acknowledgement cleared")
}
Expand Down Expand Up @@ -310,7 +312,9 @@ func (client *Client) buildDowntimeEvent(ctx context.Context, d Downtime, startE
if err != nil {
return nil, err
}
if !isMuted(queryResult) {
if muted, err := isMuted(ctx, client, queryResult); err != nil {
return nil, err
} else if !muted {
// When a downtime is cancelled/expired and there's no other active downtime/ack, we're going to send some
// notifications if there's still an active incident. Therefore, we need the most recent CheckResult of
// that Checkable to use it for the notifications.
Expand Down Expand Up @@ -347,7 +351,9 @@ func (client *Client) buildFlappingEvent(ctx context.Context, flapping *Flapping
if err != nil {
return nil, err
}
if !isMuted(queryResult) {
if muted, err := isMuted(ctx, client, queryResult); err != nil {
return nil, err
} else if !muted {
ev.Message = queryResult.Attrs.LastCheckResult.Output
ev.SetMute(false, reason)
}
Expand Down
54 changes: 49 additions & 5 deletions internal/icinga2/client_api.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,45 @@ func (client *Client) queryObjectsApiQuery(ctx context.Context, objType string,
})
}

// fetchIcingaAppStatus retrieves the global state of the IcingaApplication type via the /v1/status endpoint.
//
// On success the returned IcingaApplication is never nil: if the response contains no results, or its first
// result carries no "icingaapplication" status object, a zero-valued IcingaApplication is returned instead.
//
// Returns an error if the query via queryObjectsApi fails or the response body cannot be decoded as JSON.
func (client *Client) fetchIcingaAppStatus(ctx context.Context) (*IcingaApplication, error) {
	response, err := client.queryObjectsApi(
		ctx,
		[]string{"/v1/status/IcingaApplication/"},
		http.MethodGet,
		nil,
		map[string]string{"Accept": "application/json"})
	if err != nil {
		return nil, err
	}

	// Best-effort cleanup: consume whatever is left of the response before closing it. Errors are deliberately
	// ignored here as the decoded result (if any) is not affected by them.
	defer func() {
		_, _ = io.Copy(io.Discard, response)
		_ = response.Close()
	}()

	// status mirrors a single entry of the "results" array of the status query response.
	type status struct {
		Status struct {
			IcingaApplication *IcingaApplication `json:"icingaapplication"`
		} `json:"status"`
	}

	var results []status
	err = json.NewDecoder(response).Decode(&struct {
		Results *[]status `json:"results"`
	}{&results})
	if err != nil {
		return nil, err
	}

	// The decoded pointer stays nil when the "icingaapplication" key is missing or null. Handing that nil
	// pointer to the caller together with a nil error would make any field access on it panic, so only use
	// the decoded value when it is actually present.
	if len(results) != 0 && results[0].Status.IcingaApplication != nil {
		return results[0].Status.IcingaApplication, nil
	}

	return new(IcingaApplication), nil
}

// fetchCheckable fetches the Checkable config state of the given Host/Service name from the Icinga 2 API.
func (client *Client) fetchCheckable(ctx context.Context, host, service string) (*ObjectQueriesResult[HostServiceRuntimeAttributes], error) {
objType, objName := "host", host
Expand Down Expand Up @@ -260,8 +299,13 @@ func (client *Client) checkMissedChanges(ctx context.Context, objType string, ca
}

attrs := objQueriesResult.Attrs
checkableIsMuted, err := isMuted(ctx, client, &objQueriesResult)
if err != nil {
return err
}

var fakeEv *event.Event
if attrs.Acknowledgement != AcknowledgementNone {
if checkableIsMuted && attrs.Acknowledgement != AcknowledgementNone {
ackComment, err := client.fetchAcknowledgementComment(ctx, hostName, serviceName, attrs.AcknowledgementLastChange.Time())
if err != nil {
return fmt.Errorf("fetching acknowledgement comment for %q failed, %w", objectName, err)
Expand All @@ -275,17 +319,17 @@ func (client *Client) checkMissedChanges(ctx context.Context, objType string, ca
if err != nil {
return fmt.Errorf("failed to construct Event from Acknowledgement response, %w", err)
}
} else if isMuted(&objQueriesResult) {
} else if checkableIsMuted {
fakeEv, err = client.buildCommonEvent(ctx, hostName, serviceName)
if err != nil {
return fmt.Errorf("failed to construct checkable fake mute event: %w", err)
}

fakeEv.Type = event.TypeMute
if attrs.IsFlapping {
fakeEv.SetMute(true, "Checkable is flapping, but we missed the Icinga 2 FlappingStart event")
} else {
if attrs.DowntimeDepth != 0 {
fakeEv.SetMute(true, "Checkable is in downtime, but we missed the Icinga 2 DowntimeStart event")
} else {
fakeEv.SetMute(true, "Checkable is flapping, but we missed the Icinga 2 FlappingStart event")
}
} else {
// This could potentially produce numerous superfluous database (event table) entries if we generate such
Expand Down
23 changes: 21 additions & 2 deletions internal/icinga2/util.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package icinga2

import (
"context"
"net/url"
"strings"
)
Expand All @@ -18,6 +19,24 @@ func rawurlencode(s string) string {
}

// isMuted returns true if the given checkable is either in Downtime, Flapping or acknowledged, otherwise false.
func isMuted(checkable *ObjectQueriesResult[HostServiceRuntimeAttributes]) bool {
return checkable.Attrs.IsFlapping || checkable.Attrs.Acknowledgement != AcknowledgementNone || checkable.Attrs.DowntimeDepth != 0
//
// A flapping Checkable only counts as muted when flapping detection is enabled both for that Checkable itself and
// in the IcingaApplication status; if either of them is disabled, the flapping state is ignored.
//
// Returns an error if it fails to query the status of IcingaApplication from the /v1/status endpoint.
func isMuted(ctx context.Context, client *Client, checkable *ObjectQueriesResult[HostServiceRuntimeAttributes]) (bool, error) {
	attrs := &checkable.Attrs

	switch {
	case attrs.Acknowledgement != AcknowledgementNone, attrs.DowntimeDepth != 0:
		// Acknowledgements and downtimes mute the Checkable unconditionally.
		return true, nil
	case !attrs.IsFlapping, !attrs.EnableFlapping:
		// Either not flapping at all, or flapping detection is turned off for this Checkable.
		return false, nil
	}

	// The Checkable is flapping and has flapping detection enabled, so the IcingaApplication setting decides.
	appStatus, err := client.fetchIcingaAppStatus(ctx)
	if err != nil {
		return false, err
	}

	return appStatus.App.EnableFlapping, nil
}

0 comments on commit dd1c0db

Please sign in to comment.