Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ignore flapping state if flapping detection isn't enabled #230

Merged
merged 1 commit into from
Jul 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions internal/icinga2/api_responses.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ type HostServiceRuntimeAttributes struct {
Acknowledgement int `json:"acknowledgement"`
IsFlapping bool `json:"flapping"`
AcknowledgementLastChange UnixFloat `json:"acknowledgement_last_change"`
EnableFlapping bool `json:"enable_flapping"`
}

// MarshalLogObject implements the zapcore.ObjectMarshaler interface.
Expand Down Expand Up @@ -352,6 +353,14 @@ type ObjectCreatedDeleted struct {
EventType string `json:"type"`
}

// IcingaApplication represents the Icinga 2 API status endpoint query result of type IcingaApplication.
// https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#status-and-statistics
//
// Only the fields declared here are mapped; the struct mirrors the nesting of the JSON payload.
type IcingaApplication struct {
// App maps the "app" object of the IcingaApplication status.
App struct {
// EnableFlapping maps the "enable_flapping" key.
// NOTE(review): presumably the application-wide flapping detection switch - confirm against the Icinga 2 docs.
EnableFlapping bool `json:"enable_flapping"`
} `json:"app"`
}

// UnmarshalEventStreamResponse unmarshals a JSON response line from the Icinga 2 API Event Stream.
//
// The function expects an Icinga 2 API Event Stream Response in its JSON form and tries to unmarshal it into one of the
Expand Down
12 changes: 9 additions & 3 deletions internal/icinga2/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,9 @@ func (client *Client) buildAcknowledgementEvent(ctx context.Context, ack *Acknow
if err != nil {
return nil, err
}
if !isMuted(queryResult) {
if muted, err := isMuted(ctx, client, queryResult); err != nil {
return nil, err
} else if !muted {
ev.Message = queryResult.Attrs.LastCheckResult.Output
ev.SetMute(false, "Acknowledgement cleared")
}
Expand Down Expand Up @@ -310,7 +312,9 @@ func (client *Client) buildDowntimeEvent(ctx context.Context, d Downtime, startE
if err != nil {
return nil, err
}
if !isMuted(queryResult) {
if muted, err := isMuted(ctx, client, queryResult); err != nil {
return nil, err
} else if !muted {
// When a downtime is cancelled/expired and there's no other active downtime/ack, we're going to send some
// notifications if there's still an active incident. Therefore, we need the most recent CheckResult of
// that Checkable to use it for the notifications.
Expand Down Expand Up @@ -347,7 +351,9 @@ func (client *Client) buildFlappingEvent(ctx context.Context, flapping *Flapping
if err != nil {
return nil, err
}
if !isMuted(queryResult) {
if muted, err := isMuted(ctx, client, queryResult); err != nil {
return nil, err
} else if !muted {
ev.Message = queryResult.Attrs.LastCheckResult.Output
ev.SetMute(false, reason)
}
Expand Down
53 changes: 48 additions & 5 deletions internal/icinga2/client_api.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,44 @@ func (client *Client) queryObjectsApiQuery(ctx context.Context, objType string,
})
}

// fetchIcingaAppStatus retrieves the global state of the IcingaApplication type via the /v1/status endpoint.
func (client *Client) fetchIcingaAppStatus(ctx context.Context) (*IcingaApplication, error) {
response, err := client.queryObjectsApi(
ctx,
[]string{"/v1/status/IcingaApplication/"},
yhabteab marked this conversation as resolved.
Show resolved Hide resolved
http.MethodGet,
nil,
map[string]string{"Accept": "application/json"})
if err != nil {
return nil, err
}

defer func() {
_, _ = io.Copy(io.Discard, response)
_ = response.Close()
}()

type status struct {
Status struct {
IcingaApplication *IcingaApplication `json:"icingaapplication"`
} `json:"status"`
}

var results []status
err = json.NewDecoder(response).Decode(&struct {
Results *[]status `json:"results"`
}{&results})
if err != nil {
return nil, err
}

if len(results) == 0 {
return nil, fmt.Errorf("unable to fetch IcingaApplication status")
}

return results[0].Status.IcingaApplication, nil
}

// fetchCheckable fetches the Checkable config state of the given Host/Service name from the Icinga 2 API.
func (client *Client) fetchCheckable(ctx context.Context, host, service string) (*ObjectQueriesResult[HostServiceRuntimeAttributes], error) {
objType, objName := "host", host
Expand Down Expand Up @@ -260,8 +298,13 @@ func (client *Client) checkMissedChanges(ctx context.Context, objType string, ca
}

attrs := objQueriesResult.Attrs
checkableIsMuted, err := isMuted(ctx, client, &objQueriesResult)
if err != nil {
return err
}

var fakeEv *event.Event
if attrs.Acknowledgement != AcknowledgementNone {
if checkableIsMuted && attrs.Acknowledgement != AcknowledgementNone {
ackComment, err := client.fetchAcknowledgementComment(ctx, hostName, serviceName, attrs.AcknowledgementLastChange.Time())
if err != nil {
return fmt.Errorf("fetching acknowledgement comment for %q failed, %w", objectName, err)
Expand All @@ -275,17 +318,17 @@ func (client *Client) checkMissedChanges(ctx context.Context, objType string, ca
if err != nil {
return fmt.Errorf("failed to construct Event from Acknowledgement response, %w", err)
}
} else if isMuted(&objQueriesResult) {
} else if checkableIsMuted {
fakeEv, err = client.buildCommonEvent(ctx, hostName, serviceName)
if err != nil {
return fmt.Errorf("failed to construct checkable fake mute event: %w", err)
}

fakeEv.Type = event.TypeMute
if attrs.IsFlapping {
fakeEv.SetMute(true, "Checkable is flapping, but we missed the Icinga 2 FlappingStart event")
} else {
if attrs.DowntimeDepth != 0 {
oxzi marked this conversation as resolved.
Show resolved Hide resolved
fakeEv.SetMute(true, "Checkable is in downtime, but we missed the Icinga 2 DowntimeStart event")
} else {
fakeEv.SetMute(true, "Checkable is flapping, but we missed the Icinga 2 FlappingStart event")
}
} else {
// This could potentially produce numerous superfluous database (event table) entries if we generate such
Expand Down
23 changes: 21 additions & 2 deletions internal/icinga2/util.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package icinga2

import (
"context"
"net/url"
"strings"
)
Expand All @@ -18,6 +19,24 @@ func rawurlencode(s string) string {
}

// isMuted returns true if the given checkable is either in Downtime, Flapping or acknowledged, otherwise false.
func isMuted(checkable *ObjectQueriesResult[HostServiceRuntimeAttributes]) bool {
return checkable.Attrs.IsFlapping || checkable.Attrs.Acknowledgement != AcknowledgementNone || checkable.Attrs.DowntimeDepth != 0
//
// A Flapping checkable is only considered muted if flapping detection is enabled both for that Checkable and
// globally in the IcingaApplication status; if either of them is disabled, the flapping state is ignored.
yhabteab marked this conversation as resolved.
Show resolved Hide resolved
//
// Returns an error if it fails to query the status of IcingaApplication from the /v1/status endpoint.
func isMuted(ctx context.Context, client *Client, checkable *ObjectQueriesResult[HostServiceRuntimeAttributes]) (bool, error) {
if checkable.Attrs.Acknowledgement != AcknowledgementNone || checkable.Attrs.DowntimeDepth != 0 {
return true, nil
}

if checkable.Attrs.IsFlapping && checkable.Attrs.EnableFlapping {
status, err := client.fetchIcingaAppStatus(ctx)
if err != nil {
return false, err
}
yhabteab marked this conversation as resolved.
Show resolved Hide resolved

return status.App.EnableFlapping, nil
}

return false, nil
}
Loading