From fcfb235bdc37e3579842b4841545cc12cbae3977 Mon Sep 17 00:00:00 2001 From: Yonas Habteab Date: Fri, 12 May 2023 13:13:29 +0200 Subject: [PATCH] Trigger time based escalations --- internal/event/event.go | 1 + internal/filter/contracts.go | 1 + internal/filter/parser.go | 16 ++--- internal/filter/parser_test.go | 20 +++--- internal/filter/types.go | 102 ++++++++++++++++++++--------- internal/incident/incident.go | 113 +++++++++++++++++++++++++++++++-- internal/rule/condition.go | 45 +++++++++---- 7 files changed, 236 insertions(+), 62 deletions(-) diff --git a/internal/event/event.go b/internal/event/event.go index e2a99c0f9..bd094b637 100644 --- a/internal/event/event.go +++ b/internal/event/event.go @@ -28,6 +28,7 @@ type Event struct { const ( TypeState = "state" TypeAcknowledgement = "acknowledgement" + TypeInternal = "internal" ) func (e *Event) String() string { diff --git a/internal/filter/contracts.go b/internal/filter/contracts.go index 6ccbb6bed..35a4abbad 100644 --- a/internal/filter/contracts.go +++ b/internal/filter/contracts.go @@ -12,4 +12,5 @@ type Filterable interface { // Filter is implemented by every filter chains and filter conditions. type Filter interface { Eval(filterable Filterable) (bool, error) + ExtractConditions() []Condition } diff --git a/internal/filter/parser.go b/internal/filter/parser.go index 1ce642bf7..31d73a6d9 100644 --- a/internal/filter/parser.go +++ b/internal/filter/parser.go @@ -257,24 +257,24 @@ func (p *Parser) createCondition(column string, operator string, value string) ( switch operator { case "=": if strings.Contains(value, "*") { - return &Like{column: column, value: value}, nil + return &Like{Condition{Column: column, Value: value}}, nil } - return &Equal{column: column, value: value}, nil + return &Equal{Condition{Column: column, Value: value}}, nil case "!=": if strings.Contains(value, "*") { - return &Unlike{column: column, value: value}, nil + return &Unlike{Condition{Column: column, Value: value}}, nil } - return &UnEqual{column: column, value: value}, nil + return &UnEqual{Condition{Column: column, Value: value}}, nil case ">": - return &GreaterThan{column: column, value: value}, nil + return &GreaterThan{Condition{Column: column, Value: value}}, nil case ">=": - return &GreaterThanOrEqual{column: column, value: value}, nil + return &GreaterThanOrEqual{Condition{Column: column, Value: value}}, nil case "<": - return &LessThan{column: column, value: value}, nil + return &LessThan{Condition{Column: column, Value: value}}, nil case "<=": - return &LessThanOrEqual{column: column, value: value}, nil + return &LessThanOrEqual{Condition{Column: column, Value: value}}, nil default: return nil, fmt.Errorf("invalid operator %s provided", operator) } diff --git a/internal/filter/parser_test.go b/internal/filter/parser_test.go index ebbba4c75..7246a7e1d 100644 --- a/internal/filter/parser_test.go +++ b/internal/filter/parser_test.go @@ -118,12 +118,12 @@ func TestFilter(t *testing.T) { expected := &All{rules: []Filter{ &None{rules: []Filter{ - &Equal{column: "foo", value: "bar"}, - &Equal{column: "bar", value: "foo"}, + &Equal{Condition{Column: "foo", Value: "bar"}}, + &Equal{Condition{Column: "bar", Value: "foo"}}, }}, &Any{rules: []Filter{ - &Equal{column: "foo", value: "bar"}, - &Equal{column: "bar", value: "foo"}, + &Equal{Condition{Column: "foo", Value: "bar"}}, + &Equal{Condition{Column: "bar", Value: "foo"}}, }}, }} assert.Equal(t, expected, rule) @@ -133,30 +133,30 @@ func TestFilter(t *testing.T) { rule, err := Parse("foo=bar") assert.Nil(t, err, "There should be no errors but got: %s", err) - expected := &Equal{column: "foo", value: "bar"} + expected := &Equal{Condition{Column: "foo", Value: "bar"}} assert.Equal(t, expected, rule, "Parser doesn't parse single condition correctly") }) t.Run("UrlEncodedFilterExpression", func(t *testing.T) { rule, err := Parse("col%3Cumnval%28ue") assert.Nil(t, err, "There should be no errors but got: %s", err) - assert.Equal(t, &GreaterThan{column: "col(umn", value: "val(ue"}, rule) + assert.Equal(t, &GreaterThan{Condition{Column: "col(umn", Value: "val(ue"}}, rule) rule, err = Parse("col%29umn>=val%29ue") assert.Nil(t, err, "There should be no errors but got: %s", err) - assert.Equal(t, &GreaterThanOrEqual{column: "col)umn", value: "val)ue"}, rule) + assert.Equal(t, &GreaterThanOrEqual{Condition{Column: "col)umn", Value: "val)ue"}}, rule) }) } diff --git a/internal/filter/types.go b/internal/filter/types.go index 3153cf33d..4445b2409 100644 --- a/internal/filter/types.go +++ b/internal/filter/types.go @@ -20,6 +20,10 @@ func (a *All) Eval(filterable Filterable) (bool, error) { return true, nil } +func (a *All) ExtractConditions() []Condition { + return extractConditions(a.rules) +} + // Any represents a filter chain type that matches when at least one of its Rules matches. type Any struct { rules []Filter @@ -40,6 +44,10 @@ func (a *Any) Eval(filterable Filterable) (bool, error) { return false, nil } +func (a *Any) ExtractConditions() []Condition { + return extractConditions(a.rules) +} + // None represents a filter chain type that matches when none of its Rules matches. type None struct { rules []Filter @@ -60,33 +68,45 @@ func (n *None) Eval(filterable Filterable) (bool, error) { return true, nil } +func (n *None) ExtractConditions() []Condition { + return extractConditions(n.rules) +} + // Condition represents a single filter condition. type Condition struct { - column string - value string + Column string + Value string } -func NewCondition(column string, value string) *Condition { - return &Condition{ - column: column, - value: value, +func NewCondition(column string, value string) Condition { + return Condition{ + Column: column, + Value: value, } } -type Exists Condition +func (e Condition) ExtractConditions() []Condition { + return []Condition{e} +} + +type Exists struct { + Condition +} func NewExists(column string) *Exists { - return &Exists{column: column} + return &Exists{Condition{Column: column}} } func (e *Exists) Eval(filterable Filterable) (bool, error) { - return filterable.EvalExists(e.column), nil + return filterable.EvalExists(e.Column), nil } -type Equal Condition +type Equal struct { + Condition +} func (e *Equal) Eval(filterable Filterable) (bool, error) { - match, err := filterable.EvalEqual(e.column, e.value) + match, err := filterable.EvalEqual(e.Column, e.Value) if err != nil { return false, err } @@ -94,21 +114,25 @@ func (e *Equal) Eval(filterable Filterable) (bool, error) { return match, nil } -type UnEqual Condition +type UnEqual struct { + Condition +} func (u *UnEqual) Eval(filterable Filterable) (bool, error) { - match, err := filterable.EvalEqual(u.column, u.value) + match, err := filterable.EvalEqual(u.Column, u.Value) if err != nil { return false, err } - return filterable.EvalExists(u.column) && !match, nil + return filterable.EvalExists(u.Column) && !match, nil } -type Like Condition +type Like struct { + Condition +} func (l *Like) Eval(filterable Filterable) (bool, error) { - match, err := filterable.EvalLike(l.column, l.value) + match, err := filterable.EvalLike(l.Column, l.Value) if err != nil { return false, err } @@ -116,21 +140,25 @@ func (l *Like) Eval(filterable Filterable) (bool, error) { return match, nil } -type Unlike Condition +type Unlike struct { + Condition +} func (u *Unlike) Eval(filterable Filterable) (bool, error) { - match, err := filterable.EvalLike(u.column, u.value) + match, err := filterable.EvalLike(u.Column, u.Value) if err != nil { return false, err } - return filterable.EvalExists(u.column) && !match, nil + return filterable.EvalExists(u.Column) && !match, nil } -type LessThan Condition +type LessThan struct { + Condition +} func (less *LessThan) Eval(filterable Filterable) (bool, error) { - match, err := filterable.EvalLess(less.column, less.value) + match, err := filterable.EvalLess(less.Column, less.Value) if err != nil { return false, err } @@ -138,10 +166,12 @@ func (less *LessThan) Eval(filterable Filterable) (bool, error) { return match, nil } -type LessThanOrEqual Condition +type LessThanOrEqual struct { + Condition +} func (loe *LessThanOrEqual) Eval(filterable Filterable) (bool, error) { - match, err := filterable.EvalLessOrEqual(loe.column, loe.value) + match, err := filterable.EvalLessOrEqual(loe.Column, loe.Value) if err != nil { return false, err } @@ -149,26 +179,40 @@ func (loe *LessThanOrEqual) Eval(filterable Filterable) (bool, error) { return match, nil } -type GreaterThan Condition +type GreaterThan struct { + Condition +} func (g *GreaterThan) Eval(filterable Filterable) (bool, error) { - match, err := filterable.EvalLessOrEqual(g.column, g.value) + match, err := filterable.EvalLessOrEqual(g.Column, g.Value) if err != nil { return false, err } - return filterable.EvalExists(g.column) && !match, nil + return filterable.EvalExists(g.Column) && !match, nil } -type GreaterThanOrEqual Condition +type GreaterThanOrEqual struct { + Condition +} func (goe *GreaterThanOrEqual) Eval(filterable Filterable) (bool, error) { - match, err := filterable.EvalLess(goe.column, goe.value) + match, err := filterable.EvalLess(goe.Column, goe.Value) if err != nil { return false, err } - return filterable.EvalExists(goe.column) && !match, nil + return filterable.EvalExists(goe.Column) && !match, nil +} + +// extractConditions extracts filter conditions from the specified filter rules. +func extractConditions(rules []Filter) []Condition { + var conditions []Condition + for _, rule := range rules { + conditions = append(conditions, rule.ExtractConditions()...) + } + + return conditions } var ( diff --git a/internal/incident/incident.go b/internal/incident/incident.go index f1c952ca3..9d2753af7 100644 --- a/internal/incident/incident.go +++ b/internal/incident/incident.go @@ -38,6 +38,8 @@ type Incident struct { incidentRowID int64 causedByHistoryID types.Int + scheduler *EscalationScheduler + db *icingadb.DB logger *logging.Logger runtimeConfig *config.RuntimeConfig @@ -276,6 +278,10 @@ func (i *Incident) processIncidentAndSourceSeverity(ctx context.Context, tx *sql return errors.New("can't insert incident closed history to the database") } + + if i.scheduler != nil && i.scheduler.timer != nil { + i.scheduler.timer.Stop() + } } if i.Rules == nil { @@ -335,6 +341,18 @@ func (i *Incident) evaluateRules(ctx context.Context, tx *sqlx.Tx, eventID int64 // evaluateEscalations evaluates this incidents rule escalations if they aren't already. // Returns error on database failure. func (i *Incident) evaluateEscalations() error { + cond := &rule.EscalationFilter{ + IncidentAge: time.Since(i.StartedAt), + IncidentSeverity: i.Severity(), + } + + if i.scheduler == nil { + i.scheduler = &EscalationScheduler{} + } else if i.scheduler.condition != nil { + cond = i.scheduler.condition + } + + var retryAfter time.Duration for rID := range i.Rules { r := i.runtimeConfig.Rules[rID] @@ -350,13 +368,34 @@ func (i *Incident) evaluateEscalations() error { if escalation.Condition == nil { matched = true } else { - cond := &rule.EscalationFilter{ - IncidentAge: time.Now().Sub(i.StartedAt), - IncidentSeverity: i.Severity(), + evaluateCond := func(cond *rule.EscalationFilter) (bool, error) { + before := &rule.EscalationFilter{ + IncidentAge: cond.IncidentAge - time.Nanosecond, + IncidentSeverity: cond.IncidentSeverity, + } + matched, err := escalation.Condition.Eval(before) + if err != nil { + return false, err + } + + if !matched { + matched, err = escalation.Condition.Eval(cond) + if err != nil { + return false, err + } + } + + if !matched { + return escalation.Condition.Eval( + &rule.EscalationFilter{IncidentAge: cond.IncidentAge + time.Nanosecond, IncidentSeverity: cond.IncidentSeverity}, + ) + } + + return matched, nil } var err error - matched, err = escalation.Condition.Eval(cond) + matched, err = evaluateCond(cond) if err != nil { i.logger.Warnf( "[%s %s] rule %q failed to evaulte escalation %q condition: %s", @@ -364,6 +403,15 @@ func (i *Incident) evaluateEscalations() error { ) matched = false + } else if !matched { + age := cond.ExtractRetryAfter(escalation.Condition.ExtractConditions(), cond.IncidentAge) + if age > 0 && (retryAfter <= 0 || retryAfter > age) { + retryAfter = time.Until(i.StartedAt.Add(age)) + i.scheduler.condition = cond + i.scheduler.condition.IncidentAge = age + } else { + i.scheduler.condition = nil + } } } @@ -374,6 +422,10 @@ func (i *Incident) evaluateEscalations() error { } } + if retryAfter > 0 && (i.scheduler.timer == nil || i.scheduler.completed) { + i.scheduler.timer = time.AfterFunc(retryAfter, i.RetriggerEscalations) + } + return nil } @@ -571,6 +623,53 @@ func (i *Incident) processAcknowledgmentEvent(ctx context.Context, tx *sqlx.Tx, return nil } +// RetriggerEscalations tries to re-evaluate the escalations and notify contacts. +func (i *Incident) RetriggerEscalations() { + defer func() { i.scheduler.completed = true }() + + ctx := context.TODO() + tx, err := i.db.BeginTxx(ctx, nil) + if err != nil { + i.logger.Errorln(err) + return + } + defer func() { _ = tx.Rollback() }() + + if err := i.ReloadRecipients(ctx, tx); err != nil { + i.Lock() + defer i.Unlock() + + i.logger.Errorln(err) + return + } + + i.Lock() + i.runtimeConfig.RLock() + defer func() { + i.runtimeConfig.RUnlock() + i.Unlock() + }() + + if !i.RecoveredAt.IsZero() { + // Incident is recovered in the meantime. + return + } + + if err := i.evaluateEscalations(); err != nil { + return + } + + err = i.notifyContacts(ctx, tx, &event.Event{Time: time.Now(), Type: event.TypeInternal}) + if err != nil { + i.logger.Errorw("can't notify contacts", zap.Error(err)) + return + } + + if err = tx.Commit(); err != nil { + i.logger.Errorw("failed to commit time based escalations transaction", zap.Error(err)) + } +} + // ReloadRecipients reloads the current incident recipients from the database. // Returns error on database failure. func (i *Incident) ReloadRecipients(ctx context.Context, tx *sqlx.Tx) error { @@ -609,6 +708,12 @@ type RecipientState struct { Role ContactRole } +type EscalationScheduler struct { + timer *time.Timer + condition *rule.EscalationFilter + completed bool +} + var ( _ contracts.Incident = (*Incident)(nil) ) diff --git a/internal/rule/condition.go b/internal/rule/condition.go index 77e005f99..8fa1480e9 100644 --- a/internal/rule/condition.go +++ b/internal/rule/condition.go @@ -3,6 +3,7 @@ package rule import ( "fmt" "github.com/icinga/icinga-notifications/internal/event" + "github.com/icinga/icinga-notifications/internal/filter" "time" ) @@ -11,7 +12,29 @@ type EscalationFilter struct { IncidentSeverity event.Severity } -func (c *EscalationFilter) EvalEqual(key string, value string) (bool, error) { +// ExtractRetryAfter extracts a time.Duration from the given filter conditions. +// The retry after duration is extracted from the specified filter conditions that evaluate the "incident_age" +// filter column. This function returns time.Duration(0) when all the incident age filter values are < the actual +// incident age or none of the filter conditions evaluates the "incident_age" column. +func (e *EscalationFilter) ExtractRetryAfter(conditions []filter.Condition, incidentAge time.Duration) time.Duration { + var retryAfter time.Duration + for _, condition := range conditions { + if condition.Column == "incident_age" { + age, err := time.ParseDuration(condition.Value) + if err != nil { + continue + } + + if incidentAge < age && (retryAfter <= 0 || retryAfter > age) { + retryAfter = age + } + } + } + + return retryAfter +} + +func (e *EscalationFilter) EvalEqual(key string, value string) (bool, error) { switch key { case "incident_age": age, err := time.ParseDuration(value) @@ -19,20 +42,20 @@ func (c *EscalationFilter) EvalEqual(key string, value string) (bool, error) { return false, err } - return c.IncidentAge == age, nil + return e.IncidentAge == age, nil case "incident_severity": severity, err := event.GetSeverityByName(value) if err != nil { return false, err } - return c.IncidentSeverity == severity, nil + return e.IncidentSeverity == severity, nil default: return false, nil } } -func (c *EscalationFilter) EvalLess(key string, value string) (bool, error) { +func (e *EscalationFilter) EvalLess(key string, value string) (bool, error) { switch key { case "incident_age": age, err := time.ParseDuration(value) @@ -40,24 +63,24 @@ func (c *EscalationFilter) EvalLess(key string, value string) (bool, error) { return false, err } - return c.IncidentAge < age, nil + return e.IncidentAge < age, nil case "incident_severity": severity, err := event.GetSeverityByName(value) if err != nil { return false, err } - return c.IncidentSeverity < severity, nil + return e.IncidentSeverity < severity, nil default: return false, nil } } -func (c *EscalationFilter) EvalLike(key string, value string) (bool, error) { +func (e *EscalationFilter) EvalLike(key string, value string) (bool, error) { return false, fmt.Errorf("escalation filter doesn't support wildcard matches") } -func (c *EscalationFilter) EvalLessOrEqual(key string, value string) (bool, error) { +func (e *EscalationFilter) EvalLessOrEqual(key string, value string) (bool, error) { switch key { case "incident_age": age, err := time.ParseDuration(value) @@ -65,20 +88,20 @@ func (c *EscalationFilter) EvalLessOrEqual(key string, value string) (bool, erro return false, err } - return c.IncidentAge <= age, nil + return e.IncidentAge <= age, nil case "incident_severity": severity, err := event.GetSeverityByName(value) if err != nil { return false, err } - return c.IncidentSeverity <= severity, nil + return e.IncidentSeverity <= severity, nil default: return false, nil } } -func (c *EscalationFilter) EvalExists(key string) bool { +func (e *EscalationFilter) EvalExists(key string) bool { switch key { case "incident_age": fallthrough