diff --git a/internal/incident/history.go b/internal/incident/history.go index 91780cf82..0e1de2a6f 100644 --- a/internal/incident/history.go +++ b/internal/incident/history.go @@ -7,7 +7,6 @@ import ( "github.com/icinga/icinga-notifications/internal/rule" "github.com/icinga/icinga-notifications/internal/utils" "github.com/icinga/icingadb/pkg/types" - "log" "time" ) @@ -101,7 +100,7 @@ func (i *Incident) AddRecipient(escalation *rule.Escalation, eventId int64) erro oldRole := state.Role state.Role = newRole - log.Printf("[%s %s] contact %q role changed from %s to %s", i.Object.DisplayName(), i.String(), r, state.Role.String(), newRole.String()) + i.logger.Infof("[%s %s] contact %q role changed from %s to %s", i.Object.DisplayName(), i.String(), r, state.Role.String(), newRole.String()) hr := &HistoryRow{ IncidentID: i.incidentRowID, diff --git a/internal/incident/incident.go b/internal/incident/incident.go index 193460bf7..6bc8bd022 100644 --- a/internal/incident/incident.go +++ b/internal/incident/incident.go @@ -222,14 +222,21 @@ func (i *Incident) processIncidentAndSourceSeverity(ev event.Event, created bool i.RecoveredAt = time.Now() i.logger.Infof("[%s %s] all sources recovered, closing incident", i.Object.DisplayName(), i.String()) + RemoveCurrent(i.Object) + + incidentRow := &IncidentRow{ID: i.incidentRowID, RecoveredAt: types.UnixMilli(i.RecoveredAt)} + _, err := i.db.NamedExec(`UPDATE "incident" SET "recovered_at" = :recovered_at WHERE id = :id`, incidentRow) + if err != nil { + return fmt.Errorf("failed to update current incident: %w", err) + } + history = &HistoryRow{ EventID: utils.ToDBInt(ev.ID), Time: types.UnixMilli(i.RecoveredAt), Type: Closed, } - if err = RemoveCurrent(i.Object, history); err != nil { - i.logger.Errorln(err) - + _, err = i.AddHistory(history, false) + if err != nil { return err } } diff --git a/internal/incident/incidents.go b/internal/incident/incidents.go new file mode 100644 index 000000000..c380ebe6c --- /dev/null +++ b/internal/incident/incidents.go @@ -0,0 +1,123 @@ +package incident + +import ( + "database/sql" + "fmt" + "github.com/icinga/icingadb/pkg/icingadb" + "github.com/icinga/icingadb/pkg/logging" + "github.com/icinga/noma/internal/config" + "github.com/icinga/noma/internal/event" + "github.com/icinga/noma/internal/object" + "github.com/icinga/noma/internal/recipient" + "sync" +) + +var ( + currentIncidents = make(map[*object.Object]*Incident) + currentIncidentsMu sync.Mutex +) + +func GetCurrent(db *icingadb.DB, obj *object.Object, logger *logging.Logger, runtimeConfig *config.RuntimeConfig, create bool) (*Incident, bool, error) { + currentIncidentsMu.Lock() + defer currentIncidentsMu.Unlock() + + created := false + currentIncident := currentIncidents[obj] + + if currentIncident == nil { + ir := &IncidentRow{} + incident := &Incident{Object: obj, db: db, logger: logger, runtimeConfig: runtimeConfig} + incident.SeverityBySource = make(map[int64]event.Severity) + incident.EscalationState = make(map[escalationID]*EscalationState) + incident.Recipients = make(map[recipient.Key]*RecipientState) + + err := db.QueryRowx(db.Rebind(db.BuildSelectStmt(ir, ir)+` WHERE "object_id" = ? AND "recovered_at" IS NULL`), obj.ID).StructScan(ir) + if err != nil && err != sql.ErrNoRows { + return nil, false, fmt.Errorf("incident query failed with: %w", err) + } else if err == nil { + incident.incidentRowID = ir.ID + incident.StartedAt = ir.StartedAt.Time() + + sourceSeverity := &SourceSeverity{IncidentID: ir.ID} + var sources []SourceSeverity + err := db.Select( + &sources, + db.Rebind(db.BuildSelectStmt(sourceSeverity, sourceSeverity)+` WHERE "incident_id" = ? AND "severity" != ?`), + ir.ID, event.SeverityOK, + ) + if err != nil { + return nil, false, fmt.Errorf("failed to fetch incident sources Severity: %w", err) + } + + for _, source := range sources { + incident.SeverityBySource[source.SourceID] = source.Severity + } + + state := &EscalationState{} + var states []*EscalationState + err = db.Select(&states, db.Rebind(db.BuildSelectStmt(state, state)+` WHERE "incident_id" = ?`), ir.ID) + if err != nil { + return nil, false, fmt.Errorf("failed to fetch incident rule escalation state: %w", err) + } + + for _, state := range states { + incident.EscalationState[state.RuleEscalationID] = state + } + + currentIncident = incident + } + + if create && currentIncident == nil { + created = true + currentIncident = incident + } + + if currentIncident != nil { + currentIncidents[obj] = currentIncident + } + } + + if !created && currentIncident != nil { + currentIncident.Lock() + defer currentIncident.Unlock() + + contact := &ContactRow{} + var contacts []*ContactRow + err := db.Select(&contacts, db.Rebind(db.BuildSelectStmt(contact, contact)+` WHERE "incident_id" = ?`), currentIncident.ID()) + if err != nil { + return nil, false, fmt.Errorf("failed to fetch incident recipients: %w", err) + } + + recipients := make(map[recipient.Key]*RecipientState) + for _, contact := range contacts { + recipients[contact.Key] = &RecipientState{Role: contact.Role} + } + + currentIncident.Recipients = recipients + } + + return currentIncident, created, nil +} + +func RemoveCurrent(obj *object.Object) { + currentIncidentsMu.Lock() + defer currentIncidentsMu.Unlock() + + currentIncident := currentIncidents[obj] + + if currentIncident != nil { + delete(currentIncidents, obj) + } +} + +// GetCurrentIncidents returns a map of all incidents for debugging purposes. +func GetCurrentIncidents() map[int64]*Incident { + currentIncidentsMu.Lock() + defer currentIncidentsMu.Unlock() + + m := make(map[int64]*Incident) + for _, incident := range currentIncidents { + m[incident.incidentRowID] = incident + } + return m +}