diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index 2d786f97b..95c226f60 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -7,6 +7,13 @@ jobs:
     name: lint
     runs-on: ubuntu-latest
     steps:
+      - name: Clear up disk space
+        run: |
+          rm -rf /usr/share/dotnet
+          rm -rf /opt/ghc
+          rm -rf /usr/local/share/boost
+          rm -rf $AGENT_TOOLSDIRECTORY
+          rm -rf /opt/hostedtoolcache
       - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
       - name: Install Go
         uses: buildjet/setup-go@v5
diff --git a/notification/context.go b/notification/context.go
index 6aa631f53..149d27229 100644
--- a/notification/context.go
+++ b/notification/context.go
@@ -1,9 +1,13 @@
 package notification
 
 import (
+	"encoding/json"
+
 	"github.com/flanksource/duty/context"
 	"github.com/flanksource/duty/models"
 	"github.com/google/uuid"
+	"github.com/samber/lo"
+	"github.com/samber/oops"
 )
 
 type RecipientType string
@@ -45,8 +49,30 @@ func (t *Context) WithRecipientType(recipientType RecipientType) {
 	t.recipientType = recipientType
 }
 
-func (t *Context) WithError(err string) {
-	t.log.Error = &err
+// WithError records err in t.log.Error.
+//
+// An oops error is stored as a JSON object with its structured fields under
+// "error" and its hint under "hint"; any other error is stored as its plain
+// message. A nil err is a no-op rather than a nil-pointer panic.
+func (t *Context) WithError(err error) {
+	if err == nil {
+		return
+	}
+
+	if o, ok := oops.AsOops(err); ok {
+		bb, jsonErr := json.Marshal(map[string]any{
+			"error": o.ToMap(),
+			"hint":  o.Hint(),
+		})
+		if jsonErr == nil {
+			t.log.Error = lo.ToPtr(string(bb))
+			return
+		}
+		// Marshalling failed: fall through so the plain message is still
+		// recorded instead of an empty string.
+	}
+
+	t.log.Error = lo.ToPtr(err.Error())
 }
 
 func (t *Context) WithSource(event string, resourceID uuid.UUID) {
diff --git a/notification/events.go b/notification/events.go
index 0304fabe2..0657134a7 100644
--- a/notification/events.go
+++ b/notification/events.go
@@ -264,7 +264,7 @@ func sendNotifications(ctx context.Context, events models.Events) models.Events
 			if err := json.Unmarshal(payload.Properties, &originalEvent.Properties); err != nil {
 				e.SetError(err.Error())
 				failedEvents = append(failedEvents, e)
-				notificationContext.WithError(err.Error())
+				notificationContext.WithError(err)
 				continue
 			}
 		}
@@ -273,11 +273,11 @@ func sendNotifications(ctx context.Context, events models.Events) models.Events
 		if err != nil {
 			e.SetError(err.Error())
 			failedEvents = append(failedEvents, e)
-			notificationContext.WithError(err.Error())
+			notificationContext.WithError(err)
 		} else if err := PrepareAndSendEventNotification(notificationContext, payload, celEnv); err != nil {
 			e.SetError(err.Error())
 			failedEvents = append(failedEvents, e)
-			notificationContext.WithError(err.Error())
+			notificationContext.WithError(err)
 		}
 
 		logs.IfError(notificationContext.EndLog(), "error persisting end of notification send history")
diff --git a/notification/notification_test.go b/notification/notification_test.go
index c3e99933d..d55d4cf14 100644
--- a/notification/notification_test.go
+++ b/notification/notification_test.go
@@ -10,6 +10,7 @@ import (
 	"github.com/flanksource/incident-commander/api"
 	dbModels "github.com/flanksource/incident-commander/db/models"
 	"github.com/flanksource/incident-commander/events"
+	"github.com/flanksource/incident-commander/notification"
 	"github.com/google/uuid"
 	"github.com/lib/pq"
 	ginkgo "github.com/onsi/ginkgo/v2"
@@ -18,7 +19,6 @@ import (
 
 	// register event handlers
 	_ "github.com/flanksource/incident-commander/incidents/responder"
-	_ "github.com/flanksource/incident-commander/notification"
 	_ "github.com/flanksource/incident-commander/playbook"
 	_ "github.com/flanksource/incident-commander/upstream"
 )
@@ -26,12 +26,20 @@ import (
 var _ = ginkgo.Describe("Notifications", ginkgo.Ordered, func() {
 	var _ = ginkgo.Describe("Notification on incident creation", ginkgo.Ordered, func() {
 		var (
+			notif     models.Notification
 			john      *models.Person
 			incident  *models.Incident
 			component *models.Component
 			team      *dbModels.Team
 		)
 
+		ginkgo.AfterAll(func() {
+			err := DefaultContext.DB().Delete(&notif).Error
+			Expect(err).To(BeNil())
+
+			notification.PurgeCache(notif.ID.String())
+		})
+
 		ginkgo.It("should create a person", func() {
 			john = &models.Person{
 				ID:   uuid.New(),
@@ -85,9 +93,9 @@ var _ = ginkgo.Describe("Notifications", ginkgo.Ordered, func() {
 		})
 
 		ginkgo.It("should create a new notification", func() {
-			notif := models.Notification{
+			notif = models.Notification{
 				ID:       uuid.New(),
-				Name:     "test-notif-1",
+				Name:     "incident-test-notification",
 				Events:   pq.StringArray([]string{"incident.created"}),
 				Template: "Severity: {{.incident.severity}}",
 				TeamID:   &team.ID,
@@ -144,7 +152,7 @@ var _ = ginkgo.Describe("Notifications", ginkgo.Ordered, func() {
 
 			n = models.Notification{
 				ID:             uuid.New(),
-				Name:           "test-notif-2",
+				Name:           "repeat-interval-test",
 				Events:         pq.StringArray([]string{"config.updated"}),
 				Source:         models.SourceCRD,
 				Title:          "Dummy",
@@ -168,6 +176,16 @@ var _ = ginkgo.Describe("Notifications", ginkgo.Ordered, func() {
 			Expect(err).To(BeNil())
 		})
 
+		ginkgo.AfterAll(func() {
+			err := DefaultContext.DB().Delete(&n).Error
+			Expect(err).To(BeNil())
+
+			err = DefaultContext.DB().Delete(&config).Error
+			Expect(err).To(BeNil())
+
+			notification.PurgeCache(n.ID.String())
+		})
+
 		ginkgo.It("should have sent a notification for a config update", func() {
 			event := models.Event{
 				Name:       "config.updated",
@@ -183,11 +201,13 @@ var _ = ginkgo.Describe("Notifications", ginkgo.Ordered, func() {
 				return c
 			}, "10s", "200ms").Should(Equal(int64(0)))
 
-			// Check send history
-			var sentHistoryCount int64
-			err = DefaultContext.DB().Model(&models.NotificationSendHistory{}).Where("notification_id = ?", n.ID).Count(&sentHistoryCount).Error
-			Expect(err).To(BeNil())
-			Expect(sentHistoryCount).To(Equal(int64(1)))
+			Eventually(func() int64 {
+				// Check send history
+				var sentHistoryCount int64
+				err = DefaultContext.DB().Model(&models.NotificationSendHistory{}).Where("notification_id = ?", n.ID).Count(&sentHistoryCount).Error
+				Expect(err).To(BeNil())
+				return sentHistoryCount
+			}, "10s", "200ms").Should(Equal(int64(1)))
 		})
 
 		ginkgo.It("should NOT have sent a notification for a subsequent config update", func() {
@@ -215,4 +235,147 @@ var _ = ginkgo.Describe("Notifications", ginkgo.Ordered, func() {
 			Expect(sentHistoryCount).To(Equal(int64(1)))
 		})
 	})
+
+	var _ = ginkgo.Describe("notification error handling on send", ginkgo.Ordered, func() {
+		var goodNotif models.Notification
+		var badNotif models.Notification
+		var deployment1 models.ConfigItem
+		var pod1 models.ConfigItem
+
+		ginkgo.BeforeAll(func() {
+			{
+				customReceiver := []api.NotificationConfig{
+					{
+						URL: fmt.Sprintf("generic+%s", webhookEndpoint),
+						Properties: map[string]string{
+							"disabletls": "yes",
+							"template":   "json",
+						},
+					},
+				}
+				customReceiverJson, err := json.Marshal(customReceiver)
+				Expect(err).To(BeNil())
+
+				goodNotif = models.Notification{
+					ID:             uuid.New(),
+					Name:           "test-notification-error-on-send-1",
+					Events:         pq.StringArray([]string{"config.updated"}),
+					Filter:         ".config.type == 'Kubernetes::Deployment'",
+					Source:         models.SourceCRD,
+					Title:          "Dummy",
+					Template:       "dummy",
+					CustomServices: types.JSON(customReceiverJson),
+				}
+
+				err = DefaultContext.DB().Create(&goodNotif).Error
+				Expect(err).To(BeNil())
+			}
+
+			{
+				badReceiver := []api.NotificationConfig{
+					{
+						URL: "generic+bad",
+						Properties: map[string]string{
+							"disabletls": "yes",
+							"template":   "json",
+						},
+					},
+				}
+				customReceiverJson, err := json.Marshal(badReceiver)
+				Expect(err).To(BeNil())
+
+				badNotif = models.Notification{
+					ID:             uuid.New(),
+					Name:           "test-notification-error-on-send-2",
+					Events:         pq.StringArray([]string{"config.updated"}),
+					Filter:         ".config.type == 'Kubernetes::Pod'",
+					Source:         models.SourceCRD,
+					Title:          "Dummy",
+					Template:       "dummy",
+					CustomServices: types.JSON(customReceiverJson),
+				}
+
+				err = DefaultContext.DB().Create(&badNotif).Error
+				Expect(err).To(BeNil())
+			}
+
+			{
+				deployment1 = models.ConfigItem{
+					ID:          uuid.New(),
+					Name:        lo.ToPtr("deployment-1"),
+					ConfigClass: models.ConfigClassDeployment,
+					Config:      lo.ToPtr(`{"replicas": 1}`),
+					Type:        lo.ToPtr("Kubernetes::Deployment"),
+				}
+
+				err := DefaultContext.DB().Create(&deployment1).Error
+				Expect(err).To(BeNil())
+			}
+
+			{
+				pod1 = models.ConfigItem{
+					ID:          uuid.New(),
+					Name:        lo.ToPtr("deployment-2"),
+					ConfigClass: models.ConfigClassDeployment,
+					Config:      lo.ToPtr(`{"replicas": 2}`),
+					Type:        lo.ToPtr("Kubernetes::Pod"),
+				}
+
+				err := DefaultContext.DB().Create(&pod1).Error
+				Expect(err).To(BeNil())
+			}
+		})
+
+		ginkgo.AfterAll(func() {
+			err := DefaultContext.DB().Delete(&goodNotif).Error
+			Expect(err).To(BeNil())
+			err = DefaultContext.DB().Delete(&badNotif).Error
+			Expect(err).To(BeNil())
+			err = DefaultContext.DB().Delete(&deployment1).Error
+			Expect(err).To(BeNil())
+			err = DefaultContext.DB().Delete(&pod1).Error
+			Expect(err).To(BeNil())
+
+			notification.PurgeCache(goodNotif.ID.String())
+			notification.PurgeCache(badNotif.ID.String())
+		})
+
+		ginkgo.It("should have consumed all events", func() {
+			testEvents := []models.Event{
+				{
+					Name:       "config.updated",
+					Properties: types.JSONStringMap{"id": deployment1.ID.String()},
+				}, {
+					Name:       "config.updated",
+					Properties: types.JSONStringMap{"id": pod1.ID.String()},
+				},
+			}
+			err := DefaultContext.DB().Create(&testEvents).Error
+			Expect(err).To(BeNil())
+
+			events.ConsumeAll(DefaultContext)
+			Eventually(func() int64 {
+				var c int64
+				DefaultContext.DB().Model(&models.Event{}).Where("name = 'notification.send'").Count(&c)
+				return c
+			}, "10s", "200ms").Should(Equal(int64(0)))
+		})
+
+		ginkgo.It("one notification.send event with max attempt should be in the event_queue", func() {
+			// Assert through g so a failed lookup is retried by Eventually instead of failing the spec at once.
+			Eventually(func(g Gomega) int {
+				var event models.Event
+				err := DefaultContext.DB().Where("name = 'notification.send'").First(&event).Error
+				g.Expect(err).To(BeNil())
+				return event.Attempts
+			}, "10s", "200ms").Should(Equal(4))
+		})
+
+		ginkgo.It("only one notification must have been sent", func() {
+			var sentHistoryCount int64
+			err := DefaultContext.DB().Model(&models.NotificationSendHistory{}).Where("notification_id = ?", goodNotif.ID).Count(&sentHistoryCount).Error
+			Expect(err).To(BeNil())
+			Expect(sentHistoryCount).To(Equal(int64(1)))
+		})
+	})
 })
diff --git a/notification/send.go b/notification/send.go
index 82b2aabe4..27b0c28e8 100644
--- a/notification/send.go
+++ b/notification/send.go
@@ -162,7 +162,12 @@ func SendNotification(ctx *Context, connectionName, shoutrrrURL string, celEnv m
 		if err := templater.Walk(&data); err != nil {
 			return "", fmt.Errorf("error templating notification: %w", err)
 		}
-		return "slack", SlackSend(ctx, connection.Password, connection.Username, data)
+
+		if err := SlackSend(ctx, connection.Password, connection.Username, data); err != nil {
+			return "", ctx.Oops().Hint(data.Message).Wrap(err)
+		}
+
+		return "slack", nil
 	}
 
 	service, err := shoutrrrSend(ctx, celEnv, shoutrrrURL, data)
diff --git a/notification/shoutrrr.go b/notification/shoutrrr.go
index 594ff44a6..b921d0c8c 100644
--- a/notification/shoutrrr.go
+++ b/notification/shoutrrr.go
@@ -117,7 +117,7 @@ func shoutrrrSend(ctx *Context, celEnv map[string]any, shoutrrrURL string, data
 	sendErrors := sender.Send(data.Message, params)
 	for _, err := range sendErrors {
 		if err != nil {
-			return "", fmt.Errorf("error publishing notification (service=%s): %w", service, err)
+			return "", ctx.Oops().Hint(data.Message).Wrapf(err, "error publishing notification (service=%s)", service)
 		}
 	}
 