Skip to content

Commit

Permalink
Alert squads on PodErrorEvents (#394)
Browse files Browse the repository at this point in the history
* init

* send slack msg

* Update cmd/daemon/kubernetes/kubernetes.go

Co-authored-by: Bjørn <bso@lunar.app>

* string value

Co-authored-by: Bjørn <bso@lunar.app>
  • Loading branch information
MariaCFFrandsen and Crevil authored Jul 22, 2022
1 parent 26a247a commit 16c6873
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 36 deletions.
11 changes: 6 additions & 5 deletions cmd/daemon/kubernetes/kubernetes.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,12 @@ func isCorrectlyAnnotated(annotations map[string]string) bool {
}

// Keys looked up in the annotations and labels of Kubernetes objects.
const (
observedAnnotationKey = "lunarway.com/observed-artifact-id"
artifactIDAnnotationKey = "lunarway.com/artifact-id"
authorAnnotationKey = "lunarway.com/author"
controlledAnnotationKey = "lunarway.com/controlled-by-release-manager"
squadLabelKey = "squad"
observedAnnotationKey = "lunarway.com/observed-artifact-id"
artifactIDAnnotationKey = "lunarway.com/artifact-id"
authorAnnotationKey = "lunarway.com/author"
controlledAnnotationKey = "lunarway.com/controlled-by-release-manager"
// runtimeAlertsAnnotationKey steers runtime alert routing for a pod: the
// value "false" turns squad alerts off, any other value is used as the
// channel to alert instead of the squad's default channel.
runtimeAlertsAnnotationKey = "lunarway.com/runtime-alerts"
squadLabelKey = "squad"
)

func observe(annotations map[string]string) {
Expand Down
53 changes: 24 additions & 29 deletions cmd/daemon/kubernetes/pods.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,14 @@ func (p *PodInformer) handle(e interface{}) {
}

ctx := context.Background()
event := http.PodErrorEvent{
PodName: pod.Name,
Namespace: pod.Namespace,
ArtifactID: pod.Annotations[artifactIDAnnotationKey],
AuthorEmail: pod.Annotations[authorAnnotationKey],
Squad: getCodeOwnerSquad(pod.Labels),
AlertSquad: alertSquad(getCodeOwnerSquad(pod.Labels), pod.Annotations),
}

if isPodInCrashLoopBackOff(pod) {
if isPodControlledByJob(pod) {
Expand Down Expand Up @@ -91,15 +99,8 @@ func (p *PodInformer) handle(e interface{}) {
})
}
}

err := p.exporter.SendPodErrorEvent(ctx, http.PodErrorEvent{
PodName: pod.Name,
Namespace: pod.Namespace,
Errors: errorContainers,
ArtifactID: pod.Annotations[artifactIDAnnotationKey],
AuthorEmail: pod.Annotations[authorAnnotationKey],
Squad: getCodeOwnerSquad(pod.Labels),
})
event.Errors = errorContainers
err := p.exporter.SendPodErrorEvent(ctx, event)
if err != nil {
log.Errorf("Failed to send crash loop backoff event: %v", err)
}
Expand All @@ -119,23 +120,15 @@ func (p *PodInformer) handle(e interface{}) {
})
}
}

err := p.exporter.SendPodErrorEvent(ctx, http.PodErrorEvent{
PodName: pod.Name,
Namespace: pod.Namespace,
Errors: errorContainers,
ArtifactID: pod.Annotations[artifactIDAnnotationKey],
AuthorEmail: pod.Annotations[authorAnnotationKey],
Squad: getCodeOwnerSquad(pod.Labels),
})
event.Errors = errorContainers
err := p.exporter.SendPodErrorEvent(ctx, event)
if err != nil {
log.Errorf("Failed to send create container config error: %v", err)
}
}

if isPodOOMKilled(pod) {
log.Infof("Pod: %s was OOMKilled owned by squad %s", pod.Name, getCodeOwnerSquad(pod.Labels))

var errorContainers []http.ContainerError
for _, cst := range pod.Status.ContainerStatuses {
if isContainerOOMKilled(cst) {
Expand All @@ -146,15 +139,8 @@ func (p *PodInformer) handle(e interface{}) {
})
}
}

err := p.exporter.SendPodErrorEvent(ctx, http.PodErrorEvent{
PodName: pod.Name,
Namespace: pod.Namespace,
Errors: errorContainers,
ArtifactID: pod.Annotations[artifactIDAnnotationKey],
AuthorEmail: pod.Annotations[authorAnnotationKey],
Squad: getCodeOwnerSquad(pod.Labels),
})
event.Errors = errorContainers
err := p.exporter.SendPodErrorEvent(ctx, event)
if err != nil {
log.Errorf("Failed to send create container config error: %v", err)
}
Expand Down Expand Up @@ -259,7 +245,6 @@ type ContainerLog struct {
func parseToJSONAray(str string) ([]ContainerLog, error) {
str = strings.ReplaceAll(str, "}\n{", "},{")
str = fmt.Sprintf("[%s]", str)

var logs []ContainerLog
err := json.Unmarshal([]byte(str), &logs)
if err != nil {
Expand All @@ -274,3 +259,13 @@ func getCodeOwnerSquad(labels map[string]string) string {
}
return "no-one"
}

// alertSquad resolves the Slack channel that should receive runtime alerts
// for a pod owned by squad.
//
// The runtimeAlertsAnnotationKey annotation, when present, takes precedence
// over the squad default:
//   - "false" opts the pod out and yields "". The comparison ignores case and
//     surrounding whitespace so that values such as "False" or " false " are
//     not mistaken for a channel name.
//   - any other value is returned, with surrounding whitespace removed, as
//     the channel to alert.
//
// Without the annotation the channel defaults to "#squad-<squad>-alerts".
// An empty result means that no squad channel should be alerted.
func alertSquad(squad string, annotations map[string]string) (alertChannel string) {
	value, ok := annotations[runtimeAlertsAnnotationKey]
	if !ok {
		return fmt.Sprintf("#squad-%s-alerts", squad)
	}

	value = strings.TrimSpace(value)
	if strings.EqualFold(value, "false") {
		return ""
	}
	return value
}
1 change: 1 addition & 0 deletions internal/http/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ type PodErrorEvent struct {
Environment string `json:"environment,omitempty"`
ArtifactID string `json:"artifactId,omitempty"`
Squad string `json:"squad,omitempty"`
AlertSquad string `json:"alertSquad,omitempty"`
}

type JobConditionError struct {
Expand Down
4 changes: 2 additions & 2 deletions internal/slack/slack.go
Original file line number Diff line number Diff line change
Expand Up @@ -313,8 +313,8 @@ func (c *Client) NotifyK8SPodErrorEvent(ctx context.Context, event *http.PodErro
if err != nil {
return err
}
if event.Squad == "aura" || event.Squad == "odyssey" { //check if you opted in
_, _, err = c.client.PostMessageContext(ctx, fmt.Sprintf("#squad-%s-alerts", event.Squad), asUser, attachments)
if event.AlertSquad != "" {
_, _, err = c.client.PostMessageContext(ctx, event.AlertSquad, asUser, attachments)
}
return err
}
Expand Down

0 comments on commit 16c6873

Please sign in to comment.