From 39f9fc69002a71e7bd6bda2f362443ff1fce6305 Mon Sep 17 00:00:00 2001 From: Srikanth Chekuri Date: Fri, 13 Sep 2024 17:30:02 +0530 Subject: [PATCH] fix: missing related logs or traces links in alert notification (#5946) --- pkg/query-service/rules/base_rule.go | 1 + pkg/query-service/rules/promrule_test.go | 2 +- .../rules/threshold_rule_test.go | 214 +++++++++++++++++- .../rules/threshold_rule_test_data.go | 68 ++++++ 4 files changed, 276 insertions(+), 9 deletions(-) create mode 100644 pkg/query-service/rules/threshold_rule_test_data.go diff --git a/pkg/query-service/rules/base_rule.go b/pkg/query-service/rules/base_rule.go index 492f6f685c..6b359d5d38 100644 --- a/pkg/query-service/rules/base_rule.go +++ b/pkg/query-service/rules/base_rule.go @@ -109,6 +109,7 @@ func NewBaseRule(id string, p *PostableRule, reader interfaces.Reader, opts ...R id: id, name: p.AlertName, source: p.Source, + typ: p.AlertType, ruleCondition: p.RuleCondition, evalWindow: time.Duration(p.EvalWindow), labels: qslabels.FromMap(p.Labels), diff --git a/pkg/query-service/rules/promrule_test.go b/pkg/query-service/rules/promrule_test.go index 7c559d1eee..c87ef2cee9 100644 --- a/pkg/query-service/rules/promrule_test.go +++ b/pkg/query-service/rules/promrule_test.go @@ -13,7 +13,7 @@ import ( func TestPromRuleShouldAlert(t *testing.T) { postableRule := PostableRule{ AlertName: "Test Rule", - AlertType: "METRIC_BASED_ALERT", + AlertType: AlertTypeMetric, RuleType: RuleTypeProm, EvalWindow: Duration(5 * time.Minute), Frequency: Duration(1 * time.Minute), diff --git a/pkg/query-service/rules/threshold_rule_test.go b/pkg/query-service/rules/threshold_rule_test.go index ab37ad6af1..e000d71257 100644 --- a/pkg/query-service/rules/threshold_rule_test.go +++ b/pkg/query-service/rules/threshold_rule_test.go @@ -18,7 +18,7 @@ import ( func TestThresholdRuleShouldAlert(t *testing.T) { postableRule := PostableRule{ AlertName: "Tricky Condition Tests", - AlertType: "METRIC_BASED_ALERT", + AlertType: AlertTypeMetric, RuleType: RuleTypeThreshold, EvalWindow: Duration(5 * time.Minute), Frequency: Duration(1 * time.Minute), @@ -788,7 +788,7 @@ func TestPrepareLinksToLogs(t *testing.T) { func TestPrepareLinksToTraces(t *testing.T) { postableRule := PostableRule{ AlertName: "Links to traces test", - AlertType: "TRACES_BASED_ALERT", + AlertType: AlertTypeTraces, RuleType: RuleTypeThreshold, EvalWindow: Duration(5 * time.Minute), Frequency: Duration(1 * time.Minute), @@ -830,7 +830,7 @@ func TestPrepareLinksToTraces(t *testing.T) { func TestThresholdRuleLabelNormalization(t *testing.T) { postableRule := PostableRule{ AlertName: "Tricky Condition Tests", - AlertType: "METRIC_BASED_ALERT", + AlertType: AlertTypeMetric, RuleType: RuleTypeThreshold, EvalWindow: Duration(5 * time.Minute), Frequency: Duration(1 * time.Minute), @@ -914,7 +914,7 @@ func TestThresholdRuleLabelNormalization(t *testing.T) { func TestThresholdRuleEvalDelay(t *testing.T) { postableRule := PostableRule{ AlertName: "Test Eval Delay", - AlertType: "METRIC_BASED_ALERT", + AlertType: AlertTypeMetric, RuleType: RuleTypeThreshold, EvalWindow: Duration(5 * time.Minute), Frequency: Duration(1 * time.Minute), @@ -963,7 +963,7 @@ func TestThresholdRuleEvalDelay(t *testing.T) { func TestThresholdRuleClickHouseTmpl(t *testing.T) { postableRule := PostableRule{ AlertName: "Tricky Condition Tests", - AlertType: "METRIC_BASED_ALERT", + AlertType: AlertTypeMetric, RuleType: RuleTypeThreshold, EvalWindow: Duration(5 * time.Minute), Frequency: Duration(1 * time.Minute), @@ -1019,7 +1019,7 @@ func (m *queryMatcherAny) Match(string, string) error { func TestThresholdRuleUnitCombinations(t *testing.T) { postableRule := PostableRule{ AlertName: "Units test", - AlertType: "METRIC_BASED_ALERT", + AlertType: AlertTypeMetric, RuleType: RuleTypeThreshold, EvalWindow: Duration(5 * time.Minute), Frequency: Duration(1 * time.Minute), @@ -1170,8 +1170,8 @@ func TestThresholdRuleUnitCombinations(t *testing.T) { func TestThresholdRuleNoData(t *testing.T) { postableRule := PostableRule{ - AlertName: "Units test", - AlertType: "METRIC_BASED_ALERT", + AlertName: "No data test", + AlertType: AlertTypeMetric, RuleType: RuleTypeThreshold, EvalWindow: Duration(5 * time.Minute), Frequency: Duration(1 * time.Minute), @@ -1261,3 +1261,201 @@ func TestThresholdRuleNoData(t *testing.T) { } } } + +func TestThresholdRuleTracesLink(t *testing.T) { + postableRule := PostableRule{ + AlertName: "Traces link test", + AlertType: AlertTypeTraces, + RuleType: RuleTypeThreshold, + EvalWindow: Duration(5 * time.Minute), + Frequency: Duration(1 * time.Minute), + RuleCondition: &RuleCondition{ + CompositeQuery: &v3.CompositeQuery{ + QueryType: v3.QueryTypeBuilder, + BuilderQueries: map[string]*v3.BuilderQuery{ + "A": { + QueryName: "A", + StepInterval: 60, + AggregateAttribute: v3.AttributeKey{ + Key: "durationNano", + }, + AggregateOperator: v3.AggregateOperatorP95, + DataSource: v3.DataSourceTraces, + Expression: "A", + Filters: &v3.FilterSet{ + Operator: "AND", + Items: []v3.FilterItem{ + { + Key: v3.AttributeKey{Key: "httpMethod", IsColumn: true, Type: v3.AttributeKeyTypeTag, DataType: v3.AttributeKeyDataTypeString}, + Value: "GET", + Operator: v3.FilterOperatorEqual, + }, + }, + }, + }, + }, + }, + }, + } + fm := featureManager.StartManager() + mock, err := cmock.NewClickHouseWithQueryMatcher(nil, &queryMatcherAny{}) + if err != nil { + t.Errorf("an error '%s' was not expected when opening a stub database connection", err) + } + + cols := make([]cmock.ColumnType, 0) + cols = append(cols, cmock.ColumnType{Name: "value", Type: "Float64"}) + cols = append(cols, cmock.ColumnType{Name: "attr", Type: "String"}) + cols = append(cols, cmock.ColumnType{Name: "timestamp", Type: "String"}) + + for idx, c := range testCases { + rows := cmock.NewRows(cols, c.values) + + // We are testing the eval logic after the query is run + // so we don't care about the query string here + queryString := "SELECT any" + mock. + ExpectQuery(queryString). + WillReturnRows(rows) + postableRule.RuleCondition.CompareOp = CompareOp(c.compareOp) + postableRule.RuleCondition.MatchType = MatchType(c.matchType) + postableRule.RuleCondition.Target = &c.target + postableRule.RuleCondition.CompositeQuery.Unit = c.yAxisUnit + postableRule.RuleCondition.TargetUnit = c.targetUnit + postableRule.Annotations = map[string]string{ + "description": "This alert is fired when the defined metric (current value: {{$value}}) crosses the threshold ({{$threshold}})", + "summary": "The rule threshold is set to {{$threshold}}, and the observed metric value is {{$value}}", + } + + options := clickhouseReader.NewOptions("", 0, 0, 0, "", "archiveNamespace") + reader := clickhouseReader.NewReaderFromClickhouseConnection(mock, options, nil, "", fm, "", true) + + rule, err := NewThresholdRule("69", &postableRule, fm, reader, true) + rule.temporalityMap = map[string]map[v3.Temporality]bool{ + "signoz_calls_total": { + v3.Delta: true, + }, + } + if err != nil { + assert.NoError(t, err) + } + + retVal, err := rule.Eval(context.Background(), time.Now()) + if err != nil { + assert.NoError(t, err) + } + + if c.expectAlerts == 0 { + assert.Equal(t, 0, retVal.(int), "case %d", idx) + } else { + assert.Equal(t, c.expectAlerts, retVal.(int), "case %d", idx) + for _, item := range rule.active { + for name, value := range item.Annotations.Map() { + if name == "related_traces" { + assert.NotEmpty(t, value, "case %d", idx) + assert.Contains(t, value, "GET") + } + } + } + } + } +} + +func TestThresholdRuleLogsLink(t *testing.T) { + postableRule := PostableRule{ + AlertName: "Logs link test", + AlertType: AlertTypeLogs, + RuleType: RuleTypeThreshold, + EvalWindow: Duration(5 * time.Minute), + Frequency: Duration(1 * time.Minute), + RuleCondition: &RuleCondition{ + CompositeQuery: &v3.CompositeQuery{ + QueryType: v3.QueryTypeBuilder, + BuilderQueries: map[string]*v3.BuilderQuery{ + "A": { + QueryName: "A", + StepInterval: 60, + AggregateAttribute: v3.AttributeKey{ + Key: "component", + }, + AggregateOperator: v3.AggregateOperatorCountDistinct, + DataSource: v3.DataSourceLogs, + Expression: "A", + Filters: &v3.FilterSet{ + Operator: "AND", + Items: []v3.FilterItem{ + { + Key: v3.AttributeKey{Key: "k8s.container.name", IsColumn: false, Type: v3.AttributeKeyTypeTag, DataType: v3.AttributeKeyDataTypeString}, + Value: "testcontainer", + Operator: v3.FilterOperatorEqual, + }, + }, + }, + }, + }, + }, + }, + } + fm := featureManager.StartManager() + mock, err := cmock.NewClickHouseWithQueryMatcher(nil, &queryMatcherAny{}) + if err != nil { + t.Errorf("an error '%s' was not expected when opening a stub database connection", err) + } + + cols := make([]cmock.ColumnType, 0) + cols = append(cols, cmock.ColumnType{Name: "value", Type: "Float64"}) + cols = append(cols, cmock.ColumnType{Name: "attr", Type: "String"}) + cols = append(cols, cmock.ColumnType{Name: "timestamp", Type: "String"}) + + for idx, c := range testCases { + rows := cmock.NewRows(cols, c.values) + + // We are testing the eval logic after the query is run + // so we don't care about the query string here + queryString := "SELECT any" + mock. + ExpectQuery(queryString). + WillReturnRows(rows) + postableRule.RuleCondition.CompareOp = CompareOp(c.compareOp) + postableRule.RuleCondition.MatchType = MatchType(c.matchType) + postableRule.RuleCondition.Target = &c.target + postableRule.RuleCondition.CompositeQuery.Unit = c.yAxisUnit + postableRule.RuleCondition.TargetUnit = c.targetUnit + postableRule.Annotations = map[string]string{ + "description": "This alert is fired when the defined metric (current value: {{$value}}) crosses the threshold ({{$threshold}})", + "summary": "The rule threshold is set to {{$threshold}}, and the observed metric value is {{$value}}", + } + + options := clickhouseReader.NewOptions("", 0, 0, 0, "", "archiveNamespace") + reader := clickhouseReader.NewReaderFromClickhouseConnection(mock, options, nil, "", fm, "", true) + + rule, err := NewThresholdRule("69", &postableRule, fm, reader, true) + rule.temporalityMap = map[string]map[v3.Temporality]bool{ + "signoz_calls_total": { + v3.Delta: true, + }, + } + if err != nil { + assert.NoError(t, err) + } + + retVal, err := rule.Eval(context.Background(), time.Now()) + if err != nil { + assert.NoError(t, err) + } + + if c.expectAlerts == 0 { + assert.Equal(t, 0, retVal.(int), "case %d", idx) + } else { + assert.Equal(t, c.expectAlerts, retVal.(int), "case %d", idx) + for _, item := range rule.active { + for name, value := range item.Annotations.Map() { + if name == "related_logs" { + assert.NotEmpty(t, value, "case %d", idx) + assert.Contains(t, value, "testcontainer") + } + } + } + } + } +} diff --git a/pkg/query-service/rules/threshold_rule_test_data.go b/pkg/query-service/rules/threshold_rule_test_data.go new file mode 100644 index 0000000000..3a28bdf38b --- /dev/null +++ b/pkg/query-service/rules/threshold_rule_test_data.go @@ -0,0 +1,68 @@ +package rules + +import "time" + +var ( + testCases = []struct { + targetUnit string + yAxisUnit string + values [][]interface{} + expectAlerts int + compareOp string + matchType string + target float64 + summaryAny []string + }{ + { + targetUnit: "s", + yAxisUnit: "ns", + values: [][]interface{}{ + {float64(572588400), "attr", time.Now()}, // 0.57 seconds + {float64(572386400), "attr", time.Now().Add(1 * time.Second)}, // 0.57 seconds + {float64(300947400), "attr", time.Now().Add(2 * time.Second)}, // 0.3 seconds + {float64(299316000), "attr", time.Now().Add(3 * time.Second)}, // 0.3 seconds + {float64(66640400.00000001), "attr", time.Now().Add(4 * time.Second)}, // 0.06 seconds + }, + expectAlerts: 0, + compareOp: "1", // Above + matchType: "1", // Once + target: 1, // 1 second + }, + { + targetUnit: "ms", + yAxisUnit: "ns", + values: [][]interface{}{ + {float64(572588400), "attr", time.Now()}, // 572.58 ms + {float64(572386400), "attr", time.Now().Add(1 * time.Second)}, // 572.38 ms + {float64(300947400), "attr", time.Now().Add(2 * time.Second)}, // 300.94 ms + {float64(299316000), "attr", time.Now().Add(3 * time.Second)}, // 299.31 ms + {float64(66640400.00000001), "attr", time.Now().Add(4 * time.Second)}, // 66.64 ms + }, + expectAlerts: 4, + compareOp: "1", // Above + matchType: "1", // Once + target: 200, // 200 ms + summaryAny: []string{ + "observed metric value is 299 ms", + "the observed metric value is 573 ms", + "the observed metric value is 572 ms", + "the observed metric value is 301 ms", + }, + }, + { + targetUnit: "decgbytes", + yAxisUnit: "bytes", + values: [][]interface{}{ + {float64(2863284053), "attr", time.Now()}, // 2.86 GB + {float64(2863388842), "attr", time.Now().Add(1 * time.Second)}, // 2.86 GB + {float64(300947400), "attr", time.Now().Add(2 * time.Second)}, // 0.3 GB + {float64(299316000), "attr", time.Now().Add(3 * time.Second)}, // 0.3 GB + {float64(66640400.00000001), "attr", time.Now().Add(4 * time.Second)}, // 66.64 MB + }, + expectAlerts: 0, + compareOp: "1", // Above + matchType: "1", // Once + target: 200, // 200 GB + }, + } +)