diff --git a/pkg/query-service/app/clickhouseReader/reader.go b/pkg/query-service/app/clickhouseReader/reader.go index e0af954f3e..58f0683c07 100644 --- a/pkg/query-service/app/clickhouseReader/reader.go +++ b/pkg/query-service/app/clickhouseReader/reader.go @@ -3257,6 +3257,7 @@ func (r *ClickHouseReader) GetDashboardsInfo(ctx context.Context) (*model.Dashbo totalDashboardsWithPanelAndName := 0 var dashboardNames []string count := 0 + logChQueriesCount := 0 for _, dashboard := range dashboardsData { if isDashboardWithPanelAndName(dashboard.Data) { totalDashboardsWithPanelAndName = totalDashboardsWithPanelAndName + 1 @@ -3272,12 +3273,16 @@ func (r *ClickHouseReader) GetDashboardsInfo(ctx context.Context) (*model.Dashbo if isDashboardWithTSV2(dashboard.Data) { count = count + 1 } + if isDashboardWithLogsClickhouseQuery(dashboard.Data) { + logChQueriesCount = logChQueriesCount + 1 + } } dashboardsInfo.DashboardNames = dashboardNames dashboardsInfo.TotalDashboards = len(dashboardsData) dashboardsInfo.TotalDashboardsWithPanelAndName = totalDashboardsWithPanelAndName dashboardsInfo.QueriesWithTSV2 = count + dashboardsInfo.DashboardsWithLogsChQuery = logChQueriesCount return &dashboardsInfo, nil } @@ -3289,6 +3294,16 @@ func isDashboardWithTSV2(data map[string]interface{}) bool { return strings.Contains(string(jsonData), "time_series_v2") } +func isDashboardWithLogsClickhouseQuery(data map[string]interface{}) bool { + jsonData, err := json.Marshal(data) + if err != nil { + return false + } + result := strings.Contains(string(jsonData), "signoz_logs.distributed_logs") || + strings.Contains(string(jsonData), "signoz_logs.logs") + return result +} + func isDashboardWithPanelAndName(data map[string]interface{}) bool { isDashboardName := false isDashboardWithPanelAndName := false @@ -4866,7 +4881,7 @@ func (r *ClickHouseReader) GetTimeSeriesResultV3(ctx context.Context, query stri if err != nil { zap.L().Error("error while reading time series result", zap.Error(err)) - return nil, err + return nil, errors.New(err.Error()) } defer rows.Close() @@ -4913,7 +4928,7 @@ func (r *ClickHouseReader) GetListResultV3(ctx context.Context, query string) ([ if err != nil { zap.L().Error("error while reading time series result", zap.Error(err)) - return nil, err + return nil, errors.New(err.Error()) } defer rows.Close() diff --git a/pkg/query-service/app/http_handler.go b/pkg/query-service/app/http_handler.go index 9355658186..20852f1660 100644 --- a/pkg/query-service/app/http_handler.go +++ b/pkg/query-service/app/http_handler.go @@ -3674,8 +3674,12 @@ func (aH *APIHandler) queryRangeV3(ctx context.Context, queryRangeParams *v3.Que result, errQuriesByName, err = aH.querier.QueryRange(ctx, queryRangeParams) if err != nil { - apiErrObj := &model.ApiError{Typ: model.ErrorBadData, Err: err} - RespondError(w, apiErrObj, errQuriesByName) + queryErrors := map[string]string{} + for name, err := range errQuriesByName { + queryErrors[fmt.Sprintf("Query-%s", name)] = err.Error() + } + apiErrObj := &model.ApiError{Typ: model.ErrorInternal, Err: err} + RespondError(w, apiErrObj, queryErrors) return } @@ -4118,8 +4122,12 @@ func (aH *APIHandler) queryRangeV4(ctx context.Context, queryRangeParams *v3.Que result, errQuriesByName, err = aH.querierV2.QueryRange(ctx, queryRangeParams) if err != nil { - apiErrObj := &model.ApiError{Typ: model.ErrorBadData, Err: err} - RespondError(w, apiErrObj, errQuriesByName) + queryErrors := map[string]string{} + for name, err := range errQuriesByName { + queryErrors[fmt.Sprintf("Query-%s", name)] = err.Error() + } + apiErrObj := &model.ApiError{Typ: model.ErrorInternal, Err: err} + RespondError(w, apiErrObj, queryErrors) return } diff --git a/pkg/query-service/app/querier/querier.go b/pkg/query-service/app/querier/querier.go index 9adea09d47..50ef63394a 100644 --- a/pkg/query-service/app/querier/querier.go +++ b/pkg/query-service/app/querier/querier.go @@ -566,7 +566,7 @@ func (q *querier) runBuilderListQueries(ctx context.Context, params *v3.QueryRan rowList, err := q.reader.GetListResultV3(ctx, query) if err != nil { - ch <- channelResult{Err: fmt.Errorf("error in query-%s: %v", name, err), Name: name, Query: query} + ch <- channelResult{Err: err, Name: name, Query: query} return } ch <- channelResult{List: rowList, Name: name, Query: query} diff --git a/pkg/query-service/app/querier/v2/querier.go b/pkg/query-service/app/querier/v2/querier.go index 19538fa9a5..b71a8cc0cc 100644 --- a/pkg/query-service/app/querier/v2/querier.go +++ b/pkg/query-service/app/querier/v2/querier.go @@ -574,7 +574,7 @@ func (q *querier) runBuilderListQueries(ctx context.Context, params *v3.QueryRan rowList, err := q.reader.GetListResultV3(ctx, query) if err != nil { - ch <- channelResult{Err: fmt.Errorf("error in query-%s: %v", name, err), Name: name, Query: query} + ch <- channelResult{Err: err, Name: name, Query: query} return } ch <- channelResult{List: rowList, Name: name, Query: query} diff --git a/pkg/query-service/model/response.go b/pkg/query-service/model/response.go index 8e3348ac8a..1b86ff7e8b 100644 --- a/pkg/query-service/model/response.go +++ b/pkg/query-service/model/response.go @@ -630,6 +630,7 @@ type AlertsInfo struct { SpanMetricsPrometheusQueries int `json:"spanMetricsPrometheusQueries"` AlertNames []string `json:"alertNames"` AlertsWithTSV2 int `json:"alertsWithTSv2"` + AlertsWithLogsChQuery int `json:"alertsWithLogsChQuery"` } type SavedViewsInfo struct { @@ -646,6 +647,7 @@ type DashboardsInfo struct { TracesBasedPanels int `json:"tracesBasedPanels"` DashboardNames []string `json:"dashboardNames"` QueriesWithTSV2 int `json:"queriesWithTSV2"` + DashboardsWithLogsChQuery int `json:"dashboardsWithLogsChQuery"` } type TagTelemetryData struct { diff --git a/pkg/query-service/rules/base_rule.go b/pkg/query-service/rules/base_rule.go index 7ab57905eb..6fbaa655c7 100644 --- a/pkg/query-service/rules/base_rule.go +++ b/pkg/query-service/rules/base_rule.go @@ -109,6 +109,7 @@ func NewBaseRule(id string, p *PostableRule, reader interfaces.Reader, opts ...R id: id, name: p.AlertName, source: p.Source, + typ: p.AlertType, ruleCondition: p.RuleCondition, evalWindow: time.Duration(p.EvalWindow), labels: qslabels.FromMap(p.Labels), diff --git a/pkg/query-service/rules/db.go b/pkg/query-service/rules/db.go index d9a9be195c..f3a9de1c62 100644 --- a/pkg/query-service/rules/db.go +++ b/pkg/query-service/rules/db.go @@ -319,6 +319,10 @@ func (r *ruleDB) GetAlertsInfo(ctx context.Context) (*model.AlertsInfo, error) { if strings.Contains(alert, "time_series_v2") { alertsInfo.AlertsWithTSV2 = alertsInfo.AlertsWithTSV2 + 1 } + if strings.Contains(alert, "signoz_logs.distributed_logs") || + strings.Contains(alert, "signoz_logs.logs") { + alertsInfo.AlertsWithLogsChQuery = alertsInfo.AlertsWithLogsChQuery + 1 + } err = json.Unmarshal([]byte(alert), &rule) if err != nil { zap.L().Error("invalid rule data", zap.Error(err)) diff --git a/pkg/query-service/rules/promrule_test.go b/pkg/query-service/rules/promrule_test.go index 7c559d1eee..c87ef2cee9 100644 --- a/pkg/query-service/rules/promrule_test.go +++ b/pkg/query-service/rules/promrule_test.go @@ -13,7 +13,7 @@ import ( func TestPromRuleShouldAlert(t *testing.T) { postableRule := PostableRule{ AlertName: "Test Rule", - AlertType: "METRIC_BASED_ALERT", + AlertType: AlertTypeMetric, RuleType: RuleTypeProm, EvalWindow: Duration(5 * time.Minute), Frequency: Duration(1 * time.Minute), diff --git a/pkg/query-service/rules/threshold_rule_test.go b/pkg/query-service/rules/threshold_rule_test.go index ab37ad6af1..e000d71257 100644 --- a/pkg/query-service/rules/threshold_rule_test.go +++ b/pkg/query-service/rules/threshold_rule_test.go @@ -18,7 +18,7 @@ import ( func TestThresholdRuleShouldAlert(t *testing.T) { postableRule := PostableRule{ AlertName: "Tricky Condition Tests", - AlertType: "METRIC_BASED_ALERT", + AlertType: AlertTypeMetric, RuleType: RuleTypeThreshold, EvalWindow: Duration(5 * time.Minute), Frequency: Duration(1 * time.Minute), @@ -788,7 +788,7 @@ func TestPrepareLinksToLogs(t *testing.T) { func TestPrepareLinksToTraces(t *testing.T) { postableRule := PostableRule{ AlertName: "Links to traces test", - AlertType: "TRACES_BASED_ALERT", + AlertType: AlertTypeTraces, RuleType: RuleTypeThreshold, EvalWindow: Duration(5 * time.Minute), Frequency: Duration(1 * time.Minute), @@ -830,7 +830,7 @@ func TestPrepareLinksToTraces(t *testing.T) { func TestThresholdRuleLabelNormalization(t *testing.T) { postableRule := PostableRule{ AlertName: "Tricky Condition Tests", - AlertType: "METRIC_BASED_ALERT", + AlertType: AlertTypeMetric, RuleType: RuleTypeThreshold, EvalWindow: Duration(5 * time.Minute), Frequency: Duration(1 * time.Minute), @@ -914,7 +914,7 @@ func TestThresholdRuleLabelNormalization(t *testing.T) { func TestThresholdRuleEvalDelay(t *testing.T) { postableRule := PostableRule{ AlertName: "Test Eval Delay", - AlertType: "METRIC_BASED_ALERT", + AlertType: AlertTypeMetric, RuleType: RuleTypeThreshold, EvalWindow: Duration(5 * time.Minute), Frequency: Duration(1 * time.Minute), @@ -963,7 +963,7 @@ func TestThresholdRuleEvalDelay(t *testing.T) { func TestThresholdRuleClickHouseTmpl(t *testing.T) { postableRule := PostableRule{ AlertName: "Tricky Condition Tests", - AlertType: "METRIC_BASED_ALERT", + AlertType: AlertTypeMetric, RuleType: RuleTypeThreshold, EvalWindow: Duration(5 * time.Minute), Frequency: Duration(1 * time.Minute), @@ -1019,7 +1019,7 @@ func (m *queryMatcherAny) Match(string, string) error { func TestThresholdRuleUnitCombinations(t *testing.T) { postableRule := PostableRule{ AlertName: "Units test", - AlertType: "METRIC_BASED_ALERT", + AlertType: AlertTypeMetric, RuleType: RuleTypeThreshold, EvalWindow: Duration(5 * time.Minute), Frequency: Duration(1 * time.Minute), @@ -1170,8 +1170,8 @@ func TestThresholdRuleUnitCombinations(t *testing.T) { func TestThresholdRuleNoData(t *testing.T) { postableRule := PostableRule{ - AlertName: "Units test", - AlertType: "METRIC_BASED_ALERT", + AlertName: "No data test", + AlertType: AlertTypeMetric, RuleType: RuleTypeThreshold, EvalWindow: Duration(5 * time.Minute), Frequency: Duration(1 * time.Minute), @@ -1261,3 +1261,201 @@ func TestThresholdRuleNoData(t *testing.T) { } } } + +func TestThresholdRuleTracesLink(t *testing.T) { + postableRule := PostableRule{ + AlertName: "Traces link test", + AlertType: AlertTypeTraces, + RuleType: RuleTypeThreshold, + EvalWindow: Duration(5 * time.Minute), + Frequency: Duration(1 * time.Minute), + RuleCondition: &RuleCondition{ + CompositeQuery: &v3.CompositeQuery{ + QueryType: v3.QueryTypeBuilder, + BuilderQueries: map[string]*v3.BuilderQuery{ + "A": { + QueryName: "A", + StepInterval: 60, + AggregateAttribute: v3.AttributeKey{ + Key: "durationNano", + }, + AggregateOperator: v3.AggregateOperatorP95, + DataSource: v3.DataSourceTraces, + Expression: "A", + Filters: &v3.FilterSet{ + Operator: "AND", + Items: []v3.FilterItem{ + { + Key: v3.AttributeKey{Key: "httpMethod", IsColumn: true, Type: v3.AttributeKeyTypeTag, DataType: v3.AttributeKeyDataTypeString}, + Value: "GET", + Operator: v3.FilterOperatorEqual, + }, + }, + }, + }, + }, + }, + }, + } + fm := featureManager.StartManager() + mock, err := cmock.NewClickHouseWithQueryMatcher(nil, &queryMatcherAny{}) + if err != nil { + t.Errorf("an error '%s' was not expected when opening a stub database connection", err) + } + + cols := make([]cmock.ColumnType, 0) + cols = append(cols, cmock.ColumnType{Name: "value", Type: "Float64"}) + cols = append(cols, cmock.ColumnType{Name: "attr", Type: "String"}) + cols = append(cols, cmock.ColumnType{Name: "timestamp", Type: "String"}) + + for idx, c := range testCases { + rows := cmock.NewRows(cols, c.values) + + // We are testing the eval logic after the query is run + // so we don't care about the query string here + queryString := "SELECT any" + mock. + ExpectQuery(queryString). + WillReturnRows(rows) + postableRule.RuleCondition.CompareOp = CompareOp(c.compareOp) + postableRule.RuleCondition.MatchType = MatchType(c.matchType) + postableRule.RuleCondition.Target = &c.target + postableRule.RuleCondition.CompositeQuery.Unit = c.yAxisUnit + postableRule.RuleCondition.TargetUnit = c.targetUnit + postableRule.Annotations = map[string]string{ + "description": "This alert is fired when the defined metric (current value: {{$value}}) crosses the threshold ({{$threshold}})", + "summary": "The rule threshold is set to {{$threshold}}, and the observed metric value is {{$value}}", + } + + options := clickhouseReader.NewOptions("", 0, 0, 0, "", "archiveNamespace") + reader := clickhouseReader.NewReaderFromClickhouseConnection(mock, options, nil, "", fm, "", true) + + rule, err := NewThresholdRule("69", &postableRule, fm, reader, true) + rule.temporalityMap = map[string]map[v3.Temporality]bool{ + "signoz_calls_total": { + v3.Delta: true, + }, + } + if err != nil { + assert.NoError(t, err) + } + + retVal, err := rule.Eval(context.Background(), time.Now()) + if err != nil { + assert.NoError(t, err) + } + + if c.expectAlerts == 0 { + assert.Equal(t, 0, retVal.(int), "case %d", idx) + } else { + assert.Equal(t, c.expectAlerts, retVal.(int), "case %d", idx) + for _, item := range rule.active { + for name, value := range item.Annotations.Map() { + if name == "related_traces" { + assert.NotEmpty(t, value, "case %d", idx) + assert.Contains(t, value, "GET") + } + } + } + } + } +} + +func TestThresholdRuleLogsLink(t *testing.T) { + postableRule := PostableRule{ + AlertName: "Logs link test", + AlertType: AlertTypeLogs, + RuleType: RuleTypeThreshold, + EvalWindow: Duration(5 * time.Minute), + Frequency: Duration(1 * time.Minute), + RuleCondition: &RuleCondition{ + CompositeQuery: &v3.CompositeQuery{ + QueryType: v3.QueryTypeBuilder, + BuilderQueries: map[string]*v3.BuilderQuery{ + "A": { + QueryName: "A", + StepInterval: 60, + AggregateAttribute: v3.AttributeKey{ + Key: "component", + }, + AggregateOperator: v3.AggregateOperatorCountDistinct, + DataSource: v3.DataSourceLogs, + Expression: "A", + Filters: &v3.FilterSet{ + Operator: "AND", + Items: []v3.FilterItem{ + { + Key: v3.AttributeKey{Key: "k8s.container.name", IsColumn: false, Type: v3.AttributeKeyTypeTag, DataType: v3.AttributeKeyDataTypeString}, + Value: "testcontainer", + Operator: v3.FilterOperatorEqual, + }, + }, + }, + }, + }, + }, + }, + } + fm := featureManager.StartManager() + mock, err := cmock.NewClickHouseWithQueryMatcher(nil, &queryMatcherAny{}) + if err != nil { + t.Errorf("an error '%s' was not expected when opening a stub database connection", err) + } + + cols := make([]cmock.ColumnType, 0) + cols = append(cols, cmock.ColumnType{Name: "value", Type: "Float64"}) + cols = append(cols, cmock.ColumnType{Name: "attr", Type: "String"}) + cols = append(cols, cmock.ColumnType{Name: "timestamp", Type: "String"}) + + for idx, c := range testCases { + rows := cmock.NewRows(cols, c.values) + + // We are testing the eval logic after the query is run + // so we don't care about the query string here + queryString := "SELECT any" + mock. + ExpectQuery(queryString). + WillReturnRows(rows) + postableRule.RuleCondition.CompareOp = CompareOp(c.compareOp) + postableRule.RuleCondition.MatchType = MatchType(c.matchType) + postableRule.RuleCondition.Target = &c.target + postableRule.RuleCondition.CompositeQuery.Unit = c.yAxisUnit + postableRule.RuleCondition.TargetUnit = c.targetUnit + postableRule.Annotations = map[string]string{ + "description": "This alert is fired when the defined metric (current value: {{$value}}) crosses the threshold ({{$threshold}})", + "summary": "The rule threshold is set to {{$threshold}}, and the observed metric value is {{$value}}", + } + + options := clickhouseReader.NewOptions("", 0, 0, 0, "", "archiveNamespace") + reader := clickhouseReader.NewReaderFromClickhouseConnection(mock, options, nil, "", fm, "", true) + + rule, err := NewThresholdRule("69", &postableRule, fm, reader, true) + rule.temporalityMap = map[string]map[v3.Temporality]bool{ + "signoz_calls_total": { + v3.Delta: true, + }, + } + if err != nil { + assert.NoError(t, err) + } + + retVal, err := rule.Eval(context.Background(), time.Now()) + if err != nil { + assert.NoError(t, err) + } + + if c.expectAlerts == 0 { + assert.Equal(t, 0, retVal.(int), "case %d", idx) + } else { + assert.Equal(t, c.expectAlerts, retVal.(int), "case %d", idx) + for _, item := range rule.active { + for name, value := range item.Annotations.Map() { + if name == "related_logs" { + assert.NotEmpty(t, value, "case %d", idx) + assert.Contains(t, value, "testcontainer") + } + } + } + } + } +} diff --git a/pkg/query-service/rules/threshold_rule_test_data.go b/pkg/query-service/rules/threshold_rule_test_data.go new file mode 100644 index 0000000000..3a28bdf38b --- /dev/null +++ b/pkg/query-service/rules/threshold_rule_test_data.go @@ -0,0 +1,68 @@ +package rules + +import "time" + +var ( + testCases = []struct { + targetUnit string + yAxisUnit string + values [][]interface{} + expectAlerts int + compareOp string + matchType string + target float64 + summaryAny []string + }{ + { + targetUnit: "s", + yAxisUnit: "ns", + values: [][]interface{}{ + {float64(572588400), "attr", time.Now()}, // 0.57 seconds + {float64(572386400), "attr", time.Now().Add(1 * time.Second)}, // 0.57 seconds + {float64(300947400), "attr", time.Now().Add(2 * time.Second)}, // 0.3 seconds + {float64(299316000), "attr", time.Now().Add(3 * time.Second)}, // 0.3 seconds + {float64(66640400.00000001), "attr", time.Now().Add(4 * time.Second)}, // 0.06 seconds + }, + expectAlerts: 0, + compareOp: "1", // Above + matchType: "1", // Once + target: 1, // 1 second + }, + { + targetUnit: "ms", + yAxisUnit: "ns", + values: [][]interface{}{ + {float64(572588400), "attr", time.Now()}, // 572.58 ms + {float64(572386400), "attr", time.Now().Add(1 * time.Second)}, // 572.38 ms + {float64(300947400), "attr", time.Now().Add(2 * time.Second)}, // 300.94 ms + {float64(299316000), "attr", time.Now().Add(3 * time.Second)}, // 299.31 ms + {float64(66640400.00000001), "attr", time.Now().Add(4 * time.Second)}, // 66.64 ms + }, + expectAlerts: 4, + compareOp: "1", // Above + matchType: "1", // Once + target: 200, // 200 ms + summaryAny: []string{ + "observed metric value is 299 ms", + "the observed metric value is 573 ms", + "the observed metric value is 572 ms", + "the observed metric value is 301 ms", + }, + }, + { + targetUnit: "decgbytes", + yAxisUnit: "bytes", + values: [][]interface{}{ + {float64(2863284053), "attr", time.Now()}, // 2.86 GB + {float64(2863388842), "attr", time.Now().Add(1 * time.Second)}, // 2.86 GB + {float64(300947400), "attr", time.Now().Add(2 * time.Second)}, // 0.3 GB + {float64(299316000), "attr", time.Now().Add(3 * time.Second)}, // 0.3 GB + {float64(66640400.00000001), "attr", time.Now().Add(4 * time.Second)}, // 66.64 MB + }, + expectAlerts: 0, + compareOp: "1", // Above + matchType: "1", // Once + target: 200, // 200 GB + }, + } +) diff --git a/pkg/query-service/telemetry/telemetry.go b/pkg/query-service/telemetry/telemetry.go index 7f282ea3f9..048d23498f 100644 --- a/pkg/query-service/telemetry/telemetry.go +++ b/pkg/query-service/telemetry/telemetry.go @@ -339,6 +339,7 @@ func createTelemetry() { "metricBasedPanels": dashboardsInfo.MetricBasedPanels, "tracesBasedPanels": dashboardsInfo.TracesBasedPanels, "dashboardsWithTSV2": dashboardsInfo.QueriesWithTSV2, + "dashboardWithLogsChQuery": dashboardsInfo.DashboardsWithLogsChQuery, "totalAlerts": alertsInfo.TotalAlerts, "alertsWithTSV2": alertsInfo.AlertsWithTSV2, "logsBasedAlerts": alertsInfo.LogsBasedAlerts, @@ -358,6 +359,7 @@ func createTelemetry() { "metricsClickHouseQueries": alertsInfo.MetricsClickHouseQueries, "metricsPrometheusQueries": alertsInfo.MetricsPrometheusQueries, "spanMetricsPrometheusQueries": alertsInfo.SpanMetricsPrometheusQueries, + "alertsWithLogsChQuery": alertsInfo.AlertsWithLogsChQuery, } // send event only if there are dashboards or alerts or channels if (dashboardsInfo.TotalDashboards > 0 || alertsInfo.TotalAlerts > 0 || len(*channels) > 0 || savedViewsInfo.TotalSavedViews > 0) && apiErr == nil {