diff --git a/pkg/query-service/app/logs/v3/json_filter.go b/pkg/query-service/app/logs/v3/json_filter.go index 887baaab4c..d883b61797 100644 --- a/pkg/query-service/app/logs/v3/json_filter.go +++ b/pkg/query-service/app/logs/v3/json_filter.go @@ -20,7 +20,7 @@ const ( NGRAM_SIZE = 4 ) -var dataTypeMapping = map[string]string{ +var DataTypeMapping = map[string]string{ "string": STRING, "int64": INT64, "float64": FLOAT64, @@ -31,7 +31,7 @@ var dataTypeMapping = map[string]string{ "array(bool)": ARRAY_BOOL, } -var arrayValueTypeMapping = map[string]string{ +var ArrayValueTypeMapping = map[string]string{ "array(string)": "string", "array(int64)": "int64", "array(float64)": "float64", @@ -59,7 +59,7 @@ var jsonLogOperators = map[v3.FilterOperator]string{ v3.FilterOperatorNotHas: "NOT has(%s, %s)", } -func getPath(keyArr []string) string { +func GetPath(keyArr []string) string { path := []string{} for i := 0; i < len(keyArr); i++ { if strings.HasSuffix(keyArr[i], "[*]") { @@ -71,7 +71,7 @@ func getPath(keyArr []string) string { return strings.Join(path, ".") } -func getJSONFilterKey(key v3.AttributeKey, op v3.FilterOperator, isArray bool) (string, error) { +func GetJSONFilterKey(key v3.AttributeKey, op v3.FilterOperator, isArray bool) (string, error) { keyArr := strings.Split(key.Key, ".") // i.e it should be at least body.name, and not something like body if len(keyArr) < 2 { @@ -89,11 +89,11 @@ func getJSONFilterKey(key v3.AttributeKey, op v3.FilterOperator, isArray bool) ( var dataType string var ok bool - if dataType, ok = dataTypeMapping[string(key.DataType)]; !ok { + if dataType, ok = DataTypeMapping[string(key.DataType)]; !ok { return "", fmt.Errorf("unsupported dataType for JSON: %s", key.DataType) } - path := getPath(keyArr[1:]) + path := GetPath(keyArr[1:]) if isArray { return fmt.Sprintf("JSONExtract(JSON_QUERY(%s, '$.%s'), '%s')", keyArr[0], path, dataType), nil @@ -109,7 +109,7 @@ func getJSONFilterKey(key v3.AttributeKey, op v3.FilterOperator, isArray bool) 
( } // takes the path and the values and generates where clauses for better usage of index -func getPathIndexFilter(path string) string { +func GetPathIndexFilter(path string) string { filters := []string{} keyArr := strings.Split(path, ".") if len(keyArr) < 2 { @@ -136,7 +136,7 @@ func GetJSONFilter(item v3.FilterItem) (string, error) { dataType := item.Key.DataType isArray := false // check if its an array and handle it - if val, ok := arrayValueTypeMapping[string(item.Key.DataType)]; ok { + if val, ok := ArrayValueTypeMapping[string(item.Key.DataType)]; ok { if item.Operator != v3.FilterOperatorHas && item.Operator != v3.FilterOperatorNotHas { return "", fmt.Errorf("only has operator is supported for array") } @@ -144,7 +144,7 @@ func GetJSONFilter(item v3.FilterItem) (string, error) { dataType = v3.AttributeKeyDataType(val) } - key, err := getJSONFilterKey(item.Key, item.Operator, isArray) + key, err := GetJSONFilterKey(item.Key, item.Operator, isArray) if err != nil { return "", err } @@ -164,7 +164,7 @@ func GetJSONFilter(item v3.FilterItem) (string, error) { if logsOp, ok := jsonLogOperators[op]; ok { switch op { case v3.FilterOperatorExists, v3.FilterOperatorNotExists: - filter = fmt.Sprintf(logsOp, key, getPath(strings.Split(item.Key.Key, ".")[1:])) + filter = fmt.Sprintf(logsOp, key, GetPath(strings.Split(item.Key.Key, ".")[1:])) case v3.FilterOperatorRegex, v3.FilterOperatorNotRegex, v3.FilterOperatorHas, v3.FilterOperatorNotHas: fmtVal := utils.ClickHouseFormattedValue(value) filter = fmt.Sprintf(logsOp, key, fmtVal) @@ -181,7 +181,7 @@ func GetJSONFilter(item v3.FilterItem) (string, error) { filters := []string{} - pathFilter := getPathIndexFilter(item.Key.Key) + pathFilter := GetPathIndexFilter(item.Key.Key) if pathFilter != "" { filters = append(filters, pathFilter) } @@ -196,7 +196,7 @@ func GetJSONFilter(item v3.FilterItem) (string, error) { // add exists check for non array items as default values of int/float/bool will corrupt the results if 
!isArray && !(item.Operator == v3.FilterOperatorExists || item.Operator == v3.FilterOperatorNotExists) { - existsFilter := fmt.Sprintf("JSON_EXISTS(body, '$.%s')", getPath(strings.Split(item.Key.Key, ".")[1:])) + existsFilter := fmt.Sprintf("JSON_EXISTS(body, '$.%s')", GetPath(strings.Split(item.Key.Key, ".")[1:])) filter = fmt.Sprintf("%s AND %s", existsFilter, filter) } diff --git a/pkg/query-service/app/logs/v3/json_filter_test.go b/pkg/query-service/app/logs/v3/json_filter_test.go index 0a71cd67b2..060ba63707 100644 --- a/pkg/query-service/app/logs/v3/json_filter_test.go +++ b/pkg/query-service/app/logs/v3/json_filter_test.go @@ -140,7 +140,7 @@ var testGetJSONFilterKeyData = []struct { func TestGetJSONFilterKey(t *testing.T) { for _, tt := range testGetJSONFilterKeyData { Convey("testgetKey", t, func() { - columnName, err := getJSONFilterKey(tt.Key, tt.Operator, tt.IsArray) + columnName, err := GetJSONFilterKey(tt.Key, tt.Operator, tt.IsArray) if tt.Error { So(err, ShouldNotBeNil) } else { diff --git a/pkg/query-service/app/logs/v3/query_builder.go b/pkg/query-service/app/logs/v3/query_builder.go index 2aa56002ff..bd64b4d0e6 100644 --- a/pkg/query-service/app/logs/v3/query_builder.go +++ b/pkg/query-service/app/logs/v3/query_builder.go @@ -9,7 +9,7 @@ import ( "go.signoz.io/signoz/pkg/query-service/utils" ) -var aggregateOperatorToPercentile = map[v3.AggregateOperator]float64{ +var AggregateOperatorToPercentile = map[v3.AggregateOperator]float64{ v3.AggregateOperatorP05: 0.05, v3.AggregateOperatorP10: 0.10, v3.AggregateOperatorP20: 0.20, @@ -21,7 +21,7 @@ var aggregateOperatorToPercentile = map[v3.AggregateOperator]float64{ v3.AggregateOperatorP99: 0.99, } -var aggregateOperatorToSQLFunc = map[v3.AggregateOperator]string{ +var AggregateOperatorToSQLFunc = map[v3.AggregateOperator]string{ v3.AggregateOperatorAvg: "avg", v3.AggregateOperatorMax: "max", v3.AggregateOperatorMin: "min", @@ -53,7 +53,7 @@ var logOperators = map[v3.FilterOperator]string{ const BODY = 
"body" -func getClickhouseLogsColumnType(columnType v3.AttributeKeyType) string { +func GetClickhouseLogsColumnType(columnType v3.AttributeKeyType) string { if columnType == v3.AttributeKeyTypeTag { return "attributes" } @@ -83,7 +83,7 @@ func getClickhouseColumnName(key v3.AttributeKey) string { //if the key is present in the topLevelColumn then it will be only searched in those columns, //regardless if it is indexed/present again in resource or column attribute if !key.IsColumn { - columnType := getClickhouseLogsColumnType(key.Type) + columnType := GetClickhouseLogsColumnType(key.Type) columnDataType := getClickhouseLogsColumnDataType(key.DataType) clickhouseColumn = fmt.Sprintf("%s_%s_value[indexOf(%s_%s_key, '%s')]", columnType, columnDataType, columnType, columnDataType, key.Key) return clickhouseColumn @@ -114,7 +114,7 @@ func getSelectLabels(aggregatorOperator v3.AggregateOperator, groupBy []v3.Attri return selectLabels } -func getSelectKeys(aggregatorOperator v3.AggregateOperator, groupBy []v3.AttributeKey) string { +func GetSelectKeys(aggregatorOperator v3.AggregateOperator, groupBy []v3.AttributeKey) string { var selectLabels []string if aggregatorOperator == v3.AggregateOperatorNoOp { return "" @@ -154,7 +154,7 @@ func GetExistsNexistsFilter(op v3.FilterOperator, item v3.FilterItem) string { } return fmt.Sprintf("%s_exists`=%v", strings.TrimSuffix(getClickhouseColumnName(item.Key), "`"), val) } - columnType := getClickhouseLogsColumnType(item.Key.Type) + columnType := GetClickhouseLogsColumnType(item.Key.Type) columnDataType := getClickhouseLogsColumnDataType(item.Key.DataType) return fmt.Sprintf(logOperators[op], columnType, columnDataType, item.Key.Key) } @@ -224,7 +224,7 @@ func buildLogsTimeSeriesFilterQuery(fs *v3.FilterSet, groupBy []v3.AttributeKey, // add group by conditions to filter out log lines which doesn't have the key for _, attr := range groupBy { if !attr.IsColumn { - columnType := getClickhouseLogsColumnType(attr.Type) + columnType := 
GetClickhouseLogsColumnType(attr.Type) columnDataType := getClickhouseLogsColumnDataType(attr.DataType) conditions = append(conditions, fmt.Sprintf("has(%s_%s_key, '%s')", columnType, columnDataType, attr.Key)) } else if attr.Type != v3.AttributeKeyTypeUnspecified { @@ -258,7 +258,7 @@ func buildLogsQuery(panelType v3.PanelType, start, end, step int64, mq *v3.Build selectLabels := getSelectLabels(mq.AggregateOperator, mq.GroupBy) - having := having(mq.Having) + having := Having(mq.Having) if having != "" { having = " having " + having } @@ -288,10 +288,10 @@ func buildLogsQuery(panelType v3.PanelType, start, end, step int64, mq *v3.Build // we dont need value for first query // going with this route as for a cleaner approach on implementation if graphLimitQtype == constants.FirstQueryGraphLimit { - queryTmpl = "SELECT " + getSelectKeys(mq.AggregateOperator, mq.GroupBy) + " from (" + queryTmpl + ")" + queryTmpl = "SELECT " + GetSelectKeys(mq.AggregateOperator, mq.GroupBy) + " from (" + queryTmpl + ")" } - groupBy := groupByAttributeKeyTags(panelType, graphLimitQtype, mq.GroupBy...) + groupBy := GroupByAttributeKeyTags(panelType, graphLimitQtype, mq.GroupBy...) 
if panelType != v3.PanelTypeList && groupBy != "" { groupBy = " group by " + groupBy } @@ -301,7 +301,7 @@ func buildLogsQuery(panelType v3.PanelType, start, end, step int64, mq *v3.Build } if graphLimitQtype == constants.SecondQueryGraphLimit { - filterSubQuery = filterSubQuery + " AND " + fmt.Sprintf("(%s) GLOBAL IN (", getSelectKeys(mq.AggregateOperator, mq.GroupBy)) + "#LIMIT_PLACEHOLDER)" + filterSubQuery = filterSubQuery + " AND " + fmt.Sprintf("(%s) GLOBAL IN (", GetSelectKeys(mq.AggregateOperator, mq.GroupBy)) + "#LIMIT_PLACEHOLDER)" } aggregationKey := "" @@ -329,7 +329,7 @@ func buildLogsQuery(panelType v3.PanelType, start, end, step int64, mq *v3.Build rate = rate / 60.0 } - op := fmt.Sprintf("%s(%s)/%f", aggregateOperatorToSQLFunc[mq.AggregateOperator], aggregationKey, rate) + op := fmt.Sprintf("%s(%s)/%f", AggregateOperatorToSQLFunc[mq.AggregateOperator], aggregationKey, rate) query := fmt.Sprintf(queryTmpl, op, filterSubQuery, groupBy, having, orderBy) return query, nil case @@ -342,11 +342,11 @@ func buildLogsQuery(panelType v3.PanelType, start, end, step int64, mq *v3.Build v3.AggregateOperatorP90, v3.AggregateOperatorP95, v3.AggregateOperatorP99: - op := fmt.Sprintf("quantile(%v)(%s)", aggregateOperatorToPercentile[mq.AggregateOperator], aggregationKey) + op := fmt.Sprintf("quantile(%v)(%s)", AggregateOperatorToPercentile[mq.AggregateOperator], aggregationKey) query := fmt.Sprintf(queryTmpl, op, filterSubQuery, groupBy, having, orderBy) return query, nil case v3.AggregateOperatorAvg, v3.AggregateOperatorSum, v3.AggregateOperatorMin, v3.AggregateOperatorMax: - op := fmt.Sprintf("%s(%s)", aggregateOperatorToSQLFunc[mq.AggregateOperator], aggregationKey) + op := fmt.Sprintf("%s(%s)", AggregateOperatorToSQLFunc[mq.AggregateOperator], aggregationKey) query := fmt.Sprintf(queryTmpl, op, filterSubQuery, groupBy, having, orderBy) return query, nil case v3.AggregateOperatorCount: @@ -394,7 +394,7 @@ func groupBy(panelType v3.PanelType, graphLimitQtype 
string, tags ...string) str return strings.Join(tags, ",") } -func groupByAttributeKeyTags(panelType v3.PanelType, graphLimitQtype string, tags ...v3.AttributeKey) string { +func GroupByAttributeKeyTags(panelType v3.PanelType, graphLimitQtype string, tags ...v3.AttributeKey) string { groupTags := []string{} for _, tag := range tags { groupTags = append(groupTags, "`"+tag.Key+"`") @@ -446,7 +446,7 @@ func orderByAttributeKeyTags(panelType v3.PanelType, items []v3.OrderBy, tags [] return str } -func having(items []v3.Having) string { +func Having(items []v3.Having) string { // aggregate something and filter on that aggregate var having []string for _, item := range items { @@ -455,7 +455,7 @@ func having(items []v3.Having) string { return strings.Join(having, " AND ") } -func reduceQuery(query string, reduceTo v3.ReduceToOperator, aggregateOperator v3.AggregateOperator) (string, error) { +func ReduceQuery(query string, reduceTo v3.ReduceToOperator, aggregateOperator v3.AggregateOperator) (string, error) { // the timestamp picked is not relevant here since the final value used is show the single // chart with just the query value. 
switch reduceTo { @@ -475,14 +475,14 @@ func reduceQuery(query string, reduceTo v3.ReduceToOperator, aggregateOperator v return query, nil } -func addLimitToQuery(query string, limit uint64) string { +func AddLimitToQuery(query string, limit uint64) string { if limit == 0 { return query } return fmt.Sprintf("%s LIMIT %d", query, limit) } -func addOffsetToQuery(query string, offset uint64) string { +func AddOffsetToQuery(query string, offset uint64) string { return fmt.Sprintf("%s OFFSET %d", query, offset) } @@ -492,7 +492,7 @@ type Options struct { PreferRPM bool } -func isOrderByTs(orderBy []v3.OrderBy) bool { +func IsOrderByTs(orderBy []v3.OrderBy) bool { if len(orderBy) == 1 && (orderBy[0].Key == constants.TIMESTAMP || orderBy[0].ColumnName == constants.TIMESTAMP) { return true } @@ -523,7 +523,7 @@ func PrepareLogsQuery(start, end int64, queryType v3.QueryType, panelType v3.Pan if err != nil { return "", err } - query = addLimitToQuery(query, mq.Limit) + query = AddLimitToQuery(query, mq.Limit) return query, nil } else if options.GraphLimitQtype == constants.SecondQueryGraphLimit { @@ -539,7 +539,7 @@ func PrepareLogsQuery(start, end int64, queryType v3.QueryType, panelType v3.Pan return "", err } if panelType == v3.PanelTypeValue { - query, err = reduceQuery(query, mq.ReduceTo, mq.AggregateOperator) + query, err = ReduceQuery(query, mq.ReduceTo, mq.AggregateOperator) } if panelType == v3.PanelTypeList { @@ -550,21 +550,21 @@ func PrepareLogsQuery(start, end int64, queryType v3.QueryType, panelType v3.Pan if mq.PageSize > 0 { if mq.Limit > 0 && mq.Offset+mq.PageSize > mq.Limit { - query = addLimitToQuery(query, mq.Limit-mq.Offset) + query = AddLimitToQuery(query, mq.Limit-mq.Offset) } else { - query = addLimitToQuery(query, mq.PageSize) + query = AddLimitToQuery(query, mq.PageSize) } // add offset to the query only if it is not orderd by timestamp. 
- if !isOrderByTs(mq.OrderBy) { - query = addOffsetToQuery(query, mq.Offset) + if !IsOrderByTs(mq.OrderBy) { + query = AddOffsetToQuery(query, mq.Offset) } } else { - query = addLimitToQuery(query, mq.Limit) + query = AddLimitToQuery(query, mq.Limit) } } else if panelType == v3.PanelTypeTable { - query = addLimitToQuery(query, mq.Limit) + query = AddLimitToQuery(query, mq.Limit) } return query, err diff --git a/pkg/query-service/app/logs/v4/json_filter.go b/pkg/query-service/app/logs/v4/json_filter.go new file mode 100644 index 0000000000..cde88e748a --- /dev/null +++ b/pkg/query-service/app/logs/v4/json_filter.go @@ -0,0 +1,105 @@ +package v4 + +import ( + "fmt" + "strings" + + logsV3 "go.signoz.io/signoz/pkg/query-service/app/logs/v3" + v3 "go.signoz.io/signoz/pkg/query-service/model/v3" + "go.signoz.io/signoz/pkg/query-service/utils" +) + +var jsonLogOperators = map[v3.FilterOperator]string{ + v3.FilterOperatorEqual: "=", + v3.FilterOperatorNotEqual: "!=", + v3.FilterOperatorLessThan: "<", + v3.FilterOperatorLessThanOrEq: "<=", + v3.FilterOperatorGreaterThan: ">", + v3.FilterOperatorGreaterThanOrEq: ">=", + v3.FilterOperatorLike: "LIKE", + v3.FilterOperatorNotLike: "NOT LIKE", + v3.FilterOperatorContains: "LIKE", + v3.FilterOperatorNotContains: "NOT LIKE", + v3.FilterOperatorRegex: "match(%s, %s)", + v3.FilterOperatorNotRegex: "NOT match(%s, %s)", + v3.FilterOperatorIn: "IN", + v3.FilterOperatorNotIn: "NOT IN", + v3.FilterOperatorExists: "JSON_EXISTS(%s, '$.%s')", + v3.FilterOperatorNotExists: "NOT JSON_EXISTS(%s, '$.%s')", + v3.FilterOperatorHas: "has(%s, %s)", + v3.FilterOperatorNotHas: "NOT has(%s, %s)", +} + +func GetJSONFilter(item v3.FilterItem) (string, error) { + + dataType := item.Key.DataType + isArray := false + // check if its an array and handle it + if val, ok := logsV3.ArrayValueTypeMapping[string(item.Key.DataType)]; ok { + if item.Operator != v3.FilterOperatorHas && item.Operator != v3.FilterOperatorNotHas { + return "", fmt.Errorf("only has 
operator is supported for array") + } + isArray = true + dataType = v3.AttributeKeyDataType(val) + } + + key, err := logsV3.GetJSONFilterKey(item.Key, item.Operator, isArray) + if err != nil { + return "", err + } + + // non array + op := v3.FilterOperator(strings.ToLower(strings.TrimSpace(string(item.Operator)))) + + var value interface{} + if op != v3.FilterOperatorExists && op != v3.FilterOperatorNotExists { + value, err = utils.ValidateAndCastValue(item.Value, dataType) + if err != nil { + return "", fmt.Errorf("failed to validate and cast value for %s: %v", item.Key.Key, err) + } + } + + var filter string + if logsOp, ok := jsonLogOperators[op]; ok { + switch op { + case v3.FilterOperatorExists, v3.FilterOperatorNotExists: + filter = fmt.Sprintf(logsOp, key, logsV3.GetPath(strings.Split(item.Key.Key, ".")[1:])) + case v3.FilterOperatorRegex, v3.FilterOperatorNotRegex, v3.FilterOperatorHas, v3.FilterOperatorNotHas: + fmtVal := utils.ClickHouseFormattedValue(value) + filter = fmt.Sprintf(logsOp, key, fmtVal) + case v3.FilterOperatorContains, v3.FilterOperatorNotContains: + val := utils.QuoteEscapedString(fmt.Sprintf("%v", item.Value)) + filter = fmt.Sprintf("%s %s '%%%s%%'", key, logsOp, val) + default: + fmtVal := utils.ClickHouseFormattedValue(value) + filter = fmt.Sprintf("%s %s %s", key, logsOp, fmtVal) + } + } else { + return "", fmt.Errorf("unsupported operator: %s", op) + } + + filters := []string{} + + pathFilter := logsV3.GetPathIndexFilter(item.Key.Key) + if pathFilter != "" { + filters = append(filters, pathFilter) + } + if op == v3.FilterOperatorContains || + op == v3.FilterOperatorEqual || + op == v3.FilterOperatorHas { + val, ok := item.Value.(string) + if ok && len(val) >= logsV3.NGRAM_SIZE { + filters = append(filters, fmt.Sprintf("lower(body) like lower('%%%s%%')", utils.QuoteEscapedString(strings.ToLower(val)))) + } + } + + // add exists check for non array items as default values of int/float/bool will corrupt the results + if !isArray && 
!(item.Operator == v3.FilterOperatorExists || item.Operator == v3.FilterOperatorNotExists) { + existsFilter := fmt.Sprintf("JSON_EXISTS(body, '$.%s')", logsV3.GetPath(strings.Split(item.Key.Key, ".")[1:])) + filter = fmt.Sprintf("%s AND %s", existsFilter, filter) + } + + filters = append(filters, filter) + + return strings.Join(filters, " AND "), nil +} diff --git a/pkg/query-service/app/logs/v4/json_filter_test.go b/pkg/query-service/app/logs/v4/json_filter_test.go new file mode 100644 index 0000000000..c8b2e44847 --- /dev/null +++ b/pkg/query-service/app/logs/v4/json_filter_test.go @@ -0,0 +1,200 @@ +package v4 + +import ( + "testing" + + . "github.com/smartystreets/goconvey/convey" + logsV3 "go.signoz.io/signoz/pkg/query-service/app/logs/v3" + v3 "go.signoz.io/signoz/pkg/query-service/model/v3" +) + +var testGetJSONFilterData = []struct { + Name string + FilterItem v3.FilterItem + Filter string + Error bool +}{ + { + Name: "Array membership string", + FilterItem: v3.FilterItem{ + Key: v3.AttributeKey{ + Key: "body.requestor_list[*]", + DataType: "array(string)", + IsJSON: true, + }, + Operator: "has", + Value: "index_service", + }, + Filter: "lower(body) like lower('%requestor_list%') AND lower(body) like lower('%index_service%') AND has(JSONExtract(JSON_QUERY(body, '$.\"requestor_list\"[*]'), 'Array(String)'), 'index_service')", + }, + { + Name: "Array membership int64", + FilterItem: v3.FilterItem{ + Key: v3.AttributeKey{ + Key: "body.int_numbers[*]", + DataType: "array(int64)", + IsJSON: true, + }, + Operator: "has", + Value: 2, + }, + Filter: "lower(body) like lower('%int_numbers%') AND has(JSONExtract(JSON_QUERY(body, '$.\"int_numbers\"[*]'), '" + logsV3.ARRAY_INT64 + "'), 2)", + }, + { + Name: "Array membership float64", + FilterItem: v3.FilterItem{ + Key: v3.AttributeKey{ + Key: "body.nested_num[*].float_nums[*]", + DataType: "array(float64)", + IsJSON: true, + }, + Operator: "nhas", + Value: 2.2, + }, + Filter: "lower(body) like 
lower('%nested_num%float_nums%') AND NOT has(JSONExtract(JSON_QUERY(body, '$.\"nested_num\"[*].\"float_nums\"[*]'), '" + logsV3.ARRAY_FLOAT64 + "'), 2.200000)", + }, + { + Name: "Array membership bool", + FilterItem: v3.FilterItem{ + Key: v3.AttributeKey{ + Key: "body.bool[*]", + DataType: "array(bool)", + IsJSON: true, + }, + Operator: "has", + Value: true, + }, + Filter: "lower(body) like lower('%bool%') AND has(JSONExtract(JSON_QUERY(body, '$.\"bool\"[*]'), '" + logsV3.ARRAY_BOOL + "'), true)", + }, + { + Name: "eq operator", + FilterItem: v3.FilterItem{ + Key: v3.AttributeKey{ + Key: "body.message", + DataType: "string", + IsJSON: true, + }, + Operator: "=", + Value: "hello", + }, + Filter: "lower(body) like lower('%message%') AND lower(body) like lower('%hello%') AND JSON_EXISTS(body, '$.\"message\"') AND JSON_VALUE(body, '$.\"message\"') = 'hello'", + }, + { + Name: "eq operator number", + FilterItem: v3.FilterItem{ + Key: v3.AttributeKey{ + Key: "body.status", + DataType: "int64", + IsJSON: true, + }, + Operator: "=", + Value: 1, + }, + Filter: "lower(body) like lower('%status%') AND JSON_EXISTS(body, '$.\"status\"') AND JSONExtract(JSON_VALUE(body, '$.\"status\"'), '" + logsV3.INT64 + "') = 1", + }, + { + Name: "neq operator number", + FilterItem: v3.FilterItem{ + Key: v3.AttributeKey{ + Key: "body.status", + DataType: "float64", + IsJSON: true, + }, + Operator: "=", + Value: 1.1, + }, + Filter: "lower(body) like lower('%status%') AND JSON_EXISTS(body, '$.\"status\"') AND JSONExtract(JSON_VALUE(body, '$.\"status\"'), '" + logsV3.FLOAT64 + "') = 1.100000", + }, + { + Name: "eq operator bool", + FilterItem: v3.FilterItem{ + Key: v3.AttributeKey{ + Key: "body.boolkey", + DataType: "bool", + IsJSON: true, + }, + Operator: "=", + Value: true, + }, + Filter: "lower(body) like lower('%boolkey%') AND JSON_EXISTS(body, '$.\"boolkey\"') AND JSONExtract(JSON_VALUE(body, '$.\"boolkey\"'), '" + logsV3.BOOL + "') = true", + }, + { + Name: "greater than operator", + 
FilterItem: v3.FilterItem{ + Key: v3.AttributeKey{ + Key: "body.status", + DataType: "int64", + IsJSON: true, + }, + Operator: ">", + Value: 1, + }, + Filter: "lower(body) like lower('%status%') AND JSON_EXISTS(body, '$.\"status\"') AND JSONExtract(JSON_VALUE(body, '$.\"status\"'), '" + logsV3.INT64 + "') > 1", + }, + { + Name: "regex operator", + FilterItem: v3.FilterItem{ + Key: v3.AttributeKey{ + Key: "body.message", + DataType: "string", + IsJSON: true, + }, + Operator: "regex", + Value: "a*", + }, + Filter: "lower(body) like lower('%message%') AND JSON_EXISTS(body, '$.\"message\"') AND match(JSON_VALUE(body, '$.\"message\"'), 'a*')", + }, + { + Name: "contains operator", + FilterItem: v3.FilterItem{ + Key: v3.AttributeKey{ + Key: "body.message", + DataType: "string", + IsJSON: true, + }, + Operator: "contains", + Value: "a", + }, + Filter: "lower(body) like lower('%message%') AND JSON_EXISTS(body, '$.\"message\"') AND JSON_VALUE(body, '$.\"message\"') LIKE '%a%'", + }, + { + Name: "contains operator with quotes", + FilterItem: v3.FilterItem{ + Key: v3.AttributeKey{ + Key: "body.message", + DataType: "string", + IsJSON: true, + }, + Operator: "contains", + Value: "hello 'world'", + }, + Filter: "lower(body) like lower('%message%') AND lower(body) like lower('%hello \\'world\\'%') AND JSON_EXISTS(body, '$.\"message\"') AND JSON_VALUE(body, '$.\"message\"') LIKE '%hello \\'world\\'%'", + }, + { + Name: "exists", + FilterItem: v3.FilterItem{ + Key: v3.AttributeKey{ + Key: "body.message", + DataType: "string", + IsJSON: true, + }, + Operator: "exists", + Value: "", + }, + Filter: "lower(body) like lower('%message%') AND JSON_EXISTS(body, '$.\"message\"')", + }, +} + +func TestGetJSONFilter(t *testing.T) { + for _, tt := range testGetJSONFilterData { + Convey("testGetJSONFilter", t, func() { + filter, err := GetJSONFilter(tt.FilterItem) + if tt.Error { + So(err, ShouldNotBeNil) + } else { + So(err, ShouldBeNil) + So(filter, ShouldEqual, tt.Filter) + } + }) + } +} 
diff --git a/pkg/query-service/app/logs/v4/query_builder.go b/pkg/query-service/app/logs/v4/query_builder.go new file mode 100644 index 0000000000..7caef2692d --- /dev/null +++ b/pkg/query-service/app/logs/v4/query_builder.go @@ -0,0 +1,507 @@ +package v4 + +import ( + "fmt" + "strings" + + logsV3 "go.signoz.io/signoz/pkg/query-service/app/logs/v3" + "go.signoz.io/signoz/pkg/query-service/constants" + v3 "go.signoz.io/signoz/pkg/query-service/model/v3" + "go.signoz.io/signoz/pkg/query-service/utils" +) + +var logOperators = map[v3.FilterOperator]string{ + v3.FilterOperatorEqual: "=", + v3.FilterOperatorNotEqual: "!=", + v3.FilterOperatorLessThan: "<", + v3.FilterOperatorLessThanOrEq: "<=", + v3.FilterOperatorGreaterThan: ">", + v3.FilterOperatorGreaterThanOrEq: ">=", + v3.FilterOperatorLike: "LIKE", + v3.FilterOperatorNotLike: "NOT LIKE", + v3.FilterOperatorContains: "LIKE", + v3.FilterOperatorNotContains: "NOT LIKE", + v3.FilterOperatorRegex: "match(%s, %s)", + v3.FilterOperatorNotRegex: "NOT match(%s, %s)", + v3.FilterOperatorIn: "IN", + v3.FilterOperatorNotIn: "NOT IN", + v3.FilterOperatorExists: "mapContains(%s_%s, '%s')", + v3.FilterOperatorNotExists: "not mapContains(%s_%s, '%s')", +} + +const ( + BODY = "body" + DISTRIBUTED_LOGS_V2 = "distributed_logs_v2" + DISTRIBUTED_LOGS_V2_RESOURCE = "distributed_logs_v2_resource" + NANOSECOND = 1000000000 +) + +func getClickhouseLogsColumnDataType(columnDataType v3.AttributeKeyDataType) string { + if columnDataType == v3.AttributeKeyDataTypeFloat64 || columnDataType == v3.AttributeKeyDataTypeInt64 { + return "number" + } + if columnDataType == v3.AttributeKeyDataTypeBool { + return "bool" + } + return "string" +} + +// getClickhouseColumnName returns the corresponding clickhouse column name for the given attribute/resource key +func getClickhouseColumnName(key v3.AttributeKey) string { + clickhouseColumn := key.Key + if key.Key == constants.TIMESTAMP || key.Key == "id" { + return key.Key + } + + //if the key is present 
in the topLevelColumn then it will be only searched in those columns, + //regardless if it is indexed/present again in resource or column attribute + if !key.IsColumn { + columnType := logsV3.GetClickhouseLogsColumnType(key.Type) + columnDataType := getClickhouseLogsColumnDataType(key.DataType) + clickhouseColumn = fmt.Sprintf("%s_%s['%s']", columnType, columnDataType, key.Key) + return clickhouseColumn + } + + // check if it is a static field + if key.Type == v3.AttributeKeyTypeUnspecified { + // name is the column name + return clickhouseColumn + } + + // materialized column created from query + clickhouseColumn = utils.GetClickhouseColumnNameV2(string(key.Type), string(key.DataType), key.Key) + return clickhouseColumn +} + +func getSelectLabels(aggregatorOperator v3.AggregateOperator, groupBy []v3.AttributeKey) string { + var selectLabels string + if aggregatorOperator == v3.AggregateOperatorNoOp { + selectLabels = "" + } else { + for _, tag := range groupBy { + columnName := getClickhouseColumnName(tag) + selectLabels += fmt.Sprintf(" %s as `%s`,", columnName, tag.Key) + } + } + return selectLabels +} + +func GetExistsNexistsFilter(op v3.FilterOperator, item v3.FilterItem) string { + if item.Key.Type == v3.AttributeKeyTypeUnspecified { + top := "!=" + if op == v3.FilterOperatorNotExists { + top = "=" + } + if val, ok := constants.StaticFieldsLogsV3[item.Key.Key]; ok { + // skip for timestamp and id + if val.Key == "" { + return "" + } + + columnName := getClickhouseColumnName(item.Key) + if val.DataType == v3.AttributeKeyDataTypeString { + return fmt.Sprintf("%s %s ''", columnName, top) + } else { + // we just have two types, number and string + return fmt.Sprintf("%s %s 0", columnName, top) + } + } + + } else if item.Key.IsColumn { + val := true + if op == v3.FilterOperatorNotExists { + val = false + } + return fmt.Sprintf("%s_exists`=%v", strings.TrimSuffix(getClickhouseColumnName(item.Key), "`"), val) + } + columnType := 
logsV3.GetClickhouseLogsColumnType(item.Key.Type) + columnDataType := getClickhouseLogsColumnDataType(item.Key.DataType) + return fmt.Sprintf(logOperators[op], columnType, columnDataType, item.Key.Key) +} + +func buildLogsTimeSeriesFilterQuery(fs *v3.FilterSet, groupBy []v3.AttributeKey, aggregateAttribute v3.AttributeKey) (string, error) { + var conditions []string + + if fs != nil && len(fs.Items) != 0 { + for _, item := range fs.Items { + + // skip if it's a resource attribute + if item.Key.Type == v3.AttributeKeyTypeResource { + continue + } + + if item.Key.IsJSON { + filter, err := GetJSONFilter(item) + if err != nil { + return "", err + } + conditions = append(conditions, filter) + continue + } + + // check if the user is searching for all attributes + if item.Key.Key == "__attrs" { + if (item.Operator != v3.FilterOperatorEqual && item.Operator != v3.FilterOperatorContains) || item.Key.DataType != v3.AttributeKeyDataTypeString { + return "", fmt.Errorf("only = operator and string data type is supported for __attrs") + } + val := utils.QuoteEscapedString(fmt.Sprintf("%v", item.Value)) + conditions = append(conditions, fmt.Sprintf("has(mapValues(attributes_string), '%v')", val)) + continue + } + + op := v3.FilterOperator(strings.ToLower(string(item.Operator))) + + var value interface{} + var err error + if op != v3.FilterOperatorExists && op != v3.FilterOperatorNotExists { + value, err = utils.ValidateAndCastValue(item.Value, item.Key.DataType) + if err != nil { + return "", fmt.Errorf("failed to validate and cast value for %s: %v", item.Key.Key, err) + } + } + + if logsOp, ok := logOperators[op]; ok { + switch op { + case v3.FilterOperatorExists, v3.FilterOperatorNotExists: + conditions = append(conditions, GetExistsNexistsFilter(op, item)) + case v3.FilterOperatorRegex, v3.FilterOperatorNotRegex: + columnName := getClickhouseColumnName(item.Key) + fmtVal := utils.ClickHouseFormattedValue(value) + conditions = append(conditions, fmt.Sprintf(logsOp, columnName, 
fmtVal)) + case v3.FilterOperatorContains, v3.FilterOperatorNotContains: + columnName := getClickhouseColumnName(item.Key) + val := utils.QuoteEscapedString(fmt.Sprintf("%v", item.Value)) + if columnName == BODY { + conditions = append(conditions, fmt.Sprintf("lower(%s) %s lower('%%%s%%')", columnName, logsOp, val)) + } else { + conditions = append(conditions, fmt.Sprintf("%s %s '%%%s%%'", columnName, logsOp, val)) + } + default: + columnName := getClickhouseColumnName(item.Key) + fmtVal := utils.ClickHouseFormattedValue(value) + + // for use lower for like and ilike + if op == v3.FilterOperatorLike || op == v3.FilterOperatorNotLike { + if columnName == BODY { + columnName = fmt.Sprintf("lower(%s)", columnName) + fmtVal = fmt.Sprintf("lower(%s)", fmtVal) + } + } + conditions = append(conditions, fmt.Sprintf("%s %s %s", columnName, logsOp, fmtVal)) + } + } else { + return "", fmt.Errorf("unsupported operator: %s", op) + } + + // add extra condition for map contains + // by default clickhouse is not able to utilize indexes for keys with all operators. + // mapContains forces the use of index. 
+ if item.Key.IsColumn == false && op != v3.FilterOperatorExists && op != v3.FilterOperatorNotExists { + conditions = append(conditions, GetExistsNexistsFilter(v3.FilterOperatorExists, item)) + } + } + } + + // add group by conditions to filter out log lines which doesn't have the key + for _, attr := range groupBy { + // skip if it's a resource attribute + if attr.Type == v3.AttributeKeyTypeResource { + continue + } + + if !attr.IsColumn { + columnType := logsV3.GetClickhouseLogsColumnType(attr.Type) + columnDataType := getClickhouseLogsColumnDataType(attr.DataType) + conditions = append(conditions, fmt.Sprintf("mapContains(%s_%s, '%s')", columnType, columnDataType, attr.Key)) + } else if attr.Type != v3.AttributeKeyTypeUnspecified { + // for materialzied columns + conditions = append(conditions, fmt.Sprintf("%s_exists`=true", strings.TrimSuffix(getClickhouseColumnName(attr), "`"))) + } + } + + // add conditions for aggregate attribute + if aggregateAttribute.Key != "" && aggregateAttribute.Type != v3.AttributeKeyTypeResource { + existsFilter := GetExistsNexistsFilter(v3.FilterOperatorExists, v3.FilterItem{Key: aggregateAttribute}) + conditions = append(conditions, existsFilter) + } + + queryString := strings.Join(conditions, " AND ") + return queryString, nil +} + +func buildLogsQuery(panelType v3.PanelType, start, end, step int64, mq *v3.BuilderQuery, graphLimitQtype string, preferRPM bool) (string, error) { + + filterSubQuery, err := buildLogsTimeSeriesFilterQuery(mq.Filters, mq.GroupBy, mq.AggregateAttribute) + if err != nil { + return "", err + } + if len(filterSubQuery) > 0 { + filterSubQuery = " AND " + filterSubQuery + } + + // timerange will be sent in epoch millisecond + logsStart := utils.GetEpochNanoSecs(start) + logsEnd := utils.GetEpochNanoSecs(end) + + // -1800 this is added so that the bucket start considers all the fingerprints. 
+ bucketStart := logsStart/NANOSECOND - 1800 + bucketEnd := logsEnd / NANOSECOND + + // bucket_start filter is added for primary key + timeFilter := fmt.Sprintf("(timestamp >= %d AND timestamp <= %d) AND (ts_bucket_start >= %d AND ts_bucket_start <= %d)", logsStart, logsEnd, bucketStart, bucketEnd) + + resourceSubQuery, err := buildResourceSubQuery(bucketStart, bucketEnd, mq.Filters, mq.GroupBy, mq.AggregateAttribute) + if err != nil { + return "", err + } + + if len(resourceSubQuery) > 0 { + filterSubQuery = filterSubQuery + " AND (resource_fingerprint GLOBAL IN " + resourceSubQuery + ")" + } + + selectLabels := getSelectLabels(mq.AggregateOperator, mq.GroupBy) + + having := logsV3.Having(mq.Having) + if having != "" { + having = " having " + having + } + + var queryTmpl string + if graphLimitQtype == constants.FirstQueryGraphLimit { + queryTmpl = "SELECT" + } else if panelType == v3.PanelTypeTable { + queryTmpl = + "SELECT now() as ts," + // step or aggregate interval is whole time period in case of table panel + step = (utils.GetEpochNanoSecs(end) - utils.GetEpochNanoSecs(start)) / NANOSECOND + } else if panelType == v3.PanelTypeGraph || panelType == v3.PanelTypeValue { + // Select the aggregate value for interval + queryTmpl = + fmt.Sprintf("SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL %d SECOND) AS ts,", step) + } + + queryTmpl = + queryTmpl + selectLabels + + " %s as value " + + "from signoz_logs." + DISTRIBUTED_LOGS_V2 + + " where " + timeFilter + "%s" + + "%s%s" + + "%s" + + // we dont need value for first query + // going with this route as for a cleaner approach on implementation + if graphLimitQtype == constants.FirstQueryGraphLimit { + queryTmpl = "SELECT " + logsV3.GetSelectKeys(mq.AggregateOperator, mq.GroupBy) + " from (" + queryTmpl + ")" + } + + groupBy := logsV3.GroupByAttributeKeyTags(panelType, graphLimitQtype, mq.GroupBy...) 
+ if panelType != v3.PanelTypeList && groupBy != "" { + groupBy = " group by " + groupBy + } + orderBy := orderByAttributeKeyTags(panelType, mq.OrderBy, mq.GroupBy) + if panelType != v3.PanelTypeList && orderBy != "" { + orderBy = " order by " + orderBy + } + + if graphLimitQtype == constants.SecondQueryGraphLimit { + filterSubQuery = filterSubQuery + " AND " + fmt.Sprintf("(%s) GLOBAL IN (", logsV3.GetSelectKeys(mq.AggregateOperator, mq.GroupBy)) + "#LIMIT_PLACEHOLDER)" + } + + aggregationKey := "" + if mq.AggregateAttribute.Key != "" { + aggregationKey = getClickhouseColumnName(mq.AggregateAttribute) + } + + switch mq.AggregateOperator { + case v3.AggregateOperatorRate: + rate := float64(step) + if preferRPM { + rate = rate / 60.0 + } + + op := fmt.Sprintf("count(%s)/%f", aggregationKey, rate) + query := fmt.Sprintf(queryTmpl, op, filterSubQuery, groupBy, having, orderBy) + return query, nil + case + v3.AggregateOperatorRateSum, + v3.AggregateOperatorRateMax, + v3.AggregateOperatorRateAvg, + v3.AggregateOperatorRateMin: + rate := float64(step) + if preferRPM { + rate = rate / 60.0 + } + + op := fmt.Sprintf("%s(%s)/%f", logsV3.AggregateOperatorToSQLFunc[mq.AggregateOperator], aggregationKey, rate) + query := fmt.Sprintf(queryTmpl, op, filterSubQuery, groupBy, having, orderBy) + return query, nil + case + v3.AggregateOperatorP05, + v3.AggregateOperatorP10, + v3.AggregateOperatorP20, + v3.AggregateOperatorP25, + v3.AggregateOperatorP50, + v3.AggregateOperatorP75, + v3.AggregateOperatorP90, + v3.AggregateOperatorP95, + v3.AggregateOperatorP99: + op := fmt.Sprintf("quantile(%v)(%s)", logsV3.AggregateOperatorToPercentile[mq.AggregateOperator], aggregationKey) + query := fmt.Sprintf(queryTmpl, op, filterSubQuery, groupBy, having, orderBy) + return query, nil + case v3.AggregateOperatorAvg, v3.AggregateOperatorSum, v3.AggregateOperatorMin, v3.AggregateOperatorMax: + op := fmt.Sprintf("%s(%s)", logsV3.AggregateOperatorToSQLFunc[mq.AggregateOperator], aggregationKey) + 
query := fmt.Sprintf(queryTmpl, op, filterSubQuery, groupBy, having, orderBy) + return query, nil + case v3.AggregateOperatorCount: + op := "toFloat64(count(*))" + query := fmt.Sprintf(queryTmpl, op, filterSubQuery, groupBy, having, orderBy) + return query, nil + case v3.AggregateOperatorCountDistinct: + op := fmt.Sprintf("toFloat64(count(distinct(%s)))", aggregationKey) + query := fmt.Sprintf(queryTmpl, op, filterSubQuery, groupBy, having, orderBy) + return query, nil + case v3.AggregateOperatorNoOp: + // sqlSelect := constants.LogsSQLSelect + // with noop any filter or different order by other than ts will use new table + sqlSelect := constants.LogsSQLSelectV2 + queryTmpl := sqlSelect + "from signoz_logs.%s where %s%s order by %s" + query := fmt.Sprintf(queryTmpl, DISTRIBUTED_LOGS_V2, timeFilter, filterSubQuery, orderBy) + return query, nil + default: + return "", fmt.Errorf("unsupported aggregate operator") + } +} + +func buildLogsLiveTailQuery(mq *v3.BuilderQuery) (string, error) { + filterSubQuery, err := buildLogsTimeSeriesFilterQuery(mq.Filters, mq.GroupBy, v3.AttributeKey{}) + if err != nil { + return "", err + } + + switch mq.AggregateOperator { + case v3.AggregateOperatorNoOp: + query := constants.LogsSQLSelectV2 + "from signoz_logs." 
+ DISTRIBUTED_LOGS_V2 + " where " + if len(filterSubQuery) > 0 { + query = query + filterSubQuery + " AND " + } + + return query, nil + default: + return "", fmt.Errorf("unsupported aggregate operator in live tail") + } +} + +// orderBy returns a string of comma separated tags for order by clause +// if there are remaining items which are not present in tags they are also added +// if the order is not specified, it defaults to ASC +func orderBy(panelType v3.PanelType, items []v3.OrderBy, tagLookup map[string]struct{}) []string { + var orderBy []string + + for _, item := range items { + if item.ColumnName == constants.SigNozOrderByValue { + orderBy = append(orderBy, fmt.Sprintf("value %s", item.Order)) + } else if _, ok := tagLookup[item.ColumnName]; ok { + orderBy = append(orderBy, fmt.Sprintf("`%s` %s", item.ColumnName, item.Order)) + } else if panelType == v3.PanelTypeList { + attr := v3.AttributeKey{Key: item.ColumnName, DataType: item.DataType, Type: item.Type, IsColumn: item.IsColumn} + name := getClickhouseColumnName(attr) + if item.IsColumn { + name = "`" + name + "`" + } + orderBy = append(orderBy, fmt.Sprintf("%s %s", name, item.Order)) + } + } + return orderBy +} + +func orderByAttributeKeyTags(panelType v3.PanelType, items []v3.OrderBy, tags []v3.AttributeKey) string { + + tagLookup := map[string]struct{}{} + for _, v := range tags { + tagLookup[v.Key] = struct{}{} + } + + orderByArray := orderBy(panelType, items, tagLookup) + + if len(orderByArray) == 0 { + if panelType == v3.PanelTypeList { + orderByArray = append(orderByArray, constants.TIMESTAMP+" DESC") + } else { + orderByArray = append(orderByArray, "value DESC") + } + } + + str := strings.Join(orderByArray, ",") + return str +} + +// PrepareLogsQuery prepares the query for logs +// start and end are in epoch millisecond +// step is in seconds +func PrepareLogsQuery(start, end int64, queryType v3.QueryType, panelType v3.PanelType, mq *v3.BuilderQuery, options v3.LogQBOptions) (string, error) { + + 
// adjust the start and end time to the step interval + // NOTE: Disabling this as it's creating confusion between charts and actual data + // if panelType != v3.PanelTypeList { + // start = start - (start % (mq.StepInterval * 1000)) + // end = end - (end % (mq.StepInterval * 1000)) + // } + + if options.IsLivetailQuery { + query, err := buildLogsLiveTailQuery(mq) + if err != nil { + return "", err + } + return query, nil + } else if options.GraphLimitQtype == constants.FirstQueryGraphLimit { + // give me just the groupby names + query, err := buildLogsQuery(panelType, start, end, mq.StepInterval, mq, options.GraphLimitQtype, options.PreferRPM) + if err != nil { + return "", err + } + query = logsV3.AddLimitToQuery(query, mq.Limit) + + return query, nil + } else if options.GraphLimitQtype == constants.SecondQueryGraphLimit { + query, err := buildLogsQuery(panelType, start, end, mq.StepInterval, mq, options.GraphLimitQtype, options.PreferRPM) + if err != nil { + return "", err + } + return query, nil + } + + query, err := buildLogsQuery(panelType, start, end, mq.StepInterval, mq, options.GraphLimitQtype, options.PreferRPM) + if err != nil { + return "", err + } + if panelType == v3.PanelTypeValue { + query, err = logsV3.ReduceQuery(query, mq.ReduceTo, mq.AggregateOperator) + } + + if panelType == v3.PanelTypeList { + // check if limit exceeded + if mq.Limit > 0 && mq.Offset >= mq.Limit { + return "", fmt.Errorf("max limit exceeded") + } + + if mq.PageSize > 0 { + if mq.Limit > 0 && mq.Offset+mq.PageSize > mq.Limit { + query = logsV3.AddLimitToQuery(query, mq.Limit-mq.Offset) + } else { + query = logsV3.AddLimitToQuery(query, mq.PageSize) + } + + // add offset to the query only if it is not orderd by timestamp. 
+ if !logsV3.IsOrderByTs(mq.OrderBy) { + query = logsV3.AddOffsetToQuery(query, mq.Offset) + } + + } else { + query = logsV3.AddLimitToQuery(query, mq.Limit) + } + } else if panelType == v3.PanelTypeTable { + query = logsV3.AddLimitToQuery(query, mq.Limit) + } + + return query, err +} diff --git a/pkg/query-service/app/logs/v4/query_builder_test.go b/pkg/query-service/app/logs/v4/query_builder_test.go new file mode 100644 index 0000000000..0de9bf1917 --- /dev/null +++ b/pkg/query-service/app/logs/v4/query_builder_test.go @@ -0,0 +1,1650 @@ +package v4 + +import ( + "testing" + + . "github.com/smartystreets/goconvey/convey" + "go.signoz.io/signoz/pkg/query-service/constants" + v3 "go.signoz.io/signoz/pkg/query-service/model/v3" +) + +var testGetClickhouseColumnNameData = []struct { + Name string + AttributeKey v3.AttributeKey + ExpectedColumnName string +}{ + { + Name: "attribute", + AttributeKey: v3.AttributeKey{Key: "user_name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, + ExpectedColumnName: "attributes_string['user_name']", + }, + { + Name: "resource", + AttributeKey: v3.AttributeKey{Key: "servicename", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeResource}, + ExpectedColumnName: "resources_string['servicename']", + }, + { + Name: "selected field", + AttributeKey: v3.AttributeKey{Key: "servicename", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag, IsColumn: true}, + ExpectedColumnName: "`attribute_string_servicename`", + }, + { + Name: "selected field resource", + AttributeKey: v3.AttributeKey{Key: "sdk_version", DataType: v3.AttributeKeyDataTypeInt64, Type: v3.AttributeKeyTypeResource, IsColumn: true}, + ExpectedColumnName: "`resource_number_sdk_version`", + }, + { + Name: "selected field float", + AttributeKey: v3.AttributeKey{Key: "sdk_version", DataType: v3.AttributeKeyDataTypeFloat64, Type: v3.AttributeKeyTypeTag, IsColumn: true}, + ExpectedColumnName: "`attribute_number_sdk_version`", 
+ }, + { + Name: "same name as top level column", + AttributeKey: v3.AttributeKey{Key: "trace_id", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, + ExpectedColumnName: "attributes_string['trace_id']", + }, + { + Name: "top level column", + AttributeKey: v3.AttributeKey{Key: "trace_id", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeUnspecified, IsColumn: true}, + ExpectedColumnName: "trace_id", + }, + { + Name: "name with - ", + AttributeKey: v3.AttributeKey{Key: "test-attr", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag, IsColumn: true}, + ExpectedColumnName: "`attribute_string_test-attr`", + }, +} + +func TestGetClickhouseColumnName(t *testing.T) { + for _, tt := range testGetClickhouseColumnNameData { + Convey("testGetClickhouseColumnNameData", t, func() { + columnName := getClickhouseColumnName(tt.AttributeKey) + So(columnName, ShouldEqual, tt.ExpectedColumnName) + }) + } +} + +var testGetSelectLabelsData = []struct { + Name string + AggregateOperator v3.AggregateOperator + GroupByTags []v3.AttributeKey + SelectLabels string +}{ + { + Name: "select fields for groupBy attribute", + AggregateOperator: v3.AggregateOperatorCount, + GroupByTags: []v3.AttributeKey{{Key: "user_name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}}, + SelectLabels: " attributes_string['user_name'] as `user_name`,", + }, + { + Name: "select fields for groupBy resource", + AggregateOperator: v3.AggregateOperatorCount, + GroupByTags: []v3.AttributeKey{{Key: "user_name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeResource}}, + SelectLabels: " resources_string['user_name'] as `user_name`,", + }, + { + Name: "select fields for groupBy attribute and resource", + AggregateOperator: v3.AggregateOperatorCount, + GroupByTags: []v3.AttributeKey{ + {Key: "user_name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeResource}, + {Key: "host", DataType: 
v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, + }, + SelectLabels: " resources_string['user_name'] as `user_name`, attributes_string['host'] as `host`,", + }, + { + Name: "select fields for groupBy materialized columns", + AggregateOperator: v3.AggregateOperatorCount, + GroupByTags: []v3.AttributeKey{{Key: "host", IsColumn: true}}, + SelectLabels: " host as `host`,", + }, + { + Name: "trace_id field as an attribute", + AggregateOperator: v3.AggregateOperatorCount, + GroupByTags: []v3.AttributeKey{{Key: "trace_id", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}}, + SelectLabels: " attributes_string['trace_id'] as `trace_id`,", + }, +} + +func TestGetSelectLabels(t *testing.T) { + for _, tt := range testGetSelectLabelsData { + Convey("testGetSelectLabelsData", t, func() { + selectLabels := getSelectLabels(tt.AggregateOperator, tt.GroupByTags) + So(selectLabels, ShouldEqual, tt.SelectLabels) + }) + } +} + +var timeSeriesFilterQueryData = []struct { + Name string + FilterSet *v3.FilterSet + GroupBy []v3.AttributeKey + ExpectedFilter string + Fields map[string]v3.AttributeKey + Error string +}{ + { + Name: "Test attribute and resource attribute", + FilterSet: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "user_name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, Value: "john", Operator: "="}, + {Key: v3.AttributeKey{Key: "k8s_namespace", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeResource}, Value: "my_service", Operator: "!="}, + }}, + ExpectedFilter: "attributes_string['user_name'] = 'john' AND mapContains(attributes_string, 'user_name')", + }, + { + Name: "Test attribute and resource attribute with different case", + FilterSet: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "user_name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, Value: "%JoHn%", Operator: "like"}, + {Key: 
v3.AttributeKey{Key: "k8s_namespace", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeResource}, Value: "%MyService%", Operator: "nlike"}, + }}, + ExpectedFilter: "attributes_string['user_name'] LIKE '%JoHn%' AND mapContains(attributes_string, 'user_name')", + }, + { + Name: "Test materialized column", + FilterSet: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "user_name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag, IsColumn: true}, Value: "john", Operator: "="}, + {Key: v3.AttributeKey{Key: "k8s_namespace", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeResource}, Value: "my_service", Operator: "!="}, + }}, + ExpectedFilter: "`attribute_string_user_name` = 'john'", + }, + { + Name: "Test like", + FilterSet: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "host", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, Value: "102.%", Operator: "like"}, + }}, + ExpectedFilter: "attributes_string['host'] LIKE '102.%' AND mapContains(attributes_string, 'host')", + }, + { + Name: "Test IN", + FilterSet: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "bytes", DataType: v3.AttributeKeyDataTypeFloat64, Type: v3.AttributeKeyTypeTag}, Value: []interface{}{1, 2, 3, 4}, Operator: "in"}, + }}, + ExpectedFilter: "attributes_number['bytes'] IN [1,2,3,4] AND mapContains(attributes_number, 'bytes')", + }, + { + Name: "Test DataType int64", + FilterSet: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "bytes", DataType: v3.AttributeKeyDataTypeInt64, Type: v3.AttributeKeyTypeTag}, Value: 10, Operator: ">"}, + }}, + ExpectedFilter: "attributes_number['bytes'] > 10 AND mapContains(attributes_number, 'bytes')", + }, + { + Name: "Test NOT IN", + FilterSet: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "name", DataType: 
v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, Value: []interface{}{"john", "bunny"}, Operator: "nin"}, + }}, + ExpectedFilter: "attributes_string['name'] NOT IN ['john','bunny'] AND mapContains(attributes_string, 'name')", + }, + { + Name: "Test exists", + FilterSet: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "bytes", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, Value: "", Operator: "exists"}, + }}, + ExpectedFilter: "mapContains(attributes_string, 'bytes')", + }, + { + Name: "Test not exists", + FilterSet: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "bytes", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, Value: "", Operator: "nexists"}, + }}, + ExpectedFilter: "not mapContains(attributes_string, 'bytes')", + }, + { + Name: "Test contains", + FilterSet: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "host", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, Value: "102.", Operator: "contains"}, + }}, + ExpectedFilter: "attributes_string['host'] LIKE '%102.%' AND mapContains(attributes_string, 'host')", + }, + { + Name: "Test contains with single quotes", + FilterSet: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "message", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, Value: "hello 'world'", Operator: "contains"}, + }}, + ExpectedFilter: "attributes_string['message'] LIKE '%hello \\'world\\'%' AND mapContains(attributes_string, 'message')", + }, + { + Name: "Test not contains", + FilterSet: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "host", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, Value: "102.", Operator: "ncontains"}, + }}, + ExpectedFilter: "attributes_string['host'] NOT LIKE '%102.%' AND mapContains(attributes_string, 
'host')", + }, + { + Name: "Test regex", + FilterSet: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "host", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag, IsColumn: true}, Value: "host: \"(?P\\S+)\"", Operator: "regex"}, + }}, + ExpectedFilter: "match(`attribute_string_host`, 'host: \"(?P\\\\S+)\"')", + }, + { + Name: "Test not regex", + FilterSet: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "host", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, Value: "102.", Operator: "nregex"}, + }}, + ExpectedFilter: "NOT match(attributes_string['host'], '102.') AND mapContains(attributes_string, 'host')", + }, + { + Name: "Test groupBy", + FilterSet: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "host", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, Value: "102.", Operator: "ncontains"}, + }}, + GroupBy: []v3.AttributeKey{{Key: "host", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}}, + ExpectedFilter: "attributes_string['host'] NOT LIKE '%102.%' AND mapContains(attributes_string, 'host') AND mapContains(attributes_string, 'host')", + }, + { + Name: "Test groupBy isColumn", + FilterSet: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "host", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, Value: "102.", Operator: "ncontains"}, + }}, + GroupBy: []v3.AttributeKey{{Key: "host", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag, IsColumn: true}}, + ExpectedFilter: "attributes_string['host'] NOT LIKE '%102.%' AND mapContains(attributes_string, 'host') AND `attribute_string_host_exists`=true", + }, + { + Name: "Wrong data", + FilterSet: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "bytes", Type: v3.AttributeKeyTypeTag, DataType: 
v3.AttributeKeyDataTypeFloat64}, Value: true, Operator: "="}, + }}, + Error: "failed to validate and cast value for bytes: invalid data type, expected float, got bool", + }, + { + Name: "Test top level field with metadata", + FilterSet: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "body", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, Value: "%test%", Operator: "like"}, + }}, + ExpectedFilter: "attributes_string['body'] LIKE '%test%' AND mapContains(attributes_string, 'body')", + }, + { + Name: "Test exists on top level field", + FilterSet: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "trace_id", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeUnspecified, IsColumn: true}, Operator: "exists"}, + }}, + ExpectedFilter: "trace_id != ''", + }, + { + Name: "Test not exists on top level field", + FilterSet: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "span_id", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeUnspecified, IsColumn: true}, Operator: "nexists"}, + }}, + ExpectedFilter: "span_id = ''", + }, + { + Name: "Test exists on top level field number", + FilterSet: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "trace_flags", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeUnspecified, IsColumn: true}, Operator: "exists"}, + }}, + ExpectedFilter: "trace_flags != 0", + }, + { + Name: "Test not exists on top level field number", + FilterSet: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "severity_number", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeUnspecified, IsColumn: true}, Operator: "nexists"}, + }}, + ExpectedFilter: "severity_number = 0", + }, + { + Name: "Test exists on materiazlied column", + FilterSet: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: 
v3.AttributeKey{Key: "method", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag, IsColumn: true}, Operator: "exists"}, + }}, + ExpectedFilter: "`attribute_string_method_exists`=true", + }, + { + Name: "Test nexists on materiazlied column", + FilterSet: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "status", DataType: v3.AttributeKeyDataTypeInt64, Type: v3.AttributeKeyTypeTag, IsColumn: true}, Operator: "nexists"}, + }}, + ExpectedFilter: "`attribute_number_status_exists`=false", + }, + { + Name: "Test for body contains and ncontains", + FilterSet: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "body", DataType: v3.AttributeKeyDataTypeString, IsColumn: true}, Operator: "contains", Value: "test"}, + {Key: v3.AttributeKey{Key: "body", DataType: v3.AttributeKeyDataTypeString, IsColumn: true}, Operator: "ncontains", Value: "test1"}, + }}, + ExpectedFilter: "lower(body) LIKE lower('%test%') AND lower(body) NOT LIKE lower('%test1%')", + }, + { + Name: "Test for body like and nlike", + FilterSet: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "body", DataType: v3.AttributeKeyDataTypeString, IsColumn: true}, Operator: "like", Value: "test"}, + {Key: v3.AttributeKey{Key: "body", DataType: v3.AttributeKeyDataTypeString, IsColumn: true}, Operator: "nlike", Value: "test1"}, + }}, + ExpectedFilter: "lower(body) LIKE lower('test') AND lower(body) NOT LIKE lower('test1')", + }, +} + +func TestBuildLogsTimeSeriesFilterQuery(t *testing.T) { + for _, tt := range timeSeriesFilterQueryData { + Convey("TestBuildLogsTimeSeriesFilterQuery", t, func() { + query, err := buildLogsTimeSeriesFilterQuery(tt.FilterSet, tt.GroupBy, v3.AttributeKey{}) + if tt.Error != "" { + So(err.Error(), ShouldEqual, tt.Error) + } else { + So(err, ShouldBeNil) + So(query, ShouldEqual, tt.ExpectedFilter) + } + + }) + } +} + +var testBuildLogsQueryData = []struct { + Name string + 
PanelType v3.PanelType + Start int64 + End int64 + Step int64 + BuilderQuery *v3.BuilderQuery + GroupByTags []v3.AttributeKey + TableName string + AggregateOperator v3.AggregateOperator + ExpectedQuery string + Type int + PreferRPM bool +}{ + { + Name: "Test aggregate count on select field", + PanelType: v3.PanelTypeGraph, + Start: 1680066360726210000, + End: 1680066458000000000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateOperator: v3.AggregateOperatorCount, + Expression: "A", + }, + TableName: "logs", + ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts, toFloat64(count(*)) as value " + + "from signoz_logs.distributed_logs_v2 where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) " + + "group by ts order by value DESC", + }, + { + Name: "Test aggregate count on a attribute", + PanelType: v3.PanelTypeGraph, + Start: 1680066360726210000, + End: 1680066458000000000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateAttribute: v3.AttributeKey{Key: "user_name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, + AggregateOperator: v3.AggregateOperatorCount, + Expression: "A", + }, + TableName: "logs", + ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts, toFloat64(count(*)) as value " + + "from signoz_logs.distributed_logs_v2 where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) " + + "AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) AND mapContains(attributes_string, 'user_name') group by ts order by value DESC", + }, + { + Name: "Test aggregate count on a with filter", + PanelType: v3.PanelTypeGraph, + Start: 1680066360726210000, + End: 1680066458000000000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + 
AggregateAttribute: v3.AttributeKey{Key: "user_name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, + AggregateOperator: v3.AggregateOperatorCount, + Filters: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "bytes", DataType: v3.AttributeKeyDataTypeFloat64, Type: v3.AttributeKeyTypeTag}, Value: 100, Operator: ">"}, + }}, + Expression: "A", + }, + TableName: "logs", + ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts, toFloat64(count(*)) as value " + + "from signoz_logs.distributed_logs_v2 where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND " + + "(ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) AND " + + "attributes_number['bytes'] > 100.000000 AND mapContains(attributes_number, 'bytes') AND mapContains(attributes_string, 'user_name') " + + "group by ts order by value DESC", + }, + // { + // Name: "Test aggregate count on a with resource filter", + // PanelType: v3.PanelTypeGraph, + // Start: 1680066360726210000, + // End: 1680066458000000000, + // BuilderQuery: &v3.BuilderQuery{ + // QueryName: "A", + // StepInterval: 60, + // AggregateAttribute: v3.AttributeKey{Key: "user_name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, + // AggregateOperator: v3.AggregateOperatorCount, + // Filters: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + // {Key: v3.AttributeKey{Key: "bytes", DataType: v3.AttributeKeyDataTypeFloat64, Type: v3.AttributeKeyTypeResource}, Value: 100, Operator: ">"}, + // }}, + // Expression: "A", + // }, + // TableName: "logs", + // ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts, toFloat64(count(*)) as value " + + // "from signoz_logs.distributed_logs_v2 where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND " + + // "(ts_bucket_start >= 1680064560 AND ts_bucket_start <= 
1680066458) AND " + + // "mapContains(attributes_string, 'user_name') AND (resource_fingerprint GLOBAL IN (SELECT fingerprint FROM signoz_logs.distributed_logs_v2_resource " + + // "WHERE (seen_at_ts_bucket_start >= 1680064560) AND (seen_at_ts_bucket_start <= 1680066458) AND simpleJSONExtractString(lower(labels), 'bytes') > 100.000000" + + // " AND lower(labels) like '%bytes%')) group by ts order by value DESC", + // }, + { + Name: "Test aggregate count distinct and order by value", + PanelType: v3.PanelTypeGraph, + Start: 1680066360726210000, + End: 1680066458000000000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateAttribute: v3.AttributeKey{Key: "name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag, IsColumn: true}, + AggregateOperator: v3.AggregateOperatorCountDistinct, + Expression: "A", + OrderBy: []v3.OrderBy{{ColumnName: "#SIGNOZ_VALUE", Order: "ASC"}}, + }, + TableName: "logs", + ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts, toFloat64(count(distinct(`attribute_string_name`))) as value " + + "from signoz_logs.distributed_logs_v2 where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) AND `attribute_string_name_exists`=true group by " + + "ts order by value ASC", + }, + { + Name: "Test aggregate count distinct on non selected field", + PanelType: v3.PanelTypeGraph, + Start: 1680066360726210000, + End: 1680066458000000000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateAttribute: v3.AttributeKey{Key: "name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, + AggregateOperator: v3.AggregateOperatorCountDistinct, + Expression: "A", + }, + TableName: "logs", + ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts, 
toFloat64(count(distinct(attributes_string['name']))) as value " + + "from signoz_logs.distributed_logs_v2 where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) AND mapContains(attributes_string, 'name') group by ts order by value DESC", + }, + { + Name: "Test aggregate count distinct on non selected field containing dot", + PanelType: v3.PanelTypeGraph, + Start: 1680066360726210000, + End: 1680066458000000000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateAttribute: v3.AttributeKey{Key: "method.name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, + AggregateOperator: v3.AggregateOperatorCountDistinct, + Expression: "A", + GroupBy: []v3.AttributeKey{{Key: "host.name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}}, + OrderBy: []v3.OrderBy{{ColumnName: "host.name", Order: "ASC"}, {ColumnName: "ts", Order: "ASC", Key: "ts"}}, + }, + TableName: "logs", + ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts, attributes_string['host.name'] as `host.name`, " + + "toFloat64(count(distinct(attributes_string['method.name']))) as value from signoz_logs.distributed_logs_v2 " + + "where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) AND mapContains(attributes_string, 'host.name') " + + "AND mapContains(attributes_string, 'method.name') group by `host.name`,ts order by `host.name` ASC", + }, + { + Name: "Test aggregate count distinct on selected field containing dot", + PanelType: v3.PanelTypeGraph, + Start: 1680066360726210000, + End: 1680066458000000000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateAttribute: v3.AttributeKey{Key: "method.name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag, 
IsColumn: true}, + AggregateOperator: v3.AggregateOperatorCountDistinct, + Expression: "A", + GroupBy: []v3.AttributeKey{{Key: "host.name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag, IsColumn: true}}, + OrderBy: []v3.OrderBy{{ColumnName: "host.name", Order: "ASC"}, {ColumnName: "ts", Order: "ASC", Key: "ts", IsColumn: true}}, + }, + + TableName: "logs", + ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts, `attribute_string_host$$name` as `host.name`, toFloat64(count(distinct(`attribute_string_method$$name`))) as value" + + " from signoz_logs.distributed_logs_v2 where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) " + + "AND `attribute_string_host$$name_exists`=true AND `attribute_string_method$$name_exists`=true " + + "group by `host.name`,ts " + + "order by `host.name` ASC", + }, + { + Name: "Test aggregate count distinct with filter and groupBy", + PanelType: v3.PanelTypeGraph, + Start: 1680066360726210000, + End: 1680066458000000000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateAttribute: v3.AttributeKey{Key: "name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag, IsColumn: true}, + AggregateOperator: v3.AggregateOperatorCountDistinct, + Expression: "A", + Filters: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "method", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, Value: "GET", Operator: "="}, + {Key: v3.AttributeKey{Key: "x", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeResource}, Value: "abc", Operator: "!="}, + }, + }, + GroupBy: []v3.AttributeKey{{Key: "method", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}}, + OrderBy: []v3.OrderBy{{ColumnName: "method", Order: "ASC"}, {ColumnName: "ts", Order: "ASC", Key: "ts", 
IsColumn: true}}, + }, + TableName: "logs", + ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts," + + " attributes_string['method'] as `method`, " + + "toFloat64(count(distinct(`attribute_string_name`))) as value from signoz_logs.distributed_logs_v2 " + + "where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) " + + "AND attributes_string['method'] = 'GET' AND mapContains(attributes_string, 'method') " + + "AND mapContains(attributes_string, 'method') AND `attribute_string_name_exists`=true " + + "AND (resource_fingerprint GLOBAL IN (SELECT fingerprint FROM signoz_logs.distributed_logs_v2_resource " + + "WHERE (seen_at_ts_bucket_start >= 1680064560) AND (seen_at_ts_bucket_start <= 1680066458) AND simpleJSONExtractString(lower(labels), 'x') != 'abc' " + + "AND lower(labels) not like '%x%abc%')) group by `method`,ts " + + "order by `method` ASC", + }, + { + Name: "Test aggregate count with multiple filter,groupBy and orderBy", + PanelType: v3.PanelTypeGraph, + Start: 1680066360726210000, + End: 1680066458000000000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateAttribute: v3.AttributeKey{Key: "name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag, IsColumn: true}, + AggregateOperator: v3.AggregateOperatorCountDistinct, + Expression: "A", + Filters: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "method", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, Value: "GET", Operator: "="}, + {Key: v3.AttributeKey{Key: "x", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeResource}, Value: "abc", Operator: "!="}, + }, + }, + GroupBy: []v3.AttributeKey{{Key: "method", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, {Key: "x", DataType: v3.AttributeKeyDataTypeString, Type: 
v3.AttributeKeyTypeResource}}, + OrderBy: []v3.OrderBy{{ColumnName: "method", Order: "ASC"}, {ColumnName: "x", Order: "ASC"}}, + }, + TableName: "logs", + ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts," + + " attributes_string['method'] as `method`, " + + "resources_string['x'] as `x`, " + + "toFloat64(count(distinct(`attribute_string_name`))) as value from signoz_logs.distributed_logs_v2 " + + "where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) " + + "AND attributes_string['method'] = 'GET' AND mapContains(attributes_string, 'method') " + + "AND mapContains(attributes_string, 'method') AND `attribute_string_name_exists`=true " + + "AND (resource_fingerprint GLOBAL IN (SELECT fingerprint FROM signoz_logs.distributed_logs_v2_resource " + + "WHERE (seen_at_ts_bucket_start >= 1680064560) AND (seen_at_ts_bucket_start <= 1680066458) AND simpleJSONExtractString(lower(labels), 'x') != 'abc' " + + "AND lower(labels) not like '%x%abc%' AND ( (simpleJSONHas(lower(labels), 'x') AND lower(labels) like '%x%') ))) group by `method`,`x`,ts order by `method` ASC,`x` ASC", + }, + { + Name: "Test aggregate avg", + PanelType: v3.PanelTypeGraph, + Start: 1680066360726210000, + End: 1680066458000000000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateAttribute: v3.AttributeKey{Key: "bytes", DataType: v3.AttributeKeyDataTypeFloat64, Type: v3.AttributeKeyTypeTag}, + AggregateOperator: v3.AggregateOperatorAvg, + Expression: "A", + Filters: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "method", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, Value: "GET", Operator: "="}, + }, + }, + GroupBy: []v3.AttributeKey{{Key: "method", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}}, + OrderBy: []v3.OrderBy{{ColumnName: "method", 
Order: "ASC"}, {ColumnName: "x", Order: "ASC", Key: "x", IsColumn: true}}, + }, + TableName: "logs", + ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts," + + " attributes_string['method'] as `method`, " + + "avg(attributes_number['bytes']) as value " + + "from signoz_logs.distributed_logs_v2 " + + "where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) " + + "AND attributes_string['method'] = 'GET' AND mapContains(attributes_string, 'method') " + + "AND mapContains(attributes_string, 'method') " + + "AND mapContains(attributes_number, 'bytes') " + + "group by `method`,ts " + + "order by `method` ASC", + }, + { + Name: "Test aggregate sum", + PanelType: v3.PanelTypeGraph, + Start: 1680066360726210000, + End: 1680066458000000000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateAttribute: v3.AttributeKey{Key: "bytes", DataType: v3.AttributeKeyDataTypeFloat64, Type: v3.AttributeKeyTypeTag, IsColumn: true}, + AggregateOperator: v3.AggregateOperatorSum, + Expression: "A", + Filters: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "method", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, Value: "GET", Operator: "="}, + }, + }, + GroupBy: []v3.AttributeKey{{Key: "method", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}}, + OrderBy: []v3.OrderBy{{ColumnName: "method", Order: "ASC"}}, + }, + TableName: "logs", + ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts," + + " attributes_string['method'] as `method`, " + + "sum(`attribute_number_bytes`) as value " + + "from signoz_logs.distributed_logs_v2 " + + "where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) " + + "AND 
attributes_string['method'] = 'GET' AND mapContains(attributes_string, 'method') " + + "AND mapContains(attributes_string, 'method') " + + "AND `attribute_number_bytes_exists`=true " + + "group by `method`,ts " + + "order by `method` ASC", + }, + { + Name: "Test aggregate min", + PanelType: v3.PanelTypeGraph, + Start: 1680066360726210000, + End: 1680066458000000000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateAttribute: v3.AttributeKey{Key: "bytes", DataType: v3.AttributeKeyDataTypeFloat64, Type: v3.AttributeKeyTypeTag, IsColumn: true}, + AggregateOperator: v3.AggregateOperatorMin, + Expression: "A", + Filters: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "method", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, Value: "GET", Operator: "="}, + }, + }, + GroupBy: []v3.AttributeKey{{Key: "method", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}}, + OrderBy: []v3.OrderBy{{ColumnName: "method", Order: "ASC"}}, + }, + TableName: "logs", + ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts," + + " attributes_string['method'] as `method`, " + + "min(`attribute_number_bytes`) as value " + + "from signoz_logs.distributed_logs_v2 " + + "where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) " + + "AND attributes_string['method'] = 'GET' AND mapContains(attributes_string, 'method') " + + "AND mapContains(attributes_string, 'method') " + + "AND `attribute_number_bytes_exists`=true " + + "group by `method`,ts " + + "order by `method` ASC", + }, + { + Name: "Test aggregate max", + PanelType: v3.PanelTypeGraph, + Start: 1680066360726210000, + End: 1680066458000000000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateAttribute: v3.AttributeKey{Key: "bytes", DataType: 
v3.AttributeKeyDataTypeFloat64, Type: v3.AttributeKeyTypeTag, IsColumn: true}, + AggregateOperator: v3.AggregateOperatorMax, + Expression: "A", + Filters: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "method", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, Value: "GET", Operator: "="}, + }, + }, + GroupBy: []v3.AttributeKey{{Key: "method", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}}, + OrderBy: []v3.OrderBy{{ColumnName: "method", Order: "ASC"}}, + }, + TableName: "logs", + ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts," + + " attributes_string['method'] as `method`, " + + "max(`attribute_number_bytes`) as value " + + "from signoz_logs.distributed_logs_v2 " + + "where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) " + + "AND attributes_string['method'] = 'GET' AND mapContains(attributes_string, 'method') " + + "AND mapContains(attributes_string, 'method') " + + "AND `attribute_number_bytes_exists`=true " + + "group by `method`,ts " + + "order by `method` ASC", + }, + { + Name: "Test aggregate PXX", + PanelType: v3.PanelTypeGraph, + Start: 1680066360726210000, + End: 1680066458000000000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateAttribute: v3.AttributeKey{Key: "bytes", DataType: v3.AttributeKeyDataTypeFloat64, Type: v3.AttributeKeyTypeTag, IsColumn: true}, + AggregateOperator: v3.AggregateOperatorP05, + Expression: "A", + Filters: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{}}, + GroupBy: []v3.AttributeKey{{Key: "method", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}}, + OrderBy: []v3.OrderBy{{ColumnName: "method", Order: "ASC"}}, + }, + TableName: "logs", + ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) 
AS ts," + + " attributes_string['method'] as `method`, " + + "quantile(0.05)(`attribute_number_bytes`) as value " + + "from signoz_logs.distributed_logs_v2 " + + "where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) " + + "AND mapContains(attributes_string, 'method') " + + "AND `attribute_number_bytes_exists`=true " + + "group by `method`,ts " + + "order by `method` ASC", + }, + { + Name: "Test aggregate RateSum", + PanelType: v3.PanelTypeGraph, + Start: 1680066360726210000, + End: 1680066458000000000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateAttribute: v3.AttributeKey{Key: "bytes", DataType: v3.AttributeKeyDataTypeFloat64, Type: v3.AttributeKeyTypeTag, IsColumn: true}, + AggregateOperator: v3.AggregateOperatorRateSum, + Expression: "A", + Filters: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{}}, + GroupBy: []v3.AttributeKey{{Key: "method", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}}, + OrderBy: []v3.OrderBy{{ColumnName: "method", Order: "ASC"}}, + }, + TableName: "logs", + PreferRPM: true, + ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts, attributes_string['method'] as `method`" + + ", sum(`attribute_number_bytes`)/1.000000 as value from signoz_logs.distributed_logs_v2 " + + "where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) " + + "AND mapContains(attributes_string, 'method') " + + "AND `attribute_number_bytes_exists`=true " + + "group by `method`,ts order by `method` ASC", + }, + { + Name: "Test aggregate rate", + PanelType: v3.PanelTypeGraph, + Start: 1680066360726210000, + End: 1680066458000000000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateAttribute: v3.AttributeKey{Key: "bytes", Type: 
v3.AttributeKeyTypeTag, DataType: v3.AttributeKeyDataTypeFloat64}, + AggregateOperator: v3.AggregateOperatorRate, + Expression: "A", + Filters: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{}}, + GroupBy: []v3.AttributeKey{{Key: "method", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}}, + OrderBy: []v3.OrderBy{{ColumnName: "method", Order: "ASC"}}, + }, + TableName: "logs", + PreferRPM: false, + ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts, attributes_string['method'] as `method`" + + ", count(attributes_number['bytes'])/60.000000 as value " + + "from signoz_logs.distributed_logs_v2 where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) " + + "AND mapContains(attributes_string, 'method') " + + "AND mapContains(attributes_number, 'bytes') " + + "group by `method`,ts " + + "order by `method` ASC", + }, + { + Name: "Test aggregate RateSum without materialized column", + PanelType: v3.PanelTypeGraph, + Start: 1680066360726210000, + End: 1680066458000000000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateAttribute: v3.AttributeKey{Key: "bytes", Type: v3.AttributeKeyTypeTag, DataType: v3.AttributeKeyDataTypeFloat64}, + AggregateOperator: v3.AggregateOperatorRateSum, + Expression: "A", + Filters: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{}}, + GroupBy: []v3.AttributeKey{{Key: "method", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}}, + OrderBy: []v3.OrderBy{{ColumnName: "method", Order: "ASC"}}, + }, + TableName: "logs", + PreferRPM: true, + ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts, " + + "attributes_string['method'] as `method`, " + + "sum(attributes_number['bytes'])/1.000000 as value " + + "from signoz_logs.distributed_logs_v2 where (timestamp >= 
1680066360726210000 AND timestamp <= 1680066458000000000) AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) " + + "AND mapContains(attributes_string, 'method') " + + "AND mapContains(attributes_number, 'bytes') " + + "group by `method`,ts " + + "order by `method` ASC", + }, + { + Name: "Test Noop", + PanelType: v3.PanelTypeList, + Start: 1680066360726210000, + End: 1680066458000000000, + BuilderQuery: &v3.BuilderQuery{ + SelectColumns: []v3.AttributeKey{}, + QueryName: "A", + StepInterval: 60, + AggregateOperator: v3.AggregateOperatorNoOp, + Expression: "A", + Filters: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{}}, + OrderBy: []v3.OrderBy{ + v3.OrderBy{ColumnName: "timestamp", Order: "DESC"}, + }, + }, + ExpectedQuery: "SELECT timestamp, id, trace_id, span_id, trace_flags, severity_text, severity_number, body,attributes_string,attributes_number,attributes_bool,resources_string " + + "from signoz_logs.distributed_logs_v2 where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) " + + "order by timestamp DESC", + }, + { + Name: "Test Noop order by custom", + PanelType: v3.PanelTypeList, + Start: 1680066360726210000, + End: 1680066458000000000, + BuilderQuery: &v3.BuilderQuery{ + SelectColumns: []v3.AttributeKey{}, + QueryName: "A", + AggregateOperator: v3.AggregateOperatorNoOp, + Expression: "A", + Filters: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{}}, + OrderBy: []v3.OrderBy{{ColumnName: "method", DataType: v3.AttributeKeyDataTypeString, Order: "ASC", IsColumn: true}}, + }, + ExpectedQuery: "SELECT timestamp, id, trace_id, span_id, trace_flags, severity_text, severity_number, body,attributes_string,attributes_number,attributes_bool,resources_string " + + "from signoz_logs.distributed_logs_v2 where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 
1680066458) order by `method` ASC", + }, + { + Name: "Test Noop with filter", + PanelType: v3.PanelTypeList, + Start: 1680066360726210000, + End: 1680066458000000000, + BuilderQuery: &v3.BuilderQuery{ + SelectColumns: []v3.AttributeKey{}, + QueryName: "A", + AggregateOperator: v3.AggregateOperatorNoOp, + Expression: "A", + Filters: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "severity_number", DataType: v3.AttributeKeyDataTypeInt64, IsColumn: true}, Operator: "!=", Value: 0}, + }}, + }, + ExpectedQuery: "SELECT timestamp, id, trace_id, span_id, trace_flags, severity_text, severity_number, body,attributes_string,attributes_number,attributes_bool,resources_string " + + "from signoz_logs.distributed_logs_v2 where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) " + + "AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) AND severity_number != 0 order by timestamp DESC", + }, + { + Name: "Test aggregate with having clause", + PanelType: v3.PanelTypeGraph, + Start: 1680066360726210000, + End: 1680066458000000000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateAttribute: v3.AttributeKey{Key: "name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, + AggregateOperator: v3.AggregateOperatorCountDistinct, + Expression: "A", + Having: []v3.Having{ + { + ColumnName: "name", + Operator: ">", + Value: 10, + }, + }, + }, + TableName: "logs", + ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts, toFloat64(count(distinct(attributes_string['name']))) as value " + + "from signoz_logs.distributed_logs_v2 where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) " + + "AND mapContains(attributes_string, 'name') group by ts having value > 10 order by value DESC", + }, + { + Name: "Test aggregate with having 
clause and filters", + PanelType: v3.PanelTypeGraph, + Start: 1680066360726210000, + End: 1680066458000000000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateAttribute: v3.AttributeKey{Key: "name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, + AggregateOperator: v3.AggregateOperatorCountDistinct, + Expression: "A", + Filters: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "method", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, Value: "GET", Operator: "="}, + }, + }, + Having: []v3.Having{ + { + ColumnName: "name", + Operator: ">", + Value: 10, + }, + }, + }, + TableName: "logs", + ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts, toFloat64(count(distinct(attributes_string['name']))) as value " + + "from signoz_logs.distributed_logs_v2 where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) " + + "AND attributes_string['method'] = 'GET' AND mapContains(attributes_string, 'method') AND mapContains(attributes_string, 'name') group by ts having value > 10 order by value DESC", + }, + { + Name: "Test top level key", + PanelType: v3.PanelTypeGraph, + Start: 1680066360726210000, + End: 1680066458000000000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateAttribute: v3.AttributeKey{Key: "name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, + AggregateOperator: v3.AggregateOperatorCountDistinct, + Expression: "A", + Filters: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "body", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeUnspecified, IsColumn: true}, Value: "%test%", Operator: "like"}, + }, + }, + Having: []v3.Having{ + { + ColumnName: "name", + Operator: ">", + Value: 10, + }, + }, + }, 
+ TableName: "logs", + ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts, toFloat64(count(distinct(attributes_string['name']))) as value " + + "from signoz_logs.distributed_logs_v2 where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) " + + "AND lower(body) LIKE lower('%test%') AND mapContains(attributes_string, 'name') group by ts having value > 10 order by value DESC", + }, + { + Name: "Test attribute with same name as top level key", + PanelType: v3.PanelTypeGraph, + Start: 1680066360726210000, + End: 1680066458000000000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateAttribute: v3.AttributeKey{Key: "name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, + AggregateOperator: v3.AggregateOperatorCountDistinct, + Expression: "A", + Filters: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "body", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, Value: "%test%", Operator: "like"}, + }, + }, + Having: []v3.Having{ + { + ColumnName: "name", + Operator: ">", + Value: 10, + }, + }, + }, + TableName: "logs", + ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts, toFloat64(count(distinct(attributes_string['name']))) as value " + + "from signoz_logs.distributed_logs_v2 where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) " + + "AND attributes_string['body'] LIKE '%test%' AND mapContains(attributes_string, 'body') AND mapContains(attributes_string, 'name') group by ts having value > 10 order by value DESC", + }, + + // Tests for table panel type + { + Name: "TABLE: Test count", + PanelType: v3.PanelTypeTable, + Start: 1680066360726210000, + End: 1680066458000000000, + 
BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateOperator: v3.AggregateOperatorCount, + Expression: "A", + }, + TableName: "logs", + ExpectedQuery: "SELECT now() as ts, toFloat64(count(*)) as value from signoz_logs.distributed_logs_v2 where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) " + + "AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) order by value DESC", + }, + { + Name: "TABLE: Test count with groupBy", + PanelType: v3.PanelTypeTable, + Start: 1680066360726210000, + End: 1680066458000000000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateOperator: v3.AggregateOperatorCount, + Expression: "A", + GroupBy: []v3.AttributeKey{ + {Key: "name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, + }, + }, + TableName: "logs", + ExpectedQuery: "SELECT now() as ts, attributes_string['name'] as `name`, toFloat64(count(*)) as value from signoz_logs.distributed_logs_v2 where " + + "(timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) " + + "AND mapContains(attributes_string, 'name') group by `name` order by value DESC", + }, + { + Name: "TABLE: Test rate with groupBy", + PanelType: v3.PanelTypeTable, + Start: 1680066360726210000, + End: 1680066458000000000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateOperator: v3.AggregateOperatorRate, + Expression: "A", + GroupBy: []v3.AttributeKey{ + {Key: "name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, + }, + }, + TableName: "logs", + ExpectedQuery: "SELECT now() as ts, attributes_string['name'] as `name`, count()/97.000000 as value from signoz_logs.distributed_logs_v2 " + + "where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) " + + "AND 
mapContains(attributes_string, 'name') group by `name` order by value DESC", + }, + { + Name: "TABLE: Test count with groupBy, orderBy", + PanelType: v3.PanelTypeTable, + Start: 1680066360726210000, + End: 1680066458000000000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateOperator: v3.AggregateOperatorCount, + Expression: "A", + GroupBy: []v3.AttributeKey{ + {Key: "name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, + }, + OrderBy: []v3.OrderBy{ + {ColumnName: "name", Order: "DESC"}, + }, + }, + TableName: "logs", + ExpectedQuery: "SELECT now() as ts, attributes_string['name'] as `name`, toFloat64(count(*)) as value from signoz_logs.distributed_logs_v2 " + + "where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) " + + "AND mapContains(attributes_string, 'name') group by `name` order by `name` DESC", + }, + { + Name: "TABLE: Test count with JSON Filter, groupBy, orderBy", + PanelType: v3.PanelTypeTable, + Start: 1680066360726210000, + End: 1680066458000000000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateOperator: v3.AggregateOperatorCount, + Expression: "A", + Filters: &v3.FilterSet{ + Operator: "AND", + Items: []v3.FilterItem{ + { + Key: v3.AttributeKey{ + Key: "body.message", + DataType: "string", + IsJSON: true, + }, + Operator: "contains", + Value: "a", + }, + }, + }, + GroupBy: []v3.AttributeKey{ + {Key: "name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, + }, + OrderBy: []v3.OrderBy{ + {ColumnName: "name", Order: "DESC"}, + }, + }, + TableName: "logs", + ExpectedQuery: "SELECT now() as ts, attributes_string['name'] as `name`, toFloat64(count(*)) as value from signoz_logs.distributed_logs_v2 " + + "where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 
1680066458) " + + "AND lower(body) like lower('%message%') AND JSON_EXISTS(body, '$.\"message\"') AND JSON_VALUE(body, '$.\"message\"') LIKE '%a%' " + + "AND mapContains(attributes_string, 'name') group by `name` order by `name` DESC", + }, + { + Name: "TABLE: Test count with JSON Filter Array, groupBy, orderBy", + PanelType: v3.PanelTypeTable, + Start: 1680066360726210000, + End: 1680066458000000000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateOperator: v3.AggregateOperatorCount, + Expression: "A", + Filters: &v3.FilterSet{ + Operator: "AND", + Items: []v3.FilterItem{ + { + Key: v3.AttributeKey{ + Key: "body.requestor_list[*]", + DataType: "array(string)", + IsJSON: true, + }, + Operator: "has", + Value: "index_service", + }, + }, + }, + GroupBy: []v3.AttributeKey{ + {Key: "name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, + }, + OrderBy: []v3.OrderBy{ + {ColumnName: "name", Order: "DESC"}, + }, + }, + TableName: "logs", + ExpectedQuery: "SELECT now() as ts, attributes_string['name'] as `name`, toFloat64(count(*)) as value from signoz_logs.distributed_logs_v2 " + + "where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) " + + "AND lower(body) like lower('%requestor_list%') AND lower(body) like lower('%index_service%') AND " + + "has(JSONExtract(JSON_QUERY(body, '$.\"requestor_list\"[*]'), 'Array(String)'), 'index_service') AND mapContains(attributes_string, 'name') " + + "group by `name` order by `name` DESC", + }, +} + +func TestBuildLogsQuery(t *testing.T) { + for _, tt := range testBuildLogsQueryData { + Convey("TestBuildLogsQuery", t, func() { + query, err := buildLogsQuery(tt.PanelType, tt.Start, tt.End, tt.BuilderQuery.StepInterval, tt.BuilderQuery, "", tt.PreferRPM) + So(err, ShouldBeNil) + So(query, ShouldEqual, tt.ExpectedQuery) + + }) + } +} + +var testOrderBy = []struct { + Name string + 
PanelType v3.PanelType + Items []v3.OrderBy + Tags []v3.AttributeKey + Result string +}{ + { + Name: "Test 1", + PanelType: v3.PanelTypeGraph, + Items: []v3.OrderBy{ + { + ColumnName: "name", + Order: "asc", + }, + { + ColumnName: constants.SigNozOrderByValue, + Order: "desc", + }, + }, + Tags: []v3.AttributeKey{ + {Key: "name"}, + }, + Result: "`name` asc,value desc", + }, + { + Name: "Test 2", + PanelType: v3.PanelTypeGraph, + Items: []v3.OrderBy{ + { + ColumnName: "name", + Order: "asc", + }, + { + ColumnName: "bytes", + Order: "asc", + }, + }, + Tags: []v3.AttributeKey{ + {Key: "name"}, + {Key: "bytes"}, + }, + Result: "`name` asc,`bytes` asc", + }, + { + Name: "Test Graph item not present in tag", + PanelType: v3.PanelTypeGraph, + Items: []v3.OrderBy{ + { + ColumnName: "name", + Order: "asc", + }, + { + ColumnName: "bytes", + Order: "asc", + }, + { + ColumnName: "method", + Order: "asc", + }, + }, + Tags: []v3.AttributeKey{ + {Key: "name"}, + {Key: "bytes"}, + }, + Result: "`name` asc,`bytes` asc", + }, + { + Name: "Test 3", + PanelType: v3.PanelTypeList, + Items: []v3.OrderBy{ + { + ColumnName: "name", + Order: "asc", + }, + { + ColumnName: constants.SigNozOrderByValue, + Order: "asc", + }, + { + ColumnName: "bytes", + Order: "asc", + }, + }, + Tags: []v3.AttributeKey{ + {Key: "name"}, + {Key: "bytes"}, + }, + Result: "`name` asc,value asc,`bytes` asc", + }, + { + Name: "Test 4", + PanelType: v3.PanelTypeList, + Items: []v3.OrderBy{ + { + ColumnName: "name", + Order: "asc", + }, + { + ColumnName: constants.SigNozOrderByValue, + Order: "asc", + }, + { + ColumnName: "bytes", + Order: "asc", + }, + { + ColumnName: "response_time", + Order: "desc", + Key: "response_time", + Type: v3.AttributeKeyTypeTag, + DataType: v3.AttributeKeyDataTypeString, + }, + }, + Tags: []v3.AttributeKey{ + {Key: "name"}, + {Key: "bytes"}, + }, + Result: "`name` asc,value asc,`bytes` asc,attributes_string['response_time'] desc", + }, +} + +func TestOrderBy(t *testing.T) { + for _, tt := 
range testOrderBy { + Convey("testOrderBy", t, func() { + res := orderByAttributeKeyTags(tt.PanelType, tt.Items, tt.Tags) + So(res, ShouldResemble, tt.Result) + }) + } +} + +var testPrepLogsQueryData = []struct { + Name string + PanelType v3.PanelType + Start int64 + End int64 + Step int64 + BuilderQuery *v3.BuilderQuery + GroupByTags []v3.AttributeKey + TableName string + AggregateOperator v3.AggregateOperator + ExpectedQuery string + Options v3.LogQBOptions +}{ + { + Name: "Test TS with limit- first", + PanelType: v3.PanelTypeGraph, + Start: 1680066360726, + End: 1680066458000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateAttribute: v3.AttributeKey{Key: "name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, + AggregateOperator: v3.AggregateOperatorCountDistinct, + Expression: "A", + Filters: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "method", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, Value: "GET", Operator: "="}, + }, + }, + Limit: 10, + GroupBy: []v3.AttributeKey{{Key: "method", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}}, + }, + TableName: "logs", + ExpectedQuery: "SELECT `method` from (SELECT attributes_string['method'] as `method`, toFloat64(count(distinct(attributes_string['name']))) " + + "as value from signoz_logs.distributed_logs_v2 where (timestamp >= 1680066360726000000 AND timestamp <= 1680066458000000000) AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) " + + "AND attributes_string['method'] = 'GET' AND mapContains(attributes_string, 'method') AND mapContains(attributes_string, 'method') AND mapContains(attributes_string, 'name') group by `method` order by value DESC) LIMIT 10", + Options: v3.LogQBOptions{GraphLimitQtype: constants.FirstQueryGraphLimit, PreferRPM: true}, + }, + { + Name: "Test TS with limit- first - with order by value", + PanelType: v3.PanelTypeGraph, + 
Start: 1680066360726, + End: 1680066458000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateAttribute: v3.AttributeKey{Key: "name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, + AggregateOperator: v3.AggregateOperatorCountDistinct, + Expression: "A", + Filters: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "method", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, Value: "GET", Operator: "="}, + }, + }, + Limit: 10, + GroupBy: []v3.AttributeKey{{Key: "method", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}}, + OrderBy: []v3.OrderBy{{ColumnName: constants.SigNozOrderByValue, Order: "ASC"}}, + }, + TableName: "logs", + ExpectedQuery: "SELECT `method` from (SELECT attributes_string['method'] as `method`, toFloat64(count(distinct(attributes_string['name']))) " + + "as value from signoz_logs.distributed_logs_v2 where (timestamp >= 1680066360726000000 AND timestamp <= 1680066458000000000) AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) " + + "AND attributes_string['method'] = 'GET' AND mapContains(attributes_string, 'method') AND mapContains(attributes_string, 'method') AND mapContains(attributes_string, 'name') group by `method` order by value ASC) LIMIT 10", + Options: v3.LogQBOptions{GraphLimitQtype: constants.FirstQueryGraphLimit, PreferRPM: true}, + }, + { + Name: "Test TS with limit- first - with order by attribute", + PanelType: v3.PanelTypeGraph, + Start: 1680066360726, + End: 1680066458000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateAttribute: v3.AttributeKey{Key: "name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, + AggregateOperator: v3.AggregateOperatorCountDistinct, + Expression: "A", + Filters: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "method", DataType: 
v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, Value: "GET", Operator: "="}, + }, + }, + Limit: 10, + GroupBy: []v3.AttributeKey{{Key: "method", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}}, + OrderBy: []v3.OrderBy{{ColumnName: "method", Order: "ASC"}}, + }, + TableName: "logs", + ExpectedQuery: "SELECT `method` from (SELECT attributes_string['method'] as `method`, toFloat64(count(distinct(attributes_string['name']))) " + + "as value from signoz_logs.distributed_logs_v2 where (timestamp >= 1680066360726000000 AND timestamp <= 1680066458000000000) AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) " + + "AND attributes_string['method'] = 'GET' AND mapContains(attributes_string, 'method') AND mapContains(attributes_string, 'method') AND mapContains(attributes_string, 'name') group by `method` order by `method` ASC) LIMIT 10", + Options: v3.LogQBOptions{GraphLimitQtype: constants.FirstQueryGraphLimit, PreferRPM: true}, + }, + { + Name: "Test TS with limit- second", + PanelType: v3.PanelTypeGraph, + Start: 1680066360726, + End: 1680066458000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateAttribute: v3.AttributeKey{Key: "name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, + AggregateOperator: v3.AggregateOperatorCountDistinct, + Expression: "A", + Filters: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "method", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, Value: "GET", Operator: "="}, + }, + }, + GroupBy: []v3.AttributeKey{{Key: "method", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}}, + Limit: 2, + }, + TableName: "logs", + ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts, attributes_string['method'] as `method`, " + + "toFloat64(count(distinct(attributes_string['name']))) as value from 
signoz_logs.distributed_logs_v2 where (timestamp >= 1680066360726000000 AND timestamp <= 1680066458000000000) " + + "AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) AND attributes_string['method'] = 'GET' AND mapContains(attributes_string, 'method') AND mapContains(attributes_string, 'method') " + + "AND mapContains(attributes_string, 'name') AND (`method`) GLOBAL IN (#LIMIT_PLACEHOLDER) group by `method`,ts order by value DESC", + Options: v3.LogQBOptions{GraphLimitQtype: constants.SecondQueryGraphLimit}, + }, + { + Name: "Test TS with limit- second - with order by", + PanelType: v3.PanelTypeGraph, + Start: 1680066360726, + End: 1680066458000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateAttribute: v3.AttributeKey{Key: "name", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, + AggregateOperator: v3.AggregateOperatorCountDistinct, + Expression: "A", + Filters: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "method", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, Value: "GET", Operator: "="}, + }, + }, + GroupBy: []v3.AttributeKey{{Key: "method", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}}, + OrderBy: []v3.OrderBy{{ColumnName: "method", Order: "ASC"}}, + Limit: 2, + }, + TableName: "logs", + ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts, attributes_string['method'] as `method`, " + + "toFloat64(count(distinct(attributes_string['name']))) as value from signoz_logs.distributed_logs_v2 where (timestamp >= 1680066360726000000 AND timestamp <= 1680066458000000000) " + + "AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) AND attributes_string['method'] = 'GET' AND mapContains(attributes_string, 'method') AND mapContains(attributes_string, 'method') " + + "AND mapContains(attributes_string, 'name') AND (`method`) GLOBAL IN 
(#LIMIT_PLACEHOLDER) group by `method`,ts order by `method` ASC", + Options: v3.LogQBOptions{GraphLimitQtype: constants.SecondQueryGraphLimit}, + }, + // Live tail + { + Name: "Live Tail Query", + PanelType: v3.PanelTypeList, + Start: 1680066360726, + End: 1680066458000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateOperator: v3.AggregateOperatorNoOp, + Expression: "A", + Filters: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "method", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, Value: "GET", Operator: "="}, + }, + }, + }, + TableName: "logs", + ExpectedQuery: "SELECT timestamp, id, trace_id, span_id, trace_flags, severity_text, severity_number, body,attributes_string,attributes_number,attributes_bool,resources_string " + + "from signoz_logs.distributed_logs_v2 where attributes_string['method'] = 'GET' AND mapContains(attributes_string, 'method') AND ", + Options: v3.LogQBOptions{IsLivetailQuery: true}, + }, + { + Name: "Live Tail Query with contains", + PanelType: v3.PanelTypeList, + Start: 1680066360726, + End: 1680066458000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateOperator: v3.AggregateOperatorNoOp, + Expression: "A", + Filters: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "method", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, Value: "GET", Operator: "contains"}, + }, + }, + }, + TableName: "logs", + ExpectedQuery: "SELECT timestamp, id, trace_id, span_id, trace_flags, severity_text, severity_number, body,attributes_string,attributes_number,attributes_bool,resources_string " + + "from signoz_logs.distributed_logs_v2 where attributes_string['method'] LIKE '%GET%' AND mapContains(attributes_string, 'method') AND ", + Options: v3.LogQBOptions{IsLivetailQuery: true}, + }, + { + Name: "Live Tail Query W/O filter", + PanelType: v3.PanelTypeList, + Start: 
1680066360726, + End: 1680066458000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateOperator: v3.AggregateOperatorNoOp, + Expression: "A", + Filters: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{}}, + }, + TableName: "logs", + ExpectedQuery: "SELECT timestamp, id, trace_id, span_id, trace_flags, severity_text, severity_number, body,attributes_string,attributes_number,attributes_bool,resources_string " + + "from signoz_logs.distributed_logs_v2 where ", + Options: v3.LogQBOptions{IsLivetailQuery: true}, + }, + { + Name: "Table query w/o limit", + PanelType: v3.PanelTypeTable, + Start: 1680066360726, + End: 1680066458000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateOperator: v3.AggregateOperatorCount, + Expression: "A", + Filters: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{}}, + }, + TableName: "logs", + ExpectedQuery: "SELECT now() as ts, toFloat64(count(*)) as value from signoz_logs.distributed_logs_v2 where (timestamp >= 1680066360726000000 AND timestamp <= 1680066458000000000) AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) order by value DESC", + Options: v3.LogQBOptions{}, + }, + { + Name: "Table query with limit", + PanelType: v3.PanelTypeTable, + Start: 1680066360726, + End: 1680066458000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateOperator: v3.AggregateOperatorCount, + Expression: "A", + Filters: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{}}, + Limit: 10, + }, + TableName: "logs", + ExpectedQuery: "SELECT now() as ts, toFloat64(count(*)) as value from signoz_logs.distributed_logs_v2 where (timestamp >= 1680066360726000000 AND timestamp <= 1680066458000000000) AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) order by value DESC LIMIT 10", + Options: v3.LogQBOptions{}, + }, + { + Name: "Ignore offset if order by is timestamp in list queries", + PanelType: v3.PanelTypeList, + 
Start: 1680066360726, + End: 1680066458000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateOperator: v3.AggregateOperatorNoOp, + Expression: "A", + Filters: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "id", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeUnspecified, IsColumn: true}, Value: "logid", Operator: "<"}, + }, + }, + OrderBy: []v3.OrderBy{ + { + ColumnName: "timestamp", + Order: "DESC", + }, + }, + Offset: 100, + PageSize: 100, + }, + TableName: "logs", + ExpectedQuery: "SELECT timestamp, id, trace_id, span_id, trace_flags, severity_text, severity_number, body,attributes_string,attributes_number,attributes_bool,resources_string " + + "from signoz_logs.distributed_logs_v2 where (timestamp >= 1680066360726000000 AND timestamp <= 1680066458000000000) AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) " + + "AND id < 'logid' order by timestamp DESC LIMIT 100", + }, + { + Name: "Don't ignore offset if order by is not timestamp", + PanelType: v3.PanelTypeList, + Start: 1680066360726, + End: 1680066458000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateOperator: v3.AggregateOperatorNoOp, + Expression: "A", + Filters: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "method", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, Value: "GET", Operator: "="}, + }, + }, + OrderBy: []v3.OrderBy{ + { + ColumnName: "mycolumn", + Order: "DESC", + }, + }, + Offset: 100, + PageSize: 100, + }, + TableName: "logs", + ExpectedQuery: "SELECT timestamp, id, trace_id, span_id, trace_flags, severity_text, severity_number, body,attributes_string,attributes_number,attributes_bool,resources_string " + + "from signoz_logs.distributed_logs_v2 where (timestamp >= 1680066360726000000 AND timestamp <= 1680066458000000000) AND (ts_bucket_start >= 1680064560 AND " + + "ts_bucket_start <= 
1680066458) AND attributes_string['method'] = 'GET' AND mapContains(attributes_string, 'method') order by resources_string['mycolumn'] DESC LIMIT 100 OFFSET 100", + }, +} + +func TestPrepareLogsQuery(t *testing.T) { + for _, tt := range testPrepLogsQueryData { + Convey("TestBuildLogsQuery", t, func() { + query, err := PrepareLogsQuery(tt.Start, tt.End, "", tt.PanelType, tt.BuilderQuery, tt.Options) + So(err, ShouldBeNil) + So(query, ShouldEqual, tt.ExpectedQuery) + + }) + } +} + +var testPrepLogsQueryLimitOffsetData = []struct { + Name string + PanelType v3.PanelType + Start int64 + End int64 + Step int64 + BuilderQuery *v3.BuilderQuery + GroupByTags []v3.AttributeKey + TableName string + AggregateOperator v3.AggregateOperator + ExpectedQuery string + Options v3.LogQBOptions +}{ + { + Name: "Test limit less than pageSize - order by ts", + PanelType: v3.PanelTypeList, + Start: 1680066360726, + End: 1680066458000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateOperator: v3.AggregateOperatorNoOp, + Expression: "A", + Filters: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{}}, + OrderBy: []v3.OrderBy{{ColumnName: constants.TIMESTAMP, Order: "desc", Key: constants.TIMESTAMP, DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeUnspecified, IsColumn: true}}, + Limit: 1, + Offset: 0, + PageSize: 5, + }, + TableName: "logs", + ExpectedQuery: "SELECT timestamp, id, trace_id, span_id, trace_flags, severity_text, severity_number, body,attributes_string,attributes_number,attributes_bool,resources_string from " + + "signoz_logs.distributed_logs_v2 where (timestamp >= 1680066360726000000 AND timestamp <= 1680066458000000000) AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) " + + "order by `timestamp` desc LIMIT 1", + }, + { + Name: "Test limit greater than pageSize - order by ts", + PanelType: v3.PanelTypeList, + Start: 1680066360726, + End: 1680066458000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: 
"A", + StepInterval: 60, + AggregateOperator: v3.AggregateOperatorNoOp, + Expression: "A", + Filters: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "id", Type: v3.AttributeKeyTypeUnspecified, DataType: v3.AttributeKeyDataTypeString, IsColumn: true}, Operator: v3.FilterOperatorLessThan, Value: "2TNh4vp2TpiWyLt3SzuadLJF2s4"}, + }}, + OrderBy: []v3.OrderBy{{ColumnName: constants.TIMESTAMP, Order: "desc", Key: constants.TIMESTAMP, DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeUnspecified, IsColumn: true}}, + Limit: 100, + Offset: 10, + PageSize: 10, + }, + TableName: "logs", + ExpectedQuery: "SELECT timestamp, id, trace_id, span_id, trace_flags, severity_text, severity_number, body,attributes_string,attributes_number,attributes_bool,resources_string from " + + "signoz_logs.distributed_logs_v2 where (timestamp >= 1680066360726000000 AND timestamp <= 1680066458000000000) AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) " + + "AND id < '2TNh4vp2TpiWyLt3SzuadLJF2s4' order by `timestamp` desc LIMIT 10", + }, + { + Name: "Test limit less than pageSize - order by custom", + PanelType: v3.PanelTypeList, + Start: 1680066360726, + End: 1680066458000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateOperator: v3.AggregateOperatorNoOp, + Expression: "A", + Filters: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{}}, + OrderBy: []v3.OrderBy{{ColumnName: "method", Order: "desc", Key: "method", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}}, + Limit: 1, + Offset: 0, + PageSize: 5, + }, + TableName: "logs", + ExpectedQuery: "SELECT timestamp, id, trace_id, span_id, trace_flags, severity_text, severity_number, body,attributes_string,attributes_number,attributes_bool,resources_string from " + + "signoz_logs.distributed_logs_v2 where (timestamp >= 1680066360726000000 AND timestamp <= 1680066458000000000) AND (ts_bucket_start >= 1680064560 AND 
ts_bucket_start <= 1680066458) " + + "order by attributes_string['method'] desc LIMIT 1 OFFSET 0", + }, + { + Name: "Test limit greater than pageSize - order by custom", + PanelType: v3.PanelTypeList, + Start: 1680066360726, + End: 1680066458000, + BuilderQuery: &v3.BuilderQuery{ + QueryName: "A", + StepInterval: 60, + AggregateOperator: v3.AggregateOperatorNoOp, + Expression: "A", + Filters: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{ + {Key: v3.AttributeKey{Key: "id", Type: v3.AttributeKeyTypeUnspecified, DataType: v3.AttributeKeyDataTypeString, IsColumn: true}, Operator: v3.FilterOperatorLessThan, Value: "2TNh4vp2TpiWyLt3SzuadLJF2s4"}, + }}, + OrderBy: []v3.OrderBy{{ColumnName: "method", Order: "desc", Key: "method", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}}, + Limit: 100, + Offset: 50, + PageSize: 50, + }, + TableName: "logs", + ExpectedQuery: "SELECT timestamp, id, trace_id, span_id, trace_flags, severity_text, severity_number, body,attributes_string,attributes_number,attributes_bool,resources_string from " + + "signoz_logs.distributed_logs_v2 where (timestamp >= 1680066360726000000 AND timestamp <= 1680066458000000000) AND (ts_bucket_start >= 1680064560 AND ts_bucket_start <= 1680066458) AND " + + "id < '2TNh4vp2TpiWyLt3SzuadLJF2s4' order by attributes_string['method'] desc LIMIT 50 OFFSET 50", + }, +} + +func TestPrepareLogsQueryLimitOffset(t *testing.T) { + for _, tt := range testPrepLogsQueryLimitOffsetData { + Convey("TestBuildLogsQuery", t, func() { + query, err := PrepareLogsQuery(tt.Start, tt.End, "", tt.PanelType, tt.BuilderQuery, tt.Options) + So(err, ShouldBeNil) + So(query, ShouldEqual, tt.ExpectedQuery) + + }) + } +} diff --git a/pkg/query-service/app/logs/v4/resource_query_builder.go b/pkg/query-service/app/logs/v4/resource_query_builder.go new file mode 100644 index 0000000000..004c9269fb --- /dev/null +++ b/pkg/query-service/app/logs/v4/resource_query_builder.go @@ -0,0 +1,201 @@ +package v4 + +import ( 
+ "fmt" + "strings" + + v3 "go.signoz.io/signoz/pkg/query-service/model/v3" + "go.signoz.io/signoz/pkg/query-service/utils" +) + +// buildResourceFilter builds a clickhouse filter string for resource labels +func buildResourceFilter(logsOp string, key string, op v3.FilterOperator, value interface{}) string { + searchKey := fmt.Sprintf("simpleJSONExtractString(labels, '%s')", key) + + chFmtVal := utils.ClickHouseFormattedValue(value) + + switch op { + case v3.FilterOperatorExists: + return fmt.Sprintf("simpleJSONHas(labels, '%s')", key) + case v3.FilterOperatorNotExists: + return fmt.Sprintf("not simpleJSONHas(labels, '%s')", key) + case v3.FilterOperatorRegex, v3.FilterOperatorNotRegex: + return fmt.Sprintf(logsOp, searchKey, chFmtVal) + case v3.FilterOperatorContains, v3.FilterOperatorNotContains: + // this is required as clickhouseFormattedValue add's quotes to the string + escapedStringValue := utils.QuoteEscapedStringForContains(fmt.Sprintf("%s", value)) + return fmt.Sprintf("%s %s '%%%s%%'", searchKey, logsOp, escapedStringValue) + default: + return fmt.Sprintf("%s %s %s", searchKey, logsOp, chFmtVal) + } +} + +// buildIndexFilterForInOperator builds a clickhouse filter string for in operator +// example:= x in a,b,c = (labels like '%x%a%' or labels like '%"x":"b"%' or labels like '%"x"="c"%') +// example:= x nin a,b,c = (labels nlike '%x%a%' AND labels nlike '%"x"="b"' AND labels nlike '%"x"="c"%') +func buildIndexFilterForInOperator(key string, op v3.FilterOperator, value interface{}) string { + conditions := []string{} + separator := " OR " + sqlOp := "like" + if op == v3.FilterOperatorNotIn { + separator = " AND " + sqlOp = "not like" + } + + // values is a slice of strings, we need to convert value to this type + // value can be string or []interface{} + values := []string{} + switch value.(type) { + case string: + values = append(values, value.(string)) + case []interface{}: + for _, v := range (value).([]interface{}) { + // also resources attributes are 
always string values + strV, ok := v.(string) + if !ok { + continue + } + values = append(values, strV) + } + } + + // if there are no values to filter on, return an empty string + if len(values) > 0 { + for _, v := range values { + value := utils.QuoteEscapedStringForContains(v) + conditions = append(conditions, fmt.Sprintf("labels %s '%%\"%s\":\"%s\"%%'", sqlOp, key, value)) + } + return "(" + strings.Join(conditions, separator) + ")" + } + return "" +} + +// buildResourceIndexFilter builds a clickhouse filter string for resource labels +// example:= x like '%john%' = labels like '%x%john%' +func buildResourceIndexFilter(key string, op v3.FilterOperator, value interface{}) string { + // not using clickhouseFormattedValue as we don't wan't the quotes + formattedValueEscaped := utils.QuoteEscapedStringForContains(fmt.Sprintf("%s", value)) + + // add index filters + switch op { + case v3.FilterOperatorContains, v3.FilterOperatorEqual, v3.FilterOperatorLike: + return fmt.Sprintf("labels like '%%%s%%%s%%'", key, formattedValueEscaped) + case v3.FilterOperatorNotContains, v3.FilterOperatorNotEqual, v3.FilterOperatorNotLike: + return fmt.Sprintf("labels not like '%%%s%%%s%%'", key, formattedValueEscaped) + case v3.FilterOperatorNotRegex: + return fmt.Sprintf("labels not like '%%%s%%'", key) + case v3.FilterOperatorIn, v3.FilterOperatorNotIn: + return buildIndexFilterForInOperator(key, op, value) + default: + return fmt.Sprintf("labels like '%%%s%%'", key) + } +} + +// buildResourceFiltersFromFilterItems builds a list of clickhouse filter strings for resource labels from a FilterSet. +// It skips any filter items that are not resource attributes and checks that the operator is supported and the data type is correct. 
+func buildResourceFiltersFromFilterItems(fs *v3.FilterSet) ([]string, error) { + var conditions []string + if fs == nil || len(fs.Items) == 0 { + return nil, nil + } + for _, item := range fs.Items { + // skip anything other than resource attribute + if item.Key.Type != v3.AttributeKeyTypeResource { + continue + } + + // since out map is in lower case we are converting it to lowercase + operatorLower := strings.ToLower(string(item.Operator)) + op := v3.FilterOperator(operatorLower) + keyName := item.Key.Key + + // resource filter value data type will always be string + // will be an interface if the operator is IN or NOT IN + if item.Key.DataType != v3.AttributeKeyDataTypeString && + (op != v3.FilterOperatorIn && op != v3.FilterOperatorNotIn) { + return nil, fmt.Errorf("invalid data type for resource attribute: %s", item.Key.Key) + } + + var value interface{} + var err error + if op != v3.FilterOperatorExists && op != v3.FilterOperatorNotExists { + // make sure to cast the value regardless of the actual type + value, err = utils.ValidateAndCastValue(item.Value, item.Key.DataType) + if err != nil { + return nil, fmt.Errorf("failed to validate and cast value for %s: %v", item.Key.Key, err) + } + } + + if logsOp, ok := logOperators[op]; ok { + // the filter + if resourceFilter := buildResourceFilter(logsOp, keyName, op, value); resourceFilter != "" { + conditions = append(conditions, resourceFilter) + } + // the additional filter for better usage of the index + if resourceIndexFilter := buildResourceIndexFilter(keyName, op, value); resourceIndexFilter != "" { + conditions = append(conditions, resourceIndexFilter) + } + } else { + return nil, fmt.Errorf("unsupported operator: %s", op) + } + + } + + return conditions, nil +} + +func buildResourceFiltersFromGroupBy(groupBy []v3.AttributeKey) []string { + var conditions []string + + for _, attr := range groupBy { + if attr.Type != v3.AttributeKeyTypeResource { + continue + } + conditions = append(conditions, 
fmt.Sprintf("(simpleJSONHas(labels, '%s') AND labels like '%%%s%%')", attr.Key, attr.Key)) + } + return conditions +} + +func buildResourceFiltersFromAggregateAttribute(aggregateAttribute v3.AttributeKey) string { + if aggregateAttribute.Key != "" && aggregateAttribute.Type == v3.AttributeKeyTypeResource { + return fmt.Sprintf("(simpleJSONHas(labels, '%s') AND labels like '%%%s%%')", aggregateAttribute.Key, aggregateAttribute.Key) + } + + return "" +} + +func buildResourceSubQuery(bucketStart, bucketEnd int64, fs *v3.FilterSet, groupBy []v3.AttributeKey, aggregateAttribute v3.AttributeKey) (string, error) { + + // BUILD THE WHERE CLAUSE + var conditions []string + // only add the resource attributes to the filters here + rs, err := buildResourceFiltersFromFilterItems(fs) + if err != nil { + return "", err + } + conditions = append(conditions, rs...) + + // for aggregate attribute add exists check in resources + aggregateAttributeResourceFilter := buildResourceFiltersFromAggregateAttribute(aggregateAttribute) + if aggregateAttributeResourceFilter != "" { + conditions = append(conditions, aggregateAttributeResourceFilter) + } + + groupByResourceFilters := buildResourceFiltersFromGroupBy(groupBy) + if len(groupByResourceFilters) > 0 { + // TODO: change AND to OR once we know how to solve for group by ( i.e show values if one is not present) + groupByStr := "( " + strings.Join(groupByResourceFilters, " AND ") + " )" + conditions = append(conditions, groupByStr) + } + if len(conditions) == 0 { + return "", nil + } + conditionStr := strings.Join(conditions, " AND ") + + // BUILD THE FINAL QUERY + query := fmt.Sprintf("SELECT fingerprint FROM signoz_logs.%s WHERE (seen_at_ts_bucket_start >= %d) AND (seen_at_ts_bucket_start <= %d) AND ", DISTRIBUTED_LOGS_V2_RESOURCE, bucketStart, bucketEnd) + + query = "(" + query + conditionStr + ")" + + return query, nil +} diff --git a/pkg/query-service/app/logs/v4/resource_query_builder_test.go 
b/pkg/query-service/app/logs/v4/resource_query_builder_test.go new file mode 100644 index 0000000000..17a49926f3 --- /dev/null +++ b/pkg/query-service/app/logs/v4/resource_query_builder_test.go @@ -0,0 +1,482 @@ +package v4 + +import ( + "reflect" + "testing" + + v3 "go.signoz.io/signoz/pkg/query-service/model/v3" +) + +func Test_buildResourceFilter(t *testing.T) { + type args struct { + logsOp string + key string + op v3.FilterOperator + value interface{} + } + tests := []struct { + name string + args args + want string + }{ + { + name: "test exists", + args: args{ + key: "service.name", + op: v3.FilterOperatorExists, + }, + want: `simpleJSONHas(labels, 'service.name')`, + }, + { + name: "test nexists", + args: args{ + key: "service.name", + op: v3.FilterOperatorNotExists, + }, + want: `not simpleJSONHas(labels, 'service.name')`, + }, + { + name: "test regex", + args: args{ + logsOp: "match(%s, %s)", + key: "service.name", + op: v3.FilterOperatorRegex, + value: ".*", + }, + want: `match(simpleJSONExtractString(labels, 'service.name'), '.*')`, + }, + { + name: "test contains", + args: args{ + logsOp: "LIKE", + key: "service.name", + op: v3.FilterOperatorContains, + value: "application%_", + }, + want: `simpleJSONExtractString(labels, 'service.name') LIKE '%application\%\_%'`, + }, + { + name: "test eq", + args: args{ + logsOp: "=", + key: "service.name", + op: v3.FilterOperatorEqual, + value: "Application", + }, + want: `simpleJSONExtractString(labels, 'service.name') = 'Application'`, + }, + { + name: "test value with quotes", + args: args{ + logsOp: "=", + key: "service.name", + op: v3.FilterOperatorEqual, + value: "Application's", + }, + want: `simpleJSONExtractString(labels, 'service.name') = 'Application\'s'`, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := buildResourceFilter(tt.args.logsOp, tt.args.key, tt.args.op, tt.args.value); got != tt.want { + t.Errorf("buildResourceFilter() = %v, want %v", got, tt.want) + } + }) 
+ } +} + +func Test_buildIndexFilterForInOperator(t *testing.T) { + type args struct { + key string + op v3.FilterOperator + value interface{} + } + tests := []struct { + name string + args args + want string + }{ + { + name: "test in array", + args: args{ + key: "service.name", + op: v3.FilterOperatorIn, + value: []interface{}{"Application", "Test"}, + }, + want: `(labels like '%"service.name":"Application"%' OR labels like '%"service.name":"Test"%')`, + }, + { + name: "test nin array", + args: args{ + key: "service.name", + op: v3.FilterOperatorNotIn, + value: []interface{}{"Application", "Test"}, + }, + want: `(labels not like '%"service.name":"Application"%' AND labels not like '%"service.name":"Test"%')`, + }, + { + name: "test in string", + args: args{ + key: "service.name", + op: v3.FilterOperatorIn, + value: "application", + }, + want: `(labels like '%"service.name":"application"%')`, + }, + { + name: "test nin string", + args: args{ + key: "service.name", + op: v3.FilterOperatorNotIn, + value: "application'\"_s", + }, + want: `(labels not like '%"service.name":"application\'"\_s"%')`, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := buildIndexFilterForInOperator(tt.args.key, tt.args.op, tt.args.value); got != tt.want { + t.Errorf("buildIndexFilterForInOperator() = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_buildResourceIndexFilter(t *testing.T) { + type args struct { + key string + op v3.FilterOperator + value interface{} + } + tests := []struct { + name string + args args + want string + }{ + { + name: "test contains", + args: args{ + key: "service.name", + op: v3.FilterOperatorContains, + value: "application", + }, + want: `labels like '%service.name%application%'`, + }, + { + name: "test not contains", + args: args{ + key: "service.name", + op: v3.FilterOperatorNotContains, + value: "application", + }, + want: `labels not like '%service.name%application%'`, + }, + { + name: "test contains with % and 
_", + args: args{ + key: "service.name", + op: v3.FilterOperatorNotContains, + value: "application%_test", + }, + want: `labels not like '%service.name%application\%\_test%'`, + }, + { + name: "test not regex", + args: args{ + key: "service.name", + op: v3.FilterOperatorNotRegex, + value: ".*", + }, + want: `labels not like '%service.name%'`, + }, + { + name: "test in", + args: args{ + key: "service.name", + op: v3.FilterOperatorNotIn, + value: []interface{}{"Application", "Test"}, + }, + want: `(labels not like '%"service.name":"Application"%' AND labels not like '%"service.name":"Test"%')`, + }, + { + name: "test eq", + args: args{ + key: "service.name", + op: v3.FilterOperatorEqual, + value: "Application", + }, + want: `labels like '%service.name%Application%'`, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := buildResourceIndexFilter(tt.args.key, tt.args.op, tt.args.value); got != tt.want { + t.Errorf("buildResourceIndexFilter() = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_buildResourceFiltersFromFilterItems(t *testing.T) { + type args struct { + fs *v3.FilterSet + } + tests := []struct { + name string + args args + want []string + wantErr bool + }{ + { + name: "ignore attribute", + args: args{ + fs: &v3.FilterSet{ + Items: []v3.FilterItem{ + { + Key: v3.AttributeKey{ + Key: "service.name", + DataType: v3.AttributeKeyDataTypeString, + Type: v3.AttributeKeyTypeTag, + }, + Operator: v3.FilterOperatorEqual, + Value: "test", + }, + }, + }, + }, + want: nil, + wantErr: false, + }, + { + name: "build filter", + args: args{ + fs: &v3.FilterSet{ + Items: []v3.FilterItem{ + { + Key: v3.AttributeKey{ + Key: "service.name", + DataType: v3.AttributeKeyDataTypeString, + Type: v3.AttributeKeyTypeResource, + }, + Operator: v3.FilterOperatorEqual, + Value: "test", + }, + }, + }, + }, + want: []string{ + "simpleJSONExtractString(labels, 'service.name') = 'test'", + "labels like '%service.name%test%'", + }, + wantErr: false, + 
}, + { + name: "build filter with multiple items", + args: args{ + fs: &v3.FilterSet{ + Items: []v3.FilterItem{ + { + Key: v3.AttributeKey{ + Key: "service.name", + DataType: v3.AttributeKeyDataTypeString, + Type: v3.AttributeKeyTypeResource, + }, + Operator: v3.FilterOperatorEqual, + Value: "test", + }, + { + Key: v3.AttributeKey{ + Key: "namespace", + DataType: v3.AttributeKeyDataTypeString, + Type: v3.AttributeKeyTypeResource, + }, + Operator: v3.FilterOperatorContains, + Value: "test1", + }, + }, + }, + }, + want: []string{ + "simpleJSONExtractString(labels, 'service.name') = 'test'", + "labels like '%service.name%test%'", + "simpleJSONExtractString(labels, 'namespace') LIKE '%test1%'", + "labels like '%namespace%test1%'", + }, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := buildResourceFiltersFromFilterItems(tt.args.fs) + if (err != nil) != tt.wantErr { + t.Errorf("buildResourceFiltersFromFilterItems() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("buildResourceFiltersFromFilterItems() = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_buildResourceFiltersFromGroupBy(t *testing.T) { + type args struct { + groupBy []v3.AttributeKey + } + tests := []struct { + name string + args args + want []string + }{ + { + name: "build filter", + args: args{ + groupBy: []v3.AttributeKey{ + { + Key: "service.name", + DataType: v3.AttributeKeyDataTypeString, + Type: v3.AttributeKeyTypeResource, + }, + }, + }, + want: []string{ + "(simpleJSONHas(labels, 'service.name') AND labels like '%service.name%')", + }, + }, + { + name: "build filter multiple group by", + args: args{ + groupBy: []v3.AttributeKey{ + { + Key: "service.name", + DataType: v3.AttributeKeyDataTypeString, + Type: v3.AttributeKeyTypeResource, + }, + { + Key: "namespace", + DataType: v3.AttributeKeyDataTypeString, + Type: v3.AttributeKeyTypeResource, + }, + }, + }, + want: []string{ + 
"(simpleJSONHas(labels, 'service.name') AND labels like '%service.name%')", + "(simpleJSONHas(labels, 'namespace') AND labels like '%namespace%')", + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := buildResourceFiltersFromGroupBy(tt.args.groupBy); !reflect.DeepEqual(got, tt.want) { + t.Errorf("buildResourceFiltersFromGroupBy() = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_buildResourceFiltersFromAggregateAttribute(t *testing.T) { + type args struct { + aggregateAttribute v3.AttributeKey + } + tests := []struct { + name string + args args + want string + }{ + { + name: "build filter", + args: args{ + aggregateAttribute: v3.AttributeKey{ + Key: "service.name", + DataType: v3.AttributeKeyDataTypeString, + Type: v3.AttributeKeyTypeResource, + }, + }, + want: "(simpleJSONHas(labels, 'service.name') AND labels like '%service.name%')", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := buildResourceFiltersFromAggregateAttribute(tt.args.aggregateAttribute); got != tt.want { + t.Errorf("buildResourceFiltersFromAggregateAttribute() = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_buildResourceSubQuery(t *testing.T) { + type args struct { + bucketStart int64 + bucketEnd int64 + fs *v3.FilterSet + groupBy []v3.AttributeKey + aggregateAttribute v3.AttributeKey + } + tests := []struct { + name string + args args + want string + wantErr bool + }{ + { + name: "build sub query", + args: args{ + bucketStart: 1680064560, + bucketEnd: 1680066458, + fs: &v3.FilterSet{ + Items: []v3.FilterItem{ + { + Key: v3.AttributeKey{ + Key: "service.name", + DataType: v3.AttributeKeyDataTypeString, + Type: v3.AttributeKeyTypeResource, + }, + Operator: v3.FilterOperatorEqual, + Value: "test", + }, + { + Key: v3.AttributeKey{ + Key: "namespace", + DataType: v3.AttributeKeyDataTypeString, + Type: v3.AttributeKeyTypeResource, + }, + Operator: v3.FilterOperatorContains, + Value: "test1", + }, + }, + }, 
+ groupBy: []v3.AttributeKey{ + { + Key: "host.name", + DataType: v3.AttributeKeyDataTypeString, + Type: v3.AttributeKeyTypeResource, + }, + }, + aggregateAttribute: v3.AttributeKey{ + Key: "cluster.name", + DataType: v3.AttributeKeyDataTypeString, + Type: v3.AttributeKeyTypeResource, + }, + }, + want: "(SELECT fingerprint FROM signoz_logs.distributed_logs_v2_resource WHERE " + + "(seen_at_ts_bucket_start >= 1680064560) AND (seen_at_ts_bucket_start <= 1680066458) AND " + + "simpleJSONExtractString(labels, 'service.name') = 'test' AND labels like '%service.name%test%' " + + "AND simpleJSONExtractString(labels, 'namespace') LIKE '%test1%' AND labels like '%namespace%test1%' " + + "AND (simpleJSONHas(labels, 'cluster.name') AND labels like '%cluster.name%') AND " + + "( (simpleJSONHas(labels, 'host.name') AND labels like '%host.name%') ))", + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := buildResourceSubQuery(tt.args.bucketStart, tt.args.bucketEnd, tt.args.fs, tt.args.groupBy, tt.args.aggregateAttribute) + if (err != nil) != tt.wantErr { + t.Errorf("buildResourceSubQuery() error = %v, wantErr %v", err, tt.wantErr) + return + } + if got != tt.want { + t.Errorf("buildResourceSubQuery() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/pkg/query-service/constants/constants.go b/pkg/query-service/constants/constants.go index 4b5134c6ee..3b8be03351 100644 --- a/pkg/query-service/constants/constants.go +++ b/pkg/query-service/constants/constants.go @@ -311,6 +311,12 @@ const ( "CAST((attributes_float64_key, attributes_float64_value), 'Map(String, Float64)') as attributes_float64," + "CAST((attributes_bool_key, attributes_bool_value), 'Map(String, Bool)') as attributes_bool," + "CAST((resources_string_key, resources_string_value), 'Map(String, String)') as resources_string " + LogsSQLSelectV2 = "SELECT " + + "timestamp, id, trace_id, span_id, trace_flags, severity_text, severity_number, body," + + 
"attributes_string," + + "attributes_number," + + "attributes_bool," + + "resources_string " TracesExplorerViewSQLSelectWithSubQuery = "WITH subQuery AS (SELECT distinct on (traceID) traceID, durationNano, " + "serviceName, name FROM %s.%s WHERE parentSpanID = '' AND %s %s ORDER BY durationNano DESC " TracesExplorerViewSQLSelectQuery = "SELECT subQuery.serviceName, subQuery.name, count() AS " + @@ -375,6 +381,12 @@ var StaticFieldsLogsV3 = map[string]v3.AttributeKey{ Type: v3.AttributeKeyTypeUnspecified, IsColumn: true, }, + "__attrs": { + Key: "__attrs", + DataType: v3.AttributeKeyDataTypeString, + Type: v3.AttributeKeyTypeUnspecified, + IsColumn: true, + }, } const SigNozOrderByValue = "#SIGNOZ_VALUE" diff --git a/pkg/query-service/model/v3/v3.go b/pkg/query-service/model/v3/v3.go index b0e786a6d6..2ed526842d 100644 --- a/pkg/query-service/model/v3/v3.go +++ b/pkg/query-service/model/v3/v3.go @@ -1288,3 +1288,9 @@ type URLShareableOptions struct { Format string `json:"format"` SelectColumns []AttributeKey `json:"selectColumns"` } + +type LogQBOptions struct { + GraphLimitQtype string + IsLivetailQuery bool + PreferRPM bool +} diff --git a/pkg/query-service/utils/format.go b/pkg/query-service/utils/format.go index 4de081940d..9f06d712de 100644 --- a/pkg/query-service/utils/format.go +++ b/pkg/query-service/utils/format.go @@ -154,6 +154,14 @@ func QuoteEscapedString(str string) string { return str } +func QuoteEscapedStringForContains(str string) string { + // https://clickhouse.com/docs/en/sql-reference/functions/string-search-functions#like + str = QuoteEscapedString(str) + str = strings.ReplaceAll(str, `%`, `\%`) + str = strings.ReplaceAll(str, `_`, `\_`) + return str +} + // ClickHouseFormattedValue formats the value to be used in clickhouse query func ClickHouseFormattedValue(v interface{}) string { // if it's pointer convert it to a value @@ -264,6 +272,28 @@ func GetClickhouseColumnName(typeName string, dataType, field string) string { return colName }
+func GetClickhouseColumnNameV2(typeName string, dataType, field string) string { + if typeName == string(v3.AttributeKeyTypeTag) { + typeName = constants.Attributes + } + + if typeName != string(v3.AttributeKeyTypeResource) { + typeName = typeName[:len(typeName)-1] + } + + dataType = strings.ToLower(dataType) + + if dataType == "int64" || dataType == "float64" { + dataType = "number" + } + + // if name contains . replace it with `$$` + field = strings.ReplaceAll(field, ".", "$$") + + colName := fmt.Sprintf("`%s_%s_%s`", strings.ToLower(typeName), dataType, field) + return colName +} + // GetEpochNanoSecs takes epoch and returns it in ns func GetEpochNanoSecs(epoch int64) int64 { temp := epoch