Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(improvement)(chat) Treat queries that include aggregate functions as the metric pattern, add the missing dimensions to the GROUP BY clause for count-distinct queries, and increase the query timeout in H2 #1700

Merged
merged 1 commit into from
Sep 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -42,23 +42,18 @@ private Boolean needAddGroupBy(
SqlInfo sqlInfo = semanticParseInfo.getSqlInfo();
String correctS2SQL = sqlInfo.getCorrectedS2SQL();
SemanticSchema semanticSchema = chatQueryContext.getSemanticSchema();
// check has distinct
if (SqlSelectHelper.hasDistinct(correctS2SQL)) {
log.debug("no need to add groupby ,existed distinct in s2sql:{}", correctS2SQL);
return false;
}
// add alias field name
Set<String> dimensions = getDimensions(dataSetId, semanticSchema);
List<String> selectFields = SqlSelectHelper.getSelectFields(correctS2SQL);
if (CollectionUtils.isEmpty(selectFields) || CollectionUtils.isEmpty(dimensions)) {
return false;
}
// if only date in select not add group by.
if (selectFields.size() == 1 && selectFields.contains(TimeDimensionEnum.DAY.getChName())) {
if (selectFields.size() == 1 && TimeDimensionEnum.containsZhTimeDimension(selectFields)) {
return false;
}
if (SqlSelectHelper.hasGroupBy(correctS2SQL)) {
log.debug("No need to add groupby, existed groupby in s2sql:{}", correctS2SQL);
log.debug("No need to add 'group by', existed 'group by' in s2sql:{}", correctS2SQL);
return false;
}
Environment environment = ContextUtils.getBean(Environment.class);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package com.tencent.supersonic.headless.chat.parser;

import com.tencent.supersonic.auth.api.authentication.pojo.User;
import com.tencent.supersonic.common.jsqlparser.SqlSelectFunctionHelper;
import com.tencent.supersonic.common.jsqlparser.SqlSelectHelper;
import com.tencent.supersonic.common.pojo.enums.QueryType;
import com.tencent.supersonic.common.pojo.enums.TimeDimensionEnum;
Expand Down Expand Up @@ -70,7 +71,8 @@ private QueryType getQueryType(ChatQueryContext chatQueryContext, SemanticQuery
}

// 2. metric queryType
if (selectContainsMetric(sqlInfo, dataSetId, semanticSchema)) {
if (selectContainsMetric(sqlInfo, dataSetId, semanticSchema)
|| SqlSelectFunctionHelper.hasAggregateFunction(sqlInfo.getParsedS2SQL())) {
return QueryType.METRIC;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
spring:
datasource:
driver-class-name: org.h2.Driver
url: jdbc:h2:mem:semantic;DATABASE_TO_UPPER=false
url: jdbc:h2:mem:semantic;DATABASE_TO_UPPER=false;QUERY_TIMEOUT=30
username: root
password: semantic
sql:
Expand Down
2 changes: 1 addition & 1 deletion launchers/standalone/src/main/resources/s2-exemplar.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
"question": "过去半个月核心用户的访问次数",
"sideInfo": "CurrentDate=[2023-09-15],DomainTerms=[<核心用户 COMMENT '用户为alice'>]",
"dbSchema": "DatabaseType=[h2], Table=[超音数产品], PartitionTimeField=[数据日期 FORMAT 'yyyy-MM-dd'], Metrics=[<访问次数 ALIAS 'pv' COMMENT '一段时间内用户的访问次数' AGGREGATE 'SUM'>], Dimensions=[<部门>,<数据日期>], Values=[]",
"sql": "SELECT 用户,SUM(访问次数) FROM 超音数产品 WHERE 用户='alice' AND 数据日期 >= '2023-09-01' AND 数据日期 <= '2023-09-15'"
"sql": "SELECT 用户,SUM(访问次数) FROM 超音数产品 WHERE 用户='alice' AND 数据日期 >= '2023-09-01' AND 数据日期 <= '2023-09-15' GROUP BY 用户"
},
{
"question": "过去半个月忠实用户有哪一些",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
spring:
datasource:
driver-class-name: org.h2.Driver
url: jdbc:h2:mem:semantic;DATABASE_TO_UPPER=false
url: jdbc:h2:mem:semantic;DATABASE_TO_UPPER=false;QUERY_TIMEOUT=30
username: root
password: semantic
sql:
Expand Down
2 changes: 1 addition & 1 deletion launchers/standalone/src/test/resources/s2-exemplar.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
"question": "过去半个月核心用户的访问次数",
"sideInfo": "CurrentDate=[2023-09-15],DomainTerms=[<核心用户 COMMENT '用户为alice'>]",
"dbSchema": "DatabaseType=[h2], Table=[超音数产品], PartitionTimeField=[数据日期 FORMAT 'yyyy-MM-dd'], Metrics=[<访问次数 ALIAS 'pv' COMMENT '一段时间内用户的访问次数' AGGREGATE 'SUM'>], Dimensions=[<部门>,<数据日期>], Values=[]",
"sql": "SELECT 用户,SUM(访问次数) FROM 超音数产品 WHERE 用户='alice' AND 数据日期 >= '2023-09-01' AND 数据日期 <= '2023-09-15'"
"sql": "SELECT 用户,SUM(访问次数) FROM 超音数产品 WHERE 用户='alice' AND 数据日期 >= '2023-09-01' AND 数据日期 <= '2023-09-15' GROUP BY 用户"
},
{
"question": "过去半个月忠实用户有哪一些",
Expand Down
Loading