Skip to content

Commit

Permalink
(improvement)(chat) Including agg refers to the metric pattern, adding the missing dimensions for count distinct in the group by clause, and increasing the timeout duration in H2 (#1700)
Browse files Browse the repository at this point in the history
  • Loading branch information
lexluo09 committed Sep 23, 2024
1 parent 23489a6 commit 4ef5719
Show file tree
Hide file tree
Showing 6 changed files with 9 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -42,23 +42,18 @@ private Boolean needAddGroupBy(
SqlInfo sqlInfo = semanticParseInfo.getSqlInfo();
String correctS2SQL = sqlInfo.getCorrectedS2SQL();
SemanticSchema semanticSchema = chatQueryContext.getSemanticSchema();
// check has distinct
if (SqlSelectHelper.hasDistinct(correctS2SQL)) {
log.debug("no need to add groupby ,existed distinct in s2sql:{}", correctS2SQL);
return false;
}
// add alias field name
Set<String> dimensions = getDimensions(dataSetId, semanticSchema);
List<String> selectFields = SqlSelectHelper.getSelectFields(correctS2SQL);
if (CollectionUtils.isEmpty(selectFields) || CollectionUtils.isEmpty(dimensions)) {
return false;
}
// if the select clause contains only a single date field, do not add group by.
if (selectFields.size() == 1 && selectFields.contains(TimeDimensionEnum.DAY.getChName())) {
if (selectFields.size() == 1 && TimeDimensionEnum.containsZhTimeDimension(selectFields)) {
return false;
}
if (SqlSelectHelper.hasGroupBy(correctS2SQL)) {
log.debug("No need to add groupby, existed groupby in s2sql:{}", correctS2SQL);
log.debug("No need to add 'group by', existed 'group by' in s2sql:{}", correctS2SQL);
return false;
}
Environment environment = ContextUtils.getBean(Environment.class);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package com.tencent.supersonic.headless.chat.parser;

import com.tencent.supersonic.auth.api.authentication.pojo.User;
import com.tencent.supersonic.common.jsqlparser.SqlSelectFunctionHelper;
import com.tencent.supersonic.common.jsqlparser.SqlSelectHelper;
import com.tencent.supersonic.common.pojo.enums.QueryType;
import com.tencent.supersonic.common.pojo.enums.TimeDimensionEnum;
Expand Down Expand Up @@ -70,7 +71,8 @@ private QueryType getQueryType(ChatQueryContext chatQueryContext, SemanticQuery
}

// 2. metric queryType
if (selectContainsMetric(sqlInfo, dataSetId, semanticSchema)) {
if (selectContainsMetric(sqlInfo, dataSetId, semanticSchema)
|| SqlSelectFunctionHelper.hasAggregateFunction(sqlInfo.getParsedS2SQL())) {
return QueryType.METRIC;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
spring:
datasource:
driver-class-name: org.h2.Driver
url: jdbc:h2:mem:semantic;DATABASE_TO_UPPER=false
url: jdbc:h2:mem:semantic;DATABASE_TO_UPPER=false;QUERY_TIMEOUT=30
username: root
password: semantic
sql:
Expand Down
2 changes: 1 addition & 1 deletion launchers/standalone/src/main/resources/s2-exemplar.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
"question": "过去半个月核心用户的访问次数",
"sideInfo": "CurrentDate=[2023-09-15],DomainTerms=[<核心用户 COMMENT '用户为alice'>]",
"dbSchema": "DatabaseType=[h2], Table=[超音数产品], PartitionTimeField=[数据日期 FORMAT 'yyyy-MM-dd'], Metrics=[<访问次数 ALIAS 'pv' COMMENT '一段时间内用户的访问次数' AGGREGATE 'SUM'>], Dimensions=[<部门>,<数据日期>], Values=[]",
"sql": "SELECT 用户,SUM(访问次数) FROM 超音数产品 WHERE 用户='alice' AND 数据日期 >= '2023-09-01' AND 数据日期 <= '2023-09-15'"
"sql": "SELECT 用户,SUM(访问次数) FROM 超音数产品 WHERE 用户='alice' AND 数据日期 >= '2023-09-01' AND 数据日期 <= '2023-09-15' GROUP BY 用户"
},
{
"question": "过去半个月忠实用户有哪一些",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
spring:
datasource:
driver-class-name: org.h2.Driver
url: jdbc:h2:mem:semantic;DATABASE_TO_UPPER=false
url: jdbc:h2:mem:semantic;DATABASE_TO_UPPER=false;QUERY_TIMEOUT=30
username: root
password: semantic
sql:
Expand Down
2 changes: 1 addition & 1 deletion launchers/standalone/src/test/resources/s2-exemplar.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
"question": "过去半个月核心用户的访问次数",
"sideInfo": "CurrentDate=[2023-09-15],DomainTerms=[<核心用户 COMMENT '用户为alice'>]",
"dbSchema": "DatabaseType=[h2], Table=[超音数产品], PartitionTimeField=[数据日期 FORMAT 'yyyy-MM-dd'], Metrics=[<访问次数 ALIAS 'pv' COMMENT '一段时间内用户的访问次数' AGGREGATE 'SUM'>], Dimensions=[<部门>,<数据日期>], Values=[]",
"sql": "SELECT 用户,SUM(访问次数) FROM 超音数产品 WHERE 用户='alice' AND 数据日期 >= '2023-09-01' AND 数据日期 <= '2023-09-15'"
"sql": "SELECT 用户,SUM(访问次数) FROM 超音数产品 WHERE 用户='alice' AND 数据日期 >= '2023-09-01' AND 数据日期 <= '2023-09-15' GROUP BY 用户"
},
{
"question": "过去半个月忠实用户有哪一些",
Expand Down

0 comments on commit 4ef5719

Please sign in to comment.