From fcd4e85b46201b18a6f4dbc60a52de42fdebd78d Mon Sep 17 00:00:00 2001 From: lexluo09 <39718951+lexluo09@users.noreply.github.com> Date: Mon, 23 Sep 2024 15:57:34 +0800 Subject: [PATCH] (improvement)(chat) Treat queries that include aggregate functions as metric queries, add the missing dimensions to the group by clause for count distinct, and increase the timeout duration in H2 --- .../headless/chat/corrector/GroupByCorrector.java | 9 ++------- .../supersonic/headless/chat/parser/QueryTypeParser.java | 4 +++- .../standalone/src/main/resources/application-local.yaml | 2 +- launchers/standalone/src/main/resources/s2-exemplar.json | 2 +- .../standalone/src/test/resources/application-local.yaml | 2 +- launchers/standalone/src/test/resources/s2-exemplar.json | 2 +- 6 files changed, 9 insertions(+), 12 deletions(-) diff --git a/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/corrector/GroupByCorrector.java b/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/corrector/GroupByCorrector.java index f42289f15..c045bf930 100644 --- a/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/corrector/GroupByCorrector.java +++ b/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/corrector/GroupByCorrector.java @@ -42,11 +42,6 @@ private Boolean needAddGroupBy( SqlInfo sqlInfo = semanticParseInfo.getSqlInfo(); String correctS2SQL = sqlInfo.getCorrectedS2SQL(); SemanticSchema semanticSchema = chatQueryContext.getSemanticSchema(); - // check has distinct - if (SqlSelectHelper.hasDistinct(correctS2SQL)) { - log.debug("no need to add groupby ,existed distinct in s2sql:{}", correctS2SQL); - return false; - } // add alias field name Set dimensions = getDimensions(dataSetId, semanticSchema); List selectFields = SqlSelectHelper.getSelectFields(correctS2SQL); @@ -54,11 +49,11 @@ private Boolean needAddGroupBy( return false; } // if only date in select not add group by. 
- if (selectFields.size() == 1 && selectFields.contains(TimeDimensionEnum.DAY.getChName())) { + if (selectFields.size() == 1 && TimeDimensionEnum.containsZhTimeDimension(selectFields)) { return false; } if (SqlSelectHelper.hasGroupBy(correctS2SQL)) { - log.debug("No need to add groupby, existed groupby in s2sql:{}", correctS2SQL); + log.debug("No need to add 'group by', existed 'group by' in s2sql:{}", correctS2SQL); return false; } Environment environment = ContextUtils.getBean(Environment.class); diff --git a/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/parser/QueryTypeParser.java b/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/parser/QueryTypeParser.java index 4c749645c..b2f3fe7fd 100644 --- a/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/parser/QueryTypeParser.java +++ b/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/parser/QueryTypeParser.java @@ -1,6 +1,7 @@ package com.tencent.supersonic.headless.chat.parser; import com.tencent.supersonic.auth.api.authentication.pojo.User; +import com.tencent.supersonic.common.jsqlparser.SqlSelectFunctionHelper; import com.tencent.supersonic.common.jsqlparser.SqlSelectHelper; import com.tencent.supersonic.common.pojo.enums.QueryType; import com.tencent.supersonic.common.pojo.enums.TimeDimensionEnum; @@ -70,7 +71,8 @@ private QueryType getQueryType(ChatQueryContext chatQueryContext, SemanticQuery } // 2. 
metric queryType - if (selectContainsMetric(sqlInfo, dataSetId, semanticSchema)) { + if (selectContainsMetric(sqlInfo, dataSetId, semanticSchema) + || SqlSelectFunctionHelper.hasAggregateFunction(sqlInfo.getParsedS2SQL())) { return QueryType.METRIC; } diff --git a/launchers/standalone/src/main/resources/application-local.yaml b/launchers/standalone/src/main/resources/application-local.yaml index 9e1d79013..650ca3a44 100644 --- a/launchers/standalone/src/main/resources/application-local.yaml +++ b/launchers/standalone/src/main/resources/application-local.yaml @@ -1,7 +1,7 @@ spring: datasource: driver-class-name: org.h2.Driver - url: jdbc:h2:mem:semantic;DATABASE_TO_UPPER=false + url: jdbc:h2:mem:semantic;DATABASE_TO_UPPER=false;QUERY_TIMEOUT=30 username: root password: semantic sql: diff --git a/launchers/standalone/src/main/resources/s2-exemplar.json b/launchers/standalone/src/main/resources/s2-exemplar.json index 9f5c3508b..0210b7bf4 100644 --- a/launchers/standalone/src/main/resources/s2-exemplar.json +++ b/launchers/standalone/src/main/resources/s2-exemplar.json @@ -39,7 +39,7 @@ "question": "过去半个月核心用户的访问次数", "sideInfo": "CurrentDate=[2023-09-15],DomainTerms=[<核心用户 COMMENT '用户为alice'>]", "dbSchema": "DatabaseType=[h2], Table=[超音数产品], PartitionTimeField=[数据日期 FORMAT 'yyyy-MM-dd'], Metrics=[<访问次数 ALIAS 'pv' COMMENT '一段时间内用户的访问次数' AGGREGATE 'SUM'>], Dimensions=[<部门>,<数据日期>], Values=[]", - "sql": "SELECT 用户,SUM(访问次数) FROM 超音数产品 WHERE 用户='alice' AND 数据日期 >= '2023-09-01' AND 数据日期 <= '2023-09-15'" + "sql": "SELECT 用户,SUM(访问次数) FROM 超音数产品 WHERE 用户='alice' AND 数据日期 >= '2023-09-01' AND 数据日期 <= '2023-09-15' GROUP BY 用户" }, { "question": "过去半个月忠实用户有哪一些", diff --git a/launchers/standalone/src/test/resources/application-local.yaml b/launchers/standalone/src/test/resources/application-local.yaml index 9e1d79013..650ca3a44 100644 --- a/launchers/standalone/src/test/resources/application-local.yaml +++ b/launchers/standalone/src/test/resources/application-local.yaml @@ -1,7 
+1,7 @@ spring: datasource: driver-class-name: org.h2.Driver - url: jdbc:h2:mem:semantic;DATABASE_TO_UPPER=false + url: jdbc:h2:mem:semantic;DATABASE_TO_UPPER=false;QUERY_TIMEOUT=30 username: root password: semantic sql: diff --git a/launchers/standalone/src/test/resources/s2-exemplar.json b/launchers/standalone/src/test/resources/s2-exemplar.json index 9f5c3508b..0210b7bf4 100644 --- a/launchers/standalone/src/test/resources/s2-exemplar.json +++ b/launchers/standalone/src/test/resources/s2-exemplar.json @@ -39,7 +39,7 @@ "question": "过去半个月核心用户的访问次数", "sideInfo": "CurrentDate=[2023-09-15],DomainTerms=[<核心用户 COMMENT '用户为alice'>]", "dbSchema": "DatabaseType=[h2], Table=[超音数产品], PartitionTimeField=[数据日期 FORMAT 'yyyy-MM-dd'], Metrics=[<访问次数 ALIAS 'pv' COMMENT '一段时间内用户的访问次数' AGGREGATE 'SUM'>], Dimensions=[<部门>,<数据日期>], Values=[]", - "sql": "SELECT 用户,SUM(访问次数) FROM 超音数产品 WHERE 用户='alice' AND 数据日期 >= '2023-09-01' AND 数据日期 <= '2023-09-15'" + "sql": "SELECT 用户,SUM(访问次数) FROM 超音数产品 WHERE 用户='alice' AND 数据日期 >= '2023-09-01' AND 数据日期 <= '2023-09-15' GROUP BY 用户" }, { "question": "过去半个月忠实用户有哪一些",