From ebb62a68c0e65dcacc4259c27204780e84d9a436 Mon Sep 17 00:00:00 2001 From: Donghai Date: Wed, 8 Jan 2025 16:26:33 +0800 Subject: [PATCH] init finstate --- kag/examples/FinState/builder/__init__.py | 14 ++ .../FinState/builder/data/__init__.py | 14 ++ kag/examples/FinState/builder/indexer.py | 10 ++ .../FinState/builder/prompt/__init__.py | 14 ++ kag/examples/FinState/kag_config.yaml | 126 +++++++++++++++ kag/examples/FinState/reasoner/__init__.py | 20 +++ kag/examples/FinState/schema/FinState.schema | 145 ++++++++++++++++++ kag/examples/FinState/schema/__init__.py | 18 +++ kag/examples/FinState/solver/__init__.py | 0 kag/examples/FinState/solver/data/__init__.py | 14 ++ .../FinState/solver/prompt/__init__.py | 14 ++ kag/examples/kag_config.yaml | 126 +++++++++++++++ 12 files changed, 515 insertions(+) create mode 100644 kag/examples/FinState/builder/__init__.py create mode 100644 kag/examples/FinState/builder/data/__init__.py create mode 100644 kag/examples/FinState/builder/indexer.py create mode 100644 kag/examples/FinState/builder/prompt/__init__.py create mode 100644 kag/examples/FinState/kag_config.yaml create mode 100644 kag/examples/FinState/reasoner/__init__.py create mode 100644 kag/examples/FinState/schema/FinState.schema create mode 100644 kag/examples/FinState/schema/__init__.py create mode 100644 kag/examples/FinState/solver/__init__.py create mode 100644 kag/examples/FinState/solver/data/__init__.py create mode 100644 kag/examples/FinState/solver/prompt/__init__.py create mode 100644 kag/examples/kag_config.yaml diff --git a/kag/examples/FinState/builder/__init__.py b/kag/examples/FinState/builder/__init__.py new file mode 100644 index 00000000..7a018e7c --- /dev/null +++ b/kag/examples/FinState/builder/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2023 OpenSPG Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. + +""" +Builder Dir. +""" diff --git a/kag/examples/FinState/builder/data/__init__.py b/kag/examples/FinState/builder/data/__init__.py new file mode 100644 index 00000000..59bacd4d --- /dev/null +++ b/kag/examples/FinState/builder/data/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2023 OpenSPG Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. + +""" +Place the files to be used for building the index in this directory. +""" diff --git a/kag/examples/FinState/builder/indexer.py b/kag/examples/FinState/builder/indexer.py new file mode 100644 index 00000000..6f6914a4 --- /dev/null +++ b/kag/examples/FinState/builder/indexer.py @@ -0,0 +1,10 @@ +# Copyright 2023 OpenSPG Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. 
diff --git a/kag/examples/FinState/builder/prompt/__init__.py b/kag/examples/FinState/builder/prompt/__init__.py new file mode 100644 index 00000000..ba7d5d56 --- /dev/null +++ b/kag/examples/FinState/builder/prompt/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2023 OpenSPG Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. + +""" +Place the prompts to be used for building the index in this directory. +""" diff --git a/kag/examples/FinState/kag_config.yaml b/kag/examples/FinState/kag_config.yaml new file mode 100644 index 00000000..11dcb05a --- /dev/null +++ b/kag/examples/FinState/kag_config.yaml @@ -0,0 +1,126 @@ +#------------project configuration start----------------# +openie_llm: &openie_llm + api_key: sk-4323e7aaab36449fab52b0ed86e29696 + base_url: https://api.deepseek.com + model: deepseek-chat + type: maas + +chat_llm: &chat_llm + api_key: sk-4323e7aaab36449fab52b0ed86e29696 + base_url: https://api.deepseek.com + model: deepseek-chat + type: maas + +vectorize_model: &vectorize_model + api_key: sk-yndixxjfxvnsqfkvfuyubkxidhtwicjcflprvqguffrmxbrv + base_url: https://api.siliconflow.cn/v1/ + model: BAAI/bge-m3 + type: openai + vector_dimensions: 1024 +vectorizer: *vectorize_model + +log: + level: INFO + +project: + biz_scene: table + host_addr: http://127.0.0.1:8887 + id: '1' + language: en + namespace: FinState +#------------project configuration end----------------# + +#------------kag-builder configuration start----------------# +kag_builder_pipeline: + chain: + type: unstructured_builder_chain # kag.builder.default_chain.DefaultUnstructuredBuilderChain + 
extractor: + type: schema_free_extractor # kag.builder.component.extractor.schema_free_extractor.SchemaFreeExtractor + llm: *openie_llm + ner_prompt: + type: default_ner # kag.builder.prompt.default.ner.OpenIENERPrompt + std_prompt: + type: default_std # kag.builder.prompt.default.std.OpenIEEntitystandardizationdPrompt + triple_prompt: + type: default_triple # kag.builder.prompt.default.triple.OpenIETriplePrompt + reader: + type: dict_reader # kag.builder.component.reader.dict_reader.DictReader + post_processor: + type: kag_post_processor # kag.builder.component.postprocessor.kag_postprocessor.KAGPostProcessor + similarity_threshold: 0.9 + splitter: + type: length_splitter # kag.builder.component.splitter.length_splitter.LengthSplitter + split_length: 1000 + window_length: 0 + vectorizer: + type: batch_vectorizer # kag.builder.component.vectorizer.batch_vectorizer.BatchVectorizer + vectorize_model: *vectorize_model + writer: + type: kg_writer # kag.builder.component.writer.kg_writer.KGWriter + num_threads_per_chain: 1 + num_chains: 16 + scanner: + type: hotpotqa_dataset_scanner # kag.builder.component.scanner.dataset_scanner.HotpotqaCorpusScanner +#------------kag-builder configuration end----------------# + +#------------kag-solver configuration start----------------# +search_api: &search_api + type: openspg_search_api #kag.solver.tools.search_api.impl.openspg_search_api.OpenSPGSearchAPI + +graph_api: &graph_api + type: openspg_graph_api #kag.solver.tools.graph_api.impl.openspg_graph_api.OpenSPGGraphApi + +exact_kg_retriever: &exact_kg_retriever + type: default_exact_kg_retriever # kag.solver.retriever.impl.default_exact_kg_retriever.DefaultExactKgRetriever + el_num: 5 + llm_client: *chat_llm + search_api: *search_api + graph_api: *graph_api + +fuzzy_kg_retriever: &fuzzy_kg_retriever + type: default_fuzzy_kg_retriever # kag.solver.retriever.impl.default_fuzzy_kg_retriever.DefaultFuzzyKgRetriever + el_num: 5 + vectorize_model: *vectorize_model + llm_client: 
*chat_llm + search_api: *search_api + graph_api: *graph_api + +chunk_retriever: &chunk_retriever + type: default_chunk_retriever # kag.solver.retriever.impl.default_fuzzy_kg_retriever.DefaultFuzzyKgRetriever + llm_client: *chat_llm + recall_num: 10 + rerank_topk: 10 + +kag_solver_pipeline: + memory: + type: default_memory # kag.solver.implementation.default_memory.DefaultMemory + llm_client: *chat_llm + max_iterations: 3 + reasoner: + type: default_reasoner # kag.solver.implementation.default_reasoner.DefaultReasoner + llm_client: *chat_llm + lf_planner: + type: default_lf_planner # kag.solver.plan.default_lf_planner.DefaultLFPlanner + llm_client: *chat_llm + vectorize_model: *vectorize_model + lf_executor: + type: default_lf_executor # kag.solver.execute.default_lf_executor.DefaultLFExecutor + llm_client: *chat_llm + force_chunk_retriever: true + exact_kg_retriever: *exact_kg_retriever + fuzzy_kg_retriever: *fuzzy_kg_retriever + chunk_retriever: *chunk_retriever + merger: + type: default_lf_sub_query_res_merger # kag.solver.execute.default_sub_query_merger.DefaultLFSubQueryResMerger + vectorize_model: *vectorize_model + chunk_retriever: *chunk_retriever + generator: + type: default_generator # kag.solver.implementation.default_generator.DefaultGenerator + llm_client: *chat_llm + generate_prompt: + type: resp_simple # kag/examples/hotpotqa/solver/prompt/resp_generator.py + reflector: + type: default_reflector # kag.solver.implementation.default_reflector.DefaultReflector + llm_client: *chat_llm + +#------------kag-solver configuration end----------------# diff --git a/kag/examples/FinState/reasoner/__init__.py b/kag/examples/FinState/reasoner/__init__.py new file mode 100644 index 00000000..8b8a3c91 --- /dev/null +++ b/kag/examples/FinState/reasoner/__init__.py @@ -0,0 +1,20 @@ +# Copyright 2023 OpenSPG Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. + +""" +Place the DSL file for graph reasoning in this directory. +For example: + +```company.dsl +MATCH (s:DEFAULT.Company) +RETURN s.id, s.address +``` +""" diff --git a/kag/examples/FinState/schema/FinState.schema b/kag/examples/FinState/schema/FinState.schema new file mode 100644 index 00000000..8d41fb56 --- /dev/null +++ b/kag/examples/FinState/schema/FinState.schema @@ -0,0 +1,145 @@ +namespace FinState + +Chunk(文本块): EntityType + properties: + content(内容): Text + index: TextAndVector + +DateTime(日期和时间): EntityType + properties: + desc(描述): Text + index: TextAndVector + semanticType(语义类型): Text + index: Text + +Location(地点): EntityType + properties: + desc(描述): Text + index: TextAndVector + semanticType(语义类型): Text + index: Text + +Person(人物): EntityType + properties: + desc(描述): Text + index: TextAndVector + semanticType(语义类型): Text + index: Text + +Event(事件): EntityType + properties: + desc(描述): Text + index: TextAndVector + semanticType(语义类型): Text + index: Text + +Metric(指标): EntityType + properties: + desc(描述): Text + index: TextAndVector + semanticType(语义类型): Text + index: Text + +Organization(组织机构): EntityType + properties: + desc(描述): Text + index: TextAndVector + semanticType(语义类型): Text + index: Text + +Company(公司): EntityType + properties: + desc(描述): Text + index: TextAndVector + semanticType(语义类型): Text + index: Text + +Products(产品): EntityType + properties: + desc(描述): Text + index: TextAndVector + semanticType(语义类型): Text + index: Text + +Services(服务): EntityType + properties: + desc(描述): Text + index: TextAndVector + semanticType(语义类型): Text + index: Text + +Laws(法律法规): EntityType + properties: + desc(描述): Text + index: TextAndVector + 
semanticType(语义类型): Text + index: Text + +Table(表格): EntityType + properties: + desc(描述): Text + index: TextAndVector + content(内容): Text + index: None + csv(csv格式的表格数据): Text + index: None + containRow(包含行): TableRow + constraint: MultiValue + containColumn(包含列): TableColumn + constraint: MultiValue + +TableRow(表格行): EntityType + properties: + desc(描述): Text + index: TextAndVector + content(内容): Text + index: None + containCell(包含格子): TableCell + constraint: MultiValue + partOf(属于): Table + constraint: MultiValue + subitem(下位关系): TableRow + constraint: MultiValue + +TableColumn(表格列): EntityType + properties: + desc(描述): Text + index: TextAndVector + content(内容): Text + index: None + containCell(包含格子): TableCell + constraint: MultiValue + partOf(属于): Table + constraint: MultiValue + +TableCell(单元格): EntityType + properties: + desc(描述): Text + index: TextAndVector + value(指标值): Text + index: None + scale(量级): Text + index: None + unit(单位): Text + index: None + partOfTable(属于): Table + constraint: MultiValue + partOfTableRow(属于): TableRow + constraint: MultiValue + partOfTableColumn(属于): TableColumn + constraint: MultiValue + +TableKeyWord(表格关键字): EntityType + properties: + desc(描述): Text + index: TextAndVector + keyword(归属于表格): Table + constraint: MultiValue + +Others(其他): EntityType + properties: + desc(描述): Text + index: TextAndVector + semanticType(语义类型): Text + index: Text + diff --git a/kag/examples/FinState/schema/__init__.py b/kag/examples/FinState/schema/__init__.py new file mode 100644 index 00000000..8ac86acc --- /dev/null +++ b/kag/examples/FinState/schema/__init__.py @@ -0,0 +1,18 @@ +# Copyright 2023 OpenSPG Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. + +""" +FinState.schema: + The MarkLang file for the schema of this project. + You can execute `kag schema commit` to commit your schema to SPG server. + + +""" diff --git a/kag/examples/FinState/solver/__init__.py b/kag/examples/FinState/solver/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/kag/examples/FinState/solver/data/__init__.py b/kag/examples/FinState/solver/data/__init__.py new file mode 100644 index 00000000..59bacd4d --- /dev/null +++ b/kag/examples/FinState/solver/data/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2023 OpenSPG Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. + +""" +Place the files to be used for solving problems in this directory. +""" diff --git a/kag/examples/FinState/solver/prompt/__init__.py b/kag/examples/FinState/solver/prompt/__init__.py new file mode 100644 index 00000000..dfa931cd --- /dev/null +++ b/kag/examples/FinState/solver/prompt/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2023 OpenSPG Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. + +""" +Place the prompts to be used for solving problems in this directory. +""" diff --git a/kag/examples/kag_config.yaml b/kag/examples/kag_config.yaml new file mode 100644 index 00000000..606885d5 --- /dev/null +++ b/kag/examples/kag_config.yaml @@ -0,0 +1,126 @@ +#------------project configuration start----------------# +openie_llm: &openie_llm + api_key: sk-4323e7aaab36449fab52b0ed86e29696 + base_url: https://api.deepseek.com + model: deepseek-chat + type: maas + +chat_llm: &chat_llm + api_key: sk-4323e7aaab36449fab52b0ed86e29696 + base_url: https://api.deepseek.com + model: deepseek-chat + type: maas + +vectorize_model: &vectorize_model + api_key: sk-yndixxjfxvnsqfkvfuyubkxidhtwicjcflprvqguffrmxbrv + base_url: https://api.siliconflow.cn/v1/ + model: BAAI/bge-m3 + type: openai + vector_dimensions: 1024 +vectorizer: *vectorize_model + +log: + level: INFO + +project: + biz_scene: table + host_addr: http://127.0.0.1:8887 + id: '9' + language: en + namespace: FinState +#------------project configuration end----------------# + +#------------kag-builder configuration start----------------# +kag_builder_pipeline: + chain: + type: unstructured_builder_chain # kag.builder.default_chain.DefaultUnstructuredBuilderChain + extractor: + type: schema_free_extractor # kag.builder.component.extractor.schema_free_extractor.SchemaFreeExtractor + llm: *openie_llm + ner_prompt: + type: default_ner # kag.builder.prompt.default.ner.OpenIENERPrompt + std_prompt: + type: default_std # kag.builder.prompt.default.std.OpenIEEntitystandardizationdPrompt + triple_prompt: + type: default_triple # kag.builder.prompt.default.triple.OpenIETriplePrompt + reader: + type: dict_reader # 
kag.builder.component.reader.dict_reader.DictReader + post_processor: + type: kag_post_processor # kag.builder.component.postprocessor.kag_postprocessor.KAGPostProcessor + similarity_threshold: 0.9 + splitter: + type: length_splitter # kag.builder.component.splitter.length_splitter.LengthSplitter + split_length: 1000 + window_length: 0 + vectorizer: + type: batch_vectorizer # kag.builder.component.vectorizer.batch_vectorizer.BatchVectorizer + vectorize_model: *vectorize_model + writer: + type: kg_writer # kag.builder.component.writer.kg_writer.KGWriter + num_threads_per_chain: 1 + num_chains: 16 + scanner: + type: hotpotqa_dataset_scanner # kag.builder.component.scanner.dataset_scanner.HotpotqaCorpusScanner +#------------kag-builder configuration end----------------# + +#------------kag-solver configuration start----------------# +search_api: &search_api + type: openspg_search_api #kag.solver.tools.search_api.impl.openspg_search_api.OpenSPGSearchAPI + +graph_api: &graph_api + type: openspg_graph_api #kag.solver.tools.graph_api.impl.openspg_graph_api.OpenSPGGraphApi + +exact_kg_retriever: &exact_kg_retriever + type: default_exact_kg_retriever # kag.solver.retriever.impl.default_exact_kg_retriever.DefaultExactKgRetriever + el_num: 5 + llm_client: *chat_llm + search_api: *search_api + graph_api: *graph_api + +fuzzy_kg_retriever: &fuzzy_kg_retriever + type: default_fuzzy_kg_retriever # kag.solver.retriever.impl.default_fuzzy_kg_retriever.DefaultFuzzyKgRetriever + el_num: 5 + vectorize_model: *vectorize_model + llm_client: *chat_llm + search_api: *search_api + graph_api: *graph_api + +chunk_retriever: &chunk_retriever + type: default_chunk_retriever # kag.solver.retriever.impl.default_fuzzy_kg_retriever.DefaultFuzzyKgRetriever + llm_client: *chat_llm + recall_num: 10 + rerank_topk: 10 + +kag_solver_pipeline: + memory: + type: default_memory # kag.solver.implementation.default_memory.DefaultMemory + llm_client: *chat_llm + max_iterations: 3 + reasoner: + type: 
default_reasoner # kag.solver.implementation.default_reasoner.DefaultReasoner + llm_client: *chat_llm + lf_planner: + type: default_lf_planner # kag.solver.plan.default_lf_planner.DefaultLFPlanner + llm_client: *chat_llm + vectorize_model: *vectorize_model + lf_executor: + type: default_lf_executor # kag.solver.execute.default_lf_executor.DefaultLFExecutor + llm_client: *chat_llm + force_chunk_retriever: true + exact_kg_retriever: *exact_kg_retriever + fuzzy_kg_retriever: *fuzzy_kg_retriever + chunk_retriever: *chunk_retriever + merger: + type: default_lf_sub_query_res_merger # kag.solver.execute.default_sub_query_merger.DefaultLFSubQueryResMerger + vectorize_model: *vectorize_model + chunk_retriever: *chunk_retriever + generator: + type: default_generator # kag.solver.implementation.default_generator.DefaultGenerator + llm_client: *chat_llm + generate_prompt: + type: resp_simple # kag/examples/hotpotqa/solver/prompt/resp_generator.py + reflector: + type: default_reflector # kag.solver.implementation.default_reflector.DefaultReflector + llm_client: *chat_llm + +#------------kag-solver configuration end----------------#