Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
youdonghai committed Jan 15, 2025
1 parent 9e8dbdc commit 57f58b8
Show file tree
Hide file tree
Showing 12 changed files with 480 additions and 2 deletions.
14 changes: 14 additions & 0 deletions kag/examples/FinState/FinStateDemo/builder/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Copyright 2023 OpenSPG Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied.

"""
Builder Dir.
"""
14 changes: 14 additions & 0 deletions kag/examples/FinState/FinStateDemo/builder/data/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Copyright 2023 OpenSPG Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied.

"""
Place the files to be used for building the index in this directory.
"""
52 changes: 52 additions & 0 deletions kag/examples/FinState/FinStateDemo/builder/indexer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Copyright 2023 OpenSPG Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied.


import os
import logging
from kag.common.registry import import_modules_from_path

from kag.builder.runner import BuilderChainRunner

from kag.examples.FinState.builder.graph_db_tools import clear_neo4j_data

logger = logging.getLogger(__name__)


def buildKB(file_path):
    """Run the builder chain configured under ``kag_builder_pipeline``.

    Args:
        file_path: Input handed unchanged to ``BuilderChainRunner.invoke``.
    """
    # Imported at call time, not at module import (presumably so the
    # project configuration is only resolved when a build runs -- confirm).
    from kag.common.conf import KAG_CONFIG

    pipeline_conf = KAG_CONFIG.all_config["kag_builder_pipeline"]
    chain_runner = BuilderChainRunner.from_config(pipeline_conf)
    chain_runner.invoke(file_path)

    logger.info(f"\n\nbuildKB successfully for {file_path}\n\n")
#

def buildChunkKB(file_path):
    """Run the builder chain configured under ``kag_chunk_builder_pipeline``.

    Args:
        file_path: Input handed unchanged to ``BuilderChainRunner.invoke``.
    """
    # Imported at call time, not at module import (presumably so the
    # project configuration is only resolved when a build runs -- confirm).
    from kag.common.conf import KAG_CONFIG

    pipeline_conf = KAG_CONFIG.all_config["kag_chunk_builder_pipeline"]
    chain_runner = BuilderChainRunner.from_config(pipeline_conf)
    chain_runner.invoke(file_path)

    logger.info(f"\n\nbuildChunkKB successfully for {file_path}\n\n")

if __name__ == "__main__":
    # Directory containing this script; the relative paths below hang off it.
    dir_path = os.path.dirname(__file__)

    # Import the modules found under the builder_component directory two
    # levels up before the pipeline is constructed (presumably so custom
    # components are registered -- confirm).
    component_dir = os.path.join(
        os.path.abspath(dir_path), "../../builder_component"
    )
    import_modules_from_path(component_dir)

    # Only the chunk pipeline is run from this entry point; buildKB is not
    # invoked here.
    file_path = os.path.join(dir_path, "../../builder/data/阿里巴巴2025财年度中期报告.md")
    buildChunkKB(file_path)
14 changes: 14 additions & 0 deletions kag/examples/FinState/FinStateDemo/builder/prompt/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Copyright 2023 OpenSPG Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied.

"""
Place the prompts to be used for building the index in this directory.
"""
173 changes: 173 additions & 0 deletions kag/examples/FinState/FinStateDemo/kag_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
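#------------project configuration start----------------#
# NOTE(review): the api_key values in this section are committed in plain text, and
# project.host_addr points at an internal host. Rotate the keys and supply credentials
# and host via environment variables or an untracked local config before sharing.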
openie_llm: &openie_llm
api_key: sk-4323e7aaab36449fab52b0ed86e29696
base_url: https://api.deepseek.com
model: deepseek-chat
type: maas

llm: &llm
api_key: sk-4323e7aaab36449fab52b0ed86e29696
base_url: https://api.deepseek.com
model: deepseek-chat
type: maas

chat_llm: &chat_llm
api_key: sk-4323e7aaab36449fab52b0ed86e29696
base_url: https://api.deepseek.com
model: deepseek-chat
type: maas

vectorize_model: &vectorize_model
api_key: sk-yndixxjfxvnsqfkvfuyubkxidhtwicjcflprvqguffrmxbrv
base_url: https://api.siliconflow.cn/v1/
model: BAAI/bge-m3
type: openai
vector_dimensions: 1024
vectorizer: *vectorize_model

log:
level: INFO

project:
biz_scene: table
#host_addr: http://127.0.0.1:8887
host_addr: http://antspg-gz00b-006000164057.sa128-sqa.alipay.net:8887
id: '3300002'
language: zh
namespace: FinStateDemo
#------------project configuration end----------------#

#------------kag-builder configuration start----------------#
kag_builder_pipeline:
chain:
type: unstructured_builder_chain # kag.builder.default_chain.DefaultUnstructuredBuilderChain
extractor:
type: table_and_text_extractor # NOTE(review): presumably a table-aware extractor, not SchemaFreeExtractor (see schema_free_extractor below) — confirm the registered class
llm: *openie_llm
ner_prompt:
type: default_ner # kag.builder.prompt.default.ner.OpenIENERPrompt
std_prompt:
type: default_std # kag.builder.prompt.default.std.OpenIEEntitystandardizationdPrompt
triple_prompt:
type: default_triple # kag.builder.prompt.default.triple.OpenIETriplePrompt
table_classify_prompt:
type: table_classify
table_context_prompt:
type: table_context
table_keywords_prompt:
type: table_keywords
table_reformat_prompt:
type: table_reformat
reader:
type: md_reader # presumably kag.builder.component.reader.markdown_reader.MarkDownReader, not DictReader — confirm
post_processor:
type: kag_post_processor # kag.builder.component.postprocessor.kag_postprocessor.KAGPostProcessor
similarity_threshold: 0.9
splitter:
type: length_splitter # kag.builder.component.splitter.length_splitter.LengthSplitter
split_length: 1000
window_length: 0
vectorizer:
type: batch_vectorizer # kag.builder.component.vectorizer.batch_vectorizer.BatchVectorizer
vectorize_model: *vectorize_model
writer:
type: kg_writer # kag.builder.component.writer.kg_writer.KGWriter
num_threads_per_chain: 1
num_chains: 1
scanner:
type: file_scanner # presumably kag.builder.component.scanner.file_scanner.FileScanner, not a dataset corpus scanner — confirm

kag_chunk_builder_pipeline:
chain:
type: unstructured_builder_chain # kag.builder.default_chain.DefaultUnstructuredBuilderChain
extractor:
type: schema_free_extractor # kag.builder.component.extractor.schema_free_extractor.SchemaFreeExtractor
llm: *openie_llm
ner_prompt:
type: default_ner # kag.builder.prompt.default.ner.OpenIENERPrompt
std_prompt:
type: default_std # kag.builder.prompt.default.std.OpenIEEntitystandardizationdPrompt
triple_prompt:
type: default_triple # kag.builder.prompt.default.triple.OpenIETriplePrompt
reader:
type: md_reader # presumably kag.builder.component.reader.markdown_reader.MarkDownReader, not DictReader — confirm
post_processor:
type: kag_post_processor # kag.builder.component.postprocessor.kag_postprocessor.KAGPostProcessor
similarity_threshold: 0.9
splitter:
type: length_splitter # kag.builder.component.splitter.length_splitter.LengthSplitter
split_length: 100000
window_length: 0
vectorizer:
type: batch_vectorizer # kag.builder.component.vectorizer.batch_vectorizer.BatchVectorizer
vectorize_model: *vectorize_model
writer:
type: kg_writer # kag.builder.component.writer.kg_writer.KGWriter
num_threads_per_chain: 1
num_chains: 1
scanner:
type: file_scanner # presumably kag.builder.component.scanner.file_scanner.FileScanner, not a dataset corpus scanner — confirm
#------------kag-builder configuration end----------------#

#------------kag-solver configuration start----------------#
search_api: &search_api
type: openspg_search_api #kag.solver.tools.search_api.impl.openspg_search_api.OpenSPGSearchAPI

graph_api: &graph_api
type: openspg_graph_api #kag.solver.tools.graph_api.impl.openspg_graph_api.OpenSPGGraphApi

exact_kg_retriever: &exact_kg_retriever
type: default_exact_kg_retriever # kag.solver.retriever.impl.default_exact_kg_retriever.DefaultExactKgRetriever
el_num: 5
llm_client: *chat_llm
search_api: *search_api
graph_api: *graph_api

fuzzy_kg_retriever: &fuzzy_kg_retriever
type: default_fuzzy_kg_retriever # kag.solver.retriever.impl.default_fuzzy_kg_retriever.DefaultFuzzyKgRetriever
el_num: 5
vectorize_model: *vectorize_model
llm_client: *chat_llm
search_api: *search_api
graph_api: *graph_api

chunk_retriever: &chunk_retriever
type: default_chunk_retriever # NOTE(review): comment previously repeated the DefaultFuzzyKgRetriever path from fuzzy_kg_retriever; confirm the chunk retriever class registered under this name
llm_client: *chat_llm
ner_prompt:
type: table_question_ner
recall_num: 10
rerank_topk: 10

kag_solver_pipeline:
memory:
type: default_memory # kag.solver.implementation.default_memory.DefaultMemory
llm_client: *chat_llm
max_iterations: 3
reasoner:
type: default_reasoner # kag.solver.implementation.default_reasoner.DefaultReasoner
llm_client: *chat_llm
lf_planner:
type: default_lf_planner # kag.solver.plan.default_lf_planner.DefaultLFPlanner
llm_client: *chat_llm
vectorize_model: *vectorize_model
logic_form_plan_prompt:
type: only_retriever_plan
lf_executor:
type: default_lf_executor # kag.solver.execute.default_lf_executor.DefaultLFExecutor
llm_client: *chat_llm
force_chunk_retriever: true
exact_kg_retriever: *exact_kg_retriever
fuzzy_kg_retriever: *fuzzy_kg_retriever
chunk_retriever: *chunk_retriever
merger:
type: default_lf_sub_query_res_merger # kag.solver.execute.default_sub_query_merger.DefaultLFSubQueryResMerger
vectorize_model: *vectorize_model
chunk_retriever: *chunk_retriever
generator:
type: default_generator # kag.solver.implementation.default_generator.DefaultGenerator
llm_client: *chat_llm
reflector:
type: default_reflector # kag.solver.implementation.default_reflector.DefaultReflector
llm_client: *chat_llm
#------------kag-solver configuration end----------------#
20 changes: 20 additions & 0 deletions kag/examples/FinState/FinStateDemo/reasoner/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Copyright 2023 OpenSPG Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied.

"""
Place the DSL file for graph reasoning in this directory.
For example:
```company.dsl
MATCH (s:DEFAULT.Company)
RETURN s.id, s.address
```
"""
Loading

0 comments on commit 57f58b8

Please sign in to comment.