diff --git a/kag/examples/baike/builder/indexer.py b/kag/examples/baike/builder/indexer.py index 06d562f7..14b10f70 100644 --- a/kag/examples/baike/builder/indexer.py +++ b/kag/examples/baike/builder/indexer.py @@ -19,7 +19,7 @@ def buildKB(file_path): from kag.common.conf import KAG_CONFIG - runner = BuilderChainRunner.from_config(KAG_CONFIG.all_config["runner"]) + runner = BuilderChainRunner.from_config(KAG_CONFIG.all_config["kag-indexer"]) runner.invoke(file_path) logger.info(f"\n\nbuildKB successfully for {file_path}\n\n") diff --git a/kag/examples/baike/kag_config.yaml b/kag/examples/baike/kag_config.yaml index edf2e53b..2abd2307 100644 --- a/kag/examples/baike/kag_config.yaml +++ b/kag/examples/baike/kag_config.yaml @@ -1,28 +1,34 @@ -llm: &id001 +openie-llm: &id001 api_key: key base_url: https://api.deepseek.com model: deepseek-chat type: maas + +llm: + api_key: key + base_url: https://api.deepseek.com + model: deepseek-chat + type: maas + +vectorize_model: &id002 + api_key: key + base_url: https://api.siliconflow.cn/v1/ + model: BAAI/bge-m3 + type: openai + vector_dimensions: 1024 +vectorizer: *id002 + log: level: INFO + project: biz_scene: default host_addr: http://127.0.0.1:8887 - id: '2' + id: '7' language: zh namespace: BaiKe -resp_solver_pipeline: - generator: - generate_prompt: - type: resp_simple - reasoner: - lf_planner: - type: empty - lf_solver: - chunk_retriever: - type: kag - kg_retriever: null -runner: + +kag-indexer: chain: extractor: llm: *id001 @@ -36,7 +42,7 @@ runner: type: spg_relation type: schema_constraint reader: - type: txt + type: txt # kag.builder.component.reader.txt_reader.TXTReader post_processor: similarity_threshold: 0.9 type: base @@ -44,22 +50,44 @@ runner: split_length: 300 type: length window_length: 0 - type: unstructured + type: unstructured # kag.builder.default_chain.DefaultUnstructuredBuilderChain vectorizer: type: batch - vectorize_model: &id002 - api_key: key - base_url: https://api.siliconflow.cn/v1/ - model: 
BAAI/bge-m3 - type: openai - vector_dimensions: 1024 + vectorize_model: *id002 writer: type: kg num_threads_per_chain: 2 - num_chains: 4 + num_chains: 4 scanner: type: dir -vectorize_model: *id002 -vectorizer: *id002 -qa: - force_chunk_retriever: true + +lf_solver_pipeline: + generator: + generate_prompt: + type: resp_simple + reasoner: + type: base + lf_executor: + type: base + force_chunk_retriever: true + exact_kg_retriever: + type: default + el_num: 5 + search_api: &id003 + type: openspg + graph_api: &id004 + type: openspg + fuzzy_kg_retriever: + type: default + el_num: 5 + vectorize_model: *id002 + llm_client: *id001 + search_api: *id003 + graph_api: *id004 + chunk_retriever: &id005 + type: default + recall_num: 10 + rerank_topk: 10 + merger: + type: base + chunk_retriever: *id005 diff --git a/kag/examples/baike/solver/eval.py b/kag/examples/baike/solver/eval.py index 7e0ef08c..c497760c 100644 --- a/kag/examples/baike/solver/eval.py +++ b/kag/examples/baike/solver/eval.py @@ -1,9 +1,22 @@ +import json +import logging +import os +import time +from concurrent.futures import ThreadPoolExecutor, as_completed + +from tqdm import tqdm + +from kag.common.benchmarks.evaluate import Evaluate from kag.solver.logic.solver_pipeline import SolverPipeline +from kag.common.conf import KAG_CONFIG +from kag.common.registry import import_modules_from_path + +from kag.common.checkpointer import CheckpointerManager def qa(query): # CA - resp = SolverPipeline() + resp = SolverPipeline.from_config(KAG_CONFIG.all_config["lf_solver_pipeline"]) answer, traceLog = resp.run(query) print(f"\n\nso the answer for '{query}' is: {answer}\n\n") # @@ -12,6 +25,7 @@ def qa(query): if __name__ == "__main__": + import_modules_from_path("./prompt") queries = [ "周星驰的姓名有何含义?", "周星驰和万梓良有什么关系", diff --git a/kag/examples/baike/solver/prompt/__init__.py b/kag/examples/baike/solver/prompt/__init__.py new file mode 100644 index 00000000..dfa931cd --- /dev/null +++ 
# ---- kag/examples/baike/solver/prompt/__init__.py (new file) ----
# Copyright 2023 OpenSPG Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied.

"""
Place the prompts to be used for solving problems in this directory.
"""

# ---- kag/examples/baike/solver/prompt/resp_generator.py (new file) ----
import re
from string import Template
from typing import List
import logging

from kag.interface import PromptABC

logger = logging.getLogger(__name__)


@PromptABC.register("resp_simple")
class RespGenerator(PromptABC):
    """Prompt that asks the model to answer a question from reference text.

    The class is registered with ``PromptABC`` under the name
    ``"resp_simple"``. Both templates use ``$memory`` for the reference
    text and ``$instruction`` for the question, and instruct the model to
    output only the answer.
    """

    # Chinese wording of the prompt.
    template_zh = (
        "基于给定的引用信息回答问题。"
        "\n只输出答案,不需要输出额外的信息。"
        "\n给定的引用信息:'$memory'\n问题:'$instruction'"
    )
    # English wording of the prompt.
    template_en = (
        "Answer the question based on the given reference."
        "\nOnly give me the answer and do not output any other words."
        "\nThe following are given reference:'$memory'\nQuestion: '$instruction'"
    )

    @property
    def template_variables(self) -> List[str]:
        """Return the placeholder names that appear in the templates."""
        return ["memory", "instruction"]

    def parse_response(self, response: str, **kwargs) -> str:
        """Log the raw response at debug level and return it unchanged.

        Args:
            response: Text to be logged and handed back to the caller.
            **kwargs: Extra keyword arguments; ignored by this implementation.

        Returns:
            ``response`` exactly as it was received.
        """
        # Pass the value as a logging argument so the message is only
        # formatted when DEBUG output is actually enabled.
        logger.debug("推理器判别:%s", response)
        return response