Skip to content

Commit

Permalink
update conf for baike
Browse files Browse the repository at this point in the history
  • Loading branch information
caszkgui committed Dec 25, 2024
1 parent adf8e01 commit 63308ff
Show file tree
Hide file tree
Showing 5 changed files with 113 additions and 29 deletions.
2 changes: 1 addition & 1 deletion kag/examples/baike/builder/indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
def buildKB(file_path):
from kag.common.conf import KAG_CONFIG

runner = BuilderChainRunner.from_config(KAG_CONFIG.all_config["runner"])
runner = BuilderChainRunner.from_config(KAG_CONFIG.all_config["kag-indexer"])
runner.invoke(file_path)

logger.info(f"\n\nbuildKB successfully for {file_path}\n\n")
Expand Down
82 changes: 55 additions & 27 deletions kag/examples/baike/kag_config.yaml
Original file line number Diff line number Diff line change
@@ -1,28 +1,34 @@
llm: &id001
openie-llm: &id001
api_key: key
base_url: https://api.deepseek.com
model: deepseek-chat
type: maas

llm:
api_key: key
base_url: https://api.deepseek.com
model: deepseek-chat
type: maas

vectorize_model: &id002
api_key: key
base_url: https://api.siliconflow.cn/v1/
model: BAAI/bge-m3
type: openai
vector_dimensions: 1024
vectorizer: *id002

log:
level: INFO

project:
biz_scene: default
host_addr: http://127.0.0.1:8887
id: '2'
id: '7'
language: zh
namespace: BaiKe
resp_solver_pipeline:
generator:
generate_prompt:
type: resp_simple
reasoner:
lf_planner:
type: empty
lf_solver:
chunk_retriever:
type: kag
kg_retriever: null
runner:

kag-indexer:
chain:
extractor:
llm: *id001
Expand All @@ -36,30 +42,52 @@ runner:
type: spg_relation
type: schema_constraint
reader:
type: txt
type: txt # kag.builder.component.reader.txt_reader.TXTReader
post_processor:
similarity_threshold: 0.9
type: base
splitter:
split_length: 300
type: length
window_length: 0
type: unstructured
type: unstructured # kag.builder.default_chain.DefaultUnstructuredBuilderChain
vectorizer:
type: batch
vectorize_model: &id002
api_key: key
base_url: https://api.siliconflow.cn/v1/
model: BAAI/bge-m3
type: openai
vector_dimensions: 1024
vectorize_model: *id002
writer:
type: kg
num_threads_per_chain: 2
num_chains: 4
num_chains: 4
scanner:
type: dir
vectorize_model: *id002
vectorizer: *id002
qa:
force_chunk_retriever: true

lf_solver_pipeline:
generator:
generate_prompt:
type: resp_simple
reasoner:
type: base
lf_executor:
type: base
force_chunk_retriever: true
exact_kg_retriever:
type: default
el_num: 5
search_api: &id003
type: openspg
graph_api: &id004
type: openspg
fuzzy_kg_retriever:
type: default
el_num: 5
vectorize_model: *id002
llm_client: *id001
search_api: *id003
graph_api: *id004
chunk_retriever: &id005
type: default
recall_num: 10
rerank_topk: 10
merger:
type: base
chunk_retriever: *id005
16 changes: 15 additions & 1 deletion kag/examples/baike/solver/eval.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,22 @@
import json
import logging
import os
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

from tqdm import tqdm

from kag.common.benchmarks.evaluate import Evaluate
from kag.solver.logic.solver_pipeline import SolverPipeline
from kag.common.conf import KAG_CONFIG
from kag.common.registry import import_modules_from_path

from kag.common.checkpointer import CheckpointerManager


def qa(query):
# CA
resp = SolverPipeline()
resp = SolverPipeline.from_config(KAG_CONFIG.all_config["lf_solver_pipeline"])
answer, traceLog = resp.run(query)

print(f"\n\nso the answer for '{query}' is: {answer}\n\n") #
Expand All @@ -12,6 +25,7 @@ def qa(query):


if __name__ == "__main__":
import_modules_from_path("./prompt")
queries = [
"周星驰的姓名有何含义?",
"周星驰和万梓良有什么关系",
Expand Down
14 changes: 14 additions & 0 deletions kag/examples/baike/solver/prompt/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Copyright 2023 OpenSPG Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied.

"""
Place the prompts to be used for solving problems in this directory.
"""
28 changes: 28 additions & 0 deletions kag/examples/baike/solver/prompt/resp_generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import re
from string import Template
from typing import List
import logging

from kag.interface import PromptABC

logger = logging.getLogger(__name__)


@PromptABC.register("resp_simple")
class RespGenerator(PromptABC):
    """Prompt registered under the name ``resp_simple``.

    Both templates ask for an answer based on a supplied reference and
    use two placeholders: ``$memory`` and ``$instruction``.
    """

    # Chinese prompt text.
    template_zh = (
        "基于给定的引用信息回答问题。"
        "\n只输出答案,不需要输出额外的信息。"
        "\n给定的引用信息:'$memory'\n问题:'$instruction'"
    )
    # English prompt text.
    template_en = (
        "Answer the question based on the given reference."
        "\nOnly give me the answer and do not output any other words."
        "\nThe following are given reference:'$memory'\nQuestion: '$instruction'"
    )

    @property
    def template_variables(self) -> List[str]:
        """Return the placeholder names used by the templates."""
        names = ["memory", "instruction"]
        return names

    def parse_response(self, response: str, **kwargs):
        """Log the raw response at debug level and return it unchanged."""
        message = "推理器判别:{}".format(response)
        logger.debug(message)
        return response

0 comments on commit 63308ff

Please sign in to comment.