Skip to content

Commit

Permalink
add table reasoner
Browse files Browse the repository at this point in the history
  • Loading branch information
sjnn12138 committed Jan 9, 2025
1 parent 7d3e2dd commit 4f8325d
Show file tree
Hide file tree
Showing 38 changed files with 2,479 additions and 18 deletions.
2 changes: 1 addition & 1 deletion kag/builder/prompt/spg_prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class SPGPrompt(PromptABC):
ignored_relations (List[str]): List of relations to be ignored.
"""

ignored_types: List[str] = ["Chunk"]
ignored_types: List[str] = ["Chunk", "Table", "MetricConstraint", "TableMetric"]
ignored_properties: List[str] = [
"id",
"stdId",
Expand Down
14 changes: 0 additions & 14 deletions kag/examples/FinState/solver/data/__init__.py

This file was deleted.

File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ project:
host_addr: http://127.0.0.1:8887
id: '1'
language: en
namespace: FinState
namespace: finstate
#------------project configuration end----------------#

#------------kag-builder configuration start----------------#
Expand Down Expand Up @@ -97,7 +97,7 @@ kag_solver_pipeline:
llm_client: *chat_llm
max_iterations: 3
reasoner:
type: default_reasoner # kag.solver.implementation.default_reasoner.DefaultReasoner
type: table_reasoner # replaces default_reasoner (kag.solver.implementation.default_reasoner.DefaultReasoner)
llm_client: *chat_llm
lf_planner:
type: default_lf_planner # kag.solver.plan.default_lf_planner.DefaultLFPlanner
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
Empty file.
34 changes: 34 additions & 0 deletions kag/examples/finstate/solver/impl/chunk_lf_planner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import os
import re
from typing import List

from kag.common.base.prompt_op import PromptOp
from kag.interface.solver.lf_planner_abc import LFPlannerABC
from kag.solver.logic.core_modules.common.base_model import LFPlanResult, LogicNode
from kag.solver.logic.core_modules.common.schema_utils import SchemaUtils
from kag.solver.logic.core_modules.config import LogicFormConfiguration
from kag.solver.logic.core_modules.parser.logic_node_parser import ParseLogicForm
from kag.solver.logic.core_modules.retriver.schema_std import SchemaRetrieval


class ChunkLFPlanner(LFPlannerABC):
    """
    Planner stub that performs no logic-form planning.

    ``lf_planing`` ignores its inputs and always returns an empty plan.
    """

    def __init__(self, **kwargs):
        # Every keyword argument is forwarded untouched to the base planner.
        super().__init__(**kwargs)

    # NOTE: the LLM generation result needs to be recorded.
    def lf_planing(self, question, llm_output=None) -> List[LFPlanResult]:
        """
        Return the plan for ``question``, which is always empty here.

        Parameters:
            question (str): The question or task to plan. Not used.
            llm_output (Any, optional): Output from the LLM module. Not used.
        Returns:
            list of LFPlanResult: always an empty list.
        """
        empty_plan: List[LFPlanResult] = []
        return empty_plan
30 changes: 30 additions & 0 deletions kag/examples/finstate/solver/impl/spo_generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from tenacity import stop_after_attempt, retry

from kag.common.base.prompt_op import PromptOp
from kag.interface.solver.kag_generator_abc import KAGGeneratorABC
from kag.solver.implementation.default_memory import DefaultMemory


class SPOGenerator(KAGGeneratorABC):
    """
    Generator that asks the LLM for a final response based on solver memory.

    On construction it loads the "retriever_generator" prompt registered under
    the 'finstate' scene and instantiates it with the configured language.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        prompt_factory = PromptOp.load('finstate', "retriever_generator")
        self.generate_prompt = prompt_factory(language=self.language)

    @retry(stop=stop_after_attempt(3))
    def generate(self, instruction, memory: DefaultMemory):
        """
        Generate a response for ``instruction`` from the serialized memory.

        The call is attempted up to three times (tenacity ``retry``).

        Parameters:
            instruction: The instruction/question passed to the prompt.
            memory (DefaultMemory): Memory whose ``serialize_memory()`` output
                is handed to the prompt.
        Returns:
            Whatever ``llm_module.invoke`` returns for the generate prompt.
        """
        # NOTE: a solved answer held by the memory is not returned directly;
        # the LLM is always invoked with the serialized memory.
        llm_inputs = {
            'memory': memory.serialize_memory(),
            'instruction': instruction,
        }
        return self.llm_module.invoke(
            llm_inputs,
            self.generate_prompt,
            with_json_parse=False,
            with_except=True,
        )
91 changes: 91 additions & 0 deletions kag/examples/finstate/solver/impl/spo_lf_planner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import os
import re
from typing import List

from kag.common.base.prompt_op import PromptOp
from kag.interface.solver.lf_planner_abc import LFPlannerABC
from kag.solver.logic.core_modules.common.base_model import LFPlanResult, LogicNode
from kag.solver.logic.core_modules.common.schema_utils import SchemaUtils
from kag.solver.logic.core_modules.config import LogicFormConfiguration
from kag.solver.logic.core_modules.parser.logic_node_parser import ParseLogicForm
from kag.solver.logic.core_modules.retriver.schema_std import SchemaRetrieval


class SPOLFPlanner(LFPlannerABC):
    """
    Planner that turns a question into sub-queries paired with parsed logic-form nodes.

    The plan text is either produced by the LLM with the 'finstate'
    "get_spo_plan" prompt or supplied by the caller, then parsed with
    ``ParseLogicForm`` and grouped by sub-query.
    """

    # Compiled once; raw strings avoid the invalid "\d" escape warning.
    _STEP_PATTERN = re.compile(r"Step\d+:(.*)")
    _ACTION_PATTERN = re.compile(r"Action\d+:(.*)")

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        config = LogicFormConfiguration(kwargs)
        schema = SchemaUtils(config)
        schema.get_schema()
        std_schema = SchemaRetrieval(**kwargs)
        self.parser = ParseLogicForm(schema, std_schema)
        # Load the prompt for generating logic forms based on the business scene and language
        self.logic_form_plan_prompt = PromptOp.load('finstate', "get_spo_plan")(
            language=self.language
        )

    # NOTE: the LLM generation result needs to be recorded.
    def lf_planing(self, question, llm_output=None) -> List[LFPlanResult]:
        """
        Generates sub-queries and logic forms based on the input question or provided LLM output.

        Parameters:
            question (str): The question or task to plan.
            llm_output (Any, optional): Raw plan text from the LLM. When given it is
                parsed directly instead of invoking the LLM. Defaults to None.
        Returns:
            list of LFPlanResult
        """
        if llm_output is not None:
            sub_querys, logic_forms = self.parse_logic_form_llm_output(llm_output)
        else:
            # NOTE(review): assumes the prompt's response parser yields a
            # (sub_querys, logic_forms) pair -- confirm against "get_spo_plan".
            sub_querys, logic_forms = self.generate_logic_form(question)
        return self._parse_lf(question, sub_querys, logic_forms)

    def _split_sub_query(self, logic_nodes: List[LogicNode]) -> List[LFPlanResult]:
        """Group logic nodes by their ``sub_query``, keeping first-seen order."""
        query_lf_map = {}
        for node in logic_nodes:
            query_lf_map.setdefault(node.sub_query, []).append(node)
        return [
            LFPlanResult(query=sub_query, lf_nodes=nodes)
            for sub_query, nodes in query_lf_map.items()
        ]

    def _parse_lf(self, question, sub_querys, logic_forms) -> List[LFPlanResult]:
        """Parse raw logic-form strings into nodes and group them per sub-query."""
        if sub_querys is None:
            sub_querys = []
        parsed_logic_nodes = self.parser.parse_logic_form_set(
            logic_forms, sub_querys, question
        )
        return self._split_sub_query(parsed_logic_nodes)

    def generate_logic_form(self, question: str):
        """Invoke the LLM with the logic-form planning prompt for ``question``."""
        return self.llm_module.invoke(
            {'question': question},
            self.logic_form_plan_prompt,
            with_json_parse=False,
            with_except=True,
        )

    def parse_logic_form_llm_output(self, llm_output):
        """
        Split raw plan text into sub-queries and logic-form strings.

        Lines starting with ``Step`` contribute a sub-query, lines starting with
        ``Output`` contribute the literal sub-query "output", and lines starting
        with ``Action`` contribute a logic form. When an action has no matching
        step, the most recent step text is reused as its sub-query.

        Parameters:
            llm_output (str): Raw multi-line plan text.
        Returns:
            tuple: ``(sub_querys, logic_forms)`` as two lists of strings.
        """
        # Normalise full-width colons (U+FF1A) to ASCII so "StepN:" / "ActionN:"
        # match, then trim. Previously the replaced string was discarded because
        # strip() was applied to the untouched input.
        _output_string = llm_output.replace("\uff1a", ":").strip()
        sub_querys = []
        logic_forms = []
        current_sub_query = ''
        for line in _output_string.split('\n'):
            line = line.strip()
            if line.startswith('Step'):
                step_match = self._STEP_PATTERN.search(line)
                if step_match is not None:
                    current_sub_query = step_match.group(1)
                    sub_querys.append(current_sub_query)
            elif line.startswith('Output'):
                sub_querys.append("output")
            elif line.startswith('Action'):
                action_match = self._ACTION_PATTERN.search(line)
                if action_match:
                    logic_forms.append(action_match.group(1))
                    # Keep both lists aligned when an action has no step of its own.
                    if len(logic_forms) - len(sub_querys) == 1:
                        sub_querys.append(current_sub_query)
        return sub_querys, logic_forms
40 changes: 40 additions & 0 deletions kag/examples/finstate/solver/impl/spo_memory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from tenacity import retry, stop_after_attempt

from kag.common.base.prompt_op import PromptOp
from kag.interface.solver.kag_memory_abc import KagMemoryABC


class SpoMemory(KagMemoryABC):
    """
    Memory that stores either exact solved answers or supporting evidence.

    Once an exact answer has been saved, serialization reports only the most
    recent exact answer; otherwise it reports the state and evidence lists.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.verify_prompt = PromptOp.load(
            self.biz_scene, "resp_verifier"
        )(language=self.language)
        self.extractor_prompt = PromptOp.load(
            self.biz_scene, "resp_extractor"
        )(language=self.language)
        self.state_memory = []
        self.evidence_memory = []
        self.exact_answer = []
        self.instruction_set = []

    def save_memory(self, solved_answer, supporting_fact, instruction):
        """
        Record one solving step.

        An empty-string ``solved_answer`` means nothing was solved, so the
        supporting fact is kept as evidence. Any other value is stored as an
        exact answer and the supporting fact is dropped. ``instruction`` is
        not used.
        """
        if solved_answer == "":
            self.evidence_memory.append(supporting_fact)
            return
        self.exact_answer.append(solved_answer)

    def get_solved_answer(self):
        """Return the latest exact answer, or None when there is none."""
        if not self.exact_answer:
            return None
        return self.exact_answer[-1]

    def serialize_memory(self):
        """Render the memory as text for use in a prompt."""
        if self.exact_answer:
            return f"[Solved Answer]{self.exact_answer[-1]}"
        state_text = str(self.state_memory)
        evidence_text = str(self.evidence_memory)
        return "[State Memory]:{}\n[Evidence Memory]:{}\n".format(
            state_text, evidence_text
        )

    def refresh(self):
        """Clear state, evidence and exact answers (``instruction_set`` is left as is)."""
        self.state_memory, self.evidence_memory, self.exact_answer = [], [], []
23 changes: 23 additions & 0 deletions kag/examples/finstate/solver/impl/spo_reflector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from tenacity import retry, stop_after_attempt

from kag.common.base.prompt_op import PromptOp
from kag.interface.solver.kag_reflector_abc import KagMemoryABC
from kag.interface.solver.kag_reflector_abc import KagReflectorABC


class SPOReflector(KagReflectorABC):
    """
    Pass-through reflector.

    It always reports that the question can be answered and returns the
    instruction unchanged when asked to refine it.
    """

    def __init__(self, **kwargs):
        """Forward all keyword arguments to the base reflector."""
        super().__init__(**kwargs)

    @retry(stop=stop_after_attempt(3))
    def _can_answer(self, memory: KagMemoryABC, instruction: str):
        """Always return True; ``memory`` and ``instruction`` are not inspected."""
        return True

    @retry(stop=stop_after_attempt(3))
    def _refine_query(self, memory: KagMemoryABC, instruction: str):
        """Return ``instruction`` unchanged; ``memory`` is not inspected."""
        return instruction
2 changes: 1 addition & 1 deletion kag/examples/kag_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ project:
host_addr: http://127.0.0.1:8887
id: '9'
language: en
namespace: FinState
namespace: finstate
#------------project configuration end----------------#

#------------kag-builder configuration start----------------#
Expand Down
Empty file.
81 changes: 81 additions & 0 deletions kag/solver/implementation/table/python_coder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import io
import os
import sys
import contextlib
import traceback
import tempfile
import subprocess

from kag.solver.common.base import KagBaseModule

from kag.solver.implementation.table.search_tree import SearchTree, SearchTreeNode
from kag.common.base.prompt_op import PromptOp
from kag.common.llm.client import LLMClient


class PythonCoderAgent(KagBaseModule):
    """
    Agent that asks the LLM for Python code answering a question and runs it.

    The generated code is written to a temporary file and executed with the
    current interpreter in a subprocess; its stdout is taken as the answer.

    SECURITY NOTE: this executes LLM-generated code without sandboxing. Only
    use it in an environment where that is acceptable.
    """

    def __init__(
        self, init_question: str, question: str, history: SearchTree, **kwargs
    ):
        """
        Parameters:
            init_question (str): The original top-level question.
            question (str): The (sub-)question to answer with code.
            history (SearchTree): Search tree providing prior sub-question
                context and domain knowledge (``dk``) for the prompt.
            **kwargs: Forwarded to ``KagBaseModule``.
        """
        super().__init__(**kwargs)

        self.init_question = init_question
        self.question = question
        self.history = history
        self.code_prompt = PromptOp.load(self.biz_scene, "python_coder_prompt")(
            language=self.language
        )

    def answer(self):
        """
        Try up to three times to obtain an answer by generating and running code.

        After a failed attempt, the failing code and its error are fed back
        into the next prompt.

        Returns:
            tuple: ``(answer, code)`` where ``answer`` is the program's stdout,
            or the string "I don't know" if every attempt failed, and ``code``
            is the code of the last attempt.
        """
        try_times = 3
        error = None
        code = None
        while try_times > 0:
            try_times -= 1
            rst, run_error, code = self._run_onetime(error)
            if rst is not None:
                return rst, code
            error = f"code:\n{code}\nerror:\n{run_error}"
            print("code=" + str(code) + ",error=" + str(run_error))
        return "I don't know", code

    def _run_onetime(self, error: str):
        """
        Generate code once and execute it.

        Parameters:
            error (str): Description of the previous failed attempt, or None
                on the first attempt; passed to the prompt.
        Returns:
            tuple: ``(stdout, None, code)`` on success, or
            ``(None, error_text, code)`` when the subprocess could not be
            started or wrote anything to stderr.
        """
        llm: LLMClient = self.llm_module
        python_code = llm.invoke(
            {
                "question": self.question,
                "context": str(self.history.as_subquestion_context_json()),
                "error": error,
                "dk": self.history.dk,
            },
            self.code_prompt,
            with_except=True,
        )

        # delete=False so the file outlives the ``with`` block and can be run
        # by the child interpreter; it is removed in the ``finally`` below.
        # The default owner-only mode is kept: the interpreter only needs to
        # read the file, and widening permissions on generated code is unsafe.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".py") as temp_file:
            temp_file.write(python_code.encode("utf-8"))
            temp_file_path = temp_file.name

        # Pre-initialise so a failure inside subprocess.run cannot leave
        # ``result`` unbound.
        result = None
        launch_error = None
        try:
            # Run the temporary file with the current Python interpreter.
            result = subprocess.run(
                [sys.executable, temp_file_path], capture_output=True, text=True
            )
            print(f"stdout:{result.stdout}, stderr:{result.stderr}")
        except Exception as e:
            launch_error = e
            print(f"subprocess.run failed {e}")
        finally:
            # Clean up the temporary file.
            os.remove(temp_file_path)

        if result is None:
            # The subprocess never produced a result; report it as a run error
            # so the caller can retry with this feedback.
            return None, f"subprocess.run failed {launch_error}", python_code

        # Any stderr output is treated as a failed run.
        if result.stderr:
            return None, result.stderr, python_code
        return result.stdout, None, python_code
Loading

0 comments on commit 4f8325d

Please sign in to comment.