Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(builder): add namespace for extracted subgraphs #71

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion KAG_VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.5.2-beta1
0.5.2-beta2
2 changes: 1 addition & 1 deletion kag/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@


__package_name__ = "openspg-kag"
__version__ = "0.5.2-beta1"
__version__ = "0.5.2-beta2"

from kag.common.env import init_env

Expand Down
58 changes: 41 additions & 17 deletions kag/builder/component/extractor/kag_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def __init__(self, **kwargs):
self.language = self.prompt_config.get("language") or os.getenv(
"KAG_PROMPT_LANGUAGE", "en"
)
self.namespace = os.environ["KAG_PROJECT_NAMESPACE"]
self.schema = SchemaClient(project_id=self.project_id).load()
self.ner_prompt = PromptOp.load(self.biz_scene, "ner")(
language=self.language, project_id=self.project_id
Expand Down Expand Up @@ -124,6 +125,12 @@ def triples_extraction(self, passage: str, entities: List[Dict]):
{"input": passage, "entity_list": entities}, self.triple_prompt
)

def get_std_type_name(self, type_name):
spg_type = self.schema.get(type_name)
if spg_type:
return spg_type.name
return f"{self.namespace}.{type_name}"

def assemble_sub_graph_with_spg_records(self, entities: List[Dict]):
sub_graph = SubGraph([], [])
for record in entities:
Expand All @@ -145,24 +152,30 @@ def assemble_sub_graph_with_spg_records(self, entities: List[Dict]):
if isinstance(prop_value, str):
prop_value = [prop_value]
for o_name in prop_value:
sub_graph.add_node(id=o_name, name=o_name, label=o_label)
sub_graph.add_node(
id=o_name,
name=o_name,
label=self.get_std_type_name(o_label),
)
sub_graph.add_edge(
s_id=s_name,
s_label=s_label,
s_label=self.get_std_type_name(s_label),
p=prop_name,
o_id=o_name,
o_label=o_label,
o_label=self.get_std_type_name(o_label),
)
tmp_properties.pop(prop_name)
record["properties"] = tmp_properties
sub_graph.add_node(
id=s_name, name=s_name, label=s_label, properties=properties
id=s_name,
name=s_name,
label=self.get_std_type_name(s_label),
properties=properties,
)
return sub_graph, entities

@staticmethod
def assemble_sub_graph_with_triples(
sub_graph: SubGraph, entities: List[Dict], triples: List[list]
self, sub_graph: SubGraph, entities: List[Dict], triples: List[list]
):
"""
Assembles edges in the subgraph based on a list of triples and entities.
Expand All @@ -185,33 +198,44 @@ def get_category(entities_data, entity_name):
tri[0] = processing_phrases(tri[0])
if s_category is None:
s_category = OTHER_TYPE
sub_graph.add_node(tri[0], tri[0], s_category)
sub_graph.add_node(tri[0], tri[0], self.get_std_type_name(s_category))
o_category = get_category(entities, tri[2])
tri[2] = processing_phrases(tri[2])
if o_category is None:
o_category = OTHER_TYPE
sub_graph.add_node(tri[2], tri[2], o_category)
sub_graph.add_node(tri[2], tri[2], self.get_std_type_name(o_category))

edge_type = to_camel_case(tri[1])
if edge_type:
sub_graph.add_edge(tri[0], s_category, edge_type, tri[2], o_category)
sub_graph.add_edge(
tri[0],
self.get_std_type_name(s_category),
edge_type,
tri[2],
self.get_std_type_name(o_category),
)

return sub_graph

@staticmethod
def assemble_sub_graph_with_chunk(sub_graph: SubGraph, chunk: Chunk):
def assemble_sub_graph_with_chunk(self, sub_graph: SubGraph, chunk: Chunk):
"""
Associates a Chunk object with the subgraph, adding it as a node and connecting it with existing nodes.
Args:
sub_graph (SubGraph): The subgraph to add the chunk information to.
chunk (Chunk): The chunk object containing the text and metadata.
"""
for node in sub_graph.nodes:
sub_graph.add_edge(node.id, node.label, "source", chunk.id, CHUNK_TYPE)
sub_graph.add_edge(
node.id,
self.get_std_type_name(node.label),
"source",
chunk.id,
self.get_std_type_name(CHUNK_TYPE),
)
sub_graph.add_node(
chunk.id,
chunk.name,
CHUNK_TYPE,
self.get_std_type_name(CHUNK_TYPE),
{
"id": chunk.id,
"name": chunk.name,
Expand Down Expand Up @@ -260,7 +284,7 @@ def assemble_sub_graph_with_entities(
sub_graph.add_node(
name,
name,
ent["category"],
self.get_std_type_name(ent["category"]),
{
"desc": ent.get("description", ""),
"semanticType": ent.get("type", ""),
Expand All @@ -274,7 +298,7 @@ def assemble_sub_graph_with_entities(
sub_graph.add_node(
official_name,
official_name,
ent["category"],
self.get_std_type_name(ent["category"]),
{
"desc": ent.get("description", ""),
"semanticType": ent.get("type", ""),
Expand All @@ -283,10 +307,10 @@ def assemble_sub_graph_with_entities(
)
sub_graph.add_edge(
name,
ent["category"],
self.get_std_type_name(ent["category"]),
"OfficialName",
official_name,
ent["category"],
self.get_std_type_name(ent["category"]),
)

def append_official_name(
Expand Down
36 changes: 30 additions & 6 deletions kag/builder/component/extractor/spg_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
# or implied.
import copy
import logging
import os
from typing import List, Dict

from tenacity import retry, stop_after_attempt
Expand Down Expand Up @@ -42,8 +43,12 @@ def __init__(self, **kwargs):
self.spg_ner_types.append(type_name)
continue
self.kag_ner_types.append(type_name)
self.kag_ner_prompt = PromptOp.load(self.biz_scene, "ner")(language=self.language, project_id=self.project_id)
self.spg_ner_prompt = SPG_KGPrompt(self.spg_ner_types, self.language, project_id=self.project_id)
self.kag_ner_prompt = PromptOp.load(self.biz_scene, "ner")(
language=self.language, project_id=self.project_id
)
self.spg_ner_prompt = SPG_KGPrompt(
self.spg_ner_types, self.language, project_id=self.project_id
)

@retry(stop=stop_after_attempt(3))
def named_entity_recognition(self, passage: str):
Expand Down Expand Up @@ -72,17 +77,33 @@ def assemble_sub_graph_with_spg_records(self, entities: List[Dict]):
continue
if prop_name in spg_type.properties:
from knext.schema.model.property import Property

prop: Property = spg_type.properties.get(prop_name)
o_label = prop.object_type_name_en
if o_label not in BASIC_TYPES:
if isinstance(prop_value, str):
prop_value = [prop_value]
for o_name in prop_value:
sub_graph.add_node(id=o_name, name=o_name, label=o_label)
sub_graph.add_edge(s_id=s_name, s_label=s_label, p=prop_name, o_id=o_name, o_label=o_label)
sub_graph.add_node(
id=o_name,
name=o_name,
label=self.get_std_type_name(o_label),
)
sub_graph.add_edge(
s_id=s_name,
s_label=self.get_std_type_name(s_label),
p=prop_name,
o_id=o_name,
o_label=self.get_std_type_name(o_label),
)
tmp_properties.pop(prop_name)
record["properties"] = tmp_properties
sub_graph.add_node(id=s_name, name=s_name, label=s_label, properties=properties)
sub_graph.add_node(
id=s_name,
name=s_name,
label=self.get_std_type_name(s_label),
properties=properties,
)
return sub_graph, entities

def invoke(self, input: Input, **kwargs) -> List[Output]:
Expand All @@ -102,7 +123,10 @@ def invoke(self, input: Input, **kwargs) -> List[Output]:
try:
entities = self.named_entity_recognition(passage)
sub_graph, entities = self.assemble_sub_graph_with_spg_records(entities)
filtered_entities = [{k: v for k, v in ent.items() if k in ["entity", "category"]} for ent in entities]
filtered_entities = [
{k: v for k, v in ent.items() if k in ["entity", "category"]}
for ent in entities
]
triples = self.triples_extraction(passage, filtered_entities)
std_entities = self.named_entity_standardization(passage, filtered_entities)
self.append_official_name(entities, std_entities)
Expand Down