Skip to content

Commit

Permalink
Remove sensitive information
Browse files Browse the repository at this point in the history
  • Loading branch information
andylau-55 committed Oct 24, 2024
1 parent 95354d0 commit d0d352d
Show file tree
Hide file tree
Showing 429 changed files with 224,242 additions and 0 deletions.
1 change: 1 addition & 0 deletions KAG_VERSION
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0.0.3.20241022.2
7 changes: 7 additions & 0 deletions LEGAL.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Legal Disclaimer

Within this source code, the comments in Chinese shall be the original, governing version. Any comments in other languages are for reference only. In the event of any conflict between the Chinese language version comments and other language version comments, the Chinese language version shall prevail.

法律免责声明

关于代码注释部分,中文注释为官方版本,其它语言注释仅做参考。中文注释可能与其它语言注释存在不一致,当中文注释与其它语言注释存在不一致时,请以中文注释为准。
2 changes: 2 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
recursive-include kag *
recursive-exclude kag/examples *
18 changes: 18 additions & 0 deletions kag/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Copyright 2024 OpenSPG Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied.


# Package name and version string exposed as module attributes.
__package_name__ = "openspg-kag"
__version__ = "0.0.3.20241022.2"

from kag.common.env import init_env

# Executed as a side effect of importing this module.
init_env()
10 changes: 10 additions & 0 deletions kag/builder/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Copyright 2023 OpenSPG Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied.
22 changes: 22 additions & 0 deletions kag/builder/component/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# -*- coding: utf-8 -*-
# Copyright 2023 OpenSPG Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied.

from kag.builder.component.mapping.spg_type_mapping import SPGTypeMapping
from kag.builder.component.mapping.relation_mapping import RelationMapping
from kag.builder.component.writer.kg_writer import KGWriter


# Names exported by `from ... import *`; each one is imported above.
__all__ = [
"SPGTypeMapping",
"RelationMapping",
"KGWriter",
]
17 changes: 17 additions & 0 deletions kag/builder/component/aligner/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# -*- coding: utf-8 -*-
# Copyright 2023 OpenSPG Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied.

from kag.builder.component.aligner.semantic_aligner import SemanticAligner

# Names exported by `from ... import *`; SemanticAligner is imported above.
__all__ = [
'SemanticAligner',
]
49 changes: 49 additions & 0 deletions kag/builder/component/aligner/kag_post_processor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# -*- coding: utf-8 -*-
# Copyright 2023 OpenSPG Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied.

from typing import List, Sequence, Dict, Type

from kag.builder.model.sub_graph import SubGraph
from kag.interface.builder import AlignerABC
from knext.common.base.runnable import Input, Output


class KAGPostProcessorAligner(AlignerABC):
    """Aligner that folds a list of sub-graphs into one de-duplicated sub-graph."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    @property
    def input_types(self) -> Type[Input]:
        """Input type handled by this component (``SubGraph``)."""
        return SubGraph

    @property
    def output_types(self) -> Type[Output]:
        """Output type produced by this component (``SubGraph``)."""
        return SubGraph

    def invoke(self, input: List[SubGraph], **kwargs) -> SubGraph:
        """
        Merge the given sub-graphs into a single new sub-graph.

        Nodes and edges are appended in encounter order; an item is skipped
        when an equal item (per ``in`` on the merged list) is already present.

        Args:
            input (List[SubGraph]): The sub-graphs to merge.
            **kwargs: Additional keyword arguments (unused).
        Returns:
            SubGraph: A new sub-graph holding the distinct nodes and edges.
        """
        combined = SubGraph(nodes=[], edges=[])
        for graph in input:
            for candidate_node in graph.nodes:
                if candidate_node in combined.nodes:
                    continue
                combined.nodes.append(candidate_node)
            for candidate_edge in graph.edges:
                if candidate_edge in combined.edges:
                    continue
                combined.edges.append(candidate_edge)
        return combined

    def _handle(self, input: Sequence[Dict]) -> Dict:
        """Dict-in / dict-out wrapper around :meth:`invoke`."""
        sub_graphs = []
        for raw in input:
            sub_graphs.append(self.input_types.from_dict(raw))
        merged = self.invoke(sub_graphs)
        return merged.to_dict()

    def batch(self, inputs: List[Input], **kwargs) -> List[Output]:
        """Placeholder: performs no work and returns ``None``."""
        return None
90 changes: 90 additions & 0 deletions kag/builder/component/aligner/semantic_aligner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# -*- coding: utf-8 -*-
# Copyright 2023 OpenSPG Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied.

from typing import List, Type

from kag.interface.builder import AlignerABC
from kag.builder.model.sub_graph import SubGraph
from knext.common.base.runnable import Input, Output

from kag.common.semantic_infer import SemanticEnhance


class SemanticAligner(AlignerABC, SemanticEnhance):
    """
    A class for semantic alignment and enhancement, inheriting from AlignerABC and SemanticEnhance.
    """

    def __init__(self, **kwargs):
        # Both bases are initialised explicitly with the same keyword arguments.
        AlignerABC.__init__(self, **kwargs)
        SemanticEnhance.__init__(self, **kwargs)

    @property
    def input_types(self) -> Type[Input]:
        """Input type handled by this component (``SubGraph``)."""
        return SubGraph

    @property
    def output_types(self) -> Type[Output]:
        """Output type produced by this component (``SubGraph``)."""
        return SubGraph

    def invoke(self, input: SubGraph, **kwargs) -> List[SubGraph]:
        """
        Generates and adds concept nodes based on extracted entities and their context.

        For every distinct, non-empty, non-``Chunk`` node name the semantic
        expansion is requested once; the returned concepts are added as nodes
        and chained with ``self.hyper_edge`` edges
        (entity -> concept -> concept ...). The input sub-graph is modified in place.

        Args:
            input (SubGraph): The input subgraph.
            **kwargs: Additional keyword arguments.
        Returns:
            List[SubGraph]: A list containing the updated subgraph.
        """
        expanded_concept_nodes = []
        expanded_concept_edges = []

        # Context is the "content" property of the first Chunk node, if any.
        context = next(
            (
                node.properties.get("content")
                for node in input.nodes
                if node.label == "Chunk"
            ),
            None,
        )

        seen_names = set()
        for node in input.nodes:
            if node.id == "" or node.name == "" or node.label == "Chunk":
                continue
            # Expand each entity name only once.
            if node.name in seen_names:
                continue
            seen_names.add(node.name)

            expand_dict = self.expand_semantic_concept(
                node.name, context=context, target=None
            )
            expand_nodes = [
                {
                    "id": info["name"],
                    "name": info["name"],
                    "label": self.concept_label,
                    "properties": {"desc": info["desc"]},
                }
                for info in expand_dict
            ]
            expanded_concept_nodes.extend(expand_nodes)

            # entity -> concept, concept -> concept
            path_nodes = [node.to_dict()] + expand_nodes
            for source, target in zip(path_nodes, path_nodes[1:]):
                expanded_concept_edges.append(
                    {
                        "s_id": source["id"],
                        "s_label": source["label"],
                        "p": self.hyper_edge,
                        "o_id": target["id"],
                        "o_label": target["label"],
                    }
                )

        # Additions happen after the scan so input.nodes is not modified
        # while it is being iterated.
        for concept_node in expanded_concept_nodes:
            input.add_node(**concept_node)
        for concept_edge in expanded_concept_edges:
            input.add_edge(**concept_edge)

        return [input]
116 changes: 116 additions & 0 deletions kag/builder/component/aligner/spg_post_processor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
# -*- coding: utf-8 -*-
# Copyright 2023 OpenSPG Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied.

from typing import List, Type, Dict

from kag.interface.builder import AlignerABC
from knext.schema.client import BASIC_TYPES
from kag.builder.model.spg_record import SPGRecord
from kag.builder.model.sub_graph import SubGraph
from knext.common.base.runnable import Input, Output
from knext.schema.client import SchemaClient
from knext.schema.model.base import ConstraintTypeEnum, BaseSpgType


class SPGPostProcessorAligner(AlignerABC):
    """Aligner that merges SPG records and converts them into a sub-graph."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Schema types loaded through SchemaClient for this project id.
        self.spg_types = SchemaClient(project_id=self.project_id).load()

    @property
    def input_types(self) -> Type[Input]:
        """Input type handled by this component (``SPGRecord``)."""
        return SPGRecord

    @property
    def output_types(self) -> Type[Output]:
        """Output type produced by this component (``SubGraph``)."""
        return SubGraph

    @staticmethod
    def _merge_multi_value(old_value, new_value) -> str:
        """
        Join two comma-separated value strings into one.

        Empty entries are dropped and duplicates removed while keeping
        first-seen order (existing values first), so the result is
        deterministic and never contains stray commas.
        """
        items = []
        for raw in (old_value, new_value):
            if raw:
                items.extend(raw.split(","))
        return ",".join(dict.fromkeys(item for item in items if item))

    def merge(self, spg_records: List[SPGRecord]):
        """
        Merge records that share the same type name and ``name`` property.

        The first record seen for a key is kept and updated in place:
        properties it lacks are copied over; for properties it already has,
        values of non-basic or multi-valued properties are combined as a
        comma-separated set, other known properties are overwritten by the
        later record, and properties absent from the schema are left untouched.

        Args:
            spg_records (List[SPGRecord]): Records to merge.
        Returns:
            list: The merged records, in first-seen key order.
        """
        merged_spg_records = {}
        for record in spg_records:
            key = f"{record.spg_type_name}#{record.get_property('name', '')}"
            if key not in merged_spg_records:
                merged_spg_records[key] = record
                continue

            old_record = merged_spg_records[key]
            for prop_name, prop_value in record.properties.items():
                if prop_name not in old_record.properties:
                    old_record.properties[prop_name] = prop_value
                    continue

                prop = self.spg_types.get(record.spg_type_name).properties.get(
                    prop_name
                )
                if not prop:
                    continue

                is_multi_valued = (
                    prop.object_type_name not in BASIC_TYPES
                    or prop.constraint.get(ConstraintTypeEnum.MultiValue)
                )
                if is_multi_valued:
                    old_record.properties[prop_name] = self._merge_multi_value(
                        old_record.properties.get(prop_name), prop_value
                    )
                else:
                    old_record.properties[prop_name] = prop_value

        return list(merged_spg_records.values())

    @staticmethod
    def from_spg_record(
        spg_types: Dict[str, BaseSpgType], spg_records: List[SPGRecord]
    ):
        """
        Build a sub-graph from SPG records.

        Each record becomes one node. Every property whose schema object type
        is not a basic type is turned into edges (one per comma-separated
        target id) and removed from the record's properties, so it is not
        also stored on the node.

        Args:
            spg_types (Dict[str, BaseSpgType]): Schema types keyed by type name.
            spg_records (List[SPGRecord]): Records to convert.
        Returns:
            SubGraph: The sub-graph containing the generated nodes and edges.
        """
        from knext.schema.model.property import Property

        sub_graph = SubGraph([], [])
        for record in spg_records:
            s_id = record.id
            s_name = record.name
            s_label = record.spg_type_name
            properties = record.properties

            spg_type = spg_types.get(record.spg_type_name)
            # Iterate over a snapshot: relation properties are popped from the
            # same dict below, and mutating a dict while iterating its live
            # view raises RuntimeError.
            for prop_name, prop_value in list(record.properties.items()):
                if prop_name not in spg_type.properties:
                    continue
                prop: Property = spg_type.properties.get(prop_name)
                o_label = prop.object_type_name
                if o_label in BASIC_TYPES:
                    continue
                for o_id in prop_value.split(","):
                    sub_graph.add_edge(
                        s_id=s_id,
                        s_label=s_label,
                        p=prop_name,
                        o_id=o_id,
                        o_label=o_label,
                    )
                properties.pop(prop_name)
            sub_graph.add_node(
                id=s_id, name=s_name, label=s_label, properties=properties
            )

        return sub_graph

    def invoke(self, input: Input, **kwargs) -> List[Output]:
        """Convert a single record into a one-element list of sub-graphs."""
        # NOTE(review): this delegates to SubGraph.from_spg_record rather than
        # this class's own from_spg_record — confirm which one is intended.
        subgraph = SubGraph.from_spg_record(self.spg_types, [input])
        return [subgraph]

    def batch(self, inputs: List[Input], **kwargs) -> List[Output]:
        """Merge the records, then convert them into a single sub-graph."""
        merged_records = self.merge(inputs)
        # NOTE(review): same delegation to SubGraph.from_spg_record as invoke.
        subgraph = SubGraph.from_spg_record(self.spg_types, merged_records)
        return [subgraph]
Loading

0 comments on commit d0d352d

Please sign in to comment.