Skip to content

Commit

Permalink
Merge branch '0.6_dev' into patch-1
Browse files Browse the repository at this point in the history
  • Loading branch information
caszkgui authored Nov 15, 2024
2 parents 34146f8 + c7e8745 commit 83f255c
Show file tree
Hide file tree
Showing 232 changed files with 8,259 additions and 4,091 deletions.
22 changes: 22 additions & 0 deletions .github/workflows/cloud-code-scan.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Runs the layotto/alipay-cloud-devops-codescan action twice for each
# pull_request_target event: once with scan_type `stc`, once with `sca`.
name: Alipay Cloud Devops Codescan
on:
  pull_request_target:
jobs:
  # Scan with scan_type `stc`.
  stc:
    runs-on: ubuntu-latest
    steps:
      - name: codeScan
        # NOTE(review): the action is referenced at `@main` rather than a fixed
        # tag or commit while receiving repository secrets — confirm this is intended.
        uses: layotto/alipay-cloud-devops-codescan@main
        with:
          # Credentials are read from the ALI_PID / ALI_PK repository secrets.
          parent_uid: ${{ secrets.ALI_PID }}
          private_key: ${{ secrets.ALI_PK }}
          scan_type: stc
  # Scan with scan_type `sca`; otherwise identical to the `stc` job.
  sca:
    runs-on: ubuntu-latest
    steps:
      - name: codeScan
        uses: layotto/alipay-cloud-devops-codescan@main
        with:
          parent_uid: ${{ secrets.ALI_PID }}
          private_key: ${{ secrets.ALI_PK }}
          scan_type: sca
28 changes: 28 additions & 0 deletions .github/workflows/code-format-check.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Installs pre-commit on Python 3.10 and runs every configured hook against
# all files in the repository.
name: Code Format Check

# Triggered by pushes, pull requests, manual dispatch, and a
# repository_dispatch event of type `my_event`.
# NOTE(review): `my_event` looks like a placeholder event type — confirm that
# something actually sends it.
on:
  push:
  pull_request:
  workflow_dispatch:
  repository_dispatch:
    types: [my_event]
jobs:
  format-check:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so the version is read as the string 3.10, not a number.
          python-version: '3.10'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pre-commit
      # Runs the hooks over every file rather than only the changed ones.
      - name: Run pre-commit
        run: pre-commit run --all-files
25 changes: 25 additions & 0 deletions .github/workflows/license-checker.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Checks license headers and dependency licenses with apache/skywalking-eyes.
name: License Checker

# Runs for push and pull_request events filtered to the `master` branch.
on:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master

jobs:
  check:
    name: "License Validation"
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
      # NOTE(review): both skywalking-eyes actions are referenced at `@main`
      # rather than a fixed tag or commit — confirm this is intended.
      - name: Check License Header
        uses: apache/skywalking-eyes@main
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          log: info
      - name: Check Dependencies' License
        uses: apache/skywalking-eyes/dependency@main
28 changes: 28 additions & 0 deletions .github/workflows/pr-title-check.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Validates pull request titles with amannn/action-semantic-pull-request.
name: "Lint PR"

# Re-runs when a pull request is opened, edited, or synchronized.
on:
  pull_request_target:
    types:
      - opened
      - edited
      - synchronize

jobs:
  main:
    name: Validate PR title
    runs-on: ubuntu-latest
    steps:
      # https://www.conventionalcommits.org/en/v1.0.0/#summary
      - uses: amannn/action-semantic-pull-request@v5
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          # A scope is mandatory in the title.
          requireScope: true
          # Negative lookahead: the subject must not begin with an uppercase A-Z letter.
          subjectPattern: ^(?![A-Z]).+$
          # If `subjectPattern` is configured, you can use this property to override
          # the default error message that is shown when the pattern doesn't match.
          # The variables `subject` and `title` can be used within the message.
          subjectPatternError: |
            The subject "{subject}" found in the pull request title "{title}"
            didn't match the configured pattern. Please ensure that the subject
            doesn't start with an uppercase character.
12 changes: 12 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# pre-commit hooks: black and flake8, each restricted by `files` to .py files
# under the kag/ directory.
repos:
  - repo: https://github.com/psf/black
    rev: 22.3.0
    hooks:
      - id: black
        # Only paths matching kag/**.py are passed to the hook.
        files: ^kag/.*\.py$
  - repo: https://github.com/pycqa/flake8
    rev: 4.0.1
    hooks:
      - id: flake8
        files: ^kag/.*\.py$

2 changes: 1 addition & 1 deletion KAG_VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.5-beta1
0.5.2-beta1
2 changes: 1 addition & 1 deletion kag/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@


__package_name__ = "openspg-kag"
__version__ = "0.5-beta1"
__version__ = "0.5.2-beta1"

from kag.common.env import init_env

Expand Down
1 change: 0 additions & 1 deletion kag/builder/component/aligner/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,3 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied.

18 changes: 12 additions & 6 deletions kag/builder/component/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,28 @@
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied.
import os
from abc import ABC
from typing import List, Dict
import logging

from knext.common.base.component import Component
from knext.common.base.runnable import Input, Output
from knext.project.client import ProjectClient
from kag.common.llm.client import LLMClient
from kag.common.llm import LLMClient
from kag.common.registry import Registrable


class BuilderComponent(Component, ABC):
@Registrable.register("builder")
class BuilderComponent(Component, Registrable):
"""
Abstract base class for all builder components.
"""

project_id: str = None
def __init__(self, project_id: int = None, **kwargs):
super().__init__(**kwargs)
if project_id is None:
project_id = int(os.getenv("KAG_PROJECT_ID"))
self.project_id = project_id
self.config = ProjectClient().get_config(self.project_id)

def _init_llm(self) -> LLMClient:
"""
Expand All @@ -48,9 +54,9 @@ def _init_llm(self) -> LLMClient:
try:
config = ProjectClient().get_config(project_id)
llm_config.update(config.get("llm", {}))
except:
except Exception as e:
logging.warning(
f"Failed to get project config for project id: {project_id}"
f"Failed to get project config for project id: {project_id}, info: {e}"
)
llm = LLMClient.from_config(llm_config)
return llm
Expand Down
50 changes: 37 additions & 13 deletions kag/builder/component/extractor/kag_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,17 @@ class KAGExtractor(ExtractorABC):
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.llm = self._init_llm()
self.biz_scene = os.getenv("KAG_PROMPT_BIZ_SCENE", "default")
self.language = os.getenv("KAG_PROMPT_LANGUAGE", "en")
self.prompt_config = self.config.get("prompt", {})
self.biz_scene = self.prompt_config.get("biz_scene") or os.getenv(
"KAG_PROMPT_BIZ_SCENE", "default"
)
self.language = self.prompt_config.get("language") or os.getenv(
"KAG_PROMPT_LANGUAGE", "en"
)
self.schema = SchemaClient(project_id=self.project_id).load()
self.ner_prompt = PromptOp.load(self.biz_scene, "ner")(language=self.language, project_id=self.project_id)
self.ner_prompt = PromptOp.load(self.biz_scene, "ner")(
language=self.language, project_id=self.project_id
)
self.std_prompt = PromptOp.load(self.biz_scene, "std")(language=self.language)
self.triple_prompt = PromptOp.load(self.biz_scene, "triple")(
language=self.language
Expand All @@ -59,7 +66,9 @@ def __init__(self, **kwargs):
self.kg_types.append(type_name)
break
if self.kg_types:
self.kg_prompt = SPG_KGPrompt(self.kg_types, language=self.language, project_id=self.project_id)
self.kg_prompt = SPG_KGPrompt(
self.kg_types, language=self.language, project_id=self.project_id
)

@property
def input_types(self) -> Type[Input]:
Expand Down Expand Up @@ -129,17 +138,26 @@ def assemble_sub_graph_with_spg_records(self, entities: List[Dict]):
continue
if prop_name in spg_type.properties:
from knext.schema.model.property import Property

prop: Property = spg_type.properties.get(prop_name)
o_label = prop.object_type_name_en
if o_label not in BASIC_TYPES:
if isinstance(prop_value, str):
prop_value = [prop_value]
for o_name in prop_value:
sub_graph.add_node(id=o_name, name=o_name, label=o_label)
sub_graph.add_edge(s_id=s_name, s_label=s_label, p=prop_name, o_id=o_name, o_label=o_label)
sub_graph.add_edge(
s_id=s_name,
s_label=s_label,
p=prop_name,
o_id=o_name,
o_label=o_label,
)
tmp_properties.pop(prop_name)
record["properties"] = tmp_properties
sub_graph.add_node(id=s_name, name=s_name, label=s_label, properties=properties)
sub_graph.add_node(
id=s_name, name=s_name, label=s_label, properties=properties
)
return sub_graph, entities

@staticmethod
Expand Down Expand Up @@ -173,10 +191,9 @@ def get_category(entities_data, entity_name):
if o_category is None:
o_category = OTHER_TYPE
sub_graph.add_node(tri[2], tri[2], o_category)

sub_graph.add_edge(
tri[0], s_category, to_camel_case(tri[1]), tri[2], o_category
)
edge_type = to_camel_case(tri[1])
if edge_type:
sub_graph.add_edge(tri[0], s_category, edge_type, tri[2], o_category)

return sub_graph

Expand All @@ -198,14 +215,18 @@ def assemble_sub_graph_with_chunk(sub_graph: SubGraph, chunk: Chunk):
"id": chunk.id,
"name": chunk.name,
"content": f"{chunk.name}\n{chunk.content}",
**chunk.kwargs
**chunk.kwargs,
},
)
sub_graph.id = chunk.id
return sub_graph

def assemble_sub_graph(
self, sub_graph: SubGraph, chunk: Chunk, entities: List[Dict], triples: List[list]
self,
sub_graph: SubGraph,
chunk: Chunk,
entities: List[Dict],
triples: List[list],
):
"""
Integrates entity and triple information into a subgraph, and associates it with a chunk of text.
Expand Down Expand Up @@ -310,7 +331,10 @@ def invoke(self, input: Input, **kwargs) -> List[Output]:
try:
entities = self.named_entity_recognition(passage)
sub_graph, entities = self.assemble_sub_graph_with_spg_records(entities)
filtered_entities = [{k: v for k, v in ent.items() if k in ["entity", "category"]} for ent in entities]
filtered_entities = [
{k: v for k, v in ent.items() if k in ["entity", "category"]}
for ent in entities
]
triples = self.triples_extraction(passage, filtered_entities)
std_entities = self.named_entity_standardization(passage, filtered_entities)
self.append_official_name(entities, std_entities)
Expand Down
22 changes: 18 additions & 4 deletions kag/builder/component/extractor/spg_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,9 @@ def __init__(self, **kwargs):
self.spg_ner_types.append(type_name)
continue
self.kag_ner_types.append(type_name)
self.kag_ner_prompt = PromptOp.load(self.biz_scene, "ner")(language=self.language, project_id=self.project_id)
self.kag_ner_prompt = PromptOp.load(self.biz_scene, "ner")(
language=self.language, project_id=self.project_id
)
self.spg_ner_prompt = SPG_KGPrompt(self.spg_ner_types, self.language)

@retry(stop=stop_after_attempt(3))
Expand Down Expand Up @@ -72,17 +74,26 @@ def assemble_sub_graph_with_spg_records(self, entities: List[Dict]):
continue
if prop_name in spg_type.properties:
from knext.schema.model.property import Property

prop: Property = spg_type.properties.get(prop_name)
o_label = prop.object_type_name_en
if o_label not in BASIC_TYPES:
if isinstance(prop_value, str):
prop_value = [prop_value]
for o_name in prop_value:
sub_graph.add_node(id=o_name, name=o_name, label=o_label)
sub_graph.add_edge(s_id=s_name, s_label=s_label, p=prop_name, o_id=o_name, o_label=o_label)
sub_graph.add_edge(
s_id=s_name,
s_label=s_label,
p=prop_name,
o_id=o_name,
o_label=o_label,
)
tmp_properties.pop(prop_name)
record["properties"] = tmp_properties
sub_graph.add_node(id=s_name, name=s_name, label=s_label, properties=properties)
sub_graph.add_node(
id=s_name, name=s_name, label=s_label, properties=properties
)
return sub_graph, entities

def invoke(self, input: Input, **kwargs) -> List[Output]:
Expand All @@ -102,7 +113,10 @@ def invoke(self, input: Input, **kwargs) -> List[Output]:
try:
entities = self.named_entity_recognition(passage)
sub_graph, entities = self.assemble_sub_graph_with_spg_records(entities)
filtered_entities = [{k: v for k, v in ent.items() if k in ["entity", "category"]} for ent in entities]
filtered_entities = [
{k: v for k, v in ent.items() if k in ["entity", "category"]}
for ent in entities
]
triples = self.triples_extraction(passage, filtered_entities)
std_entities = self.named_entity_standardization(passage, filtered_entities)
self.append_official_name(entities, std_entities)
Expand Down
2 changes: 1 addition & 1 deletion kag/builder/component/mapping/relation_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def __init__(
subject_name: SPGTypeName,
predicate_name: RelationName,
object_name: SPGTypeName,
**kwargs
**kwargs,
):
super().__init__(**kwargs)
schema = SchemaClient(project_id=self.project_id).load()
Expand Down
4 changes: 3 additions & 1 deletion kag/builder/component/mapping/spg_type_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@ class SPGTypeMapping(MappingABC):
fuse_op (FuseOpABC, optional): The user-defined fuse operator. Defaults to None.
"""

def __init__(self, spg_type_name: SPGTypeName, fuse_func: FuseFunc = None, **kwargs):
def __init__(
self, spg_type_name: SPGTypeName, fuse_func: FuseFunc = None, **kwargs
):
super().__init__(**kwargs)
self.schema = SchemaClient(project_id=self.project_id).load()
assert (
Expand Down
Loading

0 comments on commit 83f255c

Please sign in to comment.